diff --git a/Common/File.h b/Common/File.h index 3cba78a..5406d4b 100644 --- a/Common/File.h +++ b/Common/File.h @@ -75,14 +75,14 @@ namespace LXC::Util // Finds the location of a given index within a file // inline bool GetFileLocationAtIndex(FileLocation& location, const std::string& file, __int32 index) { - // Returns false if outside the bounds // - if (index < 0 || index > file.length()) - return false; - // Resets location // location.line = 1; location.col = 1; + // Returns false if outside the bounds // + if (index < 0 || index > file.length()) + return false; + // Finds the location // __int32 localIndex = 0; while (localIndex != index) diff --git a/Common/LXC.h b/Common/LXC.h index 2a4cd8a..231ccc4 100644 --- a/Common/LXC.h +++ b/Common/LXC.h @@ -2,6 +2,7 @@ // Standard libraries // +#include #include // LXC util files // diff --git a/LXC/LXC.cpp b/LXC/LXC.cpp index 92a6c35..754c5a9 100644 --- a/LXC/LXC.cpp +++ b/LXC/LXC.cpp @@ -47,6 +47,9 @@ int main(int argc, char** argv) if (err.reason == Lexer::LexerError::InvalidCharacter) Util::PrintLn(": {", fileContents.Result()[err.index], '}'); + else if (err.reason == Lexer::LexerError::UnknownSymbolOrOperand) + Util::PrintLn(": {", err.info, '}'); + else Util::PrintLn(); diff --git a/Lexer/inc/Lexer.h b/Lexer/inc/Lexer.h index d98b523..641cd79 100644 --- a/Lexer/inc/Lexer.h +++ b/Lexer/inc/Lexer.h @@ -27,12 +27,13 @@ namespace LXC::Lexer enum Reason { InvalidCharacter, - UnterminatedStringLiteral + UnterminatedStringLiteral, + UnknownSymbolOrOperand }; // Constructor to pass arguments through to the struct // - LexerError(Reason _reason, __int32 errorIndex) - : reason(_reason), index(errorIndex) + LexerError(Reason _reason, __int32 errorIndex, std::string _info = "") + : reason(_reason), index(errorIndex), info(_info) {} // Turns the error into a c-string // @@ -41,7 +42,8 @@ namespace LXC::Lexer static const char* reasons[] = { "Invalid character found in source", - "Unterminated string literal in source" + 
"Unterminated string literal in source", + "Unknown symbol or operand in source" }; return reasons[reason]; @@ -50,6 +52,7 @@ namespace LXC::Lexer // Error information // const Reason reason; const __int32 index; + const std::string info; }; // Turns a file into a vector of tokens // diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp index 3f6a293..d78f520 100644 --- a/Lexer/src/Lexer.cpp +++ b/Lexer/src/Lexer.cpp @@ -5,116 +5,174 @@ namespace LXC::Internal { - static constexpr bool IsNumeric(const char c) - { - return c >= '0' && c <= '9'; - } + static constexpr bool IsNumeric(const char c) + { + return c >= '0' && c <= '9'; + } - static constexpr bool IsAlpha(const char c) - { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); - } + static constexpr bool IsAlpha(const char c) + { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } - static constexpr bool IsWhitespace(const char c) - { - return c == ' ' || c == '\t' || c == '\n' || c == '\r'; - } + static constexpr bool IsWhitespace(const char c) + { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; + } + + static constexpr bool IsSymbolOrOperator(const char c) + { + return + c == '+' || c == '-' || + c == '*' || c == '/' || + c == '%' || c == '=' || + c == ',' || c == '[' || + c == ']' || c == '{' || + c == '}' || c == '(' || + c == ')'; + } + + static const std::unordered_map symbolAndOpMap = + { + { "+", Lexer::Token::Add }, + { "-", Lexer::Token::Sub }, + { "*", Lexer::Token::Mul }, + { "/", Lexer::Token::Div }, + { "%", Lexer::Token::Mod }, + + { "=", Lexer::Token::Assign }, + { ",", Lexer::Token::Comma }, + + { "[", Lexer::Token::OpenBracket }, + { "]", Lexer::Token::CloseBracket }, + + { "{", Lexer::Token::OpenBrace }, + { "}", Lexer::Token::CloseBrace }, + + { ")", Lexer::Token::CloseParen }, + { "(", Lexer::Token::OpenParen } + }; } namespace LXC::Lexer { - LexerContext::LexerContext(const std::string& _source) : - source(_source), index(0), out{}, 
len((__int32)_source.length()), column(0), line(0) - {} + LexerContext::LexerContext(const std::string& _source) : + source(_source), index(0), out{}, len((__int32)_source.length()), column(0), line(0) + {} - Util::ReturnVal TokenizeFile(const std::string& fileContents) - { - // Creates the context for the lexer // - LexerContext ctx(fileContents); + Util::ReturnVal TokenizeFile(const std::string& fileContents) + { + // Creates the context for the lexer // + LexerContext ctx(fileContents); - struct - { - bool inStrLiteral = false; - bool inIdentifier = false; - bool inNumLiteral = false; + struct + { + bool inStrLiteral = false; + bool inIdentifier = false; + bool inNumLiteral = false; + bool inSymbolOrOp = false; - bool inComment = false; + bool inComment = false; - unsigned __int32 sectionStart = 0; + unsigned __int32 sectionStart = 0; - } trackers; + } trackers; - while (ctx.index < ctx.len) - { - // The current char within the source that is being lexed // - const char current = ctx.source[ctx.index]; - const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0'; + while (ctx.index < ctx.len) + { + // The current char within the source that is being lexed // + const char current = ctx.source[ctx.index]; + const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0'; - // === Comments === // - if (current == '#') - trackers.inComment = !trackers.inComment; + // === Comments === // + if (current == '#') + trackers.inComment = !trackers.inComment; - else if (trackers.inComment) {} // Contents of comments are skipped over + else if (trackers.inComment) {} // Contents of comments are skipped over - // === String literals === // - else if (current == '"') - { - // Updates trackers // - trackers.inStrLiteral = !trackers.inStrLiteral; - trackers.sectionStart = trackers.inStrLiteral ? 
ctx.index : trackers.sectionStart; + // === String literals === // + else if (current == '"') + { + // Updates trackers // + trackers.inStrLiteral = !trackers.inStrLiteral; + trackers.sectionStart = trackers.inStrLiteral ? ctx.index : trackers.sectionStart; - // Creates the token (if at the end of the string literal) // - if (!trackers.inStrLiteral) - ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral); - - } else if (trackers.inStrLiteral) {} + // Creates the token (if at the end of the string literal) // + if (!trackers.inStrLiteral) + ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral); + + } else if (trackers.inStrLiteral) {} - // === Numbers === // - else if (Internal::IsNumeric(current)) - { - // Updates trackers // - trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index; - trackers.inNumLiteral = true; + // === Numbers === // + else if (Internal::IsNumeric(current)) + { + // Updates trackers // + trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index; + trackers.inNumLiteral = true; - // Checks for the end of the number literal to create the token // - if (!Internal::IsNumeric(next)) _UNLIKELY - { - ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral); - trackers.inNumLiteral = false; - } - } + // Checks for the end of the number literal to create the token // + if (!Internal::IsNumeric(next)) _UNLIKELY + { + ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral); + trackers.inNumLiteral = false; + } + } - // === Words === // - else if (Internal::IsAlpha(current)) - { - // Updates trackers // - trackers.sectionStart = trackers.inIdentifier ? 
trackers.sectionStart : ctx.index; - trackers.inIdentifier = true; + // === Words === // + else if (Internal::IsAlpha(current)) + { + // Updates trackers // + trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index; + trackers.inIdentifier = true; - // Checks for the end of the word to create the token // - if (!Internal::IsAlpha(next)) _UNLIKELY - { - ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier); - trackers.inIdentifier = false; - } - } + // Checks for the end of the word to create the token // + if (!Internal::IsAlpha(next)) _UNLIKELY + { + ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier); + trackers.inIdentifier = false; + } + } - // === Whitespace === // - else if (Internal::IsWhitespace(current)) {} + // === Symbols/Operators === // + else if (Internal::IsSymbolOrOperator(current)) + { + // Updates trackers // + trackers.sectionStart = trackers.inSymbolOrOp ? 
trackers.sectionStart : ctx.index; + trackers.inSymbolOrOp = true; - // If an if-statement has not been triggered the character must be invalid // - else - return Util::FunctionFail(LexerError::InvalidCharacter, ctx.index); + // Checks for the end of the symbol or operator // + if (!Internal::IsSymbolOrOperator(next)) + { + // Ends the run so that the next symbol/operator records its own start // + trackers.inSymbolOrOp = false; + + // Finds the operator/symbol if it can // + std::string_view fullSymbol(ctx.source.data() + trackers.sectionStart, ctx.index - trackers.sectionStart + 1); + auto it = Internal::symbolAndOpMap.find(fullSymbol); + if (it != Internal::symbolAndOpMap.end()) + ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), it->second); - // Iterates to the next index // - ctx.column++; - ctx.index++; - } + else + return Util::FunctionFail(LexerError::UnknownSymbolOrOperand, trackers.sectionStart, std::string(fullSymbol)); + } + } - // Checks for an unterminated string literal // - if (trackers.inStrLiteral) - return Util::FunctionFail(LexerError::UnterminatedStringLiteral, trackers.sectionStart); + // === Whitespace === // + else if (Internal::IsWhitespace(current)) {} - return ctx.out; - } + // If an if-statement has not been triggered the character must be invalid // + else + return Util::FunctionFail(LexerError::InvalidCharacter, ctx.index); + + // Iterates to the next index // + ctx.column++; + ctx.index++; + } + + // Checks for an unterminated string literal // + if (trackers.inStrLiteral) + return Util::FunctionFail(LexerError::UnterminatedStringLiteral, trackers.sectionStart); + + return ctx.out; + } } diff --git a/example/example.lx b/example/example.lx index 72dea62..dcb0a0f 100644 --- a/example/example.lx +++ b/example/example.lx @@ -1 +1 @@ -FILE 4 CONTENTS "A" GO B HERE 34 5 "ELLO THER" +FILE 4 CONTENTS "A" GO B HERE 34 += 5 "ELLO THER"