From bb3d8fb13e627a5346c3c8bfb6de555a1ef20c4f Mon Sep 17 00:00:00 2001 From: Pasha Bibko <156938226+PashaBibko@users.noreply.github.com> Date: Sun, 20 Jul 2025 21:54:37 +0100 Subject: [PATCH] Started implementing Lexer errors --- Common/FileRead.h | 4 ++-- LXC/LXC.cpp | 9 ++++++++- Lexer/inc/Lexer.h | 34 +++++++++++++++++++++++++++++++--- Lexer/inc/Token.h | 18 +++++++++--------- Lexer/src/Lexer.cpp | 31 ++++++++++++++++++++++++------- Lexer/src/Token.cpp | 18 +++++++++--------- example/example.lx | 2 +- 7 files changed, 84 insertions(+), 32 deletions(-) diff --git a/Common/FileRead.h b/Common/FileRead.h index b4d913c..7b10b24 100644 --- a/Common/FileRead.h +++ b/Common/FileRead.h @@ -6,7 +6,7 @@ namespace LXC::Util { // Error returned when Util::ReadFile runs into errors // - struct FileReadError + struct FileReadError final { // Different reasons why the error can occur // enum Reason @@ -26,7 +26,7 @@ namespace LXC::Util const Reason reason; // Turns the error into a c-string // - inline static const char* const ReasonStr(const Reason& reason) + inline static const char* const ReasonStr(Reason reason) { static const char* reasons[] = { diff --git a/LXC/LXC.cpp b/LXC/LXC.cpp index c9b4813..20d431b 100644 --- a/LXC/LXC.cpp +++ b/LXC/LXC.cpp @@ -9,8 +9,10 @@ int main(int argc, char** argv) // Creates the debug log // Util::CreateLog("LXC.log"); + std::filesystem::path src = "example/example.lx"; + // Reads the given file to a string // - Util::ReturnVal fileContents = Util::ReadFile("example/example.lx"); + Util::ReturnVal fileContents = Util::ReadFile(src); if (fileContents.Failed()) _UNLIKELY { // Stores the error for easier access // @@ -34,6 +36,11 @@ int main(int argc, char** argv) // Stores the error for easier access // Lexer::LexerError& err = tokens.Error(); + // Prints the error to the console // + Util::PrintAs("[LXC] "); + Util::Print(src.filename().string()); + Util::PrintAs(" Error: "); + Util::Stop(); } diff --git a/Lexer/inc/Lexer.h b/Lexer/inc/Lexer.h index ac11bda..cc1a7fe 100644 --- a/Lexer/inc/Lexer.h +++ b/Lexer/inc/Lexer.h @@ -4,7 +4,7 @@ namespace LXC::Lexer { - struct LexerContext + struct LexerContext final { // Constructor to set the information of the context // LexerContext(const std::string& _source); @@ -21,8 +21,36 @@ namespace LXC::Lexer unsigned short line; }; - struct LexerError - {}; + struct LexerError final + { + // Different reasons why the Lexer can fail // + enum Reason + { + InvalidCharacter, + UnterminatedStringLiteral + }; + + // Constructor to pass arguments through to the struct // + LexerError(Reason _reason, __int32 errorIndex) + : reason(_reason), index(errorIndex) + {} + + // Turns the error into a c-string // + inline static const char* const ReasonStr(Reason reason) + { + static const char* reasons[] = + { + "Invalid character found in source: ", + "Unterminated string literal in source starting at: " + }; + + return reasons[reason]; + } + + // Error information // + const Reason reason; + const __int32 index; + }; // Turns a file into a vector of tokens // Util::ReturnVal TokenizeFile(const std::string& fileContents); diff --git a/Lexer/inc/Token.h b/Lexer/inc/Token.h index 88bf691..360d002 100644 --- a/Lexer/inc/Token.h +++ b/Lexer/inc/Token.h @@ -48,28 +48,28 @@ namespace LXC::Lexer For = TokenClass::Keyword, While, If, - Else_If, + ElseIf, Else, Return, // === User defined === // - String_Literal = TokenClass::UserDefined, - Num_Literal, + StringLiteral = TokenClass::UserDefined, + NumLiteral, Identifier, // === Symbols === // Assign = TokenClass::Symbols, - Close_bracket, - Open_bracket, + CloseBracket, + OpenBracket, - Close_brace, - Open_brace, + CloseBrace, + OpenBrace, - Close_paren, - Open_paren, + CloseParen, + OpenParen, Comma, diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp index bd50c3f..93e2a07 100644 --- a/Lexer/src/Lexer.cpp +++ b/Lexer/src/Lexer.cpp @@ -3,7 +3,7 @@ #include #include -namespace LXC::Lexer +namespace LXC::Internal { static constexpr bool IsNumeric(const char c) { @@ -15,6 +15,14 @@ namespace LXC::Lexer return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } + static constexpr bool IsWhitespace(const char c) + { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; + } +} + +namespace LXC::Lexer +{ LexerContext::LexerContext(const std::string& _source) : source(_source), index(0), out{}, len((__int32)_source.length()), column(0), line(0) {} @@ -57,40 +65,49 @@ namespace LXC::Lexer // Creates the token (if at the end of the string literal) // if (!trackers.inStrLiteral) - ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::String_Literal); + ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral); } else if (trackers.inStrLiteral) {} // === Numbers === // - else if (IsNumeric(current)) + else if (Internal::IsNumeric(current)) { // Updates trackers // trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index; trackers.inNumLiteral = true; // Checks for the end of the number literal to create the token // - if (!IsNumeric(next)) _UNLIKELY + if (!Internal::IsNumeric(next)) _UNLIKELY { - ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Num_Literal); + ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral); trackers.inNumLiteral = false; } } // === Words === // - else if (IsAlpha(current)) + else if (Internal::IsAlpha(current)) { // Updates trackers // trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index; trackers.inIdentifier = true; // Checks for the end of the word to create the token // - if (!IsAlpha(next)) _UNLIKELY + if (!Internal::IsAlpha(next)) _UNLIKELY { ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier); trackers.inIdentifier = false; } } + // === Whitespace === // + else if (Internal::IsWhitespace(current)) {} + + // If an if-statement has not been triggered the character must be invalid // + else + { + return Util::FunctionFail(LexerError::InvalidCharacter, -1); + } + // Iterates to the next index // ctx.column++; ctx.index++; diff --git a/Lexer/src/Token.cpp b/Lexer/src/Token.cpp index 9992edf..d808f90 100644 --- a/Lexer/src/Token.cpp +++ b/Lexer/src/Token.cpp @@ -48,21 +48,21 @@ namespace LXC::Lexer TOKEN_TYPE_CASE(Token::For); TOKEN_TYPE_CASE(Token::While); TOKEN_TYPE_CASE(Token::If); - TOKEN_TYPE_CASE(Token::Else_If); + TOKEN_TYPE_CASE(Token::ElseIf); TOKEN_TYPE_CASE(Token::Else); TOKEN_TYPE_CASE(Token::Return); - TOKEN_TYPE_CASE(Token::String_Literal); - TOKEN_TYPE_CASE(Token::Num_Literal); + TOKEN_TYPE_CASE(Token::StringLiteral); + TOKEN_TYPE_CASE(Token::NumLiteral); TOKEN_TYPE_CASE(Token::Identifier); TOKEN_TYPE_CASE(Token::Assign); - TOKEN_TYPE_CASE(Token::Close_bracket); - TOKEN_TYPE_CASE(Token::Open_bracket); - TOKEN_TYPE_CASE(Token::Close_brace); - TOKEN_TYPE_CASE(Token::Open_brace); - TOKEN_TYPE_CASE(Token::Close_paren); - TOKEN_TYPE_CASE(Token::Open_paren); + TOKEN_TYPE_CASE(Token::CloseBracket); + TOKEN_TYPE_CASE(Token::OpenBracket); + TOKEN_TYPE_CASE(Token::CloseBrace); + TOKEN_TYPE_CASE(Token::OpenBrace); + TOKEN_TYPE_CASE(Token::CloseParen); + TOKEN_TYPE_CASE(Token::OpenParen); TOKEN_TYPE_CASE(Token::Comma); TOKEN_TYPE_CASE(Token::End_of_file); diff --git a/example/example.lx b/example/example.lx index 1c8f718..d6227fa 100644 --- a/example/example.lx +++ b/example/example.lx @@ -1 +1 @@ -FILE 4 CONTENTS "A" GO B HERE 34 "ELLO THER" +FILE 4 CONTENTS "A" GO B HERE 34 * 5 "ELLO THER"