Started implementing Lexer errors

This commit is contained in:
Pasha Bibko
2025-07-20 21:54:37 +01:00
parent 6a0d2fc0a2
commit bb3d8fb13e
7 changed files with 84 additions and 32 deletions

View File

@@ -6,7 +6,7 @@
namespace LXC::Util namespace LXC::Util
{ {
// Error returned when Util::ReadFile runs into errors // // Error returned when Util::ReadFile runs into errors //
struct FileReadError struct FileReadError final
{ {
// Different reasons why the error can occur // // Different reasons why the error can occur //
enum Reason enum Reason
@@ -26,7 +26,7 @@ namespace LXC::Util
const Reason reason; const Reason reason;
// Turns the error into a c-string // // Turns the error into a c-string //
inline static const char* const ReasonStr(const Reason& reason) inline static const char* const ReasonStr(Reason reason)
{ {
static const char* reasons[] = static const char* reasons[] =
{ {

View File

@@ -9,8 +9,10 @@ int main(int argc, char** argv)
// Creates the debug log // // Creates the debug log //
Util::CreateLog("LXC.log"); Util::CreateLog("LXC.log");
std::filesystem::path src = "example/example.lx";
// Reads the given file to a string // // Reads the given file to a string //
Util::ReturnVal fileContents = Util::ReadFile("example/example.lx"); Util::ReturnVal fileContents = Util::ReadFile(src);
if (fileContents.Failed()) _UNLIKELY if (fileContents.Failed()) _UNLIKELY
{ {
// Stores the error for easier access // // Stores the error for easier access //
@@ -34,6 +36,11 @@ int main(int argc, char** argv)
// Stores the error for easier access // // Stores the error for easier access //
Lexer::LexerError& err = tokens.Error(); Lexer::LexerError& err = tokens.Error();
// Prints the error to the console //
Util::PrintAs<Util::WHITE>("[LXC] ");
Util::Print(src.filename().string());
Util::PrintAs<Util::LIGHT_RED>(" Error: ");
Util::Stop(); Util::Stop();
} }

View File

@@ -4,7 +4,7 @@
namespace LXC::Lexer namespace LXC::Lexer
{ {
struct LexerContext struct LexerContext final
{ {
// Constructor to set the information of the context // // Constructor to set the information of the context //
LexerContext(const std::string& _source); LexerContext(const std::string& _source);
@@ -21,8 +21,36 @@ namespace LXC::Lexer
unsigned short line; unsigned short line;
}; };
struct LexerError struct LexerError final
{}; {
// Different reasons why the Lexer can fail //
// The numeric value of each enumerator is used by ReasonStr as an index into its //
// message table, so the order here must match the order of the messages there //
enum Reason
{
// A character in the source was not recognised by the lexer //
InvalidCharacter,
// A string literal was opened but never closed //
UnterminatedStringLiteral
};
// Constructor to pass arguments through to the struct //
LexerError(Reason _reason, __int32 errorIndex)
: reason(_reason), index(errorIndex)
{}
// Turns the error reason into a c-string message prefix //
// Returns a pointer to a string literal, so the caller must not free or modify it //
// A reason that is not one of the known enumerators produces a generic fallback //
// message instead of reading past the end of the message table //
inline static const char* const ReasonStr(Reason reason)
{
    // Message table, indexed by the numeric value of Reason //
    static const char* const reasons[] =
    {
        "Invalid character found in source: ",
        "Unterminated string literal in source starting at: "
    };

    // A plain enum can hold a value that is not an enumerator (e.g. through a cast), //
    // and a negative value becomes a large unsigned one, so a single check covers both //
    const auto slot = static_cast<unsigned long long>(reason);
    if (slot >= sizeof(reasons) / sizeof(reasons[0]))
        return "Unknown lexer error: ";

    return reasons[slot];
}
// Error information //
const Reason reason;
const __int32 index;
};
// Turns a file into a vector of tokens // // Turns a file into a vector of tokens //
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents); Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents);

View File

@@ -48,28 +48,28 @@ namespace LXC::Lexer
For = TokenClass::Keyword, For = TokenClass::Keyword,
While, While,
If, If,
Else_If, ElseIf,
Else, Else,
Return, Return,
// === User defined === // // === User defined === //
String_Literal = TokenClass::UserDefined, StringLiteral = TokenClass::UserDefined,
Num_Literal, NumLiteral,
Identifier, Identifier,
// === Symbols === // // === Symbols === //
Assign = TokenClass::Symbols, Assign = TokenClass::Symbols,
Close_bracket, CloseBracket,
Open_bracket, OpenBracket,
Close_brace, CloseBrace,
Open_brace, OpenBrace,
Close_paren, CloseParen,
Open_paren, OpenParen,
Comma, Comma,

View File

@@ -3,7 +3,7 @@
#include <Lexer.h> #include <Lexer.h>
#include <Token.h> #include <Token.h>
namespace LXC::Lexer namespace LXC::Internal
{ {
static constexpr bool IsNumeric(const char c) static constexpr bool IsNumeric(const char c)
{ {
@@ -15,6 +15,14 @@ namespace LXC::Lexer
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
} }
// Reports whether c is a space, horizontal tab, line feed or carriage return //
static constexpr bool IsWhitespace(const char c)
{
    switch (c)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;

        default:
            return false;
    }
}
}
namespace LXC::Lexer
{
LexerContext::LexerContext(const std::string& _source) : LexerContext::LexerContext(const std::string& _source) :
source(_source), index(0), out{}, len((__int32)_source.length()), column(0), line(0) source(_source), index(0), out{}, len((__int32)_source.length()), column(0), line(0)
{} {}
@@ -57,40 +65,49 @@ namespace LXC::Lexer
// Creates the token (if at the end of the string literal) // // Creates the token (if at the end of the string literal) //
if (!trackers.inStrLiteral) if (!trackers.inStrLiteral)
ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::String_Literal); ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral);
} else if (trackers.inStrLiteral) {} } else if (trackers.inStrLiteral) {}
// === Numbers === // // === Numbers === //
else if (IsNumeric(current)) else if (Internal::IsNumeric(current))
{ {
// Updates trackers // // Updates trackers //
trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index; trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
trackers.inNumLiteral = true; trackers.inNumLiteral = true;
// Checks for the end of the number literal to create the token // // Checks for the end of the number literal to create the token //
if (!IsNumeric(next)) _UNLIKELY if (!Internal::IsNumeric(next)) _UNLIKELY
{ {
ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Num_Literal); ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral);
trackers.inNumLiteral = false; trackers.inNumLiteral = false;
} }
} }
// === Words === // // === Words === //
else if (IsAlpha(current)) else if (Internal::IsAlpha(current))
{ {
// Updates trackers // // Updates trackers //
trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index; trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
trackers.inIdentifier = true; trackers.inIdentifier = true;
// Checks for the end of the word to create the token // // Checks for the end of the word to create the token //
if (!IsAlpha(next)) _UNLIKELY if (!Internal::IsAlpha(next)) _UNLIKELY
{ {
ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier); ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier);
trackers.inIdentifier = false; trackers.inIdentifier = false;
} }
} }
// === Whitespace === //
else if (Internal::IsWhitespace(current)) {}
// If an if-statement has not been triggered the character must be invalid //
else
{
return Util::FunctionFail<LexerError>(LexerError::InvalidCharacter, -1);
}
// Iterates to the next index // // Iterates to the next index //
ctx.column++; ctx.column++;
ctx.index++; ctx.index++;

View File

@@ -48,21 +48,21 @@ namespace LXC::Lexer
TOKEN_TYPE_CASE(Token::For); TOKEN_TYPE_CASE(Token::For);
TOKEN_TYPE_CASE(Token::While); TOKEN_TYPE_CASE(Token::While);
TOKEN_TYPE_CASE(Token::If); TOKEN_TYPE_CASE(Token::If);
TOKEN_TYPE_CASE(Token::Else_If); TOKEN_TYPE_CASE(Token::ElseIf);
TOKEN_TYPE_CASE(Token::Else); TOKEN_TYPE_CASE(Token::Else);
TOKEN_TYPE_CASE(Token::Return); TOKEN_TYPE_CASE(Token::Return);
TOKEN_TYPE_CASE(Token::String_Literal); TOKEN_TYPE_CASE(Token::StringLiteral);
TOKEN_TYPE_CASE(Token::Num_Literal); TOKEN_TYPE_CASE(Token::NumLiteral);
TOKEN_TYPE_CASE(Token::Identifier); TOKEN_TYPE_CASE(Token::Identifier);
TOKEN_TYPE_CASE(Token::Assign); TOKEN_TYPE_CASE(Token::Assign);
TOKEN_TYPE_CASE(Token::Close_bracket); TOKEN_TYPE_CASE(Token::CloseBracket);
TOKEN_TYPE_CASE(Token::Open_bracket); TOKEN_TYPE_CASE(Token::OpenBracket);
TOKEN_TYPE_CASE(Token::Close_brace); TOKEN_TYPE_CASE(Token::CloseBrace);
TOKEN_TYPE_CASE(Token::Open_brace); TOKEN_TYPE_CASE(Token::OpenBrace);
TOKEN_TYPE_CASE(Token::Close_paren); TOKEN_TYPE_CASE(Token::CloseParen);
TOKEN_TYPE_CASE(Token::Open_paren); TOKEN_TYPE_CASE(Token::OpenParen);
TOKEN_TYPE_CASE(Token::Comma); TOKEN_TYPE_CASE(Token::Comma);
TOKEN_TYPE_CASE(Token::End_of_file); TOKEN_TYPE_CASE(Token::End_of_file);

View File

@@ -1 +1 @@
FILE 4 CONTENTS "A" GO B HERE 34 "ELLO THER" FILE 4 CONTENTS "A" GO B HERE 34 * 5 "ELLO THER"