From bb3d8fb13e627a5346c3c8bfb6de555a1ef20c4f Mon Sep 17 00:00:00 2001
From: Pasha Bibko <156938226+PashaBibko@users.noreply.github.com>
Date: Sun, 20 Jul 2025 21:54:37 +0100
Subject: [PATCH] Started implementing Lexer errors

---
 Common/FileRead.h   |  4 ++--
 LXC/LXC.cpp         |  9 ++++++++-
 Lexer/inc/Lexer.h   | 34 +++++++++++++++++++++++++++++++---
 Lexer/inc/Token.h   | 18 +++++++++---------
 Lexer/src/Lexer.cpp | 31 ++++++++++++++++++++++++-------
 Lexer/src/Token.cpp | 18 +++++++++---------
 example/example.lx  |  2 +-
 7 files changed, 84 insertions(+), 32 deletions(-)

diff --git a/Common/FileRead.h b/Common/FileRead.h
index b4d913c..7b10b24 100644
--- a/Common/FileRead.h
+++ b/Common/FileRead.h
@@ -6,7 +6,7 @@
 namespace LXC::Util
 {
 	// Error returned when Util::ReadFile runs into errors //
-	struct FileReadError
+	struct FileReadError final
 	{
 		// Different reasons why the error can occur //
 		enum Reason
@@ -26,7 +26,7 @@ namespace LXC::Util
 		const Reason reason;
 
 		// Turns the error into a c-string //
-		inline static const char* const ReasonStr(const Reason& reason)
+		inline static const char* const ReasonStr(Reason reason)
 		{
 			static const char* reasons[] =
 			{
diff --git a/LXC/LXC.cpp b/LXC/LXC.cpp
index c9b4813..20d431b 100644
--- a/LXC/LXC.cpp
+++ b/LXC/LXC.cpp
@@ -9,8 +9,10 @@ int main(int argc, char** argv)
 	// Creates the debug log //
 	Util::CreateLog("LXC.log");
 
+	std::filesystem::path src = "example/example.lx";
+
 	// Reads the given file to a string //
-	Util::ReturnVal fileContents = Util::ReadFile("example/example.lx");
+	Util::ReturnVal fileContents = Util::ReadFile(src);
 	if (fileContents.Failed()) _UNLIKELY
 	{
 		// Stores the error for easier access //
@@ -34,6 +36,11 @@ int main(int argc, char** argv)
 		// Stores the error for easier access //
 		Lexer::LexerError& err = tokens.Error();
 
+		// Prints the error to the console //
+		Util::PrintAs<Util::WHITE>("[LXC] ");
+		Util::Print(src.filename().string());
+		Util::PrintAs<Util::LIGHT_RED>(" Error: ");
+
 		Util::Stop();
 	}
 
diff --git a/Lexer/inc/Lexer.h b/Lexer/inc/Lexer.h
index ac11bda..cc1a7fe 100644
--- a/Lexer/inc/Lexer.h
+++ b/Lexer/inc/Lexer.h
@@ -4,7 +4,7 @@
 
 namespace LXC::Lexer
 {
-	struct LexerContext
+	struct LexerContext final
 	{
 		// Constructor to set the information of the context //
 		LexerContext(const std::string& _source);
@@ -21,8 +21,36 @@ namespace LXC::Lexer
 		unsigned short line;
 	};
 
-	struct LexerError
-	{};
+	struct LexerError final // Error type of the Util::ReturnVal<LexerOutput, LexerError> returned by TokenizeFile //
+	{
+		// Different reasons why the Lexer can fail (each value doubles as the index of its message in ReasonStr, so keep both lists in the same order) //
+		enum Reason
+		{
+			InvalidCharacter,
+			UnterminatedStringLiteral
+		};
+
+		// Constructor to pass arguments through to the struct: _reason is stored in reason and errorIndex is stored in index //
+		LexerError(Reason _reason, __int32 errorIndex)
+			: reason(_reason), index(errorIndex)
+		{}
+
+		// Turns the error into a c-string: returns the message for the given reason; the lookup is not bounds checked, so reason must be one of the enumerators above //
+		inline static const char* const ReasonStr(Reason reason)
+		{
+			static const char* reasons[] = // One entry per Reason enumerator, in declaration order //
+			{
+				"Invalid character found in source: ",
+				"Unterminated string literal in source starting at: "
+			};
+
+			return reasons[reason]; // The unscoped enum value converts implicitly to the array index //
+		}
+
+		// Error information (both members are const, so they are fixed once the error has been constructed) //
+		const Reason reason;
+		const __int32 index; // Presumably the position in the source that the error refers to - TODO confirm against the call sites //
+	};
 
 	// Turns a file into a vector of tokens //
 	Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents);
diff --git a/Lexer/inc/Token.h b/Lexer/inc/Token.h
index 88bf691..360d002 100644
--- a/Lexer/inc/Token.h
+++ b/Lexer/inc/Token.h
@@ -48,28 +48,28 @@ namespace LXC::Lexer
 				For				= TokenClass::Keyword,
 				While,
 				If,
-				Else_If,
+				ElseIf,
 				Else,
 				Return,
 
 				// === User defined === //
 
-				String_Literal	= TokenClass::UserDefined,
-				Num_Literal,
+				StringLiteral	= TokenClass::UserDefined,
+				NumLiteral,
 				Identifier,
 
 				// === Symbols === //
 
 				Assign			= TokenClass::Symbols,
 
-				Close_bracket,
-				Open_bracket,
+				CloseBracket,
+				OpenBracket,
 
-				Close_brace,
-				Open_brace,
+				CloseBrace,
+				OpenBrace,
 
-				Close_paren,
-				Open_paren,
+				CloseParen,
+				OpenParen,
 
 				Comma,
 
diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp
index bd50c3f..93e2a07 100644
--- a/Lexer/src/Lexer.cpp
+++ b/Lexer/src/Lexer.cpp
@@ -3,7 +3,7 @@
 #include <Lexer.h>
 #include <Token.h>
 
-namespace LXC::Lexer
+namespace LXC::Internal
 {
 	static constexpr bool IsNumeric(const char c)
 	{
@@ -15,6 +15,14 @@ namespace LXC::Lexer
 		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
 	}
 
+	static constexpr bool IsWhitespace(const char c) // Whether c is a space, tab, line feed or carriage return (no other character counts as whitespace) //
+	{
+		return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+	}
+}
+
+namespace LXC::Lexer
+{
 	LexerContext::LexerContext(const std::string& _source) :
 		source(_source), index(0), out{}, len((__int32)_source.length()), column(0), line(0)
 	{}
@@ -57,40 +65,49 @@ namespace LXC::Lexer
 
 				// Creates the token (if at the end of the string literal) //
 				if (!trackers.inStrLiteral)
-					ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::String_Literal);
+					ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral);
 			
 			} else if (trackers.inStrLiteral) {}
 
 			// === Numbers === //
-			else if (IsNumeric(current))
+			else if (Internal::IsNumeric(current))
 			{
 				// Updates trackers //
 				trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
 				trackers.inNumLiteral = true;
 
 				// Checks for the end of the number literal to create the token //
-				if (!IsNumeric(next)) _UNLIKELY
+				if (!Internal::IsNumeric(next)) _UNLIKELY
 				{
-					ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Num_Literal);
+					ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral);
 					trackers.inNumLiteral = false;
 				}
 			}
 
 			// === Words === //
-			else if (IsAlpha(current))
+			else if (Internal::IsAlpha(current))
 			{
 				// Updates trackers //
 				trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
 				trackers.inIdentifier = true;
 
 				// Checks for the end of the word to create the token //
-				if (!IsAlpha(next)) _UNLIKELY
+				if (!Internal::IsAlpha(next)) _UNLIKELY
 				{
 					ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier);
 					trackers.inIdentifier = false;
 				}
 			}
 
+			// === Whitespace === //
+			else if (Internal::IsWhitespace(current)) {}
+
+			// If an if-statement has not been triggered the character must be invalid //
+			else
+			{
+				return Util::FunctionFail<LexerError>(LexerError::InvalidCharacter, -1);
+			}
+
 			// Iterates to the next index //
 			ctx.column++;
 			ctx.index++;
diff --git a/Lexer/src/Token.cpp b/Lexer/src/Token.cpp
index 9992edf..d808f90 100644
--- a/Lexer/src/Token.cpp
+++ b/Lexer/src/Token.cpp
@@ -48,21 +48,21 @@ namespace LXC::Lexer
 			TOKEN_TYPE_CASE(Token::For);
 			TOKEN_TYPE_CASE(Token::While);
 			TOKEN_TYPE_CASE(Token::If);
-			TOKEN_TYPE_CASE(Token::Else_If);
+			TOKEN_TYPE_CASE(Token::ElseIf);
 			TOKEN_TYPE_CASE(Token::Else);
 			TOKEN_TYPE_CASE(Token::Return);
 
-			TOKEN_TYPE_CASE(Token::String_Literal);
-			TOKEN_TYPE_CASE(Token::Num_Literal);
+			TOKEN_TYPE_CASE(Token::StringLiteral);
+			TOKEN_TYPE_CASE(Token::NumLiteral);
 			TOKEN_TYPE_CASE(Token::Identifier);
 
 			TOKEN_TYPE_CASE(Token::Assign);
-			TOKEN_TYPE_CASE(Token::Close_bracket);
-			TOKEN_TYPE_CASE(Token::Open_bracket);
-			TOKEN_TYPE_CASE(Token::Close_brace);
-			TOKEN_TYPE_CASE(Token::Open_brace);
-			TOKEN_TYPE_CASE(Token::Close_paren);
-			TOKEN_TYPE_CASE(Token::Open_paren);
+			TOKEN_TYPE_CASE(Token::CloseBracket);
+			TOKEN_TYPE_CASE(Token::OpenBracket);
+			TOKEN_TYPE_CASE(Token::CloseBrace);
+			TOKEN_TYPE_CASE(Token::OpenBrace);
+			TOKEN_TYPE_CASE(Token::CloseParen);
+			TOKEN_TYPE_CASE(Token::OpenParen);
 			TOKEN_TYPE_CASE(Token::Comma);
 
 			TOKEN_TYPE_CASE(Token::End_of_file);
diff --git a/example/example.lx b/example/example.lx
index 1c8f718..d6227fa 100644
--- a/example/example.lx
+++ b/example/example.lx
@@ -1 +1 @@
-FILE 4 CONTENTS "A" GO B HERE 34 "ELLO THER"
+FILE 4 CONTENTS "A" GO B HERE 34 * 5 "ELLO THER"