Added symbols/operators to lexer

2025-07-21 17:34:47 +01:00
parent a6afeff493
commit 24fde1b770
6 changed files with 158 additions and 96 deletions
--- a/Common/File.h
+++ b/Common/File.h
@@ -75,14 +75,14 @@ namespace LXC::Util
 	// Finds the location of a given index within a file //
 	inline bool GetFileLocationAtIndex(FileLocation& location, const std::string& file, __int32 index)
 	{
 		// Returns false if outside the bounds //
 		if (index < 0 || index > file.length())
 			return false;
 		// Resets location //
 		location.line = 1;
 		location.col = 1;
 		// Returns false if outside the bounds //
 		if (index < 0 || index > file.length())
 			return false;
 		// Finds the location //
 		__int32 localIndex = 0;
 		while (localIndex != index)
--- a/Common/LXC.h
+++ b/Common/LXC.h
@@ -2,6 +2,7 @@
 // Standard libraries //
 #include <unordered_map>
 #include <vector>
 // LXC util files //
--- a/LXC/LXC.cpp
+++ b/LXC/LXC.cpp
@@ -47,6 +47,9 @@ int main(int argc, char** argv)
 		if (err.reason == Lexer::LexerError::InvalidCharacter)
 			Util::PrintLn(": {", fileContents.Result()[err.index], '}');
 		if (err.reason == Lexer::LexerError::UnknownSymbolOrOperand)
 			Util::PrintLn(": {", err.info, '}');
 		else
 			Util::PrintLn();
--- a/Lexer/inc/Lexer.h
+++ b/Lexer/inc/Lexer.h
@@ -27,12 +27,13 @@ namespace LXC::Lexer
 		enum Reason
 		{
 			InvalidCharacter,
-			UnterminatedStringLiteral
+			UnterminatedStringLiteral,
 			UnknownSymbolOrOperand
 		};
 		// Constructor to pass arguments through to the struct //
-		LexerError(Reason _reason, __int32 errorIndex)
+		LexerError(Reason _reason, __int32 errorIndex, std::string _info = "")
-			: reason(_reason), index(errorIndex)
+			: reason(_reason), index(errorIndex), info(_info)
 		{}
 		// Turns the error into a c-string //
@@ -41,7 +42,8 @@ namespace LXC::Lexer
 			static const char* reasons[] =
 			{
 				"Invalid character found in source",
-				"Unterminated string literal in source"
+				"Unterminated string literal in source",
 				"Unknown symbol or operand in source"
 			};
 			return reasons[reason];
@@ -50,6 +52,7 @@ namespace LXC::Lexer
 		// Error information //
 		const Reason reason;
 		const __int32 index;
 		const std::string info;
 	};
 	// Turns a file into a vector of tokens //
--- a/Lexer/src/Lexer.cpp
+++ b/Lexer/src/Lexer.cpp
@@ -5,116 +5,171 @@
 namespace LXC::Internal
 {
-	static constexpr bool IsNumeric(const char c)
+    static constexpr bool IsNumeric(const char c)
-	{
+    {
-		return c >= '0' && c <= '9';
+        return c >= '0' && c <= '9';
-	}
+    }
-	static constexpr bool IsAlpha(const char c)
+    static constexpr bool IsAlpha(const char c)
-	{
+    {
-		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
-	}
+    }
-	static constexpr bool IsWhitespace(const char c)
+    static constexpr bool IsWhitespace(const char c)
-	{
+    {
-		return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
-	}
+    }
    // Reports whether a character is one of the single-character symbols/operators: //
    // + - * / % = , [ ] { } ( ) //
    static constexpr bool IsSymbolOrOperator(const char c)
    {
        switch (c)
        {
            // Arithmetic and assignment //
            case '+': case '-': case '*': case '/': case '%': case '=':
            // Separator //
            case ',':
            // Brackets, braces and parentheses //
            case '[': case ']':
            case '{': case '}':
            case '(': case ')':
                return true;

            // Anything else is not a symbol/operator character //
            default:
                return false;
        }
    }
    // Maps the exact text of a symbol/operator to the token type it produces. //
    // The lexer looks up each complete run of symbol characters here; a run with no entry //
    // is reported as LexerError::UnknownSymbolOrOperand. //
    // Each opening character maps to its Open* token and each closing character to its Close* token. //
    static const std::unordered_map<std::string_view, Lexer::Token::TokenType> symbolAndOpMap =
    {
        // Arithmetic //
        { "+",	Lexer::Token::Add				},
        { "-",	Lexer::Token::Sub				},
        { "*",	Lexer::Token::Mul				},
        { "/",	Lexer::Token::Div				},
        { "%",	Lexer::Token::Mod				},

        // Assignment and separators //
        { "=",	Lexer::Token::Assign			},
        { ",",	Lexer::Token::Comma				},

        // Paired delimiters (open first, then close) //
        { "[",	Lexer::Token::OpenBracket		},
        { "]",	Lexer::Token::CloseBracket		},
        { "{",	Lexer::Token::OpenBrace			},
        { "}",	Lexer::Token::CloseBrace		},
        { "(",	Lexer::Token::OpenParen			},
        { ")",	Lexer::Token::CloseParen		}
    };
 }
 namespace LXC::Lexer
 {
-	LexerContext::LexerContext(const std::string& _source) :
+    LexerContext::LexerContext(const std::string& _source) :
-		source(_source), index(0), out{}, len((__int32)_source.length()), column(0), line(0)
+        source(_source), index(0), out{}, len((__int32)_source.length()), column(0), line(0)
-	{}
+    {}
-	Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents)
+    Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents)
-	{
+    {
-		// Creates the context for the lexer //
+        // Creates the context for the lexer //
-		LexerContext ctx(fileContents);
+        LexerContext ctx(fileContents);
-		struct
+        struct
-		{
+        {
-			bool inStrLiteral = false;
+            bool inStrLiteral = false;
-			bool inIdentifier = false;
+            bool inIdentifier = false;
-			bool inNumLiteral = false;
+            bool inNumLiteral = false;
            bool inSymbolOrOp = false;
-			bool inComment = false;
+            bool inComment = false;
-			unsigned __int32 sectionStart = 0;
+            unsigned __int32 sectionStart = 0;
-		} trackers;
+        } trackers;
-		while (ctx.index < ctx.len)
+        while (ctx.index < ctx.len)
-		{
+        {
-			// The current char within the source that is being lexed //
+            // The current char within the source that is being lexed //
-			const char current = ctx.source[ctx.index];
+            const char current = ctx.source[ctx.index];
-			const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0';
+            const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0';
-			// === Comments === //
+            // === Comments === //
-			if (current == '#')
+            if (current == '#')
-				trackers.inComment = !trackers.inComment;
+                trackers.inComment = !trackers.inComment;
-			else if (trackers.inComment) {} // Contents of comments are skipped over
+            else if (trackers.inComment) {} // Contents of comments are skipped over
-			// === String literals === //
+            // === String literals === //
-			else if (current == '"')
+            else if (current == '"')
-			{
+            {
-				// Updates trackers //
+                // Updates trackers //
-				trackers.inStrLiteral = !trackers.inStrLiteral;
+                trackers.inStrLiteral = !trackers.inStrLiteral;
-				trackers.sectionStart = trackers.inStrLiteral ? ctx.index : trackers.sectionStart;
+                trackers.sectionStart = trackers.inStrLiteral ? ctx.index : trackers.sectionStart;
-				// Creates the token (if at the end of the string literal) //
+                // Creates the token (if at the end of the string literal) //
-				if (!trackers.inStrLiteral)
+                if (!trackers.inStrLiteral)
-					ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral);
+                    ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral);
-			} else if (trackers.inStrLiteral) {}
+            } else if (trackers.inStrLiteral) {}
-			// === Numbers === //
+            // === Numbers === //
-			else if (Internal::IsNumeric(current))
+            else if (Internal::IsNumeric(current))
-			{
+            {
-				// Updates trackers //
+                // Updates trackers //
-				trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
+                trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
-				trackers.inNumLiteral = true;
+                trackers.inNumLiteral = true;
-				// Checks for the end of the number literal to create the token //
+                // Checks for the end of the number literal to create the token //
-				if (!Internal::IsNumeric(next)) _UNLIKELY
+                if (!Internal::IsNumeric(next)) _UNLIKELY
-				{
+                {
-					ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral);
+                    ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral);
-					trackers.inNumLiteral = false;
+                    trackers.inNumLiteral = false;
-				}
+                }
-			}
+            }
-			// === Words === //
+            // === Words === //
-			else if (Internal::IsAlpha(current))
+            else if (Internal::IsAlpha(current))
-			{
+            {
-				// Updates trackers //
+                // Updates trackers //
-				trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
+                trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
-				trackers.inIdentifier = true;
+                trackers.inIdentifier = true;
-				// Checks for the end of the word to create the token //
+                // Checks for the end of the word to create the token //
-				if (!Internal::IsAlpha(next)) _UNLIKELY
+                if (!Internal::IsAlpha(next)) _UNLIKELY
-				{
+                {
-					ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier);
+                    ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier);
-					trackers.inIdentifier = false;
+                    trackers.inIdentifier = false;
-				}
+                }
-			}
+            }
-			// === Whitespace === //
+            // === Symbols/Operators === //
-			else if (Internal::IsWhitespace(current)) {}
+            else if (Internal::IsSymbolOrOperator(current))
            {
                // Updates trackers //
                trackers.sectionStart = trackers.inSymbolOrOp ? trackers.sectionStart : ctx.index;
                trackers.inSymbolOrOp = true;
-			// If an if-statement has not been triggered the character must be invalid //
+                // Checks for the end of the symbol or operator //
-			else
+                if (!Internal::IsSymbolOrOperator(next))
-				return Util::FunctionFail<LexerError>(LexerError::InvalidCharacter, ctx.index);
+                {
                    // Finds the operator/symbol if it can //
                    std::string_view fullSymbol(ctx.source.data() + trackers.sectionStart, ctx.index - trackers.sectionStart + 1);
                    auto it = Internal::symbolAndOpMap.find(fullSymbol);
                    if (it != Internal::symbolAndOpMap.end())
                       ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), it->second);
-			// Iterates to the next index //
+                    else
-			ctx.column++;
+                        return Util::FunctionFail<LexerError>(LexerError::UnknownSymbolOrOperand, trackers.sectionStart, std::string(fullSymbol));
-			ctx.index++;
+                }
-		}
+            }
-		// Checks for an unterminated string literal //
+            // === Whitespace === //
-		if (trackers.inStrLiteral)
+            else if (Internal::IsWhitespace(current)) {}
 			return Util::FunctionFail<LexerError>(LexerError::UnterminatedStringLiteral, trackers.sectionStart);
-		return ctx.out;
+            // If an if-statement has not been triggered the character must be invalid //
-	}
+            else
                return Util::FunctionFail<LexerError>(LexerError::InvalidCharacter, ctx.index);
            // Iterates to the next index //
            ctx.column++;
            ctx.index++;
        }
        // Checks for an unterminated string literal //
        if (trackers.inStrLiteral)
            return Util::FunctionFail<LexerError>(LexerError::UnterminatedStringLiteral, trackers.sectionStart);
        return ctx.out;
    }
 }
--- a/example/example.lx
+++ b/example/example.lx
@@ -1 +1 @@
-FILE 4 CONTENTS "A" GO B HERE 34 5 "ELLO THER"
+FILE 4 CONTENTS "A" GO B HERE 34 += 5 "ELLO THER"
`@@ -1 +1 @@`
	`FILE 4 CONTENTS "A" GO B HERE 34 5 "ELLO THER"`	`FILE 4 CONTENTS "A" GO B HERE 34 += 5 "ELLO THER"`