Fixing folder capitalisation [2/2]
This commit is contained in:
18
lexer/CMakeLists.txt
Normal file
18
lexer/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
# Builds the lexer as a static library from its translation units #
add_library(Lexer STATIC
    src/Lexer.cpp
    src/Token.cpp
)

# Adds the headers in the current directory #
target_include_directories(Lexer PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/inc
)

# Links all needed internal libraries #
target_link_libraries(Lexer PRIVATE PashaBibko-UTIL)

# Creates the precompiled header for the binary #
target_include_directories(Lexer PRIVATE ${CMAKE_SOURCE_DIR}/external/util)
target_precompile_headers(Lexer PRIVATE ${CMAKE_SOURCE_DIR}/external/util/Util.h)
60
lexer/inc/Lexer.h
Normal file
60
lexer/inc/Lexer.h
Normal file
@@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
|
||||
#include <Token.h>
|
||||
|
||||
namespace PashaBibko::LXC::Lexer
|
||||
{
|
||||
struct LexerContext final
|
||||
{
|
||||
// Constructor to set the information of the context //
|
||||
LexerContext(const std::string& _source);
|
||||
|
||||
// Trackers for the Lexer itself //
|
||||
const std::string& source;
|
||||
uint32_t index;
|
||||
|
||||
LexerOutput out;
|
||||
const uint32_t len;
|
||||
|
||||
// Trackers for where the Lexer is within the user version of source //
|
||||
unsigned short column;
|
||||
unsigned short line;
|
||||
};
|
||||
|
||||
struct LexerError final
|
||||
{
|
||||
// Different reasons why the Lexer can fail //
|
||||
enum Reason
|
||||
{
|
||||
InvalidCharacter,
|
||||
UnterminatedStringLiteral,
|
||||
UnknownSymbolOrOperand
|
||||
};
|
||||
|
||||
// Constructor to pass arguments through to the struct //
|
||||
LexerError(Reason _reason, uint32_t errorIndex, std::string _info = "")
|
||||
: reason(_reason), index(errorIndex), info(_info)
|
||||
{}
|
||||
|
||||
// Turns the error into a c-string //
|
||||
inline static const char* const ReasonStr(Reason reason)
|
||||
{
|
||||
static const char* reasons[] =
|
||||
{
|
||||
"Invalid character found in source",
|
||||
"Unterminated string literal in source",
|
||||
"Unknown symbol or operand in source"
|
||||
};
|
||||
|
||||
return reasons[reason];
|
||||
}
|
||||
|
||||
// Error information //
|
||||
const Reason reason;
|
||||
const uint32_t index;
|
||||
const std::string info;
|
||||
};
|
||||
|
||||
// Turns a file into a vector of tokens //
|
||||
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents);
|
||||
}
|
||||
136
lexer/inc/Token.h
Normal file
136
lexer/inc/Token.h
Normal file
@@ -0,0 +1,136 @@
|
||||
#pragma once
|
||||
|
||||
#include <Util.h>
|
||||
|
||||
namespace PashaBibko::LXC::Lexer
{
    namespace TokenClass
    {
        // Bitmask for different token classes //
        // Each class owns one bit above the low byte, so a token's class can //
        // be recovered from its enum value with a single bitwise AND //
        enum ClassMask : unsigned short
        {
            // Mathematical and logic operators //
            Operator = 1 << (1 + 8),

            // Special words defined by the compiler //
            Keyword = 1 << (2 + 8),

            // Words such as literals and identifiers //
            UserDefined = 1 << (3 + 8),

            // Symbols in the source like (? , . ! <) //
            Symbols = 1 << (4 + 8),

            // Tokens not defined by previous classes //
            Misc = 1 << (5 + 8)
        };
    }

    struct LexerContext;

    // Data type for storing the output of the lexer //
    class Token final
    {
    public:
        // Enum of token type organised by their token class //
        enum TokenType : unsigned short
        {
            // === Operators === //

            Add = TokenClass::Operator,
            Sub,
            Mul,
            Div,
            Mod,

            Eql,

            // === Keywords === //

            For = TokenClass::Keyword,
            While,
            If,
            ElseIf,
            Else,
            Return,

            FunctionDef,

            // === User defined === //

            StringLiteral = TokenClass::UserDefined,
            NumLiteral,
            Identifier,

            // === Symbols === //

            Assign = TokenClass::Symbols,
            Colon,

            CloseBracket,
            OpenBracket,

            CloseBrace,
            OpenBrace,

            CloseParen,
            OpenParen,

            CloseCrocodile,
            OpenCrocodile,

            Comma,

            // === Misc === //

            End_of_file = TokenClass::Misc,

            UNDEFINED = 65535 // Invalid token type (max number, all class bits set)
        };

        // Util function calculating whether a token is of a given class //
        template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type)
        {
            using T = std::underlying_type_t<TokenType>;
            return static_cast<T>(type) & static_cast<T>(mask);
        }

        // Constructor to set the data of the token for more complex token types //
        Token(const LexerContext& ctx, uint32_t start, unsigned short len, TokenType _type);

        // Copy constructor //
        Token(const Token& other);

        // Move constructor (transfers memory allocated) //
        Token(Token&& other) noexcept;

        // Cannot use these as members are const //
        Token& operator=(const Token&) = delete;
        Token& operator=(Token&&) = delete;

        // Destructor to clean up the allocated memory //
        ~Token();

        // Getter for the c-string to stop it being reassigned (or deleted) //
        inline const char* Str() const { return contents; }

        // Outputs all the relevant information in a string for logging purposes //
        std::string LogStr() const;

        // The type of the token //
        const TokenType type;

        // The length of the token //
        const unsigned short length;

        // Start index of the token //
        const uint32_t index;

    private:
        // The data of the token (only allocated for UserDefined tokens, else nullptr) //
        char* contents;
    };

    // Alias for the output type of the Lexer //
    using LexerOutput = std::vector<Token>;
}
215
lexer/src/Lexer.cpp
Normal file
215
lexer/src/Lexer.cpp
Normal file
@@ -0,0 +1,215 @@
|
||||
#include <Util.h>
|
||||
|
||||
#include <Lexer.h>
|
||||
#include <Token.h>
|
||||
|
||||
namespace PashaBibko::LXC::Internal
|
||||
{
|
||||
static constexpr bool IsNumeric(const char c)
|
||||
{
|
||||
return c >= '0' && c <= '9';
|
||||
}
|
||||
|
||||
static constexpr bool IsAlpha(const char c)
|
||||
{
|
||||
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
|
||||
}
|
||||
|
||||
static constexpr bool IsWhitespace(const char c)
|
||||
{
|
||||
return c == ' ' || c == '\t' || c == '\n' || c == '\r';
|
||||
}
|
||||
|
||||
static constexpr bool IsOperator(const char c)
|
||||
{
|
||||
return
|
||||
c == '+' || c == '-' ||
|
||||
c == '*' || c == '/' ||
|
||||
c == '%' || c == '=';
|
||||
}
|
||||
|
||||
static constexpr bool IsSymbol(const char c)
|
||||
{
|
||||
return
|
||||
c == ',' || c == '[' ||
|
||||
c == ']' || c == '{' ||
|
||||
c == '}' || c == '(' ||
|
||||
c == ')' || c == '<' ||
|
||||
c == '>' || c == ':';
|
||||
}
|
||||
|
||||
static const std::unordered_map<std::string_view, Lexer::Token::TokenType> operatorMap =
|
||||
{
|
||||
{ "+", Lexer::Token::Add },
|
||||
{ "-", Lexer::Token::Sub },
|
||||
{ "*", Lexer::Token::Mul },
|
||||
{ "/", Lexer::Token::Div },
|
||||
{ "%", Lexer::Token::Mod },
|
||||
|
||||
{ "==", Lexer::Token::Eql },
|
||||
|
||||
{ "=", Lexer::Token::Assign }
|
||||
};
|
||||
|
||||
static const std::unordered_map<char, Lexer::Token::TokenType> symbolMap =
|
||||
{
|
||||
{ ',', Lexer::Token::Comma },
|
||||
{ ':', Lexer::Token::Colon },
|
||||
|
||||
{ '[', Lexer::Token::CloseBracket },
|
||||
{ ']', Lexer::Token::OpenBracket },
|
||||
|
||||
{ '}', Lexer::Token::CloseBrace },
|
||||
{ '{', Lexer::Token::OpenBrace },
|
||||
|
||||
{ ')', Lexer::Token::CloseParen },
|
||||
{ '(', Lexer::Token::OpenParen },
|
||||
|
||||
{ '>', Lexer::Token::CloseCrocodile },
|
||||
{ '<', Lexer::Token::OpenCrocodile }
|
||||
};
|
||||
|
||||
static const std::unordered_map<std::string_view, Lexer::Token::TokenType> keywords =
|
||||
{
|
||||
{ "for", Lexer::Token::For },
|
||||
{ "while", Lexer::Token::While },
|
||||
{ "if", Lexer::Token::If },
|
||||
{ "elif", Lexer::Token::ElseIf },
|
||||
{ "else", Lexer::Token::Else },
|
||||
{ "return", Lexer::Token::Return },
|
||||
{ "func", Lexer::Token::FunctionDef },
|
||||
};
|
||||
}
|
||||
|
||||
namespace PashaBibko::LXC::Lexer
|
||||
{
|
||||
LexerContext::LexerContext(const std::string& _source) :
|
||||
source(_source), index(0), out{}, len((uint32_t)_source.length()), column(0), line(0)
|
||||
{}
|
||||
|
||||
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents)
|
||||
{
|
||||
// Creates the context for the lexer //
|
||||
LexerContext ctx(fileContents);
|
||||
|
||||
struct
|
||||
{
|
||||
bool inStrLiteral = false;
|
||||
bool inIdentifier = false;
|
||||
bool inNumLiteral = false;
|
||||
bool inOperator = false;
|
||||
|
||||
bool inComment = false;
|
||||
|
||||
uint32_t sectionStart = 0;
|
||||
|
||||
} trackers;
|
||||
|
||||
while (ctx.index < ctx.len)
|
||||
{
|
||||
// The current char within the source that is being lexed //
|
||||
const char current = ctx.source[ctx.index];
|
||||
const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0';
|
||||
|
||||
// === Comments === //
|
||||
if (current == '#')
|
||||
trackers.inComment = !trackers.inComment;
|
||||
|
||||
else if (trackers.inComment) {} // Contents of comments are skipped over
|
||||
|
||||
// === String literals === //
|
||||
else if (current == '"')
|
||||
{
|
||||
// Updates trackers //
|
||||
trackers.inStrLiteral = !trackers.inStrLiteral;
|
||||
trackers.sectionStart = trackers.inStrLiteral ? ctx.index : trackers.sectionStart;
|
||||
|
||||
// Creates the token (if at the end of the string literal) //
|
||||
if (!trackers.inStrLiteral)
|
||||
ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (unsigned short)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral);
|
||||
|
||||
} else if (trackers.inStrLiteral) {}
|
||||
|
||||
// === Numbers === //
|
||||
else if (Internal::IsNumeric(current))
|
||||
{
|
||||
// Updates trackers //
|
||||
trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
|
||||
trackers.inNumLiteral = true;
|
||||
|
||||
// Checks for the end of the number literal to create the token //
|
||||
if (!Internal::IsNumeric(next)) _UNLIKELY
|
||||
{
|
||||
ctx.out.emplace_back(ctx, trackers.sectionStart, (unsigned short)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral);
|
||||
trackers.inNumLiteral = false;
|
||||
}
|
||||
}
|
||||
|
||||
// === Words === //
|
||||
else if (Internal::IsAlpha(current))
|
||||
{
|
||||
// Updates trackers //
|
||||
trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
|
||||
trackers.inIdentifier = true;
|
||||
|
||||
// Checks for the end of the word to create the token //
|
||||
if (!Internal::IsAlpha(next)) _UNLIKELY
|
||||
{
|
||||
// Finds out if the word is a keyword or not //
|
||||
std::string_view fullWord(ctx.source.data() + trackers.sectionStart, ctx.index - trackers.sectionStart + 1);
|
||||
auto it = Internal::keywords.find(fullWord);
|
||||
Token::TokenType tType = (it != Internal::keywords.end()) ? it->second : Token::Identifier;
|
||||
|
||||
ctx.out.emplace_back(ctx, trackers.sectionStart, (unsigned short)(ctx.index - trackers.sectionStart + 1), tType);
|
||||
trackers.inIdentifier = false;
|
||||
}
|
||||
}
|
||||
|
||||
// === Operators === //
|
||||
else if (Internal::IsOperator(current))
|
||||
{
|
||||
// Updates trackers //
|
||||
trackers.sectionStart = trackers.inOperator ? trackers.sectionStart : ctx.index;
|
||||
trackers.inOperator = true;
|
||||
|
||||
// Checks for the end of the symbol or operator //
|
||||
if (!Internal::IsOperator(next)) _LIKELY
|
||||
{
|
||||
trackers.inOperator = false;
|
||||
|
||||
// Finds the operator/symbol if it can //
|
||||
std::string_view fullSymbol(ctx.source.data() + trackers.sectionStart, ctx.index - trackers.sectionStart + 1);
|
||||
auto it = Internal::operatorMap.find(fullSymbol);
|
||||
if (it != Internal::operatorMap.end())
|
||||
ctx.out.emplace_back(ctx, trackers.sectionStart, (unsigned short)(ctx.index - trackers.sectionStart + 1), it->second);
|
||||
|
||||
else
|
||||
return Util::FunctionFail<LexerError>(LexerError::UnknownSymbolOrOperand, trackers.sectionStart, std::string(fullSymbol));
|
||||
}
|
||||
}
|
||||
|
||||
// === Symbols === //
|
||||
else if (Internal::IsSymbol(current))
|
||||
{
|
||||
ctx.out.emplace_back(ctx, ctx.index, 1, Internal::symbolMap.at(current));
|
||||
}
|
||||
|
||||
// === Whitespace === //
|
||||
else if (Internal::IsWhitespace(current)) _LIKELY {}
|
||||
|
||||
// If an if-statement has not been triggered the character must be invalid //
|
||||
else
|
||||
return Util::FunctionFail<LexerError>(LexerError::InvalidCharacter, ctx.index);
|
||||
|
||||
// Iterates to the next index //
|
||||
ctx.column++;
|
||||
ctx.index++;
|
||||
}
|
||||
|
||||
// Checks for an unterminated string literal //
|
||||
if (trackers.inStrLiteral)
|
||||
return Util::FunctionFail<LexerError>(LexerError::UnterminatedStringLiteral, trackers.sectionStart);
|
||||
|
||||
return std::move(ctx.out);
|
||||
}
|
||||
}
|
||||
119
lexer/src/Token.cpp
Normal file
119
lexer/src/Token.cpp
Normal file
@@ -0,0 +1,119 @@
|
||||
#include <Util.h>
|
||||
|
||||
#include <Lexer.h>
|
||||
#include <Token.h>
|
||||
|
||||
#include <iomanip>
|
||||
|
||||
namespace PashaBibko::LXC::Lexer
|
||||
{
|
||||
// Constructor to assign the members of the token class //
|
||||
Token::Token(const LexerContext& ctx, const uint32_t start, unsigned short len, TokenType _type) :
|
||||
type(_type), length(len), index(start), contents(nullptr)
|
||||
{
|
||||
// Only user defined class tokens need to store c-string //
|
||||
if (Token::IsTypeClass<TokenClass::UserDefined>(type))
|
||||
{
|
||||
// Copies the memory to a c-string //
|
||||
contents = new char[len + 1]; // +1 for null terminator
|
||||
std::memcpy(contents, ctx.source.data() + start, len);
|
||||
contents[len] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
// Copy constructor //
|
||||
Token::Token(const Token& other) :
|
||||
type(other.type), length(other.length), index(other.index), contents(nullptr)
|
||||
{
|
||||
if (other.contents != nullptr)
|
||||
{
|
||||
size_t len = std::strlen(other.contents) + 1; // Adds one for null-terminator
|
||||
contents = new char[len];
|
||||
std::memcpy(contents, other.contents, len);
|
||||
}
|
||||
}
|
||||
|
||||
// Move constructor (transfers memory allocated) //
|
||||
Token::Token(Token&& other) noexcept :
|
||||
type(other.type), length(other.length), index(other.index), contents(other.contents)
|
||||
{
|
||||
// Stops the other from thinking it owns the memory //
|
||||
other.contents = nullptr;
|
||||
}
|
||||
|
||||
// Destructor to clean up the memory of the token that can be allocated //
|
||||
Token::~Token()
|
||||
{
|
||||
// Frees any allocated memory //
|
||||
if (contents != nullptr) _UNLIKELY
|
||||
{
|
||||
delete[] contents;
|
||||
contents = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Helper macro for converting type to string //
|
||||
#define TOKEN_TYPE_CASE(type) case type: return #type;
|
||||
|
||||
static constexpr const char* TokenTypeToCStr(Token::TokenType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
// All the different types of tokens //
|
||||
TOKEN_TYPE_CASE(Token::Add);
|
||||
TOKEN_TYPE_CASE(Token::Sub);
|
||||
TOKEN_TYPE_CASE(Token::Mul);
|
||||
TOKEN_TYPE_CASE(Token::Div);
|
||||
TOKEN_TYPE_CASE(Token::Mod);
|
||||
|
||||
TOKEN_TYPE_CASE(Token::Eql);
|
||||
|
||||
TOKEN_TYPE_CASE(Token::For);
|
||||
TOKEN_TYPE_CASE(Token::While);
|
||||
TOKEN_TYPE_CASE(Token::If);
|
||||
TOKEN_TYPE_CASE(Token::ElseIf);
|
||||
TOKEN_TYPE_CASE(Token::Else);
|
||||
TOKEN_TYPE_CASE(Token::Return);
|
||||
|
||||
TOKEN_TYPE_CASE(Token::FunctionDef);
|
||||
|
||||
TOKEN_TYPE_CASE(Token::StringLiteral);
|
||||
TOKEN_TYPE_CASE(Token::NumLiteral);
|
||||
TOKEN_TYPE_CASE(Token::Identifier);
|
||||
|
||||
TOKEN_TYPE_CASE(Token::Assign);
|
||||
TOKEN_TYPE_CASE(Token::Colon);
|
||||
TOKEN_TYPE_CASE(Token::CloseBracket);
|
||||
TOKEN_TYPE_CASE(Token::OpenBracket);
|
||||
TOKEN_TYPE_CASE(Token::CloseBrace);
|
||||
TOKEN_TYPE_CASE(Token::OpenBrace);
|
||||
TOKEN_TYPE_CASE(Token::CloseParen);
|
||||
TOKEN_TYPE_CASE(Token::OpenParen);
|
||||
TOKEN_TYPE_CASE(Token::CloseCrocodile);
|
||||
TOKEN_TYPE_CASE(Token::OpenCrocodile);
|
||||
TOKEN_TYPE_CASE(Token::Comma);
|
||||
|
||||
TOKEN_TYPE_CASE(Token::End_of_file);
|
||||
TOKEN_TYPE_CASE(Token::UNDEFINED);
|
||||
|
||||
// When the case has not been defined yet //
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
std::string LXC::Lexer::Token::LogStr() const
|
||||
{
|
||||
// Output stream to log to //
|
||||
std::ostringstream os;
|
||||
os << std::setw(25) << std::left << TokenTypeToCStr(type) << " | ";
|
||||
|
||||
// Prints the contents if they are not null //
|
||||
if (contents != nullptr)
|
||||
os << std::setw(25) << std::left << std::string('"' + std::string(contents) + '"');
|
||||
else
|
||||
os << std::setw(25) << std::left << "EMPTY";
|
||||
|
||||
return os.str();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user