namespace LXC::Lexer
{
    // Plain-data record of where the Lexer currently is //
    // Every tracker has a default so a freshly created context is safe to read from //
    struct LexerContext
    {
        // Trackers for the Lexer itself //

        // The text being lexed //
        std::string source;

        // Offset into source that the Lexer has reached //
        std::size_t index = 0;

        // Trackers for where the Lexer is within the user version of source //

        // The index on the current line (starts on 1) //
        unsigned short column = 1;

        // The current line (starts on 1) //
        unsigned short line = 1;
    };
}
namespace LXC::Lexer
{
    // Forward declaration so the context can be passed to the Token constructor //
    struct LexerContext;

    namespace TokenClass
    {
        // Bitmask for different token classes //
        // Every class owns a single bit above the low byte, so the members of a class //
        // can be numbered upwards from its mask without reaching another class's bit //
        enum ClassMask : unsigned short
        {
            // Mathematical and logic operators //
            Operator = 1 << (1 + 8),

            // Special words defined by the compiler //
            Keyword = 1 << (2 + 8),

            // Words such as literals and identifiers //
            UserDefined = 1 << (3 + 8),

            // Symbols in the source like (? , . ! <) //
            Symbols = 1 << (4 + 8),

            // Tokens not defined by previous classes //
            Misc = 1 << (5 + 8)
        };
    }

    // Data type for storing the output of the lexer //
    // A token may own a heap allocated copy of its text, so it can be moved but never copied //
    class Token final
    {
        public:
            // Enum of token type organised by their token class //
            // The first member of each class is set to the class mask and the rest count up from it //
            enum TokenType : unsigned short
            {
                // === Operators === //

                Add = TokenClass::Operator,
                Sub,
                Mul,
                Div,
                Mod,

                // === Keywords === //

                For = TokenClass::Keyword,
                While,
                If,
                Else_If,
                Else,
                Return,

                // === User defined === //

                String_Literal = TokenClass::UserDefined,
                Int_Literal,
                Float_Literal,
                Identifier,

                // === Symbols === //

                Assign = TokenClass::Symbols,

                Close_bracket,
                Open_bracket,

                Close_brace,
                Open_brace,

                Close_paren,
                Open_paren,

                Comma,

                // === Misc === //

                End_of_file = TokenClass::Misc,

                UNDEFINED = 65535 // Invalid token type (max number)
            };

            // Util functions calculating whether a token (or token type) is of a given class //
            // NOTE: UNDEFINED has every bit set, so it reports true for every class mask //
            template<TokenClass::ClassMask mask>
            static constexpr bool IsTypeClass(TokenType type) { return (type & mask) != 0; }

            // Taken by reference as copying a token would duplicate ownership of its contents //
            template<TokenClass::ClassMask mask>
            static constexpr bool IsTypeClass(const Token& token) { return IsTypeClass<mask>(token.type); }

            // Constructor to set the data of the token //
            // context: where the Lexer currently is, _length: length of the token, _type: type of the token //
            Token(const LexerContext& context, const unsigned short _length, TokenType _type);

            // Copying is disabled, two tokens freeing the same contents would be a double delete //
            Token(const Token&) = delete;
            Token& operator=(const Token&) = delete;

            // Moving hands the contents over and leaves the old token with nothing to free //
            Token(Token&& other) noexcept :
                type(other.type), length(other.length), line(other.line), column(other.column),
                contents(other.contents)
            {
                other.contents = nullptr;
            }

            // The const members can not be reassigned so move assignment is not available //
            Token& operator=(Token&&) = delete;

            // Destructor to clean up the allocated memory //
            ~Token();

            // Getters for the c-string to stop it being reassigned (or deleted) //
            // Both return whatever pointer the token holds, which is null when no text was stored //
            const char* Str() const { return contents; }
            operator const char*() const { return contents; }

            // The type of the token //
            const TokenType type;

            // The length of the token //
            const unsigned short length;

            // The line the token is on (starts on 1) //
            const unsigned short line;

            // The index on the line (starts on 1) //
            const unsigned short column;

        private:
            // The data of the token //
            const char* contents;
    };
}
c5cd877..21a5932 100644 --- a/Lexer/src/Lexer.cpp +++ b/Lexer/src/Lexer.cpp @@ -1 +1,8 @@ #include + +#include +#include + +namespace LXC::Lexer +{ +} diff --git a/Lexer/src/Token.cpp b/Lexer/src/Token.cpp new file mode 100644 index 0000000..3c2138e --- /dev/null +++ b/Lexer/src/Token.cpp @@ -0,0 +1,37 @@ +#include + +#include +#include + +namespace LXC::Lexer +{ + static const char* const CopySubstrToMem(const LexerContext& context, const size_t length, Token::TokenType type) + { + // Only user defined class tokens need to store their type // + if (!Token::IsTypeClass(type)) + return nullptr; + + // Copies the memory to a c-string // + char* cStr = new char[length + 1]; + std::memcpy(cStr, context.source.data() + context.index, length); + cStr[length] = '\0'; + + return cStr; + } + + // Constructor to assign the members of the token class // + Token::Token(const LexerContext& context, const unsigned short _length, TokenType _type) : + type(_type), length(_length), line(context.line), column(context.column), + contents(CopySubstrToMem(context, _length, _type)) + {} + + // Destructor to clean up the memory of the token that can be allocated // + Token::~Token() + { + // Frees any allocated memory // + if (contents != nullptr) + delete[] contents; + + contents = nullptr; + } +}