Added the token class

This commit is contained in:
Pasha Bibko
2025-07-19 23:02:16 +01:00
parent ab564e9649
commit c8975f0c20
5 changed files with 185 additions and 0 deletions

17
Lexer/inc/Lexer.h Normal file
View File

@@ -0,0 +1,17 @@
#pragma once
#include <Token.h>
namespace LXC::Lexer
{
// Mutable state of the lexer: the text being lexed and the current position in it. //
// Every tracker has a default so a default-constructed context never holds //
// indeterminate values; the struct is still an aggregate, so brace //
// initialisation in member order (source, index, column, line) keeps working. //
struct LexerContext
{
    // Trackers for the Lexer itself //

    // The source text being lexed //
    std::string source;

    // Offset of the current position within source (starts at 0) //
    size_t index = 0;

    // Trackers for where the Lexer is within the user version of source //

    // Position on the current line (starts on 1, the convention Token documents for its column) //
    unsigned short column = 1;

    // Current line number (starts on 1, the convention Token documents for its line) //
    unsigned short line = 1;
};
}

115
Lexer/inc/Token.h Normal file
View File

@@ -0,0 +1,115 @@
#pragma once
#include <LXC.h>
namespace LXC::Lexer
{
    // Forward declaration so the Token constructor can take a LexerContext //
    struct LexerContext;

    namespace TokenClass
    {
        // Bitmask identifying which class a token type belongs to. //
        // Each class owns a single bit (bits 9 to 13); the lower bits are left free //
        // so the members of a class can be numbered sequentially from its mask. //
        enum ClassMask : unsigned short
        {
            // Mathematical and logic operators //
            Operator = 1 << (1 + 8),

            // Special words defined by the compiler //
            Keyword = 1 << (2 + 8),

            // Words such as literals and identifiers //
            UserDefined = 1 << (3 + 8),

            // Symbols in the source like (? , . ! <) //
            Symbols = 1 << (4 + 8),

            // Tokens not defined by previous classes //
            Misc = 1 << (5 + 8)
        };
    }

    // Data type for storing the output of the lexer. //
    // A token may own a heap allocated copy of its text (see contents), so it //
    // defines its own copy and move construction; the implicit shallow copy //
    // would leave two tokens deleting the same buffer. //
    class Token final
    {
        public:
            // Enum of token type organised by their token class. //
            // The first member of each group is set to the class mask and the //
            // following members count up from it, so they keep the class bit set. //
            enum TokenType : unsigned short
            {
                // === Operators === //
                Add = TokenClass::Operator,
                Sub,
                Mul,
                Div,
                Mod,

                // === Keywords === //
                For = TokenClass::Keyword,
                While,
                If,
                Else_If,
                Else,
                Return,

                // === User defined === //
                String_Literal = TokenClass::UserDefined,
                Int_Literal,
                Float_Literal,
                Identifier,

                // === Symbols === //
                Assign = TokenClass::Symbols,
                Close_bracket,
                Open_bracket,
                Close_brace,
                Open_brace,
                Close_paren,
                Open_paren,
                Comma,

                // === Misc === //
                End_of_file = TokenClass::Misc,

                // Invalid token type (max number). //
                // NOTE(review): 65535 has every class bit set, so IsTypeClass reports //
                // true for every mask when given UNDEFINED - confirm that is intended. //
                UNDEFINED = 65535
            };

            // Util function calculating whether a token type is of a given class //
            template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type)
            {
                return (static_cast<unsigned int>(type) & static_cast<unsigned int>(mask)) != 0;
            }

            // Same check for a whole token. Taken by reference so that asking //
            // the question never copies (and then destroys) the token. //
            template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(const Token& token)
            {
                return (static_cast<unsigned int>(token.type) & static_cast<unsigned int>(mask)) != 0;
            }

            // Constructor to set the data of the token //
            Token(const LexerContext& context, const unsigned short _length, TokenType _type);

            // Copy constructor: duplicates the text so each token owns its own buffer //
            Token(const Token& other) :
                type(other.type), length(other.length), line(other.line), column(other.column),
                contents(CloneContents(other.contents, other.length))
            {}

            // Move constructor: takes over the buffer and leaves the source token without one //
            Token(Token&& other) noexcept :
                type(other.type), length(other.length), line(other.line), column(other.column),
                contents(other.contents)
            {
                other.contents = nullptr;
            }

            // Assignment is impossible because of the const members; stated explicitly //
            Token& operator=(const Token&) = delete;
            Token& operator=(Token&&) = delete;

            // Destructor to clean up the allocated memory //
            ~Token();

            // Getters for the c-string to stop it being reassigned (or deleted). //
            // Both return nullptr when the token holds no text. //
            inline const char* Str() const { return contents; }
            operator const char*() const { return contents; }

            // The type of the token //
            const TokenType type;

            // The length of the token //
            const unsigned short length;

            // The line the token is on (starts on 1) //
            const unsigned short line;

            // The index on the line (starts on 1) //
            const unsigned short column;

        private:
            // Duplicates a token's text buffer. The buffer holds count characters //
            // followed by a null terminator, so count + 1 bytes are copied. //
            // Returns nullptr when there is nothing to duplicate. //
            static const char* CloneContents(const char* original, const unsigned short count)
            {
                if (original == nullptr)
                    return nullptr;

                const unsigned int bytes = static_cast<unsigned int>(count) + 1u;
                char* copy = new char[bytes];
                for (unsigned int i = 0; i < bytes; ++i)
                    copy[i] = original[i];

                return copy;
            }

            // The data of the token (owned by the token, may be nullptr) //
            const char* contents;
    };
}

View File

@@ -1 +1,8 @@
#include <LXC.h>
#include <Lexer.h>
#include <Token.h>
// No lexer definitions live in this namespace yet //
namespace LXC::Lexer
{
}

37
Lexer/src/Token.cpp Normal file
View File

@@ -0,0 +1,37 @@
#include <LXC.h>
#include <Lexer.h>
#include <Token.h>
namespace LXC::Lexer
{
// Copies the text of a token out of the source into a newly allocated c-string. //
// context : supplies the source text and the index the token starts at //
// length  : number of characters the token spans //
// type    : type of the token, used to decide whether any text is stored //
// Returns nullptr for tokens outside the UserDefined class, otherwise a buffer //
// of exactly length + 1 bytes that the caller must release with delete[]. //
static const char* CopySubstrToMem(const LexerContext& context, const size_t length, Token::TokenType type)
{
    // Only user defined class tokens need to store their contents //
    if (!Token::IsTypeClass<TokenClass::UserDefined>(type))
        return nullptr;

    // Value-initialised so the whole buffer starts zeroed: it is always null //
    // terminated and any part that could not be filled reads as empty text //
    char* cStr = new char[length + 1]();

    // Never read past the end of the source, even if index/length are out of range //
    const size_t sourceSize = context.source.size();
    const size_t available = (context.index < sourceSize) ? (sourceSize - context.index) : 0;
    const size_t copyLength = (length < available) ? length : available;

    // Copies the memory to the c-string //
    if (copyLength != 0)
        std::memcpy(cStr, context.source.data() + context.index, copyLength);

    return cStr;
}
// Constructor to assign the members of the token class //
Token::Token(const LexerContext& context, const unsigned short _length, TokenType _type) :
type(_type), length(_length), line(context.line), column(context.column),
contents(CopySubstrToMem(context, _length, _type))
{}
// Releases the text buffer if the token has one //
Token::~Token()
{
    // delete[] does nothing when given a null pointer, so no check is required //
    delete[] contents;
    contents = nullptr;
}
}