Files
LXC/Lexer/inc/Token.h
2025-07-23 18:22:24 +01:00

131 lines
3.5 KiB
C++

#pragma once
#include <LXC.h>
namespace LXC::Lexer
{
    namespace TokenClass
    {
        // Bitmask identifying the class a token type belongs to //
        // Each class owns exactly one of bits 9-13. Token::TokenType starts every //
        // group of enumerators at its class bit and counts upwards in bits 0-8 //
        enum ClassMask : unsigned short
        {
            // Mathematical and logic operators //
            Operator    = 1 << (1 + 8),

            // Special words defined by the compiler //
            Keyword     = 1 << (2 + 8),

            // Words such as literals and identifiers //
            UserDefined = 1 << (3 + 8),

            // Symbols in the source like (? , . ! <) //
            Symbols     = 1 << (4 + 8),

            // Tokens not defined by previous classes //
            Misc        = 1 << (5 + 8)
        };
    }

    // Forward declaration, only used by reference in the Token constructor //
    struct LexerContext;

    // Data type for storing the output of the lexer //
    class Token final
    {
        public:
            // Enum of token type organised by their token class //
            // The first enumerator of each group equals its ClassMask bit and the //
            // following ones increment from it, so every member carries that bit //
            enum TokenType : unsigned short
            {
                // === Operators === //
                Add = TokenClass::Operator,
                Sub,
                Mul,
                Div,
                Mod,
                Eql,

                // === Keywords === //
                For = TokenClass::Keyword,
                While,
                If,
                ElseIf,
                Else,
                Return,

                // === User defined === //
                StringLiteral = TokenClass::UserDefined,
                NumLiteral,
                Identifier,

                // === Symbols === //
                Assign = TokenClass::Symbols,
                CloseBracket,
                OpenBracket,
                CloseBrace,
                OpenBrace,
                CloseParen,
                OpenParen,
                Comma,

                // === Misc === //
                End_of_file = TokenClass::Misc,

                UNDEFINED = 65535 // Invalid token type (max number, every bit set)
            };

            // Util function calculating whether a token is of a given class //
            // mask: the class (or combination of class bits) to test against //
            // type: the token type being classified //
            // Returns true when type shares at least one bit with mask //
            // UNDEFINED has every bit set, so a bare bit test would place it in //
            // every class. It is excluded explicitly and never matches any class //
            template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type) noexcept
            {
                using T = std::underlying_type_t<TokenType>;
                return type != UNDEFINED && (static_cast<T>(type) & static_cast<T>(mask)) != 0;
            }

            // Constructor to set the data of the token for more complex token types //
            // NOTE(review): defined outside this header. start/len/_type presumably //
            // initialise index/length/type - confirm against the definition //
            Token(const LexerContext& ctx, uint32_t start, unsigned short len, TokenType _type);

            // Copy constructor //
            Token(const Token& other);

            // Move constructor (transfers memory allocated) //
            Token(Token&& other) noexcept;

            // Cannot use these as members are const //
            Token& operator=(const Token&) = delete;
            Token& operator=(Token&&) = delete;

            // Destructor to clean up the allocated memory //
            ~Token();

            // Read-only access to the c-string so callers cannot modify it through this pointer //
            // (a top-level const on the returned pointer would be ignored by the language) //
            const char* Str() const noexcept { return contents; }

            // Outputs all the relevant information in a string for logging purposes //
            std::string LogStr() const;

            // The type of the token //
            const TokenType type;

            // The length of the token //
            const unsigned short length;

            // Start index of the token //
            const uint32_t index;

        private:
            // The data of the token //
            char* contents;
    };

    // Alias for the type the Lexer outputs //
    using LexerOutput = std::vector<Token>;
}