116 lines
2.5 KiB
C++
116 lines
2.5 KiB
C++
#pragma once
|
|
|
|
#include <LXC.h>
|
|
|
|
namespace LXC::Lexer
|
|
{
|
|
namespace TokenClass
{
    // Bit flags naming the broad class a token belongs to. //
    // Each class owns exactly one bit (bits 9 to 13), so the bits below a
    // flag stay clear and Token::TokenType can number the members of a
    // class upwards from the flag's value.
    enum ClassMask : unsigned short
    {
        Operator    = 0x0200, // bit  9: mathematical and logic operators
        Keyword     = 0x0400, // bit 10: special words defined by the compiler
        UserDefined = 0x0800, // bit 11: words such as literals and identifiers
        Symbols     = 0x1000, // bit 12: symbols in the source like (? , . ! <)
        Misc        = 0x2000  // bit 13: tokens not defined by previous classes
    };
}
|
|
|
|
// Forward declaration only: this header just passes LexerContext by const
// reference to the Token constructor, so the full definition is not needed here.
struct LexerContext;
|
|
|
|
// Data type for storing the output of the lexer //
|
|
class Token final
|
|
{
|
|
public:
|
|
// Enum of token type organised by their token class //
|
|
enum TokenType : unsigned short
|
|
{
|
|
// === Operators === //
|
|
|
|
Add = TokenClass::Operator,
|
|
Sub,
|
|
Mul,
|
|
Div,
|
|
Mod,
|
|
|
|
// === Keywords === //
|
|
|
|
For = TokenClass::Keyword,
|
|
While,
|
|
If,
|
|
Else_If,
|
|
Else,
|
|
Return,
|
|
|
|
// === User defined === //
|
|
|
|
String_Literal = TokenClass::UserDefined,
|
|
Num_Literal,
|
|
Identifier,
|
|
|
|
// === Symbols === //
|
|
|
|
Assign = TokenClass::Symbols,
|
|
|
|
Close_bracket,
|
|
Open_bracket,
|
|
|
|
Close_brace,
|
|
Open_brace,
|
|
|
|
Close_paren,
|
|
Open_paren,
|
|
|
|
Comma,
|
|
|
|
// === Misc === //
|
|
|
|
End_of_file = TokenClass::Misc,
|
|
|
|
UNDEFINED = 65535 // Invalid token type (max number)
|
|
};
|
|
|
|
// Util function calculating wether a token is of a given class //
|
|
template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type) { return type & mask; }
|
|
template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(Token token) { return token.type & mask; }
|
|
|
|
// Constructor to set the data of the token for more complex token types //
|
|
Token(const LexerContext& ctx, unsigned __int32 start, unsigned short len, TokenType _type);
|
|
|
|
// Deconstructor to clean up the allocated memory //
|
|
~Token();
|
|
|
|
// Getters for the c-string to stop it being reassigned (or deleted) //
|
|
inline const char* const Str() const { return contents; }
|
|
|
|
// Outputs all the relevant infomration in a string for logging purposes //
|
|
std::string LogStr() const;
|
|
|
|
// The type of the token //
|
|
const TokenType type;
|
|
|
|
// The length of the token //
|
|
const unsigned short length;
|
|
|
|
// Start index of the token //
|
|
const unsigned __int32 index;
|
|
|
|
private:
|
|
// The data of the token //
|
|
char* contents;
|
|
};
|
|
|
|
// Typedef for the output type of how the Lexer outputs //
|
|
typedef std::vector<Token> LexerOutput;
|
|
}
|