// LXC lexer token definitions (PashaBibko::LXC::Lexer)
#pragma once
|
|
|
|
#include <Util.h>
|
|
|
|
namespace PashaBibko::LXC::Lexer
|
|
{
|
|
// Bitmasks identifying the broad class a token belongs to //
// Each class occupies one bit in the high byte of the 16-bit value, leaving
// the low byte free to enumerate individual tokens within the class — every
// section of Token::TokenType is seeded with one of these masks.
namespace TokenClass
{
    enum ClassMask : unsigned short
    {
        // Mathematical and logic operators //
        Operator = 1 << (1 + 8),

        // Special words defined by the compiler //
        Keyword = 1 << (2 + 8),

        // Words such as literals and identifiers //
        UserDefined = 1 << (3 + 8),

        // Symbols in the source like (? , . ! <) //
        Symbols = 1 << (4 + 8),

        // Tokens not defined by previous classes //
        Misc = 1 << (5 + 8)
    };
}
// Forward declaration of the lexer's working state (defined elsewhere);
// Token's constructor takes it by const reference.
struct LexerContext;
class Token final
|
|
{
|
|
public:
|
|
// Enum of token type organised by their token class //
|
|
enum TokenType : unsigned short
|
|
{
|
|
// === Operators === //
|
|
|
|
Add = TokenClass::Operator,
|
|
Sub,
|
|
Mul,
|
|
Div,
|
|
Mod,
|
|
|
|
Eql,
|
|
|
|
// === Keywords === //
|
|
|
|
For = TokenClass::Keyword,
|
|
While,
|
|
If,
|
|
ElseIf,
|
|
Else,
|
|
Return,
|
|
|
|
FunctionDef,
|
|
|
|
// === User defined === //
|
|
|
|
StringLiteral = TokenClass::UserDefined,
|
|
NumLiteral,
|
|
Identifier,
|
|
|
|
// === Symbols === //
|
|
|
|
Assign = TokenClass::Symbols,
|
|
Colon,
|
|
|
|
CloseBracket,
|
|
OpenBracket,
|
|
|
|
CloseBrace,
|
|
OpenBrace,
|
|
|
|
CloseParen,
|
|
OpenParen,
|
|
|
|
CloseCrocodile,
|
|
OpenCrocodile,
|
|
|
|
Comma,
|
|
|
|
// === Misc === //
|
|
|
|
End_of_file = TokenClass::Misc,
|
|
|
|
UNDEFINED = 65535 // Invalid token type (max number)
|
|
};
|
|
|
|
// Util function calculating wether a token is of a given class //
|
|
template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type)
|
|
{
|
|
using T = std::underlying_type_t<TokenType>;
|
|
return static_cast<T>(type) & static_cast<T>(mask);
|
|
}
|
|
|
|
// Constructor to set the data of the token for more complex token types //
|
|
Token(const LexerContext& ctx, uint32_t start, unsigned short len, TokenType _type);
|
|
|
|
// Copy constructor //
|
|
Token(const Token& other);
|
|
|
|
// Move constructor (transfers memory allocated) //
|
|
Token(Token&& other) noexcept;
|
|
|
|
// Cannot use these as members are const //
|
|
Token& operator=(const Token&) = delete;
|
|
Token& operator=(Token&&) = delete;
|
|
|
|
// Deconstructor to clean up the allocated memory //
|
|
~Token();
|
|
|
|
// Getters for the c-string to stop it being reassigned (or deleted) //
|
|
inline const char* const Str() const { return contents; }
|
|
|
|
// Outputs all the relevant infomration in a string for logging purposes //
|
|
std::string LogStr() const;
|
|
|
|
// The type of the token //
|
|
const TokenType type;
|
|
|
|
// The length of the token //
|
|
const unsigned short length;
|
|
|
|
// Start index of the token //
|
|
const uint32_t index;
|
|
|
|
private:
|
|
// The data of the token //
|
|
char* contents;
|
|
};
|
|
|
|
// Typedef for the output type of how the Lexer outputs //
|
|
// Alias for the output type of the Lexer: the token stream for one source //
// (modernised from `typedef` to a `using` alias; identical type.)
using LexerOutput = std::vector<Token>;
}
|