Fixing folder capitalisation [2/2]
This commit is contained in:
60
lexer/inc/Lexer.h
Normal file
60
lexer/inc/Lexer.h
Normal file
@@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
|
||||
#include <Token.h>

#include <utility>
|
||||
|
||||
namespace PashaBibko::LXC::Lexer
|
||||
{
|
||||
struct LexerContext final
|
||||
{
|
||||
// Constructor to set the information of the context //
|
||||
LexerContext(const std::string& _source);
|
||||
|
||||
// Trackers for the Lexer itself //
|
||||
const std::string& source;
|
||||
uint32_t index;
|
||||
|
||||
LexerOutput out;
|
||||
const uint32_t len;
|
||||
|
||||
// Trackers for where the Lexer is within the user version of source //
|
||||
unsigned short column;
|
||||
unsigned short line;
|
||||
};
|
||||
|
||||
// Describes why and where the Lexer failed //
// All fields are const, so an error cannot be modified once constructed //
struct LexerError final
{
    // Different reasons why the Lexer can fail //
    // The order must match the message table inside ReasonStr //
    enum Reason
    {
        InvalidCharacter,
        UnterminatedStringLiteral,
        UnknownSymbolOrOperand
    };

    // Constructor to pass arguments through to the struct //
    // _info is taken by value and moved into place so it is not copied a second time //
    LexerError(Reason _reason, uint32_t errorIndex, std::string _info = "")
        : reason(_reason), index(errorIndex), info(std::move(_info))
    {}

    // Turns the error reason into a c-string //
    // A value that does not name an enumerator gets a generic message //
    // instead of reading past the end of the message table           //
    inline static const char* const ReasonStr(Reason reason)
    {
        static const char* const reasons[] =
        {
            "Invalid character found in source",
            "Unterminated string literal in source",
            "Unknown symbol or operand in source"
        };

        const unsigned int slot = static_cast<unsigned int>(reason);
        if (slot >= sizeof(reasons) / sizeof(reasons[0]))
            return "Unknown lexer error";

        return reasons[slot];
    }

    // Error information //

    // Why the Lexer failed //
    const Reason reason;

    // The errorIndex handed to the constructor //
    const uint32_t index;

    // Optional extra detail (empty when none was given) //
    const std::string info;
};
|
||||
|
||||
// Turns a file into a vector of tokens //
|
||||
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents);
|
||||
}
|
||||
136
lexer/inc/Token.h
Normal file
136
lexer/inc/Token.h
Normal file
@@ -0,0 +1,136 @@
|
||||
#pragma once
|
||||
|
||||
#include <Util.h>
|
||||
|
||||
namespace PashaBibko::LXC::Lexer
|
||||
{
|
||||
namespace TokenClass
{
    // Bitmask for different token classes //
    // Each class owns a single high bit; the low bits stay free so the tokens //
    // of a class can be numbered upwards from the mask value                  //
    enum ClassMask : unsigned short
    {
        // Mathematical and logic operators //
        Operator = 1 << (1 + 8),

        // Special words defined by the compiler //
        Keyword = 1 << (2 + 8),

        // Words such as literals and identifiers //
        UserDefined = 1 << (3 + 8),

        // Symbols in the source like (? , . ! <) //
        Symbols = 1 << (4 + 8),

        // Tokens not defined by previous classes //
        Misc = 1 << (5 + 8)
    };
}

struct LexerContext;

// Data type for storing the output of the lexer //
class Token final
{
    public:
        // Enum of token type organised by their token class //
        // The first token of each class takes the class mask as its value //
        // and the following ones count upwards from it                    //
        enum TokenType : unsigned short
        {
            // === Operators === //

            Add = TokenClass::Operator,
            Sub,
            Mul,
            Div,
            Mod,

            Eql,

            // === Keywords === //

            For = TokenClass::Keyword,
            While,
            If,
            ElseIf,
            Else,
            Return,

            FunctionDef,

            // === User defined === //

            StringLiteral = TokenClass::UserDefined,
            NumLiteral,
            Identifier,

            // === Symbols === //

            Assign = TokenClass::Symbols,
            Colon,

            CloseBracket,
            OpenBracket,

            CloseBrace,
            OpenBrace,

            CloseParen,
            OpenParen,

            CloseCrocodile,
            OpenCrocodile,

            Comma,

            // === Misc === //

            End_of_file = TokenClass::Misc,

            UNDEFINED = 65535 // Invalid token type (max number)
        };

        // Util function calculating whether a token is of a given class //
        // UNDEFINED has every bit set and would match any mask, so it is //
        // rejected explicitly: an invalid token belongs to no class      //
        template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type)
        {
            using T = std::underlying_type_t<TokenType>;
            return type != UNDEFINED && (static_cast<T>(type) & static_cast<T>(mask)) != 0;
        }

        // Constructor to set the data of the token for more complex token types //
        Token(const LexerContext& ctx, uint32_t start, unsigned short len, TokenType _type);

        // Copy constructor //
        Token(const Token& other);

        // Move constructor (transfers memory allocated) //
        Token(Token&& other) noexcept;

        // Cannot use these as members are const //
        Token& operator=(const Token&) = delete;
        Token& operator=(Token&&) = delete;

        // Destructor to clean up the allocated memory //
        ~Token();

        // Getters for the c-string to stop it being reassigned (or deleted) //
        inline const char* const Str() const { return contents; }

        // Outputs all the relevant information in a string for logging purposes //
        std::string LogStr() const;

        // The type of the token //
        const TokenType type;

        // The length of the token //
        const unsigned short length;

        // Start index of the token //
        const uint32_t index;

    private:
        // The data of the token //
        char* contents;
};
|
||||
|
||||
// Typedef for the output type of how the Lexer outputs //
|
||||
typedef std::vector<Token> LexerOutput;
|
||||
}
|
||||
Reference in New Issue
Block a user