Tabs -> Spaces

This commit is contained in:
Pasha Bibko
2025-07-21 17:39:43 +01:00
parent 24fde1b770
commit 5bfeb75536
10 changed files with 514 additions and 515 deletions

View File

@@ -4,8 +4,8 @@ file (GLOB LexerSources src/*.cpp inc/*.h)
# Builds the Lexer static library from the globbed source list #
add_library(Lexer STATIC ${LexerSources})

# Adds inc/ as a PUBLIC include directory of the Lexer target #
target_include_directories (
    Lexer PUBLIC
        ${CMAKE_CURRENT_SOURCE_DIR}/inc
)
# Creates the precompiled header for the binary #

View File

@@ -4,57 +4,57 @@
namespace LXC::Lexer
{
struct LexerContext final
{
// Constructor to set the information of the context //
LexerContext(const std::string& _source);
struct LexerContext final
{
// Constructor to set the information of the context //
LexerContext(const std::string& _source);
// Trackers for the Lexer itself //
const std::string& source;
__int32 index;
// Trackers for the Lexer itself //
const std::string& source;
__int32 index;
LexerOutput out;
const __int32 len;
LexerOutput out;
const __int32 len;
// Trackers for where the Lexer is within the user version of source //
unsigned short column;
unsigned short line;
};
// Trackers for where the Lexer is within the user version of source //
unsigned short column;
unsigned short line;
};
struct LexerError final
{
// Different reasons why the Lexer can fail //
enum Reason
{
InvalidCharacter,
UnterminatedStringLiteral,
UnknownSymbolOrOperand
};
struct LexerError final
{
// Different reasons why the Lexer can fail //
enum Reason
{
InvalidCharacter,
UnterminatedStringLiteral,
UnknownSymbolOrOperand
};
// Constructor to pass arguments through to the struct //
LexerError(Reason _reason, __int32 errorIndex, std::string _info = "")
: reason(_reason), index(errorIndex), info(_info)
{}
// Constructor to pass arguments through to the struct //
LexerError(Reason _reason, __int32 errorIndex, std::string _info = "")
: reason(_reason), index(errorIndex), info(_info)
{}
// Turns the error into a c-string //
inline static const char* const ReasonStr(Reason reason)
{
static const char* reasons[] =
{
"Invalid character found in source",
"Unterminated string literal in source",
"Unknown symbol or operand in source"
};
// Turns the error into a c-string //
inline static const char* const ReasonStr(Reason reason)
{
static const char* reasons[] =
{
"Invalid character found in source",
"Unterminated string literal in source",
"Unknown symbol or operand in source"
};
return reasons[reason];
}
return reasons[reason];
}
// Error information //
const Reason reason;
const __int32 index;
const std::string info;
};
// Error information //
const Reason reason;
const __int32 index;
const std::string info;
};
// Turns the contents of a file into a vector of tokens //
// fileContents - the text to tokenize (the contents themselves, not a path) //
// The result type carries LexerOutput and LexerError; ignoring it discards both, hence nodiscard //
[[nodiscard]] Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents);
}

View File

@@ -4,112 +4,112 @@
namespace LXC::Lexer
{
namespace TokenClass
{
    // Bitmask for different token classes //
    // Each class owns one bit above the low byte (the "+ 8"), leaving the low //
    // 8 bits free for the enumerators that count upwards from a class value //
    enum ClassMask : unsigned short
    {
        // Mathematical and logic operators //
        Operator = 1 << (1 + 8),

        // Special words defined by the compiler //
        Keyword = 1 << (2 + 8),

        // Words such as literals and identifiers //
        UserDefined = 1 << (3 + 8),

        // Symbols in the source like (? , . ! <) //
        Symbols = 1 << (4 + 8),

        // Tokens not defined by previous classes //
        Misc = 1 << (5 + 8)
    };
}
struct LexerContext;

// Data type for storing the output of the lexer //
class Token final
{
    public:
        // Enum of token type organised by their token class //
        // The first enumerator of each group takes the class mask value and //
        // the following ones count upwards from it //
        enum TokenType : unsigned short
        {
            // === Operators === //

            Add = TokenClass::Operator,
            Sub,
            Mul,
            Div,
            Mod,

            // === Keywords === //

            For = TokenClass::Keyword,
            While,
            If,
            ElseIf,
            Else,
            Return,

            // === User defined === //

            StringLiteral = TokenClass::UserDefined,
            NumLiteral,
            Identifier,

            // === Symbols === //

            Assign = TokenClass::Symbols,

            CloseBracket,
            OpenBracket,

            CloseBrace,
            OpenBrace,

            CloseParen,
            OpenParen,

            Comma,

            // === Misc === //

            End_of_file = TokenClass::Misc,

            UNDEFINED = 65535 // Invalid token type (max number)
        };

        // Util functions calculating whether a token type / token is of a given class //
        // NOTE(review): UNDEFINED has every bit set, so it tests true for every class mask //
        template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type) { return (type & mask) != 0; }
        // Takes the token by reference so that querying the class never copies the token //
        template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(const Token& token) { return (token.type & mask) != 0; }

        // Constructor to set the data of the token for more complex token types //
        Token(const LexerContext& ctx, unsigned __int32 start, unsigned short len, TokenType _type);

        // Destructor, defined in the source file //
        ~Token();

        // Getters for the c-string to stop it being reassigned (or deleted) //
        // Returns nullptr when the token stores no text //
        inline const char* const Str() const { return contents; }

        // Outputs all the relevant information in a string for logging purposes //
        std::string LogStr() const;

        // The type of the token //
        const TokenType type;

        // The length of the token //
        const unsigned short length;

        // Start index of the token //
        const unsigned __int32 index;

    private:
        // The data of the token //
        // NOTE(review): raw pointer with no user-declared copy/move operations, so a //
        // copied Token shares this pointer with its source - keep that in mind before //
        // freeing it in the destructor //
        char* contents;
};
// Alias for the container type the Lexer writes its tokens into //
using LexerOutput = std::vector<Token>;
}

View File

@@ -7,84 +7,84 @@
namespace LXC::Lexer
{
// Constructor to assign the members of the token class //
// ctx   - context whose source string the token text is copied from //
// start - offset into ctx.source of the first character of the token //
// len   - number of characters belonging to the token //
// _type - the type stored in the token //
// The range [start, start + len) is not checked here and must lie inside ctx.source //
Token::Token(const LexerContext& ctx, unsigned __int32 start, unsigned short len, TokenType _type) :
    type(_type), length(len), index(start), contents(nullptr)
{
    // Only user defined class tokens need to store c-string //
    if (!Token::IsTypeClass<TokenClass::UserDefined>(type))
        return;

    // Copies the memory to a c-string //
    contents = new char[len + 1]; // +1 for null terminator
    std::memcpy(contents, ctx.source.data() + start, len);
    contents[len] = '\0';
}
// Destructor of the token. Currently only resets the pointer, the buffer is NOT freed //
Token::~Token()
{
    // NOTE(review): the delete below is disabled, so the buffer allocated in the //
    // constructor is never released. Presumably it was disabled because Token has //
    // no user-defined copy/move operations and is stored in std::vector<Token>: //
    // copies share the pointer and freeing it here would free it more than once. //
    // Confirm, and give Token proper ownership before enabling it again //
    //if (contents != nullptr)
    //    delete[] contents;

    contents = nullptr;
}
// Helper macro for converting type to string //
// Expands to a case label returning the enumerator exactly as spelled at the call site //
#define TOKEN_TYPE_CASE(type) case type: return #type

// Returns the spelling of a token type as a c-string, e.g. Token::Add gives "Token::Add" //
// Values without a case below give "UNKNOWN" //
static constexpr const char* TokenTypeToCStr(Token::TokenType type)
{
    switch (type)
    {
        // All the different types of tokens //
        TOKEN_TYPE_CASE(Token::Add);
        TOKEN_TYPE_CASE(Token::Sub);
        TOKEN_TYPE_CASE(Token::Mul);
        TOKEN_TYPE_CASE(Token::Div);
        TOKEN_TYPE_CASE(Token::Mod);

        TOKEN_TYPE_CASE(Token::For);
        TOKEN_TYPE_CASE(Token::While);
        TOKEN_TYPE_CASE(Token::If);
        TOKEN_TYPE_CASE(Token::ElseIf);
        TOKEN_TYPE_CASE(Token::Else);
        TOKEN_TYPE_CASE(Token::Return);

        TOKEN_TYPE_CASE(Token::StringLiteral);
        TOKEN_TYPE_CASE(Token::NumLiteral);
        TOKEN_TYPE_CASE(Token::Identifier);

        TOKEN_TYPE_CASE(Token::Assign);
        TOKEN_TYPE_CASE(Token::CloseBracket);
        TOKEN_TYPE_CASE(Token::OpenBracket);
        TOKEN_TYPE_CASE(Token::CloseBrace);
        TOKEN_TYPE_CASE(Token::OpenBrace);
        TOKEN_TYPE_CASE(Token::CloseParen);
        TOKEN_TYPE_CASE(Token::OpenParen);
        TOKEN_TYPE_CASE(Token::Comma);

        TOKEN_TYPE_CASE(Token::End_of_file);
        TOKEN_TYPE_CASE(Token::UNDEFINED);

        // When the case has not been defined yet //
        default:
            return "UNKNOWN";
    }
}

// The macro only makes sense inside the switch above, so it is not left defined //
#undef TOKEN_TYPE_CASE
std::string LXC::Lexer::Token::LogStr() const
{
// Output stream to log to //
std::ostringstream os;
os << std::setw(25) << std::left << TokenTypeToCStr(type) << " | ";
std::string LXC::Lexer::Token::LogStr() const
{
// Output stream to log to //
std::ostringstream os;
os << std::setw(25) << std::left << TokenTypeToCStr(type) << " | ";
if (contents != nullptr)
os << std::setw(25) << std::left << std::string('"' + std::string(contents) + '"');
else
os << std::setw(25) << std::left << "EMPTY";
if (contents != nullptr)
os << std::setw(25) << std::left << std::string('"' + std::string(contents) + '"');
else
os << std::setw(25) << std::left << "EMPTY";
return os.str();
}
return os.str();
}
}