Fixing folder capitalisation [2/2]

This commit is contained in:
2025-08-09 21:43:18 +01:00
parent f6bbaf910c
commit f9fb455ba9
5 changed files with 0 additions and 0 deletions

18
lexer/CMakeLists.txt Normal file
View File

@@ -0,0 +1,18 @@
# Builds the Lexer static library from its explicitly listed translation units #
add_library(Lexer STATIC
    src/Lexer.cpp
    src/Token.cpp
)
# Exposes the public headers (inc/) to this target and to anything linking against it #
target_include_directories (
    Lexer PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/inc
)
# Links the internal utility library (PRIVATE: an implementation detail of the Lexer) #
target_link_libraries(Lexer PRIVATE PashaBibko-UTIL)
# Makes the util headers visible to this target and precompiles Util.h to speed up builds #
target_include_directories(Lexer PRIVATE ${CMAKE_SOURCE_DIR}/external/util)
target_precompile_headers(Lexer PRIVATE ${CMAKE_SOURCE_DIR}/external/util/Util.h)

60
lexer/inc/Lexer.h Normal file
View File

@@ -0,0 +1,60 @@
#pragma once

#include <utility>

#include <Token.h>
namespace PashaBibko::LXC::Lexer
{
// Mutable state threaded through a single run of the Lexer //
struct LexerContext final
{
    // Constructor to set the information of the context //
    LexerContext(const std::string& _source);

    // Trackers for the Lexer itself //
    const std::string& source; // NOTE(review): reference member — the source string must outlive the context
    uint32_t index;            // Current read position within source
    LexerOutput out;           // Tokens produced so far
    const uint32_t len;        // Cached length of source

    // Trackers for where the Lexer is within the user version of source //
    unsigned short column;
    unsigned short line;
};
// Describes why the Lexer failed and where in the source it happened //
struct LexerError final
{
    // Different reasons why the Lexer can fail //
    enum Reason
    {
        InvalidCharacter,
        UnterminatedStringLiteral,
        UnknownSymbolOrOperand
    };

    // Constructor to pass arguments through to the struct.
    // _info is taken by value and moved into place, so callers handing over
    // a temporary (as TokenizeFile does) pay no extra copy //
    LexerError(Reason _reason, uint32_t errorIndex, std::string _info = "")
        : reason(_reason), index(errorIndex), info(std::move(_info))
    {}

    // Turns the error reason into a human-readable c-string //
    // (top-level const on the return type was meaningless and has been dropped;
    //  `inline` is implied by an in-class definition) //
    static const char* ReasonStr(Reason reason)
    {
        static constexpr const char* reasons[] =
        {
            "Invalid character found in source",
            "Unterminated string literal in source",
            "Unknown symbol or operand in source"
        };
        return reasons[reason];
    }

    // Error information //
    const Reason reason;    // Why the Lexer failed
    const uint32_t index;   // Index within the source where the failure occurred
    const std::string info; // Optional extra context (e.g. the offending symbol)
};
// Turns a file into a vector of tokens; on failure the returned value
// carries a LexerError describing the reason and source index instead //
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents);
}

136
lexer/inc/Token.h Normal file
View File

@@ -0,0 +1,136 @@
#pragma once
#include <Util.h>
namespace PashaBibko::LXC::Lexer
{
namespace TokenClass
{
    // Bitmask for different token classes.
    // Each class occupies one bit in the high byte of the 16-bit value,
    // leaving the low byte free to enumerate tokens within a class //
    enum ClassMask : unsigned short
    {
        // Mathematical and logic operators //
        Operator = 1 << (1 + 8),
        // Special words defined by the compiler //
        Keyword = 1 << (2 + 8),
        // Words such as literals and identifiers //
        UserDefined = 1 << (3 + 8),
        // Symbols in the source like (? , . ! <) //
        Symbols = 1 << (4 + 8),
        // Tokens not defined by previous classes //
        Misc = 1 << (5 + 8)
    };
};
struct LexerContext;

// Data type for storing the output of the lexer //
class Token final
{
public:
    // Enum of token types organised by their token class.
    // Each class's first enumerator starts at that class's ClassMask bit,
    // so a token's class can be recovered by masking (see IsTypeClass) //
    enum TokenType : unsigned short
    {
        // === Operators === //
        Add = TokenClass::Operator,
        Sub,
        Mul,
        Div,
        Mod,
        Eql,

        // === Keywords === //
        For = TokenClass::Keyword,
        While,
        If,
        ElseIf,
        Else,
        Return,
        FunctionDef,

        // === User defined === //
        StringLiteral = TokenClass::UserDefined,
        NumLiteral,
        Identifier,

        // === Symbols === //
        Assign = TokenClass::Symbols,
        Colon,
        CloseBracket,
        OpenBracket,
        CloseBrace,
        OpenBrace,
        CloseParen,
        OpenParen,
        CloseCrocodile,
        OpenCrocodile,
        Comma,

        // === Misc === //
        End_of_file = TokenClass::Misc,
        UNDEFINED = 65535 // Invalid token type (max number)
    };

    // Util function calculating whether a token is of a given class.
    // NOTE(review): UNDEFINED has every bit set, so it tests true for ANY mask — confirm that is intended //
    template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type)
    {
        using T = std::underlying_type_t<TokenType>;
        return static_cast<T>(type) & static_cast<T>(mask);
    }

    // Constructor to set the data of the token for more complex token types //
    Token(const LexerContext& ctx, uint32_t start, unsigned short len, TokenType _type);

    // Copy constructor (deep-copies the owned contents buffer) //
    Token(const Token& other);

    // Move constructor (transfers memory allocated) //
    Token(Token&& other) noexcept;

    // Cannot use these as members are const //
    Token& operator=(const Token&) = delete;
    Token& operator=(Token&&) = delete;

    // Destructor to clean up the allocated memory //
    ~Token();

    // Getter for the c-string to stop it being reassigned (or deleted).
    // Null for tokens that are not of the UserDefined class (only literals
    // and identifiers store their text — see the constructor in Token.cpp) //
    inline const char* const Str() const { return contents; }

    // Outputs all the relevant information in a string for logging purposes //
    std::string LogStr() const;

    // The type of the token //
    const TokenType type;

    // The length of the token //
    const unsigned short length;

    // Start index of the token //
    const uint32_t index;

private:
    // The data of the token (heap-allocated, owned; nullptr when unused) //
    char* contents;
};
// Alias for the container the Lexer writes its tokens into
// (modernised from `typedef` to the equivalent `using` declaration) //
using LexerOutput = std::vector<Token>;
}

215
lexer/src/Lexer.cpp Normal file
View File

@@ -0,0 +1,215 @@
#include <Util.h>
#include <Lexer.h>
#include <Token.h>
namespace PashaBibko::LXC::Internal
{
// True for ASCII decimal digits //
static constexpr bool IsNumeric(const char c)
{
    return '0' <= c && c <= '9';
}

// True for ASCII letters of either case //
static constexpr bool IsAlpha(const char c)
{
    const bool lower = (c >= 'a' && c <= 'z');
    const bool upper = (c >= 'A' && c <= 'Z');
    return lower || upper;
}

// True for the whitespace characters the lexer skips over //
static constexpr bool IsWhitespace(const char c)
{
    switch (c)
    {
        case ' ': case '\t': case '\n': case '\r':
            return true;
        default:
            return false;
    }
}

// True for characters that can start or extend an operator //
static constexpr bool IsOperator(const char c)
{
    switch (c)
    {
        case '+': case '-': case '*':
        case '/': case '%': case '=':
            return true;
        default:
            return false;
    }
}

// True for single-character punctuation symbols //
static constexpr bool IsSymbol(const char c)
{
    switch (c)
    {
        case ',': case '[': case ']':
        case '{': case '}': case '(':
        case ')': case '<': case '>':
        case ':':
            return true;
        default:
            return false;
    }
}
// Maps operator spellings to their token types; the lexer looks up the
// whole scanned run of operator characters at once //
static const std::unordered_map<std::string_view, Lexer::Token::TokenType> operatorMap =
{
    { "+", Lexer::Token::Add },
    { "-", Lexer::Token::Sub },
    { "*", Lexer::Token::Mul },
    { "/", Lexer::Token::Div },
    { "%", Lexer::Token::Mod },
    { "==", Lexer::Token::Eql },
    { "=", Lexer::Token::Assign }
};

// Maps single-character symbols to their token types //
static const std::unordered_map<char, Lexer::Token::TokenType> symbolMap =
{
    { ',', Lexer::Token::Comma },
    { ':', Lexer::Token::Colon },
    // BUG FIX: '[' opens and ']' closes — these two were previously swapped
    // (every other open/close pair in this map was already correct) //
    { '[', Lexer::Token::OpenBracket },
    { ']', Lexer::Token::CloseBracket },
    { '}', Lexer::Token::CloseBrace },
    { '{', Lexer::Token::OpenBrace },
    { ')', Lexer::Token::CloseParen },
    { '(', Lexer::Token::OpenParen },
    { '>', Lexer::Token::CloseCrocodile },
    { '<', Lexer::Token::OpenCrocodile }
};

// Maps reserved words to their keyword token types; any word not
// present here lexes as an identifier //
static const std::unordered_map<std::string_view, Lexer::Token::TokenType> keywords =
{
    { "for", Lexer::Token::For },
    { "while", Lexer::Token::While },
    { "if", Lexer::Token::If },
    { "elif", Lexer::Token::ElseIf },
    { "else", Lexer::Token::Else },
    { "return", Lexer::Token::Return },
    { "func", Lexer::Token::FunctionDef },
};
}
namespace PashaBibko::LXC::Lexer
{
// Binds the context to the source text and zeroes every tracker //
LexerContext::LexerContext(const std::string& _source) :
    source(_source), index(0), out{}, len((uint32_t)_source.length()), column(0), line(0)
{}
// Turns a file into a vector of tokens.
// On success the token stream is returned; on failure a LexerError carrying
// the reason and the source index where lexing stopped is returned instead //
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents)
{
    // Creates the context for the lexer //
    LexerContext ctx(fileContents);

    // Flags describing which multi-character construct (if any) is mid-scan //
    struct
    {
        bool inStrLiteral = false;
        bool inIdentifier = false;
        bool inNumLiteral = false;
        bool inOperator = false;
        bool inComment = false;
        uint32_t sectionStart = 0; // Index where the current construct began
    } trackers;

    while (ctx.index < ctx.len)
    {
        // The current char within the source that is being lexed (plus one-char lookahead) //
        const char current = ctx.source[ctx.index];
        const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0';

        // === Comments === //
        // Comments are '#'-delimited on both sides, so '#' toggles comment state //
        if (current == '#')
            trackers.inComment = !trackers.inComment;

        else if (trackers.inComment) {} // Contents of comments are skipped over

        // === String literals === //
        else if (current == '"')
        {
            // Updates trackers //
            trackers.inStrLiteral = !trackers.inStrLiteral;
            trackers.sectionStart = trackers.inStrLiteral ? ctx.index : trackers.sectionStart;

            // Creates the token (if at the end of the string literal);
            // the +1 / -1 trim the surrounding quotes from the stored text //
            if (!trackers.inStrLiteral)
                ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (unsigned short)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral);
        }

        else if (trackers.inStrLiteral) {} // Characters inside a string literal are consumed verbatim

        // === Numbers === //
        else if (Internal::IsNumeric(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
            trackers.inNumLiteral = true;

            // Checks for the end of the number literal to create the token //
            if (!Internal::IsNumeric(next)) _UNLIKELY
            {
                ctx.out.emplace_back(ctx, trackers.sectionStart, (unsigned short)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral);
                trackers.inNumLiteral = false;
            }
        }

        // === Words === //
        else if (Internal::IsAlpha(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
            trackers.inIdentifier = true;

            // Checks for the end of the word to create the token //
            if (!Internal::IsAlpha(next)) _UNLIKELY
            {
                // Finds out if the word is a keyword or not //
                std::string_view fullWord(ctx.source.data() + trackers.sectionStart, ctx.index - trackers.sectionStart + 1);
                auto it = Internal::keywords.find(fullWord);
                Token::TokenType tType = (it != Internal::keywords.end()) ? it->second : Token::Identifier;

                ctx.out.emplace_back(ctx, trackers.sectionStart, (unsigned short)(ctx.index - trackers.sectionStart + 1), tType);
                trackers.inIdentifier = false;
            }
        }

        // === Operators === //
        else if (Internal::IsOperator(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inOperator ? trackers.sectionStart : ctx.index;
            trackers.inOperator = true;

            // Checks for the end of the symbol or operator //
            if (!Internal::IsOperator(next)) _LIKELY
            {
                trackers.inOperator = false;

                // Finds the operator/symbol if it can //
                std::string_view fullSymbol(ctx.source.data() + trackers.sectionStart, ctx.index - trackers.sectionStart + 1);
                auto it = Internal::operatorMap.find(fullSymbol);

                if (it != Internal::operatorMap.end())
                    ctx.out.emplace_back(ctx, trackers.sectionStart, (unsigned short)(ctx.index - trackers.sectionStart + 1), it->second);
                else
                    return Util::FunctionFail<LexerError>(LexerError::UnknownSymbolOrOperand, trackers.sectionStart, std::string(fullSymbol));
            }
        }

        // === Symbols === //
        else if (Internal::IsSymbol(current))
        {
            ctx.out.emplace_back(ctx, ctx.index, 1, Internal::symbolMap.at(current));
        }

        // === Whitespace === //
        else if (Internal::IsWhitespace(current)) _LIKELY {}

        // If an if-statement has not been triggered the character must be invalid //
        else
            return Util::FunctionFail<LexerError>(LexerError::InvalidCharacter, ctx.index);

        // Advances the position trackers.
        // BUG FIX: a newline now moves to the next line and resets the column;
        // previously `ctx.line` was never incremented and `ctx.column` grew forever //
        if (current == '\n')
        {
            ctx.line++;
            ctx.column = 0;
        }
        else
            ctx.column++;

        ctx.index++;
    }

    // Checks for an unterminated string literal //
    if (trackers.inStrLiteral)
        return Util::FunctionFail<LexerError>(LexerError::UnterminatedStringLiteral, trackers.sectionStart);

    return std::move(ctx.out);
}
}

119
lexer/src/Token.cpp Normal file
View File

@@ -0,0 +1,119 @@
#include <Util.h>
#include <Lexer.h>
#include <Token.h>
#include <iomanip>
namespace PashaBibko::LXC::Lexer
{
// Constructor to assign the members of the token class //
Token::Token(const LexerContext& ctx, const uint32_t start, unsigned short len, TokenType _type) :
    type(_type), length(len), index(start), contents(nullptr)
{
    // Only user defined class tokens (literals / identifiers) need to store c-string //
    if (Token::IsTypeClass<TokenClass::UserDefined>(type))
    {
        // Copies the token's slice of the source into an owned, null-terminated buffer //
        contents = new char[len + 1]; // +1 for null terminator
        std::memcpy(contents, ctx.source.data() + start, len);
        contents[len] = '\0';
    }
}
// Copy constructor — deep-copies the owned contents buffer (if any) //
Token::Token(const Token& other) :
    type(other.type), length(other.length), index(other.index), contents(nullptr)
{
    if (other.contents != nullptr)
    {
        // The buffer always holds `length` characters plus a null terminator
        // (see the main constructor), so the stored length is used instead of
        // re-scanning the string with strlen //
        const size_t len = (size_t)other.length + 1; // +1 for null terminator
        contents = new char[len];
        std::memcpy(contents, other.contents, len);
    }
}
// Move constructor (transfers memory allocated) //
Token::Token(Token&& other) noexcept :
    type(other.type), length(other.length), index(other.index), contents(other.contents)
{
    // Stops the other from thinking it owns the memory
    // (its destructor would otherwise free the transferred buffer) //
    other.contents = nullptr;
}
// Destructor to clean up the memory of the token that can be allocated //
Token::~Token()
{
    // delete[] on a null pointer is a well-defined no-op, so the previous
    // null check (and its branch hint) was redundant; nulling the pointer
    // afterwards was also pointless in a destructor //
    delete[] contents;
}
// Helper macro expanding an enumerator into a case label returning its stringified name //
#define TOKEN_TYPE_CASE(type) case type: return #type;

// Converts a token type to its enumerator name for logging //
static constexpr const char* TokenTypeToCStr(Token::TokenType type)
{
    switch (type)
    {
        // All the different types of tokens //
        TOKEN_TYPE_CASE(Token::Add);
        TOKEN_TYPE_CASE(Token::Sub);
        TOKEN_TYPE_CASE(Token::Mul);
        TOKEN_TYPE_CASE(Token::Div);
        TOKEN_TYPE_CASE(Token::Mod);
        TOKEN_TYPE_CASE(Token::Eql);
        TOKEN_TYPE_CASE(Token::For);
        TOKEN_TYPE_CASE(Token::While);
        TOKEN_TYPE_CASE(Token::If);
        TOKEN_TYPE_CASE(Token::ElseIf);
        TOKEN_TYPE_CASE(Token::Else);
        TOKEN_TYPE_CASE(Token::Return);
        TOKEN_TYPE_CASE(Token::FunctionDef);
        TOKEN_TYPE_CASE(Token::StringLiteral);
        TOKEN_TYPE_CASE(Token::NumLiteral);
        TOKEN_TYPE_CASE(Token::Identifier);
        TOKEN_TYPE_CASE(Token::Assign);
        TOKEN_TYPE_CASE(Token::Colon);
        TOKEN_TYPE_CASE(Token::CloseBracket);
        TOKEN_TYPE_CASE(Token::OpenBracket);
        TOKEN_TYPE_CASE(Token::CloseBrace);
        TOKEN_TYPE_CASE(Token::OpenBrace);
        TOKEN_TYPE_CASE(Token::CloseParen);
        TOKEN_TYPE_CASE(Token::OpenParen);
        TOKEN_TYPE_CASE(Token::CloseCrocodile);
        TOKEN_TYPE_CASE(Token::OpenCrocodile);
        TOKEN_TYPE_CASE(Token::Comma);
        TOKEN_TYPE_CASE(Token::End_of_file);
        TOKEN_TYPE_CASE(Token::UNDEFINED);

        // When the case has not been defined yet //
        default:
            return "UNKNOWN";
    }
}
std::string LXC::Lexer::Token::LogStr() const
{
// Output stream to log to //
std::ostringstream os;
os << std::setw(25) << std::left << TokenTypeToCStr(type) << " | ";
// Prints the contents if they are not null //
if (contents != nullptr)
os << std::setw(25) << std::left << std::string('"' + std::string(contents) + '"');
else
os << std::setw(25) << std::left << "EMPTY";
return os.str();
}
}