mirror of
https://github.com/PashaBibko/LX.git
synced 2026-04-04 01:49:05 +00:00
Improved compile times
Added A LOT of comments
This commit is contained in:
@@ -14,6 +14,7 @@ namespace LX
|
||||
|
||||
#define TOKEN_CASE(type) case type: return #type;
|
||||
|
||||
// Logging function to turn a tokentype enum val into it's string //
|
||||
static std::string ToString(Token::TokenType type)
|
||||
{
|
||||
switch (type)
|
||||
@@ -37,6 +38,7 @@ namespace LX
|
||||
}
|
||||
}
|
||||
|
||||
// All the keywords the lexer currently supports with their token-enum equivalents //
|
||||
static const std::unordered_map<std::string, Token::TokenType> keywords =
|
||||
{
|
||||
{ "for" , Token::FOR },
|
||||
@@ -48,6 +50,8 @@ namespace LX
|
||||
{ "return" , Token::RETURN }
|
||||
};
|
||||
|
||||
// All the single-char operators currently supported by the lexer with their token-enum equivalents //
|
||||
// TODO: Support multi-char operators such as: ==, -> +=, &&
|
||||
static const std::unordered_map<char, Token::TokenType> operators =
|
||||
{
|
||||
{ '+', Token::ADD },
|
||||
@@ -56,27 +60,37 @@ namespace LX
|
||||
{ '/', Token::DIV }
|
||||
};
|
||||
|
||||
// Checks if the given word is a keyword before adding it to the tokens //
|
||||
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens)
|
||||
{
|
||||
// Checks the map for a check and if so adds it with its enum equivalent //
|
||||
if (auto keyword = keywords.find(word); keyword != keywords.end())
|
||||
{
|
||||
tokens.push_back({ keyword->second, "" });
|
||||
}
|
||||
|
||||
// Else adds it as a type of IDENTIFIER //
|
||||
else
|
||||
{
|
||||
tokens.push_back({ Token::IDENTIFIER, word });
|
||||
}
|
||||
}
|
||||
|
||||
// Struct to store the current information of the lexer //
|
||||
struct LexerInfo
|
||||
{
|
||||
// Current index within the lexer //
|
||||
std::streamsize index = 0;
|
||||
|
||||
// Trackers for when a multi-char token started //
|
||||
|
||||
std::streamsize startOfWord = 0;
|
||||
std::streamsize startOfNumberLiteral = 0;
|
||||
std::streamsize startOfStringLiteral = 0;
|
||||
|
||||
// Different flags of the lexer //
|
||||
// Stored as a bitset to minimse memory allocated (basically no difference, because only one exists at any given time) //
|
||||
|
||||
bool isAlpha : 1 = false;
|
||||
bool isNumeric : 1 = false;
|
||||
bool inComment : 1 = false;
|
||||
@@ -92,8 +106,8 @@ namespace LX
|
||||
// Logs the start of the lexical analysis
|
||||
SafeLog(log, LOG_BREAK, "Started lexing file", LOG_BREAK);
|
||||
|
||||
// Allocates a large ammount of memory to hold the output
|
||||
// Will shrink the size later on to stop excess memory
|
||||
// Allocates a large ammount of memory to hold the output //
|
||||
// Will shrink the size later on to stop excess memory being allocated //
|
||||
std::vector<Token> tokens = {};
|
||||
tokens.reserve(0xFFFF);
|
||||
|
||||
@@ -115,119 +129,130 @@ namespace LX
|
||||
// Stores the current character for easy access
|
||||
const char current = contents[info.index];
|
||||
|
||||
//
|
||||
if (info.index + 1 < len)
|
||||
// Checks if it is not at end //
|
||||
// Predicts it is not at end for microptimsation //
|
||||
if (info.index + 1 < len) [[likely]]
|
||||
{
|
||||
// Gets the next character //
|
||||
const char next = contents[info.index + 1];
|
||||
|
||||
// Sets flags depending on the value of the next character //
|
||||
info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
|
||||
info.isNextCharNumeric = (next >= '0' && next <= '9');
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
// Else defaults the next character's flags to false //
|
||||
info.isNextCharAlpha = false;
|
||||
info.isNextCharNumeric = false;
|
||||
}
|
||||
|
||||
// Works out if the current character is alphabetic or numeric
|
||||
// Works out if the current character is alphabetic or numeric //
|
||||
info.isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
|
||||
info.isNumeric = (current >= '0' && current <= '9');
|
||||
|
||||
// Updates string literal tracker and skips over rest if in a string literal
|
||||
// Updates string literal tracker and skips over rest if in a string literal //
|
||||
if (current == '"')
|
||||
{
|
||||
// Start of string literal
|
||||
// Start of string literal //
|
||||
if (info.inStringLiteral == false)
|
||||
{
|
||||
// Updates the neccesarry trackers
|
||||
// Updates the neccesarry trackers //
|
||||
info.startOfStringLiteral = info.index + 1;
|
||||
info.inStringLiteral = true;
|
||||
}
|
||||
|
||||
// End of string literal
|
||||
// End of string literal //
|
||||
else
|
||||
{
|
||||
// Adds the string literal token to the token vector
|
||||
// Adds the string literal token to the token vector //
|
||||
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
|
||||
tokens.push_back({ Token::STRING_LITERAL, lit });
|
||||
|
||||
// Updates trackers
|
||||
// Updates trackers //
|
||||
info.inStringLiteral = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Skips over rest if within a string literal
|
||||
// Skips over rest if within a string literal //
|
||||
else if (info.inStringLiteral);
|
||||
|
||||
// Updates comment state
|
||||
// Updates comment state //
|
||||
else if (current == '#')
|
||||
{
|
||||
info.inComment = !info.inComment;
|
||||
}
|
||||
|
||||
// Skips over if within a comment
|
||||
// Skips over if within a comment //
|
||||
else if (info.inComment);
|
||||
|
||||
// Start of a word
|
||||
// Start of a word //
|
||||
else if (info.isAlpha == true && info.wasLastCharAlpha == false)
|
||||
{
|
||||
// Stores the start of the word //
|
||||
info.startOfWord = info.index;
|
||||
|
||||
// Single letter word
|
||||
// Checks if it is at the end (single char words) //
|
||||
if (info.isNextCharAlpha == false)
|
||||
{
|
||||
// Calls the function designed to handle the tokenisation of words //
|
||||
TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens);
|
||||
}
|
||||
}
|
||||
|
||||
// End of a word
|
||||
// End of a word //
|
||||
else if (info.isAlpha == true && info.isNextCharAlpha == false)
|
||||
{
|
||||
// Calls the function designed to handle the tokenisation of words //
|
||||
TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens);
|
||||
}
|
||||
|
||||
// During a word
|
||||
// During a word //
|
||||
else if (info.isAlpha == true);
|
||||
|
||||
// Start of a number
|
||||
// Start of a number //
|
||||
else if (info.isNumeric == true && info.wasLastCharNumeric == false)
|
||||
{
|
||||
// Stores the start of the number //
|
||||
info.startOfNumberLiteral = info.index;
|
||||
|
||||
// Checks if it as the end (single char numbers) //
|
||||
if (info.isNextCharNumeric == false)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num });
|
||||
}
|
||||
}
|
||||
|
||||
// End of a number
|
||||
// End of a number //
|
||||
else if (info.isNumeric == true && info.isNextCharNumeric == false)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num });
|
||||
}
|
||||
|
||||
// During a number
|
||||
// During a number //
|
||||
else if (info.isNumeric == true);
|
||||
|
||||
// Operators (+, -, /, *)
|
||||
// Operators (+, -, /, *) //
|
||||
else if (auto op = operators.find(current); op != operators.end())
|
||||
{
|
||||
tokens.push_back({ op->second, "" });
|
||||
}
|
||||
|
||||
// If it is here and not whitespace that means it's an invalid character
|
||||
// If it is here and not whitespace that means it's an invalid character //
|
||||
else if (current == ' ' || current == '\t' || current == '\r' || current == '\n');
|
||||
|
||||
else
|
||||
{
|
||||
// Throws an error to alert the user
|
||||
// Throws an error to alert the user of the invalid character //
|
||||
throw InvalidCharInSource(info.index, current);
|
||||
}
|
||||
|
||||
// Updates trackers //
|
||||
// Updates trackers to their default state of a new character //
|
||||
|
||||
info.index++;
|
||||
info.wasLastCharAlpha = info.isAlpha;
|
||||
|
||||
Reference in New Issue
Block a user