mirror of
https://github.com/PashaBibko/LX.git
synced 2026-04-03 17:39:02 +00:00
Cleaned up Lexer
This commit is contained in:
@@ -57,4 +57,19 @@ namespace LX
|
|||||||
// Returns the string between start and end //
|
// Returns the string between start and end //
|
||||||
return src.substr(start, end - start);
|
return src.substr(start, end - start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Util function for turning a char into a string. Used to stop '\t' being printed as a tab //
|
||||||
|
inline std::string CharAsStrLit(const char c)
{
    // Control characters are returned as their escape sequence (backslash + letter) //
    // so they stay visible when written to a log or the console //
    switch (c)
    {
        // Raw string literals keep the backslash, so the text is printed verbatim //
        case '\n': return R"(\n)";
        case '\t': return R"(\t)";
        case '\r': return R"(\r)";
        case '\v': return R"(\v)";
        case '\f': return R"(\f)";
        case '\0': return R"(\0)";

        // Any other character is returned as a string of length one holding that character //
        default: return std::string(1, c);
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,9 +5,6 @@
|
|||||||
|
|
||||||
extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_outPath)
|
extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_outPath)
|
||||||
{
|
{
|
||||||
// Initalises the log //
|
|
||||||
LX::Log::Init();
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// Initialises the log //
|
// Initialises the log //
|
||||||
|
|||||||
@@ -148,9 +148,11 @@
|
|||||||
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
|
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="src\Lexer.cpp" />
|
<ClCompile Include="src\Lexer.cpp" />
|
||||||
|
<ClCompile Include="src\LexerErrors.cpp" />
|
||||||
<ClCompile Include="src\Token.cpp" />
|
<ClCompile Include="src\Token.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClInclude Include="inc\Constants.h" />
|
||||||
<ClInclude Include="inc\LexerErrors.h" />
|
<ClInclude Include="inc\LexerErrors.h" />
|
||||||
<ClInclude Include="inc\LexerInfo.h" />
|
<ClInclude Include="inc\LexerInfo.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|||||||
@@ -20,6 +20,9 @@
|
|||||||
<ClCompile Include="inc\pch.cpp">
|
<ClCompile Include="inc\pch.cpp">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="src\LexerErrors.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="inc\LexerErrors.h">
|
<ClInclude Include="inc\LexerErrors.h">
|
||||||
@@ -28,5 +31,8 @@
|
|||||||
<ClInclude Include="inc\LexerInfo.h">
|
<ClInclude Include="inc\LexerInfo.h">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="inc\Constants.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
||||||
42
Lexer/inc/Constants.h
Normal file
42
Lexer/inc/Constants.h
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
#pragma once

#include <LX-Common.h>

#include <Lexer.h>

namespace LX
{
    // Lookup tables used by the lexer to turn source text into token types //
    // - Declared inline so every translation unit that includes this header shares one copy //

    // All the keywords the lexer currently supports with their token-enum equivalents //
    inline const std::unordered_map<std::string, Token::TokenType> keywords =
    {
        { "for" , Token::FOR },
        { "while" , Token::WHILE },
        { "if" , Token::IF },
        { "else" , Token::ELSE },
        { "elif" , Token::ELIF },
        { "func" , Token::FUNCTION },
        { "return" , Token::RETURN },
        { "int" , Token::INT_DEC }
    };

    // All the symbols supported by the lexer //
    // NOTE(review): '{' / '}' map to OPEN_BRACKET / CLOSE_BRACKET and '[' / ']' to OPEN_BRACE / CLOSE_BRACE, //
    // which is the reverse of the usual naming - confirm this is what the parser expects //
    inline const std::unordered_map<char, Token::TokenType> symbols =
    {
        { '{', Token::OPEN_BRACKET },
        { '}', Token::CLOSE_BRACKET },
        { '[', Token::OPEN_BRACE },
        { ']', Token::CLOSE_BRACE },
        { '(', Token::OPEN_PAREN },
        { ')', Token::CLOSE_PAREN },
        { ',', Token::COMMA },
        { '=', Token::ASSIGN }
    };

    // All the single-char operators currently supported by the lexer with their token-enum equivalents //
    // TODO: Support multi-char operators such as: ==, ->, +=, &&
    inline const std::unordered_map<char, Token::TokenType> operators =
    {
        { '+', Token::ADD },
        { '-', Token::SUB },
        { '*', Token::MUL },
        { '/', Token::DIV }
    };
}
|
||||||
@@ -11,7 +11,7 @@ namespace LX
|
|||||||
{
|
{
|
||||||
GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS;
|
GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS;
|
||||||
|
|
||||||
InvalidCharInSource(const LexerInfo& info, const std::string& source, const std::string _file);
|
InvalidCharInSource(const LexerInfo& info, const std::string _file);
|
||||||
|
|
||||||
std::string lineContents;
|
std::string lineContents;
|
||||||
std::string file;
|
std::string file;
|
||||||
|
|||||||
@@ -7,6 +7,11 @@ namespace LX
|
|||||||
// Struct to store the current information of the lexer //
|
// Struct to store the current information of the lexer //
|
||||||
struct LexerInfo
|
struct LexerInfo
|
||||||
{
|
{
|
||||||
|
// Constructor to set the constants //
|
||||||
|
LexerInfo(const std::string& _source)
|
||||||
|
: source(_source), len(_source.length())
|
||||||
|
{}
|
||||||
|
|
||||||
// Current trackers of where in the source it is //
|
// Current trackers of where in the source it is //
|
||||||
|
|
||||||
std::streamsize line = 1; // <- Lines start on 1 (probably because of non-programmer's)
|
std::streamsize line = 1; // <- Lines start on 1 (probably because of non-programmer's)
|
||||||
@@ -19,6 +24,11 @@ namespace LX
|
|||||||
std::streamsize startOfNumberLiteral = 0;
|
std::streamsize startOfNumberLiteral = 0;
|
||||||
std::streamsize startOfStringLiteral = 0;
|
std::streamsize startOfStringLiteral = 0;
|
||||||
|
|
||||||
|
// Information about the source //
|
||||||
|
|
||||||
|
const std::string& source;
|
||||||
|
const std::streamsize len;
|
||||||
|
|
||||||
// Different flags of the lexer //
|
// Different flags of the lexer //
|
||||||
// Stored as a bitset to minimise memory allocated //
|
// Stored as a bitset to minimise memory allocated //
|
||||||
// - Basically no difference, because only one exists at any given time //
|
// - Basically no difference, because only one exists at any given time //
|
||||||
|
|||||||
@@ -4,174 +4,67 @@
|
|||||||
|
|
||||||
#include <LexerErrors.h>
|
#include <LexerErrors.h>
|
||||||
#include <LexerInfo.h>
|
#include <LexerInfo.h>
|
||||||
|
#include <Constants.h>
|
||||||
|
|
||||||
namespace LX
|
namespace LX
|
||||||
{
|
{
|
||||||
InvalidCharInSource::InvalidCharInSource(const LexerInfo& info, const std::string& source, const std::string _file)
|
|
||||||
: col(info.column), line(info.line), file(_file), lineContents{}, invalid(source[info.index])
|
|
||||||
{
|
|
||||||
// Gets the line the error is on //
|
|
||||||
lineContents = GetLineAtIndexOf(source, info.index);
|
|
||||||
}
|
|
||||||
|
|
||||||
void InvalidCharInSource::PrintToConsole() const
|
|
||||||
{
|
|
||||||
// Calculates the length of the line number in the console so it is formatted correctly //
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << std::setw(3) << line;
|
|
||||||
size_t lineNumberWidthInConsole = std::max(oss.str().size(), (size_t)3);
|
|
||||||
|
|
||||||
// Prints the error with the relevant information to the console //
|
|
||||||
std::cout << "\n";
|
|
||||||
LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED);
|
|
||||||
std::cout << "Invalid character found in ";
|
|
||||||
LX::PrintStringAsColor(file, LX::Color::WHITE);
|
|
||||||
std::cout << " {";
|
|
||||||
LX::PrintStringAsColor(std::string(1, invalid), LX::Color::LIGHT_RED);
|
|
||||||
std::cout << "}:\n";
|
|
||||||
std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << line << " | " << lineContents << "\n";
|
|
||||||
std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(col - 1) << "";
|
|
||||||
LX::PrintStringAsColor("^", LX::Color::LIGHT_RED);
|
|
||||||
std::cout << "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* InvalidCharInSource::ErrorType() const
|
|
||||||
{
|
|
||||||
return "Invalid char in source";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper macro for outputting token type //
|
|
||||||
#define TOKEN_CASE(type) case type: return #type;
|
|
||||||
|
|
||||||
// Helper util function to translate a tokentype to it's enum val //
|
|
||||||
static std::string ToStringNoFormat(Token::TokenType type)
|
|
||||||
{
|
|
||||||
switch (type)
|
|
||||||
{
|
|
||||||
TOKEN_CASE(Token::STRING_LITERAL);
|
|
||||||
TOKEN_CASE(Token::IDENTIFIER);
|
|
||||||
TOKEN_CASE(Token::FOR);
|
|
||||||
TOKEN_CASE(Token::WHILE);
|
|
||||||
TOKEN_CASE(Token::IF);
|
|
||||||
TOKEN_CASE(Token::ELSE);
|
|
||||||
TOKEN_CASE(Token::ELIF);
|
|
||||||
TOKEN_CASE(Token::FUNCTION);
|
|
||||||
TOKEN_CASE(Token::ADD);
|
|
||||||
TOKEN_CASE(Token::SUB);
|
|
||||||
TOKEN_CASE(Token::MUL);
|
|
||||||
TOKEN_CASE(Token::DIV);
|
|
||||||
TOKEN_CASE(Token::NUMBER_LITERAL);
|
|
||||||
TOKEN_CASE(Token::RETURN);
|
|
||||||
TOKEN_CASE(Token::OPEN_BRACE);
|
|
||||||
TOKEN_CASE(Token::CLOSE_BRACE);
|
|
||||||
TOKEN_CASE(Token::OPEN_BRACKET);
|
|
||||||
TOKEN_CASE(Token::CLOSE_BRACKET);
|
|
||||||
TOKEN_CASE(Token::OPEN_PAREN);
|
|
||||||
TOKEN_CASE(Token::CLOSE_PAREN);
|
|
||||||
TOKEN_CASE(Token::ASSIGN);
|
|
||||||
TOKEN_CASE(Token::INT_DEC);
|
|
||||||
|
|
||||||
default:
|
|
||||||
return "Unknown: " + std::to_string(type);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logging function to turn a tokentype enum val into a nicely formatted string //
|
|
||||||
std::string ToString(Token::TokenType type)
|
|
||||||
{
|
|
||||||
// Gets the unformated version of the string //
|
|
||||||
std::string unformatted = ToStringNoFormat(type);
|
|
||||||
unformatted = unformatted.substr(7); // Removes the Token:: prefix
|
|
||||||
|
|
||||||
// Formats the string (turns to lowercase and replaces _ with a space //
|
|
||||||
std::string formatted;
|
|
||||||
|
|
||||||
for (char current : unformatted)
|
|
||||||
{
|
|
||||||
// Adding 32 makes it lowercase due to how ASCII works //
|
|
||||||
if ((current >= 'A' && current <= 'Z')) { formatted.push_back(current + 32); }
|
|
||||||
|
|
||||||
// Replaces _ with spaces //
|
|
||||||
else if (current == '_') { formatted.push_back(' '); }
|
|
||||||
|
|
||||||
// Else adds the current character //
|
|
||||||
else { formatted.push_back(current); }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the formatted string //
|
|
||||||
return formatted;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stops use outside of the function //
|
|
||||||
#undef TOKEN_CASE
|
|
||||||
|
|
||||||
// Helper function for dealing with floating-point number literals //
|
// Helper function for dealing with floating-point number literals //
|
||||||
static constexpr bool CanBePartOfNumberLiteral(const char c) { return (c == '.') || (c == 'f'); }
|
static constexpr bool CanBePartOfNumberLiteral(const char c) { return (c == '.') || (c == 'f'); }
|
||||||
|
|
||||||
// Helper function to stop printing whitespace as pure whitespace //
|
|
||||||
static std::string PrintChar(const char c)
|
|
||||||
{
|
|
||||||
switch (c)
|
|
||||||
{
|
|
||||||
// Stores them as pure string literals //
|
|
||||||
case '\n': return R"(\n)";
|
|
||||||
case '\t': return R"(\t)";
|
|
||||||
case '\r': return R"(\r)";
|
|
||||||
|
|
||||||
// Else returns a string of length one with the char inside //
|
|
||||||
default: return std::string(1, c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All the keywords the lexer currently supports with their token-enum equivalents //
|
|
||||||
static const std::unordered_map<std::string, Token::TokenType> keywords =
|
|
||||||
{
|
|
||||||
{ "for" , Token::FOR },
|
|
||||||
{ "while" , Token::WHILE },
|
|
||||||
{ "if" , Token::IF },
|
|
||||||
{ "else" , Token::ELSE },
|
|
||||||
{ "elif" , Token::ELIF },
|
|
||||||
{ "func" , Token::FUNCTION },
|
|
||||||
{ "return" , Token::RETURN },
|
|
||||||
{ "int" , Token::INT_DEC }
|
|
||||||
};
|
|
||||||
|
|
||||||
// All the symbols supported by the lexer //
|
|
||||||
static const std::unordered_map<char, Token::TokenType> symbols =
|
|
||||||
{
|
|
||||||
{ '{', Token::OPEN_BRACKET },
|
|
||||||
{ '}', Token::CLOSE_BRACKET },
|
|
||||||
{ '[', Token::OPEN_BRACE },
|
|
||||||
{ ']', Token::CLOSE_BRACE },
|
|
||||||
{ '(', Token::OPEN_PAREN },
|
|
||||||
{ ')', Token::CLOSE_PAREN },
|
|
||||||
{ ',', Token::COMMA },
|
|
||||||
{ '=', Token::ASSIGN }
|
|
||||||
};
|
|
||||||
|
|
||||||
// All the single-char operators currently supported by the lexer with their token-enum equivalents //
|
|
||||||
// TODO: Support multi-char operators such as: ==, -> +=, &&
|
|
||||||
static const std::unordered_map<char, Token::TokenType> operators =
|
|
||||||
{
|
|
||||||
{ '+', Token::ADD },
|
|
||||||
{ '-', Token::SUB },
|
|
||||||
{ '*', Token::MUL },
|
|
||||||
{ '/', Token::DIV }
|
|
||||||
};
|
|
||||||
|
|
||||||
// Checks if the given word is a keyword before adding it to the tokens //
|
// Checks if the given word is a keyword before adding it to the tokens //
|
||||||
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens, LexerInfo& info, const std::string& contents)
|
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens, LexerInfo& info)
|
||||||
{
|
{
|
||||||
// Checks the map for a check and if so adds it with its enum equivalent //
|
// Checks the map for a check and if so adds it with its enum equivalent //
|
||||||
if (auto keyword = keywords.find(word); keyword != keywords.end())
|
if (auto keyword = keywords.find(word); keyword != keywords.end())
|
||||||
{
|
{
|
||||||
tokens.push_back({ keyword->second, info, (std::streamsize)word.size(), contents });
|
tokens.push_back({ keyword->second, info, (std::streamsize)word.size(), info.source });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Else adds it as a type of IDENTIFIER //
|
// Else adds it as a type of IDENTIFIER //
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size(), contents });
|
tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size(), info.source });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Refreshes the per-character flags of the lexer for the character at info.index //
// - Expects info.index to be a valid index into info.source //
static inline void UpdateLexerInfo(LexerInfo& info)
{
    // Carries the previous character's classification over, except on the very first character //
    if (info.index != 0) [[likely]]
    {
        info.wasLastCharAlpha = info.isAlpha;
        info.wasLastCharNumeric = info.isNumeric;
    }

    // Classifies the current character directly from the source //
    // - It must not be copied from the isNextChar flags of the previous call: isNextCharNumeric //
    //   also accepts '.' and 'f', so a plain 'f' (as in "for" or "if") would be flagged as numeric //
    const char current = info.source[info.index];

    info.isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
    info.isNumeric = (current >= '0' && current <= '9');

    // Only does next character checks when not at the end //
    if (info.index + 1 < info.len) [[likely]]
    {
        // Gets the next character //
        const char next = info.source[info.index + 1];

        // Sets flags depending on the value of the next character //
        // - The numeric look-ahead also accepts the characters allowed inside a number literal //
        info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
        info.isNextCharNumeric = (next >= '0' && next <= '9') || CanBePartOfNumberLiteral(next);
    }

    // Else defaults the flags to false as there is no next character //
    else
    {
        info.isNextCharAlpha = false;
        info.isNextCharNumeric = false;
    }
}
|
||||||
|
|
||||||
@@ -180,8 +73,8 @@ namespace LX
|
|||||||
// Logs that the file is being read //
|
// Logs that the file is being read //
|
||||||
Log::LogNewSection("Reading file: ", path.string());
|
Log::LogNewSection("Reading file: ", path.string());
|
||||||
|
|
||||||
std::string contents = ReadFileToString(path);
|
std::string fileContents = ReadFileToString(path);
|
||||||
const std::streamsize len = contents.length();
|
const std::streamsize len = fileContents.length();
|
||||||
|
|
||||||
// Logs the start of the lexical analysis
|
// Logs the start of the lexical analysis
|
||||||
Log::LogNewSection("Lexing file");
|
Log::LogNewSection("Lexing file");
|
||||||
@@ -192,36 +85,16 @@ namespace LX
|
|||||||
tokens.reserve(0xFFFF);
|
tokens.reserve(0xFFFF);
|
||||||
|
|
||||||
// Trackers for when the program is iterating over the file //
|
// Trackers for when the program is iterating over the file //
|
||||||
LexerInfo info;
|
LexerInfo info(fileContents);
|
||||||
|
|
||||||
// Iterates over the file and turns it into tokens //
|
// Iterates over the file and turns it into tokens //
|
||||||
while (info.index < len)
|
while (info.index < len)
|
||||||
{
|
{
|
||||||
// Stores the current character for easy access
|
// Stores the current character for easy access
|
||||||
const char current = contents[info.index];
|
const char current = info.source[info.index];
|
||||||
|
|
||||||
// Checks if it is not at end //
|
// Updates the LexerInfo //
|
||||||
// Predicts it is not at end for microptimsation //
|
UpdateLexerInfo(info);
|
||||||
if (info.index + 1 < len) [[likely]]
|
|
||||||
{
|
|
||||||
// Gets the next character //
|
|
||||||
const char next = contents[info.index + 1];
|
|
||||||
|
|
||||||
// Sets flags depending on the value of the next character //
|
|
||||||
info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
|
|
||||||
info.isNextCharNumeric = (next >= '0' && next <= '9') || CanBePartOfNumberLiteral(next);
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Else defaults the next character's flags to false //
|
|
||||||
info.isNextCharAlpha = false;
|
|
||||||
info.isNextCharNumeric = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Works out if the current character is alphabetic or numeric //
|
|
||||||
info.isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
|
|
||||||
info.isNumeric = (current >= '0' && current <= '9');
|
|
||||||
|
|
||||||
// Updates string literal tracker and skips over rest if in a string literal //
|
// Updates string literal tracker and skips over rest if in a string literal //
|
||||||
if (current == '"')
|
if (current == '"')
|
||||||
@@ -238,8 +111,8 @@ namespace LX
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Adds the string literal token to the token vector //
|
// Adds the string literal token to the token vector //
|
||||||
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
|
std::string lit(info.source.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
|
||||||
tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2, contents }); // Adding two makes the "" be stored as well
|
tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2, info.source }); // Adding two makes the "" be stored as well
|
||||||
|
|
||||||
// Updates trackers //
|
// Updates trackers //
|
||||||
info.inStringLiteral = false;
|
info.inStringLiteral = false;
|
||||||
@@ -268,8 +141,8 @@ namespace LX
|
|||||||
if (info.isNextCharNumeric == false)
|
if (info.isNextCharNumeric == false)
|
||||||
{
|
{
|
||||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
std::string num(info.source.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||||
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), contents });
|
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), info.source });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stores it is lexing a number literal //
|
// Stores it is lexing a number literal //
|
||||||
@@ -280,8 +153,8 @@ namespace LX
|
|||||||
else if ((info.isNumeric == true || CanBePartOfNumberLiteral(current)) && info.isNextCharNumeric == false && info.lexingNumber == true)
|
else if ((info.isNumeric == true || CanBePartOfNumberLiteral(current)) && info.isNextCharNumeric == false && info.lexingNumber == true)
|
||||||
{
|
{
|
||||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
std::string num(info.source.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||||
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), contents });
|
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), info.source });
|
||||||
info.lexingNumber = false; // Stops storing it is lexing a number
|
info.lexingNumber = false; // Stops storing it is lexing a number
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -299,7 +172,7 @@ namespace LX
|
|||||||
if (info.isNextCharAlpha == false)
|
if (info.isNextCharAlpha == false)
|
||||||
{
|
{
|
||||||
// Calls the function designed to handle the tokenisation of words //
|
// Calls the function designed to handle the tokenisation of words //
|
||||||
TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens, info, contents);
|
TokenizeWord({ info.source.data() + info.startOfWord, 1 }, tokens, info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -307,7 +180,7 @@ namespace LX
|
|||||||
else if (info.isAlpha == true && info.isNextCharAlpha == false)
|
else if (info.isAlpha == true && info.isNextCharAlpha == false)
|
||||||
{
|
{
|
||||||
// Calls the function designed to handle the tokenisation of words //
|
// Calls the function designed to handle the tokenisation of words //
|
||||||
TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens, info, contents);
|
TokenizeWord({ info.source.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
// During a word //
|
// During a word //
|
||||||
@@ -316,13 +189,13 @@ namespace LX
|
|||||||
// Symbols //
|
// Symbols //
|
||||||
else if (auto sym = symbols.find(current); sym != symbols.end())
|
else if (auto sym = symbols.find(current); sym != symbols.end())
|
||||||
{
|
{
|
||||||
tokens.push_back({ sym->second, info, 1, contents });
|
tokens.push_back({ sym->second, info, 1, info.source });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Operators (+, -, /, *) //
|
// Operators (+, -, /, *) //
|
||||||
else if (auto op = operators.find(current); op != operators.end())
|
else if (auto op = operators.find(current); op != operators.end())
|
||||||
{
|
{
|
||||||
tokens.push_back({ op->second, info, 1, contents });
|
tokens.push_back({ op->second, info, 1, info.source });
|
||||||
}
|
}
|
||||||
|
|
||||||
// If it is here and not whitespace that means it's an invalid character //
|
// If it is here and not whitespace that means it's an invalid character //
|
||||||
@@ -345,7 +218,7 @@ namespace LX
|
|||||||
// Throws an error with all the relevant information //
|
// Throws an error with all the relevant information //
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ThrowIf<InvalidCharInSource>(true, info, contents, path.string());
|
ThrowIf<InvalidCharInSource>(true, info, path.string());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Log dumps A LOT of info //
|
// Log dumps A LOT of info //
|
||||||
@@ -361,16 +234,13 @@ namespace LX
|
|||||||
" Next Char Numeric: ", info.wasLastCharNumeric,
|
" Next Char Numeric: ", info.wasLastCharNumeric,
|
||||||
" Last Char Numeric: ", info.wasLastCharAlpha,
|
" Last Char Numeric: ", info.wasLastCharAlpha,
|
||||||
" Lexing number: ", info.lexingNumber,
|
" Lexing number: ", info.lexingNumber,
|
||||||
" Current: {", PrintChar(current), "}"
|
" Current: {", CharAsStrLit(current), "}"
|
||||||
);
|
);
|
||||||
|
|
||||||
// Updates trackers to their default state of a new character //
|
// Updates the indices to the next character //
|
||||||
|
|
||||||
info.index++;
|
info.index++;
|
||||||
info.column++;
|
info.column++;
|
||||||
|
|
||||||
info.wasLastCharAlpha = info.isAlpha;
|
|
||||||
info.wasLastCharNumeric = info.isNumeric;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Log::out("\n"); // Puts a space to clean up the log
|
Log::out("\n"); // Puts a space to clean up the log
|
||||||
@@ -383,7 +253,7 @@ namespace LX
|
|||||||
"{ Line: ", std::setw(3), token.line,
|
"{ Line: ", std::setw(3), token.line,
|
||||||
", Index: ", std::setw(3), token.index,
|
", Index: ", std::setw(3), token.index,
|
||||||
", Length: ", std::setw(2), token.length, " } ",
|
", Length: ", std::setw(2), token.length, " } ",
|
||||||
std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.GetContents(), "}"
|
std::setw(30), ToString(token.type) + ":", "{", token.GetContents(), "}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
38
Lexer/src/LexerErrors.cpp
Normal file
38
Lexer/src/LexerErrors.cpp
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
#include <LX-Common.h>
|
||||||
|
|
||||||
|
#include <LexerErrors.h>
|
||||||
|
|
||||||
|
#include <LexerInfo.h>
|
||||||
|
|
||||||
|
namespace LX
|
||||||
|
{
|
||||||
|
// Snapshots the position, file name, offending line and offending character from the lexer state //
InvalidCharInSource::InvalidCharInSource(const LexerInfo& info, const std::string _file)
    : col(info.column),
      line(info.line),
      file(_file),
      // Result of GetLineAtIndexOf for the current index; printed later as the offending line //
      lineContents(GetLineAtIndexOf(info.source, info.index)),
      // The character of the source at the current index //
      invalid(info.source[info.index])
{}
|
||||||
|
|
||||||
|
void InvalidCharInSource::PrintToConsole() const
|
||||||
|
{
|
||||||
|
// Calculates the length of the line number in the console so it is formatted correctly //
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << std::setw(3) << line;
|
||||||
|
size_t lineNumberWidthInConsole = std::max(oss.str().size(), (size_t)3);
|
||||||
|
|
||||||
|
// Prints the error with the relevant information to the console //
|
||||||
|
std::cout << "\n";
|
||||||
|
LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED);
|
||||||
|
std::cout << "Invalid character found in ";
|
||||||
|
LX::PrintStringAsColor(file, LX::Color::WHITE);
|
||||||
|
std::cout << " {";
|
||||||
|
LX::PrintStringAsColor(std::string(1, invalid), LX::Color::LIGHT_RED);
|
||||||
|
std::cout << "}:\n";
|
||||||
|
std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << line << " | " << lineContents << "\n";
|
||||||
|
std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(col - 1) << "";
|
||||||
|
LX::PrintStringAsColor("^", LX::Color::LIGHT_RED);
|
||||||
|
std::cout << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the fixed name of this error category as a string literal //
const char* InvalidCharInSource::ErrorType() const
|
||||||
|
{
|
||||||
|
return "Invalid char in source";
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -6,13 +6,78 @@
|
|||||||
|
|
||||||
namespace LX
|
namespace LX
|
||||||
{
|
{
|
||||||
|
// Helper macro for the function below //
|
||||||
|
// Undefined after the function to stop accidental use //
|
||||||
|
#define TOKEN_CASE(type) case type: return #type;
|
||||||
|
|
||||||
|
// Static to stop use outside of this file as ToString should be used instead //
|
||||||
|
static std::string ToStringNoFormat(Token::TokenType type)
|
||||||
|
{
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
TOKEN_CASE(Token::STRING_LITERAL);
|
||||||
|
TOKEN_CASE(Token::IDENTIFIER);
|
||||||
|
TOKEN_CASE(Token::FOR);
|
||||||
|
TOKEN_CASE(Token::WHILE);
|
||||||
|
TOKEN_CASE(Token::IF);
|
||||||
|
TOKEN_CASE(Token::ELSE);
|
||||||
|
TOKEN_CASE(Token::ELIF);
|
||||||
|
TOKEN_CASE(Token::FUNCTION);
|
||||||
|
TOKEN_CASE(Token::ADD);
|
||||||
|
TOKEN_CASE(Token::SUB);
|
||||||
|
TOKEN_CASE(Token::MUL);
|
||||||
|
TOKEN_CASE(Token::DIV);
|
||||||
|
TOKEN_CASE(Token::NUMBER_LITERAL);
|
||||||
|
TOKEN_CASE(Token::RETURN);
|
||||||
|
TOKEN_CASE(Token::OPEN_BRACE);
|
||||||
|
TOKEN_CASE(Token::CLOSE_BRACE);
|
||||||
|
TOKEN_CASE(Token::OPEN_BRACKET);
|
||||||
|
TOKEN_CASE(Token::CLOSE_BRACKET);
|
||||||
|
TOKEN_CASE(Token::OPEN_PAREN);
|
||||||
|
TOKEN_CASE(Token::CLOSE_PAREN);
|
||||||
|
TOKEN_CASE(Token::ASSIGN);
|
||||||
|
TOKEN_CASE(Token::INT_DEC);
|
||||||
|
|
||||||
|
// Default just returns it as it's numerical value //
|
||||||
|
default: return "Unknown: " + std::to_string((int)type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef TOKEN_CASE
|
||||||
|
|
||||||
|
// Logging function to turn a tokentype enum val into a nicer formatted string than the base equivalent //
|
||||||
|
std::string ToString(Token::TokenType type)
|
||||||
|
{
|
||||||
|
// Gets the unformated version of the string //
|
||||||
|
std::string unformatted = ToStringNoFormat(type);
|
||||||
|
unformatted = unformatted.substr(7); // Removes the Token:: prefix
|
||||||
|
|
||||||
|
// Formats the string (turns to lowercase and replaces _ with a space //
|
||||||
|
std::string formatted;
|
||||||
|
|
||||||
|
for (char current : unformatted)
|
||||||
|
{
|
||||||
|
// Adding 32 makes it lowercase due to how ASCII works //
|
||||||
|
if ((current >= 'A' && current <= 'Z')) { formatted.push_back(current + 32); }
|
||||||
|
|
||||||
|
// Replaces _ with spaces //
|
||||||
|
else if (current == '_') { formatted.push_back(' '); }
|
||||||
|
|
||||||
|
// Else adds the current character //
|
||||||
|
else { formatted.push_back(current); }
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the formatted string //
|
||||||
|
return formatted;
|
||||||
|
}
|
||||||
|
|
||||||
// Passes the constructor args to the values //
|
// Passes the constructor args to the values //
|
||||||
// Builds a token whose position is derived from the lexer state passed in //
// - index is info.index - _length + 1, so info.index is treated as the token's final character //
// - column is info.column - _length and line is copied from info.line //
// - contents copies `length` characters of source starting at the computed index //
// NOTE(review): contents is initialised from the members index and length, which assumes both are //
// declared before contents in the class - confirm against the class definition //
Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length, const std::string& source)
|
Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length, const std::string& source)
|
||||||
: type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length), contents(source.data() + index, length)
|
: type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length), contents(source.data() + index, length)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// This function used to have a use //
|
// This function used to have a use but now it is just a simple getter //
|
||||||
// Now it exists cause I'm lazy //
|
// Recommended to use in case of future changes //
|
||||||
std::string Token::GetContents() const
|
std::string Token::GetContents() const
|
||||||
{
|
{
|
||||||
return contents;
|
return contents;
|
||||||
|
|||||||
@@ -3,8 +3,6 @@ func main()
|
|||||||
int a
|
int a
|
||||||
a = 65465
|
a = 65465
|
||||||
|
|
||||||
/
|
|
||||||
|
|
||||||
int b
|
int b
|
||||||
b = 6
|
b = 6
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user