Made Lexer errors fully hidden in global scope

Also improved ease of debugging by calling __debugbreak when an error is thrown.

NOTE: Parser errors currently crash the program
This commit is contained in:
Pasha Bibko
2025-05-07 16:31:44 +01:00
parent 0c34e7174e
commit 4e78a9f6ae
11 changed files with 161 additions and 103 deletions

View File

@@ -159,6 +159,7 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\Console.cpp" />
<ClCompile Include="src\Error.cpp" />
<ClCompile Include="src\Logger.cpp" />
<ClCompile Include="src\pch.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>

View File

@@ -37,5 +37,8 @@
<ClCompile Include="src\Console.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\Error.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@@ -2,14 +2,41 @@ namespace LX
{
// Base error class for all LX thrown errors //
// Holds nothing apart from the v-table //
struct RuntimeError
struct COMMON_API RuntimeError
{
// Default constructor which throws a breakpoint on being created //
RuntimeError();
// Prints the error to the console //
// Include Common/Console.h for printing util functions //
virtual void PrintToConsole() const = 0;
// Returns a C-String of the type that was thrown //
virtual const char* ErrorType() const = 0;
// Virtual destructor because of polymorphism //
virtual ~RuntimeError() = default;
};
// --- Common errors that can be thrown --- //
// Error thrown when there is an invalid file path //
struct COMMON_API InvalidFilePath : public RuntimeError
{
    // Constructor to turn the C++ types to C to expose them in DLL //
    // _name is the label of the file that is invalid, path is its location //
    InvalidFilePath(const std::string& _name, const std::filesystem::path& path);

    // Prints the error to the console //
    // Marked override so the compiler rejects it if it ever stops matching RuntimeError::PrintToConsole //
    void PrintToConsole() const override;

    // Returns the error as c-string //
    // Marked override so the compiler rejects it if it ever stops matching RuntimeError::ErrorType //
    const char* ErrorType() const override;

    // Name of the file that is invalid (used for console output) //
    // Non-owning pointer assigned by the constructor //
    const char* name;

    // The location of the file (used for console output) //
    // Non-owning pointer assigned by the constructor //
    const char* fileLocation;
};
}

47
Common/src/Error.cpp Normal file
View File

@@ -0,0 +1,47 @@
#include <LX-Common.h>
namespace LX
{
RuntimeError::RuntimeError()
{
    // Everything below is only compiled when _DEBUG is defined, so other configurations get an empty constructor //
#if defined(_DEBUG)
    // Breaks into the debugger at the moment the error object is created, but only when one is attached //
    if (IsDebuggerPresent())
    {
        __debugbreak();
    }
#endif
}
InvalidFilePath::InvalidFilePath(const std::string& _name, const std::filesystem::path& path)
    : name(nullptr), fileLocation(nullptr)
{
    // The text itself lives in function-local statics; the members only hold raw C-string pointers into them //
    // NOTE(review): both buffers are shared by every InvalidFilePath, so constructing a second one overwrites the text the first one points at //
    static std::string s_ErrorPath;
    static std::string s_ErrorName;

    // Copies the incoming C++ values into the static storage //
    s_ErrorPath = path.string();
    s_ErrorName = _name;

    // Exposes the stored text as plain C-strings (the original author's note: done like this because of how DLLs work) //
    name = s_ErrorName.c_str();
    fileLocation = s_ErrorPath.c_str();
}
void InvalidFilePath::PrintToConsole() const
{
    // Coloured "Error: " prefix //
    LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED);

    // States which kind of path was rejected //
    std::cout << "Invalid ";
    std::cout << name;
    std::cout << ": ";

    // The offending location, highlighted in white //
    LX::PrintStringAsColor(fileLocation, LX::Color::WHITE);

    // Hint telling the user how to fix the issue //
    std::cout << "\n\nMake sure the file exists and the process has the correct path to the file\n";
}
const char* InvalidFilePath::ErrorType() const
{
    // Fixed, human readable name of this error type //
    const char* const typeName = "Invalid File Path";
    return typeName;
}
}

View File

@@ -13,9 +13,6 @@ namespace LX
// Data type to store a more computer readable version of files
struct __declspec(novtable) Token final
{
//
static std::string* source;
// Enum to hold the type of the token //
enum TokenType : short
{
@@ -59,7 +56,7 @@ namespace LX
};
// Constructor of the tokens to set their info //
Token(const TokenType _type, const LexerInfo& info, std::streamsize _length);
Token(const TokenType _type, const LexerInfo& info, std::streamsize _length, const std::string& source);
// Works out the contents of the token and returns them as it is not stored in the token //
std::string GetContents() const;
@@ -79,11 +76,14 @@ namespace LX
// The column on the line where it is located //
const std::streamsize column;
// The contents of the token //
const std::string contents;
};
// Logging function to turn a TokenType enum value into its string //
std::string ToString(Token::TokenType t);
// Lexer function to take in a file and output a vector of tokens //
const std::vector<Token> LexicalAnalyze(const std::string& contents, const std::streamsize len);
const std::vector<Token> LexicalAnalyze(const std::filesystem::path& path);
}

View File

@@ -3,67 +3,19 @@
#include <Parser.h>
#include <Lexer.h>
#include <../Lexer/inc/LexerErrors.h> // <- TEMP (I hope)
namespace LX
{
// Different errors thrown by main //
struct InvalidFilePath : public RuntimeError
{
GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS;
InvalidFilePath(const std::string& _name, const std::filesystem::path& _path)
: name(_name), path(_path)
{}
std::string name;
std::filesystem::path path;
};
void InvalidFilePath::PrintToConsole() const
{
// Tells the user the input file could not be found and how to fix the issue //
LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED);
std::cout << "Invalid " << name << ": ";
LX::PrintStringAsColor(path.string().c_str(), LX::Color::WHITE);
std::cout << "\n\nMake sure the file exists and the process has the correct path to the file\n";
}
const char* InvalidFilePath::ErrorType() const
{
return "Invalid File Path";
}
}
extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_outPath)
{
// Initalises the log //
LX::Log::Init();
// Creates the file paths outside of the try-catch so they can be used in errors //
std::filesystem::path inpPath;
std::filesystem::path outPath;
// Creates the contents string outside of the try-catch so they can be used in errors //
std::string contents;
LX::Token::source = &contents;
try
{
// Initalises the log //
LX::Log::Init();
// Turns the file paths into the C++ type for handling them //
inpPath = a_inpPath;
outPath = a_outPath;
// Checks the input file exists and opens it //
LX::ThrowIf<LX::InvalidFilePath>(std::filesystem::exists(inpPath) == false, "input file path", inpPath);
std::ifstream inpFile(inpPath, std::ios::binary | std::ios::ate); // Opens in binary at the end for microptimisation //
LX::ThrowIf<LX::InvalidFilePath>(inpFile.is_open() == false, "input file path", inpPath);
// Copies the file into the string //
const std::streamsize len = inpFile.tellg(); // Gets length of file because it was opened at the end
inpFile.seekg(0, std::ios::beg); // Goes back to the beginning
contents = std::string(len, '\0'); // Allocates all the space for the string
inpFile.read(&contents[0], len); // Transfers file contents to string
std::filesystem::path inpPath = a_inpPath;
std::filesystem::path outPath = a_outPath;
// Opens / Creates the output file //
std::ofstream outFile(outPath);
@@ -74,14 +26,9 @@ extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_
std::cout << std::filesystem::absolute(inpPath) << " -> " << std::filesystem::absolute(outPath) << std::endl;
// Create tokens out of the input file //
LX::InvalidCharInSource::s_Source = &contents;
LX::InvalidCharInSource::s_SourceFile = &inpPath;
std::vector<LX::Token>tokens = LX::LexicalAnalyze(contents, len);
std::vector<LX::Token>tokens = LX::LexicalAnalyze(inpPath);
// Turns the tokens into an AST //
LX::UnexpectedToken::s_Source = &contents;
LX::UnexpectedToken::s_SourceFile = &inpPath;
LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens);
// Turns the AST into LLVM IR //
@@ -111,6 +58,12 @@ extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_
// Catches any std errors, there should be none //
catch (std::exception& e)
{
// If using a debugger, throws a breakpoint so it can be caught //
if (IsDebuggerPresent())
{
__debugbreak();
}
// Logs the error. Does not need to close it as it is done after this function returns //
LX::Log::LogNewSection("std::exception thrown: ", e.what());
@@ -126,6 +79,12 @@ extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_
// Default catches any non-specified errors //
catch (...)
{
// If using a debugger, throws a breakpoint so it can be caught //
if (IsDebuggerPresent())
{
__debugbreak();
}
// Exit code -1 means an undefined error //
return -1;
}

View File

@@ -2,7 +2,7 @@
#include <LX-Common.h>
#include <Lexer.h>
#include <LexerInfo.h>
namespace LX
{
@@ -11,14 +11,13 @@ namespace LX
{
GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS;
InvalidCharInSource(std::streamsize _col, std::streamsize _line, std::streamsize _index, char _invalid);
InvalidCharInSource(const LexerInfo& info, const std::string& source, const std::string _file);
static std::string* s_Source;
static std::filesystem::path* s_SourceFile;
std::string lineContents;
std::string file;
std::streamsize col;
std::streamsize line;
std::streamsize index;
char invalid;
};

View File

@@ -1,6 +1,6 @@
#include <LX-Common.h>
#pragma once
#include <Lexer.h>
#include <LX-Common.h>
namespace LX
{

View File

@@ -7,37 +7,34 @@
namespace LX
{
std::string* InvalidCharInSource::s_Source = nullptr;
std::filesystem::path* InvalidCharInSource::s_SourceFile = nullptr;
InvalidCharInSource::InvalidCharInSource(const LexerInfo& info, const std::string& source, const std::string _file)
    // Captures the text of the offending line up front so the error no longer needs the source after construction //
    : lineContents(GetLineAtIndexOf(source, info.index)),
      file(_file),
      col(info.column),
      line(info.line),
      // The invalid character is the one the lexer is currently positioned on //
      invalid(source[info.index])
{}
InvalidCharInSource::InvalidCharInSource(std::streamsize _col, std::streamsize _line, std::streamsize _index, char _invalid)
: col(_col), line(_line), index(_index), invalid(_invalid)
void InvalidCharInSource::PrintToConsole() const
{
// Calculates the length of the line number in the console so it is formatted correctly //
std::ostringstream oss;
oss << std::setw(3) << line;
size_t lineNumberWidthInConsole = std::max(oss.str().size(), (size_t)3);
// Gets the line of the error //
std::string errorLine = LX::GetLineAtIndexOf(*s_Source, index);
// Prints the error with the relevant information to the console //
std::cout << "\n";
LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED);
std::cout << "Invalid character found in ";
LX::PrintStringAsColor(s_SourceFile->filename().string(), LX::Color::WHITE);
LX::PrintStringAsColor(file, LX::Color::WHITE);
std::cout << " {";
LX::PrintStringAsColor(std::string(1, invalid), LX::Color::LIGHT_RED);
std::cout << "}:\n";
std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << line << " | " << errorLine << "\n";
std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << line << " | " << lineContents << "\n";
std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(col - 1) << "";
LX::PrintStringAsColor("^", LX::Color::LIGHT_RED);
std::cout << "\n";
}
void InvalidCharInSource::PrintToConsole() const
{}
const char* InvalidCharInSource::ErrorType() const
{
return "Invalid char in source";
@@ -163,23 +160,48 @@ namespace LX
};
// Checks if the given word is a keyword before adding it to the tokens //
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens, LexerInfo& info)
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens, LexerInfo& info, const std::string& contents)
{
// Checks the map for a check and if so adds it with its enum equivalent //
if (auto keyword = keywords.find(word); keyword != keywords.end())
{
tokens.push_back({ keyword->second, info, (std::streamsize)word.size() });
tokens.push_back({ keyword->second, info, (std::streamsize)word.size(), contents });
}
// Else adds it as a type of IDENTIFIER //
else
{
tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size() });
tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size(), contents });
}
}
const std::vector<Token> LX::LexicalAnalyze(const std::string& contents, std::streamsize len)
// Reads the whole file at the given path into a string //
// The stream is opened in binary mode so the bytes are copied without translation //
// Throws LX::InvalidFilePath if the path does not exist, the file cannot be opened or its size cannot be queried //
static std::string ReadFileToString(const std::filesystem::path& path)
{
    // Verifies the file path is valid //
    ThrowIf<LX::InvalidFilePath>(std::filesystem::exists(path) == false, "input file path", path);

    // Opens the file in binary and at the end, so the read position equals the file length //
    std::ifstream file(path, std::ios::binary | std::ios::ate);
    ThrowIf<LX::InvalidFilePath>(file.is_open() == false, "input file path", path);

    // tellg returns -1 on failure; without this check the -1 would be converted to an enormous size //
    // when allocating the string below and surface as a std::length_error instead of an LX error //
    const std::streamsize len = file.tellg();
    ThrowIf<LX::InvalidFilePath>(len < 0, "input file path", path);

    // Goes back to the beginning ready for reading //
    file.seekg(0, std::ios::beg);

    // Allocates a string the size of the file and transfers the file contents into it //
    std::string contents(static_cast<size_t>(len), '\0');
    file.read(contents.data(), len);

    return contents;
}
const std::vector<Token> LX::LexicalAnalyze(const std::filesystem::path& path)
{
// Logs that the file is being read //
Log::LogNewSection("Reading file: ", path.string());
std::string contents = ReadFileToString(path);
const std::streamsize len = contents.length();
// Logs the start of the lexical analysis
Log::LogNewSection("Lexing file");
@@ -236,7 +258,7 @@ namespace LX
{
// Adds the string literal token to the token vector //
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2 }); // Adding two makes the "" be stored as well
tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2, contents }); // Adding two makes the "" be stored as well
// Updates trackers //
info.inStringLiteral = false;
@@ -266,7 +288,7 @@ namespace LX
{
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() });
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), contents });
}
// Stores it is lexing a number literal //
@@ -278,7 +300,7 @@ namespace LX
{
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() });
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), contents });
info.lexingNumber = false; // Stops storing it is lexing a number
}
@@ -296,7 +318,7 @@ namespace LX
if (info.isNextCharAlpha == false)
{
// Calls the function designed to handle the tokenisation of words //
TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens, info);
TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens, info, contents);
}
}
@@ -304,7 +326,7 @@ namespace LX
else if (info.isAlpha == true && info.isNextCharAlpha == false)
{
// Calls the function designed to handle the tokenisation of words //
TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens, info);
TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens, info, contents);
}
// During a word //
@@ -313,13 +335,13 @@ namespace LX
// Symbols //
else if (auto sym = symbols.find(current); sym != symbols.end())
{
tokens.push_back({ sym->second, info, 1 });
tokens.push_back({ sym->second, info, 1, contents });
}
// Operators (+, -, /, *) //
else if (auto op = operators.find(current); op != operators.end())
{
tokens.push_back({ op->second, info, 1 });
tokens.push_back({ op->second, info, 1, contents });
}
// If it is here and not whitespace that means it's an invalid character //
@@ -342,7 +364,7 @@ namespace LX
// Throws an error with all the relevant information //
else
{
ThrowIf<InvalidCharInSource>(true, info.column, info.line, info.index, contents[info.index]);
ThrowIf<InvalidCharInSource>(true, info, contents, path.string());
}
// Log dumps A LOT of info //

View File

@@ -6,17 +6,15 @@
namespace LX
{
// Creates the memory for the pointer to the source //
std::string* Token::source = nullptr;
// Passes the constructor args to the values //
Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length)
: type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length)
Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length, const std::string& source)
: type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length), contents(source.data() + index, length)
{}
//
// This function used to have a use //
// Now it exists cause I'm lazy //
std::string Token::GetContents() const
{
return std::string(source->data() + index, length);
return contents;
}
}

View File

@@ -3,6 +3,8 @@ func main()
int a
a = 65465
/
int b
b = 6