From 4e78a9f6aef902f5a9741c68b801fd8e45d00b6d Mon Sep 17 00:00:00 2001 From: Pasha Bibko <156938226+PashaBibko@users.noreply.github.com> Date: Wed, 7 May 2025 16:31:44 +0100 Subject: [PATCH] Made Lexer errors fully hidden in global scope Also improved general ease of debugging by calling __debugbreak when an error is thrown. NOTE: Parser errors currently crash the program --- Common/Common.vcxproj | 1 + Common/Common.vcxproj.filters | 3 ++ Common/inc/Error.h | 29 ++++++++++++- Common/src/Error.cpp | 47 +++++++++++++++++++++ IR-Generator/inc/Lexer.h | 10 ++--- IR-Generator/src/Generator.cpp | 77 ++++++++-------------------------- Lexer/inc/LexerErrors.h | 9 ++-- Lexer/inc/LexerInfo.h | 4 +- Lexer/src/Lexer.cpp | 70 ++++++++++++++++++++----------- Lexer/src/Token.cpp | 12 +++--- example/main.lx | 2 + 11 files changed, 161 insertions(+), 103 deletions(-) create mode 100644 Common/src/Error.cpp diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index 8744c21..2f88c59 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -159,6 +159,7 @@ + Create diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index 333be8b..6bc7020 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -37,5 +37,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/Common/inc/Error.h b/Common/inc/Error.h index bb7e8ac..ee72708 100644 --- a/Common/inc/Error.h +++ b/Common/inc/Error.h @@ -2,14 +2,41 @@ namespace LX { // Base error class for all LX thrown errors // // Holds nothing apart from the v-table // - struct RuntimeError + struct COMMON_API RuntimeError { + // Default constructor which throws a breakpoint on being created // + RuntimeError(); + // Prints the error to the console // // Include Common/Console.h for printing util functions // virtual void PrintToConsole() const = 0; // Returns a C-String of the type that was thrown // virtual const char* ErrorType() const = 0; + + // Virtual destructor 
because of polymorphism // + virtual ~RuntimeError() = default; + }; + + // --- Common errors that can be thrown --- // + + // Error thrown when there is an invalid file path // + struct COMMON_API InvalidFilePath : public RuntimeError + { + // Constructor to turn the C++ types to C to expose them in DLL // + InvalidFilePath(const std::string& _name, const std::filesystem::path& path); + + // Prints the error to the console // + void PrintToConsole() const; + + // Returns the error as c-string // + const char* ErrorType() const; + + // Name of the file that is invalid (used for console output) // + const char* name; + + // The location of the file (used for console output) // + const char* fileLocation; }; } diff --git a/Common/src/Error.cpp b/Common/src/Error.cpp new file mode 100644 index 0000000..05e1efc --- /dev/null +++ b/Common/src/Error.cpp @@ -0,0 +1,47 @@ +#include + +namespace LX +{ + RuntimeError::RuntimeError() + { + // Debuggers can only be attached in Debug configuration so this code is useless in Release/Distribution builds // + #ifdef _DEBUG + + // Checks a debugger is present before throwing a breakpoint // + if (IsDebuggerPresent()) { __debugbreak(); } + + #endif + } + + InvalidFilePath::InvalidFilePath(const std::string& _name, const std::filesystem::path& path) + : name(nullptr), fileLocation(nullptr) + { + // Stores the actual strings in static memory // + + static std::string s_ErrorPath; + static std::string s_ErrorName; + + s_ErrorPath = path.string(); + s_ErrorName = _name; + + // Assigns a pointer to the c-string of the strings // + // Done like this because of how DLLs work // + + fileLocation = s_ErrorPath.c_str(); + name = s_ErrorName.c_str(); + } + + void InvalidFilePath::PrintToConsole() const + { + // Tells the user the input file could not be found and how to fix the issue // + LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED); + std::cout << "Invalid " << name << ": "; + LX::PrintStringAsColor(fileLocation, LX::Color::WHITE); 
+ std::cout << "\n\nMake sure the file exists and the process has the correct path to the file\n"; + } + + const char* InvalidFilePath::ErrorType() const + { + return "Invalid File Path"; + } +} diff --git a/IR-Generator/inc/Lexer.h b/IR-Generator/inc/Lexer.h index 3d29019..64ca3e2 100644 --- a/IR-Generator/inc/Lexer.h +++ b/IR-Generator/inc/Lexer.h @@ -13,9 +13,6 @@ namespace LX // Data type to store a more computer readable version of files struct __declspec(novtable) Token final { - // - static std::string* source; - // Enum to hold the type of the token // enum TokenType : short { @@ -59,7 +56,7 @@ namespace LX }; // Constructor of the tokens to set their info // - Token(const TokenType _type, const LexerInfo& info, std::streamsize _length); + Token(const TokenType _type, const LexerInfo& info, std::streamsize _length, const std::string& source); // Works out the contents of the token and returns them as it is not stored in the token // std::string GetContents() const; @@ -79,11 +76,14 @@ namespace LX // The column on the line where it is located // const std::streamsize column; + + // The contents of the token // + const std::string contents; }; // Logging function to turn a tokentype enum val into it's string // std::string ToString(Token::TokenType t); // Lexer function to take in a file and output a vector of tokens // - const std::vector LexicalAnalyze(const std::string& contents, const std::streamsize len); + const std::vector LexicalAnalyze(const std::filesystem::path& path); } diff --git a/IR-Generator/src/Generator.cpp b/IR-Generator/src/Generator.cpp index 92002b7..960f8f0 100644 --- a/IR-Generator/src/Generator.cpp +++ b/IR-Generator/src/Generator.cpp @@ -3,67 +3,19 @@ #include #include -#include <../Lexer/inc/LexerErrors.h> // <- TEMP (I hope) - -namespace LX -{ - // Different errors thrown by main // - struct InvalidFilePath : public RuntimeError - { - GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS; - - InvalidFilePath(const std::string& _name, 
const std::filesystem::path& _path) - : name(_name), path(_path) - {} - - std::string name; - std::filesystem::path path; - }; - - void InvalidFilePath::PrintToConsole() const - { - // Tells the user the input file could not be found and how to fix the issue // - LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED); - std::cout << "Invalid " << name << ": "; - LX::PrintStringAsColor(path.string().c_str(), LX::Color::WHITE); - std::cout << "\n\nMake sure the file exists and the process has the correct path to the file\n"; - } - - const char* InvalidFilePath::ErrorType() const - { - return "Invalid File Path"; - } -} - extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_outPath) { // Initalises the log // LX::Log::Init(); - // Creates the file paths outside of the try-catch so they can be used in errors // - std::filesystem::path inpPath; - std::filesystem::path outPath; - - // Creates the contents string outside of the try-catch so they can be used in errors // - std::string contents; - LX::Token::source = &contents; - try { + // Initalises the log // + LX::Log::Init(); + // Turns the file paths into the C++ type for handling them // - inpPath = a_inpPath; - outPath = a_outPath; - - // Checks the input file exists and opens it // - LX::ThrowIf(std::filesystem::exists(inpPath) == false, "input file path", inpPath); - std::ifstream inpFile(inpPath, std::ios::binary | std::ios::ate); // Opens in binary at the end for microptimisation // - LX::ThrowIf(inpFile.is_open() == false, "input file path", inpPath); - - // Copies the file into the string // - const std::streamsize len = inpFile.tellg(); // Gets length of file because it was opened at the end - inpFile.seekg(0, std::ios::beg); // Goes back to the beginning - contents = std::string(len, '\0'); // Allocates all the space for the string - inpFile.read(&contents[0], len); // Transfers file contents to string + std::filesystem::path inpPath = a_inpPath; + std::filesystem::path outPath = 
a_outPath; // Opens / Creates the output file // std::ofstream outFile(outPath); @@ -74,14 +26,9 @@ extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_ std::cout << std::filesystem::absolute(inpPath) << " -> " << std::filesystem::absolute(outPath) << std::endl; // Create tokens out of the input file // - LX::InvalidCharInSource::s_Source = &contents; - LX::InvalidCharInSource::s_SourceFile = &inpPath; - std::vectortokens = LX::LexicalAnalyze(contents, len); + std::vectortokens = LX::LexicalAnalyze(inpPath); // Turns the tokens into an AST // - LX::UnexpectedToken::s_Source = &contents; - LX::UnexpectedToken::s_SourceFile = &inpPath; - LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens); // Turns the AST into LLVM IR // @@ -111,6 +58,12 @@ extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_ // Catches any std errors, there should be none // catch (std::exception& e) { + // If using a debugger, throws a breakpoint so it can be caught // + if (IsDebuggerPresent()) + { + __debugbreak(); + } + // Logs the error. Does not need to close it as it is done after this function returns // LX::Log::LogNewSection("std::exception thrown: ", e.what()); @@ -126,6 +79,12 @@ extern "C" int __declspec(dllexport) GenIR(const char* a_inpPath, const char* a_ // Default catches any non-specified errors // catch (...) 
{ + // If using a debugger, throws a breakpoint so it can be caught // + if (IsDebuggerPresent()) + { + __debugbreak(); + } + // Exit code -1 means an undefined error // return -1; } diff --git a/Lexer/inc/LexerErrors.h b/Lexer/inc/LexerErrors.h index 94a314e..baf40ac 100644 --- a/Lexer/inc/LexerErrors.h +++ b/Lexer/inc/LexerErrors.h @@ -2,7 +2,7 @@ #include -#include +#include namespace LX { @@ -11,14 +11,13 @@ namespace LX { GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS; - InvalidCharInSource(std::streamsize _col, std::streamsize _line, std::streamsize _index, char _invalid); + InvalidCharInSource(const LexerInfo& info, const std::string& source, const std::string _file); - static std::string* s_Source; - static std::filesystem::path* s_SourceFile; + std::string lineContents; + std::string file; std::streamsize col; std::streamsize line; - std::streamsize index; char invalid; }; diff --git a/Lexer/inc/LexerInfo.h b/Lexer/inc/LexerInfo.h index f50efbd..86b29a9 100644 --- a/Lexer/inc/LexerInfo.h +++ b/Lexer/inc/LexerInfo.h @@ -1,6 +1,6 @@ -#include +#pragma once -#include +#include namespace LX { diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp index 1553404..2a89c66 100644 --- a/Lexer/src/Lexer.cpp +++ b/Lexer/src/Lexer.cpp @@ -7,37 +7,34 @@ namespace LX { - std::string* InvalidCharInSource::s_Source = nullptr; - std::filesystem::path* InvalidCharInSource::s_SourceFile = nullptr; + InvalidCharInSource::InvalidCharInSource(const LexerInfo& info, const std::string& source, const std::string _file) + : col(info.column), line(info.line), file(_file), lineContents{}, invalid(source[info.index]) + { + // Gets the line the error is on // + lineContents = GetLineAtIndexOf(source, info.index); + } - InvalidCharInSource::InvalidCharInSource(std::streamsize _col, std::streamsize _line, std::streamsize _index, char _invalid) - : col(_col), line(_line), index(_index), invalid(_invalid) + void InvalidCharInSource::PrintToConsole() const { // Calculates the length of 
the line number in the console so it is formatted correctly // std::ostringstream oss; oss << std::setw(3) << line; size_t lineNumberWidthInConsole = std::max(oss.str().size(), (size_t)3); - // Gets the line of the error // - std::string errorLine = LX::GetLineAtIndexOf(*s_Source, index); - // Prints the error with the relevant information to the console // std::cout << "\n"; LX::PrintStringAsColor("Error: ", LX::Color::LIGHT_RED); std::cout << "Invalid character found in "; - LX::PrintStringAsColor(s_SourceFile->filename().string(), LX::Color::WHITE); + LX::PrintStringAsColor(file, LX::Color::WHITE); std::cout << " {"; LX::PrintStringAsColor(std::string(1, invalid), LX::Color::LIGHT_RED); std::cout << "}:\n"; - std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << line << " | " << errorLine << "\n"; + std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << line << " | " << lineContents << "\n"; std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(col - 1) << ""; LX::PrintStringAsColor("^", LX::Color::LIGHT_RED); std::cout << "\n"; } - void InvalidCharInSource::PrintToConsole() const - {} - const char* InvalidCharInSource::ErrorType() const { return "Invalid char in source"; @@ -163,23 +160,48 @@ namespace LX }; // Checks if the given word is a keyword before adding it to the tokens // - static void TokenizeWord(const std::string& word, std::vector& tokens, LexerInfo& info) + static void TokenizeWord(const std::string& word, std::vector& tokens, LexerInfo& info, const std::string& contents) { // Checks the map for a check and if so adds it with its enum equivalent // if (auto keyword = keywords.find(word); keyword != keywords.end()) { - tokens.push_back({ keyword->second, info, (std::streamsize)word.size() }); + tokens.push_back({ keyword->second, info, (std::streamsize)word.size(), contents }); } // Else adds it as a type of IDENTIFIER // else { - tokens.push_back({ Token::IDENTIFIER, info, 
(std::streamsize)word.size() }); + tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size(), contents }); } } - const std::vector LX::LexicalAnalyze(const std::string& contents, std::streamsize len) + static std::string ReadFileToString(const std::filesystem::path& path) { + // Verifies the file path is valid // + ThrowIf(std::filesystem::exists(path) == false, "input file path", path); + + // Opens the file // + std::ifstream file(path, std::ios::binary | std::ios::ate); // Opens in binary and at the end (microptimsation) + ThrowIf(file.is_open() == false, "input file path", path); + + // Stores the length of the string and goes back to the beginning // + const std::streamsize len = file.tellg(); // tellg returns length because it was opened at the end + file.seekg(0, std::ios::beg); + + // Transfers the file contents to the output // + std::string contents(len, '\0'); // Allocates an empty string which is the size of the file + file.read(&contents[0], len); + return contents; + } + + const std::vector LX::LexicalAnalyze(const std::filesystem::path& path) + { + // Logs that the file is being read // + Log::LogNewSection("Reading file: ", path.string()); + + std::string contents = ReadFileToString(path); + const std::streamsize len = contents.length(); + // Logs the start of the lexical analysis Log::LogNewSection("Lexing file"); @@ -236,7 +258,7 @@ namespace LX { // Adds the string literal token to the token vector // std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral); - tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2 }); // Adding two makes the "" be stored as well + tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2, contents }); // Adding two makes the "" be stored as well // Updates trackers // info.inStringLiteral = false; @@ -266,7 +288,7 @@ namespace LX { // Pushes the number to the token vector. 
Number literals are stored as string in the tokens // std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); - tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() }); + tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), contents }); } // Stores it is lexing a number literal // @@ -278,7 +300,7 @@ namespace LX { // Pushes the number to the token vector. Number literals are stored as string in the tokens // std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); - tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() }); + tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size(), contents }); info.lexingNumber = false; // Stops storing it is lexing a number } @@ -296,7 +318,7 @@ namespace LX if (info.isNextCharAlpha == false) { // Calls the function designed to handle the tokenisation of words // - TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens, info); + TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens, info, contents); } } @@ -304,7 +326,7 @@ namespace LX else if (info.isAlpha == true && info.isNextCharAlpha == false) { // Calls the function designed to handle the tokenisation of words // - TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens, info); + TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens, info, contents); } // During a word // @@ -313,13 +335,13 @@ namespace LX // Symbols // else if (auto sym = symbols.find(current); sym != symbols.end()) { - tokens.push_back({ sym->second, info, 1 }); + tokens.push_back({ sym->second, info, 1, contents }); } // Operators (+, -, /, *) // else if (auto op = operators.find(current); op != operators.end()) { - tokens.push_back({ 
op->second, info, 1 }); + tokens.push_back({ op->second, info, 1, contents }); } // If it is here and not whitespace that means it's an invalid character // @@ -342,7 +364,7 @@ namespace LX // Throws an error with all the relevant information // else { - ThrowIf(true, info.column, info.line, info.index, contents[info.index]); + ThrowIf(true, info, contents, path.string()); } // Log dumps A LOT of info // diff --git a/Lexer/src/Token.cpp b/Lexer/src/Token.cpp index 1c30c43..ee54024 100644 --- a/Lexer/src/Token.cpp +++ b/Lexer/src/Token.cpp @@ -6,17 +6,15 @@ namespace LX { - // Creates the memory for the pointer to the source // - std::string* Token::source = nullptr; - // Passes the constructor args to the values // - Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length) - : type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length) + Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length, const std::string& source) + : type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length), contents(source.data() + index, length) {} - // + // This function used to have a use // + // Now it exists cause I'm lazy // std::string Token::GetContents() const { - return std::string(source->data() + index, length); + return contents; } } diff --git a/example/main.lx b/example/main.lx index d56ce30..5b58e0e 100644 --- a/example/main.lx +++ b/example/main.lx @@ -3,6 +3,8 @@ func main() int a a = 65465 + / + int b b = 6