// Reports whether `c` is one of the non-digit characters the lexer accepts //
// inside a number literal: '.' or 'f'. //
// NOTE(review): 'f' is presumably the float suffix - confirm against the grammar //
static constexpr bool CanBePartOfNumberLiteral(const char c)
{
    return (c == '.') || (c == 'f');
}

// Renders a single character as text that is safe to write into a log line //
//
// - '\n', '\t', '\r' and '\0' become their two-character escape spellings //
// - Any other ASCII control byte (below 0x20, or 0x7F) becomes \xNN in upper-case hex, //
//   so a stray control character can never break up or hide part of a log line //
// - Every other byte (printable ASCII and bytes >= 0x80) is returned unchanged //
static std::string PrintChar(const char c)
{
    switch (c)
    {
        case '\n': return R"(\n)";
        case '\t': return R"(\t)";
        case '\r': return R"(\r)";
        case '\0': return R"(\0)";
        default: break;
    }

    // Goes through unsigned char so bytes >= 0x80 are not seen as negative values //
    const auto byte = static_cast<unsigned char>(c);

    if (byte < 0x20 || byte == 0x7F)
    {
        constexpr char hexDigits[] = "0123456789ABCDEF";

        std::string escaped = R"(\x)";
        escaped += hexDigits[byte >> 4];
        escaped += hexDigits[byte & 0x0F];
        return escaped;
    }

    return std::string(1, c);
}
false) + { + // Pushes the number to the token vector. Number literals are stored as string in the tokens // + std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); + tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() }); + } + + // Stores it is lexing a number literal // + else { info.lexingNumber = true; } + } + + // End of a number // + else if ((info.isNumeric == true || CanBePartOfNumberLiteral(current)) && info.isNextCharNumeric == false && info.lexingNumber == true) + { + // Pushes the number to the token vector. Number literals are stored as string in the tokens // + std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); + tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() }); + info.lexingNumber = false; // Stops storing it is lexing a number + } + + // During a number // + else if (info.isNumeric == true); + else if (info.lexingNumber == true && CanBePartOfNumberLiteral(current)); + // Start of a word // else if (info.isAlpha == true && info.wasLastCharAlpha == false) { @@ -216,32 +264,6 @@ namespace LX // During a word // else if (info.isAlpha == true); - // Start of a number // - else if (info.isNumeric == true && info.wasLastCharNumeric == false) - { - // Stores the start of the number // - info.startOfNumberLiteral = info.index; - - // Checks if it as the end (single char numbers) // - if (info.isNextCharNumeric == false) - { - // Pushes the number to the token vector. 
Number literals are stored as string in the tokens // - std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); - tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size()}); - } - } - - // End of a number // - else if (info.isNumeric == true && info.isNextCharNumeric == false) - { - // Pushes the number to the token vector. Number literals are stored as string in the tokens // - std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); - tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size()}); - } - - // During a number // - else if (info.isNumeric == true); - // Operators (+, -, /, *) // else if (auto op = operators.find(current); op != operators.end()) { @@ -265,6 +287,7 @@ namespace LX info.line++; } + // Throws an error with all the relevant information //s else { // Finds the start of the line // @@ -283,6 +306,22 @@ namespace LX throw InvalidCharInSource(info.column, info.line, line, contents[info.index]); } + // Log dumps A LOT of info // + + SafeLog + ( + log, + "Is Alpha: ", info.isAlpha, + " Is Numeric: ", info.isNumeric, + " In Comment: ", info.inComment, + " In String: ", info.inStringLiteral, + " Next Char Alpha: ", info.isNextCharAlpha, + " Next Char Numeric: ", info.wasLastCharNumeric, + " Last Char Numeric: ", info.wasLastCharAlpha, + " Lexing number: ", info.lexingNumber, + " Current: {", PrintChar(current), "}" + ); + // Updates trackers to their default state of a new character // info.index++; @@ -295,6 +334,8 @@ namespace LX // Logs the tokens if logging is on // if (log != nullptr) { + SafeLog(log, LOG_BREAK, "Tokens", LOG_BREAK); + for (auto& token : tokens) { if (token.contents.empty() == false) @@ -307,6 +348,8 @@ namespace LX SafeLog(log, "{ Line: ", 
// Flushes the given log stream so that everything written so far reaches the file //
// A null pointer means logging is disabled, in which case this does nothing //
inline void SafeFlush(std::ofstream* log)
{
    // Nothing to flush when there is no log //
    if (log == nullptr) { return; }

    log->flush();
}