From c95d91b867876a940ce96f206b69713a0c5d9cb2 Mon Sep 17 00:00:00 2001 From: Pasha Bibko <156938226+PashaBibko@users.noreply.github.com> Date: Tue, 22 Apr 2025 10:55:35 +0100 Subject: [PATCH] Changed how tokens are represented in memory Also made string-literal tokens now include the quotes as part of their contents. --- Frontend-Main.cpp | 5 +++-- Lexer/src/Lexer.cpp | 30 +++++++++++++++--------------- Lexer/src/Token.cpp | 13 +++++++++++-- Parser/src/Parser.cpp | 4 ++-- build-test/main.lx | 2 +- common/Lexer.h | 14 ++++++++------ 6 files changed, 40 insertions(+), 28 deletions(-) diff --git a/Frontend-Main.cpp b/Frontend-Main.cpp index b490ba1..80910af 100644 --- a/Frontend-Main.cpp +++ b/Frontend-Main.cpp @@ -44,6 +44,7 @@ int main(int argc, char** argv) // Creates the contents string outside of the try-catch so they can be used in errors // std::string contents; + LX::Token::source = &contents; // Creates the log-file out of the try-catch so it can be closed propely if an error is thrown // std::unique_ptr log = nullptr; @@ -212,9 +213,9 @@ int main(int argc, char** argv) std::cout << "\n"; // Prints the code with the error to the console // - std::string errorSquiggle(e.got.length, '^'); + std::string errorSquiggle(e.got.length, '~'); std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << e.got.line << " | " << line << "\n"; - std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(e.got.column - 1) << ""; + std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(e.got.column) << ""; LX::PrintStringAsColor(errorSquiggle, LX::Color::LIGHT_RED); std::cout << "\n"; diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp index 89d6f03..8b6e19c 100644 --- a/Lexer/src/Lexer.cpp +++ b/Lexer/src/Lexer.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -114,13 +115,13 @@ namespace LX // Checks the map for a check and if so adds it with its enum equivalent // if (auto keyword = keywords.find(word); keyword != keywords.end()) { - tokens.push_back({ keyword->second, info, "", (std::streamsize)word.size() }); + tokens.push_back({ keyword->second, info, (std::streamsize)word.size() }); } // Else adds it as a type of IDENTIFIER // else { - tokens.push_back({ Token::IDENTIFIER, info, word, (std::streamsize)word.size() }); + tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size() }); } } @@ -182,7 +183,7 @@ namespace LX { // Adds the string literal token to the token vector // std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral); - tokens.push_back({ Token::STRING_LITERAL, info, lit, (std::streamsize)lit.length() + 1 }); + tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2 }); // Adding two makes the "" be stored as well // Updates trackers // info.inStringLiteral = false; @@ -212,7 +213,7 @@ namespace LX { // Pushes the number to the token vector. Number literals are stored as string in the tokens // std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); - tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() }); + tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() }); } // Stores it is lexing a number literal // @@ -224,7 +225,7 @@ namespace LX { // Pushes the number to the token vector. Number literals are stored as string in the tokens // std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); - tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() }); + tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() }); info.lexingNumber = false; // Stops storing it is lexing a number } @@ -259,7 +260,7 @@ namespace LX // Operators (+, -, /, *) // else if (auto op = operators.find(current); op != operators.end()) { - tokens.push_back({ op->second, info, "", 1 }); + tokens.push_back({ op->second, info, 1 }); } // If it is here and not whitespace that means it's an invalid character // @@ -323,15 +324,14 @@ namespace LX for (auto& token : tokens) { - if (token.contents.empty() == false) - { - SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.contents, "}"); - } - - else - { - SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", ToStringNoFormat(token.type)); - } + SafeLog + ( + log, std::left, + "{ Line: ", std::setw(3), token.line, + ", Index: ", std::setw(3), token.index, + ", Length: ", std::setw(2), token.length, " } ", + std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.GetContents(), "}" + ); } SafeLog(log, "\n END OF TOKENS"); diff --git a/Lexer/src/Token.cpp b/Lexer/src/Token.cpp index a2bd3a4..01ebd0a 100644 --- a/Lexer/src/Token.cpp +++ b/Lexer/src/Token.cpp @@ -5,8 +5,17 @@ namespace LX { + // Creates the memory for the pointer to the source // + std::string* Token::source = nullptr; + // Passes the constructor args to the values // - Token::Token(const TokenType _type, const LexerInfo& info, std::string _contents, std::streamsize _length) - : type(_type), contents(_contents), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length) + Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length) + : type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length) {} + + // + std::string Token::GetContents() const + { + return std::string(source->data() + index, length); + } } diff --git a/Parser/src/Parser.cpp b/Parser/src/Parser.cpp index 550da06..3980be9 100644 --- a/Parser/src/Parser.cpp +++ b/Parser/src/Parser.cpp @@ -43,7 +43,7 @@ namespace LX // Number literals just require them to be turned into an AST node // // Note: Number literals are stored as strings because i'm a masochist // case Token::NUMBER_LITERAL: - return std::make_unique(p.tokens[p.index++].contents); + return std::make_unique(p.tokens[p.index++].GetContents()); // Returns nullptr, the parsing function that recives that value will decide if that is valid // default: @@ -133,7 +133,7 @@ namespace LX // Assigns the function name // ExpectToken(p.tokens[p.index]); - func.name = p.tokens[p.index++].contents; + func.name = p.tokens[p.index++].GetContents(); // Loops over the body until it reaches the end // // TODO: Detect the end instead of looping over the entire token vector diff --git a/build-test/main.lx b/build-test/main.lx index 468ac24..de2dda0 100644 --- a/build-test/main.lx +++ b/build-test/main.lx @@ -1,2 +1,2 @@ func main - return 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + return 375 + "heloo there" diff --git a/common/Lexer.h b/common/Lexer.h index 24c97e5..4f1a7a3 100644 --- a/common/Lexer.h +++ b/common/Lexer.h @@ -62,6 +62,9 @@ namespace LX // Data type to store a more computer readable version of files struct __declspec(novtable) Token final { + // + static std::string* source; + // Enum to hold the type of the token // enum TokenType : short { @@ -87,13 +90,12 @@ namespace LX UNDEFINED = -1 }; - + // Constructor of the tokens to set their info // - Token(const TokenType _type, const LexerInfo& info, std::string _contents, std::streamsize _length); + Token(const TokenType _type, const LexerInfo& info, std::streamsize _length); - // Contents of the token (may be empty if not needed) // - // Const to avoid external changes // - const std::string contents; + // + std::string GetContents() const; // Type of the token // // Const to avoid external changes // @@ -102,7 +104,7 @@ namespace LX // Index in the source of the token // const std::streamsize index; - // The length of the token on the line, may be different to the length of contents // + // The length of the token on the line // const std::streamsize length; // The line the token is located on //