Changed how tokens are represented in memory

String-literal tokens now also include the surrounding quotes as part of their contents.
This commit is contained in:
Pasha Bibko
2025-04-22 10:55:35 +01:00
parent 4b47e803ce
commit c95d91b867
6 changed files with 40 additions and 28 deletions

View File

@@ -44,6 +44,7 @@ int main(int argc, char** argv)
// Creates the contents string outside of the try-catch so they can be used in errors // // Creates the contents string outside of the try-catch so they can be used in errors //
std::string contents; std::string contents;
LX::Token::source = &contents;
// Creates the log-file out of the try-catch so it can be closed properly if an error is thrown // // Creates the log-file out of the try-catch so it can be closed properly if an error is thrown //
std::unique_ptr<std::ofstream> log = nullptr; std::unique_ptr<std::ofstream> log = nullptr;
@@ -212,9 +213,9 @@ int main(int argc, char** argv)
std::cout << "\n"; std::cout << "\n";
// Prints the code with the error to the console // // Prints the code with the error to the console //
std::string errorSquiggle(e.got.length, '^'); std::string errorSquiggle(e.got.length, '~');
std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << e.got.line << " | " << line << "\n"; std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << e.got.line << " | " << line << "\n";
std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(e.got.column - 1) << ""; std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(e.got.column) << "";
LX::PrintStringAsColor(errorSquiggle, LX::Color::LIGHT_RED); LX::PrintStringAsColor(errorSquiggle, LX::Color::LIGHT_RED);
std::cout << "\n"; std::cout << "\n";

View File

@@ -5,6 +5,7 @@
#include <string_view> #include <string_view>
#include <fstream> #include <fstream>
#include <vector> #include <vector>
#include <string>
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
@@ -114,13 +115,13 @@ namespace LX
// Checks the map for a check and if so adds it with its enum equivalent // // Checks the map for a check and if so adds it with its enum equivalent //
if (auto keyword = keywords.find(word); keyword != keywords.end()) if (auto keyword = keywords.find(word); keyword != keywords.end())
{ {
tokens.push_back({ keyword->second, info, "", (std::streamsize)word.size() }); tokens.push_back({ keyword->second, info, (std::streamsize)word.size() });
} }
// Else adds it as a type of IDENTIFIER // // Else adds it as a type of IDENTIFIER //
else else
{ {
tokens.push_back({ Token::IDENTIFIER, info, word, (std::streamsize)word.size() }); tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size() });
} }
} }
@@ -182,7 +183,7 @@ namespace LX
{ {
// Adds the string literal token to the token vector // // Adds the string literal token to the token vector //
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral); std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
tokens.push_back({ Token::STRING_LITERAL, info, lit, (std::streamsize)lit.length() + 1 }); tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2 }); // Adding two makes the "" be stored as well
// Updates trackers // // Updates trackers //
info.inStringLiteral = false; info.inStringLiteral = false;
@@ -212,7 +213,7 @@ namespace LX
{ {
// Pushes the number to the token vector. Number literals are stored as string in the tokens // // Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() }); tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() });
} }
// Stores it is lexing a number literal // // Stores it is lexing a number literal //
@@ -224,7 +225,7 @@ namespace LX
{ {
// Pushes the number to the token vector. Number literals are stored as string in the tokens // // Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() }); tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() });
info.lexingNumber = false; // Stops storing it is lexing a number info.lexingNumber = false; // Stops storing it is lexing a number
} }
@@ -259,7 +260,7 @@ namespace LX
// Operators (+, -, /, *) // // Operators (+, -, /, *) //
else if (auto op = operators.find(current); op != operators.end()) else if (auto op = operators.find(current); op != operators.end())
{ {
tokens.push_back({ op->second, info, "", 1 }); tokens.push_back({ op->second, info, 1 });
} }
// If it is here and not whitespace that means it's an invalid character // // If it is here and not whitespace that means it's an invalid character //
@@ -323,15 +324,14 @@ namespace LX
for (auto& token : tokens) for (auto& token : tokens)
{ {
if (token.contents.empty() == false) SafeLog
{ (
SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.contents, "}"); log, std::left,
} "{ Line: ", std::setw(3), token.line,
", Index: ", std::setw(3), token.index,
else ", Length: ", std::setw(2), token.length, " } ",
{ std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.GetContents(), "}"
SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", ToStringNoFormat(token.type)); );
}
} }
SafeLog(log, "\n END OF TOKENS"); SafeLog(log, "\n END OF TOKENS");

View File

@@ -5,8 +5,17 @@
namespace LX namespace LX
{ {
// Creates the memory for the pointer to the source //
std::string* Token::source = nullptr;
// Passes the constructor args to the values // // Passes the constructor args to the values //
Token::Token(const TokenType _type, const LexerInfo& info, std::string _contents, std::streamsize _length) Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length)
: type(_type), contents(_contents), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length) : type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length)
{} {}
// Returns a copy of the token's text, read from the string that Token::source points to //
// The text is the `length` characters starting at `index` within that string //
// Returns an empty string if no source has been set or if the token starts outside of the source //
std::string Token::GetContents() const
{
    // Token::source is initialised to nullptr so it must be checked before it is dereferenced //
    // index and length are signed, negative values would otherwise turn into huge unsigned sizes //
    if (source == nullptr || index < 0 || length <= 0)
    {
        return {};
    }

    // A token that starts at or past the end of the source has no text to return //
    const auto start = static_cast<std::string::size_type>(index);
    if (start >= source->size())
    {
        return {};
    }

    // substr clamps the count to the end of the string so this never reads past the buffer //
    return source->substr(start, static_cast<std::string::size_type>(length));
}
} }

View File

@@ -43,7 +43,7 @@ namespace LX
// Number literals just require them to be turned into an AST node // // Number literals just require them to be turned into an AST node //
// Note: Number literals are stored as strings because i'm a masochist // // Note: Number literals are stored as strings because i'm a masochist //
case Token::NUMBER_LITERAL: case Token::NUMBER_LITERAL:
return std::make_unique<AST::NumberLiteral>(p.tokens[p.index++].contents); return std::make_unique<AST::NumberLiteral>(p.tokens[p.index++].GetContents());
// Returns nullptr, the parsing function that receives that value will decide if that is valid // // Returns nullptr, the parsing function that receives that value will decide if that is valid //
default: default:
@@ -133,7 +133,7 @@ namespace LX
// Assigns the function name // // Assigns the function name //
ExpectToken<Token::IDENTIFIER>(p.tokens[p.index]); ExpectToken<Token::IDENTIFIER>(p.tokens[p.index]);
func.name = p.tokens[p.index++].contents; func.name = p.tokens[p.index++].GetContents();
// Loops over the body until it reaches the end // // Loops over the body until it reaches the end //
// TODO: Detect the end instead of looping over the entire token vector // TODO: Detect the end instead of looping over the entire token vector

View File

@@ -1,2 +1,2 @@
func main func main
return 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 return 375 + "heloo there"

View File

@@ -62,6 +62,9 @@ namespace LX
// Data type to store a more computer readable version of files // Data type to store a more computer readable version of files
struct __declspec(novtable) Token final struct __declspec(novtable) Token final
{ {
//
static std::string* source;
// Enum to hold the type of the token // // Enum to hold the type of the token //
enum TokenType : short enum TokenType : short
{ {
@@ -87,13 +90,12 @@ namespace LX
UNDEFINED = -1 UNDEFINED = -1
}; };
// Constructor of the tokens to set their info // // Constructor of the tokens to set their info //
Token(const TokenType _type, const LexerInfo& info, std::string _contents, std::streamsize _length); Token(const TokenType _type, const LexerInfo& info, std::streamsize _length);
// Contents of the token (may be empty if not needed) // //
// Const to avoid external changes // std::string GetContents() const;
const std::string contents;
// Type of the token // // Type of the token //
// Const to avoid external changes // // Const to avoid external changes //
@@ -102,7 +104,7 @@ namespace LX
// Index in the source of the token // // Index in the source of the token //
const std::streamsize index; const std::streamsize index;
// The length of the token on the line, may be different to the length of contents // // The length of the token on the line //
const std::streamsize length; const std::streamsize length;
// The line the token is located on // // The line the token is located on //