Changed how tokens are represented in memory

Also made string-literal tokens include the quotes as part of their contents.
This commit is contained in:
Pasha Bibko
2025-04-22 10:55:35 +01:00
parent 4b47e803ce
commit c95d91b867
6 changed files with 40 additions and 28 deletions

View File

@@ -44,6 +44,7 @@ int main(int argc, char** argv)
// Creates the contents string outside of the try-catch so they can be used in errors //
std::string contents;
LX::Token::source = &contents;
// Creates the log file outside of the try-catch so it can be closed properly if an error is thrown //
std::unique_ptr<std::ofstream> log = nullptr;
@@ -212,9 +213,9 @@ int main(int argc, char** argv)
std::cout << "\n";
// Prints the code with the error to the console //
std::string errorSquiggle(e.got.length, '^');
std::string errorSquiggle(e.got.length, '~');
std::cout << "Line: " << std::setw(lineNumberWidthInConsole) << e.got.line << " | " << line << "\n";
std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(e.got.column - 1) << "";
std::cout << " " << std::setw(lineNumberWidthInConsole) << "" << " | " << std::setw(e.got.column) << "";
LX::PrintStringAsColor(errorSquiggle, LX::Color::LIGHT_RED);
std::cout << "\n";

View File

@@ -5,6 +5,7 @@
#include <string_view>
#include <fstream>
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
@@ -114,13 +115,13 @@ namespace LX
// Checks the keyword map for the word and, if found, adds it with its enum equivalent //
if (auto keyword = keywords.find(word); keyword != keywords.end())
{
tokens.push_back({ keyword->second, info, "", (std::streamsize)word.size() });
tokens.push_back({ keyword->second, info, (std::streamsize)word.size() });
}
// Else adds it as a type of IDENTIFIER //
else
{
tokens.push_back({ Token::IDENTIFIER, info, word, (std::streamsize)word.size() });
tokens.push_back({ Token::IDENTIFIER, info, (std::streamsize)word.size() });
}
}
@@ -182,7 +183,7 @@ namespace LX
{
// Adds the string literal token to the token vector //
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
tokens.push_back({ Token::STRING_LITERAL, info, lit, (std::streamsize)lit.length() + 1 });
tokens.push_back({ Token::STRING_LITERAL, info, (std::streamsize)lit.length() + 2 }); // Adding two makes the "" be stored as well
// Updates trackers //
info.inStringLiteral = false;
@@ -212,7 +213,7 @@ namespace LX
{
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() });
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() });
}
// Records that a number literal is currently being lexed //
@@ -224,7 +225,7 @@ namespace LX
{
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() });
tokens.push_back({ Token::NUMBER_LITERAL, info, (std::streamsize)num.size() });
info.lexingNumber = false; // Stops storing it is lexing a number
}
@@ -259,7 +260,7 @@ namespace LX
// Operators (+, -, /, *) //
else if (auto op = operators.find(current); op != operators.end())
{
tokens.push_back({ op->second, info, "", 1 });
tokens.push_back({ op->second, info, 1 });
}
// If it is here and not whitespace that means it's an invalid character //
@@ -323,15 +324,14 @@ namespace LX
for (auto& token : tokens)
{
if (token.contents.empty() == false)
{
SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.contents, "}");
}
else
{
SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", ToStringNoFormat(token.type));
}
SafeLog
(
log, std::left,
"{ Line: ", std::setw(3), token.line,
", Index: ", std::setw(3), token.index,
", Length: ", std::setw(2), token.length, " } ",
std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.GetContents(), "}"
);
}
SafeLog(log, "\n END OF TOKENS");

View File

@@ -5,8 +5,17 @@
namespace LX
{
// Creates the memory for the pointer to the source //
std::string* Token::source = nullptr;
// Passes the constructor args to the values //
Token::Token(const TokenType _type, const LexerInfo& info, std::string _contents, std::streamsize _length)
: type(_type), contents(_contents), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length)
Token::Token(const TokenType _type, const LexerInfo& info, std::streamsize _length)
: type(_type), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length)
{}
// Returns a copy of the text this token covers in the source //
// Reads `length` characters starting at `index` from the string pointed to by Token::source //
// Returns an empty string if no source has been set or the token starts outside of the source //
std::string Token::GetContents() const
{
	// Guards against the source pointer never being assigned and against negative positions //
	if (source == nullptr || index < 0 || length <= 0)
	{
		return std::string();
	}

	// Guards against a token that starts past the end of the source //
	const std::string::size_type start = static_cast<std::string::size_type>(index);
	if (start >= source->size())
	{
		return std::string();
	}

	// substr clamps the count, so a token running past the end cannot read out of bounds //
	return source->substr(start, static_cast<std::string::size_type>(length));
}
}

View File

@@ -43,7 +43,7 @@ namespace LX
// Number literals just require them to be turned into an AST node //
// Note: Number literals are stored as strings because i'm a masochist //
case Token::NUMBER_LITERAL:
return std::make_unique<AST::NumberLiteral>(p.tokens[p.index++].contents);
return std::make_unique<AST::NumberLiteral>(p.tokens[p.index++].GetContents());
// Returns nullptr; the parsing function that receives that value will decide if that is valid //
default:
@@ -133,7 +133,7 @@ namespace LX
// Assigns the function name //
ExpectToken<Token::IDENTIFIER>(p.tokens[p.index]);
func.name = p.tokens[p.index++].contents;
func.name = p.tokens[p.index++].GetContents();
// Loops over the body until it reaches the end //
// TODO: Detect the end instead of looping over the entire token vector

View File

@@ -1,2 +1,2 @@
func main
return 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8
return 375 + "heloo there"

View File

@@ -62,6 +62,9 @@ namespace LX
// Data type to store a more computer readable version of files
struct __declspec(novtable) Token final
{
// Pointer to the source string that tokens read their contents from //
static std::string* source;
// Enum to hold the type of the token //
enum TokenType : short
{
@@ -87,13 +90,12 @@ namespace LX
UNDEFINED = -1
};
// Constructor of the tokens to set their info //
Token(const TokenType _type, const LexerInfo& info, std::string _contents, std::streamsize _length);
Token(const TokenType _type, const LexerInfo& info, std::streamsize _length);
// Contents of the token (may be empty if not needed) //
// Const to avoid external changes //
const std::string contents;
// Returns the token's text, copied out of the source string using its index and length //
std::string GetContents() const;
// Type of the token //
// Const to avoid external changes //
@@ -102,7 +104,7 @@ namespace LX
// Index in the source of the token //
const std::streamsize index;
// The length of the token on the line, may be different to the length of contents //
// The length of the token on the line //
const std::streamsize length;
// The line the token is located on //