mirror of
https://github.com/PashaBibko/LX.git
synced 2026-04-04 01:49:05 +00:00
Improved logging
Small errors with logging of the parser's unexpected-token errors. Tokens need to be rewritten in how they store memory, as their memory usage can be halved.
This commit is contained in:
@@ -11,12 +11,11 @@
|
||||
|
||||
namespace LX
|
||||
{
|
||||
// Local macros cause im lazy //
|
||||
|
||||
// Helper macro for outputting token type //
|
||||
#define TOKEN_CASE(type) case type: return #type;
|
||||
|
||||
// Logging function to turn a tokentype enum val into it's string //
|
||||
static std::string ToString(Token::TokenType type)
|
||||
// Helper util function to translate a tokentype enum val into its unformatted string (the enumerator's name) //
|
||||
static std::string ToStringNoFormat(Token::TokenType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
@@ -40,51 +39,53 @@ namespace LX
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr bool CanBePartOfNumberLiteral(const char c)
|
||||
// Logging function to turn a tokentype enum val into a nicely formatted string //
// Takes the name produced by ToStringNoFormat, drops its first 7 characters, //
// lowercases A-Z and replaces every underscore with a space //
|
||||
std::string ToString(Token::TokenType type)
|
||||
{
|
||||
return (c == '.') || (c == 'f');
|
||||
}
|
||||
// Gets the unformatted version of the string //
|
||||
std::string unformatted = ToStringNoFormat(type);
|
||||
// NOTE(review): substr throws std::out_of_range if the name is shorter than 7 characters - confirm every case carries the prefix //
unformatted = unformatted.substr(7); // Removes the Token:: prefix (7 characters long)
|
||||
|
||||
// Formats the string (turns to lowercase and replaces _ with a space) //
|
||||
std::string formatted;
|
||||
|
||||
for (char current : unformatted)
|
||||
{
|
||||
// Adding 32 makes it lowercase due to how ASCII works ('a' - 'A' == 32) //
|
||||
if ((current >= 'A' && current <= 'Z')) { formatted.push_back(current + 32); }
|
||||
|
||||
// Replaces _ with spaces //
|
||||
else if (current == '_') { formatted.push_back(' '); }
|
||||
|
||||
// Else adds the current character unchanged //
|
||||
else { formatted.push_back(current); }
|
||||
}
|
||||
|
||||
// Returns the formatted string //
|
||||
return formatted;
|
||||
}
|
||||
|
||||
// Stops use outside of the function //
|
||||
#undef TOKEN_CASE
|
||||
|
||||
// Helper function for dealing with floating-point number literals //
|
||||
static constexpr bool CanBePartOfNumberLiteral(const char c)
{
	// Only these two non-digit characters are accepted: '.' and 'f' //
	switch (c)
	{
		case '.':
		case 'f':
			return true;

		default:
			return false;
	}
}
|
||||
|
||||
// Helper function to stop printing whitespace (and other invisible characters) as pure whitespace //
// Returns the escape sequence spelled out as visible text (a newline becomes the two characters \n) //
// and every other character as a string of length one //
static std::string PrintChar(const char c)
{
	switch (c)
	{
		// Stored as raw string literals so the backslash is kept as a visible character //
		case '\n': return R"(\n)";
		case '\t': return R"(\t)";
		case '\r': return R"(\r)";

		// Vertical tab, form feed and NUL would otherwise also be invisible in the log //
		case '\v': return R"(\v)";
		case '\f': return R"(\f)";
		case '\0': return R"(\0)";

		// Else returns a string of length one with the char inside //
		default: return std::string(1, c);
	}
}
|
||||
|
||||
// Struct to store the current information of the lexer //
|
||||
struct LexerInfo
|
||||
{
|
||||
// Current trackers of where in the source it is //
|
||||
|
||||
std::streamsize line = 1; // <- Lines start on 1 (probably because of non-programmers)
|
||||
std::streamsize index = 0; // <- Position in the source string (the lexer reads contents[info.index])
|
||||
std::streamsize column = 0; // <- NOTE(review): initialised to 0 although columns are described as starting on 1 - confirm which is intended
|
||||
|
||||
// Trackers for when a multi-char token started //
|
||||
|
||||
std::streamsize startOfWord = 0;
|
||||
std::streamsize startOfNumberLiteral = 0;
|
||||
std::streamsize startOfStringLiteral = 0;
|
||||
|
||||
// Different flags of the lexer //
|
||||
// Stored as one-bit bit-fields to minimise memory allocated (basically no difference, because only one exists at any given time) //
|
||||
|
||||
bool isAlpha : 1 = false;
|
||||
bool isNumeric : 1 = false;
|
||||
bool inComment : 1 = false;
|
||||
bool inStringLiteral : 1 = false;
|
||||
bool isNextCharAlpha : 1 = false;
|
||||
bool isNextCharNumeric : 1 = false;
|
||||
bool wasLastCharAlpha : 1 = false;
|
||||
bool wasLastCharNumeric : 1 = false;
|
||||
bool lexingNumber : 1 = false;
|
||||
};
|
||||
|
||||
// All the keywords the lexer currently supports with their token-enum equivalents //
|
||||
static const std::unordered_map<std::string, Token::TokenType> keywords =
|
||||
{
|
||||
@@ -113,17 +114,17 @@ namespace LX
|
||||
// Checks the map for a check and if so adds it with its enum equivalent //
|
||||
if (auto keyword = keywords.find(word); keyword != keywords.end())
|
||||
{
|
||||
tokens.push_back({ keyword->second, "", info.line, info.column - (std::streamsize)word.size(), (std::streamsize)word.size()});
|
||||
tokens.push_back({ keyword->second, info, "", (std::streamsize)word.size() });
|
||||
}
|
||||
|
||||
// Else adds it as a type of IDENTIFIER //
|
||||
else
|
||||
{
|
||||
tokens.push_back({ Token::IDENTIFIER, word, info.line, info.column - (std::streamsize)word.size(), (std::streamsize)word.size()});
|
||||
tokens.push_back({ Token::IDENTIFIER, info, word, (std::streamsize)word.size() });
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<Token> LX::LexicalAnalyze(std::ifstream& src, std::ofstream* log)
|
||||
const std::vector<Token> LX::LexicalAnalyze(const std::string& contents, std::streamsize len, std::ofstream* log)
|
||||
{
|
||||
// Logs the start of the lexical analysis
|
||||
SafeLog(log, LOG_BREAK, "Started lexing file", LOG_BREAK);
|
||||
@@ -133,15 +134,6 @@ namespace LX
|
||||
std::vector<Token> tokens = {};
|
||||
tokens.reserve(0xFFFF);
|
||||
|
||||
// Turns the contents of the file into a string //
|
||||
|
||||
// Gets length of the file because it is opened at the end
|
||||
const std::streamsize len = src.tellg();
|
||||
src.seekg(0, std::ios::beg); // Goes back to the beginning
|
||||
|
||||
std::string contents(len, '\0'); // Preallocates all space needed
|
||||
src.read(&contents[0], len); // Transfers file to string
|
||||
|
||||
// Trackers for when the program is iterating over the file //
|
||||
LexerInfo info;
|
||||
|
||||
@@ -154,7 +146,7 @@ namespace LX
|
||||
// Checks if it is not at end //
|
||||
// Predicts it is not at the end as a micro-optimisation //
|
||||
if (info.index + 1 < len) [[likely]]
|
||||
{
|
||||
{
|
||||
// Gets the next character //
|
||||
const char next = contents[info.index + 1];
|
||||
|
||||
@@ -190,7 +182,7 @@ namespace LX
|
||||
{
|
||||
// Adds the string literal token to the token vector //
|
||||
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
|
||||
tokens.push_back({ Token::STRING_LITERAL, lit, info.line, info.column - (std::streamsize)lit.length(), (std::streamsize)lit.length() });
|
||||
tokens.push_back({ Token::STRING_LITERAL, info, lit, (std::streamsize)lit.length() + 1 });
|
||||
|
||||
// Updates trackers //
|
||||
info.inStringLiteral = false;
|
||||
@@ -220,7 +212,7 @@ namespace LX
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() });
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() });
|
||||
}
|
||||
|
||||
// Stores it is lexing a number literal //
|
||||
@@ -232,7 +224,7 @@ namespace LX
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() });
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, info, num, (std::streamsize)num.size() });
|
||||
info.lexingNumber = false; // Stops storing it is lexing a number
|
||||
}
|
||||
|
||||
@@ -267,7 +259,7 @@ namespace LX
|
||||
// Operators (+, -, /, *) //
|
||||
else if (auto op = operators.find(current); op != operators.end())
|
||||
{
|
||||
tokens.push_back({ op->second, "", info.line, info.column, 1});
|
||||
tokens.push_back({ op->second, info, "", 1 });
|
||||
}
|
||||
|
||||
// If it is here and not whitespace that means it's an invalid character //
|
||||
@@ -287,31 +279,20 @@ namespace LX
|
||||
info.line++;
|
||||
}
|
||||
|
||||
// Throws an error with all the relevant information //s
|
||||
// Throws an error with all the relevant information //
|
||||
else
|
||||
{
|
||||
// Finds the start of the line //
|
||||
size_t start = contents.rfind('\n', info.index);
|
||||
if (start == std::string::npos) { start = 0; } // std::npos means none was found so defaults to 1
|
||||
else { start = start + 1; } // Skips the new line character
|
||||
|
||||
// Finds the end of the line //
|
||||
size_t end = contents.find('\n', info.index);
|
||||
if (end == std::string::npos) { end = contents.size(); } // If it reaches the end with no /n it defaults to the length of the string
|
||||
|
||||
// The line where the invalid character is //
|
||||
std::string line = contents.substr(start, end - start);
|
||||
|
||||
// Throws an error to alert the user of the invalid character //
|
||||
throw InvalidCharInSource(info.column, info.line, line, contents[info.index]);
|
||||
throw InvalidCharInSource(info.column, info.line, info.index, contents[info.index]);
|
||||
}
|
||||
|
||||
// Log dumps A LOT of info //
|
||||
|
||||
#ifdef LOG_EVERYTHING
|
||||
|
||||
SafeLog
|
||||
(
|
||||
log,
|
||||
"Is Alpha: ", info.isAlpha,
|
||||
log, "Index: ", std::left, std::setw(3), info.index,
|
||||
" Is Alpha: ", info.isAlpha,
|
||||
" Is Numeric: ", info.isNumeric,
|
||||
" In Comment: ", info.inComment,
|
||||
" In String: ", info.inStringLiteral,
|
||||
@@ -322,6 +303,8 @@ namespace LX
|
||||
" Current: {", PrintChar(current), "}"
|
||||
);
|
||||
|
||||
#endif // LOG_EVERYTHING
|
||||
|
||||
// Updates trackers to their default state of a new character //
|
||||
|
||||
info.index++;
|
||||
@@ -334,18 +317,20 @@ namespace LX
|
||||
// Logs the tokens if logging is on //
|
||||
if (log != nullptr)
|
||||
{
|
||||
SafeLog(log, LOG_BREAK, "Tokens", LOG_BREAK);
|
||||
#ifdef LOG_EVERYTHING
|
||||
SafeLog(log, "\n"); // Puts a space when there is a lot in the log
|
||||
#endif // LOG_EVERYTHING
|
||||
|
||||
for (auto& token : tokens)
|
||||
{
|
||||
if (token.contents.empty() == false)
|
||||
{
|
||||
SafeLog(log, "{ Line: ", std::left, std::setw(3), token.line, ", Column: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, "} ", std::setw(30), ToString(token.type) + ":", "{", token.contents, "}");
|
||||
SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", std::setw(30), ToStringNoFormat(token.type) + ":", "{", token.contents, "}");
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
SafeLog(log, "{ Line: ", std::left, std::setw(3), token.line, ", Column: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, "} ", ToString(token.type));
|
||||
SafeLog(log, std::left, "{ Line: ", std::setw(3), token.line, ", Index: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, " } ", ToStringNoFormat(token.type));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
namespace LX
|
||||
{
|
||||
// Passes the constructor args to the values //
|
||||
Token::Token(const TokenType _type, std::string _contents, std::streamsize _line, std::streamsize _index, std::streamsize _length)
|
||||
: type(_type), contents(_contents), line(_line), index(_index), length(_length)
|
||||
// Builds a token from the lexer's current state instead of explicit position arguments //
// line is copied from info; index and column are derived by stepping back _length from the lexer's position //
// NOTE(review): index uses (info.index - _length + 1) while column uses (info.column - _length) - confirm the differing +1 is intended //
// NOTE(review): _contents is taken by value and then copied into the member; moving it would avoid the extra copy //
Token::Token(const TokenType _type, const LexerInfo& info, std::string _contents, std::streamsize _length)
|
||||
: type(_type), contents(_contents), index(info.index - _length + 1), line(info.line), column(info.column - _length), length(_length)
|
||||
{}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user