mirror of
https://github.com/PashaBibko/LX.git
synced 2026-04-03 17:39:02 +00:00
Made lexer support floating point numbers
Wow I didn't think the lexer could get worse
This commit is contained in:
@@ -40,6 +40,22 @@ namespace LX
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether a non-digit character is accepted inside a number literal //
// Only the decimal point ('.') and the lowercase 'f' character are accepted; //
// every other character (digits included) yields false //
static constexpr bool CanBePartOfNumberLiteral(const char c) noexcept
{
	return (c == '.') || (c == 'f');
}
|
||||
|
||||
// Turns a character into a string that is safe to show on a single log line //
// Newline, tab and carriage return are returned as their two-character escape //
// spelling (a backslash followed by n, t or r); any other character is returned //
// unchanged as a one-character string //
static std::string PrintChar(const char c)
{
	switch (c)
	{
		// Raw string literals keep the backslash as a literal character //
		case '\n': return R"(\n)";
		case '\t': return R"(\t)";
		case '\r': return R"(\r)";

		// Everything else is passed through as-is //
		default: return std::string(1, c);
	}
}
|
||||
|
||||
// Struct to store the current information of the lexer //
|
||||
struct LexerInfo
|
||||
{
|
||||
@@ -66,6 +82,7 @@ namespace LX
|
||||
bool isNextCharNumeric : 1 = false;
|
||||
bool wasLastCharAlpha : 1 = false;
|
||||
bool wasLastCharNumeric : 1 = false;
|
||||
bool lexingNumber : 1 = false;
|
||||
};
|
||||
|
||||
// All the keywords the lexer currently supports with their token-enum equivalents //
|
||||
@@ -143,7 +160,7 @@ namespace LX
|
||||
|
||||
// Sets flags depending on the value of the next character //
|
||||
info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
|
||||
info.isNextCharNumeric = (next >= '0' && next <= '9');
|
||||
info.isNextCharNumeric = (next >= '0' && next <= '9') || CanBePartOfNumberLiteral(next);
|
||||
}
|
||||
|
||||
else
|
||||
@@ -192,6 +209,37 @@ namespace LX
|
||||
// Skips over if within a comment //
|
||||
else if (info.inComment);
|
||||
|
||||
// Start of a number //
|
||||
else if (info.isNumeric == true && info.wasLastCharNumeric == false && info.lexingNumber == false)
|
||||
{
|
||||
// Stores the start of the number //
|
||||
info.startOfNumberLiteral = info.index;
|
||||
|
||||
// Checks if it is also the end of the number (single char numbers) //
|
||||
if (info.isNextCharNumeric == false)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() });
|
||||
}
|
||||
|
||||
// Stores it is lexing a number literal //
|
||||
else { info.lexingNumber = true; }
|
||||
}
|
||||
|
||||
// End of a number //
|
||||
else if ((info.isNumeric == true || CanBePartOfNumberLiteral(current)) && info.isNextCharNumeric == false && info.lexingNumber == true)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() });
|
||||
info.lexingNumber = false; // Stops storing it is lexing a number
|
||||
}
|
||||
|
||||
// During a number //
|
||||
else if (info.isNumeric == true);
|
||||
else if (info.lexingNumber == true && CanBePartOfNumberLiteral(current));
|
||||
|
||||
// Start of a word //
|
||||
else if (info.isAlpha == true && info.wasLastCharAlpha == false)
|
||||
{
|
||||
@@ -216,32 +264,6 @@ namespace LX
|
||||
// During a word //
|
||||
else if (info.isAlpha == true);
|
||||
|
||||
// Start of a number //
|
||||
else if (info.isNumeric == true && info.wasLastCharNumeric == false)
|
||||
{
|
||||
// Stores the start of the number //
|
||||
info.startOfNumberLiteral = info.index;
|
||||
|
||||
// Checks if it is also the end of the number (single char numbers) //
|
||||
if (info.isNextCharNumeric == false)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size()});
|
||||
}
|
||||
}
|
||||
|
||||
// End of a number //
|
||||
else if (info.isNumeric == true && info.isNextCharNumeric == false)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size()});
|
||||
}
|
||||
|
||||
// During a number //
|
||||
else if (info.isNumeric == true);
|
||||
|
||||
// Operators (+, -, /, *) //
|
||||
else if (auto op = operators.find(current); op != operators.end())
|
||||
{
|
||||
@@ -265,6 +287,7 @@ namespace LX
|
||||
info.line++;
|
||||
}
|
||||
|
||||
// Throws an error with all the relevant information //
|
||||
else
|
||||
{
|
||||
// Finds the start of the line //
|
||||
@@ -283,6 +306,22 @@ namespace LX
|
||||
throw InvalidCharInSource(info.column, info.line, line, contents[info.index]);
|
||||
}
|
||||
|
||||
// Log dumps A LOT of info //
|
||||
|
||||
SafeLog
|
||||
(
|
||||
log,
|
||||
"Is Alpha: ", info.isAlpha,
|
||||
" Is Numeric: ", info.isNumeric,
|
||||
" In Comment: ", info.inComment,
|
||||
" In String: ", info.inStringLiteral,
|
||||
" Next Char Alpha: ", info.isNextCharAlpha,
|
||||
" Next Char Numeric: ", info.wasLastCharNumeric,
|
||||
" Last Char Numeric: ", info.wasLastCharAlpha,
|
||||
" Lexing number: ", info.lexingNumber,
|
||||
" Current: {", PrintChar(current), "}"
|
||||
);
|
||||
|
||||
// Updates trackers to their default state of a new character //
|
||||
|
||||
info.index++;
|
||||
@@ -295,6 +334,8 @@ namespace LX
|
||||
// Logs the tokens if logging is on //
|
||||
if (log != nullptr)
|
||||
{
|
||||
SafeLog(log, LOG_BREAK, "Tokens", LOG_BREAK);
|
||||
|
||||
for (auto& token : tokens)
|
||||
{
|
||||
if (token.contents.empty() == false)
|
||||
@@ -307,6 +348,8 @@ namespace LX
|
||||
SafeLog(log, "{ Line: ", std::left, std::setw(3), token.line, ", Column: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, "} ", ToString(token.type));
|
||||
}
|
||||
}
|
||||
|
||||
SafeLog(log, "\n END OF TOKENS");
|
||||
}
|
||||
|
||||
// Shrinks the vector down to minimum size before returning to avoid excess memory being allocated
|
||||
|
||||
3
Main.cpp
3
Main.cpp
@@ -60,12 +60,15 @@ int main(int argc, char** argv)
|
||||
|
||||
// Create tokens out of the input file //
|
||||
std::vector<LX::Token>tokens = LX::LexicalAnalyze(inpFile, log.get());
|
||||
LX::SafeFlush(log.get());
|
||||
|
||||
// Turns the tokens into an AST //
|
||||
LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens, log.get());
|
||||
LX::SafeFlush(log.get());
|
||||
|
||||
// Turns the AST into LLVM IR //
|
||||
LX::GenerateIR(AST, inpPath.filename().string());
|
||||
LX::SafeFlush(log.get());
|
||||
|
||||
// Returns success
|
||||
return 0;
|
||||
|
||||
@@ -18,6 +18,14 @@ namespace LX
|
||||
if (log != nullptr) { (*log << ... << args); *log << "\n"; }
|
||||
}
|
||||
|
||||
// Flushes the log stream if one was provided //
// A null pointer is accepted and makes the call a no-op //
inline void SafeFlush(std::ofstream* log)
{
	// Nothing to flush when no log was supplied //
	if (log == nullptr)
	{
		return;
	}

	log->flush();
}
|
||||
|
||||
// Gives a standard way to mark a change between different sections within the log output //
|
||||
// Starts and ends with a newline; the lexer's token dump passes it to SafeLog before and after the "Tokens" heading //
constexpr const char* LOG_BREAK = "\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user