Made lexer support floating point numbers

Wow I didn't think the lexer could get worse
2026-05-30 04:28:14 +00:00 · 2025-04-21 15:40:46 +01:00
parent 9f05d42945
commit feea149cc1
3 changed files with 81 additions and 27 deletions
--- a/Lexer/src/Lexer.cpp
+++ b/Lexer/src/Lexer.cpp
@@ -40,6 +40,22 @@ namespace LX
 		}
 	}
 	// Reports whether a non-digit character is accepted inside a number literal //
 	// Only '.' and 'f' qualify; every other character yields false //
 	static constexpr bool CanBePartOfNumberLiteral(const char c)
 	{
 		switch (c)
 		{
 			case '.':
 			case 'f':
 				return true;
 			default:
 				return false;
 		}
 	}
 	// Turns a character into a printable string for log output //
 	// Newline, tab and carriage return are returned as their two-character escape spelling, //
 	// any other character is returned as a one-character string //
 	static std::string PrintChar(const char c)
 	{
 		if (c == '\n') { return R"(\n)"; }
 		if (c == '\t') { return R"(\t)"; }
 		if (c == '\r') { return R"(\r)"; }
 		return std::string(1, c);
 	}
 	// Struct to store the current information of the lexer //
 	struct LexerInfo
 	{
@@ -66,6 +82,7 @@ namespace LX
 		bool isNextCharNumeric : 1 = false;
 		bool wasLastCharAlpha : 1 = false;
 		bool wasLastCharNumeric : 1 = false;
 		bool lexingNumber : 1 = false;
 	};
 	// All the keywords the lexer currently supports with their token-enum equivalents //
@@ -143,7 +160,7 @@ namespace LX
 				// Sets flags depending on the value of the next character //
 				info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
-				info.isNextCharNumeric = (next >= '0' && next <= '9');
+				info.isNextCharNumeric = (next >= '0' && next <= '9') || CanBePartOfNumberLiteral(next);
 			}
 			else
@@ -192,6 +209,37 @@ namespace LX
 			// Skips over if within a comment //
 			else if (info.inComment);
 			// Start of a number //
 			else if (info.isNumeric == true && info.wasLastCharNumeric == false && info.lexingNumber == false)
 			{
 				// Stores the start of the number //
 				info.startOfNumberLiteral = info.index;
 				// Checks if it is at the end (single char numbers) //
 				if (info.isNextCharNumeric == false)
 				{
 					// Pushes the number to the token vector. Number literals are stored as string in the tokens //
 					std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
 					tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() });
 				}
 				// Stores it is lexing a number literal //
 				else { info.lexingNumber = true; }
 			}
 			// End of a number //
 			else if ((info.isNumeric == true || CanBePartOfNumberLiteral(current)) && info.isNextCharNumeric == false && info.lexingNumber == true)
 			{
 				// Pushes the number to the token vector. Number literals are stored as string in the tokens //
 				std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
 				tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size() });
 				info.lexingNumber = false; // Stops storing it is lexing a number
 			}
 			// During a number //
 			else if (info.isNumeric == true);
 			else if (info.lexingNumber == true && CanBePartOfNumberLiteral(current));
 			// Start of a word //
 			else if (info.isAlpha == true && info.wasLastCharAlpha == false)
 			{
@@ -216,32 +264,6 @@ namespace LX
 			// During a word //
 			else if (info.isAlpha == true);
 			// Start of a number //
 			else if (info.isNumeric == true && info.wasLastCharNumeric == false)
 			{
 				// Stores the start of the number //
 				info.startOfNumberLiteral = info.index;
 				// Checks if it is at the end (single char numbers) //
 				if (info.isNextCharNumeric == false)
 				{
 					// Pushes the number to the token vector. Number literals are stored as string in the tokens //
 					std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
 					tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size()});
 				}
 			}
 			// End of a number //
 			else if (info.isNumeric == true && info.isNextCharNumeric == false)
 			{
 				// Pushes the number to the token vector. Number literals are stored as string in the tokens //
 				std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
 				tokens.push_back({ Token::NUMBER_LITERAL, num, info.line, info.column - (std::streamsize)num.size(), (std::streamsize)num.size()});
 			}
 			// During a number //
 			else if (info.isNumeric == true);
 			// Operators (+, -, /, *) //
 			else if (auto op = operators.find(current); op != operators.end())
 			{
@@ -265,6 +287,7 @@ namespace LX
 				info.line++;
 			}
 			// Throws an error with all the relevant information //
 			else
 			{
 				// Finds the start of the line //
@@ -283,6 +306,22 @@ namespace LX
 				throw InvalidCharInSource(info.column, info.line, line, contents[info.index]);
 			}
 			// Log dumps A LOT of info: every per-character flag followed by the character itself //
 			// Each label is paired with the flag it names so the dump can be trusted while debugging //
 			SafeLog
 			(
 				log,
 				"Is Alpha: ", info.isAlpha,
 				" Is Numeric: ", info.isNumeric,
 				" In Comment: ", info.inComment,
 				" In String: ", info.inStringLiteral,
 				" Next Char Alpha: ", info.isNextCharAlpha,
 				" Next Char Numeric: ", info.isNextCharNumeric,
 				" Last Char Alpha: ", info.wasLastCharAlpha,
 				" Last Char Numeric: ", info.wasLastCharNumeric,
 				" Lexing number: ", info.lexingNumber,
 				" Current: {", PrintChar(current), "}"
 			);
 			// Updates trackers to their default state of a new character //
 			info.index++;
@@ -295,6 +334,8 @@ namespace LX
 		// Logs the tokens if logging is on //
 		if (log != nullptr)
 		{
 			SafeLog(log, LOG_BREAK, "Tokens", LOG_BREAK);
 			for (auto& token : tokens)
 			{
 				if (token.contents.empty() == false)
@@ -307,6 +348,8 @@ namespace LX
 					SafeLog(log, "{ Line: ", std::left, std::setw(3), token.line, ", Column: ", std::setw(3), token.index, ", Length: ", std::setw(2), token.length, "} ", ToString(token.type));
 				}
 			}
 			SafeLog(log, "\n END OF TOKENS");
 		}
 		// Shrinks the vector down to minimum size before returning to avoid excess memory being allocated
--- a/Main.cpp
+++ b/Main.cpp
@@ -60,12 +60,15 @@ int main(int argc, char** argv)
 		// Create tokens out of the input file //
 		std::vector<LX::Token>tokens = LX::LexicalAnalyze(inpFile, log.get());
 		LX::SafeFlush(log.get());
 		// Turns the tokens into an AST //
 		LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens, log.get());
 		LX::SafeFlush(log.get());
 		// Turns the AST into LLVM IR //
 		LX::GenerateIR(AST, inpPath.filename().string());
 		LX::SafeFlush(log.get());
 		// Returns success
 		return 0;
--- a/common/Util.h
+++ b/common/Util.h
@@ -18,6 +18,14 @@ namespace LX
 		if (log != nullptr) { (*log << ... << args); *log << "\n"; }
 	}
 	// Flushes the log stream when one was supplied; a null log is silently ignored //
 	inline void SafeFlush(std::ofstream* log)
 	{
 		// No log file means there is nothing to flush //
 		if (log == nullptr) { return; }
 		log->flush();
 	}
 	// Gives a standard way to mark a change between different sections within the log output //
 	// The text is a newline, a row of alternating '-' and '=' characters, then another newline //
 	constexpr const char* LOG_BREAK = "\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n";
 }