Fixed bugs in Lexer

Also added basic math operators
This commit is contained in:
Pasha Bibko
2025-04-17 13:04:28 +01:00
parent 84f2a4cc5e
commit 2f34a23ba2
5 changed files with 98 additions and 55 deletions

View File

@@ -12,7 +12,6 @@ namespace LX
{ {
		// Local macros because I'm lazy // // Local macros because I'm lazy //
#define ITERATE index++; continue
#define TOKEN_CASE(type) case type: return #type; #define TOKEN_CASE(type) case type: return #type;
static std::string ToString(Token::TokenType type) static std::string ToString(Token::TokenType type)
@@ -27,22 +26,47 @@ namespace LX
TOKEN_CASE(Token::ELSE); TOKEN_CASE(Token::ELSE);
TOKEN_CASE(Token::ELIF); TOKEN_CASE(Token::ELIF);
TOKEN_CASE(Token::FUNCTION); TOKEN_CASE(Token::FUNCTION);
TOKEN_CASE(Token::ADD);
TOKEN_CASE(Token::SUB);
TOKEN_CASE(Token::MUL);
TOKEN_CASE(Token::DIV);
default: default:
return std::string("Unknown: " + (short)type); return "Unknown: " + std::to_string(type);
} }
} }
static const std::unordered_map<std::string, Token::TokenType> keywords = static const std::unordered_map<std::string, Token::TokenType> keywords =
{ {
{ "for", Token::FOR }, { "for" , Token::FOR },
{ "while", Token::WHILE }, { "while" , Token::WHILE },
{ "if", Token::IF }, { "if" , Token::IF },
{ "else", Token::ELSE }, { "else" , Token::ELSE },
{ "elif", Token::ELIF }, { "elif" , Token::ELIF },
{ "func", Token::FUNCTION }, { "func" , Token::FUNCTION },
}; };
static const std::unordered_map<char, Token::TokenType> operators =
{
{ '+', Token::ADD },
{ '-', Token::SUB },
{ '*', Token::MUL },
{ '/', Token::DIV }
};
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens)
{
if (auto keyword = keywords.find(word); keyword != keywords.end())
{
tokens.push_back({ keyword->second, word });
}
else
{
tokens.push_back({ Token::IDENTIFIER, word });
}
}
const std::vector<Token> LX::LexicalAnalyze(std::ifstream& src, std::ofstream* log) const std::vector<Token> LX::LexicalAnalyze(std::ifstream& src, std::ofstream* log)
{ {
// Logs the start of the lexical analysis // Logs the start of the lexical analysis
@@ -69,6 +93,7 @@ namespace LX
std::streamsize startOfWord = 0; std::streamsize startOfWord = 0;
std::streamsize startOfStringLiteral = 0; std::streamsize startOfStringLiteral = 0;
bool isAlpha = false;
bool inComment = false; bool inComment = false;
bool inStringLiteral = false; bool inStringLiteral = false;
bool wasLastCharAlpha = false; bool wasLastCharAlpha = false;
@@ -79,6 +104,9 @@ namespace LX
// Stores the current character for easy access // Stores the current character for easy access
const char current = contents[index]; const char current = contents[index];
// Works out if the current character is alphabetic
isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
// Updates string literal tracker and skips over rest if in a string literal // Updates string literal tracker and skips over rest if in a string literal
if (current == '"') if (current == '"')
{ {
@@ -88,7 +116,6 @@ namespace LX
					// Updates the necessary trackers // Updates the necessary trackers
startOfStringLiteral = index + 1; startOfStringLiteral = index + 1;
inStringLiteral = true; inStringLiteral = true;
ITERATE;
} }
// End of string literal // End of string literal
@@ -100,69 +127,62 @@ namespace LX
// Updates trackers // Updates trackers
inStringLiteral = false; inStringLiteral = false;
ITERATE;
} }
} }
// Skips over rest if within a string literal // Skips over rest if within a string literal
if (inStringLiteral) { ITERATE; } else if (inStringLiteral);
// Updates comment state // Updates comment state
if (current == '#') else if (current == '#')
{ {
inComment = !inComment; inComment = !inComment;
ITERATE;
} }
// Skips over if within a comment // Skips over if within a comment
if (inComment) { ITERATE; } else if (inComment);
// Works out if the current character is alphabetic // Start of a word
bool isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z'); else if (isAlpha == true && wasLastCharAlpha == false)
if (isAlpha == true)
{ {
// Start of a word startOfWord = index;
if (wasLastCharAlpha == false) }
{
// Updates trackers
wasLastCharAlpha = true;
startOfWord = index;
}
ITERATE; // During a word
else if (isAlpha == true);
// Operators (+, -, /, *)
else if (auto op = operators.find(current); op != operators.end())
{
tokens.push_back({ op->second, "" });
}
// If it is here and not whitespace that means it's an invalid character
else if (current == ' ' || current == '\t' || current == '\r' || current == '\n');
else
{
// Throws an error to alert the user
throw InvalidCharInSource(index, current);
} }
// End of a word // End of a word
if (isAlpha == false && wasLastCharAlpha == true) if (isAlpha == false && wasLastCharAlpha == true)
{ {
// Adds the word token to the token vector TokenizeWord({ contents.data() + startOfWord, (unsigned __int64)(index - startOfWord) }, tokens);
std::string word(contents.data() + startOfWord, index - startOfWord);
if (auto keyword = keywords.find(word); keyword != keywords.end())
{
tokens.push_back({ keyword->second, word });
}
else
{
tokens.push_back({ Token::IDENTIFIER, word });
}
} }
// Operators will eventually go here // Updates trackers //
// If it is here and not whitespace that means it's an invalid character index++;
if (current == ' ' || current == '\t' || current == '\r' || current == '\n') wasLastCharAlpha = isAlpha;
{ }
// Updates trackers
wasLastCharAlpha = isAlpha;
ITERATE;
}
// Throws an error to alert the user // Words are only added the iteration after they end so it has to be done like this //
throw InvalidCharInSource(index, current); if (wasLastCharAlpha && isAlpha)
{
std::string word(contents.data() + startOfWord, index - startOfWord);
TokenizeWord(word, tokens);
} }
// Logs the tokens if logging is on // // Logs the tokens if logging is on //
@@ -170,7 +190,15 @@ namespace LX
{ {
for (auto& token : tokens) for (auto& token : tokens)
{ {
SafeLog(log, ToString(token.type), ":\t", token.contents); if (token.contents.empty() == false)
{
SafeLog(log, ToString(token.type), ":\t", token.contents);
}
else
{
SafeLog(log, ToString(token.type));
}
} }
} }

View File

@@ -74,24 +74,32 @@ int main(int argc, char** argv)
{ {
// Tells the user the output file could not be opened // Tells the user the output file could not be opened
std::cout << "\nCould not open/create {" << argv[2] << "}\n"; std::cout << "\nCould not open/create {" << argv[2] << "}\n";
return 3;
} }
catch (LX::InvalidCharInSource& e) catch (LX::InvalidCharInSource& e)
{ {
// //
std::cout << "\nInvalid character found in source file: {" << e.invalid << "} at index: " << e.index << "\n"; std::cout << "\nInvalid character found in source file: {" << e.invalid << "} at index: " << e.index << "\n";
return 4;
} }
catch (std::exception& e) catch (std::exception& e)
{ {
// Prints the std exception to the console // Prints the std exception to the console
std::cout << "\nAn error occured:\n" << e.what() << std::endl; std::cout << "\nAn error occured:\n" << e.what() << std::endl;
return 5;
} }
catch (...) catch (...)
{ {
// Tells the user if an error has happened // Tells the user if an error has happened
std::cout << "\nAn Error occured\n"; std::cout << "\nAn Error occured\n";
return 6;
} }
return 0; return 0;

View File

@@ -3,7 +3,9 @@
Started lexing file Started lexing file
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
Token::FUNCTION: func Token::IDENTIFIER: int
Token::IDENTIFIER: main Token::IDENTIFIER: main
Token::IDENTIFIER: print Token::IDENTIFIER: return
Token::STRING_LITERAL: Hello World! Token::IDENTIFIER: a
Token::ADD
Token::IDENTIFIER: b

View File

@@ -1,2 +1,2 @@
func main int main
print "Hello World!" return a## + b

View File

@@ -40,6 +40,11 @@ namespace LX
STRING_LITERAL, STRING_LITERAL,
IDENTIFIER, IDENTIFIER,
RETURN,
// Operators //
ADD, SUB, MUL, DIV,
// Keywords // // Keywords //