mirror of
https://github.com/PashaBibko/LX.git
synced 2026-04-04 18:09:03 +00:00
Fixed bugs in Lexer
Also added basic math operators
This commit is contained in:
@@ -12,7 +12,6 @@ namespace LX
|
|||||||
{
|
{
|
||||||
// Local macros cause im lazy //
|
// Local macros cause im lazy //
|
||||||
|
|
||||||
#define ITERATE index++; continue
|
|
||||||
#define TOKEN_CASE(type) case type: return #type;
|
#define TOKEN_CASE(type) case type: return #type;
|
||||||
|
|
||||||
static std::string ToString(Token::TokenType type)
|
static std::string ToString(Token::TokenType type)
|
||||||
@@ -27,22 +26,47 @@ namespace LX
|
|||||||
TOKEN_CASE(Token::ELSE);
|
TOKEN_CASE(Token::ELSE);
|
||||||
TOKEN_CASE(Token::ELIF);
|
TOKEN_CASE(Token::ELIF);
|
||||||
TOKEN_CASE(Token::FUNCTION);
|
TOKEN_CASE(Token::FUNCTION);
|
||||||
|
TOKEN_CASE(Token::ADD);
|
||||||
|
TOKEN_CASE(Token::SUB);
|
||||||
|
TOKEN_CASE(Token::MUL);
|
||||||
|
TOKEN_CASE(Token::DIV);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return std::string("Unknown: " + (short)type);
|
return "Unknown: " + std::to_string(type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const std::unordered_map<std::string, Token::TokenType> keywords =
|
static const std::unordered_map<std::string, Token::TokenType> keywords =
|
||||||
{
|
{
|
||||||
{ "for", Token::FOR },
|
{ "for" , Token::FOR },
|
||||||
{ "while", Token::WHILE },
|
{ "while" , Token::WHILE },
|
||||||
{ "if", Token::IF },
|
{ "if" , Token::IF },
|
||||||
{ "else", Token::ELSE },
|
{ "else" , Token::ELSE },
|
||||||
{ "elif", Token::ELIF },
|
{ "elif" , Token::ELIF },
|
||||||
{ "func", Token::FUNCTION },
|
{ "func" , Token::FUNCTION },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const std::unordered_map<char, Token::TokenType> operators =
|
||||||
|
{
|
||||||
|
{ '+', Token::ADD },
|
||||||
|
{ '-', Token::SUB },
|
||||||
|
{ '*', Token::MUL },
|
||||||
|
{ '/', Token::DIV }
|
||||||
|
};
|
||||||
|
|
||||||
|
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens)
|
||||||
|
{
|
||||||
|
if (auto keyword = keywords.find(word); keyword != keywords.end())
|
||||||
|
{
|
||||||
|
tokens.push_back({ keyword->second, word });
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
tokens.push_back({ Token::IDENTIFIER, word });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const std::vector<Token> LX::LexicalAnalyze(std::ifstream& src, std::ofstream* log)
|
const std::vector<Token> LX::LexicalAnalyze(std::ifstream& src, std::ofstream* log)
|
||||||
{
|
{
|
||||||
// Logs the start of the lexical analysis
|
// Logs the start of the lexical analysis
|
||||||
@@ -69,6 +93,7 @@ namespace LX
|
|||||||
std::streamsize startOfWord = 0;
|
std::streamsize startOfWord = 0;
|
||||||
std::streamsize startOfStringLiteral = 0;
|
std::streamsize startOfStringLiteral = 0;
|
||||||
|
|
||||||
|
bool isAlpha = false;
|
||||||
bool inComment = false;
|
bool inComment = false;
|
||||||
bool inStringLiteral = false;
|
bool inStringLiteral = false;
|
||||||
bool wasLastCharAlpha = false;
|
bool wasLastCharAlpha = false;
|
||||||
@@ -79,6 +104,9 @@ namespace LX
|
|||||||
// Stores the current character for easy access
|
// Stores the current character for easy access
|
||||||
const char current = contents[index];
|
const char current = contents[index];
|
||||||
|
|
||||||
|
// Works out if the current character is alphabetic
|
||||||
|
isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
|
||||||
|
|
||||||
// Updates string literal tracker and skips over rest if in a string literal
|
// Updates string literal tracker and skips over rest if in a string literal
|
||||||
if (current == '"')
|
if (current == '"')
|
||||||
{
|
{
|
||||||
@@ -88,7 +116,6 @@ namespace LX
|
|||||||
// Updates the neccesarry trackers
|
// Updates the neccesarry trackers
|
||||||
startOfStringLiteral = index + 1;
|
startOfStringLiteral = index + 1;
|
||||||
inStringLiteral = true;
|
inStringLiteral = true;
|
||||||
ITERATE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// End of string literal
|
// End of string literal
|
||||||
@@ -100,78 +127,79 @@ namespace LX
|
|||||||
|
|
||||||
// Updates trackers
|
// Updates trackers
|
||||||
inStringLiteral = false;
|
inStringLiteral = false;
|
||||||
ITERATE;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skips over rest if within a string literal
|
// Skips over rest if within a string literal
|
||||||
if (inStringLiteral) { ITERATE; }
|
else if (inStringLiteral);
|
||||||
|
|
||||||
// Updates comment state
|
// Updates comment state
|
||||||
if (current == '#')
|
else if (current == '#')
|
||||||
{
|
{
|
||||||
inComment = !inComment;
|
inComment = !inComment;
|
||||||
|
|
||||||
ITERATE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skips over if within a comment
|
// Skips over if within a comment
|
||||||
if (inComment) { ITERATE; }
|
else if (inComment);
|
||||||
|
|
||||||
// Works out if the current character is alphabetic
|
|
||||||
bool isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
|
|
||||||
|
|
||||||
if (isAlpha == true)
|
|
||||||
{
|
|
||||||
// Start of a word
|
// Start of a word
|
||||||
if (wasLastCharAlpha == false)
|
else if (isAlpha == true && wasLastCharAlpha == false)
|
||||||
{
|
{
|
||||||
// Updates trackers
|
|
||||||
wasLastCharAlpha = true;
|
|
||||||
startOfWord = index;
|
startOfWord = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
ITERATE;
|
// During a word
|
||||||
|
else if (isAlpha == true);
|
||||||
|
|
||||||
|
// Operators (+, -, /, *)
|
||||||
|
else if (auto op = operators.find(current); op != operators.end())
|
||||||
|
{
|
||||||
|
tokens.push_back({ op->second, "" });
|
||||||
|
}
|
||||||
|
|
||||||
|
// If it is here and not whitespace that means it's an invalid character
|
||||||
|
else if (current == ' ' || current == '\t' || current == '\r' || current == '\n');
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Throws an error to alert the user
|
||||||
|
throw InvalidCharInSource(index, current);
|
||||||
}
|
}
|
||||||
|
|
||||||
// End of a word
|
// End of a word
|
||||||
if (isAlpha == false && wasLastCharAlpha == true)
|
if (isAlpha == false && wasLastCharAlpha == true)
|
||||||
{
|
{
|
||||||
// Adds the word token to the token vector
|
TokenizeWord({ contents.data() + startOfWord, (unsigned __int64)(index - startOfWord) }, tokens);
|
||||||
std::string word(contents.data() + startOfWord, index - startOfWord);
|
|
||||||
|
|
||||||
if (auto keyword = keywords.find(word); keyword != keywords.end())
|
|
||||||
{
|
|
||||||
tokens.push_back({ keyword->second, word });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
// Updates trackers //
|
||||||
{
|
|
||||||
tokens.push_back({ Token::IDENTIFIER, word });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Operators will eventually go here
|
index++;
|
||||||
|
|
||||||
// If it is here and not whitespace that means it's an invalid character
|
|
||||||
if (current == ' ' || current == '\t' || current == '\r' || current == '\n')
|
|
||||||
{
|
|
||||||
// Updates trackers
|
|
||||||
wasLastCharAlpha = isAlpha;
|
wasLastCharAlpha = isAlpha;
|
||||||
ITERATE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Throws an error to alert the user
|
// Words are only added the iteration after they end so it has to be done like this //
|
||||||
throw InvalidCharInSource(index, current);
|
if (wasLastCharAlpha && isAlpha)
|
||||||
|
{
|
||||||
|
std::string word(contents.data() + startOfWord, index - startOfWord);
|
||||||
|
TokenizeWord(word, tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Logs the tokens if logging is on //
|
// Logs the tokens if logging is on //
|
||||||
if (log != nullptr)
|
if (log != nullptr)
|
||||||
{
|
{
|
||||||
for (auto& token : tokens)
|
for (auto& token : tokens)
|
||||||
|
{
|
||||||
|
if (token.contents.empty() == false)
|
||||||
{
|
{
|
||||||
SafeLog(log, ToString(token.type), ":\t", token.contents);
|
SafeLog(log, ToString(token.type), ":\t", token.contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SafeLog(log, ToString(token.type));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shrinks the vector down to minimum size before returning to avoid excess memory being allocated
|
// Shrinks the vector down to minimum size before returning to avoid excess memory being allocated
|
||||||
|
|||||||
8
Main.cpp
8
Main.cpp
@@ -74,24 +74,32 @@ int main(int argc, char** argv)
|
|||||||
{
|
{
|
||||||
// Tells the user the output file could not be opened
|
// Tells the user the output file could not be opened
|
||||||
std::cout << "\nCould not open/create {" << argv[2] << "}\n";
|
std::cout << "\nCould not open/create {" << argv[2] << "}\n";
|
||||||
|
|
||||||
|
return 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
catch (LX::InvalidCharInSource& e)
|
catch (LX::InvalidCharInSource& e)
|
||||||
{
|
{
|
||||||
//
|
//
|
||||||
std::cout << "\nInvalid character found in source file: {" << e.invalid << "} at index: " << e.index << "\n";
|
std::cout << "\nInvalid character found in source file: {" << e.invalid << "} at index: " << e.index << "\n";
|
||||||
|
|
||||||
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
catch (std::exception& e)
|
catch (std::exception& e)
|
||||||
{
|
{
|
||||||
// Prints the std exception to the console
|
// Prints the std exception to the console
|
||||||
std::cout << "\nAn error occured:\n" << e.what() << std::endl;
|
std::cout << "\nAn error occured:\n" << e.what() << std::endl;
|
||||||
|
|
||||||
|
return 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
catch (...)
|
catch (...)
|
||||||
{
|
{
|
||||||
// Tells the user if an error has happened
|
// Tells the user if an error has happened
|
||||||
std::cout << "\nAn Error occured\n";
|
std::cout << "\nAn Error occured\n";
|
||||||
|
|
||||||
|
return 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -3,7 +3,9 @@
|
|||||||
Started lexing file
|
Started lexing file
|
||||||
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||||
|
|
||||||
Token::FUNCTION: func
|
Token::IDENTIFIER: int
|
||||||
Token::IDENTIFIER: main
|
Token::IDENTIFIER: main
|
||||||
Token::IDENTIFIER: print
|
Token::IDENTIFIER: return
|
||||||
Token::STRING_LITERAL: Hello World!
|
Token::IDENTIFIER: a
|
||||||
|
Token::ADD
|
||||||
|
Token::IDENTIFIER: b
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
func main
|
int main
|
||||||
print "Hello World!"
|
return a## + b
|
||||||
@@ -40,6 +40,11 @@ namespace LX
|
|||||||
|
|
||||||
STRING_LITERAL,
|
STRING_LITERAL,
|
||||||
IDENTIFIER,
|
IDENTIFIER,
|
||||||
|
RETURN,
|
||||||
|
|
||||||
|
// Operators //
|
||||||
|
|
||||||
|
ADD, SUB, MUL, DIV,
|
||||||
|
|
||||||
// Keywords //
|
// Keywords //
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user