From 099e543e95c50cbc4b43c0900bf3fc75a343fac3 Mon Sep 17 00:00:00 2001 From: Pasha Bibko <156938226+PashaBibko@users.noreply.github.com> Date: Tue, 22 Apr 2025 22:32:07 +0100 Subject: [PATCH] Im vonfused --- IR-Generator/inc/Lexer.h | 6 ++++ Lexer/src/Lexer.cpp | 23 +++++++++++++++ Parser/Parser.vcxproj | 1 + Parser/Parser.vcxproj.filters | 3 ++ Parser/inc/AST.h | 5 ++++ Parser/src/AST-Loggers.cpp | 17 +++++++++++ Parser/src/Parser.cpp | 53 +++++++++++++++++++++++++---------- example/main.lx | 7 +++++ 8 files changed, 100 insertions(+), 15 deletions(-) create mode 100644 Parser/src/AST-Loggers.cpp diff --git a/IR-Generator/inc/Lexer.h b/IR-Generator/inc/Lexer.h index 4f1a7a3..57104c6 100644 --- a/IR-Generator/inc/Lexer.h +++ b/IR-Generator/inc/Lexer.h @@ -75,6 +75,12 @@ namespace LX IDENTIFIER, RETURN, + // Symbols // + + OPEN_BRACKET, CLOSE_BRACKET, + OPEN_BRACE, CLOSE_BRACE, + OPEN_PAREN, CLOSE_PAREN, + // Operators // ADD, SUB, MUL, DIV, diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp index 8b6e19c..6f43842 100644 --- a/Lexer/src/Lexer.cpp +++ b/Lexer/src/Lexer.cpp @@ -34,6 +34,12 @@ namespace LX TOKEN_CASE(Token::DIV); TOKEN_CASE(Token::NUMBER_LITERAL); TOKEN_CASE(Token::RETURN); + TOKEN_CASE(Token::OPEN_BRACE); + TOKEN_CASE(Token::CLOSE_BRACE); + TOKEN_CASE(Token::OPEN_BRACKET); + TOKEN_CASE(Token::CLOSE_BRACKET); + TOKEN_CASE(Token::OPEN_PAREN); + TOKEN_CASE(Token::CLOSE_PAREN); default: return "Unknown: " + std::to_string(type); @@ -99,6 +105,17 @@ namespace LX { "return" , Token::RETURN } }; + // All the symbols supported by the lexer // + static const std::unordered_map symbols = + { + { '{', Token::OPEN_BRACKET }, + { '}', Token::CLOSE_BRACKET }, + { '[', Token::OPEN_BRACE }, + { ']', Token::CLOSE_BRACE }, + { '(', Token::OPEN_PAREN }, + { ')', Token::CLOSE_PAREN } + }; + // All the single-char operators currently supported by the lexer with their token-enum equivalents // // TODO: Support multi-char operators such as: ==, -> +=, && static const std::unordered_map operators = @@ -257,6 +274,12 @@ namespace LX // During a word // else if (info.isAlpha == true); + // Symbols // + else if (auto sym = symbols.find(current); sym != symbols.end()) + { + tokens.push_back({ sym->second, info, 1 }); + } + // Operators (+, -, /, *) // else if (auto op = operators.find(current); op != operators.end()) { diff --git a/Parser/Parser.vcxproj b/Parser/Parser.vcxproj index 6fa5752..34fa5eb 100644 --- a/Parser/Parser.vcxproj +++ b/Parser/Parser.vcxproj @@ -141,6 +141,7 @@ + diff --git a/Parser/Parser.vcxproj.filters b/Parser/Parser.vcxproj.filters index 71e279b..e4d3224 100644 --- a/Parser/Parser.vcxproj.filters +++ b/Parser/Parser.vcxproj.filters @@ -23,6 +23,9 @@ Source Files + + Source Files + diff --git a/Parser/inc/AST.h b/Parser/inc/AST.h index a115332..57359ca 100644 --- a/Parser/inc/AST.h +++ b/Parser/inc/AST.h @@ -2,6 +2,8 @@ #include +#include + namespace LX { // Wrapper over the LLVM variables for easier passing around // @@ -14,6 +16,9 @@ namespace LX llvm::Module module; llvm::IRBuilder<> builder; }; + + // Function to turn a AST node into string // + std::string ToString(std::unique_ptr& node); } namespace LX::AST diff --git a/Parser/src/AST-Loggers.cpp b/Parser/src/AST-Loggers.cpp new file mode 100644 index 0000000..6852f5c --- /dev/null +++ b/Parser/src/AST-Loggers.cpp @@ -0,0 +1,17 @@ +#include + +namespace LX +{ + std::string ToString(std::unique_ptr& node) + { + if (node == nullptr) { return "NULL Node"; } + + switch (node->m_Type) + { + case AST::Node::IDENTIFIER: return "IDENTIFIER"; + case AST::Node::OPERATION: return "OPERATION"; + case AST::Node::RETURN_STATEMENT: return "return"; + case AST::Node::NUMBER_LITERAL: return "number"; + } + } +} diff --git a/Parser/src/Parser.cpp b/Parser/src/Parser.cpp index 3980be9..28b673d 100644 --- a/Parser/src/Parser.cpp +++ b/Parser/src/Parser.cpp @@ -7,18 +7,12 @@ namespace LX { - template - static inline void ExpectToken(const Token& t) - { - ThrowIf(type != t.type); - } - // Local struct so everything can be public // struct Parser { // Passes constructor args to members // Parser(std::vector& _tokens, std::ofstream* _log) - : tokens(_tokens), log(_log), index(0), len(_tokens.size()) + : tokens(_tokens), log(_log), index(0), len(_tokens.size()), scopeDepth(0) {} // Tokens created by the lexer // @@ -32,6 +26,9 @@ namespace LX // Current index within the token vector // size_t index; + + // Current scope depth // + size_t scopeDepth; }; // Base of the call stack to handle the simplest of tokens // @@ -45,6 +42,18 @@ namespace LX case Token::NUMBER_LITERAL: return std::make_unique(p.tokens[p.index++].GetContents()); + // + case Token::OPEN_BRACKET: + p.scopeDepth++; + p.index++; + return nullptr; + + case Token::CLOSE_BRACE: + ThrowIf(p.scopeDepth == 0, Token::UNDEFINED, "need a different error", p.tokens[p.index]); + p.scopeDepth--; + p.index++; + return nullptr; + // Returns nullptr, the parsing function that recives that value will decide if that is valid // default: p.index++; @@ -90,7 +99,7 @@ namespace LX if (p.tokens[p.index].type == Token::RETURN) { // If so it adds an AST node with the value being returned // - // TODO: Allow this to return nullptr // + // Does not mind if this returns nullptr as that just means nothing was returned // p.index++; return std::make_unique(ParseOperation(p)); } @@ -99,11 +108,15 @@ namespace LX return ParseOperation(p); } - // Helper function to call the top of the Parse-Call-Stack + // Helper function to call the top of the Parse-Call-Stack // static inline std::unique_ptr Parse(Parser& p) { - // ParseReturn is currently the topmost function in the call stack // - return ParseReturn(p); + // Parses the current token // + std::unique_ptr out = ParseReturn(p); + + // Checks it is valid before returning // + ThrowIf(out == nullptr, Token::UNDEFINED, "top level statement", p.tokens[p.index - 1]); + return out; } // Turns the tokens of a file into it's abstract syntax tree equivalent // @@ -124,7 +137,7 @@ namespace LX { case Token::FUNCTION: { - // Skips over function token + name token // + // Skips over function token // p.index++; // Pushes a new function to the vector and gets a reference to it for adding the body // @@ -132,20 +145,30 @@ namespace LX FunctionDefinition& func = output.functions.back(); // Assigns the function name // - ExpectToken(p.tokens[p.index]); + ThrowIf(p.tokens[p.index].type != Token::IDENTIFIER, Token::IDENTIFIER, "", p.tokens[p.index]); func.name = p.tokens[p.index++].GetContents(); + // Checks for opening bracket '{' // + ThrowIf(p.tokens[p.index].type != Token::OPEN_BRACKET, Token::OPEN_BRACKET, "", p.tokens[p.index]); + p.index++; + // Loops over the body until it reaches the end // // TODO: Detect the end instead of looping over the entire token vector - while (p.index < p.len) + while (p.index < p.len && (p.tokens[p.index].type == Token::CLOSE_BRACKET && p.scopeDepth == 0) == false) { // Actually parses the function std::unique_ptr node = Parse(p); + // Logs the node to the log // + SafeLog(log, ToString(node)); + // Adds it to the vector func.body.push_back(std::move(node)); } + // Skips over closing bracket // + p.index++; + // Goes to the next iteration of the loop // continue; } @@ -153,7 +176,7 @@ namespace LX // Lets the user know there is an error // // TODO: Makes this error actually output useful information // default: - std::cout << "UNKNOWN TOKEN FOUND: " << p.tokens[p.index].type << std::endl; + std::cout << "UNKNOWN TOKEN FOUND: " << ToString(p.tokens[p.index].type) << std::endl; return output; } } diff --git a/example/main.lx b/example/main.lx index 45be109..140ec80 100644 --- a/example/main.lx +++ b/example/main.lx @@ -1,2 +1,9 @@ +func add +{ + return 1 + 2 +} + func main +{ return 375 + 32 +}