From 3017e70631d6608da1e05f15180cbde09f9829f8 Mon Sep 17 00:00:00 2001 From: Pasha Bibko <156938226+PashaBibko@users.noreply.github.com> Date: Thu, 17 Apr 2025 14:47:19 +0100 Subject: [PATCH] Parser is held together by duck-tape and a wish --- Lexer/src/Lexer.cpp | 1 + Main.cpp | 3 + Parser/src/Parser.cpp | 106 ++++++++++++++++++++++++++++++++++- build-test/Log.txt | 10 +++- build-test/main.lx | 3 +- common/Parser.h | 126 ++++++++++++++++++++++++++++++++---------- 6 files changed, 216 insertions(+), 33 deletions(-) diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp index 94f44d2..a3f36b2 100644 --- a/Lexer/src/Lexer.cpp +++ b/Lexer/src/Lexer.cpp @@ -45,6 +45,7 @@ namespace LX { "else" , Token::ELSE }, { "elif" , Token::ELIF }, { "func" , Token::FUNCTION }, + { "return" , Token::RETURN } }; static const std::unordered_map operators = diff --git a/Main.cpp b/Main.cpp index 58de56e..9b6f741 100644 --- a/Main.cpp +++ b/Main.cpp @@ -52,6 +52,9 @@ int main(int argc, char** argv) // Create tokens out of the input file std::vectortokens = LX::LexicalAnalyze(inpFile, log.get()); + + // Turns the tokens into an AST + LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens, log.get()); } catch (LX::IncorrectCommandLineArgs) diff --git a/Parser/src/Parser.cpp b/Parser/src/Parser.cpp index 5903287..53f7dc3 100644 --- a/Parser/src/Parser.cpp +++ b/Parser/src/Parser.cpp @@ -1,9 +1,113 @@ #include +#include + +#include namespace LX { + // Local struct so everything can be public // + struct Parser + { + Parser(std::vector& _tokens, std::ofstream* _log) + : tokens(_tokens), log(_log), index(0), len(_tokens.size()) + {} + + std::vector& tokens; + std::ofstream* log; + + const size_t len; + size_t index; + }; + + static std::unique_ptr ParsePrimary(Parser& p) + { + switch (p.tokens[p.index].type) + { + case Token::NUMBER_LITERAL: + return std::make_unique(p.tokens[p.index].contents); + + default: + std::cout << "UNKNOWN TOKEN: " << p.tokens[p.index].type << std::endl; + return nullptr; + } + } + + static std::unique_ptr ParseOperation(Parser& p) + { + if (p.tokens[p.index + 1].type == Token::ADD) + { + std::unique_ptr lhs = ParsePrimary(p); + p.index++; + + // Skips over operator (again i'm lazy) + p.index++; + + std::unique_ptr rhs = ParsePrimary(p); + + return std::make_unique(std::move(lhs), Token::ADD, std::move(rhs)); + } + + return ParsePrimary(p); + } + + static std::unique_ptr ParseReturn(Parser& p) + { + if (p.tokens[p.index].type == Token::RETURN) + { + p.index++; + return std::make_unique(ParseOperation(p)); + } + + return ParseOperation(p); + } + + static std::unique_ptr Parse(Parser& p) + { + return ParseReturn(p); + } + FileAST TurnTokensIntoAbstractSyntaxTree(std::vector& tokens, std::ofstream* log) { - return FileAST(); + // Logs the start of the parsing + SafeLog(log, LOG_BREAK, "Started parsing tokens", LOG_BREAK); + + FileAST output; + Parser p(tokens, log); + + while (p.index < p.len) + { + switch (p.tokens[p.index].type) + { + case Token::FUNCTION: + { + // Skips over function token + name token + // TEMPORARY + p.index++; p.index++; + + output.functions.emplace_back(); + FunctionDefinition& func = output.functions.back(); + + // TEMPORARY casue im lazy + while (p.index < p.len) + { + // Actually parses the function + std::unique_ptr node = Parse(p); + + // Adds it to the vector and iterates to the next token + func.body.push_back(std::move(node)); + p.index++; + } + + continue; + } + + default: + std::cout << "UNKNOWN TOKEN FOUND" << std::endl; + } + } + + SafeLog(log, "AST length: ", output.functions[0].body.size()); + + return output; } } diff --git a/build-test/Log.txt b/build-test/Log.txt index d2b3892..a060136 100644 --- a/build-test/Log.txt +++ b/build-test/Log.txt @@ -5,7 +5,13 @@ Started lexing file Token::FUNCTION Token::IDENTIFIER: main -Token::IDENTIFIER: return +Unknown: 3 Token::NUMBER_LITERAL: 34 Token::ADD -Token::NUMBER_LITERAL: 4 +Token::NUMBER_LITERAL: 4324 + +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +Started parsing tokens +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + +AST length: 1 diff --git a/build-test/main.lx b/build-test/main.lx index 0348916..538eb44 100644 --- a/build-test/main.lx +++ b/build-test/main.lx @@ -1,3 +1,2 @@ func main - return 34 + 4 - \ No newline at end of file + return 34 + 4324 diff --git a/common/Parser.h b/common/Parser.h index 3e04b9d..62dcbf3 100644 --- a/common/Parser.h +++ b/common/Parser.h @@ -24,52 +24,122 @@ namespace llvm namespace LX::AST { // Base node that everything else inherits from - class Node + struct Node + { + // Enum for storing the type of node // + // Used so a pointer to Node can be used and then turned into it's true type // + enum NodeType + { + // General Nodes // + + IDENTIFIER, + NUMBER_LITERAL, + OPERATION, + + // Control flow Nodes // + + RETURN_STATEMENT, + + // If an error happened somewhere // + UNDEFINED = -1 + }; + + // Constructor to set the node type // + Node(NodeType type) + : m_Type(type) + {} + + // Virtual destructor because of polymorphism // + virtual ~Node() = default; + + // Function for generating LLVN IR (Intermediate representation) // + virtual llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) = 0; + + // Function for generating C/C++ code (Currently not implemented) // + //virtual void GenC() = 0; + + // The type of the node // + const NodeType m_Type; + }; + + class NumberLiteral : public Node { public: - // Enum for storing the type of node // - // Used so a pointer to Node can be used and then turned into it's true type // - enum NodeType - { - // General Nodes // - - IDENTIFIER, - - // Control flow Nodes // - - // If an error happened somewhere // - UNDEFINED = -1 - }; - - // Constructor to set the node type // - Node(NodeType type) - : m_Type(type) + NumberLiteral(std::string num) + : Node(Node::NUMBER_LITERAL), m_Number(num) {} - // Virtual destructor because of polymorphism // - virtual ~Node() = default; + llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) + { + return nullptr; + } - // Function for generating LLVN IR (Intermediate representation) // - virtual llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) = 0; + private: + // The number it stores + // Yes the number is stored as a string + // It's horrible I know + std::string m_Number; + }; - // Function for generating C/C++ code (Currently not implemented) // - virtual void GenC() = 0; + // + class Operation : public Node + { + public: + Operation(std::unique_ptr lhs, Token::TokenType op, std::unique_ptr rhs) + : Node(Node::OPERATION), m_Lhs(std::move(lhs)), m_Operand(op), m_Rhs(std::move(rhs)) + {} - // The type of the node // - const NodeType m_Type; + llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) + { + return nullptr; + } + + private: + // The sides of the operation + // Unary operations are handled by a different class + std::unique_ptr m_Lhs, m_Rhs; + + // The operation to be applied to the two sides + Token::TokenType m_Operand; + }; + + // + class ReturnStatement : public Node + { + public: + ReturnStatement(std::unique_ptr val) + : Node(Node::RETURN_STATEMENT), m_Val(std::move(val)) + {} + + llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) + { + return nullptr; + } + + private: + // What it is returning (can be null) + std::unique_ptr m_Val; }; } namespace LX { - struct FunctionDeclaration + struct FunctionDefinition { + FunctionDefinition() + : body{} + {} + std::vector> body; }; struct FileAST { - std::vector functions; + FileAST() + : functions{} + {} + + std::vector functions; }; FileAST TurnTokensIntoAbstractSyntaxTree(std::vector& tokens, std::ofstream* log);