Parser is held together by duct tape and a wish

This commit is contained in:
Pasha Bibko
2025-04-17 14:47:19 +01:00
parent e1fce699da
commit 3017e70631
6 changed files with 216 additions and 33 deletions

View File

@@ -45,6 +45,7 @@ namespace LX
{ "else" , Token::ELSE },
{ "elif" , Token::ELIF },
{ "func" , Token::FUNCTION },
{ "return" , Token::RETURN }
};
static const std::unordered_map<char, Token::TokenType> operators =

View File

@@ -52,6 +52,9 @@ int main(int argc, char** argv)
// Create tokens out of the input file
std::vector<LX::Token>tokens = LX::LexicalAnalyze(inpFile, log.get());
// Turns the tokens into an AST
LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens, log.get());
}
catch (LX::IncorrectCommandLineArgs)

View File

@@ -1,9 +1,113 @@
#include <Parser.h>
#include <Util.h>
#include <iostream>
namespace LX
{
// Parsing state that is passed to every Parse* helper in this file //
// Local struct so everything can be public //
struct Parser
{
    // Binds the parser to the token vector and the log file pointer //
    // The initialisers are written in declaration order as that is the order they actually run in //
    Parser(std::vector<Token>& _tokens, std::ofstream* _log)
        : tokens(_tokens), log(_log), len(_tokens.size()), index(0)
    {}

    // The tokens being parsed (held by reference, not owned by the parser) //
    std::vector<Token>& tokens;

    // Log file pointer handed in by the caller //
    std::ofstream* log;

    // Amount of tokens, read once when the parser is created //
    const size_t len;

    // Index of the token that is currently being looked at //
    size_t index;
};
// Parses a single primary expression from the current token //
// Does not move p.index, the caller has to step past the token itself //
// Returns nullptr when there is no token left or the token cannot be a primary expression //
static std::unique_ptr<AST::Node> ParsePrimary(Parser& p)
{
    // Stops before reading past the end of the tokens (e.g. a "return" with nothing after it) //
    if (p.index >= p.len)
    {
        std::cout << "UNEXPECTED END OF TOKENS" << std::endl;
        return nullptr;
    }

    const Token& current = p.tokens[p.index];

    switch (current.type)
    {
        case Token::NUMBER_LITERAL:
            return std::make_unique<AST::NumberLiteral>(current.contents);

        default:
            std::cout << "UNKNOWN TOKEN: " << current.type << std::endl;
            return nullptr;
    }
}
// Parses "<primary> + <primary>" when the token after the current one is an ADD //
// Otherwise parses just a primary expression //
// On a successful operation p.index is left on the right hand side token //
// Returns nullptr if the operator is the last token and has no right hand side //
static std::unique_ptr<AST::Node> ParseOperation(Parser& p)
{
    // Only peeks at the next token when there actually is one //
    const bool hasNextToken = p.index + 1 < p.len;

    if (hasNextToken && p.tokens[p.index + 1].type == Token::ADD)
    {
        std::unique_ptr<AST::Node> lhs = ParsePrimary(p);

        // Skips over the left hand side and the operator //
        p.index += 2;

        // An operator at the very end of the tokens has nothing to its right //
        if (p.index >= p.len)
        {
            std::cout << "MISSING RIGHT HAND SIDE OF OPERATION" << std::endl;
            return nullptr;
        }

        std::unique_ptr<AST::Node> rhs = ParsePrimary(p);

        return std::make_unique<AST::Operation>(std::move(lhs), Token::ADD, std::move(rhs));
    }

    return ParsePrimary(p);
}
// Parses a return statement when the current token is RETURN //
// Any other token is passed straight on to ParseOperation //
static std::unique_ptr<AST::Node> ParseReturn(Parser& p)
{
    // Not a return statement so the next parser in the chain deals with it //
    if (p.tokens[p.index].type != Token::RETURN)
        return ParseOperation(p);

    // Steps over the return token so the returned value can be parsed //
    p.index++;
    std::unique_ptr<AST::Node> value = ParseOperation(p);

    return std::make_unique<AST::ReturnStatement>(std::move(value));
}
// Entry point for parsing one node //
// Starts at the top of the parsing chain which is currently ParseReturn //
static std::unique_ptr<AST::Node> Parse(Parser& p)
{
    std::unique_ptr<AST::Node> node = ParseReturn(p);
    return node;
}
// Turns the given tokens into a FileAST //
// tokens: the tokens to parse //
// log: log file pointer that is passed on to SafeLog //
// Returns the FileAST holding every function that was parsed //
FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log)
{
    // Logs the start of the parsing
    SafeLog(log, LOG_BREAK, "Started parsing tokens", LOG_BREAK);

    FileAST output;
    Parser p(tokens, log);

    while (p.index < p.len)
    {
        switch (p.tokens[p.index].type)
        {
            case Token::FUNCTION:
            {
                // Skips over function token + name token
                // TEMPORARY
                p.index += 2;

                output.functions.emplace_back();
                FunctionDefinition& func = output.functions.back();

                // TEMPORARY: every remaining token is treated as part of this function's body
                while (p.index < p.len)
                {
                    // Actually parses the function
                    std::unique_ptr<AST::Node> node = Parse(p);

                    // Adds it to the vector and iterates to the next token
                    func.body.push_back(std::move(node));
                    p.index++;
                }

                break;
            }

            default:
                std::cout << "UNKNOWN TOKEN FOUND" << std::endl;

                // Steps past the token, without this the loop would never end
                p.index++;
                break;
        }
    }

    // Only reads the first function if one was actually parsed
    const size_t astLength = output.functions.empty() ? 0 : output.functions[0].body.size();
    SafeLog(log, "AST length: ", astLength);

    return output;
}
}

View File

@@ -5,7 +5,13 @@ Started lexing file
Token::FUNCTION
Token::IDENTIFIER: main
Token::IDENTIFIER: return
Unknown: 3
Token::NUMBER_LITERAL: 34
Token::ADD
Token::NUMBER_LITERAL: 4
Token::NUMBER_LITERAL: 4324
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
Started parsing tokens
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
AST length: 1

View File

@@ -1,3 +1,2 @@
func main
return 34 + 4
return 34 + 4324

View File

@@ -24,52 +24,122 @@ namespace llvm
namespace LX::AST
{
// Base node that everything else inherits from
class Node
struct Node
{
// Enum for storing the type of node //
// Used so a pointer to Node can be used and then turned into its true type //
enum NodeType
{
// General Nodes //
IDENTIFIER,
NUMBER_LITERAL,
OPERATION,
// Control flow Nodes //
RETURN_STATEMENT,
// If an error happened somewhere //
UNDEFINED = -1
};
// Constructor to set the node type //
// Called by the derived nodes with their own NodeType value //
Node(NodeType type)
: m_Type(type)
{}
// Virtual destructor because of polymorphism //
virtual ~Node() = default;
// Function for generating LLVM IR (Intermediate representation) //
// Pure virtual so every derived node has to provide its own version //
virtual llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) = 0;
// Function for generating C/C++ code (Currently not implemented) //
//virtual void GenC() = 0;
// The type of the node //
// Const so it cannot change after the node has been created //
const NodeType m_Type;
};
class NumberLiteral : public Node
{
public:
// Enum for storing the type of node //
// Used so a pointer to Node can be used and then turned into it's true type //
enum NodeType
{
// General Nodes //
IDENTIFIER,
// Control flow Nodes //
// If an error happened somewhere //
UNDEFINED = -1
};
// Constructor to set the node type //
Node(NodeType type)
: m_Type(type)
NumberLiteral(std::string num)
: Node(Node::NUMBER_LITERAL), m_Number(num)
{}
// Virtual destructor because of polymorphism //
virtual ~Node() = default;
llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder)
{
return nullptr;
}
// Function for generating LLVN IR (Intermediate representation) //
virtual llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) = 0;
private:
// The number it stores
// Yes the number is stored as a string
// It's horrible I know
std::string m_Number;
};
// Function for generating C/C++ code (Currently not implemented) //
virtual void GenC() = 0;
//
// Node for an operation that has a left and a right hand side //
class Operation : public Node
{
    public:
        // Takes ownership of both sides and stores which operation joins them //
        // The initialisers are written in declaration order as that is the order they actually run in //
        Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs)
            : Node(Node::OPERATION), m_Lhs(std::move(lhs)), m_Rhs(std::move(rhs)), m_Operand(op)
        {}

        // IR generation is not written yet so this currently always returns nullptr //
        llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) override
        {
            return nullptr;
        }

    private:
        // The sides of the operation
        // Unary operations are handled by a different class
        std::unique_ptr<Node> m_Lhs, m_Rhs;

        // The operation to be applied to the two sides
        Token::TokenType m_Operand;
};
//
// Node for a return statement //
class ReturnStatement : public Node
{
    public:
        // Takes ownership of the value that is being returned //
        ReturnStatement(std::unique_ptr<Node> val)
            : Node(Node::RETURN_STATEMENT), m_Val(std::move(val))
        {}

        // IR generation is not written yet so this currently always returns nullptr //
        // Marked override so a mismatch with Node::GenIR is caught when compiling //
        llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) override
        {
            return nullptr;
        }

    private:
        // What it is returning (can be null)
        std::unique_ptr<Node> m_Val;
};
}
namespace LX
{
struct FunctionDeclaration
struct FunctionDefinition
{
FunctionDefinition()
: body{}
{}
std::vector<std::unique_ptr<AST::Node>> body;
};
// Holds everything that was parsed out of one file //
struct FileAST
{
    // Starts with no functions //
    FileAST()
        : functions{}
    {}

    // Every function definition found in the file //
    std::vector<FunctionDefinition> functions;
};
FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log);