Merge branch 'LLVM-Test'

This commit is contained in:
Pasha Bibko
2025-04-20 19:32:56 +01:00
16 changed files with 397 additions and 103 deletions

View File

@@ -134,6 +134,9 @@
<ClCompile Include="src\AST-Constructors.cpp" />
<ClCompile Include="src\Parser.cpp" />
</ItemGroup>
<!-- Header files listed as part of this project -->
<ItemGroup>
<ClInclude Include="inc\AST.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>

View File

@@ -18,4 +18,9 @@
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<!-- Places inc\AST.h under the Header Files filter -->
<ItemGroup>
<ClInclude Include="inc\AST.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

70
Parser/inc/AST.h Normal file
View File

@@ -0,0 +1,70 @@
#include <Parser.h>
#include <LLVM.h>
namespace LX
{
// Wrapper over the LLVM variables for easier passing around //
// NOTE: Members are initialised in the order they are declared below, so `context` //
// has to stay first because `module` and `builder` are both constructed from it //
struct InfoLLVM
{
    // Constructor to initialize them correctly (only constructor available) //
    // `name` is handed to the llvm::Module; explicit so a string never silently converts //
    explicit InfoLLVM(std::string name);

    // Context that the module and the builder are constructed from //
    llvm::LLVMContext context;

    // Module that the generated functions are added to //
    llvm::Module module;

    // Builder used to emit instructions //
    llvm::IRBuilder<> builder;
};
}
namespace LX::AST
{
// Node to represent any number within the AST //
class NumberLiteral : public Node
{
public:
    // Constructor to set values and automatically set type //
    // Explicit so a std::string is never implicitly converted into a node //
    explicit NumberLiteral(std::string num);

    // Function for generating LLVM IR (Intermediate representation) //
    llvm::Value* GenIR(InfoLLVM& LLVM) override;

private:
    // The number it stores, kept as text //
    // It is only turned into an actual number when the IR is generated //
    std::string m_Number;
};
// AST node for a two-sided (binary) mathematical or logical operation //
class Operation : public Node
{
public:
    // Takes ownership of both sides, records the operator and sets the node type //
    Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs);

    // Emits the LLVM IR (intermediate representation) for the operation //
    llvm::Value* GenIR(InfoLLVM& LLVM) override;

private:
    // Left-hand side of the operation //
    std::unique_ptr<Node> m_Lhs;

    // Right-hand side of the operation (unary operations live in a different class) //
    std::unique_ptr<Node> m_Rhs;

    // Token type of the operator that joins the two sides //
    Token::TokenType m_Operand;
};
// Node to represent any return statement within the AST //
class ReturnStatement : public Node
{
public:
    // Constructor to set values and automatically set type //
    // Explicit so a node pointer is never implicitly wrapped in a return statement //
    explicit ReturnStatement(std::unique_ptr<Node> val);

    // Function for generating LLVM IR (Intermediate representation) //
    llvm::Value* GenIR(InfoLLVM& LLVM) override;

private:
    // What it is returning (can be null, which stands for a return without a value) //
    std::unique_ptr<Node> m_Val;
};
}

View File

@@ -1,15 +1,43 @@
#include <Parser.h>
#include <AST.h>
namespace LX
{
// Constructor that initialises the LLVM variables that it holds //
// The initialisers are listed in the order the members are declared (context, module, builder) //
// because that is the order they run in; the module and the builder both need the context //
InfoLLVM::InfoLLVM(std::string name)
    : context{}, module(name, context), builder(context)
{}
// Starts with an empty body and reserves room for its nodes up front (avoids excess allocations) //
FunctionDefinition::FunctionDefinition()
    : body{}
{
    // Number of node slots reserved before anything is parsed //
    constexpr auto initialNodeCapacity = 32;
    body.reserve(initialNodeCapacity);
}
// Starts with no functions and reserves room for them up front (avoids excess allocations) //
FileAST::FileAST()
    : functions{}
{
    // Number of function slots reserved before anything is parsed //
    constexpr auto initialFunctionCapacity = 8;
    functions.reserve(initialFunctionCapacity);
}
}
namespace LX::AST
{
// Passes constructor args to values //
Node::Node(NodeType type)
: m_Type(type)
{}
// Passes constructor args to values and sets type //
// `num` is taken by value, so it is moved into the member instead of being copied again //
NumberLiteral::NumberLiteral(std::string num)
    : Node(Node::NUMBER_LITERAL), m_Number(std::move(num))
{}
// Passes constructor args to values and sets type //
// The initialisers follow the order the members are declared in (m_Lhs, m_Rhs, m_Operand) //
// which is the order they run in regardless of how they are written here //
Operation::Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs)
    : Node(Node::OPERATION), m_Lhs(std::move(lhs)), m_Rhs(std::move(rhs)), m_Operand(op)
{}
// Passes constructor args to values and sets type //
ReturnStatement::ReturnStatement(std::unique_ptr<Node> val)
: Node(Node::RETURN_STATEMENT), m_Val(std::move(val))
{}

69
Parser/src/AST-LLVM.cpp Normal file
View File

@@ -0,0 +1,69 @@
#include <Parser.h>
#include <LLVM.h>
#include <Util.h>
#include <AST.h>
namespace LX::AST
{
// Generates the LLVM IR (intermediate representation) for a number literal //
// The stored text is parsed as an int and emitted as a signed 32-bit integer constant //
llvm::Value* NumberLiteral::GenIR(InfoLLVM& LLVM)
{
    // Parses the stored text into its int equivalent //
    // TODO: Support floating point values //
    const int parsed = std::stoi(m_Number);

    // Builds the constant from the parsed value //
    // TODO: Support floating point values //
    auto* const int32Type = llvm::Type::getInt32Ty(LLVM.context);
    llvm::Value* const constant = llvm::ConstantInt::get(int32Type, parsed, true);

    // Only hands the constant back if it is valid //
    // TODO: Make the error actually output information //
    ThrowIf<IRGenerationError>(constant == nullptr);
    return constant;
}
// Function for generating LLVM IR (Intermediate representation) //
// Emits both sides and then joins them with an add instruction //
// NOTE: m_Operand is not consulted yet, every operation is currently emitted as an add //
llvm::Value* Operation::GenIR(InfoLLVM& LLVM)
{
    // A side can be missing when parsing it failed, so both are checked before being dereferenced //
    // TODO: Make the error actually output information //
    if (m_Lhs == nullptr || m_Rhs == nullptr)
    {
        ThrowIf<IRGenerationError>(true);
        return nullptr;
    }

    // Generates the IR for both sides of the operation //
    llvm::Value* lhs = m_Lhs->GenIR(LLVM);
    llvm::Value* rhs = m_Rhs->GenIR(LLVM);

    // If either side is null then return null to prevent invalid IR //
    // TODO: Make the error actually output information //
    if (lhs == nullptr || rhs == nullptr)
    {
        ThrowIf<IRGenerationError>(true);
        return nullptr;
    }

    // Generates the IR of the operation //
    // TODO: Support other operators other than ADD //
    // TODO: Make the error actually output information //
    llvm::Value* out = LLVM.builder.CreateAdd(lhs, rhs);
    ThrowIf<IRGenerationError>(out == nullptr);
    return out;
}
// Generates the LLVM IR (intermediate representation) for a return statement //
llvm::Value* ReturnStatement::GenIR(InfoLLVM& LLVM)
{
    // A missing value means a void return, which is not implemented yet //
    // TODO: Find out how to return nothing from a function //
    const bool isVoidReturn = (m_Val == nullptr);
    if (isVoidReturn)
    {
        ThrowIf<IRGenerationError>(true);
        return nullptr;
    }

    // Otherwise the value is generated first and a return instruction is created for it //
    // TODO: Make the error actually output information //
    llvm::Value* const returned = m_Val->GenIR(LLVM);
    llvm::Value* const instruction = LLVM.builder.CreateRet(returned);
    ThrowIf<IRGenerationError>(instruction == nullptr);
    return instruction;
}
}

59
Parser/src/GenIR.cpp Normal file
View File

@@ -0,0 +1,59 @@
#include <Parser.h>
#include <Util.h>
#include <AST.h>
#include <iostream>
namespace LX
{
// Tells the generator if the current node is allowed to be within a top-level context //
// TODO: Make this function do something other than return true
static constexpr bool IsValidTopLevelNode(AST::Node::NodeType type)
{
return true;
}
// Generates the LLVM IR for the given function //
// The function is currently always emitted as `main` returning a 32-bit int //
// Throws (via ThrowIf) if a node is missing, is not allowed at the top level or if LLVM rejects the function //
static void GenerateFunctionIR(FunctionDefinition& funcAST, InfoLLVM& LLVM)
{
    // Creates the functions signature and return type //
    llvm::FunctionType* retType = llvm::FunctionType::get(llvm::Type::getInt32Ty(LLVM.context), false); // <- Defaults to int currently
    llvm::Function* func = llvm::Function::Create(retType, llvm::Function::ExternalLinkage, "main", LLVM.module); // Defaults to main currently
    llvm::BasicBlock* entry = llvm::BasicBlock::Create(LLVM.context, "entry", func);
    LLVM.builder.SetInsertPoint(entry);

    // Generates the IR within the function by looping over the nodes //
    for (auto& node : funcAST.body)
    {
        // A null node (left behind by a failed parse) is rejected before it is dereferenced //
        ThrowIf<int>(node == nullptr || IsValidTopLevelNode(node->m_Type) == false); // <- TODO: replace with actual error type
        node->GenIR(LLVM);
    }

    // Adds a terminator if there is none //
    if (entry->getTerminator() == nullptr)
    {
        LLVM.builder.CreateRet(llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVM.context), 0, true));
    }

    // Verifies the function works //
    // The error stream is passed to verifyFunction itself so that its diagnostics are printed //
    ThrowIf<int>(llvm::verifyFunction(*func, &llvm::errs())); // <- TODO: Make error type
}
// Turns the abstract syntax tree of a file into LLVM intermediate representation //
// The resulting module is printed to the console //
void GenerateIR(FileAST& ast)
{
    // LLVM state that is shared by every function that gets generated //
    InfoLLVM LLVM("add_itns");

    // Generates the IR of each function in turn //
    for (FunctionDefinition& definition : ast.functions)
        GenerateFunctionIR(definition, LLVM);

    // Prints the finished module //
    LLVM.module.print(llvm::outs(), nullptr);
}
}

View File

@@ -1,5 +1,7 @@
#include <Parser.h>
#include <Util.h>
#include <AST.h>
#include <iostream>
@@ -8,72 +10,105 @@ namespace LX
// Local struct so everything can be public //
// Holds the state that is shared between the parsing functions //
struct Parser
{
    // Passes constructor args to members //
    // The initialisers follow the order the members are declared in, which is the order they run in //
    Parser(std::vector<Token>& _tokens, std::ofstream* _log)
        : tokens(_tokens), log(_log), len(_tokens.size()), index(0)
    {}

    // Tokens created by the lexer //
    std::vector<Token>& tokens;

    // Log to output to (can be null) //
    std::ofstream* log;

    // Length of the token vector (captured once at construction) //
    const size_t len;

    // Current index within the token vector //
    size_t index;
};
// Base of the call stack to handle the simplest of tokens //
// Returns the node for the current token, or nullptr when there is no usable token //
// Does not advance the parser's index //
static std::unique_ptr<AST::Node> ParsePrimary(Parser& p)
{
    // The callers can step past the last token (e.g. input ending after an operator) //
    // so the index is checked before the token vector is read //
    if (p.index >= p.len)
    {
        std::cout << "UNEXPECTED END OF TOKENS" << std::endl;
        return nullptr;
    }

    // There are lots of possible tokens that can be here so a switch is used //
    switch (p.tokens[p.index].type)
    {
        // Number literals just require them to be turned into an AST node //
        // Note: Number literals are stored as strings //
        case Token::NUMBER_LITERAL:
            return std::make_unique<AST::NumberLiteral>(p.tokens[p.index].contents);

        // Default just alerts the user of an error //
        // TODO: Actually make this error tell the user something useful //
        default:
            std::cout << "UNKNOWN TOKEN: " << p.tokens[p.index].type << std::endl;
            return nullptr;
    }
}
// Handles operations, if it is not currently at an operation goes to ParsePrimary //
// On an operation the index is left on the right hand side's token //
static std::unique_ptr<AST::Node> ParseOperation(Parser& p)
{
    // Checks if the next token is an operator //
    // The lookahead is only read when it exists, so the last token no longer reads past the end //
    // TODO: Add more than just add //
    const bool hasLookahead = p.index + 1 < p.len;
    if (hasLookahead && p.tokens[p.index + 1].type == Token::ADD)
    {
        // Parses the left hand side of the operation //
        std::unique_ptr<AST::Node> lhs = ParsePrimary(p);
        p.index++;

        // Stores the operator to pass into the AST node //
        Token::TokenType op = p.tokens[p.index].type;
        p.index++;

        // Parses the right hand of the operation //
        std::unique_ptr<AST::Node> rhs = ParsePrimary(p);

        // Returns an AST node as all of the components combined together //
        return std::make_unique<AST::Operation>(std::move(lhs), op, std::move(rhs));
    }

    // Else goes down the call stack //
    return ParsePrimary(p);
}
// Parses a return statement; anything else is passed on to ParseOperation //
static std::unique_ptr<AST::Node> ParseReturn(Parser& p)
{
    // Anything that is not a return goes down the call stack //
    const bool atReturn = (p.tokens[p.index].type == Token::RETURN);
    if (!atReturn)
        return ParseOperation(p);

    // Steps over the return token and wraps the value that follows it //
    // TODO: Allow this to return nullptr //
    ++p.index;
    std::unique_ptr<AST::Node> returned = ParseOperation(p);
    return std::make_unique<AST::ReturnStatement>(std::move(returned));
}
// Helper function to call the top of the Parse-Call-Stack //
// Keeps the callers independent of whichever parsing function is currently the topmost one //
static inline std::unique_ptr<AST::Node> Parse(Parser& p)
{
    // ParseReturn is currently the topmost function in the call stack //
    return ParseReturn(p);
}
// Turns the tokens of a file into it's abstract syntax tree equivalent //
FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log)
{
// Logs the start of the parsing
SafeLog(log, LOG_BREAK, "Started parsing tokens", LOG_BREAK);
// Creates the output storer and the parser //
FileAST output;
Parser p(tokens, log);
// Loops over the tokens and calls the correct parsing function //
// Which depends on their type and current state of the parser //
while (p.index < p.len)
{
switch (p.tokens[p.index].type)
@@ -81,13 +116,15 @@ namespace LX
case Token::FUNCTION:
{
// Skips over function token + name token
// TEMPORARY
// TODO: Store function name in the type
p.index++; p.index++;
// Pushes a new function to the vector and gets a reference to it for adding the body //
output.functions.emplace_back();
FunctionDefinition& func = output.functions.back();
// TEMPORARY casue im lazy
// Loops over the body until it reaches the end //
// TODO: Detect the end instead of looping over the entire token vector
while (p.index < p.len)
{
// Actually parses the function
@@ -98,16 +135,23 @@ namespace LX
p.index++;
}
// Goes to the next iteration of the loop //
continue;
}
// Lets the user know there is an error //
// TODO: Makes this error actually output useful information //
default:
std::cout << "UNKNOWN TOKEN FOUND" << std::endl;
}
}
// Logs that AST has finished parsing //
// TODO: Make this output the AST in a human-readable form //
SafeLog(log, "AST length: ", output.functions[0].body.size());
// Shrinks any unnecessarily allocated memory and then returns the output //
output.functions.shrink_to_fit();
return output;
}
}