mirror of
https://github.com/PashaBibko/LX.git
synced 2026-04-03 17:39:02 +00:00
Merge branch 'LLVM-Test'
This commit is contained in:
@@ -14,6 +14,7 @@ namespace LX
|
||||
|
||||
#define TOKEN_CASE(type) case type: return #type;
|
||||
|
||||
// Logging function to turn a tokentype enum val into it's string //
|
||||
static std::string ToString(Token::TokenType type)
|
||||
{
|
||||
switch (type)
|
||||
@@ -37,6 +38,7 @@ namespace LX
|
||||
}
|
||||
}
|
||||
|
||||
// All the keywords the lexer currently supports with their token-enum equivalents //
|
||||
static const std::unordered_map<std::string, Token::TokenType> keywords =
|
||||
{
|
||||
{ "for" , Token::FOR },
|
||||
@@ -48,6 +50,8 @@ namespace LX
|
||||
{ "return" , Token::RETURN }
|
||||
};
|
||||
|
||||
// All the single-char operators currently supported by the lexer with their token-enum equivalents //
|
||||
// TODO: Support multi-char operators such as: ==, -> +=, &&
|
||||
static const std::unordered_map<char, Token::TokenType> operators =
|
||||
{
|
||||
{ '+', Token::ADD },
|
||||
@@ -56,27 +60,37 @@ namespace LX
|
||||
{ '/', Token::DIV }
|
||||
};
|
||||
|
||||
// Checks if the given word is a keyword before adding it to the tokens //
|
||||
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens)
|
||||
{
|
||||
// Checks the map for a check and if so adds it with its enum equivalent //
|
||||
if (auto keyword = keywords.find(word); keyword != keywords.end())
|
||||
{
|
||||
tokens.push_back({ keyword->second, "" });
|
||||
}
|
||||
|
||||
// Else adds it as a type of IDENTIFIER //
|
||||
else
|
||||
{
|
||||
tokens.push_back({ Token::IDENTIFIER, word });
|
||||
}
|
||||
}
|
||||
|
||||
// Struct to store the current information of the lexer //
|
||||
struct LexerInfo
|
||||
{
|
||||
// Current index within the lexer //
|
||||
std::streamsize index = 0;
|
||||
|
||||
// Trackers for when a multi-char token started //
|
||||
|
||||
std::streamsize startOfWord = 0;
|
||||
std::streamsize startOfNumberLiteral = 0;
|
||||
std::streamsize startOfStringLiteral = 0;
|
||||
|
||||
// Different flags of the lexer //
|
||||
// Stored as a bitset to minimse memory allocated (basically no difference, because only one exists at any given time) //
|
||||
|
||||
bool isAlpha : 1 = false;
|
||||
bool isNumeric : 1 = false;
|
||||
bool inComment : 1 = false;
|
||||
@@ -92,8 +106,8 @@ namespace LX
|
||||
// Logs the start of the lexical analysis
|
||||
SafeLog(log, LOG_BREAK, "Started lexing file", LOG_BREAK);
|
||||
|
||||
// Allocates a large ammount of memory to hold the output
|
||||
// Will shrink the size later on to stop excess memory
|
||||
// Allocates a large ammount of memory to hold the output //
|
||||
// Will shrink the size later on to stop excess memory being allocated //
|
||||
std::vector<Token> tokens = {};
|
||||
tokens.reserve(0xFFFF);
|
||||
|
||||
@@ -115,119 +129,130 @@ namespace LX
|
||||
// Stores the current character for easy access
|
||||
const char current = contents[info.index];
|
||||
|
||||
//
|
||||
if (info.index + 1 < len)
|
||||
// Checks if it is not at end //
|
||||
// Predicts it is not at end for microptimsation //
|
||||
if (info.index + 1 < len) [[likely]]
|
||||
{
|
||||
// Gets the next character //
|
||||
const char next = contents[info.index + 1];
|
||||
|
||||
// Sets flags depending on the value of the next character //
|
||||
info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
|
||||
info.isNextCharNumeric = (next >= '0' && next <= '9');
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
// Else defaults the next character's flags to false //
|
||||
info.isNextCharAlpha = false;
|
||||
info.isNextCharNumeric = false;
|
||||
}
|
||||
|
||||
// Works out if the current character is alphabetic or numeric
|
||||
// Works out if the current character is alphabetic or numeric //
|
||||
info.isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
|
||||
info.isNumeric = (current >= '0' && current <= '9');
|
||||
|
||||
// Updates string literal tracker and skips over rest if in a string literal
|
||||
// Updates string literal tracker and skips over rest if in a string literal //
|
||||
if (current == '"')
|
||||
{
|
||||
// Start of string literal
|
||||
// Start of string literal //
|
||||
if (info.inStringLiteral == false)
|
||||
{
|
||||
// Updates the neccesarry trackers
|
||||
// Updates the neccesarry trackers //
|
||||
info.startOfStringLiteral = info.index + 1;
|
||||
info.inStringLiteral = true;
|
||||
}
|
||||
|
||||
// End of string literal
|
||||
// End of string literal //
|
||||
else
|
||||
{
|
||||
// Adds the string literal token to the token vector
|
||||
// Adds the string literal token to the token vector //
|
||||
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
|
||||
tokens.push_back({ Token::STRING_LITERAL, lit });
|
||||
|
||||
// Updates trackers
|
||||
// Updates trackers //
|
||||
info.inStringLiteral = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Skips over rest if within a string literal
|
||||
// Skips over rest if within a string literal //
|
||||
else if (info.inStringLiteral);
|
||||
|
||||
// Updates comment state
|
||||
// Updates comment state //
|
||||
else if (current == '#')
|
||||
{
|
||||
info.inComment = !info.inComment;
|
||||
}
|
||||
|
||||
// Skips over if within a comment
|
||||
// Skips over if within a comment //
|
||||
else if (info.inComment);
|
||||
|
||||
// Start of a word
|
||||
// Start of a word //
|
||||
else if (info.isAlpha == true && info.wasLastCharAlpha == false)
|
||||
{
|
||||
// Stores the start of the word //
|
||||
info.startOfWord = info.index;
|
||||
|
||||
// Single letter word
|
||||
// Checks if it is at the end (single char words) //
|
||||
if (info.isNextCharAlpha == false)
|
||||
{
|
||||
// Calls the function designed to handle the tokenisation of words //
|
||||
TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens);
|
||||
}
|
||||
}
|
||||
|
||||
// End of a word
|
||||
// End of a word //
|
||||
else if (info.isAlpha == true && info.isNextCharAlpha == false)
|
||||
{
|
||||
// Calls the function designed to handle the tokenisation of words //
|
||||
TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens);
|
||||
}
|
||||
|
||||
// During a word
|
||||
// During a word //
|
||||
else if (info.isAlpha == true);
|
||||
|
||||
// Start of a number
|
||||
// Start of a number //
|
||||
else if (info.isNumeric == true && info.wasLastCharNumeric == false)
|
||||
{
|
||||
// Stores the start of the number //
|
||||
info.startOfNumberLiteral = info.index;
|
||||
|
||||
// Checks if it as the end (single char numbers) //
|
||||
if (info.isNextCharNumeric == false)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num });
|
||||
}
|
||||
}
|
||||
|
||||
// End of a number
|
||||
// End of a number //
|
||||
else if (info.isNumeric == true && info.isNextCharNumeric == false)
|
||||
{
|
||||
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
|
||||
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
|
||||
tokens.push_back({ Token::NUMBER_LITERAL, num });
|
||||
}
|
||||
|
||||
// During a number
|
||||
// During a number //
|
||||
else if (info.isNumeric == true);
|
||||
|
||||
// Operators (+, -, /, *)
|
||||
// Operators (+, -, /, *) //
|
||||
else if (auto op = operators.find(current); op != operators.end())
|
||||
{
|
||||
tokens.push_back({ op->second, "" });
|
||||
}
|
||||
|
||||
// If it is here and not whitespace that means it's an invalid character
|
||||
// If it is here and not whitespace that means it's an invalid character //
|
||||
else if (current == ' ' || current == '\t' || current == '\r' || current == '\n');
|
||||
|
||||
else
|
||||
{
|
||||
// Throws an error to alert the user
|
||||
// Throws an error to alert the user of the invalid character //
|
||||
throw InvalidCharInSource(info.index, current);
|
||||
}
|
||||
|
||||
// Updates trackers //
|
||||
// Updates trackers to their default state of a new character //
|
||||
|
||||
info.index++;
|
||||
info.wasLastCharAlpha = info.isAlpha;
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
namespace LX
|
||||
{
|
||||
// Passes the constructor args to the values //
|
||||
Token::Token(const TokenType _type, std::string _contents)
|
||||
: type(_type), contents(_contents)
|
||||
{}
|
||||
|
||||
7
Main.cpp
7
Main.cpp
@@ -53,6 +53,13 @@ int main(int argc, char** argv)
|
||||
// Create tokens out of the input file
|
||||
std::vector<LX::Token>tokens = LX::LexicalAnalyze(inpFile, log.get());
|
||||
|
||||
// Saves the log //
|
||||
if (log != nullptr)
|
||||
{
|
||||
log->close();
|
||||
//log->open(argv[3]);
|
||||
}
|
||||
|
||||
// Turns the tokens into an AST
|
||||
LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens, log.get());
|
||||
|
||||
|
||||
@@ -134,6 +134,9 @@
|
||||
<ClCompile Include="src\AST-Constructors.cpp" />
|
||||
<ClCompile Include="src\Parser.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\AST.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
|
||||
@@ -18,4 +18,9 @@
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="inc\AST.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
70
Parser/inc/AST.h
Normal file
70
Parser/inc/AST.h
Normal file
@@ -0,0 +1,70 @@
|
||||
#include <Parser.h>
|
||||
|
||||
#include <LLVM.h>
|
||||
|
||||
namespace LX
|
||||
{
|
||||
// Wrapper over the LLVM variables for easier passing around //
|
||||
struct InfoLLVM
|
||||
{
|
||||
// Constructor to initalize them correctly (only constructor available) //
|
||||
InfoLLVM(std::string name);
|
||||
|
||||
llvm::LLVMContext context;
|
||||
llvm::Module module;
|
||||
llvm::IRBuilder<> builder;
|
||||
};
|
||||
}
|
||||
|
||||
namespace LX::AST
|
||||
{
|
||||
// Node to represent any number within the AST //
|
||||
class NumberLiteral : public Node
|
||||
{
|
||||
public:
|
||||
// Constructor to set values and automatically set type //
|
||||
NumberLiteral(std::string num);
|
||||
|
||||
// Function for generating LLVN IR (Intermediate representation) //
|
||||
llvm::Value* GenIR(InfoLLVM& LLVM) override;
|
||||
|
||||
private:
|
||||
// The number it stores //
|
||||
// Yes the number is stored as a string, It's horrible I know //
|
||||
std::string m_Number;
|
||||
};
|
||||
|
||||
// Node to represent any 2-sided mathematical or logical operation within the AST //
|
||||
class Operation : public Node
|
||||
{
|
||||
public:
|
||||
// Constructor to set values and automatically set type //
|
||||
Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs);
|
||||
|
||||
// Function for generating LLVN IR (Intermediate representation) //
|
||||
llvm::Value* GenIR(InfoLLVM& LLVM) override;
|
||||
|
||||
private:
|
||||
// The sides of the operation //
|
||||
// Unary operations are handled by a different class //
|
||||
std::unique_ptr<Node> m_Lhs, m_Rhs;
|
||||
|
||||
// The operation to be applied to the two sides //
|
||||
Token::TokenType m_Operand;
|
||||
};
|
||||
|
||||
// Node to represent any return statement within the AST //
|
||||
class ReturnStatement : public Node
|
||||
{
|
||||
public:
|
||||
// Constructor to set values and automatically set type
|
||||
ReturnStatement(std::unique_ptr<Node> val);
|
||||
|
||||
// Function for generating LLVN IR (Intermediate representation) //
|
||||
llvm::Value* GenIR(InfoLLVM& LLVM) override;
|
||||
|
||||
private:
|
||||
// What it is returning (can be null) //
|
||||
std::unique_ptr<Node> m_Val;
|
||||
};
|
||||
}
|
||||
@@ -1,15 +1,43 @@
|
||||
#include <Parser.h>
|
||||
|
||||
#include <AST.h>
|
||||
|
||||
namespace LX
|
||||
{
|
||||
// Default constructor that just initalises LLVM variables that it holds //
|
||||
InfoLLVM::InfoLLVM(std::string name)
|
||||
: context{}, builder(context), module(name, context)
|
||||
{}
|
||||
|
||||
// Reserves space for nodes (stops excess allocations) //
|
||||
FunctionDefinition::FunctionDefinition()
|
||||
: body{}
|
||||
{ body.reserve(32); }
|
||||
|
||||
// Reserves space for functions (stops excess allocations) //
|
||||
FileAST::FileAST()
|
||||
: functions{}
|
||||
{ functions.reserve(8); }
|
||||
}
|
||||
|
||||
namespace LX::AST
|
||||
{
|
||||
// Passes constructor args to values //
|
||||
Node::Node(NodeType type)
|
||||
: m_Type(type)
|
||||
{}
|
||||
|
||||
// Passes constructor args to values and sets type //
|
||||
NumberLiteral::NumberLiteral(std::string num)
|
||||
: Node(Node::NUMBER_LITERAL), m_Number(num)
|
||||
{}
|
||||
|
||||
// Passes constructor args to values and sets type //
|
||||
Operation::Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs)
|
||||
: Node(Node::OPERATION), m_Lhs(std::move(lhs)), m_Operand(op), m_Rhs(std::move(rhs))
|
||||
{}
|
||||
|
||||
// Passes constructor args to values and sets type //
|
||||
ReturnStatement::ReturnStatement(std::unique_ptr<Node> val)
|
||||
: Node(Node::RETURN_STATEMENT), m_Val(std::move(val))
|
||||
{}
|
||||
|
||||
69
Parser/src/AST-LLVM.cpp
Normal file
69
Parser/src/AST-LLVM.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
#include <Parser.h>
|
||||
|
||||
#include <LLVM.h>
|
||||
#include <Util.h>
|
||||
#include <AST.h>
|
||||
|
||||
namespace LX::AST
|
||||
{
|
||||
// Function for generating LLVN IR (Intermediate representation) //
|
||||
llvm::Value* NumberLiteral::GenIR(InfoLLVM& LLVM)
|
||||
{
|
||||
// Converts the string to it's int equivalent //
|
||||
// TODO: Support floating point values //
|
||||
int number = std::stoi(m_Number);
|
||||
|
||||
// Returns it as a llvm value (if valid) //
|
||||
// TODO: Support floating point values //
|
||||
// TODO: Make the error actually output information //
|
||||
llvm::Value* out = llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVM.context), number, true);
|
||||
ThrowIf<IRGenerationError>(out == nullptr);
|
||||
return out;
|
||||
}
|
||||
|
||||
// Function for generating LLVN IR (Intermediate representation) //
|
||||
llvm::Value* Operation::GenIR(InfoLLVM& LLVM)
|
||||
{
|
||||
// Generates the IR for both sides of the operation //
|
||||
llvm::Value* lhs = m_Lhs->GenIR(LLVM);
|
||||
llvm::Value* rhs = m_Rhs->GenIR(LLVM);
|
||||
|
||||
// If either side is null then return null to prevent invalid IR //
|
||||
// TODO: Make the error actually output information //
|
||||
if (lhs == nullptr || rhs == nullptr)
|
||||
{
|
||||
ThrowIf<IRGenerationError>(true);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Generates the IR of the operation //
|
||||
// TODO: Support other operators other than ADD //
|
||||
// TODO: Make the error actually output information //
|
||||
llvm::Value* out = LLVM.builder.CreateAdd(lhs, rhs);
|
||||
ThrowIf<IRGenerationError>(out == nullptr);
|
||||
return out;
|
||||
}
|
||||
|
||||
// Function for generating LLVN IR (Intermediate representation) //
|
||||
llvm::Value* ReturnStatement::GenIR(InfoLLVM& LLVM)
|
||||
{
|
||||
// Checks if it is a void return //
|
||||
if (m_Val == nullptr)
|
||||
{
|
||||
// Void returns are currently not implemented //
|
||||
// TODO: Find out how to return nothing from a function //
|
||||
ThrowIf<IRGenerationError>(true);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Else it will be returning a value //
|
||||
else
|
||||
{
|
||||
// Generates the value and creates a return for it //
|
||||
// TODO: Make the error actually output information //
|
||||
llvm::Value* out = LLVM.builder.CreateRet(m_Val->GenIR(LLVM));
|
||||
ThrowIf<IRGenerationError>(out == nullptr);
|
||||
return out;
|
||||
}
|
||||
}
|
||||
}
|
||||
59
Parser/src/GenIR.cpp
Normal file
59
Parser/src/GenIR.cpp
Normal file
@@ -0,0 +1,59 @@
|
||||
#include <Parser.h>
|
||||
|
||||
#include <Util.h>
|
||||
#include <AST.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace LX
|
||||
{
|
||||
// Tells the generator if the current node is allowed to be within a top-level context //
|
||||
// TODO: Make this function do something other than return true
|
||||
static constexpr bool IsValidTopLevelNode(AST::Node::NodeType type)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// Generates the LLVM IR for the given function //
|
||||
static void GenerateFunctionIR(FunctionDefinition& funcAST, InfoLLVM& LLVM)
|
||||
{
|
||||
// Creates the functions signature and return type //
|
||||
|
||||
llvm::FunctionType* retType = llvm::FunctionType::get(llvm::Type::getInt32Ty(LLVM.context), false); // <- Defaults to int currently
|
||||
llvm::Function* func = llvm::Function::Create(retType, llvm::Function::ExternalLinkage, "main", LLVM.module); // Defaults to main currently
|
||||
llvm::BasicBlock* entry = llvm::BasicBlock::Create(LLVM.context, "entry", func);
|
||||
LLVM.builder.SetInsertPoint(entry);
|
||||
|
||||
// Generates the IR within the function by looping over the nodes //
|
||||
for (auto& node : funcAST.body)
|
||||
{
|
||||
ThrowIf<int>(IsValidTopLevelNode(node->m_Type) == false); // <- TODO: replace with actual error type
|
||||
node->GenIR(LLVM);
|
||||
}
|
||||
|
||||
// Adds a terminator if there is none //
|
||||
if (entry->getTerminator() == nullptr)
|
||||
{
|
||||
LLVM.builder.CreateRet(llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVM.context), 0, true));
|
||||
}
|
||||
|
||||
// Verifies the function works //
|
||||
ThrowIf<int>(llvm::verifyFunction(*func), &llvm::errs()); // <- TODO: Make error type
|
||||
}
|
||||
|
||||
// Turns an abstract binary tree into LLVM intermediate representation //
|
||||
void GenerateIR(FileAST& ast)
|
||||
{
|
||||
// Creates the LLVM variables needed for generating IR that are shared between functions //
|
||||
InfoLLVM LLVM("add_itns");
|
||||
|
||||
// Loops over the functions to generate their LLVM IR //
|
||||
for (auto& func : ast.functions)
|
||||
{
|
||||
GenerateFunctionIR(func, LLVM);
|
||||
}
|
||||
|
||||
// Outputs the IR to the console //
|
||||
LLVM.module.print(llvm::outs(), nullptr);
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
#include <Parser.h>
|
||||
|
||||
#include <Util.h>
|
||||
#include <AST.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@@ -8,72 +10,105 @@ namespace LX
|
||||
// Local struct so everything can be public //
|
||||
struct Parser
|
||||
{
|
||||
// Passes constructor args to members //
|
||||
Parser(std::vector<Token>& _tokens, std::ofstream* _log)
|
||||
: tokens(_tokens), log(_log), index(0), len(_tokens.size())
|
||||
{}
|
||||
|
||||
// Tokens created by the lexer //
|
||||
std::vector<Token>& tokens;
|
||||
|
||||
// Log to output to (can be null) //
|
||||
std::ofstream* log;
|
||||
|
||||
// Length of the the token vector //
|
||||
const size_t len;
|
||||
|
||||
// Current index within the token vector //
|
||||
size_t index;
|
||||
};
|
||||
|
||||
// Base of the call stack to handle the simplest of tokens //
|
||||
static std::unique_ptr<AST::Node> ParsePrimary(Parser& p)
|
||||
{
|
||||
// There are lots of possible token's that can be here so a switch is used //
|
||||
switch (p.tokens[p.index].type)
|
||||
{
|
||||
// Number literals just require them to be turned into an AST node //
|
||||
// Note: Number literals are stored as strings because i'm a masochist //
|
||||
case Token::NUMBER_LITERAL:
|
||||
return std::make_unique<AST::NumberLiteral>(p.tokens[p.index].contents);
|
||||
|
||||
// Default just alerts the user of an error //
|
||||
// TODO: Actually make this error tell the user something useful //
|
||||
default:
|
||||
std::cout << "UNKNOWN TOKEN: " << p.tokens[p.index].type << std::endl;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Handles operations, if it is not currently at an operation goes to ParsePrimary //
|
||||
static std::unique_ptr<AST::Node> ParseOperation(Parser& p)
|
||||
{
|
||||
// Checks if the next token is an operator //
|
||||
// TODO: Add more than just add //
|
||||
// TODO: Make this not crash when at the end //
|
||||
if (p.tokens[p.index + 1].type == Token::ADD)
|
||||
{
|
||||
// Parses the left hand side of the operation //
|
||||
std::unique_ptr<AST::Node> lhs = ParsePrimary(p);
|
||||
p.index++;
|
||||
|
||||
// Skips over operator (again i'm lazy)
|
||||
// Stores the operator to pass into the AST node //
|
||||
Token::TokenType op = p.tokens[p.index].type;
|
||||
p.index++;
|
||||
|
||||
// Parses the right hand of the operation //
|
||||
std::unique_ptr<AST::Node> rhs = ParsePrimary(p);
|
||||
|
||||
return std::make_unique<AST::Operation>(std::move(lhs), Token::ADD, std::move(rhs));
|
||||
// Returns an AST node as all of the components combined together //
|
||||
return std::make_unique<AST::Operation>(std::move(lhs), op, std::move(rhs));
|
||||
}
|
||||
|
||||
// Else goes down the call stack //
|
||||
return ParsePrimary(p);
|
||||
}
|
||||
|
||||
// Handles return statements, if not calls ParseOperation //
|
||||
static std::unique_ptr<AST::Node> ParseReturn(Parser& p)
|
||||
{
|
||||
// Checks if the current token is a return //
|
||||
if (p.tokens[p.index].type == Token::RETURN)
|
||||
{
|
||||
// If so it adds an AST node with the value being returned //
|
||||
// TODO: Allow this to return nullptr //
|
||||
p.index++;
|
||||
return std::make_unique<AST::ReturnStatement>(ParseOperation(p));
|
||||
}
|
||||
|
||||
// Else goes down the call stack //
|
||||
return ParseOperation(p);
|
||||
}
|
||||
|
||||
// Helper function to call the top of the Parse-Call-Stack //
static inline std::unique_ptr<AST::Node> Parse(Parser& p)
{
    // ParseReturn is currently the topmost function in the call stack //
    return ParseReturn(p);
}
|
||||
|
||||
// Turns the tokens of a file into it's abstract syntax tree equivalent //
|
||||
FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log)
|
||||
{
|
||||
// Logs the start of the parsing
|
||||
SafeLog(log, LOG_BREAK, "Started parsing tokens", LOG_BREAK);
|
||||
|
||||
// Creates the output storer and the parser //
|
||||
FileAST output;
|
||||
Parser p(tokens, log);
|
||||
|
||||
// Loops over the tokens and calls the correct parsing function //
|
||||
// Which depends on their type and current state of the parser //
|
||||
while (p.index < p.len)
|
||||
{
|
||||
switch (p.tokens[p.index].type)
|
||||
@@ -81,13 +116,15 @@ namespace LX
|
||||
case Token::FUNCTION:
|
||||
{
|
||||
// Skips over function token + name token
|
||||
// TEMPORARY
|
||||
// TODO: Store function name in the type
|
||||
p.index++; p.index++;
|
||||
|
||||
// Pushes a new function to the vector and gets a reference to it for adding the body //
|
||||
output.functions.emplace_back();
|
||||
FunctionDefinition& func = output.functions.back();
|
||||
|
||||
// TEMPORARY casue im lazy
|
||||
// Loops over the body until it reaches the end //
|
||||
// TODO: Detect the end instead of looping over the entire token vector
|
||||
while (p.index < p.len)
|
||||
{
|
||||
// Actually parses the function
|
||||
@@ -98,16 +135,23 @@ namespace LX
|
||||
p.index++;
|
||||
}
|
||||
|
||||
// Goes to the next iteration of the loop //
|
||||
continue;
|
||||
}
|
||||
|
||||
// Lets the user know there is an error //
|
||||
// TODO: Makes this error actually output useful information //
|
||||
default:
|
||||
std::cout << "UNKNOWN TOKEN FOUND" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Logs that AST has finished parsing //
|
||||
// TODO: Make this output the AST in a human-readable form //
|
||||
SafeLog(log, "AST length: ", output.functions[0].body.size());
|
||||
|
||||
// Returns the output and shrinks all uneccesarry allocated memory
|
||||
output.functions.shrink_to_fit();
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,10 +8,4 @@ Token::IDENTIFIER: main
|
||||
Unknown: 3
|
||||
Token::NUMBER_LITERAL: 3
|
||||
Token::ADD
|
||||
Token::NUMBER_LITERAL: 4
|
||||
|
||||
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
Started parsing tokens
|
||||
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|
||||
AST length: 1
|
||||
Token::NUMBER_LITERAL: 56
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
func main
|
||||
return 3 + 4
|
||||
return 3 + 56
|
||||
|
||||
25
common/LLVM.h
Normal file
25
common/LLVM.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
// Helper file for including all necessary parts of LLVM //
|
||||
#ifdef _MSC_VER
|
||||
|
||||
// Disables the specific MSVC warnings listed below while the LLVM headers are included, as LLVM files have a lot of data-loss casts that won't cause issues //
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4244)
|
||||
#pragma warning(disable : 4267)
|
||||
#pragma warning(disable : 4624)
|
||||
#pragma warning(disable : 4800)
|
||||
|
||||
// Includes the LLVM files //
|
||||
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Verifier.h>
|
||||
|
||||
// Restores the warning settings that were saved by the push above //
|
||||
#pragma warning(pop)
|
||||
|
||||
#else
|
||||
#error This code is only designed to work with MSVC due to the use of vcpkg and other aspects
|
||||
#endif // _MSC_VER
|
||||
@@ -6,7 +6,7 @@
|
||||
// Foward declarations of STD classes to minimise includes //
|
||||
namespace std
|
||||
{
|
||||
template<typename T1 = char>
|
||||
template<typename T1>
|
||||
struct char_traits;
|
||||
|
||||
template<typename T1, typename T2>
|
||||
@@ -24,6 +24,7 @@ namespace std
|
||||
|
||||
namespace LX
|
||||
{
|
||||
// Error type with index and character to alert the user that LX does not understand that symbol //
|
||||
struct InvalidCharInSource
|
||||
{
|
||||
std::streamsize index;
|
||||
|
||||
@@ -7,19 +7,10 @@
|
||||
|
||||
// Foward declares all items of the llvm lib that we need //
|
||||
// Done to avoid including LLVM.h to shorten compile times //
|
||||
/*
|
||||
namespace llvm
|
||||
{
|
||||
class Value;
|
||||
class LLVMContext;
|
||||
class Module;
|
||||
namespace llvm { class Value; }
|
||||
|
||||
class ConstantFolder;
|
||||
class IRBuilderDefaultInserter;
|
||||
|
||||
template<typename T1 = ConstantFolder, typename T2 = IRBuilderDefaultInserter>
|
||||
class IRBuilder;
|
||||
}*/
|
||||
// Foward declares the wrapper around the LLVM objects we need to pass around //
|
||||
namespace LX { struct InfoLLVM; }
|
||||
|
||||
// The nodes of the abstract syntax tree constructed by the parser from the tokens //
|
||||
namespace LX::AST
|
||||
@@ -45,84 +36,51 @@ namespace LX::AST
|
||||
UNDEFINED = -1
|
||||
};
|
||||
|
||||
// Constructor to set the node type //
|
||||
Node(NodeType type)
|
||||
: m_Type(type)
|
||||
{}
|
||||
// Constructor to set the node type (no others provided) //
|
||||
Node(NodeType type);
|
||||
|
||||
// Virtual destructor because of polymorphism //
|
||||
virtual ~Node() = default;
|
||||
|
||||
// Function for generating LLVN IR (Intermediate representation) //
|
||||
virtual llvm::Value* GenIR(InfoLLVM& LLVM) = 0;
|
||||
|
||||
// Function for generating C/C++ code (Currently not implemented) //
|
||||
//virtual void GenC() = 0;
|
||||
|
||||
// The type of the node //
|
||||
const NodeType m_Type;
|
||||
};
|
||||
|
||||
class NumberLiteral : public Node
|
||||
{
|
||||
public:
|
||||
// Constructor to set values and automatically set type
|
||||
NumberLiteral(std::string num);
|
||||
|
||||
private:
|
||||
// The number it stores
|
||||
// Yes the number is stored as a string
|
||||
// It's horrible I know
|
||||
std::string m_Number;
|
||||
};
|
||||
|
||||
//
|
||||
class Operation : public Node
|
||||
{
|
||||
public:
|
||||
// Constructor to set values and automatically set type
|
||||
Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs);
|
||||
|
||||
private:
|
||||
// The sides of the operation
|
||||
// Unary operations are handled by a different class
|
||||
std::unique_ptr<Node> m_Lhs, m_Rhs;
|
||||
|
||||
// The operation to be applied to the two sides
|
||||
Token::TokenType m_Operand;
|
||||
};
|
||||
|
||||
//
|
||||
class ReturnStatement : public Node
|
||||
{
|
||||
public:
|
||||
// Constructor to set values and automatically set type
|
||||
ReturnStatement(std::unique_ptr<Node> val);
|
||||
|
||||
private:
|
||||
// What it is returning (can be null)
|
||||
std::unique_ptr<Node> m_Val;
|
||||
};
|
||||
}
|
||||
|
||||
namespace LX
|
||||
{
|
||||
// Thrown if there was an error during IR Generation //
|
||||
struct IRGenerationError {};
|
||||
|
||||
// Holds all needed info about a function //
|
||||
// Currently only holds the body but in the future will hold: name, params, namespace/class-member
|
||||
struct FunctionDefinition
|
||||
{
|
||||
FunctionDefinition()
|
||||
: body{}
|
||||
{}
|
||||
// Defualt constructor (none other given) //
|
||||
FunctionDefinition();
|
||||
|
||||
// The instructions of the body of the function //
|
||||
std::vector<std::unique_ptr<AST::Node>> body;
|
||||
};
|
||||
|
||||
struct FileAST
|
||||
{
|
||||
FileAST()
|
||||
: functions{}
|
||||
{}
|
||||
// Default constructor (none other given) //
|
||||
FileAST();
|
||||
|
||||
// All the functions within this file //
|
||||
std::vector<FunctionDefinition> functions;
|
||||
};
|
||||
|
||||
// Turns the tokens of a file into it's abstract syntax tree equivalent //
|
||||
FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log);
|
||||
|
||||
// Turns an abstract binary tree into LLVM intermediate representation //
|
||||
void GenerateIR(FileAST& ast);
|
||||
}
|
||||
|
||||
@@ -5,14 +5,19 @@
|
||||
namespace LX
|
||||
{
|
||||
// Helper function to throw an error of type T, built from args, if the condition is true //
// Also micro-optimises by predicting that no error is thrown //
template<typename T, typename... Args>
inline void ThrowIf(const bool condition, Args... args)
{
    // The common case: nothing is wrong so nothing happens //
    if (!condition) [[likely]]
    {
        return;
    }

    throw T(args...);
}
|
||||
|
||||
// Helper function for logging //
// Writes every argument to the log followed by a newline, or does nothing if the log is null //
template<typename... Args>
inline void SafeLog(std::ofstream* log, Args... args)
{
    // No log was given so there is nowhere to write to //
    if (log == nullptr)
    {
        return;
    }

    // Streams the arguments in the order they were given and then ends the line //
    std::ofstream& out = *log;
    (out << ... << args);
    out << "\n";
}
|
||||
|
||||
// Gives a standard way to mark a change between different sections within the log output //
|
||||
constexpr const char* LOG_BREAK = "\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user