Improved compile times

Added A LOT of comments
This commit is contained in:
Pasha Bibko
2025-04-20 19:32:25 +01:00
parent 680a3d1323
commit 6ba9f8e596
15 changed files with 286 additions and 149 deletions

View File

@@ -14,6 +14,7 @@ namespace LX
#define TOKEN_CASE(type) case type: return #type; #define TOKEN_CASE(type) case type: return #type;
// Logging function to turn a TokenType enum value into its string //
static std::string ToString(Token::TokenType type) static std::string ToString(Token::TokenType type)
{ {
switch (type) switch (type)
@@ -37,6 +38,7 @@ namespace LX
} }
} }
// All the keywords the lexer currently supports with their token-enum equivalents //
static const std::unordered_map<std::string, Token::TokenType> keywords = static const std::unordered_map<std::string, Token::TokenType> keywords =
{ {
{ "for" , Token::FOR }, { "for" , Token::FOR },
@@ -48,6 +50,8 @@ namespace LX
{ "return" , Token::RETURN } { "return" , Token::RETURN }
}; };
// All the single-char operators currently supported by the lexer with their token-enum equivalents //
// TODO: Support multi-char operators such as: ==, ->, +=, &&
static const std::unordered_map<char, Token::TokenType> operators = static const std::unordered_map<char, Token::TokenType> operators =
{ {
{ '+', Token::ADD }, { '+', Token::ADD },
@@ -56,27 +60,37 @@ namespace LX
{ '/', Token::DIV } { '/', Token::DIV }
}; };
// Checks if the given word is a keyword before adding it to the tokens //
static void TokenizeWord(const std::string& word, std::vector<Token>& tokens) static void TokenizeWord(const std::string& word, std::vector<Token>& tokens)
{ {
// Checks the map for a check and if so adds it with its enum equivalent //
if (auto keyword = keywords.find(word); keyword != keywords.end()) if (auto keyword = keywords.find(word); keyword != keywords.end())
{ {
tokens.push_back({ keyword->second, "" }); tokens.push_back({ keyword->second, "" });
} }
// Else adds it as a type of IDENTIFIER //
else else
{ {
tokens.push_back({ Token::IDENTIFIER, word }); tokens.push_back({ Token::IDENTIFIER, word });
} }
} }
// Struct to store the current information of the lexer //
struct LexerInfo struct LexerInfo
{ {
// Current index within the lexer //
std::streamsize index = 0; std::streamsize index = 0;
// Trackers for when a multi-char token started //
std::streamsize startOfWord = 0; std::streamsize startOfWord = 0;
std::streamsize startOfNumberLiteral = 0; std::streamsize startOfNumberLiteral = 0;
std::streamsize startOfStringLiteral = 0; std::streamsize startOfStringLiteral = 0;
// Different flags of the lexer //
// Stored as bit-fields to minimise memory allocated (basically no difference, because only one exists at any given time) //
bool isAlpha : 1 = false; bool isAlpha : 1 = false;
bool isNumeric : 1 = false; bool isNumeric : 1 = false;
bool inComment : 1 = false; bool inComment : 1 = false;
@@ -92,8 +106,8 @@ namespace LX
// Logs the start of the lexical analysis // Logs the start of the lexical analysis
SafeLog(log, LOG_BREAK, "Started lexing file", LOG_BREAK); SafeLog(log, LOG_BREAK, "Started lexing file", LOG_BREAK);
// Allocates a large ammount of memory to hold the output // Allocates a large ammount of memory to hold the output //
// Will shrink the size later on to stop excess memory // Will shrink the size later on to stop excess memory being allocated //
std::vector<Token> tokens = {}; std::vector<Token> tokens = {};
tokens.reserve(0xFFFF); tokens.reserve(0xFFFF);
@@ -115,119 +129,130 @@ namespace LX
// Stores the current character for easy access // Stores the current character for easy access
const char current = contents[info.index]; const char current = contents[info.index];
// // Checks if it is not at end //
if (info.index + 1 < len) // Hints that it is usually not at the end, as a micro-optimisation //
if (info.index + 1 < len) [[likely]]
{ {
// Gets the next character //
const char next = contents[info.index + 1]; const char next = contents[info.index + 1];
// Sets flags depending on the value of the next character //
info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z'); info.isNextCharAlpha = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
info.isNextCharNumeric = (next >= '0' && next <= '9'); info.isNextCharNumeric = (next >= '0' && next <= '9');
} }
else else
{ {
// Else defaults the next character's flags to false //
info.isNextCharAlpha = false; info.isNextCharAlpha = false;
info.isNextCharNumeric = false; info.isNextCharNumeric = false;
} }
// Works out if the current character is alphabetic or numeric // Works out if the current character is alphabetic or numeric //
info.isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z'); info.isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
info.isNumeric = (current >= '0' && current <= '9'); info.isNumeric = (current >= '0' && current <= '9');
// Updates string literal tracker and skips over rest if in a string literal // Updates string literal tracker and skips over rest if in a string literal //
if (current == '"') if (current == '"')
{ {
// Start of string literal // Start of string literal //
if (info.inStringLiteral == false) if (info.inStringLiteral == false)
{ {
// Updates the neccesarry trackers // Updates the neccesarry trackers //
info.startOfStringLiteral = info.index + 1; info.startOfStringLiteral = info.index + 1;
info.inStringLiteral = true; info.inStringLiteral = true;
} }
// End of string literal // End of string literal //
else else
{ {
// Adds the string literal token to the token vector // Adds the string literal token to the token vector //
std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral); std::string lit(contents.data() + info.startOfStringLiteral, info.index - info.startOfStringLiteral);
tokens.push_back({ Token::STRING_LITERAL, lit }); tokens.push_back({ Token::STRING_LITERAL, lit });
// Updates trackers // Updates trackers //
info.inStringLiteral = false; info.inStringLiteral = false;
} }
} }
// Skips over rest if within a string literal // Skips over rest if within a string literal //
else if (info.inStringLiteral); else if (info.inStringLiteral);
// Updates comment state // Updates comment state //
else if (current == '#') else if (current == '#')
{ {
info.inComment = !info.inComment; info.inComment = !info.inComment;
} }
// Skips over if within a comment // Skips over if within a comment //
else if (info.inComment); else if (info.inComment);
// Start of a word // Start of a word //
else if (info.isAlpha == true && info.wasLastCharAlpha == false) else if (info.isAlpha == true && info.wasLastCharAlpha == false)
{ {
// Stores the start of the word //
info.startOfWord = info.index; info.startOfWord = info.index;
// Single letter word // Checks if it is at the end (single char words) //
if (info.isNextCharAlpha == false) if (info.isNextCharAlpha == false)
{ {
// Calls the function designed to handle the tokenisation of words //
TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens); TokenizeWord({ contents.data() + info.startOfWord, 1 }, tokens);
} }
} }
// End of a word // End of a word //
else if (info.isAlpha == true && info.isNextCharAlpha == false) else if (info.isAlpha == true && info.isNextCharAlpha == false)
{ {
// Calls the function designed to handle the tokenisation of words //
TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens); TokenizeWord({ contents.data() + info.startOfWord, (unsigned __int64)((info.index + 1) - info.startOfWord) }, tokens);
} }
// During a word // During a word //
else if (info.isAlpha == true); else if (info.isAlpha == true);
// Start of a number // Start of a number //
else if (info.isNumeric == true && info.wasLastCharNumeric == false) else if (info.isNumeric == true && info.wasLastCharNumeric == false)
{ {
// Stores the start of the number //
info.startOfNumberLiteral = info.index; info.startOfNumberLiteral = info.index;
// Checks if it is at the end (single char numbers) //
if (info.isNextCharNumeric == false) if (info.isNextCharNumeric == false)
{ {
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, num }); tokens.push_back({ Token::NUMBER_LITERAL, num });
} }
} }
// End of a number // End of a number //
else if (info.isNumeric == true && info.isNextCharNumeric == false) else if (info.isNumeric == true && info.isNextCharNumeric == false)
{ {
// Pushes the number to the token vector. Number literals are stored as string in the tokens //
std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral); std::string num(contents.data() + info.startOfNumberLiteral, (unsigned __int64)(info.index + 1) - info.startOfNumberLiteral);
tokens.push_back({ Token::NUMBER_LITERAL, num }); tokens.push_back({ Token::NUMBER_LITERAL, num });
} }
// During a number // During a number //
else if (info.isNumeric == true); else if (info.isNumeric == true);
// Operators (+, -, /, *) // Operators (+, -, /, *) //
else if (auto op = operators.find(current); op != operators.end()) else if (auto op = operators.find(current); op != operators.end())
{ {
tokens.push_back({ op->second, "" }); tokens.push_back({ op->second, "" });
} }
// If it is here and not whitespace that means it's an invalid character // If it is here and not whitespace that means it's an invalid character //
else if (current == ' ' || current == '\t' || current == '\r' || current == '\n'); else if (current == ' ' || current == '\t' || current == '\r' || current == '\n');
else else
{ {
// Throws an error to alert the user // Throws an error to alert the user of the invalid character //
throw InvalidCharInSource(info.index, current); throw InvalidCharInSource(info.index, current);
} }
// Updates trackers // // Updates trackers to their default state of a new character //
info.index++; info.index++;
info.wasLastCharAlpha = info.isAlpha; info.wasLastCharAlpha = info.isAlpha;

View File

@@ -5,6 +5,7 @@
namespace LX namespace LX
{ {
// Builds a token from its type and a copy of the text it holds //
Token::Token(const TokenType _type, std::string _contents)
    : type(_type)
    , contents(_contents)
{
}

View File

@@ -53,6 +53,13 @@ int main(int argc, char** argv)
// Create tokens out of the input file // Create tokens out of the input file
std::vector<LX::Token>tokens = LX::LexicalAnalyze(inpFile, log.get()); std::vector<LX::Token>tokens = LX::LexicalAnalyze(inpFile, log.get());
// Saves the log //
if (log != nullptr)
{
log->close();
//log->open(argv[3]);
}
// Turns the tokens into an AST // Turns the tokens into an AST
LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens, log.get()); LX::FileAST AST = LX::TurnTokensIntoAbstractSyntaxTree(tokens, log.get());

View File

@@ -136,6 +136,9 @@
<ClCompile Include="src\GenIR.cpp" /> <ClCompile Include="src\GenIR.cpp" />
<ClCompile Include="src\Parser.cpp" /> <ClCompile Include="src\Parser.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<ClInclude Include="inc\AST.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
</ImportGroup> </ImportGroup>

View File

@@ -24,4 +24,9 @@
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup>
<ClInclude Include="inc\AST.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project> </Project>

70
Parser/inc/AST.h Normal file
View File

@@ -0,0 +1,70 @@
#include <Parser.h>
#include <LLVM.h>
namespace LX
{
// Bundles the LLVM objects needed during IR generation so they can be passed around as one argument //
struct InfoLLVM
{
    // Only constructor available: builds the context, module and builder //
    // `name` is handed to the module when it is constructed //
    // Marked explicit so a string can never silently convert into a whole set of LLVM state //
    explicit InfoLLVM(std::string name);

    // Declaration order matters here: members are constructed top to bottom and //
    // both the module and the builder are constructed from the context //
    llvm::LLVMContext context;
    llvm::Module module;
    llvm::IRBuilder<> builder;
};
}
namespace LX::AST
{
// Node to represent any number within the AST //
class NumberLiteral : public Node
{
public:
    // Constructor to set values and automatically set the node type //
    // Marked explicit so a plain string never implicitly converts into a node //
    explicit NumberLiteral(std::string num);

    // Function for generating LLVM IR (Intermediate Representation) //
    llvm::Value* GenIR(InfoLLVM& LLVM) override;

private:
    // The number it stores, kept as the text taken from the token //
    // It is only converted to a numeric value when the IR is generated //
    std::string m_Number;
};
// AST node for any two-sided mathematical or logical operation //
class Operation : public Node
{
public:
    // Takes ownership of both sides, stores the operator and automatically sets the node type //
    Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs);

    // Function for generating LLVM IR (Intermediate Representation) //
    llvm::Value* GenIR(InfoLLVM& LLVM) override;

private:
    // Left hand side of the operation //
    std::unique_ptr<Node> m_Lhs;

    // Right hand side of the operation (unary operations are handled by a different class) //
    std::unique_ptr<Node> m_Rhs;

    // The operator token that is applied to the two sides //
    Token::TokenType m_Operand;
};
// Node to represent any return statement within the AST //
class ReturnStatement : public Node
{
public:
    // Constructor to set values and automatically set the node type //
    // Marked explicit so a node pointer never implicitly converts into a return statement //
    explicit ReturnStatement(std::unique_ptr<Node> val);

    // Function for generating LLVM IR (Intermediate Representation) //
    llvm::Value* GenIR(InfoLLVM& LLVM) override;

private:
    // What it is returning (can be null) //
    std::unique_ptr<Node> m_Val;
};
}

View File

@@ -1,15 +1,43 @@
#include <Parser.h> #include <Parser.h>
#include <AST.h>
namespace LX
{
// Only constructor of InfoLLVM: initialises the LLVM variables that it holds //
// The initialisers are listed in the order the members are declared (context, module, builder) //
// because that is the order they are actually constructed in, whatever order is written here //
InfoLLVM::InfoLLVM(std::string name)
    : context{}, module(name, context), builder(context)
{}
// Starts with an empty body and reserves room for 32 nodes up front (stops excess allocations) //
FunctionDefinition::FunctionDefinition()
    : body()
{
    body.reserve(32);
}
// Starts with no functions and reserves room for 8 of them up front (stops excess allocations) //
FileAST::FileAST()
    : functions()
{
    functions.reserve(8);
}
}
namespace LX::AST namespace LX::AST
{ {
// Passes constructor args to values //
Node::Node(NodeType type)
: m_Type(type)
{}
// Sets the node type and takes over the text of the number //
// `num` is already a by-value copy so it is moved into the member instead of being copied a second time //
NumberLiteral::NumberLiteral(std::string num)
    : Node(Node::NUMBER_LITERAL), m_Number(std::move(num))
{}
// Sets the node type, takes ownership of both sides and stores the operator //
// The initialisers follow the declaration order of the members (m_Lhs, m_Rhs, m_Operand) //
// as that is the order they are really initialised in //
Operation::Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs)
    : Node(Node::OPERATION), m_Lhs(std::move(lhs)), m_Rhs(std::move(rhs)), m_Operand(op)
{}
// Sets the node type and takes ownership of the value being returned //
ReturnStatement::ReturnStatement(std::unique_ptr<Node> val)
    : Node(Node::RETURN_STATEMENT)
    , m_Val(std::move(val))
{
}

View File

@@ -2,51 +2,66 @@
#include <LLVM.h> #include <LLVM.h>
#include <Util.h> #include <Util.h>
#include <AST.h>
namespace LX::AST namespace LX::AST
{ {
// Function for generating LLVM IR (Intermediate Representation) of a number literal //
llvm::Value* NumberLiteral::GenIR(InfoLLVM& LLVM)
{
    // The number is stored as text so it is converted to its int equivalent first //
    // TODO: Support floating point values //
    const int parsed = std::stoi(m_Number);

    // Wraps the number in a signed 32-bit integer constant //
    // TODO: Support floating point values //
    // TODO: Make the error actually output information //
    auto* const int32Type = llvm::Type::getInt32Ty(LLVM.context);
    llvm::Value* const constant = llvm::ConstantInt::get(int32Type, parsed, true);
    ThrowIf<IRGenerationError>(constant == nullptr);

    return constant;
}
// Function for generating LLVM IR (Intermediate Representation) of a two-sided operation //
llvm::Value* Operation::GenIR(InfoLLVM& LLVM)
{
    // A side can be null (ParsePrimary returns nullptr for tokens it does not understand) //
    // so both are checked before being dereferenced //
    // TODO: Make the error actually output information //
    if (m_Lhs == nullptr || m_Rhs == nullptr)
    {
        ThrowIf<IRGenerationError>(true);
        return nullptr;
    }

    // Generates the IR for both sides of the operation //
    llvm::Value* lhs = m_Lhs->GenIR(LLVM);
    llvm::Value* rhs = m_Rhs->GenIR(LLVM);

    // If either side produced no value then return null to prevent invalid IR //
    // TODO: Make the error actually output information //
    if (lhs == nullptr || rhs == nullptr)
    {
        ThrowIf<IRGenerationError>(true);
        return nullptr;
    }

    // Generates the IR of the operation //
    // TODO: Support other operators other than ADD (m_Operand is currently ignored) //
    // TODO: Make the error actually output information //
    llvm::Value* out = LLVM.builder.CreateAdd(lhs, rhs);
    ThrowIf<IRGenerationError>(out == nullptr);

    return out;
}
llvm::Value* ReturnStatement::GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) // Function for generating LLVN IR (Intermediate representation) //
llvm::Value* ReturnStatement::GenIR(InfoLLVM& LLVM)
{ {
// Checks if it is a void return //
if (m_Val == nullptr) if (m_Val == nullptr)
{ {
// Void returns are currently not implemented //
// TODO: Find out how to return nothing from a function //
ThrowIf<IRGenerationError>(true); ThrowIf<IRGenerationError>(true);
return nullptr; return nullptr;
} }
// Else it will be returning a value //
else else
{ {
llvm::Value* out = builder.CreateRet(m_Val->GenIR(context, module, builder)); // Generates the value and creates a return for it //
// TODO: Make the error actually output information //
llvm::Value* out = LLVM.builder.CreateRet(m_Val->GenIR(LLVM));
ThrowIf<IRGenerationError>(out == nullptr); ThrowIf<IRGenerationError>(out == nullptr);
return out; return out;
} }

View File

@@ -1,63 +1,59 @@
#include <Parser.h> #include <Parser.h>
#include <Util.h> #include <Util.h>
#include <AST.h>
#include <iostream> #include <iostream>
namespace LX namespace LX
{ {
// Tells the generator if the current node is allowed to be within a top-level context //
// TODO: Make this function do something other than return true
static constexpr bool IsValidTopLevelNode(AST::Node::NodeType type) static constexpr bool IsValidTopLevelNode(AST::Node::NodeType type)
{ {
return true; return true;
} }
static void GenerateFunctionIR(FunctionDefinition& funcAST, llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) // Generates the LLVM IR for the given function //
static void GenerateFunctionIR(FunctionDefinition& funcAST, InfoLLVM& LLVM)
{ {
// Creates the functions signature and return type // // Creates the functions signature and return type //
llvm::FunctionType* retType = llvm::FunctionType::get(llvm::Type::getInt32Ty(context), false); // <- Defaults to int currently llvm::FunctionType* retType = llvm::FunctionType::get(llvm::Type::getInt32Ty(LLVM.context), false); // <- Defaults to int currently
llvm::Function* func = llvm::Function::Create(retType, llvm::Function::ExternalLinkage, "main", module); // Defaults to main currently llvm::Function* func = llvm::Function::Create(retType, llvm::Function::ExternalLinkage, "main", LLVM.module); // Defaults to main currently
llvm::BasicBlock* entry = llvm::BasicBlock::Create(context, "entry", func); llvm::BasicBlock* entry = llvm::BasicBlock::Create(LLVM.context, "entry", func);
builder.SetInsertPoint(entry); LLVM.builder.SetInsertPoint(entry);
// Generates the IR within the function //
// Generates the IR within the function by looping over the nodes //
for (auto& node : funcAST.body) for (auto& node : funcAST.body)
{ {
ThrowIf<int>(IsValidTopLevelNode(node->m_Type) == false); // <- TODO: replace with actual error type ThrowIf<int>(IsValidTopLevelNode(node->m_Type) == false); // <- TODO: replace with actual error type
node->GenIR(context, module, builder); node->GenIR(LLVM);
} }
// Adds a terminator if there is none // // Adds a terminator if there is none //
if (entry->getTerminator() == nullptr) if (entry->getTerminator() == nullptr)
{ {
builder.CreateRet(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 0, true)); LLVM.builder.CreateRet(llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVM.context), 0, true));
} }
// Verifies the function works // // Verifies the function works //
ThrowIf<int>(llvm::verifyFunction(*func), &llvm::errs()); // <- TODO: Make error type
ThrowIf<int>(llvm::verifyFunction(*func), &llvm::errs()); // <- Make error type
} }
// Turns an abstract syntax tree into LLVM intermediate representation and prints it //
void GenerateIR(FileAST& ast)
{
    // Creates the LLVM variables needed for generating IR that are shared between functions //
    // The module keeps the "add_ints" name it had before the LLVM variables were wrapped in InfoLLVM //
    InfoLLVM LLVM("add_ints");

    // Loops over the functions to generate their LLVM IR //
    for (auto& func : ast.functions)
    {
        GenerateFunctionIR(func, LLVM);
    }

    // Outputs the IR to the console //
    LLVM.module.print(llvm::outs(), nullptr);
}
} }

View File

@@ -1,5 +1,7 @@
#include <Parser.h> #include <Parser.h>
#include <Util.h> #include <Util.h>
#include <AST.h>
#include <iostream> #include <iostream>
@@ -8,72 +10,105 @@ namespace LX
// Local struct so everything can be public // // Local struct so everything can be public //
struct Parser struct Parser
{ {
// Passes constructor args to members //
Parser(std::vector<Token>& _tokens, std::ofstream* _log) Parser(std::vector<Token>& _tokens, std::ofstream* _log)
: tokens(_tokens), log(_log), index(0), len(_tokens.size()) : tokens(_tokens), log(_log), index(0), len(_tokens.size())
{} {}
// Tokens created by the lexer //
std::vector<Token>& tokens; std::vector<Token>& tokens;
// Log to output to (can be null) //
std::ofstream* log; std::ofstream* log;
// Length of the the token vector //
const size_t len; const size_t len;
// Current index within the token vector //
size_t index; size_t index;
}; };
// Base of the call stack to handle the simplest of tokens //
// Returns the node for the current token, or nullptr if there is no token or it is not understood //
static std::unique_ptr<AST::Node> ParsePrimary(Parser& p)
{
    // Guards against reading past the end of the tokens //
    // This happens when the source stops straight after an operator or a return //
    if (p.index >= p.len)
    {
        std::cout << "UNEXPECTED END OF TOKENS" << std::endl;
        return nullptr;
    }

    // There are lots of possible tokens that can be here so a switch is used //
    const Token& current = p.tokens[p.index];
    switch (current.type)
    {
        // Number literals just require them to be turned into an AST node //
        // Note: The number is passed on as the string stored in the token //
        case Token::NUMBER_LITERAL:
            return std::make_unique<AST::NumberLiteral>(current.contents);

        // Default just alerts the user of an error //
        // TODO: Actually make this error tell the user something useful //
        default:
            std::cout << "UNKNOWN TOKEN: " << current.type << std::endl;
            return nullptr;
    }
}
// Handles operations, if it is not currently at an operation goes to ParsePrimary //
static std::unique_ptr<AST::Node> ParseOperation(Parser& p)
{
    // Checks if the next token is an operator //
    // The bounds check comes first so the last token never reads past the end of the vector //
    // TODO: Add more than just add //
    if (p.index + 1 < p.len && p.tokens[p.index + 1].type == Token::ADD)
    {
        // Parses the left hand side of the operation //
        std::unique_ptr<AST::Node> lhs = ParsePrimary(p);
        p.index++;

        // Stores the operator to pass into the AST node //
        Token::TokenType op = p.tokens[p.index].type;
        p.index++;

        // Parses the right hand of the operation //
        // Stays null if the tokens end straight after the operator //
        std::unique_ptr<AST::Node> rhs = nullptr;
        if (p.index < p.len)
        {
            rhs = ParsePrimary(p);
        }

        // Returns an AST node as all of the components combined together //
        return std::make_unique<AST::Operation>(std::move(lhs), op, std::move(rhs));
    }

    // Else goes down the call stack //
    return ParsePrimary(p);
}
// Parses a return statement when the current token is RETURN, otherwise hands over to ParseOperation //
static std::unique_ptr<AST::Node> ParseReturn(Parser& p)
{
    // Anything that is not a return goes straight down the call stack //
    if (p.tokens[p.index].type != Token::RETURN)
    {
        return ParseOperation(p);
    }

    // Steps over the return token itself //
    // TODO: Allow this to return nullptr //
    p.index++;

    // Parses what is being returned and wraps it in a return node //
    std::unique_ptr<AST::Node> returned = ParseOperation(p);
    return std::make_unique<AST::ReturnStatement>(std::move(returned));
}
// Entry point of the Parse-Call-Stack, used so callers do not need to know which function is on top //
static inline std::unique_ptr<AST::Node> Parse(Parser& p)
{
    // ParseReturn is currently the topmost function in the call stack //
    std::unique_ptr<AST::Node> parsed = ParseReturn(p);
    return parsed;
}
// Turns the tokens of a file into it's abstract syntax tree equivalent //
FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log) FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log)
{ {
// Logs the start of the parsing // Logs the start of the parsing
SafeLog(log, LOG_BREAK, "Started parsing tokens", LOG_BREAK); SafeLog(log, LOG_BREAK, "Started parsing tokens", LOG_BREAK);
// Creates the output storer and the parser //
FileAST output; FileAST output;
Parser p(tokens, log); Parser p(tokens, log);
// Loops over the tokens and calls the correct parsing function //
// Which depends on their type and current state of the parser //
while (p.index < p.len) while (p.index < p.len)
{ {
switch (p.tokens[p.index].type) switch (p.tokens[p.index].type)
@@ -81,13 +116,15 @@ namespace LX
case Token::FUNCTION: case Token::FUNCTION:
{ {
// Skips over function token + name token // Skips over function token + name token
// TEMPORARY // TODO: Store function name in the type
p.index++; p.index++; p.index++; p.index++;
// Pushes a new function to the vector and gets a reference to it for adding the body //
output.functions.emplace_back(); output.functions.emplace_back();
FunctionDefinition& func = output.functions.back(); FunctionDefinition& func = output.functions.back();
// TEMPORARY casue im lazy // Loops over the body until it reaches the end //
// TODO: Detect the end instead of looping over the entire token vector
while (p.index < p.len) while (p.index < p.len)
{ {
// Actually parses the function // Actually parses the function
@@ -98,16 +135,23 @@ namespace LX
p.index++; p.index++;
} }
// Goes to the next iteration of the loop //
continue; continue;
} }
// Lets the user know there is an error //
// TODO: Makes this error actually output useful information //
default: default:
std::cout << "UNKNOWN TOKEN FOUND" << std::endl; std::cout << "UNKNOWN TOKEN FOUND" << std::endl;
} }
} }
// Logs that AST has finished parsing //
// TODO: Make this output the AST in a human-readable form //
SafeLog(log, "AST length: ", output.functions[0].body.size()); SafeLog(log, "AST length: ", output.functions[0].body.size());
// Shrinks away any unnecessarily allocated memory and returns the output //
output.functions.shrink_to_fit();
return output; return output;
} }
} }

View File

@@ -9,9 +9,3 @@ Unknown: 3
Token::NUMBER_LITERAL: 3 Token::NUMBER_LITERAL: 3
Token::ADD Token::ADD
Token::NUMBER_LITERAL: 56 Token::NUMBER_LITERAL: 56
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
Started parsing tokens
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
AST length: 1

View File

@@ -21,5 +21,5 @@
#pragma warning(pop) #pragma warning(pop)
#else #else
#error This code only works with MSVC / VS22 #error This code is only designed to work with MSVC due to the use of vcpkg and other aspects
#endif // _MSC_VER #endif // _MSC_VER

View File

@@ -6,7 +6,7 @@
// Foward declarations of STD classes to minimise includes // // Foward declarations of STD classes to minimise includes //
namespace std namespace std
{ {
template<typename T1 = char> template<typename T1>
struct char_traits; struct char_traits;
template<typename T1, typename T2> template<typename T1, typename T2>
@@ -24,6 +24,7 @@ namespace std
namespace LX namespace LX
{ {
// Error type with index and character to alert the user that LX does not understand that symbol //
struct InvalidCharInSource struct InvalidCharInSource
{ {
std::streamsize index; std::streamsize index;

View File

@@ -2,25 +2,15 @@
// Lexer foward declares fstream components so we can use them here // // Lexer foward declares fstream components so we can use them here //
#include <Lexer.h> #include <Lexer.h>
#include <LLVM.h>
#include <memory> #include <memory>
// Foward declares all items of the llvm lib that we need // // Foward declares all items of the llvm lib that we need //
// Done to avoid including LLVM.h to shorten compile times // // Done to avoid including LLVM.h to shorten compile times //
/* namespace llvm { class Value; }
namespace llvm
{
class Value;
class LLVMContext;
class Module;
class ConstantFolder; // Foward declares the wrapper around the LLVM objects we need to pass around //
class IRBuilderDefaultInserter; namespace LX { struct InfoLLVM; }
template<typename T1 = ConstantFolder, typename T2 = IRBuilderDefaultInserter>
class IRBuilder;
}*/
// The nodes of the abstract syntax tree constructed by the parser from the tokens // // The nodes of the abstract syntax tree constructed by the parser from the tokens //
namespace LX::AST namespace LX::AST
@@ -46,16 +36,14 @@ namespace LX::AST
UNDEFINED = -1 UNDEFINED = -1
}; };
// Constructor to set the node type // // Constructor to set the node type (no others provided) //
Node(NodeType type) Node(NodeType type);
: m_Type(type)
{}
// Virtual destructor because of polymorphism // // Virtual destructor because of polymorphism //
virtual ~Node() = default; virtual ~Node() = default;
// Function for generating LLVN IR (Intermediate representation) // // Function for generating LLVN IR (Intermediate representation) //
virtual llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) = 0; virtual llvm::Value* GenIR(InfoLLVM& LLVM) = 0;
// Function for generating C/C++ code (Currently not implemented) // // Function for generating C/C++ code (Currently not implemented) //
//virtual void GenC() = 0; //virtual void GenC() = 0;
@@ -63,81 +51,36 @@ namespace LX::AST
// The type of the node // // The type of the node //
const NodeType m_Type; const NodeType m_Type;
}; };
class NumberLiteral : public Node
{
public:
// Constructor to set values and automatically set type
NumberLiteral(std::string num);
// Function for generating LLVN IR (Intermediate representation) //
llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) override;
private:
// The number it stores
// Yes the number is stored as a string
// It's horrible I know
std::string m_Number;
};
//
class Operation : public Node
{
public:
// Constructor to set values and automatically set type
Operation(std::unique_ptr<Node> lhs, Token::TokenType op, std::unique_ptr<Node> rhs);
// Function for generating LLVN IR (Intermediate representation) //
llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) override;
private:
// The sides of the operation
// Unary operations are handled by a different class
std::unique_ptr<Node> m_Lhs, m_Rhs;
// The operation to be applied to the two sides
Token::TokenType m_Operand;
};
//
class ReturnStatement : public Node
{
public:
// Constructor to set values and automatically set type
ReturnStatement(std::unique_ptr<Node> val);
// Function for generating LLVN IR (Intermediate representation) //
llvm::Value* GenIR(llvm::LLVMContext& context, llvm::Module& module, llvm::IRBuilder<>& builder) override;
private:
// What it is returning (can be null)
std::unique_ptr<Node> m_Val;
};
} }
namespace LX namespace LX
{ {
// Thrown if there was an error during IR Generation //
struct IRGenerationError {}; struct IRGenerationError {};
// Holds all needed info about a function //
// Currently only holds the body but in the future will hold: name, params, namespace/class-member
struct FunctionDefinition struct FunctionDefinition
{ {
FunctionDefinition() // Default constructor (none other given) //
: body{} FunctionDefinition();
{}
// The instructions of the body of the function //
std::vector<std::unique_ptr<AST::Node>> body; std::vector<std::unique_ptr<AST::Node>> body;
}; };
struct FileAST struct FileAST
{ {
FileAST() // Default constructor (none other given) //
: functions{} FileAST();
{}
// All the functions within this file //
std::vector<FunctionDefinition> functions; std::vector<FunctionDefinition> functions;
}; };
// Turns the tokens of a file into its abstract syntax tree equivalent //
FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log); FileAST TurnTokensIntoAbstractSyntaxTree(std::vector<Token>& tokens, std::ofstream* log);
// Turns an abstract syntax tree into LLVM intermediate representation //
void GenerateIR(FileAST& ast); void GenerateIR(FileAST& ast);
} }

View File

@@ -5,14 +5,19 @@
namespace LX namespace LX
{ {
template<typename T, typename... Args> template<typename T, typename... Args>
// Helper function to throw given error if condition is true //
// Also micro-optimises to predict there are no errors thrown //
inline void ThrowIf(const bool condition, Args... args) inline void ThrowIf(const bool condition, Args... args)
{ if (condition) [[unlikely]] { throw T(args...); }} { if (condition) [[unlikely]] { throw T(args...); }}
template<typename... Args> template<typename... Args>
// Helper function for logging //
// Only logs the given args if the log is not null //
inline void SafeLog(std::ofstream* log, Args... args) inline void SafeLog(std::ofstream* log, Args... args)
{ {
if (log != nullptr) { (*log << ... << args); *log << "\n"; } if (log != nullptr) { (*log << ... << args); *log << "\n"; }
} }
// Gives a standard way to mark a change between different sections within the log output //
constexpr const char* LOG_BREAK = "\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n"; constexpr const char* LOG_BREAK = "\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n";
} }