#include #include #include #include namespace LX { // Local struct so everything can be public // struct Parser { // Passes constructor args to members // Parser(std::vector& _tokens, std::ofstream* _log) : tokens(_tokens), log(_log), index(0), len(_tokens.size()), scopeDepth(0) {} // Tokens created by the lexer // std::vector& tokens; // Log to output to (can be null) // std::ofstream* log; // Length of the the token vector // const size_t len; // Current index within the token vector // size_t index; // Current scope depth // size_t scopeDepth; }; // Base of the call stack to handle the simplest of tokens // static std::unique_ptr ParsePrimary(Parser& p) { // There are lots of possible token's that can be here so a switch is used // switch (p.tokens[p.index].type) { // Number literals just require them to be turned into an AST node // // Note: Number literals are stored as strings because i'm a masochist // case Token::NUMBER_LITERAL: return std::make_unique(p.tokens[p.index++].GetContents()); // If an Identifier has got here it means a variable is being accessed // case Token::IDENTIFIER: return std::make_unique(p.tokens[p.index++].GetContents()); // TODO: Fix this // case Token::OPEN_BRACKET: p.scopeDepth++; p.index++; return nullptr; // TODO: Fix this // case Token::CLOSE_BRACE: ThrowIf(p.scopeDepth == 0, Token::UNDEFINED, "need a different error", p.tokens[p.index]); p.scopeDepth--; p.index++; return nullptr; // Returns nullptr, the parsing function that recives that value will decide if that is valid // default: p.index++; return nullptr; } } // Handles operations, if it is not currently at an operation goes to ParsePrimary // static std::unique_ptr ParseOperation(Parser& p) { // Checks if the next token is an operator // // TODO: Add more than just add // if (p.index + 1 < p.len) [[likely]] { if (p.tokens[p.index + 1].type == Token::ADD) { // Parses the left hand side of the operation // std::unique_ptr lhs = ParsePrimary(p); ThrowIf(lhs == nullptr, Token::UNDEFINED, "value", p.tokens[p.index - 1]); // Stores the operator to pass into the AST node // Token::TokenType op = p.tokens[p.index].type; p.index++; // Parses the right hand of the operation // std::unique_ptr rhs = ParseOperation(p); ThrowIf(rhs == nullptr, Token::UNDEFINED, "value", p.tokens[p.index - 1]); // Returns an AST node as all of the components combined together // return std::make_unique(std::move(lhs), op, std::move(rhs)); } } // Else goes down the call stack // return ParsePrimary(p); } // Handles return statements, if not calls ParseOperation // static std::unique_ptr ParseReturn(Parser& p) { // Checks if the current token is a return // if (p.tokens[p.index].type == Token::RETURN) { // If so it adds an AST node with the value being returned // // Does not mind if this returns nullptr as that just means nothing was returned // p.index++; return std::make_unique(ParseOperation(p)); } // Else goes down the call stack // return ParseOperation(p); } // Handles variable declarations, if not calls ParseReturn // static std::unique_ptr ParseVarDeclaration(Parser& p) { // Checks if the current token is a declaration // if (p.tokens[p.index].type == Token::INT_DEC) { // Skips over the dec token // p.index++; // Checks for the variable name // ThrowIf(p.tokens[p.index].type != Token::IDENTIFIER, Token::IDENTIFIER, "", p.tokens[p.index]); std::string name = p.tokens[p.index].GetContents(); p.index++; // <- Goes over the identifier token // Returns the declaration if there is no default assignment to the variable // if (p.tokens[p.index].type != Token::ASSIGN) { // Creates the variable name from the contents of the token and returns it // return std::make_unique(name); } p.index++; // Skips over Token::ASSIGN // Gets the value to be assigned to the variable // std::unique_ptr defaultVal = ParsePrimary(p); ThrowIf(defaultVal.get() == nullptr, Token::UNDEFINED, "value", p.tokens[p.index - 1]); return std::make_unique(name); } // Else goes down the call stack // return ParseReturn(p); } // Handles variable assignments, if not calls ParseVarDeclaration // static std::unique_ptr ParseVarAssignment(Parser& p) { // Checks if the next token is an equals // if (p.index + 1 < p.len) [[likely]] { if (p.tokens[p.index + 1].type == Token::ASSIGN) { // Gets the variable that is being assigned too // ThrowIf(p.tokens[p.index].type != Token::IDENTIFIER, Token::IDENTIFIER, "", p.tokens[p.index]); std::string name = p.tokens[p.index].GetContents(); // Skips over the assign token and name of the variable // p.index = p.index + 2; // Gets the value that is being assigned // std::unique_ptr value = ParseOperation(p); // Returns an AST node of the variable assignment with it's name and value // return std::make_unique(name, std::move(value)); } } // Else goes down the call stack // return ParseVarDeclaration(p); } // Helper function to call the top of the Parse-Call-Stack // static inline std::unique_ptr Parse(Parser& p) { // Parses the current token // std::unique_ptr out = ParseVarAssignment(p); // Checks it is valid before returning // ThrowIf(out == nullptr, Token::UNDEFINED, "top level statement", p.tokens[p.index - 1]); return out; } // Turns the tokens of a file into it's abstract syntax tree equivalent // FileAST TurnTokensIntoAbstractSyntaxTree(std::vector& tokens, std::ofstream* log) { // Logs the start of the parsing SafeLog(log, LOG_BREAK, "Started parsing tokens", LOG_BREAK); // Creates the output storer and the parser // FileAST output; Parser p(tokens, log); // Loops over the tokens and calls the correct parsing function // // Which depends on their type and current state of the parser // while (p.index < p.len) { switch (p.tokens[p.index].type) { case Token::FUNCTION: { // Skips over function token // p.index++; // Pushes a new function to the vector and gets a reference to it for adding the body // output.functions.emplace_back(); FunctionDefinition& func = output.functions.back(); // Assigns the function name // ThrowIf(p.tokens[p.index].type != Token::IDENTIFIER, Token::IDENTIFIER, "", p.tokens[p.index]); func.name = p.tokens[p.index++].GetContents(); // Checks for opening paren '(' // ThrowIf(p.tokens[p.index].type != Token::OPEN_PAREN, Token::OPEN_PAREN, "", p.tokens[p.index]); p.index++; // Loops over all the arguments of the function // // TODO: Do something with the parameters while (p.index < p.len && (p.tokens[p.index].type == Token::CLOSE_PAREN) == false) { p.index++; } // Skips over close bracket // p.index++; // Checks for opening bracket '{' // ThrowIf(p.tokens[p.index].type != Token::OPEN_BRACKET, Token::OPEN_BRACKET, "", p.tokens[p.index]); p.index++; // Loops over the body until it reaches the end // while (p.index < p.len && (p.tokens[p.index].type == Token::CLOSE_BRACKET && p.scopeDepth == 0) == false) { // Actually parses the function // std::unique_ptr node = Parse(p); // Expands the node if it contains multiple // if (node->m_Type == AST::Node::MULTI_NODE) { for (std::unique_ptr& containedNode : ((AST::MultiNode*)node.get())->nodes) { // Logs the node to the log // if (log != nullptr) { node->Log(log, 0); } // Adds it to the vector // func.body.push_back(std::move(containedNode)); } } // Else adds the singular node to the vector // else { // Logs the node to the log // if (log != nullptr) { node->Log(log, 0); } // Adds it to the vector // func.body.push_back(std::move(node)); } } // Skips over closing bracket // p.index++; // Goes to the next iteration of the loop // continue; } // Lets the user know there is an error // // TODO: Makes this error actually output useful information // default: std::cout << "UNKNOWN TOKEN FOUND: " << ToString(p.tokens[p.index].type) << std::endl; return output; } } // Logs that AST has finished parsing // // TODO: Make this output the AST in a human-readable form // SafeLog(log, "AST length: ", output.functions[0].body.size()); // Returns the output and shrinks all uneccesarry allocated memory output.functions.shrink_to_fit(); return output; } }