Files
MIRROR-LX-OriginalRepo/IR-Generator/inc/Lexer.h
2025-05-05 16:45:34 +01:00

133 lines
3.1 KiB
C++

#pragma once
#include <LX-Common.h>
// This file contains everything that is exported from Lexer.lib
// The rest of the items within the Lexer project are internal only
namespace LX
{
// Error type used to alert the user that LX does not understand a symbol in the source //
// Carries the invalid character together with the column, line and index it was reported with //
struct InvalidCharInSource : public RuntimeError
{
// Declarations produced by a project macro (defined outside this header, expansion not visible here) //
GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS;
// Takes the column, line and index of the character plus the character itself (defined outside this header) //
InvalidCharInSource(std::streamsize _col, std::streamsize _line, std::streamsize _index, char _invalid);
// Static pointers to the source text and to the path of the source file //
// NOTE(review): presumably assigned before lexing so the error can show context - who owns them is not visible here, confirm //
static std::string* s_Source;
static std::filesystem::path* s_SourceFile;
// Location of the invalid character //
// NOTE(review): presumably filled from the matching constructor arguments - confirm in the definition //
std::streamsize col;
std::streamsize line;
std::streamsize index;
// The character that was not understood //
char invalid;
};
// Bookkeeping for the lexer: where it currently is in the source and what it is in the middle of //
struct LexerInfo
{
    // Position trackers within the source //
    std::streamsize line{ 1 };   // <- Line numbering begins at 1
    std::streamsize index{ 0 };
    std::streamsize column{ 0 }; // <- Begins at 0; NOTE(review): the previous comment claimed columns start on 1 - confirm which is intended

    // Where the multi-char token being read began //
    std::streamsize startOfWord{ 0 };
    std::streamsize startOfNumberLiteral{ 0 };
    std::streamsize startOfStringLiteral{ 0 };

    // State flags of the lexer //
    // Packed as one-bit bit-fields to minimise memory (basically no difference, because only one exists at any given time) //
    bool isAlpha : 1 = false;
    bool isNumeric : 1 = false;

    bool inComment : 1 = false;
    bool inStringLiteral : 1 = false;

    bool isNextCharAlpha : 1 = false;
    bool isNextCharNumeric : 1 = false;

    bool wasLastCharAlpha : 1 = false;
    bool wasLastCharNumeric : 1 = false;

    bool lexingNumber : 1 = false;
};
// Data type to store a more computer readable version of files
struct __declspec(novtable) Token final
{
//
static std::string* source;
// Enum to hold the type of the token //
enum TokenType : short
{
// General tokens //
STRING_LITERAL,
NUMBER_LITERAL,
IDENTIFIER,
RETURN,
// Built-in types //
INT_DEC,
// Symbols //
OPEN_BRACKET, CLOSE_BRACKET,
OPEN_BRACE, CLOSE_BRACE,
OPEN_PAREN, CLOSE_PAREN,
COMMA,
//
ASSIGN,
// Operators //
ADD, SUB, MUL, DIV,
// Keywords //
FOR, WHILE,
IF, ELSE, ELIF,
FUNCTION,
// You made a mistake somehow //
UNDEFINED = -1
};
// Constructor of the tokens to set their info //
Token(const TokenType _type, const LexerInfo& info, std::streamsize _length);
//
std::string GetContents() const;
// Type of the token //
// Const to avoid external changes //
const TokenType type;
// Index in the source of the token //
const std::streamsize index;
// The length of the token on the line //
const std::streamsize length;
// The line the token is located on //
const std::streamsize line;
// The column on the line where it is located //
const std::streamsize column;
};
// Logging function to turn a TokenType enum value into its string //
// The result is returned by value; the mapping itself is defined outside this header //
std::string ToString(Token::TokenType t);
// Lexer function to take in the contents of a file and output a vector of tokens //
// contents - the source text to lex, passed as a string rather than a file handle //
// len      - NOTE(review): presumably how many characters of contents to lex - confirm against the definition //
// log      - pointer to an output file stream; NOTE(review): presumably used for logging and possibly allowed to be null - confirm //
// The vector is returned as a const value, so assigning the result to an existing vector copies instead of moves //
const std::vector<Token> LexicalAnalyze(const std::string& contents, const std::streamsize len, std::ofstream* log);
}