// Mirror of https://github.com/PashaBibko/LX.git (synced 2026-04-03 17:39:02 +00:00)
// 133 lines, 3.1 KiB, C++
#pragma once
#include <LX-Common.h>

// This file contains everything that is exported from Lexer.lib
// The rest of the items within the Lexer project are internal only

namespace LX
{
// Error type with index and character to alert the user that LX does not understand that symbol //
|
|
struct InvalidCharInSource : public RuntimeError
|
|
{
|
|
GENERATE_LX_ERROR_REQUIRED_FUNCTION_DECLARATIONS;
|
|
|
|
InvalidCharInSource(std::streamsize _col, std::streamsize _line, std::streamsize _index, char _invalid);
|
|
|
|
static std::string* s_Source;
|
|
static std::filesystem::path* s_SourceFile;
|
|
|
|
std::streamsize col;
|
|
std::streamsize line;
|
|
std::streamsize index;
|
|
|
|
char invalid;
|
|
};
// Struct to store the current information of the lexer //
struct LexerInfo
{
	// Current trackers of where in the source it is //

	std::streamsize line = 1; // <- Lines start on 1 (probably because of non-programmers)
	std::streamsize index = 0;
	std::streamsize column = 0; // <- Initialised to 0; NOTE(review): columns are meant to start on 1, presumably this is incremented before the first character is recorded - confirm in the lexer

	// Trackers for when a multi-char token started //

	std::streamsize startOfWord = 0;
	std::streamsize startOfNumberLiteral = 0;
	std::streamsize startOfStringLiteral = 0;

	// Different flags of the lexer //
	// Stored as 1-bit bit-fields to minimise memory allocated (basically no difference, because only one exists at any given time) //
	// Default member initialisers on bit-fields require C++20 //

	bool isAlpha : 1 = false;
	bool isNumeric : 1 = false;
	bool inComment : 1 = false;
	bool inStringLiteral : 1 = false;
	bool isNextCharAlpha : 1 = false;
	bool isNextCharNumeric : 1 = false;
	bool wasLastCharAlpha : 1 = false;
	bool wasLastCharNumeric : 1 = false;
	bool lexingNumber : 1 = false;
};
// Data type to store a more computer readable version of files
|
|
struct __declspec(novtable) Token final
|
|
{
|
|
//
|
|
static std::string* source;
|
|
|
|
// Enum to hold the type of the token //
|
|
enum TokenType : short
|
|
{
|
|
// General tokens //
|
|
|
|
STRING_LITERAL,
|
|
NUMBER_LITERAL,
|
|
IDENTIFIER,
|
|
RETURN,
|
|
|
|
// Built-in types //
|
|
|
|
INT_DEC,
|
|
|
|
// Symbols //
|
|
|
|
OPEN_BRACKET, CLOSE_BRACKET,
|
|
OPEN_BRACE, CLOSE_BRACE,
|
|
OPEN_PAREN, CLOSE_PAREN,
|
|
|
|
COMMA,
|
|
|
|
//
|
|
|
|
ASSIGN,
|
|
|
|
// Operators //
|
|
|
|
ADD, SUB, MUL, DIV,
|
|
|
|
// Keywords //
|
|
|
|
FOR, WHILE,
|
|
IF, ELSE, ELIF,
|
|
|
|
FUNCTION,
|
|
|
|
// You made a mistake somehow //
|
|
|
|
UNDEFINED = -1
|
|
};
|
|
|
|
// Constructor of the tokens to set their info //
|
|
Token(const TokenType _type, const LexerInfo& info, std::streamsize _length);
|
|
|
|
//
|
|
std::string GetContents() const;
|
|
|
|
// Type of the token //
|
|
// Const to avoid external changes //
|
|
const TokenType type;
|
|
|
|
// Index in the source of the token //
|
|
const std::streamsize index;
|
|
|
|
// The length of the token on the line //
|
|
const std::streamsize length;
|
|
|
|
// The line the token is located on //
|
|
const std::streamsize line;
|
|
|
|
// The column on the line where it is located //
|
|
const std::streamsize column;
|
|
};
// Logging function to turn a tokentype enum val into it's string //
|
|
std::string ToString(Token::TokenType t);
// Lexer function to take in a file and output a vector of tokens //
|
|
const std::vector<Token> LexicalAnalyze(const std::string& contents, const std::streamsize len, std::ofstream* log);
}