Added the token class

2025-07-19 23:02:16 +01:00
parent ab564e9649
commit c8975f0c20
5 changed files with 185 additions and 0 deletions
--- a/Lexer/inc/Lexer.h
+++ b/Lexer/inc/Lexer.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <Token.h>
+
+namespace LXC::Lexer
+{
+	// Holds the state of the Lexer as it walks through the source //
+	struct LexerContext
+	{
+		// Trackers for the Lexer itself //
+
+		// The text being lexed //
+		std::string source;
+
+		// Offset into source that a Token copies its contents from //
+		size_t index = 0;
+
+		// Trackers for where the Lexer is within the user version of source //
+		// Both start on 1 to match the values a Token reports to the user   //
+
+		unsigned short column = 1;
+		unsigned short line = 1;
+	};
+}
--- a/Lexer/inc/Token.h
+++ b/Lexer/inc/Token.h
@@ -0,0 +1,115 @@
+#pragma once
+
+#include <LXC.h>
+
+namespace LXC::Lexer
+{
+	// Forward declaration so that a LexerContext can be passed to the Token constructor //
+	struct LexerContext;
+
+	namespace TokenClass
+	{
+		// Every token class owns one bit in the upper byte of a token type value, //
+		// the bits underneath are left free to number the types inside the class  //
+		enum ClassMask : unsigned short
+		{
+			// Bit 9: mathematical and logic operators //
+			Operator		= 0x0200,
+
+			// Bit 10: special words defined by the compiler //
+			Keyword			= 0x0400,
+
+			// Bit 11: words such as literals and identifiers //
+			UserDefined		= 0x0800,
+
+			// Bit 12: symbols in the source like (? , . ! <) //
+			Symbols			= 0x1000,
+
+			// Bit 13: tokens not covered by any of the classes above //
+			Misc			= 0x2000
+		};
+	}
+
+	// Data type for storing the output of the lexer //
+	class Token final
+	{
+		public:
+			// Enum of token type organised by their token class //
+			enum TokenType : unsigned short
+			{
+				// === Operators === //
+
+				Add				= TokenClass::Operator,
+				Sub,
+				Mul,
+				Div,
+				Mod,
+
+				// === Keywords === //
+
+				For				= TokenClass::Keyword,
+				While,
+				If,
+				Else_If,
+				Else,
+				Return,
+
+				// === User defined === //
+
+				String_Literal	= TokenClass::UserDefined,
+				Int_Literal,
+				Float_Literal,
+				Identifier,
+
+				// === Symbols === //
+
+				Assign			= TokenClass::Symbols,
+
+				Close_bracket,
+				Open_bracket,
+
+				Close_brace,
+				Open_brace,
+
+				Close_paren,
+				Open_paren,
+
+				Comma,
+
+				// === Misc === //
+
+				End_of_file		= TokenClass::Misc,
+
+				UNDEFINED = 65535 // Invalid token type (max number)
+			};
+
+			// Util function calculating wether a token is of a given class //
+			template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type) { return type & mask; }
+			template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(Token token) { return token.type & mask; }
+
+			// Constructor to set the data of the token //
+			Token(const LexerContext& context, const unsigned short _length, TokenType _type);
+
+			// Deconstructor to clean up the allocated memory //
+			~Token();
+
+			// Getters for the c-string to stop it being reassigned (or deleted) //
+			inline const char* const Str() const { return contents; }
+			operator const char* const() { return contents; }
+
+			// The type of the token //
+			const TokenType type;
+
+			// The length of the token //
+			const unsigned short length;
+
+			// The line the token is on (starts on 1) //
+			const unsigned short line;
+
+			// The index on the line (starts on 1) //
+			const unsigned short column;
+
+		private:
+			// The data of the token //
+			const char* contents;
+	};
+}
--- a/Lexer/src/Lexer.cpp
+++ b/Lexer/src/Lexer.cpp
@@ -1 +1,8 @@
 #include <LXC.h>
+
+#include <Lexer.h>
+#include <Token.h>
+
+namespace LXC::Lexer
+{
+}
--- a/Lexer/src/Token.cpp
+++ b/Lexer/src/Token.cpp
@@ -0,0 +1,37 @@
+#include <LXC.h>
+
+#include <Lexer.h>
+#include <Token.h>
+
+namespace LXC::Lexer
+{
+	// Copies length characters of the source (starting at the index of the context) to a new c-string //
+	// Returns nullptr for any type outside of the user defined class, else a null terminated buffer  //
+	// of length + 1 characters which the caller must free with delete[]                              //
+	static const char* const CopySubstrToMem(const LexerContext& context, const size_t length, Token::TokenType type)
+	{
+		// Only user defined class tokens need to store their contents //
+		if (!Token::IsTypeClass<TokenClass::UserDefined>(type))
+			return nullptr;
+
+		// Clamps the copy to the characters left in the source so it cannot read past the end //
+		const size_t sourceSize = context.source.size();
+		const size_t available = (context.index < sourceSize) ? (sourceSize - context.index) : 0;
+		const size_t copyLength = (length < available) ? length : available;
+
+		// Zero-initialised so the terminator (and any characters that could not be copied) are '\0' //
+		char* cStr = new char[length + 1]();
+
+		// Copies the memory to the c-string //
+		if (copyLength > 0)
+			std::memcpy(cStr, context.source.data() + context.index, copyLength);
+
+		return cStr;
+	}
+
+	// Constructor to assign the members of the token class //
+	Token::Token(const LexerContext& context, const unsigned short _length, TokenType _type) :
+		type(_type), length(_length), line(context.line), column(context.column),
+		contents(CopySubstrToMem(context, _length, _type))
+	{}
+
+	// Destructor to clean up the memory of the token that can be allocated //
+	Token::~Token()
+	{
+		// Frees any allocated memory //
+		if (contents != nullptr)
+			delete[] contents;
+
+		contents = nullptr;
+	}
+}