From c8975f0c2095e3d54a52c8b55994d3247eaab3b3 Mon Sep 17 00:00:00 2001
From: Pasha Bibko <156938226+PashaBibko@users.noreply.github.com>
Date: Sat, 19 Jul 2025 23:02:16 +0100
Subject: [PATCH] Added the token class

---
 LXC/LXC.cpp         |   9 ++++
 Lexer/inc/Lexer.h   |  17 +++++++
 Lexer/inc/Token.h   | 115 ++++++++++++++++++++++++++++++++++++++++++++
 Lexer/src/Lexer.cpp |   7 +++
 Lexer/src/Token.cpp |  37 ++++++++++++++
 5 files changed, 185 insertions(+)
 create mode 100644 Lexer/inc/Lexer.h
 create mode 100644 Lexer/inc/Token.h
 create mode 100644 Lexer/src/Token.cpp

diff --git a/LXC/LXC.cpp b/LXC/LXC.cpp
index ee9e5fb..713a8b9 100644
--- a/LXC/LXC.cpp
+++ b/LXC/LXC.cpp
@@ -1,9 +1,18 @@
 #include <LXC.h>
 
+#include <Lexer.h>
+
 int main(int argc, char** argv)
 {
 	using namespace LXC;
 
+	//
+
+	Lexer::LexerContext context{}; // Value-initialised so index/line/column are never read while indeterminate
+	Lexer::Token exampleToken(context, 2, Lexer::Token::End_of_file); // The constructor requires a token type; this one copies no text
+
+	//
+
 	Util::ReturnVal fileContents = Util::ReadFile("example/example.lx");
 
 	if (fileContents.Suceeded())
diff --git a/Lexer/inc/Lexer.h b/Lexer/inc/Lexer.h
new file mode 100644
index 0000000..a9e844f
--- /dev/null
+++ b/Lexer/inc/Lexer.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <Token.h>
+
+namespace LXC::Lexer
+{
+	struct LexerContext
+	{
+		// Trackers for the Lexer itself: the text being lexed and the current offset into it //
+		std::string source;
+		size_t index = 0;
+
+		// Trackers for where the Lexer is within the user version of source (1-based, matching Token) //
+		unsigned short column = 1;
+		unsigned short line = 1;
+	};
+}
diff --git a/Lexer/inc/Token.h b/Lexer/inc/Token.h
new file mode 100644
index 0000000..8c89f81
--- /dev/null
+++ b/Lexer/inc/Token.h
@@ -0,0 +1,115 @@
+#pragma once
+
+#include <LXC.h>
+
+namespace LXC::Lexer
+{
+	// Forward declaration so that a LexerContext can be passed to the Token class //
+	struct LexerContext;
+
+	namespace TokenClass
+	{
+		// Bitmask for different token classes: one bit each from bit 9 upwards, lower bits are left for per-class ordinals //
+		enum ClassMask : unsigned short
+		{
+			// Mathematical and logic operators //
+			Operator		= 1 << (1 + 8),
+
+			// Special words defined by the compiler //
+			Keyword			= 1 << (2 + 8),
+
+			// Words such as literals and identifiers //
+			UserDefined		= 1 << (3 + 8),
+
+			// Symbols in the source like (? , . ! <) //
+			Symbols			= 1 << (4 + 8),
+
+			// Tokens not defined by previous classes //
+			Misc			= 1 << (5 + 8)
+		};
+	}
+
+	// Data type for storing the output of the lexer; owns the text buffer of user defined tokens //
+	class Token final
+	{
+		public:
+			// Enum of token type organised by their token class; each class counts up from its TokenClass bit //
+			enum TokenType : unsigned short
+			{
+				// === Operators === //
+				Add				= TokenClass::Operator,
+				Sub,
+				Mul,
+				Div,
+				Mod,
+
+				// === Keywords === //
+				For				= TokenClass::Keyword,
+				While,
+				If,
+				Else_If,
+				Else,
+				Return,
+
+				// === User defined === //
+				String_Literal	= TokenClass::UserDefined,
+				Int_Literal,
+				Float_Literal,
+				Identifier,
+
+				// === Symbols === //
+				Assign			= TokenClass::Symbols,
+				Close_bracket,
+				Open_bracket,
+				Close_brace,
+				Open_brace,
+				Close_paren,
+				Open_paren,
+				Comma,
+
+				// === Misc === //
+				End_of_file		= TokenClass::Misc,
+
+				UNDEFINED = 65535 // Invalid token type (max number); sets every class bit, so IsTypeClass rejects it by value
+			};
+
+			// Util functions calculating whether a token is of a given class (UNDEFINED belongs to no class) //
+			template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type) { return type != UNDEFINED && (type & mask) != 0; }
+			template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(const Token& token) { return IsTypeClass<mask>(token.type); }
+
+			// Constructor to set the data of the token //
+			Token(const LexerContext& context, const unsigned short _length, TokenType _type);
+
+			// Copying is disabled: a copy would share contents and the same buffer would be freed twice //
+			Token(const Token&) = delete;
+			Token& operator=(const Token&) = delete;
+
+			// Move constructor transfers the buffer and leaves the moved-from token without one //
+			Token(Token&& other) noexcept :
+				type(other.type), length(other.length), line(other.line), column(other.column), contents(other.contents)
+			{ other.contents = nullptr; }
+
+			// Destructor to clean up the allocated memory //
+			~Token();
+
+			// Getters for the c-string to stop it being reassigned (or deleted) //
+			inline const char* const Str() const { return contents; }
+			operator const char* const() const { return contents; }
+
+			// The type of the token //
+			const TokenType type;
+
+			// The length of the token //
+			const unsigned short length;
+
+			// The line the token is on (starts on 1) //
+			const unsigned short line;
+
+			// The index on the line (starts on 1) //
+			const unsigned short column;
+
+		private:
+			// The data of the token (only allocated for user defined tokens, otherwise nullptr) //
+			const char* contents;
+	};
+}
diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp
index c5cd877..21a5932 100644
--- a/Lexer/src/Lexer.cpp
+++ b/Lexer/src/Lexer.cpp
@@ -1 +1,8 @@
 #include <LXC.h>
+
+#include <Lexer.h>
+#include <Token.h>
+
+namespace LXC::Lexer // Empty placeholder: this file does not define any lexer functions yet //
+{
+}
diff --git a/Lexer/src/Token.cpp b/Lexer/src/Token.cpp
new file mode 100644
index 0000000..3c2138e
--- /dev/null
+++ b/Lexer/src/Token.cpp
@@ -0,0 +1,37 @@
+#include <LXC.h>
+
+#include <Lexer.h>
+#include <Token.h>
+
+namespace LXC::Lexer
+{
+	static const char* const CopySubstrToMem(const LexerContext& context, const size_t length, Token::TokenType type)
+	{
+		// Only user defined class tokens need to store their contents; all other tokens get no buffer //
+		if (!Token::IsTypeClass<TokenClass::UserDefined>(type))
+			return nullptr;
+
+		// Copies the text to a new[] null-terminated c-string, clamped so it never reads past the end of source //
+		const size_t start = (context.index < context.source.size()) ? context.index : context.source.size();
+		const size_t count = (length < context.source.size() - start) ? length : context.source.size() - start;
+		char* cStr = new char[count + 1];
+		std::memcpy(cStr, context.source.data() + start, count);
+		cStr[count] = '\0'; return cStr;
+	}
+
+	// Builds a token at the lexer's current line/column, copying its text only when the type needs it //
+	Token::Token(const LexerContext& context, const unsigned short _length, TokenType _type)
+		: type{ _type }, length{ _length }, line{ context.line }, column{ context.column },
+		  contents{ CopySubstrToMem(context, _length, _type) }
+	{}
+
+	// Releases the text buffer held by the token, if one was ever allocated //
+	Token::~Token()
+	{
+		// delete[] on a null pointer does nothing, so no explicit check is needed //
+		delete[] contents;
+
+		// Drop the stale address so the freed buffer is no longer referenced //
+		contents = nullptr;
+	}
+}