LXC/Lexer/inc/Token.h

#pragma once

#include <LXC.h>

#include <cstdint>

namespace LXC::Lexer
{
	namespace TokenClass
	{
		// Bitmask identifying which class a token type belongs to //
		//
		// Every class owns exactly one bit (bits 9 to 13). Token::TokenType starts each
		// group of enumerators at its class value and counts upwards from there, so the
		// class bit stays set for every member of a group (up to 512 members per group).
		enum ClassMask : unsigned short
		{
			// Mathematical and logic operators //
			Operator		= 1 << (1 + 8),

			// Special words defined by the compiler //
			Keyword			= 1 << (2 + 8),

			// Words such as literals and identifiers //
			UserDefined		= 1 << (3 + 8),

			// Symbols in the source like (? , . ! <) //
			Symbols			= 1 << (4 + 8),

			// Tokens not defined by previous classes //
			Misc			= 1 << (5 + 8)
		};
	}

	struct LexerContext;

	// Data type for storing the output of the lexer //
	//
	// NOTE(review): Token declares a destructor and holds a raw char*, but declares no
	// copy/move operations, so the compiler-generated copy constructor copies the pointer
	// only. If the destructor frees `contents` (it is defined outside this header - confirm),
	// copying a Token - including when LexerOutput reallocates - leaves two objects
	// releasing the same buffer. Consider an owning member type or explicit copy/move.
	class Token final
	{
		public:
			// Enum of token type organised by their token class //
			// The first enumerator of each group is pinned to its TokenClass bit //
			enum TokenType : unsigned short
			{
				// === Operators === //

				Add				= TokenClass::Operator,
				Sub,
				Mul,
				Div,
				Mod,

				// === Keywords === //

				For				= TokenClass::Keyword,
				While,
				If,
				Else_If,
				Else,
				Return,

				// === User defined === //

				String_Literal	= TokenClass::UserDefined,
				Num_Literal,
				Identifier,

				// === Symbols === //

				Assign			= TokenClass::Symbols,

				Close_bracket,
				Open_bracket,

				Close_brace,
				Open_brace,

				Close_paren,
				Open_paren,

				Comma,

				// === Misc === //

				End_of_file		= TokenClass::Misc,

				UNDEFINED = 65535 // Invalid token type (max number)
			};

			// Util function calculating whether a token type is of a given class //
			// UNDEFINED has every bit set, so it is excluded explicitly: an invalid //
			// token type belongs to no class rather than to all of them //
			template<TokenClass::ClassMask mask>
			static constexpr bool IsTypeClass(TokenType type)
			{
				return type != UNDEFINED
					&& (static_cast<unsigned>(type) & static_cast<unsigned>(mask)) != 0u;
			}

			// Same check for a whole token. Taken by reference so that the token (and //
			// the raw buffer pointer it holds) is never copied just to inspect its type //
			template<TokenClass::ClassMask mask>
			static constexpr bool IsTypeClass(const Token& token)
			{
				return IsTypeClass<mask>(token.type);
			}

			// Constructor to set the data of the token for more complex token types //
			// (presumably start/len/_type initialise index/length/type - defined elsewhere) //
			Token(const LexerContext& ctx, std::uint32_t start, unsigned short len, TokenType _type);

			// Destructor to clean up the allocated memory //
			~Token();

			// Read-only getter for the c-string so callers cannot reassign (or modify) it //
			const char* Str() const { return contents; }

			// Outputs all the relevant information in a string for logging purposes //
			std::string LogStr() const;

			// The type of the token //
			const TokenType type;

			// The length of the token //
			const unsigned short length;

			// Start index of the token //
			const std::uint32_t index;

		private:
			// The data of the token //
			char* contents;
	};

	// Alias for the type the Lexer produces as its output //
	using LexerOutput = std::vector<Token>;
}