// LXC lexer token definitions (PashaBibko::LXC::Lexer)
#pragma once
|
|
|
|
#include <Util.h>
|
|
|
|
namespace PashaBibko::LXC::Lexer
|
|
{
|
|
// Bitmasks identifying the broad class a token belongs to //
// Each class occupies one bit in the high byte of the 16-bit value, leaving
// the low byte free to enumerate individual tokens within the class — every
// section of Token::TokenType is seeded with one of these masks.
namespace TokenClass
{
    enum ClassMask : unsigned short
    {
        // Mathematical and logic operators //
        Operator = 1 << (1 + 8),

        // Special words defined by the compiler //
        Keyword = 1 << (2 + 8),

        // Words such as literals and identifiers //
        UserDefined = 1 << (3 + 8),

        // Symbols in the source like (? , . ! <) //
        Symbols = 1 << (4 + 8),

        // Tokens not defined by previous classes //
        Misc = 1 << (5 + 8)
    };
}
// Forward declaration of the lexer's working state (defined elsewhere);
// Token's constructor takes it by const reference.
struct LexerContext;
class Token final
|
|
{
|
|
public:
|
|
// Enum of token type organised by their token class //
|
|
enum TokenType : unsigned short
|
|
{
|
|
// === Operators === //
|
|
|
|
Add = TokenClass::Operator,
|
|
Sub,
|
|
Mul,
|
|
Div,
|
|
Mod,
|
|
|
|
Eql,
|
|
|
|
// === Keywords === //
|
|
|
|
For = TokenClass::Keyword,
|
|
While,
|
|
If,
|
|
ElseIf,
|
|
Else,
|
|
Return,
|
|
|
|
FunctionDef,
|
|
|
|
// === User defined === //
|
|
|
|
StringLiteral = TokenClass::UserDefined,
|
|
NumLiteral,
|
|
Identifier,
|
|
|
|
// === Symbols === //
|
|
|
|
Assign = TokenClass::Symbols,
|
|
Colon,
|
|
|
|
CloseBracket,
|
|
OpenBracket,
|
|
|
|
CloseBrace,
|
|
OpenBrace,
|
|
|
|
CloseParen,
|
|
OpenParen,
|
|
|
|
CloseCrocodile,
|
|
OpenCrocodile,
|
|
|
|
Comma,
|
|
|
|
// === Misc === //
|
|
|
|
End_of_file = TokenClass::Misc,
|
|
|
|
UNDEFINED = 65535 // Invalid token type (max number)
|
|
};
|
|
|
|
// Util function calculating wether a token is of a given class //
|
|
template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type)
|
|
{
|
|
using T = std::underlying_type_t<TokenType>;
|
|
return static_cast<T>(type) & static_cast<T>(mask);
|
|
}
|
|
|
|
// Constructor to set the data of the token for more complex token types //
|
|
Token(const LexerContext& ctx, uint32_t start, unsigned short len, TokenType _type);
|
|
|
|
// Copy constructor //
|
|
Token(const Token& other);
|
|
|
|
// Move constructor (transfers memory allocated) //
|
|
Token(Token&& other) noexcept;
|
|
|
|
// Cannot use these as members are const //
|
|
Token& operator=(const Token&) = delete;
|
|
Token& operator=(Token&&) = delete;
|
|
|
|
// Deconstructor to clean up the allocated memory //
|
|
~Token();
|
|
|
|
// Getters for the c-string to stop it being reassigned (or deleted) //
|
|
inline const char* const Str() const { return contents; }
|
|
|
|
// Outputs all the relevant infomration in a string for logging purposes //
|
|
std::string LogStr() const;
|
|
|
|
// The type of the token //
|
|
const TokenType type;
|
|
|
|
// The length of the token //
|
|
const unsigned short length;
|
|
|
|
// Start index of the token //
|
|
const uint32_t index;
|
|
|
|
private:
|
|
// The data of the token //
|
|
char* contents;
|
|
};
|
|
|
|
// Typedef for the output type of how the Lexer outputs //
|
|
// Alias for the output type of the Lexer: the token stream for one source //
// (modernised from `typedef` to a `using` alias; identical type.)
using LexerOutput = std::vector<Token>;
}
|