Added basic lexer
This commit is contained in:
@@ -11,10 +11,10 @@ namespace LXC::Lexer
|
||||
|
||||
// Trackers for the Lexer itself //
|
||||
const std::string& source;
|
||||
size_t index;
|
||||
__int32 index;
|
||||
|
||||
LexerOutput out;
|
||||
const size_t len;
|
||||
const __int32 len;
|
||||
|
||||
// Trackers for where the Lexer is within the user version of source //
|
||||
unsigned short column;
|
||||
|
||||
@@ -4,9 +4,6 @@
|
||||
|
||||
namespace LXC::Lexer
|
||||
{
|
||||
// Forward declaration to allow passing it to the Token class //
|
||||
struct LexerContext;
|
||||
|
||||
namespace TokenClass
|
||||
{
|
||||
// Bitmask for different token classes //
|
||||
@@ -29,6 +26,8 @@ namespace LXC::Lexer
|
||||
};
|
||||
};
|
||||
|
||||
struct LexerContext;
|
||||
|
||||
// Data type for storing the output of the lexer //
|
||||
class Token final
|
||||
{
|
||||
@@ -56,8 +55,7 @@ namespace LXC::Lexer
|
||||
// === User defined === //
|
||||
|
||||
String_Literal = TokenClass::UserDefined,
|
||||
Int_Literal,
|
||||
Float_Literal,
|
||||
Num_Literal,
|
||||
Identifier,
|
||||
|
||||
// === Symbols === //
|
||||
@@ -86,8 +84,8 @@ namespace LXC::Lexer
|
||||
template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(TokenType type) { return type & mask; }
|
||||
template<TokenClass::ClassMask mask> static constexpr bool IsTypeClass(Token token) { return token.type & mask; }
|
||||
|
||||
// Constructor to set the data of the token //
|
||||
Token(const LexerContext& context, const unsigned short _length, TokenType _type);
|
||||
// Constructor to set the data of the token for more complex token types //
|
||||
Token(const LexerContext& ctx, unsigned __int32 start, unsigned short len, TokenType _type);
|
||||
|
||||
// Deconstructor to clean up the allocated memory //
|
||||
~Token();
|
||||
@@ -102,15 +100,12 @@ namespace LXC::Lexer
|
||||
// The length of the token //
|
||||
const unsigned short length;
|
||||
|
||||
// The line the token is on (starts on 1) //
|
||||
const unsigned short line;
|
||||
|
||||
// The index on the line (starts on 1) //
|
||||
const unsigned short column;
|
||||
// Start index of the token //
|
||||
const unsigned __int32 index;
|
||||
|
||||
private:
|
||||
// The data of the token //
|
||||
const char* contents;
|
||||
char* contents;
|
||||
};
|
||||
|
||||
// Typedef for the output type of how the Lexer outputs //
|
||||
|
||||
@@ -5,22 +5,97 @@
|
||||
|
||||
namespace LXC::Lexer
|
||||
{
|
||||
// Returns true when `c` is an ASCII decimal digit ('0'..'9').
static constexpr bool IsNumeric(const char c)
{
    return ('0' <= c) && (c <= '9');
}
|
||||
|
||||
// Returns true when `c` is an ASCII letter, either lowercase or uppercase.
// (Deliberately locale-free, unlike std::isalpha.)
static constexpr bool IsAlpha(const char c)
{
    if (('a' <= c) && (c <= 'z'))
        return true;
    return ('A' <= c) && (c <= 'Z');
}
|
||||
|
||||
// Constructor: binds the lexer to `_source` (held by reference — the caller
// must keep the string alive for the context's lifetime) and zeroes all
// position trackers.
// NOTE(review): `len` narrows size_t -> __int32, so sources over INT32_MAX
// bytes are unsupported; confirm that is acceptable. Replaced the C-style
// cast with static_cast to make the narrowing explicit and greppable.
// NOTE(review): `column`/`line` start at 0 here although the Token header
// documents line/column as starting at 1 — confirm the increment order in
// TokenizeFile produces 1-based positions.
LexerContext::LexerContext(const std::string& _source) :
    source(_source), index(0), out{},
    len(static_cast<__int32>(_source.length())),
    column(0), line(0)
{}
|
||||
|
||||
// Lexes the whole of `fileContents` into a stream of tokens.
//
// Single forward pass, character by character: '#'-delimited comments are
// skipped, while string literals, number literals and identifier words are
// accumulated via the `trackers` state and emitted as tokens when their
// final character is seen.
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents)
{
    // Creates the context for the lexer //
    LexerContext ctx(fileContents);

    // Scanning state for the token currently being built //
    struct
    {
        bool inStrLiteral = false;
        bool inIdentifier = false;
        bool inNumLiteral = false;

        bool inComment = false;

        // Source index of the first character of the current token //
        unsigned __int32 sectionStart = 0;

    } trackers;

    // BUG FIX: the condition was `ctx.index > ctx.len`, which is false on
    // entry (index starts at 0), so the loop never ran and the lexer
    // produced no tokens at all. It must iterate while index is IN range.
    while (ctx.index < ctx.len)
    {
        // The current char within the source that is being lexed //
        const char current = ctx.source[ctx.index];
        // One character of lookahead; '\0' past the end of the source //
        const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0';

        // === Comments === //
        // '#' toggles comment mode, so comments are '#'-delimited spans //
        if (current == '#')
            trackers.inComment = !trackers.inComment;

        else if (trackers.inComment) {} // Contents of comments are skipped over

        // === String literals === //
        else if (current == '"')
        {
            // Updates trackers //
            trackers.inStrLiteral = !trackers.inStrLiteral;
            trackers.sectionStart = trackers.inStrLiteral ? ctx.index : trackers.sectionStart;

            // Creates the token (if at the end of the string literal) //
            // NOTE(review): sectionStart is the opening quote's index, so
            // the opening '"' is counted in the token's length while the
            // closing one is not — confirm this is the intended contents.
            if (!trackers.inStrLiteral)
                ctx.out.push_back({ ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart), Token::String_Literal });

        } else if (trackers.inStrLiteral) {} // Captured as a unit when the literal closes

        // === Numbers === //
        else if (IsNumeric(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
            trackers.inNumLiteral = true;

            // Checks for the end of the number literal to create the token //
            // NOTE(review): a one-digit literal yields length 0 here
            // (index == sectionStart) — looks off by one; confirm against
            // how Token consumes `len`.
            if (!IsNumeric(next)) _UNLIKELY
            {
                ctx.out.push_back({ ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart), Token::Num_Literal });
                trackers.inNumLiteral = false;
            }
        }

        // === Words === //
        else if (IsAlpha(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
            trackers.inIdentifier = true;

            // Checks for the end of the word to create the token //
            // NOTE(review): same apparent off-by-one as Num_Literal above.
            if (!IsAlpha(next)) _UNLIKELY
            {
                ctx.out.push_back({ ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart), Token::Identifier });
                trackers.inIdentifier = false;
            }
        }

        // Iterates to the next index //
        ctx.column++;
        ctx.index++;
    }

    return ctx.out;
}
|
||||
}
|
||||
|
||||
@@ -5,25 +5,19 @@
|
||||
|
||||
namespace LXC::Lexer
|
||||
{
|
||||
static const char* const CopySubstrToMem(const LexerContext& context, const size_t length, Token::TokenType type)
|
||||
{
|
||||
// Only user defined class tokens need to store their type //
|
||||
if (!Token::IsTypeClass<TokenClass::UserDefined>(type))
|
||||
return nullptr;
|
||||
|
||||
// Copies the memory to a c-string //
|
||||
char* cStr = new char[length + 1];
|
||||
std::memcpy(cStr, context.source.data() + context.index, length);
|
||||
cStr[length] = '\0';
|
||||
|
||||
return cStr;
|
||||
}
|
||||
|
||||
// Constructor to assign the members of the token class //
|
||||
Token::Token(const LexerContext& context, const unsigned short _length, TokenType _type) :
|
||||
type(_type), length(_length), line(context.line), column(context.column),
|
||||
contents(CopySubstrToMem(context, _length, _type))
|
||||
{}
|
||||
// Constructor for tokens whose text began earlier in the source than the
// lexer's current position: `start` is the token's first character's index
// and `len` its length. For user-defined token classes the text is copied
// into an owned, null-terminated buffer (released in the destructor).
// BUG FIX: the const members `line` and `column` were missing from the
// initializer list — ill-formed for const-qualified members and they would
// otherwise hold garbage; initialize them from the context like the sibling
// constructor does.
Token::Token(const LexerContext& ctx, unsigned __int32 start, unsigned short len, TokenType _type) :
    type(_type), length(len), line(ctx.line), column(ctx.column), index(start), contents(nullptr)
{
    // Only user defined class tokens need to store c-string //
    if (Token::IsTypeClass<TokenClass::UserDefined>(type))
    {
        // Copies the memory to a c-string //
        contents = new char[len + 1]; // +1 for null terminator
        std::memcpy(contents, ctx.source.data() + start, len);
        contents[len] = '\0';
    }
}
|
||||
|
||||
// Destructor to clean up the memory of the token that can be allocated //
|
||||
Token::~Token()
|
||||
|
||||
Reference in New Issue
Block a user