|
|
|
|
@@ -5,116 +5,171 @@
|
|
|
|
|
|
|
|
|
|
namespace LXC::Internal
|
|
|
|
|
{
|
|
|
|
|
// Returns true when c is one of the ASCII decimal digits '0' to '9' //
static constexpr bool IsNumeric(const char c)
{
    return c >= '0' && c <= '9';
}
|
|
|
|
|
|
|
|
|
|
// Returns true when c is an ASCII letter, either 'a' to 'z' or 'A' to 'Z' //
// Digits and the underscore are not treated as letters //
static constexpr bool IsAlpha(const char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
|
|
|
|
|
|
|
|
|
|
// Returns true when c is a space, tab, line feed or carriage return //
// No other character (vertical tab, form feed, ...) counts as whitespace //
static constexpr bool IsWhitespace(const char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}
|
|
|
|
|
|
|
|
|
|
// Returns true when c is one of the single characters that can form a symbol or operator: //
// the arithmetic operators + - * / %, the assignment sign =, the comma, and the three bracket pairs [] {} () //
static constexpr bool IsSymbolOrOperator(const char c)
{
    switch (c)
    {
        // Arithmetic operators //
        case '+': case '-': case '*': case '/': case '%':

        // Assignment and separators //
        case '=': case ',':

        // Brackets, braces and parentheses //
        case '[': case ']':
        case '{': case '}':
        case '(': case ')':
            return true;

        default:
            return false;
    }
}
|
|
|
|
|
|
|
|
|
|
static const std::unordered_map<std::string_view, Lexer::Token::TokenType> symbolAndOpMap =
|
|
|
|
|
{
|
|
|
|
|
{ "+", Lexer::Token::Add },
|
|
|
|
|
{ "-", Lexer::Token::Sub },
|
|
|
|
|
{ "*", Lexer::Token::Mul },
|
|
|
|
|
{ "/", Lexer::Token::Div },
|
|
|
|
|
{ "%", Lexer::Token::Mod },
|
|
|
|
|
|
|
|
|
|
{ "=", Lexer::Token::Assign },
|
|
|
|
|
{ ",", Lexer::Token::Comma },
|
|
|
|
|
|
|
|
|
|
{ "[", Lexer::Token::CloseBracket },
|
|
|
|
|
{ "]", Lexer::Token::OpenBracket },
|
|
|
|
|
|
|
|
|
|
{ "{", Lexer::Token::CloseBrace },
|
|
|
|
|
{ "}", Lexer::Token::OpenBrace },
|
|
|
|
|
|
|
|
|
|
{ ")", Lexer::Token::CloseParen },
|
|
|
|
|
{ "(", Lexer::Token::OpenParen }
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
namespace LXC::Lexer
|
|
|
|
|
{
|
|
|
|
|
// Creates the lexer state for _source: the cursor starts at index 0 (line 0, column 0) and no tokens have been produced yet //
// The source length is narrowed to __int32 and stored in len //
LexerContext::LexerContext(const std::string& _source) :
    source(_source), index(0), out{}, len(static_cast<__int32>(_source.length())), column(0), line(0)
{}
|
|
|
|
|
|
|
|
|
|
// Splits fileContents into tokens in a single left-to-right pass over its characters //
// Returns ctx.out (the tokens collected so far) once the whole input has been consumed //
// Fails with a LexerError when it meets an invalid character, a run of symbol characters //
// that is not in Internal::symbolAndOpMap, or a string literal that is never closed //
Util::ReturnVal<LexerOutput, LexerError> TokenizeFile(const std::string& fileContents)
{
    // Creates the context for the lexer //
    LexerContext ctx(fileContents);

    // Records which kind of section the lexer is currently inside //
    struct
    {
        bool inStrLiteral = false;
        bool inIdentifier = false;
        bool inNumLiteral = false;
        bool inSymbolOrOp = false;

        bool inComment = false;

        // Index within the source at which the current section started //
        unsigned __int32 sectionStart = 0;

    } trackers;

    while (ctx.index < ctx.len)
    {
        // The current char within the source that is being lexed //
        const char current = ctx.source[ctx.index];

        // The char after it, or '\0' when current is the last char of the source //
        const char next = (ctx.index + 1 < ctx.len) ? ctx.source[ctx.index + 1] : '\0';

        // === Comments === //
        // A '#' both opens and closes a comment; this check runs before the string literal check //
        if (current == '#')
            trackers.inComment = !trackers.inComment;

        else if (trackers.inComment) {} // Contents of comments are skipped over

        // === String literals === //
        else if (current == '"')
        {
            // Updates trackers //
            trackers.inStrLiteral = !trackers.inStrLiteral;
            trackers.sectionStart = trackers.inStrLiteral ? ctx.index : trackers.sectionStart;

            // Creates the token (if at the end of the string literal) //
            // The token covers the characters between the quotes, not the quotes themselves //
            if (!trackers.inStrLiteral)
                ctx.out.emplace_back(ctx, trackers.sectionStart + 1, (USHORT)(ctx.index - trackers.sectionStart - 1), Token::StringLiteral);

        } else if (trackers.inStrLiteral) {} // Contents of string literals are skipped over

        // === Numbers === //
        else if (Internal::IsNumeric(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inNumLiteral ? trackers.sectionStart : ctx.index;
            trackers.inNumLiteral = true;

            // Checks for the end of the number literal to create the token //
            if (!Internal::IsNumeric(next)) _UNLIKELY
            {
                ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::NumLiteral);
                trackers.inNumLiteral = false;
            }
        }

        // === Words === //
        else if (Internal::IsAlpha(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inIdentifier ? trackers.sectionStart : ctx.index;
            trackers.inIdentifier = true;

            // Checks for the end of the word to create the token //
            if (!Internal::IsAlpha(next)) _UNLIKELY
            {
                ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), Token::Identifier);
                trackers.inIdentifier = false;
            }
        }

        // === Symbols/Operators === //
        else if (Internal::IsSymbolOrOperator(current))
        {
            // Updates trackers //
            trackers.sectionStart = trackers.inSymbolOrOp ? trackers.sectionStart : ctx.index;
            trackers.inSymbolOrOp = true;

            // Checks for the end of the symbol or operator //
            // NOTE(review): adjacent symbol characters are looked up as one run, so a run such as "()" //
            // is reported as unknown unless the map has an entry for it - confirm this is intended //
            if (!Internal::IsSymbolOrOperator(next))
            {
                // Finds the operator/symbol if it can //
                std::string_view fullSymbol(ctx.source.data() + trackers.sectionStart, ctx.index - trackers.sectionStart + 1);
                auto it = Internal::symbolAndOpMap.find(fullSymbol);

                if (it == Internal::symbolAndOpMap.end())
                    return Util::FunctionFail<LexerError>(LexerError::UnknownSymbolOrOperand, trackers.sectionStart, std::string(fullSymbol));

                ctx.out.emplace_back(ctx, trackers.sectionStart, (USHORT)(ctx.index - trackers.sectionStart + 1), it->second);

                // Ends the section so that the next symbol/operator records its own start index //
                trackers.inSymbolOrOp = false;
            }
        }

        // === Whitespace === //
        else if (Internal::IsWhitespace(current)) {}

        // If an if-statement has not been triggered the character must be invalid //
        else
            return Util::FunctionFail<LexerError>(LexerError::InvalidCharacter, ctx.index);

        // Iterates to the next index //
        ctx.column++;
        ctx.index++;
    }

    // Checks for an unterminated string literal //
    if (trackers.inStrLiteral)
        return Util::FunctionFail<LexerError>(LexerError::UnterminatedStringLiteral, trackers.sectionStart);

    return ctx.out;
}
|
|
|
|
|
}
|
|
|
|
|
|