Added lexer from previous project

This commit is contained in:
Pasha Bibko
2025-04-16 19:32:26 +01:00
parent 753c72c653
commit 4ac5061f03
14 changed files with 506 additions and 23 deletions

140
Lexer/Lexer.vcxproj Normal file
View File

@@ -0,0 +1,140 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
  MSBuild project for the Lexer component.

  Every configuration (Debug/Release, Win32/x64) builds the sources listed at
  the bottom of this file as a static library, with $(ProjectDir)inc and
  $(SolutionDir)common on the include path and the C++20 language standard.
  The Win32 configurations are kept in step with the x64 ones so that the
  project builds the same way on both platforms.
-->
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- Configuration/platform pairs offered by this project -->
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>17.0</VCProjectVersion>
    <Keyword>Win32Proj</Keyword>
    <ProjectGuid>{4e4019f5-12e0-4ee2-9658-a0dd3038eeda}</ProjectGuid>
    <RootNamespace>Lexer</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <!-- Output type and toolset per configuration; all four produce a static library -->
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <!-- Optional per-user property sheets, imported only when the file exists -->
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <!--
    Compiler and linker settings per configuration. The include directories and
    language standard are repeated in every group because the sources include
    their headers with angle brackets and use C++17/20 language features.
  -->
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <AdditionalIncludeDirectories>$(ProjectDir)inc;$(SolutionDir)common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <LanguageStandard>stdcpp20</LanguageStandard>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <AdditionalIncludeDirectories>$(ProjectDir)inc;$(SolutionDir)common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <LanguageStandard>stdcpp20</LanguageStandard>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <AdditionalIncludeDirectories>$(ProjectDir)inc;$(SolutionDir)common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <LanguageStandard>stdcpp20</LanguageStandard>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <AdditionalIncludeDirectories>$(ProjectDir)inc;$(SolutionDir)common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <LanguageStandard>stdcpp20</LanguageStandard>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <!-- Translation units compiled into the library -->
  <ItemGroup>
    <ClCompile Include="src\Lexer.cpp" />
    <ClCompile Include="src\Token.cpp" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

View File

@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Filter definitions for the Lexer project and the assignment of its source files to them. -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Declares two filters, each with its own unique identifier and list of file extensions -->
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
</ItemGroup>
<!-- Both compiled sources are placed in "Source Files"; no item is assigned to "Header Files" here -->
<ItemGroup>
<ClCompile Include="src\Lexer.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\Token.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

4
Lexer/Lexer.vcxproj.user Normal file
View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- No properties are set in this file: the only PropertyGroup is empty. -->
<!-- NOTE(review): .user files usually hold machine-local settings; confirm this one is meant to be committed. -->
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup />
</Project>

181
Lexer/src/Lexer.cpp Normal file
View File

@@ -0,0 +1,181 @@
#include <Lexer.h>
#include <Util.h>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
namespace LX
{
// Local helper macros //
// ITERATE advances the lexer's `index` and moves on to the next loop iteration.
// It expands to two statements, so only use it inside a braced block.
#define ITERATE index++; continue
// TOKEN_CASE expands to a switch case that returns the enumerator's spelling as text.
#define TOKEN_CASE(type) case type: return #type;

// Returns a printable name for a token type, used when logging the token list.
//
// Known types yield the enumerator exactly as spelled in the case below
// (e.g. "Token::FOR"); any other value yields "Unknown: " followed by its numeric value.
static std::string ToString(Token::TokenType type)
{
    switch (type)
    {
        TOKEN_CASE(Token::STRING_LITERAL);
        TOKEN_CASE(Token::IDENTIFIER);
        TOKEN_CASE(Token::FOR);
        TOKEN_CASE(Token::WHILE);
        TOKEN_CASE(Token::IF);
        TOKEN_CASE(Token::ELSE);
        TOKEN_CASE(Token::ELIF);
        TOKEN_CASE(Token::FUNCTION);

        default:
            // The number must be converted to text first: adding an integer to a
            // string literal offsets the pointer instead of appending the value.
            return "Unknown: " + std::to_string(static_cast<int>(type));
    }
}
// Reserved words of the language mapped to their token types.
// A word that is not found in this table is lexed as an identifier.
static const std::unordered_map<std::string, Token::TokenType> keywords{
    { "func",  Token::FUNCTION },
    { "if",    Token::IF       },
    { "elif",  Token::ELIF     },
    { "else",  Token::ELSE     },
    { "for",   Token::FOR      },
    { "while", Token::WHILE    },
};
const std::vector<Token> LX::LexicalAnalyze(std::ifstream& src, std::ofstream* log)
{
// Logs the start of the lexical analysis
SafeLog(log, LOG_BREAK, "Started lexing file", LOG_BREAK);
// Allocates a large ammount of memory to hold the output
// Will shrink the size later on to stop excess memory
std::vector<Token> tokens = {};
tokens.reserve(0xFFFF);
// Turns the contents of the file into a string //
// Gets length of the file because it is opened at the end
const std::streamsize len = src.tellg();
src.seekg(0, std::ios::beg); // Goes back to the beginning
std::string contents(len, '\0'); // Preallocates all space needed
src.read(&contents[0], len); // Transfers file to string
// Trackers for when the program is iterating over the file //
std::streamsize index = 0;
std::streamsize startOfWord = 0;
std::streamsize startOfStringLiteral = 0;
bool inComment = false;
bool inStringLiteral = false;
bool wasLastCharAlpha = false;
// Iterates over the file and turns it into tokens //
while (index < len)
{
// Stores the current character for easy access
const char current = contents[index];
// Updates string literal tracker and skips over rest if in a string literal
if (current == '"')
{
// Start of string literal
if (inStringLiteral == false)
{
// Updates the neccesarry trackers
startOfStringLiteral = index + 1;
inStringLiteral = true;
ITERATE;
}
// End of string literal
else
{
// Adds the string literal token to the token vector
std::string lit(contents.data() + startOfStringLiteral, index - startOfStringLiteral);
tokens.push_back({ Token::STRING_LITERAL, lit });
// Updates trackers
inStringLiteral = false;
ITERATE;
}
}
// Skips over rest if within a string literal
if (inStringLiteral) { ITERATE; }
// Updates comment state
if (current == '#')
{
inComment = !inComment;
ITERATE;
}
// Skips over if within a comment
if (inComment) { ITERATE; }
// Works out if the current character is alphabetic
bool isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
if (isAlpha == true)
{
// Start of a word
if (wasLastCharAlpha == false)
{
// Updates trackers
wasLastCharAlpha = true;
startOfWord = index;
}
ITERATE;
}
// End of a word
if (isAlpha == false && wasLastCharAlpha == true)
{
// Adds the word token to the token vector
std::string word(contents.data() + startOfWord, index - startOfWord);
if (auto keyword = keywords.find(word); keyword != keywords.end())
{
tokens.push_back({ keyword->second, word });
}
else
{
tokens.push_back({ Token::IDENTIFIER, word });
}
}
// Operators will eventually go here
// If it is here and not whitespace that means it's an invalid character
if (current == ' ' || current == '\t' || current == '\r' || current == '\n')
{
// Updates trackers
wasLastCharAlpha = isAlpha;
ITERATE;
}
// Throws an error to alert the user
throw InvalidCharInSource(index, current);
}
// Logs the tokens if logging is on //
if (log != nullptr)
{
for (auto& token : tokens)
{
SafeLog(log, ToString(token.type), ":\t", token.contents);
}
}
// Shrinks the vector down to minimum size before returning to avoid excess memory being allocated
tokens.shrink_to_fit();
return tokens;
}
}

11
Lexer/src/Token.cpp Normal file
View File

@@ -0,0 +1,11 @@
#include <Lexer.h>
#include <string>
#include <utility>
#include <vector>
namespace LX
{
    // Builds a token of kind `_type` whose text is `_contents`.
    // `_contents` is received by value and moved into the member, so the string
    // is not copied a second time.
    Token::Token(const TokenType _type, std::string _contents)
        : type(_type), contents(std::move(_contents))
    {}
}