diff --git a/LX-Compiler.sln b/LX-Compiler.sln
index 06aa645..e1b0f26 100644
--- a/LX-Compiler.sln
+++ b/LX-Compiler.sln
@@ -1,9 +1,14 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
-VisualStudioVersion = 17.13.35931.197 d17.13
+VisualStudioVersion = 17.13.35931.197
MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LX-LLVM", "LX-LLVM.vcxproj", "{CC37E36F-B3B3-41B0-A887-01E8EFE84994}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LX-Compiler", "LX-LLVM.vcxproj", "{CC37E36F-B3B3-41B0-A887-01E8EFE84994}"
+ ProjectSection(ProjectDependencies) = postProject
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA} = {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Lexer", "Lexer\Lexer.vcxproj", "{4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -21,6 +26,14 @@ Global
{CC37E36F-B3B3-41B0-A887-01E8EFE84994}.Release|x64.Build.0 = Release|x64
{CC37E36F-B3B3-41B0-A887-01E8EFE84994}.Release|x86.ActiveCfg = Release|Win32
{CC37E36F-B3B3-41B0-A887-01E8EFE84994}.Release|x86.Build.0 = Release|Win32
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Debug|x64.ActiveCfg = Debug|x64
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Debug|x64.Build.0 = Debug|x64
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Debug|x86.ActiveCfg = Debug|Win32
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Debug|x86.Build.0 = Debug|Win32
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Release|x64.ActiveCfg = Release|x64
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Release|x64.Build.0 = Release|x64
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Release|x86.ActiveCfg = Release|Win32
+ {4E4019F5-12E0-4EE2-9658-A0DD3038EEDA}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/LX-LLVM.vcxproj b/LX-LLVM.vcxproj
index 7bad884..44d9d5f 100644
--- a/LX-LLVM.vcxproj
+++ b/LX-LLVM.vcxproj
@@ -106,11 +106,13 @@
_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
true
stdcpp20
- $(SolutionDir)/Common;%(AdditionalIncludeDirectories)
+ $(SolutionDir)common;%(AdditionalIncludeDirectories)
Console
true
+ Lexer.lib;%(AdditionalDependencies)
+ $(SolutionDir)$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)
@@ -122,19 +124,22 @@
NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
true
stdcpp20
- $(SolutionDir)/Common;%(AdditionalIncludeDirectories)
+ $(SolutionDir)common;%(AdditionalIncludeDirectories)
Console
true
true
true
+ Lexer.lib;%(AdditionalDependencies)
+ $(SolutionDir)$(Platform)\$(Configuration)\;%(AdditionalLibraryDirectories)
+
diff --git a/LX-LLVM.vcxproj.filters b/LX-LLVM.vcxproj.filters
index 805c715..4066ea5 100644
--- a/LX-LLVM.vcxproj.filters
+++ b/LX-LLVM.vcxproj.filters
@@ -1,23 +1,20 @@
-
- {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
- cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
-
-
+
{93995380-89BD-4b04-88EB-625FBE52EBFB}
h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
-
-
- Source Files
-
-
- Header Files
+ Common
+
+
+ Common
+
+
+
\ No newline at end of file
diff --git a/LX-LLVM.vcxproj.user b/LX-LLVM.vcxproj.user
index bf3f381..8459cf7 100644
--- a/LX-LLVM.vcxproj.user
+++ b/LX-LLVM.vcxproj.user
@@ -4,11 +4,11 @@
false
- build-test/Main.lx build-test/Main.ll
+ build-test/Main.lx build-test/Main.ll build-test/Log.txt
WindowsLocalDebugger
- build-test/Main.lx build-test/Main.ll
+ build-test/Main.lx build-test/Main.ll build-test/Log.txt
WindowsLocalDebugger
\ No newline at end of file
diff --git a/Lexer/Lexer.vcxproj b/Lexer/Lexer.vcxproj
new file mode 100644
index 0000000..6f3b908
--- /dev/null
+++ b/Lexer/Lexer.vcxproj
@@ -0,0 +1,140 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Release
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ 17.0
+ Win32Proj
+ {4e4019f5-12e0-4ee2-9658-a0dd3038eeda}
+ Lexer
+ 10.0
+
+
+
+ Application
+ true
+ v143
+ Unicode
+
+
+ Application
+ false
+ v143
+ true
+ Unicode
+
+
+ StaticLibrary
+ true
+ v143
+ Unicode
+
+
+ StaticLibrary
+ false
+ v143
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Level3
+ true
+ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+
+
+ Console
+ true
+
+
+
+
+ Level3
+ true
+ true
+ true
+ WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+ Level3
+ true
+ _DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+ $(ProjectDir)inc;$(SolutionDir)common;%(AdditionalIncludeDirectories)
+ stdcpp20
+
+
+ Console
+ true
+
+
+
+
+ Level3
+ true
+ true
+ true
+ NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+ $(ProjectDir)inc;$(SolutionDir)common;%(AdditionalIncludeDirectories)
+ stdcpp20
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Lexer/Lexer.vcxproj.filters b/Lexer/Lexer.vcxproj.filters
new file mode 100644
index 0000000..3ec9ec0
--- /dev/null
+++ b/Lexer/Lexer.vcxproj.filters
@@ -0,0 +1,21 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+
\ No newline at end of file
diff --git a/Lexer/Lexer.vcxproj.user b/Lexer/Lexer.vcxproj.user
new file mode 100644
index 0000000..88a5509
--- /dev/null
+++ b/Lexer/Lexer.vcxproj.user
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/Lexer/src/Lexer.cpp b/Lexer/src/Lexer.cpp
new file mode 100644
index 0000000..1c69e7d
--- /dev/null
+++ b/Lexer/src/Lexer.cpp
@@ -0,0 +1,181 @@
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+namespace LX
+{
+ // Local helper macros, defined here in the source file rather than in a shared header //
+ // ITERATE expands to TWO statements (index++; continue) so it must only be used inside a braced block; TOKEN_CASE(type) expands to a case label returning the spelling of its argument
+ #define ITERATE index++; continue
+ #define TOKEN_CASE(type) case type: return #type;
+
+ // Returns the enumerator name of a token type for the lexer log; any type not listed below becomes "Unknown: <number>"
+ static std::string ToString(Token::TokenType type)
+ {
+     switch (type)
+     {
+         TOKEN_CASE(Token::STRING_LITERAL);
+         TOKEN_CASE(Token::IDENTIFIER);
+         TOKEN_CASE(Token::FOR);
+         TOKEN_CASE(Token::WHILE);
+         TOKEN_CASE(Token::IF);
+         TOKEN_CASE(Token::ELSE);
+         TOKEN_CASE(Token::ELIF);
+         TOKEN_CASE(Token::FUNCTION);
+
+         // The number has to be converted first: adding an integer to a string literal only moves the pointer
+         default: return "Unknown: " + std::to_string(static_cast<short>(type));
+     }
+ }
+
+ // Maps every reserved word to its keyword token type; a word that is not in here is lexed as an identifier
+ static const std::unordered_map<std::string, Token::TokenType> keywords = {
+     { "for", Token::FOR },
+     { "while", Token::WHILE },
+     { "if", Token::IF },
+     { "else", Token::ELSE },
+     { "elif", Token::ELIF },
+     { "func", Token::FUNCTION },
+ };
+
+ // Lexer entry point: reads the whole of src and splits it into tokens.
+ //
+ // src must be positioned at its end when passed in (opened with std::ios::ate): the
+ // current position is taken as the file length before seeking back to the start.
+ // log may be null; when it is not, a heading and every token are written to it.
+ //
+ // Rules applied while scanning:
+ //  - text between two '"' becomes a STRING_LITERAL token (quotes excluded)
+ //  - text between two '#' is a comment and produces no tokens
+ //  - a run of ASCII letters is a keyword if listed in keywords, otherwise an IDENTIFIER
+ //  - space, tab, CR and LF only separate tokens
+ //
+ // Throws InvalidCharInSource for any other character and for a string literal that is
+ // never closed, and std::ios_base::failure if the length of src cannot be determined.
+ const std::vector<Token> LX::LexicalAnalyze(std::ifstream& src, std::ofstream* log)
+ {
+     // Logs the start of the lexical analysis
+     SafeLog(log, LOG_BREAK, "Started lexing file", LOG_BREAK);
+
+     // Reserves a large amount of memory for the output, shrunk to the used size before returning
+     std::vector<Token> tokens;
+     tokens.reserve(0xFFFF);
+
+     // Turns the contents of the file into a string //
+
+     // The current position is the length of the file because it is opened at the end
+     // tellg reports failure as -1, which must never be used as a size
+     const std::streamsize len = src.tellg();
+     if (len < 0) { throw std::ios_base::failure("Could not determine the length of the source file"); }
+     src.seekg(0, std::ios::beg); // Goes back to the beginning
+
+     std::string contents(static_cast<std::string::size_type>(len), '\0'); // Preallocates all space needed
+     src.read(contents.data(), len); // Transfers file to string
+
+     // Trackers for when the program is iterating over the file //
+
+     std::streamsize startOfWord = 0;
+     std::streamsize startOfStringLiteral = 0;
+
+     bool inComment = false;
+     bool inStringLiteral = false;
+     bool inWord = false;
+
+     // Adds the word that started at startOfWord and stops just before end to the token vector
+     const auto finishWord = [&](const std::streamsize end)
+     {
+         const std::string word(contents.data() + startOfWord, end - startOfWord);
+         const auto keyword = keywords.find(word);
+         tokens.push_back({ keyword != keywords.end() ? keyword->second : Token::IDENTIFIER, word });
+         inWord = false;
+     };
+
+     // Iterates over the file and turns it into tokens //
+     for (std::streamsize index = 0; index < len; ++index)
+     {
+         // Stores the current character for easy access
+         const char current = contents[index];
+
+         // Within a string literal everything up to the closing quote belongs to the literal
+         if (inStringLiteral)
+         {
+             if (current == '"')
+             {
+                 // Adds the string literal token to the token vector
+                 std::string lit(contents.data() + startOfStringLiteral, index - startOfStringLiteral);
+                 tokens.push_back({ Token::STRING_LITERAL, lit });
+                 inStringLiteral = false;
+             }
+
+             continue;
+         }
+
+         // Within a comment everything up to the closing '#' is skipped, quotes included
+         if (inComment)
+         {
+             inComment = (current != '#');
+             continue;
+         }
+
+         // Works out if the current character is alphabetic
+         const bool isAlpha = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
+
+         if (isAlpha)
+         {
+             // Start of a word
+             if (inWord == false)
+             {
+                 inWord = true;
+                 startOfWord = index;
+             }
+
+             continue;
+         }
+
+         // Every other character ends the current word, so the word is added before the
+         // character is handled (a word may be directly followed by '"' or '#')
+         if (inWord) { finishWord(index); }
+
+         // Start of a string literal, which begins after the quote
+         if (current == '"')
+         {
+             startOfStringLiteral = index + 1;
+             inStringLiteral = true;
+             continue;
+         }
+
+         // Start of a comment
+         if (current == '#') { inComment = true; continue; }
+
+         // Operators will eventually go here
+
+         // Whitespace only separates tokens
+         if (current == ' ' || current == '\t' || current == '\r' || current == '\n') { continue; }
+
+         // Anything else is an invalid character, so throws an error to alert the user
+         throw InvalidCharInSource(index, current);
+     }
+
+     // The file can end in the middle of a word or of a string literal
+     if (inWord) { finishWord(len); }
+     if (inStringLiteral) { throw InvalidCharInSource(startOfStringLiteral - 1, '"'); }
+
+     // Logs the tokens if logging is on //
+     if (log != nullptr)
+     {
+         for (auto& token : tokens)
+         {
+             SafeLog(log, ToString(token.type), ":\t", token.contents);
+         }
+     }
+
+     // Shrinks the vector down to minimum size before returning to avoid excess memory being allocated
+     tokens.shrink_to_fit();
+     return tokens;
+ }
+}
\ No newline at end of file
diff --git a/Lexer/src/Token.cpp b/Lexer/src/Token.cpp
new file mode 100644
index 0000000..0acd429
--- /dev/null
+++ b/Lexer/src/Token.cpp
@@ -0,0 +1,11 @@
+#include
+
+#include
+#include
+
+namespace LX
+{
+ Token::Token(const TokenType _type, std::string _contents) // Stores the given type and text in the new token
+     : contents(_contents), type(_type) // Listed in declaration order (contents, then type), which is the order members are really initialised in
+ {}
+}
diff --git a/Main.cpp b/Main.cpp
index 613bd1a..8ac84dc 100644
--- a/Main.cpp
+++ b/Main.cpp
@@ -2,7 +2,9 @@
#include
#include
#include
+#include
+#include
#include
namespace LX
@@ -12,6 +14,7 @@ namespace LX
struct IncorrectCommandLineArgs {};
struct InvalidInputFilePath {};
struct InvalidOutputFilePath {};
+ struct InvalidLogFilePath {};
}
int main(int argc, char** argv)
@@ -19,7 +22,7 @@ int main(int argc, char** argv)
try
{
// Checks there is the correct ammount of arguments
- LX::ThrowIf(argc != 3);
+ LX::ThrowIf((argc == 3 || argc == 4) == false);
// Turns the file paths into the C++ type for handling them
std::filesystem::path inpPath = argv[1];
@@ -28,18 +31,32 @@ int main(int argc, char** argv)
// Prints the full paths to the console to let the user know compiling is being done
std::cout << std::filesystem::absolute(inpPath) << " -> " << std::filesystem::absolute(outPath) << std::endl;
- // Checks the input file exists
+ // Checks the input file exists and opens it
LX::ThrowIf(std::filesystem::exists(inpPath) == false);
+ std::ifstream inpFile(inpPath, std::ios::binary | std::ios::ate); // Opens in binary at the end for microptimisation
+ LX::ThrowIf(inpFile.is_open() == false);
- // Opens / Creates the output file and checks if it is open
+ // Opens / Creates the output file
std::ofstream outFile(outPath);
LX::ThrowIf(outFile.is_open() == false);
+
+ // Opens / Creates the log file
+ std::unique_ptr log = nullptr;
+
+ if (argc == 4)
+ {
+ log = std::make_unique(argv[3]);
+ LX::ThrowIf(log->is_open() == false);
+ }
+
+ // Create tokens out of the input file
+ std::vectortokens = LX::LexicalAnalyze(inpFile, log.get());
}
catch (LX::IncorrectCommandLineArgs)
{
// Displays to the console of how to use the program
- std::cout << "\nUsage: [source file] [output file]\n";
+ std::cout << "\nUsage: [source file] [output file] (optional)[log file]\n";
return 1;
}
@@ -47,7 +64,7 @@ int main(int argc, char** argv)
catch (LX::InvalidInputFilePath)
{
// Tells user the input file could not be found
- std::cout << "\nFile path: {" << argv[1] << "} could not be found\n";
+ std::cout << "\nFile path: {" << argv[1] << "} could not be opened\n";
return 2;
}
@@ -55,7 +72,25 @@ int main(int argc, char** argv)
catch (LX::InvalidOutputFilePath)
{
// Tells the user the output file could not be opened
- std::cout << "\nCould not open/create {" << argv[2] << "}";
+ std::cout << "\nCould not open/create {" << argv[2] << "}\n";
+ }
+
+ catch (LX::InvalidCharInSource& e)
+ {
+ // Tells the user which character was invalid and at which index of the source file it was found
+ std::cout << "\nInvalid character found in source file: {" << e.invalid << "} at index: " << e.index << "\n";
+ }
+
+ catch (std::exception& e)
+ {
+ // Prints the std exception to the console
+ std::cout << "\nAn error occured:\n" << e.what() << std::endl;
+ }
+
+ catch (...)
+ {
+ // Tells the user if an error has happened
+ std::cout << "\nAn Error occured\n";
}
return 0;
diff --git a/build-test/Log.txt b/build-test/Log.txt
new file mode 100644
index 0000000..0cf4dac
--- /dev/null
+++ b/build-test/Log.txt
@@ -0,0 +1,9 @@
+
+-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+Started lexing file
+-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+Token::FUNCTION: func
+Token::IDENTIFIER: main
+Token::IDENTIFIER: print
+Token::STRING_LITERAL: Hello World!
diff --git a/build-test/main.lx b/build-test/main.lx
index e69de29..f3f3246 100644
--- a/build-test/main.lx
+++ b/build-test/main.lx
@@ -0,0 +1,2 @@
+func main
+ print "Hello World!"
diff --git a/common/Lexer.h b/common/Lexer.h
new file mode 100644
index 0000000..2010860
--- /dev/null
+++ b/common/Lexer.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include
+#include
+#include
+
+// This file contains everything that is exported from Lexer.lib
+// The rest of the items within the Lexer project are internal only
+
+namespace LX
+{
+ struct InvalidCharInSource // Thrown by the lexer when it meets a character it cannot turn into a token
+ {
+ std::streamsize index; // Offset of the character within the source file contents
+ char invalid; // The character that was rejected
+ };
+
+ // Data type to store a more computer readable version of files
+ // Each token pairs the kind of thing that was read with the text it was created from
+ struct __declspec(novtable) Token final
+ {
+     // The kinds of token that exist, stored as a short
+     enum TokenType : short
+     {
+         // General tokens //
+         STRING_LITERAL,
+         IDENTIFIER,
+
+         // Keywords //
+         FOR,
+         WHILE,
+         IF,
+         ELSE,
+         ELIF,
+         FUNCTION,
+
+         // Not a real kind of token: seeing it means a mistake was made somewhere //
+         UNDEFINED = -1
+     };
+
+     // Creates a token of type _type that holds the text _contents
+     Token(const TokenType _type, std::string _contents);
+
+     // Text of the token (may be empty if not needed)
+     // Const so that it cannot be changed from outside
+     const std::string contents;
+
+     // Type of the token
+     // Const so that it cannot be changed from outside
+     const TokenType type;
+ };
+
+ // Lexer function to take in a file opened at its end (std::ios::ate) and output a vector of tokens; log may be null to turn logging off
+ const std::vector<Token> LexicalAnalyze(std::ifstream& src, std::ofstream* log);
+}
diff --git a/common/Util.h b/common/Util.h
index c72015b..04dff5d 100644
--- a/common/Util.h
+++ b/common/Util.h
@@ -1,8 +1,18 @@
#pragma once
+#include
+
namespace LX
{
template
inline void ThrowIf(const bool condition, Args... args)
{ if (condition) [[unlikely]] { throw T(args...); }}
+
+ // Writes every argument to the log in order followed by a newline; does nothing when log is null
+ // Arguments are taken by const reference so that strings are not copied for each log line
+ template<typename... Args>
+ inline void SafeLog(std::ofstream* log, const Args&... args)
+ { if (log != nullptr) { (*log << ... << args); *log << "\n"; }}
+
+ constexpr const char* LOG_BREAK = "\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n"; // Separator line written before and after section headings in the log
}