diff --git a/Lexer.cs b/Lexer.cs new file mode 100644 index 0000000..b18a68b --- /dev/null +++ b/Lexer.cs @@ -0,0 +1,217 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ShintenScript
{
    /// <summary>
    /// Splits ShintenScript source text into <see cref="Token"/>s, one at a time and on
    /// demand, keeping a single token of lookahead (<see cref="Current"/> peeks,
    /// <see cref="Pop"/> consumes).
    /// </summary>
    public class Lexer
    {
        // Order matters: the table is scanned top to bottom and the first match wins, so
        // every two-character operator must come before the one-character symbol it
        // starts with (e.g. "==" before "=").
        static readonly (string, Token.Type)[] symbolTokens = new (string, Token.Type)[]
        {
            ("==", Token.Type.EQ),
            (">=", Token.Type.GE),
            ("<=", Token.Type.LE),
            ("!=", Token.Type.NE),
            ("+=", Token.Type.PLUSASSIGN),
            ("-=", Token.Type.MINUSASSIGN),
            ("*=", Token.Type.ASTERISKASSIGN),
            ("/=", Token.Type.SLASHASSIGN),

            ("+", Token.Type.PLUS),
            ("-", Token.Type.MINUS),
            ("*", Token.Type.ASTERISK),
            ("/", Token.Type.SLASH),
            ("=", Token.Type.ASSIGN),
            ("(", Token.Type.LPAREN),
            (")", Token.Type.RPAREN),
            ("{", Token.Type.LBRACE),
            ("}", Token.Type.RBRACE),
            (">", Token.Type.GT),
            ("<", Token.Type.LT),
        };

        // Reserved words and the token type each one produces. Lookup is case-sensitive.
        // An explicit map (rather than parsing the enum by name) guarantees a keyword can
        // never silently turn into a NULL token.
        static readonly Dictionary<string, Token.Type> keywords = new Dictionary<string, Token.Type>
        {
            { "if", Token.Type.IF },
            { "while", Token.Type.WHILE },
            { "fn", Token.Type.FN },
        };

        // Characters that introduce the exponent part of a decimal literal.
        static readonly char[] exponentMarkers = { 'e', 'E' };

        // Text being tokenized and the position of the next unread character.
        string code;
        int index = 0;

        // One-token lookahead buffer; a token of type NULL means nothing is buffered.
        Token current = new Token { type = Token.Type.NULL };

        /// <summary>Creates a lexer positioned at the start of <paramref name="code_"/>.</summary>
        /// <param name="code_">Source text to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="code_"/> is null.</exception>
        public Lexer(string code_)
        {
            if (code_ == null)
            {
                throw new ArgumentNullException(nameof(code_));
            }

            code = code_;
        }

        /// <summary>
        /// Reads the next token starting at the current position, skipping leading whitespace.
        /// </summary>
        /// <returns>The token read, or an EOF token when nothing but whitespace remains.</returns>
        /// <exception cref="LexingException">
        /// The next character starts no known token, or a numeric literal is malformed.
        /// </exception>
        Token ParseOne()
        {
            while (!EOF() && char.IsWhiteSpace(code[index]))
            {
                index++;
            }

            if (EOF())
            {
                return new Token { type = Token.Type.EOF };
            }

            foreach ((string symbol, Token.Type token) in symbolTokens)
            {
                // CompareOrdinal clamps to the end of the text, so a symbol longer than the
                // remaining input just fails to match (Substring would throw here).
                if (string.CompareOrdinal(code, index, symbol, 0, symbol.Length) == 0)
                {
                    index += symbol.Length;
                    return new Token { type = token };
                }
            }

            if (char.IsLetter(code[index]))
            {
                return ReadIdentifierOrKeyword();
            }

            if (char.IsDigit(code[index]))
            {
                return ReadNumber();
            }

            throw new LexingException($"Encountered unexpected character '{code[index]}'");
        }

        // Reads a run of letters/digits and classifies it as a keyword or an identifier.
        Token ReadIdentifierOrKeyword()
        {
            int start = index;

            while (!EOF() && char.IsLetterOrDigit(code[index]))
            {
                index++;
            }

            string identifier = code.Substring(start, index - start);

            if (keywords.TryGetValue(identifier, out Token.Type keywordType))
            {
                return new Token { type = keywordType };
            }

            return new Token { type = Token.Type.IDENTIFIER, data = identifier };
        }

        // Reads a numeric literal (decimal integer, "0x" hexadecimal integer, or decimal
        // float with optional exponent) and returns it as a NUMBER token holding a float.
        Token ReadNumber()
        {
            int start = index;

            // In a hexadecimal literal 'e'/'E' are digits, not exponent markers, so a sign
            // following them belongs to the next token.
            bool isHex = code[index] == '0'
                && index + 1 < code.Length
                && (code[index + 1] == 'x' || code[index + 1] == 'X');

            while (!EOF() && IsNumberCharacter(isHex))
            {
                index++;
            }

            string numberString = code.Substring(start, index - start);
            float number;

            if (numberString.Contains("."))
            {
                if (!float.TryParse(numberString, NumberStyles.Float, CultureInfo.InvariantCulture, out number))
                {
                    throw new LexingException($"Invalid float literal \"{numberString}\"");
                }
            }
            else if (isHex)
            {
                if (!long.TryParse(numberString.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out long hexValue))
                {
                    throw new LexingException($"Invalid integer literal \"{numberString}\"");
                }

                number = (float)hexValue;
            }
            else if (long.TryParse(numberString, NumberStyles.Number, CultureInfo.InvariantCulture, out long integer))
            {
                number = (float)integer;
            }
            else
            {
                // Exponent notation without a decimal point ("1e6", "2e-6") is not an
                // integer but is still a valid number; anything else is malformed.
                bool hasExponent = numberString.IndexOfAny(exponentMarkers) >= 0;

                if (!hasExponent || !float.TryParse(numberString, NumberStyles.Float, CultureInfo.InvariantCulture, out number))
                {
                    throw new LexingException($"Invalid integer literal \"{numberString}\"");
                }
            }

            return new Token { type = Token.Type.NUMBER, data = number };
        }

        // True when the character at the current position continues the numeric literal
        // being read. Only called while a literal is being scanned, i.e. after its first digit.
        bool IsNumberCharacter(bool isHex)
        {
            char c = code[index];

            if (char.IsLetterOrDigit(c) || c == '.')
            {
                return true;
            }

            if (isHex || (c != '-' && c != '+'))
            {
                return false;
            }

            // A sign is part of the literal only directly after an exponent marker. The
            // literal's first character is a digit, so index - 1 is inside the literal here.
            char previous = code[index - 1];
            return previous == 'e' || previous == 'E';
        }

        // True once every character of the source text has been consumed.
        bool EOF()
        {
            return index >= code.Length;
        }

        /// <summary>
        /// Returns the next token without consuming it, reading it from the source text
        /// if it is not buffered yet.
        /// </summary>
        /// <exception cref="LexingException">The upcoming text is not a valid token.</exception>
        public Token Current()
        {
            if (current.Null())
            {
                current = ParseOne();
            }

            return current;
        }

        /// <summary>Returns the next token and consumes it.</summary>
        /// <exception cref="LexingException">The upcoming text is not a valid token.</exception>
        public Token Pop()
        {
            Token token = Current();
            current = new Token { type = Token.Type.NULL };
            return token;
        }

        /// <summary>
        /// Checks whether the next token has one of the given types. The token is only
        /// peeked at, never consumed.
        /// </summary>
        /// <param name="types">Token types to test against.</param>
        /// <returns>True if the next token's type is in <paramref name="types"/>.</returns>
        public bool Accept(params Token.Type[] types)
        {
            Token token = Current();
            return Array.IndexOf(types, token.type) >= 0;
        }

        /// <summary>
        /// Consumes the next token and returns it if it has one of the given types.
        /// </summary>
        /// <param name="types">Acceptable token types; at least one is required.</param>
        /// <returns>The consumed token.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="types"/> is empty.</exception>
        /// <exception cref="LexingException">
        /// The token has a different type. The token has already been consumed at that point.
        /// </exception>
        public Token Expect(params Token.Type[] types)
        {
            if (types == null)
            {
                throw new ArgumentNullException(nameof(types));
            }

            if (types.Length == 0)
            {
                throw new ArgumentException("At least one expected token type is required", nameof(types));
            }

            Token token = Pop();

            if (Array.IndexOf(types, token.type) >= 0)
            {
                return token;
            }

            if (types.Length == 1)
            {
                throw new LexingException($"Encountered {token.type} when expecting {types[0]}");
            }

            if (types.Length == 2)
            {
                throw new LexingException($"Encountered {token.type} when expecting one of {types[0]} or {types[1]}");
            }

            string list = string.Join(", ", types.Take(types.Length - 1));
            throw new LexingException($"Encountered {token.type} when expecting one of {list}, or {types[types.Length - 1]}");
        }

        /// <summary>
        /// Lazily consumes and yields every remaining token. The sequence always ends with
        /// exactly one EOF token, whether or not the source text ends in whitespace, and
        /// includes a token that was already peeked via <see cref="Current"/>.
        /// </summary>
        /// <exception cref="LexingException">Thrown during enumeration on invalid input.</exception>
        public IEnumerable<Token> TokenStream()
        {
            Token token;

            do
            {
                token = Pop();
                yield return token;
            }
            while (token.type != Token.Type.EOF);
        }

        /// <summary>Consumes all remaining tokens and writes each one to the console.</summary>
        public void Print()
        {
            foreach (Token token in TokenStream())
            {
                Console.WriteLine(token);
            }
        }
    }
}
diff --git a/LexingException.cs b/LexingException.cs new file mode 100644 index 0000000..ab8e094 --- /dev/null +++ b/LexingException.cs @@ -0,0 +1,16 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ShintenScript
{
    /// <summary>
    /// Thrown by <see cref="Lexer"/> when the source text cannot be tokenized or when a
    /// token of an unexpected type is encountered.
    /// </summary>
    public class LexingException : Exception
    {
        /// <summary>Creates the exception with a description of the lexing failure.</summary>
        /// <param name="message">Human-readable description of the problem.</param>
        public LexingException(string message) : base(message)
        {
        }
    }
}
diff --git a/Properties/AssemblyInfo.cs b/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..2080e65 --- /dev/null +++ b/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("ShintenScript")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("ShintenScript")] +[assembly: AssemblyCopyright("Copyright © 2023")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components.
If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("f5926e19-83e0-4821-a9ff-5f34dec8fdc9")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/ShintenScript.csproj b/ShintenScript.csproj new file mode 100644 index 0000000..211747f --- /dev/null +++ b/ShintenScript.csproj @@ -0,0 +1,50 @@ + + + + + Debug + AnyCPU + {F5926E19-83E0-4821-A9FF-5F34DEC8FDC9} + Library + Properties + ShintenScript + ShintenScript + v4.7.2 + 512 + true + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ShintenScript.sln b/ShintenScript.sln new file mode 100644 index 0000000..5835211 --- /dev/null +++ b/ShintenScript.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.33130.400 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ShintenScript", "ShintenScript.csproj", "{F5926E19-83E0-4821-A9FF-5F34DEC8FDC9}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ShintenScriptTest", "ShintenScriptTest\ShintenScriptTest.csproj", "{E21E804A-941B-4992-BBA5-9A9F95FE4F00}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + 
GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F5926E19-83E0-4821-A9FF-5F34DEC8FDC9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F5926E19-83E0-4821-A9FF-5F34DEC8FDC9}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F5926E19-83E0-4821-A9FF-5F34DEC8FDC9}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F5926E19-83E0-4821-A9FF-5F34DEC8FDC9}.Release|Any CPU.Build.0 = Release|Any CPU + {E21E804A-941B-4992-BBA5-9A9F95FE4F00}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E21E804A-941B-4992-BBA5-9A9F95FE4F00}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E21E804A-941B-4992-BBA5-9A9F95FE4F00}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E21E804A-941B-4992-BBA5-9A9F95FE4F00}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {218E18CB-AD4F-41CB-A3B2-E6E079C82769} + EndGlobalSection +EndGlobal
diff --git a/ShintenScriptTest/LexerTests.cs b/ShintenScriptTest/LexerTests.cs new file mode 100644 index 0000000..65537d4 --- /dev/null +++ b/ShintenScriptTest/LexerTests.cs @@ -0,0 +1,67 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using ShintenScript;
using System;
using System.Linq;

namespace ShintenScriptTest
{
    /// <summary>Unit tests for <see cref="Lexer"/>.</summary>
    [TestClass]
    public class LexerTests
    {
        // Expected token for a symbol or keyword, which carries no data.
        static Token Plain(Token.Type kind)
        {
            return new Token { type = kind };
        }

        // Expected token for a numeric literal.
        static Token Number(float value)
        {
            return new Token { type = Token.Type.NUMBER, data = value };
        }

        /// <summary>
        /// Lexes one sample of every token kind and checks the exact resulting sequence,
        /// including the terminating EOF token.
        /// </summary>
        [TestMethod]
        public void TestAllTokens()
        {
            const string source = @"
                hello
                5
                0.5
                0xCafeBabe
                1.5e6
                2.4e-6

                ( ) { }
                + - * /
                > < == >= <= !=
                =
                += -= *= /=

                if while fn
            ";

            Token[] expected =
            {
                new Token { type = Token.Type.IDENTIFIER, data = "hello" },
                Number(5f),
                Number(0.5f),
                Number(3405691582f),
                Number(1.5e6f),
                Number(2.4e-6f),

                Plain(Token.Type.LPAREN),
                Plain(Token.Type.RPAREN),
                Plain(Token.Type.LBRACE),
                Plain(Token.Type.RBRACE),
                Plain(Token.Type.PLUS),
                Plain(Token.Type.MINUS),
                Plain(Token.Type.ASTERISK),
                Plain(Token.Type.SLASH),
                Plain(Token.Type.GT),
                Plain(Token.Type.LT),
                Plain(Token.Type.EQ),
                Plain(Token.Type.GE),
                Plain(Token.Type.LE),
                Plain(Token.Type.NE),
                Plain(Token.Type.ASSIGN),
                Plain(Token.Type.PLUSASSIGN),
                Plain(Token.Type.MINUSASSIGN),
                Plain(Token.Type.ASTERISKASSIGN),
                Plain(Token.Type.SLASHASSIGN),

                Plain(Token.Type.IF),
                Plain(Token.Type.WHILE),
                Plain(Token.Type.FN),

                Plain(Token.Type.EOF),
            };

            Lexer lexer = new Lexer(source);
            bool sameSequence = lexer.TokenStream().SequenceEqual(expected);

            Assert.IsTrue(sameSequence);
        }
    }
}
diff --git a/ShintenScriptTest/Properties/AssemblyInfo.cs b/ShintenScriptTest/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..6e61d0f --- /dev/null +++ b/ShintenScriptTest/Properties/AssemblyInfo.cs @@ -0,0 +1,20 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +[assembly: AssemblyTitle("ShintenScriptTest")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("ShintenScriptTest")] +[assembly: AssemblyCopyright("Copyright © 2023")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +[assembly: ComVisible(false)] + +[assembly: Guid("e21e804a-941b-4992-bba5-9a9f95fe4f00")] + +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")]
diff --git a/ShintenScriptTest/ShintenScriptTest.csproj b/ShintenScriptTest/ShintenScriptTest.csproj new file mode 100644 index 0000000..408dbca --- /dev/null +++
b/ShintenScriptTest/ShintenScriptTest.csproj @@ -0,0 +1,74 @@ + + + + + + Debug + AnyCPU + {E21E804A-941B-4992-BBA5-9A9F95FE4F00} + Library + Properties + ShintenScriptTest + ShintenScriptTest + v4.7.2 + 512 + {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + 15.0 + $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) + $(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages + False + UnitTest + + + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + ..\packages\MSTest.TestFramework.2.1.2\lib\net45\Microsoft.VisualStudio.TestPlatform.TestFramework.dll + + + ..\packages\MSTest.TestFramework.2.1.2\lib\net45\Microsoft.VisualStudio.TestPlatform.TestFramework.Extensions.dll + + + + + + + + + + + + + + {F5926E19-83E0-4821-A9FF-5F34DEC8FDC9} + ShintenScript + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + + \ No newline at end of file
diff --git a/ShintenScriptTest/packages.config b/ShintenScriptTest/packages.config new file mode 100644 index 0000000..f84cb10 --- /dev/null +++ b/ShintenScriptTest/packages.config @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file
diff --git a/Token.cs b/Token.cs new file mode 100644 index 0000000..b525f23 --- /dev/null +++ b/Token.cs @@ -0,0 +1,55 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ShintenScript
{
    /// <summary>
    /// A lexical token: its <see cref="Type"/> plus an optional payload. Two tokens are
    /// equal when both their types and their payloads are equal.
    /// </summary>
    public struct Token : IEquatable<Token>
    {
        /// <summary>Kinds of token.</summary>
        public enum Type
        {
            // NULL is the default value of the enum, so default(Token) is the "no token" marker.
            NULL,
            EOF,
            IDENTIFIER,
            NUMBER,

            LPAREN, RPAREN, LBRACE, RBRACE,
            PLUS, MINUS, ASTERISK, SLASH,
            GT, LT, EQ, GE, LE, NE,
            ASSIGN,
            PLUSASSIGN, MINUSASSIGN, ASTERISKASSIGN, SLASHASSIGN,

            IF, WHILE, FN
        }

        /// <summary>Kind of this token.</summary>
        public Type type;

        /// <summary>Payload of the token, or null when the token carries none.</summary>
        public object data;

        /// <summary>Returns true when this token has type <see cref="Type.NULL"/>.</summary>
        public bool Null()
        {
            return type == Type.NULL;
        }

        /// <summary>
        /// Strongly typed equality: same type, and payloads that are both null or equal
        /// according to the payload's own <c>Equals</c>. Avoids boxing the struct.
        /// </summary>
        public bool Equals(Token other)
        {
            return type == other.type && object.Equals(data, other.data);
        }

        /// <summary>Returns true when <paramref name="obj"/> is an equal <see cref="Token"/>.</summary>
        public override bool Equals(object obj)
        {
            return obj is Token other && Equals(other);
        }

        /// <summary>
        /// Hash combining the type and the payload, so tokens that differ only in their
        /// payload do not all land in the same bucket. Equal tokens produce equal hashes.
        /// </summary>
        public override int GetHashCode()
        {
            unchecked
            {
                return ((int)type * 397) ^ (data?.GetHashCode() ?? 0);
            }
        }

        /// <summary>Formats the token as "TYPE payload", using "null" for a missing payload.</summary>
        public override string ToString()
        {
            return $"{type} {data?.ToString() ?? "null"}";
        }
    }
}