using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ShintenScript
{
    /// <summary>
    /// Converts source text into a sequence of <see cref="Token"/>s, with a
    /// one-token lookahead buffer (<see cref="Current"/> / <see cref="Pop"/>).
    /// </summary>
    public class Lexer
    {
        // Matched in order, first hit wins: two-character operators must stay
        // ahead of the one-character symbols they start with ("==" before "=").
        private static readonly (string, Token.Type)[] symbolTokens = new (string, Token.Type)[]
        {
            ("==", Token.Type.EQ),
            (">=", Token.Type.GE),
            ("<=", Token.Type.LE),
            ("!=", Token.Type.NE),
            ("+=", Token.Type.PLUSASSIGN),
            ("-=", Token.Type.MINUSASSIGN),
            ("*=", Token.Type.ASTERISKASSIGN),
            ("/=", Token.Type.SLASHASSIGN),
            (";", Token.Type.SEMICOLON),
            ("+", Token.Type.PLUS),
            ("-", Token.Type.MINUS),
            ("*", Token.Type.ASTERISK),
            ("/", Token.Type.SLASH),
            ("=", Token.Type.ASSIGN),
            ("(", Token.Type.LPAREN),
            (")", Token.Type.RPAREN),
            ("{", Token.Type.LBRACE),
            ("}", Token.Type.RBRACE),
            (">", Token.Type.GT),
            ("<", Token.Type.LT),
        };

        // Identifiers in this set are emitted as the Token.Type member of the
        // same name (matched case-insensitively) instead of IDENTIFIER.
        private static readonly HashSet<string> keywords = new HashSet<string> { "if", "while", "fn" };

        private static readonly char[] exponentMarkers = { 'e', 'E' };

        private readonly string code;
        private int index = 0;

        // Lookahead slot; a NULL-typed token means "nothing buffered".
        private Token current = new Token { type = Token.Type.NULL };

        /// <summary>Creates a lexer positioned at the start of <paramref name="code_"/>.</summary>
        /// <param name="code_">Source text to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="code_"/> is null.</exception>
        public Lexer(string code_)
        {
            if (code_ == null)
            {
                throw new ArgumentNullException(nameof(code_));
            }

            code = code_;
        }

        /// <summary>
        /// Skips whitespace and reads the next token directly from the source,
        /// bypassing the lookahead buffer.
        /// </summary>
        /// <returns>
        /// The next token, or an EOF token once the input is exhausted.
        /// </returns>
        /// <exception cref="LexingException">
        /// The next character cannot start a token, or a numeric literal is malformed.
        /// </exception>
        public virtual Token ParseOne()
        {
            while (!EOF() && char.IsWhiteSpace(code[index]))
            {
                index++;
            }

            if (EOF())
            {
                return new Token { type = Token.Type.EOF };
            }

            if (TryReadSymbol(out Token symbolToken))
            {
                return symbolToken;
            }

            if (char.IsLetter(code[index]))
            {
                return ReadIdentifierOrKeyword();
            }

            if (char.IsNumber(code[index]))
            {
                return ReadNumber();
            }

            throw new LexingException($"Encountered unexpected character '{code[index]}'");
        }

        // Tries each entry of symbolTokens at the current position and consumes the first match.
        private bool TryReadSymbol(out Token result)
        {
            int remaining = code.Length - index;
            foreach ((string symbol, Token.Type token) in symbolTokens)
            {
                // The length guard matters: near the end of input a two-character
                // operator must simply not match rather than read past the buffer.
                if (symbol.Length <= remaining
                    && string.CompareOrdinal(code, index, symbol, 0, symbol.Length) == 0)
                {
                    index += symbol.Length;
                    result = new Token { type = token };
                    return true;
                }
            }

            result = default(Token);
            return false;
        }

        // Consumes a run of letters/digits and classifies it as a keyword or an identifier.
        private Token ReadIdentifierOrKeyword()
        {
            int start = index;
            while (!EOF() && char.IsLetterOrDigit(code[index]))
            {
                index++;
            }

            string identifier = code.Substring(start, index - start);
            if (keywords.Contains(identifier))
            {
                Enum.TryParse(identifier, true, out Token.Type type);
                return new Token { type = type };
            }

            return new Token { type = Token.Type.IDENTIFIER, data = identifier };
        }

        // Consumes a numeric literal (decimal integer, 0x-prefixed hex, or float)
        // and returns it as a NUMBER token whose data is a float.
        private Token ReadNumber()
        {
            int start = index;

            // Set once a letter other than 'e'/'E' has been consumed (e.g. the 'x'
            // of a hex prefix). After that a '-' is never treated as an exponent
            // sign, so "0x1e-5" lexes as 0x1e, MINUS, 5.
            bool seenLetter = false;
            while (!EOF())
            {
                char c = code[index];
                bool isExponentSign = !seenLetter
                    && c == '-'
                    && (code[index - 1] == 'e' || code[index - 1] == 'E');
                if (!char.IsLetterOrDigit(c) && c != '.' && !isExponentSign)
                {
                    break;
                }

                if (char.IsLetter(c) && c != 'e' && c != 'E')
                {
                    seenLetter = true;
                }

                index++;
            }

            string numberString = code.Substring(start, index - start);
            float number;
            if (numberString.Contains("."))
            {
                if (!float.TryParse(numberString, NumberStyles.Float, CultureInfo.InvariantCulture, out number))
                {
                    throw new LexingException($"Invalid float literal \"{numberString}\"");
                }
            }
            else if (TryParseInteger(numberString, out long integer))
            {
                number = (float)integer;
            }
            else if (numberString.IndexOfAny(exponentMarkers) < 0
                || !float.TryParse(numberString, NumberStyles.Float, CultureInfo.InvariantCulture, out number))
            {
                // Reached when the literal is neither a valid integer nor an
                // exponent-form float such as "1e5" or "2e-3".
                throw new LexingException($"Invalid integer literal \"{numberString}\"");
            }

            return new Token { type = Token.Type.NUMBER, data = number };
        }

        // Parses a 0x-prefixed hexadecimal or plain decimal integer literal.
        private static bool TryParseInteger(string text, out long value)
        {
            if (text.StartsWith("0x", StringComparison.OrdinalIgnoreCase)
                && long.TryParse(text.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out value))
            {
                return true;
            }

            return long.TryParse(text, NumberStyles.Number, CultureInfo.InvariantCulture, out value);
        }

        // True when the read position is at or past the end of the source text.
        private bool EOF()
        {
            return index >= code.Length;
        }

        // True when the token's type equals any of the given types.
        private static bool Matches(Token token, Token.Type[] types)
        {
            foreach (Token.Type type in types)
            {
                if (token.type == type)
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Returns the buffered lookahead token, lexing one first if the buffer is empty.
        /// Repeated calls return the same token until <see cref="Pop"/> is called.
        /// </summary>
        public Token Current()
        {
            if (current.Null())
            {
                current = ParseOne();
            }

            return current;
        }

        /// <summary>Returns the lookahead token and clears the buffer so the next read advances.</summary>
        public Token Pop()
        {
            Token token = Current();
            current = new Token { type = Token.Type.NULL };
            return token;
        }

        /// <summary>
        /// Checks, without consuming it, whether the lookahead token has one of the given types.
        /// </summary>
        /// <param name="types">Token types to test against.</param>
        /// <returns>True if the lookahead token's type is in <paramref name="types"/>.</returns>
        public bool Accept(params Token.Type[] types)
        {
            return Matches(Current(), types);
        }

        /// <summary>
        /// Consumes the next token and requires it to have one of the given types.
        /// The token is consumed even when the check fails.
        /// </summary>
        /// <param name="types">Acceptable token types.</param>
        /// <returns>The consumed token.</returns>
        /// <exception cref="ParsingException">The token's type is not in <paramref name="types"/>.</exception>
        public Token Expect(params Token.Type[] types)
        {
            Token token = Pop();
            if (Matches(token, types))
            {
                return token;
            }

            if (types.Length == 0)
            {
                // Nothing can satisfy an empty expectation; report it as a parse
                // error instead of indexing into the empty array below.
                throw new ParsingException($"Encountered unexpected {token.type}");
            }

            if (types.Length == 1)
            {
                throw new ParsingException($"Encountered {token.type} when expecting {types[0]}");
            }

            if (types.Length == 2)
            {
                throw new ParsingException($"Encountered {token.type} when expecting one of {types[0]} or {types[1]}");
            }

            string list = string.Join(", ", types.Take(types.Length - 1));
            throw new ParsingException($"Encountered {token.type} when expecting one of {list}, or {types[types.Length - 1]}");
        }

        /// <summary>
        /// Lazily yields the remaining tokens, consuming them, and stops before the EOF token.
        /// </summary>
        public IEnumerable<Token> TokenStream()
        {
            while (true)
            {
                Token token = Pop();
                if (token.type == Token.Type.EOF)
                {
                    break;
                }

                yield return token;
            }
        }

        /// <summary>Writes every remaining token to the console, one per line, consuming them.</summary>
        public void Print()
        {
            foreach (Token token in TokenStream())
            {
                Console.WriteLine(token);
            }
        }
    }
}