// ShintenScript/Lexer.cs
//
// 218 lines
// 6.3 KiB
// C#

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ShintenScript
{
/// <summary>
/// Splits source text into <see cref="Token"/>s on demand. The lexer keeps a
/// one-token lookahead buffer: <see cref="Current"/> peeks at the next token,
/// <see cref="Pop"/> consumes it.
/// </summary>
public class Lexer
{
    // Operator and punctuation spellings. Matching takes the FIRST entry that fits,
    // so every two-character symbol must stay listed before the one-character
    // symbol that is its prefix (e.g. "==" before "=").
    static readonly (string, Token.Type)[] symbolTokens = new (string, Token.Type)[]
    {
        ("==", Token.Type.EQ),
        (">=", Token.Type.GE),
        ("<=", Token.Type.LE),
        ("!=", Token.Type.NE),
        ("+=", Token.Type.PLUSASSIGN),
        ("-=", Token.Type.MINUSASSIGN),
        ("*=", Token.Type.ASTERISKASSIGN),
        ("/=", Token.Type.SLASHASSIGN),
        ("+", Token.Type.PLUS),
        ("-", Token.Type.MINUS),
        ("*", Token.Type.ASTERISK),
        ("/", Token.Type.SLASH),
        ("=", Token.Type.ASSIGN),
        ("(", Token.Type.LPAREN),
        (")", Token.Type.RPAREN),
        ("{", Token.Type.LBRACE),
        ("}", Token.Type.RBRACE),
        (">", Token.Type.GT),
        ("<", Token.Type.LT),
    };

    // Reserved words. Each one is mapped to the Token.Type member with the same
    // name (compared case-insensitively) when it is lexed.
    static readonly HashSet<string> keywords = new HashSet<string>
    {
        "if", "while", "fn"
    };

    // Characters that mark the exponent part of a decimal floating-point literal.
    static readonly char[] exponentMarkers = { 'e', 'E' };

    // The full input text and the position of the next unread character in it.
    readonly string code;
    int index = 0;

    // Lookahead buffer; a token of type NULL means "nothing buffered yet".
    Token current = new Token { type = Token.Type.NULL };

    /// <summary>Creates a lexer positioned at the start of <paramref name="code_"/>.</summary>
    /// <param name="code_">The source text to tokenize.</param>
    /// <exception cref="ArgumentNullException"><paramref name="code_"/> is null.</exception>
    public Lexer(string code_)
    {
        if (code_ == null)
        {
            throw new ArgumentNullException(nameof(code_));
        }
        code = code_;
    }

    /// <summary>
    /// Reads the next token directly from the input, bypassing the lookahead buffer.
    /// Leading whitespace is skipped; an EOF token is returned once the input is exhausted.
    /// </summary>
    /// <exception cref="LexingException">
    /// The next character cannot start any token, or a numeric literal is malformed.
    /// </exception>
    Token ParseOne()
    {
        while (!EOF() && char.IsWhiteSpace(code[index]))
        {
            index++;
        }
        if (EOF())
        {
            return new Token { type = Token.Type.EOF };
        }

        foreach ((string symbol, Token.Type token) in symbolTokens)
        {
            // The length guard matters: near the end of the input a two-character
            // symbol may not fit, and that must count as "no match", not an error.
            if (index + symbol.Length <= code.Length
                && string.CompareOrdinal(code, index, symbol, 0, symbol.Length) == 0)
            {
                index += symbol.Length;
                return new Token { type = token };
            }
        }

        if (char.IsLetter(code[index]))
        {
            return ReadWord();
        }

        // IsDigit rather than IsNumber: characters such as '²' are "numbers" but can
        // never be part of a literal, so they belong in the error case below.
        if (char.IsDigit(code[index]))
        {
            return ReadNumber();
        }

        throw new LexingException($"Encountered unexpected character '{code[index]}'");
    }

    /// <summary>
    /// Reads a run of letters and digits starting at the current position and
    /// classifies it as a keyword or an identifier.
    /// </summary>
    Token ReadWord()
    {
        int start = index;
        while (!EOF() && char.IsLetterOrDigit(code[index]))
        {
            index++;
        }
        string identifier = code.Substring(start, index - start);

        if (!keywords.Contains(identifier))
        {
            return new Token { type = Token.Type.IDENTIFIER, data = identifier };
        }

        // A keyword without a same-named enum member is a programming error in the
        // lexer tables; fail loudly instead of emitting a default-typed token.
        if (!Enum.TryParse(identifier, true, out Token.Type type))
        {
            throw new InvalidOperationException($"Keyword \"{identifier}\" has no matching Token.Type member");
        }
        return new Token { type = type };
    }

    /// <summary>
    /// Reads a numeric literal starting at the current position. Accepted forms are
    /// decimal integers, hexadecimal integers with a <c>0x</c>/<c>0X</c> prefix, and
    /// decimal floats with an optional exponent (<c>1.5</c>, <c>1e5</c>, <c>2.5e-3</c>,
    /// <c>1e+5</c>). Every value is stored in the token as a <see cref="float"/>.
    /// </summary>
    /// <exception cref="LexingException">The consumed text is not a valid literal.</exception>
    Token ReadNumber()
    {
        int start = index;
        bool isHex = code[index] == '0'
            && index + 1 < code.Length
            && (code[index + 1] == 'x' || code[index + 1] == 'X');

        while (!EOF())
        {
            char c = code[index];
            bool partOfLiteral = char.IsLetterOrDigit(c) || c == '.';

            // A sign belongs to the literal only as an exponent sign, i.e. directly
            // after 'e'/'E' in a non-hex literal. In hex, 'E' is a digit, so
            // "0x1E-5" must lex as 0x1E, MINUS, 5.
            if (!partOfLiteral && !isHex && (c == '-' || c == '+') && index > start)
            {
                char previous = code[index - 1];
                partOfLiteral = previous == 'e' || previous == 'E';
            }

            if (!partOfLiteral)
            {
                break;
            }
            index++;
        }

        string numberString = code.Substring(start, index - start);
        bool isFloat = numberString.Contains(".")
            || (!isHex && numberString.IndexOfAny(exponentMarkers) >= 0);

        float number;
        if (isFloat)
        {
            if (!float.TryParse(numberString, NumberStyles.Float, CultureInfo.InvariantCulture, out number))
            {
                throw new LexingException($"Invalid float literal \"{numberString}\"");
            }
        }
        else
        {
            long integer;
            bool parsed = isHex
                ? long.TryParse(numberString.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out integer)
                : long.TryParse(numberString, NumberStyles.Number, CultureInfo.InvariantCulture, out integer);
            if (!parsed)
            {
                throw new LexingException($"Invalid integer literal \"{numberString}\"");
            }

            // Integers are stored as float too, so large values lose precision here.
            number = (float)integer;
        }

        return new Token { type = Token.Type.NUMBER, data = number };
    }

    /// <summary>Returns true once every character of the input has been consumed.</summary>
    bool EOF()
    {
        return index >= code.Length;
    }

    /// <summary>
    /// Returns true if <paramref name="token"/> has one of the given types.
    /// </summary>
    static bool Matches(Token token, Token.Type[] types)
    {
        foreach (Token.Type type in types)
        {
            if (token.type == type)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Returns the next token without consuming it, lexing it first if nothing is buffered.
    /// </summary>
    /// <exception cref="LexingException">The next token could not be lexed.</exception>
    public Token Current()
    {
        if (current.Null())
        {
            current = ParseOne();
        }
        return current;
    }

    /// <summary>Returns the next token and consumes it.</summary>
    /// <exception cref="LexingException">The next token could not be lexed.</exception>
    public Token Pop()
    {
        Token token = Current();
        current = new Token { type = Token.Type.NULL };
        return token;
    }

    /// <summary>
    /// Checks whether the next token has one of the given types. The token is not consumed.
    /// </summary>
    /// <param name="types">Token types to test against; an empty list never matches.</param>
    /// <returns>True if the next token's type is in <paramref name="types"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
    public bool Accept(params Token.Type[] types)
    {
        if (types == null)
        {
            throw new ArgumentNullException(nameof(types));
        }
        return Matches(Current(), types);
    }

    /// <summary>
    /// Consumes the next token and returns it, requiring it to have one of the given types.
    /// </summary>
    /// <param name="types">The acceptable token types; at least one is required.</param>
    /// <returns>The consumed token.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="types"/> is empty.</exception>
    /// <exception cref="LexingException">
    /// The token has a different type. It has already been consumed at that point.
    /// </exception>
    public Token Expect(params Token.Type[] types)
    {
        // Validate before popping so a misuse of the API does not eat a token.
        if (types == null)
        {
            throw new ArgumentNullException(nameof(types));
        }
        if (types.Length == 0)
        {
            throw new ArgumentException("At least one expected token type is required", nameof(types));
        }

        Token token = Pop();
        if (Matches(token, types))
        {
            return token;
        }

        string expectation;
        if (types.Length == 1)
        {
            expectation = $"{types[0]}";
        }
        else if (types.Length == 2)
        {
            expectation = $"one of {types[0]} or {types[1]}";
        }
        else
        {
            string list = string.Join(", ", types.Take(types.Length - 1));
            expectation = $"one of {list}, or {types[types.Length - 1]}";
        }
        throw new LexingException($"Encountered {token.type} when expecting {expectation}");
    }

    /// <summary>
    /// Lazily yields the remaining tokens, starting with any token already buffered
    /// by <see cref="Current"/>. Enumerating the sequence consumes the tokens.
    /// </summary>
    /// <remarks>
    /// The sequence is not guaranteed to end with an EOF token: one is produced only
    /// when unread whitespace follows the last real token, or when an EOF token was
    /// already buffered before enumeration started.
    /// </remarks>
    public IEnumerable<Token> TokenStream()
    {
        // Checking the buffer as well as the input position keeps a token that was
        // peeked at the very end of the input from being silently dropped.
        while (!current.Null() || !EOF())
        {
            yield return Pop();
        }
    }

    /// <summary>Writes every remaining token to the console, one per line.</summary>
    public void Print()
    {
        foreach (Token token in TokenStream())
        {
            Console.WriteLine(token);
        }
    }
}
}