218 lines
6.3 KiB
C#
218 lines
6.3 KiB
C#
|
using System;
|
|||
|
using System.Collections.Generic;
|
|||
|
using System.Globalization;
|
|||
|
using System.Linq;
|
|||
|
using System.Text;
|
|||
|
using System.Threading.Tasks;
|
|||
|
|
|||
|
namespace ShintenScript
|
|||
|
{
|
|||
|
/// <summary>
/// Hand-written lexer that turns source text into <see cref="Token"/>s.
/// Tokens are produced on demand; a single token of lookahead is cached so that
/// <see cref="Current"/> and <see cref="Accept"/> can peek without consuming.
/// </summary>
public class Lexer
{
    // Symbol spellings and the token type each one produces.
    // Order matters: ParseOne returns the FIRST entry that matches at the current
    // position, so every two-character operator must appear before the
    // one-character operator that is its prefix ("==" before "=", ">=" before ">").
    static readonly (string, Token.Type)[] symbolTokens = new (string, Token.Type)[]
    {
        ("==", Token.Type.EQ),
        (">=", Token.Type.GE),
        ("<=", Token.Type.LE),
        ("!=", Token.Type.NE),
        ("+=", Token.Type.PLUSASSIGN),
        ("-=", Token.Type.MINUSASSIGN),
        ("*=", Token.Type.ASTERISKASSIGN),
        ("/=", Token.Type.SLASHASSIGN),

        ("+", Token.Type.PLUS),
        ("-", Token.Type.MINUS),
        ("*", Token.Type.ASTERISK),
        ("/", Token.Type.SLASH),
        ("=", Token.Type.ASSIGN),
        ("(", Token.Type.LPAREN),
        (")", Token.Type.RPAREN),
        ("{", Token.Type.LBRACE),
        ("}", Token.Type.RBRACE),
        (">", Token.Type.GT),
        ("<", Token.Type.LT),
    };

    // Reserved words. Each one is turned into a Token.Type by a case-insensitive
    // lookup of the word among the enum's member names (see ReadWord).
    static readonly HashSet<string> keywords = new HashSet<string>
    {
        "if", "while", "fn"
    };

    // Characters whose presence in a non-hexadecimal literal makes it a float.
    static readonly char[] floatMarkers = { '.', 'e', 'E' };

    // Source text being lexed and the position of the next unread character.
    string code;
    int index = 0;

    /// <summary>Creates a lexer over the given source text.</summary>
    /// <param name="code_">Source text to tokenize.</param>
    /// <exception cref="ArgumentNullException"><paramref name="code_"/> is null.</exception>
    public Lexer(string code_)
    {
        if (code_ == null)
        {
            throw new ArgumentNullException(nameof(code_));
        }

        code = code_;
    }

    // One-token lookahead cache; a token of type NULL means "nothing cached".
    Token current = new Token { type = Token.Type.NULL };

    /// <summary>
    /// Reads the next token starting at <c>index</c>, skipping leading whitespace.
    /// Returns an EOF token once the input is exhausted.
    /// </summary>
    /// <exception cref="LexingException">
    /// The next character cannot start any token, or a numeric literal is malformed.
    /// </exception>
    Token ParseOne()
    {
        while (!EOF() && char.IsWhiteSpace(code[index]))
        {
            index++;
        }

        if (EOF())
        {
            return new Token { type = Token.Type.EOF };
        }

        foreach ((string symbol, Token.Type token) in symbolTokens)
        {
            if (MatchesAt(symbol))
            {
                index += symbol.Length;
                return new Token { type = token };
            }
        }

        if (char.IsLetter(code[index]))
        {
            return ReadWord();
        }

        if (char.IsNumber(code[index]))
        {
            return ReadNumber();
        }

        throw new LexingException($"Encountered unexpected character '{code[index]}'");
    }

    // True when `symbol` occurs in the source exactly at the current position.
    // The length check keeps a multi-character symbol from being probed past the
    // end of the input (Substring would throw there).
    bool MatchesAt(string symbol)
    {
        return index + symbol.Length <= code.Length
            && string.CompareOrdinal(code, index, symbol, 0, symbol.Length) == 0;
    }

    // Reads a run of letters/digits starting at a letter and classifies it as a
    // keyword token or an IDENTIFIER token carrying the text as its data.
    Token ReadWord()
    {
        int start = index;

        while (!EOF() && char.IsLetterOrDigit(code[index]))
        {
            index++;
        }

        string identifier = code.Substring(start, index - start);

        if (keywords.Contains(identifier))
        {
            // A keyword without a same-named enum member is a configuration error;
            // fail loudly instead of emitting a token with a default type.
            if (!Enum.TryParse(identifier, true, out Token.Type type))
            {
                throw new LexingException($"Keyword \"{identifier}\" has no matching token type");
            }

            return new Token { type = type };
        }

        return new Token { type = Token.Type.IDENTIFIER, data = identifier };
    }

    // Reads a numeric literal and returns a NUMBER token whose data is a float.
    // Accepted forms: decimal integers, "0x"-prefixed hexadecimal integers, and
    // floats with a decimal point and/or an exponent ("1.5", "1e5", "2.5e-3").
    Token ReadNumber()
    {
        int start = index;

        bool isHex = code[start] == '0'
            && start + 1 < code.Length
            && (code[start + 1] == 'x' || code[start + 1] == 'X');

        while (!EOF())
        {
            char c = code[index];

            // '-' belongs to the literal only as the sign of an exponent. In a hex
            // literal 'E' is a digit, so a following '-' is a separate operator.
            bool isExponentSign = !isHex
                && c == '-'
                && index > start
                && (code[index - 1] == 'e' || code[index - 1] == 'E');

            if (!char.IsLetterOrDigit(c) && c != '.' && !isExponentSign)
            {
                break;
            }

            index++;
        }

        string numberString = code.Substring(start, index - start);
        float number;

        if (isHex)
        {
            if (!long.TryParse(numberString.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out long hexValue))
            {
                throw new LexingException($"Invalid integer literal \"{numberString}\"");
            }

            number = (float)hexValue;
        }
        else if (numberString.IndexOfAny(floatMarkers) >= 0)
        {
            if (!float.TryParse(numberString, NumberStyles.Float, CultureInfo.InvariantCulture, out number))
            {
                throw new LexingException($"Invalid float literal \"{numberString}\"");
            }
        }
        else
        {
            if (!long.TryParse(numberString, NumberStyles.Number, CultureInfo.InvariantCulture, out long integer))
            {
                throw new LexingException($"Invalid integer literal \"{numberString}\"");
            }

            number = (float)integer;
        }

        return new Token { type = Token.Type.NUMBER, data = number };
    }

    // True once every character of the source has been consumed.
    bool EOF()
    {
        return index >= code.Length;
    }

    /// <summary>
    /// Returns the next token without consuming it. The token is lexed on first
    /// request and cached until <see cref="Pop"/> is called.
    /// </summary>
    public Token Current()
    {
        if (current.Null())
        {
            current = ParseOne();
        }

        return current;
    }

    /// <summary>Consumes and returns the next token.</summary>
    public Token Pop()
    {
        Token token = Current();

        // Clear the lookahead so the next Current() lexes a fresh token.
        current = new Token { type = Token.Type.NULL };

        return token;
    }

    /// <summary>
    /// Reports whether the next token has one of the given types, without consuming it.
    /// </summary>
    /// <param name="types">Token types to test against.</param>
    /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
    public bool Accept(params Token.Type[] types)
    {
        if (types == null)
        {
            throw new ArgumentNullException(nameof(types));
        }

        return HasAnyType(Current(), types);
    }

    /// <summary>
    /// Consumes the next token and returns it if it has one of the given types.
    /// </summary>
    /// <param name="types">Acceptable token types; at least one is required.</param>
    /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="types"/> is empty.</exception>
    /// <exception cref="LexingException">The next token has none of the given types.</exception>
    public Token Expect(params Token.Type[] types)
    {
        if (types == null)
        {
            throw new ArgumentNullException(nameof(types));
        }

        // Reject the call before consuming anything: with nothing to expect there
        // is no meaningful error message to build.
        if (types.Length == 0)
        {
            throw new ArgumentException("At least one expected token type is required", nameof(types));
        }

        Token token = Pop();

        if (HasAnyType(token, types))
        {
            return token;
        }

        string expected;

        if (types.Length == 1)
        {
            expected = $"{types[0]}";
        }
        else if (types.Length == 2)
        {
            expected = $"one of {types[0]} or {types[1]}";
        }
        else
        {
            string list = string.Join(", ", types.Take(types.Length - 1));
            expected = $"one of {list}, or {types[types.Length - 1]}";
        }

        throw new LexingException($"Encountered {token.type} when expecting {expected}");
    }

    // Shared by Accept and Expect: true when the token's type is listed in `types`.
    static bool HasAnyType(Token token, Token.Type[] types)
    {
        foreach (Token.Type type in types)
        {
            if (token.type == type)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Lazily consumes and yields every remaining token, ending with exactly one
    /// EOF token. A token already peeked through <see cref="Current"/> is included.
    /// </summary>
    public IEnumerable<Token> TokenStream()
    {
        Token token;

        // Stop on the EOF token rather than on the raw character position: the
        // position alone says nothing about a cached lookahead token, and trailing
        // whitespace would otherwise decide whether an EOF token is produced.
        do
        {
            token = Pop();
            yield return token;
        }
        while (token.type != Token.Type.EOF);
    }

    /// <summary>Consumes all remaining tokens and writes each one to the console.</summary>
    public void Print()
    {
        foreach (Token token in TokenStream())
        {
            Console.WriteLine(token);
        }
    }
}
|
|||
|
}
|