ShintenScript/Lexer.cs

229 lines
6.7 KiB
C#
Raw Permalink Normal View History

2023-01-27 11:24:58 +00:00
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ShintenScript
{
/// <summary>
/// Turns ShintenScript source text into <see cref="Token"/>s on demand.
/// Offers raw scanning (<see cref="ParseOne"/>) plus a one-token lookahead
/// API (<see cref="Current"/>, <see cref="Pop"/>, <see cref="Accept"/>,
/// <see cref="Expect"/>).
/// </summary>
public class Lexer
{
    // Operator/punctuation table, scanned top to bottom with the first match
    // winning. Two-character operators are listed before the single-character
    // symbols they start with so that e.g. "==" is not lexed as "=" "=".
    static readonly (string, Token.Type)[] symbolTokens = new (string, Token.Type)[]
    {
        ("==", Token.Type.EQ),
        (">=", Token.Type.GE),
        ("<=", Token.Type.LE),
        ("!=", Token.Type.NE),
        ("+=", Token.Type.PLUSASSIGN),
        ("-=", Token.Type.MINUSASSIGN),
        ("*=", Token.Type.ASTERISKASSIGN),
        ("/=", Token.Type.SLASHASSIGN),
        (";", Token.Type.SEMICOLON),
        ("+", Token.Type.PLUS),
        ("-", Token.Type.MINUS),
        ("*", Token.Type.ASTERISK),
        ("/", Token.Type.SLASH),
        ("=", Token.Type.ASSIGN),
        ("(", Token.Type.LPAREN),
        (")", Token.Type.RPAREN),
        ("{", Token.Type.LBRACE),
        ("}", Token.Type.RBRACE),
        (">", Token.Type.GT),
        ("<", Token.Type.LT),
    };

    // Reserved words (matched case-sensitively). Each one is converted to a
    // Token.Type by a case-insensitive Enum.TryParse on its spelling.
    static readonly HashSet<string> keywords = new HashSet<string>
    {
        "if", "while", "fn"
    };

    // Characters that introduce the exponent part of a decimal literal.
    static readonly char[] exponentMarkers = { 'e', 'E' };

    // Source text being scanned and the position of the next unread character.
    readonly string code;
    int index = 0;

    // One-token lookahead buffer; a NULL-typed token is the initial/reset
    // value, and Current() refills the buffer whenever current.Null() is true.
    Token current = new Token { type = Token.Type.NULL };

    /// <summary>Creates a lexer positioned at the start of <paramref name="code_"/>.</summary>
    /// <param name="code_">Source text to tokenize.</param>
    /// <exception cref="ArgumentNullException"><paramref name="code_"/> is null.</exception>
    public Lexer(string code_)
    {
        if (code_ == null)
            throw new ArgumentNullException(nameof(code_));
        code = code_;
    }

    /// <summary>
    /// Scans and returns the next token directly from the source, bypassing
    /// the lookahead buffer. Leading whitespace is skipped.
    /// </summary>
    /// <returns>
    /// A symbol, keyword, IDENTIFIER (data = name as string) or NUMBER
    /// (data = value as float) token, or an EOF token once the input is
    /// exhausted.
    /// </returns>
    /// <exception cref="LexingException">
    /// An unexpected character or a malformed numeric literal was found.
    /// </exception>
    public virtual Token ParseOne()
    {
        while (!EOF() && char.IsWhiteSpace(code[index]))
        {
            index++;
        }
        if (EOF())
            return new Token { type = Token.Type.EOF };

        foreach ((string symbol, Token.Type token) in symbolTokens)
        {
            if (MatchesAt(symbol))
            {
                index += symbol.Length;
                return new Token { type = token };
            }
        }

        if (char.IsLetter(code[index]))
            return LexWord();

        // IsDigit rather than IsNumber: characters such as superscripts count
        // as "numbers" but are never consumed by the literal scanner, which
        // would otherwise report a confusing empty literal.
        if (char.IsDigit(code[index]))
            return LexNumber();

        throw new LexingException($"Encountered unexpected character '{code[index]}'");
    }

    // True when `symbol` occurs at the current position. The explicit length
    // check keeps the probe safe when fewer characters remain than the
    // symbol is long (e.g. a one-character token at the very end of input).
    bool MatchesAt(string symbol)
    {
        return index + symbol.Length <= code.Length
            && string.CompareOrdinal(code, index, symbol, 0, symbol.Length) == 0;
    }

    // Consumes a run of letters/digits and classifies it as keyword or identifier.
    Token LexWord()
    {
        int start = index;
        while (!EOF() && char.IsLetterOrDigit(code[index]))
        {
            index++;
        }
        string identifier = code.Substring(start, index - start);

        if (keywords.Contains(identifier))
        {
            // If no Token.Type member matches the keyword, TryParse leaves
            // `type` at the enum's default value; the result is not checked.
            Enum.TryParse(identifier, true, out Token.Type type);
            return new Token { type = type };
        }
        return new Token { type = Token.Type.IDENTIFIER, data = identifier };
    }

    // Consumes a numeric literal (decimal integer, 0x-prefixed hex, or float
    // with optional exponent) and returns it as a NUMBER token holding a float.
    Token LexNumber()
    {
        StringBuilder numberBuilder = new StringBuilder();
        // Set once a letter other than e/E has been consumed (e.g. the 'x' of
        // a hex prefix); from then on a sign is never treated as part of the
        // literal, so "0x1e-5" lexes as 0x1e, '-', 5.
        bool seenLetter = false;

        while (!EOF())
        {
            char c = code[index];
            bool exponentSign = !seenLetter
                && (c == '-' || c == '+')
                && numberBuilder.Length > 0
                && (numberBuilder[numberBuilder.Length - 1] == 'e' || numberBuilder[numberBuilder.Length - 1] == 'E');

            if (!(char.IsLetterOrDigit(c) || c == '.' || exponentSign))
                break;

            numberBuilder.Append(c);
            if (char.IsLetter(c) && c != 'e' && c != 'E')
                seenLetter = true;
            index++;
        }

        string numberString = numberBuilder.ToString();
        bool isHex = numberString.StartsWith("0x", StringComparison.OrdinalIgnoreCase);
        // A literal is a float if it has a decimal point, or an exponent
        // marker outside hex (where 'e' is just a digit).
        bool isFloat = numberString.Contains(".")
            || (!isHex && numberString.IndexOfAny(exponentMarkers) >= 0);

        float number;
        if (isFloat)
        {
            if (!float.TryParse(numberString, NumberStyles.Float, CultureInfo.InvariantCulture, out number))
            {
                throw new LexingException($"Invalid float literal \"{numberString}\"");
            }
        }
        else
        {
            long integer;
            bool parsed = isHex
                ? long.TryParse(numberString.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out integer)
                : long.TryParse(numberString, NumberStyles.Number, CultureInfo.InvariantCulture, out integer);
            if (!parsed)
            {
                throw new LexingException($"Invalid integer literal \"{numberString}\"");
            }
            number = (float)integer;
        }
        return new Token { type = Token.Type.NUMBER, data = number };
    }

    // True once every character of the source has been consumed.
    bool EOF()
    {
        return index >= code.Length;
    }

    /// <summary>
    /// Returns the lookahead token without consuming it, scanning a new one
    /// via <see cref="ParseOne"/> if the buffer is empty.
    /// </summary>
    public Token Current()
    {
        if (current.Null())
        {
            current = ParseOne();
        }
        return current;
    }

    /// <summary>Returns the lookahead token and clears the buffer so the next call scans afresh.</summary>
    public Token Pop()
    {
        Token token = Current();
        current = new Token { type = Token.Type.NULL };
        return token;
    }

    /// <summary>
    /// Checks, without consuming anything, whether the lookahead token has one
    /// of the given types.
    /// </summary>
    /// <param name="types">Token types to test against.</param>
    /// <returns>true if the lookahead token's type is in <paramref name="types"/>.</returns>
    public bool Accept(params Token.Type[] types)
    {
        Token token = Current();
        foreach (Token.Type type in types)
        {
            if (token.type == type)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Consumes the lookahead token and returns it if its type is one of
    /// <paramref name="types"/>; otherwise fails. The token is consumed in
    /// both cases.
    /// </summary>
    /// <param name="types">Acceptable token types; at least one is required.</param>
    /// <returns>The consumed token.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="types"/> is empty.</exception>
    /// <exception cref="ParsingException">The token's type is not among <paramref name="types"/>.</exception>
    public Token Expect(params Token.Type[] types)
    {
        // Validate before popping so a misuse does not silently eat a token.
        if (types == null)
            throw new ArgumentNullException(nameof(types));
        if (types.Length == 0)
            throw new ArgumentException("At least one expected token type is required", nameof(types));

        Token token = Pop();
        foreach (Token.Type type in types)
        {
            if (token.type == type)
            {
                return token;
            }
        }

        // Phrase the expectation as "A", "one of A or B", or "one of A, B, or C".
        string expected;
        if (types.Length == 1)
        {
            expected = $"{types[0]}";
        }
        else if (types.Length == 2)
        {
            expected = $"one of {types[0]} or {types[1]}";
        }
        else
        {
            string list = string.Join(", ", types.Take(types.Length - 1));
            expected = $"one of {list}, or {types[types.Length - 1]}";
        }
        throw new ParsingException($"Encountered {token.type} when expecting {expected}");
    }

    /// <summary>Lazily yields every remaining token up to, but not including, EOF.</summary>
    public IEnumerable<Token> TokenStream()
    {
        while (true)
        {
            Token token = Pop();
            if (token.type == Token.Type.EOF)
                break;
            yield return token;
        }
    }

    /// <summary>Writes every remaining token to the console, one per line.</summary>
    public void Print()
    {
        foreach (Token token in TokenStream())
            Console.WriteLine(token);
    }
}
}