Files
jarvis-models/src/dotchain/runtime/interpreter.py
2024-04-08 10:29:50 +08:00

420 lines
13 KiB
Python

from ast import Expression
import copy
from .ast import Assignment, BinaryExpression, Block, BoolLiteral, BreakStatement, CallExpression, EmptyStatement, FloatLiteral, Fun, Identifier, IfStatement, IntLiteral, Program, ReturnStatement, Statement, StringLiteral, UnaryExpression, VariableDeclaration, WhileStatement
from .tokenizer import Token, TokenType, Tokenizer
# Token types after which a "+", "-" or "!" is read as a unary operator:
# ExpressionParser.is_unary returns True when the previous token's type is
# listed here.
unary_prev_statement = [
    # comments, punctuation and block delimiters
    TokenType.COMMENTS, TokenType.LEFT_PAREN, TokenType.COMMA,
    TokenType.LEFT_BRACE, TokenType.RIGHT_BRACE, TokenType.SEMICOLON,
    # keywords
    TokenType.LET, TokenType.RETURN, TokenType.IF, TokenType.ELSE,
    TokenType.WHILE, TokenType.FOR,
    # operators
    TokenType.LOGICAL_OPERATOR, TokenType.NOT, TokenType.ASSIGNMENT,
    TokenType.MULTIPLICATIVE_OPERATOR, TokenType.ADDITIVE_OPERATOR,
    TokenType.ARROW,
]
# Token types that stop ExpressionParser.parse when it is parsing the operand
# of a unary operator (parse(unary=True)) and the token is not itself unary.
unary_end_statement = [
    TokenType.MULTIPLICATIVE_OPERATOR, TokenType.ADDITIVE_OPERATOR,
    TokenType.LOGICAL_OPERATOR,
]
# Token types that terminate an expression: ExpressionParser.is_end returns
# True as soon as the current token's type is listed here.
end_statement = [
    # separators
    TokenType.SEMICOLON, TokenType.COMMA, TokenType.ARROW,
    # keywords
    TokenType.RETURN, TokenType.LET, TokenType.IF, TokenType.ELSE,
    TokenType.WHILE, TokenType.FOR,
    # assignment and block delimiters
    TokenType.ASSIGNMENT, TokenType.RIGHT_BRACE, TokenType.LEFT_BRACE,
]
def program_parser(tkr: Tokenizer):
    """Parse every statement the tokenizer yields into a ``Program``.

    Statements are parsed one after another until the tokenizer reports no
    current token. Semicolons found between statements are skipped.

    Args:
        tkr: Tokenizer positioned at the first token to parse.

    Returns:
        A ``Program`` holding the parsed statements in source order.
    """
    statements = list[Statement]()
    while True:
        token = tkr.token()
        if token is None:
            break
        if token.type == TokenType.SEMICOLON:
            # Separator between statements: skip it, nothing to record.
            tkr.next()
            continue
        statements.append(statement_parser(tkr))
    return Program(statements)
def if_parser(tkr: Tokenizer):
    """Parse an ``if`` statement with an optional ``else`` / ``else if``.

    Args:
        tkr: Tokenizer positioned on the IF token.

    Returns:
        An ``IfStatement`` built from the condition, the "then" block and an
        "else" block. Without an ``else`` the else-block is an empty
        ``Block``; an ``else if`` becomes a ``Block`` whose only statement is
        the nested ``IfStatement``.
    """
    tkr.eat(TokenType.IF)
    condition = ExpressionParser(tkr).parse()
    block = block_statement(tkr)
    if not tkr.type_is(TokenType.ELSE):
        return IfStatement(condition, block, Block([]))
    tkr.eat(TokenType.ELSE)
    if tkr.type_is(TokenType.IF):
        # "else if": recurse and wrap the nested if as the whole else-block.
        return IfStatement(condition, block, Block([if_parser(tkr)]))
    return IfStatement(condition, block, block_statement(tkr))
def while_parser(tkr: Tokenizer):
    """Parse a ``while`` loop: the WHILE token, a condition, then a block.

    Args:
        tkr: Tokenizer positioned on the WHILE token.

    Returns:
        A ``WhileStatement`` built from the condition expression and the body.
    """
    tkr.eat(TokenType.WHILE)
    loop_condition = ExpressionParser(tkr).parse()
    loop_body = block_statement(tkr)
    return WhileStatement(loop_condition, loop_body)
def identifier(tkr: Tokenizer):
    """Consume the current IDENTIFIER token and return it as an ``Identifier``.

    Args:
        tkr: Tokenizer whose current token is expected to be an identifier.

    Returns:
        An ``Identifier`` built from the token's value.

    Raises:
        Exception: If the current token is not of type IDENTIFIER.
    """
    current = tkr.token()
    if current.type == TokenType.IDENTIFIER:
        tkr.next()
        return Identifier(current.value)
    raise Exception("Invalid identifier", current)
def block_statement(tkr: Tokenizer):
    """Parse a brace-delimited block of statements.

    Args:
        tkr: Tokenizer positioned on the LEFT_BRACE token.

    Returns:
        A ``Block`` with the statements found before the matching RIGHT_BRACE.

    Raises:
        Exception: If the tokens run out before the closing brace.
    """
    tkr.eat(TokenType.LEFT_BRACE)
    body = list[Statement]()
    while tkr.token() is not None:
        kind = tkr.tokenType()
        if kind == TokenType.RIGHT_BRACE:
            tkr.eat(TokenType.RIGHT_BRACE)
            return Block(body)
        if kind == TokenType.SEMICOLON:
            # Separator inside the block: skip it.
            tkr.next()
        else:
            body.append(statement_parser(tkr))
    # Ran out of tokens without seeing the closing brace.
    raise Exception("Invalid block expression", tkr.token())
def return_parser(tkr: Tokenizer):
    """Parse a ``return`` statement: the RETURN token, then an expression.

    Args:
        tkr: Tokenizer positioned on the RETURN token.

    Returns:
        A ``ReturnStatement`` wrapping the parsed expression.
    """
    tkr.eat(TokenType.RETURN)
    returned_value = ExpressionParser(tkr).parse()
    return ReturnStatement(returned_value)
def statement_parser(tkr: Tokenizer):
    """Parse a single statement, chosen by the current token.

    The checks run in this order: end of input, semicolon, ``let``,
    assignment (detected by look-ahead), ``if``, ``while``, ``return``,
    ``break``. Anything else is parsed as a bare expression.

    Args:
        tkr: Tokenizer positioned at the start of a statement.

    Returns:
        The parsed statement node; an ``EmptyStatement`` for end of input or
        a lone semicolon.
    """
    current = tkr.token()
    if current is None:
        return EmptyStatement()

    kind = current.type
    if kind == TokenType.SEMICOLON:
        tkr.next()
        return EmptyStatement()
    elif kind == TokenType.LET:
        return let_expression_parser(tkr)

    # Assignment detection needs look-ahead, so it sits between the simple
    # token-type checks exactly as the dispatch order requires.
    if _try_assignment_expression(tkr):
        return assignment_parser(tkr)

    if kind == TokenType.IF:
        return if_parser(tkr)
    elif kind == TokenType.WHILE:
        return while_parser(tkr)
    elif kind == TokenType.RETURN:
        return return_parser(tkr)
    elif kind == TokenType.BREAK:
        tkr.eat(TokenType.BREAK)
        return BreakStatement()

    return ExpressionParser(tkr).parse()
def assignment_parser(tkr: Tokenizer):
    """Parse ``<identifier> <assignment-token> <expression>``.

    Args:
        tkr: Tokenizer positioned on the identifier being assigned to.

    Returns:
        An ``Assignment`` of the parsed expression to the identifier.
    """
    target = identifier(tkr)
    tkr.eat(TokenType.ASSIGNMENT)
    assigned_value = ExpressionParser(tkr).parse()
    return Assignment(target, assigned_value)
def let_expression_parser(tkr: Tokenizer):
    """Parse a ``let`` declaration: LET, identifier, assignment, expression.

    Args:
        tkr: Tokenizer positioned on the LET token.

    Returns:
        A ``VariableDeclaration`` binding the identifier to the parsed
        initializer expression.

    Raises:
        Exception: If the input ends right after ``let`` or after the
            identifier, if the token after ``let`` is not an identifier, or
            if the identifier is not followed by an assignment token.
    """
    tkr.eat(TokenType.LET)
    token = tkr.token()
    # Guard against end of input too, so a dangling "let" reports the same
    # error as the other malformed cases instead of failing on None.type.
    if token is None or token.type != TokenType.IDENTIFIER:
        raise Exception("Invalid let statement", token)
    name = identifier(tkr)
    token = tkr.token()
    if token is None:
        raise Exception("Invalid let statement", token)
    if token.type != TokenType.ASSIGNMENT:
        raise Exception("Invalid let statement", token.type)
    tkr.next()
    initializer = ExpressionParser(tkr).parse()
    return VariableDeclaration(name, initializer)
class ExpressionParser:
    """Parse one expression starting at the tokenizer's current token.

    Operands are pushed onto ``stack`` as AST nodes while operators wait on
    ``operator_stack`` and are moved onto ``stack`` according to their
    priority, so ``stack`` ends up in postfix order. ``expression`` then
    folds that sequence into nested ``BinaryExpression`` nodes through
    ``expression_list_to_binary``.
    """

    def __init__(self, tkr: Tokenizer):
        # Postfix output: operand nodes interleaved with operator tokens.
        self.stack = list[Expression | Token]()
        # Operators and opening parentheses not yet moved to ``stack``.
        self.operator_stack = list[Token]()
        self.tkr = tkr

    def parse(self, unary = False):
        """Consume tokens until the expression ends and return its AST.

        Args:
            unary: True when parsing the operand of a unary operator. In
                that mode a non-unary operator listed in
                ``unary_end_statement`` stops parsing, and a ``(`` starts a
                nested sub-expression.

        Returns:
            The parsed expression node, a ``Fun`` node when the tokens form
            a function expression, or ``EmptyStatement`` when nothing was
            parsed.
        """
        while not self.is_end():
            token = self.tkr.token()
            if unary and not self.is_unary() and token.type in unary_end_statement:
                break
            if self.is_unary():
                self.push_stack(self.unary_expression_parser())
            elif self._try_fun_expression():
                return self.fun_expression()
            # -(hello x 123) // !(true and false)
            elif unary and token.type == TokenType.LEFT_PAREN:
                self.tkr.next()
                self.push_stack(ExpressionParser(self.tkr).parse())
            elif self._is_operator(token) or token.type in [TokenType.LEFT_PAREN, TokenType.RIGHT_PAREN ]:
                self.push_operator_stack(token)
                self.tkr.next()
            else:
                self.push_stack(self.expression_parser())
        self.pop_all()
        return self.expression()

    def expression(self):
        """Turn the collected postfix ``stack`` into a single node."""
        if len(self.stack) == 0:
            return EmptyStatement()
        if len(self.stack) == 1:
            return self.stack[0]
        return expression_list_to_binary(self.stack)

    def expression_parser(self):
        """Parse one operand: a literal, an identifier or a function call.

        Returns:
            The operand node, or ``EmptyStatement`` when there is no current
            token.

        Raises:
            Exception: If the current token cannot start an operand.
                Returning without consuming it would leave ``parse`` looping
                on the same token forever.
        """
        token = self.tkr.token()
        if token is None:
            return EmptyStatement()
        if token.type == TokenType.INT:
            self.tkr.eat(TokenType.INT)
            return IntLiteral(int(token.value))
        if token.type == TokenType.FLOAT:
            self.tkr.eat(TokenType.FLOAT)
            return FloatLiteral(float(token.value))
        if token.type == TokenType.STRING:
            self.tkr.eat(TokenType.STRING)
            # Drop the first and last character of the raw token text.
            return StringLiteral(token.value[1:-1])
        if token.type == TokenType.BOOL:
            self.tkr.eat(TokenType.BOOL)
            return BoolLiteral(token.value == "true")
        if token.type == TokenType.IDENTIFIER:
            return self.identifier_or_fun_call_parser()
        raise Exception("Invalid expression", token)

    def _try_fun_expression(self):
        """Report whether the upcoming tokens form a function expression."""
        return _try_fun_expression(self.tkr)

    def fun_expression(self):
        """Parse a function expression: parameter list, arrow, body block.

        Returns:
            A ``Fun`` node holding the parameter identifiers and the body.

        Raises:
            Exception: If the parameter list is not followed by an ARROW.
        """
        tkr = self.tkr
        # Step past the opening parenthesis.
        tkr.next()
        args = list[Identifier]()
        token_type = tkr.tokenType()
        while token_type != TokenType.RIGHT_PAREN:
            args.append(Identifier(tkr.token().value))
            tkr.next()
            token_type = tkr.tokenType()
            if token_type == TokenType.RIGHT_PAREN:
                break
            # Step past the token separating two parameters.
            tkr.next()
            token_type = tkr.tokenType()
        token_type = tkr.next_token_type()
        if token_type != TokenType.ARROW:
            raise Exception("Invalid fun_expression", tkr.token())
        tkr.next()
        return Fun(args, block_statement(tkr))

    def push_stack(self, expression: Expression | Token):
        """Append an operand node or operator token to the postfix output."""
        self.stack.append(expression)

    def _pop_by_right_paren(self):
        """Move operators to the output until the matching ``(`` is popped."""
        token = self.operator_stack.pop()
        if token.type != TokenType.LEFT_PAREN:
            self.push_stack(token)
            self._pop_by_right_paren()

    def pop(self):
        """Move the top pending operator to the postfix output."""
        self.push_stack(self.operator_stack.pop())

    def pop_all(self):
        """Move every pending operator to the postfix output."""
        while len(self.operator_stack) > 0:
            self.pop()

    def push_operator_stack(self, token: Token):
        """Place an operator or parenthesis according to operator priority.

        ``(`` is always stacked; ``)`` unwinds to the matching ``(``. Any
        other operator first moves stacked operators that bind at least as
        tightly to the output, then is stacked itself.
        """
        if len(self.operator_stack) == 0:
            self.operator_stack.append(token)
            return
        if token.type == TokenType.LEFT_PAREN:
            self.operator_stack.append(token)
            return
        if token.type == TokenType.RIGHT_PAREN:
            self._pop_by_right_paren()
            return
        top_operator = self.operator_stack[-1]
        if top_operator.type == TokenType.LEFT_PAREN:
            self.operator_stack.append(token)
            return
        # priority is in descending order
        if self._priority(token) >= self._priority(top_operator):
            self.pop()
            self.push_operator_stack(token)
            return
        self.operator_stack.append(token)

    def unary_expression_parser(self):
        """Parse a unary operator followed by its operand."""
        token = self.tkr.token()
        self.tkr.next()
        return UnaryExpression(token.value, ExpressionParser(self.tkr).parse(True))

    def identifier_or_fun_call_parser(self):
        """Parse an identifier, or a call when a ``(`` follows it."""
        id = self.identifier()
        tokenType = self.tkr.tokenType()
        if tokenType == TokenType.LEFT_PAREN:
            return self.fun_call_parser(id)
        return id

    def fun_call_parser(self, id: Identifier):
        """Parse the parenthesised argument list of a call to ``id``.

        Returns:
            A ``CallExpression`` with one parsed expression per argument.
        """
        self.tkr.eat(TokenType.LEFT_PAREN)
        args = list[Expression]()
        while self.tkr.tokenType() != TokenType.RIGHT_PAREN:
            args.append(ExpressionParser(self.tkr).parse())
            if self.tkr.tokenType() == TokenType.COMMA:
                self.tkr.eat(TokenType.COMMA)
        self.tkr.eat(TokenType.RIGHT_PAREN)
        return CallExpression(id, args)

    def identifier(self):
        """Consume the current identifier token via the module-level helper."""
        return identifier(self.tkr)

    def is_unary(self):
        """Report whether the current token is an operator in unary position.

        A NOT token is always unary. "+" and "-" are unary only when there
        is no previous token or the previous token's type is listed in
        ``unary_prev_statement``.
        """
        token = self.tkr.token()
        if not self.unary_operator(token):
            return False
        if token.type == TokenType.NOT:
            return True
        prev_token = self.tkr.get_prev()
        if prev_token is None:
            return True
        if prev_token.type == TokenType.LEFT_PAREN:
            return True
        if prev_token.type in unary_prev_statement:
            return True
        return False

    def unary_operator(self, token: Token):
        """Report whether ``token`` spells "+", "-" or "!"."""
        if token is None:
            return False
        return token.value in ["+", "-", "!"]

    def _has_brackets(self):
        """Report whether an opening parenthesis is still pending."""
        return any(TokenType.LEFT_PAREN == pending.type for pending in self.operator_stack)

    def is_end(self):
        """Report whether the current token terminates this expression.

        The expression ends at end of input, at a semicolon, at a ``)`` with
        no pending ``(``, or at any token type listed in ``end_statement``.
        """
        token = self.tkr.token()
        if token is None:
            return True
        if token.type == TokenType.SEMICOLON:
            return True
        if not self._has_brackets() and token.type == TokenType.RIGHT_PAREN:
            return True
        if token.type in end_statement:
            return True
        return False

    def _is_operator(self, token: Token):
        """Report whether ``token`` is an additive, multiplicative, logical or NOT operator."""
        if token is None:
            return False
        return token.type in [TokenType.ADDITIVE_OPERATOR, TokenType.MULTIPLICATIVE_OPERATOR, TokenType.LOGICAL_OPERATOR, TokenType.NOT]

    def _debug_print_tokens(self):
        """Print the pending operator stack (debugging aid)."""
        print("operator stack:----")
        for token in self.operator_stack:
            print(token)

    def _debug_print_stack(self):
        """Print the postfix output stack (debugging aid)."""
        print("stack:----")
        for expression in self.stack:
            print(expression)

    def _priority(self, token: Token):
        """Return the priority rank of the operator token (lower binds tighter)."""
        return _priority(token.value)
def expression_list_to_binary(expression_list: list[Expression | Token], stack: list = None):
    """Fold a postfix sequence into nested ``BinaryExpression`` nodes.

    Items are read left to right. A non-``Token`` item is an operand and is
    pushed onto a working stack. A ``Token`` item is an operator: the two
    most recent operands are popped and replaced by
    ``BinaryExpression(left, token.value, right)``.

    The fold is iterative, so long expressions neither hit the recursion
    limit nor re-slice the input for every element. A caller-supplied
    ``stack`` is copied and never mutated.

    Args:
        expression_list: Operands and operator tokens in postfix order.
        stack: Optional initial operand stack; defaults to empty.

    Returns:
        The bottom element of the operand stack once every item is consumed.

    Raises:
        IndexError: If an operator has fewer than two operands available, or
            if there is nothing to return.
    """
    operands = [] if stack is None else list(stack)
    for item in expression_list:
        if isinstance(item, Token):
            right = operands.pop()
            left = operands.pop()
            operands.append(BinaryExpression(left, item.value, right))
        else:
            operands.append(item)
    return operands[0]
def _priority(operator: str):
    """Return the priority rank of a binary operator (lower binds tighter).

    Ranks run from 0 for ``*``, ``/``, ``%`` up to 5 for ``||``. Any
    operator not in the table gets the next rank after the last group.

    Args:
        operator: The operator's text.

    Returns:
        The integer rank of the operator.
    """
    groups = (
        ("*", "/", "%"),
        ("+", "-"),
        ("<", ">", "<=", ">="),
        ("==", "!="),
        ("&&",),
        ("||",),
    )
    for rank, members in enumerate(groups):
        if operator in members:
            return rank
    return len(groups)
def _try_assignment_expression(tkr: Tokenizer):
    """Report whether the upcoming tokens start an assignment.

    An assignment starts with an IDENTIFIER token immediately followed by an
    ASSIGNMENT token. The caller's tokenizer is never advanced: the second
    token is inspected on a deep copy. The copy is made only after the first
    token has been confirmed to be an identifier, so statements that cannot
    be assignments do not pay for copying the tokenizer.

    Args:
        tkr: Tokenizer positioned at the start of a statement.

    Returns:
        True if the next two tokens are IDENTIFIER then ASSIGNMENT, else
        False.
    """
    first = tkr.token()
    if first is None or first.type != TokenType.IDENTIFIER:
        return False
    lookahead = copy.deepcopy(tkr)
    lookahead.next()
    second = lookahead.token()
    if second is None:
        return False
    if second.type != TokenType.ASSIGNMENT:
        return False
    return True
def _try_fun_expression(_tkr: Tokenizer):
    """Report whether the upcoming tokens form a function expression head.

    The accepted shape is ``(``, zero or more identifiers separated by
    commas with no trailing comma, ``)``, then an ARROW token. The caller's
    tokenizer is never advanced: scanning happens on a deep copy. The copy
    is made only when the current token is ``(``, because this check runs
    for every token of every expression.

    Args:
        _tkr: Tokenizer positioned at a possible function expression.

    Returns:
        True if the tokens match the shape above, else False.
    """
    first = _tkr.token()
    if first is None or first.type != TokenType.LEFT_PAREN:
        return False
    tkr = copy.deepcopy(_tkr)
    tkr.next()
    token_type = tkr.tokenType()
    while token_type != TokenType.RIGHT_PAREN:
        if token_type != TokenType.IDENTIFIER:
            # Anything other than a parameter name rules out a function.
            return False
        tkr.next()
        token_type = tkr.tokenType()
        if token_type == TokenType.RIGHT_PAREN:
            break
        if token_type != TokenType.COMMA:
            return False
        tkr.next()
        token_type = tkr.tokenType()
        if token_type == TokenType.RIGHT_PAREN:
            # A comma directly before ")" is not accepted.
            return False
    token_type = tkr.next_token_type()
    if token_type != TokenType.ARROW:
        return False
    return True