# mirror of https://github.com/BoardWare-Genius/jarvis-models.git, synced 2025-12-13 16:53:24 +00:00
import copy

# NOTE: Expression is only used in type hints below; it is imported from the
# stdlib `ast` module here, but was most likely meant to be the Expression
# base class from the local .ast module.
from ast import Expression

from .ast import (
    Assignment,
    BinaryExpression,
    Block,
    BoolLiteral,
    BreakStatement,
    CallExpression,
    EmptyStatement,
    FloatLiteral,
    Fun,
    Identifier,
    IfStatement,
    IntLiteral,
    Program,
    ReturnStatement,
    Statement,
    StringLiteral,
    UnaryExpression,
    VariableDeclaration,
    WhileStatement,
)
from .tokenizer import Token, TokenType, Tokenizer

# Token types that may appear immediately before a unary "+", "-" or "!":
# in these contexts the operator starts a new sub-expression instead of
# continuing a binary one.
unary_prev_statement = [
    TokenType.COMMENTS,
    TokenType.LEFT_PAREN,
    TokenType.COMMA,
    TokenType.LEFT_BRACE,
    TokenType.RIGHT_BRACE,
    TokenType.SEMICOLON,
    TokenType.LET,
    TokenType.RETURN,
    TokenType.IF,
    TokenType.ELSE,
    TokenType.WHILE,
    TokenType.FOR,
    TokenType.LOGICAL_OPERATOR,
    TokenType.NOT,
    TokenType.ASSIGNMENT,
    TokenType.MULTIPLICATIVE_OPERATOR,
    TokenType.ADDITIVE_OPERATOR,
    TokenType.ARROW,
]

# Token types that terminate the operand of a unary operator.
unary_end_statement = [
    TokenType.MULTIPLICATIVE_OPERATOR,
    TokenType.ADDITIVE_OPERATOR,
    TokenType.LOGICAL_OPERATOR,
]

# Token types that terminate an expression.
end_statement = [
    TokenType.SEMICOLON,
    TokenType.COMMA,
    TokenType.ARROW,
    TokenType.RETURN,
    TokenType.LET,
    TokenType.IF,
    TokenType.ELSE,
    TokenType.WHILE,
    TokenType.FOR,
    TokenType.ASSIGNMENT,
    TokenType.RIGHT_BRACE,
    TokenType.LEFT_BRACE,
]

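# Example of the ambiguity these tables resolve: in "a - b" the "-" is binary
# (the previous token, an IDENTIFIER, is not in unary_prev_statement), while
# in "(-b)" or "a * -b" it is unary (LEFT_PAREN and MULTIPLICATIVE_OPERATOR
# are in unary_prev_statement).
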
def program_parser(tkr: Tokenizer):
    """Parse the whole token stream into a Program node."""
    statements = list[Statement]()
    while True:
        if tkr.token() is None:
            break
        if tkr.token().type == TokenType.SEMICOLON:
            tkr.next()
            continue
        statements.append(statement_parser(tkr))
    return Program(statements)

def if_parser(tkr: Tokenizer):
    tkr.eat(TokenType.IF)
    condition = ExpressionParser(tkr).parse()
    block = block_statement(tkr)
    if tkr.type_is(TokenType.ELSE):
        tkr.eat(TokenType.ELSE)
        if tkr.type_is(TokenType.IF):
            # "else if": wrap the nested if statement in a block.
            return IfStatement(condition, block, Block([if_parser(tkr)]))
        return IfStatement(condition, block, block_statement(tkr))
    # No else branch: use an empty block.
    return IfStatement(condition, block, Block([]))

def while_parser(tkr: Tokenizer):
    tkr.eat(TokenType.WHILE)
    condition = ExpressionParser(tkr).parse()
    block = block_statement(tkr)
    return WhileStatement(condition, block)

def identifier(tkr: Tokenizer):
    token = tkr.token()
    if token is None or token.type != TokenType.IDENTIFIER:
        raise Exception("Invalid identifier", token)
    tkr.next()
    return Identifier(token.value)

def block_statement(tkr: Tokenizer):
    tkr.eat(TokenType.LEFT_BRACE)
    statements = list[Statement]()
    while True:
        if tkr.token() is None:
            # Ran off the end of the input before finding the closing brace.
            raise Exception("Invalid block expression", tkr.token())
        if tkr.tokenType() == TokenType.RIGHT_BRACE:
            tkr.eat(TokenType.RIGHT_BRACE)
            break
        if tkr.tokenType() == TokenType.SEMICOLON:
            tkr.next()
            continue
        statements.append(statement_parser(tkr))
    return Block(statements)

def return_parser(tkr: Tokenizer):
    tkr.eat(TokenType.RETURN)
    return ReturnStatement(ExpressionParser(tkr).parse())

def statement_parser(tkr: Tokenizer):
    """Dispatch on the current token to the matching statement parser."""
    token = tkr.token()
    if token is None:
        return EmptyStatement()
    if token.type == TokenType.SEMICOLON:
        tkr.next()
        return EmptyStatement()
    if token.type == TokenType.LET:
        return let_expression_parser(tkr)
    if _try_assignment_expression(tkr):
        return assignment_parser(tkr)
    if token.type == TokenType.IF:
        return if_parser(tkr)
    if token.type == TokenType.WHILE:
        return while_parser(tkr)
    if token.type == TokenType.RETURN:
        return return_parser(tkr)
    if token.type == TokenType.BREAK:
        tkr.eat(TokenType.BREAK)
        return BreakStatement()
    return ExpressionParser(tkr).parse()

def assignment_parser(tkr: Tokenizer):
    id = identifier(tkr)
    tkr.eat(TokenType.ASSIGNMENT)
    return Assignment(id, ExpressionParser(tkr).parse())

def let_expression_parser(tkr: Tokenizer):
    tkr.eat(TokenType.LET)
    token = tkr.token()
    if token is None or token.type != TokenType.IDENTIFIER:
        raise Exception("Invalid let statement", token)
    id = identifier(tkr)
    token = tkr.token()
    if token is None:
        raise Exception("Invalid let statement", token)
    if token.type != TokenType.ASSIGNMENT:
        raise Exception("Invalid let statement", token.type)
    tkr.next()
    return VariableDeclaration(id, ExpressionParser(tkr).parse())

class ExpressionParser:
    """Expression parser in shunting-yard style: `stack` collects operands
    and operators in postfix order, `operator_stack` holds pending
    operators."""

    def __init__(self, tkr: Tokenizer):
        self.stack = list[Expression | Token]()
        self.operator_stack = list[Token]()
        self.tkr = tkr

    def parse(self, unary=False):
        while not self.is_end():
            token = self.tkr.token()
            if unary and not self.is_unary() and token.type in unary_end_statement:
                break
            if self.is_unary():
                self.push_stack(self.unary_expression_parser())
            elif self._try_fun_expression():
                return self.fun_expression()
            elif unary and token.type == TokenType.LEFT_PAREN:
                # Parenthesized unary operand: -(hello * 123) or !(true and false)
                self.tkr.next()
                self.push_stack(ExpressionParser(self.tkr).parse())
            elif self._is_operator(token) or token.type in [TokenType.LEFT_PAREN, TokenType.RIGHT_PAREN]:
                self.push_operator_stack(token)
                self.tkr.next()
            else:
                self.push_stack(self.expression_parser())
        self.pop_all()
        return self.expression()

    def expression(self):
        if len(self.stack) == 0:
            return EmptyStatement()
        if len(self.stack) == 1:
            return self.stack[0]
        return expression_list_to_binary(self.stack)

    def expression_parser(self):
        """Parse a single literal or identifier/function-call operand."""
        token = self.tkr.token()
        if token is None:
            return EmptyStatement()
        expression = None
        if token.type == TokenType.INT:
            self.tkr.eat(TokenType.INT)
            expression = IntLiteral(int(token.value))
        elif token.type == TokenType.FLOAT:
            self.tkr.eat(TokenType.FLOAT)
            expression = FloatLiteral(float(token.value))
        elif token.type == TokenType.STRING:
            self.tkr.eat(TokenType.STRING)
            # Strip the surrounding quotes.
            expression = StringLiteral(token.value[1:-1])
        elif token.type == TokenType.BOOL:
            self.tkr.eat(TokenType.BOOL)
            expression = BoolLiteral(token.value == "true")
        elif token.type == TokenType.IDENTIFIER:
            expression = self.identifier_or_fun_call_parser()
        return expression

    def _try_fun_expression(self):
        return _try_fun_expression(self.tkr)

    def fun_expression(self):
        """Parse a function literal: (a, b) -> { ... }"""
        tkr = self.tkr
        tkr.next()  # consume "("
        args = list[Identifier]()
        token_type = tkr.tokenType()
        while token_type != TokenType.RIGHT_PAREN:
            args.append(Identifier(tkr.token().value))
            tkr.next()
            token_type = tkr.tokenType()
            if token_type == TokenType.RIGHT_PAREN:
                break
            tkr.next()  # consume ","
            token_type = tkr.tokenType()
        token_type = tkr.next_token_type()
        if token_type != TokenType.ARROW:
            raise Exception("Invalid fun_expression", tkr.token())
        tkr.next()
        return Fun(args, block_statement(tkr))

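    # Disambiguation note: "(a, b) -> { ... }" parses as a function literal
    # because _try_fun_expression (bottom of this file) looks ahead for an
    # identifier/comma list followed by "->"; a plain "(a + b)" fails that
    # lookahead and is treated as ordinary grouping.
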
    def push_stack(self, expression: Expression | Token):
        self.stack.append(expression)

    def _pop_by_right_paren(self):
        # Pop operators into the output stack until the matching "(".
        token = self.operator_stack.pop()
        if token.type != TokenType.LEFT_PAREN:
            self.push_stack(token)
            self._pop_by_right_paren()

    def pop(self):
        self.push_stack(self.operator_stack.pop())

    def pop_all(self):
        while len(self.operator_stack) > 0:
            self.pop()

    def push_operator_stack(self, token: Token):
        if len(self.operator_stack) == 0:
            self.operator_stack.append(token)
            return
        if token.type == TokenType.LEFT_PAREN:
            self.operator_stack.append(token)
            return
        if token.type == TokenType.RIGHT_PAREN:
            self._pop_by_right_paren()
            return
        top_operator = self.operator_stack[-1]
        if top_operator.type == TokenType.LEFT_PAREN:
            self.operator_stack.append(token)
            return
        # Priority is in descending order: a smaller value binds tighter, so
        # pop while the incoming operator does not bind tighter than the top.
        if self._priority(token) >= self._priority(top_operator):
            self.pop()
            self.push_operator_stack(token)
            return
        self.operator_stack.append(token)

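    # Worked example (a sketch of the stack states, token values shown as
    # their source text): while parsing "1 + 2 * 3",
    #   stack:          [1]    [1]    [1, 2]    [1, 2]    [1, 2, 3]
    #   operator_stack: []     [+]    [+]       [+, *]    [+, *]
    # "*" stacks on top of "+" because it binds tighter, and pop_all() then
    # leaves the postfix order [1, 2, 3, *, +] in `stack`, which
    # expression_list_to_binary folds into 1 + (2 * 3).
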
    def unary_expression_parser(self):
        token = self.tkr.token()
        self.tkr.next()
        return UnaryExpression(token.value, ExpressionParser(self.tkr).parse(True))

    def identifier_or_fun_call_parser(self):
        id = self.identifier()
        if self.tkr.tokenType() == TokenType.LEFT_PAREN:
            return self.fun_call_parser(id)
        return id

    def fun_call_parser(self, id: Identifier):
        self.tkr.eat(TokenType.LEFT_PAREN)
        args = list[Expression]()
        while self.tkr.tokenType() != TokenType.RIGHT_PAREN:
            args.append(ExpressionParser(self.tkr).parse())
            if self.tkr.tokenType() == TokenType.COMMA:
                self.tkr.eat(TokenType.COMMA)
        self.tkr.eat(TokenType.RIGHT_PAREN)
        return CallExpression(id, args)

    def identifier(self):
        return identifier(self.tkr)

    def is_unary(self):
        """True if the current token is a unary "+", "-" or "!", judged by
        the previous token's type."""
        token = self.tkr.token()
        if not self.unary_operator(token):
            return False
        if token.type == TokenType.NOT:
            return True
        prev_token = self.tkr.get_prev()
        if prev_token is None:
            # The expression starts the input, so "+"/"-" must be unary.
            return True
        # LEFT_PAREN is already included in unary_prev_statement.
        return prev_token.type in unary_prev_statement

    def unary_operator(self, token: Token):
        if token is None:
            return False
        return token.value in ["+", "-", "!"]

    def _has_brackets(self):
        return TokenType.LEFT_PAREN in map(lambda x: x.type, self.operator_stack)

    def is_end(self):
        token = self.tkr.token()
        if token is None:
            return True
        if token.type == TokenType.SEMICOLON:
            return True
        # A ")" with no matching "(" on the operator stack belongs to an
        # enclosing expression (e.g. a call's argument list).
        if not self._has_brackets() and token.type == TokenType.RIGHT_PAREN:
            return True
        if token.type in end_statement:
            return True
        return False

    def _is_operator(self, token: Token):
        if token is None:
            return False
        return token.type in [
            TokenType.ADDITIVE_OPERATOR,
            TokenType.MULTIPLICATIVE_OPERATOR,
            TokenType.LOGICAL_OPERATOR,
            TokenType.NOT,
        ]

    def _debug_print_tokens(self):
        print("operator stack:----")
        for token in self.operator_stack:
            print(token)

    def _debug_print_stack(self):
        print("stack:----")
        for expression in self.stack:
            print(expression)

    def _priority(self, token: Token):
        return _priority(token.value)

def expression_list_to_binary(expression_list: list[Expression | Token], stack: list = None):
    """Fold a postfix (RPN) list of operands and operator tokens into a
    BinaryExpression tree."""
    if stack is None:
        stack = list()
    if len(expression_list) == 0:
        return stack[0]
    top = expression_list[0]
    if isinstance(top, Token):
        right = stack.pop()
        left = stack.pop()
        return expression_list_to_binary(expression_list[1:], stack + [BinaryExpression(left, top.value, right)])
    stack.append(top)
    return expression_list_to_binary(expression_list[1:], stack)

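# Sketch of the fold above on the postfix list for "1 + 2 * 3":
#   [1, 2, 3, *, +]   ->  operands 1, 2, 3 pushed onto `stack`
#   "*" pops 3 and 2  ->  [1, BinaryExpression(2, "*", 3)]
#   "+" pops those    ->  [BinaryExpression(1, "+", BinaryExpression(2, "*", 3))]
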
def _priority(operator: str):
    """Operator precedence; a lower value means tighter binding."""
    priority = 0
    if operator in ["*", "/", "%"]:
        return priority
    priority += 1
    if operator in ["+", "-"]:
        return priority
    priority += 1
    if operator in ["<", ">", "<=", ">="]:
        return priority
    priority += 1
    if operator in ["==", "!="]:
        return priority
    priority += 1
    if operator in ["&&"]:
        return priority
    priority += 1
    if operator in ["||"]:
        return priority
    priority += 1
    return priority

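# Resulting precedence levels (0 binds tightest):
#   0: * / %    1: + -    2: < > <= >=    3: == !=    4: &&    5: ||
#   6: anything else
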
def _try_assignment_expression(tkr: Tokenizer):
    """Lookahead for "IDENTIFIER =" on a deep copy, leaving tkr untouched."""
    tkr = copy.deepcopy(tkr)
    token = tkr.token()
    if token is None:
        return False
    if token.type != TokenType.IDENTIFIER:
        return False
    tkr.next()
    token = tkr.token()
    if token is None:
        return False
    return token.type == TokenType.ASSIGNMENT

def _try_fun_expression(_tkr: Tokenizer):
    """Lookahead for a function literal "(a, b, ...) ->" on a deep copy."""
    tkr = copy.deepcopy(_tkr)
    token = tkr.token()
    if token is None:
        return False
    if token.type != TokenType.LEFT_PAREN:
        return False
    tkr.next()
    token_type = tkr.tokenType()
    while token_type != TokenType.RIGHT_PAREN:
        if token_type == TokenType.IDENTIFIER:
            tkr.next()
            token_type = tkr.tokenType()
            if token_type == TokenType.RIGHT_PAREN:
                break
            if token_type != TokenType.COMMA:
                return False
            tkr.next()
            token_type = tkr.tokenType()
            if token_type == TokenType.RIGHT_PAREN:
                # Trailing comma is not allowed.
                return False
        else:
            return False
    token_type = tkr.next_token_type()
    if token_type != TokenType.ARROW:
        return False
    return True
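
# Hypothetical usage sketch (assumes Tokenizer is constructed from source
# text; the real constructor may differ, so this stays commented out):
#
#     program = program_parser(Tokenizer("let x = 1 + 2 * 3;"))
#     # program.statements[0] is a VariableDeclaration whose initializer is
#     # BinaryExpression(IntLiteral(1), "+",
#     #                  BinaryExpression(IntLiteral(2), "*", IntLiteral(3)))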