From 49659dcc138cf6654b8e93e8c9d3603f17a19ff4 Mon Sep 17 00:00:00 2001
From: Dan Chen
Date: Thu, 21 Mar 2024 15:13:32 +0800
Subject: [PATCH] feat: runtime

---
 runtime/ast/parser.py      | 187 +++++++++++++++++++++++++++++++++++++
 runtime/ast/runtime.py     | 102 ++++++++++++++++++++
 runtime/ast/tokenizer.py   |  91 ++++++++++++++++++
 src/asr/asr.py             |   2 +-
 src/blackbox/blackbox.py   |   4 +-
 src/blackbox/calculator.py |   2 +-
 6 files changed, 384 insertions(+), 4 deletions(-)
 create mode 100644 runtime/ast/parser.py
 create mode 100644 runtime/ast/runtime.py
 create mode 100644 runtime/ast/tokenizer.py

diff --git a/runtime/ast/parser.py b/runtime/ast/parser.py
new file mode 100644
--- /dev/null
+++ b/runtime/ast/parser.py
@@ -0,0 +1,187 @@
+"""Recursive-descent parser that turns a script into a dict-based AST."""
+
+from runtime.ast.tokenizer import Tokenizer
+
+# Token types that start a literal expression.
+LITERAL_TOKEN_TYPES = ("NUMBER", "STRING", "FLOAT")
+
+
+class Parser:
+    """Build an AST of nested dicts from the tokens produced by Tokenizer."""
+
+    def __init__(self) -> None:
+        self.script = ""
+        self.tokenizer = Tokenizer()
+        # One-token lookahead; None once the input is exhausted.
+        self.current_token = None
+
+    def parse(self, script: str):
+        """Parse ``script`` and return its ``Program`` node."""
+        self.script = script
+        self.tokenizer.init(script)
+        self.current_token = self.tokenizer.get_next_token()
+        return self.program()
+
+    def program(self):
+        """Wrap every top-level statement in a Program/BlockStatement node."""
+        return {
+            "type": "Program",
+            "body": {
+                "type": "BlockStatement",
+                "body": self.statement_list(),
+            },
+        }
+
+    def return_statement(self):
+        self.eat("return")
+        return {
+            "type": "ReturnStatement",
+            "value": self.expression_statement(),
+        }
+
+    def statement_list(self, stop_lookahead=None):
+        """Parse statements until EOF or until ``stop_lookahead`` comes next.
+
+        An empty script yields an empty list. ``;`` separators are skipped
+        because the tokenizer reports them as ordinary tokens.
+        """
+        statements = []
+        self._skip_semicolons()
+        while self.current_token is not None and self.token_type() != stop_lookahead:
+            statements.append(self.statement())
+            self._skip_semicolons()
+        return statements
+
+    def _skip_semicolons(self):
+        while self.token_type() == ";":
+            self.eat(";")
+
+    def statement(self):
+        if self.token_type() == "let":
+            return self.variable_statement()
+        return self.expression_statement()
+
+    def block_statement(self):
+        """Parse ``{ statement* }``; both braces are required."""
+        self.eat("{")
+        body = self.statement_list("}")
+        self.eat("}")
+        return {
+            "type": "BlockStatement",
+            "body": body,
+        }
+
+    def expression_statement(self):
+        """Parse a return, a literal, or an identifier-led expression."""
+        if self.token_type() == "return":
+            return self.return_statement()
+        if self._is_literal():
+            return self.literal()
+        if self.token_type() == "IDENTIFIER":
+            identifier = self.identifier()
+            if self.token_type() == "(":
+                return self.call_expression(identifier)
+            if self.token_type() == "SIMPLE_ASSIGN":
+                return self.assignment_expression(identifier)
+            return identifier
+        if self.current_token is None:
+            raise Exception("Unexpected EOF")
+        raise Exception("Unexpected token: " + self.token_type())
+
+    def assignment_expression(self, identifier):
+        self.eat("SIMPLE_ASSIGN")
+        return {
+            "type": "AssignmentExpression",
+            "identifier": identifier,
+            "value": self.statement(),
+        }
+
+    def call_expression(self, identifier):
+        self.eat("(")
+        arguments = self.argument_list()
+        self.eat(")")
+        return {
+            "type": "CallExpression",
+            "arguments": arguments,
+            "callee": identifier,
+        }
+
+    def token_type(self):
+        """Return the lookahead token's type, or None at end of input."""
+        if self.current_token is None:
+            return None
+        return self.current_token["type"]
+
+    def _is_literal(self):
+        # Goes through token_type() so that EOF reads as "not a literal".
+        return self.token_type() in LITERAL_TOKEN_TYPES
+
+    def variable_statement(self):
+        self.eat("let")
+        identifier = self.identifier()
+        self.eat("SIMPLE_ASSIGN")
+        return {
+            "type": "VariableDeclaration",
+            "identifier": identifier,
+            "value": self.statement(),
+        }
+
+    def eat(self, tokenType):
+        """Consume and return the lookahead token, which must be ``tokenType``."""
+        token = self.current_token
+        if token is None:
+            raise Exception("Unexpected EOF")
+        if token["type"] != tokenType:
+            raise Exception("Unexpected token: " + token["type"])
+        self.current_token = self.tokenizer.get_next_token()
+        return token
+
+    def identifier(self):
+        name = self.eat("IDENTIFIER")
+        return {
+            "type": "Identifier",
+            "name": name["value"],
+        }
+
+    def literal(self):
+        token_type = self.token_type()
+        if token_type == "NUMBER":
+            return self.numberic_literal()
+        if token_type == "STRING":
+            return self.string_literal()
+        if token_type == "FLOAT":
+            return self.float_literal()
+        if token_type is None:
+            raise Exception("Unexpected EOF")
+        raise Exception("Unexpected token: " + token_type)
+
+    def numberic_literal(self):
+        token = self.eat("NUMBER")
+        return {
+            "type": "NumericLiteral",
+            "value": token["value"],
+        }
+
+    def string_literal(self):
+        token = self.eat("STRING")
+        return {
+            "type": "StringLiteral",
+            # Strip the surrounding double quotes.
+            "value": token["value"][1:-1],
+        }
+
+    def float_literal(self):
+        token = self.eat("FLOAT")
+        return {
+            "type": "FloatLiteral",
+            "value": token["value"],
+        }
+
+    def argument_list(self):
+        """Parse call arguments up to, but not including, the closing ``)``."""
+        args = []
+        while self.token_type() != ")":
+            args.append(self.statement())
+            if self.token_type() == ",":
+                self.eat(",")
+        return args
diff --git a/runtime/ast/runtime.py b/runtime/ast/runtime.py
new file mode 100644
--- /dev/null
+++ b/runtime/ast/runtime.py
@@ -0,0 +1,102 @@
+"""Tree-walking interpreter for the AST produced by runtime.ast.parser."""
+
+
+class Runtime:
+    """Evaluate a parsed program against a table of named records.
+
+    ``records`` maps names to plain values or to host callables that
+    scripts can invoke. ``parent`` is stored but not consulted here.
+    """
+
+    def __init__(self, records=None, parent=None):
+        self.parent = parent
+        # Build a fresh dict per instance: a ``{}`` default would be shared
+        # by every Runtime created without explicit records.
+        self.records = {} if records is None else records
+
+    def run(self, ast):
+        """Execute a ``Program`` node; other node types yield None."""
+        if ast["type"] == "Program":
+            return self.program(ast)
+        return None
+
+    def program(self, ast):
+        return self.block(ast.get("body"))
+
+    def block(self, ast):
+        """Run statements in order, stopping at the first ``return``."""
+        for statement in ast.get("body"):
+            result = self.switch(statement)
+            # Stop on the statement kind, not on the value, so that
+            # returning None still ends the block.
+            if statement["type"] == "ReturnStatement":
+                return result
+        return None
+
+    def switch(self, ast):
+        """Dispatch one statement; only ``return`` produces a value."""
+        node_type = ast["type"]
+        if node_type == "VariableDeclaration":
+            self.variable_declaration(ast)
+        elif node_type == "AssignmentExpression":
+            self.assignment_expression(ast)
+        elif node_type == "CallExpression":
+            self.call_function(ast)
+        elif node_type == "ReturnStatement":
+            return self.exec_return(ast)
+        return None
+
+    def assignment_expression(self, ast):
+        name = ast.get("identifier").get("name")
+        self.records[name] = self.unquote(ast.get("value"))
+
+    def _is_call_function(self, ast):
+        return ast["type"] == "CallExpression"
+
+    def call_function(self, ast):
+        """Evaluate the arguments, then call the record named by the callee."""
+        name = ast.get("callee").get("name")
+        arguments = [self.unquote(arg) for arg in ast.get("arguments")]
+        function = self.records.get(name)
+        if not callable(function):
+            raise TypeError("'" + str(name) + "' is not a callable record")
+        return function(*arguments)
+
+    def unquote(self, ast):
+        """Evaluate an expression node; unsupported node types yield None."""
+        if self._is_identifier(ast):
+            return self.records.get(ast.get("name"))
+        if self._is_literal(ast):
+            return self.literal(ast)
+        if self._is_call_function(ast):
+            return self.call_function(ast)
+        return None
+
+    def variable_declaration(self, ast):
+        name = ast.get("identifier").get("name")
+        # unquote() also covers call expressions, so ``let x = f()``
+        # stores the call's result.
+        self.records[name] = self.unquote(ast.get("value"))
+
+    def literal(self, ast):
+        """Convert a literal node to its Python value (None if not a literal)."""
+        node_type = ast.get("type")
+        if node_type == "StringLiteral":
+            return ast.get("value")
+        if node_type == "NumericLiteral":
+            return int(ast.get("value"))
+        if node_type == "FloatLiteral":
+            return float(ast.get("value"))
+        return None
+
+    def _is_identifier(self, ast):
+        return ast["type"] == "Identifier"
+
+    def _is_literal(self, ast):
+        return ast["type"] in ["NumericLiteral", "StringLiteral", "FloatLiteral"]
+
+    def exec_return(self, ast):
+        return self.unquote(ast.get("value"))
+
+    def debug_print_records(self):
+        print(self.records)
diff --git a/runtime/ast/tokenizer.py b/runtime/ast/tokenizer.py
new file mode 100644
--- /dev/null
+++ b/runtime/ast/tokenizer.py
@@ -0,0 +1,91 @@
+"""Regex-driven tokenizer for the runtime scripting language."""
+
+import re
+
+# (pattern, token type) pairs, tried in order. A None type marks text that
+# is consumed without producing a token (whitespace and comments).
+specs = (
+    # Whitespace: a whole run at once, so long gaps cost a single match.
+    (re.compile(r"^\s+"), None),
+
+    # Comments:
+    (re.compile(r"^//.*"), None),
+
+    # Keywords:
+    (re.compile(r"^\blet\b"), "let"),
+    (re.compile(r"^\breturn\b"), "return"),
+    (re.compile(r"^;"), ";"),
+
+    # Floats (must precede Numbers so "1.5" is not split at the dot):
+    (re.compile(r"^[-+]?[0-9]+\.[0-9]+"), "FLOAT"),
+
+    # Numbers:
+    (re.compile(r"^[-+]?[0-9]+"), "NUMBER"),
+
+    # Identifiers:
+    (re.compile(r"^\w+"), "IDENTIFIER"),
+
+    # Assignment:
+    (re.compile(r"^="), "SIMPLE_ASSIGN"),
+
+    # Double-quoted strings:
+    (re.compile(r"^\"[^\"]*\""), "STRING"),
+
+    # Symbols:
+    (re.compile(r"^\("), "("),
+    (re.compile(r"^\)"), ")"),
+    (re.compile(r"^\,"), ","),
+    (re.compile(r"^\{"), "{"),
+    (re.compile(r"^\}"), "}"),
+)
+
+
+class Tokenizer:
+    """Turn a script string into ``{"type", "value"}`` token dicts."""
+
+    def __init__(self):
+        self.script = ""
+        self.cursor = 0
+
+    def init(self, script: str):
+        """Reset the tokenizer to the start of ``script``."""
+        self.script = script
+        self.cursor = 0
+
+    def isEOF(self):
+        return self.cursor == len(self.script)
+
+    def has_more_tokens(self):
+        return self.cursor < len(self.script)
+
+    def get_next_token(self):
+        """Return the next token, or None once the input is exhausted.
+
+        Skipped text is handled by looping rather than recursing, so long
+        runs of whitespace or comments cannot hit the recursion limit.
+        """
+        while self.has_more_tokens():
+            remaining = self.script[self.cursor:]
+            for pattern, token_type in specs:
+                value = self.match(pattern, remaining)
+                if value is None:
+                    continue
+                if token_type is None:
+                    # Skipped text: rescan from the advanced cursor.
+                    break
+                return {
+                    "type": token_type,
+                    "value": value,
+                }
+            else:
+                raise Exception("Unknown token: " + remaining[0])
+        return None
+
+    def match(self, reg: re.Pattern, _script):
+        """Match ``reg`` at the start of ``_script`` and advance the cursor."""
+        matched = reg.search(_script)
+        if matched is None:
+            return None
+        # Every spec is anchored with ^, so the match end is its length.
+        self.cursor += matched.end()
+        return matched[0]
diff --git a/src/asr/asr.py b/src/asr/asr.py
index 32b9541..dbcd0a8 100644
--- a/src/asr/asr.py
+++ b/src/asr/asr.py
@@ -15,7 +15,7 @@ class ASR(Blackbox):
         self.paraformer = RapidParaformer(config)
         super().__init__(config)
 
-    async def processing(self, data: any):
+    async def processing(self, data: bytes):
         results = self.paraformer([BytesIO(data)])
         if len(results) == 0:
             return None
diff --git a/src/blackbox/blackbox.py b/src/blackbox/blackbox.py
index 8e07a28..c20b483 100644
--- a/src/blackbox/blackbox.py
+++ b/src/blackbox/blackbox.py
@@ -18,14 +18,14 @@ class Blackbox(ABC):
     Output same as above.
     """
     @abstractmethod
-    async def processing(self, data: any) -> any:
+    async def processing(self, *args, **kwargs) -> any:
         pass
 
     """
     valid method should return True if the data is valid and False if the data is invalid
     """
     @abstractmethod
-    def valid(self, data: any) -> bool:
+    def valid(self, *args, **kwargs) -> bool:
         pass
 
     """
diff --git a/src/blackbox/calculator.py b/src/blackbox/calculator.py
index cd1e6c6..c7a39c7 100644
--- a/src/blackbox/calculator.py
+++ b/src/blackbox/calculator.py
@@ -10,7 +10,7 @@ class Calculator(Blackbox):
     def valid(self, data: any) -> bool:
         return isinstance(data, dict) and "op" in data and "left" in data and "right" in data
 
-    def processing(self, data: dict) -> any:
+    def processing(self, data: dict) -> int | float:
         if not self.valid(data):
             raise ValueError("Invalid data")
         a = data["left"]