diff --git a/install-dependencies.sh b/install-dependencies.sh deleted file mode 100644 index fce7084..0000000 --- a/install-dependencies.sh +++ /dev/null @@ -1,13 +0,0 @@ -# !/bin/bash -pip install filetype -pip install fastapi -pip install python-multipart -pip install "uvicorn[standard]" -pip install SpeechRecognition -pip install gTTS -pip install PyYAML -pip install injector -pip install landchain -pip install chromadb -pip install lagent -pip install sentence_transformers \ No newline at end of file diff --git a/src/blackbox/blackbox_factory.py b/src/blackbox/blackbox_factory.py index 8565eb1..1b43b50 100644 --- a/src/blackbox/blackbox_factory.py +++ b/src/blackbox/blackbox_factory.py @@ -102,6 +102,16 @@ def cosyvoicetts_loader(): from .cosyvoicetts import CosyVoiceTTS return Injector().get(CosyVoiceTTS) +@model_loader(lazy=blackboxConf.lazyloading) +def workflow_loader(): + from .workflow import Workflow + return Injector().get(Workflow) + +@model_loader(lazy=blackboxConf.lazyloading) +def sum_loader(): + from .sum import Sum + return Injector().get(Sum) + @singleton class BlackboxFactory: models = {} @@ -124,6 +134,8 @@ class BlackboxFactory: self.models["chat"] = chat_loader self.models["chat_llama"] = chat_llama_loader self.models["cosyvoicetts"] = cosyvoicetts_loader + self.models["workflow"] = workflow_loader + self.models["sum"] = sum_loader def __call__(self, *args, **kwargs): return self.processing(*args, **kwargs) diff --git a/src/blackbox/sum.py b/src/blackbox/sum.py new file mode 100644 index 0000000..8eae3c3 --- /dev/null +++ b/src/blackbox/sum.py @@ -0,0 +1,24 @@ +from .blackbox import Blackbox +from injector import singleton + +@singleton +class Sum(Blackbox): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, *args, **kwargs): + return self.processing(*args, **kwargs) + + def processing(self, *args, **kwargs): + total = 0 + for arg in args[0]: + total += arg + return total + + def valid(self, 
*args, **kwargs) -> bool: + return super().valid(*args, **kwargs) + + async def fast_api_handler(self, request): + json = await request.json() + return self.processing(json) \ No newline at end of file diff --git a/src/blackbox/workflow.py b/src/blackbox/workflow.py new file mode 100644 index 0000000..a5f574e --- /dev/null +++ b/src/blackbox/workflow.py @@ -0,0 +1,61 @@ + +from .sum import Sum +from fastapi import Request +from .blackbox import Blackbox +from injector import singleton, inject +from ..dotchain.runtime.interpreter import program_parser +from ..dotchain.runtime.runtime import Runtime +from ..dotchain.runtime.tokenizer import Tokenizer +from ..dotchain.runtime.ast import Literal + +@singleton +class Workflow(Blackbox): + + @inject + def __init__(self, sum: Sum): + self.sum_blackbox = sum + + def __call__(self, *args, **kwargs): + return self.processing(*args, **kwargs) + + def sum(self, *args, **kwargs): + return Literal(self.sum_blackbox.processing(*args, **kwargs)) + + async def processing(self, *args, **kwargs): + request = args[0] + json = await request.json() + result = None + + def set_result(r): + nonlocal result + result = r + + def get_value(d: dict, key): + value = d.get(key) + if isinstance(value, dict): + return value + if isinstance(value, list): + return value + return Literal(value) + + script = json["script"] + t = Tokenizer() + t.init(script) + runtime = Runtime( + context={"json": json}, + exteral_fun={ + "get_value": get_value, + "print": print, + "set_result": set_result, + "sum": self.sum, + } + ) + ast = program_parser(t) + ast.exec(runtime) + return result + + def valid(self, *args, **kwargs) -> bool: + return super().valid(*args, **kwargs) + + async def fast_api_handler(self, request: Request): + return await self.processing(request) \ No newline at end of file diff --git a/src/dotchain/README.md b/src/dotchain/README.md new file mode 100644 index 0000000..601720c --- /dev/null +++ b/src/dotchain/README.md @@ -0,0 +1,24 @@ +# Dotchain + +# 語法 +``` 
+// 註解 + +// 變量宣告 +let hello = 123 + +// 函數宣告 +let add = (left, right) => { + // 返回值 + return left + right +} + +``` +## Keywords +``` +let while if else true false +``` + +```bash +python -m unittest +``` \ No newline at end of file diff --git a/src/dotchain/main.dc b/src/dotchain/main.dc new file mode 100644 index 0000000..5100dd0 --- /dev/null +++ b/src/dotchain/main.dc @@ -0,0 +1,16 @@ +// 註解 + +// 變量宣告 +let hello = 123; + +// 函數宣告 +let add = (left, right) => { + // 返回值 + return left + right; +} + +// TODO 函數呼叫 +add(1,2); +add(3, add(1,2)); +// 以 . 呼叫函數,將以 . 前的值作為第一個參數 +// hello.add(2) == add(hello, 2); \ No newline at end of file diff --git a/src/dotchain/main.py b/src/dotchain/main.py new file mode 100644 index 0000000..01f0109 --- /dev/null +++ b/src/dotchain/main.py @@ -0,0 +1,16 @@ + +from runtime.interpreter import program_parser +from runtime.runtime import Runtime +from runtime.tokenizer import Tokenizer + +script = """ +print(hello); +""" + +if __name__ == "__main__": + + t = Tokenizer() + t.init(script) + runtime = Runtime(context={"hello": [1,2,3,4], "good": "123"} ,exteral_fun={"print": print}) + ast = program_parser(t) + result = ast.exec(runtime) \ No newline at end of file diff --git a/src/dotchain/runtime/__init__.py b/src/dotchain/runtime/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dotchain/runtime/ast.py b/src/dotchain/runtime/ast.py new file mode 100644 index 0000000..31d5871 --- /dev/null +++ b/src/dotchain/runtime/ast.py @@ -0,0 +1,388 @@ +from abc import ABC, abstractmethod + +from dataclasses import dataclass + +from .runtime import Runtime + +@dataclass +class ReturnValue(): + value: any + +class Node(ABC): + def type(self): + return self.__class__.__name__ + +@dataclass +class Statement(Node, ABC): + + @abstractmethod + def exec(self, runtime: Runtime): + print(self) + pass + + @abstractmethod + def dict(self): + pass + +@dataclass +class Expression(Node): + + @abstractmethod + def eval(self, runtime: 
Runtime): + pass + + @abstractmethod + def dict(self): + pass + +@dataclass +class Literal(Expression): + value: str | int | float | bool + + def __init__(self, value): + self.value = value + + def eval(self, runtime: Runtime): + return self.value + + def dict(self) -> dict: + return { + "type": "Literal", + "value": self.value + } + +@dataclass +class StringLiteral(Literal): + value: str + + def dict(self) -> dict: + return { + "type": "StringLiteral", + "value": self.value + } + +@dataclass +class IntLiteral(Literal): + value: int + + def dict(self): + return { + "type": "IntLiteral", + "value": self.value + } + +@dataclass +class FloatLiteral(Literal): + value: float + + def dict(self): + return { + "type": "FloatLiteral", + "value": self.value + } + +@dataclass +class BoolLiteral(Literal): + value: bool + + def dict(self): + return { + "type": "BoolLiteral", + "value": self.value + } + +@dataclass +class UnaryExpression(Expression): + operator: str + expression: Expression + def eval(self, runtime: Runtime): + if self.operator == "-": + return -self.expression.eval(runtime) + if self.operator == "!": + return not self.expression.eval(runtime) + return self.expression.eval(runtime) + + def dict(self): + return { + "type": "UnaryExpression", + "operator": self.operator, + "argument": self.expression.dict() + } + +@dataclass +class Program(Statement): + body: list[Statement] + + def exec(self, runtime: Runtime): + index = 0 + while index < len(self.body): + statement = self.body[index] + result = statement.exec(runtime) + if isinstance(result, ReturnValue): + return result + index += 1 + + def dict(self): + return { + "type": self.type(), + "body": [statement.dict() for statement in self.body] + } + +@dataclass +class Identifier(Expression): + name: str + def eval(self,runtime: Runtime): + return runtime.deep_get_value(self.name) + + def dict(self): + return { + "type": self.type(), + "name": self.name + } + +@dataclass +class Block(Statement): + body: 
list[Statement] + def exec(self, runtime: Runtime): + index = 0 + while index < len(self.body): + statement = self.body[index] + result = statement.exec(runtime) + if isinstance(result, ReturnValue): + return result + if isinstance(result, BreakStatement): + return result + index += 1 + + def dict(self): + return { + "type": "Block", + "body": [statement.dict() for statement in self.body] + } + +@dataclass +class WhileStatement(Statement): + test: Expression + body: Block + + def exec(self, runtime: Runtime): + while self.test.eval(runtime): + while_runtime = Runtime(parent=runtime,name="while") + result = self.body.exec(while_runtime) + if isinstance(result, ReturnValue): + return result + if isinstance(result, BreakStatement): + return result + + def dict(self): + return { + "type": "WhileStatement", + "test": self.test.dict(), + "body": self.body.dict() + } + +@dataclass +class BreakStatement(Statement): + + def exec(self, _: Runtime): + return self + + def dict(self): + return { + "type": "BreakStatement" + } + +@dataclass +class ReturnStatement(Statement): + value: Expression + + def exec(self, runtime: Runtime): + return ReturnValue(self.value.eval(runtime)) + + def dict(self): + return { + "type": "ReturnStatement", + "value": self.value.dict() + } + +@dataclass +class IfStatement(Statement): + test: Expression + consequent: Block + alternate: Block + + def exec(self, runtime: Runtime): + if_runtime = Runtime(parent=runtime) + if self.test.eval(runtime): + return self.consequent.exec(if_runtime) + else: + return self.alternate.exec(if_runtime) + + def dict(self): + return { + "type": "IfStatement", + "test": self.test.dict(), + "consequent": self.consequent.dict(), + "alternate": self.alternate.dict() + } + +@dataclass +class VariableDeclaration(Statement): + id: Identifier + value: Expression + value_type: str = "any" + def exec(self, runtime: Runtime): + runtime.declare(self.id.name, self.value.eval(runtime)) + + def dict(self): + return { + "type": 
"VariableDeclaration", + "id": self.id.dict(), + "value": self.value.dict() + } + +@dataclass +class Assignment(Statement): + id: Identifier + value: Expression + + def exec(self, runtime: Runtime): + runtime.assign(self.id.name, self.value.eval(runtime)) + + def dict(self): + return { + "type": "Assignment", + "id": self.id.dict(), + "value": self.value.dict() + } + +@dataclass +class Argument(Expression): + id: Identifier + value: Expression + + def dict(self): + return { + "type": "Argument", + "id": self.id.dict(), + "value": self.value.dict() + } + +@dataclass +class BinaryExpression(Expression): + left: Expression + operator: str + right: Expression + + def eval(self, runtime: Runtime): + left = self.left.eval(runtime) + right = self.right.eval(runtime) + if self.operator == "+": + return left + right + if self.operator == "-": + return left - right + if self.operator == "*": + return left * right + if self.operator == "/": + return left / right + if self.operator == "%": + return left % right + if self.operator == "<": + return left < right + if self.operator == ">": + return left > right + if self.operator == "<=": + return left <= right + if self.operator == ">=": + return left >= right + if self.operator == "==": + return left == right + if self.operator == "!=": + return left != right + if self.operator == "&&": + return left and right + if self.operator == "||": + return left or right + return None + + def dict(self): + return { + "type": "BinaryExpression", + "left": self.left.dict(), + "operator": self.operator, + "right": self.right.dict() + } + +@dataclass +class CallExpression(Expression): + callee: Identifier + arguments: list[Expression] + def exec(self, runtime: Runtime, args: list=None): + if args == None: + args = [] + for index, argument in enumerate(self.arguments): + args.append(argument.eval(runtime)) + if runtime.has_value(self.callee.name): + fun:FunEnv = runtime.get_value(self.callee.name) + return fun.exec(args) + if runtime.parent is 
not None: + return self.exec(runtime.parent,args) + if self.callee.name in runtime.exteral_fun: + return runtime.exteral_fun[self.callee.name](*args) + + + def eval(self, runtime): + result = self.exec(runtime) + if result is not None: + return result.value + + def dict(self): + return { + "type": "CallExpression", + "callee": self.callee.dict(), + "arguments": [argument.dict() for argument in self.arguments] + } + +@dataclass +class Fun(Statement): + params: list[Identifier] + body: Block + + def exec(self, runtime: Runtime): + return self.body.exec(runtime) + + def eval(self, runtime: Runtime): + return FunEnv(runtime, self) + + def dict(self): + return { + "type": "Fun", + "params": [param.dict() for param in self.params], + "body": self.body.dict() + } + +class EmptyStatement(Statement): + + def exec(self, _: Runtime): + return None + + def eval(self, _: Runtime): + return None + + def dict(self): + return { + "type": "EmptyStatement" + } + + +class FunEnv(): + + def __init__(self, parent: Runtime, body: Fun): + self.parent = parent + self.body = body + + def exec(self, args: list): + fun_runtime = Runtime(parent=self.parent) + for index, param in enumerate(self.body.params): + fun_runtime.declare(param.name, args[index]) + return self.body.exec(fun_runtime) \ No newline at end of file diff --git a/src/dotchain/runtime/interpreter.py b/src/dotchain/runtime/interpreter.py new file mode 100644 index 0000000..bf33cb8 --- /dev/null +++ b/src/dotchain/runtime/interpreter.py @@ -0,0 +1,420 @@ +from ast import Expression +import copy +from .ast import Assignment, BinaryExpression, Block, BoolLiteral, BreakStatement, CallExpression, EmptyStatement, FloatLiteral, Fun, Identifier, IfStatement, IntLiteral, Program, ReturnStatement, Statement, StringLiteral, UnaryExpression, VariableDeclaration, WhileStatement +from .tokenizer import Token, TokenType, Tokenizer + +unary_prev_statement = [ + TokenType.COMMENTS, + TokenType.LEFT_PAREN, + TokenType.COMMA, + 
TokenType.LEFT_BRACE, + TokenType.RIGHT_BRACE, + TokenType.SEMICOLON, + TokenType.LET, + TokenType.RETURN, + TokenType.IF, + TokenType.ELSE, + TokenType.WHILE, + TokenType.FOR, + TokenType.LOGICAL_OPERATOR, + TokenType.NOT, + TokenType.ASSIGNMENT, + TokenType.MULTIPLICATIVE_OPERATOR, + TokenType.ADDITIVE_OPERATOR, + TokenType.ARROW, +] + +unary_end_statement = [ + TokenType.MULTIPLICATIVE_OPERATOR, + TokenType.ADDITIVE_OPERATOR, + TokenType.LOGICAL_OPERATOR, +] + +end_statement = [ + TokenType.SEMICOLON, + TokenType.COMMA, + TokenType.ARROW, + TokenType.RETURN, + TokenType.LET, + TokenType.IF, + TokenType.ELSE, + TokenType.WHILE, + TokenType.FOR, + TokenType.ASSIGNMENT, + TokenType.RIGHT_BRACE, + TokenType.LEFT_BRACE, +] + +def program_parser(tkr: Tokenizer): + statements = list[Statement]() + count = 0 + while True: + if tkr.token() is None: + break + if tkr.token().type == TokenType.SEMICOLON: + tkr.next() + continue + statement = statement_parser(tkr) + statements.append(statement) + count += 1 + return Program(statements) + +def if_parser(tkr: Tokenizer): + tkr.eat(TokenType.IF) + condition = ExpressionParser(tkr).parse() + block = block_statement(tkr) + if tkr.type_is(TokenType.ELSE): + tkr.eat(TokenType.ELSE) + if tkr.type_is(TokenType.IF): + print("else if") + return IfStatement(condition, block, Block([if_parser(tkr)])) + return IfStatement(condition, block, block_statement(tkr)) + return IfStatement(condition, block, Block([])) + +def while_parser(tkr: Tokenizer): + tkr.eat(TokenType.WHILE) + condition = ExpressionParser(tkr).parse() + block = block_statement(tkr) + return WhileStatement(condition, block) + + +def identifier(tkr: Tokenizer): + token = tkr.token() + if token.type != TokenType.IDENTIFIER: + raise Exception("Invalid identifier", token) + tkr.next() + return Identifier(token.value) + +def block_statement(tkr: Tokenizer): + tkr.eat(TokenType.LEFT_BRACE) + statements = list[Statement]() + while True: + if tkr.token() is None: + raise 
Exception("Invalid block expression", tkr.token()) + if tkr.tokenType() == TokenType.RIGHT_BRACE: + tkr.eat(TokenType.RIGHT_BRACE) + break + if tkr.tokenType() == TokenType.SEMICOLON: + tkr.next() + continue + statements.append(statement_parser(tkr)) + return Block(statements) + + +def return_parser(tkr: Tokenizer): + tkr.eat(TokenType.RETURN) + return ReturnStatement(ExpressionParser(tkr).parse()) + +def statement_parser(tkr: Tokenizer): + token = tkr.token() + if token is None: + return EmptyStatement() + if token.type == TokenType.SEMICOLON: + tkr.next() + return EmptyStatement() + if token.type == TokenType.LET: + return let_expression_parser(tkr) + if _try_assignment_expression(tkr): + return assignment_parser(tkr) + if token.type == TokenType.IF: + return if_parser(tkr) + if token.type == TokenType.WHILE: + return while_parser(tkr) + if token.type == TokenType.RETURN: + return return_parser(tkr) + if token.type == TokenType.BREAK: + tkr.eat(TokenType.BREAK) + return BreakStatement() + return ExpressionParser(tkr).parse() + +def assignment_parser(tkr: Tokenizer): + id = identifier(tkr) + tkr.eat(TokenType.ASSIGNMENT) + return Assignment(id, ExpressionParser(tkr).parse()) + +def let_expression_parser(tkr: Tokenizer): + tkr.eat(TokenType.LET) + token = tkr.token() + if token.type != TokenType.IDENTIFIER: + raise Exception("Invalid let statement", token) + id = identifier(tkr) + token = tkr.token() + if token is None: + raise Exception("Invalid let statement", token) + if token.type != TokenType.ASSIGNMENT: + raise Exception("Invalid let statement", token.type) + tkr.next() + ast = ExpressionParser(tkr).parse() + return VariableDeclaration(id, ast) + +class ExpressionParser: + + def __init__(self, tkr: Tokenizer): + self.stack = list[Expression | Token]() + self.operator_stack = list[Token]() + self.tkr = tkr + + def parse(self, unary = False): + while not self.is_end(): + token = self.tkr.token() + if unary and not self.is_unary() and token.type in 
unary_end_statement: + break + if self.is_unary(): + self.push_stack(self.unary_expression_parser()) + elif self._try_fun_expression(): + return self.fun_expression() + # -(hello x 123) // !(true and false) + elif unary and token.type == TokenType.LEFT_PAREN: + self.tkr.next() + self.push_stack(ExpressionParser(self.tkr).parse()) + elif self._is_operator(token) or token.type in [TokenType.LEFT_PAREN, TokenType.RIGHT_PAREN ]: + self.push_operator_stack(token) + self.tkr.next() + else: + self.push_stack(self.expression_parser()) + self.pop_all() + return self.expression() + + def expression(self): + if len(self.stack) == 0: + return EmptyStatement() + if len(self.stack) == 1: + return self.stack[0] + return expression_list_to_binary(self.stack) + + def expression_parser(self): + token = self.tkr.token() + if token is None: + return EmptyStatement() + expression = None + if token.type == TokenType.INT: + self.tkr.eat(TokenType.INT) + expression = IntLiteral(int(token.value)) + elif token.type == TokenType.FLOAT: + self.tkr.eat(TokenType.FLOAT) + expression = FloatLiteral(float(token.value)) + elif token.type == TokenType.STRING: + self.tkr.eat(TokenType.STRING) + expression = StringLiteral(token.value[1:-1]) + elif token.type == TokenType.BOOL: + self.tkr.eat(TokenType.BOOL) + expression = BoolLiteral(token.value == "true") + elif token.type == TokenType.IDENTIFIER: + expression = self.identifier_or_fun_call_parser() + return expression + + def _try_fun_expression(self): + return _try_fun_expression(self.tkr) + + def fun_expression(self): + tkr = self.tkr + tkr.next() + args = list[Identifier]() + token_type = tkr.tokenType() + while token_type != TokenType.RIGHT_PAREN: + args.append(Identifier(tkr.token().value)) + tkr.next() + token_type = tkr.tokenType() + if token_type == TokenType.RIGHT_PAREN: + break + tkr.next() + token_type = tkr.tokenType() + token_type = tkr.next_token_type() + if token_type != TokenType.ARROW: + raise Exception("Invalid fun_expression", 
tkr.token()) + tkr.next() + return Fun(args, block_statement(tkr)) + + def push_stack(self, expression: Expression | Token): + self.stack.append(expression) + + def _pop_by_right_paren(self): + token = self.operator_stack.pop() + if token.type != TokenType.LEFT_PAREN: + self.push_stack(token) + self._pop_by_right_paren() + + def pop(self): + self.push_stack(self.operator_stack.pop()) + + def pop_all(self): + while len(self.operator_stack) > 0: + self.pop() + + def push_operator_stack(self, token: Token): + if len(self.operator_stack) == 0: + self.operator_stack.append(token) + return + if token.type == TokenType.LEFT_PAREN: + self.operator_stack.append(token) + return + if token.type == TokenType.RIGHT_PAREN: + self._pop_by_right_paren() + return + top_operator = self.operator_stack[-1] + if top_operator.type == TokenType.LEFT_PAREN: + self.operator_stack.append(token) + return + # priority is in descending order + if self._priority(token) >= self._priority(top_operator): + self.pop() + self.push_operator_stack(token) + return + self.operator_stack.append(token) + + def unary_expression_parser(self): + token = self.tkr.token() + self.tkr.next() + return UnaryExpression(token.value, ExpressionParser(self.tkr).parse(True)) + + def identifier_or_fun_call_parser(self): + id = self.identifier() + tokenType = self.tkr.tokenType() + if tokenType == TokenType.LEFT_PAREN: + return self.fun_call_parser(id) + return id + + def fun_call_parser(self, id: Identifier): + self.tkr.eat(TokenType.LEFT_PAREN) + args = list[Expression]() + while self.tkr.tokenType() != TokenType.RIGHT_PAREN: + args.append(ExpressionParser(self.tkr).parse()) + if self.tkr.tokenType() == TokenType.COMMA: + self.tkr.eat(TokenType.COMMA) + self.tkr.eat(TokenType.RIGHT_PAREN) + return CallExpression(id, args) + + def identifier(self): + return identifier(self.tkr) + + def is_unary(self): + token = self.tkr.token() + if not self.unary_operator(token): + return False + if token.type == TokenType.NOT: + 
return True + prev_token = self.tkr.get_prev() + if prev_token is None: + return True + if prev_token.type == TokenType.LEFT_PAREN: + return True + if prev_token.type in unary_prev_statement: + return True + return False + + def unary_operator(self, token: Token): + if token is None: + return False + return token.value in ["+", "-", "!"] + + def _has_brackets(self): + return TokenType.LEFT_PAREN in map(lambda x: x.type, self.operator_stack) + + def is_end(self): + token = self.tkr.token() + if token is None: + return True + if token.type == TokenType.SEMICOLON: + return True + if not self._has_brackets() and token.type == TokenType.RIGHT_PAREN: + return True + if token.type in end_statement: + return True + return False + + def _is_operator(self, token: Token): + if token is None: + return False + return token.type in [TokenType.ADDITIVE_OPERATOR, TokenType.MULTIPLICATIVE_OPERATOR, TokenType.LOGICAL_OPERATOR, TokenType.NOT] + + def _debug_print_tokens(self): + print("operator stack:----") + for token in self.operator_stack: + print(token) + + def _debug_print_stack(self): + print("stack:----") + for expression in self.stack: + print(expression) + + def _priority(self, token: Token): + return _priority(token.value) + +def expression_list_to_binary(expression_list: list[Expression | Token], stack: list = None): + if stack is None: + stack = list() + if len(expression_list) == 0: + return stack[0] + top = expression_list[0] + if isinstance(top, Token): + right = stack.pop() + left = stack.pop() + return expression_list_to_binary(expression_list[1:], stack + [BinaryExpression(left, top.value, right)]) + else: + stack.append(top) + return expression_list_to_binary(expression_list[1:], stack) + +def _priority(operator: str): + priority = 0 + if operator in ["*", "/", "%"]: + return priority + priority += 1 + if operator in ["+", "-"]: + return priority + priority += 1 + if operator in ["<", ">", "<=", ">="]: + return priority + priority += 1 + if operator in ["==", 
"!="]: + return priority + priority += 1 + if operator in ["&&"]: + return priority + priority += 1 + if operator in ["||"]: + return priority + priority += 1 + return priority + +def _try_assignment_expression(tkr: Tokenizer): + tkr = copy.deepcopy(tkr) + token = tkr.token() + if token is None: + return False + if token.type != TokenType.IDENTIFIER: + return False + tkr.next() + token = tkr.token() + if token is None: + return False + if token.type != TokenType.ASSIGNMENT: + return False + return True + +def _try_fun_expression(_tkr: Tokenizer): + tkr = copy.deepcopy(_tkr) + token = tkr.token() + if token is None: + return False + if token.type != TokenType.LEFT_PAREN: + return False + tkr.next() + token_type = tkr.tokenType() + while token_type != TokenType.RIGHT_PAREN: + if token_type == TokenType.IDENTIFIER: + tkr.next() + token_type = tkr.tokenType() + if token_type == TokenType.RIGHT_PAREN: + break + if token_type != TokenType.COMMA: + return False + tkr.next() + token_type = tkr.tokenType() + if token_type == TokenType.RIGHT_PAREN: + return False + else: + return False + token_type = tkr.next_token_type() + if token_type != TokenType.ARROW: + return False + return True \ No newline at end of file diff --git a/src/dotchain/runtime/runtime.py b/src/dotchain/runtime/runtime.py new file mode 100644 index 0000000..4ae9f3f --- /dev/null +++ b/src/dotchain/runtime/runtime.py @@ -0,0 +1,40 @@ + +class Runtime(): + + def __init__(self, context=None, parent=None, exteral_fun=None, name=None) -> None: + self.name = name + self.parent = parent + self.context = context if context is not None else dict() + self.exteral_fun = exteral_fun if exteral_fun is not None else dict() + + def has_value(self, identifier: str) -> bool: + return identifier in self.context + + def get_value(self, identifier: str): + return self.context.get(identifier) + + def deep_get_value(self, id: str): + if self.has_value(id): + return self.get_value(id) + if self.parent is not None: + return 
self.parent.deep_get_value(id) + return None + + def set_value(self, identifier: str, value): + self.context[identifier] = value + + def declare(self, identifier: str, value): + if self.has_value(identifier): + raise Exception(f"Variable {identifier} is already declared") + self.set_value(identifier, value) + + def assign(self, identifier: str, value): + if self.has_value(identifier): + self.set_value(identifier, value) + elif self.parent is not None: + self.parent.assign(identifier, value) + else: + raise Exception(f"Variable {identifier} is not declared") + + def show_values(self): + print(self.context) diff --git a/src/dotchain/runtime/tests/__init__.py b/src/dotchain/runtime/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dotchain/runtime/tests/test_expression_parser.py b/src/dotchain/runtime/tests/test_expression_parser.py new file mode 100644 index 0000000..5f606ec --- /dev/null +++ b/src/dotchain/runtime/tests/test_expression_parser.py @@ -0,0 +1,153 @@ + +import unittest +from runtime.ast import BoolLiteral, CallExpression, FloatLiteral, Identifier, IntLiteral, UnaryExpression +from runtime.interpreter import ExpressionParser, _priority, _try_fun_expression +from runtime.tokenizer import TokenType, Tokenizer,Token + + + +class TestExpressionParser(unittest.TestCase): + + def test__try_fun_expression(self): + t = Tokenizer() + t.init("()") + self.assertFalse(_try_fun_expression(t)) + + t.init("() =>") + self.assertTrue(_try_fun_expression(t)) + + t.init("(a) =>") + self.assertTrue(_try_fun_expression(t)) + + t.init("(a,) =>") + self.assertFalse(_try_fun_expression(t)) + + t.init("(a,b,c,d) =>;") + self.assertTrue(_try_fun_expression(t)) + + t.init("(a,b,c,true) =>;") + self.assertFalse(_try_fun_expression(t)) + + t.init("(a,b,c,1.23) =>;") + self.assertFalse(_try_fun_expression(t)) + + def test_is_unary(self): + t = Tokenizer() + t.init("!") + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertTrue(pred) + + 
t.init("+") + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertTrue(pred) + + t.init("--123") + t.next() + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertTrue(pred) + + t.init("+-123") + t.next() + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertTrue(pred) + + t.init(")-123") + t.next() + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertFalse(pred) + + t.init("=> - 123") + t.next() + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertTrue(pred) + + t.init(", - 123") + t.next() + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertTrue(pred) + + t.init("* - 123") + t.next() + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertTrue(pred) + + t.init("* - 123") + parser = ExpressionParser(t) + pred = parser.is_unary() + self.assertFalse(pred) + + def test_expression_parser(self): + t = Tokenizer() + t.init("a") + parser = ExpressionParser(t) + expression = parser.expression_parser() + self.assertIsInstance(expression, Identifier) + + t.init("true") + parser = ExpressionParser(t) + expression = parser.expression_parser() + self.assertIsInstance(expression, BoolLiteral) + self.assertEqual(expression.value, True) + + t.init("false") + parser = ExpressionParser(t) + expression = parser.expression_parser() + self.assertIsInstance(expression, BoolLiteral) + self.assertEqual(expression.value, False) + + t.init("12341") + parser = ExpressionParser(t) + expression = parser.expression_parser() + self.assertEqual(expression.value, 12341) + self.assertIsInstance(expression, IntLiteral) + + t.init("12341.42") + parser = ExpressionParser(t) + expression = parser.expression_parser() + self.assertEqual(expression.value, 12341.42) + self.assertIsInstance(expression, FloatLiteral) + + t.init("hello") + parser = ExpressionParser(t) + expression: Identifier = parser.expression_parser() + self.assertIsInstance(expression, Identifier) + 
self.assertEqual(expression.name, "hello") + + t.init("print()") + parser = ExpressionParser(t) + expression: CallExpression = parser.expression_parser() + self.assertIsInstance(expression, CallExpression) + self.assertEqual(expression.callee.name, "print") + + t.init("print(1,2,3,hello)") + parser = ExpressionParser(t) + expression: CallExpression = parser.expression_parser() + self.assertIsInstance(expression, CallExpression) + self.assertEqual(expression.callee.name, "print") + self.assertEqual(len(expression.arguments), 4) + + def test_binary_expression(self): + t = Tokenizer() + + def test__priority(self): + self.assertEqual(_priority("*"), 0) + self.assertEqual(_priority("/"), 0) + self.assertEqual(_priority("%"), 0) + self.assertEqual(_priority("+"), 1) + self.assertEqual(_priority("-"), 1) + self.assertEqual(_priority(">"), 2) + self.assertEqual(_priority("<"), 2) + self.assertEqual(_priority(">="), 2) + self.assertEqual(_priority("<="), 2) + self.assertEqual(_priority("=="), 3) + self.assertEqual(_priority("!="), 3) + self.assertEqual(_priority("&&"), 4) + self.assertEqual(_priority("||"), 5) \ No newline at end of file diff --git a/src/dotchain/runtime/tests/test_runtime.py b/src/dotchain/runtime/tests/test_runtime.py new file mode 100644 index 0000000..698db5d --- /dev/null +++ b/src/dotchain/runtime/tests/test_runtime.py @@ -0,0 +1,7 @@ + +import unittest + +class TestRuntime(unittest.TestCase): + + def test_eval(self): + self.assertTrue(True) \ No newline at end of file diff --git a/src/dotchain/runtime/tests/test_tokenizer.py b/src/dotchain/runtime/tests/test_tokenizer.py new file mode 100644 index 0000000..63188bf --- /dev/null +++ b/src/dotchain/runtime/tests/test_tokenizer.py @@ -0,0 +1,151 @@ + +import unittest +from runtime.tokenizer import TokenType, Tokenizer,Token + +class TestTokenizer(unittest.TestCase): + + def test_init(self): + t = Tokenizer() + self.assertEqual(t.script, "") + self.assertEqual(t.cursor, 0) + self.assertEqual(t.col, 0) + 
self.assertEqual(t.row, 0) + + def test_tokenizer(self): + t = Tokenizer() + t.init("a") + self.assertEqual(t.token().value, "a") + self.assertEqual(t.token().type, TokenType.IDENTIFIER) + + t.init("12341") + self.assertEqual(t.token().value, "12341") + self.assertEqual(t.token().type, TokenType.INT) + + t.init("12341.1234124") + self.assertEqual(t.token().value, "12341.1234124") + self.assertEqual(t.token().type, TokenType.FLOAT) + + t.init("false") + self.assertEqual(t.token().value, "false") + self.assertEqual(t.token().type, TokenType.BOOL) + + t.init("\"false\"") + self.assertEqual(t.token().value, "\"false\"") + self.assertEqual(t.token().type, TokenType.STRING) + + t.init("helloworld") + self.assertEqual(t.token().value, "helloworld") + self.assertEqual(t.token().type, TokenType.IDENTIFIER) + + t.init("!") + self.assertEqual(t.token().value, "!") + self.assertEqual(t.token().type, TokenType.NOT) + + t.init("==") + self.assertEqual(t.token().value, "==") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init("!=") + self.assertEqual(t.token().value, "!=") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init("<=") + self.assertEqual(t.token().value, "<=") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init(">=") + self.assertEqual(t.token().value, ">=") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init("<") + self.assertEqual(t.token().value, "<") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init(">") + self.assertEqual(t.token().value, ">") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init("&&") + self.assertEqual(t.token().value, "&&") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init("||") + self.assertEqual(t.token().value, "||") + self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) + + t.init("=") + self.assertEqual(t.token().value, "=") + self.assertEqual(t.token().type, 
TokenType.ASSIGNMENT) + + t.init("+") + self.assertEqual(t.token().value, "+") + self.assertEqual(t.token().type, TokenType.ADDITIVE_OPERATOR) + + t.init("-") + self.assertEqual(t.token().value, "-") + self.assertEqual(t.token().type, TokenType.ADDITIVE_OPERATOR) + + t.init("*") + self.assertEqual(t.token().value, "*") + self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR) + + t.init("/") + self.assertEqual(t.token().value, "/") + self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR) + + t.init("%") + self.assertEqual(t.token().value, "%") + self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR) + + t.init("(") + self.assertEqual(t.token().value, "(") + self.assertEqual(t.token().type, TokenType.LEFT_PAREN) + + t.init(")") + self.assertEqual(t.token().value, ")") + self.assertEqual(t.token().type, TokenType.RIGHT_PAREN) + + t.init("{") + self.assertEqual(t.token().value, "{") + self.assertEqual(t.token().type, TokenType.LEFT_BRACE) + + t.init("}") + self.assertEqual(t.token().value, "}") + self.assertEqual(t.token().type, TokenType.RIGHT_BRACE) + + def test_init(self): + t = Tokenizer() + script = "a + 9 * ( 3 - 1 ) * 3 + 10 / 2;" + t.init(script) + self.assertEqual(t.script, script) + self.assertEqual(len(t.tokens), 16) + self.assertEqual(t.get_prev(), None) + self.assertEqual(t.token().value, "a") + self.assertEqual(t.get_next().value, "+") + self.assertEqual(t.next().value, "+") + self.assertEqual(t.next().value, "9") + self.assertEqual(t.next().value, "*") + t.prev() + self.assertEqual(t.token().value, "9") + t.prev() + self.assertEqual(t.token().value, "+") + + script = "a + 9" + t.init(script) + self.assertEqual(t.token().type, TokenType.IDENTIFIER) + self.assertEqual(t.next().type, TokenType.ADDITIVE_OPERATOR) + self.assertEqual(t.next().type, TokenType.INT) + self.assertEqual(t.next(), None) + self.assertEqual(t._current_token_index, 3) + self.assertEqual(t.next(), None) + self.assertEqual(t.next(), None) + 
self.assertEqual(t._current_token_index, 3) + self.assertEqual(t.next(), None) + t.prev() + self.assertEqual(t.token().value, "9") + t.prev() + self.assertEqual(t.token().value, "+") + t.prev() + self.assertEqual(t.token().value, "a") + t.prev() + self.assertEqual(t.token().value, "a") \ No newline at end of file diff --git a/src/dotchain/runtime/tokenizer.py b/src/dotchain/runtime/tokenizer.py new file mode 100644 index 0000000..cd7cd34 --- /dev/null +++ b/src/dotchain/runtime/tokenizer.py @@ -0,0 +1,259 @@ +import re +from enum import Enum + +from dataclasses import dataclass + +class TokenType(Enum): + NEW_LINE = 1 + SPACE = 2 + COMMENTS = 3 + LEFT_PAREN = 4 + RIGHT_PAREN = 5 + COMMA = 6 + LEFT_BRACE = 7 + RIGHT_BRACE = 8 + SEMICOLON = 9 + LET = 10 + RETURN = 11 + IF = 12 + ELSE = 13 + WHILE = 14 + FOR = 15 + FLOAT = 18 + INT = 19 + IDENTIFIER = 20 + LOGICAL_OPERATOR = 21 + NOT = 22 + ASSIGNMENT = 23 + MULTIPLICATIVE_OPERATOR = 24 + ADDITIVE_OPERATOR = 25 + STRING = 26 + ARROW = 27 + BOOL = 28 + BREAK = 29 + TYPE_DEFINITION = 30 + COLON = 31 + +specs = ( + (re.compile(r"^\n"),TokenType.NEW_LINE), + # Space: + (re.compile(r"^\s"),TokenType.SPACE), + # Comments: + (re.compile(r"^//.*"), TokenType.COMMENTS), + + # Symbols: + (re.compile(r"^\("), TokenType.LEFT_PAREN), + (re.compile(r"^\)"), TokenType.RIGHT_PAREN), + (re.compile(r"^\,"), TokenType.COMMA), + (re.compile(r"^\{"), TokenType.LEFT_BRACE), + (re.compile(r"^\}"), TokenType.RIGHT_BRACE), + (re.compile(r"^;"), TokenType.SEMICOLON), + (re.compile(r"^:"), TokenType.COLON), + (re.compile(r"^=>"), TokenType.ARROW), + + # Keywords: + (re.compile(r"^\blet\b"), TokenType.LET), + (re.compile(r"^\breturn\b"), TokenType.RETURN), + (re.compile(r"^\bif\b"), TokenType.IF), + (re.compile(r"^\belse\b"), TokenType.ELSE), + (re.compile(r"^\bwhile\b"), TokenType.WHILE), + (re.compile(r"^\bfor\b"), TokenType.FOR), + (re.compile(r"^\bbreak\b"), TokenType.BREAK), + + (re.compile(r"^\btrue\b"), TokenType.BOOL), + 
(re.compile(r"^\bfalse\b"), TokenType.BOOL), + + # Type definition: + (re.compile(r"^\bstring\b"), TokenType.TYPE_DEFINITION), + (re.compile(r"^\bint\b"), TokenType.TYPE_DEFINITION), + (re.compile(r"^\bfloat\b"), TokenType.TYPE_DEFINITION), + (re.compile(r"^\bbool\b"), TokenType.TYPE_DEFINITION), + (re.compile(r"^\bany\b"), TokenType.TYPE_DEFINITION), + + # Floats: + (re.compile(r"^[0-9]+\.[0-9]+"), TokenType.FLOAT), + + # Ints: + (re.compile(r"^[0-9]+"), TokenType.INT), + + # Identifiers: + (re.compile(r"^\w+"), TokenType.IDENTIFIER), + + + # Logical operators: + (re.compile(r"^&&"), TokenType.LOGICAL_OPERATOR), + (re.compile(r"^\|\|"), TokenType.LOGICAL_OPERATOR), + (re.compile(r"^=="), TokenType.LOGICAL_OPERATOR), + (re.compile(r"^!="), TokenType.LOGICAL_OPERATOR), + (re.compile(r"^<="), TokenType.LOGICAL_OPERATOR), + (re.compile(r"^>="), TokenType.LOGICAL_OPERATOR), + (re.compile(r"^<"), TokenType.LOGICAL_OPERATOR), + (re.compile(r"^>"), TokenType.LOGICAL_OPERATOR), + + (re.compile(r"^!"), TokenType.NOT), + + # Assignment: + (re.compile(r"^="), TokenType.ASSIGNMENT), + + # Math operators: +, -, *, /: + (re.compile(r"^[*/%]"), TokenType.MULTIPLICATIVE_OPERATOR), + (re.compile(r"^[+-]"), TokenType.ADDITIVE_OPERATOR), + + # Double-quoted strings + # TODO: escape character \" and + (re.compile(r"^\"[^\"]*\""), TokenType.STRING), +) + +@dataclass +class Token: + type: TokenType + value: str + row: int + col: int + col_end: int + cursor: int + + def __str__(self) -> str: + return f"Token({self.type}, {self.value}, row={self.row}, col={self.col}, col_end={self.col_end}, cursor={self.cursor})" + + +class Tokenizer: + + def __init__(self): + self._current_token = None + self.script = "" + self.cursor = 0 + self.col = 0 + self.row = 0 + self._current_token_index = 0 + self.tokens = list[Token]() + self.checkpoint = list[int]() + + def init(self, script: str): + self.checkpoint = list[int]() + self.tokens = list[Token]() + self._current_token_index = 0 + 
self._current_token = None + self.script = script + self.cursor = 0 + self.col = 0 + self.row = 0 + self._get_next_token() + while self._current_token is not None: + self.tokens.append(self._current_token) + self._get_next_token() + + def checkpoint_push(self): + self.checkpoint.append(self._current_token_index) + + def checkpoint_pop(self): + self._current_token_index = self.checkpoint.pop() + + def next(self): + if self._current_token_index < len(self.tokens): + self._current_token_index += 1 + return self.token() + + def next_token_type(self): + if self._current_token_index < len(self.tokens): + self._current_token_index += 1 + return self.tokenType() + + def prev(self): + if self._current_token_index > 0: + self._current_token_index -= 1 + return self.token() + + def get_prev(self): + if self._current_token_index == 0: + return None + return self.tokens[self._current_token_index - 1] + + def get_next(self): + if self._current_token_index >= len(self.tokens): + return None + return self.tokens[self._current_token_index + 1] + + def token(self): + if self._current_token_index >= len(self.tokens): + return None + return self.tokens[self._current_token_index] + + def tokenType(self): + if self._current_token_index >= len(self.tokens): + return None + return self.tokens[self._current_token_index].type + + + def _get_next_token(self): + if self._is_eof(): + self._current_token = None + return None + _string = self.script[self.cursor:] + for spec in specs: + tokenValue, offset = self.match(spec[0], _string) + if tokenValue == None: + continue + if spec[1] == TokenType.NEW_LINE: + self.row += 1 + self.col = 0 + return self._get_next_token() + if spec[1] == TokenType.COMMENTS: + return self._get_next_token() + if spec[1] == TokenType.SPACE: + self.col += offset + return self._get_next_token() + if spec[1] == None: + return self._get_next_token() + self._current_token = Token(spec[1],tokenValue, self.cursor, self.row, self.col, self.col + offset) + self.col += offset + 
return self.get_current_token() + raise Exception("Unknown token: " + _string[0]) + + def _is_eof(self): + return self.cursor == len(self.script) + + def has_more_tokens(self): + return self.cursor < len(self.script) + + def get_current_token(self): + return self._current_token + + def match(self, reg: re, _script): + matched = reg.search(_script) + if matched == None: + return None,0 + self.cursor = self.cursor + matched.span(0)[1] + return matched[0], matched.span(0)[1] + + def eat(self, value: str | TokenType): + if isinstance(value, str): + return self.eat_value(value) + if isinstance(value, TokenType): + return self.eat_token_type(value) + + def eat_value(self, value: str): + token = self.token() + if token is None: + raise Exception(f"Expected {value} but got None") + if token.value != value: + raise Exception(f"Expected {value} but got {token.value}") + self.next() + return token + + def eat_token_type(self,tokenType: TokenType): + token = self.token() + if token is None: + raise Exception(f"Expected {tokenType} but got None") + if token.type != tokenType: + raise Exception(f"Expected {tokenType} but got {token.type}") + self.next() + return token + + def type_is(self, tokenType: TokenType): + if self.token() is None: + return False + return self.token().type == tokenType + + def the_rest(self): + return self.tokens[self._current_token_index:] \ No newline at end of file