diff --git a/src/dotchain/README.md b/src/dotchain/README.md deleted file mode 100644 index ed2b399..0000000 --- a/src/dotchain/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# Dotchain -Dotchain 是一種函數式編程語言. 文件後綴`.dc` - -# 語法 -``` -// 註解 - -// 變量宣告 -let hello = 123 - -// 函數宣告 -let add = (left, right) => { - // 返回值 - return left + right -} - -// TODO: 函數呼叫 -add(1,2) -add(3, add(1,2)) -// 以 . 呼叫函數,將以 . 前的值作為第一個參數 -// hello.add(2) 等價於 add(hello, 2) -``` -## Keywords -``` -let while if else true false -``` - -```bash -python -m unittest -``` \ No newline at end of file diff --git a/src/dotchain/main.dc b/src/dotchain/main.dc deleted file mode 100644 index 5100dd0..0000000 --- a/src/dotchain/main.dc +++ /dev/null @@ -1,16 +0,0 @@ -// 註解 - -// 變量宣告 -let hello = 123; - -// 函數宣告 -let add = (left, right) => { - // 返回值 - return left + right; -} - -// TODO 函數呼叫 -add(1,2); -add(3, add(1,2)); -// 以 . 呼叫函數,將以 . 前的值作為第一個參數 -// hello.add(2) == add(hello, 2); \ No newline at end of file diff --git a/src/dotchain/main.py b/src/dotchain/main.py deleted file mode 100644 index 552f201..0000000 --- a/src/dotchain/main.py +++ /dev/null @@ -1,29 +0,0 @@ - -from runtime.interpreter import program_parser -from runtime.runtime import Runtime -from runtime.tokenizer import Tokenizer -import json - -script = """ -let rec = (c) => { - print(c); - if c == 0 { - return "c + 1"; - } - rec(c-1); -} - -let main = () => { - print("hello 嘉妮"); - print(rec(10)); -} - -main(); -""" - -if __name__ == "__main__": - t = Tokenizer() - t.init(script) - runtime = Runtime(exteral_fun={"print": print}) - ast = program_parser(t) - result = ast.exec(runtime) \ No newline at end of file diff --git a/src/dotchain/runtime/__init__.py b/src/dotchain/runtime/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/dotchain/runtime/ast.py b/src/dotchain/runtime/ast.py deleted file mode 100644 index c8006d5..0000000 --- a/src/dotchain/runtime/ast.py +++ /dev/null @@ -1,384 +0,0 @@ -from abc import ABC, abstractmethod - -from attr import dataclass - -from .runtime import Runtime - -@dataclass -class ReturnValue(): - value: any - -class Node(ABC): - def type(self): - return self.__class__.__name__ - -@dataclass -class Statement(Node, ABC): - - @abstractmethod - def exec(self, runtime: Runtime): - print(self) - pass - - @abstractmethod - def dict(self): - pass - -@dataclass -class Expression(Node): - - @abstractmethod - def eval(self, runtime: Runtime): - pass - - @abstractmethod - def dict(self): - pass - -@dataclass -class Literal(Expression): - value: str | int | float | bool - def eval(self, runtime: Runtime): - return self.value - - def dict(self) -> dict: - return { - "type": "Literal", - "value": self.value - } - -@dataclass -class StringLiteral(Literal): - value: str - - def dict(self) -> dict: - return { - "type": "StringLiteral", - "value": self.value - } - -@dataclass -class IntLiteral(Literal): - value: int - - def dict(self): - return { - "type": "IntLiteral", - "value": self.value - } - -@dataclass -class FloatLiteral(Literal): - value: float - - def dict(self): - return { - "type": "FloatLiteral", - "value": self.value - } - -@dataclass -class BoolLiteral(Literal): - value: bool - - def dict(self): - return { - "type": "FloatLiteral", - "value": self.value - } - -@dataclass -class UnaryExpression(Expression): - operator: str - expression: Expression - def eval(self, runtime: Runtime): - if self.operator == "-": - return -self.expression.eval(runtime) - if self.operator == "!": - return not self.expression.eval(runtime) - return self.expression.eval(runtime) - - def dict(self): - return { - "type": "UnaryExpression", - "operator": self.operator, - "argument": self.expression.dict() - } - -@dataclass -class Program(Statement): - body: list[Statement] - - def exec(self, runtime: Runtime): - index = 0 - while index < len(self.body): - statement = self.body[index] - result = statement.exec(runtime) - if isinstance(result, ReturnValue): - return result - index += 1 - - def dict(self): - return { - "type": self.type(), - "body": [statement.dict() for statement in self.body] - } - -@dataclass -class Identifier(Expression): - name: str - def eval(self,runtime: Runtime): - return runtime.deep_get_value(self.name) - - def dict(self): - return { - "type": self.type(), - "name": self.name - } - -@dataclass -class Block(Statement): - body: list[Statement] - def exec(self, runtime: Runtime): - index = 0 - while index < len(self.body): - statement = self.body[index] - result = statement.exec(runtime) - if isinstance(result, ReturnValue): - return result - if isinstance(result, BreakStatement): - return result - index += 1 - - def dict(self): - return { - "type": "Block", - "body": [statement.dict() for statement in self.body] - } - -@dataclass -class WhileStatement(Statement): - test: Expression - body: Block - - def exec(self, runtime: Runtime): - while self.test.eval(runtime): - while_runtime = Runtime(parent=runtime,name="while") - result = self.body.exec(while_runtime) - if isinstance(result, ReturnValue): - return result - if isinstance(result, BreakStatement): - return result - - def dict(self): - return { - "type": "WhileStatement", - "test": self.test.dict(), - "body": self.body.dict() - } - -@dataclass -class BreakStatement(Statement): - - def exec(self, _: Runtime): - return self - - def dict(self): - return { - "type": "BreakStatement" - } - -@dataclass -class ReturnStatement(Statement): - value: Expression - - def exec(self, runtime: Runtime): - return ReturnValue(self.value.eval(runtime)) - - def dict(self): - return { - "type": "ReturnStatement", - "value": self.value.dict() - } - -@dataclass -class IfStatement(Statement): - test: Expression - consequent: Block - alternate: Block - - def exec(self, runtime: Runtime): - if_runtime = Runtime(parent=runtime) - if self.test.eval(runtime): - return self.consequent.exec(if_runtime) - else: - return self.alternate.exec(if_runtime) - - def dict(self): - return { - "type": "IfStatement", - "test": self.test.dict(), - "consequent": self.consequent.dict(), - "alternate": self.alternate.dict() - } - -@dataclass -class VariableDeclaration(Statement): - id: Identifier - value: Expression - value_type: str = "any" - def exec(self, runtime: Runtime): - runtime.declare(self.id.name, self.value.eval(runtime)) - - def dict(self): - return { - "type": "VariableDeclaration", - "id": self.id.dict(), - "value": self.value.dict() - } - -@dataclass -class Assignment(Statement): - id: Identifier - value: Expression - - def exec(self, runtime: Runtime): - runtime.assign(self.id.name, self.value.eval(runtime)) - - def dict(self): - return { - "type": "Assignment", - "id": self.id.dict(), - "value": self.value.dict() - } - -@dataclass -class Argument(Expression): - id: Identifier - value: Expression - - def dict(self): - return { - "type": "Argument", - "id": self.id.dict(), - "value": self.value.dict() - } - -@dataclass -class BinaryExpression(Expression): - left: Expression - operator: str - right: Expression - - def eval(self, runtime: Runtime): - left = self.left.eval(runtime) - right = self.right.eval(runtime) - if self.operator == "+": - return left + right - if self.operator == "-": - return left - right - if self.operator == "*": - return left * right - if self.operator == "/": - return left / right - if self.operator == "%": - return left % right - if self.operator == "<": - return left < right - if self.operator == ">": - return left > right - if self.operator == "<=": - return left <= right - if self.operator == ">=": - return left >= right - if self.operator == "==": - return left == right - if self.operator == "!=": - return left != right - if self.operator == "&&": - return left and right - if self.operator == "||": - return left or right - return None - - def dict(self): - return { - "type": "BinaryExpression", - "left": self.left.dict(), - "operator": self.operator, - "right": self.right.dict() - } - -@dataclass -class CallExpression(Expression): - callee: Identifier - arguments: list[Expression] - def exec(self, runtime: Runtime, args: list=None): - if args == None: - args = [] - for index, argument in enumerate(self.arguments): - args.append(argument.eval(runtime)) - if runtime.has_value(self.callee.name): - fun:FunEnv = runtime.get_value(self.callee.name) - return fun.exec(args) - if runtime.parent is not None: - return self.exec(runtime.parent,args) - if self.callee.name in runtime.exteral_fun: - return runtime.exteral_fun[self.callee.name](*args) - - - def eval(self, runtime): - result = self.exec(runtime) - if result is not None: - return result.value - - def dict(self): - return { - "type": "CallExpression", - "callee": self.callee.dict(), - "arguments": [argument.dict() for argument in self.arguments] - } - -@dataclass -class Fun(Statement): - params: list[Identifier] - body: Block - - def exec(self, runtime: Runtime): - return self.body.exec(runtime) - - def eval(self, runtime: Runtime): - return FunEnv(runtime, self) - - def dict(self): - return { - "type": "Fun", - "params": [param.dict() for param in self.params], - "body": self.body.dict() - } - -class EmptyStatement(Statement): - - def exec(self, _: Runtime): - return None - - def eval(self, _: Runtime): - return None - - def dict(self): - return { - "type": "EmptyStatement" - } - - -class FunEnv(): - - def __init__(self, parent: Runtime, body: Fun): - self.parent = parent - self.body = body - - def exec(self, args: list): - fun_runtime = Runtime(parent=self.parent) - for index, param in enumerate(self.body.params): - fun_runtime.declare(param.name, args[index]) - return self.body.exec(fun_runtime) \ No newline at end of file diff --git a/src/dotchain/runtime/interpreter.py b/src/dotchain/runtime/interpreter.py deleted file mode 100644 index bf33cb8..0000000 --- a/src/dotchain/runtime/interpreter.py +++ /dev/null @@ -1,420 +0,0 @@ -from ast import Expression -import copy -from .ast import Assignment, BinaryExpression, Block, BoolLiteral, BreakStatement, CallExpression, EmptyStatement, FloatLiteral, Fun, Identifier, IfStatement, IntLiteral, Program, ReturnStatement, Statement, StringLiteral, UnaryExpression, VariableDeclaration, WhileStatement -from .tokenizer import Token, TokenType, Tokenizer - -unary_prev_statement = [ - TokenType.COMMENTS, - TokenType.LEFT_PAREN, - TokenType.COMMA, - TokenType.LEFT_BRACE, - TokenType.RIGHT_BRACE, - TokenType.SEMICOLON, - TokenType.LET, - TokenType.RETURN, - TokenType.IF, - TokenType.ELSE, - TokenType.WHILE, - TokenType.FOR, - TokenType.LOGICAL_OPERATOR, - TokenType.NOT, - TokenType.ASSIGNMENT, - TokenType.MULTIPLICATIVE_OPERATOR, - TokenType.ADDITIVE_OPERATOR, - TokenType.ARROW, -] - -unary_end_statement = [ - TokenType.MULTIPLICATIVE_OPERATOR, - TokenType.ADDITIVE_OPERATOR, - TokenType.LOGICAL_OPERATOR, -] - -end_statement = [ - TokenType.SEMICOLON, - TokenType.COMMA, - TokenType.ARROW, - TokenType.RETURN, - TokenType.LET, - TokenType.IF, - TokenType.ELSE, - TokenType.WHILE, - TokenType.FOR, - TokenType.ASSIGNMENT, - TokenType.RIGHT_BRACE, - TokenType.LEFT_BRACE, -] - -def program_parser(tkr: Tokenizer): - statements = list[Statement]() - count = 0 - while True: - if tkr.token() is None: - break - if tkr.token().type == TokenType.SEMICOLON: - tkr.next() - continue - statement = statement_parser(tkr) - statements.append(statement) - count += 1 - return Program(statements) - -def if_parser(tkr: Tokenizer): - tkr.eat(TokenType.IF) - condition = ExpressionParser(tkr).parse() - block = block_statement(tkr) - if tkr.type_is(TokenType.ELSE): - tkr.eat(TokenType.ELSE) - if tkr.type_is(TokenType.IF): - print("else if") - return IfStatement(condition, block, Block([if_parser(tkr)])) - return IfStatement(condition, block, block_statement(tkr)) - return IfStatement(condition, block, Block([])) - -def while_parser(tkr: Tokenizer): - tkr.eat(TokenType.WHILE) - condition = ExpressionParser(tkr).parse() - block = block_statement(tkr) - return WhileStatement(condition, block) - - -def identifier(tkr: Tokenizer): - token = tkr.token() - if token.type != TokenType.IDENTIFIER: - raise Exception("Invalid identifier", token) - tkr.next() - return Identifier(token.value) - -def block_statement(tkr: Tokenizer): - tkr.eat(TokenType.LEFT_BRACE) - statements = list[Statement]() - while True: - if tkr.token() is None: - raise Exception("Invalid block expression", tkr.token()) - if tkr.tokenType() == TokenType.RIGHT_BRACE: - tkr.eat(TokenType.RIGHT_BRACE) - break - if tkr.tokenType() == TokenType.SEMICOLON: - tkr.next() - continue - statements.append(statement_parser(tkr)) - return Block(statements) - - -def return_parser(tkr: Tokenizer): - tkr.eat(TokenType.RETURN) - return ReturnStatement(ExpressionParser(tkr).parse()) - -def statement_parser(tkr: Tokenizer): - token = tkr.token() - if token is None: - return EmptyStatement() - if token.type == TokenType.SEMICOLON: - tkr.next() - return EmptyStatement() - if token.type == TokenType.LET: - return let_expression_parser(tkr) - if _try_assignment_expression(tkr): - return assignment_parser(tkr) - if token.type == TokenType.IF: - return if_parser(tkr) - if token.type == TokenType.WHILE: - return while_parser(tkr) - if token.type == TokenType.RETURN: - return return_parser(tkr) - if token.type == TokenType.BREAK: - tkr.eat(TokenType.BREAK) - return BreakStatement() - return ExpressionParser(tkr).parse() - -def assignment_parser(tkr: Tokenizer): - id = identifier(tkr) - tkr.eat(TokenType.ASSIGNMENT) - return Assignment(id, ExpressionParser(tkr).parse()) - -def let_expression_parser(tkr: Tokenizer): - tkr.eat(TokenType.LET) - token = tkr.token() - if token.type != TokenType.IDENTIFIER: - raise Exception("Invalid let statement", token) - id = identifier(tkr) - token = tkr.token() - if token is None: - raise Exception("Invalid let statement", token) - if token.type != TokenType.ASSIGNMENT: - raise Exception("Invalid let statement", token.type) - tkr.next() - ast = ExpressionParser(tkr).parse() - return VariableDeclaration(id, ast) - -class ExpressionParser: - - def __init__(self, tkr: Tokenizer): - self.stack = list[Expression | Token]() - self.operator_stack = list[Token]() - self.tkr = tkr - - def parse(self, unary = False): - while not self.is_end(): - token = self.tkr.token() - if unary and not self.is_unary() and token.type in unary_end_statement: - break - if self.is_unary(): - self.push_stack(self.unary_expression_parser()) - elif self._try_fun_expression(): - return self.fun_expression() - # -(hello x 123) // !(true and false) - elif unary and token.type == TokenType.LEFT_PAREN: - self.tkr.next() - self.push_stack(ExpressionParser(self.tkr).parse()) - elif self._is_operator(token) or token.type in [TokenType.LEFT_PAREN, TokenType.RIGHT_PAREN ]: - self.push_operator_stack(token) - self.tkr.next() - else: - self.push_stack(self.expression_parser()) - self.pop_all() - return self.expression() - - def expression(self): - if len(self.stack) == 0: - return EmptyStatement() - if len(self.stack) == 1: - return self.stack[0] - return expression_list_to_binary(self.stack) - - def expression_parser(self): - token = self.tkr.token() - if token is None: - return EmptyStatement() - expression = None - if token.type == TokenType.INT: - self.tkr.eat(TokenType.INT) - expression = IntLiteral(int(token.value)) - elif token.type == TokenType.FLOAT: - self.tkr.eat(TokenType.FLOAT) - expression = FloatLiteral(float(token.value)) - elif token.type == TokenType.STRING: - self.tkr.eat(TokenType.STRING) - expression = StringLiteral(token.value[1:-1]) - elif token.type == TokenType.BOOL: - self.tkr.eat(TokenType.BOOL) - expression = BoolLiteral(token.value == "true") - elif token.type == TokenType.IDENTIFIER: - expression = self.identifier_or_fun_call_parser() - return expression - - def _try_fun_expression(self): - return _try_fun_expression(self.tkr) - - def fun_expression(self): - tkr = self.tkr - tkr.next() - args = list[Identifier]() - token_type = tkr.tokenType() - while token_type != TokenType.RIGHT_PAREN: - args.append(Identifier(tkr.token().value)) - tkr.next() - token_type = tkr.tokenType() - if token_type == TokenType.RIGHT_PAREN: - break - tkr.next() - token_type = tkr.tokenType() - token_type = tkr.next_token_type() - if token_type != TokenType.ARROW: - raise Exception("Invalid fun_expression", tkr.token()) - tkr.next() - return Fun(args, block_statement(tkr)) - - def push_stack(self, expression: Expression | Token): - self.stack.append(expression) - - def _pop_by_right_paren(self): - token = self.operator_stack.pop() - if token.type != TokenType.LEFT_PAREN: - self.push_stack(token) - self._pop_by_right_paren() - - def pop(self): - self.push_stack(self.operator_stack.pop()) - - def pop_all(self): - while len(self.operator_stack) > 0: - self.pop() - - def push_operator_stack(self, token: Token): - if len(self.operator_stack) == 0: - self.operator_stack.append(token) - return - if token.type == TokenType.LEFT_PAREN: - self.operator_stack.append(token) - return - if token.type == TokenType.RIGHT_PAREN: - self._pop_by_right_paren() - return - top_operator = self.operator_stack[-1] - if top_operator.type == TokenType.LEFT_PAREN: - self.operator_stack.append(token) - return - # priority is in descending order - if self._priority(token) >= self._priority(top_operator): - self.pop() - self.push_operator_stack(token) - return - self.operator_stack.append(token) - - def unary_expression_parser(self): - token = self.tkr.token() - self.tkr.next() - return UnaryExpression(token.value, ExpressionParser(self.tkr).parse(True)) - - def identifier_or_fun_call_parser(self): - id = self.identifier() - tokenType = self.tkr.tokenType() - if tokenType == TokenType.LEFT_PAREN: - return self.fun_call_parser(id) - return id - - def fun_call_parser(self, id: Identifier): - self.tkr.eat(TokenType.LEFT_PAREN) - args = list[Expression]() - while self.tkr.tokenType() != TokenType.RIGHT_PAREN: - args.append(ExpressionParser(self.tkr).parse()) - if self.tkr.tokenType() == TokenType.COMMA: - self.tkr.eat(TokenType.COMMA) - self.tkr.eat(TokenType.RIGHT_PAREN) - return CallExpression(id, args) - - def identifier(self): - return identifier(self.tkr) - - def is_unary(self): - token = self.tkr.token() - if not self.unary_operator(token): - return False - if token.type == TokenType.NOT: - return True - prev_token = self.tkr.get_prev() - if prev_token is None: - return True - if prev_token.type == TokenType.LEFT_PAREN: - return True - if prev_token.type in unary_prev_statement: - return True - return False - - def unary_operator(self, token: Token): - if token is None: - return False - return token.value in ["+", "-", "!"] - - def _has_brackets(self): - return TokenType.LEFT_PAREN in map(lambda x: x.type, self.operator_stack) - - def is_end(self): - token = self.tkr.token() - if token is None: - return True - if token.type == TokenType.SEMICOLON: - return True - if not self._has_brackets() and token.type == TokenType.RIGHT_PAREN: - return True - if token.type in end_statement: - return True - return False - - def _is_operator(self, token: Token): - if token is None: - return False - return token.type in [TokenType.ADDITIVE_OPERATOR, TokenType.MULTIPLICATIVE_OPERATOR, TokenType.LOGICAL_OPERATOR, TokenType.NOT] - - def _debug_print_tokens(self): - print("operator stack:----") - for token in self.operator_stack: - print(token) - - def _debug_print_stack(self): - print("stack:----") - for expression in self.stack: - print(expression) - - def _priority(self, token: Token): - return _priority(token.value) - -def expression_list_to_binary(expression_list: list[Expression | Token], stack: list = None): - if stack is None: - stack = list() - if len(expression_list) == 0: - return stack[0] - top = expression_list[0] - if isinstance(top, Token): - right = stack.pop() - left = stack.pop() - return expression_list_to_binary(expression_list[1:], stack + [BinaryExpression(left, top.value, right)]) - else: - stack.append(top) - return expression_list_to_binary(expression_list[1:], stack) - -def _priority(operator: str): - priority = 0 - if operator in ["*", "/", "%"]: - return priority - priority += 1 - if operator in ["+", "-"]: - return priority - priority += 1 - if operator in ["<", ">", "<=", ">="]: - return priority - priority += 1 - if operator in ["==", "!="]: - return priority - priority += 1 - if operator in ["&&"]: - return priority - priority += 1 - if operator in ["||"]: - return priority - priority += 1 - return priority - -def _try_assignment_expression(tkr: Tokenizer): - tkr = copy.deepcopy(tkr) - token = tkr.token() - if token is None: - return False - if token.type != TokenType.IDENTIFIER: - return False - tkr.next() - token = tkr.token() - if token is None: - return False - if token.type != TokenType.ASSIGNMENT: - return False - return True - -def _try_fun_expression(_tkr: Tokenizer): - tkr = copy.deepcopy(_tkr) - token = tkr.token() - if token is None: - return False - if token.type != TokenType.LEFT_PAREN: - return False - tkr.next() - token_type = tkr.tokenType() - while token_type != TokenType.RIGHT_PAREN: - if token_type == TokenType.IDENTIFIER: - tkr.next() - token_type = tkr.tokenType() - if token_type == TokenType.RIGHT_PAREN: - break - if token_type != TokenType.COMMA: - return False - tkr.next() - token_type = tkr.tokenType() - if token_type == TokenType.RIGHT_PAREN: - return False - else: - return False - token_type = tkr.next_token_type() - if token_type != TokenType.ARROW: - return False - return True \ No newline at end of file diff --git a/src/dotchain/runtime/runtime.py b/src/dotchain/runtime/runtime.py deleted file mode 100644 index 65fd683..0000000 --- a/src/dotchain/runtime/runtime.py +++ /dev/null @@ -1,44 +0,0 @@ -from ast import Expression - -from attr import dataclass - -class Runtime(): - - def __init__(self, context=None, parent=None, exteral_fun=None, name=None) -> None: - self.name = name - self.parent = parent - self.context = context if context is not None else dict() - self.exteral_fun = exteral_fun if exteral_fun is not None else dict() - - def has_value(self, identifier: str) -> bool: - return identifier in self.context - - def get_value(self, identifier: str): - return self.context.get(identifier) - - def deep_get_value(self, id: str): - if self.has_value(id): - return self.get_value(id) - if self.parent is not None: - return self.parent.deep_get_value(id) - return None - - def set_value(self, identifier: str, value): - self.context[identifier] = value - - def declare(self, identifier: str, value): - if self.has_value(identifier): - raise Exception(f"Variable {identifier} is already declared") - self.set_value(identifier, value) - - def assign(self, identifier: str, value): - if self.has_value(identifier): - self.set_value(identifier, value) - elif self.parent is not None: - self.parent.assign(identifier, value) - else: - raise Exception(f"Variable {identifier} is not declared") - - def show_values(self): - print(self.context) - diff --git a/src/dotchain/runtime/tests/__init__.py b/src/dotchain/runtime/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/dotchain/runtime/tests/test_expression_parser.py b/src/dotchain/runtime/tests/test_expression_parser.py deleted file mode 100644 index 5f606ec..0000000 --- a/src/dotchain/runtime/tests/test_expression_parser.py +++ /dev/null @@ -1,153 +0,0 @@ - -import unittest -from runtime.ast import BoolLiteral, CallExpression, FloatLiteral, Identifier, IntLiteral, UnaryExpression -from runtime.interpreter import ExpressionParser, _priority, _try_fun_expression -from runtime.tokenizer import TokenType, Tokenizer,Token - - - -class TestExpressionParser(unittest.TestCase): - - def test__try_fun_expression(self): - t = Tokenizer() - t.init("()") - self.assertFalse(_try_fun_expression(t)) - - t.init("() =>") - self.assertTrue(_try_fun_expression(t)) - - t.init("(a) =>") - self.assertTrue(_try_fun_expression(t)) - - t.init("(a,) =>") - self.assertFalse(_try_fun_expression(t)) - - t.init("(a,b,c,d) =>;") - self.assertTrue(_try_fun_expression(t)) - - t.init("(a,b,c,true) =>;") - self.assertFalse(_try_fun_expression(t)) - - t.init("(a,b,c,1.23) =>;") - self.assertFalse(_try_fun_expression(t)) - - def test_is_unary(self): - t = Tokenizer() - t.init("!") - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertTrue(pred) - - t.init("+") - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertTrue(pred) - - t.init("--123") - t.next() - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertTrue(pred) - - t.init("+-123") - t.next() - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertTrue(pred) - - t.init(")-123") - t.next() - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertFalse(pred) - - t.init("=> - 123") - t.next() - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertTrue(pred) - - t.init(", - 123") - t.next() - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertTrue(pred) - - t.init("* - 123") - t.next() - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertTrue(pred) - - t.init("* - 123") - parser = ExpressionParser(t) - pred = parser.is_unary() - self.assertFalse(pred) - - def test_expression_parser(self): - t = Tokenizer() - t.init("a") - parser = ExpressionParser(t) - expression = parser.expression_parser() - self.assertIsInstance(expression, Identifier) - - t.init("true") - parser = ExpressionParser(t) - expression = parser.expression_parser() - self.assertIsInstance(expression, BoolLiteral) - self.assertEqual(expression.value, True) - - t.init("false") - parser = ExpressionParser(t) - expression = parser.expression_parser() - self.assertIsInstance(expression, BoolLiteral) - self.assertEqual(expression.value, False) - - t.init("12341") - parser = ExpressionParser(t) - expression = parser.expression_parser() - self.assertEqual(expression.value, 12341) - self.assertIsInstance(expression, IntLiteral) - - t.init("12341.42") - parser = ExpressionParser(t) - expression = parser.expression_parser() - self.assertEqual(expression.value, 12341.42) - self.assertIsInstance(expression, FloatLiteral) - - t.init("hello") - parser = ExpressionParser(t) - expression: Identifier = parser.expression_parser() - self.assertIsInstance(expression, Identifier) - self.assertEqual(expression.name, "hello") - - t.init("print()") - parser = ExpressionParser(t) - expression: CallExpression = parser.expression_parser() - self.assertIsInstance(expression, CallExpression) - self.assertEqual(expression.callee.name, "print") - - t.init("print(1,2,3,hello)") - parser = ExpressionParser(t) - expression: CallExpression = parser.expression_parser() - self.assertIsInstance(expression, CallExpression) - self.assertEqual(expression.callee.name, "print") - self.assertEqual(len(expression.arguments), 4) - - def test_binary_expression(self): - t = Tokenizer() - - def test__priority(self): - self.assertEqual(_priority("*"), 0) - self.assertEqual(_priority("/"), 0) - self.assertEqual(_priority("%"), 0) - self.assertEqual(_priority("+"), 1) - self.assertEqual(_priority("-"), 1) - self.assertEqual(_priority(">"), 2) - self.assertEqual(_priority("<"), 2) - self.assertEqual(_priority(">="), 2) - self.assertEqual(_priority("<="), 2) - self.assertEqual(_priority("=="), 3) - self.assertEqual(_priority("!="), 3) - self.assertEqual(_priority("&&"), 4) - self.assertEqual(_priority("||"), 5) \ No newline at end of file diff --git a/src/dotchain/runtime/tests/test_runtime.py b/src/dotchain/runtime/tests/test_runtime.py deleted file mode 100644 index 698db5d..0000000 --- a/src/dotchain/runtime/tests/test_runtime.py +++ /dev/null @@ -1,7 +0,0 @@ - -import unittest - -class TestRuntime(unittest.TestCase): - - def test_eval(self): - self.assertTrue(True) \ No newline at end of file diff --git a/src/dotchain/runtime/tests/test_tokenizer.py b/src/dotchain/runtime/tests/test_tokenizer.py deleted file mode 100644 index 63188bf..0000000 --- a/src/dotchain/runtime/tests/test_tokenizer.py +++ /dev/null @@ -1,151 +0,0 @@ - -import unittest -from runtime.tokenizer import TokenType, Tokenizer,Token - -class TestTokenizer(unittest.TestCase): - - def test_init(self): - t = Tokenizer() - self.assertEqual(t.script, "") - self.assertEqual(t.cursor, 0) - self.assertEqual(t.col, 0) - self.assertEqual(t.row, 0) - - def test_tokenizer(self): - t = Tokenizer() - t.init("a") - self.assertEqual(t.token().value, "a") - self.assertEqual(t.token().type, TokenType.IDENTIFIER) - - t.init("12341") - self.assertEqual(t.token().value, "12341") - self.assertEqual(t.token().type, TokenType.INT) - - t.init("12341.1234124") - self.assertEqual(t.token().value, "12341.1234124") - self.assertEqual(t.token().type, TokenType.FLOAT) - - t.init("false") - self.assertEqual(t.token().value, "false") - self.assertEqual(t.token().type, TokenType.BOOL) - - t.init("\"false\"") - self.assertEqual(t.token().value, "\"false\"") - self.assertEqual(t.token().type, TokenType.STRING) - - t.init("helloworld") - self.assertEqual(t.token().value, "helloworld") - self.assertEqual(t.token().type, TokenType.IDENTIFIER) - - t.init("!") - self.assertEqual(t.token().value, "!") - self.assertEqual(t.token().type, TokenType.NOT) - - t.init("==") - self.assertEqual(t.token().value, "==") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init("!=") - self.assertEqual(t.token().value, "!=") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init("<=") - self.assertEqual(t.token().value, "<=") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init(">=") - self.assertEqual(t.token().value, ">=") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init("<") - self.assertEqual(t.token().value, "<") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init(">") - self.assertEqual(t.token().value, ">") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init("&&") - self.assertEqual(t.token().value, "&&") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init("||") - self.assertEqual(t.token().value, "||") - self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR) - - t.init("=") - self.assertEqual(t.token().value, "=") - self.assertEqual(t.token().type, TokenType.ASSIGNMENT) - - t.init("+") - self.assertEqual(t.token().value, "+") - self.assertEqual(t.token().type, TokenType.ADDITIVE_OPERATOR) - - t.init("-") - self.assertEqual(t.token().value, "-") - self.assertEqual(t.token().type, TokenType.ADDITIVE_OPERATOR) - - t.init("*") - self.assertEqual(t.token().value, "*") - self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR) - - t.init("/") - self.assertEqual(t.token().value, "/") - self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR) - - t.init("%") - self.assertEqual(t.token().value, "%") - self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR) - - t.init("(") - self.assertEqual(t.token().value, "(") - self.assertEqual(t.token().type, TokenType.LEFT_PAREN) - - t.init(")") - self.assertEqual(t.token().value, ")") - self.assertEqual(t.token().type, TokenType.RIGHT_PAREN) - - t.init("{") - self.assertEqual(t.token().value, "{") - self.assertEqual(t.token().type, TokenType.LEFT_BRACE) - - t.init("}") - self.assertEqual(t.token().value, "}") - self.assertEqual(t.token().type, TokenType.RIGHT_BRACE) - - def test_init(self): - t = Tokenizer() - script = "a + 9 * ( 3 - 1 ) * 3 + 10 / 2;" - t.init(script) - self.assertEqual(t.script, script) - self.assertEqual(len(t.tokens), 16) - self.assertEqual(t.get_prev(), None) - self.assertEqual(t.token().value, "a") - self.assertEqual(t.get_next().value, "+") - self.assertEqual(t.next().value, "+") - self.assertEqual(t.next().value, "9") - self.assertEqual(t.next().value, "*") - t.prev() - self.assertEqual(t.token().value, "9") - t.prev() - self.assertEqual(t.token().value, "+") - - script = "a + 9" - t.init(script) - self.assertEqual(t.token().type, TokenType.IDENTIFIER) - self.assertEqual(t.next().type, TokenType.ADDITIVE_OPERATOR) - self.assertEqual(t.next().type, TokenType.INT) - self.assertEqual(t.next(), None) - self.assertEqual(t._current_token_index, 3) - self.assertEqual(t.next(), None) - self.assertEqual(t.next(), None) - self.assertEqual(t._current_token_index, 3) - self.assertEqual(t.next(), None) - t.prev() - self.assertEqual(t.token().value, "9") - t.prev() - self.assertEqual(t.token().value, "+") - t.prev() - self.assertEqual(t.token().value, "a") - t.prev() - self.assertEqual(t.token().value, "a") \ No newline at end of file diff --git a/src/dotchain/runtime/tokenizer.py b/src/dotchain/runtime/tokenizer.py deleted file mode 100644 index 45235af..0000000 --- a/src/dotchain/runtime/tokenizer.py +++ /dev/null @@ -1,259 +0,0 @@ -import re -from enum import Enum - -from attr import dataclass - -class TokenType(Enum): - NEW_LINE = 1 - SPACE = 2 - COMMENTS = 3 - LEFT_PAREN = 4 - RIGHT_PAREN = 5 - COMMA = 6 - LEFT_BRACE = 7 - RIGHT_BRACE = 8 - SEMICOLON = 9 - LET = 10 - RETURN = 11 - IF = 12 - ELSE = 13 - WHILE = 14 - FOR = 15 - FLOAT = 18 - INT = 19 - IDENTIFIER = 20 - LOGICAL_OPERATOR = 21 - NOT = 22 - ASSIGNMENT = 23 - MULTIPLICATIVE_OPERATOR = 24 - ADDITIVE_OPERATOR = 25 - STRING = 26 - ARROW = 27 - BOOL = 28 - BREAK = 29 - TYPE_DEFINITION = 30 - COLON = 31 - -specs = ( - (re.compile(r"^\n"),TokenType.NEW_LINE), - # Space: - (re.compile(r"^\s"),TokenType.SPACE), - # Comments: - (re.compile(r"^//.*"), TokenType.COMMENTS), - - # Symbols: - (re.compile(r"^\("), TokenType.LEFT_PAREN), - (re.compile(r"^\)"), TokenType.RIGHT_PAREN), - (re.compile(r"^\,"), TokenType.COMMA), - (re.compile(r"^\{"), TokenType.LEFT_BRACE), - (re.compile(r"^\}"), TokenType.RIGHT_BRACE), - (re.compile(r"^;"), TokenType.SEMICOLON), - (re.compile(r"^:"), TokenType.COLON), - (re.compile(r"^=>"), TokenType.ARROW), - - # Keywords: - (re.compile(r"^\blet\b"), TokenType.LET), - (re.compile(r"^\breturn\b"), TokenType.RETURN), - (re.compile(r"^\bif\b"), TokenType.IF), - (re.compile(r"^\belse\b"), TokenType.ELSE), - (re.compile(r"^\bwhile\b"), TokenType.WHILE), - (re.compile(r"^\bfor\b"), TokenType.FOR), - (re.compile(r"^\bbreak\b"), TokenType.BREAK), - - (re.compile(r"^\btrue\b"), TokenType.BOOL), - (re.compile(r"^\bfalse\b"), TokenType.BOOL), - - # Type definition: - (re.compile(r"^\bstring\b"), TokenType.TYPE_DEFINITION), - (re.compile(r"^\bint\b"), TokenType.TYPE_DEFINITION), - (re.compile(r"^\bfloat\b"), TokenType.TYPE_DEFINITION), - (re.compile(r"^\bbool\b"), TokenType.TYPE_DEFINITION), - (re.compile(r"^\bany\b"), TokenType.TYPE_DEFINITION), - - # Floats: - (re.compile(r"^[0-9]+\.[0-9]+"), TokenType.FLOAT), - - # Ints: - (re.compile(r"^[0-9]+"), TokenType.INT), - - # Identifiers: - (re.compile(r"^\w+"), TokenType.IDENTIFIER), - - - # Logical operators: - (re.compile(r"^&&"), TokenType.LOGICAL_OPERATOR), - (re.compile(r"^\|\|"), TokenType.LOGICAL_OPERATOR), - (re.compile(r"^=="), TokenType.LOGICAL_OPERATOR), - (re.compile(r"^!="), TokenType.LOGICAL_OPERATOR), - (re.compile(r"^<="), TokenType.LOGICAL_OPERATOR), - (re.compile(r"^>="), TokenType.LOGICAL_OPERATOR), - (re.compile(r"^<"), TokenType.LOGICAL_OPERATOR), - (re.compile(r"^>"), TokenType.LOGICAL_OPERATOR), - - (re.compile(r"^!"), TokenType.NOT), - - # Assignment: - (re.compile(r"^="), TokenType.ASSIGNMENT), - - # Math operators: +, -, *, /: - (re.compile(r"^[*/%]"), TokenType.MULTIPLICATIVE_OPERATOR), - (re.compile(r"^[+-]"), TokenType.ADDITIVE_OPERATOR), - - # Double-quoted strings - # TODO: escape character \" and - (re.compile(r"^\"[^\"]*\""), TokenType.STRING), -) - -@dataclass -class Token: - type: TokenType - value: str - row: int - col: int - col_end: int - cursor: int - - def __str__(self) -> str: - return f"Token({self.type}, {self.value}, row={self.row}, col={self.col}, col_end={self.col_end}, cursor={self.cursor})" - - -class Tokenizer: - - def __init__(self): - self._current_token = None - self.script = "" - self.cursor = 0 - self.col = 0 - self.row = 0 - self._current_token_index = 0 - self.tokens = list[Token]() - self.checkpoint = list[int]() - - def init(self, script: str): - self.checkpoint = list[int]() - self.tokens = list[Token]() - self._current_token_index = 0 - self._current_token = None - self.script = script - self.cursor = 0 - self.col = 0 - self.row = 0 - self._get_next_token() - while self._current_token is not None: - self.tokens.append(self._current_token) - self._get_next_token() - - def checkpoint_push(self): - self.checkpoint.append(self._current_token_index) - - def checkpoint_pop(self): - self._current_token_index = self.checkpoint.pop() - - def next(self): - if self._current_token_index < len(self.tokens): - self._current_token_index += 1 - return self.token() - - def next_token_type(self): - if self._current_token_index < len(self.tokens): - self._current_token_index += 1 - return self.tokenType() - - def prev(self): - if self._current_token_index > 0: - self._current_token_index -= 1 - return self.token() - - def get_prev(self): - if self._current_token_index == 0: - return None - return self.tokens[self._current_token_index - 1] - - def get_next(self): - if self._current_token_index >= len(self.tokens): - return None - return self.tokens[self._current_token_index + 1] - - def token(self): - if self._current_token_index >= len(self.tokens): - return None - return self.tokens[self._current_token_index] - - def tokenType(self): - if self._current_token_index >= len(self.tokens): - return None - return self.tokens[self._current_token_index].type - - - def _get_next_token(self): - if self._is_eof(): - self._current_token = None - return None - _string = self.script[self.cursor:] - for spec in specs: - tokenValue, offset = self.match(spec[0], _string) - if tokenValue == None: - continue - if spec[1] == TokenType.NEW_LINE: - self.row += 1 - self.col = 0 - return self._get_next_token() - if spec[1] == TokenType.COMMENTS: - return self._get_next_token() - if spec[1] == TokenType.SPACE: - self.col += offset - return self._get_next_token() - if spec[1] == None: - return self._get_next_token() - self._current_token = Token(spec[1],tokenValue, self.cursor, self.row, self.col, self.col + offset) - self.col += offset - return self.get_current_token() - raise Exception("Unknown token: " + _string[0]) - - def _is_eof(self): - return self.cursor == len(self.script) - - def has_more_tokens(self): - return self.cursor < len(self.script) - - def get_current_token(self): - return self._current_token - - def match(self, reg: re, _script): - matched = reg.search(_script) - if matched == None: - return None,0 - self.cursor = self.cursor + matched.span(0)[1] - return matched[0], matched.span(0)[1] - - def eat(self, value: str | TokenType): - if isinstance(value, str): - return self.eat_value(value) - if isinstance(value, TokenType): - return self.eat_token_type(value) - - def eat_value(self, value: str): - token = self.token() - if token is None: - raise Exception(f"Expected {value} but got None") - if token.value != value: - raise Exception(f"Expected {value} but got {token.value}") - self.next() - return token - - def eat_token_type(self,tokenType: TokenType): - token = self.token() - if token is None: - raise Exception(f"Expected {tokenType} but got None") - if token.type != tokenType: - raise Exception(f"Expected {tokenType} but got {token.type}") - self.next() - return token - - def type_is(self, tokenType: TokenType): - if self.token() is None: - return False - return self.token().type == tokenType - - def the_rest(self): - return self.tokens[self._current_token_index:] \ No newline at end of file