Mirror of https://github.com/BoardWare-Genius/jarvis-models.git (synced 2025-12-13 16:53:24 +00:00)
feat: dotchain
@@ -1,13 +0,0 @@
#!/bin/bash
pip install filetype
pip install fastapi
pip install python-multipart
pip install "uvicorn[standard]"
pip install SpeechRecognition
pip install gTTS
pip install PyYAML
pip install injector
pip install langchain
pip install chromadb
pip install lagent
pip install sentence_transformers
@@ -102,6 +102,16 @@ def cosyvoicetts_loader():
    from .cosyvoicetts import CosyVoiceTTS
    return Injector().get(CosyVoiceTTS)


@model_loader(lazy=blackboxConf.lazyloading)
def workflow_loader():
    from .workflow import Workflow
    return Injector().get(Workflow)


@model_loader(lazy=blackboxConf.lazyloading)
def sum_loader():
    from .sum import Sum
    return Injector().get(Sum)


@singleton
class BlackboxFactory:
    models = {}
@@ -124,6 +134,8 @@ class BlackboxFactory:
        self.models["chat"] = chat_loader
        self.models["chat_llama"] = chat_llama_loader
        self.models["cosyvoicetts"] = cosyvoicetts_loader
        self.models["workflow"] = workflow_loader
        self.models["sum"] = sum_loader

    def __call__(self, *args, **kwargs):
        return self.processing(*args, **kwargs)
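For orientation, a hedged sketch of how one of the registered loaders might be used. The `models` keys and loader names come from the hunk above; the import path and the no-argument constructors are assumptions not shown in this diff.

```python
from blackbox import BlackboxFactory  # hypothetical import path

factory = BlackboxFactory()              # assumes the constructor takes no arguments
sum_blackbox = factory.models["sum"]()   # sum_loader() -> Injector().get(Sum)
print(sum_blackbox.processing([1, 2, 3]))  # -> 6
```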
src/blackbox/sum.py (new file, 24 lines)
@@ -0,0 +1,24 @@
from .blackbox import Blackbox
from injector import singleton


@singleton
class Sum(Blackbox):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        return self.processing(*args, **kwargs)

    def processing(self, *args, **kwargs):
        total = 0
        for arg in args[0]:
            total += arg
        return total

    def valid(self, *args, **kwargs) -> bool:
        return super().valid(*args, **kwargs)

    async def fast_api_handler(self, request):
        json = await request.json()
        return self.processing(json)
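A short usage sketch; it assumes `Blackbox.__init__` tolerates being called with no arguments, which this diff does not show.

```python
s = Sum()               # assumption: Blackbox.__init__ accepts no required arguments
print(s([1, 2, 3, 4]))  # __call__ delegates to processing(), which sums args[0] -> 10
```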
src/blackbox/workflow.py (new file, 61 lines)
@@ -0,0 +1,61 @@

from .sum import Sum
from fastapi import Request
from .blackbox import Blackbox
from injector import singleton, inject
from ..dotchain.runtime.interpreter import program_parser
from ..dotchain.runtime.runtime import Runtime
from ..dotchain.runtime.tokenizer import Tokenizer
from ..dotchain.runtime.ast import Literal


@singleton
class Workflow(Blackbox):

    @inject
    def __init__(self, sum: Sum):
        self.sum_blackbox = sum

    def __call__(self, *args, **kwargs):
        return self.processing(*args, **kwargs)

    def sum(self, *args, **kwargs):
        return Literal(self.sum_blackbox.processing(*args, **kwargs))

    async def processing(self, *args, **kwargs):
        request = args[0]
        json = await request.json()
        result = None

        def set_result(r):
            nonlocal result
            result = r

        def get_value(d: dict, key):
            value = d.get(key)
            if isinstance(value, dict):
                return value
            if isinstance(value, list):
                return value
            return Literal(value)

        script = json["script"]
        t = Tokenizer()
        t.init(script)
        runtime = Runtime(
            context={"json": json},
            exteral_fun={
                "get_value": get_value,
                "print": print,
                "set_result": set_result,
                "sum": self.sum,
            }
        )
        ast = program_parser(t)
        ast.exec(runtime)
        return result

    def valid(self, *args, **kwargs) -> bool:
        return super().valid(*args, **kwargs)

    async def fast_api_handler(self, request: Request):
        return await self.processing(request)
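To make the data flow concrete, an illustrative request body for this blackbox. Only the `script` key is read directly by `processing()`; the `a` and `b` keys are made up here and are reached from the script through `get_value(json, ...)`.

```python
payload = {
    "a": 2,
    "b": 3,
    "script": 'let a = get_value(json, "a"); let b = get_value(json, "b"); set_result(a + b);',
}
# Handed to fast_api_handler, processing() would run the script and return 5 via set_result.
```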
src/dotchain/README.md (new file, 24 lines)
@@ -0,0 +1,24 @@
# Dotchain

## Syntax
```
// comments

// variable declaration
let hello = 123

// function declaration
let add = (left, right) => {
    // return value
    return left + right
}

```
## Keywords
```
let while if else true false
```

## Tests
```bash
python -m unittest
```
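A hedged sketch of embedding the language from Python, exercising `let` and `while` from the keyword list above. Like src/dotchain/main.py below, it assumes the script is run from src/dotchain so the `runtime` package resolves.

```python
from runtime.interpreter import program_parser
from runtime.runtime import Runtime
from runtime.tokenizer import Tokenizer

script = """
let i = 0;
let total = 0;
while (i < 5) {
    total = total + i;
    i = i + 1;
}
print(total);
"""

t = Tokenizer()
t.init(script)
program_parser(t).exec(Runtime(exteral_fun={"print": print}))  # prints 10
```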
src/dotchain/main.dc (new file, 16 lines)
@@ -0,0 +1,16 @@
// comments

// variable declaration
let hello = 123;

// function declaration
let add = (left, right) => {
    // return value
    return left + right;
}

// TODO function calls
add(1,2);
add(3, add(1,2));
// Calling a function with "." passes the value before the "." as the first argument:
// hello.add(2) == add(hello, 2);
src/dotchain/main.py (new file, 16 lines)
@@ -0,0 +1,16 @@

from runtime.interpreter import program_parser
from runtime.runtime import Runtime
from runtime.tokenizer import Tokenizer

script = """
print(hello);
"""

if __name__ == "__main__":

    t = Tokenizer()
    t.init(script)
    runtime = Runtime(context={"hello": [1, 2, 3, 4], "good": "123"}, exteral_fun={"print": print})
    ast = program_parser(t)
    result = ast.exec(runtime)
src/dotchain/runtime/__init__.py (new file, 0 lines)
src/dotchain/runtime/ast.py (new file, 388 lines)
@@ -0,0 +1,388 @@
from abc import ABC, abstractmethod

from dataclasses import dataclass

from .runtime import Runtime


@dataclass
class ReturnValue():
    value: any


class Node(ABC):
    def type(self):
        return self.__class__.__name__


@dataclass
class Statement(Node, ABC):

    @abstractmethod
    def exec(self, runtime: Runtime):
        print(self)
        pass

    @abstractmethod
    def dict(self):
        pass


@dataclass
class Expression(Node):

    @abstractmethod
    def eval(self, runtime: Runtime):
        pass

    @abstractmethod
    def dict(self):
        pass


@dataclass
class Literal(Expression):
    value: str | int | float | bool

    def __init__(self, value):
        self.value = value

    def eval(self, runtime: Runtime):
        return self.value

    def dict(self) -> dict:
        return {
            "type": "Literal",
            "value": self.value
        }


@dataclass
class StringLiteral(Literal):
    value: str

    def dict(self) -> dict:
        return {
            "type": "StringLiteral",
            "value": self.value
        }


@dataclass
class IntLiteral(Literal):
    value: int

    def dict(self):
        return {
            "type": "IntLiteral",
            "value": self.value
        }


@dataclass
class FloatLiteral(Literal):
    value: float

    def dict(self):
        return {
            "type": "FloatLiteral",
            "value": self.value
        }


@dataclass
class BoolLiteral(Literal):
    value: bool

    def dict(self):
        return {
            "type": "BoolLiteral",
            "value": self.value
        }


@dataclass
class UnaryExpression(Expression):
    operator: str
    expression: Expression

    def eval(self, runtime: Runtime):
        if self.operator == "-":
            return -self.expression.eval(runtime)
        if self.operator == "!":
            return not self.expression.eval(runtime)
        return self.expression.eval(runtime)

    def dict(self):
        return {
            "type": "UnaryExpression",
            "operator": self.operator,
            "argument": self.expression.dict()
        }


@dataclass
class Program(Statement):
    body: list[Statement]

    def exec(self, runtime: Runtime):
        index = 0
        while index < len(self.body):
            statement = self.body[index]
            result = statement.exec(runtime)
            if isinstance(result, ReturnValue):
                return result
            index += 1

    def dict(self):
        return {
            "type": self.type(),
            "body": [statement.dict() for statement in self.body]
        }


@dataclass
class Identifier(Expression):
    name: str

    def eval(self, runtime: Runtime):
        return runtime.deep_get_value(self.name)

    def dict(self):
        return {
            "type": self.type(),
            "name": self.name
        }


@dataclass
class Block(Statement):
    body: list[Statement]

    def exec(self, runtime: Runtime):
        index = 0
        while index < len(self.body):
            statement = self.body[index]
            result = statement.exec(runtime)
            if isinstance(result, ReturnValue):
                return result
            if isinstance(result, BreakStatement):
                return result
            index += 1

    def dict(self):
        return {
            "type": "Block",
            "body": [statement.dict() for statement in self.body]
        }


@dataclass
class WhileStatement(Statement):
    test: Expression
    body: Block

    def exec(self, runtime: Runtime):
        while self.test.eval(runtime):
            while_runtime = Runtime(parent=runtime, name="while")
            result = self.body.exec(while_runtime)
            if isinstance(result, ReturnValue):
                return result
            if isinstance(result, BreakStatement):
                return result

    def dict(self):
        return {
            "type": "WhileStatement",
            "test": self.test.dict(),
            "body": self.body.dict()
        }


@dataclass
class BreakStatement(Statement):

    def exec(self, _: Runtime):
        return self

    def dict(self):
        return {
            "type": "BreakStatement"
        }


@dataclass
class ReturnStatement(Statement):
    value: Expression

    def exec(self, runtime: Runtime):
        return ReturnValue(self.value.eval(runtime))

    def dict(self):
        return {
            "type": "ReturnStatement",
            "value": self.value.dict()
        }


@dataclass
class IfStatement(Statement):
    test: Expression
    consequent: Block
    alternate: Block

    def exec(self, runtime: Runtime):
        if_runtime = Runtime(parent=runtime)
        if self.test.eval(runtime):
            return self.consequent.exec(if_runtime)
        else:
            return self.alternate.exec(if_runtime)

    def dict(self):
        return {
            "type": "IfStatement",
            "test": self.test.dict(),
            "consequent": self.consequent.dict(),
            "alternate": self.alternate.dict()
        }


@dataclass
class VariableDeclaration(Statement):
    id: Identifier
    value: Expression
    value_type: str = "any"

    def exec(self, runtime: Runtime):
        runtime.declare(self.id.name, self.value.eval(runtime))

    def dict(self):
        return {
            "type": "VariableDeclaration",
            "id": self.id.dict(),
            "value": self.value.dict()
        }


@dataclass
class Assignment(Statement):
    id: Identifier
    value: Expression

    def exec(self, runtime: Runtime):
        runtime.assign(self.id.name, self.value.eval(runtime))

    def dict(self):
        return {
            "type": "Assignment",
            "id": self.id.dict(),
            "value": self.value.dict()
        }


@dataclass
class Argument(Expression):
    id: Identifier
    value: Expression

    def dict(self):
        return {
            "type": "Argument",
            "id": self.id.dict(),
            "value": self.value.dict()
        }


@dataclass
class BinaryExpression(Expression):
    left: Expression
    operator: str
    right: Expression

    def eval(self, runtime: Runtime):
        left = self.left.eval(runtime)
        right = self.right.eval(runtime)
        if self.operator == "+":
            return left + right
        if self.operator == "-":
            return left - right
        if self.operator == "*":
            return left * right
        if self.operator == "/":
            return left / right
        if self.operator == "%":
            return left % right
        if self.operator == "<":
            return left < right
        if self.operator == ">":
            return left > right
        if self.operator == "<=":
            return left <= right
        if self.operator == ">=":
            return left >= right
        if self.operator == "==":
            return left == right
        if self.operator == "!=":
            return left != right
        if self.operator == "&&":
            return left and right
        if self.operator == "||":
            return left or right
        return None

    def dict(self):
        return {
            "type": "BinaryExpression",
            "left": self.left.dict(),
            "operator": self.operator,
            "right": self.right.dict()
        }


@dataclass
class CallExpression(Expression):
    callee: Identifier
    arguments: list[Expression]

    def exec(self, runtime: Runtime, args: list = None):
        if args is None:
            args = []
            for index, argument in enumerate(self.arguments):
                args.append(argument.eval(runtime))
        if runtime.has_value(self.callee.name):
            fun: FunEnv = runtime.get_value(self.callee.name)
            return fun.exec(args)
        if runtime.parent is not None:
            return self.exec(runtime.parent, args)
        if self.callee.name in runtime.exteral_fun:
            return runtime.exteral_fun[self.callee.name](*args)

    def eval(self, runtime):
        result = self.exec(runtime)
        if result is not None:
            return result.value

    def dict(self):
        return {
            "type": "CallExpression",
            "callee": self.callee.dict(),
            "arguments": [argument.dict() for argument in self.arguments]
        }


@dataclass
class Fun(Statement):
    params: list[Identifier]
    body: Block

    def exec(self, runtime: Runtime):
        return self.body.exec(runtime)

    def eval(self, runtime: Runtime):
        return FunEnv(runtime, self)

    def dict(self):
        return {
            "type": "Fun",
            "params": [param.dict() for param in self.params],
            "body": self.body.dict()
        }


class EmptyStatement(Statement):

    def exec(self, _: Runtime):
        return None

    def eval(self, _: Runtime):
        return None

    def dict(self):
        return {
            "type": "EmptyStatement"
        }


class FunEnv():

    def __init__(self, parent: Runtime, body: Fun):
        self.parent = parent
        self.body = body

    def exec(self, args: list):
        fun_runtime = Runtime(parent=self.parent)
        for index, param in enumerate(self.body.params):
            fun_runtime.declare(param.name, args[index])
        return self.body.exec(fun_runtime)
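As a quick sanity check of these node classes, a small hand-built tree can be evaluated directly. The sketch assumes the package layout the tests use (`runtime.ast`, `runtime.runtime`).

```python
from runtime.ast import BinaryExpression, IntLiteral
from runtime.runtime import Runtime

# Evaluate (1 + 2) * 3 without going through the tokenizer or parser.
expr = BinaryExpression(
    BinaryExpression(IntLiteral(1), "+", IntLiteral(2)),
    "*",
    IntLiteral(3),
)
print(expr.eval(Runtime()))  # -> 9
```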
src/dotchain/runtime/interpreter.py (new file, 420 lines)
@@ -0,0 +1,420 @@
import copy

from .ast import Assignment, BinaryExpression, Block, BoolLiteral, BreakStatement, CallExpression, EmptyStatement, Expression, FloatLiteral, Fun, Identifier, IfStatement, IntLiteral, Program, ReturnStatement, Statement, StringLiteral, UnaryExpression, VariableDeclaration, WhileStatement
from .tokenizer import Token, TokenType, Tokenizer

unary_prev_statement = [
    TokenType.COMMENTS,
    TokenType.LEFT_PAREN,
    TokenType.COMMA,
    TokenType.LEFT_BRACE,
    TokenType.RIGHT_BRACE,
    TokenType.SEMICOLON,
    TokenType.LET,
    TokenType.RETURN,
    TokenType.IF,
    TokenType.ELSE,
    TokenType.WHILE,
    TokenType.FOR,
    TokenType.LOGICAL_OPERATOR,
    TokenType.NOT,
    TokenType.ASSIGNMENT,
    TokenType.MULTIPLICATIVE_OPERATOR,
    TokenType.ADDITIVE_OPERATOR,
    TokenType.ARROW,
]

unary_end_statement = [
    TokenType.MULTIPLICATIVE_OPERATOR,
    TokenType.ADDITIVE_OPERATOR,
    TokenType.LOGICAL_OPERATOR,
]

end_statement = [
    TokenType.SEMICOLON,
    TokenType.COMMA,
    TokenType.ARROW,
    TokenType.RETURN,
    TokenType.LET,
    TokenType.IF,
    TokenType.ELSE,
    TokenType.WHILE,
    TokenType.FOR,
    TokenType.ASSIGNMENT,
    TokenType.RIGHT_BRACE,
    TokenType.LEFT_BRACE,
]


def program_parser(tkr: Tokenizer):
    statements = list[Statement]()
    count = 0
    while True:
        if tkr.token() is None:
            break
        if tkr.token().type == TokenType.SEMICOLON:
            tkr.next()
            continue
        statement = statement_parser(tkr)
        statements.append(statement)
        count += 1
    return Program(statements)


def if_parser(tkr: Tokenizer):
    tkr.eat(TokenType.IF)
    condition = ExpressionParser(tkr).parse()
    block = block_statement(tkr)
    if tkr.type_is(TokenType.ELSE):
        tkr.eat(TokenType.ELSE)
        if tkr.type_is(TokenType.IF):
            print("else if")
            return IfStatement(condition, block, Block([if_parser(tkr)]))
        return IfStatement(condition, block, block_statement(tkr))
    return IfStatement(condition, block, Block([]))


def while_parser(tkr: Tokenizer):
    tkr.eat(TokenType.WHILE)
    condition = ExpressionParser(tkr).parse()
    block = block_statement(tkr)
    return WhileStatement(condition, block)


def identifier(tkr: Tokenizer):
    token = tkr.token()
    if token.type != TokenType.IDENTIFIER:
        raise Exception("Invalid identifier", token)
    tkr.next()
    return Identifier(token.value)


def block_statement(tkr: Tokenizer):
    tkr.eat(TokenType.LEFT_BRACE)
    statements = list[Statement]()
    while True:
        if tkr.token() is None:
            raise Exception("Invalid block expression", tkr.token())
        if tkr.tokenType() == TokenType.RIGHT_BRACE:
            tkr.eat(TokenType.RIGHT_BRACE)
            break
        if tkr.tokenType() == TokenType.SEMICOLON:
            tkr.next()
            continue
        statements.append(statement_parser(tkr))
    return Block(statements)


def return_parser(tkr: Tokenizer):
    tkr.eat(TokenType.RETURN)
    return ReturnStatement(ExpressionParser(tkr).parse())


def statement_parser(tkr: Tokenizer):
    token = tkr.token()
    if token is None:
        return EmptyStatement()
    if token.type == TokenType.SEMICOLON:
        tkr.next()
        return EmptyStatement()
    if token.type == TokenType.LET:
        return let_expression_parser(tkr)
    if _try_assignment_expression(tkr):
        return assignment_parser(tkr)
    if token.type == TokenType.IF:
        return if_parser(tkr)
    if token.type == TokenType.WHILE:
        return while_parser(tkr)
    if token.type == TokenType.RETURN:
        return return_parser(tkr)
    if token.type == TokenType.BREAK:
        tkr.eat(TokenType.BREAK)
        return BreakStatement()
    return ExpressionParser(tkr).parse()


def assignment_parser(tkr: Tokenizer):
    id = identifier(tkr)
    tkr.eat(TokenType.ASSIGNMENT)
    return Assignment(id, ExpressionParser(tkr).parse())


def let_expression_parser(tkr: Tokenizer):
    tkr.eat(TokenType.LET)
    token = tkr.token()
    if token.type != TokenType.IDENTIFIER:
        raise Exception("Invalid let statement", token)
    id = identifier(tkr)
    token = tkr.token()
    if token is None:
        raise Exception("Invalid let statement", token)
    if token.type != TokenType.ASSIGNMENT:
        raise Exception("Invalid let statement", token.type)
    tkr.next()
    ast = ExpressionParser(tkr).parse()
    return VariableDeclaration(id, ast)


class ExpressionParser:

    def __init__(self, tkr: Tokenizer):
        self.stack = list[Expression | Token]()
        self.operator_stack = list[Token]()
        self.tkr = tkr

    def parse(self, unary=False):
        while not self.is_end():
            token = self.tkr.token()
            if unary and not self.is_unary() and token.type in unary_end_statement:
                break
            if self.is_unary():
                self.push_stack(self.unary_expression_parser())
            elif self._try_fun_expression():
                return self.fun_expression()
            # -(hello x 123) // !(true and false)
            elif unary and token.type == TokenType.LEFT_PAREN:
                self.tkr.next()
                self.push_stack(ExpressionParser(self.tkr).parse())
            elif self._is_operator(token) or token.type in [TokenType.LEFT_PAREN, TokenType.RIGHT_PAREN]:
                self.push_operator_stack(token)
                self.tkr.next()
            else:
                self.push_stack(self.expression_parser())
        self.pop_all()
        return self.expression()

    def expression(self):
        if len(self.stack) == 0:
            return EmptyStatement()
        if len(self.stack) == 1:
            return self.stack[0]
        return expression_list_to_binary(self.stack)

    def expression_parser(self):
        token = self.tkr.token()
        if token is None:
            return EmptyStatement()
        expression = None
        if token.type == TokenType.INT:
            self.tkr.eat(TokenType.INT)
            expression = IntLiteral(int(token.value))
        elif token.type == TokenType.FLOAT:
            self.tkr.eat(TokenType.FLOAT)
            expression = FloatLiteral(float(token.value))
        elif token.type == TokenType.STRING:
            self.tkr.eat(TokenType.STRING)
            expression = StringLiteral(token.value[1:-1])
        elif token.type == TokenType.BOOL:
            self.tkr.eat(TokenType.BOOL)
            expression = BoolLiteral(token.value == "true")
        elif token.type == TokenType.IDENTIFIER:
            expression = self.identifier_or_fun_call_parser()
        return expression

    def _try_fun_expression(self):
        return _try_fun_expression(self.tkr)

    def fun_expression(self):
        tkr = self.tkr
        tkr.next()
        args = list[Identifier]()
        token_type = tkr.tokenType()
        while token_type != TokenType.RIGHT_PAREN:
            args.append(Identifier(tkr.token().value))
            tkr.next()
            token_type = tkr.tokenType()
            if token_type == TokenType.RIGHT_PAREN:
                break
            tkr.next()
            token_type = tkr.tokenType()
        token_type = tkr.next_token_type()
        if token_type != TokenType.ARROW:
            raise Exception("Invalid fun_expression", tkr.token())
        tkr.next()
        return Fun(args, block_statement(tkr))

    def push_stack(self, expression: Expression | Token):
        self.stack.append(expression)

    def _pop_by_right_paren(self):
        token = self.operator_stack.pop()
        if token.type != TokenType.LEFT_PAREN:
            self.push_stack(token)
            self._pop_by_right_paren()

    def pop(self):
        self.push_stack(self.operator_stack.pop())

    def pop_all(self):
        while len(self.operator_stack) > 0:
            self.pop()

    def push_operator_stack(self, token: Token):
        if len(self.operator_stack) == 0:
            self.operator_stack.append(token)
            return
        if token.type == TokenType.LEFT_PAREN:
            self.operator_stack.append(token)
            return
        if token.type == TokenType.RIGHT_PAREN:
            self._pop_by_right_paren()
            return
        top_operator = self.operator_stack[-1]
        if top_operator.type == TokenType.LEFT_PAREN:
            self.operator_stack.append(token)
            return
        # priority is in descending order
        if self._priority(token) >= self._priority(top_operator):
            self.pop()
            self.push_operator_stack(token)
            return
        self.operator_stack.append(token)

    def unary_expression_parser(self):
        token = self.tkr.token()
        self.tkr.next()
        return UnaryExpression(token.value, ExpressionParser(self.tkr).parse(True))

    def identifier_or_fun_call_parser(self):
        id = self.identifier()
        tokenType = self.tkr.tokenType()
        if tokenType == TokenType.LEFT_PAREN:
            return self.fun_call_parser(id)
        return id

    def fun_call_parser(self, id: Identifier):
        self.tkr.eat(TokenType.LEFT_PAREN)
        args = list[Expression]()
        while self.tkr.tokenType() != TokenType.RIGHT_PAREN:
            args.append(ExpressionParser(self.tkr).parse())
            if self.tkr.tokenType() == TokenType.COMMA:
                self.tkr.eat(TokenType.COMMA)
        self.tkr.eat(TokenType.RIGHT_PAREN)
        return CallExpression(id, args)

    def identifier(self):
        return identifier(self.tkr)

    def is_unary(self):
        token = self.tkr.token()
        if not self.unary_operator(token):
            return False
        if token.type == TokenType.NOT:
            return True
        prev_token = self.tkr.get_prev()
        if prev_token is None:
            return True
        if prev_token.type == TokenType.LEFT_PAREN:
            return True
        if prev_token.type in unary_prev_statement:
            return True
        return False

    def unary_operator(self, token: Token):
        if token is None:
            return False
        return token.value in ["+", "-", "!"]

    def _has_brackets(self):
        return TokenType.LEFT_PAREN in map(lambda x: x.type, self.operator_stack)

    def is_end(self):
        token = self.tkr.token()
        if token is None:
            return True
        if token.type == TokenType.SEMICOLON:
            return True
        if not self._has_brackets() and token.type == TokenType.RIGHT_PAREN:
            return True
        if token.type in end_statement:
            return True
        return False

    def _is_operator(self, token: Token):
        if token is None:
            return False
        return token.type in [TokenType.ADDITIVE_OPERATOR, TokenType.MULTIPLICATIVE_OPERATOR, TokenType.LOGICAL_OPERATOR, TokenType.NOT]

    def _debug_print_tokens(self):
        print("operator stack:----")
        for token in self.operator_stack:
            print(token)

    def _debug_print_stack(self):
        print("stack:----")
        for expression in self.stack:
            print(expression)

    def _priority(self, token: Token):
        return _priority(token.value)


def expression_list_to_binary(expression_list: list[Expression | Token], stack: list = None):
    if stack is None:
        stack = list()
    if len(expression_list) == 0:
        return stack[0]
    top = expression_list[0]
    if isinstance(top, Token):
        right = stack.pop()
        left = stack.pop()
        return expression_list_to_binary(expression_list[1:], stack + [BinaryExpression(left, top.value, right)])
    else:
        stack.append(top)
        return expression_list_to_binary(expression_list[1:], stack)


def _priority(operator: str):
    priority = 0
    if operator in ["*", "/", "%"]:
        return priority
    priority += 1
    if operator in ["+", "-"]:
        return priority
    priority += 1
    if operator in ["<", ">", "<=", ">="]:
        return priority
    priority += 1
    if operator in ["==", "!="]:
        return priority
    priority += 1
    if operator in ["&&"]:
        return priority
    priority += 1
    if operator in ["||"]:
        return priority
    priority += 1
    return priority


def _try_assignment_expression(tkr: Tokenizer):
    tkr = copy.deepcopy(tkr)
    token = tkr.token()
    if token is None:
        return False
    if token.type != TokenType.IDENTIFIER:
        return False
    tkr.next()
    token = tkr.token()
    if token is None:
        return False
    if token.type != TokenType.ASSIGNMENT:
        return False
    return True


def _try_fun_expression(_tkr: Tokenizer):
    tkr = copy.deepcopy(_tkr)
    token = tkr.token()
    if token is None:
        return False
    if token.type != TokenType.LEFT_PAREN:
        return False
    tkr.next()
    token_type = tkr.tokenType()
    while token_type != TokenType.RIGHT_PAREN:
        if token_type == TokenType.IDENTIFIER:
            tkr.next()
            token_type = tkr.tokenType()
            if token_type == TokenType.RIGHT_PAREN:
                break
            if token_type != TokenType.COMMA:
                return False
            tkr.next()
            token_type = tkr.tokenType()
            if token_type == TokenType.RIGHT_PAREN:
                return False
        else:
            return False
    token_type = tkr.next_token_type()
    if token_type != TokenType.ARROW:
        return False
    return True
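A minimal end-to-end sketch of the parser, showing that the operator stack gives `*` precedence over `+`. The script text is illustrative and the imports assume the same layout as src/dotchain/main.py.

```python
from runtime.interpreter import program_parser
from runtime.runtime import Runtime
from runtime.tokenizer import Tokenizer

t = Tokenizer()
t.init("let x = 1 + 2 * 3; print(x);")
program_parser(t).exec(Runtime(exteral_fun={"print": print}))  # prints 7, not 9
```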
src/dotchain/runtime/runtime.py (new file, 40 lines)
@@ -0,0 +1,40 @@

class Runtime():

    def __init__(self, context=None, parent=None, exteral_fun=None, name=None) -> None:
        self.name = name
        self.parent = parent
        self.context = context if context is not None else dict()
        self.exteral_fun = exteral_fun if exteral_fun is not None else dict()

    def has_value(self, identifier: str) -> bool:
        return identifier in self.context

    def get_value(self, identifier: str):
        return self.context.get(identifier)

    def deep_get_value(self, id: str):
        if self.has_value(id):
            return self.get_value(id)
        if self.parent is not None:
            return self.parent.deep_get_value(id)
        return None

    def set_value(self, identifier: str, value):
        self.context[identifier] = value

    def declare(self, identifier: str, value):
        if self.has_value(identifier):
            raise Exception(f"Variable {identifier} is already declared")
        self.set_value(identifier, value)

    def assign(self, identifier: str, value):
        if self.has_value(identifier):
            self.set_value(identifier, value)
        elif self.parent is not None:
            self.parent.assign(identifier, value)
        else:
            raise Exception(f"Variable {identifier} is not declared")

    def show_values(self):
        print(self.context)
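A short sketch of the scope chain these methods implement; the variable names are illustrative and the import assumes the test layout.

```python
from runtime.runtime import Runtime

parent_scope = Runtime(context={"x": 1})
child_scope = Runtime(parent=parent_scope)

child_scope.declare("y", 2)              # new variable in the child scope only
child_scope.assign("x", 10)              # walks up and updates x in the parent
print(child_scope.deep_get_value("x"))   # -> 10, resolved through the parent
print(parent_scope.has_value("y"))       # -> False, y never leaks upward
```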
src/dotchain/runtime/tests/__init__.py (new file, 0 lines)
src/dotchain/runtime/tests/test_expression_parser.py (new file, 153 lines)
@@ -0,0 +1,153 @@

import unittest
from runtime.ast import BoolLiteral, CallExpression, FloatLiteral, Identifier, IntLiteral, UnaryExpression
from runtime.interpreter import ExpressionParser, _priority, _try_fun_expression
from runtime.tokenizer import TokenType, Tokenizer, Token


class TestExpressionParser(unittest.TestCase):

    def test__try_fun_expression(self):
        t = Tokenizer()
        t.init("()")
        self.assertFalse(_try_fun_expression(t))

        t.init("() =>")
        self.assertTrue(_try_fun_expression(t))

        t.init("(a) =>")
        self.assertTrue(_try_fun_expression(t))

        t.init("(a,) =>")
        self.assertFalse(_try_fun_expression(t))

        t.init("(a,b,c,d) =>;")
        self.assertTrue(_try_fun_expression(t))

        t.init("(a,b,c,true) =>;")
        self.assertFalse(_try_fun_expression(t))

        t.init("(a,b,c,1.23) =>;")
        self.assertFalse(_try_fun_expression(t))

    def test_is_unary(self):
        t = Tokenizer()
        t.init("!")
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertTrue(pred)

        t.init("+")
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertTrue(pred)

        t.init("--123")
        t.next()
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertTrue(pred)

        t.init("+-123")
        t.next()
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertTrue(pred)

        t.init(")-123")
        t.next()
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertFalse(pred)

        t.init("=> - 123")
        t.next()
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertTrue(pred)

        t.init(", - 123")
        t.next()
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertTrue(pred)

        t.init("* - 123")
        t.next()
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertTrue(pred)

        t.init("* - 123")
        parser = ExpressionParser(t)
        pred = parser.is_unary()
        self.assertFalse(pred)

    def test_expression_parser(self):
        t = Tokenizer()
        t.init("a")
        parser = ExpressionParser(t)
        expression = parser.expression_parser()
        self.assertIsInstance(expression, Identifier)

        t.init("true")
        parser = ExpressionParser(t)
        expression = parser.expression_parser()
        self.assertIsInstance(expression, BoolLiteral)
        self.assertEqual(expression.value, True)

        t.init("false")
        parser = ExpressionParser(t)
        expression = parser.expression_parser()
        self.assertIsInstance(expression, BoolLiteral)
        self.assertEqual(expression.value, False)

        t.init("12341")
        parser = ExpressionParser(t)
        expression = parser.expression_parser()
        self.assertEqual(expression.value, 12341)
        self.assertIsInstance(expression, IntLiteral)

        t.init("12341.42")
        parser = ExpressionParser(t)
        expression = parser.expression_parser()
        self.assertEqual(expression.value, 12341.42)
        self.assertIsInstance(expression, FloatLiteral)

        t.init("hello")
        parser = ExpressionParser(t)
        expression: Identifier = parser.expression_parser()
        self.assertIsInstance(expression, Identifier)
        self.assertEqual(expression.name, "hello")

        t.init("print()")
        parser = ExpressionParser(t)
        expression: CallExpression = parser.expression_parser()
        self.assertIsInstance(expression, CallExpression)
        self.assertEqual(expression.callee.name, "print")

        t.init("print(1,2,3,hello)")
        parser = ExpressionParser(t)
        expression: CallExpression = parser.expression_parser()
        self.assertIsInstance(expression, CallExpression)
        self.assertEqual(expression.callee.name, "print")
        self.assertEqual(len(expression.arguments), 4)

    def test_binary_expression(self):
        t = Tokenizer()

    def test__priority(self):
        self.assertEqual(_priority("*"), 0)
        self.assertEqual(_priority("/"), 0)
        self.assertEqual(_priority("%"), 0)
        self.assertEqual(_priority("+"), 1)
        self.assertEqual(_priority("-"), 1)
        self.assertEqual(_priority(">"), 2)
        self.assertEqual(_priority("<"), 2)
        self.assertEqual(_priority(">="), 2)
        self.assertEqual(_priority("<="), 2)
        self.assertEqual(_priority("=="), 3)
        self.assertEqual(_priority("!="), 3)
        self.assertEqual(_priority("&&"), 4)
        self.assertEqual(_priority("||"), 5)
src/dotchain/runtime/tests/test_runtime.py (new file, 7 lines)
@@ -0,0 +1,7 @@

import unittest


class TestRuntime(unittest.TestCase):

    def test_eval(self):
        self.assertTrue(True)
src/dotchain/runtime/tests/test_tokenizer.py (new file, 151 lines)
@@ -0,0 +1,151 @@

import unittest
from runtime.tokenizer import TokenType, Tokenizer, Token


class TestTokenizer(unittest.TestCase):

    def test_init(self):
        t = Tokenizer()
        self.assertEqual(t.script, "")
        self.assertEqual(t.cursor, 0)
        self.assertEqual(t.col, 0)
        self.assertEqual(t.row, 0)

    def test_tokenizer(self):
        t = Tokenizer()
        t.init("a")
        self.assertEqual(t.token().value, "a")
        self.assertEqual(t.token().type, TokenType.IDENTIFIER)

        t.init("12341")
        self.assertEqual(t.token().value, "12341")
        self.assertEqual(t.token().type, TokenType.INT)

        t.init("12341.1234124")
        self.assertEqual(t.token().value, "12341.1234124")
        self.assertEqual(t.token().type, TokenType.FLOAT)

        t.init("false")
        self.assertEqual(t.token().value, "false")
        self.assertEqual(t.token().type, TokenType.BOOL)

        t.init("\"false\"")
        self.assertEqual(t.token().value, "\"false\"")
        self.assertEqual(t.token().type, TokenType.STRING)

        t.init("helloworld")
        self.assertEqual(t.token().value, "helloworld")
        self.assertEqual(t.token().type, TokenType.IDENTIFIER)

        t.init("!")
        self.assertEqual(t.token().value, "!")
        self.assertEqual(t.token().type, TokenType.NOT)

        t.init("==")
        self.assertEqual(t.token().value, "==")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init("!=")
        self.assertEqual(t.token().value, "!=")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init("<=")
        self.assertEqual(t.token().value, "<=")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init(">=")
        self.assertEqual(t.token().value, ">=")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init("<")
        self.assertEqual(t.token().value, "<")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init(">")
        self.assertEqual(t.token().value, ">")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init("&&")
        self.assertEqual(t.token().value, "&&")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init("||")
        self.assertEqual(t.token().value, "||")
        self.assertEqual(t.token().type, TokenType.LOGICAL_OPERATOR)

        t.init("=")
        self.assertEqual(t.token().value, "=")
        self.assertEqual(t.token().type, TokenType.ASSIGNMENT)

        t.init("+")
        self.assertEqual(t.token().value, "+")
        self.assertEqual(t.token().type, TokenType.ADDITIVE_OPERATOR)

        t.init("-")
        self.assertEqual(t.token().value, "-")
        self.assertEqual(t.token().type, TokenType.ADDITIVE_OPERATOR)

        t.init("*")
        self.assertEqual(t.token().value, "*")
        self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR)

        t.init("/")
        self.assertEqual(t.token().value, "/")
        self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR)

        t.init("%")
        self.assertEqual(t.token().value, "%")
        self.assertEqual(t.token().type, TokenType.MULTIPLICATIVE_OPERATOR)

        t.init("(")
        self.assertEqual(t.token().value, "(")
        self.assertEqual(t.token().type, TokenType.LEFT_PAREN)

        t.init(")")
        self.assertEqual(t.token().value, ")")
        self.assertEqual(t.token().type, TokenType.RIGHT_PAREN)

        t.init("{")
        self.assertEqual(t.token().value, "{")
        self.assertEqual(t.token().type, TokenType.LEFT_BRACE)

        t.init("}")
        self.assertEqual(t.token().value, "}")
        self.assertEqual(t.token().type, TokenType.RIGHT_BRACE)

    def test_init_with_script(self):
        t = Tokenizer()
        script = "a + 9 * ( 3 - 1 ) * 3 + 10 / 2;"
        t.init(script)
        self.assertEqual(t.script, script)
        self.assertEqual(len(t.tokens), 16)
        self.assertEqual(t.get_prev(), None)
        self.assertEqual(t.token().value, "a")
        self.assertEqual(t.get_next().value, "+")
        self.assertEqual(t.next().value, "+")
        self.assertEqual(t.next().value, "9")
        self.assertEqual(t.next().value, "*")
        t.prev()
        self.assertEqual(t.token().value, "9")
        t.prev()
        self.assertEqual(t.token().value, "+")

        script = "a + 9"
        t.init(script)
        self.assertEqual(t.token().type, TokenType.IDENTIFIER)
        self.assertEqual(t.next().type, TokenType.ADDITIVE_OPERATOR)
        self.assertEqual(t.next().type, TokenType.INT)
        self.assertEqual(t.next(), None)
        self.assertEqual(t._current_token_index, 3)
        self.assertEqual(t.next(), None)
        self.assertEqual(t.next(), None)
        self.assertEqual(t._current_token_index, 3)
        self.assertEqual(t.next(), None)
        t.prev()
        self.assertEqual(t.token().value, "9")
        t.prev()
        self.assertEqual(t.token().value, "+")
        t.prev()
        self.assertEqual(t.token().value, "a")
        t.prev()
        self.assertEqual(t.token().value, "a")
src/dotchain/runtime/tokenizer.py (new file, 259 lines)
@@ -0,0 +1,259 @@
import re
from enum import Enum

from dataclasses import dataclass


class TokenType(Enum):
    NEW_LINE = 1
    SPACE = 2
    COMMENTS = 3
    LEFT_PAREN = 4
    RIGHT_PAREN = 5
    COMMA = 6
    LEFT_BRACE = 7
    RIGHT_BRACE = 8
    SEMICOLON = 9
    LET = 10
    RETURN = 11
    IF = 12
    ELSE = 13
    WHILE = 14
    FOR = 15
    FLOAT = 18
    INT = 19
    IDENTIFIER = 20
    LOGICAL_OPERATOR = 21
    NOT = 22
    ASSIGNMENT = 23
    MULTIPLICATIVE_OPERATOR = 24
    ADDITIVE_OPERATOR = 25
    STRING = 26
    ARROW = 27
    BOOL = 28
    BREAK = 29
    TYPE_DEFINITION = 30
    COLON = 31


specs = (
    (re.compile(r"^\n"), TokenType.NEW_LINE),
    # Space:
    (re.compile(r"^\s"), TokenType.SPACE),
    # Comments:
    (re.compile(r"^//.*"), TokenType.COMMENTS),

    # Symbols:
    (re.compile(r"^\("), TokenType.LEFT_PAREN),
    (re.compile(r"^\)"), TokenType.RIGHT_PAREN),
    (re.compile(r"^\,"), TokenType.COMMA),
    (re.compile(r"^\{"), TokenType.LEFT_BRACE),
    (re.compile(r"^\}"), TokenType.RIGHT_BRACE),
    (re.compile(r"^;"), TokenType.SEMICOLON),
    (re.compile(r"^:"), TokenType.COLON),
    (re.compile(r"^=>"), TokenType.ARROW),

    # Keywords:
    (re.compile(r"^\blet\b"), TokenType.LET),
    (re.compile(r"^\breturn\b"), TokenType.RETURN),
    (re.compile(r"^\bif\b"), TokenType.IF),
    (re.compile(r"^\belse\b"), TokenType.ELSE),
    (re.compile(r"^\bwhile\b"), TokenType.WHILE),
    (re.compile(r"^\bfor\b"), TokenType.FOR),
    (re.compile(r"^\bbreak\b"), TokenType.BREAK),

    (re.compile(r"^\btrue\b"), TokenType.BOOL),
    (re.compile(r"^\bfalse\b"), TokenType.BOOL),

    # Type definition:
    (re.compile(r"^\bstring\b"), TokenType.TYPE_DEFINITION),
    (re.compile(r"^\bint\b"), TokenType.TYPE_DEFINITION),
    (re.compile(r"^\bfloat\b"), TokenType.TYPE_DEFINITION),
    (re.compile(r"^\bbool\b"), TokenType.TYPE_DEFINITION),
    (re.compile(r"^\bany\b"), TokenType.TYPE_DEFINITION),

    # Floats:
    (re.compile(r"^[0-9]+\.[0-9]+"), TokenType.FLOAT),

    # Ints:
    (re.compile(r"^[0-9]+"), TokenType.INT),

    # Identifiers:
    (re.compile(r"^\w+"), TokenType.IDENTIFIER),

    # Logical operators:
    (re.compile(r"^&&"), TokenType.LOGICAL_OPERATOR),
    (re.compile(r"^\|\|"), TokenType.LOGICAL_OPERATOR),
    (re.compile(r"^=="), TokenType.LOGICAL_OPERATOR),
    (re.compile(r"^!="), TokenType.LOGICAL_OPERATOR),
    (re.compile(r"^<="), TokenType.LOGICAL_OPERATOR),
    (re.compile(r"^>="), TokenType.LOGICAL_OPERATOR),
    (re.compile(r"^<"), TokenType.LOGICAL_OPERATOR),
    (re.compile(r"^>"), TokenType.LOGICAL_OPERATOR),

    (re.compile(r"^!"), TokenType.NOT),

    # Assignment:
    (re.compile(r"^="), TokenType.ASSIGNMENT),

    # Math operators: +, -, *, /:
    (re.compile(r"^[*/%]"), TokenType.MULTIPLICATIVE_OPERATOR),
    (re.compile(r"^[+-]"), TokenType.ADDITIVE_OPERATOR),

    # Double-quoted strings
    # TODO: escape character \" and
    (re.compile(r"^\"[^\"]*\""), TokenType.STRING),
)


@dataclass
class Token:
    type: TokenType
    value: str
    row: int
    col: int
    col_end: int
    cursor: int

    def __str__(self) -> str:
        return f"Token({self.type}, {self.value}, row={self.row}, col={self.col}, col_end={self.col_end}, cursor={self.cursor})"


class Tokenizer:

    def __init__(self):
        self._current_token = None
        self.script = ""
        self.cursor = 0
        self.col = 0
        self.row = 0
        self._current_token_index = 0
        self.tokens = list[Token]()
        self.checkpoint = list[int]()

    def init(self, script: str):
        self.checkpoint = list[int]()
        self.tokens = list[Token]()
        self._current_token_index = 0
        self._current_token = None
        self.script = script
        self.cursor = 0
        self.col = 0
        self.row = 0
        self._get_next_token()
        while self._current_token is not None:
            self.tokens.append(self._current_token)
            self._get_next_token()

    def checkpoint_push(self):
        self.checkpoint.append(self._current_token_index)

    def checkpoint_pop(self):
        self._current_token_index = self.checkpoint.pop()

    def next(self):
        if self._current_token_index < len(self.tokens):
            self._current_token_index += 1
        return self.token()

    def next_token_type(self):
        if self._current_token_index < len(self.tokens):
            self._current_token_index += 1
        return self.tokenType()

    def prev(self):
        if self._current_token_index > 0:
            self._current_token_index -= 1
        return self.token()

    def get_prev(self):
        if self._current_token_index == 0:
            return None
        return self.tokens[self._current_token_index - 1]

    def get_next(self):
        if self._current_token_index + 1 >= len(self.tokens):
            return None
        return self.tokens[self._current_token_index + 1]

    def token(self):
        if self._current_token_index >= len(self.tokens):
            return None
        return self.tokens[self._current_token_index]

    def tokenType(self):
        if self._current_token_index >= len(self.tokens):
            return None
        return self.tokens[self._current_token_index].type

    def _get_next_token(self):
        if self._is_eof():
            self._current_token = None
            return None
        _string = self.script[self.cursor:]
        for spec in specs:
            tokenValue, offset = self.match(spec[0], _string)
            if tokenValue is None:
                continue
            if spec[1] == TokenType.NEW_LINE:
                self.row += 1
                self.col = 0
                return self._get_next_token()
            if spec[1] == TokenType.COMMENTS:
                return self._get_next_token()
            if spec[1] == TokenType.SPACE:
                self.col += offset
                return self._get_next_token()
            if spec[1] is None:
                return self._get_next_token()
            self._current_token = Token(spec[1], tokenValue, self.row, self.col, self.col + offset, self.cursor)
            self.col += offset
            return self.get_current_token()
        raise Exception("Unknown token: " + _string[0])

    def _is_eof(self):
        return self.cursor == len(self.script)

    def has_more_tokens(self):
        return self.cursor < len(self.script)

    def get_current_token(self):
        return self._current_token

    def match(self, reg: re.Pattern, _script):
        matched = reg.search(_script)
        if matched is None:
            return None, 0
        self.cursor = self.cursor + matched.span(0)[1]
        return matched[0], matched.span(0)[1]

    def eat(self, value: str | TokenType):
        if isinstance(value, str):
            return self.eat_value(value)
        if isinstance(value, TokenType):
            return self.eat_token_type(value)

    def eat_value(self, value: str):
        token = self.token()
        if token is None:
            raise Exception(f"Expected {value} but got None")
        if token.value != value:
            raise Exception(f"Expected {value} but got {token.value}")
        self.next()
        return token

    def eat_token_type(self, tokenType: TokenType):
        token = self.token()
        if token is None:
            raise Exception(f"Expected {tokenType} but got None")
        if token.type != tokenType:
            raise Exception(f"Expected {tokenType} but got {token.type}")
        self.next()
        return token

    def type_is(self, tokenType: TokenType):
        if self.token() is None:
            return False
        return self.token().type == tokenType

    def the_rest(self):
        return self.tokens[self._current_token_index:]
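A brief sketch of using the tokenizer on its own; the script text is illustrative and the import assumes the test layout.

```python
from runtime.tokenizer import Tokenizer

t = Tokenizer()
t.init("let x = 1 + 2; // trailing comment")
for token in t.the_rest():
    print(token)  # Token(TokenType.LET, let, ...), Token(TokenType.IDENTIFIER, x, ...), etc.
```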