# jarvis-models/runtime/ast/tokenizer.py
import re

specs = (
    # Whitespace:
    (re.compile(r"^\s"), None),
    # Comments:
    (re.compile(r"^//.*"), None),
    # Keywords:
    (re.compile(r"^\blet\b"), "let"),
    (re.compile(r"^\breturn\b"), "return"),
    (re.compile(r"^;"), ";"),
    # Floats:
    (re.compile(r"^[-+]?[0-9]+\.[0-9]+"), "FLOAT"),
    # Integers:
    (re.compile(r"^[-+]?[0-9]+"), "NUMBER"),
    # Identifiers:
    (re.compile(r"^\w+"), "IDENTIFIER"),
    # Assignment:
    (re.compile(r"^="), "SIMPLE_ASSIGN"),
    # Double-quoted strings:
    (re.compile(r"^\"[^\"]*\""), "STRING"),
    # Symbols:
    (re.compile(r"^\("), "("),
    (re.compile(r"^\)"), ")"),
    (re.compile(r"^,"), ","),
    (re.compile(r"^\{"), "{"),
    (re.compile(r"^\}"), "}"),
)
class Tokenizer:
    def __init__(self):
        self.script = ""
        self.cursor = 0

    def init(self, script: str):
        """Reset the tokenizer with a new script."""
        self.script = script
        self.cursor = 0

    def isEOF(self):
        return self.cursor == len(self.script)

    def has_more_tokens(self):
        return self.cursor < len(self.script)

    def get_next_token(self):
        if not self.has_more_tokens():
            return None
        _string = self.script[self.cursor:]
        for regexp, token_type in specs:
            token_value = self.match(regexp, _string)
            if token_value is None:
                continue
            # Whitespace and comments produce no token; skip and try again.
            if token_type is None:
                return self.get_next_token()
            return {
                "type": token_type,
                "value": token_value,
            }
        raise Exception("Unknown token: " + _string[0])

    def match(self, reg: re.Pattern, _script: str):
        matched = reg.search(_script)
        if matched is None:
            return None
        # Advance the cursor past the matched lexeme.
        self.cursor += matched.span(0)[1]
        return matched[0]
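

# A quick usage sketch: tokenize a short sample script and print each token
# until the input is exhausted. The sample input is illustrative only.
if __name__ == "__main__":
    tokenizer = Tokenizer()
    tokenizer.init("let x = 42; // answer")
    token = tokenizer.get_next_token()
    while token is not None:
        print(token)  # e.g. {'type': 'let', 'value': 'let'}
        token = tokenizer.get_next_token()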