From 4c3756811d8a79c44ba93f93387c69a137579822 Mon Sep 17 00:00:00 2001
From: Ivan087
Date: Tue, 20 Aug 2024 09:41:10 +0800
Subject: [PATCH 1/2] modify vlms with lmdeploy

---
 README.md            | 187 +++++++++++++++---------------
 src/blackbox/vlms.py | 193 +++++++++++++++++++-----------
 src/configuration.py | 269 ++++++++++++++++++++++---------------------
 3 files changed, 359 insertions(+), 290 deletions(-)

diff --git a/README.md b/README.md
index 4b0ca7b..f29c2fa 100644
--- a/README.md
+++ b/README.md
@@ -1,92 +1,95 @@
-# jarvis-models
-## Conda Environment and Python Library Requirement
-```bash
-conda create -n jarvis-models python==3.10.11
-pip install -r sample/requirement_out_of_pytorch.txt
-pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
-```
-## More Dependencies
-| System | package | web | install command |
-| --- |-----------------------| --- | --- |
-| python | filetype | https://pypi.org/project/filetype/ | pip install filetype |
-| python | fastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
-| python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
-| python | uvicorn | https://www.uvicorn.org/ | pip install "uvicorn[standard]" |
-| python | SpeechRecognition | https://pypi.org/project/SpeechRecognition/ | pip install SpeechRecognition |
-| python | gtts | https://pypi.org/project/gTTS/ | pip install gTTS |
-| python | PyYAML | https://pypi.org/project/PyYAML/ | pip install PyYAML |
-| python | injector | https://github.com/python-injector/injector | pip install injector |
-| python | langchain | https://github.com/langchain-ai/langchain | pip install langchain |
-| python | chromadb | https://docs.trychroma.com/getting-started | pip install chromadb |
-| python | lagent | https://github.com/InternLM/lagent/blob/main/README.md | pip install lagent |
-| python | sentence_transformers | https://github.com/InternLM/lagent/blob/main/README.md | pip install sentence_transformers |
-
-
-## Start
-Start the jarvis-models service via
-```bash
-uvicorn main:app --reload
-```
-or
-```bash
-python main.py
-```
-
-## Configuration
-Create ".env.yaml" at the root of jarvis-models, and copy the following yaml configuration
-```yaml
-env:
-  version: 0.0.1
-  host: 0.0.0.0
-  port: 8000
-
-log:
-  level: debug
-  time_format: "%Y-%m-%d %H:%M:%S"
-  filename: "D:/Workspace/Logging/jarvis/jarvis-models.log"
-
-melotts:
-  mode: local # or docker
-  url: http://10.6.44.16:18080/convert/tts
-  speed: 0.9
-  device: 'cuda'
-  language: 'ZH'
-  speaker: 'ZH'
-
-tesou:
-  url: http://120.196.116.194:48891/chat/
-
-TokenIDConverter:
-  token_path: src/asr/resources/models/token_list.pkl
-  unk_symbol: <unk>
-
-CharTokenizer:
-  symbol_value:
-  space_symbol: <space>
-  remove_non_linguistic_symbols: false
-
-WavFrontend:
-  cmvn_file: src/asr/resources/models/am.mvn
-  frontend_conf:
-    fs: 16000
-    window: hamming
-    n_mels: 80
-    frame_length: 25
-    frame_shift: 10
-    lfr_m: 7
-    lfr_n: 6
-    filter_length_max: -.inf
-    dither: 0.0
-
-Model:
-  model_path: src/asr/resources/models/model.onnx
-  use_cuda: false
-  CUDAExecutionProvider:
-    device_id: 0
-    arena_extend_strategy: kNextPowerOfTwo
-    cudnn_conv_algo_search: EXHAUSTIVE
-    do_copy_in_default_stream: true
-  batch_size: 3
-blackbox:
-  lazyloading: true
-```
+# jarvis-models
+## Conda Environment and Python Library Requirements
+```bash
+conda create -n jarvis-models python==3.10.11
+pip install -r sample/requirement_out_of_pytorch.txt
+pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
+```
+## More Dependencies
+| System | package | web | install command |
+| --- |-----------------------| --- | --- |
+| python | filetype | https://pypi.org/project/filetype/ | pip install filetype |
+| python | FastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
+| python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
+| python | uvicorn | https://www.uvicorn.org/ | pip install "uvicorn[standard]" |
+| python | SpeechRecognition | https://pypi.org/project/SpeechRecognition/ | pip install SpeechRecognition |
+| python | gTTS | https://pypi.org/project/gTTS/ | pip install gTTS |
+| python | PyYAML | https://pypi.org/project/PyYAML/ | pip install PyYAML |
+| python | injector | https://github.com/python-injector/injector | pip install injector |
+| python | langchain | https://github.com/langchain-ai/langchain | pip install langchain |
+| python | chromadb | https://docs.trychroma.com/getting-started | pip install chromadb |
+| python | lagent | https://github.com/InternLM/lagent/blob/main/README.md | pip install lagent |
+| python | sentence_transformers | https://pypi.org/project/sentence-transformers/ | pip install sentence_transformers |
+
+
+## Start
+Start the jarvis-models service via
+```bash
+uvicorn main:app --reload
+```
+or
+```bash
+python main.py
+```
+
+## Configuration
+Create ".env.yaml" at the root of jarvis-models and copy in the following YAML configuration:
+```yaml
+env:
+  version: 0.0.1
+  host: 0.0.0.0
+  port: 8000
+
+log:
+  level: debug
+  time_format: "%Y-%m-%d %H:%M:%S"
+  filename: "D:/Workspace/Logging/jarvis/jarvis-models.log"
+
+melotts:
+  mode: local # or docker
+  url: http://10.6.44.16:18080/convert/tts
+  speed: 0.9
+  device: 'cuda'
+  language: 'ZH'
+  speaker: 'ZH'
+
+tesou:
+  url: http://120.196.116.194:48891/chat/
+
+TokenIDConverter:
+  token_path: src/asr/resources/models/token_list.pkl
+  unk_symbol: <unk>
+
+CharTokenizer:
+  symbol_value:
+  space_symbol: <space>
+  remove_non_linguistic_symbols: false
+
+WavFrontend:
+  cmvn_file: src/asr/resources/models/am.mvn
+  frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 7
+    lfr_n: 6
+    filter_length_max: -.inf
+    dither: 0.0
+
+Model:
+  model_path: src/asr/resources/models/model.onnx
+  use_cuda: false
+  CUDAExecutionProvider:
+    device_id: 0
+    arena_extend_strategy: kNextPowerOfTwo
+    cudnn_conv_algo_search: EXHAUSTIVE
+    do_copy_in_default_stream: true
+  batch_size: 3
+blackbox:
+  lazyloading: true
+
+vlms:
+  url: http://10.6.80.87:23333
+```
diff --git a/src/blackbox/vlms.py b/src/blackbox/vlms.py
index cd02ce8..3e07bfa 100644
--- a/src/blackbox/vlms.py
+++ b/src/blackbox/vlms.py
@@ -1,67 +1,126 @@
-from fastapi import Request, Response, status
-from fastapi.responses import JSONResponse
-from .blackbox import Blackbox
-from typing import Optional
-
-import requests
-import base64
-
-
-def is_base64(value) -> bool:
-    try:
-        base64.b64decode(base64.b64decode(value)) == value.encode()
-        return True
-    except Exception:
-        return False
-
-
-class VLMS(Blackbox):
-
-    def __call__(self, *args, **kwargs):
-        return self.processing(*args, **kwargs)
-
-    def valid(self, *args, **kwargs) -> bool:
-        data = args[0]
-        return isinstance(data, list)
-
-    def processing(self, prompt, images, model_name: Optional[str] = None) -> str:
-
-        if model_name == "Qwen-VL-Chat":
-            model_name = "infer-qwen-vl"
"llava-llama-3-8b-v1_1-transformers": - model_name = "infer-lav-lam-v1-1" - else: - model_name = "infer-qwen-vl" - - url = 'http://120.196.116.194:48894/' + model_name + '/' - - if is_base64(images): - images_data = images - else: - with open(images, "rb") as img_file: - images_data = str(base64.b64encode(img_file.read()), 'utf-8') - - data_input = {'model': model_name, 'prompt': prompt, 'img_data': images_data} - - data = requests.post(url, json=data_input) - - return data.text - - async def fast_api_handler(self, request: Request) -> Response: - try: - data = await request.json() - except: - return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST) - - model_name = data.get("model_name") - prompt = data.get("prompt") - img_data = data.get("img_data") - - if prompt is None: - return JSONResponse(content={'error': "Question is required"}, status_code=status.HTTP_400_BAD_REQUEST) - - if model_name is None or model_name.isspace(): - model_name = "Qwen-VL-Chat" - - jsonresp = str(JSONResponse(content={"response": self.processing(prompt, img_data, model_name)}).body, "utf-8") - return JSONResponse(content={"response": jsonresp}, status_code=status.HTTP_200_OK) \ No newline at end of file +from fastapi import Request, Response, status +from fastapi.responses import JSONResponse +from injector import singleton,inject +from typing import Optional + +from .blackbox import Blackbox +from ..log.logging_time import logging_time +from .chroma_query import ChromaQuery +from ..configuration import VLMConf + +import requests +import base64 + +import io +from PIL import Image +from lmdeploy.serve.openai.api_client import APIClient + +def is_base64(value) -> bool: + try: + base64.b64decode(base64.b64decode(value)) == value.encode() + return True + except Exception: + return False + +@singleton +class VLMS(Blackbox): + + @inject + def __init__(self, vlm_config: VLMConf): + # Chroma database initially set up for RAG for vision model. + # It could be expended to history store. 
+        # self.chroma_query = chroma_query
+        self.url = vlm_config.url
+
+    def __call__(self, *args, **kwargs):
+        return self.processing(*args, **kwargs)
+
+    def valid(self, *args, **kwargs) -> bool:
+        data = args[0]
+        return isinstance(data, list)
+
+    def processing(self, prompt, images, model_name: Optional[str] = None) -> str:
+
+        if model_name == "Qwen-VL-Chat":
+            model_name = "infer-qwen-vl"
+        elif model_name == "llava-llama-3-8b-v1_1-transformers":
+            model_name = "infer-lav-lam-v1-1"
+        else:
+            model_name = "infer-qwen-vl"
+
+
+        ## AutoLoad Model
+        # url = 'http://10.6.80.87:8000/' + model_name + '/'
+
+        if is_base64(images):
+            images_data = images
+        else:
+            # print("{}Type of image data in form {}".format('#'*20,type(images)))
+            # print("{}Type of image data in form {}".format('#'*20,type(images.file)))
+            # byte_stream = io.BytesIO(images.read())
+            # print("{}Type of image data in form {}".format('#'*20,type(byte_stream)))
+            # # roiImg = Image.open(byte_stream)
+            # # print("{}Successful {}".format('#'*20,type(roiImg)))
+            # return str(type(byte_stream))
+            # images_data = base64.b64encode(byte_stream)
+            with open(images, "rb") as img_file:
+                # images_data = str(base64.b64encode(img_file.read()), 'utf-8')
+                images_data = base64.b64encode(img_file.read())
+
+        # data_input = {'model': model_name, 'prompt': prompt, 'img_data': images_data}
+
+        # data = requests.post(url, json=data_input)
+        # print(data.text)
+        # 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'
+        ## Lmdeploy
+        api_client = APIClient(self.url)
+        # api_client = APIClient(f'http://10.6.80.87:23333')
+        model_name = api_client.available_models[0]
+        messages = [{
+            'role':
+            'user',
+            'content': [{
+                'type': 'text',
+                'text': prompt,
+            }, {
+                'type': 'image_url',
+                'image_url': {
+                    'url': f"data:image/jpeg;base64,{images_data}",
+                    # './val_data/image_5.jpg',
+                },
+            }]
+        }
+        ]
+
+        responses = ''
+        for i,item in enumerate(api_client.chat_completions_v1(model=model_name,
+                                                               messages=messages#,stream = True
+                                                               )):
+            print(item["choices"][0]["message"]['content'])
+            responses += item["choices"][0]["message"]['content']
+
+        return responses
+
+        # return data.text
+
+    async def fast_api_handler(self, request: Request) -> Response:
+        try:
+            data = await request.form()
+        except:
+            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
+
+        model_name = data.get("model_name")
+        prompt = data.get("prompt")
+        img_data = data.get("img_data")
+
+        if prompt is None:
+            return JSONResponse(content={'error': "Question is required"}, status_code=status.HTTP_400_BAD_REQUEST)
+
+        if model_name is None or model_name.isspace():
+            model_name = "Qwen-VL-Chat"
+
+        # jsonresp = str(JSONResponse(content={"response": self.processing(prompt, img_data, model_name)}).body, "utf-8")
+
+        return JSONResponse(content={"response": self.processing(prompt, img_data, model_name)}, status_code=status.HTTP_200_OK)
\ No newline at end of file
diff --git a/src/configuration.py b/src/configuration.py
index 96d57aa..9af1bec 100644
--- a/src/configuration.py
+++ b/src/configuration.py
@@ -1,131 +1,138 @@
-
-from dataclasses import dataclass
-from injector import inject,singleton
-import yaml
-import sys
-import logging
-
-@singleton
-class Configuration():
-
-    @inject
-    def __init__(self) -> None:
-        config_file_path = ""
-        try:
-            config_file_path = sys.argv[1]
-        except:
-            config_file_path = ".env.yaml"
-        with open(config_file_path) as f:
-            cfg = yaml.load(f, Loader=yaml.FullLoader)
-        self.cfg = cfg
-
-    def getDict(self):
-        return self.cfg
-
-    """
-    # yaml 檔中的路徑 get("aaa.bbb.ccc")
-    aaa:
-      bbb:
-        ccc: "hello world"
-    """
-    def get(self, path: str | list[str], cfg: dict = None, default=None):
-        if isinstance(path, str):
-            if cfg is None:
-                cfg = self.cfg
-            return self.get(path.split("."), cfg)
-        length = len(path)
-        if length == 0 or not isinstance(cfg, dict):
-            return default
-        if length == 1:
-            return cfg.get(path[0])
-        return self.get(path[1:], cfg.get(path[0]))
-
-class TesouConf():
-    url: str
-
-    @inject
-    def __init__(self, config: Configuration) -> None:
-        self.url = config.get("tesou.url")
-
-
-class MeloConf():
-    mode: str
-    url: str
-    speed: int
-    device: str
-    language: str
-    speaker: str
-
-    @inject
-    def __init__(self, config: Configuration) -> None:
-        self.mode = config.get("melotts.mode")
-        self.url = config.get("melotts.url")
-        self.speed = config.get("melotts.speed")
-        self.device = config.get("melotts.device")
-        self.language = config.get("melotts.language")
-        self.speaker = config.get("melotts.speaker")
-
-class CosyVoiceConf():
-    mode: str
-    url: str
-    speed: int
-    device: str
-    language: str
-    speaker: str
-
-    @inject
-    def __init__(self, config: Configuration) -> None:
-        self.mode = config.get("cosyvoicetts.mode")
-        self.url = config.get("cosyvoicetts.url")
-        self.speed = config.get("cosyvoicetts.speed")
-        self.device = config.get("cosyvoicetts.device")
-        self.language = config.get("cosyvoicetts.language")
-        self.speaker = config.get("cosyvoicetts.speaker")
-
-# 'CRITICAL': CRITICAL,
-# 'FATAL': FATAL,
-# 'ERROR': ERROR,
-# 'WARN': WARNING,
-# 'WARNING': WARNING,
-# 'INFO': INFO,
-# 'DEBUG': DEBUG,
-# 'NOTSET': NOTSET,
-DEFAULT_LEVEL="WARNING"
-DEFAULT_TIME_FORMAT="%Y-%m-%d %H:%M:%S"
-
-@singleton
-class LogConf():
-    level: int
-    time_format = "%Y-%m-%d %H:%M:%S"
-    filename: str | None
-    @inject
-    def __init__(self, config: Configuration) -> None:
-        self.level = config.get("log.level")
-        c = config.get("log.level", default=DEFAULT_LEVEL).upper()
-        level=logging._nameToLevel.get(c)
-        if level is None:
-            self.level = logging.WARNING
-        else:
-            self.level = level
-        self.filename = config.get("log.filename")
-        self.time_format = config.get("log.time_format", default=DEFAULT_TIME_FORMAT)
-
-@singleton
-class EnvConf():
-    version: str
-    host: str
-    port: str
-    @inject
-    def __init__(self, config: Configuration) -> None:
-        self.version = "0.0.1"
-        self.host = config.get("env.host", default="0.0.0.0")
-        self.port = config.get("env.port", default="8080")
-
-@singleton
-@dataclass
-class BlackboxConf():
-    lazyloading: bool
-
-    @inject
-    def __init__(self, config: Configuration) -> None:
-        self.lazyloading = bool(config.get("blackbox.lazyloading", default=False))
\ No newline at end of file
+
+from dataclasses import dataclass
+from injector import inject, singleton
+import yaml
+import sys
+import logging
+
+@singleton
+class Configuration():
+
+    @inject
+    def __init__(self) -> None:
+        config_file_path = ""
+        try:
+            config_file_path = sys.argv[1]
+        except IndexError:
+            config_file_path = ".env.yaml"
+        with open(config_file_path) as f:
+            cfg = yaml.load(f, Loader=yaml.FullLoader)
+        self.cfg = cfg
+
+    def getDict(self):
+        return self.cfg
+
+    """
+    # Look up a dotted path in the YAML file: get("aaa.bbb.ccc")
+    aaa:
+      bbb:
+        ccc: "hello world"
+    """
+    def get(self, path: str | list[str], cfg: dict = None, default=None):
+        if isinstance(path, str):
+            if cfg is None:
+                cfg = self.cfg
+            return self.get(path.split("."), cfg, default)
+        length = len(path)
+        if length == 0 or not isinstance(cfg, dict):
+            return default
+        if length == 1:
+            return cfg.get(path[0], default)
+        return self.get(path[1:], cfg.get(path[0]), default)
+
+class TesouConf():
+    url: str
+
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        self.url = config.get("tesou.url")
+
+
+class MeloConf():
+    mode: str
+    url: str
+    speed: int
+    device: str
+    language: str
+    speaker: str
+
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        self.mode = config.get("melotts.mode")
+        self.url = config.get("melotts.url")
+        self.speed = config.get("melotts.speed")
+        self.device = config.get("melotts.device")
+        self.language = config.get("melotts.language")
+        self.speaker = config.get("melotts.speaker")
+
+class CosyVoiceConf():
+    mode: str
+    url: str
+    speed: int
+    device: str
+    language: str
+    speaker: str
+
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        self.mode = config.get("cosyvoicetts.mode")
+        self.url = config.get("cosyvoicetts.url")
+        self.speed = config.get("cosyvoicetts.speed")
+        self.device = config.get("cosyvoicetts.device")
+        self.language = config.get("cosyvoicetts.language")
+        self.speaker = config.get("cosyvoicetts.speaker")
+
+# 'CRITICAL': CRITICAL,
+# 'FATAL': FATAL,
+# 'ERROR': ERROR,
+# 'WARN': WARNING,
+# 'WARNING': WARNING,
+# 'INFO': INFO,
+# 'DEBUG': DEBUG,
+# 'NOTSET': NOTSET,
+DEFAULT_LEVEL="WARNING"
+DEFAULT_TIME_FORMAT="%Y-%m-%d %H:%M:%S"
+
+@singleton
+class LogConf():
+    level: int
+    time_format = "%Y-%m-%d %H:%M:%S"
+    filename: str | None
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        # Resolve the configured level name to a logging constant; fall back to WARNING.
+        c = config.get("log.level", default=DEFAULT_LEVEL).upper()
+        level = logging._nameToLevel.get(c)
+        if level is None:
+            self.level = logging.WARNING
+        else:
+            self.level = level
+        self.filename = config.get("log.filename")
+        self.time_format = config.get("log.time_format", default=DEFAULT_TIME_FORMAT)
+
+@singleton
+class EnvConf():
+    version: str
+    host: str
+    port: str
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        self.version = "0.0.1"
+        self.host = config.get("env.host", default="0.0.0.0")
+        self.port = config.get("env.port", default="8080")
+
+@singleton
+@dataclass
+class BlackboxConf():
+    lazyloading: bool
+
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        self.lazyloading = bool(config.get("blackbox.lazyloading", default=False))
+
+@singleton
+class VLMConf():
+
+    @inject
+    def __init__(self, config: Configuration) -> None:
+        self.url = config.get("vlms.url")
\ No newline at end of file

From 4d260b33618a4ee9f694837b05d2f67f16fae8dd Mon Sep 17 00:00:00 2001
From: Ivan087
Date: Tue, 20 Aug 2024 18:02:44 +0800
Subject: [PATCH 2/2] support form-data requests

---
 src/blackbox/vlms.py | 88 +++++++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 34 deletions(-)

diff --git a/src/blackbox/vlms.py b/src/blackbox/vlms.py
index 3e07bfa..baff1da 100644
--- a/src/blackbox/vlms.py
+++ b/src/blackbox/vlms.py
@@ -1,11 +1,11 @@
 from fastapi import Request, Response, status
 from fastapi.responses import JSONResponse
 from injector import singleton, inject
-from typing import Optional
+from typing import Optional, List
 
 from .blackbox import Blackbox
 from ..log.logging_time import logging_time
-from .chroma_query import ChromaQuery
+# from .chroma_query import ChromaQuery
 from ..configuration import VLMConf
 
 import requests
 import base64
 
@@ -41,46 +41,53 @@ class VLMS(Blackbox):
         data = args[0]
         return isinstance(data, list)
 
-    def processing(self, prompt, images, model_name: Optional[str] = None) -> str:
+    def processing(self, prompt: str, images: str | bytes, model_name: Optional[str] = None, user_context: List[dict] = None) -> tuple[str, List[dict]]:
+        """
+        Args:
+            prompt: the text query for the model.
+            images: the image as a base64 string, raw bytes, or a URL string.
+            user_context: prior conversation turns as a list of OpenAI-format messages.
+
+        Returns:
+            response: the model reply as a string.
+            history: the updated conversation history as a list.
+        """
 
         if model_name == "Qwen-VL-Chat":
            model_name = "infer-qwen-vl"
         elif model_name == "llava-llama-3-8b-v1_1-transformers":
             model_name = "infer-lav-lam-v1-1"
         else:
             model_name = "infer-qwen-vl"
+
 
-
+        # Normalize the image into a base64 string, as the OpenAI message format expects.
+        if is_base64(images):  # image is already a base64 str
+            images_data = images
+        elif isinstance(images, bytes):  # image as raw bytes (e.g. an uploaded file)
+            images_data = str(base64.b64encode(images), 'utf-8')
+        else:  # image given as a URL to fetch
+            # with open(images, "rb") as img_file:
+            #     images_data = str(base64.b64encode(img_file.read()), 'utf-8')
+            res = requests.get(images)
+            images_data = str(base64.b64encode(res.content), 'utf-8')
         ## AutoLoad Model
         # url = 'http://10.6.80.87:8000/' + model_name + '/'
-
-        if is_base64(images):
-            images_data = images
-        else:
-            # print("{}Type of image data in form {}".format('#'*20,type(images)))
-            # print("{}Type of image data in form {}".format('#'*20,type(images.file)))
-            # byte_stream = io.BytesIO(images.read())
-            # print("{}Type of image data in form {}".format('#'*20,type(byte_stream)))
-            # # roiImg = Image.open(byte_stream)
-            # # print("{}Successful {}".format('#'*20,type(roiImg)))
-            # return str(type(byte_stream))
-            # images_data = base64.b64encode(byte_stream)
-            with open(images, "rb") as img_file:
-                # images_data = str(base64.b64encode(img_file.read()), 'utf-8')
-                images_data = base64.b64encode(img_file.read())
 
-        # data_input = {'model': model_name, 'prompt': prompt, 'img_data': images_data}
-
-        # data = requests.post(url, json=data_input)
         # print(data.text)
+        # return data.text
+
         # 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'
         ## Lmdeploy
+        if not user_context:
+            user_context = []
+        # e.g. user_context = [{'role': 'user', 'content': 'Hello'}, {'role': 'assistant', 'content': 'Hello! Glad to help.'}]
         api_client = APIClient(self.url)
-        # api_client = APIClient(f'http://10.6.80.87:23333')
         model_name = api_client.available_models[0]
-        messages = [{
-            'role':
-            'user',
+
+        messages = user_context + [{
+            'role': 'user',
             'content': [{
                 'type': 'text',
                 'text': prompt,
@@ -95,25 +102,37 @@
         ]
 
         responses = ''
+        total_token_usage = 0  # which can be used to count the cost of a query
         for i,item in enumerate(api_client.chat_completions_v1(model=model_name,
                                                                messages=messages#,stream = True
                                                                )):
-            print(item["choices"][0]["message"]['content'])
+            # print(item["choices"][0]["message"]['content'])
             responses += item["choices"][0]["message"]['content']
-
-        return responses
+            total_token_usage += item['usage']['total_tokens']  # 'usage': {'prompt_tokens': *, 'total_tokens': *, 'completion_tokens': *}
+        user_context = messages + [{'role': 'assistant', 'content': responses}]
 
-        # return data.text
+        return responses, user_context
+

     async def fast_api_handler(self, request: Request) -> Response:
+        json_request = True
         try:
-            data = await request.form()
-        except:
+            content_type = request.headers.get('content-type', '')
+            if content_type.startswith('application/json'):
+                data = await request.json()
+            else:
+                data = await request.form()
+                json_request = False
+        except Exception:
             return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
-
+
         model_name = data.get("model_name")
         prompt = data.get("prompt")
-        img_data = data.get("img_data")
+
+        if json_request:
+            img_data = data.get("img_data")
+        else:
+            img_data = await data.get("img_data").read()  # UploadFile -> bytes
 
         if prompt is None:
             return JSONResponse(content={'error': "Question is required"}, status_code=status.HTTP_400_BAD_REQUEST)
@@ -121,6 +140,7 @@
         if model_name is None or model_name.isspace():
             model_name = "Qwen-VL-Chat"
 
+        response, history = self.processing(prompt, img_data, model_name)
         # jsonresp = str(JSONResponse(content={"response": self.processing(prompt, img_data, model_name)}).body, "utf-8")
 
-        return JSONResponse(content={"response": self.processing(prompt, img_data, model_name)}, status_code=status.HTTP_200_OK)
\ No newline at end of file
+        return JSONResponse(content={"response": response, "history": history}, status_code=status.HTTP_200_OK)
\ No newline at end of file
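
With the second commit applied, `fast_api_handler` accepts both JSON and multipart/form-data bodies. Below is a minimal client sketch under stated assumptions: the service listens on the host/port from `.env.yaml`, a local `tiger.jpeg` file exists, and the VLMS blackbox is mounted at a hypothetical `/vlms` route (the actual route lives in `main.py`, which these patches do not touch).

```python
import base64

import requests

BASE_URL = "http://localhost:8000/vlms"  # hypothetical route; see main.py for the real one

# 1) JSON body: img_data carries a base64 string (an image URL also works).
with open("tiger.jpeg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf-8")
resp = requests.post(
    BASE_URL,
    json={"model_name": "Qwen-VL-Chat", "prompt": "Describe this image.", "img_data": img_b64},
)
print(resp.json()["response"])

# 2) multipart/form-data body: img_data is a file upload; fast_api_handler reads it
#    into bytes and processing() base64-encodes it before building the OpenAI message.
with open("tiger.jpeg", "rb") as f:
    resp = requests.post(
        BASE_URL,
        data={"model_name": "Qwen-VL-Chat", "prompt": "Describe this image."},
        files={"img_data": f},
    )
print(resp.json()["response"])
```

Note that parsing form bodies in FastAPI requires python-multipart, which already appears in the dependency table above.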