From db47bb1e69f86570a1eb22459809aaf484507b9a Mon Sep 17 00:00:00 2001 From: Ivan087 Date: Fri, 13 Sep 2024 16:17:59 +0800 Subject: [PATCH] feat: support user_context --- src/blackbox/vlms.py | 102 +++++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 22 deletions(-) diff --git a/src/blackbox/vlms.py b/src/blackbox/vlms.py index 489bbbe..a5777ab 100644 --- a/src/blackbox/vlms.py +++ b/src/blackbox/vlms.py @@ -27,7 +27,6 @@ def is_base64(value) -> bool: except Exception: return False -@singleton @singleton class VLMS(Blackbox): @@ -133,27 +132,27 @@ class VLMS(Blackbox): # 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' ## Lmdeploy - if not user_context: - user_context = [] + # if not user_context: + # user_context = [] ## Predefine user_context only for testing # user_context = [{'role':'user','content':'你好,我叫康康,你是谁?'}, {'role': 'assistant', 'content': '你好!很高兴为你提供帮助。'}] - user_context = [{ - 'role': 'user', - 'content': [{ - 'type': 'text', - 'text': '图中有什么,请描述一下', - }, { - 'type': 'image_url', - 'image_url': { - 'url': 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' - }, - }] - },{ - 'role': 'assistant', - 'content': '图片中主要展示了一只老虎,它正在绿色的草地上休息。草地上有很多可以让人坐下的地方,而且看起来相当茂盛。背景比较模糊,可能是因为老虎的影响,让整个图片的其他部分都变得不太清晰了。' - } - ] + # user_context = [{ + # 'role': 'user', + # 'content': [{ + # 'type': 'text', + # 'text': '图中有什么,请描述一下', + # }, { + # 'type': 'image_url', + # 'image_url': { + # 'url': 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' + # }, + # }] + # },{ + # 'role': 'assistant', + # 'content': '图片中主要展示了一只老虎,它正在绿色的草地上休息。草地上有很多可以让人坐下的地方,而且看起来相当茂盛。背景比较模糊,可能是因为老虎的影响,让整个图片的其他部分都变得不太清晰了。' + # } + # ] api_client = APIClient(self.url) model_name = api_client.available_models[0] @@ -208,9 +207,62 @@ class VLMS(Blackbox): user_context = messages + [{'role': 'assistant', 'content': responses}] return responses, user_context - + def 
def _into_openai_format(self, context: List[list]):
    """Convert stored chat history into the OpenAI chat-messages format.

    Args:
        context: a list of ``[user_input, response]`` pairs. ``user_input``
            is either a bare string, or a list of the form ``[text]`` or
            ``[image, text]`` where ``image`` is a base64 payload or a URL.
            # TODO: add support for multiple images per turn

    Returns:
        A flat list of ``{'role': ..., 'content': ...}`` dicts alternating
        user / assistant turns, suitable for an OpenAI-style chat endpoint.
    """
    user_context = []
    for item in context:
        user_input, response = item[0], item[1]
        if isinstance(user_input, list):
            if len(user_input) == 1:
                # Text-only turn wrapped in a single-element list.
                # NOTE(review): a one-element list holding a base64 image
                # would be emitted as text here — confirm callers never do that.
                content = [{'type': 'text', 'text': user_input[0]}]
            else:
                image, text = user_input[0], user_input[1]
                # Base64 payloads need a data-URI prefix; plain URLs pass through.
                url = f"data:image/jpeg;base64,{image}" if is_base64(image) else image
                content = [
                    {'type': 'image_url', 'image_url': {'url': url}},
                    {'type': 'text', 'text': text},
                ]
        else:
            # Bare string: a text-only user message.
            content = [{'type': 'text', 'text': user_input}]

        user_context.append({'role': 'user', 'content': content})
        user_context.append({'role': 'assistant', 'content': response})

    return user_context

async def fast_api_handler(self, request: Request) -> Response:
    """Handle an inference request: parse body, build history, run the model.

    Returns a JSON response ``{"response": ..., "history": ...}`` with 200.
    """
    ## TODO: add support for multiple images and support image in form-data format
    json_request = True
    try:
        content_type = request.headers['content-type']
        # NOTE(review): the source patch elides a hunk gap here — the unseen
        # code inspects content_type, toggles json_request, and parses the
        # request body into `data`. Reconstructed only from visible hunks.
    except Exception:
        pass
    model_name = data.get("model_name")
    prompt = data.get("prompt")
    settings: dict = data.get('settings')
    context = data.get("context")
    # Convert caller-supplied history (if any) into OpenAI message format;
    # debug prints from the original patch removed.
    user_context = self._into_openai_format(context) if context else []
    if json_request:
        img_data = data.get("img_data")
    else:
        # NOTE(review): hunk gap — the form-data image extraction branch is
        # not visible in this patch; body intentionally left to the original.
        pass
    if model_name is None or model_name.isspace():
        model_name = "Qwen-VL-Chat"
    response, history = self.processing(prompt, img_data, settings, model_name,
                                        user_context=user_context)
    return JSONResponse(content={"response": response, "history": history},
                        status_code=status.HTTP_200_OK)