From db47bb1e69f86570a1eb22459809aaf484507b9a Mon Sep 17 00:00:00 2001 From: Ivan087 Date: Fri, 13 Sep 2024 16:17:59 +0800 Subject: [PATCH] feat: support user_context --- src/blackbox/vlms.py | 102 +++++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 22 deletions(-) diff --git a/src/blackbox/vlms.py b/src/blackbox/vlms.py index 489bbbe..a5777ab 100644 --- a/src/blackbox/vlms.py +++ b/src/blackbox/vlms.py @@ -27,7 +27,6 @@ def is_base64(value) -> bool: except Exception: return False -@singleton @singleton class VLMS(Blackbox): @@ -133,27 +132,27 @@ class VLMS(Blackbox): # 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' ## Lmdeploy - if not user_context: - user_context = [] + # if not user_context: + # user_context = [] ## Predefine user_context only for testing # user_context = [{'role':'user','content':'你好,我叫康康,你是谁?'}, {'role': 'assistant', 'content': '你好!很高兴为你提供帮助。'}] - user_context = [{ - 'role': 'user', - 'content': [{ - 'type': 'text', - 'text': '图中有什么,请描述一下', - }, { - 'type': 'image_url', - 'image_url': { - 'url': 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' - }, - }] - },{ - 'role': 'assistant', - 'content': '图片中主要展示了一只老虎,它正在绿色的草地上休息。草地上有很多可以让人坐下的地方,而且看起来相当茂盛。背景比较模糊,可能是因为老虎的影响,让整个图片的其他部分都变得不太清晰了。' - } - ] + # user_context = [{ + # 'role': 'user', + # 'content': [{ + # 'type': 'text', + # 'text': '图中有什么,请描述一下', + # }, { + # 'type': 'image_url', + # 'image_url': { + # 'url': 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' + # }, + # }] + # },{ + # 'role': 'assistant', + # 'content': '图片中主要展示了一只老虎,它正在绿色的草地上休息。草地上有很多可以让人坐下的地方,而且看起来相当茂盛。背景比较模糊,可能是因为老虎的影响,让整个图片的其他部分都变得不太清晰了。' + # } + # ] api_client = APIClient(self.url) model_name = api_client.available_models[0] @@ -208,9 +207,62 @@ class VLMS(Blackbox): user_context = messages + [{'role': 'assistant', 'content': responses}] return responses, user_context - + def 
def _into_openai_format(self, context: List[list]):
    """Convert stored chat history into the OpenAI chat-messages format.

    Args:
        context: a list of ``[user_input, response]`` pairs. ``user_input``
            is either a bare string, or a list of the form ``[text]`` or
            ``[image, text]`` where ``image`` is a base64 payload or a URL.
            # TODO: add support for multiple images per turn

    Returns:
        A flat list of ``{'role': ..., 'content': ...}`` dicts alternating
        user / assistant turns, suitable for an OpenAI-style chat endpoint.
    """
    user_context = []
    for item in context:
        user_input, response = item[0], item[1]
        if isinstance(user_input, list):
            if len(user_input) == 1:
                # Text-only turn wrapped in a single-element list.
                # NOTE(review): a one-element list holding a base64 image
                # would be emitted as text here — confirm callers never do that.
                content = [{'type': 'text', 'text': user_input[0]}]
            else:
                image, text = user_input[0], user_input[1]
                # Base64 payloads need a data-URI prefix; plain URLs pass through.
                url = f"data:image/jpeg;base64,{image}" if is_base64(image) else image
                content = [
                    {'type': 'image_url', 'image_url': {'url': url}},
                    {'type': 'text', 'text': text},
                ]
        else:
            # Bare string: a text-only user message.
            content = [{'type': 'text', 'text': user_input}]

        user_context.append({'role': 'user', 'content': content})
        user_context.append({'role': 'assistant', 'content': response})

    return user_context

async def fast_api_handler(self, request: Request) -> Response:
    """Handle an inference request: parse body, build history, run the model.

    Returns a JSON response ``{"response": ..., "history": ...}`` with 200.
    """
    ## TODO: add support for multiple images and support image in form-data format
    json_request = True
    try:
        content_type = request.headers['content-type']
        # NOTE(review): the source patch elides a hunk gap here — the unseen
        # code inspects content_type, toggles json_request, and parses the
        # request body into `data`. Reconstructed only from visible hunks.
    except Exception:
        pass
    model_name = data.get("model_name")
    prompt = data.get("prompt")
    settings: dict = data.get('settings')
    context = data.get("context")
    # Convert caller-supplied history (if any) into OpenAI message format;
    # debug prints from the original patch removed.
    user_context = self._into_openai_format(context) if context else []
    if json_request:
        img_data = data.get("img_data")
    else:
        # NOTE(review): hunk gap — the form-data image extraction branch is
        # not visible in this patch; body intentionally left to the original.
        pass
    if model_name is None or model_name.isspace():
        model_name = "Qwen-VL-Chat"
    response, history = self.processing(prompt, img_data, settings, model_name,
                                        user_context=user_context)
    return JSONResponse(content={"response": response, "history": history},
                        status_code=status.HTTP_200_OK)