diff --git a/src/blackbox/vlms.py b/src/blackbox/vlms.py index ba2ab43..489bbbe 100644 --- a/src/blackbox/vlms.py +++ b/src/blackbox/vlms.py @@ -112,15 +112,18 @@ class VLMS(Blackbox): settings = {} # Transform the images into base64 format where openai format need. - if is_base64(images): # image as base64 str - images_data = images - elif isinstance(images,bytes): # image as bytes - images_data = str(base64.b64encode(images),'utf-8') - else: # image as pathLike str - # with open(images, "rb") as img_file: - # images_data = str(base64.b64encode(img_file.read()), 'utf-8') - res = requests.get(images) - images_data = str(base64.b64encode(res.content),'utf-8') + if images: + if is_base64(images): # image as base64 str + images_data = images + elif isinstance(images,bytes): # image as bytes + images_data = str(base64.b64encode(images),'utf-8') + else: # image as pathLike str + # with open(images, "rb") as img_file: + # images_data = str(base64.b64encode(img_file.read()), 'utf-8') + res = requests.get(images) + images_data = str(base64.b64encode(res.content),'utf-8') + else: + images_data = None ## AutoLoad Model # url = 'http://10.6.80.87:8000/' + model_name + '/' # data_input = {'model': model_name, 'prompt': prompt, 'img_data': images_data} @@ -132,38 +135,74 @@ class VLMS(Blackbox): ## Lmdeploy if not user_context: user_context = [] - # user_context = [{'role':'user','content':'你好'}, {'role': 'assistant', 'content': '你好!很高兴为你提供帮助。'}] + + ## Predefine user_context only for testing + # user_context = [{'role':'user','content':'你好,我叫康康,你是谁?'}, {'role': 'assistant', 'content': '你好!很高兴为你提供帮助。'}] + user_context = [{ + 'role': 'user', + 'content': [{ + 'type': 'text', + 'text': '图中有什么,请描述一下', + }, { + 'type': 'image_url', + 'image_url': { + 'url': 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' + }, + }] + },{ + 'role': 'assistant', + 'content': '图片中主要展示了一只老虎,它正在绿色的草地上休息。草地上有很多可以让人坐下的地方,而且看起来相当茂盛。背景比较模糊,可能是因为老虎的影响,让整个图片的其他部分都变得不太清晰了。' + } + ] api_client = APIClient(self.url) model_name = api_client.available_models[0] - messages = user_context + [{ - 'role': 'user', - 'content': [{ - 'type': 'text', - 'text': prompt, - }, { - 'type': 'image_url', - 'image_url': { - 'url': f"data:image/jpeg;base64,{images_data}", - # './val_data/image_5.jpg', - }, - }] - } - ] + # Reformat input into openai format to request. + if images_data: + messages = user_context + [{ + 'role': 'user', + 'content': [{ + 'type': 'text', + 'text': prompt, + },{ + 'type': 'image_url', + 'image_url': { # Image two + 'url': + # 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' + # './val_data/image_5.jpg' + f"data:image/jpeg;base64,{images_data}", + }, + # },{ # Image one + # 'type': 'image_url', + # 'image_url': { + # 'url': 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' + # }, + }] + } + ] + else: + messages = user_context + [{ + 'role': 'user', + 'content': [{ + 'type': 'text', + 'text': prompt, + }] + } + ] responses = '' total_token_usage = 0 # which can be used to count the cost of a query for i,item in enumerate(api_client.chat_completions_v1(model=model_name, - messages=messages,stream = True, + messages=messages,#stream = True, **settings, # session_id=, )): # Stream output - print(item["choices"][0]["delta"]['content'],end='') - responses += item["choices"][0]["delta"]['content'] + # print(item["choices"][0]["delta"]['content'],end='') + # responses += item["choices"][0]["delta"]['content'] - # print(item["choices"][0]["message"]['content']) - # responses += item["choices"][0]["message"]['content'] + print(item["choices"][0]["message"]['content']) + responses += item["choices"][0]["message"]['content'] # total_token_usage += item['usage']['total_tokens'] # 'usage': {'prompt_tokens': *, 'total_tokens': *, 'completion_tokens': *} user_context = messages + [{'role': 'assistant', 'content': responses}] @@ -185,13 +224,13 @@ class VLMS(Blackbox): model_name = data.get("model_name") prompt = data.get("prompt") - + settings: dict = data.get('settings') + if json_request: - img_data = data.get("img_data") - settings: dict = data.get('settings') + img_data = data.get("img_data") else: img_data = await data.get("img_data").read() - settings: dict = ast.literal_eval(data.get('settings')) + if settings: settings = ast.literal_eval(settings) if prompt is None: return JSONResponse(content={'error': "Question is required"}, status_code=status.HTTP_400_BAD_REQUEST)