diff --git a/README.md b/README.md
index f29c2fa..cdffb6b 100644
--- a/README.md
+++ b/README.md
@@ -91,5 +91,8 @@ blackbox:
   lazyloading: true
   vlms:
-    url: http://10.6.80.87:23333
+    urls:
+      qwen_vl: http://10.6.80.87:8000
+      qwen2_vl: http://10.6.80.87:23333
+      qwen2_vl_72b: http://10.6.80.91:23333
 ```
diff --git a/src/blackbox/vlms.py b/src/blackbox/vlms.py
index 2ff2b15..4fac80c 100644
--- a/src/blackbox/vlms.py
+++ b/src/blackbox/vlms.py
@@ -24,6 +24,8 @@ import io
 from PIL import Image
 from lmdeploy.serve.openai.api_client import APIClient
+from openai import OpenAI
+
 
 def is_base64(value) -> bool:
     try:
@@ -56,14 +58,15 @@ class VLMS(Blackbox):
             - skip_special_tokens (bool): Whether or not to remove special tokens in the decoding. Default to be True."""
         self.model_dict = vlm_config.urls
-        self.model_url = None
+        # self.model_url = None
+        self.available_models = {}
 
         self.temperature: float = 0.7
         self.top_p:float = 1
         self.max_tokens: (int |None) = 512
         self.repetition_penalty: float = 1
         self.stop: (str | List[str] |None) = ['<|endoftext|>','<|im_end|>']
-        self.top_k: (int) = None
+        self.top_k: (int) = 40
         self.ignore_eos: (bool) = False
         self.skip_special_tokens: (bool) = True
@@ -76,11 +79,16 @@ class VLMS(Blackbox):
             "top_k": self.top_k,
             "ignore_eos": self.ignore_eos,
             "skip_special_tokens": self.skip_special_tokens,
-            # "system_prompt":"",
-            # "vlm_model_name":" ",
         }
-
+        for model, url in self.model_dict.items():
+            try:
+                response = requests.get(url+'/health',timeout=3)
+                if response.status_code == 200:
+                    self.available_models[model] = url
+            except Exception as e:
+                # print(e)
+                pass
 
     def __call__(self, *args, **kwargs):
         return self.processing(*args, **kwargs)
@@ -100,21 +108,30 @@ class VLMS(Blackbox):
             response: a string
             history: a list
         """
+        config: dict = {
+            "lmdeploy_infer":True,
+            "system_prompt":"",
+            "vlm_model_name":"",
+        }
         if settings:
             for k in list(settings.keys()):
                 if k not in self.settings:
                     print("Warning: '{}' is not a support argument and ignore this argment, check the arguments {}".format(k,self.settings.keys()))
-                    settings.pop(k)
+                    config[k] = settings.pop(k)
             tmp = copy.deepcopy(self.settings)
             tmp.update(settings)
             settings = tmp
         else:
            settings = {}
+        config['lmdeploy_infer'] = str(config['lmdeploy_infer']).strip().lower() == 'true'
+
        if not prompt: prompt = '你是一个辅助机器人,请就此图做一个简短的概括性描述,包括图中的主体物品及状态,不超过50字。' if images else '你好'

        # Transform the images into base64 format where openai format need.
        if images:
            if is_base64(images): # image as base64 str
                images_data = images
@@ -131,7 +148,6 @@ class VLMS(Blackbox):
        # url = 'http://10.6.80.87:8000/' + model_name + '/'
        # data_input = {'model': model_name, 'prompt': prompt, 'img_data': images_data}
        # data = requests.post(url, json=data_input)
-        # print(data.text)
        # return data.text

        # 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'
@@ -157,13 +173,10 @@ class VLMS(Blackbox):
        #     'content': '图片中主要展示了一只老虎,它正在绿色的草地上休息。草地上有很多可以让人坐下的地方,而且看起来相当茂盛。背景比较模糊,可能是因为老虎的影响,让整个图片的其他部分都变得不太清晰了。'
        #     }
        # ]
-
+        if not user_context and config['system_prompt']: user_context = [{'role':'system','content': config['system_prompt']}]

        user_context = self.keep_last_k_images(user_context,k = 1)
-        if self.model_url is None: self.model_url = self._get_model_url(model_name)
-
-        api_client = APIClient(self.model_url)
-        # api_client = APIClient("http://10.6.80.91:23333")
-        model_name = api_client.available_models[0]
+        # if self.model_url is None: self.model_url = self._get_model_url(model_name)
+
        # Reformat input into openai format to request.
        if images_data:
            messages = user_context + [{
@@ -199,40 +212,60 @@ class VLMS(Blackbox):
        responses = ''
        total_token_usage = 0 # which can be used to count the cost of a query
-        for i,item in enumerate(api_client.chat_completions_v1(model=model_name,
+        model_url = self._get_model_url(config['vlm_model_name'])
+        # print(model_url)
+        # print(self.config['vlm_model_name'])
+        # print(self.available_models)
+        if config['lmdeploy_infer']:
+            api_client = APIClient(model_url)
+            model_name = api_client.available_models[0]
+            for i,item in enumerate(api_client.chat_completions_v1(model=model_name,
                                            messages=messages,stream = True,
                                            **settings,
                                            # session_id=,
                                            )):
-            # Stream output
-            print(item["choices"][0]["delta"]['content'],end='\n')
-            yield item["choices"][0]["delta"]['content']
-            responses += item["choices"][0]["delta"]['content']
+                # Stream output
+                # print(item["choices"][0]["delta"]['content'],end='\n')
+                yield item["choices"][0]["delta"]['content']
+                responses += item["choices"][0]["delta"]['content']

-            # print(item["choices"][0]["message"]['content'])
-            # responses += item["choices"][0]["message"]['content']
-            # total_token_usage += item['usage']['total_tokens'] # 'usage': {'prompt_tokens': *, 'total_tokens': *, 'completion_tokens': *}
+        else:
+            api_key = "EMPTY_API_KEY"
+            # print(model_url+'/v1')
+            api_client = OpenAI(api_key=api_key, base_url=model_url+'/v1')
+            model_name = api_client.models.list().data[0].id
+            for item in api_client.chat.completions.create(
+                    model=model_name,
+                    messages=messages,
+                    temperature=0.8,
+                    top_p=0.8,
+                    stream=True):
+                if item.choices[0].delta.content:
+                    yield item.choices[0].delta.content
+                    responses += item.choices[0].delta.content
+            # response = api_client.chat.completions.create(
+            #     model=model_name,
+            #     messages=messages,
+            #     temperature=0.8,
+            #     top_p=0.8)
+            # print(response.choices[0].message.content)
+            # return response.choices[0].message.content
+
+
        user_context = messages + [{'role': 'assistant', 'content': responses}]
        self.custom_print(user_context)
        # return responses, user_context

    def _get_model_url(self,model_name:str | None):
-        available_models = {}
-        for model, url in self.model_dict.items():
-            try:
-                response = requests.get(url,timeout=3)
-                if response.status_code == 200:
-                    available_models[model] = url
-            except Exception as e:
-                # print(e)
-                pass
-        if not available_models: print("There are no available running models and please check your endpoint urls.")
-        if model_name and model_name in available_models:
-            return available_models[model_name]
+        if not self.available_models: print("There are no available running models and please check your endpoint urls.")
+        if model_name and model_name in self.available_models:
+            return self.available_models[model_name]
        else:
-            model = random.choice(list(available_models.keys()))
+            model = random.choice(list(self.available_models.keys()))
            print(f"No such model {model_name}, using {model} instead.") if model_name else print(f"Using random model {model}.")
-            return available_models[model]
+            return self.available_models[model]

    def _into_openai_format(self, context:List[list]) -> List[dict]:
        """
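For context on the change above: the README now lists several named VLM endpoints under `vlms.urls`, and `VLMS.__init__` probes each endpoint's `/health` route once to build `self.available_models`, which `_get_model_url` then consults (falling back to a random healthy endpoint when the requested model is missing or unnamed). The standalone sketch below mirrors that probe-and-select logic; the `URLS` map and the `probe_available` / `pick_model` helpers are illustrative only and are not part of the patch.

```python
import random
import requests

# Illustrative endpoint map mirroring the new `vlms.urls` config block.
URLS = {
    "qwen_vl": "http://10.6.80.87:8000",
    "qwen2_vl": "http://10.6.80.87:23333",
    "qwen2_vl_72b": "http://10.6.80.91:23333",
}

def probe_available(urls: dict[str, str], timeout: float = 3.0) -> dict[str, str]:
    """Keep only endpoints whose /health route answers 200, as the patched __init__ does."""
    available = {}
    for name, url in urls.items():
        try:
            if requests.get(url + "/health", timeout=timeout).status_code == 200:
                available[name] = url
        except requests.RequestException:
            pass  # unreachable endpoint: skip it silently, matching the patched behaviour
    return available

def pick_model(available: dict[str, str], wanted: str | None) -> str:
    """Return the requested endpoint URL, or fall back to a random healthy one."""
    if not available:
        raise RuntimeError("no running VLM endpoints; check the urls in the config")
    if wanted in available:
        return available[wanted]
    name = random.choice(list(available))
    print(f"No such model {wanted!r}, using {name} instead.")
    return available[name]

if __name__ == "__main__":
    healthy = probe_available(URLS)
    print(pick_model(healthy, "qwen2_vl"))
```

Probing once at construction time (rather than on every request, as the old `_get_model_url` did) trades a little staleness for lower per-query latency; an endpoint that goes down after startup will still be selected until the object is recreated.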
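The new `lmdeploy_infer` switch keeps the original lmdeploy `APIClient` streaming path and, when set to false, talks to the same endpoint through the `openai` client pointed at `{url}/v1`. Below is a minimal sketch of that OpenAI-compatible path, assuming an lmdeploy or vLLM server is reachable at the hypothetical `BASE_URL`; note that the final streamed chunk typically carries `delta.content = None`, so the sketch skips empty deltas before accumulating the reply.

```python
from openai import OpenAI

# Hypothetical endpoint; any OpenAI-compatible server (lmdeploy, vLLM, ...) should work.
BASE_URL = "http://10.6.80.87:23333/v1"

client = OpenAI(api_key="EMPTY_API_KEY", base_url=BASE_URL)

# Discover the served model name from the endpoint rather than hard-coding it,
# mirroring `api_client.models.list().data[0].id` in the patch.
model_name = client.models.list().data[0].id

messages = [{"role": "user", "content": "Describe the main object in one sentence."}]

response_text = ""
for chunk in client.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=0.8,
        top_p=0.8,
        stream=True):
    delta = chunk.choices[0].delta.content
    if delta:  # the final chunk usually has content=None
        print(delta, end="", flush=True)
        response_text += delta
print()
```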