From cdf5c214d0e94cd504c9df9007c2b80cdc5d1a9d Mon Sep 17 00:00:00 2001
From: Ivan087
Date: Wed, 5 Mar 2025 14:46:16 +0800
Subject: [PATCH] support vlms streaming output text

---
 src/blackbox/vlms.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/blackbox/vlms.py b/src/blackbox/vlms.py
index a5b3370..b2716e0 100644
--- a/src/blackbox/vlms.py
+++ b/src/blackbox/vlms.py
@@ -1,5 +1,6 @@
 from fastapi import Request, Response, status
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, StreamingResponse
+from sse_starlette.sse import EventSourceResponse
 from injector import singleton,inject
 from typing import Optional, List
 
@@ -194,20 +195,21 @@ class VLMS(Blackbox):
         responses = ''
         total_token_usage = 0 # which can be used to count the cost of a query
         for i,item in enumerate(api_client.chat_completions_v1(model=model_name,
-                        messages=messages,#stream = True,
+                        messages=messages,stream = True,
                         **settings,
                         # session_id=,
                         )):
             # Stream output
-            # print(item["choices"][0]["delta"]['content'],end='')
-            # responses += item["choices"][0]["delta"]['content']
+            print(item["choices"][0]["delta"]['content'],end='\n')
+            yield item["choices"][0]["delta"]['content']
+            responses += item["choices"][0]["delta"]['content']
 
-            print(item["choices"][0]["message"]['content'])
-            responses += item["choices"][0]["message"]['content']
+            # print(item["choices"][0]["message"]['content'])
+            # responses += item["choices"][0]["message"]['content']
             # total_token_usage += item['usage']['total_tokens'] # 'usage': {'prompt_tokens': *, 'total_tokens': *, 'completion_tokens': *}
         user_context = messages + [{'role': 'assistant', 'content': responses}]
         self.custom_print(user_context)
-        return responses, user_context
+        # return responses, user_context
 
     def _get_model_url(self,model_name:str | None):
         available_models = {}
@@ -346,8 +348,11 @@ class VLMS(Blackbox):
 
         # if model_name is None or model_name.isspace():
         #     model_name = "Qwen-VL-Chat"
 
+        # response,_ = self.processing(prompt, img_data,settings, model_name,user_context=user_context)
+        # return StreamingResponse(self.processing(prompt, img_data,settings, model_name,user_context=user_context), status_code=status.HTTP_200_OK)
+        return EventSourceResponse(self.processing(prompt, img_data,settings, model_name,user_context=user_context), status_code=status.HTTP_200_OK)
+
+        # HTTP JsonResponse
         response, history = self.processing(prompt, img_data,settings, model_name,user_context=user_context)
-        # jsonresp = str(JSONResponse(content={"response": self.processing(prompt, img_data, model_name)}).body, "utf-8")
-
-        return JSONResponse(content={"response": response}, status_code=status.HTTP_200_OK)
\ No newline at end of file
+        # return JSONResponse(content={"response": response}, status_code=status.HTTP_200_OK)
\ No newline at end of file
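
For context, here is a minimal, self-contained sketch of the streaming pattern this patch adopts: `processing` becomes a generator that yields each streamed `delta` chunk, and the route wraps that generator in `EventSourceResponse` so every chunk is pushed to the client as a server-sent event. The `app`, the `/chat` route, and `fake_chat_completion` are illustrative stand-ins, not part of `src/blackbox/vlms.py`.

```python
from fastapi import FastAPI, status
from sse_starlette.sse import EventSourceResponse

app = FastAPI()

def fake_chat_completion(prompt: str):
    # Stand-in for api_client.chat_completions_v1(..., stream=True):
    # yields OpenAI-style streaming chunks carrying a "delta" payload
    # (the non-streaming shape would carry "message" instead).
    for word in ["Hello", ", ", "world", "!"]:
        yield {"choices": [{"delta": {"content": word}}]}

def processing(prompt: str):
    # Mirrors the patched VLMS.processing: accumulate the full reply in
    # `responses` while yielding each delta so the response can stream.
    responses = ""
    for item in fake_chat_completion(prompt):
        chunk = item["choices"][0]["delta"]["content"]
        responses += chunk
        yield chunk
    # A generator cannot also `return responses, user_context` to the
    # caller, which is why the patch comments that return out.

@app.get("/chat")
def chat(prompt: str = "hi"):
    # sse-starlette accepts a sync generator and serves it as
    # a text/event-stream response, one "data:" frame per yield.
    return EventSourceResponse(processing(prompt), status_code=status.HTTP_200_OK)
```

The commented-out `StreamingResponse` line in the patch is the plainer alternative: it would stream the same chunks as a raw response body, whereas `EventSourceResponse` frames them as `text/event-stream` events that browsers can consume via `EventSource`.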
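
On the consumer side, something like the sketch below would read the stream and reassemble the text. The URL, port, and `prompt` query parameter are assumptions for illustration; SSE frames arrive as `data: <chunk>` lines, with at most one space after the colon stripped per the SSE convention.

```python
import httpx  # third-party HTTP client, used here for its streaming API

def read_sse(url: str = "http://localhost:8000/chat") -> None:
    # Stream the response body and print each SSE "data:" payload as it arrives.
    with httpx.stream("GET", url, params={"prompt": "hi"}, timeout=None) as resp:
        for line in resp.iter_lines():
            if line.startswith("data:"):
                payload = line[len("data:"):]
                if payload.startswith(" "):
                    payload = payload[1:]  # drop the single space after "data:"
                print(payload, end="", flush=True)
    print()

if __name__ == "__main__":
    read_sse()
```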