jarvis-models/src/blackbox/fastchat.py

from typing import Any, Coroutine
from fastapi import Request, Response, status
from fastapi.responses import JSONResponse
from .blackbox import Blackbox
import requests
import json
from injector import singleton


@singleton
class Fastchat(Blackbox):
    def __call__(self, *args, **kwargs):
        return self.processing(*args, **kwargs)

    def valid(self, *args, **kwargs) -> bool:
        data = args[0]
        return isinstance(data, list)

    # Available model_name values: Qwen1.5-14B-Chat, internlm2-chat-20b
    def processing(self, model_name, prompt, template, context: list, temperature, top_p, top_k, n, max_tokens) -> str:
        if context is None:
            context = []
        url = 'http://120.196.116.194:48892/v1/chat/completions'
        # context may be an empty list or the user's conversation history, e.g.:
        # context = [
        #     {
        #         "role": "user",
        #         "content": "The core idea of intelligent agents"
        #     },
        #     {
        #         "role": "assistant",
        #         "content": "The core idea of an intelligent agent is to apply AI in the role of a problem solver: it algorithmically simulates human decision-making, perceiving its environment, learning, planning, and executing actions to accomplish a specific task or goal. Its aim is to solve problems efficiently through self-adaptation and optimization."
        #     },
        # ]
        prompt_template = [
            {"role": "system", "content": template},
        ]
        fastchat_inputs = {
            "model": model_name,
            "messages": prompt_template + context + [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "n": n,
            "max_tokens": max_tokens,
            "stream": False,
        }
        # Full request schema accepted by the endpoint, for reference:
        # {
        #     "model": "string",
        #     "messages": "string",
        #     "temperature": 0.7,      # Between 0 and 2. Higher values like 0.8 make the output more random, while lower values like 0.2 make it more focused and deterministic.
        #     "top_p": 1,              # Nucleus sampling: the next token is drawn only from the highest-probability subset of candidates whose cumulative probability reaches top_p.
        #     "top_k": -1,             # Restricts sampling to the k most likely tokens; e.g. top_k = 3 keeps only the top three. -1 leaves it unrestricted.
        #     "n": 1,                  # How many chat completion choices to generate for each input message.
        #     "max_tokens": 1024,      # The maximum number of tokens to generate in the chat completion.
        #     "stop": [
        #         "string"
        #     ],
        #     "stream": False,
        #     "presence_penalty": 0,   # Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
        #     "frequency_penalty": 0,  # Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
        #     "user": "string"
        # }
        fastchat_response = requests.post(url, json=fastchat_inputs)
        # user_message = fastchat_inputs["messages"]
        # context.append(user_message)
        assistant_message = fastchat_response.json()["choices"][0]["message"]
        # context.append(assistant_message)
        fastchat_content = assistant_message["content"]
        return fastchat_content
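
    # Minimal usage sketch for processing (hypothetical values; assumes the
    # FastChat endpoint above is reachable):
    #
    #     blackbox = Fastchat()
    #     answer = blackbox.processing("Qwen1.5-14B-Chat", "What is an agent?",
    #                                  "", [], 0.7, 1, -1, 1, 1024)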

    async def fast_api_handler(self, request: Request) -> Response:
        try:
            data = await request.json()
        except Exception:
            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
        user_model_name = data.get("model_name")
        user_context = data.get("context")
        user_question = data.get("question")
        user_template = data.get("template")
        user_temperature = data.get("temperature")
        user_top_p = data.get("top_p")
        user_top_k = data.get("top_k")
        user_n = data.get("n")
        user_max_tokens = data.get("max_tokens")
        if user_question is None:
            return JSONResponse(content={"error": "question is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        if user_model_name is None or user_model_name == "" or user_model_name.isspace():
            user_model_name = "Qwen1.5-14B-Chat"
        if user_template is None or user_template.isspace():
            # user_template defines the LLM's tone, e.g. template = "Speak in the tone of a clown.";
            # it may be an empty string or a user-defined tone.
            user_template = ""
        if user_temperature is None or user_temperature == "":
            user_temperature = 0.7
        if user_top_p is None or user_top_p == "":
            user_top_p = 1
        if user_top_k is None or user_top_k == "":
            user_top_k = -1
        if user_n is None or user_n == "":
            user_n = 1
        if user_max_tokens is None or user_max_tokens == "":
            user_max_tokens = 1024
        return JSONResponse(
            content={"response": self.processing(user_model_name, user_question, user_template, user_context,
                                                 user_temperature, user_top_p, user_top_k, user_n, user_max_tokens)},
            status_code=status.HTTP_200_OK,
        )
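
A minimal client-side sketch of how this handler could be exercised, assuming the application mounts fast_api_handler on a POST route; the "/fastchat" path and port below are illustrative assumptions, not taken from this file. Only "question" is required; every other field falls back to the defaults shown above.

import requests

# Hypothetical route and port; the real mount point is configured outside this file.
resp = requests.post(
    "http://localhost:8000/fastchat",
    json={
        "model_name": "Qwen1.5-14B-Chat",   # optional; this is also the server-side default
        "question": "What is the core idea of an intelligent agent?",
        "template": "Answer concisely.",    # optional system prompt / tone
        "context": [],                      # optional prior chat history
        "temperature": 0.7,
    },
)
print(resp.json()["response"])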