feat: four mode agent

This commit is contained in:
0Xiao0
2026-06-04 15:54:09 +08:00
parent 52e6d3cd9c
commit 78b9138c17
3 changed files with 385 additions and 8 deletions

View File

@ -42,7 +42,6 @@ logger = logging.getLogger("custom-agent")
# Path of the ".env" file located in the same directory as this module.
CUSTOM_ENV_PATH = Path(__file__).with_name(".env")
# Load variables from that file before the os.getenv() lookups below run.
load_dotenv(dotenv_path=CUSTOM_ENV_PATH)
# Agent name read from CUSTOM_AGENT_NAME; an empty string when the variable is unset.
AGENT_NAME = os.getenv("CUSTOM_AGENT_NAME", "")
ROOM_LOCATOR_INSTRUCTIONS = """
你是一个房间物品定位助手。
@ -72,6 +71,95 @@ VOICE_INPUT_MODE = "voice"
# Input-mode identifiers.
VISION_VOICE_INPUT_MODE = "vision_voice"
AUTO_INPUT_MODE = "auto"
# NOTE(review): presumably the topic name on which vision frames are exchanged — confirm against its users.
VISION_FRAME_TOPIC = "vision.frame"
# Fallback key into AGENT_PROFILES when no valid profile is configured or inferred.
DEFAULT_AGENT_PROFILE = "normal"
@dataclass(frozen=True)
class AgentProfile:
    """Immutable set of defaults describing one agent mode.

    Each field supplies the fallback for a setting that an environment
    variable may override.
    """

    # Used as the agent name when CUSTOM_AGENT_NAME is not set.
    agent_name: str
    # Default value for CUSTOM_LLM_PROVIDER.
    llm_provider: str
    # Default value for CUSTOM_AGENT_INPUT_MODE.
    input_mode: str
# Registry of the built-in agent modes, keyed by profile name.
# Each row below is (profile key, agent name, LLM provider, input mode).
AGENT_PROFILES = {
    profile_key: AgentProfile(
        agent_name=agent_name,
        llm_provider=llm_provider,
        input_mode=input_mode,
    )
    for profile_key, agent_name, llm_provider, input_mode in (
        ("normal", "normal-agent", "openai-compatible", VOICE_INPUT_MODE),
        ("beaver", "beaver-agent", "beaver", VOICE_INPUT_MODE),
        ("vision-normal", "vision-normal-agent", "openai-compatible", VISION_VOICE_INPUT_MODE),
        ("vision-beaver", "vision-beaver-agent", "beaver", VISION_VOICE_INPUT_MODE),
    )
}
# Alternative spellings accepted for CUSTOM_AGENT_PROFILE, mapped to the
# canonical profile key they stand for. Aliases are grouped by target profile.
AGENT_PROFILE_ALIASES = {
    **dict.fromkeys(
        ("default", "openai", "openai-compatible", "llm", "text", "voice"),
        "normal",
    ),
    **dict.fromkeys(
        ("vision", "vision-llm", "vision-openai", "vision-openai-compatible"),
        "vision-normal",
    ),
}
def _normalize_agent_profile(value: str | None) -> str:
    """Translate a user-supplied profile string into a key of ``AGENT_PROFILES``.

    The input is stripped, lower-cased and has underscores replaced by
    hyphens; aliases from ``AGENT_PROFILE_ALIASES`` are then resolved.

    Args:
        value: Raw profile string, possibly ``None`` or blank.

    Returns:
        A valid profile key. ``DEFAULT_AGENT_PROFILE`` is returned for a
        missing/blank value, and (after logging a warning) for a value that
        does not name a known profile.
    """
    cleaned = value.strip() if value else ""
    if not cleaned:
        return DEFAULT_AGENT_PROFILE

    candidate = cleaned.lower().replace("_", "-")
    candidate = AGENT_PROFILE_ALIASES.get(candidate, candidate)
    if candidate not in AGENT_PROFILES:
        # Log the raw value so the operator sees exactly what was configured.
        logger.warning(
            "Invalid CUSTOM_AGENT_PROFILE=%r, using %s",
            value,
            DEFAULT_AGENT_PROFILE,
        )
        return DEFAULT_AGENT_PROFILE
    return candidate
def _agent_profile_from_name(agent_name: str | None) -> str | None:
    """Find the profile whose ``agent_name`` matches the given agent name.

    The name is compared after stripping, lower-casing and replacing
    underscores with hyphens.

    Args:
        agent_name: Agent name to look up, possibly ``None`` or blank.

    Returns:
        The key of the first matching profile in ``AGENT_PROFILES``, or
        ``None`` when the name is missing, blank, or matches no profile.
    """
    wanted = (agent_name or "").strip().lower().replace("_", "-")
    if not wanted:
        return None
    return next(
        (
            key
            for key, candidate in AGENT_PROFILES.items()
            if candidate.agent_name == wanted
        ),
        None,
    )
def _selected_agent_profile_name() -> str:
    """Decide which agent profile is active, based on the environment.

    Precedence:
        1. A non-blank ``CUSTOM_AGENT_PROFILE`` (normalized; invalid values
           fall back to the default inside ``_normalize_agent_profile``).
        2. A profile inferred from ``CUSTOM_AGENT_NAME``.
        3. ``DEFAULT_AGENT_PROFILE``.

    Returns:
        A key of ``AGENT_PROFILES``.
    """
    explicit = os.getenv("CUSTOM_AGENT_PROFILE")
    if explicit and explicit.strip():
        return _normalize_agent_profile(explicit)

    inferred = _agent_profile_from_name(os.getenv("CUSTOM_AGENT_NAME"))
    return DEFAULT_AGENT_PROFILE if inferred is None else inferred
# Resolve the active profile once, at import time.
AGENT_PROFILE_NAME = _selected_agent_profile_name()
AGENT_PROFILE = AGENT_PROFILES[AGENT_PROFILE_NAME]
# A non-empty CUSTOM_AGENT_NAME wins; otherwise the profile's own agent name is used.
AGENT_NAME = os.getenv("CUSTOM_AGENT_NAME") or AGENT_PROFILE.agent_name
DEFAULT_EMOTION = "neutral"
EMOTION_LABELS = {
@ -614,7 +702,25 @@ def _model_image_save_dir_from_env() -> Path | None:
return Path(__file__).with_name("model_images")
# Module-level AgentServer constructed with its default arguments.
server = AgentServer()
def _agent_server_from_env() -> AgentServer:
    """Build the ``AgentServer``, honouring ``CUSTOM_AGENT_HTTP_PORT``.

    Behaviour by value of the environment variable:
        * unset or blank: ``AgentServer()`` is created with its defaults;
        * an integer in ``0..65535``: passed through as ``port``;
        * anything else: a warning is logged and ``port=0`` is used.

    Returns:
        The configured ``AgentServer`` instance.
    """
    configured_port = os.getenv("CUSTOM_AGENT_HTTP_PORT")
    # A blank value (e.g. "CUSTOM_AGENT_HTTP_PORT=" in the .env file) means
    # "not configured", the same way a blank CUSTOM_AGENT_PROFILE is treated,
    # rather than being reported as an invalid integer.
    if configured_port is None or not configured_port.strip():
        return AgentServer()

    try:
        port = int(configured_port)
    except ValueError:
        logger.warning("Invalid integer for CUSTOM_AGENT_HTTP_PORT=%r, using 0", configured_port)
        port = 0
    else:
        # Only a successfully parsed value needs the range check.
        if not 0 <= port <= 65535:
            logger.warning("Invalid CUSTOM_AGENT_HTTP_PORT=%r, using 0", configured_port)
            port = 0
    return AgentServer(port=port)
# Module-level AgentServer, configured from CUSTOM_AGENT_HTTP_PORT at import time.
server = _agent_server_from_env()
def prewarm(proc: JobProcess) -> None:
@ -640,10 +746,10 @@ async def entrypoint(ctx: JobContext) -> None:
LLM_BASE_URL = os.getenv("CUSTOM_LLM_BASE_URL")
LLM_MODEL = os.getenv("CUSTOM_LLM_MODEL", "qwen-max")
LLM_API_KEY = os.getenv("CUSTOM_LLM_API_KEY")
LLM_PROVIDER = os.getenv("CUSTOM_LLM_PROVIDER", "openai").strip().lower()
LLM_PROVIDER = os.getenv("CUSTOM_LLM_PROVIDER", AGENT_PROFILE.llm_provider).strip().lower()
TEXT_LLM_MODEL = os.getenv("CUSTOM_TEXT_LLM_MODEL", LLM_MODEL)
VISION_LLM_MODEL = os.getenv("CUSTOM_VISION_LLM_MODEL", LLM_MODEL)
INPUT_MODE = _normalize_input_mode(os.getenv("CUSTOM_AGENT_INPUT_MODE"))
INPUT_MODE = _normalize_input_mode(os.getenv("CUSTOM_AGENT_INPUT_MODE", AGENT_PROFILE.input_mode))
if LLM_PROVIDER not in {
"openai",
"openai-compatible",
@ -656,7 +762,10 @@ async def entrypoint(ctx: JobContext) -> None:
if LLM_PROVIDER in {"openai", "openai-compatible"} and not LLM_API_KEY:
raise RuntimeError(f"CUSTOM_LLM_API_KEY is not set in {CUSTOM_ENV_PATH}")
logger.info(
"Using LLM provider=%s model=%s base_url=%s",
"Using agent profile=%s agent_name=%s input_mode=%s llm_provider=%s model=%s base_url=%s",
AGENT_PROFILE_NAME,
AGENT_NAME or "<automatic>",
INPUT_MODE,
LLM_PROVIDER,
LLM_MODEL,
LLM_BASE_URL or "OpenAI default",