Files
beaver_project/app-instance/backend/nanobot/agent/context.py
2026-03-13 16:40:08 +08:00

253 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""上下文构建器:负责为每次 LLM 调用组装完整消息上下文。
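This module does three main things:
1. Generate the system prompt (identity, runtime info, bootstrap files, memory, skills summary);
2. Combine the message history with the current user input into `messages` the model can consume;
3. Append assistant/tool messages during the tool-call loop to keep the conversation state continuous.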
"""
import base64
import mimetypes
import platform
from pathlib import Path
from typing import Any
from nanobot.agent.agent_registry import AgentRegistry
from nanobot.agent.memory import MemoryStore
from nanobot.agent.skills import SkillsLoader
class ContextBuilder:
    """Assemble the message context that is sent to the LLM.

    Design goals:
    - Combine "static configuration" (AGENTS/USER/TOOLS, etc.) with "dynamic
      context" (time, session, history) in one place.
    - Keep the prompt structure stable to reduce variance in model behavior.
    - Keep the message-appending logic used around tool calls in a single
      location so it is easy to maintain.
    """

    # Bootstrap files are loaded and concatenated in this order; the order
    # affects the semantic priority of the resulting prompt.
    BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md", "IDENTITY.md"]

    def __init__(
        self,
        workspace: Path,
        skills_loader: SkillsLoader | None = None,
        agent_registry: AgentRegistry | None = None,
    ):
        """Create a builder bound to one workspace.

        Args:
            workspace: Root directory holding bootstrap files, memory and skills.
            skills_loader: Existing loader to reuse; a new one is created for
                ``workspace`` when omitted.
            agent_registry: Optional registry; when provided, its agent summary
                is added to the system prompt.
        """
        self.workspace = workspace
        # Memory and skills are scoped per workspace to avoid cross-project
        # contamination.
        self.memory = MemoryStore(workspace)
        # Reuse a loader built by the caller so the disk is not scanned twice.
        self.skills = skills_loader or SkillsLoader(workspace)
        # Optional: only set when multi-agent delegation is supported.
        self.agent_registry = agent_registry

    def build_system_prompt(
        self,
        skill_names: list[str] | None = None,
        execution_context: str | None = None,
    ) -> str:
        """Build the system prompt (identity + config + memory + skills).

        Args:
            skill_names: Currently unused; reserved so callers can later ask
                for specific skills only.
            execution_context: Extra run instructions for cron / system tasks
                that are not ordinary user messages. Blank or whitespace-only
                values are ignored.

        Returns:
            The prompt sections joined by a ``---`` separator line.
        """
        # 1) Core identity: current time, runtime environment, workspace path.
        parts = [self._get_identity()]

        # 2) Bootstrap files from the workspace, in BOOTSTRAP_FILES order.
        bootstrap = self._load_bootstrap_files()
        if bootstrap:
            parts.append(bootstrap)

        # 3) Long-term memory context supplied by the memory store.
        memory = self.memory.get_memory_context()
        if memory:
            parts.append(f"# Memory\n\n{memory}")

        # 4) Skills are loaded progressively.
        # 4.1 "Always" skills: their full content goes straight into the prompt.
        always_skills = self.skills.get_always_skills()
        if always_skills:
            always_content = self.skills.load_skills_for_context(always_skills)
            if always_content:
                parts.append(f"# Active Skills\n\n{always_content}")

        # 4.2 Available skills: summary only; the agent reads the details on
        # demand with read_file, which keeps the token footprint bounded.
        skills_summary = self.skills.build_skills_summary()
        if skills_summary:
            parts.append(f"""# Skills
The following skills extend your capabilities. To use a skill, read its SKILL.md file using the read_file tool.
Skills with available="false" need dependencies installed first - you can try installing them with apt/brew.
{skills_summary}""")

        if self.agent_registry:
            # List the delegable agents so the model knows what `spawn` can call.
            agents_summary = self.agent_registry.build_agents_summary()
            if agents_summary:
                parts.append(f"""# Available Agents
The following agents can be delegated to via the `spawn` tool.
Use `target` for a single agent and `targets` for a group.
{agents_summary}""")

        # Strip before testing so a whitespace-only value does not produce an
        # empty "# Execution Context" section.
        execution_text = execution_context.strip() if execution_context else ""
        if execution_text:
            parts.append(f"# Execution Context\n\n{execution_text}")

        # A separator line between sections keeps the prompt readable and its
        # structure stable.
        return "\n\n---\n\n".join(parts)

    def _get_identity(self) -> str:
        """Return the core identity section of the system prompt.

        The section states the current local time and zone, the runtime
        platform, the absolute workspace path, and the tool/memory guidelines.
        """
        from datetime import datetime

        # Time and zone are stated explicitly so the model does not guess the
        # current time. Both come from one aware datetime so the label always
        # matches the timestamp.
        current = datetime.now().astimezone()
        now = current.strftime("%Y-%m-%d %H:%M (%A)")
        tz = current.tzname() or "UTC"

        # An absolute workspace path helps the model produce accurate file
        # operations.
        workspace_path = str(self.workspace.expanduser().resolve())

        # Runtime details help the model choose platform-appropriate commands
        # (e.g. macOS vs. Linux differences).
        system = platform.system()
        runtime = f"{'macOS' if system == 'Darwin' else system} {platform.machine()}, Python {platform.python_version()}"

        return f"""# nanobot 🐈
You are nanobot, a helpful AI assistant.
## Current Time
{now} ({tz})
## Runtime
{runtime}
## Workspace
Your workspace is at: {workspace_path}
- Long-term memory: {workspace_path}/memory/MEMORY.md
- History log: {workspace_path}/memory/HISTORY.md (grep-searchable)
- Custom skills: {workspace_path}/skills/{{skill-name}}/SKILL.md
Reply directly with text for conversations. Only use the 'message' tool to send to a specific chat channel.
## Tool Call Guidelines
- Before calling tools, you may briefly state your intent (e.g. "Let me check that"), but NEVER predict or describe the expected result before receiving it.
- Before modifying a file, read it first to confirm its current content.
- Do not assume a file or directory exists — use list_dir or read_file to verify.
- After writing or editing a file, re-read it if accuracy matters.
- If a tool call fails, analyze the error before retrying with a different approach.
- Do not write directly into `{workspace_path}/skills`; new or updated skills must go through the review flow before activation.
## Memory
- Remember important facts: write to {workspace_path}/memory/MEMORY.md
- Recall past events: grep {workspace_path}/memory/HISTORY.md"""

    def _load_bootstrap_files(self) -> str:
        """Read the workspace bootstrap files and concatenate them.

        Returns:
            One ``## <filename>`` section per bootstrap file found, joined by
            blank lines, or an empty string when none are present.
        """
        parts = []
        for filename in self.BOOTSTRAP_FILES:
            file_path = self.workspace / filename
            # Missing entries are skipped silently so the defaults keep
            # working. is_file() (rather than exists()) also skips a directory
            # that happens to carry a bootstrap file name, which read_text
            # could not read.
            if not file_path.is_file():
                continue
            content = file_path.read_text(encoding="utf-8")
            parts.append(f"## {filename}\n\n{content}")
        return "\n\n".join(parts)

    def build_messages(
        self,
        history: list[dict[str, Any]],
        current_message: str,
        skill_names: list[str] | None = None,
        execution_context: str | None = None,
        media: list[str] | None = None,
        channel: str | None = None,
        chat_id: str | None = None,
    ) -> list[dict[str, Any]]:
        """Build the complete ``messages`` list for one LLM call.

        Args:
            history: Prior messages, appended as-is after the system prompt.
            current_message: Text of the current user input.
            skill_names: Forwarded to ``build_system_prompt``.
            execution_context: Forwarded to ``build_system_prompt``.
            media: Optional local file paths; image files are inlined into the
                user message.
            channel: Current channel; written to the prompt only together with
                ``chat_id``.
            chat_id: Current chat id; written to the prompt only together with
                ``channel``.

        Returns:
            ``[system message, *history, current user message]``.
        """
        # The first message is always the system prompt.
        system_prompt = self.build_system_prompt(skill_names, execution_context=execution_context)
        if channel and chat_id:
            # Expose the session routing info so the model can make
            # cross-channel decisions.
            system_prompt += f"\n\n## Current Session\nChannel: {channel}\nChat ID: {chat_id}"

        messages: list[dict[str, Any]] = [{"role": "system", "content": system_prompt}]
        # History comes next, unchanged.
        messages.extend(history)
        # Finally the current user input; images turn it into multimodal content.
        user_content = self._build_user_content(current_message, media)
        messages.append({"role": "user", "content": user_content})
        return messages

    def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
        """Build the user content as plain text or "images + text" parts.

        Args:
            text: The user's text.
            media: Optional local file paths. Only existing files whose guessed
                MIME type starts with ``image/`` are used.

        Returns:
            ``text`` unchanged when no usable image exists; otherwise a list of
            ``image_url`` parts followed by one ``text`` part.
        """
        # No media: take the simplest path and return plain text.
        if not media:
            return text

        images = []
        for path in media:
            p = Path(path)
            mime, _ = mimetypes.guess_type(path)
            # Only local image files are accepted; other media types are not
            # injected into the model content.
            if not p.is_file() or not mime or not mime.startswith("image/"):
                continue
            try:
                raw = p.read_bytes()
            except OSError:
                # An unreadable file (permissions, removed after the check) is
                # skipped like any other invalid media entry, so one bad
                # attachment does not abort the whole message.
                continue
            # Inline the image as a data URL in an image_url content part.
            b64 = base64.b64encode(raw).decode()
            images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})

        # Fall back to plain text when no image survived, so no list without
        # images is sent.
        if not images:
            return text
        # Images go first and the text last.
        return images + [{"type": "text", "text": text}]

    def add_tool_result(
        self,
        messages: list[dict[str, Any]],
        tool_call_id: str,
        tool_name: str,
        result: str
    ) -> list[dict[str, Any]]:
        """Append a tool execution result to ``messages``.

        Args:
            messages: Conversation list; mutated in place.
            tool_call_id: Id of the tool call this result answers.
            tool_name: Name of the tool that was run.
            result: Tool output text.

        Returns:
            The same ``messages`` list, for convenience.
        """
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call_id,
            "name": tool_name,
            "content": result,
        })
        return messages

    def add_assistant_message(
        self,
        messages: list[dict[str, Any]],
        content: str | None,
        tool_calls: list[dict[str, Any]] | None = None,
        reasoning_content: str | None = None,
    ) -> list[dict[str, Any]]:
        """Append an assistant message, optionally with tool calls/reasoning.

        Args:
            messages: Conversation list; mutated in place.
            content: Assistant text; may be ``None``.
            tool_calls: Tool calls requested by the assistant; attached only
                when non-empty.
            reasoning_content: Reasoning text; attached whenever it is not
                ``None`` (an empty string is kept).

        Returns:
            The same ``messages`` list, for convenience.
        """
        # Always include the "content" key: some providers reject the request
        # when the key is missing, even if its value is None.
        msg: dict[str, Any] = {"role": "assistant", "content": content}
        if tool_calls:
            msg["tool_calls"] = tool_calls
        # reasoning_content is specific to "thinking" models; attach it only
        # when a value was supplied.
        if reasoning_content is not None:
            msg["reasoning_content"] = reasoning_content
        messages.append(msg)
        return messages