# beaver_project/app-instance/backend/nanobot/agent/context.py

"""上下文构建器：负责为每次 LLM 调用组装完整消息上下文。

本模块主要做三件事：
1. 生成 system prompt（身份、运行时信息、bootstrap 文件、记忆、技能摘要）；
2. 将历史消息与当前用户输入拼接成模型可消费的 messages；
3. 在工具调用循环中追加 assistant/tool 消息，维持对话状态连续性。
"""

import base64
import mimetypes
import platform
from pathlib import Path
from typing import Any

from nanobot.agent.agent_registry import AgentRegistry
from nanobot.agent.memory import MemoryStore
from nanobot.agent.skills import SkillsLoader


class ContextBuilder:
    """
    Agent 上下文装配器。

    设计目标：
    - 把“静态配置”（AGENTS/USER/TOOLS 等）与“动态上下文”（时间、会话、历史）统一拼装；
    - 保持 prompt 结构稳定，降低模型行为波动；
    - 让工具调用前后的消息追加逻辑集中在一个位置，便于维护。
    """

    # Bootstrap files are loaded and concatenated in exactly this order; the
    # order influences the semantic priority of the resulting prompt text.
    BOOTSTRAP_FILES = ["AGENTS.md", "SOUL.md", "USER.md", "TOOLS.md", "IDENTITY.md"]

    def __init__(
        self,
        workspace: Path,
        skills_loader: SkillsLoader | None = None,
        agent_registry: AgentRegistry | None = None,
    ):
        self.workspace = workspace
        # 记忆与技能都按 workspace 维度隔离，避免跨项目污染。
        self.memory = MemoryStore(workspace)
        # 若上层已构造好 SkillsLoader / AgentRegistry，则复用，避免重复扫描磁盘。
        self.skills = skills_loader or SkillsLoader(workspace)
        # agent_registry 可选：只有支持多 agent 委派时才会把可用 agent 摘要塞进 prompt。
        self.agent_registry = agent_registry

    def build_system_prompt(
        self,
        skill_names: list[str] | None = None,
        execution_context: str | None = None,
    ) -> str:
        """构建 system prompt（身份 + 配置 + 记忆 + 技能信息）。"""
        # skill_names 目前作为接口预留，便于未来按需只加载指定技能。
        parts = []

        # 1) 核心身份段：包含当前时间、系统环境、工作区路径等动态信息。
        parts.append(self._get_identity())

        # 2) workspace 里的 bootstrap 文件（若存在）按顺序拼接。
        bootstrap = self._load_bootstrap_files()
        if bootstrap:
            parts.append(bootstrap)

        # 3) 长期记忆上下文（来自 memory/MEMORY.md 等）。
        memory = self.memory.get_memory_context()
        if memory:
            parts.append(f"# Memory\n\n{memory}")

        # 4) 技能采用“渐进加载”策略。
        # 4.1 always 技能：直接把完整内容塞进当前 prompt。
        always_skills = self.skills.get_always_skills()
        if always_skills:
            always_content = self.skills.load_skills_for_context(always_skills)
            if always_content:
                parts.append(f"# Active Skills\n\n{always_content}")

        # 4.2 可用技能：只放摘要，具体内容让 agent 运行时按需 read_file。
        # 这样可以控制 token 体积，避免把所有技能全文塞入上下文。
        skills_summary = self.skills.build_skills_summary()
        if skills_summary:
            parts.append(f"""# Skills

The following skills extend your capabilities. To use a skill, read its SKILL.md file using the read_file tool.
Skills with available="false" need dependencies installed first - you can try installing them with apt/brew.

{skills_summary}""")

        if self.agent_registry:
            parts.append("""# Delegation Tools

Use `spawn_subagent` when the task should go to one delegated worker.
Use `spawn_agent_team` when the task should be explored in parallel by multiple workers.
At the top level, you do not need to choose concrete downstream agents.
Use the `skills` argument when the delegated worker or team must follow specific skills.""")

        if execution_context:
            # `execution_context` 用于 cron / system task 这类“不是普通用户消息”的额外运行说明。
            parts.append(f"# Execution Context\n\n{execution_context.strip()}")

        # 各块之间用分隔线拼接，提升提示词可读性与结构稳定性。
        return "\n\n---\n\n".join(parts)

    def _get_identity(self) -> str:
        """生成核心身份段。"""
        import time as _time
        from datetime import datetime
        # 时间与时区在 system prompt 中显式给出，减少模型对“当前时间”的猜测。
        now = datetime.now().strftime("%Y-%m-%d %H:%M (%A)")
        tz = _time.strftime("%Z") or "UTC"
        # 固化绝对工作区路径，帮助模型生成更准确的文件操作指令。
        workspace_path = str(self.workspace.expanduser().resolve())
        # 运行时信息可帮助模型在跨平台命令选择时更稳健（如 macOS/Linux 差异）。
        system = platform.system()
        runtime = f"{'macOS' if system == 'Darwin' else system} {platform.machine()}, Python {platform.python_version()}"

        return f"""# Boardware Genius

You are Boardware Genius, a helpful AI assistant.

## Current Time
{now} ({tz})

## Runtime
{runtime}

## Workspace
Your workspace is at: {workspace_path}
- Long-term memory: {workspace_path}/memory/MEMORY.md
- History log: {workspace_path}/memory/HISTORY.md (grep-searchable)
- Custom skills: {workspace_path}/skills/{{skill-name}}/SKILL.md

Reply directly with text for conversations. Only use the 'message' tool to send to a specific chat channel.

## Tool Call Guidelines
- Before calling tools, you may briefly state your intent (e.g. "Let me check that"), but NEVER predict or describe the expected result before receiving it.
- Before modifying a file, read it first to confirm its current content.
- Do not assume a file or directory exists — use list_dir or read_file to verify.
- After writing or editing a file, re-read it if accuracy matters.
- If a tool call fails, analyze the error before retrying with a different approach.
- Do not write directly into `{workspace_path}/skills`; new or updated skills must go through the review flow before activation.

## Delegation Policy
- Solve simple tasks yourself when the work is short, direct, and does not benefit from delegation.
- Delegate only when the task is complex, multi-step, time-consuming, or benefits from specialized/parallel work.
- Use `spawn_subagent` for one focused delegated worker when only the final result matters.
- Use `spawn_agent_team` when multiple agents should explore the task in parallel, compare findings, or work across separate areas.
- Do not delegate by default if you can complete the task reliably in the current turn.
- Do not create or modify persistent local sub-agents unless the user explicitly asks for a reusable long-lived worker.

## Memory
- Remember important facts: write to {workspace_path}/memory/MEMORY.md
- Recall past events: grep {workspace_path}/memory/HISTORY.md"""

    def _load_bootstrap_files(self) -> str:
        """从 workspace 读取 bootstrap 文件并拼接。"""
        parts = []

        for filename in self.BOOTSTRAP_FILES:
            file_path = self.workspace / filename
            if file_path.exists():
                # 缺失文件时静默跳过，保持默认可用。
                content = file_path.read_text(encoding="utf-8")
                parts.append(f"## {filename}\n\n{content}")

        return "\n\n".join(parts) if parts else ""

    def build_messages(
        self,
        history: list[dict[str, Any]],
        current_message: str,
        skill_names: list[str] | None = None,
        execution_context: str | None = None,
        media: list[str] | None = None,
        channel: str | None = None,
        chat_id: str | None = None,
    ) -> list[dict[str, Any]]:
        """构建一次 LLM 调用的完整 messages 数组。"""
        messages = []

        # 第 1 条固定是 system prompt。
        system_prompt = self.build_system_prompt(skill_names, execution_context=execution_context)
        if channel and chat_id:
            # 把当前会话路由信息也写入系统提示，便于模型做跨渠道决策。
            system_prompt += f"\n\n## Current Session\nChannel: {channel}\nChat ID: {chat_id}"
        messages.append({"role": "system", "content": system_prompt})

        # 追加历史消息（通常已由 SessionManager 做窗口与清洗）。
        messages.extend(history)

        # 追加当前用户输入；若带图片则转换为多模态 content 结构。
        user_content = self._build_user_content(current_message, media)
        messages.append({"role": "user", "content": user_content})

        return messages

    def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
        """构建 user content，支持文本或“文本+图片”多模态格式。"""
        # 无媒体时直接走纯文本，保持最简单路径。
        if not media:
            return text

        images = []
        for path in media:
            p = Path(path)
            mime, _ = mimetypes.guess_type(path)
            # 仅接收本地图片文件，其他媒体类型暂不注入到模型内容。
            if not p.is_file() or not mime or not mime.startswith("image/"):
                continue
            # 按 data URL 形式内联图片，兼容支持 image_url 的 provider 接口。
            b64 = base64.b64encode(p.read_bytes()).decode()
            images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})

        # 没有合法图片时回退纯文本，避免传空数组导致模型侧解析异常。
        if not images:
            return text
        # 多模态结构中把图片放前、文本放后，便于模型先“看图”再读文字指令。
        return images + [{"type": "text", "text": text}]

    def add_tool_result(
        self,
        messages: list[dict[str, Any]],
        tool_call_id: str,
        tool_name: str,
        result: str
    ) -> list[dict[str, Any]]:
        """把工具执行结果追加到 messages。"""
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call_id,
            "name": tool_name,
            "content": result
        })
        return messages

    def add_assistant_message(
        self,
        messages: list[dict[str, Any]],
        content: str | None,
        tool_calls: list[dict[str, Any]] | None = None,
        reasoning_content: str | None = None,
    ) -> list[dict[str, Any]]:
        """把 assistant 消息追加到 messages（可携带 tool_calls/reasoning）。"""
        msg: dict[str, Any] = {"role": "assistant"}

        # 始终写入 content 键：
        # 部分 provider 在 key 缺失时会拒绝请求（即使值是 None 也要有该键）。
        msg["content"] = content

        if tool_calls:
            msg["tool_calls"] = tool_calls

        # reasoning_content 是“思考模型”专用字段，仅在有值时附加。
        if reasoning_content is not None:
            msg["reasoning_content"] = reasoning_content

        messages.append(msg)
        return messages