# beaver_project/app-instance/backend/nanobot/agent/loop.py

"""Agent 主循环：Boardware Genius 的核心处理引擎。

职责概览：
1. 从消息总线读取入站消息；
2. 结合会话历史、记忆与工作区上下文构建提示词；
3. 调用 LLM 并迭代执行工具调用；
4. 将结果写回会话并发布出站消息；
5. 在后台处理记忆归档与 MCP 工具连接生命周期。
"""

from __future__ import annotations

import asyncio
import json
import re
from contextlib import AsyncExitStack
from pathlib import Path
from typing import TYPE_CHECKING, Any, Awaitable, Callable

from loguru import logger

from nanobot.agent.agent_registry import AgentRegistry
from nanobot.agent.context import ContextBuilder
from nanobot.agent.delegation import DelegationManager
from nanobot.agent.memory import MemoryStore
from nanobot.agent.plugins import PluginLoader
from nanobot.agent.process_events import process_event_sink
from nanobot.agent.subagent import SubagentManager
from nanobot.agent.tools.base import Tool
from nanobot.agent.tools.cron import CronTool
from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
from nanobot.agent.tools.message import MessageTool
from nanobot.agent.tools.registry import ToolRegistry
from nanobot.agent.tools.shell import ExecTool
from nanobot.agent.tools.spawn import DelegationTool, SpawnAgentTeamTool, SpawnSubagentTool
from nanobot.agent.tools.web import WebFetchTool, WebSearchTool
from nanobot.bus.events import InboundMessage, OutboundMessage
from nanobot.bus.queue import MessageBus
from nanobot.providers.base import LLMProvider
from nanobot.session.manager import Session, SessionManager

if TYPE_CHECKING:
    from nanobot.config.schema import A2AConfig, ChannelsConfig, ExecToolConfig
    from nanobot.cron.service import CronService


class AgentLoop:
    """
    AgentLoop 是 Boardware Genius 运行时的“对话编排器”。

    一次标准处理链路：
    1. 接收入站消息（来自 CLI 或外部渠道）；
    2. 恢复对应会话并构建当前轮上下文；
    3. 调用模型，解析工具调用并执行；
    4. 将本轮新增消息写入会话；
    5. 输出最终回复（或由消息工具自行发送）。
    """

    def __init__(
        self,
        bus: MessageBus,
        provider: LLMProvider,
        workspace: Path,
        model: str | None = None,
        max_iterations: int = 40,
        temperature: float = 0.1,
        max_tokens: int = 4096,
        memory_window: int = 100,
        brave_api_key: str | None = None,
        exec_config: ExecToolConfig | None = None,
        a2a_config: "A2AConfig | None" = None,
        cron_service: CronService | None = None,
        restrict_to_workspace: bool = False,
        session_manager: SessionManager | None = None,
        mcp_servers: dict | None = None,
        channels_config: ChannelsConfig | None = None,
        authz_config: Any | None = None,
        backend_identity: Any | None = None,
        allow_spawn: bool = True,
        allow_message: bool = True,
        allow_cron: bool = True,
        include_local_fallback: bool = True,
        allow_local_delegation: bool = True,
        allow_plugin_delegation: bool = True,
        include_plugin_agents: bool = True,
        gateway_port: int = 18790,
    ) -> None:
        """Wire up the loop's collaborators and register the default tools.

        Defaults resolved here: ``model`` falls back to
        ``provider.get_default_model()``; ``exec_config`` / ``a2a_config`` fall
        back to freshly constructed ``ExecToolConfig()`` / ``A2AConfig()``;
        ``session_manager`` falls back to ``SessionManager(workspace)``; and
        ``mcp_servers`` falls back to an empty dict.

        The constructor only builds objects and stores state. MCP servers are
        not connected here; that happens lazily in ``_connect_mcp``.
        """
        # Imported at runtime: the module-level import of these names is
        # guarded by TYPE_CHECKING, but the default instances are needed here.
        from nanobot.config.schema import A2AConfig, ExecToolConfig
        # Basic dependencies and runtime parameters.
        self.bus = bus
        self.channels_config = channels_config
        self.provider = provider
        self.workspace = workspace
        self.model = model or provider.get_default_model()
        self.max_iterations = max_iterations
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.memory_window = memory_window
        self.brave_api_key = brave_api_key
        self.exec_config = exec_config or ExecToolConfig()
        self.a2a_config = a2a_config or A2AConfig()
        self.cron_service = cron_service
        self.restrict_to_workspace = restrict_to_workspace
        self.authz_config = authz_config
        self.backend_identity = backend_identity
        self.allow_spawn = allow_spawn
        self.allow_message = allow_message
        self.allow_cron = allow_cron
        self.include_local_fallback = include_local_fallback
        self.allow_local_delegation = allow_local_delegation
        self.allow_plugin_delegation = allow_plugin_delegation
        self.include_plugin_agents = include_plugin_agents

        # Core components: context building, session management, tool
        # registry and sub-agent management.
        self.plugins = PluginLoader(workspace)
        # The SkillsLoader must know about skill directories shipped by
        # plugins, so its construction lives in a dedicated helper.
        self.skills = self._build_skills_loader()
        self.agent_registry = AgentRegistry(
            workspace,
            plugins=self.plugins,
            skills=self.skills,
            allow_skill_cards=self.a2a_config.allow_skill_cards,
            allow_workspace_agents=self.a2a_config.allow_workspace_agents,
            include_local_fallback=self.include_local_fallback,
            include_plugin_agents=self.include_plugin_agents,
        )
        self.context = ContextBuilder(
            workspace,
            skills_loader=self.skills,
            agent_registry=self.agent_registry,
        )
        self.sessions = session_manager or SessionManager(workspace)
        self.tools = ToolRegistry()
        self.subagents = SubagentManager(
            provider=provider,
            workspace=workspace,
            model=self.model,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            brave_api_key=brave_api_key,
            exec_config=self.exec_config,
            restrict_to_workspace=restrict_to_workspace,
        )
        self.delegation = DelegationManager(
            provider=provider,
            model=self.model,
            workspace=workspace,
            bus=bus,
            registry=self.agent_registry,
            skills_loader=self.skills,
            local_executor=self.subagents,
            timeout_seconds=self.a2a_config.timeout_seconds,
            poll_interval_seconds=self.a2a_config.poll_interval_seconds,
            card_cache_ttl_seconds=self.a2a_config.card_cache_ttl_seconds,
            max_parallel_agents=self.a2a_config.max_parallel_agents,
            allowed_hosts=self.a2a_config.allowed_hosts,
            authz_config=self.authz_config,
            backend_identity=self.backend_identity,
            allow_local_delegation=self.allow_local_delegation,
            allow_plugin_delegation=self.allow_plugin_delegation,
            allow_local_fallback=self.include_local_fallback,
            gateway_port=gateway_port,
        )
        # The sub-agent manager and the delegation manager reference each
        # other: delegation uses subagents as its local executor, and
        # subagents receive the delegation manager as their nested delegate.
        self.subagents.set_nested_delegate(self.delegation)

        # Runtime state flags.
        self._running = False
        self._mcp_servers = mcp_servers or {}
        self._mcp_stack: AsyncExitStack | None = None
        self._mcp_connected = False
        self._mcp_connecting = False
        # `_mcp_report` keeps the result of the most recent connection attempt
        # so that status and error details can be shown (see
        # `get_mcp_servers_view`).
        self._mcp_report: dict[str, dict[str, Any]] = {}
        # Per-session memory consolidation control: prevents concurrent
        # consolidation of the same session.
        self._consolidating: set[str] = set()  # Session keys with consolidation in progress
        self._consolidation_tasks: set[asyncio.Task] = set()  # Strong refs to in-flight tasks
        self._consolidation_locks: dict[str, asyncio.Lock] = {}
        self._register_default_tools()

    def apply_runtime_config(self, *, authz_config: Any | None, backend_identity: Any | None) -> None:
        """同步运行中 loop 的鉴权上下文，避免变更后必须重启。"""
        self.authz_config = authz_config
        self.backend_identity = backend_identity
        self.delegation.a2a_client.authz_config = authz_config
        self.delegation.a2a_client.backend_identity = backend_identity

    def _register_default_tools(self) -> None:
        """注册默认工具集合。"""
        # 启用工作区限制时，文件读写工具仅允许访问 workspace 目录树。
        allowed_dir = self.workspace if self.restrict_to_workspace else None
        protected_skill_paths = [self.workspace / "skills"]
        self.tools.register(ReadFileTool(workspace=self.workspace, allowed_dir=allowed_dir))
        self.tools.register(ListDirTool(workspace=self.workspace, allowed_dir=allowed_dir))
        self.tools.register(
            WriteFileTool(
                workspace=self.workspace,
                allowed_dir=allowed_dir,
                protected_paths=protected_skill_paths,
            )
        )
        self.tools.register(
            EditFileTool(
                workspace=self.workspace,
                allowed_dir=allowed_dir,
                protected_paths=protected_skill_paths,
            )
        )

        # Shell 工具独立配置超时与目录约束。
        self.tools.register(ExecTool(
            working_dir=str(self.workspace),
            timeout=self.exec_config.timeout,
            restrict_to_workspace=self.restrict_to_workspace,
            protected_paths=protected_skill_paths,
        ))

        # 网络、消息、委派工具按职责注册。
        self.tools.register(WebSearchTool(api_key=self.brave_api_key))
        self.tools.register(WebFetchTool())
        if self.allow_message:
            self.tools.register(MessageTool(send_callback=self.bus.publish_outbound))
        if self.allow_spawn:
            self.tools.register(SpawnSubagentTool(manager=self.delegation))
            self.tools.register(SpawnAgentTeamTool(manager=self.delegation))

        # 只有注入 cron_service 时才暴露 cron 工具，避免空引用。
        if self.cron_service and self.allow_cron:
            self.tools.register(CronTool(self.cron_service))

    async def _connect_mcp(self) -> None:
        """懒加载连接 MCP 服务器（单次连接，失败可重试）。"""
        # 已连接 / 正在连接 / 未配置时直接返回。
        if self._mcp_connected or self._mcp_connecting or not self._mcp_servers:
            return
        self._mcp_connecting = True
        from nanobot.agent.tools.mcp import connect_mcp_servers
        try:
            # 用 AsyncExitStack 统一托管各 MCP 连接的退出清理。
            self._mcp_stack = AsyncExitStack()
            await self._mcp_stack.__aenter__()
            self._mcp_report = await connect_mcp_servers(
                self._mcp_servers,
                self.tools,
                self._mcp_stack,
                authz_config=self.authz_config,
                backend_identity=self.backend_identity,
            )
            self._mcp_connected = any(item.get("status") == "connected" for item in self._mcp_report.values())
        except Exception as e:
            # 失败后保留可重试能力：释放已建立资源，下一条消息再尝试连接。
            logger.error("Failed to connect MCP servers (will retry next message): {}", e)
            if self._mcp_stack:
                try:
                    await self._mcp_stack.aclose()
                except Exception:
                    pass
                self._mcp_stack = None
            self._mcp_report = {
                name: {
                    "status": "error",
                    "last_error": str(e),
                    "tool_names": [],
                    "tool_count": 0,
                    "transport": "stdio" if getattr(cfg, "command", "") else "http",
                }
                for name, cfg in self._mcp_servers.items()
            }
        finally:
            self._mcp_connecting = False

    def _clear_mcp_tools(self) -> None:
        """移除当前 registry 里所有 MCP 工具包装器。"""
        for tool_name in list(self.tools.tool_names):
            if tool_name.startswith("mcp_"):
                self.tools.unregister(tool_name)

    async def reload_mcp_servers(self, mcp_servers: dict | None) -> None:
        """替换 MCP 配置并按新配置重新连接。"""
        # 先彻底关闭旧连接并移除旧工具，避免新旧配置混杂。
        await self.close_mcp()
        self._clear_mcp_tools()
        self._mcp_servers = mcp_servers or {}
        self._mcp_connected = False
        self._mcp_connecting = False
        self._mcp_report = {}
        if self._mcp_servers:
            await self._connect_mcp()

    def get_mcp_servers_view(self) -> list[dict[str, Any]]:
        """返回 MCP 静态配置与运行态状态合并后的视图。"""
        result: list[dict[str, Any]] = []
        for name in sorted(self._mcp_servers):
            cfg = self._mcp_servers[name]
            report = self._mcp_report.get(name, {})
            sensitive = bool(getattr(cfg, "sensitive", False))
            tool_names = report.get("tool_names")
            if not isinstance(tool_names, list):
                # 若当前 report 不完整，则退化为扫描已注册工具名进行推断。
                tool_names = [
                    item
                    for item in self.tools.tool_names
                    if item.startswith(f"mcp_{name}_")
                ]
            result.append({
                "id": name,
                "name": name,
                "transport": "stdio" if getattr(cfg, "command", "") else "http",
                "url": getattr(cfg, "url", "") or None,
                "command": getattr(cfg, "command", "") or None,
                "args": list(getattr(cfg, "args", []) or []),
                "auth_mode": getattr(cfg, "auth_mode", "none") or "none",
                "auth_audience": getattr(cfg, "auth_audience", "") or None,
                "auth_scopes": [str(item) for item in list(getattr(cfg, "auth_scopes", []) or [])],
                "headers": (
                    {key: "***" for key in dict(getattr(cfg, "headers", {}) or {})}
                    if sensitive
                    else dict(getattr(cfg, "headers", {}) or {})
                ),
                "env": (
                    {key: "***" for key in dict(getattr(cfg, "env", {}) or {})}
                    if sensitive
                    else dict(getattr(cfg, "env", {}) or {})
                ),
                "tool_timeout": int(getattr(cfg, "tool_timeout", 30)),
                "sensitive": sensitive,
                "enabled": True,
                "status": report.get("status", "disconnected"),
                "tool_count": int(report.get("tool_count", len(tool_names))),
                "tool_names": tool_names,
                "last_error": report.get("last_error"),
            })
        return result

    def _set_tool_context(
        self,
        channel: str,
        chat_id: str,
        message_id: str | None = None,
        session_key: str | None = None,
    ) -> None:
        """把当前请求的路由上下文写入各工具的默认目标。

        设计目的：
        1. 工具调用参数里不一定每次都显式传 `channel/chat_id`；
        2. 通过这里预注入默认值，工具可自动回落到“当前会话”；
        3. 每条消息处理前都调用一次，避免沿用上一轮残留上下文。
        """
        # message 工具：需要 channel/chat_id 才能发消息；
        # message_id 在支持线程回复/引用回复的渠道里可用于“回这条消息”。
        if message_tool := self.tools.get("message"):
            # ToolRegistry.get() 返回通用 Tool | None，
            # 用 isinstance 确认具体类型后再调用专有 set_context()。
            if isinstance(message_tool, MessageTool):
                message_tool.set_context(channel, chat_id, message_id)

        # 委派工具：后台任务完成后需要把结果回投到原会话，
        # 因此只需记住来源 channel/chat_id。
        for tool_name in ("spawn_subagent", "spawn_agent_team"):
            if delegation_tool := self.tools.get(tool_name):
                if isinstance(delegation_tool, DelegationTool):
                    delegation_tool.set_context(channel, chat_id, announce_via_bus=self._running)

        # cron 工具：创建任务时会把 deliver 目标写入任务 payload，
        # 后续定时触发时才能把结果送回同一会话。
        if cron_tool := self.tools.get("cron"):
            if isinstance(cron_tool, CronTool):
                cron_tool.set_context(channel, chat_id, session_key=session_key)

    def _build_skills_loader(self):
        """构造可感知 plugin skill 目录的 SkillsLoader。"""
        from nanobot.agent.skills import SkillsLoader

        return SkillsLoader(self.workspace, extra_dirs=self.plugins.get_skill_dirs())

    @staticmethod
    def _strip_think(text: str | None) -> str | None:
        """去除模型输出中的 `<think>...</think>` 推理块。"""
        # 某些模型会把思考内容混入最终文本，这里统一做显示层清洗。
        if not text:
            return None
        return re.sub(r"<think>[\s\S]*?</think>", "", text).strip() or None

    @staticmethod
    def _tool_hint(tool_calls: list) -> str:
        """把工具调用格式化为简短提示，如 `web_search("query")`。"""
        def _fmt(tc):
            val = next(iter(tc.arguments.values()), None) if tc.arguments else None
            if not isinstance(val, str):
                return tc.name
            return f'{tc.name}("{val[:40]}…")' if len(val) > 40 else f'{tc.name}("{val}")'
        return ", ".join(_fmt(tc) for tc in tool_calls)

    async def _run_agent_loop(
        self,
        initial_messages: list[dict],
        on_progress: Callable[..., Awaitable[None]] | None = None,
        tool_registry: ToolRegistry | None = None,
    ) -> tuple[str | None, list[str], list[dict]]:
        """执行 agent 迭代循环。

        返回：
        - final_content: 最终可回复文本（无则为 None）
        - tools_used: 本轮调用过的工具名列表
        - messages: 迭代结束后的完整消息数组（含 tool 结果）
        """
        messages = initial_messages
        tools = tool_registry or self.tools
        iteration = 0
        final_content = None
        tools_used: list[str] = []

        # 循环直到拿到最终回复，或达到最大迭代次数。
        while iteration < self.max_iterations:
            iteration += 1

            # 每一轮都带上当前消息状态与工具定义，让模型决定是否继续调工具。
            response = await self.provider.chat(
                messages=messages,
                tools=tools.get_definitions(),
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )

            if response.has_tool_calls:
                # 进度回调用于 CLI/渠道侧实时展示：先输出正文片段，再输出工具提示。
                if on_progress:
                    clean = self._strip_think(response.content)
                    if clean:
                        await on_progress(clean)
                    await on_progress(self._tool_hint(response.tool_calls), tool_hint=True)

                tool_call_dicts = [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.name,
                            "arguments": json.dumps(tc.arguments, ensure_ascii=False)
                        }
                    }
                    for tc in response.tool_calls
                ]
                # 把 assistant 的“工具调用意图”写入对话，再逐个执行工具。
                messages = self.context.add_assistant_message(
                    messages, response.content, tool_call_dicts,
                    reasoning_content=response.reasoning_content,
                )

                for tool_call in response.tool_calls:
                    tools_used.append(tool_call.name)
                    args_str = json.dumps(tool_call.arguments, ensure_ascii=False)
                    logger.info("Tool call: {}({})", tool_call.name, args_str[:200])
                    result = await tools.execute(tool_call.name, tool_call.arguments)
                    messages = self.context.add_tool_result(
                        messages, tool_call.id, tool_call.name, result
                    )
            else:
                # 无工具调用即视为本轮收敛，输出最终内容。
                final_content = self._strip_think(response.content)
                # 将最终 assistant 回复写入消息链，确保会话可持久化回放。
                # 对于空/None 内容，回退到原始 content（或空串）避免丢失一轮回复。
                persist_content = final_content if final_content is not None else (response.content or "")
                messages = self.context.add_assistant_message(
                    messages,
                    persist_content,
                    reasoning_content=response.reasoning_content,
                )
                break

        if final_content is None and iteration >= self.max_iterations:
            # 兜底提示：防止模型反复调工具导致“无终止回复”。
            logger.warning("Max iterations ({}) reached", self.max_iterations)
            final_content = (
                f"I reached the maximum number of tool call iterations ({self.max_iterations}) "
                "without completing the task. You can try breaking the task into smaller steps."
            )
            # 将兜底回复也写入会话，避免刷新后看不到最终结论。
            messages = self.context.add_assistant_message(messages, final_content)

        return final_content, tools_used, messages

    async def run(self) -> None:
        """启动常驻循环：持续消费入站消息并发布出站消息。"""
        self._running = True
        await self._connect_mcp()
        logger.info("Agent loop started")

        while self._running:
            try:
                # 用短超时轮询，便于 stop() 后快速退出循环。
                msg = await asyncio.wait_for(
                    self.bus.consume_inbound(),
                    timeout=1.0
                )
                try:
                    response = await self._process_message(msg)
                    if response is not None:
                        await self.bus.publish_outbound(response)
                    elif msg.channel == "cli":
                        # CLI 下若消息工具已代发，仍回一个空结束包通知“本轮结束”。
                        await self.bus.publish_outbound(OutboundMessage(
                            channel=msg.channel, chat_id=msg.chat_id, content="", metadata=msg.metadata or {},
                        ))
                except Exception as e:
                    # 单条消息失败不影响主循环存活。
                    logger.error("Error processing message: {}", e)
                    await self.bus.publish_outbound(OutboundMessage(
                        channel=msg.channel,
                        chat_id=msg.chat_id,
                        content=f"Sorry, I encountered an error: {str(e)}"
                    ))
            except asyncio.TimeoutError:
                continue

    async def close_mcp(self) -> None:
        """关闭 MCP 连接并释放退出栈。"""
        if self._mcp_stack:
            try:
                await self._mcp_stack.aclose()
            except (RuntimeError, BaseExceptionGroup):
                # MCP SDK 在取消清理阶段可能抛出噪声异常，这里忽略即可。
                pass
            self._mcp_stack = None
        self._mcp_connected = False
        self._mcp_connecting = False

    def stop(self) -> None:
        """Request the main loop to stop.

        Only clears the ``_running`` flag and logs the request; ``run()``
        checks the flag at the top of each loop iteration.
        """
        self._running = False
        logger.info("Agent loop stopping")

    def _get_consolidation_lock(self, session_key: str) -> asyncio.Lock:
        """获取会话级归档锁；不存在则创建。"""
        lock = self._consolidation_locks.get(session_key)
        if lock is None:
            lock = asyncio.Lock()
            self._consolidation_locks[session_key] = lock
        return lock

    def _prune_consolidation_lock(self, session_key: str, lock: asyncio.Lock) -> None:
        """在锁空闲时清理缓存，避免锁字典无限增长。"""
        if not lock.locked():
            self._consolidation_locks.pop(session_key, None)

    async def _process_message(
        self,
        msg: InboundMessage,
        session_key: str | None = None,
        on_progress: Callable[..., Awaitable[None]] | None = None,
        execution_context: str | None = None,
        extra_tools: list[Tool] | None = None,
    ) -> OutboundMessage | None:
        """Process one inbound message and return the outbound reply, if any.

        Three paths exist:

        1. ``system`` channel messages are routed to the session encoded in
           ``chat_id`` (``"<channel>:<chat_id>"``, defaulting to the ``cli``
           channel) and always produce a reply.
        2. The slash commands ``/new`` and ``/help`` are answered directly
           without calling the model.
        3. Every other message runs the agent loop; the new messages of the
           turn are appended to the session and saved.

        Args:
            msg: The inbound message.
            session_key: Overrides ``msg.session_key`` when given.
            on_progress: Async progress callback; when omitted, progress is
                published on the bus as outbound messages flagged with
                ``_progress`` metadata. The callback is also invoked with a
                ``tool_hint`` keyword argument by ``_run_agent_loop``.
            execution_context: Passed through to the context builder.
            extra_tools: Additional tools registered on a clone of the tool
                registry for this call only.

        Returns:
            The reply, or ``None`` when the message tool already sent a
            message during this turn.
        """
        # The "system" channel carries internal messages; the originating
        # route is encoded in chat_id.
        if msg.channel == "system":
            channel, chat_id = (msg.chat_id.split(":", 1) if ":" in msg.chat_id
                                else ("cli", msg.chat_id))
            logger.info("Processing system message from {}", msg.sender_id)
            key = f"{channel}:{chat_id}"
            session = self.sessions.get_or_create(key)
            # NOTE(review): msg.metadata is dereferenced directly here, while
            # other call sites in this class use `msg.metadata or {}` —
            # confirm it can never be None.
            self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"), session_key=key)
            history = session.get_history(max_messages=self.memory_window)
            messages = self.context.build_messages(
                history=history,
                current_message=msg.content,
                execution_context=execution_context,
                channel=channel,
                chat_id=chat_id,
            )
            final_content, _, all_msgs = await self._run_agent_loop(messages)
            self._save_turn(session, all_msgs, 1 + len(history))
            self.sessions.save(session)
            return OutboundMessage(channel=channel, chat_id=chat_id,
                                  content=final_content or "Background task completed.")

        preview = msg.content[:80] + "..." if len(msg.content) > 80 else msg.content
        logger.info("Processing message from {}:{}: {}", msg.channel, msg.sender_id, preview)

        key = session_key or msg.session_key
        session = self.sessions.get_or_create(key)

        # Built-in slash commands are handled before the model is involved.
        cmd = msg.content.strip().lower()
        if cmd == "/new":
            # `/new` starts a fresh session, but before clearing anything the
            # not-yet-consolidated messages are archived once:
            # - if archiving fails, return without clearing so the user's
            #   context is not lost.

            # Take the per-session lock and mark the session as consolidating
            # so this cannot run concurrently with the background
            # consolidation of the same session.
            lock = self._get_consolidation_lock(session.key)
            self._consolidating.add(session.key)
            try:
                async with lock:
                    # Only the unconsolidated tail is processed:
                    # [0:last_consolidated] counts as already archived,
                    # [last_consolidated:] is the increment to archive now.
                    snapshot = session.messages[session.last_consolidated:]
                    if snapshot:
                        # Wrap the snapshot in a temporary Session before
                        # passing it to the consolidation step, so the live
                        # session object is not handed to it.
                        temp = Session(key=session.key)
                        temp.messages = list(snapshot)
                        # archive_all=True is passed so the whole temporary
                        # snapshot is archived.
                        if not await self._consolidate_memory(temp, archive_all=True):
                            return OutboundMessage(
                                channel=msg.channel, chat_id=msg.chat_id,
                                content="Memory archival failed, session not cleared. Please try again.",
                            )
            except Exception:
                # Any exception during archiving counts as a failure: leave
                # the session untouched and say so explicitly.
                logger.exception("/new archival failed for {}", session.key)
                return OutboundMessage(
                    channel=msg.channel, chat_id=msg.chat_id,
                    content="Memory archival failed, session not cleared. Please try again.",
                )
            finally:
                # Success or failure, remove the in-progress mark and prune
                # the idle lock so the session cannot stay stuck in the
                # consolidating state.
                self._consolidating.discard(session.key)
                self._prune_consolidation_lock(session.key, lock)

            # Reaching this point means archiving succeeded (or there was
            # nothing to archive); only now is the session cleared.
            session.clear()
            # Save right after clearing.
            self.sessions.save(session)
            # Invalidate the session manager's entry for this key.
            self.sessions.invalidate(session.key)
            return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                  content="New session started.")
        if cmd == "/help":
            return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                  content="Boardware Genius commands:\n/new — Start a new conversation\n/help — Show available commands")

        # Trigger memory consolidation in the background once the number of
        # unconsolidated messages reaches the window; the current reply is
        # not blocked by it.
        unconsolidated = len(session.messages) - session.last_consolidated
        if (unconsolidated >= self.memory_window and session.key not in self._consolidating):
            self._consolidating.add(session.key)
            lock = self._get_consolidation_lock(session.key)

            async def _consolidate_and_unlock():
                try:
                    async with lock:
                        await self._consolidate_memory(session)
                finally:
                    # Success or failure, deregister the state so the session
                    # cannot stay stuck in the consolidating state, and drop
                    # the strong reference to this task.
                    self._consolidating.discard(session.key)
                    self._prune_consolidation_lock(session.key, lock)
                    _task = asyncio.current_task()
                    if _task is not None:
                        self._consolidation_tasks.discard(_task)

            # The task is kept in a set so a strong reference exists while it
            # is in flight.
            _task = asyncio.create_task(_consolidate_and_unlock())
            self._consolidation_tasks.add(_task)

        # Refresh the tool context for this turn and call the message tool's
        # start_turn() before the agent loop runs.
        # NOTE(review): msg.metadata is dereferenced directly here as well —
        # confirm it can never be None.
        self._set_tool_context(
            msg.channel,
            msg.chat_id,
            msg.metadata.get("message_id"),
            session_key=key,
        )
        if message_tool := self.tools.get("message"):
            if isinstance(message_tool, MessageTool):
                message_tool.start_turn()

        # Extra tools are registered on a clone so the shared registry is not
        # modified by a single call.
        active_tools = self.tools
        if extra_tools:
            active_tools = self.tools.clone()
            for tool in extra_tools:
                active_tools.register(tool)

        # Take a bounded slice of history so the context cannot grow without
        # limit.
        history = session.get_history(max_messages=self.memory_window)
        # Build the initial messages sent to the model:
        # - history: session history (already trimmed to the window)
        # - current_message: the user's input for this turn
        # - media: optional attachments
        # - channel/chat_id: routing information of the current conversation
        initial_messages = self.context.build_messages(
            history=history,
            current_message=msg.content,
            execution_context=execution_context,
            media=msg.media if msg.media else None,
            channel=msg.channel, chat_id=msg.chat_id,
        )

        async def _bus_progress(content: str, *, tool_hint: bool = False) -> None:
            # Default progress callback: invoked by _run_agent_loop for
            # intermediate text and tool hints. The event is published on the
            # outbound queue like a final reply, but is distinguishable
            # through its metadata flags.
            meta = dict(msg.metadata or {})
            # `_progress=True` marks the message as a progress event.
            meta["_progress"] = True
            # `_tool_hint` tells whether this is a tool-call hint
            # (for example web_search(...)).
            meta["_tool_hint"] = tool_hint
            # Progress messages reuse the original channel/chat_id so they are
            # routed to the current conversation.
            await self.bus.publish_outbound(OutboundMessage(
                channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
            ))

        # Run the core agent iteration:
        # - possibly several "model -> tools -> model" rounds
        # - without an external on_progress, `_bus_progress` reports progress
        final_content, _, all_msgs = await self._run_agent_loop(
            initial_messages,
            on_progress=on_progress or _bus_progress,
            tool_registry=active_tools,
        )

        if final_content is None:
            # The model produced no final text; substitute a fallback so the
            # reply is never empty.
            final_content = "I've completed processing but have no response to give."

        # Log only a preview so long content does not flood the log.
        preview = final_content[:120] + "..." if len(final_content) > 120 else final_content
        logger.info("Response to {}:{}: {}", msg.channel, msg.sender_id, preview)

        # Append this turn's new messages to the session and save it.
        # `1 + len(history)` skips the leading system message plus the
        # history that existed before this turn.
        self._save_turn(session, all_msgs, 1 + len(history))
        self.sessions.save(session)

        if message_tool := self.tools.get("message"):
            if isinstance(message_tool, MessageTool) and message_tool._sent_in_turn:
                # De-duplication: if the agent already sent a message through
                # the message tool during this turn, returning another
                # OutboundMessage would send a duplicate. Return None and let
                # the caller finish the turn as "already sent".
                return None

        return OutboundMessage(
            channel=msg.channel, chat_id=msg.chat_id, content=final_content,
            metadata=msg.metadata or {},
        )

    # Maximum number of characters of a tool result that `_save_turn` keeps
    # when persisting a turn; longer results are cut and marked as truncated.
    _TOOL_RESULT_MAX_CHARS = 500

    def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None:
        """保存本轮新增消息到会话，并截断过长工具输出。"""
        from datetime import datetime
        for m in messages[skip:]:
            # 不持久化 reasoning_content，避免会话文件冗长且混入思考文本。
            entry = {k: v for k, v in m.items() if k != "reasoning_content"}
            if entry.get("role") == "tool" and isinstance(entry.get("content"), str):
                content = entry["content"]
                if len(content) > self._TOOL_RESULT_MAX_CHARS:
                    # 大工具结果只保留前缀，兼顾可读性与存储体积。
                    entry["content"] = content[:self._TOOL_RESULT_MAX_CHARS] + "\n... (truncated)"
            entry.setdefault("timestamp", datetime.now().isoformat())
            session.messages.append(entry)
        session.updated_at = datetime.now()

    async def _consolidate_memory(self, session, archive_all: bool = False) -> bool:
        """调用 MemoryStore 做记忆归档；成功返回 True。"""
        return await MemoryStore(self.workspace).consolidate(
            session, self.provider, self.model,
            archive_all=archive_all, memory_window=self.memory_window,
        )

    async def process_system_announcement(
        self,
        content: str,
        *,
        origin_channel: str,
        origin_chat_id: str,
        sender_id: str = "delegation",
    ) -> str:
        """在无常驻 run() 的场景下，本地处理一条 system 公告。"""
        await self._connect_mcp()
        msg = InboundMessage(
            channel="system",
            sender_id=sender_id,
            chat_id=f"{origin_channel}:{origin_chat_id}",
            content=content,
        )
        response = await self._process_message(msg)
        return response.content if response else ""

    async def process_direct(
        self,
        content: str,
        session_key: str = "cli:direct",
        channel: str = "cli",
        chat_id: str = "direct",
        on_progress: Callable[[str], Awaitable[None]] | None = None,
        process_event_callback: Callable[[dict[str, Any]], Awaitable[None]] | None = None,
        execution_context: str | None = None,
        extra_tools: list[Tool] | None = None,
    ) -> str:
        """直接处理一条消息（用于 CLI 单轮或 cron 触发）。"""
        # 直连模式不依赖 run() 主循环，但仍需确保 MCP 可用。
        await self._connect_mcp()
        msg = InboundMessage(channel=channel, sender_id="user", chat_id=chat_id, content=content)
        # process_event_sink 只在当前调用链内生效，因此不会污染其他并发请求。
        with process_event_sink(process_event_callback):
            response = await self._process_message(
                msg,
                session_key=session_key,
                on_progress=on_progress,
                # execution_context / extra_tools 主要服务于 cron 和其他系统触发场景。
                execution_context=execution_context,
                extra_tools=extra_tools,
            )
        return response.content if response else ""