```
feat(engine): 优化智能体循环中的助手消息处理逻辑
- 在没有工具调用时才添加助手消息到上下文
- 确保工具调用响应正确添加到消息上下文中
- 修复了消息构建的条件逻辑

fix(cron): 改进定时任务调度的时间解析功能
- 添加正则表达式导入用于时间显示解析
- 实现从显示文本中提取毫秒间隔的功能
- 增强整数转换的安全性，避免类型错误
- 优化定时任务配置的解析逻辑

feat(outlook): 增强Outlook集成的功能和稳定性
- 将默认超时时间从10秒增加到180秒
- 为状态检查函数添加可选的验证参数
- 串行执行邮件概览获取操作而非并行
- 改进连接状态验证逻辑

feat(channel): 添加设备名称作为会话标识的选项
- 为终端WebSocket适配器添加新的配置选项
- 实现基于设备名称生成会话对等ID的功能
- 记录原始对等ID和设备名称的元数据
- 支持从设备名称创建会话对等ID

feat(skills): 完善技能学习评估系统和进度跟踪
- 在应用启动时自动调度待评估的技能草稿
- 为技能评估工作创建独立的循环工厂
- 实现异步技能评估任务的取消和清理机制
- 添加技能评估进度报告和状态跟踪功能
- 扩展会话列表API以包含更多详细信息
- 防止对不存在的会话进行操作
- 优化技能草稿提交和评估的业务逻辑

perf(skills): 提升技能评估的并发性能
- 实现并行技能案例评估以提高效率
- 添加最大并行案例数的环境变量控制
- 实现实时评估进度更新和回调机制
- 优化评估过程中的资源管理和同步

refactor(services): 创建隔离的智能体循环实例
- 添加创建独立智能体循环的工厂方法
- 确保新循环继承运行时服务配置
- 支持技能评估等需要隔离环境的场景
```
2026-06-15 14:48:16 +08:00
parent 8aeb97a5fc
commit 4b0bf65ace
53 changed files with 4328 additions and 292 deletions
--- a/app-instance/backend/beaver/skills/learning/replay.py
+++ b/app-instance/backend/beaver/skills/learning/replay.py
@@ -3,7 +3,8 @@
 from __future__ import annotations

 from dataclasses import dataclass, field
-from typing import Any, Literal
+from time import perf_counter
+from typing import Any, Callable, Literal
 from uuid import uuid4

 from beaver.tools.base import ToolContext, ToolResult, ToolSpec
@@ -59,6 +60,7 @@ class ReplayToolExecutor:
        *,
        context: ToolContext | None = None,
    ) -> ToolResult:
+        started_at = perf_counter()
        tool = self.registry.get(tool_name)
        spec = tool.spec if tool is not None else ToolSpec(
            name=tool_name,
@@ -84,6 +86,7 @@ class ReplayToolExecutor:
                "error": result.error,
                "content": result.content[:2000],
            }
+            trace["duration_ms"] = round((perf_counter() - started_at) * 1000, 2)
            self.traces.append(trace)
            return result
        if mode == "surrogate":
@@ -92,6 +95,7 @@ class ReplayToolExecutor:
                "error": "replay_surrogate",
                "content": "Tool call recorded for surrogate evaluation.",
            }
+            trace["duration_ms"] = round((perf_counter() - started_at) * 1000, 2)
            self.traces.append(trace)
            return ToolResult(
                success=True,
@@ -105,6 +109,7 @@ class ReplayToolExecutor:
            "error": "replay_blocked",
            "content": "Tool call blocked by replay policy.",
        }
+        trace["duration_ms"] = round((perf_counter() - started_at) * 1000, 2)
        self.traces.append(trace)
        return ToolResult(
            success=False,
@@ -151,12 +156,20 @@ class ReplayArmRequest:


 class ReplayRunner:
-    def __init__(self, *, agent_loop: Any, policy: ReplayToolPolicy | None = None) -> None:
+    def __init__(
+        self,
+        *,
+        agent_loop: Any,
+        policy: ReplayToolPolicy | None = None,
+        isolated_loop_factory: Callable[[], Any] | None = None,
+    ) -> None:
        self.agent_loop = agent_loop
        self.policy = policy or ReplayToolPolicy()
+        self.isolated_loop_factory = isolated_loop_factory

    async def run_arm(self, request: ReplayArmRequest) -> dict[str, Any]:
-        loaded = self.agent_loop.boot()
+        target_loop = self.isolated_loop_factory() if self.isolated_loop_factory is not None else self.agent_loop
+        loaded = target_loop.boot()
        replay_executor = ReplayToolExecutor(
            loaded.tool_executor,
            registry=loaded.tool_registry,
@@ -174,23 +187,42 @@ class ReplayRunner:
            "tool_executor_override": replay_executor,
        }
        try:
-            result = await self.agent_loop.process_direct(request.task_text, **direct_kwargs)
-        except RuntimeError as exc:
-            if not _is_process_direct_disabled_while_running(exc) or not hasattr(self.agent_loop, "submit_direct"):
-                raise
-            result = await self.agent_loop.submit_direct(request.task_text, **direct_kwargs)
-        return {
-            "case_id": request.case_id,
-            "arm": request.arm,
-            "session_id": result.session_id,
-            "run_id": result.run_id,
-            "task_text": request.task_text,
-            "finish_reason": result.finish_reason,
-            "final_answer": result.output_text,
-            "tool_calls": list(replay_executor.traces),
-            "artifacts": [],
-            "side_effects": _side_effects_from_traces(replay_executor.traces),
-        }
+            try:
+                result = await target_loop.process_direct(request.task_text, **direct_kwargs)
+            except RuntimeError as exc:
+                if not _is_process_direct_disabled_while_running(exc) or not hasattr(target_loop, "submit_direct"):
+                    raise
+                result = await target_loop.submit_direct(request.task_text, **direct_kwargs)
+            session_manager = getattr(loaded, "session_manager", None)
+            if session_manager is not None and hasattr(session_manager, "end_session"):
+                session_manager.end_session(result.session_id, "evaluation_complete")
+            return {
+                "case_id": request.case_id,
+                "arm": request.arm,
+                "session_id": result.session_id,
+                "run_id": result.run_id,
+                "task_text": request.task_text,
+                "finish_reason": result.finish_reason,
+                "final_answer": result.output_text,
+                "tool_calls": list(replay_executor.traces),
+                "artifacts": [],
+                "side_effects": _side_effects_from_traces(replay_executor.traces),
+            }
+        finally:
+            if target_loop is not self.agent_loop and hasattr(target_loop, "close"):
+                mcp_manager = getattr(loaded, "mcp_manager", None)
+                if mcp_manager is not None and hasattr(mcp_manager, "close"):
+                    try:
+                        await mcp_manager.close()
+                    finally:
+                        closeables = getattr(loaded, "closeables", None)
+                        if isinstance(closeables, list):
+                            loaded.closeables = [
+                                (name, close_fn)
+                                for name, close_fn in closeables
+                                if name != "mcp_manager"
+                            ]
+                target_loop.close()


 def _is_process_direct_disabled_while_running(exc: RuntimeError) -> bool: