From 33a98455660f6151beb8be96c101d23648e119e5 Mon Sep 17 00:00:00 2001 From: steven_li Date: Wed, 27 May 2026 13:37:06 +0800 Subject: [PATCH] =?UTF-8?q?```=20feat(engine):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E6=8A=80=E8=83=BD=E6=9F=A5=E7=9C=8B=E5=B7=A5=E5=85=B7=E5=B9=B6?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=BC=82=E6=AD=A5=E4=BB=BB=E5=8A=A1=E7=AE=A1?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加SkillViewTool到引擎加载器中,增强技能管理功能 - 在AgentLoop中引入_active_direct_task来跟踪活跃任务 - 实现直接任务执行时的同步处理逻辑 - 更新工具实例化方式以支持依赖注入 feat(config): 增加智能体运行时参数配置支持 - 扩展AgentDefaultsConfig添加max_tokens和temperature字段 - 实现配置解析函数_first_config_value处理多个配置源 - 支持通过Web API动态更新智能体运行时参数 - 添加前端页面配置表单和验证逻辑 refactor(provider): 统一最大令牌数参数类型为可选整型 - 将所有LLM提供者的max_tokens参数改为int | None类型 - 为AnthropicProvider实现模型特定的最大令牌数默认值 - 调整参数传递逻辑,优先级:调用参数 > 配置文件 > 模型默认值 - 移除硬编码的默认值,改用条件判断 feat(process): 增强事件投影功能 - 添加工具调用开始/结束事件的映射逻辑 - 实现技能激活事件的识别和展示 - 添加辅助函数处理工具调用名称和参数提取 - 优化运行记录关联逻辑,提升事件匹配准确性 fix(web): 更新网络请求客户端信任环境设置 - 将WebFetchTool和WebSearchTool的trust_env参数设为True - 确保HTTP客户端能够正确使用系统代理配置 - 修复可能的网络连接问题 test: 添加配置加载和事件投影相关测试 - 新增智能体默认参数配置测试用例 - 实现API配置持久化和重载测试 - 添加技能卡片和工具事件的投影测试 ``` --- app-instance/backend/agents/registry.json | 145 +++++++++++++ app-instance/backend/beaver/engine/loader.py | 22 +- app-instance/backend/beaver/engine/loop.py | 14 +- .../beaver/engine/providers/anthropic.py | 20 +- .../backend/beaver/engine/providers/base.py | 2 +- .../backend/beaver/engine/providers/chain.py | 4 +- .../backend/beaver/engine/providers/codex.py | 2 +- .../backend/beaver/engine/providers/custom.py | 5 +- .../beaver/engine/providers/litellm.py | 5 +- .../beaver/foundation/config/loader.py | 26 ++- .../beaver/foundation/config/schema.py | 2 + .../backend/beaver/interfaces/web/app.py | 34 ++++ .../beaver/interfaces/web/schemas/__init__.py | 4 + .../beaver/interfaces/web/schemas/chat.py | 14 ++ .../backend/beaver/services/agent_service.py | 8 + 
.../beaver/services/process_service.py | 134 +++++++++++- .../backend/beaver/tools/builtins/web.py | 4 +- .../backend/tests/unit/test_agent_loop.py | 47 +++++ .../backend/tests/unit/test_config_loader.py | 86 +++++++- .../unit/test_initial_skill_tool_hints.py | 58 ++++++ .../tests/unit/test_max_tokens_defaults.py | 64 ++++++ .../tests/unit/test_process_projection.py | 125 ++++++++++++ .../backend/tests/unit/test_web_tools.py | 44 ++++ .../frontend/app/(app)/status/page.tsx | 100 ++++++++- .../app/(app)/tasks/[taskId]/page.tsx | 68 +++---- .../task-detail/TaskAcceptanceCard.tsx | 60 ++++-- .../components/task-detail/TaskLiveHeader.tsx | 14 +- .../components/task-detail/TaskSideRail.tsx | 20 ++ .../components/task-detail/TaskTimeline.tsx | 8 +- .../task-detail/TaskTimelineCard.tsx | 79 +++++++- app-instance/frontend/lib/api.ts | 8 + .../frontend/lib/task-detail-refresh.test.ts | 37 ++++ .../frontend/lib/task-detail-refresh.ts | 18 ++ .../frontend/lib/task-timeline.test.ts | 172 ++++++++++++++++ app-instance/frontend/lib/task-timeline.ts | 133 +++++++++++- app-instance/frontend/types/index.ts | 9 +- scripts/deploy-initial-skills.sh | 61 ++++++ skills/_index/disabled.json | 3 + skills/_index/published.json | 13 ++ skills/cron-scheduler/current.json | 3 + skills/cron-scheduler/skill.json | 13 ++ skills/cron-scheduler/versions/v0001/SKILL.md | 34 ++++ .../versions/v0001/version.json | 22 ++ skills/filesystem-operation/current.json | 3 + skills/filesystem-operation/skill.json | 13 ++ .../versions/v0001/SKILL.md | 50 +++++ .../versions/v0001/version.json | 22 ++ skills/memory-management/current.json | 3 + skills/memory-management/skill.json | 13 ++ .../memory-management/versions/v0001/SKILL.md | 32 +++ .../versions/v0001/version.json | 22 ++ skills/officebench-mcp/current.json | 4 + skills/officebench-mcp/skill.json | 21 ++ .../officebench-mcp/versions/v0001/SKILL.md | 190 ++++++++++++++++++ .../versions/v0001/version.json | 80 ++++++++ skills/outlook-mail/current.json | 3 
+ skills/outlook-mail/skill.json | 13 ++ skills/outlook-mail/versions/v0001/SKILL.md | 150 ++++++++++++++ .../outlook-mail/versions/v0001/version.json | 22 ++ skills/skills-admin/current.json | 3 + skills/skills-admin/skill.json | 13 ++ skills/skills-admin/versions/v0001/SKILL.md | 42 ++++ .../skills-admin/versions/v0001/version.json | 22 ++ skills/terminal-operation/current.json | 3 + skills/terminal-operation/skill.json | 13 ++ .../versions/v0001/SKILL.md | 46 +++++ .../versions/v0001/version.json | 22 ++ skills/utility-tools/current.json | 3 + skills/utility-tools/skill.json | 13 ++ skills/utility-tools/versions/v0001/SKILL.md | 52 +++++ .../utility-tools/versions/v0001/version.json | 22 ++ skills/web-operation/current.json | 3 + skills/web-operation/skill.json | 13 ++ skills/web-operation/versions/v0001/SKILL.md | 36 ++++ .../web-operation/versions/v0001/version.json | 22 ++ 75 files changed, 2599 insertions(+), 114 deletions(-) create mode 100644 app-instance/backend/agents/registry.json create mode 100644 app-instance/backend/tests/unit/test_agent_loop.py create mode 100644 app-instance/backend/tests/unit/test_initial_skill_tool_hints.py create mode 100644 app-instance/backend/tests/unit/test_max_tokens_defaults.py create mode 100644 app-instance/backend/tests/unit/test_web_tools.py create mode 100644 app-instance/frontend/lib/task-detail-refresh.test.ts create mode 100644 app-instance/frontend/lib/task-detail-refresh.ts create mode 100644 scripts/deploy-initial-skills.sh create mode 100644 skills/_index/disabled.json create mode 100644 skills/_index/published.json create mode 100644 skills/cron-scheduler/current.json create mode 100644 skills/cron-scheduler/skill.json create mode 100644 skills/cron-scheduler/versions/v0001/SKILL.md create mode 100644 skills/cron-scheduler/versions/v0001/version.json create mode 100644 skills/filesystem-operation/current.json create mode 100644 skills/filesystem-operation/skill.json create mode 100644 
skills/filesystem-operation/versions/v0001/SKILL.md create mode 100644 skills/filesystem-operation/versions/v0001/version.json create mode 100644 skills/memory-management/current.json create mode 100644 skills/memory-management/skill.json create mode 100644 skills/memory-management/versions/v0001/SKILL.md create mode 100644 skills/memory-management/versions/v0001/version.json create mode 100644 skills/officebench-mcp/current.json create mode 100644 skills/officebench-mcp/skill.json create mode 100644 skills/officebench-mcp/versions/v0001/SKILL.md create mode 100644 skills/officebench-mcp/versions/v0001/version.json create mode 100644 skills/outlook-mail/current.json create mode 100644 skills/outlook-mail/skill.json create mode 100644 skills/outlook-mail/versions/v0001/SKILL.md create mode 100644 skills/outlook-mail/versions/v0001/version.json create mode 100644 skills/skills-admin/current.json create mode 100644 skills/skills-admin/skill.json create mode 100644 skills/skills-admin/versions/v0001/SKILL.md create mode 100644 skills/skills-admin/versions/v0001/version.json create mode 100644 skills/terminal-operation/current.json create mode 100644 skills/terminal-operation/skill.json create mode 100644 skills/terminal-operation/versions/v0001/SKILL.md create mode 100644 skills/terminal-operation/versions/v0001/version.json create mode 100644 skills/utility-tools/current.json create mode 100644 skills/utility-tools/skill.json create mode 100644 skills/utility-tools/versions/v0001/SKILL.md create mode 100644 skills/utility-tools/versions/v0001/version.json create mode 100644 skills/web-operation/current.json create mode 100644 skills/web-operation/skill.json create mode 100644 skills/web-operation/versions/v0001/SKILL.md create mode 100644 skills/web-operation/versions/v0001/version.json diff --git a/app-instance/backend/agents/registry.json b/app-instance/backend/agents/registry.json new file mode 100644 index 0000000..2d58775 --- /dev/null +++ 
b/app-instance/backend/agents/registry.json @@ -0,0 +1,145 @@ +{ + "agents": [ + { + "agent_id": "researcher", + "capabilities": [ + "research", + "analysis", + "source review", + "requirements" + ], + "created_at": "2026-05-27T05:25:11.756341+00:00", + "description": "Finds facts, references, constraints, and implementation options.", + "display_name": "Researcher", + "metadata": {}, + "model": null, + "name": "researcher", + "priority": 50, + "provider_name": null, + "role": "research", + "skill_names": [], + "source": "builtin", + "status": "active", + "system_prompt": "You are a research specialist. Gather concise evidence and tradeoffs for the parent task.", + "tags": [ + "planning", + "research" + ], + "tool_hints": [], + "updated_at": "2026-05-27T05:25:11.756349+00:00" + }, + { + "agent_id": "implementer", + "capabilities": [ + "implementation", + "coding", + "refactor", + "integration" + ], + "created_at": "2026-05-27T05:25:11.756351+00:00", + "description": "Builds scoped implementation slices and proposes concrete changes.", + "display_name": "Implementer", + "metadata": {}, + "model": null, + "name": "implementer", + "priority": 45, + "provider_name": null, + "role": "implementation", + "skill_names": [], + "source": "builtin", + "status": "active", + "system_prompt": "You are an implementation specialist. 
Produce practical, scoped implementation output.", + "tags": [ + "coding", + "build" + ], + "tool_hints": [], + "updated_at": "2026-05-27T05:25:11.756353+00:00" + }, + { + "agent_id": "reviewer", + "capabilities": [ + "review", + "quality", + "risk", + "verification" + ], + "created_at": "2026-05-27T05:25:11.756355+00:00", + "description": "Reviews plans, code, outputs, and risks before final synthesis.", + "display_name": "Reviewer", + "metadata": {}, + "model": null, + "name": "reviewer", + "priority": 45, + "provider_name": null, + "role": "review", + "skill_names": [], + "source": "builtin", + "status": "active", + "system_prompt": "You are a review specialist. Focus on defects, missing requirements, and risks.", + "tags": [ + "review", + "quality" + ], + "tool_hints": [], + "updated_at": "2026-05-27T05:25:11.756356+00:00" + }, + { + "agent_id": "tester", + "capabilities": [ + "testing", + "verification", + "regression", + "qa" + ], + "created_at": "2026-05-27T05:25:11.756358+00:00", + "description": "Designs and executes verification checks for task outputs.", + "display_name": "Tester", + "metadata": {}, + "model": null, + "name": "tester", + "priority": 40, + "provider_name": null, + "role": "testing", + "skill_names": [], + "source": "builtin", + "status": "active", + "system_prompt": "You are a testing specialist. 
Identify focused checks and report pass/fail evidence.", + "tags": [ + "test", + "quality" + ], + "tool_hints": [], + "updated_at": "2026-05-27T05:25:11.756358+00:00" + }, + { + "agent_id": "documenter", + "capabilities": [ + "documentation", + "explanation", + "migration notes", + "release notes" + ], + "created_at": "2026-05-27T05:25:11.756360+00:00", + "description": "Writes and reconciles user-facing and internal documentation updates.", + "display_name": "Documenter", + "metadata": {}, + "model": null, + "name": "documenter", + "priority": 35, + "provider_name": null, + "role": "documentation", + "skill_names": [], + "source": "builtin", + "status": "active", + "system_prompt": "You are a documentation specialist. Produce concise docs aligned with the implementation.", + "tags": [ + "docs", + "communication" + ], + "tool_hints": [], + "updated_at": "2026-05-27T05:25:11.756360+00:00" + } + ], + "version": 1 +} diff --git a/app-instance/backend/beaver/engine/loader.py b/app-instance/backend/beaver/engine/loader.py index e12e14d..09a582c 100644 --- a/app-instance/backend/beaver/engine/loader.py +++ b/app-instance/backend/beaver/engine/loader.py @@ -44,6 +44,7 @@ from beaver.tools.builtins import ( SpawnTool, SessionSearchTool, SkillManageTool, + SkillViewTool, SkillsListTool, TerminalTool, TodoTool, @@ -220,16 +221,17 @@ class EngineLoader: ObjectBackedTool(WriteFileTool()), ObjectBackedTool(PatchFileTool()), ObjectBackedTool(WebFetchTool()), - ObjectBackedTool(WebSearchTool()), - ObjectBackedTool(TerminalTool()), - ObjectBackedTool(ProcessTool()), - ObjectBackedTool(ExecuteCodeTool()), - ObjectBackedTool(TodoTool()), - ObjectBackedTool(ClarifyTool()), - ObjectBackedTool(SendMessageTool()), - ObjectBackedTool(DelegateTool()), - ObjectBackedTool(SpawnTool()), - SkillsListTool(), + ObjectBackedTool(WebSearchTool()), + ObjectBackedTool(TerminalTool()), + ObjectBackedTool(ProcessTool()), + ObjectBackedTool(ExecuteCodeTool()), + ObjectBackedTool(TodoTool()), + 
ObjectBackedTool(ClarifyTool()), + ObjectBackedTool(SendMessageTool()), + ObjectBackedTool(DelegateTool()), + ObjectBackedTool(SpawnTool()), + SkillsListTool(), + ObjectBackedTool(SkillViewTool(loader=skills_loader)), SkillManageTool(), CronTool(), ] diff --git a/app-instance/backend/beaver/engine/loop.py b/app-instance/backend/beaver/engine/loop.py index 6749a0b..4e612fb 100644 --- a/app-instance/backend/beaver/engine/loop.py +++ b/app-instance/backend/beaver/engine/loop.py @@ -48,7 +48,7 @@ class AgentProfile: name: str = "default" system_prompt: str = "" default_model: str = "gpt-4.1-mini" - max_tokens: int = 4096 + max_tokens: int | None = None max_context_messages: int = 1000 temperature: float = 0.2 max_tool_iterations: int = 30 @@ -89,6 +89,7 @@ class AgentLoop: self.loaded: EngineLoadResult | None = None self.runtime_services: dict[str, Any] = {} self._run_queue: asyncio.Queue[_DirectRunRequest | None] | None = None + self._active_direct_task: asyncio.Task[Any] | None = None self._running = False self._stop_requested = False @@ -130,6 +131,8 @@ class AgentLoop: if item.future.cancelled(): continue + previous_direct_task = self._active_direct_task + self._active_direct_task = asyncio.current_task() try: result = await self._process_direct_impl(item.task, **item.kwargs) except asyncio.CancelledError: @@ -142,6 +145,8 @@ class AgentLoop: else: if not item.future.done(): item.future.set_result(result) + finally: + self._active_direct_task = previous_direct_task finally: if self._run_queue is not None: while True: @@ -183,6 +188,9 @@ class AgentLoop: if self._stop_requested: raise RuntimeError("AgentLoop.submit_direct() is not accepting new tasks after stop()") + if asyncio.current_task() is self._active_direct_task: + return await self._process_direct_impl(task, **kwargs) + future: asyncio.Future[AgentRunResult] = asyncio.get_running_loop().create_future() await self._run_queue.put(_DirectRunRequest(task=task, kwargs=dict(kwargs), future=future)) return await 
future @@ -363,7 +371,7 @@ class AgentLoop: resolved_request_timeout_seconds = configured_provider.get("request_timeout_seconds") resolved_embedding_model = embedding_model or config.default_embedding_model resolved_embedding_target = embedding_target or config.resolve_embedding_target() - resolved_max_tokens = max_tokens or self.profile.max_tokens + resolved_max_tokens = self.profile.max_tokens if max_tokens is None else max_tokens resolved_temperature = self.profile.temperature if temperature is None else temperature resolved_max_tool_iterations = ( self.profile.max_tool_iterations if max_tool_iterations is None else max_tool_iterations @@ -892,7 +900,7 @@ class AgentLoop: provider: Any, messages: list[dict[str, Any]], model: str, - max_tokens: int, + max_tokens: int | None, temperature: float, thinking_enabled: bool | None, ) -> str: diff --git a/app-instance/backend/beaver/engine/providers/anthropic.py b/app-instance/backend/beaver/engine/providers/anthropic.py index a7a9a65..2822a4f 100644 --- a/app-instance/backend/beaver/engine/providers/anthropic.py +++ b/app-instance/backend/beaver/engine/providers/anthropic.py @@ -43,7 +43,7 @@ class AnthropicProvider(LLMProvider): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, model: str | None = None, - max_tokens: int = 4096, + max_tokens: int | None = None, temperature: float = 0.7, thinking_enabled: bool | None = None, ) -> LLMResponse: @@ -57,9 +57,14 @@ class AnthropicProvider(LLMProvider): "model": model or self.default_model, "system": system_prompt or "", "messages": anthropic_messages, - "max_tokens": max(1, max_tokens), "temperature": temperature, } + resolved_max_tokens = ( + _default_max_tokens_for_model(model or self.default_model) + if max_tokens is None + else max(1, max_tokens) + ) + kwargs["max_tokens"] = resolved_max_tokens if tools: kwargs["tools"] = _convert_tools(tools) @@ -100,6 +105,17 @@ class AnthropicProvider(LLMProvider): return self.default_model +def 
_default_max_tokens_for_model(model: str) -> int: + """Return a conservative native output ceiling for Anthropic Messages.""" + + normalized = model.lower().replace("_", "-") + if "sonnet-4" in normalized or "opus-4" in normalized or "3-7" in normalized or "3.7" in normalized: + return 64_000 + if "haiku" in normalized: + return 4_096 + return 8_192 + + def _convert_messages(messages: list[dict[str, Any]]) -> tuple[str, list[dict[str, Any]]]: system_prompt = "" converted: list[dict[str, Any]] = [] diff --git a/app-instance/backend/beaver/engine/providers/base.py b/app-instance/backend/beaver/engine/providers/base.py index 10dcb65..77f91fa 100644 --- a/app-instance/backend/beaver/engine/providers/base.py +++ b/app-instance/backend/beaver/engine/providers/base.py @@ -88,7 +88,7 @@ class LLMProvider(ABC): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, model: str | None = None, - max_tokens: int = 4096, + max_tokens: int | None = None, temperature: float = 0.7, thinking_enabled: bool | None = None, ) -> LLMResponse: diff --git a/app-instance/backend/beaver/engine/providers/chain.py b/app-instance/backend/beaver/engine/providers/chain.py index 0830f1d..9bfd27b 100644 --- a/app-instance/backend/beaver/engine/providers/chain.py +++ b/app-instance/backend/beaver/engine/providers/chain.py @@ -56,7 +56,7 @@ class FallbackProviderChain(LLMProvider): messages: list[dict], tools: list[dict] | None = None, model: str | None = None, - max_tokens: int = 4096, + max_tokens: int | None = None, temperature: float = 0.7, thinking_enabled: bool | None = None, ) -> LLMResponse: @@ -115,7 +115,7 @@ class FallbackProviderChain(LLMProvider): messages: list[dict], tools: list[dict] | None, model: str, - max_tokens: int, + max_tokens: int | None, temperature: float, thinking_enabled: bool | None, ) -> LLMResponse: diff --git a/app-instance/backend/beaver/engine/providers/codex.py b/app-instance/backend/beaver/engine/providers/codex.py index 7d773ad..025b105 100644 
--- a/app-instance/backend/beaver/engine/providers/codex.py +++ b/app-instance/backend/beaver/engine/providers/codex.py @@ -39,7 +39,7 @@ class OpenAICodexProvider(LLMProvider): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, model: str | None = None, - max_tokens: int = 4096, + max_tokens: int | None = None, temperature: float = 0.7, thinking_enabled: bool | None = None, ) -> LLMResponse: diff --git a/app-instance/backend/beaver/engine/providers/custom.py b/app-instance/backend/beaver/engine/providers/custom.py index 9222feb..47e1c8e 100644 --- a/app-instance/backend/beaver/engine/providers/custom.py +++ b/app-instance/backend/beaver/engine/providers/custom.py @@ -47,7 +47,7 @@ class CustomProvider(LLMProvider): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, model: str | None = None, - max_tokens: int = 4096, + max_tokens: int | None = None, temperature: float = 0.7, thinking_enabled: bool | None = None, ) -> LLMResponse: @@ -55,9 +55,10 @@ class CustomProvider(LLMProvider): kwargs: dict[str, Any] = { "model": model or self.default_model, "messages": self.sanitize_empty_content(messages), - "max_tokens": max(1, max_tokens), "temperature": temperature, } + if max_tokens is not None: + kwargs["max_tokens"] = max(1, max_tokens) if tools: kwargs.update(tools=tools, tool_choice="auto") try: diff --git a/app-instance/backend/beaver/engine/providers/litellm.py b/app-instance/backend/beaver/engine/providers/litellm.py index bcd8fde..74a5d3c 100644 --- a/app-instance/backend/beaver/engine/providers/litellm.py +++ b/app-instance/backend/beaver/engine/providers/litellm.py @@ -197,7 +197,7 @@ class LiteLLMProvider(LLMProvider): messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, model: str | None = None, - max_tokens: int = 4096, + max_tokens: int | None = None, temperature: float = 0.7, thinking_enabled: bool | None = None, ) -> LLMResponse: @@ -210,10 +210,11 @@ class LiteLLMProvider(LLMProvider): 
kwargs: dict[str, Any] = { "model": resolved_model, "messages": sanitized_messages, - "max_tokens": max(1, max_tokens), "temperature": temperature, "timeout": self.request_timeout_seconds or 45.0, } + if max_tokens is not None: + kwargs["max_tokens"] = max(1, max_tokens) if self.api_key: kwargs["api_key"] = self.api_key if self.api_base: diff --git a/app-instance/backend/beaver/foundation/config/loader.py b/app-instance/backend/beaver/foundation/config/loader.py index 19aa4a2..3e7a6d4 100644 --- a/app-instance/backend/beaver/foundation/config/loader.py +++ b/app-instance/backend/beaver/foundation/config/loader.py @@ -86,18 +86,25 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig: model=_string(defaults.get("model") or data.get("model")), provider=_string(defaults.get("provider") or data.get("provider")), embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")), + max_tokens=_int(_first_config_value( + defaults.get("maxTokens"), + defaults.get("max_tokens"), + data.get("maxTokens"), + data.get("max_tokens"), + )), + temperature=_float(_first_config_value(defaults.get("temperature"), data.get("temperature"))), max_context_messages=_int( defaults.get("maxContextMessages") or defaults.get("max_context_messages") or data.get("maxContextMessages") or data.get("max_context_messages") ), - max_tool_iterations=_int( - defaults.get("maxToolIterations") - or defaults.get("max_tool_iterations") - or data.get("maxToolIterations") - or data.get("max_tool_iterations") - ), + max_tool_iterations=_int(_first_config_value( + defaults.get("maxToolIterations"), + defaults.get("max_tool_iterations"), + data.get("maxToolIterations"), + data.get("max_tool_iterations"), + )), ) @@ -204,6 +211,13 @@ def _as_dict(value: Any) -> dict[str, Any]: return value if isinstance(value, dict) else {} +def _first_config_value(*values: Any) -> Any: + for value in values: + if value not in (None, ""): + return value 
+ return None + + def _string(value: Any) -> str | None: if value is None: return None diff --git a/app-instance/backend/beaver/foundation/config/schema.py b/app-instance/backend/beaver/foundation/config/schema.py index 438068d..7062183 100644 --- a/app-instance/backend/beaver/foundation/config/schema.py +++ b/app-instance/backend/beaver/foundation/config/schema.py @@ -25,6 +25,8 @@ class AgentDefaultsConfig: model: str | None = None provider: str | None = None embedding_model: str | None = None + max_tokens: int | None = None + temperature: float | None = None max_context_messages: int | None = None max_tool_iterations: int | None = None diff --git a/app-instance/backend/beaver/interfaces/web/app.py b/app-instance/backend/beaver/interfaces/web/app.py index 1efafa8..ce8e188 100644 --- a/app-instance/backend/beaver/interfaces/web/app.py +++ b/app-instance/backend/beaver/interfaces/web/app.py @@ -51,6 +51,8 @@ from .schemas import ( WebChatRequest, WebChatResponse, WebErrorResponse, + WebAgentConfigRequest, + WebAgentConfigResponse, WebProviderConfigRequest, WebProviderConfigResponse, WebStatusResponse, @@ -595,6 +597,38 @@ def create_app( _reload_agent_config(agent_service, config_path) return WebProviderConfigResponse(ok=True, provider=spec.name, enabled=payload.enabled) + @app.post("/api/agent-config", response_model=WebAgentConfigResponse) + async def update_agent_config( + request: Request, + payload: WebAgentConfigRequest, + ) -> WebAgentConfigResponse: + if payload.max_tokens is not None and payload.max_tokens <= 0: + raise HTTPException(status_code=400, detail="max_tokens must be a positive integer or null") + if payload.temperature < 0 or payload.temperature > 2: + raise HTTPException(status_code=400, detail="temperature must be between 0 and 2") + if payload.max_tool_iterations < 0: + raise HTTPException(status_code=400, detail="max_tool_iterations must be zero or greater") + + agent_service = get_agent_service(request) + config_path = 
agent_service.loader.config.config_path or default_config_path(workspace=agent_service.loader.workspace) + raw = _read_config_json(config_path) + agents = _ensure_dict(raw, "agents") + defaults = _ensure_dict(agents, "defaults") + + if payload.max_tokens is None: + defaults.pop("maxTokens", None) + defaults.pop("max_tokens", None) + else: + defaults["maxTokens"] = payload.max_tokens + defaults.pop("max_tokens", None) + defaults["temperature"] = payload.temperature + defaults["maxToolIterations"] = payload.max_tool_iterations + defaults.pop("max_tool_iterations", None) + + _write_config_json(config_path, raw) + _reload_agent_config(agent_service, config_path) + return WebAgentConfigResponse(ok=True) + @app.get("/api/sessions") async def list_sessions(request: Request) -> list[dict[str, Any]]: loaded = get_agent_service(request).create_loop().boot() diff --git a/app-instance/backend/beaver/interfaces/web/schemas/__init__.py b/app-instance/backend/beaver/interfaces/web/schemas/__init__.py index 48d2d5b..150ef10 100644 --- a/app-instance/backend/beaver/interfaces/web/schemas/__init__.py +++ b/app-instance/backend/beaver/interfaces/web/schemas/__init__.py @@ -8,6 +8,8 @@ from .chat import ( WebChatRequest, WebChatResponse, WebErrorResponse, + WebAgentConfigRequest, + WebAgentConfigResponse, WebProviderConfigRequest, WebProviderConfigResponse, WebProviderTarget, @@ -22,6 +24,8 @@ __all__ = [ "WebChatRequest", "WebChatResponse", "WebErrorResponse", + "WebAgentConfigRequest", + "WebAgentConfigResponse", "WebProviderConfigRequest", "WebProviderConfigResponse", "WebProviderTarget", diff --git a/app-instance/backend/beaver/interfaces/web/schemas/chat.py b/app-instance/backend/beaver/interfaces/web/schemas/chat.py index c5127a4..bd6cd5d 100644 --- a/app-instance/backend/beaver/interfaces/web/schemas/chat.py +++ b/app-instance/backend/beaver/interfaces/web/schemas/chat.py @@ -139,6 +139,20 @@ class WebProviderConfigResponse(BaseModel): enabled: bool +class 
WebAgentConfigRequest(BaseModel): + """Agent runtime defaults update from the settings page.""" + + max_tokens: int | None = None + temperature: float + max_tool_iterations: int + + +class WebAgentConfigResponse(BaseModel): + """Agent runtime defaults update result.""" + + ok: bool + + class WebStatusResponse(BaseModel): """Web 宿主层状态响应。""" diff --git a/app-instance/backend/beaver/services/agent_service.py b/app-instance/backend/beaver/services/agent_service.py index a8331bf..842c4b5 100644 --- a/app-instance/backend/beaver/services/agent_service.py +++ b/app-instance/backend/beaver/services/agent_service.py @@ -68,6 +68,14 @@ class AgentService: def _apply_configured_profile_defaults(self) -> None: defaults = self.loader.config.agents_defaults + self.profile.max_tokens = None + self.profile.temperature = 0.2 + self.profile.max_context_messages = 1000 + self.profile.max_tool_iterations = 30 + if defaults.max_tokens is not None: + self.profile.max_tokens = max(1, defaults.max_tokens) + if defaults.temperature is not None: + self.profile.temperature = defaults.temperature if defaults.max_context_messages is not None: self.profile.max_context_messages = max(1, defaults.max_context_messages) if defaults.max_tool_iterations is not None: diff --git a/app-instance/backend/beaver/services/process_service.py b/app-instance/backend/beaver/services/process_service.py index dc79ab1..6a0c5a0 100644 --- a/app-instance/backend/beaver/services/process_service.py +++ b/app-instance/backend/beaver/services/process_service.py @@ -50,10 +50,11 @@ class SessionProcessProjector: for record in records: payload = dict(record.event_payload or {}) - task_id = payload.get("task_id") + run_record_for_event = run_records.get(str(record.run_id)) if record.run_id else None + task_id = payload.get("task_id") or getattr(run_record_for_event, "task_id", None) if not task_id: continue - attempt_index = int(payload.get("attempt_index") or 1) + attempt_index = int(payload.get("attempt_index") or 
getattr(run_record_for_event, "attempt_index", None) or 1) root_run_id = f"task:{task_id}:attempt:{attempt_index}" created_at = _timestamp(record.timestamp) root = runs.setdefault( @@ -73,7 +74,61 @@ class SessionProcessProjector: }, ) - if record.event_type == "task_execution_planned": + if record.event_type == "assistant_message_added" and record.tool_calls: + run_id = record.run_id or root_run_id + parent_run_id = root_run_id if run_id != root_run_id else None + for index, tool_call in enumerate(record.tool_calls): + if not isinstance(tool_call, dict): + continue + tool_name = _tool_call_name(tool_call) + add_event( + event_id=f"{_event_id(record, 'tool-call')}:{index}", + run_id=run_id, + parent_run_id=parent_run_id, + kind="tool_call_started", + actor_type="tool", + actor_id=tool_name, + actor_name=tool_name, + text=f"Calling tool: {tool_name}.", + created_at=created_at, + status="running", + metadata={ + "task_id": task_id, + "attempt_index": attempt_index, + "timeline_type": "tool_call", + "tool_name": tool_name, + "tool_call_id": tool_call.get("id"), + "arguments": _tool_call_arguments(tool_call), + }, + ) + + elif record.event_type == "tool_result_recorded": + run_id = record.run_id or root_run_id + parent_run_id = root_run_id if run_id != root_run_id else None + tool_name = str(record.tool_name or payload.get("tool_name") or "tool") + add_event( + event_id=_event_id(record, "tool-result"), + run_id=run_id, + parent_run_id=parent_run_id, + kind="tool_call_finished", + actor_type="tool", + actor_id=tool_name, + actor_name=tool_name, + text=_truncate(str(record.content or payload.get("error") or "")), + created_at=created_at, + status="done" if payload.get("success", True) else "error", + metadata={ + **dict(payload), + "task_id": task_id, + "attempt_index": attempt_index, + "timeline_type": "tool_result", + "tool_name": tool_name, + "tool_call_id": record.tool_call_id, + "result_summary": _truncate(str(record.content or payload.get("error") or "")), + }, + 
) + + elif record.event_type == "task_execution_planned": plan_mode = payload.get("plan_mode") or "single" strategy = payload.get("strategy") or "single" node_ids = payload.get("node_ids") or [] @@ -241,6 +296,7 @@ class SessionProcessProjector: main_run_id = str(payload.get("main_run_id") or "") if main_run_id: run_record = run_records.get(main_run_id) + activated_skill_names = _activated_skill_names(run_record) runs[main_run_id] = { "run_id": main_run_id, "parent_run_id": root_run_id, @@ -254,8 +310,32 @@ class SessionProcessProjector: "started_at": run_record.started_at if run_record is not None else created_at, "finished_at": run_record.ended_at if run_record is not None else created_at, "summary": _truncate(run_record.task_text if run_record is not None else ""), - "metadata": {"task_id": task_id, "attempt_index": attempt_index}, + "metadata": { + "task_id": task_id, + "attempt_index": attempt_index, + "skill_names": activated_skill_names, + }, } + if activated_skill_names: + add_event( + event_id=_event_id(record, "synthesis-skills"), + run_id=main_run_id, + parent_run_id=root_run_id, + kind="skill_selected", + actor_type="system", + actor_id="skill-selector", + actor_name="Skill Selector", + text=f"Selected skill guidance: {', '.join(activated_skill_names)}.", + created_at=created_at, + status="done", + metadata={ + "task_id": task_id, + "attempt_index": attempt_index, + "timeline_type": "skill", + "skill_names": activated_skill_names, + "activation_reasons": _activated_skill_reasons(run_record), + }, + ) add_event( event_id=_event_id(record, "synthesis"), run_id=main_run_id, @@ -335,3 +415,49 @@ def _truncate(text: str, limit: int = 800) -> str: if len(cleaned) <= limit: return cleaned return cleaned[: limit - 1] + "..." 
+ + +def _activated_skill_names(run_record: Any | None) -> list[str]: + if run_record is None: + return [] + names = [] + for receipt in getattr(run_record, "activated_skills", []) or []: + skill_name = str(getattr(receipt, "skill_name", "") or "").strip() + if skill_name: + names.append(skill_name) + return list(dict.fromkeys(names)) + + +def _activated_skill_reasons(run_record: Any | None) -> list[str]: + if run_record is None: + return [] + reasons = [] + for receipt in getattr(run_record, "activated_skills", []) or []: + reason = str(getattr(receipt, "activation_reason", "") or "").strip() + if reason: + reasons.append(reason) + return reasons + + +def _tool_call_name(tool_call: dict[str, Any]) -> str: + function_payload = tool_call.get("function") + if isinstance(function_payload, dict): + name = function_payload.get("name") + if name: + return str(name) + for key in ("name", "tool_name"): + value = tool_call.get(key) + if value: + return str(value) + return "tool" + + +def _tool_call_arguments(tool_call: dict[str, Any]) -> Any: + function_payload = tool_call.get("function") + if isinstance(function_payload, dict) and "arguments" in function_payload: + return function_payload.get("arguments") + if "arguments" in tool_call: + return tool_call.get("arguments") + if "args" in tool_call: + return tool_call.get("args") + return None diff --git a/app-instance/backend/beaver/tools/builtins/web.py b/app-instance/backend/beaver/tools/builtins/web.py index cd37ddf..8b5d469 100644 --- a/app-instance/backend/beaver/tools/builtins/web.py +++ b/app-instance/backend/beaver/tools/builtins/web.py @@ -51,7 +51,7 @@ class WebFetchTool: try: safe_url = _safe_url(url) limit = max(1000, min(int(max_chars or 12000), 50000)) - async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client: + async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=True) as client: response = await client.get( safe_url, headers={"User-Agent": "Mozilla/5.0 
Beaver/1.0"}, @@ -96,7 +96,7 @@ class WebSearchTool: raise ValueError("query is required") bounded = max(1, min(int(limit or 5), 10)) url = f"https://duckduckgo.com/html/?q={quote_plus(query)}" - async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client: + async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=True) as client: response = await client.get(url, headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"}) response.raise_for_status() html = response.text diff --git a/app-instance/backend/tests/unit/test_agent_loop.py b/app-instance/backend/tests/unit/test_agent_loop.py new file mode 100644 index 0000000..ab48736 --- /dev/null +++ b/app-instance/backend/tests/unit/test_agent_loop.py @@ -0,0 +1,47 @@ +import asyncio +from contextlib import suppress +from typing import Any + +from beaver.engine import AgentLoop, AgentRunResult, EngineLoader + + +def _run_result(run_id: str, output_text: str) -> AgentRunResult: + return AgentRunResult( + session_id="web:test", + run_id=run_id, + output_text=output_text, + finish_reason="stop", + tool_iterations=0, + ) + + +def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None: + async def run_case() -> None: + loop = AgentLoop(loader=EngineLoader(workspace=tmp_path)) + calls: list[str] = [] + + async def fake_process_direct(task: str, **kwargs: Any) -> AgentRunResult: + calls.append(task) + if task == "outer": + return await loop.submit_direct("inner", session_id="web:test") + return _run_result(task, "inner completed") + + loop._process_direct_impl = fake_process_direct # type: ignore[method-assign] + + loop_task = asyncio.create_task(loop.run()) + await asyncio.sleep(0) + try: + result = await asyncio.wait_for(loop.submit_direct("outer", session_id="web:test"), timeout=1) + finally: + await loop.stop() + with suppress(asyncio.TimeoutError): + await asyncio.wait_for(loop_task, timeout=1) + if not loop_task.done(): + loop_task.cancel() + with 
suppress(asyncio.CancelledError): + await loop_task + + assert result.output_text == "inner completed" + assert calls == ["outer", "inner"] + + asyncio.run(run_case()) diff --git a/app-instance/backend/tests/unit/test_config_loader.py b/app-instance/backend/tests/unit/test_config_loader.py index 43ddb76..ec46fa4 100644 --- a/app-instance/backend/tests/unit/test_config_loader.py +++ b/app-instance/backend/tests/unit/test_config_loader.py @@ -1,10 +1,12 @@ import json +from fastapi.testclient import TestClient + from beaver.engine import AgentLoop, EngineLoader from beaver.engine.providers import make_provider_bundle from beaver.engine.providers.litellm import LiteLLMProvider from beaver.foundation.config import load_config -from beaver.interfaces.web.app import _reload_agent_config +from beaver.interfaces.web.app import create_app, _reload_agent_config from beaver.services.agent_service import AgentService @@ -161,6 +163,88 @@ def test_reload_agent_config_updates_booted_loop_config(tmp_path) -> None: service.close() +def test_agent_defaults_include_runtime_controls(tmp_path) -> None: + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "agents": { + "defaults": { + "maxTokens": 12345, + "temperature": 0.4, + "maxToolIterations": 9, + } + } + } + ), + encoding="utf-8", + ) + + config = load_config(config_path=config_path) + service = AgentService(config_path=config_path) + + assert config.agents_defaults.max_tokens == 12345 + assert config.agents_defaults.temperature == 0.4 + assert config.agents_defaults.max_tool_iterations == 9 + assert service.profile.max_tokens == 12345 + assert service.profile.temperature == 0.4 + assert service.profile.max_tool_iterations == 9 + service.close() + + +def test_agent_config_api_persists_and_reloads_defaults(tmp_path) -> None: + config_path = tmp_path / "config.json" + config_path.write_text(json.dumps({"agents": {"defaults": {}}}), encoding="utf-8") + service = 
AgentService(config_path=config_path) + app = create_app(service=service, manage_service_lifecycle=False) + + with TestClient(app) as client: + response = client.post( + "/api/agent-config", + json={"max_tokens": 8192, "temperature": 0.6, "max_tool_iterations": 12}, + ) + status = client.get("/api/status") + + saved = json.loads(config_path.read_text(encoding="utf-8")) + defaults = saved["agents"]["defaults"] + + assert response.status_code == 200 + assert response.json() == {"ok": True} + assert defaults["maxTokens"] == 8192 + assert defaults["temperature"] == 0.6 + assert defaults["maxToolIterations"] == 12 + assert service.profile.max_tokens == 8192 + assert service.profile.temperature == 0.6 + assert service.profile.max_tool_iterations == 12 + assert status.json()["max_tokens"] == 8192 + assert status.json()["temperature"] == 0.6 + assert status.json()["max_tool_iterations"] == 12 + service.close() + + +def test_agent_config_api_accepts_zero_temperature_and_iterations(tmp_path) -> None: + config_path = tmp_path / "config.json" + service = AgentService(config_path=config_path) + app = create_app(service=service, manage_service_lifecycle=False) + + with TestClient(app) as client: + response = client.post( + "/api/agent-config", + json={"max_tokens": None, "temperature": 0, "max_tool_iterations": 0}, + ) + + config = load_config(config_path=config_path) + + assert response.status_code == 200 + assert config.agents_defaults.max_tokens is None + assert config.agents_defaults.temperature == 0 + assert config.agents_defaults.max_tool_iterations == 0 + assert service.profile.max_tokens is None + assert service.profile.temperature == 0 + assert service.profile.max_tool_iterations == 0 + service.close() + + def test_openai_compatible_qwen_config_keeps_openai_provider() -> None: bundle = make_provider_bundle( model="qwen-plus", diff --git a/app-instance/backend/tests/unit/test_initial_skill_tool_hints.py b/app-instance/backend/tests/unit/test_initial_skill_tool_hints.py 
new file mode 100644 index 0000000..7118893 --- /dev/null +++ b/app-instance/backend/tests/unit/test_initial_skill_tool_hints.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from beaver.engine import EngineLoader +from beaver.skills.catalog.utils import parse_frontmatter + + +REPO_ROOT = Path(__file__).resolve().parents[4] + +EXPECTED_INITIAL_SKILL_TOOLS = { + "cron-scheduler": ["cron"], + "filesystem-operation": ["read_file", "write_file", "patch_file", "search_files", "list_directory"], + "memory-management": ["memory"], + "outlook-mail": [ + "mcp_outlook_mcp_mail_list_folders", + "mcp_outlook_mcp_mail_list_messages", + "mcp_outlook_mcp_mail_search_messages", + "mcp_outlook_mcp_mail_get_message", + "mcp_outlook_mcp_mail_send_email", + "mcp_outlook_mcp_mail_reply_to_message", + "mcp_outlook_mcp_mail_forward_message", + "mcp_outlook_mcp_mail_move_message", + "mcp_outlook_mcp_mail_delta_sync", + "mcp_outlook_mcp_calendar_list_events", + "mcp_outlook_mcp_calendar_create_event", + "mcp_outlook_mcp_calendar_update_event", + "mcp_outlook_mcp_calendar_get_schedule", + "mcp_outlook_mcp_calendar_find_meeting_times", + "mcp_outlook_mcp_calendar_delta_sync", + ], + "skills-admin": ["skills_list", "skill_manage", "skill_view"], + "terminal-operation": ["terminal", "process", "execute_code"], + "utility-tools": ["clarify", "delegate", "send_message", "spawn", "todo"], + "web-operation": ["web_fetch", "web_search"], +} + + +def test_initial_skill_tool_hints_match_runtime_tool_names() -> None: + for skill_name, expected_tools in EXPECTED_INITIAL_SKILL_TOOLS.items(): + skill_dir = REPO_ROOT / "skills" / skill_name / "versions" / "v0001" + frontmatter, _body = parse_frontmatter((skill_dir / "SKILL.md").read_text(encoding="utf-8")) + version = json.loads((skill_dir / "version.json").read_text(encoding="utf-8")) + + assert frontmatter["tools"] == expected_tools + assert version["frontmatter"]["tools"] == expected_tools + assert 
version["tool_hints"] == expected_tools + + +def test_default_runtime_registers_skill_view_tool(tmp_path: Path) -> None: + loaded = EngineLoader(workspace=tmp_path).load() + try: + assert "skill_view" in loaded.tools + assert loaded.tool_registry is not None + assert loaded.tool_registry.get("skill_view") is not None + finally: + loaded.close() diff --git a/app-instance/backend/tests/unit/test_max_tokens_defaults.py b/app-instance/backend/tests/unit/test_max_tokens_defaults.py new file mode 100644 index 0000000..a45c385 --- /dev/null +++ b/app-instance/backend/tests/unit/test_max_tokens_defaults.py @@ -0,0 +1,64 @@ +import asyncio +from types import SimpleNamespace + +from beaver.engine.loop import AgentProfile +from beaver.engine.providers.anthropic import AnthropicProvider +from beaver.engine.providers.litellm import LiteLLMProvider + + +def test_agent_profile_uses_provider_output_default() -> None: + assert AgentProfile().max_tokens is None + + +def test_litellm_omits_max_tokens_when_unset(monkeypatch) -> None: + captured_kwargs: dict = {} + + async def fake_acompletion(**kwargs): + captured_kwargs.update(kwargs) + return SimpleNamespace( + choices=[ + SimpleNamespace( + message=SimpleNamespace(content="ok", tool_calls=[]), + finish_reason="stop", + ) + ], + usage=None, + ) + + monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion) + + async def run_case(): + provider = LiteLLMProvider(default_model="openai/gpt-test") + return await provider.chat(messages=[{"role": "user", "content": "hi"}], max_tokens=None) + + response = asyncio.run(run_case()) + + assert response.content == "ok" + assert "max_tokens" not in captured_kwargs + + +def test_anthropic_uses_model_output_ceiling_when_unset(monkeypatch) -> None: + captured_kwargs: dict = {} + + class FakeMessages: + async def create(self, **kwargs): + captured_kwargs.update(kwargs) + return SimpleNamespace( + content=[SimpleNamespace(type="text", text="ok")], + usage=None, + 
stop_reason="stop", + ) + + class FakeClient: + messages = FakeMessages() + + monkeypatch.setattr(AnthropicProvider, "_client_or_raise", lambda self: FakeClient()) + + async def run_case(): + provider = AnthropicProvider(default_model="claude-sonnet-4-5") + return await provider.chat(messages=[{"role": "user", "content": "hi"}], max_tokens=None) + + response = asyncio.run(run_case()) + + assert response.content == "ok" + assert captured_kwargs["max_tokens"] == 64_000 diff --git a/app-instance/backend/tests/unit/test_process_projection.py b/app-instance/backend/tests/unit/test_process_projection.py index c579b20..a28ee64 100644 --- a/app-instance/backend/tests/unit/test_process_projection.py +++ b/app-instance/backend/tests/unit/test_process_projection.py @@ -5,6 +5,7 @@ from pathlib import Path from beaver.engine.session import SessionManager from beaver.memory.runs import RunMemoryStore, RunRecord from beaver.services.process_service import SessionProcessProjector +from beaver.skills.specs import SkillActivationReceipt def test_process_projection_maps_task_team_events(tmp_path: Path) -> None: @@ -238,6 +239,130 @@ def test_process_projection_uses_normalized_plan_metadata_defaults(tmp_path: Pat assert planned_event["metadata"]["strategy"] == "single" +def test_process_projection_emits_skill_card_from_main_run_receipts(tmp_path: Path) -> None: + session = SessionManager(tmp_path) + run_store = RunMemoryStore(tmp_path / "memory" / "runs") + run_store.append_run_record( + RunRecord( + run_id="main-run", + session_id="web:test", + task_id="task-1", + attempt_index=1, + task_text="main task", + started_at="2026-01-01T00:00:03+00:00", + ended_at="2026-01-01T00:00:04+00:00", + success=True, + finish_reason="stop", + activated_skills=[ + SkillActivationReceipt( + run_id="main-run", + session_id="web:test", + skill_name="web-operation", + skill_version="1", + content_hash="hash", + activated_at="2026-01-01T00:00:03+00:00", + activation_reason="Needs live web lookup.", + ) + 
], + ) + ) + session.append_message( + "web:test", + role="system", + event_type="task_execution_planned", + event_payload={ + "task_id": "task-1", + "attempt_index": 1, + "plan_mode": "single", + "strategy": "single", + "selected_skill_names": [], + }, + context_visible=False, + ) + session.append_message( + "web:test", + role="system", + event_type="task_synthesis_completed", + event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"}, + context_visible=False, + ) + + projection = SessionProcessProjector(session, run_store).project("web:test") + + skill_events = [ + event + for event in projection["events"] + if event["kind"] == "skill_selected" and event["run_id"] == "main-run" + ] + assert skill_events + assert skill_events[0]["metadata"]["timeline_type"] == "skill" + assert skill_events[0]["metadata"]["skill_names"] == ["web-operation"] + + +def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -> None: + session = SessionManager(tmp_path) + run_store = RunMemoryStore(tmp_path / "memory" / "runs") + run_store.append_run_record( + RunRecord( + run_id="main-run", + session_id="web:test", + task_id="task-1", + attempt_index=1, + task_text="main task", + started_at="2026-01-01T00:00:03+00:00", + ended_at="2026-01-01T00:00:04+00:00", + success=True, + finish_reason="stop", + ) + ) + session.append_message( + "web:test", + role="system", + event_type="task_execution_planned", + event_payload={"task_id": "task-1", "attempt_index": 1}, + context_visible=False, + ) + session.append_message( + "web:test", + run_id="main-run", + role="assistant", + event_type="assistant_message_added", + event_payload={"task_id": "task-1"}, + content="Searching", + tool_calls=[ + { + "id": "call-1", + "name": "multi_search", + "arguments": {"query": "Macau cafe near Bóvia"}, + } + ], + context_visible=False, + ) + session.append_message( + "web:test", + run_id="main-run", + role="tool", + event_type="tool_result_recorded", + 
event_payload={"success": True, "error": None}, + content="Found 3 restaurants", + tool_name="multi_search", + tool_call_id="call-1", + context_visible=True, + ) + + projection = SessionProcessProjector(session, run_store).project("web:test") + + tool_call = next(event for event in projection["events"] if event["kind"] == "tool_call_started") + assert tool_call["metadata"]["timeline_type"] == "tool_call" + assert tool_call["metadata"]["tool_name"] == "multi_search" + assert tool_call["run_id"] == "main-run" + + tool_result = next(event for event in projection["events"] if event["kind"] == "tool_call_finished") + assert tool_result["metadata"]["timeline_type"] == "tool_result" + assert tool_result["metadata"]["tool_name"] == "multi_search" + assert tool_result["metadata"]["success"] is True + + def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None: session = SessionManager(tmp_path) run_store = RunMemoryStore(tmp_path / "memory" / "runs") diff --git a/app-instance/backend/tests/unit/test_web_tools.py b/app-instance/backend/tests/unit/test_web_tools.py new file mode 100644 index 0000000..0f621f4 --- /dev/null +++ b/app-instance/backend/tests/unit/test_web_tools.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import asyncio + +from beaver.tools.builtins import web + + +class _FakeResponse: + headers = {"content-type": "text/html"} + status_code = 200 + text = 'Example' + url = "https://example.com" + + def raise_for_status(self) -> None: + return None + + +class _FakeAsyncClient: + calls: list[dict[str, object]] = [] + + def __init__(self, **kwargs: object) -> None: + self.calls.append(kwargs) + + async def __aenter__(self) -> "_FakeAsyncClient": + return self + + async def __aexit__(self, *args: object) -> None: + return None + + async def get(self, *args: object, **kwargs: object) -> _FakeResponse: + return _FakeResponse() + + +def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None: + 
_FakeAsyncClient.calls = [] + monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) + + async def _run() -> None: + await web.WebFetchTool().execute(url="https://example.com") + await web.WebSearchTool().execute(query="example") + + asyncio.run(_run()) + + assert [call.get("trust_env") for call in _FakeAsyncClient.calls] == [True, True] diff --git a/app-instance/frontend/app/(app)/status/page.tsx b/app-instance/frontend/app/(app)/status/page.tsx index 322b445..9aae720 100644 --- a/app-instance/frontend/app/(app)/status/page.tsx +++ b/app-instance/frontend/app/(app)/status/page.tsx @@ -15,7 +15,7 @@ import { Settings2, ScrollText, } from 'lucide-react'; -import { getStatus, updateProviderConfig } from '@/lib/api'; +import { getStatus, updateAgentConfig, updateProviderConfig } from '@/lib/api'; import { Button } from '@/components/ui/button'; import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'; import { Badge } from '@/components/ui/badge'; @@ -42,6 +42,12 @@ type ProviderFormState = { requestTimeoutSeconds: string; }; +type AgentFormState = { + maxTokens: string; + temperature: string; + maxToolIterations: string; +}; + export default function StatusPage() { const { locale } = useAppI18n(); const [status, setStatus] = useState(null); @@ -57,6 +63,13 @@ export default function StatusPage() { })); const [savingProvider, setSavingProvider] = useState(false); const [providerError, setProviderError] = useState(null); + const [agentForm, setAgentForm] = useState(() => ({ + maxTokens: '', + temperature: '0.2', + maxToolIterations: '30', + })); + const [savingAgent, setSavingAgent] = useState(false); + const [agentError, setAgentError] = useState(null); const loadStatus = async () => { setLoading(true); @@ -64,6 +77,11 @@ export default function StatusPage() { try { const data = await getStatus(); setStatus(data); + setAgentForm({ + maxTokens: data.max_tokens == null ? 
'' : String(data.max_tokens), + temperature: String(data.temperature), + maxToolIterations: String(data.max_tool_iterations), + }); } catch (err: any) { setError(err.message || pickAppText(locale, '连接后端失败', 'Failed to connect to the backend')); } finally { @@ -115,6 +133,39 @@ export default function StatusPage() { } }; + const handleSaveAgentConfig = async () => { + setSavingAgent(true); + setAgentError(null); + try { + const maxTokensText = agentForm.maxTokens.trim(); + const maxTokens = maxTokensText ? Number(maxTokensText) : null; + const temperature = Number(agentForm.temperature.trim()); + const maxToolIterations = Number(agentForm.maxToolIterations.trim()); + if ( + maxTokens !== null && + (!Number.isInteger(maxTokens) || maxTokens <= 0) + ) { + throw new Error(pickAppText(locale, '最大令牌数必须为空或正整数', 'Max tokens must be blank or a positive integer')); + } + if (!Number.isFinite(temperature) || temperature < 0 || temperature > 2) { + throw new Error(pickAppText(locale, '温度必须在 0 到 2 之间', 'Temperature must be between 0 and 2')); + } + if (!Number.isInteger(maxToolIterations) || maxToolIterations < 0) { + throw new Error(pickAppText(locale, '最大工具迭代次数必须是非负整数', 'Max tool iterations must be a non-negative integer')); + } + await updateAgentConfig({ + max_tokens: maxTokens, + temperature, + max_tool_iterations: maxToolIterations, + }); + await loadStatus(); + } catch (err: any) { + setAgentError(err.message || pickAppText(locale, '保存智能体配置失败', 'Failed to save agent configuration')); + } finally { + setSavingAgent(false); + } + }; + if (loading) { return (
@@ -207,14 +258,47 @@ export default function StatusPage() { {pickAppText(locale, '智能体配置', 'Agent configuration')} - + - - - +
+
+ + setAgentForm((prev) => ({ ...prev, maxTokens: event.target.value }))} + placeholder={pickAppText(locale, '模型默认', 'Model default')} + /> +
+
+ + setAgentForm((prev) => ({ ...prev, temperature: event.target.value }))} + /> +
+
+ + setAgentForm((prev) => ({ ...prev, maxToolIterations: event.target.value }))} + /> +
+
+
+
{agentError || ''}
+ +
diff --git a/app-instance/frontend/app/(app)/tasks/[taskId]/page.tsx b/app-instance/frontend/app/(app)/tasks/[taskId]/page.tsx index ad36a2f..812478e 100644 --- a/app-instance/frontend/app/(app)/tasks/[taskId]/page.tsx +++ b/app-instance/frontend/app/(app)/tasks/[taskId]/page.tsx @@ -6,7 +6,6 @@ import React, { useMemo, useState } from 'react'; import { AlertCircle, ArrowLeft, Loader2, Trash2 } from 'lucide-react'; import { - TaskAcceptanceCard, TaskLiveHeader, TaskSideRail, TaskTimeline, @@ -19,10 +18,12 @@ import { deleteBackendTask, getBackendTask, submitChatFeedback } from '@/lib/api import { pickAppText } from '@/lib/i18n/core'; import { useAppI18n } from '@/lib/i18n/provider'; import { useChatStore } from '@/lib/store'; +import { shouldPollTaskDetail, taskDetailDurationMs } from '@/lib/task-detail-refresh'; import { buildTaskTimelineCards } from '@/lib/task-timeline'; import type { BackendTask } from '@/types'; const TERMINAL_TASK_STATUSES = new Set(['closed', 'abandoned', 'cancelled', 'error']); +const TASK_RESULT_REVIEW_ID = 'task-result-review'; export default function TaskDetailPage() { const { locale } = useAppI18n(); @@ -81,12 +82,12 @@ export default function TaskDetailPage() { const isTaskLive = backendTask ? !TERMINAL_TASK_STATUSES.has(backendTask.status) : false; React.useEffect(() => { - if (!isTaskLive || wsStatus === 'connected') return; + if (!shouldPollTaskDetail(backendTask)) return; const id = window.setInterval(() => { void loadBackendTask(); }, 4000); return () => window.clearInterval(id); - }, [isTaskLive, loadBackendTask, wsStatus]); + }, [backendTask, loadBackendTask]); const taskRunIds = useMemo(() => { const ids = new Set(); @@ -129,7 +130,7 @@ export default function TaskDetailPage() { const activeLabel = [...timelineCards].reverse().find((card) => !['acceptance', 'task_created'].includes(card.type))?.title ?? '-'; - const durationMs = backendTask ? taskDurationMs(backendTask) : null; + const durationMs = backendTask ? 
taskDetailDurationMs(backendTask) : null; const feedbackRunId = backendTask ? pickFeedbackRunId(backendTask) : null; const runAction = async (key: string, action: () => Promise) => { @@ -161,7 +162,7 @@ export default function TaskDetailPage() { return (
- +
@@ -187,30 +188,32 @@ export default function TaskDetailPage() { ) : null} - - - - runAction(`backend-feedback-${feedbackType}`, async () => { - if (!feedbackRunId) throw new Error(pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.')); - await submitChatFeedback({ - sessionId: backendTask.session_id, - runId: feedbackRunId, - feedbackType, - comment, - }); - updateMessageFeedback(feedbackRunId, feedbackType); - setRevision(''); - await loadBackendTask(); - }) - } + + runAction(`backend-feedback-${feedbackType}`, async () => { + if (!feedbackRunId) throw new Error(pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.')); + await submitChatFeedback({ + sessionId: backendTask.session_id, + runId: feedbackRunId, + feedbackType, + comment, + }); + updateMessageFeedback(feedbackRunId, feedbackType); + setRevision(''); + await loadBackendTask(); + }), + }} />
@@ -252,10 +255,3 @@ function pickFeedbackRunId(task: BackendTask): string | null { if (runs.length > 0) return runs[runs.length - 1].run_id; return null; } - -function taskDurationMs(task: BackendTask): number | null { - const start = new Date(task.created_at).getTime(); - const end = new Date(task.closed_at || task.updated_at).getTime(); - if (!Number.isFinite(start) || !Number.isFinite(end)) return null; - return Math.max(0, end - start); -} diff --git a/app-instance/frontend/components/task-detail/TaskAcceptanceCard.tsx b/app-instance/frontend/components/task-detail/TaskAcceptanceCard.tsx index 167706f..111c00a 100644 --- a/app-instance/frontend/components/task-detail/TaskAcceptanceCard.tsx +++ b/app-instance/frontend/components/task-detail/TaskAcceptanceCard.tsx @@ -113,19 +113,6 @@ export function TaskAcceptanceCard({ onSubmit, }: Props) { const { locale } = useAppI18n(); - const [localComment, setLocalComment] = React.useState(''); - const comment = revision ?? localComment; - const setComment = onRevisionChange ?? setLocalComment; - const isFinalized = taskStatus === 'closed' || taskStatus === 'abandoned'; - const isReadyForAcceptance = READY_FOR_ACCEPTANCE_STATUSES.has(taskStatus); - const recordedFeedback = feedbackForRun(feedbackItems, runId) ?? (isFinalized ? latestFeedback(feedbackItems) : null); - const canSubmit = Boolean(runId) && !recordedFeedback && !isFinalized && isReadyForAcceptance && !actionBusy; - const trimmedComment = comment.trim(); - - const submit = (feedbackType: TaskFeedbackType, nextComment?: string) => { - if (!runId || !canSubmit) return; - void onSubmit(feedbackType, nextComment); - }; return ( @@ -141,7 +128,49 @@ export function TaskAcceptanceCard({ )}
- + + + + + ); +} + +export function TaskAcceptanceControls({ + sessionId, + runId, + taskStatus, + feedbackItems, + actionBusy, + revision, + onRevisionChange, + onSubmit, +}: Props) { + const { locale } = useAppI18n(); + const [localComment, setLocalComment] = React.useState(''); + const comment = revision ?? localComment; + const setComment = onRevisionChange ?? setLocalComment; + const isFinalized = taskStatus === 'closed' || taskStatus === 'abandoned'; + const isReadyForAcceptance = READY_FOR_ACCEPTANCE_STATUSES.has(taskStatus); + const recordedFeedback = feedbackForRun(feedbackItems, runId) ?? (isFinalized ? latestFeedback(feedbackItems) : null); + const canSubmit = Boolean(runId) && !recordedFeedback && !isFinalized && isReadyForAcceptance && !actionBusy; + const trimmedComment = comment.trim(); + + const submit = (feedbackType: TaskFeedbackType, nextComment?: string) => { + if (!runId || !canSubmit) return; + void onSubmit(feedbackType, nextComment); + }; + + return ( +
{recordedFeedback ? (
@@ -207,7 +236,6 @@ export function TaskAcceptanceCard({ {pickAppText(locale, '会话:', 'Session: ')} {sessionId}
- - +
); } diff --git a/app-instance/frontend/components/task-detail/TaskLiveHeader.tsx b/app-instance/frontend/components/task-detail/TaskLiveHeader.tsx index 13a809b..ef9b743 100644 --- a/app-instance/frontend/components/task-detail/TaskLiveHeader.tsx +++ b/app-instance/frontend/components/task-detail/TaskLiveHeader.tsx @@ -1,7 +1,7 @@ 'use client'; import Link from 'next/link'; -import { ArrowLeft, MessageSquare } from 'lucide-react'; +import { ArrowLeft, CheckCircle2, MessageSquare } from 'lucide-react'; import { TaskRuntimeStatusBadge, formatTaskRuntimeDuration, formatTaskRuntimeTime } from '@/components/task-runtime/TaskRuntimeShared'; import { Badge } from '@/components/ui/badge'; @@ -15,6 +15,7 @@ type Props = { task: BackendTask; activeLabel: string; durationMs: number | null; + reviewTargetId?: string; }; const RUNTIME_STATUSES = new Set(['queued', 'running', 'waiting', 'blocked', 'done', 'error', 'cancelled']); @@ -36,9 +37,10 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') { return item ? pickAppText(locale, item[0], item[1]) : status; } -export function TaskLiveHeader({ task, activeLabel, durationMs }: Props) { +export function TaskLiveHeader({ task, activeLabel, durationMs, reviewTargetId }: Props) { const { locale } = useAppI18n(); const title = task.short_title || String(task.metadata?.short_title || '') || task.description || task.goal || task.task_id; + const showReviewLink = Boolean(reviewTargetId && ['awaiting_acceptance', 'needs_revision'].includes(task.status)); return (
@@ -67,6 +69,14 @@ export function TaskLiveHeader({ task, activeLabel, durationMs }: Props) { )} {activeLabel ? {activeLabel} : null} + {showReviewLink ? ( + + ) : null}
diff --git a/app-instance/frontend/components/task-detail/TaskSideRail.tsx b/app-instance/frontend/components/task-detail/TaskSideRail.tsx index 99a82d0..5e393bd 100644 --- a/app-instance/frontend/components/task-detail/TaskSideRail.tsx +++ b/app-instance/frontend/components/task-detail/TaskSideRail.tsx @@ -34,11 +34,28 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') { needs_revision: ['需要修改', 'Needs revision'], closed: ['已完成', 'Closed'], abandoned: ['已放弃', 'Abandoned'], + accept: ['已接受', 'Accepted'], + satisfied: ['已接受', 'Accepted'], + revise: ['已请求修改', 'Revision requested'], + abandon: ['已放弃', 'Abandoned'], }; const item = map[status]; return item ? pickAppText(locale, item[0], item[1]) : status; } +function latestFeedback(task: BackendTask): Record | null { + return [...(task.feedback ?? [])].reverse()[0] ?? null; +} + +function acceptanceState(task: BackendTask, locale: 'zh-CN' | 'en-US'): string { + const feedback = latestFeedback(task); + const kind = String(feedback?.acceptance_type || feedback?.feedback_type || ''); + if (kind) return humanTaskStatus(kind, locale); + if (task.status === 'awaiting_acceptance') return pickAppText(locale, '等待验收', 'Awaiting acceptance'); + if (task.status === 'needs_revision') return pickAppText(locale, '等待修改', 'Awaiting revision'); + return pickAppText(locale, '未验收', 'No acceptance yet'); +} + function toTime(value: string): number { const parsed = new Date(value).getTime(); return Number.isFinite(parsed) ? parsed : 0; @@ -135,6 +152,9 @@ export function TaskSideRail({ task, runs, artifacts, cards }: Props) {
{pickAppText(locale, '更新', 'Updated')}: {formatTaskRuntimeTime(task.updated_at, locale)}
+
+ {pickAppText(locale, '验收', 'Acceptance')}: {acceptanceState(task, locale)} +
diff --git a/app-instance/frontend/components/task-detail/TaskTimeline.tsx b/app-instance/frontend/components/task-detail/TaskTimeline.tsx index b6d9efb..dfa8f58 100644 --- a/app-instance/frontend/components/task-detail/TaskTimeline.tsx +++ b/app-instance/frontend/components/task-detail/TaskTimeline.tsx @@ -7,14 +7,16 @@ import { pickAppText } from '@/lib/i18n/core'; import { useAppI18n } from '@/lib/i18n/provider'; import type { TaskTimelineCard as TaskTimelineCardView } from '@/types'; -import { TaskTimelineCard } from './TaskTimelineCard'; +import { TaskTimelineCard, type TaskResultAcceptance } from './TaskTimelineCard'; type Props = { cards: TaskTimelineCardView[]; isLive: boolean; + resultAcceptance?: TaskResultAcceptance; + reviewTargetId?: string; }; -export function TaskTimeline({ cards, isLive }: Props) { +export function TaskTimeline({ cards, isLive, resultAcceptance, reviewTargetId }: Props) { const { locale } = useAppI18n(); return ( @@ -42,7 +44,7 @@ export function TaskTimeline({ cards, isLive }: Props) { ) : (
{cards.map((card) => ( - + ))}
)} diff --git a/app-instance/frontend/components/task-detail/TaskTimelineCard.tsx b/app-instance/frontend/components/task-detail/TaskTimelineCard.tsx index bf1d050..e07569c 100644 --- a/app-instance/frontend/components/task-detail/TaskTimelineCard.tsx +++ b/app-instance/frontend/components/task-detail/TaskTimelineCard.tsx @@ -6,8 +6,10 @@ import { Bot, CheckCircle2, ClipboardList, + ChevronDown, FileText, GitBranch, + History, ListChecks, Sparkles, TerminalSquare, @@ -24,8 +26,23 @@ import { useAppI18n } from '@/lib/i18n/provider'; import type { TaskRuntimeStatus } from '@/lib/task-runtime'; import type { TaskTimelineCard as TaskTimelineCardView, TaskTimelineCardType } from '@/types'; +import { TaskAcceptanceControls, type TaskFeedbackItem, type TaskFeedbackType } from './TaskAcceptanceCard'; + type Props = { card: TaskTimelineCardView; + resultAcceptance?: TaskResultAcceptance; + reviewTargetId?: string; +}; + +export type TaskResultAcceptance = { + sessionId: string; + runId: string | null; + taskStatus: string; + feedbackItems: TaskFeedbackItem[]; + actionBusy: string | null; + revision?: string; + onRevisionChange?: (value: string) => void; + onSubmit: (feedbackType: TaskFeedbackType, comment?: string) => Promise; }; const RUNTIME_STATUSES = new Set(['queued', 'running', 'waiting', 'blocked', 'done', 'error', 'cancelled']); @@ -60,6 +77,8 @@ function iconForType(type: TaskTimelineCardType) { return AlertTriangle; case 'result': return CheckCircle2; + case 'result_history': + return History; case 'acceptance': return ThumbsUp; } @@ -87,6 +106,7 @@ function cardTypeLabel(type: TaskTimelineCardType, locale: 'zh-CN' | 'en-US') { artifact: ['产物', 'Artifact'], error: ['异常', 'Error'], result: ['结果', 'Result'], + result_history: ['历史结果', 'Result history'], acceptance: ['验收', 'Acceptance'], }; const label = labels[type]; @@ -111,12 +131,57 @@ function humanStatus(status: string, locale: 'zh-CN' | 'en-US') { return label ? 
pickAppText(locale, label[0], label[1]) : status; } -export function TaskTimelineCard({ card }: Props) { +function historyVersions(details: Record | undefined): Array> { + const versions = details?.versions; + return Array.isArray(versions) ? versions.filter((item): item is Record => Boolean(item) && typeof item === 'object') : []; +} + +function renderHistoryStatus(version: Record, locale: 'zh-CN' | 'en-US') { + const status = String(version.acceptanceType || version.status || ''); + return status ? humanStatus(status, locale) : pickAppText(locale, '历史版本', 'Previous version'); +} + +function TaskResultHistory({ card }: { card: TaskTimelineCardView }) { const { locale } = useAppI18n(); - const Icon = iconForType(card.type); + const versions = historyVersions(card.details); return ( - +
+ + {pickAppText(locale, '展开历史版本', 'Show previous versions')} + + +
+ {versions.map((version, index) => ( +
+
+
+ {pickAppText(locale, `第 ${index + 1} 轮结果`, `Version ${index + 1}`)} +
+ + {renderHistoryStatus(version, locale)} + +
+ {version.result ?

{String(version.result)}

: null} + {version.comment ? ( +
+ {pickAppText(locale, '修改意见', 'Revision note')}: {String(version.comment)} +
+ ) : null} +
+ ))} +
+
+ ); +} + +export function TaskTimelineCard({ card, resultAcceptance, reviewTargetId }: Props) { + const { locale } = useAppI18n(); + const Icon = iconForType(card.type); + const shouldRenderResultAcceptance = Boolean(card.type === 'result' && resultAcceptance && card.runId === resultAcceptance.runId); + + return ( +
@@ -150,7 +215,13 @@ export function TaskTimelineCard({ card }: Props) { {card.summary ?

{card.summary}

: null} - {card.details ? ( + {shouldRenderResultAcceptance ? ( +
+ +
+ ) : null} + + {card.type === 'result_history' ? : card.details ? (
{pickAppText(locale, '详情 JSON', 'Details JSON')} diff --git a/app-instance/frontend/lib/api.ts b/app-instance/frontend/lib/api.ts index 54baabe..0ad0001 100644 --- a/app-instance/frontend/lib/api.ts +++ b/app-instance/frontend/lib/api.ts @@ -4,6 +4,7 @@ import type { AuthzStatus, AuthUser, ActiveTask, + AgentConfigPayload, ChatLogsResponse, BackendTask, ChatMessage, @@ -620,6 +621,13 @@ export async function getStatus(): Promise { return fetchJSON('/api/status'); } +export async function updateAgentConfig(payload: AgentConfigPayload): Promise<{ ok: boolean }> { + return fetchJSON('/api/agent-config', { + method: 'POST', + body: JSON.stringify(payload), + }); +} + export async function updateProviderConfig( providerId: string, payload: ProviderConfigPayload diff --git a/app-instance/frontend/lib/task-detail-refresh.test.ts b/app-instance/frontend/lib/task-detail-refresh.test.ts new file mode 100644 index 0000000..35502d4 --- /dev/null +++ b/app-instance/frontend/lib/task-detail-refresh.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it, vi } from 'vitest'; + +import { shouldPollTaskDetail, taskDetailDurationMs } from '@/lib/task-detail-refresh'; +import type { BackendTask } from '@/types'; + +const baseTask: BackendTask = { + task_id: 'task-1', + session_id: 'web:test', + description: '查找餐厅', + goal: '查找餐厅', + constraints: [], + priority: 0, + status: 'running', + creator: 'main-agent', + created_at: '2026-05-27T02:02:41.000Z', + updated_at: '2026-05-27T02:02:41.500Z', + run_ids: [], + skill_names: [], + feedback: [], + metadata: {}, +}; + +describe('task detail refresh helpers', () => { + it('polls executing task details regardless of websocket status', () => { + expect(shouldPollTaskDetail({ ...baseTask, status: 'running' })).toBe(true); + expect(shouldPollTaskDetail({ ...baseTask, status: 'open' })).toBe(true); + expect(shouldPollTaskDetail({ ...baseTask, status: 'awaiting_acceptance' })).toBe(false); + expect(shouldPollTaskDetail({ ...baseTask, status: 
'closed' })).toBe(false); + }); + + it('uses current time for active task duration instead of stale updated_at', () => { + vi.setSystemTime(new Date('2026-05-27T02:03:41.000Z')); + + expect(taskDetailDurationMs(baseTask)).toBe(60_000); + expect(taskDetailDurationMs({ ...baseTask, status: 'awaiting_acceptance', updated_at: '2026-05-27T02:10:55.000Z' })).toBe(494_000); + }); +}); diff --git a/app-instance/frontend/lib/task-detail-refresh.ts b/app-instance/frontend/lib/task-detail-refresh.ts new file mode 100644 index 0000000..9bff2c6 --- /dev/null +++ b/app-instance/frontend/lib/task-detail-refresh.ts @@ -0,0 +1,18 @@ +import type { BackendTask } from '@/types'; + +const EXECUTING_TASK_STATUSES = new Set(['open', 'queued', 'running']); +const FINISHED_FOR_DURATION_STATUSES = new Set(['awaiting_acceptance', 'closed', 'abandoned', 'cancelled', 'error']); + +export function shouldPollTaskDetail(task: Pick | null): boolean { + return Boolean(task && EXECUTING_TASK_STATUSES.has(task.status)); +} + +export function taskDetailDurationMs(task: Pick): number | null { + const start = new Date(task.created_at).getTime(); + const end = FINISHED_FOR_DURATION_STATUSES.has(task.status) + ? 
new Date(task.closed_at || task.updated_at).getTime() + : Date.now(); + + if (!Number.isFinite(start) || !Number.isFinite(end)) return null; + return Math.max(0, end - start); +} diff --git a/app-instance/frontend/lib/task-timeline.test.ts b/app-instance/frontend/lib/task-timeline.test.ts index ef3387b..ecf0f80 100644 --- a/app-instance/frontend/lib/task-timeline.test.ts +++ b/app-instance/frontend/lib/task-timeline.test.ts @@ -166,6 +166,133 @@ describe('buildTaskTimelineCards', () => { expect(cards.at(-1)?.summary).toContain('可以'); }); + it('uses the latest assistant message from the acceptance run as the result body', () => { + const task = makeTask({ + status: 'awaiting_acceptance', + updated_at: '2026-05-26T10:04:00.000Z', + run_ids: ['run-main'], + runs: [ + { + run_id: 'run-main', + title: '主 Agent', + session_id: 'web:default', + messages: [ + { role: 'assistant', content: 'Draft answer', created_at: '2026-05-26T10:03:00.000Z' }, + { role: 'assistant', content: 'Final user-visible answer', created_at: '2026-05-26T10:04:00.000Z' }, + ], + }, + ], + }); + const processEvents: ProcessEvent[] = [ + { + event_id: 'evt-result-ready', + run_id: 'run-main', + parent_run_id: null, + kind: 'task_result_ready', + actor_type: 'system', + actor_id: 'evidence', + actor_name: 'Evidence', + text: 'The task result is ready for user acceptance.', + created_at: '2026-05-26T10:04:00.000Z', + metadata: { + result_summary: 'Summary should not replace the final answer.', + }, + }, + ]; + + const cards = buildTaskTimelineCards({ task, processEvents }); + const result = cards.find((card) => card.type === 'result'); + + expect(result?.summary).toBe('Final user-visible answer'); + expect(result?.details?.result_summary).toBe('Summary should not replace the final answer.'); + }); + + it('collapses previous result and acceptance cards into a history pack', () => { + const task = makeTask({ + status: 'awaiting_acceptance', + updated_at: '2026-05-26T10:12:00.000Z', + run_ids: ['run-1', 
'run-2'], + feedback: [ + { + acceptance_type: 'revise', + comment: 'Add decisions', + created_at: '2026-05-26T10:06:00.000Z', + run_id: 'run-1', + }, + ], + runs: [ + { + run_id: 'run-1', + title: '主 Agent', + session_id: 'web:default', + messages: [{ role: 'assistant', content: 'Version one answer', created_at: '2026-05-26T10:05:00.000Z' }], + }, + { + run_id: 'run-2', + title: '主 Agent', + session_id: 'web:default', + messages: [{ role: 'assistant', content: 'Version two answer', created_at: '2026-05-26T10:12:00.000Z' }], + }, + ], + }); + const processEvents: ProcessEvent[] = [ + { + event_id: 'evt-result-1', + run_id: 'run-1', + parent_run_id: null, + kind: 'task_result_ready', + actor_type: 'system', + actor_id: 'evidence', + actor_name: 'Evidence', + text: 'Result one ready.', + created_at: '2026-05-26T10:05:00.000Z', + }, + { + event_id: 'evt-plan-2', + run_id: 'run-2', + parent_run_id: null, + kind: 'task_planned', + actor_type: 'system', + actor_id: 'planner', + actor_name: 'Task Planner', + text: 'Second attempt planned.', + created_at: '2026-05-26T10:08:00.000Z', + }, + { + event_id: 'evt-result-2', + run_id: 'run-2', + parent_run_id: null, + kind: 'task_result_ready', + actor_type: 'system', + actor_id: 'evidence', + actor_name: 'Evidence', + text: 'Result two ready.', + created_at: '2026-05-26T10:12:00.000Z', + }, + ]; + + const cards = buildTaskTimelineCards({ task, processEvents }); + + expect(cards.map((card) => card.type)).toEqual([ + 'task_created', + 'result_history', + 'plan', + 'result', + ]); + const history = cards.find((card) => card.type === 'result_history'); + expect(history?.summary).toBe('1 历史结果版本'); + expect(history?.details?.versions).toEqual([ + expect.objectContaining({ + runId: 'run-1', + result: 'Version one answer', + acceptanceType: 'revise', + comment: 'Add decisions', + }), + ]); + expect(cards.find((card) => card.id === 'evt-plan-2')).toBeTruthy(); + expect(cards.at(-1)?.summary).toBe('Version two answer'); + }); + it('does 
not add fallback progress when a child run already has progress events', () => { const task = makeTask(); const processRuns: ProcessRun[] = [ @@ -201,6 +328,51 @@ describe('buildTaskTimelineCards', () => { expect(cards.map((card) => card.id)).not.toContain('run-research:fallback-progress'); }); + it('marks a tool call as finished when a matching tool result exists', () => { + const task = makeTask(); + const processEvents: ProcessEvent[] = [ + { + event_id: 'evt-tool-start', + run_id: 'run-main', + parent_run_id: null, + kind: 'tool_call_started', + actor_type: 'mcp', + actor_id: 'web_search', + actor_name: 'web_search', + text: 'Calling tool: web_search.', + status: 'running', + created_at: '2026-05-26T10:02:00.000Z', + metadata: { + tool_call_id: 'call-1', + tool_name: 'web_search', + }, + }, + { + event_id: 'evt-tool-finish', + run_id: 'run-main', + parent_run_id: null, + kind: 'tool_call_finished', + actor_type: 'mcp', + actor_id: 'web_search', + actor_name: 'web_search', + text: 'Search failed.', + status: 'error', + created_at: '2026-05-26T10:03:00.000Z', + metadata: { + tool_call_id: 'call-1', + tool_name: 'web_search', + result_summary: 'Search failed.', + }, + }, + ]; + + const cards = buildTaskTimelineCards({ task, processEvents }); + + expect(cards.find((card) => card.id === 'evt-tool-start')?.status).toBe('error'); + expect(cards.find((card) => card.id === 'evt-tool-finish')?.type).toBe('tool_result'); + expect(cards.find((card) => card.id === 'evt-tool-finish')?.summary).toBe('Search failed.'); + }); + it('maps agent_finished events without timeline metadata to agent progress cards', () => { const task = makeTask(); const processEvents: ProcessEvent[] = [ diff --git a/app-instance/frontend/lib/task-timeline.ts b/app-instance/frontend/lib/task-timeline.ts index ae3c9a9..f0132fd 100644 --- a/app-instance/frontend/lib/task-timeline.ts +++ b/app-instance/frontend/lib/task-timeline.ts @@ -27,6 +27,7 @@ const TIMELINE_CARD_TYPES = new Set([ 'artifact', 
'error', 'result', + 'result_history', 'acceptance', ]); @@ -77,10 +78,6 @@ function cardTypeForEvent(event: ProcessEvent): TaskTimelineCardType | null { return timelineType; } - if (event.status === 'error') { - return 'error'; - } - switch (String(event.kind)) { case 'task_planned': case 'run_started': @@ -106,6 +103,9 @@ function cardTypeForEvent(event: ProcessEvent): TaskTimelineCardType | null { case 'task_error': return 'error'; default: + if (event.status === 'error') { + return 'error'; + } return null; } } @@ -136,6 +136,8 @@ function titleForCard(type: TaskTimelineCardType, actorName?: string): string { return '执行遇到问题'; case 'result': return '本轮结果'; + case 'result_history': + return '历史结果版本'; case 'acceptance': return '任务验收'; } @@ -182,6 +184,22 @@ function resultSummary(task: BackendTask): string | undefined { ); } +function assistantResultForRun(task: BackendTask, runId: string | null | undefined): string | undefined { + if (!runId) return undefined; + const run = (task.runs ?? []).find((item) => item.run_id === runId); + if (!run) return undefined; + const assistantMessages = run.messages.filter((message) => message.role === 'assistant' && message.content.trim()); + return lastItem(assistantMessages)?.content.trim(); +} + +function resultSummaryForEvent(task: BackendTask, event: ProcessEvent): string | undefined { + return firstString(assistantResultForRun(task, event.run_id), summaryForEvent(event)); +} + +function fallbackResultSummary(task: BackendTask): string | undefined { + return firstString(assistantResultForRun(task, lastItem(task.run_ids)), resultSummary(task)); +} + function buildRunMap(processRuns: ProcessRun[]): Map { const map = new Map(); for (const run of processRuns) { @@ -239,12 +257,106 @@ function isCoveredByAcceptanceEvent( return matchingTypeEvents.length === 1; } +function cardTime(card: TaskTimelineCard): number { + return toTime(card.createdAt) ?? 
Number.MAX_SAFE_INTEGER; +} + +function cardComment(card: TaskTimelineCard): string | undefined { + return firstString(card.details?.comment, card.summary); +} + +function toolCallKeyFromEvent(event: ProcessEvent): string | null { + const toolCallId = firstString(event.metadata?.tool_call_id); + if (toolCallId) return `${event.run_id}:${toolCallId}`; + + const toolName = firstString(event.metadata?.tool_name, event.actor_name, event.actor_id); + if (toolName) return `${event.run_id}:${toolName}`; + + return null; +} + +function buildToolResultStatusByCall(processEvents: ProcessEvent[]): Map { + const statuses = new Map(); + for (const event of processEvents) { + if (cardTypeForEvent(event) !== 'tool_result') continue; + const key = toolCallKeyFromEvent(event); + if (!key) continue; + statuses.set(key, event.status || 'done'); + } + return statuses; +} + +function buildResultHistoryCard(task: BackendTask, resultCards: TaskTimelineCard[], acceptanceCards: TaskTimelineCard[]): TaskTimelineCard { + const versions = resultCards.map((resultCard) => { + const acceptanceCard = acceptanceCards + .filter((card) => card.runId === resultCard.runId) + .sort((a, b) => cardTime(a) - cardTime(b)) + .at(-1); + return { + runId: resultCard.runId ?? null, + result: resultCard.summary ?? '', + createdAt: resultCard.createdAt, + status: acceptanceCard?.status ?? resultCard.status ?? null, + acceptanceType: acceptanceCard?.status ?? null, + comment: acceptanceCard ? cardComment(acceptanceCard) ?? '' : '', + acceptedAt: acceptanceCard?.createdAt ?? null, + }; + }); + + return { + id: `${task.task_id}:result-history`, + taskId: task.task_id, + type: 'result_history', + title: titleForCard('result_history'), + summary: `${resultCards.length} 历史结果版本`, + createdAt: resultCards[0]?.createdAt ?? 
task.created_at, + details: { versions }, + }; +} + +function collapseHistoricalResults(task: BackendTask, cards: TaskTimelineCard[]): TaskTimelineCard[] { + const resultCards = cards.filter((card) => card.type === 'result'); + if (resultCards.length <= 1) return cards; + + const finalAcceptedRunId = firstString(task.metadata?.final_accepted_run_id); + const latestResult = + (finalAcceptedRunId ? resultCards.find((card) => card.runId === finalAcceptedRunId) : undefined) ?? + [...resultCards].sort((a, b) => cardTime(a) - cardTime(b)).at(-1); + if (!latestResult) return cards; + + const oldResults = resultCards + .filter((card) => card.id !== latestResult.id) + .sort((a, b) => cardTime(a) - cardTime(b)); + if (oldResults.length === 0) return cards; + + const oldRunIds = new Set(oldResults.map((card) => card.runId).filter(Boolean)); + const oldAcceptances = cards + .filter((card) => card.type === 'acceptance' && oldRunIds.has(card.runId)) + .sort((a, b) => cardTime(a) - cardTime(b)); + const foldedIds = new Set([...oldResults, ...oldAcceptances].map((card) => card.id)); + const historyCard = buildResultHistoryCard(task, oldResults, oldAcceptances); + const firstOldResultIndex = cards.findIndex((card) => card.id === oldResults[0].id); + const output: TaskTimelineCard[] = []; + + for (let index = 0; index < cards.length; index += 1) { + if (index === firstOldResultIndex) { + output.push(historyCard); + } + if (!foldedIds.has(cards[index].id)) { + output.push(cards[index]); + } + } + + return output; +} + export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): TaskTimelineCard[] { const { task } = input; const processRuns = input.processRuns ?? task.process_runs ?? []; const processEvents = input.processEvents ?? task.process_events ?? []; const processArtifacts = input.processArtifacts ?? task.process_artifacts ?? 
[]; const runsById = buildRunMap(processRuns); + const toolResultStatusByCall = buildToolResultStatusByCall(processEvents); const runsWithProgressEvents = new Set(); const acceptanceEvents: AcceptanceEventIdentity[] = []; let hasResultEventCard = false; @@ -285,9 +397,12 @@ export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): Task parentRunId: event.parent_run_id, type, title: titleForCard(type, event.actor_name), - summary: summaryForEvent(event), + summary: type === 'result' ? resultSummaryForEvent(task, event) : summaryForEvent(event), actorName: event.actor_name, - status: event.status, + status: + type === 'tool_call' + ? toolResultStatusByCall.get(toolCallKeyFromEvent(event) ?? '') ?? event.status + : event.status, createdAt: event.created_at, details: detailsForEvent(event), }); @@ -340,7 +455,7 @@ export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): Task runId: lastItem(task.run_ids), type: 'result', title: titleForCard('result'), - summary: resultSummary(task), + summary: fallbackResultSummary(task), status: task.status, createdAt: task.closed_at ?? task.updated_at ?? task.created_at, details: task.validation_result ?? 
undefined, @@ -366,8 +481,10 @@ export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): Task }); } - return cards + const sortedCards = cards .map((card, index) => ({ card, index })) .sort(compareCardsByCreatedAt) .map(({ card }) => card); + + return collapseHistoricalResults(task, sortedCards); } diff --git a/app-instance/frontend/types/index.ts b/app-instance/frontend/types/index.ts index a89fa0a..d8dffeb 100644 --- a/app-instance/frontend/types/index.ts +++ b/app-instance/frontend/types/index.ts @@ -142,6 +142,12 @@ export interface ProviderConfigPayload { request_timeout_seconds?: number; } +export interface AgentConfigPayload { + max_tokens: number | null; + temperature: number; + max_tool_iterations: number; +} + export interface ChannelStatus { name: string; enabled: boolean; @@ -153,7 +159,7 @@ export interface SystemStatus { workspace: string; workspace_exists: boolean; model: string; - max_tokens: number; + max_tokens: number | null; max_context_messages?: number; temperature: number; max_tool_iterations: number; @@ -794,6 +800,7 @@ export type TaskTimelineCardType = | 'artifact' | 'error' | 'result' + | 'result_history' | 'acceptance'; export interface TaskTimelineCard { diff --git a/scripts/deploy-initial-skills.sh b/scripts/deploy-initial-skills.sh new file mode 100644 index 0000000..fe977ec --- /dev/null +++ b/scripts/deploy-initial-skills.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# Deploy initial skills to all runtime instances via docker cp +# Usage: ./scripts/deploy-initial-skills.sh + +set -euo pipefail + +SKILL_SOURCE="/home/ivan/xuan/beaver_project/skills" +DOCKER_NAMES=("app-instance-steven" "app-instance-benson" "app-instance-jayc" "app-instance-officebench") + +SKILLS=( + "outlook-mail" + "filesystem-operation" + "terminal-operation" + "web-operation" + "utility-tools" + "skills-admin" + "cron-scheduler" + "memory-management" +) + +for container in "${DOCKER_NAMES[@]}"; do + echo "==> Deploying to $container..." 
+ + docker exec "$container" mkdir -p /root/.beaver/workspace/skills/_index + + for skill in "${SKILLS[@]}"; do + if [ -d "$SKILL_SOURCE/$skill" ]; then + docker cp "$SKILL_SOURCE/$skill" "$container":/root/.beaver/workspace/skills/ + echo " + $skill" + fi + done + + # Merge index: keep existing entries + add new skills, no duplicates + docker exec "$container" python3 -c " +import json +from pathlib import Path + +idx = Path('/root/.beaver/workspace/skills/_index/published.json') +existing = json.loads(idx.read_text()) if idx.exists() else {'items': []} + +new_skills = $(printf '["%s"]' "$(IFS=,; echo "${SKILLS[*]}")" | sed 's/,/", "/g') + +seen = set(existing['items']) +for s in new_skills: + if s not in seen: + existing['items'].append(s) + seen.add(s) + +idx.write_text(json.dumps(existing, ensure_ascii=False, indent=2) + '\n') +print(f\"Index updated: {len(existing['items'])} skills\") +" + + docker cp "$SKILL_SOURCE/_index/disabled.json" "$container":/root/.beaver/workspace/skills/_index/disabled.json + + echo " [done]" +done + +echo "" +echo "Done! All skills deployed to all instances." 
+echo "Containers: ${DOCKER_NAMES[*]}" +echo "Skills: ${SKILLS[*]}" diff --git a/skills/_index/disabled.json b/skills/_index/disabled.json new file mode 100644 index 0000000..fc69ce2 --- /dev/null +++ b/skills/_index/disabled.json @@ -0,0 +1,3 @@ +{ + "items": [] +} \ No newline at end of file diff --git a/skills/_index/published.json b/skills/_index/published.json new file mode 100644 index 0000000..8541897 --- /dev/null +++ b/skills/_index/published.json @@ -0,0 +1,13 @@ +{ + "items": [ + "outlook-mail", + "filesystem-operation", + "terminal-operation", + "web-operation", + "utility-tools", + "skills-admin", + "cron-scheduler", + "memory-management", + "officebench-mcp" + ] +} diff --git a/skills/cron-scheduler/current.json b/skills/cron-scheduler/current.json new file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/cron-scheduler/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/cron-scheduler/skill.json b/skills/cron-scheduler/skill.json new file mode 100644 index 0000000..8d2958b --- /dev/null +++ b/skills/cron-scheduler/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "定时任务和周期性调度。支持标准 cron 表达式、一次性提醒和持久化任务。", + "display_name": "cron-scheduler", + "lineage": [], + "name": "cron-scheduler", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["cron", "scheduler", "timer", "periodic"], + "updated_at": "2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of file diff --git a/skills/cron-scheduler/versions/v0001/SKILL.md b/skills/cron-scheduler/versions/v0001/SKILL.md new file mode 100644 index 0000000..919995e --- /dev/null +++ b/skills/cron-scheduler/versions/v0001/SKILL.md @@ -0,0 +1,34 @@ +--- +name: cron-scheduler +description: 定时任务和周期性调度。支持标准 cron 表达式、一次性提醒和持久化任务。 +tools: + - cron +--- + +# Cron Scheduler — 定时任务调度 + +基于 cron 表达式的定时任务和一次性提醒。 + +## 工具说明 + +### cron 
+创建和管理 Beaver 定时通知或 Task。 +- `action` (str): `add` | `list` | `remove` | `toggle` | `run` +- `message` (str): 触发时执行的任务说明,`add` 时必填 +- `schedule` (str): 调度表达式,例如 `every 15m`、`0 9 * * *` 或 ISO 时间 +- `every_seconds` (int | None): 固定秒级间隔 +- `cron_expr` (str | None): 标准 5 段 cron 表达式 +- `tz` (str | None): IANA 时区,例如 `Asia/Shanghai` +- `at_iso` (str | None): 一次性任务的 ISO 时间 +- `job_id` (str | None): `remove`、`toggle`、`run` 目标任务 ID +- `enabled` (bool | None): `toggle` 时设置启停状态 +- `mode` (str | None): `notification` 或 `task` +- `requires_followup` (bool | None): task 模式下是否需要用户跟进 + +## 使用原则 + +1. 避开 :00 和 :30 整点分钟,分散负载 +2. 一次性提醒优先使用 `at_iso` 或清晰的 `schedule` +3. 需要持续提醒时使用 `mode="notification"`,需要 Task 跟踪时才用 `mode="task"` +4. 定期用 `action="list"` 确认任务是否按预期调度 +5. 任务触发时 `message` 会完整执行,确保内容自包含 diff --git a/skills/cron-scheduler/versions/v0001/version.json b/skills/cron-scheduler/versions/v0001/version.json new file mode 100644 index 0000000..5f1ca7c --- /dev/null +++ b/skills/cron-scheduler/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for cron scheduling", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "定时任务和周期性调度。支持标准 cron 表达式、一次性提醒和持久化任务。", + "name": "cron-scheduler", + "tools": ["cron"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "cron-scheduler", + "summary": "Cron Scheduler — 基于 cron 表达式的定时任务和一次性提醒", + "summary_hash": "placeholder", + "tool_hints": ["cron"], + "version": "v0001" +} diff --git a/skills/filesystem-operation/current.json b/skills/filesystem-operation/current.json new file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/filesystem-operation/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/filesystem-operation/skill.json 
b/skills/filesystem-operation/skill.json new file mode 100644 index 0000000..8b1c911 --- /dev/null +++ b/skills/filesystem-operation/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "本地文件系统读写、搜索和目录操作。支持读取、写入、修改、搜索文件和目录遍历。", + "display_name": "filesystem-operation", + "lineage": [], + "name": "filesystem-operation", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["filesystem", "file", "io", "directory"], + "updated_at": "2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of file diff --git a/skills/filesystem-operation/versions/v0001/SKILL.md b/skills/filesystem-operation/versions/v0001/SKILL.md new file mode 100644 index 0000000..2357f5d --- /dev/null +++ b/skills/filesystem-operation/versions/v0001/SKILL.md @@ -0,0 +1,50 @@ +--- +name: filesystem-operation +description: 本地文件系统读写、搜索和目录操作。支持读取、写入、修改、搜索文件和目录遍历。 +tools: + - read_file + - write_file + - patch_file + - search_files + - list_directory +--- + +# Filesystem Operation — 文件系统操作 + +本地文件系统工具集,用于读写和搜索项目文件。 + +## 工具说明 + +### read_file +读取本地文件内容。 +- 使用 `skill_view` 查看文件预览 +- 大文件会分页返回,可通过 offset/limit 控制 + +### write_file +写入新文件或覆盖已有文件。 +- 创建新文件时自动创建父目录 +- 写入前确认不会覆盖重要配置 + +### patch_file +精确修改文件中的指定内容。 +- 通过搜索-替换方式修改 +- 适用于局部更新,避免整文件重写 + +### search_files +在项目中搜索文件名或内容。 +- 支持 glob 模式匹配 +- 支持按内容搜索 +- 支持限制搜索目录深度 + +### list_directory +列出目录内容。 +- 可递归列出子目录 +- 支持过滤文件类型 + +## 使用原则 + +1. 优先使用 `read_file` 查看文件内容,再决定修改方案 +2. 小范围修改用 `patch_file`,大范围用 `write_file` +3. 搜索文件时先确认路径是否存在 +4. 修改前确认文件编码(默认 UTF-8) +5. 
敏感文件(.env、密钥等)不写入版本控制 diff --git a/skills/filesystem-operation/versions/v0001/version.json b/skills/filesystem-operation/versions/v0001/version.json new file mode 100644 index 0000000..25a2812 --- /dev/null +++ b/skills/filesystem-operation/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for local filesystem operations", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "本地文件系统读写、搜索和目录操作。支持读取、写入、修改、搜索文件和目录遍历。", + "name": "filesystem-operation", + "tools": ["read_file", "write_file", "patch_file", "search_files", "list_directory"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "filesystem-operation", + "summary": "Filesystem Operation — 本地文件系统操作工具集", + "summary_hash": "placeholder", + "tool_hints": ["read_file", "write_file", "patch_file", "search_files", "list_directory"], + "version": "v0001" +} \ No newline at end of file diff --git a/skills/memory-management/current.json b/skills/memory-management/current.json new file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/memory-management/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/memory-management/skill.json b/skills/memory-management/skill.json new file mode 100644 index 0000000..aed0db8 --- /dev/null +++ b/skills/memory-management/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "持久化记忆管理。存储用户信息、项目上下文、偏好和反馈,实现跨会话记忆。", + "display_name": "memory-management", + "lineage": [], + "name": "memory-management", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["memory", "persistence", "context", "preferences"], + "updated_at": "2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of 
file diff --git a/skills/memory-management/versions/v0001/SKILL.md b/skills/memory-management/versions/v0001/SKILL.md new file mode 100644 index 0000000..81859e5 --- /dev/null +++ b/skills/memory-management/versions/v0001/SKILL.md @@ -0,0 +1,32 @@ +--- +name: memory-management +description: 持久化记忆管理。存储用户信息、项目上下文、偏好和反馈,实现跨会话记忆。 +tools: + - memory +--- + +# Memory Management — 记忆管理 + +持久化记忆系统,保存用户角色、项目上下文、偏好反馈等跨会话信息。 + +## 工具说明 + +### memory +管理记忆条目(新增、替换、删除)。 +- `action` (str): `add` | `replace` | `remove` +- `target` (str): `user` 或 `memory` +- `content` (str | None): `add` 和 `replace` 时的新内容 +- `old_text` (str | None): `replace` 和 `remove` 时定位旧条目的唯一短文本 +- 记忆目标: + - `user`: 用户角色、职责、知识背景、稳定偏好 + - `memory`: 项目约定、环境事实、稳定工具经验 +- 支持自动保存和检索 +- 跨会话持久化 + +## 使用原则 + +1. 了解用户角色偏好后及时保存到 `user` 目标 +2. 用户明确要求记住的内容立即保存 +3. 过时的记忆及时更新或删除 +4. 不保存可以从代码/git 推导出的信息 +5. 记忆是辅助参考,当前上下文和文件状态优先级更高 diff --git a/skills/memory-management/versions/v0001/version.json b/skills/memory-management/versions/v0001/version.json new file mode 100644 index 0000000..a37a813 --- /dev/null +++ b/skills/memory-management/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for memory management", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "持久化记忆管理。存储用户信息、项目上下文、偏好和反馈,实现跨会话记忆。", + "name": "memory-management", + "tools": ["memory"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "memory-management", + "summary": "Memory Management — 持久化记忆系统,支持跨会话信息存储", + "summary_hash": "placeholder", + "tool_hints": ["memory"], + "version": "v0001" +} diff --git a/skills/officebench-mcp/current.json b/skills/officebench-mcp/current.json new file mode 100644 index 0000000..72dae86 --- /dev/null +++ b/skills/officebench-mcp/current.json @@ -0,0 +1,4 @@ +{ + "current_version": "v0001" +} + diff
--git a/skills/officebench-mcp/skill.json b/skills/officebench-mcp/skill.json new file mode 100644 index 0000000..60db4ea --- /dev/null +++ b/skills/officebench-mcp/skill.json @@ -0,0 +1,21 @@ +{ + "created_at": "2026-05-27T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "Guidance for OfficeBench evaluation tasks. Use the registered mcp_officebench_* tools to inspect and edit OfficeBench files, spreadsheets, documents, emails, calendars, PDFs, and answer files.", + "display_name": "officebench-mcp", + "lineage": [], + "name": "officebench-mcp", + "owners": [ + "system" + ], + "source_kind": "workspace", + "status": "active", + "tags": [ + "officebench", + "mcp", + "evaluation", + "office" + ], + "updated_at": "2026-05-27T00:00:00.000000+00:00" +} + diff --git a/skills/officebench-mcp/versions/v0001/SKILL.md b/skills/officebench-mcp/versions/v0001/SKILL.md new file mode 100644 index 0000000..d8286ca --- /dev/null +++ b/skills/officebench-mcp/versions/v0001/SKILL.md @@ -0,0 +1,190 @@ +--- +name: officebench-mcp +description: Guidance for OfficeBench evaluation tasks. Use the registered mcp_officebench_* tools to inspect and edit OfficeBench files, spreadsheets, documents, emails, calendars, PDFs, and answer files. 
+always: true +tools: + - mcp_officebench_excel_read_file + - mcp_officebench_excel_set_cell + - mcp_officebench_excel_delete_cell + - mcp_officebench_excel_create_new_file + - mcp_officebench_excel_convert_to_pdf + - mcp_officebench_word_read_file + - mcp_officebench_word_write_to_file + - mcp_officebench_word_create_new_file + - mcp_officebench_word_convert_to_pdf + - mcp_officebench_email_list_emails + - mcp_officebench_email_read_email + - mcp_officebench_email_send_email + - mcp_officebench_calendar_create_event + - mcp_officebench_calendar_list_events + - mcp_officebench_calendar_delete_event + - mcp_officebench_pdf_read_file + - mcp_officebench_pdf_convert_to_word + - mcp_officebench_pdf_convert_to_image + - mcp_officebench_ocr_recognize_file + - mcp_officebench_shell_command + - mcp_officebench_shell_list_directory + - mcp_officebench_shell_read_file + - mcp_officebench_shell_write_file + - mcp_officebench_shell_copy_file + - mcp_officebench_system_finish_task + - mcp_officebench_system_get_status + - mcp_officebench_image_convert_to_pdf +--- + +# OfficeBench MCP Skill + +Use this skill for OfficeBench evaluation runs. OfficeBench task files live in the OfficeBench MCP server, not in Beaver's local filesystem. Complete the task by calling real `mcp_officebench_*` tools. + +## Critical Rules + +1. Use actual Beaver tool calls only. Do not print XML, DSML, JSON, or markdown that describes a tool call. +2. Never invent tool names. If you need to find files, use `mcp_officebench_shell_list_directory` or `mcp_officebench_shell_command`. +3. Do not use Beaver local filesystem, local runtime, local terminal, or local code tools for OfficeBench files. +4. Paths are relative to `/testbed` in the OfficeBench MCP container, such as `data/score.xlsx`. +5. If the task context gives a `workspace_id`, pass that same `workspace_id` argument in every OfficeBench MCP tool call that supports it. +6. Inspect files before editing them. +7. 
Verify the requested output file or edited cell exists before finishing. +8. Finish every task with `mcp_officebench_system_finish_task`. + +## Tool Names And Use + +### Excel + +Use these for `.xlsx` files: + +- `mcp_officebench_excel_read_file`: read workbook sheets and cell values. + - Required: `file_path` + - Optional: `sheet_name`, `workspace_id` +- `mcp_officebench_excel_set_cell`: write one cell. + - Required: `file_path`, `row`, `col`, `value` + - Optional: `sheet_name`, `workspace_id` + - Rows and columns are 1-based. +- `mcp_officebench_excel_delete_cell`: clear one cell. + - Required: `file_path`, `row`, `col` + - Optional: `sheet_name`, `workspace_id` +- `mcp_officebench_excel_create_new_file`: create a workbook. + - Required: `file_path` + - Optional: `workspace_id` +- `mcp_officebench_excel_convert_to_pdf`: convert an Excel file to PDF. + - Required: `file_path` + - Optional: `workspace_id` + +Typical Excel sequence: + +1. Call `mcp_officebench_shell_list_directory` on `data`. +2. Call `mcp_officebench_excel_read_file` on the target workbook. +3. Identify the exact row and column. +4. Call `mcp_officebench_excel_set_cell`. +5. Read the workbook again or use status/listing to verify. +6. Call `mcp_officebench_system_finish_task`. + +For the common task "change Bob's midterm1 score to 100 in score.xlsx", inspect `data/score.xlsx`, find Bob's row and the `midterm1` column, then call `mcp_officebench_excel_set_cell` with that row, that column, and value `100`. + +### Word + +Use these for `.docx` files: + +- `mcp_officebench_word_read_file`: read all paragraphs. + - Required: `file_path` + - Optional: `workspace_id` +- `mcp_officebench_word_write_to_file`: overwrite or append text. + - Required: `file_path`, `text` + - Optional: `append`, `workspace_id` +- `mcp_officebench_word_create_new_file`: create a new Word document. + - Required: `file_path` + - Optional: `workspace_id` +- `mcp_officebench_word_convert_to_pdf`: convert Word to PDF. 
+ - Required: `file_path` + - Optional: `workspace_id` + +Preserve exact spelling, capitalization, punctuation, and line order from source files. + +### Email + +Use these for email tasks: + +- `mcp_officebench_email_list_emails`: list available `.eml` messages. + - Optional: `folder`, `workspace_id` +- `mcp_officebench_email_read_email`: read one email. + - Required: `email_path` + - Optional: `workspace_id` +- `mcp_officebench_email_send_email`: create/send an email artifact. + - Required: `to`, `subject`, `body` + - Optional: `attachments`, `workspace_id` + +For email-search tasks, final answers should use plain text with literal lines like `Subject: ...`. Do not add markdown labels. + +### Calendar + +Use these for calendar `.ics` tasks: + +- `mcp_officebench_calendar_list_events`: inspect calendar events. + - Optional: `calendar_path`, `workspace_id` +- `mcp_officebench_calendar_create_event`: create an event. + - Required fields depend on the task; include summary/title, start, end, and target calendar when needed. + - Optional: `workspace_id` +- `mcp_officebench_calendar_delete_event`: delete an event. + - Required fields depend on the task; inspect events first. + - Optional: `workspace_id` + +Use the task's current date/time context when interpreting relative dates. + +### PDF, OCR, And Images + +Use these for PDF/image tasks: + +- `mcp_officebench_pdf_read_file`: extract text from a PDF. + - Required: `pdf_file_path` + - Optional: `workspace_id` +- `mcp_officebench_pdf_convert_to_word`: convert PDF to Word. + - Required: `pdf_file_path` + - Optional: `workspace_id` +- `mcp_officebench_pdf_convert_to_image`: convert one PDF page to an image. + - Required: `pdf_file_path` + - Optional: `page_number`, `dpi`, `workspace_id` +- `mcp_officebench_ocr_recognize_file`: OCR an image. + - Required: `image_path` + - Optional: `language`, `workspace_id` +- `mcp_officebench_image_convert_to_pdf`: convert image to PDF. 
+ - Required: `image_path` + - Optional: `output_path`, `workspace_id` + +For conversion tasks, create the exact requested filename and verify it exists. + +### Shell And System + +Use these for safe file discovery and text files: + +- `mcp_officebench_shell_list_directory`: list a directory. + - Optional: `path`, `workspace_id` +- `mcp_officebench_shell_read_file`: read text files such as `.txt`, `.csv`, `.json`, `.md`, `.xml`. + - Required: `file_path` + - Optional: `workspace_id` +- `mcp_officebench_shell_write_file`: write text files. + - Required: `file_path`, `content` + - Optional: `append`, `workspace_id` +- `mcp_officebench_shell_copy_file`: copy a file or directory. + - Required: `source`, `destination` + - Optional: `workspace_id` +- `mcp_officebench_shell_command`: run shell commands inside the OfficeBench MCP container. + - Required: `command` + - Optional: `workdir`, `workspace_id` +- `mcp_officebench_system_get_status`: inspect filesystem/git status. + - Optional: `workspace_id` +- `mcp_officebench_system_finish_task`: mark the task complete and optionally write an answer. + - Optional: `answer`, `workspace_id` + +Prefer dedicated Office tools for Office documents. Use shell tools for listing directories, copying/renaming files, and reading/writing plain text. + +## Anti-Patterns + +Do not do any of the following: + +- Do not call `mcp_officebench_find_in_workspace`; that tool does not exist. +- Do not output raw XML-style tool-call tags, DSML, or pseudo tool call text. +- Do not answer "done" without calling the required OfficeBench tools. +- Do not edit guessed paths without first listing or reading relevant files. +- Do not use `/testbed` as a literal prefix in path arguments unless a tool explicitly asks for an absolute path. +- Do not correct misspellings found in source data. Preserve source text exactly.
+ diff --git a/skills/officebench-mcp/versions/v0001/version.json b/skills/officebench-mcp/versions/v0001/version.json new file mode 100644 index 0000000..79e214a --- /dev/null +++ b/skills/officebench-mcp/versions/v0001/version.json @@ -0,0 +1,80 @@ +{ + "change_reason": "Initial OfficeBench MCP skill for evaluation runs", + "content_hash": "6afdd5a93ce552f39c1e285fc552059cfada7971e0d5bb91bcd56c6ca608ba17", + "created_at": "2026-05-27T00:00:00.000000+00:00", + "created_by": "codex", + "frontmatter": { + "always": true, + "description": "Guidance for OfficeBench evaluation tasks. Use the registered mcp_officebench_* tools to inspect and edit OfficeBench files, spreadsheets, documents, emails, calendars, PDFs, and answer files.", + "name": "officebench-mcp", + "tools": [ + "mcp_officebench_excel_read_file", + "mcp_officebench_excel_set_cell", + "mcp_officebench_excel_delete_cell", + "mcp_officebench_excel_create_new_file", + "mcp_officebench_excel_convert_to_pdf", + "mcp_officebench_word_read_file", + "mcp_officebench_word_write_to_file", + "mcp_officebench_word_create_new_file", + "mcp_officebench_word_convert_to_pdf", + "mcp_officebench_email_list_emails", + "mcp_officebench_email_read_email", + "mcp_officebench_email_send_email", + "mcp_officebench_calendar_create_event", + "mcp_officebench_calendar_list_events", + "mcp_officebench_calendar_delete_event", + "mcp_officebench_pdf_read_file", + "mcp_officebench_pdf_convert_to_word", + "mcp_officebench_pdf_convert_to_image", + "mcp_officebench_ocr_recognize_file", + "mcp_officebench_shell_command", + "mcp_officebench_shell_list_directory", + "mcp_officebench_shell_read_file", + "mcp_officebench_shell_write_file", + "mcp_officebench_shell_copy_file", + "mcp_officebench_system_finish_task", + "mcp_officebench_system_get_status", + "mcp_officebench_image_convert_to_pdf" + ] + }, + "parent_version": null, + "provenance": { + "source": "officebench_mcp", + "source_kind": "workspace" + }, + "review_state": "published", + 
"skill_name": "officebench-mcp", + "summary": "OfficeBench MCP skill for using registered mcp_officebench_* tools correctly during evaluation runs.", + "summary_hash": "914d6759650fce29884f648b84929e0482475c3ccd6601e9903c9b8b826dd874", + "tool_hints": [ + "mcp_officebench_excel_read_file", + "mcp_officebench_excel_set_cell", + "mcp_officebench_excel_delete_cell", + "mcp_officebench_excel_create_new_file", + "mcp_officebench_excel_convert_to_pdf", + "mcp_officebench_word_read_file", + "mcp_officebench_word_write_to_file", + "mcp_officebench_word_create_new_file", + "mcp_officebench_word_convert_to_pdf", + "mcp_officebench_email_list_emails", + "mcp_officebench_email_read_email", + "mcp_officebench_email_send_email", + "mcp_officebench_calendar_create_event", + "mcp_officebench_calendar_list_events", + "mcp_officebench_calendar_delete_event", + "mcp_officebench_pdf_read_file", + "mcp_officebench_pdf_convert_to_word", + "mcp_officebench_pdf_convert_to_image", + "mcp_officebench_ocr_recognize_file", + "mcp_officebench_shell_command", + "mcp_officebench_shell_list_directory", + "mcp_officebench_shell_read_file", + "mcp_officebench_shell_write_file", + "mcp_officebench_shell_copy_file", + "mcp_officebench_system_finish_task", + "mcp_officebench_system_get_status", + "mcp_officebench_image_convert_to_pdf" + ], + "version": "v0001" +} + diff --git a/skills/outlook-mail/current.json b/skills/outlook-mail/current.json new file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/outlook-mail/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/outlook-mail/skill.json b/skills/outlook-mail/skill.json new file mode 100644 index 0000000..eb5ad4c --- /dev/null +++ b/skills/outlook-mail/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "通过 Outlook MCP 进行邮件收发、日历管理和会议安排。支持 Graph API 和 on-prem Exchange。", + "display_name": 
"outlook-mail", + "lineage": [], + "name": "outlook-mail", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["outlook", "email", "calendar", "mcp", "microsoft"], + "updated_at": "2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of file diff --git a/skills/outlook-mail/versions/v0001/SKILL.md b/skills/outlook-mail/versions/v0001/SKILL.md new file mode 100644 index 0000000..f349f2d --- /dev/null +++ b/skills/outlook-mail/versions/v0001/SKILL.md @@ -0,0 +1,150 @@ +--- +name: outlook-mail +description: 通过 Outlook MCP 进行邮件收发、日历管理和会议安排。支持 Graph API 和 on-prem Exchange。 +tools: + - mcp_outlook_mcp_mail_list_folders + - mcp_outlook_mcp_mail_list_messages + - mcp_outlook_mcp_mail_search_messages + - mcp_outlook_mcp_mail_get_message + - mcp_outlook_mcp_mail_send_email + - mcp_outlook_mcp_mail_reply_to_message + - mcp_outlook_mcp_mail_forward_message + - mcp_outlook_mcp_mail_move_message + - mcp_outlook_mcp_mail_delta_sync + - mcp_outlook_mcp_calendar_list_events + - mcp_outlook_mcp_calendar_create_event + - mcp_outlook_mcp_calendar_update_event + - mcp_outlook_mcp_calendar_get_schedule + - mcp_outlook_mcp_calendar_find_meeting_times + - mcp_outlook_mcp_calendar_delta_sync +--- + +# Outlook MCP — 邮件与日历管理 + +通过 MCP server 连接 Outlook(Microsoft Graph / on-prem Exchange),提供邮件和日历的完整操作能力。 + +## 邮件工具 + +### mcp_outlook_mcp_mail_list_folders +列出 Outlook 邮件文件夹。 +- `top` (int, 默认 50): 返回数量上限 + +### mcp_outlook_mcp_mail_list_messages +列出指定文件夹的邮件。 +- `folder` (str, 默认 "inbox"): 文件夹名 +- `top` (int, 默认 20): 返回条数 +- `skip` (int, 默认 0): 跳过的条数 +- `unread_only` (bool, 默认 false): 仅未读 + +### mcp_outlook_mcp_mail_search_messages +搜索邮件(使用 Graph search 语义)。 +- `query` (str): 搜索关键词 +- `folder` (str | None): 限定文件夹 +- `top` (int, 默认 20): 返回条数 + +### mcp_outlook_mcp_mail_get_message +读取单封邮件的完整内容。 +- `message_id` (str): 邮件 ID +- `changekey` (str | None): EWS changekey(on-prem 需要) + +### mcp_outlook_mcp_mail_send_email +发送新邮件。**幂等操作**,支持 idempotency_key。 +- 
`subject` (str): 主题 +- `body` (str): 正文(支持 HTML) +- `to_recipients` (list[str]): 收件人 +- `cc_recipients` (list[str] | None): 抄送 +- `bcc_recipients` (list[str] | None): 密送 +- `idempotency_key` (str | None): 幂等键,防止重复发送 + +### mcp_outlook_mcp_mail_reply_to_message +回复一封邮件。 +- `message_id` (str): 原邮件 ID +- `comment` (str): 回复内容 +- `changekey` (str | None): EWS changekey +- `idempotency_key` (str | None) + +### mcp_outlook_mcp_mail_forward_message +转发邮件给其他人。 +- `message_id` (str): 原邮件 ID +- `to_recipients` (list[str]): 转发目标 +- `comment` (str): 附加说明 +- `cc_recipients` (list[str] | None) +- `changekey` (str | None) +- `idempotency_key` (str | None) + +### mcp_outlook_mcp_mail_move_message +移动邮件到其他文件夹。 +- `message_id` (str): 邮件 ID +- `destination_folder` (str): 目标文件夹 +- `changekey` (str | None) +- `idempotency_key` (str | None) + +### mcp_outlook_mcp_mail_delta_sync +增量同步邮件变更。支持游标持久化,适合长期同步场景。 +- `folder` (str, 默认 "inbox"): 文件夹 +- `delta_link` (str | None): 增量链接(续传时提供) +- `top` (int, 默认 50) +- `persist_cursor` (bool, 默认 true): 是否持久化游标 + +## 日历工具 + +### mcp_outlook_mcp_calendar_list_events +列出日历事件或日历视图。 +- `start_time` (str | None): ISO 开始时间,与 end_time 成对提供 +- `end_time` (str | None): ISO 结束时间 +- `top` (int, 默认 20) +- `skip` (int, 默认 0) + +### mcp_outlook_mcp_calendar_create_event +创建日历事件或正式会议邀请。**幂等操作**。 +- `subject` (str): 主题 +- `start_time` (str): ISO 开始时间 +- `end_time` (str): ISO 结束时间 +- `timezone` (str, 默认 "UTC"): 时区 +- `body` (str | None): 正文 +- `location` (str | None): 地点 +- `attendees` (list[str] | None): 参会人 +- `is_online_meeting` (bool, 默认 false): 是否创建 Teams 会议 +- `online_meeting_provider` (str, 默认 "teamsForBusiness"): 在线会议提供商 +- `transaction_id` (str | None): 事务 ID +- `idempotency_key` (str | None) + +### mcp_outlook_mcp_calendar_update_event +更新已有日历事件。 +- `event_id` (str): 事件 ID +- `subject` / `start_time` / `end_time` / `timezone` / `body` / `location` / `attendees`: 可选更新字段 +- `idempotency_key` (str | None) + +### mcp_outlook_mcp_calendar_get_schedule 
+查询与会人忙闲状态。 +- `schedules` (list[str]): 要查询的人员列表 +- `start_time` (str): ISO 开始 +- `end_time` (str): ISO 结束 +- `availability_view_interval` (int, 默认 30): 时间间隔(分钟) +- `timezone` (str, 默认 "UTC") + +### mcp_outlook_mcp_calendar_find_meeting_times +推荐最佳会议时间。 +- `attendees` (list[str]): 参会人 +- `start_time` (str): 时间范围开始 +- `end_time` (str): 时间范围结束 +- `duration_minutes` (int, 默认 30): 会议时长 +- `timezone` (str, 默认 "UTC") +- `max_candidates` (int, 默认 10): 候选数 + +### mcp_outlook_mcp_calendar_delta_sync +增量同步日历事件变更。 +- `start_time` (str): 同步窗口开始 +- `end_time` (str): 同步窗口结束 +- `delta_link` (str | None): 增量续传链接 +- `top` (int, 默认 50) +- `persist_cursor` (bool, 默认 true) +- `cursor_key` (str, 默认 "calendar:primary") + +## 使用原则 + +1. 邮件操作优先使用幂等键(idempotency_key)防止重复发送 +2. 日历时间参数统一使用 ISO 8601 格式 +3. 增量同步时优先使用返回的 delta_link 续传,避免全量拉取 +4. 发送邮件前确认收件人地址格式正确 +5. 创建会议时明确时区,避免跨时区混淆 diff --git a/skills/outlook-mail/versions/v0001/version.json b/skills/outlook-mail/versions/v0001/version.json new file mode 100644 index 0000000..abfcf68 --- /dev/null +++ b/skills/outlook-mail/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for Outlook MCP mail and calendar operations", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "通过 Outlook MCP 进行邮件收发、日历管理和会议安排。支持 Graph API 和 on-prem Exchange。", + "name": "outlook-mail", + "tools": ["mcp_outlook_mcp_mail_list_folders", "mcp_outlook_mcp_mail_list_messages", "mcp_outlook_mcp_mail_search_messages", "mcp_outlook_mcp_mail_get_message", "mcp_outlook_mcp_mail_send_email", "mcp_outlook_mcp_mail_reply_to_message", "mcp_outlook_mcp_mail_forward_message", "mcp_outlook_mcp_mail_move_message", "mcp_outlook_mcp_mail_delta_sync", "mcp_outlook_mcp_calendar_list_events", "mcp_outlook_mcp_calendar_create_event", "mcp_outlook_mcp_calendar_update_event", "mcp_outlook_mcp_calendar_get_schedule", "mcp_outlook_mcp_calendar_find_meeting_times", 
"mcp_outlook_mcp_calendar_delta_sync"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "outlook-mail", + "summary": "Outlook MCP — 邮件与日历管理。通过 MCP server 连接 Outlook,提供邮件和日历的完整操作能力。", + "summary_hash": "placeholder", + "tool_hints": ["mcp_outlook_mcp_mail_list_folders", "mcp_outlook_mcp_mail_list_messages", "mcp_outlook_mcp_mail_search_messages", "mcp_outlook_mcp_mail_get_message", "mcp_outlook_mcp_mail_send_email", "mcp_outlook_mcp_mail_reply_to_message", "mcp_outlook_mcp_mail_forward_message", "mcp_outlook_mcp_mail_move_message", "mcp_outlook_mcp_mail_delta_sync", "mcp_outlook_mcp_calendar_list_events", "mcp_outlook_mcp_calendar_create_event", "mcp_outlook_mcp_calendar_update_event", "mcp_outlook_mcp_calendar_get_schedule", "mcp_outlook_mcp_calendar_find_meeting_times", "mcp_outlook_mcp_calendar_delta_sync"], + "version": "v0001" +} diff --git a/skills/skills-admin/current.json b/skills/skills-admin/current.json new file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/skills-admin/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/skills-admin/skill.json b/skills/skills-admin/skill.json new file mode 100644 index 0000000..02477c7 --- /dev/null +++ b/skills/skills-admin/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "技能(Skill)列表查看、内容加载和草稿管理。用于浏览已发布技能和创建新技能草稿。", + "display_name": "skills-admin", + "lineage": [], + "name": "skills-admin", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["skills", "admin", "management", "draft"], + "updated_at": "2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of file diff --git a/skills/skills-admin/versions/v0001/SKILL.md b/skills/skills-admin/versions/v0001/SKILL.md new file mode 100644 index 0000000..dbbfbba --- /dev/null 
+++ b/skills/skills-admin/versions/v0001/SKILL.md @@ -0,0 +1,42 @@ +--- +name: skills-admin +description: 技能(Skill)列表查看、内容加载和草稿管理。用于浏览已发布技能和创建新技能草稿。 +tools: + - skills_list + - skill_manage + - skill_view +--- + +# Skills Admin — 技能管理 + +查看已发布的技能列表、加载技能详情和创建新技能草稿。 + +## 工具说明 + +### skills_list +列出系统中所有可用技能及其描述。 +- 返回技能名称、描述和版本 +- 用于浏览当前可用的技能 + +### skill_view +加载某个技能的完整正文或支持文件。 +- `name` (str): 技能名称 +- `file_path` (str | None): 可选的支持文件路径 +- 不传文件路径时返回 SKILL.md 主内容 +- 支持按需加载 references/、templates/ 等目录 + +### skill_manage +创建新技能草稿(draft)。 +- `action` (str): 仅支持 "create_draft" +- `name` (str): 技能名称 +- `description` (str): 技能描述 +- `content` (str): 技能正文(SKILL.md 格式) +- 创建的草稿需经过 review → publish 流程 + +## 使用原则 + +1. 需要参考某个技能的详细内容时,先 `skills_list` 找到名称,再用 `skill_view` 加载 +2. 创建新技能时先写清楚 description,便于后续被 selector 选中 +3. 技能正文使用标准 frontmatter + Markdown 格式 +4. 支持文件放在 skill 目录的 references/、templates/、scripts/ 等子目录 +5. Draft 创建后需要走 review/publish 流程才能生效 diff --git a/skills/skills-admin/versions/v0001/version.json b/skills/skills-admin/versions/v0001/version.json new file mode 100644 index 0000000..3004c67 --- /dev/null +++ b/skills/skills-admin/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for skills management", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "技能(Skill)列表查看、内容加载和草稿管理。用于浏览已发布技能和创建新技能草稿。", + "name": "skills-admin", + "tools": ["skills_list", "skill_manage", "skill_view"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "skills-admin", + "summary": "Skills Admin — 技能列表查看、内容加载和草稿管理", + "summary_hash": "placeholder", + "tool_hints": ["skills_list", "skill_manage", "skill_view"], + "version": "v0001" +} \ No newline at end of file diff --git a/skills/terminal-operation/current.json b/skills/terminal-operation/current.json new 
file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/terminal-operation/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/terminal-operation/skill.json b/skills/terminal-operation/skill.json new file mode 100644 index 0000000..79aea5f --- /dev/null +++ b/skills/terminal-operation/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "Shell 命令执行、后台进程管理和 Python 代码执行。支持超时控制和后台运行。", + "display_name": "terminal-operation", + "lineage": [], + "name": "terminal-operation", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["terminal", "shell", "command", "process", "execution"], + "updated_at": "2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of file diff --git a/skills/terminal-operation/versions/v0001/SKILL.md b/skills/terminal-operation/versions/v0001/SKILL.md new file mode 100644 index 0000000..a8104d6 --- /dev/null +++ b/skills/terminal-operation/versions/v0001/SKILL.md @@ -0,0 +1,46 @@ +--- +name: terminal-operation +description: Shell 命令执行、后台进程管理和 Python 代码执行。支持超时控制和后台运行。 +tools: + - terminal + - process + - execute_code +--- + +# Terminal Operation — 终端与进程管理 + +Shell 命令执行、后台进程管理和 Python 代码执行工具集。 + +## 工具说明 + +### terminal +执行 shell 命令。 +- `command` (str): 要执行的命令 +- `working_dir` (str, 默认 "."): 工作目录 +- `timeout` (int, 默认 60): 超时秒数(最大 600) +- `background` (bool, 默认 false): 是否后台运行 +- 后台运行时返回 process_id,可通过 process 工具管理 + +### process +管理后台进程。 +- `action` (str): `list` | `log` | `kill` +- `process_id` (str | None): 进程 ID +- `list`: 列出所有后台进程 +- `log`: 查看进程日志(最后 12000 字节) +- `kill`: 终止进程(先 SIGTERM,5 秒后 SIGKILL) + +### execute_code +执行 Python 代码片段。 +- `code` (str): Python 代码 +- `language` (str, 默认 "python"): 仅支持 python +- `timeout` (int, 默认 30, 最大 120): 执行超时 +- `working_dir` (str, 默认 "."): 工作目录 +- 适合快速验证脚本逻辑,不适合长期运行任务 + +## 使用原则 + +1. 长期运行任务使用 `background=true` +2. 
执行危险命令(rm -rf、dd、格式化等)前必须确认用户意图 +3. `execute_code` 适合轻量脚本验证,重型任务用 `terminal` +4. 后台进程用完后及时 kill 清理 +5. 注意命令注入风险,不要直接拼接用户输入 diff --git a/skills/terminal-operation/versions/v0001/version.json b/skills/terminal-operation/versions/v0001/version.json new file mode 100644 index 0000000..a61bb76 --- /dev/null +++ b/skills/terminal-operation/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for terminal and process management", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "Shell 命令执行、后台进程管理和 Python 代码执行。支持超时控制和后台运行。", + "name": "terminal-operation", + "tools": ["terminal", "process", "execute_code"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "terminal-operation", + "summary": "Terminal Operation — Shell 命令执行、后台进程管理、Python 代码执行", + "summary_hash": "placeholder", + "tool_hints": ["terminal", "process", "execute_code"], + "version": "v0001" +} \ No newline at end of file diff --git a/skills/utility-tools/current.json b/skills/utility-tools/current.json new file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/utility-tools/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/utility-tools/skill.json b/skills/utility-tools/skill.json new file mode 100644 index 0000000..d1687c7 --- /dev/null +++ b/skills/utility-tools/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "辅助工具集,包括任务分解(Todo)、任务委托(Delegate)、子 Agent 生成(Spawn)、消息发送和需求澄清(Clarify)。", + "display_name": "utility-tools", + "lineage": [], + "name": "utility-tools", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["utility", "delegate", "todo", "spawn", "clarify"], + "updated_at": 
"2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of file diff --git a/skills/utility-tools/versions/v0001/SKILL.md b/skills/utility-tools/versions/v0001/SKILL.md new file mode 100644 index 0000000..70aa0d0 --- /dev/null +++ b/skills/utility-tools/versions/v0001/SKILL.md @@ -0,0 +1,52 @@ +--- +name: utility-tools +description: 辅助工具集,包括任务分解(Todo)、任务委托(Delegate)、子 Agent 生成(Spawn)、消息发送和需求澄清。 +tools: + - clarify + - delegate + - send_message + - spawn + - todo +--- + +# Utility Tools — 辅助工具集 + +任务管理、委托和协作的辅助工具。 + +## 工具说明 + +### todo (TodoWrite) +创建和管理任务列表,跟踪复杂任务的进度。 +- 适合多步骤、复杂任务时使用 +- 标记当前正在进行的任务 +- 完成后立即更新状态 + +### delegate (DelegateTool) +将任务委托给专门的子 Agent 执行。 +- 适合独立、可并行的工作 +- 委托时提供清晰的上下文和目标 +- 子 Agent 完成后再整合结果 + +### spawn (SpawnTool) +启动新的 Agent 实例执行特定任务。 +- 适合需要独立运行的工作 +- 支持后台运行(不阻塞主流程) + +### send_message (SendMessageTool) +与其他 Agent 或团队成员通信。 +- 适合多 Agent 协作场景 +- 消息会直接送达目标 + +### clarify (ClarifyTool) +当需求不明确时向用户提问澄清。 +- 提供 2-4 个选项供用户选择 +- 附带推荐选项和理由 +- 避免模糊提问,给出明确建议 + +## 使用原则 + +1. 复杂任务先创建 Todo 列表,明确步骤 +2. 可并行的工作使用 Delegate/Spawn 分散执行 +3. 需求不明确时主动 Clarify,不要猜测 +4. 多 Agent 协作时保持通信简洁 +5. 
及时在 todo 列表中更新进度 diff --git a/skills/utility-tools/versions/v0001/version.json b/skills/utility-tools/versions/v0001/version.json new file mode 100644 index 0000000..341fe8c --- /dev/null +++ b/skills/utility-tools/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for utility and delegation tools", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "辅助工具集,包括任务分解(Todo)、任务委托(Delegate)、子 Agent 生成(Spawn)、消息发送和需求澄清。", + "name": "utility-tools", + "tools": ["clarify", "delegate", "send_message", "spawn", "todo"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "utility-tools", + "summary": "Utility Tools — 任务管理、委托和协作辅助工具集", + "summary_hash": "placeholder", + "tool_hints": ["clarify", "delegate", "send_message", "spawn", "todo"], + "version": "v0001" +} \ No newline at end of file diff --git a/skills/web-operation/current.json b/skills/web-operation/current.json new file mode 100644 index 0000000..0c6a67a --- /dev/null +++ b/skills/web-operation/current.json @@ -0,0 +1,3 @@ +{ + "current_version": "v0001" +} \ No newline at end of file diff --git a/skills/web-operation/skill.json b/skills/web-operation/skill.json new file mode 100644 index 0000000..ef342ac --- /dev/null +++ b/skills/web-operation/skill.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-05-26T00:00:00.000000+00:00", + "current_version": "v0001", + "description": "网页内容抓取和搜索引擎查询。支持任意 URL 抓取、多搜索引擎和结构化数据提取。", + "display_name": "web-operation", + "lineage": [], + "name": "web-operation", + "owners": ["system"], + "source_kind": "initial", + "status": "active", + "tags": ["web", "search", "fetch", "crawl"], + "updated_at": "2026-05-26T00:00:00.000000+00:00" +} \ No newline at end of file diff --git a/skills/web-operation/versions/v0001/SKILL.md
b/skills/web-operation/versions/v0001/SKILL.md new file mode 100644 index 0000000..80ea3f0 --- /dev/null +++ b/skills/web-operation/versions/v0001/SKILL.md @@ -0,0 +1,36 @@ +--- +name: web-operation +description: 网页内容抓取和搜索引擎查询。支持任意 URL 抓取、多搜索引擎和结构化数据提取。 +tools: + - web_fetch + - web_search +--- + +# Web Operation — 网络抓取与搜索 + +网页抓取和网络搜索工具集。 + +## 工具说明 + +### web_fetch +获取指定 URL 的网页内容并转换为 Markdown。 +- 支持 HTML → Markdown 自动转换 +- 可使用 prompt 参数提取特定信息 +- 结果由 AI 总结后返回 +- HTTP URL 自动升级为 HTTPS +- 含 15 分钟缓存 + +### web_search +搜索引擎查询,获取最新网络信息。 +- 支持 domain 过滤(include/block) +- 搜索当前日期的信息使用正确年份 +- 返回结果包含 URL 链接 + +## 使用原则 + +1. 优先使用 `web_search` 搜索信息,再用 `web_fetch` 深入阅读 +2. 获取动态/需要认证的页面可能失败,此时尝试简化请求或换源 +3. 抓取 API 文档时注意区分 REST API 和 GraphQL +4. 搜索结果必须标注来源链接 +5. 避免短时间内大量请求同一站点(限频) +6. 不抓取需要登录认证的私密页面 diff --git a/skills/web-operation/versions/v0001/version.json b/skills/web-operation/versions/v0001/version.json new file mode 100644 index 0000000..0139a87 --- /dev/null +++ b/skills/web-operation/versions/v0001/version.json @@ -0,0 +1,22 @@ +{ + "change_reason": "Initial skill for web fetching and searching", + "content_hash": "placeholder", + "created_at": "2026-05-26T00:00:00.000000+00:00", + "created_by": "system", + "frontmatter": { + "description": "网页内容抓取和搜索引擎查询。支持任意 URL 抓取、多搜索引擎和结构化数据提取。", + "name": "web-operation", + "tools": ["web_fetch", "web_search"] + }, + "parent_version": null, + "provenance": { + "source": "initial_skills", + "source_kind": "initial" + }, + "review_state": "published", + "skill_name": "web-operation", + "summary": "Web Operation — 网页抓取与网络搜索工具集", + "summary_hash": "placeholder", + "tool_hints": ["web_fetch", "web_search"], + "version": "v0001" +} \ No newline at end of file