feat(engine): 添加技能查看工具并优化异步任务管理

- 添加SkillViewTool到引擎加载器中,增强技能管理功能
- 在AgentLoop中引入_active_direct_task来跟踪活跃任务
- 实现直接任务执行时的同步处理逻辑
- 更新工具实例化方式以支持依赖注入

feat(config): 增加智能体运行时参数配置支持

- 扩展AgentDefaultsConfig添加max_tokens和temperature字段
- 实现配置解析函数_first_config_value处理多个配置源
- 支持通过Web API动态更新智能体运行时参数
- 添加前端页面配置表单和验证逻辑

refactor(provider): 统一最大令牌数参数类型为可选整型

- 将所有LLM提供者的max_tokens参数改为int | None类型
- 为AnthropicProvider实现模型特定的最大令牌数默认值
- 调整参数传递逻辑,优先级:调用参数 > 配置文件 > 模型默认值
- 移除硬编码的默认值,改用条件判断

feat(process): 增强事件投影功能

- 添加工具调用开始/结束事件的映射逻辑
- 实现技能激活事件的识别和展示
- 添加辅助函数处理工具调用名称和参数提取
- 优化运行记录关联逻辑,提升事件匹配准确性

fix(web): 更新网络请求客户端信任环境设置

- 将WebFetchTool和WebSearchTool的trust_env参数设为True
- 确保HTTP客户端能够正确使用系统代理配置
- 修复可能的网络连接问题

test: 添加配置加载和事件投影相关测试

- 新增智能体默认参数配置测试用例
- 实现API配置持久化和重载测试
- 添加技能卡片和工具事件的投影测试
```
This commit is contained in:
2026-05-27 13:37:06 +08:00
parent 55b39563a0
commit 33a9845566
75 changed files with 2599 additions and 114 deletions

View File

@ -0,0 +1,145 @@
{
"agents": [
{
"agent_id": "researcher",
"capabilities": [
"research",
"analysis",
"source review",
"requirements"
],
"created_at": "2026-05-27T05:25:11.756341+00:00",
"description": "Finds facts, references, constraints, and implementation options.",
"display_name": "Researcher",
"metadata": {},
"model": null,
"name": "researcher",
"priority": 50,
"provider_name": null,
"role": "research",
"skill_names": [],
"source": "builtin",
"status": "active",
"system_prompt": "You are a research specialist. Gather concise evidence and tradeoffs for the parent task.",
"tags": [
"planning",
"research"
],
"tool_hints": [],
"updated_at": "2026-05-27T05:25:11.756349+00:00"
},
{
"agent_id": "implementer",
"capabilities": [
"implementation",
"coding",
"refactor",
"integration"
],
"created_at": "2026-05-27T05:25:11.756351+00:00",
"description": "Builds scoped implementation slices and proposes concrete changes.",
"display_name": "Implementer",
"metadata": {},
"model": null,
"name": "implementer",
"priority": 45,
"provider_name": null,
"role": "implementation",
"skill_names": [],
"source": "builtin",
"status": "active",
"system_prompt": "You are an implementation specialist. Produce practical, scoped implementation output.",
"tags": [
"coding",
"build"
],
"tool_hints": [],
"updated_at": "2026-05-27T05:25:11.756353+00:00"
},
{
"agent_id": "reviewer",
"capabilities": [
"review",
"quality",
"risk",
"verification"
],
"created_at": "2026-05-27T05:25:11.756355+00:00",
"description": "Reviews plans, code, outputs, and risks before final synthesis.",
"display_name": "Reviewer",
"metadata": {},
"model": null,
"name": "reviewer",
"priority": 45,
"provider_name": null,
"role": "review",
"skill_names": [],
"source": "builtin",
"status": "active",
"system_prompt": "You are a review specialist. Focus on defects, missing requirements, and risks.",
"tags": [
"review",
"quality"
],
"tool_hints": [],
"updated_at": "2026-05-27T05:25:11.756356+00:00"
},
{
"agent_id": "tester",
"capabilities": [
"testing",
"verification",
"regression",
"qa"
],
"created_at": "2026-05-27T05:25:11.756358+00:00",
"description": "Designs and executes verification checks for task outputs.",
"display_name": "Tester",
"metadata": {},
"model": null,
"name": "tester",
"priority": 40,
"provider_name": null,
"role": "testing",
"skill_names": [],
"source": "builtin",
"status": "active",
"system_prompt": "You are a testing specialist. Identify focused checks and report pass/fail evidence.",
"tags": [
"test",
"quality"
],
"tool_hints": [],
"updated_at": "2026-05-27T05:25:11.756358+00:00"
},
{
"agent_id": "documenter",
"capabilities": [
"documentation",
"explanation",
"migration notes",
"release notes"
],
"created_at": "2026-05-27T05:25:11.756360+00:00",
"description": "Writes and reconciles user-facing and internal documentation updates.",
"display_name": "Documenter",
"metadata": {},
"model": null,
"name": "documenter",
"priority": 35,
"provider_name": null,
"role": "documentation",
"skill_names": [],
"source": "builtin",
"status": "active",
"system_prompt": "You are a documentation specialist. Produce concise docs aligned with the implementation.",
"tags": [
"docs",
"communication"
],
"tool_hints": [],
"updated_at": "2026-05-27T05:25:11.756360+00:00"
}
],
"version": 1
}

View File

@ -44,6 +44,7 @@ from beaver.tools.builtins import (
SpawnTool,
SessionSearchTool,
SkillManageTool,
SkillViewTool,
SkillsListTool,
TerminalTool,
TodoTool,
@ -220,16 +221,17 @@ class EngineLoader:
ObjectBackedTool(WriteFileTool()),
ObjectBackedTool(PatchFileTool()),
ObjectBackedTool(WebFetchTool()),
ObjectBackedTool(WebSearchTool()),
ObjectBackedTool(TerminalTool()),
ObjectBackedTool(ProcessTool()),
ObjectBackedTool(ExecuteCodeTool()),
ObjectBackedTool(TodoTool()),
ObjectBackedTool(ClarifyTool()),
ObjectBackedTool(SendMessageTool()),
ObjectBackedTool(DelegateTool()),
ObjectBackedTool(SpawnTool()),
SkillsListTool(),
ObjectBackedTool(WebSearchTool()),
ObjectBackedTool(TerminalTool()),
ObjectBackedTool(ProcessTool()),
ObjectBackedTool(ExecuteCodeTool()),
ObjectBackedTool(TodoTool()),
ObjectBackedTool(ClarifyTool()),
ObjectBackedTool(SendMessageTool()),
ObjectBackedTool(DelegateTool()),
ObjectBackedTool(SpawnTool()),
SkillsListTool(),
ObjectBackedTool(SkillViewTool(loader=skills_loader)),
SkillManageTool(),
CronTool(),
]

View File

@ -48,7 +48,7 @@ class AgentProfile:
name: str = "default"
system_prompt: str = ""
default_model: str = "gpt-4.1-mini"
max_tokens: int = 4096
max_tokens: int | None = None
max_context_messages: int = 1000
temperature: float = 0.2
max_tool_iterations: int = 30
@ -89,6 +89,7 @@ class AgentLoop:
self.loaded: EngineLoadResult | None = None
self.runtime_services: dict[str, Any] = {}
self._run_queue: asyncio.Queue[_DirectRunRequest | None] | None = None
self._active_direct_task: asyncio.Task[Any] | None = None
self._running = False
self._stop_requested = False
@ -130,6 +131,8 @@ class AgentLoop:
if item.future.cancelled():
continue
previous_direct_task = self._active_direct_task
self._active_direct_task = asyncio.current_task()
try:
result = await self._process_direct_impl(item.task, **item.kwargs)
except asyncio.CancelledError:
@ -142,6 +145,8 @@ class AgentLoop:
else:
if not item.future.done():
item.future.set_result(result)
finally:
self._active_direct_task = previous_direct_task
finally:
if self._run_queue is not None:
while True:
@ -183,6 +188,9 @@ class AgentLoop:
if self._stop_requested:
raise RuntimeError("AgentLoop.submit_direct() is not accepting new tasks after stop()")
if asyncio.current_task() is self._active_direct_task:
return await self._process_direct_impl(task, **kwargs)
future: asyncio.Future[AgentRunResult] = asyncio.get_running_loop().create_future()
await self._run_queue.put(_DirectRunRequest(task=task, kwargs=dict(kwargs), future=future))
return await future
@ -363,7 +371,7 @@ class AgentLoop:
resolved_request_timeout_seconds = configured_provider.get("request_timeout_seconds")
resolved_embedding_model = embedding_model or config.default_embedding_model
resolved_embedding_target = embedding_target or config.resolve_embedding_target()
resolved_max_tokens = max_tokens or self.profile.max_tokens
resolved_max_tokens = self.profile.max_tokens if max_tokens is None else max_tokens
resolved_temperature = self.profile.temperature if temperature is None else temperature
resolved_max_tool_iterations = (
self.profile.max_tool_iterations if max_tool_iterations is None else max_tool_iterations
@ -892,7 +900,7 @@ class AgentLoop:
provider: Any,
messages: list[dict[str, Any]],
model: str,
max_tokens: int,
max_tokens: int | None,
temperature: float,
thinking_enabled: bool | None,
) -> str:

View File

@ -43,7 +43,7 @@ class AnthropicProvider(LLMProvider):
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
model: str | None = None,
max_tokens: int = 4096,
max_tokens: int | None = None,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:
@ -57,9 +57,14 @@ class AnthropicProvider(LLMProvider):
"model": model or self.default_model,
"system": system_prompt or "",
"messages": anthropic_messages,
"max_tokens": max(1, max_tokens),
"temperature": temperature,
}
resolved_max_tokens = (
_default_max_tokens_for_model(model or self.default_model)
if max_tokens is None
else max(1, max_tokens)
)
kwargs["max_tokens"] = resolved_max_tokens
if tools:
kwargs["tools"] = _convert_tools(tools)
@ -100,6 +105,17 @@ class AnthropicProvider(LLMProvider):
return self.default_model
def _default_max_tokens_for_model(model: str) -> int:
"""Return a conservative native output ceiling for Anthropic Messages."""
normalized = model.lower().replace("_", "-")
if "sonnet-4" in normalized or "opus-4" in normalized or "3-7" in normalized or "3.7" in normalized:
return 64_000
if "haiku" in normalized:
return 4_096
return 8_192
def _convert_messages(messages: list[dict[str, Any]]) -> tuple[str, list[dict[str, Any]]]:
system_prompt = ""
converted: list[dict[str, Any]] = []

View File

@ -88,7 +88,7 @@ class LLMProvider(ABC):
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
model: str | None = None,
max_tokens: int = 4096,
max_tokens: int | None = None,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:

View File

@ -56,7 +56,7 @@ class FallbackProviderChain(LLMProvider):
messages: list[dict],
tools: list[dict] | None = None,
model: str | None = None,
max_tokens: int = 4096,
max_tokens: int | None = None,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:
@ -115,7 +115,7 @@ class FallbackProviderChain(LLMProvider):
messages: list[dict],
tools: list[dict] | None,
model: str,
max_tokens: int,
max_tokens: int | None,
temperature: float,
thinking_enabled: bool | None,
) -> LLMResponse:

View File

@ -39,7 +39,7 @@ class OpenAICodexProvider(LLMProvider):
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
model: str | None = None,
max_tokens: int = 4096,
max_tokens: int | None = None,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:

View File

@ -47,7 +47,7 @@ class CustomProvider(LLMProvider):
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
model: str | None = None,
max_tokens: int = 4096,
max_tokens: int | None = None,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:
@ -55,9 +55,10 @@ class CustomProvider(LLMProvider):
kwargs: dict[str, Any] = {
"model": model or self.default_model,
"messages": self.sanitize_empty_content(messages),
"max_tokens": max(1, max_tokens),
"temperature": temperature,
}
if max_tokens is not None:
kwargs["max_tokens"] = max(1, max_tokens)
if tools:
kwargs.update(tools=tools, tool_choice="auto")
try:

View File

@ -197,7 +197,7 @@ class LiteLLMProvider(LLMProvider):
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
model: str | None = None,
max_tokens: int = 4096,
max_tokens: int | None = None,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:
@ -210,10 +210,11 @@ class LiteLLMProvider(LLMProvider):
kwargs: dict[str, Any] = {
"model": resolved_model,
"messages": sanitized_messages,
"max_tokens": max(1, max_tokens),
"temperature": temperature,
"timeout": self.request_timeout_seconds or 45.0,
}
if max_tokens is not None:
kwargs["max_tokens"] = max(1, max_tokens)
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:

View File

@ -86,18 +86,25 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
model=_string(defaults.get("model") or data.get("model")),
provider=_string(defaults.get("provider") or data.get("provider")),
embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")),
max_tokens=_int(_first_config_value(
defaults.get("maxTokens"),
defaults.get("max_tokens"),
data.get("maxTokens"),
data.get("max_tokens"),
)),
temperature=_float(_first_config_value(defaults.get("temperature"), data.get("temperature"))),
max_context_messages=_int(
defaults.get("maxContextMessages")
or defaults.get("max_context_messages")
or data.get("maxContextMessages")
or data.get("max_context_messages")
),
max_tool_iterations=_int(
defaults.get("maxToolIterations")
or defaults.get("max_tool_iterations")
or data.get("maxToolIterations")
or data.get("max_tool_iterations")
),
max_tool_iterations=_int(_first_config_value(
defaults.get("maxToolIterations"),
defaults.get("max_tool_iterations"),
data.get("maxToolIterations"),
data.get("max_tool_iterations"),
)),
)
@ -204,6 +211,13 @@ def _as_dict(value: Any) -> dict[str, Any]:
return value if isinstance(value, dict) else {}
def _first_config_value(*values: Any) -> Any:
for value in values:
if value not in (None, ""):
return value
return None
def _string(value: Any) -> str | None:
if value is None:
return None

View File

@ -25,6 +25,8 @@ class AgentDefaultsConfig:
model: str | None = None
provider: str | None = None
embedding_model: str | None = None
max_tokens: int | None = None
temperature: float | None = None
max_context_messages: int | None = None
max_tool_iterations: int | None = None

View File

@ -51,6 +51,8 @@ from .schemas import (
WebChatRequest,
WebChatResponse,
WebErrorResponse,
WebAgentConfigRequest,
WebAgentConfigResponse,
WebProviderConfigRequest,
WebProviderConfigResponse,
WebStatusResponse,
@ -595,6 +597,38 @@ def create_app(
_reload_agent_config(agent_service, config_path)
return WebProviderConfigResponse(ok=True, provider=spec.name, enabled=payload.enabled)
@app.post("/api/agent-config", response_model=WebAgentConfigResponse)
async def update_agent_config(
    request: Request,
    payload: WebAgentConfigRequest,
) -> WebAgentConfigResponse:
    """Validate, persist, and reload the agent runtime defaults.

    Out-of-range values are rejected with HTTP 400. Accepted values are stored
    under ``agents.defaults`` in the config JSON using camelCase keys, after
    which the agent configuration is reloaded from that file.
    """
    requested_max_tokens = payload.max_tokens
    if requested_max_tokens is not None and requested_max_tokens <= 0:
        raise HTTPException(status_code=400, detail="max_tokens must be a positive integer or null")
    if payload.temperature < 0 or payload.temperature > 2:
        raise HTTPException(status_code=400, detail="temperature must be between 0 and 2")
    if payload.max_tool_iterations < 0:
        raise HTTPException(status_code=400, detail="max_tool_iterations must be zero or greater")

    agent_service = get_agent_service(request)
    loader = agent_service.loader
    config_path = loader.config.config_path or default_config_path(workspace=loader.workspace)
    raw = _read_config_json(config_path)
    defaults = _ensure_dict(_ensure_dict(raw, "agents"), "defaults")

    # Only the camelCase spelling is written back; drop snake_case duplicates.
    for snake_case_key in ("max_tokens", "max_tool_iterations"):
        defaults.pop(snake_case_key, None)

    if requested_max_tokens is None:
        # null means "no configured cap": remove the key instead of storing null.
        defaults.pop("maxTokens", None)
    else:
        defaults["maxTokens"] = requested_max_tokens
    defaults["temperature"] = payload.temperature
    defaults["maxToolIterations"] = payload.max_tool_iterations

    _write_config_json(config_path, raw)
    _reload_agent_config(agent_service, config_path)
    return WebAgentConfigResponse(ok=True)
@app.get("/api/sessions")
async def list_sessions(request: Request) -> list[dict[str, Any]]:
loaded = get_agent_service(request).create_loop().boot()

View File

@ -8,6 +8,8 @@ from .chat import (
WebChatRequest,
WebChatResponse,
WebErrorResponse,
WebAgentConfigRequest,
WebAgentConfigResponse,
WebProviderConfigRequest,
WebProviderConfigResponse,
WebProviderTarget,
@ -22,6 +24,8 @@ __all__ = [
"WebChatRequest",
"WebChatResponse",
"WebErrorResponse",
"WebAgentConfigRequest",
"WebAgentConfigResponse",
"WebProviderConfigRequest",
"WebProviderConfigResponse",
"WebProviderTarget",

View File

@ -139,6 +139,20 @@ class WebProviderConfigResponse(BaseModel):
enabled: bool
class WebAgentConfigRequest(BaseModel):
    """Agent runtime defaults update from the settings page."""

    # Optional output-token cap; defaults to None when omitted.
    max_tokens: int | None = None
    # Required sampling temperature.
    temperature: float
    # Required tool-iteration limit.
    max_tool_iterations: int
class WebAgentConfigResponse(BaseModel):
    """Agent runtime defaults update result."""

    # Result flag returned to the client.
    ok: bool
class WebStatusResponse(BaseModel):
"""Web 宿主层状态响应。"""

View File

@ -68,6 +68,14 @@ class AgentService:
def _apply_configured_profile_defaults(self) -> None:
defaults = self.loader.config.agents_defaults
self.profile.max_tokens = None
self.profile.temperature = 0.2
self.profile.max_context_messages = 1000
self.profile.max_tool_iterations = 30
if defaults.max_tokens is not None:
self.profile.max_tokens = max(1, defaults.max_tokens)
if defaults.temperature is not None:
self.profile.temperature = defaults.temperature
if defaults.max_context_messages is not None:
self.profile.max_context_messages = max(1, defaults.max_context_messages)
if defaults.max_tool_iterations is not None:

View File

@ -50,10 +50,11 @@ class SessionProcessProjector:
for record in records:
payload = dict(record.event_payload or {})
task_id = payload.get("task_id")
run_record_for_event = run_records.get(str(record.run_id)) if record.run_id else None
task_id = payload.get("task_id") or getattr(run_record_for_event, "task_id", None)
if not task_id:
continue
attempt_index = int(payload.get("attempt_index") or 1)
attempt_index = int(payload.get("attempt_index") or getattr(run_record_for_event, "attempt_index", None) or 1)
root_run_id = f"task:{task_id}:attempt:{attempt_index}"
created_at = _timestamp(record.timestamp)
root = runs.setdefault(
@ -73,7 +74,61 @@ class SessionProcessProjector:
},
)
if record.event_type == "task_execution_planned":
if record.event_type == "assistant_message_added" and record.tool_calls:
run_id = record.run_id or root_run_id
parent_run_id = root_run_id if run_id != root_run_id else None
for index, tool_call in enumerate(record.tool_calls):
if not isinstance(tool_call, dict):
continue
tool_name = _tool_call_name(tool_call)
add_event(
event_id=f"{_event_id(record, 'tool-call')}:{index}",
run_id=run_id,
parent_run_id=parent_run_id,
kind="tool_call_started",
actor_type="tool",
actor_id=tool_name,
actor_name=tool_name,
text=f"Calling tool: {tool_name}.",
created_at=created_at,
status="running",
metadata={
"task_id": task_id,
"attempt_index": attempt_index,
"timeline_type": "tool_call",
"tool_name": tool_name,
"tool_call_id": tool_call.get("id"),
"arguments": _tool_call_arguments(tool_call),
},
)
elif record.event_type == "tool_result_recorded":
run_id = record.run_id or root_run_id
parent_run_id = root_run_id if run_id != root_run_id else None
tool_name = str(record.tool_name or payload.get("tool_name") or "tool")
add_event(
event_id=_event_id(record, "tool-result"),
run_id=run_id,
parent_run_id=parent_run_id,
kind="tool_call_finished",
actor_type="tool",
actor_id=tool_name,
actor_name=tool_name,
text=_truncate(str(record.content or payload.get("error") or "")),
created_at=created_at,
status="done" if payload.get("success", True) else "error",
metadata={
**dict(payload),
"task_id": task_id,
"attempt_index": attempt_index,
"timeline_type": "tool_result",
"tool_name": tool_name,
"tool_call_id": record.tool_call_id,
"result_summary": _truncate(str(record.content or payload.get("error") or "")),
},
)
elif record.event_type == "task_execution_planned":
plan_mode = payload.get("plan_mode") or "single"
strategy = payload.get("strategy") or "single"
node_ids = payload.get("node_ids") or []
@ -241,6 +296,7 @@ class SessionProcessProjector:
main_run_id = str(payload.get("main_run_id") or "")
if main_run_id:
run_record = run_records.get(main_run_id)
activated_skill_names = _activated_skill_names(run_record)
runs[main_run_id] = {
"run_id": main_run_id,
"parent_run_id": root_run_id,
@ -254,8 +310,32 @@ class SessionProcessProjector:
"started_at": run_record.started_at if run_record is not None else created_at,
"finished_at": run_record.ended_at if run_record is not None else created_at,
"summary": _truncate(run_record.task_text if run_record is not None else ""),
"metadata": {"task_id": task_id, "attempt_index": attempt_index},
"metadata": {
"task_id": task_id,
"attempt_index": attempt_index,
"skill_names": activated_skill_names,
},
}
if activated_skill_names:
add_event(
event_id=_event_id(record, "synthesis-skills"),
run_id=main_run_id,
parent_run_id=root_run_id,
kind="skill_selected",
actor_type="system",
actor_id="skill-selector",
actor_name="Skill Selector",
text=f"Selected skill guidance: {', '.join(activated_skill_names)}.",
created_at=created_at,
status="done",
metadata={
"task_id": task_id,
"attempt_index": attempt_index,
"timeline_type": "skill",
"skill_names": activated_skill_names,
"activation_reasons": _activated_skill_reasons(run_record),
},
)
add_event(
event_id=_event_id(record, "synthesis"),
run_id=main_run_id,
@ -335,3 +415,49 @@ def _truncate(text: str, limit: int = 800) -> str:
if len(cleaned) <= limit:
return cleaned
return cleaned[: limit - 1] + "..."
def _activated_skill_names(run_record: Any | None) -> list[str]:
if run_record is None:
return []
names = []
for receipt in getattr(run_record, "activated_skills", []) or []:
skill_name = str(getattr(receipt, "skill_name", "") or "").strip()
if skill_name:
names.append(skill_name)
return list(dict.fromkeys(names))
def _activated_skill_reasons(run_record: Any | None) -> list[str]:
if run_record is None:
return []
reasons = []
for receipt in getattr(run_record, "activated_skills", []) or []:
reason = str(getattr(receipt, "activation_reason", "") or "").strip()
if reason:
reasons.append(reason)
return reasons
def _tool_call_name(tool_call: dict[str, Any]) -> str:
function_payload = tool_call.get("function")
if isinstance(function_payload, dict):
name = function_payload.get("name")
if name:
return str(name)
for key in ("name", "tool_name"):
value = tool_call.get(key)
if value:
return str(value)
return "tool"
def _tool_call_arguments(tool_call: dict[str, Any]) -> Any:
function_payload = tool_call.get("function")
if isinstance(function_payload, dict) and "arguments" in function_payload:
return function_payload.get("arguments")
if "arguments" in tool_call:
return tool_call.get("arguments")
if "args" in tool_call:
return tool_call.get("args")
return None

View File

@ -51,7 +51,7 @@ class WebFetchTool:
try:
safe_url = _safe_url(url)
limit = max(1000, min(int(max_chars or 12000), 50000))
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client:
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=True) as client:
response = await client.get(
safe_url,
headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"},
@ -96,7 +96,7 @@ class WebSearchTool:
raise ValueError("query is required")
bounded = max(1, min(int(limit or 5), 10))
url = f"https://duckduckgo.com/html/?q={quote_plus(query)}"
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=False) as client:
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=True) as client:
response = await client.get(url, headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"})
response.raise_for_status()
html = response.text

View File

@ -0,0 +1,47 @@
import asyncio
from contextlib import suppress
from typing import Any
from beaver.engine import AgentLoop, AgentRunResult, EngineLoader
def _run_result(run_id: str, output_text: str) -> AgentRunResult:
    """Build a run result for the ``web:test`` session that stopped with no tool iterations."""
    fields = {
        "session_id": "web:test",
        "run_id": run_id,
        "output_text": output_text,
        "finish_reason": "stop",
        "tool_iterations": 0,
    }
    return AgentRunResult(**fields)
# Checks that submit_direct() can be called again from inside the task the
# running loop is already processing: the "outer" task submits "inner" and the
# combined call must finish within the one-second timeout.
def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None:
async def run_case() -> None:
loop = AgentLoop(loader=EngineLoader(workspace=tmp_path))
calls: list[str] = []
# Stand-in for the real processing step: records each task and, for "outer",
# re-enters submit_direct() with "inner".
async def fake_process_direct(task: str, **kwargs: Any) -> AgentRunResult:
calls.append(task)
if task == "outer":
return await loop.submit_direct("inner", session_id="web:test")
return _run_result(task, "inner completed")
loop._process_direct_impl = fake_process_direct # type: ignore[method-assign]
loop_task = asyncio.create_task(loop.run())
# Yield once so the loop task gets a chance to start before submitting.
await asyncio.sleep(0)
try:
result = await asyncio.wait_for(loop.submit_direct("outer", session_id="web:test"), timeout=1)
finally:
# Shut the loop down; cancel the task if it has not finished after one second.
await loop.stop()
with suppress(asyncio.TimeoutError):
await asyncio.wait_for(loop_task, timeout=1)
if not loop_task.done():
loop_task.cancel()
with suppress(asyncio.CancelledError):
await loop_task
# The outer call returns the inner result, and both tasks ran in order.
assert result.output_text == "inner completed"
assert calls == ["outer", "inner"]
asyncio.run(run_case())

View File

@ -1,10 +1,12 @@
import json
from fastapi.testclient import TestClient
from beaver.engine import AgentLoop, EngineLoader
from beaver.engine.providers import make_provider_bundle
from beaver.engine.providers.litellm import LiteLLMProvider
from beaver.foundation.config import load_config
from beaver.interfaces.web.app import _reload_agent_config
from beaver.interfaces.web.app import create_app, _reload_agent_config
from beaver.services.agent_service import AgentService
@ -161,6 +163,88 @@ def test_reload_agent_config_updates_booted_loop_config(tmp_path) -> None:
service.close()
def test_agent_defaults_include_runtime_controls(tmp_path) -> None:
    """Runtime controls under ``agents.defaults`` reach the parsed config and the service profile."""
    config_path = tmp_path / "config.json"
    runtime_defaults = {
        "maxTokens": 12345,
        "temperature": 0.4,
        "maxToolIterations": 9,
    }
    config_path.write_text(
        json.dumps({"agents": {"defaults": runtime_defaults}}),
        encoding="utf-8",
    )

    config = load_config(config_path=config_path)
    service = AgentService(config_path=config_path)

    parsed_defaults = config.agents_defaults
    assert parsed_defaults.max_tokens == 12345
    assert parsed_defaults.temperature == 0.4
    assert parsed_defaults.max_tool_iterations == 9

    profile = service.profile
    assert profile.max_tokens == 12345
    assert profile.temperature == 0.4
    assert profile.max_tool_iterations == 9

    service.close()
# Checks that POST /api/agent-config writes the values to the config file,
# updates the live service profile, and is reflected by GET /api/status.
def test_agent_config_api_persists_and_reloads_defaults(tmp_path) -> None:
config_path = tmp_path / "config.json"
# Start from a config file whose agents.defaults section is empty.
config_path.write_text(json.dumps({"agents": {"defaults": {}}}), encoding="utf-8")
service = AgentService(config_path=config_path)
app = create_app(service=service, manage_service_lifecycle=False)
with TestClient(app) as client:
response = client.post(
"/api/agent-config",
json={"max_tokens": 8192, "temperature": 0.6, "max_tool_iterations": 12},
)
status = client.get("/api/status")
# Re-read the file from disk to verify what was persisted.
saved = json.loads(config_path.read_text(encoding="utf-8"))
defaults = saved["agents"]["defaults"]
assert response.status_code == 200
assert response.json() == {"ok": True}
# Persisted keys use the camelCase spelling.
assert defaults["maxTokens"] == 8192
assert defaults["temperature"] == 0.6
assert defaults["maxToolIterations"] == 12
# The already-constructed service picked up the new values.
assert service.profile.max_tokens == 8192
assert service.profile.temperature == 0.6
assert service.profile.max_tool_iterations == 12
assert status.json()["max_tokens"] == 8192
assert status.json()["temperature"] == 0.6
assert status.json()["max_tool_iterations"] == 12
service.close()
# Checks that zero is accepted for temperature and max_tool_iterations and
# survives a round trip through the config file, and that a null max_tokens
# stays unset.
def test_agent_config_api_accepts_zero_temperature_and_iterations(tmp_path) -> None:
# Note: this test never writes config.json itself before building the service.
config_path = tmp_path / "config.json"
service = AgentService(config_path=config_path)
app = create_app(service=service, manage_service_lifecycle=False)
with TestClient(app) as client:
response = client.post(
"/api/agent-config",
json={"max_tokens": None, "temperature": 0, "max_tool_iterations": 0},
)
# Load the config again from disk to see what the endpoint persisted.
config = load_config(config_path=config_path)
assert response.status_code == 200
assert config.agents_defaults.max_tokens is None
assert config.agents_defaults.temperature == 0
assert config.agents_defaults.max_tool_iterations == 0
assert service.profile.max_tokens is None
assert service.profile.temperature == 0
assert service.profile.max_tool_iterations == 0
service.close()
def test_openai_compatible_qwen_config_keeps_openai_provider() -> None:
bundle = make_provider_bundle(
model="qwen-plus",

View File

@ -0,0 +1,58 @@
from __future__ import annotations
import json
from pathlib import Path
from beaver.engine import EngineLoader
from beaver.skills.catalog.utils import parse_frontmatter
REPO_ROOT = Path(__file__).resolve().parents[4]
EXPECTED_INITIAL_SKILL_TOOLS = {
"cron-scheduler": ["cron"],
"filesystem-operation": ["read_file", "write_file", "patch_file", "search_files", "list_directory"],
"memory-management": ["memory"],
"outlook-mail": [
"mcp_outlook_mcp_mail_list_folders",
"mcp_outlook_mcp_mail_list_messages",
"mcp_outlook_mcp_mail_search_messages",
"mcp_outlook_mcp_mail_get_message",
"mcp_outlook_mcp_mail_send_email",
"mcp_outlook_mcp_mail_reply_to_message",
"mcp_outlook_mcp_mail_forward_message",
"mcp_outlook_mcp_mail_move_message",
"mcp_outlook_mcp_mail_delta_sync",
"mcp_outlook_mcp_calendar_list_events",
"mcp_outlook_mcp_calendar_create_event",
"mcp_outlook_mcp_calendar_update_event",
"mcp_outlook_mcp_calendar_get_schedule",
"mcp_outlook_mcp_calendar_find_meeting_times",
"mcp_outlook_mcp_calendar_delta_sync",
],
"skills-admin": ["skills_list", "skill_manage", "skill_view"],
"terminal-operation": ["terminal", "process", "execute_code"],
"utility-tools": ["clarify", "delegate", "send_message", "spawn", "todo"],
"web-operation": ["web_fetch", "web_search"],
}
def test_initial_skill_tool_hints_match_runtime_tool_names() -> None:
    """Each bundled v0001 skill lists the expected tools in SKILL.md and version.json."""
    for skill_name, expected_tools in EXPECTED_INITIAL_SKILL_TOOLS.items():
        version_dir = REPO_ROOT / "skills" / skill_name / "versions" / "v0001"
        skill_markdown = (version_dir / "SKILL.md").read_text(encoding="utf-8")
        manifest_text = (version_dir / "version.json").read_text(encoding="utf-8")

        frontmatter, _ = parse_frontmatter(skill_markdown)
        manifest = json.loads(manifest_text)

        assert frontmatter["tools"] == expected_tools
        assert manifest["frontmatter"]["tools"] == expected_tools
        assert manifest["tool_hints"] == expected_tools
def test_default_runtime_registers_skill_view_tool(tmp_path: Path) -> None:
    """A default engine load exposes ``skill_view`` in both the tool map and the registry."""
    loaded = EngineLoader(workspace=tmp_path).load()
    try:
        assert "skill_view" in loaded.tools
        registry = loaded.tool_registry
        assert registry is not None
        assert registry.get("skill_view") is not None
    finally:
        # Always release whatever the load acquired, even if an assert fails.
        loaded.close()

View File

@ -0,0 +1,64 @@
import asyncio
from types import SimpleNamespace
from beaver.engine.loop import AgentProfile
from beaver.engine.providers.anthropic import AnthropicProvider
from beaver.engine.providers.litellm import LiteLLMProvider
def test_agent_profile_uses_provider_output_default() -> None:
    """A freshly constructed profile leaves ``max_tokens`` unset."""
    profile = AgentProfile()
    assert profile.max_tokens is None
def test_litellm_omits_max_tokens_when_unset(monkeypatch) -> None:
    """With ``max_tokens=None`` the LiteLLM provider sends no ``max_tokens`` kwarg."""
    captured_kwargs: dict = {}

    async def fake_acompletion(**kwargs):
        # Record what the provider passed, then answer with a minimal completion.
        captured_kwargs.update(kwargs)
        message = SimpleNamespace(content="ok", tool_calls=[])
        choice = SimpleNamespace(message=message, finish_reason="stop")
        return SimpleNamespace(choices=[choice], usage=None)

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)

    async def run_case():
        provider = LiteLLMProvider(default_model="openai/gpt-test")
        user_messages = [{"role": "user", "content": "hi"}]
        return await provider.chat(messages=user_messages, max_tokens=None)

    response = asyncio.run(run_case())

    assert response.content == "ok"
    assert "max_tokens" not in captured_kwargs
def test_anthropic_uses_model_output_ceiling_when_unset(monkeypatch) -> None:
    """With ``max_tokens=None`` the Anthropic provider sends the model's default ceiling."""
    captured_kwargs: dict = {}

    class FakeMessages:
        async def create(self, **kwargs):
            # Record the request and reply with a single text block.
            captured_kwargs.update(kwargs)
            text_block = SimpleNamespace(type="text", text="ok")
            return SimpleNamespace(content=[text_block], usage=None, stop_reason="stop")

    class FakeClient:
        messages = FakeMessages()

    # Bypass real client construction so no SDK or credentials are needed.
    monkeypatch.setattr(AnthropicProvider, "_client_or_raise", lambda self: FakeClient())

    async def run_case():
        provider = AnthropicProvider(default_model="claude-sonnet-4-5")
        user_messages = [{"role": "user", "content": "hi"}]
        return await provider.chat(messages=user_messages, max_tokens=None)

    response = asyncio.run(run_case())

    assert response.content == "ok"
    assert captured_kwargs["max_tokens"] == 64_000

View File

@ -5,6 +5,7 @@ from pathlib import Path
from beaver.engine.session import SessionManager
from beaver.memory.runs import RunMemoryStore, RunRecord
from beaver.services.process_service import SessionProcessProjector
from beaver.skills.specs import SkillActivationReceipt
def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
@ -238,6 +239,130 @@ def test_process_projection_uses_normalized_plan_metadata_defaults(tmp_path: Pat
assert planned_event["metadata"]["strategy"] == "single"
def test_process_projection_emits_skill_card_from_main_run_receipts(tmp_path: Path) -> None:
    """A skill receipt on the main run record is projected as a ``skill_selected`` event."""
    sessions = SessionManager(tmp_path)
    runs = RunMemoryStore(tmp_path / "memory" / "runs")

    receipt = SkillActivationReceipt(
        run_id="main-run",
        session_id="web:test",
        skill_name="web-operation",
        skill_version="1",
        content_hash="hash",
        activated_at="2026-01-01T00:00:03+00:00",
        activation_reason="Needs live web lookup.",
    )
    main_run = RunRecord(
        run_id="main-run",
        session_id="web:test",
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
        activated_skills=[receipt],
    )
    runs.append_run_record(main_run)

    # The plan event lists no selected skills; the receipt above is the only skill evidence.
    sessions.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload={
            "task_id": "task-1",
            "attempt_index": 1,
            "plan_mode": "single",
            "strategy": "single",
            "selected_skill_names": [],
        },
        context_visible=False,
    )
    # The synthesis event carries ``main_run_id`` pointing at the stored run record.
    sessions.append_message(
        "web:test",
        role="system",
        event_type="task_synthesis_completed",
        event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
        context_visible=False,
    )

    projection = SessionProcessProjector(sessions, runs).project("web:test")

    skill_events = []
    for event in projection["events"]:
        if event["kind"] == "skill_selected" and event["run_id"] == "main-run":
            skill_events.append(event)

    assert skill_events
    first_metadata = skill_events[0]["metadata"]
    assert first_metadata["timeline_type"] == "skill"
    assert first_metadata["skill_names"] == ["web-operation"]
def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -> None:
    """Assistant tool calls and tool results are projected as tool start/finish events."""
    sessions = SessionManager(tmp_path)
    runs = RunMemoryStore(tmp_path / "memory" / "runs")

    main_run = RunRecord(
        run_id="main-run",
        session_id="web:test",
        task_id="task-1",
        attempt_index=1,
        task_text="main task",
        started_at="2026-01-01T00:00:03+00:00",
        ended_at="2026-01-01T00:00:04+00:00",
        success=True,
        finish_reason="stop",
    )
    runs.append_run_record(main_run)

    sessions.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload={"task_id": "task-1", "attempt_index": 1},
        context_visible=False,
    )

    # Assistant turn that requests one tool call (id "call-1").
    requested_call = {
        "id": "call-1",
        "name": "multi_search",
        "arguments": {"query": "Macau cafe near Bóvia"},
    }
    sessions.append_message(
        "web:test",
        run_id="main-run",
        role="assistant",
        event_type="assistant_message_added",
        event_payload={"task_id": "task-1"},
        content="Searching",
        tool_calls=[requested_call],
        context_visible=False,
    )
    # Tool turn that answers the same call id.
    sessions.append_message(
        "web:test",
        run_id="main-run",
        role="tool",
        event_type="tool_result_recorded",
        event_payload={"success": True, "error": None},
        content="Found 3 restaurants",
        tool_name="multi_search",
        tool_call_id="call-1",
        context_visible=True,
    )

    projection = SessionProcessProjector(sessions, runs).project("web:test")

    def first_of_kind(kind):
        # ``next`` without a default raises StopIteration when no such event exists.
        return next(event for event in projection["events"] if event["kind"] == kind)

    tool_call = first_of_kind("tool_call_started")
    assert tool_call["metadata"]["timeline_type"] == "tool_call"
    assert tool_call["metadata"]["tool_name"] == "multi_search"
    assert tool_call["run_id"] == "main-run"

    tool_result = first_of_kind("tool_call_finished")
    assert tool_result["metadata"]["timeline_type"] == "tool_result"
    assert tool_result["metadata"]["tool_name"] == "multi_search"
    assert tool_result["metadata"]["success"] is True
def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
session = SessionManager(tmp_path)
run_store = RunMemoryStore(tmp_path / "memory" / "runs")

View File

@ -0,0 +1,44 @@
from __future__ import annotations
import asyncio
from beaver.tools.builtins import web
class _FakeResponse:
    """Canned HTML response object handed back by the fake HTTP client in this module."""

    status_code = 200
    url = "https://example.com"
    headers = {"content-type": "text/html"}
    # A single anchor carrying the ``result__a`` class.
    text = '<a class="result__a" href="https://example.com">Example</a>'

    def raise_for_status(self) -> None:
        """Never raise; the canned response always has status 200."""
        return None
class _FakeAsyncClient:
    """Async-context-manager test double that records its constructor keyword arguments."""

    # Shared on the class on purpose: the test reads it via ``_FakeAsyncClient.calls``.
    calls: list[dict[str, object]] = []

    def __init__(self, **kwargs: object) -> None:
        # One entry per constructed client, holding the keywords it was built with.
        self.calls.append(kwargs)

    async def __aenter__(self) -> "_FakeAsyncClient":
        return self

    async def __aexit__(self, *args: object) -> None:
        return None

    async def get(self, *args: object, **kwargs: object) -> "_FakeResponse":
        """Ignore the request arguments and return a fresh canned response."""
        return _FakeResponse()
def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None:
    """Both web tools must build their ``httpx.AsyncClient`` with ``trust_env=True``."""
    # Start from an empty log so only clients created by this test are counted.
    _FakeAsyncClient.calls = []
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    async def _run() -> None:
        await web.WebFetchTool().execute(url="https://example.com")
        await web.WebSearchTool().execute(query="example")

    asyncio.run(_run())

    trust_flags = [kwargs.get("trust_env") for kwargs in _FakeAsyncClient.calls]
    assert trust_flags == [True, True]

View File

@ -15,7 +15,7 @@ import {
Settings2,
ScrollText,
} from 'lucide-react';
import { getStatus, updateProviderConfig } from '@/lib/api';
import { getStatus, updateAgentConfig, updateProviderConfig } from '@/lib/api';
import { Button } from '@/components/ui/button';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
import { Badge } from '@/components/ui/badge';
@ -42,6 +42,12 @@ type ProviderFormState = {
requestTimeoutSeconds: string;
};
// Raw text held by the agent-configuration inputs; handleSaveAgentConfig converts it to numbers before saving.
type AgentFormState = {
// Blank text is submitted as null.
maxTokens: string;
temperature: string;
maxToolIterations: string;
};
export default function StatusPage() {
const { locale } = useAppI18n();
const [status, setStatus] = useState<SystemStatus | null>(null);
@ -57,6 +63,13 @@ export default function StatusPage() {
}));
const [savingProvider, setSavingProvider] = useState(false);
const [providerError, setProviderError] = useState<string | null>(null);
const [agentForm, setAgentForm] = useState<AgentFormState>(() => ({
maxTokens: '',
temperature: '0.2',
maxToolIterations: '30',
}));
const [savingAgent, setSavingAgent] = useState(false);
const [agentError, setAgentError] = useState<string | null>(null);
const loadStatus = async () => {
setLoading(true);
@ -64,6 +77,11 @@ export default function StatusPage() {
try {
const data = await getStatus();
setStatus(data);
setAgentForm({
maxTokens: data.max_tokens == null ? '' : String(data.max_tokens),
temperature: String(data.temperature),
maxToolIterations: String(data.max_tool_iterations),
});
} catch (err: any) {
setError(err.message || pickAppText(locale, '连接后端失败', 'Failed to connect to the backend'));
} finally {
@ -115,6 +133,39 @@ export default function StatusPage() {
}
};
// Validate the agent form, persist it through updateAgentConfig, then reload the status.
// Validation and request failures are shown via agentError instead of being thrown to the caller.
const handleSaveAgentConfig = async () => {
  setSavingAgent(true);
  setAgentError(null);
  try {
    const maxTokensText = agentForm.maxTokens.trim();
    const temperatureText = agentForm.temperature.trim();
    const maxToolIterationsText = agentForm.maxToolIterations.trim();

    // Blank max tokens is sent as null.
    const maxTokens = maxTokensText ? Number(maxTokensText) : null;
    // Number('') evaluates to 0, which would let an empty required field pass validation
    // and be saved as 0. Map blank input to NaN so the range checks below reject it.
    const temperature = temperatureText ? Number(temperatureText) : Number.NaN;
    const maxToolIterations = maxToolIterationsText ? Number(maxToolIterationsText) : Number.NaN;

    if (
      maxTokens !== null &&
      (!Number.isInteger(maxTokens) || maxTokens <= 0)
    ) {
      throw new Error(pickAppText(locale, '最大令牌数必须为空或正整数', 'Max tokens must be blank or a positive integer'));
    }
    if (!Number.isFinite(temperature) || temperature < 0 || temperature > 2) {
      throw new Error(pickAppText(locale, '温度必须在 0 到 2 之间', 'Temperature must be between 0 and 2'));
    }
    if (!Number.isInteger(maxToolIterations) || maxToolIterations < 0) {
      throw new Error(pickAppText(locale, '最大工具迭代次数必须是非负整数', 'Max tool iterations must be a non-negative integer'));
    }

    await updateAgentConfig({
      max_tokens: maxTokens,
      temperature,
      max_tool_iterations: maxToolIterations,
    });
    // Re-read the status so the form shows the values returned by the backend.
    await loadStatus();
  } catch (err: any) {
    setAgentError(err.message || pickAppText(locale, '保存智能体配置失败', 'Failed to save agent configuration'));
  } finally {
    setSavingAgent(false);
  }
};
if (loading) {
return (
<div className="flex items-center justify-center py-20">
@ -207,14 +258,47 @@ export default function StatusPage() {
{pickAppText(locale, '智能体配置', 'Agent configuration')}
</CardTitle>
</CardHeader>
<CardContent className="space-y-3">
<CardContent className="space-y-5">
<InfoRow label={pickAppText(locale, '模型', 'Model')} value={status.model} />
<InfoRow label={pickAppText(locale, '最大令牌数', 'Max tokens')} value={String(status.max_tokens)} />
<InfoRow label={pickAppText(locale, '温度', 'Temperature')} value={String(status.temperature)} />
<InfoRow
label={pickAppText(locale, '最大工具迭代次数', 'Max tool iterations')}
value={String(status.max_tool_iterations)}
/>
<div className="grid gap-4 border-t pt-5 md:grid-cols-3">
<div className="grid gap-2">
<Label htmlFor="agent-max-tokens">{pickAppText(locale, '最大令牌数', 'Max tokens')}</Label>
<Input
id="agent-max-tokens"
inputMode="numeric"
value={agentForm.maxTokens}
onChange={(event) => setAgentForm((prev) => ({ ...prev, maxTokens: event.target.value }))}
placeholder={pickAppText(locale, '模型默认', 'Model default')}
/>
</div>
<div className="grid gap-2">
<Label htmlFor="agent-temperature">{pickAppText(locale, '温度', 'Temperature')}</Label>
<Input
id="agent-temperature"
inputMode="decimal"
value={agentForm.temperature}
onChange={(event) => setAgentForm((prev) => ({ ...prev, temperature: event.target.value }))}
/>
</div>
<div className="grid gap-2">
<Label htmlFor="agent-max-tool-iterations">
{pickAppText(locale, '最大工具迭代次数', 'Max tool iterations')}
</Label>
<Input
id="agent-max-tool-iterations"
inputMode="numeric"
value={agentForm.maxToolIterations}
onChange={(event) => setAgentForm((prev) => ({ ...prev, maxToolIterations: event.target.value }))}
/>
</div>
</div>
<div className="flex flex-col gap-3 sm:flex-row sm:items-center sm:justify-between">
<div className="text-sm text-destructive">{agentError || ''}</div>
<Button onClick={handleSaveAgentConfig} disabled={savingAgent} className="sm:self-end">
{savingAgent ? <Loader2 className="mr-2 h-4 w-4 animate-spin" /> : null}
{pickAppText(locale, '保存智能体配置', 'Save agent config')}
</Button>
</div>
</CardContent>
</Card>

View File

@ -6,7 +6,6 @@ import React, { useMemo, useState } from 'react';
import { AlertCircle, ArrowLeft, Loader2, Trash2 } from 'lucide-react';
import {
TaskAcceptanceCard,
TaskLiveHeader,
TaskSideRail,
TaskTimeline,
@ -19,10 +18,12 @@ import { deleteBackendTask, getBackendTask, submitChatFeedback } from '@/lib/api
import { pickAppText } from '@/lib/i18n/core';
import { useAppI18n } from '@/lib/i18n/provider';
import { useChatStore } from '@/lib/store';
import { shouldPollTaskDetail, taskDetailDurationMs } from '@/lib/task-detail-refresh';
import { buildTaskTimelineCards } from '@/lib/task-timeline';
import type { BackendTask } from '@/types';
const TERMINAL_TASK_STATUSES = new Set(['closed', 'abandoned', 'cancelled', 'error']);
const TASK_RESULT_REVIEW_ID = 'task-result-review';
export default function TaskDetailPage() {
const { locale } = useAppI18n();
@ -81,12 +82,12 @@ export default function TaskDetailPage() {
const isTaskLive = backendTask ? !TERMINAL_TASK_STATUSES.has(backendTask.status) : false;
React.useEffect(() => {
if (!isTaskLive || wsStatus === 'connected') return;
if (!shouldPollTaskDetail(backendTask)) return;
const id = window.setInterval(() => {
void loadBackendTask();
}, 4000);
return () => window.clearInterval(id);
}, [isTaskLive, loadBackendTask, wsStatus]);
}, [backendTask, loadBackendTask]);
const taskRunIds = useMemo(() => {
const ids = new Set<string>();
@ -129,7 +130,7 @@ export default function TaskDetailPage() {
const activeLabel =
[...timelineCards].reverse().find((card) => !['acceptance', 'task_created'].includes(card.type))?.title ?? '-';
const durationMs = backendTask ? taskDurationMs(backendTask) : null;
const durationMs = backendTask ? taskDetailDurationMs(backendTask) : null;
const feedbackRunId = backendTask ? pickFeedbackRunId(backendTask) : null;
const runAction = async (key: string, action: () => Promise<unknown>) => {
@ -161,7 +162,7 @@ export default function TaskDetailPage() {
return (
<div className="min-h-screen bg-background">
<TaskLiveHeader task={backendTask} activeLabel={activeLabel} durationMs={durationMs} />
<TaskLiveHeader task={backendTask} activeLabel={activeLabel} durationMs={durationMs} reviewTargetId={TASK_RESULT_REVIEW_ID} />
<main className="mx-auto grid max-w-7xl gap-6 p-6 xl:grid-cols-[minmax(0,1fr)_360px]">
<div className="space-y-4">
@ -187,30 +188,32 @@ export default function TaskDetailPage() {
</Card>
) : null}
<TaskTimeline cards={timelineCards} isLive={isTaskLive && wsStatus === 'connected'} />
<TaskAcceptanceCard
sessionId={backendTask.session_id}
runId={feedbackRunId}
taskStatus={backendTask.status}
feedbackItems={feedbackItems as TaskFeedbackItem[]}
actionBusy={actionBusy}
revision={revision}
onRevisionChange={setRevision}
onSubmit={(feedbackType: TaskFeedbackType, comment?: string) =>
runAction(`backend-feedback-${feedbackType}`, async () => {
if (!feedbackRunId) throw new Error(pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.'));
await submitChatFeedback({
sessionId: backendTask.session_id,
runId: feedbackRunId,
feedbackType,
comment,
});
updateMessageFeedback(feedbackRunId, feedbackType);
setRevision('');
await loadBackendTask();
})
}
<TaskTimeline
cards={timelineCards}
isLive={isTaskLive && wsStatus === 'connected'}
reviewTargetId={TASK_RESULT_REVIEW_ID}
resultAcceptance={{
sessionId: backendTask.session_id,
runId: feedbackRunId,
taskStatus: backendTask.status,
feedbackItems: feedbackItems as TaskFeedbackItem[],
actionBusy,
revision,
onRevisionChange: setRevision,
onSubmit: (feedbackType: TaskFeedbackType, comment?: string) =>
runAction(`backend-feedback-${feedbackType}`, async () => {
if (!feedbackRunId) throw new Error(pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.'));
await submitChatFeedback({
sessionId: backendTask.session_id,
runId: feedbackRunId,
feedbackType,
comment,
});
updateMessageFeedback(feedbackRunId, feedbackType);
setRevision('');
await loadBackendTask();
}),
}}
/>
</div>
@ -252,10 +255,3 @@ function pickFeedbackRunId(task: BackendTask): string | null {
if (runs.length > 0) return runs[runs.length - 1].run_id;
return null;
}
// Elapsed milliseconds from created_at to closed_at (or updated_at when closed_at is falsy).
// Returns null when either timestamp does not parse; never returns a negative value.
function taskDurationMs(task: BackendTask): number | null {
  const startedAt = new Date(task.created_at).getTime();
  const endedAt = new Date(task.closed_at || task.updated_at).getTime();
  const bothValid = Number.isFinite(startedAt) && Number.isFinite(endedAt);
  if (!bothValid) {
    return null;
  }
  const elapsed = endedAt - startedAt;
  return elapsed > 0 ? elapsed : 0;
}

View File

@ -113,19 +113,6 @@ export function TaskAcceptanceCard({
onSubmit,
}: Props) {
const { locale } = useAppI18n();
const [localComment, setLocalComment] = React.useState('');
const comment = revision ?? localComment;
const setComment = onRevisionChange ?? setLocalComment;
const isFinalized = taskStatus === 'closed' || taskStatus === 'abandoned';
const isReadyForAcceptance = READY_FOR_ACCEPTANCE_STATUSES.has(taskStatus);
const recordedFeedback = feedbackForRun(feedbackItems, runId) ?? (isFinalized ? latestFeedback(feedbackItems) : null);
const canSubmit = Boolean(runId) && !recordedFeedback && !isFinalized && isReadyForAcceptance && !actionBusy;
const trimmedComment = comment.trim();
const submit = (feedbackType: TaskFeedbackType, nextComment?: string) => {
if (!runId || !canSubmit) return;
void onSubmit(feedbackType, nextComment);
};
return (
<Card>
@ -141,7 +128,49 @@ export function TaskAcceptanceCard({
)}
</div>
</CardHeader>
<CardContent className="space-y-4">
<CardContent>
<TaskAcceptanceControls
sessionId={sessionId}
runId={runId}
taskStatus={taskStatus}
feedbackItems={feedbackItems}
actionBusy={actionBusy}
revision={revision}
onRevisionChange={onRevisionChange}
onSubmit={onSubmit}
/>
</CardContent>
</Card>
);
}
export function TaskAcceptanceControls({
sessionId,
runId,
taskStatus,
feedbackItems,
actionBusy,
revision,
onRevisionChange,
onSubmit,
}: Props) {
const { locale } = useAppI18n();
const [localComment, setLocalComment] = React.useState('');
const comment = revision ?? localComment;
const setComment = onRevisionChange ?? setLocalComment;
const isFinalized = taskStatus === 'closed' || taskStatus === 'abandoned';
const isReadyForAcceptance = READY_FOR_ACCEPTANCE_STATUSES.has(taskStatus);
const recordedFeedback = feedbackForRun(feedbackItems, runId) ?? (isFinalized ? latestFeedback(feedbackItems) : null);
const canSubmit = Boolean(runId) && !recordedFeedback && !isFinalized && isReadyForAcceptance && !actionBusy;
const trimmedComment = comment.trim();
const submit = (feedbackType: TaskFeedbackType, nextComment?: string) => {
if (!runId || !canSubmit) return;
void onSubmit(feedbackType, nextComment);
};
return (
<div className="space-y-4">
{recordedFeedback ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm">
<div className="flex items-center gap-2 font-medium">
@ -207,7 +236,6 @@ export function TaskAcceptanceCard({
{pickAppText(locale, '会话:', 'Session: ')}
<span className="font-mono">{sessionId}</span>
</div>
</CardContent>
</Card>
</div>
);
}

View File

@ -1,7 +1,7 @@
'use client';
import Link from 'next/link';
import { ArrowLeft, MessageSquare } from 'lucide-react';
import { ArrowLeft, CheckCircle2, MessageSquare } from 'lucide-react';
import { TaskRuntimeStatusBadge, formatTaskRuntimeDuration, formatTaskRuntimeTime } from '@/components/task-runtime/TaskRuntimeShared';
import { Badge } from '@/components/ui/badge';
@ -15,6 +15,7 @@ type Props = {
task: BackendTask;
activeLabel: string;
durationMs: number | null;
reviewTargetId?: string;
};
const RUNTIME_STATUSES = new Set<string>(['queued', 'running', 'waiting', 'blocked', 'done', 'error', 'cancelled']);
@ -36,9 +37,10 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
return item ? pickAppText(locale, item[0], item[1]) : status;
}
export function TaskLiveHeader({ task, activeLabel, durationMs }: Props) {
export function TaskLiveHeader({ task, activeLabel, durationMs, reviewTargetId }: Props) {
const { locale } = useAppI18n();
const title = task.short_title || String(task.metadata?.short_title || '') || task.description || task.goal || task.task_id;
const showReviewLink = Boolean(reviewTargetId && ['awaiting_acceptance', 'needs_revision'].includes(task.status));
return (
<header className="sticky top-0 z-20 border-b border-border bg-background/95 backdrop-blur supports-[backdrop-filter]:bg-background/80">
@ -67,6 +69,14 @@ export function TaskLiveHeader({ task, activeLabel, durationMs }: Props) {
</Badge>
)}
{activeLabel ? <Badge variant="secondary">{activeLabel}</Badge> : null}
{showReviewLink ? (
<Button asChild variant="default" size="sm">
<a href={`#${reviewTargetId}`}>
<CheckCircle2 className="mr-2 h-4 w-4" />
{pickAppText(locale, '验收', 'Review')}
</a>
</Button>
) : null}
</div>
</div>

View File

@ -34,11 +34,28 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
needs_revision: ['需要修改', 'Needs revision'],
closed: ['已完成', 'Closed'],
abandoned: ['已放弃', 'Abandoned'],
accept: ['已接受', 'Accepted'],
satisfied: ['已接受', 'Accepted'],
revise: ['已请求修改', 'Revision requested'],
abandon: ['已放弃', 'Abandoned'],
};
const item = map[status];
return item ? pickAppText(locale, item[0], item[1]) : status;
}
// Last entry of task.feedback, or null when the list is missing or empty.
function latestFeedback(task: BackendTask): Record<string, unknown> | null {
  const items = task.feedback ?? [];
  const last = items[items.length - 1];
  return last ?? null;
}
// Localized acceptance label for the side rail: the latest feedback kind wins,
// otherwise the label is derived from the task status.
function acceptanceState(task: BackendTask, locale: 'zh-CN' | 'en-US'): string {
  const feedback = latestFeedback(task);
  const kind = String(feedback?.acceptance_type || feedback?.feedback_type || '');
  if (kind) {
    return humanTaskStatus(kind, locale);
  }
  switch (task.status) {
    case 'awaiting_acceptance':
      return pickAppText(locale, '等待验收', 'Awaiting acceptance');
    case 'needs_revision':
      return pickAppText(locale, '等待修改', 'Awaiting revision');
    default:
      return pickAppText(locale, '未验收', 'No acceptance yet');
  }
}
function toTime(value: string): number {
const parsed = new Date(value).getTime();
return Number.isFinite(parsed) ? parsed : 0;
@ -135,6 +152,9 @@ export function TaskSideRail({ task, runs, artifacts, cards }: Props) {
<div className="text-xs text-muted-foreground">
{pickAppText(locale, '更新', 'Updated')}: {formatTaskRuntimeTime(task.updated_at, locale)}
</div>
<div className="text-xs text-muted-foreground">
{pickAppText(locale, '验收', 'Acceptance')}: {acceptanceState(task, locale)}
</div>
</CardContent>
</Card>

View File

@ -7,14 +7,16 @@ import { pickAppText } from '@/lib/i18n/core';
import { useAppI18n } from '@/lib/i18n/provider';
import type { TaskTimelineCard as TaskTimelineCardView } from '@/types';
import { TaskTimelineCard } from './TaskTimelineCard';
import { TaskTimelineCard, type TaskResultAcceptance } from './TaskTimelineCard';
type Props = {
cards: TaskTimelineCardView[];
isLive: boolean;
resultAcceptance?: TaskResultAcceptance;
reviewTargetId?: string;
};
export function TaskTimeline({ cards, isLive }: Props) {
export function TaskTimeline({ cards, isLive, resultAcceptance, reviewTargetId }: Props) {
const { locale } = useAppI18n();
return (
@ -42,7 +44,7 @@ export function TaskTimeline({ cards, isLive }: Props) {
) : (
<div className="space-y-3">
{cards.map((card) => (
<TaskTimelineCard key={card.id} card={card} />
<TaskTimelineCard key={card.id} card={card} resultAcceptance={resultAcceptance} reviewTargetId={reviewTargetId} />
))}
</div>
)}

View File

@ -6,8 +6,10 @@ import {
Bot,
CheckCircle2,
ClipboardList,
ChevronDown,
FileText,
GitBranch,
History,
ListChecks,
Sparkles,
TerminalSquare,
@ -24,8 +26,23 @@ import { useAppI18n } from '@/lib/i18n/provider';
import type { TaskRuntimeStatus } from '@/lib/task-runtime';
import type { TaskTimelineCard as TaskTimelineCardView, TaskTimelineCardType } from '@/types';
import { TaskAcceptanceControls, type TaskFeedbackItem, type TaskFeedbackType } from './TaskAcceptanceCard';
// Props of TaskTimelineCard.
type Props = {
card: TaskTimelineCardView;
// Acceptance controls are rendered inside the card only when card.type is 'result'
// and card.runId equals resultAcceptance.runId.
resultAcceptance?: TaskResultAcceptance;
// Used as the Card element id when the acceptance controls are rendered.
reviewTargetId?: string;
};
// Bundle of values spread onto TaskAcceptanceControls when a result card hosts the acceptance UI.
export type TaskResultAcceptance = {
sessionId: string;
// Compared against card.runId to pick the result card that shows the controls.
runId: string | null;
taskStatus: string;
feedbackItems: TaskFeedbackItem[];
actionBusy: string | null;
revision?: string;
onRevisionChange?: (value: string) => void;
onSubmit: (feedbackType: TaskFeedbackType, comment?: string) => Promise<unknown>;
};
const RUNTIME_STATUSES = new Set<string>(['queued', 'running', 'waiting', 'blocked', 'done', 'error', 'cancelled']);
@ -60,6 +77,8 @@ function iconForType(type: TaskTimelineCardType) {
return AlertTriangle;
case 'result':
return CheckCircle2;
case 'result_history':
return History;
case 'acceptance':
return ThumbsUp;
}
@ -87,6 +106,7 @@ function cardTypeLabel(type: TaskTimelineCardType, locale: 'zh-CN' | 'en-US') {
artifact: ['产物', 'Artifact'],
error: ['异常', 'Error'],
result: ['结果', 'Result'],
result_history: ['历史结果', 'Result history'],
acceptance: ['验收', 'Acceptance'],
};
const label = labels[type];
@ -111,12 +131,57 @@ function humanStatus(status: string, locale: 'zh-CN' | 'en-US') {
return label ? pickAppText(locale, label[0], label[1]) : status;
}
export function TaskTimelineCard({ card }: Props) {
// Extract details.versions as a list of object entries; anything that is not an array yields [].
function historyVersions(details: Record<string, unknown> | undefined): Array<Record<string, unknown>> {
  const versions = details?.versions;
  if (!Array.isArray(versions)) {
    return [];
  }
  // Keep only truthy values whose typeof is 'object' (drops null, numbers, strings, ...).
  const isRecord = (item: unknown): item is Record<string, unknown> => Boolean(item) && typeof item === 'object';
  return versions.filter(isRecord);
}
// Badge text for one history entry: its acceptanceType (or status) run through humanStatus,
// or a generic "previous version" label when neither is set.
function renderHistoryStatus(version: Record<string, unknown>, locale: 'zh-CN' | 'en-US') {
  const status = String(version.acceptanceType || version.status || '');
  if (!status) {
    return pickAppText(locale, '历史版本', 'Previous version');
  }
  return humanStatus(status, locale);
}
// Collapsible list of earlier result versions, read from card.details.versions.
// Returns a bare <details> element: the previous body opened a <Card> that was never closed
// (invalid JSX) and computed an icon it never used.
function TaskResultHistory({ card }: { card: TaskTimelineCardView }) {
  const { locale } = useAppI18n();
  const versions = historyVersions(card.details);

  return (
    <details className="mt-3 rounded-md border border-border bg-muted/20 px-3 py-2 text-sm">
      <summary className="flex cursor-pointer select-none items-center justify-between gap-3 font-medium">
        <span>{pickAppText(locale, '展开历史版本', 'Show previous versions')}</span>
        <ChevronDown className="h-4 w-4 text-muted-foreground" />
      </summary>
      <div className="mt-3 space-y-3">
        {versions.map((version, index) => (
          // Fall back to the list index as the key when the entry has no runId.
          <div key={String(version.runId || index)} className="rounded-md border border-border bg-background p-3">
            <div className="flex flex-wrap items-center justify-between gap-2">
              <div className="text-sm font-medium">
                {pickAppText(locale, `${index + 1} 轮结果`, `Version ${index + 1}`)}
              </div>
              <Badge variant="outline" className="text-[11px]">
                {renderHistoryStatus(version, locale)}
              </Badge>
            </div>
            {version.result ? <p className="mt-2 whitespace-pre-wrap text-sm leading-6 text-muted-foreground">{String(version.result)}</p> : null}
            {version.comment ? (
              <div className="mt-3 rounded-md bg-muted/35 p-2 text-xs text-muted-foreground">
                {pickAppText(locale, '修改意见', 'Revision note')}: {String(version.comment)}
              </div>
            ) : null}
          </div>
        ))}
      </div>
    </details>
  );
}
export function TaskTimelineCard({ card, resultAcceptance, reviewTargetId }: Props) {
const { locale } = useAppI18n();
const Icon = iconForType(card.type);
const shouldRenderResultAcceptance = Boolean(card.type === 'result' && resultAcceptance && card.runId === resultAcceptance.runId);
return (
<Card id={shouldRenderResultAcceptance ? reviewTargetId : undefined} className="rounded-md scroll-mt-28">
<CardContent className="p-4">
<div className="flex gap-3">
<div className="flex h-9 w-9 shrink-0 items-center justify-center rounded-md bg-muted">
@ -150,7 +215,13 @@ export function TaskTimelineCard({ card }: Props) {
{card.summary ? <p className="mt-3 whitespace-pre-wrap text-sm leading-6 text-muted-foreground">{card.summary}</p> : null}
{card.details ? (
{shouldRenderResultAcceptance ? (
<div className="mt-4 border-t border-border pt-4">
<TaskAcceptanceControls {...resultAcceptance!} />
</div>
) : null}
{card.type === 'result_history' ? <TaskResultHistory card={card} /> : card.details ? (
<details className="mt-3 rounded-md border border-border bg-muted/20 px-3 py-2 text-xs">
<summary className="cursor-pointer select-none font-medium text-muted-foreground">
{pickAppText(locale, '详情 JSON', 'Details JSON')}

View File

@ -4,6 +4,7 @@ import type {
AuthzStatus,
AuthUser,
ActiveTask,
AgentConfigPayload,
ChatLogsResponse,
BackendTask,
ChatMessage,
@ -620,6 +621,13 @@ export async function getStatus(): Promise<SystemStatus> {
return fetchJSON('/api/status');
}
// POST the agent runtime settings as JSON to /api/agent-config.
export async function updateAgentConfig(payload: AgentConfigPayload): Promise<{ ok: boolean }> {
  const body = JSON.stringify(payload);
  return fetchJSON('/api/agent-config', { method: 'POST', body });
}
export async function updateProviderConfig(
providerId: string,
payload: ProviderConfigPayload

View File

@ -0,0 +1,37 @@
import { describe, expect, it, vi } from 'vitest';
import { shouldPollTaskDetail, taskDetailDurationMs } from '@/lib/task-detail-refresh';
import type { BackendTask } from '@/types';
// Shared fixture: a 'running' task whose updated_at is 500 ms after created_at.
// Individual cases override fields with object spread.
const baseTask: BackendTask = {
task_id: 'task-1',
session_id: 'web:test',
description: '查找餐厅',
goal: '查找餐厅',
constraints: [],
priority: 0,
status: 'running',
creator: 'main-agent',
created_at: '2026-05-27T02:02:41.000Z',
updated_at: '2026-05-27T02:02:41.500Z',
run_ids: [],
skill_names: [],
feedback: [],
metadata: {},
};
describe('task detail refresh helpers', () => {
  // Only 'running' and 'open' are expected to poll here; 'awaiting_acceptance' and 'closed' are not.
  it('polls executing task details regardless of websocket status', () => {
    expect(shouldPollTaskDetail({ ...baseTask, status: 'running' })).toBe(true);
    expect(shouldPollTaskDetail({ ...baseTask, status: 'open' })).toBe(true);
    expect(shouldPollTaskDetail({ ...baseTask, status: 'awaiting_acceptance' })).toBe(false);
    expect(shouldPollTaskDetail({ ...baseTask, status: 'closed' })).toBe(false);
  });

  it('uses current time for active task duration instead of stale updated_at', () => {
    // Pin "now" to 60 s after baseTask.created_at.
    vi.setSystemTime(new Date('2026-05-27T02:03:41.000Z'));
    try {
      // Running task: measured against the pinned clock, not updated_at (+500 ms).
      expect(taskDetailDurationMs(baseTask)).toBe(60_000);
      // Awaiting acceptance: measured against updated_at (8 min 14 s after created_at).
      expect(taskDetailDurationMs({ ...baseTask, status: 'awaiting_acceptance', updated_at: '2026-05-27T02:10:55.000Z' })).toBe(494_000);
    } finally {
      // Restore the real Date so the pinned clock cannot leak into later tests.
      vi.useRealTimers();
    }
  });
});

View File

@ -0,0 +1,18 @@
import type { BackendTask } from '@/types';
// Statuses for which the task detail page keeps polling.
const EXECUTING_TASK_STATUSES = new Set(['open', 'queued', 'running']);
// Statuses whose duration is measured up to a stored timestamp instead of the current time.
const FINISHED_FOR_DURATION_STATUSES = new Set(['awaiting_acceptance', 'closed', 'abandoned', 'cancelled', 'error']);

// True only for a non-null task whose status is one of the executing statuses.
export function shouldPollTaskDetail(task: Pick<BackendTask, 'status'> | null): boolean {
  if (!task) {
    return false;
  }
  return EXECUTING_TASK_STATUSES.has(task.status);
}

// Milliseconds since created_at. Finished statuses end at closed_at (or updated_at when
// closed_at is falsy); every other status ends at Date.now(). Returns null when a timestamp
// does not parse, and never returns a negative value.
export function taskDetailDurationMs(task: Pick<BackendTask, 'created_at' | 'updated_at' | 'closed_at' | 'status'>): number | null {
  const startedAt = new Date(task.created_at).getTime();
  let endedAt: number;
  if (FINISHED_FOR_DURATION_STATUSES.has(task.status)) {
    endedAt = new Date(task.closed_at || task.updated_at).getTime();
  } else {
    endedAt = Date.now();
  }
  if (!Number.isFinite(startedAt) || !Number.isFinite(endedAt)) {
    return null;
  }
  return Math.max(0, endedAt - startedAt);
}

View File

@ -166,6 +166,133 @@ describe('buildTaskTimelineCards', () => {
expect(cards.at(-1)?.summary).toContain('可以');
});
// The result card body must be the newest assistant message of the run, while the
// event's result_summary stays available under card.details.
it('uses the latest assistant message from the acceptance run as the result body', () => {
const task = makeTask({
status: 'awaiting_acceptance',
updated_at: '2026-05-26T10:04:00.000Z',
run_ids: ['run-main'],
runs: [
{
run_id: 'run-main',
title: '主 Agent',
session_id: 'web:default',
messages: [
// Two assistant messages one minute apart; the later one is the expected result body.
{ role: 'assistant', content: 'Draft answer', created_at: '2026-05-26T10:03:00.000Z' },
{ role: 'assistant', content: 'Final user-visible answer', created_at: '2026-05-26T10:04:00.000Z' },
],
},
],
});
const processEvents: ProcessEvent[] = [
{
event_id: 'evt-result-ready',
run_id: 'run-main',
parent_run_id: null,
kind: 'task_result_ready',
actor_type: 'system',
actor_id: 'evidence',
actor_name: 'Evidence',
text: 'The task result is ready for user acceptance.',
created_at: '2026-05-26T10:04:00.000Z',
metadata: {
result_summary: 'Summary should not replace the final answer.',
},
},
];
const cards = buildTaskTimelineCards({ task, processEvents });
const result = cards.find((card) => card.type === 'result');
expect(result?.summary).toBe('Final user-visible answer');
expect(result?.details?.result_summary).toBe('Summary should not replace the final answer.');
});
// Two attempts: run-1 received 'revise' feedback, run-2 is the current result.
// The earlier result is expected to fold into one result_history card.
it('collapses previous result and acceptance cards into a history pack', () => {
const task = makeTask({
status: 'awaiting_acceptance',
updated_at: '2026-05-26T10:12:00.000Z',
run_ids: ['run-1', 'run-2'],
feedback: [
{
acceptance_type: 'revise',
comment: 'Add decisions',
created_at: '2026-05-26T10:06:00.000Z',
run_id: 'run-1',
},
],
runs: [
{
run_id: 'run-1',
title: '主 Agent',
session_id: 'web:default',
messages: [{ role: 'assistant', content: 'Version one answer', created_at: '2026-05-26T10:05:00.000Z' }],
},
{
run_id: 'run-2',
title: '主 Agent',
session_id: 'web:default',
messages: [{ role: 'assistant', content: 'Version two answer', created_at: '2026-05-26T10:12:00.000Z' }],
},
],
});
const processEvents: ProcessEvent[] = [
{
event_id: 'evt-result-1',
run_id: 'run-1',
parent_run_id: null,
kind: 'task_result_ready',
actor_type: 'system',
actor_id: 'evidence',
actor_name: 'Evidence',
text: 'Result one ready.',
created_at: '2026-05-26T10:05:00.000Z',
},
{
event_id: 'evt-plan-2',
run_id: 'run-2',
parent_run_id: null,
kind: 'task_planned',
actor_type: 'system',
actor_id: 'planner',
actor_name: 'Task Planner',
text: 'Second attempt planned.',
created_at: '2026-05-26T10:08:00.000Z',
},
{
event_id: 'evt-result-2',
run_id: 'run-2',
parent_run_id: null,
kind: 'task_result_ready',
actor_type: 'system',
actor_id: 'evidence',
actor_name: 'Evidence',
text: 'Result two ready.',
created_at: '2026-05-26T10:12:00.000Z',
},
];
const cards = buildTaskTimelineCards({ task, processEvents });
// Expected order: the history pack precedes the second attempt's plan and result.
expect(cards.map((card) => card.type)).toEqual([
'task_created',
'result_history',
'plan',
'result',
]);
const history = cards.find((card) => card.type === 'result_history');
expect(history?.summary).toBe('1 历史结果版本');
// The single history entry combines run-1's answer with its feedback.
expect(history?.details?.versions).toEqual([
expect.objectContaining({
runId: 'run-1',
result: 'Version one answer',
acceptanceType: 'revise',
comment: 'Add decisions',
}),
]);
expect(cards.find((card) => card.id === 'evt-plan-2')).toBeTruthy();
expect(cards.at(-1)?.summary).toBe('Version two answer');
});
it('does not add fallback progress when a child run already has progress events', () => {
const task = makeTask();
const processRuns: ProcessRun[] = [
@ -201,6 +328,51 @@ describe('buildTaskTimelineCards', () => {
expect(cards.map((card) => card.id)).not.toContain('run-research:fallback-progress');
});
// A started/finished event pair sharing tool_call_id 'call-1': the start card is expected
// to take the finish event's status, and the finish event becomes a tool_result card.
it('marks a tool call as finished when a matching tool result exists', () => {
const task = makeTask();
const processEvents: ProcessEvent[] = [
{
event_id: 'evt-tool-start',
run_id: 'run-main',
parent_run_id: null,
kind: 'tool_call_started',
actor_type: 'mcp',
actor_id: 'web_search',
actor_name: 'web_search',
text: 'Calling tool: web_search.',
status: 'running',
created_at: '2026-05-26T10:02:00.000Z',
metadata: {
tool_call_id: 'call-1',
tool_name: 'web_search',
},
},
{
event_id: 'evt-tool-finish',
run_id: 'run-main',
parent_run_id: null,
kind: 'tool_call_finished',
actor_type: 'mcp',
actor_id: 'web_search',
actor_name: 'web_search',
text: 'Search failed.',
status: 'error',
created_at: '2026-05-26T10:03:00.000Z',
metadata: {
tool_call_id: 'call-1',
tool_name: 'web_search',
result_summary: 'Search failed.',
},
},
];
const cards = buildTaskTimelineCards({ task, processEvents });
// The finish event has status 'error' but must still map to a tool_result card.
expect(cards.find((card) => card.id === 'evt-tool-start')?.status).toBe('error');
expect(cards.find((card) => card.id === 'evt-tool-finish')?.type).toBe('tool_result');
expect(cards.find((card) => card.id === 'evt-tool-finish')?.summary).toBe('Search failed.');
});
it('maps agent_finished events without timeline metadata to agent progress cards', () => {
const task = makeTask();
const processEvents: ProcessEvent[] = [

View File

@ -27,6 +27,7 @@ const TIMELINE_CARD_TYPES = new Set<TaskTimelineCardType>([
'artifact',
'error',
'result',
'result_history',
'acceptance',
]);
@ -77,10 +78,6 @@ function cardTypeForEvent(event: ProcessEvent): TaskTimelineCardType | null {
return timelineType;
}
if (event.status === 'error') {
return 'error';
}
switch (String(event.kind)) {
case 'task_planned':
case 'run_started':
@ -106,6 +103,9 @@ function cardTypeForEvent(event: ProcessEvent): TaskTimelineCardType | null {
case 'task_error':
return 'error';
default:
if (event.status === 'error') {
return 'error';
}
return null;
}
}
@ -136,6 +136,8 @@ function titleForCard(type: TaskTimelineCardType, actorName?: string): string {
return '执行遇到问题';
case 'result':
return '本轮结果';
case 'result_history':
return '历史结果版本';
case 'acceptance':
return '任务验收';
}
@ -182,6 +184,22 @@ function resultSummary(task: BackendTask): string | undefined {
);
}
/**
 * Find the final assistant answer stored on one of the task's runs.
 *
 * @param task  Task whose `runs` list is searched (a missing list is treated as empty).
 * @param runId Identifier of the run to inspect; a falsy value yields `undefined`.
 * @returns The trimmed content of the item `lastItem` picks from the run's
 *          non-blank assistant messages, or `undefined` when the run is
 *          unknown or has no such message.
 */
function assistantResultForRun(task: BackendTask, runId: string | null | undefined): string | undefined {
  if (!runId) {
    return undefined;
  }
  const matchingRun = (task.runs ?? []).find((candidate) => candidate.run_id === runId);
  if (!matchingRun) {
    return undefined;
  }
  // Only assistant messages with non-blank content count as a result.
  const nonBlankReplies = matchingRun.messages.filter((entry) => {
    if (entry.role !== 'assistant') return false;
    return Boolean(entry.content.trim());
  });
  const finalReply = lastItem(nonBlankReplies);
  return finalReply?.content.trim();
}
/**
 * Choose the summary text for a result card built from a process event.
 *
 * The assistant answer recorded for the event's run is offered to
 * `firstString` first, followed by the event's own summary.
 */
function resultSummaryForEvent(task: BackendTask, event: ProcessEvent): string | undefined {
  const runAnswer = assistantResultForRun(task, event.run_id);
  const eventSummary = summaryForEvent(event);
  return firstString(runAnswer, eventSummary);
}
/**
 * Choose the summary text for a result card built from the task itself.
 *
 * The assistant answer of the run picked by `lastItem(task.run_ids)` is
 * offered to `firstString` first, followed by the task-level result summary.
 */
function fallbackResultSummary(task: BackendTask): string | undefined {
  const latestRunId = lastItem(task.run_ids);
  const runAnswer = assistantResultForRun(task, latestRunId);
  const taskSummary = resultSummary(task);
  return firstString(runAnswer, taskSummary);
}
function buildRunMap(processRuns: ProcessRun[]): Map<string, ProcessRun> {
const map = new Map<string, ProcessRun>();
for (const run of processRuns) {
@ -239,12 +257,106 @@ function isCoveredByAcceptanceEvent(
return matchingTypeEvents.length === 1;
}
/**
 * Numeric sort key for a card.
 *
 * Uses `toTime(card.createdAt)`; when that yields null/undefined the card
 * gets `Number.MAX_SAFE_INTEGER`, so it sorts after every timed card in an
 * ascending sort.
 */
function cardTime(card: TaskTimelineCard): number {
  const timestamp = toTime(card.createdAt);
  return timestamp ?? Number.MAX_SAFE_INTEGER;
}
/**
 * Comment text for a card: `details.comment` is offered to `firstString`
 * first, then the card summary.
 */
function cardComment(card: TaskTimelineCard): string | undefined {
  const explicitComment = card.details?.comment;
  return firstString(explicitComment, card.summary);
}
/**
 * Build the key used to pair a tool call with its result.
 *
 * The key is `<run_id>:<identifier>`, where the identifier is
 * `metadata.tool_call_id` when available and otherwise the first usable
 * value among `metadata.tool_name`, `actor_name` and `actor_id`.
 *
 * @returns The key, or `null` when no identifier can be derived.
 */
function toolCallKeyFromEvent(event: ProcessEvent): string | null {
  const metadata = event.metadata;
  // The name-based fallback is only evaluated when no call id is present.
  const identifier =
    firstString(metadata?.tool_call_id) ||
    firstString(metadata?.tool_name, event.actor_name, event.actor_id);
  return identifier ? `${event.run_id}:${identifier}` : null;
}
/**
 * Index tool-result statuses by tool-call key.
 *
 * Only events that `cardTypeForEvent` maps to `'tool_result'` and that
 * yield a key are recorded. An empty status is stored as `'done'`, and a
 * later event with the same key overwrites an earlier one.
 */
function buildToolResultStatusByCall(processEvents: ProcessEvent[]): Map<string, string> {
  const statusByCall = new Map<string, string>();
  processEvents.forEach((candidate) => {
    if (cardTypeForEvent(candidate) === 'tool_result') {
      const callKey = toolCallKeyFromEvent(candidate);
      if (callKey) {
        statusByCall.set(callKey, candidate.status || 'done');
      }
    }
  });
  return statusByCall;
}
/**
 * Fold superseded result cards into a single `result_history` card.
 *
 * Each result card becomes one entry in `details.versions`, enriched with
 * the most recent acceptance card (by `cardTime`) that shares its `runId`.
 *
 * @param task            Task the history belongs to (supplies id and fallback timestamp).
 * @param resultCards     Result cards to record as historical versions.
 * @param acceptanceCards Acceptance cards that may belong to those results.
 * @returns The history card; its `createdAt` is that of the first result
 *          card, falling back to the task creation time.
 */
function buildResultHistoryCard(task: BackendTask, resultCards: TaskTimelineCard[], acceptanceCards: TaskTimelineCard[]): TaskTimelineCard {
  const byTimeAscending = (left: TaskTimelineCard, right: TaskTimelineCard): number =>
    cardTime(left) - cardTime(right);

  const versions = resultCards.map((resultCard) => {
    // `filter` returns a fresh array, so sorting it leaves the input untouched.
    const runAcceptances = acceptanceCards.filter((candidate) => candidate.runId === resultCard.runId);
    runAcceptances.sort(byTimeAscending);
    const latestAcceptance = runAcceptances.at(-1);
    const comment = latestAcceptance ? cardComment(latestAcceptance) ?? '' : '';
    return {
      runId: resultCard.runId ?? null,
      result: resultCard.summary ?? '',
      createdAt: resultCard.createdAt,
      status: latestAcceptance?.status ?? resultCard.status ?? null,
      acceptanceType: latestAcceptance?.status ?? null,
      comment,
      acceptedAt: latestAcceptance?.createdAt ?? null,
    };
  });

  const historyCard: TaskTimelineCard = {
    id: `${task.task_id}:result-history`,
    taskId: task.task_id,
    type: 'result_history',
    title: titleForCard('result_history'),
    summary: `${resultCards.length} 历史结果版本`,
    createdAt: resultCards[0]?.createdAt ?? task.created_at,
    details: { versions },
  };
  return historyCard;
}
/**
 * Collapse all but the current result card into one history card.
 *
 * The current result is the card whose run matches
 * `task.metadata.final_accepted_run_id` when such a card exists, otherwise
 * the result card with the greatest `cardTime`. Every other result card,
 * plus the acceptance cards of those runs, is removed and replaced by a
 * single `result_history` card placed where the oldest removed result was.
 *
 * @param task  Task the cards belong to.
 * @param cards Timeline cards, already in display order.
 * @returns `cards` unchanged when there is nothing to fold, otherwise a new array.
 */
function collapseHistoricalResults(task: BackendTask, cards: TaskTimelineCard[]): TaskTimelineCard[] {
  const byTimeAscending = (left: TaskTimelineCard, right: TaskTimelineCard): number =>
    cardTime(left) - cardTime(right);

  const resultCards = cards.filter((card) => card.type === 'result');
  if (resultCards.length <= 1) {
    return cards;
  }

  const finalAcceptedRunId = firstString(task.metadata?.final_accepted_run_id);
  const acceptedResult = finalAcceptedRunId
    ? resultCards.find((card) => card.runId === finalAcceptedRunId)
    : undefined;
  const latestResult = acceptedResult ?? [...resultCards].sort(byTimeAscending).at(-1);
  if (!latestResult) {
    return cards;
  }
  const latestResultId = latestResult.id;

  const oldResults = resultCards.filter((card) => card.id !== latestResultId).sort(byTimeAscending);
  if (oldResults.length === 0) {
    return cards;
  }

  // Acceptance cards are folded only when their run produced a folded result.
  const oldRunIds = new Set(oldResults.map((card) => card.runId).filter(Boolean));
  const oldAcceptances = cards
    .filter((card) => card.type === 'acceptance' && oldRunIds.has(card.runId))
    .sort(byTimeAscending);

  const foldedIds = new Set(oldResults.concat(oldAcceptances).map((card) => card.id));
  const historyCard = buildResultHistoryCard(task, oldResults, oldAcceptances);
  const oldestResultId = oldResults[0].id;
  const firstOldResultIndex = cards.findIndex((card) => card.id === oldestResultId);

  // Emit the history card at the slot of the oldest folded result and drop folded cards.
  return cards.flatMap((card, index) => {
    const kept = foldedIds.has(card.id) ? [] : [card];
    return index === firstOldResultIndex ? [historyCard, ...kept] : kept;
  });
}
export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): TaskTimelineCard[] {
const { task } = input;
const processRuns = input.processRuns ?? task.process_runs ?? [];
const processEvents = input.processEvents ?? task.process_events ?? [];
const processArtifacts = input.processArtifacts ?? task.process_artifacts ?? [];
const runsById = buildRunMap(processRuns);
const toolResultStatusByCall = buildToolResultStatusByCall(processEvents);
const runsWithProgressEvents = new Set<string>();
const acceptanceEvents: AcceptanceEventIdentity[] = [];
let hasResultEventCard = false;
@ -285,9 +397,12 @@ export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): Task
parentRunId: event.parent_run_id,
type,
title: titleForCard(type, event.actor_name),
summary: summaryForEvent(event),
summary: type === 'result' ? resultSummaryForEvent(task, event) : summaryForEvent(event),
actorName: event.actor_name,
status: event.status,
status:
type === 'tool_call'
? toolResultStatusByCall.get(toolCallKeyFromEvent(event) ?? '') ?? event.status
: event.status,
createdAt: event.created_at,
details: detailsForEvent(event),
});
@ -340,7 +455,7 @@ export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): Task
runId: lastItem(task.run_ids),
type: 'result',
title: titleForCard('result'),
summary: resultSummary(task),
summary: fallbackResultSummary(task),
status: task.status,
createdAt: task.closed_at ?? task.updated_at ?? task.created_at,
details: task.validation_result ?? undefined,
@ -366,8 +481,10 @@ export function buildTaskTimelineCards(input: BuildTaskTimelineCardsInput): Task
});
}
return cards
const sortedCards = cards
.map((card, index) => ({ card, index }))
.sort(compareCardsByCreatedAt)
.map(({ card }) => card);
return collapseHistoricalResults(task, sortedCards);
}

View File

@ -142,6 +142,12 @@ export interface ProviderConfigPayload {
request_timeout_seconds?: number;
}
/**
 * Agent runtime parameters carried in a configuration payload.
 * NOTE(review): presumably mirrors the backend agent-defaults config that
 * the web API reads and updates — confirm against the API handler.
 */
export interface AgentConfigPayload {
// Token limit; `null` is allowed — presumably "no explicit limit, use the
// provider/model default" (TODO confirm).
max_tokens: number | null;
// Sampling temperature (valid range is not defined here).
temperature: number;
// Limit on tool iterations — presumably per agent run; TODO confirm.
max_tool_iterations: number;
}
export interface ChannelStatus {
name: string;
enabled: boolean;
@ -153,7 +159,7 @@ export interface SystemStatus {
workspace: string;
workspace_exists: boolean;
model: string;
max_tokens: number;
max_tokens: number | null;
max_context_messages?: number;
temperature: number;
max_tool_iterations: number;
@ -794,6 +800,7 @@ export type TaskTimelineCardType =
| 'artifact'
| 'error'
| 'result'
| 'result_history'
| 'acceptance';
export interface TaskTimelineCard {

View File

@ -0,0 +1,61 @@
#!/bin/bash
# Deploy initial skills to all runtime instances via docker cp
# Usage: ./scripts/deploy-initial-skills.sh
#
# Environment:
#   SKILL_SOURCE  Local directory holding the skill folders and _index/.
#                 Defaults to the original checkout path.
#
# For every container the script:
#   1. copies each skill directory that exists locally,
#   2. merges the copied skill names into the container's published index
#      (existing entries are kept, duplicates are skipped),
#   3. overwrites the container's disabled index with the local one.

set -euo pipefail

SKILL_SOURCE="${SKILL_SOURCE:-/home/ivan/xuan/beaver_project/skills}"
REMOTE_SKILLS_DIR="/root/.beaver/workspace/skills"
DOCKER_NAMES=("app-instance-steven" "app-instance-benson" "app-instance-jayc" "app-instance-officebench")

SKILLS=(
  "outlook-mail"
  "filesystem-operation"
  "terminal-operation"
  "web-operation"
  "utility-tools"
  "skills-admin"
  "cron-scheduler"
  "memory-management"
)

# Python executed inside each container. argv[1] is the index file and the
# remaining arguments are the skill names to merge. Names are passed as
# arguments rather than spliced into the source, so quoting inside a name
# cannot break the program. The text is single-quoted for the shell, so it
# must not contain single quotes.
MERGE_INDEX_PY='
import json
import sys
from pathlib import Path

idx = Path(sys.argv[1])
existing = json.loads(idx.read_text(encoding="utf-8")) if idx.exists() else {}
items = existing.setdefault("items", [])
seen = set(items)
for name in sys.argv[2:]:
    if name not in seen:
        items.append(name)
        seen.add(name)
idx.write_text(json.dumps(existing, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
print(f"Index updated: {len(items)} skills")
'

# Validate local inputs before touching any container, so a bad path cannot
# leave the instances half-deployed.
if [ ! -d "$SKILL_SOURCE" ]; then
  echo "error: skill source directory not found: $SKILL_SOURCE" >&2
  exit 1
fi
if [ ! -f "$SKILL_SOURCE/_index/disabled.json" ]; then
  echo "error: missing $SKILL_SOURCE/_index/disabled.json" >&2
  exit 1
fi

for container in "${DOCKER_NAMES[@]}"; do
  echo "==> Deploying to $container..."
  docker exec "$container" mkdir -p "$REMOTE_SKILLS_DIR/_index"

  # Only skills that were really copied are added to the published index.
  deployed=()
  for skill in "${SKILLS[@]}"; do
    if [ -d "$SKILL_SOURCE/$skill" ]; then
      docker cp "$SKILL_SOURCE/$skill" "$container:$REMOTE_SKILLS_DIR/"
      echo "  + $skill"
      deployed+=("$skill")
    else
      echo "  ! $skill not found in $SKILL_SOURCE, skipped" >&2
    fi
  done

  # Merge index: keep existing entries + add new skills, no duplicates.
  # The ${arr[@]+...} form keeps `set -u` happy on older bash when nothing was deployed.
  docker exec "$container" python3 -c "$MERGE_INDEX_PY" \
    "$REMOTE_SKILLS_DIR/_index/published.json" ${deployed[@]+"${deployed[@]}"}

  # NOTE: this replaces (does not merge) the disabled list in the container.
  docker cp "$SKILL_SOURCE/_index/disabled.json" "$container:$REMOTE_SKILLS_DIR/_index/disabled.json"
  echo "  [done]"
done

echo ""
echo "Done! All skills deployed to all instances."
echo "Containers: ${DOCKER_NAMES[*]}"
echo "Skills: ${SKILLS[*]}"

View File

@ -0,0 +1,3 @@
{
"items": []
}

View File

@ -0,0 +1,13 @@
{
"items": [
"outlook-mail",
"filesystem-operation",
"terminal-operation",
"web-operation",
"utility-tools",
"skills-admin",
"cron-scheduler",
"memory-management",
"officebench-mcp"
]
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "定时任务和周期性调度。支持标准 cron 表达式、一次性提醒和持久化任务。",
"display_name": "cron-scheduler",
"lineage": [],
"name": "cron-scheduler",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["cron", "scheduler", "timer", "periodic"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,34 @@
---
name: cron-scheduler
description: 定时任务和周期性调度。支持标准 cron 表达式、一次性提醒和持久化任务。
tools:
- cron
---
# Cron Scheduler — 定时任务调度
基于 cron 表达式的定时任务和一次性提醒。
## 工具说明
### cron
创建和管理 Beaver 定时通知或 Task。
- `action` (str): `add` | `list` | `remove` | `toggle` | `run`
- `message` (str): 触发时执行的任务说明,`add` 时必填
- `schedule` (str): 调度表达式,例如 `every 15m`、`0 9 * * *` 或 ISO 时间
- `every_seconds` (int | None): 固定秒级间隔
- `cron_expr` (str | None): 标准 5 段 cron 表达式
- `tz` (str | None): IANA 时区,例如 `Asia/Shanghai`
- `at_iso` (str | None): 一次性任务的 ISO 时间
- `job_id` (str | None): `remove`、`toggle`、`run` 的目标任务 ID
- `enabled` (bool | None): `toggle` 时设置启停状态
- `mode` (str | None): `notification` 或 `task`
- `requires_followup` (bool | None): task 模式下是否需要用户跟进
## 使用原则
1. 避开 :00 和 :30 整点分钟,分散负载
2. 一次性提醒优先使用 `at_iso` 或清晰的 `schedule`
3. 需要持续提醒时使用 `mode="notification"`,需要 Task 跟踪时才用 `mode="task"`
4. 定期用 `action="list"` 确认任务是否按预期调度
5. 任务触发时 `message` 会完整执行,确保内容自包含

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for cron scheduling",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "定时任务和周期性调度。支持标准 cron 表达式、一次性提醒和持久化任务。",
"name": "cron-scheduler",
"tools": ["cron"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "cron-scheduler",
"summary": "Cron Scheduler — 基于 cron 表达式的定时任务和一次性提醒",
"summary_hash": "placeholder",
"tool_hints": ["cron"],
"version": "v0001"
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "本地文件系统读写、搜索和目录操作。支持读取、写入、修改、搜索文件和目录遍历。",
"display_name": "filesystem-operation",
"lineage": [],
"name": "filesystem-operation",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["filesystem", "file", "io", "directory"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,50 @@
---
name: filesystem-operation
description: 本地文件系统读写、搜索和目录操作。支持读取、写入、修改、搜索文件和目录遍历。
tools:
- read_file
- write_file
- patch_file
- search_files
- list_directory
---
# Filesystem Operation — 文件系统操作
本地文件系统工具集,用于读写和搜索项目文件。
## 工具说明
### read_file
读取本地文件内容。
- 使用 `skill_view` 查看文件预览
- 大文件会分页返回,可通过 offset/limit 控制
### write_file
写入新文件或覆盖已有文件。
- 创建新文件时自动创建父目录
- 写入前确认不会覆盖重要配置
### patch_file
精确修改文件中的指定内容。
- 通过搜索-替换方式修改
- 适用于局部更新,避免整文件重写
### search_files
在项目中搜索文件名或内容。
- 支持 glob 模式匹配
- 支持按内容搜索
- 支持限制搜索目录深度
### list_directory
列出目录内容。
- 可递归列出子目录
- 支持过滤文件类型
## 使用原则
1. 优先使用 `read_file` 查看文件内容,再决定修改方案
2. 小范围修改用 `patch_file`,大范围用 `write_file`
3. 搜索文件时先确认路径是否存在
4. 修改前确认文件编码(默认 UTF-8)
5. 敏感文件(.env、密钥等)不写入版本控制

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for local filesystem operations",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "本地文件系统读写、搜索和目录操作。支持读取、写入、修改、搜索文件和目录遍历。",
"name": "filesystem-operation",
"tools": ["read_file", "write_file", "patch_file", "search_files", "list_directory"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "filesystem-operation",
"summary": "Filesystem Operation — 本地文件系统操作工具集",
"summary_hash": "placeholder",
"tool_hints": ["read_file", "write_file", "patch_file", "search_files", "list_directory"],
"version": "v0001"
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "持久化记忆管理。存储用户信息、项目上下文、偏好和反馈,实现跨会话记忆。",
"display_name": "memory-management",
"lineage": [],
"name": "memory-management",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["memory", "persistence", "context", "preferences"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,32 @@
---
name: memory-management
description: 持久化记忆管理。存储用户信息、项目上下文、偏好和反馈,实现跨会话记忆。
tools:
- memory
---
# Memory Management — 记忆管理
持久化记忆系统,保存用户角色、项目上下文、偏好反馈等跨会话信息。
## 工具说明
### memory
管理记忆条目(增删改查)。
- `action` (str): `add` | `replace` | `remove`
- `target` (str): `user` 或 `memory`
- `content` (str | None): `add`、`replace` 时的新内容
- `old_text` (str | None): `replace`、`remove` 时定位旧条目的唯一短文本
- 记忆目标:
- `user`: 用户角色、职责、知识背景、稳定偏好
- `memory`: 项目约定、环境事实、稳定工具经验
- 支持自动保存和检索
- 跨会话持久化
## 使用原则
1. 了解用户角色偏好后及时保存到 `user` 类型
2. 用户明确要求记住的内容立即保存
3. 过时的记忆及时更新或删除
4. 不保存可以从代码/git 推导出的信息
5. 记忆是辅助参考,当前上下文和文件状态优先级更高

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for memory management",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "持久化记忆管理。存储用户信息、项目上下文、偏好和反馈,实现跨会话记忆。",
"name": "memory-management",
"tools": ["memory"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "memory-management",
"summary": "Memory Management — 持久化记忆系统,支持跨会话信息存储",
"summary_hash": "placeholder",
"tool_hints": ["memory"],
"version": "v0001"
}

View File

@ -0,0 +1,4 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,21 @@
{
"created_at": "2026-05-27T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "Guidance for OfficeBench evaluation tasks. Use the registered mcp_officebench_* tools to inspect and edit OfficeBench files, spreadsheets, documents, emails, calendars, PDFs, and answer files.",
"display_name": "officebench-mcp",
"lineage": [],
"name": "officebench-mcp",
"owners": [
"system"
],
"source_kind": "workspace",
"status": "active",
"tags": [
"officebench",
"mcp",
"evaluation",
"office"
],
"updated_at": "2026-05-27T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,190 @@
---
name: officebench-mcp
description: Guidance for OfficeBench evaluation tasks. Use the registered mcp_officebench_* tools to inspect and edit OfficeBench files, spreadsheets, documents, emails, calendars, PDFs, and answer files.
always: true
tools:
- mcp_officebench_excel_read_file
- mcp_officebench_excel_set_cell
- mcp_officebench_excel_delete_cell
- mcp_officebench_excel_create_new_file
- mcp_officebench_excel_convert_to_pdf
- mcp_officebench_word_read_file
- mcp_officebench_word_write_to_file
- mcp_officebench_word_create_new_file
- mcp_officebench_word_convert_to_pdf
- mcp_officebench_email_list_emails
- mcp_officebench_email_read_email
- mcp_officebench_email_send_email
- mcp_officebench_calendar_create_event
- mcp_officebench_calendar_list_events
- mcp_officebench_calendar_delete_event
- mcp_officebench_pdf_read_file
- mcp_officebench_pdf_convert_to_word
- mcp_officebench_pdf_convert_to_image
- mcp_officebench_ocr_recognize_file
- mcp_officebench_shell_command
- mcp_officebench_shell_list_directory
- mcp_officebench_shell_read_file
- mcp_officebench_shell_write_file
- mcp_officebench_shell_copy_file
- mcp_officebench_system_finish_task
- mcp_officebench_system_get_status
- mcp_officebench_image_convert_to_pdf
---
# OfficeBench MCP Skill
Use this skill for OfficeBench evaluation runs. OfficeBench task files live in the OfficeBench MCP server, not in Beaver's local filesystem. Complete the task by calling real `mcp_officebench_*` tools.
## Critical Rules
1. Use actual Beaver tool calls only. Do not print XML, DSML, JSON, or markdown that describes a tool call.
2. Never invent tool names. If you need to find files, use `mcp_officebench_shell_list_directory` or `mcp_officebench_shell_command`.
3. Do not use Beaver local filesystem, local runtime, local terminal, or local code tools for OfficeBench files.
4. Paths are relative to `/testbed` in the OfficeBench MCP container, such as `data/score.xlsx`.
5. If the task context gives a `workspace_id`, pass that same `workspace_id` argument in every OfficeBench MCP tool call that supports it.
6. Inspect files before editing them.
7. Verify the requested output file or edited cell exists before finishing.
8. Finish every task with `mcp_officebench_system_finish_task`.
## Tool Names And Use
### Excel
Use these for `.xlsx` files:
- `mcp_officebench_excel_read_file`: read workbook sheets and cell values.
- Required: `file_path`
- Optional: `sheet_name`, `workspace_id`
- `mcp_officebench_excel_set_cell`: write one cell.
- Required: `file_path`, `row`, `col`, `value`
- Optional: `sheet_name`, `workspace_id`
- Rows and columns are 1-based.
- `mcp_officebench_excel_delete_cell`: clear one cell.
- Required: `file_path`, `row`, `col`
- Optional: `sheet_name`, `workspace_id`
- `mcp_officebench_excel_create_new_file`: create a workbook.
- Required: `file_path`
- Optional: `workspace_id`
- `mcp_officebench_excel_convert_to_pdf`: convert an Excel file to PDF.
- Required: `file_path`
- Optional: `workspace_id`
Typical Excel sequence:
1. Call `mcp_officebench_shell_list_directory` on `data`.
2. Call `mcp_officebench_excel_read_file` on the target workbook.
3. Identify the exact row and column.
4. Call `mcp_officebench_excel_set_cell`.
5. Read the workbook again or use status/listing to verify.
6. Call `mcp_officebench_system_finish_task`.
For the common task "change Bob's midterm1 score to 100 in score.xlsx", inspect `data/score.xlsx`, find Bob's row and the `midterm1` column, then call `mcp_officebench_excel_set_cell` with that row, that column, and value `100`.
### Word
Use these for `.docx` files:
- `mcp_officebench_word_read_file`: read all paragraphs.
- Required: `file_path`
- Optional: `workspace_id`
- `mcp_officebench_word_write_to_file`: overwrite or append text.
- Required: `file_path`, `text`
- Optional: `append`, `workspace_id`
- `mcp_officebench_word_create_new_file`: create a new Word document.
- Required: `file_path`
- Optional: `workspace_id`
- `mcp_officebench_word_convert_to_pdf`: convert Word to PDF.
- Required: `file_path`
- Optional: `workspace_id`
Preserve exact spelling, capitalization, punctuation, and line order from source files.
### Email
Use these for email tasks:
- `mcp_officebench_email_list_emails`: list available `.eml` messages.
- Optional: `folder`, `workspace_id`
- `mcp_officebench_email_read_email`: read one email.
- Required: `email_path`
- Optional: `workspace_id`
- `mcp_officebench_email_send_email`: create/send an email artifact.
- Required: `to`, `subject`, `body`
- Optional: `attachments`, `workspace_id`
For email-search tasks, final answers should use plain text with literal lines like `Subject: ...`. Do not add markdown labels.
### Calendar
Use these for calendar `.ics` tasks:
- `mcp_officebench_calendar_list_events`: inspect calendar events.
- Optional: `calendar_path`, `workspace_id`
- `mcp_officebench_calendar_create_event`: create an event.
- Required fields depend on the task; include summary/title, start, end, and target calendar when needed.
- Optional: `workspace_id`
- `mcp_officebench_calendar_delete_event`: delete an event.
- Required fields depend on the task; inspect events first.
- Optional: `workspace_id`
Use the task's current date/time context when interpreting relative dates.
### PDF, OCR, And Images
Use these for PDF/image tasks:
- `mcp_officebench_pdf_read_file`: extract text from a PDF.
- Required: `pdf_file_path`
- Optional: `workspace_id`
- `mcp_officebench_pdf_convert_to_word`: convert PDF to Word.
- Required: `pdf_file_path`
- Optional: `workspace_id`
- `mcp_officebench_pdf_convert_to_image`: convert one PDF page to an image.
- Required: `pdf_file_path`
- Optional: `page_number`, `dpi`, `workspace_id`
- `mcp_officebench_ocr_recognize_file`: OCR an image.
- Required: `image_path`
- Optional: `language`, `workspace_id`
- `mcp_officebench_image_convert_to_pdf`: convert image to PDF.
- Required: `image_path`
- Optional: `output_path`, `workspace_id`
For conversion tasks, create the exact requested filename and verify it exists.
### Shell And System
Use these for safe file discovery and text files:
- `mcp_officebench_shell_list_directory`: list a directory.
- Optional: `path`, `workspace_id`
- `mcp_officebench_shell_read_file`: read text files such as `.txt`, `.csv`, `.json`, `.md`, `.xml`.
- Required: `file_path`
- Optional: `workspace_id`
- `mcp_officebench_shell_write_file`: write text files.
- Required: `file_path`, `content`
- Optional: `append`, `workspace_id`
- `mcp_officebench_shell_copy_file`: copy a file or directory.
- Required: `source`, `destination`
- Optional: `workspace_id`
- `mcp_officebench_shell_command`: run shell commands inside the OfficeBench MCP container.
- Required: `command`
- Optional: `workdir`, `workspace_id`
- `mcp_officebench_system_get_status`: inspect filesystem/git status.
- Optional: `workspace_id`
- `mcp_officebench_system_finish_task`: mark the task complete and optionally write an answer.
- Optional: `answer`, `workspace_id`
Prefer dedicated Office tools for Office documents. Use shell tools for listing directories, copying/renaming files, and reading/writing plain text.
## Anti-Patterns
Do not do any of the following:
- Do not call `mcp_officebench_find_in_workspace`; that tool does not exist.
- Do not output `<tool_calls>`, `<invoke>`, DSML, or pseudo tool call text.
- Do not answer "done" without calling the required OfficeBench tools.
- Do not edit guessed paths without first listing or reading relevant files.
- Do not use `/testbed` as a literal prefix in path arguments unless a tool explicitly asks for an absolute path.
- Do not correct misspellings found in source data. Preserve source text exactly.

View File

@ -0,0 +1,80 @@
{
"change_reason": "Initial OfficeBench MCP skill for evaluation runs",
"content_hash": "6afdd5a93ce552f39c1e285fc552059cfada7971e0d5bb91bcd56c6ca608ba17",
"created_at": "2026-05-27T00:00:00.000000+00:00",
"created_by": "codex",
"frontmatter": {
"always": true,
"description": "Guidance for OfficeBench evaluation tasks. Use the registered mcp_officebench_* tools to inspect and edit OfficeBench files, spreadsheets, documents, emails, calendars, PDFs, and answer files.",
"name": "officebench-mcp",
"tools": [
"mcp_officebench_excel_read_file",
"mcp_officebench_excel_set_cell",
"mcp_officebench_excel_delete_cell",
"mcp_officebench_excel_create_new_file",
"mcp_officebench_excel_convert_to_pdf",
"mcp_officebench_word_read_file",
"mcp_officebench_word_write_to_file",
"mcp_officebench_word_create_new_file",
"mcp_officebench_word_convert_to_pdf",
"mcp_officebench_email_list_emails",
"mcp_officebench_email_read_email",
"mcp_officebench_email_send_email",
"mcp_officebench_calendar_create_event",
"mcp_officebench_calendar_list_events",
"mcp_officebench_calendar_delete_event",
"mcp_officebench_pdf_read_file",
"mcp_officebench_pdf_convert_to_word",
"mcp_officebench_pdf_convert_to_image",
"mcp_officebench_ocr_recognize_file",
"mcp_officebench_shell_command",
"mcp_officebench_shell_list_directory",
"mcp_officebench_shell_read_file",
"mcp_officebench_shell_write_file",
"mcp_officebench_shell_copy_file",
"mcp_officebench_system_finish_task",
"mcp_officebench_system_get_status",
"mcp_officebench_image_convert_to_pdf"
]
},
"parent_version": null,
"provenance": {
"source": "officebench_mcp",
"source_kind": "workspace"
},
"review_state": "published",
"skill_name": "officebench-mcp",
"summary": "OfficeBench MCP skill for using registered mcp_officebench_* tools correctly during evaluation runs.",
"summary_hash": "914d6759650fce29884f648b84929e0482475c3ccd6601e9903c9b8b826dd874",
"tool_hints": [
"mcp_officebench_excel_read_file",
"mcp_officebench_excel_set_cell",
"mcp_officebench_excel_delete_cell",
"mcp_officebench_excel_create_new_file",
"mcp_officebench_excel_convert_to_pdf",
"mcp_officebench_word_read_file",
"mcp_officebench_word_write_to_file",
"mcp_officebench_word_create_new_file",
"mcp_officebench_word_convert_to_pdf",
"mcp_officebench_email_list_emails",
"mcp_officebench_email_read_email",
"mcp_officebench_email_send_email",
"mcp_officebench_calendar_create_event",
"mcp_officebench_calendar_list_events",
"mcp_officebench_calendar_delete_event",
"mcp_officebench_pdf_read_file",
"mcp_officebench_pdf_convert_to_word",
"mcp_officebench_pdf_convert_to_image",
"mcp_officebench_ocr_recognize_file",
"mcp_officebench_shell_command",
"mcp_officebench_shell_list_directory",
"mcp_officebench_shell_read_file",
"mcp_officebench_shell_write_file",
"mcp_officebench_shell_copy_file",
"mcp_officebench_system_finish_task",
"mcp_officebench_system_get_status",
"mcp_officebench_image_convert_to_pdf"
],
"version": "v0001"
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "通过 Outlook MCP 进行邮件收发、日历管理和会议安排。支持 Graph API 和 on-prem Exchange。",
"display_name": "outlook-mail",
"lineage": [],
"name": "outlook-mail",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["outlook", "email", "calendar", "mcp", "microsoft"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,150 @@
---
name: outlook-mail
description: 通过 Outlook MCP 进行邮件收发、日历管理和会议安排。支持 Graph API 和 on-prem Exchange。
tools:
- mcp_outlook_mcp_mail_list_folders
- mcp_outlook_mcp_mail_list_messages
- mcp_outlook_mcp_mail_search_messages
- mcp_outlook_mcp_mail_get_message
- mcp_outlook_mcp_mail_send_email
- mcp_outlook_mcp_mail_reply_to_message
- mcp_outlook_mcp_mail_forward_message
- mcp_outlook_mcp_mail_move_message
- mcp_outlook_mcp_mail_delta_sync
- mcp_outlook_mcp_calendar_list_events
- mcp_outlook_mcp_calendar_create_event
- mcp_outlook_mcp_calendar_update_event
- mcp_outlook_mcp_calendar_get_schedule
- mcp_outlook_mcp_calendar_find_meeting_times
- mcp_outlook_mcp_calendar_delta_sync
---
# Outlook MCP — 邮件与日历管理
通过 MCP server 连接 Outlook(Microsoft Graph / on-prem Exchange),提供邮件和日历的完整操作能力。
## 邮件工具
### mcp_outlook_mcp_mail_list_folders
列出 Outlook 邮件文件夹。
- `top` (int, 默认 50): 返回数量上限
### mcp_outlook_mcp_mail_list_messages
列出指定文件夹的邮件。
- `folder` (str, 默认 "inbox"): 文件夹名
- `top` (int, 默认 20): 返回条数
- `skip` (int, 默认 0): 跳过的条数
- `unread_only` (bool, 默认 false): 仅未读
### mcp_outlook_mcp_mail_search_messages
搜索邮件(使用 Graph search 语义)。
- `query` (str): 搜索关键词
- `folder` (str | None): 限定文件夹
- `top` (int, 默认 20): 返回条数
### mcp_outlook_mcp_mail_get_message
读取单封邮件的完整内容。
- `message_id` (str): 邮件 ID
- `changekey` (str | None): EWS changekey(on-prem 需要)
### mcp_outlook_mcp_mail_send_email
发送新邮件。**幂等操作**,支持 idempotency_key。
- `subject` (str): 主题
- `body` (str): 正文(支持 HTML)
- `to_recipients` (list[str]): 收件人
- `cc_recipients` (list[str] | None): 抄送
- `bcc_recipients` (list[str] | None): 密送
- `idempotency_key` (str | None): 幂等键,防止重复发送
### mcp_outlook_mcp_mail_reply_to_message
回复一封邮件。
- `message_id` (str): 原邮件 ID
- `comment` (str): 回复内容
- `changekey` (str | None): EWS changekey
- `idempotency_key` (str | None)
### mcp_outlook_mcp_mail_forward_message
转发邮件给其他人。
- `message_id` (str): 原邮件 ID
- `to_recipients` (list[str]): 转发目标
- `comment` (str): 附加说明
- `cc_recipients` (list[str] | None)
- `changekey` (str | None)
- `idempotency_key` (str | None)
### mcp_outlook_mcp_mail_move_message
移动邮件到其他文件夹。
- `message_id` (str): 邮件 ID
- `destination_folder` (str): 目标文件夹
- `changekey` (str | None)
- `idempotency_key` (str | None)
### mcp_outlook_mcp_mail_delta_sync
增量同步邮件变更。支持游标持久化,适合长期同步场景。
- `folder` (str, 默认 "inbox"): 文件夹
- `delta_link` (str | None): 增量链接(续传时提供)
- `top` (int, 默认 50)
- `persist_cursor` (bool, 默认 true): 是否持久化游标
## 日历工具
### mcp_outlook_mcp_calendar_list_events
列出日历事件或日历视图。
- `start_time` (str | None): ISO 开始时间,与 end_time 成对提供
- `end_time` (str | None): ISO 结束时间
- `top` (int, 默认 20)
- `skip` (int, 默认 0)
### mcp_outlook_mcp_calendar_create_event
创建日历事件或正式会议邀请。**幂等操作**。
- `subject` (str): 主题
- `start_time` (str): ISO 开始时间
- `end_time` (str): ISO 结束时间
- `timezone` (str, 默认 "UTC"): 时区
- `body` (str | None): 正文
- `location` (str | None): 地点
- `attendees` (list[str] | None): 参会人
- `is_online_meeting` (bool, 默认 false): 是否创建 Teams 会议
- `online_meeting_provider` (str, 默认 "teamsForBusiness"): 在线会议提供商
- `transaction_id` (str | None): 事务 ID
- `idempotency_key` (str | None)
### mcp_outlook_mcp_calendar_update_event
更新已有日历事件。
- `event_id` (str): 事件 ID
- `subject` / `start_time` / `end_time` / `timezone` / `body` / `location` / `attendees`: 可选更新字段
- `idempotency_key` (str | None)
### mcp_outlook_mcp_calendar_get_schedule
查询与会人忙闲状态。
- `schedules` (list[str]): 要查询的人员列表
- `start_time` (str): ISO 开始
- `end_time` (str): ISO 结束
- `availability_view_interval` (int, 默认 30): 时间间隔(分钟)
- `timezone` (str, 默认 "UTC")
### mcp_outlook_mcp_calendar_find_meeting_times
推荐最佳会议时间。
- `attendees` (list[str]): 参会人
- `start_time` (str): 时间范围开始
- `end_time` (str): 时间范围结束
- `duration_minutes` (int, 默认 30): 会议时长
- `timezone` (str, 默认 "UTC")
- `max_candidates` (int, 默认 10): 候选数
### mcp_outlook_mcp_calendar_delta_sync
增量同步日历事件变更。
- `start_time` (str): 同步窗口开始
- `end_time` (str): 同步窗口结束
- `delta_link` (str | None): 增量续传链接
- `top` (int, 默认 50)
- `persist_cursor` (bool, 默认 true)
- `cursor_key` (str, 默认 "calendar:primary")
## 使用原则
1. 邮件操作优先使用幂等键(idempotency_key),防止重复发送
2. 日历时间参数统一使用 ISO 8601 格式
3. 增量同步时优先使用返回的 delta_link 续传,避免全量拉取
4. 发送邮件前确认收件人地址格式正确
5. 创建会议时明确时区,避免跨时区混淆

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for Outlook MCP mail and calendar operations",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "通过 Outlook MCP 进行邮件收发、日历管理和会议安排。支持 Graph API 和 on-prem Exchange。",
"name": "outlook-mail",
"tools": ["mcp_outlook_mcp_mail_list_folders", "mcp_outlook_mcp_mail_list_messages", "mcp_outlook_mcp_mail_search_messages", "mcp_outlook_mcp_mail_get_message", "mcp_outlook_mcp_mail_send_email", "mcp_outlook_mcp_mail_reply_to_message", "mcp_outlook_mcp_mail_forward_message", "mcp_outlook_mcp_mail_move_message", "mcp_outlook_mcp_mail_delta_sync", "mcp_outlook_mcp_calendar_list_events", "mcp_outlook_mcp_calendar_create_event", "mcp_outlook_mcp_calendar_update_event", "mcp_outlook_mcp_calendar_get_schedule", "mcp_outlook_mcp_calendar_find_meeting_times", "mcp_outlook_mcp_calendar_delta_sync"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "outlook-mail",
"summary": "Outlook MCP — 邮件与日历管理。通过 MCP server 连接 Outlook提供邮件和日历的完整操作能力。",
"summary_hash": "placeholder",
"tool_hints": ["mcp_outlook_mcp_mail_list_folders", "mcp_outlook_mcp_mail_list_messages", "mcp_outlook_mcp_mail_search_messages", "mcp_outlook_mcp_mail_get_message", "mcp_outlook_mcp_mail_send_email", "mcp_outlook_mcp_mail_reply_to_message", "mcp_outlook_mcp_mail_forward_message", "mcp_outlook_mcp_mail_move_message", "mcp_outlook_mcp_mail_delta_sync", "mcp_outlook_mcp_calendar_list_events", "mcp_outlook_mcp_calendar_create_event", "mcp_outlook_mcp_calendar_update_event", "mcp_outlook_mcp_calendar_get_schedule", "mcp_outlook_mcp_calendar_find_meeting_times", "mcp_outlook_mcp_calendar_delta_sync"],
"version": "v0001"
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "技能Skill列表查看、内容加载和草稿管理。用于浏览已发布技能和创建新技能草稿。",
"display_name": "skills-admin",
"lineage": [],
"name": "skills-admin",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["skills", "admin", "management", "draft"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,42 @@
---
name: skills-admin
description: 技能Skill列表查看、内容加载和草稿管理。用于浏览已发布技能和创建新技能草稿。
tools:
- skills_list
- skill_manage
- skill_view
---
# Skills Admin — 技能管理
查看已发布的技能列表、加载技能详情和创建新技能草稿。
## 工具说明
### skills_list
列出系统中所有可用技能及其描述。
- 返回技能名称、描述和版本
- 用于浏览当前可用的技能
### skill_view
加载某个技能的完整正文或支持文件。
- `name` (str): 技能名称
- `file_path` (str | None): 可选的支持文件路径
- 不传文件路径时返回 SKILL.md 主内容
- 支持按需加载 references/、templates/ 等目录
### skill_manage
创建新技能草稿(draft)。
- `action` (str): 仅支持 "create_draft"
- `name` (str): 技能名称
- `description` (str): 技能描述
- `content` (str): 技能正文(SKILL.md 格式)
- 创建的草稿需经过 review → publish 流程
## 使用原则
1. 需要参考某个技能的详细内容时,先 `skills_list` 找到名称,再用 `skill_view` 加载
2. 创建新技能时先写清楚 description,便于后续被 selector 选中
3. 技能正文使用标准 frontmatter + Markdown 格式
4. 支持文件放在 skill 目录的 references/、templates/、scripts/ 等子目录
5. Draft 创建后需要走 review/publish 流程才能生效

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for skills management",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "技能Skill列表查看、内容加载和草稿管理。用于浏览已发布技能和创建新技能草稿。",
"name": "skills-admin",
"tools": ["skills_list", "skill_manage", "skill_view"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "skills-admin",
"summary": "Skills Admin — 技能列表查看、内容加载和草稿管理",
"summary_hash": "placeholder",
"tool_hints": ["skills_list", "skill_manage", "skill_view"],
"version": "v0001"
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "Shell 命令执行、后台进程管理和 Python 代码执行。支持超时控制和后台运行。",
"display_name": "terminal-operation",
"lineage": [],
"name": "terminal-operation",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["terminal", "shell", "command", "process", "execution"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,46 @@
---
name: terminal-operation
description: Shell 命令执行、后台进程管理和 Python 代码执行。支持超时控制和后台运行。
tools:
- terminal
- process
- execute_code
---
# Terminal Operation — 终端与进程管理
Shell 命令执行、后台进程管理和 Python 代码执行工具集。
## 工具说明
### terminal
执行 shell 命令。
- `command` (str): 要执行的命令
- `working_dir` (str, 默认 "."): 工作目录
- `timeout` (int, 默认 60): 超时秒数(最大 600)
- `background` (bool, 默认 false): 是否后台运行
- 后台运行时返回 process_id,可通过 process 工具管理
### process
管理后台进程。
- `action` (str): `list` | `log` | `kill`
- `process_id` (str | None): 进程 ID
- `list`: 列出所有后台进程
- `log`: 查看进程日志(最后 12000 字节)
- `kill`: 终止进程(先 SIGTERM,5 秒后 SIGKILL)
### execute_code
执行 Python 代码片段。
- `code` (str): Python 代码
- `language` (str, 默认 "python"): 仅支持 python
- `timeout` (int, 默认 30, 最大 120): 执行超时
- `working_dir` (str, 默认 "."): 工作目录
- 适合快速验证脚本逻辑,不适合长期运行任务
## 使用原则
1. 长期运行任务使用 `background=true`
2. 执行危险命令(rm -rf、dd、格式化等)前必须确认用户意图
3. `execute_code` 适合轻量脚本验证,重型任务用 `terminal`
4. 后台进程用完后及时 kill 清理
5. 注意命令注入风险,不要直接拼接用户输入

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for terminal and process management",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "Shell 命令执行、后台进程管理和 Python 代码执行。支持超时控制和后台运行。",
"name": "terminal-operation",
"tools": ["terminal", "process", "execute_code"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "terminal-operation",
"summary": "Terminal Operation — Shell 命令执行、后台进程管理、Python 代码执行",
"summary_hash": "placeholder",
"tool_hints": ["terminal", "process", "execute_code"],
"version": "v0001"
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "辅助工具集包括任务分解Todo、任务委托Delegate、子 Agent 生成Spawn、消息发送和需求澄清Clarify。",
"display_name": "utility-tools",
"lineage": [],
"name": "utility-tools",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["utility", "delegate", "todo", "spawn", "clarify"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,52 @@
---
name: utility-tools
description: 辅助工具集包括任务分解Todo、任务委托Delegate、子 Agent 生成Spawn、消息发送和需求澄清。
tools:
- clarify
- delegate
- send_message
- spawn
- todo
---
# Utility Tools — 辅助工具集
任务管理、委托和协作的辅助工具。
## 工具说明
### todo (TodoWrite)
创建和管理任务列表,跟踪复杂任务的进度。
- 适合多步骤、复杂任务时使用
- 标记当前正在进行的任务
- 完成后立即更新状态
### delegate (DelegateTool)
将任务委托给专门的子 Agent 执行。
- 适合独立、可并行的工作
- 委托时提供清晰的上下文和目标
- 子 Agent 完成后再整合结果
### spawn (SpawnTool)
启动新的 Agent 实例执行特定任务。
- 适合需要独立运行的工作
- 支持后台运行(不阻塞主流程)
### send_message (SendMessageTool)
与其他 Agent 或团队成员通信。
- 适合多 Agent 协作场景
- 消息会直接送达目标
### clarify (ClarifyTool)
当需求不明确时向用户提问澄清。
- 提供 2-4 个选项供用户选择
- 附带推荐选项和理由
- 避免模糊提问,给出明确建议
## 使用原则
1. 复杂任务先创建 Todo 列表,明确步骤
2. 可并行的工作使用 Delegate/Spawn 分散执行
3. 需求不明确时主动 Clarify,不要猜测
4. 多 Agent 协作时保持通信简洁
5. 记得及时在 todo list 中更新进度

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for utility and delegation tools",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "辅助工具集包括任务分解Todo、任务委托Delegate、子 Agent 生成Spawn、消息发送和需求澄清。",
"name": "utility-tools",
"tools": ["clarify", "delegate", "send_message", "spawn", "todo"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "utility-tools",
"summary": "Utility Tools — 任务管理、委托和协作辅助工具集",
"summary_hash": "placeholder",
"tool_hints": ["clarify", "delegate", "send_message", "spawn", "todo"],
"version": "v0001"
}

View File

@ -0,0 +1,3 @@
{
"current_version": "v0001"
}

View File

@ -0,0 +1,13 @@
{
"created_at": "2026-05-26T00:00:00.000000+00:00",
"current_version": "v0001",
"description": "网页内容抓取和搜索引擎查询。支持任意 URL 抓取、多搜索引擎和结构化数据提取。",
"display_name": "web-operation",
"lineage": [],
"name": "web-operation",
"owners": ["system"],
"source_kind": "initial",
"status": "active",
"tags": ["web", "search", "fetch", "crawl"],
"updated_at": "2026-05-26T00:00:00.000000+00:00"
}

View File

@ -0,0 +1,36 @@
---
name: web-operation
description: 网页内容抓取和搜索引擎查询。支持任意 URL 抓取、多搜索引擎和结构化数据提取。
tools:
- web_fetch
- web_search
---
# Web Operation — 网络抓取与搜索
网页抓取和网络搜索工具集。
## 工具说明
### web_fetch
获取指定 URL 的网页内容并转换为 Markdown。
- 支持 HTML → Markdown 自动转换
- 可使用 prompt 参数提取特定信息
- 结果由 AI 总结后返回
- HTTP URL 自动升级为 HTTPS
- 含 15 分钟缓存
### web_search
搜索引擎查询,获取最新网络信息。
- 支持 domain 过滤(include/block)
- 搜索与当前日期相关的信息时,使用正确的年份
- 返回结果包含 URL 链接
## 使用原则
1. 优先使用 `web_search` 搜索信息,再用 `web_fetch` 深入阅读
2. 获取动态/需要认证的页面可能失败,此时尝试简化请求或换源
3. 抓取 API 文档时注意区分 REST API 和 GraphQL
4. 搜索结果必须标注来源链接
5. 避免短时间内大量请求同一站点(限频)
6. 不抓取需要登录认证的私密页面

View File

@ -0,0 +1,22 @@
{
"change_reason": "Initial skill for web fetching and searching",
"content_hash": "placeholder",
"created_at": "2026-05-26T00:00:00.000000+00:00",
"created_by": "system",
"frontmatter": {
"description": "网页内容抓取和搜索引擎查询。支持任意 URL 抓取、多搜索引擎和结构化数据提取。",
"name": "web-operation",
"tools": ["web_fetch", "web_search"]
},
"parent_version": null,
"provenance": {
"source": "initial_skills",
"source_kind": "initial"
},
"review_state": "published",
"skill_name": "web-operation",
"summary": "Web Operation — 网页抓取与网络搜索工具集",
"summary_hash": "placeholder",
"tool_hints": ["web_fetch", "web_search"],
"version": "v0001"
}