feat: 将项目从nano重命名为beaver并更新相关配置

- 将所有环境变量前缀从NANO_改为BEAVER_
- 更新README.md文档内容,包括项目介绍、组件说明和快速开始指南
- 修改.gitignore文件,添加auth-portal运行时路径排除规则
- 更新app-instance镜像标签从nano/app-instance改为beaver/app-instance
- 增强技能安全检查器,支持工具前缀白名单功能
- 添加技能草稿重新检查安全性API端点
- 扩展证据选择器,收集工具调用名称用于技能学习
- 改进技能合成器,基于实际调用的工具生成工具提示
- 优化路由超时处理机制,增加重试逻辑
- 更新后端架构文档,添加可视化入口和基础概念说明
- 实现在WebSocket消息中传递工具迭代次数信息
This commit is contained in:
2026-05-20 18:01:06 +08:00
parent 3b0af173cc
commit 9d6cde2d23
63 changed files with 4894 additions and 1596 deletions

View File

@ -45,14 +45,14 @@ runtime/registry/instances.json
### 1. 构建镜像
```bash
docker build -t nano/app-instance:latest .
docker build -t beaver/app-instance:latest .
```
### 2. 创建实例
```bash
./create-instance.sh \
--image nano/app-instance:latest \
--image beaver/app-instance:latest \
--instance-id demo-001 \
--auth-username admin \
--auth-password 123456 \

View File

@ -260,7 +260,12 @@ class EngineLoader:
review_service=review_service,
publisher=skill_publisher,
safety_checker=SkillDraftSafetyChecker(
allowed_tool_names={spec.name for spec in tool_registry.list_specs()}
allowed_tool_names={spec.name for spec in tool_registry.list_specs()},
allowed_tool_prefixes={
f"mcp_{server_id}_"
for server_id in self.config.tools.mcp_servers
if str(server_id).strip()
},
),
evaluator=SkillDraftEvaluator(run_memory_store),
)

View File

@ -1437,6 +1437,15 @@ def create_app(
raise HTTPException(status_code=404, detail="Safety report not found")
return report.to_dict()
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/safety")
async def recheck_skill_draft_safety(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Re-run the safety check for one skill draft and return the report as a dict.

    A ``ValueError`` raised while checking is converted with
    ``_skill_draft_http_error`` and re-raised.
    """
    runtime = get_agent_service(request).create_loop().boot()
    try:
        safety_report = runtime.skill_learning_pipeline.check_safety(skill_name, draft_id)  # type: ignore[union-attr]
    except ValueError as error:
        raise _skill_draft_http_error(error) from error
    return safety_report.to_dict()
@app.get("/api/skills/{skill_name}/drafts/{draft_id}/eval")
async def get_skill_draft_eval(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
loaded = get_agent_service(request).create_loop().boot()
@ -1831,6 +1840,7 @@ def create_app(
"model": _clean_text(payload.get("model")) or None,
"provider_name": _clean_text(payload.get("provider_name")) or None,
"embedding_model": _clean_text(payload.get("embedding_model")) or None,
"max_tool_iterations": _int_or_none(payload.get("max_tool_iterations")),
}
websocket_thinking_enabled = _bool_or_none(payload.get("thinking_enabled"))
if websocket_thinking_enabled is not None:
@ -1844,6 +1854,7 @@ def create_app(
"content": f"Run failed before completion: {exc}",
"session_id": session_id,
"finish_reason": "error",
"tool_iterations": 0,
"metadata": {
"error": str(exc),
"input_metadata": _websocket_input_metadata(payload),
@ -2403,6 +2414,15 @@ def _bool_or_none(value: Any) -> bool | None:
return None
def _int_or_none(value: Any) -> int | None:
if value in (None, ""):
return None
try:
return int(value)
except (TypeError, ValueError):
return None
def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
validation_result = getattr(result, "validation_result", None)
task_id = getattr(result, "task_id", None)
@ -2414,6 +2434,7 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
"session_id": getattr(result, "session_id", None),
"run_id": getattr(result, "run_id", None),
"finish_reason": getattr(result, "finish_reason", None),
"tool_iterations": getattr(result, "tool_iterations", 0),
"provider_name": getattr(result, "provider_name", None),
"model": getattr(result, "model", None),
"usage": dict(getattr(result, "usage", {}) or {}),

View File

@ -42,6 +42,8 @@ class EvidenceSelector:
resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
task_summaries: list[str] = []
session_excerpts: list[str] = []
tool_names: list[str] = []
selected_tool_names: list[str] = []
for run_id in run_ids:
record = runs_by_id.get(run_id)
if record is None:
@ -56,12 +58,19 @@ class EvidenceSelector:
excerpt = self._session_excerpt(record.session_id, run_id)
if excerpt:
session_excerpts.append(excerpt)
run_tool_names, run_selected_tool_names = self._run_tool_names(record.session_id, run_id)
tool_names.extend(run_tool_names)
selected_tool_names.extend(run_selected_tool_names)
return EvidencePacket(
run_ids=resolved_run_ids,
session_ids=resolved_session_ids,
task_summaries=task_summaries[:8],
session_excerpts=session_excerpts[:6],
metadata={"bounded": True},
metadata={
"bounded": True,
"tool_names": _unique_strings(tool_names),
"selected_tool_names": _unique_strings(selected_tool_names),
},
)
def _session_excerpt(self, session_id: str, run_id: str) -> str:
@ -74,3 +83,37 @@ class EvidenceSelector:
continue
visible.append(f"{event.role}: {event.content.strip()}")
return "\n".join(visible[:12])[:2000]
def _run_tool_names(self, session_id: str, run_id: str) -> tuple[list[str], list[str]]:
    """Gather the tool names recorded in one run's session events.

    Returns a ``(called, selected)`` pair, each de-duplicated in first-seen order:

    * ``called`` - names taken from an event's ``tool_name`` and from each
      dict entry in ``tool_calls`` (its ``name``, else its ``function.name``).
    * ``selected`` - names listed under ``tool_names`` in the payload of
      ``tool_selection_snapshotted`` events.

    Both lists are empty when no session manager is configured.
    """
    manager = self.session_manager
    if manager is None:
        return [], []

    called: list[str] = []
    offered: list[str] = []
    for record in manager.get_run_event_records(session_id, run_id):
        if record.tool_name:
            called.append(record.tool_name)

        for entry in record.tool_calls or ():
            if not isinstance(entry, dict):
                continue
            call_name = entry.get("name")
            if not call_name:
                # Fall back to the nested {"function": {"name": ...}} shape.
                nested = entry.get("function")
                if isinstance(nested, dict):
                    call_name = nested.get("name")
            if call_name:
                called.append(str(call_name))

        if record.event_type != "tool_selection_snapshotted":
            continue
        snapshot_payload = record.event_payload
        if not isinstance(snapshot_payload, dict):
            continue
        snapshot = snapshot_payload.get("tool_names")
        if isinstance(snapshot, list):
            for item in snapshot:
                text = str(item)
                if text.strip():
                    offered.append(text)

    return _unique_strings(called), _unique_strings(offered)
def _unique_strings(values: list[str]) -> list[str]:
result: list[str] = []
for value in values:
cleaned = str(value).strip()
if cleaned and cleaned not in result:
result.append(cleaned)
return result

View File

@ -32,8 +32,14 @@ class SkillDraftSafetyChecker:
"credentials",
}
def __init__(self, *, allowed_tool_names: set[str] | None = None) -> None:
def __init__(
self,
*,
allowed_tool_names: set[str] | None = None,
allowed_tool_prefixes: set[str] | None = None,
) -> None:
self.allowed_tool_names = allowed_tool_names
self.allowed_tool_prefixes = allowed_tool_prefixes or set()
def check(self, draft: SkillDraft) -> SkillDraftSafetyReport:
issues: list[str] = []
@ -50,7 +56,7 @@ class SkillDraftSafetyChecker:
tool_hints = _tool_hints(frontmatter)
if self.allowed_tool_names is not None:
unknown = [name for name in tool_hints if name not in self.allowed_tool_names]
unknown = [name for name in tool_hints if not self._is_allowed_tool_hint(name)]
if unknown:
blocked.append(f"unknown tool hints: {', '.join(sorted(unknown))}")
dangerous = sorted({name for name in tool_hints if name.lower() in self._DANGEROUS_TOOL_HINTS})
@ -80,6 +86,11 @@ class SkillDraftSafetyChecker:
created_at=_utc_now(),
)
def _is_allowed_tool_hint(self, name: str) -> bool:
if self.allowed_tool_names is not None and name in self.allowed_tool_names:
return True
return any(name.startswith(prefix) and len(name) > len(prefix) for prefix in self.allowed_tool_prefixes)
def _tool_hints(frontmatter: dict) -> list[str]:
raw = frontmatter.get("tools")

View File

@ -65,19 +65,29 @@ class SkillDraftSynthesizer:
)
payload = self._parse_payload(response.content or "")
if payload:
return payload
return self._normalize_payload(payload, evidence_packet)
return self._fallback_payload(candidate, evidence_packet, action)
@staticmethod
def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
tool_names = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
tool_section = ", ".join(tool_names) if tool_names else "none observed"
selected_tool_names = _coerce_string_list(evidence_packet.metadata.get("selected_tool_names"))
selected_tool_section = ", ".join(selected_tool_names) if selected_tool_names else "none recorded"
return (
f"Action: {action}\n"
f"Candidate kind: {candidate.kind}\n"
f"Reason: {candidate.reason}\n"
f"Related skills: {candidate.related_skill_names}\n"
f"Called tool names: {tool_section}\n"
f"Run-selected tool names: {selected_tool_section}\n"
f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries)
+ "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts)
+ "\n\nReturn JSON only."
+ "\n\nReturn JSON only. The frontmatter object must include:"
+ "\n- description: a concise skill description"
+ "\n- tools: an explicit JSON array of exact tool names this skill needs. "
+ "Prefer called tool names when the workflow depends on them; use run-selected tool names only when clearly required. "
+ "Use [] only when no tool is required."
)
@staticmethod
@ -103,6 +113,19 @@ class SkillDraftSynthesizer:
"change_reason": str(payload.get("change_reason") or ""),
}
@staticmethod
def _normalize_payload(payload: dict[str, Any], evidence_packet: EvidencePacket) -> dict[str, Any]:
    """Return ``payload`` reduced to ``frontmatter``, ``content`` and ``change_reason``.

    The frontmatter is copied and its ``tools`` entry is always set to a list
    of strings. When the payload declares no tools, the ``tool_names``
    recorded in the evidence packet metadata are used instead.
    """
    normalized_frontmatter = dict(payload.get("frontmatter") or {})
    declared_tools = _coerce_string_list(normalized_frontmatter.get("tools"))
    normalized_frontmatter["tools"] = declared_tools or _coerce_string_list(
        evidence_packet.metadata.get("tool_names")
    )
    body_text = str(payload.get("content") or "").strip()
    reason_text = str(payload.get("change_reason") or "")
    return {
        "frontmatter": normalized_frontmatter,
        "content": body_text,
        "change_reason": reason_text,
    }
@staticmethod
def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
related = candidate.related_skill_names[0] if candidate.related_skill_names else "generated-skill"
@ -111,8 +134,25 @@ class SkillDraftSynthesizer:
return {
"frontmatter": {
"description": candidate.reason or f"Auto-generated {action} draft for {title}.",
"tools": [],
"tools": _coerce_string_list(evidence_packet.metadata.get("tool_names")),
},
"content": f"# {title}\n\n## Evidence\n\n{content}\n",
"change_reason": candidate.reason or f"Fallback {action} synthesis.",
}
def _coerce_string_list(value: Any) -> list[str]:
raw_items: list[Any]
if isinstance(value, list):
raw_items = value
elif isinstance(value, str):
raw_items = value.split(",")
else:
raw_items = []
result: list[str] = []
for item in raw_items:
cleaned = str(item).strip()
if cleaned and cleaned not in result:
result.append(cleaned)
return result

View File

@ -26,38 +26,42 @@ class MainAgentRouter:
) -> MainAgentDecision:
if provider is None:
return self._fallback(active_task=active_task, reason="router_provider_unavailable")
try:
chat_kwargs: dict[str, Any] = {
"messages": [
{
"role": "system",
"content": (
"You are Beaver's Intent Agent. Your only job is to route the user's "
"message to simple chat or internal Task mode. Return only compact JSON. "
"Do not answer the user. Do not explain."
),
},
{
"role": "user",
"content": self._prompt(
message=message,
active_task=active_task,
recent_messages=recent_messages or [],
intent_skill=intent_skill,
),
},
],
"tools": None,
"model": model,
"max_tokens": 256,
"temperature": 0.0,
}
if thinking_enabled is not None:
chat_kwargs["thinking_enabled"] = thinking_enabled
response = await asyncio.wait_for(provider.chat(**chat_kwargs), timeout=timeout_seconds)
return self.from_json(response.content or "", active_task=active_task)
except Exception as exc:
return self._fallback(active_task=active_task, reason=f"router_failed: {exc}")
chat_kwargs: dict[str, Any] = {
"messages": [
{
"role": "system",
"content": (
"You are Beaver's Intent Agent. Your only job is to route the user's "
"message to simple chat or internal Task mode. Return only compact JSON. "
"Do not answer the user. Do not explain."
),
},
{
"role": "user",
"content": self._prompt(
message=message,
active_task=active_task,
recent_messages=recent_messages or [],
intent_skill=intent_skill,
),
},
],
"tools": None,
"model": model,
"max_tokens": 256,
"temperature": 0.0,
}
if thinking_enabled is not None:
chat_kwargs["thinking_enabled"] = thinking_enabled
last_error: Exception | None = None
for attempt_timeout in (timeout_seconds, 12.0):
try:
response = await asyncio.wait_for(provider.chat(**chat_kwargs), timeout=attempt_timeout)
return self.from_json(response.content or "", active_task=active_task)
except Exception as exc:
last_error = exc
return self._fallback(active_task=active_task, reason=f"router_failed: {last_error}")
def from_json(self, text: str, *, active_task: TaskRecord | None = None) -> MainAgentDecision:
payload = self._parse_json_object(text)

View File

@ -2,10 +2,46 @@
这是新 `Beaver` 后端的架构入口文档。
可视化入口:
- [Beaver Backend 可视化](backend-visualization.html)
## 给零基础读者的版本
可以先把这个后端理解成一个“帮用户完成任务的后台工厂”:
1. 用户在前端发一句话。
2. 后端入口把这句话接住。
3. 服务层判断它是闲聊,还是一个需要执行和跟踪的任务。
4. 如果是任务,系统会创建或继续一个 `Task`。
5. 运行内核 `AgentLoop` 准备上下文、选择技能、选择工具、调用模型。
6. 如果模型需要查文件、写文件、搜索或调用外部系统,就通过 `tools` 执行。
7. 执行结果会写回会话、任务记录和运行记录,后续可以继续追踪、验证和学习。
这套结构里最重要的原则是:**所有 agent 共用同一个运行内核 `engine`**。也就是说,主 agent 和被拆出去的小 agent 不是两套系统,它们最终都会回到 `AgentLoop`,使用同一套上下文、工具、技能和记录方式。
## 先认识几个词
- `interfaces`:入口层。负责接收 Web、CLI、Gateway、MCP 等不同来源的请求。
- `services`:应用服务层。负责把入口请求转成系统内部要做的事情。
- `engine`:运行内核。真正组织 prompt、调用模型、执行 tool loop 的地方。
- `coordinator`:多 agent 编排层。负责把复杂任务拆成 sequence、parallel 或 DAG。
- `skills`:技能层。可以理解成给 agent 的专项说明书。
- `tools`:工具层。可以理解成 agent 能按需调用的动作,例如读文件、写文件、搜索、执行命令。
- `memory`:记忆层。保存会话、任务结果、运行记录、反馈和技能学习数据。
- `permissions`:权限与治理层。负责约束哪些能力能用、怎么用。
## 一句话请求的流转
典型路径是:
`interfaces` -> `AgentService` -> `MainAgentRouter` -> `TaskService` / `TaskExecutionPlanner` -> `AgentLoop` -> `skills` / `tools` / `memory` -> 返回用户。
如果任务很简单,可能只走单 agent。如果任务更复杂,`TaskExecutionPlanner` 可能先生成一个 team plan,再由 `coordinator` 安排多个 sub-agent 分别处理,最后由主 agent 综合输出。
当前约束:
1. 所有 agent 共用 `engine`。
2. 多 agent 编排进入 `coordinator`。
3. skills、memory、permissions 独立成能力层。
4. `interfaces` 只做薄入口。

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -158,7 +158,7 @@ def test_load_config_reads_mcp_authz_identity(tmp_path) -> None:
},
"authz": {
"enabled": True,
"baseUrl": "http://nano-authz-service:19090",
"baseUrl": "http://beaver-authz-service:19090",
},
"backend_identity": {
"backend_id": "stevenli",
@ -180,7 +180,7 @@ def test_load_config_reads_mcp_authz_identity(tmp_path) -> None:
assert server.sensitive is True
assert config.authz.enabled is True
assert config.authz.base_url == "http://nano-authz-service:19090"
assert config.authz.base_url == "http://beaver-authz-service:19090"
assert config.backend_identity.backend_id == "stevenli"
assert config.backend_identity.client_id == "stevenli"

View File

@ -38,6 +38,39 @@ class RouterProvider(LLMProvider):
return "stub-model"
class SequenceRouterProvider(LLMProvider):
    """Stub provider that replays a scripted sequence, one entry per ``chat`` call.

    A string entry is returned as the response content; an ``Exception`` entry
    is raised. The arguments of every call are recorded in ``self.calls``.
    """

    def __init__(self, responses: list[str | Exception]) -> None:
        super().__init__()
        # Copy so consuming the script does not mutate the caller's list.
        self.responses = [*responses]
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the call, then return or raise the next scripted entry."""
        recorded_call = dict(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            model=model,
            thinking_enabled=thinking_enabled,
        )
        self.calls.append(recorded_call)
        scripted = self.responses.pop(0)
        if not isinstance(scripted, Exception):
            return LLMResponse(content=scripted, finish_reason="stop", provider_name="stub", model="stub-model")
        raise scripted

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
def _task() -> TaskRecord:
return TaskRecord(
task_id="task-1",
@ -133,3 +166,38 @@ def test_router_fallback_keeps_active_task_but_not_new_task() -> None:
assert active.is_task
assert not inactive.is_task
def test_router_retries_once_after_provider_failure() -> None:
    """A first-call failure followed by valid JSON yields a task decision after two calls."""
    scripted = [
        TimeoutError(),
        '{"action":"new_task","reason":"needs search","short_title":"中美会面"}',
    ]
    provider = SequenceRouterProvider(scripted)
    router = MainAgentRouter()

    decision = asyncio.run(router.classify("帮我看看昨天的中美会面都谈了什么?", provider=provider))

    assert decision.is_task
    assert decision.action == "create_task"
    assert len(provider.calls) == 2
def test_router_fallback_after_two_provider_failures() -> None:
    """Two consecutive provider failures fall back, reporting the last error."""
    scripted = [TimeoutError(), RuntimeError("provider down")]
    provider = SequenceRouterProvider(scripted)
    router = MainAgentRouter()

    decision = asyncio.run(router.classify("帮我看看昨天的中美会面都谈了什么?", provider=provider))

    assert not decision.is_task
    assert decision.reason == "router_failed: provider down"
    assert len(provider.calls) == 2

View File

@ -15,7 +15,12 @@ from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
def _pipeline(tmp_path: Path, *, allowed_tools: set[str] | None = None) -> SkillLearningPipelineService:
def _pipeline(
tmp_path: Path,
*,
allowed_tools: set[str] | None = None,
allowed_prefixes: set[str] | None = None,
) -> SkillLearningPipelineService:
spec_store = SkillSpecStore(tmp_path)
run_store = RunMemoryStore(tmp_path / "memory" / "runs")
learning_store = SkillLearningStore(tmp_path / "memory" / "skills")
@ -32,7 +37,10 @@ def _pipeline(tmp_path: Path, *, allowed_tools: set[str] | None = None) -> Skill
draft_service=drafts,
review_service=ReviewService(spec_store),
publisher=SkillPublisher(spec_store),
safety_checker=SkillDraftSafetyChecker(allowed_tool_names=allowed_tools),
safety_checker=SkillDraftSafetyChecker(
allowed_tool_names=allowed_tools,
allowed_tool_prefixes=allowed_prefixes,
),
)
@ -106,3 +114,53 @@ def test_safety_blocks_unknown_tool_hint(tmp_path: Path) -> None:
assert report.passed is False
assert "unknown tool hints" in report.blocked_reasons[0]
def test_safety_allows_configured_mcp_tool_prefix(tmp_path: Path) -> None:
    """Tool hints under an allowed prefix pass the safety check with no blocked reasons."""
    officebench_tools = [
        "mcp_officebench_shell_list_directory",
        "mcp_officebench_excel_read_file",
        "mcp_officebench_excel_set_cell",
    ]
    pipeline = _pipeline(tmp_path, allowed_tools={"echo"}, allowed_prefixes={"mcp_officebench_"})
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="officebench-excel",
        proposed_content="# OfficeBench Excel\n\nUse the configured OfficeBench MCP tools.",
        proposed_frontmatter={"description": "officebench", "tools": officebench_tools},
        created_by="test",
        reason="test",
    )

    report = pipeline.check_safety(draft.skill_name, draft.draft_id)

    assert report.passed is True
    assert report.blocked_reasons == []
def test_safety_blocks_unconfigured_mcp_tool_prefix(tmp_path: Path) -> None:
    """A tool hint outside every allowed prefix is blocked and named in the first reason."""
    foreign_tool = "mcp_officebench_excel_set_cell"
    pipeline = _pipeline(tmp_path, allowed_tools={"echo"}, allowed_prefixes={"mcp_outlook_mcp_"})
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="wrong-mcp",
        proposed_content="# Wrong MCP\n\nUse an unconfigured MCP namespace.",
        proposed_frontmatter={"description": "wrong mcp", "tools": [foreign_tool]},
        created_by="test",
        reason="test",
    )

    report = pipeline.check_safety(draft.skill_name, draft.draft_id)

    assert report.passed is False
    assert foreign_tool in report.blocked_reasons[0]

View File

@ -7,6 +7,7 @@ from types import SimpleNamespace
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.engine.session import SessionManager
from beaver.memory.runs import RunMemoryStore, RunRecord
from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore
from beaver.skills.drafts import DraftService
@ -125,6 +126,78 @@ def test_worker_retries_and_marks_failed_after_limit(tmp_path: Path) -> None:
assert "provider failed" in (candidate.last_error or "")
def test_synthesizer_fills_missing_tools_from_evidence(tmp_path: Path) -> None:
    """When the provider payload has no ``tools``, the evidence tool names are used."""
    observed_tools = ["web_fetch", "memory"]
    pipeline = _pipeline(tmp_path)
    candidate = pipeline.get_candidate("candidate-1")
    provider_payload = {
        "frontmatter": {"description": "Generated skill"},
        "content": "# Generated\n\nUse the observed workflow.",
        "change_reason": "learned",
    }
    provider = JsonProvider(payload=provider_payload)
    selector = EvidenceSelector(pipeline.learning_service.run_store)
    packet = selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
    packet.metadata["tool_names"] = list(observed_tools)

    synthesizer = SkillDraftSynthesizer()
    payload = asyncio.run(synthesizer.synthesize_new_skill(candidate, packet, provider, "stub"))

    assert payload["frontmatter"]["tools"] == ["web_fetch", "memory"]
def test_evidence_selector_records_run_tool_names(tmp_path: Path) -> None:
    """The evidence packet metadata lists called tools and snapshot-selected tools separately."""
    session_id = "session-1"
    run_id = "run-1"

    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
    run_record = RunRecord(
        run_id=run_id,
        session_id=session_id,
        task_text="research latest docs",
        started_at="start",
        ended_at="end",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(run_record)

    session_manager = SessionManager(tmp_path)
    session_manager.ensure_session(session_id)
    # Tool-selection snapshot: the tools offered to the run.
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="system",
        event_type="tool_selection_snapshotted",
        event_payload={"tool_names": ["memory", "web_fetch"]},
        context_visible=False,
    )
    # Assistant message carrying a tool call in the nested "function" shape.
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="assistant",
        tool_calls=[{"id": "call-1", "function": {"name": "web_search"}}],
    )
    # Tool result message carrying the tool name directly.
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="tool",
        tool_name="web_fetch",
        content="ok",
    )

    try:
        selector = EvidenceSelector(run_store, session_manager)
        packet = selector.build_evidence_packet([run_id], [session_id])
    finally:
        session_manager.close()

    assert packet.metadata["tool_names"] == ["web_search", "web_fetch"]
    assert packet.metadata["selected_tool_names"] == ["memory", "web_fetch"]
def test_worker_supersedes_candidate_when_active_draft_exists(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
pipeline.learning_store.record_learning_candidate(

View File

@ -78,6 +78,7 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
"model": None,
"provider_name": None,
"embedding_model": None,
"max_tool_iterations": None,
}
]
assert message["type"] == "message"
@ -128,5 +129,6 @@ def test_websocket_runtime_error_returns_assistant_error_message() -> None:
assert message["role"] == "assistant"
assert message["session_id"] == "web:alpha"
assert message["finish_reason"] == "error"
assert message["tool_iterations"] == 0
assert "boom" in message["content"]
assert pong == {"type": "pong"}

View File

@ -4,7 +4,7 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REGISTRY_TOOL="${SCRIPT_DIR}/instance-registry.py"
IMAGE_NAME="${IMAGE_NAME:-nano/app-instance:latest}"
IMAGE_NAME="${IMAGE_NAME:-beaver/app-instance:latest}"
INSTANCES_ROOT_DEFAULT="${SCRIPT_DIR}/runtime/instances"
REGISTRY_PATH_DEFAULT="${SCRIPT_DIR}/runtime/registry/instances.json"
KNOWN_PROVIDERS=" custom anthropic openai openrouter deepseek groq zhipu dashscope vllm gemini moonshot minimax aihubmix siliconflow volcengine "
@ -25,6 +25,7 @@ MODEL="openai/gpt-5"
PROVIDER="openai"
API_KEY="${API_KEY:-}"
API_BASE="${API_BASE:-}"
SKIP_PROVIDER_CONFIG=0
AUTH_USERNAME=""
AUTH_PASSWORD=""
USERNAME=""
@ -42,22 +43,23 @@ REPLACE=0
usage() {
cat <<'EOF'
Usage:
./create-instance.sh --instance-id demo --auth-username admin --auth-password 123456 --api-key sk-xxx [options]
./create-instance.sh --instance-id demo --auth-username admin --auth-password 123456 [options]
Required:
--instance-id <id> Unique instance id.
--auth-username <name> Initial web login username.
--auth-password <password> Initial web login password.
--api-key <key> Provider API key for Boardware Genius.
Optional:
--image <name> Docker image tag. Default: nano/app-instance:latest
--image <name> Docker image tag. Default: beaver/app-instance:latest
--container-name <name> Docker container name. Default: app-instance-<slug>
--host-port <port> Host port to publish. Default: auto-pick from 20000-29999.
--public-url <url> Public URL exposed to users. Default: http://127.0.0.1:<host-port>
--provider <name> Provider key in config.json. Default: openai
--api-base <url> Optional custom provider base URL.
--api-key <key> Provider API key for Boardware Genius.
--model <name> Model name. Default: openai/gpt-5
--skip-provider-config Create the instance without model/provider/API key settings.
--authz-base-url <url> AuthZ service base URL.
--authz-outlook-mcp-url <url>
Managed Outlook MCP URL for AuthZ mode.
@ -134,6 +136,7 @@ render_config_json() {
PROVIDER="$PROVIDER" \
API_KEY="$API_KEY" \
API_BASE="$API_BASE" \
SKIP_PROVIDER_CONFIG="$SKIP_PROVIDER_CONFIG" \
AUTHZ_BASE_URL="$AUTHZ_BASE_URL" \
AUTHZ_OUTLOOK_MCP_URL="$AUTHZ_OUTLOOK_MCP_URL" \
OUTLOOK_MCP_SERVER_ID="$OUTLOOK_MCP_SERVER_ID" \
@ -151,11 +154,20 @@ target = Path(os.environ["TARGET_PATH"])
provider = os.environ["PROVIDER"]
outlook_mcp_url = os.environ["AUTHZ_OUTLOOK_MCP_URL"].strip()
outlook_server_id = os.environ["OUTLOOK_MCP_SERVER_ID"].strip() or "outlook_mcp"
skip_provider_config = os.environ["SKIP_PROVIDER_CONFIG"].strip() == "1"
provider_cfg = {"apiKey": os.environ["API_KEY"]}
api_base = os.environ["API_BASE"].strip()
if api_base:
provider_cfg["apiBase"] = api_base
providers = {}
agent_defaults = {
"workspace": "/root/.beaver/workspace",
}
if not skip_provider_config:
provider_cfg = {"apiKey": os.environ["API_KEY"]}
api_base = os.environ["API_BASE"].strip()
if api_base:
provider_cfg["apiBase"] = api_base
providers[provider] = provider_cfg
agent_defaults["provider"] = provider
agent_defaults["model"] = os.environ["MODEL"]
outlook_tool_names = [
"auth_status",
@ -193,14 +205,9 @@ if outlook_mcp_url:
data = {
"agents": {
"defaults": {
"workspace": "/root/.beaver/workspace",
"model": os.environ["MODEL"],
}
},
"providers": {
provider: provider_cfg,
"defaults": agent_defaults
},
"providers": providers,
"tools": {
"restrictToWorkspace": True,
"mcpServers": default_mcp_servers,
@ -345,6 +352,10 @@ while [[ $# -gt 0 ]]; do
MODEL="${2:-}"
shift 2
;;
--skip-provider-config)
SKIP_PROVIDER_CONFIG=1
shift
;;
--auth-username)
AUTH_USERNAME="${2:-}"
shift 2
@ -438,7 +449,9 @@ done
[[ -n "$INSTANCE_ID" ]] || die "--instance-id is required"
[[ -n "$AUTH_USERNAME" ]] || die "--auth-username is required"
[[ -n "$AUTH_PASSWORD" ]] || die "--auth-password is required"
[[ -n "$API_KEY" ]] || die "--api-key is required"
if [[ "$SKIP_PROVIDER_CONFIG" -ne 1 ]]; then
[[ -n "$API_KEY" ]] || die "--api-key is required unless --skip-provider-config is set"
fi
INSTANCE_SLUG="$(slugify "$INSTANCE_ID")"
USERNAME="${USERNAME:-$AUTH_USERNAME}"
@ -469,10 +482,12 @@ if [[ -z "$INSTANCE_HOST" ]]; then
INSTANCE_HOST="$(extract_url_host "$PUBLIC_URL")"
fi
case "$KNOWN_PROVIDERS" in
*" ${PROVIDER} "*) ;;
*) die "unsupported provider '${PROVIDER}'" ;;
esac
if [[ "$SKIP_PROVIDER_CONFIG" -ne 1 ]]; then
case "$KNOWN_PROVIDERS" in
*" ${PROVIDER} "*) ;;
*) die "unsupported provider '${PROVIDER}'" ;;
esac
fi
if [[ -n "$BACKEND_ID$CLIENT_ID$CLIENT_SECRET" ]]; then
[[ -n "$BACKEND_ID" && -n "$CLIENT_ID" && -n "$CLIENT_SECRET" ]] || die "backend identity requires --backend-id, --client-id and --client-secret together"
@ -550,9 +565,9 @@ RUN_ARGS=(
-e "APP_FRONTEND_PORT=3000"
-e "APP_BACKEND_PORT=18080"
-e "BEAVER_OUTLOOK_MCP_SERVER_ID=${OUTLOOK_MCP_SERVER_ID}"
--label "nano.instance.id=${INSTANCE_ID}"
--label "nano.instance.slug=${INSTANCE_SLUG}"
--label "nano.instance.public_url=${PUBLIC_URL}"
--label "beaver.instance.id=${INSTANCE_ID}"
--label "beaver.instance.slug=${INSTANCE_SLUG}"
--label "beaver.instance.public_url=${PUBLIC_URL}"
)
if [[ -n "$NETWORK_NAME" ]]; then

View File

@ -41,6 +41,7 @@ import {
listSkillDrafts,
listSkills,
publishSkillDraft,
recheckSkillDraftSafety,
regenerateSkillDraft,
rejectSkillDraft,
rollbackPublishedSkill,
@ -412,6 +413,11 @@ export default function SkillsPage() {
rejectSkillDraft(draft.skill_name, draft.draft_id)
)
}
onRecheckSafety={() =>
runAction(`safety:${draft.draft_id}`, () =>
recheckSkillDraftSafety(draft.skill_name, draft.draft_id)
)
}
onPublish={(confirmHighRisk) =>
runAction(`publish:${draft.draft_id}`, () =>
publishSkillDraft(draft.skill_name, draft.draft_id, '', confirmHighRisk)
@ -697,6 +703,7 @@ function DraftCard({
onSubmit,
onApprove,
onReject,
onRecheckSafety,
onPublish,
}: {
draft: SkillDraft;
@ -704,6 +711,7 @@ function DraftCard({
onSubmit: () => Promise<unknown>;
onApprove: () => Promise<unknown>;
onReject: () => Promise<unknown>;
onRecheckSafety: () => Promise<unknown>;
onPublish: (confirmHighRisk: boolean) => Promise<unknown>;
}) {
const { locale } = useAppI18n();
@ -814,6 +822,10 @@ function DraftCard({
<XCircle className="mr-2 h-4 w-4" />
{t('拒绝', 'Reject')}
</Button>
<Button variant="outline" size="sm" disabled={busy || TERMINAL_DRAFT_STATUSES.has(draft.status)} onClick={() => void onRecheckSafety()}>
<ShieldCheck className="mr-2 h-4 w-4" />
{t('复检', 'Recheck')}
</Button>
<Button size="sm" disabled={busy || publishBlocked} onClick={handlePublish}>
<Rocket className="mr-2 h-4 w-4" />
{t('发布', 'Publish')}

View File

@ -3,7 +3,7 @@
import Link from 'next/link';
import { useParams, useRouter } from 'next/navigation';
import React, { useMemo, useState } from 'react';
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, HelpCircle, MessageSquare, RefreshCw, Trash2, User, XCircle } from 'lucide-react';
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, HelpCircle, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
import { TaskRuntimeStatusBadge, formatTaskRuntimeDuration, formatTaskRuntimeTime, progressPercent } from '@/components/task-runtime/TaskRuntimeShared';
import { Badge } from '@/components/ui/badge';
@ -17,6 +17,14 @@ import { buildTaskRuntimeView, type TaskRuntimeNodeView } from '@/lib/task-runti
import { useChatStore } from '@/lib/store';
import type { BackendTask, BackendTaskRun, ProcessArtifact, ProcessEvent } from '@/types';
/** The three feedback kinds accepted as `feedbackType` by the feedback panel's `onSubmit`. */
type TaskFeedbackType = 'satisfied' | 'revise' | 'abandon';
/**
 * One feedback record. Every field is optional and typed `unknown`, so
 * callers must narrow a field before using it.
 */
type TaskFeedbackItem = {
feedback_type?: unknown;
comment?: unknown;
created_at?: unknown;
run_id?: unknown;
};
function taskVisibleStatus(task: TaskRuntimeNodeView, locale: 'zh-CN' | 'en-US') {
if (task.status === 'error') return pickAppText(locale, '任务失败', 'Task failed');
if (task.status === 'cancelled') return pickAppText(locale, '已取消', 'Cancelled');
@ -53,11 +61,13 @@ export default function TaskDetailPage() {
const [backendTaskLoading, setBackendTaskLoading] = useState(false);
const [selectedRunId, setSelectedRunId] = useState<string | null>(task?.rootRunId ?? null);
const [revision, setRevision] = useState('');
const [runtimeFeedback, setRuntimeFeedback] = useState<TaskFeedbackItem | null>(null);
const [actionError, setActionError] = useState<string | null>(null);
const [actionBusy, setActionBusy] = useState<string | null>(null);
React.useEffect(() => {
setSelectedRunId(task?.rootRunId ?? null);
setRuntimeFeedback(null);
}, [task?.rootRunId]);
React.useEffect(() => {
@ -138,6 +148,8 @@ export default function TaskDetailPage() {
});
};
const backendFeedbackRunId = backendTask ? pickFeedbackRunId(backendTask) : null;
if (!task && backendTask) {
const validation = backendTask.validation_result;
const accepted = Boolean(validation?.accepted);
@ -185,6 +197,26 @@ export default function TaskDetailPage() {
</CardContent>
</Card>
<TaskFeedbackPanel
sessionId={backendTask.session_id}
runId={backendFeedbackRunId}
taskStatus={backendTask.status}
feedbackItems={feedbackItems}
actionBusy={actionBusy}
onSubmit={(feedbackType, comment) =>
runAction(`backend-feedback-${feedbackType}`, async () => {
await submitChatFeedback({
sessionId: backendTask.session_id,
runId: backendFeedbackRunId!,
feedbackType,
comment,
});
const refreshed = await getBackendTask(backendTask.task_id);
setBackendTask(refreshed);
})
}
/>
<Card>
<CardHeader>
<CardTitle className="text-base">{pickAppText(locale, 'Agent 执行过程', 'Agent conversation process')}</CardTitle>
@ -424,37 +456,33 @@ export default function TaskDetailPage() {
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="text-base">{pickAppText(locale, '修订意见', 'Revision')}</CardTitle>
</CardHeader>
<CardContent className="space-y-3">
<Textarea
value={revision}
onChange={(event) => setRevision(event.target.value)}
placeholder={pickAppText(locale, '直接写下需要调整的地方...', 'Describe what should change...')}
/>
<Button
className="w-full"
disabled={!revision.trim() || Boolean(actionBusy)}
onClick={() =>
void runAction('revision', async () => {
updateMessageFeedback(task.rootRunId, 'revise');
await submitChatFeedback({
sessionId: task.sessionId || 'web:default',
runId: task.rootRunId,
feedbackType: 'revise',
comment: revision.trim(),
});
setRevision('');
})
}
>
<RefreshCw className="mr-2 h-4 w-4" />
{pickAppText(locale, '提交修订', 'Submit revision')}
</Button>
</CardContent>
</Card>
<TaskFeedbackPanel
sessionId={task.sessionId || 'web:default'}
runId={task.rootRunId}
taskStatus={task.status}
feedbackItems={runtimeFeedback ? [runtimeFeedback] : []}
actionBusy={actionBusy}
revision={revision}
onRevisionChange={setRevision}
onSubmit={(feedbackType, comment) =>
runAction(`runtime-feedback-${feedbackType}`, async () => {
updateMessageFeedback(task.rootRunId, feedbackType);
await submitChatFeedback({
sessionId: task.sessionId || 'web:default',
runId: task.rootRunId,
feedbackType,
comment,
});
setRuntimeFeedback({
feedback_type: feedbackType,
comment: comment || '',
created_at: new Date().toISOString(),
run_id: task.rootRunId,
});
setRevision('');
})
}
/>
<Card>
<CardHeader>
@ -521,6 +549,136 @@ function Metric({ label, value }: { label: string; value: string }) {
);
}
/**
 * Card that lets the user record feedback (satisfied / needs revision / abandon)
 * for a single task run, or shows the feedback that was already recorded.
 *
 * The comment box is controlled by the parent when `revision` is supplied
 * (edits are reported through `onRevisionChange`); otherwise the panel keeps
 * the text in its own local state.
 *
 * @param sessionId - Session identifier; only displayed in the footer line here.
 * @param runId - Run the feedback is attached to; `null` disables submission.
 * @param taskStatus - Task status; `'closed'` and `'abandoned'` are treated as finalized.
 * @param feedbackItems - Previously recorded feedback entries to look up.
 * @param actionBusy - Identifier of the action currently in flight, or `null` when idle.
 * @param revision - Optional externally controlled comment text.
 * @param onRevisionChange - Optional setter paired with `revision`.
 * @param onSubmit - Called with the chosen feedback type and optional comment.
 */
function TaskFeedbackPanel({
sessionId,
runId,
taskStatus,
feedbackItems,
actionBusy,
revision,
onRevisionChange,
onSubmit,
}: {
sessionId: string;
runId: string | null;
taskStatus: string;
feedbackItems: TaskFeedbackItem[];
actionBusy: string | null;
revision?: string;
onRevisionChange?: (value: string) => void;
onSubmit: (feedbackType: TaskFeedbackType, comment?: string) => Promise<unknown>;
}) {
const { locale } = useAppI18n();
const [localComment, setLocalComment] = React.useState('');
// Prefer the parent-controlled value/setter; fall back to local state when absent.
const comment = revision ?? localComment;
const setComment = onRevisionChange ?? setLocalComment;
const isFinalized = taskStatus === 'closed' || taskStatus === 'abandoned';
// Feedback recorded for this run wins; for a finalized task, fall back to the
// last feedback entry in the list regardless of which run it belongs to.
const recordedFeedback = feedbackForRun(feedbackItems, runId) ?? (isFinalized ? latestFeedback(feedbackItems) : null);
// Submission needs a run, no existing feedback, an open task and no action in flight.
const canSubmit = Boolean(runId) && !recordedFeedback && !isFinalized && !actionBusy;
const submit = (feedbackType: TaskFeedbackType, nextComment?: string) => {
if (!runId || !canSubmit) return;
// Fire-and-forget: the returned promise is not awaited or caught here.
// NOTE(review): presumably onSubmit handles its own failures — confirm at the call sites.
void onSubmit(feedbackType, nextComment);
};
return (
<Card>
<CardHeader>
<CardTitle className="text-base">{pickAppText(locale, '任务反馈', 'Task feedback')}</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
{recordedFeedback ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm">
<div className="flex items-center gap-2 font-medium">
<CheckCircle2 className="h-4 w-4 text-[#657162]" />
{pickAppText(locale, '已提交反馈', 'Feedback submitted')}: {humanFeedback(String(recordedFeedback.feedback_type || ''), locale)}
</div>
{recordedFeedback.comment ? (
<p className="mt-2 text-muted-foreground">{String(recordedFeedback.comment)}</p>
) : null}
{recordedFeedback.created_at ? (
<p className="mt-2 text-xs text-muted-foreground">{formatTaskRuntimeTime(String(recordedFeedback.created_at), locale)}</p>
) : null}
</div>
) : isFinalized ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
{pickAppText(locale, '任务已结束,不能再提交新的反馈。', 'This task is finalized and cannot accept new feedback.')}
</div>
) : !runId ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
{pickAppText(locale, '暂无可反馈的运行记录。', 'No run is available for feedback yet.')}
</div>
) : null}
<div className="grid gap-2 sm:grid-cols-3">
<FeedbackButton
type="satisfied"
icon={<ThumbsUp className="mr-2 h-4 w-4" />}
label={pickAppText(locale, '满意', 'Satisfied')}
actionBusy={actionBusy}
disabled={!canSubmit}
onClick={() => submit('satisfied', comment.trim() || undefined)}
/>
<FeedbackButton
type="revise"
icon={<RefreshCw className="mr-2 h-4 w-4" />}
label={pickAppText(locale, '需要修改', 'Needs revision')}
actionBusy={actionBusy}
disabled={!canSubmit || !comment.trim()}
onClick={() => submit('revise', comment.trim())}
/>
<FeedbackButton
type="abandon"
icon={<XCircle className="mr-2 h-4 w-4" />}
label={pickAppText(locale, '放弃', 'Abandon')}
actionBusy={actionBusy}
disabled={!canSubmit}
onClick={() => submit('abandon', comment.trim() || undefined)}
/>
</div>
<Textarea
value={comment}
onChange={(event) => setComment(event.target.value)}
disabled={Boolean(recordedFeedback) || isFinalized || Boolean(actionBusy)}
placeholder={pickAppText(locale, '需要修改时写下具体要求;满意或放弃可选填说明。', 'Describe requested changes; notes are optional for satisfied or abandon.')}
/>
<div className="text-xs text-muted-foreground">
{pickAppText(locale, '反馈将记录到当前任务运行:', 'Feedback will be recorded on run: ')}
<span className="font-mono">{runId || '-'}</span>
<span className="mx-1">·</span>
{pickAppText(locale, '会话:', 'Session: ')}
<span className="font-mono">{sessionId}</span>
</div>
</CardContent>
</Card>
);
}
/**
 * Outline button for one feedback choice.
 *
 * The button is disabled whenever the caller disables it or any action is in
 * flight. While the in-flight action's identifier ends with this button's
 * `type`, the icon is swapped for a spinner.
 *
 * @param type - Feedback type this button submits; also used to match `actionBusy`.
 * @param icon - Icon rendered before the label when the button is not busy.
 * @param label - Visible button text.
 * @param actionBusy - Identifier of the action currently running, or `null` when idle.
 * @param disabled - Extra disable condition supplied by the caller.
 * @param onClick - Invoked when the button is pressed.
 */
function FeedbackButton({
  type,
  icon,
  label,
  actionBusy,
  disabled,
  onClick,
}: {
  type: TaskFeedbackType;
  icon: React.ReactNode;
  label: string;
  actionBusy: string | null;
  disabled: boolean;
  onClick: () => void;
}) {
  const showSpinner = actionBusy != null && actionBusy.endsWith(type);
  const leadingIcon = showSpinner ? <Loader2 className="mr-2 h-4 w-4 animate-spin" /> : icon;
  const isDisabled = disabled || Boolean(actionBusy);

  return (
    <Button type="button" variant="outline" className="w-full justify-center" disabled={isDisabled} onClick={onClick}>
      {leadingIcon}
      {label}
    </Button>
  );
}
function BackendRunConversation({ run, index }: { run: BackendTaskRun; index: number }) {
const { locale } = useAppI18n();
return (
@ -597,6 +755,24 @@ function humanFinishReason(reason: string, locale: 'zh-CN' | 'en-US') {
return reason;
}
/**
 * Chooses the run that new feedback should be attached to.
 *
 * Uses the last non-empty entry of `task.run_ids`. When there is none, falls
 * back to the `run_id` of the last entry in `task.runs`.
 *
 * @param task - Backend task whose runs are inspected.
 * @returns The selected run id, or `null` when the task has no runs at all.
 */
function pickFeedbackRunId(task: BackendTask): string | null {
  const nonEmptyIds = task.run_ids.filter((id) => Boolean(id));
  if (nonEmptyIds.length === 0) {
    const fallbackRuns = task.runs ?? [];
    return fallbackRuns.length === 0 ? null : fallbackRuns[fallbackRuns.length - 1].run_id;
  }
  return nonEmptyIds[nonEmptyIds.length - 1];
}
/**
 * Returns the last entry in `items` whose `run_id` matches `runId`.
 *
 * An entry's `run_id` is compared after string conversion, with a missing or
 * falsy `run_id` treated as the empty string.
 *
 * @param items - Feedback entries to search; the array is not modified.
 * @param runId - Run to look up; `null` or an empty string never matches.
 * @returns The last matching entry in array order, or `null` when none matches.
 */
function feedbackForRun(items: TaskFeedbackItem[], runId: string | null): TaskFeedbackItem | null {
  if (!runId) return null;
  // Scan from the end so the last match wins without building a reversed copy.
  for (let index = items.length - 1; index >= 0; index -= 1) {
    const item = items[index];
    if (String(item.run_id || '') === runId) return item;
  }
  return null;
}
/**
 * Returns the last entry of `items`.
 *
 * Reads the final element directly instead of copying and reversing the array.
 *
 * @param items - Feedback entries; the array is not modified.
 * @returns The last entry, or `null` when the list is empty.
 */
function latestFeedback(items: TaskFeedbackItem[]): TaskFeedbackItem | null {
  return items[items.length - 1] ?? null;
}
/**
 * Converts an arbitrary value into a list of non-empty strings.
 *
 * @param value - Any value; anything that is not an array yields an empty list.
 * @returns Each array element converted with `String(...)`, with empty strings dropped.
 */
function arrayOfStrings(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const collected: string[] = [];
  // forEach skips holes in sparse arrays, just as map/filter do.
  value.forEach((entry) => {
    const text = String(entry);
    if (text) {
      collected.push(text);
    }
  });
  return collected;
}

View File

@ -777,6 +777,13 @@ export async function getSkillDraftSafety(skillName: string, draftId: string): P
return fetchJSON(`/api/skills/${encodeURIComponent(skillName)}/drafts/${encodeURIComponent(draftId)}/safety`);
}
/**
 * Sends a POST with an empty JSON object body to the safety endpoint of a skill draft.
 *
 * @param skillName - Skill name; URI-encoded before being placed in the path.
 * @param draftId - Draft identifier; URI-encoded before being placed in the path.
 * @returns The result of `fetchJSON` for that request, typed as the draft's safety report.
 */
export async function recheckSkillDraftSafety(skillName: string, draftId: string): Promise<SkillDraftSafetyReport> {
  const skillSegment = encodeURIComponent(skillName);
  const draftSegment = encodeURIComponent(draftId);
  const endpoint = `/api/skills/${skillSegment}/drafts/${draftSegment}/safety`;
  const requestInit = { method: 'POST', body: JSON.stringify({}) };
  return fetchJSON(endpoint, requestInit);
}
/**
 * Requests the eval endpoint of a skill draft.
 *
 * @param skillName - Skill name; URI-encoded before being placed in the path.
 * @param draftId - Draft identifier; URI-encoded before being placed in the path.
 * @returns The result of `fetchJSON` for that path, typed as the draft's eval report.
 */
export async function getSkillDraftEval(skillName: string, draftId: string): Promise<SkillDraftEvalReport> {
  const skillSegment = encodeURIComponent(skillName);
  const draftSegment = encodeURIComponent(draftId);
  const endpoint = `/api/skills/${skillSegment}/drafts/${draftSegment}/eval`;
  return fetchJSON(endpoint);
}