feat: 将项目从nano重命名为beaver并更新相关配置

- 将所有环境变量前缀从NANO_改为BEAVER_
- 更新README.md文档内容,包括项目介绍、组件说明和快速开始指南
- 修改.gitignore文件,添加auth-portal运行时路径排除规则
- 更新app-instance镜像标签从nano/app-instance改为beaver/app-instance
- 增强技能安全检查器,支持工具前缀白名单功能
- 添加技能草稿重新检查安全性API端点
- 扩展证据选择器,收集工具调用名称用于技能学习
- 改进技能合成器,基于实际调用的工具生成工具提示
- 优化路由超时处理机制,增加重试逻辑
- 更新后端架构文档,添加可视化入口和基础概念说明
- 实现在WebSocket消息中传递工具迭代次数信息
This commit is contained in:
2026-05-20 18:01:06 +08:00
parent 3b0af173cc
commit 9d6cde2d23
63 changed files with 4894 additions and 1596 deletions

View File

@ -260,7 +260,12 @@ class EngineLoader:
review_service=review_service,
publisher=skill_publisher,
safety_checker=SkillDraftSafetyChecker(
allowed_tool_names={spec.name for spec in tool_registry.list_specs()}
allowed_tool_names={spec.name for spec in tool_registry.list_specs()},
allowed_tool_prefixes={
f"mcp_{server_id}_"
for server_id in self.config.tools.mcp_servers
if str(server_id).strip()
},
),
evaluator=SkillDraftEvaluator(run_memory_store),
)

View File

@ -1437,6 +1437,15 @@ def create_app(
raise HTTPException(status_code=404, detail="Safety report not found")
return report.to_dict()
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/safety")
async def recheck_skill_draft_safety(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
    """Re-run the safety check for one skill draft and return the report as a dict.

    A ``ValueError`` raised while running the check is converted into an HTTP
    error through ``_skill_draft_http_error``.
    """
    agent_service = get_agent_service(request)
    booted = agent_service.create_loop().boot()
    try:
        safety_report = booted.skill_learning_pipeline.check_safety(  # type: ignore[union-attr]
            skill_name,
            draft_id,
        )
    except ValueError as error:
        raise _skill_draft_http_error(error) from error
    else:
        return safety_report.to_dict()
@app.get("/api/skills/{skill_name}/drafts/{draft_id}/eval")
async def get_skill_draft_eval(skill_name: str, draft_id: str, request: Request) -> dict[str, Any]:
loaded = get_agent_service(request).create_loop().boot()
@ -1831,6 +1840,7 @@ def create_app(
"model": _clean_text(payload.get("model")) or None,
"provider_name": _clean_text(payload.get("provider_name")) or None,
"embedding_model": _clean_text(payload.get("embedding_model")) or None,
"max_tool_iterations": _int_or_none(payload.get("max_tool_iterations")),
}
websocket_thinking_enabled = _bool_or_none(payload.get("thinking_enabled"))
if websocket_thinking_enabled is not None:
@ -1844,6 +1854,7 @@ def create_app(
"content": f"Run failed before completion: {exc}",
"session_id": session_id,
"finish_reason": "error",
"tool_iterations": 0,
"metadata": {
"error": str(exc),
"input_metadata": _websocket_input_metadata(payload),
@ -2403,6 +2414,15 @@ def _bool_or_none(value: Any) -> bool | None:
return None
def _int_or_none(value: Any) -> int | None:
if value in (None, ""):
return None
try:
return int(value)
except (TypeError, ValueError):
return None
def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
validation_result = getattr(result, "validation_result", None)
task_id = getattr(result, "task_id", None)
@ -2414,6 +2434,7 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
"session_id": getattr(result, "session_id", None),
"run_id": getattr(result, "run_id", None),
"finish_reason": getattr(result, "finish_reason", None),
"tool_iterations": getattr(result, "tool_iterations", 0),
"provider_name": getattr(result, "provider_name", None),
"model": getattr(result, "model", None),
"usage": dict(getattr(result, "usage", {}) or {}),

View File

@ -42,6 +42,8 @@ class EvidenceSelector:
resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
task_summaries: list[str] = []
session_excerpts: list[str] = []
tool_names: list[str] = []
selected_tool_names: list[str] = []
for run_id in run_ids:
record = runs_by_id.get(run_id)
if record is None:
@ -56,12 +58,19 @@ class EvidenceSelector:
excerpt = self._session_excerpt(record.session_id, run_id)
if excerpt:
session_excerpts.append(excerpt)
run_tool_names, run_selected_tool_names = self._run_tool_names(record.session_id, run_id)
tool_names.extend(run_tool_names)
selected_tool_names.extend(run_selected_tool_names)
return EvidencePacket(
run_ids=resolved_run_ids,
session_ids=resolved_session_ids,
task_summaries=task_summaries[:8],
session_excerpts=session_excerpts[:6],
metadata={"bounded": True},
metadata={
"bounded": True,
"tool_names": _unique_strings(tool_names),
"selected_tool_names": _unique_strings(selected_tool_names),
},
)
def _session_excerpt(self, session_id: str, run_id: str) -> str:
@ -74,3 +83,37 @@ class EvidenceSelector:
continue
visible.append(f"{event.role}: {event.content.strip()}")
return "\n".join(visible[:12])[:2000]
def _run_tool_names(self, session_id: str, run_id: str) -> tuple[list[str], list[str]]:
    """Collect the tool names recorded in one run's session events.

    Returns a ``(called, selected)`` pair of de-duplicated name lists.
    ``called`` gathers each event's ``tool_name`` plus the names found in its
    ``tool_calls`` entries. ``selected`` gathers the ``tool_names`` payload of
    ``tool_selection_snapshotted`` events. Both lists are empty when no
    session manager is configured.
    """
    manager = self.session_manager
    if manager is None:
        return [], []

    called: list[str] = []
    selected: list[str] = []
    for record in manager.get_run_event_records(session_id, run_id):
        if record.tool_name:
            called.append(record.tool_name)

        for entry in record.tool_calls or ():
            if not isinstance(entry, dict):
                continue
            # Accept both the flat {"name": ...} shape and the nested
            # {"function": {"name": ...}} shape.
            call_name = entry.get("name")
            nested = entry.get("function")
            if not call_name and isinstance(nested, dict):
                call_name = nested.get("name")
            if call_name:
                called.append(str(call_name))

        if record.event_type != "tool_selection_snapshotted":
            continue
        if not isinstance(record.event_payload, dict):
            continue
        snapshot = record.event_payload.get("tool_names")
        if isinstance(snapshot, list):
            selected.extend(str(item) for item in snapshot if str(item).strip())

    return _unique_strings(called), _unique_strings(selected)
def _unique_strings(values: list[str]) -> list[str]:
result: list[str] = []
for value in values:
cleaned = str(value).strip()
if cleaned and cleaned not in result:
result.append(cleaned)
return result

View File

@ -32,8 +32,14 @@ class SkillDraftSafetyChecker:
"credentials",
}
def __init__(self, *, allowed_tool_names: set[str] | None = None) -> None:
def __init__(
self,
*,
allowed_tool_names: set[str] | None = None,
allowed_tool_prefixes: set[str] | None = None,
) -> None:
self.allowed_tool_names = allowed_tool_names
self.allowed_tool_prefixes = allowed_tool_prefixes or set()
def check(self, draft: SkillDraft) -> SkillDraftSafetyReport:
issues: list[str] = []
@ -50,7 +56,7 @@ class SkillDraftSafetyChecker:
tool_hints = _tool_hints(frontmatter)
if self.allowed_tool_names is not None:
unknown = [name for name in tool_hints if name not in self.allowed_tool_names]
unknown = [name for name in tool_hints if not self._is_allowed_tool_hint(name)]
if unknown:
blocked.append(f"unknown tool hints: {', '.join(sorted(unknown))}")
dangerous = sorted({name for name in tool_hints if name.lower() in self._DANGEROUS_TOOL_HINTS})
@ -80,6 +86,11 @@ class SkillDraftSafetyChecker:
created_at=_utc_now(),
)
def _is_allowed_tool_hint(self, name: str) -> bool:
if self.allowed_tool_names is not None and name in self.allowed_tool_names:
return True
return any(name.startswith(prefix) and len(name) > len(prefix) for prefix in self.allowed_tool_prefixes)
def _tool_hints(frontmatter: dict) -> list[str]:
raw = frontmatter.get("tools")

View File

@ -65,19 +65,29 @@ class SkillDraftSynthesizer:
)
payload = self._parse_payload(response.content or "")
if payload:
return payload
return self._normalize_payload(payload, evidence_packet)
return self._fallback_payload(candidate, evidence_packet, action)
@staticmethod
def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
tool_names = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
tool_section = ", ".join(tool_names) if tool_names else "none observed"
selected_tool_names = _coerce_string_list(evidence_packet.metadata.get("selected_tool_names"))
selected_tool_section = ", ".join(selected_tool_names) if selected_tool_names else "none recorded"
return (
f"Action: {action}\n"
f"Candidate kind: {candidate.kind}\n"
f"Reason: {candidate.reason}\n"
f"Related skills: {candidate.related_skill_names}\n"
f"Called tool names: {tool_section}\n"
f"Run-selected tool names: {selected_tool_section}\n"
f"Task summaries:\n- " + "\n- ".join(evidence_packet.task_summaries)
+ "\n\nSession excerpts:\n" + "\n\n".join(evidence_packet.session_excerpts)
+ "\n\nReturn JSON only."
+ "\n\nReturn JSON only. The frontmatter object must include:"
+ "\n- description: a concise skill description"
+ "\n- tools: an explicit JSON array of exact tool names this skill needs. "
+ "Prefer called tool names when the workflow depends on them; use run-selected tool names only when clearly required. "
+ "Use [] only when no tool is required."
)
@staticmethod
@ -103,6 +113,19 @@ class SkillDraftSynthesizer:
"change_reason": str(payload.get("change_reason") or ""),
}
@staticmethod
def _normalize_payload(payload: dict[str, Any], evidence_packet: EvidencePacket) -> dict[str, Any]:
    """Normalize a parsed synthesis payload into the draft payload shape.

    The returned dict has ``frontmatter``, ``content`` and ``change_reason``
    keys. ``frontmatter["tools"]`` is always a list of strings; when the
    payload supplies no usable tool hints, the ``tool_names`` recorded in the
    evidence packet metadata are used instead.
    """
    raw_frontmatter = payload.get("frontmatter")
    # The payload comes from model output: a non-mapping frontmatter (a
    # string, a list, ...) would make dict() raise, so treat it as absent.
    frontmatter = dict(raw_frontmatter) if isinstance(raw_frontmatter, dict) else {}
    tool_hints = _coerce_string_list(frontmatter.get("tools"))
    if not tool_hints:
        # NOTE(review): an explicit empty "tools" list is also replaced here.
        tool_hints = _coerce_string_list(evidence_packet.metadata.get("tool_names"))
    frontmatter["tools"] = tool_hints
    return {
        "frontmatter": frontmatter,
        "content": str(payload.get("content") or "").strip(),
        "change_reason": str(payload.get("change_reason") or ""),
    }
@staticmethod
def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
related = candidate.related_skill_names[0] if candidate.related_skill_names else "generated-skill"
@ -111,8 +134,25 @@ class SkillDraftSynthesizer:
return {
"frontmatter": {
"description": candidate.reason or f"Auto-generated {action} draft for {title}.",
"tools": [],
"tools": _coerce_string_list(evidence_packet.metadata.get("tool_names")),
},
"content": f"# {title}\n\n## Evidence\n\n{content}\n",
"change_reason": candidate.reason or f"Fallback {action} synthesis.",
}
def _coerce_string_list(value: Any) -> list[str]:
raw_items: list[Any]
if isinstance(value, list):
raw_items = value
elif isinstance(value, str):
raw_items = value.split(",")
else:
raw_items = []
result: list[str] = []
for item in raw_items:
cleaned = str(item).strip()
if cleaned and cleaned not in result:
result.append(cleaned)
return result

View File

@ -26,38 +26,42 @@ class MainAgentRouter:
) -> MainAgentDecision:
if provider is None:
return self._fallback(active_task=active_task, reason="router_provider_unavailable")
try:
chat_kwargs: dict[str, Any] = {
"messages": [
{
"role": "system",
"content": (
"You are Beaver's Intent Agent. Your only job is to route the user's "
"message to simple chat or internal Task mode. Return only compact JSON. "
"Do not answer the user. Do not explain."
),
},
{
"role": "user",
"content": self._prompt(
message=message,
active_task=active_task,
recent_messages=recent_messages or [],
intent_skill=intent_skill,
),
},
],
"tools": None,
"model": model,
"max_tokens": 256,
"temperature": 0.0,
}
if thinking_enabled is not None:
chat_kwargs["thinking_enabled"] = thinking_enabled
response = await asyncio.wait_for(provider.chat(**chat_kwargs), timeout=timeout_seconds)
return self.from_json(response.content or "", active_task=active_task)
except Exception as exc:
return self._fallback(active_task=active_task, reason=f"router_failed: {exc}")
chat_kwargs: dict[str, Any] = {
"messages": [
{
"role": "system",
"content": (
"You are Beaver's Intent Agent. Your only job is to route the user's "
"message to simple chat or internal Task mode. Return only compact JSON. "
"Do not answer the user. Do not explain."
),
},
{
"role": "user",
"content": self._prompt(
message=message,
active_task=active_task,
recent_messages=recent_messages or [],
intent_skill=intent_skill,
),
},
],
"tools": None,
"model": model,
"max_tokens": 256,
"temperature": 0.0,
}
if thinking_enabled is not None:
chat_kwargs["thinking_enabled"] = thinking_enabled
last_error: Exception | None = None
for attempt_timeout in (timeout_seconds, 12.0):
try:
response = await asyncio.wait_for(provider.chat(**chat_kwargs), timeout=attempt_timeout)
return self.from_json(response.content or "", active_task=active_task)
except Exception as exc:
last_error = exc
return self._fallback(active_task=active_task, reason=f"router_failed: {last_error}")
def from_json(self, text: str, *, active_task: TaskRecord | None = None) -> MainAgentDecision:
payload = self._parse_json_object(text)

View File

@ -2,10 +2,46 @@
这是新 `Beaver` 后端的架构入口文档。
可视化入口:
- [Beaver Backend 可视化](backend-visualization.html)
## 给零基础读者的版本
可以先把这个后端理解成一个“帮用户完成任务的后台工厂”:
1. 用户在前端发一句话。
2. 后端入口把这句话接住。
3. 服务层判断它是闲聊,还是一个需要执行和跟踪的任务。
4. 如果是任务,系统会创建或继续一个 `Task`。
5. 运行内核 `AgentLoop` 准备上下文、选择技能、选择工具、调用模型。
6. 如果模型需要查文件、写文件、搜索或调用外部系统,就通过 `tools` 执行。
7. 执行结果会写回会话、任务记录和运行记录,后续可以继续追踪、验证和学习。
这套结构里最重要的原则是:**所有 agent 共用同一个运行内核 `engine`**。也就是说,主 agent 和被拆出去的小 agent 不是两套系统,它们最终都会回到 `AgentLoop`,使用同一套上下文、工具、技能和记录方式。
## 先认识几个词
- `interfaces`:入口层。负责接收 Web、CLI、Gateway、MCP 等不同来源的请求。
- `services`:应用服务层。负责把入口请求转成系统内部要做的事情。
- `engine`:运行内核。真正组织 prompt、调用模型、执行 tool loop 的地方。
- `coordinator`:多 agent 编排层。负责把复杂任务拆成 sequence、parallel 或 DAG。
- `skills`:技能层。可以理解成给 agent 的专项说明书。
- `tools`:工具层。可以理解成 agent 能按需调用的动作,例如读文件、写文件、搜索、执行命令。
- `memory`:记忆层。保存会话、任务结果、运行记录、反馈和技能学习数据。
- `permissions`:权限与治理层。负责约束哪些能力能用、怎么用。
## 一句话请求的流转
典型路径是:
`interfaces` -> `AgentService` -> `MainAgentRouter` -> `TaskService` / `TaskExecutionPlanner` -> `AgentLoop` -> `skills` / `tools` / `memory` -> 返回用户。
如果任务很简单,可能只走单 agent。如果任务更复杂,`TaskExecutionPlanner` 可能先生成一个 team plan,`coordinator` 安排多个 sub-agent 分别处理,最后再由主 agent 综合输出。
当前约束:
1. 所有 agent 共用 `engine`。
2. 多 agent 编排进入 `coordinator`。
3. skills、memory、permissions 独立成能力层。
4. `interfaces` 只做薄入口。

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -158,7 +158,7 @@ def test_load_config_reads_mcp_authz_identity(tmp_path) -> None:
},
"authz": {
"enabled": True,
"baseUrl": "http://nano-authz-service:19090",
"baseUrl": "http://beaver-authz-service:19090",
},
"backend_identity": {
"backend_id": "stevenli",
@ -180,7 +180,7 @@ def test_load_config_reads_mcp_authz_identity(tmp_path) -> None:
assert server.sensitive is True
assert config.authz.enabled is True
assert config.authz.base_url == "http://nano-authz-service:19090"
assert config.authz.base_url == "http://beaver-authz-service:19090"
assert config.backend_identity.backend_id == "stevenli"
assert config.backend_identity.client_id == "stevenli"

View File

@ -38,6 +38,39 @@ class RouterProvider(LLMProvider):
return "stub-model"
class SequenceRouterProvider(LLMProvider):
    """Stub provider that replays a scripted sequence of outcomes.

    Each ``chat`` call records its arguments in ``calls`` and consumes the next
    scripted entry: an ``Exception`` instance is raised, while a string is
    returned as the content of an ``LLMResponse``.
    """

    def __init__(self, responses: list[str | Exception]) -> None:
        super().__init__()
        # Copy so that consuming entries never mutates the caller's list.
        self.responses = list(responses)
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the call, then raise or return the next scripted outcome."""
        call_record = {
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "model": model,
            "thinking_enabled": thinking_enabled,
        }
        self.calls.append(call_record)

        outcome = self.responses.pop(0)
        if not isinstance(outcome, Exception):
            return LLMResponse(content=outcome, finish_reason="stop", provider_name="stub", model="stub-model")
        raise outcome

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
def _task() -> TaskRecord:
return TaskRecord(
task_id="task-1",
@ -133,3 +166,38 @@ def test_router_fallback_keeps_active_task_but_not_new_task() -> None:
assert active.is_task
assert not inactive.is_task
def test_router_retries_once_after_provider_failure() -> None:
    """A failed first attempt is retried and the second response is used."""
    scripted_outcomes = [
        TimeoutError(),
        '{"action":"new_task","reason":"needs search","short_title":"中美会面"}',
    ]
    provider = SequenceRouterProvider(scripted_outcomes)
    router = MainAgentRouter()

    decision = asyncio.run(
        router.classify("帮我看看昨天的中美会面都谈了什么?", provider=provider)
    )

    assert decision.is_task
    assert decision.action == "create_task"
    assert len(provider.calls) == 2
def test_router_fallback_after_two_provider_failures() -> None:
    """Two consecutive failures fall back, reporting the last error."""
    scripted_outcomes = [TimeoutError(), RuntimeError("provider down")]
    provider = SequenceRouterProvider(scripted_outcomes)
    router = MainAgentRouter()

    decision = asyncio.run(
        router.classify("帮我看看昨天的中美会面都谈了什么?", provider=provider)
    )

    assert not decision.is_task
    assert decision.reason == "router_failed: provider down"
    assert len(provider.calls) == 2

View File

@ -15,7 +15,12 @@ from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
def _pipeline(tmp_path: Path, *, allowed_tools: set[str] | None = None) -> SkillLearningPipelineService:
def _pipeline(
tmp_path: Path,
*,
allowed_tools: set[str] | None = None,
allowed_prefixes: set[str] | None = None,
) -> SkillLearningPipelineService:
spec_store = SkillSpecStore(tmp_path)
run_store = RunMemoryStore(tmp_path / "memory" / "runs")
learning_store = SkillLearningStore(tmp_path / "memory" / "skills")
@ -32,7 +37,10 @@ def _pipeline(tmp_path: Path, *, allowed_tools: set[str] | None = None) -> Skill
draft_service=drafts,
review_service=ReviewService(spec_store),
publisher=SkillPublisher(spec_store),
safety_checker=SkillDraftSafetyChecker(allowed_tool_names=allowed_tools),
safety_checker=SkillDraftSafetyChecker(
allowed_tool_names=allowed_tools,
allowed_tool_prefixes=allowed_prefixes,
),
)
@ -106,3 +114,53 @@ def test_safety_blocks_unknown_tool_hint(tmp_path: Path) -> None:
assert report.passed is False
assert "unknown tool hints" in report.blocked_reasons[0]
def test_safety_allows_configured_mcp_tool_prefix(tmp_path: Path) -> None:
    """Tool hints under a configured MCP prefix pass the safety check."""
    pipeline = _pipeline(tmp_path, allowed_tools={"echo"}, allowed_prefixes={"mcp_officebench_"})
    frontmatter = {
        "description": "officebench",
        "tools": [
            "mcp_officebench_shell_list_directory",
            "mcp_officebench_excel_read_file",
            "mcp_officebench_excel_set_cell",
        ],
    }
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="officebench-excel",
        proposed_content="# OfficeBench Excel\n\nUse the configured OfficeBench MCP tools.",
        proposed_frontmatter=frontmatter,
        created_by="test",
        reason="test",
    )

    report = pipeline.check_safety(draft.skill_name, draft.draft_id)

    assert report.passed is True
    assert report.blocked_reasons == []
def test_safety_blocks_unconfigured_mcp_tool_prefix(tmp_path: Path) -> None:
    """A tool hint outside every configured MCP prefix is blocked."""
    pipeline = _pipeline(tmp_path, allowed_tools={"echo"}, allowed_prefixes={"mcp_outlook_mcp_"})
    frontmatter = {
        "description": "wrong mcp",
        "tools": ["mcp_officebench_excel_set_cell"],
    }
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="wrong-mcp",
        proposed_content="# Wrong MCP\n\nUse an unconfigured MCP namespace.",
        proposed_frontmatter=frontmatter,
        created_by="test",
        reason="test",
    )

    report = pipeline.check_safety(draft.skill_name, draft.draft_id)

    assert report.passed is False
    assert "mcp_officebench_excel_set_cell" in report.blocked_reasons[0]

View File

@ -7,6 +7,7 @@ from types import SimpleNamespace
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.engine.session import SessionManager
from beaver.memory.runs import RunMemoryStore, RunRecord
from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore
from beaver.skills.drafts import DraftService
@ -125,6 +126,78 @@ def test_worker_retries_and_marks_failed_after_limit(tmp_path: Path) -> None:
assert "provider failed" in (candidate.last_error or "")
def test_synthesizer_fills_missing_tools_from_evidence(tmp_path: Path) -> None:
    """A payload without ``tools`` gets the tool names from the evidence packet."""
    pipeline = _pipeline(tmp_path)
    candidate = pipeline.get_candidate("candidate-1")
    model_payload = {
        "frontmatter": {"description": "Generated skill"},
        "content": "# Generated\n\nUse the observed workflow.",
        "change_reason": "learned",
    }
    provider = JsonProvider(payload=model_payload)
    selector = EvidenceSelector(pipeline.learning_service.run_store)
    packet = selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
    packet.metadata["tool_names"] = ["web_fetch", "memory"]

    synthesizer = SkillDraftSynthesizer()
    result = asyncio.run(synthesizer.synthesize_new_skill(candidate, packet, provider, "stub"))

    assert result["frontmatter"]["tools"] == ["web_fetch", "memory"]
def test_evidence_selector_records_run_tool_names(tmp_path: Path) -> None:
    """Called and run-selected tool names end up in the evidence packet metadata."""
    session_id = "session-1"
    run_id = "run-1"

    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
    record = RunRecord(
        run_id="run-1",
        session_id="session-1",
        task_text="research latest docs",
        started_at="start",
        ended_at="end",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(record)

    session_manager = SessionManager(tmp_path)
    session_manager.ensure_session(session_id)
    # Snapshot of the tools offered to the run.
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="system",
        event_type="tool_selection_snapshotted",
        event_payload={"tool_names": ["memory", "web_fetch"]},
        context_visible=False,
    )
    # Assistant turn requesting a tool through the nested "function" shape.
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="assistant",
        tool_calls=[{"id": "call-1", "function": {"name": "web_search"}}],
    )
    # Tool result event carrying its own tool name.
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="tool",
        tool_name="web_fetch",
        content="ok",
    )

    try:
        selector = EvidenceSelector(run_store, session_manager)
        packet = selector.build_evidence_packet([run_id], [session_id])
    finally:
        session_manager.close()

    assert packet.metadata["tool_names"] == ["web_search", "web_fetch"]
    assert packet.metadata["selected_tool_names"] == ["memory", "web_fetch"]
def test_worker_supersedes_candidate_when_active_draft_exists(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
pipeline.learning_store.record_learning_candidate(

View File

@ -78,6 +78,7 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
"model": None,
"provider_name": None,
"embedding_model": None,
"max_tool_iterations": None,
}
]
assert message["type"] == "message"
@ -128,5 +129,6 @@ def test_websocket_runtime_error_returns_assistant_error_message() -> None:
assert message["role"] == "assistant"
assert message["session_id"] == "web:alpha"
assert message["finish_reason"] == "error"
assert message["tool_iterations"] == 0
assert "boom" in message["content"]
assert pong == {"type": "pong"}