feat(app): 移除内置agents并添加CORS支持和技能上传优化

移除了agents/registry.json中的所有内置agents配置,将agents数组清空。
为web应用添加了CORS中间件支持,允许指定的前端地址跨域访问。
重构了技能上传功能,增加了LLM重写机制,自动规范化上传的技能格式。
新增了工具名称提取逻辑,从技能正文中自动识别Required Tools段落。
更新了技能学习候选者和草稿的载荷结构,添加评估报告统计信息。
修改了意图路由技能的说明,改进任务状态管理逻辑。
This commit is contained in:
2026-06-12 13:25:20 +08:00
parent fc9fd93c36
commit 8aeb97a5fc
76 changed files with 3382 additions and 553 deletions

View File

@ -7,6 +7,7 @@ import asyncio
import io
import mimetypes
import os
import re
import secrets
import shutil
import time
@ -49,9 +50,11 @@ from beaver.services.user_file_resolver import (
UserFileStorageResolver,
build_file_auth_context,
)
from beaver.skills.learning import SkillLearningWorker, SkillLearningWorkerConfig
from beaver.skills.authoring import canonical_skill_format_instructions, ensure_canonical_skill_body, normalize_skill_frontmatter
from beaver.skills.authoring.format import parse_skill_rewrite_json
from beaver.skills.learning import SkillLearningService, SkillLearningWorker, SkillLearningWorkerConfig
from beaver.skills.learning.replay import ReplayRunner
from beaver.skills.catalog.utils import parse_frontmatter
from beaver.skills.catalog.utils import extract_required_tool_names, parse_frontmatter
from .deps import get_agent_service
from .files import (
@ -96,8 +99,11 @@ from .schemas import (
try:
from fastapi import FastAPI, File, Form, Header, HTTPException, Request, UploadFile, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, Response
except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only environments
CORSMiddleware = None # type: ignore[assignment]
def File(default: Any = None) -> Any: # type: ignore[override]
return default
@ -274,6 +280,7 @@ async def _app_lifespan(
worker = SkillLearningWorker(
pipeline=loaded.skill_learning_pipeline, # type: ignore[arg-type]
provider_bundle_factory=lambda: attached_service._make_provider_bundle_for_task(loaded, {}), # noqa: SLF001
replay_runner_factory=lambda: ReplayRunner(agent_loop=attached_service.create_loop()),
config=worker_config,
)
worker_task = asyncio.create_task(worker.run_forever())
@ -516,6 +523,20 @@ def _self_restart_enabled() -> bool:
return os.getenv("BEAVER_ENABLE_SELF_RESTART", "1").strip() not in {"0", "false", "False"}
def _cors_allow_origins() -> list[str]:
    """Return the list of origins handed to the CORS middleware.

    Reads the comma-separated ``BEAVER_CORS_ALLOW_ORIGINS`` environment
    variable. Each entry is stripped of surrounding whitespace and trailing
    slashes; blank entries are dropped. When the variable is unset or contains
    only whitespace, a fixed set of local development origins is returned.
    """
    configured = os.getenv("BEAVER_CORS_ALLOW_ORIGINS", "").strip()
    if not configured:
        # Nothing configured: fall back to the local front-end addresses.
        return [
            "http://127.0.0.1:3000",
            "http://localhost:3000",
            "http://127.0.0.1:3080",
            "http://localhost:3080",
            "http://127.0.0.1:3081",
            "http://localhost:3081",
        ]
    origins: list[str] = []
    for piece in configured.split(","):
        trimmed = piece.strip()
        if trimmed:
            origins.append(trimmed.rstrip("/"))
    return origins
def _schedule_self_restart(delay_seconds: float = 0.75) -> None:
import threading
@ -556,6 +577,14 @@ def create_app(
shutdown_force=shutdown_force,
),
)
if CORSMiddleware is not None:
app.add_middleware(
CORSMiddleware,
allow_origins=_cors_allow_origins(),
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
app.state.auth_tokens = {}
app.state.handoff_codes = {}
app.state.auth_file = Path(os.getenv("BEAVER_AUTH_FILE") or "")
@ -1992,13 +2021,19 @@ def create_app(
filename = file.filename or ""
if not filename.endswith(".zip"):
raise HTTPException(status_code=400, detail="File must be a .zip archive")
loaded = get_agent_service(request).create_loop().boot()
agent_service = get_agent_service(request)
loaded = agent_service.create_loop().boot()
try:
content = await file.read()
draft = _create_skill_upload_draft(loaded, filename, content)
draft_payload = _create_skill_upload_draft(loaded, filename, content)
draft = loaded.draft_service.get_draft(draft_payload["skill_name"], draft_payload["draft_id"])
if draft is not None:
await _rewrite_uploaded_skill_draft_with_llm(agent_service, loaded, draft, filename=filename)
draft = loaded.draft_service.get_draft(draft.skill_name, draft.draft_id) or draft
draft_payload = draft.to_dict()
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
return draft
return draft_payload
@app.get("/api/marketplaces/skills/search")
async def search_skillhub(
@ -2068,13 +2103,17 @@ def create_app(
@app.get("/api/skills/candidates")
async def list_skill_candidates(request: Request, status: str | None = None) -> list[dict[str, Any]]:
loaded = get_agent_service(request).create_loop().boot()
return [item.to_dict() for item in loaded.skill_learning_pipeline.list_candidates(status=status)] # type: ignore[union-attr]
return [
_skill_learning_candidate_payload(loaded, item)
for item in loaded.skill_learning_pipeline.list_candidates(status=status) # type: ignore[union-attr]
]
@app.get("/api/skills/candidates/{candidate_id}")
async def get_skill_candidate(candidate_id: str, request: Request) -> dict[str, Any]:
loaded = get_agent_service(request).create_loop().boot()
try:
return loaded.skill_learning_pipeline.get_candidate(candidate_id).to_dict() # type: ignore[union-attr]
candidate = loaded.skill_learning_pipeline.get_candidate(candidate_id) # type: ignore[union-attr]
return _skill_learning_candidate_payload(loaded, candidate)
except ValueError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
@ -2087,25 +2126,19 @@ def create_app(
candidate = loaded.skill_learning_pipeline.get_candidate(candidate_id) # type: ignore[union-attr]
if candidate.draft_skill_name and candidate.draft_id:
try:
return _skill_draft_payload(loaded, candidate.draft_skill_name, candidate.draft_id)
loaded.skill_learning_pipeline.get_draft(candidate.draft_skill_name, candidate.draft_id) # type: ignore[union-attr]
except ValueError:
pass
else:
return _skill_draft_payload(loaded, candidate.draft_skill_name, candidate.draft_id)
provider_bundle = agent_service._make_provider_bundle_for_task(loaded, {}) # noqa: SLF001
draft = await loaded.skill_learning_pipeline.synthesize_draft( # type: ignore[union-attr]
candidate_id,
provider_bundle=provider_bundle,
)
loaded.skill_learning_pipeline.check_safety(draft.skill_name, draft.draft_id) # type: ignore[union-attr]
await loaded.skill_learning_pipeline.evaluate_draft( # type: ignore[union-attr]
candidate_id,
draft.skill_name,
draft.draft_id,
provider_bundle=provider_bundle,
replay_runner=ReplayRunner(agent_loop=loop),
)
except ValueError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
return draft.to_dict()
return _skill_draft_payload(loaded, draft.skill_name, draft.draft_id)
@app.post("/api/skills/candidates/{candidate_id}/regenerate")
async def regenerate_skill_draft(candidate_id: str, request: Request) -> dict[str, Any]:
@ -2118,17 +2151,9 @@ def create_app(
candidate_id,
provider_bundle=provider_bundle,
)
loaded.skill_learning_pipeline.check_safety(draft.skill_name, draft.draft_id) # type: ignore[union-attr]
await loaded.skill_learning_pipeline.evaluate_draft( # type: ignore[union-attr]
candidate_id,
draft.skill_name,
draft.draft_id,
provider_bundle=provider_bundle,
replay_runner=ReplayRunner(agent_loop=loop),
)
except ValueError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
return draft.to_dict()
return _skill_draft_payload(loaded, draft.skill_name, draft.draft_id)
@app.post("/api/skills/learning/run-once")
async def run_skill_learning_once(request: Request) -> dict[str, Any]:
@ -2185,17 +2210,31 @@ def create_app(
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/submit")
async def submit_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
loaded = get_agent_service(request).create_loop().boot()
agent_service = get_agent_service(request)
loop = agent_service.create_loop()
loaded = loop.boot()
try:
review = loaded.skill_learning_pipeline.submit_review( # type: ignore[union-attr]
skill_name,
draft_id,
requested_by=str((payload or {}).get("requested_by") or "web"),
notes=str((payload or {}).get("notes") or ""),
)
safety = loaded.skill_learning_pipeline.check_safety(skill_name, draft_id) # type: ignore[union-attr]
if safety.passed and safety.risk_level != "critical":
loaded.skill_learning_pipeline.submit_review( # type: ignore[union-attr]
skill_name,
draft_id,
requested_by=str((payload or {}).get("requested_by") or "web"),
notes=str((payload or {}).get("notes") or ""),
)
candidate_id = _skill_learning_candidate_id_for_draft(loaded, skill_name, draft_id)
if candidate_id is not None:
provider_bundle = agent_service._make_provider_bundle_for_task(loaded, {}) # noqa: SLF001
await loaded.skill_learning_pipeline.evaluate_draft( # type: ignore[union-attr]
candidate_id,
skill_name,
draft_id,
provider_bundle=provider_bundle,
replay_runner=ReplayRunner(agent_loop=loop),
)
except ValueError as exc:
raise _skill_draft_http_error(exc) from exc
return review.to_dict()
return _skill_draft_payload(loaded, skill_name, draft_id)
@app.post("/api/skills/{skill_name}/drafts/{draft_id}/approve")
async def approve_skill_draft(skill_name: str, draft_id: str, request: Request, payload: dict[str, Any] | None = None) -> dict[str, Any]:
@ -2719,47 +2758,70 @@ def _create_skill_upload_draft(loaded: Any, filename: str, content: bytes) -> di
if not file_infos:
raise ValueError("Zip archive is empty")
skill_entries = []
for info in file_infos:
parts = Path(info.filename.replace("\\", "/")).parts
if "__MACOSX" in parts or Path(info.filename).name == ".DS_Store":
continue
if info.filename.replace("\\", "/").startswith("/") or any(part in {"", ".", ".."} for part in parts):
raise ValueError(f"Unsafe archive entry: {info.filename}")
if parts[-1] == "SKILL.md":
if len(parts) not in (1, 2):
raise ValueError("SKILL.md must be at root or inside one top-level directory")
skill_entries.append(info.filename)
if not skill_entries:
raise ValueError("Zip must contain SKILL.md")
skill_entry = skill_entries[0]
top = Path(skill_entry).parts[0] if len(Path(skill_entry).parts) == 2 else ""
raw_skill = archive.read(skill_entry).decode("utf-8", errors="replace")
frontmatter, body = parse_frontmatter(raw_skill)
skill_name = str(frontmatter.get("name") or top or Path(filename).stem).strip().replace(" ", "-")
if not skill_name or "/" in skill_name or "\\" in skill_name or skill_name in {".", ".."}:
raise ValueError("Could not determine a safe skill name")
files: list[tuple[str, bytes]] = []
safe_entries: list[tuple[Any, str, tuple[str, ...]]] = []
for info in file_infos:
raw = info.filename.replace("\\", "/")
parts = Path(raw).parts
if "__MACOSX" in parts or Path(raw).name == ".DS_Store":
continue
if raw.startswith("/"):
if raw.startswith("/") or any(part in {"", ".", ".."} for part in parts):
raise ValueError(f"Unsafe archive entry: {info.filename}")
if top and parts and parts[0] != top:
raise ValueError("Zip archive must contain a single top-level skill directory")
rel_parts = parts[1:] if top and parts and parts[0] == top else parts
safe_entries.append((info, raw, tuple(parts)))
if _is_skill_markdown_entry(parts[-1]):
skill_entries.append(raw)
if not skill_entries:
raise ValueError("Zip must contain SKILL.md")
if len(skill_entries) > 1:
raise ValueError("Zip must contain exactly one SKILL.md")
skill_entry = skill_entries[0]
skill_root = tuple(Path(skill_entry).parts[:-1])
raw_skill = archive.read(skill_entry).decode("utf-8", errors="replace")
frontmatter, body = parse_frontmatter(raw_skill)
skill_name = str(frontmatter.get("name") or (skill_root[-1] if skill_root else "") or Path(filename).stem).strip().replace(" ", "-")
if not skill_name or "/" in skill_name or "\\" in skill_name or skill_name in {".", ".."}:
raise ValueError("Could not determine a safe skill name")
proposed_frontmatter = normalize_skill_frontmatter(
{
**dict(frontmatter),
"name": skill_name,
"description": frontmatter.get("description") or skill_name,
},
skill_name=skill_name,
)
proposed_frontmatter["tools"] = _merge_tool_names(
proposed_frontmatter.get("tools"),
extract_required_tool_names(body),
_infer_uploaded_skill_tools(
skill_name=skill_name,
filename=filename,
frontmatter=proposed_frontmatter,
content=body,
loaded=loaded,
),
)
proposed_content = ensure_canonical_skill_body(
body,
title=skill_name,
description=str(proposed_frontmatter.get("description") or ""),
tools=list(proposed_frontmatter.get("tools") or []),
)
files: list[tuple[str, bytes]] = []
for info, raw, parts in safe_entries:
if raw == skill_entry:
continue
if skill_root:
if parts[: len(skill_root)] != skill_root:
continue
rel_parts = parts[len(skill_root):]
else:
rel_parts = parts
if not rel_parts or any(part in {"", ".", ".."} for part in rel_parts):
raise ValueError(f"Unsafe archive entry: {info.filename}")
files.append(("/".join(rel_parts), archive.read(info)))
draft = loaded.draft_service.create_new_skill_draft(
skill_name=skill_name,
proposed_content=body,
proposed_frontmatter={
**dict(frontmatter),
"name": skill_name,
"description": frontmatter.get("description") or skill_name,
},
proposed_content=proposed_content,
proposed_frontmatter=proposed_frontmatter,
created_by="web-upload",
reason=f"Uploaded {filename}",
evidence_refs=[{"kind": "upload", "filename": filename, "files": sorted(path for path, _ in files)}],
@ -2784,6 +2846,162 @@ def _create_skill_upload_draft(loaded: Any, filename: str, content: bytes) -> di
return draft.to_dict()
def _is_skill_markdown_entry(filename: str) -> bool:
    """Tell whether *filename* is a skill manifest file name.

    The comparison ignores letter case and surrounding whitespace, and accepts
    both ``skill.md`` and ``skills.md``.
    """
    normalized = filename.strip().lower()
    return normalized == "skill.md" or normalized == "skills.md"
def _merge_tool_names(*groups: Any) -> list[str]:
    """Merge several groups of tool names into one de-duplicated list.

    Args:
        *groups: Each group is either a comma-separated string or a
            list/tuple/set/frozenset of items. Any other value (for example
            ``None``) contributes nothing.

    Returns:
        Tool names in first-seen order. Items are converted with ``str`` and
        stripped; blank items and ``None`` items are skipped so a null entry
        never turns into a tool literally named ``"None"``.
    """
    merged: list[str] = []
    seen: set[str] = set()
    for group in groups:
        if isinstance(group, str):
            items = group.split(",")
        elif isinstance(group, (list, tuple)):
            items = group
        elif isinstance(group, (set, frozenset)):
            # Sets have no stable order; sort so the output is deterministic.
            items = sorted(group, key=str)
        else:
            continue
        for item in items:
            if item is None:
                continue
            name = str(item).strip()
            if name and name not in seen:
                seen.add(name)
                merged.append(name)
    return merged
def _infer_uploaded_skill_tools(
    *,
    skill_name: str,
    filename: str,
    frontmatter: dict[str, Any],
    content: str,
    loaded: Any,
) -> list[str]:
    """Guess which runtime tools an uploaded skill relies on from its text.

    The skill name, upload filename, JSON-serialized frontmatter and body are
    joined and lower-cased. Two passes then build the result:

    1. Every known tool name (the runtime registry's names, or the common
       fallback set when the registry is unavailable or empty) that appears in
       the text as a standalone identifier is included, in sorted order.
    2. Keyword heuristics (weather, search and web-page vocabulary) append
       ``web_fetch`` / ``web_search``. When the registry could be read, a tool
       is only appended if the registry lists it.

    Returns:
        Inferred tool names without duplicates.
    """
    available = _available_runtime_tool_names(loaded)
    haystack = "\n".join(
        (
            skill_name,
            filename,
            json.dumps(frontmatter, ensure_ascii=False, sort_keys=True),
            content,
        )
    ).lower()

    # Pass 1: literal mentions, bounded so "terminal" does not match "terminal_x".
    inferred: list[str] = [
        name
        for name in sorted(available or _COMMON_RUNTIME_TOOL_NAMES)
        if re.search(rf"(?<![a-z0-9_]){re.escape(name.lower())}(?![a-z0-9_])", haystack)
    ]

    # Pass 2: vocabulary that implies web access, checked in a fixed order.
    keyword_rules = (
        (
            r"\b(weather|forecast|temperature|precipitation|rain|snow|humidity|wind|air quality|aqi)\b",
            ("web_fetch", "web_search"),
        ),
        (
            r"\b(latest|current|today|tomorrow|news|search|query|lookup|find online|web search)\b",
            ("web_search",),
        ),
        (
            r"\b(url|http|https|website|webpage|page|fetch|crawl|browser|online source)\b",
            ("web_fetch",),
        ),
    )
    for pattern, implied_tools in keyword_rules:
        if not re.search(pattern, haystack):
            continue
        for name in implied_tools:
            if available is not None and name not in available:
                continue
            if name not in inferred:
                inferred.append(name)
    return inferred
def _available_runtime_tool_names(loaded: Any) -> set[str] | None:
registry = getattr(loaded, "tool_registry", None)
if registry is None:
return None
try:
return {spec.name for spec in registry.list_specs()}
except Exception:
return None
# Fallback tool names used by the skill-upload helpers in this module when the
# runtime tool registry is unavailable or reports no tools.
_COMMON_RUNTIME_TOOL_NAMES: set[str] = {
    "web_fetch",
    "web_search",
    "read_file",
    "write_file",
    "patch_file",
    "search_files",
    "list_directory",
    "memory",
    "terminal",
    "process",
    "execute_code",
    "skill_view",
    "skills_list",
    "skill_manage",
    "cron",
}
async def _rewrite_uploaded_skill_draft_with_llm(agent_service: Any, loaded: Any, draft: Any, *, filename: str) -> None:
    """Ask an LLM to rewrite an uploaded skill draft into the canonical format.

    The provider and runtime are taken from the task provider bundle, each
    preferring the ``auxiliary_*`` attribute and falling back to ``main_*``.
    When the model reply parses as a rewrite payload, the tool list is merged
    with tools found in the rewritten text, the body is canonicalized, the
    draft's proposed frontmatter/content (and reason, if a change reason was
    returned) are updated, and the draft is written through
    ``loaded.skill_spec_store.write_draft``.

    This is best effort: when no provider is available or the reply cannot be
    parsed the function returns without touching the draft, and any exception
    raised along the way is logged and swallowed so the upload itself still
    succeeds.

    Args:
        agent_service: Service used to build the provider bundle.
        loaded: Booted runtime object; supplies the tool registry and the
            skill spec store.
        draft: Draft to rewrite; mutated in place on success.
        filename: Name of the uploaded archive, passed to the model as context.
    """
    # Function-scope import so this block does not rely on a module-level logger.
    import logging

    try:
        provider_bundle = agent_service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001
        provider = getattr(provider_bundle, "auxiliary_provider", None) or getattr(provider_bundle, "main_provider", None)
        runtime = getattr(provider_bundle, "auxiliary_runtime", None) or getattr(provider_bundle, "main_runtime", None)
        if provider is None:
            return
        available_tool_names = sorted(_available_runtime_tool_names(loaded) or _COMMON_RUNTIME_TOOL_NAMES)
        response = await provider.chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You rewrite uploaded Beaver skills into the required house style. "
                        "Return only JSON with keys: frontmatter, content, change_reason. "
                        "Do not include markdown fences."
                    ),
                },
                {
                    "role": "user",
                    "content": (
                        f"Uploaded filename: {filename}\n"
                        f"Skill name: {draft.skill_name}\n"
                        f"Current frontmatter:\n{json.dumps(draft.proposed_frontmatter, ensure_ascii=False, sort_keys=True)}\n\n"
                        f"Current content:\n{draft.proposed_content}\n\n"
                        f"Available runtime tool names:\n{json.dumps(available_tool_names, ensure_ascii=False)}\n\n"
                        f"{canonical_skill_format_instructions()}\n\n"
                        "Rewrite the skill so it is operational, concrete, and ready for review/publish. "
                        "Infer exact required runtime tools from the uploaded content when the workflow depends on tools. "
                        "Keep frontmatter.tools and the Required Tools section consistent."
                    ),
                },
            ],
            tools=None,
            model=getattr(runtime, "model", None),
            max_tokens=4096,
            temperature=0,
        )
        payload = parse_skill_rewrite_json(response.content or "", skill_name=draft.skill_name)
        if payload is None:
            return
        # Union of the tools the model declared, the tools named in the
        # rewritten body, and the tools inferred from the rewritten text.
        payload["frontmatter"]["tools"] = _merge_tool_names(
            payload["frontmatter"].get("tools"),
            extract_required_tool_names(payload["content"]),
            _infer_uploaded_skill_tools(
                skill_name=draft.skill_name,
                filename=filename,
                frontmatter=payload["frontmatter"],
                content=payload["content"],
                loaded=loaded,
            ),
        )
        payload["content"] = ensure_canonical_skill_body(
            payload["content"],
            title=str(payload["frontmatter"].get("name") or draft.skill_name),
            description=str(payload["frontmatter"].get("description") or ""),
            tools=list(payload["frontmatter"].get("tools") or []),
        )
        draft.proposed_frontmatter = payload["frontmatter"]
        draft.proposed_content = payload["content"]
        if payload.get("change_reason"):
            draft.reason = f"{draft.reason}; LLM rewrite: {payload['change_reason']}"
        loaded.skill_spec_store.write_draft(draft)
    except Exception:
        # Still best effort, but leave a trace: a silent return made provider,
        # parsing and persistence failures impossible to diagnose.
        logging.getLogger(__name__).warning(
            "LLM rewrite of uploaded skill %r failed; continuing without it",
            getattr(draft, "skill_name", None),
            exc_info=True,
        )
        return
def _debug_runs_for_session(session_manager: Any, session_id: str) -> list[dict[str, Any]]:
grouped: dict[str, list[Any]] = {}
run_order: list[str] = []
@ -3559,6 +3777,39 @@ def _skill_detail_payload(loaded: Any, name: str, version: str | None) -> dict[s
}
def _skill_learning_candidate_payload(loaded: Any, candidate: Any) -> dict[str, Any]:
    """Serialize a learning candidate, enriching its evidence when task text is known.

    Starts from ``candidate.to_dict()``. If a task text can be resolved for the
    candidate, a copy of the evidence gains ``task_text`` and ``theme`` entries
    and replaces ``payload["evidence"]``; for ``new_skill`` candidates an
    ``evidence_summary`` naming the theme is added as well. Without task text
    the serialized candidate is returned unchanged.
    """
    payload = candidate.to_dict()
    evidence = dict(payload.get("evidence") or {})
    task_text = _skill_learning_candidate_task_text(loaded, candidate)
    if not task_text:
        return payload

    evidence.update(
        task_text=task_text,
        theme=SkillLearningService._task_theme(task_text),
    )
    payload["evidence"] = evidence
    if candidate.kind == "new_skill":
        payload["evidence_summary"] = f"Theme: {evidence['theme']}"
    return payload
def _skill_learning_candidate_task_text(loaded: Any, candidate: Any) -> str:
    """Resolve the task text that best represents a learning candidate.

    Lookup order:

    1. Runs from the learning service's run store whose ``task_id`` matches
       the candidate evidence's ``task_id``.
    2. Runs whose ``run_id`` is among the candidate's ``source_run_ids``.
    3. The stripped ``task_text`` stored in the candidate evidence (also used
       when the run store cannot be read), or ``""`` when absent.
    """
    evidence = candidate.evidence if isinstance(candidate.evidence, dict) else {}
    task_id = str(evidence.get("task_id") or "").strip()
    wanted_run_ids = set(candidate.source_run_ids or [])

    def stored_text() -> str:
        return str(evidence.get("task_text") or "").strip()

    try:
        all_runs = loaded.skill_learning_pipeline.learning_service.run_store.list_runs()
    except Exception:
        # Run store unavailable: fall back to whatever the candidate recorded.
        return stored_text()

    if task_id:
        same_task = [run for run in all_runs if run.task_id == task_id]
        if same_task:
            return SkillLearningService._representative_task_text(same_task)

    from_sources = [run for run in all_runs if run.run_id in wanted_run_ids]
    if not from_sources:
        return stored_text()
    return SkillLearningService._representative_task_text(from_sources)
def _skill_draft_payload(loaded: Any, skill_name: str, draft_id: str, *, include_reviews: bool = False) -> dict[str, Any]:
draft = loaded.skill_learning_pipeline.get_draft(skill_name, draft_id) # type: ignore[union-attr]
safety = loaded.skill_learning_pipeline.get_safety_report(skill_name, draft_id) # type: ignore[union-attr]
@ -3567,6 +3818,8 @@ def _skill_draft_payload(loaded: Any, skill_name: str, draft_id: str, *, include
**draft.to_dict(),
"safety_report": safety.to_dict() if safety is not None else None,
"eval_report": eval_report.to_dict() if eval_report is not None else None,
"target_version": _skill_draft_target_version(loaded, draft.skill_name, draft.proposal_kind),
"base_skill": _skill_draft_base_skill_payload(loaded, draft),
}
if include_reviews:
payload["reviews"] = [
@ -3576,6 +3829,45 @@ def _skill_draft_payload(loaded: Any, skill_name: str, draft_id: str, *, include
return payload
def _skill_draft_base_skill_payload(loaded: Any, draft: Any) -> dict[str, Any] | None:
if draft.proposal_kind == "new_skill" or not draft.base_version:
return None
store = loaded.skill_learning_pipeline.publisher.store # type: ignore[union-attr]
loaded_version = store.read_published_skill(draft.skill_name, draft.base_version)
if loaded_version is None:
return None
version = loaded_version.version
return {
"skill_name": version.skill_name,
"version": version.version,
"frontmatter": dict(version.frontmatter),
"content": loaded_version.content,
"summary": version.summary,
"tool_hints": list(version.tool_hints),
}
def _skill_draft_target_version(loaded: Any, skill_name: str, proposal_kind: str) -> str | None:
if proposal_kind == "retire_skill":
return None
versions = [
item
for item in loaded.skill_learning_pipeline.publisher.store.list_versions(skill_name) # type: ignore[union-attr]
if isinstance(item, str) and item.startswith("v") and item[1:].isdigit()
]
if not versions:
return "v0001"
latest = max(int(item[1:]) for item in versions)
return f"v{latest + 1:04d}"
def _skill_learning_candidate_id_for_draft(loaded: Any, skill_name: str, draft_id: str) -> str | None:
for candidate in loaded.skill_learning_pipeline.list_candidates(): # type: ignore[union-attr]
if candidate.draft_skill_name == skill_name and candidate.draft_id == draft_id:
return candidate.candidate_id
return None
def _skill_versions_payload(loaded: Any, record: Any) -> list[dict[str, Any]]:
if record.source != "workspace":
return [