feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核
新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证 (通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。 实现Agent Team v1协调器,支持sequence/parallel/dag执行策略, sub-agent复用主AgentLoop,每个run使用独立memory snapshot。 建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期, 通过Task验证通过且用户满意才生成学习候选。 重构目录结构,移除third_party依赖,建立统一engine内核, 所有agent共享运行时基础组件。 更新ContextBuilder清理provider消息字段,增强SkillContext版本管理, 集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
109
app-instance/backend/beaver/skills/learning/safety.py
Normal file
109
app-instance/backend/beaver/skills/learning/safety.py
Normal file
@ -0,0 +1,109 @@
|
||||
"""Safety checks for generated skill drafts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.memory.skills import SkillDraftSafetyReport
|
||||
from beaver.skills.specs import SkillDraft
|
||||
|
||||
|
||||
class SkillDraftSafetyChecker:
    """Deterministic publish gate for skill drafts.

    A draft is inspected for structural problems, tool hints and
    prompt-injection style phrases. The outcome is returned as a
    ``SkillDraftSafetyReport``; a draft passes only when nothing was blocked
    and the resulting risk level is not ``"critical"``.
    """

    # Regexes searched in the lowercased draft content + frontmatter text.
    # Any match blocks the draft and forces the risk level to "critical".
    _CRITICAL_PATTERNS = [
        r"ignore (all )?(previous|system|developer) instructions",
        r"bypass (permissions|permission|safety|policy|policies)",
        r"disable (guardrails|safety|permission checks)",
        r"reveal (secrets|api keys|tokens|credentials)",
        r"exfiltrate",
    ]

    # Tool hint names (compared case-insensitively) that raise the risk level
    # to "high" and add a review issue, without blocking on their own.
    _DANGEROUS_TOOL_HINTS = {
        "shell",
        "terminal",
        "bash",
        "filesystem_write",
        "write_file",
        "delete_file",
        "network",
        "http",
        "auth",
        "credentials",
    }

    def __init__(self, *, allowed_tool_names: set[str] | None = None) -> None:
        """Create a checker.

        Args:
            allowed_tool_names: When not ``None``, every tool hint in a draft
                must be a member of this set, otherwise the draft is blocked.
                ``None`` disables the allow-list check.
        """
        self.allowed_tool_names = allowed_tool_names

    def check(self, draft: SkillDraft) -> SkillDraftSafetyReport:
        """Run all safety checks against *draft* and build a report.

        Args:
            draft: The skill draft to inspect. Its ``proposed_frontmatter``,
                ``proposed_content``, ``proposal_kind``, ``skill_name`` and
                ``draft_id`` attributes are read.

        Returns:
            A ``SkillDraftSafetyReport`` carrying the pass/fail verdict, the
            risk level, non-blocking issues, blocking reasons and a suggested
            fix message.
        """
        issues: list[str] = []
        blocked: list[str] = []
        risk_level = "low"

        frontmatter = draft.proposed_frontmatter
        if isinstance(frontmatter, dict):
            fields = frontmatter
        else:
            blocked.append("frontmatter must be an object")
            # Keep going with an empty mapping so the remaining checks still
            # run and a report is produced instead of an AttributeError.
            fields = {}

        description = str(fields.get("description") or "").strip()
        if not description and draft.proposal_kind != "retire_skill":
            issues.append("frontmatter.description is missing")
            risk_level = _max_risk(risk_level, "medium")

        tool_hints = _tool_hints(fields)
        if self.allowed_tool_names is not None:
            # De-duplicated so a repeated hint is reported once, matching how
            # dangerous hints are reported below.
            unknown = sorted(
                {name for name in tool_hints if name not in self.allowed_tool_names}
            )
            if unknown:
                blocked.append(f"unknown tool hints: {', '.join(unknown)}")
        dangerous = sorted(
            {name for name in tool_hints if name.lower() in self._DANGEROUS_TOOL_HINTS}
        )
        if dangerous:
            issues.append(
                f"dangerous tool hints require high-risk review: {', '.join(dangerous)}"
            )
            risk_level = _max_risk(risk_level, "high")

        # The raw frontmatter value (even when it is not a dict) is scanned so
        # that malformed frontmatter cannot hide a critical phrase.
        content = f"{draft.proposed_content}\n{frontmatter}".lower()
        for pattern in self._CRITICAL_PATTERNS:
            if re.search(pattern, content):
                blocked.append(f"critical prompt-safety pattern matched: {pattern}")
                risk_level = "critical"

        if draft.proposal_kind in {"retire_skill", "merge_skills"}:
            risk_level = _max_risk(risk_level, "high")

        passed = not blocked and risk_level != "critical"
        return SkillDraftSafetyReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            passed=passed,
            risk_level=risk_level,
            issues=issues,
            blocked_reasons=blocked,
            suggested_fix=_suggest_fix(blocked, issues),
            created_at=_utc_now(),
        )
|
||||
|
||||
|
||||
def _tool_hints(frontmatter: dict) -> list[str]:
|
||||
raw = frontmatter.get("tools")
|
||||
if isinstance(raw, list):
|
||||
return [str(item).strip() for item in raw if str(item).strip()]
|
||||
if isinstance(raw, str):
|
||||
return [item.strip() for item in raw.split(",") if item.strip()]
|
||||
return []
|
||||
|
||||
|
||||
def _max_risk(left: str, right: str) -> str:
|
||||
order = {"low": 0, "medium": 1, "high": 2, "critical": 3}
|
||||
return left if order[left] >= order[right] else right
|
||||
|
||||
|
||||
def _suggest_fix(blocked: list[str], issues: list[str]) -> str:
|
||||
if blocked:
|
||||
return "Remove blocked instructions or invalid tool hints before review."
|
||||
if issues:
|
||||
return "Review the flagged issues before publishing."
|
||||
return ""
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
from datetime import datetime, timezone
|
||||
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
Reference in New Issue
Block a user