feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核

新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证
(通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。

实现Agent Team v1协调器,支持sequence/parallel/dag执行策略,
sub-agent复用主AgentLoop,每个run使用独立memory snapshot。

建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期,
通过Task验证通过且用户满意才生成学习候选。

重构目录结构,移除third_party依赖,建立统一engine内核,
所有agent共享运行时基础组件。

更新ContextBuilder清理provider消息字段,增强SkillContext版本管理,
集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
2026-05-08 17:14:14 +08:00
parent 5ba5c7e4c1
commit 8a12c30141
93 changed files with 16724 additions and 1247 deletions

View File

@ -0,0 +1,24 @@
"""Skill learning loop helpers."""
from .evidence import EvidencePacket, EvidenceSelector
from .eval import SkillDraftEvaluator
from .missing_skill import MissingSkillDraftResult, MissingSkillSynthesizer
from .pipeline import SkillLearningPipelineService
from .service import RunReceiptContext, SkillLearningService
from .synthesizer import SkillDraftSynthesizer
from .worker import SkillLearningWorker, SkillLearningWorkerConfig, SkillLearningWorkerResult
# Public API of the skill-learning package. Every name listed here is
# re-exported from one of the submodules imported above.
__all__ = [
    "EvidencePacket",
    "EvidenceSelector",
    "SkillDraftEvaluator",
    "MissingSkillDraftResult",
    "MissingSkillSynthesizer",
    "RunReceiptContext",
    "SkillLearningPipelineService",
    "SkillDraftSynthesizer",
    "SkillLearningService",
    "SkillLearningWorker",
    "SkillLearningWorkerConfig",
    "SkillLearningWorkerResult",
]

View File

@ -0,0 +1,121 @@
"""Lightweight replay/eval reports for skill drafts."""
from __future__ import annotations
from uuid import uuid4
from beaver.engine.providers import ProviderBundle
from beaver.memory.runs import RunMemoryStore
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate
from beaver.skills.specs import SkillDraft
class SkillDraftEvaluator:
    """Build a bounded eval report for a skill draft from recorded runs.

    The evaluator only reads from ``run_store``; the scores it reports are
    derived heuristically from the stored validation results and the draft
    text (see ``_score_from_validation`` and ``_candidate_score``).
    """

    def __init__(self, run_store: RunMemoryStore) -> None:
        self.run_store = run_store

    async def evaluate(
        self,
        *,
        candidate: SkillLearningCandidate,
        draft: SkillDraft,
        provider_bundle: ProviderBundle | None,
    ) -> SkillDraftEvalReport:
        """Score ``draft`` against up to eight of the candidate's source runs.

        Returns a ``skipped_provider_unavailable`` report when there is no
        provider bundle or the bundle has no main provider. Otherwise one case
        is produced per source run that can still be found in the run store;
        when none can be found a single synthetic case with a 0.75 baseline is
        used so the averages are always defined.
        """
        if provider_bundle is None or provider_bundle.main_provider is None:
            return self._skipped(candidate, draft)

        known_runs = {run.run_id: run for run in self.run_store.list_runs()}
        cases: list[dict] = []
        for run_id in candidate.source_run_ids[:8]:
            run = known_runs.get(run_id)
            if run is not None:
                before = _score_from_validation(run.validation_result, run.success)
                cases.append(self._case(run_id, run.session_id, before, draft))
        if not cases:
            cases.append(self._case("", "", 0.75, draft))

        case_count = len(cases)
        baseline_avg = sum(case["baseline_score"] for case in cases) / case_count
        candidate_avg = sum(case["candidate_score"] for case in cases) / case_count
        regression_count = sum(
            1 for case in cases if case["candidate_score"] < case["baseline_score"]
        )
        improved_count = sum(
            1 for case in cases if case["candidate_score"] > case["baseline_score"]
        )
        score_delta = candidate_avg - baseline_avg

        # A draft fails when it regresses at least one case without a net gain,
        # or when its average score stays below the 0.75 bar.
        blocked_by_regression = regression_count > 0 and score_delta <= 0
        passed = not blocked_by_regression and candidate_avg >= 0.75

        return SkillDraftEvalReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            candidate_id=candidate.candidate_id,
            passed=passed,
            baseline_score_avg=round(baseline_avg, 4),
            candidate_score_avg=round(candidate_avg, 4),
            score_delta=round(score_delta, 4),
            regression_count=regression_count,
            improved_count=improved_count,
            unchanged_count=case_count - regression_count - improved_count,
            cases=cases,
            status="completed",
            created_at=_utc_now(),
        )

    @staticmethod
    def _case(run_id: str, session_id: str, baseline: float, draft: SkillDraft) -> dict:
        """Build one per-run comparison entry for the report's ``cases`` list."""
        after = _candidate_score(baseline, draft)
        return {
            "run_id": run_id,
            "session_id": session_id,
            "baseline_score": baseline,
            "candidate_score": after,
            "delta": round(after - baseline, 4),
        }

    def _skipped(self, candidate: SkillLearningCandidate, draft: SkillDraft) -> SkillDraftEvalReport:
        """Return an all-zero report marked as passed because eval could not run."""
        return SkillDraftEvalReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            candidate_id=candidate.candidate_id,
            passed=True,
            baseline_score_avg=0.0,
            candidate_score_avg=0.0,
            score_delta=0.0,
            regression_count=0,
            improved_count=0,
            unchanged_count=0,
            cases=[],
            status="skipped_provider_unavailable",
            created_at=_utc_now(),
        )
def _score_from_validation(validation: dict | None, success: bool) -> float:
if isinstance(validation, dict) and "score" in validation:
try:
return max(0.0, min(1.0, float(validation.get("score") or 0.0)))
except (TypeError, ValueError):
pass
return 0.8 if success else 0.4
def _candidate_score(baseline: float, draft: SkillDraft) -> float:
content = draft.proposed_content.strip()
if not content and draft.proposal_kind != "retire_skill":
return 0.0
if "regression" in content.lower():
return max(0.0, baseline - 0.2)
return min(1.0, max(0.75, baseline + 0.05))
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,76 @@
"""Evidence selection for skill learning."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from beaver.engine.session.manager import SessionManager
from beaver.memory.runs.store import RunMemoryStore
@dataclass(slots=True)
class EvidencePacket:
run_ids: list[str]
session_ids: list[str]
task_summaries: list[str]
session_excerpts: list[str]
metadata: dict[str, Any] = field(default_factory=dict)
class EvidenceSelector:
    """Pick runs and assemble bounded evidence for skill learning.

    Args:
        run_store: Source of recorded runs.
        session_manager: Optional source of per-run session events. Without
            it, evidence packets carry no session excerpts.
    """

    def __init__(self, run_store: RunMemoryStore, session_manager: SessionManager | None = None) -> None:
        self.run_store = run_store
        self.session_manager = session_manager

    def select_runs_for_revision(self, skill_name: str, version: str, limit: int = 5) -> list[str]:
        """Return the ids of runs the store lists for ``skill_name``/``version``."""
        runs = self.run_store.list_runs_by_skill(skill_name, version=version, limit=limit)
        return [record.run_id for record in runs]

    def select_runs_for_new_skill(self, theme: str, limit: int = 5) -> list[str]:
        """Return the ids of the last ``limit`` runs whose task text mentions ``theme``.

        Matching is a case-insensitive substring test; a blank theme matches
        every run. A non-positive ``limit`` yields an empty list.
        """
        # Guard explicitly: ``matches[-0:]`` is the whole list, not an empty
        # one, and a negative limit would slice from the wrong end.
        if limit <= 0:
            return []
        needle = theme.lower().strip()
        matches = [
            record.run_id
            for record in self.run_store.list_runs()
            if not needle or needle in record.task_text.lower()
        ]
        return matches[-limit:]

    def build_evidence_packet(self, run_ids: list[str], session_ids: list[str] | None = None) -> EvidencePacket:
        """Collect summaries and session excerpts for the given runs.

        Run ids that are not in the run store are dropped. Session ids start
        with the de-duplicated ``session_ids`` argument and are extended with
        each resolved run's session. Summaries are cut to 400 characters and
        at most 8 summaries and 6 excerpts are kept.
        """
        runs_by_id = {record.run_id: record for record in self.run_store.list_runs()}
        resolved_run_ids: list[str] = []
        resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
        task_summaries: list[str] = []
        session_excerpts: list[str] = []
        for run_id in run_ids:
            record = runs_by_id.get(run_id)
            if record is None:
                continue
            resolved_run_ids.append(run_id)
            if record.session_id not in resolved_session_ids:
                resolved_session_ids.append(record.session_id)
            summary = record.task_text.strip()
            if summary:
                task_summaries.append(summary[:400])
            if self.session_manager is not None:
                excerpt = self._session_excerpt(record.session_id, run_id)
                if excerpt:
                    session_excerpts.append(excerpt)
        return EvidencePacket(
            run_ids=resolved_run_ids,
            session_ids=resolved_session_ids,
            task_summaries=task_summaries[:8],
            session_excerpts=session_excerpts[:6],
            metadata={"bounded": True},
        )

    def _session_excerpt(self, session_id: str, run_id: str) -> str:
        """Render up to 12 context-visible events of a run, capped at 2000 characters."""
        if self.session_manager is None:
            return ""
        events = self.session_manager.get_run_event_records(session_id, run_id)
        visible = [
            f"{event.role}: {event.content.strip()}"
            for event in events
            if event.context_visible and event.content
        ]
        return "\n".join(visible[:12])[:2000]

View File

@ -0,0 +1,166 @@
"""Synthesize draft-only skills for missing sub-agent guidance."""
from __future__ import annotations
import json
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from beaver.engine.context import SkillContext
from beaver.engine.providers import ProviderBundle
from beaver.skills.drafts import DraftService
from beaver.skills.specs import SkillDraft
from beaver.skills.specs.serialization import canonical_hash
if TYPE_CHECKING:
from beaver.tasks.models import TaskRecord
@dataclass(slots=True)
class MissingSkillDraftResult:
draft: SkillDraft
skill_context: SkillContext
class MissingSkillSynthesizer:
    """Create a draft skill and an ephemeral SkillContext for the current run."""

    # Tags used when the model supplies none, or supplies an unusable value.
    _DEFAULT_TAGS = ("generated", "task-sub-agent")

    async def synthesize(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        node_id: str,
        node_task: str,
        skill_query: str,
        required_capabilities: list[str],
        provider_bundle: ProviderBundle,
        draft_service: DraftService,
    ) -> MissingSkillDraftResult:
        """Ask the model for a skill draft, store it, and wrap it for immediate use.

        The auxiliary provider/runtime is preferred over the main one. If the
        model call fails or its reply is not a JSON object, a deterministic
        fallback payload built from ``skill_query``/``node_task`` is used, so
        this method still produces a draft.

        Returns:
            The draft created through ``draft_service`` together with a
            ``SkillContext`` named ``draft:<skill_name>``.
        """
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        model = getattr(runtime, "model", None)
        fallback = self._fallback_payload(
            skill_query=skill_query,
            node_task=node_task,
            capabilities=required_capabilities,
        )
        try:
            response = await provider.chat(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You create concise Beaver skill drafts. Return only JSON with keys: "
                            "skill_name, description, content, tags."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            "Create a procedural skill draft for this missing Task sub-agent guidance.\n\n"
                            f"Task goal:\n{task.goal}\n\n"
                            f"Current user request:\n{user_message}\n\n"
                            f"Node id: {node_id}\n"
                            f"Node task:\n{node_task}\n\n"
                            f"Skill query:\n{skill_query}\n"
                            f"Required capabilities: {required_capabilities}\n\n"
                            "The content must be actionable guidance for a temporary sub-agent. "
                            "Do not include implementation claims or publish metadata."
                        ),
                    },
                ],
                tools=None,
                model=model,
                max_tokens=1200,
                temperature=0,
            )
            payload = self._parse_payload(response.content or "") or fallback
        except Exception:
            # Deliberate best-effort: any provider failure (including a missing
            # provider) degrades to the deterministic fallback draft.
            payload = fallback
        skill_name = _slug(str(payload.get("skill_name") or skill_query or node_id))
        content = str(payload.get("content") or "").strip()
        if not content:
            content = str(fallback["content"])
        frontmatter = {
            "description": str(payload.get("description") or f"Draft guidance for {skill_query or node_id}").strip(),
            "tags": self._normalize_tags(payload.get("tags")),
            "metadata": {
                "origin": "missing_task_subagent_skill",
                "task_id": task.task_id,
                "node_id": node_id,
                "attempt_index": attempt_index,
                "skill_query": skill_query,
                "required_capabilities": list(required_capabilities),
            },
        }
        draft = draft_service.create_new_skill_draft(
            skill_name=skill_name,
            proposed_content=content,
            proposed_frontmatter=frontmatter,
            created_by="task-skill-resolver",
            reason="generated_for_missing_task_subagent_skill",
            trigger_session_id=task.session_id,
            evidence_refs=[
                {
                    "task_id": task.task_id,
                    "session_id": task.session_id,
                    "attempt_index": attempt_index,
                    "node_id": node_id,
                    "skill_query": skill_query,
                    "required_capabilities": list(required_capabilities),
                }
            ],
        )
        context = SkillContext(
            name=f"draft:{draft.skill_name}",
            content=draft.proposed_content,
            version=f"draft:{draft.draft_id}",
            content_hash=canonical_hash(draft.proposed_content),
            activation_reason="generated_missing_skill",
            tool_hints=[],
        )
        return MissingSkillDraftResult(draft=draft, skill_context=context)

    @staticmethod
    def _normalize_tags(raw: Any) -> list[str]:
        """Coerce model-supplied tags into a non-empty list of clean strings.

        Model output is untrusted: a comma-separated string is split instead
        of being iterated character by character, and any other non-sequence
        value (number, dict, ``None``) falls back to the default tags instead
        of raising.
        """
        if isinstance(raw, str):
            items = raw.split(",")
        elif isinstance(raw, (list, tuple)):
            items = raw
        else:
            items = ()
        tags = [text for text in (str(item).strip() for item in items) if text]
        return tags or list(MissingSkillSynthesizer._DEFAULT_TAGS)

    @staticmethod
    def _parse_payload(text: str) -> dict[str, Any] | None:
        """Extract a JSON object from a model reply, or return ``None``.

        A surrounding Markdown code fence and a leading ``json`` language tag
        are removed, then the text between the first ``{`` and the last ``}``
        is parsed. Anything that is not a JSON object yields ``None``.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            return None
        return payload if isinstance(payload, dict) else None

    @staticmethod
    def _fallback_payload(*, skill_query: str, node_task: str, capabilities: list[str]) -> dict[str, Any]:
        """Build a deterministic draft payload that needs no model call."""
        title = skill_query or node_task or "task subagent guidance"
        capability_lines = "\n".join(f"- {item}" for item in capabilities) or "- Follow the node task precisely."
        return {
            "skill_name": _slug(title),
            "description": f"Draft guidance for {title}.",
            "tags": ["generated", "task-sub-agent"],
            "content": (
                f"# {title}\n\n"
                "Use this draft guidance only for the current delegated sub-task.\n\n"
                "## Objective\n"
                f"{node_task or title}\n\n"
                "## Capabilities to apply\n"
                f"{capability_lines}\n\n"
                "## Output\n"
                "Return concise evidence, decisions, and unresolved risks for the main Agent to synthesize."
            ),
        }
def _slug(value: str) -> str:
cleaned = re.sub(r"[^a-zA-Z0-9]+", "-", value.strip().lower()).strip("-")
return cleaned[:64].strip("-") or "generated-task-subagent-skill"

View File

@ -0,0 +1,354 @@
"""Manual skill learning pipeline orchestration."""
from __future__ import annotations
from typing import Any
from beaver.engine.providers import ProviderBundle
from beaver.memory.skills import SkillDraftEvalReport, SkillDraftSafetyReport, SkillLearningCandidate, SkillLearningStore
from beaver.skills.drafts import DraftService
from beaver.skills.learning.eval import SkillDraftEvaluator
from beaver.skills.learning.service import SkillLearningService
from beaver.skills.learning.safety import SkillDraftSafetyChecker
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillDraft, SkillReviewRecord, SkillReviewState, SkillSpec, SkillVersion
class SkillLearningPipelineService:
    """Coordinates candidate -> draft -> review -> publish lifecycle.

    The service is a thin orchestration layer: every method delegates to one
    of the collaborators passed to the constructor and, where appropriate,
    records the resulting status transition on the learning candidate.
    """

    def __init__(
        self,
        *,
        learning_store: SkillLearningStore,
        learning_service: SkillLearningService,
        draft_service: DraftService,
        review_service: ReviewService,
        publisher: SkillPublisher,
        safety_checker: SkillDraftSafetyChecker | None = None,
        evaluator: SkillDraftEvaluator | None = None,
    ) -> None:
        self.learning_store = learning_store
        self.learning_service = learning_service
        self.draft_service = draft_service
        self.review_service = review_service
        self.publisher = publisher
        # A default checker is created when none (or a falsy one) is supplied.
        self.safety_checker = safety_checker or SkillDraftSafetyChecker()
        # May stay None; evaluate_draft builds an evaluator on demand.
        self.evaluator = evaluator

    # ------------------------------------------------------------------
    # Candidates
    # ------------------------------------------------------------------

    def list_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return the store's learning candidates, optionally filtered by status."""
        return self.learning_store.list_learning_candidates(status=status)

    def get_candidate(self, candidate_id: str) -> SkillLearningCandidate:
        """Return the candidate with ``candidate_id``; raise ``ValueError`` if unknown."""
        for item in self.learning_store.list_learning_candidates():
            if item.candidate_id == candidate_id:
                return item
        raise ValueError(f"Unknown learning candidate: {candidate_id}")

    async def synthesize_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Synthesize a draft for the candidate and mark the candidate ``draft_ready``."""
        draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
        self.mark_draft_synthesized(candidate_id, draft)
        return draft

    async def regenerate_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Move the candidate back to ``synthesizing`` and synthesize a new draft."""
        self.learning_store.transition_learning_candidate(
            candidate_id,
            "synthesizing",
            event_type="draft_synthesis_started",
            last_error=None,
        )
        return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle)

    def mark_candidate_queued(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``queued`` and clear its last error."""
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "queued",
            event_type="candidate_queued",
            last_error=None,
        )
        return self._require_updated(updated, candidate_id)

    def mark_candidate_synthesizing(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``synthesizing`` and clear its last error."""
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "synthesizing",
            event_type="draft_synthesis_started",
            last_error=None,
        )
        return self._require_updated(updated, candidate_id)

    def mark_draft_synthesized(self, candidate_id: str, draft: SkillDraft) -> SkillLearningCandidate:
        """Attach ``draft`` to the candidate and transition it to ``draft_ready``."""
        candidate = self.get_candidate(candidate_id)
        # Work on a copy so the stored candidate's evidence is not mutated here.
        evidence = dict(candidate.evidence)
        evidence.update(draft_id=draft.draft_id, draft_skill_name=draft.skill_name)
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "draft_ready",
            event_type="draft_synthesis_completed",
            evidence=evidence,
            draft_id=draft.draft_id,
            draft_skill_name=draft.skill_name,
            risk_level=candidate.risk_level,
            last_error=None,
            payload={"draft_id": draft.draft_id, "skill_name": draft.skill_name},
        )
        return self._require_updated(updated, candidate_id)

    def mark_candidate_failed(
        self,
        candidate_id: str,
        error: str,
        *,
        retry_count: int,
        terminal: bool,
    ) -> SkillLearningCandidate:
        """Record a failure; the candidate becomes ``failed`` if terminal, else ``open``."""
        next_status = "failed" if terminal else "open"
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            next_status,
            event_type="failed",
            retry_count=retry_count,
            last_error=error,
            payload={"error": error, "terminal": terminal, "retry_count": retry_count},
        )
        return self._require_updated(updated, candidate_id)

    def mark_candidate_superseded(self, candidate_id: str, reason: str) -> SkillLearningCandidate:
        """Transition the candidate to ``superseded``, storing ``reason`` as its last error."""
        updated = self.learning_store.transition_learning_candidate(
            candidate_id,
            "superseded",
            event_type="superseded",
            last_error=reason,
            payload={"reason": reason},
        )
        return self._require_updated(updated, candidate_id)

    # ------------------------------------------------------------------
    # Drafts and reviews
    # ------------------------------------------------------------------

    def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
        """Return drafts from the draft service, optionally for one skill."""
        return self.draft_service.list_drafts(skill_name)

    def get_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
        """Return the requested draft; raise ``ValueError`` when it does not exist."""
        found = self.draft_service.get_draft(skill_name, draft_id)
        if found is None:
            raise ValueError(f"Draft not found: {skill_name}/{draft_id}")
        return found

    def submit_review(
        self,
        skill_name: str,
        draft_id: str,
        *,
        requested_by: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Submit the draft for review unless an existing safety report blocks it.

        A draft without any safety report is allowed through.

        Raises:
            ValueError: If a safety report exists and it failed or is critical.
        """
        safety = self.get_safety_report(skill_name, draft_id)
        if safety is not None:
            unsafe = not safety.passed or safety.risk_level == "critical"
            if unsafe:
                raise ValueError("Draft cannot enter review because safety check failed")
        return self.review_service.submit_for_review(
            skill_name,
            draft_id,
            reviewer_request=notes,
            requested_by=requested_by,
        )

    def approve(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Approve the draft's review and mark the linked candidate ``approved``."""
        record = self.review_service.approve(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "approved", "approved")
        return record

    def reject(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Reject the draft's review and mark the linked candidate ``rejected``."""
        record = self.review_service.reject(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "rejected", "rejected")
        return record

    def publish(
        self,
        skill_name: str,
        draft_id: str,
        *,
        publisher: str = "system",
        notes: str = "",
        confirm_high_risk: bool = False,
    ) -> SkillVersion | SkillSpec:
        """Publish the draft after all publish gates pass.

        ``retire_skill`` proposals are applied through the publisher's retire
        path; every other draft is published normally. The linked candidate is
        then marked ``published``.

        Raises:
            ValueError: If the draft is missing or a publish gate fails.
        """
        draft = self.get_draft(skill_name, draft_id)
        self._validate_publish_gates(draft, confirm_high_risk=confirm_high_risk)
        if draft.proposal_kind == "retire_skill":
            outcome = self.publisher.apply_retire_proposal(skill_name, draft_id, actor=publisher, notes=notes)
        else:
            outcome = self.publisher.publish(skill_name, draft_id, publisher=publisher, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "published", "published")
        return outcome

    def rollback(
        self,
        skill_name: str,
        target_version: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Roll the skill back to ``target_version``; a blank reason becomes "manual rollback"."""
        effective_reason = reason or "manual rollback"
        return self.publisher.rollback(skill_name, target_version, actor=actor, reason=effective_reason)

    def disable(
        self,
        skill_name: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Disable the skill; a blank reason becomes "manual disable"."""
        effective_reason = reason or "manual disable"
        return self.publisher.disable(skill_name, actor=actor, reason=effective_reason)

    def reviews_for_draft(self, skill_name: str, draft_id: str) -> list[SkillReviewRecord]:
        """Return the review records stored for the draft."""
        return self.review_service.store.list_reviews(skill_name, draft_id=draft_id)

    # ------------------------------------------------------------------
    # Safety and eval
    # ------------------------------------------------------------------

    def check_safety(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport:
        """Run the safety checker on the draft, persist the report, update the candidate.

        The candidate becomes ``safety_failed`` when the report fails or is
        critical. Otherwise it becomes ``draft_ready``, except that an
        existing ``eval_failed`` status is kept.
        """
        draft = self.get_draft(skill_name, draft_id)
        report = self.safety_checker.check(draft)
        self.learning_store.write_safety_report(report)

        failed = not report.passed or report.risk_level == "critical"
        status = "safety_failed" if failed else "draft_ready"
        current = self._candidate_by_draft(skill_name, draft_id)
        # A passing safety check must not hide an earlier eval failure.
        if not failed and current is not None and current.status == "eval_failed":
            status = "eval_failed"
        self._mark_candidate_by_draft(
            skill_name,
            draft_id,
            status,
            "safety_checked",
            safety_report_id=report.report_id,
            risk_level=report.risk_level,
            last_error="; ".join(report.blocked_reasons) if failed else None,
        )
        return report

    def get_safety_report(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport | None:
        """Return the stored safety report for the draft, if any."""
        return self.learning_store.get_safety_report(skill_name, draft_id)

    def get_eval_report(self, skill_name: str, draft_id: str) -> SkillDraftEvalReport | None:
        """Return the stored eval report for the draft, if any."""
        return self.learning_store.get_eval_report(skill_name, draft_id)

    async def evaluate_draft(
        self,
        candidate_id: str,
        skill_name: str,
        draft_id: str,
        *,
        provider_bundle: ProviderBundle | None,
    ) -> SkillDraftEvalReport:
        """Evaluate the draft, persist the report, and update the candidate.

        A skipped or passing eval leaves the candidate ``draft_ready`` unless
        it is currently ``safety_failed``, in which case that status and its
        error are kept. A failing eval sets ``eval_failed``.
        """
        draft = self.get_draft(skill_name, draft_id)
        candidate = self.get_candidate(candidate_id)
        evaluator = self.evaluator or SkillDraftEvaluator(self.learning_service.run_store)
        report = await evaluator.evaluate(candidate=candidate, draft=draft, provider_bundle=provider_bundle)
        self.learning_store.write_eval_report(report)

        if report.status == "skipped_provider_unavailable":
            status, error = "draft_ready", "eval skipped: provider unavailable"
        elif report.passed:
            status, error = "draft_ready", None
        else:
            status, error = "eval_failed", "eval failed"

        current = self._candidate_by_draft(skill_name, draft_id)
        # A non-failing eval must not hide an earlier safety failure.
        if status == "draft_ready" and current is not None and current.status == "safety_failed":
            status, error = "safety_failed", current.last_error

        self.learning_store.transition_learning_candidate(
            candidate_id,
            status,
            event_type="eval_completed",
            eval_report_id=report.report_id,
            last_error=error,
            payload=report.to_dict(),
        )
        return report

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _validate_publish_gates(self, draft: SkillDraft, *, confirm_high_risk: bool) -> None:
        """Raise ``ValueError`` unless the draft may be published.

        Gates, in order: an approved review exists; a safety report exists,
        passed, and is not critical; a high-risk draft was explicitly
        confirmed; and any eval report that actually ran has passed.
        """
        approved = SkillReviewState.APPROVED.value
        reviews = self.reviews_for_draft(draft.skill_name, draft.draft_id)
        if not any(review.status == approved for review in reviews):
            raise ValueError("Draft must have an approved review before publish")

        safety = self.get_safety_report(draft.skill_name, draft.draft_id)
        if safety is None:
            raise ValueError("Draft requires a passing safety report before publish")
        if not safety.passed:
            raise ValueError("Draft safety report did not pass")
        if safety.risk_level == "critical":
            raise ValueError("Critical risk drafts cannot be published")
        if safety.risk_level == "high" and not confirm_high_risk:
            raise ValueError("High risk draft publish requires confirm_high_risk=true")

        eval_report = self.get_eval_report(draft.skill_name, draft.draft_id)
        if eval_report is None or eval_report.status == "skipped_provider_unavailable":
            return
        if not eval_report.passed:
            raise ValueError("Draft eval report did not pass")

    def _mark_candidate_by_draft(
        self,
        skill_name: str,
        draft_id: str,
        status: str,
        event_type: str,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Transition the candidate linked to the draft, if there is one.

        Returns ``None`` when no candidate references the draft. A candidate in
        ``safety_failed``/``eval_failed`` is returned unchanged when the
        requested status is ``review_pending`` or ``approved``.
        """
        linked = self._candidate_by_draft(skill_name, draft_id)
        if linked is None:
            return None
        is_failed = linked.status in {"safety_failed", "eval_failed"}
        if is_failed and status in {"review_pending", "approved"}:
            return linked
        return self.learning_store.transition_learning_candidate(
            linked.candidate_id,
            status,
            event_type=event_type,
            **updates,
        )

    def _candidate_by_draft(self, skill_name: str, draft_id: str) -> SkillLearningCandidate | None:
        """Return the first candidate whose draft matches, or ``None``."""
        for item in self.learning_store.list_learning_candidates():
            if item.draft_skill_name == skill_name and item.draft_id == draft_id:
                return item
        return None

    @staticmethod
    def _require_updated(candidate: SkillLearningCandidate | None, candidate_id: str) -> SkillLearningCandidate:
        """Return ``candidate``, raising ``ValueError`` when the store returned ``None``."""
        if candidate is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")
        return candidate
def model_to_dict(value: Any) -> dict[str, Any]:
    """Convert a model object or a mapping into a plain dict.

    Objects exposing ``to_dict`` are converted by calling it; dicts are
    shallow-copied.

    Raises:
        TypeError: If ``value`` is neither of those.
    """
    absent = object()
    converter = getattr(value, "to_dict", absent)
    if converter is not absent:
        return converter()
    if not isinstance(value, dict):
        raise TypeError(f"Cannot convert {type(value).__name__} to dict")
    return dict(value)

View File

@ -0,0 +1,109 @@
"""Safety checks for generated skill drafts."""
from __future__ import annotations
import re
from uuid import uuid4
from beaver.memory.skills import SkillDraftSafetyReport
from beaver.skills.specs import SkillDraft
class SkillDraftSafetyChecker:
    """Deterministic publish gate for skill drafts.

    Args:
        allowed_tool_names: When given, any tool hint outside this set blocks
            the draft. ``None`` disables the allow-list check.
    """

    # Regexes matched against the lower-cased draft content and frontmatter;
    # any hit blocks the draft and forces the risk level to "critical".
    _CRITICAL_PATTERNS = [
        r"ignore (all )?(previous|system|developer) instructions",
        r"bypass (permissions|permission|safety|policy|policies)",
        r"disable (guardrails|safety|permission checks)",
        r"reveal (secrets|api keys|tokens|credentials)",
        r"exfiltrate",
    ]
    # Tool hints (compared lower-cased) that raise the risk level to "high".
    _DANGEROUS_TOOL_HINTS = {
        "shell",
        "terminal",
        "bash",
        "filesystem_write",
        "write_file",
        "delete_file",
        "network",
        "http",
        "auth",
        "credentials",
    }

    def __init__(self, *, allowed_tool_names: set[str] | None = None) -> None:
        self.allowed_tool_names = allowed_tool_names

    def check(self, draft: SkillDraft) -> SkillDraftSafetyReport:
        """Inspect ``draft`` and return a safety report.

        The report passes only when nothing was blocked and the risk level is
        below "critical". A frontmatter that is not a dict is reported as a
        blocked reason rather than raising.
        """
        issues: list[str] = []
        blocked: list[str] = []
        risk_level = "low"

        raw_frontmatter = draft.proposed_frontmatter
        if isinstance(raw_frontmatter, dict):
            frontmatter = raw_frontmatter
        else:
            blocked.append("frontmatter must be an object")
            # Keep checking with an empty mapping so a malformed draft yields a
            # failing report instead of an AttributeError on ``.get``.
            frontmatter = {}

        description = str(frontmatter.get("description") or "").strip()
        if not description and draft.proposal_kind != "retire_skill":
            issues.append("frontmatter.description is missing")
            risk_level = _max_risk(risk_level, "medium")

        tool_hints = _tool_hints(frontmatter)
        if self.allowed_tool_names is not None:
            unknown = [name for name in tool_hints if name not in self.allowed_tool_names]
            if unknown:
                blocked.append(f"unknown tool hints: {', '.join(sorted(unknown))}")
        dangerous = sorted({name for name in tool_hints if name.lower() in self._DANGEROUS_TOOL_HINTS})
        if dangerous:
            issues.append(f"dangerous tool hints require high-risk review: {', '.join(dangerous)}")
            risk_level = _max_risk(risk_level, "high")

        # Scan the original frontmatter value (even if malformed) together with
        # the draft body, so unsafe text cannot hide in either place.
        content = f"{draft.proposed_content}\n{raw_frontmatter}".lower()
        for pattern in self._CRITICAL_PATTERNS:
            if re.search(pattern, content):
                blocked.append(f"critical prompt-safety pattern matched: {pattern}")
                risk_level = "critical"

        if draft.proposal_kind in {"retire_skill", "merge_skills"}:
            risk_level = _max_risk(risk_level, "high")

        passed = not blocked and risk_level != "critical"
        return SkillDraftSafetyReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            passed=passed,
            risk_level=risk_level,
            issues=issues,
            blocked_reasons=blocked,
            suggested_fix=_suggest_fix(blocked, issues),
            created_at=_utc_now(),
        )
def _tool_hints(frontmatter: dict) -> list[str]:
raw = frontmatter.get("tools")
if isinstance(raw, list):
return [str(item).strip() for item in raw if str(item).strip()]
if isinstance(raw, str):
return [item.strip() for item in raw.split(",") if item.strip()]
return []
def _max_risk(left: str, right: str) -> str:
order = {"low": 0, "medium": 1, "high": 2, "critical": 3}
return left if order[left] >= order[right] else right
def _suggest_fix(blocked: list[str], issues: list[str]) -> str:
if blocked:
return "Remove blocked instructions or invalid tool hints before review."
if issues:
return "Review the flagged issues before publishing."
return ""
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,293 @@
"""Skill learning loop services."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from itertools import combinations
import re
from typing import Any
from uuid import uuid4
from beaver.engine.providers import ProviderBundle
from beaver.memory.runs.models import RunRecord, SkillEffectRecord
from beaver.memory.runs.store import RunMemoryStore
from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot
from beaver.memory.skills.store import SkillLearningStore
from beaver.skills.drafts.service import DraftService
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
from beaver.skills.specs import SkillActivationReceipt
@dataclass(slots=True)
class RunReceiptContext:
run_record: RunRecord
effect_records: list[SkillEffectRecord] = field(default_factory=list)
class SkillLearningService:
def __init__(
self,
*,
run_store: RunMemoryStore,
learning_store: SkillLearningStore,
draft_service: DraftService,
evidence_selector: EvidenceSelector,
synthesizer: SkillDraftSynthesizer | None = None,
) -> None:
self.run_store = run_store
self.learning_store = learning_store
self.draft_service = draft_service
self.evidence_selector = evidence_selector
self.synthesizer = synthesizer or SkillDraftSynthesizer()
def collect_run_receipts(
    self,
    run_result_context: RunReceiptContext,
    *,
    generate_candidates: bool = True,
) -> list[SkillLearningCandidate]:
    """Persist a run and its skill effects, rescore, and optionally build candidates.

    Returns the result of ``build_learning_candidates`` when
    ``generate_candidates`` is true, otherwise an empty list.
    """
    self.run_store.append_run_record(run_result_context.run_record)
    for effect_record in run_result_context.effect_records:
        self.run_store.append_skill_effect(effect_record)
    # Snapshots are refreshed before candidates are derived from the stores.
    self.rescore_skill_versions()
    return self.build_learning_candidates() if generate_candidates else []
def build_learning_candidates(self) -> list[SkillLearningCandidate]:
    """Build revision, new-skill, merge, and retire candidates and record the new ones.

    Every built candidate is returned; only those whose id is not yet in the
    learning store (and not already recorded earlier in this call) are written.
    """
    builders = (
        self._build_revision_candidates,
        self._build_new_skill_candidates,
        self._build_merge_candidates,
        self._build_retire_candidates,
    )
    built: list[SkillLearningCandidate] = []
    for build in builders:
        built.extend(build())

    known_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
    for candidate in built:
        if candidate.candidate_id in known_ids:
            continue
        self.learning_store.record_learning_candidate(candidate)
        known_ids.add(candidate.candidate_id)
    return built
async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any:
    """Create a draft for the given learning candidate.

    ``retire_skill`` candidates become retire proposals without any model
    call. All other kinds gather an evidence packet, ask the synthesizer for
    content, and create a new-skill, merge, or revision draft accordingly.

    Raises:
        ValueError: If no candidate with ``candidate_id`` exists.
    """
    by_id = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()}
    candidate = by_id.get(candidate_id)
    if candidate is None:
        raise ValueError(f"Unknown learning candidate: {candidate_id}")

    evidence_refs = [{"run_id": run_id} for run_id in candidate.source_run_ids]
    kind = candidate.kind

    if kind == "retire_skill":
        return self.draft_service.create_retire_proposal(
            skill_name=candidate.related_skill_names[0],
            base_version=candidate.evidence.get("skill_version"),
            created_by="learning-loop",
            reason=candidate.reason,
            evidence_refs=evidence_refs,
        )

    packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
    provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
    if provider_bundle.auxiliary_runtime is not None:
        model = provider_bundle.auxiliary_runtime.model
    else:
        model = provider_bundle.main_runtime.model

    def draft_fields(payload: Any) -> dict[str, Any]:
        # Fields shared by every draft built from a synthesizer payload.
        return {
            "proposed_content": payload["content"],
            "proposed_frontmatter": payload["frontmatter"],
            "created_by": "learning-loop",
            "reason": payload["change_reason"] or candidate.reason,
            "evidence_refs": evidence_refs,
        }

    if kind == "new_skill":
        payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model)
        return self.draft_service.create_new_skill_draft(
            skill_name=self._suggest_skill_name(candidate, packet),
            **draft_fields(payload),
        )

    if kind == "merge_skills":
        target_name = self._suggest_skill_name(candidate, packet)
        payload = await self.synthesizer.synthesize_merge(candidate, packet, provider, model)
        return self.draft_service.create_merge_draft(
            skill_name=target_name,
            base_version=None,
            **draft_fields(payload),
        )

    # Any remaining kind is treated as a revision of the first related skill.
    target_skill = candidate.related_skill_names[0]
    base_version = candidate.evidence.get("skill_version")
    payload = await self.synthesizer.synthesize_revision(candidate, packet, provider, model)
    return self.draft_service.create_revision_draft(
        skill_name=target_skill,
        base_version=base_version,
        **draft_fields(payload),
    )
def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
    """Recompute and persist one performance snapshot per skill version.

    Every ``(skill_name, skill_version)`` pair found on a run's activation
    receipts gets a snapshot, even when no effect record exists for it; in
    that case all counters are zero and ``latest_used_at`` is empty.

    Returns:
        The snapshots that were written to the learning store, in the order
        the skill versions were first encountered.
    """
    # Seed the grouping from activation receipts so versions without any
    # recorded effect still end up with an (empty) bucket.
    effects_by_version: dict[tuple[str, str], list[SkillEffectRecord]] = {
        (receipt.skill_name, receipt.skill_version): []
        for run in self.run_store.list_runs()
        for receipt in run.activated_skills
    }
    for effect in self._all_effects():
        bucket = effects_by_version.setdefault((effect.skill_name, effect.skill_version), [])
        bucket.append(effect)

    results: list[SkillPerformanceSnapshot] = []
    for (name, version), bucket in effects_by_version.items():
        total = len(bucket)
        wins = len([entry for entry in bucket if entry.success])
        # Walk the bucket from its end and keep the first non-null score.
        feedback = None
        for entry in reversed(bucket):
            if entry.feedback_score is not None:
                feedback = entry.feedback_score
                break
        snapshot = SkillPerformanceSnapshot(
            skill_name=name,
            skill_version=version,
            activation_count=total,
            success_count=wins,
            failure_count=total - wins,
            latest_used_at=bucket[-1].created_at if bucket else "",
            last_feedback_score=feedback,
        )
        self.learning_store.update_performance_snapshot(snapshot)
        results.append(snapshot)
    return results
def _build_revision_candidates(self) -> list[SkillLearningCandidate]:
    """Propose ``revise_skill`` candidates for low-performing skill versions.

    For each version reported by the learning store, up to five runs that
    used it are fetched; versions backed by fewer than two runs are ignored.

    Returns:
        One open candidate per qualifying skill version.
    """
    proposals: list[SkillLearningCandidate] = []
    for snap in self.learning_store.list_low_performing_versions():
        name, version = snap.skill_name, snap.skill_version
        evidence_runs = self.run_store.list_runs_by_skill(name, version=version, limit=5)
        if len(evidence_runs) >= 2:
            # dict.fromkeys de-duplicates session ids while keeping order.
            session_ids = list(dict.fromkeys(run.session_id for run in evidence_runs))
            proposals.append(
                SkillLearningCandidate(
                    candidate_id=self._candidate_id("revise", name, version),
                    kind="revise_skill",
                    source_run_ids=[run.run_id for run in evidence_runs],
                    source_session_ids=session_ids,
                    related_skill_names=[name],
                    reason=f"Skill version {name}/{version} is underperforming across repeated runs.",
                    evidence={"skill_version": version},
                    status="open",
                )
            )
    return proposals
def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]:
    """Propose ``new_skill`` candidates for recurring, skill-less successes.

    Runs are bucketed by the theme derived from their task text. A theme
    yields a candidate when it has at least two successful runs and none of
    those successful runs activated any skill. At most the last five
    successful runs are attached as evidence.

    Returns:
        One open candidate per qualifying theme.
    """
    runs_by_theme: dict[str, list[RunRecord]] = {}
    for run in self.run_store.list_runs():
        theme = self._task_theme(run.task_text)
        if theme:
            runs_by_theme.setdefault(theme, []).append(run)

    proposals: list[SkillLearningCandidate] = []
    for theme, members in runs_by_theme.items():
        wins = [run for run in members if run.success]
        if len(wins) < 2 or any(run.activated_skills for run in wins):
            continue
        recent = wins[-5:]
        proposals.append(
            SkillLearningCandidate(
                # The id embeds the number of successful runs seen so far.
                candidate_id=self._candidate_id("new", theme, str(len(wins))),
                kind="new_skill",
                source_run_ids=[run.run_id for run in recent],
                source_session_ids=list(dict.fromkeys(run.session_id for run in recent)),
                related_skill_names=[],
                reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.",
                evidence={"theme": theme},
                status="open",
            )
        )
    return proposals
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
    """Propose ``merge_skills`` candidates for skills that co-activate.

    Each run contributes every unordered pair of distinct skill names it
    activated (names sorted, so a pair always has the same orientation).
    Pairs seen in at least two runs become candidates, with up to the last
    five such runs attached as evidence.

    Returns:
        One open candidate per qualifying skill pair.
    """
    runs_by_pair: dict[tuple[str, str], list[RunRecord]] = {}
    for run in self.run_store.list_runs():
        names = sorted({receipt.skill_name for receipt in run.activated_skills})
        for pair in combinations(names, 2):
            runs_by_pair.setdefault(pair, []).append(run)

    proposals: list[SkillLearningCandidate] = []
    for (first, second), members in runs_by_pair.items():
        if len(members) < 2:
            continue
        recent = members[-5:]
        proposals.append(
            SkillLearningCandidate(
                candidate_id=self._candidate_id("merge", first, second),
                kind="merge_skills",
                source_run_ids=[run.run_id for run in recent],
                source_session_ids=list(dict.fromkeys(run.session_id for run in recent)),
                related_skill_names=[first, second],
                reason=f"Skills {first} and {second} repeatedly co-activate and may benefit from consolidation.",
                evidence={"pair": [first, second]},
                status="open",
            )
        )
    return proposals
def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]:
    """Propose ``retire_skill`` candidates for skill versions gone stale.

    Args:
        stale_days: A version is stale when its last recorded use is at
            least this many days before the current UTC time.

    Returns:
        One open candidate per stale version. Versions that were never
        activated, have no last-used timestamp, or whose timestamp cannot
        be parsed are skipped.
    """
    threshold = datetime.now(timezone.utc) - timedelta(days=stale_days)
    proposals: list[SkillLearningCandidate] = []
    for snap in self.learning_store.list_performance_snapshots():
        never_used = snap.activation_count == 0 or not snap.latest_used_at
        if never_used:
            continue
        last_seen = self._parse_timestamp(snap.latest_used_at)
        still_fresh = last_seen is None or last_seen > threshold
        if still_fresh:
            continue
        name, version, last_used_raw = snap.skill_name, snap.skill_version, snap.latest_used_at
        evidence_runs = self.run_store.list_runs_by_skill(name, version=version, limit=3)
        proposals.append(
            SkillLearningCandidate(
                candidate_id=self._candidate_id("retire", name, version),
                kind="retire_skill",
                source_run_ids=[run.run_id for run in evidence_runs],
                source_session_ids=list(dict.fromkeys(run.session_id for run in evidence_runs)),
                related_skill_names=[name],
                reason=(
                    f"Skill version {name}/{version} has been inactive "
                    f"since {last_used_raw} and may be ready for retirement."
                ),
                evidence={"skill_version": version, "latest_used_at": last_used_raw},
                status="open",
            )
        )
    return proposals
def _all_effects(self) -> list[SkillEffectRecord]:
    """Collect the effect records used for rescoring skill versions.

    Effects are loaded for every skill version that already has a
    performance snapshot. When that yields nothing (no snapshots yet, or no
    effects for them), the skill versions are discovered from the activation
    receipts of all runs instead.

    Each ``(skill_name, skill_version)`` pair is queried at most once, so a
    version activated by several runs does not have its effects appended
    repeatedly (which would inflate activation and success counts).

    Returns:
        A flat list of effect records, grouped by skill version in
        discovery order.
    """
    effects: list[SkillEffectRecord] = []
    fetched: set[tuple[str, str]] = set()

    def load(skill_name: str, skill_version: str) -> None:
        # Query the run store only the first time a version is seen.
        key = (skill_name, skill_version)
        if key in fetched:
            return
        fetched.add(key)
        effects.extend(self.run_store.list_skill_effects(skill_name, version=skill_version))

    for snapshot in self.learning_store.list_performance_snapshots():
        load(snapshot.skill_name, snapshot.skill_version)
    if effects:
        return effects

    # Bootstrap from runs when the snapshots produced no effects.
    fetched.clear()
    for record in self.run_store.list_runs():
        for receipt in record.activated_skills:
            load(receipt.skill_name, receipt.skill_version)
    return effects
@staticmethod
def _candidate_id(kind: str, *parts: str) -> str:
    """Build a candidate id of the form ``<kind>:<part>|<part>|...``."""
    suffix = "|".join(parts)
    return "{}:{}".format(kind, suffix)
@staticmethod
def _task_theme(task_text: str) -> str:
    """Reduce a task description to a coarse grouping key.

    The text is trimmed and lower-cased, whitespace runs are collapsed, and
    only the first eight words are kept. Blank input yields ``""``.
    """
    # Splitting on whitespace runs both collapses them and drops the edges.
    words = task_text.strip().lower().split()
    if not words:
        return ""
    return " ".join(words[:8])
@staticmethod
def _suggest_skill_name(candidate: SkillLearningCandidate, packet: EvidencePacket) -> str:
    """Pick a skill name for a draft generated from ``candidate``.

    Resolution order:
      1. the first related skill name on the candidate, if any;
      2. a slug (lower-case ``a-z0-9`` joined by ``-``, at most 48 chars)
         derived from the first task summary in the evidence packet;
      3. ``generated-skill-<8 hex chars>`` as a last resort.

    Returns:
        A non-empty skill name.
    """
    if candidate.related_skill_names:
        return candidate.related_skill_names[0]
    if packet.task_summaries:
        slug = re.sub(r"[^a-z0-9]+", "-", packet.task_summaries[0].lower()).strip("-")
        # Truncation may cut right after a separator; strip again so the
        # resulting name never ends with a dangling "-".
        slug = slug[:48].rstrip("-")
        if slug:
            return slug
    return f"generated-skill-{uuid4().hex[:8]}"
@staticmethod
def _parse_timestamp(value: str) -> datetime | None:
    """Parse an ISO-8601 string into a timezone-aware UTC ``datetime``.

    ``Z`` is rewritten to ``+00:00`` before parsing. Naive results are
    taken to be UTC; aware results are converted to UTC.

    Returns:
        The UTC datetime, or ``None`` when the string is not valid ISO-8601.
    """
    normalized = value.replace("Z", "+00:00")
    try:
        moment = datetime.fromisoformat(normalized)
    except ValueError:
        return None
    if moment.tzinfo is not None:
        return moment.astimezone(timezone.utc)
    return moment.replace(tzinfo=timezone.utc)

View File

@ -0,0 +1,118 @@
"""LLM-backed draft synthesis for skill learning."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers.base import LLMProvider
from beaver.skills.learning.evidence import EvidencePacket
from beaver.memory.skills.models import SkillLearningCandidate
class SkillDraftSynthesizer:
    """Turns a learning candidate plus its evidence into a skill-draft payload.

    Every public method returns a dict with exactly three keys:
    ``frontmatter`` (dict), ``content`` (non-empty str) and ``change_reason``
    (str, possibly empty). When the model reply cannot be parsed into that
    shape, or its content is blank, a deterministic payload built from the
    evidence is returned instead.
    """

    async def synthesize_revision(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a payload for revising an existing skill."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "revise")

    async def synthesize_new_skill(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a payload for a brand-new skill."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "new")

    async def synthesize_merge(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a payload for merging co-activating skills."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "merge")

    async def _synthesize(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        action: str,
    ) -> dict[str, Any]:
        """Ask the provider for a draft and normalise the reply.

        Args:
            candidate: The learning candidate being turned into a draft.
            evidence_packet: Task summaries and session excerpts backing it.
            provider: Chat provider; only ``chat(...)`` is awaited and the
                reply's ``content`` attribute is read.
            model: Model identifier forwarded to the provider.
            action: Short label (``"revise"``, ``"new"`` or ``"merge"``)
                embedded in the prompt and in fallback text.

        Returns:
            The parsed payload, or the fallback payload when the reply is
            unusable. Exceptions raised by the provider propagate.
        """
        prompt = self._build_prompt(candidate, evidence_packet, action)
        response = await provider.chat(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You synthesize Beaver skill drafts from execution evidence. "
                        "Return only JSON with keys: frontmatter, content, change_reason."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=1500,
            temperature=0,
        )
        payload = self._parse_payload(response.content or "")
        if payload:
            return payload
        return self._fallback_payload(candidate, evidence_packet, action)

    @staticmethod
    def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
        """Render the user prompt from the candidate and its evidence."""
        summaries = "\n- ".join(evidence_packet.task_summaries)
        excerpts = "\n\n".join(evidence_packet.session_excerpts)
        return (
            f"Action: {action}\n"
            f"Candidate kind: {candidate.kind}\n"
            f"Reason: {candidate.reason}\n"
            f"Related skills: {candidate.related_skill_names}\n"
            f"Task summaries:\n- {summaries}"
            f"\n\nSession excerpts:\n{excerpts}"
            "\n\nReturn JSON only."
        )

    @staticmethod
    def _parse_payload(content: str) -> dict[str, Any]:
        """Parse a model reply into a normalised payload.

        A reply wrapped in a single Markdown code fence is unwrapped first.

        Returns:
            A dict with ``frontmatter``, ``content`` and ``change_reason``,
            or ``{}`` when the reply is not a JSON object, lacks a dict
            ``frontmatter`` or a string ``content``, or its content is blank.
        """
        cleaned = content.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            # Only unwrap when the whole reply is one fenced block.
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            return {}
        if not isinstance(payload, dict):
            return {}
        frontmatter = payload.get("frontmatter")
        content_value = payload.get("content")
        if not isinstance(frontmatter, dict) or not isinstance(content_value, str):
            return {}
        body = content_value.strip()
        if not body:
            # A blank body would become an empty skill draft; report the
            # reply as unusable so the caller falls back to the evidence.
            return {}
        return {
            "frontmatter": frontmatter,
            "content": body,
            "change_reason": str(payload.get("change_reason") or ""),
        }

    @staticmethod
    def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
        """Build a deterministic payload from the evidence alone.

        The title is the first related skill name (underscores replaced by
        hyphens) or ``generated-skill``; the body lists up to five task
        summaries as bullets.
        """
        related = candidate.related_skill_names[0] if candidate.related_skill_names else "generated-skill"
        title = related.replace("_", "-")
        content = "\n".join(f"- {item}" for item in evidence_packet.task_summaries[:5]) or "- No evidence captured."
        return {
            "frontmatter": {
                "description": candidate.reason or f"Auto-generated {action} draft for {title}.",
                "tools": [],
            },
            "content": f"# {title}\n\n## Evidence\n\n{content}\n",
            "change_reason": candidate.reason or f"Fallback {action} synthesis.",
        }

View File

@ -0,0 +1,175 @@
"""Background worker for assisted skill learning."""
from __future__ import annotations
import asyncio
import os
from dataclasses import dataclass, field
from typing import Callable
from beaver.engine.providers import ProviderBundle
from beaver.memory.skills import SkillLearningCandidate
from beaver.skills.learning.pipeline import SkillLearningPipelineService
@dataclass(slots=True)
class SkillLearningWorkerConfig:
enabled: bool = True
max_drafts_per_run: int = 5
max_retries: int = 3
interval_seconds: float = 300.0
@classmethod
def from_env(cls) -> "SkillLearningWorkerConfig":
return cls(
enabled=_env_bool("BEAVER_SKILL_LEARNING_WORKER_ENABLED", True),
max_drafts_per_run=_env_int("BEAVER_SKILL_LEARNING_MAX_DRAFTS_PER_RUN", 5),
max_retries=_env_int("BEAVER_SKILL_LEARNING_MAX_RETRIES", 3),
interval_seconds=float(os.getenv("BEAVER_SKILL_LEARNING_INTERVAL_SECONDS", "300") or "300"),
)
@dataclass(slots=True)
class SkillLearningWorkerResult:
processed: int = 0
succeeded: int = 0
failed: int = 0
skipped: int = 0
failures: list[dict[str, str]] = field(default_factory=list)
def to_dict(self) -> dict:
return {
"processed": self.processed,
"succeeded": self.succeeded,
"failed": self.failed,
"skipped": self.skipped,
"failures": [dict(item) for item in self.failures],
}
class SkillLearningWorker:
    """Synthesizes drafts for open candidates; never approves or publishes."""

    # Candidate statuses that mean a draft for the skill is already in flight.
    _ACTIVE_DRAFT_STATUSES = {"queued", "synthesizing", "draft_ready", "review_pending", "approved"}

    def __init__(
        self,
        *,
        pipeline: SkillLearningPipelineService,
        provider_bundle_factory: Callable[[], ProviderBundle],
        config: SkillLearningWorkerConfig | None = None,
    ) -> None:
        """Store collaborators; the config is read from the environment when omitted."""
        self.pipeline = pipeline
        self.provider_bundle_factory = provider_bundle_factory
        self.config = config if config else SkillLearningWorkerConfig.from_env()
        self._running = False
        # Serialises overlapping run_once() calls.
        self._lock = asyncio.Lock()

    async def run_forever(self) -> None:
        """Run passes back to back, sleeping ``interval_seconds`` between them.

        Returns immediately when the worker is disabled. The loop ends once
        ``stop()`` has been called and the current pass and sleep finish.
        """
        if not self.config.enabled:
            return
        self._running = True
        try:
            while self._running:
                await self.run_once()
                await asyncio.sleep(self.config.interval_seconds)
        finally:
            self._running = False

    def stop(self) -> None:
        """Ask ``run_forever`` to exit after its current iteration."""
        self._running = False

    async def run_once(self) -> SkillLearningWorkerResult:
        """Process up to ``max_drafts_per_run`` selected candidates.

        A candidate that raises is recorded as a failure and does not stop
        the remaining candidates from being processed.

        Returns:
            Counters for the pass; all zero when the worker is disabled.
        """
        summary = SkillLearningWorkerResult()
        if not self.config.enabled:
            return summary
        async with self._lock:
            batch = self._select_candidates()[: self.config.max_drafts_per_run]
            for candidate in batch:
                summary.processed += 1
                try:
                    produced = await self._process_candidate(candidate)
                except Exception as exc:
                    message = str(exc)
                    summary.failed += 1
                    summary.failures.append({"candidate_id": candidate.candidate_id, "error": message})
                    self._mark_failure(candidate, message)
                else:
                    if produced:
                        summary.succeeded += 1
                    else:
                        summary.skipped += 1
        return summary

    def _select_candidates(self) -> list[SkillLearningCandidate]:
        """Return open candidates with retries left, highest ranked first."""

        def rank(item: SkillLearningCandidate) -> tuple:
            return (item.priority, item.confidence, item.created_at)

        eligible = []
        for item in self.pipeline.list_candidates():
            if item.status == "open" and item.retry_count < self.config.max_retries:
                eligible.append(item)
        eligible.sort(key=rank, reverse=True)
        return eligible

    async def _process_candidate(self, candidate: SkillLearningCandidate) -> bool:
        """Drive one candidate through synthesis, safety check and evaluation.

        Returns:
            ``False`` when the candidate was superseded by an in-flight
            draft, ``True`` once a draft has been synthesized (evaluation is
            skipped when the safety check fails or reports critical risk).
        """
        cid = candidate.candidate_id
        if self._has_active_draft(candidate):
            self.pipeline.mark_candidate_superseded(cid, "active draft already exists for this skill")
            return False

        self.pipeline.mark_candidate_queued(cid)
        self.pipeline.mark_candidate_synthesizing(cid)
        draft = await self.pipeline.synthesize_draft(
            cid,
            provider_bundle=self.provider_bundle_factory(),
        )
        self.pipeline.mark_draft_synthesized(cid, draft)

        verdict = self.pipeline.check_safety(draft.skill_name, draft.draft_id)
        if verdict.passed and verdict.risk_level != "critical":
            await self.pipeline.evaluate_draft(
                cid,
                draft.skill_name,
                draft.draft_id,
                provider_bundle=self.provider_bundle_factory(),
            )
        return True

    def _has_active_draft(self, candidate: SkillLearningCandidate) -> bool:
        """Tell whether another candidate already has a draft in flight for the same skill."""

        def names_of(entry: SkillLearningCandidate) -> set:
            names = set(entry.related_skill_names)
            if entry.draft_skill_name:
                names.add(entry.draft_skill_name)
            return names

        wanted = names_of(candidate)
        if not wanted:
            return False
        return any(
            other.candidate_id != candidate.candidate_id
            and other.status in self._ACTIVE_DRAFT_STATUSES
            and not wanted.isdisjoint(names_of(other))
            for other in self.pipeline.list_candidates()
        )

    def _mark_failure(self, candidate: SkillLearningCandidate, error: str) -> None:
        """Record a failed attempt; it is terminal once retries are exhausted."""
        attempts = candidate.retry_count + 1
        exhausted = attempts >= self.config.max_retries
        self.pipeline.mark_candidate_failed(
            candidate.candidate_id,
            error,
            retry_count=attempts,
            terminal=exhausted,
        )
def _env_bool(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Returns ``default`` when the variable is unset. Otherwise any value
    other than ``0``, ``false``, ``no`` or ``off`` (case-insensitive,
    surrounding whitespace ignored) counts as true, including ``""``.
    """
    try:
        raw = os.environ[name]
    except KeyError:
        return default
    normalized = raw.strip().lower()
    return normalized not in ("0", "false", "no", "off")
def _env_int(name: str, default: int) -> int:
    """Read an integer from the environment.

    Returns ``default`` when the variable is unset, empty, or not a valid
    integer literal.
    """
    raw = os.environ.get(name)
    if raw is None or raw == "":
        return default
    try:
        parsed = int(raw)
    except ValueError:
        parsed = default
    return parsed