feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核
新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证 (通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。 实现Agent Team v1协调器,支持sequence/parallel/dag执行策略, sub-agent复用主AgentLoop,每个run使用独立memory snapshot。 建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期, 通过Task验证通过且用户满意才生成学习候选。 重构目录结构,移除third_party依赖,建立统一engine内核, 所有agent共享运行时基础组件。 更新ContextBuilder清理provider消息字段,增强SkillContext版本管理, 集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
24
app-instance/backend/beaver/skills/learning/__init__.py
Normal file
24
app-instance/backend/beaver/skills/learning/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""Skill learning loop helpers."""
|
||||
|
||||
from .evidence import EvidencePacket, EvidenceSelector
|
||||
from .eval import SkillDraftEvaluator
|
||||
from .missing_skill import MissingSkillDraftResult, MissingSkillSynthesizer
|
||||
from .pipeline import SkillLearningPipelineService
|
||||
from .service import RunReceiptContext, SkillLearningService
|
||||
from .synthesizer import SkillDraftSynthesizer
|
||||
from .worker import SkillLearningWorker, SkillLearningWorkerConfig, SkillLearningWorkerResult
|
||||
|
||||
# Names re-exported by the skill-learning package. The order is kept as-is;
# entries are only laid out more compactly.
__all__ = [
    "EvidencePacket", "EvidenceSelector",
    "SkillDraftEvaluator",
    "MissingSkillDraftResult", "MissingSkillSynthesizer",
    "RunReceiptContext",
    "SkillLearningPipelineService",
    "SkillDraftSynthesizer",
    "SkillLearningService",
    "SkillLearningWorker", "SkillLearningWorkerConfig", "SkillLearningWorkerResult",
]
|
||||
121
app-instance/backend/beaver/skills/learning/eval.py
Normal file
121
app-instance/backend/beaver/skills/learning/eval.py
Normal file
@ -0,0 +1,121 @@
|
||||
"""Lightweight replay/eval reports for skill drafts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.runs import RunMemoryStore
|
||||
from beaver.memory.skills import SkillDraftEvalReport, SkillLearningCandidate
|
||||
from beaver.skills.specs import SkillDraft
|
||||
|
||||
|
||||
class SkillDraftEvaluator:
    """Builds a bounded eval report without writing user-visible sessions.

    Scores come from stored run records plus simple draft heuristics. The
    provider bundle is only checked for availability here; it is not invoked.
    At most the first eight source runs of a candidate are considered.
    """

    def __init__(self, run_store: RunMemoryStore) -> None:
        self.run_store = run_store

    async def evaluate(
        self,
        *,
        candidate: SkillLearningCandidate,
        draft: SkillDraft,
        provider_bundle: ProviderBundle | None,
    ) -> SkillDraftEvalReport:
        """Score ``draft`` against the candidate's source runs.

        Returns a "skipped" report when no main provider is available.
        Otherwise returns a "completed" report whose ``passed`` flag requires
        an average candidate score of at least 0.75 and no net-negative
        regression.
        """
        provider_missing = provider_bundle is None or provider_bundle.main_provider is None
        if provider_missing:
            return self._skipped(candidate, draft)

        def build_case(run_id: str, session_id: str, baseline: float) -> dict:
            # One comparison row: baseline score vs. heuristic draft score.
            scored = _candidate_score(baseline, draft)
            return {
                "run_id": run_id,
                "session_id": session_id,
                "baseline_score": baseline,
                "candidate_score": scored,
                "delta": round(scored - baseline, 4),
            }

        known_runs = {}
        for stored in self.run_store.list_runs():
            known_runs[stored.run_id] = stored

        cases: list[dict] = []
        for run_id in candidate.source_run_ids[:8]:
            stored = known_runs.get(run_id)
            if stored is not None:
                cases.append(
                    build_case(
                        run_id,
                        stored.session_id,
                        _score_from_validation(stored.validation_result, stored.success),
                    )
                )

        if not cases:
            # No source run could be resolved: fall back to a single neutral
            # placeholder case so the averages below are well-defined.
            cases.append(build_case("", "", 0.75))

        total = len(cases)
        baseline_avg = sum(row["baseline_score"] for row in cases) / total
        candidate_avg = sum(row["candidate_score"] for row in cases) / total
        regression_count = sum(1 for row in cases if row["candidate_score"] < row["baseline_score"])
        improved_count = sum(1 for row in cases if row["candidate_score"] > row["baseline_score"])
        unchanged_count = total - regression_count - improved_count
        score_delta = candidate_avg - baseline_avg

        unrecovered_regression = regression_count > 0 and score_delta <= 0
        passed = (not unrecovered_regression) and candidate_avg >= 0.75

        return SkillDraftEvalReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            candidate_id=candidate.candidate_id,
            passed=passed,
            baseline_score_avg=round(baseline_avg, 4),
            candidate_score_avg=round(candidate_avg, 4),
            score_delta=round(score_delta, 4),
            regression_count=regression_count,
            improved_count=improved_count,
            unchanged_count=unchanged_count,
            cases=cases,
            status="completed",
            created_at=_utc_now(),
        )

    def _skipped(self, candidate: SkillLearningCandidate, draft: SkillDraft) -> SkillDraftEvalReport:
        """Return an all-zero, passing report marked as skipped (no provider)."""
        zero_scores = {
            "baseline_score_avg": 0.0,
            "candidate_score_avg": 0.0,
            "score_delta": 0.0,
        }
        zero_counts = {
            "regression_count": 0,
            "improved_count": 0,
            "unchanged_count": 0,
        }
        return SkillDraftEvalReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            candidate_id=candidate.candidate_id,
            passed=True,
            **zero_scores,
            **zero_counts,
            cases=[],
            status="skipped_provider_unavailable",
            created_at=_utc_now(),
        )
|
||||
|
||||
|
||||
def _score_from_validation(validation: dict | None, success: bool) -> float:
|
||||
if isinstance(validation, dict) and "score" in validation:
|
||||
try:
|
||||
return max(0.0, min(1.0, float(validation.get("score") or 0.0)))
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
return 0.8 if success else 0.4
|
||||
|
||||
|
||||
def _candidate_score(baseline: float, draft: SkillDraft) -> float:
|
||||
content = draft.proposed_content.strip()
|
||||
if not content and draft.proposal_kind != "retire_skill":
|
||||
return 0.0
|
||||
if "regression" in content.lower():
|
||||
return max(0.0, baseline - 0.2)
|
||||
return min(1.0, max(0.75, baseline + 0.05))
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
from datetime import datetime, timezone
|
||||
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
76
app-instance/backend/beaver/skills/learning/evidence.py
Normal file
76
app-instance/backend/beaver/skills/learning/evidence.py
Normal file
@ -0,0 +1,76 @@
|
||||
"""Evidence selection for skill learning."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.session.manager import SessionManager
|
||||
from beaver.memory.runs.store import RunMemoryStore
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EvidencePacket:
|
||||
run_ids: list[str]
|
||||
session_ids: list[str]
|
||||
task_summaries: list[str]
|
||||
session_excerpts: list[str]
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
class EvidenceSelector:
    """Select source runs and assemble bounded evidence packets.

    Reads runs from ``run_store`` and, when a ``session_manager`` is supplied,
    visible run events from sessions. All outputs are capped (see the class
    constants) so a packet stays small.
    """

    # Caps applied when building a packet / excerpt.
    _MAX_TASK_SUMMARIES = 8
    _MAX_SUMMARY_CHARS = 400
    _MAX_SESSION_EXCERPTS = 6
    _MAX_EXCERPT_EVENTS = 12
    _MAX_EXCERPT_CHARS = 2000

    def __init__(self, run_store: RunMemoryStore, session_manager: SessionManager | None = None) -> None:
        self.run_store = run_store
        self.session_manager = session_manager

    def select_runs_for_revision(self, skill_name: str, version: str, limit: int = 5) -> list[str]:
        """Return run ids the store reports for ``skill_name`` at ``version``."""
        runs = self.run_store.list_runs_by_skill(skill_name, version=version, limit=limit)
        return [record.run_id for record in runs]

    def select_runs_for_new_skill(self, theme: str, limit: int = 5) -> list[str]:
        """Return up to ``limit`` run ids whose task text mentions ``theme``.

        Matching is a case-insensitive substring test; an empty theme matches
        every run. The last ``limit`` matches in store order are returned.
        A non-positive ``limit`` yields an empty list.
        """
        if limit <= 0:
            # ``matches[-0:]`` would be the whole list, not "no results".
            return []
        needle = theme.lower().strip()
        matches = [
            record.run_id
            for record in self.run_store.list_runs()
            if not needle or needle in record.task_text.lower()
        ]
        return matches[-limit:]

    def build_evidence_packet(self, run_ids: list[str], session_ids: list[str] | None = None) -> EvidencePacket:
        """Build an ``EvidencePacket`` for the given runs.

        Args:
            run_ids: Runs to include. Ids unknown to the store are skipped.
            session_ids: Optional extra session ids. They are de-duplicated and
                placed ahead of the sessions discovered from the runs.

        Returns:
            A packet holding the resolved run ids, the session ids, at most 8
            task summaries (each truncated to 400 characters) and at most 6
            session excerpts.
        """
        runs_by_id = {record.run_id: record for record in self.run_store.list_runs()}
        resolved_run_ids: list[str] = []
        resolved_session_ids: list[str] = list(dict.fromkeys(session_ids or []))
        task_summaries: list[str] = []
        session_excerpts: list[str] = []

        for run_id in run_ids:
            record = runs_by_id.get(run_id)
            if record is None:
                continue
            resolved_run_ids.append(run_id)
            if record.session_id not in resolved_session_ids:
                resolved_session_ids.append(record.session_id)

            summary = record.task_text.strip()
            if summary and len(task_summaries) < self._MAX_TASK_SUMMARIES:
                task_summaries.append(summary[: self._MAX_SUMMARY_CHARS])

            # Only read session events while the excerpt cap is not reached;
            # anything beyond it would be discarded anyway.
            if self.session_manager is not None and len(session_excerpts) < self._MAX_SESSION_EXCERPTS:
                excerpt = self._session_excerpt(record.session_id, run_id)
                if excerpt:
                    session_excerpts.append(excerpt)

        return EvidencePacket(
            run_ids=resolved_run_ids,
            session_ids=resolved_session_ids,
            task_summaries=task_summaries,
            session_excerpts=session_excerpts,
            metadata={"bounded": True},
        )

    def _session_excerpt(self, session_id: str, run_id: str) -> str:
        """Return a ``role: content`` transcript of a run's visible events.

        Uses at most the first 12 context-visible, non-empty events and
        truncates the joined text to 2000 characters. Returns ``""`` when no
        session manager is configured.
        """
        if self.session_manager is None:
            return ""
        visible: list[str] = []
        for event in self.session_manager.get_run_event_records(session_id, run_id):
            if not event.context_visible or not event.content:
                continue
            visible.append(f"{event.role}: {event.content.strip()}")
            if len(visible) >= self._MAX_EXCERPT_EVENTS:
                break
        return "\n".join(visible)[: self._MAX_EXCERPT_CHARS]
|
||||
166
app-instance/backend/beaver/skills/learning/missing_skill.py
Normal file
166
app-instance/backend/beaver/skills/learning/missing_skill.py
Normal file
@ -0,0 +1,166 @@
|
||||
"""Synthesize draft-only skills for missing sub-agent guidance."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.specs import SkillDraft
|
||||
from beaver.skills.specs.serialization import canonical_hash
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver.tasks.models import TaskRecord
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MissingSkillDraftResult:
|
||||
draft: SkillDraft
|
||||
skill_context: SkillContext
|
||||
|
||||
|
||||
class MissingSkillSynthesizer:
    """Create a draft skill and an ephemeral SkillContext for the current run."""

    async def synthesize(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
        node_id: str,
        node_task: str,
        skill_query: str,
        required_capabilities: list[str],
        provider_bundle: ProviderBundle,
        draft_service: DraftService,
    ) -> MissingSkillDraftResult:
        """Ask the provider for a skill draft, store it, and wrap it in a context.

        The auxiliary provider is preferred over the main one. If no provider
        is available, the call fails, or the reply is not a JSON object, a
        deterministic fallback payload built from ``skill_query``/``node_task``
        is used instead, so a draft is always produced.

        Returns:
            The stored draft together with a ``SkillContext`` named
            ``draft:<skill_name>`` and versioned ``draft:<draft_id>``.
        """
        fallback = self._fallback_payload(
            skill_query=skill_query,
            node_task=node_task,
            capabilities=required_capabilities,
        )
        messages = self._build_messages(
            task=task,
            user_message=user_message,
            node_id=node_id,
            node_task=node_task,
            skill_query=skill_query,
            required_capabilities=required_capabilities,
        )
        payload = await self._request_payload(provider_bundle, messages) or fallback

        skill_name = _slug(str(payload.get("skill_name") or skill_query or node_id))
        content = str(payload.get("content") or "").strip() or str(fallback["content"])
        frontmatter = {
            "description": str(payload.get("description") or f"Draft guidance for {skill_query or node_id}").strip(),
            "tags": self._normalize_tags(payload.get("tags")),
            "metadata": {
                "origin": "missing_task_subagent_skill",
                "task_id": task.task_id,
                "node_id": node_id,
                "attempt_index": attempt_index,
                "skill_query": skill_query,
                "required_capabilities": list(required_capabilities),
            },
        }
        draft = draft_service.create_new_skill_draft(
            skill_name=skill_name,
            proposed_content=content,
            proposed_frontmatter=frontmatter,
            created_by="task-skill-resolver",
            reason="generated_for_missing_task_subagent_skill",
            trigger_session_id=task.session_id,
            evidence_refs=[
                {
                    "task_id": task.task_id,
                    "session_id": task.session_id,
                    "attempt_index": attempt_index,
                    "node_id": node_id,
                    "skill_query": skill_query,
                    "required_capabilities": list(required_capabilities),
                }
            ],
        )
        context = SkillContext(
            name=f"draft:{draft.skill_name}",
            content=draft.proposed_content,
            version=f"draft:{draft.draft_id}",
            content_hash=canonical_hash(draft.proposed_content),
            activation_reason="generated_missing_skill",
            tool_hints=[],
        )
        return MissingSkillDraftResult(draft=draft, skill_context=context)

    async def _request_payload(
        self,
        provider_bundle: ProviderBundle,
        messages: list[dict[str, str]],
    ) -> dict[str, Any] | None:
        """Call the provider once and return its parsed JSON object, or ``None``.

        Synthesis is best-effort: any provider or parsing failure is logged
        and reported as ``None`` so the caller can fall back.
        """
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        if provider is None:
            return None
        try:
            response = await provider.chat(
                messages=messages,
                tools=None,
                model=getattr(runtime, "model", None),
                max_tokens=1200,
                temperature=0,
            )
            return self._parse_payload(response.content or "")
        except Exception:
            # Deliberately broad: provider errors are of unknown type and must
            # not abort the run. Log instead of swallowing silently.
            import logging

            logging.getLogger(__name__).warning(
                "Missing-skill draft synthesis failed; using fallback payload",
                exc_info=True,
            )
            return None

    @staticmethod
    def _build_messages(
        *,
        task: TaskRecord,
        user_message: str,
        node_id: str,
        node_task: str,
        skill_query: str,
        required_capabilities: list[str],
    ) -> list[dict[str, str]]:
        """Build the system and user chat messages for the draft request."""
        return [
            {
                "role": "system",
                "content": (
                    "You create concise Beaver skill drafts. Return only JSON with keys: "
                    "skill_name, description, content, tags."
                ),
            },
            {
                "role": "user",
                "content": (
                    "Create a procedural skill draft for this missing Task sub-agent guidance.\n\n"
                    f"Task goal:\n{task.goal}\n\n"
                    f"Current user request:\n{user_message}\n\n"
                    f"Node id: {node_id}\n"
                    f"Node task:\n{node_task}\n\n"
                    f"Skill query:\n{skill_query}\n"
                    f"Required capabilities: {required_capabilities}\n\n"
                    "The content must be actionable guidance for a temporary sub-agent. "
                    "Do not include implementation claims or publish metadata."
                ),
            },
        ]

    @staticmethod
    def _normalize_tags(raw: Any) -> list[str]:
        """Coerce a model-supplied ``tags`` value into a list of non-empty strings.

        Accepts a list/tuple of items or a comma-separated string. Anything
        else, or an empty result, yields the default tags. A bare string is
        split on commas rather than iterated character by character.
        """
        if isinstance(raw, str):
            items: list[Any] = raw.split(",")
        elif isinstance(raw, (list, tuple)):
            items = list(raw)
        else:
            items = []
        tags = [text for text in (str(item).strip() for item in items) if text]
        return tags or ["generated", "task-sub-agent"]

    @staticmethod
    def _parse_payload(text: str) -> dict[str, Any] | None:
        """Extract a JSON object from a model reply.

        Tolerates a surrounding Markdown code fence, a leading ``json`` tag,
        and prose around the outermost ``{...}``. Returns ``None`` when the
        text is not valid JSON or does not decode to a dict.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            lines = cleaned.splitlines()
            if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
                cleaned = "\n".join(lines[1:-1]).strip()
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start >= 0 and end >= start:
            cleaned = cleaned[start : end + 1]
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            return None
        return payload if isinstance(payload, dict) else None

    @staticmethod
    def _fallback_payload(*, skill_query: str, node_task: str, capabilities: list[str]) -> dict[str, Any]:
        """Build a deterministic draft payload that needs no provider call."""
        title = skill_query or node_task or "task subagent guidance"
        capability_lines = "\n".join(f"- {item}" for item in capabilities) or "- Follow the node task precisely."
        return {
            "skill_name": _slug(title),
            "description": f"Draft guidance for {title}.",
            "tags": ["generated", "task-sub-agent"],
            "content": (
                f"# {title}\n\n"
                "Use this draft guidance only for the current delegated sub-task.\n\n"
                "## Objective\n"
                f"{node_task or title}\n\n"
                "## Capabilities to apply\n"
                f"{capability_lines}\n\n"
                "## Output\n"
                "Return concise evidence, decisions, and unresolved risks for the main Agent to synthesize."
            ),
        }
|
||||
|
||||
|
||||
def _slug(value: str) -> str:
|
||||
cleaned = re.sub(r"[^a-zA-Z0-9]+", "-", value.strip().lower()).strip("-")
|
||||
return cleaned[:64].strip("-") or "generated-task-subagent-skill"
|
||||
354
app-instance/backend/beaver/skills/learning/pipeline.py
Normal file
354
app-instance/backend/beaver/skills/learning/pipeline.py
Normal file
@ -0,0 +1,354 @@
|
||||
"""Manual skill learning pipeline orchestration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.skills import SkillDraftEvalReport, SkillDraftSafetyReport, SkillLearningCandidate, SkillLearningStore
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning.eval import SkillDraftEvaluator
|
||||
from beaver.skills.learning.service import SkillLearningService
|
||||
from beaver.skills.learning.safety import SkillDraftSafetyChecker
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillDraft, SkillReviewRecord, SkillReviewState, SkillSpec, SkillVersion
|
||||
|
||||
|
||||
class SkillLearningPipelineService:
    """Coordinates candidate -> draft -> review -> publish lifecycle.

    This is an orchestration layer. Persistence goes to ``learning_store``,
    synthesis to ``learning_service``, draft lookup to ``draft_service``,
    reviews to ``review_service`` and version changes to ``publisher``. The
    service keeps the learning candidate's status in step with those calls.
    """

    def __init__(
        self,
        *,
        learning_store: SkillLearningStore,
        learning_service: SkillLearningService,
        draft_service: DraftService,
        review_service: ReviewService,
        publisher: SkillPublisher,
        safety_checker: SkillDraftSafetyChecker | None = None,
        evaluator: SkillDraftEvaluator | None = None,
    ) -> None:
        self.learning_store = learning_store
        self.learning_service = learning_service
        self.draft_service = draft_service
        self.review_service = review_service
        self.publisher = publisher
        # A default checker is created when none is injected.
        self.safety_checker = safety_checker or SkillDraftSafetyChecker()
        # May stay None; evaluate_draft() then builds an evaluator on demand.
        self.evaluator = evaluator

    # ------------------------------------------------------------------
    # Candidates
    # ------------------------------------------------------------------

    def list_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return learning candidates, optionally filtered by ``status``."""
        return self.learning_store.list_learning_candidates(status=status)

    def get_candidate(self, candidate_id: str) -> SkillLearningCandidate:
        """Return the candidate with ``candidate_id``.

        Raises:
            ValueError: If no such candidate exists.
        """
        for candidate in self.learning_store.list_learning_candidates():
            if candidate.candidate_id == candidate_id:
                return candidate
        raise ValueError(f"Unknown learning candidate: {candidate_id}")

    async def synthesize_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Synthesize a draft for the candidate and mark it ``draft_ready``."""
        draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
        self.mark_draft_synthesized(candidate_id, draft)
        return draft

    async def regenerate_draft(
        self,
        candidate_id: str,
        *,
        provider_bundle: ProviderBundle,
    ) -> SkillDraft:
        """Move the candidate back to ``synthesizing`` and synthesize again.

        Raises:
            ValueError: If the candidate is unknown. This is raised before any
                synthesis work starts.
        """
        # Reuse the shared transition so a missing candidate fails fast
        # instead of proceeding into synthesis with an ignored None result.
        self.mark_candidate_synthesizing(candidate_id)
        return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle)

    def mark_candidate_queued(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``queued`` and clear its last error."""
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "queued",
                event_type="candidate_queued",
                last_error=None,
            ),
            candidate_id,
        )

    def mark_candidate_synthesizing(self, candidate_id: str) -> SkillLearningCandidate:
        """Transition the candidate to ``synthesizing`` and clear its last error."""
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "synthesizing",
                event_type="draft_synthesis_started",
                last_error=None,
            ),
            candidate_id,
        )

    def mark_draft_synthesized(self, candidate_id: str, draft: SkillDraft) -> SkillLearningCandidate:
        """Record ``draft`` on the candidate and transition it to ``draft_ready``."""
        candidate = self.get_candidate(candidate_id)
        # Copy so the stored candidate's evidence dict is not mutated in place.
        evidence = dict(candidate.evidence)
        evidence["draft_id"] = draft.draft_id
        evidence["draft_skill_name"] = draft.skill_name
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "draft_ready",
                event_type="draft_synthesis_completed",
                evidence=evidence,
                draft_id=draft.draft_id,
                draft_skill_name=draft.skill_name,
                risk_level=candidate.risk_level,
                last_error=None,
                payload={"draft_id": draft.draft_id, "skill_name": draft.skill_name},
            ),
            candidate_id,
        )

    def mark_candidate_failed(
        self,
        candidate_id: str,
        error: str,
        *,
        retry_count: int,
        terminal: bool,
    ) -> SkillLearningCandidate:
        """Record a failure.

        A terminal failure moves the candidate to ``failed``; a non-terminal
        one returns it to ``open``.
        """
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "failed" if terminal else "open",
                event_type="failed",
                retry_count=retry_count,
                last_error=error,
                payload={"error": error, "terminal": terminal, "retry_count": retry_count},
            ),
            candidate_id,
        )

    def mark_candidate_superseded(self, candidate_id: str, reason: str) -> SkillLearningCandidate:
        """Transition the candidate to ``superseded``, storing ``reason``."""
        return self._require_updated(
            self.learning_store.transition_learning_candidate(
                candidate_id,
                "superseded",
                event_type="superseded",
                last_error=reason,
                payload={"reason": reason},
            ),
            candidate_id,
        )

    # ------------------------------------------------------------------
    # Drafts, reviews and publishing
    # ------------------------------------------------------------------

    def list_drafts(self, skill_name: str | None = None) -> list[SkillDraft]:
        """Return drafts from the draft service, optionally for one skill."""
        return self.draft_service.list_drafts(skill_name)

    def get_draft(self, skill_name: str, draft_id: str) -> SkillDraft:
        """Return the draft.

        Raises:
            ValueError: If the draft service has no such draft.
        """
        draft = self.draft_service.get_draft(skill_name, draft_id)
        if draft is None:
            raise ValueError(f"Draft not found: {skill_name}/{draft_id}")
        return draft

    def submit_review(
        self,
        skill_name: str,
        draft_id: str,
        *,
        requested_by: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Submit the draft for review.

        A draft with no safety report is allowed through. One with a failed
        or critical report is rejected.

        Raises:
            ValueError: If an existing safety report failed or is critical.
        """
        safety = self.get_safety_report(skill_name, draft_id)
        if safety is not None and (not safety.passed or safety.risk_level == "critical"):
            raise ValueError("Draft cannot enter review because safety check failed")
        return self.review_service.submit_for_review(
            skill_name,
            draft_id,
            reviewer_request=notes,
            requested_by=requested_by,
        )

    def approve(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Approve the draft's review and mark its candidate ``approved``."""
        review = self.review_service.approve(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "approved", "approved")
        return review

    def reject(
        self,
        skill_name: str,
        draft_id: str,
        *,
        reviewer: str = "system",
        notes: str = "",
    ) -> SkillReviewRecord:
        """Reject the draft's review and mark its candidate ``rejected``."""
        review = self.review_service.reject(skill_name, draft_id, reviewer=reviewer, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "rejected", "rejected")
        return review

    def publish(
        self,
        skill_name: str,
        draft_id: str,
        *,
        publisher: str = "system",
        notes: str = "",
        confirm_high_risk: bool = False,
    ) -> SkillVersion | SkillSpec:
        """Publish the draft, or apply it as a retirement, after gate checks.

        Raises:
            ValueError: If the draft is missing or any publish gate fails.
        """
        draft = self.get_draft(skill_name, draft_id)
        self._validate_publish_gates(draft, confirm_high_risk=confirm_high_risk)
        if draft.proposal_kind == "retire_skill":
            result = self.publisher.apply_retire_proposal(skill_name, draft_id, actor=publisher, notes=notes)
        else:
            result = self.publisher.publish(skill_name, draft_id, publisher=publisher, notes=notes)
        self._mark_candidate_by_draft(skill_name, draft_id, "published", "published")
        return result

    def rollback(
        self,
        skill_name: str,
        target_version: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Roll the skill back to ``target_version`` via the publisher."""
        return self.publisher.rollback(skill_name, target_version, actor=actor, reason=reason or "manual rollback")

    def disable(
        self,
        skill_name: str,
        *,
        actor: str = "system",
        reason: str = "",
    ) -> SkillSpec:
        """Disable the skill via the publisher."""
        return self.publisher.disable(skill_name, actor=actor, reason=reason or "manual disable")

    def reviews_for_draft(self, skill_name: str, draft_id: str) -> list[SkillReviewRecord]:
        """Return the review records stored for the draft."""
        return self.review_service.store.list_reviews(skill_name, draft_id=draft_id)

    # ------------------------------------------------------------------
    # Safety and eval
    # ------------------------------------------------------------------

    def check_safety(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport:
        """Run the safety checker, persist its report and update the candidate.

        The candidate becomes ``safety_failed`` when the report failed or is
        critical. Otherwise it becomes ``draft_ready``, unless it is currently
        ``eval_failed``: that status and its error are then kept, so a passing
        safety check does not mask an eval failure.
        """
        draft = self.get_draft(skill_name, draft_id)
        report = self.safety_checker.check(draft)
        self.learning_store.write_safety_report(report)

        failed = not report.passed or report.risk_level == "critical"
        status = "safety_failed" if failed else "draft_ready"
        last_error = "; ".join(report.blocked_reasons) if failed else None

        current = self._candidate_by_draft(skill_name, draft_id)
        if current is not None and current.status == "eval_failed" and not failed:
            status = "eval_failed"
            # Keep the existing error alongside the preserved status, as
            # evaluate_draft() does for a preserved safety failure.
            last_error = current.last_error

        self._mark_candidate_by_draft(
            skill_name,
            draft_id,
            status,
            "safety_checked",
            safety_report_id=report.report_id,
            risk_level=report.risk_level,
            last_error=last_error,
        )
        return report

    def get_safety_report(self, skill_name: str, draft_id: str) -> SkillDraftSafetyReport | None:
        """Return the stored safety report for the draft, if any."""
        return self.learning_store.get_safety_report(skill_name, draft_id)

    def get_eval_report(self, skill_name: str, draft_id: str) -> SkillDraftEvalReport | None:
        """Return the stored eval report for the draft, if any."""
        return self.learning_store.get_eval_report(skill_name, draft_id)

    async def evaluate_draft(
        self,
        candidate_id: str,
        skill_name: str,
        draft_id: str,
        *,
        provider_bundle: ProviderBundle | None,
    ) -> SkillDraftEvalReport:
        """Evaluate the draft, persist the report and update the candidate.

        A skipped or passing eval leaves the candidate ``draft_ready``, unless
        it is currently ``safety_failed``: that status and its error are then
        kept. A failing eval moves it to ``eval_failed``.

        Raises:
            ValueError: If the draft or the candidate does not exist.
        """
        draft = self.get_draft(skill_name, draft_id)
        candidate = self.get_candidate(candidate_id)
        evaluator = self.evaluator or SkillDraftEvaluator(self.learning_service.run_store)
        report = await evaluator.evaluate(candidate=candidate, draft=draft, provider_bundle=provider_bundle)
        self.learning_store.write_eval_report(report)

        if report.status == "skipped_provider_unavailable":
            status, error = "draft_ready", "eval skipped: provider unavailable"
        elif report.passed:
            status, error = "draft_ready", None
        else:
            status, error = "eval_failed", "eval failed"

        # NOTE(review): the current status is looked up by draft, while the
        # transition below is keyed by candidate_id. These are presumably the
        # same candidate; confirm with callers.
        current = self._candidate_by_draft(skill_name, draft_id)
        if current is not None and current.status == "safety_failed" and status == "draft_ready":
            status = "safety_failed"
            error = current.last_error

        self.learning_store.transition_learning_candidate(
            candidate_id,
            status,
            event_type="eval_completed",
            eval_report_id=report.report_id,
            last_error=error,
            payload=report.to_dict(),
        )
        return report

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _validate_publish_gates(self, draft: SkillDraft, *, confirm_high_risk: bool) -> None:
        """Raise ``ValueError`` unless the draft may be published.

        Gates, in order: an approved review exists; a safety report exists,
        passed and is not critical; a high-risk report is explicitly
        confirmed; and any eval report that was not skipped has passed.
        """
        reviews = self.reviews_for_draft(draft.skill_name, draft.draft_id)
        if not any(review.status == SkillReviewState.APPROVED.value for review in reviews):
            raise ValueError("Draft must have an approved review before publish")

        safety = self.get_safety_report(draft.skill_name, draft.draft_id)
        if safety is None:
            raise ValueError("Draft requires a passing safety report before publish")
        if not safety.passed:
            raise ValueError("Draft safety report did not pass")
        if safety.risk_level == "critical":
            raise ValueError("Critical risk drafts cannot be published")
        if safety.risk_level == "high" and not confirm_high_risk:
            raise ValueError("High risk draft publish requires confirm_high_risk=true")

        eval_report = self.get_eval_report(draft.skill_name, draft.draft_id)
        if eval_report is not None and eval_report.status != "skipped_provider_unavailable" and not eval_report.passed:
            raise ValueError("Draft eval report did not pass")

    def _mark_candidate_by_draft(
        self,
        skill_name: str,
        draft_id: str,
        status: str,
        event_type: str,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Transition the candidate that owns the draft, if there is one.

        Returns ``None`` when no candidate references the draft. A candidate
        in ``safety_failed``/``eval_failed`` is returned unchanged rather than
        being moved to ``review_pending`` or ``approved``.
        """
        candidate = self._candidate_by_draft(skill_name, draft_id)
        if candidate is None:
            return None
        if candidate.status in {"safety_failed", "eval_failed"} and status in {"review_pending", "approved"}:
            return candidate
        return self.learning_store.transition_learning_candidate(
            candidate.candidate_id,
            status,
            event_type=event_type,
            **updates,
        )

    def _candidate_by_draft(self, skill_name: str, draft_id: str) -> SkillLearningCandidate | None:
        """Return the first candidate whose draft matches, or ``None``."""
        for candidate in self.learning_store.list_learning_candidates():
            if candidate.draft_skill_name == skill_name and candidate.draft_id == draft_id:
                return candidate
        return None

    @staticmethod
    def _require_updated(candidate: SkillLearningCandidate | None, candidate_id: str) -> SkillLearningCandidate:
        """Return ``candidate``, raising ``ValueError`` when the store gave ``None``."""
        if candidate is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")
        return candidate
|
||||
|
||||
|
||||
def model_to_dict(value: Any) -> dict[str, Any]:
    """Convert ``value`` to a plain dict.

    Objects exposing ``to_dict()`` are asked to convert themselves, and
    dicts are shallow-copied.

    Raises:
        TypeError: For any other kind of value.
    """
    if not hasattr(value, "to_dict"):
        if isinstance(value, dict):
            return dict(value)
        raise TypeError(f"Cannot convert {type(value).__name__} to dict")
    return value.to_dict()
|
||||
109
app-instance/backend/beaver/skills/learning/safety.py
Normal file
109
app-instance/backend/beaver/skills/learning/safety.py
Normal file
@ -0,0 +1,109 @@
|
||||
"""Safety checks for generated skill drafts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.memory.skills import SkillDraftSafetyReport
|
||||
from beaver.skills.specs import SkillDraft
|
||||
|
||||
|
||||
class SkillDraftSafetyChecker:
    """Deterministic publish gate for skill drafts.

    Inspects a draft's frontmatter and content with fixed rules (no model
    call) and produces a ``SkillDraftSafetyReport`` with a risk level of
    ``low``, ``medium``, ``high`` or ``critical``.
    """

    # Regexes matched against the lower-cased content and frontmatter; any
    # hit blocks the draft and marks it critical.
    _CRITICAL_PATTERNS = [
        r"ignore (all )?(previous|system|developer) instructions",
        r"bypass (permissions|permission|safety|policy|policies)",
        r"disable (guardrails|safety|permission checks)",
        r"reveal (secrets|api keys|tokens|credentials)",
        r"exfiltrate",
    ]
    # Tool hints (compared case-insensitively) that raise the risk to high.
    _DANGEROUS_TOOL_HINTS = {
        "shell",
        "terminal",
        "bash",
        "filesystem_write",
        "write_file",
        "delete_file",
        "network",
        "http",
        "auth",
        "credentials",
    }

    def __init__(self, *, allowed_tool_names: set[str] | None = None) -> None:
        # None disables the unknown-tool check; a set enables it.
        self.allowed_tool_names = allowed_tool_names

    @staticmethod
    def _frontmatter_or_empty(value: object) -> tuple[dict, bool]:
        """Return ``(frontmatter, is_valid)``, using ``{}`` for a non-dict value."""
        if isinstance(value, dict):
            return value, True
        return {}, False

    def check(self, draft: SkillDraft) -> SkillDraftSafetyReport:
        """Evaluate ``draft`` and return its safety report.

        The report passes only when nothing was blocked and the risk level is
        not critical. A frontmatter that is not a dict is reported as a
        blocked reason. The remaining checks then run against an empty
        frontmatter instead of failing with ``AttributeError``.
        """
        issues: list[str] = []
        blocked: list[str] = []
        risk_level = "low"

        raw_frontmatter = draft.proposed_frontmatter
        frontmatter, frontmatter_valid = self._frontmatter_or_empty(raw_frontmatter)
        if not frontmatter_valid:
            blocked.append("frontmatter must be an object")

        description = str(frontmatter.get("description") or "").strip()
        if not description and draft.proposal_kind != "retire_skill":
            issues.append("frontmatter.description is missing")
            risk_level = _max_risk(risk_level, "medium")

        tool_hints = _tool_hints(frontmatter)
        if self.allowed_tool_names is not None:
            unknown = [name for name in tool_hints if name not in self.allowed_tool_names]
            if unknown:
                blocked.append(f"unknown tool hints: {', '.join(sorted(unknown))}")
        dangerous = sorted({name for name in tool_hints if name.lower() in self._DANGEROUS_TOOL_HINTS})
        if dangerous:
            issues.append(f"dangerous tool hints require high-risk review: {', '.join(dangerous)}")
            risk_level = _max_risk(risk_level, "high")

        # Scan the raw frontmatter as well, so an invalid (e.g. string)
        # frontmatter is still searched for critical patterns.
        content = f"{draft.proposed_content}\n{raw_frontmatter}".lower()
        for pattern in self._CRITICAL_PATTERNS:
            if re.search(pattern, content):
                blocked.append(f"critical prompt-safety pattern matched: {pattern}")
                risk_level = "critical"

        if draft.proposal_kind in {"retire_skill", "merge_skills"}:
            # Retiring or merging skills is always at least high risk.
            risk_level = _max_risk(risk_level, "high")

        passed = not blocked and risk_level != "critical"
        return SkillDraftSafetyReport(
            report_id=uuid4().hex,
            skill_name=draft.skill_name,
            draft_id=draft.draft_id,
            passed=passed,
            risk_level=risk_level,
            issues=issues,
            blocked_reasons=blocked,
            suggested_fix=_suggest_fix(blocked, issues),
            created_at=_utc_now(),
        )
|
||||
|
||||
|
||||
def _tool_hints(frontmatter: dict) -> list[str]:
|
||||
raw = frontmatter.get("tools")
|
||||
if isinstance(raw, list):
|
||||
return [str(item).strip() for item in raw if str(item).strip()]
|
||||
if isinstance(raw, str):
|
||||
return [item.strip() for item in raw.split(",") if item.strip()]
|
||||
return []
|
||||
|
||||
|
||||
def _max_risk(left: str, right: str) -> str:
|
||||
order = {"low": 0, "medium": 1, "high": 2, "critical": 3}
|
||||
return left if order[left] >= order[right] else right
|
||||
|
||||
|
||||
def _suggest_fix(blocked: list[str], issues: list[str]) -> str:
|
||||
if blocked:
|
||||
return "Remove blocked instructions or invalid tool hints before review."
|
||||
if issues:
|
||||
return "Review the flagged issues before publishing."
|
||||
return ""
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
from datetime import datetime, timezone
|
||||
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
293
app-instance/backend/beaver/skills/learning/service.py
Normal file
293
app-instance/backend/beaver/skills/learning/service.py
Normal file
@ -0,0 +1,293 @@
|
||||
"""Skill learning loop services."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from itertools import combinations
|
||||
import re
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.runs.models import RunRecord, SkillEffectRecord
|
||||
from beaver.memory.runs.store import RunMemoryStore
|
||||
from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot
|
||||
from beaver.memory.skills.store import SkillLearningStore
|
||||
from beaver.skills.drafts.service import DraftService
|
||||
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
|
||||
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
|
||||
from beaver.skills.specs import SkillActivationReceipt
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RunReceiptContext:
|
||||
run_record: RunRecord
|
||||
effect_records: list[SkillEffectRecord] = field(default_factory=list)
|
||||
|
||||
|
||||
class SkillLearningService:
    """Derive skill-learning candidates and drafts from recorded runs.

    The service persists run receipts, recomputes per-version performance
    snapshots, proposes candidates (revise / new / merge / retire) and turns
    a chosen candidate into a draft through the draft service.
    """

    def __init__(
        self,
        *,
        run_store: RunMemoryStore,
        learning_store: SkillLearningStore,
        draft_service: DraftService,
        evidence_selector: EvidenceSelector,
        synthesizer: SkillDraftSynthesizer | None = None,
    ) -> None:
        """Store the collaborators; build a default synthesizer when none is given."""
        self.run_store = run_store
        self.learning_store = learning_store
        self.draft_service = draft_service
        self.evidence_selector = evidence_selector
        self.synthesizer = synthesizer or SkillDraftSynthesizer()

    def collect_run_receipts(
        self,
        run_result_context: RunReceiptContext,
        *,
        generate_candidates: bool = True,
    ) -> list[SkillLearningCandidate]:
        """Persist one run with its skill effects and refresh derived data.

        Args:
            run_result_context: The run record and its effect records.
            generate_candidates: When false, snapshots are still rescored but
                no candidates are built.

        Returns:
            The candidates from ``build_learning_candidates``, or an empty
            list when ``generate_candidates`` is false.
        """
        self.run_store.append_run_record(run_result_context.run_record)
        for effect in run_result_context.effect_records:
            self.run_store.append_skill_effect(effect)
        self.rescore_skill_versions()
        if not generate_candidates:
            return []
        return self.build_learning_candidates()

    def build_learning_candidates(self) -> list[SkillLearningCandidate]:
        """Build all candidate kinds and record the ones not yet stored.

        Returns:
            Every candidate built in this pass, including those whose id was
            already present in the learning store (those are not re-recorded).
        """
        candidates: list[SkillLearningCandidate] = []
        candidates.extend(self._build_revision_candidates())
        candidates.extend(self._build_new_skill_candidates())
        candidates.extend(self._build_merge_candidates())
        candidates.extend(self._build_retire_candidates())
        existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
        for candidate in candidates:
            if candidate.candidate_id not in existing_ids:
                self.learning_store.record_learning_candidate(candidate)
                # Also guards against duplicates within this same pass.
                existing_ids.add(candidate.candidate_id)
        return candidates

    async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any:
        """Create a draft (or retire proposal) for a stored candidate.

        Retire candidates need no LLM call. Every other kind builds an
        evidence packet, asks the synthesizer for a payload and hands it to
        the matching draft-service factory; kinds other than ``new_skill``
        and ``merge_skills`` are treated as revisions.

        Args:
            candidate_id: Id of a candidate in the learning store.
            provider_bundle: Providers/runtimes used for synthesis.

        Returns:
            Whatever the draft service returns for the created draft.

        Raises:
            ValueError: If the candidate id is unknown, or a retire/revise
                candidate names no related skill.
        """
        candidates = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()}
        candidate = candidates.get(candidate_id)
        if candidate is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")

        evidence_refs = [{"run_id": run_id} for run_id in candidate.source_run_ids]
        if candidate.kind == "retire_skill":
            return self.draft_service.create_retire_proposal(
                skill_name=self._primary_skill_name(candidate),
                base_version=candidate.evidence.get("skill_version"),
                created_by="learning-loop",
                reason=candidate.reason,
                evidence_refs=evidence_refs,
            )

        packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
        # NOTE(review): provider and model are chosen by different conditions
        # (provider truthiness vs. runtime presence); confirm a bundle can
        # never carry an auxiliary runtime without an auxiliary provider.
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        model = (
            provider_bundle.auxiliary_runtime.model
            if provider_bundle.auxiliary_runtime is not None
            else provider_bundle.main_runtime.model
        )

        if candidate.kind == "new_skill":
            payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model)
            return self.draft_service.create_new_skill_draft(
                skill_name=self._suggest_skill_name(candidate, packet),
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                reason=payload["change_reason"] or candidate.reason,
                evidence_refs=evidence_refs,
            )

        if candidate.kind == "merge_skills":
            target_name = self._suggest_skill_name(candidate, packet)
            payload = await self.synthesizer.synthesize_merge(candidate, packet, provider, model)
            return self.draft_service.create_merge_draft(
                skill_name=target_name,
                base_version=None,
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                reason=payload["change_reason"] or candidate.reason,
                evidence_refs=evidence_refs,
            )

        target_skill = self._primary_skill_name(candidate)
        base_version = candidate.evidence.get("skill_version")
        payload = await self.synthesizer.synthesize_revision(candidate, packet, provider, model)
        return self.draft_service.create_revision_draft(
            skill_name=target_skill,
            base_version=base_version,
            proposed_content=payload["content"],
            proposed_frontmatter=payload["frontmatter"],
            created_by="learning-loop",
            reason=payload["change_reason"] or candidate.reason,
            evidence_refs=evidence_refs,
        )

    def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
        """Recompute and store a performance snapshot per (skill, version).

        Every version activated in any run gets a snapshot, even with no
        effect records (all counts zero, empty ``latest_used_at``).

        Returns:
            The snapshots written in this pass.
        """
        snapshots: list[SkillPerformanceSnapshot] = []
        grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {}
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                grouped.setdefault((receipt.skill_name, receipt.skill_version), [])
        for effect in self._all_effects():
            grouped.setdefault((effect.skill_name, effect.skill_version), []).append(effect)

        for (skill_name, skill_version), effects in grouped.items():
            activation_count = len(effects)
            success_count = sum(1 for item in effects if item.success)
            # Most recent non-empty feedback score; assumes the store returns
            # effects oldest-first -- TODO confirm against RunMemoryStore.
            last_feedback = next(
                (item.feedback_score for item in reversed(effects) if item.feedback_score is not None),
                None,
            )
            snapshot = SkillPerformanceSnapshot(
                skill_name=skill_name,
                skill_version=skill_version,
                activation_count=activation_count,
                success_count=success_count,
                failure_count=activation_count - success_count,
                latest_used_at=effects[-1].created_at if effects else "",
                last_feedback_score=last_feedback,
            )
            self.learning_store.update_performance_snapshot(snapshot)
            snapshots.append(snapshot)
        return snapshots

    def _build_revision_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a revision for each low-performing version with at least two runs."""
        candidates: list[SkillLearningCandidate] = []
        for snapshot in self.learning_store.list_low_performing_versions():
            runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=5)
            if len(runs) < 2:
                continue
            candidates.append(
                SkillLearningCandidate(
                    candidate_id=self._candidate_id("revise", snapshot.skill_name, snapshot.skill_version),
                    kind="revise_skill",
                    source_run_ids=[record.run_id for record in runs],
                    # dict.fromkeys de-duplicates while keeping first-seen order.
                    source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                    related_skill_names=[snapshot.skill_name],
                    reason=f"Skill version {snapshot.skill_name}/{snapshot.skill_version} is underperforming across repeated runs.",
                    evidence={"skill_version": snapshot.skill_version},
                    status="open",
                )
            )
        return candidates

    def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a new skill for each task theme that repeatedly succeeded without skills.

        Runs are grouped by ``_task_theme``. A theme qualifies when at least
        two of its runs succeeded and none of those activated a skill.
        """
        groups: dict[str, list[RunRecord]] = {}
        for record in self.run_store.list_runs():
            key = self._task_theme(record.task_text)
            if not key:
                continue
            groups.setdefault(key, []).append(record)

        candidates: list[SkillLearningCandidate] = []
        for theme, runs in groups.items():
            successful = [record for record in runs if record.success]
            if len(successful) < 2:
                continue
            if any(record.activated_skills for record in successful):
                continue
            recent = successful[-5:]
            candidates.append(
                SkillLearningCandidate(
                    # The success count is part of the id, so the id changes
                    # whenever another successful run joins the theme.
                    candidate_id=self._candidate_id("new", theme, str(len(successful))),
                    kind="new_skill",
                    source_run_ids=[record.run_id for record in recent],
                    source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                    related_skill_names=[],
                    reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.",
                    evidence={"theme": theme},
                    status="open",
                )
            )
        return candidates

    def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a merge for each pair of skills activated together in at least two runs."""
        pair_runs: dict[tuple[str, str], list[RunRecord]] = {}
        for record in self.run_store.list_runs():
            # Sorted unique names give each pair one canonical (a, b) order.
            unique = sorted({receipt.skill_name for receipt in record.activated_skills})
            for pair in combinations(unique, 2):
                pair_runs.setdefault(pair, []).append(record)

        candidates: list[SkillLearningCandidate] = []
        for pair, runs in pair_runs.items():
            if len(runs) < 2:
                continue
            recent = runs[-5:]
            candidates.append(
                SkillLearningCandidate(
                    candidate_id=self._candidate_id("merge", *pair),
                    kind="merge_skills",
                    source_run_ids=[record.run_id for record in recent],
                    source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                    related_skill_names=list(pair),
                    reason=f"Skills {pair[0]} and {pair[1]} repeatedly co-activate and may benefit from consolidation.",
                    evidence={"pair": list(pair)},
                    status="open",
                )
            )
        return candidates

    def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]:
        """Propose retirement for versions last used more than ``stale_days`` ago.

        Snapshots with no activations, no ``latest_used_at`` or an
        unparseable timestamp are skipped.
        """
        candidates: list[SkillLearningCandidate] = []
        cutoff = datetime.now(timezone.utc) - timedelta(days=stale_days)
        for snapshot in self.learning_store.list_performance_snapshots():
            if snapshot.activation_count == 0 or not snapshot.latest_used_at:
                continue
            latest_used = self._parse_timestamp(snapshot.latest_used_at)
            if latest_used is None or latest_used > cutoff:
                continue
            runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=3)
            candidates.append(
                SkillLearningCandidate(
                    candidate_id=self._candidate_id("retire", snapshot.skill_name, snapshot.skill_version),
                    kind="retire_skill",
                    source_run_ids=[record.run_id for record in runs],
                    source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                    related_skill_names=[snapshot.skill_name],
                    reason=(
                        f"Skill version {snapshot.skill_name}/{snapshot.skill_version} has been inactive "
                        f"since {snapshot.latest_used_at} and may be ready for retirement."
                    ),
                    evidence={"skill_version": snapshot.skill_version, "latest_used_at": snapshot.latest_used_at},
                    status="open",
                )
            )
        return candidates

    def _all_effects(self) -> list[SkillEffectRecord]:
        """Collect the effect records of every known (skill, version), once each.

        Keys come from the stored performance snapshots and from the skills
        activated in recorded runs. Each key is queried exactly once, so a
        version activated in several runs is not counted several times and
        a version without a snapshot yet is still included.
        """
        keys: dict[tuple[str, str], None] = {}
        for snapshot in self.learning_store.list_performance_snapshots():
            keys.setdefault((snapshot.skill_name, snapshot.skill_version), None)
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                keys.setdefault((receipt.skill_name, receipt.skill_version), None)

        effects: list[SkillEffectRecord] = []
        for skill_name, skill_version in keys:
            effects.extend(self.run_store.list_skill_effects(skill_name, version=skill_version))
        return effects

    @staticmethod
    def _primary_skill_name(candidate: SkillLearningCandidate) -> str:
        """Return the first related skill name, or raise if there is none.

        Raises:
            ValueError: If the candidate lists no related skills.
        """
        if not candidate.related_skill_names:
            raise ValueError(
                f"Learning candidate {candidate.candidate_id} ({candidate.kind}) has no related skill"
            )
        return candidate.related_skill_names[0]

    @staticmethod
    def _candidate_id(kind: str, *parts: str) -> str:
        """Build a deterministic candidate id of the form ``kind:part|part``."""
        return f"{kind}:{'|'.join(parts)}"

    @staticmethod
    def _task_theme(task_text: str) -> str:
        """Reduce a task description to its first eight lower-cased words.

        Whitespace runs collapse to single spaces. Empty, blank or ``None``
        text yields an empty string.
        """
        cleaned = re.sub(r"\s+", " ", (task_text or "").strip().lower())
        if not cleaned:
            return ""
        return " ".join(cleaned.split(" ")[:8]).strip()

    @staticmethod
    def _suggest_skill_name(candidate: SkillLearningCandidate, packet: EvidencePacket) -> str:
        """Choose a skill name for a draft.

        Preference order: the candidate's first related skill, then a slug
        (lower-case ``a-z0-9`` joined by hyphens, at most 48 characters) of
        the first task summary, then a random ``generated-skill-`` name.
        """
        if candidate.related_skill_names:
            return candidate.related_skill_names[0]
        if packet.task_summaries:
            seed = re.sub(r"[^a-z0-9]+", "-", packet.task_summaries[0].lower()).strip("-")
            if seed:
                # Truncation can cut right after a hyphen; trim it again.
                return seed[:48].rstrip("-")
        return f"generated-skill-{uuid4().hex[:8]}"

    @staticmethod
    def _parse_timestamp(value: str) -> datetime | None:
        """Parse an ISO-8601 timestamp into an aware UTC datetime.

        A trailing ``Z`` is accepted. Naive values are assumed to be UTC.
        Returns ``None`` when the text is not a valid ISO timestamp.
        """
        try:
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)
|
||||
118
app-instance/backend/beaver/skills/learning/synthesizer.py
Normal file
118
app-instance/backend/beaver/skills/learning/synthesizer.py
Normal file
@ -0,0 +1,118 @@
|
||||
"""LLM-backed draft synthesis for skill learning."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers.base import LLMProvider
|
||||
from beaver.skills.learning.evidence import EvidencePacket
|
||||
from beaver.memory.skills.models import SkillLearningCandidate
|
||||
|
||||
|
||||
class SkillDraftSynthesizer:
    """Ask an LLM provider to turn execution evidence into a skill-draft payload.

    Every public method returns a dict with the keys ``frontmatter`` (dict),
    ``content`` (str) and ``change_reason`` (str). When the model reply
    cannot be parsed into that shape, a deterministic fallback payload built
    from the evidence is returned instead.
    """

    async def synthesize_revision(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a payload for revising an existing skill (action ``revise``)."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "revise")

    async def synthesize_new_skill(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a payload for a brand-new skill (action ``new``)."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "new")

    async def synthesize_merge(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
    ) -> dict[str, Any]:
        """Synthesize a payload for merging skills (action ``merge``)."""
        return await self._synthesize(candidate, evidence_packet, provider, model, "merge")

    async def _synthesize(
        self,
        candidate: SkillLearningCandidate,
        evidence_packet: EvidencePacket,
        provider: LLMProvider,
        model: str,
        action: str,
    ) -> dict[str, Any]:
        """Send one system + user chat request and normalise the reply.

        The request uses no tools, ``max_tokens=1500`` and ``temperature=0``.
        An unusable reply falls back to ``_fallback_payload``.
        """
        user_message = {
            "role": "user",
            "content": self._build_prompt(candidate, evidence_packet, action),
        }
        system_message = {
            "role": "system",
            "content": (
                "You synthesize Beaver skill drafts from execution evidence. "
                "Return only JSON with keys: frontmatter, content, change_reason."
            ),
        }
        reply = await provider.chat(
            messages=[system_message, user_message],
            tools=None,
            model=model,
            max_tokens=1500,
            temperature=0,
        )
        parsed = self._parse_payload(reply.content or "")
        # An empty dict means "unusable reply"; a valid payload is never empty.
        return parsed or self._fallback_payload(candidate, evidence_packet, action)

    @staticmethod
    def _build_prompt(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> str:
        """Render the user prompt: candidate header, task summaries, session excerpts."""
        header = "\n".join(
            [
                f"Action: {action}",
                f"Candidate kind: {candidate.kind}",
                f"Reason: {candidate.reason}",
                f"Related skills: {candidate.related_skill_names}",
            ]
        )
        summaries = "\n- ".join(evidence_packet.task_summaries)
        excerpts = "\n\n".join(evidence_packet.session_excerpts)
        return (
            f"{header}\nTask summaries:\n- {summaries}"
            f"\n\nSession excerpts:\n{excerpts}"
            "\n\nReturn JSON only."
        )

    @staticmethod
    def _parse_payload(content: str) -> dict[str, Any]:
        """Parse a model reply into a payload dict, or ``{}`` when unusable.

        A reply wrapped in a Markdown code fence (first and last line both
        start with three backticks, at least three lines) is unwrapped first.
        The JSON must be an object whose ``frontmatter`` is a dict and whose
        ``content`` is a string.
        """
        text = content.strip()
        if text.startswith("```"):
            rows = text.splitlines()
            fenced = len(rows) >= 3 and rows[0].startswith("```") and rows[-1].startswith("```")
            if fenced:
                text = "\n".join(rows[1:-1]).strip()
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError:
            return {}
        if not isinstance(decoded, dict):
            return {}
        frontmatter = decoded.get("frontmatter")
        body = decoded.get("content")
        if isinstance(frontmatter, dict) and isinstance(body, str):
            return {
                "frontmatter": frontmatter,
                "content": body.strip(),
                "change_reason": str(decoded.get("change_reason") or ""),
            }
        return {}

    @staticmethod
    def _fallback_payload(candidate: SkillLearningCandidate, evidence_packet: EvidencePacket, action: str) -> dict[str, Any]:
        """Build a minimal payload from the evidence when the model reply is unusable.

        The title is the first related skill name (underscores become
        hyphens) or ``generated-skill``; the body lists up to five task
        summaries as bullets.
        """
        if candidate.related_skill_names:
            skill = candidate.related_skill_names[0]
        else:
            skill = "generated-skill"
        title = skill.replace("_", "-")
        bullets = [f"- {summary}" for summary in evidence_packet.task_summaries[:5]]
        evidence_text = "\n".join(bullets) or "- No evidence captured."
        return {
            "frontmatter": {
                "description": candidate.reason or f"Auto-generated {action} draft for {title}.",
                "tools": [],
            },
            "content": f"# {title}\n\n## Evidence\n\n{evidence_text}\n",
            "change_reason": candidate.reason or f"Fallback {action} synthesis.",
        }
|
||||
175
app-instance/backend/beaver/skills/learning/worker.py
Normal file
175
app-instance/backend/beaver/skills/learning/worker.py
Normal file
@ -0,0 +1,175 @@
|
||||
"""Background worker for assisted skill learning."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.skills import SkillLearningCandidate
|
||||
from beaver.skills.learning.pipeline import SkillLearningPipelineService
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningWorkerConfig:
|
||||
enabled: bool = True
|
||||
max_drafts_per_run: int = 5
|
||||
max_retries: int = 3
|
||||
interval_seconds: float = 300.0
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> "SkillLearningWorkerConfig":
|
||||
return cls(
|
||||
enabled=_env_bool("BEAVER_SKILL_LEARNING_WORKER_ENABLED", True),
|
||||
max_drafts_per_run=_env_int("BEAVER_SKILL_LEARNING_MAX_DRAFTS_PER_RUN", 5),
|
||||
max_retries=_env_int("BEAVER_SKILL_LEARNING_MAX_RETRIES", 3),
|
||||
interval_seconds=float(os.getenv("BEAVER_SKILL_LEARNING_INTERVAL_SECONDS", "300") or "300"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningWorkerResult:
|
||||
processed: int = 0
|
||||
succeeded: int = 0
|
||||
failed: int = 0
|
||||
skipped: int = 0
|
||||
failures: list[dict[str, str]] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"processed": self.processed,
|
||||
"succeeded": self.succeeded,
|
||||
"failed": self.failed,
|
||||
"skipped": self.skipped,
|
||||
"failures": [dict(item) for item in self.failures],
|
||||
}
|
||||
|
||||
|
||||
class SkillLearningWorker:
    """Background worker that drafts skills for open learning candidates.

    It synthesizes, safety-checks and evaluates drafts through the pipeline
    service; it never approves or publishes anything itself.
    """

    # Candidate statuses that count as "a draft is already in flight".
    _ACTIVE_DRAFT_STATUSES = {"queued", "synthesizing", "draft_ready", "review_pending", "approved"}

    def __init__(
        self,
        *,
        pipeline: SkillLearningPipelineService,
        provider_bundle_factory: Callable[[], ProviderBundle],
        config: SkillLearningWorkerConfig | None = None,
    ) -> None:
        """Store collaborators; the config defaults to ``SkillLearningWorkerConfig.from_env()``."""
        self.pipeline = pipeline
        self.provider_bundle_factory = provider_bundle_factory
        self.config = config or SkillLearningWorkerConfig.from_env()
        self._running = False
        # Serialises passes so two run_once() calls never overlap.
        self._lock = asyncio.Lock()

    async def run_forever(self) -> None:
        """Run passes back to back, sleeping ``interval_seconds`` after each.

        Returns immediately when the worker is disabled. The loop ends once
        ``stop()`` has been called and the current pass and sleep finish.
        """
        if not self.config.enabled:
            return
        self._running = True
        try:
            while self._running:
                await self.run_once()
                await asyncio.sleep(self.config.interval_seconds)
        finally:
            self._running = False

    def stop(self) -> None:
        """Ask ``run_forever`` to exit after its current iteration."""
        self._running = False

    async def run_once(self) -> SkillLearningWorkerResult:
        """Process up to ``max_drafts_per_run`` candidates and report the counts.

        A candidate whose processing raises is counted as failed, recorded
        in ``failures`` and passed to ``_mark_failure``; the pass continues
        with the next candidate.
        """
        if not self.config.enabled:
            return SkillLearningWorkerResult()
        async with self._lock:
            result = SkillLearningWorkerResult()
            batch = self._select_candidates()[: self.config.max_drafts_per_run]
            for candidate in batch:
                result.processed += 1
                try:
                    handled = await self._process_candidate(candidate)
                except Exception as exc:
                    result.failed += 1
                    result.failures.append({"candidate_id": candidate.candidate_id, "error": str(exc)})
                    self._mark_failure(candidate, str(exc))
                else:
                    if handled:
                        result.succeeded += 1
                    else:
                        result.skipped += 1
            return result

    def _select_candidates(self) -> list[SkillLearningCandidate]:
        """Return open candidates with retries left, highest priority first.

        Ordering is by ``(priority, confidence, created_at)``, descending.
        """
        eligible = [
            entry
            for entry in self.pipeline.list_candidates()
            if entry.status == "open" and entry.retry_count < self.config.max_retries
        ]
        eligible.sort(key=lambda entry: (entry.priority, entry.confidence, entry.created_at), reverse=True)
        return eligible

    async def _process_candidate(self, candidate: SkillLearningCandidate) -> bool:
        """Drive one candidate through synthesis, safety check and evaluation.

        Returns:
            ``False`` when the candidate was superseded by an active draft
            for the same skill, ``True`` otherwise. Evaluation is skipped
            when the safety check fails or reports ``critical`` risk.
        """
        candidate_id = candidate.candidate_id
        if self._has_active_draft(candidate):
            self.pipeline.mark_candidate_superseded(candidate_id, "active draft already exists for this skill")
            return False

        self.pipeline.mark_candidate_queued(candidate_id)
        self.pipeline.mark_candidate_synthesizing(candidate_id)
        draft = await self.pipeline.synthesize_draft(
            candidate_id,
            provider_bundle=self.provider_bundle_factory(),
        )
        self.pipeline.mark_draft_synthesized(candidate_id, draft)

        safety = self.pipeline.check_safety(draft.skill_name, draft.draft_id)
        if safety.passed and safety.risk_level != "critical":
            await self.pipeline.evaluate_draft(
                candidate_id,
                draft.skill_name,
                draft.draft_id,
                provider_bundle=self.provider_bundle_factory(),
            )
        return True

    def _has_active_draft(self, candidate: SkillLearningCandidate) -> bool:
        """Tell whether another candidate with an active status targets the same skill.

        A candidate that names no skills never conflicts.
        """
        wanted = self._claimed_skill_names(candidate)
        if not wanted:
            return False
        return any(
            other.candidate_id != candidate.candidate_id
            and other.status in self._ACTIVE_DRAFT_STATUSES
            and not wanted.isdisjoint(self._claimed_skill_names(other))
            for other in self.pipeline.list_candidates()
        )

    @staticmethod
    def _claimed_skill_names(candidate: SkillLearningCandidate) -> set[str]:
        """Collect the related skill names plus the draft skill name, if set."""
        names = set(candidate.related_skill_names)
        if candidate.draft_skill_name:
            names.add(candidate.draft_skill_name)
        return names

    def _mark_failure(self, candidate: SkillLearningCandidate, error: str) -> None:
        """Record a failed attempt; it is terminal once retries are exhausted."""
        attempts = candidate.retry_count + 1
        self.pipeline.mark_candidate_failed(
            candidate.candidate_id,
            error,
            retry_count=attempts,
            terminal=attempts >= self.config.max_retries,
        )
|
||||
|
||||
|
||||
def _env_bool(name: str, default: bool) -> bool:
|
||||
raw = os.getenv(name)
|
||||
if raw is None:
|
||||
return default
|
||||
return raw.strip().lower() not in {"0", "false", "no", "off"}
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
raw = os.getenv(name)
|
||||
if raw in (None, ""):
|
||||
return default
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError:
|
||||
return default
|
||||
Reference in New Issue
Block a user