feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核
新增内部Task系统，包括验证、反馈门控机制，实现自动质量验证（通过率>=0.75）和用户反馈闭环（satisfied/revise/abandon）。实现Agent Team v1协调器，支持sequence/parallel/dag执行策略，sub-agent复用主AgentLoop，每个run使用独立memory snapshot。建立Skill学习pipeline，包含draft/审核/发布/回滚完整生命周期，仅当Task验证通过且用户满意时才生成学习候选。重构目录结构，移除third_party依赖，建立统一engine内核，所有agent共享运行时基础组件。更新ContextBuilder以清理provider消息字段，增强SkillContext版本管理，集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
293
app-instance/backend/beaver/skills/learning/service.py
Normal file
293
app-instance/backend/beaver/skills/learning/service.py
Normal file
@ -0,0 +1,293 @@
|
||||
"""Skill learning loop services."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from itertools import combinations
|
||||
import re
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.runs.models import RunRecord, SkillEffectRecord
|
||||
from beaver.memory.runs.store import RunMemoryStore
|
||||
from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot
|
||||
from beaver.memory.skills.store import SkillLearningStore
|
||||
from beaver.skills.drafts.service import DraftService
|
||||
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
|
||||
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
|
||||
from beaver.skills.specs import SkillActivationReceipt
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RunReceiptContext:
|
||||
run_record: RunRecord
|
||||
effect_records: list[SkillEffectRecord] = field(default_factory=list)
|
||||
|
||||
|
||||
class SkillLearningService:
    """Drive the skill learning loop: record runs, score skills, propose changes.

    The service persists run/effect records, recomputes per-version performance
    snapshots from them, derives learning candidates (revise / new / merge /
    retire) and, on request, turns a stored candidate into a draft through the
    draft service.
    """

    def __init__(
        self,
        *,
        run_store: RunMemoryStore,
        learning_store: SkillLearningStore,
        draft_service: DraftService,
        evidence_selector: EvidenceSelector,
        synthesizer: SkillDraftSynthesizer | None = None,
    ) -> None:
        """Wire up the collaborators.

        Args:
            run_store: Store for run records and skill effect records.
            learning_store: Store for performance snapshots and candidates.
            draft_service: Creates the drafts / proposals for a candidate.
            evidence_selector: Builds the evidence packet handed to the synthesizer.
            synthesizer: Draft synthesizer; a default ``SkillDraftSynthesizer()``
                is created when omitted (or when a falsy value is passed).
        """
        self.run_store = run_store
        self.learning_store = learning_store
        self.draft_service = draft_service
        self.evidence_selector = evidence_selector
        self.synthesizer = synthesizer or SkillDraftSynthesizer()

    def collect_run_receipts(
        self,
        run_result_context: RunReceiptContext,
        *,
        generate_candidates: bool = True,
    ) -> list[SkillLearningCandidate]:
        """Persist one run's records, rescore all skill versions, optionally build candidates.

        Args:
            run_result_context: The run record and its effect records.
            generate_candidates: When false, stop after rescoring.

        Returns:
            The candidates from ``build_learning_candidates()``, or an empty
            list when ``generate_candidates`` is false.
        """
        self.run_store.append_run_record(run_result_context.run_record)
        for effect in run_result_context.effect_records:
            self.run_store.append_skill_effect(effect)
        self.rescore_skill_versions()
        if not generate_candidates:
            return []
        return self.build_learning_candidates()

    def build_learning_candidates(self) -> list[SkillLearningCandidate]:
        """Derive all candidate kinds and record the ones not yet stored.

        Returns:
            Every candidate derived in this pass (revise, new, merge, retire, in
            that order), including those whose id was already in the store; only
            unseen ids are written to the learning store.
        """
        candidates: list[SkillLearningCandidate] = [
            *self._build_revision_candidates(),
            *self._build_new_skill_candidates(),
            *self._build_merge_candidates(),
            *self._build_retire_candidates(),
        ]
        known_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
        for candidate in candidates:
            if candidate.candidate_id in known_ids:
                continue
            self.learning_store.record_learning_candidate(candidate)
            # Track ids recorded in this pass so a duplicate is not stored twice.
            known_ids.add(candidate.candidate_id)
        return candidates

    async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any:
        """Turn a stored learning candidate into a draft or retire proposal.

        Args:
            candidate_id: Id of a candidate present in the learning store.
            provider_bundle: Source of the provider and model used for synthesis.

        Returns:
            Whatever the matching ``draft_service.create_*`` call returns.

        Raises:
            ValueError: If the candidate id is unknown, or a retire/revise
                candidate names no related skill.
        """
        by_id = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()}
        candidate = by_id.get(candidate_id)
        if candidate is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")

        evidence_refs = [{"run_id": run_id} for run_id in candidate.source_run_ids]

        # Retirement needs no model call: it is a plain proposal.
        if candidate.kind == "retire_skill":
            return self.draft_service.create_retire_proposal(
                skill_name=self._primary_skill_name(candidate),
                base_version=candidate.evidence.get("skill_version"),
                created_by="learning-loop",
                reason=candidate.reason,
                evidence_refs=evidence_refs,
            )

        packet = self.evidence_selector.build_evidence_packet(
            candidate.source_run_ids, candidate.source_session_ids
        )
        # NOTE(review): provider and model are chosen by two independent checks,
        # so an auxiliary provider can be paired with the main model (or the
        # reverse) if only one auxiliary field is set - confirm this is intended.
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        if provider_bundle.auxiliary_runtime is not None:
            model = provider_bundle.auxiliary_runtime.model
        else:
            model = provider_bundle.main_runtime.model

        if candidate.kind == "new_skill":
            payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model)
            return self.draft_service.create_new_skill_draft(
                skill_name=self._suggest_skill_name(candidate, packet),
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                reason=payload["change_reason"] or candidate.reason,
                evidence_refs=evidence_refs,
            )

        if candidate.kind == "merge_skills":
            target_name = self._suggest_skill_name(candidate, packet)
            payload = await self.synthesizer.synthesize_merge(candidate, packet, provider, model)
            return self.draft_service.create_merge_draft(
                skill_name=target_name,
                base_version=None,
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                reason=payload["change_reason"] or candidate.reason,
                evidence_refs=evidence_refs,
            )

        # Any remaining kind is handled as a revision of the first related skill.
        # Resolve the target before the model call so a malformed candidate
        # fails without spending a synthesis request.
        target_skill = self._primary_skill_name(candidate)
        base_version = candidate.evidence.get("skill_version")
        payload = await self.synthesizer.synthesize_revision(candidate, packet, provider, model)
        return self.draft_service.create_revision_draft(
            skill_name=target_skill,
            base_version=base_version,
            proposed_content=payload["content"],
            proposed_frontmatter=payload["frontmatter"],
            created_by="learning-loop",
            reason=payload["change_reason"] or candidate.reason,
            evidence_refs=evidence_refs,
        )

    def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
        """Recompute and store a performance snapshot per (skill name, version).

        Every version seen in a run receipt gets a snapshot, even with no effect
        records (all counts zero, ``latest_used_at`` empty).

        Returns:
            The snapshots written, in grouping order.
        """
        grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {}
        # Seed with every activated version so skills without effects still appear.
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                grouped.setdefault((receipt.skill_name, receipt.skill_version), [])
        for effect in self._all_effects():
            grouped.setdefault((effect.skill_name, effect.skill_version), []).append(effect)

        snapshots: list[SkillPerformanceSnapshot] = []
        for (skill_name, skill_version), effects in grouped.items():
            activation_count = len(effects)
            success_count = sum(1 for item in effects if item.success)
            # Most recent non-null feedback, scanning from the end of the list.
            last_feedback = next(
                (item.feedback_score for item in reversed(effects) if item.feedback_score is not None),
                None,
            )
            # Relies on the store returning effects oldest-first.
            latest_used = effects[-1].created_at if effects else ""
            snapshot = SkillPerformanceSnapshot(
                skill_name=skill_name,
                skill_version=skill_version,
                activation_count=activation_count,
                success_count=success_count,
                failure_count=activation_count - success_count,
                latest_used_at=latest_used,
                last_feedback_score=last_feedback,
            )
            self.learning_store.update_performance_snapshot(snapshot)
            snapshots.append(snapshot)
        return snapshots

    def _build_revision_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a revision for each low-performing version backed by at least two runs."""
        candidates: list[SkillLearningCandidate] = []
        for snapshot in self.learning_store.list_low_performing_versions():
            runs = self.run_store.list_runs_by_skill(
                snapshot.skill_name, version=snapshot.skill_version, limit=5
            )
            if len(runs) < 2:
                continue
            candidates.append(
                SkillLearningCandidate(
                    candidate_id=self._candidate_id("revise", snapshot.skill_name, snapshot.skill_version),
                    kind="revise_skill",
                    source_run_ids=[record.run_id for record in runs],
                    # dict.fromkeys de-duplicates while keeping first-seen order.
                    source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                    related_skill_names=[snapshot.skill_name],
                    reason=f"Skill version {snapshot.skill_name}/{snapshot.skill_version} is underperforming across repeated runs.",
                    evidence={"skill_version": snapshot.skill_version},
                    status="open",
                )
            )
        return candidates

    def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a new skill for themes solved repeatedly without any skill.

        Runs are grouped by ``_task_theme``; a theme qualifies when it has at
        least two successful runs and none of them activated a skill.
        """
        groups: dict[str, list[RunRecord]] = {}
        for record in self.run_store.list_runs():
            theme = self._task_theme(record.task_text)
            if theme:
                groups.setdefault(theme, []).append(record)

        candidates: list[SkillLearningCandidate] = []
        for theme, runs in groups.items():
            successful = [record for record in runs if record.success]
            if len(successful) < 2:
                continue
            if any(record.activated_skills for record in successful):
                continue
            recent = successful[-5:]
            candidates.append(
                SkillLearningCandidate(
                    # NOTE(review): the id embeds the success count, so each
                    # additional successful run yields a new candidate id for
                    # the same theme - confirm this is intended.
                    candidate_id=self._candidate_id("new", theme, str(len(successful))),
                    kind="new_skill",
                    source_run_ids=[record.run_id for record in recent],
                    source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                    related_skill_names=[],
                    reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.",
                    evidence={"theme": theme},
                    status="open",
                )
            )
        return candidates

    def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Propose merging any two skills that were activated together in at least two runs."""
        runs_by_pair: dict[tuple[str, str], list[RunRecord]] = {}
        for record in self.run_store.list_runs():
            # Sorted unique names make each pair key order-independent.
            names = sorted({receipt.skill_name for receipt in record.activated_skills})
            for pair in combinations(names, 2):
                runs_by_pair.setdefault(pair, []).append(record)

        candidates: list[SkillLearningCandidate] = []
        for pair, runs in runs_by_pair.items():
            if len(runs) < 2:
                continue
            recent = runs[-5:]
            candidates.append(
                SkillLearningCandidate(
                    candidate_id=self._candidate_id("merge", *pair),
                    kind="merge_skills",
                    source_run_ids=[record.run_id for record in recent],
                    source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                    related_skill_names=list(pair),
                    reason=f"Skills {pair[0]} and {pair[1]} repeatedly co-activate and may benefit from consolidation.",
                    evidence={"pair": list(pair)},
                    status="open",
                )
            )
        return candidates

    def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]:
        """Propose retiring versions whose last recorded use is at least ``stale_days`` old.

        Snapshots with no activations, no timestamp, or an unparseable timestamp
        are skipped.
        """
        cutoff = datetime.now(timezone.utc) - timedelta(days=stale_days)
        candidates: list[SkillLearningCandidate] = []
        for snapshot in self.learning_store.list_performance_snapshots():
            if snapshot.activation_count == 0 or not snapshot.latest_used_at:
                continue
            latest_used = self._parse_timestamp(snapshot.latest_used_at)
            if latest_used is None or latest_used > cutoff:
                continue
            runs = self.run_store.list_runs_by_skill(
                snapshot.skill_name, version=snapshot.skill_version, limit=3
            )
            candidates.append(
                SkillLearningCandidate(
                    candidate_id=self._candidate_id("retire", snapshot.skill_name, snapshot.skill_version),
                    kind="retire_skill",
                    source_run_ids=[record.run_id for record in runs],
                    source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                    related_skill_names=[snapshot.skill_name],
                    reason=(
                        f"Skill version {snapshot.skill_name}/{snapshot.skill_version} has been inactive "
                        f"since {snapshot.latest_used_at} and may be ready for retirement."
                    ),
                    evidence={"skill_version": snapshot.skill_version, "latest_used_at": snapshot.latest_used_at},
                    status="open",
                )
            )
        return candidates

    def _all_effects(self) -> list[SkillEffectRecord]:
        """Return the effect records of every known skill version, each exactly once.

        Known versions are the union of those with a stored performance snapshot
        and those appearing in any run receipt. Each (name, version) is queried a
        single time, so a version activated in several runs is not counted
        repeatedly, and a version without a snapshot yet is still included.
        """
        # A dict serves as an insertion-ordered set: snapshot keys first, then
        # any additional keys found in run receipts.
        keys: dict[tuple[str, str], None] = dict.fromkeys(
            (snapshot.skill_name, snapshot.skill_version)
            for snapshot in self.learning_store.list_performance_snapshots()
        )
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                keys.setdefault((receipt.skill_name, receipt.skill_version))

        effects: list[SkillEffectRecord] = []
        for skill_name, skill_version in keys:
            effects.extend(self.run_store.list_skill_effects(skill_name, version=skill_version))
        return effects

    @staticmethod
    def _primary_skill_name(candidate: SkillLearningCandidate) -> str:
        """Return the candidate's first related skill name.

        Raises:
            ValueError: If the candidate lists no related skills.
        """
        if not candidate.related_skill_names:
            raise ValueError(f"Learning candidate {candidate.candidate_id} has no related skill")
        return candidate.related_skill_names[0]

    @staticmethod
    def _candidate_id(kind: str, *parts: str) -> str:
        """Build a candidate id of the form ``kind:part1|part2|...``."""
        return f"{kind}:{'|'.join(parts)}"

    @staticmethod
    def _task_theme(task_text: str) -> str:
        """Reduce task text to a grouping key: its first eight lower-cased words.

        Whitespace runs are collapsed to single spaces; blank input yields ``""``.
        """
        cleaned = re.sub(r"\s+", " ", task_text.strip().lower())
        if not cleaned:
            return ""
        return " ".join(cleaned.split(" ")[:8]).strip()

    @staticmethod
    def _suggest_skill_name(candidate: SkillLearningCandidate, packet: EvidencePacket) -> str:
        """Pick a skill name for a draft.

        Preference order: the candidate's first related skill, then a slug (at
        most 48 characters, ``a-z0-9`` joined by hyphens) of the packet's first
        task summary, then a random ``generated-skill-xxxxxxxx`` fallback.
        """
        if candidate.related_skill_names:
            return candidate.related_skill_names[0]
        if packet.task_summaries:
            seed = re.sub(r"[^a-z0-9]+", "-", packet.task_summaries[0].lower()).strip("-")
            if seed:
                # Truncation can cut right after a separator; drop it so the
                # name never ends with a hyphen.
                return seed[:48].rstrip("-")
        return f"generated-skill-{uuid4().hex[:8]}"

    @staticmethod
    def _parse_timestamp(value: str) -> datetime | None:
        """Parse an ISO-8601 string into an aware UTC datetime.

        A trailing ``Z`` is accepted; naive values are taken to be UTC.

        Returns:
            The UTC datetime, or ``None`` when the string cannot be parsed.
        """
        try:
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)
|
||||
Reference in New Issue
Block a user