Files
beaver_project/app-instance/backend/beaver/skills/learning/service.py
steven_li 83d9d8c200 ```
feat(learning): 添加技能学习候选者合成锁定机制

添加了 DraftSynthesisInProgress 和 DraftHasNoChanges 异常来处理并发场景,
确保同一技能学习候选者的合成过程不会重复执行。实现了 claim_learning_candidate_for_synthesis
方法来原子性地锁定候选者进行合成。

fix(web): 为技能草案创建端点添加适当的HTTP状态码

当草案没有变化或正在合成时,现在正确返回409状态码而不是内部错误。

feat(skills): 实现技能修订内容比较以检测无变化情况

添加了 _is_noop_revision 方法来比较基础技能和提议的修订,
如果内容没有实际变化则抛出 NoDraftChanges 异常。

refactor(process): 修复任务证据记录后根运行状态更新逻辑

将任务证据记录事件后的状态从 waiting 更改为 done,并设置 finished_at 时间戳。

feat(tools): 防止在同一运行中重复执行外部写入操作

为邮件发送、日历创建等外部写入工具添加去重机制,避免重复的外部操作。

test: 添加技能学习和工具执行的单元测试

增加测试用例验证并发草案合成、重复外部写入抑制和无变化修订检测等功能。
```
2026-06-16 15:58:42 +08:00

640 lines
29 KiB
Python

"""Skill learning loop services."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from itertools import combinations
from pathlib import Path
import re
from typing import Any
from uuid import uuid4
from beaver.engine.providers import ProviderBundle
from beaver.memory.runs.models import RunRecord, SkillEffectRecord
from beaver.memory.runs.store import RunMemoryStore
from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot
from beaver.memory.skills.store import SkillLearningStore
from beaver.plugins.hashing import hash_plugin_skill_tree
from beaver.plugins.tree_merge import merge_supporting_file_trees
from beaver.skills.drafts.service import DraftService
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter
from beaver.skills.specs import SkillActivationReceipt
from beaver.skills.specs.serialization import normalize_frontmatter
@dataclass(slots=True)
class RunReceiptContext:
    """Bundle of one run record and the skill effect records reported with it."""

    # Run record that collect_run_receipts appends to the run store.
    run_record: RunRecord
    # Effect records appended to the run store one at a time; empty by default.
    effect_records: list[SkillEffectRecord] = field(default_factory=list)
class NoDraftChanges(ValueError):
    """Raised when synthesis produces the same effective skill content as the base version.

    ``SkillLearningService.synthesize_draft`` raises this for revision
    candidates when the synthesized payload equals the base skill after
    normalization, so no revision draft is created.
    """
class SkillLearningService:
    """Skill learning loop service.

    Records run receipts, rescores skill versions, proposes learning
    candidates, and synthesizes drafts from those candidates.
    """

    def __init__(
        self,
        *,
        run_store: RunMemoryStore,
        learning_store: SkillLearningStore,
        draft_service: DraftService,
        evidence_selector: EvidenceSelector,
        synthesizer: SkillDraftSynthesizer | None = None,
    ) -> None:
        """Store the collaborators used by the learning loop.

        Args:
            run_store: Store for run records and skill effect records.
            learning_store: Store for learning candidates and performance snapshots.
            draft_service: Service used to create drafts and proposals.
            evidence_selector: Builds evidence packets from run and session ids.
            synthesizer: Draft synthesizer; a default ``SkillDraftSynthesizer``
                is created when omitted (or when a falsy value is passed).
        """
        self.run_store = run_store
        self.learning_store = learning_store
        self.draft_service = draft_service
        self.evidence_selector = evidence_selector
        self.synthesizer = synthesizer or SkillDraftSynthesizer()
def collect_run_receipts(
self,
run_result_context: RunReceiptContext,
*,
generate_candidates: bool = True,
) -> list[SkillLearningCandidate]:
self.run_store.append_run_record(run_result_context.run_record)
for effect in run_result_context.effect_records:
self.run_store.append_skill_effect(effect)
self.rescore_skill_versions()
if not generate_candidates:
return []
return self.build_learning_candidates()
def build_learning_candidates(self) -> list[SkillLearningCandidate]:
candidates: list[SkillLearningCandidate] = []
candidates.extend(self._build_revision_candidates())
candidates.extend(self._build_new_skill_candidates())
candidates.extend(self._build_merge_candidates())
candidates.extend(self._build_retire_candidates())
existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
for candidate in candidates:
if candidate.candidate_id not in existing_ids:
self.learning_store.record_learning_candidate(candidate)
existing_ids.add(candidate.candidate_id)
return candidates
def build_learning_candidates_for_task(
self,
task_id: str,
*,
final_accepted_run_id: str | None = None,
trigger_run_id: str | None = None,
) -> list[SkillLearningCandidate]:
"""Build candidates from a user-accepted Task and all of its runs."""
final_accepted_run_id = final_accepted_run_id or trigger_run_id
if not final_accepted_run_id:
return []
runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
if final_run is None or not self._is_task_accepted_run(final_run):
return []
source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))
if not source_runs:
return []
candidates: list[SkillLearningCandidate] = []
published_receipts = [
receipt
for record in source_runs
for receipt in record.activated_skills
if self._is_published_skill_receipt(receipt)
]
source_run_ids = [record.run_id for record in source_runs]
source_session_ids = list(dict.fromkeys(record.session_id for record in source_runs))
representative_task_text = self._representative_task_text(source_runs, fallback=final_run.task_text)
if not published_receipts:
candidates.append(
SkillLearningCandidate(
candidate_id=f"new:task:{task_id}",
kind="new_skill",
source_run_ids=source_run_ids,
source_session_ids=source_session_ids,
related_skill_names=[],
reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
evidence={
"task_id": task_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"task_text": representative_task_text,
"theme": self._task_theme(representative_task_text),
},
status="open",
priority=1,
confidence=0.8,
trigger_reason="task_accepted",
)
)
else:
seen: set[tuple[str, str]] = set()
for receipt in published_receipts:
key = (receipt.skill_name, receipt.skill_version)
if key in seen:
continue
seen.add(key)
skill_runs = [
record
for record in source_runs
if any(
item.skill_name == receipt.skill_name
and item.skill_version == receipt.skill_version
and self._is_published_skill_receipt(item)
for item in record.activated_skills
)
]
candidates.append(
SkillLearningCandidate(
candidate_id=f"revise:{receipt.skill_name}:{receipt.skill_version}:task:{task_id}",
kind="revise_skill",
source_run_ids=[record.run_id for record in skill_runs],
source_session_ids=list(dict.fromkeys(record.session_id for record in skill_runs)),
related_skill_names=[receipt.skill_name],
reason=(
f"Task {task_id} succeeded with published skill "
f"{receipt.skill_name}/{receipt.skill_version}; consider whether the skill should capture this evidence."
),
evidence={
"task_id": task_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"skill_version": receipt.skill_version,
},
status="open",
priority=1,
confidence=0.7,
trigger_reason="task_accepted",
)
)
existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
created: list[SkillLearningCandidate] = []
for candidate in candidates:
if candidate.candidate_id in existing_ids:
continue
self.learning_store.record_learning_candidate(candidate)
existing_ids.add(candidate.candidate_id)
created.append(candidate)
return created
async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any:
    """Turn a stored learning candidate into a draft or proposal.

    Dispatches on ``candidate.kind``: plugin updates go to
    ``_synthesize_plugin_update``, retire candidates become a retire proposal
    without calling the synthesizer, and new-skill / merge / (any other kind,
    treated as) revision candidates are synthesized from an evidence packet.

    Args:
        candidate_id: Id of a candidate in the learning store.
        provider_bundle: Source of the provider and model used for synthesis.

    Returns:
        Whatever the matching ``draft_service.create_*`` call returns.

    Raises:
        ValueError: If no stored candidate has ``candidate_id``.
        NoDraftChanges: If a revision is identical to the base skill after
            normalization.
    """
    candidates = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()}
    candidate = candidates.get(candidate_id)
    if candidate is None:
        raise ValueError(f"Unknown learning candidate: {candidate_id}")
    if candidate.kind == "plugin_skill_update":
        return await self._synthesize_plugin_update(candidate, provider_bundle)
    if candidate.kind == "retire_skill":
        # NOTE(review): assumes related_skill_names is non-empty for this kind;
        # an empty list would raise IndexError here.
        target_skill = candidate.related_skill_names[0]
        return self.draft_service.create_retire_proposal(
            skill_name=target_skill,
            base_version=candidate.evidence.get("skill_version"),
            created_by="learning-loop",
            reason=candidate.reason,
            evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
        )
    packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
    # NOTE(review): provider and model are picked by two separate checks
    # (auxiliary_provider vs. auxiliary_runtime) — confirm both are always set together.
    provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
    model = (
        provider_bundle.auxiliary_runtime.model
        if provider_bundle.auxiliary_runtime is not None
        else provider_bundle.main_runtime.model
    )
    if candidate.kind == "new_skill":
        payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model)
        return self.draft_service.create_new_skill_draft(
            skill_name=self._suggest_skill_name(candidate, packet, payload.get("frontmatter")),
            proposed_content=payload["content"],
            proposed_frontmatter=payload["frontmatter"],
            created_by="learning-loop",
            # Prefer the synthesizer's reason; fall back to the candidate's when it is empty.
            reason=payload["change_reason"] or candidate.reason,
            evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
        )
    if candidate.kind == "merge_skills":
        # With related skills present, _suggest_skill_name returns the first of them.
        target_name = self._suggest_skill_name(candidate, packet)
        payload = await self.synthesizer.synthesize_merge(
            candidate,
            packet,
            provider,
            model,
            base_skill=self._merged_base_skill_snapshot(candidate.related_skill_names),
        )
        return self.draft_service.create_merge_draft(
            skill_name=target_name,
            base_version=None,
            proposed_content=payload["content"],
            proposed_frontmatter=payload["frontmatter"],
            created_by="learning-loop",
            reason=payload["change_reason"] or candidate.reason,
            evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
        )
    # Every remaining kind is handled as a revision of the first related skill.
    target_skill = candidate.related_skill_names[0]
    base_version = candidate.evidence.get("skill_version")
    base_skill = self._base_skill_snapshot(target_skill, base_version)
    payload = await self.synthesizer.synthesize_revision(
        candidate,
        packet,
        provider,
        model,
        base_skill=base_skill,
    )
    # Refuse to create a draft that would not change the published skill.
    if self._is_noop_revision(payload, base_skill):
        raise NoDraftChanges(
            f"Synthesis produced no changes for {target_skill}/{base_version or 'current'}"
        )
    return self.draft_service.create_revision_draft(
        skill_name=target_skill,
        base_version=base_version,
        proposed_content=payload["content"],
        proposed_frontmatter=payload["frontmatter"],
        created_by="learning-loop",
        reason=payload["change_reason"] or candidate.reason,
        evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
    )
async def _synthesize_plugin_update(self, candidate: SkillLearningCandidate, provider_bundle: ProviderBundle) -> Any:
    """Create a plugin-update draft for a ``plugin_skill_update`` candidate.

    In ``fast_forward`` merge mode the new upstream snapshot is proposed as-is
    without calling the synthesizer. Otherwise the old upstream, the current
    local version and the new upstream are handed to the synthesizer, and a
    supporting-file merge plan is attached to the draft's provenance.

    Args:
        candidate: Candidate whose ``evidence`` carries the plugin references.
        provider_bundle: Source of the provider and model used for synthesis.

    Returns:
        Whatever ``draft_service.create_plugin_update_draft`` returns.

    Raises:
        ValueError: If required evidence fields are missing, or a referenced
            upstream snapshot or local skill version cannot be read.
    """
    evidence = dict(candidate.evidence)
    # The skill name comes from the evidence, else from the first related skill name.
    skill_name = str(evidence.get("skill_name") or (candidate.related_skill_names[0] if candidate.related_skill_names else ""))
    plugin_id = str(evidence.get("plugin_id") or "")
    new_upstream_tree_hash = str(evidence.get("new_upstream_tree_hash") or "")
    local_version = str(evidence.get("local_version") or "")
    merge_mode = str(evidence.get("merge_mode") or "")
    if not skill_name or not plugin_id or not new_upstream_tree_hash or not local_version:
        raise ValueError("Plugin update candidate is missing required evidence references")
    new_upstream = self.draft_service.store.read_upstream_snapshot(
        skill_name,
        plugin_id,
        new_upstream_tree_hash,
    )
    if new_upstream is None:
        raise ValueError("Plugin update references a missing upstream snapshot")
    frontmatter, body = parse_frontmatter(new_upstream.content)
    if merge_mode == "fast_forward":
        # Fast-forward: adopt the new upstream content verbatim, no synthesis.
        return self.draft_service.create_plugin_update_draft(
            skill_name=skill_name,
            base_version=local_version,
            proposed_content=body.strip(),
            proposed_frontmatter=frontmatter,
            created_by="learning-loop",
            reason=candidate.reason,
            provenance={
                **evidence,
                "proposal_kind": "plugin_skill_update",
            },
            evidence_refs=[],
        )
    # Any other merge mode needs the base upstream and the current local version.
    base_upstream_tree_hash = str(evidence.get("base_upstream_tree_hash") or "")
    old_upstream = self.draft_service.store.read_upstream_snapshot(skill_name, plugin_id, base_upstream_tree_hash)
    current_local = self.draft_service.store.read_published_skill(skill_name, local_version)
    if old_upstream is None:
        raise ValueError("Plugin update references a missing base upstream snapshot")
    if current_local is None:
        raise ValueError("Plugin update references a missing local skill version")
    packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
    # NOTE(review): provider and model are picked by two separate checks
    # (auxiliary_provider vs. auxiliary_runtime) — confirm both are always set together.
    provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
    model = (
        provider_bundle.auxiliary_runtime.model
        if provider_bundle.auxiliary_runtime is not None
        else provider_bundle.main_runtime.model
    )
    local_root = self.draft_service.store.root / skill_name / "versions" / local_version
    # Merge plan for supporting files, computed from the digest maps of the
    # base upstream, local and new upstream trees.
    file_plan = merge_supporting_file_trees(
        base=_digest_map(old_upstream.root),
        local=_digest_map(local_root),
        upstream=_digest_map(new_upstream.root),
    )
    payload = await self.synthesizer.synthesize_plugin_update(
        candidate,
        packet,
        provider,
        model,
        old_upstream={"content": old_upstream.content, "frontmatter": old_upstream.snapshot.frontmatter},
        current_local={"content": current_local.content, "frontmatter": current_local.version.frontmatter},
        new_upstream={"content": new_upstream.content, "frontmatter": frontmatter},
    )
    return self.draft_service.create_plugin_update_draft(
        skill_name=skill_name,
        base_version=local_version,
        proposed_content=payload["content"],
        proposed_frontmatter=payload["frontmatter"],
        created_by="learning-loop",
        reason=payload["change_reason"] or candidate.reason,
        provenance={
            **evidence,
            "proposal_kind": "plugin_skill_update",
            # Optional synthesizer outputs; each defaults to an empty list.
            "preserved_local_sections": payload.get("preserved_local_sections", []),
            "adopted_upstream_sections": payload.get("adopted_upstream_sections", []),
            "resolved_conflicts": payload.get("resolved_conflicts", []),
            "dropped_sections": payload.get("dropped_sections", []),
            "supporting_file_plan": file_plan.to_dict(),
        },
        evidence_refs=[],
    )
def _base_skill_snapshot(self, skill_name: str, version: str | None) -> dict[str, Any] | None:
loaded = self.draft_service.store.read_published_skill(skill_name, version)
if loaded is None:
return None
return {
"skill_name": loaded.version.skill_name,
"version": loaded.version.version,
"frontmatter": dict(loaded.version.frontmatter),
"content": loaded.content,
"summary": loaded.version.summary,
"tool_hints": list(loaded.version.tool_hints),
}
@staticmethod
def _is_noop_revision(payload: dict[str, Any], base_skill: dict[str, Any] | None) -> bool:
if base_skill is None:
return False
base_frontmatter = normalize_frontmatter(dict(base_skill.get("frontmatter") or {}))
proposed_frontmatter = normalize_frontmatter(dict(payload.get("frontmatter") or {}))
base_body = _normalize_skill_body(str(base_skill.get("content") or ""))
proposed_body = _normalize_skill_body(str(payload.get("content") or ""))
return base_frontmatter == proposed_frontmatter and base_body == proposed_body
def _merged_base_skill_snapshot(self, skill_names: list[str]) -> dict[str, Any] | None:
snapshots = [
snapshot
for name in skill_names
if (snapshot := self._base_skill_snapshot(name, None)) is not None
]
if not snapshots:
return None
return {
"skill_name": "merge:" + ",".join(str(item["skill_name"]) for item in snapshots),
"version": "mixed",
"frontmatter": {"merged_skills": [item["frontmatter"] for item in snapshots]},
"content": "\n\n".join(
f"<!-- base skill: {item['skill_name']} {item['version']} -->\n{item['content']}"
for item in snapshots
),
"summary": "\n".join(str(item["summary"]) for item in snapshots if item.get("summary")),
"tool_hints": list(
dict.fromkeys(
tool
for item in snapshots
for tool in item.get("tool_hints", [])
if str(tool).strip()
)
),
}
def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
snapshots: list[SkillPerformanceSnapshot] = []
grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {}
for record in self.run_store.list_runs():
for receipt in record.activated_skills:
key = (receipt.skill_name, receipt.skill_version)
grouped.setdefault(key, [])
for effect in self._all_effects():
grouped.setdefault((effect.skill_name, effect.skill_version), []).append(effect)
for (skill_name, skill_version), effects in grouped.items():
activation_count = len(effects)
success_count = sum(1 for item in effects if item.success)
failure_count = activation_count - success_count
last_feedback = next((item.feedback_score for item in reversed(effects) if item.feedback_score is not None), None)
latest_used = effects[-1].created_at if effects else ""
snapshot = SkillPerformanceSnapshot(
skill_name=skill_name,
skill_version=skill_version,
activation_count=activation_count,
success_count=success_count,
failure_count=failure_count,
latest_used_at=latest_used,
last_feedback_score=last_feedback,
)
self.learning_store.update_performance_snapshot(snapshot)
snapshots.append(snapshot)
return snapshots
def _build_revision_candidates(self) -> list[SkillLearningCandidate]:
candidates: list[SkillLearningCandidate] = []
for snapshot in self.learning_store.list_low_performing_versions():
runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=5)
if len(runs) < 2:
continue
candidate = SkillLearningCandidate(
candidate_id=self._candidate_id("revise", snapshot.skill_name, snapshot.skill_version),
kind="revise_skill",
source_run_ids=[record.run_id for record in runs],
source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
related_skill_names=[snapshot.skill_name],
reason=f"Skill version {snapshot.skill_name}/{snapshot.skill_version} is underperforming across repeated runs.",
evidence={"skill_version": snapshot.skill_version},
status="open",
)
candidates.append(candidate)
return candidates
def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]:
groups: dict[str, list[RunRecord]] = {}
all_runs = self.run_store.list_runs()
runs_by_task: dict[str, list[RunRecord]] = {}
for record in all_runs:
if record.task_id:
runs_by_task.setdefault(record.task_id, []).append(record)
for record in all_runs:
task_runs = runs_by_task.get(record.task_id, [record])
key = self._task_theme(self._representative_task_text(task_runs, fallback=record.task_text))
if not key:
continue
groups.setdefault(key, []).append(record)
candidates: list[SkillLearningCandidate] = []
for theme, runs in groups.items():
successful = [record for record in runs if self._is_task_accepted_run(record)]
if len(successful) < 2:
continue
if any(record.activated_skills for record in successful):
continue
candidate = SkillLearningCandidate(
candidate_id=self._candidate_id("new", theme, str(len(successful))),
kind="new_skill",
source_run_ids=[record.run_id for record in successful[-5:]],
source_session_ids=list(dict.fromkeys(record.session_id for record in successful[-5:])),
related_skill_names=[],
reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.",
evidence={"theme": theme},
status="open",
)
candidates.append(candidate)
return candidates
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
for record in self.run_store.list_runs():
if not self._is_task_accepted_run(record):
continue
unique = sorted({receipt.skill_name for receipt in record.activated_skills})
for pair in combinations(unique, 2):
pair_counts.setdefault(pair, []).append(record)
candidates: list[SkillLearningCandidate] = []
for pair, runs in pair_counts.items():
if len(runs) < 2:
continue
candidate = SkillLearningCandidate(
candidate_id=self._candidate_id("merge", *pair),
kind="merge_skills",
source_run_ids=[record.run_id for record in runs[-5:]],
source_session_ids=list(dict.fromkeys(record.session_id for record in runs[-5:])),
related_skill_names=list(pair),
reason=f"Skills {pair[0]} and {pair[1]} repeatedly co-activate and may benefit from consolidation.",
evidence={"pair": list(pair)},
status="open",
)
candidates.append(candidate)
return candidates
def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]:
candidates: list[SkillLearningCandidate] = []
cutoff = datetime.now(timezone.utc) - timedelta(days=stale_days)
for snapshot in self.learning_store.list_performance_snapshots():
if snapshot.activation_count == 0 or not snapshot.latest_used_at:
continue
latest_used = self._parse_timestamp(snapshot.latest_used_at)
if latest_used is None or latest_used > cutoff:
continue
runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=3)
candidate = SkillLearningCandidate(
candidate_id=self._candidate_id("retire", snapshot.skill_name, snapshot.skill_version),
kind="retire_skill",
source_run_ids=[record.run_id for record in runs],
source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
related_skill_names=[snapshot.skill_name],
reason=(
f"Skill version {snapshot.skill_name}/{snapshot.skill_version} has been inactive "
f"since {snapshot.latest_used_at} and may be ready for retirement."
),
evidence={"skill_version": snapshot.skill_version, "latest_used_at": snapshot.latest_used_at},
status="open",
)
candidates.append(candidate)
return candidates
def _all_effects(self) -> list[SkillEffectRecord]:
effects: list[SkillEffectRecord] = []
for candidate in self.learning_store.list_performance_snapshots():
effects.extend(self.run_store.list_skill_effects(candidate.skill_name, version=candidate.skill_version))
if effects:
return effects
# Bootstrap from runs when there are no prior snapshots.
for record in self.run_store.list_runs():
for receipt in record.activated_skills:
effects.extend(self.run_store.list_skill_effects(receipt.skill_name, version=receipt.skill_version))
return effects
@staticmethod
def _is_task_accepted_run(record: RunRecord) -> bool:
feedback = record.feedback or {}
acceptance_type = feedback.get("acceptance_type")
if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
acceptance_type = "accept"
return (
bool(record.success)
and bool(record.task_id)
and acceptance_type == "accept"
)
@staticmethod
def _is_published_skill_receipt(receipt: SkillActivationReceipt) -> bool:
return (
not receipt.skill_name.startswith(("draft:", "ephemeral:"))
and not receipt.skill_version.startswith(("draft:", "ephemeral:"))
and receipt.activation_reason not in {"generated_missing_skill", "ephemeral_guidance"}
)
@staticmethod
def _candidate_id(kind: str, *parts: str) -> str:
return f"{kind}:{'|'.join(parts)}"
@staticmethod
def _task_theme(task_text: str) -> str:
cleaned = re.sub(r"\s+", " ", task_text.strip())
if not cleaned:
return ""
first_sentence = re.split(r"[。!?.!?]", cleaned, maxsplit=1)[0].strip()
if not first_sentence:
first_sentence = cleaned
words = first_sentence.split(" ")
return " ".join(words[:8]).strip()
@staticmethod
def _representative_task_text(runs: list[RunRecord], *, fallback: str = "") -> str:
ordered = sorted(
runs,
key=lambda item: (
item.attempt_index is None,
item.attempt_index if item.attempt_index is not None else 0,
item.started_at,
item.run_id,
),
)
for record in ordered:
text = record.task_text.strip()
if text:
return text
return fallback.strip()
@staticmethod
def _suggest_skill_name(
candidate: SkillLearningCandidate,
packet: EvidencePacket,
frontmatter: dict[str, Any] | None = None,
) -> str:
if candidate.related_skill_names:
return candidate.related_skill_names[0]
if isinstance(frontmatter, dict):
description = str(frontmatter.get("description") or "")
seed = SkillLearningService._slugify_skill_name(description)
if seed:
return seed
if packet.task_summaries:
seed = SkillLearningService._slugify_skill_name(packet.task_summaries[0])
if seed:
return seed
return f"generated-skill-{uuid4().hex[:8]}"
@staticmethod
def _slugify_skill_name(value: str) -> str:
seed = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
seed = re.sub(r"-+", "-", seed)
if not seed or seed.isdigit() or len(seed) < 3:
return ""
words = [part for part in seed.split("-") if part and not part.isdigit()]
seed = "-".join(words) or seed
return seed[:48].strip("-")
@staticmethod
def _parse_timestamp(value: str) -> datetime | None:
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return None
if parsed.tzinfo is None:
return parsed.replace(tzinfo=timezone.utc)
return parsed.astimezone(timezone.utc)
def _normalize_skill_body(content: str) -> str:
    """Return the skill body in a form suitable for equality comparison.

    Frontmatter is removed via ``strip_frontmatter``, surrounding whitespace
    is stripped, and trailing whitespace is trimmed from every line.
    """
    body = strip_frontmatter(content).strip()
    trimmed_lines = [line.rstrip() for line in body.splitlines()]
    return "\n".join(trimmed_lines).strip()
def _digest_map(root: Path) -> dict[str, dict[str, Any]]:
    """Map each supporting file under ``root`` to its hash, executable flag and size.

    The tree is hashed with ``hash_plugin_skill_tree``; ``SKILL.md``,
    ``version.json`` and ``upstream.json`` are left out of the result.
    """
    skipped = {"SKILL.md", "version.json", "upstream.json"}
    entries: dict[str, dict[str, Any]] = {}
    for entry in hash_plugin_skill_tree(root).files:
        if entry.path in skipped:
            continue
        entries[entry.path] = {
            "content_hash": entry.content_hash,
            "executable": entry.executable,
            "size": entry.size,
        }
    return entries