feat(learning): 修复任务运行记录排序逻辑处理空 attempt_index 的情况。当 RunRecord 的 attempt_index 为 None 时，之前的排序逻辑会出现问题。现在通过在排序键中显式处理 None 值来解决这个问题：将 None 值排在前面，并将其转换为 0 进行比较。同时添加了单元测试，验证团队运行记录（没有 attempt_index）的处理情况。
518 lines
23 KiB
Python
518 lines
23 KiB
Python
"""Skill learning loop services."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta, timezone
|
|
from itertools import combinations
|
|
import re
|
|
from typing import Any
|
|
from uuid import uuid4
|
|
|
|
from beaver.engine.providers import ProviderBundle
|
|
from beaver.memory.runs.models import RunRecord, SkillEffectRecord
|
|
from beaver.memory.runs.store import RunMemoryStore
|
|
from beaver.memory.skills.models import SkillLearningCandidate, SkillPerformanceSnapshot
|
|
from beaver.memory.skills.store import SkillLearningStore
|
|
from beaver.skills.drafts.service import DraftService
|
|
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
|
|
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
|
|
from beaver.skills.specs import SkillActivationReceipt
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class RunReceiptContext:
|
|
run_record: RunRecord
|
|
effect_records: list[SkillEffectRecord] = field(default_factory=list)
|
|
|
|
|
|
class SkillLearningService:
    """Drive the skill learning loop on top of the run and learning stores.

    The service records run receipts, recomputes per-version performance
    snapshots, derives learning candidates (revise / new / merge / retire)
    and turns a chosen candidate into a draft through the draft service.
    """

    def __init__(
        self,
        *,
        run_store: RunMemoryStore,
        learning_store: SkillLearningStore,
        draft_service: DraftService,
        evidence_selector: EvidenceSelector,
        synthesizer: SkillDraftSynthesizer | None = None,
    ) -> None:
        """Store the collaborators; build a default synthesizer when none is given."""
        self.run_store = run_store
        self.learning_store = learning_store
        self.draft_service = draft_service
        self.evidence_selector = evidence_selector
        self.synthesizer = synthesizer or SkillDraftSynthesizer()

    def collect_run_receipts(
        self,
        run_result_context: RunReceiptContext,
        *,
        generate_candidates: bool = True,
    ) -> list[SkillLearningCandidate]:
        """Persist a run and its skill effects, rescore, and optionally build candidates.

        Returns an empty list when ``generate_candidates`` is false, otherwise
        the result of ``build_learning_candidates()``.
        """
        self.run_store.append_run_record(run_result_context.run_record)
        for effect in run_result_context.effect_records:
            self.run_store.append_skill_effect(effect)
        self.rescore_skill_versions()
        if not generate_candidates:
            return []
        return self.build_learning_candidates()

    def build_learning_candidates(self) -> list[SkillLearningCandidate]:
        """Build all candidate kinds and record the ones not yet in the learning store.

        The returned list contains every candidate that was built, including
        those whose id already existed in the store (only new ids are recorded).
        """
        candidates: list[SkillLearningCandidate] = []
        candidates.extend(self._build_revision_candidates())
        candidates.extend(self._build_new_skill_candidates())
        candidates.extend(self._build_merge_candidates())
        candidates.extend(self._build_retire_candidates())
        existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
        for candidate in candidates:
            if candidate.candidate_id not in existing_ids:
                self.learning_store.record_learning_candidate(candidate)
                existing_ids.add(candidate.candidate_id)
        return candidates

    def build_learning_candidates_for_task(
        self,
        task_id: str,
        *,
        final_accepted_run_id: str | None = None,
        trigger_run_id: str | None = None,
    ) -> list[SkillLearningCandidate]:
        """Build candidates from a user-accepted Task and all of its runs.

        ``trigger_run_id`` is used when ``final_accepted_run_id`` is not given.
        Nothing is produced unless that run belongs to ``task_id`` and passes
        ``_is_task_accepted_run``. When no published skill was activated in any
        run of the task a single ``new_skill`` candidate is built; otherwise
        one ``revise_skill`` candidate is built per distinct published
        skill/version. Only candidates whose id is not already stored are
        recorded and returned.
        """
        final_accepted_run_id = final_accepted_run_id or trigger_run_id
        if not final_accepted_run_id:
            return []
        runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
        final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
        if final_run is None or not self._is_task_accepted_run(final_run):
            return []

        # ``runs`` contains at least ``final_run`` here, so it cannot be empty.
        source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))

        candidates: list[SkillLearningCandidate] = []
        published_receipts = [
            receipt
            for record in source_runs
            for receipt in record.activated_skills
            if self._is_published_skill_receipt(receipt)
        ]
        source_run_ids = [record.run_id for record in source_runs]
        source_session_ids = list(dict.fromkeys(record.session_id for record in source_runs))
        representative_task_text = self._representative_task_text(source_runs, fallback=final_run.task_text)

        if not published_receipts:
            candidates.append(
                SkillLearningCandidate(
                    candidate_id=f"new:task:{task_id}",
                    kind="new_skill",
                    source_run_ids=source_run_ids,
                    source_session_ids=source_session_ids,
                    related_skill_names=[],
                    reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
                    evidence={
                        "task_id": task_id,
                        "final_accepted_run_id": final_accepted_run_id,
                        "source_run_ids": source_run_ids,
                        "task_text": representative_task_text,
                        "theme": self._task_theme(representative_task_text),
                    },
                    status="open",
                    priority=1,
                    confidence=0.8,
                    trigger_reason="task_accepted",
                )
            )
        else:
            # One revision candidate per distinct (skill name, skill version).
            seen: set[tuple[str, str]] = set()
            for receipt in published_receipts:
                key = (receipt.skill_name, receipt.skill_version)
                if key in seen:
                    continue
                seen.add(key)
                # Runs of this task in which this exact published version was activated.
                skill_runs = [
                    record
                    for record in source_runs
                    if any(
                        item.skill_name == receipt.skill_name
                        and item.skill_version == receipt.skill_version
                        and self._is_published_skill_receipt(item)
                        for item in record.activated_skills
                    )
                ]
                candidates.append(
                    SkillLearningCandidate(
                        candidate_id=f"revise:{receipt.skill_name}:{receipt.skill_version}:task:{task_id}",
                        kind="revise_skill",
                        source_run_ids=[record.run_id for record in skill_runs],
                        source_session_ids=list(dict.fromkeys(record.session_id for record in skill_runs)),
                        related_skill_names=[receipt.skill_name],
                        reason=(
                            f"Task {task_id} succeeded with published skill "
                            f"{receipt.skill_name}/{receipt.skill_version}; consider whether the skill should capture this evidence."
                        ),
                        evidence={
                            "task_id": task_id,
                            "final_accepted_run_id": final_accepted_run_id,
                            "source_run_ids": source_run_ids,
                            "skill_version": receipt.skill_version,
                        },
                        status="open",
                        priority=1,
                        confidence=0.7,
                        trigger_reason="task_accepted",
                    )
                )

        existing_ids = {item.candidate_id for item in self.learning_store.list_learning_candidates()}
        created: list[SkillLearningCandidate] = []
        for candidate in candidates:
            if candidate.candidate_id in existing_ids:
                continue
            self.learning_store.record_learning_candidate(candidate)
            existing_ids.add(candidate.candidate_id)
            created.append(candidate)
        return created

    async def synthesize_draft(self, candidate_id: str, provider_bundle: ProviderBundle) -> Any:
        """Turn a stored learning candidate into a draft or retire proposal.

        ``retire_skill`` candidates go straight to a retire proposal without
        calling the synthesizer. ``new_skill`` and ``merge_skills`` candidates
        are synthesized and handed to the matching draft-service method; any
        other kind is treated as a revision of the first related skill.

        Returns whatever the draft service call returns.

        Raises:
            ValueError: if ``candidate_id`` is not among the stored candidates.
        """
        candidates = {item.candidate_id: item for item in self.learning_store.list_learning_candidates()}
        candidate = candidates.get(candidate_id)
        if candidate is None:
            raise ValueError(f"Unknown learning candidate: {candidate_id}")
        if candidate.kind == "retire_skill":
            target_skill = candidate.related_skill_names[0]
            return self.draft_service.create_retire_proposal(
                skill_name=target_skill,
                base_version=candidate.evidence.get("skill_version"),
                created_by="learning-loop",
                reason=candidate.reason,
                evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
            )
        packet = self.evidence_selector.build_evidence_packet(candidate.source_run_ids, candidate.source_session_ids)
        # Prefer the auxiliary provider/runtime when the bundle has one.
        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        model = (
            provider_bundle.auxiliary_runtime.model
            if provider_bundle.auxiliary_runtime is not None
            else provider_bundle.main_runtime.model
        )
        if candidate.kind == "new_skill":
            payload = await self.synthesizer.synthesize_new_skill(candidate, packet, provider, model)
            return self.draft_service.create_new_skill_draft(
                skill_name=self._suggest_skill_name(candidate, packet, payload.get("frontmatter")),
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                reason=payload["change_reason"] or candidate.reason,
                evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
            )
        if candidate.kind == "merge_skills":
            target_name = self._suggest_skill_name(candidate, packet)
            payload = await self.synthesizer.synthesize_merge(
                candidate,
                packet,
                provider,
                model,
                base_skill=self._merged_base_skill_snapshot(candidate.related_skill_names),
            )
            return self.draft_service.create_merge_draft(
                skill_name=target_name,
                base_version=None,
                proposed_content=payload["content"],
                proposed_frontmatter=payload["frontmatter"],
                created_by="learning-loop",
                reason=payload["change_reason"] or candidate.reason,
                evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
            )
        # Remaining kinds are handled as a revision of the first related skill.
        target_skill = candidate.related_skill_names[0]
        base_version = candidate.evidence.get("skill_version")
        payload = await self.synthesizer.synthesize_revision(
            candidate,
            packet,
            provider,
            model,
            base_skill=self._base_skill_snapshot(target_skill, base_version),
        )
        return self.draft_service.create_revision_draft(
            skill_name=target_skill,
            base_version=base_version,
            proposed_content=payload["content"],
            proposed_frontmatter=payload["frontmatter"],
            created_by="learning-loop",
            reason=payload["change_reason"] or candidate.reason,
            evidence_refs=[{"run_id": item} for item in candidate.source_run_ids],
        )

    def _base_skill_snapshot(self, skill_name: str, version: str | None) -> dict[str, Any] | None:
        """Return a plain-dict copy of a published skill, or ``None`` if it cannot be read."""
        loaded = self.draft_service.store.read_published_skill(skill_name, version)
        if loaded is None:
            return None
        return {
            "skill_name": loaded.version.skill_name,
            "version": loaded.version.version,
            "frontmatter": dict(loaded.version.frontmatter),
            "content": loaded.content,
            "summary": loaded.version.summary,
            "tool_hints": list(loaded.version.tool_hints),
        }

    def _merged_base_skill_snapshot(self, skill_names: list[str]) -> dict[str, Any] | None:
        """Combine the snapshots of several published skills into one merge base.

        Skills that cannot be read are skipped; ``None`` is returned when none
        could be read. Tool hints are de-duplicated in first-seen order.
        """
        snapshots = [
            snapshot
            for name in skill_names
            if (snapshot := self._base_skill_snapshot(name, None)) is not None
        ]
        if not snapshots:
            return None
        return {
            "skill_name": "merge:" + ",".join(str(item["skill_name"]) for item in snapshots),
            "version": "mixed",
            "frontmatter": {"merged_skills": [item["frontmatter"] for item in snapshots]},
            "content": "\n\n".join(
                f"<!-- base skill: {item['skill_name']} {item['version']} -->\n{item['content']}"
                for item in snapshots
            ),
            "summary": "\n".join(str(item["summary"]) for item in snapshots if item.get("summary")),
            "tool_hints": list(
                dict.fromkeys(
                    tool
                    for item in snapshots
                    for tool in item.get("tool_hints", [])
                    if str(tool).strip()
                )
            ),
        }

    def rescore_skill_versions(self) -> list[SkillPerformanceSnapshot]:
        """Recompute and store a performance snapshot for every known skill version.

        Every skill version activated in any run gets a snapshot, even when no
        effect record exists for it (counts are then zero and
        ``latest_used_at`` is an empty string).
        """
        snapshots: list[SkillPerformanceSnapshot] = []
        grouped: dict[tuple[str, str], list[SkillEffectRecord]] = {}
        # Seed a group for each activated version so it is scored even without effects.
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                key = (receipt.skill_name, receipt.skill_version)
                grouped.setdefault(key, [])
        for effect in self._all_effects():
            grouped.setdefault((effect.skill_name, effect.skill_version), []).append(effect)
        for (skill_name, skill_version), effects in grouped.items():
            activation_count = len(effects)
            success_count = sum(1 for item in effects if item.success)
            failure_count = activation_count - success_count
            # Most recent non-null feedback score, relying on the store's effect order.
            last_feedback = next(
                (item.feedback_score for item in reversed(effects) if item.feedback_score is not None),
                None,
            )
            latest_used = effects[-1].created_at if effects else ""
            snapshot = SkillPerformanceSnapshot(
                skill_name=skill_name,
                skill_version=skill_version,
                activation_count=activation_count,
                success_count=success_count,
                failure_count=failure_count,
                latest_used_at=latest_used,
                last_feedback_score=last_feedback,
            )
            self.learning_store.update_performance_snapshot(snapshot)
            snapshots.append(snapshot)
        return snapshots

    def _build_revision_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a revision for each low-performing version that has at least two runs."""
        candidates: list[SkillLearningCandidate] = []
        for snapshot in self.learning_store.list_low_performing_versions():
            runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=5)
            if len(runs) < 2:
                continue
            candidate = SkillLearningCandidate(
                candidate_id=self._candidate_id("revise", snapshot.skill_name, snapshot.skill_version),
                kind="revise_skill",
                source_run_ids=[record.run_id for record in runs],
                source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                related_skill_names=[snapshot.skill_name],
                reason=f"Skill version {snapshot.skill_name}/{snapshot.skill_version} is underperforming across repeated runs.",
                evidence={"skill_version": snapshot.skill_version},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _build_new_skill_candidates(self) -> list[SkillLearningCandidate]:
        """Propose a new skill for each task theme with repeated skill-free successes.

        Runs are grouped by the theme of their task's representative text. A
        theme yields a candidate when at least two of its runs are accepted
        and none of those accepted runs activated any skill.
        """
        groups: dict[str, list[RunRecord]] = {}
        all_runs = self.run_store.list_runs()
        runs_by_task: dict[str, list[RunRecord]] = {}
        for record in all_runs:
            if record.task_id:
                runs_by_task.setdefault(record.task_id, []).append(record)
        for record in all_runs:
            # Runs without a task id are themed from their own text only.
            task_runs = runs_by_task.get(record.task_id, [record])
            key = self._task_theme(self._representative_task_text(task_runs, fallback=record.task_text))
            if not key:
                continue
            groups.setdefault(key, []).append(record)
        candidates: list[SkillLearningCandidate] = []
        for theme, runs in groups.items():
            successful = [record for record in runs if self._is_task_accepted_run(record)]
            if len(successful) < 2:
                continue
            if any(record.activated_skills for record in successful):
                continue
            recent = successful[-5:]
            candidate = SkillLearningCandidate(
                # The success count is part of the id, so a new id appears as the count changes.
                candidate_id=self._candidate_id("new", theme, str(len(successful))),
                kind="new_skill",
                source_run_ids=[record.run_id for record in recent],
                source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                related_skill_names=[],
                reason=f"Repeated successful tasks around '{theme}' suggest a reusable skill should be created.",
                evidence={"theme": theme},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Propose merging each pair of skills co-activated in at least two accepted runs."""
        pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
        for record in self.run_store.list_runs():
            if not self._is_task_accepted_run(record):
                continue
            # Sorted names give every pair one canonical (a, b) ordering.
            unique = sorted({receipt.skill_name for receipt in record.activated_skills})
            for pair in combinations(unique, 2):
                pair_counts.setdefault(pair, []).append(record)
        candidates: list[SkillLearningCandidate] = []
        for pair, runs in pair_counts.items():
            if len(runs) < 2:
                continue
            recent = runs[-5:]
            candidate = SkillLearningCandidate(
                candidate_id=self._candidate_id("merge", *pair),
                kind="merge_skills",
                source_run_ids=[record.run_id for record in recent],
                source_session_ids=list(dict.fromkeys(record.session_id for record in recent)),
                related_skill_names=list(pair),
                reason=f"Skills {pair[0]} and {pair[1]} repeatedly co-activate and may benefit from consolidation.",
                evidence={"pair": list(pair)},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _build_retire_candidates(self, *, stale_days: int = 30) -> list[SkillLearningCandidate]:
        """Propose retiring skill versions last used more than ``stale_days`` days ago.

        Snapshots with no activations, no ``latest_used_at`` or an unparseable
        timestamp are skipped.
        """
        candidates: list[SkillLearningCandidate] = []
        cutoff = datetime.now(timezone.utc) - timedelta(days=stale_days)
        for snapshot in self.learning_store.list_performance_snapshots():
            if snapshot.activation_count == 0 or not snapshot.latest_used_at:
                continue
            latest_used = self._parse_timestamp(snapshot.latest_used_at)
            if latest_used is None or latest_used > cutoff:
                continue
            runs = self.run_store.list_runs_by_skill(snapshot.skill_name, version=snapshot.skill_version, limit=3)
            candidate = SkillLearningCandidate(
                candidate_id=self._candidate_id("retire", snapshot.skill_name, snapshot.skill_version),
                kind="retire_skill",
                source_run_ids=[record.run_id for record in runs],
                source_session_ids=list(dict.fromkeys(record.session_id for record in runs)),
                related_skill_names=[snapshot.skill_name],
                reason=(
                    f"Skill version {snapshot.skill_name}/{snapshot.skill_version} has been inactive "
                    f"since {snapshot.latest_used_at} and may be ready for retirement."
                ),
                evidence={"skill_version": snapshot.skill_version, "latest_used_at": snapshot.latest_used_at},
                status="open",
            )
            candidates.append(candidate)
        return candidates

    def _all_effects(self) -> list[SkillEffectRecord]:
        """Return the stored effect records of every known skill version.

        The skill versions are the union of those with a performance snapshot
        and those activated in any run. Each (name, version) pair is queried
        exactly once, so a version activated in several runs is not counted
        several times, and a version seen for the first time is included even
        before it has a snapshot.
        """
        # A dict is used as an insertion-ordered set of (name, version) keys.
        keys: dict[tuple[str, str], None] = {}
        for snapshot in self.learning_store.list_performance_snapshots():
            keys.setdefault((snapshot.skill_name, snapshot.skill_version), None)
        for record in self.run_store.list_runs():
            for receipt in record.activated_skills:
                keys.setdefault((receipt.skill_name, receipt.skill_version), None)
        effects: list[SkillEffectRecord] = []
        for skill_name, skill_version in keys:
            effects.extend(self.run_store.list_skill_effects(skill_name, version=skill_version))
        return effects

    @staticmethod
    def _is_task_accepted_run(record: RunRecord) -> bool:
        """Return whether a run succeeded, belongs to a task and was accepted.

        A ``feedback_type`` of ``"satisfied"`` counts as acceptance when the
        feedback carries no ``acceptance_type``.
        """
        feedback = record.feedback or {}
        acceptance_type = feedback.get("acceptance_type")
        if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
            acceptance_type = "accept"
        return (
            bool(record.success)
            and bool(record.task_id)
            and acceptance_type == "accept"
        )

    @staticmethod
    def _is_published_skill_receipt(receipt: SkillActivationReceipt) -> bool:
        """Return whether a receipt refers to a published (not draft/ephemeral) skill."""
        return (
            not receipt.skill_name.startswith(("draft:", "ephemeral:"))
            and not receipt.skill_version.startswith(("draft:", "ephemeral:"))
            and receipt.activation_reason not in {"generated_missing_skill", "ephemeral_guidance"}
        )

    @staticmethod
    def _candidate_id(kind: str, *parts: str) -> str:
        """Build a candidate id of the form ``kind:part1|part2|...``."""
        return f"{kind}:{'|'.join(parts)}"

    @staticmethod
    def _task_theme(task_text: str) -> str:
        """Return up to the first eight words of the first sentence of ``task_text``.

        Whitespace runs are collapsed first. Sentences end at ``。``, ``！``,
        ``？``, ``.``, ``!`` or ``?``. Returns ``""`` for blank input.
        """
        cleaned = re.sub(r"\s+", " ", task_text.strip())
        if not cleaned:
            return ""
        # Full-width and ASCII sentence terminators are both recognised.
        first_sentence = re.split(r"[。！？.!?]", cleaned, maxsplit=1)[0].strip()
        if not first_sentence:
            # The text starts with a terminator; fall back to the whole text.
            first_sentence = cleaned
        words = first_sentence.split(" ")
        return " ".join(words[:8]).strip()

    @staticmethod
    def _representative_task_text(runs: list[RunRecord], *, fallback: str = "") -> str:
        """Return the first non-blank task text among ``runs``, else ``fallback`` stripped.

        Runs are ordered by ``attempt_index`` (runs without one sort after all
        indexed runs), then ``started_at``, then ``run_id``.
        """
        ordered = sorted(
            runs,
            key=lambda item: (
                # ``None`` cannot be compared with ints, so it is split out
                # into a leading flag and replaced by 0 in the numeric slot.
                item.attempt_index is None,
                item.attempt_index if item.attempt_index is not None else 0,
                item.started_at,
                item.run_id,
            ),
        )
        for record in ordered:
            text = record.task_text.strip()
            if text:
                return text
        return fallback.strip()

    @staticmethod
    def _suggest_skill_name(
        candidate: SkillLearningCandidate,
        packet: EvidencePacket,
        frontmatter: dict[str, Any] | None = None,
    ) -> str:
        """Pick a skill name for a draft.

        Preference order: the candidate's first related skill name, a slug of
        the frontmatter ``description``, a slug of the packet's first task
        summary, and finally a random ``generated-skill-<hex>`` name.
        """
        if candidate.related_skill_names:
            return candidate.related_skill_names[0]
        if isinstance(frontmatter, dict):
            description = str(frontmatter.get("description") or "")
            seed = SkillLearningService._slugify_skill_name(description)
            if seed:
                return seed
        if packet.task_summaries:
            seed = SkillLearningService._slugify_skill_name(packet.task_summaries[0])
            if seed:
                return seed
        return f"generated-skill-{uuid4().hex[:8]}"

    @staticmethod
    def _slugify_skill_name(value: str) -> str:
        """Convert free text into a lowercase ``a-z0-9`` hyphenated slug of at most 48 chars.

        Returns ``""`` when the slug would be empty, purely numeric or shorter
        than three characters. Purely numeric words are dropped unless that
        would leave nothing.
        """
        seed = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
        seed = re.sub(r"-+", "-", seed)
        if not seed or seed.isdigit() or len(seed) < 3:
            return ""
        words = [part for part in seed.split("-") if part and not part.isdigit()]
        seed = "-".join(words) or seed
        return seed[:48].strip("-")

    @staticmethod
    def _parse_timestamp(value: str) -> datetime | None:
        """Parse an ISO-8601 timestamp into an aware UTC ``datetime``.

        A trailing ``Z`` is accepted. Naive values are treated as UTC. Returns
        ``None`` when the string cannot be parsed.
        """
        try:
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)
|