Files
beaver_project/app-instance/backend/beaver/memory/skills/store.py
steven_li 83d9d8c200 ```
feat(learning): 添加技能学习候选者合成锁定机制

添加了 DraftSynthesisInProgress 和 DraftHasNoChanges 异常来处理并发场景,
确保同一技能学习候选者的合成过程不会重复执行。实现了 claim_learning_candidate_for_synthesis
方法来原子性地锁定候选者进行合成。

fix(web): 为技能草案创建端点添加适当的HTTP状态码

当草案没有变化或正在合成时,现在正确返回409状态码而不是内部错误。

feat(skills): 实现技能修订内容比较以检测无变化情况

添加了 _is_noop_revision 方法来比较基础技能和提议的修订,
如果内容没有实际变化则抛出 NoDraftChanges 异常。

refactor(process): 修复任务证据记录后根运行状态更新逻辑

将任务证据记录事件后的状态从 waiting 更改为 done,并设置 finished_at 时间戳。

feat(tools): 防止在同一运行中重复执行外部写入操作

为邮件发送、日历创建等外部写入工具添加去重机制,避免重复的外部操作。

test: 添加技能学习和工具执行的单元测试

增加测试用例验证并发草案合成、重复外部写入抑制和无变化修订检测等功能。
```
2026-06-16 15:58:42 +08:00

305 lines
12 KiB
Python

"""File-backed skill learning store."""
from __future__ import annotations
import json
from pathlib import Path
import threading
from uuid import uuid4
from contextlib import contextmanager
from typing import Iterator
from beaver.foundation.utils.file_lock import WorkspaceWriteLock
from .models import (
SkillDraftEvalReport,
SkillDraftSafetyReport,
SkillLearningAuditEvent,
SkillLearningCandidate,
SkillPerformanceSnapshot,
)
class SkillLearningStore:
    """File-backed store for skill-learning candidates, audit events and reports.

    Files kept under ``root``:

    * ``performance.jsonl`` - one performance snapshot per line.
    * ``learning-candidates.jsonl`` - one learning candidate per line.
    * ``learning-audit.jsonl`` - append-only audit events.
    * ``safety-reports/<skill_name>/<draft_id>/report-<report_id>.json``
    * ``eval-reports/<skill_name>/<draft_id>/report-<report_id>.json``

    Mutations of the candidates file are serialized through ``_locked()``.
    Whole-file rewrites go through ``_write_jsonl()``, which swaps the new
    content in with a rename so a reader never sees a half-written file.
    """

    def __init__(self, root: str | Path, *, write_lock: WorkspaceWriteLock | None = None) -> None:
        """Create the store rooted at *root*, creating the directory if needed.

        Args:
            root: Directory that holds every file managed by this store.
            write_lock: Optional lock used by ``_locked()``. When omitted, an
                instance-local ``threading.RLock`` is used instead.
        """
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.write_lock = write_lock
        self._local_lock = threading.RLock()
        self.performance_path = self.root / "performance.jsonl"
        self.candidates_path = self.root / "learning-candidates.jsonl"
        self.audit_path = self.root / "learning-audit.jsonl"
        self.safety_reports_dir = self.root / "safety-reports"
        self.eval_reports_dir = self.root / "eval-reports"

    # ------------------------------------------------------------------
    # Learning candidates
    # ------------------------------------------------------------------

    def record_learning_candidate(self, candidate: SkillLearningCandidate) -> None:
        """Append *candidate* and a ``candidate_created`` audit event.

        The candidate is normalized by a ``to_dict``/``from_dict`` round trip
        before it is written. No duplicate check is performed; use
        ``record_learning_candidate_if_absent`` for that.
        """
        normalized = SkillLearningCandidate.from_dict(candidate.to_dict())
        # Take the store lock so this append cannot land between the read and
        # the rewrite of a concurrent update/claim and be silently dropped.
        with self._locked():
            self._store_new_candidate(normalized)

    def record_learning_candidate_if_absent(
        self,
        candidate: SkillLearningCandidate,
    ) -> tuple[SkillLearningCandidate, bool]:
        """Record *candidate* unless one with the same ``candidate_id`` exists.

        Returns:
            ``(stored, created)``: the already-stored candidate and ``False``
            when the id was present, otherwise the normalized new candidate
            and ``True``.
        """
        normalized = SkillLearningCandidate.from_dict(candidate.to_dict())
        with self._locked():
            # Built as a dict so that, should the file contain duplicate ids,
            # the last entry for an id is the one returned.
            by_id = {
                item.candidate_id: item
                for item in self.list_learning_candidates()
            }
            found = by_id.get(normalized.candidate_id)
            if found is not None:
                return found, False
            self._store_new_candidate(normalized)
        return normalized, True

    def _store_new_candidate(self, normalized: SkillLearningCandidate) -> None:
        """Append a normalized candidate plus its creation audit event.

        Callers are expected to hold ``_locked()``.
        """
        self._append_jsonl(self.candidates_path, normalized.to_dict())
        self.append_audit_event(
            normalized.candidate_id,
            "candidate_created",
            {
                "kind": normalized.kind,
                "status": normalized.status,
                "reason": normalized.reason,
            },
        )

    def update_learning_candidate(self, candidate_id: str, **updates: object) -> SkillLearningCandidate | None:
        """Apply field *updates* to the first candidate with *candidate_id*.

        ``updated_at`` is set to the current UTC time unless the caller passes
        it explicitly. The whole candidates file is rewritten.

        Returns:
            The updated candidate, or ``None`` when no candidate matched (in
            which case nothing is written).
        """
        with self._locked():
            candidates = self.list_learning_candidates()
            updated: SkillLearningCandidate | None = None
            for index, candidate in enumerate(candidates):
                if candidate.candidate_id != candidate_id:
                    continue
                payload = candidate.to_dict()
                payload.update(updates)
                if "updated_at" not in updates:
                    payload["updated_at"] = _utc_now()
                updated = SkillLearningCandidate.from_dict(payload)
                candidates[index] = updated
                break  # only the first match is updated
            if updated is None:
                return None
            self._write_candidates(candidates)
        return updated

    def transition_learning_candidate(
        self,
        candidate_id: str,
        status: str,
        *,
        event_type: str | None = None,
        payload: dict | None = None,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Set a candidate's status and record an audit event for the change.

        Args:
            candidate_id: Candidate to update.
            status: New status value.
            event_type: Audit event type; defaults to ``candidate_<status>``.
            payload: Extra audit payload, merged over ``{"status": status}``.
            **updates: Additional candidate fields to change.

        Returns:
            The updated candidate, or ``None`` when it does not exist (no
            audit event is written in that case).
        """
        updated = self.update_learning_candidate(candidate_id, status=status, **updates)
        if updated is not None:
            self.append_audit_event(
                candidate_id,
                event_type or f"candidate_{status}",
                {"status": status, **dict(payload or {})},
            )
        return updated

    def claim_learning_candidate_for_synthesis(
        self,
        candidate_id: str,
        *,
        force: bool = False,
    ) -> SkillLearningCandidate | None:
        """Atomically claim a candidate before the expensive draft synthesis step.

        On success the candidate is stored with status ``"synthesizing"`` and
        ``last_error`` cleared, and a ``draft_synthesis_started`` audit event
        is appended.

        Returns:
            The claimed candidate, or ``None`` when the candidate does not
            exist, is already ``"queued"``/``"synthesizing"``, or already has
            both ``draft_skill_name`` and ``draft_id`` and *force* is false.
        """
        with self._locked():
            candidates = self.list_learning_candidates()
            claimed: SkillLearningCandidate | None = None
            for index, candidate in enumerate(candidates):
                if candidate.candidate_id != candidate_id:
                    continue
                if candidate.status in {"queued", "synthesizing"}:
                    # Someone else owns it; ``force`` does not override this.
                    return None
                if not force and candidate.draft_skill_name and candidate.draft_id:
                    return None
                payload = candidate.to_dict()
                payload.update(
                    {
                        "status": "synthesizing",
                        "last_error": None,
                        "updated_at": _utc_now(),
                    }
                )
                claimed = SkillLearningCandidate.from_dict(payload)
                candidates[index] = claimed
                break
            if claimed is None:
                return None
            self._write_candidates(candidates)
            self.append_audit_event(
                candidate_id,
                "draft_synthesis_started",
                {"status": "synthesizing", "force": force},
            )
        return claimed

    def _write_candidates(self, candidates: list[SkillLearningCandidate]) -> None:
        """Rewrite the candidates file from *candidates* (caller holds the lock)."""
        self._write_jsonl(
            self.candidates_path,
            [candidate.to_dict() for candidate in candidates],
        )

    def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return stored candidates in file order, optionally filtered by *status*."""
        results: list[SkillLearningCandidate] = []
        for payload in self._read_jsonl(self.candidates_path):
            candidate = SkillLearningCandidate.from_dict(payload)
            if status is not None and candidate.status != status:
                continue
            results.append(candidate)
        return results

    def list_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Return candidates with status ``"open"`` and kind ``"merge_skills"``."""
        return [
            item
            for item in self.list_learning_candidates(status="open")
            if item.kind == "merge_skills"
        ]

    # ------------------------------------------------------------------
    # Performance snapshots
    # ------------------------------------------------------------------

    def update_performance_snapshot(self, snapshot: SkillPerformanceSnapshot) -> None:
        """Store *snapshot*, replacing any entry for the same name and version.

        The new snapshot is always written last in the file.
        """
        # NOTE(review): this read-modify-write does not take ``_locked()``;
        # concurrent callers can still overwrite each other's snapshot.
        kept = [
            item
            for item in self.list_performance_snapshots()
            if not (
                item.skill_name == snapshot.skill_name
                and item.skill_version == snapshot.skill_version
            )
        ]
        kept.append(snapshot)
        self._write_jsonl(self.performance_path, [item.to_dict() for item in kept])

    def list_performance_snapshots(self) -> list[SkillPerformanceSnapshot]:
        """Return every stored performance snapshot in file order."""
        return [
            SkillPerformanceSnapshot.from_dict(item)
            for item in self._read_jsonl(self.performance_path)
        ]

    def list_low_performing_versions(
        self,
        *,
        minimum_activations: int = 2,
        success_ratio_threshold: float = 0.5,
    ) -> list[SkillPerformanceSnapshot]:
        """Return snapshots whose success ratio is at or below the threshold.

        Args:
            minimum_activations: Snapshots with fewer activations are ignored.
            success_ratio_threshold: Inclusive upper bound on
                ``success_count / activation_count``.
        """
        results: list[SkillPerformanceSnapshot] = []
        for snapshot in self.list_performance_snapshots():
            if snapshot.activation_count < minimum_activations:
                continue
            if snapshot.activation_count == 0:
                # Guards the division below when minimum_activations <= 0.
                continue
            ratio = snapshot.success_count / snapshot.activation_count
            if ratio <= success_ratio_threshold:
                results.append(snapshot)
        return results

    # ------------------------------------------------------------------
    # Audit log
    # ------------------------------------------------------------------

    def append_audit_event(
        self,
        candidate_id: str,
        event_type: str,
        payload: dict | None = None,
    ) -> SkillLearningAuditEvent:
        """Append an audit event with a fresh id and UTC timestamp; return it.

        *payload* is shallow-copied, and ``None`` becomes an empty dict.
        """
        event = SkillLearningAuditEvent(
            event_id=uuid4().hex,
            candidate_id=candidate_id,
            event_type=event_type,
            created_at=_utc_now(),
            payload=dict(payload or {}),
        )
        self._append_jsonl(self.audit_path, event.to_dict())
        return event

    def list_audit_events(self, candidate_id: str | None = None) -> list[SkillLearningAuditEvent]:
        """Return audit events in file order, optionally for one candidate."""
        events = [
            SkillLearningAuditEvent.from_dict(item)
            for item in self._read_jsonl(self.audit_path)
        ]
        if candidate_id is None:
            return events
        return [event for event in events if event.candidate_id == candidate_id]

    # ------------------------------------------------------------------
    # Safety / evaluation reports
    # ------------------------------------------------------------------

    def write_safety_report(self, report: SkillDraftSafetyReport) -> None:
        """Write *report* to its JSON file, overwriting an existing one."""
        path = self._report_path(
            self.safety_reports_dir, report.skill_name, report.draft_id, report.report_id
        )
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(
            json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n",
            encoding="utf-8",
        )

    def get_safety_report(
        self,
        skill_name: str,
        draft_id: str,
        report_id: str | None = None,
    ) -> SkillDraftSafetyReport | None:
        """Return one safety report for a draft, or ``None`` if there is none.

        With *report_id* the matching report is returned; without it, the
        last report in filename-sorted order.
        """
        # NOTE(review): "last" is lexicographic on ``report-<report_id>.json``;
        # it is only the newest report if report ids sort chronologically.
        reports = self.list_safety_reports(skill_name, draft_id)
        if report_id is not None:
            return next((item for item in reports if item.report_id == report_id), None)
        return reports[-1] if reports else None

    def list_safety_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftSafetyReport]:
        """Return all safety reports for a draft, sorted by file name."""
        root = self.safety_reports_dir / skill_name / draft_id
        if not root.exists():
            return []
        return [
            SkillDraftSafetyReport.from_dict(self._read_json(path))
            for path in sorted(root.glob("report-*.json"))
        ]

    def write_eval_report(self, report: SkillDraftEvalReport) -> None:
        """Write *report* to its JSON file, overwriting an existing one."""
        path = self._report_path(
            self.eval_reports_dir, report.skill_name, report.draft_id, report.report_id
        )
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(
            json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n",
            encoding="utf-8",
        )

    def get_eval_report(
        self,
        skill_name: str,
        draft_id: str,
        report_id: str | None = None,
    ) -> SkillDraftEvalReport | None:
        """Return one evaluation report for a draft, or ``None`` if there is none.

        With *report_id* the matching report is returned; without it, the
        last report in filename-sorted order.
        """
        reports = self.list_eval_reports(skill_name, draft_id)
        if report_id is not None:
            return next((item for item in reports if item.report_id == report_id), None)
        return reports[-1] if reports else None

    def list_eval_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftEvalReport]:
        """Return all evaluation reports for a draft, sorted by file name."""
        root = self.eval_reports_dir / skill_name / draft_id
        if not root.exists():
            return []
        return [
            SkillDraftEvalReport.from_dict(self._read_json(path))
            for path in sorted(root.glob("report-*.json"))
        ]

    # ------------------------------------------------------------------
    # File helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _report_path(root: Path, skill_name: str, draft_id: str, report_id: str) -> Path:
        """Return ``root/<skill_name>/<draft_id>/report-<report_id>.json``."""
        return root / skill_name / draft_id / f"report-{report_id}.json"

    @staticmethod
    def _append_jsonl(path: Path, payload: dict) -> None:
        """Append *payload* to *path* as one JSON line, creating parent dirs."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")

    @staticmethod
    def _write_jsonl(path: Path, payloads: list[dict]) -> None:
        """Replace the content of *path* with one JSON line per payload.

        The data is written to a uniquely named sibling file and then renamed
        over *path*, so a crash or a concurrent reader never observes a
        truncated or partially written file.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        body = "".join(
            json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n"
            for payload in payloads
        )
        scratch = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
        try:
            scratch.write_text(body, encoding="utf-8")
            scratch.replace(path)
        finally:
            # No-op after a successful rename; removes the leftover on failure.
            scratch.unlink(missing_ok=True)

    @staticmethod
    def _read_jsonl(path: Path) -> list[dict]:
        """Return the JSON objects stored one per line in *path*.

        A missing file yields ``[]``. Blank lines and lines whose JSON value
        is not an object are skipped; malformed JSON raises
        ``json.JSONDecodeError``.
        """
        if not path.exists():
            return []
        results: list[dict] = []
        for line in path.read_text(encoding="utf-8").splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if isinstance(payload, dict):
                results.append(payload)
        return results

    @staticmethod
    def _read_json(path: Path) -> dict:
        """Return the JSON object stored in *path*.

        Raises:
            ValueError: If the top-level JSON value is not an object.
        """
        payload = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise ValueError(f"Expected JSON object in {path}")
        return payload

    @contextmanager
    def _locked(self) -> Iterator[None]:
        """Hold the store's lock for the duration of the ``with`` body.

        Uses ``write_lock.acquire(timeout_seconds=10)`` when a write lock was
        supplied, otherwise the instance-local re-entrant lock.
        """
        if self.write_lock is None:
            with self._local_lock:
                yield
            return
        with self.write_lock.acquire(timeout_seconds=10):
            yield
def _utc_now() -> str:
    """Return the current time in UTC as an ISO 8601 string with offset."""
    # Imported locally so this helper does not rely on a module-level import.
    import datetime as _dt

    moment = _dt.datetime.now(tz=_dt.timezone.utc)
    return moment.isoformat()