feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核

新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证
(通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。

实现Agent Team v1协调器,支持sequence/parallel/dag执行策略,
sub-agent复用主AgentLoop,每个run使用独立memory snapshot。

建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期,
通过Task验证通过且用户满意才生成学习候选。

重构目录结构,移除third_party依赖,建立统一engine内核,
所有agent共享运行时基础组件。

更新ContextBuilder清理provider消息字段,增强SkillContext版本管理,
集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
2026-05-08 17:14:14 +08:00
parent 5ba5c7e4c1
commit 8a12c30141
93 changed files with 16724 additions and 1247 deletions

View File

@ -1,2 +1,19 @@
"""Memory related to skill evolution."""
from .models import (
SkillDraftEvalReport,
SkillDraftSafetyReport,
SkillLearningAuditEvent,
SkillLearningCandidate,
SkillPerformanceSnapshot,
)
from .store import SkillLearningStore
# Names re-exported by this package: the model classes imported from .models
# and the SkillLearningStore imported from .store.
__all__ = [
"SkillDraftEvalReport",
"SkillDraftSafetyReport",
"SkillLearningAuditEvent",
"SkillLearningCandidate",
"SkillLearningStore",
"SkillPerformanceSnapshot",
]

View File

@ -0,0 +1,289 @@
"""Aggregated skill learning models."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
# Status values a learning candidate may carry.
# SkillLearningCandidate.from_dict replaces any status outside this set
# with "open".
LEARNING_CANDIDATE_STATUSES = {
"open",
"queued",
"synthesizing",
"draft_ready",
"safety_failed",
"eval_failed",
"review_pending",
"approved",
"rejected",
"published",
"failed",
"superseded",
}
# Recognised risk levels. The from_dict constructors in this module replace
# any risk level outside this set with "medium".
RISK_LEVELS = {"low", "medium", "high", "critical"}
@dataclass(slots=True)
class SkillPerformanceSnapshot:
skill_name: str
skill_version: str
activation_count: int
success_count: int
failure_count: int
latest_used_at: str
last_feedback_score: float | None = None
def to_dict(self) -> dict[str, Any]:
return {
"skill_name": self.skill_name,
"skill_version": self.skill_version,
"activation_count": self.activation_count,
"success_count": self.success_count,
"failure_count": self.failure_count,
"latest_used_at": self.latest_used_at,
"last_feedback_score": self.last_feedback_score,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillPerformanceSnapshot":
value = payload.get("last_feedback_score")
return cls(
skill_name=str(payload["skill_name"]),
skill_version=str(payload["skill_version"]),
activation_count=int(payload.get("activation_count", 0) or 0),
success_count=int(payload.get("success_count", 0) or 0),
failure_count=int(payload.get("failure_count", 0) or 0),
latest_used_at=str(payload.get("latest_used_at") or ""),
last_feedback_score=None if value in (None, "") else float(value),
)
@dataclass(slots=True)
class SkillLearningCandidate:
candidate_id: str
kind: str
source_run_ids: list[str]
source_session_ids: list[str]
related_skill_names: list[str]
reason: str
evidence: dict[str, Any] = field(default_factory=dict)
status: str = "open"
priority: int = 0
confidence: float = 0.0
risk_level: str = "medium"
owner: str | None = None
retry_count: int = 0
last_error: str | None = None
trigger_reason: str = ""
evidence_summary: str = ""
draft_skill_name: str | None = None
draft_id: str | None = None
safety_report_id: str | None = None
eval_report_id: str | None = None
created_at: str = ""
updated_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"candidate_id": self.candidate_id,
"kind": self.kind,
"source_run_ids": list(self.source_run_ids),
"source_session_ids": list(self.source_session_ids),
"related_skill_names": list(self.related_skill_names),
"reason": self.reason,
"evidence": dict(self.evidence),
"status": self.status,
"priority": self.priority,
"confidence": self.confidence,
"risk_level": self.risk_level,
"owner": self.owner,
"retry_count": self.retry_count,
"last_error": self.last_error,
"trigger_reason": self.trigger_reason,
"evidence_summary": self.evidence_summary,
"draft_skill_name": self.draft_skill_name,
"draft_id": self.draft_id,
"safety_report_id": self.safety_report_id,
"eval_report_id": self.eval_report_id,
"created_at": self.created_at,
"updated_at": self.updated_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningCandidate":
now = _utc_now()
status = str(payload.get("status") or "open")
risk_level = str(payload.get("risk_level") or "medium")
return cls(
candidate_id=str(payload["candidate_id"]),
kind=str(payload.get("kind") or "revise_skill"),
source_run_ids=[str(item) for item in payload.get("source_run_ids") or []],
source_session_ids=[str(item) for item in payload.get("source_session_ids") or []],
related_skill_names=[str(item) for item in payload.get("related_skill_names") or []],
reason=str(payload.get("reason") or ""),
evidence=dict(payload.get("evidence") or {}),
status=status if status in LEARNING_CANDIDATE_STATUSES else "open",
priority=int(payload.get("priority", 0) or 0),
confidence=float(payload.get("confidence", 0.0) or 0.0),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
owner=_optional_str(payload.get("owner")),
retry_count=int(payload.get("retry_count", 0) or 0),
last_error=_optional_str(payload.get("last_error")),
trigger_reason=str(payload.get("trigger_reason") or payload.get("reason") or ""),
evidence_summary=str(payload.get("evidence_summary") or _summarize_evidence(payload)),
draft_skill_name=_optional_str(payload.get("draft_skill_name")),
draft_id=_optional_str(payload.get("draft_id")),
safety_report_id=_optional_str(payload.get("safety_report_id")),
eval_report_id=_optional_str(payload.get("eval_report_id")),
created_at=str(payload.get("created_at") or now),
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
)
@dataclass(slots=True)
class SkillLearningAuditEvent:
event_id: str
candidate_id: str
event_type: str
created_at: str
payload: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"event_id": self.event_id,
"candidate_id": self.candidate_id,
"event_type": self.event_type,
"created_at": self.created_at,
"payload": dict(self.payload),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningAuditEvent":
return cls(
event_id=str(payload["event_id"]),
candidate_id=str(payload["candidate_id"]),
event_type=str(payload.get("event_type") or ""),
created_at=str(payload.get("created_at") or ""),
payload=dict(payload.get("payload") or {}),
)
@dataclass(slots=True)
class SkillDraftSafetyReport:
report_id: str
skill_name: str
draft_id: str
passed: bool
risk_level: str
issues: list[str] = field(default_factory=list)
blocked_reasons: list[str] = field(default_factory=list)
suggested_fix: str = ""
created_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"passed": self.passed,
"risk_level": self.risk_level,
"issues": list(self.issues),
"blocked_reasons": list(self.blocked_reasons),
"suggested_fix": self.suggested_fix,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftSafetyReport":
risk_level = str(payload.get("risk_level") or "medium")
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
passed=bool(payload.get("passed")),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
issues=[str(item) for item in payload.get("issues") or []],
blocked_reasons=[str(item) for item in payload.get("blocked_reasons") or []],
suggested_fix=str(payload.get("suggested_fix") or ""),
created_at=str(payload.get("created_at") or ""),
)
@dataclass(slots=True)
class SkillDraftEvalReport:
report_id: str
skill_name: str
draft_id: str
candidate_id: str
passed: bool
baseline_score_avg: float
candidate_score_avg: float
score_delta: float
regression_count: int
improved_count: int
unchanged_count: int
cases: list[dict[str, Any]] = field(default_factory=list)
status: str = "completed"
created_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"candidate_id": self.candidate_id,
"passed": self.passed,
"baseline_score_avg": self.baseline_score_avg,
"candidate_score_avg": self.candidate_score_avg,
"score_delta": self.score_delta,
"regression_count": self.regression_count,
"improved_count": self.improved_count,
"unchanged_count": self.unchanged_count,
"cases": [dict(item) for item in self.cases],
"status": self.status,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftEvalReport":
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
candidate_id=str(payload.get("candidate_id") or ""),
passed=bool(payload.get("passed")),
baseline_score_avg=float(payload.get("baseline_score_avg", 0.0) or 0.0),
candidate_score_avg=float(payload.get("candidate_score_avg", 0.0) or 0.0),
score_delta=float(payload.get("score_delta", 0.0) or 0.0),
regression_count=int(payload.get("regression_count", 0) or 0),
improved_count=int(payload.get("improved_count", 0) or 0),
unchanged_count=int(payload.get("unchanged_count", 0) or 0),
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
status=str(payload.get("status") or "completed"),
created_at=str(payload.get("created_at") or ""),
)
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _summarize_evidence(payload: dict[str, Any]) -> str:
evidence = payload.get("evidence")
if isinstance(evidence, dict):
theme = evidence.get("theme")
if theme:
return f"Theme: {theme}"
skill_version = evidence.get("skill_version")
if skill_version:
return f"Skill version: {skill_version}"
source_run_ids = payload.get("source_run_ids") or []
return f"{len(source_run_ids)} source run(s)"
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()

View File

@ -0,0 +1,216 @@
"""File-backed skill learning store."""
from __future__ import annotations
import json
from pathlib import Path
from uuid import uuid4
from .models import (
SkillDraftEvalReport,
SkillDraftSafetyReport,
SkillLearningAuditEvent,
SkillLearningCandidate,
SkillPerformanceSnapshot,
)
class SkillLearningStore:
    """File-backed store for skill-learning records.

    Layout beneath ``root``:

    * ``performance.jsonl`` - one performance snapshot per line.
    * ``learning-candidates.jsonl`` - one learning candidate per line.
    * ``learning-audit.jsonl`` - append-only audit events.
    * ``safety-reports/<skill>/<draft>/report-<id>.json``
    * ``eval-reports/<skill>/<draft>/report-<id>.json``

    Files that are rewritten as a whole (candidates, performance snapshots,
    reports) are written to a temporary sibling file and then moved over the
    target, so an interrupted write cannot leave a truncated file behind.
    """

    def __init__(self, root: str | Path) -> None:
        """Create ``root`` if necessary and derive the storage paths under it."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.performance_path = self.root / "performance.jsonl"
        self.candidates_path = self.root / "learning-candidates.jsonl"
        self.audit_path = self.root / "learning-audit.jsonl"
        self.safety_reports_dir = self.root / "safety-reports"
        self.eval_reports_dir = self.root / "eval-reports"

    def record_learning_candidate(self, candidate: SkillLearningCandidate) -> None:
        """Append *candidate* to the candidate file and log ``candidate_created``.

        The candidate is round-tripped through ``from_dict`` first so the
        stored record carries normalised values.
        """
        normalized = SkillLearningCandidate.from_dict(candidate.to_dict())
        self._append_jsonl(self.candidates_path, normalized.to_dict())
        self.append_audit_event(
            normalized.candidate_id,
            "candidate_created",
            {
                "kind": normalized.kind,
                "status": normalized.status,
                "reason": normalized.reason,
            },
        )

    def update_learning_candidate(self, candidate_id: str, **updates: object) -> SkillLearningCandidate | None:
        """Apply *updates* to the first candidate whose id matches.

        ``updated_at`` is set to the current UTC time unless the caller
        supplies it. Returns the updated candidate, or ``None`` (leaving the
        file untouched) when no candidate has that id.
        """
        candidates = self.list_learning_candidates()
        updated: SkillLearningCandidate | None = None
        for index, candidate in enumerate(candidates):
            if candidate.candidate_id != candidate_id:
                continue
            payload = candidate.to_dict()
            payload.update(updates)
            if "updated_at" not in updates:
                payload["updated_at"] = _utc_now()
            updated = SkillLearningCandidate.from_dict(payload)
            candidates[index] = updated
            break
        if updated is None:
            return None
        self._write_jsonl(self.candidates_path, [item.to_dict() for item in candidates])
        return updated

    def transition_learning_candidate(
        self,
        candidate_id: str,
        status: str,
        *,
        event_type: str | None = None,
        payload: dict | None = None,
        **updates: object,
    ) -> SkillLearningCandidate | None:
        """Change a candidate's status and record a matching audit event.

        The audit event reports the status that was actually persisted.
        ``SkillLearningCandidate.from_dict`` may replace an unrecognised
        status, and in that case the original request is kept under
        ``requested_status`` so the log and the store cannot disagree.
        Keys in *payload* override the generated ones. Returns ``None``
        without logging when the candidate does not exist.
        """
        updated = self.update_learning_candidate(candidate_id, status=status, **updates)
        if updated is None:
            return None
        details: dict = {"status": updated.status}
        if updated.status != status:
            details["requested_status"] = status
        details.update(payload or {})
        self.append_audit_event(
            candidate_id,
            event_type or f"candidate_{updated.status}",
            details,
        )
        return updated

    def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
        """Return stored candidates in file order, optionally filtered by status."""
        candidates = [
            SkillLearningCandidate.from_dict(payload)
            for payload in self._read_jsonl(self.candidates_path)
        ]
        if status is None:
            return candidates
        return [item for item in candidates if item.status == status]

    def update_performance_snapshot(self, snapshot: SkillPerformanceSnapshot) -> None:
        """Replace the stored snapshot for the same skill name and version.

        Any existing entries for that (name, version) pair are dropped and
        *snapshot* is appended at the end of the file.
        """
        identity = (snapshot.skill_name, snapshot.skill_version)
        kept = [
            item
            for item in self.list_performance_snapshots()
            if (item.skill_name, item.skill_version) != identity
        ]
        kept.append(snapshot)
        self._write_jsonl(self.performance_path, [item.to_dict() for item in kept])

    def list_performance_snapshots(self) -> list[SkillPerformanceSnapshot]:
        """Return every stored performance snapshot in file order."""
        return [SkillPerformanceSnapshot.from_dict(item) for item in self._read_jsonl(self.performance_path)]

    def list_low_performing_versions(self, *, minimum_activations: int = 2, success_ratio_threshold: float = 0.5) -> list[SkillPerformanceSnapshot]:
        """Return snapshots whose success ratio is at or below the threshold.

        Snapshots with fewer than *minimum_activations* activations, or with
        zero activations, are ignored.
        """
        results: list[SkillPerformanceSnapshot] = []
        for snapshot in self.list_performance_snapshots():
            activations = snapshot.activation_count
            # The zero check guards the division when minimum_activations <= 0.
            if activations < minimum_activations or activations == 0:
                continue
            if snapshot.success_count / activations <= success_ratio_threshold:
                results.append(snapshot)
        return results

    def list_merge_candidates(self) -> list[SkillLearningCandidate]:
        """Return open candidates whose kind is ``"merge_skills"``."""
        return [item for item in self.list_learning_candidates(status="open") if item.kind == "merge_skills"]

    def append_audit_event(self, candidate_id: str, event_type: str, payload: dict | None = None) -> SkillLearningAuditEvent:
        """Append an audit event with a fresh id and timestamp; return it."""
        event = SkillLearningAuditEvent(
            event_id=uuid4().hex,
            candidate_id=candidate_id,
            event_type=event_type,
            created_at=_utc_now(),
            payload=dict(payload or {}),
        )
        self._append_jsonl(self.audit_path, event.to_dict())
        return event

    def list_audit_events(self, candidate_id: str | None = None) -> list[SkillLearningAuditEvent]:
        """Return audit events in file order, optionally for one candidate."""
        events = [SkillLearningAuditEvent.from_dict(item) for item in self._read_jsonl(self.audit_path)]
        if candidate_id is None:
            return events
        return [event for event in events if event.candidate_id == candidate_id]

    def write_safety_report(self, report: SkillDraftSafetyReport) -> None:
        """Persist *report* under the safety-reports directory."""
        self._write_report(self.safety_reports_dir, report)

    def get_safety_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftSafetyReport | None:
        """Return one safety report for a draft.

        With *report_id*, returns the report carrying that id. Without it,
        returns the report with the newest ``created_at``. ``None`` when
        nothing matches.
        """
        return self._select_report(self.list_safety_reports(skill_name, draft_id), report_id)

    def list_safety_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftSafetyReport]:
        """Return the draft's safety reports, ordered by report file name."""
        return self._load_reports(self.safety_reports_dir, skill_name, draft_id, SkillDraftSafetyReport)

    def write_eval_report(self, report: SkillDraftEvalReport) -> None:
        """Persist *report* under the eval-reports directory."""
        self._write_report(self.eval_reports_dir, report)

    def get_eval_report(self, skill_name: str, draft_id: str, report_id: str | None = None) -> SkillDraftEvalReport | None:
        """Return one eval report for a draft.

        With *report_id*, returns the report carrying that id. Without it,
        returns the report with the newest ``created_at``. ``None`` when
        nothing matches.
        """
        return self._select_report(self.list_eval_reports(skill_name, draft_id), report_id)

    def list_eval_reports(self, skill_name: str, draft_id: str) -> list[SkillDraftEvalReport]:
        """Return the draft's eval reports, ordered by report file name."""
        return self._load_reports(self.eval_reports_dir, skill_name, draft_id, SkillDraftEvalReport)

    def _write_report(self, directory: Path, report) -> None:
        """Serialise *report* to its JSON file below *directory*."""
        path = self._report_path(directory, report.skill_name, report.draft_id, report.report_id)
        text = json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True) + "\n"
        self._atomic_write_text(path, text)

    def _load_reports(self, directory: Path, skill_name: str, draft_id: str, model) -> list:
        """Load every ``report-*.json`` for a draft through ``model.from_dict``."""
        root = directory / skill_name / draft_id
        if not root.exists():
            return []
        return [model.from_dict(self._read_json(path)) for path in sorted(root.glob("report-*.json"))]

    @staticmethod
    def _select_report(reports: list, report_id: str | None):
        """Pick the report with *report_id*, or the most recent one.

        "Most recent" compares the ``created_at`` strings. The sort is stable,
        so reports with equal or empty timestamps keep their incoming
        (file-name) order and the last of them wins.
        NOTE(review): assumes ``created_at`` values are ISO timestamps sharing
        one UTC offset, so string order matches time order - confirm.
        """
        if report_id is not None:
            return next((item for item in reports if item.report_id == report_id), None)
        if not reports:
            return None
        return sorted(reports, key=lambda item: item.created_at)[-1]

    @staticmethod
    def _report_path(root: Path, skill_name: str, draft_id: str, report_id: str) -> Path:
        """Return ``root/<skill_name>/<draft_id>/report-<report_id>.json``.

        The identifiers are used verbatim as path segments, so callers must
        pass values that are safe as file names.
        """
        return root / skill_name / draft_id / f"report-{report_id}.json"

    @staticmethod
    def _atomic_write_text(path: Path, text: str) -> None:
        """Write *text* to *path* via a temporary sibling file and a rename."""
        path.parent.mkdir(parents=True, exist_ok=True)
        scratch = path.with_name(f".{path.name}.{uuid4().hex}.tmp")
        try:
            scratch.write_text(text, encoding="utf-8")
            scratch.replace(path)
        finally:
            # No-op after a successful replace; removes the leftover on failure.
            scratch.unlink(missing_ok=True)

    @classmethod
    def _write_jsonl(cls, path: Path, payloads: list[dict]) -> None:
        """Replace *path* with one JSON object per line, written atomically."""
        text = "".join(
            json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n"
            for payload in payloads
        )
        cls._atomic_write_text(path, text)

    @staticmethod
    def _append_jsonl(path: Path, payload: dict) -> None:
        """Append *payload* to *path* as a single JSON line."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")

    @staticmethod
    def _read_jsonl(path: Path) -> list[dict]:
        """Return the JSON objects stored one per line in *path*.

        A missing file yields ``[]``. Blank lines and lines whose JSON value
        is not an object are skipped; malformed JSON raises
        ``json.JSONDecodeError``.
        """
        if not path.exists():
            return []
        results: list[dict] = []
        for line in path.read_text(encoding="utf-8").splitlines():
            cleaned = line.strip()
            if not cleaned:
                continue
            payload = json.loads(cleaned)
            if isinstance(payload, dict):
                results.append(payload)
        return results

    @staticmethod
    def _read_json(path: Path) -> dict:
        """Load *path* as JSON, raising ``ValueError`` unless it is an object."""
        payload = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise ValueError(f"Expected JSON object in {path}")
        return payload
def _utc_now() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()