feat(beaver): 完成Task Team功能v1实现,重构后端架构支持统一内核

新增内部Task系统,包括验证、反馈门控机制,实现自动质量验证
(通过率>=0.75)和用户反馈闭环(satisfied/revise/abandon)。

实现Agent Team v1协调器,支持sequence/parallel/dag执行策略,
sub-agent复用主AgentLoop,每个run使用独立memory snapshot。

建立Skill学习pipeline,包含draft/审核/发布/回滚完整生命周期,
通过Task验证通过且用户满意才生成学习候选。

重构目录结构,移除third_party依赖,建立统一engine内核,
所有agent共享运行时基础组件。

更新ContextBuilder清理provider消息字段,增强SkillContext版本管理,
集成TaskExecutionPlanner和TaskSkillResolver实现技能解析机制。
This commit is contained in:
2026-05-08 17:14:14 +08:00
parent 5ba5c7e4c1
commit 8a12c30141
93 changed files with 16724 additions and 1247 deletions

View File

@ -0,0 +1,289 @@
"""Aggregated skill learning models."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
# Status values recognised for ``SkillLearningCandidate.status``. The
# candidate's ``from_dict`` replaces any value outside this set with "open".
LEARNING_CANDIDATE_STATUSES = {
    "open",
    "queued",
    "synthesizing",
    "draft_ready",
    "safety_failed",
    "eval_failed",
    "review_pending",
    "approved",
    "rejected",
    "published",
    "failed",
    "superseded",
}

# Risk labels recognised by the ``from_dict`` loaders in this module. Values
# outside this set are replaced with "medium" when a payload is loaded.
RISK_LEVELS = {
    "low",
    "medium",
    "high",
    "critical",
}
@dataclass(slots=True)
class SkillPerformanceSnapshot:
skill_name: str
skill_version: str
activation_count: int
success_count: int
failure_count: int
latest_used_at: str
last_feedback_score: float | None = None
def to_dict(self) -> dict[str, Any]:
return {
"skill_name": self.skill_name,
"skill_version": self.skill_version,
"activation_count": self.activation_count,
"success_count": self.success_count,
"failure_count": self.failure_count,
"latest_used_at": self.latest_used_at,
"last_feedback_score": self.last_feedback_score,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillPerformanceSnapshot":
value = payload.get("last_feedback_score")
return cls(
skill_name=str(payload["skill_name"]),
skill_version=str(payload["skill_version"]),
activation_count=int(payload.get("activation_count", 0) or 0),
success_count=int(payload.get("success_count", 0) or 0),
failure_count=int(payload.get("failure_count", 0) or 0),
latest_used_at=str(payload.get("latest_used_at") or ""),
last_feedback_score=None if value in (None, "") else float(value),
)
@dataclass(slots=True)
class SkillLearningCandidate:
candidate_id: str
kind: str
source_run_ids: list[str]
source_session_ids: list[str]
related_skill_names: list[str]
reason: str
evidence: dict[str, Any] = field(default_factory=dict)
status: str = "open"
priority: int = 0
confidence: float = 0.0
risk_level: str = "medium"
owner: str | None = None
retry_count: int = 0
last_error: str | None = None
trigger_reason: str = ""
evidence_summary: str = ""
draft_skill_name: str | None = None
draft_id: str | None = None
safety_report_id: str | None = None
eval_report_id: str | None = None
created_at: str = ""
updated_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"candidate_id": self.candidate_id,
"kind": self.kind,
"source_run_ids": list(self.source_run_ids),
"source_session_ids": list(self.source_session_ids),
"related_skill_names": list(self.related_skill_names),
"reason": self.reason,
"evidence": dict(self.evidence),
"status": self.status,
"priority": self.priority,
"confidence": self.confidence,
"risk_level": self.risk_level,
"owner": self.owner,
"retry_count": self.retry_count,
"last_error": self.last_error,
"trigger_reason": self.trigger_reason,
"evidence_summary": self.evidence_summary,
"draft_skill_name": self.draft_skill_name,
"draft_id": self.draft_id,
"safety_report_id": self.safety_report_id,
"eval_report_id": self.eval_report_id,
"created_at": self.created_at,
"updated_at": self.updated_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningCandidate":
now = _utc_now()
status = str(payload.get("status") or "open")
risk_level = str(payload.get("risk_level") or "medium")
return cls(
candidate_id=str(payload["candidate_id"]),
kind=str(payload.get("kind") or "revise_skill"),
source_run_ids=[str(item) for item in payload.get("source_run_ids") or []],
source_session_ids=[str(item) for item in payload.get("source_session_ids") or []],
related_skill_names=[str(item) for item in payload.get("related_skill_names") or []],
reason=str(payload.get("reason") or ""),
evidence=dict(payload.get("evidence") or {}),
status=status if status in LEARNING_CANDIDATE_STATUSES else "open",
priority=int(payload.get("priority", 0) or 0),
confidence=float(payload.get("confidence", 0.0) or 0.0),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
owner=_optional_str(payload.get("owner")),
retry_count=int(payload.get("retry_count", 0) or 0),
last_error=_optional_str(payload.get("last_error")),
trigger_reason=str(payload.get("trigger_reason") or payload.get("reason") or ""),
evidence_summary=str(payload.get("evidence_summary") or _summarize_evidence(payload)),
draft_skill_name=_optional_str(payload.get("draft_skill_name")),
draft_id=_optional_str(payload.get("draft_id")),
safety_report_id=_optional_str(payload.get("safety_report_id")),
eval_report_id=_optional_str(payload.get("eval_report_id")),
created_at=str(payload.get("created_at") or now),
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
)
@dataclass(slots=True)
class SkillLearningAuditEvent:
event_id: str
candidate_id: str
event_type: str
created_at: str
payload: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"event_id": self.event_id,
"candidate_id": self.candidate_id,
"event_type": self.event_type,
"created_at": self.created_at,
"payload": dict(self.payload),
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningAuditEvent":
return cls(
event_id=str(payload["event_id"]),
candidate_id=str(payload["candidate_id"]),
event_type=str(payload.get("event_type") or ""),
created_at=str(payload.get("created_at") or ""),
payload=dict(payload.get("payload") or {}),
)
@dataclass(slots=True)
class SkillDraftSafetyReport:
report_id: str
skill_name: str
draft_id: str
passed: bool
risk_level: str
issues: list[str] = field(default_factory=list)
blocked_reasons: list[str] = field(default_factory=list)
suggested_fix: str = ""
created_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"passed": self.passed,
"risk_level": self.risk_level,
"issues": list(self.issues),
"blocked_reasons": list(self.blocked_reasons),
"suggested_fix": self.suggested_fix,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftSafetyReport":
risk_level = str(payload.get("risk_level") or "medium")
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
passed=bool(payload.get("passed")),
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
issues=[str(item) for item in payload.get("issues") or []],
blocked_reasons=[str(item) for item in payload.get("blocked_reasons") or []],
suggested_fix=str(payload.get("suggested_fix") or ""),
created_at=str(payload.get("created_at") or ""),
)
@dataclass(slots=True)
class SkillDraftEvalReport:
report_id: str
skill_name: str
draft_id: str
candidate_id: str
passed: bool
baseline_score_avg: float
candidate_score_avg: float
score_delta: float
regression_count: int
improved_count: int
unchanged_count: int
cases: list[dict[str, Any]] = field(default_factory=list)
status: str = "completed"
created_at: str = ""
def to_dict(self) -> dict[str, Any]:
return {
"report_id": self.report_id,
"skill_name": self.skill_name,
"draft_id": self.draft_id,
"candidate_id": self.candidate_id,
"passed": self.passed,
"baseline_score_avg": self.baseline_score_avg,
"candidate_score_avg": self.candidate_score_avg,
"score_delta": self.score_delta,
"regression_count": self.regression_count,
"improved_count": self.improved_count,
"unchanged_count": self.unchanged_count,
"cases": [dict(item) for item in self.cases],
"status": self.status,
"created_at": self.created_at,
}
@classmethod
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftEvalReport":
return cls(
report_id=str(payload["report_id"]),
skill_name=str(payload["skill_name"]),
draft_id=str(payload["draft_id"]),
candidate_id=str(payload.get("candidate_id") or ""),
passed=bool(payload.get("passed")),
baseline_score_avg=float(payload.get("baseline_score_avg", 0.0) or 0.0),
candidate_score_avg=float(payload.get("candidate_score_avg", 0.0) or 0.0),
score_delta=float(payload.get("score_delta", 0.0) or 0.0),
regression_count=int(payload.get("regression_count", 0) or 0),
improved_count=int(payload.get("improved_count", 0) or 0),
unchanged_count=int(payload.get("unchanged_count", 0) or 0),
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
status=str(payload.get("status") or "completed"),
created_at=str(payload.get("created_at") or ""),
)
def _optional_str(value: Any) -> str | None:
if value in (None, ""):
return None
return str(value)
def _summarize_evidence(payload: dict[str, Any]) -> str:
evidence = payload.get("evidence")
if isinstance(evidence, dict):
theme = evidence.get("theme")
if theme:
return f"Theme: {theme}"
skill_version = evidence.get("skill_version")
if skill_version:
return f"Skill version: {skill_version}"
source_run_ids = payload.get("source_run_ids") or []
return f"{len(source_run_ids)} source run(s)"
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()