feat(beaver): 完成 Task Team 功能 v1 实现，重构后端架构以支持统一内核
新增内部 Task 系统，包括验证与反馈门控机制，实现自动质量验证（通过率 >= 0.75）和用户反馈闭环（satisfied/revise/abandon）。实现 Agent Team v1 协调器，支持 sequence/parallel/dag 执行策略；sub-agent 复用主 AgentLoop，每个 run 使用独立的 memory snapshot。建立 Skill 学习 pipeline，包含 draft、审核、发布、回滚的完整生命周期；只有 Task 验证通过且用户满意时才生成学习候选。重构目录结构，移除 third_party 依赖，建立统一 engine 内核，所有 agent 共享运行时基础组件。更新 ContextBuilder 以清理 provider 消息字段，增强 SkillContext 版本管理，集成 TaskExecutionPlanner 和 TaskSkillResolver 以实现技能解析机制。
This commit is contained in:
289
app-instance/backend/beaver/memory/skills/models.py
Normal file
289
app-instance/backend/beaver/memory/skills/models.py
Normal file
@ -0,0 +1,289 @@
|
||||
"""Aggregated skill learning models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Status labels accepted for a skill-learning candidate.
# SkillLearningCandidate.from_dict replaces any status outside this set
# with "open".
LEARNING_CANDIDATE_STATUSES = {
    "open",
    "queued",
    "synthesizing",
    "draft_ready",
    "safety_failed",
    "eval_failed",
    "review_pending",
    "approved",
    "rejected",
    "published",
    "failed",
    "superseded",
}
|
||||
|
||||
# Risk-level labels accepted by the from_dict loaders in this module;
# any other value in a payload is replaced with "medium".
RISK_LEVELS = {
    "low",
    "medium",
    "high",
    "critical",
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillPerformanceSnapshot:
|
||||
skill_name: str
|
||||
skill_version: str
|
||||
activation_count: int
|
||||
success_count: int
|
||||
failure_count: int
|
||||
latest_used_at: str
|
||||
last_feedback_score: float | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"skill_name": self.skill_name,
|
||||
"skill_version": self.skill_version,
|
||||
"activation_count": self.activation_count,
|
||||
"success_count": self.success_count,
|
||||
"failure_count": self.failure_count,
|
||||
"latest_used_at": self.latest_used_at,
|
||||
"last_feedback_score": self.last_feedback_score,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillPerformanceSnapshot":
|
||||
value = payload.get("last_feedback_score")
|
||||
return cls(
|
||||
skill_name=str(payload["skill_name"]),
|
||||
skill_version=str(payload["skill_version"]),
|
||||
activation_count=int(payload.get("activation_count", 0) or 0),
|
||||
success_count=int(payload.get("success_count", 0) or 0),
|
||||
failure_count=int(payload.get("failure_count", 0) or 0),
|
||||
latest_used_at=str(payload.get("latest_used_at") or ""),
|
||||
last_feedback_score=None if value in (None, "") else float(value),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningCandidate:
|
||||
candidate_id: str
|
||||
kind: str
|
||||
source_run_ids: list[str]
|
||||
source_session_ids: list[str]
|
||||
related_skill_names: list[str]
|
||||
reason: str
|
||||
evidence: dict[str, Any] = field(default_factory=dict)
|
||||
status: str = "open"
|
||||
priority: int = 0
|
||||
confidence: float = 0.0
|
||||
risk_level: str = "medium"
|
||||
owner: str | None = None
|
||||
retry_count: int = 0
|
||||
last_error: str | None = None
|
||||
trigger_reason: str = ""
|
||||
evidence_summary: str = ""
|
||||
draft_skill_name: str | None = None
|
||||
draft_id: str | None = None
|
||||
safety_report_id: str | None = None
|
||||
eval_report_id: str | None = None
|
||||
created_at: str = ""
|
||||
updated_at: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"candidate_id": self.candidate_id,
|
||||
"kind": self.kind,
|
||||
"source_run_ids": list(self.source_run_ids),
|
||||
"source_session_ids": list(self.source_session_ids),
|
||||
"related_skill_names": list(self.related_skill_names),
|
||||
"reason": self.reason,
|
||||
"evidence": dict(self.evidence),
|
||||
"status": self.status,
|
||||
"priority": self.priority,
|
||||
"confidence": self.confidence,
|
||||
"risk_level": self.risk_level,
|
||||
"owner": self.owner,
|
||||
"retry_count": self.retry_count,
|
||||
"last_error": self.last_error,
|
||||
"trigger_reason": self.trigger_reason,
|
||||
"evidence_summary": self.evidence_summary,
|
||||
"draft_skill_name": self.draft_skill_name,
|
||||
"draft_id": self.draft_id,
|
||||
"safety_report_id": self.safety_report_id,
|
||||
"eval_report_id": self.eval_report_id,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningCandidate":
|
||||
now = _utc_now()
|
||||
status = str(payload.get("status") or "open")
|
||||
risk_level = str(payload.get("risk_level") or "medium")
|
||||
return cls(
|
||||
candidate_id=str(payload["candidate_id"]),
|
||||
kind=str(payload.get("kind") or "revise_skill"),
|
||||
source_run_ids=[str(item) for item in payload.get("source_run_ids") or []],
|
||||
source_session_ids=[str(item) for item in payload.get("source_session_ids") or []],
|
||||
related_skill_names=[str(item) for item in payload.get("related_skill_names") or []],
|
||||
reason=str(payload.get("reason") or ""),
|
||||
evidence=dict(payload.get("evidence") or {}),
|
||||
status=status if status in LEARNING_CANDIDATE_STATUSES else "open",
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
confidence=float(payload.get("confidence", 0.0) or 0.0),
|
||||
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
|
||||
owner=_optional_str(payload.get("owner")),
|
||||
retry_count=int(payload.get("retry_count", 0) or 0),
|
||||
last_error=_optional_str(payload.get("last_error")),
|
||||
trigger_reason=str(payload.get("trigger_reason") or payload.get("reason") or ""),
|
||||
evidence_summary=str(payload.get("evidence_summary") or _summarize_evidence(payload)),
|
||||
draft_skill_name=_optional_str(payload.get("draft_skill_name")),
|
||||
draft_id=_optional_str(payload.get("draft_id")),
|
||||
safety_report_id=_optional_str(payload.get("safety_report_id")),
|
||||
eval_report_id=_optional_str(payload.get("eval_report_id")),
|
||||
created_at=str(payload.get("created_at") or now),
|
||||
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillLearningAuditEvent:
|
||||
event_id: str
|
||||
candidate_id: str
|
||||
event_type: str
|
||||
created_at: str
|
||||
payload: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"event_id": self.event_id,
|
||||
"candidate_id": self.candidate_id,
|
||||
"event_type": self.event_type,
|
||||
"created_at": self.created_at,
|
||||
"payload": dict(self.payload),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningAuditEvent":
|
||||
return cls(
|
||||
event_id=str(payload["event_id"]),
|
||||
candidate_id=str(payload["candidate_id"]),
|
||||
event_type=str(payload.get("event_type") or ""),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
payload=dict(payload.get("payload") or {}),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillDraftSafetyReport:
|
||||
report_id: str
|
||||
skill_name: str
|
||||
draft_id: str
|
||||
passed: bool
|
||||
risk_level: str
|
||||
issues: list[str] = field(default_factory=list)
|
||||
blocked_reasons: list[str] = field(default_factory=list)
|
||||
suggested_fix: str = ""
|
||||
created_at: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"report_id": self.report_id,
|
||||
"skill_name": self.skill_name,
|
||||
"draft_id": self.draft_id,
|
||||
"passed": self.passed,
|
||||
"risk_level": self.risk_level,
|
||||
"issues": list(self.issues),
|
||||
"blocked_reasons": list(self.blocked_reasons),
|
||||
"suggested_fix": self.suggested_fix,
|
||||
"created_at": self.created_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftSafetyReport":
|
||||
risk_level = str(payload.get("risk_level") or "medium")
|
||||
return cls(
|
||||
report_id=str(payload["report_id"]),
|
||||
skill_name=str(payload["skill_name"]),
|
||||
draft_id=str(payload["draft_id"]),
|
||||
passed=bool(payload.get("passed")),
|
||||
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
blocked_reasons=[str(item) for item in payload.get("blocked_reasons") or []],
|
||||
suggested_fix=str(payload.get("suggested_fix") or ""),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SkillDraftEvalReport:
|
||||
report_id: str
|
||||
skill_name: str
|
||||
draft_id: str
|
||||
candidate_id: str
|
||||
passed: bool
|
||||
baseline_score_avg: float
|
||||
candidate_score_avg: float
|
||||
score_delta: float
|
||||
regression_count: int
|
||||
improved_count: int
|
||||
unchanged_count: int
|
||||
cases: list[dict[str, Any]] = field(default_factory=list)
|
||||
status: str = "completed"
|
||||
created_at: str = ""
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"report_id": self.report_id,
|
||||
"skill_name": self.skill_name,
|
||||
"draft_id": self.draft_id,
|
||||
"candidate_id": self.candidate_id,
|
||||
"passed": self.passed,
|
||||
"baseline_score_avg": self.baseline_score_avg,
|
||||
"candidate_score_avg": self.candidate_score_avg,
|
||||
"score_delta": self.score_delta,
|
||||
"regression_count": self.regression_count,
|
||||
"improved_count": self.improved_count,
|
||||
"unchanged_count": self.unchanged_count,
|
||||
"cases": [dict(item) for item in self.cases],
|
||||
"status": self.status,
|
||||
"created_at": self.created_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftEvalReport":
|
||||
return cls(
|
||||
report_id=str(payload["report_id"]),
|
||||
skill_name=str(payload["skill_name"]),
|
||||
draft_id=str(payload["draft_id"]),
|
||||
candidate_id=str(payload.get("candidate_id") or ""),
|
||||
passed=bool(payload.get("passed")),
|
||||
baseline_score_avg=float(payload.get("baseline_score_avg", 0.0) or 0.0),
|
||||
candidate_score_avg=float(payload.get("candidate_score_avg", 0.0) or 0.0),
|
||||
score_delta=float(payload.get("score_delta", 0.0) or 0.0),
|
||||
regression_count=int(payload.get("regression_count", 0) or 0),
|
||||
improved_count=int(payload.get("improved_count", 0) or 0),
|
||||
unchanged_count=int(payload.get("unchanged_count", 0) or 0),
|
||||
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
|
||||
status=str(payload.get("status") or "completed"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
)
|
||||
|
||||
|
||||
def _optional_str(value: Any) -> str | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
|
||||
def _summarize_evidence(payload: dict[str, Any]) -> str:
    """Return a one-line summary of a candidate payload's evidence.

    Preference order: the evidence ``theme``, then the evidence
    ``skill_version``, then a count of ``source_run_ids``. The first two are
    only considered when ``payload["evidence"]`` is a dict.
    """
    evidence = payload.get("evidence")
    if isinstance(evidence, dict):
        theme = evidence.get("theme")
        if theme:
            return f"Theme: {theme}"
        skill_version = evidence.get("skill_version")
        if skill_version:
            return f"Skill version: {skill_version}"
    # Fallback for missing, non-dict, or uninformative evidence.
    source_run_ids = payload.get("source_run_ids") or []
    return f"{len(source_run_ids)} source run(s)"
|
||||
|
||||
|
||||
def _utc_now() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    return datetime.now(timezone.utc).isoformat()
|
||||
Reference in New Issue
Block a user