feat(engine): 优化智能体循环中的助手消息处理逻辑 - 在没有工具调用时才添加助手消息到上下文 - 确保工具调用响应正确添加到消息上下文中 - 修复了消息构建的条件逻辑 fix(cron): 改进定时任务调度的时间解析功能 - 添加正则表达式导入用于时间显示解析 - 实现从显示文本中提取毫秒间隔的功能 - 增强整数转换的安全性,避免类型错误 - 优化定时任务配置的解析逻辑 feat(outlook): 增强Outlook集成的功能和稳定性 - 将默认超时时间从10秒增加到180秒 - 为状态检查函数添加可选的验证参数 - 串行执行邮件概览获取操作而非并行 - 改进连接状态验证逻辑 feat(channel): 添加设备名称作为会话标识的选项 - 为终端WebSocket适配器添加新的配置选项 - 实现基于设备名称生成会话对等ID的功能 - 记录原始对等ID和设备名称的元数据 - 支持从设备名称创建会话对等ID feat(skills): 完善技能学习评估系统和进度跟踪 - 在应用启动时自动调度待评估的技能草稿 - 为技能评估工作创建独立的循环工厂 - 实现异步技能评估任务的取消和清理机制 - 添加技能评估进度报告和状态跟踪功能 - 扩展会话列表API以包含更多详细信息 - 防止对不存在的会话进行操作 - 优化技能草稿提交和评估的业务逻辑 perf(skills): 提升技能评估的并发性能 - 实现并行技能案例评估以提高效率 - 添加最大并行案例数的环境变量控制 - 实现实时评估进度更新和回调机制 - 优化评估过程中的资源管理和同步 refactor(services): 创建隔离的智能体循环实例 - 添加创建独立智能体循环的工厂方法 - 确保新循环继承运行时服务配置 - 支持技能评估等需要隔离环境的场景 ```
363 lines
14 KiB
Python
363 lines
14 KiB
Python
"""Aggregated skill learning models."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
|
|
# Status strings accepted for a learning candidate. Loading a candidate with a
# status outside this set falls back to "open" (see SkillLearningCandidate.from_dict).
LEARNING_CANDIDATE_STATUSES = {
    "open", "queued", "synthesizing",
    "draft_ready", "safety_failed", "eval_failed",
    "review_pending", "approved", "rejected",
    "published", "failed", "superseded",
}

# Risk labels accepted by the models in this module; unrecognised values are
# replaced with "medium" when records are loaded from dictionaries.
RISK_LEVELS = {
    "low",
    "medium",
    "high",
    "critical",
}
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class SkillPerformanceSnapshot:
|
|
skill_name: str
|
|
skill_version: str
|
|
activation_count: int
|
|
success_count: int
|
|
failure_count: int
|
|
latest_used_at: str
|
|
last_feedback_score: float | None = None
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
return {
|
|
"skill_name": self.skill_name,
|
|
"skill_version": self.skill_version,
|
|
"activation_count": self.activation_count,
|
|
"success_count": self.success_count,
|
|
"failure_count": self.failure_count,
|
|
"latest_used_at": self.latest_used_at,
|
|
"last_feedback_score": self.last_feedback_score,
|
|
}
|
|
|
|
@classmethod
|
|
def from_dict(cls, payload: dict[str, Any]) -> "SkillPerformanceSnapshot":
|
|
value = payload.get("last_feedback_score")
|
|
return cls(
|
|
skill_name=str(payload["skill_name"]),
|
|
skill_version=str(payload["skill_version"]),
|
|
activation_count=int(payload.get("activation_count", 0) or 0),
|
|
success_count=int(payload.get("success_count", 0) or 0),
|
|
failure_count=int(payload.get("failure_count", 0) or 0),
|
|
latest_used_at=str(payload.get("latest_used_at") or ""),
|
|
last_feedback_score=None if value in (None, "") else float(value),
|
|
)
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class SkillLearningCandidate:
|
|
candidate_id: str
|
|
kind: str
|
|
source_run_ids: list[str]
|
|
source_session_ids: list[str]
|
|
related_skill_names: list[str]
|
|
reason: str
|
|
evidence: dict[str, Any] = field(default_factory=dict)
|
|
status: str = "open"
|
|
priority: int = 0
|
|
confidence: float = 0.0
|
|
risk_level: str = "medium"
|
|
owner: str | None = None
|
|
retry_count: int = 0
|
|
last_error: str | None = None
|
|
trigger_reason: str = ""
|
|
evidence_summary: str = ""
|
|
draft_skill_name: str | None = None
|
|
draft_id: str | None = None
|
|
safety_report_id: str | None = None
|
|
eval_report_id: str | None = None
|
|
eval_progress: dict[str, Any] = field(default_factory=dict)
|
|
created_at: str = ""
|
|
updated_at: str = ""
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
return {
|
|
"candidate_id": self.candidate_id,
|
|
"kind": self.kind,
|
|
"source_run_ids": list(self.source_run_ids),
|
|
"source_session_ids": list(self.source_session_ids),
|
|
"related_skill_names": list(self.related_skill_names),
|
|
"reason": self.reason,
|
|
"evidence": dict(self.evidence),
|
|
"status": self.status,
|
|
"priority": self.priority,
|
|
"confidence": self.confidence,
|
|
"risk_level": self.risk_level,
|
|
"owner": self.owner,
|
|
"retry_count": self.retry_count,
|
|
"last_error": self.last_error,
|
|
"trigger_reason": self.trigger_reason,
|
|
"evidence_summary": self.evidence_summary,
|
|
"draft_skill_name": self.draft_skill_name,
|
|
"draft_id": self.draft_id,
|
|
"safety_report_id": self.safety_report_id,
|
|
"eval_report_id": self.eval_report_id,
|
|
"eval_progress": dict(self.eval_progress),
|
|
"created_at": self.created_at,
|
|
"updated_at": self.updated_at,
|
|
}
|
|
|
|
@classmethod
|
|
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningCandidate":
|
|
now = _utc_now()
|
|
status = str(payload.get("status") or "open")
|
|
risk_level = str(payload.get("risk_level") or "medium")
|
|
return cls(
|
|
candidate_id=str(payload["candidate_id"]),
|
|
kind=str(payload.get("kind") or "revise_skill"),
|
|
source_run_ids=[str(item) for item in payload.get("source_run_ids") or []],
|
|
source_session_ids=[str(item) for item in payload.get("source_session_ids") or []],
|
|
related_skill_names=[str(item) for item in payload.get("related_skill_names") or []],
|
|
reason=str(payload.get("reason") or ""),
|
|
evidence=dict(payload.get("evidence") or {}),
|
|
status=status if status in LEARNING_CANDIDATE_STATUSES else "open",
|
|
priority=int(payload.get("priority", 0) or 0),
|
|
confidence=float(payload.get("confidence", 0.0) or 0.0),
|
|
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
|
|
owner=_optional_str(payload.get("owner")),
|
|
retry_count=int(payload.get("retry_count", 0) or 0),
|
|
last_error=_optional_str(payload.get("last_error")),
|
|
trigger_reason=str(payload.get("trigger_reason") or payload.get("reason") or ""),
|
|
evidence_summary=str(payload.get("evidence_summary") or _summarize_evidence(payload)),
|
|
draft_skill_name=_optional_str(payload.get("draft_skill_name")),
|
|
draft_id=_optional_str(payload.get("draft_id")),
|
|
safety_report_id=_optional_str(payload.get("safety_report_id")),
|
|
eval_report_id=_optional_str(payload.get("eval_report_id")),
|
|
eval_progress=dict(payload.get("eval_progress") or {}),
|
|
created_at=str(payload.get("created_at") or now),
|
|
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
|
|
)
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class SkillLearningAuditEvent:
|
|
event_id: str
|
|
candidate_id: str
|
|
event_type: str
|
|
created_at: str
|
|
payload: dict[str, Any] = field(default_factory=dict)
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
return {
|
|
"event_id": self.event_id,
|
|
"candidate_id": self.candidate_id,
|
|
"event_type": self.event_type,
|
|
"created_at": self.created_at,
|
|
"payload": dict(self.payload),
|
|
}
|
|
|
|
@classmethod
|
|
def from_dict(cls, payload: dict[str, Any]) -> "SkillLearningAuditEvent":
|
|
return cls(
|
|
event_id=str(payload["event_id"]),
|
|
candidate_id=str(payload["candidate_id"]),
|
|
event_type=str(payload.get("event_type") or ""),
|
|
created_at=str(payload.get("created_at") or ""),
|
|
payload=dict(payload.get("payload") or {}),
|
|
)
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class SkillDraftSafetyReport:
|
|
report_id: str
|
|
skill_name: str
|
|
draft_id: str
|
|
passed: bool
|
|
risk_level: str
|
|
issues: list[str] = field(default_factory=list)
|
|
blocked_reasons: list[str] = field(default_factory=list)
|
|
suggested_fix: str = ""
|
|
created_at: str = ""
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
return {
|
|
"report_id": self.report_id,
|
|
"skill_name": self.skill_name,
|
|
"draft_id": self.draft_id,
|
|
"passed": self.passed,
|
|
"risk_level": self.risk_level,
|
|
"issues": list(self.issues),
|
|
"blocked_reasons": list(self.blocked_reasons),
|
|
"suggested_fix": self.suggested_fix,
|
|
"created_at": self.created_at,
|
|
}
|
|
|
|
@classmethod
|
|
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftSafetyReport":
|
|
risk_level = str(payload.get("risk_level") or "medium")
|
|
return cls(
|
|
report_id=str(payload["report_id"]),
|
|
skill_name=str(payload["skill_name"]),
|
|
draft_id=str(payload["draft_id"]),
|
|
passed=bool(payload.get("passed")),
|
|
risk_level=risk_level if risk_level in RISK_LEVELS else "medium",
|
|
issues=[str(item) for item in payload.get("issues") or []],
|
|
blocked_reasons=[str(item) for item in payload.get("blocked_reasons") or []],
|
|
suggested_fix=str(payload.get("suggested_fix") or ""),
|
|
created_at=str(payload.get("created_at") or ""),
|
|
)
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class SkillDraftEvalReport:
|
|
report_id: str
|
|
skill_name: str
|
|
draft_id: str
|
|
candidate_id: str
|
|
passed: bool
|
|
baseline_score_avg: float
|
|
candidate_score_avg: float
|
|
score_delta: float
|
|
regression_count: int
|
|
improved_count: int
|
|
unchanged_count: int
|
|
cases: list[dict[str, Any]] = field(default_factory=list)
|
|
status: str = "completed"
|
|
created_at: str = ""
|
|
eval_version: str = "heuristic-v1"
|
|
mode: str = "heuristic"
|
|
execution_coverage: float = 0.0
|
|
surrogate_coverage: float = 0.0
|
|
blocked_coverage: float = 0.0
|
|
confidence: str = "low"
|
|
case_reports: list[dict[str, Any]] = field(default_factory=list)
|
|
tool_mode_summary: dict[str, Any] = field(default_factory=dict)
|
|
ability_score_summary: dict[str, Any] = field(default_factory=dict)
|
|
tool_execution_summary: dict[str, Any] = field(default_factory=dict)
|
|
case_selection_summary: dict[str, Any] = field(default_factory=dict)
|
|
real_score_avg: float | None = None
|
|
synthetic_score_avg: float | None = None
|
|
overall_score_avg: float | None = None
|
|
preservation_report: dict[str, Any] | None = None
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
return {
|
|
"report_id": self.report_id,
|
|
"skill_name": self.skill_name,
|
|
"draft_id": self.draft_id,
|
|
"candidate_id": self.candidate_id,
|
|
"passed": self.passed,
|
|
"baseline_score_avg": self.baseline_score_avg,
|
|
"candidate_score_avg": self.candidate_score_avg,
|
|
"score_delta": self.score_delta,
|
|
"regression_count": self.regression_count,
|
|
"improved_count": self.improved_count,
|
|
"unchanged_count": self.unchanged_count,
|
|
"cases": [dict(item) for item in self.cases],
|
|
"status": self.status,
|
|
"created_at": self.created_at,
|
|
"eval_version": self.eval_version,
|
|
"mode": self.mode,
|
|
"execution_coverage": self.execution_coverage,
|
|
"surrogate_coverage": self.surrogate_coverage,
|
|
"blocked_coverage": self.blocked_coverage,
|
|
"confidence": self.confidence,
|
|
"case_reports": [dict(item) for item in self.case_reports],
|
|
"tool_mode_summary": dict(self.tool_mode_summary),
|
|
"ability_score_summary": dict(self.ability_score_summary),
|
|
"tool_execution_summary": dict(self.tool_execution_summary),
|
|
"case_selection_summary": dict(self.case_selection_summary),
|
|
"real_score_avg": self.real_score_avg,
|
|
"synthetic_score_avg": self.synthetic_score_avg,
|
|
"overall_score_avg": self.overall_score_avg,
|
|
"preservation_report": (
|
|
dict(self.preservation_report) if self.preservation_report is not None else None
|
|
),
|
|
}
|
|
|
|
@classmethod
|
|
def from_dict(cls, payload: dict[str, Any]) -> "SkillDraftEvalReport":
|
|
return cls(
|
|
report_id=str(payload["report_id"]),
|
|
skill_name=str(payload["skill_name"]),
|
|
draft_id=str(payload["draft_id"]),
|
|
candidate_id=str(payload.get("candidate_id") or ""),
|
|
passed=bool(payload.get("passed")),
|
|
baseline_score_avg=float(payload.get("baseline_score_avg", 0.0) or 0.0),
|
|
candidate_score_avg=float(payload.get("candidate_score_avg", 0.0) or 0.0),
|
|
score_delta=float(payload.get("score_delta", 0.0) or 0.0),
|
|
regression_count=int(payload.get("regression_count", 0) or 0),
|
|
improved_count=int(payload.get("improved_count", 0) or 0),
|
|
unchanged_count=int(payload.get("unchanged_count", 0) or 0),
|
|
cases=[dict(item) for item in payload.get("cases") or [] if isinstance(item, dict)],
|
|
status=str(payload.get("status") or "completed"),
|
|
created_at=str(payload.get("created_at") or ""),
|
|
eval_version=str(payload.get("eval_version") or "heuristic-v1"),
|
|
mode=str(payload.get("mode") or "heuristic"),
|
|
execution_coverage=_bounded_float(payload.get("execution_coverage"), default=0.0),
|
|
surrogate_coverage=_bounded_float(payload.get("surrogate_coverage"), default=0.0),
|
|
blocked_coverage=_bounded_float(payload.get("blocked_coverage"), default=0.0),
|
|
confidence=str(payload.get("confidence") or "low"),
|
|
case_reports=[
|
|
dict(item)
|
|
for item in payload.get("case_reports") or []
|
|
if isinstance(item, dict)
|
|
],
|
|
tool_mode_summary=dict(payload.get("tool_mode_summary") or {}),
|
|
ability_score_summary=dict(payload.get("ability_score_summary") or {}),
|
|
tool_execution_summary=dict(payload.get("tool_execution_summary") or {}),
|
|
case_selection_summary=dict(payload.get("case_selection_summary") or {}),
|
|
real_score_avg=_optional_bounded_float(payload.get("real_score_avg")),
|
|
synthetic_score_avg=_optional_bounded_float(payload.get("synthetic_score_avg")),
|
|
overall_score_avg=_optional_bounded_float(payload.get("overall_score_avg")),
|
|
preservation_report=(
|
|
dict(payload["preservation_report"])
|
|
if isinstance(payload.get("preservation_report"), dict)
|
|
else None
|
|
),
|
|
)
|
|
|
|
|
|
def _optional_str(value: Any) -> str | None:
|
|
if value in (None, ""):
|
|
return None
|
|
return str(value)
|
|
|
|
|
|
def _optional_bounded_float(value: Any) -> float | None:
|
|
if value in (None, ""):
|
|
return None
|
|
return _bounded_float(value, default=0.0)
|
|
|
|
|
|
def _bounded_float(value: Any, *, default: float = 0.0) -> float:
|
|
if value in (None, ""):
|
|
return default
|
|
try:
|
|
return max(0.0, min(1.0, float(value)))
|
|
except (TypeError, ValueError):
|
|
return default
|
|
|
|
|
|
def _summarize_evidence(payload: dict[str, Any]) -> str:
|
|
evidence = payload.get("evidence")
|
|
if isinstance(evidence, dict):
|
|
theme = evidence.get("theme")
|
|
if theme:
|
|
return f"Theme: {theme}"
|
|
skill_version = evidence.get("skill_version")
|
|
if skill_version:
|
|
return f"Skill version: {skill_version}"
|
|
source_run_ids = payload.get("source_run_ids") or []
|
|
return f"{len(source_run_ids)} source run(s)"
|
|
|
|
|
|
def _utc_now() -> str:
|
|
return datetime.now(timezone.utc).isoformat()
|