feat(skills-ui): show replay eval coverage

This commit is contained in:
2026-06-08 13:38:10 +08:00
parent b9171998b9
commit 9e2c02a333
3 changed files with 33 additions and 2 deletions

View File

@ -1088,6 +1088,12 @@ function EvalReportPanel({ report }: { report?: SkillDraftEvalReport | null }) {
/>
</div>
<div className="mt-3 grid gap-2 sm:grid-cols-3">
<MetricTile label={t('执行覆盖', 'Execution')} value={formatPercent(report.execution_coverage)} />
<MetricTile label={t('替代评估', 'Surrogate')} value={formatPercent(report.surrogate_coverage)} />
<MetricTile label={t('置信度', 'Confidence')} value={report.confidence || 'low'} />
</div>
<div className="mt-3 grid gap-2 sm:grid-cols-3">
<ReadableFact icon={<CheckCircle2 className="h-4 w-4" />} label={t('改进', 'Improved')} value={String(report.improved_count)} />
<ReadableFact icon={<XCircle className="h-4 w-4" />} label={t('回退', 'Regressed')} value={String(report.regression_count)} />
@ -1135,6 +1141,12 @@ function EvalReportPanel({ report }: { report?: SkillDraftEvalReport | null }) {
</div>
</div>
)}
{Array.isArray(report.case_reports) && report.case_reports.length > 0 ? (
<RawDetails title={t('Replay case reports', 'Replay case reports')} payload={report.case_reports} />
) : null}
{report.preservation_report ? (
<RawDetails title={t('Preservation report', 'Preservation report')} payload={report.preservation_report} />
) : null}
<div className="mt-3 text-xs text-muted-foreground">{formatDateTime(report.created_at)}</div>
<RawDetails title={t('原始评估报告', 'Raw eval report')} payload={report} />
</div>
@ -1387,6 +1399,11 @@ function formatScore(value: number): string {
return value.toFixed(2);
}
/**
 * Formats a fractional ratio (e.g. 0.42) as a whole-number percentage string ("42%").
 *
 * @param value - Ratio to format; may be missing (`undefined` / `null`).
 * @returns The ratio multiplied by 100 and rounded to the nearest integer, followed by `%`.
 *          Missing, non-numeric, `NaN`, and infinite inputs all fall back to `'0%'`.
 */
function formatPercent(value?: number | null): string {
// Number.isFinite rejects NaN as well as +/-Infinity, so a non-finite ratio can
// never be rendered as "Infinity%"; this mirrors the guard in formatSignedScore.
if (typeof value !== 'number' || !Number.isFinite(value)) return '0%';
return `${Math.round(value * 100)}%`;
}
function formatSignedScore(value: number): string {
if (!Number.isFinite(value)) return '-';
return `${value >= 0 ? '+' : ''}${value.toFixed(2)}`;

View File

@ -985,6 +985,15 @@ export interface SkillDraftEvalReport {
cases: Array<Record<string, unknown>>;
status: string;
created_at: string;
eval_version?: string;
mode?: 'heuristic' | 'replay' | string;
execution_coverage?: number;
surrogate_coverage?: number;
blocked_coverage?: number;
confidence?: 'low' | 'medium' | 'high' | string;
case_reports?: Array<Record<string, unknown>>;
tool_mode_summary?: Record<string, unknown>;
preservation_report?: Record<string, unknown> | null;
}
export interface SkillDraft {