From 9e2c02a3337cc8789c18cb763fc1ddc28a0d03a4 Mon Sep 17 00:00:00 2001 From: steven_li Date: Mon, 8 Jun 2026 13:38:10 +0800 Subject: [PATCH] feat(skills-ui): show replay eval coverage --- .../backend/beaver/interfaces/web/app.py | 9 +++++++-- app-instance/frontend/app/(app)/skills/page.tsx | 17 +++++++++++++++++ app-instance/frontend/types/index.ts | 9 +++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/app-instance/backend/beaver/interfaces/web/app.py b/app-instance/backend/beaver/interfaces/web/app.py index 3ef6f11..9b88f86 100644 --- a/app-instance/backend/beaver/interfaces/web/app.py +++ b/app-instance/backend/beaver/interfaces/web/app.py @@ -50,6 +50,7 @@ from beaver.services.user_file_resolver import ( build_file_auth_context, ) from beaver.skills.learning import SkillLearningWorker, SkillLearningWorkerConfig +from beaver.skills.learning.replay import ReplayRunner from beaver.skills.catalog.utils import parse_frontmatter from .deps import get_agent_service @@ -2080,7 +2081,8 @@ def create_app( @app.post("/api/skills/candidates/{candidate_id}/draft") async def synthesize_skill_draft(candidate_id: str, request: Request) -> dict[str, Any]: agent_service = get_agent_service(request) - loaded = agent_service.create_loop().boot() + loop = agent_service.create_loop() + loaded = loop.boot() try: candidate = loaded.skill_learning_pipeline.get_candidate(candidate_id) # type: ignore[union-attr] if candidate.draft_skill_name and candidate.draft_id: @@ -2099,6 +2101,7 @@ def create_app( draft.skill_name, draft.draft_id, provider_bundle=provider_bundle, + replay_runner=ReplayRunner(agent_loop=loop), ) except ValueError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc @@ -2107,7 +2110,8 @@ def create_app( @app.post("/api/skills/candidates/{candidate_id}/regenerate") async def regenerate_skill_draft(candidate_id: str, request: Request) -> dict[str, Any]: agent_service = get_agent_service(request) - loaded = agent_service.create_loop().boot() + loop = agent_service.create_loop() + loaded = loop.boot() provider_bundle = agent_service._make_provider_bundle_for_task(loaded, {}) # noqa: SLF001 try: draft = await loaded.skill_learning_pipeline.regenerate_draft( # type: ignore[union-attr] @@ -2120,6 +2124,7 @@ def create_app( draft.skill_name, draft.draft_id, provider_bundle=provider_bundle, + replay_runner=ReplayRunner(agent_loop=loop), ) except ValueError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc diff --git a/app-instance/frontend/app/(app)/skills/page.tsx b/app-instance/frontend/app/(app)/skills/page.tsx index 6d23e35..e46896f 100644 --- a/app-instance/frontend/app/(app)/skills/page.tsx +++ b/app-instance/frontend/app/(app)/skills/page.tsx @@ -1088,6 +1088,12 @@ function EvalReportPanel({ report }: { report?: SkillDraftEvalReport | null }) { /> +
+ + + +
+
} label={t('改进', 'Improved')} value={String(report.improved_count)} /> } label={t('回退', 'Regressed')} value={String(report.regression_count)} /> @@ -1135,6 +1141,12 @@ function EvalReportPanel({ report }: { report?: SkillDraftEvalReport | null }) {
)} + {Array.isArray(report.case_reports) && report.case_reports.length > 0 ? ( + + ) : null} + {report.preservation_report ? ( + + ) : null}
{formatDateTime(report.created_at)}
@@ -1387,6 +1399,11 @@ function formatScore(value: number): string { return value.toFixed(2); } +function formatPercent(value?: number | null): string { + if (typeof value !== 'number' || Number.isNaN(value)) return '0%'; + return `${Math.round(value * 100)}%`; +} + function formatSignedScore(value: number): string { if (!Number.isFinite(value)) return '-'; return `${value >= 0 ? '+' : ''}${value.toFixed(2)}`; diff --git a/app-instance/frontend/types/index.ts b/app-instance/frontend/types/index.ts index c0ae12c..5db526c 100644 --- a/app-instance/frontend/types/index.ts +++ b/app-instance/frontend/types/index.ts @@ -985,6 +985,15 @@ export interface SkillDraftEvalReport { cases: Array>; status: string; created_at: string; + eval_version?: string; + mode?: 'heuristic' | 'replay' | string; + execution_coverage?: number; + surrogate_coverage?: number; + blocked_coverage?: number; + confidence?: 'low' | 'medium' | 'high' | string; + case_reports?: Array>; + tool_mode_summary?: Record; + preservation_report?: Record | null; } export interface SkillDraft {