feat(tasks): add skill-templated task graph execution

This commit is contained in:
2026-06-23 10:22:58 +08:00
parent 6843d89b2c
commit 53b13e8eac
53 changed files with 4773 additions and 756 deletions

View File

@ -83,6 +83,12 @@ class SkillAssembler:
return SkillAssemblyResult()
llm_interactions: list[dict[str, Any]] = []
if len(candidates) == 1:
return SkillAssemblyResult(
activated_skills=self._activate_skill_contexts([candidates[0]["name"]]),
llm_interactions=llm_interactions,
)
if len(candidates) <= self.max_detailed_candidates:
shortlisted_names = [item["name"] for item in candidates]
else:
@ -115,6 +121,10 @@ class SkillAssembler:
if not selected_names:
return SkillAssemblyResult(llm_interactions=llm_interactions)
activated_skills = self._activate_skill_contexts(selected_names)
return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions)
def _activate_skill_contexts(self, selected_names: list[str]) -> list[SkillContext]:
activated_skills: list[SkillContext] = []
for name in selected_names:
record = self.loader.get_skill_record(name)
@ -130,10 +140,11 @@ class SkillAssembler:
content_hash=record.content_hash or "" if record is not None else "",
activation_reason="llm_selected",
tool_hints=list(record.tool_hints) if record is not None else [],
team_template=getattr(record, "team_template", None) if record is not None else None,
team_template_warnings=list(getattr(record, "team_template_warnings", [])) if record is not None else [],
)
)
return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions)
return activated_skills
async def _select_skill_names(
self,

View File

@ -28,6 +28,7 @@ from .utils import (
check_requirements,
escape_xml,
extract_required_tool_names,
extract_skill_team_template,
get_missing_requirements,
parse_frontmatter,
parse_skill_metadata_blob,
@ -49,6 +50,8 @@ class SkillRecord:
tool_hints: list[str] = field(default_factory=list)
frontmatter: dict[str, Any] = field(default_factory=dict)
description: str = ""
team_template: dict[str, Any] | None = None
team_template_warnings: list[str] = field(default_factory=list)
class SkillsLoader:
@ -113,6 +116,7 @@ class SkillsLoader:
continue
normalized_frontmatter = dict(frontmatter)
meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", ""))
template_result = extract_skill_team_template(body)
record = SkillRecord(
name=name,
path=skill_file,
@ -127,6 +131,8 @@ class SkillsLoader:
),
frontmatter=normalized_frontmatter,
description=str(frontmatter.get("description") or summarize_body(body) or name),
team_template=template_result.template,
team_template_warnings=template_result.warnings,
)
if filter_unavailable and not self._record_available(record):
continue
@ -146,6 +152,7 @@ class SkillsLoader:
else:
path = self.workspace_skills / name / "versions" / loaded.version.version / "SKILL.md"
_frontmatter, body = parse_frontmatter(loaded.content)
template_result = extract_skill_team_template(body)
record = SkillRecord(
name=name,
path=path,
@ -160,6 +167,8 @@ class SkillsLoader:
),
frontmatter=dict(loaded.version.frontmatter),
description=str(loaded.version.frontmatter.get("description") or loaded.version.summary or name),
team_template=template_result.template,
team_template_warnings=template_result.warnings,
)
if filter_unavailable and not self._record_available(record):
continue

View File

@ -17,6 +17,7 @@ import json
import os
import re
import shutil
from dataclasses import dataclass, field
from typing import Any
@ -84,6 +85,27 @@ def strip_frontmatter(content: str) -> str:
return body
@dataclass(slots=True)
class SkillTeamTemplateParseResult:
template: dict[str, Any] | None = None
warnings: list[str] = field(default_factory=list)
def extract_skill_team_template(body: str) -> SkillTeamTemplateParseResult:
matches = re.findall(r"```beaver-team-template\s*\n(.*?)\n```", body, re.DOTALL)
if not matches:
return SkillTeamTemplateParseResult()
if len(matches) != 1:
return SkillTeamTemplateParseResult(warnings=["skill defines multiple team templates"])
try:
template = json.loads(matches[0])
except json.JSONDecodeError:
return SkillTeamTemplateParseResult(warnings=["team template JSON is invalid"])
if not isinstance(template, dict) or not isinstance(template.get("nodes", []), list):
return SkillTeamTemplateParseResult(warnings=["team template must be an object with a nodes list"])
return SkillTeamTemplateParseResult(template=template)
def extract_required_tool_names(body: str) -> list[str]:
"""从 canonical skill 正文的 `## Required Tools` 段落提取工具名。

View File

@ -284,6 +284,9 @@ def _build_replay_case_reports(
"side_effects": [*baseline.get("side_effects", []), *candidate_arm.get("side_effects", [])],
"validator_notes": list(surrogate.get("notes") or []),
}
historical_accepted_score = _historical_accepted_score(case)
if historical_accepted_score is not None:
case_report["historical_accepted_score"] = historical_accepted_score
return case_report, {
"run_id": case["run_id"],
"session_id": case.get("session_id") or "",
@ -293,6 +296,7 @@ def _build_replay_case_reports(
"baseline_score": baseline_score,
"candidate_score": candidate_score,
"delta": round(candidate_score - baseline_score, 4),
**({"historical_accepted_score": historical_accepted_score} if historical_accepted_score is not None else {}),
}
@ -658,8 +662,11 @@ def _ability_score(*, case: dict[str, Any], arm: dict[str, Any], arm_name: str)
if validator is not None:
return _ability_from_validator(validator, arm)
if not case.get("synthetic"):
score = _bounded_score(case.get("accepted_score"), default=0.75) if arm_name == "baseline" else _ability_from_output(arm)["final_score"]
return _ability_breakdown(score=score, source="user_feedback" if arm_name == "baseline" else "llm_judge")
result = _ability_from_output(arm, source="output_heuristic")
historical_accepted_score = _historical_accepted_score(case)
if historical_accepted_score is not None:
result["historical_accepted_score"] = historical_accepted_score
return result
return _ability_breakdown(score=0.0, source="unscored", notes=["Synthetic cases require a validator."])
@ -697,6 +704,12 @@ def _ability_from_output(arm: dict[str, Any], *, source: str = "llm_judge", note
return _ability_breakdown(score=score, source=source, notes=notes)
def _historical_accepted_score(case: dict[str, Any]) -> float | None:
    """Return the accepted score recorded on a case, if one applies.

    Returns ``None`` when the case is flagged ``synthetic``, when its
    ``validator`` entry is a dict, or when it has no ``accepted_score`` key.
    Otherwise the stored value is passed through ``_bounded_score`` with a
    default of 0.75.
    """
    if case.get("synthetic"):
        return None
    if isinstance(case.get("validator"), dict):
        return None
    if "accepted_score" not in case:
        return None
    recorded = case.get("accepted_score")
    return _bounded_score(recorded, default=0.75)
def _ability_breakdown(*, score: float, source: str, notes: list[str] | None = None) -> dict[str, Any]:
bounded = _bounded_score(score, default=0.0)
return {