feat(tasks): add skill-templated task graph execution

This commit is contained in:
2026-06-23 10:22:58 +08:00
parent 6843d89b2c
commit 53b13e8eac
53 changed files with 4773 additions and 756 deletions

View File

@@ -0,0 +1,695 @@
"""Task attempt orchestration for Beaver Task mode."""
from __future__ import annotations
from time import perf_counter
from typing import Any, Callable
from beaver.coordinator.models import ExecutionNode, TeamRunResult
from beaver.engine import AgentRunResult
from beaver.engine.context import SkillContext
from beaver.prompts.main_agent import normalize_main_agent_prompt_locale
from .evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, render_task_evidence
from .models import TaskRecord
from .planner import TaskExecutionPlan
class TaskAttemptOrchestrator:
"""Own the execution order inside one Task attempt."""
def __init__(
self,
*,
loaded: Any,
create_loop: Callable[[], Any],
make_provider_bundle_for_task: Callable[[Any, dict[str, Any]], Any],
) -> None:
self.loaded = loaded
self.create_loop = create_loop
self.make_provider_bundle_for_task = make_provider_bundle_for_task
async def run(
    self,
    *,
    message: str,
    runner: Any,
    kwargs: dict[str, Any],
    task: TaskRecord,
) -> AgentRunResult:
    """Run one attempt of ``task`` and return the main agent's result.

    The attempt proceeds in a fixed order: start the run on the task
    service, pre-select skills, ask the planner for an execution plan,
    optionally execute the planned team graph, invoke ``runner`` for the
    main (or synthesis) run, then record the run and its evidence.

    Args:
        message: Current user message; forwarded to the task service, the
            planner and ``runner``.
        runner: Awaitable callable invoked as
            ``runner(message, **attempt_kwargs)``.
        kwargs: Base keyword arguments for ``runner``. The mapping is
            copied before it is modified.
        task: Task record this attempt belongs to.

    Returns:
        The result produced by ``runner``, with ``task_id``,
        ``task_status`` and ``validation_result`` (always ``None``) set.

    Raises:
        RuntimeError: If a required dependency on ``self.loaded`` is
            ``None``.
        KeyError: If the plan is a team plan and ``kwargs`` has no
            ``"session_id"`` entry.
    """
    task_service = self._require_loaded(self.loaded, "task_service")
    task_execution_planner = self._require_loaded(self.loaded, "task_execution_planner")
    session_manager = self._require_loaded(self.loaded, "session_manager")
    base_execution_context = kwargs.get("execution_context")
    prompt_locale = kwargs.get("prompt_locale") or task.metadata.get("prompt_locale")
    output_language_instruction = self._output_language_instruction(prompt_locale)
    # Reuse a caller-supplied bundle when present; otherwise build one from the raw kwargs.
    provider_bundle = kwargs.get("provider_bundle") or self.make_provider_bundle_for_task(self.loaded, kwargs)
    # Work on a shallow copy so the pop/assignments below do not touch the caller's dict.
    kwargs = dict(kwargs)
    team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
    kwargs["provider_bundle"] = provider_bundle
    # Attempts are numbered from 1, continuing from the index stored in the task metadata.
    attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
    task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
    # Skills are chosen before planning so the planner can see their summaries and tool hints.
    pre_skill_context = self._build_skill_selection_context(
        task=task,
        user_message=message,
        attempt_index=attempt_index,
    )
    preselected_skills, pre_skill_latency_ms = await self._assemble_task_attempt_skills(
        task_description=pre_skill_context,
        provider_bundle=provider_bundle,
        thinking_enabled=kwargs.get("thinking_enabled"),
        include_skill_assembly=bool(kwargs.get("include_skill_assembly", True)),
        pinned_skill_contexts=kwargs.get("pinned_skill_contexts"),
    )
    if pre_skill_latency_ms:
        kwargs["pre_run_latency_ms"] = self._merge_latency_ms(
            kwargs.get("pre_run_latency_ms"),
            {"pre_skill_assembly_ms": pre_skill_latency_ms},
        )
    plan = await task_execution_planner.plan(
        task=task,
        user_message=message,
        attempt_index=attempt_index,
        provider_bundle=provider_bundle,
        skill_summaries=self._skill_summaries_for_planner(preselected_skills),
        tool_hints=self._tool_hints_for_skills(preselected_skills),
        activated_skills=preselected_skills,
    )
    self._append_task_observation(
        session_manager,
        task.session_id,
        event_type="task_execution_planned",
        payload={
            "task_id": task.task_id,
            "attempt_index": attempt_index,
            **plan.to_event_payload(),
        },
    )
    team_summaries: list[str] = []
    team_execution_context = ""
    team_result: TeamRunResult | None = None
    if plan.is_team:
        # A caller-provided factory wins; otherwise derive one from the (copied) kwargs.
        team_result, team_error = await self._run_team_for_task(
            plan,
            task=task,
            parent_session_id=kwargs["session_id"],
            provider_bundle_factory=team_provider_bundle_factory
            or self._build_team_provider_bundle_factory(kwargs),
        )
        if team_result is not None:
            # The team ran (individual nodes may still have failed): summarise it
            # and render its evidence for the synthesis run.
            team_summaries = [self._team_summary_for_validation(team_result)]
            team_packet = TaskEvidencePacket(
                task_id=task.task_id,
                attempt_index=attempt_index,
                main_run=None,
                team_runs=self._team_run_evidence(team_result),
                team_node_results=list(team_result.node_results),
                final_output="",
            )
            team_execution_context = self._join_context(
                self._team_execution_context(plan, team_result),
                "Rendered team evidence:\n" + render_task_evidence(team_packet),
            )
            self._append_task_observation(
                session_manager,
                task.session_id,
                event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
                payload={
                    "task_id": task.task_id,
                    "attempt_index": attempt_index,
                    "plan_mode": plan.mode,
                    "strategy": plan.graph.strategy if plan.graph else None,
                    "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                    "team_run_ids": team_result.run_ids,
                    "team_success": team_result.success,
                    "node_results": self._team_node_results_for_event(plan, team_result),
                    "reason": plan.reason,
                    "error": None if team_result.success else "one or more team nodes failed",
                },
            )
        else:
            # The team run produced no result at all; record the error and fall
            # back to a context that tells the main agent what happened.
            team_summaries = [f"Team execution failed: {team_error}"]
            team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
            self._append_task_observation(
                session_manager,
                task.session_id,
                event_type="task_team_run_failed",
                payload={
                    "task_id": task.task_id,
                    "attempt_index": attempt_index,
                    "plan_mode": plan.mode,
                    "strategy": plan.graph.strategy if plan.graph else None,
                    "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
                    "team_run_ids": [],
                    "team_success": False,
                    "reason": plan.reason,
                    "error": team_error,
                },
            )
    # Computed for every plan mode; non-team plans yield the "single" outcome metadata.
    outcome_context, incomplete_prefix, outcome_metadata = self._team_synthesis_outcome(
        plan,
        team_result,
        prompt_locale=prompt_locale,
    )
    if plan.is_team:
        # Put the outcome summary ahead of the detailed team context.
        team_execution_context = self._join_context(outcome_context, team_execution_context)
    attempt_kwargs = dict(kwargs)
    # Skills were already selected above, so the main run gets them pinned and
    # skips its own skill assembly.
    attempt_kwargs.update(
        {
            "task_id": task.task_id,
            "task_mode": True,
            "attempt_index": attempt_index,
            "allow_candidate_generation": False,
            "pinned_skill_contexts": preselected_skills,
            "include_skill_assembly": False,
        }
    )
    attempt_kwargs["execution_context"] = self._join_context(
        base_execution_context,
        output_language_instruction,
        team_execution_context,
    )
    if plan.is_team and team_execution_context:
        # After a team run the main run only synthesises: tools are switched off.
        attempt_kwargs["include_tools"] = False
        attempt_kwargs["max_tool_iterations"] = 0
    # NOTE(review): indentation was lost in the reviewed copy; this assignment is
    # assumed to apply to every plan mode (not only team plans) — confirm.
    attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
        task=task,
        user_message=message,
        attempt_index=attempt_index,
        plan=plan,
        team_summaries=team_summaries,
    )
    result = await runner(message, **attempt_kwargs)
    if outcome_metadata["task_outcome"] == "incomplete":
        # Make sure the user-visible text states that the task is incomplete.
        result.output_text = self._apply_incomplete_prefix(result.output_text, incomplete_prefix)
    self._append_task_observation(
        session_manager,
        task.session_id,
        event_type="task_synthesis_completed",
        payload={
            "task_id": task.task_id,
            "attempt_index": attempt_index,
            "main_run_id": result.run_id,
            "plan_mode": plan.mode,
            "strategy": plan.graph.strategy if plan.graph else None,
            **outcome_metadata,
        },
    )
    # append_run returns the task record that is used from here on.
    task = task_service.append_run(
        task.task_id,
        result.run_id,
        skill_names=self._skill_names_for_run(result.run_id),
    )
    evidence_packet = self._build_task_evidence_packet(
        session_manager=session_manager,
        task=task,
        attempt_index=attempt_index,
        result=result,
        team_result=team_result,
    )
    evidence_text = render_task_evidence(evidence_packet)
    # Debug summary over the main run plus all team runs; a missing main run is skipped.
    evidence_debug = {
        "evidence_run_ids": [
            item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
        ],
        "evidence_session_ids": [
            item.session_id
            for item in [evidence_packet.main_run, *evidence_packet.team_runs]
            if item is not None
        ],
        "tool_result_count": sum(
            len(item.tool_results)
            for item in [evidence_packet.main_run, *evidence_packet.team_runs]
            if item is not None
        ),
        "evidence_length": len(evidence_text),
    }
    session_manager.update_latest_assistant_event_payload(
        result.session_id,
        result.run_id,
        {
            "task_id": task.task_id,
            "task_status": task.status,
            "evidence_status": "recorded",
        },
    )
    # Only the debug summary is stored in this event, not the rendered evidence text.
    session_manager.append_message(
        result.session_id,
        run_id=result.run_id,
        role="system",
        event_type="task_evidence_recorded",
        event_payload={
            "task_id": task.task_id,
            "attempt_index": attempt_index,
            "evidence_debug": evidence_debug,
        },
        content=None,
        context_visible=False,
    )
    result.task_id = task.task_id
    result.task_status = task.status
    result.validation_result = None
    return result
async def _run_team_for_task(
self,
plan: TaskExecutionPlan,
*,
task: TaskRecord,
parent_session_id: str,
provider_bundle_factory: Any,
) -> tuple[TeamRunResult | None, str | None]:
if plan.graph is None:
return None, "team plan did not include an execution graph"
try:
from beaver.services.team_service import TeamService
result = await TeamService(self.create_loop()).run_team(
plan.graph,
parent_task_id=task.task_id,
parent_session_id=parent_session_id,
parent_run_id=None,
provider_bundle_factory=provider_bundle_factory,
allow_candidate_generation=False,
)
return result, None
except Exception as exc:
return None, str(exc)
async def _assemble_task_attempt_skills(
self,
*,
task_description: str,
provider_bundle: Any,
thinking_enabled: bool | None,
include_skill_assembly: bool,
pinned_skill_contexts: Any,
) -> tuple[list[SkillContext], float]:
started = perf_counter()
selected = self._coerce_skill_contexts(pinned_skill_contexts)
if include_skill_assembly:
skill_assembler = self._require_loaded(self.loaded, "skill_assembler")
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
assembled = await skill_assembler.assemble(
task_description=task_description,
provider=provider_bundle.auxiliary_provider or provider_bundle.main_provider,
model=getattr(runtime, "model", None),
embedding_runtime=getattr(provider_bundle, "embedding_runtime", None),
thinking_enabled=thinking_enabled,
)
selected = self._merge_skill_contexts(
selected,
list(getattr(assembled, "activated_skills", []) or []),
)
return selected, (perf_counter() - started) * 1000
@staticmethod
def _coerce_skill_contexts(value: Any) -> list[SkillContext]:
if not isinstance(value, list):
return []
return [item for item in value if isinstance(item, SkillContext)]
@staticmethod
def _merge_skill_contexts(left: list[SkillContext], right: list[SkillContext]) -> list[SkillContext]:
merged: list[SkillContext] = []
seen: set[str] = set()
for skill in [*left, *right]:
if skill.name in seen:
continue
seen.add(skill.name)
merged.append(skill)
return merged
@staticmethod
def _skill_summaries_for_planner(skills: list[SkillContext]) -> list[str]:
summaries: list[str] = []
for skill in skills:
content = " ".join((skill.content or "").split())
if len(content) > 240:
content = content[:237].rstrip() + "..."
summaries.append(f"{skill.name}: {content}" if content else skill.name)
return summaries
@staticmethod
def _tool_hints_for_skills(skills: list[SkillContext]) -> list[str]:
result: list[str] = []
for skill in skills:
for hint in skill.tool_hints:
if hint and hint not in result:
result.append(hint)
return result
@staticmethod
def _require_loaded(loaded: Any, field_name: str) -> Any:
value = getattr(loaded, field_name)
if value is None:
raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}")
return value
@staticmethod
def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]:
merged: dict[str, float] = {}
if isinstance(current, dict):
for key, value in current.items():
if isinstance(value, (int, float)):
merged[str(key)] = float(value)
for key, value in updates.items():
merged[key] = merged.get(key, 0.0) + float(value)
return merged
@staticmethod
def _output_language_instruction(prompt_locale: str | None) -> str:
    """Return the output-language instruction for ``prompt_locale``.

    The locale is normalized first. ``"en"`` and ``"zh-Hant"`` get their
    own instruction; every other normalized value gets the Simplified
    Chinese one.
    """
    resolved = normalize_main_agent_prompt_locale(prompt_locale)
    if resolved == "en":
        instruction = (
            "Output language: English. Use English for user-facing task titles, summaries, plans, "
            "and final answers unless the user explicitly requests another language."
        )
    elif resolved == "zh-Hant":
        instruction = (
            "輸出語言:繁體中文。除非使用者明確要求其他語言,所有面向使用者的任務標題、摘要、"
            "計劃與最終回答都使用繁體中文。"
        )
    else:
        instruction = (
            "输出语言:简体中文。除非用户明确要求其他语言,所有面向用户的任务标题、摘要、"
            "计划与最终回答都使用简体中文。"
        )
    return instruction
def _skill_names_for_run(self, run_id: str) -> list[str]:
store = getattr(self.loaded, "run_memory_store", None)
if store is None:
return []
for record in store.list_runs():
if record.run_id == run_id:
return [receipt.skill_name for receipt in record.activated_skills]
return []
@staticmethod
def _build_skill_selection_context(
*,
task: TaskRecord,
user_message: str,
attempt_index: int,
plan: TaskExecutionPlan | None = None,
team_summaries: list[str] | None = None,
) -> str:
phase = f"attempt_{attempt_index}"
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
phase = f"revision_attempt_{attempt_index}"
elif plan is not None and plan.is_team:
phase = f"team_synthesis_attempt_{attempt_index}"
sections = [
f"Task goal:\n{task.goal or task.description}",
f"Task description:\n{task.description}",
f"Current user request:\n{user_message}",
f"Execution phase:\n{phase}",
f"Task status:\n{task.status}",
]
if task.constraints:
sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints))
if task.skill_names:
sections.append(
"Previously activated skills (reuse bias, not pinned):\n"
+ "\n".join(f"- {item}" for item in task.skill_names)
)
else:
sections.append("Previously activated skills:\nNone")
if task.feedback:
history_lines = []
for item in task.feedback[-5:]:
kind = item.get("acceptance_type") or item.get("feedback_type")
comment = item.get("comment") or ""
run_id = item.get("run_id") or ""
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
if plan is not None:
plan_lines = [
f"mode: {plan.mode}",
f"reason: {plan.reason}",
]
if plan.final_synthesis_instruction:
plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}")
if plan.graph is not None:
plan_lines.append(f"strategy: {plan.graph.strategy}")
plan_lines.append(
"nodes:\n"
+ "\n".join(
f"- {node.node_id}: {node.task}"
for node in plan.graph.nodes
)
)
sections.append("Execution plan:\n" + "\n".join(plan_lines))
if team_summaries:
sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400])
sections.append(
"Skill selection instruction:\n"
"Prefer reusing previously activated skills when they still match the Task. "
"Select new skills only if the current request, revision, or execution plan needs a different capability. "
"If no published skill matches, return [] and let the run continue without skills."
)
return "\n\n".join(section for section in sections if section.strip())
@staticmethod
def _append_task_observation(
session_manager: Any,
session_id: str,
*,
event_type: str,
payload: dict[str, Any],
) -> None:
session_manager.append_message(
session_id,
role="system",
event_type=event_type,
event_payload=payload,
content=payload.get("reason") or payload.get("error"),
context_visible=False,
)
@staticmethod
def _join_context(*parts: str | None) -> str:
return "\n\n".join(part.strip() for part in parts if part and part.strip())
@staticmethod
def _team_summary_for_validation(result: TeamRunResult) -> str:
lines = [
f"success={result.success}",
f"task_id={result.task_id or ''}",
"summary:",
result.summary,
"nodes:",
]
for node in result.node_results:
lines.append(
f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} "
f"error={node.error or ''} output={node.output_text[:500]}"
)
return "\n".join(lines)
@staticmethod
def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]:
nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {}
payloads: list[dict[str, Any]] = []
for item in result.node_results:
payload = item.to_dict()
node = nodes.get(item.node_id)
if node is not None:
payload["selected_skill_names"] = list(node.inherited_pinned_skills)
payload["ephemeral_skill_names"] = [
skill.name for skill in node.inherited_pinned_skill_contexts
]
payload["skill_query"] = node.agent.metadata.get("skill_query")
payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id")
payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name")
payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts)
payloads.append(payload)
return payloads
@staticmethod
def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]:
if result is None:
return []
return [node.evidence for node in result.node_results if node.evidence is not None]
@staticmethod
def _team_synthesis_outcome(
plan: TaskExecutionPlan,
result: TeamRunResult | None,
*,
prompt_locale: str | None = None,
) -> tuple[str, str, dict[str, Any]]:
if not plan.is_team or plan.graph is None:
metadata = {
"task_outcome": "single",
"incomplete_node_ids": [],
"node_statuses": {},
"evidence_gaps": {},
}
return "Task outcome: single", "", metadata
result_by_node = {
item.node_id: item
for item in (result.node_results if result is not None else [])
}
node_statuses: dict[str, str] = {}
evidence_gaps: dict[str, list[str]] = {}
incomplete_node_ids: list[str] = []
detail_lines: list[str] = []
successful_lines: list[str] = []
for node in plan.graph.nodes:
node_result = result_by_node.get(node.node_id)
status = node_result.completion_status if node_result is not None else "not_run"
node_statuses[node.node_id] = status
gaps = list(node_result.evidence_gaps) if node_result is not None else []
if gaps:
evidence_gaps[node.node_id] = gaps
if node.required_for_completion and status != "succeeded":
incomplete_node_ids.append(node.node_id)
detail_lines.append(
f"- {node.node_id}: status={status}, "
f"finish_reason={node_result.finish_reason if node_result is not None else 'not_run'}, "
f"error={(node_result.error or '') if node_result is not None else 'node did not run'}, "
f"evidence_gaps={gaps}"
)
elif node_result is not None and status == "succeeded":
successful_lines.append(f"- {node.node_id}: {node_result.output_text[:1000]}")
task_outcome = "incomplete" if incomplete_node_ids else "complete"
metadata = {
"task_outcome": task_outcome,
"incomplete_node_ids": incomplete_node_ids,
"node_statuses": node_statuses,
"evidence_gaps": evidence_gaps,
}
context_parts = [
f"Task outcome: {task_outcome}",
"Incomplete node IDs: " + (", ".join(incomplete_node_ids) or "none"),
]
if detail_lines:
context_parts.append("Incomplete required node details:\n" + "\n".join(detail_lines))
if successful_lines:
context_parts.append("Available successful node evidence:\n" + "\n".join(successful_lines))
if task_outcome == "incomplete":
context_parts.append(
"Synthesis requirement: produce a partial report from available evidence and explicitly state "
"that the task is incomplete, partially completed, or missing required evidence."
)
prefix = TaskAttemptOrchestrator._incomplete_prefix(prompt_locale) if incomplete_node_ids else ""
return "\n\n".join(context_parts), prefix, metadata
@staticmethod
def _incomplete_prefix(prompt_locale: str | None) -> str:
    """Return the localized "task incomplete" notice for ``prompt_locale``.

    The locale is normalized first; ``"en"`` and ``"zh-Hant"`` have their
    own text and any other normalized value gets the Simplified Chinese
    one. Every variant ends with a blank line.
    """
    resolved = normalize_main_agent_prompt_locale(prompt_locale)
    if resolved == "en":
        notice = "Task incomplete: some required steps failed or lack required evidence. The report below uses available results only.\n\n"
    elif resolved == "zh-Hant":
        notice = "任務未完成:部分必要步驟失敗或缺少必要證據。以下內容僅基於現有結果。\n\n"
    else:
        notice = "任务未完成:部分必要步骤失败或缺少必要证据。以下内容仅基于现有结果。\n\n"
    return notice
@staticmethod
def _apply_incomplete_prefix(output_text: str, prefix: str) -> str:
normalized = output_text.lower()
notices = (
"任务未完成",
"任務未完成",
"部分完成",
"缺少证据",
"缺少證據",
"task incomplete",
"incomplete task",
"partially complete",
"missing evidence",
)
if any(notice in normalized for notice in notices):
return output_text
return prefix + output_text.lstrip()
def _build_task_evidence_packet(
    self,
    *,
    session_manager: Any,
    task: TaskRecord,
    attempt_index: int,
    result: AgentRunResult,
    team_result: TeamRunResult | None,
) -> TaskEvidencePacket:
    """Bundle the main run's evidence with any team evidence for this attempt.

    The main-run evidence is built from the result's session id, run id,
    output text and finish reason; team entries are empty when no team
    result exists.
    """
    builder = EvidenceBuilder(session_manager)
    main_evidence = builder.build_run_evidence(
        result.session_id,
        result.run_id,
        result.output_text,
        result.finish_reason,
    )
    team_evidence = self._team_run_evidence(team_result)
    node_results = [] if team_result is None else list(team_result.node_results)
    return TaskEvidencePacket(
        task_id=task.task_id,
        attempt_index=attempt_index,
        main_run=main_evidence,
        team_runs=team_evidence,
        team_node_results=node_results,
        final_output=result.output_text,
    )
@staticmethod
def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str:
node_lines = [
(
f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, "
f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}"
)
for node in result.node_results
]
return "\n\n".join(
item
for item in [
"Task team execution result:",
f"Planner reason: {plan.reason}",
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
f"Team success: {result.success}",
f"Team summary:\n{result.summary}",
"Node results:\n" + "\n\n".join(node_lines),
(
"Final synthesis instruction:\n" + plan.final_synthesis_instruction
if plan.final_synthesis_instruction
else None
),
(
"Use successful team outputs as internal evidence. If one or more nodes failed, "
"do not blindly repeat failed tool calls. Produce a user-visible fallback answer "
"with available evidence and clearly state any missing or uncertain data."
),
]
if item
)
@staticmethod
def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str:
return "\n\n".join(
[
"Task team execution failed before final synthesis.",
f"Planner reason: {plan.reason}",
f"Strategy: {plan.graph.strategy if plan.graph else ''}",
f"Error: {error}",
(
"Proceed as the main agent. Do not blindly repeat failed tool calls; "
"produce a user-visible fallback answer with available evidence and clearly "
"state any missing or uncertain data."
),
]
)
def _build_team_provider_bundle_factory(self, kwargs: dict[str, Any]) -> Any:
def factory(node: ExecutionNode) -> Any:
node_kwargs = dict(kwargs)
node_kwargs.pop("provider_bundle", None)
if node.agent.model:
node_kwargs["model"] = node.agent.model
if node.agent.provider_name:
node_kwargs["provider_name"] = node.agent.provider_name
return self.make_provider_bundle_for_task(self.loaded, node_kwargs)
return factory