feat(team): preserve node run evidence

This commit is contained in:
2026-05-22 11:30:19 +08:00
parent 3ff2e2ce11
commit 60605a74e0
4 changed files with 34 additions and 2 deletions

View File

@ -241,7 +241,7 @@ class TeamGraphScheduler:
failed = [item for item in results if not item.success] failed = [item for item in results if not item.success]
if failed: if failed:
failure_lines = [ failure_lines = [
f"- {item.node_id}: {item.error or item.finish_reason}" f"- {item.node_id}: {item.error or item.finish_reason} evidence={'yes' if item.evidence else 'no'}"
for item in failed for item in failed
] ]
summary_parts.append("Failed nodes:\n" + "\n".join(failure_lines)) summary_parts.append("Failed nodes:\n" + "\n".join(failure_lines))

View File

@ -6,6 +6,7 @@ from uuid import uuid4
from beaver.engine import AgentLoop from beaver.engine import AgentLoop
from beaver.engine.providers import ProviderBundle from beaver.engine.providers import ProviderBundle
from beaver.tasks.evidence import EvidenceBuilder
from .models import DelegationEnvelope, NodeRunResult from .models import DelegationEnvelope, NodeRunResult
@ -47,6 +48,13 @@ class LocalAgentRunner:
pinned_skill_contexts=envelope.inherited_pinned_skill_contexts, pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
allow_candidate_generation=allow_candidate_generation, allow_candidate_generation=allow_candidate_generation,
) )
loaded = self.loop.boot()
evidence = EvidenceBuilder(loaded.session_manager).build_run_evidence(
result.session_id,
result.run_id,
result.output_text,
result.finish_reason,
)
success = result.finish_reason == "stop" success = result.finish_reason == "stop"
return NodeRunResult( return NodeRunResult(
node_id=envelope.node_id or envelope.agent.name, node_id=envelope.node_id or envelope.agent.name,
@ -56,6 +64,7 @@ class LocalAgentRunner:
session_id=result.session_id, session_id=result.session_id,
finish_reason=result.finish_reason, finish_reason=result.finish_reason,
error=None if success else (result.output_text or result.finish_reason), error=None if success else (result.output_text or result.finish_reason),
evidence=evidence,
) )
@staticmethod @staticmethod

View File

@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Literal
if TYPE_CHECKING: if TYPE_CHECKING:
from beaver.engine.context import SkillContext from beaver.engine.context import SkillContext
from beaver.tasks.evidence import RunEvidence
TeamStrategy = Literal[ TeamStrategy = Literal[
@ -116,6 +117,7 @@ class NodeRunResult:
session_id: str | None = None session_id: str | None = None
finish_reason: str = "stop" finish_reason: str = "stop"
error: str | None = None error: str | None = None
evidence: "RunEvidence | None" = None
def to_dict(self) -> dict[str, Any]: def to_dict(self) -> dict[str, Any]:
return { return {
@ -126,6 +128,7 @@ class NodeRunResult:
"session_id": self.session_id, "session_id": self.session_id,
"finish_reason": self.finish_reason, "finish_reason": self.finish_reason,
"error": self.error, "error": self.error,
"evidence": self.evidence.to_dict() if self.evidence is not None else None,
} }

View File

@ -153,6 +153,26 @@ def test_local_agent_runner_uses_shared_loop_and_records_parent_task(tmp_path: P
assert child_session["parent_session_id"] == "session-root" assert child_session["parent_session_id"] == "session-root"
def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: Path) -> None:
    """Check that a node run ending without ``stop`` still carries evidence.

    The provider is scripted to answer once with the text
    ``"partial evidence"`` and the finish reason ``"max_tool_iterations"``.
    The resulting node result must be flagged as unsuccessful while its
    ``evidence`` keeps both the output text and the finish reason.
    """
    agent_loop = _loop(tmp_path)
    scripted_reply = _response("partial evidence", finish_reason="max_tool_iterations")
    recording_provider = RecordingProvider([scripted_reply])
    researcher = AgentDescriptor(name="researcher", role="research")
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=researcher,
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    node_result = asyncio.run(
        runner.run(delegation, provider_bundle=_bundle(recording_provider))
    )

    # The run did not finish with "stop", so it is reported as a failure ...
    assert node_result.success is False
    # ... yet the evidence gathered during the run must still be attached.
    run_evidence = node_result.evidence
    assert run_evidence is not None
    assert run_evidence.output_text == "partial evidence"
    assert run_evidence.finish_reason == "max_tool_iterations"
def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None: def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None:
_publish_skill( _publish_skill(
tmp_path, tmp_path,
@ -438,7 +458,7 @@ def test_team_summary_lists_only_failed_nodes_when_all_nodes_fail(tmp_path: Path
) )
assert result.success is False assert result.success is False
assert result.summary == "Failed nodes:\n- one: one down\n- two: two down" assert result.summary == "Failed nodes:\n- one: one down evidence=no\n- two: two down evidence=no"
def test_graph_structure_errors_still_raise(tmp_path: Path) -> None: def test_graph_structure_errors_still_raise(tmp_path: Path) -> None: