feat(team): preserve node run evidence
This commit is contained in:
@ -241,7 +241,7 @@ class TeamGraphScheduler:
|
|||||||
failed = [item for item in results if not item.success]
|
failed = [item for item in results if not item.success]
|
||||||
if failed:
|
if failed:
|
||||||
failure_lines = [
|
failure_lines = [
|
||||||
f"- {item.node_id}: {item.error or item.finish_reason}"
|
f"- {item.node_id}: {item.error or item.finish_reason} evidence={'yes' if item.evidence else 'no'}"
|
||||||
for item in failed
|
for item in failed
|
||||||
]
|
]
|
||||||
summary_parts.append("Failed nodes:\n" + "\n".join(failure_lines))
|
summary_parts.append("Failed nodes:\n" + "\n".join(failure_lines))
|
||||||
|
|||||||
@ -6,6 +6,7 @@ from uuid import uuid4
|
|||||||
|
|
||||||
from beaver.engine import AgentLoop
|
from beaver.engine import AgentLoop
|
||||||
from beaver.engine.providers import ProviderBundle
|
from beaver.engine.providers import ProviderBundle
|
||||||
|
from beaver.tasks.evidence import EvidenceBuilder
|
||||||
|
|
||||||
from .models import DelegationEnvelope, NodeRunResult
|
from .models import DelegationEnvelope, NodeRunResult
|
||||||
|
|
||||||
@ -47,6 +48,13 @@ class LocalAgentRunner:
|
|||||||
pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
|
pinned_skill_contexts=envelope.inherited_pinned_skill_contexts,
|
||||||
allow_candidate_generation=allow_candidate_generation,
|
allow_candidate_generation=allow_candidate_generation,
|
||||||
)
|
)
|
||||||
|
loaded = self.loop.boot()
|
||||||
|
evidence = EvidenceBuilder(loaded.session_manager).build_run_evidence(
|
||||||
|
result.session_id,
|
||||||
|
result.run_id,
|
||||||
|
result.output_text,
|
||||||
|
result.finish_reason,
|
||||||
|
)
|
||||||
success = result.finish_reason == "stop"
|
success = result.finish_reason == "stop"
|
||||||
return NodeRunResult(
|
return NodeRunResult(
|
||||||
node_id=envelope.node_id or envelope.agent.name,
|
node_id=envelope.node_id or envelope.agent.name,
|
||||||
@ -56,6 +64,7 @@ class LocalAgentRunner:
|
|||||||
session_id=result.session_id,
|
session_id=result.session_id,
|
||||||
finish_reason=result.finish_reason,
|
finish_reason=result.finish_reason,
|
||||||
error=None if success else (result.output_text or result.finish_reason),
|
error=None if success else (result.output_text or result.finish_reason),
|
||||||
|
evidence=evidence,
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Literal
|
|||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from beaver.engine.context import SkillContext
|
from beaver.engine.context import SkillContext
|
||||||
|
from beaver.tasks.evidence import RunEvidence
|
||||||
|
|
||||||
|
|
||||||
TeamStrategy = Literal[
|
TeamStrategy = Literal[
|
||||||
@ -116,6 +117,7 @@ class NodeRunResult:
|
|||||||
session_id: str | None = None
|
session_id: str | None = None
|
||||||
finish_reason: str = "stop"
|
finish_reason: str = "stop"
|
||||||
error: str | None = None
|
error: str | None = None
|
||||||
|
evidence: "RunEvidence | None" = None
|
||||||
|
|
||||||
def to_dict(self) -> dict[str, Any]:
|
def to_dict(self) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
@ -126,6 +128,7 @@ class NodeRunResult:
|
|||||||
"session_id": self.session_id,
|
"session_id": self.session_id,
|
||||||
"finish_reason": self.finish_reason,
|
"finish_reason": self.finish_reason,
|
||||||
"error": self.error,
|
"error": self.error,
|
||||||
|
"evidence": self.evidence.to_dict() if self.evidence is not None else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -153,6 +153,26 @@ def test_local_agent_runner_uses_shared_loop_and_records_parent_task(tmp_path: P
|
|||||||
assert child_session["parent_session_id"] == "session-root"
|
assert child_session["parent_session_id"] == "session-root"
|
||||||
|
|
||||||
|
|
||||||
|
def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: Path) -> None:
    """Check that a node run ending without ``stop`` still carries evidence.

    The provider is scripted with a single response whose finish reason is
    ``max_tool_iterations``. The runner result must be reported as
    unsuccessful, while its evidence keeps both the output text and the
    finish reason of that response.
    """
    agent_loop = _loop(tmp_path)
    scripted_provider = RecordingProvider(
        [_response("partial evidence", finish_reason="max_tool_iterations")]
    )
    delegation = DelegationEnvelope(
        parent_task_id="task-parent",
        parent_session_id="session-root",
        parent_run_id="run-root",
        agent=AgentDescriptor(name="researcher", role="research"),
        task="research the requested topic",
        node_id="research",
    )

    runner = LocalAgentRunner(agent_loop)
    outcome = asyncio.run(
        runner.run(delegation, provider_bundle=_bundle(scripted_provider))
    )

    # The run did not finish with "stop", so it counts as a failure ...
    assert outcome.success is False
    # ... but the evidence must still be attached and reflect the response.
    assert outcome.evidence is not None
    assert outcome.evidence.output_text == "partial evidence"
    assert outcome.evidence.finish_reason == "max_tool_iterations"
|
||||||
|
|
||||||
|
|
||||||
def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None:
|
def test_pinned_skill_is_injected_into_delegated_run(tmp_path: Path) -> None:
|
||||||
_publish_skill(
|
_publish_skill(
|
||||||
tmp_path,
|
tmp_path,
|
||||||
@ -438,7 +458,7 @@ def test_team_summary_lists_only_failed_nodes_when_all_nodes_fail(tmp_path: Path
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert result.success is False
|
assert result.success is False
|
||||||
assert result.summary == "Failed nodes:\n- one: one down\n- two: two down"
|
assert result.summary == "Failed nodes:\n- one: one down evidence=no\n- two: two down evidence=no"
|
||||||
|
|
||||||
|
|
||||||
def test_graph_structure_errors_still_raise(tmp_path: Path) -> None:
|
def test_graph_structure_errors_still_raise(tmp_path: Path) -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user