from __future__ import annotations

from pathlib import Path

from beaver.engine.session.manager import SessionManager
from beaver.tasks.evidence import (
    EvidenceBuilder,
    RunEvidence,
    TaskEvidencePacket,
    ToolEvidence,
    evaluate_node_evidence,
    render_task_evidence,
)


def _run_evidence(*, tool_results: list[ToolEvidence] | None = None) -> RunEvidence:
    """Build a ``RunEvidence`` fixture with fixed ids, empty output and ``stop`` finish.

    Args:
        tool_results: Optional tool evidence to attach; copied into a new list.
            ``None`` (or an empty list) yields an empty ``tool_results``.
    """
    attached = list(tool_results) if tool_results else []
    return RunEvidence(
        run_id="run-1",
        session_id="session-1",
        output_text="",
        finish_reason="stop",
        tool_results=attached,
    )


def test_evaluate_node_evidence_requires_successful_tool_result() -> None:
    """A tool result flagged ``success: False`` must not satisfy ``tool_result``."""
    failed_fetch = ToolEvidence(
        tool_name="web_fetch",
        tool_call_id="call-1",
        content="failed",
        event_payload={"success": False},
    )
    evidence = _run_evidence(tool_results=[failed_fetch])

    problems = evaluate_node_evidence(evidence, ["tool_result"], "done")

    assert problems == ["missing required evidence: tool_result"]


def test_evaluate_node_evidence_accepts_url_in_successful_tool_content() -> None:
    """A URL inside a successful tool result's content satisfies ``url``."""
    successful_fetch = ToolEvidence(
        tool_name="web_fetch",
        tool_call_id="call-1",
        content="Source: https://example.test/report",
        event_payload={"success": True},
    )
    evidence = _run_evidence(tool_results=[successful_fetch])

    problems = evaluate_node_evidence(evidence, ["tool_result", "url"], "done")

    assert problems == []


def test_evaluate_node_evidence_checks_output_and_unknown_requirements() -> None:
    """Whitespace-only output and an unknown requirement name are both reported."""
    evidence = _run_evidence()
    expected_problems = [
        "missing required evidence: output",
        "unsupported evidence requirement: unknown_type",
    ]

    problems = evaluate_node_evidence(evidence, ["output", "unknown_type"], " ")

    assert problems == expected_problems


def test_evidence_builder_preserves_full_tool_result(tmp_path: Path) -> None:
    """Tool content recorded in a session must reach the evidence and render intact."""
    session_id = "session-1"
    run_id = "run-1"
    final_answer = "Manchester United won 3-2."
    # The marker sits after 700 filler characters, so the assertions below only
    # hold if the tail of a long tool result is kept.
    long_content = "prefix " + ("x" * 700) + " MAN 3 FT 2 NFO"

    session_manager = SessionManager(tmp_path)
    session_manager.ensure_session(session_id, source="test")
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="user",
        event_type="user_message_added",
        content="score?",
    )
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="tool",
        event_type="tool_result_recorded",
        event_payload={"success": True, "url": "https://example.test/match"},
        content=long_content,
        tool_name="web_fetch",
        tool_call_id="call-1",
    )
    session_manager.append_message(
        session_id,
        run_id=run_id,
        role="system",
        event_type="run_completed",
        event_payload={"finish_reason": "stop"},
        content=final_answer,
        finish_reason="stop",
        context_visible=False,
    )

    builder = EvidenceBuilder(session_manager)
    evidence = builder.build_run_evidence(session_id, run_id, final_answer, "stop")
    packet = TaskEvidencePacket(
        task_id="task-1",
        attempt_index=1,
        main_run=evidence,
        team_runs=[],
        team_node_results=[],
        final_output=final_answer,
    )
    rendered = render_task_evidence(packet)

    assert evidence.tool_results[0].content == long_content
    assert "MAN 3 FT 2 NFO" in rendered
    assert "https://example.test/match" in rendered


def test_render_task_evidence_includes_failed_team_run_tool_results() -> None:
    """Rendering includes a team run's warnings, tool content and timestamp."""
    recorded_at = "2026-05-22T12:00:00Z"
    recovered_fetch = ToolEvidence(
        tool_name="web_fetch",
        tool_call_id="call-team",
        content="Recovered partial source content.",
        event_payload={"success": True, "created_at": recorded_at},
        created_at=recorded_at,
    )
    team_run = RunEvidence(
        run_id="run-team",
        session_id="session-team",
        output_text="Tool loop stopped.",
        finish_reason="max_tool_iterations",
        transcript=[],
        tool_results=[recovered_fetch],
        warnings=["finish_reason=max_tool_iterations"],
    )
    # No main run: the packet carries only the team run.
    packet = TaskEvidencePacket(
        task_id="task-1",
        attempt_index=2,
        main_run=None,
        team_runs=[team_run],
        team_node_results=[],
        final_output="partial answer",
    )

    rendered = render_task_evidence(packet)

    assert "finish_reason=max_tool_iterations" in rendered
    assert "partial answer" in rendered
    assert "Recovered partial source content." in rendered
    assert "created_at=2026-05-22T12:00:00Z" in rendered