92 lines
3.0 KiB
Python
92 lines
3.0 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from beaver.engine.session.manager import SessionManager
|
|
from beaver.tasks.evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, ToolEvidence, render_task_evidence
|
|
|
|
|
|
def test_evidence_builder_preserves_full_tool_result(tmp_path: Path) -> None:
    """Check that a long tool result is carried into run evidence unmodified.

    The test records a user message, a ``web_fetch`` tool result whose content
    is more than 700 characters long, and a ``run_completed`` message in a
    session rooted at ``tmp_path``.  It then builds run evidence and renders a
    task evidence packet, asserting that the stored tool content equals the
    recorded content and that both the tail of that content and the payload
    URL appear in the rendered text.
    """
    sid, rid = "session-1", "run-1"
    answer = "Manchester United won 3-2."
    source_url = "https://example.test/match"
    tail_marker = "MAN 3 FT 2 NFO"
    # The marker sits after the long filler, at the very end of the content.
    long_content = f"prefix {'x' * 700} {tail_marker}"

    session_manager = SessionManager(tmp_path)
    session_manager.ensure_session(sid, source="test")

    def record(**fields):
        # Every message in this test belongs to the same session and run.
        session_manager.append_message(sid, run_id=rid, **fields)

    record(role="user", event_type="user_message_added", content="score?")
    record(
        role="tool",
        event_type="tool_result_recorded",
        event_payload={"success": True, "url": source_url},
        content=long_content,
        tool_name="web_fetch",
        tool_call_id="call-1",
    )
    record(
        role="system",
        event_type="run_completed",
        event_payload={"finish_reason": "stop"},
        content=answer,
        finish_reason="stop",
        context_visible=False,
    )

    builder = EvidenceBuilder(session_manager)
    evidence = builder.build_run_evidence(sid, rid, answer, "stop")

    packet = TaskEvidencePacket(
        task_id="task-1",
        attempt_index=1,
        main_run=evidence,
        team_runs=[],
        team_node_results=[],
        final_output=answer,
    )
    rendered = render_task_evidence(packet)

    first_tool_result = evidence.tool_results[0]
    assert first_tool_result.content == long_content
    assert tail_marker in rendered
    assert source_url in rendered
|
|
|
|
|
|
def test_render_task_evidence_includes_failed_team_run_tool_results() -> None:
    """Check that rendering a packet surfaces a team run's tool results.

    The packet has no main run and a single team run that finished with
    ``max_tool_iterations``.  The rendered text is expected to contain the
    run's warning, the packet's final output, the tool result content and the
    tool result's ``created_at`` value.
    """
    stamp = "2026-05-22T12:00:00Z"
    stop_reason = "max_tool_iterations"
    warning = f"finish_reason={stop_reason}"

    fetched = ToolEvidence(
        tool_name="web_fetch",
        tool_call_id="call-team",
        content="Recovered partial source content.",
        event_payload={"success": True, "created_at": stamp},
        created_at=stamp,
    )
    team_run = RunEvidence(
        run_id="run-team",
        session_id="session-team",
        output_text="Tool loop stopped.",
        finish_reason=stop_reason,
        transcript=[],
        tool_results=[fetched],
        warnings=[warning],
    )

    rendered = render_task_evidence(
        TaskEvidencePacket(
            task_id="task-1",
            attempt_index=2,
            main_run=None,
            team_runs=[team_run],
            team_node_results=[],
            final_output="partial answer",
        )
    )

    # Checked in the same order as before: warning, output, content, timestamp.
    expected_fragments = (
        warning,
        "partial answer",
        "Recovered partial source content.",
        f"created_at={stamp}",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|