feat(task): add structured run evidence
This commit is contained in:
81
app-instance/backend/tests/unit/test_task_evidence.py
Normal file
81
app-instance/backend/tests/unit/test_task_evidence.py
Normal file
@ -0,0 +1,81 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from beaver.engine.session.manager import SessionManager
|
||||
from beaver.tasks.evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, render_task_evidence
|
||||
|
||||
|
||||
def test_evidence_builder_preserves_full_tool_result(tmp_path: Path) -> None:
|
||||
session_manager = SessionManager(tmp_path)
|
||||
session_id = "session-1"
|
||||
run_id = "run-1"
|
||||
long_content = "prefix " + ("x" * 700) + " MAN 3 FT 2 NFO"
|
||||
session_manager.ensure_session(session_id, source="test")
|
||||
session_manager.append_message(session_id, run_id=run_id, role="user", event_type="user_message_added", content="score?")
|
||||
session_manager.append_message(
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="tool",
|
||||
event_type="tool_result_recorded",
|
||||
event_payload={"success": True, "url": "https://example.test/match"},
|
||||
content=long_content,
|
||||
tool_name="web_fetch",
|
||||
tool_call_id="call-1",
|
||||
)
|
||||
session_manager.append_message(
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="system",
|
||||
event_type="run_completed",
|
||||
event_payload={"finish_reason": "stop"},
|
||||
content="Manchester United won 3-2.",
|
||||
finish_reason="stop",
|
||||
context_visible=False,
|
||||
)
|
||||
|
||||
evidence = EvidenceBuilder(session_manager).build_run_evidence(
|
||||
session_id=session_id,
|
||||
run_id=run_id,
|
||||
output_text="Manchester United won 3-2.",
|
||||
finish_reason="stop",
|
||||
)
|
||||
rendered = render_task_evidence(
|
||||
TaskEvidencePacket(
|
||||
task_id="task-1",
|
||||
attempt_index=1,
|
||||
main_run=evidence,
|
||||
team_runs=[],
|
||||
team_node_results=[],
|
||||
final_output="Manchester United won 3-2.",
|
||||
)
|
||||
)
|
||||
|
||||
assert evidence.tool_results[0].content == long_content
|
||||
assert "MAN 3 FT 2 NFO" in rendered
|
||||
assert "https://example.test/match" in rendered
|
||||
|
||||
|
||||
def test_render_task_evidence_includes_failed_team_run_tool_results() -> None:
|
||||
run = RunEvidence(
|
||||
run_id="run-team",
|
||||
session_id="session-team",
|
||||
output_text="Tool loop stopped.",
|
||||
finish_reason="max_tool_iterations",
|
||||
transcript=[],
|
||||
tool_results=[],
|
||||
warnings=["finish_reason=max_tool_iterations"],
|
||||
)
|
||||
packet = TaskEvidencePacket(
|
||||
task_id="task-1",
|
||||
attempt_index=2,
|
||||
main_run=None,
|
||||
team_runs=[run],
|
||||
team_node_results=[],
|
||||
final_output="partial answer",
|
||||
)
|
||||
|
||||
rendered = render_task_evidence(packet)
|
||||
|
||||
assert "finish_reason=max_tool_iterations" in rendered
|
||||
assert "partial answer" in rendered
|
||||
Reference in New Issue
Block a user