feat(task): synthesize and validate from evidence
This commit is contained in:
@ -20,7 +20,7 @@ class StubProvider(LLMProvider):
|
||||
def __init__(self, responses: list[LLMResponse]) -> None:
|
||||
super().__init__()
|
||||
self._responses = list(responses)
|
||||
self.calls: list[list[dict]] = []
|
||||
self.calls: list[dict[str, object]] = []
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
@ -30,7 +30,7 @@ class StubProvider(LLMProvider):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
) -> LLMResponse:
|
||||
self.calls.append(messages)
|
||||
self.calls.append({"messages": messages, "tools": tools, "model": model})
|
||||
if not self._responses:
|
||||
raise AssertionError("No stubbed provider responses left")
|
||||
return self._responses.pop(0)
|
||||
@ -42,8 +42,10 @@ class StubProvider(LLMProvider):
|
||||
class StubValidationService:
|
||||
def __init__(self, results: list[ValidationResult]) -> None:
|
||||
self.results = list(results)
|
||||
self.calls: list[dict] = []
|
||||
|
||||
async def validate_task_result(self, **kwargs) -> ValidationResult:
|
||||
self.calls.append(kwargs)
|
||||
if not self.results:
|
||||
raise AssertionError("No stubbed validation results left")
|
||||
return self.results.pop(0)
|
||||
@ -706,10 +708,45 @@ def test_task_mode_team_plan_runs_subagent_then_main_synthesis(tmp_path: Path) -
|
||||
assert result.run_id == task.run_ids[-1]
|
||||
assert any(event.event_type == "task_execution_planned" for event in events)
|
||||
assert any(event.event_type == "task_team_run_completed" for event in events)
|
||||
assert "sub-agent evidence" in main_provider.calls[0][0]["content"]
|
||||
assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "sub-agent evidence" != result.output_text
|
||||
|
||||
|
||||
def test_task_mode_team_synthesis_runs_without_tools_and_receives_evidence(tmp_path: Path) -> None:
|
||||
main_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
]
|
||||
)
|
||||
sub_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
]
|
||||
)
|
||||
validation = StubValidationService([ValidationResult(status="accepted", score=0.9, validator="test")])
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
|
||||
validation_service=validation,
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement team-backed workflow",
|
||||
session_id="web:team-no-tools",
|
||||
provider_bundle=_provider_bundle(main_provider),
|
||||
team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
|
||||
)
|
||||
)
|
||||
|
||||
assert result.output_text == "final synthesized answer"
|
||||
assert main_provider.calls[0]["tools"] is None
|
||||
assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "Task evidence packet" in validation.calls[0]["evidence_text"]
|
||||
|
||||
|
||||
def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> None:
|
||||
main_provider = StubProvider(
|
||||
[
|
||||
@ -737,9 +774,9 @@ def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> Non
|
||||
|
||||
assert result.output_text == "fallback synthesized answer"
|
||||
assert any(event.event_type == "task_team_run_failed" for event in events)
|
||||
assert "sub-agent unavailable" in main_provider.calls[0][0]["content"]
|
||||
assert "same class of tools fails repeatedly" in main_provider.calls[0][0]["content"]
|
||||
assert "user-visible fallback answer" in main_provider.calls[0][0]["content"]
|
||||
assert "sub-agent unavailable" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "same class of tools fails repeatedly" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
|
||||
|
||||
|
||||
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
|
||||
|
||||
Reference in New Issue
Block a user