feat(task): route validation status to review states
This commit is contained in:
@ -854,7 +854,19 @@ class AgentService:
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
latest_validation = validation
|
||||
task = task_service.record_validation(task.task_id, result.run_id, validation)
|
||||
has_usable_answer = bool(result.output_text.strip()) and (
|
||||
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
|
||||
)
|
||||
task = task_service.record_validation(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
validation,
|
||||
final_attempt=(
|
||||
attempt_index == 2
|
||||
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
|
||||
),
|
||||
has_usable_answer=has_usable_answer,
|
||||
)
|
||||
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
@ -865,6 +877,23 @@ class AgentService:
|
||||
"validation_status": "passed" if validation.accepted else "failed",
|
||||
},
|
||||
)
|
||||
validation_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
}
|
||||
retry_scheduled = validation.status == "rejected" and attempt_index == 1
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
@ -874,17 +903,18 @@ class AgentService:
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"validation_result": validation.to_dict(),
|
||||
"retry_scheduled": not validation.accepted and attempt_index == 1,
|
||||
"validation_debug": validation_debug,
|
||||
"retry_scheduled": retry_scheduled,
|
||||
},
|
||||
content=validation.recommended_revision_prompt or None,
|
||||
context_visible=False,
|
||||
)
|
||||
if not validation.accepted and attempt_index == 1:
|
||||
if retry_scheduled:
|
||||
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = validation.to_dict()
|
||||
if validation.accepted or attempt_index == 2:
|
||||
if not retry_scheduled:
|
||||
return result
|
||||
|
||||
if last_result is None: # pragma: no cover - defensive
|
||||
|
||||
@ -110,10 +110,30 @@ class TaskService:
|
||||
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||
return task
|
||||
|
||||
def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord:
|
||||
def record_validation(
|
||||
self,
|
||||
task_id: str,
|
||||
run_id: str,
|
||||
validation: ValidationResult,
|
||||
*,
|
||||
final_attempt: bool = True,
|
||||
has_usable_answer: bool = True,
|
||||
) -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
task.status = "awaiting_feedback"
|
||||
task.updated_at = self._now()
|
||||
now = self._now()
|
||||
if validation.status == "accepted":
|
||||
task.status = "awaiting_feedback"
|
||||
elif validation.status in {"insufficient_evidence", "validator_error"}:
|
||||
task.status = "needs_review"
|
||||
elif validation.status == "rejected" and not final_attempt:
|
||||
task.status = "needs_revision"
|
||||
elif validation.status == "rejected" and has_usable_answer:
|
||||
task.status = "needs_review"
|
||||
else:
|
||||
task.status = "failed"
|
||||
task.closed_at = now
|
||||
task.close_reason = "automatic validation rejected the final attempt"
|
||||
task.updated_at = now
|
||||
task.validation_result = validation.to_dict()
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
|
||||
|
||||
@ -45,13 +45,13 @@ class ValidationService:
|
||||
)
|
||||
except Exception as exc:
|
||||
return ValidationResult(
|
||||
passed=False,
|
||||
status="validator_error",
|
||||
score=0.0,
|
||||
issues=[f"Validator failed: {exc}"],
|
||||
missing_requirements=["A valid automatic validation result is required before accepting the task."],
|
||||
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
|
||||
missing_requirements=["User review is required because automatic validation failed."],
|
||||
recommended_revision_prompt=(
|
||||
"Review the task result again because automatic validation failed, "
|
||||
"then provide a corrected final answer that explicitly satisfies the task goal."
|
||||
"Review the answer and evidence, then decide whether to revise or accept it."
|
||||
),
|
||||
validator="llm_error",
|
||||
)
|
||||
@ -96,11 +96,19 @@ class ValidationService:
|
||||
temperature=0.0,
|
||||
)
|
||||
payload = self._parse_json_object(response.content or "")
|
||||
status = payload.get("status")
|
||||
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
|
||||
status = (
|
||||
"accepted"
|
||||
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
|
||||
else "rejected"
|
||||
)
|
||||
return ValidationResult(
|
||||
passed=bool(payload.get("passed")),
|
||||
status=status,
|
||||
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
|
||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||
validator="llm",
|
||||
)
|
||||
|
||||
@ -779,6 +779,45 @@ def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> Non
|
||||
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
|
||||
|
||||
|
||||
def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
    """Check that an ``insufficient_evidence`` verdict leaves the task in ``needs_review``.

    The stubbed validation service is given a single verdict. The test then
    asserts on the stored task and on the ``task_validation_snapshotted`` run
    event: the task requires user action, execution is no longer active, and
    no retry was scheduled.
    """
    planner = _single_planner()
    verdict = ValidationResult(
        status="insufficient_evidence",
        score=0.4,
        evidence_gaps=["source missing"],
        validator="test",
    )
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=planner,
        validation_service=StubValidationService([verdict]),
    )
    service = AgentService(loader=loader)

    pending = service.process_direct(
        "answer with uncertain evidence",
        session_id="web:needs-review",
        provider_bundle=_bundle("possible answer"),
    )
    outcome = asyncio.run(pending)

    # Boot a fresh loop and read the task and run events back through its services.
    runtime = service.create_loop().boot()
    stored_task = runtime.task_service.get_task(outcome.task_id)
    run_events = runtime.session_manager.get_run_event_records(outcome.session_id, outcome.run_id)
    snapshot = next(
        record for record in run_events if record.event_type == "task_validation_snapshotted"
    )

    assert stored_task is not None
    assert stored_task.status == "needs_review"
    assert stored_task.requires_user_action is True
    assert stored_task.is_execution_active is False

    payload = snapshot.event_payload
    assert payload["validation_result"]["status"] == "insufficient_evidence"
    assert payload["retry_scheduled"] is False
    # Only confirms the debug counter is present and numeric; any count is accepted.
    assert payload["validation_debug"]["tool_result_count"] >= 0
|
||||
|
||||
|
||||
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
|
||||
main_provider = StubProvider(
|
||||
[
|
||||
@ -890,5 +929,6 @@ def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
|
||||
)
|
||||
|
||||
assert validation.accepted is False
|
||||
assert validation.status == "validator_error"
|
||||
assert validation.validator == "llm_error"
|
||||
assert validation.issues
|
||||
|
||||
Reference in New Issue
Block a user