feat(task): route validation status to review states
This commit is contained in:
@ -854,7 +854,19 @@ class AgentService:
|
|||||||
provider_bundle=provider_bundle,
|
provider_bundle=provider_bundle,
|
||||||
)
|
)
|
||||||
latest_validation = validation
|
latest_validation = validation
|
||||||
task = task_service.record_validation(task.task_id, result.run_id, validation)
|
has_usable_answer = bool(result.output_text.strip()) and (
|
||||||
|
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
|
||||||
|
)
|
||||||
|
task = task_service.record_validation(
|
||||||
|
task.task_id,
|
||||||
|
result.run_id,
|
||||||
|
validation,
|
||||||
|
final_attempt=(
|
||||||
|
attempt_index == 2
|
||||||
|
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
|
||||||
|
),
|
||||||
|
has_usable_answer=has_usable_answer,
|
||||||
|
)
|
||||||
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
|
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
|
||||||
session_manager.update_latest_assistant_event_payload(
|
session_manager.update_latest_assistant_event_payload(
|
||||||
result.session_id,
|
result.session_id,
|
||||||
@ -865,6 +877,23 @@ class AgentService:
|
|||||||
"validation_status": "passed" if validation.accepted else "failed",
|
"validation_status": "passed" if validation.accepted else "failed",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
validation_debug = {
|
||||||
|
"evidence_run_ids": [
|
||||||
|
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||||
|
],
|
||||||
|
"evidence_session_ids": [
|
||||||
|
item.session_id
|
||||||
|
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||||
|
if item is not None
|
||||||
|
],
|
||||||
|
"tool_result_count": sum(
|
||||||
|
len(item.tool_results)
|
||||||
|
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||||
|
if item is not None
|
||||||
|
),
|
||||||
|
"evidence_length": len(evidence_text),
|
||||||
|
}
|
||||||
|
retry_scheduled = validation.status == "rejected" and attempt_index == 1
|
||||||
session_manager.append_message(
|
session_manager.append_message(
|
||||||
result.session_id,
|
result.session_id,
|
||||||
run_id=result.run_id,
|
run_id=result.run_id,
|
||||||
@ -874,17 +903,18 @@ class AgentService:
|
|||||||
"task_id": task.task_id,
|
"task_id": task.task_id,
|
||||||
"attempt_index": attempt_index,
|
"attempt_index": attempt_index,
|
||||||
"validation_result": validation.to_dict(),
|
"validation_result": validation.to_dict(),
|
||||||
"retry_scheduled": not validation.accepted and attempt_index == 1,
|
"validation_debug": validation_debug,
|
||||||
|
"retry_scheduled": retry_scheduled,
|
||||||
},
|
},
|
||||||
content=validation.recommended_revision_prompt or None,
|
content=validation.recommended_revision_prompt or None,
|
||||||
context_visible=False,
|
context_visible=False,
|
||||||
)
|
)
|
||||||
if not validation.accepted and attempt_index == 1:
|
if retry_scheduled:
|
||||||
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
|
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
|
||||||
result.task_id = task.task_id
|
result.task_id = task.task_id
|
||||||
result.task_status = task.status
|
result.task_status = task.status
|
||||||
result.validation_result = validation.to_dict()
|
result.validation_result = validation.to_dict()
|
||||||
if validation.accepted or attempt_index == 2:
|
if not retry_scheduled:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
if last_result is None: # pragma: no cover - defensive
|
if last_result is None: # pragma: no cover - defensive
|
||||||
|
|||||||
@ -110,10 +110,30 @@ class TaskService:
|
|||||||
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
|
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||||
return task
|
return task
|
||||||
|
|
||||||
def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord:
|
def record_validation(
|
||||||
|
self,
|
||||||
|
task_id: str,
|
||||||
|
run_id: str,
|
||||||
|
validation: ValidationResult,
|
||||||
|
*,
|
||||||
|
final_attempt: bool = True,
|
||||||
|
has_usable_answer: bool = True,
|
||||||
|
) -> TaskRecord:
|
||||||
task = self._require(task_id)
|
task = self._require(task_id)
|
||||||
|
now = self._now()
|
||||||
|
if validation.status == "accepted":
|
||||||
task.status = "awaiting_feedback"
|
task.status = "awaiting_feedback"
|
||||||
task.updated_at = self._now()
|
elif validation.status in {"insufficient_evidence", "validator_error"}:
|
||||||
|
task.status = "needs_review"
|
||||||
|
elif validation.status == "rejected" and not final_attempt:
|
||||||
|
task.status = "needs_revision"
|
||||||
|
elif validation.status == "rejected" and has_usable_answer:
|
||||||
|
task.status = "needs_review"
|
||||||
|
else:
|
||||||
|
task.status = "failed"
|
||||||
|
task.closed_at = now
|
||||||
|
task.close_reason = "automatic validation rejected the final attempt"
|
||||||
|
task.updated_at = now
|
||||||
task.validation_result = validation.to_dict()
|
task.validation_result = validation.to_dict()
|
||||||
self.store.upsert_task(task)
|
self.store.upsert_task(task)
|
||||||
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
|
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
|
||||||
|
|||||||
@ -45,13 +45,13 @@ class ValidationService:
|
|||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return ValidationResult(
|
return ValidationResult(
|
||||||
passed=False,
|
status="validator_error",
|
||||||
score=0.0,
|
score=0.0,
|
||||||
issues=[f"Validator failed: {exc}"],
|
issues=[f"Validator failed: {exc}"],
|
||||||
missing_requirements=["A valid automatic validation result is required before accepting the task."],
|
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
|
||||||
|
missing_requirements=["User review is required because automatic validation failed."],
|
||||||
recommended_revision_prompt=(
|
recommended_revision_prompt=(
|
||||||
"Review the task result again because automatic validation failed, "
|
"Review the answer and evidence, then decide whether to revise or accept it."
|
||||||
"then provide a corrected final answer that explicitly satisfies the task goal."
|
|
||||||
),
|
),
|
||||||
validator="llm_error",
|
validator="llm_error",
|
||||||
)
|
)
|
||||||
@ -96,11 +96,19 @@ class ValidationService:
|
|||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
payload = self._parse_json_object(response.content or "")
|
payload = self._parse_json_object(response.content or "")
|
||||||
|
status = payload.get("status")
|
||||||
|
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
|
||||||
|
status = (
|
||||||
|
"accepted"
|
||||||
|
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
|
||||||
|
else "rejected"
|
||||||
|
)
|
||||||
return ValidationResult(
|
return ValidationResult(
|
||||||
passed=bool(payload.get("passed")),
|
status=status,
|
||||||
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
|
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
|
||||||
issues=[str(item) for item in payload.get("issues") or []],
|
issues=[str(item) for item in payload.get("issues") or []],
|
||||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||||
|
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
|
||||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||||
validator="llm",
|
validator="llm",
|
||||||
)
|
)
|
||||||
|
|||||||
@ -779,6 +779,45 @@ def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> Non
|
|||||||
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
|
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
|
||||||
|
service = AgentService(
|
||||||
|
loader=EngineLoader(
|
||||||
|
workspace=tmp_path,
|
||||||
|
task_execution_planner=_single_planner(),
|
||||||
|
validation_service=StubValidationService(
|
||||||
|
[
|
||||||
|
ValidationResult(
|
||||||
|
status="insufficient_evidence",
|
||||||
|
score=0.4,
|
||||||
|
evidence_gaps=["source missing"],
|
||||||
|
validator="test",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(
|
||||||
|
service.process_direct(
|
||||||
|
"answer with uncertain evidence",
|
||||||
|
session_id="web:needs-review",
|
||||||
|
provider_bundle=_bundle("possible answer"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
loaded = service.create_loop().boot()
|
||||||
|
task = loaded.task_service.get_task(result.task_id)
|
||||||
|
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
|
||||||
|
validation_event = next(event for event in events if event.event_type == "task_validation_snapshotted")
|
||||||
|
|
||||||
|
assert task is not None
|
||||||
|
assert task.status == "needs_review"
|
||||||
|
assert task.requires_user_action is True
|
||||||
|
assert task.is_execution_active is False
|
||||||
|
assert validation_event.event_payload["validation_result"]["status"] == "insufficient_evidence"
|
||||||
|
assert validation_event.event_payload["retry_scheduled"] is False
|
||||||
|
assert validation_event.event_payload["validation_debug"]["tool_result_count"] >= 0
|
||||||
|
|
||||||
|
|
||||||
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
|
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
|
||||||
main_provider = StubProvider(
|
main_provider = StubProvider(
|
||||||
[
|
[
|
||||||
@ -890,5 +929,6 @@ def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert validation.accepted is False
|
assert validation.accepted is False
|
||||||
|
assert validation.status == "validator_error"
|
||||||
assert validation.validator == "llm_error"
|
assert validation.validator == "llm_error"
|
||||||
assert validation.issues
|
assert validation.issues
|
||||||
|
|||||||
Reference in New Issue
Block a user