feat(task): route validation status to review states

This commit is contained in:
2026-05-22 11:35:46 +08:00
parent 0adc04806c
commit b808f5cbc2
4 changed files with 110 additions and 12 deletions

View File

@ -854,7 +854,19 @@ class AgentService:
provider_bundle=provider_bundle, provider_bundle=provider_bundle,
) )
latest_validation = validation latest_validation = validation
task = task_service.record_validation(task.task_id, result.run_id, validation) has_usable_answer = bool(result.output_text.strip()) and (
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
)
task = task_service.record_validation(
task.task_id,
result.run_id,
validation,
final_attempt=(
attempt_index == 2
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
),
has_usable_answer=has_usable_answer,
)
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict()) run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
session_manager.update_latest_assistant_event_payload( session_manager.update_latest_assistant_event_payload(
result.session_id, result.session_id,
@ -865,6 +877,23 @@ class AgentService:
"validation_status": "passed" if validation.accepted else "failed", "validation_status": "passed" if validation.accepted else "failed",
}, },
) )
validation_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
}
retry_scheduled = validation.status == "rejected" and attempt_index == 1
session_manager.append_message( session_manager.append_message(
result.session_id, result.session_id,
run_id=result.run_id, run_id=result.run_id,
@ -874,17 +903,18 @@ class AgentService:
"task_id": task.task_id, "task_id": task.task_id,
"attempt_index": attempt_index, "attempt_index": attempt_index,
"validation_result": validation.to_dict(), "validation_result": validation.to_dict(),
"retry_scheduled": not validation.accepted and attempt_index == 1, "validation_debug": validation_debug,
"retry_scheduled": retry_scheduled,
}, },
content=validation.recommended_revision_prompt or None, content=validation.recommended_revision_prompt or None,
context_visible=False, context_visible=False,
) )
if not validation.accepted and attempt_index == 1: if retry_scheduled:
session_manager.set_run_context_visible(result.session_id, result.run_id, False) session_manager.set_run_context_visible(result.session_id, result.run_id, False)
result.task_id = task.task_id result.task_id = task.task_id
result.task_status = task.status result.task_status = task.status
result.validation_result = validation.to_dict() result.validation_result = validation.to_dict()
if validation.accepted or attempt_index == 2: if not retry_scheduled:
return result return result
if last_result is None: # pragma: no cover - defensive if last_result is None: # pragma: no cover - defensive

View File

@ -110,10 +110,30 @@ class TaskService:
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []}) self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
return task return task
def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord: def record_validation(
self,
task_id: str,
run_id: str,
validation: ValidationResult,
*,
final_attempt: bool = True,
has_usable_answer: bool = True,
) -> TaskRecord:
task = self._require(task_id) task = self._require(task_id)
task.status = "awaiting_feedback" now = self._now()
task.updated_at = self._now() if validation.status == "accepted":
task.status = "awaiting_feedback"
elif validation.status in {"insufficient_evidence", "validator_error"}:
task.status = "needs_review"
elif validation.status == "rejected" and not final_attempt:
task.status = "needs_revision"
elif validation.status == "rejected" and has_usable_answer:
task.status = "needs_review"
else:
task.status = "failed"
task.closed_at = now
task.close_reason = "automatic validation rejected the final attempt"
task.updated_at = now
task.validation_result = validation.to_dict() task.validation_result = validation.to_dict()
self.store.upsert_task(task) self.store.upsert_task(task)
self._event(task, "validated", run_id=run_id, payload=validation.to_dict()) self._event(task, "validated", run_id=run_id, payload=validation.to_dict())

View File

@ -45,13 +45,13 @@ class ValidationService:
) )
except Exception as exc: except Exception as exc:
return ValidationResult( return ValidationResult(
passed=False, status="validator_error",
score=0.0, score=0.0,
issues=[f"Validator failed: {exc}"], issues=[f"Validator failed: {exc}"],
missing_requirements=["A valid automatic validation result is required before accepting the task."], evidence_gaps=["Automatic validation failed before producing a reliable decision."],
missing_requirements=["User review is required because automatic validation failed."],
recommended_revision_prompt=( recommended_revision_prompt=(
"Review the task result again because automatic validation failed, " "Review the answer and evidence, then decide whether to revise or accept it."
"then provide a corrected final answer that explicitly satisfies the task goal."
), ),
validator="llm_error", validator="llm_error",
) )
@ -96,11 +96,19 @@ class ValidationService:
temperature=0.0, temperature=0.0,
) )
payload = self._parse_json_object(response.content or "") payload = self._parse_json_object(response.content or "")
status = payload.get("status")
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
status = (
"accepted"
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
else "rejected"
)
return ValidationResult( return ValidationResult(
passed=bool(payload.get("passed")), status=status,
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))), score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
issues=[str(item) for item in payload.get("issues") or []], issues=[str(item) for item in payload.get("issues") or []],
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []], missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""), recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
validator="llm", validator="llm",
) )

View File

@ -779,6 +779,45 @@ def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> Non
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"] assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
    """An ``insufficient_evidence`` verdict parks the task in ``needs_review``.

    The stub validator returns a single ``insufficient_evidence`` result. The
    test then checks the stored task status and the recorded
    ``task_validation_snapshotted`` event payload, including that no retry
    was scheduled.
    """
    # Arrange: a single-run planner and a validator stub with one canned verdict.
    planner = _single_planner()
    canned_verdict = ValidationResult(
        status="insufficient_evidence",
        score=0.4,
        evidence_gaps=["source missing"],
        validator="test",
    )
    validator_stub = StubValidationService([canned_verdict])
    engine_loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=planner,
        validation_service=validator_stub,
    )
    service = AgentService(loader=engine_loader)

    # Act: run one direct request to completion.
    bundle = _bundle("possible answer")
    pending = service.process_direct(
        "answer with uncertain evidence",
        session_id="web:needs-review",
        provider_bundle=bundle,
    )
    result = asyncio.run(pending)

    # Re-boot the loop to read back the persisted task and run events.
    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(result.task_id)
    run_events = loaded.session_manager.get_run_event_records(
        result.session_id,
        result.run_id,
    )
    validation_event = next(
        record
        for record in run_events
        if record.event_type == "task_validation_snapshotted"
    )

    # Assert: the task is waiting on the user and is no longer executing.
    assert task is not None
    assert task.status == "needs_review"
    assert task.requires_user_action is True
    assert task.is_execution_active is False

    # Assert: the snapshot event carries the verdict, no retry, and debug data.
    payload = validation_event.event_payload
    assert payload["validation_result"]["status"] == "insufficient_evidence"
    assert payload["retry_scheduled"] is False
    assert payload["validation_debug"]["tool_result_count"] >= 0
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None: def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
main_provider = StubProvider( main_provider = StubProvider(
[ [
@ -890,5 +929,6 @@ def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
) )
assert validation.accepted is False assert validation.accepted is False
assert validation.status == "validator_error"
assert validation.validator == "llm_error" assert validation.validator == "llm_error"
assert validation.issues assert validation.issues