feat(task): route validation status to review states

This commit is contained in:
2026-05-22 11:35:46 +08:00
parent 0adc04806c
commit b808f5cbc2
4 changed files with 110 additions and 12 deletions

View File

@ -854,7 +854,19 @@ class AgentService:
provider_bundle=provider_bundle,
)
latest_validation = validation
task = task_service.record_validation(task.task_id, result.run_id, validation)
has_usable_answer = bool(result.output_text.strip()) and (
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
)
task = task_service.record_validation(
task.task_id,
result.run_id,
validation,
final_attempt=(
attempt_index == 2
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
),
has_usable_answer=has_usable_answer,
)
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
session_manager.update_latest_assistant_event_payload(
result.session_id,
@ -865,6 +877,23 @@ class AgentService:
"validation_status": "passed" if validation.accepted else "failed",
},
)
validation_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
}
retry_scheduled = validation.status == "rejected" and attempt_index == 1
session_manager.append_message(
result.session_id,
run_id=result.run_id,
@ -874,17 +903,18 @@ class AgentService:
"task_id": task.task_id,
"attempt_index": attempt_index,
"validation_result": validation.to_dict(),
"retry_scheduled": not validation.accepted and attempt_index == 1,
"validation_debug": validation_debug,
"retry_scheduled": retry_scheduled,
},
content=validation.recommended_revision_prompt or None,
context_visible=False,
)
if not validation.accepted and attempt_index == 1:
if retry_scheduled:
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
result.task_id = task.task_id
result.task_status = task.status
result.validation_result = validation.to_dict()
if validation.accepted or attempt_index == 2:
if not retry_scheduled:
return result
if last_result is None: # pragma: no cover - defensive

View File

@ -110,10 +110,30 @@ class TaskService:
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
return task
def record_validation(self, task_id: str, run_id: str, validation: ValidationResult) -> TaskRecord:
def record_validation(
self,
task_id: str,
run_id: str,
validation: ValidationResult,
*,
final_attempt: bool = True,
has_usable_answer: bool = True,
) -> TaskRecord:
task = self._require(task_id)
task.status = "awaiting_feedback"
task.updated_at = self._now()
now = self._now()
if validation.status == "accepted":
task.status = "awaiting_feedback"
elif validation.status in {"insufficient_evidence", "validator_error"}:
task.status = "needs_review"
elif validation.status == "rejected" and not final_attempt:
task.status = "needs_revision"
elif validation.status == "rejected" and has_usable_answer:
task.status = "needs_review"
else:
task.status = "failed"
task.closed_at = now
task.close_reason = "automatic validation rejected the final attempt"
task.updated_at = now
task.validation_result = validation.to_dict()
self.store.upsert_task(task)
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())

View File

@ -45,13 +45,13 @@ class ValidationService:
)
except Exception as exc:
return ValidationResult(
passed=False,
status="validator_error",
score=0.0,
issues=[f"Validator failed: {exc}"],
missing_requirements=["A valid automatic validation result is required before accepting the task."],
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
missing_requirements=["User review is required because automatic validation failed."],
recommended_revision_prompt=(
"Review the task result again because automatic validation failed, "
"then provide a corrected final answer that explicitly satisfies the task goal."
"Review the answer and evidence, then decide whether to revise or accept it."
),
validator="llm_error",
)
@ -96,11 +96,19 @@ class ValidationService:
temperature=0.0,
)
payload = self._parse_json_object(response.content or "")
status = payload.get("status")
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
status = (
"accepted"
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
else "rejected"
)
return ValidationResult(
passed=bool(payload.get("passed")),
status=status,
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
issues=[str(item) for item in payload.get("issues") or []],
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
validator="llm",
)

View File

@ -779,6 +779,45 @@ def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> Non
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(
status="insufficient_evidence",
score=0.4,
evidence_gaps=["source missing"],
validator="test",
)
]
),
)
)
result = asyncio.run(
service.process_direct(
"answer with uncertain evidence",
session_id="web:needs-review",
provider_bundle=_bundle("possible answer"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(result.task_id)
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
validation_event = next(event for event in events if event.event_type == "task_validation_snapshotted")
assert task is not None
assert task.status == "needs_review"
assert task.requires_user_action is True
assert task.is_execution_active is False
assert validation_event.event_payload["validation_result"]["status"] == "insufficient_evidence"
assert validation_event.event_payload["retry_scheduled"] is False
assert validation_event.event_payload["validation_debug"]["tool_result_count"] >= 0
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
main_provider = StubProvider(
[
@ -890,5 +929,6 @@ def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
)
assert validation.accepted is False
assert validation.status == "validator_error"
assert validation.validator == "llm_error"
assert validation.issues