"""Automatic validation for internal Task mode.""" from __future__ import annotations import json from typing import Any from beaver.engine.providers import ProviderBundle from .models import TaskRecord, ValidationResult class ValidationService: async def validate_task_result( self, *, task: TaskRecord, user_message: str, final_output: str, evidence_packet: Any | None = None, evidence_text: str = "", transcript_excerpt: str = "", tool_summaries: list[str] | None = None, team_summaries: list[str] | None = None, provider_bundle: ProviderBundle | None = None, ) -> ValidationResult: provider = None model = None if provider_bundle is not None: provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime model = getattr(runtime, "model", None) if provider is not None: try: return await self._validate_with_provider( provider=provider, model=model, task=task, user_message=user_message, final_output=final_output, evidence_text=evidence_text, transcript_excerpt=transcript_excerpt, tool_summaries=tool_summaries or [], team_summaries=team_summaries or [], ) except Exception as exc: return ValidationResult( status="validator_error", score=0.0, issues=[f"Validator failed: {exc}"], evidence_gaps=["Automatic validation failed before producing a reliable decision."], missing_requirements=["User review is required because automatic validation failed."], recommended_revision_prompt=( "Review the answer and evidence, then decide whether to revise or accept it." ), validator="llm_error", ) return self._heuristic_validate(final_output) async def _validate_with_provider( self, *, provider: Any, model: str | None, task: TaskRecord, user_message: str, final_output: str, evidence_text: str, transcript_excerpt: str, tool_summaries: list[str], team_summaries: list[str], ) -> ValidationResult: legacy_context = "" if evidence_text else ( f"Transcript excerpt:\n{transcript_excerpt}\n\n" f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n" f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n" ) prompt = ( "Validate whether the assistant output satisfies the task. " "Return only compact JSON with keys: passed, score, issues, " "missing_requirements, recommended_revision_prompt.\n\n" f"Task goal:\n{task.goal}\n\n" f"Current user request:\n{user_message}\n\n" f"Evidence packet:\n{evidence_text}\n\n" f"{legacy_context}" f"Assistant final output:\n{final_output}" ) response = await provider.chat( messages=[ {"role": "system", "content": "You are a strict task result validator."}, {"role": "user", "content": prompt}, ], tools=None, model=model, max_tokens=4096, temperature=0.0, ) payload = self._parse_json_object(response.content or "") status = payload.get("status") if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}: status = ( "accepted" if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75 else "rejected" ) return ValidationResult( status=status, score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))), issues=[str(item) for item in payload.get("issues") or []], missing_requirements=[str(item) for item in payload.get("missing_requirements") or []], evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []], recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""), validator="llm", ) @staticmethod def _heuristic_validate(final_output: str) -> ValidationResult: text = final_output.strip() if not text: return ValidationResult( passed=False, score=0.0, issues=["Assistant output is empty."], missing_requirements=["A non-empty result is required."], recommended_revision_prompt="Produce a complete, non-empty answer for the task.", validator="heuristic", ) lowered = text.lower() if "run failed before completion" in lowered or "tool loop stopped" in lowered: return ValidationResult( passed=False, score=0.35, issues=["The run did not complete cleanly."], missing_requirements=["A successful final result is required."], recommended_revision_prompt="Retry the task and address the failure before returning the final answer.", validator="heuristic", ) return ValidationResult(passed=True, score=0.85, validator="heuristic") @staticmethod def _parse_json_object(text: str) -> dict[str, Any]: cleaned = text.strip() if cleaned.startswith("```"): cleaned = cleaned.strip("`") if cleaned.lower().startswith("json"): cleaned = cleaned[4:].strip() start = cleaned.find("{") end = cleaned.rfind("}") if start >= 0 and end >= start: cleaned = cleaned[start : end + 1] payload = json.loads(cleaned) if not isinstance(payload, dict): raise ValueError("validator response must be a JSON object") return payload