- 集成MCP连接管理器,支持MCP服务器连接 - 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、 PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、 TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等 - 实现工具注册和装配功能 - 添加技能选择上下文参数 - 支持思考模式控制参数thinking_enabled feat(coordinator): 重构任务执行计划器参数命名 - 将learning_candidate_enabled重命名为allow_candidate_generation - 更新TeamGraphScheduler中的参数传递 - 修改LocalAgentRunner中的相关参数处理 - 更新README文档中的相应描述 refactor(context): 标准化工具调用参数格式 - 添加_json导入用于参数序列化 - 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷 - 修复工具调用中参数非字符串类型的序列化问题 refactor(session): 优化消息历史记录过滤逻辑 - 修改get_messages_as_conversation为基于运行状态过滤消息 - 排除未完成、失败或错误结束的运行记录 - 改进对话历史的可见性控制机制 fix(store): 修复FTS索引重建逻辑 - 添加异常处理防止FTS索引创建失败 - 实现_rebuild_fts_index方法重新构建全文搜索索引 - 优化索引触发器和表的维护流程
139 lines
5.6 KiB
Python
139 lines
5.6 KiB
Python
"""Automatic validation for internal Task mode."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any
|
|
|
|
from beaver.engine.providers import ProviderBundle
|
|
|
|
from .models import TaskRecord, ValidationResult
|
|
|
|
|
|
class ValidationService:
    """Validate the final output of an internal Task-mode run.

    When a provider is available the result is judged by an LLM; otherwise a
    small set of string heuristics is applied to the final output.
    """

    # String spellings of a positive verdict accepted from the validator model.
    _TRUTHY_STRINGS = frozenset({"true", "yes", "y", "1", "pass", "passed"})

    async def validate_task_result(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str = "",
        tool_summaries: list[str] | None = None,
        team_summaries: list[str] | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> ValidationResult:
        """Validate ``final_output`` against ``task`` and the user request.

        Args:
            task: Task whose ``goal`` the output must satisfy.
            user_message: The current user request.
            final_output: The assistant's final answer to be judged.
            transcript_excerpt: Optional transcript text given to the validator.
            tool_summaries: Optional tool summaries given to the validator.
            team_summaries: Optional team summaries given to the validator.
            provider_bundle: Source of the provider/runtime used for LLM
                validation; the auxiliary entries are preferred over the main
                ones. When no provider can be resolved, heuristics are used.

        Returns:
            The LLM verdict (``validator="llm"``), a failing result with
            ``validator="llm_error"`` if the LLM validation raised, or the
            heuristic verdict (``validator="heuristic"``).
        """
        provider = None
        model = None
        if provider_bundle is not None:
            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
            model = getattr(runtime, "model", None)

        if provider is None:
            return self._heuristic_validate(final_output)

        try:
            return await self._validate_with_provider(
                provider=provider,
                model=model,
                task=task,
                user_message=user_message,
                final_output=final_output,
                transcript_excerpt=transcript_excerpt,
                tool_summaries=tool_summaries or [],
                team_summaries=team_summaries or [],
            )
        except Exception as exc:
            # Deliberate best-effort boundary: a broken validator must not
            # crash the run, but it must not silently accept the task either.
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=[f"Validator failed: {exc}"],
                missing_requirements=["A valid automatic validation result is required before accepting the task."],
                recommended_revision_prompt=(
                    "Review the task result again because automatic validation failed, "
                    "then provide a corrected final answer that explicitly satisfies the task goal."
                ),
                validator="llm_error",
            )

    async def _validate_with_provider(
        self,
        *,
        provider: Any,
        model: str | None,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        transcript_excerpt: str,
        tool_summaries: list[str],
        team_summaries: list[str],
    ) -> ValidationResult:
        """Ask ``provider`` to judge the output and convert its JSON reply.

        The prompt truncates the transcript to 2500 characters, the final
        output to 4000 characters and each summary list to 12 entries.

        Raises:
            ValueError: If the reply contains no decodable JSON object or the
                score cannot be converted to a number.
        """
        prompt = (
            "Validate whether the assistant output satisfies the task. "
            "Return only compact JSON with keys: passed, score, issues, "
            "missing_requirements, recommended_revision_prompt.\n\n"
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Transcript excerpt:\n{transcript_excerpt[:2500]}\n\n"
            f"Tool summaries:\n{json.dumps(tool_summaries[:12], ensure_ascii=False)}\n\n"
            f"Team summaries:\n{json.dumps(team_summaries[:12], ensure_ascii=False)}\n\n"
            f"Assistant final output:\n{final_output[:4000]}"
        )
        response = await provider.chat(
            messages=[
                {"role": "system", "content": "You are a strict task result validator."},
                {"role": "user", "content": prompt},
            ],
            tools=None,
            model=model,
            max_tokens=4096,
            temperature=0.0,
        )
        payload = self._parse_json_object(response.content or "")
        return ValidationResult(
            passed=self._coerce_bool(payload.get("passed")),
            score=self._coerce_score(payload.get("score")),
            issues=self._coerce_str_list(payload.get("issues")),
            missing_requirements=self._coerce_str_list(payload.get("missing_requirements")),
            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
            validator="llm",
        )

    @staticmethod
    def _coerce_bool(value: Any) -> bool:
        """Interpret a model-supplied verdict as a boolean.

        Strings are matched against known positive spellings so that a reply
        such as ``"false"`` is not accepted merely because it is non-empty.
        """
        if isinstance(value, str):
            return value.strip().lower() in ValidationService._TRUTHY_STRINGS
        return bool(value)

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert a model-supplied score to a float clamped to ``[0.0, 1.0]``.

        Raises:
            ValueError, TypeError: If ``value`` cannot be converted by ``float``.
        """
        score = float(value or 0.0)
        # ``not (score > 0.0)`` is also true for NaN, which min()/max() would
        # otherwise turn into a perfect 1.0.
        if not (score > 0.0):
            return 0.0
        return min(score, 1.0)

    @staticmethod
    def _coerce_str_list(value: Any) -> list[str]:
        """Normalize a model-supplied field to a list of strings.

        A list or tuple is converted item by item; any other non-empty value
        (notably a single string) becomes a one-element list instead of being
        iterated character by character.
        """
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    @staticmethod
    def _heuristic_validate(final_output: str) -> ValidationResult:
        """Judge ``final_output`` without an LLM.

        Empty output fails with score 0.0, output containing a known failure
        marker fails with score 0.35, anything else passes with score 0.85.
        """
        # Tolerate a missing output; it is reported as empty rather than raising.
        text = (final_output or "").strip()
        if not text:
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=["Assistant output is empty."],
                missing_requirements=["A non-empty result is required."],
                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
                validator="heuristic",
            )
        lowered = text.lower()
        if "run failed before completion" in lowered or "tool loop stopped" in lowered:
            return ValidationResult(
                passed=False,
                score=0.35,
                issues=["The run did not complete cleanly."],
                missing_requirements=["A successful final result is required."],
                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
                validator="heuristic",
            )
        return ValidationResult(passed=True, score=0.85, validator="heuristic")

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract the first JSON object embedded in ``text``.

        Decoding starts at the first ``{`` and stops at the end of that
        object, so code fences, a leading ``json`` tag and any trailing
        commentary are ignored.

        Raises:
            ValueError: If ``text`` contains no ``{`` or the text starting
                there is not valid JSON (``json.JSONDecodeError``).
        """
        cleaned = (text or "").strip()
        start = cleaned.find("{")
        if start < 0:
            raise ValueError("validator response must be a JSON object")
        # A document that begins with "{" can only decode to a dict.
        payload, _ = json.JSONDecoder().raw_decode(cleaned[start:])
        return payload
|