feat(engine): 添加运行时上下文支持并重构工具迭代限制
添加 RuntimeContext 类用于捕获模型运行时的日期时间信息, 包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。 同时增加最大上下文消息数和工具迭代次数的配置选项, 将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。 BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。 - 添加 RuntimeContext 类和相关渲染方法 - 增加 max_context_messages 和 max_tool_iterations 配置 - 移除 ValidationService 相关代码 - 更新消息记录中的验证状态字段 - 添加原始工具调用检测和回退处理
This commit is contained in:
@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
|
||||
from .router import MainAgentRouter
|
||||
from .service import TaskService
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
from .validation import ValidationService
|
||||
|
||||
__all__ = [
|
||||
"EvidenceBuilder",
|
||||
@ -24,6 +23,5 @@ __all__ = [
|
||||
"ToolEvidence",
|
||||
"ValidationResult",
|
||||
"ValidationStatus",
|
||||
"ValidationService",
|
||||
"render_task_evidence",
|
||||
]
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
"""Models for internal task tracking and validation."""
|
||||
"""Models for internal task tracking and user acceptance."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@ -9,7 +9,12 @@ from typing import Any, Literal
|
||||
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
|
||||
|
||||
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
|
||||
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
|
||||
TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
|
||||
LEGACY_STATUS_MAP = {
|
||||
"validating": "running",
|
||||
"awaiting_feedback": "awaiting_acceptance",
|
||||
"needs_review": "awaiting_acceptance",
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -113,11 +118,11 @@ class TaskRecord:
|
||||
|
||||
@property
|
||||
def is_execution_active(self) -> bool:
|
||||
return self.status in {"running", "validating"}
|
||||
return self.status == "running"
|
||||
|
||||
@property
|
||||
def requires_user_action(self) -> bool:
|
||||
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
|
||||
return self.status in {"awaiting_acceptance", "needs_revision"}
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
@ -137,6 +142,7 @@ class TaskRecord:
|
||||
"satisfaction": self.satisfaction,
|
||||
"run_ids": list(self.run_ids),
|
||||
"skill_names": list(self.skill_names),
|
||||
"acceptance": list(self.feedback),
|
||||
"feedback": list(self.feedback),
|
||||
"validation_result": self.validation_result,
|
||||
"metadata": dict(self.metadata),
|
||||
@ -152,7 +158,7 @@ class TaskRecord:
|
||||
goal=str(payload.get("goal") or payload.get("description") or ""),
|
||||
constraints=[str(item) for item in payload.get("constraints") or []],
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
status=str(payload.get("status") or "open"),
|
||||
status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
|
||||
creator=str(payload.get("creator") or "main-agent"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
updated_at=str(payload.get("updated_at") or ""),
|
||||
@ -161,7 +167,11 @@ class TaskRecord:
|
||||
satisfaction=_optional_float(payload.get("satisfaction")),
|
||||
run_ids=[str(item) for item in payload.get("run_ids") or []],
|
||||
skill_names=[str(item) for item in payload.get("skill_names") or []],
|
||||
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
|
||||
feedback=[
|
||||
_normalize_acceptance_entry(dict(item))
|
||||
for item in (payload.get("acceptance") or payload.get("feedback") or [])
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
|
||||
metadata=dict(payload.get("metadata") or {}),
|
||||
)
|
||||
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return float(value)
|
||||
|
||||
|
||||
def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
|
||||
if entry.get("acceptance_type") is None and entry.get("feedback_type") is not None:
|
||||
feedback_type = str(entry.get("feedback_type") or "")
|
||||
entry["acceptance_type"] = "accept" if feedback_type == "satisfied" else feedback_type
|
||||
if entry.get("feedback_type") is None and entry.get("acceptance_type") is not None:
|
||||
acceptance_type = str(entry.get("acceptance_type") or "")
|
||||
entry["feedback_type"] = "satisfied" if acceptance_type == "accept" else acceptance_type
|
||||
return entry
|
||||
|
||||
@ -10,7 +10,7 @@ from typing import Any, Literal
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
from .models import TaskRecord
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
|
||||
|
||||
@ -76,7 +76,6 @@ class TaskExecutionPlanner:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
timeout_seconds: float = 30.0,
|
||||
) -> TaskExecutionPlan:
|
||||
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
),
|
||||
},
|
||||
],
|
||||
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None,
|
||||
) -> str:
|
||||
validation_note = ""
|
||||
if latest_validation is not None:
|
||||
validation_note = (
|
||||
"\nPrevious validation issues:\n"
|
||||
+ json.dumps(latest_validation.to_dict(), ensure_ascii=False)
|
||||
)
|
||||
history_note = ""
|
||||
if task.feedback:
|
||||
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
|
||||
return (
|
||||
"Decide execution mode for this internal Task attempt.\n"
|
||||
"Use mode=team only when independent research, review, implementation slices, or staged checks "
|
||||
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
|
||||
f"Task goal:\n{task.goal}\n\n"
|
||||
f"Current user request:\n{user_message}\n\n"
|
||||
f"Attempt index: {attempt_index}\n"
|
||||
f"{validation_note}"
|
||||
f"{history_note}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -7,7 +7,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from .models import TaskEvent, TaskRecord, ValidationResult
|
||||
from .models import TaskEvent, TaskRecord
|
||||
from .store import TaskStore
|
||||
|
||||
|
||||
@ -105,38 +105,70 @@ class TaskService:
|
||||
for name in skill_names or []:
|
||||
if name not in task.skill_names:
|
||||
task.skill_names.append(name)
|
||||
task.status = "awaiting_acceptance"
|
||||
task.updated_at = self._now()
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||
self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||
return task
|
||||
|
||||
def record_validation(
|
||||
def add_acceptance(
|
||||
self,
|
||||
task_id: str,
|
||||
run_id: str,
|
||||
validation: ValidationResult,
|
||||
*,
|
||||
final_attempt: bool = True,
|
||||
has_usable_answer: bool = True,
|
||||
acceptance_type: str,
|
||||
comment: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
now = self._now()
|
||||
if validation.status == "accepted":
|
||||
task.status = "awaiting_feedback"
|
||||
elif validation.status in {"insufficient_evidence", "validator_error"}:
|
||||
task.status = "needs_review"
|
||||
elif validation.status == "rejected" and not final_attempt:
|
||||
normalized = normalize_acceptance_type(acceptance_type)
|
||||
matching_acceptance = any(
|
||||
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
|
||||
for item in task.feedback
|
||||
)
|
||||
conflicting_acceptance = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_acceptance is not None:
|
||||
raise ValueError(
|
||||
f"Acceptance for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_acceptance.get('acceptance_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not matching_acceptance:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
if matching_acceptance:
|
||||
return task
|
||||
|
||||
entry = {
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": "satisfied" if normalized == "accept" else normalized,
|
||||
"comment": comment or "",
|
||||
"run_id": run_id,
|
||||
"created_at": now,
|
||||
}
|
||||
task.feedback.append(entry)
|
||||
if normalized == "revise":
|
||||
task.status = "needs_revision"
|
||||
elif validation.status == "rejected" and has_usable_answer:
|
||||
task.status = "needs_review"
|
||||
else:
|
||||
task.status = "failed"
|
||||
elif normalized == "abandon":
|
||||
task.status = "abandoned"
|
||||
task.closed_at = now
|
||||
task.close_reason = "automatic validation rejected the final attempt"
|
||||
task.close_reason = comment or "abandoned"
|
||||
elif normalized == "accept":
|
||||
task.status = "closed"
|
||||
task.closed_at = now
|
||||
task.close_reason = "accepted"
|
||||
task.satisfaction = 1.0
|
||||
if run_id:
|
||||
task.metadata["final_accepted_run_id"] = run_id
|
||||
task.updated_at = now
|
||||
task.validation_result = validation.to_dict()
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
|
||||
self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
|
||||
return task
|
||||
|
||||
def add_feedback(
|
||||
@ -147,52 +179,12 @@ class TaskService:
|
||||
comment: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
now = self._now()
|
||||
matching_feedback = any(
|
||||
item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
|
||||
for item in task.feedback
|
||||
return self.add_acceptance(
|
||||
task_id,
|
||||
acceptance_type=feedback_type,
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
conflicting_feedback = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_feedback is not None:
|
||||
raise ValueError(
|
||||
f"Feedback for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_feedback.get('feedback_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not matching_feedback:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
if matching_feedback:
|
||||
return task
|
||||
|
||||
entry = {
|
||||
"feedback_type": feedback_type,
|
||||
"comment": comment or "",
|
||||
"run_id": run_id,
|
||||
"created_at": now,
|
||||
}
|
||||
task.feedback.append(entry)
|
||||
if feedback_type == "revise":
|
||||
task.status = "needs_revision"
|
||||
elif feedback_type == "abandon":
|
||||
task.status = "abandoned"
|
||||
task.closed_at = now
|
||||
task.close_reason = comment or "abandoned"
|
||||
elif feedback_type == "satisfied":
|
||||
task.status = "closed"
|
||||
task.closed_at = now
|
||||
task.close_reason = "satisfied"
|
||||
task.satisfaction = 1.0
|
||||
task.updated_at = now
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
|
||||
return task
|
||||
|
||||
def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
|
||||
if len(words) <= 4:
|
||||
return cleaned[:40]
|
||||
return " ".join(words[:4])[:40]
|
||||
|
||||
|
||||
def normalize_acceptance_type(value: str) -> str:
|
||||
normalized = (value or "").strip().lower()
|
||||
if normalized == "satisfied":
|
||||
return "accept"
|
||||
if normalized not in {"accept", "revise", "abandon"}:
|
||||
raise ValueError("acceptance_type must be one of: accept, revise, abandon")
|
||||
return normalized
|
||||
|
||||
@ -1,154 +0,0 @@
|
||||
"""Automatic validation for internal Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
|
||||
|
||||
class ValidationService:
|
||||
async def validate_task_result(
|
||||
self,
|
||||
*,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
final_output: str,
|
||||
evidence_packet: Any | None = None,
|
||||
evidence_text: str = "",
|
||||
transcript_excerpt: str = "",
|
||||
tool_summaries: list[str] | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
) -> ValidationResult:
|
||||
provider = None
|
||||
model = None
|
||||
if provider_bundle is not None:
|
||||
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
|
||||
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
|
||||
model = getattr(runtime, "model", None)
|
||||
if provider is not None:
|
||||
try:
|
||||
return await self._validate_with_provider(
|
||||
provider=provider,
|
||||
model=model,
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
final_output=final_output,
|
||||
evidence_text=evidence_text,
|
||||
transcript_excerpt=transcript_excerpt,
|
||||
tool_summaries=tool_summaries or [],
|
||||
team_summaries=team_summaries or [],
|
||||
)
|
||||
except Exception as exc:
|
||||
return ValidationResult(
|
||||
status="validator_error",
|
||||
score=0.0,
|
||||
issues=[f"Validator failed: {exc}"],
|
||||
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
|
||||
missing_requirements=["User review is required because automatic validation failed."],
|
||||
recommended_revision_prompt=(
|
||||
"Review the answer and evidence, then decide whether to revise or accept it."
|
||||
),
|
||||
validator="llm_error",
|
||||
)
|
||||
return self._heuristic_validate(final_output)
|
||||
|
||||
async def _validate_with_provider(
|
||||
self,
|
||||
*,
|
||||
provider: Any,
|
||||
model: str | None,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
final_output: str,
|
||||
evidence_text: str,
|
||||
transcript_excerpt: str,
|
||||
tool_summaries: list[str],
|
||||
team_summaries: list[str],
|
||||
) -> ValidationResult:
|
||||
legacy_context = "" if evidence_text else (
|
||||
f"Transcript excerpt:\n{transcript_excerpt}\n\n"
|
||||
f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
|
||||
f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
|
||||
)
|
||||
prompt = (
|
||||
"Validate whether the assistant output satisfies the task. "
|
||||
"Return only compact JSON with keys: passed, score, issues, "
|
||||
"missing_requirements, recommended_revision_prompt.\n\n"
|
||||
f"Task goal:\n{task.goal}\n\n"
|
||||
f"Current user request:\n{user_message}\n\n"
|
||||
f"Evidence packet:\n{evidence_text}\n\n"
|
||||
f"{legacy_context}"
|
||||
f"Assistant final output:\n{final_output}"
|
||||
)
|
||||
response = await provider.chat(
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a strict task result validator."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
tools=None,
|
||||
model=model,
|
||||
max_tokens=4096,
|
||||
temperature=0.0,
|
||||
)
|
||||
payload = self._parse_json_object(response.content or "")
|
||||
status = payload.get("status")
|
||||
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
|
||||
status = (
|
||||
"accepted"
|
||||
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
|
||||
else "rejected"
|
||||
)
|
||||
return ValidationResult(
|
||||
status=status,
|
||||
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
|
||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||
validator="llm",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _heuristic_validate(final_output: str) -> ValidationResult:
|
||||
text = final_output.strip()
|
||||
if not text:
|
||||
return ValidationResult(
|
||||
passed=False,
|
||||
score=0.0,
|
||||
issues=["Assistant output is empty."],
|
||||
missing_requirements=["A non-empty result is required."],
|
||||
recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
|
||||
validator="heuristic",
|
||||
)
|
||||
lowered = text.lower()
|
||||
if "run failed before completion" in lowered or "tool loop stopped" in lowered:
|
||||
return ValidationResult(
|
||||
passed=False,
|
||||
score=0.35,
|
||||
issues=["The run did not complete cleanly."],
|
||||
missing_requirements=["A successful final result is required."],
|
||||
recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
|
||||
validator="heuristic",
|
||||
)
|
||||
return ValidationResult(passed=True, score=0.85, validator="heuristic")
|
||||
|
||||
@staticmethod
|
||||
def _parse_json_object(text: str) -> dict[str, Any]:
|
||||
cleaned = text.strip()
|
||||
if cleaned.startswith("```"):
|
||||
cleaned = cleaned.strip("`")
|
||||
if cleaned.lower().startswith("json"):
|
||||
cleaned = cleaned[4:].strip()
|
||||
start = cleaned.find("{")
|
||||
end = cleaned.rfind("}")
|
||||
if start >= 0 and end >= start:
|
||||
cleaned = cleaned[start : end + 1]
|
||||
payload = json.loads(cleaned)
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError("validator response must be a JSON object")
|
||||
return payload
|
||||
Reference in New Issue
Block a user