feat(engine): 添加运行时上下文支持并重构工具迭代限制

添加 RuntimeContext 类用于捕获模型运行时的日期时间信息,
包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。

同时增加最大上下文消息数和工具迭代次数的配置选项,
将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。

BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。

- 添加 RuntimeContext 类和相关渲染方法
- 增加 max_context_messages 和 max_tool_iterations 配置
- 移除 ValidationService 相关代码
- 更新消息记录中的验证状态字段
- 添加原始工具调用检测和回退处理
This commit is contained in:
2026-05-26 11:18:35 +08:00
parent 16347caf5e
commit 6e9e74d1ee
57 changed files with 5710 additions and 1582 deletions

View File

@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
from .router import MainAgentRouter
from .service import TaskService
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
from .validation import ValidationService
__all__ = [
"EvidenceBuilder",
@ -24,6 +23,5 @@ __all__ = [
"ToolEvidence",
"ValidationResult",
"ValidationStatus",
"ValidationService",
"render_task_evidence",
]

View File

@ -1,4 +1,4 @@
"""Models for internal task tracking and validation."""
"""Models for internal task tracking and user acceptance."""
from __future__ import annotations
@ -9,7 +9,12 @@ from typing import Any, Literal
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
LEGACY_STATUS_MAP = {
"validating": "running",
"awaiting_feedback": "awaiting_acceptance",
"needs_review": "awaiting_acceptance",
}
@dataclass(slots=True)
@ -113,11 +118,11 @@ class TaskRecord:
@property
def is_execution_active(self) -> bool:
return self.status in {"running", "validating"}
return self.status == "running"
@property
def requires_user_action(self) -> bool:
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
return self.status in {"awaiting_acceptance", "needs_revision"}
def to_dict(self) -> dict[str, Any]:
return {
@ -137,6 +142,7 @@ class TaskRecord:
"satisfaction": self.satisfaction,
"run_ids": list(self.run_ids),
"skill_names": list(self.skill_names),
"acceptance": list(self.feedback),
"feedback": list(self.feedback),
"validation_result": self.validation_result,
"metadata": dict(self.metadata),
@ -152,7 +158,7 @@ class TaskRecord:
goal=str(payload.get("goal") or payload.get("description") or ""),
constraints=[str(item) for item in payload.get("constraints") or []],
priority=int(payload.get("priority", 0) or 0),
status=str(payload.get("status") or "open"),
status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
creator=str(payload.get("creator") or "main-agent"),
created_at=str(payload.get("created_at") or ""),
updated_at=str(payload.get("updated_at") or ""),
@ -161,7 +167,11 @@ class TaskRecord:
satisfaction=_optional_float(payload.get("satisfaction")),
run_ids=[str(item) for item in payload.get("run_ids") or []],
skill_names=[str(item) for item in payload.get("skill_names") or []],
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
feedback=[
_normalize_acceptance_entry(dict(item))
for item in (payload.get("acceptance") or payload.get("feedback") or [])
if isinstance(item, dict)
],
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
metadata=dict(payload.get("metadata") or {}),
)
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
if value in (None, ""):
return None
return float(value)
def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
    """Make sure an entry carries both ``acceptance_type`` and ``feedback_type``.

    Whichever of the two keys is missing (or ``None``) is derived from the
    other one, translating between the ``"satisfied"`` feedback value and the
    ``"accept"`` acceptance value. Any other value is copied over as its
    string form. Entries that already have both keys, or neither, are left
    untouched.

    Args:
        entry: The entry to normalize. It is modified in place.

    Returns:
        The same ``entry`` object that was passed in.
    """
    acceptance_value = entry.get("acceptance_type")
    feedback_value = entry.get("feedback_type")

    if acceptance_value is None and feedback_value is not None:
        # Only the feedback key is present: derive the acceptance key.
        feedback_text = str(feedback_value or "")
        if feedback_text == "satisfied":
            entry["acceptance_type"] = "accept"
        else:
            entry["acceptance_type"] = feedback_text
    elif feedback_value is None and acceptance_value is not None:
        # Only the acceptance key is present: derive the feedback key.
        acceptance_text = str(acceptance_value or "")
        if acceptance_text == "accept":
            entry["feedback_type"] = "satisfied"
        else:
            entry["feedback_type"] = acceptance_text

    return entry

View File

@ -10,7 +10,7 @@ from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
from .models import TaskRecord
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
@ -76,7 +76,6 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
provider_bundle: ProviderBundle | None = None,
timeout_seconds: float = 30.0,
) -> TaskExecutionPlan:
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
task=task,
user_message=user_message,
attempt_index=attempt_index,
latest_validation=latest_validation,
),
},
],
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None,
) -> str:
validation_note = ""
if latest_validation is not None:
validation_note = (
"\nPrevious validation issues:\n"
+ json.dumps(latest_validation.to_dict(), ensure_ascii=False)
)
history_note = ""
if task.feedback:
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
return (
"Decide execution mode for this internal Task attempt.\n"
"Use mode=team only when independent research, review, implementation slices, or staged checks "
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
f"Task goal:\n{task.goal}\n\n"
f"Current user request:\n{user_message}\n\n"
f"Attempt index: {attempt_index}\n"
f"{validation_note}"
f"{history_note}"
)
@staticmethod

View File

@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any
from uuid import uuid4
from .models import TaskEvent, TaskRecord, ValidationResult
from .models import TaskEvent, TaskRecord
from .store import TaskStore
@ -105,38 +105,70 @@ class TaskService:
for name in skill_names or []:
if name not in task.skill_names:
task.skill_names.append(name)
task.status = "awaiting_acceptance"
task.updated_at = self._now()
self.store.upsert_task(task)
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
return task
def record_validation(
def add_acceptance(
self,
task_id: str,
run_id: str,
validation: ValidationResult,
*,
final_attempt: bool = True,
has_usable_answer: bool = True,
acceptance_type: str,
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
if validation.status == "accepted":
task.status = "awaiting_feedback"
elif validation.status in {"insufficient_evidence", "validator_error"}:
task.status = "needs_review"
elif validation.status == "rejected" and not final_attempt:
normalized = normalize_acceptance_type(acceptance_type)
matching_acceptance = any(
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
for item in task.feedback
)
conflicting_acceptance = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
),
None,
)
if conflicting_acceptance is not None:
raise ValueError(
f"Acceptance for run_id={run_id!r} was already recorded as "
f"{conflicting_acceptance.get('acceptance_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_acceptance:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_acceptance:
return task
entry = {
"acceptance_type": normalized,
"feedback_type": "satisfied" if normalized == "accept" else normalized,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if normalized == "revise":
task.status = "needs_revision"
elif validation.status == "rejected" and has_usable_answer:
task.status = "needs_review"
else:
task.status = "failed"
elif normalized == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = "automatic validation rejected the final attempt"
task.close_reason = comment or "abandoned"
elif normalized == "accept":
task.status = "closed"
task.closed_at = now
task.close_reason = "accepted"
task.satisfaction = 1.0
if run_id:
task.metadata["final_accepted_run_id"] = run_id
task.updated_at = now
task.validation_result = validation.to_dict()
self.store.upsert_task(task)
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
return task
def add_feedback(
@ -147,52 +179,12 @@ class TaskService:
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
matching_feedback = any(
item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
for item in task.feedback
return self.add_acceptance(
task_id,
acceptance_type=feedback_type,
comment=comment,
run_id=run_id,
)
conflicting_feedback = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
),
None,
)
if conflicting_feedback is not None:
raise ValueError(
f"Feedback for run_id={run_id!r} was already recorded as "
f"{conflicting_feedback.get('feedback_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_feedback:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_feedback:
return task
entry = {
"feedback_type": feedback_type,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if feedback_type == "revise":
task.status = "needs_revision"
elif feedback_type == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = comment or "abandoned"
elif feedback_type == "satisfied":
task.status = "closed"
task.closed_at = now
task.close_reason = "satisfied"
task.satisfaction = 1.0
task.updated_at = now
self.store.upsert_task(task)
self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
return task
def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
task = self._require(task_id)
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
if len(words) <= 4:
return cleaned[:40]
return " ".join(words[:4])[:40]
def normalize_acceptance_type(value: str) -> str:
    """Return the canonical acceptance type for ``value``.

    The input is stripped of surrounding whitespace and lower-cased. The
    value ``"satisfied"`` is translated to ``"accept"``.

    Args:
        value: Raw acceptance type; a falsy value is treated as an empty string.

    Returns:
        One of ``"accept"``, ``"revise"`` or ``"abandon"``.

    Raises:
        ValueError: If the cleaned value is none of the accepted types.
    """
    token = (value or "").strip().lower()
    # "satisfied" is accepted as another spelling of "accept".
    canonical = "accept" if token == "satisfied" else token
    if canonical in {"accept", "revise", "abandon"}:
        return canonical
    raise ValueError("acceptance_type must be one of: accept, revise, abandon")

View File

@ -1,154 +0,0 @@
"""Automatic validation for internal Task mode."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
class ValidationService:
    """Automatic validator for internal Task results.

    A chat provider taken from a ``ProviderBundle`` is used when one is
    available; otherwise a simple text heuristic decides.
    """

    async def validate_task_result(
        self,
        *,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        evidence_packet: Any | None = None,
        evidence_text: str = "",
        transcript_excerpt: str = "",
        tool_summaries: list[str] | None = None,
        team_summaries: list[str] | None = None,
        provider_bundle: ProviderBundle | None = None,
    ) -> ValidationResult:
        """Validate ``final_output`` against ``task`` and the user request.

        Args:
            task: Task whose ``goal`` is placed in the validator prompt.
            user_message: Current user request, placed in the prompt.
            final_output: Assistant output being judged.
            evidence_packet: Accepted but not read by this method.
            evidence_text: Rendered evidence placed in the prompt.
            transcript_excerpt: Used in the prompt only when ``evidence_text``
                is empty.
            tool_summaries: Used in the prompt only when ``evidence_text`` is
                empty; ``None`` is treated as an empty list.
            team_summaries: Same handling as ``tool_summaries``.
            provider_bundle: Source of the provider and runtime. The auxiliary
                ones are preferred over the main ones.

        Returns:
            The provider-based result when a provider is available, a
            ``validator_error`` result if that path raises, and the heuristic
            result when there is no provider.
        """
        if provider_bundle is None:
            return self._heuristic_validate(final_output)

        provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
        runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
        model = getattr(runtime, "model", None)
        if provider is None:
            return self._heuristic_validate(final_output)

        try:
            return await self._validate_with_provider(
                provider=provider,
                model=model,
                task=task,
                user_message=user_message,
                final_output=final_output,
                evidence_text=evidence_text,
                transcript_excerpt=transcript_excerpt,
                tool_summaries=tool_summaries or [],
                team_summaries=team_summaries or [],
            )
        except Exception as exc:
            # Any failure in the provider path is reported as a result object
            # instead of propagating to the caller.
            return ValidationResult(
                status="validator_error",
                score=0.0,
                issues=[f"Validator failed: {exc}"],
                evidence_gaps=["Automatic validation failed before producing a reliable decision."],
                missing_requirements=["User review is required because automatic validation failed."],
                recommended_revision_prompt=(
                    "Review the answer and evidence, then decide whether to revise or accept it."
                ),
                validator="llm_error",
            )

    async def _validate_with_provider(
        self,
        *,
        provider: Any,
        model: str | None,
        task: TaskRecord,
        user_message: str,
        final_output: str,
        evidence_text: str,
        transcript_excerpt: str,
        tool_summaries: list[str],
        team_summaries: list[str],
    ) -> ValidationResult:
        """Ask ``provider`` to judge the output and convert its JSON reply.

        The reply must parse as a JSON object. When it carries no recognised
        ``status``, the status is derived from ``passed`` and ``score``
        (accepted only if ``passed`` is truthy and the score is at least 0.75).
        The reported score is clamped to the range 0.0-1.0.
        """
        # The transcript/tool/team sections are only sent when no evidence
        # text was supplied.
        if evidence_text:
            legacy_context = ""
        else:
            legacy_context = (
                f"Transcript excerpt:\n{transcript_excerpt}\n\n"
                f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
                f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
            )

        prompt_parts = [
            "Validate whether the assistant output satisfies the task. ",
            "Return only compact JSON with keys: passed, score, issues, ",
            "missing_requirements, recommended_revision_prompt.\n\n",
            f"Task goal:\n{task.goal}\n\n",
            f"Current user request:\n{user_message}\n\n",
            f"Evidence packet:\n{evidence_text}\n\n",
            legacy_context,
            f"Assistant final output:\n{final_output}",
        ]
        system_message = {"role": "system", "content": "You are a strict task result validator."}
        user_prompt = {"role": "user", "content": "".join(prompt_parts)}

        response = await provider.chat(
            messages=[system_message, user_prompt],
            tools=None,
            model=model,
            max_tokens=4096,
            temperature=0.0,
        )
        payload = self._parse_json_object(response.content or "")

        known_statuses = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
        status = payload.get("status")
        status_missing = status not in known_statuses
        raw_score = float(payload.get("score", 0.0) or 0.0)
        if status_missing:
            if payload.get("passed") and raw_score >= 0.75:
                status = "accepted"
            else:
                status = "rejected"

        def _as_strings(key: str) -> list[str]:
            # A missing or empty field becomes an empty list.
            return [str(item) for item in payload.get(key) or []]

        return ValidationResult(
            status=status,
            score=max(0.0, min(1.0, raw_score)),
            issues=_as_strings("issues"),
            missing_requirements=_as_strings("missing_requirements"),
            evidence_gaps=_as_strings("evidence_gaps"),
            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
            validator="llm",
        )

    @staticmethod
    def _heuristic_validate(final_output: str) -> ValidationResult:
        """Judge ``final_output`` from its text alone.

        Blank output and output containing a known failure phrase are marked
        as not passed; anything else passes with a score of 0.85.
        """
        # NOTE(review): these results are built with ``passed=`` while the
        # other call sites in this class pass ``status=`` -- confirm that
        # ValidationResult accepts both.
        stripped_output = final_output.strip()
        if not stripped_output:
            return ValidationResult(
                passed=False,
                score=0.0,
                issues=["Assistant output is empty."],
                missing_requirements=["A non-empty result is required."],
                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
                validator="heuristic",
            )

        lowered = stripped_output.lower()
        failure_markers = ("run failed before completion", "tool loop stopped")
        if any(marker in lowered for marker in failure_markers):
            return ValidationResult(
                passed=False,
                score=0.35,
                issues=["The run did not complete cleanly."],
                missing_requirements=["A successful final result is required."],
                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
                validator="heuristic",
            )

        return ValidationResult(passed=True, score=0.85, validator="heuristic")

    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """Extract and parse the JSON object contained in ``text``.

        Surrounding backticks and a leading ``json`` tag are removed from
        fenced replies, then the span from the first ``{`` to the last ``}``
        is parsed.

        Raises:
            ValueError: If the text is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``) or the parsed value is not a dict.
        """
        candidate = text.strip()
        if candidate.startswith("```"):
            candidate = candidate.strip("`")
            if candidate.lower().startswith("json"):
                candidate = candidate[4:].strip()

        first_brace = candidate.find("{")
        last_brace = candidate.rfind("}")
        if 0 <= first_brace <= last_brace:
            candidate = candidate[first_brace : last_brace + 1]

        parsed = json.loads(candidate)
        if isinstance(parsed, dict):
            return parsed
        raise ValueError("validator response must be a JSON object")