feat(engine): 添加运行时上下文支持并重构工具迭代限制
添加 RuntimeContext 类用于捕获模型运行时的日期时间信息, 包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。 同时增加最大上下文消息数和工具迭代次数的配置选项, 将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。 BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。 - 添加 RuntimeContext 类和相关渲染方法 - 增加 max_context_messages 和 max_tool_iterations 配置 - 移除 ValidationService 相关代码 - 更新消息记录中的验证状态字段 - 添加原始工具调用检测和回退处理
This commit is contained in:
@ -29,9 +29,9 @@ from beaver.tasks import (
|
||||
TaskEvidencePacket,
|
||||
TaskExecutionPlan,
|
||||
TaskRecord,
|
||||
ValidationResult,
|
||||
render_task_evidence,
|
||||
)
|
||||
from beaver.tasks.service import normalize_acceptance_type
|
||||
|
||||
|
||||
NOTIFICATION_SESSION_ID = "notify:default:scheduled"
|
||||
@ -60,11 +60,19 @@ class AgentService:
|
||||
) -> None:
|
||||
self.profile = profile or AgentProfile()
|
||||
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
|
||||
self._apply_configured_profile_defaults()
|
||||
self._loop: AgentLoop | None = None
|
||||
self._run_task: asyncio.Task[None] | None = None
|
||||
self._main_agent_router = MainAgentRouter()
|
||||
self._runtime_services: dict[str, Any] = {}
|
||||
|
||||
def _apply_configured_profile_defaults(self) -> None:
|
||||
defaults = self.loader.config.agents_defaults
|
||||
if defaults.max_context_messages is not None:
|
||||
self.profile.max_context_messages = max(1, defaults.max_context_messages)
|
||||
if defaults.max_tool_iterations is not None:
|
||||
self.profile.max_tool_iterations = max(0, defaults.max_tool_iterations)
|
||||
|
||||
def create_loop(self) -> AgentLoop:
|
||||
"""创建并缓存当前 service 使用的 AgentLoop。"""
|
||||
|
||||
@ -232,7 +240,7 @@ class AgentService:
|
||||
|
||||
Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
|
||||
This entry bypasses the main-agent classifier and forces Task mode so
|
||||
every trigger produces a TaskRecord, validation, feedback state, and a
|
||||
every trigger produces a TaskRecord, evidence, acceptance state, and a
|
||||
run_id that the scheduled-task history can link to.
|
||||
"""
|
||||
|
||||
@ -280,9 +288,9 @@ class AgentService:
|
||||
result.run_id,
|
||||
{
|
||||
"message_type": "scheduled_reply",
|
||||
"scheduled_job_id": job.id,
|
||||
"scheduled_run_id": run.scheduled_run_id,
|
||||
"cron_job_name": job.name,
|
||||
"scheduled_job_id": cron_job_id,
|
||||
"scheduled_run_id": scheduled_run_id,
|
||||
"cron_job_name": cron_job_name,
|
||||
"mode": "notification",
|
||||
},
|
||||
)
|
||||
@ -403,15 +411,15 @@ class AgentService:
|
||||
},
|
||||
)
|
||||
|
||||
async def submit_feedback(
|
||||
async def submit_acceptance(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
run_id: str,
|
||||
feedback_type: str,
|
||||
acceptance_type: str,
|
||||
comment: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Record chat feedback for the internal task linked to a run."""
|
||||
"""Record user acceptance for the internal task linked to a run."""
|
||||
|
||||
loaded = self.create_loop().boot()
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
@ -419,32 +427,31 @@ class AgentService:
|
||||
if task is None or task.session_id != session_id:
|
||||
raise ValueError(f"No internal task found for run_id={run_id!r}")
|
||||
|
||||
normalized = feedback_type.strip().lower()
|
||||
if normalized not in {"satisfied", "revise", "abandon"}:
|
||||
raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
|
||||
normalized = normalize_acceptance_type(acceptance_type)
|
||||
legacy_feedback_type = "satisfied" if normalized == "accept" else normalized
|
||||
|
||||
already_recorded = any(
|
||||
item.get("run_id") == run_id and item.get("feedback_type") == normalized
|
||||
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
|
||||
for item in task.feedback
|
||||
)
|
||||
conflicting_feedback = next(
|
||||
conflicting_acceptance = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("feedback_type") != normalized
|
||||
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_feedback is not None:
|
||||
if conflicting_acceptance is not None:
|
||||
raise ValueError(
|
||||
f"Feedback for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_feedback.get('feedback_type')!r}"
|
||||
f"Acceptance for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_acceptance.get('acceptance_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not already_recorded:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
updated = task if already_recorded else task_service.add_feedback(
|
||||
updated = task if already_recorded else task_service.add_acceptance(
|
||||
task.task_id,
|
||||
feedback_type=normalized,
|
||||
acceptance_type=normalized,
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
@ -455,7 +462,8 @@ class AgentService:
|
||||
{
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"feedback_state": normalized,
|
||||
"acceptance_state": normalized,
|
||||
"feedback_state": legacy_feedback_type,
|
||||
},
|
||||
)
|
||||
if not already_recorded:
|
||||
@ -463,10 +471,11 @@ class AgentService:
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="system",
|
||||
event_type="task_feedback_recorded",
|
||||
event_type="task_acceptance_recorded",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
},
|
||||
@ -475,35 +484,36 @@ class AgentService:
|
||||
)
|
||||
|
||||
generated_candidates = []
|
||||
validation = ValidationResult.from_dict(updated.validation_result)
|
||||
if not already_recorded:
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
feedback_payload = {
|
||||
"feedback_type": normalized,
|
||||
acceptance_payload = {
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment or "",
|
||||
"task_status": updated.status,
|
||||
"final_accepted_run_id": updated.metadata.get("final_accepted_run_id"),
|
||||
}
|
||||
run_memory_store.update_run_record(
|
||||
run_id,
|
||||
success=normalized == "satisfied",
|
||||
feedback=feedback_payload,
|
||||
success=normalized == "accept",
|
||||
feedback=acceptance_payload,
|
||||
)
|
||||
run_memory_store.update_skill_effects_for_run(
|
||||
run_id,
|
||||
success=normalized == "satisfied",
|
||||
feedback_score=self._feedback_score_for_learning(normalized, validation),
|
||||
success=normalized == "accept",
|
||||
feedback_score=self._acceptance_score_for_learning(normalized),
|
||||
notes=(comment or normalized).strip(),
|
||||
)
|
||||
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
|
||||
skill_learning_service.rescore_skill_versions()
|
||||
if already_recorded:
|
||||
generated_candidates = []
|
||||
elif normalized == "satisfied" and validation is not None and validation.accepted:
|
||||
elif normalized == "accept":
|
||||
generated_candidates = [
|
||||
item.to_dict()
|
||||
for item in skill_learning_service.build_learning_candidates_for_task(
|
||||
updated.task_id,
|
||||
trigger_run_id=run_id,
|
||||
final_accepted_run_id=run_id,
|
||||
)
|
||||
]
|
||||
elif normalized == "abandon":
|
||||
@ -514,7 +524,8 @@ class AgentService:
|
||||
event_type="task_failure_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": updated.task_id,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment or "",
|
||||
"task_status": updated.status,
|
||||
"durable_memory_written": False,
|
||||
@ -528,10 +539,28 @@ class AgentService:
|
||||
"run_id": run_id,
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"learning_candidates": generated_candidates,
|
||||
}
|
||||
|
||||
async def submit_feedback(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
run_id: str,
|
||||
feedback_type: str,
|
||||
comment: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Backward-compatible wrapper for older clients."""
|
||||
|
||||
return await self.submit_acceptance(
|
||||
session_id=session_id,
|
||||
run_id=run_id,
|
||||
acceptance_type=feedback_type,
|
||||
comment=comment,
|
||||
)
|
||||
|
||||
async def _process_with_main_agent(
|
||||
self,
|
||||
message: str,
|
||||
@ -591,7 +620,7 @@ class AgentService:
|
||||
else active_task
|
||||
)
|
||||
if active_task is not None and decision.action == "revise_task" and task.task_id == active_task.task_id:
|
||||
task = self._record_revision_feedback_for_task(
|
||||
task = self._record_revision_acceptance_for_task(
|
||||
loaded,
|
||||
task=task,
|
||||
session_id=session_id,
|
||||
@ -599,7 +628,7 @@ class AgentService:
|
||||
)
|
||||
return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)
|
||||
|
||||
def _record_revision_feedback_for_task(
|
||||
def _record_revision_acceptance_for_task(
|
||||
self,
|
||||
loaded: Any,
|
||||
*,
|
||||
@ -607,9 +636,9 @@ class AgentService:
|
||||
session_id: str,
|
||||
comment: str,
|
||||
) -> TaskRecord:
|
||||
"""Mark the latest feedback-eligible run as revised before continuing a task."""
|
||||
"""Mark the latest acceptance-eligible run as revised before continuing a task."""
|
||||
|
||||
if task.status not in {"awaiting_feedback", "needs_revision"}:
|
||||
if task.status not in {"awaiting_acceptance", "needs_revision"}:
|
||||
return task
|
||||
run_id = next((item for item in reversed(task.run_ids) if item), None)
|
||||
if not run_id:
|
||||
@ -617,15 +646,15 @@ class AgentService:
|
||||
|
||||
existing = next((item for item in task.feedback if item.get("run_id") == run_id), None)
|
||||
if existing is not None:
|
||||
if existing.get("feedback_type") != "revise":
|
||||
if existing.get("acceptance_type") != "revise":
|
||||
return task
|
||||
updated = task
|
||||
already_recorded = True
|
||||
else:
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
updated = task_service.add_feedback(
|
||||
updated = task_service.add_acceptance(
|
||||
task.task_id,
|
||||
feedback_type="revise",
|
||||
acceptance_type="revise",
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
@ -638,6 +667,7 @@ class AgentService:
|
||||
{
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"acceptance_state": "revise",
|
||||
"feedback_state": "revise",
|
||||
},
|
||||
)
|
||||
@ -648,9 +678,10 @@ class AgentService:
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="system",
|
||||
event_type="task_feedback_recorded",
|
||||
event_type="task_acceptance_recorded",
|
||||
event_payload={
|
||||
"task_id": updated.task_id,
|
||||
"acceptance_type": "revise",
|
||||
"feedback_type": "revise",
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
@ -659,12 +690,12 @@ class AgentService:
|
||||
content=comment,
|
||||
context_visible=False,
|
||||
)
|
||||
validation = ValidationResult.from_dict(updated.validation_result)
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
run_memory_store.update_run_record(
|
||||
run_id,
|
||||
success=False,
|
||||
feedback={
|
||||
"acceptance_type": "revise",
|
||||
"feedback_type": "revise",
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
@ -673,7 +704,7 @@ class AgentService:
|
||||
run_memory_store.update_skill_effects_for_run(
|
||||
run_id,
|
||||
success=False,
|
||||
feedback_score=self._feedback_score_for_learning("revise", validation),
|
||||
feedback_score=self._acceptance_score_for_learning("revise"),
|
||||
notes=comment.strip() or "revise",
|
||||
)
|
||||
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
|
||||
@ -690,236 +721,185 @@ class AgentService:
|
||||
) -> AgentRunResult:
|
||||
loaded = self.create_loop().boot()
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
validation_service = self._require_loaded(loaded, "validation_service")
|
||||
task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
|
||||
session_manager = self._require_loaded(loaded, "session_manager")
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
|
||||
last_result: AgentRunResult | None = None
|
||||
latest_validation: ValidationResult | None = None
|
||||
base_execution_context = kwargs.get("execution_context")
|
||||
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
|
||||
kwargs = dict(kwargs)
|
||||
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
|
||||
kwargs["provider_bundle"] = provider_bundle
|
||||
|
||||
for attempt_index in (1, 2):
|
||||
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
|
||||
plan = await task_execution_planner.plan(
|
||||
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
|
||||
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
|
||||
plan = await task_execution_planner.plan(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_execution_planned",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
provider_bundle=provider_bundle,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(loaded, kwargs),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_execution_planned",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(loaded, kwargs),
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_mode": True,
|
||||
"attempt_index": attempt_index,
|
||||
"allow_candidate_generation": False,
|
||||
}
|
||||
)
|
||||
if attempt_index == 2 and latest_validation is not None:
|
||||
revision_context = latest_validation.recommended_revision_prompt.strip()
|
||||
if revision_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(
|
||||
base_execution_context,
|
||||
f"Task validation revision request:\n{revision_context}",
|
||||
team_execution_context,
|
||||
)
|
||||
elif team_execution_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
last_result = result
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_synthesis_completed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"main_run_id": result.run_id,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
},
|
||||
)
|
||||
task = task_service.append_run(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
skill_names=self._skill_names_for_run(loaded, result.run_id),
|
||||
)
|
||||
evidence_packet = self._build_task_evidence_packet(
|
||||
session_manager=session_manager,
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
validation = await validation_service.validate_task_result(
|
||||
task=task,
|
||||
user_message=message,
|
||||
final_output=result.output_text,
|
||||
evidence_packet=evidence_packet,
|
||||
evidence_text=evidence_text,
|
||||
transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
|
||||
tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
|
||||
team_summaries=team_summaries,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
latest_validation = validation
|
||||
has_usable_answer = bool(result.output_text.strip()) and (
|
||||
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
|
||||
)
|
||||
task = task_service.record_validation(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
validation,
|
||||
final_attempt=(
|
||||
attempt_index == 2
|
||||
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
|
||||
),
|
||||
has_usable_answer=has_usable_answer,
|
||||
)
|
||||
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
result.run_id,
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_status": task.status,
|
||||
"validation_status": "passed" if validation.accepted else "failed",
|
||||
},
|
||||
)
|
||||
validation_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_mode": True,
|
||||
"attempt_index": attempt_index,
|
||||
"allow_candidate_generation": False,
|
||||
}
|
||||
retry_scheduled = validation.status == "rejected" and attempt_index == 1
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
role="system",
|
||||
event_type="task_validation_snapshotted",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"validation_result": validation.to_dict(),
|
||||
"validation_debug": validation_debug,
|
||||
"retry_scheduled": retry_scheduled,
|
||||
},
|
||||
content=validation.recommended_revision_prompt or None,
|
||||
context_visible=False,
|
||||
)
|
||||
if retry_scheduled:
|
||||
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = validation.to_dict()
|
||||
if not retry_scheduled:
|
||||
return result
|
||||
)
|
||||
if team_execution_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
if last_result is None: # pragma: no cover - defensive
|
||||
raise RuntimeError("Task mode did not produce a run result")
|
||||
return last_result
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_synthesis_completed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"main_run_id": result.run_id,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
},
|
||||
)
|
||||
task = task_service.append_run(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
skill_names=self._skill_names_for_run(loaded, result.run_id),
|
||||
)
|
||||
evidence_packet = self._build_task_evidence_packet(
|
||||
session_manager=session_manager,
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
evidence_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
}
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
result.run_id,
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_status": task.status,
|
||||
"evidence_status": "recorded",
|
||||
},
|
||||
)
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
role="system",
|
||||
event_type="task_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"evidence_debug": evidence_debug,
|
||||
},
|
||||
content=None,
|
||||
context_visible=False,
|
||||
)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = None
|
||||
return result
|
||||
|
||||
async def _run_team_for_task(
|
||||
self,
|
||||
@ -986,12 +966,10 @@ class AgentService:
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
|
||||
if feedback_type == "satisfied":
|
||||
if validation is not None:
|
||||
return max(0.0, min(1.0, float(validation.score)))
|
||||
def _acceptance_score_for_learning(acceptance_type: str) -> float:
|
||||
if acceptance_type == "accept":
|
||||
return 1.0
|
||||
if feedback_type == "revise":
|
||||
if acceptance_type == "revise":
|
||||
return 0.5
|
||||
return 0.0
|
||||
|
||||
@ -1001,12 +979,11 @@ class AgentService:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None = None,
|
||||
plan: TaskExecutionPlan | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
) -> str:
|
||||
phase = f"attempt_{attempt_index}"
|
||||
if latest_validation is not None:
|
||||
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
|
||||
phase = f"revision_attempt_{attempt_index}"
|
||||
elif plan is not None and plan.is_team:
|
||||
phase = f"team_synthesis_attempt_{attempt_index}"
|
||||
@ -1027,24 +1004,14 @@ class AgentService:
|
||||
)
|
||||
else:
|
||||
sections.append("Previously activated skills:\nNone")
|
||||
if latest_validation is not None:
|
||||
validation_lines = [
|
||||
f"accepted: {latest_validation.accepted}",
|
||||
f"score: {latest_validation.score}",
|
||||
]
|
||||
if latest_validation.issues:
|
||||
validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
|
||||
if latest_validation.missing_requirements:
|
||||
validation_lines.append(
|
||||
"missing requirements:\n"
|
||||
+ "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
|
||||
)
|
||||
if latest_validation.recommended_revision_prompt:
|
||||
validation_lines.append(
|
||||
"recommended revision:\n"
|
||||
+ latest_validation.recommended_revision_prompt
|
||||
)
|
||||
sections.append("Validation feedback:\n" + "\n".join(validation_lines))
|
||||
if task.feedback:
|
||||
history_lines = []
|
||||
for item in task.feedback[-5:]:
|
||||
kind = item.get("acceptance_type") or item.get("feedback_type")
|
||||
comment = item.get("comment") or ""
|
||||
run_id = item.get("run_id") or ""
|
||||
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
|
||||
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
|
||||
if plan is not None:
|
||||
plan_lines = [
|
||||
f"mode: {plan.mode}",
|
||||
@ -1313,7 +1280,8 @@ class AgentService:
|
||||
"inbound_metadata": dict(inbound.metadata),
|
||||
"task_id": getattr(result, "task_id", None),
|
||||
"task_status": getattr(result, "task_status", None),
|
||||
"validation_result": getattr(result, "validation_result", None),
|
||||
"evidence_status": "recorded" if getattr(result, "task_id", None) else None,
|
||||
"validation_result": None,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@ -235,26 +235,45 @@ class SessionProcessProjector:
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
elif record.event_type == "task_validation_snapshotted":
|
||||
validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
|
||||
accepted = bool(validation.get("accepted"))
|
||||
root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
|
||||
root["finished_at"] = created_at if root["status"] == "done" else None
|
||||
elif record.event_type == "task_evidence_recorded":
|
||||
root["status"] = "waiting"
|
||||
root["finished_at"] = None
|
||||
add_event(
|
||||
event_id=_event_id(record, "validation"),
|
||||
event_id=_event_id(record, "evidence"),
|
||||
run_id=record.run_id or root_run_id,
|
||||
parent_run_id=root_run_id if record.run_id else None,
|
||||
kind="run_status",
|
||||
actor_type="system",
|
||||
actor_id="validator",
|
||||
actor_name="Validator",
|
||||
text=(
|
||||
f"Validation {'passed' if accepted else 'failed'} "
|
||||
f"(score={validation.get('score')})."
|
||||
+ (" Retry scheduled." if payload.get("retry_scheduled") else "")
|
||||
),
|
||||
actor_id="evidence-recorder",
|
||||
actor_name="Evidence",
|
||||
text="Task evidence was recorded; waiting for user acceptance.",
|
||||
created_at=created_at,
|
||||
status="done" if accepted else "error",
|
||||
status="done",
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
elif record.event_type == "task_acceptance_recorded":
|
||||
acceptance_type = str(payload.get("acceptance_type") or payload.get("feedback_type") or "")
|
||||
if acceptance_type == "accept":
|
||||
root["status"] = "done"
|
||||
root["finished_at"] = created_at
|
||||
elif acceptance_type == "abandon":
|
||||
root["status"] = "cancelled"
|
||||
root["finished_at"] = created_at
|
||||
else:
|
||||
root["status"] = "waiting"
|
||||
root["finished_at"] = None
|
||||
add_event(
|
||||
event_id=_event_id(record, "acceptance"),
|
||||
run_id=record.run_id or root_run_id,
|
||||
parent_run_id=root_run_id if record.run_id else None,
|
||||
kind="run_status",
|
||||
actor_type="user",
|
||||
actor_id="user-acceptance",
|
||||
actor_name="User Acceptance",
|
||||
text=f"User acceptance recorded: {acceptance_type or 'unknown'}.",
|
||||
created_at=created_at,
|
||||
status="done",
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user