feat(engine): 添加运行时上下文支持并重构工具迭代限制
添加 RuntimeContext 类用于捕获模型运行时的日期时间信息, 包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。 同时增加最大上下文消息数和工具迭代次数的配置选项, 将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。 BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。 - 添加 RuntimeContext 类和相关渲染方法 - 增加 max_context_messages 和 max_tool_iterations 配置 - 移除 ValidationService 相关代码 - 更新消息记录中的验证状态字段 - 添加原始工具调用检测和回退处理
This commit is contained in:
@ -4,6 +4,7 @@ from .builder import (
|
||||
ContextBuildInput,
|
||||
ContextBuildResult,
|
||||
ContextBuilder,
|
||||
RuntimeContext,
|
||||
SessionContext,
|
||||
SkillContext,
|
||||
)
|
||||
@ -12,6 +13,7 @@ __all__ = [
|
||||
"ContextBuildInput",
|
||||
"ContextBuildResult",
|
||||
"ContextBuilder",
|
||||
"RuntimeContext",
|
||||
"SessionContext",
|
||||
"SkillContext",
|
||||
]
|
||||
|
||||
@ -80,6 +80,16 @@ class SessionContext:
|
||||
parent_session_id: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RuntimeContext:
|
||||
"""Per-run runtime facts that should be visible to the model."""
|
||||
|
||||
utc_datetime: str
|
||||
local_datetime: str
|
||||
timezone: str | None = None
|
||||
utc_offset: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ContextBuildInput:
|
||||
"""一次上下文构建所需的全部输入。
|
||||
@ -103,6 +113,7 @@ class ContextBuildInput:
|
||||
memory_snapshot: MemorySnapshot | None = None
|
||||
activated_skills: list[SkillContext] = field(default_factory=list)
|
||||
session_context: SessionContext | None = None
|
||||
runtime_context: RuntimeContext | None = None
|
||||
execution_context: str | None = None
|
||||
extra_sections: list[str] = field(default_factory=list)
|
||||
|
||||
@ -143,9 +154,10 @@ class ContextBuilder:
|
||||
1. Beaver user-facing assistant identity
|
||||
2. base system prompt
|
||||
3. session metadata
|
||||
4. execution context
|
||||
5. frozen memory snapshot
|
||||
6. extra sections
|
||||
4. runtime date/time
|
||||
5. execution context
|
||||
6. frozen memory snapshot
|
||||
7. extra sections
|
||||
|
||||
这样设计的原因:
|
||||
- 身份与总规则要最靠前
|
||||
@ -164,6 +176,10 @@ class ContextBuilder:
|
||||
if session_section:
|
||||
sections.append(session_section)
|
||||
|
||||
runtime_section = self._render_runtime_section(build_input.runtime_context)
|
||||
if runtime_section:
|
||||
sections.append(runtime_section)
|
||||
|
||||
execution_context = (build_input.execution_context or "").strip()
|
||||
if execution_context:
|
||||
sections.append(f"# Execution Context\n\n{execution_context}")
|
||||
@ -347,6 +363,31 @@ class ContextBuilder:
|
||||
return None
|
||||
return "# Current Session\n\n" + "\n".join(rows)
|
||||
|
||||
def _render_runtime_section(self, runtime_context: RuntimeContext | None) -> str | None:
|
||||
"""Render date/time facts captured for the current model run."""
|
||||
|
||||
if runtime_context is None:
|
||||
return None
|
||||
|
||||
rows: list[str] = []
|
||||
if runtime_context.utc_datetime:
|
||||
rows.append(f"Current UTC time: {runtime_context.utc_datetime}")
|
||||
if runtime_context.local_datetime:
|
||||
rows.append(f"Current local time: {runtime_context.local_datetime}")
|
||||
if runtime_context.timezone:
|
||||
rows.append(f"Local timezone: {runtime_context.timezone}")
|
||||
if runtime_context.utc_offset:
|
||||
rows.append(f"Local UTC offset: {runtime_context.utc_offset}")
|
||||
|
||||
if not rows:
|
||||
return None
|
||||
return (
|
||||
"# Current Date and Time\n\n"
|
||||
+ "\n".join(rows)
|
||||
+ "\n\nUse this section as authoritative for relative date/time references such as "
|
||||
'"today", "tomorrow", "now", "this week", and "next month".'
|
||||
)
|
||||
|
||||
def build_skill_activation_messages(self, activated_skills: list[SkillContext]) -> list[dict[str, str]]:
|
||||
"""把已激活 skill 转成显式消息。
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ from beaver.skills.learning.eval import SkillDraftEvaluator
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillSpecStore
|
||||
from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
|
||||
from beaver.tasks import TaskExecutionPlanner, TaskService
|
||||
from beaver.tasks.skill_resolver import TaskSkillResolver
|
||||
from beaver.skills import SkillAssembler, SkillsLoader
|
||||
from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
|
||||
@ -91,7 +91,6 @@ class EngineLoadResult:
|
||||
task_skill_resolver: TaskSkillResolver | None = None
|
||||
task_service: TaskService | None = None
|
||||
task_execution_planner: TaskExecutionPlanner | None = None
|
||||
validation_service: ValidationService | None = None
|
||||
mcp_manager: MCPConnectionManager | None = None
|
||||
mcp_report: dict[str, dict] = field(default_factory=dict)
|
||||
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
|
||||
@ -166,7 +165,6 @@ class EngineLoader:
|
||||
task_skill_resolver: TaskSkillResolver | None = None,
|
||||
task_service: TaskService | None = None,
|
||||
task_execution_planner: TaskExecutionPlanner | None = None,
|
||||
validation_service: ValidationService | None = None,
|
||||
) -> None:
|
||||
self.config = config or load_config(workspace=workspace, config_path=config_path)
|
||||
configured_workspace = self.config.agents_defaults.workspace
|
||||
@ -192,7 +190,6 @@ class EngineLoader:
|
||||
self._task_skill_resolver = task_skill_resolver
|
||||
self._task_service = task_service
|
||||
self._task_execution_planner = task_execution_planner
|
||||
self._validation_service = validation_service
|
||||
|
||||
def load(self) -> EngineLoadResult:
|
||||
"""装配当前主链需要的最小 runtime 对象。"""
|
||||
@ -276,7 +273,6 @@ class EngineLoader:
|
||||
)
|
||||
task_service = self._task_service or TaskService(workspace / "tasks")
|
||||
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
|
||||
validation_service = self._validation_service or ValidationService()
|
||||
mcp_manager = MCPConnectionManager(
|
||||
self.config.tools.mcp_servers,
|
||||
authz_config=self.config.authz,
|
||||
@ -311,7 +307,6 @@ class EngineLoader:
|
||||
task_skill_resolver=task_skill_resolver,
|
||||
task_service=task_service,
|
||||
task_execution_planner=task_execution_planner,
|
||||
validation_service=validation_service,
|
||||
mcp_manager=mcp_manager,
|
||||
)
|
||||
if self._session_manager is None:
|
||||
|
||||
@ -4,12 +4,15 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
|
||||
from beaver.engine.context import ContextBuildInput, RuntimeContext, SessionContext, SkillContext
|
||||
from beaver.memory.runs import RunRecord, SkillEffectRecord
|
||||
from beaver.skills.learning import RunReceiptContext
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
@ -26,6 +29,17 @@ TOOL_FAILURE_GUIDANCE_PROMPT = (
|
||||
"Use available materials, state uncertainty clearly, and provide partial confirmed results."
|
||||
)
|
||||
|
||||
RAW_TOOL_CALL_FALLBACK = (
|
||||
"The run reached the configured tool-call limit before producing a reliable final answer. "
|
||||
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
|
||||
"Please request a revision to continue the task."
|
||||
)
|
||||
|
||||
_RAW_TOOL_CALL_RE = re.compile(
|
||||
r"^\s*<tool_call\b[\s\S]*?</tool_call>\s*$|^\s*<function=[^>]+>[\s\S]*?</function>\s*$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AgentProfile:
|
||||
@ -35,8 +49,9 @@ class AgentProfile:
|
||||
system_prompt: str = ""
|
||||
default_model: str = "gpt-4.1-mini"
|
||||
max_tokens: int = 4096
|
||||
max_context_messages: int = 1000
|
||||
temperature: float = 0.2
|
||||
max_tool_iterations: int = 8
|
||||
max_tool_iterations: int = 30
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -446,7 +461,7 @@ class AgentLoop:
|
||||
*(pinned_skill_contexts or []),
|
||||
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
|
||||
]
|
||||
if not include_skill_assembly or thinking_enabled is False:
|
||||
if not include_skill_assembly:
|
||||
activated_skills = self._merge_skill_contexts(pinned_skills, [])
|
||||
else:
|
||||
skill_query = skill_selection_context or task
|
||||
@ -512,8 +527,6 @@ class AgentLoop:
|
||||
|
||||
if not include_tools:
|
||||
selected_tool_specs = []
|
||||
elif thinking_enabled is False:
|
||||
selected_tool_specs = tool_registry.list_specs()
|
||||
else:
|
||||
selected_tool_specs = await tool_assembler.assemble(
|
||||
task_description=task,
|
||||
@ -543,7 +556,10 @@ class AgentLoop:
|
||||
|
||||
build_input = ContextBuildInput(
|
||||
base_system_prompt=self.profile.system_prompt,
|
||||
history=session_manager.get_history(resolved_session_id),
|
||||
history=session_manager.get_history(
|
||||
resolved_session_id,
|
||||
max_messages=max(1, self.profile.max_context_messages),
|
||||
),
|
||||
current_user_input=task,
|
||||
memory_snapshot=memory_snapshot,
|
||||
activated_skills=activated_skills,
|
||||
@ -554,6 +570,7 @@ class AgentLoop:
|
||||
user_id=user_id,
|
||||
parent_session_id=parent_session_id,
|
||||
),
|
||||
runtime_context=self._current_runtime_context(),
|
||||
execution_context=execution_context,
|
||||
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
|
||||
)
|
||||
@ -693,6 +710,7 @@ class AgentLoop:
|
||||
tool_calls=assistant_tool_calls or None,
|
||||
finish_reason=response.finish_reason,
|
||||
reasoning=response.reasoning_content,
|
||||
context_visible=not bool(assistant_tool_calls),
|
||||
source=source,
|
||||
title=title,
|
||||
model=final_model,
|
||||
@ -707,7 +725,11 @@ class AgentLoop:
|
||||
|
||||
if not response.has_tool_calls:
|
||||
final_text = response.content or ""
|
||||
final_finish_reason = response.finish_reason or "stop"
|
||||
if self._looks_like_raw_tool_call(final_text):
|
||||
final_text = RAW_TOOL_CALL_FALLBACK
|
||||
final_finish_reason = "invalid_tool_call_text"
|
||||
else:
|
||||
final_finish_reason = response.finish_reason or "stop"
|
||||
break
|
||||
|
||||
if iterations >= resolved_max_tool_iterations:
|
||||
@ -719,10 +741,7 @@ class AgentLoop:
|
||||
temperature=resolved_temperature,
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
final_text = finalized or (
|
||||
"Tool loop stopped after reaching the configured iteration limit, "
|
||||
"and no final answer was produced."
|
||||
)
|
||||
final_text = finalized or RAW_TOOL_CALL_FALLBACK
|
||||
final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
|
||||
session_manager.append_message(
|
||||
resolved_session_id,
|
||||
@ -877,17 +896,14 @@ class AgentLoop:
|
||||
temperature: float,
|
||||
thinking_enabled: bool | None,
|
||||
) -> str:
|
||||
final_messages = [
|
||||
*messages,
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"The configured tool iteration budget is exhausted. Do not call tools. "
|
||||
"Produce the best final answer from the existing conversation and tool results. "
|
||||
"State uncertainty explicitly."
|
||||
),
|
||||
},
|
||||
]
|
||||
final_messages = AgentLoop._with_system_guidance(
|
||||
messages,
|
||||
(
|
||||
"The configured tool iteration budget is exhausted. Do not call tools. "
|
||||
"Produce the best final answer from the existing conversation and tool results. "
|
||||
"State uncertainty explicitly."
|
||||
),
|
||||
)
|
||||
kwargs: dict[str, Any] = {
|
||||
"messages": final_messages,
|
||||
"tools": None,
|
||||
@ -898,7 +914,27 @@ class AgentLoop:
|
||||
if thinking_enabled is not None:
|
||||
kwargs["thinking_enabled"] = thinking_enabled
|
||||
response = await provider.chat(**kwargs)
|
||||
return (response.content or "").strip()
|
||||
if response.has_tool_calls:
|
||||
return ""
|
||||
content = (response.content or "").strip()
|
||||
if AgentLoop._looks_like_raw_tool_call(content):
|
||||
return ""
|
||||
return content
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_raw_tool_call(content: str | None) -> bool:
|
||||
if not content:
|
||||
return False
|
||||
return bool(_RAW_TOOL_CALL_RE.match(content))
|
||||
|
||||
@staticmethod
|
||||
def _with_system_guidance(messages: list[dict[str, Any]], guidance: str) -> list[dict[str, Any]]:
|
||||
copied = [dict(message) for message in messages]
|
||||
if copied and copied[0].get("role") == "system":
|
||||
existing = str(copied[0].get("content") or "").strip()
|
||||
copied[0]["content"] = "\n\n".join(part for part in (existing, guidance.strip()) if part)
|
||||
return copied
|
||||
return [{"role": "system", "content": guidance.strip()}, *copied]
|
||||
|
||||
@staticmethod
|
||||
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
|
||||
@ -1133,3 +1169,49 @@ class AgentLoop:
|
||||
@staticmethod
|
||||
def _utc_now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
@staticmethod
|
||||
def _current_runtime_context() -> RuntimeContext:
|
||||
utc_now = datetime.now(timezone.utc)
|
||||
timezone_name = AgentLoop._configured_timezone_name()
|
||||
local_now = datetime.now().astimezone()
|
||||
rendered_timezone = local_now.tzname()
|
||||
|
||||
if timezone_name:
|
||||
try:
|
||||
local_now = utc_now.astimezone(ZoneInfo(timezone_name))
|
||||
rendered_timezone = timezone_name
|
||||
except ZoneInfoNotFoundError:
|
||||
rendered_timezone = local_now.tzname() or timezone_name
|
||||
|
||||
return RuntimeContext(
|
||||
utc_datetime=utc_now.isoformat(),
|
||||
local_datetime=local_now.isoformat(),
|
||||
timezone=rendered_timezone,
|
||||
utc_offset=AgentLoop._format_utc_offset(local_now),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configured_timezone_name() -> str | None:
|
||||
for value in (os.getenv("BEAVER_RUNTIME_TIMEZONE"), os.getenv("TZ")):
|
||||
cleaned = (value or "").strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
|
||||
try:
|
||||
timezone_file = "/etc/timezone"
|
||||
if os.path.exists(timezone_file):
|
||||
with open(timezone_file, encoding="utf-8") as file:
|
||||
cleaned = file.read().strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
except OSError:
|
||||
return None
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _format_utc_offset(value: datetime) -> str | None:
|
||||
raw = value.strftime("%z")
|
||||
if not raw:
|
||||
return None
|
||||
return f"{raw[:3]}:{raw[3:]}"
|
||||
|
||||
@ -119,13 +119,23 @@ class LiteLLMProvider(LLMProvider):
|
||||
@staticmethod
|
||||
def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
sanitized = []
|
||||
system_contents: list[str] = []
|
||||
for message in messages:
|
||||
clean = {key: value for key, value in message.items() if key in _ALLOWED_MSG_KEYS}
|
||||
if clean.get("role") == "system":
|
||||
content = clean.get("content")
|
||||
if isinstance(content, str) and content.strip():
|
||||
system_contents.append(content.strip())
|
||||
elif content is not None:
|
||||
system_contents.append(str(content))
|
||||
continue
|
||||
if clean.get("role") == "assistant" and "content" not in clean:
|
||||
clean["content"] = None
|
||||
if isinstance(clean.get("tool_calls"), list):
|
||||
clean["tool_calls"] = LiteLLMProvider._sanitize_tool_calls(clean["tool_calls"])
|
||||
sanitized.append(clean)
|
||||
if system_contents:
|
||||
sanitized.insert(0, {"role": "system", "content": "\n\n".join(system_contents)})
|
||||
return sanitized
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -84,8 +84,10 @@ class MessageRecord:
|
||||
payload["task_id"] = self.event_payload.get("task_id")
|
||||
if self.event_payload.get("task_status"):
|
||||
payload["task_status"] = self.event_payload.get("task_status")
|
||||
if self.event_payload.get("validation_status"):
|
||||
payload["validation_status"] = self.event_payload.get("validation_status")
|
||||
if self.event_payload.get("evidence_status"):
|
||||
payload["evidence_status"] = self.event_payload.get("evidence_status")
|
||||
if self.event_payload.get("acceptance_state"):
|
||||
payload["acceptance_state"] = self.event_payload.get("acceptance_state")
|
||||
if self.event_payload.get("feedback_state"):
|
||||
payload["feedback_state"] = self.event_payload.get("feedback_state")
|
||||
if self.event_payload.get("feedback_error"):
|
||||
|
||||
@ -86,6 +86,18 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
|
||||
model=_string(defaults.get("model") or data.get("model")),
|
||||
provider=_string(defaults.get("provider") or data.get("provider")),
|
||||
embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")),
|
||||
max_context_messages=_int(
|
||||
defaults.get("maxContextMessages")
|
||||
or defaults.get("max_context_messages")
|
||||
or data.get("maxContextMessages")
|
||||
or data.get("max_context_messages")
|
||||
),
|
||||
max_tool_iterations=_int(
|
||||
defaults.get("maxToolIterations")
|
||||
or defaults.get("max_tool_iterations")
|
||||
or data.get("maxToolIterations")
|
||||
or data.get("max_tool_iterations")
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@ -217,6 +229,13 @@ def _float(value: Any) -> float | None:
|
||||
return float(value)
|
||||
|
||||
|
||||
def _int(value: Any) -> int | None:
|
||||
parsed = _float(value)
|
||||
if parsed is None:
|
||||
return None
|
||||
return int(parsed)
|
||||
|
||||
|
||||
def _bool(value: Any, *, default: bool) -> bool:
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
|
||||
@ -25,6 +25,8 @@ class AgentDefaultsConfig:
|
||||
model: str | None = None
|
||||
provider: str | None = None
|
||||
embedding_model: str | None = None
|
||||
max_context_messages: int | None = None
|
||||
max_tool_iterations: int | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
||||
@ -44,6 +44,8 @@ from .files import (
|
||||
workspace_file_path,
|
||||
)
|
||||
from .schemas import (
|
||||
WebChatAcceptanceRequest,
|
||||
WebChatAcceptanceResponse,
|
||||
WebChatFeedbackRequest,
|
||||
WebChatFeedbackResponse,
|
||||
WebChatRequest,
|
||||
@ -155,6 +157,13 @@ except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only env
|
||||
return decorator
|
||||
|
||||
|
||||
RAW_TOOL_CALL_DISPLAY_FALLBACK = (
|
||||
"The run reached the configured tool-call limit before producing a reliable final answer. "
|
||||
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
|
||||
"Please request a revision to continue the task."
|
||||
)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def _app_lifespan(
|
||||
app: FastAPI,
|
||||
@ -365,6 +374,7 @@ def create_app(
|
||||
"workspace_exists": loaded.workspace.exists(),
|
||||
"model": config.default_model or agent_service.profile.default_model,
|
||||
"max_tokens": agent_service.profile.max_tokens,
|
||||
"max_context_messages": agent_service.profile.max_context_messages,
|
||||
"temperature": agent_service.profile.temperature,
|
||||
"max_tool_iterations": agent_service.profile.max_tool_iterations,
|
||||
"providers": providers_status,
|
||||
@ -1719,7 +1729,8 @@ def create_app(
|
||||
usage=result.usage,
|
||||
task_id=result.task_id,
|
||||
task_status=result.task_status,
|
||||
validation_result=result.validation_result,
|
||||
evidence_status="recorded" if result.task_id else None,
|
||||
validation_result=None,
|
||||
)
|
||||
|
||||
fallback_target = _model_dump(payload.fallback_target)
|
||||
@ -1769,7 +1780,8 @@ def create_app(
|
||||
usage=result.usage,
|
||||
task_id=result.task_id,
|
||||
task_status=result.task_status,
|
||||
validation_result=result.validation_result,
|
||||
evidence_status="recorded" if result.task_id else None,
|
||||
validation_result=None,
|
||||
)
|
||||
|
||||
@app.websocket("/ws/{session_id:path}")
|
||||
@ -1882,6 +1894,30 @@ def create_app(
|
||||
}
|
||||
)
|
||||
|
||||
@app.post(
|
||||
"/api/chat/acceptance",
|
||||
response_model=WebChatAcceptanceResponse,
|
||||
responses={
|
||||
400: {"model": WebErrorResponse},
|
||||
404: {"model": WebErrorResponse},
|
||||
},
|
||||
)
|
||||
async def chat_acceptance(request: Request, payload: WebChatAcceptanceRequest) -> WebChatAcceptanceResponse:
|
||||
agent_service = get_agent_service(request)
|
||||
try:
|
||||
result = await agent_service.submit_acceptance(
|
||||
session_id=payload.session_id,
|
||||
run_id=payload.run_id,
|
||||
acceptance_type=payload.acceptance_type,
|
||||
comment=payload.comment,
|
||||
)
|
||||
except ValueError as exc:
|
||||
detail = str(exc)
|
||||
status_code = 404 if "No internal task" in detail else 400
|
||||
raise HTTPException(status_code=status_code, detail=detail) from exc
|
||||
|
||||
return WebChatAcceptanceResponse(**result)
|
||||
|
||||
@app.post(
|
||||
"/api/chat/feedback",
|
||||
response_model=WebChatFeedbackResponse,
|
||||
@ -1893,10 +1929,10 @@ def create_app(
|
||||
async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
|
||||
agent_service = get_agent_service(request)
|
||||
try:
|
||||
result = await agent_service.submit_feedback(
|
||||
result = await agent_service.submit_acceptance(
|
||||
session_id=payload.session_id,
|
||||
run_id=payload.run_id,
|
||||
feedback_type=payload.feedback_type,
|
||||
acceptance_type=payload.feedback_type,
|
||||
comment=payload.comment,
|
||||
)
|
||||
except ValueError as exc:
|
||||
@ -1915,15 +1951,21 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An
|
||||
role = event.get("role")
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
content = event.get("content") or ""
|
||||
comparable_content = str(content).replace("\u200b", "").replace("\u200c", "").replace("\u200d", "").replace("\ufeff", "")
|
||||
if role == "assistant" and not comparable_content.strip():
|
||||
continue
|
||||
content = _sanitize_user_visible_assistant_content(role=role, content=content)
|
||||
messages.append(
|
||||
{
|
||||
"role": role,
|
||||
"content": event.get("content") or "",
|
||||
"content": content,
|
||||
"timestamp": _iso_from_timestamp(event.get("timestamp")),
|
||||
"run_id": event.get("run_id"),
|
||||
"task_id": event.get("task_id"),
|
||||
"task_status": event.get("task_status"),
|
||||
"validation_status": event.get("validation_status"),
|
||||
"evidence_status": event.get("evidence_status"),
|
||||
"acceptance_state": event.get("acceptance_state"),
|
||||
"feedback_state": event.get("feedback_state"),
|
||||
"feedback_error": event.get("feedback_error"),
|
||||
"message_type": event.get("message_type"),
|
||||
@ -2142,6 +2184,7 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
|
||||
content = (record.content or "").strip()
|
||||
if not content:
|
||||
continue
|
||||
content = _sanitize_user_visible_assistant_content(role=record.role, content=content)
|
||||
messages.append(
|
||||
{
|
||||
"role": record.role,
|
||||
@ -2150,7 +2193,6 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
|
||||
"tool_name": record.tool_name,
|
||||
}
|
||||
)
|
||||
validation = run_record.validation_result if run_record is not None else None
|
||||
views.append(
|
||||
{
|
||||
"run_id": run_id,
|
||||
@ -2163,7 +2205,8 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
|
||||
"attempt_index": run_record.attempt_index if run_record is not None else None,
|
||||
"task_text": run_record.task_text if run_record is not None else "",
|
||||
"messages": messages,
|
||||
"validation_result": validation,
|
||||
"evidence_status": "recorded",
|
||||
"validation_result": None,
|
||||
}
|
||||
)
|
||||
return views
|
||||
@ -2428,12 +2471,6 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
|
||||
return dict(value)
|
||||
|
||||
|
||||
def _validation_status(validation_result: dict[str, Any] | None) -> str:
|
||||
if validation_result is None:
|
||||
return "unknown"
|
||||
return "passed" if validation_result.get("accepted") is True else "failed"
|
||||
|
||||
|
||||
def _websocket_input_metadata(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
||||
result: dict[str, Any] = dict(metadata)
|
||||
@ -2467,13 +2504,15 @@ def _int_or_none(value: Any) -> int | None:
|
||||
|
||||
|
||||
def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
|
||||
validation_result = getattr(result, "validation_result", None)
|
||||
task_id = getattr(result, "task_id", None)
|
||||
task_status = getattr(result, "task_status", None)
|
||||
return {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": getattr(result, "output_text", "") or "",
|
||||
"content": _sanitize_user_visible_assistant_content(
|
||||
role="assistant",
|
||||
content=getattr(result, "output_text", "") or "",
|
||||
),
|
||||
"session_id": getattr(result, "session_id", None),
|
||||
"run_id": getattr(result, "run_id", None),
|
||||
"finish_reason": getattr(result, "finish_reason", None),
|
||||
@ -2483,17 +2522,39 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
|
||||
"usage": dict(getattr(result, "usage", {}) or {}),
|
||||
"task_id": task_id,
|
||||
"task_status": task_status,
|
||||
"validation_result": validation_result,
|
||||
"validation_status": _validation_status(validation_result),
|
||||
"evidence_status": "recorded" if task_id else None,
|
||||
"validation_result": None,
|
||||
"metadata": {
|
||||
"task_id": task_id,
|
||||
"task_status": task_status,
|
||||
"validation_result": validation_result,
|
||||
"evidence_status": "recorded" if task_id else None,
|
||||
"input_metadata": _websocket_input_metadata(input_payload),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _sanitize_user_visible_assistant_content(*, role: str, content: str) -> str:
|
||||
if role != "assistant":
|
||||
return content
|
||||
if _looks_like_raw_tool_call(content):
|
||||
return RAW_TOOL_CALL_DISPLAY_FALLBACK
|
||||
return content
|
||||
|
||||
|
||||
def _looks_like_raw_tool_call(content: str | None) -> bool:
|
||||
if not content:
|
||||
return False
|
||||
stripped = content.strip()
|
||||
lowered = stripped.lower()
|
||||
return (
|
||||
lowered.startswith("<tool_call")
|
||||
and lowered.endswith("</tool_call>")
|
||||
) or (
|
||||
lowered.startswith("<function=")
|
||||
and lowered.endswith("</function>")
|
||||
)
|
||||
|
||||
|
||||
def _provider_enabled(provider_name: str, provider_cfg: Any) -> bool:
|
||||
if provider_cfg is None or provider_name == "custom":
|
||||
return False
|
||||
@ -2980,6 +3041,7 @@ def _write_config_json(path: Path, data: dict[str, Any]) -> None:
|
||||
def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
|
||||
config = load_config(config_path=config_path)
|
||||
agent_service.loader.config = config
|
||||
agent_service._apply_configured_profile_defaults() # noqa: SLF001
|
||||
loop = getattr(agent_service, "_loop", None)
|
||||
loaded = getattr(loop, "loaded", None) if loop is not None else None
|
||||
if loaded is not None:
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
"""Web request and response schemas."""
|
||||
|
||||
from .chat import (
|
||||
WebChatAcceptanceRequest,
|
||||
WebChatAcceptanceResponse,
|
||||
WebChatFeedbackRequest,
|
||||
WebChatFeedbackResponse,
|
||||
WebChatRequest,
|
||||
@ -13,6 +15,8 @@ from .chat import (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"WebChatAcceptanceRequest",
|
||||
"WebChatAcceptanceResponse",
|
||||
"WebChatFeedbackRequest",
|
||||
"WebChatFeedbackResponse",
|
||||
"WebChatRequest",
|
||||
|
||||
@ -82,11 +82,34 @@ class WebChatResponse(BaseModel):
|
||||
usage: dict[str, Any] = Field(default_factory=dict)
|
||||
task_id: str | None = None
|
||||
task_status: str | None = None
|
||||
evidence_status: str | None = None
|
||||
acceptance_state: str | None = None
|
||||
validation_result: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class WebChatAcceptanceRequest(BaseModel):
|
||||
"""User acceptance on the latest assistant result in chat."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
acceptance_type: str
|
||||
comment: str | None = None
|
||||
|
||||
|
||||
class WebChatAcceptanceResponse(BaseModel):
|
||||
"""Acceptance recording result."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
task_id: str
|
||||
task_status: str
|
||||
acceptance_type: str
|
||||
feedback_type: str
|
||||
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WebChatFeedbackRequest(BaseModel):
|
||||
"""Feedback on the latest assistant result in chat."""
|
||||
"""Backward-compatible feedback payload."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
@ -94,15 +117,8 @@ class WebChatFeedbackRequest(BaseModel):
|
||||
comment: str | None = None
|
||||
|
||||
|
||||
class WebChatFeedbackResponse(BaseModel):
|
||||
"""Feedback recording result."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
task_id: str
|
||||
task_status: str
|
||||
feedback_type: str
|
||||
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
|
||||
class WebChatFeedbackResponse(WebChatAcceptanceResponse):
|
||||
"""Backward-compatible feedback response."""
|
||||
|
||||
|
||||
class WebProviderConfigRequest(BaseModel):
|
||||
|
||||
@ -29,9 +29,9 @@ from beaver.tasks import (
|
||||
TaskEvidencePacket,
|
||||
TaskExecutionPlan,
|
||||
TaskRecord,
|
||||
ValidationResult,
|
||||
render_task_evidence,
|
||||
)
|
||||
from beaver.tasks.service import normalize_acceptance_type
|
||||
|
||||
|
||||
NOTIFICATION_SESSION_ID = "notify:default:scheduled"
|
||||
@ -60,11 +60,19 @@ class AgentService:
|
||||
) -> None:
|
||||
self.profile = profile or AgentProfile()
|
||||
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
|
||||
self._apply_configured_profile_defaults()
|
||||
self._loop: AgentLoop | None = None
|
||||
self._run_task: asyncio.Task[None] | None = None
|
||||
self._main_agent_router = MainAgentRouter()
|
||||
self._runtime_services: dict[str, Any] = {}
|
||||
|
||||
def _apply_configured_profile_defaults(self) -> None:
|
||||
defaults = self.loader.config.agents_defaults
|
||||
if defaults.max_context_messages is not None:
|
||||
self.profile.max_context_messages = max(1, defaults.max_context_messages)
|
||||
if defaults.max_tool_iterations is not None:
|
||||
self.profile.max_tool_iterations = max(0, defaults.max_tool_iterations)
|
||||
|
||||
def create_loop(self) -> AgentLoop:
|
||||
"""创建并缓存当前 service 使用的 AgentLoop。"""
|
||||
|
||||
@ -232,7 +240,7 @@ class AgentService:
|
||||
|
||||
Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
|
||||
This entry bypasses the main-agent classifier and forces Task mode so
|
||||
every trigger produces a TaskRecord, validation, feedback state, and a
|
||||
every trigger produces a TaskRecord, evidence, acceptance state, and a
|
||||
run_id that the scheduled-task history can link to.
|
||||
"""
|
||||
|
||||
@ -280,9 +288,9 @@ class AgentService:
|
||||
result.run_id,
|
||||
{
|
||||
"message_type": "scheduled_reply",
|
||||
"scheduled_job_id": job.id,
|
||||
"scheduled_run_id": run.scheduled_run_id,
|
||||
"cron_job_name": job.name,
|
||||
"scheduled_job_id": cron_job_id,
|
||||
"scheduled_run_id": scheduled_run_id,
|
||||
"cron_job_name": cron_job_name,
|
||||
"mode": "notification",
|
||||
},
|
||||
)
|
||||
@ -403,15 +411,15 @@ class AgentService:
|
||||
},
|
||||
)
|
||||
|
||||
async def submit_feedback(
|
||||
async def submit_acceptance(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
run_id: str,
|
||||
feedback_type: str,
|
||||
acceptance_type: str,
|
||||
comment: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Record chat feedback for the internal task linked to a run."""
|
||||
"""Record user acceptance for the internal task linked to a run."""
|
||||
|
||||
loaded = self.create_loop().boot()
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
@ -419,32 +427,31 @@ class AgentService:
|
||||
if task is None or task.session_id != session_id:
|
||||
raise ValueError(f"No internal task found for run_id={run_id!r}")
|
||||
|
||||
normalized = feedback_type.strip().lower()
|
||||
if normalized not in {"satisfied", "revise", "abandon"}:
|
||||
raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
|
||||
normalized = normalize_acceptance_type(acceptance_type)
|
||||
legacy_feedback_type = "satisfied" if normalized == "accept" else normalized
|
||||
|
||||
already_recorded = any(
|
||||
item.get("run_id") == run_id and item.get("feedback_type") == normalized
|
||||
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
|
||||
for item in task.feedback
|
||||
)
|
||||
conflicting_feedback = next(
|
||||
conflicting_acceptance = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("feedback_type") != normalized
|
||||
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_feedback is not None:
|
||||
if conflicting_acceptance is not None:
|
||||
raise ValueError(
|
||||
f"Feedback for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_feedback.get('feedback_type')!r}"
|
||||
f"Acceptance for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_acceptance.get('acceptance_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not already_recorded:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
updated = task if already_recorded else task_service.add_feedback(
|
||||
updated = task if already_recorded else task_service.add_acceptance(
|
||||
task.task_id,
|
||||
feedback_type=normalized,
|
||||
acceptance_type=normalized,
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
@ -455,7 +462,8 @@ class AgentService:
|
||||
{
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"feedback_state": normalized,
|
||||
"acceptance_state": normalized,
|
||||
"feedback_state": legacy_feedback_type,
|
||||
},
|
||||
)
|
||||
if not already_recorded:
|
||||
@ -463,10 +471,11 @@ class AgentService:
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="system",
|
||||
event_type="task_feedback_recorded",
|
||||
event_type="task_acceptance_recorded",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
},
|
||||
@ -475,35 +484,36 @@ class AgentService:
|
||||
)
|
||||
|
||||
generated_candidates = []
|
||||
validation = ValidationResult.from_dict(updated.validation_result)
|
||||
if not already_recorded:
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
feedback_payload = {
|
||||
"feedback_type": normalized,
|
||||
acceptance_payload = {
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment or "",
|
||||
"task_status": updated.status,
|
||||
"final_accepted_run_id": updated.metadata.get("final_accepted_run_id"),
|
||||
}
|
||||
run_memory_store.update_run_record(
|
||||
run_id,
|
||||
success=normalized == "satisfied",
|
||||
feedback=feedback_payload,
|
||||
success=normalized == "accept",
|
||||
feedback=acceptance_payload,
|
||||
)
|
||||
run_memory_store.update_skill_effects_for_run(
|
||||
run_id,
|
||||
success=normalized == "satisfied",
|
||||
feedback_score=self._feedback_score_for_learning(normalized, validation),
|
||||
success=normalized == "accept",
|
||||
feedback_score=self._acceptance_score_for_learning(normalized),
|
||||
notes=(comment or normalized).strip(),
|
||||
)
|
||||
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
|
||||
skill_learning_service.rescore_skill_versions()
|
||||
if already_recorded:
|
||||
generated_candidates = []
|
||||
elif normalized == "satisfied" and validation is not None and validation.accepted:
|
||||
elif normalized == "accept":
|
||||
generated_candidates = [
|
||||
item.to_dict()
|
||||
for item in skill_learning_service.build_learning_candidates_for_task(
|
||||
updated.task_id,
|
||||
trigger_run_id=run_id,
|
||||
final_accepted_run_id=run_id,
|
||||
)
|
||||
]
|
||||
elif normalized == "abandon":
|
||||
@ -514,7 +524,8 @@ class AgentService:
|
||||
event_type="task_failure_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": updated.task_id,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment or "",
|
||||
"task_status": updated.status,
|
||||
"durable_memory_written": False,
|
||||
@ -528,10 +539,28 @@ class AgentService:
|
||||
"run_id": run_id,
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"learning_candidates": generated_candidates,
|
||||
}
|
||||
|
||||
async def submit_feedback(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
run_id: str,
|
||||
feedback_type: str,
|
||||
comment: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Backward-compatible wrapper for older clients."""
|
||||
|
||||
return await self.submit_acceptance(
|
||||
session_id=session_id,
|
||||
run_id=run_id,
|
||||
acceptance_type=feedback_type,
|
||||
comment=comment,
|
||||
)
|
||||
|
||||
async def _process_with_main_agent(
|
||||
self,
|
||||
message: str,
|
||||
@ -591,7 +620,7 @@ class AgentService:
|
||||
else active_task
|
||||
)
|
||||
if active_task is not None and decision.action == "revise_task" and task.task_id == active_task.task_id:
|
||||
task = self._record_revision_feedback_for_task(
|
||||
task = self._record_revision_acceptance_for_task(
|
||||
loaded,
|
||||
task=task,
|
||||
session_id=session_id,
|
||||
@ -599,7 +628,7 @@ class AgentService:
|
||||
)
|
||||
return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)
|
||||
|
||||
def _record_revision_feedback_for_task(
|
||||
def _record_revision_acceptance_for_task(
|
||||
self,
|
||||
loaded: Any,
|
||||
*,
|
||||
@ -607,9 +636,9 @@ class AgentService:
|
||||
session_id: str,
|
||||
comment: str,
|
||||
) -> TaskRecord:
|
||||
"""Mark the latest feedback-eligible run as revised before continuing a task."""
|
||||
"""Mark the latest acceptance-eligible run as revised before continuing a task."""
|
||||
|
||||
if task.status not in {"awaiting_feedback", "needs_revision"}:
|
||||
if task.status not in {"awaiting_acceptance", "needs_revision"}:
|
||||
return task
|
||||
run_id = next((item for item in reversed(task.run_ids) if item), None)
|
||||
if not run_id:
|
||||
@ -617,15 +646,15 @@ class AgentService:
|
||||
|
||||
existing = next((item for item in task.feedback if item.get("run_id") == run_id), None)
|
||||
if existing is not None:
|
||||
if existing.get("feedback_type") != "revise":
|
||||
if existing.get("acceptance_type") != "revise":
|
||||
return task
|
||||
updated = task
|
||||
already_recorded = True
|
||||
else:
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
updated = task_service.add_feedback(
|
||||
updated = task_service.add_acceptance(
|
||||
task.task_id,
|
||||
feedback_type="revise",
|
||||
acceptance_type="revise",
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
@ -638,6 +667,7 @@ class AgentService:
|
||||
{
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"acceptance_state": "revise",
|
||||
"feedback_state": "revise",
|
||||
},
|
||||
)
|
||||
@ -648,9 +678,10 @@ class AgentService:
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="system",
|
||||
event_type="task_feedback_recorded",
|
||||
event_type="task_acceptance_recorded",
|
||||
event_payload={
|
||||
"task_id": updated.task_id,
|
||||
"acceptance_type": "revise",
|
||||
"feedback_type": "revise",
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
@ -659,12 +690,12 @@ class AgentService:
|
||||
content=comment,
|
||||
context_visible=False,
|
||||
)
|
||||
validation = ValidationResult.from_dict(updated.validation_result)
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
run_memory_store.update_run_record(
|
||||
run_id,
|
||||
success=False,
|
||||
feedback={
|
||||
"acceptance_type": "revise",
|
||||
"feedback_type": "revise",
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
@ -673,7 +704,7 @@ class AgentService:
|
||||
run_memory_store.update_skill_effects_for_run(
|
||||
run_id,
|
||||
success=False,
|
||||
feedback_score=self._feedback_score_for_learning("revise", validation),
|
||||
feedback_score=self._acceptance_score_for_learning("revise"),
|
||||
notes=comment.strip() or "revise",
|
||||
)
|
||||
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
|
||||
@ -690,236 +721,185 @@ class AgentService:
|
||||
) -> AgentRunResult:
|
||||
loaded = self.create_loop().boot()
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
validation_service = self._require_loaded(loaded, "validation_service")
|
||||
task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
|
||||
session_manager = self._require_loaded(loaded, "session_manager")
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
|
||||
last_result: AgentRunResult | None = None
|
||||
latest_validation: ValidationResult | None = None
|
||||
base_execution_context = kwargs.get("execution_context")
|
||||
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
|
||||
kwargs = dict(kwargs)
|
||||
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
|
||||
kwargs["provider_bundle"] = provider_bundle
|
||||
|
||||
for attempt_index in (1, 2):
|
||||
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
|
||||
plan = await task_execution_planner.plan(
|
||||
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
|
||||
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
|
||||
plan = await task_execution_planner.plan(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_execution_planned",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
provider_bundle=provider_bundle,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(loaded, kwargs),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_execution_planned",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(loaded, kwargs),
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_mode": True,
|
||||
"attempt_index": attempt_index,
|
||||
"allow_candidate_generation": False,
|
||||
}
|
||||
)
|
||||
if attempt_index == 2 and latest_validation is not None:
|
||||
revision_context = latest_validation.recommended_revision_prompt.strip()
|
||||
if revision_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(
|
||||
base_execution_context,
|
||||
f"Task validation revision request:\n{revision_context}",
|
||||
team_execution_context,
|
||||
)
|
||||
elif team_execution_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
last_result = result
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_synthesis_completed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"main_run_id": result.run_id,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
},
|
||||
)
|
||||
task = task_service.append_run(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
skill_names=self._skill_names_for_run(loaded, result.run_id),
|
||||
)
|
||||
evidence_packet = self._build_task_evidence_packet(
|
||||
session_manager=session_manager,
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
validation = await validation_service.validate_task_result(
|
||||
task=task,
|
||||
user_message=message,
|
||||
final_output=result.output_text,
|
||||
evidence_packet=evidence_packet,
|
||||
evidence_text=evidence_text,
|
||||
transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
|
||||
tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
|
||||
team_summaries=team_summaries,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
latest_validation = validation
|
||||
has_usable_answer = bool(result.output_text.strip()) and (
|
||||
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
|
||||
)
|
||||
task = task_service.record_validation(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
validation,
|
||||
final_attempt=(
|
||||
attempt_index == 2
|
||||
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
|
||||
),
|
||||
has_usable_answer=has_usable_answer,
|
||||
)
|
||||
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
result.run_id,
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_status": task.status,
|
||||
"validation_status": "passed" if validation.accepted else "failed",
|
||||
},
|
||||
)
|
||||
validation_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_mode": True,
|
||||
"attempt_index": attempt_index,
|
||||
"allow_candidate_generation": False,
|
||||
}
|
||||
retry_scheduled = validation.status == "rejected" and attempt_index == 1
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
role="system",
|
||||
event_type="task_validation_snapshotted",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"validation_result": validation.to_dict(),
|
||||
"validation_debug": validation_debug,
|
||||
"retry_scheduled": retry_scheduled,
|
||||
},
|
||||
content=validation.recommended_revision_prompt or None,
|
||||
context_visible=False,
|
||||
)
|
||||
if retry_scheduled:
|
||||
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = validation.to_dict()
|
||||
if not retry_scheduled:
|
||||
return result
|
||||
)
|
||||
if team_execution_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
if last_result is None: # pragma: no cover - defensive
|
||||
raise RuntimeError("Task mode did not produce a run result")
|
||||
return last_result
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_synthesis_completed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"main_run_id": result.run_id,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
},
|
||||
)
|
||||
task = task_service.append_run(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
skill_names=self._skill_names_for_run(loaded, result.run_id),
|
||||
)
|
||||
evidence_packet = self._build_task_evidence_packet(
|
||||
session_manager=session_manager,
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
evidence_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
}
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
result.run_id,
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_status": task.status,
|
||||
"evidence_status": "recorded",
|
||||
},
|
||||
)
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
role="system",
|
||||
event_type="task_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"evidence_debug": evidence_debug,
|
||||
},
|
||||
content=None,
|
||||
context_visible=False,
|
||||
)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = None
|
||||
return result
|
||||
|
||||
async def _run_team_for_task(
|
||||
self,
|
||||
@ -986,12 +966,10 @@ class AgentService:
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
|
||||
if feedback_type == "satisfied":
|
||||
if validation is not None:
|
||||
return max(0.0, min(1.0, float(validation.score)))
|
||||
def _acceptance_score_for_learning(acceptance_type: str) -> float:
|
||||
if acceptance_type == "accept":
|
||||
return 1.0
|
||||
if feedback_type == "revise":
|
||||
if acceptance_type == "revise":
|
||||
return 0.5
|
||||
return 0.0
|
||||
|
||||
@ -1001,12 +979,11 @@ class AgentService:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None = None,
|
||||
plan: TaskExecutionPlan | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
) -> str:
|
||||
phase = f"attempt_{attempt_index}"
|
||||
if latest_validation is not None:
|
||||
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
|
||||
phase = f"revision_attempt_{attempt_index}"
|
||||
elif plan is not None and plan.is_team:
|
||||
phase = f"team_synthesis_attempt_{attempt_index}"
|
||||
@ -1027,24 +1004,14 @@ class AgentService:
|
||||
)
|
||||
else:
|
||||
sections.append("Previously activated skills:\nNone")
|
||||
if latest_validation is not None:
|
||||
validation_lines = [
|
||||
f"accepted: {latest_validation.accepted}",
|
||||
f"score: {latest_validation.score}",
|
||||
]
|
||||
if latest_validation.issues:
|
||||
validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
|
||||
if latest_validation.missing_requirements:
|
||||
validation_lines.append(
|
||||
"missing requirements:\n"
|
||||
+ "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
|
||||
)
|
||||
if latest_validation.recommended_revision_prompt:
|
||||
validation_lines.append(
|
||||
"recommended revision:\n"
|
||||
+ latest_validation.recommended_revision_prompt
|
||||
)
|
||||
sections.append("Validation feedback:\n" + "\n".join(validation_lines))
|
||||
if task.feedback:
|
||||
history_lines = []
|
||||
for item in task.feedback[-5:]:
|
||||
kind = item.get("acceptance_type") or item.get("feedback_type")
|
||||
comment = item.get("comment") or ""
|
||||
run_id = item.get("run_id") or ""
|
||||
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
|
||||
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
|
||||
if plan is not None:
|
||||
plan_lines = [
|
||||
f"mode: {plan.mode}",
|
||||
@ -1313,7 +1280,8 @@ class AgentService:
|
||||
"inbound_metadata": dict(inbound.metadata),
|
||||
"task_id": getattr(result, "task_id", None),
|
||||
"task_status": getattr(result, "task_status", None),
|
||||
"validation_result": getattr(result, "validation_result", None),
|
||||
"evidence_status": "recorded" if getattr(result, "task_id", None) else None,
|
||||
"validation_result": None,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@ -235,26 +235,45 @@ class SessionProcessProjector:
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
elif record.event_type == "task_validation_snapshotted":
|
||||
validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
|
||||
accepted = bool(validation.get("accepted"))
|
||||
root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
|
||||
root["finished_at"] = created_at if root["status"] == "done" else None
|
||||
elif record.event_type == "task_evidence_recorded":
|
||||
root["status"] = "waiting"
|
||||
root["finished_at"] = None
|
||||
add_event(
|
||||
event_id=_event_id(record, "validation"),
|
||||
event_id=_event_id(record, "evidence"),
|
||||
run_id=record.run_id or root_run_id,
|
||||
parent_run_id=root_run_id if record.run_id else None,
|
||||
kind="run_status",
|
||||
actor_type="system",
|
||||
actor_id="validator",
|
||||
actor_name="Validator",
|
||||
text=(
|
||||
f"Validation {'passed' if accepted else 'failed'} "
|
||||
f"(score={validation.get('score')})."
|
||||
+ (" Retry scheduled." if payload.get("retry_scheduled") else "")
|
||||
),
|
||||
actor_id="evidence-recorder",
|
||||
actor_name="Evidence",
|
||||
text="Task evidence was recorded; waiting for user acceptance.",
|
||||
created_at=created_at,
|
||||
status="done" if accepted else "error",
|
||||
status="done",
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
elif record.event_type == "task_acceptance_recorded":
|
||||
acceptance_type = str(payload.get("acceptance_type") or payload.get("feedback_type") or "")
|
||||
if acceptance_type == "accept":
|
||||
root["status"] = "done"
|
||||
root["finished_at"] = created_at
|
||||
elif acceptance_type == "abandon":
|
||||
root["status"] = "cancelled"
|
||||
root["finished_at"] = created_at
|
||||
else:
|
||||
root["status"] = "waiting"
|
||||
root["finished_at"] = None
|
||||
add_event(
|
||||
event_id=_event_id(record, "acceptance"),
|
||||
run_id=record.run_id or root_run_id,
|
||||
parent_run_id=root_run_id if record.run_id else None,
|
||||
kind="run_status",
|
||||
actor_type="user",
|
||||
actor_id="user-acceptance",
|
||||
actor_name="User Acceptance",
|
||||
text=f"User acceptance recorded: {acceptance_type or 'unknown'}.",
|
||||
created_at=created_at,
|
||||
status="done",
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
|
||||
@ -69,15 +69,24 @@ class SkillLearningService:
|
||||
existing_ids.add(candidate.candidate_id)
|
||||
return candidates
|
||||
|
||||
def build_learning_candidates_for_task(self, task_id: str, *, trigger_run_id: str) -> list[SkillLearningCandidate]:
|
||||
"""Build candidates scoped to a single validated and satisfied Task run."""
|
||||
def build_learning_candidates_for_task(
|
||||
self,
|
||||
task_id: str,
|
||||
*,
|
||||
final_accepted_run_id: str | None = None,
|
||||
trigger_run_id: str | None = None,
|
||||
) -> list[SkillLearningCandidate]:
|
||||
"""Build candidates from a user-accepted Task and all of its runs."""
|
||||
|
||||
final_accepted_run_id = final_accepted_run_id or trigger_run_id
|
||||
if not final_accepted_run_id:
|
||||
return []
|
||||
runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
|
||||
trigger_run = next((record for record in runs if record.run_id == trigger_run_id), None)
|
||||
if trigger_run is None or not self._is_confirmed_positive_run(trigger_run):
|
||||
final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
|
||||
if final_run is None or not self._is_task_accepted_run(final_run):
|
||||
return []
|
||||
|
||||
source_runs = [record for record in runs if self._is_confirmed_positive_run(record)]
|
||||
source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))
|
||||
if not source_runs:
|
||||
return []
|
||||
|
||||
@ -100,11 +109,16 @@ class SkillLearningService:
|
||||
source_session_ids=source_session_ids,
|
||||
related_skill_names=[],
|
||||
reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
|
||||
evidence={"task_id": task_id, "trigger_run_id": trigger_run_id, "theme": self._task_theme(trigger_run.task_text)},
|
||||
evidence={
|
||||
"task_id": task_id,
|
||||
"final_accepted_run_id": final_accepted_run_id,
|
||||
"source_run_ids": source_run_ids,
|
||||
"theme": self._task_theme(final_run.task_text),
|
||||
},
|
||||
status="open",
|
||||
priority=1,
|
||||
confidence=0.8,
|
||||
trigger_reason="validation_accepted_and_user_satisfied",
|
||||
trigger_reason="task_accepted",
|
||||
)
|
||||
)
|
||||
else:
|
||||
@ -137,13 +151,14 @@ class SkillLearningService:
|
||||
),
|
||||
evidence={
|
||||
"task_id": task_id,
|
||||
"trigger_run_id": trigger_run_id,
|
||||
"final_accepted_run_id": final_accepted_run_id,
|
||||
"source_run_ids": source_run_ids,
|
||||
"skill_version": receipt.skill_version,
|
||||
},
|
||||
status="open",
|
||||
priority=1,
|
||||
confidence=0.7,
|
||||
trigger_reason="validation_accepted_and_user_satisfied",
|
||||
trigger_reason="task_accepted",
|
||||
)
|
||||
)
|
||||
|
||||
@ -269,7 +284,7 @@ class SkillLearningService:
|
||||
groups.setdefault(key, []).append(record)
|
||||
candidates: list[SkillLearningCandidate] = []
|
||||
for theme, runs in groups.items():
|
||||
successful = [record for record in runs if self._is_confirmed_positive_run(record)]
|
||||
successful = [record for record in runs if self._is_task_accepted_run(record)]
|
||||
if len(successful) < 2:
|
||||
continue
|
||||
if any(record.activated_skills for record in successful):
|
||||
@ -290,7 +305,7 @@ class SkillLearningService:
|
||||
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
|
||||
pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
|
||||
for record in self.run_store.list_runs():
|
||||
if not self._is_confirmed_positive_run(record):
|
||||
if not self._is_task_accepted_run(record):
|
||||
continue
|
||||
unique = sorted({receipt.skill_name for receipt in record.activated_skills})
|
||||
for pair in combinations(unique, 2):
|
||||
@ -351,14 +366,15 @@ class SkillLearningService:
|
||||
return effects
|
||||
|
||||
@staticmethod
|
||||
def _is_confirmed_positive_run(record: RunRecord) -> bool:
|
||||
validation = record.validation_result or {}
|
||||
def _is_task_accepted_run(record: RunRecord) -> bool:
|
||||
feedback = record.feedback or {}
|
||||
acceptance_type = feedback.get("acceptance_type")
|
||||
if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
|
||||
acceptance_type = "accept"
|
||||
return (
|
||||
bool(record.success)
|
||||
and bool(record.task_id)
|
||||
and validation.get("accepted") is True
|
||||
and feedback.get("feedback_type") == "satisfied"
|
||||
and acceptance_type == "accept"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
|
||||
from .router import MainAgentRouter
|
||||
from .service import TaskService
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
from .validation import ValidationService
|
||||
|
||||
__all__ = [
|
||||
"EvidenceBuilder",
|
||||
@ -24,6 +23,5 @@ __all__ = [
|
||||
"ToolEvidence",
|
||||
"ValidationResult",
|
||||
"ValidationStatus",
|
||||
"ValidationService",
|
||||
"render_task_evidence",
|
||||
]
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
"""Models for internal task tracking and validation."""
|
||||
"""Models for internal task tracking and user acceptance."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@ -9,7 +9,12 @@ from typing import Any, Literal
|
||||
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
|
||||
|
||||
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
|
||||
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
|
||||
TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
|
||||
LEGACY_STATUS_MAP = {
|
||||
"validating": "running",
|
||||
"awaiting_feedback": "awaiting_acceptance",
|
||||
"needs_review": "awaiting_acceptance",
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -113,11 +118,11 @@ class TaskRecord:
|
||||
|
||||
@property
|
||||
def is_execution_active(self) -> bool:
|
||||
return self.status in {"running", "validating"}
|
||||
return self.status == "running"
|
||||
|
||||
@property
|
||||
def requires_user_action(self) -> bool:
|
||||
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
|
||||
return self.status in {"awaiting_acceptance", "needs_revision"}
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
@ -137,6 +142,7 @@ class TaskRecord:
|
||||
"satisfaction": self.satisfaction,
|
||||
"run_ids": list(self.run_ids),
|
||||
"skill_names": list(self.skill_names),
|
||||
"acceptance": list(self.feedback),
|
||||
"feedback": list(self.feedback),
|
||||
"validation_result": self.validation_result,
|
||||
"metadata": dict(self.metadata),
|
||||
@ -152,7 +158,7 @@ class TaskRecord:
|
||||
goal=str(payload.get("goal") or payload.get("description") or ""),
|
||||
constraints=[str(item) for item in payload.get("constraints") or []],
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
status=str(payload.get("status") or "open"),
|
||||
status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
|
||||
creator=str(payload.get("creator") or "main-agent"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
updated_at=str(payload.get("updated_at") or ""),
|
||||
@ -161,7 +167,11 @@ class TaskRecord:
|
||||
satisfaction=_optional_float(payload.get("satisfaction")),
|
||||
run_ids=[str(item) for item in payload.get("run_ids") or []],
|
||||
skill_names=[str(item) for item in payload.get("skill_names") or []],
|
||||
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
|
||||
feedback=[
|
||||
_normalize_acceptance_entry(dict(item))
|
||||
for item in (payload.get("acceptance") or payload.get("feedback") or [])
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
|
||||
metadata=dict(payload.get("metadata") or {}),
|
||||
)
|
||||
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return float(value)
|
||||
|
||||
|
||||
def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
|
||||
if entry.get("acceptance_type") is None and entry.get("feedback_type") is not None:
|
||||
feedback_type = str(entry.get("feedback_type") or "")
|
||||
entry["acceptance_type"] = "accept" if feedback_type == "satisfied" else feedback_type
|
||||
if entry.get("feedback_type") is None and entry.get("acceptance_type") is not None:
|
||||
acceptance_type = str(entry.get("acceptance_type") or "")
|
||||
entry["feedback_type"] = "satisfied" if acceptance_type == "accept" else acceptance_type
|
||||
return entry
|
||||
|
||||
@ -10,7 +10,7 @@ from typing import Any, Literal
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
from .models import TaskRecord
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
|
||||
|
||||
@ -76,7 +76,6 @@ class TaskExecutionPlanner:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
timeout_seconds: float = 30.0,
|
||||
) -> TaskExecutionPlan:
|
||||
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
),
|
||||
},
|
||||
],
|
||||
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None,
|
||||
) -> str:
|
||||
validation_note = ""
|
||||
if latest_validation is not None:
|
||||
validation_note = (
|
||||
"\nPrevious validation issues:\n"
|
||||
+ json.dumps(latest_validation.to_dict(), ensure_ascii=False)
|
||||
)
|
||||
history_note = ""
|
||||
if task.feedback:
|
||||
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
|
||||
return (
|
||||
"Decide execution mode for this internal Task attempt.\n"
|
||||
"Use mode=team only when independent research, review, implementation slices, or staged checks "
|
||||
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
|
||||
f"Task goal:\n{task.goal}\n\n"
|
||||
f"Current user request:\n{user_message}\n\n"
|
||||
f"Attempt index: {attempt_index}\n"
|
||||
f"{validation_note}"
|
||||
f"{history_note}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -7,7 +7,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from .models import TaskEvent, TaskRecord, ValidationResult
|
||||
from .models import TaskEvent, TaskRecord
|
||||
from .store import TaskStore
|
||||
|
||||
|
||||
@ -105,38 +105,70 @@ class TaskService:
|
||||
for name in skill_names or []:
|
||||
if name not in task.skill_names:
|
||||
task.skill_names.append(name)
|
||||
task.status = "awaiting_acceptance"
|
||||
task.updated_at = self._now()
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||
self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||
return task
|
||||
|
||||
def record_validation(
|
||||
def add_acceptance(
|
||||
self,
|
||||
task_id: str,
|
||||
run_id: str,
|
||||
validation: ValidationResult,
|
||||
*,
|
||||
final_attempt: bool = True,
|
||||
has_usable_answer: bool = True,
|
||||
acceptance_type: str,
|
||||
comment: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
now = self._now()
|
||||
if validation.status == "accepted":
|
||||
task.status = "awaiting_feedback"
|
||||
elif validation.status in {"insufficient_evidence", "validator_error"}:
|
||||
task.status = "needs_review"
|
||||
elif validation.status == "rejected" and not final_attempt:
|
||||
normalized = normalize_acceptance_type(acceptance_type)
|
||||
matching_acceptance = any(
|
||||
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
|
||||
for item in task.feedback
|
||||
)
|
||||
conflicting_acceptance = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_acceptance is not None:
|
||||
raise ValueError(
|
||||
f"Acceptance for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_acceptance.get('acceptance_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not matching_acceptance:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
if matching_acceptance:
|
||||
return task
|
||||
|
||||
entry = {
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": "satisfied" if normalized == "accept" else normalized,
|
||||
"comment": comment or "",
|
||||
"run_id": run_id,
|
||||
"created_at": now,
|
||||
}
|
||||
task.feedback.append(entry)
|
||||
if normalized == "revise":
|
||||
task.status = "needs_revision"
|
||||
elif validation.status == "rejected" and has_usable_answer:
|
||||
task.status = "needs_review"
|
||||
else:
|
||||
task.status = "failed"
|
||||
elif normalized == "abandon":
|
||||
task.status = "abandoned"
|
||||
task.closed_at = now
|
||||
task.close_reason = "automatic validation rejected the final attempt"
|
||||
task.close_reason = comment or "abandoned"
|
||||
elif normalized == "accept":
|
||||
task.status = "closed"
|
||||
task.closed_at = now
|
||||
task.close_reason = "accepted"
|
||||
task.satisfaction = 1.0
|
||||
if run_id:
|
||||
task.metadata["final_accepted_run_id"] = run_id
|
||||
task.updated_at = now
|
||||
task.validation_result = validation.to_dict()
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
|
||||
self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
|
||||
return task
|
||||
|
||||
def add_feedback(
|
||||
@ -147,52 +179,12 @@ class TaskService:
|
||||
comment: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
now = self._now()
|
||||
matching_feedback = any(
|
||||
item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
|
||||
for item in task.feedback
|
||||
return self.add_acceptance(
|
||||
task_id,
|
||||
acceptance_type=feedback_type,
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
conflicting_feedback = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_feedback is not None:
|
||||
raise ValueError(
|
||||
f"Feedback for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_feedback.get('feedback_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not matching_feedback:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
if matching_feedback:
|
||||
return task
|
||||
|
||||
entry = {
|
||||
"feedback_type": feedback_type,
|
||||
"comment": comment or "",
|
||||
"run_id": run_id,
|
||||
"created_at": now,
|
||||
}
|
||||
task.feedback.append(entry)
|
||||
if feedback_type == "revise":
|
||||
task.status = "needs_revision"
|
||||
elif feedback_type == "abandon":
|
||||
task.status = "abandoned"
|
||||
task.closed_at = now
|
||||
task.close_reason = comment or "abandoned"
|
||||
elif feedback_type == "satisfied":
|
||||
task.status = "closed"
|
||||
task.closed_at = now
|
||||
task.close_reason = "satisfied"
|
||||
task.satisfaction = 1.0
|
||||
task.updated_at = now
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
|
||||
return task
|
||||
|
||||
def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
|
||||
if len(words) <= 4:
|
||||
return cleaned[:40]
|
||||
return " ".join(words[:4])[:40]
|
||||
|
||||
|
||||
def normalize_acceptance_type(value: str) -> str:
|
||||
normalized = (value or "").strip().lower()
|
||||
if normalized == "satisfied":
|
||||
return "accept"
|
||||
if normalized not in {"accept", "revise", "abandon"}:
|
||||
raise ValueError("acceptance_type must be one of: accept, revise, abandon")
|
||||
return normalized
|
||||
|
||||
@ -1,154 +0,0 @@
|
||||
"""Automatic validation for internal Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
|
||||
|
||||
class ValidationService:
|
||||
async def validate_task_result(
|
||||
self,
|
||||
*,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
final_output: str,
|
||||
evidence_packet: Any | None = None,
|
||||
evidence_text: str = "",
|
||||
transcript_excerpt: str = "",
|
||||
tool_summaries: list[str] | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
) -> ValidationResult:
|
||||
provider = None
|
||||
model = None
|
||||
if provider_bundle is not None:
|
||||
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
|
||||
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
|
||||
model = getattr(runtime, "model", None)
|
||||
if provider is not None:
|
||||
try:
|
||||
return await self._validate_with_provider(
|
||||
provider=provider,
|
||||
model=model,
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
final_output=final_output,
|
||||
evidence_text=evidence_text,
|
||||
transcript_excerpt=transcript_excerpt,
|
||||
tool_summaries=tool_summaries or [],
|
||||
team_summaries=team_summaries or [],
|
||||
)
|
||||
except Exception as exc:
|
||||
return ValidationResult(
|
||||
status="validator_error",
|
||||
score=0.0,
|
||||
issues=[f"Validator failed: {exc}"],
|
||||
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
|
||||
missing_requirements=["User review is required because automatic validation failed."],
|
||||
recommended_revision_prompt=(
|
||||
"Review the answer and evidence, then decide whether to revise or accept it."
|
||||
),
|
||||
validator="llm_error",
|
||||
)
|
||||
return self._heuristic_validate(final_output)
|
||||
|
||||
async def _validate_with_provider(
|
||||
self,
|
||||
*,
|
||||
provider: Any,
|
||||
model: str | None,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
final_output: str,
|
||||
evidence_text: str,
|
||||
transcript_excerpt: str,
|
||||
tool_summaries: list[str],
|
||||
team_summaries: list[str],
|
||||
) -> ValidationResult:
|
||||
legacy_context = "" if evidence_text else (
|
||||
f"Transcript excerpt:\n{transcript_excerpt}\n\n"
|
||||
f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
|
||||
f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
|
||||
)
|
||||
prompt = (
|
||||
"Validate whether the assistant output satisfies the task. "
|
||||
"Return only compact JSON with keys: passed, score, issues, "
|
||||
"missing_requirements, recommended_revision_prompt.\n\n"
|
||||
f"Task goal:\n{task.goal}\n\n"
|
||||
f"Current user request:\n{user_message}\n\n"
|
||||
f"Evidence packet:\n{evidence_text}\n\n"
|
||||
f"{legacy_context}"
|
||||
f"Assistant final output:\n{final_output}"
|
||||
)
|
||||
response = await provider.chat(
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a strict task result validator."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
tools=None,
|
||||
model=model,
|
||||
max_tokens=4096,
|
||||
temperature=0.0,
|
||||
)
|
||||
payload = self._parse_json_object(response.content or "")
|
||||
status = payload.get("status")
|
||||
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
|
||||
status = (
|
||||
"accepted"
|
||||
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
|
||||
else "rejected"
|
||||
)
|
||||
return ValidationResult(
|
||||
status=status,
|
||||
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
|
||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||
validator="llm",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _heuristic_validate(final_output: str) -> ValidationResult:
|
||||
text = final_output.strip()
|
||||
if not text:
|
||||
return ValidationResult(
|
||||
passed=False,
|
||||
score=0.0,
|
||||
issues=["Assistant output is empty."],
|
||||
missing_requirements=["A non-empty result is required."],
|
||||
recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
|
||||
validator="heuristic",
|
||||
)
|
||||
lowered = text.lower()
|
||||
if "run failed before completion" in lowered or "tool loop stopped" in lowered:
|
||||
return ValidationResult(
|
||||
passed=False,
|
||||
score=0.35,
|
||||
issues=["The run did not complete cleanly."],
|
||||
missing_requirements=["A successful final result is required."],
|
||||
recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
|
||||
validator="heuristic",
|
||||
)
|
||||
return ValidationResult(passed=True, score=0.85, validator="heuristic")
|
||||
|
||||
@staticmethod
|
||||
def _parse_json_object(text: str) -> dict[str, Any]:
|
||||
cleaned = text.strip()
|
||||
if cleaned.startswith("```"):
|
||||
cleaned = cleaned.strip("`")
|
||||
if cleaned.lower().startswith("json"):
|
||||
cleaned = cleaned[4:].strip()
|
||||
start = cleaned.find("{")
|
||||
end = cleaned.rfind("}")
|
||||
if start >= 0 and end >= start:
|
||||
cleaned = cleaned[start : end + 1]
|
||||
payload = json.loads(cleaned)
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError("validator response must be a JSON object")
|
||||
return payload
|
||||
Reference in New Issue
Block a user