feat(engine): 添加运行时上下文支持并重构工具迭代限制

添加 RuntimeContext 类用于捕获模型运行时的日期时间信息,
包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。

同时增加最大上下文消息数和工具迭代次数的配置选项,
将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。

BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。

- 添加 RuntimeContext 类和相关渲染方法
- 增加 max_context_messages 和 max_tool_iterations 配置
- 移除 ValidationService 相关代码
- 更新消息记录中的验证状态字段
- 添加原始工具调用检测和回退处理
This commit is contained in:
2026-05-26 11:18:35 +08:00
parent 16347caf5e
commit 6e9e74d1ee
57 changed files with 5710 additions and 1582 deletions

View File

@ -4,6 +4,7 @@ from .builder import (
ContextBuildInput,
ContextBuildResult,
ContextBuilder,
RuntimeContext,
SessionContext,
SkillContext,
)
@ -12,6 +13,7 @@ __all__ = [
"ContextBuildInput",
"ContextBuildResult",
"ContextBuilder",
"RuntimeContext",
"SessionContext",
"SkillContext",
]

View File

@ -80,6 +80,16 @@ class SessionContext:
parent_session_id: str | None = None
@dataclass(slots=True)
class RuntimeContext:
"""Per-run runtime facts that should be visible to the model."""
utc_datetime: str
local_datetime: str
timezone: str | None = None
utc_offset: str | None = None
@dataclass(slots=True)
class ContextBuildInput:
"""一次上下文构建所需的全部输入。
@ -103,6 +113,7 @@ class ContextBuildInput:
memory_snapshot: MemorySnapshot | None = None
activated_skills: list[SkillContext] = field(default_factory=list)
session_context: SessionContext | None = None
runtime_context: RuntimeContext | None = None
execution_context: str | None = None
extra_sections: list[str] = field(default_factory=list)
@ -143,9 +154,10 @@ class ContextBuilder:
1. Beaver user-facing assistant identity
2. base system prompt
3. session metadata
4. execution context
5. frozen memory snapshot
6. extra sections
4. runtime date/time
5. execution context
6. frozen memory snapshot
7. extra sections
这样设计的原因:
- 身份与总规则要最靠前
@ -164,6 +176,10 @@ class ContextBuilder:
if session_section:
sections.append(session_section)
runtime_section = self._render_runtime_section(build_input.runtime_context)
if runtime_section:
sections.append(runtime_section)
execution_context = (build_input.execution_context or "").strip()
if execution_context:
sections.append(f"# Execution Context\n\n{execution_context}")
@ -347,6 +363,31 @@ class ContextBuilder:
return None
return "# Current Session\n\n" + "\n".join(rows)
def _render_runtime_section(self, runtime_context: RuntimeContext | None) -> str | None:
"""Render date/time facts captured for the current model run."""
if runtime_context is None:
return None
rows: list[str] = []
if runtime_context.utc_datetime:
rows.append(f"Current UTC time: {runtime_context.utc_datetime}")
if runtime_context.local_datetime:
rows.append(f"Current local time: {runtime_context.local_datetime}")
if runtime_context.timezone:
rows.append(f"Local timezone: {runtime_context.timezone}")
if runtime_context.utc_offset:
rows.append(f"Local UTC offset: {runtime_context.utc_offset}")
if not rows:
return None
return (
"# Current Date and Time\n\n"
+ "\n".join(rows)
+ "\n\nUse this section as authoritative for relative date/time references such as "
'"today", "tomorrow", "now", "this week", and "next month".'
)
def build_skill_activation_messages(self, activated_skills: list[SkillContext]) -> list[dict[str, str]]:
"""把已激活 skill 转成显式消息。

View File

@ -24,7 +24,7 @@ from beaver.skills.learning.eval import SkillDraftEvaluator
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
from beaver.tasks import TaskExecutionPlanner, TaskService
from beaver.tasks.skill_resolver import TaskSkillResolver
from beaver.skills import SkillAssembler, SkillsLoader
from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
@ -91,7 +91,6 @@ class EngineLoadResult:
task_skill_resolver: TaskSkillResolver | None = None
task_service: TaskService | None = None
task_execution_planner: TaskExecutionPlanner | None = None
validation_service: ValidationService | None = None
mcp_manager: MCPConnectionManager | None = None
mcp_report: dict[str, dict] = field(default_factory=dict)
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
@ -166,7 +165,6 @@ class EngineLoader:
task_skill_resolver: TaskSkillResolver | None = None,
task_service: TaskService | None = None,
task_execution_planner: TaskExecutionPlanner | None = None,
validation_service: ValidationService | None = None,
) -> None:
self.config = config or load_config(workspace=workspace, config_path=config_path)
configured_workspace = self.config.agents_defaults.workspace
@ -192,7 +190,6 @@ class EngineLoader:
self._task_skill_resolver = task_skill_resolver
self._task_service = task_service
self._task_execution_planner = task_execution_planner
self._validation_service = validation_service
def load(self) -> EngineLoadResult:
"""装配当前主链需要的最小 runtime 对象。"""
@ -276,7 +273,6 @@ class EngineLoader:
)
task_service = self._task_service or TaskService(workspace / "tasks")
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
validation_service = self._validation_service or ValidationService()
mcp_manager = MCPConnectionManager(
self.config.tools.mcp_servers,
authz_config=self.config.authz,
@ -311,7 +307,6 @@ class EngineLoader:
task_skill_resolver=task_skill_resolver,
task_service=task_service,
task_execution_planner=task_execution_planner,
validation_service=validation_service,
mcp_manager=mcp_manager,
)
if self._session_manager is None:

View File

@ -4,12 +4,15 @@ from __future__ import annotations
import asyncio
import json
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from uuid import uuid4
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
from beaver.engine.context import ContextBuildInput, RuntimeContext, SessionContext, SkillContext
from beaver.memory.runs import RunRecord, SkillEffectRecord
from beaver.skills.learning import RunReceiptContext
from beaver.skills.catalog.utils import strip_frontmatter
@ -26,6 +29,17 @@ TOOL_FAILURE_GUIDANCE_PROMPT = (
"Use available materials, state uncertainty clearly, and provide partial confirmed results."
)
RAW_TOOL_CALL_FALLBACK = (
"The run reached the configured tool-call limit before producing a reliable final answer. "
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
"Please request a revision to continue the task."
)
_RAW_TOOL_CALL_RE = re.compile(
r"^\s*<tool_call\b[\s\S]*?</tool_call>\s*$|^\s*<function=[^>]+>[\s\S]*?</function>\s*$",
re.IGNORECASE,
)
@dataclass(slots=True)
class AgentProfile:
@ -35,8 +49,9 @@ class AgentProfile:
system_prompt: str = ""
default_model: str = "gpt-4.1-mini"
max_tokens: int = 4096
max_context_messages: int = 1000
temperature: float = 0.2
max_tool_iterations: int = 8
max_tool_iterations: int = 30
@dataclass(slots=True)
@ -446,7 +461,7 @@ class AgentLoop:
*(pinned_skill_contexts or []),
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
]
if not include_skill_assembly or thinking_enabled is False:
if not include_skill_assembly:
activated_skills = self._merge_skill_contexts(pinned_skills, [])
else:
skill_query = skill_selection_context or task
@ -512,8 +527,6 @@ class AgentLoop:
if not include_tools:
selected_tool_specs = []
elif thinking_enabled is False:
selected_tool_specs = tool_registry.list_specs()
else:
selected_tool_specs = await tool_assembler.assemble(
task_description=task,
@ -543,7 +556,10 @@ class AgentLoop:
build_input = ContextBuildInput(
base_system_prompt=self.profile.system_prompt,
history=session_manager.get_history(resolved_session_id),
history=session_manager.get_history(
resolved_session_id,
max_messages=max(1, self.profile.max_context_messages),
),
current_user_input=task,
memory_snapshot=memory_snapshot,
activated_skills=activated_skills,
@ -554,6 +570,7 @@ class AgentLoop:
user_id=user_id,
parent_session_id=parent_session_id,
),
runtime_context=self._current_runtime_context(),
execution_context=execution_context,
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
)
@ -693,6 +710,7 @@ class AgentLoop:
tool_calls=assistant_tool_calls or None,
finish_reason=response.finish_reason,
reasoning=response.reasoning_content,
context_visible=not bool(assistant_tool_calls),
source=source,
title=title,
model=final_model,
@ -707,7 +725,11 @@ class AgentLoop:
if not response.has_tool_calls:
final_text = response.content or ""
final_finish_reason = response.finish_reason or "stop"
if self._looks_like_raw_tool_call(final_text):
final_text = RAW_TOOL_CALL_FALLBACK
final_finish_reason = "invalid_tool_call_text"
else:
final_finish_reason = response.finish_reason or "stop"
break
if iterations >= resolved_max_tool_iterations:
@ -719,10 +741,7 @@ class AgentLoop:
temperature=resolved_temperature,
thinking_enabled=thinking_enabled,
)
final_text = finalized or (
"Tool loop stopped after reaching the configured iteration limit, "
"and no final answer was produced."
)
final_text = finalized or RAW_TOOL_CALL_FALLBACK
final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
session_manager.append_message(
resolved_session_id,
@ -877,17 +896,14 @@ class AgentLoop:
temperature: float,
thinking_enabled: bool | None,
) -> str:
final_messages = [
*messages,
{
"role": "system",
"content": (
"The configured tool iteration budget is exhausted. Do not call tools. "
"Produce the best final answer from the existing conversation and tool results. "
"State uncertainty explicitly."
),
},
]
final_messages = AgentLoop._with_system_guidance(
messages,
(
"The configured tool iteration budget is exhausted. Do not call tools. "
"Produce the best final answer from the existing conversation and tool results. "
"State uncertainty explicitly."
),
)
kwargs: dict[str, Any] = {
"messages": final_messages,
"tools": None,
@ -898,7 +914,27 @@ class AgentLoop:
if thinking_enabled is not None:
kwargs["thinking_enabled"] = thinking_enabled
response = await provider.chat(**kwargs)
return (response.content or "").strip()
if response.has_tool_calls:
return ""
content = (response.content or "").strip()
if AgentLoop._looks_like_raw_tool_call(content):
return ""
return content
@staticmethod
def _looks_like_raw_tool_call(content: str | None) -> bool:
if not content:
return False
return bool(_RAW_TOOL_CALL_RE.match(content))
@staticmethod
def _with_system_guidance(messages: list[dict[str, Any]], guidance: str) -> list[dict[str, Any]]:
copied = [dict(message) for message in messages]
if copied and copied[0].get("role") == "system":
existing = str(copied[0].get("content") or "").strip()
copied[0]["content"] = "\n\n".join(part for part in (existing, guidance.strip()) if part)
return copied
return [{"role": "system", "content": guidance.strip()}, *copied]
@staticmethod
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
@ -1133,3 +1169,49 @@ class AgentLoop:
@staticmethod
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()
@staticmethod
def _current_runtime_context() -> RuntimeContext:
utc_now = datetime.now(timezone.utc)
timezone_name = AgentLoop._configured_timezone_name()
local_now = datetime.now().astimezone()
rendered_timezone = local_now.tzname()
if timezone_name:
try:
local_now = utc_now.astimezone(ZoneInfo(timezone_name))
rendered_timezone = timezone_name
except ZoneInfoNotFoundError:
rendered_timezone = local_now.tzname() or timezone_name
return RuntimeContext(
utc_datetime=utc_now.isoformat(),
local_datetime=local_now.isoformat(),
timezone=rendered_timezone,
utc_offset=AgentLoop._format_utc_offset(local_now),
)
@staticmethod
def _configured_timezone_name() -> str | None:
for value in (os.getenv("BEAVER_RUNTIME_TIMEZONE"), os.getenv("TZ")):
cleaned = (value or "").strip()
if cleaned:
return cleaned
try:
timezone_file = "/etc/timezone"
if os.path.exists(timezone_file):
with open(timezone_file, encoding="utf-8") as file:
cleaned = file.read().strip()
if cleaned:
return cleaned
except OSError:
return None
return None
@staticmethod
def _format_utc_offset(value: datetime) -> str | None:
raw = value.strftime("%z")
if not raw:
return None
return f"{raw[:3]}:{raw[3:]}"

View File

@ -119,13 +119,23 @@ class LiteLLMProvider(LLMProvider):
@staticmethod
def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
sanitized = []
system_contents: list[str] = []
for message in messages:
clean = {key: value for key, value in message.items() if key in _ALLOWED_MSG_KEYS}
if clean.get("role") == "system":
content = clean.get("content")
if isinstance(content, str) and content.strip():
system_contents.append(content.strip())
elif content is not None:
system_contents.append(str(content))
continue
if clean.get("role") == "assistant" and "content" not in clean:
clean["content"] = None
if isinstance(clean.get("tool_calls"), list):
clean["tool_calls"] = LiteLLMProvider._sanitize_tool_calls(clean["tool_calls"])
sanitized.append(clean)
if system_contents:
sanitized.insert(0, {"role": "system", "content": "\n\n".join(system_contents)})
return sanitized
@staticmethod

View File

@ -84,8 +84,10 @@ class MessageRecord:
payload["task_id"] = self.event_payload.get("task_id")
if self.event_payload.get("task_status"):
payload["task_status"] = self.event_payload.get("task_status")
if self.event_payload.get("validation_status"):
payload["validation_status"] = self.event_payload.get("validation_status")
if self.event_payload.get("evidence_status"):
payload["evidence_status"] = self.event_payload.get("evidence_status")
if self.event_payload.get("acceptance_state"):
payload["acceptance_state"] = self.event_payload.get("acceptance_state")
if self.event_payload.get("feedback_state"):
payload["feedback_state"] = self.event_payload.get("feedback_state")
if self.event_payload.get("feedback_error"):

View File

@ -86,6 +86,18 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
model=_string(defaults.get("model") or data.get("model")),
provider=_string(defaults.get("provider") or data.get("provider")),
embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")),
max_context_messages=_int(
defaults.get("maxContextMessages")
or defaults.get("max_context_messages")
or data.get("maxContextMessages")
or data.get("max_context_messages")
),
max_tool_iterations=_int(
defaults.get("maxToolIterations")
or defaults.get("max_tool_iterations")
or data.get("maxToolIterations")
or data.get("max_tool_iterations")
),
)
@ -217,6 +229,13 @@ def _float(value: Any) -> float | None:
return float(value)
def _int(value: Any) -> int | None:
parsed = _float(value)
if parsed is None:
return None
return int(parsed)
def _bool(value: Any, *, default: bool) -> bool:
if isinstance(value, bool):
return value

View File

@ -25,6 +25,8 @@ class AgentDefaultsConfig:
model: str | None = None
provider: str | None = None
embedding_model: str | None = None
max_context_messages: int | None = None
max_tool_iterations: int | None = None
@dataclass(slots=True)

View File

@ -44,6 +44,8 @@ from .files import (
workspace_file_path,
)
from .schemas import (
WebChatAcceptanceRequest,
WebChatAcceptanceResponse,
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
@ -155,6 +157,13 @@ except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only env
return decorator
RAW_TOOL_CALL_DISPLAY_FALLBACK = (
"The run reached the configured tool-call limit before producing a reliable final answer. "
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
"Please request a revision to continue the task."
)
@asynccontextmanager
async def _app_lifespan(
app: FastAPI,
@ -365,6 +374,7 @@ def create_app(
"workspace_exists": loaded.workspace.exists(),
"model": config.default_model or agent_service.profile.default_model,
"max_tokens": agent_service.profile.max_tokens,
"max_context_messages": agent_service.profile.max_context_messages,
"temperature": agent_service.profile.temperature,
"max_tool_iterations": agent_service.profile.max_tool_iterations,
"providers": providers_status,
@ -1719,7 +1729,8 @@ def create_app(
usage=result.usage,
task_id=result.task_id,
task_status=result.task_status,
validation_result=result.validation_result,
evidence_status="recorded" if result.task_id else None,
validation_result=None,
)
fallback_target = _model_dump(payload.fallback_target)
@ -1769,7 +1780,8 @@ def create_app(
usage=result.usage,
task_id=result.task_id,
task_status=result.task_status,
validation_result=result.validation_result,
evidence_status="recorded" if result.task_id else None,
validation_result=None,
)
@app.websocket("/ws/{session_id:path}")
@ -1882,6 +1894,30 @@ def create_app(
}
)
@app.post(
"/api/chat/acceptance",
response_model=WebChatAcceptanceResponse,
responses={
400: {"model": WebErrorResponse},
404: {"model": WebErrorResponse},
},
)
async def chat_acceptance(request: Request, payload: WebChatAcceptanceRequest) -> WebChatAcceptanceResponse:
agent_service = get_agent_service(request)
try:
result = await agent_service.submit_acceptance(
session_id=payload.session_id,
run_id=payload.run_id,
acceptance_type=payload.acceptance_type,
comment=payload.comment,
)
except ValueError as exc:
detail = str(exc)
status_code = 404 if "No internal task" in detail else 400
raise HTTPException(status_code=status_code, detail=detail) from exc
return WebChatAcceptanceResponse(**result)
@app.post(
"/api/chat/feedback",
response_model=WebChatFeedbackResponse,
@ -1893,10 +1929,10 @@ def create_app(
async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
agent_service = get_agent_service(request)
try:
result = await agent_service.submit_feedback(
result = await agent_service.submit_acceptance(
session_id=payload.session_id,
run_id=payload.run_id,
feedback_type=payload.feedback_type,
acceptance_type=payload.feedback_type,
comment=payload.comment,
)
except ValueError as exc:
@ -1915,15 +1951,21 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An
role = event.get("role")
if role not in {"user", "assistant"}:
continue
content = event.get("content") or ""
comparable_content = str(content).replace("\u200b", "").replace("\u200c", "").replace("\u200d", "").replace("\ufeff", "")
if role == "assistant" and not comparable_content.strip():
continue
content = _sanitize_user_visible_assistant_content(role=role, content=content)
messages.append(
{
"role": role,
"content": event.get("content") or "",
"content": content,
"timestamp": _iso_from_timestamp(event.get("timestamp")),
"run_id": event.get("run_id"),
"task_id": event.get("task_id"),
"task_status": event.get("task_status"),
"validation_status": event.get("validation_status"),
"evidence_status": event.get("evidence_status"),
"acceptance_state": event.get("acceptance_state"),
"feedback_state": event.get("feedback_state"),
"feedback_error": event.get("feedback_error"),
"message_type": event.get("message_type"),
@ -2142,6 +2184,7 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
content = (record.content or "").strip()
if not content:
continue
content = _sanitize_user_visible_assistant_content(role=record.role, content=content)
messages.append(
{
"role": record.role,
@ -2150,7 +2193,6 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
"tool_name": record.tool_name,
}
)
validation = run_record.validation_result if run_record is not None else None
views.append(
{
"run_id": run_id,
@ -2163,7 +2205,8 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
"attempt_index": run_record.attempt_index if run_record is not None else None,
"task_text": run_record.task_text if run_record is not None else "",
"messages": messages,
"validation_result": validation,
"evidence_status": "recorded",
"validation_result": None,
}
)
return views
@ -2428,12 +2471,6 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
return dict(value)
def _validation_status(validation_result: dict[str, Any] | None) -> str:
if validation_result is None:
return "unknown"
return "passed" if validation_result.get("accepted") is True else "failed"
def _websocket_input_metadata(payload: dict[str, Any]) -> dict[str, Any]:
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
result: dict[str, Any] = dict(metadata)
@ -2467,13 +2504,15 @@ def _int_or_none(value: Any) -> int | None:
def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
validation_result = getattr(result, "validation_result", None)
task_id = getattr(result, "task_id", None)
task_status = getattr(result, "task_status", None)
return {
"type": "message",
"role": "assistant",
"content": getattr(result, "output_text", "") or "",
"content": _sanitize_user_visible_assistant_content(
role="assistant",
content=getattr(result, "output_text", "") or "",
),
"session_id": getattr(result, "session_id", None),
"run_id": getattr(result, "run_id", None),
"finish_reason": getattr(result, "finish_reason", None),
@ -2483,17 +2522,39 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
"usage": dict(getattr(result, "usage", {}) or {}),
"task_id": task_id,
"task_status": task_status,
"validation_result": validation_result,
"validation_status": _validation_status(validation_result),
"evidence_status": "recorded" if task_id else None,
"validation_result": None,
"metadata": {
"task_id": task_id,
"task_status": task_status,
"validation_result": validation_result,
"evidence_status": "recorded" if task_id else None,
"input_metadata": _websocket_input_metadata(input_payload),
},
}
def _sanitize_user_visible_assistant_content(*, role: str, content: str) -> str:
if role != "assistant":
return content
if _looks_like_raw_tool_call(content):
return RAW_TOOL_CALL_DISPLAY_FALLBACK
return content
def _looks_like_raw_tool_call(content: str | None) -> bool:
if not content:
return False
stripped = content.strip()
lowered = stripped.lower()
return (
lowered.startswith("<tool_call")
and lowered.endswith("</tool_call>")
) or (
lowered.startswith("<function=")
and lowered.endswith("</function>")
)
def _provider_enabled(provider_name: str, provider_cfg: Any) -> bool:
if provider_cfg is None or provider_name == "custom":
return False
@ -2980,6 +3041,7 @@ def _write_config_json(path: Path, data: dict[str, Any]) -> None:
def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
config = load_config(config_path=config_path)
agent_service.loader.config = config
agent_service._apply_configured_profile_defaults() # noqa: SLF001
loop = getattr(agent_service, "_loop", None)
loaded = getattr(loop, "loaded", None) if loop is not None else None
if loaded is not None:

View File

@ -1,6 +1,8 @@
"""Web request and response schemas."""
from .chat import (
WebChatAcceptanceRequest,
WebChatAcceptanceResponse,
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
@ -13,6 +15,8 @@ from .chat import (
)
__all__ = [
"WebChatAcceptanceRequest",
"WebChatAcceptanceResponse",
"WebChatFeedbackRequest",
"WebChatFeedbackResponse",
"WebChatRequest",

View File

@ -82,11 +82,34 @@ class WebChatResponse(BaseModel):
usage: dict[str, Any] = Field(default_factory=dict)
task_id: str | None = None
task_status: str | None = None
evidence_status: str | None = None
acceptance_state: str | None = None
validation_result: dict[str, Any] | None = None
class WebChatAcceptanceRequest(BaseModel):
"""User acceptance on the latest assistant result in chat."""
session_id: str
run_id: str
acceptance_type: str
comment: str | None = None
class WebChatAcceptanceResponse(BaseModel):
"""Acceptance recording result."""
session_id: str
run_id: str
task_id: str
task_status: str
acceptance_type: str
feedback_type: str
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
class WebChatFeedbackRequest(BaseModel):
"""Feedback on the latest assistant result in chat."""
"""Backward-compatible feedback payload."""
session_id: str
run_id: str
@ -94,15 +117,8 @@ class WebChatFeedbackRequest(BaseModel):
comment: str | None = None
class WebChatFeedbackResponse(BaseModel):
"""Feedback recording result."""
session_id: str
run_id: str
task_id: str
task_status: str
feedback_type: str
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
class WebChatFeedbackResponse(WebChatAcceptanceResponse):
"""Backward-compatible feedback response."""
class WebProviderConfigRequest(BaseModel):

View File

@ -29,9 +29,9 @@ from beaver.tasks import (
TaskEvidencePacket,
TaskExecutionPlan,
TaskRecord,
ValidationResult,
render_task_evidence,
)
from beaver.tasks.service import normalize_acceptance_type
NOTIFICATION_SESSION_ID = "notify:default:scheduled"
@ -60,11 +60,19 @@ class AgentService:
) -> None:
self.profile = profile or AgentProfile()
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
self._apply_configured_profile_defaults()
self._loop: AgentLoop | None = None
self._run_task: asyncio.Task[None] | None = None
self._main_agent_router = MainAgentRouter()
self._runtime_services: dict[str, Any] = {}
def _apply_configured_profile_defaults(self) -> None:
defaults = self.loader.config.agents_defaults
if defaults.max_context_messages is not None:
self.profile.max_context_messages = max(1, defaults.max_context_messages)
if defaults.max_tool_iterations is not None:
self.profile.max_tool_iterations = max(0, defaults.max_tool_iterations)
def create_loop(self) -> AgentLoop:
"""创建并缓存当前 service 使用的 AgentLoop。"""
@ -232,7 +240,7 @@ class AgentService:
Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
This entry bypasses the main-agent classifier and forces Task mode so
every trigger produces a TaskRecord, validation, feedback state, and a
every trigger produces a TaskRecord, evidence, acceptance state, and a
run_id that the scheduled-task history can link to.
"""
@ -280,9 +288,9 @@ class AgentService:
result.run_id,
{
"message_type": "scheduled_reply",
"scheduled_job_id": job.id,
"scheduled_run_id": run.scheduled_run_id,
"cron_job_name": job.name,
"scheduled_job_id": cron_job_id,
"scheduled_run_id": scheduled_run_id,
"cron_job_name": cron_job_name,
"mode": "notification",
},
)
@ -403,15 +411,15 @@ class AgentService:
},
)
async def submit_feedback(
async def submit_acceptance(
self,
*,
session_id: str,
run_id: str,
feedback_type: str,
acceptance_type: str,
comment: str | None = None,
) -> dict[str, Any]:
"""Record chat feedback for the internal task linked to a run."""
"""Record user acceptance for the internal task linked to a run."""
loaded = self.create_loop().boot()
task_service = self._require_loaded(loaded, "task_service")
@ -419,32 +427,31 @@ class AgentService:
if task is None or task.session_id != session_id:
raise ValueError(f"No internal task found for run_id={run_id!r}")
normalized = feedback_type.strip().lower()
if normalized not in {"satisfied", "revise", "abandon"}:
raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
normalized = normalize_acceptance_type(acceptance_type)
legacy_feedback_type = "satisfied" if normalized == "accept" else normalized
already_recorded = any(
item.get("run_id") == run_id and item.get("feedback_type") == normalized
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
for item in task.feedback
)
conflicting_feedback = next(
conflicting_acceptance = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("feedback_type") != normalized
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
),
None,
)
if conflicting_feedback is not None:
if conflicting_acceptance is not None:
raise ValueError(
f"Feedback for run_id={run_id!r} was already recorded as "
f"{conflicting_feedback.get('feedback_type')!r}"
f"Acceptance for run_id={run_id!r} was already recorded as "
f"{conflicting_acceptance.get('acceptance_type')!r}"
)
if task.status in {"closed", "abandoned"} and not already_recorded:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
updated = task if already_recorded else task_service.add_feedback(
updated = task if already_recorded else task_service.add_acceptance(
task.task_id,
feedback_type=normalized,
acceptance_type=normalized,
comment=comment,
run_id=run_id,
)
@ -455,7 +462,8 @@ class AgentService:
{
"task_id": updated.task_id,
"task_status": updated.status,
"feedback_state": normalized,
"acceptance_state": normalized,
"feedback_state": legacy_feedback_type,
},
)
if not already_recorded:
@ -463,10 +471,11 @@ class AgentService:
session_id,
run_id=run_id,
role="system",
event_type="task_feedback_recorded",
event_type="task_acceptance_recorded",
event_payload={
"task_id": task.task_id,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment,
"task_status": updated.status,
},
@ -475,35 +484,36 @@ class AgentService:
)
generated_candidates = []
validation = ValidationResult.from_dict(updated.validation_result)
if not already_recorded:
run_memory_store = self._require_loaded(loaded, "run_memory_store")
feedback_payload = {
"feedback_type": normalized,
acceptance_payload = {
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment or "",
"task_status": updated.status,
"final_accepted_run_id": updated.metadata.get("final_accepted_run_id"),
}
run_memory_store.update_run_record(
run_id,
success=normalized == "satisfied",
feedback=feedback_payload,
success=normalized == "accept",
feedback=acceptance_payload,
)
run_memory_store.update_skill_effects_for_run(
run_id,
success=normalized == "satisfied",
feedback_score=self._feedback_score_for_learning(normalized, validation),
success=normalized == "accept",
feedback_score=self._acceptance_score_for_learning(normalized),
notes=(comment or normalized).strip(),
)
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
skill_learning_service.rescore_skill_versions()
if already_recorded:
generated_candidates = []
elif normalized == "satisfied" and validation is not None and validation.accepted:
elif normalized == "accept":
generated_candidates = [
item.to_dict()
for item in skill_learning_service.build_learning_candidates_for_task(
updated.task_id,
trigger_run_id=run_id,
final_accepted_run_id=run_id,
)
]
elif normalized == "abandon":
@ -514,7 +524,8 @@ class AgentService:
event_type="task_failure_evidence_recorded",
event_payload={
"task_id": updated.task_id,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment or "",
"task_status": updated.status,
"durable_memory_written": False,
@ -528,10 +539,28 @@ class AgentService:
"run_id": run_id,
"task_id": updated.task_id,
"task_status": updated.status,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"learning_candidates": generated_candidates,
}
async def submit_feedback(
self,
*,
session_id: str,
run_id: str,
feedback_type: str,
comment: str | None = None,
) -> dict[str, Any]:
"""Backward-compatible wrapper for older clients."""
return await self.submit_acceptance(
session_id=session_id,
run_id=run_id,
acceptance_type=feedback_type,
comment=comment,
)
async def _process_with_main_agent(
self,
message: str,
@ -591,7 +620,7 @@ class AgentService:
else active_task
)
if active_task is not None and decision.action == "revise_task" and task.task_id == active_task.task_id:
task = self._record_revision_feedback_for_task(
task = self._record_revision_acceptance_for_task(
loaded,
task=task,
session_id=session_id,
@ -599,7 +628,7 @@ class AgentService:
)
return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)
def _record_revision_feedback_for_task(
def _record_revision_acceptance_for_task(
self,
loaded: Any,
*,
@ -607,9 +636,9 @@ class AgentService:
session_id: str,
comment: str,
) -> TaskRecord:
"""Mark the latest feedback-eligible run as revised before continuing a task."""
"""Mark the latest acceptance-eligible run as revised before continuing a task."""
if task.status not in {"awaiting_feedback", "needs_revision"}:
if task.status not in {"awaiting_acceptance", "needs_revision"}:
return task
run_id = next((item for item in reversed(task.run_ids) if item), None)
if not run_id:
@ -617,15 +646,15 @@ class AgentService:
existing = next((item for item in task.feedback if item.get("run_id") == run_id), None)
if existing is not None:
if existing.get("feedback_type") != "revise":
if existing.get("acceptance_type") != "revise":
return task
updated = task
already_recorded = True
else:
task_service = self._require_loaded(loaded, "task_service")
updated = task_service.add_feedback(
updated = task_service.add_acceptance(
task.task_id,
feedback_type="revise",
acceptance_type="revise",
comment=comment,
run_id=run_id,
)
@ -638,6 +667,7 @@ class AgentService:
{
"task_id": updated.task_id,
"task_status": updated.status,
"acceptance_state": "revise",
"feedback_state": "revise",
},
)
@ -648,9 +678,10 @@ class AgentService:
session_id,
run_id=run_id,
role="system",
event_type="task_feedback_recorded",
event_type="task_acceptance_recorded",
event_payload={
"task_id": updated.task_id,
"acceptance_type": "revise",
"feedback_type": "revise",
"comment": comment,
"task_status": updated.status,
@ -659,12 +690,12 @@ class AgentService:
content=comment,
context_visible=False,
)
validation = ValidationResult.from_dict(updated.validation_result)
run_memory_store = self._require_loaded(loaded, "run_memory_store")
run_memory_store.update_run_record(
run_id,
success=False,
feedback={
"acceptance_type": "revise",
"feedback_type": "revise",
"comment": comment,
"task_status": updated.status,
@ -673,7 +704,7 @@ class AgentService:
run_memory_store.update_skill_effects_for_run(
run_id,
success=False,
feedback_score=self._feedback_score_for_learning("revise", validation),
feedback_score=self._acceptance_score_for_learning("revise"),
notes=comment.strip() or "revise",
)
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
@ -690,236 +721,185 @@ class AgentService:
) -> AgentRunResult:
loaded = self.create_loop().boot()
task_service = self._require_loaded(loaded, "task_service")
validation_service = self._require_loaded(loaded, "validation_service")
task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
session_manager = self._require_loaded(loaded, "session_manager")
run_memory_store = self._require_loaded(loaded, "run_memory_store")
last_result: AgentRunResult | None = None
latest_validation: ValidationResult | None = None
base_execution_context = kwargs.get("execution_context")
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
kwargs = dict(kwargs)
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
kwargs["provider_bundle"] = provider_bundle
for attempt_index in (1, 2):
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
plan = await task_execution_planner.plan(
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
plan = await task_execution_planner.plan(
task=task,
user_message=message,
attempt_index=attempt_index,
provider_bundle=provider_bundle,
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_execution_planned",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
**plan.to_event_payload(),
},
)
team_summaries: list[str] = []
team_execution_context = ""
team_result: TeamRunResult | None = None
if plan.is_team:
team_result, team_error = await self._run_team_for_task(
plan,
task=task,
user_message=message,
attempt_index=attempt_index,
latest_validation=latest_validation,
provider_bundle=provider_bundle,
parent_session_id=kwargs["session_id"],
provider_bundle_factory=team_provider_bundle_factory
or self._build_team_provider_bundle_factory(loaded, kwargs),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_execution_planned",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
**plan.to_event_payload(),
},
)
team_summaries: list[str] = []
team_execution_context = ""
team_result: TeamRunResult | None = None
if plan.is_team:
team_result, team_error = await self._run_team_for_task(
plan,
task=task,
parent_session_id=kwargs["session_id"],
provider_bundle_factory=team_provider_bundle_factory
or self._build_team_provider_bundle_factory(loaded, kwargs),
if team_result is not None:
team_summaries = [self._team_summary_for_validation(team_result)]
team_packet = TaskEvidencePacket(
task_id=task.task_id,
attempt_index=attempt_index,
main_run=None,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results),
final_output="",
)
team_execution_context = self._join_context(
self._team_execution_context(plan, team_result),
"Rendered team evidence:\n" + render_task_evidence(team_packet),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": team_result.run_ids,
"team_success": team_result.success,
"node_results": self._team_node_results_for_event(plan, team_result),
"reason": plan.reason,
"error": None if team_result.success else "one or more team nodes failed",
},
)
else:
team_summaries = [f"Team execution failed: {team_error}"]
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": [],
"team_success": False,
"reason": plan.reason,
"error": team_error,
},
)
if team_result is not None:
team_summaries = [self._team_summary_for_validation(team_result)]
team_packet = TaskEvidencePacket(
task_id=task.task_id,
attempt_index=attempt_index,
main_run=None,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results),
final_output="",
)
team_execution_context = self._join_context(
self._team_execution_context(plan, team_result),
"Rendered team evidence:\n" + render_task_evidence(team_packet),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": team_result.run_ids,
"team_success": team_result.success,
"node_results": self._team_node_results_for_event(plan, team_result),
"reason": plan.reason,
"error": None if team_result.success else "one or more team nodes failed",
},
)
else:
team_summaries = [f"Team execution failed: {team_error}"]
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": [],
"team_success": False,
"reason": plan.reason,
"error": team_error,
},
)
attempt_kwargs = dict(kwargs)
attempt_kwargs.update(
{
"task_id": task.task_id,
"task_mode": True,
"attempt_index": attempt_index,
"allow_candidate_generation": False,
}
)
if attempt_index == 2 and latest_validation is not None:
revision_context = latest_validation.recommended_revision_prompt.strip()
if revision_context:
attempt_kwargs["execution_context"] = self._join_context(
base_execution_context,
f"Task validation revision request:\n{revision_context}",
team_execution_context,
)
elif team_execution_context:
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
if plan.is_team and team_execution_context:
attempt_kwargs["include_tools"] = False
attempt_kwargs["max_tool_iterations"] = 0
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
latest_validation=latest_validation,
plan=plan,
team_summaries=team_summaries,
)
result = await runner(message, **attempt_kwargs)
last_result = result
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_synthesis_completed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"main_run_id": result.run_id,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
},
)
task = task_service.append_run(
task.task_id,
result.run_id,
skill_names=self._skill_names_for_run(loaded, result.run_id),
)
evidence_packet = self._build_task_evidence_packet(
session_manager=session_manager,
task=task,
attempt_index=attempt_index,
result=result,
team_result=team_result,
)
evidence_text = render_task_evidence(evidence_packet)
validation = await validation_service.validate_task_result(
task=task,
user_message=message,
final_output=result.output_text,
evidence_packet=evidence_packet,
evidence_text=evidence_text,
transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
team_summaries=team_summaries,
provider_bundle=provider_bundle,
)
latest_validation = validation
has_usable_answer = bool(result.output_text.strip()) and (
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
)
task = task_service.record_validation(
task.task_id,
result.run_id,
validation,
final_attempt=(
attempt_index == 2
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
),
has_usable_answer=has_usable_answer,
)
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
session_manager.update_latest_assistant_event_payload(
result.session_id,
result.run_id,
{
"task_id": task.task_id,
"task_status": task.status,
"validation_status": "passed" if validation.accepted else "failed",
},
)
validation_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
attempt_kwargs = dict(kwargs)
attempt_kwargs.update(
{
"task_id": task.task_id,
"task_mode": True,
"attempt_index": attempt_index,
"allow_candidate_generation": False,
}
retry_scheduled = validation.status == "rejected" and attempt_index == 1
session_manager.append_message(
result.session_id,
run_id=result.run_id,
role="system",
event_type="task_validation_snapshotted",
event_payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"validation_result": validation.to_dict(),
"validation_debug": validation_debug,
"retry_scheduled": retry_scheduled,
},
content=validation.recommended_revision_prompt or None,
context_visible=False,
)
if retry_scheduled:
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
result.task_id = task.task_id
result.task_status = task.status
result.validation_result = validation.to_dict()
if not retry_scheduled:
return result
)
if team_execution_context:
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
if plan.is_team and team_execution_context:
attempt_kwargs["include_tools"] = False
attempt_kwargs["max_tool_iterations"] = 0
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
plan=plan,
team_summaries=team_summaries,
)
if last_result is None: # pragma: no cover - defensive
raise RuntimeError("Task mode did not produce a run result")
return last_result
result = await runner(message, **attempt_kwargs)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_synthesis_completed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"main_run_id": result.run_id,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
},
)
task = task_service.append_run(
task.task_id,
result.run_id,
skill_names=self._skill_names_for_run(loaded, result.run_id),
)
evidence_packet = self._build_task_evidence_packet(
session_manager=session_manager,
task=task,
attempt_index=attempt_index,
result=result,
team_result=team_result,
)
evidence_text = render_task_evidence(evidence_packet)
evidence_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
}
session_manager.update_latest_assistant_event_payload(
result.session_id,
result.run_id,
{
"task_id": task.task_id,
"task_status": task.status,
"evidence_status": "recorded",
},
)
session_manager.append_message(
result.session_id,
run_id=result.run_id,
role="system",
event_type="task_evidence_recorded",
event_payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"evidence_debug": evidence_debug,
},
content=None,
context_visible=False,
)
result.task_id = task.task_id
result.task_status = task.status
result.validation_result = None
return result
async def _run_team_for_task(
self,
@ -986,12 +966,10 @@ class AgentService:
return []
@staticmethod
def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
if feedback_type == "satisfied":
if validation is not None:
return max(0.0, min(1.0, float(validation.score)))
def _acceptance_score_for_learning(acceptance_type: str) -> float:
if acceptance_type == "accept":
return 1.0
if feedback_type == "revise":
if acceptance_type == "revise":
return 0.5
return 0.0
@ -1001,12 +979,11 @@ class AgentService:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
plan: TaskExecutionPlan | None = None,
team_summaries: list[str] | None = None,
) -> str:
phase = f"attempt_{attempt_index}"
if latest_validation is not None:
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
phase = f"revision_attempt_{attempt_index}"
elif plan is not None and plan.is_team:
phase = f"team_synthesis_attempt_{attempt_index}"
@ -1027,24 +1004,14 @@ class AgentService:
)
else:
sections.append("Previously activated skills:\nNone")
if latest_validation is not None:
validation_lines = [
f"accepted: {latest_validation.accepted}",
f"score: {latest_validation.score}",
]
if latest_validation.issues:
validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
if latest_validation.missing_requirements:
validation_lines.append(
"missing requirements:\n"
+ "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
)
if latest_validation.recommended_revision_prompt:
validation_lines.append(
"recommended revision:\n"
+ latest_validation.recommended_revision_prompt
)
sections.append("Validation feedback:\n" + "\n".join(validation_lines))
if task.feedback:
history_lines = []
for item in task.feedback[-5:]:
kind = item.get("acceptance_type") or item.get("feedback_type")
comment = item.get("comment") or ""
run_id = item.get("run_id") or ""
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
if plan is not None:
plan_lines = [
f"mode: {plan.mode}",
@ -1313,7 +1280,8 @@ class AgentService:
"inbound_metadata": dict(inbound.metadata),
"task_id": getattr(result, "task_id", None),
"task_status": getattr(result, "task_status", None),
"validation_result": getattr(result, "validation_result", None),
"evidence_status": "recorded" if getattr(result, "task_id", None) else None,
"validation_result": None,
},
)

View File

@ -235,26 +235,45 @@ class SessionProcessProjector:
metadata=dict(payload),
)
elif record.event_type == "task_validation_snapshotted":
validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
accepted = bool(validation.get("accepted"))
root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
root["finished_at"] = created_at if root["status"] == "done" else None
elif record.event_type == "task_evidence_recorded":
root["status"] = "waiting"
root["finished_at"] = None
add_event(
event_id=_event_id(record, "validation"),
event_id=_event_id(record, "evidence"),
run_id=record.run_id or root_run_id,
parent_run_id=root_run_id if record.run_id else None,
kind="run_status",
actor_type="system",
actor_id="validator",
actor_name="Validator",
text=(
f"Validation {'passed' if accepted else 'failed'} "
f"(score={validation.get('score')})."
+ (" Retry scheduled." if payload.get("retry_scheduled") else "")
),
actor_id="evidence-recorder",
actor_name="Evidence",
text="Task evidence was recorded; waiting for user acceptance.",
created_at=created_at,
status="done" if accepted else "error",
status="done",
metadata=dict(payload),
)
elif record.event_type == "task_acceptance_recorded":
acceptance_type = str(payload.get("acceptance_type") or payload.get("feedback_type") or "")
if acceptance_type == "accept":
root["status"] = "done"
root["finished_at"] = created_at
elif acceptance_type == "abandon":
root["status"] = "cancelled"
root["finished_at"] = created_at
else:
root["status"] = "waiting"
root["finished_at"] = None
add_event(
event_id=_event_id(record, "acceptance"),
run_id=record.run_id or root_run_id,
parent_run_id=root_run_id if record.run_id else None,
kind="run_status",
actor_type="user",
actor_id="user-acceptance",
actor_name="User Acceptance",
text=f"User acceptance recorded: {acceptance_type or 'unknown'}.",
created_at=created_at,
status="done",
metadata=dict(payload),
)

View File

@ -69,15 +69,24 @@ class SkillLearningService:
existing_ids.add(candidate.candidate_id)
return candidates
def build_learning_candidates_for_task(self, task_id: str, *, trigger_run_id: str) -> list[SkillLearningCandidate]:
"""Build candidates scoped to a single validated and satisfied Task run."""
def build_learning_candidates_for_task(
self,
task_id: str,
*,
final_accepted_run_id: str | None = None,
trigger_run_id: str | None = None,
) -> list[SkillLearningCandidate]:
"""Build candidates from a user-accepted Task and all of its runs."""
final_accepted_run_id = final_accepted_run_id or trigger_run_id
if not final_accepted_run_id:
return []
runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
trigger_run = next((record for record in runs if record.run_id == trigger_run_id), None)
if trigger_run is None or not self._is_confirmed_positive_run(trigger_run):
final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
if final_run is None or not self._is_task_accepted_run(final_run):
return []
source_runs = [record for record in runs if self._is_confirmed_positive_run(record)]
source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))
if not source_runs:
return []
@ -100,11 +109,16 @@ class SkillLearningService:
source_session_ids=source_session_ids,
related_skill_names=[],
reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
evidence={"task_id": task_id, "trigger_run_id": trigger_run_id, "theme": self._task_theme(trigger_run.task_text)},
evidence={
"task_id": task_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"theme": self._task_theme(final_run.task_text),
},
status="open",
priority=1,
confidence=0.8,
trigger_reason="validation_accepted_and_user_satisfied",
trigger_reason="task_accepted",
)
)
else:
@ -137,13 +151,14 @@ class SkillLearningService:
),
evidence={
"task_id": task_id,
"trigger_run_id": trigger_run_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"skill_version": receipt.skill_version,
},
status="open",
priority=1,
confidence=0.7,
trigger_reason="validation_accepted_and_user_satisfied",
trigger_reason="task_accepted",
)
)
@ -269,7 +284,7 @@ class SkillLearningService:
groups.setdefault(key, []).append(record)
candidates: list[SkillLearningCandidate] = []
for theme, runs in groups.items():
successful = [record for record in runs if self._is_confirmed_positive_run(record)]
successful = [record for record in runs if self._is_task_accepted_run(record)]
if len(successful) < 2:
continue
if any(record.activated_skills for record in successful):
@ -290,7 +305,7 @@ class SkillLearningService:
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
for record in self.run_store.list_runs():
if not self._is_confirmed_positive_run(record):
if not self._is_task_accepted_run(record):
continue
unique = sorted({receipt.skill_name for receipt in record.activated_skills})
for pair in combinations(unique, 2):
@ -351,14 +366,15 @@ class SkillLearningService:
return effects
@staticmethod
def _is_confirmed_positive_run(record: RunRecord) -> bool:
validation = record.validation_result or {}
def _is_task_accepted_run(record: RunRecord) -> bool:
feedback = record.feedback or {}
acceptance_type = feedback.get("acceptance_type")
if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
acceptance_type = "accept"
return (
bool(record.success)
and bool(record.task_id)
and validation.get("accepted") is True
and feedback.get("feedback_type") == "satisfied"
and acceptance_type == "accept"
)
@staticmethod

View File

@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
from .router import MainAgentRouter
from .service import TaskService
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
from .validation import ValidationService
__all__ = [
"EvidenceBuilder",
@ -24,6 +23,5 @@ __all__ = [
"ToolEvidence",
"ValidationResult",
"ValidationStatus",
"ValidationService",
"render_task_evidence",
]

View File

@ -1,4 +1,4 @@
"""Models for internal task tracking and validation."""
"""Models for internal task tracking and user acceptance."""
from __future__ import annotations
@ -9,7 +9,12 @@ from typing import Any, Literal
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
LEGACY_STATUS_MAP = {
"validating": "running",
"awaiting_feedback": "awaiting_acceptance",
"needs_review": "awaiting_acceptance",
}
@dataclass(slots=True)
@ -113,11 +118,11 @@ class TaskRecord:
@property
def is_execution_active(self) -> bool:
return self.status in {"running", "validating"}
return self.status == "running"
@property
def requires_user_action(self) -> bool:
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
return self.status in {"awaiting_acceptance", "needs_revision"}
def to_dict(self) -> dict[str, Any]:
return {
@ -137,6 +142,7 @@ class TaskRecord:
"satisfaction": self.satisfaction,
"run_ids": list(self.run_ids),
"skill_names": list(self.skill_names),
"acceptance": list(self.feedback),
"feedback": list(self.feedback),
"validation_result": self.validation_result,
"metadata": dict(self.metadata),
@ -152,7 +158,7 @@ class TaskRecord:
goal=str(payload.get("goal") or payload.get("description") or ""),
constraints=[str(item) for item in payload.get("constraints") or []],
priority=int(payload.get("priority", 0) or 0),
status=str(payload.get("status") or "open"),
status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
creator=str(payload.get("creator") or "main-agent"),
created_at=str(payload.get("created_at") or ""),
updated_at=str(payload.get("updated_at") or ""),
@ -161,7 +167,11 @@ class TaskRecord:
satisfaction=_optional_float(payload.get("satisfaction")),
run_ids=[str(item) for item in payload.get("run_ids") or []],
skill_names=[str(item) for item in payload.get("skill_names") or []],
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
feedback=[
_normalize_acceptance_entry(dict(item))
for item in (payload.get("acceptance") or payload.get("feedback") or [])
if isinstance(item, dict)
],
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
metadata=dict(payload.get("metadata") or {}),
)
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
if value in (None, ""):
return None
return float(value)
def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
if entry.get("acceptance_type") is None and entry.get("feedback_type") is not None:
feedback_type = str(entry.get("feedback_type") or "")
entry["acceptance_type"] = "accept" if feedback_type == "satisfied" else feedback_type
if entry.get("feedback_type") is None and entry.get("acceptance_type") is not None:
acceptance_type = str(entry.get("acceptance_type") or "")
entry["feedback_type"] = "satisfied" if acceptance_type == "accept" else acceptance_type
return entry

View File

@ -10,7 +10,7 @@ from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
from .models import TaskRecord
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
@ -76,7 +76,6 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
provider_bundle: ProviderBundle | None = None,
timeout_seconds: float = 30.0,
) -> TaskExecutionPlan:
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
task=task,
user_message=user_message,
attempt_index=attempt_index,
latest_validation=latest_validation,
),
},
],
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None,
) -> str:
validation_note = ""
if latest_validation is not None:
validation_note = (
"\nPrevious validation issues:\n"
+ json.dumps(latest_validation.to_dict(), ensure_ascii=False)
)
history_note = ""
if task.feedback:
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
return (
"Decide execution mode for this internal Task attempt.\n"
"Use mode=team only when independent research, review, implementation slices, or staged checks "
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
f"Task goal:\n{task.goal}\n\n"
f"Current user request:\n{user_message}\n\n"
f"Attempt index: {attempt_index}\n"
f"{validation_note}"
f"{history_note}"
)
@staticmethod

View File

@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any
from uuid import uuid4
from .models import TaskEvent, TaskRecord, ValidationResult
from .models import TaskEvent, TaskRecord
from .store import TaskStore
@ -105,38 +105,70 @@ class TaskService:
for name in skill_names or []:
if name not in task.skill_names:
task.skill_names.append(name)
task.status = "awaiting_acceptance"
task.updated_at = self._now()
self.store.upsert_task(task)
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
return task
def record_validation(
def add_acceptance(
self,
task_id: str,
run_id: str,
validation: ValidationResult,
*,
final_attempt: bool = True,
has_usable_answer: bool = True,
acceptance_type: str,
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
if validation.status == "accepted":
task.status = "awaiting_feedback"
elif validation.status in {"insufficient_evidence", "validator_error"}:
task.status = "needs_review"
elif validation.status == "rejected" and not final_attempt:
normalized = normalize_acceptance_type(acceptance_type)
matching_acceptance = any(
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
for item in task.feedback
)
conflicting_acceptance = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
),
None,
)
if conflicting_acceptance is not None:
raise ValueError(
f"Acceptance for run_id={run_id!r} was already recorded as "
f"{conflicting_acceptance.get('acceptance_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_acceptance:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_acceptance:
return task
entry = {
"acceptance_type": normalized,
"feedback_type": "satisfied" if normalized == "accept" else normalized,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if normalized == "revise":
task.status = "needs_revision"
elif validation.status == "rejected" and has_usable_answer:
task.status = "needs_review"
else:
task.status = "failed"
elif normalized == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = "automatic validation rejected the final attempt"
task.close_reason = comment or "abandoned"
elif normalized == "accept":
task.status = "closed"
task.closed_at = now
task.close_reason = "accepted"
task.satisfaction = 1.0
if run_id:
task.metadata["final_accepted_run_id"] = run_id
task.updated_at = now
task.validation_result = validation.to_dict()
self.store.upsert_task(task)
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
return task
def add_feedback(
@ -147,52 +179,12 @@ class TaskService:
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
matching_feedback = any(
item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
for item in task.feedback
return self.add_acceptance(
task_id,
acceptance_type=feedback_type,
comment=comment,
run_id=run_id,
)
conflicting_feedback = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
),
None,
)
if conflicting_feedback is not None:
raise ValueError(
f"Feedback for run_id={run_id!r} was already recorded as "
f"{conflicting_feedback.get('feedback_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_feedback:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_feedback:
return task
entry = {
"feedback_type": feedback_type,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if feedback_type == "revise":
task.status = "needs_revision"
elif feedback_type == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = comment or "abandoned"
elif feedback_type == "satisfied":
task.status = "closed"
task.closed_at = now
task.close_reason = "satisfied"
task.satisfaction = 1.0
task.updated_at = now
self.store.upsert_task(task)
self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
return task
def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
task = self._require(task_id)
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
if len(words) <= 4:
return cleaned[:40]
return " ".join(words[:4])[:40]
def normalize_acceptance_type(value: str) -> str:
normalized = (value or "").strip().lower()
if normalized == "satisfied":
return "accept"
if normalized not in {"accept", "revise", "abandon"}:
raise ValueError("acceptance_type must be one of: accept, revise, abandon")
return normalized

View File

@ -1,154 +0,0 @@
"""Automatic validation for internal Task mode."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
class ValidationService:
async def validate_task_result(
self,
*,
task: TaskRecord,
user_message: str,
final_output: str,
evidence_packet: Any | None = None,
evidence_text: str = "",
transcript_excerpt: str = "",
tool_summaries: list[str] | None = None,
team_summaries: list[str] | None = None,
provider_bundle: ProviderBundle | None = None,
) -> ValidationResult:
provider = None
model = None
if provider_bundle is not None:
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
model = getattr(runtime, "model", None)
if provider is not None:
try:
return await self._validate_with_provider(
provider=provider,
model=model,
task=task,
user_message=user_message,
final_output=final_output,
evidence_text=evidence_text,
transcript_excerpt=transcript_excerpt,
tool_summaries=tool_summaries or [],
team_summaries=team_summaries or [],
)
except Exception as exc:
return ValidationResult(
status="validator_error",
score=0.0,
issues=[f"Validator failed: {exc}"],
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
missing_requirements=["User review is required because automatic validation failed."],
recommended_revision_prompt=(
"Review the answer and evidence, then decide whether to revise or accept it."
),
validator="llm_error",
)
return self._heuristic_validate(final_output)
async def _validate_with_provider(
self,
*,
provider: Any,
model: str | None,
task: TaskRecord,
user_message: str,
final_output: str,
evidence_text: str,
transcript_excerpt: str,
tool_summaries: list[str],
team_summaries: list[str],
) -> ValidationResult:
legacy_context = "" if evidence_text else (
f"Transcript excerpt:\n{transcript_excerpt}\n\n"
f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
)
prompt = (
"Validate whether the assistant output satisfies the task. "
"Return only compact JSON with keys: passed, score, issues, "
"missing_requirements, recommended_revision_prompt.\n\n"
f"Task goal:\n{task.goal}\n\n"
f"Current user request:\n{user_message}\n\n"
f"Evidence packet:\n{evidence_text}\n\n"
f"{legacy_context}"
f"Assistant final output:\n{final_output}"
)
response = await provider.chat(
messages=[
{"role": "system", "content": "You are a strict task result validator."},
{"role": "user", "content": prompt},
],
tools=None,
model=model,
max_tokens=4096,
temperature=0.0,
)
payload = self._parse_json_object(response.content or "")
status = payload.get("status")
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
status = (
"accepted"
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
else "rejected"
)
return ValidationResult(
status=status,
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
issues=[str(item) for item in payload.get("issues") or []],
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
validator="llm",
)
@staticmethod
def _heuristic_validate(final_output: str) -> ValidationResult:
text = final_output.strip()
if not text:
return ValidationResult(
passed=False,
score=0.0,
issues=["Assistant output is empty."],
missing_requirements=["A non-empty result is required."],
recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
validator="heuristic",
)
lowered = text.lower()
if "run failed before completion" in lowered or "tool loop stopped" in lowered:
return ValidationResult(
passed=False,
score=0.35,
issues=["The run did not complete cleanly."],
missing_requirements=["A successful final result is required."],
recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
validator="heuristic",
)
return ValidationResult(passed=True, score=0.85, validator="heuristic")
@staticmethod
def _parse_json_object(text: str) -> dict[str, Any]:
cleaned = text.strip()
if cleaned.startswith("```"):
cleaned = cleaned.strip("`")
if cleaned.lower().startswith("json"):
cleaned = cleaned[4:].strip()
start = cleaned.find("{")
end = cleaned.rfind("}")
if start >= 0 and end >= start:
cleaned = cleaned[start : end + 1]
payload = json.loads(cleaned)
if not isinstance(payload, dict):
raise ValueError("validator response must be a JSON object")
return payload