feat(engine): 添加运行时上下文支持并重构工具迭代限制

添加 RuntimeContext 类用于捕获模型运行时的日期时间信息,
包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。

同时增加最大上下文消息数和工具迭代次数的配置选项,
将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。

BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。

- 添加 RuntimeContext 类和相关渲染方法
- 增加 max_context_messages 和 max_tool_iterations 配置
- 移除 ValidationService 相关代码
- 更新消息记录中的验证状态字段
- 添加原始工具调用检测和回退处理
This commit is contained in:
2026-05-26 11:18:35 +08:00
parent 16347caf5e
commit 6e9e74d1ee
57 changed files with 5710 additions and 1582 deletions

View File

@ -4,6 +4,7 @@ from .builder import (
ContextBuildInput,
ContextBuildResult,
ContextBuilder,
RuntimeContext,
SessionContext,
SkillContext,
)
@ -12,6 +13,7 @@ __all__ = [
"ContextBuildInput",
"ContextBuildResult",
"ContextBuilder",
"RuntimeContext",
"SessionContext",
"SkillContext",
]

View File

@ -80,6 +80,16 @@ class SessionContext:
parent_session_id: str | None = None
@dataclass(slots=True)
class RuntimeContext:
"""Per-run runtime facts that should be visible to the model."""
utc_datetime: str
local_datetime: str
timezone: str | None = None
utc_offset: str | None = None
@dataclass(slots=True)
class ContextBuildInput:
"""一次上下文构建所需的全部输入。
@ -103,6 +113,7 @@ class ContextBuildInput:
memory_snapshot: MemorySnapshot | None = None
activated_skills: list[SkillContext] = field(default_factory=list)
session_context: SessionContext | None = None
runtime_context: RuntimeContext | None = None
execution_context: str | None = None
extra_sections: list[str] = field(default_factory=list)
@ -143,9 +154,10 @@ class ContextBuilder:
1. Beaver user-facing assistant identity
2. base system prompt
3. session metadata
4. execution context
5. frozen memory snapshot
6. extra sections
4. runtime date/time
5. execution context
6. frozen memory snapshot
7. extra sections
这样设计的原因:
- 身份与总规则要最靠前
@ -164,6 +176,10 @@ class ContextBuilder:
if session_section:
sections.append(session_section)
runtime_section = self._render_runtime_section(build_input.runtime_context)
if runtime_section:
sections.append(runtime_section)
execution_context = (build_input.execution_context or "").strip()
if execution_context:
sections.append(f"# Execution Context\n\n{execution_context}")
@ -347,6 +363,31 @@ class ContextBuilder:
return None
return "# Current Session\n\n" + "\n".join(rows)
def _render_runtime_section(self, runtime_context: RuntimeContext | None) -> str | None:
"""Render date/time facts captured for the current model run."""
if runtime_context is None:
return None
rows: list[str] = []
if runtime_context.utc_datetime:
rows.append(f"Current UTC time: {runtime_context.utc_datetime}")
if runtime_context.local_datetime:
rows.append(f"Current local time: {runtime_context.local_datetime}")
if runtime_context.timezone:
rows.append(f"Local timezone: {runtime_context.timezone}")
if runtime_context.utc_offset:
rows.append(f"Local UTC offset: {runtime_context.utc_offset}")
if not rows:
return None
return (
"# Current Date and Time\n\n"
+ "\n".join(rows)
+ "\n\nUse this section as authoritative for relative date/time references such as "
'"today", "tomorrow", "now", "this week", and "next month".'
)
def build_skill_activation_messages(self, activated_skills: list[SkillContext]) -> list[dict[str, str]]:
"""把已激活 skill 转成显式消息。

View File

@ -24,7 +24,7 @@ from beaver.skills.learning.eval import SkillDraftEvaluator
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
from beaver.tasks import TaskExecutionPlanner, TaskService
from beaver.tasks.skill_resolver import TaskSkillResolver
from beaver.skills import SkillAssembler, SkillsLoader
from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
@ -91,7 +91,6 @@ class EngineLoadResult:
task_skill_resolver: TaskSkillResolver | None = None
task_service: TaskService | None = None
task_execution_planner: TaskExecutionPlanner | None = None
validation_service: ValidationService | None = None
mcp_manager: MCPConnectionManager | None = None
mcp_report: dict[str, dict] = field(default_factory=dict)
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
@ -166,7 +165,6 @@ class EngineLoader:
task_skill_resolver: TaskSkillResolver | None = None,
task_service: TaskService | None = None,
task_execution_planner: TaskExecutionPlanner | None = None,
validation_service: ValidationService | None = None,
) -> None:
self.config = config or load_config(workspace=workspace, config_path=config_path)
configured_workspace = self.config.agents_defaults.workspace
@ -192,7 +190,6 @@ class EngineLoader:
self._task_skill_resolver = task_skill_resolver
self._task_service = task_service
self._task_execution_planner = task_execution_planner
self._validation_service = validation_service
def load(self) -> EngineLoadResult:
"""装配当前主链需要的最小 runtime 对象。"""
@ -276,7 +273,6 @@ class EngineLoader:
)
task_service = self._task_service or TaskService(workspace / "tasks")
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
validation_service = self._validation_service or ValidationService()
mcp_manager = MCPConnectionManager(
self.config.tools.mcp_servers,
authz_config=self.config.authz,
@ -311,7 +307,6 @@ class EngineLoader:
task_skill_resolver=task_skill_resolver,
task_service=task_service,
task_execution_planner=task_execution_planner,
validation_service=validation_service,
mcp_manager=mcp_manager,
)
if self._session_manager is None:

View File

@ -4,12 +4,15 @@ from __future__ import annotations
import asyncio
import json
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
from uuid import uuid4
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
from beaver.engine.context import ContextBuildInput, RuntimeContext, SessionContext, SkillContext
from beaver.memory.runs import RunRecord, SkillEffectRecord
from beaver.skills.learning import RunReceiptContext
from beaver.skills.catalog.utils import strip_frontmatter
@ -26,6 +29,17 @@ TOOL_FAILURE_GUIDANCE_PROMPT = (
"Use available materials, state uncertainty clearly, and provide partial confirmed results."
)
RAW_TOOL_CALL_FALLBACK = (
"The run reached the configured tool-call limit before producing a reliable final answer. "
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
"Please request a revision to continue the task."
)
_RAW_TOOL_CALL_RE = re.compile(
r"^\s*<tool_call\b[\s\S]*?</tool_call>\s*$|^\s*<function=[^>]+>[\s\S]*?</function>\s*$",
re.IGNORECASE,
)
@dataclass(slots=True)
class AgentProfile:
@ -35,8 +49,9 @@ class AgentProfile:
system_prompt: str = ""
default_model: str = "gpt-4.1-mini"
max_tokens: int = 4096
max_context_messages: int = 1000
temperature: float = 0.2
max_tool_iterations: int = 8
max_tool_iterations: int = 30
@dataclass(slots=True)
@ -446,7 +461,7 @@ class AgentLoop:
*(pinned_skill_contexts or []),
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
]
if not include_skill_assembly or thinking_enabled is False:
if not include_skill_assembly:
activated_skills = self._merge_skill_contexts(pinned_skills, [])
else:
skill_query = skill_selection_context or task
@ -512,8 +527,6 @@ class AgentLoop:
if not include_tools:
selected_tool_specs = []
elif thinking_enabled is False:
selected_tool_specs = tool_registry.list_specs()
else:
selected_tool_specs = await tool_assembler.assemble(
task_description=task,
@ -543,7 +556,10 @@ class AgentLoop:
build_input = ContextBuildInput(
base_system_prompt=self.profile.system_prompt,
history=session_manager.get_history(resolved_session_id),
history=session_manager.get_history(
resolved_session_id,
max_messages=max(1, self.profile.max_context_messages),
),
current_user_input=task,
memory_snapshot=memory_snapshot,
activated_skills=activated_skills,
@ -554,6 +570,7 @@ class AgentLoop:
user_id=user_id,
parent_session_id=parent_session_id,
),
runtime_context=self._current_runtime_context(),
execution_context=execution_context,
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
)
@ -693,6 +710,7 @@ class AgentLoop:
tool_calls=assistant_tool_calls or None,
finish_reason=response.finish_reason,
reasoning=response.reasoning_content,
context_visible=not bool(assistant_tool_calls),
source=source,
title=title,
model=final_model,
@ -707,7 +725,11 @@ class AgentLoop:
if not response.has_tool_calls:
final_text = response.content or ""
final_finish_reason = response.finish_reason or "stop"
if self._looks_like_raw_tool_call(final_text):
final_text = RAW_TOOL_CALL_FALLBACK
final_finish_reason = "invalid_tool_call_text"
else:
final_finish_reason = response.finish_reason or "stop"
break
if iterations >= resolved_max_tool_iterations:
@ -719,10 +741,7 @@ class AgentLoop:
temperature=resolved_temperature,
thinking_enabled=thinking_enabled,
)
final_text = finalized or (
"Tool loop stopped after reaching the configured iteration limit, "
"and no final answer was produced."
)
final_text = finalized or RAW_TOOL_CALL_FALLBACK
final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
session_manager.append_message(
resolved_session_id,
@ -877,17 +896,14 @@ class AgentLoop:
temperature: float,
thinking_enabled: bool | None,
) -> str:
final_messages = [
*messages,
{
"role": "system",
"content": (
"The configured tool iteration budget is exhausted. Do not call tools. "
"Produce the best final answer from the existing conversation and tool results. "
"State uncertainty explicitly."
),
},
]
final_messages = AgentLoop._with_system_guidance(
messages,
(
"The configured tool iteration budget is exhausted. Do not call tools. "
"Produce the best final answer from the existing conversation and tool results. "
"State uncertainty explicitly."
),
)
kwargs: dict[str, Any] = {
"messages": final_messages,
"tools": None,
@ -898,7 +914,27 @@ class AgentLoop:
if thinking_enabled is not None:
kwargs["thinking_enabled"] = thinking_enabled
response = await provider.chat(**kwargs)
return (response.content or "").strip()
if response.has_tool_calls:
return ""
content = (response.content or "").strip()
if AgentLoop._looks_like_raw_tool_call(content):
return ""
return content
@staticmethod
def _looks_like_raw_tool_call(content: str | None) -> bool:
if not content:
return False
return bool(_RAW_TOOL_CALL_RE.match(content))
@staticmethod
def _with_system_guidance(messages: list[dict[str, Any]], guidance: str) -> list[dict[str, Any]]:
copied = [dict(message) for message in messages]
if copied and copied[0].get("role") == "system":
existing = str(copied[0].get("content") or "").strip()
copied[0]["content"] = "\n\n".join(part for part in (existing, guidance.strip()) if part)
return copied
return [{"role": "system", "content": guidance.strip()}, *copied]
@staticmethod
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
@ -1133,3 +1169,49 @@ class AgentLoop:
@staticmethod
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()
@staticmethod
def _current_runtime_context() -> RuntimeContext:
utc_now = datetime.now(timezone.utc)
timezone_name = AgentLoop._configured_timezone_name()
local_now = datetime.now().astimezone()
rendered_timezone = local_now.tzname()
if timezone_name:
try:
local_now = utc_now.astimezone(ZoneInfo(timezone_name))
rendered_timezone = timezone_name
except ZoneInfoNotFoundError:
rendered_timezone = local_now.tzname() or timezone_name
return RuntimeContext(
utc_datetime=utc_now.isoformat(),
local_datetime=local_now.isoformat(),
timezone=rendered_timezone,
utc_offset=AgentLoop._format_utc_offset(local_now),
)
@staticmethod
def _configured_timezone_name() -> str | None:
for value in (os.getenv("BEAVER_RUNTIME_TIMEZONE"), os.getenv("TZ")):
cleaned = (value or "").strip()
if cleaned:
return cleaned
try:
timezone_file = "/etc/timezone"
if os.path.exists(timezone_file):
with open(timezone_file, encoding="utf-8") as file:
cleaned = file.read().strip()
if cleaned:
return cleaned
except OSError:
return None
return None
@staticmethod
def _format_utc_offset(value: datetime) -> str | None:
raw = value.strftime("%z")
if not raw:
return None
return f"{raw[:3]}:{raw[3:]}"

View File

@ -119,13 +119,23 @@ class LiteLLMProvider(LLMProvider):
@staticmethod
def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
sanitized = []
system_contents: list[str] = []
for message in messages:
clean = {key: value for key, value in message.items() if key in _ALLOWED_MSG_KEYS}
if clean.get("role") == "system":
content = clean.get("content")
if isinstance(content, str) and content.strip():
system_contents.append(content.strip())
elif content is not None:
system_contents.append(str(content))
continue
if clean.get("role") == "assistant" and "content" not in clean:
clean["content"] = None
if isinstance(clean.get("tool_calls"), list):
clean["tool_calls"] = LiteLLMProvider._sanitize_tool_calls(clean["tool_calls"])
sanitized.append(clean)
if system_contents:
sanitized.insert(0, {"role": "system", "content": "\n\n".join(system_contents)})
return sanitized
@staticmethod

View File

@ -84,8 +84,10 @@ class MessageRecord:
payload["task_id"] = self.event_payload.get("task_id")
if self.event_payload.get("task_status"):
payload["task_status"] = self.event_payload.get("task_status")
if self.event_payload.get("validation_status"):
payload["validation_status"] = self.event_payload.get("validation_status")
if self.event_payload.get("evidence_status"):
payload["evidence_status"] = self.event_payload.get("evidence_status")
if self.event_payload.get("acceptance_state"):
payload["acceptance_state"] = self.event_payload.get("acceptance_state")
if self.event_payload.get("feedback_state"):
payload["feedback_state"] = self.event_payload.get("feedback_state")
if self.event_payload.get("feedback_error"):

View File

@ -86,6 +86,18 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
model=_string(defaults.get("model") or data.get("model")),
provider=_string(defaults.get("provider") or data.get("provider")),
embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")),
max_context_messages=_int(
defaults.get("maxContextMessages")
or defaults.get("max_context_messages")
or data.get("maxContextMessages")
or data.get("max_context_messages")
),
max_tool_iterations=_int(
defaults.get("maxToolIterations")
or defaults.get("max_tool_iterations")
or data.get("maxToolIterations")
or data.get("max_tool_iterations")
),
)
@ -217,6 +229,13 @@ def _float(value: Any) -> float | None:
return float(value)
def _int(value: Any) -> int | None:
parsed = _float(value)
if parsed is None:
return None
return int(parsed)
def _bool(value: Any, *, default: bool) -> bool:
if isinstance(value, bool):
return value

View File

@ -25,6 +25,8 @@ class AgentDefaultsConfig:
model: str | None = None
provider: str | None = None
embedding_model: str | None = None
max_context_messages: int | None = None
max_tool_iterations: int | None = None
@dataclass(slots=True)

View File

@ -44,6 +44,8 @@ from .files import (
workspace_file_path,
)
from .schemas import (
WebChatAcceptanceRequest,
WebChatAcceptanceResponse,
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
@ -155,6 +157,13 @@ except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only env
return decorator
RAW_TOOL_CALL_DISPLAY_FALLBACK = (
"The run reached the configured tool-call limit before producing a reliable final answer. "
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
"Please request a revision to continue the task."
)
@asynccontextmanager
async def _app_lifespan(
app: FastAPI,
@ -365,6 +374,7 @@ def create_app(
"workspace_exists": loaded.workspace.exists(),
"model": config.default_model or agent_service.profile.default_model,
"max_tokens": agent_service.profile.max_tokens,
"max_context_messages": agent_service.profile.max_context_messages,
"temperature": agent_service.profile.temperature,
"max_tool_iterations": agent_service.profile.max_tool_iterations,
"providers": providers_status,
@ -1719,7 +1729,8 @@ def create_app(
usage=result.usage,
task_id=result.task_id,
task_status=result.task_status,
validation_result=result.validation_result,
evidence_status="recorded" if result.task_id else None,
validation_result=None,
)
fallback_target = _model_dump(payload.fallback_target)
@ -1769,7 +1780,8 @@ def create_app(
usage=result.usage,
task_id=result.task_id,
task_status=result.task_status,
validation_result=result.validation_result,
evidence_status="recorded" if result.task_id else None,
validation_result=None,
)
@app.websocket("/ws/{session_id:path}")
@ -1882,6 +1894,30 @@ def create_app(
}
)
@app.post(
"/api/chat/acceptance",
response_model=WebChatAcceptanceResponse,
responses={
400: {"model": WebErrorResponse},
404: {"model": WebErrorResponse},
},
)
async def chat_acceptance(request: Request, payload: WebChatAcceptanceRequest) -> WebChatAcceptanceResponse:
agent_service = get_agent_service(request)
try:
result = await agent_service.submit_acceptance(
session_id=payload.session_id,
run_id=payload.run_id,
acceptance_type=payload.acceptance_type,
comment=payload.comment,
)
except ValueError as exc:
detail = str(exc)
status_code = 404 if "No internal task" in detail else 400
raise HTTPException(status_code=status_code, detail=detail) from exc
return WebChatAcceptanceResponse(**result)
@app.post(
"/api/chat/feedback",
response_model=WebChatFeedbackResponse,
@ -1893,10 +1929,10 @@ def create_app(
async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
agent_service = get_agent_service(request)
try:
result = await agent_service.submit_feedback(
result = await agent_service.submit_acceptance(
session_id=payload.session_id,
run_id=payload.run_id,
feedback_type=payload.feedback_type,
acceptance_type=payload.feedback_type,
comment=payload.comment,
)
except ValueError as exc:
@ -1915,15 +1951,21 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An
role = event.get("role")
if role not in {"user", "assistant"}:
continue
content = event.get("content") or ""
comparable_content = str(content).replace("\u200b", "").replace("\u200c", "").replace("\u200d", "").replace("\ufeff", "")
if role == "assistant" and not comparable_content.strip():
continue
content = _sanitize_user_visible_assistant_content(role=role, content=content)
messages.append(
{
"role": role,
"content": event.get("content") or "",
"content": content,
"timestamp": _iso_from_timestamp(event.get("timestamp")),
"run_id": event.get("run_id"),
"task_id": event.get("task_id"),
"task_status": event.get("task_status"),
"validation_status": event.get("validation_status"),
"evidence_status": event.get("evidence_status"),
"acceptance_state": event.get("acceptance_state"),
"feedback_state": event.get("feedback_state"),
"feedback_error": event.get("feedback_error"),
"message_type": event.get("message_type"),
@ -2142,6 +2184,7 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
content = (record.content or "").strip()
if not content:
continue
content = _sanitize_user_visible_assistant_content(role=record.role, content=content)
messages.append(
{
"role": record.role,
@ -2150,7 +2193,6 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
"tool_name": record.tool_name,
}
)
validation = run_record.validation_result if run_record is not None else None
views.append(
{
"run_id": run_id,
@ -2163,7 +2205,8 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
"attempt_index": run_record.attempt_index if run_record is not None else None,
"task_text": run_record.task_text if run_record is not None else "",
"messages": messages,
"validation_result": validation,
"evidence_status": "recorded",
"validation_result": None,
}
)
return views
@ -2428,12 +2471,6 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
return dict(value)
def _validation_status(validation_result: dict[str, Any] | None) -> str:
if validation_result is None:
return "unknown"
return "passed" if validation_result.get("accepted") is True else "failed"
def _websocket_input_metadata(payload: dict[str, Any]) -> dict[str, Any]:
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
result: dict[str, Any] = dict(metadata)
@ -2467,13 +2504,15 @@ def _int_or_none(value: Any) -> int | None:
def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
validation_result = getattr(result, "validation_result", None)
task_id = getattr(result, "task_id", None)
task_status = getattr(result, "task_status", None)
return {
"type": "message",
"role": "assistant",
"content": getattr(result, "output_text", "") or "",
"content": _sanitize_user_visible_assistant_content(
role="assistant",
content=getattr(result, "output_text", "") or "",
),
"session_id": getattr(result, "session_id", None),
"run_id": getattr(result, "run_id", None),
"finish_reason": getattr(result, "finish_reason", None),
@ -2483,17 +2522,39 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
"usage": dict(getattr(result, "usage", {}) or {}),
"task_id": task_id,
"task_status": task_status,
"validation_result": validation_result,
"validation_status": _validation_status(validation_result),
"evidence_status": "recorded" if task_id else None,
"validation_result": None,
"metadata": {
"task_id": task_id,
"task_status": task_status,
"validation_result": validation_result,
"evidence_status": "recorded" if task_id else None,
"input_metadata": _websocket_input_metadata(input_payload),
},
}
def _sanitize_user_visible_assistant_content(*, role: str, content: str) -> str:
if role != "assistant":
return content
if _looks_like_raw_tool_call(content):
return RAW_TOOL_CALL_DISPLAY_FALLBACK
return content
def _looks_like_raw_tool_call(content: str | None) -> bool:
if not content:
return False
stripped = content.strip()
lowered = stripped.lower()
return (
lowered.startswith("<tool_call")
and lowered.endswith("</tool_call>")
) or (
lowered.startswith("<function=")
and lowered.endswith("</function>")
)
def _provider_enabled(provider_name: str, provider_cfg: Any) -> bool:
if provider_cfg is None or provider_name == "custom":
return False
@ -2980,6 +3041,7 @@ def _write_config_json(path: Path, data: dict[str, Any]) -> None:
def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
config = load_config(config_path=config_path)
agent_service.loader.config = config
agent_service._apply_configured_profile_defaults() # noqa: SLF001
loop = getattr(agent_service, "_loop", None)
loaded = getattr(loop, "loaded", None) if loop is not None else None
if loaded is not None:

View File

@ -1,6 +1,8 @@
"""Web request and response schemas."""
from .chat import (
WebChatAcceptanceRequest,
WebChatAcceptanceResponse,
WebChatFeedbackRequest,
WebChatFeedbackResponse,
WebChatRequest,
@ -13,6 +15,8 @@ from .chat import (
)
__all__ = [
"WebChatAcceptanceRequest",
"WebChatAcceptanceResponse",
"WebChatFeedbackRequest",
"WebChatFeedbackResponse",
"WebChatRequest",

View File

@ -82,11 +82,34 @@ class WebChatResponse(BaseModel):
usage: dict[str, Any] = Field(default_factory=dict)
task_id: str | None = None
task_status: str | None = None
evidence_status: str | None = None
acceptance_state: str | None = None
validation_result: dict[str, Any] | None = None
class WebChatAcceptanceRequest(BaseModel):
"""User acceptance on the latest assistant result in chat."""
session_id: str
run_id: str
acceptance_type: str
comment: str | None = None
class WebChatAcceptanceResponse(BaseModel):
"""Acceptance recording result."""
session_id: str
run_id: str
task_id: str
task_status: str
acceptance_type: str
feedback_type: str
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
class WebChatFeedbackRequest(BaseModel):
"""Feedback on the latest assistant result in chat."""
"""Backward-compatible feedback payload."""
session_id: str
run_id: str
@ -94,15 +117,8 @@ class WebChatFeedbackRequest(BaseModel):
comment: str | None = None
class WebChatFeedbackResponse(BaseModel):
"""Feedback recording result."""
session_id: str
run_id: str
task_id: str
task_status: str
feedback_type: str
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
class WebChatFeedbackResponse(WebChatAcceptanceResponse):
"""Backward-compatible feedback response."""
class WebProviderConfigRequest(BaseModel):

View File

@ -29,9 +29,9 @@ from beaver.tasks import (
TaskEvidencePacket,
TaskExecutionPlan,
TaskRecord,
ValidationResult,
render_task_evidence,
)
from beaver.tasks.service import normalize_acceptance_type
NOTIFICATION_SESSION_ID = "notify:default:scheduled"
@ -60,11 +60,19 @@ class AgentService:
) -> None:
self.profile = profile or AgentProfile()
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
self._apply_configured_profile_defaults()
self._loop: AgentLoop | None = None
self._run_task: asyncio.Task[None] | None = None
self._main_agent_router = MainAgentRouter()
self._runtime_services: dict[str, Any] = {}
def _apply_configured_profile_defaults(self) -> None:
defaults = self.loader.config.agents_defaults
if defaults.max_context_messages is not None:
self.profile.max_context_messages = max(1, defaults.max_context_messages)
if defaults.max_tool_iterations is not None:
self.profile.max_tool_iterations = max(0, defaults.max_tool_iterations)
def create_loop(self) -> AgentLoop:
"""创建并缓存当前 service 使用的 AgentLoop。"""
@ -232,7 +240,7 @@ class AgentService:
Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
This entry bypasses the main-agent classifier and forces Task mode so
every trigger produces a TaskRecord, validation, feedback state, and a
every trigger produces a TaskRecord, evidence, acceptance state, and a
run_id that the scheduled-task history can link to.
"""
@ -280,9 +288,9 @@ class AgentService:
result.run_id,
{
"message_type": "scheduled_reply",
"scheduled_job_id": job.id,
"scheduled_run_id": run.scheduled_run_id,
"cron_job_name": job.name,
"scheduled_job_id": cron_job_id,
"scheduled_run_id": scheduled_run_id,
"cron_job_name": cron_job_name,
"mode": "notification",
},
)
@ -403,15 +411,15 @@ class AgentService:
},
)
async def submit_feedback(
async def submit_acceptance(
self,
*,
session_id: str,
run_id: str,
feedback_type: str,
acceptance_type: str,
comment: str | None = None,
) -> dict[str, Any]:
"""Record chat feedback for the internal task linked to a run."""
"""Record user acceptance for the internal task linked to a run."""
loaded = self.create_loop().boot()
task_service = self._require_loaded(loaded, "task_service")
@ -419,32 +427,31 @@ class AgentService:
if task is None or task.session_id != session_id:
raise ValueError(f"No internal task found for run_id={run_id!r}")
normalized = feedback_type.strip().lower()
if normalized not in {"satisfied", "revise", "abandon"}:
raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
normalized = normalize_acceptance_type(acceptance_type)
legacy_feedback_type = "satisfied" if normalized == "accept" else normalized
already_recorded = any(
item.get("run_id") == run_id and item.get("feedback_type") == normalized
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
for item in task.feedback
)
conflicting_feedback = next(
conflicting_acceptance = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("feedback_type") != normalized
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
),
None,
)
if conflicting_feedback is not None:
if conflicting_acceptance is not None:
raise ValueError(
f"Feedback for run_id={run_id!r} was already recorded as "
f"{conflicting_feedback.get('feedback_type')!r}"
f"Acceptance for run_id={run_id!r} was already recorded as "
f"{conflicting_acceptance.get('acceptance_type')!r}"
)
if task.status in {"closed", "abandoned"} and not already_recorded:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
updated = task if already_recorded else task_service.add_feedback(
updated = task if already_recorded else task_service.add_acceptance(
task.task_id,
feedback_type=normalized,
acceptance_type=normalized,
comment=comment,
run_id=run_id,
)
@ -455,7 +462,8 @@ class AgentService:
{
"task_id": updated.task_id,
"task_status": updated.status,
"feedback_state": normalized,
"acceptance_state": normalized,
"feedback_state": legacy_feedback_type,
},
)
if not already_recorded:
@ -463,10 +471,11 @@ class AgentService:
session_id,
run_id=run_id,
role="system",
event_type="task_feedback_recorded",
event_type="task_acceptance_recorded",
event_payload={
"task_id": task.task_id,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment,
"task_status": updated.status,
},
@ -475,35 +484,36 @@ class AgentService:
)
generated_candidates = []
validation = ValidationResult.from_dict(updated.validation_result)
if not already_recorded:
run_memory_store = self._require_loaded(loaded, "run_memory_store")
feedback_payload = {
"feedback_type": normalized,
acceptance_payload = {
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment or "",
"task_status": updated.status,
"final_accepted_run_id": updated.metadata.get("final_accepted_run_id"),
}
run_memory_store.update_run_record(
run_id,
success=normalized == "satisfied",
feedback=feedback_payload,
success=normalized == "accept",
feedback=acceptance_payload,
)
run_memory_store.update_skill_effects_for_run(
run_id,
success=normalized == "satisfied",
feedback_score=self._feedback_score_for_learning(normalized, validation),
success=normalized == "accept",
feedback_score=self._acceptance_score_for_learning(normalized),
notes=(comment or normalized).strip(),
)
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
skill_learning_service.rescore_skill_versions()
if already_recorded:
generated_candidates = []
elif normalized == "satisfied" and validation is not None and validation.accepted:
elif normalized == "accept":
generated_candidates = [
item.to_dict()
for item in skill_learning_service.build_learning_candidates_for_task(
updated.task_id,
trigger_run_id=run_id,
final_accepted_run_id=run_id,
)
]
elif normalized == "abandon":
@ -514,7 +524,8 @@ class AgentService:
event_type="task_failure_evidence_recorded",
event_payload={
"task_id": updated.task_id,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"comment": comment or "",
"task_status": updated.status,
"durable_memory_written": False,
@ -528,10 +539,28 @@ class AgentService:
"run_id": run_id,
"task_id": updated.task_id,
"task_status": updated.status,
"feedback_type": normalized,
"acceptance_type": normalized,
"feedback_type": legacy_feedback_type,
"learning_candidates": generated_candidates,
}
async def submit_feedback(
self,
*,
session_id: str,
run_id: str,
feedback_type: str,
comment: str | None = None,
) -> dict[str, Any]:
"""Backward-compatible wrapper for older clients."""
return await self.submit_acceptance(
session_id=session_id,
run_id=run_id,
acceptance_type=feedback_type,
comment=comment,
)
async def _process_with_main_agent(
self,
message: str,
@ -591,7 +620,7 @@ class AgentService:
else active_task
)
if active_task is not None and decision.action == "revise_task" and task.task_id == active_task.task_id:
task = self._record_revision_feedback_for_task(
task = self._record_revision_acceptance_for_task(
loaded,
task=task,
session_id=session_id,
@ -599,7 +628,7 @@ class AgentService:
)
return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)
def _record_revision_feedback_for_task(
def _record_revision_acceptance_for_task(
self,
loaded: Any,
*,
@ -607,9 +636,9 @@ class AgentService:
session_id: str,
comment: str,
) -> TaskRecord:
"""Mark the latest feedback-eligible run as revised before continuing a task."""
"""Mark the latest acceptance-eligible run as revised before continuing a task."""
if task.status not in {"awaiting_feedback", "needs_revision"}:
if task.status not in {"awaiting_acceptance", "needs_revision"}:
return task
run_id = next((item for item in reversed(task.run_ids) if item), None)
if not run_id:
@ -617,15 +646,15 @@ class AgentService:
existing = next((item for item in task.feedback if item.get("run_id") == run_id), None)
if existing is not None:
if existing.get("feedback_type") != "revise":
if existing.get("acceptance_type") != "revise":
return task
updated = task
already_recorded = True
else:
task_service = self._require_loaded(loaded, "task_service")
updated = task_service.add_feedback(
updated = task_service.add_acceptance(
task.task_id,
feedback_type="revise",
acceptance_type="revise",
comment=comment,
run_id=run_id,
)
@ -638,6 +667,7 @@ class AgentService:
{
"task_id": updated.task_id,
"task_status": updated.status,
"acceptance_state": "revise",
"feedback_state": "revise",
},
)
@ -648,9 +678,10 @@ class AgentService:
session_id,
run_id=run_id,
role="system",
event_type="task_feedback_recorded",
event_type="task_acceptance_recorded",
event_payload={
"task_id": updated.task_id,
"acceptance_type": "revise",
"feedback_type": "revise",
"comment": comment,
"task_status": updated.status,
@ -659,12 +690,12 @@ class AgentService:
content=comment,
context_visible=False,
)
validation = ValidationResult.from_dict(updated.validation_result)
run_memory_store = self._require_loaded(loaded, "run_memory_store")
run_memory_store.update_run_record(
run_id,
success=False,
feedback={
"acceptance_type": "revise",
"feedback_type": "revise",
"comment": comment,
"task_status": updated.status,
@ -673,7 +704,7 @@ class AgentService:
run_memory_store.update_skill_effects_for_run(
run_id,
success=False,
feedback_score=self._feedback_score_for_learning("revise", validation),
feedback_score=self._acceptance_score_for_learning("revise"),
notes=comment.strip() or "revise",
)
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
@ -690,236 +721,185 @@ class AgentService:
) -> AgentRunResult:
loaded = self.create_loop().boot()
task_service = self._require_loaded(loaded, "task_service")
validation_service = self._require_loaded(loaded, "validation_service")
task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
session_manager = self._require_loaded(loaded, "session_manager")
run_memory_store = self._require_loaded(loaded, "run_memory_store")
last_result: AgentRunResult | None = None
latest_validation: ValidationResult | None = None
base_execution_context = kwargs.get("execution_context")
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
kwargs = dict(kwargs)
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
kwargs["provider_bundle"] = provider_bundle
for attempt_index in (1, 2):
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
plan = await task_execution_planner.plan(
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
plan = await task_execution_planner.plan(
task=task,
user_message=message,
attempt_index=attempt_index,
provider_bundle=provider_bundle,
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_execution_planned",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
**plan.to_event_payload(),
},
)
team_summaries: list[str] = []
team_execution_context = ""
team_result: TeamRunResult | None = None
if plan.is_team:
team_result, team_error = await self._run_team_for_task(
plan,
task=task,
user_message=message,
attempt_index=attempt_index,
latest_validation=latest_validation,
provider_bundle=provider_bundle,
parent_session_id=kwargs["session_id"],
provider_bundle_factory=team_provider_bundle_factory
or self._build_team_provider_bundle_factory(loaded, kwargs),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_execution_planned",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
**plan.to_event_payload(),
},
)
team_summaries: list[str] = []
team_execution_context = ""
team_result: TeamRunResult | None = None
if plan.is_team:
team_result, team_error = await self._run_team_for_task(
plan,
task=task,
parent_session_id=kwargs["session_id"],
provider_bundle_factory=team_provider_bundle_factory
or self._build_team_provider_bundle_factory(loaded, kwargs),
if team_result is not None:
team_summaries = [self._team_summary_for_validation(team_result)]
team_packet = TaskEvidencePacket(
task_id=task.task_id,
attempt_index=attempt_index,
main_run=None,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results),
final_output="",
)
team_execution_context = self._join_context(
self._team_execution_context(plan, team_result),
"Rendered team evidence:\n" + render_task_evidence(team_packet),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": team_result.run_ids,
"team_success": team_result.success,
"node_results": self._team_node_results_for_event(plan, team_result),
"reason": plan.reason,
"error": None if team_result.success else "one or more team nodes failed",
},
)
else:
team_summaries = [f"Team execution failed: {team_error}"]
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": [],
"team_success": False,
"reason": plan.reason,
"error": team_error,
},
)
if team_result is not None:
team_summaries = [self._team_summary_for_validation(team_result)]
team_packet = TaskEvidencePacket(
task_id=task.task_id,
attempt_index=attempt_index,
main_run=None,
team_runs=self._team_run_evidence(team_result),
team_node_results=list(team_result.node_results),
final_output="",
)
team_execution_context = self._join_context(
self._team_execution_context(plan, team_result),
"Rendered team evidence:\n" + render_task_evidence(team_packet),
)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": team_result.run_ids,
"team_success": team_result.success,
"node_results": self._team_node_results_for_event(plan, team_result),
"reason": plan.reason,
"error": None if team_result.success else "one or more team nodes failed",
},
)
else:
team_summaries = [f"Team execution failed: {team_error}"]
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_team_run_failed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
"team_run_ids": [],
"team_success": False,
"reason": plan.reason,
"error": team_error,
},
)
attempt_kwargs = dict(kwargs)
attempt_kwargs.update(
{
"task_id": task.task_id,
"task_mode": True,
"attempt_index": attempt_index,
"allow_candidate_generation": False,
}
)
if attempt_index == 2 and latest_validation is not None:
revision_context = latest_validation.recommended_revision_prompt.strip()
if revision_context:
attempt_kwargs["execution_context"] = self._join_context(
base_execution_context,
f"Task validation revision request:\n{revision_context}",
team_execution_context,
)
elif team_execution_context:
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
if plan.is_team and team_execution_context:
attempt_kwargs["include_tools"] = False
attempt_kwargs["max_tool_iterations"] = 0
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
latest_validation=latest_validation,
plan=plan,
team_summaries=team_summaries,
)
result = await runner(message, **attempt_kwargs)
last_result = result
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_synthesis_completed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"main_run_id": result.run_id,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
},
)
task = task_service.append_run(
task.task_id,
result.run_id,
skill_names=self._skill_names_for_run(loaded, result.run_id),
)
evidence_packet = self._build_task_evidence_packet(
session_manager=session_manager,
task=task,
attempt_index=attempt_index,
result=result,
team_result=team_result,
)
evidence_text = render_task_evidence(evidence_packet)
validation = await validation_service.validate_task_result(
task=task,
user_message=message,
final_output=result.output_text,
evidence_packet=evidence_packet,
evidence_text=evidence_text,
transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
team_summaries=team_summaries,
provider_bundle=provider_bundle,
)
latest_validation = validation
has_usable_answer = bool(result.output_text.strip()) and (
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
)
task = task_service.record_validation(
task.task_id,
result.run_id,
validation,
final_attempt=(
attempt_index == 2
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
),
has_usable_answer=has_usable_answer,
)
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
session_manager.update_latest_assistant_event_payload(
result.session_id,
result.run_id,
{
"task_id": task.task_id,
"task_status": task.status,
"validation_status": "passed" if validation.accepted else "failed",
},
)
validation_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
attempt_kwargs = dict(kwargs)
attempt_kwargs.update(
{
"task_id": task.task_id,
"task_mode": True,
"attempt_index": attempt_index,
"allow_candidate_generation": False,
}
retry_scheduled = validation.status == "rejected" and attempt_index == 1
session_manager.append_message(
result.session_id,
run_id=result.run_id,
role="system",
event_type="task_validation_snapshotted",
event_payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"validation_result": validation.to_dict(),
"validation_debug": validation_debug,
"retry_scheduled": retry_scheduled,
},
content=validation.recommended_revision_prompt or None,
context_visible=False,
)
if retry_scheduled:
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
result.task_id = task.task_id
result.task_status = task.status
result.validation_result = validation.to_dict()
if not retry_scheduled:
return result
)
if team_execution_context:
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
if plan.is_team and team_execution_context:
attempt_kwargs["include_tools"] = False
attempt_kwargs["max_tool_iterations"] = 0
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
task=task,
user_message=message,
attempt_index=attempt_index,
plan=plan,
team_summaries=team_summaries,
)
if last_result is None: # pragma: no cover - defensive
raise RuntimeError("Task mode did not produce a run result")
return last_result
result = await runner(message, **attempt_kwargs)
self._append_task_observation(
session_manager,
task.session_id,
event_type="task_synthesis_completed",
payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"main_run_id": result.run_id,
"plan_mode": plan.mode,
"strategy": plan.graph.strategy if plan.graph else None,
},
)
task = task_service.append_run(
task.task_id,
result.run_id,
skill_names=self._skill_names_for_run(loaded, result.run_id),
)
evidence_packet = self._build_task_evidence_packet(
session_manager=session_manager,
task=task,
attempt_index=attempt_index,
result=result,
team_result=team_result,
)
evidence_text = render_task_evidence(evidence_packet)
evidence_debug = {
"evidence_run_ids": [
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
],
"evidence_session_ids": [
item.session_id
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
],
"tool_result_count": sum(
len(item.tool_results)
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
if item is not None
),
"evidence_length": len(evidence_text),
}
session_manager.update_latest_assistant_event_payload(
result.session_id,
result.run_id,
{
"task_id": task.task_id,
"task_status": task.status,
"evidence_status": "recorded",
},
)
session_manager.append_message(
result.session_id,
run_id=result.run_id,
role="system",
event_type="task_evidence_recorded",
event_payload={
"task_id": task.task_id,
"attempt_index": attempt_index,
"evidence_debug": evidence_debug,
},
content=None,
context_visible=False,
)
result.task_id = task.task_id
result.task_status = task.status
result.validation_result = None
return result
async def _run_team_for_task(
self,
@ -986,12 +966,10 @@ class AgentService:
return []
@staticmethod
def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
if feedback_type == "satisfied":
if validation is not None:
return max(0.0, min(1.0, float(validation.score)))
def _acceptance_score_for_learning(acceptance_type: str) -> float:
if acceptance_type == "accept":
return 1.0
if feedback_type == "revise":
if acceptance_type == "revise":
return 0.5
return 0.0
@ -1001,12 +979,11 @@ class AgentService:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
plan: TaskExecutionPlan | None = None,
team_summaries: list[str] | None = None,
) -> str:
phase = f"attempt_{attempt_index}"
if latest_validation is not None:
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
phase = f"revision_attempt_{attempt_index}"
elif plan is not None and plan.is_team:
phase = f"team_synthesis_attempt_{attempt_index}"
@ -1027,24 +1004,14 @@ class AgentService:
)
else:
sections.append("Previously activated skills:\nNone")
if latest_validation is not None:
validation_lines = [
f"accepted: {latest_validation.accepted}",
f"score: {latest_validation.score}",
]
if latest_validation.issues:
validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
if latest_validation.missing_requirements:
validation_lines.append(
"missing requirements:\n"
+ "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
)
if latest_validation.recommended_revision_prompt:
validation_lines.append(
"recommended revision:\n"
+ latest_validation.recommended_revision_prompt
)
sections.append("Validation feedback:\n" + "\n".join(validation_lines))
if task.feedback:
history_lines = []
for item in task.feedback[-5:]:
kind = item.get("acceptance_type") or item.get("feedback_type")
comment = item.get("comment") or ""
run_id = item.get("run_id") or ""
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
if plan is not None:
plan_lines = [
f"mode: {plan.mode}",
@ -1313,7 +1280,8 @@ class AgentService:
"inbound_metadata": dict(inbound.metadata),
"task_id": getattr(result, "task_id", None),
"task_status": getattr(result, "task_status", None),
"validation_result": getattr(result, "validation_result", None),
"evidence_status": "recorded" if getattr(result, "task_id", None) else None,
"validation_result": None,
},
)

View File

@ -235,26 +235,45 @@ class SessionProcessProjector:
metadata=dict(payload),
)
elif record.event_type == "task_validation_snapshotted":
validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
accepted = bool(validation.get("accepted"))
root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
root["finished_at"] = created_at if root["status"] == "done" else None
elif record.event_type == "task_evidence_recorded":
root["status"] = "waiting"
root["finished_at"] = None
add_event(
event_id=_event_id(record, "validation"),
event_id=_event_id(record, "evidence"),
run_id=record.run_id or root_run_id,
parent_run_id=root_run_id if record.run_id else None,
kind="run_status",
actor_type="system",
actor_id="validator",
actor_name="Validator",
text=(
f"Validation {'passed' if accepted else 'failed'} "
f"(score={validation.get('score')})."
+ (" Retry scheduled." if payload.get("retry_scheduled") else "")
),
actor_id="evidence-recorder",
actor_name="Evidence",
text="Task evidence was recorded; waiting for user acceptance.",
created_at=created_at,
status="done" if accepted else "error",
status="done",
metadata=dict(payload),
)
elif record.event_type == "task_acceptance_recorded":
acceptance_type = str(payload.get("acceptance_type") or payload.get("feedback_type") or "")
if acceptance_type == "accept":
root["status"] = "done"
root["finished_at"] = created_at
elif acceptance_type == "abandon":
root["status"] = "cancelled"
root["finished_at"] = created_at
else:
root["status"] = "waiting"
root["finished_at"] = None
add_event(
event_id=_event_id(record, "acceptance"),
run_id=record.run_id or root_run_id,
parent_run_id=root_run_id if record.run_id else None,
kind="run_status",
actor_type="user",
actor_id="user-acceptance",
actor_name="User Acceptance",
text=f"User acceptance recorded: {acceptance_type or 'unknown'}.",
created_at=created_at,
status="done",
metadata=dict(payload),
)

View File

@ -69,15 +69,24 @@ class SkillLearningService:
existing_ids.add(candidate.candidate_id)
return candidates
def build_learning_candidates_for_task(self, task_id: str, *, trigger_run_id: str) -> list[SkillLearningCandidate]:
"""Build candidates scoped to a single validated and satisfied Task run."""
def build_learning_candidates_for_task(
self,
task_id: str,
*,
final_accepted_run_id: str | None = None,
trigger_run_id: str | None = None,
) -> list[SkillLearningCandidate]:
"""Build candidates from a user-accepted Task and all of its runs."""
final_accepted_run_id = final_accepted_run_id or trigger_run_id
if not final_accepted_run_id:
return []
runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
trigger_run = next((record for record in runs if record.run_id == trigger_run_id), None)
if trigger_run is None or not self._is_confirmed_positive_run(trigger_run):
final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
if final_run is None or not self._is_task_accepted_run(final_run):
return []
source_runs = [record for record in runs if self._is_confirmed_positive_run(record)]
source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))
if not source_runs:
return []
@ -100,11 +109,16 @@ class SkillLearningService:
source_session_ids=source_session_ids,
related_skill_names=[],
reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
evidence={"task_id": task_id, "trigger_run_id": trigger_run_id, "theme": self._task_theme(trigger_run.task_text)},
evidence={
"task_id": task_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"theme": self._task_theme(final_run.task_text),
},
status="open",
priority=1,
confidence=0.8,
trigger_reason="validation_accepted_and_user_satisfied",
trigger_reason="task_accepted",
)
)
else:
@ -137,13 +151,14 @@ class SkillLearningService:
),
evidence={
"task_id": task_id,
"trigger_run_id": trigger_run_id,
"final_accepted_run_id": final_accepted_run_id,
"source_run_ids": source_run_ids,
"skill_version": receipt.skill_version,
},
status="open",
priority=1,
confidence=0.7,
trigger_reason="validation_accepted_and_user_satisfied",
trigger_reason="task_accepted",
)
)
@ -269,7 +284,7 @@ class SkillLearningService:
groups.setdefault(key, []).append(record)
candidates: list[SkillLearningCandidate] = []
for theme, runs in groups.items():
successful = [record for record in runs if self._is_confirmed_positive_run(record)]
successful = [record for record in runs if self._is_task_accepted_run(record)]
if len(successful) < 2:
continue
if any(record.activated_skills for record in successful):
@ -290,7 +305,7 @@ class SkillLearningService:
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
for record in self.run_store.list_runs():
if not self._is_confirmed_positive_run(record):
if not self._is_task_accepted_run(record):
continue
unique = sorted({receipt.skill_name for receipt in record.activated_skills})
for pair in combinations(unique, 2):
@ -351,14 +366,15 @@ class SkillLearningService:
return effects
@staticmethod
def _is_confirmed_positive_run(record: RunRecord) -> bool:
validation = record.validation_result or {}
def _is_task_accepted_run(record: RunRecord) -> bool:
feedback = record.feedback or {}
acceptance_type = feedback.get("acceptance_type")
if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
acceptance_type = "accept"
return (
bool(record.success)
and bool(record.task_id)
and validation.get("accepted") is True
and feedback.get("feedback_type") == "satisfied"
and acceptance_type == "accept"
)
@staticmethod

View File

@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
from .router import MainAgentRouter
from .service import TaskService
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
from .validation import ValidationService
__all__ = [
"EvidenceBuilder",
@ -24,6 +23,5 @@ __all__ = [
"ToolEvidence",
"ValidationResult",
"ValidationStatus",
"ValidationService",
"render_task_evidence",
]

View File

@ -1,4 +1,4 @@
"""Models for internal task tracking and validation."""
"""Models for internal task tracking and user acceptance."""
from __future__ import annotations
@ -9,7 +9,12 @@ from typing import Any, Literal
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
LEGACY_STATUS_MAP = {
"validating": "running",
"awaiting_feedback": "awaiting_acceptance",
"needs_review": "awaiting_acceptance",
}
@dataclass(slots=True)
@ -113,11 +118,11 @@ class TaskRecord:
@property
def is_execution_active(self) -> bool:
return self.status in {"running", "validating"}
return self.status == "running"
@property
def requires_user_action(self) -> bool:
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
return self.status in {"awaiting_acceptance", "needs_revision"}
def to_dict(self) -> dict[str, Any]:
return {
@ -137,6 +142,7 @@ class TaskRecord:
"satisfaction": self.satisfaction,
"run_ids": list(self.run_ids),
"skill_names": list(self.skill_names),
"acceptance": list(self.feedback),
"feedback": list(self.feedback),
"validation_result": self.validation_result,
"metadata": dict(self.metadata),
@ -152,7 +158,7 @@ class TaskRecord:
goal=str(payload.get("goal") or payload.get("description") or ""),
constraints=[str(item) for item in payload.get("constraints") or []],
priority=int(payload.get("priority", 0) or 0),
status=str(payload.get("status") or "open"),
status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
creator=str(payload.get("creator") or "main-agent"),
created_at=str(payload.get("created_at") or ""),
updated_at=str(payload.get("updated_at") or ""),
@ -161,7 +167,11 @@ class TaskRecord:
satisfaction=_optional_float(payload.get("satisfaction")),
run_ids=[str(item) for item in payload.get("run_ids") or []],
skill_names=[str(item) for item in payload.get("skill_names") or []],
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
feedback=[
_normalize_acceptance_entry(dict(item))
for item in (payload.get("acceptance") or payload.get("feedback") or [])
if isinstance(item, dict)
],
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
metadata=dict(payload.get("metadata") or {}),
)
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
if value in (None, ""):
return None
return float(value)
def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
if entry.get("acceptance_type") is None and entry.get("feedback_type") is not None:
feedback_type = str(entry.get("feedback_type") or "")
entry["acceptance_type"] = "accept" if feedback_type == "satisfied" else feedback_type
if entry.get("feedback_type") is None and entry.get("acceptance_type") is not None:
acceptance_type = str(entry.get("acceptance_type") or "")
entry["feedback_type"] = "satisfied" if acceptance_type == "accept" else acceptance_type
return entry

View File

@ -10,7 +10,7 @@ from typing import Any, Literal
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
from .models import TaskRecord
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
@ -76,7 +76,6 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None = None,
provider_bundle: ProviderBundle | None = None,
timeout_seconds: float = 30.0,
) -> TaskExecutionPlan:
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
task=task,
user_message=user_message,
attempt_index=attempt_index,
latest_validation=latest_validation,
),
},
],
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
task: TaskRecord,
user_message: str,
attempt_index: int,
latest_validation: ValidationResult | None,
) -> str:
validation_note = ""
if latest_validation is not None:
validation_note = (
"\nPrevious validation issues:\n"
+ json.dumps(latest_validation.to_dict(), ensure_ascii=False)
)
history_note = ""
if task.feedback:
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
return (
"Decide execution mode for this internal Task attempt.\n"
"Use mode=team only when independent research, review, implementation slices, or staged checks "
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
f"Task goal:\n{task.goal}\n\n"
f"Current user request:\n{user_message}\n\n"
f"Attempt index: {attempt_index}\n"
f"{validation_note}"
f"{history_note}"
)
@staticmethod

View File

@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any
from uuid import uuid4
from .models import TaskEvent, TaskRecord, ValidationResult
from .models import TaskEvent, TaskRecord
from .store import TaskStore
@ -105,38 +105,70 @@ class TaskService:
for name in skill_names or []:
if name not in task.skill_names:
task.skill_names.append(name)
task.status = "awaiting_acceptance"
task.updated_at = self._now()
self.store.upsert_task(task)
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
return task
def record_validation(
def add_acceptance(
self,
task_id: str,
run_id: str,
validation: ValidationResult,
*,
final_attempt: bool = True,
has_usable_answer: bool = True,
acceptance_type: str,
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
if validation.status == "accepted":
task.status = "awaiting_feedback"
elif validation.status in {"insufficient_evidence", "validator_error"}:
task.status = "needs_review"
elif validation.status == "rejected" and not final_attempt:
normalized = normalize_acceptance_type(acceptance_type)
matching_acceptance = any(
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
for item in task.feedback
)
conflicting_acceptance = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
),
None,
)
if conflicting_acceptance is not None:
raise ValueError(
f"Acceptance for run_id={run_id!r} was already recorded as "
f"{conflicting_acceptance.get('acceptance_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_acceptance:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_acceptance:
return task
entry = {
"acceptance_type": normalized,
"feedback_type": "satisfied" if normalized == "accept" else normalized,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if normalized == "revise":
task.status = "needs_revision"
elif validation.status == "rejected" and has_usable_answer:
task.status = "needs_review"
else:
task.status = "failed"
elif normalized == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = "automatic validation rejected the final attempt"
task.close_reason = comment or "abandoned"
elif normalized == "accept":
task.status = "closed"
task.closed_at = now
task.close_reason = "accepted"
task.satisfaction = 1.0
if run_id:
task.metadata["final_accepted_run_id"] = run_id
task.updated_at = now
task.validation_result = validation.to_dict()
self.store.upsert_task(task)
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
return task
def add_feedback(
@ -147,52 +179,12 @@ class TaskService:
comment: str | None = None,
run_id: str | None = None,
) -> TaskRecord:
task = self._require(task_id)
now = self._now()
matching_feedback = any(
item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
for item in task.feedback
return self.add_acceptance(
task_id,
acceptance_type=feedback_type,
comment=comment,
run_id=run_id,
)
conflicting_feedback = next(
(
item
for item in task.feedback
if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
),
None,
)
if conflicting_feedback is not None:
raise ValueError(
f"Feedback for run_id={run_id!r} was already recorded as "
f"{conflicting_feedback.get('feedback_type')!r}"
)
if task.status in {"closed", "abandoned"} and not matching_feedback:
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
if matching_feedback:
return task
entry = {
"feedback_type": feedback_type,
"comment": comment or "",
"run_id": run_id,
"created_at": now,
}
task.feedback.append(entry)
if feedback_type == "revise":
task.status = "needs_revision"
elif feedback_type == "abandon":
task.status = "abandoned"
task.closed_at = now
task.close_reason = comment or "abandoned"
elif feedback_type == "satisfied":
task.status = "closed"
task.closed_at = now
task.close_reason = "satisfied"
task.satisfaction = 1.0
task.updated_at = now
self.store.upsert_task(task)
self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
return task
def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
task = self._require(task_id)
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
if len(words) <= 4:
return cleaned[:40]
return " ".join(words[:4])[:40]
def normalize_acceptance_type(value: str) -> str:
normalized = (value or "").strip().lower()
if normalized == "satisfied":
return "accept"
if normalized not in {"accept", "revise", "abandon"}:
raise ValueError("acceptance_type must be one of: accept, revise, abandon")
return normalized

View File

@ -1,154 +0,0 @@
"""Automatic validation for internal Task mode."""
from __future__ import annotations
import json
from typing import Any
from beaver.engine.providers import ProviderBundle
from .models import TaskRecord, ValidationResult
class ValidationService:
async def validate_task_result(
self,
*,
task: TaskRecord,
user_message: str,
final_output: str,
evidence_packet: Any | None = None,
evidence_text: str = "",
transcript_excerpt: str = "",
tool_summaries: list[str] | None = None,
team_summaries: list[str] | None = None,
provider_bundle: ProviderBundle | None = None,
) -> ValidationResult:
provider = None
model = None
if provider_bundle is not None:
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
model = getattr(runtime, "model", None)
if provider is not None:
try:
return await self._validate_with_provider(
provider=provider,
model=model,
task=task,
user_message=user_message,
final_output=final_output,
evidence_text=evidence_text,
transcript_excerpt=transcript_excerpt,
tool_summaries=tool_summaries or [],
team_summaries=team_summaries or [],
)
except Exception as exc:
return ValidationResult(
status="validator_error",
score=0.0,
issues=[f"Validator failed: {exc}"],
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
missing_requirements=["User review is required because automatic validation failed."],
recommended_revision_prompt=(
"Review the answer and evidence, then decide whether to revise or accept it."
),
validator="llm_error",
)
return self._heuristic_validate(final_output)
async def _validate_with_provider(
self,
*,
provider: Any,
model: str | None,
task: TaskRecord,
user_message: str,
final_output: str,
evidence_text: str,
transcript_excerpt: str,
tool_summaries: list[str],
team_summaries: list[str],
) -> ValidationResult:
legacy_context = "" if evidence_text else (
f"Transcript excerpt:\n{transcript_excerpt}\n\n"
f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
)
prompt = (
"Validate whether the assistant output satisfies the task. "
"Return only compact JSON with keys: passed, score, issues, "
"missing_requirements, recommended_revision_prompt.\n\n"
f"Task goal:\n{task.goal}\n\n"
f"Current user request:\n{user_message}\n\n"
f"Evidence packet:\n{evidence_text}\n\n"
f"{legacy_context}"
f"Assistant final output:\n{final_output}"
)
response = await provider.chat(
messages=[
{"role": "system", "content": "You are a strict task result validator."},
{"role": "user", "content": prompt},
],
tools=None,
model=model,
max_tokens=4096,
temperature=0.0,
)
payload = self._parse_json_object(response.content or "")
status = payload.get("status")
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
status = (
"accepted"
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
else "rejected"
)
return ValidationResult(
status=status,
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
issues=[str(item) for item in payload.get("issues") or []],
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
validator="llm",
)
@staticmethod
def _heuristic_validate(final_output: str) -> ValidationResult:
text = final_output.strip()
if not text:
return ValidationResult(
passed=False,
score=0.0,
issues=["Assistant output is empty."],
missing_requirements=["A non-empty result is required."],
recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
validator="heuristic",
)
lowered = text.lower()
if "run failed before completion" in lowered or "tool loop stopped" in lowered:
return ValidationResult(
passed=False,
score=0.35,
issues=["The run did not complete cleanly."],
missing_requirements=["A successful final result is required."],
recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
validator="heuristic",
)
return ValidationResult(passed=True, score=0.85, validator="heuristic")
@staticmethod
def _parse_json_object(text: str) -> dict[str, Any]:
cleaned = text.strip()
if cleaned.startswith("```"):
cleaned = cleaned.strip("`")
if cleaned.lower().startswith("json"):
cleaned = cleaned[4:].strip()
start = cleaned.find("{")
end = cleaned.rfind("}")
if start >= 0 and end >= start:
cleaned = cleaned[start : end + 1]
payload = json.loads(cleaned)
if not isinstance(payload, dict):
raise ValueError("validator response must be a JSON object")
return payload

View File

@ -0,0 +1,28 @@
from __future__ import annotations
from beaver.engine.context import ContextBuildInput, ContextBuilder, RuntimeContext, SessionContext
def test_context_builder_injects_current_date_and_time() -> None:
result = ContextBuilder().build_messages(
ContextBuildInput(
base_system_prompt="Follow user requests.",
current_user_input="今天几号?",
session_context=SessionContext(session_id="web:alpha", source="web", model="stub-model"),
runtime_context=RuntimeContext(
utc_datetime="2026-05-26T01:10:00+00:00",
local_datetime="2026-05-26T09:10:00+08:00",
timezone="Asia/Shanghai",
utc_offset="+08:00",
),
)
)
system_prompt = result.messages[0]["content"]
assert "# Current Date and Time" in system_prompt
assert "Current UTC time: 2026-05-26T01:10:00+00:00" in system_prompt
assert "Current local time: 2026-05-26T09:10:00+08:00" in system_prompt
assert "Local timezone: Asia/Shanghai" in system_prompt
assert "Local UTC offset: +08:00" in system_prompt
assert '"today", "tomorrow", "now", "this week", and "next month"' in system_prompt
assert result.messages[-1] == {"role": "user", "content": "今天几号?"}

View File

@ -18,8 +18,8 @@ class FakeResult:
model: str | None = "fake-model"
usage: dict[str, Any] = field(default_factory=dict)
task_id: str | None = "task-1"
task_status: str | None = "awaiting_feedback"
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
task_status: str | None = "awaiting_acceptance"
validation_result: dict[str, Any] | None = None
class FakeService:
@ -79,8 +79,9 @@ def test_gateway_routes_memory_channel_roundtrip() -> None:
assert message.session_id == "s1"
assert message.finish_reason == "stop"
assert message.metadata["task_id"] == "task-1"
assert message.metadata["task_status"] == "awaiting_feedback"
assert message.metadata["validation_result"] == {"accepted": True}
assert message.metadata["task_status"] == "awaiting_acceptance"
assert message.metadata["evidence_status"] == "recorded"
assert message.metadata["validation_result"] is None
stop_event.set()
await asyncio.wait_for(task, timeout=2)

View File

@ -113,6 +113,19 @@ def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> N
assert LiteLLMProvider._sanitize_messages(messages)[0]["reasoning_content"] == "must be passed back"
def test_litellm_provider_merges_late_system_messages_to_front() -> None:
messages = [
{"role": "system", "content": "base"},
{"role": "user", "content": "question"},
{"role": "system", "content": "finalize without tools"},
]
sanitized = LiteLLMProvider._sanitize_messages(messages)
assert [message["role"] for message in sanitized] == ["system", "user"]
assert sanitized[0]["content"] == "base\n\nfinalize without tools"
def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
captured: dict = {}

View File

@ -79,7 +79,7 @@ def _task() -> TaskRecord:
goal="实现任务连续性",
constraints=[],
priority=0,
status="awaiting_feedback",
status="awaiting_acceptance",
creator="test",
created_at="now",
updated_at="now",

View File

@ -35,6 +35,7 @@ class StubProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:
if not self._responses:
raise AssertionError("No stubbed provider responses left")
@ -47,11 +48,22 @@ class StubProvider(LLMProvider):
class StubSkillAssembler:
def __init__(self, activated_skills: list[SkillContext]) -> None:
self.activated_skills = activated_skills
self.calls: list[dict] = []
async def assemble(self, **kwargs) -> SkillAssemblyResult:
self.calls.append(kwargs)
return SkillAssemblyResult(activated_skills=list(self.activated_skills))
class RecordingToolAssembler:
def __init__(self) -> None:
self.calls: list[dict] = []
async def assemble(self, **kwargs):
self.calls.append(kwargs)
return kwargs["registry"].get_specs(["memory"])
def _tool_call(*, name: str = "echo", arguments: dict | None = None, call_id: str = "call-1") -> SimpleNamespace:
return SimpleNamespace(
id=call_id,
@ -576,6 +588,48 @@ def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
assert effect_records[-1].run_id == result.run_id
def test_thinking_disabled_still_uses_skill_and_tool_assembly(tmp_path: Path) -> None:
skill = SkillContext(
name="docker-debug",
content="Use docker logs before editing config.",
version="v0007",
content_hash="hash-v7",
activation_reason="llm_selected",
tool_hints=["terminal"],
)
skill_assembler = StubSkillAssembler([skill])
tool_assembler = RecordingToolAssembler()
loader = EngineLoader(
workspace=tmp_path,
skill_assembler=skill_assembler,
tool_assembler=tool_assembler,
)
loop = AgentLoop(loader=loader)
bundle = ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=StubProvider(
[LLMResponse(content="Done", finish_reason="stop", provider_name="stub", model="stub-model")]
),
)
result = asyncio.run(
loop.process_direct(
"Why is the Docker container crashing?",
provider_bundle=bundle,
thinking_enabled=False,
)
)
loaded = loop.boot()
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
tool_selection = next(event for event in events if event.event_type == "tool_selection_snapshotted")
assert skill_assembler.calls
assert skill_assembler.calls[0]["thinking_enabled"] is False
assert tool_assembler.calls
assert [skill.name for skill in tool_assembler.calls[0]["activated_skills"]] == ["docker-debug"]
assert tool_selection.event_payload["tool_names"] == ["memory"]
def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path: Path) -> None:
skill = SkillContext(
name="docker-debug",
@ -635,6 +689,52 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
assert effect_records[-1].success is False
def test_agent_loop_suppresses_raw_tool_call_when_finalizing_after_tool_limit(tmp_path: Path) -> None:
loader = EngineLoader(
workspace=tmp_path,
skill_assembler=StubSkillAssembler([]),
)
loop = AgentLoop(loader=loader)
bundle = ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=StubProvider(
[
LLMResponse(
content="Need a tool.",
finish_reason="tool_calls",
tool_calls=[_tool_call()],
provider_name="stub",
model="stub-model",
),
LLMResponse(
content=(
"<tool_call>\n"
"<function=mcp_local_web_mcp_web_fetch>\n"
"<parameter=url>https://example.com</parameter>\n"
"</function>\n"
"</tool_call>"
),
finish_reason="stop",
provider_name="stub",
model="stub-model",
),
]
),
)
result = asyncio.run(
loop.process_direct(
"Fetch the latest result",
provider_bundle=bundle,
max_tool_iterations=0,
)
)
assert result.finish_reason == "max_tool_iterations"
assert "<tool_call>" not in result.output_text
assert "raw tool call was suppressed" in result.output_text
def test_llm_request_snapshot_defaults_to_compact_payload(tmp_path: Path) -> None:
loop = AgentLoop(loader=EngineLoader(workspace=tmp_path, skill_assembler=StubSkillAssembler([])))
bundle = ProviderBundle(

View File

@ -101,12 +101,11 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
"web:test",
run_id="main-run",
role="system",
event_type="task_validation_snapshotted",
event_type="task_evidence_recorded",
event_payload={
"task_id": "task-1",
"attempt_index": 1,
"validation_result": {"accepted": True, "score": 0.9},
"retry_scheduled": False,
"evidence_status": "recorded",
},
context_visible=False,
)
@ -121,7 +120,7 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
assert sub_run["metadata"]["selected_skill_names"] == ["research-workflow"]
assert sub_run["metadata"]["skill_query"] == "research workflow"
assert sub_run["metadata"]["ephemeral_guidance_id"] is None
assert any(event["actor_name"] == "Validator" for event in projection["events"])
assert any(event["actor_name"] == "Evidence" for event in projection["events"])
assert any(run["session_id"] == "web:test" for run in projection["runs"])

View File

@ -4,23 +4,17 @@ import asyncio
from pathlib import Path
from types import SimpleNamespace
import pytest
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode
from beaver.engine import EngineLoader
from beaver.engine.context.builder import ContextBuilder, ContextBuildInput
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.services.agent_service import AgentService
from beaver.skills.assembler import SkillAssemblyResult
from beaver.tasks import TaskExecutionPlan, TaskRecord, TaskService, ValidationResult, ValidationService
from beaver.tasks import TaskExecutionPlan, TaskService
class StubProvider(LLMProvider):
def __init__(self, responses: list[LLMResponse]) -> None:
super().__init__()
self._responses = list(responses)
self.calls: list[dict[str, object]] = []
async def chat(
self,
@ -30,7 +24,6 @@ class StubProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
) -> LLMResponse:
self.calls.append({"messages": messages, "tools": tools, "model": model})
if not self._responses:
raise AssertionError("No stubbed provider responses left")
return self._responses.pop(0)
@ -39,30 +32,9 @@ class StubProvider(LLMProvider):
return "stub-model"
class StubValidationService:
def __init__(self, results: list[ValidationResult]) -> None:
self.results = list(results)
self.calls: list[dict] = []
async def validate_task_result(self, **kwargs) -> ValidationResult:
self.calls.append(kwargs)
if not self.results:
raise AssertionError("No stubbed validation results left")
return self.results.pop(0)
class StubTaskExecutionPlanner:
def __init__(self, plans: list[TaskExecutionPlan] | None = None) -> None:
self.plans = list(plans or [TaskExecutionPlan.single("test-single")])
self.calls = []
async def plan(self, **kwargs) -> TaskExecutionPlan:
self.calls.append(kwargs)
if len(self.plans) == 1:
return self.plans[0]
if not self.plans:
raise AssertionError("No stubbed execution plans left")
return self.plans.pop(0)
return TaskExecutionPlan.single("test-single")
class FakeLearningCandidate:
@ -70,15 +42,6 @@ class FakeLearningCandidate:
return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
class RecordingSkillAssembler:
def __init__(self) -> None:
self.task_descriptions: list[str] = []
async def assemble(self, **kwargs) -> SkillAssemblyResult:
self.task_descriptions.append(kwargs["task_description"])
return SkillAssemblyResult()
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
return LLMResponse(
content=f'{{"action":"{action}","reason":"test route","short_title":"{short_title}"}}',
@ -107,828 +70,157 @@ def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
)
def _single_planner() -> StubTaskExecutionPlanner:
return StubTaskExecutionPlanner([TaskExecutionPlan.single("test-single")])
def _team_plan(strategy: str = "sequence") -> TaskExecutionPlan:
return TaskExecutionPlan(
mode="team",
reason="test-team",
graph=ExecutionGraph(
strategy=strategy, # type: ignore[arg-type]
nodes=[
ExecutionNode(
node_id="research",
task="research implementation options",
agent=AgentDescriptor(name="researcher", role="research"),
)
],
),
final_synthesis_instruction="Use the sub-agent result to produce the final answer.",
)
def _provider_bundle(provider: StubProvider) -> ProviderBundle:
return ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=provider,
auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
auxiliary_provider=StubProvider([_route_response("new_task")]),
)
def _main_only_bundle(*responses: str) -> ProviderBundle:
return ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=StubProvider(
[
LLMResponse(
content=response,
finish_reason="stop",
provider_name="stub",
model="stub-model",
)
for response in responses
]
),
)
def _task_record(status: str) -> TaskRecord:
return TaskRecord(
task_id="task-1",
session_id="session-1",
description="test task",
goal="test task",
constraints=[],
priority=0,
status=status,
creator="main-agent",
created_at="2026-05-22T00:00:00+00:00",
updated_at="2026-05-22T00:00:00+00:00",
)
def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService([]),
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"hello?",
session_id="web:simple",
provider_bundle=_bundle("hi", route_action="simple_chat"),
)
)
loaded = service.create_loop().boot()
assert result.task_id is None
assert loaded.task_service.store.list_tasks() == []
def test_complex_request_creates_task_and_records_validation(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=0.9, validator="test")]
),
"draft release notes",
session_id="web:test",
provider_bundle=_bundle("Done"),
)
)
result = asyncio.run(
service.process_direct(
"implement the new report workflow",
session_id="web:task",
provider_bundle=_bundle("implemented"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task_by_run_id(result.run_id)
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
run_record = loaded.run_memory_store.list_runs()[-1]
skill_effects = next(event for event in events if event.event_type == "skill_effects_snapshotted")
assert result.task_id is not None
task_service = service.create_loop().boot().task_service
assert task_service is not None
task = task_service.get_task(result.task_id or "")
assert task is not None
assert task.status == "awaiting_feedback"
assert any(event.event_type == "task_validation_snapshotted" for event in events)
assert run_record.task_id == result.task_id
assert run_record.validation_result["accepted"] is True
assert skill_effects.event_payload["candidate_generation_allowed"] is False
assert skill_effects.event_payload["learning_candidates"] == []
assert task.metadata["short_title"] == "Test task"
assert task.status == "awaiting_acceptance"
assert task.validation_result is None
assert result.validation_result is None
event_types = [event.event_type for event in task_service.list_events(task.task_id)]
assert "evidence_recorded" in event_types
assert "validated" not in event_types
def test_task_mode_uses_task_aware_skill_selection_context(tmp_path: Path) -> None:
skill_assembler = RecordingSkillAssembler()
def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=1.0, validator="test")]
),
skill_assembler=skill_assembler,
)
)
result = asyncio.run(
service.process_direct(
"继续按刚才的方案改",
session_id="web:task-skill-query",
provider_bundle=_bundle("done", route_action="new_task"),
)
)
assert result.task_id
assert skill_assembler.task_descriptions
query = skill_assembler.task_descriptions[0]
assert "Task goal:" in query
assert "Current user request:" in query
assert "Previously activated skills:" in query
assert "If no published skill matches, return []" in query
def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=True, score=0.9, validator="test"),
ValidationResult(passed=True, score=0.9, validator="test"),
]
),
)
)
first = asyncio.run(
service.process_direct(
"implement the search workflow",
session_id="web:continue",
provider_bundle=_bundle("first done", route_action="new_task"),
)
)
second = asyncio.run(
service.process_direct(
"also add tests for it",
session_id="web:continue",
provider_bundle=_bundle("tests added", route_action="continue_task"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(first.task_id)
assert task is not None
assert second.task_id == first.task_id
assert len(task.run_ids) == 2
closed = asyncio.run(
service.process_direct(
"这个任务结束了",
session_id="web:continue",
provider_bundle=_bundle("好的,已结束。", route_action="close_task"),
)
)
task = loaded.task_service.get_task(first.task_id)
assert closed.task_id is None
assert task is not None
assert task.status == "closed"
assert loaded.task_service.active_task_view("web:continue") is None
def test_active_task_revision_input_records_feedback_and_reruns(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=True, score=0.9, validator="test"),
ValidationResult(passed=True, score=0.95, validator="test"),
]
),
)
)
first = asyncio.run(
service.process_direct(
"查询珠海天气",
session_id="web:revise-direct",
provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
)
)
second = asyncio.run(
service.process_direct(
"再详细一点,并加上明后天穿衣建议",
session_id="web:revise-direct",
provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(first.task_id)
messages = loaded.session_manager.get_messages_as_conversation(first.session_id)
first_assistant = [
message
for message in messages
if message.get("role") == "assistant" and message.get("run_id") == first.run_id
][-1]
user_messages = [message.get("content") for message in messages if message.get("role") == "user"]
assert second.task_id == first.task_id
assert task is not None
assert task.status == "awaiting_feedback"
assert len(task.run_ids) == 2
assert task.feedback == [
{
"feedback_type": "revise",
"comment": "再详细一点,并加上明后天穿衣建议",
"run_id": first.run_id,
"created_at": task.feedback[0]["created_at"],
}
]
assert first_assistant["feedback_state"] == "revise"
assert "再详细一点,并加上明后天穿衣建议" in user_messages
def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=True, score=0.9, validator="test"),
ValidationResult(passed=True, score=0.95, validator="test"),
]
),
)
)
first = asyncio.run(
service.process_direct(
"查询珠海天气",
session_id="web:explicit-revise",
provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
)
)
feedback = asyncio.run(
service.submit_feedback(
session_id=first.session_id,
run_id=first.run_id,
feedback_type="revise",
comment="准备补充穿衣建议",
)
)
second = asyncio.run(
service.process_direct(
"加上明后天穿衣建议",
session_id="web:explicit-revise",
provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(first.task_id)
assert feedback["task_status"] == "needs_revision"
assert second.task_id == first.task_id
assert task is not None
assert task.status == "awaiting_feedback"
assert len(task.run_ids) == 2
assert len(task.feedback) == 1
assert task.feedback[0]["feedback_type"] == "revise"
assert task.feedback[0]["comment"] == "准备补充穿衣建议"
def test_validation_result_status_drives_accepted_and_passed() -> None:
accepted = ValidationResult(status="accepted", score=0.9, validator="test")
insufficient = ValidationResult(status="insufficient_evidence", score=0.9, validator="test")
rejected = ValidationResult(status="rejected", score=0.9, validator="test")
assert accepted.passed is True
assert accepted.accepted is True
assert insufficient.passed is False
assert insufficient.accepted is False
assert rejected.passed is False
assert rejected.accepted is False
def test_validation_result_from_legacy_payload_maps_to_status() -> None:
accepted = ValidationResult.from_dict({"passed": True, "score": 0.9, "validator": "legacy"})
low_score = ValidationResult.from_dict({"passed": True, "score": 0.7, "validator": "legacy"})
rejected = ValidationResult.from_dict({"passed": False, "score": 0.2, "validator": "legacy"})
assert accepted is not None
assert accepted.status == "accepted"
assert low_score is not None
assert low_score.status == "rejected"
assert rejected is not None
assert rejected.status == "rejected"
def test_validation_result_rejects_unknown_status() -> None:
with pytest.raises(ValueError, match="unknown validation status"):
ValidationResult(status="pending", score=0.9, validator="test") # type: ignore[arg-type]
def test_validation_result_from_dict_rejects_unknown_explicit_status() -> None:
with pytest.raises(ValueError, match="unknown validation status"):
ValidationResult.from_dict({"status": "pending", "passed": True, "score": 0.9})
def test_validation_result_evidence_gaps_round_trip() -> None:
validation = ValidationResult(
status="insufficient_evidence",
score=0.4,
evidence_gaps=["missing command output", "missing file reference"],
validator="test",
)
restored = ValidationResult.from_dict(validation.to_dict())
assert restored is not None
assert restored.status == "insufficient_evidence"
assert restored.evidence_gaps == ["missing command output", "missing file reference"]
assert restored.to_dict()["evidence_gaps"] == ["missing command output", "missing file reference"]
def test_task_record_status_helpers_distinguish_review_and_failed() -> None:
needs_review = _task_record("needs_review")
failed = _task_record("failed")
assert needs_review.is_open is True
assert needs_review.is_execution_active is False
assert needs_review.requires_user_action is True
assert failed.is_open is False
assert failed.is_execution_active is False
assert failed.requires_user_action is False
def test_task_service_api_payload_emits_status_helpers(tmp_path: Path) -> None:
service = TaskService(tmp_path)
task = _task_record("needs_review")
payload = service.to_api_dict(task)
assert payload["is_open"] is True
assert payload["is_execution_active"] is False
assert payload["requires_user_action"] is True
def test_validation_failure_retries_once(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(
passed=False,
score=0.2,
issues=["missing tests"],
recommended_revision_prompt="Add tests before final response.",
validator="test",
),
ValidationResult(passed=True, score=0.88, validator="test"),
]
),
)
)
result = asyncio.run(
service.process_direct(
"implement and validate the task",
session_id="web:retry",
provider_bundle=_bundle("first draft", "revised draft"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(result.task_id)
assert result.output_text == "revised draft"
assert result.validation_result["accepted"] is True
assert task is not None
assert len(task.run_ids) == 2
visible_messages = loaded.session_manager.get_messages_as_conversation(result.session_id)
visible_contents = [message.get("content") for message in visible_messages]
assert "first draft" not in visible_contents
assert "revised draft" in visible_contents
def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=0.9, validator="test")]
),
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"implement feedback handling",
session_id="web:feedback",
provider_bundle=_bundle("done"),
"write implementation plan",
session_id="web:acceptance",
provider_bundle=_bundle("Plan"),
)
)
loaded = service.create_loop().boot()
learning_calls = []
def build_learning_candidates_for_task(task_id: str, *, trigger_run_id: str) -> list[FakeLearningCandidate]:
learning_calls.append((task_id, trigger_run_id))
loaded = service.create_loop().boot()
generated: list[tuple[str, str]] = []
def build_learning_candidates_for_task(
task_id: str,
*,
final_accepted_run_id: str | None = None,
trigger_run_id: str | None = None,
) -> list[FakeLearningCandidate]:
generated.append((task_id, final_accepted_run_id or trigger_run_id or ""))
return [FakeLearningCandidate()]
loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task
feedback = asyncio.run(
service.submit_feedback(
session_id=result.session_id,
response = asyncio.run(
service.submit_acceptance(
session_id="web:acceptance",
run_id=result.run_id,
feedback_type="satisfied",
acceptance_type="accept",
)
)
assert feedback["task_status"] == "closed"
assert feedback["learning_candidates"] == [
assert response["task_status"] == "closed"
assert response["acceptance_type"] == "accept"
assert response["learning_candidates"] == [
{"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
]
assert learning_calls == [(result.task_id, result.run_id)]
assert generated == [(result.task_id, result.run_id)]
service2 = AgentService(
loader=EngineLoader(
workspace=tmp_path / "abandon",
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(passed=False, score=0.3, validator="test"),
ValidationResult(passed=False, score=0.3, validator="test"),
]
),
)
)
abandoned = asyncio.run(
service2.process_direct(
"implement another workflow",
session_id="web:abandon",
provider_bundle=_bundle("not enough", "still not enough"),
)
)
abandon_feedback = asyncio.run(
service2.submit_feedback(
session_id=abandoned.session_id,
run_id=abandoned.run_id,
feedback_type="abandon",
comment="too costly",
)
)
assert abandon_feedback["task_status"] == "abandoned"
assert abandon_feedback["learning_candidates"] == []
loaded2 = service2.create_loop().boot()
failure_events = [
event
for event in loaded2.session_manager.get_run_event_records(abandoned.session_id, abandoned.run_id)
if event.event_type == "task_failure_evidence_recorded"
]
assert len(failure_events) == 1
assert loaded2.memory_service.get_store().memory_entries == []
task_service = loaded.task_service
assert task_service is not None
task = task_service.get_task(result.task_id or "")
assert task is not None
assert task.metadata["final_accepted_run_id"] == result.run_id
def test_feedback_is_idempotent_and_projected_to_assistant_message(tmp_path: Path) -> None:
def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[ValidationResult(passed=True, score=0.9, validator="test")]
),
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"implement feedback projection",
session_id="web:feedback-projection",
provider_bundle=_bundle("done"),
"summarize notes",
session_id="web:revise",
provider_bundle=_bundle("Summary"),
)
)
loaded = service.create_loop().boot()
first = asyncio.run(
service.submit_feedback(
session_id=result.session_id,
response = asyncio.run(
service.submit_acceptance(
session_id="web:revise",
run_id=result.run_id,
feedback_type="satisfied",
acceptance_type="revise",
comment="Add decisions",
)
)
second = asyncio.run(
assert response["task_status"] == "needs_revision"
assert response["learning_candidates"] == []
task_service = service.create_loop().boot().task_service
assert task_service is not None
task = task_service.get_task(result.task_id or "")
assert task is not None
assert task.feedback[0]["acceptance_type"] == "revise"
def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=StubTaskExecutionPlanner(),
)
)
result = asyncio.run(
service.process_direct(
"prepare checklist",
session_id="web:legacy",
provider_bundle=_bundle("Checklist"),
)
)
response = asyncio.run(
service.submit_feedback(
session_id=result.session_id,
session_id="web:legacy",
run_id=result.run_id,
feedback_type="satisfied",
)
)
feedback_events = [
event
for event in loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
if event.event_type == "task_feedback_recorded"
]
assistant = [
message
for message in loaded.session_manager.get_messages_as_conversation(result.session_id)
if message.get("role") == "assistant" and message.get("run_id") == result.run_id
][-1]
assert first["task_status"] == "closed"
assert second["task_status"] == "closed"
assert len(feedback_events) == 1
assert assistant["feedback_state"] == "satisfied"
assert assistant["task_status"] == "closed"
assert assistant["validation_status"] == "passed"
with pytest.raises(ValueError, match="already recorded"):
asyncio.run(
service.submit_feedback(
session_id=result.session_id,
run_id=result.run_id,
feedback_type="abandon",
)
)
task = loaded.task_service.get_task(result.task_id)
assert task is not None
assert task.status == "closed"
assert response["acceptance_type"] == "accept"
assert response["feedback_type"] == "satisfied"
assert response["task_status"] == "closed"
def test_task_mode_team_plan_runs_subagent_then_main_synthesis(tmp_path: Path) -> None:
main_provider = StubProvider(
[
LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
]
)
sub_provider = StubProvider(
[
LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
]
)
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
validation_service=StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")]),
)
)
def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None:
service = TaskService(tmp_path)
task = service.create_task(session_id="s", description="legacy")
task.status = "awaiting_feedback"
task.feedback.append({"feedback_type": "satisfied", "run_id": "run-1"})
service.store.upsert_task(task)
result = asyncio.run(
service.process_direct(
"implement team-backed workflow",
session_id="web:team",
provider_bundle=_provider_bundle(main_provider),
team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(result.task_id)
events = loaded.session_manager.get_event_records(result.session_id)
loaded = service.get_task(task.task_id)
assert result.output_text == "final synthesized answer"
assert task is not None
assert len(task.run_ids) == 2
assert result.run_id == task.run_ids[-1]
assert any(event.event_type == "task_execution_planned" for event in events)
assert any(event.event_type == "task_team_run_completed" for event in events)
assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
assert "sub-agent evidence" != result.output_text
def test_task_mode_team_synthesis_runs_without_tools_and_receives_evidence(tmp_path: Path) -> None:
main_provider = StubProvider(
[
LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
]
)
sub_provider = StubProvider(
[
LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
]
)
validation = StubValidationService([ValidationResult(status="accepted", score=0.9, validator="test")])
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
validation_service=validation,
)
)
result = asyncio.run(
service.process_direct(
"implement team-backed workflow",
session_id="web:team-no-tools",
provider_bundle=_provider_bundle(main_provider),
team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
)
)
assert result.output_text == "final synthesized answer"
assert main_provider.calls[0]["tools"] is None
assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
assert "Task evidence packet" in validation.calls[0]["evidence_text"]
def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> None:
main_provider = StubProvider(
[
LLMResponse(content="fallback synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
]
)
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
validation_service=StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")]),
)
)
result = asyncio.run(
service.process_direct(
"implement workflow despite team failure",
session_id="web:team-failure",
provider_bundle=_provider_bundle(main_provider),
team_provider_bundle_factory=lambda node: (_ for _ in ()).throw(RuntimeError("sub-agent unavailable")),
)
)
loaded = service.create_loop().boot()
events = loaded.session_manager.get_event_records(result.session_id)
assert result.output_text == "fallback synthesized answer"
assert any(event.event_type == "task_team_run_failed" for event in events)
assert "sub-agent unavailable" in main_provider.calls[0]["messages"][0]["content"]
assert "same class of tools fails repeatedly" in main_provider.calls[0]["messages"][0]["content"]
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=_single_planner(),
validation_service=StubValidationService(
[
ValidationResult(
status="insufficient_evidence",
score=0.4,
evidence_gaps=["source missing"],
validator="test",
)
]
),
)
)
result = asyncio.run(
service.process_direct(
"answer with uncertain evidence",
session_id="web:needs-review",
provider_bundle=_bundle("possible answer"),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(result.task_id)
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
validation_event = next(event for event in events if event.event_type == "task_validation_snapshotted")
assert task is not None
assert task.status == "needs_review"
assert task.requires_user_action is True
assert task.is_execution_active is False
assert validation_event.event_payload["validation_result"]["status"] == "insufficient_evidence"
assert validation_event.event_payload["retry_scheduled"] is False
assert validation_event.event_payload["validation_debug"]["tool_result_count"] >= 0
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
main_provider = StubProvider(
[
LLMResponse(content="first synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model"),
LLMResponse(content="revised synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model"),
]
)
sub_providers = [
StubProvider([LLMResponse(content="first evidence", finish_reason="stop", provider_name="stub", model="stub-model")]),
StubProvider([LLMResponse(content="second evidence", finish_reason="stop", provider_name="stub", model="stub-model")]),
]
service = AgentService(
loader=EngineLoader(
workspace=tmp_path,
task_execution_planner=StubTaskExecutionPlanner([_team_plan(), _team_plan()]),
validation_service=StubValidationService(
[
ValidationResult(passed=False, score=0.2, recommended_revision_prompt="revise", validator="test"),
ValidationResult(passed=True, score=0.9, validator="test"),
]
),
)
)
result = asyncio.run(
service.process_direct(
"implement and validate with team",
session_id="web:team-retry",
provider_bundle=_provider_bundle(main_provider),
team_provider_bundle_factory=lambda node: _provider_bundle(sub_providers.pop(0)),
)
)
loaded = service.create_loop().boot()
task = loaded.task_service.get_task(result.task_id)
visible = loaded.session_manager.get_messages_as_conversation(result.session_id)
visible_contents = [message.get("content") for message in visible]
run_records = {record.run_id: record for record in loaded.run_memory_store.list_runs()}
assert result.output_text == "revised synthesized answer"
assert task is not None
assert len(task.run_ids) == 4
assert "first synthesized answer" not in visible_contents
assert "revised synthesized answer" in visible_contents
for run_id in task.run_ids:
record = run_records[run_id]
events = loaded.session_manager.get_run_event_records(record.session_id, run_id)
skill_effects = [event for event in events if event.event_type == "skill_effects_snapshotted"]
assert skill_effects
assert skill_effects[-1].event_payload["candidate_generation_allowed"] is False
def test_context_builder_strips_ui_projection_fields_from_provider_history() -> None:
result = ContextBuilder().build_messages(
ContextBuildInput(
history=[
{
"role": "assistant",
"content": "done",
"run_id": "run-1",
"task_id": "task-1",
"task_status": "closed",
"validation_status": "passed",
"feedback_state": "satisfied",
}
],
)
)
assistant = result.messages[-1]
assert assistant == {"role": "assistant", "content": "done"}
def test_context_builder_normalizes_persisted_tool_arguments() -> None:
result = ContextBuilder().build_messages(
ContextBuildInput(
history=[
{
"role": "assistant",
"content": None,
"tool_calls": [
{
"id": "call-1",
"type": "function",
"function": {
"name": "cron",
"arguments": {"action": "add", "mode": "notification"},
},
}
],
}
],
)
)
tool_call = result.messages[-1]["tool_calls"][0]
assert tool_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
task_service = TaskService(tmp_path / "tasks")
task = task_service.create_task(session_id="web:validator", description="implement validator handling")
validation = asyncio.run(
ValidationService().validate_task_result(
task=task,
user_message="implement validator handling",
final_output="done",
provider_bundle=_main_only_bundle("not json"),
)
)
assert validation.accepted is False
assert validation.status == "validator_error"
assert validation.validator == "llm_error"
assert validation.issues
assert loaded is not None
assert loaded.status == "awaiting_acceptance"
assert loaded.feedback[0]["acceptance_type"] == "accept"

View File

@ -20,8 +20,8 @@ class StubRunResult:
model: str | None = "stub-model"
usage: dict[str, Any] = field(default_factory=lambda: {"total_tokens": 3})
task_id: str | None = "task-1"
task_status: str | None = "awaiting_feedback"
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
task_status: str | None = "awaiting_acceptance"
validation_result: dict[str, Any] | None = None
class StubAgentService(AgentService):
@ -101,9 +101,10 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
assert message["session_id"] == "web:alpha"
assert message["run_id"] == "run-1"
assert message["task_id"] == "task-1"
assert message["task_status"] == "awaiting_feedback"
assert message["validation_result"] == {"accepted": True}
assert message["validation_status"] == "passed"
assert message["task_status"] == "awaiting_acceptance"
assert message["evidence_status"] == "recorded"
assert message["validation_result"] is None
assert "validation_status" not in message
assert message["metadata"]["input_metadata"] == {
"source": "test",
"attachments": [{"file_id": "file-1", "name": "a.txt"}],

View File

@ -19,7 +19,7 @@ import {
uploadFile,
wsManager,
} from '@/lib/api';
import { mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
import { mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
import { pickAppText } from '@/lib/i18n/core';
import { useAppI18n } from '@/lib/i18n/provider';
import { buildSessionProgressView } from '@/lib/session-progress';
@ -32,7 +32,7 @@ function isSessionUpdatedEvent(data: WsEvent | Record<string, unknown>): data is
function activeTaskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
if (status === 'needs_revision') return pickAppText(locale, '待修改', 'Needs revision');
if (status === 'awaiting_feedback') return pickAppText(locale, '待反馈', 'Awaiting feedback');
if (status === 'awaiting_acceptance') return pickAppText(locale, '待验收', 'Awaiting acceptance');
if (status === 'running') return pickAppText(locale, '进行中', 'Running');
return pickAppText(locale, '进行中', 'Active');
}
@ -157,10 +157,11 @@ export default function ChatPage() {
setSessionProcess(key, process);
}
void loadActiveTask(key);
const shouldMergePending = shouldMergePendingUsers(detail.messages, localSnapshot, waitingForReply);
const displayMessages = detail.messages.filter(shouldDisplayChatMessage);
const shouldMergePending = shouldMergePendingUsers(displayMessages, localSnapshot, waitingForReply);
const nextMessages = shouldMergePending
? mergeServerWithPendingUsers(detail.messages, localSnapshot)
: detail.messages;
? mergeServerWithPendingUsers(displayMessages, localSnapshot)
: displayMessages;
setMessages(nextMessages);
shouldSnapToLatestRef.current = true;
const last = nextMessages[nextMessages.length - 1];
@ -217,15 +218,11 @@ export default function ChatPage() {
if (data.type === 'status' && data.status === 'thinking') {
setIsThinking(true);
} else if (data.type === 'message' && data.role === 'assistant') {
const validationResult = data.validation_result ?? data.metadata?.validation_result;
const validationStatus = data.validation_status
? data.validation_status
: validationResult
? ((validationResult as Record<string, unknown>).accepted === true ? 'passed' : 'failed')
: 'unknown';
setIsThinking(false);
setIsLoading(false);
addMessage({
const rawEvidenceStatus = data.evidence_status ?? data.metadata?.evidence_status;
const evidenceStatus = rawEvidenceStatus === 'recorded' ? 'recorded' : undefined;
const assistantMessage = {
role: 'assistant',
content: typeof data.content === 'string' ? data.content : '',
timestamp: new Date().toISOString(),
@ -233,8 +230,11 @@ export default function ChatPage() {
run_id: typeof data.run_id === 'string' ? data.run_id : undefined,
task_id: data.task_id ?? data.metadata?.task_id ?? null,
task_status: data.task_status ?? data.metadata?.task_status ?? null,
validation_status: validationStatus,
});
evidence_status: evidenceStatus,
} as const;
if (shouldDisplayChatMessage(assistantMessage)) {
addMessage(assistantMessage);
}
void loadSessionMessages(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
void loadActiveTask(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
loadSessions();
@ -359,17 +359,18 @@ export default function ChatPage() {
await loadSessions();
return;
}
addMessage({
const assistantMessage = {
role: 'assistant',
content: result.response,
timestamp: new Date().toISOString(),
run_id: result.run_id,
task_id: result.task_id,
task_status: result.task_status,
validation_status: result.validation_result
? (result.validation_result.accepted === true ? 'passed' : 'failed')
: 'unknown',
});
evidence_status: result.evidence_status === 'recorded' ? 'recorded' : undefined,
} as const;
if (shouldDisplayChatMessage(assistantMessage)) {
addMessage(assistantMessage);
}
void getSessionProcess(sessionId).then((process) => setSessionProcess(sessionId, process)).catch(() => null);
void loadActiveTask(sessionId);
loadSessions();
@ -393,7 +394,7 @@ export default function ChatPage() {
}
}, [addMessage, clearInputDraft, input, isLoading, loadActiveTask, loadSessionMessages, loadSessions, locale, pendingFiles, revisionTargetRunId, sessionId, setIsLoading, setIsThinking, setSessionProcess, thinkingModeEnabled, updateMessageFeedback]);
const handleFeedback = useCallback(async (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => {
const handleFeedback = useCallback(async (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => {
updateMessageFeedback(runId, feedbackType);
try {
await submitChatFeedback({

View File

@ -1238,7 +1238,7 @@ function riskLabel(risk: string, t: (zh: string, en: string) => string): string
function triggerReasonLabel(reason: string, t: (zh: string, en: string) => string): string {
const labels: Record<string, string> = {
validation_accepted_and_user_satisfied: t('任务验证通过且用户满意', 'Validation accepted and user satisfied'),
task_accepted: t('任务已接受', 'Task accepted'),
};
return labels[reason] || reason;
}

View File

@ -3,7 +3,7 @@
import Link from 'next/link';
import { useParams, useRouter } from 'next/navigation';
import React, { useMemo, useState } from 'react';
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, HelpCircle, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
import { TaskRuntimeStatusBadge, formatTaskRuntimeDuration, formatTaskRuntimeTime, progressPercent } from '@/components/task-runtime/TaskRuntimeShared';
import { Badge } from '@/components/ui/badge';
@ -17,8 +17,9 @@ import { buildTaskRuntimeView, type TaskRuntimeNodeView } from '@/lib/task-runti
import { useChatStore } from '@/lib/store';
import type { BackendTask, BackendTaskRun, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';
type TaskFeedbackType = 'satisfied' | 'revise' | 'abandon';
type TaskFeedbackType = 'accept' | 'revise' | 'abandon';
type TaskFeedbackItem = {
acceptance_type?: unknown;
feedback_type?: unknown;
comment?: unknown;
created_at?: unknown;
@ -151,12 +152,6 @@ export default function TaskDetailPage() {
const backendFeedbackRunId = backendTask ? pickFeedbackRunId(backendTask) : null;
if (!task && backendTask) {
const validation = backendTask.validation_result;
const accepted = Boolean(validation?.accepted);
const validationIssues = [
...arrayOfStrings(validation?.issues),
...arrayOfStrings(validation?.missing_requirements),
];
const feedbackItems = backendTask.feedback || [];
return (
<div className="mx-auto max-w-5xl space-y-6 p-6">
@ -232,57 +227,6 @@ export default function TaskDetailPage() {
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="text-base">{pickAppText(locale, '验证和反馈', 'Validation and feedback')}</CardTitle>
</CardHeader>
<CardContent className="space-y-4 text-sm">
<div className="rounded-lg border border-border bg-muted/25 p-4">
<div className="flex items-center gap-2">
{validation ? (
accepted ? <CheckCircle2 className="h-5 w-5 text-[#657162]" /> : <XCircle className="h-5 w-5 text-destructive" />
) : (
<HelpCircle className="h-5 w-5 text-muted-foreground" />
)}
<div className="font-medium">
{validation
? accepted
? pickAppText(locale, '验证通过', 'Validation passed')
: pickAppText(locale, '需要继续修改', 'Needs revision')
: pickAppText(locale, '尚未验证', 'Not validated yet')}
</div>
</div>
{validation ? (
<div className="mt-2 text-muted-foreground">
{pickAppText(locale, '评分', 'Score')}: {String(validation.score ?? '-')} · {pickAppText(locale, '验证器', 'Validator')}: {String(validation.validator ?? '-')}
</div>
) : null}
{validationIssues.length > 0 && (
<ul className="mt-3 list-disc space-y-1 pl-5 text-muted-foreground">
{validationIssues.map((item, index) => <li key={`${item}:${index}`}>{item}</li>)}
</ul>
)}
{typeof validation?.recommended_revision_prompt === 'string' && validation.recommended_revision_prompt && (
<p className="mt-3 rounded-md bg-background p-3 text-muted-foreground">{validation.recommended_revision_prompt}</p>
)}
</div>
<div className="space-y-2">
<div className="font-medium">{pickAppText(locale, '用户反馈', 'User feedback')}</div>
{feedbackItems.length === 0 ? (
<p className="text-muted-foreground">{pickAppText(locale, '还没有用户反馈。', 'No user feedback yet.')}</p>
) : (
feedbackItems.map((item, index) => (
<div key={index} className="rounded-md border border-border p-3">
<div className="font-medium">{humanFeedback(String(item.feedback_type || ''), locale)}</div>
{item.comment ? <p className="mt-1 text-muted-foreground">{String(item.comment)}</p> : null}
{item.created_at ? <p className="mt-1 text-xs text-muted-foreground">{formatTaskRuntimeTime(String(item.created_at), locale)}</p> : null}
</div>
))
)}
</div>
</CardContent>
</Card>
</div>
);
}
@ -476,6 +420,7 @@ export default function TaskDetailPage() {
comment,
});
setRuntimeFeedback({
acceptance_type: feedbackType,
feedback_type: feedbackType,
comment: comment || '',
created_at: new Date().toISOString(),
@ -660,14 +605,14 @@ function TaskFeedbackPanel({
return (
<Card>
<CardHeader>
<CardTitle className="text-base">{pickAppText(locale, '任务反馈', 'Task feedback')}</CardTitle>
<CardTitle className="text-base">{pickAppText(locale, '任务验收', 'Task acceptance')}</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
{recordedFeedback ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm">
<div className="flex items-center gap-2 font-medium">
<CheckCircle2 className="h-4 w-4 text-[#657162]" />
{pickAppText(locale, '已提交反馈', 'Feedback submitted')}: {humanFeedback(String(recordedFeedback.feedback_type || ''), locale)}
{pickAppText(locale, '已提交验收', 'Acceptance submitted')}: {humanFeedback(String(recordedFeedback.acceptance_type || recordedFeedback.feedback_type || ''), locale)}
</div>
{recordedFeedback.comment ? (
<p className="mt-2 text-muted-foreground">{String(recordedFeedback.comment)}</p>
@ -678,22 +623,22 @@ function TaskFeedbackPanel({
</div>
) : isFinalized ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
{pickAppText(locale, '任务已结束,不能再提交新的反馈。', 'This task is finalized and cannot accept new feedback.')}
{pickAppText(locale, '任务已结束,不能再提交新的验收。', 'This task is finalized and cannot accept new acceptance.')}
</div>
) : !runId ? (
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
{pickAppText(locale, '暂无可反馈的运行记录。', 'No run is available for feedback yet.')}
{pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.')}
</div>
) : null}
<div className="grid gap-2 sm:grid-cols-3">
<FeedbackButton
type="satisfied"
type="accept"
icon={<ThumbsUp className="mr-2 h-4 w-4" />}
label={pickAppText(locale, '满意', 'Satisfied')}
label={pickAppText(locale, '接受', 'Accept')}
actionBusy={actionBusy}
disabled={!canSubmit}
onClick={() => submit('satisfied', comment.trim() || undefined)}
onClick={() => submit('accept', comment.trim() || undefined)}
/>
<FeedbackButton
type="revise"
@ -717,10 +662,10 @@ function TaskFeedbackPanel({
value={comment}
onChange={(event) => setComment(event.target.value)}
disabled={Boolean(recordedFeedback) || isFinalized || Boolean(actionBusy)}
placeholder={pickAppText(locale, '需要修改时写下具体要求;满意或放弃可选填说明。', 'Describe requested changes; notes are optional for satisfied or abandon.')}
placeholder={pickAppText(locale, '需要修改时写下具体要求;接受或放弃可选填说明。', 'Describe requested changes; notes are optional for accept or abandon.')}
/>
<div className="text-xs text-muted-foreground">
{pickAppText(locale, '反馈将记录到当前任务运行:', 'Feedback will be recorded on run: ')}
{pickAppText(locale, '验收将记录到当前任务运行:', 'Acceptance will be recorded on run: ')}
<span className="font-mono">{runId || '-'}</span>
<span className="mx-1">·</span>
{pickAppText(locale, '会话:', 'Session: ')}
@ -807,8 +752,7 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
const map: Record<string, [string, string]> = {
open: ['已创建', 'Open'],
running: ['执行中', 'Running'],
validating: ['验证中', 'Validating'],
awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
needs_revision: ['需要修改', 'Needs revision'],
closed: ['已完成', 'Closed'],
abandoned: ['已放弃', 'Abandoned'],
@ -818,10 +762,10 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
}
function humanFeedback(type: string, locale: 'zh-CN' | 'en-US') {
if (type === 'satisfied') return pickAppText(locale, '满意', 'Satisfied');
if (type === 'accept' || type === 'satisfied') return pickAppText(locale, '接受', 'Accepted');
if (type === 'revise') return pickAppText(locale, '请求修改', 'Revision requested');
if (type === 'abandon') return pickAppText(locale, '放弃任务', 'Abandoned');
return type || pickAppText(locale, '反馈', 'Feedback');
return type || pickAppText(locale, '验收', 'Acceptance');
}
function humanFinishReason(reason: string, locale: 'zh-CN' | 'en-US') {
@ -848,7 +792,3 @@ function feedbackForRun(items: TaskFeedbackItem[], runId: string | null): TaskFe
function latestFeedback(items: TaskFeedbackItem[]): TaskFeedbackItem | null {
return [...items].reverse()[0] ?? null;
}
function arrayOfStrings(value: unknown): string[] {
return Array.isArray(value) ? value.map((item) => String(item)).filter(Boolean) : [];
}

View File

@ -142,7 +142,7 @@ function OrdinaryTasks() {
</div>
</TableCell>
<TableCell>
<Badge variant={task.status === 'awaiting_feedback' || task.status === 'closed' ? 'default' : 'secondary'}>
<Badge variant={task.status === 'awaiting_acceptance' || task.status === 'closed' ? 'default' : 'secondary'}>
{taskStatusLabel(task.status, locale)}
</Badge>
</TableCell>
@ -185,8 +185,7 @@ function taskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
const labels: Record<string, [string, string]> = {
open: ['已创建', 'Open'],
running: ['执行中', 'Running'],
validating: ['验证中', 'Validating'],
awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
needs_revision: ['需要修改', 'Needs revision'],
closed: ['已完成', 'Closed'],
abandoned: ['已放弃', 'Abandoned'],

View File

@ -27,7 +27,7 @@ export function ChatWorkbench({
processArtifacts: ProcessArtifact[];
selectedRunId: string | null;
onSelectRun: (runId: string) => void;
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
onRequestRevision: (runId: string) => void;
}) {
return (

View File

@ -6,7 +6,7 @@ import { Bot, CheckCircle2, ChevronRight, Loader2, Paperclip, RefreshCcw, Thumbs
import type { ChatMessage, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';
import { getAccessToken, getFileUrl } from '@/lib/api';
import { getTaskCardMessageIndexes } from '@/lib/chat-messages';
import { getTaskCardMessageIndexes, hasVisibleChatContent, normalizedMessageText, shouldDisplayChatMessage } from '@/lib/chat-messages';
import { AgentTeamBlock } from '@/components/chat-workbench/AgentTeamBlock';
import { MarkdownContent } from '@/components/chat-workbench/MarkdownContent';
import { ScrollArea } from '@/components/ui/scroll-area';
@ -49,19 +49,14 @@ function MessageBubble({
message: ChatMessage;
showTaskCard: boolean;
canSendFeedback: boolean;
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
onRequestRevision: (runId: string) => void;
}) {
const { locale } = useAppI18n();
const isUser = message.role === 'user';
const textContent = typeof message.content === 'string' ? message.content : String(message.content || '');
const [feedbackMode, setFeedbackMode] = React.useState<'satisfied' | null>(null);
const textContent = normalizedMessageText(message.content);
const [feedbackMode, setFeedbackMode] = React.useState<'accept' | null>(null);
const [feedbackComment, setFeedbackComment] = React.useState('');
const validationFailed = message.validation_status === 'failed';
const validationDetails =
validationFailed
? pickAppText(locale, '详细原因会在任务验证区展示;展开任务可查看验证报告。', 'Detailed reasons are shown in the task validation area. Open the task to inspect the validation report.')
: '';
return (
<div className={`flex gap-3 ${isUser ? 'justify-end' : ''}`}>
@ -142,22 +137,14 @@ function MessageBubble({
</div>
</div>
)}
{!isUser && validationFailed && (
<details className="mt-3 rounded-md border border-destructive/30 bg-destructive/5 p-3">
<summary className="cursor-pointer text-base font-semibold text-destructive">
{pickAppText(locale, '验证失败', 'Validation failed')}
</summary>
<p className="mt-2 text-xs leading-5 text-muted-foreground">{validationDetails}</p>
</details>
)}
{!isUser && (canSendFeedback || message.feedback_state) && message.run_id && (
<div className="mt-3 space-y-2 border-t border-border/70 pt-3">
{message.feedback_state ? (
<div className="flex items-center gap-2 text-xs text-muted-foreground">
<CheckCircle2 className="h-3.5 w-3.5" />
<span>
{message.feedback_state === 'satisfied'
? pickAppText(locale, '已标记满意', 'Marked satisfied')
{message.feedback_state === 'accept' || message.feedback_state === 'satisfied'
? pickAppText(locale, '已接受', 'Accepted')
: message.feedback_state === 'revise'
? pickAppText(locale, '已请求修改', 'Revision requested')
: pickAppText(locale, '已放弃任务', 'Task abandoned')}
@ -168,11 +155,11 @@ function MessageBubble({
<div className="flex flex-wrap items-center gap-2">
<button
type="button"
onClick={() => setFeedbackMode('satisfied')}
onClick={() => setFeedbackMode('accept')}
className="inline-flex h-8 items-center gap-1 rounded-md border border-border px-3 text-xs text-muted-foreground hover:bg-accent hover:text-foreground"
>
<ThumbsUp className="h-3.5 w-3.5" />
{pickAppText(locale, '满意', 'Satisfied')}
{pickAppText(locale, '接受', 'Accept')}
</button>
<button
type="button"
@ -222,13 +209,6 @@ function MessageBubble({
)}
</>
)}
{message.validation_status && message.validation_status !== 'unknown' && (
<span className="text-xs text-muted-foreground">
{message.validation_status === 'passed'
? pickAppText(locale, '验证通过', 'Validated')
: pickAppText(locale, '验证未通过', 'Validation failed')}
</span>
)}
{message.feedback_error && (
<span className="text-xs text-destructive">{message.feedback_error}</span>
)}
@ -264,6 +244,17 @@ function shouldHideSystemAgentMessage(message: ChatMessage): boolean {
);
}
function hasRenderableMessageContent(message: ChatMessage): boolean {
return hasVisibleChatContent(message);
}
function shouldHideMessage(message: ChatMessage): boolean {
if (shouldHideSystemAgentMessage(message)) {
return true;
}
return !shouldDisplayChatMessage(message);
}
function parseTimelineTime(value?: string | null): number | null {
if (!value) return null;
const parsed = new Date(value).getTime();
@ -342,12 +333,12 @@ export function MessageList({
processArtifacts: ProcessArtifact[];
selectedRunId: string | null;
onSelectRun: (runId: string) => void;
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
onRequestRevision: (runId: string) => void;
}) {
const { locale } = useAppI18n();
const visibleMessages = React.useMemo(
() => messages.filter((message) => !shouldHideSystemAgentMessage(message)),
() => messages.filter((message) => !shouldHideMessage(message)),
[messages]
);
const teamGroups = React.useMemo(
@ -385,14 +376,21 @@ export function MessageList({
() => getTaskCardMessageIndexes(visibleMessages),
[visibleMessages]
);
const latestFeedbackRunId = [...visibleMessages]
.reverse()
.find((message) =>
message.role === 'assistant'
&& message.run_id
&& message.task_id
&& message.task_status === 'awaiting_feedback'
)?.run_id;
const latestFeedbackMessageIndex = (() => {
for (let index = visibleMessages.length - 1; index >= 0; index -= 1) {
const message = visibleMessages[index];
if (
message.role === 'assistant'
&& message.run_id
&& message.task_id
&& message.task_status === 'awaiting_acceptance'
&& hasRenderableMessageContent(message)
) {
return index;
}
}
return -1;
})();
return (
<ScrollArea className="h-full px-8" viewportRef={viewportRef}>
@ -411,7 +409,7 @@ export function MessageList({
key={item.key}
message={item.message}
showTaskCard={taskCardMessageIndexes.has(item.messageIndex)}
canSendFeedback={Boolean(latestFeedbackRunId && item.message.run_id === latestFeedbackRunId)}
canSendFeedback={item.messageIndex === latestFeedbackMessageIndex}
onFeedback={onFeedback}
onRequestRevision={onRequestRevision}
/>

View File

@ -271,7 +271,7 @@ export async function sendMessage(
run_id?: string;
task_id?: string | null;
task_status?: string | null;
validation_result?: Record<string, unknown> | null;
evidence_status?: string | null;
}> {
const body: Record<string, unknown> = { message, session_id: sessionId };
if (attachments && attachments.length > 0) {
@ -293,7 +293,7 @@ export async function sendMessage(
finish_reason?: string;
task_id?: string | null;
task_status?: string | null;
validation_result?: Record<string, unknown> | null;
evidence_status?: string | null;
}>('/api/chat', {
method: 'POST',
body: JSON.stringify(body),
@ -305,28 +305,29 @@ export async function sendMessage(
run_id: result.run_id,
task_id: result.task_id,
task_status: result.task_status,
validation_result: result.validation_result,
evidence_status: result.evidence_status,
};
}
export async function submitChatFeedback(params: {
sessionId: string;
runId: string;
feedbackType: 'satisfied' | 'revise' | 'abandon';
feedbackType: 'accept' | 'revise' | 'abandon';
comment?: string;
}): Promise<{
session_id: string;
run_id: string;
task_id: string;
task_status: string;
acceptance_type: string;
feedback_type: string;
}> {
return fetchJSON('/api/chat/feedback', {
return fetchJSON('/api/chat/acceptance', {
method: 'POST',
body: JSON.stringify({
session_id: params.sessionId,
run_id: params.runId,
feedback_type: params.feedbackType,
acceptance_type: params.feedbackType,
comment: params.comment,
}),
});

View File

@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest';
import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
import type { ChatMessage } from '@/types';
describe('chat message helpers', () => {
@ -85,10 +85,17 @@ describe('chat message helpers', () => {
content: 'Final answer.',
run_id: 'run-1',
task_id: 'task-1',
task_status: 'awaiting_feedback',
task_status: 'awaiting_acceptance',
},
];
expect(Array.from(getTaskCardMessageIndexes(messages))).toEqual([2]);
});
it('hides empty assistant records from session history', () => {
expect(shouldDisplayChatMessage({ role: 'assistant', content: '', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
expect(shouldDisplayChatMessage({ role: 'assistant', content: '\u200B\uFEFF', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
expect(shouldDisplayChatMessage({ role: 'assistant', content: 'Final answer.', task_id: 'task-1', run_id: 'run-1' })).toBe(true);
expect(shouldDisplayChatMessage({ role: 'user', content: '' })).toBe(true);
});
});

View File

@ -1,5 +1,28 @@
import type { ChatMessage } from '@/types';
const INVISIBLE_CONTENT_CHARS = /[\u200B-\u200D\uFEFF]/g;
export function normalizedMessageText(content: unknown): string {
if (typeof content === 'string') {
return content.replace(INVISIBLE_CONTENT_CHARS, '').trim();
}
if (content == null) {
return '';
}
return String(content).replace(INVISIBLE_CONTENT_CHARS, '').trim();
}
export function hasVisibleChatContent(msg: ChatMessage): boolean {
if (normalizedMessageText(msg.content)) {
return true;
}
return Boolean(msg.attachments?.length);
}
export function shouldDisplayChatMessage(msg: ChatMessage): boolean {
return msg.role !== 'assistant' || hasVisibleChatContent(msg);
}
export function messageFingerprint(msg: ChatMessage): string {
const attachmentKey = (msg.attachments ?? [])
.map((a) => `${a.file_id ?? ''}:${a.name}:${a.content_type}:${a.size ?? ''}`)

View File

@ -48,8 +48,9 @@ export interface ChatMessage {
run_id?: string;
task_id?: string | null;
task_status?: string | null;
validation_status?: 'passed' | 'failed' | 'unknown';
feedback_state?: 'satisfied' | 'revise' | 'abandon';
evidence_status?: 'recorded';
acceptance_state?: 'accept' | 'revise' | 'abandon';
feedback_state?: 'accept' | 'satisfied' | 'revise' | 'abandon';
feedback_error?: string;
message_type?: string | null;
scheduled_job_id?: string | null;
@ -153,6 +154,7 @@ export interface SystemStatus {
workspace_exists: boolean;
model: string;
max_tokens: number;
max_context_messages?: number;
temperature: number;
max_tool_iterations: number;
providers: ProviderStatus[];
@ -315,6 +317,7 @@ export interface BackendTaskRun {
attempt_index?: number | null;
task_text?: string;
messages: BackendTaskRunMessage[];
evidence_status?: string | null;
validation_result?: Record<string, unknown> | null;
}
@ -972,12 +975,12 @@ export interface ChatAssistantEvent {
run_id?: string;
task_id?: string | null;
task_status?: string | null;
validation_status?: 'passed' | 'failed' | 'unknown';
evidence_status?: 'recorded';
validation_result?: Record<string, unknown> | null;
metadata?: {
task_id?: string | null;
task_status?: string | null;
validation_result?: Record<string, unknown> | null;
evidence_status?: string | null;
[key: string]: unknown;
};
}