feat(engine): 添加运行时上下文支持并重构工具迭代限制
添加 RuntimeContext 类用于捕获模型运行时的日期时间信息, 包括UTC时间、本地时间和时区信息,并在系统提示中显示这些信息。 同时增加最大上下文消息数和工具迭代次数的配置选项, 将验证服务从引擎加载器中移除,并更新相关的数据结构和接口。 BREAKING CHANGE: 移除了验证服务,相关字段被替换为证据状态和接受状态。 - 添加 RuntimeContext 类和相关渲染方法 - 增加 max_context_messages 和 max_tool_iterations 配置 - 移除 ValidationService 相关代码 - 更新消息记录中的验证状态字段 - 添加原始工具调用检测和回退处理
This commit is contained in:
@ -4,6 +4,7 @@ from .builder import (
|
||||
ContextBuildInput,
|
||||
ContextBuildResult,
|
||||
ContextBuilder,
|
||||
RuntimeContext,
|
||||
SessionContext,
|
||||
SkillContext,
|
||||
)
|
||||
@ -12,6 +13,7 @@ __all__ = [
|
||||
"ContextBuildInput",
|
||||
"ContextBuildResult",
|
||||
"ContextBuilder",
|
||||
"RuntimeContext",
|
||||
"SessionContext",
|
||||
"SkillContext",
|
||||
]
|
||||
|
||||
@ -80,6 +80,16 @@ class SessionContext:
|
||||
parent_session_id: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RuntimeContext:
|
||||
"""Per-run runtime facts that should be visible to the model."""
|
||||
|
||||
utc_datetime: str
|
||||
local_datetime: str
|
||||
timezone: str | None = None
|
||||
utc_offset: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ContextBuildInput:
|
||||
"""一次上下文构建所需的全部输入。
|
||||
@ -103,6 +113,7 @@ class ContextBuildInput:
|
||||
memory_snapshot: MemorySnapshot | None = None
|
||||
activated_skills: list[SkillContext] = field(default_factory=list)
|
||||
session_context: SessionContext | None = None
|
||||
runtime_context: RuntimeContext | None = None
|
||||
execution_context: str | None = None
|
||||
extra_sections: list[str] = field(default_factory=list)
|
||||
|
||||
@ -143,9 +154,10 @@ class ContextBuilder:
|
||||
1. Beaver user-facing assistant identity
|
||||
2. base system prompt
|
||||
3. session metadata
|
||||
4. execution context
|
||||
5. frozen memory snapshot
|
||||
6. extra sections
|
||||
4. runtime date/time
|
||||
5. execution context
|
||||
6. frozen memory snapshot
|
||||
7. extra sections
|
||||
|
||||
这样设计的原因:
|
||||
- 身份与总规则要最靠前
|
||||
@ -164,6 +176,10 @@ class ContextBuilder:
|
||||
if session_section:
|
||||
sections.append(session_section)
|
||||
|
||||
runtime_section = self._render_runtime_section(build_input.runtime_context)
|
||||
if runtime_section:
|
||||
sections.append(runtime_section)
|
||||
|
||||
execution_context = (build_input.execution_context or "").strip()
|
||||
if execution_context:
|
||||
sections.append(f"# Execution Context\n\n{execution_context}")
|
||||
@ -347,6 +363,31 @@ class ContextBuilder:
|
||||
return None
|
||||
return "# Current Session\n\n" + "\n".join(rows)
|
||||
|
||||
def _render_runtime_section(self, runtime_context: RuntimeContext | None) -> str | None:
|
||||
"""Render date/time facts captured for the current model run."""
|
||||
|
||||
if runtime_context is None:
|
||||
return None
|
||||
|
||||
rows: list[str] = []
|
||||
if runtime_context.utc_datetime:
|
||||
rows.append(f"Current UTC time: {runtime_context.utc_datetime}")
|
||||
if runtime_context.local_datetime:
|
||||
rows.append(f"Current local time: {runtime_context.local_datetime}")
|
||||
if runtime_context.timezone:
|
||||
rows.append(f"Local timezone: {runtime_context.timezone}")
|
||||
if runtime_context.utc_offset:
|
||||
rows.append(f"Local UTC offset: {runtime_context.utc_offset}")
|
||||
|
||||
if not rows:
|
||||
return None
|
||||
return (
|
||||
"# Current Date and Time\n\n"
|
||||
+ "\n".join(rows)
|
||||
+ "\n\nUse this section as authoritative for relative date/time references such as "
|
||||
'"today", "tomorrow", "now", "this week", and "next month".'
|
||||
)
|
||||
|
||||
def build_skill_activation_messages(self, activated_skills: list[SkillContext]) -> list[dict[str, str]]:
|
||||
"""把已激活 skill 转成显式消息。
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ from beaver.skills.learning.eval import SkillDraftEvaluator
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillSpecStore
|
||||
from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
|
||||
from beaver.tasks import TaskExecutionPlanner, TaskService
|
||||
from beaver.tasks.skill_resolver import TaskSkillResolver
|
||||
from beaver.skills import SkillAssembler, SkillsLoader
|
||||
from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
|
||||
@ -91,7 +91,6 @@ class EngineLoadResult:
|
||||
task_skill_resolver: TaskSkillResolver | None = None
|
||||
task_service: TaskService | None = None
|
||||
task_execution_planner: TaskExecutionPlanner | None = None
|
||||
validation_service: ValidationService | None = None
|
||||
mcp_manager: MCPConnectionManager | None = None
|
||||
mcp_report: dict[str, dict] = field(default_factory=dict)
|
||||
closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
|
||||
@ -166,7 +165,6 @@ class EngineLoader:
|
||||
task_skill_resolver: TaskSkillResolver | None = None,
|
||||
task_service: TaskService | None = None,
|
||||
task_execution_planner: TaskExecutionPlanner | None = None,
|
||||
validation_service: ValidationService | None = None,
|
||||
) -> None:
|
||||
self.config = config or load_config(workspace=workspace, config_path=config_path)
|
||||
configured_workspace = self.config.agents_defaults.workspace
|
||||
@ -192,7 +190,6 @@ class EngineLoader:
|
||||
self._task_skill_resolver = task_skill_resolver
|
||||
self._task_service = task_service
|
||||
self._task_execution_planner = task_execution_planner
|
||||
self._validation_service = validation_service
|
||||
|
||||
def load(self) -> EngineLoadResult:
|
||||
"""装配当前主链需要的最小 runtime 对象。"""
|
||||
@ -276,7 +273,6 @@ class EngineLoader:
|
||||
)
|
||||
task_service = self._task_service or TaskService(workspace / "tasks")
|
||||
task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
|
||||
validation_service = self._validation_service or ValidationService()
|
||||
mcp_manager = MCPConnectionManager(
|
||||
self.config.tools.mcp_servers,
|
||||
authz_config=self.config.authz,
|
||||
@ -311,7 +307,6 @@ class EngineLoader:
|
||||
task_skill_resolver=task_skill_resolver,
|
||||
task_service=task_service,
|
||||
task_execution_planner=task_execution_planner,
|
||||
validation_service=validation_service,
|
||||
mcp_manager=mcp_manager,
|
||||
)
|
||||
if self._session_manager is None:
|
||||
|
||||
@ -4,12 +4,15 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
|
||||
from beaver.engine.context import ContextBuildInput, RuntimeContext, SessionContext, SkillContext
|
||||
from beaver.memory.runs import RunRecord, SkillEffectRecord
|
||||
from beaver.skills.learning import RunReceiptContext
|
||||
from beaver.skills.catalog.utils import strip_frontmatter
|
||||
@ -26,6 +29,17 @@ TOOL_FAILURE_GUIDANCE_PROMPT = (
|
||||
"Use available materials, state uncertainty clearly, and provide partial confirmed results."
|
||||
)
|
||||
|
||||
RAW_TOOL_CALL_FALLBACK = (
|
||||
"The run reached the configured tool-call limit before producing a reliable final answer. "
|
||||
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
|
||||
"Please request a revision to continue the task."
|
||||
)
|
||||
|
||||
_RAW_TOOL_CALL_RE = re.compile(
|
||||
r"^\s*<tool_call\b[\s\S]*?</tool_call>\s*$|^\s*<function=[^>]+>[\s\S]*?</function>\s*$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AgentProfile:
|
||||
@ -35,8 +49,9 @@ class AgentProfile:
|
||||
system_prompt: str = ""
|
||||
default_model: str = "gpt-4.1-mini"
|
||||
max_tokens: int = 4096
|
||||
max_context_messages: int = 1000
|
||||
temperature: float = 0.2
|
||||
max_tool_iterations: int = 8
|
||||
max_tool_iterations: int = 30
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -446,7 +461,7 @@ class AgentLoop:
|
||||
*(pinned_skill_contexts or []),
|
||||
*self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
|
||||
]
|
||||
if not include_skill_assembly or thinking_enabled is False:
|
||||
if not include_skill_assembly:
|
||||
activated_skills = self._merge_skill_contexts(pinned_skills, [])
|
||||
else:
|
||||
skill_query = skill_selection_context or task
|
||||
@ -512,8 +527,6 @@ class AgentLoop:
|
||||
|
||||
if not include_tools:
|
||||
selected_tool_specs = []
|
||||
elif thinking_enabled is False:
|
||||
selected_tool_specs = tool_registry.list_specs()
|
||||
else:
|
||||
selected_tool_specs = await tool_assembler.assemble(
|
||||
task_description=task,
|
||||
@ -543,7 +556,10 @@ class AgentLoop:
|
||||
|
||||
build_input = ContextBuildInput(
|
||||
base_system_prompt=self.profile.system_prompt,
|
||||
history=session_manager.get_history(resolved_session_id),
|
||||
history=session_manager.get_history(
|
||||
resolved_session_id,
|
||||
max_messages=max(1, self.profile.max_context_messages),
|
||||
),
|
||||
current_user_input=task,
|
||||
memory_snapshot=memory_snapshot,
|
||||
activated_skills=activated_skills,
|
||||
@ -554,6 +570,7 @@ class AgentLoop:
|
||||
user_id=user_id,
|
||||
parent_session_id=parent_session_id,
|
||||
),
|
||||
runtime_context=self._current_runtime_context(),
|
||||
execution_context=execution_context,
|
||||
extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
|
||||
)
|
||||
@ -693,6 +710,7 @@ class AgentLoop:
|
||||
tool_calls=assistant_tool_calls or None,
|
||||
finish_reason=response.finish_reason,
|
||||
reasoning=response.reasoning_content,
|
||||
context_visible=not bool(assistant_tool_calls),
|
||||
source=source,
|
||||
title=title,
|
||||
model=final_model,
|
||||
@ -707,7 +725,11 @@ class AgentLoop:
|
||||
|
||||
if not response.has_tool_calls:
|
||||
final_text = response.content or ""
|
||||
final_finish_reason = response.finish_reason or "stop"
|
||||
if self._looks_like_raw_tool_call(final_text):
|
||||
final_text = RAW_TOOL_CALL_FALLBACK
|
||||
final_finish_reason = "invalid_tool_call_text"
|
||||
else:
|
||||
final_finish_reason = response.finish_reason or "stop"
|
||||
break
|
||||
|
||||
if iterations >= resolved_max_tool_iterations:
|
||||
@ -719,10 +741,7 @@ class AgentLoop:
|
||||
temperature=resolved_temperature,
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
final_text = finalized or (
|
||||
"Tool loop stopped after reaching the configured iteration limit, "
|
||||
"and no final answer was produced."
|
||||
)
|
||||
final_text = finalized or RAW_TOOL_CALL_FALLBACK
|
||||
final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
|
||||
session_manager.append_message(
|
||||
resolved_session_id,
|
||||
@ -877,17 +896,14 @@ class AgentLoop:
|
||||
temperature: float,
|
||||
thinking_enabled: bool | None,
|
||||
) -> str:
|
||||
final_messages = [
|
||||
*messages,
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"The configured tool iteration budget is exhausted. Do not call tools. "
|
||||
"Produce the best final answer from the existing conversation and tool results. "
|
||||
"State uncertainty explicitly."
|
||||
),
|
||||
},
|
||||
]
|
||||
final_messages = AgentLoop._with_system_guidance(
|
||||
messages,
|
||||
(
|
||||
"The configured tool iteration budget is exhausted. Do not call tools. "
|
||||
"Produce the best final answer from the existing conversation and tool results. "
|
||||
"State uncertainty explicitly."
|
||||
),
|
||||
)
|
||||
kwargs: dict[str, Any] = {
|
||||
"messages": final_messages,
|
||||
"tools": None,
|
||||
@ -898,7 +914,27 @@ class AgentLoop:
|
||||
if thinking_enabled is not None:
|
||||
kwargs["thinking_enabled"] = thinking_enabled
|
||||
response = await provider.chat(**kwargs)
|
||||
return (response.content or "").strip()
|
||||
if response.has_tool_calls:
|
||||
return ""
|
||||
content = (response.content or "").strip()
|
||||
if AgentLoop._looks_like_raw_tool_call(content):
|
||||
return ""
|
||||
return content
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_raw_tool_call(content: str | None) -> bool:
|
||||
if not content:
|
||||
return False
|
||||
return bool(_RAW_TOOL_CALL_RE.match(content))
|
||||
|
||||
@staticmethod
|
||||
def _with_system_guidance(messages: list[dict[str, Any]], guidance: str) -> list[dict[str, Any]]:
|
||||
copied = [dict(message) for message in messages]
|
||||
if copied and copied[0].get("role") == "system":
|
||||
existing = str(copied[0].get("content") or "").strip()
|
||||
copied[0]["content"] = "\n\n".join(part for part in (existing, guidance.strip()) if part)
|
||||
return copied
|
||||
return [{"role": "system", "content": guidance.strip()}, *copied]
|
||||
|
||||
@staticmethod
|
||||
def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
|
||||
@ -1133,3 +1169,49 @@ class AgentLoop:
|
||||
@staticmethod
|
||||
def _utc_now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
@staticmethod
|
||||
def _current_runtime_context() -> RuntimeContext:
|
||||
utc_now = datetime.now(timezone.utc)
|
||||
timezone_name = AgentLoop._configured_timezone_name()
|
||||
local_now = datetime.now().astimezone()
|
||||
rendered_timezone = local_now.tzname()
|
||||
|
||||
if timezone_name:
|
||||
try:
|
||||
local_now = utc_now.astimezone(ZoneInfo(timezone_name))
|
||||
rendered_timezone = timezone_name
|
||||
except ZoneInfoNotFoundError:
|
||||
rendered_timezone = local_now.tzname() or timezone_name
|
||||
|
||||
return RuntimeContext(
|
||||
utc_datetime=utc_now.isoformat(),
|
||||
local_datetime=local_now.isoformat(),
|
||||
timezone=rendered_timezone,
|
||||
utc_offset=AgentLoop._format_utc_offset(local_now),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configured_timezone_name() -> str | None:
|
||||
for value in (os.getenv("BEAVER_RUNTIME_TIMEZONE"), os.getenv("TZ")):
|
||||
cleaned = (value or "").strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
|
||||
try:
|
||||
timezone_file = "/etc/timezone"
|
||||
if os.path.exists(timezone_file):
|
||||
with open(timezone_file, encoding="utf-8") as file:
|
||||
cleaned = file.read().strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
except OSError:
|
||||
return None
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _format_utc_offset(value: datetime) -> str | None:
|
||||
raw = value.strftime("%z")
|
||||
if not raw:
|
||||
return None
|
||||
return f"{raw[:3]}:{raw[3:]}"
|
||||
|
||||
@ -119,13 +119,23 @@ class LiteLLMProvider(LLMProvider):
|
||||
@staticmethod
|
||||
def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
sanitized = []
|
||||
system_contents: list[str] = []
|
||||
for message in messages:
|
||||
clean = {key: value for key, value in message.items() if key in _ALLOWED_MSG_KEYS}
|
||||
if clean.get("role") == "system":
|
||||
content = clean.get("content")
|
||||
if isinstance(content, str) and content.strip():
|
||||
system_contents.append(content.strip())
|
||||
elif content is not None:
|
||||
system_contents.append(str(content))
|
||||
continue
|
||||
if clean.get("role") == "assistant" and "content" not in clean:
|
||||
clean["content"] = None
|
||||
if isinstance(clean.get("tool_calls"), list):
|
||||
clean["tool_calls"] = LiteLLMProvider._sanitize_tool_calls(clean["tool_calls"])
|
||||
sanitized.append(clean)
|
||||
if system_contents:
|
||||
sanitized.insert(0, {"role": "system", "content": "\n\n".join(system_contents)})
|
||||
return sanitized
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -84,8 +84,10 @@ class MessageRecord:
|
||||
payload["task_id"] = self.event_payload.get("task_id")
|
||||
if self.event_payload.get("task_status"):
|
||||
payload["task_status"] = self.event_payload.get("task_status")
|
||||
if self.event_payload.get("validation_status"):
|
||||
payload["validation_status"] = self.event_payload.get("validation_status")
|
||||
if self.event_payload.get("evidence_status"):
|
||||
payload["evidence_status"] = self.event_payload.get("evidence_status")
|
||||
if self.event_payload.get("acceptance_state"):
|
||||
payload["acceptance_state"] = self.event_payload.get("acceptance_state")
|
||||
if self.event_payload.get("feedback_state"):
|
||||
payload["feedback_state"] = self.event_payload.get("feedback_state")
|
||||
if self.event_payload.get("feedback_error"):
|
||||
|
||||
@ -86,6 +86,18 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
|
||||
model=_string(defaults.get("model") or data.get("model")),
|
||||
provider=_string(defaults.get("provider") or data.get("provider")),
|
||||
embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")),
|
||||
max_context_messages=_int(
|
||||
defaults.get("maxContextMessages")
|
||||
or defaults.get("max_context_messages")
|
||||
or data.get("maxContextMessages")
|
||||
or data.get("max_context_messages")
|
||||
),
|
||||
max_tool_iterations=_int(
|
||||
defaults.get("maxToolIterations")
|
||||
or defaults.get("max_tool_iterations")
|
||||
or data.get("maxToolIterations")
|
||||
or data.get("max_tool_iterations")
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@ -217,6 +229,13 @@ def _float(value: Any) -> float | None:
|
||||
return float(value)
|
||||
|
||||
|
||||
def _int(value: Any) -> int | None:
|
||||
parsed = _float(value)
|
||||
if parsed is None:
|
||||
return None
|
||||
return int(parsed)
|
||||
|
||||
|
||||
def _bool(value: Any, *, default: bool) -> bool:
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
|
||||
@ -25,6 +25,8 @@ class AgentDefaultsConfig:
|
||||
model: str | None = None
|
||||
provider: str | None = None
|
||||
embedding_model: str | None = None
|
||||
max_context_messages: int | None = None
|
||||
max_tool_iterations: int | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
||||
@ -44,6 +44,8 @@ from .files import (
|
||||
workspace_file_path,
|
||||
)
|
||||
from .schemas import (
|
||||
WebChatAcceptanceRequest,
|
||||
WebChatAcceptanceResponse,
|
||||
WebChatFeedbackRequest,
|
||||
WebChatFeedbackResponse,
|
||||
WebChatRequest,
|
||||
@ -155,6 +157,13 @@ except ModuleNotFoundError: # pragma: no cover - fallback for skeleton-only env
|
||||
return decorator
|
||||
|
||||
|
||||
RAW_TOOL_CALL_DISPLAY_FALLBACK = (
|
||||
"The run reached the configured tool-call limit before producing a reliable final answer. "
|
||||
"The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
|
||||
"Please request a revision to continue the task."
|
||||
)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def _app_lifespan(
|
||||
app: FastAPI,
|
||||
@ -365,6 +374,7 @@ def create_app(
|
||||
"workspace_exists": loaded.workspace.exists(),
|
||||
"model": config.default_model or agent_service.profile.default_model,
|
||||
"max_tokens": agent_service.profile.max_tokens,
|
||||
"max_context_messages": agent_service.profile.max_context_messages,
|
||||
"temperature": agent_service.profile.temperature,
|
||||
"max_tool_iterations": agent_service.profile.max_tool_iterations,
|
||||
"providers": providers_status,
|
||||
@ -1719,7 +1729,8 @@ def create_app(
|
||||
usage=result.usage,
|
||||
task_id=result.task_id,
|
||||
task_status=result.task_status,
|
||||
validation_result=result.validation_result,
|
||||
evidence_status="recorded" if result.task_id else None,
|
||||
validation_result=None,
|
||||
)
|
||||
|
||||
fallback_target = _model_dump(payload.fallback_target)
|
||||
@ -1769,7 +1780,8 @@ def create_app(
|
||||
usage=result.usage,
|
||||
task_id=result.task_id,
|
||||
task_status=result.task_status,
|
||||
validation_result=result.validation_result,
|
||||
evidence_status="recorded" if result.task_id else None,
|
||||
validation_result=None,
|
||||
)
|
||||
|
||||
@app.websocket("/ws/{session_id:path}")
|
||||
@ -1882,6 +1894,30 @@ def create_app(
|
||||
}
|
||||
)
|
||||
|
||||
@app.post(
|
||||
"/api/chat/acceptance",
|
||||
response_model=WebChatAcceptanceResponse,
|
||||
responses={
|
||||
400: {"model": WebErrorResponse},
|
||||
404: {"model": WebErrorResponse},
|
||||
},
|
||||
)
|
||||
async def chat_acceptance(request: Request, payload: WebChatAcceptanceRequest) -> WebChatAcceptanceResponse:
|
||||
agent_service = get_agent_service(request)
|
||||
try:
|
||||
result = await agent_service.submit_acceptance(
|
||||
session_id=payload.session_id,
|
||||
run_id=payload.run_id,
|
||||
acceptance_type=payload.acceptance_type,
|
||||
comment=payload.comment,
|
||||
)
|
||||
except ValueError as exc:
|
||||
detail = str(exc)
|
||||
status_code = 404 if "No internal task" in detail else 400
|
||||
raise HTTPException(status_code=status_code, detail=detail) from exc
|
||||
|
||||
return WebChatAcceptanceResponse(**result)
|
||||
|
||||
@app.post(
|
||||
"/api/chat/feedback",
|
||||
response_model=WebChatFeedbackResponse,
|
||||
@ -1893,10 +1929,10 @@ def create_app(
|
||||
async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
|
||||
agent_service = get_agent_service(request)
|
||||
try:
|
||||
result = await agent_service.submit_feedback(
|
||||
result = await agent_service.submit_acceptance(
|
||||
session_id=payload.session_id,
|
||||
run_id=payload.run_id,
|
||||
feedback_type=payload.feedback_type,
|
||||
acceptance_type=payload.feedback_type,
|
||||
comment=payload.comment,
|
||||
)
|
||||
except ValueError as exc:
|
||||
@ -1915,15 +1951,21 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An
|
||||
role = event.get("role")
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
content = event.get("content") or ""
|
||||
comparable_content = str(content).replace("\u200b", "").replace("\u200c", "").replace("\u200d", "").replace("\ufeff", "")
|
||||
if role == "assistant" and not comparable_content.strip():
|
||||
continue
|
||||
content = _sanitize_user_visible_assistant_content(role=role, content=content)
|
||||
messages.append(
|
||||
{
|
||||
"role": role,
|
||||
"content": event.get("content") or "",
|
||||
"content": content,
|
||||
"timestamp": _iso_from_timestamp(event.get("timestamp")),
|
||||
"run_id": event.get("run_id"),
|
||||
"task_id": event.get("task_id"),
|
||||
"task_status": event.get("task_status"),
|
||||
"validation_status": event.get("validation_status"),
|
||||
"evidence_status": event.get("evidence_status"),
|
||||
"acceptance_state": event.get("acceptance_state"),
|
||||
"feedback_state": event.get("feedback_state"),
|
||||
"feedback_error": event.get("feedback_error"),
|
||||
"message_type": event.get("message_type"),
|
||||
@ -2142,6 +2184,7 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
|
||||
content = (record.content or "").strip()
|
||||
if not content:
|
||||
continue
|
||||
content = _sanitize_user_visible_assistant_content(role=record.role, content=content)
|
||||
messages.append(
|
||||
{
|
||||
"role": record.role,
|
||||
@ -2150,7 +2193,6 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
|
||||
"tool_name": record.tool_name,
|
||||
}
|
||||
)
|
||||
validation = run_record.validation_result if run_record is not None else None
|
||||
views.append(
|
||||
{
|
||||
"run_id": run_id,
|
||||
@ -2163,7 +2205,8 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
|
||||
"attempt_index": run_record.attempt_index if run_record is not None else None,
|
||||
"task_text": run_record.task_text if run_record is not None else "",
|
||||
"messages": messages,
|
||||
"validation_result": validation,
|
||||
"evidence_status": "recorded",
|
||||
"validation_result": None,
|
||||
}
|
||||
)
|
||||
return views
|
||||
@ -2428,12 +2471,6 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
|
||||
return dict(value)
|
||||
|
||||
|
||||
def _validation_status(validation_result: dict[str, Any] | None) -> str:
|
||||
if validation_result is None:
|
||||
return "unknown"
|
||||
return "passed" if validation_result.get("accepted") is True else "failed"
|
||||
|
||||
|
||||
def _websocket_input_metadata(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
||||
result: dict[str, Any] = dict(metadata)
|
||||
@ -2467,13 +2504,15 @@ def _int_or_none(value: Any) -> int | None:
|
||||
|
||||
|
||||
def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
|
||||
validation_result = getattr(result, "validation_result", None)
|
||||
task_id = getattr(result, "task_id", None)
|
||||
task_status = getattr(result, "task_status", None)
|
||||
return {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": getattr(result, "output_text", "") or "",
|
||||
"content": _sanitize_user_visible_assistant_content(
|
||||
role="assistant",
|
||||
content=getattr(result, "output_text", "") or "",
|
||||
),
|
||||
"session_id": getattr(result, "session_id", None),
|
||||
"run_id": getattr(result, "run_id", None),
|
||||
"finish_reason": getattr(result, "finish_reason", None),
|
||||
@ -2483,17 +2522,39 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
|
||||
"usage": dict(getattr(result, "usage", {}) or {}),
|
||||
"task_id": task_id,
|
||||
"task_status": task_status,
|
||||
"validation_result": validation_result,
|
||||
"validation_status": _validation_status(validation_result),
|
||||
"evidence_status": "recorded" if task_id else None,
|
||||
"validation_result": None,
|
||||
"metadata": {
|
||||
"task_id": task_id,
|
||||
"task_status": task_status,
|
||||
"validation_result": validation_result,
|
||||
"evidence_status": "recorded" if task_id else None,
|
||||
"input_metadata": _websocket_input_metadata(input_payload),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _sanitize_user_visible_assistant_content(*, role: str, content: str) -> str:
|
||||
if role != "assistant":
|
||||
return content
|
||||
if _looks_like_raw_tool_call(content):
|
||||
return RAW_TOOL_CALL_DISPLAY_FALLBACK
|
||||
return content
|
||||
|
||||
|
||||
def _looks_like_raw_tool_call(content: str | None) -> bool:
|
||||
if not content:
|
||||
return False
|
||||
stripped = content.strip()
|
||||
lowered = stripped.lower()
|
||||
return (
|
||||
lowered.startswith("<tool_call")
|
||||
and lowered.endswith("</tool_call>")
|
||||
) or (
|
||||
lowered.startswith("<function=")
|
||||
and lowered.endswith("</function>")
|
||||
)
|
||||
|
||||
|
||||
def _provider_enabled(provider_name: str, provider_cfg: Any) -> bool:
|
||||
if provider_cfg is None or provider_name == "custom":
|
||||
return False
|
||||
@ -2980,6 +3041,7 @@ def _write_config_json(path: Path, data: dict[str, Any]) -> None:
|
||||
def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
|
||||
config = load_config(config_path=config_path)
|
||||
agent_service.loader.config = config
|
||||
agent_service._apply_configured_profile_defaults() # noqa: SLF001
|
||||
loop = getattr(agent_service, "_loop", None)
|
||||
loaded = getattr(loop, "loaded", None) if loop is not None else None
|
||||
if loaded is not None:
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
"""Web request and response schemas."""
|
||||
|
||||
from .chat import (
|
||||
WebChatAcceptanceRequest,
|
||||
WebChatAcceptanceResponse,
|
||||
WebChatFeedbackRequest,
|
||||
WebChatFeedbackResponse,
|
||||
WebChatRequest,
|
||||
@ -13,6 +15,8 @@ from .chat import (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"WebChatAcceptanceRequest",
|
||||
"WebChatAcceptanceResponse",
|
||||
"WebChatFeedbackRequest",
|
||||
"WebChatFeedbackResponse",
|
||||
"WebChatRequest",
|
||||
|
||||
@ -82,11 +82,34 @@ class WebChatResponse(BaseModel):
|
||||
usage: dict[str, Any] = Field(default_factory=dict)
|
||||
task_id: str | None = None
|
||||
task_status: str | None = None
|
||||
evidence_status: str | None = None
|
||||
acceptance_state: str | None = None
|
||||
validation_result: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class WebChatAcceptanceRequest(BaseModel):
|
||||
"""User acceptance on the latest assistant result in chat."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
acceptance_type: str
|
||||
comment: str | None = None
|
||||
|
||||
|
||||
class WebChatAcceptanceResponse(BaseModel):
|
||||
"""Acceptance recording result."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
task_id: str
|
||||
task_status: str
|
||||
acceptance_type: str
|
||||
feedback_type: str
|
||||
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WebChatFeedbackRequest(BaseModel):
|
||||
"""Feedback on the latest assistant result in chat."""
|
||||
"""Backward-compatible feedback payload."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
@ -94,15 +117,8 @@ class WebChatFeedbackRequest(BaseModel):
|
||||
comment: str | None = None
|
||||
|
||||
|
||||
class WebChatFeedbackResponse(BaseModel):
|
||||
"""Feedback recording result."""
|
||||
|
||||
session_id: str
|
||||
run_id: str
|
||||
task_id: str
|
||||
task_status: str
|
||||
feedback_type: str
|
||||
learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
|
||||
class WebChatFeedbackResponse(WebChatAcceptanceResponse):
|
||||
"""Backward-compatible feedback response."""
|
||||
|
||||
|
||||
class WebProviderConfigRequest(BaseModel):
|
||||
|
||||
@ -29,9 +29,9 @@ from beaver.tasks import (
|
||||
TaskEvidencePacket,
|
||||
TaskExecutionPlan,
|
||||
TaskRecord,
|
||||
ValidationResult,
|
||||
render_task_evidence,
|
||||
)
|
||||
from beaver.tasks.service import normalize_acceptance_type
|
||||
|
||||
|
||||
NOTIFICATION_SESSION_ID = "notify:default:scheduled"
|
||||
@ -60,11 +60,19 @@ class AgentService:
|
||||
) -> None:
|
||||
self.profile = profile or AgentProfile()
|
||||
self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
|
||||
self._apply_configured_profile_defaults()
|
||||
self._loop: AgentLoop | None = None
|
||||
self._run_task: asyncio.Task[None] | None = None
|
||||
self._main_agent_router = MainAgentRouter()
|
||||
self._runtime_services: dict[str, Any] = {}
|
||||
|
||||
def _apply_configured_profile_defaults(self) -> None:
|
||||
defaults = self.loader.config.agents_defaults
|
||||
if defaults.max_context_messages is not None:
|
||||
self.profile.max_context_messages = max(1, defaults.max_context_messages)
|
||||
if defaults.max_tool_iterations is not None:
|
||||
self.profile.max_tool_iterations = max(0, defaults.max_tool_iterations)
|
||||
|
||||
def create_loop(self) -> AgentLoop:
|
||||
"""创建并缓存当前 service 使用的 AgentLoop。"""
|
||||
|
||||
@ -232,7 +240,7 @@ class AgentService:
|
||||
|
||||
Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
|
||||
This entry bypasses the main-agent classifier and forces Task mode so
|
||||
every trigger produces a TaskRecord, validation, feedback state, and a
|
||||
every trigger produces a TaskRecord, evidence, acceptance state, and a
|
||||
run_id that the scheduled-task history can link to.
|
||||
"""
|
||||
|
||||
@ -280,9 +288,9 @@ class AgentService:
|
||||
result.run_id,
|
||||
{
|
||||
"message_type": "scheduled_reply",
|
||||
"scheduled_job_id": job.id,
|
||||
"scheduled_run_id": run.scheduled_run_id,
|
||||
"cron_job_name": job.name,
|
||||
"scheduled_job_id": cron_job_id,
|
||||
"scheduled_run_id": scheduled_run_id,
|
||||
"cron_job_name": cron_job_name,
|
||||
"mode": "notification",
|
||||
},
|
||||
)
|
||||
@ -403,15 +411,15 @@ class AgentService:
|
||||
},
|
||||
)
|
||||
|
||||
async def submit_feedback(
|
||||
async def submit_acceptance(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
run_id: str,
|
||||
feedback_type: str,
|
||||
acceptance_type: str,
|
||||
comment: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Record chat feedback for the internal task linked to a run."""
|
||||
"""Record user acceptance for the internal task linked to a run."""
|
||||
|
||||
loaded = self.create_loop().boot()
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
@ -419,32 +427,31 @@ class AgentService:
|
||||
if task is None or task.session_id != session_id:
|
||||
raise ValueError(f"No internal task found for run_id={run_id!r}")
|
||||
|
||||
normalized = feedback_type.strip().lower()
|
||||
if normalized not in {"satisfied", "revise", "abandon"}:
|
||||
raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
|
||||
normalized = normalize_acceptance_type(acceptance_type)
|
||||
legacy_feedback_type = "satisfied" if normalized == "accept" else normalized
|
||||
|
||||
already_recorded = any(
|
||||
item.get("run_id") == run_id and item.get("feedback_type") == normalized
|
||||
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
|
||||
for item in task.feedback
|
||||
)
|
||||
conflicting_feedback = next(
|
||||
conflicting_acceptance = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("feedback_type") != normalized
|
||||
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_feedback is not None:
|
||||
if conflicting_acceptance is not None:
|
||||
raise ValueError(
|
||||
f"Feedback for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_feedback.get('feedback_type')!r}"
|
||||
f"Acceptance for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_acceptance.get('acceptance_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not already_recorded:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
updated = task if already_recorded else task_service.add_feedback(
|
||||
updated = task if already_recorded else task_service.add_acceptance(
|
||||
task.task_id,
|
||||
feedback_type=normalized,
|
||||
acceptance_type=normalized,
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
@ -455,7 +462,8 @@ class AgentService:
|
||||
{
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"feedback_state": normalized,
|
||||
"acceptance_state": normalized,
|
||||
"feedback_state": legacy_feedback_type,
|
||||
},
|
||||
)
|
||||
if not already_recorded:
|
||||
@ -463,10 +471,11 @@ class AgentService:
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="system",
|
||||
event_type="task_feedback_recorded",
|
||||
event_type="task_acceptance_recorded",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
},
|
||||
@ -475,35 +484,36 @@ class AgentService:
|
||||
)
|
||||
|
||||
generated_candidates = []
|
||||
validation = ValidationResult.from_dict(updated.validation_result)
|
||||
if not already_recorded:
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
feedback_payload = {
|
||||
"feedback_type": normalized,
|
||||
acceptance_payload = {
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment or "",
|
||||
"task_status": updated.status,
|
||||
"final_accepted_run_id": updated.metadata.get("final_accepted_run_id"),
|
||||
}
|
||||
run_memory_store.update_run_record(
|
||||
run_id,
|
||||
success=normalized == "satisfied",
|
||||
feedback=feedback_payload,
|
||||
success=normalized == "accept",
|
||||
feedback=acceptance_payload,
|
||||
)
|
||||
run_memory_store.update_skill_effects_for_run(
|
||||
run_id,
|
||||
success=normalized == "satisfied",
|
||||
feedback_score=self._feedback_score_for_learning(normalized, validation),
|
||||
success=normalized == "accept",
|
||||
feedback_score=self._acceptance_score_for_learning(normalized),
|
||||
notes=(comment or normalized).strip(),
|
||||
)
|
||||
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
|
||||
skill_learning_service.rescore_skill_versions()
|
||||
if already_recorded:
|
||||
generated_candidates = []
|
||||
elif normalized == "satisfied" and validation is not None and validation.accepted:
|
||||
elif normalized == "accept":
|
||||
generated_candidates = [
|
||||
item.to_dict()
|
||||
for item in skill_learning_service.build_learning_candidates_for_task(
|
||||
updated.task_id,
|
||||
trigger_run_id=run_id,
|
||||
final_accepted_run_id=run_id,
|
||||
)
|
||||
]
|
||||
elif normalized == "abandon":
|
||||
@ -514,7 +524,8 @@ class AgentService:
|
||||
event_type="task_failure_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": updated.task_id,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"comment": comment or "",
|
||||
"task_status": updated.status,
|
||||
"durable_memory_written": False,
|
||||
@ -528,10 +539,28 @@ class AgentService:
|
||||
"run_id": run_id,
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"feedback_type": normalized,
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": legacy_feedback_type,
|
||||
"learning_candidates": generated_candidates,
|
||||
}
|
||||
|
||||
async def submit_feedback(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
run_id: str,
|
||||
feedback_type: str,
|
||||
comment: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Backward-compatible wrapper for older clients."""
|
||||
|
||||
return await self.submit_acceptance(
|
||||
session_id=session_id,
|
||||
run_id=run_id,
|
||||
acceptance_type=feedback_type,
|
||||
comment=comment,
|
||||
)
|
||||
|
||||
async def _process_with_main_agent(
|
||||
self,
|
||||
message: str,
|
||||
@ -591,7 +620,7 @@ class AgentService:
|
||||
else active_task
|
||||
)
|
||||
if active_task is not None and decision.action == "revise_task" and task.task_id == active_task.task_id:
|
||||
task = self._record_revision_feedback_for_task(
|
||||
task = self._record_revision_acceptance_for_task(
|
||||
loaded,
|
||||
task=task,
|
||||
session_id=session_id,
|
||||
@ -599,7 +628,7 @@ class AgentService:
|
||||
)
|
||||
return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)
|
||||
|
||||
def _record_revision_feedback_for_task(
|
||||
def _record_revision_acceptance_for_task(
|
||||
self,
|
||||
loaded: Any,
|
||||
*,
|
||||
@ -607,9 +636,9 @@ class AgentService:
|
||||
session_id: str,
|
||||
comment: str,
|
||||
) -> TaskRecord:
|
||||
"""Mark the latest feedback-eligible run as revised before continuing a task."""
|
||||
"""Mark the latest acceptance-eligible run as revised before continuing a task."""
|
||||
|
||||
if task.status not in {"awaiting_feedback", "needs_revision"}:
|
||||
if task.status not in {"awaiting_acceptance", "needs_revision"}:
|
||||
return task
|
||||
run_id = next((item for item in reversed(task.run_ids) if item), None)
|
||||
if not run_id:
|
||||
@ -617,15 +646,15 @@ class AgentService:
|
||||
|
||||
existing = next((item for item in task.feedback if item.get("run_id") == run_id), None)
|
||||
if existing is not None:
|
||||
if existing.get("feedback_type") != "revise":
|
||||
if existing.get("acceptance_type") != "revise":
|
||||
return task
|
||||
updated = task
|
||||
already_recorded = True
|
||||
else:
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
updated = task_service.add_feedback(
|
||||
updated = task_service.add_acceptance(
|
||||
task.task_id,
|
||||
feedback_type="revise",
|
||||
acceptance_type="revise",
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
@ -638,6 +667,7 @@ class AgentService:
|
||||
{
|
||||
"task_id": updated.task_id,
|
||||
"task_status": updated.status,
|
||||
"acceptance_state": "revise",
|
||||
"feedback_state": "revise",
|
||||
},
|
||||
)
|
||||
@ -648,9 +678,10 @@ class AgentService:
|
||||
session_id,
|
||||
run_id=run_id,
|
||||
role="system",
|
||||
event_type="task_feedback_recorded",
|
||||
event_type="task_acceptance_recorded",
|
||||
event_payload={
|
||||
"task_id": updated.task_id,
|
||||
"acceptance_type": "revise",
|
||||
"feedback_type": "revise",
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
@ -659,12 +690,12 @@ class AgentService:
|
||||
content=comment,
|
||||
context_visible=False,
|
||||
)
|
||||
validation = ValidationResult.from_dict(updated.validation_result)
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
run_memory_store.update_run_record(
|
||||
run_id,
|
||||
success=False,
|
||||
feedback={
|
||||
"acceptance_type": "revise",
|
||||
"feedback_type": "revise",
|
||||
"comment": comment,
|
||||
"task_status": updated.status,
|
||||
@ -673,7 +704,7 @@ class AgentService:
|
||||
run_memory_store.update_skill_effects_for_run(
|
||||
run_id,
|
||||
success=False,
|
||||
feedback_score=self._feedback_score_for_learning("revise", validation),
|
||||
feedback_score=self._acceptance_score_for_learning("revise"),
|
||||
notes=comment.strip() or "revise",
|
||||
)
|
||||
skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
|
||||
@ -690,236 +721,185 @@ class AgentService:
|
||||
) -> AgentRunResult:
|
||||
loaded = self.create_loop().boot()
|
||||
task_service = self._require_loaded(loaded, "task_service")
|
||||
validation_service = self._require_loaded(loaded, "validation_service")
|
||||
task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
|
||||
session_manager = self._require_loaded(loaded, "session_manager")
|
||||
run_memory_store = self._require_loaded(loaded, "run_memory_store")
|
||||
|
||||
last_result: AgentRunResult | None = None
|
||||
latest_validation: ValidationResult | None = None
|
||||
base_execution_context = kwargs.get("execution_context")
|
||||
provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
|
||||
kwargs = dict(kwargs)
|
||||
team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
|
||||
kwargs["provider_bundle"] = provider_bundle
|
||||
|
||||
for attempt_index in (1, 2):
|
||||
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
|
||||
plan = await task_execution_planner.plan(
|
||||
attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
|
||||
task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
|
||||
plan = await task_execution_planner.plan(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_execution_planned",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
provider_bundle=provider_bundle,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(loaded, kwargs),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_execution_planned",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
**plan.to_event_payload(),
|
||||
},
|
||||
)
|
||||
team_summaries: list[str] = []
|
||||
team_execution_context = ""
|
||||
team_result: TeamRunResult | None = None
|
||||
if plan.is_team:
|
||||
team_result, team_error = await self._run_team_for_task(
|
||||
plan,
|
||||
task=task,
|
||||
parent_session_id=kwargs["session_id"],
|
||||
provider_bundle_factory=team_provider_bundle_factory
|
||||
or self._build_team_provider_bundle_factory(loaded, kwargs),
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
if team_result is not None:
|
||||
team_summaries = [self._team_summary_for_validation(team_result)]
|
||||
team_packet = TaskEvidencePacket(
|
||||
task_id=task.task_id,
|
||||
attempt_index=attempt_index,
|
||||
main_run=None,
|
||||
team_runs=self._team_run_evidence(team_result),
|
||||
team_node_results=list(team_result.node_results),
|
||||
final_output="",
|
||||
)
|
||||
team_execution_context = self._join_context(
|
||||
self._team_execution_context(plan, team_result),
|
||||
"Rendered team evidence:\n" + render_task_evidence(team_packet),
|
||||
)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": team_result.run_ids,
|
||||
"team_success": team_result.success,
|
||||
"node_results": self._team_node_results_for_event(plan, team_result),
|
||||
"reason": plan.reason,
|
||||
"error": None if team_result.success else "one or more team nodes failed",
|
||||
},
|
||||
)
|
||||
else:
|
||||
team_summaries = [f"Team execution failed: {team_error}"]
|
||||
team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_team_run_failed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
"node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
|
||||
"team_run_ids": [],
|
||||
"team_success": False,
|
||||
"reason": plan.reason,
|
||||
"error": team_error,
|
||||
},
|
||||
)
|
||||
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_mode": True,
|
||||
"attempt_index": attempt_index,
|
||||
"allow_candidate_generation": False,
|
||||
}
|
||||
)
|
||||
if attempt_index == 2 and latest_validation is not None:
|
||||
revision_context = latest_validation.recommended_revision_prompt.strip()
|
||||
if revision_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(
|
||||
base_execution_context,
|
||||
f"Task validation revision request:\n{revision_context}",
|
||||
team_execution_context,
|
||||
)
|
||||
elif team_execution_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
last_result = result
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_synthesis_completed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"main_run_id": result.run_id,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
},
|
||||
)
|
||||
task = task_service.append_run(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
skill_names=self._skill_names_for_run(loaded, result.run_id),
|
||||
)
|
||||
evidence_packet = self._build_task_evidence_packet(
|
||||
session_manager=session_manager,
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
validation = await validation_service.validate_task_result(
|
||||
task=task,
|
||||
user_message=message,
|
||||
final_output=result.output_text,
|
||||
evidence_packet=evidence_packet,
|
||||
evidence_text=evidence_text,
|
||||
transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
|
||||
tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
|
||||
team_summaries=team_summaries,
|
||||
provider_bundle=provider_bundle,
|
||||
)
|
||||
latest_validation = validation
|
||||
has_usable_answer = bool(result.output_text.strip()) and (
|
||||
"Tool loop stopped after reaching the configured iteration limit." not in result.output_text
|
||||
)
|
||||
task = task_service.record_validation(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
validation,
|
||||
final_attempt=(
|
||||
attempt_index == 2
|
||||
or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
|
||||
),
|
||||
has_usable_answer=has_usable_answer,
|
||||
)
|
||||
run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
result.run_id,
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_status": task.status,
|
||||
"validation_status": "passed" if validation.accepted else "failed",
|
||||
},
|
||||
)
|
||||
validation_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
attempt_kwargs = dict(kwargs)
|
||||
attempt_kwargs.update(
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_mode": True,
|
||||
"attempt_index": attempt_index,
|
||||
"allow_candidate_generation": False,
|
||||
}
|
||||
retry_scheduled = validation.status == "rejected" and attempt_index == 1
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
role="system",
|
||||
event_type="task_validation_snapshotted",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"validation_result": validation.to_dict(),
|
||||
"validation_debug": validation_debug,
|
||||
"retry_scheduled": retry_scheduled,
|
||||
},
|
||||
content=validation.recommended_revision_prompt or None,
|
||||
context_visible=False,
|
||||
)
|
||||
if retry_scheduled:
|
||||
session_manager.set_run_context_visible(result.session_id, result.run_id, False)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = validation.to_dict()
|
||||
if not retry_scheduled:
|
||||
return result
|
||||
)
|
||||
if team_execution_context:
|
||||
attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
|
||||
if plan.is_team and team_execution_context:
|
||||
attempt_kwargs["include_tools"] = False
|
||||
attempt_kwargs["max_tool_iterations"] = 0
|
||||
attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
|
||||
task=task,
|
||||
user_message=message,
|
||||
attempt_index=attempt_index,
|
||||
plan=plan,
|
||||
team_summaries=team_summaries,
|
||||
)
|
||||
|
||||
if last_result is None: # pragma: no cover - defensive
|
||||
raise RuntimeError("Task mode did not produce a run result")
|
||||
return last_result
|
||||
result = await runner(message, **attempt_kwargs)
|
||||
self._append_task_observation(
|
||||
session_manager,
|
||||
task.session_id,
|
||||
event_type="task_synthesis_completed",
|
||||
payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"main_run_id": result.run_id,
|
||||
"plan_mode": plan.mode,
|
||||
"strategy": plan.graph.strategy if plan.graph else None,
|
||||
},
|
||||
)
|
||||
task = task_service.append_run(
|
||||
task.task_id,
|
||||
result.run_id,
|
||||
skill_names=self._skill_names_for_run(loaded, result.run_id),
|
||||
)
|
||||
evidence_packet = self._build_task_evidence_packet(
|
||||
session_manager=session_manager,
|
||||
task=task,
|
||||
attempt_index=attempt_index,
|
||||
result=result,
|
||||
team_result=team_result,
|
||||
)
|
||||
evidence_text = render_task_evidence(evidence_packet)
|
||||
evidence_debug = {
|
||||
"evidence_run_ids": [
|
||||
item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
|
||||
],
|
||||
"evidence_session_ids": [
|
||||
item.session_id
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
],
|
||||
"tool_result_count": sum(
|
||||
len(item.tool_results)
|
||||
for item in [evidence_packet.main_run, *evidence_packet.team_runs]
|
||||
if item is not None
|
||||
),
|
||||
"evidence_length": len(evidence_text),
|
||||
}
|
||||
session_manager.update_latest_assistant_event_payload(
|
||||
result.session_id,
|
||||
result.run_id,
|
||||
{
|
||||
"task_id": task.task_id,
|
||||
"task_status": task.status,
|
||||
"evidence_status": "recorded",
|
||||
},
|
||||
)
|
||||
session_manager.append_message(
|
||||
result.session_id,
|
||||
run_id=result.run_id,
|
||||
role="system",
|
||||
event_type="task_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": task.task_id,
|
||||
"attempt_index": attempt_index,
|
||||
"evidence_debug": evidence_debug,
|
||||
},
|
||||
content=None,
|
||||
context_visible=False,
|
||||
)
|
||||
result.task_id = task.task_id
|
||||
result.task_status = task.status
|
||||
result.validation_result = None
|
||||
return result
|
||||
|
||||
async def _run_team_for_task(
|
||||
self,
|
||||
@ -986,12 +966,10 @@ class AgentService:
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
|
||||
if feedback_type == "satisfied":
|
||||
if validation is not None:
|
||||
return max(0.0, min(1.0, float(validation.score)))
|
||||
def _acceptance_score_for_learning(acceptance_type: str) -> float:
|
||||
if acceptance_type == "accept":
|
||||
return 1.0
|
||||
if feedback_type == "revise":
|
||||
if acceptance_type == "revise":
|
||||
return 0.5
|
||||
return 0.0
|
||||
|
||||
@ -1001,12 +979,11 @@ class AgentService:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None = None,
|
||||
plan: TaskExecutionPlan | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
) -> str:
|
||||
phase = f"attempt_{attempt_index}"
|
||||
if latest_validation is not None:
|
||||
if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
|
||||
phase = f"revision_attempt_{attempt_index}"
|
||||
elif plan is not None and plan.is_team:
|
||||
phase = f"team_synthesis_attempt_{attempt_index}"
|
||||
@ -1027,24 +1004,14 @@ class AgentService:
|
||||
)
|
||||
else:
|
||||
sections.append("Previously activated skills:\nNone")
|
||||
if latest_validation is not None:
|
||||
validation_lines = [
|
||||
f"accepted: {latest_validation.accepted}",
|
||||
f"score: {latest_validation.score}",
|
||||
]
|
||||
if latest_validation.issues:
|
||||
validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
|
||||
if latest_validation.missing_requirements:
|
||||
validation_lines.append(
|
||||
"missing requirements:\n"
|
||||
+ "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
|
||||
)
|
||||
if latest_validation.recommended_revision_prompt:
|
||||
validation_lines.append(
|
||||
"recommended revision:\n"
|
||||
+ latest_validation.recommended_revision_prompt
|
||||
)
|
||||
sections.append("Validation feedback:\n" + "\n".join(validation_lines))
|
||||
if task.feedback:
|
||||
history_lines = []
|
||||
for item in task.feedback[-5:]:
|
||||
kind = item.get("acceptance_type") or item.get("feedback_type")
|
||||
comment = item.get("comment") or ""
|
||||
run_id = item.get("run_id") or ""
|
||||
history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
|
||||
sections.append("Task acceptance history:\n" + "\n".join(history_lines))
|
||||
if plan is not None:
|
||||
plan_lines = [
|
||||
f"mode: {plan.mode}",
|
||||
@ -1313,7 +1280,8 @@ class AgentService:
|
||||
"inbound_metadata": dict(inbound.metadata),
|
||||
"task_id": getattr(result, "task_id", None),
|
||||
"task_status": getattr(result, "task_status", None),
|
||||
"validation_result": getattr(result, "validation_result", None),
|
||||
"evidence_status": "recorded" if getattr(result, "task_id", None) else None,
|
||||
"validation_result": None,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@ -235,26 +235,45 @@ class SessionProcessProjector:
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
elif record.event_type == "task_validation_snapshotted":
|
||||
validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
|
||||
accepted = bool(validation.get("accepted"))
|
||||
root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
|
||||
root["finished_at"] = created_at if root["status"] == "done" else None
|
||||
elif record.event_type == "task_evidence_recorded":
|
||||
root["status"] = "waiting"
|
||||
root["finished_at"] = None
|
||||
add_event(
|
||||
event_id=_event_id(record, "validation"),
|
||||
event_id=_event_id(record, "evidence"),
|
||||
run_id=record.run_id or root_run_id,
|
||||
parent_run_id=root_run_id if record.run_id else None,
|
||||
kind="run_status",
|
||||
actor_type="system",
|
||||
actor_id="validator",
|
||||
actor_name="Validator",
|
||||
text=(
|
||||
f"Validation {'passed' if accepted else 'failed'} "
|
||||
f"(score={validation.get('score')})."
|
||||
+ (" Retry scheduled." if payload.get("retry_scheduled") else "")
|
||||
),
|
||||
actor_id="evidence-recorder",
|
||||
actor_name="Evidence",
|
||||
text="Task evidence was recorded; waiting for user acceptance.",
|
||||
created_at=created_at,
|
||||
status="done" if accepted else "error",
|
||||
status="done",
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
elif record.event_type == "task_acceptance_recorded":
|
||||
acceptance_type = str(payload.get("acceptance_type") or payload.get("feedback_type") or "")
|
||||
if acceptance_type == "accept":
|
||||
root["status"] = "done"
|
||||
root["finished_at"] = created_at
|
||||
elif acceptance_type == "abandon":
|
||||
root["status"] = "cancelled"
|
||||
root["finished_at"] = created_at
|
||||
else:
|
||||
root["status"] = "waiting"
|
||||
root["finished_at"] = None
|
||||
add_event(
|
||||
event_id=_event_id(record, "acceptance"),
|
||||
run_id=record.run_id or root_run_id,
|
||||
parent_run_id=root_run_id if record.run_id else None,
|
||||
kind="run_status",
|
||||
actor_type="user",
|
||||
actor_id="user-acceptance",
|
||||
actor_name="User Acceptance",
|
||||
text=f"User acceptance recorded: {acceptance_type or 'unknown'}.",
|
||||
created_at=created_at,
|
||||
status="done",
|
||||
metadata=dict(payload),
|
||||
)
|
||||
|
||||
|
||||
@ -69,15 +69,24 @@ class SkillLearningService:
|
||||
existing_ids.add(candidate.candidate_id)
|
||||
return candidates
|
||||
|
||||
def build_learning_candidates_for_task(self, task_id: str, *, trigger_run_id: str) -> list[SkillLearningCandidate]:
|
||||
"""Build candidates scoped to a single validated and satisfied Task run."""
|
||||
def build_learning_candidates_for_task(
|
||||
self,
|
||||
task_id: str,
|
||||
*,
|
||||
final_accepted_run_id: str | None = None,
|
||||
trigger_run_id: str | None = None,
|
||||
) -> list[SkillLearningCandidate]:
|
||||
"""Build candidates from a user-accepted Task and all of its runs."""
|
||||
|
||||
final_accepted_run_id = final_accepted_run_id or trigger_run_id
|
||||
if not final_accepted_run_id:
|
||||
return []
|
||||
runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
|
||||
trigger_run = next((record for record in runs if record.run_id == trigger_run_id), None)
|
||||
if trigger_run is None or not self._is_confirmed_positive_run(trigger_run):
|
||||
final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
|
||||
if final_run is None or not self._is_task_accepted_run(final_run):
|
||||
return []
|
||||
|
||||
source_runs = [record for record in runs if self._is_confirmed_positive_run(record)]
|
||||
source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))
|
||||
if not source_runs:
|
||||
return []
|
||||
|
||||
@ -100,11 +109,16 @@ class SkillLearningService:
|
||||
source_session_ids=source_session_ids,
|
||||
related_skill_names=[],
|
||||
reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
|
||||
evidence={"task_id": task_id, "trigger_run_id": trigger_run_id, "theme": self._task_theme(trigger_run.task_text)},
|
||||
evidence={
|
||||
"task_id": task_id,
|
||||
"final_accepted_run_id": final_accepted_run_id,
|
||||
"source_run_ids": source_run_ids,
|
||||
"theme": self._task_theme(final_run.task_text),
|
||||
},
|
||||
status="open",
|
||||
priority=1,
|
||||
confidence=0.8,
|
||||
trigger_reason="validation_accepted_and_user_satisfied",
|
||||
trigger_reason="task_accepted",
|
||||
)
|
||||
)
|
||||
else:
|
||||
@ -137,13 +151,14 @@ class SkillLearningService:
|
||||
),
|
||||
evidence={
|
||||
"task_id": task_id,
|
||||
"trigger_run_id": trigger_run_id,
|
||||
"final_accepted_run_id": final_accepted_run_id,
|
||||
"source_run_ids": source_run_ids,
|
||||
"skill_version": receipt.skill_version,
|
||||
},
|
||||
status="open",
|
||||
priority=1,
|
||||
confidence=0.7,
|
||||
trigger_reason="validation_accepted_and_user_satisfied",
|
||||
trigger_reason="task_accepted",
|
||||
)
|
||||
)
|
||||
|
||||
@ -269,7 +284,7 @@ class SkillLearningService:
|
||||
groups.setdefault(key, []).append(record)
|
||||
candidates: list[SkillLearningCandidate] = []
|
||||
for theme, runs in groups.items():
|
||||
successful = [record for record in runs if self._is_confirmed_positive_run(record)]
|
||||
successful = [record for record in runs if self._is_task_accepted_run(record)]
|
||||
if len(successful) < 2:
|
||||
continue
|
||||
if any(record.activated_skills for record in successful):
|
||||
@ -290,7 +305,7 @@ class SkillLearningService:
|
||||
def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
|
||||
pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
|
||||
for record in self.run_store.list_runs():
|
||||
if not self._is_confirmed_positive_run(record):
|
||||
if not self._is_task_accepted_run(record):
|
||||
continue
|
||||
unique = sorted({receipt.skill_name for receipt in record.activated_skills})
|
||||
for pair in combinations(unique, 2):
|
||||
@ -351,14 +366,15 @@ class SkillLearningService:
|
||||
return effects
|
||||
|
||||
@staticmethod
|
||||
def _is_confirmed_positive_run(record: RunRecord) -> bool:
|
||||
validation = record.validation_result or {}
|
||||
def _is_task_accepted_run(record: RunRecord) -> bool:
|
||||
feedback = record.feedback or {}
|
||||
acceptance_type = feedback.get("acceptance_type")
|
||||
if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
|
||||
acceptance_type = "accept"
|
||||
return (
|
||||
bool(record.success)
|
||||
and bool(record.task_id)
|
||||
and validation.get("accepted") is True
|
||||
and feedback.get("feedback_type") == "satisfied"
|
||||
and acceptance_type == "accept"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
|
||||
from .router import MainAgentRouter
|
||||
from .service import TaskService
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
from .validation import ValidationService
|
||||
|
||||
__all__ = [
|
||||
"EvidenceBuilder",
|
||||
@ -24,6 +23,5 @@ __all__ = [
|
||||
"ToolEvidence",
|
||||
"ValidationResult",
|
||||
"ValidationStatus",
|
||||
"ValidationService",
|
||||
"render_task_evidence",
|
||||
]
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
"""Models for internal task tracking and validation."""
|
||||
"""Models for internal task tracking and user acceptance."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@ -9,7 +9,12 @@ from typing import Any, Literal
|
||||
ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]
|
||||
|
||||
VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
|
||||
TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
|
||||
TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
|
||||
LEGACY_STATUS_MAP = {
|
||||
"validating": "running",
|
||||
"awaiting_feedback": "awaiting_acceptance",
|
||||
"needs_review": "awaiting_acceptance",
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@ -113,11 +118,11 @@ class TaskRecord:
|
||||
|
||||
@property
|
||||
def is_execution_active(self) -> bool:
|
||||
return self.status in {"running", "validating"}
|
||||
return self.status == "running"
|
||||
|
||||
@property
|
||||
def requires_user_action(self) -> bool:
|
||||
return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
|
||||
return self.status in {"awaiting_acceptance", "needs_revision"}
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
@ -137,6 +142,7 @@ class TaskRecord:
|
||||
"satisfaction": self.satisfaction,
|
||||
"run_ids": list(self.run_ids),
|
||||
"skill_names": list(self.skill_names),
|
||||
"acceptance": list(self.feedback),
|
||||
"feedback": list(self.feedback),
|
||||
"validation_result": self.validation_result,
|
||||
"metadata": dict(self.metadata),
|
||||
@ -152,7 +158,7 @@ class TaskRecord:
|
||||
goal=str(payload.get("goal") or payload.get("description") or ""),
|
||||
constraints=[str(item) for item in payload.get("constraints") or []],
|
||||
priority=int(payload.get("priority", 0) or 0),
|
||||
status=str(payload.get("status") or "open"),
|
||||
status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
|
||||
creator=str(payload.get("creator") or "main-agent"),
|
||||
created_at=str(payload.get("created_at") or ""),
|
||||
updated_at=str(payload.get("updated_at") or ""),
|
||||
@ -161,7 +167,11 @@ class TaskRecord:
|
||||
satisfaction=_optional_float(payload.get("satisfaction")),
|
||||
run_ids=[str(item) for item in payload.get("run_ids") or []],
|
||||
skill_names=[str(item) for item in payload.get("skill_names") or []],
|
||||
feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
|
||||
feedback=[
|
||||
_normalize_acceptance_entry(dict(item))
|
||||
for item in (payload.get("acceptance") or payload.get("feedback") or [])
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
|
||||
metadata=dict(payload.get("metadata") or {}),
|
||||
)
|
||||
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
return float(value)
|
||||
|
||||
|
||||
def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
|
||||
if entry.get("acceptance_type") is None and entry.get("feedback_type") is not None:
|
||||
feedback_type = str(entry.get("feedback_type") or "")
|
||||
entry["acceptance_type"] = "accept" if feedback_type == "satisfied" else feedback_type
|
||||
if entry.get("feedback_type") is None and entry.get("acceptance_type") is not None:
|
||||
acceptance_type = str(entry.get("acceptance_type") or "")
|
||||
entry["feedback_type"] = "satisfied" if acceptance_type == "accept" else acceptance_type
|
||||
return entry
|
||||
|
||||
@ -10,7 +10,7 @@ from typing import Any, Literal
|
||||
from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
from .models import TaskRecord
|
||||
from .skill_resolver import SkillResolutionReport, TaskSkillResolver
|
||||
|
||||
|
||||
@ -76,7 +76,6 @@ class TaskExecutionPlanner:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
timeout_seconds: float = 30.0,
|
||||
) -> TaskExecutionPlan:
|
||||
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
attempt_index=attempt_index,
|
||||
latest_validation=latest_validation,
|
||||
),
|
||||
},
|
||||
],
|
||||
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
attempt_index: int,
|
||||
latest_validation: ValidationResult | None,
|
||||
) -> str:
|
||||
validation_note = ""
|
||||
if latest_validation is not None:
|
||||
validation_note = (
|
||||
"\nPrevious validation issues:\n"
|
||||
+ json.dumps(latest_validation.to_dict(), ensure_ascii=False)
|
||||
)
|
||||
history_note = ""
|
||||
if task.feedback:
|
||||
history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
|
||||
return (
|
||||
"Decide execution mode for this internal Task attempt.\n"
|
||||
"Use mode=team only when independent research, review, implementation slices, or staged checks "
|
||||
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
|
||||
f"Task goal:\n{task.goal}\n\n"
|
||||
f"Current user request:\n{user_message}\n\n"
|
||||
f"Attempt index: {attempt_index}\n"
|
||||
f"{validation_note}"
|
||||
f"{history_note}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -7,7 +7,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from .models import TaskEvent, TaskRecord, ValidationResult
|
||||
from .models import TaskEvent, TaskRecord
|
||||
from .store import TaskStore
|
||||
|
||||
|
||||
@ -105,38 +105,70 @@ class TaskService:
|
||||
for name in skill_names or []:
|
||||
if name not in task.skill_names:
|
||||
task.skill_names.append(name)
|
||||
task.status = "awaiting_acceptance"
|
||||
task.updated_at = self._now()
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||
self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
|
||||
return task
|
||||
|
||||
def record_validation(
|
||||
def add_acceptance(
|
||||
self,
|
||||
task_id: str,
|
||||
run_id: str,
|
||||
validation: ValidationResult,
|
||||
*,
|
||||
final_attempt: bool = True,
|
||||
has_usable_answer: bool = True,
|
||||
acceptance_type: str,
|
||||
comment: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
now = self._now()
|
||||
if validation.status == "accepted":
|
||||
task.status = "awaiting_feedback"
|
||||
elif validation.status in {"insufficient_evidence", "validator_error"}:
|
||||
task.status = "needs_review"
|
||||
elif validation.status == "rejected" and not final_attempt:
|
||||
normalized = normalize_acceptance_type(acceptance_type)
|
||||
matching_acceptance = any(
|
||||
item.get("run_id") == run_id and item.get("acceptance_type") == normalized
|
||||
for item in task.feedback
|
||||
)
|
||||
conflicting_acceptance = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_acceptance is not None:
|
||||
raise ValueError(
|
||||
f"Acceptance for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_acceptance.get('acceptance_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not matching_acceptance:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
if matching_acceptance:
|
||||
return task
|
||||
|
||||
entry = {
|
||||
"acceptance_type": normalized,
|
||||
"feedback_type": "satisfied" if normalized == "accept" else normalized,
|
||||
"comment": comment or "",
|
||||
"run_id": run_id,
|
||||
"created_at": now,
|
||||
}
|
||||
task.feedback.append(entry)
|
||||
if normalized == "revise":
|
||||
task.status = "needs_revision"
|
||||
elif validation.status == "rejected" and has_usable_answer:
|
||||
task.status = "needs_review"
|
||||
else:
|
||||
task.status = "failed"
|
||||
elif normalized == "abandon":
|
||||
task.status = "abandoned"
|
||||
task.closed_at = now
|
||||
task.close_reason = "automatic validation rejected the final attempt"
|
||||
task.close_reason = comment or "abandoned"
|
||||
elif normalized == "accept":
|
||||
task.status = "closed"
|
||||
task.closed_at = now
|
||||
task.close_reason = "accepted"
|
||||
task.satisfaction = 1.0
|
||||
if run_id:
|
||||
task.metadata["final_accepted_run_id"] = run_id
|
||||
task.updated_at = now
|
||||
task.validation_result = validation.to_dict()
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
|
||||
self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
|
||||
return task
|
||||
|
||||
def add_feedback(
|
||||
@ -147,52 +179,12 @@ class TaskService:
|
||||
comment: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
now = self._now()
|
||||
matching_feedback = any(
|
||||
item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
|
||||
for item in task.feedback
|
||||
return self.add_acceptance(
|
||||
task_id,
|
||||
acceptance_type=feedback_type,
|
||||
comment=comment,
|
||||
run_id=run_id,
|
||||
)
|
||||
conflicting_feedback = next(
|
||||
(
|
||||
item
|
||||
for item in task.feedback
|
||||
if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
|
||||
),
|
||||
None,
|
||||
)
|
||||
if conflicting_feedback is not None:
|
||||
raise ValueError(
|
||||
f"Feedback for run_id={run_id!r} was already recorded as "
|
||||
f"{conflicting_feedback.get('feedback_type')!r}"
|
||||
)
|
||||
if task.status in {"closed", "abandoned"} and not matching_feedback:
|
||||
raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
|
||||
if matching_feedback:
|
||||
return task
|
||||
|
||||
entry = {
|
||||
"feedback_type": feedback_type,
|
||||
"comment": comment or "",
|
||||
"run_id": run_id,
|
||||
"created_at": now,
|
||||
}
|
||||
task.feedback.append(entry)
|
||||
if feedback_type == "revise":
|
||||
task.status = "needs_revision"
|
||||
elif feedback_type == "abandon":
|
||||
task.status = "abandoned"
|
||||
task.closed_at = now
|
||||
task.close_reason = comment or "abandoned"
|
||||
elif feedback_type == "satisfied":
|
||||
task.status = "closed"
|
||||
task.closed_at = now
|
||||
task.close_reason = "satisfied"
|
||||
task.satisfaction = 1.0
|
||||
task.updated_at = now
|
||||
self.store.upsert_task(task)
|
||||
self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
|
||||
return task
|
||||
|
||||
def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
|
||||
task = self._require(task_id)
|
||||
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
|
||||
if len(words) <= 4:
|
||||
return cleaned[:40]
|
||||
return " ".join(words[:4])[:40]
|
||||
|
||||
|
||||
def normalize_acceptance_type(value: str) -> str:
|
||||
normalized = (value or "").strip().lower()
|
||||
if normalized == "satisfied":
|
||||
return "accept"
|
||||
if normalized not in {"accept", "revise", "abandon"}:
|
||||
raise ValueError("acceptance_type must be one of: accept, revise, abandon")
|
||||
return normalized
|
||||
|
||||
@ -1,154 +0,0 @@
|
||||
"""Automatic validation for internal Task mode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
|
||||
from .models import TaskRecord, ValidationResult
|
||||
|
||||
|
||||
class ValidationService:
|
||||
async def validate_task_result(
|
||||
self,
|
||||
*,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
final_output: str,
|
||||
evidence_packet: Any | None = None,
|
||||
evidence_text: str = "",
|
||||
transcript_excerpt: str = "",
|
||||
tool_summaries: list[str] | None = None,
|
||||
team_summaries: list[str] | None = None,
|
||||
provider_bundle: ProviderBundle | None = None,
|
||||
) -> ValidationResult:
|
||||
provider = None
|
||||
model = None
|
||||
if provider_bundle is not None:
|
||||
provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
|
||||
runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
|
||||
model = getattr(runtime, "model", None)
|
||||
if provider is not None:
|
||||
try:
|
||||
return await self._validate_with_provider(
|
||||
provider=provider,
|
||||
model=model,
|
||||
task=task,
|
||||
user_message=user_message,
|
||||
final_output=final_output,
|
||||
evidence_text=evidence_text,
|
||||
transcript_excerpt=transcript_excerpt,
|
||||
tool_summaries=tool_summaries or [],
|
||||
team_summaries=team_summaries or [],
|
||||
)
|
||||
except Exception as exc:
|
||||
return ValidationResult(
|
||||
status="validator_error",
|
||||
score=0.0,
|
||||
issues=[f"Validator failed: {exc}"],
|
||||
evidence_gaps=["Automatic validation failed before producing a reliable decision."],
|
||||
missing_requirements=["User review is required because automatic validation failed."],
|
||||
recommended_revision_prompt=(
|
||||
"Review the answer and evidence, then decide whether to revise or accept it."
|
||||
),
|
||||
validator="llm_error",
|
||||
)
|
||||
return self._heuristic_validate(final_output)
|
||||
|
||||
async def _validate_with_provider(
|
||||
self,
|
||||
*,
|
||||
provider: Any,
|
||||
model: str | None,
|
||||
task: TaskRecord,
|
||||
user_message: str,
|
||||
final_output: str,
|
||||
evidence_text: str,
|
||||
transcript_excerpt: str,
|
||||
tool_summaries: list[str],
|
||||
team_summaries: list[str],
|
||||
) -> ValidationResult:
|
||||
legacy_context = "" if evidence_text else (
|
||||
f"Transcript excerpt:\n{transcript_excerpt}\n\n"
|
||||
f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
|
||||
f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
|
||||
)
|
||||
prompt = (
|
||||
"Validate whether the assistant output satisfies the task. "
|
||||
"Return only compact JSON with keys: passed, score, issues, "
|
||||
"missing_requirements, recommended_revision_prompt.\n\n"
|
||||
f"Task goal:\n{task.goal}\n\n"
|
||||
f"Current user request:\n{user_message}\n\n"
|
||||
f"Evidence packet:\n{evidence_text}\n\n"
|
||||
f"{legacy_context}"
|
||||
f"Assistant final output:\n{final_output}"
|
||||
)
|
||||
response = await provider.chat(
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a strict task result validator."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
tools=None,
|
||||
model=model,
|
||||
max_tokens=4096,
|
||||
temperature=0.0,
|
||||
)
|
||||
payload = self._parse_json_object(response.content or "")
|
||||
status = payload.get("status")
|
||||
if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
|
||||
status = (
|
||||
"accepted"
|
||||
if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
|
||||
else "rejected"
|
||||
)
|
||||
return ValidationResult(
|
||||
status=status,
|
||||
score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
|
||||
issues=[str(item) for item in payload.get("issues") or []],
|
||||
missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
|
||||
evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
|
||||
recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
|
||||
validator="llm",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _heuristic_validate(final_output: str) -> ValidationResult:
|
||||
text = final_output.strip()
|
||||
if not text:
|
||||
return ValidationResult(
|
||||
passed=False,
|
||||
score=0.0,
|
||||
issues=["Assistant output is empty."],
|
||||
missing_requirements=["A non-empty result is required."],
|
||||
recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
|
||||
validator="heuristic",
|
||||
)
|
||||
lowered = text.lower()
|
||||
if "run failed before completion" in lowered or "tool loop stopped" in lowered:
|
||||
return ValidationResult(
|
||||
passed=False,
|
||||
score=0.35,
|
||||
issues=["The run did not complete cleanly."],
|
||||
missing_requirements=["A successful final result is required."],
|
||||
recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
|
||||
validator="heuristic",
|
||||
)
|
||||
return ValidationResult(passed=True, score=0.85, validator="heuristic")
|
||||
|
||||
@staticmethod
|
||||
def _parse_json_object(text: str) -> dict[str, Any]:
|
||||
cleaned = text.strip()
|
||||
if cleaned.startswith("```"):
|
||||
cleaned = cleaned.strip("`")
|
||||
if cleaned.lower().startswith("json"):
|
||||
cleaned = cleaned[4:].strip()
|
||||
start = cleaned.find("{")
|
||||
end = cleaned.rfind("}")
|
||||
if start >= 0 and end >= start:
|
||||
cleaned = cleaned[start : end + 1]
|
||||
payload = json.loads(cleaned)
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError("validator response must be a JSON object")
|
||||
return payload
|
||||
28
app-instance/backend/tests/unit/test_context_builder.py
Normal file
28
app-instance/backend/tests/unit/test_context_builder.py
Normal file
@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from beaver.engine.context import ContextBuildInput, ContextBuilder, RuntimeContext, SessionContext
|
||||
|
||||
|
||||
def test_context_builder_injects_current_date_and_time() -> None:
|
||||
result = ContextBuilder().build_messages(
|
||||
ContextBuildInput(
|
||||
base_system_prompt="Follow user requests.",
|
||||
current_user_input="今天几号?",
|
||||
session_context=SessionContext(session_id="web:alpha", source="web", model="stub-model"),
|
||||
runtime_context=RuntimeContext(
|
||||
utc_datetime="2026-05-26T01:10:00+00:00",
|
||||
local_datetime="2026-05-26T09:10:00+08:00",
|
||||
timezone="Asia/Shanghai",
|
||||
utc_offset="+08:00",
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
system_prompt = result.messages[0]["content"]
|
||||
assert "# Current Date and Time" in system_prompt
|
||||
assert "Current UTC time: 2026-05-26T01:10:00+00:00" in system_prompt
|
||||
assert "Current local time: 2026-05-26T09:10:00+08:00" in system_prompt
|
||||
assert "Local timezone: Asia/Shanghai" in system_prompt
|
||||
assert "Local UTC offset: +08:00" in system_prompt
|
||||
assert '"today", "tomorrow", "now", "this week", and "next month"' in system_prompt
|
||||
assert result.messages[-1] == {"role": "user", "content": "今天几号?"}
|
||||
@ -18,8 +18,8 @@ class FakeResult:
|
||||
model: str | None = "fake-model"
|
||||
usage: dict[str, Any] = field(default_factory=dict)
|
||||
task_id: str | None = "task-1"
|
||||
task_status: str | None = "awaiting_feedback"
|
||||
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
|
||||
task_status: str | None = "awaiting_acceptance"
|
||||
validation_result: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class FakeService:
|
||||
@ -79,8 +79,9 @@ def test_gateway_routes_memory_channel_roundtrip() -> None:
|
||||
assert message.session_id == "s1"
|
||||
assert message.finish_reason == "stop"
|
||||
assert message.metadata["task_id"] == "task-1"
|
||||
assert message.metadata["task_status"] == "awaiting_feedback"
|
||||
assert message.metadata["validation_result"] == {"accepted": True}
|
||||
assert message.metadata["task_status"] == "awaiting_acceptance"
|
||||
assert message.metadata["evidence_status"] == "recorded"
|
||||
assert message.metadata["validation_result"] is None
|
||||
|
||||
stop_event.set()
|
||||
await asyncio.wait_for(task, timeout=2)
|
||||
|
||||
@ -113,6 +113,19 @@ def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> N
|
||||
assert LiteLLMProvider._sanitize_messages(messages)[0]["reasoning_content"] == "must be passed back"
|
||||
|
||||
|
||||
def test_litellm_provider_merges_late_system_messages_to_front() -> None:
|
||||
messages = [
|
||||
{"role": "system", "content": "base"},
|
||||
{"role": "user", "content": "question"},
|
||||
{"role": "system", "content": "finalize without tools"},
|
||||
]
|
||||
|
||||
sanitized = LiteLLMProvider._sanitize_messages(messages)
|
||||
|
||||
assert [message["role"] for message in sanitized] == ["system", "user"]
|
||||
assert sanitized[0]["content"] == "base\n\nfinalize without tools"
|
||||
|
||||
|
||||
def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
captured: dict = {}
|
||||
|
||||
|
||||
@ -79,7 +79,7 @@ def _task() -> TaskRecord:
|
||||
goal="实现任务连续性",
|
||||
constraints=[],
|
||||
priority=0,
|
||||
status="awaiting_feedback",
|
||||
status="awaiting_acceptance",
|
||||
creator="test",
|
||||
created_at="now",
|
||||
updated_at="now",
|
||||
|
||||
@ -35,6 +35,7 @@ class StubProvider(LLMProvider):
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
thinking_enabled: bool | None = None,
|
||||
) -> LLMResponse:
|
||||
if not self._responses:
|
||||
raise AssertionError("No stubbed provider responses left")
|
||||
@ -47,11 +48,22 @@ class StubProvider(LLMProvider):
|
||||
class StubSkillAssembler:
|
||||
def __init__(self, activated_skills: list[SkillContext]) -> None:
|
||||
self.activated_skills = activated_skills
|
||||
self.calls: list[dict] = []
|
||||
|
||||
async def assemble(self, **kwargs) -> SkillAssemblyResult:
|
||||
self.calls.append(kwargs)
|
||||
return SkillAssemblyResult(activated_skills=list(self.activated_skills))
|
||||
|
||||
|
||||
class RecordingToolAssembler:
|
||||
def __init__(self) -> None:
|
||||
self.calls: list[dict] = []
|
||||
|
||||
async def assemble(self, **kwargs):
|
||||
self.calls.append(kwargs)
|
||||
return kwargs["registry"].get_specs(["memory"])
|
||||
|
||||
|
||||
def _tool_call(*, name: str = "echo", arguments: dict | None = None, call_id: str = "call-1") -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
id=call_id,
|
||||
@ -576,6 +588,48 @@ def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
|
||||
assert effect_records[-1].run_id == result.run_id
|
||||
|
||||
|
||||
def test_thinking_disabled_still_uses_skill_and_tool_assembly(tmp_path: Path) -> None:
|
||||
skill = SkillContext(
|
||||
name="docker-debug",
|
||||
content="Use docker logs before editing config.",
|
||||
version="v0007",
|
||||
content_hash="hash-v7",
|
||||
activation_reason="llm_selected",
|
||||
tool_hints=["terminal"],
|
||||
)
|
||||
skill_assembler = StubSkillAssembler([skill])
|
||||
tool_assembler = RecordingToolAssembler()
|
||||
loader = EngineLoader(
|
||||
workspace=tmp_path,
|
||||
skill_assembler=skill_assembler,
|
||||
tool_assembler=tool_assembler,
|
||||
)
|
||||
loop = AgentLoop(loader=loader)
|
||||
bundle = ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=StubProvider(
|
||||
[LLMResponse(content="Done", finish_reason="stop", provider_name="stub", model="stub-model")]
|
||||
),
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
loop.process_direct(
|
||||
"Why is the Docker container crashing?",
|
||||
provider_bundle=bundle,
|
||||
thinking_enabled=False,
|
||||
)
|
||||
)
|
||||
loaded = loop.boot()
|
||||
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
|
||||
tool_selection = next(event for event in events if event.event_type == "tool_selection_snapshotted")
|
||||
|
||||
assert skill_assembler.calls
|
||||
assert skill_assembler.calls[0]["thinking_enabled"] is False
|
||||
assert tool_assembler.calls
|
||||
assert [skill.name for skill in tool_assembler.calls[0]["activated_skills"]] == ["docker-debug"]
|
||||
assert tool_selection.event_payload["tool_names"] == ["memory"]
|
||||
|
||||
|
||||
def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path: Path) -> None:
|
||||
skill = SkillContext(
|
||||
name="docker-debug",
|
||||
@ -635,6 +689,52 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
|
||||
assert effect_records[-1].success is False
|
||||
|
||||
|
||||
def test_agent_loop_suppresses_raw_tool_call_when_finalizing_after_tool_limit(tmp_path: Path) -> None:
|
||||
loader = EngineLoader(
|
||||
workspace=tmp_path,
|
||||
skill_assembler=StubSkillAssembler([]),
|
||||
)
|
||||
loop = AgentLoop(loader=loader)
|
||||
bundle = ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=StubProvider(
|
||||
[
|
||||
LLMResponse(
|
||||
content="Need a tool.",
|
||||
finish_reason="tool_calls",
|
||||
tool_calls=[_tool_call()],
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
LLMResponse(
|
||||
content=(
|
||||
"<tool_call>\n"
|
||||
"<function=mcp_local_web_mcp_web_fetch>\n"
|
||||
"<parameter=url>https://example.com</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>"
|
||||
),
|
||||
finish_reason="stop",
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
loop.process_direct(
|
||||
"Fetch the latest result",
|
||||
provider_bundle=bundle,
|
||||
max_tool_iterations=0,
|
||||
)
|
||||
)
|
||||
|
||||
assert result.finish_reason == "max_tool_iterations"
|
||||
assert "<tool_call>" not in result.output_text
|
||||
assert "raw tool call was suppressed" in result.output_text
|
||||
|
||||
|
||||
def test_llm_request_snapshot_defaults_to_compact_payload(tmp_path: Path) -> None:
|
||||
loop = AgentLoop(loader=EngineLoader(workspace=tmp_path, skill_assembler=StubSkillAssembler([])))
|
||||
bundle = ProviderBundle(
|
||||
|
||||
@ -101,12 +101,11 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
|
||||
"web:test",
|
||||
run_id="main-run",
|
||||
role="system",
|
||||
event_type="task_validation_snapshotted",
|
||||
event_type="task_evidence_recorded",
|
||||
event_payload={
|
||||
"task_id": "task-1",
|
||||
"attempt_index": 1,
|
||||
"validation_result": {"accepted": True, "score": 0.9},
|
||||
"retry_scheduled": False,
|
||||
"evidence_status": "recorded",
|
||||
},
|
||||
context_visible=False,
|
||||
)
|
||||
@ -121,7 +120,7 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
|
||||
assert sub_run["metadata"]["selected_skill_names"] == ["research-workflow"]
|
||||
assert sub_run["metadata"]["skill_query"] == "research workflow"
|
||||
assert sub_run["metadata"]["ephemeral_guidance_id"] is None
|
||||
assert any(event["actor_name"] == "Validator" for event in projection["events"])
|
||||
assert any(event["actor_name"] == "Evidence" for event in projection["events"])
|
||||
assert any(run["session_id"] == "web:test" for run in projection["runs"])
|
||||
|
||||
|
||||
|
||||
@ -4,23 +4,17 @@ import asyncio
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode
|
||||
from beaver.engine import EngineLoader
|
||||
from beaver.engine.context.builder import ContextBuilder, ContextBuildInput
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.services.agent_service import AgentService
|
||||
from beaver.skills.assembler import SkillAssemblyResult
|
||||
from beaver.tasks import TaskExecutionPlan, TaskRecord, TaskService, ValidationResult, ValidationService
|
||||
from beaver.tasks import TaskExecutionPlan, TaskService
|
||||
|
||||
|
||||
class StubProvider(LLMProvider):
|
||||
def __init__(self, responses: list[LLMResponse]) -> None:
|
||||
super().__init__()
|
||||
self._responses = list(responses)
|
||||
self.calls: list[dict[str, object]] = []
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
@ -30,7 +24,6 @@ class StubProvider(LLMProvider):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
) -> LLMResponse:
|
||||
self.calls.append({"messages": messages, "tools": tools, "model": model})
|
||||
if not self._responses:
|
||||
raise AssertionError("No stubbed provider responses left")
|
||||
return self._responses.pop(0)
|
||||
@ -39,30 +32,9 @@ class StubProvider(LLMProvider):
|
||||
return "stub-model"
|
||||
|
||||
|
||||
class StubValidationService:
|
||||
def __init__(self, results: list[ValidationResult]) -> None:
|
||||
self.results = list(results)
|
||||
self.calls: list[dict] = []
|
||||
|
||||
async def validate_task_result(self, **kwargs) -> ValidationResult:
|
||||
self.calls.append(kwargs)
|
||||
if not self.results:
|
||||
raise AssertionError("No stubbed validation results left")
|
||||
return self.results.pop(0)
|
||||
|
||||
|
||||
class StubTaskExecutionPlanner:
|
||||
def __init__(self, plans: list[TaskExecutionPlan] | None = None) -> None:
|
||||
self.plans = list(plans or [TaskExecutionPlan.single("test-single")])
|
||||
self.calls = []
|
||||
|
||||
async def plan(self, **kwargs) -> TaskExecutionPlan:
|
||||
self.calls.append(kwargs)
|
||||
if len(self.plans) == 1:
|
||||
return self.plans[0]
|
||||
if not self.plans:
|
||||
raise AssertionError("No stubbed execution plans left")
|
||||
return self.plans.pop(0)
|
||||
return TaskExecutionPlan.single("test-single")
|
||||
|
||||
|
||||
class FakeLearningCandidate:
|
||||
@ -70,15 +42,6 @@ class FakeLearningCandidate:
|
||||
return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
|
||||
|
||||
|
||||
class RecordingSkillAssembler:
|
||||
def __init__(self) -> None:
|
||||
self.task_descriptions: list[str] = []
|
||||
|
||||
async def assemble(self, **kwargs) -> SkillAssemblyResult:
|
||||
self.task_descriptions.append(kwargs["task_description"])
|
||||
return SkillAssemblyResult()
|
||||
|
||||
|
||||
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
|
||||
return LLMResponse(
|
||||
content=f'{{"action":"{action}","reason":"test route","short_title":"{short_title}"}}',
|
||||
@ -107,828 +70,157 @@ def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
|
||||
)
|
||||
|
||||
|
||||
def _single_planner() -> StubTaskExecutionPlanner:
|
||||
return StubTaskExecutionPlanner([TaskExecutionPlan.single("test-single")])
|
||||
|
||||
|
||||
def _team_plan(strategy: str = "sequence") -> TaskExecutionPlan:
|
||||
return TaskExecutionPlan(
|
||||
mode="team",
|
||||
reason="test-team",
|
||||
graph=ExecutionGraph(
|
||||
strategy=strategy, # type: ignore[arg-type]
|
||||
nodes=[
|
||||
ExecutionNode(
|
||||
node_id="research",
|
||||
task="research implementation options",
|
||||
agent=AgentDescriptor(name="researcher", role="research"),
|
||||
)
|
||||
],
|
||||
),
|
||||
final_synthesis_instruction="Use the sub-agent result to produce the final answer.",
|
||||
)
|
||||
|
||||
|
||||
def _provider_bundle(provider: StubProvider) -> ProviderBundle:
|
||||
return ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=provider,
|
||||
auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
auxiliary_provider=StubProvider([_route_response("new_task")]),
|
||||
)
|
||||
|
||||
|
||||
def _main_only_bundle(*responses: str) -> ProviderBundle:
|
||||
return ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=StubProvider(
|
||||
[
|
||||
LLMResponse(
|
||||
content=response,
|
||||
finish_reason="stop",
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
)
|
||||
for response in responses
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _task_record(status: str) -> TaskRecord:
|
||||
return TaskRecord(
|
||||
task_id="task-1",
|
||||
session_id="session-1",
|
||||
description="test task",
|
||||
goal="test task",
|
||||
constraints=[],
|
||||
priority=0,
|
||||
status=status,
|
||||
creator="main-agent",
|
||||
created_at="2026-05-22T00:00:00+00:00",
|
||||
updated_at="2026-05-22T00:00:00+00:00",
|
||||
)
|
||||
|
||||
|
||||
def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
|
||||
def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService([]),
|
||||
task_execution_planner=StubTaskExecutionPlanner(),
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"hello?",
|
||||
session_id="web:simple",
|
||||
provider_bundle=_bundle("hi", route_action="simple_chat"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
|
||||
assert result.task_id is None
|
||||
assert loaded.task_service.store.list_tasks() == []
|
||||
|
||||
|
||||
def test_complex_request_creates_task_and_records_validation(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[ValidationResult(passed=True, score=0.9, validator="test")]
|
||||
),
|
||||
"draft release notes",
|
||||
session_id="web:test",
|
||||
provider_bundle=_bundle("Done"),
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement the new report workflow",
|
||||
session_id="web:task",
|
||||
provider_bundle=_bundle("implemented"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task_by_run_id(result.run_id)
|
||||
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
|
||||
run_record = loaded.run_memory_store.list_runs()[-1]
|
||||
skill_effects = next(event for event in events if event.event_type == "skill_effects_snapshotted")
|
||||
|
||||
assert result.task_id is not None
|
||||
task_service = service.create_loop().boot().task_service
|
||||
assert task_service is not None
|
||||
task = task_service.get_task(result.task_id or "")
|
||||
assert task is not None
|
||||
assert task.status == "awaiting_feedback"
|
||||
assert any(event.event_type == "task_validation_snapshotted" for event in events)
|
||||
assert run_record.task_id == result.task_id
|
||||
assert run_record.validation_result["accepted"] is True
|
||||
assert skill_effects.event_payload["candidate_generation_allowed"] is False
|
||||
assert skill_effects.event_payload["learning_candidates"] == []
|
||||
assert task.metadata["short_title"] == "Test task"
|
||||
assert task.status == "awaiting_acceptance"
|
||||
assert task.validation_result is None
|
||||
assert result.validation_result is None
|
||||
|
||||
event_types = [event.event_type for event in task_service.list_events(task.task_id)]
|
||||
assert "evidence_recorded" in event_types
|
||||
assert "validated" not in event_types
|
||||
|
||||
|
||||
def test_task_mode_uses_task_aware_skill_selection_context(tmp_path: Path) -> None:
|
||||
skill_assembler = RecordingSkillAssembler()
|
||||
def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[ValidationResult(passed=True, score=1.0, validator="test")]
|
||||
),
|
||||
skill_assembler=skill_assembler,
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"继续按刚才的方案改",
|
||||
session_id="web:task-skill-query",
|
||||
provider_bundle=_bundle("done", route_action="new_task"),
|
||||
)
|
||||
)
|
||||
|
||||
assert result.task_id
|
||||
assert skill_assembler.task_descriptions
|
||||
query = skill_assembler.task_descriptions[0]
|
||||
assert "Task goal:" in query
|
||||
assert "Current user request:" in query
|
||||
assert "Previously activated skills:" in query
|
||||
assert "If no published skill matches, return []" in query
|
||||
|
||||
|
||||
def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[
|
||||
ValidationResult(passed=True, score=0.9, validator="test"),
|
||||
ValidationResult(passed=True, score=0.9, validator="test"),
|
||||
]
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
first = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement the search workflow",
|
||||
session_id="web:continue",
|
||||
provider_bundle=_bundle("first done", route_action="new_task"),
|
||||
)
|
||||
)
|
||||
second = asyncio.run(
|
||||
service.process_direct(
|
||||
"also add tests for it",
|
||||
session_id="web:continue",
|
||||
provider_bundle=_bundle("tests added", route_action="continue_task"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task(first.task_id)
|
||||
|
||||
assert task is not None
|
||||
assert second.task_id == first.task_id
|
||||
assert len(task.run_ids) == 2
|
||||
|
||||
closed = asyncio.run(
|
||||
service.process_direct(
|
||||
"这个任务结束了",
|
||||
session_id="web:continue",
|
||||
provider_bundle=_bundle("好的,已结束。", route_action="close_task"),
|
||||
)
|
||||
)
|
||||
task = loaded.task_service.get_task(first.task_id)
|
||||
|
||||
assert closed.task_id is None
|
||||
assert task is not None
|
||||
assert task.status == "closed"
|
||||
assert loaded.task_service.active_task_view("web:continue") is None
|
||||
|
||||
|
||||
def test_active_task_revision_input_records_feedback_and_reruns(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[
|
||||
ValidationResult(passed=True, score=0.9, validator="test"),
|
||||
ValidationResult(passed=True, score=0.95, validator="test"),
|
||||
]
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
first = asyncio.run(
|
||||
service.process_direct(
|
||||
"查询珠海天气",
|
||||
session_id="web:revise-direct",
|
||||
provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
|
||||
)
|
||||
)
|
||||
second = asyncio.run(
|
||||
service.process_direct(
|
||||
"再详细一点,并加上明后天穿衣建议",
|
||||
session_id="web:revise-direct",
|
||||
provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task(first.task_id)
|
||||
messages = loaded.session_manager.get_messages_as_conversation(first.session_id)
|
||||
first_assistant = [
|
||||
message
|
||||
for message in messages
|
||||
if message.get("role") == "assistant" and message.get("run_id") == first.run_id
|
||||
][-1]
|
||||
user_messages = [message.get("content") for message in messages if message.get("role") == "user"]
|
||||
|
||||
assert second.task_id == first.task_id
|
||||
assert task is not None
|
||||
assert task.status == "awaiting_feedback"
|
||||
assert len(task.run_ids) == 2
|
||||
assert task.feedback == [
|
||||
{
|
||||
"feedback_type": "revise",
|
||||
"comment": "再详细一点,并加上明后天穿衣建议",
|
||||
"run_id": first.run_id,
|
||||
"created_at": task.feedback[0]["created_at"],
|
||||
}
|
||||
]
|
||||
assert first_assistant["feedback_state"] == "revise"
|
||||
assert "再详细一点,并加上明后天穿衣建议" in user_messages
|
||||
|
||||
|
||||
def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[
|
||||
ValidationResult(passed=True, score=0.9, validator="test"),
|
||||
ValidationResult(passed=True, score=0.95, validator="test"),
|
||||
]
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
first = asyncio.run(
|
||||
service.process_direct(
|
||||
"查询珠海天气",
|
||||
session_id="web:explicit-revise",
|
||||
provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
|
||||
)
|
||||
)
|
||||
feedback = asyncio.run(
|
||||
service.submit_feedback(
|
||||
session_id=first.session_id,
|
||||
run_id=first.run_id,
|
||||
feedback_type="revise",
|
||||
comment="准备补充穿衣建议",
|
||||
)
|
||||
)
|
||||
second = asyncio.run(
|
||||
service.process_direct(
|
||||
"加上明后天穿衣建议",
|
||||
session_id="web:explicit-revise",
|
||||
provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task(first.task_id)
|
||||
|
||||
assert feedback["task_status"] == "needs_revision"
|
||||
assert second.task_id == first.task_id
|
||||
assert task is not None
|
||||
assert task.status == "awaiting_feedback"
|
||||
assert len(task.run_ids) == 2
|
||||
assert len(task.feedback) == 1
|
||||
assert task.feedback[0]["feedback_type"] == "revise"
|
||||
assert task.feedback[0]["comment"] == "准备补充穿衣建议"
|
||||
|
||||
|
||||
def test_validation_result_status_drives_accepted_and_passed() -> None:
|
||||
accepted = ValidationResult(status="accepted", score=0.9, validator="test")
|
||||
insufficient = ValidationResult(status="insufficient_evidence", score=0.9, validator="test")
|
||||
rejected = ValidationResult(status="rejected", score=0.9, validator="test")
|
||||
|
||||
assert accepted.passed is True
|
||||
assert accepted.accepted is True
|
||||
assert insufficient.passed is False
|
||||
assert insufficient.accepted is False
|
||||
assert rejected.passed is False
|
||||
assert rejected.accepted is False
|
||||
|
||||
|
||||
def test_validation_result_from_legacy_payload_maps_to_status() -> None:
|
||||
accepted = ValidationResult.from_dict({"passed": True, "score": 0.9, "validator": "legacy"})
|
||||
low_score = ValidationResult.from_dict({"passed": True, "score": 0.7, "validator": "legacy"})
|
||||
rejected = ValidationResult.from_dict({"passed": False, "score": 0.2, "validator": "legacy"})
|
||||
|
||||
assert accepted is not None
|
||||
assert accepted.status == "accepted"
|
||||
assert low_score is not None
|
||||
assert low_score.status == "rejected"
|
||||
assert rejected is not None
|
||||
assert rejected.status == "rejected"
|
||||
|
||||
|
||||
def test_validation_result_rejects_unknown_status() -> None:
|
||||
with pytest.raises(ValueError, match="unknown validation status"):
|
||||
ValidationResult(status="pending", score=0.9, validator="test") # type: ignore[arg-type]
|
||||
|
||||
|
||||
def test_validation_result_from_dict_rejects_unknown_explicit_status() -> None:
|
||||
with pytest.raises(ValueError, match="unknown validation status"):
|
||||
ValidationResult.from_dict({"status": "pending", "passed": True, "score": 0.9})
|
||||
|
||||
|
||||
def test_validation_result_evidence_gaps_round_trip() -> None:
|
||||
validation = ValidationResult(
|
||||
status="insufficient_evidence",
|
||||
score=0.4,
|
||||
evidence_gaps=["missing command output", "missing file reference"],
|
||||
validator="test",
|
||||
)
|
||||
|
||||
restored = ValidationResult.from_dict(validation.to_dict())
|
||||
|
||||
assert restored is not None
|
||||
assert restored.status == "insufficient_evidence"
|
||||
assert restored.evidence_gaps == ["missing command output", "missing file reference"]
|
||||
assert restored.to_dict()["evidence_gaps"] == ["missing command output", "missing file reference"]
|
||||
|
||||
|
||||
def test_task_record_status_helpers_distinguish_review_and_failed() -> None:
|
||||
needs_review = _task_record("needs_review")
|
||||
failed = _task_record("failed")
|
||||
|
||||
assert needs_review.is_open is True
|
||||
assert needs_review.is_execution_active is False
|
||||
assert needs_review.requires_user_action is True
|
||||
assert failed.is_open is False
|
||||
assert failed.is_execution_active is False
|
||||
assert failed.requires_user_action is False
|
||||
|
||||
|
||||
def test_task_service_api_payload_emits_status_helpers(tmp_path: Path) -> None:
|
||||
service = TaskService(tmp_path)
|
||||
task = _task_record("needs_review")
|
||||
|
||||
payload = service.to_api_dict(task)
|
||||
|
||||
assert payload["is_open"] is True
|
||||
assert payload["is_execution_active"] is False
|
||||
assert payload["requires_user_action"] is True
|
||||
|
||||
|
||||
def test_validation_failure_retries_once(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[
|
||||
ValidationResult(
|
||||
passed=False,
|
||||
score=0.2,
|
||||
issues=["missing tests"],
|
||||
recommended_revision_prompt="Add tests before final response.",
|
||||
validator="test",
|
||||
),
|
||||
ValidationResult(passed=True, score=0.88, validator="test"),
|
||||
]
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement and validate the task",
|
||||
session_id="web:retry",
|
||||
provider_bundle=_bundle("first draft", "revised draft"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task(result.task_id)
|
||||
|
||||
assert result.output_text == "revised draft"
|
||||
assert result.validation_result["accepted"] is True
|
||||
assert task is not None
|
||||
assert len(task.run_ids) == 2
|
||||
visible_messages = loaded.session_manager.get_messages_as_conversation(result.session_id)
|
||||
visible_contents = [message.get("content") for message in visible_messages]
|
||||
assert "first draft" not in visible_contents
|
||||
assert "revised draft" in visible_contents
|
||||
|
||||
|
||||
def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[ValidationResult(passed=True, score=0.9, validator="test")]
|
||||
),
|
||||
task_execution_planner=StubTaskExecutionPlanner(),
|
||||
)
|
||||
)
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement feedback handling",
|
||||
session_id="web:feedback",
|
||||
provider_bundle=_bundle("done"),
|
||||
"write implementation plan",
|
||||
session_id="web:acceptance",
|
||||
provider_bundle=_bundle("Plan"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
learning_calls = []
|
||||
|
||||
def build_learning_candidates_for_task(task_id: str, *, trigger_run_id: str) -> list[FakeLearningCandidate]:
|
||||
learning_calls.append((task_id, trigger_run_id))
|
||||
loaded = service.create_loop().boot()
|
||||
generated: list[tuple[str, str]] = []
|
||||
|
||||
def build_learning_candidates_for_task(
|
||||
task_id: str,
|
||||
*,
|
||||
final_accepted_run_id: str | None = None,
|
||||
trigger_run_id: str | None = None,
|
||||
) -> list[FakeLearningCandidate]:
|
||||
generated.append((task_id, final_accepted_run_id or trigger_run_id or ""))
|
||||
return [FakeLearningCandidate()]
|
||||
|
||||
loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task
|
||||
|
||||
feedback = asyncio.run(
|
||||
service.submit_feedback(
|
||||
session_id=result.session_id,
|
||||
response = asyncio.run(
|
||||
service.submit_acceptance(
|
||||
session_id="web:acceptance",
|
||||
run_id=result.run_id,
|
||||
feedback_type="satisfied",
|
||||
acceptance_type="accept",
|
||||
)
|
||||
)
|
||||
|
||||
assert feedback["task_status"] == "closed"
|
||||
assert feedback["learning_candidates"] == [
|
||||
assert response["task_status"] == "closed"
|
||||
assert response["acceptance_type"] == "accept"
|
||||
assert response["learning_candidates"] == [
|
||||
{"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
|
||||
]
|
||||
assert learning_calls == [(result.task_id, result.run_id)]
|
||||
assert generated == [(result.task_id, result.run_id)]
|
||||
|
||||
service2 = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path / "abandon",
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[
|
||||
ValidationResult(passed=False, score=0.3, validator="test"),
|
||||
ValidationResult(passed=False, score=0.3, validator="test"),
|
||||
]
|
||||
),
|
||||
)
|
||||
)
|
||||
abandoned = asyncio.run(
|
||||
service2.process_direct(
|
||||
"implement another workflow",
|
||||
session_id="web:abandon",
|
||||
provider_bundle=_bundle("not enough", "still not enough"),
|
||||
)
|
||||
)
|
||||
abandon_feedback = asyncio.run(
|
||||
service2.submit_feedback(
|
||||
session_id=abandoned.session_id,
|
||||
run_id=abandoned.run_id,
|
||||
feedback_type="abandon",
|
||||
comment="too costly",
|
||||
)
|
||||
)
|
||||
|
||||
assert abandon_feedback["task_status"] == "abandoned"
|
||||
assert abandon_feedback["learning_candidates"] == []
|
||||
loaded2 = service2.create_loop().boot()
|
||||
failure_events = [
|
||||
event
|
||||
for event in loaded2.session_manager.get_run_event_records(abandoned.session_id, abandoned.run_id)
|
||||
if event.event_type == "task_failure_evidence_recorded"
|
||||
]
|
||||
assert len(failure_events) == 1
|
||||
assert loaded2.memory_service.get_store().memory_entries == []
|
||||
task_service = loaded.task_service
|
||||
assert task_service is not None
|
||||
task = task_service.get_task(result.task_id or "")
|
||||
assert task is not None
|
||||
assert task.metadata["final_accepted_run_id"] == result.run_id
|
||||
|
||||
|
||||
def test_feedback_is_idempotent_and_projected_to_assistant_message(tmp_path: Path) -> None:
|
||||
def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[ValidationResult(passed=True, score=0.9, validator="test")]
|
||||
),
|
||||
task_execution_planner=StubTaskExecutionPlanner(),
|
||||
)
|
||||
)
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement feedback projection",
|
||||
session_id="web:feedback-projection",
|
||||
provider_bundle=_bundle("done"),
|
||||
"summarize notes",
|
||||
session_id="web:revise",
|
||||
provider_bundle=_bundle("Summary"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
|
||||
first = asyncio.run(
|
||||
service.submit_feedback(
|
||||
session_id=result.session_id,
|
||||
response = asyncio.run(
|
||||
service.submit_acceptance(
|
||||
session_id="web:revise",
|
||||
run_id=result.run_id,
|
||||
feedback_type="satisfied",
|
||||
acceptance_type="revise",
|
||||
comment="Add decisions",
|
||||
)
|
||||
)
|
||||
second = asyncio.run(
|
||||
|
||||
assert response["task_status"] == "needs_revision"
|
||||
assert response["learning_candidates"] == []
|
||||
|
||||
task_service = service.create_loop().boot().task_service
|
||||
assert task_service is not None
|
||||
task = task_service.get_task(result.task_id or "")
|
||||
assert task is not None
|
||||
assert task.feedback[0]["acceptance_type"] == "revise"
|
||||
|
||||
|
||||
def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=StubTaskExecutionPlanner(),
|
||||
)
|
||||
)
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"prepare checklist",
|
||||
session_id="web:legacy",
|
||||
provider_bundle=_bundle("Checklist"),
|
||||
)
|
||||
)
|
||||
|
||||
response = asyncio.run(
|
||||
service.submit_feedback(
|
||||
session_id=result.session_id,
|
||||
session_id="web:legacy",
|
||||
run_id=result.run_id,
|
||||
feedback_type="satisfied",
|
||||
)
|
||||
)
|
||||
|
||||
feedback_events = [
|
||||
event
|
||||
for event in loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
|
||||
if event.event_type == "task_feedback_recorded"
|
||||
]
|
||||
assistant = [
|
||||
message
|
||||
for message in loaded.session_manager.get_messages_as_conversation(result.session_id)
|
||||
if message.get("role") == "assistant" and message.get("run_id") == result.run_id
|
||||
][-1]
|
||||
|
||||
assert first["task_status"] == "closed"
|
||||
assert second["task_status"] == "closed"
|
||||
assert len(feedback_events) == 1
|
||||
assert assistant["feedback_state"] == "satisfied"
|
||||
assert assistant["task_status"] == "closed"
|
||||
assert assistant["validation_status"] == "passed"
|
||||
|
||||
with pytest.raises(ValueError, match="already recorded"):
|
||||
asyncio.run(
|
||||
service.submit_feedback(
|
||||
session_id=result.session_id,
|
||||
run_id=result.run_id,
|
||||
feedback_type="abandon",
|
||||
)
|
||||
)
|
||||
|
||||
task = loaded.task_service.get_task(result.task_id)
|
||||
assert task is not None
|
||||
assert task.status == "closed"
|
||||
assert response["acceptance_type"] == "accept"
|
||||
assert response["feedback_type"] == "satisfied"
|
||||
assert response["task_status"] == "closed"
|
||||
|
||||
|
||||
def test_task_mode_team_plan_runs_subagent_then_main_synthesis(tmp_path: Path) -> None:
|
||||
main_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
]
|
||||
)
|
||||
sub_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
]
|
||||
)
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
|
||||
validation_service=StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")]),
|
||||
)
|
||||
)
|
||||
def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None:
|
||||
service = TaskService(tmp_path)
|
||||
task = service.create_task(session_id="s", description="legacy")
|
||||
task.status = "awaiting_feedback"
|
||||
task.feedback.append({"feedback_type": "satisfied", "run_id": "run-1"})
|
||||
service.store.upsert_task(task)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement team-backed workflow",
|
||||
session_id="web:team",
|
||||
provider_bundle=_provider_bundle(main_provider),
|
||||
team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task(result.task_id)
|
||||
events = loaded.session_manager.get_event_records(result.session_id)
|
||||
loaded = service.get_task(task.task_id)
|
||||
|
||||
assert result.output_text == "final synthesized answer"
|
||||
assert task is not None
|
||||
assert len(task.run_ids) == 2
|
||||
assert result.run_id == task.run_ids[-1]
|
||||
assert any(event.event_type == "task_execution_planned" for event in events)
|
||||
assert any(event.event_type == "task_team_run_completed" for event in events)
|
||||
assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "sub-agent evidence" != result.output_text
|
||||
|
||||
|
||||
def test_task_mode_team_synthesis_runs_without_tools_and_receives_evidence(tmp_path: Path) -> None:
|
||||
main_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
]
|
||||
)
|
||||
sub_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
]
|
||||
)
|
||||
validation = StubValidationService([ValidationResult(status="accepted", score=0.9, validator="test")])
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
|
||||
validation_service=validation,
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement team-backed workflow",
|
||||
session_id="web:team-no-tools",
|
||||
provider_bundle=_provider_bundle(main_provider),
|
||||
team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
|
||||
)
|
||||
)
|
||||
|
||||
assert result.output_text == "final synthesized answer"
|
||||
assert main_provider.calls[0]["tools"] is None
|
||||
assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "Task evidence packet" in validation.calls[0]["evidence_text"]
|
||||
|
||||
|
||||
def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> None:
|
||||
main_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="fallback synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
|
||||
]
|
||||
)
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
|
||||
validation_service=StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")]),
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement workflow despite team failure",
|
||||
session_id="web:team-failure",
|
||||
provider_bundle=_provider_bundle(main_provider),
|
||||
team_provider_bundle_factory=lambda node: (_ for _ in ()).throw(RuntimeError("sub-agent unavailable")),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
events = loaded.session_manager.get_event_records(result.session_id)
|
||||
|
||||
assert result.output_text == "fallback synthesized answer"
|
||||
assert any(event.event_type == "task_team_run_failed" for event in events)
|
||||
assert "sub-agent unavailable" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "same class of tools fails repeatedly" in main_provider.calls[0]["messages"][0]["content"]
|
||||
assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
|
||||
|
||||
|
||||
def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=_single_planner(),
|
||||
validation_service=StubValidationService(
|
||||
[
|
||||
ValidationResult(
|
||||
status="insufficient_evidence",
|
||||
score=0.4,
|
||||
evidence_gaps=["source missing"],
|
||||
validator="test",
|
||||
)
|
||||
]
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"answer with uncertain evidence",
|
||||
session_id="web:needs-review",
|
||||
provider_bundle=_bundle("possible answer"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task(result.task_id)
|
||||
events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
|
||||
validation_event = next(event for event in events if event.event_type == "task_validation_snapshotted")
|
||||
|
||||
assert task is not None
|
||||
assert task.status == "needs_review"
|
||||
assert task.requires_user_action is True
|
||||
assert task.is_execution_active is False
|
||||
assert validation_event.event_payload["validation_result"]["status"] == "insufficient_evidence"
|
||||
assert validation_event.event_payload["retry_scheduled"] is False
|
||||
assert validation_event.event_payload["validation_debug"]["tool_result_count"] >= 0
|
||||
|
||||
|
||||
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
|
||||
main_provider = StubProvider(
|
||||
[
|
||||
LLMResponse(content="first synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model"),
|
||||
LLMResponse(content="revised synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model"),
|
||||
]
|
||||
)
|
||||
sub_providers = [
|
||||
StubProvider([LLMResponse(content="first evidence", finish_reason="stop", provider_name="stub", model="stub-model")]),
|
||||
StubProvider([LLMResponse(content="second evidence", finish_reason="stop", provider_name="stub", model="stub-model")]),
|
||||
]
|
||||
service = AgentService(
|
||||
loader=EngineLoader(
|
||||
workspace=tmp_path,
|
||||
task_execution_planner=StubTaskExecutionPlanner([_team_plan(), _team_plan()]),
|
||||
validation_service=StubValidationService(
|
||||
[
|
||||
ValidationResult(passed=False, score=0.2, recommended_revision_prompt="revise", validator="test"),
|
||||
ValidationResult(passed=True, score=0.9, validator="test"),
|
||||
]
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
service.process_direct(
|
||||
"implement and validate with team",
|
||||
session_id="web:team-retry",
|
||||
provider_bundle=_provider_bundle(main_provider),
|
||||
team_provider_bundle_factory=lambda node: _provider_bundle(sub_providers.pop(0)),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
task = loaded.task_service.get_task(result.task_id)
|
||||
visible = loaded.session_manager.get_messages_as_conversation(result.session_id)
|
||||
visible_contents = [message.get("content") for message in visible]
|
||||
run_records = {record.run_id: record for record in loaded.run_memory_store.list_runs()}
|
||||
|
||||
assert result.output_text == "revised synthesized answer"
|
||||
assert task is not None
|
||||
assert len(task.run_ids) == 4
|
||||
assert "first synthesized answer" not in visible_contents
|
||||
assert "revised synthesized answer" in visible_contents
|
||||
for run_id in task.run_ids:
|
||||
record = run_records[run_id]
|
||||
events = loaded.session_manager.get_run_event_records(record.session_id, run_id)
|
||||
skill_effects = [event for event in events if event.event_type == "skill_effects_snapshotted"]
|
||||
assert skill_effects
|
||||
assert skill_effects[-1].event_payload["candidate_generation_allowed"] is False
|
||||
|
||||
|
||||
def test_context_builder_strips_ui_projection_fields_from_provider_history() -> None:
|
||||
result = ContextBuilder().build_messages(
|
||||
ContextBuildInput(
|
||||
history=[
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "done",
|
||||
"run_id": "run-1",
|
||||
"task_id": "task-1",
|
||||
"task_status": "closed",
|
||||
"validation_status": "passed",
|
||||
"feedback_state": "satisfied",
|
||||
}
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
assistant = result.messages[-1]
|
||||
assert assistant == {"role": "assistant", "content": "done"}
|
||||
|
||||
|
||||
def test_context_builder_normalizes_persisted_tool_arguments() -> None:
|
||||
result = ContextBuilder().build_messages(
|
||||
ContextBuildInput(
|
||||
history=[
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call-1",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "cron",
|
||||
"arguments": {"action": "add", "mode": "notification"},
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
tool_call = result.messages[-1]["tool_calls"][0]
|
||||
assert tool_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
|
||||
|
||||
|
||||
def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
|
||||
task_service = TaskService(tmp_path / "tasks")
|
||||
task = task_service.create_task(session_id="web:validator", description="implement validator handling")
|
||||
validation = asyncio.run(
|
||||
ValidationService().validate_task_result(
|
||||
task=task,
|
||||
user_message="implement validator handling",
|
||||
final_output="done",
|
||||
provider_bundle=_main_only_bundle("not json"),
|
||||
)
|
||||
)
|
||||
|
||||
assert validation.accepted is False
|
||||
assert validation.status == "validator_error"
|
||||
assert validation.validator == "llm_error"
|
||||
assert validation.issues
|
||||
assert loaded is not None
|
||||
assert loaded.status == "awaiting_acceptance"
|
||||
assert loaded.feedback[0]["acceptance_type"] == "accept"
|
||||
|
||||
@ -20,8 +20,8 @@ class StubRunResult:
|
||||
model: str | None = "stub-model"
|
||||
usage: dict[str, Any] = field(default_factory=lambda: {"total_tokens": 3})
|
||||
task_id: str | None = "task-1"
|
||||
task_status: str | None = "awaiting_feedback"
|
||||
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
|
||||
task_status: str | None = "awaiting_acceptance"
|
||||
validation_result: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class StubAgentService(AgentService):
|
||||
@ -101,9 +101,10 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
|
||||
assert message["session_id"] == "web:alpha"
|
||||
assert message["run_id"] == "run-1"
|
||||
assert message["task_id"] == "task-1"
|
||||
assert message["task_status"] == "awaiting_feedback"
|
||||
assert message["validation_result"] == {"accepted": True}
|
||||
assert message["validation_status"] == "passed"
|
||||
assert message["task_status"] == "awaiting_acceptance"
|
||||
assert message["evidence_status"] == "recorded"
|
||||
assert message["validation_result"] is None
|
||||
assert "validation_status" not in message
|
||||
assert message["metadata"]["input_metadata"] == {
|
||||
"source": "test",
|
||||
"attachments": [{"file_id": "file-1", "name": "a.txt"}],
|
||||
|
||||
@ -19,7 +19,7 @@ import {
|
||||
uploadFile,
|
||||
wsManager,
|
||||
} from '@/lib/api';
|
||||
import { mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
|
||||
import { mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
|
||||
import { pickAppText } from '@/lib/i18n/core';
|
||||
import { useAppI18n } from '@/lib/i18n/provider';
|
||||
import { buildSessionProgressView } from '@/lib/session-progress';
|
||||
@ -32,7 +32,7 @@ function isSessionUpdatedEvent(data: WsEvent | Record<string, unknown>): data is
|
||||
|
||||
function activeTaskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
|
||||
if (status === 'needs_revision') return pickAppText(locale, '待修改', 'Needs revision');
|
||||
if (status === 'awaiting_feedback') return pickAppText(locale, '待反馈', 'Awaiting feedback');
|
||||
if (status === 'awaiting_acceptance') return pickAppText(locale, '待验收', 'Awaiting acceptance');
|
||||
if (status === 'running') return pickAppText(locale, '进行中', 'Running');
|
||||
return pickAppText(locale, '进行中', 'Active');
|
||||
}
|
||||
@ -157,10 +157,11 @@ export default function ChatPage() {
|
||||
setSessionProcess(key, process);
|
||||
}
|
||||
void loadActiveTask(key);
|
||||
const shouldMergePending = shouldMergePendingUsers(detail.messages, localSnapshot, waitingForReply);
|
||||
const displayMessages = detail.messages.filter(shouldDisplayChatMessage);
|
||||
const shouldMergePending = shouldMergePendingUsers(displayMessages, localSnapshot, waitingForReply);
|
||||
const nextMessages = shouldMergePending
|
||||
? mergeServerWithPendingUsers(detail.messages, localSnapshot)
|
||||
: detail.messages;
|
||||
? mergeServerWithPendingUsers(displayMessages, localSnapshot)
|
||||
: displayMessages;
|
||||
setMessages(nextMessages);
|
||||
shouldSnapToLatestRef.current = true;
|
||||
const last = nextMessages[nextMessages.length - 1];
|
||||
@ -217,15 +218,11 @@ export default function ChatPage() {
|
||||
if (data.type === 'status' && data.status === 'thinking') {
|
||||
setIsThinking(true);
|
||||
} else if (data.type === 'message' && data.role === 'assistant') {
|
||||
const validationResult = data.validation_result ?? data.metadata?.validation_result;
|
||||
const validationStatus = data.validation_status
|
||||
? data.validation_status
|
||||
: validationResult
|
||||
? ((validationResult as Record<string, unknown>).accepted === true ? 'passed' : 'failed')
|
||||
: 'unknown';
|
||||
setIsThinking(false);
|
||||
setIsLoading(false);
|
||||
addMessage({
|
||||
const rawEvidenceStatus = data.evidence_status ?? data.metadata?.evidence_status;
|
||||
const evidenceStatus = rawEvidenceStatus === 'recorded' ? 'recorded' : undefined;
|
||||
const assistantMessage = {
|
||||
role: 'assistant',
|
||||
content: typeof data.content === 'string' ? data.content : '',
|
||||
timestamp: new Date().toISOString(),
|
||||
@ -233,8 +230,11 @@ export default function ChatPage() {
|
||||
run_id: typeof data.run_id === 'string' ? data.run_id : undefined,
|
||||
task_id: data.task_id ?? data.metadata?.task_id ?? null,
|
||||
task_status: data.task_status ?? data.metadata?.task_status ?? null,
|
||||
validation_status: validationStatus,
|
||||
});
|
||||
evidence_status: evidenceStatus,
|
||||
} as const;
|
||||
if (shouldDisplayChatMessage(assistantMessage)) {
|
||||
addMessage(assistantMessage);
|
||||
}
|
||||
void loadSessionMessages(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
|
||||
void loadActiveTask(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
|
||||
loadSessions();
|
||||
@ -359,17 +359,18 @@ export default function ChatPage() {
|
||||
await loadSessions();
|
||||
return;
|
||||
}
|
||||
addMessage({
|
||||
const assistantMessage = {
|
||||
role: 'assistant',
|
||||
content: result.response,
|
||||
timestamp: new Date().toISOString(),
|
||||
run_id: result.run_id,
|
||||
task_id: result.task_id,
|
||||
task_status: result.task_status,
|
||||
validation_status: result.validation_result
|
||||
? (result.validation_result.accepted === true ? 'passed' : 'failed')
|
||||
: 'unknown',
|
||||
});
|
||||
evidence_status: result.evidence_status === 'recorded' ? 'recorded' : undefined,
|
||||
} as const;
|
||||
if (shouldDisplayChatMessage(assistantMessage)) {
|
||||
addMessage(assistantMessage);
|
||||
}
|
||||
void getSessionProcess(sessionId).then((process) => setSessionProcess(sessionId, process)).catch(() => null);
|
||||
void loadActiveTask(sessionId);
|
||||
loadSessions();
|
||||
@ -393,7 +394,7 @@ export default function ChatPage() {
|
||||
}
|
||||
}, [addMessage, clearInputDraft, input, isLoading, loadActiveTask, loadSessionMessages, loadSessions, locale, pendingFiles, revisionTargetRunId, sessionId, setIsLoading, setIsThinking, setSessionProcess, thinkingModeEnabled, updateMessageFeedback]);
|
||||
|
||||
const handleFeedback = useCallback(async (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => {
|
||||
const handleFeedback = useCallback(async (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => {
|
||||
updateMessageFeedback(runId, feedbackType);
|
||||
try {
|
||||
await submitChatFeedback({
|
||||
|
||||
@ -1238,7 +1238,7 @@ function riskLabel(risk: string, t: (zh: string, en: string) => string): string
|
||||
|
||||
function triggerReasonLabel(reason: string, t: (zh: string, en: string) => string): string {
|
||||
const labels: Record<string, string> = {
|
||||
validation_accepted_and_user_satisfied: t('任务验证通过且用户满意', 'Validation accepted and user satisfied'),
|
||||
task_accepted: t('任务已接受', 'Task accepted'),
|
||||
};
|
||||
return labels[reason] || reason;
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
import Link from 'next/link';
|
||||
import { useParams, useRouter } from 'next/navigation';
|
||||
import React, { useMemo, useState } from 'react';
|
||||
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, HelpCircle, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
|
||||
import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
|
||||
|
||||
import { TaskRuntimeStatusBadge, formatTaskRuntimeDuration, formatTaskRuntimeTime, progressPercent } from '@/components/task-runtime/TaskRuntimeShared';
|
||||
import { Badge } from '@/components/ui/badge';
|
||||
@ -17,8 +17,9 @@ import { buildTaskRuntimeView, type TaskRuntimeNodeView } from '@/lib/task-runti
|
||||
import { useChatStore } from '@/lib/store';
|
||||
import type { BackendTask, BackendTaskRun, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';
|
||||
|
||||
type TaskFeedbackType = 'satisfied' | 'revise' | 'abandon';
|
||||
type TaskFeedbackType = 'accept' | 'revise' | 'abandon';
|
||||
type TaskFeedbackItem = {
|
||||
acceptance_type?: unknown;
|
||||
feedback_type?: unknown;
|
||||
comment?: unknown;
|
||||
created_at?: unknown;
|
||||
@ -151,12 +152,6 @@ export default function TaskDetailPage() {
|
||||
const backendFeedbackRunId = backendTask ? pickFeedbackRunId(backendTask) : null;
|
||||
|
||||
if (!task && backendTask) {
|
||||
const validation = backendTask.validation_result;
|
||||
const accepted = Boolean(validation?.accepted);
|
||||
const validationIssues = [
|
||||
...arrayOfStrings(validation?.issues),
|
||||
...arrayOfStrings(validation?.missing_requirements),
|
||||
];
|
||||
const feedbackItems = backendTask.feedback || [];
|
||||
return (
|
||||
<div className="mx-auto max-w-5xl space-y-6 p-6">
|
||||
@ -232,57 +227,6 @@ export default function TaskDetailPage() {
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">{pickAppText(locale, '验证和反馈', 'Validation and feedback')}</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4 text-sm">
|
||||
<div className="rounded-lg border border-border bg-muted/25 p-4">
|
||||
<div className="flex items-center gap-2">
|
||||
{validation ? (
|
||||
accepted ? <CheckCircle2 className="h-5 w-5 text-[#657162]" /> : <XCircle className="h-5 w-5 text-destructive" />
|
||||
) : (
|
||||
<HelpCircle className="h-5 w-5 text-muted-foreground" />
|
||||
)}
|
||||
<div className="font-medium">
|
||||
{validation
|
||||
? accepted
|
||||
? pickAppText(locale, '验证通过', 'Validation passed')
|
||||
: pickAppText(locale, '需要继续修改', 'Needs revision')
|
||||
: pickAppText(locale, '尚未验证', 'Not validated yet')}
|
||||
</div>
|
||||
</div>
|
||||
{validation ? (
|
||||
<div className="mt-2 text-muted-foreground">
|
||||
{pickAppText(locale, '评分', 'Score')}: {String(validation.score ?? '-')} · {pickAppText(locale, '验证器', 'Validator')}: {String(validation.validator ?? '-')}
|
||||
</div>
|
||||
) : null}
|
||||
{validationIssues.length > 0 && (
|
||||
<ul className="mt-3 list-disc space-y-1 pl-5 text-muted-foreground">
|
||||
{validationIssues.map((item, index) => <li key={`${item}:${index}`}>{item}</li>)}
|
||||
</ul>
|
||||
)}
|
||||
{typeof validation?.recommended_revision_prompt === 'string' && validation.recommended_revision_prompt && (
|
||||
<p className="mt-3 rounded-md bg-background p-3 text-muted-foreground">{validation.recommended_revision_prompt}</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<div className="font-medium">{pickAppText(locale, '用户反馈', 'User feedback')}</div>
|
||||
{feedbackItems.length === 0 ? (
|
||||
<p className="text-muted-foreground">{pickAppText(locale, '还没有用户反馈。', 'No user feedback yet.')}</p>
|
||||
) : (
|
||||
feedbackItems.map((item, index) => (
|
||||
<div key={index} className="rounded-md border border-border p-3">
|
||||
<div className="font-medium">{humanFeedback(String(item.feedback_type || ''), locale)}</div>
|
||||
{item.comment ? <p className="mt-1 text-muted-foreground">{String(item.comment)}</p> : null}
|
||||
{item.created_at ? <p className="mt-1 text-xs text-muted-foreground">{formatTaskRuntimeTime(String(item.created_at), locale)}</p> : null}
|
||||
</div>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@ -476,6 +420,7 @@ export default function TaskDetailPage() {
|
||||
comment,
|
||||
});
|
||||
setRuntimeFeedback({
|
||||
acceptance_type: feedbackType,
|
||||
feedback_type: feedbackType,
|
||||
comment: comment || '',
|
||||
created_at: new Date().toISOString(),
|
||||
@ -660,14 +605,14 @@ function TaskFeedbackPanel({
|
||||
return (
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">{pickAppText(locale, '任务反馈', 'Task feedback')}</CardTitle>
|
||||
<CardTitle className="text-base">{pickAppText(locale, '任务验收', 'Task acceptance')}</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
{recordedFeedback ? (
|
||||
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm">
|
||||
<div className="flex items-center gap-2 font-medium">
|
||||
<CheckCircle2 className="h-4 w-4 text-[#657162]" />
|
||||
{pickAppText(locale, '已提交反馈', 'Feedback submitted')}: {humanFeedback(String(recordedFeedback.feedback_type || ''), locale)}
|
||||
{pickAppText(locale, '已提交验收', 'Acceptance submitted')}: {humanFeedback(String(recordedFeedback.acceptance_type || recordedFeedback.feedback_type || ''), locale)}
|
||||
</div>
|
||||
{recordedFeedback.comment ? (
|
||||
<p className="mt-2 text-muted-foreground">{String(recordedFeedback.comment)}</p>
|
||||
@ -678,22 +623,22 @@ function TaskFeedbackPanel({
|
||||
</div>
|
||||
) : isFinalized ? (
|
||||
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
|
||||
{pickAppText(locale, '任务已结束,不能再提交新的反馈。', 'This task is finalized and cannot accept new feedback.')}
|
||||
{pickAppText(locale, '任务已结束,不能再提交新的验收。', 'This task is finalized and cannot accept new acceptance.')}
|
||||
</div>
|
||||
) : !runId ? (
|
||||
<div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
|
||||
{pickAppText(locale, '暂无可反馈的运行记录。', 'No run is available for feedback yet.')}
|
||||
{pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.')}
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
<div className="grid gap-2 sm:grid-cols-3">
|
||||
<FeedbackButton
|
||||
type="satisfied"
|
||||
type="accept"
|
||||
icon={<ThumbsUp className="mr-2 h-4 w-4" />}
|
||||
label={pickAppText(locale, '满意', 'Satisfied')}
|
||||
label={pickAppText(locale, '接受', 'Accept')}
|
||||
actionBusy={actionBusy}
|
||||
disabled={!canSubmit}
|
||||
onClick={() => submit('satisfied', comment.trim() || undefined)}
|
||||
onClick={() => submit('accept', comment.trim() || undefined)}
|
||||
/>
|
||||
<FeedbackButton
|
||||
type="revise"
|
||||
@ -717,10 +662,10 @@ function TaskFeedbackPanel({
|
||||
value={comment}
|
||||
onChange={(event) => setComment(event.target.value)}
|
||||
disabled={Boolean(recordedFeedback) || isFinalized || Boolean(actionBusy)}
|
||||
placeholder={pickAppText(locale, '需要修改时写下具体要求;满意或放弃可选填说明。', 'Describe requested changes; notes are optional for satisfied or abandon.')}
|
||||
placeholder={pickAppText(locale, '需要修改时写下具体要求;接受或放弃可选填说明。', 'Describe requested changes; notes are optional for accept or abandon.')}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
{pickAppText(locale, '反馈将记录到当前任务运行:', 'Feedback will be recorded on run: ')}
|
||||
{pickAppText(locale, '验收将记录到当前任务运行:', 'Acceptance will be recorded on run: ')}
|
||||
<span className="font-mono">{runId || '-'}</span>
|
||||
<span className="mx-1">·</span>
|
||||
{pickAppText(locale, '会话:', 'Session: ')}
|
||||
@ -807,8 +752,7 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
|
||||
const map: Record<string, [string, string]> = {
|
||||
open: ['已创建', 'Open'],
|
||||
running: ['执行中', 'Running'],
|
||||
validating: ['验证中', 'Validating'],
|
||||
awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
|
||||
awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
|
||||
needs_revision: ['需要修改', 'Needs revision'],
|
||||
closed: ['已完成', 'Closed'],
|
||||
abandoned: ['已放弃', 'Abandoned'],
|
||||
@ -818,10 +762,10 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
|
||||
}
|
||||
|
||||
function humanFeedback(type: string, locale: 'zh-CN' | 'en-US') {
|
||||
if (type === 'satisfied') return pickAppText(locale, '满意', 'Satisfied');
|
||||
if (type === 'accept' || type === 'satisfied') return pickAppText(locale, '接受', 'Accepted');
|
||||
if (type === 'revise') return pickAppText(locale, '请求修改', 'Revision requested');
|
||||
if (type === 'abandon') return pickAppText(locale, '放弃任务', 'Abandoned');
|
||||
return type || pickAppText(locale, '反馈', 'Feedback');
|
||||
return type || pickAppText(locale, '验收', 'Acceptance');
|
||||
}
|
||||
|
||||
function humanFinishReason(reason: string, locale: 'zh-CN' | 'en-US') {
|
||||
@ -848,7 +792,3 @@ function feedbackForRun(items: TaskFeedbackItem[], runId: string | null): TaskFe
|
||||
function latestFeedback(items: TaskFeedbackItem[]): TaskFeedbackItem | null {
|
||||
return [...items].reverse()[0] ?? null;
|
||||
}
|
||||
|
||||
function arrayOfStrings(value: unknown): string[] {
|
||||
return Array.isArray(value) ? value.map((item) => String(item)).filter(Boolean) : [];
|
||||
}
|
||||
|
||||
@ -142,7 +142,7 @@ function OrdinaryTasks() {
|
||||
</div>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Badge variant={task.status === 'awaiting_feedback' || task.status === 'closed' ? 'default' : 'secondary'}>
|
||||
<Badge variant={task.status === 'awaiting_acceptance' || task.status === 'closed' ? 'default' : 'secondary'}>
|
||||
{taskStatusLabel(task.status, locale)}
|
||||
</Badge>
|
||||
</TableCell>
|
||||
@ -185,8 +185,7 @@ function taskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
|
||||
const labels: Record<string, [string, string]> = {
|
||||
open: ['已创建', 'Open'],
|
||||
running: ['执行中', 'Running'],
|
||||
validating: ['验证中', 'Validating'],
|
||||
awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
|
||||
awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
|
||||
needs_revision: ['需要修改', 'Needs revision'],
|
||||
closed: ['已完成', 'Closed'],
|
||||
abandoned: ['已放弃', 'Abandoned'],
|
||||
|
||||
@ -27,7 +27,7 @@ export function ChatWorkbench({
|
||||
processArtifacts: ProcessArtifact[];
|
||||
selectedRunId: string | null;
|
||||
onSelectRun: (runId: string) => void;
|
||||
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
|
||||
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
|
||||
onRequestRevision: (runId: string) => void;
|
||||
}) {
|
||||
return (
|
||||
|
||||
@ -6,7 +6,7 @@ import { Bot, CheckCircle2, ChevronRight, Loader2, Paperclip, RefreshCcw, Thumbs
|
||||
|
||||
import type { ChatMessage, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';
|
||||
import { getAccessToken, getFileUrl } from '@/lib/api';
|
||||
import { getTaskCardMessageIndexes } from '@/lib/chat-messages';
|
||||
import { getTaskCardMessageIndexes, hasVisibleChatContent, normalizedMessageText, shouldDisplayChatMessage } from '@/lib/chat-messages';
|
||||
import { AgentTeamBlock } from '@/components/chat-workbench/AgentTeamBlock';
|
||||
import { MarkdownContent } from '@/components/chat-workbench/MarkdownContent';
|
||||
import { ScrollArea } from '@/components/ui/scroll-area';
|
||||
@ -49,19 +49,14 @@ function MessageBubble({
|
||||
message: ChatMessage;
|
||||
showTaskCard: boolean;
|
||||
canSendFeedback: boolean;
|
||||
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
|
||||
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
|
||||
onRequestRevision: (runId: string) => void;
|
||||
}) {
|
||||
const { locale } = useAppI18n();
|
||||
const isUser = message.role === 'user';
|
||||
const textContent = typeof message.content === 'string' ? message.content : String(message.content || '');
|
||||
const [feedbackMode, setFeedbackMode] = React.useState<'satisfied' | null>(null);
|
||||
const textContent = normalizedMessageText(message.content);
|
||||
const [feedbackMode, setFeedbackMode] = React.useState<'accept' | null>(null);
|
||||
const [feedbackComment, setFeedbackComment] = React.useState('');
|
||||
const validationFailed = message.validation_status === 'failed';
|
||||
const validationDetails =
|
||||
validationFailed
|
||||
? pickAppText(locale, '详细原因会在任务验证区展示;展开任务可查看验证报告。', 'Detailed reasons are shown in the task validation area. Open the task to inspect the validation report.')
|
||||
: '';
|
||||
|
||||
return (
|
||||
<div className={`flex gap-3 ${isUser ? 'justify-end' : ''}`}>
|
||||
@ -142,22 +137,14 @@ function MessageBubble({
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{!isUser && validationFailed && (
|
||||
<details className="mt-3 rounded-md border border-destructive/30 bg-destructive/5 p-3">
|
||||
<summary className="cursor-pointer text-base font-semibold text-destructive">
|
||||
{pickAppText(locale, '验证失败', 'Validation failed')}
|
||||
</summary>
|
||||
<p className="mt-2 text-xs leading-5 text-muted-foreground">{validationDetails}</p>
|
||||
</details>
|
||||
)}
|
||||
{!isUser && (canSendFeedback || message.feedback_state) && message.run_id && (
|
||||
<div className="mt-3 space-y-2 border-t border-border/70 pt-3">
|
||||
{message.feedback_state ? (
|
||||
<div className="flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<CheckCircle2 className="h-3.5 w-3.5" />
|
||||
<span>
|
||||
{message.feedback_state === 'satisfied'
|
||||
? pickAppText(locale, '已标记满意', 'Marked satisfied')
|
||||
{message.feedback_state === 'accept' || message.feedback_state === 'satisfied'
|
||||
? pickAppText(locale, '已接受', 'Accepted')
|
||||
: message.feedback_state === 'revise'
|
||||
? pickAppText(locale, '已请求修改', 'Revision requested')
|
||||
: pickAppText(locale, '已放弃任务', 'Task abandoned')}
|
||||
@ -168,11 +155,11 @@ function MessageBubble({
|
||||
<div className="flex flex-wrap items-center gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setFeedbackMode('satisfied')}
|
||||
onClick={() => setFeedbackMode('accept')}
|
||||
className="inline-flex h-8 items-center gap-1 rounded-md border border-border px-3 text-xs text-muted-foreground hover:bg-accent hover:text-foreground"
|
||||
>
|
||||
<ThumbsUp className="h-3.5 w-3.5" />
|
||||
{pickAppText(locale, '满意', 'Satisfied')}
|
||||
{pickAppText(locale, '接受', 'Accept')}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
@ -222,13 +209,6 @@ function MessageBubble({
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{message.validation_status && message.validation_status !== 'unknown' && (
|
||||
<span className="text-xs text-muted-foreground">
|
||||
{message.validation_status === 'passed'
|
||||
? pickAppText(locale, '验证通过', 'Validated')
|
||||
: pickAppText(locale, '验证未通过', 'Validation failed')}
|
||||
</span>
|
||||
)}
|
||||
{message.feedback_error && (
|
||||
<span className="text-xs text-destructive">{message.feedback_error}</span>
|
||||
)}
|
||||
@ -264,6 +244,17 @@ function shouldHideSystemAgentMessage(message: ChatMessage): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
function hasRenderableMessageContent(message: ChatMessage): boolean {
|
||||
return hasVisibleChatContent(message);
|
||||
}
|
||||
|
||||
function shouldHideMessage(message: ChatMessage): boolean {
|
||||
if (shouldHideSystemAgentMessage(message)) {
|
||||
return true;
|
||||
}
|
||||
return !shouldDisplayChatMessage(message);
|
||||
}
|
||||
|
||||
function parseTimelineTime(value?: string | null): number | null {
|
||||
if (!value) return null;
|
||||
const parsed = new Date(value).getTime();
|
||||
@ -342,12 +333,12 @@ export function MessageList({
|
||||
processArtifacts: ProcessArtifact[];
|
||||
selectedRunId: string | null;
|
||||
onSelectRun: (runId: string) => void;
|
||||
onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
|
||||
onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
|
||||
onRequestRevision: (runId: string) => void;
|
||||
}) {
|
||||
const { locale } = useAppI18n();
|
||||
const visibleMessages = React.useMemo(
|
||||
() => messages.filter((message) => !shouldHideSystemAgentMessage(message)),
|
||||
() => messages.filter((message) => !shouldHideMessage(message)),
|
||||
[messages]
|
||||
);
|
||||
const teamGroups = React.useMemo(
|
||||
@ -385,14 +376,21 @@ export function MessageList({
|
||||
() => getTaskCardMessageIndexes(visibleMessages),
|
||||
[visibleMessages]
|
||||
);
|
||||
const latestFeedbackRunId = [...visibleMessages]
|
||||
.reverse()
|
||||
.find((message) =>
|
||||
message.role === 'assistant'
|
||||
&& message.run_id
|
||||
&& message.task_id
|
||||
&& message.task_status === 'awaiting_feedback'
|
||||
)?.run_id;
|
||||
const latestFeedbackMessageIndex = (() => {
|
||||
for (let index = visibleMessages.length - 1; index >= 0; index -= 1) {
|
||||
const message = visibleMessages[index];
|
||||
if (
|
||||
message.role === 'assistant'
|
||||
&& message.run_id
|
||||
&& message.task_id
|
||||
&& message.task_status === 'awaiting_acceptance'
|
||||
&& hasRenderableMessageContent(message)
|
||||
) {
|
||||
return index;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
})();
|
||||
|
||||
return (
|
||||
<ScrollArea className="h-full px-8" viewportRef={viewportRef}>
|
||||
@ -411,7 +409,7 @@ export function MessageList({
|
||||
key={item.key}
|
||||
message={item.message}
|
||||
showTaskCard={taskCardMessageIndexes.has(item.messageIndex)}
|
||||
canSendFeedback={Boolean(latestFeedbackRunId && item.message.run_id === latestFeedbackRunId)}
|
||||
canSendFeedback={item.messageIndex === latestFeedbackMessageIndex}
|
||||
onFeedback={onFeedback}
|
||||
onRequestRevision={onRequestRevision}
|
||||
/>
|
||||
|
||||
@ -271,7 +271,7 @@ export async function sendMessage(
|
||||
run_id?: string;
|
||||
task_id?: string | null;
|
||||
task_status?: string | null;
|
||||
validation_result?: Record<string, unknown> | null;
|
||||
evidence_status?: string | null;
|
||||
}> {
|
||||
const body: Record<string, unknown> = { message, session_id: sessionId };
|
||||
if (attachments && attachments.length > 0) {
|
||||
@ -293,7 +293,7 @@ export async function sendMessage(
|
||||
finish_reason?: string;
|
||||
task_id?: string | null;
|
||||
task_status?: string | null;
|
||||
validation_result?: Record<string, unknown> | null;
|
||||
evidence_status?: string | null;
|
||||
}>('/api/chat', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify(body),
|
||||
@ -305,28 +305,29 @@ export async function sendMessage(
|
||||
run_id: result.run_id,
|
||||
task_id: result.task_id,
|
||||
task_status: result.task_status,
|
||||
validation_result: result.validation_result,
|
||||
evidence_status: result.evidence_status,
|
||||
};
|
||||
}
|
||||
|
||||
export async function submitChatFeedback(params: {
|
||||
sessionId: string;
|
||||
runId: string;
|
||||
feedbackType: 'satisfied' | 'revise' | 'abandon';
|
||||
feedbackType: 'accept' | 'revise' | 'abandon';
|
||||
comment?: string;
|
||||
}): Promise<{
|
||||
session_id: string;
|
||||
run_id: string;
|
||||
task_id: string;
|
||||
task_status: string;
|
||||
acceptance_type: string;
|
||||
feedback_type: string;
|
||||
}> {
|
||||
return fetchJSON('/api/chat/feedback', {
|
||||
return fetchJSON('/api/chat/acceptance', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({
|
||||
session_id: params.sessionId,
|
||||
run_id: params.runId,
|
||||
feedback_type: params.feedbackType,
|
||||
acceptance_type: params.feedbackType,
|
||||
comment: params.comment,
|
||||
}),
|
||||
});
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
|
||||
import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
|
||||
import type { ChatMessage } from '@/types';
|
||||
|
||||
describe('chat message helpers', () => {
|
||||
@ -85,10 +85,17 @@ describe('chat message helpers', () => {
|
||||
content: 'Final answer.',
|
||||
run_id: 'run-1',
|
||||
task_id: 'task-1',
|
||||
task_status: 'awaiting_feedback',
|
||||
task_status: 'awaiting_acceptance',
|
||||
},
|
||||
];
|
||||
|
||||
expect(Array.from(getTaskCardMessageIndexes(messages))).toEqual([2]);
|
||||
});
|
||||
|
||||
it('hides empty assistant records from session history', () => {
|
||||
expect(shouldDisplayChatMessage({ role: 'assistant', content: '', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
|
||||
expect(shouldDisplayChatMessage({ role: 'assistant', content: '\u200B\uFEFF', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
|
||||
expect(shouldDisplayChatMessage({ role: 'assistant', content: 'Final answer.', task_id: 'task-1', run_id: 'run-1' })).toBe(true);
|
||||
expect(shouldDisplayChatMessage({ role: 'user', content: '' })).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,5 +1,28 @@
|
||||
import type { ChatMessage } from '@/types';
|
||||
|
||||
const INVISIBLE_CONTENT_CHARS = /[\u200B-\u200D\uFEFF]/g;
|
||||
|
||||
export function normalizedMessageText(content: unknown): string {
|
||||
if (typeof content === 'string') {
|
||||
return content.replace(INVISIBLE_CONTENT_CHARS, '').trim();
|
||||
}
|
||||
if (content == null) {
|
||||
return '';
|
||||
}
|
||||
return String(content).replace(INVISIBLE_CONTENT_CHARS, '').trim();
|
||||
}
|
||||
|
||||
export function hasVisibleChatContent(msg: ChatMessage): boolean {
|
||||
if (normalizedMessageText(msg.content)) {
|
||||
return true;
|
||||
}
|
||||
return Boolean(msg.attachments?.length);
|
||||
}
|
||||
|
||||
export function shouldDisplayChatMessage(msg: ChatMessage): boolean {
|
||||
return msg.role !== 'assistant' || hasVisibleChatContent(msg);
|
||||
}
|
||||
|
||||
export function messageFingerprint(msg: ChatMessage): string {
|
||||
const attachmentKey = (msg.attachments ?? [])
|
||||
.map((a) => `${a.file_id ?? ''}:${a.name}:${a.content_type}:${a.size ?? ''}`)
|
||||
|
||||
@ -48,8 +48,9 @@ export interface ChatMessage {
|
||||
run_id?: string;
|
||||
task_id?: string | null;
|
||||
task_status?: string | null;
|
||||
validation_status?: 'passed' | 'failed' | 'unknown';
|
||||
feedback_state?: 'satisfied' | 'revise' | 'abandon';
|
||||
evidence_status?: 'recorded';
|
||||
acceptance_state?: 'accept' | 'revise' | 'abandon';
|
||||
feedback_state?: 'accept' | 'satisfied' | 'revise' | 'abandon';
|
||||
feedback_error?: string;
|
||||
message_type?: string | null;
|
||||
scheduled_job_id?: string | null;
|
||||
@ -153,6 +154,7 @@ export interface SystemStatus {
|
||||
workspace_exists: boolean;
|
||||
model: string;
|
||||
max_tokens: number;
|
||||
max_context_messages?: number;
|
||||
temperature: number;
|
||||
max_tool_iterations: number;
|
||||
providers: ProviderStatus[];
|
||||
@ -315,6 +317,7 @@ export interface BackendTaskRun {
|
||||
attempt_index?: number | null;
|
||||
task_text?: string;
|
||||
messages: BackendTaskRunMessage[];
|
||||
evidence_status?: string | null;
|
||||
validation_result?: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
@ -972,12 +975,12 @@ export interface ChatAssistantEvent {
|
||||
run_id?: string;
|
||||
task_id?: string | null;
|
||||
task_status?: string | null;
|
||||
validation_status?: 'passed' | 'failed' | 'unknown';
|
||||
evidence_status?: 'recorded';
|
||||
validation_result?: Record<string, unknown> | null;
|
||||
metadata?: {
|
||||
task_id?: string | null;
|
||||
task_status?: string | null;
|
||||
validation_result?: Record<string, unknown> | null;
|
||||
evidence_status?: string | null;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user