feat(engine): 添加运行时上下文支持并重构工具迭代限制

添加 RuntimeContext 类用于捕获模型运行时的日期时间信息，包括UTC时间、本地时间和时区信息，并在系统提示中显示这些信息。同时增加最大上下文消息数和工具迭代次数的配置选项，将验证服务从引擎加载器中移除，并更新相关的数据结构和接口。

BREAKING CHANGE: 移除了验证服务，相关字段被替换为证据状态和接受状态。

- 添加 RuntimeContext 类和相关渲染方法
- 增加 max_context_messages 和 max_tool_iterations 配置
- 移除 ValidationService 相关代码
- 更新消息记录中的验证状态字段
- 添加原始工具调用检测和回退处理
2026-05-26 11:18:35 +08:00
parent 16347caf5e
commit 6e9e74d1ee
57 changed files with 5710 additions and 1582 deletions
--- a/app-instance/backend/beaver/engine/context/__init__.py
+++ b/app-instance/backend/beaver/engine/context/__init__.py
@ -4,6 +4,7 @@ from .builder import (
    ContextBuildInput,
    ContextBuildResult,
    ContextBuilder,
+    RuntimeContext,
    SessionContext,
    SkillContext,
 )
@ -12,6 +13,7 @@ __all__ = [
    "ContextBuildInput",
    "ContextBuildResult",
    "ContextBuilder",
+    "RuntimeContext",
    "SessionContext",
    "SkillContext",
 ]
--- a/app-instance/backend/beaver/engine/context/builder.py
+++ b/app-instance/backend/beaver/engine/context/builder.py
@ -80,6 +80,16 @@ class SessionContext:
    parent_session_id: str | None = None


+@dataclass(slots=True)
+class RuntimeContext:
+    """Per-run runtime facts that should be visible to the model."""
+
+    utc_datetime: str
+    local_datetime: str
+    timezone: str | None = None
+    utc_offset: str | None = None
+
+
@dataclass(slots=True)
 class ContextBuildInput:
    """一次上下文构建所需的全部输入。
@ -103,6 +113,7 @@ class ContextBuildInput:
    memory_snapshot: MemorySnapshot | None = None
    activated_skills: list[SkillContext] = field(default_factory=list)
    session_context: SessionContext | None = None
+    runtime_context: RuntimeContext | None = None
    execution_context: str | None = None
    extra_sections: list[str] = field(default_factory=list)

@ -143,9 +154,10 @@ class ContextBuilder:
        1. Beaver user-facing assistant identity
        2. base system prompt
        3. session metadata
-        4. execution context
-        5. frozen memory snapshot
-        6. extra sections
+        4. runtime date/time
+        5. execution context
+        6. frozen memory snapshot
+        7. extra sections

        这样设计的原因：
        - 身份与总规则要最靠前
@ -164,6 +176,10 @@ class ContextBuilder:
        if session_section:
            sections.append(session_section)

+        runtime_section = self._render_runtime_section(build_input.runtime_context)
+        if runtime_section:
+            sections.append(runtime_section)
+
        execution_context = (build_input.execution_context or "").strip()
        if execution_context:
            sections.append(f"# Execution Context\n\n{execution_context}")
@ -347,6 +363,31 @@ class ContextBuilder:
            return None
        return "# Current Session\n\n" + "\n".join(rows)

+    def _render_runtime_section(self, runtime_context: RuntimeContext | None) -> str | None:
+        """Render date/time facts captured for the current model run."""
+
+        if runtime_context is None:
+            return None
+
+        rows: list[str] = []
+        if runtime_context.utc_datetime:
+            rows.append(f"Current UTC time: {runtime_context.utc_datetime}")
+        if runtime_context.local_datetime:
+            rows.append(f"Current local time: {runtime_context.local_datetime}")
+        if runtime_context.timezone:
+            rows.append(f"Local timezone: {runtime_context.timezone}")
+        if runtime_context.utc_offset:
+            rows.append(f"Local UTC offset: {runtime_context.utc_offset}")
+
+        if not rows:
+            return None
+        return (
+            "# Current Date and Time\n\n"
+            + "\n".join(rows)
+            + "\n\nUse this section as authoritative for relative date/time references such as "
+            '"today", "tomorrow", "now", "this week", and "next month".'
+        )
+
    def build_skill_activation_messages(self, activated_skills: list[SkillContext]) -> list[dict[str, str]]:
        """把已激活 skill 转成显式消息。

--- a/app-instance/backend/beaver/engine/loader.py
+++ b/app-instance/backend/beaver/engine/loader.py
@ -24,7 +24,7 @@ from beaver.skills.learning.eval import SkillDraftEvaluator
 from beaver.skills.publisher import SkillPublisher
 from beaver.skills.reviews import ReviewService
 from beaver.skills.specs import SkillSpecStore
-from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
+from beaver.tasks import TaskExecutionPlanner, TaskService
 from beaver.tasks.skill_resolver import TaskSkillResolver
 from beaver.skills import SkillAssembler, SkillsLoader
 from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
@ -91,7 +91,6 @@ class EngineLoadResult:
    task_skill_resolver: TaskSkillResolver | None = None
    task_service: TaskService | None = None
    task_execution_planner: TaskExecutionPlanner | None = None
-    validation_service: ValidationService | None = None
    mcp_manager: MCPConnectionManager | None = None
    mcp_report: dict[str, dict] = field(default_factory=dict)
    closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
@ -166,7 +165,6 @@ class EngineLoader:
        task_skill_resolver: TaskSkillResolver | None = None,
        task_service: TaskService | None = None,
        task_execution_planner: TaskExecutionPlanner | None = None,
-        validation_service: ValidationService | None = None,
    ) -> None:
        self.config = config or load_config(workspace=workspace, config_path=config_path)
        configured_workspace = self.config.agents_defaults.workspace
@ -192,7 +190,6 @@ class EngineLoader:
        self._task_skill_resolver = task_skill_resolver
        self._task_service = task_service
        self._task_execution_planner = task_execution_planner
-        self._validation_service = validation_service

    def load(self) -> EngineLoadResult:
        """装配当前主链需要的最小 runtime 对象。"""
@ -276,7 +273,6 @@ class EngineLoader:
        )
        task_service = self._task_service or TaskService(workspace / "tasks")
        task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
-        validation_service = self._validation_service or ValidationService()
        mcp_manager = MCPConnectionManager(
            self.config.tools.mcp_servers,
            authz_config=self.config.authz,
@ -311,7 +307,6 @@ class EngineLoader:
            task_skill_resolver=task_skill_resolver,
            task_service=task_service,
            task_execution_planner=task_execution_planner,
-            validation_service=validation_service,
            mcp_manager=mcp_manager,
        )
        if self._session_manager is None:
--- a/app-instance/backend/beaver/engine/loop.py
+++ b/app-instance/backend/beaver/engine/loop.py
@ -4,12 +4,15 @@ from __future__ import annotations

 import asyncio
 import json
+import os
+import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Any
 from uuid import uuid4
+from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

-from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
+from beaver.engine.context import ContextBuildInput, RuntimeContext, SessionContext, SkillContext
 from beaver.memory.runs import RunRecord, SkillEffectRecord
 from beaver.skills.learning import RunReceiptContext
 from beaver.skills.catalog.utils import strip_frontmatter
@ -26,6 +29,17 @@ TOOL_FAILURE_GUIDANCE_PROMPT = (
    "Use available materials, state uncertainty clearly, and provide partial confirmed results."
 )

+RAW_TOOL_CALL_FALLBACK = (
+    "The run reached the configured tool-call limit before producing a reliable final answer. "
+    "The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
+    "Please request a revision to continue the task."
+)
+
+_RAW_TOOL_CALL_RE = re.compile(
+    r"^\s*<tool_call\b[\s\S]*?</tool_call>\s*$|^\s*<function=[^>]+>[\s\S]*?</function>\s*$",
+    re.IGNORECASE,
+)
+

@dataclass(slots=True)
 class AgentProfile:
@ -35,8 +49,9 @@ class AgentProfile:
    system_prompt: str = ""
    default_model: str = "gpt-4.1-mini"
    max_tokens: int = 4096
+    max_context_messages: int = 1000
    temperature: float = 0.2
-    max_tool_iterations: int = 8
+    max_tool_iterations: int = 30


@dataclass(slots=True)
@ -446,7 +461,7 @@ class AgentLoop:
                *(pinned_skill_contexts or []),
                *self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
            ]
-            if not include_skill_assembly or thinking_enabled is False:
+            if not include_skill_assembly:
                activated_skills = self._merge_skill_contexts(pinned_skills, [])
            else:
                skill_query = skill_selection_context or task
@ -512,8 +527,6 @@ class AgentLoop:

            if not include_tools:
                selected_tool_specs = []
-            elif thinking_enabled is False:
-                selected_tool_specs = tool_registry.list_specs()
            else:
                selected_tool_specs = await tool_assembler.assemble(
                    task_description=task,
@ -543,7 +556,10 @@ class AgentLoop:

            build_input = ContextBuildInput(
                base_system_prompt=self.profile.system_prompt,
-                history=session_manager.get_history(resolved_session_id),
+                history=session_manager.get_history(
+                    resolved_session_id,
+                    max_messages=max(1, self.profile.max_context_messages),
+                ),
                current_user_input=task,
                memory_snapshot=memory_snapshot,
                activated_skills=activated_skills,
@ -554,6 +570,7 @@ class AgentLoop:
                    user_id=user_id,
                    parent_session_id=parent_session_id,
                ),
+                runtime_context=self._current_runtime_context(),
                execution_context=execution_context,
                extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
            )
@ -693,6 +710,7 @@ class AgentLoop:
                    tool_calls=assistant_tool_calls or None,
                    finish_reason=response.finish_reason,
                    reasoning=response.reasoning_content,
+                    context_visible=not bool(assistant_tool_calls),
                    source=source,
                    title=title,
                    model=final_model,
@ -707,7 +725,11 @@ class AgentLoop:

                if not response.has_tool_calls:
                    final_text = response.content or ""
-                    final_finish_reason = response.finish_reason or "stop"
+                    if self._looks_like_raw_tool_call(final_text):
+                        final_text = RAW_TOOL_CALL_FALLBACK
+                        final_finish_reason = "invalid_tool_call_text"
+                    else:
+                        final_finish_reason = response.finish_reason or "stop"
                    break

                if iterations >= resolved_max_tool_iterations:
@ -719,10 +741,7 @@ class AgentLoop:
                        temperature=resolved_temperature,
                        thinking_enabled=thinking_enabled,
                    )
-                    final_text = finalized or (
-                        "Tool loop stopped after reaching the configured iteration limit, "
-                        "and no final answer was produced."
-                    )
+                    final_text = finalized or RAW_TOOL_CALL_FALLBACK
                    final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
                    session_manager.append_message(
                        resolved_session_id,
@ -877,17 +896,14 @@ class AgentLoop:
        temperature: float,
        thinking_enabled: bool | None,
    ) -> str:
-        final_messages = [
-            *messages,
-            {
-                "role": "system",
-                "content": (
-                    "The configured tool iteration budget is exhausted. Do not call tools. "
-                    "Produce the best final answer from the existing conversation and tool results. "
-                    "State uncertainty explicitly."
-                ),
-            },
-        ]
+        final_messages = AgentLoop._with_system_guidance(
+            messages,
+            (
+                "The configured tool iteration budget is exhausted. Do not call tools. "
+                "Produce the best final answer from the existing conversation and tool results. "
+                "State uncertainty explicitly."
+            ),
+        )
        kwargs: dict[str, Any] = {
            "messages": final_messages,
            "tools": None,
@ -898,7 +914,27 @@ class AgentLoop:
        if thinking_enabled is not None:
            kwargs["thinking_enabled"] = thinking_enabled
        response = await provider.chat(**kwargs)
-        return (response.content or "").strip()
+        if response.has_tool_calls:
+            return ""
+        content = (response.content or "").strip()
+        if AgentLoop._looks_like_raw_tool_call(content):
+            return ""
+        return content
+
+    @staticmethod
+    def _looks_like_raw_tool_call(content: str | None) -> bool:
+        if not content:
+            return False
+        return bool(_RAW_TOOL_CALL_RE.match(content))
+
+    @staticmethod
+    def _with_system_guidance(messages: list[dict[str, Any]], guidance: str) -> list[dict[str, Any]]:
+        copied = [dict(message) for message in messages]
+        if copied and copied[0].get("role") == "system":
+            existing = str(copied[0].get("content") or "").strip()
+            copied[0]["content"] = "\n\n".join(part for part in (existing, guidance.strip()) if part)
+            return copied
+        return [{"role": "system", "content": guidance.strip()}, *copied]

    @staticmethod
    def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
@ -1133,3 +1169,49 @@ class AgentLoop:
    @staticmethod
    def _utc_now() -> str:
        return datetime.now(timezone.utc).isoformat()
+
+    @staticmethod
+    def _current_runtime_context() -> RuntimeContext:
+        utc_now = datetime.now(timezone.utc)
+        timezone_name = AgentLoop._configured_timezone_name()
+        local_now = datetime.now().astimezone()
+        rendered_timezone = local_now.tzname()
+
+        if timezone_name:
+            try:
+                local_now = utc_now.astimezone(ZoneInfo(timezone_name))
+                rendered_timezone = timezone_name
+            except ZoneInfoNotFoundError:
+                rendered_timezone = local_now.tzname() or timezone_name
+
+        return RuntimeContext(
+            utc_datetime=utc_now.isoformat(),
+            local_datetime=local_now.isoformat(),
+            timezone=rendered_timezone,
+            utc_offset=AgentLoop._format_utc_offset(local_now),
+        )
+
+    @staticmethod
+    def _configured_timezone_name() -> str | None:
+        for value in (os.getenv("BEAVER_RUNTIME_TIMEZONE"), os.getenv("TZ")):
+            cleaned = (value or "").strip()
+            if cleaned:
+                return cleaned
+
+        try:
+            timezone_file = "/etc/timezone"
+            if os.path.exists(timezone_file):
+                with open(timezone_file, encoding="utf-8") as file:
+                    cleaned = file.read().strip()
+                if cleaned:
+                    return cleaned
+        except OSError:
+            return None
+        return None
+
+    @staticmethod
+    def _format_utc_offset(value: datetime) -> str | None:
+        raw = value.strftime("%z")
+        if not raw:
+            return None
+        return f"{raw[:3]}:{raw[3:]}"
--- a/app-instance/backend/beaver/engine/providers/litellm.py
+++ b/app-instance/backend/beaver/engine/providers/litellm.py
@ -119,13 +119,23 @@ class LiteLLMProvider(LLMProvider):
    @staticmethod
    def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        sanitized = []
+        system_contents: list[str] = []
        for message in messages:
            clean = {key: value for key, value in message.items() if key in _ALLOWED_MSG_KEYS}
+            if clean.get("role") == "system":
+                content = clean.get("content")
+                if isinstance(content, str) and content.strip():
+                    system_contents.append(content.strip())
+                elif content is not None:
+                    system_contents.append(str(content))
+                continue
            if clean.get("role") == "assistant" and "content" not in clean:
                clean["content"] = None
            if isinstance(clean.get("tool_calls"), list):
                clean["tool_calls"] = LiteLLMProvider._sanitize_tool_calls(clean["tool_calls"])
            sanitized.append(clean)
+        if system_contents:
+            sanitized.insert(0, {"role": "system", "content": "\n\n".join(system_contents)})
        return sanitized

    @staticmethod
--- a/app-instance/backend/beaver/engine/session/models.py
+++ b/app-instance/backend/beaver/engine/session/models.py
@ -84,8 +84,10 @@ class MessageRecord:
                payload["task_id"] = self.event_payload.get("task_id")
            if self.event_payload.get("task_status"):
                payload["task_status"] = self.event_payload.get("task_status")
-            if self.event_payload.get("validation_status"):
-                payload["validation_status"] = self.event_payload.get("validation_status")
+            if self.event_payload.get("evidence_status"):
+                payload["evidence_status"] = self.event_payload.get("evidence_status")
+            if self.event_payload.get("acceptance_state"):
+                payload["acceptance_state"] = self.event_payload.get("acceptance_state")
            if self.event_payload.get("feedback_state"):
                payload["feedback_state"] = self.event_payload.get("feedback_state")
            if self.event_payload.get("feedback_error"):
--- a/app-instance/backend/beaver/foundation/config/loader.py
+++ b/app-instance/backend/beaver/foundation/config/loader.py
@ -86,6 +86,18 @@ def _parse_agent_defaults(data: dict[str, Any]) -> AgentDefaultsConfig:
        model=_string(defaults.get("model") or data.get("model")),
        provider=_string(defaults.get("provider") or data.get("provider")),
        embedding_model=_string(defaults.get("embeddingModel") or defaults.get("embedding_model") or data.get("embeddingModel")),
+        max_context_messages=_int(
+            defaults.get("maxContextMessages")
+            or defaults.get("max_context_messages")
+            or data.get("maxContextMessages")
+            or data.get("max_context_messages")
+        ),
+        max_tool_iterations=_int(
+            defaults.get("maxToolIterations")
+            or defaults.get("max_tool_iterations")
+            or data.get("maxToolIterations")
+            or data.get("max_tool_iterations")
+        ),
    )


@ -217,6 +229,13 @@ def _float(value: Any) -> float | None:
    return float(value)


+def _int(value: Any) -> int | None:
+    parsed = _float(value)
+    if parsed is None:
+        return None
+    return int(parsed)
+
+
 def _bool(value: Any, *, default: bool) -> bool:
    if isinstance(value, bool):
        return value
--- a/app-instance/backend/beaver/foundation/config/schema.py
+++ b/app-instance/backend/beaver/foundation/config/schema.py
@ -25,6 +25,8 @@ class AgentDefaultsConfig:
    model: str | None = None
    provider: str | None = None
    embedding_model: str | None = None
+    max_context_messages: int | None = None
+    max_tool_iterations: int | None = None


@dataclass(slots=True)
--- a/app-instance/backend/beaver/interfaces/web/app.py
+++ b/app-instance/backend/beaver/interfaces/web/app.py
@ -44,6 +44,8 @@ from .files import (
    workspace_file_path,
 )
 from .schemas import (
+    WebChatAcceptanceRequest,
+    WebChatAcceptanceResponse,
    WebChatFeedbackRequest,
    WebChatFeedbackResponse,
    WebChatRequest,
@ -155,6 +157,13 @@ except ModuleNotFoundError:  # pragma: no cover - fallback for skeleton-only env
            return decorator


+RAW_TOOL_CALL_DISPLAY_FALLBACK = (
+    "The run reached the configured tool-call limit before producing a reliable final answer. "
+    "The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
+    "Please request a revision to continue the task."
+)
+
+
@asynccontextmanager
 async def _app_lifespan(
    app: FastAPI,
@ -365,6 +374,7 @@ def create_app(
            "workspace_exists": loaded.workspace.exists(),
            "model": config.default_model or agent_service.profile.default_model,
            "max_tokens": agent_service.profile.max_tokens,
+            "max_context_messages": agent_service.profile.max_context_messages,
            "temperature": agent_service.profile.temperature,
            "max_tool_iterations": agent_service.profile.max_tool_iterations,
            "providers": providers_status,
@ -1719,7 +1729,8 @@ def create_app(
                usage=result.usage,
                task_id=result.task_id,
                task_status=result.task_status,
-                validation_result=result.validation_result,
+                evidence_status="recorded" if result.task_id else None,
+                validation_result=None,
            )

        fallback_target = _model_dump(payload.fallback_target)
@ -1769,7 +1780,8 @@ def create_app(
            usage=result.usage,
            task_id=result.task_id,
            task_status=result.task_status,
-            validation_result=result.validation_result,
+            evidence_status="recorded" if result.task_id else None,
+            validation_result=None,
        )

    @app.websocket("/ws/{session_id:path}")
@ -1882,6 +1894,30 @@ def create_app(
                }
            )

+    @app.post(
+        "/api/chat/acceptance",
+        response_model=WebChatAcceptanceResponse,
+        responses={
+            400: {"model": WebErrorResponse},
+            404: {"model": WebErrorResponse},
+        },
+    )
+    async def chat_acceptance(request: Request, payload: WebChatAcceptanceRequest) -> WebChatAcceptanceResponse:
+        agent_service = get_agent_service(request)
+        try:
+            result = await agent_service.submit_acceptance(
+                session_id=payload.session_id,
+                run_id=payload.run_id,
+                acceptance_type=payload.acceptance_type,
+                comment=payload.comment,
+            )
+        except ValueError as exc:
+            detail = str(exc)
+            status_code = 404 if "No internal task" in detail else 400
+            raise HTTPException(status_code=status_code, detail=detail) from exc
+
+        return WebChatAcceptanceResponse(**result)
+
    @app.post(
        "/api/chat/feedback",
        response_model=WebChatFeedbackResponse,
@ -1893,10 +1929,10 @@ def create_app(
    async def chat_feedback(request: Request, payload: WebChatFeedbackRequest) -> WebChatFeedbackResponse:
        agent_service = get_agent_service(request)
        try:
-            result = await agent_service.submit_feedback(
+            result = await agent_service.submit_acceptance(
                session_id=payload.session_id,
                run_id=payload.run_id,
-                feedback_type=payload.feedback_type,
+                acceptance_type=payload.feedback_type,
                comment=payload.comment,
            )
        except ValueError as exc:
@ -1915,15 +1951,21 @@ def _session_detail(session_manager: Any, session_id: str, session: dict[str, An
        role = event.get("role")
        if role not in {"user", "assistant"}:
            continue
+        content = event.get("content") or ""
+        comparable_content = str(content).replace("\u200b", "").replace("\u200c", "").replace("\u200d", "").replace("\ufeff", "")
+        if role == "assistant" and not comparable_content.strip():
+            continue
+        content = _sanitize_user_visible_assistant_content(role=role, content=content)
        messages.append(
            {
                "role": role,
-                "content": event.get("content") or "",
+                "content": content,
                "timestamp": _iso_from_timestamp(event.get("timestamp")),
                "run_id": event.get("run_id"),
                "task_id": event.get("task_id"),
                "task_status": event.get("task_status"),
-                "validation_status": event.get("validation_status"),
+                "evidence_status": event.get("evidence_status"),
+                "acceptance_state": event.get("acceptance_state"),
                "feedback_state": event.get("feedback_state"),
                "feedback_error": event.get("feedback_error"),
                "message_type": event.get("message_type"),
@ -2142,6 +2184,7 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
            content = (record.content or "").strip()
            if not content:
                continue
+            content = _sanitize_user_visible_assistant_content(role=record.role, content=content)
            messages.append(
                {
                    "role": record.role,
@ -2150,7 +2193,6 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
                    "tool_name": record.tool_name,
                }
            )
-        validation = run_record.validation_result if run_record is not None else None
        views.append(
            {
                "run_id": run_id,
@ -2163,7 +2205,8 @@ def _task_run_views(task: Any, events: list[Any], session_manager: Any, run_memo
                "attempt_index": run_record.attempt_index if run_record is not None else None,
                "task_text": run_record.task_text if run_record is not None else "",
                "messages": messages,
-                "validation_result": validation,
+                "evidence_status": "recorded",
+                "validation_result": None,
            }
        )
    return views
@ -2428,12 +2471,6 @@ def _model_dump(value: Any) -> dict[str, Any] | None:
    return dict(value)


-def _validation_status(validation_result: dict[str, Any] | None) -> str:
-    if validation_result is None:
-        return "unknown"
-    return "passed" if validation_result.get("accepted") is True else "failed"
-
-
 def _websocket_input_metadata(payload: dict[str, Any]) -> dict[str, Any]:
    metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
    result: dict[str, Any] = dict(metadata)
@ -2467,13 +2504,15 @@ def _int_or_none(value: Any) -> int | None:


 def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) -> dict[str, Any]:
-    validation_result = getattr(result, "validation_result", None)
    task_id = getattr(result, "task_id", None)
    task_status = getattr(result, "task_status", None)
    return {
        "type": "message",
        "role": "assistant",
-        "content": getattr(result, "output_text", "") or "",
+        "content": _sanitize_user_visible_assistant_content(
+            role="assistant",
+            content=getattr(result, "output_text", "") or "",
+        ),
        "session_id": getattr(result, "session_id", None),
        "run_id": getattr(result, "run_id", None),
        "finish_reason": getattr(result, "finish_reason", None),
@ -2483,17 +2522,39 @@ def _websocket_message_payload(result: Any, *, input_payload: dict[str, Any]) ->
        "usage": dict(getattr(result, "usage", {}) or {}),
        "task_id": task_id,
        "task_status": task_status,
-        "validation_result": validation_result,
-        "validation_status": _validation_status(validation_result),
+        "evidence_status": "recorded" if task_id else None,
+        "validation_result": None,
        "metadata": {
            "task_id": task_id,
            "task_status": task_status,
-            "validation_result": validation_result,
+            "evidence_status": "recorded" if task_id else None,
            "input_metadata": _websocket_input_metadata(input_payload),
        },
    }


+def _sanitize_user_visible_assistant_content(*, role: str, content: str) -> str:
+    if role != "assistant":
+        return content
+    if _looks_like_raw_tool_call(content):
+        return RAW_TOOL_CALL_DISPLAY_FALLBACK
+    return content
+
+
+def _looks_like_raw_tool_call(content: str | None) -> bool:
+    if not content:
+        return False
+    stripped = content.strip()
+    lowered = stripped.lower()
+    return (
+        lowered.startswith("<tool_call")
+        and lowered.endswith("</tool_call>")
+    ) or (
+        lowered.startswith("<function=")
+        and lowered.endswith("</function>")
+    )
+
+
 def _provider_enabled(provider_name: str, provider_cfg: Any) -> bool:
    if provider_cfg is None or provider_name == "custom":
        return False
@ -2980,6 +3041,7 @@ def _write_config_json(path: Path, data: dict[str, Any]) -> None:
 def _reload_agent_config(agent_service: AgentService, config_path: Path) -> None:
    config = load_config(config_path=config_path)
    agent_service.loader.config = config
+    agent_service._apply_configured_profile_defaults()  # noqa: SLF001
    loop = getattr(agent_service, "_loop", None)
    loaded = getattr(loop, "loaded", None) if loop is not None else None
    if loaded is not None:
--- a/app-instance/backend/beaver/interfaces/web/schemas/__init__.py
+++ b/app-instance/backend/beaver/interfaces/web/schemas/__init__.py
@ -1,6 +1,8 @@
 """Web request and response schemas."""

 from .chat import (
+    WebChatAcceptanceRequest,
+    WebChatAcceptanceResponse,
    WebChatFeedbackRequest,
    WebChatFeedbackResponse,
    WebChatRequest,
@ -13,6 +15,8 @@ from .chat import (
 )

 __all__ = [
+    "WebChatAcceptanceRequest",
+    "WebChatAcceptanceResponse",
    "WebChatFeedbackRequest",
    "WebChatFeedbackResponse",
    "WebChatRequest",
--- a/app-instance/backend/beaver/interfaces/web/schemas/chat.py
+++ b/app-instance/backend/beaver/interfaces/web/schemas/chat.py
@ -82,11 +82,34 @@ class WebChatResponse(BaseModel):
    usage: dict[str, Any] = Field(default_factory=dict)
    task_id: str | None = None
    task_status: str | None = None
+    evidence_status: str | None = None
+    acceptance_state: str | None = None
    validation_result: dict[str, Any] | None = None


+class WebChatAcceptanceRequest(BaseModel):
+    """User acceptance on the latest assistant result in chat."""
+
+    session_id: str
+    run_id: str
+    acceptance_type: str
+    comment: str | None = None
+
+
+class WebChatAcceptanceResponse(BaseModel):
+    """Acceptance recording result."""
+
+    session_id: str
+    run_id: str
+    task_id: str
+    task_status: str
+    acceptance_type: str
+    feedback_type: str
+    learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
+
+
 class WebChatFeedbackRequest(BaseModel):
-    """Feedback on the latest assistant result in chat."""
+    """Backward-compatible feedback payload."""

    session_id: str
    run_id: str
@ -94,15 +117,8 @@ class WebChatFeedbackRequest(BaseModel):
    comment: str | None = None


-class WebChatFeedbackResponse(BaseModel):
-    """Feedback recording result."""
-
-    session_id: str
-    run_id: str
-    task_id: str
-    task_status: str
-    feedback_type: str
-    learning_candidates: list[dict[str, Any]] = Field(default_factory=list)
+class WebChatFeedbackResponse(WebChatAcceptanceResponse):
+    """Backward-compatible feedback response."""


 class WebProviderConfigRequest(BaseModel):
--- a/app-instance/backend/beaver/services/agent_service.py
+++ b/app-instance/backend/beaver/services/agent_service.py
@ -29,9 +29,9 @@ from beaver.tasks import (
    TaskEvidencePacket,
    TaskExecutionPlan,
    TaskRecord,
-    ValidationResult,
    render_task_evidence,
 )
+from beaver.tasks.service import normalize_acceptance_type


 NOTIFICATION_SESSION_ID = "notify:default:scheduled"
@ -60,11 +60,19 @@ class AgentService:
    ) -> None:
        self.profile = profile or AgentProfile()
        self.loader = loader or EngineLoader(workspace=workspace, config_path=config_path)
+        self._apply_configured_profile_defaults()
        self._loop: AgentLoop | None = None
        self._run_task: asyncio.Task[None] | None = None
        self._main_agent_router = MainAgentRouter()
        self._runtime_services: dict[str, Any] = {}

+    def _apply_configured_profile_defaults(self) -> None:
+        defaults = self.loader.config.agents_defaults
+        if defaults.max_context_messages is not None:
+            self.profile.max_context_messages = max(1, defaults.max_context_messages)
+        if defaults.max_tool_iterations is not None:
+            self.profile.max_tool_iterations = max(0, defaults.max_tool_iterations)
+
    def create_loop(self) -> AgentLoop:
        """创建并缓存当前 service 使用的 AgentLoop。"""

@ -232,7 +240,7 @@ class AgentService:

        Scheduled jobs are product-level Tasks, not hidden one-off agent turns.
        This entry bypasses the main-agent classifier and forces Task mode so
-        every trigger produces a TaskRecord, validation, feedback state, and a
+        every trigger produces a TaskRecord, evidence, acceptance state, and a
        run_id that the scheduled-task history can link to.
        """

@ -280,9 +288,9 @@ class AgentService:
            result.run_id,
            {
                "message_type": "scheduled_reply",
-                "scheduled_job_id": job.id,
-                "scheduled_run_id": run.scheduled_run_id,
-                "cron_job_name": job.name,
+                "scheduled_job_id": cron_job_id,
+                "scheduled_run_id": scheduled_run_id,
+                "cron_job_name": cron_job_name,
                "mode": "notification",
            },
        )
@ -403,15 +411,15 @@ class AgentService:
            },
        )

-    async def submit_feedback(
+    async def submit_acceptance(
        self,
        *,
        session_id: str,
        run_id: str,
-        feedback_type: str,
+        acceptance_type: str,
        comment: str | None = None,
    ) -> dict[str, Any]:
-        """Record chat feedback for the internal task linked to a run."""
+        """Record user acceptance for the internal task linked to a run."""

        loaded = self.create_loop().boot()
        task_service = self._require_loaded(loaded, "task_service")
@ -419,32 +427,31 @@ class AgentService:
        if task is None or task.session_id != session_id:
            raise ValueError(f"No internal task found for run_id={run_id!r}")

-        normalized = feedback_type.strip().lower()
-        if normalized not in {"satisfied", "revise", "abandon"}:
-            raise ValueError("feedback_type must be one of: satisfied, revise, abandon")
+        normalized = normalize_acceptance_type(acceptance_type)
+        legacy_feedback_type = "satisfied" if normalized == "accept" else normalized

        already_recorded = any(
-            item.get("run_id") == run_id and item.get("feedback_type") == normalized
+            item.get("run_id") == run_id and item.get("acceptance_type") == normalized
            for item in task.feedback
        )
-        conflicting_feedback = next(
+        conflicting_acceptance = next(
            (
                item
                for item in task.feedback
-                if item.get("run_id") == run_id and item.get("feedback_type") != normalized
+                if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
            ),
            None,
        )
-        if conflicting_feedback is not None:
+        if conflicting_acceptance is not None:
            raise ValueError(
-                f"Feedback for run_id={run_id!r} was already recorded as "
-                f"{conflicting_feedback.get('feedback_type')!r}"
+                f"Acceptance for run_id={run_id!r} was already recorded as "
+                f"{conflicting_acceptance.get('acceptance_type')!r}"
            )
        if task.status in {"closed", "abandoned"} and not already_recorded:
            raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
-        updated = task if already_recorded else task_service.add_feedback(
+        updated = task if already_recorded else task_service.add_acceptance(
            task.task_id,
-            feedback_type=normalized,
+            acceptance_type=normalized,
            comment=comment,
            run_id=run_id,
        )
@ -455,7 +462,8 @@ class AgentService:
            {
                "task_id": updated.task_id,
                "task_status": updated.status,
-                "feedback_state": normalized,
+                "acceptance_state": normalized,
+                "feedback_state": legacy_feedback_type,
            },
        )
        if not already_recorded:
@ -463,10 +471,11 @@ class AgentService:
                session_id,
                run_id=run_id,
                role="system",
-                event_type="task_feedback_recorded",
+                event_type="task_acceptance_recorded",
                event_payload={
                    "task_id": task.task_id,
-                    "feedback_type": normalized,
+                    "acceptance_type": normalized,
+                    "feedback_type": legacy_feedback_type,
                    "comment": comment,
                    "task_status": updated.status,
                },
@ -475,35 +484,36 @@ class AgentService:
            )

        generated_candidates = []
-        validation = ValidationResult.from_dict(updated.validation_result)
        if not already_recorded:
            run_memory_store = self._require_loaded(loaded, "run_memory_store")
-            feedback_payload = {
-                "feedback_type": normalized,
+            acceptance_payload = {
+                "acceptance_type": normalized,
+                "feedback_type": legacy_feedback_type,
                "comment": comment or "",
                "task_status": updated.status,
+                "final_accepted_run_id": updated.metadata.get("final_accepted_run_id"),
            }
            run_memory_store.update_run_record(
                run_id,
-                success=normalized == "satisfied",
-                feedback=feedback_payload,
+                success=normalized == "accept",
+                feedback=acceptance_payload,
            )
            run_memory_store.update_skill_effects_for_run(
                run_id,
-                success=normalized == "satisfied",
-                feedback_score=self._feedback_score_for_learning(normalized, validation),
+                success=normalized == "accept",
+                feedback_score=self._acceptance_score_for_learning(normalized),
                notes=(comment or normalized).strip(),
            )
            skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
            skill_learning_service.rescore_skill_versions()
        if already_recorded:
            generated_candidates = []
-        elif normalized == "satisfied" and validation is not None and validation.accepted:
+        elif normalized == "accept":
            generated_candidates = [
                item.to_dict()
                for item in skill_learning_service.build_learning_candidates_for_task(
                    updated.task_id,
-                    trigger_run_id=run_id,
+                    final_accepted_run_id=run_id,
                )
            ]
        elif normalized == "abandon":
@ -514,7 +524,8 @@ class AgentService:
                event_type="task_failure_evidence_recorded",
                event_payload={
                    "task_id": updated.task_id,
-                    "feedback_type": normalized,
+                    "acceptance_type": normalized,
+                    "feedback_type": legacy_feedback_type,
                    "comment": comment or "",
                    "task_status": updated.status,
                    "durable_memory_written": False,
@ -528,10 +539,28 @@ class AgentService:
            "run_id": run_id,
            "task_id": updated.task_id,
            "task_status": updated.status,
-            "feedback_type": normalized,
+            "acceptance_type": normalized,
+            "feedback_type": legacy_feedback_type,
            "learning_candidates": generated_candidates,
        }

+    async def submit_feedback(
+        self,
+        *,
+        session_id: str,
+        run_id: str,
+        feedback_type: str,
+        comment: str | None = None,
+    ) -> dict[str, Any]:
+        """Backward-compatible wrapper for older clients."""
+
+        return await self.submit_acceptance(
+            session_id=session_id,
+            run_id=run_id,
+            acceptance_type=feedback_type,
+            comment=comment,
+        )
+
    async def _process_with_main_agent(
        self,
        message: str,
@ -591,7 +620,7 @@ class AgentService:
            else active_task
        )
        if active_task is not None and decision.action == "revise_task" and task.task_id == active_task.task_id:
-            task = self._record_revision_feedback_for_task(
+            task = self._record_revision_acceptance_for_task(
                loaded,
                task=task,
                session_id=session_id,
@ -599,7 +628,7 @@ class AgentService:
            )
        return await self._run_task_mode(message, runner=runner, kwargs=kwargs, task=task)

-    def _record_revision_feedback_for_task(
+    def _record_revision_acceptance_for_task(
        self,
        loaded: Any,
        *,
@ -607,9 +636,9 @@ class AgentService:
        session_id: str,
        comment: str,
    ) -> TaskRecord:
-        """Mark the latest feedback-eligible run as revised before continuing a task."""
+        """Mark the latest acceptance-eligible run as revised before continuing a task."""

-        if task.status not in {"awaiting_feedback", "needs_revision"}:
+        if task.status not in {"awaiting_acceptance", "needs_revision"}:
            return task
        run_id = next((item for item in reversed(task.run_ids) if item), None)
        if not run_id:
@ -617,15 +646,15 @@ class AgentService:

        existing = next((item for item in task.feedback if item.get("run_id") == run_id), None)
        if existing is not None:
-            if existing.get("feedback_type") != "revise":
+            if existing.get("acceptance_type") != "revise":
                return task
            updated = task
            already_recorded = True
        else:
            task_service = self._require_loaded(loaded, "task_service")
-            updated = task_service.add_feedback(
+            updated = task_service.add_acceptance(
                task.task_id,
-                feedback_type="revise",
+                acceptance_type="revise",
                comment=comment,
                run_id=run_id,
            )
@ -638,6 +667,7 @@ class AgentService:
            {
                "task_id": updated.task_id,
                "task_status": updated.status,
+                "acceptance_state": "revise",
                "feedback_state": "revise",
            },
        )
@ -648,9 +678,10 @@ class AgentService:
            session_id,
            run_id=run_id,
            role="system",
-            event_type="task_feedback_recorded",
+            event_type="task_acceptance_recorded",
            event_payload={
                "task_id": updated.task_id,
+                "acceptance_type": "revise",
                "feedback_type": "revise",
                "comment": comment,
                "task_status": updated.status,
@ -659,12 +690,12 @@ class AgentService:
            content=comment,
            context_visible=False,
        )
-        validation = ValidationResult.from_dict(updated.validation_result)
        run_memory_store = self._require_loaded(loaded, "run_memory_store")
        run_memory_store.update_run_record(
            run_id,
            success=False,
            feedback={
+                "acceptance_type": "revise",
                "feedback_type": "revise",
                "comment": comment,
                "task_status": updated.status,
@ -673,7 +704,7 @@ class AgentService:
        run_memory_store.update_skill_effects_for_run(
            run_id,
            success=False,
-            feedback_score=self._feedback_score_for_learning("revise", validation),
+            feedback_score=self._acceptance_score_for_learning("revise"),
            notes=comment.strip() or "revise",
        )
        skill_learning_service = self._require_loaded(loaded, "skill_learning_service")
@ -690,236 +721,185 @@ class AgentService:
    ) -> AgentRunResult:
        loaded = self.create_loop().boot()
        task_service = self._require_loaded(loaded, "task_service")
-        validation_service = self._require_loaded(loaded, "validation_service")
        task_execution_planner = self._require_loaded(loaded, "task_execution_planner")
        session_manager = self._require_loaded(loaded, "session_manager")
-        run_memory_store = self._require_loaded(loaded, "run_memory_store")

-        last_result: AgentRunResult | None = None
-        latest_validation: ValidationResult | None = None
        base_execution_context = kwargs.get("execution_context")
        provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs)
        kwargs = dict(kwargs)
        team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None)
        kwargs["provider_bundle"] = provider_bundle

-        for attempt_index in (1, 2):
-            task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
-            plan = await task_execution_planner.plan(
+        attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1
+        task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index)
+        plan = await task_execution_planner.plan(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+            provider_bundle=provider_bundle,
+        )
+        self._append_task_observation(
+            session_manager,
+            task.session_id,
+            event_type="task_execution_planned",
+            payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                **plan.to_event_payload(),
+            },
+        )
+        team_summaries: list[str] = []
+        team_execution_context = ""
+        team_result: TeamRunResult | None = None
+        if plan.is_team:
+            team_result, team_error = await self._run_team_for_task(
+                plan,
                task=task,
-                user_message=message,
-                attempt_index=attempt_index,
-                latest_validation=latest_validation,
-                provider_bundle=provider_bundle,
+                parent_session_id=kwargs["session_id"],
+                provider_bundle_factory=team_provider_bundle_factory
+                or self._build_team_provider_bundle_factory(loaded, kwargs),
            )
-            self._append_task_observation(
-                session_manager,
-                task.session_id,
-                event_type="task_execution_planned",
-                payload={
-                    "task_id": task.task_id,
-                    "attempt_index": attempt_index,
-                    **plan.to_event_payload(),
-                },
-            )
-            team_summaries: list[str] = []
-            team_execution_context = ""
-            team_result: TeamRunResult | None = None
-            if plan.is_team:
-                team_result, team_error = await self._run_team_for_task(
-                    plan,
-                    task=task,
-                    parent_session_id=kwargs["session_id"],
-                    provider_bundle_factory=team_provider_bundle_factory
-                    or self._build_team_provider_bundle_factory(loaded, kwargs),
+            if team_result is not None:
+                team_summaries = [self._team_summary_for_validation(team_result)]
+                team_packet = TaskEvidencePacket(
+                    task_id=task.task_id,
+                    attempt_index=attempt_index,
+                    main_run=None,
+                    team_runs=self._team_run_evidence(team_result),
+                    team_node_results=list(team_result.node_results),
+                    final_output="",
+                )
+                team_execution_context = self._join_context(
+                    self._team_execution_context(plan, team_result),
+                    "Rendered team evidence:\n" + render_task_evidence(team_packet),
+                )
+                self._append_task_observation(
+                    session_manager,
+                    task.session_id,
+                    event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
+                    payload={
+                        "task_id": task.task_id,
+                        "attempt_index": attempt_index,
+                        "plan_mode": plan.mode,
+                        "strategy": plan.graph.strategy if plan.graph else None,
+                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                        "team_run_ids": team_result.run_ids,
+                        "team_success": team_result.success,
+                        "node_results": self._team_node_results_for_event(plan, team_result),
+                        "reason": plan.reason,
+                        "error": None if team_result.success else "one or more team nodes failed",
+                    },
+                )
+            else:
+                team_summaries = [f"Team execution failed: {team_error}"]
+                team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
+                self._append_task_observation(
+                    session_manager,
+                    task.session_id,
+                    event_type="task_team_run_failed",
+                    payload={
+                        "task_id": task.task_id,
+                        "attempt_index": attempt_index,
+                        "plan_mode": plan.mode,
+                        "strategy": plan.graph.strategy if plan.graph else None,
+                        "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
+                        "team_run_ids": [],
+                        "team_success": False,
+                        "reason": plan.reason,
+                        "error": team_error,
+                    },
                )
-                if team_result is not None:
-                    team_summaries = [self._team_summary_for_validation(team_result)]
-                    team_packet = TaskEvidencePacket(
-                        task_id=task.task_id,
-                        attempt_index=attempt_index,
-                        main_run=None,
-                        team_runs=self._team_run_evidence(team_result),
-                        team_node_results=list(team_result.node_results),
-                        final_output="",
-                    )
-                    team_execution_context = self._join_context(
-                        self._team_execution_context(plan, team_result),
-                        "Rendered team evidence:\n" + render_task_evidence(team_packet),
-                    )
-                    self._append_task_observation(
-                        session_manager,
-                        task.session_id,
-                        event_type="task_team_run_completed" if team_result.success else "task_team_run_failed",
-                        payload={
-                            "task_id": task.task_id,
-                            "attempt_index": attempt_index,
-                            "plan_mode": plan.mode,
-                            "strategy": plan.graph.strategy if plan.graph else None,
-                            "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
-                            "team_run_ids": team_result.run_ids,
-                            "team_success": team_result.success,
-                            "node_results": self._team_node_results_for_event(plan, team_result),
-                            "reason": plan.reason,
-                            "error": None if team_result.success else "one or more team nodes failed",
-                        },
-                    )
-                else:
-                    team_summaries = [f"Team execution failed: {team_error}"]
-                    team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error")
-                    self._append_task_observation(
-                        session_manager,
-                        task.session_id,
-                        event_type="task_team_run_failed",
-                        payload={
-                            "task_id": task.task_id,
-                            "attempt_index": attempt_index,
-                            "plan_mode": plan.mode,
-                            "strategy": plan.graph.strategy if plan.graph else None,
-                            "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [],
-                            "team_run_ids": [],
-                            "team_success": False,
-                            "reason": plan.reason,
-                            "error": team_error,
-                        },
-                    )

-            attempt_kwargs = dict(kwargs)
-            attempt_kwargs.update(
-                {
-                    "task_id": task.task_id,
-                    "task_mode": True,
-                    "attempt_index": attempt_index,
-                    "allow_candidate_generation": False,
-                }
-            )
-            if attempt_index == 2 and latest_validation is not None:
-                revision_context = latest_validation.recommended_revision_prompt.strip()
-                if revision_context:
-                    attempt_kwargs["execution_context"] = self._join_context(
-                        base_execution_context,
-                        f"Task validation revision request:\n{revision_context}",
-                        team_execution_context,
-                    )
-            elif team_execution_context:
-                attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
-            if plan.is_team and team_execution_context:
-                attempt_kwargs["include_tools"] = False
-                attempt_kwargs["max_tool_iterations"] = 0
-            attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
-                task=task,
-                user_message=message,
-                attempt_index=attempt_index,
-                latest_validation=latest_validation,
-                plan=plan,
-                team_summaries=team_summaries,
-            )
-
-            result = await runner(message, **attempt_kwargs)
-            last_result = result
-            self._append_task_observation(
-                session_manager,
-                task.session_id,
-                event_type="task_synthesis_completed",
-                payload={
-                    "task_id": task.task_id,
-                    "attempt_index": attempt_index,
-                    "main_run_id": result.run_id,
-                    "plan_mode": plan.mode,
-                    "strategy": plan.graph.strategy if plan.graph else None,
-                },
-            )
-            task = task_service.append_run(
-                task.task_id,
-                result.run_id,
-                skill_names=self._skill_names_for_run(loaded, result.run_id),
-            )
-            evidence_packet = self._build_task_evidence_packet(
-                session_manager=session_manager,
-                task=task,
-                attempt_index=attempt_index,
-                result=result,
-                team_result=team_result,
-            )
-            evidence_text = render_task_evidence(evidence_packet)
-            validation = await validation_service.validate_task_result(
-                task=task,
-                user_message=message,
-                final_output=result.output_text,
-                evidence_packet=evidence_packet,
-                evidence_text=evidence_text,
-                transcript_excerpt=self._run_excerpt(session_manager, result.session_id, result.run_id),
-                tool_summaries=self._tool_summaries(session_manager, result.session_id, result.run_id),
-                team_summaries=team_summaries,
-                provider_bundle=provider_bundle,
-            )
-            latest_validation = validation
-            has_usable_answer = bool(result.output_text.strip()) and (
-                "Tool loop stopped after reaching the configured iteration limit." not in result.output_text
-            )
-            task = task_service.record_validation(
-                task.task_id,
-                result.run_id,
-                validation,
-                final_attempt=(
-                    attempt_index == 2
-                    or validation.status in {"accepted", "insufficient_evidence", "validator_error"}
-                ),
-                has_usable_answer=has_usable_answer,
-            )
-            run_memory_store.update_run_record(result.run_id, validation_result=validation.to_dict())
-            session_manager.update_latest_assistant_event_payload(
-                result.session_id,
-                result.run_id,
-                {
-                    "task_id": task.task_id,
-                    "task_status": task.status,
-                    "validation_status": "passed" if validation.accepted else "failed",
-                },
-            )
-            validation_debug = {
-                "evidence_run_ids": [
-                    item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
-                ],
-                "evidence_session_ids": [
-                    item.session_id
-                    for item in [evidence_packet.main_run, *evidence_packet.team_runs]
-                    if item is not None
-                ],
-                "tool_result_count": sum(
-                    len(item.tool_results)
-                    for item in [evidence_packet.main_run, *evidence_packet.team_runs]
-                    if item is not None
-                ),
-                "evidence_length": len(evidence_text),
+        attempt_kwargs = dict(kwargs)
+        attempt_kwargs.update(
+            {
+                "task_id": task.task_id,
+                "task_mode": True,
+                "attempt_index": attempt_index,
+                "allow_candidate_generation": False,
            }
-            retry_scheduled = validation.status == "rejected" and attempt_index == 1
-            session_manager.append_message(
-                result.session_id,
-                run_id=result.run_id,
-                role="system",
-                event_type="task_validation_snapshotted",
-                event_payload={
-                    "task_id": task.task_id,
-                    "attempt_index": attempt_index,
-                    "validation_result": validation.to_dict(),
-                    "validation_debug": validation_debug,
-                    "retry_scheduled": retry_scheduled,
-                },
-                content=validation.recommended_revision_prompt or None,
-                context_visible=False,
-            )
-            if retry_scheduled:
-                session_manager.set_run_context_visible(result.session_id, result.run_id, False)
-            result.task_id = task.task_id
-            result.task_status = task.status
-            result.validation_result = validation.to_dict()
-            if not retry_scheduled:
-                return result
+        )
+        if team_execution_context:
+            attempt_kwargs["execution_context"] = self._join_context(base_execution_context, team_execution_context)
+        if plan.is_team and team_execution_context:
+            attempt_kwargs["include_tools"] = False
+            attempt_kwargs["max_tool_iterations"] = 0
+        attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context(
+            task=task,
+            user_message=message,
+            attempt_index=attempt_index,
+            plan=plan,
+            team_summaries=team_summaries,
+        )

-        if last_result is None:  # pragma: no cover - defensive
-            raise RuntimeError("Task mode did not produce a run result")
-        return last_result
+        result = await runner(message, **attempt_kwargs)
+        self._append_task_observation(
+            session_manager,
+            task.session_id,
+            event_type="task_synthesis_completed",
+            payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                "main_run_id": result.run_id,
+                "plan_mode": plan.mode,
+                "strategy": plan.graph.strategy if plan.graph else None,
+            },
+        )
+        task = task_service.append_run(
+            task.task_id,
+            result.run_id,
+            skill_names=self._skill_names_for_run(loaded, result.run_id),
+        )
+        evidence_packet = self._build_task_evidence_packet(
+            session_manager=session_manager,
+            task=task,
+            attempt_index=attempt_index,
+            result=result,
+            team_result=team_result,
+        )
+        evidence_text = render_task_evidence(evidence_packet)
+        evidence_debug = {
+            "evidence_run_ids": [
+                item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None
+            ],
+            "evidence_session_ids": [
+                item.session_id
+                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
+                if item is not None
+            ],
+            "tool_result_count": sum(
+                len(item.tool_results)
+                for item in [evidence_packet.main_run, *evidence_packet.team_runs]
+                if item is not None
+            ),
+            "evidence_length": len(evidence_text),
+        }
+        session_manager.update_latest_assistant_event_payload(
+            result.session_id,
+            result.run_id,
+            {
+                "task_id": task.task_id,
+                "task_status": task.status,
+                "evidence_status": "recorded",
+            },
+        )
+        session_manager.append_message(
+            result.session_id,
+            run_id=result.run_id,
+            role="system",
+            event_type="task_evidence_recorded",
+            event_payload={
+                "task_id": task.task_id,
+                "attempt_index": attempt_index,
+                "evidence_debug": evidence_debug,
+            },
+            content=None,
+            context_visible=False,
+        )
+        result.task_id = task.task_id
+        result.task_status = task.status
+        result.validation_result = None
+        return result

    async def _run_team_for_task(
        self,
@ -986,12 +966,10 @@ class AgentService:
        return []

    @staticmethod
-    def _feedback_score_for_learning(feedback_type: str, validation: ValidationResult | None) -> float:
-        if feedback_type == "satisfied":
-            if validation is not None:
-                return max(0.0, min(1.0, float(validation.score)))
+    def _acceptance_score_for_learning(acceptance_type: str) -> float:
+        """Translate a user acceptance type into a learning score.
+
+        Returns 1.0 for ``"accept"``, 0.5 for ``"revise"`` and 0.0 for any
+        other value.
+        """
+        if acceptance_type == "accept":
            return 1.0
-        if feedback_type == "revise":
+        if acceptance_type == "revise":
            return 0.5
        return 0.0

@ -1001,12 +979,11 @@ class AgentService:
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
-        latest_validation: ValidationResult | None = None,
        plan: TaskExecutionPlan | None = None,
        team_summaries: list[str] | None = None,
    ) -> str:
        phase = f"attempt_{attempt_index}"
-        if latest_validation is not None:
+        if task.feedback and task.feedback[-1].get("acceptance_type") == "revise":
            phase = f"revision_attempt_{attempt_index}"
        elif plan is not None and plan.is_team:
            phase = f"team_synthesis_attempt_{attempt_index}"
@ -1027,24 +1004,14 @@ class AgentService:
            )
        else:
            sections.append("Previously activated skills:\nNone")
-        if latest_validation is not None:
-            validation_lines = [
-                f"accepted: {latest_validation.accepted}",
-                f"score: {latest_validation.score}",
-            ]
-            if latest_validation.issues:
-                validation_lines.append("issues:\n" + "\n".join(f"- {item}" for item in latest_validation.issues))
-            if latest_validation.missing_requirements:
-                validation_lines.append(
-                    "missing requirements:\n"
-                    + "\n".join(f"- {item}" for item in latest_validation.missing_requirements)
-                )
-            if latest_validation.recommended_revision_prompt:
-                validation_lines.append(
-                    "recommended revision:\n"
-                    + latest_validation.recommended_revision_prompt
-                )
-            sections.append("Validation feedback:\n" + "\n".join(validation_lines))
+        if task.feedback:
+            history_lines = []
+            for item in task.feedback[-5:]:
+                kind = item.get("acceptance_type") or item.get("feedback_type")
+                comment = item.get("comment") or ""
+                run_id = item.get("run_id") or ""
+                history_lines.append(f"- {kind} run={run_id}: {comment}".strip())
+            sections.append("Task acceptance history:\n" + "\n".join(history_lines))
        if plan is not None:
            plan_lines = [
                f"mode: {plan.mode}",
@ -1313,7 +1280,8 @@ class AgentService:
                "inbound_metadata": dict(inbound.metadata),
                "task_id": getattr(result, "task_id", None),
                "task_status": getattr(result, "task_status", None),
-                "validation_result": getattr(result, "validation_result", None),
+                "evidence_status": "recorded" if getattr(result, "task_id", None) else None,
+                "validation_result": None,
            },
        )

--- a/app-instance/backend/beaver/services/process_service.py
+++ b/app-instance/backend/beaver/services/process_service.py
@ -235,26 +235,45 @@ class SessionProcessProjector:
                        metadata=dict(payload),
                    )

-            elif record.event_type == "task_validation_snapshotted":
-                validation = payload.get("validation_result") if isinstance(payload.get("validation_result"), dict) else {}
-                accepted = bool(validation.get("accepted"))
-                root["status"] = "done" if accepted or attempt_index == 2 else "waiting"
-                root["finished_at"] = created_at if root["status"] == "done" else None
+            elif record.event_type == "task_evidence_recorded":
+                root["status"] = "waiting"
+                root["finished_at"] = None
                add_event(
-                    event_id=_event_id(record, "validation"),
+                    event_id=_event_id(record, "evidence"),
                    run_id=record.run_id or root_run_id,
                    parent_run_id=root_run_id if record.run_id else None,
                    kind="run_status",
                    actor_type="system",
-                    actor_id="validator",
-                    actor_name="Validator",
-                    text=(
-                        f"Validation {'passed' if accepted else 'failed'} "
-                        f"(score={validation.get('score')})."
-                        + (" Retry scheduled." if payload.get("retry_scheduled") else "")
-                    ),
+                    actor_id="evidence-recorder",
+                    actor_name="Evidence",
+                    text="Task evidence was recorded; waiting for user acceptance.",
                    created_at=created_at,
-                    status="done" if accepted else "error",
+                    status="done",
+                    metadata=dict(payload),
+                )
+
+            elif record.event_type == "task_acceptance_recorded":
+                acceptance_type = str(payload.get("acceptance_type") or payload.get("feedback_type") or "")
+                if acceptance_type == "accept":
+                    root["status"] = "done"
+                    root["finished_at"] = created_at
+                elif acceptance_type == "abandon":
+                    root["status"] = "cancelled"
+                    root["finished_at"] = created_at
+                else:
+                    root["status"] = "waiting"
+                    root["finished_at"] = None
+                add_event(
+                    event_id=_event_id(record, "acceptance"),
+                    run_id=record.run_id or root_run_id,
+                    parent_run_id=root_run_id if record.run_id else None,
+                    kind="run_status",
+                    actor_type="user",
+                    actor_id="user-acceptance",
+                    actor_name="User Acceptance",
+                    text=f"User acceptance recorded: {acceptance_type or 'unknown'}.",
+                    created_at=created_at,
+                    status="done",
                    metadata=dict(payload),
                )

--- a/app-instance/backend/beaver/skills/learning/service.py
+++ b/app-instance/backend/beaver/skills/learning/service.py
@ -69,15 +69,24 @@ class SkillLearningService:
                existing_ids.add(candidate.candidate_id)
        return candidates

-    def build_learning_candidates_for_task(self, task_id: str, *, trigger_run_id: str) -> list[SkillLearningCandidate]:
-        """Build candidates scoped to a single validated and satisfied Task run."""
+    def build_learning_candidates_for_task(
+        self,
+        task_id: str,
+        *,
+        final_accepted_run_id: str | None = None,
+        trigger_run_id: str | None = None,
+    ) -> list[SkillLearningCandidate]:
+        """Build candidates from a user-accepted Task and all of its runs."""

+        final_accepted_run_id = final_accepted_run_id or trigger_run_id
+        if not final_accepted_run_id:
+            return []
        runs = [record for record in self.run_store.list_runs() if record.task_id == task_id]
-        trigger_run = next((record for record in runs if record.run_id == trigger_run_id), None)
-        if trigger_run is None or not self._is_confirmed_positive_run(trigger_run):
+        final_run = next((record for record in runs if record.run_id == final_accepted_run_id), None)
+        if final_run is None or not self._is_task_accepted_run(final_run):
            return []

-        source_runs = [record for record in runs if self._is_confirmed_positive_run(record)]
+        source_runs = sorted(runs, key=lambda item: (item.started_at, item.run_id))
        if not source_runs:
            return []

@ -100,11 +109,16 @@ class SkillLearningService:
                    source_session_ids=source_session_ids,
                    related_skill_names=[],
                    reason=f"Task {task_id} completed successfully without a published skill; consider extracting reusable guidance.",
-                    evidence={"task_id": task_id, "trigger_run_id": trigger_run_id, "theme": self._task_theme(trigger_run.task_text)},
+                    evidence={
+                        "task_id": task_id,
+                        "final_accepted_run_id": final_accepted_run_id,
+                        "source_run_ids": source_run_ids,
+                        "theme": self._task_theme(final_run.task_text),
+                    },
                    status="open",
                    priority=1,
                    confidence=0.8,
-                    trigger_reason="validation_accepted_and_user_satisfied",
+                    trigger_reason="task_accepted",
                )
            )
        else:
@ -137,13 +151,14 @@ class SkillLearningService:
                        ),
                        evidence={
                            "task_id": task_id,
-                            "trigger_run_id": trigger_run_id,
+                            "final_accepted_run_id": final_accepted_run_id,
+                            "source_run_ids": source_run_ids,
                            "skill_version": receipt.skill_version,
                        },
                        status="open",
                        priority=1,
                        confidence=0.7,
-                        trigger_reason="validation_accepted_and_user_satisfied",
+                        trigger_reason="task_accepted",
                    )
                )

@ -269,7 +284,7 @@ class SkillLearningService:
            groups.setdefault(key, []).append(record)
        candidates: list[SkillLearningCandidate] = []
        for theme, runs in groups.items():
-            successful = [record for record in runs if self._is_confirmed_positive_run(record)]
+            successful = [record for record in runs if self._is_task_accepted_run(record)]
            if len(successful) < 2:
                continue
            if any(record.activated_skills for record in successful):
@ -290,7 +305,7 @@ class SkillLearningService:
    def _build_merge_candidates(self) -> list[SkillLearningCandidate]:
        pair_counts: dict[tuple[str, str], list[RunRecord]] = {}
        for record in self.run_store.list_runs():
-            if not self._is_confirmed_positive_run(record):
+            if not self._is_task_accepted_run(record):
                continue
            unique = sorted({receipt.skill_name for receipt in record.activated_skills})
            for pair in combinations(unique, 2):
@ -351,14 +366,15 @@ class SkillLearningService:
        return effects

    @staticmethod
-    def _is_confirmed_positive_run(record: RunRecord) -> bool:
-        validation = record.validation_result or {}
+    def _is_task_accepted_run(record: RunRecord) -> bool:
+        """Return True when the run succeeded, belongs to a task, and was accepted.
+
+        The acceptance type is read from ``record.feedback``. When no
+        ``acceptance_type`` key is present, a ``feedback_type`` of
+        ``"satisfied"`` is treated as ``"accept"``.
+        """
        feedback = record.feedback or {}
+        acceptance_type = feedback.get("acceptance_type")
+        if acceptance_type is None and feedback.get("feedback_type") == "satisfied":
+            acceptance_type = "accept"
        return (
            bool(record.success)
            and bool(record.task_id)
-            and validation.get("accepted") is True
-            and feedback.get("feedback_type") == "satisfied"
+            and acceptance_type == "accept"
        )

    @staticmethod
--- a/app-instance/backend/beaver/tasks/init.py
+++ b/app-instance/backend/beaver/tasks/init.py
@ -6,7 +6,6 @@ from .planner import TaskExecutionPlan, TaskExecutionPlanner
 from .router import MainAgentRouter
 from .service import TaskService
 from .skill_resolver import SkillResolutionReport, TaskSkillResolver
-from .validation import ValidationService

 __all__ = [
    "EvidenceBuilder",
@ -24,6 +23,5 @@ __all__ = [
    "ToolEvidence",
    "ValidationResult",
    "ValidationStatus",
-    "ValidationService",
    "render_task_evidence",
 ]
--- a/app-instance/backend/beaver/tasks/models.py
+++ b/app-instance/backend/beaver/tasks/models.py
@ -1,4 +1,4 @@
-"""Models for internal task tracking and validation."""
+"""Models for internal task tracking and user acceptance."""

 from __future__ import annotations

@ -9,7 +9,12 @@ from typing import Any, Literal
 ValidationStatus = Literal["accepted", "rejected", "insufficient_evidence", "validator_error"]

 VALIDATION_STATUSES = {"accepted", "rejected", "insufficient_evidence", "validator_error"}
-TASK_OPEN_STATUSES = {"open", "running", "validating", "awaiting_feedback", "needs_review", "needs_revision"}
+TASK_OPEN_STATUSES = {"open", "running", "awaiting_acceptance", "needs_revision"}
+# Status names dropped from TASK_OPEN_STATUSES in this change, mapped to the
+# status that replaces each of them.
+LEGACY_STATUS_MAP = {
+    "validating": "running",
+    "awaiting_feedback": "awaiting_acceptance",
+    "needs_review": "awaiting_acceptance",
+}


@dataclass(slots=True)
@ -113,11 +118,11 @@ class TaskRecord:

    @property
    def is_execution_active(self) -> bool:
+        """True only while the task status is ``"running"``."""
-        return self.status in {"running", "validating"}
+        return self.status == "running"

    @property
    def requires_user_action(self) -> bool:
+        """True when the status is ``"awaiting_acceptance"`` or ``"needs_revision"``."""
-        return self.status in {"awaiting_feedback", "needs_review", "needs_revision"}
+        return self.status in {"awaiting_acceptance", "needs_revision"}

    def to_dict(self) -> dict[str, Any]:
        return {
@ -137,6 +142,7 @@ class TaskRecord:
            "satisfaction": self.satisfaction,
            "run_ids": list(self.run_ids),
            "skill_names": list(self.skill_names),
+            "acceptance": list(self.feedback),
            "feedback": list(self.feedback),
            "validation_result": self.validation_result,
            "metadata": dict(self.metadata),
@ -152,7 +158,7 @@ class TaskRecord:
            goal=str(payload.get("goal") or payload.get("description") or ""),
            constraints=[str(item) for item in payload.get("constraints") or []],
            priority=int(payload.get("priority", 0) or 0),
-            status=str(payload.get("status") or "open"),
+            status=LEGACY_STATUS_MAP.get(str(payload.get("status") or "open"), str(payload.get("status") or "open")),
            creator=str(payload.get("creator") or "main-agent"),
            created_at=str(payload.get("created_at") or ""),
            updated_at=str(payload.get("updated_at") or ""),
@ -161,7 +167,11 @@ class TaskRecord:
            satisfaction=_optional_float(payload.get("satisfaction")),
            run_ids=[str(item) for item in payload.get("run_ids") or []],
            skill_names=[str(item) for item in payload.get("skill_names") or []],
-            feedback=[dict(item) for item in payload.get("feedback") or [] if isinstance(item, dict)],
+            feedback=[
+                _normalize_acceptance_entry(dict(item))
+                for item in (payload.get("acceptance") or payload.get("feedback") or [])
+                if isinstance(item, dict)
+            ],
            validation_result=dict(payload["validation_result"]) if isinstance(payload.get("validation_result"), dict) else None,
            metadata=dict(payload.get("metadata") or {}),
        )
@ -226,3 +236,13 @@ def _optional_float(value: Any) -> float | None:
    if value in (None, ""):
        return None
    return float(value)
+
+
+def _normalize_acceptance_entry(entry: dict[str, Any]) -> dict[str, Any]:
+    """Fill in whichever of ``acceptance_type`` / ``feedback_type`` is missing.
+
+    The two keys mirror each other: ``"satisfied"`` on the feedback side
+    corresponds to ``"accept"`` on the acceptance side, and every other value
+    is copied across unchanged. The entry is updated in place and returned;
+    an entry carrying neither key, or both, is left untouched.
+    """
+    acceptance = entry.get("acceptance_type")
+    feedback = entry.get("feedback_type")
+    if acceptance is None and feedback is not None:
+        legacy_value = str(feedback or "")
+        entry["acceptance_type"] = "accept" if legacy_value == "satisfied" else legacy_value
+        return entry
+    if feedback is None and acceptance is not None:
+        current_value = str(acceptance or "")
+        entry["feedback_type"] = "satisfied" if current_value == "accept" else current_value
+    return entry
--- a/app-instance/backend/beaver/tasks/planner.py
+++ b/app-instance/backend/beaver/tasks/planner.py
@ -10,7 +10,7 @@ from typing import Any, Literal
 from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode
 from beaver.engine.providers import ProviderBundle

-from .models import TaskRecord, ValidationResult
+from .models import TaskRecord
 from .skill_resolver import SkillResolutionReport, TaskSkillResolver


@ -76,7 +76,6 @@ class TaskExecutionPlanner:
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
-        latest_validation: ValidationResult | None = None,
        provider_bundle: ProviderBundle | None = None,
        timeout_seconds: float = 30.0,
    ) -> TaskExecutionPlan:
@ -105,7 +104,6 @@ class TaskExecutionPlanner:
                                task=task,
                                user_message=user_message,
                                attempt_index=attempt_index,
-                                latest_validation=latest_validation,
                            ),
                        },
                    ],
@ -230,14 +228,10 @@ class TaskExecutionPlanner:
        task: TaskRecord,
        user_message: str,
        attempt_index: int,
-        latest_validation: ValidationResult | None,
    ) -> str:
-        validation_note = ""
-        if latest_validation is not None:
-            validation_note = (
-                "\nPrevious validation issues:\n"
-                + json.dumps(latest_validation.to_dict(), ensure_ascii=False)
-            )
+        history_note = ""
+        if task.feedback:
+            history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False)
        return (
            "Decide execution mode for this internal Task attempt.\n"
            "Use mode=team only when independent research, review, implementation slices, or staged checks "
@ -254,7 +248,7 @@ class TaskExecutionPlanner:
            f"Task goal:\n{task.goal}\n\n"
            f"Current user request:\n{user_message}\n\n"
            f"Attempt index: {attempt_index}\n"
-            f"{validation_note}"
+            f"{history_note}"
        )

    @staticmethod
--- a/app-instance/backend/beaver/tasks/service.py
+++ b/app-instance/backend/beaver/tasks/service.py
@ -7,7 +7,7 @@ from pathlib import Path
 from typing import Any
 from uuid import uuid4

-from .models import TaskEvent, TaskRecord, ValidationResult
+from .models import TaskEvent, TaskRecord
 from .store import TaskStore


@ -105,38 +105,70 @@ class TaskService:
        for name in skill_names or []:
            if name not in task.skill_names:
                task.skill_names.append(name)
+        task.status = "awaiting_acceptance"
        task.updated_at = self._now()
        self.store.upsert_task(task)
        self._event(task, "run_completed", run_id=run_id, payload={"skill_names": skill_names or []})
+        self._event(task, "evidence_recorded", run_id=run_id, payload={"skill_names": skill_names or []})
        return task

-    def record_validation(
+    def add_acceptance(
        self,
        task_id: str,
-        run_id: str,
-        validation: ValidationResult,
        *,
-        final_attempt: bool = True,
-        has_usable_answer: bool = True,
+        acceptance_type: str,
+        comment: str | None = None,
+        run_id: str | None = None,
    ) -> TaskRecord:
+        """Record a user acceptance decision for a task and update its status.
+
+        ``acceptance_type`` is first passed through ``normalize_acceptance_type``.
+        A repeat of an entry already stored for the same ``run_id`` and type
+        returns the task unchanged. Otherwise the entry is appended to
+        ``task.feedback`` and the status becomes ``needs_revision`` (revise),
+        ``abandoned`` (abandon) or ``closed`` (accept); accept also sets
+        ``satisfaction`` to 1.0 and, when ``run_id`` is given, stores it under
+        ``metadata["final_accepted_run_id"]``. The task is then written to the
+        store and an ``acceptance_<type>`` event is emitted.
+
+        Raises:
+            ValueError: if a different acceptance type is already recorded for
+                ``run_id``, or if the task is ``closed``/``abandoned`` and this
+                call is not a repeat of an existing entry.
+        """
        task = self._require(task_id)
        now = self._now()
-        if validation.status == "accepted":
-            task.status = "awaiting_feedback"
-        elif validation.status in {"insufficient_evidence", "validator_error"}:
-            task.status = "needs_review"
-        elif validation.status == "rejected" and not final_attempt:
+        normalized = normalize_acceptance_type(acceptance_type)
+        matching_acceptance = any(
+            item.get("run_id") == run_id and item.get("acceptance_type") == normalized
+            for item in task.feedback
+        )
+        conflicting_acceptance = next(
+            (
+                item
+                for item in task.feedback
+                if item.get("run_id") == run_id and item.get("acceptance_type") != normalized
+            ),
+            None,
+        )
+        if conflicting_acceptance is not None:
+            raise ValueError(
+                f"Acceptance for run_id={run_id!r} was already recorded as "
+                f"{conflicting_acceptance.get('acceptance_type')!r}"
+            )
+        if task.status in {"closed", "abandoned"} and not matching_acceptance:
+            raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
+        if matching_acceptance:
+            return task
+
+        # Both keys are written so readers of either name see the decision.
+        entry = {
+            "acceptance_type": normalized,
+            "feedback_type": "satisfied" if normalized == "accept" else normalized,
+            "comment": comment or "",
+            "run_id": run_id,
+            "created_at": now,
+        }
+        task.feedback.append(entry)
+        if normalized == "revise":
            task.status = "needs_revision"
-        elif validation.status == "rejected" and has_usable_answer:
-            task.status = "needs_review"
-        else:
-            task.status = "failed"
+        elif normalized == "abandon":
+            task.status = "abandoned"
            task.closed_at = now
-            task.close_reason = "automatic validation rejected the final attempt"
+            task.close_reason = comment or "abandoned"
+        elif normalized == "accept":
+            task.status = "closed"
+            task.closed_at = now
+            task.close_reason = "accepted"
+            task.satisfaction = 1.0
+            if run_id:
+                task.metadata["final_accepted_run_id"] = run_id
        task.updated_at = now
-        task.validation_result = validation.to_dict()
        self.store.upsert_task(task)
-        self._event(task, "validated", run_id=run_id, payload=validation.to_dict())
+        self._event(task, f"acceptance_{normalized}", run_id=run_id, payload=entry)
        return task

    def add_feedback(
@ -147,52 +179,12 @@ class TaskService:
        comment: str | None = None,
        run_id: str | None = None,
    ) -> TaskRecord:
-        task = self._require(task_id)
-        now = self._now()
-        matching_feedback = any(
-            item.get("run_id") == run_id and item.get("feedback_type") == feedback_type
-            for item in task.feedback
+        return self.add_acceptance(
+            task_id,
+            acceptance_type=feedback_type,
+            comment=comment,
+            run_id=run_id,
        )
-        conflicting_feedback = next(
-            (
-                item
-                for item in task.feedback
-                if item.get("run_id") == run_id and item.get("feedback_type") != feedback_type
-            ),
-            None,
-        )
-        if conflicting_feedback is not None:
-            raise ValueError(
-                f"Feedback for run_id={run_id!r} was already recorded as "
-                f"{conflicting_feedback.get('feedback_type')!r}"
-            )
-        if task.status in {"closed", "abandoned"} and not matching_feedback:
-            raise ValueError(f"Task {task.task_id} is already finalized as {task.status!r}")
-        if matching_feedback:
-            return task
-
-        entry = {
-            "feedback_type": feedback_type,
-            "comment": comment or "",
-            "run_id": run_id,
-            "created_at": now,
-        }
-        task.feedback.append(entry)
-        if feedback_type == "revise":
-            task.status = "needs_revision"
-        elif feedback_type == "abandon":
-            task.status = "abandoned"
-            task.closed_at = now
-            task.close_reason = comment or "abandoned"
-        elif feedback_type == "satisfied":
-            task.status = "closed"
-            task.closed_at = now
-            task.close_reason = "satisfied"
-            task.satisfaction = 1.0
-        task.updated_at = now
-        self.store.upsert_task(task)
-        self._event(task, f"feedback_{feedback_type}", run_id=run_id, payload=entry)
-        return task

    def close_task(self, task_id: str, *, reason: str = "closed") -> TaskRecord:
        task = self._require(task_id)
@ -267,3 +259,12 @@ def short_task_title(text: str) -> str:
    if len(words) <= 4:
        return cleaned[:40]
    return " ".join(words[:4])[:40]
+
+
+def normalize_acceptance_type(value: str) -> str:
+    """Return the canonical acceptance type for *value*.
+
+    Surrounding whitespace and letter case are ignored, and ``"satisfied"``
+    is accepted as an alias for ``"accept"``.
+
+    Raises:
+        ValueError: if the cleaned value is not accept, revise or abandon.
+    """
+    candidate = (value or "").strip().lower()
+    if candidate == "satisfied":
+        return "accept"
+    if candidate in {"accept", "revise", "abandon"}:
+        return candidate
+    raise ValueError("acceptance_type must be one of: accept, revise, abandon")
--- a/app-instance/backend/beaver/tasks/validation.py
+++ b/app-instance/backend/beaver/tasks/validation.py
@ -1,154 +0,0 @@
-"""Automatic validation for internal Task mode."""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-
-from beaver.engine.providers import ProviderBundle
-
-from .models import TaskRecord, ValidationResult
-
-
-class ValidationService:
-    async def validate_task_result(
-        self,
-        *,
-        task: TaskRecord,
-        user_message: str,
-        final_output: str,
-        evidence_packet: Any | None = None,
-        evidence_text: str = "",
-        transcript_excerpt: str = "",
-        tool_summaries: list[str] | None = None,
-        team_summaries: list[str] | None = None,
-        provider_bundle: ProviderBundle | None = None,
-    ) -> ValidationResult:
-        provider = None
-        model = None
-        if provider_bundle is not None:
-            provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider
-            runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime
-            model = getattr(runtime, "model", None)
-        if provider is not None:
-            try:
-                return await self._validate_with_provider(
-                    provider=provider,
-                    model=model,
-                    task=task,
-                    user_message=user_message,
-                    final_output=final_output,
-                    evidence_text=evidence_text,
-                    transcript_excerpt=transcript_excerpt,
-                    tool_summaries=tool_summaries or [],
-                    team_summaries=team_summaries or [],
-                )
-            except Exception as exc:
-                return ValidationResult(
-                    status="validator_error",
-                    score=0.0,
-                    issues=[f"Validator failed: {exc}"],
-                    evidence_gaps=["Automatic validation failed before producing a reliable decision."],
-                    missing_requirements=["User review is required because automatic validation failed."],
-                    recommended_revision_prompt=(
-                        "Review the answer and evidence, then decide whether to revise or accept it."
-                    ),
-                    validator="llm_error",
-                )
-        return self._heuristic_validate(final_output)
-
-    async def _validate_with_provider(
-        self,
-        *,
-        provider: Any,
-        model: str | None,
-        task: TaskRecord,
-        user_message: str,
-        final_output: str,
-        evidence_text: str,
-        transcript_excerpt: str,
-        tool_summaries: list[str],
-        team_summaries: list[str],
-    ) -> ValidationResult:
-        legacy_context = "" if evidence_text else (
-            f"Transcript excerpt:\n{transcript_excerpt}\n\n"
-            f"Tool summaries:\n{json.dumps(tool_summaries, ensure_ascii=False)}\n\n"
-            f"Team summaries:\n{json.dumps(team_summaries, ensure_ascii=False)}\n\n"
-        )
-        prompt = (
-            "Validate whether the assistant output satisfies the task. "
-            "Return only compact JSON with keys: passed, score, issues, "
-            "missing_requirements, recommended_revision_prompt.\n\n"
-            f"Task goal:\n{task.goal}\n\n"
-            f"Current user request:\n{user_message}\n\n"
-            f"Evidence packet:\n{evidence_text}\n\n"
-            f"{legacy_context}"
-            f"Assistant final output:\n{final_output}"
-        )
-        response = await provider.chat(
-            messages=[
-                {"role": "system", "content": "You are a strict task result validator."},
-                {"role": "user", "content": prompt},
-            ],
-            tools=None,
-            model=model,
-            max_tokens=4096,
-            temperature=0.0,
-        )
-        payload = self._parse_json_object(response.content or "")
-        status = payload.get("status")
-        if status not in {"accepted", "rejected", "insufficient_evidence", "validator_error"}:
-            status = (
-                "accepted"
-                if payload.get("passed") and float(payload.get("score", 0.0) or 0.0) >= 0.75
-                else "rejected"
-            )
-        return ValidationResult(
-            status=status,
-            score=max(0.0, min(1.0, float(payload.get("score", 0.0) or 0.0))),
-            issues=[str(item) for item in payload.get("issues") or []],
-            missing_requirements=[str(item) for item in payload.get("missing_requirements") or []],
-            evidence_gaps=[str(item) for item in payload.get("evidence_gaps") or []],
-            recommended_revision_prompt=str(payload.get("recommended_revision_prompt") or ""),
-            validator="llm",
-        )
-
-    @staticmethod
-    def _heuristic_validate(final_output: str) -> ValidationResult:
-        text = final_output.strip()
-        if not text:
-            return ValidationResult(
-                passed=False,
-                score=0.0,
-                issues=["Assistant output is empty."],
-                missing_requirements=["A non-empty result is required."],
-                recommended_revision_prompt="Produce a complete, non-empty answer for the task.",
-                validator="heuristic",
-            )
-        lowered = text.lower()
-        if "run failed before completion" in lowered or "tool loop stopped" in lowered:
-            return ValidationResult(
-                passed=False,
-                score=0.35,
-                issues=["The run did not complete cleanly."],
-                missing_requirements=["A successful final result is required."],
-                recommended_revision_prompt="Retry the task and address the failure before returning the final answer.",
-                validator="heuristic",
-            )
-        return ValidationResult(passed=True, score=0.85, validator="heuristic")
-
-    @staticmethod
-    def _parse_json_object(text: str) -> dict[str, Any]:
-        cleaned = text.strip()
-        if cleaned.startswith("```"):
-            cleaned = cleaned.strip("`")
-            if cleaned.lower().startswith("json"):
-                cleaned = cleaned[4:].strip()
-        start = cleaned.find("{")
-        end = cleaned.rfind("}")
-        if start >= 0 and end >= start:
-            cleaned = cleaned[start : end + 1]
-        payload = json.loads(cleaned)
-        if not isinstance(payload, dict):
-            raise ValueError("validator response must be a JSON object")
-        return payload
--- a/app-instance/backend/tests/unit/test_context_builder.py
+++ b/app-instance/backend/tests/unit/test_context_builder.py
@ -0,0 +1,28 @@
+from __future__ import annotations
+
+from beaver.engine.context import ContextBuildInput, ContextBuilder, RuntimeContext, SessionContext
+
+
+def test_context_builder_injects_current_date_and_time() -> None:
+    result = ContextBuilder().build_messages(
+        ContextBuildInput(
+            base_system_prompt="Follow user requests.",
+            current_user_input="今天几号？",
+            session_context=SessionContext(session_id="web:alpha", source="web", model="stub-model"),
+            runtime_context=RuntimeContext(
+                utc_datetime="2026-05-26T01:10:00+00:00",
+                local_datetime="2026-05-26T09:10:00+08:00",
+                timezone="Asia/Shanghai",
+                utc_offset="+08:00",
+            ),
+        )
+    )
+
+    system_prompt = result.messages[0]["content"]
+    assert "# Current Date and Time" in system_prompt
+    assert "Current UTC time: 2026-05-26T01:10:00+00:00" in system_prompt
+    assert "Current local time: 2026-05-26T09:10:00+08:00" in system_prompt
+    assert "Local timezone: Asia/Shanghai" in system_prompt
+    assert "Local UTC offset: +08:00" in system_prompt
+    assert '"today", "tomorrow", "now", "this week", and "next month"' in system_prompt
+    assert result.messages[-1] == {"role": "user", "content": "今天几号？"}
--- a/app-instance/backend/tests/unit/test_gateway_channels.py
+++ b/app-instance/backend/tests/unit/test_gateway_channels.py
@ -18,8 +18,8 @@ class FakeResult:
    model: str | None = "fake-model"
    usage: dict[str, Any] = field(default_factory=dict)
    task_id: str | None = "task-1"
-    task_status: str | None = "awaiting_feedback"
-    validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
+    task_status: str | None = "awaiting_acceptance"
+    validation_result: dict[str, Any] | None = None


 class FakeService:
@ -79,8 +79,9 @@ def test_gateway_routes_memory_channel_roundtrip() -> None:
        assert message.session_id == "s1"
        assert message.finish_reason == "stop"
        assert message.metadata["task_id"] == "task-1"
-        assert message.metadata["task_status"] == "awaiting_feedback"
-        assert message.metadata["validation_result"] == {"accepted": True}
+        assert message.metadata["task_status"] == "awaiting_acceptance"
+        assert message.metadata["evidence_status"] == "recorded"
+        assert message.metadata["validation_result"] is None

        stop_event.set()
        await asyncio.wait_for(task, timeout=2)
--- a/app-instance/backend/tests/unit/test_litellm_thinking_mode.py
+++ b/app-instance/backend/tests/unit/test_litellm_thinking_mode.py
@ -113,6 +113,19 @@ def test_litellm_provider_preserves_reasoning_content_for_tool_round_trip() -> N
    assert LiteLLMProvider._sanitize_messages(messages)[0]["reasoning_content"] == "must be passed back"


+def test_litellm_provider_merges_late_system_messages_to_front() -> None:
+    messages = [
+        {"role": "system", "content": "base"},
+        {"role": "user", "content": "question"},
+        {"role": "system", "content": "finalize without tools"},
+    ]
+
+    sanitized = LiteLLMProvider._sanitize_messages(messages)
+
+    assert [message["role"] for message in sanitized] == ["system", "user"]
+    assert sanitized[0]["content"] == "base\n\nfinalize without tools"
+
+
 def test_thinking_mode_is_forced_disabled_even_when_requested_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
    captured: dict = {}

--- a/app-instance/backend/tests/unit/test_main_agent_router.py
+++ b/app-instance/backend/tests/unit/test_main_agent_router.py
@ -79,7 +79,7 @@ def _task() -> TaskRecord:
        goal="实现任务连续性",
        constraints=[],
        priority=0,
-        status="awaiting_feedback",
+        status="awaiting_acceptance",
        creator="test",
        created_at="now",
        updated_at="now",
--- a/app-instance/backend/tests/unit/test_phase5_skills_runtime.py
+++ b/app-instance/backend/tests/unit/test_phase5_skills_runtime.py
@ -35,6 +35,7 @@ class StubProvider(LLMProvider):
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
+        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        if not self._responses:
            raise AssertionError("No stubbed provider responses left")
@ -47,11 +48,22 @@ class StubProvider(LLMProvider):
 class StubSkillAssembler:
    def __init__(self, activated_skills: list[SkillContext]) -> None:
        self.activated_skills = activated_skills
+        self.calls: list[dict] = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
+        self.calls.append(kwargs)
        return SkillAssemblyResult(activated_skills=list(self.activated_skills))


+class RecordingToolAssembler:
+    def __init__(self) -> None:
+        self.calls: list[dict] = []
+
+    async def assemble(self, **kwargs):
+        self.calls.append(kwargs)
+        return kwargs["registry"].get_specs(["memory"])
+
+
 def _tool_call(*, name: str = "echo", arguments: dict | None = None, call_id: str = "call-1") -> SimpleNamespace:
    return SimpleNamespace(
        id=call_id,
@ -576,6 +588,48 @@ def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
    assert effect_records[-1].run_id == result.run_id


+def test_thinking_disabled_still_uses_skill_and_tool_assembly(tmp_path: Path) -> None:
+    skill = SkillContext(
+        name="docker-debug",
+        content="Use docker logs before editing config.",
+        version="v0007",
+        content_hash="hash-v7",
+        activation_reason="llm_selected",
+        tool_hints=["terminal"],
+    )
+    skill_assembler = StubSkillAssembler([skill])
+    tool_assembler = RecordingToolAssembler()
+    loader = EngineLoader(
+        workspace=tmp_path,
+        skill_assembler=skill_assembler,
+        tool_assembler=tool_assembler,
+    )
+    loop = AgentLoop(loader=loader)
+    bundle = ProviderBundle(
+        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
+        main_provider=StubProvider(
+            [LLMResponse(content="Done", finish_reason="stop", provider_name="stub", model="stub-model")]
+        ),
+    )
+
+    result = asyncio.run(
+        loop.process_direct(
+            "Why is the Docker container crashing?",
+            provider_bundle=bundle,
+            thinking_enabled=False,
+        )
+    )
+    loaded = loop.boot()
+    events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
+    tool_selection = next(event for event in events if event.event_type == "tool_selection_snapshotted")
+
+    assert skill_assembler.calls
+    assert skill_assembler.calls[0]["thinking_enabled"] is False
+    assert tool_assembler.calls
+    assert [skill.name for skill in tool_assembler.calls[0]["activated_skills"]] == ["docker-debug"]
+    assert tool_selection.event_payload["tool_names"] == ["memory"]
+
+
 def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path: Path) -> None:
    skill = SkillContext(
        name="docker-debug",
@ -635,6 +689,52 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
    assert effect_records[-1].success is False


+def test_agent_loop_suppresses_raw_tool_call_when_finalizing_after_tool_limit(tmp_path: Path) -> None:
+    loader = EngineLoader(
+        workspace=tmp_path,
+        skill_assembler=StubSkillAssembler([]),
+    )
+    loop = AgentLoop(loader=loader)
+    bundle = ProviderBundle(
+        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
+        main_provider=StubProvider(
+            [
+                LLMResponse(
+                    content="Need a tool.",
+                    finish_reason="tool_calls",
+                    tool_calls=[_tool_call()],
+                    provider_name="stub",
+                    model="stub-model",
+                ),
+                LLMResponse(
+                    content=(
+                        "<tool_call>\n"
+                        "<function=mcp_local_web_mcp_web_fetch>\n"
+                        "<parameter=url>https://example.com</parameter>\n"
+                        "</function>\n"
+                        "</tool_call>"
+                    ),
+                    finish_reason="stop",
+                    provider_name="stub",
+                    model="stub-model",
+                ),
+            ]
+        ),
+    )
+
+    result = asyncio.run(
+        loop.process_direct(
+            "Fetch the latest result",
+            provider_bundle=bundle,
+            max_tool_iterations=0,
+        )
+    )
+
+    assert result.finish_reason == "max_tool_iterations"
+    assert "<tool_call>" not in result.output_text
+    assert "raw tool call was suppressed" in result.output_text
+
+
 def test_llm_request_snapshot_defaults_to_compact_payload(tmp_path: Path) -> None:
    loop = AgentLoop(loader=EngineLoader(workspace=tmp_path, skill_assembler=StubSkillAssembler([])))
    bundle = ProviderBundle(
--- a/app-instance/backend/tests/unit/test_process_projection.py
+++ b/app-instance/backend/tests/unit/test_process_projection.py
@ -101,12 +101,11 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
        "web:test",
        run_id="main-run",
        role="system",
-        event_type="task_validation_snapshotted",
+        event_type="task_evidence_recorded",
        event_payload={
            "task_id": "task-1",
            "attempt_index": 1,
-            "validation_result": {"accepted": True, "score": 0.9},
-            "retry_scheduled": False,
+            "evidence_status": "recorded",
        },
        context_visible=False,
    )
@ -121,7 +120,7 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
    assert sub_run["metadata"]["selected_skill_names"] == ["research-workflow"]
    assert sub_run["metadata"]["skill_query"] == "research workflow"
    assert sub_run["metadata"]["ephemeral_guidance_id"] is None
-    assert any(event["actor_name"] == "Validator" for event in projection["events"])
+    assert any(event["actor_name"] == "Evidence" for event in projection["events"])
    assert any(run["session_id"] == "web:test" for run in projection["runs"])


--- a/app-instance/backend/tests/unit/test_task_mode_feedback.py
+++ b/app-instance/backend/tests/unit/test_task_mode_feedback.py
@ -4,23 +4,17 @@ import asyncio
 from pathlib import Path
 from types import SimpleNamespace

-import pytest
-
-from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode
 from beaver.engine import EngineLoader
-from beaver.engine.context.builder import ContextBuilder, ContextBuildInput
 from beaver.engine.providers.base import LLMProvider, LLMResponse
 from beaver.engine.providers.factory import ProviderBundle
 from beaver.services.agent_service import AgentService
-from beaver.skills.assembler import SkillAssemblyResult
-from beaver.tasks import TaskExecutionPlan, TaskRecord, TaskService, ValidationResult, ValidationService
+from beaver.tasks import TaskExecutionPlan, TaskService


 class StubProvider(LLMProvider):
    def __init__(self, responses: list[LLMResponse]) -> None:
        super().__init__()
        self._responses = list(responses)
-        self.calls: list[dict[str, object]] = []

    async def chat(
        self,
@ -30,7 +24,6 @@ class StubProvider(LLMProvider):
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
-        self.calls.append({"messages": messages, "tools": tools, "model": model})
        if not self._responses:
            raise AssertionError("No stubbed provider responses left")
        return self._responses.pop(0)
@ -39,30 +32,9 @@ class StubProvider(LLMProvider):
        return "stub-model"


-class StubValidationService:
-    def __init__(self, results: list[ValidationResult]) -> None:
-        self.results = list(results)
-        self.calls: list[dict] = []
-
-    async def validate_task_result(self, **kwargs) -> ValidationResult:
-        self.calls.append(kwargs)
-        if not self.results:
-            raise AssertionError("No stubbed validation results left")
-        return self.results.pop(0)
-
-
 class StubTaskExecutionPlanner:
-    def __init__(self, plans: list[TaskExecutionPlan] | None = None) -> None:
-        self.plans = list(plans or [TaskExecutionPlan.single("test-single")])
-        self.calls = []
-
    async def plan(self, **kwargs) -> TaskExecutionPlan:
-        self.calls.append(kwargs)
-        if len(self.plans) == 1:
-            return self.plans[0]
-        if not self.plans:
-            raise AssertionError("No stubbed execution plans left")
-        return self.plans.pop(0)
+        return TaskExecutionPlan.single("test-single")


 class FakeLearningCandidate:
@ -70,15 +42,6 @@ class FakeLearningCandidate:
        return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}


-class RecordingSkillAssembler:
-    def __init__(self) -> None:
-        self.task_descriptions: list[str] = []
-
-    async def assemble(self, **kwargs) -> SkillAssemblyResult:
-        self.task_descriptions.append(kwargs["task_description"])
-        return SkillAssemblyResult()
-
-
 def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
    return LLMResponse(
        content=f'{{"action":"{action}","reason":"test route","short_title":"{short_title}"}}',
@ -107,828 +70,157 @@ def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
    )


-def _single_planner() -> StubTaskExecutionPlanner:
-    return StubTaskExecutionPlanner([TaskExecutionPlan.single("test-single")])
-
-
-def _team_plan(strategy: str = "sequence") -> TaskExecutionPlan:
-    return TaskExecutionPlan(
-        mode="team",
-        reason="test-team",
-        graph=ExecutionGraph(
-            strategy=strategy,  # type: ignore[arg-type]
-            nodes=[
-                ExecutionNode(
-                    node_id="research",
-                    task="research implementation options",
-                    agent=AgentDescriptor(name="researcher", role="research"),
-                )
-            ],
-        ),
-        final_synthesis_instruction="Use the sub-agent result to produce the final answer.",
-    )
-
-
-def _provider_bundle(provider: StubProvider) -> ProviderBundle:
-    return ProviderBundle(
-        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
-        main_provider=provider,
-        auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
-        auxiliary_provider=StubProvider([_route_response("new_task")]),
-    )
-
-
-def _main_only_bundle(*responses: str) -> ProviderBundle:
-    return ProviderBundle(
-        main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
-        main_provider=StubProvider(
-            [
-                LLMResponse(
-                    content=response,
-                    finish_reason="stop",
-                    provider_name="stub",
-                    model="stub-model",
-                )
-                for response in responses
-            ]
-        ),
-    )
-
-
-def _task_record(status: str) -> TaskRecord:
-    return TaskRecord(
-        task_id="task-1",
-        session_id="session-1",
-        description="test task",
-        goal="test task",
-        constraints=[],
-        priority=0,
-        status=status,
-        creator="main-agent",
-        created_at="2026-05-22T00:00:00+00:00",
-        updated_at="2026-05-22T00:00:00+00:00",
-    )
-
-
-def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
+def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> None:
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService([]),
+            task_execution_planner=StubTaskExecutionPlanner(),
        )
    )

    result = asyncio.run(
        service.process_direct(
-            "hello?",
-            session_id="web:simple",
-            provider_bundle=_bundle("hi", route_action="simple_chat"),
-        )
-    )
-    loaded = service.create_loop().boot()
-
-    assert result.task_id is None
-    assert loaded.task_service.store.list_tasks() == []
-
-
-def test_complex_request_creates_task_and_records_validation(tmp_path: Path) -> None:
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [ValidationResult(passed=True, score=0.9, validator="test")]
-            ),
+            "draft release notes",
+            session_id="web:test",
+            provider_bundle=_bundle("Done"),
        )
    )

-    result = asyncio.run(
-        service.process_direct(
-            "implement the new report workflow",
-            session_id="web:task",
-            provider_bundle=_bundle("implemented"),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task_by_run_id(result.run_id)
-    events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
-    run_record = loaded.run_memory_store.list_runs()[-1]
-    skill_effects = next(event for event in events if event.event_type == "skill_effects_snapshotted")
-
-    assert result.task_id is not None
+    task_service = service.create_loop().boot().task_service
+    assert task_service is not None
+    task = task_service.get_task(result.task_id or "")
    assert task is not None
-    assert task.status == "awaiting_feedback"
-    assert any(event.event_type == "task_validation_snapshotted" for event in events)
-    assert run_record.task_id == result.task_id
-    assert run_record.validation_result["accepted"] is True
-    assert skill_effects.event_payload["candidate_generation_allowed"] is False
-    assert skill_effects.event_payload["learning_candidates"] == []
-    assert task.metadata["short_title"] == "Test task"
+    assert task.status == "awaiting_acceptance"
+    assert task.validation_result is None
+    assert result.validation_result is None
+
+    event_types = [event.event_type for event in task_service.list_events(task.task_id)]
+    assert "evidence_recorded" in event_types
+    assert "validated" not in event_types


-def test_task_mode_uses_task_aware_skill_selection_context(tmp_path: Path) -> None:
-    skill_assembler = RecordingSkillAssembler()
+def test_acceptance_closes_task_and_triggers_learning(tmp_path: Path) -> None:
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [ValidationResult(passed=True, score=1.0, validator="test")]
-            ),
-            skill_assembler=skill_assembler,
-        )
-    )
-
-    result = asyncio.run(
-        service.process_direct(
-            "继续按刚才的方案改",
-            session_id="web:task-skill-query",
-            provider_bundle=_bundle("done", route_action="new_task"),
-        )
-    )
-
-    assert result.task_id
-    assert skill_assembler.task_descriptions
-    query = skill_assembler.task_descriptions[0]
-    assert "Task goal:" in query
-    assert "Current user request:" in query
-    assert "Previously activated skills:" in query
-    assert "If no published skill matches, return []" in query
-
-
-def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [
-                    ValidationResult(passed=True, score=0.9, validator="test"),
-                    ValidationResult(passed=True, score=0.9, validator="test"),
-                ]
-            ),
-        )
-    )
-
-    first = asyncio.run(
-        service.process_direct(
-            "implement the search workflow",
-            session_id="web:continue",
-            provider_bundle=_bundle("first done", route_action="new_task"),
-        )
-    )
-    second = asyncio.run(
-        service.process_direct(
-            "also add tests for it",
-            session_id="web:continue",
-            provider_bundle=_bundle("tests added", route_action="continue_task"),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task(first.task_id)
-
-    assert task is not None
-    assert second.task_id == first.task_id
-    assert len(task.run_ids) == 2
-
-    closed = asyncio.run(
-        service.process_direct(
-            "这个任务结束了",
-            session_id="web:continue",
-            provider_bundle=_bundle("好的，已结束。", route_action="close_task"),
-        )
-    )
-    task = loaded.task_service.get_task(first.task_id)
-
-    assert closed.task_id is None
-    assert task is not None
-    assert task.status == "closed"
-    assert loaded.task_service.active_task_view("web:continue") is None
-
-
-def test_active_task_revision_input_records_feedback_and_reruns(tmp_path: Path) -> None:
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [
-                    ValidationResult(passed=True, score=0.9, validator="test"),
-                    ValidationResult(passed=True, score=0.95, validator="test"),
-                ]
-            ),
-        )
-    )
-
-    first = asyncio.run(
-        service.process_direct(
-            "查询珠海天气",
-            session_id="web:revise-direct",
-            provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
-        )
-    )
-    second = asyncio.run(
-        service.process_direct(
-            "再详细一点，并加上明后天穿衣建议",
-            session_id="web:revise-direct",
-            provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task(first.task_id)
-    messages = loaded.session_manager.get_messages_as_conversation(first.session_id)
-    first_assistant = [
-        message
-        for message in messages
-        if message.get("role") == "assistant" and message.get("run_id") == first.run_id
-    ][-1]
-    user_messages = [message.get("content") for message in messages if message.get("role") == "user"]
-
-    assert second.task_id == first.task_id
-    assert task is not None
-    assert task.status == "awaiting_feedback"
-    assert len(task.run_ids) == 2
-    assert task.feedback == [
-        {
-            "feedback_type": "revise",
-            "comment": "再详细一点，并加上明后天穿衣建议",
-            "run_id": first.run_id,
-            "created_at": task.feedback[0]["created_at"],
-        }
-    ]
-    assert first_assistant["feedback_state"] == "revise"
-    assert "再详细一点，并加上明后天穿衣建议" in user_messages
-
-
-def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback(tmp_path: Path) -> None:
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [
-                    ValidationResult(passed=True, score=0.9, validator="test"),
-                    ValidationResult(passed=True, score=0.95, validator="test"),
-                ]
-            ),
-        )
-    )
-
-    first = asyncio.run(
-        service.process_direct(
-            "查询珠海天气",
-            session_id="web:explicit-revise",
-            provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
-        )
-    )
-    feedback = asyncio.run(
-        service.submit_feedback(
-            session_id=first.session_id,
-            run_id=first.run_id,
-            feedback_type="revise",
-            comment="准备补充穿衣建议",
-        )
-    )
-    second = asyncio.run(
-        service.process_direct(
-            "加上明后天穿衣建议",
-            session_id="web:explicit-revise",
-            provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task(first.task_id)
-
-    assert feedback["task_status"] == "needs_revision"
-    assert second.task_id == first.task_id
-    assert task is not None
-    assert task.status == "awaiting_feedback"
-    assert len(task.run_ids) == 2
-    assert len(task.feedback) == 1
-    assert task.feedback[0]["feedback_type"] == "revise"
-    assert task.feedback[0]["comment"] == "准备补充穿衣建议"
-
-
-def test_validation_result_status_drives_accepted_and_passed() -> None:
-    accepted = ValidationResult(status="accepted", score=0.9, validator="test")
-    insufficient = ValidationResult(status="insufficient_evidence", score=0.9, validator="test")
-    rejected = ValidationResult(status="rejected", score=0.9, validator="test")
-
-    assert accepted.passed is True
-    assert accepted.accepted is True
-    assert insufficient.passed is False
-    assert insufficient.accepted is False
-    assert rejected.passed is False
-    assert rejected.accepted is False
-
-
-def test_validation_result_from_legacy_payload_maps_to_status() -> None:
-    accepted = ValidationResult.from_dict({"passed": True, "score": 0.9, "validator": "legacy"})
-    low_score = ValidationResult.from_dict({"passed": True, "score": 0.7, "validator": "legacy"})
-    rejected = ValidationResult.from_dict({"passed": False, "score": 0.2, "validator": "legacy"})
-
-    assert accepted is not None
-    assert accepted.status == "accepted"
-    assert low_score is not None
-    assert low_score.status == "rejected"
-    assert rejected is not None
-    assert rejected.status == "rejected"
-
-
-def test_validation_result_rejects_unknown_status() -> None:
-    with pytest.raises(ValueError, match="unknown validation status"):
-        ValidationResult(status="pending", score=0.9, validator="test")  # type: ignore[arg-type]
-
-
-def test_validation_result_from_dict_rejects_unknown_explicit_status() -> None:
-    with pytest.raises(ValueError, match="unknown validation status"):
-        ValidationResult.from_dict({"status": "pending", "passed": True, "score": 0.9})
-
-
-def test_validation_result_evidence_gaps_round_trip() -> None:
-    validation = ValidationResult(
-        status="insufficient_evidence",
-        score=0.4,
-        evidence_gaps=["missing command output", "missing file reference"],
-        validator="test",
-    )
-
-    restored = ValidationResult.from_dict(validation.to_dict())
-
-    assert restored is not None
-    assert restored.status == "insufficient_evidence"
-    assert restored.evidence_gaps == ["missing command output", "missing file reference"]
-    assert restored.to_dict()["evidence_gaps"] == ["missing command output", "missing file reference"]
-
-
-def test_task_record_status_helpers_distinguish_review_and_failed() -> None:
-    needs_review = _task_record("needs_review")
-    failed = _task_record("failed")
-
-    assert needs_review.is_open is True
-    assert needs_review.is_execution_active is False
-    assert needs_review.requires_user_action is True
-    assert failed.is_open is False
-    assert failed.is_execution_active is False
-    assert failed.requires_user_action is False
-
-
-def test_task_service_api_payload_emits_status_helpers(tmp_path: Path) -> None:
-    service = TaskService(tmp_path)
-    task = _task_record("needs_review")
-
-    payload = service.to_api_dict(task)
-
-    assert payload["is_open"] is True
-    assert payload["is_execution_active"] is False
-    assert payload["requires_user_action"] is True
-
-
-def test_validation_failure_retries_once(tmp_path: Path) -> None:
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [
-                    ValidationResult(
-                        passed=False,
-                        score=0.2,
-                        issues=["missing tests"],
-                        recommended_revision_prompt="Add tests before final response.",
-                        validator="test",
-                    ),
-                    ValidationResult(passed=True, score=0.88, validator="test"),
-                ]
-            ),
-        )
-    )
-
-    result = asyncio.run(
-        service.process_direct(
-            "implement and validate the task",
-            session_id="web:retry",
-            provider_bundle=_bundle("first draft", "revised draft"),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task(result.task_id)
-
-    assert result.output_text == "revised draft"
-    assert result.validation_result["accepted"] is True
-    assert task is not None
-    assert len(task.run_ids) == 2
-    visible_messages = loaded.session_manager.get_messages_as_conversation(result.session_id)
-    visible_contents = [message.get("content") for message in visible_messages]
-    assert "first draft" not in visible_contents
-    assert "revised draft" in visible_contents
-
-
-def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [ValidationResult(passed=True, score=0.9, validator="test")]
-            ),
+            task_execution_planner=StubTaskExecutionPlanner(),
        )
    )
    result = asyncio.run(
        service.process_direct(
-            "implement feedback handling",
-            session_id="web:feedback",
-            provider_bundle=_bundle("done"),
+            "write implementation plan",
+            session_id="web:acceptance",
+            provider_bundle=_bundle("Plan"),
        )
    )
-    loaded = service.create_loop().boot()
-    learning_calls = []

-    def build_learning_candidates_for_task(task_id: str, *, trigger_run_id: str) -> list[FakeLearningCandidate]:
-        learning_calls.append((task_id, trigger_run_id))
+    loaded = service.create_loop().boot()
+    generated: list[tuple[str, str]] = []
+
+    def build_learning_candidates_for_task(
+        task_id: str,
+        *,
+        final_accepted_run_id: str | None = None,
+        trigger_run_id: str | None = None,
+    ) -> list[FakeLearningCandidate]:
+        generated.append((task_id, final_accepted_run_id or trigger_run_id or ""))
        return [FakeLearningCandidate()]

    loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task

-    feedback = asyncio.run(
-        service.submit_feedback(
-            session_id=result.session_id,
+    response = asyncio.run(
+        service.submit_acceptance(
+            session_id="web:acceptance",
            run_id=result.run_id,
-            feedback_type="satisfied",
+            acceptance_type="accept",
        )
    )

-    assert feedback["task_status"] == "closed"
-    assert feedback["learning_candidates"] == [
+    assert response["task_status"] == "closed"
+    assert response["acceptance_type"] == "accept"
+    assert response["learning_candidates"] == [
        {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
    ]
-    assert learning_calls == [(result.task_id, result.run_id)]
+    assert generated == [(result.task_id, result.run_id)]

-    service2 = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path / "abandon",
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [
-                    ValidationResult(passed=False, score=0.3, validator="test"),
-                    ValidationResult(passed=False, score=0.3, validator="test"),
-                ]
-            ),
-        )
-    )
-    abandoned = asyncio.run(
-        service2.process_direct(
-            "implement another workflow",
-            session_id="web:abandon",
-            provider_bundle=_bundle("not enough", "still not enough"),
-        )
-    )
-    abandon_feedback = asyncio.run(
-        service2.submit_feedback(
-            session_id=abandoned.session_id,
-            run_id=abandoned.run_id,
-            feedback_type="abandon",
-            comment="too costly",
-        )
-    )
-
-    assert abandon_feedback["task_status"] == "abandoned"
-    assert abandon_feedback["learning_candidates"] == []
-    loaded2 = service2.create_loop().boot()
-    failure_events = [
-        event
-        for event in loaded2.session_manager.get_run_event_records(abandoned.session_id, abandoned.run_id)
-        if event.event_type == "task_failure_evidence_recorded"
-    ]
-    assert len(failure_events) == 1
-    assert loaded2.memory_service.get_store().memory_entries == []
+    task_service = loaded.task_service
+    assert task_service is not None
+    task = task_service.get_task(result.task_id or "")
+    assert task is not None
+    assert task.metadata["final_accepted_run_id"] == result.run_id


-def test_feedback_is_idempotent_and_projected_to_assistant_message(tmp_path: Path) -> None:
+def test_revise_and_abandon_do_not_trigger_learning(tmp_path: Path) -> None:
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [ValidationResult(passed=True, score=0.9, validator="test")]
-            ),
+            task_execution_planner=StubTaskExecutionPlanner(),
        )
    )
    result = asyncio.run(
        service.process_direct(
-            "implement feedback projection",
-            session_id="web:feedback-projection",
-            provider_bundle=_bundle("done"),
+            "summarize notes",
+            session_id="web:revise",
+            provider_bundle=_bundle("Summary"),
        )
    )
-    loaded = service.create_loop().boot()

-    first = asyncio.run(
-        service.submit_feedback(
-            session_id=result.session_id,
+    response = asyncio.run(
+        service.submit_acceptance(
+            session_id="web:revise",
            run_id=result.run_id,
-            feedback_type="satisfied",
+            acceptance_type="revise",
+            comment="Add decisions",
        )
    )
-    second = asyncio.run(
+
+    assert response["task_status"] == "needs_revision"
+    assert response["learning_candidates"] == []
+
+    task_service = service.create_loop().boot().task_service
+    assert task_service is not None
+    task = task_service.get_task(result.task_id or "")
+    assert task is not None
+    assert task.feedback[0]["acceptance_type"] == "revise"
+
+
+def test_legacy_feedback_endpoint_maps_satisfied_to_accept(tmp_path: Path) -> None:
+    service = AgentService(
+        loader=EngineLoader(
+            workspace=tmp_path,
+            task_execution_planner=StubTaskExecutionPlanner(),
+        )
+    )
+    result = asyncio.run(
+        service.process_direct(
+            "prepare checklist",
+            session_id="web:legacy",
+            provider_bundle=_bundle("Checklist"),
+        )
+    )
+
+    response = asyncio.run(
        service.submit_feedback(
-            session_id=result.session_id,
+            session_id="web:legacy",
            run_id=result.run_id,
            feedback_type="satisfied",
        )
    )

-    feedback_events = [
-        event
-        for event in loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
-        if event.event_type == "task_feedback_recorded"
-    ]
-    assistant = [
-        message
-        for message in loaded.session_manager.get_messages_as_conversation(result.session_id)
-        if message.get("role") == "assistant" and message.get("run_id") == result.run_id
-    ][-1]
-
-    assert first["task_status"] == "closed"
-    assert second["task_status"] == "closed"
-    assert len(feedback_events) == 1
-    assert assistant["feedback_state"] == "satisfied"
-    assert assistant["task_status"] == "closed"
-    assert assistant["validation_status"] == "passed"
-
-    with pytest.raises(ValueError, match="already recorded"):
-        asyncio.run(
-            service.submit_feedback(
-                session_id=result.session_id,
-                run_id=result.run_id,
-                feedback_type="abandon",
-            )
-        )
-
-    task = loaded.task_service.get_task(result.task_id)
-    assert task is not None
-    assert task.status == "closed"
+    assert response["acceptance_type"] == "accept"
+    assert response["feedback_type"] == "satisfied"
+    assert response["task_status"] == "closed"


-def test_task_mode_team_plan_runs_subagent_then_main_synthesis(tmp_path: Path) -> None:
-    main_provider = StubProvider(
-        [
-            LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
-        ]
-    )
-    sub_provider = StubProvider(
-        [
-            LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
-        ]
-    )
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
-            validation_service=StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")]),
-        )
-    )
+def test_task_service_maps_legacy_status_and_feedback(tmp_path: Path) -> None:
+    service = TaskService(tmp_path)
+    task = service.create_task(session_id="s", description="legacy")
+    task.status = "awaiting_feedback"
+    task.feedback.append({"feedback_type": "satisfied", "run_id": "run-1"})
+    service.store.upsert_task(task)

-    result = asyncio.run(
-        service.process_direct(
-            "implement team-backed workflow",
-            session_id="web:team",
-            provider_bundle=_provider_bundle(main_provider),
-            team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task(result.task_id)
-    events = loaded.session_manager.get_event_records(result.session_id)
+    loaded = service.get_task(task.task_id)

-    assert result.output_text == "final synthesized answer"
-    assert task is not None
-    assert len(task.run_ids) == 2
-    assert result.run_id == task.run_ids[-1]
-    assert any(event.event_type == "task_execution_planned" for event in events)
-    assert any(event.event_type == "task_team_run_completed" for event in events)
-    assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
-    assert "sub-agent evidence" != result.output_text
-
-
-def test_task_mode_team_synthesis_runs_without_tools_and_receives_evidence(tmp_path: Path) -> None:
-    main_provider = StubProvider(
-        [
-            LLMResponse(content="final synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
-        ]
-    )
-    sub_provider = StubProvider(
-        [
-            LLMResponse(content="sub-agent evidence", finish_reason="stop", provider_name="stub", model="stub-model")
-        ]
-    )
-    validation = StubValidationService([ValidationResult(status="accepted", score=0.9, validator="test")])
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
-            validation_service=validation,
-        )
-    )
-
-    result = asyncio.run(
-        service.process_direct(
-            "implement team-backed workflow",
-            session_id="web:team-no-tools",
-            provider_bundle=_provider_bundle(main_provider),
-            team_provider_bundle_factory=lambda node: _provider_bundle(sub_provider),
-        )
-    )
-
-    assert result.output_text == "final synthesized answer"
-    assert main_provider.calls[0]["tools"] is None
-    assert "sub-agent evidence" in main_provider.calls[0]["messages"][0]["content"]
-    assert "Task evidence packet" in validation.calls[0]["evidence_text"]
-
-
-def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> None:
-    main_provider = StubProvider(
-        [
-            LLMResponse(content="fallback synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model")
-        ]
-    )
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=StubTaskExecutionPlanner([_team_plan()]),
-            validation_service=StubValidationService([ValidationResult(passed=True, score=0.9, validator="test")]),
-        )
-    )
-
-    result = asyncio.run(
-        service.process_direct(
-            "implement workflow despite team failure",
-            session_id="web:team-failure",
-            provider_bundle=_provider_bundle(main_provider),
-            team_provider_bundle_factory=lambda node: (_ for _ in ()).throw(RuntimeError("sub-agent unavailable")),
-        )
-    )
-    loaded = service.create_loop().boot()
-    events = loaded.session_manager.get_event_records(result.session_id)
-
-    assert result.output_text == "fallback synthesized answer"
-    assert any(event.event_type == "task_team_run_failed" for event in events)
-    assert "sub-agent unavailable" in main_provider.calls[0]["messages"][0]["content"]
-    assert "same class of tools fails repeatedly" in main_provider.calls[0]["messages"][0]["content"]
-    assert "user-visible fallback answer" in main_provider.calls[0]["messages"][0]["content"]
-
-
-def test_insufficient_evidence_moves_task_to_needs_review(tmp_path: Path) -> None:
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=_single_planner(),
-            validation_service=StubValidationService(
-                [
-                    ValidationResult(
-                        status="insufficient_evidence",
-                        score=0.4,
-                        evidence_gaps=["source missing"],
-                        validator="test",
-                    )
-                ]
-            ),
-        )
-    )
-
-    result = asyncio.run(
-        service.process_direct(
-            "answer with uncertain evidence",
-            session_id="web:needs-review",
-            provider_bundle=_bundle("possible answer"),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task(result.task_id)
-    events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id)
-    validation_event = next(event for event in events if event.event_type == "task_validation_snapshotted")
-
-    assert task is not None
-    assert task.status == "needs_review"
-    assert task.requires_user_action is True
-    assert task.is_execution_active is False
-    assert validation_event.event_payload["validation_result"]["status"] == "insufficient_evidence"
-    assert validation_event.event_payload["retry_scheduled"] is False
-    assert validation_event.event_payload["validation_debug"]["tool_result_count"] >= 0
-
-
-def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
-    main_provider = StubProvider(
-        [
-            LLMResponse(content="first synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model"),
-            LLMResponse(content="revised synthesized answer", finish_reason="stop", provider_name="stub", model="stub-model"),
-        ]
-    )
-    sub_providers = [
-        StubProvider([LLMResponse(content="first evidence", finish_reason="stop", provider_name="stub", model="stub-model")]),
-        StubProvider([LLMResponse(content="second evidence", finish_reason="stop", provider_name="stub", model="stub-model")]),
-    ]
-    service = AgentService(
-        loader=EngineLoader(
-            workspace=tmp_path,
-            task_execution_planner=StubTaskExecutionPlanner([_team_plan(), _team_plan()]),
-            validation_service=StubValidationService(
-                [
-                    ValidationResult(passed=False, score=0.2, recommended_revision_prompt="revise", validator="test"),
-                    ValidationResult(passed=True, score=0.9, validator="test"),
-                ]
-            ),
-        )
-    )
-
-    result = asyncio.run(
-        service.process_direct(
-            "implement and validate with team",
-            session_id="web:team-retry",
-            provider_bundle=_provider_bundle(main_provider),
-            team_provider_bundle_factory=lambda node: _provider_bundle(sub_providers.pop(0)),
-        )
-    )
-    loaded = service.create_loop().boot()
-    task = loaded.task_service.get_task(result.task_id)
-    visible = loaded.session_manager.get_messages_as_conversation(result.session_id)
-    visible_contents = [message.get("content") for message in visible]
-    run_records = {record.run_id: record for record in loaded.run_memory_store.list_runs()}
-
-    assert result.output_text == "revised synthesized answer"
-    assert task is not None
-    assert len(task.run_ids) == 4
-    assert "first synthesized answer" not in visible_contents
-    assert "revised synthesized answer" in visible_contents
-    for run_id in task.run_ids:
-        record = run_records[run_id]
-        events = loaded.session_manager.get_run_event_records(record.session_id, run_id)
-        skill_effects = [event for event in events if event.event_type == "skill_effects_snapshotted"]
-        assert skill_effects
-        assert skill_effects[-1].event_payload["candidate_generation_allowed"] is False
-
-
-def test_context_builder_strips_ui_projection_fields_from_provider_history() -> None:
-    result = ContextBuilder().build_messages(
-        ContextBuildInput(
-            history=[
-                {
-                    "role": "assistant",
-                    "content": "done",
-                    "run_id": "run-1",
-                    "task_id": "task-1",
-                    "task_status": "closed",
-                    "validation_status": "passed",
-                    "feedback_state": "satisfied",
-                }
-            ],
-        )
-    )
-
-    assistant = result.messages[-1]
-    assert assistant == {"role": "assistant", "content": "done"}
-
-
-def test_context_builder_normalizes_persisted_tool_arguments() -> None:
-    result = ContextBuilder().build_messages(
-        ContextBuildInput(
-            history=[
-                {
-                    "role": "assistant",
-                    "content": None,
-                    "tool_calls": [
-                        {
-                            "id": "call-1",
-                            "type": "function",
-                            "function": {
-                                "name": "cron",
-                                "arguments": {"action": "add", "mode": "notification"},
-                            },
-                        }
-                    ],
-                }
-            ],
-        )
-    )
-
-    tool_call = result.messages[-1]["tool_calls"][0]
-    assert tool_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
-
-
-def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
-    task_service = TaskService(tmp_path / "tasks")
-    task = task_service.create_task(session_id="web:validator", description="implement validator handling")
-    validation = asyncio.run(
-        ValidationService().validate_task_result(
-                task=task,
-                user_message="implement validator handling",
-                final_output="done",
-                provider_bundle=_main_only_bundle("not json"),
-            )
-        )
-
-    assert validation.accepted is False
-    assert validation.status == "validator_error"
-    assert validation.validator == "llm_error"
-    assert validation.issues
+    assert loaded is not None
+    assert loaded.status == "awaiting_acceptance"
+    assert loaded.feedback[0]["acceptance_type"] == "accept"
--- a/app-instance/backend/tests/unit/test_websocket_chat.py
+++ b/app-instance/backend/tests/unit/test_websocket_chat.py
@ -20,8 +20,8 @@ class StubRunResult:
    model: str | None = "stub-model"
    usage: dict[str, Any] = field(default_factory=lambda: {"total_tokens": 3})
    task_id: str | None = "task-1"
-    task_status: str | None = "awaiting_feedback"
-    validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
+    task_status: str | None = "awaiting_acceptance"
+    validation_result: dict[str, Any] | None = None


 class StubAgentService(AgentService):
@ -101,9 +101,10 @@ def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
    assert message["session_id"] == "web:alpha"
    assert message["run_id"] == "run-1"
    assert message["task_id"] == "task-1"
-    assert message["task_status"] == "awaiting_feedback"
-    assert message["validation_result"] == {"accepted": True}
-    assert message["validation_status"] == "passed"
+    assert message["task_status"] == "awaiting_acceptance"
+    assert message["evidence_status"] == "recorded"
+    assert message["validation_result"] is None
+    assert "validation_status" not in message
    assert message["metadata"]["input_metadata"] == {
        "source": "test",
        "attachments": [{"file_id": "file-1", "name": "a.txt"}],
--- a/app-instance/frontend/app/(app)/page.tsx
+++ b/app-instance/frontend/app/(app)/page.tsx
@ -19,7 +19,7 @@ import {
  uploadFile,
  wsManager,
 } from '@/lib/api';
-import { mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
+import { mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
 import { pickAppText } from '@/lib/i18n/core';
 import { useAppI18n } from '@/lib/i18n/provider';
 import { buildSessionProgressView } from '@/lib/session-progress';
@ -32,7 +32,7 @@ function isSessionUpdatedEvent(data: WsEvent | Record<string, unknown>): data is

 function activeTaskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
  if (status === 'needs_revision') return pickAppText(locale, '待修改', 'Needs revision');
-  if (status === 'awaiting_feedback') return pickAppText(locale, '待反馈', 'Awaiting feedback');
+  if (status === 'awaiting_acceptance') return pickAppText(locale, '待验收', 'Awaiting acceptance');
  if (status === 'running') return pickAppText(locale, '进行中', 'Running');
  return pickAppText(locale, '进行中', 'Active');
 }
@ -157,10 +157,11 @@ export default function ChatPage() {
        setSessionProcess(key, process);
      }
      void loadActiveTask(key);
-      const shouldMergePending = shouldMergePendingUsers(detail.messages, localSnapshot, waitingForReply);
+      const displayMessages = detail.messages.filter(shouldDisplayChatMessage);
+      const shouldMergePending = shouldMergePendingUsers(displayMessages, localSnapshot, waitingForReply);
      const nextMessages = shouldMergePending
-        ? mergeServerWithPendingUsers(detail.messages, localSnapshot)
-        : detail.messages;
+        ? mergeServerWithPendingUsers(displayMessages, localSnapshot)
+        : displayMessages;
      setMessages(nextMessages);
      shouldSnapToLatestRef.current = true;
      const last = nextMessages[nextMessages.length - 1];
@ -217,15 +218,11 @@ export default function ChatPage() {
      if (data.type === 'status' && data.status === 'thinking') {
        setIsThinking(true);
      } else if (data.type === 'message' && data.role === 'assistant') {
-        const validationResult = data.validation_result ?? data.metadata?.validation_result;
-        const validationStatus = data.validation_status
-          ? data.validation_status
-          : validationResult
-            ? ((validationResult as Record<string, unknown>).accepted === true ? 'passed' : 'failed')
-            : 'unknown';
        setIsThinking(false);
        setIsLoading(false);
-        addMessage({
+        const rawEvidenceStatus = data.evidence_status ?? data.metadata?.evidence_status;
+        const evidenceStatus = rawEvidenceStatus === 'recorded' ? 'recorded' : undefined;
+        const assistantMessage = {
          role: 'assistant',
          content: typeof data.content === 'string' ? data.content : '',
          timestamp: new Date().toISOString(),
@ -233,8 +230,11 @@ export default function ChatPage() {
          run_id: typeof data.run_id === 'string' ? data.run_id : undefined,
          task_id: data.task_id ?? data.metadata?.task_id ?? null,
          task_status: data.task_status ?? data.metadata?.task_status ?? null,
-          validation_status: validationStatus,
-        });
+          evidence_status: evidenceStatus,
+        } as const;
+        if (shouldDisplayChatMessage(assistantMessage)) {
+          addMessage(assistantMessage);
+        }
        void loadSessionMessages(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
        void loadActiveTask(typeof data.session_id === 'string' ? data.session_id : useChatStore.getState().sessionId);
        loadSessions();
@ -359,17 +359,18 @@ export default function ChatPage() {
            await loadSessions();
            return;
          }
-          addMessage({
+          const assistantMessage = {
            role: 'assistant',
            content: result.response,
            timestamp: new Date().toISOString(),
            run_id: result.run_id,
            task_id: result.task_id,
            task_status: result.task_status,
-            validation_status: result.validation_result
-              ? (result.validation_result.accepted === true ? 'passed' : 'failed')
-              : 'unknown',
-          });
+            evidence_status: result.evidence_status === 'recorded' ? 'recorded' : undefined,
+          } as const;
+          if (shouldDisplayChatMessage(assistantMessage)) {
+            addMessage(assistantMessage);
+          }
          void getSessionProcess(sessionId).then((process) => setSessionProcess(sessionId, process)).catch(() => null);
          void loadActiveTask(sessionId);
          loadSessions();
@ -393,7 +394,7 @@ export default function ChatPage() {
    }
  }, [addMessage, clearInputDraft, input, isLoading, loadActiveTask, loadSessionMessages, loadSessions, locale, pendingFiles, revisionTargetRunId, sessionId, setIsLoading, setIsThinking, setSessionProcess, thinkingModeEnabled, updateMessageFeedback]);

-  const handleFeedback = useCallback(async (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => {
+  const handleFeedback = useCallback(async (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => {
    updateMessageFeedback(runId, feedbackType);
    try {
      await submitChatFeedback({
--- a/app-instance/frontend/app/(app)/skills/page.tsx
+++ b/app-instance/frontend/app/(app)/skills/page.tsx
@ -1238,7 +1238,7 @@ function riskLabel(risk: string, t: (zh: string, en: string) => string): string

 function triggerReasonLabel(reason: string, t: (zh: string, en: string) => string): string {
  const labels: Record<string, string> = {
-    validation_accepted_and_user_satisfied: t('任务验证通过且用户满意', 'Validation accepted and user satisfied'),
+    task_accepted: t('任务已接受', 'Task accepted'),
  };
  return labels[reason] || reason;
 }
--- a/app-instance/frontend/app/(app)/tasks/[taskId]/page.tsx
+++ b/app-instance/frontend/app/(app)/tasks/[taskId]/page.tsx
@ -3,7 +3,7 @@
 import Link from 'next/link';
 import { useParams, useRouter } from 'next/navigation';
 import React, { useMemo, useState } from 'react';
-import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, HelpCircle, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';
+import { AlertCircle, ArrowLeft, Bot, CheckCircle2, Download, FileText, Loader2, MessageSquare, RefreshCw, ThumbsUp, Trash2, User, XCircle } from 'lucide-react';

 import { TaskRuntimeStatusBadge, formatTaskRuntimeDuration, formatTaskRuntimeTime, progressPercent } from '@/components/task-runtime/TaskRuntimeShared';
 import { Badge } from '@/components/ui/badge';
@ -17,8 +17,9 @@ import { buildTaskRuntimeView, type TaskRuntimeNodeView } from '@/lib/task-runti
 import { useChatStore } from '@/lib/store';
 import type { BackendTask, BackendTaskRun, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';

-type TaskFeedbackType = 'satisfied' | 'revise' | 'abandon';
+type TaskFeedbackType = 'accept' | 'revise' | 'abandon';
 type TaskFeedbackItem = {
+  acceptance_type?: unknown;
  feedback_type?: unknown;
  comment?: unknown;
  created_at?: unknown;
@ -151,12 +152,6 @@ export default function TaskDetailPage() {
  const backendFeedbackRunId = backendTask ? pickFeedbackRunId(backendTask) : null;

  if (!task && backendTask) {
-    const validation = backendTask.validation_result;
-    const accepted = Boolean(validation?.accepted);
-    const validationIssues = [
-      ...arrayOfStrings(validation?.issues),
-      ...arrayOfStrings(validation?.missing_requirements),
-    ];
    const feedbackItems = backendTask.feedback || [];
    return (
      <div className="mx-auto max-w-5xl space-y-6 p-6">
@ -232,57 +227,6 @@ export default function TaskDetailPage() {
          </CardContent>
        </Card>

-        <Card>
-          <CardHeader>
-            <CardTitle className="text-base">{pickAppText(locale, '验证和反馈', 'Validation and feedback')}</CardTitle>
-          </CardHeader>
-          <CardContent className="space-y-4 text-sm">
-            <div className="rounded-lg border border-border bg-muted/25 p-4">
-              <div className="flex items-center gap-2">
-                {validation ? (
-                  accepted ? <CheckCircle2 className="h-5 w-5 text-[#657162]" /> : <XCircle className="h-5 w-5 text-destructive" />
-                ) : (
-                  <HelpCircle className="h-5 w-5 text-muted-foreground" />
-                )}
-                <div className="font-medium">
-                  {validation
-                    ? accepted
-                      ? pickAppText(locale, '验证通过', 'Validation passed')
-                      : pickAppText(locale, '需要继续修改', 'Needs revision')
-                    : pickAppText(locale, '尚未验证', 'Not validated yet')}
-                </div>
-              </div>
-              {validation ? (
-                <div className="mt-2 text-muted-foreground">
-                  {pickAppText(locale, '评分', 'Score')}: {String(validation.score ?? '-')} · {pickAppText(locale, '验证器', 'Validator')}: {String(validation.validator ?? '-')}
-                </div>
-              ) : null}
-              {validationIssues.length > 0 && (
-                <ul className="mt-3 list-disc space-y-1 pl-5 text-muted-foreground">
-                  {validationIssues.map((item, index) => <li key={`${item}:${index}`}>{item}</li>)}
-                </ul>
-              )}
-              {typeof validation?.recommended_revision_prompt === 'string' && validation.recommended_revision_prompt && (
-                <p className="mt-3 rounded-md bg-background p-3 text-muted-foreground">{validation.recommended_revision_prompt}</p>
-              )}
-            </div>
-
-            <div className="space-y-2">
-              <div className="font-medium">{pickAppText(locale, '用户反馈', 'User feedback')}</div>
-              {feedbackItems.length === 0 ? (
-                <p className="text-muted-foreground">{pickAppText(locale, '还没有用户反馈。', 'No user feedback yet.')}</p>
-              ) : (
-                feedbackItems.map((item, index) => (
-                  <div key={index} className="rounded-md border border-border p-3">
-                    <div className="font-medium">{humanFeedback(String(item.feedback_type || ''), locale)}</div>
-                    {item.comment ? <p className="mt-1 text-muted-foreground">{String(item.comment)}</p> : null}
-                    {item.created_at ? <p className="mt-1 text-xs text-muted-foreground">{formatTaskRuntimeTime(String(item.created_at), locale)}</p> : null}
-                  </div>
-                ))
-              )}
-            </div>
-          </CardContent>
-        </Card>
      </div>
    );
  }
@ -476,6 +420,7 @@ export default function TaskDetailPage() {
                  comment,
                });
                setRuntimeFeedback({
+                  acceptance_type: feedbackType,
                  feedback_type: feedbackType,
                  comment: comment || '',
                  created_at: new Date().toISOString(),
@ -660,14 +605,14 @@ function TaskFeedbackPanel({
  return (
    <Card>
      <CardHeader>
-        <CardTitle className="text-base">{pickAppText(locale, '任务反馈', 'Task feedback')}</CardTitle>
+          <CardTitle className="text-base">{pickAppText(locale, '任务验收', 'Task acceptance')}</CardTitle>
      </CardHeader>
      <CardContent className="space-y-4">
        {recordedFeedback ? (
          <div className="rounded-md border border-border bg-muted/25 p-3 text-sm">
            <div className="flex items-center gap-2 font-medium">
              <CheckCircle2 className="h-4 w-4 text-[#657162]" />
-              {pickAppText(locale, '已提交反馈', 'Feedback submitted')}: {humanFeedback(String(recordedFeedback.feedback_type || ''), locale)}
+              {pickAppText(locale, '已提交验收', 'Acceptance submitted')}: {humanFeedback(String(recordedFeedback.acceptance_type || recordedFeedback.feedback_type || ''), locale)}
            </div>
            {recordedFeedback.comment ? (
              <p className="mt-2 text-muted-foreground">{String(recordedFeedback.comment)}</p>
@ -678,22 +623,22 @@ function TaskFeedbackPanel({
          </div>
        ) : isFinalized ? (
          <div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
-            {pickAppText(locale, '任务已结束，不能再提交新的反馈。', 'This task is finalized and cannot accept new feedback.')}
+            {pickAppText(locale, '任务已结束，不能再提交新的验收。', 'This task is finalized; no further acceptance decisions can be submitted.')}
          </div>
        ) : !runId ? (
          <div className="rounded-md border border-border bg-muted/25 p-3 text-sm text-muted-foreground">
-            {pickAppText(locale, '暂无可反馈的运行记录。', 'No run is available for feedback yet.')}
+            {pickAppText(locale, '暂无可验收的运行记录。', 'No run is available for acceptance yet.')}
          </div>
        ) : null}

        <div className="grid gap-2 sm:grid-cols-3">
          <FeedbackButton
-            type="satisfied"
+            type="accept"
            icon={<ThumbsUp className="mr-2 h-4 w-4" />}
-            label={pickAppText(locale, '满意', 'Satisfied')}
+            label={pickAppText(locale, '接受', 'Accept')}
            actionBusy={actionBusy}
            disabled={!canSubmit}
-            onClick={() => submit('satisfied', comment.trim() || undefined)}
+            onClick={() => submit('accept', comment.trim() || undefined)}
          />
          <FeedbackButton
            type="revise"
@ -717,10 +662,10 @@ function TaskFeedbackPanel({
          value={comment}
          onChange={(event) => setComment(event.target.value)}
          disabled={Boolean(recordedFeedback) || isFinalized || Boolean(actionBusy)}
-          placeholder={pickAppText(locale, '需要修改时写下具体要求；满意或放弃可选填说明。', 'Describe requested changes; notes are optional for satisfied or abandon.')}
+          placeholder={pickAppText(locale, '需要修改时写下具体要求；接受或放弃可选填说明。', 'Describe requested changes; notes are optional for accept or abandon.')}
        />
        <div className="text-xs text-muted-foreground">
-          {pickAppText(locale, '反馈将记录到当前任务运行：', 'Feedback will be recorded on run: ')}
+          {pickAppText(locale, '验收将记录到当前任务运行：', 'Acceptance will be recorded on run: ')}
          <span className="font-mono">{runId || '-'}</span>
          <span className="mx-1">·</span>
          {pickAppText(locale, '会话：', 'Session: ')}
@ -807,8 +752,7 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
  const map: Record<string, [string, string]> = {
    open: ['已创建', 'Open'],
    running: ['执行中', 'Running'],
-    validating: ['验证中', 'Validating'],
-    awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
+    awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
    needs_revision: ['需要修改', 'Needs revision'],
    closed: ['已完成', 'Closed'],
    abandoned: ['已放弃', 'Abandoned'],
@ -818,10 +762,10 @@ function humanTaskStatus(status: string, locale: 'zh-CN' | 'en-US') {
 }

 function humanFeedback(type: string, locale: 'zh-CN' | 'en-US') {
-  if (type === 'satisfied') return pickAppText(locale, '满意', 'Satisfied');
+  // Maps an acceptance/feedback type string to a localized display label.
+  // 'satisfied' is matched together with 'accept' and renders the same label
+  // (presumably so records stored under the old value still display; TODO confirm).
+  if (type === 'accept' || type === 'satisfied') return pickAppText(locale, '接受', 'Accepted');
  if (type === 'revise') return pickAppText(locale, '请求修改', 'Revision requested');
  if (type === 'abandon') return pickAppText(locale, '放弃任务', 'Abandoned');
-  return type || pickAppText(locale, '反馈', 'Feedback');
+  // Unrecognized non-empty types are returned verbatim; an empty string
+  // falls back to the generic localized label.
+  return type || pickAppText(locale, '验收', 'Acceptance');
 }

 function humanFinishReason(reason: string, locale: 'zh-CN' | 'en-US') {
@ -848,7 +792,3 @@ function feedbackForRun(items: TaskFeedbackItem[], runId: string | null): TaskFe
 function latestFeedback(items: TaskFeedbackItem[]): TaskFeedbackItem | null {
+  // Returns the last element of `items`, or null when the list is empty.
+  // The spread makes a copy first so that reverse() (which reverses in place)
+  // does not mutate the caller's array.
+  // NOTE(review): the copy + reverse exists only to read the final element;
+  // indexing `items[items.length - 1]` would give the same result without the copy.
  return [...items].reverse()[0] ?? null;
 }
-
-function arrayOfStrings(value: unknown): string[] {
-  return Array.isArray(value) ? value.map((item) => String(item)).filter(Boolean) : [];
-}
--- a/app-instance/frontend/app/(app)/tasks/page.tsx
+++ b/app-instance/frontend/app/(app)/tasks/page.tsx
@ -142,7 +142,7 @@ function OrdinaryTasks() {
                  </div>
                </TableCell>
                <TableCell>
-                  <Badge variant={task.status === 'awaiting_feedback' || task.status === 'closed' ? 'default' : 'secondary'}>
+                  <Badge variant={task.status === 'awaiting_acceptance' || task.status === 'closed' ? 'default' : 'secondary'}>
                    {taskStatusLabel(task.status, locale)}
                  </Badge>
                </TableCell>
@ -185,8 +185,7 @@ function taskStatusLabel(status: string, locale: 'zh-CN' | 'en-US') {
  const labels: Record<string, [string, string]> = {
    open: ['已创建', 'Open'],
    running: ['执行中', 'Running'],
-    validating: ['验证中', 'Validating'],
-    awaiting_feedback: ['等待反馈', 'Awaiting feedback'],
+    awaiting_acceptance: ['等待验收', 'Awaiting acceptance'],
    needs_revision: ['需要修改', 'Needs revision'],
    closed: ['已完成', 'Closed'],
    abandoned: ['已放弃', 'Abandoned'],
--- a/app-instance/frontend/components/chat-workbench/ChatWorkbench.tsx
+++ b/app-instance/frontend/components/chat-workbench/ChatWorkbench.tsx
@ -27,7 +27,7 @@ export function ChatWorkbench({
  processArtifacts: ProcessArtifact[];
  selectedRunId: string | null;
  onSelectRun: (runId: string) => void;
-  onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
+  onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
  onRequestRevision: (runId: string) => void;
 }) {
  return (
--- a/app-instance/frontend/components/chat-workbench/MessageList.tsx
+++ b/app-instance/frontend/components/chat-workbench/MessageList.tsx
@ -6,7 +6,7 @@ import { Bot, CheckCircle2, ChevronRight, Loader2, Paperclip, RefreshCcw, Thumbs

 import type { ChatMessage, ProcessArtifact, ProcessEvent, ProcessRun } from '@/types';
 import { getAccessToken, getFileUrl } from '@/lib/api';
-import { getTaskCardMessageIndexes } from '@/lib/chat-messages';
+import { getTaskCardMessageIndexes, hasVisibleChatContent, normalizedMessageText, shouldDisplayChatMessage } from '@/lib/chat-messages';
 import { AgentTeamBlock } from '@/components/chat-workbench/AgentTeamBlock';
 import { MarkdownContent } from '@/components/chat-workbench/MarkdownContent';
 import { ScrollArea } from '@/components/ui/scroll-area';
@ -49,19 +49,14 @@ function MessageBubble({
  message: ChatMessage;
  showTaskCard: boolean;
  canSendFeedback: boolean;
-  onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
+  onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
  onRequestRevision: (runId: string) => void;
 }) {
  const { locale } = useAppI18n();
  const isUser = message.role === 'user';
-  const textContent = typeof message.content === 'string' ? message.content : String(message.content || '');
-  const [feedbackMode, setFeedbackMode] = React.useState<'satisfied' | null>(null);
+  const textContent = normalizedMessageText(message.content);
+  const [feedbackMode, setFeedbackMode] = React.useState<'accept' | null>(null);
  const [feedbackComment, setFeedbackComment] = React.useState('');
-  const validationFailed = message.validation_status === 'failed';
-  const validationDetails =
-    validationFailed
-      ? pickAppText(locale, '详细原因会在任务验证区展示；展开任务可查看验证报告。', 'Detailed reasons are shown in the task validation area. Open the task to inspect the validation report.')
-      : '';

  return (
    <div className={`flex gap-3 ${isUser ? 'justify-end' : ''}`}>
@ -142,22 +137,14 @@ function MessageBubble({
            </div>
          </div>
        )}
-        {!isUser && validationFailed && (
-          <details className="mt-3 rounded-md border border-destructive/30 bg-destructive/5 p-3">
-            <summary className="cursor-pointer text-base font-semibold text-destructive">
-              {pickAppText(locale, '验证失败', 'Validation failed')}
-            </summary>
-            <p className="mt-2 text-xs leading-5 text-muted-foreground">{validationDetails}</p>
-          </details>
-        )}
        {!isUser && (canSendFeedback || message.feedback_state) && message.run_id && (
          <div className="mt-3 space-y-2 border-t border-border/70 pt-3">
            {message.feedback_state ? (
              <div className="flex items-center gap-2 text-xs text-muted-foreground">
                <CheckCircle2 className="h-3.5 w-3.5" />
                <span>
-                  {message.feedback_state === 'satisfied'
-                    ? pickAppText(locale, '已标记满意', 'Marked satisfied')
+                  {message.feedback_state === 'accept' || message.feedback_state === 'satisfied'
+                    ? pickAppText(locale, '已接受', 'Accepted')
                    : message.feedback_state === 'revise'
                      ? pickAppText(locale, '已请求修改', 'Revision requested')
                      : pickAppText(locale, '已放弃任务', 'Task abandoned')}
@ -168,11 +155,11 @@ function MessageBubble({
                <div className="flex flex-wrap items-center gap-2">
                  <button
                    type="button"
-                    onClick={() => setFeedbackMode('satisfied')}
+                    onClick={() => setFeedbackMode('accept')}
                    className="inline-flex h-8 items-center gap-1 rounded-md border border-border px-3 text-xs text-muted-foreground hover:bg-accent hover:text-foreground"
                  >
                    <ThumbsUp className="h-3.5 w-3.5" />
-                    {pickAppText(locale, '满意', 'Satisfied')}
+                    {pickAppText(locale, '接受', 'Accept')}
                  </button>
                  <button
                    type="button"
@ -222,13 +209,6 @@ function MessageBubble({
                )}
              </>
            )}
-            {message.validation_status && message.validation_status !== 'unknown' && (
-              <span className="text-xs text-muted-foreground">
-                {message.validation_status === 'passed'
-                  ? pickAppText(locale, '验证通过', 'Validated')
-                  : pickAppText(locale, '验证未通过', 'Validation failed')}
-              </span>
-            )}
            {message.feedback_error && (
              <span className="text-xs text-destructive">{message.feedback_error}</span>
            )}
@ -264,6 +244,17 @@ function shouldHideSystemAgentMessage(message: ChatMessage): boolean {
  );
 }

+/**
+ * Reports whether a message has something to render.
+ * Delegates directly to hasVisibleChatContent from '@/lib/chat-messages';
+ * no additional checks are applied here.
+ */
+function hasRenderableMessageContent(message: ChatMessage): boolean {
+  return hasVisibleChatContent(message);
+}
+
+/**
+ * Decides whether a message is filtered out of the rendered list.
+ * Returns true when shouldHideSystemAgentMessage flags the message, or
+ * otherwise when shouldDisplayChatMessage rejects it.
+ */
+function shouldHideMessage(message: ChatMessage): boolean {
+  // The system-agent rule is checked first and short-circuits the display check.
+  if (shouldHideSystemAgentMessage(message)) {
+    return true;
+  }
+  return !shouldDisplayChatMessage(message);
+}
+
 function parseTimelineTime(value?: string | null): number | null {
  if (!value) return null;
  const parsed = new Date(value).getTime();
@ -342,12 +333,12 @@ export function MessageList({
  processArtifacts: ProcessArtifact[];
  selectedRunId: string | null;
  onSelectRun: (runId: string) => void;
-  onFeedback: (runId: string, feedbackType: 'satisfied' | 'revise' | 'abandon', comment?: string) => void;
+  onFeedback: (runId: string, feedbackType: 'accept' | 'revise' | 'abandon', comment?: string) => void;
  onRequestRevision: (runId: string) => void;
 }) {
  const { locale } = useAppI18n();
  const visibleMessages = React.useMemo(
-    () => messages.filter((message) => !shouldHideSystemAgentMessage(message)),
+    () => messages.filter((message) => !shouldHideMessage(message)),
    [messages]
  );
  const teamGroups = React.useMemo(
@ -385,14 +376,21 @@ export function MessageList({
    () => getTaskCardMessageIndexes(visibleMessages),
    [visibleMessages]
  );
-  const latestFeedbackRunId = [...visibleMessages]
-    .reverse()
-    .find((message) =>
-      message.role === 'assistant'
-      && message.run_id
-      && message.task_id
-      && message.task_status === 'awaiting_feedback'
-    )?.run_id;
+  const latestFeedbackMessageIndex = (() => {
+    for (let index = visibleMessages.length - 1; index >= 0; index -= 1) {
+      const message = visibleMessages[index];
+      if (
+        message.role === 'assistant'
+        && message.run_id
+        && message.task_id
+        && message.task_status === 'awaiting_acceptance'
+        && hasRenderableMessageContent(message)
+      ) {
+        return index;
+      }
+    }
+    return -1;
+  })();

  return (
    <ScrollArea className="h-full px-8" viewportRef={viewportRef}>
@ -411,7 +409,7 @@ export function MessageList({
              key={item.key}
              message={item.message}
              showTaskCard={taskCardMessageIndexes.has(item.messageIndex)}
-              canSendFeedback={Boolean(latestFeedbackRunId && item.message.run_id === latestFeedbackRunId)}
+              canSendFeedback={item.messageIndex === latestFeedbackMessageIndex}
              onFeedback={onFeedback}
              onRequestRevision={onRequestRevision}
            />
--- a/app-instance/frontend/lib/api.ts
+++ b/app-instance/frontend/lib/api.ts
@ -271,7 +271,7 @@ export async function sendMessage(
  run_id?: string;
  task_id?: string | null;
  task_status?: string | null;
-  validation_result?: Record<string, unknown> | null;
+  evidence_status?: string | null;
 }> {
  const body: Record<string, unknown> = { message, session_id: sessionId };
  if (attachments && attachments.length > 0) {
@ -293,7 +293,7 @@ export async function sendMessage(
    finish_reason?: string;
    task_id?: string | null;
    task_status?: string | null;
-    validation_result?: Record<string, unknown> | null;
+    evidence_status?: string | null;
  }>('/api/chat', {
    method: 'POST',
    body: JSON.stringify(body),
@ -305,28 +305,29 @@ export async function sendMessage(
    run_id: result.run_id,
    task_id: result.task_id,
    task_status: result.task_status,
-    validation_result: result.validation_result,
+    evidence_status: result.evidence_status,
  };
 }

 export async function submitChatFeedback(params: {
  sessionId: string;
  runId: string;
-  feedbackType: 'satisfied' | 'revise' | 'abandon';
+  feedbackType: 'accept' | 'revise' | 'abandon';
  comment?: string;
 }): Promise<{
  session_id: string;
  run_id: string;
  task_id: string;
  task_status: string;
+  acceptance_type: string;
  feedback_type: string;
 }> {
-  return fetchJSON('/api/chat/feedback', {
+  return fetchJSON('/api/chat/acceptance', {
    method: 'POST',
    body: JSON.stringify({
      session_id: params.sessionId,
      run_id: params.runId,
-      feedback_type: params.feedbackType,
+      acceptance_type: params.feedbackType,
      comment: params.comment,
    }),
  });
--- a/app-instance/frontend/lib/chat-messages.test.ts
+++ b/app-instance/frontend/lib/chat-messages.test.ts
@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest';

-import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldMergePendingUsers } from '@/lib/chat-messages';
+import { getTaskCardMessageIndexes, mergeServerWithPendingUsers, shouldDisplayChatMessage, shouldMergePendingUsers } from '@/lib/chat-messages';
 import type { ChatMessage } from '@/types';

 describe('chat message helpers', () => {
@ -85,10 +85,17 @@ describe('chat message helpers', () => {
        content: 'Final answer.',
        run_id: 'run-1',
        task_id: 'task-1',
-        task_status: 'awaiting_feedback',
+        task_status: 'awaiting_acceptance',
      },
    ];

    expect(Array.from(getTaskCardMessageIndexes(messages))).toEqual([2]);
  });
+
+  it('hides empty assistant records from session history', () => {
+    expect(shouldDisplayChatMessage({ role: 'assistant', content: '', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
+    expect(shouldDisplayChatMessage({ role: 'assistant', content: '\u200B\uFEFF', task_id: 'task-1', run_id: 'run-1' })).toBe(false);
+    expect(shouldDisplayChatMessage({ role: 'assistant', content: 'Final answer.', task_id: 'task-1', run_id: 'run-1' })).toBe(true);
+    expect(shouldDisplayChatMessage({ role: 'user', content: '' })).toBe(true);
+  });
 });
--- a/app-instance/frontend/lib/chat-messages.ts
+++ b/app-instance/frontend/lib/chat-messages.ts
@ -1,5 +1,28 @@
 import type { ChatMessage } from '@/types';

+const INVISIBLE_CONTENT_CHARS = /[\u200B-\u200D\uFEFF]/g;
+
+/**
+ * Converts arbitrary message content to display text.
+ *
+ * Characters matched by INVISIBLE_CONTENT_CHARS (U+200B–U+200D and U+FEFF)
+ * are removed and surrounding whitespace is trimmed.
+ *
+ * @param content - Raw message content; may be a string, null/undefined, or any other value.
+ * @returns The cleaned text; '' for null/undefined. Non-string values are passed through String() first.
+ */
+export function normalizedMessageText(content: unknown): string {
+  if (typeof content === 'string') {
+    return content.replace(INVISIBLE_CONTENT_CHARS, '').trim();
+  }
+  // `== null` covers both null and undefined.
+  if (content == null) {
+    return '';
+  }
+  // NOTE(review): this applies the same replace/trim as the string branch above,
+  // after coercing with String().
+  return String(content).replace(INVISIBLE_CONTENT_CHARS, '').trim();
+}
+
+/**
+ * Reports whether a message carries anything visible.
+ *
+ * @returns true when the normalized text of `msg.content` is non-empty, or
+ *   when `msg.attachments` exists and has at least one entry; false otherwise.
+ */
+export function hasVisibleChatContent(msg: ChatMessage): boolean {
+  if (normalizedMessageText(msg.content)) {
+    return true;
+  }
+  // No visible text: fall back to whether any attachments are present.
+  return Boolean(msg.attachments?.length);
+}
+
+/**
+ * Decides whether a chat message is shown.
+ * Messages whose role is not 'assistant' are always shown, even when empty;
+ * assistant messages are shown only when hasVisibleChatContent returns true.
+ */
+export function shouldDisplayChatMessage(msg: ChatMessage): boolean {
+  return msg.role !== 'assistant' || hasVisibleChatContent(msg);
+}
+
 export function messageFingerprint(msg: ChatMessage): string {
  const attachmentKey = (msg.attachments ?? [])
    .map((a) => `${a.file_id ?? ''}:${a.name}:${a.content_type}:${a.size ?? ''}`)
--- a/app-instance/frontend/types/index.ts
+++ b/app-instance/frontend/types/index.ts
@ -48,8 +48,9 @@ export interface ChatMessage {
  run_id?: string;
  task_id?: string | null;
  task_status?: string | null;
-  validation_status?: 'passed' | 'failed' | 'unknown';
-  feedback_state?: 'satisfied' | 'revise' | 'abandon';
+  evidence_status?: 'recorded';
+  acceptance_state?: 'accept' | 'revise' | 'abandon';
+  feedback_state?: 'accept' | 'satisfied' | 'revise' | 'abandon';
  feedback_error?: string;
  message_type?: string | null;
  scheduled_job_id?: string | null;
@ -153,6 +154,7 @@ export interface SystemStatus {
  workspace_exists: boolean;
  model: string;
  max_tokens: number;
+  max_context_messages?: number;
  temperature: number;
  max_tool_iterations: number;
  providers: ProviderStatus[];
@ -315,6 +317,7 @@ export interface BackendTaskRun {
  attempt_index?: number | null;
  task_text?: string;
  messages: BackendTaskRunMessage[];
+  evidence_status?: string | null;
  validation_result?: Record<string, unknown> | null;
 }

@ -972,12 +975,12 @@ export interface ChatAssistantEvent {
  run_id?: string;
  task_id?: string | null;
  task_status?: string | null;
-  validation_status?: 'passed' | 'failed' | 'unknown';
+  evidence_status?: 'recorded';
  validation_result?: Record<string, unknown> | null;
  metadata?: {
    task_id?: string | null;
    task_status?: string | null;
-    validation_result?: Record<string, unknown> | null;
+    evidence_status?: string | null;
    [key: string]: unknown;
  };
 }