feat(engine): 添加运行时上下文支持并重构工具迭代限制

添加 RuntimeContext 类用于捕获模型运行时的日期时间信息，包括UTC时间、本地时间和时区信息，并在系统提示中显示这些信息。同时增加最大上下文消息数和工具迭代次数的配置选项，将验证服务从引擎加载器中移除，并更新相关的数据结构和接口。

BREAKING CHANGE: 移除了验证服务，相关字段被替换为证据状态和接受状态。

- 添加 RuntimeContext 类和相关渲染方法
- 增加 max_context_messages 和 max_tool_iterations 配置
- 移除 ValidationService 相关代码
- 更新消息记录中的验证状态字段
- 添加原始工具调用检测和回退处理
2026-05-26 11:18:35 +08:00
parent 16347caf5e
commit 6e9e74d1ee
57 changed files with 5710 additions and 1582 deletions
--- a/app-instance/backend/beaver/engine/context/__init__.py
+++ b/app-instance/backend/beaver/engine/context/__init__.py
@ -4,6 +4,7 @@ from .builder import (
    ContextBuildInput,
    ContextBuildResult,
    ContextBuilder,
+    RuntimeContext,
    SessionContext,
    SkillContext,
 )
@ -12,6 +13,7 @@ __all__ = [
    "ContextBuildInput",
    "ContextBuildResult",
    "ContextBuilder",
+    "RuntimeContext",
    "SessionContext",
    "SkillContext",
 ]
--- a/app-instance/backend/beaver/engine/context/builder.py
+++ b/app-instance/backend/beaver/engine/context/builder.py
@ -80,6 +80,16 @@ class SessionContext:
    parent_session_id: str | None = None


+@dataclass(slots=True)
+class RuntimeContext:
+    """Per-run runtime facts that should be visible to the model."""
+
+    # Current instant in UTC, as an ISO-8601 string.
+    utc_datetime: str
+    # The same run's local wall-clock time, as an ISO-8601 string.
+    local_datetime: str
+    # Name of the local timezone (a configured zone key or the platform's
+    # tzname); None when unknown.
+    timezone: str | None = None
+    # Local offset from UTC rendered as text such as "+08:00"; None when unknown.
+    utc_offset: str | None = None
+
+
@dataclass(slots=True)
 class ContextBuildInput:
    """一次上下文构建所需的全部输入。
@ -103,6 +113,7 @@ class ContextBuildInput:
    memory_snapshot: MemorySnapshot | None = None
    activated_skills: list[SkillContext] = field(default_factory=list)
    session_context: SessionContext | None = None
+    runtime_context: RuntimeContext | None = None
    execution_context: str | None = None
    extra_sections: list[str] = field(default_factory=list)

@ -143,9 +154,10 @@ class ContextBuilder:
        1. Beaver user-facing assistant identity
        2. base system prompt
        3. session metadata
-        4. execution context
-        5. frozen memory snapshot
-        6. extra sections
+        4. runtime date/time
+        5. execution context
+        6. frozen memory snapshot
+        7. extra sections

        这样设计的原因：
        - 身份与总规则要最靠前
@ -164,6 +176,10 @@ class ContextBuilder:
        if session_section:
            sections.append(session_section)

+        runtime_section = self._render_runtime_section(build_input.runtime_context)
+        if runtime_section:
+            sections.append(runtime_section)
+
        execution_context = (build_input.execution_context or "").strip()
        if execution_context:
            sections.append(f"# Execution Context\n\n{execution_context}")
@ -347,6 +363,31 @@ class ContextBuilder:
            return None
        return "# Current Session\n\n" + "\n".join(rows)

+    def _render_runtime_section(self, runtime_context: RuntimeContext | None) -> str | None:
+        """Build the "Current Date and Time" prompt section for this run.
+
+        Returns ``None`` when no runtime context is supplied or when every
+        field is empty, so the caller can omit the section entirely.
+        """
+
+        if runtime_context is None:
+            return None
+
+        # (label, value) pairs in display order; falsy values are left out.
+        labelled = (
+            ("Current UTC time", runtime_context.utc_datetime),
+            ("Current local time", runtime_context.local_datetime),
+            ("Local timezone", runtime_context.timezone),
+            ("Local UTC offset", runtime_context.utc_offset),
+        )
+        body = "\n".join(f"{label}: {value}" for label, value in labelled if value)
+        if not body:
+            return None
+
+        footer = (
+            "Use this section as authoritative for relative date/time references such as "
+            '"today", "tomorrow", "now", "this week", and "next month".'
+        )
+        return f"# Current Date and Time\n\n{body}\n\n{footer}"
+
    def build_skill_activation_messages(self, activated_skills: list[SkillContext]) -> list[dict[str, str]]:
        """把已激活 skill 转成显式消息。

--- a/app-instance/backend/beaver/engine/loader.py
+++ b/app-instance/backend/beaver/engine/loader.py
@ -24,7 +24,7 @@ from beaver.skills.learning.eval import SkillDraftEvaluator
 from beaver.skills.publisher import SkillPublisher
 from beaver.skills.reviews import ReviewService
 from beaver.skills.specs import SkillSpecStore
-from beaver.tasks import TaskExecutionPlanner, TaskService, ValidationService
+from beaver.tasks import TaskExecutionPlanner, TaskService
 from beaver.tasks.skill_resolver import TaskSkillResolver
 from beaver.skills import SkillAssembler, SkillsLoader
 from beaver.tools import ObjectBackedTool, ToolAssembler, ToolExecutor, ToolRegistry
@ -91,7 +91,6 @@ class EngineLoadResult:
    task_skill_resolver: TaskSkillResolver | None = None
    task_service: TaskService | None = None
    task_execution_planner: TaskExecutionPlanner | None = None
-    validation_service: ValidationService | None = None
    mcp_manager: MCPConnectionManager | None = None
    mcp_report: dict[str, dict] = field(default_factory=dict)
    closeables: list[tuple[str, Callable[[], None]]] = field(default_factory=list, repr=False)
@ -166,7 +165,6 @@ class EngineLoader:
        task_skill_resolver: TaskSkillResolver | None = None,
        task_service: TaskService | None = None,
        task_execution_planner: TaskExecutionPlanner | None = None,
-        validation_service: ValidationService | None = None,
    ) -> None:
        self.config = config or load_config(workspace=workspace, config_path=config_path)
        configured_workspace = self.config.agents_defaults.workspace
@ -192,7 +190,6 @@ class EngineLoader:
        self._task_skill_resolver = task_skill_resolver
        self._task_service = task_service
        self._task_execution_planner = task_execution_planner
-        self._validation_service = validation_service

    def load(self) -> EngineLoadResult:
        """装配当前主链需要的最小 runtime 对象。"""
@ -276,7 +273,6 @@ class EngineLoader:
        )
        task_service = self._task_service or TaskService(workspace / "tasks")
        task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver)
-        validation_service = self._validation_service or ValidationService()
        mcp_manager = MCPConnectionManager(
            self.config.tools.mcp_servers,
            authz_config=self.config.authz,
@ -311,7 +307,6 @@ class EngineLoader:
            task_skill_resolver=task_skill_resolver,
            task_service=task_service,
            task_execution_planner=task_execution_planner,
-            validation_service=validation_service,
            mcp_manager=mcp_manager,
        )
        if self._session_manager is None:
--- a/app-instance/backend/beaver/engine/loop.py
+++ b/app-instance/backend/beaver/engine/loop.py
@ -4,12 +4,15 @@ from __future__ import annotations

 import asyncio
 import json
+import os
+import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Any
 from uuid import uuid4
+from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

-from beaver.engine.context import ContextBuildInput, SessionContext, SkillContext
+from beaver.engine.context import ContextBuildInput, RuntimeContext, SessionContext, SkillContext
 from beaver.memory.runs import RunRecord, SkillEffectRecord
 from beaver.skills.learning import RunReceiptContext
 from beaver.skills.catalog.utils import strip_frontmatter
@ -26,6 +29,17 @@ TOOL_FAILURE_GUIDANCE_PROMPT = (
    "Use available materials, state uncertainty clearly, and provide partial confirmed results."
 )

+# Text substituted for the final answer when the model produced tool-call
+# markup (or nothing usable) instead of a prose reply.
+RAW_TOOL_CALL_FALLBACK = (
+    "The run reached the configured tool-call limit before producing a reliable final answer. "
+    "The model attempted another tool call instead of answering, so the raw tool call was suppressed. "
+    "Please request a revision to continue the task."
+)
+
+# Matches text that consists solely of serialized tool-call markup, either
+# <tool_call>...</tool_call> or <function=NAME>...</function>, ignoring case
+# and surrounding whitespace. Text with prose before or after the markup does
+# not match.
+_RAW_TOOL_CALL_RE = re.compile(
+    r"^\s*<tool_call\b[\s\S]*?</tool_call>\s*$|^\s*<function=[^>]+>[\s\S]*?</function>\s*$",
+    re.IGNORECASE,
+)
+

@dataclass(slots=True)
 class AgentProfile:
@ -35,8 +49,9 @@ class AgentProfile:
    system_prompt: str = ""
    default_model: str = "gpt-4.1-mini"
    max_tokens: int = 4096
+    max_context_messages: int = 1000
    temperature: float = 0.2
-    max_tool_iterations: int = 8
+    max_tool_iterations: int = 30


@dataclass(slots=True)
@ -446,7 +461,7 @@ class AgentLoop:
                *(pinned_skill_contexts or []),
                *self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []),
            ]
-            if not include_skill_assembly or thinking_enabled is False:
+            if not include_skill_assembly:
                activated_skills = self._merge_skill_contexts(pinned_skills, [])
            else:
                skill_query = skill_selection_context or task
@ -512,8 +527,6 @@ class AgentLoop:

            if not include_tools:
                selected_tool_specs = []
-            elif thinking_enabled is False:
-                selected_tool_specs = tool_registry.list_specs()
            else:
                selected_tool_specs = await tool_assembler.assemble(
                    task_description=task,
@ -543,7 +556,10 @@ class AgentLoop:

            build_input = ContextBuildInput(
                base_system_prompt=self.profile.system_prompt,
-                history=session_manager.get_history(resolved_session_id),
+                history=session_manager.get_history(
+                    resolved_session_id,
+                    max_messages=max(1, self.profile.max_context_messages),
+                ),
                current_user_input=task,
                memory_snapshot=memory_snapshot,
                activated_skills=activated_skills,
@ -554,6 +570,7 @@ class AgentLoop:
                    user_id=user_id,
                    parent_session_id=parent_session_id,
                ),
+                runtime_context=self._current_runtime_context(),
                execution_context=execution_context,
                extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT],
            )
@ -693,6 +710,7 @@ class AgentLoop:
                    tool_calls=assistant_tool_calls or None,
                    finish_reason=response.finish_reason,
                    reasoning=response.reasoning_content,
+                    context_visible=not bool(assistant_tool_calls),
                    source=source,
                    title=title,
                    model=final_model,
@ -707,7 +725,11 @@ class AgentLoop:

                if not response.has_tool_calls:
                    final_text = response.content or ""
-                    final_finish_reason = response.finish_reason or "stop"
+                    if self._looks_like_raw_tool_call(final_text):
+                        final_text = RAW_TOOL_CALL_FALLBACK
+                        final_finish_reason = "invalid_tool_call_text"
+                    else:
+                        final_finish_reason = response.finish_reason or "stop"
                    break

                if iterations >= resolved_max_tool_iterations:
@ -719,10 +741,7 @@ class AgentLoop:
                        temperature=resolved_temperature,
                        thinking_enabled=thinking_enabled,
                    )
-                    final_text = finalized or (
-                        "Tool loop stopped after reaching the configured iteration limit, "
-                        "and no final answer was produced."
-                    )
+                    final_text = finalized or RAW_TOOL_CALL_FALLBACK
                    final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations"
                    session_manager.append_message(
                        resolved_session_id,
@ -877,17 +896,14 @@ class AgentLoop:
        temperature: float,
        thinking_enabled: bool | None,
    ) -> str:
-        final_messages = [
-            *messages,
-            {
-                "role": "system",
-                "content": (
-                    "The configured tool iteration budget is exhausted. Do not call tools. "
-                    "Produce the best final answer from the existing conversation and tool results. "
-                    "State uncertainty explicitly."
-                ),
-            },
-        ]
+        final_messages = AgentLoop._with_system_guidance(
+            messages,
+            (
+                "The configured tool iteration budget is exhausted. Do not call tools. "
+                "Produce the best final answer from the existing conversation and tool results. "
+                "State uncertainty explicitly."
+            ),
+        )
        kwargs: dict[str, Any] = {
            "messages": final_messages,
            "tools": None,
@ -898,7 +914,27 @@ class AgentLoop:
        if thinking_enabled is not None:
            kwargs["thinking_enabled"] = thinking_enabled
        response = await provider.chat(**kwargs)
-        return (response.content or "").strip()
+        if response.has_tool_calls:
+            return ""
+        content = (response.content or "").strip()
+        if AgentLoop._looks_like_raw_tool_call(content):
+            return ""
+        return content
+
+    @staticmethod
+    def _looks_like_raw_tool_call(content: str | None) -> bool:
+        """Return True when ``content`` is nothing but raw tool-call markup.
+
+        Empty or missing content is never treated as a tool call.
+        """
+        return bool(content) and _RAW_TOOL_CALL_RE.match(content) is not None
+
+    @staticmethod
+    def _with_system_guidance(messages: list[dict[str, Any]], guidance: str) -> list[dict[str, Any]]:
+        """Return a copy of ``messages`` that carries ``guidance`` as system text.
+
+        Every message dict is shallow-copied, so the caller's dicts are not
+        modified. When the first message is a system message the guidance is
+        appended to its content after a blank line; otherwise a new system
+        message is placed at the front.
+        """
+        note = guidance.strip()
+        cloned = [dict(item) for item in messages]
+        head = cloned[0] if cloned else None
+        if head is None or head.get("role") != "system":
+            return [{"role": "system", "content": note}, *cloned]
+
+        current = str(head.get("content") or "").strip()
+        head["content"] = "\n\n".join(piece for piece in (current, note) if piece)
+        return cloned

    @staticmethod
    def _load_pinned_skill_contexts(skills_loader: Any, skill_names: list[str]) -> list[SkillContext]:
@ -1133,3 +1169,49 @@ class AgentLoop:
    @staticmethod
    def _utc_now() -> str:
        return datetime.now(timezone.utc).isoformat()
+
+    @staticmethod
+    def _current_runtime_context() -> RuntimeContext:
+        """Capture the date/time facts for one model run.
+
+        UTC and local time are derived from a single clock reading so both
+        rendered timestamps describe the same instant. The zone returned by
+        ``_configured_timezone_name`` is used when it resolves; otherwise the
+        process-local timezone is used.
+        """
+        utc_now = datetime.now(timezone.utc)
+        # The process-local timezone is the fallback when no usable zone is configured.
+        local_now = utc_now.astimezone()
+        rendered_timezone = local_now.tzname()
+
+        timezone_name = AgentLoop._configured_timezone_name()
+        if timezone_name:
+            try:
+                zone = ZoneInfo(timezone_name)
+            except (ZoneInfoNotFoundError, ValueError, OSError):
+                # Besides unknown keys, ZoneInfo rejects malformed keys (absolute
+                # or non-normalized paths, which TZ values can be) with
+                # ValueError and may surface OSError for unreadable entries.
+                # None of these should abort the run; keep the local fallback.
+                rendered_timezone = rendered_timezone or timezone_name
+            else:
+                local_now = utc_now.astimezone(zone)
+                rendered_timezone = timezone_name
+
+        return RuntimeContext(
+            utc_datetime=utc_now.isoformat(),
+            local_datetime=local_now.isoformat(),
+            timezone=rendered_timezone,
+            utc_offset=AgentLoop._format_utc_offset(local_now),
+        )
+
+    @staticmethod
+    def _configured_timezone_name() -> str | None:
+        """Return the configured timezone name, or ``None`` when none is set.
+
+        Lookup order: the ``BEAVER_RUNTIME_TIMEZONE`` environment variable, the
+        ``TZ`` environment variable, then the contents of ``/etc/timezone``.
+        The first non-blank value wins; it is stripped but not validated.
+        """
+        for variable in ("BEAVER_RUNTIME_TIMEZONE", "TZ"):
+            cleaned = (os.getenv(variable) or "").strip()
+            if cleaned:
+                return cleaned
+
+        try:
+            with open("/etc/timezone", encoding="utf-8") as file:
+                cleaned = file.read().strip()
+        except (OSError, UnicodeDecodeError):
+            # A missing, unreadable, or undecodable file means "not configured";
+            # it must not fail the run.
+            return None
+        return cleaned or None
+
+    @staticmethod
+    def _format_utc_offset(value: datetime) -> str | None:
+        """Format ``value``'s UTC offset as ``+HH:MM`` / ``-HH:MM``.
+
+        Returns ``None`` for naive datetimes, which carry no offset. Offsets
+        with a seconds component are rendered as ``+HH:MM:SS``; fractions of a
+        second are dropped.
+        """
+        offset = value.utcoffset()
+        if offset is None:
+            return None
+
+        total_seconds = int(offset.total_seconds())
+        sign = "-" if total_seconds < 0 else "+"
+        minutes, seconds = divmod(abs(total_seconds), 60)
+        hours, minutes = divmod(minutes, 60)
+        rendered = f"{sign}{hours:02d}:{minutes:02d}"
+        # Slicing strftime("%z") output would run a seconds component into the
+        # minutes field; emit it as its own group instead.
+        return f"{rendered}:{seconds:02d}" if seconds else rendered
--- a/app-instance/backend/beaver/engine/providers/litellm.py
+++ b/app-instance/backend/beaver/engine/providers/litellm.py
@ -119,13 +119,23 @@ class LiteLLMProvider(LLMProvider):
    @staticmethod
    def _sanitize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Drop unsupported message keys and fold system messages into one.
+
+        Every system message is removed from its original position and its
+        content is collected; if any content was collected, a single system
+        message (parts joined by blank lines) is placed first. Blank system
+        content is skipped, so no empty system message is ever emitted.
+        """
        sanitized = []
+        system_contents: list[str] = []
        for message in messages:
            clean = {key: value for key, value in message.items() if key in _ALLOWED_MSG_KEYS}
+            if clean.get("role") == "system":
+                content = clean.get("content")
+                # Strings are stripped; any other non-None payload is stringified.
+                if isinstance(content, str):
+                    text = content.strip()
+                elif content is not None:
+                    text = str(content)
+                else:
+                    text = ""
+                # Blank strings used to fall through to the stringify branch and
+                # were appended unstripped; skip them instead.
+                if text:
+                    system_contents.append(text)
+                continue
            if clean.get("role") == "assistant" and "content" not in clean:
                clean["content"] = None
            if isinstance(clean.get("tool_calls"), list):
                clean["tool_calls"] = LiteLLMProvider._sanitize_tool_calls(clean["tool_calls"])
            sanitized.append(clean)
+        if system_contents:
+            sanitized.insert(0, {"role": "system", "content": "\n\n".join(system_contents)})
        return sanitized

    @staticmethod
--- a/app-instance/backend/beaver/engine/session/models.py
+++ b/app-instance/backend/beaver/engine/session/models.py
@ -84,8 +84,10 @@ class MessageRecord:
                payload["task_id"] = self.event_payload.get("task_id")
            if self.event_payload.get("task_status"):
                payload["task_status"] = self.event_payload.get("task_status")
-            if self.event_payload.get("validation_status"):
-                payload["validation_status"] = self.event_payload.get("validation_status")
+            if self.event_payload.get("evidence_status"):
+                payload["evidence_status"] = self.event_payload.get("evidence_status")
+            if self.event_payload.get("acceptance_state"):
+                payload["acceptance_state"] = self.event_payload.get("acceptance_state")
            if self.event_payload.get("feedback_state"):
                payload["feedback_state"] = self.event_payload.get("feedback_state")
            if self.event_payload.get("feedback_error"):