```

feat(learning): 添加技能学习候选者合成锁定机制

添加了 DraftSynthesisInProgress 和 DraftHasNoChanges 异常来处理并发场景，确保同一技能学习候选者的合成过程不会重复执行。实现了 claim_learning_candidate_for_synthesis 方法来原子性地锁定候选者进行合成。

fix(web): 为技能草案创建端点添加适当的 HTTP 状态码

当草案没有变化或正在合成时，现在正确返回 409 状态码而不是内部错误。

feat(skills): 实现技能修订内容比较以检测无变化情况

添加了 _is_noop_revision 方法来比较基础技能和提议的修订，如果内容没有实际变化则抛出 NoDraftChanges 异常。

refactor(process): 修复任务证据记录后根运行状态更新逻辑

将任务证据记录事件后的状态从 waiting 更改为 done，并设置 finished_at 时间戳。

feat(tools): 防止在同一运行中重复执行外部写入操作

为邮件发送、日历创建等外部写入工具添加去重机制，避免重复的外部操作。

test: 添加技能学习和工具执行的单元测试

增加测试用例验证并发草案合成、重复外部写入抑制和无变化修订检测等功能。
```
2026-06-16 15:58:42 +08:00
parent f07ce019fe
commit 83d9d8c200
15 changed files with 615 additions and 29 deletions
--- a/app-instance/backend/tests/unit/test_create_instance_script.py
+++ b/app-instance/backend/tests/unit/test_create_instance_script.py
@@ -0,0 +1,69 @@
+import json
+import os
+import subprocess
+from pathlib import Path
+
+
+def test_create_instance_writes_default_max_tool_iterations(tmp_path) -> None:  # Runs create-instance.sh with a stub docker on PATH and checks the config file it writes.
+    app_instance_dir = Path(__file__).resolve().parents[3]  # four directory levels above this test file; the script is resolved from here
+    fake_bin = tmp_path / "bin"
+    fake_bin.mkdir()
+    docker = fake_bin / "docker"
+    docker.write_text(  # stub: `image inspect` exits 0, `container inspect` exits 1, `run` exits 0, any other command fails
+        """#!/usr/bin/env bash
+set -euo pipefail
+case "${1:-}" in
+  image)
+    [[ "${2:-}" == "inspect" ]]
+    exit 0
+    ;;
+  container)
+    [[ "${2:-}" == "inspect" ]]
+    exit 1
+    ;;
+  run)
+    exit 0
+    ;;
+  *)
+    echo "unexpected docker command: $*" >&2
+    exit 1
+    ;;
+esac
+""",
+        encoding="utf-8",
+    )
+    docker.chmod(0o755)  # make the stub executable
+
+    env = os.environ.copy()
+    env["PATH"] = f"{fake_bin}:{env['PATH']}"  # prepend so the stub directory is searched before the inherited PATH
+    instances_root = tmp_path / "instances"
+    result = subprocess.run(
+        [
+            str(app_instance_dir / "create-instance.sh"),
+            "--instance-id",
+            "default-tools",
+            "--auth-username",
+            "steven",
+            "--auth-password",
+            "secret",
+            "--skip-provider-config",
+            "--host-port",
+            "29001",
+            "--instances-root",
+            str(instances_root),  # keep generated instance files under tmp_path
+            "--registry",
+            str(tmp_path / "registry.json"),
+            "--skip-initial-skills",
+        ],
+        cwd=app_instance_dir,
+        env=env,
+        text=True,
+        capture_output=True,
+        check=False,  # the exit status is asserted explicitly below
+    )
+
+    assert result.returncode == 0, result.stderr  # show the script's stderr when it fails
+    config_path = instances_root / "default-tools" / "beaver-home" / "config.json"  # path is built from the --instance-id value passed above
+    config = json.loads(config_path.read_text(encoding="utf-8"))
+
+    assert config["agents"]["defaults"]["maxToolIterations"] == 100  # the default value this test pins
--- a/app-instance/backend/tests/unit/test_process_projection.py
+++ b/app-instance/backend/tests/unit/test_process_projection.py
@@ -363,6 +363,52 @@ def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -
    assert tool_result["metadata"]["success"] is True


+def test_process_projection_marks_root_done_when_result_is_ready(tmp_path: Path) -> None:  # The root run must project as done with a finish time after evidence is recorded.
+    session = SessionManager(tmp_path)
+    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
+    run_store.append_run_record(  # one successful main run for task-1, attempt 1
+        RunRecord(
+            run_id="main-run",
+            session_id="web:test",
+            task_id="task-1",
+            attempt_index=1,
+            task_text="send email",
+            started_at="2026-01-01T00:00:03+00:00",
+            ended_at="2026-01-01T00:00:04+00:00",
+            success=True,
+            finish_reason="stop",
+        )
+    )
+    session.append_message(  # event 1 of 3: task_execution_planned
+        "web:test",
+        role="system",
+        event_type="task_execution_planned",
+        event_payload={"task_id": "task-1", "attempt_index": 1, "plan_mode": "single", "strategy": "single"},
+        context_visible=False,
+    )
+    session.append_message(  # event 2 of 3: task_synthesis_completed, pointing at main-run
+        "web:test",
+        role="system",
+        event_type="task_synthesis_completed",
+        event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
+        context_visible=False,
+    )
+    session.append_message(  # event 3 of 3: task_evidence_recorded, the only message here carrying run_id
+        "web:test",
+        run_id="main-run",
+        role="system",
+        event_type="task_evidence_recorded",
+        event_payload={"task_id": "task-1", "attempt_index": 1, "evidence_status": "recorded"},
+        context_visible=False,
+    )
+
+    projection = SessionProcessProjector(session, run_store).project("web:test")
+
+    root_run = next(run for run in projection["runs"] if run["run_id"] == "task:task-1:attempt:1")  # select the projected run with this id; next() raises StopIteration if none matches
+    assert root_run["status"] == "done"
+    assert root_run["finished_at"] is not None
+
+
 def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
--- a/app-instance/backend/tests/unit/test_skill_learning_worker.py
+++ b/app-instance/backend/tests/unit/test_skill_learning_worker.py
@@ -5,6 +5,8 @@ import json
 from pathlib import Path
 from types import SimpleNamespace

+import pytest
+
 from beaver.engine.providers.base import LLMProvider, LLMResponse
 from beaver.engine.providers.factory import ProviderBundle
 from beaver.engine.session import SessionManager
@@ -13,6 +15,8 @@ from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore
 from beaver.skills.authoring.format import is_canonical_skill_body
 from beaver.skills.drafts import DraftService
 from beaver.skills.learning import (
+    DraftHasNoChanges,
+    DraftSynthesisInProgress,
    EvidenceSelector,
    SkillDraftSynthesizer,
    SkillLearningPipelineService,
@@ -22,7 +26,7 @@ from beaver.skills.learning import (
 )
 from beaver.skills.publisher import SkillPublisher
 from beaver.skills.reviews import ReviewService
-from beaver.skills.specs import SkillSpecStore
+from beaver.skills.specs import SkillSpecStore, SkillVersion


 class JsonProvider(LLMProvider):
@@ -44,6 +48,20 @@ class JsonProvider(LLMProvider):
        return "stub"


+class BlockingJsonProvider(JsonProvider):  # JsonProvider whose chat() signals `started`, then waits on `release` before delegating to the parent.
+    def __init__(self, *, started: asyncio.Event, release: asyncio.Event) -> None:
+        super().__init__()
+        self.started = started  # set by chat() as soon as it is entered
+        self.release = release  # chat() does not continue until this event is set
+        self.calls = 0  # number of times chat() has been entered
+
+    async def chat(self, messages: list[dict], tools: list[dict] | None = None, model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7) -> LLMResponse:
+        self.calls += 1
+        self.started.set()
+        await self.release.wait()  # hold the call open until the test releases it
+        return await super().chat(messages, tools=tools, model=model, max_tokens=max_tokens, temperature=temperature)
+
+
 def _bundle(provider: LLMProvider) -> ProviderBundle:
    runtime = SimpleNamespace(model="stub", provider_name="stub")
    return ProviderBundle(main_runtime=runtime, main_provider=provider)  # type: ignore[arg-type]
@@ -120,6 +138,69 @@ def _pipeline(tmp_path: Path) -> SkillLearningPipelineService:
    )


+def _revision_pipeline(tmp_path: Path, content: str, frontmatter: dict) -> SkillLearningPipelineService:  # Builds a pipeline over stores holding web-operation v0001, one run record and one revise_skill candidate.
+    spec_store = SkillSpecStore(tmp_path)
+    spec_store.write_skill_version(  # store the caller's content and frontmatter as version v0001
+        SkillVersion(
+            skill_name="web-operation",
+            version="v0001",
+            content_hash="hash-v1",
+            summary_hash="summary-v1",
+            created_at="2026-06-01T00:00:00+00:00",
+            created_by="test",
+            change_reason="initial",
+            parent_version=None,
+            review_state="published",
+            frontmatter=frontmatter,
+            summary="web operation",
+            tool_hints=list(frontmatter.get("tools") or []),  # copy of the frontmatter tools, or [] when the key is missing or falsy
+        ),
+        content,
+    )
+    spec_store.set_current_version("web-operation", "v0001")
+    run_store = RunMemoryStore(tmp_path / "memory" / "runs")
+    learning_store = SkillLearningStore(tmp_path / "memory" / "skills")
+    run_store.append_run_record(  # the run that the candidate below lists in source_run_ids
+        RunRecord(
+            run_id="run-1",
+            session_id="session-1",
+            task_text="check detailed weather",
+            started_at="start",
+            ended_at="end",
+            success=True,
+            finish_reason="stop",
+        )
+    )
+    learning_store.record_learning_candidate(
+        SkillLearningCandidate(
+            candidate_id="candidate-revision",
+            kind="revise_skill",
+            source_run_ids=["run-1"],
+            source_session_ids=["session-1"],
+            related_skill_names=["web-operation"],
+            reason="revise web guidance",
+            evidence={"skill_version": "v0001"},  # same version string as the skill written above
+            priority=10,
+            confidence=0.9,
+        )
+    )
+    draft_service = DraftService(spec_store)  # shared by the learning service and the pipeline below
+    learning_service = SkillLearningService(
+        run_store=run_store,
+        learning_store=learning_store,
+        draft_service=draft_service,
+        evidence_selector=EvidenceSelector(run_store),
+        synthesizer=SkillDraftSynthesizer(),
+    )
+    return SkillLearningPipelineService(
+        learning_store=learning_store,
+        learning_service=learning_service,
+        draft_service=draft_service,
+        review_service=ReviewService(spec_store),
+        publisher=SkillPublisher(spec_store),
+    )
+
+
 def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> None:
    pipeline = _pipeline(tmp_path)
    worker = SkillLearningWorker(
@@ -137,6 +218,104 @@ def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> No
    assert pipeline.list_drafts(candidate.draft_skill_name)[0].status == "draft"


+def test_concurrent_draft_synthesis_is_claimed_once(tmp_path: Path) -> None:  # A second synthesis request for the same candidate must be rejected while the first is still running.
+    pipeline = _pipeline(tmp_path)
+
+    async def scenario():  # returns (draft from the first request, the blocking provider)
+        started = asyncio.Event()
+        release = asyncio.Event()
+        provider = BlockingJsonProvider(started=started, release=release)
+        first = asyncio.create_task(
+            pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(provider))
+        )
+        await asyncio.wait_for(started.wait(), timeout=1)  # wait until the first request has entered provider.chat()
+        with pytest.raises(DraftSynthesisInProgress):
+            await pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(JsonProvider()))
+        release.set()  # let the first request finish
+        return await first, provider
+
+    draft, provider = asyncio.run(scenario())
+    candidate = pipeline.get_candidate("candidate-1")
+
+    assert provider.calls == 1  # the blocking provider's chat() was entered exactly once
+    assert candidate.status == "draft_ready"
+    assert candidate.draft_id == draft.draft_id
+    assert len(pipeline.list_drafts(candidate.draft_skill_name)) == 1  # exactly one draft is listed for the skill
+
+
+def test_existing_draft_synthesis_request_returns_same_draft(tmp_path: Path) -> None:  # Requesting synthesis again for the same candidate must return the draft already created.
+    pipeline = _pipeline(tmp_path)
+    first = asyncio.run(pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(JsonProvider())))
+    second = asyncio.run(pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(JsonProvider(fail=True))))  # NOTE(review): fail=True presumably makes the provider error if it is called - confirm in JsonProvider
+
+    assert second.draft_id == first.draft_id
+    assert len(pipeline.list_drafts(first.skill_name)) == 1  # still a single draft for the skill
+
+
+def test_revision_synthesis_with_no_content_changes_supersedes_candidate(tmp_path: Path) -> None:  # A proposed revision equal to the stored skill must raise DraftHasNoChanges and supersede the candidate.
+    content = (  # full skill document: YAML frontmatter followed by the markdown body
+        "---\n"
+        "name: web-operation\n"
+        "description: Web search and fetch.\n"
+        "tools:\n"
+        "  - web_fetch\n"
+        "  - web_search\n"
+        "---\n"
+        "\n"
+        "# Web Operation\n"
+        "\n"
+        "## Overview\n"
+        "\n"
+        "Web search and fetch.\n"
+        "\n"
+        "## When to Use\n"
+        "\n"
+        "- Use when web information is required.\n"
+        "\n"
+        "## Required Tools\n"
+        "\n"
+        "- `web_fetch`\n"
+        "- `web_search`\n"
+        "\n"
+        "## Workflow\n"
+        "\n"
+        "- Use web_search, then web_fetch.\n"
+        "\n"
+        "## Validation\n"
+        "\n"
+        "- Verify sources.\n"
+        "\n"
+        "## Boundaries\n"
+        "\n"
+        "- Stay within the request.\n"
+        "\n"
+        "## Anti-Patterns\n"
+        "\n"
+        "- Do not cite unsupported claims.\n"
+    )
+    frontmatter = {  # same name, description and tools as the frontmatter embedded in `content`
+        "name": "web-operation",
+        "description": "Web search and fetch.",
+        "tools": ["web_fetch", "web_search"],
+    }
+    pipeline = _revision_pipeline(tmp_path, content, frontmatter)
+    provider = JsonProvider(
+        payload={
+            "frontmatter": frontmatter,
+            "content": content,  # identical to the content stored as v0001
+            "change_reason": "No changes are required.",
+        }
+    )
+
+    with pytest.raises(DraftHasNoChanges):
+        asyncio.run(pipeline.synthesize_draft("candidate-revision", provider_bundle=_bundle(provider)))
+    candidate = pipeline.get_candidate("candidate-revision")
+
+    assert candidate.status == "superseded"
+    assert "no changes" in (candidate.last_error or "").lower()  # `or ""` keeps the check safe when last_error is None
+    assert pipeline.list_drafts("web-operation") == []  # no draft was created for the skill
+
+
 def test_worker_evaluates_draft_with_replay_runner_when_available(tmp_path: Path) -> None:
    pipeline = _pipeline(tmp_path)
    replay_runner = FakeReplayRunner()
--- a/app-instance/backend/tests/unit/test_tool_assembler.py
+++ b/app-instance/backend/tests/unit/test_tool_assembler.py
@@ -28,12 +28,14 @@ class DummyTool(BaseTool):
            toolset=toolset,
            always_available=always_available,
        )
+        self.calls: list[dict] = []

    @property
    def spec(self) -> ToolSpec:
        return self._spec

    async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
+        self.calls.append(dict(arguments))
        return ToolResult(success=True, content="ok", tool_name=self.spec.name)


@@ -198,3 +200,30 @@ def test_tool_executor_parses_object_tool_call_string_arguments() -> None:

    assert name == "echo"
    assert arguments == {"text": "hello"}
+
+
+def test_tool_executor_suppresses_duplicate_external_write_in_same_run() -> None:  # Executing the same send-email call twice with one context must invoke the tool only once.
+    registry = ToolRegistry()
+    send_tool = DummyTool("mcp_outlook_mcp_mail_send_email", toolset="mcp")
+    registry.register(send_tool)
+    executor = ToolExecutor(registry)
+    context = ToolContext(  # one context object is reused for both calls
+        metadata={
+            "task_id": "task-1",
+            "run_id": "run-1",
+        }
+    )
+    arguments = {
+        "to_recipients": ["jay.chen@boardware.com"],
+        "subject": "请回复今天下午的日程安排",
+        "body": "Hi Jay",
+    }
+
+    first = asyncio.run(executor.execute("mcp_outlook_mcp_mail_send_email", arguments, context=context))
+    second = asyncio.run(executor.execute("mcp_outlook_mcp_mail_send_email", dict(arguments), context=context))  # equal arguments passed as a fresh dict
+
+    assert first.success is True
+    assert second.success is True  # the repeated call is still reported as a success
+    assert second.error == "duplicate_external_write_suppressed"
+    assert "Duplicate external write suppressed" in second.content
+    assert len(send_tool.calls) == 1  # DummyTool.invoke() ran once