feat(learning): 添加技能学习候选者合成锁定机制

添加了 DraftSynthesisInProgress 和 DraftHasNoChanges 异常来处理并发场景,
确保同一技能学习候选者的合成过程不会重复执行。实现了 claim_learning_candidate_for_synthesis
方法来原子性地锁定候选者进行合成。

fix(web): 为技能草案创建端点添加适当的HTTP状态码

当草案没有变化或正在合成时,现在正确返回409状态码而不是内部错误。

feat(skills): 实现技能修订内容比较以检测无变化情况

添加了 _is_noop_revision 方法来比较基础技能和提议的修订,
如果内容没有实际变化则抛出 NoDraftChanges 异常。

refactor(process): 修复任务证据记录后根运行状态更新逻辑

将任务证据记录事件后的状态从 waiting 更改为 done,并设置 finished_at 时间戳。

feat(tools): 防止在同一运行中重复执行外部写入操作

为邮件发送、日历创建等外部写入工具添加去重机制,避免重复的外部操作。

test: 添加技能学习和工具执行的单元测试

增加测试用例验证并发草案合成、重复外部写入抑制和无变化修订检测等功能。
```
This commit is contained in:
2026-06-16 15:58:42 +08:00
parent f07ce019fe
commit 83d9d8c200
15 changed files with 615 additions and 29 deletions

View File

@ -52,7 +52,13 @@ from beaver.services.user_file_resolver import (
)
from beaver.skills.authoring import canonical_skill_format_instructions, ensure_canonical_skill_body, normalize_skill_frontmatter
from beaver.skills.authoring.format import parse_skill_rewrite_json
from beaver.skills.learning import SkillLearningService, SkillLearningWorker, SkillLearningWorkerConfig
from beaver.skills.learning import (
DraftHasNoChanges,
DraftSynthesisInProgress,
SkillLearningService,
SkillLearningWorker,
SkillLearningWorkerConfig,
)
from beaver.skills.learning.replay import ReplayRunner
from beaver.skills.catalog.utils import extract_required_tool_names, parse_frontmatter
@ -2236,6 +2242,10 @@ def create_app(
candidate_id,
provider_bundle=provider_bundle,
)
except DraftHasNoChanges as exc:
raise HTTPException(status_code=409, detail=str(exc)) from exc
except DraftSynthesisInProgress as exc:
raise HTTPException(status_code=409, detail=str(exc)) from exc
except ValueError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
return _skill_draft_payload(loaded, draft.skill_name, draft.draft_id)
@ -2251,6 +2261,10 @@ def create_app(
candidate_id,
provider_bundle=provider_bundle,
)
except DraftHasNoChanges as exc:
raise HTTPException(status_code=409, detail=str(exc)) from exc
except DraftSynthesisInProgress as exc:
raise HTTPException(status_code=409, detail=str(exc)) from exc
except ValueError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
return _skill_draft_payload(loaded, draft.skill_name, draft.draft_id)

View File

@ -114,6 +114,52 @@ class SkillLearningStore:
)
return updated
def claim_learning_candidate_for_synthesis(
self,
candidate_id: str,
*,
force: bool = False,
) -> SkillLearningCandidate | None:
"""Atomically claim a candidate before the expensive draft synthesis step."""
with self._locked():
candidates = self.list_learning_candidates()
claimed: SkillLearningCandidate | None = None
for index, candidate in enumerate(candidates):
if candidate.candidate_id != candidate_id:
continue
if candidate.status in {"queued", "synthesizing"}:
return None
if not force and candidate.draft_skill_name and candidate.draft_id:
return None
payload = candidate.to_dict()
payload.update(
{
"status": "synthesizing",
"last_error": None,
"updated_at": _utc_now(),
}
)
claimed = SkillLearningCandidate.from_dict(payload)
candidates[index] = claimed
break
if claimed is None:
return None
self.candidates_path.parent.mkdir(parents=True, exist_ok=True)
self.candidates_path.write_text(
"".join(
json.dumps(candidate.to_dict(), ensure_ascii=False, sort_keys=True) + "\n"
for candidate in candidates
),
encoding="utf-8",
)
self.append_audit_event(
candidate_id,
"draft_synthesis_started",
{"status": "synthesizing", "force": force},
)
return claimed
def list_learning_candidates(self, status: str | None = None) -> list[SkillLearningCandidate]:
results: list[SkillLearningCandidate] = []
for payload in self._read_jsonl(self.candidates_path):

View File

@ -351,8 +351,8 @@ class SessionProcessProjector:
)
elif record.event_type == "task_evidence_recorded":
root["status"] = "waiting"
root["finished_at"] = None
root["status"] = "done"
root["finished_at"] = created_at
add_event(
event_id=_event_id(record, "evidence"),
run_id=record.run_id or root_run_id,

View File

@ -9,7 +9,7 @@ from .missing_skill import (
MissingSkillDraftResult,
MissingSkillSynthesizer,
)
from .pipeline import SkillLearningPipelineService
from .pipeline import DraftHasNoChanges, DraftSynthesisInProgress, SkillLearningPipelineService
from .preservation import check_preservation
from .replay import ReplayArmRequest, ReplayRunner, ReplayToolExecutor, ReplayToolPolicy, classify_tool_mode
from .service import RunReceiptContext, SkillLearningService
@ -27,6 +27,8 @@ __all__ = [
"MissingSkillDraftResult",
"MissingSkillSynthesizer",
"RunReceiptContext",
"DraftHasNoChanges",
"DraftSynthesisInProgress",
"SkillLearningPipelineService",
"check_preservation",
"ReplayToolExecutor",

View File

@ -9,7 +9,7 @@ from beaver.memory.skills import SkillDraftEvalReport, SkillDraftSafetyReport, S
from beaver.skills.drafts import DraftService
from beaver.skills.learning.eval import SkillDraftEvaluator
from beaver.skills.learning.replay import ReplayRunner
from beaver.skills.learning.service import SkillLearningService
from beaver.skills.learning.service import NoDraftChanges, SkillLearningService
from beaver.skills.learning.safety import SkillDraftSafetyChecker
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
@ -22,6 +22,14 @@ _REJECTABLE_DRAFT_STATUSES = {
}
class DraftSynthesisInProgress(RuntimeError):
"""Raised when another request already claimed the candidate for synthesis."""
class DraftHasNoChanges(RuntimeError):
"""Raised when synthesis produced no effective changes from the base skill."""
class SkillLearningPipelineService:
"""Coordinates candidate -> draft -> review -> publish lifecycle."""
@ -60,8 +68,23 @@ class SkillLearningPipelineService:
candidate_id: str,
*,
provider_bundle: ProviderBundle,
force: bool = False,
) -> SkillDraft:
draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
if not force:
existing = self._draft_for_candidate(candidate_id)
if existing is not None:
return existing
claimed = self.learning_store.claim_learning_candidate_for_synthesis(candidate_id, force=force)
if claimed is None:
existing = self._draft_for_candidate(candidate_id)
if existing is not None:
return existing
raise DraftSynthesisInProgress(f"Draft synthesis is already in progress for candidate: {candidate_id}")
try:
draft = await self.learning_service.synthesize_draft(candidate_id, provider_bundle)
except NoDraftChanges as exc:
self.mark_candidate_superseded(candidate_id, str(exc))
raise DraftHasNoChanges(str(exc)) from exc
self.mark_draft_synthesized(candidate_id, draft)
return draft
@ -71,13 +94,7 @@ class SkillLearningPipelineService:
*,
provider_bundle: ProviderBundle,
) -> SkillDraft:
self.learning_store.transition_learning_candidate(
candidate_id,
"synthesizing",
event_type="draft_synthesis_started",
last_error=None,
)
return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle)
return await self.synthesize_draft(candidate_id, provider_bundle=provider_bundle, force=True)
def mark_candidate_queued(self, candidate_id: str) -> SkillLearningCandidate:
return self._require_updated(
@ -162,6 +179,12 @@ class SkillLearningPipelineService:
raise ValueError(f"Draft not found: {skill_name}/{draft_id}")
return draft
def _draft_for_candidate(self, candidate_id: str) -> SkillDraft | None:
candidate = self.get_candidate(candidate_id)
if not candidate.draft_skill_name or not candidate.draft_id:
return None
return self.draft_service.get_draft(candidate.draft_skill_name, candidate.draft_id)
def submit_review(
self,
skill_name: str,

View File

@ -20,8 +20,9 @@ from beaver.plugins.tree_merge import merge_supporting_file_trees
from beaver.skills.drafts.service import DraftService
from beaver.skills.learning.evidence import EvidencePacket, EvidenceSelector
from beaver.skills.learning.synthesizer import SkillDraftSynthesizer
from beaver.skills.catalog.utils import parse_frontmatter
from beaver.skills.catalog.utils import parse_frontmatter, strip_frontmatter
from beaver.skills.specs import SkillActivationReceipt
from beaver.skills.specs.serialization import normalize_frontmatter
@dataclass(slots=True)
@ -30,6 +31,10 @@ class RunReceiptContext:
effect_records: list[SkillEffectRecord] = field(default_factory=list)
class NoDraftChanges(ValueError):
"""Raised when synthesis produces the same effective skill content as the base version."""
class SkillLearningService:
def __init__(
self,
@ -231,13 +236,18 @@ class SkillLearningService:
)
target_skill = candidate.related_skill_names[0]
base_version = candidate.evidence.get("skill_version")
base_skill = self._base_skill_snapshot(target_skill, base_version)
payload = await self.synthesizer.synthesize_revision(
candidate,
packet,
provider,
model,
base_skill=self._base_skill_snapshot(target_skill, base_version),
base_skill=base_skill,
)
if self._is_noop_revision(payload, base_skill):
raise NoDraftChanges(
f"Synthesis produced no changes for {target_skill}/{base_version or 'current'}"
)
return self.draft_service.create_revision_draft(
skill_name=target_skill,
base_version=base_version,
@ -340,6 +350,16 @@ class SkillLearningService:
"tool_hints": list(loaded.version.tool_hints),
}
@staticmethod
def _is_noop_revision(payload: dict[str, Any], base_skill: dict[str, Any] | None) -> bool:
if base_skill is None:
return False
base_frontmatter = normalize_frontmatter(dict(base_skill.get("frontmatter") or {}))
proposed_frontmatter = normalize_frontmatter(dict(payload.get("frontmatter") or {}))
base_body = _normalize_skill_body(str(base_skill.get("content") or ""))
proposed_body = _normalize_skill_body(str(payload.get("content") or ""))
return base_frontmatter == proposed_frontmatter and base_body == proposed_body
def _merged_base_skill_snapshot(self, skill_names: list[str]) -> dict[str, Any] | None:
snapshots = [
snapshot
@ -602,6 +622,10 @@ class SkillLearningService:
return parsed.astimezone(timezone.utc)
def _normalize_skill_body(content: str) -> str:
return "\n".join(line.rstrip() for line in strip_frontmatter(content).strip().splitlines()).strip()
def _digest_map(root: Path) -> dict[str, dict[str, Any]]:
digest = hash_plugin_skill_tree(root)
return {

View File

@ -9,7 +9,7 @@ from typing import Callable
from beaver.engine.providers import ProviderBundle
from beaver.memory.skills import SkillLearningCandidate
from beaver.skills.learning.pipeline import SkillLearningPipelineService
from beaver.skills.learning.pipeline import DraftHasNoChanges, SkillLearningPipelineService
from beaver.skills.learning.replay import ReplayRunner
@ -114,13 +114,13 @@ class SkillLearningWorker:
if self._has_active_draft(candidate):
self.pipeline.mark_candidate_superseded(candidate.candidate_id, "active draft already exists for this skill")
return False
self.pipeline.mark_candidate_queued(candidate.candidate_id)
self.pipeline.mark_candidate_synthesizing(candidate.candidate_id)
draft = await self.pipeline.synthesize_draft(
candidate.candidate_id,
provider_bundle=self.provider_bundle_factory(),
)
self.pipeline.mark_draft_synthesized(candidate.candidate_id, draft)
try:
draft = await self.pipeline.synthesize_draft(
candidate.candidate_id,
provider_bundle=self.provider_bundle_factory(),
)
except DraftHasNoChanges:
return False
safety = self.pipeline.check_safety(draft.skill_name, draft.draft_id)
if not safety.passed or safety.risk_level == "critical":
return True

View File

@ -11,6 +11,7 @@
from __future__ import annotations
import hashlib
import json
from typing import TYPE_CHECKING, Any
@ -44,7 +45,45 @@ class ToolExecutor:
tool_name=tool_name,
error="tool_not_found",
)
return await tool.invoke(arguments or {}, context or ToolContext())
normalized_arguments = dict(arguments or {})
tool_context = context or ToolContext()
write_key = _external_write_key(tool_name, normalized_arguments)
if write_key is None:
return await tool.invoke(normalized_arguments, tool_context)
external_writes = _external_write_state(tool_context)
previous = external_writes.get(write_key)
if previous is not None:
previous_content = str(previous.get("content") or "").strip()
detail = f" Previous result: {previous_content}" if previous_content else ""
return ToolResult(
success=True,
content=(
f"Duplicate external write suppressed for {tool_name}. "
"A matching write was already attempted in this run."
f"{detail}"
),
tool_name=tool_name,
error="duplicate_external_write_suppressed",
raw_output={"duplicate": True, "previous": previous},
)
external_writes[write_key] = {
"tool_name": tool_name,
"arguments": normalized_arguments,
"status": "attempted",
"content": "",
"error": None,
}
result = await tool.invoke(normalized_arguments, tool_context)
external_writes[write_key] = {
"tool_name": tool_name,
"arguments": normalized_arguments,
"status": "done" if result.success else "error",
"content": result.content,
"error": result.error,
}
return result
async def execute_tool_call(
self,
@ -115,3 +154,42 @@ class ToolExecutor:
if tool_call.get("name"):
return str(tool_call["name"])
return "unknown"
_EXTERNAL_WRITE_TOOL_TERMS = (
"mail_send_email",
"mail_reply_to_message",
"mail_forward_message",
"mail_move_message",
"calendar_create_event",
"calendar_update_event",
)
def _external_write_state(context: ToolContext) -> dict[str, dict[str, Any]]:
state = context.metadata.setdefault("external_write_attempts", {})
if not isinstance(state, dict):
state = {}
context.metadata["external_write_attempts"] = state
return state
def _external_write_key(tool_name: str, arguments: dict[str, Any]) -> str | None:
lowered = tool_name.lower()
if not any(term in lowered for term in _EXTERNAL_WRITE_TOOL_TERMS):
return None
payload = json.dumps(_normalize_for_key(arguments), ensure_ascii=False, sort_keys=True, separators=(",", ":"))
digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()
return f"{lowered}:{digest}"
def _normalize_for_key(value: Any) -> Any:
if isinstance(value, dict):
return {str(key): _normalize_for_key(value[key]) for key in sorted(value, key=str)}
if isinstance(value, list):
return [_normalize_for_key(item) for item in value]
if isinstance(value, tuple):
return [_normalize_for_key(item) for item in value]
if isinstance(value, (str, int, float, bool)) or value is None:
return value
return str(value)

View File

@ -0,0 +1,69 @@
import json
import os
import subprocess
from pathlib import Path
def test_create_instance_writes_default_max_tool_iterations(tmp_path) -> None:
app_instance_dir = Path(__file__).resolve().parents[3]
fake_bin = tmp_path / "bin"
fake_bin.mkdir()
docker = fake_bin / "docker"
docker.write_text(
"""#!/usr/bin/env bash
set -euo pipefail
case "${1:-}" in
image)
[[ "${2:-}" == "inspect" ]]
exit 0
;;
container)
[[ "${2:-}" == "inspect" ]]
exit 1
;;
run)
exit 0
;;
*)
echo "unexpected docker command: $*" >&2
exit 1
;;
esac
""",
encoding="utf-8",
)
docker.chmod(0o755)
env = os.environ.copy()
env["PATH"] = f"{fake_bin}:{env['PATH']}"
instances_root = tmp_path / "instances"
result = subprocess.run(
[
str(app_instance_dir / "create-instance.sh"),
"--instance-id",
"default-tools",
"--auth-username",
"steven",
"--auth-password",
"secret",
"--skip-provider-config",
"--host-port",
"29001",
"--instances-root",
str(instances_root),
"--registry",
str(tmp_path / "registry.json"),
"--skip-initial-skills",
],
cwd=app_instance_dir,
env=env,
text=True,
capture_output=True,
check=False,
)
assert result.returncode == 0, result.stderr
config_path = instances_root / "default-tools" / "beaver-home" / "config.json"
config = json.loads(config_path.read_text(encoding="utf-8"))
assert config["agents"]["defaults"]["maxToolIterations"] == 100

View File

@ -363,6 +363,52 @@ def test_process_projection_emits_tool_cards_from_run_messages(tmp_path: Path) -
assert tool_result["metadata"]["success"] is True
def test_process_projection_marks_root_done_when_result_is_ready(tmp_path: Path) -> None:
session = SessionManager(tmp_path)
run_store = RunMemoryStore(tmp_path / "memory" / "runs")
run_store.append_run_record(
RunRecord(
run_id="main-run",
session_id="web:test",
task_id="task-1",
attempt_index=1,
task_text="send email",
started_at="2026-01-01T00:00:03+00:00",
ended_at="2026-01-01T00:00:04+00:00",
success=True,
finish_reason="stop",
)
)
session.append_message(
"web:test",
role="system",
event_type="task_execution_planned",
event_payload={"task_id": "task-1", "attempt_index": 1, "plan_mode": "single", "strategy": "single"},
context_visible=False,
)
session.append_message(
"web:test",
role="system",
event_type="task_synthesis_completed",
event_payload={"task_id": "task-1", "attempt_index": 1, "main_run_id": "main-run"},
context_visible=False,
)
session.append_message(
"web:test",
run_id="main-run",
role="system",
event_type="task_evidence_recorded",
event_payload={"task_id": "task-1", "attempt_index": 1, "evidence_status": "recorded"},
context_visible=False,
)
projection = SessionProcessProjector(session, run_store).project("web:test")
root_run = next(run for run in projection["runs"] if run["run_id"] == "task:task-1:attempt:1")
assert root_run["status"] == "done"
assert root_run["finished_at"] is not None
def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
session = SessionManager(tmp_path)
run_store = RunMemoryStore(tmp_path / "memory" / "runs")

View File

@ -5,6 +5,8 @@ import json
from pathlib import Path
from types import SimpleNamespace
import pytest
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.engine.session import SessionManager
@ -13,6 +15,8 @@ from beaver.memory.skills import SkillLearningCandidate, SkillLearningStore
from beaver.skills.authoring.format import is_canonical_skill_body
from beaver.skills.drafts import DraftService
from beaver.skills.learning import (
DraftHasNoChanges,
DraftSynthesisInProgress,
EvidenceSelector,
SkillDraftSynthesizer,
SkillLearningPipelineService,
@ -22,7 +26,7 @@ from beaver.skills.learning import (
)
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
from beaver.skills.specs import SkillSpecStore, SkillVersion
class JsonProvider(LLMProvider):
@ -44,6 +48,20 @@ class JsonProvider(LLMProvider):
return "stub"
class BlockingJsonProvider(JsonProvider):
def __init__(self, *, started: asyncio.Event, release: asyncio.Event) -> None:
super().__init__()
self.started = started
self.release = release
self.calls = 0
async def chat(self, messages: list[dict], tools: list[dict] | None = None, model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7) -> LLMResponse:
self.calls += 1
self.started.set()
await self.release.wait()
return await super().chat(messages, tools=tools, model=model, max_tokens=max_tokens, temperature=temperature)
def _bundle(provider: LLMProvider) -> ProviderBundle:
runtime = SimpleNamespace(model="stub", provider_name="stub")
return ProviderBundle(main_runtime=runtime, main_provider=provider) # type: ignore[arg-type]
@ -120,6 +138,69 @@ def _pipeline(tmp_path: Path) -> SkillLearningPipelineService:
)
def _revision_pipeline(tmp_path: Path, content: str, frontmatter: dict) -> SkillLearningPipelineService:
spec_store = SkillSpecStore(tmp_path)
spec_store.write_skill_version(
SkillVersion(
skill_name="web-operation",
version="v0001",
content_hash="hash-v1",
summary_hash="summary-v1",
created_at="2026-06-01T00:00:00+00:00",
created_by="test",
change_reason="initial",
parent_version=None,
review_state="published",
frontmatter=frontmatter,
summary="web operation",
tool_hints=list(frontmatter.get("tools") or []),
),
content,
)
spec_store.set_current_version("web-operation", "v0001")
run_store = RunMemoryStore(tmp_path / "memory" / "runs")
learning_store = SkillLearningStore(tmp_path / "memory" / "skills")
run_store.append_run_record(
RunRecord(
run_id="run-1",
session_id="session-1",
task_text="check detailed weather",
started_at="start",
ended_at="end",
success=True,
finish_reason="stop",
)
)
learning_store.record_learning_candidate(
SkillLearningCandidate(
candidate_id="candidate-revision",
kind="revise_skill",
source_run_ids=["run-1"],
source_session_ids=["session-1"],
related_skill_names=["web-operation"],
reason="revise web guidance",
evidence={"skill_version": "v0001"},
priority=10,
confidence=0.9,
)
)
draft_service = DraftService(spec_store)
learning_service = SkillLearningService(
run_store=run_store,
learning_store=learning_store,
draft_service=draft_service,
evidence_selector=EvidenceSelector(run_store),
synthesizer=SkillDraftSynthesizer(),
)
return SkillLearningPipelineService(
learning_store=learning_store,
learning_service=learning_service,
draft_service=draft_service,
review_service=ReviewService(spec_store),
publisher=SkillPublisher(spec_store),
)
def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
worker = SkillLearningWorker(
@ -137,6 +218,104 @@ def test_worker_synthesizes_open_candidate_without_publish(tmp_path: Path) -> No
assert pipeline.list_drafts(candidate.draft_skill_name)[0].status == "draft"
def test_concurrent_draft_synthesis_is_claimed_once(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
async def scenario():
started = asyncio.Event()
release = asyncio.Event()
provider = BlockingJsonProvider(started=started, release=release)
first = asyncio.create_task(
pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(provider))
)
await asyncio.wait_for(started.wait(), timeout=1)
with pytest.raises(DraftSynthesisInProgress):
await pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(JsonProvider()))
release.set()
return await first, provider
draft, provider = asyncio.run(scenario())
candidate = pipeline.get_candidate("candidate-1")
assert provider.calls == 1
assert candidate.status == "draft_ready"
assert candidate.draft_id == draft.draft_id
assert len(pipeline.list_drafts(candidate.draft_skill_name)) == 1
def test_existing_draft_synthesis_request_returns_same_draft(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
first = asyncio.run(pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(JsonProvider())))
second = asyncio.run(pipeline.synthesize_draft("candidate-1", provider_bundle=_bundle(JsonProvider(fail=True))))
assert second.draft_id == first.draft_id
assert len(pipeline.list_drafts(first.skill_name)) == 1
def test_revision_synthesis_with_no_content_changes_supersedes_candidate(tmp_path: Path) -> None:
content = (
"---\n"
"name: web-operation\n"
"description: Web search and fetch.\n"
"tools:\n"
" - web_fetch\n"
" - web_search\n"
"---\n"
"\n"
"# Web Operation\n"
"\n"
"## Overview\n"
"\n"
"Web search and fetch.\n"
"\n"
"## When to Use\n"
"\n"
"- Use when web information is required.\n"
"\n"
"## Required Tools\n"
"\n"
"- `web_fetch`\n"
"- `web_search`\n"
"\n"
"## Workflow\n"
"\n"
"- Use web_search, then web_fetch.\n"
"\n"
"## Validation\n"
"\n"
"- Verify sources.\n"
"\n"
"## Boundaries\n"
"\n"
"- Stay within the request.\n"
"\n"
"## Anti-Patterns\n"
"\n"
"- Do not cite unsupported claims.\n"
)
frontmatter = {
"name": "web-operation",
"description": "Web search and fetch.",
"tools": ["web_fetch", "web_search"],
}
pipeline = _revision_pipeline(tmp_path, content, frontmatter)
provider = JsonProvider(
payload={
"frontmatter": frontmatter,
"content": content,
"change_reason": "No changes are required.",
}
)
with pytest.raises(DraftHasNoChanges):
asyncio.run(pipeline.synthesize_draft("candidate-revision", provider_bundle=_bundle(provider)))
candidate = pipeline.get_candidate("candidate-revision")
assert candidate.status == "superseded"
assert "no changes" in (candidate.last_error or "").lower()
assert pipeline.list_drafts("web-operation") == []
def test_worker_evaluates_draft_with_replay_runner_when_available(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
replay_runner = FakeReplayRunner()

View File

@ -28,12 +28,14 @@ class DummyTool(BaseTool):
toolset=toolset,
always_available=always_available,
)
self.calls: list[dict] = []
@property
def spec(self) -> ToolSpec:
return self._spec
async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult:
self.calls.append(dict(arguments))
return ToolResult(success=True, content="ok", tool_name=self.spec.name)
@ -198,3 +200,30 @@ def test_tool_executor_parses_object_tool_call_string_arguments() -> None:
assert name == "echo"
assert arguments == {"text": "hello"}
def test_tool_executor_suppresses_duplicate_external_write_in_same_run() -> None:
registry = ToolRegistry()
send_tool = DummyTool("mcp_outlook_mcp_mail_send_email", toolset="mcp")
registry.register(send_tool)
executor = ToolExecutor(registry)
context = ToolContext(
metadata={
"task_id": "task-1",
"run_id": "run-1",
}
)
arguments = {
"to_recipients": ["jay.chen@boardware.com"],
"subject": "请回复今天下午的日程安排",
"body": "Hi Jay",
}
first = asyncio.run(executor.execute("mcp_outlook_mcp_mail_send_email", arguments, context=context))
second = asyncio.run(executor.execute("mcp_outlook_mcp_mail_send_email", dict(arguments), context=context))
assert first.success is True
assert second.success is True
assert second.error == "duplicate_external_write_suppressed"
assert "Duplicate external write suppressed" in second.content
assert len(send_tool.calls) == 1

View File

@ -187,6 +187,7 @@ skip_provider_config = os.environ["SKIP_PROVIDER_CONFIG"].strip() == "1"
providers = {}
agent_defaults = {
"workspace": "/root/.beaver/workspace",
"maxToolIterations": 100,
}
if not skip_provider_config:
provider_cfg = {"apiKey": os.environ["API_KEY"]}