feat(engine): 添加MCP连接管理和工具集成功能

- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、
  PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、
  TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名

- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式

- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑

- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑

- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
This commit is contained in:
2026-05-14 09:43:48 +08:00
parent 8a12c30141
commit 30ab74ffb2
149 changed files with 12293 additions and 2812 deletions

View File

@ -0,0 +1,80 @@
from __future__ import annotations
from pathlib import Path
from fastapi.testclient import TestClient
from beaver.interfaces.web.app import create_app
from beaver.services.agent_service import AgentService
def test_active_task_api_returns_open_task_and_hides_closed(tmp_path: Path) -> None:
    """The active-task endpoint returns an open task and ``null`` once it is closed."""
    agent_service = AgentService(workspace=tmp_path)
    runtime = agent_service.create_loop().boot()
    created = runtime.task_service.create_task(  # type: ignore[union-attr]
        session_id="web:active",
        description="实现任务连续性",
        metadata={"short_title": "任务连续性"},
    )
    web_app = create_app(service=agent_service, manage_service_lifecycle=False)

    with TestClient(web_app) as http:
        while_open = http.get("/api/sessions/web:active/active-task")
        task_list = http.get("/api/tasks")
        # Close the task between the two lookups so the second lookup runs
        # against a session whose only task is already closed.
        runtime.task_service.close_task(created.task_id, reason="done")  # type: ignore[union-attr]
        after_close = http.get("/api/sessions/web:active/active-task")

    assert while_open.status_code == 200
    open_payload = while_open.json()
    assert open_payload["task_id"] == created.task_id
    assert open_payload["short_title"] == "任务连续性"
    assert task_list.json()[0]["short_title"] == "任务连续性"

    assert after_close.status_code == 200
    assert after_close.json() is None
def test_active_task_api_hides_unengaged_cron_task(tmp_path: Path) -> None:
    """A cron task flagged ``user_engaged: False`` is not served as active; an engaged one is."""
    agent_service = AgentService(workspace=tmp_path)
    tasks = agent_service.create_loop().boot().task_service

    unengaged = tasks.create_task(  # type: ignore[union-attr]
        session_id="web:cron",
        description="提醒用户喝水",
        creator="cron",
        metadata={"source": "scheduled_cron", "user_engaged": False},
    )
    engaged = tasks.create_task(  # type: ignore[union-attr]
        session_id="web:engaged",
        description="修改新闻总结",
        creator="cron",
        metadata={"source": "scheduled_run", "user_engaged": True},
    )

    web_app = create_app(service=agent_service, manage_service_lifecycle=False)
    with TestClient(web_app) as http:
        unengaged_reply = http.get("/api/sessions/web:cron/active-task")
        engaged_reply = http.get("/api/sessions/web:engaged/active-task")

    assert unengaged_reply.status_code == 200
    assert unengaged_reply.json() is None

    assert engaged_reply.status_code == 200
    assert engaged_reply.json()["task_id"] == engaged.task_id

    # The two tasks must be distinct records for the checks above to mean anything.
    assert unengaged.task_id != engaged.task_id
def test_task_delete_api_removes_backend_task(tmp_path: Path) -> None:
    """Deleting a task over the API removes it from both the listing and the detail endpoint."""
    agent_service = AgentService(workspace=tmp_path)
    runtime = agent_service.create_loop().boot()
    doomed = runtime.task_service.create_task(  # type: ignore[union-attr]
        session_id="web:delete",
        description="删除这个任务",
    )
    task_url = f"/api/tasks/{doomed.task_id}"

    web_app = create_app(service=agent_service, manage_service_lifecycle=False)
    with TestClient(web_app) as http:
        delete_reply = http.delete(task_url)
        list_reply = http.get("/api/tasks")
        detail_reply = http.get(task_url)

    assert delete_reply.status_code == 200
    assert delete_reply.json()["task_id"] == doomed.task_id

    remaining_ids = [entry["task_id"] for entry in list_reply.json()]
    assert doomed.task_id not in remaining_ids
    assert detail_reply.status_code == 404

View File

@ -59,7 +59,7 @@ class BlockingSkillAssembler:
self.release_first = asyncio.Event()
async def assemble(self, **kwargs) -> SkillAssemblyResult:
if kwargs["task_description"] == "task first":
if "task first" in kwargs["task_description"]:
self.first_started.set()
await self.release_first.wait()
return SkillAssemblyResult()

View File

@ -1,4 +1,5 @@
import json
from pathlib import Path
from beaver.engine import AgentLoop, EngineLoader
from beaver.engine.providers import make_provider_bundle
@ -42,6 +43,37 @@ def test_load_config_reads_current_instance_shape(tmp_path) -> None:
assert target["extra_headers"] == {"X-Test": "1"}
def test_provider_resolution_ignores_custom_and_disabled_overrides(tmp_path) -> None:
    """Resolution lands on ``openai`` by default and for ``custom`` / ``deepseek`` overrides."""
    raw_config = {
        "agents": {
            "defaults": {
                "workspace": str(tmp_path / "workspace"),
                "model": "qwen-plus",
                "provider": "custom",
            }
        },
        "providers": {
            # "custom" is present but empty; "openai" is the only provider with credentials.
            "custom": {},
            "openai": {
                "apiKey": "sk-test",
                "apiBase": "https://oai.example.com/v1",
            },
        },
    }
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps(raw_config), encoding="utf-8")

    config = load_config(config_path=config_file)

    assert config.resolve_provider_target()["provider_name"] == "openai"
    for requested in ("custom", "deepseek"):
        resolved = config.resolve_provider_target(provider_name=requested)
        assert resolved["provider_name"] == "openai"
def test_engine_loader_uses_config_workspace(tmp_path) -> None:
workspace = tmp_path / "workspace"
config_path = tmp_path / "config.json"
@ -105,3 +137,40 @@ def test_openai_compatible_qwen_config_keeps_openai_provider() -> None:
assert bundle.main_runtime.api_base == "https://oai.example.com/v1"
assert isinstance(bundle.main_provider, LiteLLMProvider)
assert bundle.main_provider._resolve_model("qwen-plus") == "openai/qwen-plus"
def test_load_config_reads_stevenli_mcp_authz_identity() -> None:
    """The ``stevenli`` instance config yields its MCP server, authz and backend identity settings."""
    # NOTE(review): this reads a config file located relative to this test file
    # (four directories up) instead of a tmp_path fixture, so it presumably only
    # passes in a checkout that contains that instance directory - confirm.
    instance_home = (
        Path(__file__).resolve().parents[4]
        / "app-instance"
        / "runtime"
        / "instances"
        / "stevenli"
        / "nanobot-home"
    )
    config = load_config(config_path=instance_home / "config.json")

    outlook = config.tools.mcp_servers["outlook_mcp"]
    assert outlook.transport == "http"
    assert outlook.url == "http://10.6.80.29:8000/mcp"
    assert outlook.auth_mode == "oauth_backend_token"
    assert outlook.auth_audience == "mcp:outlook_mcp"
    assert "tool:mail_list_messages" in outlook.auth_scopes
    assert outlook.tool_timeout == 60
    assert outlook.sensitive is True

    authz = config.authz
    assert authz.enabled is True
    assert authz.base_url == "http://nano-authz-service:19090"

    identity = config.backend_identity
    assert identity.backend_id == "stevenli"
    assert identity.client_id == "stevenli"
def test_load_config_adds_managed_local_mcp_servers(tmp_path) -> None:
    """Loading a config with an empty ``mcpServers`` map still yields ``local_filesystem_mcp``."""
    config_file = tmp_path / "config.json"
    empty_servers = {"tools": {"mcpServers": {}}}
    config_file.write_text(json.dumps(empty_servers), encoding="utf-8")

    servers = load_config(config_path=config_file).tools.mcp_servers
    filesystem_server = servers["local_filesystem_mcp"]

    assert filesystem_server.transport == "stdio"
    assert filesystem_server.kind == "local"
    assert filesystem_server.category == "filesystem"
    assert filesystem_server.managed is True
    assert "beaver.interfaces.mcp.tools_server" in filesystem_server.args

View File

@ -0,0 +1,126 @@
import asyncio
from beaver.foundation.models import CronExecutionResult, CronRunRecord, CronSchedule
from beaver.tools.base import ToolContext
from beaver.tools.builtins import CronTool
from beaver.services.cron_service import CronService, compute_next_run, parse_schedule, schedule_from_api
def test_parse_hermes_style_schedules() -> None:
    """``parse_schedule`` maps interval, bare-delay and cron-expression strings to their kinds."""
    recurring = parse_schedule("every 15m")
    assert recurring.kind == "every"
    assert recurring.every_ms == 15 * 60 * 1000

    delayed = parse_schedule("30s")
    assert delayed.kind == "at"
    assert delayed.at_ms is not None

    expression = "0 9 * * *"
    parsed_cron = parse_schedule(expression)
    assert parsed_cron.kind == "cron"
    assert parsed_cron.expr == expression
def test_schedule_from_frontend_payload() -> None:
    """``schedule_from_api`` accepts both ``every_seconds`` and ``cron_expr`` payloads."""
    from_interval = schedule_from_api({"every_seconds": 60})
    assert from_interval.kind == "every"
    assert from_interval.every_ms == 60_000

    from_expression = schedule_from_api({"cron_expr": "0 10 * * *"})
    assert from_expression.kind == "cron"
def test_compute_next_run_skips_missed_interval() -> None:
    """With a long-past last run, the next run time is still later than ``now_ms``."""
    every_minute = CronSchedule(kind="every", every_ms=60_000)
    current_ms = 1_000_000

    upcoming = compute_next_run(every_minute, now_ms=current_ms, last_run_at_ms=0)

    assert upcoming > current_ms
def test_manual_run_records_task_history(tmp_path) -> None:
    """A forced run stores the handler's task id in the job history and in the API dict."""

    async def handle_job(job):
        return CronExecutionResult(response="done", task_id=f"task-{job.id}", run_id="run-1")

    cron = CronService(tmp_path / "jobs.json", on_job=handle_job)
    hourly = CronSchedule(kind="every", every_ms=3600_000)
    created = cron.add_job(
        name="Daily check",
        message="Check the project",
        schedule=hourly,
        session_key="web:default",
    )

    ran = asyncio.run(cron.run_job(created.id, force=True))
    assert ran is True

    stored = cron.get_job(created.id)
    assert stored is not None
    assert stored.last_status == "ok"

    expected_task_id = f"task-{created.id}"
    assert stored.history[-1].task_id == expected_task_id
    assert stored.to_api_dict()["last_task_id"] == expected_task_id
def test_manual_run_records_scheduled_run_output(tmp_path) -> None:
    """A forced run records the handler's output and notification session on the run record."""

    async def handle_job(job, run):
        # The response embeds the run id handed to the handler so the test can
        # check that the same id ends up on the stored record.
        return CronExecutionResult(
            response=f"notification for {run.scheduled_run_id}",
            run_id="run-notify",
            notification_session_id="notify:default:scheduled",
            mode="notification",
        )

    cron = CronService(tmp_path / "jobs.json", on_job=handle_job)
    created = cron.add_job(
        name="Daily news",
        message="Summarize news",
        schedule=CronSchedule(kind="every", every_ms=3600_000),
    )

    ran = asyncio.run(cron.run_job(created.id, force=True))
    assert ran is True

    stored = cron.get_job(created.id)
    assert stored is not None

    latest = stored.history[-1]
    assert latest.scheduled_run_id
    assert latest.output == f"notification for {latest.scheduled_run_id}"
    assert latest.notification_session_id == "notify:default:scheduled"
    assert stored.to_api_dict()["last_scheduled_run_id"] == latest.scheduled_run_id
def test_cron_tool_uses_runtime_service(tmp_path) -> None:
    """``CronTool`` adds its job to the ``cron_service`` found in the tool context."""
    cron = CronService(tmp_path / "jobs.json")
    context = ToolContext(session_id="session-1", services={"cron_service": cron})
    add_request = {
        "action": "add",
        "name": "Tool-created task",
        "message": "Check the queue",
        "every_seconds": 300,
    }

    outcome = asyncio.run(CronTool().invoke(add_request, context))

    assert outcome.success is True
    stored_jobs = cron.list_jobs(include_disabled=True)
    assert len(stored_jobs) == 1
    # The job is keyed to the session id carried by the tool context.
    assert stored_jobs[0].payload.session_key == "session-1"
def test_mark_run_engaged_links_task(tmp_path) -> None:
    """``mark_run_engaged`` flags a stored run as engaged and attaches the given task id."""
    cron = CronService(tmp_path / "jobs.json")
    created = cron.add_job(
        name="Daily news",
        message="Summarize news",
        schedule=CronSchedule(kind="every", every_ms=3600_000),
    )

    # Seed the job history by hand and persist it through the service's own save routine.
    seeded_run = CronRunRecord(
        started_at_ms=1,
        status="ok",
        output="news summary",
        notification_session_id="notify:default:scheduled",
    )
    created.history.append(seeded_run)
    cron._save_jobs()
    run_key = seeded_run.scheduled_run_id

    link_result = cron.mark_run_engaged(run_key, task_id="task-1", intent="revise_once")
    assert link_result is not None

    lookup = cron.get_run(run_key)
    assert lookup is not None
    # get_run returns an indexable result; element 1 is the run record.
    stored_run = lookup[1]
    assert stored_run.engaged is True
    assert stored_run.task_id == "task-1"

View File

@ -0,0 +1,67 @@
from __future__ import annotations
from pathlib import Path
from fastapi.testclient import TestClient
from beaver.interfaces.web.app import create_app
from beaver.services.agent_service import AgentService
def test_debug_chat_logs_group_events_by_run(tmp_path: Path) -> None:
    """The debug chat-log endpoint groups a session's events under their run, in append order."""
    agent_service = AgentService(workspace=tmp_path)
    sessions_manager = agent_service.create_loop().boot().session_manager
    session_id = "web:debug"
    run_id = "run-debug"
    sessions_manager.ensure_session(session_id, source="web", title="Debug")

    # Four events of one run, listed in the order they are appended.
    recorded_events = [
        {
            "role": "system",
            "event_type": "run_started",
            "event_payload": {"source": "web", "task_id": "task-1", "attempt_index": 1},
            "content": "hello",
            "context_visible": False,
        },
        {
            "role": "system",
            "event_type": "llm_request_snapshotted",
            "event_payload": {"messages": [{"role": "user", "content": "hello"}], "tools": []},
            "content": '{"messages":[{"role":"user","content":"hello"}],"tools":[]}',
            "context_visible": False,
        },
        {
            "role": "user",
            "event_type": "user_message_added",
            "content": "hello",
        },
        {
            "role": "assistant",
            "event_type": "assistant_message_added",
            "content": "hi",
            "finish_reason": "stop",
        },
    ]
    for event_kwargs in recorded_events:
        sessions_manager.append_message(session_id, run_id=run_id, **event_kwargs)

    web_app = create_app(service=agent_service, manage_service_lifecycle=False)
    with TestClient(web_app) as http:
        reply = http.get("/api/debug/chat-logs")

    assert reply.status_code == 200
    first_run = reply.json()["sessions"][0]["runs"][0]
    assert first_run["run_id"] == run_id
    assert first_run["user_input"] == "hello"

    returned_types = [event["event_type"] for event in first_run["events"]]
    assert returned_types == [
        "run_started",
        "llm_request_snapshotted",
        "user_message_added",
        "assistant_message_added",
    ]
    snapshot_event = first_run["events"][1]
    assert snapshot_event["event_payload"]["messages"][0]["content"] == "hello"

View File

@ -17,6 +17,9 @@ class FakeResult:
provider_name: str | None = "fake"
model: str | None = "fake-model"
usage: dict[str, Any] = field(default_factory=dict)
task_id: str | None = "task-1"
task_status: str | None = "awaiting_feedback"
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
class FakeService:
@ -75,6 +78,9 @@ def test_gateway_routes_memory_channel_roundtrip() -> None:
assert message.content == "echo:hello"
assert message.session_id == "s1"
assert message.finish_reason == "stop"
assert message.metadata["task_id"] == "task-1"
assert message.metadata["task_status"] == "awaiting_feedback"
assert message.metadata["validation_result"] == {"accepted": True}
stop_event.set()
await asyncio.wait_for(task, timeout=2)
@ -183,6 +189,50 @@ def test_agent_service_maps_stopped_runtime_to_stopped_outbound() -> None:
asyncio.run(run())
def test_channel_manager_keeps_unknown_channel_outbound_undeliverable() -> None:
    """Outbound messages for a channel the manager does not know end up in ``undeliverable``."""

    async def scenario() -> None:
        bus = MessageBus()
        manager = ChannelManager(bus)

        inbound = InboundMessage(channel="missing", content="hello", session_id="missing:1")
        result = FakeResult(session_id="missing:1", output_text="ok")
        await bus.publish_outbound(AgentService.build_outbound_message(inbound, result))

        # The stop event is set before dispatching, so dispatch_outbound is
        # entered with the stop signal already raised.
        stop_signal = asyncio.Event()
        stop_signal.set()
        await manager.dispatch_outbound(stop_signal)

        assert len(manager.undeliverable) == 1
        parked = manager.undeliverable[0]
        assert parked.channel == "missing"
        assert parked.session_id == "missing:1"

    asyncio.run(scenario())
def test_memory_channel_adapts_old_style_payload_to_stable_session_id() -> None:
    """``publish_external_text`` queues an inbound message with a ``<channel>:<chat_id>`` session id."""

    async def scenario() -> None:
        bus = MessageBus()
        adapter = MemoryChannelAdapter(bus, name="telegram")
        platform_payload = {"platform": "telegram", "text": "hello"}

        published = await adapter.publish_external_text(
            "hello",
            chat_id="chat-1",
            message_id="message-1",
            raw_payload=platform_payload,
        )
        received = await bus.consume_inbound()

        # The bus hands back the very object the adapter returned.
        assert received is published
        assert received.channel == "telegram"
        assert received.session_id == "telegram:chat-1"

        metadata = received.metadata
        assert metadata["chat_id"] == "chat-1"
        assert metadata["message_id"] == "message-1"
        assert metadata["raw_channel_payload"] == {"platform": "telegram", "text": "hello"}

    asyncio.run(scenario())
def test_channel_manager_start_cancellation_rolls_back_started_channels() -> None:
class StartedChannel:
name = "started"

View File

@ -0,0 +1,145 @@
from __future__ import annotations
import asyncio
import pytest
from types import SimpleNamespace
from beaver.engine.providers.litellm import LiteLLMProvider
def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
    """For a Qwen model, ``thinking_enabled=False`` travels in ``extra_body.chat_template_kwargs``."""
    seen_kwargs: dict = {}

    # Minimal stand-ins for the completion response returned by the patched call.
    class StubMessage:
        content = "可以"
        reasoning_content = ""
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def fake_acompletion(**kwargs):
        seen_kwargs.update(kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    qwen_provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="Qwen3.6-35B",
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "只回复可以"}]
    reply = asyncio.run(
        qwen_provider.chat(conversation, model="Qwen3.6-35B", thinking_enabled=False)
    )

    assert reply.content == "可以"
    assert seen_kwargs["extra_body"] == {"chat_template_kwargs": {"enable_thinking": False}}
def test_non_qwen_thinking_mode_is_not_sent(monkeypatch: pytest.MonkeyPatch) -> None:
    """For a non-Qwen model, ``thinking_enabled`` adds no ``extra_body`` to the completion call."""
    seen_kwargs: dict = {}

    # Minimal stand-ins for the completion response returned by the patched call.
    class StubMessage:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class StubChoice:
        message = StubMessage()
        finish_reason = "stop"

    class StubResponse:
        choices = [StubChoice()]
        usage = None

    async def fake_acompletion(**kwargs):
        seen_kwargs.update(kwargs)
        return StubResponse()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    gpt_provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="gpt-4.1-mini",
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    asyncio.run(
        gpt_provider.chat(conversation, model="gpt-4.1-mini", thinking_enabled=False)
    )

    assert "extra_body" not in seen_kwargs
def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    """Dict-valued tool-call ``arguments`` reach the completion call as a JSON string.

    The check decodes the forwarded string instead of comparing it to a
    literal, so it does not depend on the serializer's whitespace or key
    order, only on the arguments round-tripping to the original object.
    """
    import json

    captured: dict = {}

    # Minimal stand-ins for the completion response returned by the patched call.
    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def fake_acompletion(**kwargs):
        captured.update(kwargs)
        return Response()

    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="Qwen3.6-35B",
        provider_name="openai",
    )
    raw_arguments = {"action": "add", "mode": "notification"}
    history = [
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call-1",
                    "type": "function",
                    # Arguments are deliberately a dict rather than a string.
                    "function": {"name": "cron", "arguments": dict(raw_arguments)},
                }
            ],
        },
        {"role": "tool", "tool_call_id": "call-1", "name": "cron", "content": "done"},
    ]

    asyncio.run(provider.chat(history, model="Qwen3.6-35B", thinking_enabled=False))

    forwarded = captured["messages"][0]["tool_calls"][0]["function"]["arguments"]
    assert isinstance(forwarded, str)
    assert json.loads(forwarded) == raw_arguments

View File

@ -0,0 +1,116 @@
from __future__ import annotations
import asyncio
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.tasks import MainAgentRouter, TaskRecord
class RouterProvider(LLMProvider):
    """Provider stub that replays one canned reply (or raises one exception) and logs each call."""

    def __init__(self, response: str | Exception) -> None:
        super().__init__()
        # Either the text to return from chat() or the exception to raise from it.
        self.response = response
        # One dict per chat() call holding the sampling arguments it received.
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the call arguments, then return the canned reply or raise the canned error."""
        call_record = {
            "max_tokens": max_tokens,
            "temperature": temperature,
            "model": model,
            "thinking_enabled": thinking_enabled,
        }
        self.calls.append(call_record)

        canned = self.response
        if not isinstance(canned, Exception):
            return LLMResponse(
                content=canned,
                finish_reason="stop",
                provider_name="stub",
                model="stub-model",
            )
        raise canned

    def get_default_model(self) -> str:
        """Return the fixed model name used by this stub."""
        return "stub-model"
def _task() -> TaskRecord:
    """Build the ``awaiting_feedback`` task record the router tests use as the active task."""
    record_fields = {
        "task_id": "task-1",
        "session_id": "web:task",
        "description": "实现任务连续性",
        "goal": "实现任务连续性",
        "constraints": [],
        "priority": 0,
        "status": "awaiting_feedback",
        "creator": "test",
        "created_at": "now",
        "updated_at": "now",
        "metadata": {"short_title": "任务连续性"},
    }
    return TaskRecord(**record_fields)
def test_router_continues_active_task_from_llm_decision() -> None:
    """A ``continue_task`` reply is classified as task work that does not start a new task."""
    llm_reply = '{"action":"continue_task","reason":"related","short_title":"任务连续性"}'
    stub = RouterProvider(llm_reply)
    router = MainAgentRouter()

    verdict = asyncio.run(
        router.classify("再把输入框标识也补上", active_task=_task(), provider=stub)
    )

    assert verdict.is_task
    assert verdict.starts_new_task is False
    assert verdict.short_title == "任务连续性"
    # The router's single provider call was made with a 256-token cap.
    assert stub.calls[0]["max_tokens"] == 256
def test_router_receives_thinking_mode() -> None:
    """``thinking_enabled`` passed to ``classify`` is forwarded to the provider call."""
    stub = RouterProvider('{"action":"simple_chat","reason":"simple"}')
    router = MainAgentRouter()

    verdict = asyncio.run(
        router.classify("你好", provider=stub, thinking_enabled=False)
    )

    assert not verdict.is_task
    first_call = stub.calls[0]
    assert first_call["thinking_enabled"] is False
def test_router_closes_active_task_from_llm_decision() -> None:
    """A ``close_task`` reply yields a non-task decision that closes the active task."""
    stub = RouterProvider('{"action":"close_task","reason":"user said done"}')
    router = MainAgentRouter()

    verdict = asyncio.run(
        router.classify("这个任务结束了", active_task=_task(), provider=stub)
    )

    assert not verdict.is_task
    assert verdict.closes_task is True
def test_router_fallback_keeps_active_task_but_not_new_task() -> None:
    """When the provider raises, the decision is task work only if a task is already active."""

    def classify_with_failing_provider(text, active_task):
        # A fresh router and a fresh failing provider per classification.
        failing = RouterProvider(RuntimeError("provider down"))
        return asyncio.run(
            MainAgentRouter().classify(text, active_task=active_task, provider=failing)
        )

    with_active_task = classify_with_failing_provider("继续", _task())
    without_active_task = classify_with_failing_provider("implement something", None)

    assert with_active_task.is_task
    assert not without_active_task.is_task

View File

@ -0,0 +1,142 @@
import asyncio
import io
import json
import zipfile
from types import SimpleNamespace
import pytest
from beaver.interfaces.web.app import _create_skill_upload_draft
from beaver.services.hermes_migration import HermesMigrationService
from beaver.services.skillhub_service import SkillHubService
from beaver.skills.drafts import DraftService
from beaver.skills.specs import SkillSpecStore
from beaver.tools.mcp.wrapper import MCPToolWrapper
class FakeSkillHubService(SkillHubService):
    """``SkillHubService`` stub whose JSON and text fetches return canned hub responses."""

    async def _get_json(self, path, *, params=None):
        """Return the canned ``{"data": ...}`` payload for ``path``; unknown paths raise ``AssertionError``."""

        def skill_summary():
            # Built fresh on every call so callers never share a mutable payload.
            return {
                "slug": "multi-search-engine",
                "displayName": "multi-search-engine",
                "summary": "search",
                "namespace": "global",
                "downloadCount": 1,
                "starCount": 0,
                "publishedVersion": {"version": "20260413.065325"},
            }

        def search_page():
            return {"items": [skill_summary()], "total": 1, "page": 0, "size": 12}

        def version_info():
            return {"version": "20260413.065325"}

        def version_files():
            return [
                {"filePath": "SKILL.md", "fileSize": 93},
                {"filePath": "references/a.txt", "fileSize": 2},
            ]

        payload_builders = {
            "/skills": search_page,
            "/skills/global/multi-search-engine": skill_summary,
            "/skills/global/multi-search-engine/versions/20260413.065325": version_info,
            "/skills/global/multi-search-engine/versions/20260413.065325/files": version_files,
        }
        build_payload = payload_builders.get(path)
        if build_payload is None:
            raise AssertionError(path)
        return {"data": build_payload()}

    async def _get_text(self, path, *, params):
        """Return the skill manifest for ``SKILL.md`` and ``"ok"`` for any other requested file."""
        if params["path"] != "SKILL.md":
            return "ok"
        return "---\nname: multi-search-engine\ndescription: Multi search\ntools:\n - web_search\n---\nUse search.\n"
def test_skillhub_search_detail_do_not_install_until_post_install(tmp_path):
    """Search and detail report the skill as not installed; only ``install`` writes it to the store."""
    store = SkillSpecStore(tmp_path)
    service = FakeSkillHubService(store)

    search = asyncio.run(service.search(q="multi-search-engine"))
    detail = asyncio.run(service.detail("global", "multi-search-engine"))

    assert search["items"][0]["installed"] is False
    assert detail["installed"] is False
    assert store.get_skill_spec("multi-search-engine") is None

    install = asyncio.run(service.install("global", "multi-search-engine"))

    assert install["ok"] is True
    assert store.get_skill_spec("multi-search-engine") is not None

    installed_reference = (
        tmp_path
        / "skills"
        / "multi-search-engine"
        / "versions"
        / install["version"]
        / "references"
        / "a.txt"
    )
    # Decode explicitly so the check does not depend on the platform's locale
    # encoding, matching the other file reads in this module.
    assert installed_reference.read_text(encoding="utf-8") == "ok"
def test_upload_skill_zip_rejects_path_traversal(tmp_path):
    """An uploaded archive containing a ``..`` entry is rejected with ``ValueError``."""
    spec_store = SkillSpecStore(tmp_path)
    runtime = SimpleNamespace(skill_spec_store=spec_store, draft_service=DraftService(spec_store))

    archive_entries = [
        ("skill/SKILL.md", "---\nname: skill\n---\nBody\n"),
        # The second entry points outside the skill directory.
        ("skill/../evil.txt", "x"),
    ]
    zipped = io.BytesIO()
    with zipfile.ZipFile(zipped, "w") as bundle:
        for entry_name, entry_text in archive_entries:
            bundle.writestr(entry_name, entry_text)

    with pytest.raises(ValueError, match="Unsafe archive entry"):
        _create_skill_upload_draft(runtime, "skill.zip", zipped.getvalue())
def test_upload_skill_zip_keeps_supporting_files_on_draft(tmp_path):
    """Supporting files from an uploaded archive are stored under the draft's upload directory."""
    store = SkillSpecStore(tmp_path)
    loaded = SimpleNamespace(skill_spec_store=store, draft_service=DraftService(store))

    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w") as archive:
        archive.writestr("skill/SKILL.md", "---\nname: skill\n---\nBody\n")
        archive.writestr("skill/references/a.txt", "context")

    draft = _create_skill_upload_draft(loaded, "skill.zip", buffer.getvalue())

    upload_dir = draft["evidence_refs"][0]["supporting_upload_dir"]
    stored_reference = (
        tmp_path
        / "skills"
        / "skill"
        / "draft_uploads"
        / draft["draft_id"]
        / "references"
        / "a.txt"
    )
    # Decode explicitly so the check does not depend on the platform's locale
    # encoding, matching the other file reads in this module.
    assert stored_reference.read_text(encoding="utf-8") == "context"
    assert upload_dir.endswith(draft["draft_id"])
def test_hermes_migration_manifest_includes_no_credential_skill_and_skips_api_skill(tmp_path):
    """Migration includes the skill without credentials and skips the one mentioning ``API_KEY``."""
    source_repo = tmp_path / "hermes"
    skill_bodies = {
        "safe": "---\nname: safe\n---\nUse local files only.\n",
        "unsafe": "---\nname: unsafe\n---\nRequires API_KEY.\n",
    }
    skill_dirs = {name: source_repo / "skills" / name for name in skill_bodies}
    for skill_dir in skill_dirs.values():
        skill_dir.mkdir(parents=True)
    for name, body in skill_bodies.items():
        skill_dirs[name].joinpath("SKILL.md").write_text(body, encoding="utf-8")

    workspace = tmp_path / "workspace"
    spec_store = SkillSpecStore(workspace)
    manifest = HermesMigrationService(spec_store).migrate(source_repo)

    included_names = {entry["skill_name"] for entry in manifest["included"]}
    skip_reasons = {entry.get("skill_name"): entry["reason"] for entry in manifest["skipped"]}
    assert "safe" in included_names
    assert skip_reasons["unsafe"] == "requires_external_credentials"
    assert spec_store.get_skill_spec("safe") is not None

    # The manifest is also written to the workspace as JSON.
    written = (workspace / "hermes_migration_manifest.json").read_text(encoding="utf-8")
    assert json.loads(written)["source"] == "hermes-agent"
def test_mcp_wrapper_metadata_preserves_server_id_with_underscores():
    """Wrapper metadata keeps the server id and tool name intact when both contain underscores."""
    remote_tool = SimpleNamespace(
        name="auth_status",
        description="Auth",
        inputSchema={"type": "object", "properties": {}},
    )

    async def call_tool(_name, _arguments):
        return SimpleNamespace(content=[], structuredContent={"ok": True})

    wrapped = MCPToolWrapper(
        "outlook_mcp",
        remote_tool,
        call_tool,
        kind="online",
        category="outlook",
        display_name="Outlook",
    )
    spec = wrapped.spec

    assert spec.name == "mcp_outlook_mcp_auth_status"
    # The metadata carries the two parts separately, so neither has to be
    # recovered by splitting the combined name on underscores.
    assert spec.metadata["server_id"] == "outlook_mcp"
    assert spec.metadata["original_tool_name"] == "auth_status"

View File

@ -298,8 +298,29 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
ended_at=recent,
success=True,
finish_reason="stop",
feedback={"feedback_type": "satisfied"},
activated_skills=[],
task_id=f"task-new-{index}",
attempt_index=1,
validation_result={"accepted": True, "score": 0.9},
)
)
for index in range(2):
run_store.append_run_record(
RunRecord(
run_id=f"simple-chat-{index}",
session_id="session-simple",
task_text="你是谁",
started_at=recent,
ended_at=recent,
success=True,
finish_reason="stop",
feedback={},
activated_skills=[],
task_id=None,
attempt_index=None,
validation_result=None,
)
)
@ -329,8 +350,11 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
ended_at=recent,
success=True,
finish_reason="stop",
feedback={},
feedback={"feedback_type": "satisfied"},
activated_skills=receipts,
task_id=f"task-merge-{index}",
attempt_index=1,
validation_result={"accepted": True, "score": 0.9},
)
)
for receipt in receipts:
@ -382,6 +406,9 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
kinds = {candidate.kind for candidate in candidates}
assert {"revise_skill", "new_skill", "merge_skills", "retire_skill"} <= kinds
new_candidates = [candidate for candidate in candidates if candidate.kind == "new_skill"]
assert new_candidates
assert all("simple-chat" not in run_id for candidate in new_candidates for run_id in candidate.source_run_ids)
retire_candidate = next(candidate for candidate in candidates if candidate.kind == "retire_skill")
retire_draft = asyncio.run(
@ -396,6 +423,100 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
assert store.read_draft("svn-migration", retire_draft.draft_id) is not None
def test_skill_learning_service_generates_task_scoped_candidates(tmp_path: Path) -> None:
    """Candidates built for ``task-1`` cite only that task's run, not another task's run."""
    spec_store = SkillSpecStore(tmp_path)
    runs = RunMemoryStore(tmp_path / "memory" / "runs")
    learning = SkillLearningService(
        run_store=runs,
        learning_store=SkillLearningStore(tmp_path / "memory" / "skills"),
        draft_service=DraftService(spec_store),
        evidence_selector=EvidenceSelector(runs),
    )
    timestamp = datetime.now(timezone.utc).isoformat()

    def accepted_run(run_id, session_id, task_id, activated_skills):
        # Both runs share the same text, outcome and feedback; only identity
        # and the activated skills differ.
        return RunRecord(
            run_id=run_id,
            session_id=session_id,
            task_id=task_id,
            attempt_index=1,
            task_text="Review API compatibility",
            started_at=timestamp,
            ended_at=timestamp,
            success=True,
            finish_reason="stop",
            feedback={"feedback_type": "satisfied"},
            activated_skills=activated_skills,
            validation_result={"accepted": True, "score": 0.9},
        )

    skill_receipt = _receipt(
        run_id="task-run-1",
        session_id="session-task",
        skill_name="api-review",
        skill_version="v0001",
        activated_at=timestamp,
    )
    runs.append_run_record(accepted_run("task-run-1", "session-task", "task-1", [skill_receipt]))
    runs.append_run_record(accepted_run("other-task-run", "session-other", "task-2", []))

    found = learning.build_learning_candidates_for_task("task-1", trigger_run_id="task-run-1")

    candidate_ids = [candidate.candidate_id for candidate in found]
    assert candidate_ids == ["revise:api-review:v0001:task:task-1"]
    only_candidate = found[0]
    assert only_candidate.source_run_ids == ["task-run-1"]
    assert only_candidate.related_skill_names == ["api-review"]
    assert only_candidate.evidence["task_id"] == "task-1"
def test_skill_learning_service_generates_new_skill_for_task_without_published_skills(tmp_path: Path) -> None:
    """A task whose run activated no skills yields a single ``new_skill`` candidate."""
    spec_store = SkillSpecStore(tmp_path)
    runs = RunMemoryStore(tmp_path / "memory" / "runs")
    learning = SkillLearningService(
        run_store=runs,
        learning_store=SkillLearningStore(tmp_path / "memory" / "skills"),
        draft_service=DraftService(spec_store),
        evidence_selector=EvidenceSelector(runs),
    )
    timestamp = datetime.now(timezone.utc).isoformat()

    skill_free_run = RunRecord(
        run_id="task-run-1",
        session_id="session-task",
        task_id="task-1",
        attempt_index=1,
        task_text="Generate migration checklist",
        started_at=timestamp,
        ended_at=timestamp,
        success=True,
        finish_reason="stop",
        feedback={"feedback_type": "satisfied"},
        activated_skills=[],
        validation_result={"accepted": True, "score": 0.9},
    )
    runs.append_run_record(skill_free_run)

    found = learning.build_learning_candidates_for_task("task-1", trigger_run_id="task-run-1")

    candidate_ids = [candidate.candidate_id for candidate in found]
    assert candidate_ids == ["new:task:task-1"]
    only_candidate = found[0]
    assert only_candidate.kind == "new_skill"
    assert only_candidate.source_run_ids == ["task-run-1"]
def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
skill = SkillContext(
name="docker-debug",
@ -446,7 +567,7 @@ def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
skill_effects = next(event for event in events if event.event_type == "skill_effects_snapshotted")
assert skill_effects.event_payload["run_record"]["activated_skills"][0]["skill_version"] == "v0007"
assert skill_effects.event_payload["skill_effects"][0]["skill_name"] == "docker-debug"
assert skill_effects.event_payload["learning_candidate_enabled"] is False
assert skill_effects.event_payload["candidate_generation_allowed"] is False
assert skill_effects.event_payload["learning_candidates"] == []
run_records = loaded.run_memory_store.list_runs()

View File

@ -53,7 +53,8 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
"node_id": "research",
"skill_query": "research workflow",
"selected_skill_names": ["research-workflow"],
"generated_skill_draft_id": None,
"ephemeral_guidance_id": None,
"ephemeral_guidance_name": None,
"ephemeral_used": False,
"reason": "matched published skill",
}
@ -80,7 +81,8 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
"skill_query": "research workflow",
"selected_skill_names": ["research-workflow"],
"ephemeral_skill_names": [],
"generated_skill_draft_id": None,
"ephemeral_guidance_id": None,
"ephemeral_guidance_name": None,
"ephemeral_used": False,
"finish_reason": "stop",
}
@ -118,5 +120,83 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
sub_run = next(run for run in projection["runs"] if run["run_id"] == "sub-run")
assert sub_run["metadata"]["selected_skill_names"] == ["research-workflow"]
assert sub_run["metadata"]["skill_query"] == "research workflow"
assert sub_run["metadata"]["ephemeral_guidance_id"] is None
assert any(event["actor_name"] == "Validator" for event in projection["events"])
assert any(run["session_id"] == "web:test" for run in projection["runs"])
def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
    """Ephemeral guidance referenced by a team sub-run appears in the projection.

    Seeds one sub-run record plus a planning event and a team-completion event
    that both reference guidance ``eg_123``, then checks the projected run
    metadata and the first projected artifact.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    sub_run_record = RunRecord(
        run_id="sub-run",
        session_id="sub-session",
        task_id="task-1",
        attempt_index=1,
        task_text="sub task",
        started_at="2026-01-01T00:00:01+00:00",
        ended_at="2026-01-01T00:00:02+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(sub_run_record)

    # Planning event whose skill resolution report references the guidance.
    resolution_entry = {
        "node_id": "research",
        "skill_query": "research workflow",
        "selected_skill_names": [],
        "ephemeral_guidance_id": "eg_123",
        "ephemeral_guidance_name": "research-workflow",
        "ephemeral_used": True,
        "reason": "generated ephemeral guidance",
    }
    planned_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": "team",
        "strategy": "sequence",
        "node_ids": ["research"],
        "ephemeral_guidance_ids": ["eg_123"],
        "skill_resolution_report": [resolution_entry],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload=planned_payload,
        context_visible=False,
    )

    # Completion event that ties the node result to the stored sub-run.
    node_result = {
        "node_id": "research",
        "success": True,
        "output_text": "evidence",
        "run_id": "sub-run",
        "skill_query": "research workflow",
        "selected_skill_names": [],
        "ephemeral_skill_names": ["ephemeral:research-workflow"],
        "ephemeral_guidance_id": "eg_123",
        "ephemeral_guidance_name": "research-workflow",
        "ephemeral_used": True,
        "finish_reason": "stop",
    }
    completed_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "team_success": True,
        "team_run_ids": ["sub-run"],
        "node_results": [node_result],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_team_run_completed",
        event_payload=completed_payload,
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    sub_run = next(run for run in projection["runs"] if run["run_id"] == "sub-run")
    assert sub_run["metadata"]["ephemeral_guidance_id"] == "eg_123"

    first_artifact = projection["artifacts"][0]
    assert first_artifact["artifact_id"] == "sub-run:ephemeral-guidance:eg_123"
    assert first_artifact["metadata"]["ephemeral_guidance_name"] == "research-workflow"

View File

@ -0,0 +1,107 @@
from __future__ import annotations
from pathlib import Path
from fastapi.testclient import TestClient
from beaver.engine.session import SessionManager
from beaver.interfaces.web.app import create_app
from beaver.services.agent_service import AgentService
def test_archived_sessions_can_be_hidden_from_default_web_list(tmp_path: Path) -> None:
    """Listing with ``exclude_end_reasons=["archived"]`` omits archived sessions.

    The archived session must still be retrievable directly and keep its
    ``end_reason``.
    """
    manager = SessionManager(tmp_path)
    for session_id in ("web:keep", "web:archived"):
        manager.ensure_session(session_id, source="web")
    manager.end_session("web:archived", "archived")

    rows = manager.list_sessions_rich(exclude_end_reasons=["archived"])
    listed_ids = set()
    for row in rows:
        listed_ids.add(row["id"])

    assert "web:keep" in listed_ids
    assert "web:archived" not in listed_ids
    assert manager.get_session("web:archived")["end_reason"] == "archived"
def test_archived_sessions_remain_available_to_history_search(tmp_path: Path) -> None:
    """An unfiltered ``list_sessions_rich()`` still returns an archived session."""
    manager = SessionManager(tmp_path)
    manager.ensure_session("web:archived", source="web")
    manager.end_session("web:archived", "archived")

    listed_ids = set()
    for row in manager.list_sessions_rich():
        listed_ids.add(row["id"])

    assert listed_ids == {"web:archived"}
def test_visible_history_excludes_error_and_incomplete_runs(tmp_path: Path) -> None:
    """``get_visible_history`` returns only the user/assistant turn of the clean run.

    Three runs are recorded: ``ok-run`` (completed, with a tool round-trip),
    ``error-run`` (completed with ``finish_reason="error"``) and
    ``pending-run`` (no ``run_completed`` event).
    """
    manager = SessionManager(tmp_path)
    manager.ensure_session("web:history", source="web")

    def record(run_id: str, **fields) -> None:
        # Every message in this test goes to the same session.
        manager.append_message("web:history", run_id=run_id, **fields)

    # Run that finishes normally, including a tool call and its result.
    record("ok-run", role="user", content="hello")
    record("ok-run", role="assistant", content="hi", finish_reason="stop")
    echo_call = {"id": "call-1", "type": "function", "function": {"name": "echo", "arguments": "{}"}}
    record("ok-run", role="assistant", content=None, tool_calls=[echo_call])
    record("ok-run", role="tool", content="tool result", tool_call_id="call-1")
    record(
        "ok-run",
        role="system",
        event_type="run_completed",
        content="hi",
        context_visible=False,
    )

    # Run that completes with an error finish reason.
    record("error-run", role="user", content="bad")
    record(
        "error-run",
        role="assistant",
        content="Error: provider failed",
        finish_reason="error",
    )
    record(
        "error-run",
        role="system",
        event_type="run_completed",
        content="Error: provider failed",
        finish_reason="error",
        context_visible=False,
    )

    # Run with a user message only and no completion event.
    record("pending-run", role="user", content="pending")

    history = manager.get_visible_history("web:history")
    observed = []
    for message in history:
        observed.append((message["role"], message["content"]))

    assert observed == [
        ("user", "hello"),
        ("assistant", "hi"),
    ]
def test_web_archive_route_does_not_create_archive_suffix_session(tmp_path: Path) -> None:
    """POST ``/api/sessions/web:alpha/archive`` archives ``web:alpha`` itself.

    No session named ``web:alpha/archive`` may be created, and the archived
    session must be absent from the default session listing.
    """
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        created = client.post("/api/sessions/web:alpha")
        archived = client.post("/api/sessions/web:alpha/archive")
        listing = client.get("/api/sessions")

    assert created.status_code == 200
    assert archived.status_code == 200
    assert archived.json() == {"ok": True, "archived": True}
    assert listing.status_code == 200

    loaded = service.create_loop().boot()
    assert loaded.session_manager.get_session("web:alpha")["end_reason"] == "archived"  # type: ignore[union-attr]
    assert loaded.session_manager.get_session("web:alpha/archive") is None  # type: ignore[union-attr]
    assert listing.json() == []

View File

@ -0,0 +1,157 @@
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.skills.assembler.task_assembler import SkillAssembler
class RecordingProvider(LLMProvider):
    """Stub provider that remembers the ``thinking_enabled`` flag of the last chat call."""

    def __init__(self) -> None:
        super().__init__()
        # Stays None until chat() has been called.
        self.thinking_enabled: bool | None = None

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record ``thinking_enabled`` and answer with a fixed one-skill JSON list."""
        self.thinking_enabled = thinking_enabled
        canned = LLMResponse(content='["daily-news"]', provider_name="stub", model="stub-model")
        return canned

    def get_default_model(self) -> str:
        """Return the stub model name."""
        return "stub-model"
class SequencedProvider(LLMProvider):
    """Stub provider that replays scripted replies in order and logs each prompt."""

    def __init__(self, responses: list[str]) -> None:
        super().__init__()
        # Copy so that consuming replies does not mutate the caller's list.
        self.responses = [*responses]
        # One entry per chat() call, in call order.
        self.messages: list[list[dict]] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Log ``messages`` and return the next scripted reply (IndexError when exhausted)."""
        self.messages.append(messages)
        next_reply = self.responses.pop(0)
        return LLMResponse(content=next_reply, provider_name="stub", model="stub-model")

    def get_default_model(self) -> str:
        """Return the stub model name."""
        return "stub-model"
class StaticRetriever:
    """Retriever stub that performs no ranking."""

    async def retrieve(self, **kwargs):
        """Return the first ``top_k`` entries of ``candidates``, in their given order."""
        pool = kwargs["candidates"]
        limit = kwargs["top_k"]
        return pool[:limit]
class LoaderWithFullSkill:
    """Loader stub exposing a single published skill named ``docker-debug``."""

    def build_selection_candidates(self) -> list[dict[str, str]]:
        """Return the summary row for the one known skill."""
        docker_debug = {
            "name": "docker-debug",
            "description": "General container tips.",
            "version": "v1",
            "content_hash": "abc",
        }
        return [docker_debug]

    def load_published_skill(self, name: str) -> str | None:
        """Return the full skill document for ``docker-debug``, else ``None``."""
        if name == "docker-debug":
            # The literal is kept flush-left so its text is unchanged.
            return """---
description: General container tips.
tools:
- search_files
---
# Docker Debug
Use this skill when doing Docker log triage and container failure analysis.
"""
        return None

    def get_skill_record(self, name: str):
        """Return the same fixed record regardless of ``name``."""
        record = SimpleNamespace(version="v1", content_hash="abc", tool_hints=["search_files"])
        return record
def test_skill_selection_receives_thinking_mode() -> None:
    """``_select_skill_names`` forwards ``thinking_enabled`` to the provider's chat call."""
    provider = RecordingProvider()
    assembler = SkillAssembler(loader=SimpleNamespace())
    candidate_rows = [{"name": "daily-news", "description": "Summarize news"}]

    selection = assembler._select_skill_names(
        task_description="summarize daily news",
        candidates=candidate_rows,
        provider=provider,
        model="Qwen3.6-35B",
        thinking_enabled=False,
    )
    selected = asyncio.run(selection)

    assert selected == ["daily-news"]
    # `is False` distinguishes an explicit False from the initial None.
    assert provider.thinking_enabled is False
def test_skill_assembler_loads_detail_directly_for_small_candidate_sets() -> None:
    """With one candidate the assembler makes a single "final" call that includes skill detail."""
    provider = SequencedProvider(['["docker-debug"]'])
    assembler = SkillAssembler(loader=LoaderWithFullSkill(), retriever=StaticRetriever())

    assembly = assembler.assemble(
        task_description="debug a failing Docker container",
        provider=provider,
        model="stub-model",
    )
    result = asyncio.run(assembly)

    activated_names = [skill.name for skill in result.activated_skills]
    stages = [item["stage"] for item in result.llm_interactions]
    assert activated_names == ["docker-debug"]
    assert result.activated_skills[0].tool_hints == ["search_files"]
    assert stages == ["final"]
    assert len(provider.messages) == 1

    # Index 1 of the only request is read as the user prompt.
    first_user_prompt = provider.messages[0][1]["content"]
    assert "Use this skill when doing Docker log triage" in first_user_prompt
def test_skill_assembler_shortlists_before_loading_detail_for_large_candidate_sets() -> None:
    """With more candidates than ``max_detailed_candidates`` a shortlist call precedes the final call.

    Skill detail text must appear only in the second (final) prompt.
    """
    provider = SequencedProvider(['["docker-debug"]', '["docker-debug"]'])
    loader = LoaderWithFullSkill()
    original_candidates = loader.build_selection_candidates

    def extended_candidates() -> list[dict[str, str]]:
        # Append a second skill so the candidate set exceeds the detail limit of 1.
        return [
            *original_candidates(),
            {
                "name": "other-skill",
                "description": "Other workflow.",
                "version": "v1",
                "content_hash": "def",
            },
        ]

    loader.build_selection_candidates = extended_candidates
    assembler = SkillAssembler(
        loader=loader,
        retriever=StaticRetriever(),
        max_detailed_candidates=1,
    )

    assembly = assembler.assemble(
        task_description="debug a failing Docker container",
        provider=provider,
        model="stub-model",
    )
    result = asyncio.run(assembly)

    activated_names = [skill.name for skill in result.activated_skills]
    stages = [item["stage"] for item in result.llm_interactions]
    assert activated_names == ["docker-debug"]
    assert stages == ["shortlist", "final"]
    assert len(provider.messages) == 2

    detail_marker = "Use this skill when doing Docker log triage"
    assert detail_marker not in provider.messages[0][1]["content"]
    assert detail_marker in provider.messages[1][1]["content"]

View File

@ -90,6 +90,7 @@ def test_eval_pass_allows_publish_after_safety_and_review(tmp_path: Path) -> Non
report = asyncio.run(pipeline.evaluate_draft("candidate-1", draft.skill_name, draft.draft_id, provider_bundle=_bundle()))
safety = pipeline.check_safety(draft.skill_name, draft.draft_id)
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
published = pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester")
@ -111,6 +112,7 @@ def test_eval_regression_blocks_publish(tmp_path: Path) -> None:
report = asyncio.run(pipeline.evaluate_draft("candidate-1", draft.skill_name, draft.draft_id, provider_bundle=_bundle()))
pipeline.check_safety(draft.skill_name, draft.draft_id)
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
assert report.passed is False

View File

@ -68,6 +68,39 @@ def test_pipeline_lists_candidates_and_moves_draft_through_review(tmp_path: Path
assert pipeline.get_draft(draft.skill_name, draft.draft_id).status == SkillReviewState.PUBLISHED.value
def test_pipeline_approve_requires_submitted_review(tmp_path: Path) -> None:
    """Approving a draft that was never submitted for review raises ``ValueError``."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="needs-review",
        proposed_content="# Needs Review\n\nDo the thing.",
        proposed_frontmatter={"description": "needs review"},
        created_by="test",
        reason="test",
    )
    skill_name, draft_id = draft.skill_name, draft.draft_id

    with pytest.raises(ValueError, match="in review before approval"):
        pipeline.approve(skill_name, draft_id, reviewer="tester")
def test_pipeline_does_not_resubmit_terminal_draft(tmp_path: Path) -> None:
    """A draft that has already been published cannot be submitted for review again."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="already-published",
        proposed_content="# Already Published\n\nDo the thing.",
        proposed_frontmatter={"description": "already published"},
        created_by="test",
        reason="test",
    )
    skill_name, draft_id = draft.skill_name, draft.draft_id

    # Walk the draft through submit, approve, safety check and publish.
    pipeline.submit_review(skill_name, draft_id, requested_by="tester")
    pipeline.approve(skill_name, draft_id, reviewer="tester")
    pipeline.check_safety(skill_name, draft_id)
    pipeline.publish(skill_name, draft_id, publisher="tester")

    with pytest.raises(ValueError, match="draft status before review submission"):
        pipeline.submit_review(skill_name, draft_id, requested_by="tester")
def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
draft = pipeline.draft_service.create_new_skill_draft(
@ -80,5 +113,22 @@ def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
pipeline.reject(draft.skill_name, draft.draft_id, reviewer="tester")
with pytest.raises(ValueError, match="approved"):
with pytest.raises(ValueError, match="Draft not found"):
pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester")
assert pipeline.draft_service.get_draft(draft.skill_name, draft.draft_id) is None
def test_pipeline_reject_removes_draft_from_review_list(tmp_path: Path) -> None:
    """Rejecting a draft yields a REJECTED review and leaves ``list_drafts()`` empty."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="remove-skill",
        proposed_content="# Remove\n\nNo longer needed.",
        proposed_frontmatter={"description": "remove"},
        created_by="test",
        reason="test",
    )
    skill_name, draft_id = draft.skill_name, draft.draft_id

    review = pipeline.reject(skill_name, draft_id, reviewer="tester")

    assert review.status == SkillReviewState.REJECTED.value
    remaining_drafts = pipeline.list_drafts()
    assert remaining_drafts == []

View File

@ -65,6 +65,7 @@ def test_safety_marks_dangerous_tools_high_and_requires_confirm(tmp_path: Path)
)
report = pipeline.check_safety(draft.skill_name, draft.draft_id)
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
assert report.passed is True
@ -84,6 +85,7 @@ def test_publish_requires_safety_report(tmp_path: Path) -> None:
created_by="test",
reason="test",
)
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
with pytest.raises(ValueError, match="safety report"):

View File

@ -12,6 +12,7 @@ from beaver.engine.context.builder import ContextBuilder, ContextBuildInput
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.services.agent_service import AgentService
from beaver.skills.assembler import SkillAssemblyResult
from beaver.tasks import TaskExecutionPlan, TaskService, ValidationResult, ValidationService
@ -67,7 +68,25 @@ class FakeLearningCandidate:
return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
def _bundle(*responses: str) -> ProviderBundle:
class RecordingSkillAssembler:
    """Assembler stub that logs every ``task_description`` and activates nothing."""

    def __init__(self) -> None:
        # One entry per assemble() call, in call order.
        self.task_descriptions: list[str] = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Record ``kwargs["task_description"]`` and return a default ``SkillAssemblyResult``."""
        description = kwargs["task_description"]
        self.task_descriptions.append(description)
        return SkillAssemblyResult()
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
    """Build a stub response whose content is a JSON routing decision.

    ``action`` and ``short_title`` are interpolated verbatim, so they must not
    contain characters that need JSON escaping.
    """
    route_json = f'{{"action":"{action}","reason":"test route","short_title":"{short_title}"}}'
    return LLMResponse(
        content=route_json,
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
return ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=StubProvider(
@ -81,6 +100,8 @@ def _bundle(*responses: str) -> ProviderBundle:
for response in responses
]
),
auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
auxiliary_provider=StubProvider([_route_response(route_action)]),
)
@ -110,6 +131,25 @@ def _provider_bundle(provider: StubProvider) -> ProviderBundle:
return ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=provider,
auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
auxiliary_provider=StubProvider([_route_response("new_task")]),
)
def _main_only_bundle(*responses: str) -> ProviderBundle:
    """Build a bundle with a main runtime/provider only, passing no auxiliary ones.

    Each string in ``responses`` becomes one scripted ``LLMResponse`` for the
    main stub provider, in the given order.
    """
    scripted = []
    for text in responses:
        scripted.append(
            LLMResponse(
                content=text,
                finish_reason="stop",
                provider_name="stub",
                model="stub-model",
            )
        )
    main_runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(
        main_runtime=main_runtime,
        main_provider=StubProvider(scripted),
    )
@ -126,7 +166,7 @@ def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
service.process_direct(
"hello?",
session_id="web:simple",
provider_bundle=_bundle("hi"),
provider_bundle=_bundle("hi", route_action="simple_chat"),
)
)
loaded = service.create_loop().boot()
@ -165,8 +205,89 @@ def test_complex_request_creates_task_and_records_validation(tmp_path: Path) ->
assert any(event.event_type == "task_validation_snapshotted" for event in events)
assert run_record.task_id == result.task_id
assert run_record.validation_result["accepted"] is True
assert skill_effects.event_payload["learning_candidate_enabled"] is False
assert skill_effects.event_payload["candidate_generation_allowed"] is False
assert skill_effects.event_payload["learning_candidates"] == []
assert task.metadata["short_title"] == "Test task"
def test_task_mode_uses_task_aware_skill_selection_context(tmp_path: Path) -> None:
    """In task mode the description given to the skill assembler carries task context.

    The first recorded description must contain the task goal, the current
    request, the previously activated skills and the "return []" instruction.
    """
    skill_assembler = RecordingSkillAssembler()
    validation_service = StubValidationService(
        [ValidationResult(passed=True, score=1.0, validator="test")]
    )
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=_single_planner(),
        validation_service=validation_service,
        skill_assembler=skill_assembler,
    )
    service = AgentService(loader=loader)

    result = asyncio.run(
        service.process_direct(
            "继续按刚才的方案改",
            session_id="web:task-skill-query",
            provider_bundle=_bundle("done", route_action="new_task"),
        )
    )

    assert result.task_id
    assert skill_assembler.task_descriptions
    query = skill_assembler.task_descriptions[0]
    expected_fragments = (
        "Task goal:",
        "Current user request:",
        "Previously activated skills:",
        "If no published skill matches, return []",
    )
    for fragment in expected_fragments:
        assert fragment in query
def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
    """A task stays active across turns until the router answers ``close_task``.

    Turn 1 routes to ``new_task`` and turn 2 to ``continue_task``, so both
    runs attach to the same task. Turn 3 routes to ``close_task``, after which
    the task is closed and the session has no active task view.
    """
    validation_service = StubValidationService(
        [
            ValidationResult(passed=True, score=0.9, validator="test"),
            ValidationResult(passed=True, score=0.9, validator="test"),
        ]
    )
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=_single_planner(),
            validation_service=validation_service,
        )
    )

    def send(message: str, reply: str, action: str):
        # One user turn in the shared session with a scripted reply and route.
        return asyncio.run(
            service.process_direct(
                message,
                session_id="web:continue",
                provider_bundle=_bundle(reply, route_action=action),
            )
        )

    first = send("implement the search workflow", "first done", "new_task")
    second = send("also add tests for it", "tests added", "continue_task")

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(first.task_id)
    assert task is not None
    assert second.task_id == first.task_id
    assert len(task.run_ids) == 2

    closed = send("这个任务结束了", "好的,已结束。", "close_task")

    task = loaded.task_service.get_task(first.task_id)
    assert closed.task_id is None
    assert task is not None
    assert task.status == "closed"
    assert loaded.task_service.active_task_view("web:continue") is None
def test_validation_failure_retries_once(tmp_path: Path) -> None:
@ -229,11 +350,11 @@ def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
loaded = service.create_loop().boot()
learning_calls = []
def build_learning_candidates() -> list[FakeLearningCandidate]:
learning_calls.append("called")
def build_learning_candidates_for_task(task_id: str, *, trigger_run_id: str) -> list[FakeLearningCandidate]:
learning_calls.append((task_id, trigger_run_id))
return [FakeLearningCandidate()]
loaded.skill_learning_service.build_learning_candidates = build_learning_candidates
loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task
feedback = asyncio.run(
service.submit_feedback(
@ -247,7 +368,7 @@ def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
assert feedback["learning_candidates"] == [
{"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
]
assert learning_calls == ["called"]
assert learning_calls == [(result.task_id, result.run_id)]
service2 = AgentService(
loader=EngineLoader(
@ -279,6 +400,14 @@ def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
assert abandon_feedback["task_status"] == "abandoned"
assert abandon_feedback["learning_candidates"] == []
loaded2 = service2.create_loop().boot()
failure_events = [
event
for event in loaded2.session_manager.get_run_event_records(abandoned.session_id, abandoned.run_id)
if event.event_type == "task_failure_evidence_recorded"
]
assert len(failure_events) == 1
assert loaded2.memory_service.get_store().memory_entries == []
def test_feedback_is_idempotent_and_projected_to_assistant_message(tmp_path: Path) -> None:
@ -466,7 +595,7 @@ def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
events = loaded.session_manager.get_run_event_records(record.session_id, run_id)
skill_effects = [event for event in events if event.event_type == "skill_effects_snapshotted"]
assert skill_effects
assert skill_effects[-1].event_payload["learning_candidate_enabled"] is False
assert skill_effects[-1].event_payload["candidate_generation_allowed"] is False
def test_context_builder_strips_ui_projection_fields_from_provider_history() -> None:
@ -490,17 +619,43 @@ def test_context_builder_strips_ui_projection_fields_from_provider_history() ->
assert assistant == {"role": "assistant", "content": "done"}
def test_context_builder_normalizes_persisted_tool_arguments() -> None:
    """Tool-call arguments stored as a dict come out of the builder as a JSON string."""
    stored_call = {
        "id": "call-1",
        "type": "function",
        "function": {
            "name": "cron",
            # Deliberately a dict rather than a JSON string.
            "arguments": {"action": "add", "mode": "notification"},
        },
    }
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [stored_call],
    }

    build_input = ContextBuildInput(history=[assistant_turn])
    result = ContextBuilder().build_messages(build_input)

    tool_call = result.messages[-1]["tool_calls"][0]
    assert tool_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
    """A validator reply that is not JSON must yield a rejected ``llm_error`` result.

    The bundle comes from ``_main_only_bundle`` and scripts the single main
    provider reply ``"not json"``.
    """
    task_service = TaskService(tmp_path / "tasks")
    task = task_service.create_task(session_id="web:validator", description="implement validator handling")

    # Each keyword argument appears exactly once; passing any of them twice
    # is a SyntaxError ("keyword argument repeated").
    validation = asyncio.run(
        ValidationService().validate_task_result(
            task=task,
            user_message="implement validator handling",
            final_output="done",
            provider_bundle=_main_only_bundle("not json"),
        )
    )

    assert validation.accepted is False
    assert validation.validator == "llm_error"

View File

@ -9,7 +9,7 @@ from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse
from beaver.engine.providers.factory import ProviderBundle
from beaver.skills.drafts import DraftService
from beaver.skills.learning import MissingSkillSynthesizer
from beaver.skills.learning import EphemeralGuidanceSynthesizer
from beaver.skills.publisher import SkillPublisher
from beaver.skills.reviews import ReviewService
from beaver.skills.specs import SkillSpecStore
@ -116,12 +116,12 @@ def test_task_skill_resolver_pins_matching_published_skill(tmp_path: Path) -> No
assert reports[0].ephemeral_used is False
def test_task_skill_resolver_generates_draft_only_ephemeral_skill_when_missing(tmp_path: Path) -> None:
def test_task_skill_resolver_generates_ephemeral_guidance_when_missing(tmp_path: Path) -> None:
provider = RecordingProvider(
[
"""
{
"skill_name": "api-compatibility-review",
"guidance_name": "api-compatibility-review",
"description": "Review API compatibility",
"content": "# API Compatibility Review\\n\\nCheck schema compatibility.",
"tags": ["api", "review"]
@ -133,7 +133,7 @@ def test_task_skill_resolver_generates_draft_only_ephemeral_skill_when_missing(t
resolver = TaskSkillResolver(
skills_loader=SkillsLoader(tmp_path),
draft_service=DraftService(store),
missing_skill_synthesizer=MissingSkillSynthesizer(),
missing_skill_synthesizer=EphemeralGuidanceSynthesizer(),
)
graph = ExecutionGraph(
strategy="sequence",
@ -163,13 +163,14 @@ def test_task_skill_resolver_generates_draft_only_ephemeral_skill_when_missing(t
)
drafts = store.list_drafts("api-compatibility-review")
assert len(drafts) == 1
assert drafts == []
assert store.list_published_skill_names() == []
assert resolved.nodes[0].inherited_pinned_skills == []
assert len(resolved.nodes[0].inherited_pinned_skill_contexts) == 1
context: SkillContext = resolved.nodes[0].inherited_pinned_skill_contexts[0]
assert context.name == "draft:api-compatibility-review"
assert context.version == f"draft:{drafts[0].draft_id}"
assert context.activation_reason == "generated_missing_skill"
assert reports[0].generated_skill_draft_id == drafts[0].draft_id
assert context.name == "ephemeral:api-compatibility-review"
assert context.version.startswith("ephemeral:eg_")
assert context.activation_reason == "ephemeral_guidance"
assert reports[0].ephemeral_guidance_id is not None
assert reports[0].ephemeral_guidance_name == "api-compatibility-review"
assert reports[0].ephemeral_used is True

View File

@ -83,7 +83,6 @@ tools:
registry = ToolRegistry()
registry.register(DummyTool("memory", toolset="memory", always_available=True))
registry.register(DummyTool("skill_view", toolset="skills", always_available=True))
registry.register(DummyTool("terminal", toolset="shell"))
registry.register(DummyTool("search_files", toolset="file"))
registry.register(DummyTool("echo", toolset="debug"))
@ -100,7 +99,7 @@ tools:
)
)
assert [spec.name for spec in selected] == ["memory", "skill_view", "terminal", "search_files"]
assert [spec.name for spec in selected] == ["memory", "terminal", "search_files"]
def test_embedding_fallback_can_return_all_or_top_k() -> None:

View File

@ -0,0 +1,132 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from fastapi.testclient import TestClient
from beaver.interfaces.web.app import create_app
from beaver.services.agent_service import AgentService
@dataclass(slots=True)
class StubRunResult:
session_id: str
run_id: str = "run-1"
output_text: str = "ok"
finish_reason: str = "stop"
tool_iterations: int = 0
provider_name: str | None = "stub"
model: str | None = "stub-model"
usage: dict[str, Any] = field(default_factory=lambda: {"total_tokens": 3})
task_id: str | None = "task-1"
task_status: str | None = "awaiting_feedback"
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
class StubAgentService(AgentService):
    """Agent service stub that records ``submit_direct`` calls and echoes the message.

    With ``fail=True`` every call raises ``RuntimeError("boom")`` after being recorded.
    """

    def __init__(self, *, fail: bool = False) -> None:
        super().__init__()
        self.fail = fail
        # One dict per call: the message plus all keyword arguments received.
        self.calls: list[dict[str, Any]] = []

    async def submit_direct(self, message: str, **kwargs: Any) -> StubRunResult:  # type: ignore[override]
        """Record the call, then raise if configured to fail, else return an echo result."""
        recorded_call = {"message": message, **kwargs}
        self.calls.append(recorded_call)
        if self.fail:
            raise RuntimeError("boom")
        # A missing or falsy session id falls back to the default web session.
        session_id = kwargs.get("session_id") or "web:default"
        return StubRunResult(session_id=session_id, output_text=f"echo:{message}")
def test_websocket_ping_pong() -> None:
    """A ``ping`` frame on the session websocket is answered with ``pong``."""
    app = create_app(service=StubAgentService(), manage_service_lifecycle=False)
    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json({"type": "ping"})
        reply = websocket.receive_json()
        assert reply == {"type": "pong"}
def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
    """A chat frame produces a status frame, an assistant message and a session update.

    Also checks the keyword arguments passed to ``submit_direct`` and that the
    input metadata and attachments are reported under ``metadata.input_metadata``.
    """
    service = StubAgentService()
    app = create_app(service=service, manage_service_lifecycle=False)
    outgoing = {
        "type": "message",
        "content": "hello",
        "metadata": {"source": "test"},
        "attachments": [{"file_id": "file-1", "name": "a.txt"}],
    }

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json(outgoing)
        assert websocket.receive_json() == {"type": "status", "status": "thinking"}
        message = websocket.receive_json()
        session_updated = websocket.receive_json()

    expected_call = {
        "message": "hello",
        "session_id": "web:alpha",
        "source": "websocket",
        "user_id": None,
        "title": None,
        "execution_context": None,
        "model": None,
        "provider_name": None,
        "embedding_model": None,
    }
    assert service.calls == [expected_call]

    expected_fields = {
        "type": "message",
        "role": "assistant",
        "content": "echo:hello",
        "session_id": "web:alpha",
        "run_id": "run-1",
        "task_id": "task-1",
        "task_status": "awaiting_feedback",
        "validation_result": {"accepted": True},
        "validation_status": "passed",
    }
    for key, expected in expected_fields.items():
        assert message[key] == expected

    assert message["metadata"]["input_metadata"] == {
        "source": "test",
        "attachments": [{"file_id": "file-1", "name": "a.txt"}],
    }
    assert session_updated == {
        "type": "session_updated",
        "session_id": "web:alpha",
        "source": "websocket",
    }
def test_websocket_empty_content_returns_error_without_runtime_call() -> None:
    """Whitespace-only content is answered with an error frame and never reaches the service."""
    service = StubAgentService()
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json({"type": "message", "content": " "})
        reply = websocket.receive_json()
        assert reply == {"type": "error", "error": "'content' is required"}

    assert service.calls == []
def test_websocket_runtime_error_returns_assistant_error_message() -> None:
    """A service failure is reported as an assistant error message and the socket stays usable.

    The follow-up ping after the failure must still be answered with ``pong``.
    """
    service = StubAgentService(fail=True)
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json({"type": "message", "content": "hello"})
        assert websocket.receive_json() == {"type": "status", "status": "thinking"}
        message = websocket.receive_json()
        # Check the connection is still usable after the failure.
        websocket.send_json({"type": "ping"})
        pong = websocket.receive_json()

    expected_fields = {
        "type": "message",
        "role": "assistant",
        "session_id": "web:alpha",
        "finish_reason": "error",
    }
    for key, expected in expected_fields.items():
        assert message[key] == expected
    assert "boom" in message["content"]
    assert pong == {"type": "pong"}