feat(engine): 添加MCP连接管理和工具集成功能
- 集成MCP连接管理器,支持MCP服务器连接
- 添加多种内置工具:ClarifyTool、CronTool、DelegateTool、ExecuteCodeTool、PatchFileTool、ProcessTool、SendMessageTool、SpawnTool、TerminalTool、TodoTool、WebFetchTool、WebSearchTool、WriteFileTool等
- 实现工具注册和装配功能
- 添加技能选择上下文参数
- 支持思考模式控制参数thinking_enabled

feat(coordinator): 重构任务执行计划器参数命名
- 将learning_candidate_enabled重命名为allow_candidate_generation
- 更新TeamGraphScheduler中的参数传递
- 修改LocalAgentRunner中的相关参数处理
- 更新README文档中的相应描述

refactor(context): 标准化工具调用参数格式
- 添加_json导入用于参数序列化
- 实现_provider_tool_calls方法标准化OpenAI兼容的工具调用载荷
- 修复工具调用中参数非字符串类型的序列化问题

refactor(session): 优化消息历史记录过滤逻辑
- 修改get_messages_as_conversation为基于运行状态过滤消息
- 排除未完成、失败或错误结束的运行记录
- 改进对话历史的可见性控制机制

fix(store): 修复FTS索引重建逻辑
- 添加异常处理防止FTS索引创建失败
- 实现_rebuild_fts_index方法重新构建全文搜索索引
- 优化索引触发器和表的维护流程
This commit is contained in:
80
app-instance/backend/tests/unit/test_active_task_api.py
Normal file
80
app-instance/backend/tests/unit/test_active_task_api.py
Normal file
@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from beaver.interfaces.web.app import create_app
|
||||
from beaver.services.agent_service import AgentService
|
||||
|
||||
|
||||
def test_active_task_api_returns_open_task_and_hides_closed(tmp_path: Path) -> None:
    """Verify the active-task endpoint across a task's open and closed states.

    Creates a task for session ``web:active``, queries the active-task and
    task-list endpoints while it is open, closes the task, and queries the
    active-task endpoint again.
    """
    agent_service = AgentService(workspace=tmp_path)
    runtime = agent_service.create_loop().boot()
    created = runtime.task_service.create_task(  # type: ignore[union-attr]
        session_id="web:active",
        description="实现任务连续性",
        metadata={"short_title": "任务连续性"},
    )
    web_app = create_app(service=agent_service, manage_service_lifecycle=False)
    active_task_url = "/api/sessions/web:active/active-task"

    with TestClient(web_app) as http:
        while_open = http.get(active_task_url)
        task_listing = http.get("/api/tasks")
        # Close the task between the two active-task requests.
        runtime.task_service.close_task(created.task_id, reason="done")  # type: ignore[union-attr]
        after_close = http.get(active_task_url)

    assert while_open.status_code == 200
    open_payload = while_open.json()
    assert open_payload["task_id"] == created.task_id
    assert open_payload["short_title"] == "任务连续性"
    assert task_listing.json()[0]["short_title"] == "任务连续性"
    assert after_close.status_code == 200
    assert after_close.json() is None
|
||||
|
||||
|
||||
def test_active_task_api_hides_unengaged_cron_task(tmp_path: Path) -> None:
    """Verify that only a user-engaged cron-created task is reported as active.

    Two tasks are created with ``creator="cron"``: one whose metadata has
    ``user_engaged`` set to False and one with it set to True. The endpoint
    must answer ``null`` for the first session and the task for the second.
    """
    agent_service = AgentService(workspace=tmp_path)
    runtime = agent_service.create_loop().boot()
    unengaged = runtime.task_service.create_task(  # type: ignore[union-attr]
        session_id="web:cron",
        description="提醒用户喝水",
        creator="cron",
        metadata={"source": "scheduled_cron", "user_engaged": False},
    )
    engaged = runtime.task_service.create_task(  # type: ignore[union-attr]
        session_id="web:engaged",
        description="修改新闻总结",
        creator="cron",
        metadata={"source": "scheduled_run", "user_engaged": True},
    )
    web_app = create_app(service=agent_service, manage_service_lifecycle=False)

    with TestClient(web_app) as http:
        unengaged_reply = http.get("/api/sessions/web:cron/active-task")
        engaged_reply = http.get("/api/sessions/web:engaged/active-task")

    assert unengaged_reply.status_code == 200
    assert unengaged_reply.json() is None
    assert engaged_reply.status_code == 200
    assert engaged_reply.json()["task_id"] == engaged.task_id
    assert unengaged.task_id != engaged.task_id
|
||||
|
||||
|
||||
def test_task_delete_api_removes_backend_task(tmp_path: Path) -> None:
    """Verify that DELETE on a task removes it from the listing and detail endpoints."""
    agent_service = AgentService(workspace=tmp_path)
    runtime = agent_service.create_loop().boot()
    doomed = runtime.task_service.create_task(  # type: ignore[union-attr]
        session_id="web:delete",
        description="删除这个任务",
    )
    web_app = create_app(service=agent_service, manage_service_lifecycle=False)

    with TestClient(web_app) as http:
        delete_reply = http.delete(f"/api/tasks/{doomed.task_id}")
        listing_reply = http.get("/api/tasks")
        detail_reply = http.get(f"/api/tasks/{doomed.task_id}")

    assert delete_reply.status_code == 200
    assert delete_reply.json()["task_id"] == doomed.task_id
    # The deleted id must not appear anywhere in the remaining listing.
    assert all(entry["task_id"] != doomed.task_id for entry in listing_reply.json())
    assert detail_reply.status_code == 404
|
||||
@ -59,7 +59,7 @@ class BlockingSkillAssembler:
|
||||
self.release_first = asyncio.Event()
|
||||
|
||||
async def assemble(self, **kwargs) -> SkillAssemblyResult:
|
||||
if kwargs["task_description"] == "task first":
|
||||
if "task first" in kwargs["task_description"]:
|
||||
self.first_started.set()
|
||||
await self.release_first.wait()
|
||||
return SkillAssemblyResult()
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from beaver.engine import AgentLoop, EngineLoader
|
||||
from beaver.engine.providers import make_provider_bundle
|
||||
@ -42,6 +43,37 @@ def test_load_config_reads_current_instance_shape(tmp_path) -> None:
|
||||
assert target["extra_headers"] == {"X-Test": "1"}
|
||||
|
||||
|
||||
def test_provider_resolution_ignores_custom_and_disabled_overrides(tmp_path) -> None:
    """Verify that provider resolution lands on ``openai`` for every request.

    The config names ``custom`` as the default provider but gives it an empty
    entry; only ``openai`` has credentials. Resolution with no override, with
    ``custom``, and with ``deepseek`` must all report ``openai``.
    """
    agent_defaults = {
        "workspace": str(tmp_path / "workspace"),
        "model": "qwen-plus",
        "provider": "custom",
    }
    provider_table = {
        "custom": {},
        "openai": {
            "apiKey": "sk-test",
            "apiBase": "https://oai.example.com/v1",
        },
    }
    raw_config = {
        "agents": {"defaults": agent_defaults},
        "providers": provider_table,
    }
    config_path = tmp_path / "config.json"
    config_path.write_text(json.dumps(raw_config), encoding="utf-8")

    config = load_config(config_path=config_path)

    default_target = config.resolve_provider_target()
    assert default_target["provider_name"] == "openai"
    custom_target = config.resolve_provider_target(provider_name="custom")
    assert custom_target["provider_name"] == "openai"
    deepseek_target = config.resolve_provider_target(provider_name="deepseek")
    assert deepseek_target["provider_name"] == "openai"
|
||||
|
||||
|
||||
def test_engine_loader_uses_config_workspace(tmp_path) -> None:
|
||||
workspace = tmp_path / "workspace"
|
||||
config_path = tmp_path / "config.json"
|
||||
@ -105,3 +137,40 @@ def test_openai_compatible_qwen_config_keeps_openai_provider() -> None:
|
||||
assert bundle.main_runtime.api_base == "https://oai.example.com/v1"
|
||||
assert isinstance(bundle.main_provider, LiteLLMProvider)
|
||||
assert bundle.main_provider._resolve_model("qwen-plus") == "openai/qwen-plus"
|
||||
|
||||
|
||||
def test_load_config_reads_stevenli_mcp_authz_identity() -> None:
    """Verify MCP, authz and backend-identity fields loaded from the stevenli instance config.

    The config file is located relative to this test file, four directory
    levels up, under ``app-instance/runtime/instances/stevenli``.
    """
    repo_root = Path(__file__).resolve().parents[4]
    instance_home = repo_root / "app-instance" / "runtime" / "instances" / "stevenli" / "nanobot-home"
    config = load_config(config_path=instance_home / "config.json")

    outlook = config.tools.mcp_servers["outlook_mcp"]
    assert outlook.transport == "http"
    assert outlook.url == "http://10.6.80.29:8000/mcp"
    assert outlook.auth_mode == "oauth_backend_token"
    assert outlook.auth_audience == "mcp:outlook_mcp"
    assert "tool:mail_list_messages" in outlook.auth_scopes
    assert outlook.tool_timeout == 60
    assert outlook.sensitive is True

    authz = config.authz
    assert authz.enabled is True
    assert authz.base_url == "http://nano-authz-service:19090"

    identity = config.backend_identity
    assert identity.backend_id == "stevenli"
    assert identity.client_id == "stevenli"
|
||||
|
||||
|
||||
def test_load_config_adds_managed_local_mcp_servers(tmp_path) -> None:
    """Verify that loading a config with no MCP servers yields ``local_filesystem_mcp``."""
    empty_tools_config = {"tools": {"mcpServers": {}}}
    config_path = tmp_path / "config.json"
    config_path.write_text(json.dumps(empty_tools_config), encoding="utf-8")

    config = load_config(config_path=config_path)

    filesystem_server = config.tools.mcp_servers["local_filesystem_mcp"]
    assert filesystem_server.transport == "stdio"
    assert filesystem_server.kind == "local"
    assert filesystem_server.category == "filesystem"
    assert filesystem_server.managed is True
    assert "beaver.interfaces.mcp.tools_server" in filesystem_server.args
|
||||
|
||||
126
app-instance/backend/tests/unit/test_cron_service.py
Normal file
126
app-instance/backend/tests/unit/test_cron_service.py
Normal file
@ -0,0 +1,126 @@
|
||||
import asyncio
|
||||
|
||||
from beaver.foundation.models import CronExecutionResult, CronRunRecord, CronSchedule
|
||||
from beaver.tools.base import ToolContext
|
||||
from beaver.tools.builtins import CronTool
|
||||
from beaver.services.cron_service import CronService, compute_next_run, parse_schedule, schedule_from_api
|
||||
|
||||
|
||||
def test_parse_hermes_style_schedules() -> None:
    """Verify ``parse_schedule`` on an interval, a bare duration and a cron expression."""
    every_quarter_hour = parse_schedule("every 15m")
    assert every_quarter_hour.kind == "every"
    assert every_quarter_hour.every_ms == 15 * 60 * 1000

    # A bare duration is expected to parse as a one-shot "at" schedule.
    in_thirty_seconds = parse_schedule("30s")
    assert in_thirty_seconds.kind == "at"
    assert in_thirty_seconds.at_ms is not None

    daily_at_nine = parse_schedule("0 9 * * *")
    assert daily_at_nine.kind == "cron"
    assert daily_at_nine.expr == "0 9 * * *"
|
||||
|
||||
|
||||
def test_schedule_from_frontend_payload() -> None:
    """Verify ``schedule_from_api`` for an ``every_seconds`` and a ``cron_expr`` payload."""
    interval_schedule = schedule_from_api({"every_seconds": 60})
    assert interval_schedule.kind == "every"
    assert interval_schedule.every_ms == 60_000

    cron_schedule = schedule_from_api({"cron_expr": "0 10 * * *"})
    assert cron_schedule.kind == "cron"
|
||||
|
||||
|
||||
def test_compute_next_run_skips_missed_interval() -> None:
    """Verify the next run of a one-minute interval is after ``now_ms`` when the last run is far in the past."""
    now_ms = 1_000_000
    every_minute = CronSchedule(kind="every", every_ms=60_000)
    next_run_ms = compute_next_run(every_minute, now_ms=now_ms, last_run_at_ms=0)
    assert next_run_ms > 1_000_000
|
||||
|
||||
|
||||
def test_manual_run_records_task_history(tmp_path) -> None:
    """Verify that a forced run stores the callback's task id in the job history."""

    async def on_job(job):
        # Single-argument callback: the task id is derived from the job id.
        return CronExecutionResult(response="done", task_id=f"task-{job.id}", run_id="run-1")

    cron = CronService(tmp_path / "jobs.json", on_job=on_job)
    hourly = CronSchedule(kind="every", every_ms=3600_000)
    job = cron.add_job(
        name="Daily check",
        message="Check the project",
        schedule=hourly,
        session_key="web:default",
    )

    ran = asyncio.run(cron.run_job(job.id, force=True))
    assert ran is True

    refreshed = cron.get_job(job.id)
    assert refreshed is not None
    assert refreshed.last_status == "ok"
    assert refreshed.history[-1].task_id == f"task-{job.id}"
    assert refreshed.to_api_dict()["last_task_id"] == f"task-{job.id}"
|
||||
|
||||
|
||||
def test_manual_run_records_scheduled_run_output(tmp_path) -> None:
    """Verify that a forced run stores the callback's output and notification session."""

    async def on_job(job, run):
        # Two-argument callback: the response echoes the run's scheduled_run_id.
        return CronExecutionResult(
            response=f"notification for {run.scheduled_run_id}",
            run_id="run-notify",
            notification_session_id="notify:default:scheduled",
            mode="notification",
        )

    cron = CronService(tmp_path / "jobs.json", on_job=on_job)
    hourly = CronSchedule(kind="every", every_ms=3600_000)
    job = cron.add_job(
        name="Daily news",
        message="Summarize news",
        schedule=hourly,
    )

    ran = asyncio.run(cron.run_job(job.id, force=True))
    assert ran is True

    refreshed = cron.get_job(job.id)
    assert refreshed is not None
    latest = refreshed.history[-1]
    assert latest.scheduled_run_id
    assert latest.output == f"notification for {latest.scheduled_run_id}"
    assert latest.notification_session_id == "notify:default:scheduled"
    assert refreshed.to_api_dict()["last_scheduled_run_id"] == latest.scheduled_run_id
|
||||
|
||||
|
||||
def test_cron_tool_uses_runtime_service(tmp_path) -> None:
    """Verify that ``CronTool`` adds a job to the service passed in the tool context."""
    cron = CronService(tmp_path / "jobs.json")
    add_request = {
        "action": "add",
        "name": "Tool-created task",
        "message": "Check the queue",
        "every_seconds": 300,
    }
    context = ToolContext(session_id="session-1", services={"cron_service": cron})

    outcome = asyncio.run(CronTool().invoke(add_request, context))

    assert outcome.success is True
    all_jobs = cron.list_jobs(include_disabled=True)
    assert len(all_jobs) == 1
    # The job's session key is expected to match the context's session id.
    assert all_jobs[0].payload.session_key == "session-1"
|
||||
|
||||
|
||||
def test_mark_run_engaged_links_task(tmp_path) -> None:
    """Verify that ``mark_run_engaged`` flags a stored run and attaches the task id."""
    cron = CronService(tmp_path / "jobs.json")
    job = cron.add_job(
        name="Daily news",
        message="Summarize news",
        schedule=CronSchedule(kind="every", every_ms=3600_000),
    )
    # Add a finished run to the job's history by hand and save it.
    finished_run = CronRunRecord(
        started_at_ms=1,
        status="ok",
        output="news summary",
        notification_session_id="notify:default:scheduled",
    )
    job.history.append(finished_run)
    cron._save_jobs()

    linked = cron.mark_run_engaged(
        finished_run.scheduled_run_id,
        task_id="task-1",
        intent="revise_once",
    )
    assert linked is not None

    lookup = cron.get_run(finished_run.scheduled_run_id)
    assert lookup is not None
    # The run record is the second element of get_run's result.
    assert lookup[1].engaged is True
    assert lookup[1].task_id == "task-1"
|
||||
67
app-instance/backend/tests/unit/test_debug_chat_logs_api.py
Normal file
67
app-instance/backend/tests/unit/test_debug_chat_logs_api.py
Normal file
@ -0,0 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from beaver.interfaces.web.app import create_app
|
||||
from beaver.services.agent_service import AgentService
|
||||
|
||||
|
||||
def test_debug_chat_logs_group_events_by_run(tmp_path: Path) -> None:
    """Verify ``/api/debug/chat-logs`` returns a run's events in insertion order.

    Four messages are appended to one session under the same run id; the
    response's first run must list them in that order and expose the
    snapshot event's payload.
    """
    agent_service = AgentService(workspace=tmp_path)
    runtime = agent_service.create_loop().boot()
    sessions_mgr = runtime.session_manager
    session_id = "web:debug"
    run_id = "run-debug"
    sessions_mgr.ensure_session(session_id, source="web", title="Debug")

    # Each entry holds the keyword arguments of one append_message call.
    recorded_events = [
        {
            "role": "system",
            "event_type": "run_started",
            "event_payload": {"source": "web", "task_id": "task-1", "attempt_index": 1},
            "content": "hello",
            "context_visible": False,
        },
        {
            "role": "system",
            "event_type": "llm_request_snapshotted",
            "event_payload": {"messages": [{"role": "user", "content": "hello"}], "tools": []},
            "content": '{"messages":[{"role":"user","content":"hello"}],"tools":[]}',
            "context_visible": False,
        },
        {
            "role": "user",
            "event_type": "user_message_added",
            "content": "hello",
        },
        {
            "role": "assistant",
            "event_type": "assistant_message_added",
            "content": "hi",
            "finish_reason": "stop",
        },
    ]
    for event_kwargs in recorded_events:
        sessions_mgr.append_message(session_id, run_id=run_id, **event_kwargs)

    web_app = create_app(service=agent_service, manage_service_lifecycle=False)
    with TestClient(web_app) as http:
        reply = http.get("/api/debug/chat-logs")

    assert reply.status_code == 200
    first_run = reply.json()["sessions"][0]["runs"][0]
    assert first_run["run_id"] == run_id
    assert first_run["user_input"] == "hello"
    seen_event_types = [entry["event_type"] for entry in first_run["events"]]
    assert seen_event_types == [
        "run_started",
        "llm_request_snapshotted",
        "user_message_added",
        "assistant_message_added",
    ]
    assert first_run["events"][1]["event_payload"]["messages"][0]["content"] == "hello"
|
||||
@ -17,6 +17,9 @@ class FakeResult:
|
||||
provider_name: str | None = "fake"
|
||||
model: str | None = "fake-model"
|
||||
usage: dict[str, Any] = field(default_factory=dict)
|
||||
task_id: str | None = "task-1"
|
||||
task_status: str | None = "awaiting_feedback"
|
||||
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
|
||||
|
||||
|
||||
class FakeService:
|
||||
@ -75,6 +78,9 @@ def test_gateway_routes_memory_channel_roundtrip() -> None:
|
||||
assert message.content == "echo:hello"
|
||||
assert message.session_id == "s1"
|
||||
assert message.finish_reason == "stop"
|
||||
assert message.metadata["task_id"] == "task-1"
|
||||
assert message.metadata["task_status"] == "awaiting_feedback"
|
||||
assert message.metadata["validation_result"] == {"accepted": True}
|
||||
|
||||
stop_event.set()
|
||||
await asyncio.wait_for(task, timeout=2)
|
||||
@ -183,6 +189,50 @@ def test_agent_service_maps_stopped_runtime_to_stopped_outbound() -> None:
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
def test_channel_manager_keeps_unknown_channel_outbound_undeliverable() -> None:
    """Verify that outbound messages for an unknown channel land in ``undeliverable``."""

    async def run() -> None:
        bus = MessageBus()
        manager = ChannelManager(bus)
        stop_event = asyncio.Event()

        inbound = InboundMessage(channel="missing", content="hello", session_id="missing:1")
        result = FakeResult(session_id="missing:1", output_text="ok")
        outbound = AgentService.build_outbound_message(inbound, result)
        await bus.publish_outbound(outbound)
        # The stop event is set before dispatch_outbound is awaited.
        stop_event.set()

        await manager.dispatch_outbound(stop_event)

        assert len(manager.undeliverable) == 1
        dropped = manager.undeliverable[0]
        assert dropped.channel == "missing"
        assert dropped.session_id == "missing:1"

    asyncio.run(run())
|
||||
|
||||
|
||||
def test_memory_channel_adapts_old_style_payload_to_stable_session_id() -> None:
    """Verify the inbound message built by ``publish_external_text`` on a memory channel.

    The queued message must be the returned object, carry the channel name,
    a ``telegram:chat-1`` session id, and the chat id, message id and raw
    payload in its metadata.
    """

    async def run() -> None:
        bus = MessageBus()
        telegram = MemoryChannelAdapter(bus, name="telegram")
        published = await telegram.publish_external_text(
            "hello",
            chat_id="chat-1",
            message_id="message-1",
            raw_payload={"platform": "telegram", "text": "hello"},
        )

        queued = await bus.consume_inbound()
        assert queued is published
        assert queued.channel == "telegram"
        assert queued.session_id == "telegram:chat-1"

        meta = queued.metadata
        assert meta["chat_id"] == "chat-1"
        assert meta["message_id"] == "message-1"
        assert meta["raw_channel_payload"] == {"platform": "telegram", "text": "hello"}

    asyncio.run(run())
|
||||
|
||||
|
||||
def test_channel_manager_start_cancellation_rolls_back_started_channels() -> None:
|
||||
class StartedChannel:
|
||||
name = "started"
|
||||
|
||||
145
app-instance/backend/tests/unit/test_litellm_thinking_mode.py
Normal file
145
app-instance/backend/tests/unit/test_litellm_thinking_mode.py
Normal file
@ -0,0 +1,145 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from beaver.engine.providers.litellm import LiteLLMProvider
|
||||
|
||||
|
||||
def test_qwen_thinking_mode_is_sent_as_chat_template_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
    """Verify ``thinking_enabled=False`` for a Qwen model is sent via ``extra_body``.

    ``acompletion`` is replaced with a fake that records its keyword
    arguments and returns a canned response.
    """
    captured: dict = {}

    class Message:
        content = "可以"
        reasoning_content = ""
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def fake_acompletion(**kwargs):
        captured.update(kwargs)
        return Response()

    # Swap out the completion call and the litellm module object used by the provider.
    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    qwen_provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="Qwen3.6-35B",
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "只回复可以"}]
    reply = asyncio.run(
        qwen_provider.chat(conversation, model="Qwen3.6-35B", thinking_enabled=False)
    )

    assert reply.content == "可以"
    assert captured["extra_body"] == {"chat_template_kwargs": {"enable_thinking": False}}
|
||||
|
||||
|
||||
def test_non_qwen_thinking_mode_is_not_sent(monkeypatch: pytest.MonkeyPatch) -> None:
    """Verify no ``extra_body`` is sent for ``gpt-4.1-mini`` when ``thinking_enabled=False``."""
    captured: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def fake_acompletion(**kwargs):
        captured.update(kwargs)
        return Response()

    # Swap out the completion call and the litellm module object used by the provider.
    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    gpt_provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="gpt-4.1-mini",
        provider_name="openai",
    )
    conversation = [{"role": "user", "content": "reply ok"}]
    asyncio.run(
        gpt_provider.chat(conversation, model="gpt-4.1-mini", thinking_enabled=False)
    )

    assert "extra_body" not in captured
|
||||
|
||||
|
||||
def test_litellm_provider_sanitizes_tool_call_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    """Verify dict-valued tool-call arguments reach ``acompletion`` as a JSON string."""
    captured: dict = {}

    class Message:
        content = "ok"
        reasoning_content = None
        tool_calls = []

    class Choice:
        message = Message()
        finish_reason = "stop"

    class Response:
        choices = [Choice()]
        usage = None

    async def fake_acompletion(**kwargs):
        captured.update(kwargs)
        return Response()

    # Swap out the completion call and the litellm module object used by the provider.
    monkeypatch.setattr("beaver.engine.providers.litellm.acompletion", fake_acompletion)
    monkeypatch.setattr("beaver.engine.providers.litellm.litellm", SimpleNamespace())

    qwen_provider = LiteLLMProvider(
        api_key="sk-test",
        api_base="https://oai.example.com/v1",
        default_model="Qwen3.6-35B",
        provider_name="openai",
    )
    # The assistant turn carries tool-call arguments as a dict rather than a string.
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": "call-1",
                "type": "function",
                "function": {
                    "name": "cron",
                    "arguments": {"action": "add", "mode": "notification"},
                },
            }
        ],
    }
    tool_turn = {"role": "tool", "tool_call_id": "call-1", "name": "cron", "content": "done"}
    asyncio.run(
        qwen_provider.chat(
            [assistant_turn, tool_turn],
            model="Qwen3.6-35B",
            thinking_enabled=False,
        )
    )

    sent_call = captured["messages"][0]["tool_calls"][0]
    assert sent_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
|
||||
116
app-instance/backend/tests/unit/test_main_agent_router.py
Normal file
116
app-instance/backend/tests/unit/test_main_agent_router.py
Normal file
@ -0,0 +1,116 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.tasks import MainAgentRouter, TaskRecord
|
||||
|
||||
|
||||
class RouterProvider(LLMProvider):
    """Stub provider that returns a fixed reply (or raises) and records each call."""

    def __init__(self, response: str | Exception) -> None:
        """Store the canned reply; an ``Exception`` instance is raised by ``chat``."""
        super().__init__()
        self.response = response
        self.calls: list[dict] = []

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the sampling parameters, then return or raise the canned reply."""
        call_record = {
            "max_tokens": max_tokens,
            "temperature": temperature,
            "model": model,
            "thinking_enabled": thinking_enabled,
        }
        self.calls.append(call_record)

        canned = self.response
        if not isinstance(canned, Exception):
            return LLMResponse(content=canned, finish_reason="stop", provider_name="stub", model="stub-model")
        raise canned

    def get_default_model(self) -> str:
        """Return the stub model name."""
        return "stub-model"
|
||||
|
||||
|
||||
def _task() -> TaskRecord:
    """Build the ``awaiting_feedback`` task record used as the active task in router tests."""
    task_fields = {
        "task_id": "task-1",
        "session_id": "web:task",
        "description": "实现任务连续性",
        "goal": "实现任务连续性",
        "constraints": [],
        "priority": 0,
        "status": "awaiting_feedback",
        "creator": "test",
        "created_at": "now",
        "updated_at": "now",
        "metadata": {"short_title": "任务连续性"},
    }
    return TaskRecord(**task_fields)
|
||||
|
||||
|
||||
def test_router_continues_active_task_from_llm_decision() -> None:
    """Verify a ``continue_task`` reply keeps the active task and uses ``max_tokens=256``."""
    llm_reply = '{"action":"continue_task","reason":"related","short_title":"任务连续性"}'
    provider = RouterProvider(llm_reply)
    router = MainAgentRouter()

    decision = asyncio.run(
        router.classify("再把输入框标识也补上", active_task=_task(), provider=provider)
    )

    assert decision.is_task
    assert decision.starts_new_task is False
    assert decision.short_title == "任务连续性"
    first_call = provider.calls[0]
    assert first_call["max_tokens"] == 256
|
||||
|
||||
|
||||
def test_router_receives_thinking_mode() -> None:
    """Verify ``thinking_enabled=False`` is forwarded from ``classify`` to the provider."""
    provider = RouterProvider('{"action":"simple_chat","reason":"simple"}')
    router = MainAgentRouter()

    decision = asyncio.run(
        router.classify("你好", provider=provider, thinking_enabled=False)
    )

    assert not decision.is_task
    first_call = provider.calls[0]
    assert first_call["thinking_enabled"] is False
|
||||
|
||||
|
||||
def test_router_closes_active_task_from_llm_decision() -> None:
    """Verify a ``close_task`` reply yields a non-task decision with ``closes_task`` set."""
    closing_provider = RouterProvider('{"action":"close_task","reason":"user said done"}')
    router = MainAgentRouter()

    decision = asyncio.run(
        router.classify("这个任务结束了", active_task=_task(), provider=closing_provider)
    )

    assert not decision.is_task
    assert decision.closes_task is True
|
||||
|
||||
|
||||
def test_router_fallback_keeps_active_task_but_not_new_task() -> None:
    """Verify the decision produced when the provider raises.

    With an active task the decision must still be a task; without one it
    must not be.
    """
    with_active_task = asyncio.run(
        MainAgentRouter().classify(
            "继续",
            active_task=_task(),
            provider=RouterProvider(RuntimeError("provider down")),
        )
    )
    without_active_task = asyncio.run(
        MainAgentRouter().classify(
            "implement something",
            active_task=None,
            provider=RouterProvider(RuntimeError("provider down")),
        )
    )

    assert with_active_task.is_task
    assert not without_active_task.is_task
|
||||
142
app-instance/backend/tests/unit/test_marketplace_and_hermes.py
Normal file
142
app-instance/backend/tests/unit/test_marketplace_and_hermes.py
Normal file
@ -0,0 +1,142 @@
|
||||
import asyncio
|
||||
import io
|
||||
import json
|
||||
import zipfile
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from beaver.interfaces.web.app import _create_skill_upload_draft
|
||||
from beaver.services.hermes_migration import HermesMigrationService
|
||||
from beaver.services.skillhub_service import SkillHubService
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.specs import SkillSpecStore
|
||||
from beaver.tools.mcp.wrapper import MCPToolWrapper
|
||||
|
||||
|
||||
class FakeSkillHubService(SkillHubService):
    """``SkillHubService`` subclass whose fetch helpers return canned data."""

    async def _get_json(self, path, *, params=None):
        """Return the canned JSON payload for ``path``.

        Raises:
            AssertionError: if ``path`` is not one of the known routes.
        """
        skill_summary = {
            "slug": "multi-search-engine",
            "displayName": "multi-search-engine",
            "summary": "search",
            "namespace": "global",
            "downloadCount": 1,
            "starCount": 0,
            "publishedVersion": {"version": "20260413.065325"},
        }
        canned_by_path = {
            "/skills": {
                "data": {
                    "items": [skill_summary],
                    "total": 1,
                    "page": 0,
                    "size": 12,
                }
            },
            "/skills/global/multi-search-engine": {"data": skill_summary},
            "/skills/global/multi-search-engine/versions/20260413.065325": {
                "data": {"version": "20260413.065325"}
            },
            "/skills/global/multi-search-engine/versions/20260413.065325/files": {
                "data": [
                    {"filePath": "SKILL.md", "fileSize": 93},
                    {"filePath": "references/a.txt", "fileSize": 2},
                ]
            },
        }
        if path not in canned_by_path:
            raise AssertionError(path)
        return canned_by_path[path]

    async def _get_text(self, path, *, params):
        """Return the SKILL.md text when ``params["path"]`` names it, otherwise ``"ok"``."""
        if params["path"] != "SKILL.md":
            return "ok"
        return "---\nname: multi-search-engine\ndescription: Multi search\ntools:\n - web_search\n---\nUse search.\n"
|
||||
|
||||
|
||||
def test_skillhub_search_detail_do_not_install_until_post_install(tmp_path):
    """Verify search and detail leave the store untouched and install populates it."""
    store = SkillSpecStore(tmp_path)
    hub = FakeSkillHubService(store)
    skill_name = "multi-search-engine"

    search_result = asyncio.run(hub.search(q="multi-search-engine"))
    detail_result = asyncio.run(hub.detail("global", "multi-search-engine"))
    assert search_result["items"][0]["installed"] is False
    assert detail_result["installed"] is False
    assert store.get_skill_spec(skill_name) is None

    install_result = asyncio.run(hub.install("global", "multi-search-engine"))
    assert install_result["ok"] is True
    assert store.get_skill_spec(skill_name) is not None
    # The supporting file must be written under the installed version's directory.
    version_dir = tmp_path / "skills" / "multi-search-engine" / "versions" / install_result["version"]
    assert (version_dir / "references" / "a.txt").read_text() == "ok"
|
||||
|
||||
|
||||
def test_upload_skill_zip_rejects_path_traversal(tmp_path):
    """Verify an archive entry containing ``..`` is rejected with ``ValueError``."""
    store = SkillSpecStore(tmp_path)
    loaded = SimpleNamespace(skill_spec_store=store, draft_service=DraftService(store))

    archive_bytes = io.BytesIO()
    with zipfile.ZipFile(archive_bytes, "w") as bundle:
        bundle.writestr("skill/SKILL.md", "---\nname: skill\n---\nBody\n")
        # This entry's path contains a parent-directory component.
        bundle.writestr("skill/../evil.txt", "x")
    payload = archive_bytes.getvalue()

    with pytest.raises(ValueError, match="Unsafe archive entry"):
        _create_skill_upload_draft(loaded, "skill.zip", payload)
|
||||
|
||||
|
||||
def test_upload_skill_zip_keeps_supporting_files_on_draft(tmp_path):
    """Verify supporting files from an uploaded zip are stored under the draft's upload dir."""
    store = SkillSpecStore(tmp_path)
    loaded = SimpleNamespace(skill_spec_store=store, draft_service=DraftService(store))

    archive_bytes = io.BytesIO()
    with zipfile.ZipFile(archive_bytes, "w") as bundle:
        bundle.writestr("skill/SKILL.md", "---\nname: skill\n---\nBody\n")
        bundle.writestr("skill/references/a.txt", "context")

    draft = _create_skill_upload_draft(loaded, "skill.zip", archive_bytes.getvalue())
    upload_dir = draft["evidence_refs"][0]["supporting_upload_dir"]

    stored_reference = (
        tmp_path / "skills" / "skill" / "draft_uploads" / draft["draft_id"] / "references" / "a.txt"
    )
    assert stored_reference.read_text() == "context"
    assert upload_dir.endswith(draft["draft_id"])
|
||||
|
||||
|
||||
def test_hermes_migration_manifest_includes_no_credential_skill_and_skips_api_skill(tmp_path):
    """Verify migration includes the ``safe`` skill and skips the one mentioning ``API_KEY``.

    Also checks that the manifest file is written into the workspace with
    ``source`` set to ``hermes-agent``.
    """
    hermes_repo = tmp_path / "hermes"
    safe_dir = hermes_repo / "skills" / "safe"
    unsafe_dir = hermes_repo / "skills" / "unsafe"
    safe_dir.mkdir(parents=True)
    unsafe_dir.mkdir(parents=True)
    (safe_dir / "SKILL.md").write_text("---\nname: safe\n---\nUse local files only.\n", encoding="utf-8")
    (unsafe_dir / "SKILL.md").write_text("---\nname: unsafe\n---\nRequires API_KEY.\n", encoding="utf-8")

    workspace = tmp_path / "workspace"
    store = SkillSpecStore(workspace)
    manifest = HermesMigrationService(store).migrate(hermes_repo)

    included_names = {entry["skill_name"] for entry in manifest["included"]}
    skip_reasons = {entry.get("skill_name"): entry["reason"] for entry in manifest["skipped"]}

    assert "safe" in included_names
    assert skip_reasons["unsafe"] == "requires_external_credentials"
    assert store.get_skill_spec("safe") is not None

    manifest_path = workspace / "hermes_migration_manifest.json"
    written_manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    assert written_manifest["source"] == "hermes-agent"
|
||||
|
||||
|
||||
def test_mcp_wrapper_metadata_preserves_server_id_with_underscores():
    """Verify wrapper metadata keeps an underscore-containing server id and the tool name intact."""
    tool_def = SimpleNamespace(
        name="auth_status",
        description="Auth",
        inputSchema={"type": "object", "properties": {}},
    )

    async def call_tool(_name, _arguments):
        return SimpleNamespace(content=[], structuredContent={"ok": True})

    wrapper = MCPToolWrapper(
        "outlook_mcp",
        tool_def,
        call_tool,
        kind="online",
        category="outlook",
        display_name="Outlook",
    )

    spec = wrapper.spec
    assert spec.name == "mcp_outlook_mcp_auth_status"
    assert spec.metadata["server_id"] == "outlook_mcp"
    assert spec.metadata["original_tool_name"] == "auth_status"
|
||||
@ -298,8 +298,29 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
|
||||
ended_at=recent,
|
||||
success=True,
|
||||
finish_reason="stop",
|
||||
feedback={"feedback_type": "satisfied"},
|
||||
activated_skills=[],
|
||||
task_id=f"task-new-{index}",
|
||||
attempt_index=1,
|
||||
validation_result={"accepted": True, "score": 0.9},
|
||||
)
|
||||
)
|
||||
|
||||
for index in range(2):
|
||||
run_store.append_run_record(
|
||||
RunRecord(
|
||||
run_id=f"simple-chat-{index}",
|
||||
session_id="session-simple",
|
||||
task_text="你是谁",
|
||||
started_at=recent,
|
||||
ended_at=recent,
|
||||
success=True,
|
||||
finish_reason="stop",
|
||||
feedback={},
|
||||
activated_skills=[],
|
||||
task_id=None,
|
||||
attempt_index=None,
|
||||
validation_result=None,
|
||||
)
|
||||
)
|
||||
|
||||
@ -329,8 +350,11 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
|
||||
ended_at=recent,
|
||||
success=True,
|
||||
finish_reason="stop",
|
||||
feedback={},
|
||||
feedback={"feedback_type": "satisfied"},
|
||||
activated_skills=receipts,
|
||||
task_id=f"task-merge-{index}",
|
||||
attempt_index=1,
|
||||
validation_result={"accepted": True, "score": 0.9},
|
||||
)
|
||||
)
|
||||
for receipt in receipts:
|
||||
@ -382,6 +406,9 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
|
||||
kinds = {candidate.kind for candidate in candidates}
|
||||
|
||||
assert {"revise_skill", "new_skill", "merge_skills", "retire_skill"} <= kinds
|
||||
new_candidates = [candidate for candidate in candidates if candidate.kind == "new_skill"]
|
||||
assert new_candidates
|
||||
assert all("simple-chat" not in run_id for candidate in new_candidates for run_id in candidate.source_run_ids)
|
||||
|
||||
retire_candidate = next(candidate for candidate in candidates if candidate.kind == "retire_skill")
|
||||
retire_draft = asyncio.run(
|
||||
@ -396,6 +423,100 @@ def test_skill_learning_service_generates_candidates_and_retire_draft(tmp_path:
|
||||
assert store.read_draft("svn-migration", retire_draft.draft_id) is not None
|
||||
|
||||
|
||||
def test_skill_learning_service_generates_task_scoped_candidates(tmp_path: Path) -> None:
    """Build learning candidates for one task and check they stay task-scoped.

    Two successful runs with identical task text are stored under different
    task ids; only the ``task-1`` run activated the ``api-review`` skill.
    Building candidates for ``task-1`` is expected to yield exactly one
    revise candidate that cites ``task-run-1`` and nothing else.
    """
    spec_store = SkillSpecStore(tmp_path)
    memory_root = tmp_path / "memory"
    run_store = RunMemoryStore(memory_root / "runs")
    service = SkillLearningService(
        run_store=run_store,
        learning_store=SkillLearningStore(memory_root / "skills"),
        draft_service=DraftService(spec_store),
        evidence_selector=EvidenceSelector(run_store),
    )
    timestamp = datetime.now(timezone.utc).isoformat()

    def store_run(run_id, session_id, task_id, activated_skills):
        # Both records share everything except identity and activated skills;
        # fresh dict literals are created on every call.
        run_store.append_run_record(
            RunRecord(
                run_id=run_id,
                session_id=session_id,
                task_id=task_id,
                attempt_index=1,
                task_text="Review API compatibility",
                started_at=timestamp,
                ended_at=timestamp,
                success=True,
                finish_reason="stop",
                feedback={"feedback_type": "satisfied"},
                activated_skills=activated_skills,
                validation_result={"accepted": True, "score": 0.9},
            )
        )

    api_review_receipt = _receipt(
        run_id="task-run-1",
        session_id="session-task",
        skill_name="api-review",
        skill_version="v0001",
        activated_at=timestamp,
    )
    store_run("task-run-1", "session-task", "task-1", [api_review_receipt])
    store_run("other-task-run", "session-other", "task-2", [])

    candidates = service.build_learning_candidates_for_task("task-1", trigger_run_id="task-run-1")

    candidate_ids = [candidate.candidate_id for candidate in candidates]
    assert candidate_ids == ["revise:api-review:v0001:task:task-1"]
    only_candidate = candidates[0]
    assert only_candidate.source_run_ids == ["task-run-1"]
    assert only_candidate.related_skill_names == ["api-review"]
    assert only_candidate.evidence["task_id"] == "task-1"
|
||||
|
||||
|
||||
def test_skill_learning_service_generates_new_skill_for_task_without_published_skills(tmp_path: Path) -> None:
    """A successful task run with no activated skills should propose a new skill.

    A single satisfied, validated run is stored for ``task-1`` with an empty
    ``activated_skills`` list. The task-scoped builder is expected to return
    one ``new_skill`` candidate with id ``new:task:task-1``.
    """
    spec_store = SkillSpecStore(tmp_path)
    memory_root = tmp_path / "memory"
    run_store = RunMemoryStore(memory_root / "runs")
    service = SkillLearningService(
        run_store=run_store,
        learning_store=SkillLearningStore(memory_root / "skills"),
        draft_service=DraftService(spec_store),
        evidence_selector=EvidenceSelector(run_store),
    )

    timestamp = datetime.now(timezone.utc).isoformat()
    skill_free_run = RunRecord(
        run_id="task-run-1",
        session_id="session-task",
        task_id="task-1",
        attempt_index=1,
        task_text="Generate migration checklist",
        started_at=timestamp,
        ended_at=timestamp,
        success=True,
        finish_reason="stop",
        feedback={"feedback_type": "satisfied"},
        activated_skills=[],
        validation_result={"accepted": True, "score": 0.9},
    )
    run_store.append_run_record(skill_free_run)

    candidates = service.build_learning_candidates_for_task("task-1", trigger_run_id="task-run-1")

    candidate_ids = [candidate.candidate_id for candidate in candidates]
    assert candidate_ids == ["new:task:task-1"]
    proposal = candidates[0]
    assert proposal.kind == "new_skill"
    assert proposal.source_run_ids == ["task-run-1"]
|
||||
|
||||
|
||||
def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
|
||||
skill = SkillContext(
|
||||
name="docker-debug",
|
||||
@ -446,7 +567,7 @@ def test_agent_loop_records_skill_receipts_and_effects(tmp_path: Path) -> None:
|
||||
skill_effects = next(event for event in events if event.event_type == "skill_effects_snapshotted")
|
||||
assert skill_effects.event_payload["run_record"]["activated_skills"][0]["skill_version"] == "v0007"
|
||||
assert skill_effects.event_payload["skill_effects"][0]["skill_name"] == "docker-debug"
|
||||
assert skill_effects.event_payload["learning_candidate_enabled"] is False
|
||||
assert skill_effects.event_payload["candidate_generation_allowed"] is False
|
||||
assert skill_effects.event_payload["learning_candidates"] == []
|
||||
|
||||
run_records = loaded.run_memory_store.list_runs()
|
||||
|
||||
@ -53,7 +53,8 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
|
||||
"node_id": "research",
|
||||
"skill_query": "research workflow",
|
||||
"selected_skill_names": ["research-workflow"],
|
||||
"generated_skill_draft_id": None,
|
||||
"ephemeral_guidance_id": None,
|
||||
"ephemeral_guidance_name": None,
|
||||
"ephemeral_used": False,
|
||||
"reason": "matched published skill",
|
||||
}
|
||||
@ -80,7 +81,8 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
|
||||
"skill_query": "research workflow",
|
||||
"selected_skill_names": ["research-workflow"],
|
||||
"ephemeral_skill_names": [],
|
||||
"generated_skill_draft_id": None,
|
||||
"ephemeral_guidance_id": None,
|
||||
"ephemeral_guidance_name": None,
|
||||
"ephemeral_used": False,
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
@ -118,5 +120,83 @@ def test_process_projection_maps_task_team_events(tmp_path: Path) -> None:
|
||||
sub_run = next(run for run in projection["runs"] if run["run_id"] == "sub-run")
|
||||
assert sub_run["metadata"]["selected_skill_names"] == ["research-workflow"]
|
||||
assert sub_run["metadata"]["skill_query"] == "research workflow"
|
||||
assert sub_run["metadata"]["ephemeral_guidance_id"] is None
|
||||
assert any(event["actor_name"] == "Validator" for event in projection["events"])
|
||||
assert any(run["session_id"] == "web:test" for run in projection["runs"])
|
||||
|
||||
|
||||
def test_process_projection_exposes_ephemeral_guidance_artifacts(tmp_path: Path) -> None:
    """Project a team run that used ephemeral guidance and inspect the result.

    One sub-run record plus two hidden system events (plan and team
    completion) are written for ``task-1``. The projection of ``web:test``
    is expected to carry the guidance id in the sub-run metadata and to list
    an artifact for that guidance as its first artifact.
    """
    session = SessionManager(tmp_path)
    run_store = RunMemoryStore(tmp_path / "memory" / "runs")

    sub_run_record = RunRecord(
        run_id="sub-run",
        session_id="sub-session",
        task_id="task-1",
        attempt_index=1,
        task_text="sub task",
        started_at="2026-01-01T00:00:01+00:00",
        ended_at="2026-01-01T00:00:02+00:00",
        success=True,
        finish_reason="stop",
    )
    run_store.append_run_record(sub_run_record)

    # Planning event: the "research" node resolved to generated guidance
    # rather than a published skill.
    planned_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "plan_mode": "team",
        "strategy": "sequence",
        "node_ids": ["research"],
        "ephemeral_guidance_ids": ["eg_123"],
        "skill_resolution_report": [
            {
                "node_id": "research",
                "skill_query": "research workflow",
                "selected_skill_names": [],
                "ephemeral_guidance_id": "eg_123",
                "ephemeral_guidance_name": "research-workflow",
                "ephemeral_used": True,
                "reason": "generated ephemeral guidance",
            }
        ],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_execution_planned",
        event_payload=planned_payload,
        context_visible=False,
    )

    # Completion event: links the node result to the stored sub-run.
    completed_payload = {
        "task_id": "task-1",
        "attempt_index": 1,
        "team_success": True,
        "team_run_ids": ["sub-run"],
        "node_results": [
            {
                "node_id": "research",
                "success": True,
                "output_text": "evidence",
                "run_id": "sub-run",
                "skill_query": "research workflow",
                "selected_skill_names": [],
                "ephemeral_skill_names": ["ephemeral:research-workflow"],
                "ephemeral_guidance_id": "eg_123",
                "ephemeral_guidance_name": "research-workflow",
                "ephemeral_used": True,
                "finish_reason": "stop",
            }
        ],
    }
    session.append_message(
        "web:test",
        role="system",
        event_type="task_team_run_completed",
        event_payload=completed_payload,
        context_visible=False,
    )

    projection = SessionProcessProjector(session, run_store).project("web:test")

    sub_run = next(run for run in projection["runs"] if run["run_id"] == "sub-run")
    assert sub_run["metadata"]["ephemeral_guidance_id"] == "eg_123"
    assert projection["artifacts"][0]["artifact_id"] == "sub-run:ephemeral-guidance:eg_123"
    assert projection["artifacts"][0]["metadata"]["ephemeral_guidance_name"] == "research-workflow"
|
||||
|
||||
107
app-instance/backend/tests/unit/test_session_archive.py
Normal file
107
app-instance/backend/tests/unit/test_session_archive.py
Normal file
@ -0,0 +1,107 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from beaver.engine.session import SessionManager
|
||||
from beaver.interfaces.web.app import create_app
|
||||
from beaver.services.agent_service import AgentService
|
||||
|
||||
|
||||
def test_archived_sessions_can_be_hidden_from_default_web_list(tmp_path: Path) -> None:
    """Listing with ``exclude_end_reasons=["archived"]`` omits archived sessions.

    The archived session must disappear from the filtered listing while its
    stored record still reports ``end_reason == "archived"``.
    """
    sessions = SessionManager(tmp_path)
    for session_id in ("web:keep", "web:archived"):
        sessions.ensure_session(session_id, source="web")
    sessions.end_session("web:archived", "archived")

    listed = sessions.list_sessions_rich(exclude_end_reasons=["archived"])
    listed_ids = {entry["id"] for entry in listed}

    assert "web:keep" in listed_ids
    assert "web:archived" not in listed_ids
    archived_record = sessions.get_session("web:archived")
    assert archived_record["end_reason"] == "archived"
|
||||
|
||||
|
||||
def test_archived_sessions_remain_available_to_history_search(tmp_path: Path) -> None:
    """An unfiltered ``list_sessions_rich()`` call still returns archived sessions."""
    archived_id = "web:archived"
    sessions = SessionManager(tmp_path)
    sessions.ensure_session(archived_id, source="web")
    sessions.end_session(archived_id, "archived")

    unfiltered_ids = {entry["id"] for entry in sessions.list_sessions_rich()}

    assert unfiltered_ids == {"web:archived"}
|
||||
|
||||
|
||||
def test_visible_history_excludes_error_and_incomplete_runs(tmp_path: Path) -> None:
    """Only the user/assistant text of the successfully completed run is visible.

    Three runs are written to one session: ``ok-run`` (completed, including a
    tool call round trip), ``error-run`` (completed with
    ``finish_reason="error"``) and ``pending-run`` (no completion event).
    ``get_visible_history`` is expected to return just the ``hello`` / ``hi``
    exchange from ``ok-run``.
    """
    session_id = "web:history"
    manager = SessionManager(tmp_path)
    manager.ensure_session(session_id, source="web")

    def log(run_id: str, **fields) -> None:
        # Thin shorthand so each message below reads as a single line.
        manager.append_message(session_id, run_id=run_id, **fields)

    echo_call = {"id": "call-1", "type": "function", "function": {"name": "echo", "arguments": "{}"}}

    # Run that finishes normally.
    log("ok-run", role="user", content="hello")
    log("ok-run", role="assistant", content="hi", finish_reason="stop")
    log("ok-run", role="assistant", content=None, tool_calls=[echo_call])
    log("ok-run", role="tool", content="tool result", tool_call_id="call-1")
    log("ok-run", role="system", event_type="run_completed", content="hi", context_visible=False)

    # Run that completes with an error finish reason.
    log("error-run", role="user", content="bad")
    log("error-run", role="assistant", content="Error: provider failed", finish_reason="error")
    log(
        "error-run",
        role="system",
        event_type="run_completed",
        content="Error: provider failed",
        finish_reason="error",
        context_visible=False,
    )

    # Run with no completion event at all.
    log("pending-run", role="user", content="pending")

    history = manager.get_visible_history(session_id)

    visible_pairs = [(message["role"], message["content"]) for message in history]
    assert visible_pairs == [
        ("user", "hello"),
        ("assistant", "hi"),
    ]
|
||||
|
||||
|
||||
def test_web_archive_route_does_not_create_archive_suffix_session(tmp_path: Path) -> None:
    """Archive a session through the web API and check which sessions exist.

    Posting to ``/api/sessions/web:alpha/archive`` must mark ``web:alpha`` as
    archived rather than creating a session literally named
    ``web:alpha/archive``, and the default session listing must come back
    empty afterwards.
    """
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)

    # NOTE(review): assumes only the three requests belong to the `with` body;
    # confirm the block extent against the committed file.
    with TestClient(app) as client:
        create_response = client.post("/api/sessions/web:alpha")
        archive_response = client.post("/api/sessions/web:alpha/archive")
        sessions_response = client.get("/api/sessions")

    assert create_response.status_code == 200
    assert archive_response.status_code == 200
    assert archive_response.json() == {"ok": True, "archived": True}
    assert sessions_response.status_code == 200

    # Inspect the stored sessions directly through a booted loop.
    loaded = service.create_loop().boot()
    assert loaded.session_manager.get_session("web:alpha")["end_reason"] == "archived"  # type: ignore[union-attr]
    assert loaded.session_manager.get_session("web:alpha/archive") is None  # type: ignore[union-attr]
    assert sessions_response.json() == []
|
||||
157
app-instance/backend/tests/unit/test_skill_assembler.py
Normal file
157
app-instance/backend/tests/unit/test_skill_assembler.py
Normal file
@ -0,0 +1,157 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.skills.assembler.task_assembler import SkillAssembler
|
||||
|
||||
|
||||
class RecordingProvider(LLMProvider):
    """Provider double that remembers the ``thinking_enabled`` flag of the last chat call."""

    def __init__(self) -> None:
        super().__init__()
        # Stays ``None`` until ``chat`` has been called at least once.
        self.thinking_enabled: bool | None = None

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record ``thinking_enabled`` and answer with a fixed one-skill JSON list."""
        self.thinking_enabled = thinking_enabled
        canned_reply = '["daily-news"]'
        return LLMResponse(content=canned_reply, provider_name="stub", model="stub-model")
|
||||
|
||||
|
||||
class SequencedProvider(LLMProvider):
    """Provider double that replays scripted replies in order and logs every prompt."""

    def __init__(self, responses: list[str]) -> None:
        super().__init__()
        # Copy so the caller's list is not consumed by ``chat``.
        self.responses = list(responses)
        # One entry per ``chat`` call, holding the messages that were passed in.
        self.messages: list[list[dict]] = []

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Log ``messages`` and return the next scripted reply.

        Raises ``IndexError`` once the scripted replies are exhausted.
        """
        self.messages.append(messages)
        next_reply = self.responses.pop(0)
        return LLMResponse(content=next_reply, provider_name="stub", model="stub-model")
|
||||
|
||||
|
||||
class StaticRetriever:
    """Retriever double that returns the leading ``top_k`` candidates unchanged."""

    async def retrieve(self, **kwargs):
        """Return the first ``top_k`` entries of ``candidates``; other keywords are ignored."""
        pool = kwargs["candidates"]
        limit = kwargs["top_k"]
        return pool[:limit]
|
||||
|
||||
|
||||
class LoaderWithFullSkill:
    """Loader double exposing a single published skill named ``docker-debug``.

    The candidate summary carries only a generic description, whereas the full
    skill document mentions Docker log triage.
    """

    def build_selection_candidates(self) -> list[dict[str, str]]:
        """Return the one-entry candidate list used for skill selection."""
        return [
            {
                "name": "docker-debug",
                "description": "General container tips.",
                "version": "v1",
                "content_hash": "abc",
            }
        ]

    def load_published_skill(self, name: str) -> str | None:
        """Return the full skill document for ``docker-debug``, or ``None`` for any other name."""
        if name != "docker-debug":
            return None
        # NOTE(review): the list item under ``tools:`` appears unindented in
        # this view; confirm the literal's whitespace against the committed file.
        return """---
description: General container tips.
tools:
- search_files
---

# Docker Debug

Use this skill when doing Docker log triage and container failure analysis.
"""

    def get_skill_record(self, name: str):
        """Return a fixed record (version, hash, tool hints) regardless of ``name``."""
        return SimpleNamespace(version="v1", content_hash="abc", tool_hints=["search_files"])
|
||||
|
||||
|
||||
def test_skill_selection_receives_thinking_mode() -> None:
    """``_select_skill_names`` must forward ``thinking_enabled`` to the provider.

    The recording provider captures the flag it was called with; passing
    ``False`` must be observable as ``False`` (not ``None``) afterwards.
    """
    provider = RecordingProvider()
    assembler = SkillAssembler(loader=SimpleNamespace())

    selection_call = assembler._select_skill_names(
        task_description="summarize daily news",
        candidates=[{"name": "daily-news", "description": "Summarize news"}],
        provider=provider,
        model="Qwen3.6-35B",
        thinking_enabled=False,
    )
    selected = asyncio.run(selection_call)

    assert selected == ["daily-news"]
    assert provider.thinking_enabled is False
|
||||
|
||||
|
||||
def test_skill_assembler_loads_detail_directly_for_small_candidate_sets() -> None:
    """With a single candidate the assembler makes one ``final`` LLM call.

    That single prompt must already contain text from the full skill
    document, and the activated skill must carry the loader's tool hints.
    """
    provider = SequencedProvider(['["docker-debug"]'])
    assembler = SkillAssembler(loader=LoaderWithFullSkill(), retriever=StaticRetriever())

    assembly = assembler.assemble(
        task_description="debug a failing Docker container",
        provider=provider,
        model="stub-model",
    )
    result = asyncio.run(assembly)

    activated_names = [skill.name for skill in result.activated_skills]
    assert activated_names == ["docker-debug"]
    assert result.activated_skills[0].tool_hints == ["search_files"]

    stages = [interaction["stage"] for interaction in result.llm_interactions]
    assert stages == ["final"]
    assert len(provider.messages) == 1

    # The second message of the only call is read as the user prompt.
    first_user_prompt = provider.messages[0][1]["content"]
    assert "Use this skill when doing Docker log triage" in first_user_prompt
|
||||
|
||||
|
||||
def test_skill_assembler_shortlists_before_loading_detail_for_large_candidate_sets() -> None:
    """With more candidates than ``max_detailed_candidates`` a shortlist call comes first.

    Two candidates and a detail limit of one are expected to produce a
    ``shortlist`` call without full skill text followed by a ``final`` call
    that includes it.
    """
    detail_marker = "Use this skill when doing Docker log triage"
    provider = SequencedProvider(['["docker-debug"]', '["docker-debug"]'])

    loader = LoaderWithFullSkill()
    original_candidates = loader.build_selection_candidates

    def candidates_with_extra():
        # Extend the stock candidate list with a second, unrelated skill.
        extra_skill = {
            "name": "other-skill",
            "description": "Other workflow.",
            "version": "v1",
            "content_hash": "def",
        }
        return original_candidates() + [extra_skill]

    loader.build_selection_candidates = candidates_with_extra

    assembler = SkillAssembler(
        loader=loader,
        retriever=StaticRetriever(),
        max_detailed_candidates=1,
    )
    assembly = assembler.assemble(
        task_description="debug a failing Docker container",
        provider=provider,
        model="stub-model",
    )
    result = asyncio.run(assembly)

    activated_names = [skill.name for skill in result.activated_skills]
    assert activated_names == ["docker-debug"]
    stages = [interaction["stage"] for interaction in result.llm_interactions]
    assert stages == ["shortlist", "final"]
    assert len(provider.messages) == 2

    shortlist_prompt = provider.messages[0][1]["content"]
    final_prompt = provider.messages[1][1]["content"]
    assert detail_marker not in shortlist_prompt
    assert detail_marker in final_prompt
|
||||
@ -90,6 +90,7 @@ def test_eval_pass_allows_publish_after_safety_and_review(tmp_path: Path) -> Non
|
||||
|
||||
report = asyncio.run(pipeline.evaluate_draft("candidate-1", draft.skill_name, draft.draft_id, provider_bundle=_bundle()))
|
||||
safety = pipeline.check_safety(draft.skill_name, draft.draft_id)
|
||||
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
|
||||
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
|
||||
published = pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester")
|
||||
|
||||
@ -111,6 +112,7 @@ def test_eval_regression_blocks_publish(tmp_path: Path) -> None:
|
||||
|
||||
report = asyncio.run(pipeline.evaluate_draft("candidate-1", draft.skill_name, draft.draft_id, provider_bundle=_bundle()))
|
||||
pipeline.check_safety(draft.skill_name, draft.draft_id)
|
||||
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
|
||||
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
|
||||
|
||||
assert report.passed is False
|
||||
|
||||
@ -68,6 +68,39 @@ def test_pipeline_lists_candidates_and_moves_draft_through_review(tmp_path: Path
|
||||
assert pipeline.get_draft(draft.skill_name, draft.draft_id).status == SkillReviewState.PUBLISHED.value
|
||||
|
||||
|
||||
def test_pipeline_approve_requires_submitted_review(tmp_path: Path) -> None:
    """Approving a draft that was never submitted for review raises ``ValueError``."""
    pipeline = _pipeline(tmp_path)
    draft_fields = {
        "skill_name": "needs-review",
        "proposed_content": "# Needs Review\n\nDo the thing.",
        "proposed_frontmatter": {"description": "needs review"},
        "created_by": "test",
        "reason": "test",
    }
    draft = pipeline.draft_service.create_new_skill_draft(**draft_fields)

    # No submit_review call happens before the approval attempt.
    with pytest.raises(ValueError, match="in review before approval"):
        pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
|
||||
|
||||
|
||||
def test_pipeline_does_not_resubmit_terminal_draft(tmp_path: Path) -> None:
    """A draft that has already been published cannot be submitted for review again."""
    pipeline = _pipeline(tmp_path)
    draft_fields = {
        "skill_name": "already-published",
        "proposed_content": "# Already Published\n\nDo the thing.",
        "proposed_frontmatter": {"description": "already published"},
        "created_by": "test",
        "reason": "test",
    }
    draft = pipeline.draft_service.create_new_skill_draft(**draft_fields)
    skill_name, draft_id = draft.skill_name, draft.draft_id

    # Walk the draft through the full lifecycle up to publication.
    pipeline.submit_review(skill_name, draft_id, requested_by="tester")
    pipeline.approve(skill_name, draft_id, reviewer="tester")
    pipeline.check_safety(skill_name, draft_id)
    pipeline.publish(skill_name, draft_id, publisher="tester")

    with pytest.raises(ValueError, match="draft status before review submission"):
        pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
|
||||
|
||||
|
||||
def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
draft = pipeline.draft_service.create_new_skill_draft(
|
||||
@ -80,5 +113,22 @@ def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
|
||||
|
||||
pipeline.reject(draft.skill_name, draft.draft_id, reviewer="tester")
|
||||
|
||||
with pytest.raises(ValueError, match="approved"):
|
||||
with pytest.raises(ValueError, match="Draft not found"):
|
||||
pipeline.publish(draft.skill_name, draft.draft_id, publisher="tester")
|
||||
assert pipeline.draft_service.get_draft(draft.skill_name, draft.draft_id) is None
|
||||
|
||||
|
||||
def test_pipeline_reject_removes_draft_from_review_list(tmp_path: Path) -> None:
    """Rejecting a draft reports ``REJECTED`` and leaves ``list_drafts()`` empty."""
    pipeline = _pipeline(tmp_path)
    draft_fields = {
        "skill_name": "remove-skill",
        "proposed_content": "# Remove\n\nNo longer needed.",
        "proposed_frontmatter": {"description": "remove"},
        "created_by": "test",
        "reason": "test",
    }
    draft = pipeline.draft_service.create_new_skill_draft(**draft_fields)

    review = pipeline.reject(draft.skill_name, draft.draft_id, reviewer="tester")

    assert review.status == SkillReviewState.REJECTED.value
    remaining_drafts = pipeline.list_drafts()
    assert remaining_drafts == []
|
||||
|
||||
@ -65,6 +65,7 @@ def test_safety_marks_dangerous_tools_high_and_requires_confirm(tmp_path: Path)
|
||||
)
|
||||
|
||||
report = pipeline.check_safety(draft.skill_name, draft.draft_id)
|
||||
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
|
||||
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
|
||||
|
||||
assert report.passed is True
|
||||
@ -84,6 +85,7 @@ def test_publish_requires_safety_report(tmp_path: Path) -> None:
|
||||
created_by="test",
|
||||
reason="test",
|
||||
)
|
||||
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
|
||||
pipeline.approve(draft.skill_name, draft.draft_id, reviewer="tester")
|
||||
|
||||
with pytest.raises(ValueError, match="safety report"):
|
||||
|
||||
@ -12,6 +12,7 @@ from beaver.engine.context.builder import ContextBuilder, ContextBuildInput
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.services.agent_service import AgentService
|
||||
from beaver.skills.assembler import SkillAssemblyResult
|
||||
from beaver.tasks import TaskExecutionPlan, TaskService, ValidationResult, ValidationService
|
||||
|
||||
|
||||
@ -67,7 +68,25 @@ class FakeLearningCandidate:
|
||||
return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
|
||||
|
||||
|
||||
def _bundle(*responses: str) -> ProviderBundle:
|
||||
class RecordingSkillAssembler:
    """Skill assembler double that logs every ``task_description`` it receives."""

    def __init__(self) -> None:
        # Descriptions in call order, one entry per ``assemble`` call.
        self.task_descriptions: list[str] = []

    async def assemble(self, **kwargs) -> SkillAssemblyResult:
        """Record the ``task_description`` keyword and return a default-constructed result."""
        description = kwargs["task_description"]
        self.task_descriptions.append(description)
        return SkillAssemblyResult()
|
||||
|
||||
|
||||
def _route_response(action: str = "new_task", short_title: str = "Test task") -> LLMResponse:
    """Build a stub LLM reply whose content is a JSON routing decision.

    ``action`` and ``short_title`` are interpolated verbatim into the JSON
    text; no escaping is applied.
    """
    routing_json = f'{{"action":"{action}","reason":"test route","short_title":"{short_title}"}}'
    return LLMResponse(
        content=routing_json,
        finish_reason="stop",
        provider_name="stub",
        model="stub-model",
    )
|
||||
|
||||
|
||||
def _bundle(*responses: str, route_action: str = "new_task") -> ProviderBundle:
|
||||
return ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=StubProvider(
|
||||
@ -81,6 +100,8 @@ def _bundle(*responses: str) -> ProviderBundle:
|
||||
for response in responses
|
||||
]
|
||||
),
|
||||
auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
auxiliary_provider=StubProvider([_route_response(route_action)]),
|
||||
)
|
||||
|
||||
|
||||
@ -110,6 +131,25 @@ def _provider_bundle(provider: StubProvider) -> ProviderBundle:
|
||||
return ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=provider,
|
||||
auxiliary_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
auxiliary_provider=StubProvider([_route_response("new_task")]),
|
||||
)
|
||||
|
||||
|
||||
def _main_only_bundle(*responses: str) -> ProviderBundle:
    """Build a provider bundle with only a main provider.

    Each string in ``responses`` becomes one scripted ``stop`` reply of the
    main stub provider, in the order given. No auxiliary provider or runtime
    is passed.
    """
    scripted_replies = [
        LLMResponse(
            content=reply_text,
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        for reply_text in responses
    ]
    runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    return ProviderBundle(main_runtime=runtime, main_provider=StubProvider(scripted_replies))
|
||||
|
||||
|
||||
@ -126,7 +166,7 @@ def test_simple_question_does_not_create_task(tmp_path: Path) -> None:
|
||||
service.process_direct(
|
||||
"hello?",
|
||||
session_id="web:simple",
|
||||
provider_bundle=_bundle("hi"),
|
||||
provider_bundle=_bundle("hi", route_action="simple_chat"),
|
||||
)
|
||||
)
|
||||
loaded = service.create_loop().boot()
|
||||
@ -165,8 +205,89 @@ def test_complex_request_creates_task_and_records_validation(tmp_path: Path) ->
|
||||
assert any(event.event_type == "task_validation_snapshotted" for event in events)
|
||||
assert run_record.task_id == result.task_id
|
||||
assert run_record.validation_result["accepted"] is True
|
||||
assert skill_effects.event_payload["learning_candidate_enabled"] is False
|
||||
assert skill_effects.event_payload["candidate_generation_allowed"] is False
|
||||
assert skill_effects.event_payload["learning_candidates"] == []
|
||||
assert task.metadata["short_title"] == "Test task"
|
||||
|
||||
|
||||
def test_task_mode_uses_task_aware_skill_selection_context(tmp_path: Path) -> None:
    """In task mode the skill assembler receives a task-aware query.

    The recording assembler captures the ``task_description`` it is called
    with; the first captured query must contain the task goal, the current
    user request, the previously activated skills section and the
    "return []" instruction.
    """
    skill_assembler = RecordingSkillAssembler()
    validation = StubValidationService([ValidationResult(passed=True, score=1.0, validator="test")])
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=_single_planner(),
        validation_service=validation,
        skill_assembler=skill_assembler,
    )
    service = AgentService(loader=loader)

    request = service.process_direct(
        "继续按刚才的方案改",
        session_id="web:task-skill-query",
        provider_bundle=_bundle("done", route_action="new_task"),
    )
    result = asyncio.run(request)

    assert result.task_id
    assert skill_assembler.task_descriptions
    query = skill_assembler.task_descriptions[0]
    expected_fragments = (
        "Task goal:",
        "Current user request:",
        "Previously activated skills:",
        "If no published skill matches, return []",
    )
    for fragment in expected_fragments:
        assert fragment in query
|
||||
|
||||
|
||||
def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
    """A task stays active across turns until the router answers ``close_task``.

    Turn one opens a task, turn two is routed as ``continue_task`` and must
    attach to the same task (two run ids). Turn three is routed as
    ``close_task``: it returns no task id, the task ends up ``closed`` and
    the session no longer has an active task view.
    """
    session_id = "web:continue"
    validation = StubValidationService(
        [
            ValidationResult(passed=True, score=0.9, validator="test"),
            ValidationResult(passed=True, score=0.9, validator="test"),
        ]
    )
    service = AgentService(
        loader=EngineLoader(
            workspace=tmp_path,
            task_execution_planner=_single_planner(),
            validation_service=validation,
        )
    )

    def send(user_text: str, reply: str, route_action: str):
        # One user turn with a fresh stub bundle scripted for this turn only.
        return asyncio.run(
            service.process_direct(
                user_text,
                session_id=session_id,
                provider_bundle=_bundle(reply, route_action=route_action),
            )
        )

    first = send("implement the search workflow", "first done", "new_task")
    second = send("also add tests for it", "tests added", "continue_task")

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(first.task_id)

    assert task is not None
    assert second.task_id == first.task_id
    assert len(task.run_ids) == 2

    closed = send("这个任务结束了", "好的,已结束。", "close_task")
    task = loaded.task_service.get_task(first.task_id)

    assert closed.task_id is None
    assert task is not None
    assert task.status == "closed"
    assert loaded.task_service.active_task_view(session_id) is None
|
||||
|
||||
|
||||
def test_validation_failure_retries_once(tmp_path: Path) -> None:
|
||||
@ -229,11 +350,11 @@ def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
|
||||
loaded = service.create_loop().boot()
|
||||
learning_calls = []
|
||||
|
||||
def build_learning_candidates() -> list[FakeLearningCandidate]:
|
||||
learning_calls.append("called")
|
||||
def build_learning_candidates_for_task(task_id: str, *, trigger_run_id: str) -> list[FakeLearningCandidate]:
|
||||
learning_calls.append((task_id, trigger_run_id))
|
||||
return [FakeLearningCandidate()]
|
||||
|
||||
loaded.skill_learning_service.build_learning_candidates = build_learning_candidates
|
||||
loaded.skill_learning_service.build_learning_candidates_for_task = build_learning_candidates_for_task
|
||||
|
||||
feedback = asyncio.run(
|
||||
service.submit_feedback(
|
||||
@ -247,7 +368,7 @@ def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
|
||||
assert feedback["learning_candidates"] == [
|
||||
{"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"}
|
||||
]
|
||||
assert learning_calls == ["called"]
|
||||
assert learning_calls == [(result.task_id, result.run_id)]
|
||||
|
||||
service2 = AgentService(
|
||||
loader=EngineLoader(
|
||||
@ -279,6 +400,14 @@ def test_feedback_closes_or_abandons_internal_task(tmp_path: Path) -> None:
|
||||
|
||||
assert abandon_feedback["task_status"] == "abandoned"
|
||||
assert abandon_feedback["learning_candidates"] == []
|
||||
loaded2 = service2.create_loop().boot()
|
||||
failure_events = [
|
||||
event
|
||||
for event in loaded2.session_manager.get_run_event_records(abandoned.session_id, abandoned.run_id)
|
||||
if event.event_type == "task_failure_evidence_recorded"
|
||||
]
|
||||
assert len(failure_events) == 1
|
||||
assert loaded2.memory_service.get_store().memory_entries == []
|
||||
|
||||
|
||||
def test_feedback_is_idempotent_and_projected_to_assistant_message(tmp_path: Path) -> None:
|
||||
@ -466,7 +595,7 @@ def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:
|
||||
events = loaded.session_manager.get_run_event_records(record.session_id, run_id)
|
||||
skill_effects = [event for event in events if event.event_type == "skill_effects_snapshotted"]
|
||||
assert skill_effects
|
||||
assert skill_effects[-1].event_payload["learning_candidate_enabled"] is False
|
||||
assert skill_effects[-1].event_payload["candidate_generation_allowed"] is False
|
||||
|
||||
|
||||
def test_context_builder_strips_ui_projection_fields_from_provider_history() -> None:
|
||||
@ -490,17 +619,43 @@ def test_context_builder_strips_ui_projection_fields_from_provider_history() ->
|
||||
assert assistant == {"role": "assistant", "content": "done"}
|
||||
|
||||
|
||||
def test_context_builder_normalizes_persisted_tool_arguments() -> None:
    """Dict-valued tool-call arguments in history come out as a JSON string."""
    persisted_call = {
        "id": "call-1",
        "type": "function",
        "function": {
            "name": "cron",
            # Stored as a mapping rather than the string form asserted below.
            "arguments": {"action": "add", "mode": "notification"},
        },
    }
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [persisted_call],
    }

    built = ContextBuilder().build_messages(ContextBuildInput(history=[assistant_turn]))

    normalized_call = built.messages[-1]["tool_calls"][0]
    assert normalized_call["function"]["arguments"] == '{"action": "add", "mode": "notification"}'
|
||||
|
||||
|
||||
def test_llm_validator_parse_failure_is_not_accepted(tmp_path: Path) -> None:
    """A validator reply that is not JSON must not be treated as acceptance.

    The provider bundle answers with the literal text ``"not json"``; the
    resulting validation must be rejected and attributed to ``"llm_error"``.
    """
    task_service = TaskService(tmp_path / "tasks")
    task = task_service.create_task(
        session_id="web:validator",
        description="implement validator handling",
    )

    # Each keyword is passed exactly once; repeating a keyword argument in a
    # single call is a SyntaxError.
    validation = asyncio.run(
        ValidationService().validate_task_result(
            task=task,
            user_message="implement validator handling",
            final_output="done",
            provider_bundle=_main_only_bundle("not json"),
        )
    )

    assert validation.accepted is False
    assert validation.validator == "llm_error"
|
||||
|
||||
@ -9,7 +9,7 @@ from beaver.engine.context import SkillContext
|
||||
from beaver.engine.providers.base import LLMProvider, LLMResponse
|
||||
from beaver.engine.providers.factory import ProviderBundle
|
||||
from beaver.skills.drafts import DraftService
|
||||
from beaver.skills.learning import MissingSkillSynthesizer
|
||||
from beaver.skills.learning import EphemeralGuidanceSynthesizer
|
||||
from beaver.skills.publisher import SkillPublisher
|
||||
from beaver.skills.reviews import ReviewService
|
||||
from beaver.skills.specs import SkillSpecStore
|
||||
@ -116,12 +116,12 @@ def test_task_skill_resolver_pins_matching_published_skill(tmp_path: Path) -> No
|
||||
assert reports[0].ephemeral_used is False
|
||||
|
||||
|
||||
def test_task_skill_resolver_generates_draft_only_ephemeral_skill_when_missing(tmp_path: Path) -> None:
|
||||
def test_task_skill_resolver_generates_ephemeral_guidance_when_missing(tmp_path: Path) -> None:
|
||||
provider = RecordingProvider(
|
||||
[
|
||||
"""
|
||||
{
|
||||
"skill_name": "api-compatibility-review",
|
||||
"guidance_name": "api-compatibility-review",
|
||||
"description": "Review API compatibility",
|
||||
"content": "# API Compatibility Review\\n\\nCheck schema compatibility.",
|
||||
"tags": ["api", "review"]
|
||||
@ -133,7 +133,7 @@ def test_task_skill_resolver_generates_draft_only_ephemeral_skill_when_missing(t
|
||||
resolver = TaskSkillResolver(
|
||||
skills_loader=SkillsLoader(tmp_path),
|
||||
draft_service=DraftService(store),
|
||||
missing_skill_synthesizer=MissingSkillSynthesizer(),
|
||||
missing_skill_synthesizer=EphemeralGuidanceSynthesizer(),
|
||||
)
|
||||
graph = ExecutionGraph(
|
||||
strategy="sequence",
|
||||
@ -163,13 +163,14 @@ def test_task_skill_resolver_generates_draft_only_ephemeral_skill_when_missing(t
|
||||
)
|
||||
|
||||
drafts = store.list_drafts("api-compatibility-review")
|
||||
assert len(drafts) == 1
|
||||
assert drafts == []
|
||||
assert store.list_published_skill_names() == []
|
||||
assert resolved.nodes[0].inherited_pinned_skills == []
|
||||
assert len(resolved.nodes[0].inherited_pinned_skill_contexts) == 1
|
||||
context: SkillContext = resolved.nodes[0].inherited_pinned_skill_contexts[0]
|
||||
assert context.name == "draft:api-compatibility-review"
|
||||
assert context.version == f"draft:{drafts[0].draft_id}"
|
||||
assert context.activation_reason == "generated_missing_skill"
|
||||
assert reports[0].generated_skill_draft_id == drafts[0].draft_id
|
||||
assert context.name == "ephemeral:api-compatibility-review"
|
||||
assert context.version.startswith("ephemeral:eg_")
|
||||
assert context.activation_reason == "ephemeral_guidance"
|
||||
assert reports[0].ephemeral_guidance_id is not None
|
||||
assert reports[0].ephemeral_guidance_name == "api-compatibility-review"
|
||||
assert reports[0].ephemeral_used is True
|
||||
|
||||
@ -83,7 +83,6 @@ tools:
|
||||
|
||||
registry = ToolRegistry()
|
||||
registry.register(DummyTool("memory", toolset="memory", always_available=True))
|
||||
registry.register(DummyTool("skill_view", toolset="skills", always_available=True))
|
||||
registry.register(DummyTool("terminal", toolset="shell"))
|
||||
registry.register(DummyTool("search_files", toolset="file"))
|
||||
registry.register(DummyTool("echo", toolset="debug"))
|
||||
@ -100,7 +99,7 @@ tools:
|
||||
)
|
||||
)
|
||||
|
||||
assert [spec.name for spec in selected] == ["memory", "skill_view", "terminal", "search_files"]
|
||||
assert [spec.name for spec in selected] == ["memory", "terminal", "search_files"]
|
||||
|
||||
|
||||
def test_embedding_fallback_can_return_all_or_top_k() -> None:
|
||||
|
||||
132
app-instance/backend/tests/unit/test_websocket_chat.py
Normal file
132
app-instance/backend/tests/unit/test_websocket_chat.py
Normal file
@ -0,0 +1,132 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from beaver.interfaces.web.app import create_app
|
||||
from beaver.services.agent_service import AgentService
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class StubRunResult:
|
||||
session_id: str
|
||||
run_id: str = "run-1"
|
||||
output_text: str = "ok"
|
||||
finish_reason: str = "stop"
|
||||
tool_iterations: int = 0
|
||||
provider_name: str | None = "stub"
|
||||
model: str | None = "stub-model"
|
||||
usage: dict[str, Any] = field(default_factory=lambda: {"total_tokens": 3})
|
||||
task_id: str | None = "task-1"
|
||||
task_status: str | None = "awaiting_feedback"
|
||||
validation_result: dict[str, Any] | None = field(default_factory=lambda: {"accepted": True})
|
||||
|
||||
|
||||
class StubAgentService(AgentService):
    """``AgentService`` test double that records ``submit_direct`` calls.

    Each call is appended to ``calls``. When built with ``fail=True`` every
    call raises ``RuntimeError("boom")`` after being recorded.
    """

    def __init__(self, *, fail: bool = False) -> None:
        super().__init__()
        self.fail = fail
        self.calls: list[dict[str, Any]] = []

    async def submit_direct(self, message: str, **kwargs: Any) -> StubRunResult:  # type: ignore[override]
        """Record the call, then either raise or echo the message back."""
        recorded: dict[str, Any] = {"message": message}
        recorded.update(kwargs)
        self.calls.append(recorded)

        if not self.fail:
            return StubRunResult(
                # A missing or falsy session id falls back to the default.
                session_id=kwargs.get("session_id") or "web:default",
                output_text=f"echo:{message}",
            )
        raise RuntimeError("boom")
|
||||
|
||||
|
||||
def test_websocket_ping_pong() -> None:
    """A ``ping`` frame on the session socket is answered with ``pong``."""
    stub_service = StubAgentService()
    app = create_app(service=stub_service, manage_service_lifecycle=False)

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json({"type": "ping"})
        reply = websocket.receive_json()
        assert reply == {"type": "pong"}
|
||||
|
||||
|
||||
def test_websocket_message_returns_chat_metadata_and_session_updated() -> None:
    """A chat message yields a status frame, the assistant reply, then a session update.

    Also checks the keyword arguments forwarded to ``submit_direct`` and that
    the incoming metadata and attachments are echoed under ``input_metadata``.
    """
    service = StubAgentService()
    app = create_app(service=service, manage_service_lifecycle=False)

    outgoing = {
        "type": "message",
        "content": "hello",
        "metadata": {"source": "test"},
        "attachments": [{"file_id": "file-1", "name": "a.txt"}],
    }

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json(outgoing)
        assert websocket.receive_json() == {"type": "status", "status": "thinking"}
        message = websocket.receive_json()
        session_updated = websocket.receive_json()

    expected_call = {
        "message": "hello",
        "session_id": "web:alpha",
        "source": "websocket",
        "user_id": None,
        "title": None,
        "execution_context": None,
        "model": None,
        "provider_name": None,
        "embedding_model": None,
    }
    assert service.calls == [expected_call]

    assert message["type"] == "message"
    assert message["role"] == "assistant"
    assert message["content"] == "echo:hello"
    assert message["session_id"] == "web:alpha"
    assert message["run_id"] == "run-1"
    assert message["task_id"] == "task-1"
    assert message["task_status"] == "awaiting_feedback"
    assert message["validation_result"] == {"accepted": True}
    assert message["validation_status"] == "passed"

    expected_input_metadata = {
        "source": "test",
        "attachments": [{"file_id": "file-1", "name": "a.txt"}],
    }
    assert message["metadata"]["input_metadata"] == expected_input_metadata

    expected_update = {
        "type": "session_updated",
        "session_id": "web:alpha",
        "source": "websocket",
    }
    assert session_updated == expected_update
|
||||
|
||||
|
||||
def test_websocket_empty_content_returns_error_without_runtime_call() -> None:
    """Whitespace-only content gets an error frame and never reaches the service."""
    service = StubAgentService()
    app = create_app(service=service, manage_service_lifecycle=False)

    blank_message = {"type": "message", "content": "   "}
    expected_error = {"type": "error", "error": "'content' is required"}

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json(blank_message)
        assert websocket.receive_json() == expected_error

    # The stub records every submit_direct call, so an empty list means none ran.
    assert service.calls == []
|
||||
|
||||
|
||||
def test_websocket_runtime_error_returns_assistant_error_message() -> None:
    """A failing service call is reported as an assistant message with ``finish_reason`` ``error``.

    A ``ping`` sent afterwards must still be answered, showing the socket
    stays usable after the failure.
    """
    failing_service = StubAgentService(fail=True)
    app = create_app(service=failing_service, manage_service_lifecycle=False)

    with TestClient(app) as client, client.websocket_connect("/ws/web:alpha") as websocket:
        websocket.send_json({"type": "message", "content": "hello"})
        assert websocket.receive_json() == {"type": "status", "status": "thinking"}
        message = websocket.receive_json()

        websocket.send_json({"type": "ping"})
        pong = websocket.receive_json()

    assert message["type"] == "message"
    assert message["role"] == "assistant"
    assert message["session_id"] == "web:alpha"
    assert message["finish_reason"] == "error"
    # "boom" is the text of the RuntimeError raised by the stub.
    assert "boom" in message["content"]
    assert pong == {"type": "pong"}
|
||||
Reference in New Issue
Block a user