feat(engine): 优化智能体循环中的助手消息处理逻辑

- 在没有工具调用时才添加助手消息到上下文
- 确保工具调用响应正确添加到消息上下文中
- 修复了消息构建的条件逻辑

fix(cron): 改进定时任务调度的时间解析功能

- 添加正则表达式导入用于时间显示解析
- 实现从显示文本中提取毫秒间隔的功能
- 增强整数转换的安全性,避免类型错误
- 优化定时任务配置的解析逻辑

feat(outlook): 增强Outlook集成的功能和稳定性

- 将默认超时时间从10秒增加到180秒
- 为状态检查函数添加可选的验证参数
- 串行执行邮件概览获取操作而非并行
- 改进连接状态验证逻辑

feat(channel): 添加设备名称作为会话标识的选项

- 为终端WebSocket适配器添加新的配置选项
- 实现基于设备名称生成会话对等ID的功能
- 记录原始对等ID和设备名称的元数据
- 支持从设备名称创建会话对等ID

feat(skills): 完善技能学习评估系统和进度跟踪

- 在应用启动时自动调度待评估的技能草稿
- 为技能评估工作创建独立的循环工厂
- 实现异步技能评估任务的取消和清理机制
- 添加技能评估进度报告和状态跟踪功能
- 扩展会话列表API以包含更多详细信息
- 防止对不存在的会话进行操作
- 优化技能草稿提交和评估的业务逻辑

perf(skills): 提升技能评估的并发性能

- 实现并行技能案例评估以提高效率
- 添加最大并行案例数的环境变量控制
- 实现实时评估进度更新和回调机制
- 优化评估过程中的资源管理和同步

refactor(services): 创建隔离的智能体循环实例

- 添加创建独立智能体循环的工厂方法
- 确保新循环继承运行时服务配置
- 支持技能评估等需要隔离环境的场景
```
This commit is contained in:
2026-06-15 14:48:16 +08:00
parent 8aeb97a5fc
commit 4b0bf65ace
53 changed files with 4328 additions and 292 deletions

View File

@ -29,6 +29,18 @@ def test_schedule_from_frontend_payload() -> None:
assert cron.kind == "cron"
def test_legacy_interval_schedule_recovers_duration_from_display() -> None:
    """A legacy ``every`` schedule whose ``every_ms`` is ``None`` recovers it from ``display``."""
    legacy_payload = {
        "kind": "every",
        "every_ms": None,
        "display": "every 1800s",
    }

    recovered = CronSchedule.from_dict(legacy_payload)

    # 1800 seconds expressed in milliseconds.
    assert recovered.every_ms == 30 * 60 * 1000
def test_compute_next_run_skips_missed_interval() -> None:
schedule = CronSchedule(kind="every", every_ms=60_000)
assert compute_next_run(schedule, now_ms=1_000_000, last_run_at_ms=0) > 1_000_000
@ -80,6 +92,22 @@ def test_manual_run_records_scheduled_run_output(tmp_path) -> None:
assert updated.to_api_dict()["last_scheduled_run_id"] == run.scheduled_run_id
def test_persisted_interval_job_keeps_schedule_and_next_run(tmp_path) -> None:
    """An interval job read back by a second ``CronService`` keeps its interval and next run time."""
    jobs_file = tmp_path / "jobs.json"
    writer = CronService(jobs_file)
    created = writer.add_job(
        name="Hydration reminder",
        message="Drink water",
        schedule=CronSchedule(kind="every", every_ms=30 * 60 * 1000),
    )

    # A second service instance built on the same path re-reads the stored job.
    restored = CronService(jobs_file).get_job(created.id)

    assert restored is not None
    assert restored.schedule.every_ms == 30 * 60 * 1000
    assert restored.next_run_at_ms == created.next_run_at_ms
def test_cron_tool_uses_runtime_service(tmp_path) -> None:
service = CronService(tmp_path / "jobs.json")
tool = CronTool()

View File

@ -0,0 +1,71 @@
import asyncio
import pytest
from beaver.foundation.config.schema import AuthzConfig, BackendIdentityConfig, BeaverConfig
from beaver.integrations import outlook
class _FakeAuthzClient:
    """Authz client stand-in that returns fixed Outlook settings for backend ``steven``."""

    async def get_outlook_settings(self, backend_id: str) -> dict:
        """Return the canned settings; any other ``backend_id`` fails the assertion."""
        assert backend_id == "steven"
        canned_settings = {
            "configured": True,
            "email": "steven.yx.li@boardware.com",
            "server": "mail.boardware.com.mo",
        }
        return canned_settings
def _authz_config() -> BeaverConfig:
    """Build a ``BeaverConfig`` with authz enabled and the ``steven`` backend identity."""
    authz_section = AuthzConfig(
        enabled=True,
        base_url="http://authz.example",
        outlook_mcp_url="http://outlook-mcp.example/mcp",
    )
    identity_section = BackendIdentityConfig(
        backend_id="steven",
        client_id="steven",
        client_secret="secret",
    )
    return BeaverConfig(authz=authz_section, backend_identity=identity_section)
def test_outlook_status_does_not_probe_mcp_by_default(monkeypatch: pytest.MonkeyPatch, tmp_path) -> None:
    """``outlook_status`` called with only config and path must not call the Outlook MCP tool."""

    async def fail_if_called(*_args, **_kwargs):
        raise AssertionError("status should not call Outlook MCP by default")

    monkeypatch.setattr(outlook, "_authz_client", lambda _config: _FakeAuthzClient())
    # Any MCP call made by outlook_status now raises and fails the test.
    monkeypatch.setattr(outlook, "_call_outlook_mcp_tool", fail_if_called)

    status = asyncio.run(outlook.outlook_status(_authz_config(), tmp_path))

    assert status["configured"] is True
    assert status["connected"] is False
    assert status["auth_status"] is None
    assert status["error"] is None
def test_outlook_overview_loads_sections_serially(monkeypatch: pytest.MonkeyPatch, tmp_path) -> None:
    """``get_overview`` makes its three MCP calls in order and never overlaps them."""
    in_flight = 0
    peak_in_flight = 0
    called_tools: list[str] = []

    async def fake_call(_config, tool_name: str, _arguments, **_kwargs):
        nonlocal in_flight, peak_in_flight
        called_tools.append(tool_name)
        in_flight += 1
        peak_in_flight = max(peak_in_flight, in_flight)
        # Yield to the event loop so any overlapping call would raise the peak above 1.
        await asyncio.sleep(0.01)
        in_flight -= 1
        return {"value": []}

    monkeypatch.setattr(outlook, "_authz_client", lambda _config: _FakeAuthzClient())
    monkeypatch.setattr(outlook, "_call_outlook_mcp_tool", fake_call)

    overview = asyncio.run(outlook.get_overview(_authz_config(), tmp_path))

    assert overview["warnings"] == []
    assert called_tools == ["mail_list_messages", "mail_list_messages", "calendar_list_events"]
    assert peak_in_flight == 1

View File

@ -27,6 +27,7 @@ class StubProvider(LLMProvider):
def __init__(self, responses: list[LLMResponse]) -> None:
super().__init__()
self._responses = list(responses)
self.calls: list[dict] = []
async def chat(
self,
@ -37,6 +38,16 @@ class StubProvider(LLMProvider):
temperature: float = 0.7,
thinking_enabled: bool | None = None,
) -> LLMResponse:
self.calls.append(
{
"messages": messages,
"tools": tools,
"model": model,
"max_tokens": max_tokens,
"temperature": temperature,
"thinking_enabled": thinking_enabled,
}
)
if not self._responses:
raise AssertionError("No stubbed provider responses left")
return self._responses.pop(0)
@ -704,32 +715,33 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
skill_assembler=StubSkillAssembler([skill]),
)
loop = AgentLoop(loader=loader)
provider = StubProvider(
[
LLMResponse(
content="Need a tool.",
finish_reason="tool_calls",
tool_calls=[_tool_call()],
provider_name="stub",
model="stub-model",
),
LLMResponse(
content="Need another tool.",
finish_reason="tool_calls",
tool_calls=[_tool_call(call_id="call-2")],
provider_name="stub",
model="stub-model",
),
LLMResponse(
content="Based on the available tool result, the container likely failed during startup.",
finish_reason="stop",
provider_name="stub",
model="stub-model",
),
]
)
bundle = ProviderBundle(
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
main_provider=StubProvider(
[
LLMResponse(
content="Need a tool.",
finish_reason="tool_calls",
tool_calls=[_tool_call()],
provider_name="stub",
model="stub-model",
),
LLMResponse(
content="Need another tool.",
finish_reason="tool_calls",
tool_calls=[_tool_call(call_id="call-2")],
provider_name="stub",
model="stub-model",
),
LLMResponse(
content="Based on the available tool result, the container likely failed during startup.",
finish_reason="stop",
provider_name="stub",
model="stub-model",
),
]
),
main_provider=provider,
)
result = asyncio.run(
@ -744,6 +756,21 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
assert result.finish_reason == "max_tool_iterations_finalized"
assert "Based on the available tool result" in result.output_text
assert "Tool loop stopped" not in result.output_text
finalization_messages = provider.calls[-1]["messages"]
assistant_tool_call_ids = [
call["id"]
for message in finalization_messages
for call in message.get("tool_calls", [])
if message.get("role") == "assistant"
]
tool_result_ids = [
message.get("tool_call_id")
for message in finalization_messages
if message.get("role") == "tool"
]
assert "call-1" in assistant_tool_call_ids
assert "call-2" not in assistant_tool_call_ids
assert set(assistant_tool_call_ids).issubset(set(tool_result_ids))
effect_records = loaded.run_memory_store.list_skill_effects("docker-debug", version="v0007")
assert effect_records[-1].run_id == result.run_id
assert effect_records[-1].success is False

View File

@ -105,3 +105,29 @@ def test_web_archive_route_does_not_create_archive_suffix_session(tmp_path: Path
assert loaded.session_manager.get_session("web:alpha")["end_reason"] == "archived" # type: ignore[union-attr]
assert loaded.session_manager.get_session("web:alpha/archive") is None # type: ignore[union-attr]
assert sessions_response.json() == []
# Checks that GET /api/sessions lists the session created with source "web" and omits
# the one created with source "skill_replay_eval".
# NOTE(review): indentation is lost in this view, so the exact extent of the `with`
# block below cannot be confirmed from here.
def test_web_session_list_hides_skill_replay_evaluation_sessions(tmp_path: Path) -> None:
service = AgentService(workspace=tmp_path)
loaded = service.create_loop().boot()
# One evaluation-sourced session (expected to be hidden) and one web session (expected to be listed).
loaded.session_manager.ensure_session("eval-session", source="skill_replay_eval") # type: ignore[union-attr]
loaded.session_manager.ensure_session("web:visible", source="web") # type: ignore[union-attr]
app = create_app(service=service, manage_service_lifecycle=False)
with TestClient(app) as client:
response = client.get("/api/sessions")
assert response.status_code == 200
# Only the web session key may appear in the listing.
assert [item["key"] for item in response.json()] == ["web:visible"]
# Checks that GET /api/sessions/<key> for an unknown key answers 404 and that the
# session manager still has no session under that key afterwards.
# NOTE(review): indentation is lost in this view, so the exact extent of the `with`
# block below cannot be confirmed from here.
def test_get_missing_session_returns_404_without_creating_it(tmp_path: Path) -> None:
service = AgentService(workspace=tmp_path)
app = create_app(service=service, manage_service_lifecycle=False)
with TestClient(app) as client:
response = client.get("/api/sessions/missing-session")
assert response.status_code == 404
# Look the key up through the session manager to confirm the GET did not create it.
loaded = service.create_loop().boot()
assert loaded.session_manager.get_session("missing-session") is None # type: ignore[union-attr]

View File

@ -201,6 +201,22 @@ class FakeReplayRunner:
}
class ConcurrentReplayRunner(FakeReplayRunner):
    """Replay runner that records how many ``run_arm`` calls overlap.

    ``active`` is the number of calls currently in flight; ``max_active`` is
    the highest value ``active`` has reached.
    """

    def __init__(self) -> None:
        super().__init__()
        self.active = 0
        self.max_active = 0

    async def run_arm(self, request):
        """Delegate to the parent ``run_arm`` while tracking concurrency.

        Returns whatever the parent ``run_arm`` returns for ``request``.
        """
        self.active += 1
        try:
            self.max_active = max(self.max_active, self.active)
            # Hold the slot briefly so concurrent callers can overlap. The sleep
            # sits inside the ``try`` so a cancellation raised here still
            # releases the slot in ``finally``.
            await asyncio.sleep(0.02)
            return await super().run_arm(request)
        finally:
            self.active -= 1
def test_eval_report_includes_replay_case_and_coverage(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
draft = pipeline.draft_service.create_new_skill_draft(
@ -238,6 +254,94 @@ def test_eval_report_includes_replay_case_and_coverage(tmp_path: Path) -> None:
assert report.tool_execution_summary["score_role"] == "diagnostic_only"
def test_replay_eval_reports_arm_progress(tmp_path: Path) -> None:
    """``evaluate_draft`` reports replay progress starting at 0/20 arms and ending at 20/20."""

    def replaying(arms_done: int, cases_done: int) -> dict:
        # Expected shape of one progress event during the replay phase.
        return {
            "phase": "replaying",
            "completed_arms": arms_done,
            "total_arms": 20,
            "completed_cases": cases_done,
            "total_cases": 10,
        }

    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="release-checklist",
        proposed_content="# Release\n\nRun tests.",
        proposed_frontmatter={"description": "release", "tools": []},
        created_by="test",
        reason="test",
    )
    pipeline.learning_store.update_learning_candidate(
        "candidate-1",
        draft_skill_name=draft.skill_name,
        draft_id=draft.draft_id,
    )
    events: list[dict] = []

    evaluation = pipeline.evaluate_draft(
        "candidate-1",
        draft.skill_name,
        draft.draft_id,
        provider_bundle=_bundle(),
        replay_runner=FakeReplayRunner(),
        progress_callback=events.append,
    )
    asyncio.run(evaluation)

    assert events[0] == replaying(0, 0)
    assert events[-1] == replaying(20, 10)
def test_replay_eval_runs_cases_with_bounded_parallelism(tmp_path: Path) -> None:
    """With ``max_parallel_cases=2`` the runner peaks at two overlapping arms and case order is kept."""
    pipeline = _pipeline(tmp_path)
    pipeline.evaluator = SkillDraftEvaluator(
        pipeline.learning_service.run_store,
        max_parallel_cases=2,
    )
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="release-checklist",
        proposed_content="# Release\n\nRun tests.",
        proposed_frontmatter={"description": "release", "tools": []},
        created_by="test",
        reason="test",
    )
    pipeline.learning_store.update_learning_candidate(
        "candidate-1",
        draft_skill_name=draft.skill_name,
        draft_id=draft.draft_id,
    )
    tracking_runner = ConcurrentReplayRunner()

    evaluation = pipeline.evaluate_draft(
        "candidate-1",
        draft.skill_name,
        draft.draft_id,
        provider_bundle=_bundle(),
        replay_runner=tracking_runner,
    )
    report = asyncio.run(evaluation)

    # The real run comes first, followed by synthetic cases 01 through 09.
    expected_run_ids = ["run-1"]
    expected_run_ids += [f"synthetic:candidate-1:{index:02d}" for index in range(1, 10)]

    assert tracking_runner.max_active == 2
    assert [case["run_id"] for case in report.cases] == expected_run_ids
def test_replay_main_score_uses_validator_not_tool_success(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
pipeline.learning_store.update_learning_candidate(

View File

@ -98,6 +98,27 @@ def test_pipeline_does_not_resubmit_terminal_draft(tmp_path: Path) -> None:
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
def test_safety_recheck_keeps_submitted_candidate_in_review(tmp_path: Path) -> None:
    """Re-running the safety check after submit leaves the candidate ``review_pending``."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="reviewed-skill",
        proposed_content="# Reviewed Skill\n\nDo the thing.",
        proposed_frontmatter={"description": "reviewed"},
        created_by="test",
        reason="test",
    )

    # Link the existing candidate to the new draft and persist the link.
    linked = pipeline.get_candidate("candidate-1")
    linked.draft_skill_name = draft.skill_name
    linked.draft_id = draft.draft_id
    pipeline.learning_store.record_learning_candidate(linked)

    # Safety check, submit, then a second safety check on the submitted draft.
    pipeline.check_safety(draft.skill_name, draft.draft_id)
    pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
    pipeline.check_safety(draft.skill_name, draft.draft_id)

    final_status = pipeline.get_candidate("candidate-1").status
    assert final_status == "review_pending"
def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
pipeline = _pipeline(tmp_path)
draft = pipeline.draft_service.create_new_skill_draft(

View File

@ -7,8 +7,17 @@ from beaver.skills.learning.replay import ReplayArmRequest, ReplayRunner
class FakeAgentLoop:
def __init__(self) -> None:
self.ended_sessions: list[tuple[str, str]] = []
def boot(self):
return SimpleNamespace(tool_executor=SimpleNamespace(), tool_registry=SimpleNamespace(get=lambda name: None))
return SimpleNamespace(
tool_executor=SimpleNamespace(),
tool_registry=SimpleNamespace(get=lambda name: None),
session_manager=SimpleNamespace(
end_session=lambda session_id, reason: self.ended_sessions.append((session_id, reason))
),
)
async def process_direct(self, task: str, **kwargs):
executor = kwargs["tool_executor_override"]
@ -18,6 +27,7 @@ class FakeAgentLoop:
class FakeRunningAgentLoop(FakeAgentLoop):
def __init__(self) -> None:
super().__init__()
self.process_direct_calls = 0
self.submit_direct_calls: list[tuple[str, dict]] = []
@ -35,6 +45,29 @@ class FakeRunningAgentLoop(FakeAgentLoop):
return SimpleNamespace(session_id="session-queued", run_id="run-queued", output_text="queued done", finish_reason="stop")
# Agent-loop fake that records whether its MCP manager and the loop itself were closed,
# and asserts in close() that the MCP manager was closed first.
class FakeIsolatedAgentLoop(FakeAgentLoop):
def __init__(self) -> None:
super().__init__()
self.closed = False
# The manager's close attribute is the coroutine function _close_mcp defined below.
self.mcp_manager = SimpleNamespace(close=self._close_mcp)
self.mcp_closed = False
# Stores the result of the parent boot() once boot() has been called.
self.loaded = None
async def _close_mcp(self) -> None:
self.mcp_closed = True
def close(self) -> None:
# Fails if the loop is closed before _close_mcp has run.
assert self.mcp_closed is True
self.closed = True
def boot(self):
# NOTE(review): indentation is lost in this view; it is unclear whether the two
# attribute assignments below run only on the first boot or on every call — confirm.
if self.loaded is None:
self.loaded = super().boot()
self.loaded.mcp_manager = self.mcp_manager
# The registered closeable for "mcp_manager" is a no-op callable.
self.loaded.closeables = [("mcp_manager", lambda: None)]
return self.loaded
def test_replay_runner_returns_arm_report_with_tool_trace() -> None:
runner = ReplayRunner(agent_loop=FakeAgentLoop())
request = ReplayArmRequest(
@ -53,6 +86,8 @@ def test_replay_runner_returns_arm_report_with_tool_trace() -> None:
assert report["arm"] == "candidate"
assert report["finish_reason"] == "stop"
assert report["tool_calls"][0]["tool_name"] == "mcp_outlook_send_email"
assert report["tool_calls"][0]["duration_ms"] >= 0
assert runner.agent_loop.ended_sessions == [("session-replay", "evaluation_complete")]
def test_replay_runner_queues_arm_when_agent_loop_is_running() -> None:
@ -83,3 +118,31 @@ def test_replay_runner_queues_arm_when_agent_loop_is_running() -> None:
assert report["session_id"] == "session-queued"
assert report["run_id"] == "run-queued"
assert report["tool_calls"][0]["tool_name"] == "mcp_outlook_send_email"
assert agent_loop.ended_sessions == [("session-queued", "evaluation_complete")]
def test_replay_runner_uses_and_closes_isolated_loop() -> None:
    """With an isolated-loop factory, the arm runs on one fresh loop that is closed afterwards."""
    shared_loop = FakeRunningAgentLoop()
    created: list[FakeIsolatedAgentLoop] = []

    def create_isolated_loop() -> FakeIsolatedAgentLoop:
        # Remember every loop handed out so the test can inspect it afterwards.
        fresh = FakeIsolatedAgentLoop()
        created.append(fresh)
        return fresh

    runner = ReplayRunner(agent_loop=shared_loop, isolated_loop_factory=create_isolated_loop)
    arm_request = ReplayArmRequest(
        case_id="case-isolated",
        arm="candidate",
        task_text="Fetch current weather.",
        provider_bundle=object(),
    )

    report = asyncio.run(runner.run_arm(arm_request))

    assert report["session_id"] == "session-replay"
    # The shared loop must not have been used in either direct or queued mode.
    assert shared_loop.process_direct_calls == 0
    assert shared_loop.submit_direct_calls == []
    assert len(created) == 1
    only_loop = created[0]
    assert only_loop.mcp_closed is True
    assert only_loop.closed is True

View File

@ -1,5 +1,7 @@
from __future__ import annotations
import asyncio
import time
from pathlib import Path
from types import SimpleNamespace
@ -16,7 +18,7 @@ class StubEvaluator:
def __init__(self) -> None:
self.calls = 0
async def evaluate(self, *, candidate, draft, provider_bundle, replay_runner=None):
async def evaluate(self, *, candidate, draft, provider_bundle, replay_runner=None, progress_callback=None):
self.calls += 1
return SkillDraftEvalReport(
report_id="eval-existing",
@ -34,6 +36,18 @@ class StubEvaluator:
)
class SlowEvaluator(StubEvaluator):
    """Evaluator stub that waits 0.15 seconds before delegating to ``StubEvaluator.evaluate``."""

    async def evaluate(self, *, candidate, draft, provider_bundle, replay_runner=None, progress_callback=None):
        """Sleep, then forward every argument unchanged to the parent ``evaluate``."""
        await asyncio.sleep(0.15)
        forwarded = {
            "candidate": candidate,
            "draft": draft,
            "provider_bundle": provider_bundle,
            "replay_runner": replay_runner,
            "progress_callback": progress_callback,
        }
        return await super().evaluate(**forwarded)
def test_skill_learning_candidates_and_run_once_api(tmp_path: Path) -> None:
service = AgentService(workspace=tmp_path)
loaded = service.create_loop().boot()
@ -193,15 +207,79 @@ def test_submit_draft_runs_safety_and_eval(tmp_path: Path, monkeypatch) -> None:
with TestClient(app) as client:
response = client.post(f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}/submit")
deadline = time.monotonic() + 1
payload = response.json()
while payload["eval_report"] is None and time.monotonic() < deadline:
time.sleep(0.02)
payload = client.get(f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}").json()
assert response.status_code == 200
payload = response.json()
assert evaluator.calls == 1
assert payload["status"] == "in_review"
assert payload["safety_report"]["passed"] is True
assert payload["eval_report"]["report_id"] == "eval-existing"
# Checks that submitting a draft answers before evaluation has finished, reports a
# pending evaluation with initial progress, and that a second submit of the same draft
# does not trigger a second evaluation.
# NOTE(review): indentation is lost in this view, so the exact extent of the `with`
# block below cannot be confirmed from here.
def test_submit_draft_returns_before_eval_and_is_idempotent(tmp_path: Path, monkeypatch) -> None:
service = AgentService(workspace=tmp_path)
loaded = service.create_loop().boot()
draft = loaded.skill_learning_pipeline.draft_service.create_new_skill_draft( # type: ignore[union-attr]
skill_name="weather-search",
proposed_content="# Weather Search\n\nUse current weather sources.",
proposed_frontmatter={"description": "weather", "tools": []},
created_by="test",
reason="test",
)
# Record a candidate already in "draft_ready" that points at the draft created above.
loaded.skill_learning_store.record_learning_candidate( # type: ignore[union-attr]
SkillLearningCandidate(
candidate_id="candidate-weather",
kind="revise_skill",
source_run_ids=["run-1"],
source_session_ids=["session-1"],
related_skill_names=["weather-search"],
reason="revise",
status="draft_ready",
draft_skill_name=draft.skill_name,
draft_id=draft.draft_id,
)
)
# SlowEvaluator sleeps 0.15 s before evaluating, which makes a blocking submit measurable.
evaluator = SlowEvaluator()
loaded.skill_learning_pipeline.evaluator = evaluator # type: ignore[union-attr]
# Replace provider-bundle construction with a stub namespace.
monkeypatch.setattr(
service,
"_make_provider_bundle_for_task",
lambda loaded, kwargs: SimpleNamespace(main_provider=object()),
)
app = create_app(service=service, manage_service_lifecycle=False)
with TestClient(app) as client:
# Time only the first submit request.
started = time.monotonic()
first = client.post(f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}/submit")
elapsed = time.monotonic() - started
second = client.post(f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}/submit")
# Poll the draft endpoint for up to 2 s until an eval report appears.
deadline = time.monotonic() + 2
payload = second.json()
while payload["eval_report"] is None and time.monotonic() < deadline:
time.sleep(0.05)
payload = client.get(f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}").json()
assert first.status_code == 200
# Below the evaluator's 0.15 s sleep, so the first response did not wait for evaluation.
assert elapsed < 0.12
assert first.json()["status"] == "in_review"
assert first.json()["eval_status"] == "pending"
assert first.json()["eval_progress"] == {
"phase": "preparing",
"completed_arms": 0,
"total_arms": 20,
"completed_cases": 0,
"total_cases": 10,
}
assert second.status_code == 200
# Two submits, one evaluation.
assert evaluator.calls == 1
assert payload["eval_report"]["report_id"] == "eval-existing"
assert loaded.skill_learning_pipeline.get_candidate("candidate-weather").status == "review_pending" # type: ignore[union-attr]
def test_draft_payload_includes_target_version_for_revision(tmp_path: Path) -> None:
service = AgentService(workspace=tmp_path)
loaded = service.create_loop().boot()

View File

@ -57,6 +57,14 @@ def write_terminal_config(tmp_path: Path) -> Path:
return config_path
def write_terminal_config_with_device_session(tmp_path: Path) -> Path:
    """Write the base terminal config, then set ``sessionPeerFromDeviceName`` for ``terminal-dev``.

    Returns the path of the rewritten config file.
    """
    target = write_terminal_config(tmp_path)
    settings = json.loads(target.read_text(encoding="utf-8"))
    channel_options = settings["channels"]["terminal-dev"]["config"]
    channel_options["sessionPeerFromDeviceName"] = True
    target.write_text(json.dumps(settings), encoding="utf-8")
    return target
def test_terminal_websocket_connect_ping_and_message_roundtrip(tmp_path: Path) -> None:
config_path = write_terminal_config(tmp_path)
service = TerminalFakeAgentService(config_path=config_path)
@ -117,6 +125,98 @@ def test_terminal_websocket_connect_ping_and_message_roundtrip(tmp_path: Path) -
assert inbound.channel_identity.message_id == "device-001-000001"
# Checks that two websocket connections with different peer_id values but the same
# device_name receive the same session id, and that the inbound call carries the
# device-derived peer id.
# NOTE(review): indentation is lost in this view, so the nesting of the `with` blocks
# below cannot be confirmed from here.
def test_terminal_websocket_can_use_device_name_as_stable_session_peer(tmp_path: Path) -> None:
config_path = write_terminal_config_with_device_session(tmp_path)
service = TerminalFakeAgentService(config_path=config_path)
app = create_app(service=service, manage_service_lifecycle=False)
with TestClient(app) as client:
# First connection: only the connect handshake.
with client.websocket_connect("/api/channels/terminal-dev/ws") as websocket:
websocket.send_json(
{
"type": "connect",
"peer_id": "livekit-test-livekit-07291699",
"device_name": "desk-terminal",
}
)
first = websocket.receive_json()
# Second connection: a different peer_id, the same device_name, then one message.
with client.websocket_connect("/api/channels/terminal-dev/ws") as websocket:
websocket.send_json(
{
"type": "connect",
"peer_id": "livekit-test-livekit-3fb03fff",
"device_name": "desk-terminal",
}
)
second = websocket.receive_json()
websocket.send_json(
{
"type": "message",
"message_id": "livekit-test-livekit-3fb03fff-000001",
"text": "hello",
}
)
# Two frames are read after the message: first the ack, then the reply.
ack = websocket.receive_json()
reply = websocket.receive_json()
service.close()
# The session id is built from the device name, not from either peer_id.
assert first["session_id"] == "terminal-dev:local:device-desk-terminal"
assert second["session_id"] == first["session_id"]
assert ack["session_id"] == first["session_id"]
assert reply["text"] == "echo:hello"
assert service.inbound_calls[0].session_id == first["session_id"]
assert service.inbound_calls[0].channel_identity is not None
assert service.inbound_calls[0].channel_identity.peer_id == "device-desk-terminal"
# Checks that a reply to a message sent on an earlier connection is received on a later
# connection that uses the same device_name, keeping the original message_id.
# NOTE(review): indentation is lost in this view; whether the second connection is
# opened inside or after the first `with` block cannot be confirmed from here.
def test_terminal_websocket_reconnect_delivers_pending_reply_to_latest_device_connection(tmp_path: Path) -> None:
config_path = write_terminal_config_with_device_session(tmp_path)
# The fake service is constructed with delay_seconds=0.05.
service = TerminalFakeAgentService(config_path=config_path, delay_seconds=0.05)
app = create_app(service=service, manage_service_lifecycle=False)
with TestClient(app) as client:
with client.websocket_connect("/api/channels/terminal-dev/ws") as first_websocket:
first_websocket.send_json(
{
"type": "connect",
"peer_id": "livekit-test-livekit-old",
"device_name": "desk-terminal",
}
)
first = first_websocket.receive_json()
first_websocket.send_json(
{
"type": "message",
"message_id": "livekit-test-livekit-old-000001",
"text": "slow",
}
)
# Only the acceptance frame is read on the first connection; the reply is not.
assert first_websocket.receive_json()["accepted"] is True
with client.websocket_connect("/api/channels/terminal-dev/ws") as latest_websocket:
latest_websocket.send_json(
{
"type": "connect",
"peer_id": "livekit-test-livekit-new",
"device_name": "desk-terminal",
}
)
latest = latest_websocket.receive_json()
# The next frame on the new connection is expected to be the assistant reply.
reply = latest_websocket.receive_json()
service.close()
assert latest["session_id"] == first["session_id"]
# The reply keeps the message_id of the message sent on the first connection.
assert reply == {
"type": "message",
"role": "assistant",
"message_id": "livekit-test-livekit-old-000001",
"run_id": "run-1",
"text": "echo:slow",
"finish_reason": "stop",
}
def test_terminal_websocket_rejects_message_before_connect(tmp_path: Path) -> None:
config_path = write_terminal_config(tmp_path)
service = TerminalFakeAgentService(config_path=config_path)

View File

@ -1,6 +1,7 @@
from __future__ import annotations
import asyncio
import json
from beaver.tools.builtins import web
@ -8,8 +9,16 @@ from beaver.tools.builtins import web
class _FakeResponse:
headers = {"content-type": "text/html"}
status_code = 200
text = '<a class="result__a" href="https://example.com">Example</a>'
url = "https://example.com"
def __init__(self, url: str = "https://example.com") -> None:
self.url = url
if "duckduckgo.com" in url:
self.text = '<a class="result__a" href="https://duck.example.com">Duck Example</a>'
else:
self.text = (
'<li class="b_algo"><h2><a href="https://example.com">Example</a></h2>'
"<p>Example result</p></li>"
)
def raise_for_status(self) -> None:
return None
@ -17,6 +26,8 @@ class _FakeResponse:
class _FakeAsyncClient:
calls: list[dict[str, object]] = []
urls: list[str] = []
fail_bing = False
def __init__(self, **kwargs: object) -> None:
self.calls.append(kwargs)
@ -28,7 +39,11 @@ class _FakeAsyncClient:
return None
async def get(self, *args: object, **kwargs: object) -> _FakeResponse:
return _FakeResponse()
url = str(args[0])
self.urls.append(url)
if self.fail_bing and "bing.com" in url:
raise web.httpx.ConnectTimeout("bing unavailable")
return _FakeResponse(url)
def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None:
@ -42,3 +57,56 @@ def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None:
asyncio.run(_run())
assert [call.get("trust_env") for call in _FakeAsyncClient.calls] == [True, True]
def test_web_fetch_uses_short_connect_timeout(monkeypatch) -> None:
    """``WebFetchTool`` builds its HTTP client with a 5 s connect and 12 s read timeout."""
    # Reset the class-level state on the fake client before patching it in.
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    _FakeAsyncClient.fail_bing = False
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    fetch = web.WebFetchTool().execute(url="https://example.com")
    asyncio.run(fetch)

    client_timeout = _FakeAsyncClient.calls[0]["timeout"]
    assert isinstance(client_timeout, web.httpx.Timeout)
    assert client_timeout.connect == 5
    assert client_timeout.read == 12
def test_web_search_uses_reachable_bing_endpoint_first(monkeypatch) -> None:
    """With Bing reachable, search succeeds, both endpoints are requested, and the timeout is 5 s / 8 s."""
    # Reset the class-level state on the fake client before patching it in.
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    _FakeAsyncClient.fail_bing = False
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search = web.WebSearchTool().execute(query="weather beijing")
    payload = json.loads(asyncio.run(search))

    assert payload["success"] is True
    assert payload["engine"] in {"bing", "duckduckgo"}
    requested_urls = set(_FakeAsyncClient.urls)
    assert requested_urls == {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }
    client_timeout = _FakeAsyncClient.calls[0]["timeout"]
    assert isinstance(client_timeout, web.httpx.Timeout)
    assert client_timeout.connect == 5
    assert client_timeout.read == 8
def test_web_search_falls_back_when_bing_is_unavailable(monkeypatch) -> None:
    """When the fake client fails Bing requests, search still succeeds via DuckDuckGo."""
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    # Makes the fake client raise ConnectTimeout for any bing.com URL.
    _FakeAsyncClient.fail_bing = True
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search = web.WebSearchTool().execute(query="weather beijing")
    payload = json.loads(asyncio.run(search))

    assert payload["success"] is True
    assert payload["engine"] == "duckduckgo"
    requested_urls = set(_FakeAsyncClient.urls)
    assert requested_urls == {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }