```
feat(engine): 优化智能体循环中的助手消息处理逻辑

- 在没有工具调用时才添加助手消息到上下文
- 确保工具调用响应正确添加到消息上下文中
- 修复了消息构建的条件逻辑

fix(cron): 改进定时任务调度的时间解析功能

- 添加正则表达式导入用于时间显示解析
- 实现从显示文本中提取毫秒间隔的功能
- 增强整数转换的安全性,避免类型错误
- 优化定时任务配置的解析逻辑

feat(outlook): 增强Outlook集成的功能和稳定性

- 将默认超时时间从10秒增加到180秒
- 为状态检查函数添加可选的验证参数
- 串行执行邮件概览获取操作而非并行
- 改进连接状态验证逻辑

feat(channel): 添加设备名称作为会话标识的选项

- 为终端WebSocket适配器添加新的配置选项
- 实现基于设备名称生成会话对等ID的功能
- 记录原始对等ID和设备名称的元数据
- 支持从设备名称创建会话对等ID

feat(skills): 完善技能学习评估系统和进度跟踪

- 在应用启动时自动调度待评估的技能草稿
- 为技能评估工作创建独立的循环工厂
- 实现异步技能评估任务的取消和清理机制
- 添加技能评估进度报告和状态跟踪功能
- 扩展会话列表API以包含更多详细信息
- 防止对不存在的会话进行操作
- 优化技能草稿提交和评估的业务逻辑

perf(skills): 提升技能评估的并发性能

- 实现并行技能案例评估以提高效率
- 添加最大并行案例数的环境变量控制
- 实现实时评估进度更新和回调机制
- 优化评估过程中的资源管理和同步

refactor(services): 创建隔离的智能体循环实例

- 添加创建独立智能体循环的工厂方法
- 确保新循环继承运行时服务配置
- 支持技能评估等需要隔离环境的场景
```
This commit is contained in:
@ -749,14 +749,12 @@ class AgentLoop:
|
||||
model=final_model,
|
||||
user_id=user_id,
|
||||
)
|
||||
context_builder.add_assistant_message(
|
||||
messages,
|
||||
content=response.content,
|
||||
tool_calls=assistant_tool_calls or None,
|
||||
reasoning_content=response.reasoning_content,
|
||||
)
|
||||
|
||||
if not response.has_tool_calls:
|
||||
context_builder.add_assistant_message(
|
||||
messages,
|
||||
content=response.content,
|
||||
reasoning_content=response.reasoning_content,
|
||||
)
|
||||
final_text = response.content or ""
|
||||
if self._looks_like_raw_tool_call(final_text):
|
||||
final_text = RAW_TOOL_CALL_FALLBACK
|
||||
@ -795,6 +793,12 @@ class AgentLoop:
|
||||
)
|
||||
break
|
||||
|
||||
context_builder.add_assistant_message(
|
||||
messages,
|
||||
content=response.content,
|
||||
tool_calls=assistant_tool_calls or None,
|
||||
reasoning_content=response.reasoning_content,
|
||||
)
|
||||
iterations += 1
|
||||
for tool_call in response.tool_calls:
|
||||
result = await effective_tool_executor.execute_tool_call(tool_call, context=tool_context)
|
||||
|
||||
@ -6,6 +6,7 @@ normal Task instead of a detached agent turn.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
from uuid import uuid4
|
||||
@ -37,13 +38,18 @@ class CronSchedule:
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, payload: dict[str, Any]) -> "CronSchedule":
|
||||
kind = str(payload.get("kind") or "every")
|
||||
display = _optional_str(payload.get("display"))
|
||||
every_ms = _optional_int(payload.get("every_ms") or payload.get("everyMs"))
|
||||
if kind == "every" and every_ms is None:
|
||||
every_ms = _every_ms_from_display(display)
|
||||
return cls(
|
||||
kind=str(payload.get("kind") or "every"), # type: ignore[arg-type]
|
||||
kind=kind, # type: ignore[arg-type]
|
||||
at_ms=_optional_int(payload.get("at_ms") or payload.get("atMs")),
|
||||
every_ms=_optional_int(payload.get("every_ms") or payload.get("everyMs")),
|
||||
every_ms=every_ms,
|
||||
expr=_optional_str(payload.get("expr")),
|
||||
tz=_optional_str(payload.get("tz")),
|
||||
display=_optional_str(payload.get("display")),
|
||||
display=display,
|
||||
)
|
||||
|
||||
|
||||
@ -250,6 +256,17 @@ def _optional_str(value: Any) -> str | None:
|
||||
def _optional_int(value: Any) -> int | None:
    """Coerce ``value`` to ``int``, or return ``None`` when that is not possible.

    ``None`` and the empty string are treated as "not provided".  Any value
    that ``int()`` rejects also yields ``None`` instead of raising, so callers
    parsing loosely-typed payloads never see a conversion error.

    Args:
        value: Arbitrary payload value (number, numeric string, ``None``, ...).

    Returns:
        The integer value, or ``None`` if ``value`` is missing or unconvertible.
    """
    if value in (None, ""):
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int() raises it for non-finite floats such as
        # float("inf"), which is neither a TypeError nor a ValueError.
        return None
|
||||
|
||||
|
||||
def _every_ms_from_display(display: str | None) -> int | None:
    """Derive an interval in milliseconds from display text such as ``"every 30s"``.

    Only the exact form ``every <digits>s`` is recognised.  Matching ignores
    case and surrounding whitespace, and allows one or more whitespace
    characters after ``every``.  Other units (``m``, ``h``, ...) and fractional
    values are not parsed.

    Args:
        display: Human-readable schedule text, or ``None``.

    Returns:
        The interval in milliseconds, or ``None`` when ``display`` is missing,
        does not match, or describes a zero-length interval.
    """
    match = re.fullmatch(r"every\s+(\d+)s", (display or "").strip(), re.IGNORECASE)
    if match is None:
        return None
    interval_ms = int(match.group(1)) * 1000
    # "every 0s" is not a usable interval; report it as absent rather than 0.
    return interval_ms or None
|
||||
|
||||
|
||||
def _payload_mode(value: Any, *, default: CronPayloadMode = "notification") -> CronPayloadMode:
|
||||
@ -259,7 +276,3 @@ def _payload_mode(value: Any, *, default: CronPayloadMode = "notification") -> C
|
||||
if cleaned == "task":
|
||||
return "task"
|
||||
return "notification"
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
@ -73,9 +73,9 @@ OUTLOOK_TOOL_NAMES = [
|
||||
def _call_timeout_seconds() -> float:
|
||||
raw = os.getenv("BEAVER_OUTLOOK_MCP_CALL_TIMEOUT_SECONDS", "").strip()
|
||||
try:
|
||||
return max(1.0, float(raw)) if raw else 10.0
|
||||
return max(1.0, float(raw)) if raw else 180.0
|
||||
except ValueError:
|
||||
return 10.0
|
||||
return 180.0
|
||||
|
||||
|
||||
def _use_authz_mode(config: BeaverConfig) -> bool:
|
||||
@ -340,7 +340,7 @@ async def disconnect_workspace(config: BeaverConfig) -> dict[str, Any]:
|
||||
return {"ok": True, "removed_state": removed, "removed_mcp": False, "server_id": OUTLOOK_SERVER_ID}
|
||||
|
||||
|
||||
async def outlook_status(config: BeaverConfig, workspace: Path) -> dict[str, Any]:
|
||||
async def outlook_status(config: BeaverConfig, workspace: Path, *, verify: bool = False) -> dict[str, Any]:
|
||||
meta = _load_meta(workspace)
|
||||
if not _use_authz_mode(config):
|
||||
return {
|
||||
@ -364,7 +364,7 @@ async def outlook_status(config: BeaverConfig, workspace: Path) -> dict[str, Any
|
||||
connected = False
|
||||
auth_status: dict[str, Any] | None = None
|
||||
error: str | None = None
|
||||
if configured:
|
||||
if configured and verify:
|
||||
try:
|
||||
auth_status = await _call_outlook_mcp_tool(config, "auth_status", {}, scopes=["list_tools", "tool:auth_status"])
|
||||
connected = bool(auth_status.get("authenticated"))
|
||||
@ -403,38 +403,36 @@ async def get_overview(config: BeaverConfig, workspace: Path) -> dict[str, Any]:
|
||||
warnings.append(f"{label} unavailable: {exc}")
|
||||
return {"value": []}
|
||||
|
||||
inbox, sent, calendar = await asyncio.gather(
|
||||
_load_section(
|
||||
"inbox",
|
||||
_call_outlook_mcp_tool(
|
||||
config,
|
||||
"mail_list_messages",
|
||||
{"folder": "inbox", "top": OUTLOOK_OVERVIEW_MESSAGE_LIMIT, "skip": 0},
|
||||
scopes=["list_tools", "tool:mail_list_messages"],
|
||||
),
|
||||
inbox = await _load_section(
|
||||
"inbox",
|
||||
_call_outlook_mcp_tool(
|
||||
config,
|
||||
"mail_list_messages",
|
||||
{"folder": "inbox", "top": OUTLOOK_OVERVIEW_MESSAGE_LIMIT, "skip": 0},
|
||||
scopes=["list_tools", "tool:mail_list_messages"],
|
||||
),
|
||||
_load_section(
|
||||
"sent items",
|
||||
_call_outlook_mcp_tool(
|
||||
config,
|
||||
"mail_list_messages",
|
||||
{"folder": "sentitems", "top": OUTLOOK_OVERVIEW_MESSAGE_LIMIT, "skip": 0},
|
||||
scopes=["list_tools", "tool:mail_list_messages"],
|
||||
),
|
||||
)
|
||||
sent = await _load_section(
|
||||
"sent items",
|
||||
_call_outlook_mcp_tool(
|
||||
config,
|
||||
"mail_list_messages",
|
||||
{"folder": "sentitems", "top": OUTLOOK_OVERVIEW_MESSAGE_LIMIT, "skip": 0},
|
||||
scopes=["list_tools", "tool:mail_list_messages"],
|
||||
),
|
||||
_load_section(
|
||||
"calendar",
|
||||
_call_outlook_mcp_tool(
|
||||
config,
|
||||
"calendar_list_events",
|
||||
{
|
||||
"start_time": start_of_day.isoformat(),
|
||||
"end_time": end_of_day.isoformat(),
|
||||
"top": OUTLOOK_OVERVIEW_EVENT_LIMIT,
|
||||
"skip": 0,
|
||||
},
|
||||
scopes=["list_tools", "tool:calendar_list_events"],
|
||||
),
|
||||
)
|
||||
calendar = await _load_section(
|
||||
"calendar",
|
||||
_call_outlook_mcp_tool(
|
||||
config,
|
||||
"calendar_list_events",
|
||||
{
|
||||
"start_time": start_of_day.isoformat(),
|
||||
"end_time": end_of_day.isoformat(),
|
||||
"top": OUTLOOK_OVERVIEW_EVENT_LIMIT,
|
||||
"skip": 0,
|
||||
},
|
||||
scopes=["list_tools", "tool:calendar_list_events"],
|
||||
),
|
||||
)
|
||||
meta = _update_meta(workspace, last_overview_refresh_at=datetime.now().isoformat())
|
||||
|
||||
@ -331,6 +331,10 @@ class ChannelRuntime:
|
||||
event_recorder=self.record_event,
|
||||
heartbeat_seconds=float(cfg.config.get("heartbeat_seconds") or 30),
|
||||
max_message_chars=int(cfg.config.get("max_message_chars") or 20000),
|
||||
session_peer_from_device_name=bool(
|
||||
cfg.config.get("session_peer_from_device_name")
|
||||
or cfg.config.get("sessionPeerFromDeviceName")
|
||||
),
|
||||
)
|
||||
|
||||
if cfg.kind == "telegram" and cfg.mode in {"polling", "webhook"}:
|
||||
|
||||
@ -51,6 +51,7 @@ class TerminalWebSocketAdapter:
|
||||
event_recorder: Callable[..., None] | None = None,
|
||||
heartbeat_seconds: float = 30,
|
||||
max_message_chars: int = 20000,
|
||||
session_peer_from_device_name: bool = False,
|
||||
) -> None:
|
||||
self.channel_id = channel_id
|
||||
self.kind = kind
|
||||
@ -61,6 +62,7 @@ class TerminalWebSocketAdapter:
|
||||
self.event_recorder = event_recorder
|
||||
self.heartbeat_seconds = max(1.0, float(heartbeat_seconds))
|
||||
self.max_message_chars = max(1, int(max_message_chars))
|
||||
self.session_peer_from_device_name = bool(session_peer_from_device_name)
|
||||
self.started = False
|
||||
self._connections_by_session: dict[str, TerminalConnection] = {}
|
||||
self._session_by_peer: dict[str, str] = {}
|
||||
@ -131,14 +133,15 @@ class TerminalWebSocketAdapter:
|
||||
*,
|
||||
current: TerminalConnection | None,
|
||||
) -> TerminalConnection | None:
|
||||
peer_id = _clean(payload.get("peer_id"))
|
||||
if not peer_id:
|
||||
raw_peer_id = _clean(payload.get("peer_id"))
|
||||
if not raw_peer_id:
|
||||
await websocket.send_json({"type": "error", "error": "peer_id is required"})
|
||||
return current
|
||||
|
||||
thread_id = _clean(payload.get("thread_id")) or None
|
||||
user_id = _clean(payload.get("user_id")) or None
|
||||
device_name = _clean(payload.get("device_name"))
|
||||
peer_id = self._session_peer_id(raw_peer_id, device_name)
|
||||
capabilities = [str(item) for item in payload.get("capabilities") or [] if item is not None]
|
||||
identity = ChannelIdentity(
|
||||
channel_id=self.channel_id,
|
||||
@ -171,7 +174,12 @@ class TerminalWebSocketAdapter:
|
||||
self._record(
|
||||
kind="terminal_connected",
|
||||
session_id=session_id,
|
||||
metadata={"peer_id": peer_id, "device_name": device_name, "capabilities": capabilities},
|
||||
metadata={
|
||||
"peer_id": peer_id,
|
||||
"raw_peer_id": raw_peer_id,
|
||||
"device_name": device_name,
|
||||
"capabilities": capabilities,
|
||||
},
|
||||
)
|
||||
await websocket.send_json(
|
||||
{
|
||||
@ -299,3 +307,13 @@ class TerminalWebSocketAdapter:
|
||||
error=error,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
def _session_peer_id(self, peer_id: str, device_name: str) -> str:
    """Return the peer id under which this connection's session is keyed.

    When ``self.session_peer_from_device_name`` is set and ``device_name`` is
    non-empty, the result is ``"device-"`` followed by the device name as
    normalised by ``_clean_session_part``.  In every other case the
    client-supplied ``peer_id`` is returned unchanged.

    Args:
        peer_id: Peer id sent by the client.
        device_name: Device name sent by the client; may be empty.

    Returns:
        The peer id to use for session lookup.
    """
    if self.session_peer_from_device_name and device_name:
        return f"device-{_clean_session_part(device_name)}"
    return peer_id
|
||||
|
||||
|
||||
def _clean_session_part(value: str) -> str:
    """Normalise ``value`` so it can be embedded in a session peer id.

    Runs of whitespace are collapsed to a single ``-`` (leading and trailing
    whitespace is dropped), and every ``:`` is replaced with ``_``.
    NOTE(review): ``:`` is presumably reserved as a separator in session
    keys -- confirm against the session-key format.

    Args:
        value: Raw text, e.g. a device name.  Falsy values are treated as empty.

    Returns:
        The normalised text, or ``"unknown"`` when nothing remains.
    """
    # str.split() with no arguments already discards leading/trailing whitespace.
    collapsed = "-".join(str(value or "").split())
    return collapsed.replace(":", "_") or "unknown"
|
||||
|
||||
@ -264,6 +264,25 @@ async def _app_lifespan(
|
||||
)
|
||||
app.state.channel_runtime = channel_runtime
|
||||
await channel_runtime.start()
|
||||
for candidate in loaded.skill_learning_pipeline.list_candidates(status="review_pending"): # type: ignore[union-attr]
|
||||
skill_name = candidate.draft_skill_name
|
||||
draft_id = candidate.draft_id
|
||||
if not skill_name or not draft_id:
|
||||
continue
|
||||
if loaded.skill_learning_pipeline.get_eval_report(skill_name, draft_id) is not None: # type: ignore[union-attr]
|
||||
continue
|
||||
draft = loaded.skill_learning_pipeline.get_draft(skill_name, draft_id) # type: ignore[union-attr]
|
||||
if draft.status != "in_review":
|
||||
continue
|
||||
_schedule_skill_draft_eval(
|
||||
app,
|
||||
agent_service=attached_service,
|
||||
loop=attached_service.create_loop(),
|
||||
loaded=loaded,
|
||||
candidate_id=candidate.candidate_id,
|
||||
skill_name=skill_name,
|
||||
draft_id=draft_id,
|
||||
)
|
||||
except BaseException:
|
||||
if owns_service and started:
|
||||
with suppress(BaseException):
|
||||
@ -280,7 +299,10 @@ async def _app_lifespan(
|
||||
worker = SkillLearningWorker(
|
||||
pipeline=loaded.skill_learning_pipeline, # type: ignore[arg-type]
|
||||
provider_bundle_factory=lambda: attached_service._make_provider_bundle_for_task(loaded, {}), # noqa: SLF001
|
||||
replay_runner_factory=lambda: ReplayRunner(agent_loop=attached_service.create_loop()),
|
||||
replay_runner_factory=lambda: ReplayRunner(
|
||||
agent_loop=attached_service.create_loop(),
|
||||
isolated_loop_factory=attached_service.create_isolated_loop,
|
||||
),
|
||||
config=worker_config,
|
||||
)
|
||||
worker_task = asyncio.create_task(worker.run_forever())
|
||||
@ -289,6 +311,13 @@ async def _app_lifespan(
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
skill_eval_tasks = getattr(app.state, "skill_eval_tasks", {})
|
||||
for task in list(skill_eval_tasks.values()):
|
||||
task.cancel()
|
||||
for task in list(skill_eval_tasks.values()):
|
||||
with suppress(BaseException):
|
||||
await task
|
||||
skill_eval_tasks.clear()
|
||||
runtime = getattr(app.state, "channel_runtime", None)
|
||||
if isinstance(runtime, ChannelRuntime):
|
||||
with suppress(BaseException):
|
||||
@ -587,6 +616,7 @@ def create_app(
|
||||
)
|
||||
app.state.auth_tokens = {}
|
||||
app.state.handoff_codes = {}
|
||||
app.state.skill_eval_tasks = {}
|
||||
app.state.auth_file = Path(os.getenv("BEAVER_AUTH_FILE") or "")
|
||||
max_file_size = 50 * 1024 * 1024
|
||||
max_user_file_upload_size = _int_env("BEAVER_USER_FILES_MAX_UPLOAD_BYTES", 5 * 1024 * 1024 * 1024)
|
||||
@ -1250,7 +1280,7 @@ def create_app(
|
||||
session_manager = loaded.session_manager
|
||||
rows = session_manager.list_sessions_rich(
|
||||
limit=100,
|
||||
exclude_sources=["subagent", "notification"],
|
||||
exclude_sources=["subagent", "notification", "skill_replay_eval"],
|
||||
exclude_end_reasons=["archived", "deleted"],
|
||||
) # type: ignore[union-attr]
|
||||
return [
|
||||
@ -1259,6 +1289,9 @@ def create_app(
|
||||
"created_at": _iso_from_timestamp(row.get("started_at")),
|
||||
"updated_at": _iso_from_timestamp(row.get("last_active")),
|
||||
"path": str(row.get("id")),
|
||||
"source": row.get("source"),
|
||||
"title": row.get("title"),
|
||||
"preview": row.get("preview"),
|
||||
}
|
||||
for row in rows
|
||||
]
|
||||
@ -1337,7 +1370,9 @@ def create_app(
|
||||
async def get_session(session_id: str, request: Request) -> dict[str, Any]:
|
||||
loaded = get_agent_service(request).create_loop().boot()
|
||||
session_manager = loaded.session_manager
|
||||
session = session_manager.get_or_create(session_id, source="web") # type: ignore[union-attr]
|
||||
session = session_manager.get_session(session_id) # type: ignore[union-attr]
|
||||
if session is None:
|
||||
raise HTTPException(status_code=404, detail="Session not found")
|
||||
return _session_detail(session_manager, session_id, session) # type: ignore[arg-type]
|
||||
|
||||
@app.delete("/api/sessions/{session_id:path}")
|
||||
@ -2216,21 +2251,33 @@ def create_app(
|
||||
try:
|
||||
safety = loaded.skill_learning_pipeline.check_safety(skill_name, draft_id) # type: ignore[union-attr]
|
||||
if safety.passed and safety.risk_level != "critical":
|
||||
loaded.skill_learning_pipeline.submit_review( # type: ignore[union-attr]
|
||||
skill_name,
|
||||
draft_id,
|
||||
requested_by=str((payload or {}).get("requested_by") or "web"),
|
||||
notes=str((payload or {}).get("notes") or ""),
|
||||
)
|
||||
candidate_id = _skill_learning_candidate_id_for_draft(loaded, skill_name, draft_id)
|
||||
if candidate_id is not None:
|
||||
provider_bundle = agent_service._make_provider_bundle_for_task(loaded, {}) # noqa: SLF001
|
||||
await loaded.skill_learning_pipeline.evaluate_draft( # type: ignore[union-attr]
|
||||
candidate_id,
|
||||
draft = loaded.skill_learning_pipeline.get_draft(skill_name, draft_id) # type: ignore[union-attr]
|
||||
if draft.status == "draft":
|
||||
loaded.skill_learning_pipeline.submit_review( # type: ignore[union-attr]
|
||||
skill_name,
|
||||
draft_id,
|
||||
provider_bundle=provider_bundle,
|
||||
replay_runner=ReplayRunner(agent_loop=loop),
|
||||
requested_by=str((payload or {}).get("requested_by") or "web"),
|
||||
notes=str((payload or {}).get("notes") or ""),
|
||||
)
|
||||
elif draft.status not in {"in_review", "approved"}:
|
||||
raise ValueError("Draft cannot be submitted from its current status")
|
||||
candidate_id = _skill_learning_candidate_id_for_draft(loaded, skill_name, draft_id)
|
||||
eval_report = loaded.skill_learning_pipeline.get_eval_report(skill_name, draft_id) # type: ignore[union-attr]
|
||||
if candidate_id is not None and eval_report is None:
|
||||
loaded.skill_learning_store.transition_learning_candidate( # type: ignore[union-attr]
|
||||
candidate_id,
|
||||
"review_pending",
|
||||
event_type="eval_queued",
|
||||
last_error=None,
|
||||
)
|
||||
_schedule_skill_draft_eval(
|
||||
app,
|
||||
agent_service=agent_service,
|
||||
loop=loop,
|
||||
loaded=loaded,
|
||||
candidate_id=candidate_id,
|
||||
skill_name=skill_name,
|
||||
draft_id=draft_id,
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise _skill_draft_http_error(exc) from exc
|
||||
@ -3810,14 +3857,88 @@ def _skill_learning_candidate_task_text(loaded: Any, candidate: Any) -> str:
|
||||
return str(evidence.get("task_text") or "").strip()
|
||||
|
||||
|
||||
def _schedule_skill_draft_eval(
    app: FastAPI,
    *,
    agent_service: AgentService,
    loop: Any,
    loaded: Any,
    candidate_id: str,
    skill_name: str,
    draft_id: str,
) -> None:
    """Start a background evaluation of a skill draft unless one is already running.

    Tasks are tracked in ``app.state.skill_eval_tasks`` under the key
    ``"<skill_name>:<draft_id>"``.  If an unfinished task already exists for
    that key the call is a no-op.  Otherwise an initial "preparing" progress
    record is written, a task is created with ``asyncio.create_task`` (so this
    must be called while an event loop is running), and the task removes
    itself from the registry once it completes.

    Args:
        app: Application whose ``state.skill_eval_tasks`` dict tracks the tasks.
        agent_service: Supplies the provider bundle and the isolated-loop factory.
        loop: Agent loop handed to the ``ReplayRunner``.
        loaded: Booted runtime exposing ``skill_learning_pipeline``.
        candidate_id: Learning candidate the draft belongs to.
        skill_name: Name of the skill being evaluated.
        draft_id: Identifier of the draft being evaluated.
    """
    key = f"{skill_name}:{draft_id}"
    tasks: dict[str, asyncio.Task[None]] = app.state.skill_eval_tasks
    current = tasks.get(key)
    if current is not None and not current.done():
        return

    # NOTE(review): these totals are hard-coded; presumably placeholders that
    # are replaced once the evaluator reports real counts via the progress
    # callback -- confirm.
    loaded.skill_learning_pipeline.mark_eval_progress(  # type: ignore[union-attr]
        candidate_id,
        {
            "phase": "preparing",
            "completed_arms": 0,
            "total_arms": 20,
            "completed_cases": 0,
            "total_cases": 10,
        },
    )

    async def run_eval() -> None:
        try:
            provider_bundle = agent_service._make_provider_bundle_for_task(loaded, {})  # noqa: SLF001
            await loaded.skill_learning_pipeline.evaluate_draft(  # type: ignore[union-attr]
                candidate_id,
                skill_name,
                draft_id,
                provider_bundle=provider_bundle,
                replay_runner=ReplayRunner(
                    agent_loop=loop,
                    isolated_loop_factory=agent_service.create_isolated_loop,
                ),
                progress_callback=lambda progress: loaded.skill_learning_pipeline.mark_eval_progress(  # type: ignore[union-attr]
                    candidate_id,
                    progress,
                ),
            )
        except asyncio.CancelledError:
            # Cancellation must propagate; it is not recorded as a failure.
            raise
        except Exception as exc:
            loaded.skill_learning_pipeline.mark_eval_failed(candidate_id, str(exc))  # type: ignore[union-attr]

    task = asyncio.create_task(run_eval())
    tasks[key] = task

    def remove_completed(completed: asyncio.Task[None]) -> None:
        # Only drop the entry if it still refers to this task; a newer task
        # may have been registered under the same key in the meantime.
        if tasks.get(key) is completed:
            tasks.pop(key, None)

    task.add_done_callback(remove_completed)
|
||||
|
||||
|
||||
def _skill_draft_payload(loaded: Any, skill_name: str, draft_id: str, *, include_reviews: bool = False) -> dict[str, Any]:
|
||||
draft = loaded.skill_learning_pipeline.get_draft(skill_name, draft_id) # type: ignore[union-attr]
|
||||
safety = loaded.skill_learning_pipeline.get_safety_report(skill_name, draft_id) # type: ignore[union-attr]
|
||||
eval_report = loaded.skill_learning_pipeline.get_eval_report(skill_name, draft_id) # type: ignore[union-attr]
|
||||
candidate_id = _skill_learning_candidate_id_for_draft(loaded, skill_name, draft_id)
|
||||
candidate = loaded.skill_learning_pipeline.get_candidate(candidate_id) if candidate_id is not None else None # type: ignore[union-attr]
|
||||
if eval_report is not None:
|
||||
eval_status = eval_report.status
|
||||
elif candidate is None:
|
||||
eval_status = "not_applicable"
|
||||
elif candidate.status == "eval_failed":
|
||||
eval_status = "failed"
|
||||
elif draft.status in {"in_review", "approved"}:
|
||||
eval_status = "pending"
|
||||
else:
|
||||
eval_status = "not_started"
|
||||
payload = {
|
||||
**draft.to_dict(),
|
||||
"safety_report": safety.to_dict() if safety is not None else None,
|
||||
"eval_report": eval_report.to_dict() if eval_report is not None else None,
|
||||
"eval_status": eval_status,
|
||||
"eval_error": candidate.last_error if candidate is not None and candidate.status == "eval_failed" else None,
|
||||
"eval_progress": dict(candidate.eval_progress) if candidate is not None else None,
|
||||
"target_version": _skill_draft_target_version(loaded, draft.skill_name, draft.proposal_kind),
|
||||
"base_skill": _skill_draft_base_skill_payload(loaded, draft),
|
||||
}
|
||||
|
||||
@ -82,6 +82,7 @@ class SkillLearningCandidate:
|
||||
draft_id: str | None = None
|
||||
safety_report_id: str | None = None
|
||||
eval_report_id: str | None = None
|
||||
eval_progress: dict[str, Any] = field(default_factory=dict)
|
||||
created_at: str = ""
|
||||
updated_at: str = ""
|
||||
|
||||
@ -107,6 +108,7 @@ class SkillLearningCandidate:
|
||||
"draft_id": self.draft_id,
|
||||
"safety_report_id": self.safety_report_id,
|
||||
"eval_report_id": self.eval_report_id,
|
||||
"eval_progress": dict(self.eval_progress),
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
}
|
||||
@ -137,6 +139,7 @@ class SkillLearningCandidate:
|
||||
draft_id=_optional_str(payload.get("draft_id")),
|
||||
safety_report_id=_optional_str(payload.get("safety_report_id")),
|
||||
eval_report_id=_optional_str(payload.get("eval_report_id")),
|
||||
eval_progress=dict(payload.get("eval_progress") or {}),
|
||||
created_at=str(payload.get("created_at") or now),
|
||||
updated_at=str(payload.get("updated_at") or payload.get("created_at") or now),
|
||||
)
|
||||
|
||||
@ -91,6 +91,11 @@ class AgentService:
|
||||
self._loop.boot()
|
||||
return self._loop
|
||||
|
||||
def create_isolated_loop(self) -> AgentLoop:
    """Build a new ``AgentLoop`` that is separate from the service's own loop.

    The loop is constructed from this service's ``profile`` and ``loader``,
    and the entries currently in ``self._runtime_services`` are copied into
    its ``runtime_services`` mapping.  This method does not call ``boot()``
    on the new loop.

    Returns:
        The newly constructed ``AgentLoop``.
    """
    loop = AgentLoop(profile=self.profile, loader=self.loader)
    # update() copies the entries present right now.
    loop.runtime_services.update(self._runtime_services)
    return loop
|
||||
|
||||
def register_runtime_service(self, name: str, service: Any) -> None:
|
||||
"""Expose process-level services to tools during agent runs."""
|
||||
|
||||
|
||||
@ -2,8 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Any
|
||||
import os
|
||||
from typing import Any, Callable
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.engine.context import SkillContext
|
||||
@ -25,9 +27,17 @@ class SkillDraftEvaluator:
|
||||
run_store: RunMemoryStore,
|
||||
*,
|
||||
surrogate_evaluator: SurrogateToolEvaluator | None = None,
|
||||
max_parallel_cases: int | None = None,
|
||||
) -> None:
|
||||
self.run_store = run_store
|
||||
self.surrogate_evaluator = surrogate_evaluator or SurrogateToolEvaluator()
|
||||
configured_parallelism = max_parallel_cases
|
||||
if configured_parallelism is None:
|
||||
try:
|
||||
configured_parallelism = int(os.getenv("BEAVER_SKILL_EVAL_MAX_PARALLEL_CASES", "3") or "3")
|
||||
except ValueError:
|
||||
configured_parallelism = 3
|
||||
self.max_parallel_cases = max(1, configured_parallelism)
|
||||
|
||||
async def evaluate(
|
||||
self,
|
||||
@ -36,6 +46,7 @@ class SkillDraftEvaluator:
|
||||
draft: SkillDraft,
|
||||
provider_bundle: ProviderBundle | None,
|
||||
replay_runner: ReplayRunner | None = None,
|
||||
progress_callback: Callable[[dict[str, Any]], None] | None = None,
|
||||
) -> SkillDraftEvalReport:
|
||||
if provider_bundle is None or provider_bundle.main_provider is None:
|
||||
return self._skipped(candidate, draft)
|
||||
@ -59,6 +70,7 @@ class SkillDraftEvaluator:
|
||||
provider_bundle=provider_bundle,
|
||||
replay_runner=replay_runner,
|
||||
case_selection_meta=case_selection_meta,
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
return self._evaluate_heuristic(candidate, draft, runs)
|
||||
|
||||
@ -129,96 +141,72 @@ class SkillDraftEvaluator:
|
||||
provider_bundle: ProviderBundle,
|
||||
replay_runner: ReplayRunner,
|
||||
case_selection_meta: dict[str, Any] | None = None,
|
||||
progress_callback: Callable[[dict[str, Any]], None] | None = None,
|
||||
) -> SkillDraftEvalReport:
|
||||
case_reports: list[dict] = []
|
||||
legacy_cases: list[dict] = []
|
||||
for case in replay_cases:
|
||||
baseline = await replay_runner.run_arm(
|
||||
ReplayArmRequest(
|
||||
case_id=f"{case['run_id']}:baseline",
|
||||
arm="baseline",
|
||||
task_text=str(case["task_text"]),
|
||||
pinned_skill_names=list(case.get("baseline_skill_names") or []),
|
||||
pinned_skill_contexts=[],
|
||||
provider_bundle=provider_bundle,
|
||||
model_settings={"max_tool_iterations": 4, "temperature": 0.0},
|
||||
total_cases = len(replay_cases)
|
||||
total_arms = total_cases * 2
|
||||
completed_arms = 0
|
||||
completed_cases = 0
|
||||
progress_lock = asyncio.Lock()
|
||||
semaphore = asyncio.Semaphore(self.max_parallel_cases)
|
||||
_report_progress(
|
||||
progress_callback,
|
||||
completed_arms=completed_arms,
|
||||
total_arms=total_arms,
|
||||
completed_cases=0,
|
||||
total_cases=total_cases,
|
||||
)
|
||||
|
||||
async def mark_progress(*, case_completed: bool) -> None:
|
||||
nonlocal completed_arms, completed_cases
|
||||
async with progress_lock:
|
||||
completed_arms += 1
|
||||
if case_completed:
|
||||
completed_cases += 1
|
||||
_report_progress(
|
||||
progress_callback,
|
||||
completed_arms=completed_arms,
|
||||
total_arms=total_arms,
|
||||
completed_cases=completed_cases,
|
||||
total_cases=total_cases,
|
||||
)
|
||||
)
|
||||
candidate_arm = await replay_runner.run_arm(
|
||||
ReplayArmRequest(
|
||||
case_id=f"{case['run_id']}:candidate",
|
||||
arm="candidate",
|
||||
task_text=str(case["task_text"]),
|
||||
pinned_skill_names=[],
|
||||
pinned_skill_contexts=[_draft_skill_context(draft)],
|
||||
provider_bundle=provider_bundle,
|
||||
model_settings={"max_tool_iterations": 4, "temperature": 0.0},
|
||||
|
||||
async def evaluate_case(case: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||
async with semaphore:
|
||||
baseline = await replay_runner.run_arm(
|
||||
ReplayArmRequest(
|
||||
case_id=f"{case['run_id']}:baseline",
|
||||
arm="baseline",
|
||||
task_text=str(case["task_text"]),
|
||||
pinned_skill_names=list(case.get("baseline_skill_names") or []),
|
||||
pinned_skill_contexts=[],
|
||||
provider_bundle=provider_bundle,
|
||||
model_settings={"max_tool_iterations": 4, "temperature": 0.0},
|
||||
)
|
||||
)
|
||||
)
|
||||
surrogate = await self.surrogate_evaluator.evaluate(
|
||||
task_text=str(case["task_text"]),
|
||||
baseline=baseline,
|
||||
candidate=candidate_arm,
|
||||
)
|
||||
baseline_ability = _ability_score(
|
||||
case=case,
|
||||
arm=baseline,
|
||||
arm_name="baseline",
|
||||
)
|
||||
candidate_ability = _ability_score(
|
||||
case=case,
|
||||
arm=candidate_arm,
|
||||
arm_name="candidate",
|
||||
)
|
||||
baseline_score = baseline_ability["final_score"]
|
||||
candidate_score = candidate_ability["final_score"]
|
||||
tool_execution_score = {
|
||||
"baseline_score": surrogate["baseline_score"],
|
||||
"candidate_score": surrogate["candidate_score"],
|
||||
"delta": round(surrogate["candidate_score"] - surrogate["baseline_score"], 4),
|
||||
"score_role": "diagnostic_only",
|
||||
}
|
||||
case_report = {
|
||||
"run_id": case["run_id"],
|
||||
"task_id": case.get("task_id"),
|
||||
"session_id": case.get("session_id"),
|
||||
"task_text": case.get("task_text"),
|
||||
"synthetic": bool(case.get("synthetic")),
|
||||
"tier": case.get("tier") or ("bronze" if case.get("synthetic") else "gold"),
|
||||
"validator": case.get("validator"),
|
||||
"baseline": baseline,
|
||||
"candidate": candidate_arm,
|
||||
"baseline_score": baseline_score,
|
||||
"candidate_score": candidate_score,
|
||||
"delta": round(candidate_score - baseline_score, 4),
|
||||
"ability_score": {
|
||||
"baseline": baseline_ability,
|
||||
"candidate": candidate_ability,
|
||||
"delta": round(candidate_score - baseline_score, 4),
|
||||
},
|
||||
"tool_execution_score": tool_execution_score,
|
||||
"execution_coverage": _arm_mode_coverage(baseline, candidate_arm, "executed"),
|
||||
"surrogate_coverage": _arm_mode_coverage(baseline, candidate_arm, "surrogate"),
|
||||
"blocked_tool_count": _arm_mode_count(baseline, candidate_arm, "blocked"),
|
||||
"confidence": surrogate["confidence"],
|
||||
"tool_calls": [*baseline.get("tool_calls", []), *candidate_arm.get("tool_calls", [])],
|
||||
"artifacts": [*baseline.get("artifacts", []), *candidate_arm.get("artifacts", [])],
|
||||
"side_effects": [*baseline.get("side_effects", []), *candidate_arm.get("side_effects", [])],
|
||||
"validator_notes": list(surrogate.get("notes") or []),
|
||||
}
|
||||
case_reports.append(case_report)
|
||||
legacy_cases.append(
|
||||
{
|
||||
"run_id": case["run_id"],
|
||||
"session_id": case.get("session_id") or "",
|
||||
"task_text": case.get("task_text") or "",
|
||||
"synthetic": bool(case.get("synthetic")),
|
||||
"tier": case.get("tier") or ("bronze" if case.get("synthetic") else "gold"),
|
||||
"baseline_score": baseline_score,
|
||||
"candidate_score": candidate_score,
|
||||
"delta": round(candidate_score - baseline_score, 4),
|
||||
}
|
||||
)
|
||||
await mark_progress(case_completed=False)
|
||||
candidate_arm = await replay_runner.run_arm(
|
||||
ReplayArmRequest(
|
||||
case_id=f"{case['run_id']}:candidate",
|
||||
arm="candidate",
|
||||
task_text=str(case["task_text"]),
|
||||
pinned_skill_names=[],
|
||||
pinned_skill_contexts=[_draft_skill_context(draft)],
|
||||
provider_bundle=provider_bundle,
|
||||
model_settings={"max_tool_iterations": 4, "temperature": 0.0},
|
||||
)
|
||||
)
|
||||
await mark_progress(case_completed=True)
|
||||
surrogate = await self.surrogate_evaluator.evaluate(
|
||||
task_text=str(case["task_text"]),
|
||||
baseline=baseline,
|
||||
candidate=candidate_arm,
|
||||
)
|
||||
return _build_replay_case_reports(case, baseline, candidate_arm, surrogate)
|
||||
|
||||
results = await asyncio.gather(*(evaluate_case(case) for case in replay_cases))
|
||||
case_reports = [case_report for case_report, _ in results]
|
||||
legacy_cases = [legacy_case for _, legacy_case in results]
|
||||
preservation_report = _preservation_report(candidate, draft)
|
||||
return _report_from_case_reports(
|
||||
candidate,
|
||||
@ -248,6 +236,83 @@ class SkillDraftEvaluator:
|
||||
)
|
||||
|
||||
|
||||
def _build_replay_case_reports(
    case: dict[str, Any],
    baseline: dict[str, Any],
    candidate_arm: dict[str, Any],
    surrogate: dict[str, Any],
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Build the detailed and the compact report for one replayed case.

    Args:
        case: Replay case; must contain ``"run_id"``.  ``"tier"`` defaults to
            ``"bronze"`` for synthetic cases and ``"gold"`` otherwise.
        baseline: Result of the baseline arm.
        candidate_arm: Result of the candidate arm.
        surrogate: Surrogate evaluation; must contain ``"baseline_score"``,
            ``"candidate_score"`` and ``"confidence"``; ``"notes"`` is optional.

    Returns:
        ``(case_report, legacy_case)``.  ``case_report`` carries both arms, the
        ability scores, the surrogate tool-execution scores (marked
        ``"diagnostic_only"``), coverage figures and the concatenated tool
        calls, artifacts and side effects of both arms.  ``legacy_case`` is a
        compact summary with the scores, delta and tier only.
    """
    baseline_ability = _ability_score(case=case, arm=baseline, arm_name="baseline")
    candidate_ability = _ability_score(case=case, arm=candidate_arm, arm_name="candidate")
    baseline_score = baseline_ability["final_score"]
    candidate_score = candidate_ability["final_score"]
    # Computed once; the same delta appears in three places below.
    delta = round(candidate_score - baseline_score, 4)
    tier = case.get("tier") or ("bronze" if case.get("synthetic") else "gold")
    case_report = {
        "run_id": case["run_id"],
        "task_id": case.get("task_id"),
        "session_id": case.get("session_id"),
        "task_text": case.get("task_text"),
        "synthetic": bool(case.get("synthetic")),
        "tier": tier,
        "validator": case.get("validator"),
        "baseline": baseline,
        "candidate": candidate_arm,
        "baseline_score": baseline_score,
        "candidate_score": candidate_score,
        "delta": delta,
        "ability_score": {
            "baseline": baseline_ability,
            "candidate": candidate_ability,
            "delta": delta,
        },
        "tool_execution_score": {
            "baseline_score": surrogate["baseline_score"],
            "candidate_score": surrogate["candidate_score"],
            "delta": round(surrogate["candidate_score"] - surrogate["baseline_score"], 4),
            "score_role": "diagnostic_only",
        },
        "execution_coverage": _arm_mode_coverage(baseline, candidate_arm, "executed"),
        "surrogate_coverage": _arm_mode_coverage(baseline, candidate_arm, "surrogate"),
        "blocked_tool_count": _arm_mode_count(baseline, candidate_arm, "blocked"),
        "confidence": surrogate["confidence"],
        "tool_calls": [*baseline.get("tool_calls", []), *candidate_arm.get("tool_calls", [])],
        "artifacts": [*baseline.get("artifacts", []), *candidate_arm.get("artifacts", [])],
        "side_effects": [*baseline.get("side_effects", []), *candidate_arm.get("side_effects", [])],
        "validator_notes": list(surrogate.get("notes") or []),
    }
    legacy_case = {
        "run_id": case["run_id"],
        "session_id": case.get("session_id") or "",
        "task_text": case.get("task_text") or "",
        "synthetic": bool(case.get("synthetic")),
        "tier": tier,
        "baseline_score": baseline_score,
        "candidate_score": candidate_score,
        "delta": delta,
    }
    return case_report, legacy_case
|
||||
|
||||
|
||||
def _report_progress(
|
||||
callback: Callable[[dict[str, Any]], None] | None,
|
||||
*,
|
||||
completed_arms: int,
|
||||
total_arms: int,
|
||||
completed_cases: int,
|
||||
total_cases: int,
|
||||
) -> None:
|
||||
if callback is None:
|
||||
return
|
||||
callback(
|
||||
{
|
||||
"phase": "replaying",
|
||||
"completed_arms": completed_arms,
|
||||
"total_arms": total_arms,
|
||||
"completed_cases": completed_cases,
|
||||
"total_cases": total_cases,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _score_from_validation(validation: dict | None, success: bool) -> float:
|
||||
if isinstance(validation, dict) and "score" in validation:
|
||||
try:
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from typing import Any, Callable
|
||||
|
||||
from beaver.engine.providers import ProviderBundle
|
||||
from beaver.memory.skills import SkillDraftEvalReport, SkillDraftSafetyReport, SkillLearningCandidate, SkillLearningStore
|
||||
@ -174,12 +174,20 @@ class SkillLearningPipelineService:
|
||||
safety = self.get_safety_report(skill_name, draft_id)
|
||||
if safety is not None and (not safety.passed or safety.risk_level == "critical"):
|
||||
raise ValueError("Draft cannot enter review because safety check failed")
|
||||
return self.review_service.submit_for_review(
|
||||
review = self.review_service.submit_for_review(
|
||||
skill_name,
|
||||
draft_id,
|
||||
reviewer_request=notes,
|
||||
requested_by=requested_by,
|
||||
)
|
||||
self._mark_candidate_by_draft(
|
||||
skill_name,
|
||||
draft_id,
|
||||
"review_pending",
|
||||
"review_submitted",
|
||||
last_error=None,
|
||||
)
|
||||
return review
|
||||
|
||||
def approve(
|
||||
self,
|
||||
@ -258,9 +266,13 @@ class SkillLearningPipelineService:
|
||||
draft = self.get_draft(skill_name, draft_id)
|
||||
report = self.safety_checker.check(draft)
|
||||
self.learning_store.write_safety_report(report)
|
||||
status = "safety_failed" if not report.passed or report.risk_level == "critical" else "draft_ready"
|
||||
status = (
|
||||
"safety_failed"
|
||||
if not report.passed or report.risk_level == "critical"
|
||||
else self._candidate_status_for_draft(draft)
|
||||
)
|
||||
current = self._candidate_by_draft(skill_name, draft_id)
|
||||
if current is not None and current.status == "eval_failed" and status == "draft_ready":
|
||||
if current is not None and current.status == "eval_failed" and status != "safety_failed":
|
||||
status = "eval_failed"
|
||||
self._mark_candidate_by_draft(
|
||||
skill_name,
|
||||
@ -287,6 +299,7 @@ class SkillLearningPipelineService:
|
||||
*,
|
||||
provider_bundle: ProviderBundle | None,
|
||||
replay_runner: ReplayRunner | None = None,
|
||||
progress_callback: Callable[[dict[str, Any]], None] | None = None,
|
||||
) -> SkillDraftEvalReport:
|
||||
draft = self.get_draft(skill_name, draft_id)
|
||||
candidate = self.get_candidate(candidate_id)
|
||||
@ -296,13 +309,14 @@ class SkillLearningPipelineService:
|
||||
draft=draft,
|
||||
provider_bundle=provider_bundle,
|
||||
replay_runner=replay_runner,
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
self.learning_store.write_eval_report(report)
|
||||
if report.status == "skipped_provider_unavailable":
|
||||
status = "draft_ready"
|
||||
status = self._candidate_status_for_draft(draft)
|
||||
error = "eval skipped: provider unavailable"
|
||||
elif report.passed:
|
||||
status = "draft_ready"
|
||||
status = self._candidate_status_for_draft(draft)
|
||||
error = None
|
||||
else:
|
||||
status = "eval_failed"
|
||||
@ -316,11 +330,43 @@ class SkillLearningPipelineService:
|
||||
status,
|
||||
event_type="eval_completed",
|
||||
eval_report_id=report.report_id,
|
||||
eval_progress={
|
||||
"phase": "completed",
|
||||
"completed_arms": len(report.cases) * 2 if report.mode == "replay" else 0,
|
||||
"total_arms": len(report.cases) * 2 if report.mode == "replay" else 0,
|
||||
"completed_cases": len(report.cases),
|
||||
"total_cases": len(report.cases),
|
||||
},
|
||||
last_error=error,
|
||||
payload=report.to_dict(),
|
||||
)
|
||||
return report
|
||||
|
||||
def mark_eval_progress(self, candidate_id: str, progress: dict[str, Any]) -> SkillLearningCandidate:
    """Store a copy of ``progress`` as the candidate's ``eval_progress``.

    The store result is passed through ``_require_updated`` together with
    ``candidate_id`` and whatever that returns is returned here.
    """
    # Shallow copy so the stored mapping is not the caller's own dict object.
    snapshot = dict(progress)
    updated = self.learning_store.update_learning_candidate(
        candidate_id,
        eval_progress=snapshot,
    )
    return self._require_updated(updated, candidate_id)
|
||||
|
||||
def mark_eval_failed(self, candidate_id: str, error: str) -> SkillLearningCandidate:
    """Transition the candidate to ``"eval_failed"`` and record ``error``.

    The candidate's current ``eval_progress`` is copied and its ``"phase"``
    is overwritten with ``"failed"`` before the transition is written.
    """
    existing = self.get_candidate(candidate_id)
    failed_progress = dict(existing.eval_progress)
    failed_progress.update(phase="failed")
    transitioned = self.learning_store.transition_learning_candidate(
        candidate_id,
        "eval_failed",
        eval_progress=failed_progress,
        event_type="eval_failed",
        last_error=error,
        payload={"error": error},
    )
    return self._require_updated(transitioned, candidate_id)
|
||||
|
||||
def _validate_publish_gates(self, draft: SkillDraft, *, confirm_high_risk: bool) -> None:
|
||||
reviews = self.reviews_for_draft(draft.skill_name, draft.draft_id)
|
||||
if not any(review.status in {SkillReviewState.IN_REVIEW.value, SkillReviewState.APPROVED.value} for review in reviews):
|
||||
@ -372,6 +418,14 @@ class SkillLearningPipelineService:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
@staticmethod
def _candidate_status_for_draft(draft: SkillDraft) -> str:
    """Translate the draft's review status into a candidate status string.

    Returns ``"approved"`` for an approved draft, ``"review_pending"`` for a
    draft that is in review, and ``"draft_ready"`` for anything else.
    """
    draft_status = draft.status
    if draft_status == SkillReviewState.APPROVED.value:
        return "approved"
    return "review_pending" if draft_status == SkillReviewState.IN_REVIEW.value else "draft_ready"
|
||||
|
||||
@staticmethod
|
||||
def _require_updated(candidate: SkillLearningCandidate | None, candidate_id: str) -> SkillLearningCandidate:
|
||||
if candidate is None:
|
||||
|
||||
@ -3,7 +3,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Literal
|
||||
from time import perf_counter
|
||||
from typing import Any, Callable, Literal
|
||||
from uuid import uuid4
|
||||
|
||||
from beaver.tools.base import ToolContext, ToolResult, ToolSpec
|
||||
@ -59,6 +60,7 @@ class ReplayToolExecutor:
|
||||
*,
|
||||
context: ToolContext | None = None,
|
||||
) -> ToolResult:
|
||||
started_at = perf_counter()
|
||||
tool = self.registry.get(tool_name)
|
||||
spec = tool.spec if tool is not None else ToolSpec(
|
||||
name=tool_name,
|
||||
@ -84,6 +86,7 @@ class ReplayToolExecutor:
|
||||
"error": result.error,
|
||||
"content": result.content[:2000],
|
||||
}
|
||||
trace["duration_ms"] = round((perf_counter() - started_at) * 1000, 2)
|
||||
self.traces.append(trace)
|
||||
return result
|
||||
if mode == "surrogate":
|
||||
@ -92,6 +95,7 @@ class ReplayToolExecutor:
|
||||
"error": "replay_surrogate",
|
||||
"content": "Tool call recorded for surrogate evaluation.",
|
||||
}
|
||||
trace["duration_ms"] = round((perf_counter() - started_at) * 1000, 2)
|
||||
self.traces.append(trace)
|
||||
return ToolResult(
|
||||
success=True,
|
||||
@ -105,6 +109,7 @@ class ReplayToolExecutor:
|
||||
"error": "replay_blocked",
|
||||
"content": "Tool call blocked by replay policy.",
|
||||
}
|
||||
trace["duration_ms"] = round((perf_counter() - started_at) * 1000, 2)
|
||||
self.traces.append(trace)
|
||||
return ToolResult(
|
||||
success=False,
|
||||
@ -151,12 +156,20 @@ class ReplayArmRequest:
|
||||
|
||||
|
||||
class ReplayRunner:
|
||||
def __init__(self, *, agent_loop: Any, policy: ReplayToolPolicy | None = None) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
agent_loop: Any,
|
||||
policy: ReplayToolPolicy | None = None,
|
||||
isolated_loop_factory: Callable[[], Any] | None = None,
|
||||
) -> None:
|
||||
self.agent_loop = agent_loop
|
||||
self.policy = policy or ReplayToolPolicy()
|
||||
self.isolated_loop_factory = isolated_loop_factory
|
||||
|
||||
async def run_arm(self, request: ReplayArmRequest) -> dict[str, Any]:
|
||||
loaded = self.agent_loop.boot()
|
||||
target_loop = self.isolated_loop_factory() if self.isolated_loop_factory is not None else self.agent_loop
|
||||
loaded = target_loop.boot()
|
||||
replay_executor = ReplayToolExecutor(
|
||||
loaded.tool_executor,
|
||||
registry=loaded.tool_registry,
|
||||
@ -174,23 +187,42 @@ class ReplayRunner:
|
||||
"tool_executor_override": replay_executor,
|
||||
}
|
||||
try:
|
||||
result = await self.agent_loop.process_direct(request.task_text, **direct_kwargs)
|
||||
except RuntimeError as exc:
|
||||
if not _is_process_direct_disabled_while_running(exc) or not hasattr(self.agent_loop, "submit_direct"):
|
||||
raise
|
||||
result = await self.agent_loop.submit_direct(request.task_text, **direct_kwargs)
|
||||
return {
|
||||
"case_id": request.case_id,
|
||||
"arm": request.arm,
|
||||
"session_id": result.session_id,
|
||||
"run_id": result.run_id,
|
||||
"task_text": request.task_text,
|
||||
"finish_reason": result.finish_reason,
|
||||
"final_answer": result.output_text,
|
||||
"tool_calls": list(replay_executor.traces),
|
||||
"artifacts": [],
|
||||
"side_effects": _side_effects_from_traces(replay_executor.traces),
|
||||
}
|
||||
try:
|
||||
result = await target_loop.process_direct(request.task_text, **direct_kwargs)
|
||||
except RuntimeError as exc:
|
||||
if not _is_process_direct_disabled_while_running(exc) or not hasattr(target_loop, "submit_direct"):
|
||||
raise
|
||||
result = await target_loop.submit_direct(request.task_text, **direct_kwargs)
|
||||
session_manager = getattr(loaded, "session_manager", None)
|
||||
if session_manager is not None and hasattr(session_manager, "end_session"):
|
||||
session_manager.end_session(result.session_id, "evaluation_complete")
|
||||
return {
|
||||
"case_id": request.case_id,
|
||||
"arm": request.arm,
|
||||
"session_id": result.session_id,
|
||||
"run_id": result.run_id,
|
||||
"task_text": request.task_text,
|
||||
"finish_reason": result.finish_reason,
|
||||
"final_answer": result.output_text,
|
||||
"tool_calls": list(replay_executor.traces),
|
||||
"artifacts": [],
|
||||
"side_effects": _side_effects_from_traces(replay_executor.traces),
|
||||
}
|
||||
finally:
|
||||
if target_loop is not self.agent_loop and hasattr(target_loop, "close"):
|
||||
mcp_manager = getattr(loaded, "mcp_manager", None)
|
||||
if mcp_manager is not None and hasattr(mcp_manager, "close"):
|
||||
try:
|
||||
await mcp_manager.close()
|
||||
finally:
|
||||
closeables = getattr(loaded, "closeables", None)
|
||||
if isinstance(closeables, list):
|
||||
loaded.closeables = [
|
||||
(name, close_fn)
|
||||
for name, close_fn in closeables
|
||||
if name != "mcp_manager"
|
||||
]
|
||||
target_loop.close()
|
||||
|
||||
|
||||
def _is_process_direct_disabled_while_running(exc: RuntimeError) -> bool:
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass, field
|
||||
from html import unescape
|
||||
import json
|
||||
@ -51,7 +52,8 @@ class WebFetchTool:
|
||||
try:
|
||||
safe_url = _safe_url(url)
|
||||
limit = max(1000, min(int(max_chars or 12000), 50000))
|
||||
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=True) as client:
|
||||
timeout = httpx.Timeout(connect=5, read=12, write=5, pool=5)
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, trust_env=True) as client:
|
||||
response = await client.get(
|
||||
safe_url,
|
||||
headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"},
|
||||
@ -76,7 +78,7 @@ class WebFetchTool:
|
||||
@dataclass(slots=True)
|
||||
class WebSearchTool:
|
||||
name: str = "web_search"
|
||||
description: str = "Search the web using DuckDuckGo HTML results. No API key required."
|
||||
description: str = "Search the public web using HTML results. No API key required."
|
||||
toolset: str = "web"
|
||||
always_available: bool = False
|
||||
parameters: dict[str, Any] = field(
|
||||
@ -95,23 +97,102 @@ class WebSearchTool:
|
||||
if not str(query).strip():
|
||||
raise ValueError("query is required")
|
||||
bounded = max(1, min(int(limit or 5), 10))
|
||||
url = f"https://duckduckgo.com/html/?q={quote_plus(query)}"
|
||||
async with httpx.AsyncClient(timeout=20, follow_redirects=True, trust_env=True) as client:
|
||||
response = await client.get(url, headers={"User-Agent": "Mozilla/5.0 Beaver/1.0"})
|
||||
response.raise_for_status()
|
||||
html = response.text
|
||||
results: list[dict[str, str]] = []
|
||||
pattern = re.compile(
|
||||
r'<a[^>]+class="result__a"[^>]+href="(?P<url>[^"]+)"[^>]*>(?P<title>.*?)</a>',
|
||||
re.I | re.S,
|
||||
)
|
||||
for match in pattern.finditer(html):
|
||||
title = _strip_html(match.group("title"))
|
||||
result_url = unescape(match.group("url"))
|
||||
if title and result_url:
|
||||
results.append({"title": title, "url": result_url, "snippet": ""})
|
||||
if len(results) >= bounded:
|
||||
break
|
||||
return _json_result(True, query=query, results=results)
|
||||
headers = {"User-Agent": "Mozilla/5.0 Beaver/1.0"}
|
||||
timeout = httpx.Timeout(connect=5, read=8, write=5, pool=5)
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, trust_env=True) as client:
|
||||
tasks = [
|
||||
asyncio.create_task(
|
||||
_search_bing(
|
||||
client,
|
||||
query=query,
|
||||
limit=bounded,
|
||||
headers=headers,
|
||||
)
|
||||
),
|
||||
asyncio.create_task(
|
||||
_search_duckduckgo(
|
||||
client,
|
||||
query=query,
|
||||
limit=bounded,
|
||||
headers=headers,
|
||||
)
|
||||
),
|
||||
]
|
||||
errors: list[str] = []
|
||||
try:
|
||||
for completed in asyncio.as_completed(tasks):
|
||||
try:
|
||||
engine, results = await completed
|
||||
except Exception as exc:
|
||||
errors.append(str(exc))
|
||||
continue
|
||||
if results:
|
||||
return _json_result(True, query=query, engine=engine, results=results)
|
||||
detail = "; ".join(error for error in errors if error) or "no search results"
|
||||
return _json_result(False, query=query, error=detail)
|
||||
finally:
|
||||
for task in tasks:
|
||||
if not task.done():
|
||||
task.cancel()
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
except Exception as exc:
|
||||
return _json_result(False, query=query, error=str(exc))
|
||||
|
||||
|
||||
async def _search_bing(
    client: httpx.AsyncClient,
    *,
    query: str,
    limit: int,
    headers: dict[str, str],
) -> tuple[str, list[dict[str, str]]]:
    """Fetch a Bing results page for ``query`` and parse it.

    Returns the pair ``("bing", results)`` where ``results`` comes from
    ``_parse_bing_results``. A non-success HTTP status is raised by
    ``raise_for_status``.
    """
    search_url = f"https://www.bing.com/search?q={quote_plus(query)}"
    page = await client.get(search_url, headers=headers)
    page.raise_for_status()
    hits = _parse_bing_results(page.text, limit)
    return "bing", hits
|
||||
|
||||
|
||||
async def _search_duckduckgo(
    client: httpx.AsyncClient,
    *,
    query: str,
    limit: int,
    headers: dict[str, str],
) -> tuple[str, list[dict[str, str]]]:
    """Fetch a DuckDuckGo HTML results page for ``query`` and parse it.

    Returns the pair ``("duckduckgo", results)`` where ``results`` comes from
    ``_parse_duckduckgo_results``. A non-success HTTP status is raised by
    ``raise_for_status``.
    """
    search_url = f"https://duckduckgo.com/html/?q={quote_plus(query)}"
    page = await client.get(search_url, headers=headers)
    page.raise_for_status()
    hits = _parse_duckduckgo_results(page.text, limit)
    return "duckduckgo", hits
|
||||
|
||||
|
||||
def _parse_bing_results(html: str, limit: int) -> list[dict[str, str]]:
    """Extract up to ``limit`` results from a Bing HTML results page.

    Each result item (an ``<li>`` whose class list contains ``b_algo``) is
    parsed on its own: the first ``<h2><a href=...>`` link supplies the URL
    and title, and the first ``<p>`` after that link supplies the snippet.

    Args:
        html: Raw HTML of the results page.
        limit: Maximum number of results to collect.

    Returns:
        A list of ``{"title", "url", "snippet"}`` dicts; ``snippet`` is ``""``
        when the item has no paragraph.
    """
    item_open = re.compile(r'<li[^>]+class="[^"]*\bb_algo\b[^"]*"[^>]*>', re.I)
    heading_link = re.compile(
        r'<h2[^>]*>\s*<a[^>]+href="(?P<url>[^"]+)"[^>]*>(?P<title>.*?)</a>',
        re.I | re.S,
    )
    # ``\b`` keeps tags such as <pre> or <path> from being taken for <p>.
    paragraph = re.compile(r'<p\b[^>]*>(?P<snippet>.*?)</p>', re.I | re.S)

    results: list[dict[str, str]] = []
    openings = list(item_open.finditer(html))
    for index, opening in enumerate(openings):
        # Bound the search to this item so a link or snippet is never borrowed
        # from the next result.
        if index + 1 < len(openings):
            segment_end = openings[index + 1].start()
        else:
            segment_end = len(html)
        segment = html[opening.end():segment_end]

        link = heading_link.search(segment)
        if link is None:
            continue
        title = _strip_html(link.group("title"))
        result_url = unescape(link.group("url"))
        # The snippet is looked up separately: a single pattern ending in
        # ``.*?(?:<p>...</p>)?`` lets the lazy ``.*?`` match nothing and the
        # optional group match empty, so the snippet was almost never captured.
        body = paragraph.search(segment, link.end())
        snippet = _strip_html(body.group("snippet")) if body is not None else ""
        if title and result_url:
            results.append({"title": title, "url": result_url, "snippet": snippet})
        if len(results) >= limit:
            break
    return results
|
||||
|
||||
|
||||
def _parse_duckduckgo_results(html: str, limit: int) -> list[dict[str, str]]:
    """Collect result links from a DuckDuckGo HTML results page.

    Every ``<a class="result__a" href=...>`` anchor yields one entry with the
    tag-stripped title, the HTML-unescaped URL, and an empty snippet. Scanning
    stops once ``limit`` entries have been gathered.
    """
    anchor_re = re.compile(
        r'<a[^>]+class="result__a"[^>]+href="(?P<url>[^"]+)"[^>]*>(?P<title>.*?)</a>',
        re.IGNORECASE | re.DOTALL,
    )
    collected: list[dict[str, str]] = []
    for anchor in anchor_re.finditer(html):
        link_text = _strip_html(anchor.group("title"))
        target = unescape(anchor.group("url"))
        if link_text and target:
            collected.append({"title": link_text, "url": target, "snippet": ""})
        if len(collected) >= limit:
            break
    return collected
|
||||
|
||||
@ -29,6 +29,18 @@ def test_schedule_from_frontend_payload() -> None:
|
||||
assert cron.kind == "cron"
|
||||
|
||||
|
||||
def test_legacy_interval_schedule_recovers_duration_from_display() -> None:
    """An ``every`` payload with ``every_ms=None`` gets its interval from ``display``."""
    legacy_payload = {
        "kind": "every",
        "every_ms": None,
        "display": "every 1800s",
    }
    recovered = CronSchedule.from_dict(legacy_payload)

    expected_ms = 30 * 60 * 1000
    assert recovered.every_ms == expected_ms
|
||||
|
||||
|
||||
def test_compute_next_run_skips_missed_interval() -> None:
|
||||
schedule = CronSchedule(kind="every", every_ms=60_000)
|
||||
assert compute_next_run(schedule, now_ms=1_000_000, last_run_at_ms=0) > 1_000_000
|
||||
@ -80,6 +92,22 @@ def test_manual_run_records_scheduled_run_output(tmp_path) -> None:
|
||||
assert updated.to_api_dict()["last_scheduled_run_id"] == run.scheduled_run_id
|
||||
|
||||
|
||||
def test_persisted_interval_job_keeps_schedule_and_next_run(tmp_path) -> None:
    """A job read back by a second ``CronService`` keeps its interval and next run time."""
    jobs_file = tmp_path / "jobs.json"
    half_hour_ms = 30 * 60 * 1000

    writer = CronService(jobs_file)
    created = writer.add_job(
        name="Hydration reminder",
        message="Drink water",
        schedule=CronSchedule(kind="every", every_ms=half_hour_ms),
    )

    # A fresh service instance over the same path reads what was stored.
    reader = CronService(jobs_file)
    restored = reader.get_job(created.id)

    assert restored is not None
    assert restored.schedule.every_ms == half_hour_ms
    assert restored.next_run_at_ms == created.next_run_at_ms
|
||||
|
||||
|
||||
def test_cron_tool_uses_runtime_service(tmp_path) -> None:
|
||||
service = CronService(tmp_path / "jobs.json")
|
||||
tool = CronTool()
|
||||
|
||||
71
app-instance/backend/tests/unit/test_outlook_integration.py
Normal file
71
app-instance/backend/tests/unit/test_outlook_integration.py
Normal file
@ -0,0 +1,71 @@
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from beaver.foundation.config.schema import AuthzConfig, BackendIdentityConfig, BeaverConfig
|
||||
from beaver.integrations import outlook
|
||||
|
||||
|
||||
class _FakeAuthzClient:
|
||||
async def get_outlook_settings(self, backend_id: str) -> dict:
|
||||
assert backend_id == "steven"
|
||||
return {
|
||||
"configured": True,
|
||||
"email": "steven.yx.li@boardware.com",
|
||||
"server": "mail.boardware.com.mo",
|
||||
}
|
||||
|
||||
|
||||
def _authz_config() -> BeaverConfig:
    """Build a ``BeaverConfig`` with authz enabled and backend identity ``"steven"``."""
    authz_section = AuthzConfig(
        enabled=True,
        base_url="http://authz.example",
        outlook_mcp_url="http://outlook-mcp.example/mcp",
    )
    identity_section = BackendIdentityConfig(
        backend_id="steven",
        client_id="steven",
        client_secret="secret",
    )
    return BeaverConfig(authz=authz_section, backend_identity=identity_section)
|
||||
|
||||
|
||||
def test_outlook_status_does_not_probe_mcp_by_default(monkeypatch: pytest.MonkeyPatch, tmp_path) -> None:
    """``outlook_status`` called without extra arguments must not call the Outlook MCP tool."""

    async def fail_if_called(*_args, **_kwargs):
        raise AssertionError("status should not call Outlook MCP by default")

    monkeypatch.setattr(outlook, "_authz_client", lambda _config: _FakeAuthzClient())
    monkeypatch.setattr(outlook, "_call_outlook_mcp_tool", fail_if_called)

    status = asyncio.run(outlook.outlook_status(_authz_config(), tmp_path))

    assert status["configured"] is True
    assert status["connected"] is False
    # Neither an auth status nor an error is reported on this path.
    for absent_key in ("auth_status", "error"):
        assert status[absent_key] is None
|
||||
|
||||
|
||||
def test_outlook_overview_loads_sections_serially(monkeypatch: pytest.MonkeyPatch, tmp_path) -> None:
    """``get_overview`` issues its three MCP calls one at a time, in a fixed order."""
    monkeypatch.setattr(outlook, "_authz_client", lambda _config: _FakeAuthzClient())
    in_flight = 0
    peak_in_flight = 0
    requested_tools: list[str] = []

    async def fake_call(_config, tool_name: str, _arguments, **_kwargs):
        nonlocal in_flight, peak_in_flight
        requested_tools.append(tool_name)
        in_flight += 1
        if in_flight > peak_in_flight:
            peak_in_flight = in_flight
        # Yield to the event loop so overlapping calls would be observed.
        await asyncio.sleep(0.01)
        in_flight -= 1
        return {"value": []}

    monkeypatch.setattr(outlook, "_call_outlook_mcp_tool", fake_call)

    overview = asyncio.run(outlook.get_overview(_authz_config(), tmp_path))

    assert overview["warnings"] == []
    assert requested_tools == ["mail_list_messages", "mail_list_messages", "calendar_list_events"]
    assert peak_in_flight == 1
|
||||
@ -27,6 +27,7 @@ class StubProvider(LLMProvider):
|
||||
def __init__(self, responses: list[LLMResponse]) -> None:
|
||||
super().__init__()
|
||||
self._responses = list(responses)
|
||||
self.calls: list[dict] = []
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
@ -37,6 +38,16 @@ class StubProvider(LLMProvider):
|
||||
temperature: float = 0.7,
|
||||
thinking_enabled: bool | None = None,
|
||||
) -> LLMResponse:
|
||||
self.calls.append(
|
||||
{
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"model": model,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
"thinking_enabled": thinking_enabled,
|
||||
}
|
||||
)
|
||||
if not self._responses:
|
||||
raise AssertionError("No stubbed provider responses left")
|
||||
return self._responses.pop(0)
|
||||
@ -704,32 +715,33 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
|
||||
skill_assembler=StubSkillAssembler([skill]),
|
||||
)
|
||||
loop = AgentLoop(loader=loader)
|
||||
provider = StubProvider(
|
||||
[
|
||||
LLMResponse(
|
||||
content="Need a tool.",
|
||||
finish_reason="tool_calls",
|
||||
tool_calls=[_tool_call()],
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
LLMResponse(
|
||||
content="Need another tool.",
|
||||
finish_reason="tool_calls",
|
||||
tool_calls=[_tool_call(call_id="call-2")],
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
LLMResponse(
|
||||
content="Based on the available tool result, the container likely failed during startup.",
|
||||
finish_reason="stop",
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
]
|
||||
)
|
||||
bundle = ProviderBundle(
|
||||
main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"),
|
||||
main_provider=StubProvider(
|
||||
[
|
||||
LLMResponse(
|
||||
content="Need a tool.",
|
||||
finish_reason="tool_calls",
|
||||
tool_calls=[_tool_call()],
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
LLMResponse(
|
||||
content="Need another tool.",
|
||||
finish_reason="tool_calls",
|
||||
tool_calls=[_tool_call(call_id="call-2")],
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
LLMResponse(
|
||||
content="Based on the available tool result, the container likely failed during startup.",
|
||||
finish_reason="stop",
|
||||
provider_name="stub",
|
||||
model="stub-model",
|
||||
),
|
||||
]
|
||||
),
|
||||
main_provider=provider,
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
@ -744,6 +756,21 @@ def test_agent_loop_records_max_tool_iterations_as_failed_skill_effect(tmp_path:
|
||||
assert result.finish_reason == "max_tool_iterations_finalized"
|
||||
assert "Based on the available tool result" in result.output_text
|
||||
assert "Tool loop stopped" not in result.output_text
|
||||
finalization_messages = provider.calls[-1]["messages"]
|
||||
assistant_tool_call_ids = [
|
||||
call["id"]
|
||||
for message in finalization_messages
|
||||
for call in message.get("tool_calls", [])
|
||||
if message.get("role") == "assistant"
|
||||
]
|
||||
tool_result_ids = [
|
||||
message.get("tool_call_id")
|
||||
for message in finalization_messages
|
||||
if message.get("role") == "tool"
|
||||
]
|
||||
assert "call-1" in assistant_tool_call_ids
|
||||
assert "call-2" not in assistant_tool_call_ids
|
||||
assert set(assistant_tool_call_ids).issubset(set(tool_result_ids))
|
||||
effect_records = loaded.run_memory_store.list_skill_effects("docker-debug", version="v0007")
|
||||
assert effect_records[-1].run_id == result.run_id
|
||||
assert effect_records[-1].success is False
|
||||
|
||||
@ -105,3 +105,29 @@ def test_web_archive_route_does_not_create_archive_suffix_session(tmp_path: Path
|
||||
assert loaded.session_manager.get_session("web:alpha")["end_reason"] == "archived" # type: ignore[union-attr]
|
||||
assert loaded.session_manager.get_session("web:alpha/archive") is None # type: ignore[union-attr]
|
||||
assert sessions_response.json() == []
|
||||
|
||||
|
||||
def test_web_session_list_hides_skill_replay_evaluation_sessions(tmp_path: Path) -> None:
    """``GET /api/sessions`` lists the ``web`` session but not the ``skill_replay_eval`` one."""
    service = AgentService(workspace=tmp_path)
    sessions = service.create_loop().boot().session_manager
    sessions.ensure_session("eval-session", source="skill_replay_eval")  # type: ignore[union-attr]
    sessions.ensure_session("web:visible", source="web")  # type: ignore[union-attr]
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        response = client.get("/api/sessions")

    assert response.status_code == 200
    listed_keys = [item["key"] for item in response.json()]
    assert listed_keys == ["web:visible"]
|
||||
|
||||
|
||||
def test_get_missing_session_returns_404_without_creating_it(tmp_path: Path) -> None:
    """Fetching an unknown session returns 404 and the session still does not exist afterwards."""
    service = AgentService(workspace=tmp_path)
    app = create_app(service=service, manage_service_lifecycle=False)

    with TestClient(app) as client:
        response = client.get("/api/sessions/missing-session")

    assert response.status_code == 404
    session_manager = service.create_loop().boot().session_manager
    assert session_manager.get_session("missing-session") is None  # type: ignore[union-attr]
|
||||
|
||||
@ -201,6 +201,22 @@ class FakeReplayRunner:
|
||||
}
|
||||
|
||||
|
||||
class ConcurrentReplayRunner(FakeReplayRunner):
    """``FakeReplayRunner`` that tracks how many ``run_arm`` calls overlap.

    ``active`` is the number of calls currently in progress and ``max_active``
    is the highest value ``active`` has reached.
    """

    def __init__(self) -> None:
        super().__init__()
        self.active = 0
        self.max_active = 0

    async def run_arm(self, request):
        """Count this call as active, pause briefly, then delegate to the parent."""
        self.active += 1
        if self.active > self.max_active:
            self.max_active = self.active
        # Short pause so calls started concurrently overlap.
        await asyncio.sleep(0.02)
        try:
            arm_report = await super().run_arm(request)
            return arm_report
        finally:
            self.active -= 1
|
||||
|
||||
|
||||
def test_eval_report_includes_replay_case_and_coverage(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
draft = pipeline.draft_service.create_new_skill_draft(
|
||||
@ -238,6 +254,94 @@ def test_eval_report_includes_replay_case_and_coverage(tmp_path: Path) -> None:
|
||||
assert report.tool_execution_summary["score_role"] == "diagnostic_only"
|
||||
|
||||
|
||||
def test_replay_eval_reports_arm_progress(tmp_path: Path) -> None:
    """The progress callback sees a 0/20-arm snapshot first and a 20/20-arm snapshot last."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="release-checklist",
        proposed_content="# Release\n\nRun tests.",
        proposed_frontmatter={"description": "release", "tools": []},
        created_by="test",
        reason="test",
    )
    pipeline.learning_store.update_learning_candidate(
        "candidate-1",
        draft_skill_name=draft.skill_name,
        draft_id=draft.draft_id,
    )
    snapshots: list[dict] = []

    evaluation = pipeline.evaluate_draft(
        "candidate-1",
        draft.skill_name,
        draft.draft_id,
        provider_bundle=_bundle(),
        replay_runner=FakeReplayRunner(),
        progress_callback=snapshots.append,
    )
    asyncio.run(evaluation)

    first_expected = {
        "phase": "replaying",
        "completed_arms": 0,
        "total_arms": 20,
        "completed_cases": 0,
        "total_cases": 10,
    }
    last_expected = {
        "phase": "replaying",
        "completed_arms": 20,
        "total_arms": 20,
        "completed_cases": 10,
        "total_cases": 10,
    }
    assert snapshots[0] == first_expected
    assert snapshots[-1] == last_expected
|
||||
|
||||
|
||||
def test_replay_eval_runs_cases_with_bounded_parallelism(tmp_path: Path) -> None:
    """With ``max_parallel_cases=2`` at most two arms overlap and case order is kept."""
    pipeline = _pipeline(tmp_path)
    pipeline.evaluator = SkillDraftEvaluator(
        pipeline.learning_service.run_store,
        max_parallel_cases=2,
    )
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="release-checklist",
        proposed_content="# Release\n\nRun tests.",
        proposed_frontmatter={"description": "release", "tools": []},
        created_by="test",
        reason="test",
    )
    pipeline.learning_store.update_learning_candidate(
        "candidate-1",
        draft_skill_name=draft.skill_name,
        draft_id=draft.draft_id,
    )
    counting_runner = ConcurrentReplayRunner()

    evaluation = pipeline.evaluate_draft(
        "candidate-1",
        draft.skill_name,
        draft.draft_id,
        provider_bundle=_bundle(),
        replay_runner=counting_runner,
    )
    report = asyncio.run(evaluation)

    assert counting_runner.max_active == 2
    expected_run_ids = [
        "run-1",
        "synthetic:candidate-1:01",
        "synthetic:candidate-1:02",
        "synthetic:candidate-1:03",
        "synthetic:candidate-1:04",
        "synthetic:candidate-1:05",
        "synthetic:candidate-1:06",
        "synthetic:candidate-1:07",
        "synthetic:candidate-1:08",
        "synthetic:candidate-1:09",
    ]
    reported_run_ids = [case["run_id"] for case in report.cases]
    assert reported_run_ids == expected_run_ids
|
||||
|
||||
|
||||
def test_replay_main_score_uses_validator_not_tool_success(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
pipeline.learning_store.update_learning_candidate(
|
||||
|
||||
@ -98,6 +98,27 @@ def test_pipeline_does_not_resubmit_terminal_draft(tmp_path: Path) -> None:
|
||||
pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
|
||||
|
||||
|
||||
def test_safety_recheck_keeps_submitted_candidate_in_review(tmp_path: Path) -> None:
    """Running the safety check again after review submission leaves the candidate ``review_pending``."""
    pipeline = _pipeline(tmp_path)
    draft = pipeline.draft_service.create_new_skill_draft(
        skill_name="reviewed-skill",
        proposed_content="# Reviewed Skill\n\nDo the thing.",
        proposed_frontmatter={"description": "reviewed"},
        created_by="test",
        reason="test",
    )

    # Link the stored candidate to the new draft.
    linked = pipeline.get_candidate("candidate-1")
    linked.draft_skill_name = draft.skill_name
    linked.draft_id = draft.draft_id
    pipeline.learning_store.record_learning_candidate(linked)

    # Check, submit for review, then check again.
    pipeline.check_safety(draft.skill_name, draft.draft_id)
    pipeline.submit_review(draft.skill_name, draft.draft_id, requested_by="tester")
    pipeline.check_safety(draft.skill_name, draft.draft_id)

    final_status = pipeline.get_candidate("candidate-1").status
    assert final_status == "review_pending"
|
||||
|
||||
|
||||
def test_pipeline_reject_blocks_publish(tmp_path: Path) -> None:
|
||||
pipeline = _pipeline(tmp_path)
|
||||
draft = pipeline.draft_service.create_new_skill_draft(
|
||||
|
||||
@ -7,8 +7,17 @@ from beaver.skills.learning.replay import ReplayArmRequest, ReplayRunner
|
||||
|
||||
|
||||
class FakeAgentLoop:
|
||||
def __init__(self) -> None:
|
||||
self.ended_sessions: list[tuple[str, str]] = []
|
||||
|
||||
def boot(self):
|
||||
return SimpleNamespace(tool_executor=SimpleNamespace(), tool_registry=SimpleNamespace(get=lambda name: None))
|
||||
return SimpleNamespace(
|
||||
tool_executor=SimpleNamespace(),
|
||||
tool_registry=SimpleNamespace(get=lambda name: None),
|
||||
session_manager=SimpleNamespace(
|
||||
end_session=lambda session_id, reason: self.ended_sessions.append((session_id, reason))
|
||||
),
|
||||
)
|
||||
|
||||
async def process_direct(self, task: str, **kwargs):
|
||||
executor = kwargs["tool_executor_override"]
|
||||
@ -18,6 +27,7 @@ class FakeAgentLoop:
|
||||
|
||||
class FakeRunningAgentLoop(FakeAgentLoop):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.process_direct_calls = 0
|
||||
self.submit_direct_calls: list[tuple[str, dict]] = []
|
||||
|
||||
@ -35,6 +45,29 @@ class FakeRunningAgentLoop(FakeAgentLoop):
|
||||
return SimpleNamespace(session_id="session-queued", run_id="run-queued", output_text="queued done", finish_reason="stop")
|
||||
|
||||
|
||||
class FakeIsolatedAgentLoop(FakeAgentLoop):
    """Fake agent loop that records whether it and its MCP manager were closed.

    ``close()`` asserts that the MCP manager's async ``close`` already ran, so
    a test using this fake fails if the loop is closed before its MCP manager.
    """

    def __init__(self) -> None:
        super().__init__()
        self.loaded = None  # cached result of the first boot()
        self.closed = False
        self.mcp_closed = False
        self.mcp_manager = SimpleNamespace(close=self._close_mcp)

    async def _close_mcp(self) -> None:
        """Record that the MCP manager was closed."""
        self.mcp_closed = True

    def close(self) -> None:
        """Mark the loop closed; the MCP manager must have been closed first."""
        assert self.mcp_closed is True
        self.closed = True

    def boot(self):
        """Return the booted namespace, building and caching it on first use."""
        if self.loaded is not None:
            return self.loaded
        self.loaded = super().boot()
        self.loaded.mcp_manager = self.mcp_manager
        self.loaded.closeables = [("mcp_manager", lambda: None)]
        return self.loaded
|
||||
|
||||
|
||||
def test_replay_runner_returns_arm_report_with_tool_trace() -> None:
|
||||
runner = ReplayRunner(agent_loop=FakeAgentLoop())
|
||||
request = ReplayArmRequest(
|
||||
@ -53,6 +86,8 @@ def test_replay_runner_returns_arm_report_with_tool_trace() -> None:
|
||||
assert report["arm"] == "candidate"
|
||||
assert report["finish_reason"] == "stop"
|
||||
assert report["tool_calls"][0]["tool_name"] == "mcp_outlook_send_email"
|
||||
assert report["tool_calls"][0]["duration_ms"] >= 0
|
||||
assert runner.agent_loop.ended_sessions == [("session-replay", "evaluation_complete")]
|
||||
|
||||
|
||||
def test_replay_runner_queues_arm_when_agent_loop_is_running() -> None:
|
||||
@ -83,3 +118,31 @@ def test_replay_runner_queues_arm_when_agent_loop_is_running() -> None:
|
||||
assert report["session_id"] == "session-queued"
|
||||
assert report["run_id"] == "run-queued"
|
||||
assert report["tool_calls"][0]["tool_name"] == "mcp_outlook_send_email"
|
||||
assert agent_loop.ended_sessions == [("session-queued", "evaluation_complete")]
|
||||
|
||||
|
||||
def test_replay_runner_uses_and_closes_isolated_loop() -> None:
    """With an isolated-loop factory, the runner must bypass the shared loop and close the isolated one."""
    primary_loop = FakeRunningAgentLoop()
    created: list[FakeIsolatedAgentLoop] = []

    def make_isolated_loop() -> FakeIsolatedAgentLoop:
        # Keep a handle on every loop the runner asks for so it can be inspected below.
        created.append(FakeIsolatedAgentLoop())
        return created[-1]

    replay_runner = ReplayRunner(agent_loop=primary_loop, isolated_loop_factory=make_isolated_loop)
    arm_request = ReplayArmRequest(
        case_id="case-isolated",
        arm="candidate",
        task_text="Fetch current weather.",
        provider_bundle=object(),
    )

    report = asyncio.run(replay_runner.run_arm(arm_request))

    assert report["session_id"] == "session-replay"
    # The shared loop must not have been used at all.
    assert primary_loop.process_direct_calls == 0
    assert primary_loop.submit_direct_calls == []
    # Exactly one isolated loop was created, and it was fully torn down.
    assert len(created) == 1
    assert created[0].mcp_closed is True
    assert created[0].closed is True
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
@ -16,7 +18,7 @@ class StubEvaluator:
|
||||
def __init__(self) -> None:
|
||||
self.calls = 0
|
||||
|
||||
async def evaluate(self, *, candidate, draft, provider_bundle, replay_runner=None):
|
||||
async def evaluate(self, *, candidate, draft, provider_bundle, replay_runner=None, progress_callback=None):
|
||||
self.calls += 1
|
||||
return SkillDraftEvalReport(
|
||||
report_id="eval-existing",
|
||||
@ -34,6 +36,18 @@ class StubEvaluator:
|
||||
)
|
||||
|
||||
|
||||
class SlowEvaluator(StubEvaluator):
    """Evaluator stub that waits 0.15 s before delegating to ``StubEvaluator.evaluate``."""

    async def evaluate(self, *, candidate, draft, provider_bundle, replay_runner=None, progress_callback=None):
        """Sleep briefly, then return the parent stub's report for the same arguments."""
        forwarded = {
            "candidate": candidate,
            "draft": draft,
            "provider_bundle": provider_bundle,
            "replay_runner": replay_runner,
            "progress_callback": progress_callback,
        }
        await asyncio.sleep(0.15)
        return await super().evaluate(**forwarded)
|
||||
|
||||
|
||||
def test_skill_learning_candidates_and_run_once_api(tmp_path: Path) -> None:
|
||||
service = AgentService(workspace=tmp_path)
|
||||
loaded = service.create_loop().boot()
|
||||
@ -193,15 +207,79 @@ def test_submit_draft_runs_safety_and_eval(tmp_path: Path, monkeypatch) -> None:
|
||||
|
||||
with TestClient(app) as client:
|
||||
response = client.post(f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}/submit")
|
||||
deadline = time.monotonic() + 1
|
||||
payload = response.json()
|
||||
while payload["eval_report"] is None and time.monotonic() < deadline:
|
||||
time.sleep(0.02)
|
||||
payload = client.get(f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}").json()
|
||||
|
||||
assert response.status_code == 200
|
||||
payload = response.json()
|
||||
assert evaluator.calls == 1
|
||||
assert payload["status"] == "in_review"
|
||||
assert payload["safety_report"]["passed"] is True
|
||||
assert payload["eval_report"]["report_id"] == "eval-existing"
|
||||
|
||||
|
||||
def test_submit_draft_returns_before_eval_and_is_idempotent(tmp_path: Path, monkeypatch) -> None:
    """Submitting a draft returns before evaluation completes, and a second submit does not re-evaluate.

    A ``SlowEvaluator`` stands in for the real evaluator. The first POST must
    come back in under 0.12 s with ``eval_status == "pending"``; after polling
    the draft endpoint, the evaluator must have been called exactly once.
    """
    service = AgentService(workspace=tmp_path)
    loaded = service.create_loop().boot()
    draft = loaded.skill_learning_pipeline.draft_service.create_new_skill_draft(  # type: ignore[union-attr]
        skill_name="weather-search",
        proposed_content="# Weather Search\n\nUse current weather sources.",
        proposed_frontmatter={"description": "weather", "tools": []},
        created_by="test",
        reason="test",
    )
    weather_candidate = SkillLearningCandidate(
        candidate_id="candidate-weather",
        kind="revise_skill",
        source_run_ids=["run-1"],
        source_session_ids=["session-1"],
        related_skill_names=["weather-search"],
        reason="revise",
        status="draft_ready",
        draft_skill_name=draft.skill_name,
        draft_id=draft.draft_id,
    )
    loaded.skill_learning_store.record_learning_candidate(weather_candidate)  # type: ignore[union-attr]

    evaluator = SlowEvaluator()
    loaded.skill_learning_pipeline.evaluator = evaluator  # type: ignore[union-attr]
    monkeypatch.setattr(
        service,
        "_make_provider_bundle_for_task",
        lambda loaded, kwargs: SimpleNamespace(main_provider=object()),
    )
    app = create_app(service=service, manage_service_lifecycle=False)

    draft_url = f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}"
    submit_url = f"/api/skills/{draft.skill_name}/drafts/{draft.draft_id}/submit"

    with TestClient(app) as client:
        started = time.monotonic()
        first = client.post(submit_url)
        elapsed = time.monotonic() - started
        second = client.post(submit_url)

        # Poll the draft until the background evaluation publishes its report (at most 2 s).
        deadline = time.monotonic() + 2
        payload = second.json()
        while payload["eval_report"] is None and time.monotonic() < deadline:
            time.sleep(0.05)
            payload = client.get(draft_url).json()

    assert first.status_code == 200
    assert elapsed < 0.12
    first_body = first.json()
    assert first_body["status"] == "in_review"
    assert first_body["eval_status"] == "pending"
    assert first_body["eval_progress"] == {
        "phase": "preparing",
        "completed_arms": 0,
        "total_arms": 20,
        "completed_cases": 0,
        "total_cases": 10,
    }
    assert second.status_code == 200
    assert evaluator.calls == 1
    assert payload["eval_report"]["report_id"] == "eval-existing"
    final_candidate = loaded.skill_learning_pipeline.get_candidate("candidate-weather")  # type: ignore[union-attr]
    assert final_candidate.status == "review_pending"
|
||||
|
||||
|
||||
def test_draft_payload_includes_target_version_for_revision(tmp_path: Path) -> None:
|
||||
service = AgentService(workspace=tmp_path)
|
||||
loaded = service.create_loop().boot()
|
||||
|
||||
@ -57,6 +57,14 @@ def write_terminal_config(tmp_path: Path) -> Path:
|
||||
return config_path
|
||||
|
||||
|
||||
def write_terminal_config_with_device_session(tmp_path: Path) -> Path:
    """Write the terminal config, then turn on ``sessionPeerFromDeviceName`` for ``terminal-dev``.

    Returns the path of the rewritten config file.
    """
    config_path = write_terminal_config(tmp_path)
    settings = json.loads(config_path.read_text(encoding="utf-8"))
    channel_options = settings["channels"]["terminal-dev"]["config"]
    channel_options["sessionPeerFromDeviceName"] = True
    config_path.write_text(json.dumps(settings), encoding="utf-8")
    return config_path
|
||||
|
||||
|
||||
def test_terminal_websocket_connect_ping_and_message_roundtrip(tmp_path: Path) -> None:
|
||||
config_path = write_terminal_config(tmp_path)
|
||||
service = TerminalFakeAgentService(config_path=config_path)
|
||||
@ -117,6 +125,98 @@ def test_terminal_websocket_connect_ping_and_message_roundtrip(tmp_path: Path) -
|
||||
assert inbound.channel_identity.message_id == "device-001-000001"
|
||||
|
||||
|
||||
def test_terminal_websocket_can_use_device_name_as_stable_session_peer(tmp_path: Path) -> None:
    """Two connections with different peer ids but the same device name must share one session id."""
    service = TerminalFakeAgentService(config_path=write_terminal_config_with_device_session(tmp_path))
    app = create_app(service=service, manage_service_lifecycle=False)
    ws_path = "/api/channels/terminal-dev/ws"

    def connect_frame(peer_id: str) -> dict:
        # Both connections announce the same device name; only the peer id differs.
        return {
            "type": "connect",
            "peer_id": peer_id,
            "device_name": "desk-terminal",
        }

    with TestClient(app) as client:
        with client.websocket_connect(ws_path) as websocket:
            websocket.send_json(connect_frame("livekit-test-livekit-07291699"))
            first = websocket.receive_json()

        with client.websocket_connect(ws_path) as websocket:
            websocket.send_json(connect_frame("livekit-test-livekit-3fb03fff"))
            second = websocket.receive_json()
            websocket.send_json(
                {
                    "type": "message",
                    "message_id": "livekit-test-livekit-3fb03fff-000001",
                    "text": "hello",
                }
            )
            ack = websocket.receive_json()
            reply = websocket.receive_json()

    service.close()

    assert first["session_id"] == "terminal-dev:local:device-desk-terminal"
    assert second["session_id"] == first["session_id"]
    assert ack["session_id"] == first["session_id"]
    assert reply["text"] == "echo:hello"
    # The inbound call seen by the service carries the device-derived peer id.
    assert service.inbound_calls[0].session_id == first["session_id"]
    assert service.inbound_calls[0].channel_identity is not None
    assert service.inbound_calls[0].channel_identity.peer_id == "device-desk-terminal"
|
||||
|
||||
|
||||
# Checks that a reply still pending for a device-named session is received on a
# later connection that announces the same device name under a different peer id.
# NOTE(review): indentation is lost in this view, so it cannot be determined whether
# the second websocket_connect block is nested inside the first or follows it - confirm.
def test_terminal_websocket_reconnect_delivers_pending_reply_to_latest_device_connection(tmp_path: Path) -> None:
|
||||
config_path = write_terminal_config_with_device_session(tmp_path)
|
||||
# delay_seconds=0.05 presumably delays the fake service's reply - confirm against TerminalFakeAgentService.
service = TerminalFakeAgentService(config_path=config_path, delay_seconds=0.05)
|
||||
app = create_app(service=service, manage_service_lifecycle=False)
|
||||
|
||||
with TestClient(app) as client:
|
||||
# First connection: announce the device, then send a message.
with client.websocket_connect("/api/channels/terminal-dev/ws") as first_websocket:
|
||||
first_websocket.send_json(
|
||||
{
|
||||
"type": "connect",
|
||||
"peer_id": "livekit-test-livekit-old",
|
||||
"device_name": "desk-terminal",
|
||||
}
|
||||
)
|
||||
first = first_websocket.receive_json()
|
||||
first_websocket.send_json(
|
||||
{
|
||||
"type": "message",
|
||||
"message_id": "livekit-test-livekit-old-000001",
|
||||
"text": "slow",
|
||||
}
|
||||
)
|
||||
# Only the acceptance frame is read on the first connection; the assistant reply is not.
assert first_websocket.receive_json()["accepted"] is True
|
||||
|
||||
# Later connection: same device name, different peer id.
with client.websocket_connect("/api/channels/terminal-dev/ws") as latest_websocket:
|
||||
latest_websocket.send_json(
|
||||
{
|
||||
"type": "connect",
|
||||
"peer_id": "livekit-test-livekit-new",
|
||||
"device_name": "desk-terminal",
|
||||
}
|
||||
)
|
||||
latest = latest_websocket.receive_json()
|
||||
# The next frame on the new connection is expected to be the reply to the earlier message.
reply = latest_websocket.receive_json()
|
||||
|
||||
service.close()
|
||||
assert latest["session_id"] == first["session_id"]
|
||||
# The reply keeps the message_id that was sent on the first connection.
assert reply == {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"message_id": "livekit-test-livekit-old-000001",
|
||||
"run_id": "run-1",
|
||||
"text": "echo:slow",
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
|
||||
|
||||
def test_terminal_websocket_rejects_message_before_connect(tmp_path: Path) -> None:
|
||||
config_path = write_terminal_config(tmp_path)
|
||||
service = TerminalFakeAgentService(config_path=config_path)
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
from beaver.tools.builtins import web
|
||||
|
||||
@ -8,8 +9,16 @@ from beaver.tools.builtins import web
|
||||
class _FakeResponse:
|
||||
headers = {"content-type": "text/html"}
|
||||
status_code = 200
|
||||
text = '<a class="result__a" href="https://example.com">Example</a>'
|
||||
url = "https://example.com"
|
||||
|
||||
def __init__(self, url: str = "https://example.com") -> None:
|
||||
self.url = url
|
||||
if "duckduckgo.com" in url:
|
||||
self.text = '<a class="result__a" href="https://duck.example.com">Duck Example</a>'
|
||||
else:
|
||||
self.text = (
|
||||
'<li class="b_algo"><h2><a href="https://example.com">Example</a></h2>'
|
||||
"<p>Example result</p></li>"
|
||||
)
|
||||
|
||||
def raise_for_status(self) -> None:
|
||||
return None
|
||||
@ -17,6 +26,8 @@ class _FakeResponse:
|
||||
|
||||
class _FakeAsyncClient:
|
||||
calls: list[dict[str, object]] = []
|
||||
urls: list[str] = []
|
||||
fail_bing = False
|
||||
|
||||
def __init__(self, **kwargs: object) -> None:
|
||||
self.calls.append(kwargs)
|
||||
@ -28,7 +39,11 @@ class _FakeAsyncClient:
|
||||
return None
|
||||
|
||||
async def get(self, *args: object, **kwargs: object) -> _FakeResponse:
|
||||
return _FakeResponse()
|
||||
url = str(args[0])
|
||||
self.urls.append(url)
|
||||
if self.fail_bing and "bing.com" in url:
|
||||
raise web.httpx.ConnectTimeout("bing unavailable")
|
||||
return _FakeResponse(url)
|
||||
|
||||
|
||||
def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None:
|
||||
@ -42,3 +57,56 @@ def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None:
|
||||
asyncio.run(_run())
|
||||
|
||||
assert [call.get("trust_env") for call in _FakeAsyncClient.calls] == [True, True]
|
||||
|
||||
|
||||
def test_web_fetch_uses_short_connect_timeout(monkeypatch) -> None:
    """``WebFetchTool`` must build its HTTP client with a 5 s connect and 12 s read timeout."""
    # Reset the fake client's class-level recorders before patching it in.
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    _FakeAsyncClient.fail_bing = False
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    fetch_tool = web.WebFetchTool()
    asyncio.run(fetch_tool.execute(url="https://example.com"))

    # The first recorded constructor call holds the kwargs the tool passed to AsyncClient.
    first_client_kwargs = _FakeAsyncClient.calls[0]
    client_timeout = first_client_kwargs["timeout"]
    assert isinstance(client_timeout, web.httpx.Timeout)
    assert client_timeout.connect == 5
    assert client_timeout.read == 12
|
||||
|
||||
|
||||
def test_web_search_uses_reachable_bing_endpoint_first(monkeypatch) -> None:
    """With Bing reachable, the search succeeds and both engine URLs are requested.

    The assertions accept either engine as the winner and compare the requested
    URLs as a set, so request order is not checked here.
    """
    # Reset the fake client's class-level recorders before patching it in.
    _FakeAsyncClient.calls = []
    _FakeAsyncClient.urls = []
    _FakeAsyncClient.fail_bing = False
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    search_tool = web.WebSearchTool()
    raw_result = asyncio.run(search_tool.execute(query="weather beijing"))
    payload = json.loads(raw_result)

    expected_urls = {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }
    assert payload["success"] is True
    assert payload["engine"] in {"bing", "duckduckgo"}
    assert set(_FakeAsyncClient.urls) == expected_urls

    client_timeout = _FakeAsyncClient.calls[0]["timeout"]
    assert isinstance(client_timeout, web.httpx.Timeout)
    assert client_timeout.connect == 5
    assert client_timeout.read == 8
|
||||
|
||||
|
||||
def test_web_search_falls_back_when_bing_is_unavailable(monkeypatch) -> None:
    """When the Bing request fails, the search must still succeed via DuckDuckGo.

    The fake client's class-level state is set through ``monkeypatch`` so that
    ``fail_bing = True`` is undone at teardown and cannot leak into a later
    test that forgets to reset it.
    """
    monkeypatch.setattr(_FakeAsyncClient, "calls", [])
    monkeypatch.setattr(_FakeAsyncClient, "urls", [])
    monkeypatch.setattr(_FakeAsyncClient, "fail_bing", True)
    monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient)

    raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing"))

    payload = json.loads(raw)
    assert payload["success"] is True
    assert payload["engine"] == "duckduckgo"
    # Both endpoints were requested, including the failing Bing one.
    assert set(_FakeAsyncClient.urls) == {
        "https://www.bing.com/search?q=weather+beijing",
        "https://duckduckgo.com/html/?q=weather+beijing",
    }
|
||||
|
||||
@ -8,6 +8,7 @@ import { listNotifications } from '@/lib/api';
|
||||
import type { NotificationRun } from '@/types';
|
||||
import { pickAppText } from '@/lib/i18n/core';
|
||||
import { useAppI18n } from '@/lib/i18n/provider';
|
||||
import { scheduleNotificationRefresh } from '@/lib/notification-runtime';
|
||||
import { containedLongTextClass } from '@/lib/text-wrapping';
|
||||
import { Badge } from '@/components/ui/badge';
|
||||
import { Button } from '@/components/ui/button';
|
||||
@ -19,20 +20,21 @@ export default function NotificationsPage() {
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const load = React.useCallback(async () => {
|
||||
setLoading(true);
|
||||
const load = React.useCallback(async (background = false) => {
|
||||
if (!background) setLoading(true);
|
||||
setError(null);
|
||||
try {
|
||||
setItems(await listNotifications());
|
||||
} catch (err: any) {
|
||||
setError(err.message || pickAppText(locale, '加载通知失败', 'Failed to load notifications'));
|
||||
} finally {
|
||||
setLoading(false);
|
||||
if (!background) setLoading(false);
|
||||
}
|
||||
}, [locale]);
|
||||
|
||||
useEffect(() => {
|
||||
void load();
|
||||
return scheduleNotificationRefresh(() => load(true));
|
||||
}, [load]);
|
||||
|
||||
const formatTime = (value?: string | null) => {
|
||||
|
||||
@ -57,6 +57,7 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
|
||||
import type { AppLocale } from '@/lib/i18n/core';
|
||||
import { pickAppText } from '@/lib/i18n/core';
|
||||
import { useAppI18n } from '@/lib/i18n/provider';
|
||||
import { nextOutlookAutoLoadTarget, type OutlookAutoLoadView } from '@/lib/outlook-page-state';
|
||||
|
||||
type OutlookFormState = OutlookConnectionPayload;
|
||||
type OutlookView = 'inbox' | 'sent' | 'calendar' | 'settings';
|
||||
@ -368,6 +369,11 @@ export default function OutlookPage() {
|
||||
sent: false,
|
||||
});
|
||||
const [calendarLoading, setCalendarLoading] = useState(false);
|
||||
const [autoLoadAttempted, setAutoLoadAttempted] = useState<Record<OutlookAutoLoadView, boolean>>({
|
||||
inbox: false,
|
||||
sent: false,
|
||||
calendar: false,
|
||||
});
|
||||
const formDirtyRef = React.useRef(formDirty);
|
||||
|
||||
useEffect(() => {
|
||||
@ -399,6 +405,7 @@ export default function OutlookPage() {
|
||||
}, [t]);
|
||||
|
||||
const loadMailboxPage = useCallback(async (view: OutlookMailboxView, skip = 0) => {
|
||||
setAutoLoadAttempted((current) => ({ ...current, [view]: true }));
|
||||
setMailboxLoading((current) => ({ ...current, [view]: true }));
|
||||
try {
|
||||
const nextPage = await getOutlookMessages(view === 'inbox' ? 'inbox' : 'sentitems', {
|
||||
@ -425,6 +432,7 @@ export default function OutlookPage() {
|
||||
}, [t]);
|
||||
|
||||
const loadCalendarPage = useCallback(async (anchorKey: string) => {
|
||||
setAutoLoadAttempted((current) => ({ ...current, calendar: true }));
|
||||
setCalendarLoading(true);
|
||||
try {
|
||||
const range = buildCalendarRange(anchorKey);
|
||||
@ -461,9 +469,7 @@ export default function OutlookPage() {
|
||||
if (!background) {
|
||||
setStatusLoading(false);
|
||||
}
|
||||
if (nextStatus.configured) {
|
||||
await loadOverview(options?.preserveOverview ?? background);
|
||||
} else {
|
||||
if (!nextStatus.configured) {
|
||||
setOverview(null);
|
||||
setOverviewLoading(false);
|
||||
}
|
||||
@ -523,9 +529,6 @@ export default function OutlookPage() {
|
||||
);
|
||||
const isConfigured = Boolean(status?.configured);
|
||||
const isConnected = Boolean(status?.connected);
|
||||
const inboxCount = overview?.recentInbox.length ?? 0;
|
||||
const sentCount = overview?.recentSent.length ?? 0;
|
||||
const eventCount = overview?.todayEvents.length ?? 0;
|
||||
const overviewWarnings = overview?.warnings || [];
|
||||
const testWarnings = testResult?.warnings || [];
|
||||
const statusPending = statusLoading && !status;
|
||||
@ -538,7 +541,6 @@ export default function OutlookPage() {
|
||||
label: t('设置', 'Settings'),
|
||||
hint: t('配置 Outlook 连接', 'Configure the Outlook connection'),
|
||||
icon: Settings2,
|
||||
count: null,
|
||||
},
|
||||
];
|
||||
}
|
||||
@ -549,31 +551,27 @@ export default function OutlookPage() {
|
||||
label: t('收件箱', 'Inbox'),
|
||||
hint: t('最近接收邮件', 'Recently received mail'),
|
||||
icon: Inbox,
|
||||
count: null,
|
||||
},
|
||||
{
|
||||
id: 'sent' as const,
|
||||
label: t('发件箱', 'Sent'),
|
||||
hint: t('最近发送记录', 'Recently sent messages'),
|
||||
icon: Send,
|
||||
count: null,
|
||||
},
|
||||
{
|
||||
id: 'calendar' as const,
|
||||
label: t('日程', 'Calendar'),
|
||||
hint: t('未来 7 天', 'Next 7 days'),
|
||||
icon: CalendarDays,
|
||||
count: overviewPending ? null : eventCount,
|
||||
},
|
||||
{
|
||||
id: 'settings' as const,
|
||||
label: t('设置', 'Settings'),
|
||||
hint: t('连接与状态', 'Connection and status'),
|
||||
icon: Settings2,
|
||||
count: null,
|
||||
},
|
||||
];
|
||||
}, [eventCount, inboxCount, isConfigured, overviewPending, sentCount, t]);
|
||||
}, [isConfigured, t]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!availableViews.some((view) => view.id === activeView)) {
|
||||
@ -582,20 +580,31 @@ export default function OutlookPage() {
|
||||
}, [activeView, availableViews]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isConfigured) {
|
||||
return;
|
||||
}
|
||||
if (activeView === 'inbox' && !inboxPage && !mailboxLoading.inbox) {
|
||||
const target = nextOutlookAutoLoadTarget({
|
||||
isConfigured,
|
||||
activeView,
|
||||
loaded: {
|
||||
inbox: Boolean(inboxPage),
|
||||
sent: Boolean(sentPage),
|
||||
calendar: Boolean(calendarPage),
|
||||
},
|
||||
loading: {
|
||||
inbox: mailboxLoading.inbox,
|
||||
sent: mailboxLoading.sent,
|
||||
calendar: calendarLoading,
|
||||
},
|
||||
attempted: autoLoadAttempted,
|
||||
});
|
||||
if (target === 'inbox') {
|
||||
void loadMailboxPage('inbox', 0);
|
||||
}
|
||||
if (activeView === 'sent' && !sentPage && !mailboxLoading.sent) {
|
||||
} else if (target === 'sent') {
|
||||
void loadMailboxPage('sent', 0);
|
||||
}
|
||||
if (activeView === 'calendar' && !calendarPage && !calendarLoading) {
|
||||
} else if (target === 'calendar') {
|
||||
void loadCalendarPage(calendarAnchorKey);
|
||||
}
|
||||
}, [
|
||||
activeView,
|
||||
autoLoadAttempted,
|
||||
calendarAnchorKey,
|
||||
calendarLoading,
|
||||
calendarPage,
|
||||
@ -638,6 +647,7 @@ export default function OutlookPage() {
|
||||
setInboxPage(null);
|
||||
setSentPage(null);
|
||||
setCalendarPage(null);
|
||||
setAutoLoadAttempted({ inbox: false, sent: false, calendar: false });
|
||||
setCalendarAnchorKey(toLocalDateKey(new Date()));
|
||||
await loadStatus(true, { forceFormSync: true });
|
||||
setActiveView('inbox');
|
||||
@ -663,6 +673,7 @@ export default function OutlookPage() {
|
||||
setInboxPage(null);
|
||||
setSentPage(null);
|
||||
setCalendarPage(null);
|
||||
setAutoLoadAttempted({ inbox: false, sent: false, calendar: false });
|
||||
setCalendarAnchorKey(toLocalDateKey(new Date()));
|
||||
setActiveView('settings');
|
||||
setFormDirty(false);
|
||||
@ -676,6 +687,7 @@ export default function OutlookPage() {
|
||||
|
||||
const refreshOverview = async () => {
|
||||
await loadStatus(true, { preserveOverview: true });
|
||||
await loadOverview(true);
|
||||
if (activeView === 'inbox') {
|
||||
await loadMailboxPage('inbox', inboxPage?.page.skip ?? 0);
|
||||
} else if (activeView === 'sent') {
|
||||
@ -723,13 +735,6 @@ export default function OutlookPage() {
|
||||
</div>
|
||||
|
||||
<div className="flex flex-wrap items-center gap-2">
|
||||
{isConfigured ? (
|
||||
<>
|
||||
<TopStat label={t('收件箱', 'Inbox')} value={String(inboxCount)} loading={overviewPending} />
|
||||
<TopStat label={t('发件箱', 'Sent')} value={String(sentCount)} loading={overviewPending} />
|
||||
<TopStat label={t('日程', 'Calendar')} value={String(eventCount)} loading={overviewPending} />
|
||||
</>
|
||||
) : null}
|
||||
<Button variant="outline" size="sm" className="h-11" onClick={() => void refreshOverview()}>
|
||||
<RefreshCw className={`mr-2 h-4 w-4 ${refreshing ? 'animate-spin' : ''}`} />
|
||||
{t('刷新', 'Refresh')}
|
||||
@ -783,9 +788,6 @@ export default function OutlookPage() {
|
||||
</span>
|
||||
<div className="text-left">
|
||||
<p className="text-sm font-semibold">{view.label}</p>
|
||||
{typeof view.count === 'number' ? (
|
||||
<p className="text-xs text-muted-foreground">{t(`${view.count} 条`, `${view.count} items`)}</p>
|
||||
) : null}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -1210,19 +1212,6 @@ function MiniStat({ label, value }: { label: string; value: string }) {
|
||||
);
|
||||
}
|
||||
|
||||
function TopStat({ label, value, loading = false }: { label: string; value: string; loading?: boolean }) {
|
||||
return (
|
||||
<div className="rounded-full border bg-background px-3 py-1 text-sm">
|
||||
<span className="text-muted-foreground">{label}</span>
|
||||
{loading ? (
|
||||
<Skeleton className="ml-2 inline-flex h-4 w-8 align-middle" />
|
||||
) : (
|
||||
<span className="ml-2 font-semibold text-foreground">{value}</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function MessageCard({
|
||||
title,
|
||||
icon,
|
||||
|
||||
@ -39,7 +39,7 @@ import { pickAppText } from '@/lib/i18n/core';
|
||||
import { useAppI18n } from '@/lib/i18n/provider';
|
||||
import { useChatStore } from '@/lib/store';
|
||||
import { buildTaskTimelineView } from '@/lib/task-timeline-view';
|
||||
import type { ActiveTask, BackendTask, ChatMessage, FileAttachment, SessionUpdatedEvent, WsEvent } from '@/types';
|
||||
import type { ActiveTask, BackendTask, ChatMessage, FileAttachment, Session, SessionUpdatedEvent, WsEvent } from '@/types';
|
||||
|
||||
function isSessionUpdatedEvent(data: WsEvent | Record<string, unknown>): data is SessionUpdatedEvent {
|
||||
return data.type === 'session_updated' && typeof data.session_id === 'string';
|
||||
@ -149,7 +149,15 @@ export default function ChatPage() {
|
||||
const loadSessions = useCallback(async () => {
|
||||
try {
|
||||
const list = await listSessions();
|
||||
useChatStore.getState().setSessions(list);
|
||||
const store = useChatStore.getState();
|
||||
store.setSessions(list);
|
||||
const currentSessionId = store.sessionId;
|
||||
const isOrphanedGeneratedSession =
|
||||
/^[0-9a-f]{32}$/i.test(currentSessionId) &&
|
||||
!list.some((session) => session.key === currentSessionId);
|
||||
if (isOrphanedGeneratedSession) {
|
||||
store.setSessionId(list[0]?.key || 'web:default');
|
||||
}
|
||||
} catch {
|
||||
// backend may be offline during first render
|
||||
}
|
||||
@ -576,7 +584,9 @@ export default function ChatPage() {
|
||||
});
|
||||
}, []);
|
||||
|
||||
const formatSessionName = (key: string) => {
|
||||
const formatSessionName = (key: string, session?: Session) => {
|
||||
const descriptiveName = session?.title?.trim() || session?.preview?.trim();
|
||||
if (descriptiveName) return descriptiveName;
|
||||
if (key.startsWith('web:')) {
|
||||
const id = key.slice(4);
|
||||
if (id === 'default') return pickAppText(locale, '默认', 'Default');
|
||||
@ -594,7 +604,12 @@ export default function ChatPage() {
|
||||
return key;
|
||||
};
|
||||
|
||||
const archiveTargetSessionName = archiveTargetSessionId ? formatSessionName(archiveTargetSessionId) : '';
|
||||
const archiveTargetSessionName = archiveTargetSessionId
|
||||
? formatSessionName(
|
||||
archiveTargetSessionId,
|
||||
sessions.find((session) => session.key === archiveTargetSessionId)
|
||||
)
|
||||
: '';
|
||||
|
||||
const renderSessionSidebar = (variant: 'desktop' | 'drawer') => (
|
||||
<>
|
||||
@ -618,7 +633,7 @@ export default function ChatPage() {
|
||||
<p className="px-3 py-4 text-sm text-muted-foreground">{pickAppText(locale, '暂无对话记录', 'No chat history yet')}</p>
|
||||
)}
|
||||
{sessions.map((session) => {
|
||||
const sessionName = formatSessionName(session.key);
|
||||
const sessionName = formatSessionName(session.key, session);
|
||||
const isCurrent = session.key === sessionId;
|
||||
|
||||
return (
|
||||
|
||||
@ -130,6 +130,16 @@ export default function SkillsPage() {
|
||||
void load();
|
||||
}, [load]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!drafts.some((draft) => draft.eval_status === 'pending')) return;
|
||||
const timer = window.setInterval(() => {
|
||||
void listSkillDrafts()
|
||||
.then((items) => setDrafts(Array.isArray(items) ? items : []))
|
||||
.catch(() => null);
|
||||
}, 5000);
|
||||
return () => window.clearInterval(timer);
|
||||
}, [drafts]);
|
||||
|
||||
useEffect(() => {
|
||||
setActiveTab(normalizeSkillsTab(searchParams?.get('tab')));
|
||||
}, [searchParams]);
|
||||
@ -825,7 +835,8 @@ function DraftCard({
|
||||
safety?.suggested_fix,
|
||||
].filter(Boolean).join('\n');
|
||||
const safetyBlocksReview = Boolean(safety && (!safety.passed || safety.risk_level === 'critical'));
|
||||
const submitBlocked = draft.status !== 'draft' || safetyBlocksReview;
|
||||
const canRetryEval = draft.status === 'in_review' && draft.eval_status === 'failed';
|
||||
const submitBlocked = (draft.status !== 'draft' && !canRetryEval) || safetyBlocksReview;
|
||||
const rejectBlocked = !REJECTABLE_DRAFT_STATUSES.has(draft.status);
|
||||
const canPublishLabel = publishBlocked
|
||||
? publishBlockReason(draft, t)
|
||||
@ -912,7 +923,7 @@ function DraftCard({
|
||||
<div className="flex flex-wrap gap-2">
|
||||
<Button variant="outline" size="sm" className="h-11" disabled={busy || submitBlocked} onClick={() => void onSubmit()}>
|
||||
<Send className="mr-2 h-4 w-4" />
|
||||
{t('送审', 'Submit')}
|
||||
{canRetryEval ? t('重试评估', 'Retry eval') : t('送审', 'Submit')}
|
||||
</Button>
|
||||
<Button variant="outline" size="sm" className="h-11" disabled={busy || rejectBlocked} onClick={() => void onReject()}>
|
||||
<XCircle className="mr-2 h-4 w-4" />
|
||||
@ -988,7 +999,12 @@ function DraftCard({
|
||||
|
||||
<div className="mt-3 grid min-w-0 gap-3 md:grid-cols-2">
|
||||
<SafetyReportPanel report={safety} />
|
||||
<EvalReportPanel report={evalReport} />
|
||||
<EvalReportPanel
|
||||
report={evalReport}
|
||||
status={draft.eval_status}
|
||||
error={draft.eval_error}
|
||||
progress={draft.eval_progress}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@ -1111,10 +1127,55 @@ function lineDiffSummary(baseContent: string, proposedContent: string): { added:
|
||||
return { added, removed, changed };
|
||||
}
|
||||
|
||||
function EvalReportPanel({ report }: { report?: SkillDraftEvalReport | null }) {
|
||||
function EvalReportPanel({
|
||||
report,
|
||||
status,
|
||||
error,
|
||||
progress,
|
||||
}: {
|
||||
report?: SkillDraftEvalReport | null;
|
||||
status?: SkillDraft['eval_status'];
|
||||
error?: string | null;
|
||||
progress?: SkillDraft['eval_progress'];
|
||||
}) {
|
||||
const { locale } = useAppI18n();
|
||||
const t = (zh: string, en: string) => pickAppText(locale, zh, en);
|
||||
if (!report) {
|
||||
if (status === 'pending') {
|
||||
const completedArms = Math.max(0, Number(progress?.completed_arms || 0));
|
||||
const totalArms = Math.max(0, Number(progress?.total_arms || 0));
|
||||
const progressText = totalArms > 0
|
||||
? t(
|
||||
`评估正在后台运行:已完成 ${completedArms}/${totalArms} 次回放(共 ${progress?.total_cases || 10} 个案例,每个案例包含 baseline 和 candidate)。`,
|
||||
`Evaluation is running: ${completedArms}/${totalArms} replays completed (${progress?.total_cases || 10} cases, each with baseline and candidate).`
|
||||
)
|
||||
: t('评估正在准备案例,完成后会自动更新。', 'Evaluation cases are being prepared and will update automatically.');
|
||||
return (
|
||||
<ReadablePanel
|
||||
icon={<Loader2 className="h-4 w-4 animate-spin" />}
|
||||
title={t('评估报告', 'Eval report')}
|
||||
empty={progressText}
|
||||
/>
|
||||
);
|
||||
}
|
||||
if (status === 'failed') {
|
||||
return (
|
||||
<ReadablePanel
|
||||
icon={<BarChart3 className="h-4 w-4 text-destructive" />}
|
||||
title={t('评估报告', 'Eval report')}
|
||||
empty={`${t('评估失败,可再次点击送审重试。', 'Evaluation failed. Submit again to retry.')} ${error || ''}`.trim()}
|
||||
/>
|
||||
);
|
||||
}
|
||||
if (status === 'not_applicable') {
|
||||
return (
|
||||
<ReadablePanel
|
||||
icon={<BarChart3 className="h-4 w-4" />}
|
||||
title={t('评估报告', 'Eval report')}
|
||||
empty={t('该草稿没有关联学习候选,不运行 replay eval。', 'This draft has no linked learning candidate, so replay eval does not run.')}
|
||||
/>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<ReadablePanel
|
||||
icon={<BarChart3 className="h-4 w-4" />}
|
||||
|
||||
@ -60,7 +60,7 @@ const ACCESS_TOKEN_KEY = 'beaver_access_token';
|
||||
const REFRESH_TOKEN_KEY = 'beaver_refresh_token';
|
||||
export const AUTH_CLEARED_EVENT = 'beaver-auth-cleared';
|
||||
const REQUEST_TIMEOUT_MS = 8000;
|
||||
const OUTLOOK_REQUEST_TIMEOUT_MS = 45000;
|
||||
const OUTLOOK_REQUEST_TIMEOUT_MS = 360000;
|
||||
const SKILL_LEARNING_REQUEST_TIMEOUT_MS = 120000;
|
||||
|
||||
export type PromptLocale = 'zh-Hans' | 'zh-Hant' | 'en';
|
||||
@ -902,10 +902,11 @@ export async function submitSkillDraft(
|
||||
skillName: string,
|
||||
draftId: string,
|
||||
notes: string = ''
|
||||
): Promise<SkillReviewRecord> {
|
||||
): Promise<SkillDraft> {
|
||||
return fetchJSON(`/api/skills/${encodeURIComponent(skillName)}/drafts/${encodeURIComponent(draftId)}/submit`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ notes }),
|
||||
timeoutMs: SKILL_LEARNING_REQUEST_TIMEOUT_MS,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
28
app-instance/frontend/lib/notification-runtime.test.ts
Normal file
28
app-instance/frontend/lib/notification-runtime.test.ts
Normal file
@ -0,0 +1,28 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import {
|
||||
NOTIFICATION_REFRESH_INTERVAL_MS,
|
||||
scheduleNotificationRefresh,
|
||||
} from '@/lib/notification-runtime';
|
||||
|
||||
// Verifies the interval-based notification refresh using vitest fake timers.
describe('notification refresh scheduling', () => {
  // Advance the fake clock by exactly one refresh interval.
  const advanceOneInterval = () =>
    vi.advanceTimersByTimeAsync(NOTIFICATION_REFRESH_INTERVAL_MS);

  beforeEach(() => {
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it('refreshes notifications periodically until cleanup', async () => {
    const refreshSpy = vi.fn();
    const stop = scheduleNotificationRefresh(refreshSpy);

    // One interval elapsed: the callback has fired once.
    await advanceOneInterval();
    expect(refreshSpy).toHaveBeenCalledTimes(1);

    // After cleanup, further elapsed time must not trigger more calls.
    stop();
    await advanceOneInterval();
    expect(refreshSpy).toHaveBeenCalledTimes(1);
  });
});
|
||||
12
app-instance/frontend/lib/notification-runtime.ts
Normal file
12
app-instance/frontend/lib/notification-runtime.ts
Normal file
@ -0,0 +1,12 @@
|
||||
/** Default delay, in milliseconds, between two consecutive refresh calls. */
export const NOTIFICATION_REFRESH_INTERVAL_MS = 5_000;

/**
 * Call `refresh` repeatedly on a fixed interval.
 *
 * @param refresh    Callback invoked on every tick; may be sync or async.
 * @param intervalMs Delay between ticks; defaults to
 *                   NOTIFICATION_REFRESH_INTERVAL_MS.
 * @returns A cleanup function that clears the interval when called.
 */
export function scheduleNotificationRefresh(
  refresh: () => void | Promise<void>,
  intervalMs = NOTIFICATION_REFRESH_INTERVAL_MS,
): () => void {
  const onTick = (): void => {
    // Fire-and-forget: the callback's result (or promise) is discarded.
    void refresh();
  };
  const intervalHandle = setInterval(onTick, intervalMs);

  return () => {
    clearInterval(intervalHandle);
  };
}
|
||||
16
app-instance/frontend/lib/outlook-counts-visibility.test.ts
Normal file
16
app-instance/frontend/lib/outlook-counts-visibility.test.ts
Normal file
@ -0,0 +1,16 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
// Source-level guard: reads the Outlook page file from disk and checks that
// the count-related markup strings are absent from it.
describe('Outlook count presentation', () => {
  it('does not render summary count chips or tab count labels', () => {
    // Path is resolved against the current working directory.
    const pagePath = resolve(process.cwd(), 'app/(app)/outlook/page.tsx');
    const pageSource = readFileSync(pagePath, 'utf8');

    for (const forbidden of ['<TopStat', 'view.count']) {
      expect(pageSource).not.toContain(forbidden);
    }
  });
});
|
||||
29
app-instance/frontend/lib/outlook-page-state.test.ts
Normal file
29
app-instance/frontend/lib/outlook-page-state.test.ts
Normal file
@ -0,0 +1,29 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { nextOutlookAutoLoadTarget } from '@/lib/outlook-page-state';
|
||||
|
||||
// Covers the auto-load decision for the active Outlook view.
describe('nextOutlookAutoLoadTarget', () => {
  // Fresh per-view flag record with every view set to false.
  const allFalse = () => ({ inbox: false, sent: false, calendar: false });

  // Configured page showing the inbox, nothing loaded or loading; only the
  // "attempted" flag of the inbox varies between the scenarios below.
  const inboxState = (inboxAttempted: boolean) => ({
    isConfigured: true,
    activeView: 'inbox' as const,
    loaded: allFalse(),
    loading: allFalse(),
    attempted: { ...allFalse(), inbox: inboxAttempted },
  });

  it('loads the active mailbox once when it has not been attempted', () => {
    const target = nextOutlookAutoLoadTarget(inboxState(false));
    expect(target).toBe('inbox');
  });

  it('does not auto-retry the same mailbox after a failed attempt', () => {
    const target = nextOutlookAutoLoadTarget(inboxState(true));
    expect(target).toBeNull();
  });
});
|
||||
20
app-instance/frontend/lib/outlook-page-state.ts
Normal file
20
app-instance/frontend/lib/outlook-page-state.ts
Normal file
@ -0,0 +1,20 @@
|
||||
/** Views of the Outlook page that can be selected as an auto-load target. */
export type OutlookAutoLoadView = 'inbox' | 'sent' | 'calendar';

/** Inputs consulted when deciding whether the active view should auto-load. */
export interface OutlookAutoLoadState {
  /** When false, no view is ever selected for auto-load. */
  isConfigured: boolean;
  /** Currently displayed view; 'settings' is never auto-loaded. */
  activeView: OutlookAutoLoadView | 'settings';
  /** Per-view flag; a true entry suppresses auto-load for that view. */
  loaded: Record<OutlookAutoLoadView, boolean>;
  /** Per-view flag; a true entry suppresses auto-load for that view. */
  loading: Record<OutlookAutoLoadView, boolean>;
  /** Per-view flag; a true entry suppresses auto-load (no automatic retry). */
  attempted: Record<OutlookAutoLoadView, boolean>;
}

/**
 * Decide which view, if any, should be loaded automatically.
 *
 * @param state Current page state.
 * @returns The active view when the page is configured, the view is not
 *          'settings', and it is neither loaded, loading, nor already
 *          attempted; otherwise null.
 */
export function nextOutlookAutoLoadTarget(state: OutlookAutoLoadState): OutlookAutoLoadView | null {
  const { isConfigured, activeView } = state;
  if (!isConfigured || activeView === 'settings') {
    return null;
  }

  // Any one of the three flags being set for the active view blocks the load.
  const needsLoad =
    !state.loaded[activeView] && !state.loading[activeView] && !state.attempted[activeView];
  return needsLoad ? activeView : null;
}
|
||||
@ -63,6 +63,9 @@ export interface Session {
|
||||
created_at?: string;
|
||||
updated_at?: string;
|
||||
path?: string;
|
||||
source?: string | null;
|
||||
title?: string | null;
|
||||
preview?: string | null;
|
||||
}
|
||||
|
||||
export interface SessionDetail {
|
||||
@ -1028,6 +1031,15 @@ export interface SkillDraft {
|
||||
reviews?: SkillReviewRecord[];
|
||||
safety_report?: SkillDraftSafetyReport | null;
|
||||
eval_report?: SkillDraftEvalReport | null;
|
||||
eval_status?: 'not_started' | 'not_applicable' | 'pending' | 'failed' | 'completed' | 'skipped_provider_unavailable';
|
||||
eval_error?: string | null;
|
||||
eval_progress?: {
|
||||
phase?: 'preparing' | 'replaying' | 'completed' | 'failed';
|
||||
completed_arms?: number;
|
||||
total_arms?: number;
|
||||
completed_cases?: number;
|
||||
total_cases?: number;
|
||||
} | null;
|
||||
}
|
||||
|
||||
export interface SkillReviewRecord {
|
||||
|
||||
Reference in New Issue
Block a user