diff --git a/app-instance/backend/beaver/coordinator/execution/scheduler.py b/app-instance/backend/beaver/coordinator/execution/scheduler.py index 6027599..8516fec 100644 --- a/app-instance/backend/beaver/coordinator/execution/scheduler.py +++ b/app-instance/backend/beaver/coordinator/execution/scheduler.py @@ -84,11 +84,21 @@ class TeamGraphScheduler: **kwargs, ) -> list[NodeRunResult]: results: list[NodeRunResult] = [] + nodes_by_id = {node.node_id: node for node in nodes} for node in nodes: - if any(not item.success for item in results): - results.append(self._blocked(node, results)) + blocking = [ + item + for item in results + if self._blocks_downstream(item, nodes_by_id[item.node_id]) + ] + if blocking: + results.append(self._blocked(node, blocking)) continue - dependency_outputs = {item.node_id: item.output_text for item in results if item.success} + dependency_outputs = { + item.node_id: item.output_text + for item in results + if item.completion_status in {"succeeded", "partial"} + } results.append(await self._run_node(node, dependency_outputs=dependency_outputs, **kwargs)) return results @@ -116,6 +126,7 @@ class TeamGraphScheduler: **kwargs, ) -> list[NodeRunResult]: pending = {node.node_id: node for node in nodes} + nodes_by_id = {node.node_id: node for node in nodes} completed: dict[str, NodeRunResult] = {} ordered: list[NodeRunResult] = [] @@ -123,18 +134,28 @@ class TeamGraphScheduler: blocked_ids = { node_id for node_id, node in pending.items() - if any(dep in completed and not completed[dep].success for dep in node.depends_on) + if any( + dep in completed + and self._blocks_downstream(completed[dep], nodes_by_id[dep]) + for dep in node.depends_on + ) } for node_id in sorted(blocked_ids): node = pending.pop(node_id) result = self._blocked(node, list(completed.values())) completed[node_id] = result ordered.append(result) + if blocked_ids: + continue ready = [ node for node in pending.values() - if all(dep in completed and completed[dep].success for 
dep in node.depends_on) + if all( + dep in completed + and not self._blocks_downstream(completed[dep], nodes_by_id[dep]) + for dep in node.depends_on + ) ] if not ready: if pending: @@ -196,6 +217,17 @@ class TeamGraphScheduler: expected_output=node.expected_output, node_id=node.node_id, dependency_outputs=dict(dependency_outputs), + input_contract=dict(node.input_contract), + output_contract=dict(node.output_contract), + allowed_tool_names=( + None if node.allowed_tool_names is None else list(node.allowed_tool_names) + ), + required_evidence=list(node.required_evidence), + evidence_contract=dict(node.evidence_contract), + validation_rules=list(node.validation_rules), + required_for_completion=node.required_for_completion, + block_downstream_on_partial=node.block_downstream_on_partial, + max_tool_iterations=node.max_tool_iterations, ) node_provider_bundle = provider_bundle_factory(node) if provider_bundle_factory is not None else provider_bundle return await self.runner.run( @@ -213,8 +245,17 @@ class TeamGraphScheduler: output_text="", finish_reason="error", error=str(exc), + completion_status="failed", ) + @staticmethod + def _blocks_downstream(result: NodeRunResult, node: ExecutionNode) -> bool: + if result.completion_status in {"failed", "blocked"}: + return True + if result.completion_status == "partial": + return node.block_downstream_on_partial + return not result.success + @staticmethod def _merge_pinned(parent: list[str], local: list[str]) -> list[str]: result: list[str] = [] @@ -245,6 +286,7 @@ class TeamGraphScheduler: output_text="", finish_reason="blocked", error=f"Blocked by failed dependency: {detail}", + completion_status="blocked", ) @staticmethod diff --git a/app-instance/backend/beaver/coordinator/local.py b/app-instance/backend/beaver/coordinator/local.py index f225e0a..51583ee 100644 --- a/app-instance/backend/beaver/coordinator/local.py +++ b/app-instance/backend/beaver/coordinator/local.py @@ -6,7 +6,7 @@ from uuid import uuid4 from 
beaver.engine import AgentLoop from beaver.engine.providers import ProviderBundle -from beaver.tasks.evidence import EvidenceBuilder +from beaver.tasks.evidence import EvidenceBuilder, evaluate_node_evidence from .models import DelegationEnvelope, NodeRunResult @@ -54,6 +54,8 @@ class LocalAgentRunner: task_mode=bool(envelope.parent_task_id), pinned_skill_names=envelope.inherited_pinned_skills, pinned_skill_contexts=envelope.inherited_pinned_skill_contexts, + allowed_tool_names=envelope.allowed_tool_names, + max_tool_iterations=envelope.max_tool_iterations, allow_candidate_generation=allow_candidate_generation, ) loaded = target_loop.boot() @@ -63,7 +65,23 @@ class LocalAgentRunner: result.output_text, result.finish_reason, ) - success = result.finish_reason == "stop" + evidence_gaps = evaluate_node_evidence( + evidence, + envelope.required_evidence, + result.output_text, + ) + run_succeeded = result.finish_reason == "stop" + if not run_succeeded: + completion_status = "failed" + elif evidence_gaps: + completion_status = "partial" + else: + completion_status = "succeeded" + success = completion_status == "succeeded" + if completion_status == "partial": + error = "; ".join(evidence_gaps) + else: + error = None if success else (result.output_text or result.finish_reason) return NodeRunResult( node_id=envelope.node_id or envelope.agent.name, success=success, @@ -71,8 +89,10 @@ class LocalAgentRunner: run_id=result.run_id, session_id=result.session_id, finish_reason=result.finish_reason, - error=None if success else (result.output_text or result.finish_reason), + error=error, evidence=evidence, + completion_status=completion_status, + evidence_gaps=evidence_gaps, ) @staticmethod diff --git a/app-instance/backend/beaver/coordinator/models.py b/app-instance/backend/beaver/coordinator/models.py index f54f036..aa45ae6 100644 --- a/app-instance/backend/beaver/coordinator/models.py +++ b/app-instance/backend/beaver/coordinator/models.py @@ -51,6 +51,15 @@ class 
DelegationEnvelope: expected_output: str | None = None node_id: str | None = None dependency_outputs: dict[str, str] = field(default_factory=dict) + input_contract: dict[str, Any] = field(default_factory=dict) + output_contract: dict[str, Any] = field(default_factory=dict) + allowed_tool_names: list[str] | None = None + required_evidence: list[str] = field(default_factory=list) + evidence_contract: dict[str, Any] = field(default_factory=dict) + validation_rules: list[str] = field(default_factory=list) + required_for_completion: bool = True + block_downstream_on_partial: bool = False + max_tool_iterations: int | None = None @dataclass(slots=True) @@ -65,6 +74,15 @@ class ExecutionNode: inherited_pinned_skill_contexts: list["SkillContext"] = field(default_factory=list) constraints: list[str] = field(default_factory=list) expected_output: str | None = None + input_contract: dict[str, Any] = field(default_factory=dict) + output_contract: dict[str, Any] = field(default_factory=dict) + allowed_tool_names: list[str] | None = None + required_evidence: list[str] = field(default_factory=list) + evidence_contract: dict[str, Any] = field(default_factory=dict) + validation_rules: list[str] = field(default_factory=list) + required_for_completion: bool = True + block_downstream_on_partial: bool = False + max_tool_iterations: int | None = None @dataclass(slots=True) @@ -74,7 +92,7 @@ class ExecutionGraph: strategy: TeamStrategy nodes: list[ExecutionNode] - def validate(self) -> None: + def validate(self, *, max_depth: int | None = None) -> None: if self.strategy not in {"sequence", "parallel", "dag"}: raise NotImplementedError(f"Team strategy {self.strategy!r} is reserved but not implemented in v1") if not self.nodes: @@ -91,19 +109,25 @@ class ExecutionGraph: visited: set[str] = set() deps = {node.node_id: list(node.depends_on) for node in self.nodes} - def visit(node_id: str) -> None: + def visit(node_id: str) -> int: if node_id in visited: - return + return depths[node_id] if 
node_id in visiting: raise ValueError(f"ExecutionGraph has cyclic or unresolved dependencies involving {node_id!r}") visiting.add(node_id) + depth = 1 for dep in deps[node_id]: - visit(dep) + depth = max(depth, visit(dep) + 1) visiting.remove(node_id) visited.add(node_id) + depths[node_id] = depth + return depth + depths: dict[str, int] = {} for node_id in node_ids: - visit(node_id) + depth = visit(node_id) + if max_depth is not None and depth > max_depth: + raise ValueError(f"ExecutionGraph exceeds max depth {max_depth}") @dataclass(slots=True) @@ -118,6 +142,8 @@ class NodeRunResult: finish_reason: str = "stop" error: str | None = None evidence: "RunEvidence | None" = None + completion_status: str = "succeeded" + evidence_gaps: list[str] = field(default_factory=list) def to_dict(self) -> dict[str, Any]: return { @@ -129,6 +155,8 @@ class NodeRunResult: "finish_reason": self.finish_reason, "error": self.error, "evidence": self.evidence.to_dict() if self.evidence is not None else None, + "completion_status": self.completion_status, + "evidence_gaps": list(self.evidence_gaps), } diff --git a/app-instance/backend/beaver/engine/context/builder.py b/app-instance/backend/beaver/engine/context/builder.py index c229635..b7775c1 100644 --- a/app-instance/backend/beaver/engine/context/builder.py +++ b/app-instance/backend/beaver/engine/context/builder.py @@ -48,6 +48,8 @@ class SkillContext: content_hash: str = "" activation_reason: str = "selected" tool_hints: list[str] = field(default_factory=list) + team_template: dict[str, Any] | None = None + team_template_warnings: list[str] = field(default_factory=list) @dataclass(slots=True) diff --git a/app-instance/backend/beaver/engine/loader.py b/app-instance/backend/beaver/engine/loader.py index d51666b..dfd626b 100644 --- a/app-instance/backend/beaver/engine/loader.py +++ b/app-instance/backend/beaver/engine/loader.py @@ -317,7 +317,10 @@ class EngineLoader: draft_service=draft_service, ) task_service = self._task_service or 
TaskService(workspace / "tasks") - task_execution_planner = self._task_execution_planner or TaskExecutionPlanner(task_skill_resolver=task_skill_resolver) + task_execution_planner = self._task_execution_planner or TaskExecutionPlanner( + task_skill_resolver=task_skill_resolver, + tool_registry=tool_registry, + ) mcp_manager = MCPConnectionManager( self.config.tools.mcp_servers, authz_config=self.config.authz, diff --git a/app-instance/backend/beaver/engine/loop.py b/app-instance/backend/beaver/engine/loop.py index 588421c..e9a5b7f 100644 --- a/app-instance/backend/beaver/engine/loop.py +++ b/app-instance/backend/beaver/engine/loop.py @@ -8,6 +8,7 @@ import os import re from dataclasses import dataclass, field from datetime import datetime, timezone +from time import perf_counter from typing import Any from uuid import uuid4 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError @@ -81,6 +82,49 @@ class _DirectRunRequest: future: asyncio.Future[AgentRunResult] +@dataclass(slots=True) +class _WebSearchLoopGuard: + low_quality_limit: int = 3 + _low_quality_count: int = 0 + _last_query: str = "" + + def observe_result(self, tool_name: str, content: str) -> dict[str, str] | None: + if tool_name != "web_search": + self._reset() + return None + try: + payload = json.loads(content) + except Exception: + self._reset() + return None + + query = str(payload.get("query") or self._last_query or "").strip() + is_low_quality = payload.get("success") is False or payload.get("quality") == "low" + if not is_low_quality: + self._reset() + self._last_query = query + return None + + self._low_quality_count += 1 + self._last_query = query + if self._low_quality_count < self.low_quality_limit: + return None + + query_text = f" for query '{query}'" if query else "" + return { + "finish_reason": "web_search_low_quality_budget", + "message": ( + "Web search returned low-quality or failed results repeatedly" + f"{query_text}. 
Stop retrying query variants; use confirmed sources already found, " + "state uncertainty clearly, and mark missing fields as N/A." + ), + } + + def _reset(self) -> None: + self._low_quality_count = 0 + self._last_query = "" + + class AgentLoop: """Single execution kernel shared by root agents and delegated agents.""" @@ -240,6 +284,7 @@ class AgentLoop: thinking_enabled: bool | None = None, include_skill_assembly: bool = True, include_tools: bool = True, + allowed_tool_names: list[str] | None = None, max_tool_iterations: int | None = None, provider_bundle: ProviderBundle | None = None, parent_session_id: str | None = None, @@ -252,6 +297,7 @@ class AgentLoop: allow_candidate_generation: bool = False, intent_agent_decision: dict[str, Any] | None = None, channel_identity: ChannelIdentity | None = None, + pre_run_latency_ms: dict[str, float] | None = None, ) -> AgentRunResult: """跑通最小 direct run 主链。 @@ -292,6 +338,7 @@ class AgentLoop: thinking_enabled=thinking_enabled, include_skill_assembly=include_skill_assembly, include_tools=include_tools, + allowed_tool_names=allowed_tool_names, max_tool_iterations=max_tool_iterations, provider_bundle=provider_bundle, parent_session_id=parent_session_id, @@ -304,6 +351,7 @@ class AgentLoop: allow_candidate_generation=allow_candidate_generation, intent_agent_decision=intent_agent_decision, channel_identity=channel_identity, + pre_run_latency_ms=pre_run_latency_ms, ) async def _process_direct_impl( @@ -332,6 +380,7 @@ class AgentLoop: thinking_enabled: bool | None = None, include_skill_assembly: bool = True, include_tools: bool = True, + allowed_tool_names: list[str] | None = None, max_tool_iterations: int | None = None, provider_bundle: ProviderBundle | None = None, parent_session_id: str | None = None, @@ -344,6 +393,7 @@ class AgentLoop: allow_candidate_generation: bool = False, intent_agent_decision: dict[str, Any] | None = None, channel_identity: ChannelIdentity | None = None, + pre_run_latency_ms: dict[str, float] | None = 
None, ) -> AgentRunResult: """真正执行一轮 direct run 的内部实现。 @@ -353,8 +403,25 @@ class AgentLoop: - 这样才能保证 run 模式下外部不能绕过队列直接执行 """ + run_perf_started = perf_counter() + latency_ms = self._initial_latency_ms(pre_run_latency_ms) + + def add_latency(key: str, started_at: float) -> None: + latency_ms[key] = latency_ms.get(key, 0.0) + (perf_counter() - started_at) * 1000 + loaded = self.boot() session_manager = self._require_loaded("session_manager") + + def session_write(callable_obj: Any, *args: Any, **kwargs: Any) -> Any: + started_at = perf_counter() + try: + return callable_obj(*args, **kwargs) + finally: + add_latency("session_write_ms", started_at) + + def append_message(session_id_value: str, **kwargs: Any) -> int: + return session_write(session_manager.append_message, session_id_value, **kwargs) + memory_service = self._require_loaded("memory_service") context_builder = self._require_loaded("context_builder") tool_registry = self._require_loaded("tool_registry") @@ -365,9 +432,13 @@ class AgentLoop: skill_assembler = self._require_loaded("skill_assembler") skill_learning_service = self._require_loaded("skill_learning_service") mcp_manager = getattr(loaded, "mcp_manager", None) - if mcp_manager is not None: - loaded.mcp_report = await mcp_manager.connect_all(tool_registry) - loaded.tools = [spec.name for spec in tool_registry.list_specs()] + if include_tools and mcp_manager is not None: + started_at = perf_counter() + try: + loaded.mcp_report = await mcp_manager.connect_all(tool_registry) + loaded.tools = [spec.name for spec in tool_registry.list_specs()] + finally: + add_latency("mcp_ms", started_at) config = loaded.config configured_provider = config.resolve_provider_target(model=model, provider_name=provider_name) @@ -393,13 +464,15 @@ class AgentLoop: memory_snapshot = memory_service.capture_snapshot_for_run() if parent_session_id: - session_manager.ensure_session( + session_write( + session_manager.ensure_session, parent_session_id, source="unknown", 
model=resolved_model, user_id=user_id, ) - session_manager.ensure_session( + session_write( + session_manager.ensure_session, resolved_session_id, source=source, model=resolved_model, @@ -407,7 +480,7 @@ class AgentLoop: user_id=user_id, parent_session_id=parent_session_id, ) - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -435,7 +508,7 @@ class AgentLoop: user_id=user_id, ) if intent_agent_decision: - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -480,35 +553,39 @@ class AgentLoop: *(pinned_skill_contexts or []), *self._load_pinned_skill_contexts(skills_loader, pinned_skill_names or []), ] - if not include_skill_assembly: - activated_skills = self._merge_skill_contexts(pinned_skills, []) - else: - skill_query = skill_selection_context or task - assembled_skills = await skill_assembler.assemble( - task_description=skill_query, - provider=skill_selector_provider, - model=skill_selector_model, - embedding_runtime=bundle.embedding_runtime, - thinking_enabled=thinking_enabled, - ) - for interaction in getattr(assembled_skills, "llm_interactions", []) or []: - session_manager.append_message( - resolved_session_id, - run_id=resolved_run_id, - role="system", - event_type="skill_assembler_llm_interaction_snapshotted", - event_payload=interaction, - content=json.dumps(interaction, ensure_ascii=False, default=str), - context_visible=False, - source=source, - title=title, + started_at = perf_counter() + try: + if not include_skill_assembly: + activated_skills = self._merge_skill_contexts(pinned_skills, []) + else: + skill_query = skill_selection_context or task + assembled_skills = await skill_assembler.assemble( + task_description=skill_query, + provider=skill_selector_provider, model=skill_selector_model, - user_id=user_id, + embedding_runtime=bundle.embedding_runtime, + thinking_enabled=thinking_enabled, ) - activated_skills = 
self._merge_skill_contexts( - pinned_skills, - assembled_skills.activated_skills, - ) + for interaction in getattr(assembled_skills, "llm_interactions", []) or []: + append_message( + resolved_session_id, + run_id=resolved_run_id, + role="system", + event_type="skill_assembler_llm_interaction_snapshotted", + event_payload=interaction, + content=json.dumps(interaction, ensure_ascii=False, default=str), + context_visible=False, + source=source, + title=title, + model=skill_selector_model, + user_id=user_id, + ) + activated_skills = self._merge_skill_contexts( + pinned_skills, + assembled_skills.activated_skills, + ) + finally: + add_latency("skill_assembly_ms", started_at) skill_activation_messages = context_builder.build_skill_activation_messages( activated_skills ) @@ -527,7 +604,7 @@ class AgentLoop: ] if skill_activation_messages or activated_receipts: - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -544,19 +621,26 @@ class AgentLoop: user_id=user_id, ) - if not include_tools: - selected_tool_specs = [] - else: - selected_tool_specs = await tool_assembler.assemble( - task_description=task, - registry=tool_registry, - skills_loader=skills_loader, - activated_skills=activated_skills, - embedding_runtime=bundle.embedding_runtime, - top_k=10, - ) + started_at = perf_counter() + try: + if not include_tools: + selected_tool_specs = [] + else: + selected_tool_specs = await tool_assembler.assemble( + task_description=task, + registry=tool_registry, + skills_loader=skills_loader, + activated_skills=activated_skills, + embedding_runtime=bundle.embedding_runtime, + top_k=10, + ) + if allowed_tool_names is not None: + allowed = set(allowed_tool_names) + selected_tool_specs = [spec for spec in selected_tool_specs if spec.name in allowed] + finally: + add_latency("tool_assembly_ms", started_at) tool_schemas = tool_registry.export_selected_provider_schemas(selected_tool_specs) - session_manager.append_message( + 
append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -573,37 +657,41 @@ class AgentLoop: user_id=user_id, ) - build_input = ContextBuildInput( - base_system_prompt=self.profile.system_prompt, - prompt_locale=prompt_locale, - history=session_manager.get_history( - resolved_session_id, - max_messages=max(1, self.profile.max_context_messages), - ), - current_user_input=task, - memory_snapshot=memory_snapshot, - activated_skills=activated_skills, - session_context=SessionContext( - session_id=resolved_session_id, - source=source, - model=resolved_model, - user_id=user_id, - channel=channel_identity.channel_id if channel_identity else None, - channel_kind=channel_identity.kind if channel_identity else None, - account_id=channel_identity.account_id if channel_identity else None, - peer_id=channel_identity.peer_id if channel_identity else None, - peer_type=channel_identity.peer_type if channel_identity else None, - chat_id=channel_identity.peer_id if channel_identity else None, - thread_id=channel_identity.thread_id if channel_identity else None, - parent_session_id=parent_session_id, - ), - runtime_context=self._current_runtime_context(), - execution_context=execution_context, - extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT], - ) - context_result = context_builder.build_messages(build_input) + started_at = perf_counter() + try: + build_input = ContextBuildInput( + base_system_prompt=self.profile.system_prompt, + prompt_locale=prompt_locale, + history=session_manager.get_history( + resolved_session_id, + max_messages=max(1, self.profile.max_context_messages), + ), + current_user_input=task, + memory_snapshot=memory_snapshot, + activated_skills=activated_skills, + session_context=SessionContext( + session_id=resolved_session_id, + source=source, + model=resolved_model, + user_id=user_id, + channel=channel_identity.channel_id if channel_identity else None, + channel_kind=channel_identity.kind if channel_identity else None, + 
account_id=channel_identity.account_id if channel_identity else None, + peer_id=channel_identity.peer_id if channel_identity else None, + peer_type=channel_identity.peer_type if channel_identity else None, + chat_id=channel_identity.peer_id if channel_identity else None, + thread_id=channel_identity.thread_id if channel_identity else None, + parent_session_id=parent_session_id, + ), + runtime_context=self._current_runtime_context(), + execution_context=execution_context, + extra_sections=[TOOL_FAILURE_GUIDANCE_PROMPT], + ) + context_result = context_builder.build_messages(build_input) + finally: + add_latency("context_build_ms", started_at) if skill_selection_context: - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -621,8 +709,8 @@ class AgentLoop: model=resolved_model, user_id=user_id, ) - session_manager.update_system_prompt(resolved_session_id, context_result.system_prompt) - session_manager.append_message( + session_write(session_manager.update_system_prompt, resolved_session_id, context_result.system_prompt) + append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -639,7 +727,7 @@ class AgentLoop: model=resolved_model, user_id=user_id, ) - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="user", @@ -676,6 +764,9 @@ class AgentLoop: "session_id": resolved_session_id, "task_id": task_id, "run_id": resolved_run_id, + "allowed_tool_names": ( + None if allowed_tool_names is None else list(allowed_tool_names) + ), }, ) @@ -683,6 +774,7 @@ class AgentLoop: final_finish_reason = "stop" final_provider_name = bundle.main_runtime.provider_name final_model = bundle.main_runtime.model + web_search_loop_guard = _WebSearchLoopGuard() while True: chat_kwargs: dict[str, Any] = { @@ -713,7 +805,7 @@ class AgentLoop: "temperature": resolved_temperature, "thinking_enabled": thinking_enabled, } - session_manager.append_message( + 
append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -726,14 +818,18 @@ class AgentLoop: model=final_model, user_id=user_id, ) - response = await provider.chat(**chat_kwargs) + started_at = perf_counter() + try: + response = await provider.chat(**chat_kwargs) + finally: + add_latency("llm_ms", started_at) final_provider_name = response.provider_name or final_provider_name final_model = response.model or final_model final_usage = self._merge_usage(final_usage, response.usage or {}) - self._record_usage(session_manager, resolved_session_id, response.usage or {}) + session_write(self._record_usage, session_manager, resolved_session_id, response.usage or {}) assistant_tool_calls = self._serialize_tool_calls(response.tool_calls) - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="assistant", @@ -764,17 +860,21 @@ class AgentLoop: break if iterations >= resolved_max_tool_iterations: - finalized = await self._finalize_after_tool_limit( - provider=provider, - messages=messages, - model=final_model, - max_tokens=resolved_max_tokens, - temperature=resolved_temperature, - thinking_enabled=thinking_enabled, - ) + started_at = perf_counter() + try: + finalized = await self._finalize_after_tool_limit( + provider=provider, + messages=messages, + model=final_model, + max_tokens=resolved_max_tokens, + temperature=resolved_temperature, + thinking_enabled=thinking_enabled, + ) + finally: + add_latency("llm_ms", started_at) final_text = finalized or RAW_TOOL_CALL_FALLBACK final_finish_reason = "max_tool_iterations_finalized" if finalized else "max_tool_iterations" - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="assistant", @@ -800,9 +900,26 @@ class AgentLoop: reasoning_content=response.reasoning_content, ) iterations += 1 - for tool_call in response.tool_calls: - result = await effective_tool_executor.execute_tool_call(tool_call, context=tool_context) - 
session_manager.append_message( + started_at = perf_counter() + try: + if self._can_run_tool_calls_concurrently(response.tool_calls, tool_registry): + tool_results = await asyncio.gather( + *( + effective_tool_executor.execute_tool_call(tool_call, context=tool_context) + for tool_call in response.tool_calls + ) + ) + else: + tool_results = [] + for tool_call in response.tool_calls: + tool_results.append( + await effective_tool_executor.execute_tool_call(tool_call, context=tool_context) + ) + finally: + add_latency("tool_ms", started_at) + web_guard_decision: dict[str, str] | None = None + for tool_call, result in zip(response.tool_calls, tool_results, strict=True): + append_message( resolved_session_id, run_id=resolved_run_id, role="tool", @@ -825,8 +942,30 @@ class AgentLoop: tool_name=result.tool_name, result=result.content, ) + if web_guard_decision is None: + web_guard_decision = web_search_loop_guard.observe_result(result.tool_name, result.content) + if web_guard_decision is not None: + final_text = web_guard_decision["message"] + final_finish_reason = web_guard_decision["finish_reason"] + append_message( + resolved_session_id, + run_id=resolved_run_id, + role="assistant", + event_type="assistant_message_added", + event_payload={"task_id": task_id} if task_id else None, + content=final_text, + finish_reason=final_finish_reason, + source=source, + title=title, + model=final_model, + user_id=user_id, + ) + context_builder.add_assistant_message(messages, content=final_text) + break - session_manager.append_message( + final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started) + final_usage_with_latency = self._usage_with_latency(final_usage, final_latency_ms) + append_message( resolved_session_id, run_id=resolved_run_id, role="system", @@ -837,6 +976,7 @@ class AgentLoop: "task_id": task_id, "task_mode": task_mode, "attempt_index": attempt_index, + "latency_ms": final_latency_ms, }, content=final_text, finish_reason=final_finish_reason, @@ -869,12 
+1009,12 @@ class AgentLoop: tool_iterations=iterations, provider_name=final_provider_name, model=final_model, - usage=final_usage, + usage=final_usage_with_latency, task_id=task_id, ) except Exception as exc: if not user_message_recorded: - session_manager.append_message( + append_message( resolved_session_id, run_id=resolved_run_id, role="user", @@ -885,6 +1025,7 @@ class AgentLoop: model=resolved_model, user_id=user_id, ) + final_latency_ms = self._final_latency_ms(latency_ms, run_perf_started) result = self._build_error_result( session_manager=session_manager, session_id=resolved_session_id, @@ -896,8 +1037,9 @@ class AgentLoop: message=f"Run failed before completion: {exc}", tool_iterations=iterations, provider_name=final_provider_name, - usage=final_usage, + usage=self._usage_with_latency(final_usage, final_latency_ms), task_id=task_id, + latency_ms=final_latency_ms, ) self._record_run_receipts( skill_learning_service=skill_learning_service, @@ -1032,6 +1174,80 @@ class AgentLoop: ) return payload + @staticmethod + def _can_run_tool_calls_concurrently(tool_calls: list[Any], tool_registry: Any) -> bool: + if len(tool_calls) < 2: + return False + return all(AgentLoop._is_read_only_tool_call(tool_call, tool_registry) for tool_call in tool_calls) + + @staticmethod + def _is_read_only_tool_call(tool_call: Any, tool_registry: Any) -> bool: + name = AgentLoop._tool_call_name(tool_call) + if not name: + return False + tool = tool_registry.get(name) if tool_registry is not None else None + if tool is None: + return False + spec = getattr(tool, "spec", None) + toolset = str(getattr(spec, "toolset", "") or "").lower() + metadata = getattr(spec, "metadata", {}) or {} + if metadata.get("read_only") is True: + return True + if metadata.get("mutates") or metadata.get("sensitive"): + return False + return name in { + "list_directory", + "read_file", + "search_files", + "session_search", + "skills_list", + "skill_view", + "user_files_list", + "user_files_read", + "web_fetch", 
+ "web_search", + } and toolset in {"filesystem", "session", "skills", "user_files", "web"} + + @staticmethod + def _tool_call_name(tool_call: Any) -> str: + if not isinstance(tool_call, dict): + return str(getattr(tool_call, "name", "") or "") + function = tool_call.get("function") + if isinstance(function, dict): + return str(function.get("name") or "") + return str(tool_call.get("name") or "") + + @staticmethod + def _initial_latency_ms(pre_run_latency_ms: dict[str, float] | None) -> dict[str, float]: + latency = { + "router_ms": 0.0, + "mcp_ms": 0.0, + "skill_assembly_ms": 0.0, + "tool_assembly_ms": 0.0, + "context_build_ms": 0.0, + "llm_ms": 0.0, + "tool_ms": 0.0, + "session_write_ms": 0.0, + "total_ms": 0.0, + } + if pre_run_latency_ms: + for key, value in pre_run_latency_ms.items(): + if isinstance(value, (int, float)): + latency[str(key)] = latency.get(str(key), 0.0) + float(value) + return latency + + @staticmethod + def _final_latency_ms(latency_ms: dict[str, float], run_perf_started: float) -> dict[str, float]: + finalized = dict(latency_ms) + finalized["total_ms"] = finalized.get("total_ms", 0.0) + (perf_counter() - run_perf_started) * 1000 + return {key: round(max(0.0, float(value)), 3) for key, value in finalized.items()} + + @staticmethod + def _usage_with_latency(usage: dict[str, Any], latency_ms: dict[str, float]) -> dict[str, Any]: + payload = dict(usage) + payload["latency_ms"] = dict(latency_ms) + return payload + @staticmethod def _record_usage(session_manager: Any, session_id: str, usage: dict[str, Any]) -> None: """把 provider usage 映射到 session usage 字段。 @@ -1079,6 +1295,7 @@ class AgentLoop: provider_name: str | None, usage: dict[str, Any], task_id: str | None = None, + latency_ms: dict[str, float] | None = None, ) -> AgentRunResult: """把主链中的未处理异常收口成可追踪的 assistant error turn。""" @@ -1104,6 +1321,7 @@ class AgentLoop: "tool_iterations": tool_iterations, "provider_name": provider_name, "task_id": task_id, + "latency_ms": latency_ms or {}, }, 
content=message, finish_reason="error", diff --git a/app-instance/backend/beaver/interfaces/web/app.py b/app-instance/backend/beaver/interfaces/web/app.py index f1318f7..daffc23 100644 --- a/app-instance/backend/beaver/interfaces/web/app.py +++ b/app-instance/backend/beaver/interfaces/web/app.py @@ -43,6 +43,7 @@ from beaver.services.user_files import ( UserFileNotFoundError, UserFilePathError, UserFileSizeError, + UserFileStorageError, UserFileService, ) from beaver.services.user_file_resolver import ( @@ -644,6 +645,8 @@ def create_app( return HTTPException(status_code=400, detail=str(exc) or "Invalid path") if isinstance(exc, UserFileSizeError): return HTTPException(status_code=413, detail=str(exc) or "File too large") + if isinstance(exc, UserFileStorageError): + return HTTPException(status_code=503, detail=str(exc) or "User file storage is unavailable") if isinstance(exc, UserFileConfigurationError): return HTTPException(status_code=503, detail=str(exc) or "User file storage is not configured") return HTTPException(status_code=400, detail=str(exc) or "User file operation failed") @@ -1327,6 +1330,7 @@ def create_app( "runs": runs, } ) + sessions.sort(key=lambda item: item.get("updated_at") or item.get("created_at") or "", reverse=True) return {"sessions": sessions} @app.post("/api/sessions/{session_id:path}/archive") @@ -3166,6 +3170,11 @@ def _debug_runs_for_session(session_manager: Any, session_id: str) -> list[dict[ title = getattr(started, "title", None) if title is None: title = source or "run" + latency_ms = None + if completed is not None and isinstance(completed.event_payload, dict): + raw_latency = completed.event_payload.get("latency_ms") + latency_ms = raw_latency if isinstance(raw_latency, dict) else None + sorted_records = sorted(records, key=lambda item: item.timestamp or 0, reverse=True) runs.append( { "run_id": run_id, @@ -3181,10 +3190,15 @@ def _debug_runs_for_session(session_manager: Any, session_id: str) -> list[dict[ "started_at": 
_iso_from_timestamp(started.timestamp if started is not None else None), "ended_at": _iso_from_timestamp(completed.timestamp) if completed is not None else None, "finish_reason": completed.finish_reason if completed is not None else None, - "events": [_debug_event_to_dict(item) for item in records], + "latency_ms": latency_ms or {}, + "events": [_debug_event_to_dict(item) for item in sorted_records], } ) - return runs + return sorted( + runs, + key=lambda item: item.get("ended_at") or item.get("started_at") or "", + reverse=True, + ) def _debug_event_to_dict(record: Any) -> dict[str, Any]: diff --git a/app-instance/backend/beaver/services/agent_service.py b/app-instance/backend/beaver/services/agent_service.py index 7fcfc98..c671e38 100644 --- a/app-instance/backend/beaver/services/agent_service.py +++ b/app-instance/backend/beaver/services/agent_service.py @@ -14,24 +14,20 @@ from __future__ import annotations import asyncio from pathlib import Path +from time import perf_counter from typing import Any from uuid import uuid4 -from beaver.coordinator.models import ExecutionNode, TeamRunResult from beaver.engine import AgentLoop, AgentProfile, AgentRunResult, EngineLoader from beaver.engine.providers import make_provider_bundle from beaver.foundation.events import InboundMessage, OutboundMessage from beaver.foundation.models import CronJob, CronRunRecord from beaver.prompts.main_agent import normalize_main_agent_prompt_locale from beaver.tasks import ( - EvidenceBuilder, MainAgentRouter, - RunEvidence, - TaskEvidencePacket, - TaskExecutionPlan, TaskRecord, - render_task_evidence, ) +from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator from beaver.tasks.service import normalize_acceptance_type @@ -594,15 +590,22 @@ class AgentService: router_provider = provider_bundle.auxiliary_provider or provider_bundle.main_provider router_runtime = provider_bundle.auxiliary_runtime or provider_bundle.main_runtime active_task = 
task_service.get_latest_open_task(session_id) - decision = await self._main_agent_router.classify( - message, - active_task=active_task, - provider=router_provider, - model=getattr(router_runtime, "model", None), - recent_messages=session_manager.get_messages_as_conversation(session_id), - intent_skill=self._load_intent_agent_skill(loaded), - thinking_enabled=kwargs.get("thinking_enabled"), - ) + router_started = perf_counter() + try: + decision = await self._main_agent_router.classify( + message, + active_task=active_task, + provider=router_provider, + model=getattr(router_runtime, "model", None), + recent_messages=session_manager.get_messages_as_conversation(session_id), + intent_skill=self._load_intent_agent_skill(loaded), + thinking_enabled=kwargs.get("thinking_enabled"), + ) + finally: + kwargs["pre_run_latency_ms"] = self._merge_latency_ms( + kwargs.get("pre_run_latency_ms"), + {"router_ms": (perf_counter() - router_started) * 1000}, + ) kwargs["intent_agent_decision"] = self._intent_decision_payload( decision, active_task=active_task, @@ -751,216 +754,19 @@ class AgentService: task: TaskRecord, ) -> AgentRunResult: loaded = self.create_loop().boot() - task_service = self._require_loaded(loaded, "task_service") - task_execution_planner = self._require_loaded(loaded, "task_execution_planner") - session_manager = self._require_loaded(loaded, "session_manager") - - base_execution_context = kwargs.get("execution_context") - prompt_locale = kwargs.get("prompt_locale") or task.metadata.get("prompt_locale") - output_language_instruction = self._output_language_instruction(prompt_locale) - provider_bundle = kwargs.get("provider_bundle") or self._make_provider_bundle_for_task(loaded, kwargs) - kwargs = dict(kwargs) - team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None) - kwargs["provider_bundle"] = provider_bundle - - attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1 - task_service.start_run(task.task_id, 
user_message=message, attempt_index=attempt_index) - plan = await task_execution_planner.plan( + return await self._build_task_attempt_orchestrator(loaded).run( + message=message, + runner=runner, + kwargs=kwargs, task=task, - user_message=message, - attempt_index=attempt_index, - provider_bundle=provider_bundle, - ) - self._append_task_observation( - session_manager, - task.session_id, - event_type="task_execution_planned", - payload={ - "task_id": task.task_id, - "attempt_index": attempt_index, - **plan.to_event_payload(), - }, - ) - team_summaries: list[str] = [] - team_execution_context = "" - team_result: TeamRunResult | None = None - if plan.is_team: - team_result, team_error = await self._run_team_for_task( - plan, - task=task, - parent_session_id=kwargs["session_id"], - provider_bundle_factory=team_provider_bundle_factory - or self._build_team_provider_bundle_factory(loaded, kwargs), - ) - if team_result is not None: - team_summaries = [self._team_summary_for_validation(team_result)] - team_packet = TaskEvidencePacket( - task_id=task.task_id, - attempt_index=attempt_index, - main_run=None, - team_runs=self._team_run_evidence(team_result), - team_node_results=list(team_result.node_results), - final_output="", - ) - team_execution_context = self._join_context( - self._team_execution_context(plan, team_result), - "Rendered team evidence:\n" + render_task_evidence(team_packet), - ) - self._append_task_observation( - session_manager, - task.session_id, - event_type="task_team_run_completed" if team_result.success else "task_team_run_failed", - payload={ - "task_id": task.task_id, - "attempt_index": attempt_index, - "plan_mode": plan.mode, - "strategy": plan.graph.strategy if plan.graph else None, - "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [], - "team_run_ids": team_result.run_ids, - "team_success": team_result.success, - "node_results": self._team_node_results_for_event(plan, team_result), - "reason": plan.reason, - "error": 
None if team_result.success else "one or more team nodes failed", - }, - ) - else: - team_summaries = [f"Team execution failed: {team_error}"] - team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error") - self._append_task_observation( - session_manager, - task.session_id, - event_type="task_team_run_failed", - payload={ - "task_id": task.task_id, - "attempt_index": attempt_index, - "plan_mode": plan.mode, - "strategy": plan.graph.strategy if plan.graph else None, - "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [], - "team_run_ids": [], - "team_success": False, - "reason": plan.reason, - "error": team_error, - }, - ) - - attempt_kwargs = dict(kwargs) - attempt_kwargs.update( - { - "task_id": task.task_id, - "task_mode": True, - "attempt_index": attempt_index, - "allow_candidate_generation": False, - } - ) - attempt_kwargs["execution_context"] = self._join_context( - base_execution_context, - output_language_instruction, - team_execution_context, - ) - if plan.is_team and team_execution_context: - attempt_kwargs["include_tools"] = False - attempt_kwargs["max_tool_iterations"] = 0 - attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context( - task=task, - user_message=message, - attempt_index=attempt_index, - plan=plan, - team_summaries=team_summaries, ) - result = await runner(message, **attempt_kwargs) - self._append_task_observation( - session_manager, - task.session_id, - event_type="task_synthesis_completed", - payload={ - "task_id": task.task_id, - "attempt_index": attempt_index, - "main_run_id": result.run_id, - "plan_mode": plan.mode, - "strategy": plan.graph.strategy if plan.graph else None, - }, + def _build_task_attempt_orchestrator(self, loaded: Any) -> TaskAttemptOrchestrator: + return TaskAttemptOrchestrator( + loaded=loaded, + create_loop=self.create_loop, + make_provider_bundle_for_task=self._make_provider_bundle_for_task, ) - task = task_service.append_run( - 
task.task_id, - result.run_id, - skill_names=self._skill_names_for_run(loaded, result.run_id), - ) - evidence_packet = self._build_task_evidence_packet( - session_manager=session_manager, - task=task, - attempt_index=attempt_index, - result=result, - team_result=team_result, - ) - evidence_text = render_task_evidence(evidence_packet) - evidence_debug = { - "evidence_run_ids": [ - item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None - ], - "evidence_session_ids": [ - item.session_id - for item in [evidence_packet.main_run, *evidence_packet.team_runs] - if item is not None - ], - "tool_result_count": sum( - len(item.tool_results) - for item in [evidence_packet.main_run, *evidence_packet.team_runs] - if item is not None - ), - "evidence_length": len(evidence_text), - } - session_manager.update_latest_assistant_event_payload( - result.session_id, - result.run_id, - { - "task_id": task.task_id, - "task_status": task.status, - "evidence_status": "recorded", - }, - ) - session_manager.append_message( - result.session_id, - run_id=result.run_id, - role="system", - event_type="task_evidence_recorded", - event_payload={ - "task_id": task.task_id, - "attempt_index": attempt_index, - "evidence_debug": evidence_debug, - }, - content=None, - context_visible=False, - ) - result.task_id = task.task_id - result.task_status = task.status - result.validation_result = None - return result - - async def _run_team_for_task( - self, - plan: TaskExecutionPlan, - *, - task: TaskRecord, - parent_session_id: str, - provider_bundle_factory: Any, - ) -> tuple[TeamRunResult | None, str | None]: - if plan.graph is None: - return None, "team plan did not include an execution graph" - try: - from beaver.services.team_service import TeamService - - result = await TeamService(self.create_loop()).run_team( - plan.graph, - parent_task_id=task.task_id, - parent_session_id=parent_session_id, - parent_run_id=None, - 
provider_bundle_factory=provider_bundle_factory, - allow_candidate_generation=False, - ) - return result, None - except Exception as exc: - return None, str(exc) @staticmethod def _require_loaded(loaded: Any, field_name: str) -> Any: @@ -992,32 +798,15 @@ class AgentService: } @staticmethod - def _output_language_instruction(prompt_locale: str | None) -> str: - locale = normalize_main_agent_prompt_locale(prompt_locale) - if locale == "en": - return ( - "Output language: English. Use English for user-facing task titles, summaries, plans, " - "and final answers unless the user explicitly requests another language." - ) - if locale == "zh-Hant": - return ( - "輸出語言:繁體中文。除非使用者明確要求其他語言,所有面向使用者的任務標題、摘要、" - "計劃與最終回答都使用繁體中文。" - ) - return ( - "输出语言:简体中文。除非用户明确要求其他语言,所有面向用户的任务标题、摘要、" - "计划与最终回答都使用简体中文。" - ) - - @staticmethod - def _skill_names_for_run(loaded: Any, run_id: str) -> list[str]: - store = getattr(loaded, "run_memory_store", None) - if store is None: - return [] - for record in store.list_runs(): - if record.run_id == run_id: - return [receipt.skill_name for receipt in record.activated_skills] - return [] + def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]: + merged: dict[str, float] = {} + if isinstance(current, dict): + for key, value in current.items(): + if isinstance(value, (int, float)): + merged[str(key)] = float(value) + for key, value in updates.items(): + merged[key] = merged.get(key, 0.0) + float(value) + return merged @staticmethod def _acceptance_score_for_learning(acceptance_type: str) -> float: @@ -1027,237 +816,6 @@ class AgentService: return 0.5 return 0.0 - @staticmethod - def _build_skill_selection_context( - *, - task: TaskRecord, - user_message: str, - attempt_index: int, - plan: TaskExecutionPlan | None = None, - team_summaries: list[str] | None = None, - ) -> str: - phase = f"attempt_{attempt_index}" - if task.feedback and task.feedback[-1].get("acceptance_type") == "revise": - phase = 
f"revision_attempt_{attempt_index}" - elif plan is not None and plan.is_team: - phase = f"team_synthesis_attempt_{attempt_index}" - - sections = [ - f"Task goal:\n{task.goal or task.description}", - f"Task description:\n{task.description}", - f"Current user request:\n{user_message}", - f"Execution phase:\n{phase}", - f"Task status:\n{task.status}", - ] - if task.constraints: - sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints)) - if task.skill_names: - sections.append( - "Previously activated skills (reuse bias, not pinned):\n" - + "\n".join(f"- {item}" for item in task.skill_names) - ) - else: - sections.append("Previously activated skills:\nNone") - if task.feedback: - history_lines = [] - for item in task.feedback[-5:]: - kind = item.get("acceptance_type") or item.get("feedback_type") - comment = item.get("comment") or "" - run_id = item.get("run_id") or "" - history_lines.append(f"- {kind} run={run_id}: {comment}".strip()) - sections.append("Task acceptance history:\n" + "\n".join(history_lines)) - if plan is not None: - plan_lines = [ - f"mode: {plan.mode}", - f"reason: {plan.reason}", - ] - if plan.final_synthesis_instruction: - plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}") - if plan.graph is not None: - plan_lines.append(f"strategy: {plan.graph.strategy}") - plan_lines.append( - "nodes:\n" - + "\n".join( - f"- {node.node_id}: {node.task}" - for node in plan.graph.nodes - ) - ) - sections.append("Execution plan:\n" + "\n".join(plan_lines)) - if team_summaries: - sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400]) - sections.append( - "Skill selection instruction:\n" - "Prefer reusing previously activated skills when they still match the Task. " - "Select new skills only if the current request, revision, or execution plan needs a different capability. " - "If no published skill matches, return [] and let the run continue without skills." 
- ) - return "\n\n".join(section for section in sections if section.strip()) - - @staticmethod - def _run_excerpt(session_manager: Any, session_id: str, run_id: str) -> str: - lines = [] - for event in session_manager.get_run_event_records(session_id, run_id): - if event.context_visible and event.content: - lines.append(f"{event.role}: {event.content.strip()}") - return "\n".join(lines[:12])[:2400] - - @staticmethod - def _tool_summaries(session_manager: Any, session_id: str, run_id: str) -> list[str]: - summaries = [] - for event in session_manager.get_run_event_records(session_id, run_id): - if event.event_type != "tool_result_recorded": - continue - text = (event.content or "").strip() - if text: - summaries.append(f"{event.tool_name or 'tool'}: {text[:500]}") - return summaries[:12] - - @staticmethod - def _append_task_observation( - session_manager: Any, - session_id: str, - *, - event_type: str, - payload: dict[str, Any], - ) -> None: - session_manager.append_message( - session_id, - role="system", - event_type=event_type, - event_payload=payload, - content=payload.get("reason") or payload.get("error"), - context_visible=False, - ) - - @staticmethod - def _join_context(*parts: str | None) -> str: - return "\n\n".join(part.strip() for part in parts if part and part.strip()) - - @staticmethod - def _team_summary_for_validation(result: TeamRunResult) -> str: - lines = [ - f"success={result.success}", - f"task_id={result.task_id or ''}", - "summary:", - result.summary, - "nodes:", - ] - for node in result.node_results: - lines.append( - f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} " - f"error={node.error or ''} output={node.output_text[:500]}" - ) - return "\n".join(lines) - - @staticmethod - def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]: - nodes = {node.node_id: node for node in plan.graph.nodes} if plan.graph else {} - payloads: list[dict[str, Any]] = [] - for item in 
result.node_results: - payload = item.to_dict() - node = nodes.get(item.node_id) - if node is not None: - payload["selected_skill_names"] = list(node.inherited_pinned_skills) - payload["ephemeral_skill_names"] = [ - skill.name for skill in node.inherited_pinned_skill_contexts - ] - payload["skill_query"] = node.agent.metadata.get("skill_query") - payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id") - payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name") - payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts) - payloads.append(payload) - return payloads - - @staticmethod - def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]: - if result is None: - return [] - return [node.evidence for node in result.node_results if node.evidence is not None] - - def _build_task_evidence_packet( - self, - *, - session_manager: Any, - task: TaskRecord, - attempt_index: int, - result: AgentRunResult, - team_result: TeamRunResult | None, - ) -> TaskEvidencePacket: - main_run = EvidenceBuilder(session_manager).build_run_evidence( - result.session_id, - result.run_id, - result.output_text, - result.finish_reason, - ) - return TaskEvidencePacket( - task_id=task.task_id, - attempt_index=attempt_index, - main_run=main_run, - team_runs=self._team_run_evidence(team_result), - team_node_results=list(team_result.node_results) if team_result is not None else [], - final_output=result.output_text, - ) - - @staticmethod - def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str: - node_lines = [ - ( - f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, " - f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}" - ) - for node in result.node_results - ] - return "\n\n".join( - item - for item in [ - "Task team execution result:", - f"Planner reason: {plan.reason}", - f"Strategy: {plan.graph.strategy if plan.graph else ''}", 
- f"Team success: {result.success}", - f"Team summary:\n{result.summary}", - "Node results:\n" + "\n\n".join(node_lines), - ( - "Final synthesis instruction:\n" + plan.final_synthesis_instruction - if plan.final_synthesis_instruction - else None - ), - ( - "Use successful team outputs as internal evidence. If one or more nodes failed, " - "do not blindly repeat failed tool calls. Produce a user-visible fallback answer " - "with available evidence and clearly state any missing or uncertain data." - ), - ] - if item - ) - - @staticmethod - def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str: - return "\n\n".join( - [ - "Task team execution failed before final synthesis.", - f"Planner reason: {plan.reason}", - f"Strategy: {plan.graph.strategy if plan.graph else ''}", - f"Error: {error}", - ( - "Proceed as the main agent. Do not blindly repeat failed tool calls; " - "produce a user-visible fallback answer with available evidence and clearly " - "state any missing or uncertain data." 
- ), - ] - ) - - def _build_team_provider_bundle_factory(self, loaded: Any, kwargs: dict[str, Any]) -> Any: - def factory(node: ExecutionNode) -> Any: - node_kwargs = dict(kwargs) - node_kwargs.pop("provider_bundle", None) - if node.agent.model: - node_kwargs["model"] = node.agent.model - if node.agent.provider_name: - node_kwargs["provider_name"] = node.agent.provider_name - return self._make_provider_bundle_for_task(loaded, node_kwargs) - - return factory - def _make_provider_bundle_for_task(self, loaded: Any, kwargs: dict[str, Any]) -> Any: config = loaded.config configured_provider = config.resolve_provider_target( diff --git a/app-instance/backend/beaver/services/user_files.py b/app-instance/backend/beaver/services/user_files.py index 9052fcc..a46b1ab 100644 --- a/app-instance/backend/beaver/services/user_files.py +++ b/app-instance/backend/beaver/services/user_files.py @@ -40,6 +40,10 @@ class UserFileSizeError(UserFileError): """Raised when a user file upload exceeds configured limits.""" +class UserFileStorageError(UserFileError): + """Raised when the backing user-file storage cannot complete an operation.""" + + @dataclass(frozen=True, slots=True) class AgentUserFilePolicy: task_id: str | None = None @@ -387,26 +391,34 @@ class MinIOUserFileStorage: async def list_dir(self, path: str) -> list[UserFileEntry]: prefix = self._object_prefix(path) - objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False) + try: + objects = self.client.list_objects(self.config.bucket, prefix=prefix, recursive=False) + except Exception as exc: + raise _minio_storage_error("list directory", exc) from exc entries: list[UserFileEntry] = [] - for obj in objects: - object_name = str(obj.object_name or "") - user_path = self._user_path(object_name) - if not user_path or user_path == path or user_path.endswith("/.keep"): - continue - trimmed = user_path.rstrip("/") - name = PurePosixPath(trimmed).name - is_dir = bool(getattr(obj, "is_dir", False)) or 
object_name.endswith("/") - entries.append( - UserFileEntry( - name=name, - path=trimmed, - type="directory" if is_dir else "file", - size=None if is_dir else getattr(obj, "size", None), - content_type=None if is_dir else "application/octet-stream", - modified=obj.last_modified.isoformat() if getattr(obj, "last_modified", None) else None, + try: + for obj in objects: + object_name = str(obj.object_name or "") + user_path = self._user_path(object_name) + if not user_path or user_path == path or user_path.endswith("/.keep"): + continue + trimmed = user_path.rstrip("/") + name = PurePosixPath(trimmed).name + is_dir = bool(getattr(obj, "is_dir", False)) or object_name.endswith("/") + entries.append( + UserFileEntry( + name=name, + path=trimmed, + type="directory" if is_dir else "file", + size=None if is_dir else getattr(obj, "size", None), + content_type=None if is_dir else "application/octet-stream", + modified=obj.last_modified.isoformat() if getattr(obj, "last_modified", None) else None, + ) ) - ) + except UserFileError: + raise + except Exception as exc: + raise _minio_storage_error("list directory", exc) from exc return sorted(entries, key=lambda item: (item.type != "directory", item.name.lower())) async def read_file(self, path: str, *, max_bytes: int | None = None) -> UserFileContent: @@ -421,7 +433,9 @@ class MinIOUserFileStorage: response.close() response.release_conn() except Exception as exc: - raise UserFileNotFoundError("File not found") from exc + if _minio_error_code(exc) in {"NoSuchKey", "NoSuchObject"}: + raise UserFileNotFoundError("File not found") from exc + raise _minio_storage_error("read file", exc) from exc return UserFileContent( name=PurePosixPath(path).name, path=path, @@ -433,13 +447,16 @@ class MinIOUserFileStorage: async def write_file(self, path: str, content: bytes, *, content_type: str) -> UserFileEntry: object_name = self._object_name(path) - result = self.client.put_object( - self.config.bucket, - object_name, - BytesIO(content), - 
length=len(content), - content_type=content_type, - ) + try: + self.client.put_object( + self.config.bucket, + object_name, + BytesIO(content), + length=len(content), + content_type=content_type, + ) + except Exception as exc: + raise _minio_storage_error("write file", exc) from exc return UserFileEntry( name=PurePosixPath(path).name, path=path, @@ -475,6 +492,8 @@ class MinIOUserFileStorage: except Exception: pass raise + except Exception as exc: + raise _minio_storage_error("write file", exc) from exc return UserFileEntry( name=PurePosixPath(path).name, path=path, @@ -490,23 +509,30 @@ class MinIOUserFileStorage: try: self.client.remove_object(self.config.bucket, object_name) removed = True - except Exception: - pass + except Exception as exc: + if _minio_error_code(exc) != "NoSuchKey": + raise _minio_storage_error("delete path", exc) from exc prefix = f"{object_name.rstrip('/')}/" - for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True): - self.client.remove_object(self.config.bucket, str(obj.object_name)) - removed = True + try: + for obj in self.client.list_objects(self.config.bucket, prefix=prefix, recursive=True): + self.client.remove_object(self.config.bucket, str(obj.object_name)) + removed = True + except Exception as exc: + raise _minio_storage_error("delete path", exc) from exc return removed async def mkdir(self, path: str) -> UserFileEntry: object_name = f"{self._object_name(path).rstrip('/')}/.keep" - self.client.put_object( - self.config.bucket, - object_name, - BytesIO(b""), - length=0, - content_type="application/x-directory", - ) + try: + self.client.put_object( + self.config.bucket, + object_name, + BytesIO(b""), + length=0, + content_type="application/x-directory", + ) + except Exception as exc: + raise _minio_storage_error("create directory", exc) from exc return UserFileEntry( name=PurePosixPath(path).name, path=path, @@ -600,6 +626,18 @@ def _safe_scope(value: str | None) -> str: return cleaned or 
"interactive" +def _minio_error_code(exc: Exception) -> str: + return str(getattr(exc, "code", "") or "") + + +def _minio_storage_error(operation: str, exc: Exception) -> UserFileStorageError: + code = _minio_error_code(exc) + message = f"User file storage {operation} failed" + if code: + message = f"{message}: {code}" + return UserFileStorageError(message) + + class _LimitedReadStream: def __init__(self, stream: object, *, max_bytes: int | None = None) -> None: self.stream = stream diff --git a/app-instance/backend/beaver/skills/assembler/task_assembler.py b/app-instance/backend/beaver/skills/assembler/task_assembler.py index e95ae23..ad0a0c0 100644 --- a/app-instance/backend/beaver/skills/assembler/task_assembler.py +++ b/app-instance/backend/beaver/skills/assembler/task_assembler.py @@ -83,6 +83,12 @@ class SkillAssembler: return SkillAssemblyResult() llm_interactions: list[dict[str, Any]] = [] + if len(candidates) == 1: + return SkillAssemblyResult( + activated_skills=self._activate_skill_contexts([candidates[0]["name"]]), + llm_interactions=llm_interactions, + ) + if len(candidates) <= self.max_detailed_candidates: shortlisted_names = [item["name"] for item in candidates] else: @@ -115,6 +121,10 @@ class SkillAssembler: if not selected_names: return SkillAssemblyResult(llm_interactions=llm_interactions) + activated_skills = self._activate_skill_contexts(selected_names) + return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions) + + def _activate_skill_contexts(self, selected_names: list[str]) -> list[SkillContext]: activated_skills: list[SkillContext] = [] for name in selected_names: record = self.loader.get_skill_record(name) @@ -130,10 +140,11 @@ class SkillAssembler: content_hash=record.content_hash or "" if record is not None else "", activation_reason="llm_selected", tool_hints=list(record.tool_hints) if record is not None else [], + team_template=getattr(record, "team_template", None) if record is not None else 
None, + team_template_warnings=list(getattr(record, "team_template_warnings", [])) if record is not None else [], ) ) - - return SkillAssemblyResult(activated_skills=activated_skills, llm_interactions=llm_interactions) + return activated_skills async def _select_skill_names( self, diff --git a/app-instance/backend/beaver/skills/catalog/loader.py b/app-instance/backend/beaver/skills/catalog/loader.py index d2d67ce..901c332 100644 --- a/app-instance/backend/beaver/skills/catalog/loader.py +++ b/app-instance/backend/beaver/skills/catalog/loader.py @@ -28,6 +28,7 @@ from .utils import ( check_requirements, escape_xml, extract_required_tool_names, + extract_skill_team_template, get_missing_requirements, parse_frontmatter, parse_skill_metadata_blob, @@ -49,6 +50,8 @@ class SkillRecord: tool_hints: list[str] = field(default_factory=list) frontmatter: dict[str, Any] = field(default_factory=dict) description: str = "" + team_template: dict[str, Any] | None = None + team_template_warnings: list[str] = field(default_factory=list) class SkillsLoader: @@ -113,6 +116,7 @@ class SkillsLoader: continue normalized_frontmatter = dict(frontmatter) meta_blob = parse_skill_metadata_blob(frontmatter.get("metadata", "")) + template_result = extract_skill_team_template(body) record = SkillRecord( name=name, path=skill_file, @@ -127,6 +131,8 @@ class SkillsLoader: ), frontmatter=normalized_frontmatter, description=str(frontmatter.get("description") or summarize_body(body) or name), + team_template=template_result.template, + team_template_warnings=template_result.warnings, ) if filter_unavailable and not self._record_available(record): continue @@ -146,6 +152,7 @@ class SkillsLoader: else: path = self.workspace_skills / name / "versions" / loaded.version.version / "SKILL.md" _frontmatter, body = parse_frontmatter(loaded.content) + template_result = extract_skill_team_template(body) record = SkillRecord( name=name, path=path, @@ -160,6 +167,8 @@ class SkillsLoader: ), 
frontmatter=dict(loaded.version.frontmatter), description=str(loaded.version.frontmatter.get("description") or loaded.version.summary or name), + team_template=template_result.template, + team_template_warnings=template_result.warnings, ) if filter_unavailable and not self._record_available(record): continue diff --git a/app-instance/backend/beaver/skills/catalog/utils.py b/app-instance/backend/beaver/skills/catalog/utils.py index c2f82ce..97d4cf3 100644 --- a/app-instance/backend/beaver/skills/catalog/utils.py +++ b/app-instance/backend/beaver/skills/catalog/utils.py @@ -17,6 +17,7 @@ import json import os import re import shutil +from dataclasses import dataclass, field from typing import Any @@ -84,6 +85,27 @@ def strip_frontmatter(content: str) -> str: return body +@dataclass(slots=True) +class SkillTeamTemplateParseResult: + template: dict[str, Any] | None = None + warnings: list[str] = field(default_factory=list) + + +def extract_skill_team_template(body: str) -> SkillTeamTemplateParseResult: + matches = re.findall(r"```beaver-team-template\s*\n(.*?)\n```", body, re.DOTALL) + if not matches: + return SkillTeamTemplateParseResult() + if len(matches) != 1: + return SkillTeamTemplateParseResult(warnings=["skill defines multiple team templates"]) + try: + template = json.loads(matches[0]) + except json.JSONDecodeError: + return SkillTeamTemplateParseResult(warnings=["team template JSON is invalid"]) + if not isinstance(template, dict) or not isinstance(template.get("nodes", []), list): + return SkillTeamTemplateParseResult(warnings=["team template must be an object with a nodes list"]) + return SkillTeamTemplateParseResult(template=template) + + def extract_required_tool_names(body: str) -> list[str]: """从 canonical skill 正文的 `## Required Tools` 段落提取工具名。 diff --git a/app-instance/backend/beaver/skills/learning/eval.py b/app-instance/backend/beaver/skills/learning/eval.py index 404642e..299b689 100644 --- a/app-instance/backend/beaver/skills/learning/eval.py +++ 
def _historical_accepted_score(case: dict[str, Any]) -> float | None:
    """Return the recorded accepted score of a replay case, if it applies.

    The score is reported only for non-synthetic cases that carry no
    validator dict and actually contain an ``accepted_score`` key; every
    other case yields ``None``.

    Args:
        case: Replay case mapping.

    Returns:
        The value of ``accepted_score`` passed through ``_bounded_score``
        (with a default of 0.75), or ``None`` when the case does not qualify.
    """
    if case.get("synthetic"):
        return None
    if isinstance(case.get("validator"), dict):
        return None
    if "accepted_score" not in case:
        return None
    recorded = case.get("accepted_score")
    # _bounded_score is defined elsewhere in this module; presumably it
    # clamps the value into the valid score range - confirm there.
    return _bounded_score(recorded, default=0.75)
self._output_language_instruction(prompt_locale) + provider_bundle = kwargs.get("provider_bundle") or self.make_provider_bundle_for_task(self.loaded, kwargs) + kwargs = dict(kwargs) + team_provider_bundle_factory = kwargs.pop("team_provider_bundle_factory", None) + kwargs["provider_bundle"] = provider_bundle + + attempt_index = int(task.metadata.get("latest_attempt_index") or 0) + 1 + task_service.start_run(task.task_id, user_message=message, attempt_index=attempt_index) + pre_skill_context = self._build_skill_selection_context( + task=task, + user_message=message, + attempt_index=attempt_index, + ) + preselected_skills, pre_skill_latency_ms = await self._assemble_task_attempt_skills( + task_description=pre_skill_context, + provider_bundle=provider_bundle, + thinking_enabled=kwargs.get("thinking_enabled"), + include_skill_assembly=bool(kwargs.get("include_skill_assembly", True)), + pinned_skill_contexts=kwargs.get("pinned_skill_contexts"), + ) + if pre_skill_latency_ms: + kwargs["pre_run_latency_ms"] = self._merge_latency_ms( + kwargs.get("pre_run_latency_ms"), + {"pre_skill_assembly_ms": pre_skill_latency_ms}, + ) + plan = await task_execution_planner.plan( + task=task, + user_message=message, + attempt_index=attempt_index, + provider_bundle=provider_bundle, + skill_summaries=self._skill_summaries_for_planner(preselected_skills), + tool_hints=self._tool_hints_for_skills(preselected_skills), + activated_skills=preselected_skills, + ) + self._append_task_observation( + session_manager, + task.session_id, + event_type="task_execution_planned", + payload={ + "task_id": task.task_id, + "attempt_index": attempt_index, + **plan.to_event_payload(), + }, + ) + team_summaries: list[str] = [] + team_execution_context = "" + team_result: TeamRunResult | None = None + if plan.is_team: + team_result, team_error = await self._run_team_for_task( + plan, + task=task, + parent_session_id=kwargs["session_id"], + provider_bundle_factory=team_provider_bundle_factory + or 
self._build_team_provider_bundle_factory(kwargs), + ) + if team_result is not None: + team_summaries = [self._team_summary_for_validation(team_result)] + team_packet = TaskEvidencePacket( + task_id=task.task_id, + attempt_index=attempt_index, + main_run=None, + team_runs=self._team_run_evidence(team_result), + team_node_results=list(team_result.node_results), + final_output="", + ) + team_execution_context = self._join_context( + self._team_execution_context(plan, team_result), + "Rendered team evidence:\n" + render_task_evidence(team_packet), + ) + self._append_task_observation( + session_manager, + task.session_id, + event_type="task_team_run_completed" if team_result.success else "task_team_run_failed", + payload={ + "task_id": task.task_id, + "attempt_index": attempt_index, + "plan_mode": plan.mode, + "strategy": plan.graph.strategy if plan.graph else None, + "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [], + "team_run_ids": team_result.run_ids, + "team_success": team_result.success, + "node_results": self._team_node_results_for_event(plan, team_result), + "reason": plan.reason, + "error": None if team_result.success else "one or more team nodes failed", + }, + ) + else: + team_summaries = [f"Team execution failed: {team_error}"] + team_execution_context = self._failed_team_execution_context(plan, team_error or "unknown error") + self._append_task_observation( + session_manager, + task.session_id, + event_type="task_team_run_failed", + payload={ + "task_id": task.task_id, + "attempt_index": attempt_index, + "plan_mode": plan.mode, + "strategy": plan.graph.strategy if plan.graph else None, + "node_ids": [node.node_id for node in plan.graph.nodes] if plan.graph else [], + "team_run_ids": [], + "team_success": False, + "reason": plan.reason, + "error": team_error, + }, + ) + + outcome_context, incomplete_prefix, outcome_metadata = self._team_synthesis_outcome( + plan, + team_result, + prompt_locale=prompt_locale, + ) + if 
plan.is_team: + team_execution_context = self._join_context(outcome_context, team_execution_context) + + attempt_kwargs = dict(kwargs) + attempt_kwargs.update( + { + "task_id": task.task_id, + "task_mode": True, + "attempt_index": attempt_index, + "allow_candidate_generation": False, + "pinned_skill_contexts": preselected_skills, + "include_skill_assembly": False, + } + ) + attempt_kwargs["execution_context"] = self._join_context( + base_execution_context, + output_language_instruction, + team_execution_context, + ) + if plan.is_team and team_execution_context: + attempt_kwargs["include_tools"] = False + attempt_kwargs["max_tool_iterations"] = 0 + attempt_kwargs["skill_selection_context"] = self._build_skill_selection_context( + task=task, + user_message=message, + attempt_index=attempt_index, + plan=plan, + team_summaries=team_summaries, + ) + + result = await runner(message, **attempt_kwargs) + if outcome_metadata["task_outcome"] == "incomplete": + result.output_text = self._apply_incomplete_prefix(result.output_text, incomplete_prefix) + self._append_task_observation( + session_manager, + task.session_id, + event_type="task_synthesis_completed", + payload={ + "task_id": task.task_id, + "attempt_index": attempt_index, + "main_run_id": result.run_id, + "plan_mode": plan.mode, + "strategy": plan.graph.strategy if plan.graph else None, + **outcome_metadata, + }, + ) + task = task_service.append_run( + task.task_id, + result.run_id, + skill_names=self._skill_names_for_run(result.run_id), + ) + evidence_packet = self._build_task_evidence_packet( + session_manager=session_manager, + task=task, + attempt_index=attempt_index, + result=result, + team_result=team_result, + ) + evidence_text = render_task_evidence(evidence_packet) + evidence_debug = { + "evidence_run_ids": [ + item.run_id for item in [evidence_packet.main_run, *evidence_packet.team_runs] if item is not None + ], + "evidence_session_ids": [ + item.session_id + for item in [evidence_packet.main_run, 
*evidence_packet.team_runs] + if item is not None + ], + "tool_result_count": sum( + len(item.tool_results) + for item in [evidence_packet.main_run, *evidence_packet.team_runs] + if item is not None + ), + "evidence_length": len(evidence_text), + } + session_manager.update_latest_assistant_event_payload( + result.session_id, + result.run_id, + { + "task_id": task.task_id, + "task_status": task.status, + "evidence_status": "recorded", + }, + ) + session_manager.append_message( + result.session_id, + run_id=result.run_id, + role="system", + event_type="task_evidence_recorded", + event_payload={ + "task_id": task.task_id, + "attempt_index": attempt_index, + "evidence_debug": evidence_debug, + }, + content=None, + context_visible=False, + ) + result.task_id = task.task_id + result.task_status = task.status + result.validation_result = None + return result + + async def _run_team_for_task( + self, + plan: TaskExecutionPlan, + *, + task: TaskRecord, + parent_session_id: str, + provider_bundle_factory: Any, + ) -> tuple[TeamRunResult | None, str | None]: + if plan.graph is None: + return None, "team plan did not include an execution graph" + try: + from beaver.services.team_service import TeamService + + result = await TeamService(self.create_loop()).run_team( + plan.graph, + parent_task_id=task.task_id, + parent_session_id=parent_session_id, + parent_run_id=None, + provider_bundle_factory=provider_bundle_factory, + allow_candidate_generation=False, + ) + return result, None + except Exception as exc: + return None, str(exc) + + async def _assemble_task_attempt_skills( + self, + *, + task_description: str, + provider_bundle: Any, + thinking_enabled: bool | None, + include_skill_assembly: bool, + pinned_skill_contexts: Any, + ) -> tuple[list[SkillContext], float]: + started = perf_counter() + selected = self._coerce_skill_contexts(pinned_skill_contexts) + if include_skill_assembly: + skill_assembler = self._require_loaded(self.loaded, "skill_assembler") + runtime = 
provider_bundle.auxiliary_runtime or provider_bundle.main_runtime + assembled = await skill_assembler.assemble( + task_description=task_description, + provider=provider_bundle.auxiliary_provider or provider_bundle.main_provider, + model=getattr(runtime, "model", None), + embedding_runtime=getattr(provider_bundle, "embedding_runtime", None), + thinking_enabled=thinking_enabled, + ) + selected = self._merge_skill_contexts( + selected, + list(getattr(assembled, "activated_skills", []) or []), + ) + return selected, (perf_counter() - started) * 1000 + + @staticmethod + def _coerce_skill_contexts(value: Any) -> list[SkillContext]: + if not isinstance(value, list): + return [] + return [item for item in value if isinstance(item, SkillContext)] + + @staticmethod + def _merge_skill_contexts(left: list[SkillContext], right: list[SkillContext]) -> list[SkillContext]: + merged: list[SkillContext] = [] + seen: set[str] = set() + for skill in [*left, *right]: + if skill.name in seen: + continue + seen.add(skill.name) + merged.append(skill) + return merged + + @staticmethod + def _skill_summaries_for_planner(skills: list[SkillContext]) -> list[str]: + summaries: list[str] = [] + for skill in skills: + content = " ".join((skill.content or "").split()) + if len(content) > 240: + content = content[:237].rstrip() + "..." 
+ summaries.append(f"{skill.name}: {content}" if content else skill.name) + return summaries + + @staticmethod + def _tool_hints_for_skills(skills: list[SkillContext]) -> list[str]: + result: list[str] = [] + for skill in skills: + for hint in skill.tool_hints: + if hint and hint not in result: + result.append(hint) + return result + + @staticmethod + def _require_loaded(loaded: Any, field_name: str) -> Any: + value = getattr(loaded, field_name) + if value is None: + raise RuntimeError(f"Engine loader did not provide required dependency {field_name!r}") + return value + + @staticmethod + def _merge_latency_ms(current: Any, updates: dict[str, float]) -> dict[str, float]: + merged: dict[str, float] = {} + if isinstance(current, dict): + for key, value in current.items(): + if isinstance(value, (int, float)): + merged[str(key)] = float(value) + for key, value in updates.items(): + merged[key] = merged.get(key, 0.0) + float(value) + return merged + + @staticmethod + def _output_language_instruction(prompt_locale: str | None) -> str: + locale = normalize_main_agent_prompt_locale(prompt_locale) + if locale == "en": + return ( + "Output language: English. Use English for user-facing task titles, summaries, plans, " + "and final answers unless the user explicitly requests another language." 
+ ) + if locale == "zh-Hant": + return ( + "輸出語言:繁體中文。除非使用者明確要求其他語言,所有面向使用者的任務標題、摘要、" + "計劃與最終回答都使用繁體中文。" + ) + return ( + "输出语言:简体中文。除非用户明确要求其他语言,所有面向用户的任务标题、摘要、" + "计划与最终回答都使用简体中文。" + ) + + def _skill_names_for_run(self, run_id: str) -> list[str]: + store = getattr(self.loaded, "run_memory_store", None) + if store is None: + return [] + for record in store.list_runs(): + if record.run_id == run_id: + return [receipt.skill_name for receipt in record.activated_skills] + return [] + + @staticmethod + def _build_skill_selection_context( + *, + task: TaskRecord, + user_message: str, + attempt_index: int, + plan: TaskExecutionPlan | None = None, + team_summaries: list[str] | None = None, + ) -> str: + phase = f"attempt_{attempt_index}" + if task.feedback and task.feedback[-1].get("acceptance_type") == "revise": + phase = f"revision_attempt_{attempt_index}" + elif plan is not None and plan.is_team: + phase = f"team_synthesis_attempt_{attempt_index}" + + sections = [ + f"Task goal:\n{task.goal or task.description}", + f"Task description:\n{task.description}", + f"Current user request:\n{user_message}", + f"Execution phase:\n{phase}", + f"Task status:\n{task.status}", + ] + if task.constraints: + sections.append("Known constraints:\n" + "\n".join(f"- {item}" for item in task.constraints)) + if task.skill_names: + sections.append( + "Previously activated skills (reuse bias, not pinned):\n" + + "\n".join(f"- {item}" for item in task.skill_names) + ) + else: + sections.append("Previously activated skills:\nNone") + if task.feedback: + history_lines = [] + for item in task.feedback[-5:]: + kind = item.get("acceptance_type") or item.get("feedback_type") + comment = item.get("comment") or "" + run_id = item.get("run_id") or "" + history_lines.append(f"- {kind} run={run_id}: {comment}".strip()) + sections.append("Task acceptance history:\n" + "\n".join(history_lines)) + if plan is not None: + plan_lines = [ + f"mode: {plan.mode}", + f"reason: {plan.reason}", + ] + if 
plan.final_synthesis_instruction: + plan_lines.append(f"final synthesis instruction: {plan.final_synthesis_instruction}") + if plan.graph is not None: + plan_lines.append(f"strategy: {plan.graph.strategy}") + plan_lines.append( + "nodes:\n" + + "\n".join( + f"- {node.node_id}: {node.task}" + for node in plan.graph.nodes + ) + ) + sections.append("Execution plan:\n" + "\n".join(plan_lines)) + if team_summaries: + sections.append("Team execution summaries:\n" + "\n\n".join(team_summaries)[:2400]) + sections.append( + "Skill selection instruction:\n" + "Prefer reusing previously activated skills when they still match the Task. " + "Select new skills only if the current request, revision, or execution plan needs a different capability. " + "If no published skill matches, return [] and let the run continue without skills." + ) + return "\n\n".join(section for section in sections if section.strip()) + + @staticmethod + def _append_task_observation( + session_manager: Any, + session_id: str, + *, + event_type: str, + payload: dict[str, Any], + ) -> None: + session_manager.append_message( + session_id, + role="system", + event_type=event_type, + event_payload=payload, + content=payload.get("reason") or payload.get("error"), + context_visible=False, + ) + + @staticmethod + def _join_context(*parts: str | None) -> str: + return "\n\n".join(part.strip() for part in parts if part and part.strip()) + + @staticmethod + def _team_summary_for_validation(result: TeamRunResult) -> str: + lines = [ + f"success={result.success}", + f"task_id={result.task_id or ''}", + "summary:", + result.summary, + "nodes:", + ] + for node in result.node_results: + lines.append( + f"- {node.node_id}: success={node.success} finish_reason={node.finish_reason} " + f"error={node.error or ''} output={node.output_text[:500]}" + ) + return "\n".join(lines) + + @staticmethod + def _team_node_results_for_event(plan: TaskExecutionPlan, result: TeamRunResult) -> list[dict[str, Any]]: + nodes = {node.node_id: 
node for node in plan.graph.nodes} if plan.graph else {} + payloads: list[dict[str, Any]] = [] + for item in result.node_results: + payload = item.to_dict() + node = nodes.get(item.node_id) + if node is not None: + payload["selected_skill_names"] = list(node.inherited_pinned_skills) + payload["ephemeral_skill_names"] = [ + skill.name for skill in node.inherited_pinned_skill_contexts + ] + payload["skill_query"] = node.agent.metadata.get("skill_query") + payload["ephemeral_guidance_id"] = node.agent.metadata.get("ephemeral_guidance_id") + payload["ephemeral_guidance_name"] = node.agent.metadata.get("ephemeral_guidance_name") + payload["ephemeral_used"] = bool(node.inherited_pinned_skill_contexts) + payloads.append(payload) + return payloads + + @staticmethod + def _team_run_evidence(result: TeamRunResult | None) -> list[RunEvidence]: + if result is None: + return [] + return [node.evidence for node in result.node_results if node.evidence is not None] + + @staticmethod + def _team_synthesis_outcome( + plan: TaskExecutionPlan, + result: TeamRunResult | None, + *, + prompt_locale: str | None = None, + ) -> tuple[str, str, dict[str, Any]]: + if not plan.is_team or plan.graph is None: + metadata = { + "task_outcome": "single", + "incomplete_node_ids": [], + "node_statuses": {}, + "evidence_gaps": {}, + } + return "Task outcome: single", "", metadata + + result_by_node = { + item.node_id: item + for item in (result.node_results if result is not None else []) + } + node_statuses: dict[str, str] = {} + evidence_gaps: dict[str, list[str]] = {} + incomplete_node_ids: list[str] = [] + detail_lines: list[str] = [] + successful_lines: list[str] = [] + for node in plan.graph.nodes: + node_result = result_by_node.get(node.node_id) + status = node_result.completion_status if node_result is not None else "not_run" + node_statuses[node.node_id] = status + gaps = list(node_result.evidence_gaps) if node_result is not None else [] + if gaps: + evidence_gaps[node.node_id] = gaps + if 
node.required_for_completion and status != "succeeded": + incomplete_node_ids.append(node.node_id) + detail_lines.append( + f"- {node.node_id}: status={status}, " + f"finish_reason={node_result.finish_reason if node_result is not None else 'not_run'}, " + f"error={(node_result.error or '') if node_result is not None else 'node did not run'}, " + f"evidence_gaps={gaps}" + ) + elif node_result is not None and status == "succeeded": + successful_lines.append(f"- {node.node_id}: {node_result.output_text[:1000]}") + + task_outcome = "incomplete" if incomplete_node_ids else "complete" + metadata = { + "task_outcome": task_outcome, + "incomplete_node_ids": incomplete_node_ids, + "node_statuses": node_statuses, + "evidence_gaps": evidence_gaps, + } + context_parts = [ + f"Task outcome: {task_outcome}", + "Incomplete node IDs: " + (", ".join(incomplete_node_ids) or "none"), + ] + if detail_lines: + context_parts.append("Incomplete required node details:\n" + "\n".join(detail_lines)) + if successful_lines: + context_parts.append("Available successful node evidence:\n" + "\n".join(successful_lines)) + if task_outcome == "incomplete": + context_parts.append( + "Synthesis requirement: produce a partial report from available evidence and explicitly state " + "that the task is incomplete, partially completed, or missing required evidence." + ) + prefix = TaskAttemptOrchestrator._incomplete_prefix(prompt_locale) if incomplete_node_ids else "" + return "\n\n".join(context_parts), prefix, metadata + + @staticmethod + def _incomplete_prefix(prompt_locale: str | None) -> str: + locale = normalize_main_agent_prompt_locale(prompt_locale) + if locale == "en": + return "Task incomplete: some required steps failed or lack required evidence. 
The report below uses available results only.\n\n" + if locale == "zh-Hant": + return "任務未完成:部分必要步驟失敗或缺少必要證據。以下內容僅基於現有結果。\n\n" + return "任务未完成:部分必要步骤失败或缺少必要证据。以下内容仅基于现有结果。\n\n" + + @staticmethod + def _apply_incomplete_prefix(output_text: str, prefix: str) -> str: + normalized = output_text.lower() + notices = ( + "任务未完成", + "任務未完成", + "部分完成", + "缺少证据", + "缺少證據", + "task incomplete", + "incomplete task", + "partially complete", + "missing evidence", + ) + if any(notice in normalized for notice in notices): + return output_text + return prefix + output_text.lstrip() + + def _build_task_evidence_packet( + self, + *, + session_manager: Any, + task: TaskRecord, + attempt_index: int, + result: AgentRunResult, + team_result: TeamRunResult | None, + ) -> TaskEvidencePacket: + main_run = EvidenceBuilder(session_manager).build_run_evidence( + result.session_id, + result.run_id, + result.output_text, + result.finish_reason, + ) + return TaskEvidencePacket( + task_id=task.task_id, + attempt_index=attempt_index, + main_run=main_run, + team_runs=self._team_run_evidence(team_result), + team_node_results=list(team_result.node_results) if team_result is not None else [], + final_output=result.output_text, + ) + + @staticmethod + def _team_execution_context(plan: TaskExecutionPlan, result: TeamRunResult) -> str: + node_lines = [ + ( + f"- {node.node_id}: success={node.success}, finish_reason={node.finish_reason}, " + f"run_id={node.run_id or ''}, error={node.error or ''}\n{node.output_text}" + ) + for node in result.node_results + ] + return "\n\n".join( + item + for item in [ + "Task team execution result:", + f"Planner reason: {plan.reason}", + f"Strategy: {plan.graph.strategy if plan.graph else ''}", + f"Team success: {result.success}", + f"Team summary:\n{result.summary}", + "Node results:\n" + "\n\n".join(node_lines), + ( + "Final synthesis instruction:\n" + plan.final_synthesis_instruction + if plan.final_synthesis_instruction + else None + ), + ( + "Use successful team 
outputs as internal evidence. If one or more nodes failed, " + "do not blindly repeat failed tool calls. Produce a user-visible fallback answer " + "with available evidence and clearly state any missing or uncertain data." + ), + ] + if item + ) + + @staticmethod + def _failed_team_execution_context(plan: TaskExecutionPlan, error: str) -> str: + return "\n\n".join( + [ + "Task team execution failed before final synthesis.", + f"Planner reason: {plan.reason}", + f"Strategy: {plan.graph.strategy if plan.graph else ''}", + f"Error: {error}", + ( + "Proceed as the main agent. Do not blindly repeat failed tool calls; " + "produce a user-visible fallback answer with available evidence and clearly " + "state any missing or uncertain data." + ), + ] + ) + + def _build_team_provider_bundle_factory(self, kwargs: dict[str, Any]) -> Any: + def factory(node: ExecutionNode) -> Any: + node_kwargs = dict(kwargs) + node_kwargs.pop("provider_bundle", None) + if node.agent.model: + node_kwargs["model"] = node.agent.model + if node.agent.provider_name: + node_kwargs["provider_name"] = node.agent.provider_name + return self.make_provider_bundle_for_task(self.loaded, node_kwargs) + + return factory diff --git a/app-instance/backend/beaver/tasks/evidence.py b/app-instance/backend/beaver/tasks/evidence.py index 02ccb20..b328434 100644 --- a/app-instance/backend/beaver/tasks/evidence.py +++ b/app-instance/backend/beaver/tasks/evidence.py @@ -2,6 +2,8 @@ from __future__ import annotations +import json +import re from dataclasses import dataclass, field from typing import Any @@ -126,6 +128,37 @@ class EvidenceBuilder: ) +def evaluate_node_evidence( + evidence: RunEvidence, + required_evidence: list[str], + output_text: str, +) -> list[str]: + """Evaluate v1 coarse-grained node evidence requirements.""" + + gaps: list[str] = [] + successful_tools = [ + item + for item in evidence.tool_results + if item.event_payload.get("success") is True + ] + for raw_requirement in required_evidence: + 
requirement = str(raw_requirement).strip() + if not requirement: + continue + if requirement == "tool_result": + if not successful_tools: + _append_unique(gaps, "missing required evidence: tool_result") + elif requirement == "url": + if not any(_tool_evidence_contains_url(item) for item in successful_tools): + _append_unique(gaps, "missing required evidence: url") + elif requirement == "output": + if not output_text.strip(): + _append_unique(gaps, "missing required evidence: output") + else: + _append_unique(gaps, f"unsupported evidence requirement: {requirement}") + return gaps + + def render_task_evidence(packet: TaskEvidencePacket) -> str: sections = [ f"Task evidence packet: task_id={packet.task_id} attempt={packet.attempt_index}", @@ -181,3 +214,20 @@ def _render_tool_evidence(item: ToolEvidence) -> str: def _optional_str(value: Any) -> str | None: return str(value) if value is not None else None + + +_URL_RE = re.compile(r"https?://[^\s<>'\"]+", re.IGNORECASE) + + +def _tool_evidence_contains_url(item: ToolEvidence) -> bool: + values = [ + item.url or "", + item.content, + json.dumps(item.event_payload, ensure_ascii=False, default=str), + ] + return any(_URL_RE.search(value) is not None for value in values) + + +def _append_unique(values: list[str], value: str) -> None: + if value not in values: + values.append(value) diff --git a/app-instance/backend/beaver/tasks/planner.py b/app-instance/backend/beaver/tasks/planner.py index ec23ae3..7d76d2d 100644 --- a/app-instance/backend/beaver/tasks/planner.py +++ b/app-instance/backend/beaver/tasks/planner.py @@ -4,11 +4,14 @@ from __future__ import annotations import asyncio import json +import os from dataclasses import dataclass, field from typing import Any, Literal from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode +from beaver.engine.context import SkillContext from beaver.engine.providers import ProviderBundle +from beaver.tools.registry import ToolRegistry from .models import 
TaskRecord from .skill_resolver import SkillResolutionReport, TaskSkillResolver @@ -17,6 +20,24 @@ from .skill_resolver import SkillResolutionReport, TaskSkillResolver TaskExecutionMode = Literal["single", "team"] +# Temporary name-based denylist until high-risk tool approval is implemented. +# Keep this policy centralized so planner behavior cannot drift by call site. +HIGH_RISK_PLANNER_TOOL_NAMES = frozenset( + { + "delete_file", + "execute_command", + "external_send", + "send_email", + "terminal", + "write_file", + } +) + + +def _agent_team_enabled() -> bool: + return os.getenv("BEAVER_AGENT_TEAM_ENABLED", "1").strip().lower() not in {"0", "false", "no", "off"} + + @dataclass(slots=True) class TaskExecutionPlan: mode: TaskExecutionMode @@ -25,14 +46,26 @@ class TaskExecutionPlan: final_synthesis_instruction: str = "" fallback_error: str | None = None skill_resolution_report: list[SkillResolutionReport] = field(default_factory=list) + planner_adaptation: dict[str, Any] = field(default_factory=dict) @property def is_team(self) -> bool: return self.mode == "team" and self.graph is not None @classmethod - def single(cls, reason: str, *, fallback_error: str | None = None) -> "TaskExecutionPlan": - return cls(mode="single", reason=reason, fallback_error=fallback_error) + def single( + cls, + reason: str, + *, + fallback_error: str | None = None, + planner_adaptation: dict[str, Any] | None = None, + ) -> "TaskExecutionPlan": + return cls( + mode="single", + reason=reason, + fallback_error=fallback_error, + planner_adaptation=dict(planner_adaptation or {}), + ) def to_event_payload(self) -> dict[str, Any]: strategy = self.graph.strategy if self.graph is not None else None @@ -57,6 +90,7 @@ class TaskExecutionPlan: if item.ephemeral_guidance_id ], "skill_resolution_report": [item.to_dict() for item in self.skill_resolution_report], + "planner_adaptation": dict(self.planner_adaptation), "fallback_error": self.fallback_error, } @@ -65,10 +99,34 @@ class 
TaskExecutionPlanner: """Plan whether a Task attempt should run through a team first.""" _MAX_NODES = 6 + _MAX_DEPTH = 4 _SUPPORTED_STRATEGIES = {"sequence", "parallel", "dag"} + _ALLOWED_NODE_FIELDS = { + "node_id", + "task", + "use_skill", + "skill_query", + "depends_on", + "input_contract", + "output_contract", + "requested_tools", + "required_evidence", + "evidence_contract", + "validation_rules", + "required_for_completion", + "block_downstream_on_partial", + "max_tool_iterations", + "constraints", + } - def __init__(self, *, task_skill_resolver: TaskSkillResolver | None = None) -> None: + def __init__( + self, + *, + task_skill_resolver: TaskSkillResolver | None = None, + tool_registry: ToolRegistry | None = None, + ) -> None: self.task_skill_resolver = task_skill_resolver + self.tool_registry = tool_registry async def plan( self, @@ -78,7 +136,15 @@ class TaskExecutionPlanner: attempt_index: int, provider_bundle: ProviderBundle | None = None, timeout_seconds: float = 30.0, + skill_summaries: list[str] | None = None, + tool_hints: list[str] | None = None, + activated_skills: list[SkillContext] | None = None, ) -> TaskExecutionPlan: + if not _agent_team_enabled(): + return TaskExecutionPlan.single("planner_disabled_by_environment") + if not self._needs_team_planning(task=task, user_message=user_message): + return TaskExecutionPlan.single("planner_skipped_simple_task") + provider = None model = None if provider_bundle is not None: @@ -87,6 +153,7 @@ class TaskExecutionPlanner: model = getattr(runtime, "model", None) if provider is None: return TaskExecutionPlan.single("planner_provider_unavailable") + selected_template, base_adaptation = self._select_team_template(activated_skills or []) try: response = await asyncio.wait_for( provider.chat( @@ -104,6 +171,10 @@ class TaskExecutionPlanner: task=task, user_message=user_message, attempt_index=attempt_index, + skill_summaries=skill_summaries or [], + tool_hints=tool_hints or [], + activated_skills=activated_skills 
or [], + selected_template=selected_template, ), }, ], @@ -114,7 +185,40 @@ class TaskExecutionPlanner: ), timeout=timeout_seconds, ) - plan = self.from_json(response.content or "") + try: + plan = self._from_json_or_raise(response.content or "") + except Exception as first_error: + repair_response = await asyncio.wait_for( + provider.chat( + messages=[ + { + "role": "system", + "content": "Repair invalid Beaver task planner JSON. Return only one compact JSON object.", + }, + { + "role": "user", + "content": ( + "Repair the invalid planner JSON using the task-only schema from the original " + f"request. Validation error: {first_error}\nInvalid output:\n{response.content or ''}" + ), + }, + ], + tools=None, + model=model, + max_tokens=4096, + temperature=0.0, + ), + timeout=timeout_seconds, + ) + try: + plan = self._from_json_or_raise(repair_response.content or "") + except Exception as repair_error: + return TaskExecutionPlan.single( + "planner_fallback_single", + fallback_error=f"initial validation: {first_error}; repair validation: {repair_error}", + planner_adaptation=base_adaptation, + ) + self._merge_adaptation(plan, base_adaptation) return await self._resolve_plan( plan, task=task, @@ -152,30 +256,90 @@ class TaskExecutionPlanner: graph.validate() plan.graph = graph plan.skill_resolution_report = reports + self._merge_skill_resolution_adaptation(plan, reports) return plan except Exception as exc: return TaskExecutionPlan.single("planner_fallback_single", fallback_error=f"task_skill_resolver_failed: {exc}") + @staticmethod + def _needs_team_planning(*, task: TaskRecord, user_message: str) -> bool: + text = " ".join( + part + for part in ( + task.goal, + task.description, + user_message, + ) + if part + ).lower() + if not text.strip(): + return False + + complex_markers = ( + "agent team", + "sub-agent", + "multi-agent", + "parallel", + "dag", + "workflow", + "review", + "research", + "compare", + "comparison", + "architecture", + "refactor", + "multi-file", + 
"end-to-end", + "并行", + "团队", + "多智能体", + "子代理", + "工作流", + "评审", + "审查", + "调研", + "研究", + "对比", + "架构", + "重构", + "多文件", + "端到端", + ) + return any(marker in text for marker in complex_markers) + def from_json(self, text: str) -> TaskExecutionPlan: try: - payload = self._parse_json_object(text) - mode = str(payload.get("mode") or "single").strip().lower() - reason = str(payload.get("reason") or "") - if mode != "team": - return TaskExecutionPlan.single(reason or "planner_selected_single") - - graph = self._graph_from_payload(payload) - graph.validate() - return TaskExecutionPlan( - mode="team", - reason=reason or "planner_selected_team", - graph=graph, - final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""), - ) + return self._from_json_or_raise(text) except Exception as exc: return TaskExecutionPlan.single("planner_fallback_single", fallback_error=str(exc)) - def _graph_from_payload(self, payload: dict[str, Any]) -> ExecutionGraph: + def _from_json_or_raise(self, text: str) -> TaskExecutionPlan: + payload = self._parse_json_object(text) + mode = str(payload.get("mode") or "single").strip().lower() + reason = str(payload.get("reason") or "") + adaptation = self._adaptation_from_payload(payload) + if mode != "team": + return TaskExecutionPlan.single( + reason or "planner_selected_single", + planner_adaptation=adaptation, + ) + + graph = self._graph_from_payload(payload, adaptation=adaptation) + graph.validate(max_depth=self._MAX_DEPTH) + return TaskExecutionPlan( + mode="team", + reason=reason or "planner_selected_team", + graph=graph, + final_synthesis_instruction=str(payload.get("final_synthesis_instruction") or ""), + planner_adaptation=adaptation, + ) + + def _graph_from_payload( + self, + payload: dict[str, Any], + *, + adaptation: dict[str, Any], + ) -> ExecutionGraph: strategy = str(payload.get("strategy") or "sequence").strip().lower() if strategy not in self._SUPPORTED_STRATEGIES: raise ValueError(f"Unsupported team strategy: 
{strategy}") @@ -189,16 +353,27 @@ class TaskExecutionPlanner: for index, item in enumerate(raw_nodes, start=1): if not isinstance(item, dict): raise ValueError("Each team node must be an object") - agent_payload = item.get("agent") if isinstance(item.get("agent"), dict) else {} - skill_query = str(item.get("skill_query") or agent_payload.get("skill_query") or item.get("task") or "").strip() - requested_capabilities = _string_list( - item.get("required_capabilities") or item.get("capabilities") or agent_payload.get("capabilities") - ) - requested_tags = _string_list(item.get("tags") or agent_payload.get("tags")) - node_id = str(item.get("node_id") or item.get("id") or agent_payload.get("name") or f"node_{index}").strip() + unsupported = sorted(set(item) - self._ALLOWED_NODE_FIELDS) + if unsupported: + raise ValueError(f"Unsupported team node field(s): {', '.join(unsupported)}") + node_id = str(item.get("node_id") or f"node_{index}").strip() task = str(item.get("task") or "").strip() if not node_id or not task: - raise ValueError("Each team node requires node_id/id and task") + raise ValueError("Each team node requires node_id and task") + allowed_tool_names = self._resolve_requested_tools( + item.get("requested_tools"), + warnings=adaptation["warnings"], + ) + use_skill = _optional_str(item.get("use_skill")) + skill_query = _optional_str(item.get("skill_query")) or task + if use_skill is not None or "skill_query" in item: + adaptation.setdefault("node_skill_bindings", []).append( + { + "node_id": node_id, + "use_skill": use_skill, + "skill_query": skill_query, + } + ) nodes.append( ExecutionNode( node_id=node_id, @@ -208,30 +383,147 @@ class TaskExecutionPlanner: role="", system_prompt="", metadata={ + "use_skill": use_skill, "skill_query": skill_query, - "required_capabilities": requested_capabilities, - "requested_tags": requested_tags, + "required_capabilities": [], + "requested_tags": [], "sub_agent_kind": "generic_skill_worker", }, ), depends_on=[str(dep) for 
dep in item.get("depends_on") or []], - inherited_pinned_skills=[str(name) for name in item.get("pinned_skills") or []], constraints=[str(value) for value in item.get("constraints") or []], - expected_output=str(item.get("expected_output") or "") or None, + input_contract=_dict_value(item.get("input_contract")), + output_contract=_dict_value(item.get("output_contract")), + allowed_tool_names=allowed_tool_names, + required_evidence=_string_list(item.get("required_evidence")), + evidence_contract=_dict_value(item.get("evidence_contract")), + validation_rules=_string_list(item.get("validation_rules")), + required_for_completion=bool(item.get("required_for_completion", True)), + block_downstream_on_partial=bool(item.get("block_downstream_on_partial", False)), + max_tool_iterations=_optional_int(item.get("max_tool_iterations")), ) ) return ExecutionGraph(strategy=strategy, nodes=nodes) # type: ignore[arg-type] + def _resolve_requested_tools(self, value: Any, *, warnings: list[str]) -> list[str] | None: + if value is None: + return None + result: list[str] = [] + for name in _string_list(value): + if name.lower() in HIGH_RISK_PLANNER_TOOL_NAMES: + _append_unique(warnings, f"requires_high_risk_review: {name}") + continue + if self.tool_registry is None or self.tool_registry.get(name) is None: + _append_unique(warnings, f"unknown tool removed: {name}") + continue + result.append(name) + return result + + @staticmethod + def _adaptation_from_payload(payload: dict[str, Any]) -> dict[str, Any]: + raw = payload.get("adaptation") + adaptation = dict(raw) if isinstance(raw, dict) else {} + adaptation["warnings"] = _string_list(adaptation.get("warnings")) + return adaptation + + @staticmethod + def _select_team_template( + activated_skills: list[SkillContext], + ) -> tuple[SkillContext | None, dict[str, Any]]: + candidates = [ + skill + for skill in activated_skills + if isinstance(skill.team_template, dict) and isinstance(skill.team_template.get("nodes"), list) + ] + selected = 
candidates[0] if candidates else None + warnings: list[str] = [] + for skill in activated_skills: + for warning in skill.team_template_warnings: + _append_unique(warnings, f"{skill.name}: {warning}") + return selected, { + "template_used": False, + "selected_template": selected.name if selected else None, + "selection_reason": ( + "first activated skill with a valid team template" + if selected + else "no activated skill has a valid team template" + ), + "ignored_templates": [skill.name for skill in candidates[1:]], + "warnings": warnings, + } + + @staticmethod + def _merge_adaptation(plan: TaskExecutionPlan, base: dict[str, Any]) -> None: + payload = dict(plan.planner_adaptation) + warnings: list[str] = [] + for warning in [*base.get("warnings", []), *payload.get("warnings", [])]: + _append_unique(warnings, str(warning)) + merged = { + "template_used": bool(payload.get("template_used", False)), + "selected_template": base.get("selected_template"), + "selection_reason": base.get("selection_reason"), + "ignored_templates": list(base.get("ignored_templates", [])), + "warnings": warnings, + } + if isinstance(payload.get("node_skill_bindings"), list): + merged["node_skill_bindings"] = [dict(item) for item in payload["node_skill_bindings"] if isinstance(item, dict)] + plan.planner_adaptation = merged + + @staticmethod + def _merge_skill_resolution_adaptation( + plan: TaskExecutionPlan, + reports: list[SkillResolutionReport], + ) -> None: + warnings = plan.planner_adaptation.setdefault("warnings", []) + bindings = plan.planner_adaptation.get("node_skill_bindings") + binding_by_node = { + str(item.get("node_id")): item + for item in bindings or [] + if isinstance(item, dict) + } + for report in reports: + for warning in report.warnings: + _append_unique(warnings, warning) + binding = binding_by_node.get(report.node_id) + if binding is not None and report.requested_skill_name and not report.exact_binding_used: + binding["fallback_reason"] = f"use_skill unresolved; 
{report.reason}" + @staticmethod def _prompt( *, task: TaskRecord, user_message: str, attempt_index: int, + skill_summaries: list[str] | None = None, + tool_hints: list[str] | None = None, + activated_skills: list[SkillContext] | None = None, + selected_template: SkillContext | None = None, ) -> str: history_note = "" if task.feedback: history_note = "\nRelevant task history:\n" + json.dumps(task.feedback[-5:], ensure_ascii=False) + skill_note = "" + if skill_summaries: + skill_note = "\nActivated skill summaries:\n" + "\n".join(f"- {item}" for item in skill_summaries) + guidance_note = "" + if activated_skills: + guidance_note = "\nActivated Skill guidance:\n" + "\n".join( + f"[{skill.name}]\n{skill.content}" for skill in activated_skills + ) + template_note = "" + if selected_template is not None: + template_note = "\nPrimary Skill team template:\n" + json.dumps( + { + "skill_name": selected_template.name, + "skill_version": selected_template.version, + "template": selected_template.team_template, + }, + ensure_ascii=False, + indent=2, + ) + tool_note = "" + if tool_hints: + tool_note = "\nActivated skill tool hints:\n" + "\n".join(f"- {item}" for item in tool_hints) return ( "Decide execution mode for this internal Task attempt.\n" "Use mode=team only when independent research, review, implementation slices, or staged checks " @@ -241,13 +533,24 @@ class TaskExecutionPlanner: ' "mode": "single" | "team",\n' ' "reason": "short reason",\n' ' "strategy": "sequence" | "parallel" | "dag",\n' - ' "nodes": [{"node_id": "api_review", "task": "...", "skill_query": "API contract review", ' - '"required_capabilities": ["schema compatibility"], "depends_on": []}],\n' + ' "nodes": [{"node_id": "collect", "task": "...", "use_skill": "optional exact skill", ' + '"skill_query": "optional dynamic skill query", "depends_on": [], ' + '"input_contract": {}, "output_contract": {}, "requested_tools": [], ' + '"required_evidence": [], "evidence_contract": {}, "validation_rules": [], ' 
+ '"required_for_completion": true, "block_downstream_on_partial": false, ' + '"max_tool_iterations": 3, "constraints": []}],\n' + ' "adaptation": {"template_used": true, "warnings": []},\n' ' "final_synthesis_instruction": "how the main agent should synthesize team output"\n' "}\n\n" + "Node definitions are task-only. Never output agent or role fields. Use at most one primary " + "Skill template; treat all other activated Skills as guidance.\n\n" f"Task goal:\n{task.goal}\n\n" f"Current user request:\n{user_message}\n\n" f"Attempt index: {attempt_index}\n" + f"{skill_note}" + f"{guidance_note}" + f"{template_note}" + f"{tool_note}" f"{history_note}" ) @@ -275,6 +578,26 @@ def _optional_str(value: Any) -> str | None: return text or None +def _optional_int(value: Any) -> int | None: + if value in (None, ""): + return None + if isinstance(value, bool): + raise ValueError("max_tool_iterations must be an integer") + result = int(value) + if result < 0: + raise ValueError("max_tool_iterations must be non-negative") + return result + + +def _dict_value(value: Any) -> dict[str, Any]: + return dict(value) if isinstance(value, dict) else {} + + +def _append_unique(values: list[str], value: str) -> None: + if value and value not in values: + values.append(value) + + def _string_list(value: Any) -> list[str]: if not isinstance(value, list): if isinstance(value, str): diff --git a/app-instance/backend/beaver/tasks/router.py b/app-instance/backend/beaver/tasks/router.py index ff7ae7a..ae9b874 100644 --- a/app-instance/backend/beaver/tasks/router.py +++ b/app-instance/backend/beaver/tasks/router.py @@ -4,6 +4,7 @@ from __future__ import annotations import asyncio import json +import re from typing import Any from .models import MainAgentDecision, TaskRecord @@ -24,6 +25,15 @@ class MainAgentRouter: thinking_enabled: bool | None = None, timeout_seconds: float = 8.0, ) -> MainAgentDecision: + if active_task is None and _is_obvious_simple_chat(message): + return 
MainAgentDecision(mode="simple", reason="obvious_simple_chat", action="simple_chat") + if active_task is None and _is_obvious_task_request(message): + return MainAgentDecision( + mode="task", + reason="obvious_task", + starts_new_task=True, + action="create_task", + ) if provider is None: return self._apply_active_task_boundary( self._fallback(active_task=active_task, reason="router_provider_unavailable"), @@ -246,6 +256,64 @@ def _clean_short_title(value: Any) -> str | None: return title[:40] or None +def _is_obvious_simple_chat(message: str) -> bool: + text = _compact_text(message).lower().strip("!!??。.,,~~") + if not text: + return False + if _has_url_or_path(text) or _looks_like_fresh_task_request(text): + return False + if len(text) <= 24 and text in { + "hi", + "hello", + "hey", + "thanks", + "thankyou", + "thankyou!", + "谢谢", + "谢了", + "多谢", + "你好", + "您好", + "嗨", + "在吗", + "早上好", + "下午好", + "晚上好", + "辛苦了", + }: + return True + simple_prefixes = ( + "翻译", + "translate", + "润色", + "改写", + "校对", + "总结下面", + "总结这段", + "摘要下面", + "summarize this", + ) + return len(text) <= 1200 and text.startswith(simple_prefixes) + + +def _is_obvious_task_request(message: str) -> bool: + text = _compact_text(message) + if not text: + return False + if _looks_like_explicit_task_followup(text): + return False + if _has_url_or_path(text): + return True + return _looks_like_fresh_task_request(text) + + +def _has_url_or_path(text: str) -> bool: + return bool( + re.search(r"https?://|www\.", text) + or re.search(r"(^|[\s'\"`])(?:[./~]|[a-zA-Z]:[\\/])[^\s'\"`]+", text) + ) + + def _looks_like_explicit_task_followup(message: str) -> bool: text = _compact_text(message) if not text: @@ -307,6 +375,16 @@ def _looks_like_fresh_task_request(message: str) -> bool: "看看最新", "最新", "今天", + "昨天", + "昨日", + "昨晚", + "刚刚", + "最近", + "近期", + "本届", + "本场", + "这场", + "上一场", "明天", "上传", "下载", @@ -324,6 +402,12 @@ def _looks_like_fresh_task_request(message: str) -> bool: "look up", "latest", "today", + 
"yesterday", + "last night", + "recent", + "recently", + "this match", + "this game", "tomorrow", "upload", "download", diff --git a/app-instance/backend/beaver/tasks/skill_resolver.py b/app-instance/backend/beaver/tasks/skill_resolver.py index 8038998..9b65a78 100644 --- a/app-instance/backend/beaver/tasks/skill_resolver.py +++ b/app-instance/backend/beaver/tasks/skill_resolver.py @@ -7,9 +7,11 @@ from dataclasses import dataclass, field, replace from typing import Any from beaver.coordinator.models import AgentDescriptor, ExecutionGraph, ExecutionNode +from beaver.engine.context import SkillContext from beaver.engine.providers import ProviderBundle from beaver.skills.assembler.embedding_retriever import SkillEmbeddingRetriever from beaver.skills.catalog.loader import SkillsLoader +from beaver.skills.catalog.utils import strip_frontmatter from beaver.skills.drafts import DraftService from beaver.skills.learning import EphemeralGuidanceSynthesizer from beaver.tasks.models import TaskRecord @@ -24,6 +26,9 @@ class SkillResolutionReport: ephemeral_guidance_id: str | None = None ephemeral_guidance_name: str | None = None ephemeral_used: bool = False + requested_skill_name: str | None = None + exact_binding_used: bool = False + warnings: list[str] = field(default_factory=list) reason: str = "" def to_dict(self) -> dict[str, Any]: @@ -35,6 +40,9 @@ class SkillResolutionReport: "ephemeral_guidance_id": self.ephemeral_guidance_id, "ephemeral_guidance_name": self.ephemeral_guidance_name, "ephemeral_used": self.ephemeral_used, + "requested_skill_name": self.requested_skill_name, + "exact_binding_used": self.exact_binding_used, + "warnings": list(self.warnings), "reason": self.reason, } @@ -87,12 +95,45 @@ class TaskSkillResolver: attempt_index: int, provider_bundle: ProviderBundle, ) -> tuple[ExecutionNode, SkillResolutionReport]: + use_skill = str(node.agent.metadata.get("use_skill") or "").strip() skill_query = str(node.agent.metadata.get("skill_query") or node.task or 
node.node_id).strip() + warnings: list[str] = [] required_capabilities = [ str(item).strip() for item in node.agent.metadata.get("required_capabilities", []) if str(item).strip() ] + if use_skill: + exact_context = self._load_exact_skill_context(use_skill) + if exact_context is not None: + resolved = self._generic_node( + node, + pinned_skill_names=_merge_names(node.inherited_pinned_skills, [use_skill]), + pinned_skill_contexts=_merge_skill_contexts( + node.inherited_pinned_skill_contexts, + [exact_context], + ), + metadata={ + **node.agent.metadata, + "use_skill": use_skill, + "skill_query": skill_query, + "required_capabilities": required_capabilities, + "selected_skill_names": [use_skill], + "ephemeral_skill_names": [], + "exact_binding_used": True, + }, + ) + return resolved, SkillResolutionReport( + node_id=node.node_id, + skill_query=skill_query, + required_capabilities=required_capabilities, + selected_skill_names=[use_skill], + requested_skill_name=use_skill, + exact_binding_used=True, + reason="exact use_skill binding", + ) + warnings.append(f"use_skill unresolved: {use_skill}") + if self._is_summary_only_node(node, skill_query=skill_query, required_capabilities=required_capabilities): resolved = self._generic_node( node, @@ -104,6 +145,7 @@ class TaskSkillResolver: "required_capabilities": required_capabilities, "selected_skill_names": [], "ephemeral_skill_names": [], + "exact_binding_used": False, "summary_uses_dependency_outputs_only": True, }, ) @@ -113,6 +155,9 @@ class TaskSkillResolver: required_capabilities=required_capabilities, selected_skill_names=[], ephemeral_used=False, + requested_skill_name=use_skill or None, + exact_binding_used=False, + warnings=warnings, reason="summary node uses dependency outputs directly", ) @@ -141,6 +186,7 @@ class TaskSkillResolver: "required_capabilities": required_capabilities, "selected_skill_names": selected, "ephemeral_skill_names": [], + "exact_binding_used": False, }, ) return resolved, 
SkillResolutionReport( @@ -149,6 +195,9 @@ class TaskSkillResolver: required_capabilities=required_capabilities, selected_skill_names=selected, ephemeral_used=False, + requested_skill_name=use_skill or None, + exact_binding_used=False, + warnings=warnings, reason="matched published skill", ) @@ -174,6 +223,7 @@ class TaskSkillResolver: "ephemeral_guidance_id": missing.guidance_id, "ephemeral_guidance_name": missing.guidance_name, "ephemeral_skill_names": [missing.skill_context.name], + "exact_binding_used": False, }, ) return resolved, SkillResolutionReport( @@ -183,9 +233,27 @@ class TaskSkillResolver: ephemeral_guidance_id=missing.guidance_id, ephemeral_guidance_name=missing.guidance_name, ephemeral_used=True, + requested_skill_name=use_skill or None, + exact_binding_used=False, + warnings=warnings, reason="generated ephemeral guidance for missing sub-agent capability", ) + def _load_exact_skill_context(self, name: str) -> SkillContext | None: + record = self.skills_loader.get_skill_record(name) + raw_content = self.skills_loader.load_published_skill(name) + content = strip_frontmatter(raw_content).strip() if raw_content else "" + if record is None or not content: + return None + return SkillContext( + name=name, + content=content, + version=record.version, + content_hash=record.content_hash or "", + activation_reason="explicit_node_binding", + tool_hints=list(record.tool_hints), + ) + async def _select_published_skills(self, *, query: str, provider_bundle: ProviderBundle) -> list[str]: candidates = self.skills_loader.build_selection_candidates() if not candidates: @@ -336,3 +404,14 @@ def _merge_names(parent: list[str], selected: list[str]) -> list[str]: if name and name not in result: result.append(name) return result + + +def _merge_skill_contexts(parent: list[SkillContext], selected: list[SkillContext]) -> list[SkillContext]: + result: list[SkillContext] = [] + seen: set[str] = set() + for context in [*parent, *selected]: + if context.name in seen: + continue 
+ seen.add(context.name) + result.append(context) + return result diff --git a/app-instance/backend/beaver/tools/builtins/web.py b/app-instance/backend/beaver/tools/builtins/web.py index 90e55b3..ae55d88 100644 --- a/app-instance/backend/beaver/tools/builtins/web.py +++ b/app-instance/backend/beaver/tools/builtins/web.py @@ -5,10 +5,11 @@ from __future__ import annotations import asyncio from dataclasses import dataclass, field from html import unescape +from html.parser import HTMLParser import json import re from typing import Any -from urllib.parse import quote_plus, urlparse +from urllib.parse import quote_plus, urljoin, urlparse import httpx @@ -24,6 +25,10 @@ def _strip_html(value: str) -> str: return re.sub(r"\s+", " ", text).strip() +def _compact_text(value: str) -> str: + return re.sub(r"\s+", " ", unescape(value)).strip() + + def _safe_url(url: str) -> str: parsed = urlparse(url) if parsed.scheme not in {"http", "https"} or not parsed.netloc: @@ -31,6 +36,77 @@ def _safe_url(url: str) -> str: return url +class _HtmlMetadataParser(HTMLParser): + def __init__(self, base_url: str) -> None: + super().__init__(convert_charrefs=True) + self.base_url = base_url + self.title = "" + self.links: list[dict[str, str]] = [] + self._in_title = False + self._current_href: str | None = None + self._current_text: list[str] = [] + self._skip_depth = 0 + self._seen_urls: set[str] = set() + + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: + lowered = tag.lower() + if lowered in {"script", "style"}: + self._skip_depth += 1 + return + if self._skip_depth: + return + if lowered == "title": + self._in_title = True + return + if lowered == "a": + href = dict(attrs).get("href") + if href: + self._current_href = urljoin(self.base_url, href) + self._current_text = [] + + def handle_endtag(self, tag: str) -> None: + lowered = tag.lower() + if lowered in {"script", "style"} and self._skip_depth: + self._skip_depth -= 1 + return + if self._skip_depth: 
+ return + if lowered == "title": + self._in_title = False + self.title = _compact_text(self.title) + return + if lowered == "a" and self._current_href: + parsed = urlparse(self._current_href) + if parsed.scheme in {"http", "https"} and self._current_href not in self._seen_urls: + text = _compact_text(" ".join(self._current_text)) + self.links.append({"text": text, "url": self._current_href}) + self._seen_urls.add(self._current_href) + self._current_href = None + self._current_text = [] + + def handle_data(self, data: str) -> None: + if self._skip_depth: + return + if self._in_title: + self.title += data + if self._current_href: + self._current_text.append(data) + + +def _extract_html_metadata(html: str, base_url: str, *, max_links: int = 80) -> dict[str, Any]: + parser = _HtmlMetadataParser(base_url) + parser.feed(html) + links = parser.links[:max_links] + pdf_links = [ + link for link in links if urlparse(link["url"]).path.lower().endswith(".pdf") + ][:30] + return { + "title": parser.title, + "links": links, + "pdf_links": pdf_links, + } + + @dataclass(slots=True) class WebFetchTool: name: str = "web_fetch" @@ -61,13 +137,20 @@ class WebFetchTool: response.raise_for_status() content_type = response.headers.get("content-type", "") raw = response.text - text = _strip_html(raw) if "html" in content_type.lower() else raw + is_html = "html" in content_type.lower() + text = _strip_html(raw) if is_html else raw + metadata = _extract_html_metadata(raw, str(response.url)) if is_html else { + "title": "", + "links": [], + "pdf_links": [], + } truncated = len(text) > limit return _json_result( True, url=str(response.url), status_code=response.status_code, content_type=content_type, + **metadata, content=text[:limit], truncated=truncated, ) @@ -97,6 +180,15 @@ class WebSearchTool: if not str(query).strip(): raise ValueError("query is required") bounded = max(1, min(int(limit or 5), 10)) + errors: list[str] = [] + try: + ddgs_results = await asyncio.to_thread(_search_ddgs, 
query, bounded) + except Exception as exc: + ddgs_results = [] + errors.append(str(exc)) + if ddgs_results: + return _json_result(True, **_search_result_payload(query, "ddgs", ddgs_results)) + headers = {"User-Agent": "Mozilla/5.0 Beaver/1.0"} timeout = httpx.Timeout(connect=5, read=8, write=5, pool=5) async with httpx.AsyncClient(timeout=timeout, follow_redirects=True, trust_env=True) as client: @@ -118,7 +210,6 @@ class WebSearchTool: ) ), ] - errors: list[str] = [] try: for completed in asyncio.as_completed(tasks): try: @@ -127,7 +218,7 @@ class WebSearchTool: errors.append(str(exc)) continue if results: - return _json_result(True, query=query, engine=engine, results=results) + return _json_result(True, **_search_result_payload(query, engine, results)) detail = "; ".join(error for error in errors if error) or "no search results" return _json_result(False, query=query, error=detail) finally: @@ -182,6 +273,62 @@ def _parse_bing_results(html: str, limit: int) -> list[dict[str, str]]: return results +def _search_ddgs(query: str, limit: int) -> list[dict[str, str]]: + from ddgs import DDGS # type: ignore[import-not-found] + + rows = DDGS().text(query, max_results=limit) + results: list[dict[str, str]] = [] + for row in rows or []: + title = _compact_text(str(row.get("title") or "")) + result_url = str(row.get("href") or row.get("url") or "").strip() + snippet = _compact_text(str(row.get("body") or row.get("snippet") or "")) + if title and result_url: + results.append({"title": title, "url": result_url, "snippet": snippet}) + if len(results) >= limit: + break + return results + + +def _search_result_payload(query: str, engine: str, results: list[dict[str, str]]) -> dict[str, Any]: + quality, reason = _assess_search_quality(query, results) + payload: dict[str, Any] = { + "query": query, + "engine": engine, + "quality": quality, + "results": results, + } + if reason: + payload["low_relevance_reason"] = reason + return payload + + +def _search_terms(value: str) -> 
set[str]: + return { + term + for term in re.findall(r"[a-z0-9]+", value.lower()) + if len(term) > 2 + } + + +def _assess_search_quality(query: str, results: list[dict[str, str]]) -> tuple[str, str | None]: + terms = _search_terms(query) + if not terms: + return "high", None + required_overlap = min(2, len(terms)) + for result in results: + haystack = " ".join( + [ + result.get("title", ""), + result.get("snippet", ""), + urlparse(result.get("url", "")).netloc, + urlparse(result.get("url", "")).path, + ] + ) + if len(terms & _search_terms(haystack)) >= required_overlap: + return "high", None + return "low", "results do not overlap enough with query terms" + + def _parse_duckduckgo_results(html: str, limit: int) -> list[dict[str, str]]: results: list[dict[str, str]] = [] pattern = re.compile( diff --git a/app-instance/backend/beaver/tools/runtime/executor.py b/app-instance/backend/beaver/tools/runtime/executor.py index 2d842a3..b293911 100644 --- a/app-instance/backend/beaver/tools/runtime/executor.py +++ b/app-instance/backend/beaver/tools/runtime/executor.py @@ -37,6 +37,14 @@ class ToolExecutor: ) -> ToolResult: """按工具名执行一次调用。""" + allowed = context.metadata.get("allowed_tool_names") if context is not None else None + if isinstance(allowed, list) and tool_name not in allowed: + return ToolResult( + success=False, + content=f"Tool {tool_name} is not allowed for this node.", + tool_name=tool_name, + error="tool_not_allowed", + ) tool = self.registry.get(tool_name) if tool is None: return ToolResult( diff --git a/app-instance/backend/pyproject.toml b/app-instance/backend/pyproject.toml index d16c3da..4396e15 100644 --- a/app-instance/backend/pyproject.toml +++ b/app-instance/backend/pyproject.toml @@ -6,6 +6,7 @@ requires-python = ">=3.11" dependencies = [ "anthropic>=0.51.0,<1.0.0", "croniter>=6.0.0,<7.0.0", + "ddgs>=9.0.0,<10.0.0", "fastmcp>=3.0.0,<4.0.0", "fastapi>=0.115.0,<1.0.0", "httpx>=0.28.0,<1.0.0", diff --git 
a/app-instance/backend/tests/unit/test_agent_loop.py b/app-instance/backend/tests/unit/test_agent_loop.py index ab48736..6c8d8d4 100644 --- a/app-instance/backend/tests/unit/test_agent_loop.py +++ b/app-instance/backend/tests/unit/test_agent_loop.py @@ -1,8 +1,10 @@ import asyncio +import json from contextlib import suppress from typing import Any from beaver.engine import AgentLoop, AgentRunResult, EngineLoader +from beaver.engine import loop as loop_module def _run_result(run_id: str, output_text: str) -> AgentRunResult: @@ -45,3 +47,37 @@ def test_running_loop_handles_reentrant_submit_direct(tmp_path) -> None: assert calls == ["outer", "inner"] asyncio.run(run_case()) + + +def test_web_search_loop_guard_stops_after_repeated_low_quality_results() -> None: + guard = loop_module._WebSearchLoopGuard() + low_quality = json.dumps( + { + "success": True, + "query": "weather beijing", + "quality": "low", + "results": [{"title": "Example", "url": "https://example.com", "snippet": ""}], + } + ) + + assert guard.observe_result("web_search", low_quality) is None + assert guard.observe_result("web_search", low_quality) is None + + guidance = guard.observe_result("web_search", low_quality) + + assert guidance is not None + assert guidance["finish_reason"] == "web_search_low_quality_budget" + assert "weather beijing" in guidance["message"] + + +def test_web_search_loop_guard_resets_after_useful_result() -> None: + guard = loop_module._WebSearchLoopGuard() + low_quality = json.dumps({"success": True, "query": "weather", "quality": "low", "results": []}) + useful = json.dumps({"success": True, "query": "weather", "quality": "high", "results": []}) + + assert guard.observe_result("web_search", low_quality) is None + assert guard.observe_result("web_search", useful) is None + assert guard.observe_result("web_search", low_quality) is None + assert guard.observe_result("web_search", low_quality) is None + + assert guard.observe_result("web_search", low_quality) is not None diff 
--git a/app-instance/backend/tests/unit/test_agent_loop_replay_executor.py b/app-instance/backend/tests/unit/test_agent_loop_replay_executor.py index a171e8e..d404d40 100644 --- a/app-instance/backend/tests/unit/test_agent_loop_replay_executor.py +++ b/app-instance/backend/tests/unit/test_agent_loop_replay_executor.py @@ -1,7 +1,9 @@ from __future__ import annotations +import asyncio from pathlib import Path from types import SimpleNamespace +from typing import Any import pytest @@ -44,6 +46,49 @@ class ToolCallingProvider(LLMProvider): return "stub" +class ParallelToolProvider(LLMProvider): + def __init__(self) -> None: + super().__init__() + self.calls = 0 + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + model: str | None = None, + max_tokens: int | None = None, + temperature: float = 0.7, + thinking_enabled: bool | None = None, + ) -> LLMResponse: + self.calls += 1 + if self.calls == 1: + return LLMResponse( + content="", + tool_calls=[ + ToolCallRequest(id="call-1", name="read_file", arguments={"path": "README.md"}), + ToolCallRequest(id="call-2", name="search_files", arguments={"query": "Beaver"}), + ], + ) + return LLMResponse(content="done") + + def get_default_model(self) -> str: + return "stub" + + +class ConcurrentReadOnlyExecutor: + def __init__(self) -> None: + self.started: list[str] = [] + self._both_started = asyncio.Event() + + async def execute_tool_call(self, tool_call: ToolCallRequest | dict[str, Any], *, context=None): + name = getattr(tool_call, "name", "") + self.started.append(name) + if len(self.started) >= 2: + self._both_started.set() + await asyncio.wait_for(self._both_started.wait(), timeout=0.2) + return SimpleNamespace(success=True, error=None, content=f"{name} result", tool_name=name) + + @pytest.mark.asyncio async def test_process_direct_uses_replay_tool_executor(tmp_path: Path) -> None: loop = AgentLoop(loader=EngineLoader(workspace=tmp_path)) @@ -69,3 +114,63 @@ async def 
test_process_direct_uses_replay_tool_executor(tmp_path: Path) -> None: assert result.output_text == "done" assert replay_executor.traces assert replay_executor.traces[0]["tool_name"] == "read_file" + + +@pytest.mark.asyncio +async def test_process_direct_runs_read_only_tool_calls_concurrently(tmp_path: Path) -> None: + loop = AgentLoop(loader=EngineLoader(workspace=tmp_path)) + provider = ParallelToolProvider() + executor = ConcurrentReadOnlyExecutor() + runtime = SimpleNamespace(model="stub", provider_name="stub") + + result = await loop.process_direct( + "Read and search the workspace.", + provider_bundle=ProviderBundle(main_runtime=runtime, main_provider=provider), # type: ignore[arg-type] + include_skill_assembly=False, + pinned_skill_names=[], + tool_executor_override=executor, + max_tool_iterations=2, + ) + + assert result.output_text == "done" + assert executor.started == ["read_file", "search_files"] + + +@pytest.mark.asyncio +async def test_process_direct_records_latency_breakdown(tmp_path: Path) -> None: + loop = AgentLoop(loader=EngineLoader(workspace=tmp_path)) + provider = ParallelToolProvider() + executor = ConcurrentReadOnlyExecutor() + runtime = SimpleNamespace(model="stub", provider_name="stub") + + result = await loop.process_direct( + "Read and search the workspace.", + provider_bundle=ProviderBundle(main_runtime=runtime, main_provider=provider), # type: ignore[arg-type] + include_skill_assembly=False, + pinned_skill_names=[], + tool_executor_override=executor, + max_tool_iterations=2, + ) + + latency = result.usage["latency_ms"] + expected_keys = { + "router_ms", + "mcp_ms", + "skill_assembly_ms", + "tool_assembly_ms", + "context_build_ms", + "llm_ms", + "tool_ms", + "session_write_ms", + "total_ms", + } + assert expected_keys.issubset(latency) + assert all(isinstance(latency[key], (int, float)) and latency[key] >= 0 for key in expected_keys) + assert latency["llm_ms"] > 0 + assert latency["tool_ms"] > 0 + assert latency["total_ms"] >= 
latency["llm_ms"] + + loaded = loop.boot() + events = loaded.session_manager.get_run_event_records(result.session_id, result.run_id) + completed = next(event for event in events if event.event_type == "run_completed") + assert completed.event_payload["latency_ms"] == latency diff --git a/app-instance/backend/tests/unit/test_agent_team_toggle.py b/app-instance/backend/tests/unit/test_agent_team_toggle.py new file mode 100644 index 0000000..97c64a0 --- /dev/null +++ b/app-instance/backend/tests/unit/test_agent_team_toggle.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace + +from beaver.engine.providers.base import LLMProvider, LLMResponse +from beaver.engine.providers.factory import ProviderBundle +from beaver.tasks import TaskExecutionPlanner, TaskRecord + + +class _TeamPlannerProvider(LLMProvider): + def __init__(self) -> None: + super().__init__() + self.calls = 0 + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + model: str | None = None, + max_tokens: int = 4096, + temperature: float = 0.7, + ) -> LLMResponse: + self.calls += 1 + return LLMResponse( + content='{"mode":"team","reason":"parallel research","strategy":"parallel","nodes":[{"node_id":"research","task":"research","agent":{"name":"researcher"}}]}', + finish_reason="stop", + provider_name="stub", + model="stub-model", + ) + + def get_default_model(self) -> str: + return "stub-model" + + +def test_agent_team_can_be_disabled_by_environment(monkeypatch) -> None: + monkeypatch.setenv("BEAVER_AGENT_TEAM_ENABLED", "0") + provider = _TeamPlannerProvider() + task = TaskRecord( + task_id="task-1", + session_id="session-1", + description="research and compare options", + goal="research and compare options", + constraints=[], + priority=0, + status="open", + creator="test", + created_at="now", + updated_at="now", + ) + bundle = ProviderBundle( + main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), + 
main_provider=provider, + ) + + plan = asyncio.run( + TaskExecutionPlanner().plan( + task=task, + user_message="research and compare options", + attempt_index=1, + provider_bundle=bundle, + ) + ) + + assert plan.mode == "single" + assert plan.reason == "planner_disabled_by_environment" + assert provider.calls == 0 diff --git a/app-instance/backend/tests/unit/test_agent_team_v1.py b/app-instance/backend/tests/unit/test_agent_team_v1.py index a098b81..2503a4e 100644 --- a/app-instance/backend/tests/unit/test_agent_team_v1.py +++ b/app-instance/backend/tests/unit/test_agent_team_v1.py @@ -8,7 +8,8 @@ import pytest from beaver.memory.curated.snapshot import MemorySnapshot from beaver.services.memory_service import MemoryService -from beaver.coordinator import AgentDescriptor, DelegationEnvelope, ExecutionGraph, ExecutionNode +from beaver.coordinator import AgentDescriptor, DelegationEnvelope, ExecutionGraph, ExecutionNode, NodeRunResult +from beaver.coordinator.execution.scheduler import TeamGraphScheduler from beaver.coordinator.local import LocalAgentRunner from beaver.engine import AgentLoop, EngineLoader from beaver.engine.context import SkillContext @@ -90,6 +91,15 @@ class PerRunSnapshotMemoryService(MemoryService): return MemorySnapshot(memory_block="# Memory\n\nshared-snapshot", user_block=None) +class CapturingRunner: + def __init__(self) -> None: + self.envelopes: list[DelegationEnvelope] = [] + + async def run(self, envelope: DelegationEnvelope, **kwargs) -> NodeRunResult: + self.envelopes.append(envelope) + return NodeRunResult(node_id=envelope.node_id or "node", success=True, output_text="done") + + def _bundle(provider: RecordingProvider) -> ProviderBundle: return ProviderBundle( main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), @@ -161,10 +171,72 @@ def test_local_agent_runner_uses_shared_loop_and_records_parent_task(tmp_path: P child_session = loaded.session_manager.get_session(result.session_id) # type: ignore[union-attr,arg-type] 
assert result.success is True + assert result.completion_status == "succeeded" + assert result.evidence_gaps == [] assert run_record.task_id == "task-parent" assert child_session["parent_session_id"] == "session-root" +def test_node_without_required_tool_result_is_partial(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider([_response("collected narrative")]) + envelope = DelegationEnvelope( + parent_task_id=None, + parent_session_id="session-root", + parent_run_id=None, + agent=AgentDescriptor(name="collect"), + task="collect", + node_id="collect", + required_evidence=["tool_result"], + ) + + result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) + + assert result.success is False + assert result.completion_status == "partial" + assert result.evidence_gaps == ["missing required evidence: tool_result"] + + +def test_node_with_required_nonempty_output_succeeds(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider([_response("verified output")]) + envelope = DelegationEnvelope( + parent_task_id=None, + parent_session_id="session-root", + parent_run_id=None, + agent=AgentDescriptor(name="verify"), + task="verify", + node_id="verify", + required_evidence=["output"], + ) + + result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) + + assert result.success is True + assert result.completion_status == "succeeded" + assert result.evidence_gaps == [] + + +def test_unknown_evidence_requirement_makes_node_partial(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider([_response("output")]) + envelope = DelegationEnvelope( + parent_task_id=None, + parent_session_id="session-root", + parent_run_id=None, + agent=AgentDescriptor(name="verify"), + task="verify", + node_id="verify", + required_evidence=["unknown_type"], + ) + + result = asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) + + 
assert result.success is False + assert result.completion_status == "partial" + assert result.evidence_gaps == ["unsupported evidence requirement: unknown_type"] + + def test_team_node_preserves_evidence_when_finish_reason_is_not_stop(tmp_path: Path) -> None: loop = _loop(tmp_path) provider = RecordingProvider([_response("partial evidence", finish_reason="max_tool_iterations")]) @@ -277,6 +349,108 @@ def test_team_sequence_passes_prior_outputs(tmp_path: Path) -> None: assert "Dependency first output:\nfirst output" in providers["second"].calls[0][0]["content"] +def test_partial_node_allows_downstream_by_default(tmp_path: Path) -> None: + loop = _loop(tmp_path) + providers = { + "collect": RecordingProvider([_response("partial source notes")]), + "extract": RecordingProvider([_response("extracted metrics")]), + } + graph = ExecutionGraph( + strategy="sequence", + nodes=[ + ExecutionNode( + "collect", + "collect", + AgentDescriptor(name="collect"), + required_evidence=["tool_result"], + ), + ExecutionNode("extract", "extract", AgentDescriptor(name="extract")), + ], + ) + + result = asyncio.run( + TeamService(loop).run_team( + graph, + parent_task_id=None, + parent_session_id="session-root", + provider_bundle_factory=lambda node: _bundle(providers[node.node_id]), + ) + ) + + assert result.node_results[0].completion_status == "partial" + assert result.node_results[1].completion_status == "succeeded" + assert "Dependency collect output:\npartial source notes" in providers["extract"].calls[0][0]["content"] + + +def test_partial_node_blocks_downstream_when_configured(tmp_path: Path) -> None: + loop = _loop(tmp_path) + providers = { + "collect": RecordingProvider([_response("partial source notes")]), + "extract": RecordingProvider([_response("must not run")]), + } + graph = ExecutionGraph( + strategy="sequence", + nodes=[ + ExecutionNode( + "collect", + "collect", + AgentDescriptor(name="collect"), + required_evidence=["tool_result"], + block_downstream_on_partial=True, + 
), + ExecutionNode("extract", "extract", AgentDescriptor(name="extract")), + ], + ) + + result = asyncio.run( + TeamService(loop).run_team( + graph, + parent_task_id=None, + parent_session_id="session-root", + provider_bundle_factory=lambda node: _bundle(providers[node.node_id]), + ) + ) + + assert result.node_results[0].completion_status == "partial" + assert result.node_results[1].completion_status == "blocked" + assert providers["extract"].calls == [] + + +def test_scheduler_copies_task_two_contract_fields_to_envelope() -> None: + runner = CapturingRunner() + node = ExecutionNode( + "collect", + "collect", + AgentDescriptor(name="collect"), + input_contract={"query": "str"}, + output_contract={"sources": "list"}, + required_evidence=["tool_result"], + evidence_contract={"entities": ["MGM"]}, + validation_rules=["official_sources_only"], + required_for_completion=False, + block_downstream_on_partial=True, + max_tool_iterations=2, + ) + + asyncio.run( + TeamGraphScheduler(runner).run( # type: ignore[arg-type] + ExecutionGraph(strategy="sequence", nodes=[node]), + parent_task_id=None, + parent_session_id="session-root", + ) + ) + + envelope = runner.envelopes[0] + assert envelope.input_contract == {"query": "str"} + assert envelope.output_contract == {"sources": "list"} + assert envelope.required_evidence == ["tool_result"] + assert envelope.evidence_contract == {"entities": ["MGM"]} + assert envelope.validation_rules == ["official_sources_only"] + assert envelope.required_for_completion is False + assert envelope.block_downstream_on_partial is True + assert envelope.max_tool_iterations == 2 + + def test_team_parallel_runs_all_nodes(tmp_path: Path) -> None: loop = _loop(tmp_path) providers = { @@ -428,9 +602,12 @@ def test_team_dag_blocks_dependents_after_failure(tmp_path: Path) -> None: ) ) publish = [item for item in result.node_results if item.node_id == "publish"][0] + validate = [item for item in result.node_results if item.node_id == "validate"][0] assert 
result.success is False + assert validate.completion_status == "failed" assert publish.finish_reason == "blocked" + assert publish.completion_status == "blocked" assert publish.run_id is None assert publish.error == "Blocked by failed dependency: validate" assert "failed" not in result.summary.split("Failed nodes:")[0] @@ -471,8 +648,10 @@ def test_dag_node_factory_error_blocks_dependents(tmp_path: Path) -> None: assert result.success is False assert validate.finish_reason == "error" + assert validate.completion_status == "failed" assert validate.error == "validator unavailable" assert publish.finish_reason == "blocked" + assert publish.completion_status == "blocked" assert publish.error == "Blocked by failed dependency: validate" @@ -550,6 +729,76 @@ def test_graph_structure_errors_still_raise(tmp_path: Path) -> None: asyncio.run(TeamService(loop).run_team(cyclic, parent_task_id=None, parent_session_id="session-root")) +def test_execution_node_contract_defaults_preserve_legacy_scope_behavior() -> None: + node = ExecutionNode("collect", "Collect sources", AgentDescriptor(name="collect")) + + assert node.input_contract == {} + assert node.output_contract == {} + assert node.allowed_tool_names is None + assert node.required_evidence == [] + assert node.evidence_contract == {} + assert node.validation_rules == [] + assert node.required_for_completion is True + assert node.block_downstream_on_partial is False + assert node.max_tool_iterations is None + + +def test_execution_node_keeps_explicit_empty_tool_scope_distinct_from_unspecified_scope() -> None: + unrestricted = ExecutionNode("unrestricted", "Collect", AgentDescriptor(name="unrestricted")) + tool_free = ExecutionNode( + "tool_free", + "Synthesize", + AgentDescriptor(name="tool_free"), + allowed_tool_names=[], + ) + + assert unrestricted.allowed_tool_names is None + assert tool_free.allowed_tool_names == [] + + +def test_delegation_envelope_and_node_result_preserve_new_contract_metadata() -> None: + envelope = 
DelegationEnvelope( + parent_task_id="task-parent", + parent_session_id="session-root", + parent_run_id="run-root", + agent=AgentDescriptor(name="collect"), + task="Collect sources", + allowed_tool_names=["web_search"], + required_evidence=["url"], + evidence_contract={"entities": ["MGM", "Galaxy"]}, + validation_rules=["official_sources_only"], + required_for_completion=True, + block_downstream_on_partial=True, + max_tool_iterations=2, + ) + result = NodeRunResult( + node_id="collect", + success=False, + output_text="MGM source only", + completion_status="partial", + evidence_gaps=["missing required evidence: Galaxy official source"], + ) + + assert envelope.allowed_tool_names == ["web_search"] + assert envelope.evidence_contract == {"entities": ["MGM", "Galaxy"]} + assert result.to_dict()["completion_status"] == "partial" + assert result.to_dict()["evidence_gaps"] == ["missing required evidence: Galaxy official source"] + + +def test_graph_rejects_depth_above_configured_limit() -> None: + graph = ExecutionGraph( + strategy="dag", + nodes=[ + ExecutionNode("a", "A", AgentDescriptor(name="a")), + ExecutionNode("b", "B", AgentDescriptor(name="b"), depends_on=["a"]), + ExecutionNode("c", "C", AgentDescriptor(name="c"), depends_on=["b"]), + ], + ) + + with pytest.raises(ValueError, match="max depth"): + graph.validate(max_depth=2) + + def test_team_run_does_not_create_independent_team_task(tmp_path: Path) -> None: loop = _loop(tmp_path) loaded = loop.boot() diff --git a/app-instance/backend/tests/unit/test_debug_chat_logs_api.py b/app-instance/backend/tests/unit/test_debug_chat_logs_api.py index 7521144..03eff75 100644 --- a/app-instance/backend/tests/unit/test_debug_chat_logs_api.py +++ b/app-instance/backend/tests/unit/test_debug_chat_logs_api.py @@ -1,6 +1,7 @@ from __future__ import annotations from pathlib import Path +from time import sleep from fastapi.testclient import TestClient @@ -74,10 +75,77 @@ def test_debug_chat_logs_group_events_by_run(tmp_path: Path) 
-> None: assert run["intent_agent_choice"] == "create_task" assert run["user_input"] == "hello" assert [event["event_type"] for event in run["events"]] == [ - "run_started", - "intent_agent_decision_snapshotted", - "llm_request_snapshotted", - "user_message_added", "assistant_message_added", + "user_message_added", + "llm_request_snapshotted", + "intent_agent_decision_snapshotted", + "run_started", ] assert run["events"][2]["event_payload"]["messages"][0]["content"] == "hello" + + +def test_debug_chat_logs_are_reverse_chronological_and_include_latency(tmp_path: Path) -> None: + service = AgentService(workspace=tmp_path) + loaded = service.create_loop().boot() + manager = loaded.session_manager + session_id = "web:debug-order" + manager.ensure_session(session_id, source="web", title="Debug order") + + manager.append_message( + session_id, + run_id="run-old", + role="system", + event_type="run_started", + content="old", + context_visible=False, + ) + manager.append_message( + session_id, + run_id="run-old", + role="system", + event_type="run_completed", + event_payload={"latency_ms": {"total_ms": 10.0, "llm_ms": 7.0}}, + finish_reason="stop", + context_visible=False, + ) + sleep(0.01) + manager.append_message( + session_id, + run_id="run-new", + role="system", + event_type="run_started", + content="new", + context_visible=False, + ) + manager.append_message( + session_id, + run_id="run-new", + role="system", + event_type="run_completed", + event_payload={ + "latency_ms": { + "router_ms": 1.0, + "mcp_ms": 2.0, + "skill_assembly_ms": 3.0, + "tool_assembly_ms": 4.0, + "context_build_ms": 5.0, + "llm_ms": 6.0, + "tool_ms": 7.0, + "session_write_ms": 8.0, + "total_ms": 36.0, + } + }, + finish_reason="stop", + context_visible=False, + ) + + app = create_app(service=service, manage_service_lifecycle=False) + with TestClient(app) as client: + response = client.get("/api/debug/chat-logs") + + assert response.status_code == 200 + runs = response.json()["sessions"][0]["runs"] + 
assert [run["run_id"] for run in runs] == ["run-new", "run-old"] + assert [event["event_type"] for event in runs[0]["events"]] == ["run_completed", "run_started"] + assert runs[0]["latency_ms"]["total_ms"] == 36.0 + assert runs[0]["latency_ms"]["router_ms"] == 1.0 diff --git a/app-instance/backend/tests/unit/test_main_agent_router.py b/app-instance/backend/tests/unit/test_main_agent_router.py index f0e62d7..1c77243 100644 --- a/app-instance/backend/tests/unit/test_main_agent_router.py +++ b/app-instance/backend/tests/unit/test_main_agent_router.py @@ -158,7 +158,7 @@ def test_router_receives_thinking_mode() -> None: provider = RouterProvider('{"action":"simple_chat","reason":"simple"}') decision = asyncio.run( MainAgentRouter().classify( - "你好", + "请判断一下这个概念是否合理", provider=provider, thinking_enabled=False, ) @@ -168,11 +168,84 @@ def test_router_receives_thinking_mode() -> None: assert provider.calls[0]["thinking_enabled"] is False +def test_router_fast_paths_obvious_simple_chat_without_provider_call() -> None: + provider = RouterProvider('{"action":"new_task","reason":"should not be used"}') + + decision = asyncio.run(MainAgentRouter().classify("你好", provider=provider)) + punctuated = asyncio.run(MainAgentRouter().classify("你好!", provider=provider)) + translation = asyncio.run(MainAgentRouter().classify("翻译这句话:hello world", provider=provider)) + + assert not decision.is_task + assert decision.action == "simple_chat" + assert decision.reason == "obvious_simple_chat" + assert not punctuated.is_task + assert punctuated.action == "simple_chat" + assert not translation.is_task + assert translation.action == "simple_chat" + assert provider.calls == [] + + +def test_router_sends_broad_explanations_to_intent_llm() -> None: + provider = RouterProvider('{"action":"simple_chat","reason":"intent decided concept explanation"}') + + explanation = asyncio.run(MainAgentRouter().classify("解释一下什么是 MCP", provider=provider)) + definition = asyncio.run(MainAgentRouter().classify("什么是 
context engineering", provider=provider)) + + assert not explanation.is_task + assert explanation.reason == "intent decided concept explanation" + assert not definition.is_task + assert definition.reason == "intent decided concept explanation" + assert len(provider.calls) == 2 + + +def test_router_fast_paths_obvious_task_without_provider_call() -> None: + provider = RouterProvider('{"action":"simple_chat","reason":"should not be used"}') + + decision = asyncio.run(MainAgentRouter().classify("帮我查一下今天深圳天气", provider=provider)) + current_event = asyncio.run( + MainAgentRouter().classify("解释一下今天法国队在世界杯的表现为什么那么好", provider=provider) + ) + + assert decision.is_task + assert decision.action == "create_task" + assert decision.reason == "obvious_task" + assert current_event.is_task + assert current_event.action == "create_task" + assert provider.calls == [] + + +def test_router_does_not_simple_fast_path_current_event_explanations() -> None: + provider = RouterProvider('{"action":"simple_chat","reason":"llm fallback"}') + + decision = asyncio.run(MainAgentRouter().classify("解释一下昨晚法国队在世界杯的表现为什么那么好", provider=provider)) + + assert decision.is_task + assert decision.action == "create_task" + assert decision.reason == "obvious_task" + assert provider.calls == [] + + +def test_router_keeps_active_task_followups_in_llm_path() -> None: + provider = RouterProvider('{"action":"revise_task","reason":"needs revision","short_title":"任务连续性"}') + + decision = asyncio.run( + MainAgentRouter().classify( + "这个也加上", + active_task=_task(), + provider=provider, + ) + ) + + assert decision.is_task + assert decision.action == "revise_task" + assert len(provider.calls) == 1 + + def test_router_injects_intent_skill_guidance() -> None: provider = RouterProvider('{"action":"new_task","reason":"needs weather tool","short_title":"珠海天气"}') decision = asyncio.run( MainAgentRouter().classify( - "帮我查一下今天珠海天气", + "帮我判断这个需求要不要进入任务模式", provider=provider, intent_skill="Weather and current external data must be 
routed to new_task.", ) @@ -247,7 +320,7 @@ def test_router_retries_once_after_provider_failure() -> None: decision = asyncio.run( MainAgentRouter().classify( - "帮我看看昨天的中美会面都谈了什么?", + "帮我判断这次中美会面分析需求要不要进入任务模式", provider=provider, ) ) @@ -262,7 +335,7 @@ def test_router_fallback_after_two_provider_failures() -> None: decision = asyncio.run( MainAgentRouter().classify( - "帮我看看昨天的中美会面都谈了什么?", + "帮我判断这次中美会面分析需求要不要进入任务模式", provider=provider, ) ) diff --git a/app-instance/backend/tests/unit/test_skill_assembler.py b/app-instance/backend/tests/unit/test_skill_assembler.py index 8a92def..e75c319 100644 --- a/app-instance/backend/tests/unit/test_skill_assembler.py +++ b/app-instance/backend/tests/unit/test_skill_assembler.py @@ -103,7 +103,7 @@ def test_skill_selection_receives_thinking_mode() -> None: assert provider.thinking_enabled is False -def test_skill_assembler_loads_detail_directly_for_small_candidate_sets() -> None: +def test_skill_assembler_directly_activates_single_clear_candidate_without_llm() -> None: provider = SequencedProvider(['["docker-debug"]']) assembler = SkillAssembler(loader=LoaderWithFullSkill(), retriever=StaticRetriever()) @@ -117,10 +117,8 @@ def test_skill_assembler_loads_detail_directly_for_small_candidate_sets() -> Non assert [skill.name for skill in result.activated_skills] == ["docker-debug"] assert result.activated_skills[0].tool_hints == ["search_files"] - assert [item["stage"] for item in result.llm_interactions] == ["final"] - assert len(provider.messages) == 1 - first_user_prompt = provider.messages[0][1]["content"] - assert "Use this skill when doing Docker log triage" in first_user_prompt + assert result.llm_interactions == [] + assert provider.messages == [] def test_skill_assembler_shortlists_before_loading_detail_for_large_candidate_sets() -> None: diff --git a/app-instance/backend/tests/unit/test_skill_learning_eval.py b/app-instance/backend/tests/unit/test_skill_learning_eval.py index 2a16b77..d401e65 100644 --- 
a/app-instance/backend/tests/unit/test_skill_learning_eval.py +++ b/app-instance/backend/tests/unit/test_skill_learning_eval.py @@ -395,6 +395,52 @@ def test_replay_main_score_uses_validator_not_tool_success(tmp_path: Path) -> No assert report.synthetic_score_avg is not None +def test_replay_real_case_without_validator_uses_same_output_scoring_for_both_arms(tmp_path: Path) -> None: + pipeline = _pipeline(tmp_path, task_score=0.8) + pipeline.learning_store.update_learning_candidate( + "candidate-1", + evidence={ + "eval_cases": [ + { + "run_id": "real-no-validator", + "task_id": "real-no-validator", + "session_id": "eval", + "task_text": "Summarize the release checklist.", + "accepted_score": 0.8, + } + ] + }, + ) + draft = pipeline.draft_service.create_new_skill_draft( + skill_name="release-checklist", + proposed_content="# Release\n\nRun tests.", + proposed_frontmatter={"description": "release", "tools": []}, + created_by="test", + reason="test", + ) + pipeline.learning_store.update_learning_candidate("candidate-1", draft_skill_name=draft.skill_name, draft_id=draft.draft_id) + + report = asyncio.run( + pipeline.evaluate_draft( + "candidate-1", + draft.skill_name, + draft.draft_id, + provider_bundle=_bundle(), + replay_runner=FakeReplayRunner( + baseline_answer="Release checklist summarized.", + candidate_answer="Release checklist summarized.", + ), + ) + ) + + case = next(item for item in report.case_reports if item["run_id"] == "real-no-validator") + legacy_case = next(item for item in report.cases if item["run_id"] == "real-no-validator") + assert case["baseline_score"] == 0.7 + assert case["candidate_score"] == 0.7 + assert case["delta"] == 0.0 + assert legacy_case["delta"] == 0.0 + + def test_synthetic_cases_without_validator_are_not_replay_scored(tmp_path: Path) -> None: pipeline = _pipeline(tmp_path) pipeline.learning_store.update_learning_candidate( diff --git a/app-instance/backend/tests/unit/test_skill_team_template.py 
b/app-instance/backend/tests/unit/test_skill_team_template.py new file mode 100644 index 0000000..f0ca37d --- /dev/null +++ b/app-instance/backend/tests/unit/test_skill_team_template.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from beaver.skills.assembler.task_assembler import SkillAssembler +from beaver.skills.catalog.loader import SkillsLoader +from beaver.skills.catalog.utils import extract_skill_team_template + + +def test_extract_team_template_returns_none_when_block_is_absent() -> None: + result = extract_skill_team_template("# Ordinary Skill") + + assert result.template is None + assert result.warnings == [] + + +def test_extract_team_template_parses_valid_json_block() -> None: + result = extract_skill_team_template( + "```beaver-team-template\n" + '{"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]}\n' + "```" + ) + + assert result.template == { + "version": 1, + "nodes": [{"node_id": "collect", "task": "Collect"}], + } + assert result.warnings == [] + + +def test_invalid_template_is_warning_not_skill_load_failure() -> None: + result = extract_skill_team_template("```beaver-team-template\nnot-json\n```") + + assert result.template is None + assert result.warnings == ["team template JSON is invalid"] + + +def test_loader_and_assembler_propagate_team_template_to_skill_context(tmp_path) -> None: + skill_dir = tmp_path / "plugin-skills" / "financial-comparison" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + "---\n" + "description: Compare financial disclosures.\n" + "---\n\n" + "# Financial Comparison\n\n" + "```beaver-team-template\n" + '{"version": 1, "nodes": [{"node_id": "collect", "task": "Collect official sources"}]}\n' + "```\n", + encoding="utf-8", + ) + loader = SkillsLoader( + tmp_path, + builtin_skills_dir=tmp_path / "no-builtins", + extra_dirs=[tmp_path / "plugin-skills"], + ) + + record = loader.get_skill_record("financial-comparison") + context = 
SkillAssembler(loader)._activate_skill_contexts(["financial-comparison"])[0] + + assert record is not None + assert record.team_template == { + "version": 1, + "nodes": [{"node_id": "collect", "task": "Collect official sources"}], + } + assert record.team_template_warnings == [] + assert context.team_template == record.team_template + assert context.team_template_warnings == [] diff --git a/app-instance/backend/tests/unit/test_task_evidence.py b/app-instance/backend/tests/unit/test_task_evidence.py index 6206642..5549fcb 100644 --- a/app-instance/backend/tests/unit/test_task_evidence.py +++ b/app-instance/backend/tests/unit/test_task_evidence.py @@ -3,7 +3,65 @@ from __future__ import annotations from pathlib import Path from beaver.engine.session.manager import SessionManager -from beaver.tasks.evidence import EvidenceBuilder, RunEvidence, TaskEvidencePacket, ToolEvidence, render_task_evidence +from beaver.tasks.evidence import ( + EvidenceBuilder, + RunEvidence, + TaskEvidencePacket, + ToolEvidence, + evaluate_node_evidence, + render_task_evidence, +) + + +def _run_evidence(*, tool_results: list[ToolEvidence] | None = None) -> RunEvidence: + return RunEvidence( + run_id="run-1", + session_id="session-1", + output_text="", + finish_reason="stop", + tool_results=list(tool_results or []), + ) + + +def test_evaluate_node_evidence_requires_successful_tool_result() -> None: + evidence = _run_evidence( + tool_results=[ + ToolEvidence( + tool_name="web_fetch", + tool_call_id="call-1", + content="failed", + event_payload={"success": False}, + ) + ] + ) + + assert evaluate_node_evidence(evidence, ["tool_result"], "done") == [ + "missing required evidence: tool_result" + ] + + +def test_evaluate_node_evidence_accepts_url_in_successful_tool_content() -> None: + evidence = _run_evidence( + tool_results=[ + ToolEvidence( + tool_name="web_fetch", + tool_call_id="call-1", + content="Source: https://example.test/report", + event_payload={"success": True}, + ) + ] + ) + + assert 
evaluate_node_evidence(evidence, ["tool_result", "url"], "done") == [] + + +def test_evaluate_node_evidence_checks_output_and_unknown_requirements() -> None: + evidence = _run_evidence() + + assert evaluate_node_evidence(evidence, ["output", "unknown_type"], " ") == [ + "missing required evidence: output", + "unsupported evidence requirement: unknown_type", + ] def test_evidence_builder_preserves_full_tool_result(tmp_path: Path) -> None: diff --git a/app-instance/backend/tests/unit/test_task_execution_planner.py b/app-instance/backend/tests/unit/test_task_execution_planner.py index e048d7c..be5ea78 100644 --- a/app-instance/backend/tests/unit/test_task_execution_planner.py +++ b/app-instance/backend/tests/unit/test_task_execution_planner.py @@ -3,15 +3,19 @@ from __future__ import annotations import asyncio from types import SimpleNamespace +from beaver.engine.context import SkillContext from beaver.engine.providers.base import LLMProvider, LLMResponse from beaver.engine.providers.factory import ProviderBundle -from beaver.tasks import TaskExecutionPlanner, TaskRecord +from beaver.tasks import SkillResolutionReport, TaskExecutionPlanner, TaskRecord +from beaver.tools.base import BaseTool, ToolContext, ToolResult, ToolSpec +from beaver.tools.registry import ToolRegistry class PlannerProvider(LLMProvider): def __init__(self, response: str) -> None: super().__init__() self.response = response + self.calls: list[dict] = [] async def chat( self, @@ -21,6 +25,15 @@ class PlannerProvider(LLMProvider): max_tokens: int = 4096, temperature: float = 0.7, ) -> LLMResponse: + self.calls.append( + { + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + "model": model, + "tools": tools, + } + ) return LLMResponse(content=self.response, finish_reason="stop", provider_name="stub", model="stub-model") def get_default_model(self) -> str: @@ -43,6 +56,28 @@ class HangingPlannerProvider(LLMProvider): return "stub-model" +class 
SequencedPlannerProvider(PlannerProvider): + def __init__(self, responses: list[str]) -> None: + super().__init__(responses[0]) + self.responses = list(responses) + + async def chat(self, *args, **kwargs) -> LLMResponse: + self.response = self.responses.pop(0) + return await super().chat(*args, **kwargs) + + +class StubTool(BaseTool): + def __init__(self, name: str) -> None: + self._spec = ToolSpec(name=name, description=name, input_schema={"type": "object"}) + + @property + def spec(self) -> ToolSpec: + return self._spec + + async def invoke(self, arguments: dict, context: ToolContext) -> ToolResult: + raise AssertionError("Planner tests do not execute tools") + + def _task() -> TaskRecord: return TaskRecord( task_id="task-1", @@ -59,12 +94,26 @@ def _task() -> TaskRecord: def _bundle(response: str) -> ProviderBundle: + provider = PlannerProvider(response) return ProviderBundle( main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), - main_provider=PlannerProvider(response), + main_provider=provider, ) +def _bundle_with_provider(provider: LLMProvider) -> ProviderBundle: + return ProviderBundle( + main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), + main_provider=provider, + ) + + +def _registry() -> ToolRegistry: + registry = ToolRegistry() + registry.register_many([StubTool("web_search"), StubTool("web_fetch"), StubTool("terminal")]) + return registry + + def _hanging_bundle() -> ProviderBundle: return ProviderBundle( main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), @@ -87,26 +136,55 @@ def test_planner_selects_single_mode() -> None: assert plan.reason == "main agent is enough" +def test_planner_skips_llm_for_simple_task() -> None: + provider = PlannerProvider('{"mode":"team","reason":"should not be used"}') + bundle = ProviderBundle( + main_runtime=SimpleNamespace(model="stub-model", provider_name="stub"), + main_provider=provider, + ) + task = _task() + task.description = "查询深圳天气" + task.goal = 
"查询深圳天气" + + plan = asyncio.run( + TaskExecutionPlanner().plan( + task=task, + user_message="帮我查一下今天深圳天气", + attempt_index=1, + provider_bundle=bundle, + ) + ) + + assert plan.mode == "single" + assert plan.graph is None + assert plan.reason == "planner_skipped_simple_task" + assert provider.calls == [] + + def test_planner_builds_team_graph() -> None: + bundle = _bundle( + """ + { + "mode": "team", + "reason": "needs parallel review", + "strategy": "dag", + "nodes": [ + {"node_id": "research", "task": "research options"}, + {"node_id": "review", "task": "review result", "depends_on": ["research"]} + ], + "final_synthesis_instruction": "merge the findings" + } + """ + ) + provider = bundle.main_provider plan = asyncio.run( TaskExecutionPlanner().plan( task=_task(), user_message="implement workflow", attempt_index=1, - provider_bundle=_bundle( - """ - { - "mode": "team", - "reason": "needs parallel review", - "strategy": "dag", - "nodes": [ - {"node_id": "research", "task": "research options", "agent": {"name": "researcher"}}, - {"node_id": "review", "task": "review result", "agent": {"name": "reviewer"}, "depends_on": ["research"]} - ], - "final_synthesis_instruction": "merge the findings" - } - """ - ), + provider_bundle=bundle, + skill_summaries=["docker-debug: Use docker logs before editing config."], + tool_hints=["terminal", "search_files"], ) ) @@ -116,6 +194,12 @@ def test_planner_builds_team_graph() -> None: assert [node.node_id for node in plan.graph.nodes] == ["research", "review"] assert plan.graph.nodes[1].depends_on == ["research"] assert plan.final_synthesis_instruction == "merge the findings" + assert isinstance(provider, PlannerProvider) + prompt = provider.calls[0]["messages"][1]["content"] + assert "Activated skill summaries" in prompt + assert "docker-debug: Use docker logs before editing config." 
in prompt + assert "terminal" in prompt + assert "search_files" in prompt def test_planner_timeout_falls_back_to_single() -> None: @@ -134,7 +218,7 @@ def test_planner_timeout_falls_back_to_single() -> None: assert "TimeoutError" in (plan.fallback_error or "") -def test_planner_team_nodes_can_target_skills_without_agent_roles() -> None: +def test_planner_team_nodes_use_task_as_internal_skill_query() -> None: plan = TaskExecutionPlanner().from_json( """ { @@ -144,9 +228,7 @@ def test_planner_team_nodes_can_target_skills_without_agent_roles() -> None: "nodes": [ { "node_id": "api_review", - "task": "review API compatibility", - "skill_query": "API contract compatibility review", - "required_capabilities": ["schema compatibility"] + "task": "review API compatibility" } ] } @@ -158,8 +240,77 @@ def test_planner_team_nodes_can_target_skills_without_agent_roles() -> None: node = plan.graph.nodes[0] assert node.agent.name == "api_review" assert node.agent.role == "" - assert node.agent.metadata["skill_query"] == "API contract compatibility review" - assert node.agent.metadata["required_capabilities"] == ["schema compatibility"] + assert node.agent.metadata["skill_query"] == "review API compatibility" + assert node.agent.metadata["required_capabilities"] == [] + + +def test_planner_accepts_use_skill_and_skill_query() -> None: + plan = TaskExecutionPlanner().from_json( + """ + { + "mode": "team", + "strategy": "sequence", + "nodes": [ + { + "node_id": "collect", + "task": "Collect official sources", + "use_skill": "official-source-research", + "skill_query": "official source verification" + } + ] + } + """ + ) + + assert plan.is_team + assert plan.graph is not None + node = plan.graph.nodes[0] + assert node.agent.metadata["use_skill"] == "official-source-research" + assert node.agent.metadata["skill_query"] == "official source verification" + assert node.inherited_pinned_skills == [] + assert node.allowed_tool_names is None + assert 
plan.planner_adaptation["node_skill_bindings"] == [ + { + "node_id": "collect", + "use_skill": "official-source-research", + "skill_query": "official source verification", + } + ] + + +def test_planner_defaults_skill_query_to_node_task_when_absent() -> None: + plan = TaskExecutionPlanner().from_json( + '{"mode":"team","strategy":"sequence","nodes":[' + '{"node_id":"extract","task":"Extract financial metrics","use_skill":"financial-extraction"}]}' + ) + + assert plan.is_team + assert plan.graph is not None + assert plan.graph.nodes[0].agent.metadata["skill_query"] == "Extract financial metrics" + + +def test_planner_adaptation_records_unresolved_use_skill_fallback() -> None: + planner = TaskExecutionPlanner() + plan = planner.from_json( + '{"mode":"team","strategy":"sequence","nodes":[' + '{"node_id":"extract","task":"Extract metrics","use_skill":"missing-skill",' + '"skill_query":"financial extraction"}]}' + ) + report = SkillResolutionReport( + node_id="extract", + skill_query="financial extraction", + requested_skill_name="missing-skill", + exact_binding_used=False, + warnings=["use_skill unresolved: missing-skill"], + reason="matched published skill", + ) + + planner._merge_skill_resolution_adaptation(plan, [report]) + + assert plan.planner_adaptation["warnings"] == ["use_skill unresolved: missing-skill"] + assert plan.planner_adaptation["node_skill_bindings"][0]["fallback_reason"] == ( + "use_skill unresolved; matched published skill" + ) def test_planner_invalid_outputs_fallback_to_single() -> None: @@ -193,3 +344,216 @@ def test_planner_invalid_outputs_fallback_to_single() -> None: assert unknown_strategy.mode == "single" assert too_many_nodes.mode == "single" assert cyclic.mode == "single" + + +def test_template_plan_builds_generic_worker_and_preserves_v1_contract_fields() -> None: + plan = TaskExecutionPlanner(tool_registry=_registry()).from_json( + """ + { + "mode": "team", + "strategy": "dag", + "nodes": [ + { + "node_id": "collect", + "task": "Collect 
official sources", + "requested_tools": ["web_search"], + "evidence_contract": {"entities": ["MGM", "Galaxy"]}, + "block_downstream_on_partial": true + } + ], + "adaptation": {"template_used": true} + } + """ + ) + + assert plan.is_team + assert plan.graph is not None + node = plan.graph.nodes[0] + assert node.agent.name == "collect" + assert node.agent.role == "" + assert node.agent.metadata["sub_agent_kind"] == "generic_skill_worker" + assert node.allowed_tool_names == ["web_search"] + assert node.evidence_contract == {"entities": ["MGM", "Galaxy"]} + assert node.block_downstream_on_partial is True + assert plan.planner_adaptation["template_used"] is True + + +def test_unknown_tool_is_removed_and_warned() -> None: + plan = TaskExecutionPlanner(tool_registry=_registry()).from_json( + '{"mode":"team","strategy":"sequence","nodes":[' + '{"node_id":"collect","task":"Collect","requested_tools":["web_search","not_real"]}]}' + ) + + assert plan.is_team + assert plan.graph is not None + assert plan.graph.nodes[0].allowed_tool_names == ["web_search"] + assert "unknown tool removed: not_real" in plan.planner_adaptation["warnings"] + + +def test_high_risk_tool_is_removed_without_failing_low_risk_plan() -> None: + plan = TaskExecutionPlanner(tool_registry=_registry()).from_json( + '{"mode":"team","strategy":"sequence","nodes":[' + '{"node_id":"collect","task":"Collect","requested_tools":["web_search","terminal"]}]}' + ) + + assert plan.is_team + assert plan.graph is not None + assert plan.graph.nodes[0].allowed_tool_names == ["web_search"] + assert "requires_high_risk_review: terminal" in plan.planner_adaptation["warnings"] + + +def test_planner_rejects_agent_and_role_node_fields() -> None: + planner = TaskExecutionPlanner(tool_registry=_registry()) + + agent_plan = planner.from_json( + '{"mode":"team","strategy":"sequence","nodes":[' + '{"node_id":"collect","task":"Collect","agent":{"name":"researcher"}}]}' + ) + role_plan = planner.from_json( + 
'{"mode":"team","strategy":"sequence","nodes":[' + '{"node_id":"collect","task":"Collect","role":"researcher"}]}' + ) + + assert agent_plan.mode == "single" + assert "agent" in (agent_plan.fallback_error or "") + assert role_plan.mode == "single" + assert "role" in (role_plan.fallback_error or "") + + +def test_planner_records_primary_template_selection_and_ignored_templates() -> None: + primary = SkillContext( + name="financial-comparison", + version="v1", + content="Compare official financial disclosures.", + team_template={"version": 1, "nodes": [{"node_id": "collect", "task": "Collect"}]}, + ) + secondary = SkillContext( + name="chart-reporting", + version="v2", + content="Render chart-ready Markdown.", + team_template={"version": 1, "nodes": [{"node_id": "report", "task": "Report"}]}, + ) + provider = PlannerProvider( + '{"mode":"team","strategy":"sequence","nodes":[' + '{"node_id":"collect","task":"Collect official sources"}],' + '"adaptation":{"template_used":true}}' + ) + + plan = asyncio.run( + TaskExecutionPlanner(tool_registry=_registry()).plan( + task=_task(), + user_message="compare financial workflow", + attempt_index=1, + provider_bundle=_bundle_with_provider(provider), + activated_skills=[primary, secondary], + ) + ) + + assert plan.planner_adaptation == { + "template_used": True, + "selected_template": "financial-comparison", + "selection_reason": "first activated skill with a valid team template", + "ignored_templates": ["chart-reporting"], + "warnings": [], + } + prompt = provider.calls[0]["messages"][1]["content"] + assert '"skill_name": "financial-comparison"' in prompt + assert "Compare official financial disclosures." in prompt + assert "Render chart-ready Markdown." 
in prompt + + +def test_malformed_planner_output_repairs_once_without_tools() -> None: + provider = SequencedPlannerProvider( + [ + "not json", + '{"mode":"team","strategy":"sequence","nodes":[{"node_id":"collect","task":"Collect"}]}', + ] + ) + + plan = asyncio.run( + TaskExecutionPlanner(tool_registry=_registry()).plan( + task=_task(), + user_message="implement workflow", + attempt_index=1, + provider_bundle=_bundle_with_provider(provider), + ) + ) + + assert plan.is_team + assert len(provider.calls) == 2 + assert provider.calls[1]["tools"] is None + assert "Repair the invalid planner JSON" in provider.calls[1]["messages"][1]["content"] + + +def test_failed_planner_repair_falls_back_to_single() -> None: + provider = SequencedPlannerProvider(["not json", "still not json"]) + + plan = asyncio.run( + TaskExecutionPlanner(tool_registry=_registry()).plan( + task=_task(), + user_message="implement workflow", + attempt_index=1, + provider_bundle=_bundle_with_provider(provider), + ) + ) + + assert plan.mode == "single" + assert plan.reason == "planner_fallback_single" + assert len(provider.calls) == 2 + + +def test_finance_template_adapts_to_task_oriented_read_only_graph() -> None: + plan = TaskExecutionPlanner(tool_registry=_registry()).from_json( + """ + { + "mode": "team", + "strategy": "dag", + "nodes": [ + { + "node_id": "collect_official_sources", + "task": "Collect MGM and Galaxy official financial disclosures", + "requested_tools": ["web_search", "web_fetch"], + "required_evidence": ["tool_result", "url"] + }, + { + "node_id": "extract_financial_metrics", + "task": "Extract comparable financial metrics from collected sources", + "depends_on": ["collect_official_sources"], + "requested_tools": ["web_fetch"], + "required_evidence": ["output"] + }, + { + "node_id": "validate_metrics", + "task": "Validate metric units, periods, and source consistency", + "depends_on": ["extract_financial_metrics"], + "required_evidence": ["output"] + }, + { + "node_id": 
"generate_chart_report", + "task": "Generate a Markdown comparison table and chart-ready data without claiming an image or file artifact", + "depends_on": ["validate_metrics"], + "requested_tools": [], + "required_evidence": ["output"] + } + ] + } + """ + ) + + assert plan.is_team + assert plan.graph is not None + assert [node.node_id for node in plan.graph.nodes] == [ + "collect_official_sources", + "extract_financial_metrics", + "validate_metrics", + "generate_chart_report", + ] + assert all(node.agent.role == "" for node in plan.graph.nodes) + assert not {"researcher", "writer", "reviewer", "analyst"}.intersection( + node.node_id for node in plan.graph.nodes + ) + assert plan.graph.nodes[0].allowed_tool_names == ["web_search", "web_fetch"] + assert plan.graph.nodes[-1].allowed_tool_names == [] + report_task = plan.graph.nodes[-1].task.lower() + assert "markdown" in report_task + assert "without claiming an image or file artifact" in report_task diff --git a/app-instance/backend/tests/unit/test_task_mode_feedback.py b/app-instance/backend/tests/unit/test_task_mode_feedback.py index 0675fa7..497e214 100644 --- a/app-instance/backend/tests/unit/test_task_mode_feedback.py +++ b/app-instance/backend/tests/unit/test_task_mode_feedback.py @@ -4,10 +4,12 @@ import asyncio from pathlib import Path from types import SimpleNamespace -from beaver.engine import EngineLoader +from beaver.engine import AgentRunResult, EngineLoader +from beaver.engine.context import SkillContext from beaver.engine.providers.base import LLMProvider, LLMResponse from beaver.engine.providers.factory import ProviderBundle from beaver.services.agent_service import AgentService +from beaver.skills.assembler import SkillAssemblyResult from beaver.tasks import TaskExecutionPlan, TaskService @@ -39,6 +41,44 @@ class StubTaskExecutionPlanner: return TaskExecutionPlan.single("test-single") +class RecordingTaskExecutionPlanner: + def __init__(self) -> None: + self.calls: list[dict] = [] + + async def 
plan(self, **kwargs) -> TaskExecutionPlan: + self.calls.append(dict(kwargs)) + return TaskExecutionPlan.single("test-single") + + +class RecordingSkillAssembler: + def __init__(self, skills: list[SkillContext]) -> None: + self.skills = list(skills) + self.calls: list[dict] = [] + + async def assemble(self, **kwargs) -> SkillAssemblyResult: + self.calls.append(dict(kwargs)) + return SkillAssemblyResult(activated_skills=list(self.skills)) + + +class RecordingTaskAttemptOrchestrator: + def __init__(self) -> None: + self.calls: list[dict] = [] + + async def run(self, **kwargs) -> AgentRunResult: + self.calls.append(dict(kwargs)) + task = kwargs["task"] + task.task_id = "task-from-orchestrator" + return AgentRunResult( + session_id=kwargs["kwargs"]["session_id"], + run_id="run-from-orchestrator", + output_text="orchestrated", + finish_reason="stop", + tool_iterations=0, + task_id=task.task_id, + task_status=task.status, + ) + + class FakeLearningCandidate: def to_dict(self) -> dict: return {"candidate_id": "candidate-1", "kind": "new_skill", "status": "open"} @@ -101,6 +141,91 @@ def test_task_run_records_evidence_and_waits_for_acceptance(tmp_path: Path) -> N assert "validated" not in event_types +def test_agent_service_records_router_latency(tmp_path: Path) -> None: + service = AgentService( + loader=EngineLoader( + workspace=tmp_path, + task_execution_planner=StubTaskExecutionPlanner(), + ) + ) + + result = asyncio.run( + service.process_direct( + "draft release notes", + session_id="web:latency", + provider_bundle=_bundle("Done"), + ) + ) + + latency = result.usage["latency_ms"] + assert latency["router_ms"] > 0 + + +def test_task_mode_preselects_skills_for_planner_and_reuses_them_in_main_run(tmp_path: Path) -> None: + skill = SkillContext( + name="docker-debug", + content="Use docker logs before editing config.", + version="v1", + content_hash="hash-v1", + activation_reason="llm_selected", + tool_hints=["terminal"], + ) + skill_assembler = 
RecordingSkillAssembler([skill]) + planner = RecordingTaskExecutionPlanner() + service = AgentService( + loader=EngineLoader( + workspace=tmp_path, + skill_assembler=skill_assembler, + task_execution_planner=planner, + ) + ) + + result = asyncio.run( + service.process_direct( + "debug this workflow", + session_id="web:skill-aware-task", + provider_bundle=_bundle("Done"), + ) + ) + + assert result.task_id + assert len(skill_assembler.calls) == 1 + assert planner.calls + assert planner.calls[0]["skill_summaries"] == ["docker-debug: Use docker logs before editing config."] + assert planner.calls[0]["tool_hints"] == ["terminal"] + + task_service = service.create_loop().boot().task_service + assert task_service is not None + task = task_service.get_task(result.task_id) + assert task is not None + assert task.skill_names == ["docker-debug"] + + +def test_task_mode_delegates_attempt_execution_to_orchestrator(tmp_path: Path) -> None: + orchestrator = RecordingTaskAttemptOrchestrator() + service = AgentService( + loader=EngineLoader( + workspace=tmp_path, + task_execution_planner=StubTaskExecutionPlanner(), + ) + ) + service._build_task_attempt_orchestrator = lambda loaded: orchestrator # type: ignore[attr-defined] + + result = asyncio.run( + service.process_direct( + "draft release notes", + session_id="web:orchestrator", + provider_bundle=_bundle("main runner should not be used"), + ) + ) + + assert result.output_text == "orchestrated" + assert result.run_id == "run-from-orchestrator" + assert len(orchestrator.calls) == 1 + assert orchestrator.calls[0]["message"] == "draft release notes" + assert orchestrator.calls[0]["task"].description == "draft release notes" + + def test_task_mode_injects_prompt_locale_output_language(tmp_path: Path) -> None: service = AgentService( loader=EngineLoader( diff --git a/app-instance/backend/tests/unit/test_task_skill_resolver.py b/app-instance/backend/tests/unit/test_task_skill_resolver.py index fb5d07f..38079ba 100644 --- 
a/app-instance/backend/tests/unit/test_task_skill_resolver.py +++ b/app-instance/backend/tests/unit/test_task_skill_resolver.py @@ -222,3 +222,179 @@ def test_task_skill_resolver_keeps_summary_nodes_skillless(tmp_path: Path) -> No assert reports[0].ephemeral_used is False assert reports[0].reason == "summary node uses dependency outputs directly" assert provider.calls == [] + + +def test_resolver_exact_binds_use_skill_before_dynamic_lookup(tmp_path: Path) -> None: + _publish_skill(tmp_path, skill_name="official-source-research") + provider = RecordingProvider(['["wrong-dynamic-skill"]']) + resolver = TaskSkillResolver( + skills_loader=SkillsLoader(tmp_path), + draft_service=DraftService(SkillSpecStore(tmp_path)), + ) + graph = ExecutionGraph( + strategy="sequence", + nodes=[ + ExecutionNode( + "collect", + "Collect official sources", + AgentDescriptor( + name="collect", + metadata={ + "use_skill": "official-source-research", + "skill_query": "generic web research", + }, + ), + ) + ], + ) + + resolved, reports = asyncio.run( + resolver.resolve_graph( + graph, + task=_task(), + user_message="collect sources", + attempt_index=1, + provider_bundle=_bundle(provider), + ) + ) + + node = resolved.nodes[0] + assert node.inherited_pinned_skills == ["official-source-research"] + assert [context.name for context in node.inherited_pinned_skill_contexts] == ["official-source-research"] + assert node.agent.metadata["exact_binding_used"] is True + assert reports[0].selected_skill_names == ["official-source-research"] + assert reports[0].exact_binding_used is True + assert reports[0].warnings == [] + assert provider.calls == [] + + +def test_resolver_falls_back_to_skill_query_when_use_skill_missing(tmp_path: Path) -> None: + _publish_skill(tmp_path, skill_name="financial-metric-extraction") + provider = RecordingProvider(['["financial-metric-extraction"]']) + resolver = TaskSkillResolver( + skills_loader=SkillsLoader(tmp_path), + 
draft_service=DraftService(SkillSpecStore(tmp_path)), + ) + graph = ExecutionGraph( + strategy="sequence", + nodes=[ + ExecutionNode( + "extract", + "Extract metrics", + AgentDescriptor( + name="extract", + metadata={ + "use_skill": "missing-exact-skill", + "skill_query": "financial metric extraction", + }, + ), + ) + ], + ) + + resolved, reports = asyncio.run( + resolver.resolve_graph( + graph, + task=_task(), + user_message="extract financial metrics", + attempt_index=1, + provider_bundle=_bundle(provider), + ) + ) + + assert resolved.nodes[0].inherited_pinned_skills == ["financial-metric-extraction"] + assert reports[0].exact_binding_used is False + assert reports[0].selected_skill_names == ["financial-metric-extraction"] + assert reports[0].warnings == ["use_skill unresolved: missing-exact-skill"] + assert "financial metric extraction" in provider.calls[0][1]["content"] + + +def test_resolver_falls_back_to_ephemeral_when_exact_and_query_miss(tmp_path: Path) -> None: + _publish_skill(tmp_path, skill_name="unrelated-skill") + provider = RecordingProvider( + [ + "[]", + """ + { + "guidance_name": "financial-extraction-guidance", + "description": "Extract financial metrics", + "content": "# Financial Extraction\\n\\nExtract the requested metrics.", + "tags": ["finance"] + } + """, + ] + ) + resolver = TaskSkillResolver( + skills_loader=SkillsLoader(tmp_path), + draft_service=DraftService(SkillSpecStore(tmp_path)), + missing_skill_synthesizer=EphemeralGuidanceSynthesizer(), + ) + graph = ExecutionGraph( + strategy="sequence", + nodes=[ + ExecutionNode( + "extract", + "Extract metrics", + AgentDescriptor( + name="extract", + metadata={ + "use_skill": "missing-exact-skill", + "skill_query": "financial metric extraction", + }, + ), + ) + ], + ) + + resolved, reports = asyncio.run( + resolver.resolve_graph( + graph, + task=_task(), + user_message="extract financial metrics", + attempt_index=1, + provider_bundle=_bundle(provider), + ) + ) + + assert 
resolved.nodes[0].inherited_pinned_skills == [] + assert resolved.nodes[0].inherited_pinned_skill_contexts[0].name == "ephemeral:financial-extraction-guidance" + assert reports[0].ephemeral_used is True + assert reports[0].warnings == ["use_skill unresolved: missing-exact-skill"] + + +def test_explicit_use_skill_is_preserved_for_summary_without_nested_expansion(tmp_path: Path) -> None: + _publish_skill(tmp_path, skill_name="summary-formatting") + provider = RecordingProvider([]) + resolver = TaskSkillResolver( + skills_loader=SkillsLoader(tmp_path), + draft_service=DraftService(SkillSpecStore(tmp_path)), + ) + graph = ExecutionGraph( + strategy="dag", + nodes=[ + ExecutionNode( + "summarize", + "Compile a summary from dependency outputs", + AgentDescriptor( + name="summarize", + metadata={"use_skill": "summary-formatting", "skill_query": "Summarization"}, + ), + depends_on=["collect"], + ) + ], + ) + + resolved, reports = asyncio.run( + resolver.resolve_graph( + graph, + task=_task(), + user_message="summarize", + attempt_index=1, + provider_bundle=_bundle(provider), + ) + ) + + assert len(resolved.nodes) == 1 + assert resolved.nodes[0].inherited_pinned_skills == ["summary-formatting"] + assert reports[0].exact_binding_used is True + assert provider.calls == [] diff --git a/app-instance/backend/tests/unit/test_task_team_synthesis_outcome.py b/app-instance/backend/tests/unit/test_task_team_synthesis_outcome.py new file mode 100644 index 0000000..b8c0450 --- /dev/null +++ b/app-instance/backend/tests/unit/test_task_team_synthesis_outcome.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +import pytest + +from beaver.coordinator import AgentDescriptor, ExecutionGraph, ExecutionNode, NodeRunResult, TeamRunResult +from beaver.engine import AgentRunResult +from beaver.tasks import TaskExecutionPlan, TaskRecord +from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator + + 
+def _plan(*, optional_second: bool = False) -> TaskExecutionPlan: + return TaskExecutionPlan( + mode="team", + reason="test team", + graph=ExecutionGraph( + strategy="sequence", + nodes=[ + ExecutionNode("collect", "Collect", AgentDescriptor(name="collect")), + ExecutionNode( + "report", + "Report", + AgentDescriptor(name="report"), + required_for_completion=not optional_second, + ), + ], + ), + ) + + +def _team_result(*results: NodeRunResult) -> TeamRunResult: + return TeamRunResult( + success=all(result.success for result in results), + summary="team summary", + node_results=list(results), + ) + + +def _result(node_id: str, status: str, *, gaps: list[str] | None = None) -> NodeRunResult: + return NodeRunResult( + node_id=node_id, + success=status == "succeeded", + output_text=f"{node_id} output", + finish_reason="blocked" if status == "blocked" else "stop", + error=None if status == "succeeded" else f"{status} node", + completion_status=status, + evidence_gaps=list(gaps or []), + ) + + +def test_required_partial_node_marks_synthesis_incomplete() -> None: + context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome( + _plan(), + _team_result( + _result("collect", "partial", gaps=["missing required evidence: url"]), + _result("report", "succeeded"), + ), + ) + + assert metadata["task_outcome"] == "incomplete" + assert metadata["incomplete_node_ids"] == ["collect"] + assert metadata["evidence_gaps"] == {"collect": ["missing required evidence: url"]} + assert "Task outcome: incomplete" in context + assert "missing required evidence: url" in context + assert prefix.startswith("任务未完成:") + + +@pytest.mark.parametrize("status", ["failed", "blocked"]) +def test_required_failed_or_blocked_node_marks_synthesis_incomplete(status: str) -> None: + _, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome( + _plan(), + _team_result(_result("collect", status), _result("report", "succeeded")), + ) + + assert metadata["task_outcome"] == "incomplete" + 
assert metadata["incomplete_node_ids"] == ["collect"] + assert metadata["node_statuses"]["collect"] == status + assert prefix + + +def test_optional_failed_node_does_not_force_incomplete() -> None: + context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome( + _plan(optional_second=True), + _team_result(_result("collect", "succeeded"), _result("report", "failed")), + ) + + assert metadata["task_outcome"] == "complete" + assert metadata["incomplete_node_ids"] == [] + assert "Task outcome: complete" in context + assert prefix == "" + + +def test_all_required_nodes_succeeded_is_complete() -> None: + _, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome( + _plan(), + _team_result(_result("collect", "succeeded"), _result("report", "succeeded")), + ) + + assert metadata["task_outcome"] == "complete" + assert prefix == "" + + +def test_single_plan_outcome_does_not_add_prefix() -> None: + context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome( + TaskExecutionPlan.single("single"), + None, + ) + + assert metadata["task_outcome"] == "single" + assert "Task outcome: single" in context + assert prefix == "" + + +class FakeTaskService: + def start_run(self, task_id: str, **_: Any) -> None: + return None + + def append_run(self, task_id: str, run_id: str, **_: Any) -> TaskRecord: + return self.task + + +class FakeSessionManager: + def __init__(self) -> None: + self.events: list[dict[str, Any]] = [] + + def append_message(self, session_id: str, **kwargs: Any) -> None: + self.events.append({"session_id": session_id, **kwargs}) + + def update_latest_assistant_event_payload(self, *args: Any, **kwargs: Any) -> None: + return None + + def get_run_event_records(self, session_id: str, run_id: str) -> list[Any]: + return [] + + +class FixedPlanner: + def __init__(self, plan: TaskExecutionPlan) -> None: + self.fixed_plan = plan + + async def plan(self, **_: Any) -> TaskExecutionPlan: + return self.fixed_plan + + +def _task() -> 
TaskRecord: + return TaskRecord( + task_id="task-1", + session_id="session-1", + description="finance comparison", + goal="finance comparison", + constraints=[], + priority=0, + status="open", + creator="test", + created_at="now", + updated_at="now", + ) + + +def test_incomplete_team_still_runs_tool_free_synthesis_and_prefixes_output() -> None: + plan = _plan() + team_result = _team_result( + _result("collect", "partial", gaps=["missing required evidence: url"]), + _result("report", "succeeded"), + ) + task = _task() + task_service = FakeTaskService() + task_service.task = task + session_manager = FakeSessionManager() + loaded = SimpleNamespace( + task_service=task_service, + task_execution_planner=FixedPlanner(plan), + session_manager=session_manager, + run_memory_store=None, + ) + orchestrator = TaskAttemptOrchestrator( + loaded=loaded, + create_loop=lambda: None, + make_provider_bundle_for_task=lambda *_: None, + ) + + async def fake_run_team(*args: Any, **kwargs: Any) -> tuple[TeamRunResult, None]: + return team_result, None + + runner_calls: list[dict[str, Any]] = [] + + async def runner(message: str, **kwargs: Any) -> AgentRunResult: + runner_calls.append(kwargs) + return AgentRunResult( + session_id="session-1", + run_id="main-run", + output_text="Available financial comparison.", + finish_reason="stop", + tool_iterations=0, + ) + + orchestrator._run_team_for_task = fake_run_team # type: ignore[method-assign] + result = asyncio.run( + orchestrator.run( + message="compare finance", + runner=runner, + kwargs={ + "session_id": "session-1", + "provider_bundle": SimpleNamespace(), + "include_skill_assembly": False, + }, + task=task, + ) + ) + + assert len(runner_calls) == 1 + assert runner_calls[0]["include_tools"] is False + assert runner_calls[0]["max_tool_iterations"] == 0 + assert "Task outcome: incomplete" in runner_calls[0]["execution_context"] + assert result.output_text.startswith("任务未完成:") + synthesis_event = [event for event in session_manager.events if 
event.get("event_type") == "task_synthesis_completed"][0] + assert synthesis_event["event_payload"]["task_outcome"] == "incomplete" + assert synthesis_event["event_payload"]["incomplete_node_ids"] == ["collect"] + assert synthesis_event["event_payload"]["node_statuses"] == { + "collect": "partial", + "report": "succeeded", + } + assert synthesis_event["event_payload"]["evidence_gaps"] == { + "collect": ["missing required evidence: url"] + } + + +def test_incomplete_notice_is_not_prefixed_twice() -> None: + text = "任务未完成:缺少官方来源。" + + assert TaskAttemptOrchestrator._apply_incomplete_prefix(text, "任务未完成:部分步骤缺少证据。\n\n") == text diff --git a/app-instance/backend/tests/unit/test_team_node_tool_policy.py b/app-instance/backend/tests/unit/test_team_node_tool_policy.py new file mode 100644 index 0000000..43326f2 --- /dev/null +++ b/app-instance/backend/tests/unit/test_team_node_tool_policy.py @@ -0,0 +1,231 @@ +from __future__ import annotations + +import asyncio +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from beaver.coordinator import AgentDescriptor, DelegationEnvelope, ExecutionGraph, ExecutionNode, NodeRunResult +from beaver.coordinator.execution.scheduler import TeamGraphScheduler +from beaver.coordinator.local import LocalAgentRunner +from beaver.engine import AgentLoop, EngineLoader +from beaver.engine.providers.base import LLMProvider, LLMResponse +from beaver.engine.providers.factory import ProviderBundle +from beaver.tools import BaseTool, ToolContext, ToolExecutor, ToolRegistry, ToolResult, ToolSpec + + +class RecordingProvider(LLMProvider): + def __init__(self) -> None: + super().__init__() + self.calls: list[dict[str, Any]] = [] + + async def chat( + self, + messages: list[dict], + tools: list[dict] | None = None, + model: str | None = None, + max_tokens: int | None = None, + temperature: float = 0.7, + thinking_enabled: bool | None = None, + ) -> LLMResponse: + self.calls.append({"messages": messages, "tools": 
tools}) + return LLMResponse(content="done", finish_reason="stop", provider_name="stub", model="stub") + + def get_default_model(self) -> str: + return "stub" + + +class StaticToolAssembler: + def __init__(self, specs: list[ToolSpec]) -> None: + self.specs = specs + + async def assemble(self, **_: Any) -> list[ToolSpec]: + return list(self.specs) + + +class StubTool(BaseTool): + def __init__(self, name: str) -> None: + self._spec = ToolSpec(name=name, description=name, input_schema={"type": "object"}) + self.calls = 0 + + @property + def spec(self) -> ToolSpec: + return self._spec + + async def invoke(self, arguments: dict[str, Any], context: ToolContext) -> ToolResult: + self.calls += 1 + return ToolResult(True, "called", self.spec.name) + + +class CapturingRunner: + def __init__(self) -> None: + self.envelopes: list[DelegationEnvelope] = [] + + async def run(self, envelope: DelegationEnvelope, **_: Any) -> NodeRunResult: + self.envelopes.append(envelope) + return NodeRunResult( + node_id=envelope.node_id or envelope.agent.name, + success=True, + output_text="done", + finish_reason="stop", + ) + + +def _bundle(provider: LLMProvider) -> ProviderBundle: + return ProviderBundle( + main_runtime=SimpleNamespace(model="stub", provider_name="stub"), + main_provider=provider, + ) + + +def _loop(tmp_path: Path) -> AgentLoop: + loop = AgentLoop(loader=EngineLoader(workspace=tmp_path)) + loaded = loop.boot() + specs = [loaded.tool_registry.get(name).spec for name in ("read_file", "web_search")] + loaded.tool_assembler = StaticToolAssembler(specs) # type: ignore[assignment] + return loop + + +def _tool_names(tools: list[dict] | None) -> list[str]: + return [str(tool["function"]["name"]) for tool in tools or []] + + +def _graph(allowed_tool_names: list[str] | None) -> ExecutionGraph: + return ExecutionGraph( + strategy="sequence", + nodes=[ + ExecutionNode( + node_id="collect", + task="collect", + agent=AgentDescriptor(name="collect"), + allowed_tool_names=allowed_tool_names, 
+ ) + ], + ) + + +def test_none_tool_scope_preserves_legacy_selection(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider() + + asyncio.run( + loop.process_direct( + "collect", + allowed_tool_names=None, + include_skill_assembly=False, + provider_bundle=_bundle(provider), + ) + ) + + assert _tool_names(provider.calls[0]["tools"]) == ["read_file", "web_search"] + + +def test_empty_tool_scope_exposes_no_tools(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider() + + asyncio.run( + loop.process_direct( + "collect", + allowed_tool_names=[], + include_skill_assembly=False, + provider_bundle=_bundle(provider), + ) + ) + + assert _tool_names(provider.calls[0]["tools"]) == [] + + +def test_named_tool_scope_exposes_only_allowed_schema(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider() + + asyncio.run( + loop.process_direct( + "collect", + allowed_tool_names=["web_search"], + include_skill_assembly=False, + provider_bundle=_bundle(provider), + ) + ) + + assert _tool_names(provider.calls[0]["tools"]) == ["web_search"] + + +def test_executor_rejects_registered_tool_outside_node_allowlist() -> None: + registry = ToolRegistry() + write_file = StubTool("write_file") + registry.register(write_file) + executor = ToolExecutor(registry) + context = ToolContext(metadata={"allowed_tool_names": ["web_search"]}) + + result = asyncio.run(executor.execute("write_file", {"path": "x"}, context=context)) + + assert result.success is False + assert result.error == "tool_not_allowed" + assert write_file.calls == 0 + + +def test_local_agent_runner_passes_node_tool_scope(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider() + envelope = DelegationEnvelope( + parent_task_id="task-parent", + parent_session_id="session-root", + parent_run_id="run-root", + agent=AgentDescriptor(name="collect"), + task="collect", + node_id="collect", + allowed_tool_names=[], + ) + + result = 
asyncio.run(LocalAgentRunner(loop).run(envelope, provider_bundle=_bundle(provider))) + + assert result.success is True + assert _tool_names(provider.calls[0]["tools"]) == [] + + +def test_scheduler_copies_named_node_tool_scope_to_envelope() -> None: + runner = CapturingRunner() + + asyncio.run( + TeamGraphScheduler(runner).run( # type: ignore[arg-type] + _graph(["web_search"]), + parent_task_id="task-parent", + parent_session_id="session-root", + ) + ) + + assert runner.envelopes[0].allowed_tool_names == ["web_search"] + + +def test_empty_tool_scope_reaches_provider_through_real_team_path(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider() + + asyncio.run( + TeamGraphScheduler(LocalAgentRunner(loop)).run( + _graph([]), + parent_task_id="task-parent", + parent_session_id="session-root", + provider_bundle=_bundle(provider), + ) + ) + + assert _tool_names(provider.calls[0]["tools"]) == [] + + +def test_none_tool_scope_preserves_tools_through_real_team_path(tmp_path: Path) -> None: + loop = _loop(tmp_path) + provider = RecordingProvider() + + asyncio.run( + TeamGraphScheduler(LocalAgentRunner(loop)).run( + _graph(None), + parent_task_id="task-parent", + parent_session_id="session-root", + provider_bundle=_bundle(provider), + ) + ) + + assert _tool_names(provider.calls[0]["tools"]) == ["read_file", "web_search"] diff --git a/app-instance/backend/tests/unit/test_user_file_service.py b/app-instance/backend/tests/unit/test_user_file_service.py index a1fcf53..a7bbcbb 100644 --- a/app-instance/backend/tests/unit/test_user_file_service.py +++ b/app-instance/backend/tests/unit/test_user_file_service.py @@ -11,6 +11,7 @@ from beaver.services.user_files import ( UserFileNotFoundError, UserFilePathError, UserFileSizeError, + UserFileStorageError, UserFileService, normalize_user_path, ) @@ -151,3 +152,68 @@ def test_minio_storage_rejects_paths_that_escape_namespace() -> None: with pytest.raises(UserFilePathError): 
storage._user_path("users/bob/uploads/secret.txt") + + +@pytest.mark.asyncio +async def test_minio_storage_translates_s3_errors_to_user_file_errors() -> None: + from minio.error import S3Error + + class FakeMinioClient: + def list_objects(self, *args, **kwargs): + raise S3Error( + None, + "SignatureDoesNotMatch", + "The request signature we calculated does not match", + "/beaver-user-files", + "request-id", + "host-id", + bucket_name="beaver-user-files", + ) + + storage = object.__new__(MinIOUserFileStorage) + storage.config = MinIOStorageConfig( + endpoint="minio.local:9000", + access_key="alice-access", + secret_key="alice-secret", + bucket="beaver-user-files", + namespace="users/alice", + ) + storage.client = FakeMinioClient() + + with pytest.raises(UserFileStorageError) as exc_info: + await storage.list_dir("uploads") + + assert "SignatureDoesNotMatch" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_minio_storage_does_not_report_auth_errors_as_missing_files() -> None: + from minio.error import S3Error + + class FakeMinioClient: + def stat_object(self, *args, **kwargs): + raise S3Error( + None, + "SignatureDoesNotMatch", + "The request signature we calculated does not match", + "/beaver-user-files/uploads/input.txt", + "request-id", + "host-id", + bucket_name="beaver-user-files", + object_name="users/alice/uploads/input.txt", + ) + + storage = object.__new__(MinIOUserFileStorage) + storage.config = MinIOStorageConfig( + endpoint="minio.local:9000", + access_key="alice-access", + secret_key="alice-secret", + bucket="beaver-user-files", + namespace="users/alice", + ) + storage.client = FakeMinioClient() + + with pytest.raises(UserFileStorageError) as exc_info: + await storage.read_file("uploads/input.txt") + + assert "SignatureDoesNotMatch" in str(exc_info.value) diff --git a/app-instance/backend/tests/unit/test_web_files_api.py b/app-instance/backend/tests/unit/test_web_files_api.py index 32fd6f5..9ee7e48 100644 --- 
a/app-instance/backend/tests/unit/test_web_files_api.py +++ b/app-instance/backend/tests/unit/test_web_files_api.py @@ -7,7 +7,7 @@ from fastapi.testclient import TestClient from beaver.interfaces.web.app import create_app from beaver.services.agent_service import AgentService from beaver.services.user_file_resolver import UserFileStorageResolver -from beaver.services.user_files import LocalUserFileStorage, UserFileService +from beaver.services.user_files import LocalUserFileStorage, UserFileService, UserFileStorageError def _auth_headers(app, username: str = "alice") -> dict[str, str]: @@ -191,6 +191,26 @@ def test_user_files_api_authenticated_request_resolves_identity(tmp_path: Path, assert seen[0].storage_namespace == "users/alice" +def test_user_files_api_reports_storage_errors_as_unavailable(tmp_path: Path, monkeypatch) -> None: + service = AgentService(workspace=tmp_path) + app = create_app(service=service, manage_service_lifecycle=False) + + class BrokenStorage: + async def list_dir(self, path: str): + raise UserFileStorageError("User file storage list directory failed: SignatureDoesNotMatch") + + async def fake_service(self): + return UserFileService(BrokenStorage()) + + monkeypatch.setattr(UserFileStorageResolver, "service", fake_service) + + with TestClient(app) as client: + response = client.get("/api/user-files/browse", params={"path": "uploads"}, headers=_auth_headers(app)) + + assert response.status_code == 503 + assert "SignatureDoesNotMatch" in response.json()["detail"] + + def test_user_files_api_streams_upload_and_enforces_configured_limit(tmp_path: Path, monkeypatch) -> None: monkeypatch.setenv("BEAVER_USER_FILES_MAX_UPLOAD_BYTES", "5") service = AgentService(workspace=tmp_path) diff --git a/app-instance/backend/tests/unit/test_web_tools.py b/app-instance/backend/tests/unit/test_web_tools.py index de5f8a9..c88122f 100644 --- a/app-instance/backend/tests/unit/test_web_tools.py +++ b/app-instance/backend/tests/unit/test_web_tools.py @@ -2,23 +2,43 
@@ from __future__ import annotations import asyncio import json +import sys +import types from beaver.tools.builtins import web +def _disable_ddgs(monkeypatch) -> None: + def _raise_unavailable(query: str, limit: int) -> list[dict[str, str]]: + raise ModuleNotFoundError("ddgs disabled for fallback test") + + monkeypatch.setattr(web, "_search_ddgs", _raise_unavailable) + + class _FakeResponse: headers = {"content-type": "text/html"} status_code = 200 + fetch_html = """ + + Investor Reports + + 2025 Annual Report + Investor Centre + + + """ def __init__(self, url: str = "https://example.com") -> None: self.url = url if "duckduckgo.com" in url: self.text = 'Duck Example' - else: + elif "bing.com" in url: self.text = ( '
  • Example

    ' "

    Example result

  • " ) + else: + self.text = self.fetch_html def raise_for_status(self) -> None: return None @@ -48,6 +68,7 @@ class _FakeAsyncClient: def test_web_tools_use_environment_proxy_settings(monkeypatch) -> None: _FakeAsyncClient.calls = [] + _disable_ddgs(monkeypatch) monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) async def _run() -> None: @@ -73,10 +94,39 @@ def test_web_fetch_uses_short_connect_timeout(monkeypatch) -> None: assert timeout.read == 12 +def test_web_fetch_returns_page_title_and_links(monkeypatch) -> None: + _FakeAsyncClient.calls = [] + _FakeAsyncClient.urls = [] + monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) + + raw = asyncio.run(web.WebFetchTool().execute(url="https://example.com/investor")) + + payload = json.loads(raw) + assert payload["success"] is True + assert payload["title"] == "Investor Reports" + assert payload["links"] == [ + { + "text": "2025 Annual Report", + "url": "https://example.com/reports/2025-annual.pdf", + }, + { + "text": "Investor Centre", + "url": "https://example.com/investor", + }, + ] + assert payload["pdf_links"] == [ + { + "text": "2025 Annual Report", + "url": "https://example.com/reports/2025-annual.pdf", + } + ] + + def test_web_search_uses_reachable_bing_endpoint_first(monkeypatch) -> None: _FakeAsyncClient.calls = [] _FakeAsyncClient.urls = [] _FakeAsyncClient.fail_bing = False + _disable_ddgs(monkeypatch) monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) @@ -95,10 +145,60 @@ def test_web_search_uses_reachable_bing_endpoint_first(monkeypatch) -> None: assert timeout.read == 8 +def test_web_search_prefers_ddgs_provider_when_available(monkeypatch) -> None: + class _FakeDDGS: + def text(self, query: str, max_results: int) -> list[dict[str, str]]: + assert query == "weather beijing" + assert max_results == 5 + return [ + { + "title": "Beijing Weather", + "href": "https://weather.example.com/beijing", + 
"body": "Current Beijing weather forecast", + } + ] + + fake_module = types.SimpleNamespace(DDGS=_FakeDDGS) + monkeypatch.setitem(sys.modules, "ddgs", fake_module) + _FakeAsyncClient.calls = [] + monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) + + raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) + + payload = json.loads(raw) + assert payload["success"] is True + assert payload["engine"] == "ddgs" + assert payload["quality"] == "high" + assert payload["results"] == [ + { + "title": "Beijing Weather", + "url": "https://weather.example.com/beijing", + "snippet": "Current Beijing weather forecast", + } + ] + assert _FakeAsyncClient.calls == [] + + +def test_web_search_reports_low_quality_for_irrelevant_results(monkeypatch) -> None: + _FakeAsyncClient.calls = [] + _FakeAsyncClient.urls = [] + _FakeAsyncClient.fail_bing = False + _disable_ddgs(monkeypatch) + monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) + + raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) + + payload = json.loads(raw) + assert payload["success"] is True + assert payload["quality"] == "low" + assert payload["low_relevance_reason"] == "results do not overlap enough with query terms" + + def test_web_search_falls_back_when_bing_is_unavailable(monkeypatch) -> None: _FakeAsyncClient.calls = [] _FakeAsyncClient.urls = [] _FakeAsyncClient.fail_bing = True + _disable_ddgs(monkeypatch) monkeypatch.setattr(web.httpx, "AsyncClient", _FakeAsyncClient) raw = asyncio.run(web.WebSearchTool().execute(query="weather beijing")) diff --git a/app-instance/backend/uv.lock b/app-instance/backend/uv.lock index 39eaeac..399cea1 100644 --- a/app-instance/backend/uv.lock +++ b/app-instance/backend/uv.lock @@ -3,7 +3,8 @@ revision = 3 requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '3.14'", - "python_full_version < '3.14'", + "python_full_version == '3.13.*'", + "python_full_version < '3.13'", ] [[package]] @@ -282,6 +283,7 @@ 
source = { editable = "." } dependencies = [ { name = "anthropic" }, { name = "croniter" }, + { name = "ddgs" }, { name = "fastapi" }, { name = "fastmcp" }, { name = "httpx" }, @@ -325,6 +327,7 @@ requires-dist = [ { name = "aiohttp", marker = "extra == 'weixin'", specifier = ">=3.9.0,<4.0.0" }, { name = "anthropic", specifier = ">=0.51.0,<1.0.0" }, { name = "croniter", specifier = ">=6.0.0,<7.0.0" }, + { name = "ddgs", specifier = ">=9.0.0,<10.0.0" }, { name = "fastapi", specifier = ">=0.115.0,<1.0.0" }, { name = "fastmcp", specifier = ">=3.0.0,<4.0.0" }, { name = "httpx", specifier = ">=0.28.0,<1.0.0" }, @@ -345,6 +348,79 @@ requires-dist = [ ] provides-extras = ["dev", "telegram", "feishu", "qqbot", "weixin", "channels"] +[[package]] +name = "brotli" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/16/c92ca344d646e71a43b8bb353f0a6490d7f6e06210f8554c8f874e454285/brotli-1.2.0.tar.gz", hash = "sha256:e310f77e41941c13340a95976fe66a8a95b01e783d430eeaf7a2f87e0a57dd0a", size = 7388632, upload-time = "2025-11-05T18:39:42.86Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/ef/f285668811a9e1ddb47a18cb0b437d5fc2760d537a2fe8a57875ad6f8448/brotli-1.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:15b33fe93cedc4caaff8a0bd1eb7e3dab1c61bb22a0bf5bdfdfd97cd7da79744", size = 863110, upload-time = "2025-11-05T18:38:12.978Z" }, + { url = "https://files.pythonhosted.org/packages/50/62/a3b77593587010c789a9d6eaa527c79e0848b7b860402cc64bc0bc28a86c/brotli-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:898be2be399c221d2671d29eed26b6b2713a02c2119168ed914e7d00ceadb56f", size = 445438, upload-time = "2025-11-05T18:38:14.208Z" }, + { url = "https://files.pythonhosted.org/packages/cd/e1/7fadd47f40ce5549dc44493877db40292277db373da5053aff181656e16e/brotli-1.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:350c8348f0e76fff0a0fd6c26755d2653863279d086d3aa2c290a6a7251135dd", size = 1534420, upload-time = "2025-11-05T18:38:15.111Z" }, + { url = "https://files.pythonhosted.org/packages/12/8b/1ed2f64054a5a008a4ccd2f271dbba7a5fb1a3067a99f5ceadedd4c1d5a7/brotli-1.2.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e1ad3fda65ae0d93fec742a128d72e145c9c7a99ee2fcd667785d99eb25a7fe", size = 1632619, upload-time = "2025-11-05T18:38:16.094Z" }, + { url = "https://files.pythonhosted.org/packages/89/5a/7071a621eb2d052d64efd5da2ef55ecdac7c3b0c6e4f9d519e9c66d987ef/brotli-1.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:40d918bce2b427a0c4ba189df7a006ac0c7277c180aee4617d99e9ccaaf59e6a", size = 1426014, upload-time = "2025-11-05T18:38:17.177Z" }, + { url = "https://files.pythonhosted.org/packages/26/6d/0971a8ea435af5156acaaccec1a505f981c9c80227633851f2810abd252a/brotli-1.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2a7f1d03727130fc875448b65b127a9ec5d06d19d0148e7554384229706f9d1b", size = 1489661, upload-time = "2025-11-05T18:38:18.41Z" }, + { url = "https://files.pythonhosted.org/packages/f3/75/c1baca8b4ec6c96a03ef8230fab2a785e35297632f402ebb1e78a1e39116/brotli-1.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9c79f57faa25d97900bfb119480806d783fba83cd09ee0b33c17623935b05fa3", size = 1599150, upload-time = "2025-11-05T18:38:19.792Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1a/23fcfee1c324fd48a63d7ebf4bac3a4115bdb1b00e600f80f727d850b1ae/brotli-1.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:844a8ceb8483fefafc412f85c14f2aae2fb69567bf2a0de53cdb88b73e7c43ae", size = 1493505, upload-time = "2025-11-05T18:38:20.913Z" }, + { url = "https://files.pythonhosted.org/packages/36/e5/12904bbd36afeef53d45a84881a4810ae8810ad7e328a971ebbfd760a0b3/brotli-1.2.0-cp311-cp311-win32.whl", hash = 
"sha256:aa47441fa3026543513139cb8926a92a8e305ee9c71a6209ef7a97d91640ea03", size = 334451, upload-time = "2025-11-05T18:38:21.94Z" }, + { url = "https://files.pythonhosted.org/packages/02/8b/ecb5761b989629a4758c394b9301607a5880de61ee2ee5fe104b87149ebc/brotli-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:022426c9e99fd65d9475dce5c195526f04bb8be8907607e27e747893f6ee3e24", size = 369035, upload-time = "2025-11-05T18:38:22.941Z" }, + { url = "https://files.pythonhosted.org/packages/11/ee/b0a11ab2315c69bb9b45a2aaed022499c9c24a205c3a49c3513b541a7967/brotli-1.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:35d382625778834a7f3061b15423919aa03e4f5da34ac8e02c074e4b75ab4f84", size = 861543, upload-time = "2025-11-05T18:38:24.183Z" }, + { url = "https://files.pythonhosted.org/packages/e1/2f/29c1459513cd35828e25531ebfcbf3e92a5e49f560b1777a9af7203eb46e/brotli-1.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a61c06b334bd99bc5ae84f1eeb36bfe01400264b3c352f968c6e30a10f9d08b", size = 444288, upload-time = "2025-11-05T18:38:25.139Z" }, + { url = "https://files.pythonhosted.org/packages/3d/6f/feba03130d5fceadfa3a1bb102cb14650798c848b1df2a808356f939bb16/brotli-1.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:acec55bb7c90f1dfc476126f9711a8e81c9af7fb617409a9ee2953115343f08d", size = 1528071, upload-time = "2025-11-05T18:38:26.081Z" }, + { url = "https://files.pythonhosted.org/packages/2b/38/f3abb554eee089bd15471057ba85f47e53a44a462cfce265d9bf7088eb09/brotli-1.2.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:260d3692396e1895c5034f204f0db022c056f9e2ac841593a4cf9426e2a3faca", size = 1626913, upload-time = "2025-11-05T18:38:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/03/a7/03aa61fbc3c5cbf99b44d158665f9b0dd3d8059be16c460208d9e385c837/brotli-1.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:072e7624b1fc4d601036ab3f4f27942ef772887e876beff0301d261210bca97f", size = 1419762, upload-time = "2025-11-05T18:38:28.295Z" }, + { url = "https://files.pythonhosted.org/packages/21/1b/0374a89ee27d152a5069c356c96b93afd1b94eae83f1e004b57eb6ce2f10/brotli-1.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adedc4a67e15327dfdd04884873c6d5a01d3e3b6f61406f99b1ed4865a2f6d28", size = 1484494, upload-time = "2025-11-05T18:38:29.29Z" }, + { url = "https://files.pythonhosted.org/packages/cf/57/69d4fe84a67aef4f524dcd075c6eee868d7850e85bf01d778a857d8dbe0a/brotli-1.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7a47ce5c2288702e09dc22a44d0ee6152f2c7eda97b3c8482d826a1f3cfc7da7", size = 1593302, upload-time = "2025-11-05T18:38:30.639Z" }, + { url = "https://files.pythonhosted.org/packages/d5/3b/39e13ce78a8e9a621c5df3aeb5fd181fcc8caba8c48a194cd629771f6828/brotli-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:af43b8711a8264bb4e7d6d9a6d004c3a2019c04c01127a868709ec29962b6036", size = 1487913, upload-time = "2025-11-05T18:38:31.618Z" }, + { url = "https://files.pythonhosted.org/packages/62/28/4d00cb9bd76a6357a66fcd54b4b6d70288385584063f4b07884c1e7286ac/brotli-1.2.0-cp312-cp312-win32.whl", hash = "sha256:e99befa0b48f3cd293dafeacdd0d191804d105d279e0b387a32054c1180f3161", size = 334362, upload-time = "2025-11-05T18:38:32.939Z" }, + { url = "https://files.pythonhosted.org/packages/1c/4e/bc1dcac9498859d5e353c9b153627a3752868a9d5f05ce8dedd81a2354ab/brotli-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:b35c13ce241abdd44cb8ca70683f20c0c079728a36a996297adb5334adfc1c44", size = 369115, upload-time = "2025-11-05T18:38:33.765Z" }, + { url = "https://files.pythonhosted.org/packages/6c/d4/4ad5432ac98c73096159d9ce7ffeb82d151c2ac84adcc6168e476bb54674/brotli-1.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9e5825ba2c9998375530504578fd4d5d1059d09621a02065d1b6bfc41a8e05ab", size = 861523, upload-time = "2025-11-05T18:38:34.67Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/9f/9cc5bd03ee68a85dc4bc89114f7067c056a3c14b3d95f171918c088bf88d/brotli-1.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0cf8c3b8ba93d496b2fae778039e2f5ecc7cff99df84df337ca31d8f2252896c", size = 444289, upload-time = "2025-11-05T18:38:35.6Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b6/fe84227c56a865d16a6614e2c4722864b380cb14b13f3e6bef441e73a85a/brotli-1.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8565e3cdc1808b1a34714b553b262c5de5fbda202285782173ec137fd13709f", size = 1528076, upload-time = "2025-11-05T18:38:36.639Z" }, + { url = "https://files.pythonhosted.org/packages/55/de/de4ae0aaca06c790371cf6e7ee93a024f6b4bb0568727da8c3de112e726c/brotli-1.2.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:26e8d3ecb0ee458a9804f47f21b74845cc823fd1bb19f02272be70774f56e2a6", size = 1626880, upload-time = "2025-11-05T18:38:37.623Z" }, + { url = "https://files.pythonhosted.org/packages/5f/16/a1b22cbea436642e071adcaf8d4b350a2ad02f5e0ad0da879a1be16188a0/brotli-1.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67a91c5187e1eec76a61625c77a6c8c785650f5b576ca732bd33ef58b0dff49c", size = 1419737, upload-time = "2025-11-05T18:38:38.729Z" }, + { url = "https://files.pythonhosted.org/packages/46/63/c968a97cbb3bdbf7f974ef5a6ab467a2879b82afbc5ffb65b8acbb744f95/brotli-1.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4ecdb3b6dc36e6d6e14d3a1bdc6c1057c8cbf80db04031d566eb6080ce283a48", size = 1484440, upload-time = "2025-11-05T18:38:39.916Z" }, + { url = "https://files.pythonhosted.org/packages/06/9d/102c67ea5c9fc171f423e8399e585dabea29b5bc79b05572891e70013cdd/brotli-1.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3e1b35d56856f3ed326b140d3c6d9db91740f22e14b06e840fe4bb1923439a18", size = 1593313, upload-time = "2025-11-05T18:38:41.24Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/4a/9526d14fa6b87bc827ba1755a8440e214ff90de03095cacd78a64abe2b7d/brotli-1.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:54a50a9dad16b32136b2241ddea9e4df159b41247b2ce6aac0b3276a66a8f1e5", size = 1487945, upload-time = "2025-11-05T18:38:42.277Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e8/3fe1ffed70cbef83c5236166acaed7bb9c766509b157854c80e2f766b38c/brotli-1.2.0-cp313-cp313-win32.whl", hash = "sha256:1b1d6a4efedd53671c793be6dd760fcf2107da3a52331ad9ea429edf0902f27a", size = 334368, upload-time = "2025-11-05T18:38:43.345Z" }, + { url = "https://files.pythonhosted.org/packages/ff/91/e739587be970a113b37b821eae8097aac5a48e5f0eca438c22e4c7dd8648/brotli-1.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:b63daa43d82f0cdabf98dee215b375b4058cce72871fd07934f179885aad16e8", size = 369116, upload-time = "2025-11-05T18:38:44.609Z" }, + { url = "https://files.pythonhosted.org/packages/17/e1/298c2ddf786bb7347a1cd71d63a347a79e5712a7c0cba9e3c3458ebd976f/brotli-1.2.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:6c12dad5cd04530323e723787ff762bac749a7b256a5bece32b2243dd5c27b21", size = 863080, upload-time = "2025-11-05T18:38:45.503Z" }, + { url = "https://files.pythonhosted.org/packages/84/0c/aac98e286ba66868b2b3b50338ffbd85a35c7122e9531a73a37a29763d38/brotli-1.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3219bd9e69868e57183316ee19c84e03e8f8b5a1d1f2667e1aa8c2f91cb061ac", size = 445453, upload-time = "2025-11-05T18:38:46.433Z" }, + { url = "https://files.pythonhosted.org/packages/ec/f1/0ca1f3f99ae300372635ab3fe2f7a79fa335fee3d874fa7f9e68575e0e62/brotli-1.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:963a08f3bebd8b75ac57661045402da15991468a621f014be54e50f53a58d19e", size = 1528168, upload-time = "2025-11-05T18:38:47.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/a6/2ebfc8f766d46df8d3e65b880a2e220732395e6d7dc312c1e1244b0f074a/brotli-1.2.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9322b9f8656782414b37e6af884146869d46ab85158201d82bab9abbcb971dc7", size = 1627098, upload-time = "2025-11-05T18:38:48.385Z" }, + { url = "https://files.pythonhosted.org/packages/f3/2f/0976d5b097ff8a22163b10617f76b2557f15f0f39d6a0fe1f02b1a53e92b/brotli-1.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cf9cba6f5b78a2071ec6fb1e7bd39acf35071d90a81231d67e92d637776a6a63", size = 1419861, upload-time = "2025-11-05T18:38:49.372Z" }, + { url = "https://files.pythonhosted.org/packages/9c/97/d76df7176a2ce7616ff94c1fb72d307c9a30d2189fe877f3dd99af00ea5a/brotli-1.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7547369c4392b47d30a3467fe8c3330b4f2e0f7730e45e3103d7d636678a808b", size = 1484594, upload-time = "2025-11-05T18:38:50.655Z" }, + { url = "https://files.pythonhosted.org/packages/d3/93/14cf0b1216f43df5609f5b272050b0abd219e0b54ea80b47cef9867b45e7/brotli-1.2.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:fc1530af5c3c275b8524f2e24841cbe2599d74462455e9bae5109e9ff42e9361", size = 1593455, upload-time = "2025-11-05T18:38:51.624Z" }, + { url = "https://files.pythonhosted.org/packages/b3/73/3183c9e41ca755713bdf2cc1d0810df742c09484e2e1ddd693bee53877c1/brotli-1.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d2d085ded05278d1c7f65560aae97b3160aeb2ea2c0b3e26204856beccb60888", size = 1488164, upload-time = "2025-11-05T18:38:53.079Z" }, + { url = "https://files.pythonhosted.org/packages/64/6a/0c78d8f3a582859236482fd9fa86a65a60328a00983006bcf6d83b7b2253/brotli-1.2.0-cp314-cp314-win32.whl", hash = "sha256:832c115a020e463c2f67664560449a7bea26b0c1fdd690352addad6d0a08714d", size = 339280, upload-time = "2025-11-05T18:38:54.02Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/10/56978295c14794b2c12007b07f3e41ba26acda9257457d7085b0bb3bb90c/brotli-1.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:e7c0af964e0b4e3412a0ebf341ea26ec767fa0b4cf81abb5e897c9338b5ad6a3", size = 375639, upload-time = "2025-11-05T18:38:55.67Z" }, +] + +[[package]] +name = "brotlicffi" +version = "1.2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8a/b6/017dc5f852ed9b8735af77774509271acbf1de02d238377667145fcee01d/brotlicffi-1.2.0.1.tar.gz", hash = "sha256:c20d5c596278307ad06414a6d95a892377ea274a5c6b790c2548c009385d621c", size = 478156, upload-time = "2026-03-05T19:54:11.547Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/f9/dfa56316837fa798eac19358351e974de8e1e2ca9475af4cb90293cd6576/brotlicffi-1.2.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c85e65913cf2b79c57a3fdd05b98d9731d9255dc0cb696b09376cc091b9cddd", size = 433046, upload-time = "2026-03-05T19:53:46.209Z" }, + { url = "https://files.pythonhosted.org/packages/4a/f5/f8f492158c76b0d940388801f04f747028971ad5774287bded5f1e53f08d/brotlicffi-1.2.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:535f2d05d0273408abc13fc0eebb467afac17b0ad85090c8913690d40207dac5", size = 1541126, upload-time = "2026-03-05T19:53:48.248Z" }, + { url = "https://files.pythonhosted.org/packages/3b/e1/ff87af10ac419600c63e9287a0649c673673ae6b4f2bcf48e96cb2f89f60/brotlicffi-1.2.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce17eb798ca59ecec67a9bb3fd7a4304e120d1cd02953ce522d959b9a84d58ac", size = 1541983, upload-time = "2026-03-05T19:53:50.317Z" }, + { url = "https://files.pythonhosted.org/packages/47/c0/80ecd9bd45776109fab14040e478bf63e456967c9ddee2353d8330ed8de1/brotlicffi-1.2.0.1-cp314-cp314t-win32.whl", hash = 
"sha256:3c9544f83cb715d95d7eab3af4adbbef8b2093ad6382288a83b3a25feb1a57ec", size = 349047, upload-time = "2026-03-05T19:53:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/ab/98/13e5b250236a281b6cd9e92a01ee1ae231029fa78faee932ef3766e1cb24/brotlicffi-1.2.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:625f8115d32ae9c0740d01ea51518437c3fbaa3e78d41cb18459f6f7ac326000", size = 385652, upload-time = "2026-03-05T19:53:53.892Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9f/b98dcd4af47994cee97aebac866996a006a2e5fc1fd1e2b82a8ad95cf09c/brotlicffi-1.2.0.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:91ba5f0ccc040f6ff8f7efaf839f797723d03ed46acb8ae9408f99ffd2572cf4", size = 432608, upload-time = "2026-03-05T19:53:56.736Z" }, + { url = "https://files.pythonhosted.org/packages/b1/7a/ac4ee56595a061e3718a6d1ea7e921f4df156894acffb28ed88a1fd52022/brotlicffi-1.2.0.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be9a670c6811af30a4bd42d7116dc5895d3b41beaa8ed8a89050447a0181f5ce", size = 1534257, upload-time = "2026-03-05T19:53:58.667Z" }, + { url = "https://files.pythonhosted.org/packages/99/39/e7410db7f6f56de57744ea52a115084ceb2735f4d44973f349bb92136586/brotlicffi-1.2.0.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f3314a3476f59e5443f9f72a6dff16edc0c3463c9b318feaef04ae3e4683f5a", size = 1536838, upload-time = "2026-03-05T19:54:00.705Z" }, + { url = "https://files.pythonhosted.org/packages/a6/75/6e7977d1935fc3fbb201cbd619be8f2c7aea25d40a096967132854b34708/brotlicffi-1.2.0.1-cp38-abi3-win32.whl", hash = "sha256:82ea52e2b5d3145b6c406ebd3efb0d55db718b7ad996bd70c62cec0439de1187", size = 343337, upload-time = "2026-03-05T19:54:02.446Z" }, + { url = "https://files.pythonhosted.org/packages/d8/ef/e7e485ce5e4ba3843a0a92feb767c7b6098fd6e65ce752918074d175ae71/brotlicffi-1.2.0.1-cp38-abi3-win_amd64.whl", hash = 
"sha256:da2e82a08e7778b8bc539d27ca03cdd684113e81394bfaaad8d0dfc6a17ddede", size = 379026, upload-time = "2026-03-05T19:54:04.322Z" }, + { url = "https://files.pythonhosted.org/packages/7f/53/6262c2256513e6f530d81642477cb19367270922063eaa2d7b781d8c723d/brotlicffi-1.2.0.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:e015af99584c6db1490a69a210c765953e473e63adc2d891ac3062a737c9e851", size = 402265, upload-time = "2026-03-05T19:54:05.858Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d9/d5340b43cf5fbe7fe5a083d237e5338cc1caa73bea523be1c5e452c26290/brotlicffi-1.2.0.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:37cb587d32bf7168e2218c455e22e409ad1f3157c6c71945879a311f3e6b6abf", size = 406710, upload-time = "2026-03-05T19:54:07.272Z" }, + { url = "https://files.pythonhosted.org/packages/a3/82/dbced4c1e0792efdf23fd90ff6d2a320c64ff4dfef7aacc85c04fde9ddd2/brotlicffi-1.2.0.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d6ba65dd528892b4d9960beba2ae011a753620bcfc66cf6fa3cee18d7b0baa4", size = 402787, upload-time = "2026-03-05T19:54:08.73Z" }, + { url = "https://files.pythonhosted.org/packages/ef/6f/534205ba7590c9a8716a614f270c5c2ec419b5b7079b3f9cd31b7b5580de/brotlicffi-1.2.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f2a5575653b0672638ba039b82fda56854934d7a6a24d4b8b5033f73ab43cbc1", size = 375108, upload-time = "2026-03-05T19:54:10.079Z" }, +] + [[package]] name = "cachetools" version = "7.1.1" @@ -654,6 +730,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/18/4cedda786e7da429e7489549a9e5461530d4133130e541f25fb94f015776/cyclopts-4.11.2-py3-none-any.whl", hash = "sha256:838020120b939549ff7c8423aca29c86764b5dd1d8a5d7f3753a6327861f537b", size = 213537, upload-time = "2026-05-04T00:11:56.103Z" }, ] +[[package]] +name = "ddgs" +version = "9.14.4" +source = { registry = "https://pypi.org/simple" } +dependencies 
= [ + { name = "click" }, + { name = "fake-useragent" }, + { name = "httpx", extra = ["brotli", "http2", "socks"] }, + { name = "lxml" }, + { name = "primp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/24/9d29eeb7dd4852c27c3673adcaf30c4dc55ced76b303c1fbb792ce7cae52/ddgs-9.14.4.tar.gz", hash = "sha256:f7b118a2b709a9e9c04a1dca6e96b98c25d4dfaca1a4b0a244d74454fcca48ef", size = 59742, upload-time = "2026-05-15T06:53:45.946Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/5f/32de4d99220eb559b7b1cd1c529a1856efa8097f7a3e10b6c207aa95e36c/ddgs-9.14.4-py3-none-any.whl", hash = "sha256:acb084c34bf1110c974caf7e5e5a2c1973beb4bd9e170bfd191fe5ed2d2b2d6c", size = 70638, upload-time = "2026-05-15T06:53:44.761Z" }, +] + [[package]] name = "distro" version = "1.9.0" @@ -715,6 +807,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, ] +[[package]] +name = "fake-useragent" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/43/948d10bf42735709edb5ae51e23297d034086f17fc7279fef385a7acb473/fake_useragent-2.2.0.tar.gz", hash = "sha256:4e6ab6571e40cc086d788523cf9e018f618d07f9050f822ff409a4dfe17c16b2", size = 158898, upload-time = "2025-04-14T15:32:19.238Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/37/b3ea9cd5558ff4cb51957caca2193981c6b0ff30bd0d2630ac62505d99d0/fake_useragent-2.2.0-py3-none-any.whl", hash = "sha256:67f35ca4d847b0d298187443aaf020413746e56acd985a611908c73dba2daa24", size = 161695, upload-time = "2025-04-14T15:32:17.732Z" }, +] + [[package]] name = "fastapi" version = "0.136.1" @@ -957,6 +1058,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + [[package]] name = "hf-xet" version = "1.5.0" @@ -989,6 +1103,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/94/3b66b148778ee100dcfd69c2ca22b57b41b44d3063ceec934f209e9184ce/hf_xet-1.5.0-cp37-abi3-win_arm64.whl", hash = "sha256:b6c9df403040248c76d808d3e047d64db2d923bae593eb244c41e425cf6cd7be", size = 3806916, upload-time = "2026-05-06T06:18:21.7Z" }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = 
"sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -1053,6 +1176,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +brotli = [ + { name = "brotli", marker = "platform_python_implementation == 'CPython'" }, + { name = "brotlicffi", marker = "platform_python_implementation != 'CPython'" }, +] +http2 = [ + { name = "h2" }, +] +socks = [ + { name = "socksio" }, +] + [[package]] name = "httpx-sse" version = "0.4.3" @@ -1082,6 +1217,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/db/4b1cdae9460ae1f3ca020cd767f013430ce23eb1d9c890ae3a0609b38d26/huggingface_hub-1.13.0-py3-none-any.whl", hash = "sha256:e942cb50d6a08dd5306688b1ac05bda157fd2fcc88b63dae405f7bd0d3234005", size = 660643, upload-time = "2026-04-30T11:57:31.802Z" }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "idna" version = "3.13" @@ -1386,6 +1530,108 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ea/53/aa31e4d057b3746b3c323ca993003d6cf15ef987e7fe7ceb53681695ae87/litellm-1.80.0-py3-none-any.whl", hash = "sha256:fd0009758f4772257048d74bf79bb64318859adb4ea49a8b66fdbc718cd80b6e", size = 10492975, upload-time = "2025-11-16T00:03:49.182Z" }, ] +[[package]] +name = "lxml" +version = "6.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/3b/aab6728cae887456f409b4d75e8a01856e4f04bd510de38052a47768b680/lxml-6.1.1.tar.gz", hash = "sha256:ba96ae44888e0185281e937633a743ea90d5a196c6000f82565ebb0580012d40", size = 4197430, upload-time = "2026-05-18T19:19:06.424Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/b0/83f481780d1548750b8ce2ec824073deef2f452d9cd1a6faff8507e3d16d/lxml-6.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:53b7d2b7a10b1c35c0a5e21e9224accf60c1bbfba523990732e521b2b73adef2", size = 8526461, upload-time = "2026-05-18T19:17:25.862Z" }, + { url = "https://files.pythonhosted.org/packages/b9/d5/30fa0f808002c7329397bfbb24e306789c0b29f04aa5842c07b174b4216f/lxml-6.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ff3f333630ab480244a1bff72043e511a91eb22e7595dead8653ee5612dd8f3d", size = 4595375, upload-time = "2026-05-18T19:17:34.555Z" }, + { url = "https://files.pythonhosted.org/packages/4f/d2/edb71cf0e561581a7c5eb2626244320eb04e9f8ce6d563184fd668b45073/lxml-6.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a4bbea04c97f6d78a48e3fbc1cb9116d2780b1b39e03a23f6eb9b603fd61f510", size = 4923654, upload-time = "2026-05-18T19:17:42.917Z" }, + { url = "https://files.pythonhosted.org/packages/4c/77/1bc7eeb0de4577d783fb625aa092cc9357883bba35845a3666bf1259f3dc/lxml-6.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db1d75f6617a49c1c01bc7023713e0ff59ab32c9579ae62a7674c0e34f3b0b0a", size = 5067921, upload-time = "2026-05-18T19:17:49.175Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/3c/c0690d74bd2bc17bc03b5b0d093569ead597dd0bfa088bf99eef8c24e19c/lxml-6.1.1-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a12689be69a28ddaa0ab99a5a1137da2afd5f8f16df7b5680b66f616d3eda1d", size = 5002456, upload-time = "2026-05-18T19:17:59.715Z" }, + { url = "https://files.pythonhosted.org/packages/66/8d/d1b3271af0c0f1e27e8472a849e4d2c65bc7766884b9ad2da9e76e145c88/lxml-6.1.1-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b73c339ae29b90fd2d06e58ebd555a751bde9cd6bbd36cc0281b9a2c94e9d8", size = 5202776, upload-time = "2026-05-18T19:18:08.924Z" }, + { url = "https://files.pythonhosted.org/packages/7a/45/689824ffb237fd10125ad273f32b28ff04dc6203c2822c85ff65a93df65e/lxml-6.1.1-cp311-cp311-manylinux_2_28_i686.whl", hash = "sha256:752d3bbfe874715ccd0aec7f88d7fc623c0f1fd7aa7b3238a084e017bad2a009", size = 5329945, upload-time = "2026-05-18T19:18:13.673Z" }, + { url = "https://files.pythonhosted.org/packages/5d/c0/ef73af53767e958fd87d437c170f272e2f6e6c0f854939f133a895f1e711/lxml-6.1.1-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:6b1761fbf9ec984e2e9d9c589ef5f5fd684b7c19f92aadd567a26c5224958db6", size = 4659237, upload-time = "2026-05-18T19:18:18.657Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5e/e1158e40397585e91cb0472374a1f63d0926a1ddeaa92f13d1a1ffe306d5/lxml-6.1.1-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d680fbcb768404c601ecb43519ecd8461f6954cb11c06a78962f666832ccfca8", size = 5265904, upload-time = "2026-05-18T19:18:24.883Z" }, + { url = "https://files.pythonhosted.org/packages/a0/16/8687e5d1400ed1c0bc41dace232ebb7553952b618ea1f2e5fb6e2cfbbe23/lxml-6.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:162af1091cd785f2f27e62d3547ae9bc58ec5c86dd314d67021fd02463708d83", size = 5045225, upload-time = "2026-05-18T19:17:20.073Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/18/d877bd1ae2e5ffdfd4836565aba350db31feb2f2656d6ce70316ed66a05e/lxml-6.1.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e9308ff8241c532df3f3e570f9a5aeed6c853f888512ba4b75638d7c11c95ef6", size = 4712721, upload-time = "2026-05-18T19:17:40.512Z" }, + { url = "https://files.pythonhosted.org/packages/44/4d/1f44fd1d770b10dacbf6b5c6e520f4d6e0708744930f719dc04e67cab981/lxml-6.1.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5f6994074ebae6ffb04447268e37dc16edc304f9859cf91acb86e0af6c1b395c", size = 5252549, upload-time = "2026-05-18T19:17:51.236Z" }, + { url = "https://files.pythonhosted.org/packages/64/5d/1d66b84f850089254c230ef6ea6b267a5a54e2e179a5d960036a05d501d7/lxml-6.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80c2dfadb855da477cf73373ad29a333535dedb9b12bad02c9814c8e2b43bf08", size = 5226877, upload-time = "2026-05-18T19:18:00.875Z" }, + { url = "https://files.pythonhosted.org/packages/ad/00/84c4b5302d42a2d0184f38d538c8a197f33b52a50bd4f7bcfe990bce3036/lxml-6.1.1-cp311-cp311-win32.whl", hash = "sha256:30a89d3ac8faec007453fb541f3f46807eeec88edd5826f6e3fe001752a2c621", size = 3594072, upload-time = "2026-05-18T19:17:12.714Z" }, + { url = "https://files.pythonhosted.org/packages/61/9d/2e2f7d876349f45e0f3e29f72da311668853d59b58d473a2dea4f0160135/lxml-6.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:abbefa31eee84842140f67acef1c828e28bba8bbf0c3bc6e5492a9af88152c28", size = 4025469, upload-time = "2026-05-18T19:17:50.566Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d5/570e6390e4110331e6208b2ba83d1482cc9146808ee118b22824a34c1070/lxml-6.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:dcb292aa7fe485ceff7af4f92e46c5af397daec5dff64871a528f0fc47a3cc5b", size = 3667640, upload-time = "2026-05-19T19:22:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6e/c4add832b6fc1e887125b96f880d7b9b70aae5248718e046b1704bcac4b9/lxml-6.1.1-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:104c09bda8d2a562824c0e319d0768ce26a779b7601e0931d33b09b53c392ef7", size = 8570821, upload-time = "2026-05-18T19:17:42.068Z" }, + { url = "https://files.pythonhosted.org/packages/22/00/ff3009c88e65de8011630acf8ab5a09cb2becd2aaf47fba2f3449f6224e9/lxml-6.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:25c6997a9a534e016695a0ba06b2f07945de682731ff01065b6d5a4474179da1", size = 4624252, upload-time = "2026-05-18T19:17:47.897Z" }, + { url = "https://files.pythonhosted.org/packages/42/95/bb63f0fd62e554fe078e1fb3c8fe9083c14ddc7ad7fa178d10e57e071ac7/lxml-6.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c921ba5c51e4e9f63b8b00267d06566e1f63407408a0496da2d1d0bfc819c7fc", size = 4930746, upload-time = "2026-05-18T19:18:29.637Z" }, + { url = "https://files.pythonhosted.org/packages/eb/99/0013e8d9b5960f4f041cf0b73e2f80c23eb5205b1f7bfb20203243651359/lxml-6.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:54a7f95e4de5fb94e2f9f4b9055c6ba33bf3d628fd77a1d647c5923caa2cdcdc", size = 5093723, upload-time = "2026-05-18T19:18:34.168Z" }, + { url = "https://files.pythonhosted.org/packages/29/91/317b332636bfc7bddcff828d41b3307f50043f4b237e40849c333d80fa1a/lxml-6.1.1-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f2ec43df44b1f76249ee0a615334f9b5b060e1c8bd90e706dad2d14d02f383", size = 5005557, upload-time = "2026-05-18T19:18:39.798Z" }, + { url = "https://files.pythonhosted.org/packages/42/2f/cc9bf06afe70f9c9093ae60855d9759da9db601ec4080f7473319666ffd7/lxml-6.1.1-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:70ef8a7e102a1508f8121aae5b0867abd663f72c14f0a9c937e6554cb4587b7b", size = 5631036, upload-time = "2026-05-18T19:18:44.858Z" }, + { url = "https://files.pythonhosted.org/packages/08/f6/af32e23e563971ffb0fb86be52bc5be5c2c118858ffc119bf6a9039b173d/lxml-6.1.1-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:ebe6af670449830d6d9b752c256a983291c766a1365ba5d5460048f9e33a7818", size = 5240367, upload-time = "2026-05-18T19:18:49.217Z" }, + { url = "https://files.pythonhosted.org/packages/78/83/8555d40948b09ce86f1bd0c68a7ac31d07b1929f92cc1b074006c97ef2d2/lxml-6.1.1-cp312-cp312-manylinux_2_28_i686.whl", hash = "sha256:27acc820660aaffa4f7c087f29120e12980f7779d56d8492d263170111284740", size = 5350171, upload-time = "2026-05-18T19:18:52.779Z" }, + { url = "https://files.pythonhosted.org/packages/63/75/5d92da93729b7bad783689e6496049fa40927b45bec7bf183c981de3ca70/lxml-6.1.1-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:1db753c9115ec7100d073b744d17e25e88a8f90f5c39b2f5dd878149af59671f", size = 4694874, upload-time = "2026-05-18T19:18:55.139Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b5/3aad415a9a25b822e783f15deeb4dffccf5113030f1afa2222dd929313d9/lxml-6.1.1-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4f469aebd783bb741c2ecb2a681008fd26bfe5c16a9a72ed5467f834e810df2", size = 5244492, upload-time = "2026-05-18T19:19:01.28Z" }, + { url = "https://files.pythonhosted.org/packages/f1/a1/5fcf7eb9904b80086aa47dcf0027de07b1bb990afad2e6823144c368ae04/lxml-6.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:766b010012d59470072c1816b5b6c69f1d243e5db36ea5968e94accf430a4635", size = 5048232, upload-time = "2026-05-18T19:18:12.67Z" }, + { url = "https://files.pythonhosted.org/packages/77/74/1f601b63c7a69fcdf10fa9b148c81da8442204194f6c55509cc485c786b9/lxml-6.1.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b8d812c6011c08b8111a15e54dd990b8923692d80adf35488bee34026c35accf", size = 4777023, upload-time = "2026-05-18T19:18:15.928Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b9/7a78f51aec95b1bf780d78e12705a9f6533284f8693dc5c0e6724fa53d3f/lxml-6.1.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fe0306bd29505a9177aac19f1877174b0e7422c222a59f70b2cd41633448c3dc", size = 5645773, upload-time = 
"2026-05-18T19:18:23.223Z" }, + { url = "https://files.pythonhosted.org/packages/a5/6e/98a7b7ad54e4e74fa1f20fff776913980619d0ebe5558232d7da6580bdd8/lxml-6.1.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5ba186ad207446c65d3bb3d3e0412b032b1d9f595e59861e2354798c5703d955", size = 5233088, upload-time = "2026-05-18T19:18:31.433Z" }, + { url = "https://files.pythonhosted.org/packages/65/d1/bc0ed2427bf609f2ee10da303a6a226f9c8bce94f945dc29a32ce55de6e4/lxml-6.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aa366a1e55b8ebfe8ca8ddc3cfe75c8ebade181aeb0f661d0cb05986b647f72a", size = 5260995, upload-time = "2026-05-18T19:18:37.091Z" }, + { url = "https://files.pythonhosted.org/packages/69/8b/6772e1a4b513fc50a8d931f19edde0e13ae6918510a1e13ff67864f3e5ed/lxml-6.1.1-cp312-cp312-win32.whl", hash = "sha256:126c93f7f56f0eda92f6d8c619edc463a4f23d9252f1c9d0405a76f25fa9f11a", size = 3596382, upload-time = "2026-05-18T19:17:18.37Z" }, + { url = "https://files.pythonhosted.org/packages/1b/89/45198e9624762af2dfd2cb8782598477ceb29f6e59caab560388ae1f4ec1/lxml-6.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:26e6eda8d38c1fcab1090dd196ee87cbd13788e531937610e2589085de074e77", size = 3997255, upload-time = "2026-05-18T19:17:56.781Z" }, + { url = "https://files.pythonhosted.org/packages/90/a9/7a54b6834088d9ae528a7b780584ba6a39a9457b0ac330479f20ffbc9449/lxml-6.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:6540377fbd53fe1b629172288c464fb18db11ce1fa7dc15891da10aa9dcc3e7f", size = 3659610, upload-time = "2026-05-19T19:22:50.843Z" }, + { url = "https://files.pythonhosted.org/packages/a5/eb/7e6f37c5584ccbb2ff267f56fd0339016938c1c8684cfefab9b33ffc2f36/lxml-6.1.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:68a9198d0fc122d14bb76837de9aa80cf84caed990b5b237f532ed87d3706736", size = 8559780, upload-time = "2026-05-18T19:17:57.661Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/36/587c2521cf23a2cd6c9c22108aa7528f683a1f195ed7ccd23a4b1786ad36/lxml-6.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7d47866cb32fb503450b6edc9df355d10dc49836af2e89901bd6ac6b0896d9d9", size = 4618006, upload-time = "2026-05-18T19:18:04.452Z" }, + { url = "https://files.pythonhosted.org/packages/6e/ca/ab7bfe2bf4c972af5e7878262845ead3a24a929a9b04bc11c7c1ece6c82a/lxml-6.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb7c9811bfaa8b1ed5ed319f5d370dfbcaa59d52ea64be2a5a85e18195930354", size = 4924139, upload-time = "2026-05-18T19:19:04.873Z" }, + { url = "https://files.pythonhosted.org/packages/6b/55/a0c72851dfee5ecc689f949723a73dea457758912542cb955b108eaf0d8f/lxml-6.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:762ff394d5bd56da0cf034a23dcce4e13923f15321a2adfa2ac00201dc6d3fca", size = 5082329, upload-time = "2026-05-18T19:19:09.728Z" }, + { url = "https://files.pythonhosted.org/packages/f0/b6/0608f7d61a3b96cc67e5648a3d906e31a5082093e10e7be65b3886289938/lxml-6.1.1-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a088f287f7d8275a33c07f2cac6c50b9319309a0200a39e7e75d80c707723099", size = 4993564, upload-time = "2026-05-18T19:19:13.608Z" }, + { url = "https://files.pythonhosted.org/packages/4c/66/ae227524b066d29d55bf0b453d93d2d793c40218657d643dcbbca13b8faf/lxml-6.1.1-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e902da4b04e6b52e5893900d4b8ab46068f75f3561f01bf1080957f9fd932ed6", size = 5613467, upload-time = "2026-05-18T19:19:16.228Z" }, + { url = "https://files.pythonhosted.org/packages/a6/76/dbe4a00b50385e40194231dcfe5a12c059de7cf90e89c83407d2b085b719/lxml-6.1.1-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1d4962d4c66bf830a7e59ed6cfc17d148149898a3aefa8ec6e59763e6e3ed085", size = 5228304, upload-time = "2026-05-18T19:19:19.354Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/01/00b1b8442ed2041793336868ba0b9ea4b13d7da7c085c6404c207a63bf79/lxml-6.1.1-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:581d4c8ae690a6609e64862dd6b7c2489635c2d13907fc2b20f2bc200ff1d21e", size = 5341607, upload-time = "2026-05-18T19:19:22.297Z" }, + { url = "https://files.pythonhosted.org/packages/63/36/1ad29931e9a4638bb707869f01d423a6c815f82152138d1a40dfcfde2b95/lxml-6.1.1-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:876e1ff5930ed8bf295ec5ef9a8155e9b6b1876bbf1deed8b3a8069311875a8f", size = 4700168, upload-time = "2026-05-18T19:19:25.133Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d1/a9536cecf9be18a0dc72d32bead283a2332d1ffebd2dd3ac70ce444686e5/lxml-6.1.1-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9eb9b5a968f6e0f6d640092a567e14529ff8cea2e29d00da6f78a79fa49f013c", size = 5232487, upload-time = "2026-05-18T19:19:28.603Z" }, + { url = "https://files.pythonhosted.org/packages/0e/77/b4fb1e03bf5d130e879214d3100092e386418807fb74dd0adc4b0a48f351/lxml-6.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:aa49e06d94aba782c6a02eecb7e507969e7e7a41b267f1b359bb35585f295d5b", size = 5044231, upload-time = "2026-05-18T19:18:42.246Z" }, + { url = "https://files.pythonhosted.org/packages/26/4c/d00daeeb0a5530c4028a9232aa1b93db3ef4ed2158c116ea73c79a9765b3/lxml-6.1.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:70cdfd80589d59e43e18005dd7244e8895e93db8ab6a620b7e23df5445a4e3d2", size = 4769450, upload-time = "2026-05-18T19:18:48.013Z" }, + { url = "https://files.pythonhosted.org/packages/ed/6a/715a3a8d156ce42f29cf014706f5410c2ff3b02267774110fc23266409fe/lxml-6.1.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:aad9aa39483ed8ec44d6d2e59e5b98a0d80676ef0d92f44bfc374836111f62f5", size = 5635874, upload-time = "2026-05-18T19:18:51.914Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/37/0544bc21dde2a88f3a17b504e6fc79c0e01d25a33c2f6079724e9e72b9c7/lxml-6.1.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d49514be2f28d895c38cf9d2b72d7b9a07d00314519f456c0b50b53cfcf4c785", size = 5223987, upload-time = "2026-05-18T19:18:59.715Z" }, + { url = "https://files.pythonhosted.org/packages/4d/f8/f6a5e8185bcb28c2befae3d31f8e3df3b811cb0f47746517a81279fcafe1/lxml-6.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:47402e62c52ff5988c1e8c6c63177f5708bccf48e366dea4e3dcf1e645e04947", size = 5250276, upload-time = "2026-05-18T19:19:03.834Z" }, + { url = "https://files.pythonhosted.org/packages/c7/f2/1a2b9f1b7a49d45495369be7ef9ad05b262930f2eab3e3145706fca8083f/lxml-6.1.1-cp313-cp313-win32.whl", hash = "sha256:3483644525531e1d5762b0c44a8e18b6efba321b6dcf8a8952de10b037618bca", size = 3596903, upload-time = "2026-05-18T19:17:29.863Z" }, + { url = "https://files.pythonhosted.org/packages/e6/99/f4ffb024f238eec2131aaa09f3278fb6129cf892741bf68e1fc1afb8c100/lxml-6.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:a10bd2fd62e8ce916ececb342f348f190724a098c1faa056fdfb2a22ad5e8660", size = 3995869, upload-time = "2026-05-18T19:18:02.596Z" }, + { url = "https://files.pythonhosted.org/packages/d1/53/70eb8c5c6037f27448f1e3c54ebede9545a801ae63f0a7254afca4fe8e45/lxml-6.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:424aa57aca0897eb922aef34395bd1289b3b6f04e6bae20ea123c0c7e333cffc", size = 3658490, upload-time = "2026-05-19T19:22:53.846Z" }, + { url = "https://files.pythonhosted.org/packages/13/e2/2e325795566de01d0d7c3bb57d3c370616b2d07b01214e84eec5d3b10963/lxml-6.1.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:19b7ab10b210b0b3ad7985d9ac4eb66ab09a90b20fe6e2f7ba55d01a234345d0", size = 8577146, upload-time = "2026-05-18T19:18:17.765Z" }, + { url = "https://files.pythonhosted.org/packages/93/cf/5630b5e4be7d2e6bee8efe83865c925221103cf0221303b104ce134b01e2/lxml-6.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = 
"sha256:c08e5c694306507275f2290073350c4f32e383db15213b2c69e7ff39c1193840", size = 4623866, upload-time = "2026-05-18T19:18:30.669Z" }, + { url = "https://files.pythonhosted.org/packages/d2/51/3904907c063451cf8d4a5c9fe0cad95fa1f4ec57f4e3884fa0731bd7a305/lxml-6.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:74a9717fd0d82effef5c2854f0d917231d5324b5a3eb7275c43ac9fa32f97a14", size = 4950022, upload-time = "2026-05-18T19:19:31.958Z" }, + { url = "https://files.pythonhosted.org/packages/94/cd/9c7611a51c37a2830928405817cc5d56a97f64fab83cc3f628748b135749/lxml-6.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:efe0374196335f93b53269acd811b944f2e6bdc88e8894f214bd636455484909", size = 5086695, upload-time = "2026-05-18T19:19:34.764Z" }, + { url = "https://files.pythonhosted.org/packages/da/d6/24e3b5906abb0b674ff2ae195bc3ce59708df2bcd17cf17703b2d7dd643a/lxml-6.1.1-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac931cdc9442c1763b8a8f6cd62c0c938737eafc5be75eff88df55fc73bc0d00", size = 5031642, upload-time = "2026-05-18T19:19:37.771Z" }, + { url = "https://files.pythonhosted.org/packages/2d/db/6ec54f99019838bff54785c51da07f189eb4676861c5f2730962b0d8d665/lxml-6.1.1-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:aee395f5d0927f947758b4ec119fd5fc8ec71f07a1c5c52077b30b04c0fa6955", size = 5647338, upload-time = "2026-05-18T19:19:40.553Z" }, + { url = "https://files.pythonhosted.org/packages/42/3d/ef4dcfffd22d27a61805d8ed9f7fb888495bc6aa88648fa07c1eaa5586b6/lxml-6.1.1-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9395002973c827b3ed67db77e6ec09f092919a587022174554096a269378fb13", size = 5239528, upload-time = "2026-05-18T19:19:43.657Z" }, + { url = "https://files.pythonhosted.org/packages/62/bb/37fb3f0dff146bdcfa78eec47879273820b2a0bf350ec236ce14bd0b1c26/lxml-6.1.1-cp314-cp314-manylinux_2_28_i686.whl", hash = 
"sha256:73bc2086f141224ebddb7fc5c6a36ca58b31b94b561e1dfe8e073e3270fad1e7", size = 5350730, upload-time = "2026-05-18T19:19:46.307Z" }, + { url = "https://files.pythonhosted.org/packages/90/42/43253f168388df4fae1f38c01df36ddb9bee39e2048167b54cdcbae85ea3/lxml-6.1.1-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:3779def59032b81e44a5f70096ef6bf2082f8d901937dca354474ba09782e245", size = 4697530, upload-time = "2026-05-18T19:19:49.889Z" }, + { url = "https://files.pythonhosted.org/packages/eb/a8/c5a8504f81bbdfc8e7094c2c850cdb4ed6777fc4d5ddd9e5ab819f3b0d54/lxml-6.1.1-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:86c89b9d55ebf820ad7c90bc533410f0d098054f293351f10603c0c46ff598f5", size = 5250670, upload-time = "2026-05-18T19:19:53.199Z" }, + { url = "https://files.pythonhosted.org/packages/77/b7/c7e76ab18744d75e21f320ebf9ff9d1ceae2b54dd431ea5a64caf26c9672/lxml-6.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19607c6bbff2a44cf3fe8250abccd20942d3462473e0a721d01d379ed017e462", size = 5084485, upload-time = "2026-05-18T19:19:08.422Z" }, + { url = "https://files.pythonhosted.org/packages/31/31/b35c53f8ef7b7c31cacd23d3638652fff7bcd1deb6eedb709ab43b685908/lxml-6.1.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:c6ed5141a5c7507cf3ee76bd363b0d6f801e3321adc35b5d825a23115faa5465", size = 4737635, upload-time = "2026-05-18T19:19:12.321Z" }, + { url = "https://files.pythonhosted.org/packages/d9/06/31f23c813a7fe8e0cb1b175e915b08c9bf4e86d225b210feadbdbe519667/lxml-6.1.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:62aeb7e85b5d60320b9d77eef2e773994e2c0ce10121b277e0a19804e1654a5a", size = 5670681, upload-time = "2026-05-18T19:19:15.001Z" }, + { url = "https://files.pythonhosted.org/packages/1a/bc/ce619bccc89b1fd9ad8a8e1330ee3f3beff9f2ff95b712d7bbcdd6e22fc3/lxml-6.1.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b1b963fd8f5caa68e99dfae060d54de1fe9cba899b8718b44a00cdca53c3e590", size = 5238229, upload-time = 
"2026-05-18T19:19:18.131Z" }, + { url = "https://files.pythonhosted.org/packages/2f/5d/b329acbbedc0b619ebc2be6cf7ee9ed07e80892c88d4dfd612c33805789a/lxml-6.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:63876be28efefa04a1df615b46770e82042cce445cfdce55160522f57b231ccb", size = 5264191, upload-time = "2026-05-18T19:19:21.118Z" }, + { url = "https://files.pythonhosted.org/packages/d6/85/be36fb1425b30db3c3f9df75fe86343ebffb79e6320bd7f588e25bfeac39/lxml-6.1.1-cp314-cp314-win32.whl", hash = "sha256:7f7a92e8583f06b1fd49d01158143b8461cfcd135dcb10ec807270a3051bd603", size = 3657202, upload-time = "2026-05-18T19:17:39.509Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ce/3cf9a827342269f54d405a6202397de63f07c69cbd6ce7d183a3f0cba1e9/lxml-6.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:b2d444f2e66624d68e9c6b211e28a76e22fff5fcabcfff4deac18b529b7d4137", size = 4064497, upload-time = "2026-05-18T19:18:14.662Z" }, + { url = "https://files.pythonhosted.org/packages/d9/3e/1a957bde8f0760039e627f94699f82caa782c9d838d86c3d28245ee67212/lxml-6.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:3fd9728a2735fda14f4e8235830c86b539e9661e849665bf926d3f867943b4bf", size = 3741991, upload-time = "2026-05-19T19:22:59.111Z" }, + { url = "https://files.pythonhosted.org/packages/78/b2/00ed55b3a2efa4658fb795c38d1090ec9b3e8a6c3683d4441fa517f09c3b/lxml-6.1.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:787b2496d0dbe8cd180984e8d29e3a6f76e7ea34db781cb3bd55e4ba1ef8b4ee", size = 8827545, upload-time = "2026-05-18T19:18:41.193Z" }, + { url = "https://files.pythonhosted.org/packages/c0/73/74573db19baa618d5f266f2407898b087ff6927115b00b71e5fc1b700847/lxml-6.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2c8daa471358dc2d6fcf02165e80ec68f77871a286df95bc5cc3816153b0fd2c", size = 4735736, upload-time = "2026-05-18T19:18:46.761Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/02/6f7061f4f95f51e545d48e87647c54791d204a4e881be4156e7a26ba5338/lxml-6.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:acd7d70b64c0aae0c7922cca83d288a16f5f6da523637697872253415269baef", size = 4970291, upload-time = "2026-05-18T19:19:56.215Z" }, + { url = "https://files.pythonhosted.org/packages/b0/02/55fc057d8283427dea7d6edb102e7a840239c77a64a983d92f62a304c0e9/lxml-6.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4f0dd2f01f9f8a89f565d000e03abcf0a13d692a346c8d22f628d49af098777a", size = 5102822, upload-time = "2026-05-18T19:19:59.223Z" }, + { url = "https://files.pythonhosted.org/packages/e4/48/8e1cf78d89d66850121d9255a2a24414c98f775da93b90cf976956c24b14/lxml-6.1.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b7e8a14c8634bf6f7a568634cb395305a6d964aeb5b7ee32248094bed3a7e2c", size = 5027923, upload-time = "2026-05-18T19:20:01.549Z" }, + { url = "https://files.pythonhosted.org/packages/ed/00/0632a0647612c8af24d26997b3b961397daa9d5b2581444805933629a4cb/lxml-6.1.1-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:86281fbdd6a8162756f8d603f37e3435bfa38043adb79c6dc6a2dfee065e7525", size = 5595843, upload-time = "2026-05-18T19:20:03.93Z" }, + { url = "https://files.pythonhosted.org/packages/bc/86/ab008a7dc360711b66858d61c80a5979a70a09f2aa2b05d9698df80b803d/lxml-6.1.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5d7152ec39ca7c402d8fb9bad86140a15b9503bd0c54484e3f1bbe3dd37ceca", size = 5224515, upload-time = "2026-05-18T19:20:06.381Z" }, + { url = "https://files.pythonhosted.org/packages/75/c6/2702ff375e728e34f56d9a45339a9cf7e4427e917f542225242d63a05afa/lxml-6.1.1-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:88d8cb75b9d82858497a5393e3c63cfbf03035225e4b35a49ed7ccb151e4dc0e", size = 5312511, upload-time = "2026-05-18T19:20:09.308Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/57/a5807c98f87a86f10ef9ffab35516df7c0f0c4b6d5d33e9f608ab9c04a31/lxml-6.1.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:f64ec5397ea6a41fc1b4af0380d79b44a755b5531dcaccd9940fb260dca93038", size = 4639206, upload-time = "2026-05-18T19:20:11.704Z" }, + { url = "https://files.pythonhosted.org/packages/1f/e1/8a0a2c35734812395f4da4eaf33748a7e5705bfb2a58b128da764339d5ec/lxml-6.1.1-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d34bbf07dbc7ca5970671b1512e928991fb5e9d95365636c9b2d8b4f53af405e", size = 5232404, upload-time = "2026-05-18T19:20:14.064Z" }, + { url = "https://files.pythonhosted.org/packages/c2/e2/0e6a4dd5ad84d01d99aa7bae7cfefd4a760a0e0f8176818241de17d9b6c0/lxml-6.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:17e0e18d4ad8adbd0399291bc44845b69d9dd68439a3cdebdf35ff902ec05072", size = 5083769, upload-time = "2026-05-18T19:19:23.758Z" }, + { url = "https://files.pythonhosted.org/packages/a0/7e/161f33d463f6ffc1c7679104b65086dea120080d49dde4d238f015aaee2f/lxml-6.1.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:3ab541146f1f6968c462d6c2ac495148e8cdba2f8347700b2141b6ec5a75bf52", size = 4758936, upload-time = "2026-05-18T19:19:27.256Z" }, + { url = "https://files.pythonhosted.org/packages/f1/fb/2369825e3f6ca99305bf9f7b7085fda91c8b0922a89e54d900974aa3ef85/lxml-6.1.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:2a0217714657e023ef4293500f65aa20fce6164c8fd6b08fa5bd4a859fb14b9b", size = 5620296, upload-time = "2026-05-18T19:19:29.993Z" }, + { url = "https://files.pythonhosted.org/packages/30/90/d61e383146f74c5ab683947ea14dc7b82778838ab9b95ea73a23b60d0191/lxml-6.1.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:05a82eb6e1530a64f26225b55cbd178113bd0b5af1c2b625f25e5296742c26d2", size = 5228598, upload-time = "2026-05-18T19:19:33.523Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/2d/2dafd8149e94b05bb070690efd5bb2680720681e03ff03fc57d2b70a1105/lxml-6.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9e36f163528fc50cbef305f02a5fd66d404edf7049cdaff211dbc2cba5a7013e", size = 5247845, upload-time = "2026-05-18T19:19:36.649Z" }, + { url = "https://files.pythonhosted.org/packages/ce/68/b30e913340c380ddac9580c6e6230991fc37240ec4f64704833e4f3e2769/lxml-6.1.1-cp314-cp314t-win32.whl", hash = "sha256:649dda677cf3bd6ac9ae14007ba0c824ded8ce5808b53fc7431d9140399118c1", size = 3897345, upload-time = "2026-05-18T19:17:33.562Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4e/9eb2af5335545f9fbcd7af57bcf87c6025d31eaa31b14ec184a6c8675328/lxml-6.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:793033d6c5cdf33a573f910d9bea14ef8f5771820411d118da8e1182edb53d5e", size = 4393350, upload-time = "2026-05-18T19:18:10.076Z" }, + { url = "https://files.pythonhosted.org/packages/7f/2c/0f1e93c636720e8a3eb59af2bfda99d98b55891e1c53bc30c2e0e865f01b/lxml-6.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:58bb955caba94e467d2a96da17660d2d704e0675894cba21ab8a775b8621fd1c", size = 3817223, upload-time = "2026-05-19T19:22:56.823Z" }, + { url = "https://files.pythonhosted.org/packages/b5/32/86a3f0f724a3a402d4627937a7fc27b160e45e7012b4adf47f6e1e844511/lxml-6.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:31033dc34636ea6b7d5cc11b1ddbda78a14de858ba9d3e1ed4b69a3085bc521e", size = 3930127, upload-time = "2026-05-18T19:19:02.27Z" }, + { url = "https://files.pythonhosted.org/packages/40/44/d832e82af08723761556d004b1d04d281c09f9a8cecd7d3148548c9941a3/lxml-6.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3893c14c4b6ac5b2d54ba8cf03e99fe5104e592de491f19bd6b82756c09f8004", size = 4210769, upload-time = "2026-05-18T19:20:41.427Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/39/0dc5949f759ed7d951e0bb8c2f2d9d7aca1908d22352fa84a8afd2ea54af/lxml-6.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c07da4cebf6889f03ebac8d238f62318e29f495de0aa18a51ea14e61ae907e2e", size = 4318163, upload-time = "2026-05-18T19:20:44.702Z" }, + { url = "https://files.pythonhosted.org/packages/e6/fb/8ab3845fe046ba4cbf74536bcf6801a774b7caf4350de1c5d37f1f0a9e90/lxml-6.1.1-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6f0ce10945fab9c4c06ce14e22af9059d1a87493a9af4501a5b0b9187e21cf2", size = 4250945, upload-time = "2026-05-18T19:20:47.385Z" }, + { url = "https://files.pythonhosted.org/packages/68/1b/7553ab136894374ffae8851ec06f98f511cd8e66246e41b6be059d0a7289/lxml-6.1.1-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f8844cd288697c6425c9beba919302241e3278871dc6519515e72b04e987abcf", size = 4401664, upload-time = "2026-05-18T19:20:50.489Z" }, + { url = "https://files.pythonhosted.org/packages/db/a4/441aee36c6f6b249823d20fd91f9be9ab89d7c5a8ae542a4a4ca6d342d56/lxml-6.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:ed21202aec73cda4d55d1ce57b389aadb90ffb044e6cd1080b8347efe1b1ec84", size = 3508989, upload-time = "2026-05-18T19:18:38.158Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1728,6 +1974,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "primp" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/4b/7efa54f38da7de8df6b70dfed173bb41a52b740b144e4be24c1172db4209/primp-1.3.1.tar.gz", hash = 
"sha256:b04a5941bf9c876d011c5defaf5a25be093d56e7270b8da52c9788b9df2a829a", size = 1360029, upload-time = "2026-05-23T17:39:25.568Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/80/c4885a783a7493e396d89a592ba19fce63ef6bd6ad47230924a884a30ec0/primp-1.3.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:27b87e6370045a0c65c0e4dfdfacbfe637387d05673ce8ddcce400263f7c27f0", size = 5123967, upload-time = "2026-05-23T17:39:08.586Z" }, + { url = "https://files.pythonhosted.org/packages/58/c1/c965cc23f96a364803d44b4331f33e4465bb6f269add37e39d0ad77ffe33/primp-1.3.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:27a8804eb9a3f641f379ee2b443591428cf85c898816e93d04d3e7b6f229ebcb", size = 4743059, upload-time = "2026-05-23T17:39:15.536Z" }, + { url = "https://files.pythonhosted.org/packages/9c/99/f4248d8d833d43fd8ba78208f2f4bf7fba7d3aec8c516090a95d18d6f550/primp-1.3.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:862974796552a51af8e276bb19c5d5e189168ab8bad216aef7ce3726a8d3b1dd", size = 5100121, upload-time = "2026-05-23T17:39:04.64Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ad/519e32e0184763e1a76c9321fdeac0bb9b30bf85746f12058feec0cc4a27/primp-1.3.1-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ceb24198994799706f4020a00173ba9c1b491aa9805b1e014d87946677bc3c5d", size = 4738042, upload-time = "2026-05-23T17:39:35.967Z" }, + { url = "https://files.pythonhosted.org/packages/dc/7b/723cb40694b47ec79a142ed8492835c0ecae9fef7acbed014f04b018d1de/primp-1.3.1-cp310-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3298b8afcf0a88ba6622bfc18e78aeb11afbb7d5afa4774f24acf7491f54a2d", size = 5001773, upload-time = "2026-05-23T17:39:03.01Z" }, + { url = "https://files.pythonhosted.org/packages/52/b8/80a2e3bdab1c51d738b82ea210a5ab93986b443c561e792e42cae296ec10/primp-1.3.1-cp310-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:6b8d38c5a6d0a863274cbcae9678f265fcdcead3c20d12d152244e88f5f2186b", size = 5334228, upload-time = "2026-05-23T17:39:24.214Z" }, + { url = "https://files.pythonhosted.org/packages/19/70/c95b8054c7d1fe2d84226ec60a5f48ce6c95a08b7c8b1702d7742082f444/primp-1.3.1-cp310-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96f831c78ddb5900873f51e294bf9bbb4bbfdac3a2f39ce4023f8c558d299332", size = 5157269, upload-time = "2026-05-23T17:38:48.142Z" }, + { url = "https://files.pythonhosted.org/packages/34/bb/9b66986b7ecf2eff987134cd94bde533142e3085d6f67531f1a369ceaaae/primp-1.3.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:329d0c320841f65b39d80801d8bae126732b84ec1094ca17b14fda0bda1b20ff", size = 5347438, upload-time = "2026-05-23T17:39:17.405Z" }, + { url = "https://files.pythonhosted.org/packages/aa/29/5d127748d06f3c6a3367f3c4974e45b98cda61cd28ea79ef91ad3fe9e093/primp-1.3.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6c3c67670c38a03e9e8da45b212243d35afc8efa018317c46ecdce47f05329d1", size = 5264862, upload-time = "2026-05-23T17:39:20.625Z" }, + { url = "https://files.pythonhosted.org/packages/16/f3/1aac229425cac142c48418e2de9f70597161ea936543b5e3c9e7476e1921/primp-1.3.1-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:9409a31028a8c62a609d389554ad4f5339aad075130300cd443beef0336d7179", size = 4969889, upload-time = "2026-05-23T17:39:22.412Z" }, + { url = "https://files.pythonhosted.org/packages/38/86/a94d6e6166139c76ae42eb941328679309ca85139e8753d639657a24474c/primp-1.3.1-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:88ca36c2bd1b7c64b96ad07ca367d2d111ac8e9670549be5f232da8bf795d21e", size = 5082679, upload-time = "2026-05-23T17:39:28.411Z" }, + { url = "https://files.pythonhosted.org/packages/cf/61/21d297db575ed660c6aaf35c9014c1874ace45d6dcb79d1a4d3d2608bffb/primp-1.3.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:74d13800b501aa003fb05c263d38f8d61656c83a60b2951046c0fc412bc73976", size = 5605392, 
upload-time = "2026-05-23T17:39:38.007Z" }, + { url = "https://files.pythonhosted.org/packages/36/d6/9262a7ebb1d980a2db0cd505bb902bb3e66acd8a1cb763a4c2921f2f6a5b/primp-1.3.1-cp310-abi3-win32.whl", hash = "sha256:09ada1752629fe89d7b128beeb59cb641f404af462e24177ba36aed1cf322299", size = 4270373, upload-time = "2026-05-23T17:38:44.98Z" }, + { url = "https://files.pythonhosted.org/packages/8f/68/f0c6a60fadff0c185aef232b951a6fa4bbb64511facc48d34734db14f16f/primp-1.3.1-cp310-abi3-win_amd64.whl", hash = "sha256:c0d1e294466cd5ec7ef173eedf8df25cbdc050138d40447a906e92b8553e7765", size = 4661498, upload-time = "2026-05-23T17:39:32.213Z" }, + { url = "https://files.pythonhosted.org/packages/7f/1d/232a52abc77384ac66b9c1741691dec3659b1207bb6c5e55c1e9b59d22f1/primp-1.3.1-cp310-abi3-win_arm64.whl", hash = "sha256:43304cb41cbb46f361de49faf1cbdba57f969f628c9297239c7ed8ef0cac420f", size = 4624481, upload-time = "2026-05-23T17:38:42.724Z" }, + { url = "https://files.pythonhosted.org/packages/e5/0b/34333b26c533c3122b936dad829f0a6e04f32065d39673c92b157d97aa16/primp-1.3.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:72249a4540d0a8965f36eb9a86cd16801d1c7e8dac2f0b0fa23a0a5a03402d36", size = 5116098, upload-time = "2026-05-23T17:38:55.219Z" }, + { url = "https://files.pythonhosted.org/packages/7a/56/7fe14708adf9a5cb5d6a15ad840a3de036cebfaf20692a5bc3b72e188a73/primp-1.3.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:db4e2eaa5707e47899eeba6026f420f9b0108a28c08d63f1826d0cab8d50f06f", size = 4736300, upload-time = "2026-05-23T17:38:51.792Z" }, + { url = "https://files.pythonhosted.org/packages/31/cb/521a8c18e8808a75450b6e91dc62cc1149c0178b7d4a8697d3f9b73fa385/primp-1.3.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d62e7609c98b4bc99c9cecc47f16f332fb8fe1a023002176267b0043dedad0c7", size = 5093823, upload-time = "2026-05-23T17:38:50.059Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/84/90f776fe46aeb0e3b86df72c674c0651326dd6a61846dd86bddbabe903ac/primp-1.3.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d692e912c2b25271163ba7719df0afdb733a7e7c3073c9094e9001882463543", size = 4734511, upload-time = "2026-05-23T17:39:34.166Z" }, + { url = "https://files.pythonhosted.org/packages/19/79/d9bfbc0df0394f18a98b512a65f4bcf3dd7d17bd871937127e1ce4549172/primp-1.3.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c08693517dc160a12c0f9e2565c5319173cef738893a303ff2fb28ecccbd84d", size = 4999315, upload-time = "2026-05-23T17:39:12.061Z" }, + { url = "https://files.pythonhosted.org/packages/bb/d8/5a986957ee1874d08567d7749668cd78a063048d47d6e46a874742b7fed1/primp-1.3.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d134ebfa31adc619e4e48289fe3e7eebc8310141560e6a6a04269cc94893d9ab", size = 5329375, upload-time = "2026-05-23T17:39:30.307Z" }, + { url = "https://files.pythonhosted.org/packages/5c/1d/321cac9902cc3992174ed530719141a0da2e426f54f8a90b7b971571d104/primp-1.3.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48e27e7c0e015a6de495cf79c0c8d599ba5f69d091af31572bec2de020522d9c", size = 5140921, upload-time = "2026-05-23T17:38:53.528Z" }, + { url = "https://files.pythonhosted.org/packages/a6/19/ccbe6b67e0e91beb5c9d5cf804354225d5a3a7a9adf84fee3d6acc53febd/primp-1.3.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c3d682df08c1b1f37b1f66b21fd173baebcfcb52490830b12292d8fe89b2147", size = 5344288, upload-time = "2026-05-23T17:39:18.965Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e5/a735751bd11558163e83e0961fc866e4f94634df9eb24937c5f59624e393/primp-1.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fabaac4280df0802377d34b869949d617a0ecf22ca7fd5f9bded3f5c981031f1", size = 5262909, upload-time = "2026-05-23T17:39:00.789Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/8e/4e3d4520e2e751f2de825dbe2cb43f837d33a5528adc44255f9770ea125a/primp-1.3.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:f510e5881e0a4c4b9e7dbc03722c316d58454388b88000a0e7bf18a4b36d601e", size = 4964809, upload-time = "2026-05-23T17:38:59.023Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/e7b7495687d07df693325a12c497a9e5185d5001b7b216f32019fa7437a0/primp-1.3.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0504de2901c97903a9c369856a4b186dc90a782d8320652c142b066e697d5a1a", size = 5083654, upload-time = "2026-05-23T17:39:06.598Z" }, + { url = "https://files.pythonhosted.org/packages/f2/f9/e4652e93beb14a16cc4218cfb1ccc18eaca8ee7d93b517d614a135928ec9/primp-1.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c3b24e302d95d327e873834b9423823b9c8af2abf5e0bbf57a03f3354cfe528", size = 5594738, upload-time = "2026-05-23T17:39:27.001Z" }, + { url = "https://files.pythonhosted.org/packages/e2/49/e8c8a7bc6b741ab6f15896022eee4f906d04d7ccd15aeeb515dd04bbeb6d/primp-1.3.1-cp314-cp314t-win32.whl", hash = "sha256:4346dcef805279028bf4a54bb87dd43d0920130e25b5790689f5c96c9ba0d9e5", size = 4262615, upload-time = "2026-05-23T17:39:13.88Z" }, + { url = "https://files.pythonhosted.org/packages/a9/84/7ae4a257dec6dff329d4a8d9051d907316095c27ffc8d1ea15c359e6eeb5/primp-1.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6c55f152a73b6d6af8ac37bdb648d8bbfd7e656f9ef40d87feb3c0d81cee930a", size = 4660584, upload-time = "2026-05-23T17:39:10.081Z" }, + { url = "https://files.pythonhosted.org/packages/99/20/10e0d96bfaeef1f0cd339ccf9bb8feb4bf798fde93198f7a96c73441080a/primp-1.3.1-cp314-cp314t-win_arm64.whl", hash = "sha256:46a529d74583d6ceba52e15bf4c678fcf24e6d669c1ce935262d5490d1b25801", size = 4623226, upload-time = "2026-05-23T17:38:57.256Z" }, +] + [[package]] name = "propcache" version = "0.4.1" @@ -2533,6 +2817,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "socksio" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/5c/48a7d9495be3d1c651198fd99dbb6ce190e2274d0f28b9051307bdec6b85/socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac", size = 19055, upload-time = "2020-04-17T15:50:34.664Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/c3/6eeb6034408dac0fa653d126c9204ade96b819c936e136c5e8a6897eee9c/socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3", size = 12763, upload-time = "2020-04-17T15:50:31.878Z" }, +] + [[package]] name = "sse-starlette" version = "3.4.1" diff --git a/app-instance/frontend/app/(app)/logs/page.tsx b/app-instance/frontend/app/(app)/logs/page.tsx index fca55af..4e7e019 100644 --- a/app-instance/frontend/app/(app)/logs/page.tsx +++ b/app-instance/frontend/app/(app)/logs/page.tsx @@ -46,6 +46,48 @@ function timestampLabel(value?: string | null): string { return date.toLocaleString(); } +const latencyOrder = [ + 'total_ms', + 'router_ms', + 'mcp_ms', + 'skill_assembly_ms', + 'tool_assembly_ms', + 'context_build_ms', + 'llm_ms', + 'tool_ms', + 'session_write_ms', +]; + +function latencyLabel(key: string, locale: string): string { + const labels: Record = { + total_ms: ['总耗时', 'total'], + router_ms: ['路由', 'router'], + mcp_ms: ['MCP', 'MCP'], + skill_assembly_ms: ['技能', 'skills'], + tool_assembly_ms: ['工具选择', 'tool select'], + context_build_ms: ['上下文', 'context'], + llm_ms: ['模型', 'LLM'], + tool_ms: ['工具执行', 'tools'], + session_write_ms: ['写日志', 'writes'], + }; + const label = labels[key]; + if 
(!label) return key.replace(/_ms$/, ''); + return pickAppText(locale, label[0], label[1]); +} + +function formatLatencyMs(value: unknown): string | null { + if (typeof value !== 'number' || !Number.isFinite(value)) return null; + if (value >= 1000) return `${(value / 1000).toFixed(value >= 10000 ? 1 : 2)}s`; + return `${Math.round(value)}ms`; +} + +function latencyEntries(latency?: Record) { + if (!latency) return []; + return latencyOrder + .map((key) => [key, formatLatencyMs(latency[key])] as const) + .filter((entry): entry is readonly [string, string] => Boolean(entry[1])); +} + export default function LogsPage() { const { locale } = useAppI18n(); const [sessions, setSessions] = useState([]); @@ -142,6 +184,7 @@ export default function LogsPage() {
    {runs.map((run) => { const expanded = expandedRuns.has(run.run_id); + const latencies = latencyEntries(run.latency_ms); return ( diff --git a/app-instance/frontend/types/index.ts b/app-instance/frontend/types/index.ts index c08c717..b36068e 100644 --- a/app-instance/frontend/types/index.ts +++ b/app-instance/frontend/types/index.ts @@ -105,6 +105,7 @@ export interface ChatLogRun { started_at?: string; ended_at?: string | null; finish_reason?: string | null; + latency_ms?: Record; events: ChatLogEvent[]; } diff --git a/auth-portal/src/app/api/runtime/login/route.ts b/auth-portal/src/app/api/runtime/login/route.ts index c231af2..0c46720 100644 --- a/auth-portal/src/app/api/runtime/login/route.ts +++ b/auth-portal/src/app/api/runtime/login/route.ts @@ -2,7 +2,14 @@ import { NextRequest, NextResponse } from 'next/server'; import type { TokenResponse } from '@/types/auth'; import { normalizePortalLocale, pickPortalText } from '@/lib/i18n/core'; -import { HttpError, callDeployControl, callInstanceApi, normalizeTokenResponse } from '@/lib/runtime-control'; +import { + HttpError, + callDeployControl, + callInstanceApi, + normalizeTokenResponse, + targetFrontendBaseUrl, + waitForFrontendReady, +} from '@/lib/runtime-control'; function errorStatus(error: unknown): number { if (error instanceof HttpError) { @@ -49,7 +56,9 @@ export async function POST(request: NextRequest) { password, }); - return NextResponse.json(normalizeTokenResponse(response, routing)); + const normalized = normalizeTokenResponse(response, routing); + await waitForFrontendReady(targetFrontendBaseUrl(normalized)); + return NextResponse.json(normalized); } catch (error) { const status = errorStatus(error); const detail = status === 404 || status === 401 diff --git a/auth-portal/src/app/api/runtime/provider-onboarding/route.ts b/auth-portal/src/app/api/runtime/provider-onboarding/route.ts index cbe0420..bd166cd 100644 --- a/auth-portal/src/app/api/runtime/provider-onboarding/route.ts +++ 
b/auth-portal/src/app/api/runtime/provider-onboarding/route.ts @@ -2,7 +2,14 @@ import { NextRequest, NextResponse } from 'next/server'; import type { TokenResponse } from '@/types/auth'; import { normalizePortalLocale, pickPortalText } from '@/lib/i18n/core'; -import { HttpError, callDeployControl, callInstanceApi, normalizeTokenResponse } from '@/lib/runtime-control'; +import { + HttpError, + callDeployControl, + callInstanceApi, + normalizeTokenResponse, + targetFrontendBaseUrl, + waitForFrontendReady, +} from '@/lib/runtime-control'; const PROVIDER_ONBOARDING_TIMEOUT_MS = 120000; const KNOWN_PROVIDERS = new Set([ @@ -113,7 +120,9 @@ export async function POST(request: NextRequest) { password, }); - return NextResponse.json(normalizeTokenResponse(response, configuredRouting)); + const normalized = normalizeTokenResponse(response, configuredRouting); + await waitForFrontendReady(targetFrontendBaseUrl(normalized)); + return NextResponse.json(normalized); } catch (error) { return NextResponse.json({ detail: errorDetail(error) }, { status: errorStatus(error) }); } diff --git a/auth-portal/src/app/api/runtime/register/route.ts b/auth-portal/src/app/api/runtime/register/route.ts index 67eceb1..9616010 100644 --- a/auth-portal/src/app/api/runtime/register/route.ts +++ b/auth-portal/src/app/api/runtime/register/route.ts @@ -8,6 +8,8 @@ import { callAuthzService, callDeployControl, normalizeTokenResponse, + targetFrontendBaseUrl, + waitForFrontendReady, } from '@/lib/runtime-control'; function errorStatus(error: unknown): number { @@ -62,6 +64,7 @@ export async function POST(request: NextRequest) { }, REGISTER_REQUEST_TIMEOUT_MS); if (hasTargetFrontendUrl(response)) { + await waitForFrontendReady(targetFrontendBaseUrl(response)); return NextResponse.json(response); } @@ -72,7 +75,9 @@ export async function POST(request: NextRequest) { instance?: unknown; }>('/api/instances/resolve', { username }); - return NextResponse.json(normalizeTokenResponse(response, routing)); + 
const normalized = normalizeTokenResponse(response, routing); + await waitForFrontendReady(targetFrontendBaseUrl(normalized)); + return NextResponse.json(normalized); } catch (error) { return NextResponse.json({ detail: errorDetail(error) }, { status: errorStatus(error) }); } diff --git a/auth-portal/src/lib/auth-client.ts b/auth-portal/src/lib/auth-client.ts index bd7b277..678d8c1 100644 --- a/auth-portal/src/lib/auth-client.ts +++ b/auth-portal/src/lib/auth-client.ts @@ -4,6 +4,7 @@ import type { TokenResponse } from '@/types/auth'; import { getCurrentPortalLocale, pickPortalText } from '@/lib/i18n/core'; const REQUEST_TIMEOUT_MS = 8000; +const LOGIN_REQUEST_TIMEOUT_MS = 30000; const REGISTER_REQUEST_TIMEOUT_MS = 90000; const PROVIDER_ONBOARDING_TIMEOUT_MS = 120000; @@ -90,7 +91,7 @@ export async function login(username: string, password: string): Promise { diff --git a/auth-portal/src/lib/runtime-control-ready.test.mjs b/auth-portal/src/lib/runtime-control-ready.test.mjs new file mode 100644 index 0000000..481686c --- /dev/null +++ b/auth-portal/src/lib/runtime-control-ready.test.mjs @@ -0,0 +1,43 @@ +import assert from 'node:assert/strict'; +import { pathToFileURL } from 'node:url'; +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import ts from 'typescript'; + +const source = await readFile(new URL('./runtime-control.ts', import.meta.url), 'utf8'); +const output = ts.transpileModule(source, { + compilerOptions: { + module: ts.ModuleKind.ES2022, + target: ts.ScriptTarget.ES2022, + }, +}); +const dir = await mkdtemp(join(tmpdir(), 'runtime-control-test-')); +const modulePath = join(dir, 'runtime-control.mjs'); +await writeFile(modulePath, output.outputText, 'utf8'); + +const runtimeControl = await import(pathToFileURL(modulePath).href); + +assert.equal(typeof runtimeControl.waitForFrontendReady, 'function'); + +const calls = []; +globalThis.fetch = async (url) => { + 
calls.push(String(url)); + if (calls.length < 3) { + return { ok: false, status: 502, text: async () => 'bad gateway' }; + } + return { ok: true, status: 200, text: async () => '' }; +}; + +await runtimeControl.waitForFrontendReady('http://workspace.example:8088', { + timeoutMs: 1000, + intervalMs: 1, +}); + +assert.deepEqual(calls, [ + 'http://workspace.example:8088/handoff', + 'http://workspace.example:8088/handoff', + 'http://workspace.example:8088/handoff', +]); + +await rm(dir, { recursive: true, force: true }); diff --git a/auth-portal/src/lib/runtime-control.ts b/auth-portal/src/lib/runtime-control.ts index abd72e8..57de272 100644 --- a/auth-portal/src/lib/runtime-control.ts +++ b/auth-portal/src/lib/runtime-control.ts @@ -5,6 +5,8 @@ const DEPLOY_API_BASE_URL = (process.env.DEPLOY_API_BASE_URL || 'http://127.0.0. const DEPLOY_API_TOKEN = (process.env.DEPLOY_API_TOKEN || '').trim(); const REQUEST_TIMEOUT_MS = 15000; const REGISTER_REQUEST_TIMEOUT_MS = 90000; +const FRONTEND_READY_TIMEOUT_MS = Number(process.env.FRONTEND_READY_TIMEOUT_MS || '20000'); +const FRONTEND_READY_INTERVAL_MS = Number(process.env.FRONTEND_READY_INTERVAL_MS || '1000'); type JsonObject = Record; @@ -135,3 +137,54 @@ export function normalizeTokenResponse( backend_connection: mergedBackendConnection, }; } + +export function targetFrontendBaseUrl(response: TokenResponse): string { + return ( + asString(response.backend_connection?.frontend_base_url) || + asString(response.backend_connection?.public_base_url) || + asString(response.backend_connection?.api_base_url) || + asString(response.local_backend?.public_base_url) + ); +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +export async function waitForFrontendReady( + frontendBaseUrl: string, + options: { timeoutMs?: number; intervalMs?: number } = {} +): Promise { + const normalized = frontendBaseUrl.trim().replace(/\/+$/, ''); + if (!normalized) return; + + let readyUrl: URL; + 
try { + readyUrl = new URL('/handoff', normalized); + } catch { + return; + } + + const timeoutMs = Math.max(1, options.timeoutMs ?? FRONTEND_READY_TIMEOUT_MS); + const intervalMs = Math.max(1, options.intervalMs ?? FRONTEND_READY_INTERVAL_MS); + const deadline = Date.now() + timeoutMs; + let lastError = 'frontend is not ready'; + + while (Date.now() <= deadline) { + try { + const response = await fetch(readyUrl.toString(), { + method: 'GET', + cache: 'no-store', + }); + if (response.ok) { + return; + } + lastError = `frontend returned ${response.status}`; + } catch (error) { + lastError = error instanceof Error ? error.message : 'frontend request failed'; + } + await sleep(intervalMs); + } + + throw new HttpError(502, `frontend is not ready: ${lastError}`); +} diff --git a/authz-service/src/app/main.py b/authz-service/src/app/main.py index 9e8d460..141eb77 100644 --- a/authz-service/src/app/main.py +++ b/authz-service/src/app/main.py @@ -9,6 +9,7 @@ from typing import Any import httpx from fastapi import Depends, FastAPI, Header, HTTPException, Query, Request from fastapi.responses import JSONResponse +from pydantic import ValidationError from app.json_store import JsonStore from app.models import ( @@ -823,7 +824,10 @@ async def get_internal_channel_settings(backend_id: str, channel_id: str) -> dic async def oauth_token(request: Request) -> OAuthTokenResponse: content_type = request.headers.get("content-type", "") if "application/json" in content_type: - payload = OAuthTokenRequest.model_validate(await request.json()) + try: + payload = OAuthTokenRequest.model_validate(await request.json()) + except ValidationError as exc: + raise HTTPException(status_code=422, detail=exc.errors()) from exc else: form = await request.form() payload = _parse_token_request_from_form(dict(form)) diff --git a/authz-service/src/app/models.py b/authz-service/src/app/models.py index 1ab73c5..36db07e 100644 --- a/authz-service/src/app/models.py +++ b/authz-service/src/app/models.py @@ 
-3,7 +3,7 @@ from __future__ import annotations from datetime import datetime, timezone from typing import Any -from pydantic import BaseModel, Field +from pydantic import AliasChoices, BaseModel, Field def utcnow_iso() -> str: @@ -173,7 +173,7 @@ class OAuthTokenRequest(BaseModel): grant_type: str = "client_credentials" client_id: str client_secret: str - aud: str + aud: str = Field(validation_alias=AliasChoices("aud", "audience")) scopes: list[str] = Field(default_factory=list) diff --git a/authz-service/src/tests/test_oauth_token_validation.py b/authz-service/src/tests/test_oauth_token_validation.py new file mode 100644 index 0000000..35c9e3f --- /dev/null +++ b/authz-service/src/tests/test_oauth_token_validation.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +import importlib + +from fastapi.testclient import TestClient + + +def _client(tmp_path, monkeypatch) -> TestClient: + monkeypatch.setenv("AUTHZ_DATA_DIR", str(tmp_path)) + monkeypatch.setenv("AUTHZ_PRIVATE_KEY_PATH", str(tmp_path / "signing_key.pem")) + monkeypatch.setenv("AUTHZ_INTERNAL_TOKEN", "test-internal-token") + import app.main as main + + main = importlib.reload(main) + return TestClient(main.app, raise_server_exceptions=False) + + +def _register_backend(client: TestClient) -> dict: + response = client.post( + "/backends/register", + json={"backend_id": "alice", "name": "Alice", "base_url": "http://alice.local"}, + ) + assert response.status_code == 200 + return response.json() + + +def test_json_token_request_accepts_audience_alias(tmp_path, monkeypatch) -> None: + with _client(tmp_path, monkeypatch) as client: + backend = _register_backend(client) + response = client.post( + "/oauth/token", + json={ + "grant_type": "client_credentials", + "client_id": backend["client_id"], + "client_secret": backend["client_secret"], + "audience": "mcp:outlook_mcp", + "scopes": ["list_tools", "tool:auth_status"], + }, + ) + + assert response.status_code == 200 + body = response.json() + assert 
body["access_token"] + assert body["token_type"] == "bearer" + + +def test_json_token_request_validation_errors_return_422(tmp_path, monkeypatch) -> None: + with _client(tmp_path, monkeypatch) as client: + backend = _register_backend(client) + response = client.post( + "/oauth/token", + json={ + "grant_type": "client_credentials", + "client_id": backend["client_id"], + "client_secret": backend["client_secret"], + "scopes": ["list_tools"], + }, + ) + + assert response.status_code == 422 + assert response.json()["detail"] diff --git a/docs/superpowers/plans/2026-06-22-skill-templated-task-graph.md b/docs/superpowers/plans/2026-06-22-skill-templated-task-graph.md index 7f047c6..d8351f3 100644 --- a/docs/superpowers/plans/2026-06-22-skill-templated-task-graph.md +++ b/docs/superpowers/plans/2026-06-22-skill-templated-task-graph.md @@ -20,6 +20,10 @@ - `beaver/engine/loop.py`, `tools/runtime/executor.py`, `coordinator/local.py`: node allowlist and budget enforcement. - `beaver/tasks/evidence.py`, `coordinator/execution/scheduler.py`, `tasks/attempt_orchestrator.py`: evidence completion and incomplete synthesis gate. +## Execution Reporting Rule + +Do not commit automatically. After every task, stop and report the modified-file list, exact test command and result, `git diff --stat` summary, and remaining risks. Commit only when the user explicitly asks. + ### Task 1: Parse and Propagate Optional Skill Templates **Files:** @@ -91,9 +95,9 @@ Run: `cd app-instance/backend && uv run pytest tests/unit/test_skill_team_templa Expected: PASS. 
-- [ ] **Step 5: Commit** +- [ ] **Step 5: Stop and report; do not commit** -Run: `git add app-instance/backend/beaver/skills/catalog/utils.py app-instance/backend/beaver/skills/catalog/loader.py app-instance/backend/beaver/engine/context/builder.py app-instance/backend/beaver/skills/assembler/task_assembler.py app-instance/backend/tests/unit/test_skill_team_template.py && git commit -m "feat(skills): parse optional task graph templates"` +Report the modified files, parser/assembler test result, `git diff --stat`, and any template compatibility risk. Do not commit unless explicitly asked. ### Task 2: Extend Existing Graph Contracts @@ -107,9 +111,11 @@ Run: `git add app-instance/backend/beaver/skills/catalog/utils.py app-instance/b ```python def test_execution_node_contracts_default_for_existing_callers() -> None: node = ExecutionNode("collect", "Collect", AgentDescriptor(name="collect")) - assert node.allowed_tool_names == [] + assert node.allowed_tool_names is None assert node.required_evidence == [] + assert node.evidence_contract == {} assert node.required_for_completion is True + assert node.block_downstream_on_partial is False def test_graph_rejects_depth_above_configured_limit() -> None: @@ -136,14 +142,16 @@ Expected: FAIL because fields and `max_depth` do not exist. ```python input_contract: dict[str, Any] = field(default_factory=dict) output_contract: dict[str, Any] = field(default_factory=dict) -allowed_tool_names: list[str] = field(default_factory=list) +allowed_tool_names: list[str] | None = None required_evidence: list[str] = field(default_factory=list) +evidence_contract: dict[str, Any] = field(default_factory=dict) validation_rules: list[str] = field(default_factory=list) required_for_completion: bool = True +block_downstream_on_partial: bool = False max_tool_iterations: int | None = None ``` -Add the runtime-relevant values to `DelegationEnvelope`. Add `completion_status="succeeded"` and `evidence_gaps=[]` to `NodeRunResult`. 
Extend `ExecutionGraph.validate(max_depth: int | None = None)` to calculate longest dependency chain with its existing DFS and raise only when an explicit limit is exceeded. +Use `allowed_tool_names: list[str] | None = None`, not a default empty list. `None` means no node-level scope and keeps legacy behavior; `[]` explicitly disables tools; a populated list is the node allowlist. Add runtime-relevant values to `DelegationEnvelope`. Add `completion_status="succeeded"` and `evidence_gaps=[]` to `NodeRunResult`. Extend `ExecutionGraph.validate(max_depth: int | None = None)` to calculate longest dependency chain with its existing DFS and raise only when an explicit limit is exceeded. - [ ] **Step 4: Run the coordinator regression test** @@ -151,9 +159,9 @@ Run: `cd app-instance/backend && uv run pytest tests/unit/test_agent_team_v1.py Expected: PASS. -- [ ] **Step 5: Commit** +- [ ] **Step 5: Stop and report; do not commit** -Run: `git add app-instance/backend/beaver/coordinator/models.py app-instance/backend/tests/unit/test_agent_team_v1.py && git commit -m "feat(team): add optional node contracts"` +Report the modified files, coordinator test result, `git diff --stat`, and compatibility risk for existing direct graph callers. Do not commit unless explicitly asked. 
### Task 3: Adapt Templates Into Generic Task Graphs @@ -185,6 +193,15 @@ def test_unknown_tool_is_removed_and_warned() -> None: ) assert plan.graph.nodes[0].allowed_tool_names == ["web_search"] assert "unknown tool removed: not_real" in plan.planner_adaptation["warnings"] + + +def test_high_risk_tool_is_removed_without_failing_low_risk_plan() -> None: + plan = TaskExecutionPlanner(tool_registry=_registry()).from_json( + '{"mode":"team","strategy":"sequence","nodes":[{"node_id":"collect","task":"Collect",' + '"requested_tools":["web_search","terminal"]}]}' + ) + assert plan.graph.nodes[0].allowed_tool_names == ["web_search"] + assert "requires_high_risk_review: terminal" in plan.planner_adaptation["warnings"] ``` - [ ] **Step 2: Run it to verify failure** @@ -197,9 +214,9 @@ Expected: FAIL because planner has no template context, registry policy, or adap Add `tool_registry: ToolRegistry | None` to `TaskExecutionPlanner`. Change `plan()` to receive `activated_skills: list[SkillContext]`, select at most one valid template, and include it in `_prompt`. Add `planner_adaptation: dict[str, Any] = field(default_factory=dict)` to `TaskExecutionPlan` and `to_event_payload()`. -Accept only `node_id`, `task`, `depends_on`, `input_contract`, `output_contract`, `requested_tools`, `required_evidence`, `validation_rules`, `required_for_completion`, `max_tool_iterations`, and `constraints`. Reject `agent` and `role`; construct `AgentDescriptor(name=node_id, role="", system_prompt="", metadata={"sub_agent_kind": "generic_skill_worker", ...})` internally. +Accept only `node_id`, `task`, `depends_on`, `input_contract`, `output_contract`, `requested_tools`, `required_evidence`, `evidence_contract`, `validation_rules`, `required_for_completion`, `block_downstream_on_partial`, `max_tool_iterations`, and `constraints`. Reject `agent` and `role`; construct `AgentDescriptor(name=node_id, role="", system_prompt="", metadata={"sub_agent_kind": "generic_skill_worker", ...})` internally. 
-Resolve requested names through registry plus conservative read-only policy. Write allowed names to `ExecutionNode.allowed_tool_names`; write unknown/high-risk removals into adaptation warnings. Validate node count, dependencies, cycles, and `graph.validate(max_depth=4)`. If first provider output is invalid, make exactly one `tools=None` repair request containing validation errors; if it is still invalid, return `TaskExecutionPlan.single("planner_fallback_single", fallback_error=...)`. +Resolve requested names through registry plus a conservative interim name-based risk policy. Treat `terminal`, `execute_command`, `write_file`, `delete_file`, `external_send`, and `send_email` as high-risk until stable `ToolSpec.metadata` risk fields exist. Write allowed names to `ExecutionNode.allowed_tool_names`; remove unknown/high-risk names and record warnings. Unknown tools never fail the whole plan; high-risk tools add `requires_high_risk_review` and are never auto-approved. Validate node count, dependencies, cycles, and `graph.validate(max_depth=4)`. If first provider output is invalid, make exactly one `tools=None` repair request containing validation errors; if it is still invalid, return `TaskExecutionPlan.single("planner_fallback_single", fallback_error=...)`. Update `TaskAttemptOrchestrator` to pass `preselected_skills`, and `EngineLoader` to construct planner with its registry. @@ -209,9 +226,9 @@ Run: `cd app-instance/backend && uv run pytest tests/unit/test_task_execution_pl Expected: PASS. 
-- [ ] **Step 5: Commit** +- [ ] **Step 5: Stop and report; do not commit** -Run: `git add app-instance/backend/beaver/tasks/planner.py app-instance/backend/beaver/tasks/attempt_orchestrator.py app-instance/backend/beaver/engine/loader.py app-instance/backend/tests/unit/test_task_execution_planner.py && git commit -m "feat(tasks): adapt skill templates into task graphs"` +Report the modified files, planner/task-mode test result, `git diff --stat`, and any risk-policy false-positive risk. Do not commit unless explicitly asked. ### Task 4: Enforce Node Tool Allowlists @@ -231,6 +248,13 @@ def test_team_node_exposes_only_allowed_tool_schema() -> None: assert _tool_names(provider.calls[0]["tools"]) == ["web_search"] +def test_none_tool_scope_preserves_legacy_selection_and_empty_scope_disables_all() -> None: + asyncio.run(loop.process_direct("collect", allowed_tool_names=None)) + assert _tool_names(provider.calls[0]["tools"]) + asyncio.run(loop.process_direct("collect", allowed_tool_names=[])) + assert _tool_names(provider.calls[1]["tools"]) == [] + + def test_executor_rejects_registered_tool_outside_node_allowlist() -> None: context = ToolContext(metadata={"allowed_tool_names": ["web_search"]}) result = asyncio.run(executor.execute("write_file", {"path": "x", "content": "x"}, context=context)) @@ -262,9 +286,9 @@ Run: `cd app-instance/backend && uv run pytest tests/unit/test_team_node_tool_po Expected: PASS. -- [ ] **Step 5: Commit** +- [ ] **Step 5: Stop and report; do not commit** -Run: `git add app-instance/backend/beaver/engine/loop.py app-instance/backend/beaver/tools/runtime/executor.py app-instance/backend/beaver/coordinator/local.py app-instance/backend/tests/unit/test_agent_loop.py app-instance/backend/tests/unit/test_team_node_tool_policy.py && git commit -m "feat(team): enforce node tool scopes"` +Report the modified files, focused/loop test result, `git diff --stat`, and risk that a legacy caller accidentally passes `[]`. 
Do not commit unless explicitly asked. ### Task 5: Gate Node Success on Required Evidence @@ -286,8 +310,19 @@ def test_node_without_required_tool_result_is_partial() -> None: assert result.evidence_gaps == ["missing required evidence: tool_result"] -def test_dag_blocks_dependency_of_partial_required_node() -> None: +def test_node_without_evidence_requirement_keeps_legacy_success() -> None: + result = asyncio.run(runner.run(_envelope(required_evidence=[]))) + assert result.success is True + assert result.completion_status == "succeeded" + + +def test_dag_allows_partial_evidence_by_default() -> None: outcome = asyncio.run(scheduler.run(_graph_with_partial_collect_node(), parent_task_id=None, parent_session_id="s")) + assert outcome.node_results[1].completion_status == "succeeded" + + +def test_dag_blocks_partial_node_only_when_node_requests_it() -> None: + outcome = asyncio.run(scheduler.run(_graph_with_blocking_partial_collect_node(), parent_task_id=None, parent_session_id="s")) assert outcome.node_results[1].finish_reason == "blocked" ``` @@ -299,7 +334,7 @@ Expected: FAIL because evidence requirements do not affect node success. - [ ] **Step 3: Implement deterministic evidence checks** -Add `evaluate_node_evidence(evidence, required_evidence, output_text) -> list[str]`. `tool_result` requires a successful tool result, `url` a tool result URL, and `output` non-empty output; any other requirement produces `unsupported evidence requirement: `. After `LocalAgentRunner` builds `RunEvidence`, set `completion_status="partial"`, `success=False`, and gaps when required evidence is absent. Scheduler-created error/blocked results set status to `failed`/`blocked` while retaining partial evidence. +Add `evaluate_node_evidence(evidence, required_evidence, output_text) -> list[str]`. 
`required_evidence` is a coarse v1 gate: `tool_result` requires a successful tool result, `url` a tool result URL, and `output` non-empty output; any other requirement produces `unsupported evidence requirement: <requirement>`. Do not interpret `evidence_contract` in v1. After `LocalAgentRunner` builds `RunEvidence`, set `completion_status="partial"`, `success=False`, and gaps only when the node actually declares `required_evidence`. Leave existing no-requirement node success behavior unchanged. Scheduler always blocks `failed`/`blocked`; it passes partial output/evidence onward unless `block_downstream_on_partial=True`. - [ ] **Step 4: Run coordinator and evidence regression tests** @@ -307,9 +342,9 @@ Run: `cd app-instance/backend && uv run pytest tests/unit/test_agent_team_v1.py Expected: PASS. -- [ ] **Step 5: Commit** +- [ ] **Step 5: Stop and report; do not commit** -Run: `git add app-instance/backend/beaver/tasks/evidence.py app-instance/backend/beaver/coordinator/local.py app-instance/backend/beaver/coordinator/execution/scheduler.py app-instance/backend/tests/unit/test_agent_team_v1.py app-instance/backend/tests/unit/test_task_evidence.py && git commit -m "feat(team): require declared node evidence"` +Report the modified files, coordinator/evidence test result, `git diff --stat`, and the known coarse-evidence limitation. Do not commit unless explicitly asked. ### Task 6: Gate Final Synthesis and Verify Finance Planning @@ -350,7 +385,7 @@ Expected: FAIL because outcome gate is absent. Add `_team_synthesis_outcome(plan, result) -> tuple[str, str]`. Every `required_for_completion=True` node whose `completion_status` is not `succeeded` is incomplete. Context includes node id, status, error, and evidence gaps. Keep Team synthesis at `include_tools=False` and `max_tool_iterations=0`; prefix final output only when the incomplete notice is missing. Write `task_outcome` and `incomplete_node_ids` to `task_synthesis_completed`. 
-Add `_finance_plan_json()` fixture with four task-oriented nodes and dependencies `collect -> extract -> validate -> report`. Only source/extraction nodes request `web_search`/`web_fetch`; report node uses upstream evidence and produces Markdown/table/chart data, never an unregistered chart renderer. Assert no node is named `researcher`, `writer`, or `reviewer`. +Add `_finance_plan_json()` fixture with four task-oriented nodes and dependencies `collect -> extract -> validate -> report`. The report node explicitly uses `allowed_tool_names=[]`; source/extraction nodes request only `web_search`/`web_fetch`. Assert no node is named `researcher`, `writer`, or `reviewer`. The report node may emit a comparison table, chart-ready data, Mermaid chart, Markdown chart section, or text-bar-chart fallback. It must not claim an image/file chart artifact unless a registered chart-renderer tool exists and passes policy. - [ ] **Step 4: Run complete backend unit suite** @@ -358,12 +393,12 @@ Run: `cd app-instance/backend && uv run pytest tests/unit -q` Expected: PASS. Fix only compatibility defects in this plan; do not change router, persistent agent registry, frontend, nested-team behavior, or Skill-learning eval semantics. -- [ ] **Step 5: Commit** +- [ ] **Step 5: Stop and report; do not commit** -Run: `git add app-instance/backend/beaver/tasks/attempt_orchestrator.py app-instance/backend/tests/unit/test_task_mode_feedback.py app-instance/backend/tests/unit/test_task_team_synthesis_outcome.py app-instance/backend/tests/unit/test_task_execution_planner.py app-instance/backend/tests/unit/test_task_skill_resolver.py && git commit -m "test(team): cover skill-templated finance planning"` +Report the modified files, complete unit-suite result, `git diff --stat`, and all remaining boundaries. Do not commit unless explicitly asked. 
## Plan Self-Review -- Coverage: parser compatibility, existing graph contracts, template adaptation/repair, tool enforcement, evidence completion, deterministic synthesis, and finance acceptance all have explicit tasks. +- Coverage: parser compatibility, one-primary-template adaptation/repair, `None`/`[]`/allowlist scope semantics, interim high-risk filtering, partial propagation, coarse evidence completion, deterministic synthesis, and finance acceptance all have explicit tasks. - Exclusions: no fixed role Agents, parallel Team model, nested graph execution, chart renderer, high-risk approval UI, frontend work, or Skill-eval redesign appears in the implementation scope. -- Compatibility: all new graph fields are defaults-only; `None` tool scope preserves single-agent behavior, while `[]` gives a Team node no tools. +- Compatibility: all new graph fields are defaults-only; `allowed_tool_names=None` preserves legacy behavior, `[]` explicitly disables tools, and evidence gating activates only when `required_evidence` is declared. diff --git a/docs/superpowers/specs/2026-06-22-skill-templated-task-graph-design.md b/docs/superpowers/specs/2026-06-22-skill-templated-task-graph-design.md index cff271d..f28dbc3 100644 --- a/docs/superpowers/specs/2026-06-22-skill-templated-task-graph-design.md +++ b/docs/superpowers/specs/2026-06-22-skill-templated-task-graph-design.md @@ -45,7 +45,7 @@ Out of scope: - a high-risk approval UI or new approval API; - chart-image rendering. -The current runtime registers `web_search` and `web_fetch` but no chart renderer. The finance acceptance case therefore produces evidence-backed comparison data and a textual/Markdown report, not a fabricated chart artifact. +The current runtime registers `web_search` and `web_fetch` but no chart renderer. The finance acceptance case may produce an evidence-backed comparison table, chart-ready data, Mermaid chart, Markdown chart section, text-bar-chart fallback, and final textual report. 
It must not claim that an image/file chart artifact was generated unless a registered chart-renderer tool exists and passes runtime safety policy. ## Data Model Evolution @@ -54,16 +54,20 @@ The current runtime registers `web_search` and `web_fetch` but no chart renderer ```python input_contract: dict[str, object] = field(default_factory=dict) output_contract: dict[str, object] = field(default_factory=dict) -allowed_tool_names: list[str] = field(default_factory=list) +allowed_tool_names: list[str] | None = None required_evidence: list[str] = field(default_factory=list) +evidence_contract: dict[str, Any] = field(default_factory=dict) validation_rules: list[str] = field(default_factory=list) required_for_completion: bool = True +block_downstream_on_partial: bool = False max_tool_iterations: int | None = None ``` -Existing callers retain their behavior because empty lists and `None` impose no new node requirement. +`allowed_tool_names` has three non-overlapping meanings: `None` means node-level tool scope is disabled and retains legacy tool selection; `[]` explicitly prohibits every tool for this node; a populated list permits only those registered, policy-allowed tools. Existing callers retain behavior because the default is `None`. -`NodeRunResult` remains the node-output container. It gains `completion_status` (`succeeded`, `partial`, `failed`, or `blocked`) and `evidence_gaps`. `success` remains for scheduler compatibility and is true only for `succeeded`. A completed run with missing required evidence is therefore `partial`, and downstream dependencies block exactly as they do for failed nodes. +`NodeRunResult` remains the node-output container. It gains `completion_status` (`succeeded`, `partial`, `failed`, or `blocked`) and `evidence_gaps`. `success` remains a compatibility field. Nodes without `required_evidence` retain the current `finish_reason == "stop"` success behavior. 
For a node that declares evidence requirements, a completed run with missing required evidence becomes `partial` and has `success=False`. + +`failed` and `blocked` always block dependent nodes. `partial` does not imply successful completion, but its output and evidence remain consumable by downstream nodes unless `block_downstream_on_partial=True`. Any required-for-completion node that is partial still forces the final task outcome to `incomplete`. `TaskExecutionPlan` gains a planner-adaptation payload rather than a duplicate graph object. The payload records template source/version, whether it was used, added/removed/merged node ids, removed tool names, warnings, and fallback reason. It is written into the existing `task_execution_planned` event. @@ -100,7 +104,7 @@ The template is an LLM input, not an executable workflow. It supplies candidate ## Planner Design -`TaskAttemptOrchestrator` passes activated `SkillContext` objects to the planner rather than only truncated summaries. The planner chooses at most one applicable template for the first implementation; multiple activated Skills remain ordinary guidance. This avoids composing incompatible templates before there is evidence for a composition model. +`TaskAttemptOrchestrator` passes activated `SkillContext` objects to the planner rather than only truncated summaries. v1 supports one primary applicable Skill Team Template; other activated Skills remain ordinary guidance. Template composition, sub-skill guidance composition, and multi-Skill planning are explicitly deferred rather than prohibited long-term. Planner output uses a task-only JSON schema. It contains `mode`, `reason`, `strategy`, `nodes`, `final_synthesis_instruction`, and `adaptation`. Nodes contain task, dependencies, contracts, requested tools, evidence requirements, validation rules, and completion importance. `agent` and `role` are not accepted as planner schema fields; the adapter creates the existing empty-role `AgentDescriptor` itself. 
@@ -125,7 +129,7 @@ template/node requested names ∩ node runtime policy ``` -Skill hints are suggestions, not authority. The current code has no populated task-time user/workspace permission model, so v1 must not claim that it enforces one. It uses a conservative node runtime policy: +Skill hints are suggestions, not authority. The current code has no populated task-time user/workspace permission model, so v1 must not claim that it enforces one. v1 uses a conservative interim tool-risk policy, not a complete task-time permission system. Until `ToolSpec.metadata` has stable fields such as `risk_level`, `mutating`, `external_side_effect`, `requires_approval`, and `readonly`, the interim policy uses a conservative name-based high-risk set such as `terminal`, `execute_command`, `write_file`, `delete_file`, `external_send`, and `send_email`. - unknown names are removed and reported as planner warnings; - read-only tools may remain available when the node requests them; @@ -134,25 +138,27 @@ Skill hints are suggestions, not authority. The current code has no populated ta Provider schemas are filtered to the allowlist, and `ToolExecutor` performs a second allowlist check through `ToolContext.metadata`. This prevents a model-originated call to a registered but unexposed tool from executing. -A real high-risk approval flow requires a task lifecycle state and UI/API confirmation. It is deferred; v1 blocks and explains rather than auto-approving. +A real high-risk runtime approval flow requires a task lifecycle state and UI/API confirmation. It is out of scope; v1 removes high-risk names, records `requires_high_risk_review`, and explains the limitation rather than auto-approving. ## Runtime and Evidence Semantics `DelegationEnvelope` receives node contracts, allowed tools, evidence requirements, and per-node tool budget. `LocalAgentRunner` passes the allowed tools and budget into the current `AgentLoop`, builds existing `RunEvidence`, and classifies completion. 
-Evidence requirements have deterministic meanings in v1: +`required_evidence` in v1 is a coarse node-level completion gate, not a field-level evidence contract. It can show that a node produced at least one URL or tool result; it cannot prove that every required company, reporting period, metric, and source is present. `evidence_contract: dict[str, Any]` is reserved for a later field-level contract and is not interpreted in v1. + +The coarse requirements have deterministic meanings in v1: - `tool_result`: at least one successful tool result; - `url`: at least one tool result with a URL; - `output`: non-empty node output; - any other declared value: explicit evidence gap. -The scheduler keeps sequence/parallel/DAG semantics. Dependencies only receive succeeded upstream results. It does not retry, recursively expand Skills, or create another Team graph. +The scheduler keeps sequence/parallel/DAG semantics. Dependencies never run after an upstream `failed` or `blocked` result. A `partial` upstream result is passed onward as partial evidence by default; a node can opt into blocking it with `block_downstream_on_partial=True`. The scheduler does not retry, recursively expand Skills, or create another Team graph. Before final synthesis, `TaskAttemptOrchestrator` derives a task outcome: - `complete`: every required-for-completion node succeeded; -- `incomplete`: any required node is partial, failed, or blocked; +- `incomplete`: any required node is partial, failed, or blocked, even if downstream synthesis produced a useful partial report; - `single`: no Team graph ran. Team synthesis continues to run with no tools. For `incomplete`, the synthesis context lists completed work, node failures, evidence gaps, and the deterministic task outcome. The returned user-facing answer is prefixed with an incomplete notice if the model omits it, so runtime—not prompt compliance alone—prevents a false completion claim. 
@@ -166,7 +172,7 @@ Existing task events receive the adaptation report, resolved tools, policy remov Compatibility guarantees: - Skills without templates activate and execute unchanged. -- Existing direct `ExecutionGraph` callers work because new fields have defaults. +- Existing direct `ExecutionGraph` callers work because new fields have compatibility defaults; specifically, `allowed_tool_names=None` does not enable node-level scope and empty `required_evidence` does not enable evidence gating. - Single-agent runs do not receive node tool policies or outcome prefixes. - Existing external registry descriptors are not removed; planner-created Team nodes stay generic and role-empty. - `TaskSkillResolver` remains the per-node published-Skill/ephemeral-guidance fallback.