feat(coordinator): 添加团队节点默认最大工具迭代次数配置

添加 DEFAULT_TEAM_NODE_MAX_TOOL_ITERATIONS 配置项以控制团队节点的最大工具迭代次数,
并修改 LocalAgentRunner 中的逻辑,在 envelope 未指定该值时使用此默认值。

fix(runtime): 修复团队节点运行成功判断逻辑

更新运行成功判断条件,将 finish_reason 为 "max_tool_iterations_finalized" 的情况
视为运行失败,并添加对原始工具调用输出的检测,避免将其误判为成功完成。

feat(mcp): 添加团队工作流MCP工具类别支持

增加新的本地MCP工具类别 "team_workflow" 及其对应的工具创建功能,
为团队工作流提供本地工具支持。

refactor(engine): 调整AgentLoop最大工具迭代次数设置

将 AgentProfile 中的默认 max_tool_iterations 从 30 增加到 100,
同时移除 TaskExecutionPlanner 构造函数中的重复参数传递。

perf(mcp): 优化MCP连接管理避免重复连接

添加 mcp_connected 标志来跟踪MCP连接状态,确保 connect_all 只执行一次,
提高性能并避免不必要的重复连接。

refactor(skills): 移除技能团队模板相关功能

移除与技能团队模板相关的代码,包括解析、存储和处理逻辑,
简化技能记录结构和加载流程。

feat(process): 增强会话过程投影器功能

添加技能激活快照事件处理,改进团队运行完成消息显示,
并增强技能激活事件的时间戳记录功能。

refactor(tasks): 简化任务尝试编排器团队执行逻辑

移除团队执行相关代码,将所有任务统一按单步执行处理,
简化任务编排器的复杂度并提升执行效率。

fix(evidence): 修复节点证据评估中需求验证逻辑

更新节点证据评估逻辑,跳过自然语言证据需求的确定性验证,
只执行机器可读的需求验证,避免因自然语言需求导致的节点失败。
This commit is contained in:
2026-06-26 16:36:29 +08:00
parent 53b13e8eac
commit 520a21a027
360 changed files with 13271 additions and 1848 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
from pathlib import Path
def main(workspace: Path = Path("/root/.beaver/workspace")) -> None:
    """Write registry metadata for the demo finance skill.

    Reads the already-installed ``SKILL.md`` of version ``v0001``, derives a
    SHA-256 digest from its text, writes ``current.json``, ``skill.json`` and
    ``version.json`` beside it, and adds the skill name to
    ``skills/_index/published.json`` (creating the ``_index`` directory when
    it does not exist yet).

    Args:
        workspace: Root of the workspace to install into. Defaults to the
            location this script was originally hard-coded to.

    Raises:
        FileNotFoundError: If ``SKILL.md`` has not been copied into place.
    """
    skill_name = "mgm-galaxy-financial-chart-report-safe"
    version = "v0001"
    # The same description is stored in skill.json and in the version frontmatter.
    description = "Compare MGM China and Galaxy Entertainment using official financial sources, produce chart-ready Markdown, and avoid claiming generated chart image/file artifacts."

    skill_dir = workspace / "skills" / skill_name
    version_dir = skill_dir / "versions" / version
    content = (version_dir / "SKILL.md").read_text(encoding="utf-8")
    digest = "sha256:" + hashlib.sha256(content.encode("utf-8")).hexdigest()
    now = datetime.now(timezone.utc).isoformat()

    def write_json(target: Path, document: dict) -> None:
        # Every metadata file uses the same pretty-printed, newline-terminated layout.
        target.write_text(
            json.dumps(document, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )

    write_json(skill_dir / "current.json", {"current_version": version})
    write_json(
        skill_dir / "skill.json",
        {
            "name": skill_name,
            "display_name": "MGM/Galaxy Financial Chart Report Safe",
            "description": description,
            "created_at": now,
            "updated_at": now,
            "current_version": version,
            "status": "active",
            "tags": ["finance", "research", "report", "chart-ready-data", "mgm", "galaxy"],
            "owners": ["steven"],
            "source_kind": "workspace",
            "lineage": [],
        },
    )
    write_json(
        version_dir / "version.json",
        {
            "skill_name": skill_name,
            "version": version,
            "content_hash": digest,
            "summary_hash": digest,
            "created_at": now,
            "created_by": "steven",
            "change_reason": "Add real Skill Team Template example for MGM/Galaxy finance report demo",
            "parent_version": None,
            "review_state": "published",
            "frontmatter": {
                "name": skill_name,
                "description": description,
                "tools": ["web_search", "web_fetch"],
            },
            "summary": "MGM/Galaxy finance report skill with a task-only Beaver team template for official source collection, metric extraction, validation, and Markdown chart-ready reporting.",
            "tool_hints": ["web_search", "web_fetch"],
            "provenance": {"source_kind": "manual_demo", "target_instance": "steven"},
            "tree_hash": "",
        },
    )

    index_path = workspace / "skills" / "_index" / "published.json"
    # A fresh workspace has no _index directory; without this the final write
    # below fails even though a missing index file is explicitly tolerated.
    index_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        payload = json.loads(index_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        payload = {"items": []}
    items = [str(item) for item in payload.get("items", [])]
    if skill_name not in items:
        items.append(skill_name)
    write_json(index_path, {"items": items})
    print(f"installed metadata for {skill_name}: {digest}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,155 @@
---
name: mgm-galaxy-financial-chart-report-safe
description: Compare MGM China and Galaxy Entertainment using official financial sources, produce chart-ready Markdown, and avoid claiming generated chart image/file artifacts.
tools:
- web_search
- web_fetch
---
# MGM/Galaxy Financial Chart Report Safe
## Overview
Use this skill when the user asks for a finance comparison report for MGM China Holdings Limited and Galaxy Entertainment Group, especially when the requested output includes a table, chart-ready data, or a textual chart section.
The skill intentionally separates source collection, metric extraction, validation, and final reporting. It must not invent chart files or image artifacts. If the runtime does not expose a registered chart-rendering tool, the final output should be Markdown plus chart-ready data only.
```beaver-team-template
{
"version": 1,
"strategy": "dag",
"nodes": [
{
"node_id": "collect_official_sources",
"task": "Collect official MGM China Holdings and Galaxy Entertainment financial disclosure sources for the requested period. Prefer annual reports, interim reports, results announcements, investor relations pages, and exchange filings. Return source URLs with short notes about period coverage.",
"use_skill": "web-operation",
"skill_query": "official financial disclosure web research",
"depends_on": [],
"requested_tools": ["web_search", "web_fetch"],
"required_evidence": ["tool_result", "url"],
"evidence_contract": {
"version": 1,
"entities": ["MGM China Holdings", "Galaxy Entertainment Group"],
"source_types": ["annual_report", "interim_report", "results_announcement", "investor_relations", "exchange_filing"],
"minimum_sources_per_entity": 1
},
"validation_rules": [
"Prefer official company, investor relations, HKEX, or stock exchange sources.",
"Record the reporting period attached to each source.",
"Do not use unsourced market commentary as primary evidence."
],
"required_for_completion": true,
"block_downstream_on_partial": true,
"max_tool_iterations": 4,
"constraints": [
"Use only public pages.",
"Do not require login or paid databases."
]
},
{
"node_id": "extract_financial_metrics",
"task": "Extract comparable financial metrics for MGM China Holdings and Galaxy Entertainment from the collected official sources. Include revenue or net revenue, adjusted EBITDA where available, net profit/loss where available, period, currency, unit, and source URL for each metric.",
"skill_query": "financial metric extraction from official disclosures",
"depends_on": ["collect_official_sources"],
"requested_tools": ["web_fetch"],
"required_evidence": ["output"],
"evidence_contract": {
"version": 1,
"metrics": ["revenue", "adjusted_ebitda", "net_profit_or_loss"],
"required_fields": ["company", "period", "metric", "value", "currency", "unit", "source_url"]
},
"validation_rules": [
"Keep currencies and units explicit.",
"Do not compare different reporting periods without labeling the mismatch.",
"Mark unavailable metrics as unavailable instead of estimating them."
],
"required_for_completion": true,
"block_downstream_on_partial": true,
"max_tool_iterations": 2,
"constraints": [
"Use upstream official sources before searching for alternatives."
]
},
{
"node_id": "validate_metrics",
"task": "Validate extracted metrics for source consistency, period alignment, currency/unit consistency, and obvious transcription errors. Produce a concise validation note and list any evidence gaps.",
"skill_query": "finance metric validation",
"depends_on": ["extract_financial_metrics"],
"requested_tools": [],
"required_evidence": ["output"],
"evidence_contract": {
"version": 1,
"checks": ["source_consistency", "period_alignment", "currency_unit_consistency", "transcription_sanity"]
},
"validation_rules": [
"Do not introduce new unsourced figures.",
"If values are not comparable, explain why and preserve both values with labels."
],
"required_for_completion": true,
"block_downstream_on_partial": true,
"max_tool_iterations": 0,
"constraints": [
"No tools in this validation node; use upstream evidence only."
]
},
{
"node_id": "generate_chart_report",
"task": "Generate the final Markdown comparison report. Include an executive summary, source-backed comparison table, chart-ready data table, optional Mermaid or text bar chart section, and caveats. Do not claim that a chart image, chart file, or saved artifact was generated.",
"skill_query": "financial markdown report with chart-ready data",
"depends_on": ["validate_metrics"],
"requested_tools": [],
"required_evidence": ["output"],
"evidence_contract": {
"version": 1,
"outputs": ["comparison_table", "chart_ready_data", "markdown_report"],
"forbidden_claims": ["generated_chart_image", "generated_chart_file", "saved_chart_artifact"]
},
"validation_rules": [
"Every numeric claim must trace back to a source URL or be marked unavailable.",
"Do not claim a generated image/file unless a registered chart renderer tool was actually used.",
"Prefer Markdown tables and chart-ready data over unsupported rendering claims."
],
"required_for_completion": true,
"block_downstream_on_partial": false,
"max_tool_iterations": 0,
"constraints": [
"No chart renderer is assumed.",
"No file/image artifact claims."
]
}
]
}
```
## When to Use
- The user asks to compare MGM China and Galaxy Entertainment financial performance.
- The user asks for a chart, chart-ready data, Markdown chart section, or board-style finance report.
- The task requires source-backed public financial data rather than generic market commentary.
## Required Tools
- `web_search`
- `web_fetch`
## Workflow
1. Collect official sources first: company investor relations pages, annual/interim reports, results announcements, and exchange filings.
2. Extract comparable metrics with period, currency, unit, and source URL.
3. Validate that metrics are comparable before drawing conclusions.
4. Produce a Markdown report with comparison table and chart-ready data.
5. If a real chart renderer tool is unavailable, say so implicitly by providing chart-ready data; do not claim an image or file was created.
## Validation
- Confirm each company has at least one official source.
- Confirm all numeric metrics carry period, currency, unit, and source URL.
- Confirm the final report does not contain claims such as “saved chart image”, “generated chart file”, or “attached chart artifact”.
## Boundaries
- Do not use private, paid, or login-only sources.
- Do not fabricate unavailable figures.
- Do not use high-risk write, terminal, email, or external-send tools.
- Do not create nested teams or role-based agents.
- Do not claim chart rendering unless the runtime exposes and actually uses a registered chart-renderer tool.

View File

@ -0,0 +1,257 @@
# Steven MGM/Galaxy Team Template Demo
## Target
Install `mgm-galaxy-financial-chart-report-safe` into Steven's Beaver workspace, then run one task that exercises:
```text
Planner
→ Skill Template selection
→ ExecutionGraph / ExecutionNode adaptation
→ Node Skill Binding
→ Team execution
→ Tool scope filtering
→ Evidence gate
→ Final synthesis complete/incomplete outcome
```
## Current environment status observed by Codex
The repository contains Steven's instance metadata:
```text
instance_id: steven
container_name: app-instance-steven
beaver_home: app-instance/runtime/instances/steven/beaver-home
workspace: app-instance/runtime/instances/steven/beaver-home/workspace
public_url: http://steven.172.19.0.245.nip.io:8088
```
Codex could not directly apply the skill to the live Steven instance in this session because:
```text
docker API: permission denied while connecting to /var/run/docker.sock
Steven workspace/skills parent dir: owned by nobody:nogroup and not writable by current user
local backend .venv: incomplete after uv environment rebuild; missing test/runtime dependencies
```
So this runbook is the exact artifact to apply from a shell with Docker or filesystem permission.
## Install Skill into Steven workspace
From repository root, run as a user that can write Steven's workspace:
```bash
SKILL_NAME=mgm-galaxy-financial-chart-report-safe
WORKSPACE=app-instance/runtime/instances/steven/beaver-home/workspace
SKILL_DIR="$WORKSPACE/skills/$SKILL_NAME"
mkdir -p "$SKILL_DIR/versions/v0001"
cp docs/superpowers/examples/mgm-galaxy-financial-chart-report-safe.SKILL.md \
"$SKILL_DIR/versions/v0001/SKILL.md"
python3 - <<'PY'
import hashlib
import json
from pathlib import Path
from datetime import datetime, timezone
skill_name = "mgm-galaxy-financial-chart-report-safe"
workspace = Path("app-instance/runtime/instances/steven/beaver-home/workspace")
skill_dir = workspace / "skills" / skill_name
skill_md = skill_dir / "versions" / "v0001" / "SKILL.md"
content = skill_md.read_text(encoding="utf-8")
digest = "sha256:" + hashlib.sha256(content.encode("utf-8")).hexdigest()
now = datetime.now(timezone.utc).isoformat()
(skill_dir / "current.json").write_text(
json.dumps({"current_version": "v0001"}, indent=2, ensure_ascii=False) + "\n",
encoding="utf-8",
)
(skill_dir / "skill.json").write_text(
json.dumps(
{
"name": skill_name,
"display_name": "MGM/Galaxy Financial Chart Report Safe",
"description": "Compare MGM China and Galaxy Entertainment using official financial sources, produce chart-ready Markdown, and avoid claiming generated chart image/file artifacts.",
"created_at": now,
"updated_at": now,
"current_version": "v0001",
"status": "active",
"tags": ["finance", "research", "report", "chart-ready-data", "mgm", "galaxy"],
"owners": ["steven"],
"source_kind": "workspace",
"lineage": [],
},
indent=2,
ensure_ascii=False,
)
+ "\n",
encoding="utf-8",
)
(skill_dir / "versions" / "v0001" / "version.json").write_text(
json.dumps(
{
"skill_name": skill_name,
"version": "v0001",
"content_hash": digest,
"summary_hash": digest,
"created_at": now,
"created_by": "steven",
"change_reason": "Add real Skill Team Template example for MGM/Galaxy finance report demo",
"parent_version": None,
"review_state": "published",
"frontmatter": {
"name": skill_name,
"description": "Compare MGM China and Galaxy Entertainment using official financial sources, produce chart-ready Markdown, and avoid claiming generated chart image/file artifacts.",
"tools": ["web_search", "web_fetch"],
},
"summary": "MGM/Galaxy finance report skill with a task-only Beaver team template for official source collection, metric extraction, validation, and Markdown chart-ready reporting.",
"tool_hints": ["web_search", "web_fetch"],
"provenance": {"source_kind": "manual_demo", "target_instance": "steven"},
"tree_hash": "",
},
indent=2,
ensure_ascii=False,
)
+ "\n",
encoding="utf-8",
)
index_path = workspace / "skills" / "_index" / "published.json"
index_path.parent.mkdir(parents=True, exist_ok=True)
try:
payload = json.loads(index_path.read_text(encoding="utf-8"))
except FileNotFoundError:
payload = {"items": []}
items = [str(item) for item in payload.get("items", [])]
if skill_name not in items:
items.append(skill_name)
index_path.write_text(json.dumps({"items": items}, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
PY
```
## Restart or start Steven container
If the container already exists:
```bash
docker restart app-instance-steven
```
If it does not exist, use the existing instance metadata and project scripts rather than creating a new instance id.
## Demo task prompt
Send this as Steven's user task:
```text
Use the MGM/Galaxy finance report skill to compare MGM China Holdings and Galaxy Entertainment using official public financial disclosures. Produce a concise board-style Markdown report with source URLs, a comparison table, chart-ready data, and a text/Mermaid chart section. Do not claim a generated image or saved chart file.
```
## Expected planning shape
The planner should produce a team DAG with these task nodes:
```text
collect_official_sources
→ extract_financial_metrics
→ validate_metrics
→ generate_chart_report
```
Expected node constraints:
```text
collect_official_sources.allowed_tool_names = ["web_search", "web_fetch"]
extract_financial_metrics.allowed_tool_names = ["web_fetch"]
validate_metrics.allowed_tool_names = []
generate_chart_report.allowed_tool_names = []
```
The created workers should remain generic:
```text
node.agent.role = ""
node.agent.metadata.sub_agent_kind = "generic_skill_worker"
```
## Expected complete outcome
If source collection and extraction produce required evidence:
```text
Planner
→ TeamRunResult with required nodes completion_status=succeeded
→ task_outcome=complete
→ tool-free final synthesis
→ final Markdown report
```
The final output may include:
```text
comparison table
chart-ready data
Mermaid
Markdown chart section
text bar chart fallback
final textual report
```
It must not claim:
```text
generated chart image
generated chart file
saved chart artifact
```
## Expected incomplete outcome
If official-source evidence is missing or web tools fail:
```text
collect_official_sources.completion_status=partial
→ evidence_gaps populated
→ because block_downstream_on_partial=true, downstream nodes are blocked
→ task_outcome=incomplete
→ tool-free final synthesis still runs
→ final answer is prefixed with an incomplete notice
```
The final response should explain which required evidence was missing instead of fabricating metrics.
## Verification queries
After running the task, inspect Steven's event log:
```bash
WORKSPACE=app-instance/runtime/instances/steven/beaver-home/workspace
tail -n 200 "$WORKSPACE/tasks/events.jsonl"
```
Look for:
```text
task_execution_planned
task_team_run_completed or task_team_run_failed
task_synthesis_completed
```
For `task_execution_planned`, verify:
```text
planner_adaptation.template_used = true
planner_adaptation.selected_template = mgm-galaxy-financial-chart-report-safe
node_ids include collect_official_sources/extract_financial_metrics/validate_metrics/generate_chart_report
```
For `task_synthesis_completed`, verify:
```text
task_outcome = complete | incomplete
incomplete_node_ids = [] for complete, otherwise populated
```

View File

@ -0,0 +1,316 @@
from __future__ import annotations
import asyncio
import json
from dataclasses import asdict
from pathlib import Path
from typing import Any
from beaver.engine import AgentLoop, EngineLoader
from beaver.engine.context import SkillContext
from beaver.engine.providers.base import LLMProvider, LLMResponse, ToolCallRequest
from beaver.engine.providers.factory import ProviderBundle, build_provider_runtime
from beaver.services.team_service import TeamService
from beaver.skills.catalog.loader import SkillsLoader
from beaver.skills.catalog.utils import strip_frontmatter
from beaver.skills.drafts import DraftService
from beaver.skills.specs import SkillSpecStore
from beaver.tasks.attempt_orchestrator import TaskAttemptOrchestrator
from beaver.tasks.models import TaskRecord
from beaver.tasks.planner import TaskExecutionPlanner
from beaver.tasks.skill_resolver import TaskSkillResolver
WORKSPACE = Path("/root/.beaver/workspace")
SKILL_NAME = "mgm-galaxy-financial-chart-report-safe"
def _text_from_messages(messages: list[dict[str, Any]]) -> str:
    """Join the ``content`` of every message into one newline-separated string.

    A message whose content is missing or falsy contributes an empty line.
    """
    pieces: list[str] = []
    for entry in messages:
        body = entry.get("content")
        pieces.append(str(body) if body else "")
    return "\n".join(pieces)
def _tool_names(tools: list[dict[str, Any]] | None) -> list[str]:
names: list[str] = []
for tool in tools or []:
if "function" in tool:
names.append(str(tool["function"].get("name") or ""))
else:
names.append(str(tool.get("name") or ""))
return [name for name in names if name]
class DemoProvider(LLMProvider):
    """Scripted provider that answers from fixed text instead of calling a model.

    The reply is chosen by looking for marker substrings in the concatenated
    message contents. Every call is also appended to ``self.calls`` so the
    harness can report what each request looked like.
    """

    def __init__(self, *, collect_uses_tool: bool) -> None:
        super().__init__()
        # When true, the source-collection node issues one web_fetch tool call
        # before giving its text answer.
        self.collect_uses_tool = collect_uses_tool
        self.calls: list[dict[str, Any]] = []

    async def chat(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        model: str | None = None,
        max_tokens: int | None = None,
        temperature: float = 0.0,
        thinking_enabled: bool | None = None,
    ) -> LLMResponse:
        """Record the request and return the canned response matching its prompt markers."""
        prompt_text = _text_from_messages(messages)
        offered = _tool_names(tools)
        saw_tool_result = any(entry.get("role") == "tool" for entry in messages)
        self.calls.append(
            {
                "tool_names": offered,
                "has_tool_result": saw_tool_result,
                "text_preview": prompt_text[:300],
            }
        )

        def answer(body: str) -> LLMResponse:
            # All plain-text replies share the same provider/model labels.
            return LLMResponse(content=body, provider_name="demo", model="demo-model")

        if "You choose whether an internal Beaver Task attempt" in prompt_text:
            return answer(json.dumps(_planner_json(), ensure_ascii=False))

        if "You select Beaver skills for a single run" in prompt_text:
            return answer("[]")

        # The node markers are tested from the last stage back to the first;
        # keep this order when editing.
        if "team:generate_chart_report" in prompt_text:
            return answer(
                "# MGM China vs Galaxy Entertainment Demo Report\n\n"
                "| Company | Metric | Value | Source |\n"
                "|---|---:|---:|---|\n"
                "| MGM China | Revenue | demo value | upstream source |\n"
                "| Galaxy Entertainment | Revenue | demo value | upstream source |\n\n"
                "Chart-ready data is provided as Markdown. No image or saved chart file was generated."
            )

        if "team:validate_metrics" in prompt_text:
            return answer(
                "Validation complete: periods and units are labeled; no generated chart artifact is claimed."
            )

        if "team:extract_financial_metrics" in prompt_text:
            return answer(
                "Extracted demo metric table: MGM China revenue: source-backed placeholder; "
                "Galaxy Entertainment revenue: source-backed placeholder. Currency, period, and source URL fields are labeled."
            )

        if "team:collect_official_sources" in prompt_text:
            wants_fetch = self.collect_uses_tool and "web_fetch" in offered and not saw_tool_result
            if wants_fetch:
                fetch_call = ToolCallRequest(
                    id="call_collect_fetch",
                    name="web_fetch",
                    arguments={
                        "url": "https://www.bing.com/search?q=MGM+China+Galaxy+Entertainment+annual+report",
                        "max_chars": 1000,
                    },
                )
                return LLMResponse(
                    content=None,
                    tool_calls=[fetch_call],
                    finish_reason="tool_calls",
                    provider_name="demo",
                    model="demo-model",
                )
            return answer(
                "Collected official-source candidates for MGM China Holdings and Galaxy Entertainment. "
                "Demo evidence includes a successful web_fetch tool result with URL captured by Beaver."
            )

        return answer("Demo final synthesis.")

    def get_default_model(self) -> str:
        """Return the fixed model label used by this provider."""
        return "demo-model"
def _planner_json() -> dict[str, Any]:
    """Return the canned planner decision: a four-node team DAG for the finance report."""

    def stage(
        node_id: str,
        *,
        task: str,
        skill: str,
        query: str,
        upstream: list[str],
        tools: list[str],
        evidence: list[str],
        contract: dict[str, Any],
        blocks_downstream: bool,
        tool_budget: int,
    ) -> dict[str, Any]:
        # Every node is required for completion and uses contract version 1;
        # only the per-stage details vary.
        return {
            "node_id": node_id,
            "task": task,
            "use_skill": skill,
            "skill_query": query,
            "depends_on": upstream,
            "requested_tools": tools,
            "required_evidence": evidence,
            "evidence_contract": {"version": 1, **contract},
            "required_for_completion": True,
            "block_downstream_on_partial": blocks_downstream,
            "max_tool_iterations": tool_budget,
        }

    collect = stage(
        "collect_official_sources",
        task="Collect official MGM China Holdings and Galaxy Entertainment financial disclosure sources for the requested period. Prefer annual reports, interim reports, results announcements, investor relations pages, and exchange filings. Return source URLs with short notes about period coverage.",
        skill="web-operation",
        query="official financial disclosure web research",
        upstream=[],
        tools=["web_search", "web_fetch"],
        evidence=["tool_result", "url"],
        contract={"entities": ["MGM China Holdings", "Galaxy Entertainment Group"]},
        blocks_downstream=True,
        tool_budget=2,
    )
    extract = stage(
        "extract_financial_metrics",
        task="Extract comparable financial metrics for MGM China Holdings and Galaxy Entertainment from the collected official sources. Include revenue or net revenue, adjusted EBITDA where available, net profit/loss where available, period, currency, unit, and source URL for each metric.",
        skill="web-operation",
        query="financial metric extraction from official disclosures",
        upstream=["collect_official_sources"],
        tools=["web_fetch"],
        evidence=["output"],
        contract={"metrics": ["revenue", "adjusted_ebitda", "net_profit_or_loss"]},
        blocks_downstream=True,
        tool_budget=1,
    )
    validate = stage(
        "validate_metrics",
        task="Validate extracted metrics for source consistency, period alignment, currency/unit consistency, and obvious transcription errors. Produce a concise validation note and list any evidence gaps.",
        skill="utility-tools",
        query="finance metric validation",
        upstream=["extract_financial_metrics"],
        tools=[],
        evidence=["output"],
        contract={"checks": ["source_consistency", "period_alignment"]},
        blocks_downstream=True,
        tool_budget=0,
    )
    report = stage(
        "generate_chart_report",
        task="Generate the final Markdown comparison report. Include an executive summary, source-backed comparison table, chart-ready data table, optional Mermaid or text bar chart section, and caveats. Do not claim that a chart image, chart file, or saved artifact was generated.",
        skill="utility-tools",
        query="financial markdown report with chart-ready data",
        upstream=["validate_metrics"],
        tools=[],
        evidence=["output"],
        contract={"outputs": ["comparison_table", "chart_ready_data", "markdown_report"]},
        blocks_downstream=False,
        tool_budget=0,
    )

    return {
        "mode": "team",
        "reason": "finance comparison benefits from staged source collection, extraction, validation, and reporting",
        "strategy": "dag",
        "nodes": [collect, extract, validate, report],
        "adaptation": {"template_used": True},
        "final_synthesis_instruction": "Synthesize node outputs into a concise Markdown finance report.",
    }
def _task() -> TaskRecord:
    """Build the fixed task record that both demo cases plan against."""
    # The same sentence serves as both the description and the goal.
    objective = "Compare MGM China and Galaxy Entertainment using official public financial disclosures."
    # Creator and both timestamp fields carry the same placeholder value.
    placeholder = "demo"
    return TaskRecord(
        task_id="demo-task-mgm-galaxy",
        session_id="web:demo-mgm-galaxy-harness",
        description=objective,
        goal=objective,
        constraints=[],
        priority=0,
        status="open",
        creator=placeholder,
        created_at=placeholder,
        updated_at=placeholder,
    )
def _finance_skill_context(loader: SkillsLoader) -> SkillContext:
    """Build the pinned ``SkillContext`` for the published finance skill.

    Raises:
        RuntimeError: If the loader returns ``None`` for either the skill
            record or the published skill text.
    """
    # Both lookups run before the check, so either one coming back empty is
    # reported the same way.
    skill_record = loader.get_skill_record(SKILL_NAME)
    published_text = loader.load_published_skill(SKILL_NAME)
    if None in (skill_record, published_text):
        raise RuntimeError(f"missing published skill: {SKILL_NAME}")

    body = strip_frontmatter(published_text).strip()
    # NOTE(review): assumes the skill record still exposes team_template and
    # team_template_warnings - confirm against the current record definition.
    return SkillContext(
        name=skill_record.name,
        version=skill_record.version,
        content=body,
        content_hash=skill_record.content_hash or "",
        activation_reason="demo_exact_skill",
        tool_hints=list(skill_record.tool_hints),
        team_template=skill_record.team_template,
        team_template_warnings=list(skill_record.team_template_warnings),
    )
async def _run_case(*, collect_uses_tool: bool) -> dict[str, Any]:
    """Plan the demo task once, run the team when the plan is a team plan, and summarise the outcome.

    Args:
        collect_uses_tool: Forwarded to ``DemoProvider``; it also decides
            whether the report is labelled ``"complete"`` or ``"incomplete"``.

    Returns:
        A dictionary describing the plan, per-node skill bindings and tool
        scopes, node results, synthesis metadata, and the recorded provider
        calls.
    """
    skills_loader = SkillsLoader(WORKSPACE)
    spec_store = SkillSpecStore(WORKSPACE)
    provider_runtime = build_provider_runtime(
        model="demo-model",
        provider_name="custom",
        api_key="demo",
        api_base="http://demo.invalid/v1",
    )
    demo_provider = DemoProvider(collect_uses_tool=collect_uses_tool)
    shared_bundle = ProviderBundle(main_runtime=provider_runtime, main_provider=demo_provider)

    agent_loop = AgentLoop(loader=EngineLoader(workspace=WORKSPACE))
    booted = agent_loop.boot()

    skill_resolver = TaskSkillResolver(
        skills_loader=skills_loader,
        draft_service=DraftService(spec_store),
    )
    planner = TaskExecutionPlanner(
        task_skill_resolver=skill_resolver,
        tool_registry=booted.tool_registry,
    )

    demo_task = _task()
    pinned_context = _finance_skill_context(skills_loader)
    plan = await planner.plan(
        task=demo_task,
        user_message=demo_task.description,
        attempt_index=1,
        provider_bundle=shared_bundle,
        activated_skills=[pinned_context],
        timeout_seconds=5.0,
    )

    def bundle_for(node: Any) -> ProviderBundle:
        # Every node shares the single scripted provider bundle.
        return shared_bundle

    team_result = None
    if plan.is_team:
        team_result = await TeamService(agent_loop).run_team(
            plan.graph,
            parent_task_id=None,
            parent_session_id=demo_task.session_id,
            provider_bundle_factory=bundle_for,
            inherited_pinned_skill_contexts=[pinned_context],
        )

    context, prefix, metadata = TaskAttemptOrchestrator._team_synthesis_outcome(
        plan, team_result, prompt_locale="en"
    )

    # Without a graph or a team result the corresponding report sections are empty.
    graph_nodes = list(plan.graph.nodes) if plan.graph else []
    finished = list(team_result.node_results) if team_result else []

    bindings = []
    for node in graph_nodes:
        agent_meta = node.agent.metadata
        bindings.append(
            {
                "node_id": node.node_id,
                "pinned_skill_names": node.inherited_pinned_skills,
                "pinned_skill_contexts": [skill.name for skill in node.inherited_pinned_skill_contexts],
                "role": node.agent.role,
                "sub_agent_kind": agent_meta.get("sub_agent_kind"),
                "exact_binding_used": agent_meta.get("exact_binding_used"),
            }
        )

    node_reports = []
    for outcome in finished:
        node_reports.append(
            {
                "node_id": outcome.node_id,
                "success": outcome.success,
                "completion_status": outcome.completion_status,
                "finish_reason": outcome.finish_reason,
                "evidence_gaps": outcome.evidence_gaps,
                "output_preview": outcome.output_text[:180],
            }
        )

    case_label = "complete" if collect_uses_tool else "incomplete"
    return {
        "case": case_label,
        "plan_mode": plan.mode,
        "plan_reason": plan.reason,
        "planner_adaptation": plan.planner_adaptation,
        "node_ids": [node.node_id for node in graph_nodes],
        "node_tool_scopes": {node.node_id: node.allowed_tool_names for node in graph_nodes},
        "node_skill_bindings": bindings,
        "team_success": team_result.success if team_result else None,
        "team_summary": team_result.summary if team_result else None,
        "team_run_ids": team_result.run_ids if team_result else [],
        "node_results": node_reports,
        "synthesis_metadata": metadata,
        "incomplete_prefix_present": bool(prefix),
        "outcome_context_preview": context[:600],
        "provider_calls": demo_provider.calls,
    }
async def main() -> None:
    """Run the tool-using case, then the tool-free case, and print both reports as JSON."""
    reports = []
    # The cases run one after the other, tool-using case first.
    for uses_tool in (True, False):
        reports.append(await _run_case(collect_uses_tool=uses_tool))
    # default=str stringifies any value json cannot serialise natively.
    print(json.dumps(reports, ensure_ascii=False, indent=2, default=str))


if __name__ == "__main__":
    asyncio.run(main())