feat(task): 添加任务修订功能和超时处理机制

添加了 `revise_task` 路由动作类型,允许用户修改或纠正最新活动任务的结果,或重新执行该任务。
实现了工具失败指导原则,防止相同类别工具重复失败。
为任务规划器添加了超时处理机制,避免长时间等待。

BREAKING CHANGE: 任务路由逻辑已更新,新增 `revise_task` 动作类型。

fix(api): 修复任务详情API返回完整流程投影

修复了任务详情API端点,现在会包含过滤后的流程运行、事件和工件信息,
并确保时间戳字段正确序列化。

refactor(engine): 优化任务技能解析器摘要节点处理

改进了任务技能解析器对摘要节点的处理逻辑,对于仅依赖文本生成功能的
摘要节点不再分配具体技能,直接使用依赖项输出进行汇总。

test: 增加任务修订和超时处理测试用例

添加了测试用例验证任务修订输入记录反馈、超时回退到单模式以及
摘要节点技能解析等新功能。
This commit is contained in:
2026-05-21 16:40:44 +08:00
parent 0caca8db8a
commit a27560102b
22 changed files with 855 additions and 93 deletions

View File

@ -78,3 +78,81 @@ def test_task_delete_api_removes_backend_task(tmp_path: Path) -> None:
assert deleted.json()["task_id"] == task.task_id
assert all(item["task_id"] != task.task_id for item in listed.json())
assert missing.status_code == 404
def test_task_detail_api_includes_filtered_process_projection(tmp_path: Path) -> None:
    """Task detail responses expose process data for the requested task only.

    Two tasks are created in the same session. A planner event and a failed
    team-run event are recorded for the first task, and a planner event for
    the second. The detail endpoint for the first task must list only its own
    runs and events.
    """
    session_id = "web:detail"
    service = AgentService(workspace=tmp_path)
    loaded = service.create_loop().boot()
    task = loaded.task_service.create_task(  # type: ignore[union-attr]
        session_id=session_id,
        description="补充赛事数据",
    )
    other_task = loaded.task_service.create_task(  # type: ignore[union-attr]
        session_id=session_id,
        description="不相关任务",
    )

    def record_event(event_type: str, event_payload: dict) -> None:
        # Every fixture event is a hidden system message in the shared session.
        loaded.session_manager.append_message(
            session_id,
            role="system",
            event_type=event_type,
            event_payload=event_payload,
            context_visible=False,
        )

    record_event(
        "task_execution_planned",
        {
            "task_id": task.task_id,
            "attempt_index": 2,
            "plan_mode": "team",
            "strategy": "parallel",
            "node_ids": ["search_match_result", "search_match_stats"],
            "reason": "needs separate evidence gathering",
        },
    )
    failed_node = {
        "node_id": "search_match_stats",
        "success": False,
        "output_text": "",
        "run_id": "sub-run",
        "finish_reason": "max_tool_iterations",
        "error": "max_tool_iterations",
    }
    record_event(
        "task_team_run_failed",
        {
            "task_id": task.task_id,
            "attempt_index": 2,
            "plan_mode": "team",
            "strategy": "parallel",
            "team_success": False,
            "team_run_ids": ["sub-run"],
            "node_results": [failed_node],
            "error": "one or more team nodes failed",
        },
    )
    # Event for the unrelated task; it must not appear in the response below.
    record_event(
        "task_execution_planned",
        {
            "task_id": other_task.task_id,
            "attempt_index": 1,
            "plan_mode": "single",
            "strategy": None,
            "node_ids": [],
        },
    )

    app = create_app(service=service, manage_service_lifecycle=False)
    with TestClient(app) as client:
        response = client.get(f"/api/tasks/{task.task_id}")

    assert response.status_code == 200
    payload = response.json()

    expected_run_ids = [f"task:{task.task_id}:attempt:2", "sub-run"]
    assert [run["run_id"] for run in payload["process_runs"]] == expected_run_ids

    expected_actors = {"Task Planner", "Task Team", "search_match_stats"}
    assert {event["actor_name"] for event in payload["process_events"]} == expected_actors

    assert all(event["metadata"]["task_id"] == task.task_id for event in payload["process_events"])

View File

@ -103,6 +103,20 @@ def test_router_continues_active_task_from_llm_decision() -> None:
assert provider.calls[0]["max_tokens"] == 256
def test_router_marks_revision_from_llm_decision() -> None:
    """A ``revise_task`` reply from the provider yields a task decision that starts no new task."""
    router = MainAgentRouter()
    classification = router.classify(
        "再详细一点,并加上表格",
        active_task=_task(),
        provider=RouterProvider(
            '{"action":"revise_task","reason":"user requested changes","short_title":"任务连续性"}'
        ),
    )
    decision = asyncio.run(classification)

    assert decision.is_task
    assert decision.starts_new_task is False
    assert decision.action == "revise_task"
def test_router_receives_thinking_mode() -> None:
provider = RouterProvider('{"action":"simple_chat","reason":"simple"}')
decision = asyncio.run(

View File

@ -0,0 +1,12 @@
from beaver.engine.session.models import MessageRecord
def test_conversation_message_preserves_timestamp() -> None:
    """``to_conversation_message`` keeps the record's ``timestamp`` value."""
    record = MessageRecord(role="user", content="hello", timestamp=1_779_329_600.0, message_id=42)
    conversation_message = record.to_conversation_message()
    assert conversation_message["timestamp"] == 1_779_329_600.0

View File

@ -27,6 +27,22 @@ class PlannerProvider(LLMProvider):
return "stub-model"
class HangingPlannerProvider(LLMProvider):
    """Provider stub whose ``chat`` sleeps for ten seconds before replying."""

    async def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        model: str | None = None,
        max_tokens: int = 4096,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Sleep for ten seconds, then return a canned ``{"mode":"team"}`` reply.

        None of the arguments are read.
        """
        await asyncio.sleep(10)
        delayed_reply = LLMResponse(
            content='{"mode":"team"}',
            finish_reason="stop",
            provider_name="stub",
            model="stub-model",
        )
        return delayed_reply

    def get_default_model(self) -> str:
        """Return the fixed stub model name."""
        return "stub-model"
def _task() -> TaskRecord:
return TaskRecord(
task_id="task-1",
@ -49,6 +65,13 @@ def _bundle(response: str) -> ProviderBundle:
)
def _hanging_bundle() -> ProviderBundle:
    """Build a ``ProviderBundle`` whose main provider is a ``HangingPlannerProvider``."""
    runtime = SimpleNamespace(model="stub-model", provider_name="stub")
    provider = HangingPlannerProvider()
    return ProviderBundle(main_runtime=runtime, main_provider=provider)
def test_planner_selects_single_mode() -> None:
plan = asyncio.run(
TaskExecutionPlanner().plan(
@ -95,6 +118,22 @@ def test_planner_builds_team_graph() -> None:
assert plan.final_synthesis_instruction == "merge the findings"
def test_planner_timeout_falls_back_to_single() -> None:
    """Planning with a hanging provider and a tiny timeout yields a single-mode fallback plan."""
    planner = TaskExecutionPlanner()
    planning = planner.plan(
        task=_task(),
        user_message="implement workflow",
        attempt_index=1,
        provider_bundle=_hanging_bundle(),
        timeout_seconds=0.01,
    )
    plan = asyncio.run(planning)

    assert plan.mode == "single"
    assert plan.reason == "planner_failed"
    # fallback_error may be None, so normalise it before the substring check.
    recorded_error = plan.fallback_error or ""
    assert "TimeoutError" in recorded_error
def test_planner_team_nodes_can_target_skills_without_agent_roles() -> None:
plan = TaskExecutionPlanner().from_json(
"""

View File

@ -290,6 +290,109 @@ def test_active_task_continues_until_llm_closes_it(tmp_path: Path) -> None:
assert loaded.task_service.active_task_view("web:continue") is None
def test_active_task_revision_input_records_feedback_and_reruns(tmp_path: Path) -> None:
    """A ``revise_task`` message on an active task records feedback and reruns that task.

    The first message creates a task. The second is routed as ``revise_task``
    and must reuse the same task, add a second run, store the user's text as
    ``revise`` feedback against the first run, and mark the first assistant
    message with ``feedback_state == "revise"``.
    """
    session_id = "web:revise-direct"
    revision_request = "再详细一点,并加上明后天穿衣建议"

    planner = _single_planner()
    validation = StubValidationService(
        [
            ValidationResult(passed=True, score=0.9, validator="test"),
            ValidationResult(passed=True, score=0.95, validator="test"),
        ]
    )
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=planner,
        validation_service=validation,
    )
    service = AgentService(loader=loader)

    first = asyncio.run(
        service.process_direct(
            "查询珠海天气",
            session_id=session_id,
            provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
        )
    )
    second = asyncio.run(
        service.process_direct(
            revision_request,
            session_id=session_id,
            provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
        )
    )

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(first.task_id)
    messages = loaded.session_manager.get_messages_as_conversation(first.session_id)

    first_run_replies = [
        entry
        for entry in messages
        if entry.get("role") == "assistant" and entry.get("run_id") == first.run_id
    ]
    first_assistant = first_run_replies[-1]
    user_messages = [entry.get("content") for entry in messages if entry.get("role") == "user"]

    assert second.task_id == first.task_id
    assert task is not None
    assert task.status == "awaiting_feedback"
    assert len(task.run_ids) == 2

    # created_at is copied from the stored entry, so only the other fields are pinned.
    expected_feedback = [
        {
            "feedback_type": "revise",
            "comment": revision_request,
            "run_id": first.run_id,
            "created_at": task.feedback[0]["created_at"],
        }
    ]
    assert task.feedback == expected_feedback
    assert first_assistant["feedback_state"] == "revise"
    assert revision_request in user_messages
def test_explicit_revision_feedback_then_input_reruns_without_duplicate_feedback(tmp_path: Path) -> None:
    """Explicit ``revise`` feedback followed by a ``revise_task`` message keeps one feedback entry.

    After the first run, feedback is submitted explicitly and must report
    ``needs_revision``. The follow-up message reruns the same task, and the
    stored feedback must remain the single explicit entry.
    """
    session_id = "web:explicit-revise"
    explicit_comment = "准备补充穿衣建议"

    planner = _single_planner()
    validation = StubValidationService(
        [
            ValidationResult(passed=True, score=0.9, validator="test"),
            ValidationResult(passed=True, score=0.95, validator="test"),
        ]
    )
    loader = EngineLoader(
        workspace=tmp_path,
        task_execution_planner=planner,
        validation_service=validation,
    )
    service = AgentService(loader=loader)

    first = asyncio.run(
        service.process_direct(
            "查询珠海天气",
            session_id=session_id,
            provider_bundle=_bundle("珠海天气概览", route_action="new_task"),
        )
    )
    feedback = asyncio.run(
        service.submit_feedback(
            session_id=first.session_id,
            run_id=first.run_id,
            feedback_type="revise",
            comment=explicit_comment,
        )
    )
    second = asyncio.run(
        service.process_direct(
            "加上明后天穿衣建议",
            session_id=session_id,
            provider_bundle=_bundle("更新后的珠海天气和穿衣建议", route_action="revise_task"),
        )
    )

    loaded = service.create_loop().boot()
    task = loaded.task_service.get_task(first.task_id)

    assert feedback["task_status"] == "needs_revision"
    assert second.task_id == first.task_id
    assert task is not None
    assert task.status == "awaiting_feedback"
    assert len(task.run_ids) == 2
    assert len(task.feedback) == 1

    stored_feedback = task.feedback[0]
    assert stored_feedback["feedback_type"] == "revise"
    assert stored_feedback["comment"] == explicit_comment
def test_validation_failure_retries_once(tmp_path: Path) -> None:
service = AgentService(
loader=EngineLoader(
@ -545,6 +648,8 @@ def test_task_mode_team_failure_still_uses_main_synthesis(tmp_path: Path) -> Non
assert result.output_text == "fallback synthesized answer"
assert any(event.event_type == "task_team_run_failed" for event in events)
assert "sub-agent unavailable" in main_provider.calls[0][0]["content"]
assert "same class of tools fails repeatedly" in main_provider.calls[0][0]["content"]
assert "user-visible fallback answer" in main_provider.calls[0][0]["content"]
def test_task_mode_team_retry_hides_first_synthesis_run(tmp_path: Path) -> None:

View File

@ -65,8 +65,8 @@ def _publish_skill(workspace: Path, *, skill_name: str) -> None:
store = SkillSpecStore(workspace)
draft = DraftService(store).create_new_skill_draft(
skill_name=skill_name,
proposed_content="# API Contract Review\n\nCheck schema compatibility and breaking changes.",
proposed_frontmatter={"description": "API contract compatibility review", "tools": []},
proposed_content=f"# {skill_name}\n\nCheck schema compatibility and breaking changes.",
proposed_frontmatter={"description": f"{skill_name} capability", "tools": []},
created_by="tester",
reason="test",
)
@ -174,3 +174,51 @@ def test_task_skill_resolver_generates_ephemeral_guidance_when_missing(tmp_path:
assert reports[0].ephemeral_guidance_id is not None
assert reports[0].ephemeral_guidance_name == "api-compatibility-review"
assert reports[0].ephemeral_used is True
def test_task_skill_resolver_keeps_summary_nodes_skillless(tmp_path: Path) -> None:
    """A summary node resolves to no skills and triggers no provider call.

    The node requires only "text generation" and starts with an inherited
    pinned skill and skill context. After resolution both inherited lists must
    be empty, no skill may be selected, and the provider must stay unused.
    """
    _publish_skill(tmp_path, skill_name="multi-search-engine")
    provider = RecordingProvider(['["multi-search-engine"]'])
    resolver = TaskSkillResolver(
        skills_loader=SkillsLoader(tmp_path),
        draft_service=DraftService(SkillSpecStore(tmp_path)),
    )

    summary_agent = AgentDescriptor(
        name="summarize",
        metadata={
            "skill_query": "Summarization",
            "required_capabilities": ["text generation"],
        },
    )
    summary_node = ExecutionNode(
        "summarize",
        "Compile a clear, concise summary from dependency outputs for the user.",
        summary_agent,
        depends_on=["verify_result"],
        inherited_pinned_skills=["multi-search-engine"],
        inherited_pinned_skill_contexts=[
            SkillContext(name="ephemeral:search-guidance", content="Search again.")
        ],
    )
    graph = ExecutionGraph(strategy="dag", nodes=[summary_node])

    resolution = resolver.resolve_graph(
        graph,
        task=_task(),
        user_message="summarize result",
        attempt_index=2,
        provider_bundle=_bundle(provider),
    )
    resolved, reports = asyncio.run(resolution)

    assert resolved.nodes[0].inherited_pinned_skills == []
    assert resolved.nodes[0].inherited_pinned_skill_contexts == []
    assert resolved.nodes[0].agent.metadata["selected_skill_names"] == []

    assert reports[0].selected_skill_names == []
    assert reports[0].ephemeral_used is False
    assert reports[0].reason == "summary node uses dependency outputs directly"
    assert provider.calls == []