Files
memory-gateway/skills/summarize_case_skill/generate_case_note.py

347 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Generate an Obsidian case note from a normalized SOC case JSON file."""
from __future__ import annotations
import argparse
import asyncio
import json
from pathlib import Path
from typing import Any
from skills.retrieve_context_skill.retrieve_context import RetrievalQuery, retrieve_context_openviking
def _load_case(path: str | Path) -> dict[str, Any]:
with Path(path).open("r", encoding="utf-8") as f:
return json.load(f)
def _extract_alert_type(patterns: list[str]) -> str:
for pattern in patterns:
if pattern.startswith("alert_type:"):
return pattern.split(":", 1)[1]
return "unknown"
def _verdict_label(verdict: str) -> str:
mapping = {
"true_positive": "真报",
"false_positive": "误报",
"suspicious": "可疑待定",
}
return mapping.get(verdict, verdict or "未知")
def _join_values(values: list[str]) -> str:
return ", ".join(values) if values else ""
def _bullet_lines(values: list[str], default: str = "- 无") -> str:
if not values:
return default
return "\n".join(f"- {value}" for value in values)
def _wikilinks(values: list[str]) -> str:
if not values:
return ""
return ", ".join(f"[[{value}]]" for value in values)
def _uri_to_id(uri: str) -> str:
name = uri.rstrip("/").rsplit("/", 1)[-1]
if name.endswith(".json"):
name = name[:-5]
return name
def _derive_process_summary(item: dict[str, Any]) -> list[str]:
steps: list[str] = []
if item.get("abstract"):
steps.append(f"确认告警场景与核心风险:{item['abstract']}")
if item.get("evidence"):
steps.append(f"提取关键证据并交叉验证:{item['evidence'][0]}")
related = item.get("related_refs", {})
if related.get("playbooks") or related.get("kb"):
steps.append("对照关联 playbook / KB 复核告警模式与处置路径。")
if item.get("verdict"):
steps.append(f"基于关键证据与场景模式完成结论判定:{_verdict_label(item['verdict'])}")
return steps[:4]
def _derive_disposition(item: dict[str, Any]) -> list[str]:
verdict = item.get("verdict", "")
evidence = item.get("evidence", [])
lines: list[str] = []
if verdict:
lines.append(f"结论为{_verdict_label(verdict)}")
if evidence:
lines.append(f"最关键依据:{evidence[0]}")
if len(evidence) > 1:
lines.append(f"补充依据:{evidence[1]}")
return lines
def _derive_actions(item: dict[str, Any]) -> list[str]:
scenario = item.get("scenario", "")
verdict = item.get("verdict", "")
actions: list[str] = []
if scenario == "phishing":
actions.extend([
"隔离相同主题、发件人或 URL 的邮件样本。",
"核查用户是否点击或提交凭据,并按需执行凭据重置。",
])
elif scenario == "o365_suspicious_login":
actions.extend([
"复核登录来源、MFA 事件和后续邮箱规则或 OAuth 变更。",
"若存在账号接管迹象,立即执行会话失效和凭据重置。",
])
else:
actions.append("结合关联 playbook 执行后续处置。")
if verdict == "false_positive":
actions = ["记录误报原因,并更新检测例外或抑制条件。"]
return actions
def _derive_reusable_patterns(item: dict[str, Any]) -> tuple[list[str], list[str], list[str]]:
patterns = item.get("patterns", [])
tags = item.get("tags", [])
hit_patterns = [pattern for pattern in patterns if not pattern.startswith("verdict:")]
false_positive_traits = []
variants = []
if item.get("verdict") == "false_positive":
false_positive_traits.append("本案最终确认为误报,可用于补充抑制条件。")
if tags:
variants.append("相关标签:" + ", ".join(tags))
return hit_patterns or [""], false_positive_traits or [""], variants or [""]
async def _fetch_openviking_recommendations(item: dict[str, Any], top_k: int = 3) -> dict[str, list[dict[str, Any]]]:
    """Query OpenViking for related cases and knowledge entries.

    Returns {"cases": [...], "knowledge": [...]} with at most *top_k*
    entries per list; the current case itself is filtered out of the
    case results.
    """
    query = RetrievalQuery(
        scenario=item.get("scenario", "general"),
        alert_type=_extract_alert_type(item.get("patterns", [])),
        summary=item.get("abstract", ""),
        observables=item.get("observables"),
        # Request one extra candidate so top_k cases remain after the
        # current case is excluded below.
        top_k=top_k + 1,
    )
    result = await retrieve_context_openviking(query)

    def _entry(raw: dict[str, Any]) -> dict[str, Any]:
        # Normalize a raw match into the {id, score, abstract} shape.
        return {
            "id": _uri_to_id(raw.get("uri", "")),
            "score": round(float(raw.get("score") or 0), 3),
            "abstract": raw.get("abstract", ""),
        }

    cases: list[dict[str, Any]] = []
    for raw in result.get("matched_cases", []):
        candidate = _entry(raw)
        if candidate["id"] == item.get("id"):
            continue
        cases.append(candidate)
        if len(cases) >= top_k:
            break

    knowledge: list[dict[str, Any]] = []
    for raw in result.get("matched_knowledge", []):
        knowledge.append(_entry(raw))
        if len(knowledge) >= top_k:
            break

    return {"cases": cases, "knowledge": knowledge}
def _merge_unique(primary: list[str], secondary: list[str]) -> list[str]:
merged: list[str] = []
for value in primary + secondary:
if value and value not in merged:
merged.append(value)
return merged
def _recommendation_lines(entries: list[dict[str, Any]], prefix: str) -> list[str]:
lines: list[str] = []
for entry in entries:
abstract = entry.get("abstract", "")
abstract = abstract[:140] + "..." if len(abstract) > 140 else abstract
lines.append(f"[[{entry['id']}]] ({prefix} score={entry['score']}) {abstract}")
return lines
def render_case_note(item: dict[str, Any], recommendations: dict[str, list[dict[str, Any]]] | None = None) -> str:
    """Render the complete Obsidian markdown note for a normalized case.

    Args:
        item: Normalized SOC case dict; "id" is required, all other fields
            are read with defaults.
        recommendations: Optional OpenViking retrieval result with "cases"
            and "knowledge" entry lists (each entry carries "id", "score",
            "abstract"). None is treated as empty recommendations.

    Returns:
        One string: YAML front matter followed by the markdown sections.
    """
    case_id = item["id"]
    title = item.get("title", case_id)
    alert_type = _extract_alert_type(item.get("patterns", []))
    severity = item.get("severity", "unknown")
    verdict = _verdict_label(item.get("verdict", ""))
    entities = item.get("entities", {})
    observables = item.get("observables", {})
    related = item.get("related_refs", {})
    recommendations = recommendations or {"cases": [], "knowledge": []}
    recommended_cases = [entry["id"] for entry in recommendations.get("cases", [])]
    recommended_knowledge = [entry["id"] for entry in recommendations.get("knowledge", [])]
    # Curated related cases first, then retrieved ones, de-duplicated in order.
    merged_cases = _merge_unique(related.get("cases", []), recommended_cases)
    playbooks = related.get("playbooks", [])
    kb_items = related.get("kb", [])
    # Recommended knowledge ids prefixed "PB-" are routed to the playbook
    # list; everything else is filed under KB.
    for knowledge_id in recommended_knowledge:
        if knowledge_id.startswith("PB-"):
            playbooks = _merge_unique(playbooks, [knowledge_id])
        else:
            kb_items = _merge_unique(kb_items, [knowledge_id])
    process_summary = _derive_process_summary(item)
    disposition = _derive_disposition(item)
    actions = _derive_actions(item)
    hit_patterns, false_positive_traits, variants = _derive_reusable_patterns(item)
    # Obsidian tags: fixed taxonomy tags first, then the case's own free tags.
    tags = ["#case", f"#scenario/{item.get('scenario', 'general')}", f"#alert/{alert_type}"]
    if item.get("verdict"):
        tags.append(f"#verdict/{item['verdict'].replace('_', '-')}")
    tags.extend(f"#{tag}" for tag in item.get("tags", []))
    recommendation_case_lines = _recommendation_lines(recommendations.get("cases", []), "case")
    recommendation_knowledge_lines = _recommendation_lines(recommendations.get("knowledge", []), "knowledge")
    # The note is assembled as a flat list of markdown lines and joined once.
    lines = [
        # YAML front matter consumed by Obsidian / Dataview.
        "---",
        f"case_id: {case_id}",
        f"scenario: {item.get('scenario', 'general')}",
        f"alert_type: {alert_type}",
        f"severity: {severity}",
        f"verdict: {item.get('verdict', 'unknown')}",
        "source: soc-memory-poc",
        # Flag whether any OpenViking enrichment actually produced results.
        f"openviking_enriched: {'true' if recommendation_case_lines or recommendation_knowledge_lines else 'false'}",
        "---",
        "",
        f"# {case_id} {title}",
        "",
        "## 基本信息",
        "",
        f"- Case ID: {case_id}",
        f"- 标题: {title}",
        f"- 告警类型: {alert_type}",
        f"- 来源系统: SOC Memory POC Mock Dataset",
        f"- 时间范围: 待补充",
        f"- 研判人 / Agent: AI Agent Draft",
        f"- 最终结论: {verdict}",
        f"- 严重等级: {severity}",
        "",
        "## 告警摘要",
        "",
        item.get("abstract", ""),
        "",
        "## 关键实体",
        "",
        f"- 用户: {_join_values(entities.get('users', []))}",
        f"- 主机: {_join_values(entities.get('hosts', []))}",
        f"- 邮箱: {_join_values(entities.get('mailboxes', []))}",
        f"- IP: {_join_values(observables.get('ips', []))}",
        f"- 域名: {_join_values(observables.get('domains', []))}",
        f"- 文件 Hash: {_join_values(observables.get('hashes', []))}",
        f"- 其他 IOC: {_join_values(observables.get('urls', []) + observables.get('sender_emails', []))}",
        "",
        "## 关键证据",
        "",
        _bullet_lines(item.get("evidence", [])),
        "",
        "## 研判过程摘要",
        "",
        # Numbered list; process_summary holds at most four steps.
        "\n".join(f"{index}. {step}" for index, step in enumerate(process_summary, start=1)),
        "",
        "## 结论依据",
        "",
        _bullet_lines(disposition),
        "",
        "## 处置建议",
        "",
        _bullet_lines(actions),
        "",
        "## 可复用模式",
        "",
        f"- 命中模式: {_join_values(hit_patterns)}",
        f"- 误报特征: {_join_values(false_positive_traits)}",
        f"- 需关注的变体: {_join_values(variants)}",
        "",
        "## 关联知识",
        "",
        f"- 关联 Playbook: {_wikilinks(playbooks)}",
        f"- 关联 KB: {_wikilinks(kb_items)}",
        f"- 关联历史 Case: {_wikilinks(merged_cases)}",
        f"- 关联实体: {_wikilinks(entities.get('users', []) + entities.get('hosts', []))}",
        "",
        "## 自动关联推荐",
        "",
        "### 推荐历史 Case",
        "",
        _bullet_lines(recommendation_case_lines),
        "",
        "### 推荐知识条目",
        "",
        _bullet_lines(recommendation_knowledge_lines),
        "",
        "## Lessons Learned",
        "",
        "- 本案可沉淀为后续同类告警的快速判定参考。",
        "- 若后续出现相同 lure、同类登录模式或相同关键证据应优先联想本案与关联知识。",
        "",
        "## 标签",
        "",
        _bullet_lines(tags),
        "",
    ]
    return "\n".join(lines)
def build_output_path(item: dict[str, Any], output_dir: str | Path) -> Path:
    """Return <output_dir>/<scenario>/"<id> - <title>.md", sanitizing slashes."""
    case_id = item["id"]
    # Slashes in the title would create unintended subdirectories.
    title = item.get("title", case_id).replace("/", "-")
    scenario_dir = Path(output_dir) / item.get("scenario", "general")
    return scenario_dir / f"{case_id} - {title}.md"
async def generate_case_note_async(
    input_path: str | Path,
    output_dir: str | Path,
    enrich_from_openviking: bool = False,
    top_k: int = 3,
) -> Path:
    """Load a case JSON, optionally enrich it via OpenViking, write the note.

    Returns the path of the markdown file that was written; parent
    directories are created as needed.
    """
    item = _load_case(input_path)
    recommendations: dict[str, list[dict[str, Any]]] | None = None
    if enrich_from_openviking:
        recommendations = await _fetch_openviking_recommendations(item, top_k=top_k)
    target = build_output_path(item, output_dir)
    target.parent.mkdir(parents=True, exist_ok=True)
    note = render_case_note(item, recommendations=recommendations)
    target.write_text(note, encoding="utf-8")
    return target
def main() -> None:
    """CLI entry point: parse arguments, generate one note, print its path."""
    parser = argparse.ArgumentParser(description="Generate an Obsidian case note from a normalized case JSON file.")
    parser.add_argument("--input", required=True, help="Normalized case JSON path")
    parser.add_argument("--output-dir", default="obsidian-vault/02_Cases", help="Obsidian cases output directory")
    parser.add_argument("--enrich-from-openviking", action="store_true", help="Retrieve related cases and knowledge from OpenViking")
    parser.add_argument("--top-k", type=int, default=3, help="Number of OpenViking recommendations per type")
    args = parser.parse_args()
    coroutine = generate_case_note_async(
        args.input,
        args.output_dir,
        enrich_from_openviking=args.enrich_from_openviking,
        top_k=args.top_k,
    )
    # asyncio.run drives the async generator to completion on a fresh loop.
    print(asyncio.run(coroutine))


if __name__ == "__main__":
    main()