"""Generate an Obsidian case note from a normalized SOC case JSON file.""" from __future__ import annotations import argparse import asyncio import json from pathlib import Path from typing import Any from skills.retrieve_context_skill.retrieve_context import RetrievalQuery, retrieve_context_openviking def _load_case(path: str | Path) -> dict[str, Any]: with Path(path).open("r", encoding="utf-8") as f: return json.load(f) def _extract_alert_type(patterns: list[str]) -> str: for pattern in patterns: if pattern.startswith("alert_type:"): return pattern.split(":", 1)[1] return "unknown" def _verdict_label(verdict: str) -> str: mapping = { "true_positive": "真报", "false_positive": "误报", "suspicious": "可疑待定", } return mapping.get(verdict, verdict or "未知") def _join_values(values: list[str]) -> str: return ", ".join(values) if values else "无" def _bullet_lines(values: list[str], default: str = "- 无") -> str: if not values: return default return "\n".join(f"- {value}" for value in values) def _wikilinks(values: list[str]) -> str: if not values: return "无" return ", ".join(f"[[{value}]]" for value in values) def _uri_to_id(uri: str) -> str: name = uri.rstrip("/").rsplit("/", 1)[-1] if name.endswith(".json"): name = name[:-5] return name def _derive_process_summary(item: dict[str, Any]) -> list[str]: steps: list[str] = [] if item.get("abstract"): steps.append(f"确认告警场景与核心风险:{item['abstract']}") if item.get("evidence"): steps.append(f"提取关键证据并交叉验证:{item['evidence'][0]}") related = item.get("related_refs", {}) if related.get("playbooks") or related.get("kb"): steps.append("对照关联 playbook / KB 复核告警模式与处置路径。") if item.get("verdict"): steps.append(f"基于关键证据与场景模式完成结论判定:{_verdict_label(item['verdict'])}。") return steps[:4] def _derive_disposition(item: dict[str, Any]) -> list[str]: verdict = item.get("verdict", "") evidence = item.get("evidence", []) lines: list[str] = [] if verdict: lines.append(f"结论为{_verdict_label(verdict)}。") if evidence: lines.append(f"最关键依据:{evidence[0]}") if len(evidence) > 1: lines.append(f"补充依据:{evidence[1]}") return lines def _derive_actions(item: dict[str, Any]) -> list[str]: scenario = item.get("scenario", "") verdict = item.get("verdict", "") actions: list[str] = [] if scenario == "phishing": actions.extend([ "隔离相同主题、发件人或 URL 的邮件样本。", "核查用户是否点击或提交凭据,并按需执行凭据重置。", ]) elif scenario == "o365_suspicious_login": actions.extend([ "复核登录来源、MFA 事件和后续邮箱规则或 OAuth 变更。", "若存在账号接管迹象,立即执行会话失效和凭据重置。", ]) else: actions.append("结合关联 playbook 执行后续处置。") if verdict == "false_positive": actions = ["记录误报原因,并更新检测例外或抑制条件。"] return actions def _derive_reusable_patterns(item: dict[str, Any]) -> tuple[list[str], list[str], list[str]]: patterns = item.get("patterns", []) tags = item.get("tags", []) hit_patterns = [pattern for pattern in patterns if not pattern.startswith("verdict:")] false_positive_traits = [] variants = [] if item.get("verdict") == "false_positive": false_positive_traits.append("本案最终确认为误报,可用于补充抑制条件。") if tags: variants.append("相关标签:" + ", ".join(tags)) return hit_patterns or ["无"], false_positive_traits or ["无"], variants or ["无"] async def _fetch_openviking_recommendations(item: dict[str, Any], top_k: int = 3) -> dict[str, list[dict[str, Any]]]: query = RetrievalQuery( scenario=item.get("scenario", "general"), alert_type=_extract_alert_type(item.get("patterns", [])), summary=item.get("abstract", ""), observables=item.get("observables"), top_k=top_k + 1, ) result = await retrieve_context_openviking(query) case_entries: list[dict[str, Any]] = [] for entry in result.get("matched_cases", []): candidate_id = _uri_to_id(entry.get("uri", "")) if candidate_id == item.get("id"): continue case_entries.append( { "id": candidate_id, "score": round(float(entry.get("score") or 0), 3), "abstract": entry.get("abstract", ""), } ) if len(case_entries) >= top_k: break knowledge_entries: list[dict[str, Any]] = [] for entry in result.get("matched_knowledge", []): knowledge_entries.append( { "id": _uri_to_id(entry.get("uri", "")), "score": round(float(entry.get("score") or 0), 3), "abstract": entry.get("abstract", ""), } ) if len(knowledge_entries) >= top_k: break return { "cases": case_entries, "knowledge": knowledge_entries, } def _merge_unique(primary: list[str], secondary: list[str]) -> list[str]: merged: list[str] = [] for value in primary + secondary: if value and value not in merged: merged.append(value) return merged def _recommendation_lines(entries: list[dict[str, Any]], prefix: str) -> list[str]: lines: list[str] = [] for entry in entries: abstract = entry.get("abstract", "") abstract = abstract[:140] + "..." if len(abstract) > 140 else abstract lines.append(f"[[{entry['id']}]] ({prefix} score={entry['score']}) {abstract}") return lines def render_case_note(item: dict[str, Any], recommendations: dict[str, list[dict[str, Any]]] | None = None) -> str: case_id = item["id"] title = item.get("title", case_id) alert_type = _extract_alert_type(item.get("patterns", [])) severity = item.get("severity", "unknown") verdict = _verdict_label(item.get("verdict", "")) entities = item.get("entities", {}) observables = item.get("observables", {}) related = item.get("related_refs", {}) recommendations = recommendations or {"cases": [], "knowledge": []} recommended_cases = [entry["id"] for entry in recommendations.get("cases", [])] recommended_knowledge = [entry["id"] for entry in recommendations.get("knowledge", [])] merged_cases = _merge_unique(related.get("cases", []), recommended_cases) playbooks = related.get("playbooks", []) kb_items = related.get("kb", []) for knowledge_id in recommended_knowledge: if knowledge_id.startswith("PB-"): playbooks = _merge_unique(playbooks, [knowledge_id]) else: kb_items = _merge_unique(kb_items, [knowledge_id]) process_summary = _derive_process_summary(item) disposition = _derive_disposition(item) actions = _derive_actions(item) hit_patterns, false_positive_traits, variants = _derive_reusable_patterns(item) tags = ["#case", f"#scenario/{item.get('scenario', 'general')}", f"#alert/{alert_type}"] if item.get("verdict"): tags.append(f"#verdict/{item['verdict'].replace('_', '-')}") tags.extend(f"#{tag}" for tag in item.get("tags", [])) recommendation_case_lines = _recommendation_lines(recommendations.get("cases", []), "case") recommendation_knowledge_lines = _recommendation_lines(recommendations.get("knowledge", []), "knowledge") lines = [ "---", f"case_id: {case_id}", f"scenario: {item.get('scenario', 'general')}", f"alert_type: {alert_type}", f"severity: {severity}", f"verdict: {item.get('verdict', 'unknown')}", "source: soc-memory-poc", f"openviking_enriched: {'true' if recommendation_case_lines or recommendation_knowledge_lines else 'false'}", "---", "", f"# {case_id} {title}", "", "## 基本信息", "", f"- Case ID: {case_id}", f"- 标题: {title}", f"- 告警类型: {alert_type}", f"- 来源系统: SOC Memory POC Mock Dataset", f"- 时间范围: 待补充", f"- 研判人 / Agent: AI Agent Draft", f"- 最终结论: {verdict}", f"- 严重等级: {severity}", "", "## 告警摘要", "", item.get("abstract", "无"), "", "## 关键实体", "", f"- 用户: {_join_values(entities.get('users', []))}", f"- 主机: {_join_values(entities.get('hosts', []))}", f"- 邮箱: {_join_values(entities.get('mailboxes', []))}", f"- IP: {_join_values(observables.get('ips', []))}", f"- 域名: {_join_values(observables.get('domains', []))}", f"- 文件 Hash: {_join_values(observables.get('hashes', []))}", f"- 其他 IOC: {_join_values(observables.get('urls', []) + observables.get('sender_emails', []))}", "", "## 关键证据", "", _bullet_lines(item.get("evidence", [])), "", "## 研判过程摘要", "", "\n".join(f"{index}. {step}" for index, step in enumerate(process_summary, start=1)), "", "## 结论依据", "", _bullet_lines(disposition), "", "## 处置建议", "", _bullet_lines(actions), "", "## 可复用模式", "", f"- 命中模式: {_join_values(hit_patterns)}", f"- 误报特征: {_join_values(false_positive_traits)}", f"- 需关注的变体: {_join_values(variants)}", "", "## 关联知识", "", f"- 关联 Playbook: {_wikilinks(playbooks)}", f"- 关联 KB: {_wikilinks(kb_items)}", f"- 关联历史 Case: {_wikilinks(merged_cases)}", f"- 关联实体: {_wikilinks(entities.get('users', []) + entities.get('hosts', []))}", "", "## 自动关联推荐", "", "### 推荐历史 Case", "", _bullet_lines(recommendation_case_lines), "", "### 推荐知识条目", "", _bullet_lines(recommendation_knowledge_lines), "", "## Lessons Learned", "", "- 本案可沉淀为后续同类告警的快速判定参考。", "- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。", "", "## 标签", "", _bullet_lines(tags), "", ] return "\n".join(lines) def build_output_path(item: dict[str, Any], output_dir: str | Path) -> Path: scenario = item.get("scenario", "general") case_id = item["id"] safe_title = item.get("title", case_id).replace("/", "-") return Path(output_dir) / scenario / f"{case_id} - {safe_title}.md" async def generate_case_note_async( input_path: str | Path, output_dir: str | Path, enrich_from_openviking: bool = False, top_k: int = 3, ) -> Path: item = _load_case(input_path) recommendations: dict[str, list[dict[str, Any]]] | None = None if enrich_from_openviking: recommendations = await _fetch_openviking_recommendations(item, top_k=top_k) output_path = build_output_path(item, output_dir) output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(render_case_note(item, recommendations=recommendations), encoding="utf-8") return output_path def main() -> None: parser = argparse.ArgumentParser(description="Generate an Obsidian case note from a normalized case JSON file.") parser.add_argument("--input", required=True, help="Normalized case JSON path") parser.add_argument("--output-dir", default="obsidian-vault/02_Cases", help="Obsidian cases output directory") parser.add_argument("--enrich-from-openviking", action="store_true", help="Retrieve related cases and knowledge from OpenViking") parser.add_argument("--top-k", type=int, default=3, help="Number of OpenViking recommendations per type") args = parser.parse_args() output_path = asyncio.run( generate_case_note_async( args.input, args.output_dir, enrich_from_openviking=args.enrich_from_openviking, top_k=args.top_k, ) ) print(output_path) if __name__ == "__main__": main()