diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f99717d --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Local runtime configuration +config.yaml +*.local.yaml +*.secret.yaml +.env +.env.* + +# Python cache / test artifacts +__pycache__/ +*.py[cod] +.pytest_cache/ +.ruff_cache/ +.mypy_cache/ +.coverage +htmlcov/ + +# Virtual environments +.venv/ +venv/ + +# Local editor / agent metadata +.codex +.DS_Store + +# Runtime output +*.log +*.tmp diff --git a/README.md b/README.md index 4cf147d..90e8401 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,12 @@ obsidian-vault/ ### 7. Hermes Agent 集成 -已在本机 Hermes skill 目录创建 `soc-memory-poc` skill: +已在本机 Hermes skill 目录创建 `soc-memory-poc` skill,并在仓库中保留了一份可版本化副本: + +- 本机 Hermes 实际加载路径:`/home/tom/.hermes/skills/soc-memory-poc/` +- 仓库副本路径:`integrations/hermes/soc-memory-poc/` + +本机 Hermes skill 文件结构: ```text /home/tom/.hermes/skills/soc-memory-poc/ diff --git a/SOC-Memory-POC-Design.md b/SOC-Memory-POC-Design.md new file mode 100644 index 0000000..5a6d9e8 --- /dev/null +++ b/SOC-Memory-POC-Design.md @@ -0,0 +1,1113 @@ +# SOC 研判辅助记忆系统 POC 方案 + +## 一、整体架构设计 + +### A. 架构图 + +``` +┌─────────────────────────────────────────────────────────────────────────────────────────┐ +│ SOC Case 研判工作流 │ +├─────────────────────────────────────────────────────────────────────────────────────────┤ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Ticket │ │ 情报系统 │ │ 历史 Case │ │ 知识库 │ │ +│ │ System │ │ (Intel) │ │ Archive │ │ (KB/Playbook)│ │ +│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ │ │ +│ ▼ ▼ ▼ ▼ │ +│ ┌─────────────────────────────────────────────────────────────────────────────┐ │ +│ │ 数据接入层 (Ingestion Pipeline) │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ +│ │ │ ingest_skill│ │ extract_ │ │ classify_ │ │ commit_ │ │ │ +│ │ │ │──▶│ memory_skill│──▶│ memory_skill│──▶│ memory_skill│ │ │ +│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │ +│ └─────────────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌──────────────────────────────┼──────────────────────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌──────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ Obsidian │◀──────────│ OpenViking │◀───────────│ EverMemOS │ │ +│ │ (知识沉淀) │ │ (统一 Context) │ │ (长期记忆整理) │ │ +│ │ │ │ │ │ │ │ +│ │ - KB/Playbook│ │ - memory │ │ - 抽取/归纳 │ │ +│ │ - Case Note │ │ - resources │ │ - 去重/合并 │ │ +│ │ - 模板/标签 │ │ - skills │ │ - 衰减/演化 │ │ +│ └──────┬───────┘ └────────┬────────┘ └────────┬────────┘ │ +│ │ │ │ │ +└─────────┼──────────────────────────────┼──────────────────────────────┼──────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────────────────────────────────────────────────────────────────────────────┐ +│ AI Agent / Harness System │ +│ ┌─────────────────────────────────────────────────────────────────────────────┐ │ +│ │ retrieve_context_skill │ │ +│ │ 根据当前 case 检索 ──▶ OpenViking ──▶ 返回相关上下文 ──▶ 辅助研判 │ │ +│ └─────────────────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +### B. 
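在进入组件职责划分之前,先补充一个数据接入层四个 skill 串联方式的最小 Python 示意。以下只是按上图流程写的假设性草图:`MemoryFragment` 的字段、各函数的占位实现都不是仓库中的真实代码,置信度阈值 0.6 取自后文 extract_memory_skill 的规格。

```python
from dataclasses import dataclass, field


@dataclass
class MemoryFragment:
    """extract_memory_skill 产出的记忆碎片(示意结构,字段与后文 skill 规格对应)。"""
    key: str
    value: str
    source: str
    confidence: float
    memory_type: str = "unclassified"
    tags: list[str] = field(default_factory=list)


def ingest(raw: dict) -> dict:
    """ingest_skill:把原始数据整理成统一的中间格式(这里只做最小示意)。"""
    return {"text": raw.get("text", ""), "source": raw.get("source", "unknown")}


def extract(item: dict) -> list[MemoryFragment]:
    """extract_memory_skill:抽取高价值碎片;真实实现会调用 LLM,这里用占位逻辑。"""
    if not item["text"]:
        return []
    return [MemoryFragment(key=item["text"][:40], value=item["text"],
                           source=item["source"], confidence=0.8)]


def classify(fragment: MemoryFragment) -> str:
    """classify_memory_skill:根据来源做一个极简的类型判断。"""
    return "case" if fragment.source == "case_archive" else "knowledge"


def commit(fragment: MemoryFragment) -> None:
    """commit_memory_skill:示意写回,这里仅打印目标记忆层。"""
    print(f"commit {fragment.key!r} -> {fragment.memory_type}")


def run_pipeline(raw_items: list[dict]) -> list[MemoryFragment]:
    """按 ingest -> extract -> classify -> commit 的顺序串联四个 skill。"""
    fragments = [f for raw in raw_items for f in extract(ingest(raw))]
    fragments = [f for f in fragments if f.confidence >= 0.6]  # 过滤低置信度内容
    for fragment in fragments:
        fragment.memory_type = classify(fragment)
        commit(fragment)
    return fragments


if __name__ == "__main__":
    run_pipeline([{"text": "钓鱼邮件 case 的研判结论……", "source": "case_archive"}])
```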
组件职责划分 + +| 组件 | 定位 | 核心职责 | +|------|------|----------| +| **Obsidian** | 人工可维护的知识沉淀层 | KB/Playbook/报告模板、Case Note 人工编辑、可视化知识网络 | +| **OpenViking** | 统一 Context/Memory Gateway | 统一入口、namespace 管理、资源/技能/记忆的组织与检索 | +| **EverMemOS** | 后台长期记忆整理层 | 抽取式记忆、归纳合并、去重衰减、长期记忆演化 | +| **Skills** | 记忆流程自动化 | 导入、抽取、分类、写回、检索、清理全流程 | +| **AI Agent** | 消费端 | 调用 skills 获取上下文,辅助研判 | + +--- + +## 二、数据分层设计 + +### A. 五类记忆模型 + +``` +┌────────────────────────────────────────────────────────────────────────────────┐ +│ 数据分层架构 │ +├──────────────────┬───────────────────────────────────────────────────────────┤ +│ 记忆类型 │ 来源与特征 │ +├──────────────────┼───────────────────────────────────────────────────────────┤ +│ Knowledge Memory │ 来源: KB、月报、报告、Playbook、PO │ +│ (知识库记忆) │ 特征: 静态、高价值、可复用、长期有效 │ +│ │ 示例: "钓鱼邮件识别 playbook"、"CVE-2024-XXXX 分析" │ +├──────────────────┼───────────────────────────────────────────────────────────┤ +│ Case Memory │ 来源: 历史 case、研判结论、关键证据、误报/真报模式 │ +│ (案例记忆) │ 特征: 经验沉淀、模式识别、场景化 │ +│ │ 示例: "2024年Q3 钓鱼邮件攻击趋势"、"某 APT 组织 TTP" │ +├──────────────────┼───────────────────────────────────────────────────────────┤ +│ Process Memory │ 来源: AI agent 研判过程中的中间步骤、工具调用结果、推理路径 │ +│ (过程记忆) │ 特征: 临时性、高信息量、需抽取提炼 │ +│ │ 示例: "本次研判使用的 IOC 列表"、"推理链草稿" │ +├──────────────────┼───────────────────────────────────────────────────────────┤ +│ Profile Memory │ 来源: Analyst 偏好、团队规范、告警分级标准 │ +│ (配置/偏好记忆) │ 特征: 相对稳定、个性化、可影响检索排序 │ +│ │ 示例: "某 Analyst 偏好详细日志"、"夜间告警阈值" │ +├──────────────────┼───────────────────────────────────────────────────────────┤ +│ Session Memory │ 来源: 当前任务的临时上下文、对话历史 │ +│ (会话记忆) │ 特征: 极短期、随任务结束可丢弃 │ +│ │ 示例: "当前正在研判的告警 ID"、"已确认的 IOC" │ +└──────────────────┴───────────────────────────────────────────────────────────┘ +``` + +### B. 数据生命周期 + +``` +Knowledge Memory ─────────────────────────────────────────────────▶ 长期保留 + │ + ▼ +Case Memory ─────────────────────────▶ 抽取为 Pattern ──────────▶ 合并到 Knowledge + │ 或定期清理 + ▼ +Process Memory ──▶ 抽取高价值结论 ──▶ 写入 Case Memory + │ + ▼ +定期清理/衰减 + │ + ▼ +Session Memory ──────────────────────▶ 任务结束即丢弃 +``` + +--- + +## 三、Obsidian 设计 + +### A. Vault 目录结构 + +``` +SOC Vault/ +├── 📁 00_Knowledge/ # 知识库 - 静态文档 +│ ├── 📁 00_KB/ # 知识库文档 +│ │ ├── 01_Attack_Techniques/ # 攻击技术分类 +│ │ ├── 02_Threat_Intelligence/ # 威胁情报 +│ │ ├── 03_Vulnerability_Analysis/ # 漏洞分析 +│ │ └── 04_Tool_Usage/ # 工具使用指南 +│ ├── 📁 01_Playbooks/ # 响应playbook +│ │ ├── 01_Phishing/ # 钓鱼响应 +│ │ ├── 02_Malware/ # 恶意软件响应 +│ │ ├── 03_Data_Exfiltration/ # 数据外泄响应 +│ │ └── README.md # Playbook 索引 +│ ├── 📁 02_Monthly_Reports/ # 月报摘要 +│ │ └── YYYY-MM/ # 按年月组织 +│ └── 📁 03_PO/ # 发布命令/通告 +│ +├── 📁 10_Cases/ # 案例库 - 历史case +│ ├── 📁 by_type/ # 按类型分类 +│ │ ├── Phishing/ +│ │ ├── Malware/ +│ │ ├── Brute_Force/ +│ │ └── Data_Exfiltration/ +│ ├── 📁 by_month/ # 按月份归档 +│ │ └── YYYY-MM/ +│ └── 📁 by_status/ # 按状态筛选 +│ ├── Confirmed/ +│ ├── False_Positive/ +│ └── Pending/ +│ +├── 📁 20_Analysis/ # 分析工作区 +│ ├── 📁 Templates/ # 模板 +│ │ ├── Case_Note.md # Case 笔记模板 +│ │ ├── Playbook.md # Playbook 模板 +│ │ ├── Monthly_Report_Summary.md # 月报摘要模板 +│ │ └── IOC_Extraction.md # IOC 提取模板 +│ └── 📁 Inbox/ # 待处理草稿 +│ +├── 📁 30_Templates/ # 公共模板库 +│ +└── 📁 90_System/ # 系统配置 + ├── 📁_Tags/ # 标签定义 + └── 📁_Graph/ # 双向链接配置 +``` + +### B. 模板设计 + +#### Case Note 模板 + +```yaml +--- +type: case_note +case_id: {{CASE_ID}} +severity: {{P1|P2|P3|P4}} +status: {{Open|Confirmed|False_Positive}} +analyst: {{ANALYST_NAME}} +created: {{YYYY-MM-DD HH:mm}} +tags: [] +--- + +# Case: {{简短标题}} + +## 告警摘要 +- **告警类型**: +- **受影响资产**: +- **时间范围**: + +## 研判过程 + +### 1. 
初始信息 +``` +{{告警原始内容}} +``` + +### 2. 调查步骤 +- [ ] 步骤1 +- [ ] 步骤2 +- [ ] 步骤3 + +### 3. 关键发现 +{{关键发现列表}} + +### 4. IOC 提取 +| 类型 | 值 | 置信度 | +|------|-----|--------| +| IP | | | +| Domain | | | +| Hash | | | + +## 结论 +- **判定结果**: {{True_Positive|False_Positive|Uncertain}} +- **攻击链阶段**: +- **相关 TTP**: + +## 关联信息 +- **相关 KB**: [[]] +- **历史类似 Case**: [[]] +- **相关 Intel**: [[]] + +## 后续行动 +- [ ] 行动1 +- [ ] 行动2 +``` + +#### Playbook 模板 + +```yaml +--- +type: playbook +category: {{Response|Detection|Analysis}} +mitre_tactics: [] +mitre_techniques: [] +last_updated: {{YYYY-MM-DD}} +reviewer: {{NAME}} +--- + +# Playbook: {{Playbook 名称}} + +## 目标 +{{简述本 Playbook 适用的场景和目标}} + +## 触发条件 +{{什么情况下应该使用本 Playbook}} + +## 前置要求 +- 工具/系统依赖 +- 权限要求 + +## 步骤 + +### Step 1: {{步骤名称}} +**目的**: +**执行**: +```bash +# 命令或操作 +``` + +**验证**: +- 预期结果 + +### Step 2: ... + +## 决策点 + +``` +{{决策树或流程图}} +``` + +## 常见问题 +| 问题 | 解决方案 | +|------|----------| +| | | + +## 关联 +- **相关 Case**: [[]] +- **相关 KB**: [[]] +- **相关 Intel**: [[]] +``` + +### C. 标签体系 + +#### 核心标签 (必须) + +| 标签 | 用途 | 示例 | +|------|------|------| +| `#soc/case` | Case 笔记 | `#soc/case/2024-Q3` | +| `#soc/playbook` | Playbook | `#soc/playbook/phishing` | +| `#soc/kb` | 知识库 | `#soc/kb/cve` | +| `#soc/intel` | 威胁情报 | `#soc/intel/apt29` | + +#### 维度标签 (可选) + +| 标签类型 | 标签 | 用途 | +|----------|------|------| +| 严重程度 | `#severity/p1`, `#severity/p2`, `#severity/p3` | 告警分级 | +| 攻击类型 | `#attack/phishing`, `#attack/malware`, `#attack/ransomware` | 攻击分类 | +| 行业 | `#industry/finance`, `#industry/tech` | 目标行业 | +| 威胁组织 | `#apt/apt28`, `#apt/lazarus` | 威胁组织 | +| 状态 | `#status/confirmed`, `#status/fp`, `#status/pending` | Case 状态 | + +### D. 双向链接建议 + +```markdown +# 建议的链接模式 + +1. Case → KB: "[[KB/CVE-2024-XXXX]] 在本案例中的表现为..." +2. Case → Case: "与 [[Cases/2024/2024-Q3-phishing-001]] 类似" +3. Playbook → KB: "基于 [[KB/mitre-tactics]] 的 Initial Access" +4. Intel → Case: "该 IOC 首次出现在 [[Cases/2024/2024-Q2-apt29-campaign]]" +``` + +### E. 适合/不适合的内容 + +| 适合放入 Obsidian | 不适合放入 Obsidian | +|-------------------|---------------------| +| 人工编写的 KB/Playbook | 原始日志/告警 Dump | +| 整理后的 Case Note | 全量 Ticket 记录 | +| 月报/报告摘要 | 实时威胁情报 Feed | +| 分析方法论 | 工具执行输出 | +| IOC 知识卡片 | 临时调试信息 | + +--- + +## 四、Skills 设计 + +### A. 
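上面的 Case Note / Playbook 模板可以在 skill 写回 Obsidian 时用脚本自动填充。下面是一个假设性的 Python 草图:模板片段、输出目录 `10_Cases/by_month` 以及字段名都按本章约定推断,并非仓库中已有实现。

```python
from datetime import datetime
from pathlib import Path

# 与上文 Case Note 模板 frontmatter 对应的最小模板片段(示意)
CASE_NOTE_TEMPLATE = """---
type: case_note
case_id: {case_id}
severity: {severity}
status: {status}
analyst: {analyst}
created: {created}
tags: [{tags}]
---

# Case: {title}
"""


def render_case_note(case: dict, vault_dir: Path) -> Path:
    """按模板生成 Case Note,并写入 Obsidian vault 的 10_Cases 目录(目录约定为示例)。"""
    body = CASE_NOTE_TEMPLATE.format(
        case_id=case["case_id"],
        severity=case.get("severity", "P3"),
        status=case.get("status", "Open"),
        analyst=case.get("analyst", "unknown"),
        created=datetime.now().strftime("%Y-%m-%d %H:%M"),
        tags=", ".join(case.get("tags", [])),
        title=case.get("title", case["case_id"]),
    )
    note_path = vault_dir / "10_Cases" / "by_month" / f"{case['case_id']}.md"
    note_path.parent.mkdir(parents=True, exist_ok=True)
    note_path.write_text(body, encoding="utf-8")
    return note_path


if __name__ == "__main__":
    print(render_case_note(
        {"case_id": "CASE-2026-0001", "severity": "P2", "status": "Confirmed",
         "analyst": "alice", "tags": ["soc/case", "attack/phishing"], "title": "钓鱼邮件研判"},
        Path("./obsidian_vault"),
    ))
```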
Skills 职责矩阵 + +``` +┌────────────────────────────────────────────────────────────────────────────────────────┐ +│ SOC Memory Skills 全景 │ +├────────────────┬──────────────┬──────────────────┬─────────────────────────────────────┤ +│ Skill 名称 │ 输入 │ 输出 │ 触发时机 │ +├────────────────┼──────────────┼──────────────────┼─────────────────────────────────────┤ +│ ingest_skill │ 原始数据 │ 标准化中间件 │ 定时/手动触发 │ +│ │ (KB/报告/ticket)│ │ │ +├────────────────┼──────────────┼──────────────────┼─────────────────────────────────────┤ +│ extract_ │ 原始数据/ │ 高价值记忆碎片 │ 每次 case 结案后 │ +│ memory_skill │ agent 过程 │ (key-value) │ / 定时批量处理 │ +├────────────────┼──────────────┼──────────────────┼─────────────────────────────────────┤ +│ classify_ │ 待分类记忆 │ 带分类标签的内存 │ extract_ 后自动触发 │ +│ memory_skill │ │ │ │ +├────────────────┼──────────────┼──────────────────┼─────────────────────────────────────┤ +│ retrieve_ │ 当前 case │ 排序后的上下文 │ AI agent 研判时 │ +│ context_skill │ 特征/查询 │ 列表 │ │ +├────────────────┼──────────────┼──────────────────┼─────────────────────────────────────┤ +│ summarize_ │ case 过程数据 │ 精炼的 Case Note │ case 结案时 │ +│ case_skill │ │ │ │ +├────────────────┼──────────────┼──────────────────┼─────────────────────────────────────┤ +│ commit_ │ 分类后的记忆 │ 写入目标存储 │ 分类后自动触发 │ +│ memory_skill │ │ │ │ +├────────────────┼──────────────┼──────────────────┼─────────────────────────────────────┤ +│ prune_memory_ │ 记忆库 │ 清理报告/统计 │ 定时任务 │ +│ skill │ │ │ │ +└────────────────┴──────────────┴──────────────────┴─────────────────────────────────────┘ +``` + +### B. 详细 Skill 规格 + +#### 1. ingest_skill + +```yaml +name: ingest_skill +description: 从外部系统导入原始数据,转换为标准化中间格式 + +输入: + - KB 文档 (Markdown/PDF) + - 月报/报告 (Markdown/PDF) + - Ticket System 导出 (JSON/CSV) + - 历史 Case 归档 (JSON) + - 威胁情报 (STIX/JSON) + +输出: + - 标准化中间件 (JSON) + - 元数据 (来源、日期、类型、标签) + +触发时机: + - 手动触发: `ingest_skill --source kb --path /path/to/kb` + - 定时触发: 每日 2:00 自动同步 + - 事件触发: 新 case 结案时 + +处理流程: + 1. 识别数据源类型 + 2. 解析原始内容 + 3. 提取结构化字段 + 4. 生成唯一 ID + 5. 写入待处理队列 + +依赖数据源: + - 文件系统 (本地/网络存储) + - Ticket API + - 情报系统 API +``` + +#### 2. extract_memory_skill + +```yaml +name: extract_memory_skill +description: 从原始资料或 agent 过程里抽取高价值记忆碎片 + +输入: + - 原始文档/Case 数据 + - Agent 过程日志 + - 研判对话历史 + +输出: + - 高价值记忆碎片列表 + - type: knowledge | case | pattern | ioc | ttp + - key: 记忆摘要 + - value: 详细内容 + - source: 来源 + - confidence: 置信度 0-1 + - tags: 标签 + +触发时机: + - Case 结案时自动触发 + - 定时批量处理 (每小时) + - 手动批量处理 + +处理流程: + 1. 加载待处理内容 + 2. 调用 LLM 抽取关键信息 + 3. 过滤低价值内容 (置信度 < 0.6) + 4. 输出结构化记忆碎片 + 5. 发送至分类 skill + +提取规则示例: + - IOC: IP/Domain/Hash/Email + - TTP: MITRE ATT&CK 战术/技术 + - Pattern: 攻击模式、相似特征 + - Conclusion: 研判结论、处置建议 +``` + +#### 3. classify_memory_skill + +```yaml +name: classify_memory_skill +description: 将记忆碎片分类到正确的记忆层 + +输入: + - 未分类的记忆碎片 + - 当前分类规则配置 + +输出: + - 带分类标签的记忆 + - memory_type: knowledge | case | process | profile | session + - ttl: 短期 | 中期 | 长期 + - priority: 高 | 中 | 低 + - namespace: 对应存储路径 + +触发时机: + - extract_memory_skill 完成后自动触发 + +处理流程: + 1. 分析记忆内容特征 + 2. 匹配分类规则 + 3. 估算 TTL 和优先级 + 4. 分配 namespace + 5. 发送至写回 skill + +分类规则: + - knowledge: 通用知识、方法论、工具用法 + - case: 具体案例、经验总结、模式 + - process: 推理步骤、中间结果 + - profile: 配置、偏好、规范 + - session: 临时上下文、对话片段 +``` + +#### 4. 
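在进入检索类 skill 之前,先给出上面分类规则的一个极简落地示意。下面的 Python 草图按五类记忆与 TTL / namespace 约定做映射:启发式判断条件只是示例,TTL 天数取自文中 prune 规则,均需结合真实数据调优。

```python
from dataclasses import dataclass

# 与 classify_memory_skill 输出约定对应的 TTL / namespace 映射(取值为示意)
MEMORY_POLICY = {
    "knowledge": {"ttl_days": None, "namespace": "viking://soc/knowledge/"},
    "case":      {"ttl_days": 180,  "namespace": "viking://soc/cases/"},
    "process":   {"ttl_days": 7,    "namespace": "viking://soc/process/"},
    "profile":   {"ttl_days": None, "namespace": "viking://soc/profiles/"},
    "session":   {"ttl_days": 1,    "namespace": "viking://soc/sessions/"},
}


@dataclass
class ClassifiedMemory:
    memory_type: str
    ttl_days: int | None
    namespace: str


def classify_fragment(fragment: dict) -> ClassifiedMemory:
    """极简分类:先看显式标注,再用来源字段兜底(规则为假设性示例)。"""
    source = fragment.get("source", "")
    if fragment.get("is_preference"):
        memory_type = "profile"
    elif source.startswith("case"):
        memory_type = "case"
    elif source.startswith("agent_trace"):
        memory_type = "process"
    elif source.startswith("session"):
        memory_type = "session"
    else:
        memory_type = "knowledge"
    policy = MEMORY_POLICY[memory_type]
    return ClassifiedMemory(memory_type, policy["ttl_days"], policy["namespace"])


if __name__ == "__main__":
    print(classify_fragment({"source": "case_archive", "key": "invoice 钓鱼模式"}))
```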
retrieve_context_skill + +```yaml +name: retrieve_context_skill +description: 根据当前 case 检索最相关的上下文 + +输入: + - 当前 case 特征 + - alert_type: 告警类型 + - iocs: 已知 IOC + - description: 告警描述 + - severity: 严重程度 + - 检索参数 + - top_k: 返回数量 + - namespaces: 搜索范围 + +输出: + - 排序后的上下文列表 + - content: 相关内容 + - relevance_score: 相关度 0-1 + - source: 来源 (ov/evermemos/obsidian) + - memory_type: 记忆类型 + - metadata: 附加信息 + +触发时机: + - AI agent 开始新 case 研判时 + - Agent 请求上下文时 + +检索策略: + 1. 多路召回: + - 向量检索: 语义相似度 + - 关键词检索: 精确匹配 + - 图检索: 双向链接关系 + 2. 多层筛选: + - 按 memory_type 过滤 + - 按时间/优先级排序 + - 去重和合并 + 3. 重排序: + - 结合场景权重 + - 个性化调整 + +返回格式: + - knowledge: 参考资料 1-3 条 + - case: 相似案例 2-5 条 + - ioc: 相关 IOC 1-3 条 + - ttp: 相关 TTP 1-2 条 +``` + +#### 5. summarize_case_skill + +```yaml +name: summarize_case_skill +description: 将 case 过程沉淀为高质量 Case Note + +输入: + - Case 元数据 + - 研判过程日志 + - Agent 对话历史 + - 提取的 IOC/TTP + +输出: + - 结构化 Case Note + - 关联建议 + - 质量评分 + +触发时机: + - Case 状态变更为 Confirmed/FP 时 + - Analyst 手动触发 + +处理流程: + 1. 收集 Case 相关信息 + 2. 提取关键时间线 + 3. 归纳 IOC/TTP + 4. 生成研判摘要 + 5. 推荐关联 (KB/历史 Case) + 6. 写入 Obsidian +``` + +#### 6. commit_memory_skill + +```yaml +name: commit_memory_skill +description: 将通过筛选的记忆写入目标存储 + +输入: + - 已分类的记忆 + - 目标存储配置 + +输出: + - 写入结果 + - 存储位置映射 + +触发时机: + - classify_memory_skill 完成后自动触发 + - 批量定时提交 + +写入策略: + - knowledge → Obsidian (Markdown) + - case → Obsidian + OpenViking + - process → EverMemOS (短期) + - profile → OpenViking (长期) + - session → EverMemOS (会话级) +``` + +#### 7. prune_memory_skill + +```yaml +name: prune_memory_skill +description: 清理过时、重复、低价值记忆 + +输入: + - 记忆库 + - 清理策略配置 + +输出: + - 清理报告 + - 统计信息 + +触发时机: + - 定时任务 (每周) + - 手动触发 + +清理规则: + 1. TTL 过期: + - process: 7 天 + - session: 任务结束 + - case: 180 天后降级 + 2. 重复合并: + - 相似度 > 0.9 合并 + - 保留高置信度版本 + 3. 价值评估: + - 长期未访问 + - 引用计数过低 +``` + +--- + +## 五、OpenViking 设计 + +### A. Namespace 设计 + +```yaml +# SOC Memory Namespace 架构 + +viking://soc/ # SOC 根命名空间 +├── knowledge/ # 知识库 +│ ├── kb/ # 知识库文档 +│ ├── playbook/ # Playbook +│ └── intel/ # 威胁情报摘要 +│ +├── cases/ # 案例库 +│ ├── 2024/ # 按年归档 +│ │ ├── Q1/ +│ │ ├── Q2/ +│ │ ├── Q3/ +│ │ └── Q4/ +│ ├── patterns/ # 攻击模式库 +│ └── iocs/ # IOC 知识库 +│ +├── process/ # 过程记忆 +│ ├── active/ # 当前活跃 +│ └── archived/ # 已归档 +│ +├── profiles/ # 配置/偏好 +│ ├── analysts/ # Analyst 配置 +│ │ └── {user_id}/ +│ └── teams/ # 团队配置 +│ +└── sessions/ # 会话记忆 + └── {session_id}/ +``` + +### B. 资源组织 + +```yaml +# OpenViking Resource 结构 + +resources: + soc_kb: # SOC 知识库 + type: directory + path: viking://soc/knowledge/ + description: KB、Playbook、Intel + + soc_cases: # 案例库 + type: directory + path: viking://soc/cases/ + description: 历史案例和模式 + + soc_process: # 过程记忆 + type: directory + path: viking://soc/process/ + description: 研判过程临时存储 +``` + +### C. 
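多路召回的融合可以用 Reciprocal Rank Fusion(RRF)实现,对应下文检索流程图中的 Multi-way RRF Fusion。下面是一个假设性的 Python 草图:k=60 是 RRF 常用的默认常数,三路召回结果与场景权重均为示例数据。

```python
from collections import defaultdict


def rrf_fuse(ranked_lists: dict[str, list[str]],
             weights: dict[str, float] | None = None,
             k: int = 60) -> list[tuple[str, float]]:
    """对多路召回结果做 Reciprocal Rank Fusion。

    ranked_lists: 每一路召回(vector / keyword / graph)返回的文档 ID 列表,按相关度降序。
    weights:      每一路的场景权重,缺省为 1.0。
    """
    weights = weights or {}
    scores: dict[str, float] = defaultdict(float)
    for channel, doc_ids in ranked_lists.items():
        w = weights.get(channel, 1.0)
        for rank, doc_id in enumerate(doc_ids, start=1):
            scores[doc_id] += w / (k + rank)
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)


if __name__ == "__main__":
    fused = rrf_fuse(
        {
            "vector":  ["case-0001", "case-0004", "kb-phish-header"],
            "keyword": ["kb-phish-header", "case-0001"],
            "graph":   ["case-0004", "pb-phish-001"],
        },
        weights={"vector": 1.0, "keyword": 0.8, "graph": 0.6},
    )
    for doc_id, score in fused[:5]:
        print(f"{score:.4f}  {doc_id}")
```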
AI Agent 检索流程 + +``` +┌─────────────────────────────────────────────────────────────────────────────────┐ +│ AI Agent 上下文检索流程 │ +├─────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Agent 请求: "帮我研判这个告警" │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ retrieve_context_skill │ │ +│ │ - alert_type: "钓鱼邮件" │ │ +│ │ - iocs: ["192.168.1.1", "evil.com"] │ │ +│ │ - severity: "P2" │ │ +│ └───────────────────────────┬───────────────────────────────────┘ │ +│ │ │ +│ ┌───────────────────┼───────────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Vector │ │ Keyword │ │ Graph │ │ +│ │ Search │ │ Search │ │ Search │ │ +│ │ (语义) │ │ (精确) │ │ (链接) │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ └───────────────────┼───────────────────┘ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ Multi-way RRF Fusion │ │ +│ │ + 场景权重调整 │ │ +│ │ + 个性化排序 │ │ +│ └────────────────┬────────────────┘ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ 返回优先级上下文 │ │ +│ │ - Knowledge: 1-3 │ │ +│ │ - Case: 2-5 │ │ +│ │ - IOC: 1-3 │ │ +│ │ - TTP: 1-2 │ │ +│ └─────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ Context Window │ +└─────────────────────────────────────────────────────────────────────────────────┘ +``` + +### D. OpenViking 与其他组件的关系 + +| 组件 | 与 OpenViking 的关系 | +|------|----------------------| +| Obsidian | OpenViking 读取 Obsidian 作为 resource 源 | +| Skills | 通过 OpenViking API 写入/读取记忆 | +| EverMemOS | 作为 OpenViking 的后端记忆存储 | +| AI Agent | 从 OpenViking 获取统一上下文 | + +--- + +## 六、EverMemOS 设计 + +### A. 后台长期记忆整理架构 + +``` +┌──────────────────────────────────────────────────────────────────────────────────┐ +│ EverMemOS 长期记忆整理层 │ +├──────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ 数据输入 │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────────────────────────────────────────┐ │ +│ │ Episode Processor │ │ +│ │ - 从 Process Memory 提取关键事件 │ │ +│ │ - 识别有价值的研判步骤 │ │ +│ │ - 标记重要结论和决策点 │ │ +│ └────────────────────────────┬─────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────────────────────────────────────────┐ │ +│ │ Memory Extractor │ │ +│ │ 输入: 原始 episode │ │ +│ │ 输出: 结构化记忆碎片 │ │ +│ │ - fact: 事实陈述 │ │ +│ │ - pattern: 模式发现 │ │ +│ │ - conclusion: 结论 │ │ +│ │ - confidence: 置信度 │ │ +│ └────────────────────────────┬─────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────────────────────────────────────────┐ │ +│ │ Deduplication & Merge │ │ +│ │ - 向量相似度检测 │ │ +│ │ - 语义去重 │ │ +│ │ - 记忆合并 (多版本合并) │ │ +│ │ - 冲突解决 │ │ +│ └────────────────────────────┬─────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────────────────────────────────────────┐ │ +│ │ Evolution & Decay │ │ +│ │ - 访问计数跟踪 │ │ +│ │ - 时间衰减 │ │ +│ │ - 重要性重估 │ │ +│ │ - 低价值清理 │ │ +│ └────────────────────────────┬─────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ 输出 │ +│ │ │ +│ ▼ │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ OpenViking │ │ Obsidian │ │ 清理报告 │ │ +│ │ (长期记忆) │ │ (案例笔记) │ │ (统计) │ │ +│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────────────────────┘ +``` + +### B. 
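Deduplication & Merge 这一步可以先用向量余弦相似度实现。下面是一个假设性的 Python 草图:相似度超过 0.9 即合并、保留置信度更高的版本,与文中的去重/合并规则一致;embedding 直接用现成数组表示,不绑定具体模型。

```python
import math


def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm if norm else 0.0


def dedup_merge(memories: list[dict], threshold: float = 0.9) -> list[dict]:
    """相似度 > threshold 时合并记忆,保留置信度更高的版本(实现为示意)。

    每条记忆形如 {"id": ..., "text": ..., "embedding": [...], "confidence": float}。
    """
    kept: list[dict] = []
    for mem in sorted(memories, key=lambda m: m["confidence"], reverse=True):
        duplicate = next(
            (k for k in kept if cosine(mem["embedding"], k["embedding"]) > threshold), None)
        if duplicate is None:
            kept.append(mem)
        else:
            # 低置信度版本只把来源并入高置信度版本,不再单独保留
            duplicate.setdefault("merged_from", []).append(mem["id"])
    return kept


if __name__ == "__main__":
    result = dedup_merge([
        {"id": "m1", "text": "invoice 钓鱼邮件模式", "embedding": [0.90, 0.10], "confidence": 0.9},
        {"id": "m2", "text": "发票主题钓鱼邮件模式", "embedding": [0.88, 0.12], "confidence": 0.7},
        {"id": "m3", "text": "O365 不可能旅行告警", "embedding": [0.10, 0.95], "confidence": 0.8},
    ])
    print([m["id"] for m in result], [m.get("merged_from") for m in result])
```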
长期记忆抽取规则 + +```yaml +# 记忆抽取策略 + +抽取目标: + - 从 Process Memory 抽取: + - 关键决策点 + - 重要发现 + - 有效 TTP + - 成功/失败模式 + + - 从 Session Memory 抽取: + - 高价值结论 + - 常用模式 + - Analyst 偏好 + +保留策略: + 长期 (永久): + - 确认的 TTP 模式 + - 有效的 Playbook + - 重要的 IOC 知识 + - 经典 Case 总结 + + 中期 (180天): + - 近期 Case 总结 + - 有效的分析方法 + - 团队知识更新 + + 短期 (30天): + - Process 记忆 + - 临时分析结果 + - 中间推理步骤 + + 丢弃: + - 重复信息 + - 低置信度内容 + - 未验证假设 + +去重/合并规则: + 1. 语义相似度 > 0.9: 合并 + 2. 时间相近 + 内容相似: 合并 + 3. 冲突内容: 保留高置信度 + 标记 +``` + +### C. 与 OpenViking/Obsidian 的闭环 + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 记忆循环流动 │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Process │────▶│ EverMemOS │────▶│ Open │ │ +│ │ Memory │ │ (抽取/整理) │ │ Viking │ │ +│ └─────────┘ └─────────────┘ └──────┬──────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────┐ │ +│ │ 检索返回 │ │ +│ │ 供 Agent 使用 │ │ +│ └─────────────────┘ │ +│ │ │ +│ ┌─────────┐ ┌─────────────┐ │ │ +│ │ Session │────▶│ Summarize │◀──────────┘ │ +│ │ Memory │ │ (case总结) │ │ +│ └─────────┘ └──────┬──────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────┐ │ +│ │ Obsidian │ │ +│ │ (Case Note) │ │ +│ └─────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 七、最小可行 POC 范围 + +### A. POC 目标 + +**聚焦场景**: 钓鱼邮件告警研判 + +**理由**: +1. 钓鱼邮件是 SOC 最常见的告警类型之一 +2. 有成熟的 KB 和 playbook 可供参考 +3. IOC 提取和模式识别相对标准化 +4. 容易评估效果 (研判时间、准确率) + +### B. 数据范围 + +| 数据类型 | 接入范围 | 优先级 | +|----------|----------|--------| +| 历史 Case | 最近 6 个月的钓鱼邮件 Case | P0 | +| KB | 钓鱼邮件识别 KB、常见钓鱼模式 | P0 | +| Playbook | 钓鱼邮件响应 Playbook | P0 | +| 月报摘要 | 最近 4 个季度月报 | P1 | +| 情报 | 活跃钓鱼域名/IP 情报 | P1 | +| Ticket | 只接已结案的 Case | P2 | + +### C. 先不做 + +- 实时告警接入 +- 自动化研判闭环 +- 多语言支持 +- 移动端访问 +- 全文档 OCR 识别 + +### D. 实施计划 (4 周) + +``` +┌────────────────────────────────────────────────────────────────────────────────┐ +│ POC 实施计划 │ +├────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Week 1: 基础搭建 │ +│ ├─ 搭建 Obsidian Vault 结构 │ +│ ├─ 设计 Case Note / Playbook 模板 │ +│ ├─ 配置 OpenViking SOC namespace │ +│ ├─ 部署 EverMemOS 并验证 │ +│ └─ 导入 50 个历史钓鱼 Case │ +│ │ +│ Week 2: Skills 开发 │ +│ ├─ ingest_skill: KB/Case 导入 │ +│ ├─ extract_memory_skill: IOC/TTP 抽取 │ +│ ├─ classify_memory_skill: 分类逻辑 │ +│ ├─ commit_memory_skill: 写入流程 │ +│ └─ 端到端流程打通 │ +│ │ +│ Week 3: 检索与评估 │ +│ ├─ retrieve_context_skill: 上下文检索 │ +│ ├─ 多路召回实现 │ +│ ├─ 基础评估脚本 │ +│ └─ 内部测试使用 │ +│ │ +│ Week 4: 优化与验收 │ +│ ├─ 检索效果调优 │ +│ ├─ POC 演示 │ +│ ├─ 评估指标统计 │ +│ └─ 文档和交接 │ +│ │ +└────────────────────────────────────────────────────────────────────────────────┘ +``` + +### E. 评估指标 + +| 指标 | 目标值 | 测量方式 | +|------|--------|----------| +| 相关 Case 命中率 | ≥ 70% | 检索结果相关性标注 | +| 研判时间缩短 | ≥ 30% | 对比有/无辅助的时间 | +| 结论准确率 | ≥ 85% | 回测历史 Case 准确率 | +| 人工满意度 | ≥ 4/5 | Analyst 评分 | +| 记忆库质量评分 | ≥ 80% | 随机抽样评估 | + +--- + +## 八、目录结构与工程结构 + +### A. 
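上面的保留策略和 Evolution & Decay 可以抽象成一个简单的打分与清理判断。下面是一个假设性的 Python 草图:TTL 天数取自保留策略,半衰期、价值阈值等参数均为示意,需要按实际访问数据调参。

```python
import math
from datetime import datetime, timedelta

# 与保留策略对应的 TTL(天),None 表示长期保留;天数为文档约定,策略实现为示意
RETENTION_DAYS = {"long_term": None, "mid_term": 180, "short_term": 30}


def decay_score(last_access: datetime, access_count: int,
                now: datetime | None = None, half_life_days: float = 30.0) -> float:
    """时间指数衰减 × 访问频次:分数越低越接近可清理。"""
    now = now or datetime.now()
    age_days = (now - last_access).total_seconds() / 86400
    time_factor = 0.5 ** (age_days / half_life_days)
    return time_factor * math.log1p(access_count)


def should_prune(memory: dict, now: datetime | None = None, min_score: float = 0.1) -> bool:
    """TTL 过期或价值评分过低时,标记为清理候选。"""
    now = now or datetime.now()
    ttl = RETENTION_DAYS.get(memory["tier"])
    if ttl is not None and now - memory["created_at"] > timedelta(days=ttl):
        return True
    return decay_score(memory["last_access"], memory["access_count"], now) < min_score


if __name__ == "__main__":
    now = datetime(2026, 4, 20)
    stale = {"tier": "short_term", "created_at": now - timedelta(days=45),
             "last_access": now - timedelta(days=40), "access_count": 1}
    print(should_prune(stale, now))  # True:已超过 30 天短期 TTL
```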
推荐目录结构 + +``` +soc-memory-poc/ +├── 📁 obsidian_vault/ # Obsidian Vault +│ ├── 00_Knowledge/ +│ ├── 10_Cases/ +│ ├── 20_Analysis/ +│ └── 90_System/ +│ +├── 📁 skills/ # Skills 集合 +│ ├── ingest_skill/ +│ │ ├── SKILL.md +│ │ ├── scripts/ +│ │ │ └── ingest.py +│ │ └── configs/ +│ │ +│ ├── extract_memory_skill/ +│ │ ├── SKILL.md +│ │ ├── scripts/ +│ │ │ └── extract.py +│ │ └── prompts/ +│ │ └── extraction.md +│ │ +│ ├── classify_memory_skill/ +│ ├── retrieve_context_skill/ +│ ├── summarize_case_skill/ +│ ├── commit_memory_skill/ +│ └── prune_memory_skill/ +│ +├── 📁 openviking_integration/ # OpenViking 集成 +│ ├── namespace_config.yaml +│ ├── resource_config.yaml +│ └── scripts/ +│ └── sync_resources.py +│ +├── 📁 evermemos_worker/ # EverMemOS 后台任务 +│ ├── config.yaml +│ ├── extract_worker.py +│ ├── dedup_worker.py +│ ├── decay_worker.py +│ └── requirements.txt +│ +├── 📁 ingestion_pipeline/ # 数据接入管道 +│ ├── kb_ingester.py +│ ├── case_ingester.py +│ ├── ticket_connector.py +│ └── intel_connector.py +│ +├── 📁 retrieval_service/ # 检索服务 +│ ├── api.py +│ ├── ranker.py +│ ├── multi_way_merge.py +│ └── requirements.txt +│ +├── 📁 evaluation/ # 评估脚本 +│ ├── hit_rate_test.py +│ ├── time_measurement.py +│ ├── accuracy_test.py +│ └── synthetic_data/ +│ +├── 📁 configs/ # 全局配置 +│ ├── soc_memory_config.yaml +│ ├── namespaces.yaml +│ └── template_config.yaml +│ +└── 📁 docs/ # 文档 + ├── ARCHITECTURE.md + ├── SKILLS_API.md + ├── POC_PLAN.md + └── USER_GUIDE.md +``` + +### B. 核心文件说明 + +```yaml +# 目录与文件映射 + +soc-memory-poc/ +├── obsidian_vault/ # 人工维护的知识库 +│ └── 00_Knowledge/ # 静态知识 +│ +├── skills/ # 记忆流程自动化 +│ ├── retrieve_context_skill/ # 检索入口 (Agent 调用) +│ ├── commit_memory_skill/ # 写入入口 +│ └── ...其他 skill +│ +├── openviking_integration/ # OpenViking 适配层 +│ └── namespace_config.yaml # namespace 定义 +│ +├── evermemos_worker/ # 后台整理任务 +│ └── extract_worker.py # 从 process 抽取 +│ +├── retrieval_service/ # 检索服务 +│ └── api.py # 对外 API +│ +└── evaluation/ # 评估 + └── hit_rate_test.py # 命中率测试 +``` + +--- + +## 九、为什么选择这个组合 + +### A. 组合逻辑 + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 组件选择理由 │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Obsidian │ +│ ├─ 人工可维护: 分析师可直接编辑 KB/Playbook/Case Note │ +│ ├─ 双向链接: 天然支持知识间的关联发现 │ +│ └─ 模板系统: 规范 Case Note 和 Playbook 格式 │ +│ │ +│ OpenViking │ +│ ├─ 统一入口: 所有 Agent 通过 OpenViking 获取上下文 │ +│ ├─ 多模存储: 支持文件、资源、技能等多种形式 │ +│ └─ 检索能力: 内置向量/关键词/图检索 │ +│ │ +│ EverMemOS │ +│ ├─ 后台整理: 自动从过程记忆中抽取有价值内容 │ +│ ├─ 去重合并: 避免记忆库膨胀 │ +│ └─ 衰减机制: 自动清理低价值记忆 │ +│ │ +│ Skills │ +│ ├─ 流程编排: 连接各组件形成完整工作流 │ +│ ├─ 可插拔: 不同 Agent 框架可复用 │ +│ └─ 可扩展: 方便添加新的处理逻辑 │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### B. 核心优势 + +1. **避免"全量存储"**: 通过 skills 层层筛选 + EverMemOS 自动清理 +2. **高质量记忆**: 只保留对研判有帮助的内容 +3. **模块化**: 各组件松耦合,方便替换 +4. **可扩展**: POC 成功后容易扩展到其他告警类型 +5. 
**人工可维护**: 分析师可以参与知识沉淀 + +--- + +## 十、落地顺序建议 + +### 推荐顺序 + +``` +1️⃣ Week 1: 先搭建 Obsidian + 导入历史 Case + - 最快看到价值 + - 验证模板和标签体系 + +2️⃣ Week 2: 部署 OpenViking + 配置 namespace + - 建立统一入口 + - 打通检索流程 + +3️⃣ Week 3: 开发核心 Skills (ingest/extract/commit) + - 实现自动化流程 + - 端到端打通 + +4️⃣ Week 4: 部署 EverMemOS + 实现检索 + - 后台整理能力 + - 评估效果 +``` + +### 关键里程碑 + +| 周 | 里程碑 | 验收标准 | +|----|--------|----------| +| Week 1 | Obsidian 可用 | 50 个 Case 导入完成 | +| Week 2 | OpenViking 可检索 | 关键词检索正常工作 | +| Week 3 | Skills 端到端 | 自动化流程跑通 | +| Week 4 | POC 完成 | 命中率 ≥ 70% | + +--- + +*文档版本: v1.0* +*创建日期: 2026-04-20* +*目标: 4 周 POC 落地* diff --git a/config.example.yaml b/config.example.yaml new file mode 100644 index 0000000..d5ffa32 --- /dev/null +++ b/config.example.yaml @@ -0,0 +1,32 @@ +# Memory Gateway 配置示例 +# 复制为 config.yaml 并根据实际情况修改 + +# Memory Gateway 服务配置 +server: + # 监听地址,0.0.0.0 表示接受所有网卡(局域网可访问) + host: "0.0.0.0" + # MCP Server 端口 + port: 1934 + # 可选:API Key 认证,客户端需要提供相同的 Key + api_key: "" + +# OpenViking 后端配置 +openviking: + # OpenViking 服务器地址 + url: "http://localhost:1933" + # OpenViking API Key(如有) + api_key: "" + # 请求超时时间(秒) + timeout: 30 + +# 记忆配置 +memory: + # 默认命名空间 + default_namespace: "soc" + # 默认搜索返回数量 + search_limit: 10 + +# 日志配置 +logging: + level: "INFO" + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..0478a13 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,190 @@ +# Architecture + +## 整体目标 + +构建一个面向 SOC case 研判辅助的记忆系统 POC,用于提升 AI agent 在以下环节的效果: + +- 告警研判 +- 历史 case 检索 +- 上下文补全 +- 结论生成 +- 高价值记忆沉淀 + +## 总体架构图 + +```text + ┌────────────────────────────┐ + │ 知识源 / 数据源 │ + │ KB / Playbook / 月报 / 报告 │ + │ Ticket / Intel / 历史 Case │ + └─────────────┬──────────────┘ + │ + │ ingest / normalize + ▼ + ┌──────────────────────────────┐ + │ Pipeline 层 │ + │ connectors / transforms / jobs│ + └─────────────┬────────────────┘ + │ + │ extracted inputs + ▼ + ┌──────────────────────────────┐ + │ Skills 层 │ + │ ingest / classify / retrieve │ + │ summarize / commit / prune │ + └───────┬─────────────┬────────┘ + │ │ + query/write │ │ write notes / long-term + ▼ ▼ + ┌────────────────────┐ ┌────────────────────┐ + │ Memory Gateway │ │ Obsidian Vault │ + │ MCP / REST / Auth │ │ Human-maintained │ + └─────────┬──────────┘ └────────────────────┘ + │ + ▼ + ┌────────────────────┐ + │ OpenViking │ + │ context / memory │ + │ resources / skills │ + └─────────┬──────────┘ + │ + ┌─────────┴──────────┐ + ▼ ▼ + ┌──────────────────┐ ┌──────────────────┐ + │ Session / Online │ │ EverMemOS │ + │ retrieval │ │ long-term memory │ + └──────────────────┘ └──────────────────┘ + ▲ + │ + ▼ + ┌────────────────────┐ + │ AI Agent / Harness │ + │ Nanobot / Hermes │ + │ OpenClaw / others │ + └────────────────────┘ +``` + +## 分层说明 + +### 1. 知识源层 + +外部系统和已有资料: + +- KB +- Playbook +- 月报 +- 报告 +- Ticket system +- 情报系统 +- 历史 case + +特点: + +- 来源多样 +- 结构不一致 +- 不能直接全部当记忆使用 + +### 2. Pipeline 层 + +负责: + +- 数据接入 +- 格式标准化 +- 提取元数据 +- 过滤噪声 + +边界: + +- 不做最终检索 +- 不做最终长期沉淀判断 + +### 3. Skills 层 + +负责: + +- 抽取高价值记忆 +- 分类为 knowledge / case / process / session +- 检索相关上下文 +- 生成 case 总结 +- 写回 OpenViking / Obsidian / EverMemOS + +这是整套系统的流程编排层。 + +### 4. Memory Gateway 层 + +负责: + +- 给 AI agent 提供统一入口 +- 屏蔽 OpenViking 细节 +- 提供 MCP / REST 接口 +- 处理鉴权和协议兼容 + +### 5. OpenViking 统一上下文层 + +负责: + +- 保存 memory +- 保存 resources +- 组织 skills +- 按 namespace 管理不同类型上下文 + +### 6. Obsidian 层 + +负责人工可维护的知识沉淀: + +- 高质量 case note +- playbook +- 月报 / 报告摘要 +- 关键实体说明 + +### 7. 
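上面 `config.example.yaml` 的消费方式可以很薄。下面是一个假设性的 Python 草图,演示 memory gateway 读取该配置并向 OpenViking 转发一次检索请求:其中 `/search` 路径与请求体字段只是为演示而假设的接口形状,并非 OpenViking 的真实 API;依赖 PyYAML 与 requests。

```python
from pathlib import Path

import requests
import yaml


def load_config(path: str = "config.yaml") -> dict:
    """读取 config.example.yaml 约定的配置结构。"""
    with Path(path).open(encoding="utf-8") as f:
        return yaml.safe_load(f)


def search_memory(cfg: dict, query: str) -> list[dict]:
    """把检索请求转发给 OpenViking 后端。

    注意:/search 路径与请求体字段只是示意,真实的 OpenViking 接口以其文档为准。
    """
    ov = cfg["openviking"]
    headers = {"Authorization": f"Bearer {ov['api_key']}"} if ov.get("api_key") else {}
    resp = requests.post(
        f"{ov['url']}/search",
        json={
            "namespace": cfg["memory"]["default_namespace"],
            "query": query,
            "limit": cfg["memory"]["search_limit"],
        },
        headers=headers,
        timeout=ov.get("timeout", 30),
    )
    resp.raise_for_status()
    return resp.json().get("results", [])


if __name__ == "__main__":
    config = load_config()
    for item in search_memory(config, "invoice 钓鱼邮件 相似 case"):
        print(item)
```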
EverMemOS 层 + +负责后台长期记忆整理: + +- episode -> long-term memory +- 去重 +- 合并 +- 更新 +- 衰减 + +## 多 Agent 共享方式 + +多 agent 不直接彼此共享临时内存,而是通过统一上下文层协作: + +- 公共稳定知识走 `soc/knowledge` +- 历史案例走 `soc/case` +- 当前任务走 `session/` +- agent 私有偏好走 `agent/` + +这样可以做到: + +- 公共知识共享 +- 当前会话隔离 +- 不同 agent 框架可复用同一体系 + +## 检索质量控制原则 + +为避免“所有东西全塞进去”导致检索质量下降,必须坚持: + +- 原始资料不直接全部进入长期记忆 +- 只保留高价值摘要、模式、结论、证据 +- session / process memory 默认短期保留 +- 历史 case 和 playbook 优先于泛知识 +- Obsidian 只放人工维护内容,不放全量原文 + +## 第一阶段默认方案 + +第一阶段推荐组合: + +- OpenViking:统一 context / memory 层 +- Memory Gateway:统一访问入口 +- Skills:检索、总结、沉淀 +- Obsidian:人工可维护知识沉淀 +- EverMemOS:后台长期记忆整理 + +推荐原因: + +- 模块边界清晰 +- 最适合 POC 小步快跑 +- 最容易控制系统复杂度 +- 最容易对不同 agent 框架复用 diff --git a/docs/data-model.md b/docs/data-model.md new file mode 100644 index 0000000..c5c72a6 --- /dev/null +++ b/docs/data-model.md @@ -0,0 +1,138 @@ +# Data Model + +## 目标 + +这个数据模型面向 SOC case 研判辅助场景,不追求全量归档,而强调高价值记忆抽取。 + +## 数据分层 + +### 1. Knowledge Memory + +适用内容: + +- KB +- Playbook +- 月报摘要 +- 报告摘要 +- PO +- 检测规则说明 + +特点: + +- 偏稳定、可复用 +- 面向方法、知识、模式 +- 适合长期保存 + +建议字段: + +- `id` +- `title` +- `source_type` +- `summary` +- `tags` +- `entities` +- `ttp` +- `confidence` +- `updated_at` + +### 2. Case Memory + +适用内容: + +- 历史 case +- 最终研判结论 +- 关键证据 +- 误报 / 真报模式 +- 处置建议 + +特点: + +- 面向具体案例 +- 适合检索相似 case +- 是 POC 阶段最重要的数据层 + +建议字段: + +- `case_id` +- `title` +- `alert_type` +- `verdict` +- `summary` +- `key_evidence` +- `entities` +- `detection_logic` +- `lessons_learned` +- `source_links` + +### 3. Process Memory + +适用内容: + +- agent 中间步骤 +- 工具调用结果 +- 推理路径 +- 临时分析结论 + +特点: + +- 生命周期短 +- 价值不均匀 +- 只应抽取高价值部分转化为长期记忆 + +建议字段: + +- `session_id` +- `step_id` +- `tool_name` +- `observation` +- `intermediate_conclusion` +- `value_score` +- `timestamp` + +### 4. Profile / Preference Memory + +适用内容: + +- analyst 偏好 +- 默认输出风格 +- 常用研判路径 + +特点: + +- 数量小 +- 用于个性化辅助 + +建议字段: + +- `user_id` +- `preference_type` +- `value` +- `scope` + +### 5. Session Memory + +适用内容: + +- 当前 case 的上下文 +- 当前轮对话、当前任务的临时缓存 + +特点: + +- 强时效 +- 默认不长期保留 + +建议字段: + +- `session_id` +- `task_id` +- `active_entities` +- `active_hypotheses` +- `recent_observations` +- `expires_at` + +## 设计原则 + +- 原始材料不直接当记忆 +- 只沉淀对后续研判有帮助的高价值信息 +- Process Memory 默认短期,经过抽取后才升级为长期记忆 +- Knowledge 与 Case 是 POC 阶段优先建设的两层 diff --git a/docs/hermes-demo-prompts.md b/docs/hermes-demo-prompts.md new file mode 100644 index 0000000..607b411 --- /dev/null +++ b/docs/hermes-demo-prompts.md @@ -0,0 +1,91 @@ +# Hermes Demo Prompts + +## Recommended: Raw Email / Freeform Alert + +Use this when you want to show that Hermes does not need a rigid input schema. The `soc-memory-poc` skill should route the content through `triage_email.py`, extract useful fields, retrieve memory, search Obsidian, and return the fixed SOC triage sections. + +```text +Use the soc-memory-poc skill. Triage this email alert and include Memory Retrieval and Obsidian references. + +From: billing@vendor-payments.com +To: alice@corp.example +Subject: Invoice overdue notice +Attachment: invoice_review.html + +User clicked the link after opening the HTML attachment. DMARC failed. Review at https://vendor-payments-login.com/review from IP 198.51.100.20 on host FIN-LAPTOP-12. 
+ +Return exactly these sections: +研判结果 +关键证据 +关联 Memory Retrieval +关联 Obsidian 文档 +建议动作 +``` + +Equivalent direct script check: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/triage_email.py --text "From: billing@vendor-payments.com +To: alice@corp.example +Subject: Invoice overdue notice +Attachment: invoice_review.html +User clicked the link after opening the HTML attachment. DMARC failed. Review at https://vendor-payments-login.com/review from IP 198.51.100.20 on host FIN-LAPTOP-12." +``` + +## Structured Phishing Alert + +Use this when you want maximum repeatability with explicit fields. + +```text +Use the soc-memory-poc skill. Treat the following as a structured SOC alert and use the preferred Scheme A path. + +Scenario: phishing +Alert type: mail_suspicious_attachment +User: alice@corp.example +Host: FIN-LAPTOP-12 +Sender: billing@vendor-payments.com +Subject: Invoice overdue notice +Attachment: invoice_review.html +URL: https://vendor-payments-login.com/review +IP: 198.51.100.20 +Known facts: +- DMARC failed +- User may have clicked the link + +Return exactly these sections: +研判结果 +关键证据 +关联 Memory Retrieval +关联 Obsidian 文档 +建议动作 +``` + +## Structured O365 Alert + +```text +Use the soc-memory-poc skill. Treat the following as a structured SOC alert and use the preferred Scheme A path. + +Scenario: o365_suspicious_login +Alert type: azuread_impossible_travel +User: david@corp.example +Host: WS-DAVID-01 +IP: 203.0.113.150 +Known facts: +- Impossible travel observed between Shanghai and Amsterdam within 15 minutes +- MFA fatigue occurred before final success +- User denied initiating the overseas login +- Inbox rule creation was observed after login + +Return exactly these sections: +研判结果 +关键证据 +关联 Memory Retrieval +关联 Obsidian 文档 +建议动作 +``` + +## Generate Case Note + +```text +Use the soc-memory-poc skill. Generate an Obsidian case note for /home/tom/soc_memory_poc/evaluation/datasets/normalized_cases/CASE-2026-0003.json with OpenViking enrichment, then tell me the output path and confirm whether the note was written successfully. +``` diff --git a/docs/namespaces.md b/docs/namespaces.md new file mode 100644 index 0000000..1672311 --- /dev/null +++ b/docs/namespaces.md @@ -0,0 +1,120 @@ +# OpenViking Namespaces + +## 目标 + +通过明确 namespace 和 URI 组织方式,把 OpenViking 用作统一的 context / memory gateway。 + +## 推荐 namespace + +### 1. `soc/knowledge` + +用于稳定知识: + +- KB +- Playbook +- 月报摘要 +- 报告摘要 +- PO + +示例: + +- `viking://soc/knowledge/kb/phishing-mail-header-analysis` +- `viking://soc/knowledge/playbook/o365-suspicious-login` + +### 2. `soc/case` + +用于历史案例和 case 结论: + +- 历史 case +- 真报 / 误报模式 +- 关键证据 + +示例: + +- `viking://soc/case/true-positive/case-2026-00128` +- `viking://soc/case/false-positive/case-2026-00072` + +### 3. `soc/process` + +用于流程级记忆: + +- agent 中间分析 +- 工具输出摘要 +- 可复用的中间判断模式 + +示例: + +- `viking://soc/process/session-abc123/step-04` + +### 4. `session/` + +用于当前任务的临时上下文。 + +示例: + +- `viking://session/incident-20260421-001/context` +- `viking://session/incident-20260421-001/tools` + +### 5. `agent/` + +用于 agent 级别的私有或半私有上下文。 + +示例: + +- `viking://agent/hermes-soc/default` +- `viking://agent/nanobot-soc/preferences` + +### 6. 
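按上面的 namespace 约定,commit 阶段可以用一个很小的路由函数把记忆映射到 viking:// URI。下面是假设性的 Python 草图,URI 拼接规则按本页示例推断,字段名仅作说明,不代表 OpenViking 的真实寻址接口。

```python
def memory_uri(memory: dict) -> str:
    """把一条记忆映射到本页约定的 viking:// namespace(拼接规则为示意)。"""
    mtype = memory["memory_type"]
    if mtype == "knowledge":
        # 例: viking://soc/knowledge/kb/phishing-mail-header-analysis
        return f"viking://soc/knowledge/{memory['doc_type']}/{memory['slug']}"
    if mtype == "case":
        # 例: viking://soc/case/true-positive/case-2026-00128
        return f"viking://soc/case/{memory['verdict']}/{memory['case_id'].lower()}"
    if mtype == "process":
        return f"viking://soc/process/{memory['session_id']}/{memory['step_id']}"
    if mtype == "session":
        return f"viking://session/{memory['session_id']}/context"
    if mtype == "agent":
        return f"viking://agent/{memory['agent_id']}/{memory['slug']}"
    raise ValueError(f"unknown memory_type: {mtype}")


if __name__ == "__main__":
    print(memory_uri({"memory_type": "case", "verdict": "true-positive",
                      "case_id": "CASE-2026-0001"}))
    print(memory_uri({"memory_type": "knowledge", "doc_type": "playbook",
                      "slug": "o365-suspicious-login"}))
```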
`user/` + +用于 analyst 偏好、展示习惯等小规模 profile 信息。 + +示例: + +- `viking://user/alice/preferences` + +## 资源组织建议 + +### memory + +适用于: + +- 高价值摘要 +- case 结论 +- pattern +- lesson learned + +### resources + +适用于: + +- 原始附件链接 +- 外部文档引用 +- Obsidian note 路径 +- ticket / report / intel 引用 + +### skills + +适用于: + +- 检索 skill +- 记忆抽取 skill +- case 沉淀 skill + +## 检索顺序建议 + +当前 case 发生检索时,建议按以下顺序召回: + +1. `session/` +2. `soc/case` +3. `soc/knowledge` +4. `agent/` +5. `user/` + +这样可以优先保证“当前上下文”和“历史相似 case”的相关性,不让通用知识淹没 case 信号。 + +## 约束建议 + +- 不要把所有原始资料直接写入 `soc/knowledge` +- `soc/process` 默认应该设置清理策略 +- 长期稳定内容再写入 `soc/knowledge` 或 `soc/case` +- Obsidian 只存人工可维护的摘要和结构化沉淀,不做全量原文仓 diff --git a/docs/poc-scope.md b/docs/poc-scope.md new file mode 100644 index 0000000..c86a304 --- /dev/null +++ b/docs/poc-scope.md @@ -0,0 +1,130 @@ +# POC Scope + +## 目标 + +第一阶段 POC 只验证一件事: + +**高价值记忆抽取 + 相似 case / 知识召回,是否能有效提升 SOC case 研判效率和质量。** + +## POC 范围 + +### 聚焦 case 类型 + +建议只选 1 到 2 类典型场景: + +1. 钓鱼邮件 / 恶意附件 +2. O365 异常登录 / 疑似账号被盗 + +原因: + +- 数据可获得性较高 +- 历史 case 重用价值高 +- playbook / KB 通常较完整 +- 便于定义“相似 case 命中率” + +## 第一阶段只接入的数据 + +### 必接 + +- 历史 case +- KB +- Playbook + +### 可选接入 + +- 月报摘要 +- 报告摘要 + +### 暂不接入 + +- ticket system 双向同步 +- 全量情报系统自动拉取 +- 全量报告原文 +- 大规模 process trace 持久化 +- analyst 偏好个性化 + +## 第一阶段要做的能力 + +### 必做 + +- 历史 case 导入 +- KB / Playbook 导入 +- 高价值信息抽取 +- 基于当前 case 的相关上下文检索 +- case 总结沉淀 +- 结构化写回 OpenViking +- 生成 Obsidian case note + +### 第二阶段再做 + +- EverMemOS 长期整理自动化 +- 更复杂的去重和衰减 +- 多数据源自动同步 +- 多 agent 协同策略优化 + +## 不做的事情 + +为了保证 POC 可落地,第一阶段明确不做: + +- 泛化的企业级记忆平台 +- 所有原始数据全量入库 +- 全量全文检索系统重构 +- 覆盖所有 SOC 告警类型 +- 复杂权限系统 +- 完整的在线标注平台 + +## 交付物 + +第一阶段建议交付: + +1. 可运行的 memory gateway +2. 一批可导入的历史 case 与 KB / Playbook 样本 +3. 最小的 ingest / retrieve / summarize / commit 闭环 +4. Obsidian 模板和样例 note +5. 
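作为补充,docs/namespaces.md 中建议的召回顺序(session → soc/case → soc/knowledge → agent → user)可以表示成一个按优先级合并的检索函数。下面是假设性的 Python 草图,用内存中的假数据代替真实检索,只演示"高优先级 namespace 先占位、通用知识不淹没 case 信号"的合并方式。

```python
# 与 docs/namespaces.md 建议一致的召回优先级
NAMESPACE_PRIORITY = ["session/", "soc/case", "soc/knowledge", "agent/", "user/"]

# 假数据:真实实现中应调用 OpenViking 的检索接口
FAKE_INDEX = {
    "session/":      [{"id": "sess-ctx", "score": 0.70}],
    "soc/case":      [{"id": "case-2026-0001", "score": 0.92},
                      {"id": "case-2026-0004", "score": 0.81}],
    "soc/knowledge": [{"id": "kb-phish-header-check", "score": 0.88}],
    "agent/":        [],
    "user/":         [],
}


def search_namespace(namespace: str, query: str, limit: int) -> list[dict]:
    """占位检索:按 namespace 返回至多 limit 条假数据。"""
    return FAKE_INDEX.get(namespace, [])[:limit]


def retrieve_context(query: str, top_k: int = 5) -> list[dict]:
    """按优先级依次召回并合并,直到取满 top_k 条。"""
    results: list[dict] = []
    for namespace in NAMESPACE_PRIORITY:
        for hit in search_namespace(namespace, query, top_k - len(results)):
            results.append({**hit, "namespace": namespace})
        if len(results) >= top_k:
            break
    return results


if __name__ == "__main__":
    for hit in retrieve_context("invoice 钓鱼邮件"):
        print(hit["namespace"], hit["id"], hit["score"])
```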
一份 baseline 与 POC 对比评估结果 + +## 2 到 4 周实施建议 + +### 第 1 周 + +- 冻结 POC 范围 +- 整理样本数据 +- 完成数据模型与 namespace 约定 +- 建好 Obsidian 模板 + +### 第 2 周 + +- 完成历史 case / KB 导入脚本 +- 完成 `retrieve_context_skill` +- 接通 OpenViking 的 `soc/case` 和 `soc/knowledge` + +### 第 3 周 + +- 完成 `summarize_case_skill` +- 完成 `commit_memory_skill` +- 输出标准 case note 到 Obsidian + +### 第 4 周 + +- 跑评估脚本 +- 做人工 review +- 收敛下一阶段需求 + +## 评估指标 + +建议至少跟踪以下指标: + +- 相似 case 命中率 +- 检索上下文相关性 +- 平均研判时间 +- 最终结论准确率 +- 人工满意度 + +## 验收标准 + +POC 第一阶段可以认为成功,当同时满足: + +- 能稳定召回相关历史 case 或知识 +- 能辅助生成结构化 case note +- 人工评估认为上下文质量有明显提升 +- 没有因为“塞入太多资料”导致检索明显劣化 diff --git a/docs/sample-data-spec.md b/docs/sample-data-spec.md new file mode 100644 index 0000000..05344c7 --- /dev/null +++ b/docs/sample-data-spec.md @@ -0,0 +1,188 @@ +# Sample Data Spec + +## 目标 + +这个文档定义 SOC Memory POC 在无真实数据阶段使用的 mock 数据格式,用于: + +- 验证 ingestion pipeline +- 验证标准化脚本 +- 验证 context retrieval +- 验证 case summary 与 memory commit 流程 + +当前只覆盖两类场景: + +- 钓鱼邮件 +- O365 异常登录 / 疑似账号被盗 + +## 目录约定 + +```text +evaluation/datasets/ +├── mock_cases/ +│ ├── phishing/ +│ └── o365_suspicious_login/ +└── mock_kb/ + ├── playbooks/ + ├── kb/ + └── reports/ +``` + +## Mock Case 原始格式 + +每个 case 使用一个 JSON 文件,文件名建议: + +```text +.json +``` + +### 字段定义 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---:|---| +| `case_id` | string | 是 | case 唯一 ID | +| `title` | string | 是 | 简短标题 | +| `scenario` | string | 是 | `phishing` 或 `o365_suspicious_login` | +| `alert_type` | string | 是 | 告警类型 | +| `severity` | string | 是 | `low` / `medium` / `high` / `critical` | +| `status` | string | 是 | `confirmed` / `false_positive` / `pending` | +| `time_window` | object | 是 | 开始和结束时间 | +| `summary` | string | 是 | 一句话摘要 | +| `alert_source` | string | 是 | 告警来源系统 | +| `entities` | object | 是 | 关键实体 | +| `observables` | object | 否 | IOC/可观测对象 | +| `evidence` | array | 是 | 关键证据列表 | +| `investigation_steps` | array | 是 | 关键调查步骤 | +| `conclusion` | object | 是 | 研判结论 | +| `related_refs` | object | 否 | 相关 KB / playbook / case | +| `lessons_learned` | array | 否 | 复用经验 | +| `tags` | array | 否 | 标签 | + +### 示例骨架 + +```json +{ + "case_id": "CASE-2026-0001", + "title": "Potential phishing email targeting finance user", + "scenario": "phishing", + "alert_type": "mail_suspicious_attachment", + "severity": "high", + "status": "confirmed", + "time_window": { + "start": "2026-04-01T09:10:00+08:00", + "end": "2026-04-01T11:30:00+08:00" + }, + "summary": "Finance user received an invoice-themed phishing email with a malicious HTML attachment.", + "alert_source": "Secure Email Gateway", + "entities": { + "users": ["alice@corp.example"], + "hosts": ["FIN-LAPTOP-12"], + "mailboxes": ["alice@corp.example"] + }, + "observables": { + "sender_emails": ["billing@vendor-payments.com"], + "domains": ["vendor-payments.com"], + "urls": ["https://vendor-payments-login.com/review"], + "hashes": ["sha256:..."], + "ips": ["198.51.100.20"] + }, + "evidence": [ + "The sender domain was newly observed and failed DMARC.", + "The attachment redirected the user to a credential harvesting page." + ], + "investigation_steps": [ + "Validate sender reputation and authentication results.", + "Detonate attachment in sandbox.", + "Check click telemetry and account sign-in logs." + ], + "conclusion": { + "verdict": "true_positive", + "reason": "Multiple aligned phishing indicators and confirmed click behavior.", + "recommended_actions": [ + "Reset the impacted account password.", + "Block the sender domain and landing URL." 
+ ] + }, + "related_refs": { + "playbooks": ["PB-PHISH-001"], + "kb": ["KB-PHISH-HEADER-CHECK"], + "cases": [] + }, + "lessons_learned": [ + "Invoice-themed phishing remains effective against finance users." + ], + "tags": ["phishing", "email", "credential-harvest"] +} +``` + +## Mock KB / Playbook 原始格式 + +每个知识条目使用一个 JSON 文件,文件名建议: + +```text +.json +``` + +### 字段定义 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---:|---| +| `doc_id` | string | 是 | 文档唯一 ID | +| `doc_type` | string | 是 | `kb` / `playbook` / `report_summary` | +| `title` | string | 是 | 标题 | +| `scenario` | string | 是 | 适用场景 | +| `summary` | string | 是 | 核心摘要 | +| `applicability` | array | 否 | 适用条件 | +| `key_points` | array | 是 | 核心知识点 | +| `investigation_guidance` | array | 否 | 调查建议 | +| `decision_points` | array | 否 | 判定关键点 | +| `related_entities` | object | 否 | 相关实体/TTP/IOC | +| `related_refs` | object | 否 | 相关文档 | +| `tags` | array | 否 | 标签 | +| `updated_at` | string | 否 | 更新时间 | + +## 标准化输出目标 + +### 标准化后的 Case 结构 + +标准化脚本输出建议字段: + +- `id` +- `memory_type` = `case` +- `scenario` +- `title` +- `abstract` +- `verdict` +- `severity` +- `entities` +- `observables` +- `evidence` +- `patterns` +- `related_refs` +- `source_path` +- `tags` + +### 标准化后的 KB 结构 + +标准化脚本输出建议字段: + +- `id` +- `memory_type` = `knowledge` +- `doc_type` +- `scenario` +- `title` +- `abstract` +- `key_points` +- `investigation_guidance` +- `decision_points` +- `related_refs` +- `source_path` +- `tags` + +## 检索测试建议 + +在 mock 数据阶段,优先验证: + +- 钓鱼 case 是否能召回 phishing playbook 和相似 phishing case +- O365 登录异常 case 是否能召回登录异常 KB 和相似 case +- 真报与误报 case 是否能被区分并保留不同模式 +- 召回结果是否包含关键 evidence / decision points diff --git a/docs/system-positioning.md b/docs/system-positioning.md new file mode 100644 index 0000000..b0764ad --- /dev/null +++ b/docs/system-positioning.md @@ -0,0 +1,68 @@ +# System Positioning + +## 当前项目定位 + +`memory_gateway` 不是完整的 SOC 记忆系统,而是整套方案里的统一上下文入口层。 + +它当前承担的职责是: + +- 为 AI agent 提供统一的 MCP / REST 访问入口 +- 将检索和写入请求转发给 OpenViking +- 提供基础鉴权、协议兼容和网关能力 +- 作为多 agent 共享记忆体系的最薄接入层 + +它不直接承担以下职责: + +- 原始知识源的批量导入 +- 高价值记忆抽取和筛选 +- Obsidian Vault 的人工知识沉淀 +- EverMemOS 的长期记忆整理与演化 +- 评估数据集与实验流程管理 + +## 在整套 SOC 记忆系统中的位置 + +```text +SOC 数据源 +KB / Playbook / 月报 / 报告 / Ticket / Intel / 历史 Case + | + v +Skills / Pipeline +ingest / extract / classify / summarize / commit / prune + | + v +memory_gateway +统一入口层(MCP / REST / Auth / Routing) + | + v +OpenViking +统一 context / memory / resource / skill 层 + | | + v v +Obsidian Vault EverMemOS +人工沉淀层 长期整理层 +``` + +## 下一阶段模块建议 + +建议把后续 POC 能力分成以下模块: + +- `docs/` + 保存系统设计、数据模型、命名空间规范 +- `poc/skills/` + 保存检索、抽取、沉淀相关的 skills +- `poc/pipeline/` + 保存接入 ticket、intel、历史 case 的导入流程 +- `poc/obsidian-vault/` + 保存人工维护知识和 case note 模板 +- `poc/evermemos/` + 保存长期记忆整理逻辑和策略 +- `poc/evaluation/` + 保存数据集、评估脚本和结果 + +## 当前仓库边界建议 + +建议继续把本仓库控制在“网关项目”边界内: + +- 保留:服务入口、OpenViking 接入、配置、协议、测试 +- 新增:系统设计文档、POC 骨架目录 +- 不建议继续堆积:大量业务规则、海量导入脚本、Vault 内容本体 diff --git a/evaluation/README.md b/evaluation/README.md new file mode 100644 index 0000000..15eb464 --- /dev/null +++ b/evaluation/README.md @@ -0,0 +1,12 @@ +# Evaluation + +这个目录用于保存 POC 评估相关内容。 + +建议评估指标: + +- 相似 case 命中率 +- 研判时间缩短比例 +- 结论准确率 +- 人工满意度 + +建议 POC 先聚焦 1 到 2 类 SOC case。 diff --git a/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1001.json b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1001.json new file mode 100644 index 0000000..8c0e8c4 --- /dev/null +++ b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1001.json @@ -0,0 +1,19 @@ +{ + "case_id": 
"CASE-2026-1001", + "title": "Impossible travel login followed by MFA prompt fatigue", + "scenario": "o365_suspicious_login", + "alert_type": "azuread_impossible_travel", + "severity": "high", + "status": "confirmed", + "time_window": {"start": "2026-04-02T22:10:00+08:00", "end": "2026-04-02T23:30:00+08:00"}, + "summary": "User account showed impossible travel between Shanghai and Amsterdam, followed by repeated MFA prompts and successful sign-in.", + "alert_source": "Microsoft Entra ID", + "entities": {"users": ["david@corp.example"], "hosts": ["WS-DAVID-01"], "mailboxes": ["david@corp.example"]}, + "observables": {"ips": ["203.0.113.150", "198.51.100.61"], "domains": [], "urls": [], "hashes": []}, + "evidence": ["Two successful sign-ins from geographically impossible locations within 15 minutes.", "MFA challenge volume increased abnormally before final success.", "User confirmed they did not initiate overseas login."], + "investigation_steps": ["Review sign-in logs and device IDs.", "Check MFA event sequence.", "Validate user travel status with manager."], + "conclusion": {"verdict": "true_positive", "reason": "Impossible travel plus user denial and MFA fatigue pattern.", "recommended_actions": ["Revoke sessions and reset credentials.", "Review mailbox rules and app consent."]}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "kb": ["KB-O365-IMPOSSIBLE-TRAVEL", "KB-O365-MFA-FATIGUE"], "cases": []}, + "lessons_learned": ["Impossible travel needs to be combined with user confirmation and MFA telemetry."], + "tags": ["o365", "login", "impossible-travel", "mfa-fatigue"] +} diff --git a/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1002.json b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1002.json new file mode 100644 index 0000000..641f2bc --- /dev/null +++ b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1002.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-1002", + "title": "Legacy protocol sign-in from unfamiliar IP blocked by policy", + "scenario": "o365_suspicious_login", + "alert_type": "azuread_legacy_auth_attempt", + "severity": "medium", + "status": "false_positive", + "time_window": {"start": "2026-04-04T07:50:00+08:00", "end": "2026-04-04T08:10:00+08:00"}, + "summary": "Legacy authentication attempt from a cloud IP was blocked; investigation tied it to an approved migration tool test.", + "alert_source": "Microsoft Entra ID", + "entities": {"users": ["svc-migration@corp.example"], "hosts": [], "mailboxes": ["svc-migration@corp.example"]}, + "observables": {"ips": ["192.0.2.24"], "domains": [], "urls": [], "hashes": []}, + "evidence": ["The account is a known migration service account.", "Source IP matched approved cloud migration vendor range.", "No successful sign-in occurred due to policy block."], + "investigation_steps": ["Review service account inventory.", "Check change ticket for migration activity.", "Validate source IP against vendor allowlist."], + "conclusion": {"verdict": "false_positive", "reason": "Expected migration tool behavior with policy block and approved change window.", "recommended_actions": ["Tune alert suppression for approved migration windows."]}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "kb": ["KB-O365-LEGACY-AUTH"], "cases": []}, + "lessons_learned": ["Service account context is essential before escalating legacy auth alerts."], + "tags": ["o365", "login", "false-positive", "legacy-auth"] +} diff --git a/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1003.json 
b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1003.json new file mode 100644 index 0000000..7d39edd --- /dev/null +++ b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1003.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-1003", + "title": "Suspicious inbox rule creation after successful foreign login", + "scenario": "o365_suspicious_login", + "alert_type": "azuread_suspicious_inbox_rule_after_login", + "severity": "high", + "status": "confirmed", + "time_window": {"start": "2026-04-06T19:20:00+08:00", "end": "2026-04-06T20:45:00+08:00"}, + "summary": "An overseas sign-in to Microsoft 365 was followed by inbox rule creation to hide finance-related emails.", + "alert_source": "Microsoft Defender for Cloud Apps", + "entities": {"users": ["emma@corp.example"], "hosts": ["WS-EMMA-07"], "mailboxes": ["emma@corp.example"]}, + "observables": {"ips": ["198.51.100.98"], "domains": [], "urls": [], "hashes": []}, + "evidence": ["Successful sign-in from untrusted ASN.", "Inbox rule moved wire transfer emails to RSS Feeds folder.", "Mailbox audit showed rule creation minutes after login."], + "investigation_steps": ["Review mailbox audit logs.", "Export suspicious inbox rules.", "Check for OAuth app consent and forwarding settings."], + "conclusion": {"verdict": "true_positive", "reason": "Account compromise indicators plus malicious inbox rule persistence.", "recommended_actions": ["Remove malicious rules.", "Reset account and revoke refresh tokens."]}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "kb": ["KB-O365-INBOX-RULE-ABUSE", "KB-O365-IMPOSSIBLE-TRAVEL"], "cases": []}, + "lessons_learned": ["Mailbox rule inspection should be default for suspicious O365 login cases."], + "tags": ["o365", "login", "inbox-rule", "account-compromise"] +} diff --git a/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1004.json b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1004.json new file mode 100644 index 0000000..35f5a8a --- /dev/null +++ b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1004.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-1004", + "title": "Multiple failed logins from residential proxy but no successful access", + "scenario": "o365_suspicious_login", + "alert_type": "azuread_password_spray_attempt", + "severity": "medium", + "status": "pending", + "time_window": {"start": "2026-04-08T02:00:00+08:00", "end": "2026-04-08T03:10:00+08:00"}, + "summary": "Repeated failed Microsoft 365 sign-in attempts targeted one user from a residential proxy network, with no successful authentication observed.", + "alert_source": "Microsoft Entra ID", + "entities": {"users": ["frank@corp.example"], "hosts": [], "mailboxes": ["frank@corp.example"]}, + "observables": {"ips": ["203.0.113.201"], "domains": [], "urls": [], "hashes": []}, + "evidence": ["High-volume failed attempts over a short period.", "Source IP attributed to a residential proxy provider.", "No matching successful sign-in or MFA event found."], + "investigation_steps": ["Check password spray pattern across tenant.", "Confirm user recent password reset history.", "Review conditional access outcomes."], + "conclusion": {"verdict": "uncertain", "reason": "Suspicious authentication pattern but no confirmed access or downstream activity.", "recommended_actions": ["Monitor account closely.", "Consider temporary sign-in risk remediation."]}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "kb": ["KB-O365-IMPOSSIBLE-TRAVEL"], "cases": []}, + "lessons_learned": 
["Pending cases should still capture reusable spray indicators without overcommitting verdict."], + "tags": ["o365", "login", "password-spray", "pending"] +} diff --git a/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1005.json b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1005.json new file mode 100644 index 0000000..ea06b3b --- /dev/null +++ b/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1005.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-1005", + "title": "Traveling executive triggered impossible travel but activity was legitimate", + "scenario": "o365_suspicious_login", + "alert_type": "azuread_impossible_travel", + "severity": "medium", + "status": "false_positive", + "time_window": {"start": "2026-04-09T09:00:00+08:00", "end": "2026-04-09T09:40:00+08:00"}, + "summary": "Executive account triggered impossible travel due to corporate VPN exit node while the user was on an approved overseas trip.", + "alert_source": "Microsoft Entra ID", + "entities": {"users": ["grace@corp.example"], "hosts": ["VIP-LAPTOP-01"], "mailboxes": ["grace@corp.example"]}, + "observables": {"ips": ["192.0.2.90", "203.0.113.77"], "domains": [], "urls": [], "hashes": []}, + "evidence": ["Approved travel request existed.", "One login originated from corporate VPN exit node.", "Device and user agent were consistent with known user profile."], + "investigation_steps": ["Check travel approval and itinerary.", "Review VPN egress mapping.", "Compare user agent and managed device posture."], + "conclusion": {"verdict": "false_positive", "reason": "Legitimate travel combined with VPN routing caused impossible travel signal.", "recommended_actions": ["Document travel context and improve analyst checklist."]}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "kb": ["KB-O365-IMPOSSIBLE-TRAVEL"], "cases": []}, + "lessons_learned": ["Impossible travel should consider approved travel and VPN topology before escalation."], + "tags": ["o365", "login", "false-positive", "travel"] +} diff --git a/evaluation/datasets/mock_cases/phishing/CASE-2026-0001.json b/evaluation/datasets/mock_cases/phishing/CASE-2026-0001.json new file mode 100644 index 0000000..e0ac927 --- /dev/null +++ b/evaluation/datasets/mock_cases/phishing/CASE-2026-0001.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-0001", + "title": "Finance user received invoice-themed phishing email", + "scenario": "phishing", + "alert_type": "mail_suspicious_attachment", + "severity": "high", + "status": "confirmed", + "time_window": {"start": "2026-04-01T09:10:00+08:00", "end": "2026-04-01T11:30:00+08:00"}, + "summary": "Finance user received an invoice-themed phishing email containing a malicious HTML attachment that redirected to a credential harvesting page.", + "alert_source": "Secure Email Gateway", + "entities": {"users": ["alice@corp.example"], "hosts": ["FIN-LAPTOP-12"], "mailboxes": ["alice@corp.example"]}, + "observables": {"sender_emails": ["billing@vendor-payments.com"], "domains": ["vendor-payments.com", "vendor-payments-login.com"], "urls": ["https://vendor-payments-login.com/review"], "ips": ["198.51.100.20"], "hashes": ["sha256:phish0001"]}, + "evidence": ["Sender domain was newly observed and failed DMARC.", "Attachment redirected to a fake Microsoft 365 login page.", "User clicked the link before mail quarantine completed."], + "investigation_steps": ["Validate sender authentication results.", "Detonate HTML attachment in sandbox.", "Check mailbox click telemetry and account sign-in logs."], + 
"conclusion": {"verdict": "true_positive", "reason": "Aligned phishing indicators and confirmed click behavior.", "recommended_actions": ["Reset impacted account password.", "Block sender domain and landing URL.", "Hunt for similar emails in tenant."]}, + "related_refs": {"playbooks": ["PB-PHISH-001"], "kb": ["KB-PHISH-HEADER-CHECK", "KB-CRED-HARVEST-PATTERNS"], "cases": []}, + "lessons_learned": ["Invoice lure remains effective against finance users."], + "tags": ["phishing", "email", "credential-harvest", "finance"] +} diff --git a/evaluation/datasets/mock_cases/phishing/CASE-2026-0002.json b/evaluation/datasets/mock_cases/phishing/CASE-2026-0002.json new file mode 100644 index 0000000..da3cf16 --- /dev/null +++ b/evaluation/datasets/mock_cases/phishing/CASE-2026-0002.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-0002", + "title": "Payroll notification email flagged but determined benign", + "scenario": "phishing", + "alert_type": "mail_suspicious_link", + "severity": "medium", + "status": "false_positive", + "time_window": {"start": "2026-04-03T08:40:00+08:00", "end": "2026-04-03T09:20:00+08:00"}, + "summary": "Payroll update email was flagged due to a shortened URL, but the destination was the approved HR vendor portal.", + "alert_source": "Secure Email Gateway", + "entities": {"users": ["bob@corp.example"], "hosts": ["HR-LAPTOP-03"], "mailboxes": ["bob@corp.example"]}, + "observables": {"sender_emails": ["notify@hr-vendor.example"], "domains": ["hr-vendor.example"], "urls": ["https://bit.ly/hr-portal-example"], "ips": [], "hashes": []}, + "evidence": ["Sender domain aligned with SPF and DKIM.", "Destination domain matched approved supplier inventory.", "No credential prompt anomaly observed."], + "investigation_steps": ["Expand shortened URL.", "Validate vendor domain against allowlist.", "Review prior communication pattern with HR users."], + "conclusion": {"verdict": "false_positive", "reason": "Trusted vendor communication with expected destination.", "recommended_actions": ["Tune mail rule to reduce noisy alerts for approved HR vendor."]}, + "related_refs": {"playbooks": ["PB-PHISH-001"], "kb": ["KB-PHISH-HEADER-CHECK"], "cases": []}, + "lessons_learned": ["Short URLs alone should not drive phishing conclusion without destination validation."], + "tags": ["phishing", "email", "false-positive", "vendor"] +} diff --git a/evaluation/datasets/mock_cases/phishing/CASE-2026-0003.json b/evaluation/datasets/mock_cases/phishing/CASE-2026-0003.json new file mode 100644 index 0000000..fd171b0 --- /dev/null +++ b/evaluation/datasets/mock_cases/phishing/CASE-2026-0003.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-0003", + "title": "Executive impersonation email requested urgent wire transfer", + "scenario": "phishing", + "alert_type": "mail_bec_impersonation", + "severity": "high", + "status": "confirmed", + "time_window": {"start": "2026-04-05T13:15:00+08:00", "end": "2026-04-05T15:00:00+08:00"}, + "summary": "An executive impersonation email targeted finance staff with an urgent wire transfer request from a lookalike domain.", + "alert_source": "Secure Email Gateway", + "entities": {"users": ["carol@corp.example"], "hosts": ["FIN-LAPTOP-08"], "mailboxes": ["carol@corp.example"]}, + "observables": {"sender_emails": ["ceo@c0rp-example.com"], "domains": ["c0rp-example.com"], "urls": [], "ips": ["203.0.113.45"], "hashes": []}, + "evidence": ["Lookalike domain used numeric substitution.", "Language pressure matched prior BEC pattern.", "No historical communication from sender domain."], + 
"investigation_steps": ["Compare sender domain with corporate domain.", "Review historical communication graph.", "Confirm with executive assistant out of band."], + "conclusion": {"verdict": "true_positive", "reason": "Strong BEC indicators and confirmed spoofed sender identity.", "recommended_actions": ["Block sender domain.", "Notify finance team and update awareness content."]}, + "related_refs": {"playbooks": ["PB-PHISH-001"], "kb": ["KB-CRED-HARVEST-PATTERNS"], "cases": []}, + "lessons_learned": ["Lookalike domains need strong entity normalization in retrieval and detection logic."], + "tags": ["phishing", "bec", "executive-impersonation"] +} diff --git a/evaluation/datasets/mock_cases/phishing/CASE-2026-0004.json b/evaluation/datasets/mock_cases/phishing/CASE-2026-0004.json new file mode 100644 index 0000000..76555ef --- /dev/null +++ b/evaluation/datasets/mock_cases/phishing/CASE-2026-0004.json @@ -0,0 +1,19 @@ +{ + "case_id": "CASE-2026-0004", + "title": "Shared mailbox received OneDrive lure with HTML attachment", + "scenario": "phishing", + "alert_type": "mail_suspicious_attachment", + "severity": "medium", + "status": "confirmed", + "time_window": {"start": "2026-04-07T10:00:00+08:00", "end": "2026-04-07T12:05:00+08:00"}, + "summary": "Shared finance mailbox received a fake OneDrive notification with an HTML attachment that led to credential collection.", + "alert_source": "Secure Email Gateway", + "entities": {"users": ["shared-finance@corp.example"], "hosts": [], "mailboxes": ["shared-finance@corp.example"]}, + "observables": {"sender_emails": ["noreply@sharepoint-notify.com"], "domains": ["sharepoint-notify.com"], "urls": ["https://onedrive-review-login.example"], "ips": ["198.51.100.87"], "hashes": ["sha256:phish0004"]}, + "evidence": ["Attachment rendered a fake Microsoft sign-in page.", "Landing page hosted outside Microsoft IP space.", "Mail body reused branding from previous phishing campaign."], + "investigation_steps": ["Render attachment safely.", "Review URL hosting provider reputation.", "Search tenant for same subject and sender."], + "conclusion": {"verdict": "true_positive", "reason": "Credential harvesting lure with campaign reuse indicators.", "recommended_actions": ["Block sender and URL.", "Search and purge duplicate emails."]}, + "related_refs": {"playbooks": ["PB-PHISH-001"], "kb": ["KB-CRED-HARVEST-PATTERNS"], "cases": ["CASE-2026-0001"]}, + "lessons_learned": ["Campaign reuse makes historical phishing similarity especially valuable."], + "tags": ["phishing", "email", "onedrive-lure"] +} diff --git a/evaluation/datasets/mock_kb/kb/KB-CRED-HARVEST-PATTERNS.json b/evaluation/datasets/mock_kb/kb/KB-CRED-HARVEST-PATTERNS.json new file mode 100644 index 0000000..bb6e816 --- /dev/null +++ b/evaluation/datasets/mock_kb/kb/KB-CRED-HARVEST-PATTERNS.json @@ -0,0 +1,15 @@ +{ + "doc_id": "KB-CRED-HARVEST-PATTERNS", + "doc_type": "kb", + "title": "Credential Harvesting Indicators", + "scenario": "phishing", + "summary": "Common indicators that a phishing case involves credential harvesting rather than simple spam or benign mail.", + "applicability": ["mail_suspicious_attachment", "mail_suspicious_link"], + "key_points": ["Landing page mimics Microsoft 365 or common SaaS login pages.", "HTML attachment often acts as a redirector rather than containing malware.", "Credential harvest campaigns frequently reuse branding and lures across tenants."], + "investigation_guidance": ["Capture full redirect chain.", "Look for post-click login anomalies in identity logs.", "Search 
for same lure across multiple mailboxes."], + "decision_points": ["User click plus sign-in anomaly greatly increases confidence.", "Branding reuse can help link separate phishing cases into one campaign."], + "related_entities": {"ttps": ["T1566.002"], "iocs": []}, + "related_refs": {"playbooks": ["PB-PHISH-001"], "cases": []}, + "tags": ["kb", "phishing", "credential-harvest"], + "updated_at": "2026-04-10T09:25:00+08:00" +} diff --git a/evaluation/datasets/mock_kb/kb/KB-O365-IMPOSSIBLE-TRAVEL.json b/evaluation/datasets/mock_kb/kb/KB-O365-IMPOSSIBLE-TRAVEL.json new file mode 100644 index 0000000..94f65d7 --- /dev/null +++ b/evaluation/datasets/mock_kb/kb/KB-O365-IMPOSSIBLE-TRAVEL.json @@ -0,0 +1,15 @@ +{ + "doc_id": "KB-O365-IMPOSSIBLE-TRAVEL", + "doc_type": "kb", + "title": "Interpreting O365 Impossible Travel Alerts", + "scenario": "o365_suspicious_login", + "summary": "Guidance for validating impossible travel alerts, including VPN, proxy, and approved travel false-positive conditions.", + "applicability": ["azuread_impossible_travel"], + "key_points": ["Impossible travel must be validated against user travel context.", "VPN egress and cloud proxy routing are common false-positive sources.", "Pair sign-in anomaly with MFA, mailbox, or device anomalies before concluding compromise."], + "investigation_guidance": ["Validate source ASN and IP history.", "Check user-approved travel or remote work context.", "Compare device ID and user agent consistency."], + "decision_points": ["User denial of travel plus new device strongly increases confidence.", "Approved travel and trusted VPN topology reduce confidence."], + "related_entities": {"ttps": ["T1078"], "iocs": []}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "cases": []}, + "tags": ["kb", "o365", "impossible-travel"], + "updated_at": "2026-04-10T09:30:00+08:00" +} diff --git a/evaluation/datasets/mock_kb/kb/KB-O365-INBOX-RULE-ABUSE.json b/evaluation/datasets/mock_kb/kb/KB-O365-INBOX-RULE-ABUSE.json new file mode 100644 index 0000000..e9fa812 --- /dev/null +++ b/evaluation/datasets/mock_kb/kb/KB-O365-INBOX-RULE-ABUSE.json @@ -0,0 +1,15 @@ +{ + "doc_id": "KB-O365-INBOX-RULE-ABUSE", + "doc_type": "kb", + "title": "Inbox Rule Abuse After Account Compromise", + "scenario": "o365_suspicious_login", + "summary": "Common mailbox persistence behaviors after O365 account compromise, especially rule creation to hide or forward finance emails.", + "applicability": ["azuread_suspicious_inbox_rule_after_login"], + "key_points": ["Attackers often hide financial emails using move-to-folder rules.", "Forwarding and delete rules are strong post-compromise indicators.", "Mailbox audit logs should be reviewed immediately after suspicious login confirmation."], + "investigation_guidance": ["Enumerate all inbox rules and forwarding settings.", "Check mailbox audit timeline around suspicious sign-in.", "Review OAuth consents if inbox rules are absent but suspicious mail actions continue."], + "decision_points": ["Inbox rule creation shortly after suspicious login strongly supports compromise verdict."], + "related_entities": {"ttps": ["T1114"], "iocs": []}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "cases": []}, + "tags": ["kb", "o365", "inbox-rule"], + "updated_at": "2026-04-10T09:40:00+08:00" +} diff --git a/evaluation/datasets/mock_kb/kb/KB-O365-MFA-FATIGUE.json b/evaluation/datasets/mock_kb/kb/KB-O365-MFA-FATIGUE.json new file mode 100644 index 0000000..21ee9b7 --- /dev/null +++ b/evaluation/datasets/mock_kb/kb/KB-O365-MFA-FATIGUE.json 
@@ -0,0 +1,15 @@ +{ + "doc_id": "KB-O365-MFA-FATIGUE", + "doc_type": "kb", + "title": "MFA Fatigue Detection Notes", + "scenario": "o365_suspicious_login", + "summary": "Patterns for identifying MFA fatigue / push bombing during account compromise attempts.", + "applicability": ["azuread_impossible_travel", "azuread_suspicious_login"], + "key_points": ["Repeated MFA prompts preceding one successful prompt is suspicious.", "User-reported prompt fatigue is strong supporting evidence.", "MFA fatigue is often coupled with credential theft rather than password spray alone."], + "investigation_guidance": ["Review MFA event counts and timing.", "Check if the user acknowledged unexpected prompts.", "Look for subsequent session hijacking or mailbox abuse."], + "decision_points": ["Prompt flood plus user denial usually warrants immediate containment."], + "related_entities": {"ttps": ["T1621"], "iocs": []}, + "related_refs": {"playbooks": ["PB-O365-LOGIN-001"], "cases": []}, + "tags": ["kb", "o365", "mfa-fatigue"], + "updated_at": "2026-04-10T09:35:00+08:00" +} diff --git a/evaluation/datasets/mock_kb/kb/KB-PHISH-HEADER-CHECK.json b/evaluation/datasets/mock_kb/kb/KB-PHISH-HEADER-CHECK.json new file mode 100644 index 0000000..3f38301 --- /dev/null +++ b/evaluation/datasets/mock_kb/kb/KB-PHISH-HEADER-CHECK.json @@ -0,0 +1,15 @@ +{ + "doc_id": "KB-PHISH-HEADER-CHECK", + "doc_type": "kb", + "title": "Phishing Header Validation Checklist", + "scenario": "phishing", + "summary": "Checklist for validating sender identity, domain reputation, and authentication results in suspected phishing emails.", + "applicability": ["mail_suspicious_attachment", "mail_suspicious_link", "mail_bec_impersonation"], + "key_points": ["Review SPF, DKIM, and DMARC alignment.", "Compare display name, envelope sender, and reply-to anomalies.", "Check domain age and known-good communication history."], + "investigation_guidance": ["Use message trace and header parser.", "Compare sender domain with vendor allowlist.", "Escalate lookalike domains even when content appears business-relevant."], + "decision_points": ["Newly observed domains with failed auth are high-risk.", "Benign vendor mail often has consistent historical sending patterns."], + "related_entities": {"ttps": ["T1566.001"], "iocs": []}, + "related_refs": {"playbooks": ["PB-PHISH-001"], "cases": []}, + "tags": ["kb", "phishing", "email-header"], + "updated_at": "2026-04-10T09:20:00+08:00" +} diff --git a/evaluation/datasets/mock_kb/playbooks/PB-O365-LOGIN-001.json b/evaluation/datasets/mock_kb/playbooks/PB-O365-LOGIN-001.json new file mode 100644 index 0000000..02b8bef --- /dev/null +++ b/evaluation/datasets/mock_kb/playbooks/PB-O365-LOGIN-001.json @@ -0,0 +1,15 @@ +{ + "doc_id": "PB-O365-LOGIN-001", + "doc_type": "playbook", + "title": "O365 Suspicious Login Investigation Playbook", + "scenario": "o365_suspicious_login", + "summary": "Standard investigation steps for suspicious Entra ID sign-ins, impossible travel, MFA abuse, and follow-on mailbox abuse.", + "applicability": ["azuread_impossible_travel", "azuread_legacy_auth_attempt", "azuread_suspicious_inbox_rule_after_login", "azuread_password_spray_attempt"], + "key_points": ["Confirm user travel and business context.", "Review sign-in logs, device IDs, and user agents.", "Inspect downstream actions such as inbox rules, app consent, and forwarding."], + "investigation_guidance": ["Correlate MFA telemetry with sign-in sequence.", "Check risky sign-ins and risky users views.", "Revoke sessions and reset credentials 
when compromise is confirmed."], + "decision_points": ["Impossible travel alone is insufficient without corroborating evidence.", "Inbox rule creation after foreign login strongly increases confidence of compromise."], + "related_entities": {"ttps": ["T1078"], "iocs": []}, + "related_refs": {"kb": ["KB-O365-IMPOSSIBLE-TRAVEL", "KB-O365-MFA-FATIGUE", "KB-O365-INBOX-RULE-ABUSE"], "cases": []}, + "tags": ["playbook", "o365", "login"], + "updated_at": "2026-04-10T09:10:00+08:00" +} diff --git a/evaluation/datasets/mock_kb/playbooks/PB-PHISH-001.json b/evaluation/datasets/mock_kb/playbooks/PB-PHISH-001.json new file mode 100644 index 0000000..560783e --- /dev/null +++ b/evaluation/datasets/mock_kb/playbooks/PB-PHISH-001.json @@ -0,0 +1,15 @@ +{ + "doc_id": "PB-PHISH-001", + "doc_type": "playbook", + "title": "Phishing Email Investigation Playbook", + "scenario": "phishing", + "summary": "Standard investigation steps for suspicious email, credential harvesting, and BEC-like cases.", + "applicability": ["mail_suspicious_attachment", "mail_suspicious_link", "mail_bec_impersonation"], + "key_points": ["Validate sender authentication results.", "Inspect landing URL and attachment behavior.", "Check whether the user clicked or submitted credentials."], + "investigation_guidance": ["Query email telemetry for same sender, subject, or URL.", "Review mailbox click logs and endpoint browser artifacts.", "Reset credentials if submission is suspected."], + "decision_points": ["If sender auth fails and user interaction exists, treat as likely phishing.", "If destination is allowlisted and communication pattern is expected, investigate false positive path."], + "related_entities": {"ttps": ["T1566"], "iocs": []}, + "related_refs": {"kb": ["KB-PHISH-HEADER-CHECK", "KB-CRED-HARVEST-PATTERNS"], "cases": []}, + "tags": ["playbook", "phishing", "email"], + "updated_at": "2026-04-10T09:00:00+08:00" +} diff --git a/evaluation/datasets/normalized_cases/CASE-2026-0001.json b/evaluation/datasets/normalized_cases/CASE-2026-0001.json new file mode 100644 index 0000000..901e559 --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-0001.json @@ -0,0 +1,65 @@ +{ + "id": "CASE-2026-0001", + "memory_type": "case", + "scenario": "phishing", + "title": "Finance user received invoice-themed phishing email", + "abstract": "Finance user received an invoice-themed phishing email containing a malicious HTML attachment that redirected to a credential harvesting page.", + "verdict": "true_positive", + "severity": "high", + "entities": { + "users": [ + "alice@corp.example" + ], + "hosts": [ + "FIN-LAPTOP-12" + ], + "mailboxes": [ + "alice@corp.example" + ] + }, + "observables": { + "sender_emails": [ + "billing@vendor-payments.com" + ], + "domains": [ + "vendor-payments.com", + "vendor-payments-login.com" + ], + "urls": [ + "https://vendor-payments-login.com/review" + ], + "ips": [ + "198.51.100.20" + ], + "hashes": [ + "sha256:phish0001" + ] + }, + "evidence": [ + "Sender domain was newly observed and failed DMARC.", + "Attachment redirected to a fake Microsoft 365 login page.", + "User clicked the link before mail quarantine completed." 
+ ], + "patterns": [ + "verdict:true_positive", + "scenario:phishing", + "alert_type:mail_suspicious_attachment" + ], + "related_refs": { + "playbooks": [ + "PB-PHISH-001" + ], + "kb": [ + "KB-PHISH-HEADER-CHECK", + "KB-CRED-HARVEST-PATTERNS" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/phishing/CASE-2026-0001.json", + "tags": [ + "phishing", + "email", + "credential-harvest", + "finance" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-0002.json b/evaluation/datasets/normalized_cases/CASE-2026-0002.json new file mode 100644 index 0000000..28597bc --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-0002.json @@ -0,0 +1,59 @@ +{ + "id": "CASE-2026-0002", + "memory_type": "case", + "scenario": "phishing", + "title": "Payroll notification email flagged but determined benign", + "abstract": "Payroll update email was flagged due to a shortened URL, but the destination was the approved HR vendor portal.", + "verdict": "false_positive", + "severity": "medium", + "entities": { + "users": [ + "bob@corp.example" + ], + "hosts": [ + "HR-LAPTOP-03" + ], + "mailboxes": [ + "bob@corp.example" + ] + }, + "observables": { + "sender_emails": [ + "notify@hr-vendor.example" + ], + "domains": [ + "hr-vendor.example" + ], + "urls": [ + "https://bit.ly/hr-portal-example" + ], + "ips": [], + "hashes": [] + }, + "evidence": [ + "Sender domain aligned with SPF and DKIM.", + "Destination domain matched approved supplier inventory.", + "No credential prompt anomaly observed." + ], + "patterns": [ + "verdict:false_positive", + "scenario:phishing", + "alert_type:mail_suspicious_link" + ], + "related_refs": { + "playbooks": [ + "PB-PHISH-001" + ], + "kb": [ + "KB-PHISH-HEADER-CHECK" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/phishing/CASE-2026-0002.json", + "tags": [ + "phishing", + "email", + "false-positive", + "vendor" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-0003.json b/evaluation/datasets/normalized_cases/CASE-2026-0003.json new file mode 100644 index 0000000..7a9baed --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-0003.json @@ -0,0 +1,58 @@ +{ + "id": "CASE-2026-0003", + "memory_type": "case", + "scenario": "phishing", + "title": "Executive impersonation email requested urgent wire transfer", + "abstract": "An executive impersonation email targeted finance staff with an urgent wire transfer request from a lookalike domain.", + "verdict": "true_positive", + "severity": "high", + "entities": { + "users": [ + "carol@corp.example" + ], + "hosts": [ + "FIN-LAPTOP-08" + ], + "mailboxes": [ + "carol@corp.example" + ] + }, + "observables": { + "sender_emails": [ + "ceo@c0rp-example.com" + ], + "domains": [ + "c0rp-example.com" + ], + "urls": [], + "ips": [ + "203.0.113.45" + ], + "hashes": [] + }, + "evidence": [ + "Lookalike domain used numeric substitution.", + "Language pressure matched prior BEC pattern.", + "No historical communication from sender domain." 
+ ], + "patterns": [ + "verdict:true_positive", + "scenario:phishing", + "alert_type:mail_bec_impersonation" + ], + "related_refs": { + "playbooks": [ + "PB-PHISH-001" + ], + "kb": [ + "KB-CRED-HARVEST-PATTERNS" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/phishing/CASE-2026-0003.json", + "tags": [ + "phishing", + "bec", + "executive-impersonation" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-0004.json b/evaluation/datasets/normalized_cases/CASE-2026-0004.json new file mode 100644 index 0000000..b178524 --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-0004.json @@ -0,0 +1,62 @@ +{ + "id": "CASE-2026-0004", + "memory_type": "case", + "scenario": "phishing", + "title": "Shared mailbox received OneDrive lure with HTML attachment", + "abstract": "Shared finance mailbox received a fake OneDrive notification with an HTML attachment that led to credential collection.", + "verdict": "true_positive", + "severity": "medium", + "entities": { + "users": [ + "shared-finance@corp.example" + ], + "hosts": [], + "mailboxes": [ + "shared-finance@corp.example" + ] + }, + "observables": { + "sender_emails": [ + "noreply@sharepoint-notify.com" + ], + "domains": [ + "sharepoint-notify.com" + ], + "urls": [ + "https://onedrive-review-login.example" + ], + "ips": [ + "198.51.100.87" + ], + "hashes": [ + "sha256:phish0004" + ] + }, + "evidence": [ + "Attachment rendered a fake Microsoft sign-in page.", + "Landing page hosted outside Microsoft IP space.", + "Mail body reused branding from previous phishing campaign." + ], + "patterns": [ + "verdict:true_positive", + "scenario:phishing", + "alert_type:mail_suspicious_attachment" + ], + "related_refs": { + "playbooks": [ + "PB-PHISH-001" + ], + "kb": [ + "KB-CRED-HARVEST-PATTERNS" + ], + "cases": [ + "CASE-2026-0001" + ] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/phishing/CASE-2026-0004.json", + "tags": [ + "phishing", + "email", + "onedrive-lure" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-1001.json b/evaluation/datasets/normalized_cases/CASE-2026-1001.json new file mode 100644 index 0000000..c4a78d6 --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-1001.json @@ -0,0 +1,56 @@ +{ + "id": "CASE-2026-1001", + "memory_type": "case", + "scenario": "o365_suspicious_login", + "title": "Impossible travel login followed by MFA prompt fatigue", + "abstract": "User account showed impossible travel between Shanghai and Amsterdam, followed by repeated MFA prompts and successful sign-in.", + "verdict": "true_positive", + "severity": "high", + "entities": { + "users": [ + "david@corp.example" + ], + "hosts": [ + "WS-DAVID-01" + ], + "mailboxes": [ + "david@corp.example" + ] + }, + "observables": { + "ips": [ + "203.0.113.150", + "198.51.100.61" + ], + "domains": [], + "urls": [], + "hashes": [] + }, + "evidence": [ + "Two successful sign-ins from geographically impossible locations within 15 minutes.", + "MFA challenge volume increased abnormally before final success.", + "User confirmed they did not initiate overseas login." 
+ ], + "patterns": [ + "verdict:true_positive", + "scenario:o365_suspicious_login", + "alert_type:azuread_impossible_travel" + ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "kb": [ + "KB-O365-IMPOSSIBLE-TRAVEL", + "KB-O365-MFA-FATIGUE" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1001.json", + "tags": [ + "o365", + "login", + "impossible-travel", + "mfa-fatigue" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-1002.json b/evaluation/datasets/normalized_cases/CASE-2026-1002.json new file mode 100644 index 0000000..e65f6b6 --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-1002.json @@ -0,0 +1,52 @@ +{ + "id": "CASE-2026-1002", + "memory_type": "case", + "scenario": "o365_suspicious_login", + "title": "Legacy protocol sign-in from unfamiliar IP blocked by policy", + "abstract": "Legacy authentication attempt from a cloud IP was blocked; investigation tied it to an approved migration tool test.", + "verdict": "false_positive", + "severity": "medium", + "entities": { + "users": [ + "svc-migration@corp.example" + ], + "hosts": [], + "mailboxes": [ + "svc-migration@corp.example" + ] + }, + "observables": { + "ips": [ + "192.0.2.24" + ], + "domains": [], + "urls": [], + "hashes": [] + }, + "evidence": [ + "The account is a known migration service account.", + "Source IP matched approved cloud migration vendor range.", + "No successful sign-in occurred due to policy block." + ], + "patterns": [ + "verdict:false_positive", + "scenario:o365_suspicious_login", + "alert_type:azuread_legacy_auth_attempt" + ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "kb": [ + "KB-O365-LEGACY-AUTH" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1002.json", + "tags": [ + "o365", + "login", + "false-positive", + "legacy-auth" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-1003.json b/evaluation/datasets/normalized_cases/CASE-2026-1003.json new file mode 100644 index 0000000..0bd647e --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-1003.json @@ -0,0 +1,55 @@ +{ + "id": "CASE-2026-1003", + "memory_type": "case", + "scenario": "o365_suspicious_login", + "title": "Suspicious inbox rule creation after successful foreign login", + "abstract": "An overseas sign-in to Microsoft 365 was followed by inbox rule creation to hide finance-related emails.", + "verdict": "true_positive", + "severity": "high", + "entities": { + "users": [ + "emma@corp.example" + ], + "hosts": [ + "WS-EMMA-07" + ], + "mailboxes": [ + "emma@corp.example" + ] + }, + "observables": { + "ips": [ + "198.51.100.98" + ], + "domains": [], + "urls": [], + "hashes": [] + }, + "evidence": [ + "Successful sign-in from untrusted ASN.", + "Inbox rule moved wire transfer emails to RSS Feeds folder.", + "Mailbox audit showed rule creation minutes after login." 
+ ], + "patterns": [ + "verdict:true_positive", + "scenario:o365_suspicious_login", + "alert_type:azuread_suspicious_inbox_rule_after_login" + ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "kb": [ + "KB-O365-INBOX-RULE-ABUSE", + "KB-O365-IMPOSSIBLE-TRAVEL" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1003.json", + "tags": [ + "o365", + "login", + "inbox-rule", + "account-compromise" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-1004.json b/evaluation/datasets/normalized_cases/CASE-2026-1004.json new file mode 100644 index 0000000..88abef9 --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-1004.json @@ -0,0 +1,52 @@ +{ + "id": "CASE-2026-1004", + "memory_type": "case", + "scenario": "o365_suspicious_login", + "title": "Multiple failed logins from residential proxy but no successful access", + "abstract": "Repeated failed Microsoft 365 sign-in attempts targeted one user from a residential proxy network, with no successful authentication observed.", + "verdict": "uncertain", + "severity": "medium", + "entities": { + "users": [ + "frank@corp.example" + ], + "hosts": [], + "mailboxes": [ + "frank@corp.example" + ] + }, + "observables": { + "ips": [ + "203.0.113.201" + ], + "domains": [], + "urls": [], + "hashes": [] + }, + "evidence": [ + "High-volume failed attempts over a short period.", + "Source IP attributed to a residential proxy provider.", + "No matching successful sign-in or MFA event found." + ], + "patterns": [ + "verdict:uncertain", + "scenario:o365_suspicious_login", + "alert_type:azuread_password_spray_attempt" + ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "kb": [ + "KB-O365-IMPOSSIBLE-TRAVEL" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1004.json", + "tags": [ + "o365", + "login", + "password-spray", + "pending" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_cases/CASE-2026-1005.json b/evaluation/datasets/normalized_cases/CASE-2026-1005.json new file mode 100644 index 0000000..8a5a89d --- /dev/null +++ b/evaluation/datasets/normalized_cases/CASE-2026-1005.json @@ -0,0 +1,55 @@ +{ + "id": "CASE-2026-1005", + "memory_type": "case", + "scenario": "o365_suspicious_login", + "title": "Traveling executive triggered impossible travel but activity was legitimate", + "abstract": "Executive account triggered impossible travel due to corporate VPN exit node while the user was on an approved overseas trip.", + "verdict": "false_positive", + "severity": "medium", + "entities": { + "users": [ + "grace@corp.example" + ], + "hosts": [ + "VIP-LAPTOP-01" + ], + "mailboxes": [ + "grace@corp.example" + ] + }, + "observables": { + "ips": [ + "192.0.2.90", + "203.0.113.77" + ], + "domains": [], + "urls": [], + "hashes": [] + }, + "evidence": [ + "Approved travel request existed.", + "One login originated from corporate VPN exit node.", + "Device and user agent were consistent with known user profile." 
+ ], + "patterns": [ + "verdict:false_positive", + "scenario:o365_suspicious_login", + "alert_type:azuread_impossible_travel" + ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "kb": [ + "KB-O365-IMPOSSIBLE-TRAVEL" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_cases/o365_suspicious_login/CASE-2026-1005.json", + "tags": [ + "o365", + "login", + "false-positive", + "travel" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_kb/KB-CRED-HARVEST-PATTERNS.json b/evaluation/datasets/normalized_kb/KB-CRED-HARVEST-PATTERNS.json new file mode 100644 index 0000000..48db313 --- /dev/null +++ b/evaluation/datasets/normalized_kb/KB-CRED-HARVEST-PATTERNS.json @@ -0,0 +1,34 @@ +{ + "id": "KB-CRED-HARVEST-PATTERNS", + "memory_type": "knowledge", + "doc_type": "kb", + "scenario": "phishing", + "title": "Credential Harvesting Indicators", + "abstract": "Common indicators that a phishing case involves credential harvesting rather than simple spam or benign mail.", + "key_points": [ + "Landing page mimics Microsoft 365 or common SaaS login pages.", + "HTML attachment often acts as a redirector rather than containing malware.", + "Credential harvest campaigns frequently reuse branding and lures across tenants." + ], + "investigation_guidance": [ + "Capture full redirect chain.", + "Look for post-click login anomalies in identity logs.", + "Search for same lure across multiple mailboxes." + ], + "decision_points": [ + "User click plus sign-in anomaly greatly increases confidence.", + "Branding reuse can help link separate phishing cases into one campaign." + ], + "related_refs": { + "playbooks": [ + "PB-PHISH-001" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_kb/kb/KB-CRED-HARVEST-PATTERNS.json", + "tags": [ + "kb", + "phishing", + "credential-harvest" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_kb/KB-O365-IMPOSSIBLE-TRAVEL.json b/evaluation/datasets/normalized_kb/KB-O365-IMPOSSIBLE-TRAVEL.json new file mode 100644 index 0000000..9d38815 --- /dev/null +++ b/evaluation/datasets/normalized_kb/KB-O365-IMPOSSIBLE-TRAVEL.json @@ -0,0 +1,34 @@ +{ + "id": "KB-O365-IMPOSSIBLE-TRAVEL", + "memory_type": "knowledge", + "doc_type": "kb", + "scenario": "o365_suspicious_login", + "title": "Interpreting O365 Impossible Travel Alerts", + "abstract": "Guidance for validating impossible travel alerts, including VPN, proxy, and approved travel false-positive conditions.", + "key_points": [ + "Impossible travel must be validated against user travel context.", + "VPN egress and cloud proxy routing are common false-positive sources.", + "Pair sign-in anomaly with MFA, mailbox, or device anomalies before concluding compromise." + ], + "investigation_guidance": [ + "Validate source ASN and IP history.", + "Check user-approved travel or remote work context.", + "Compare device ID and user agent consistency." + ], + "decision_points": [ + "User denial of travel plus new device strongly increases confidence.", + "Approved travel and trusted VPN topology reduce confidence." 
+ ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_kb/kb/KB-O365-IMPOSSIBLE-TRAVEL.json", + "tags": [ + "kb", + "o365", + "impossible-travel" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_kb/KB-O365-INBOX-RULE-ABUSE.json b/evaluation/datasets/normalized_kb/KB-O365-INBOX-RULE-ABUSE.json new file mode 100644 index 0000000..ba79964 --- /dev/null +++ b/evaluation/datasets/normalized_kb/KB-O365-INBOX-RULE-ABUSE.json @@ -0,0 +1,33 @@ +{ + "id": "KB-O365-INBOX-RULE-ABUSE", + "memory_type": "knowledge", + "doc_type": "kb", + "scenario": "o365_suspicious_login", + "title": "Inbox Rule Abuse After Account Compromise", + "abstract": "Common mailbox persistence behaviors after O365 account compromise, especially rule creation to hide or forward finance emails.", + "key_points": [ + "Attackers often hide financial emails using move-to-folder rules.", + "Forwarding and delete rules are strong post-compromise indicators.", + "Mailbox audit logs should be reviewed immediately after suspicious login confirmation." + ], + "investigation_guidance": [ + "Enumerate all inbox rules and forwarding settings.", + "Check mailbox audit timeline around suspicious sign-in.", + "Review OAuth consents if inbox rules are absent but suspicious mail actions continue." + ], + "decision_points": [ + "Inbox rule creation shortly after suspicious login strongly supports compromise verdict." + ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_kb/kb/KB-O365-INBOX-RULE-ABUSE.json", + "tags": [ + "kb", + "o365", + "inbox-rule" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_kb/KB-O365-MFA-FATIGUE.json b/evaluation/datasets/normalized_kb/KB-O365-MFA-FATIGUE.json new file mode 100644 index 0000000..a005b86 --- /dev/null +++ b/evaluation/datasets/normalized_kb/KB-O365-MFA-FATIGUE.json @@ -0,0 +1,33 @@ +{ + "id": "KB-O365-MFA-FATIGUE", + "memory_type": "knowledge", + "doc_type": "kb", + "scenario": "o365_suspicious_login", + "title": "MFA Fatigue Detection Notes", + "abstract": "Patterns for identifying MFA fatigue / push bombing during account compromise attempts.", + "key_points": [ + "Repeated MFA prompts preceding one successful prompt is suspicious.", + "User-reported prompt fatigue is strong supporting evidence.", + "MFA fatigue is often coupled with credential theft rather than password spray alone." + ], + "investigation_guidance": [ + "Review MFA event counts and timing.", + "Check if the user acknowledged unexpected prompts.", + "Look for subsequent session hijacking or mailbox abuse." + ], + "decision_points": [ + "Prompt flood plus user denial usually warrants immediate containment." 
+ ], + "related_refs": { + "playbooks": [ + "PB-O365-LOGIN-001" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_kb/kb/KB-O365-MFA-FATIGUE.json", + "tags": [ + "kb", + "o365", + "mfa-fatigue" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_kb/KB-PHISH-HEADER-CHECK.json b/evaluation/datasets/normalized_kb/KB-PHISH-HEADER-CHECK.json new file mode 100644 index 0000000..8b41ad7 --- /dev/null +++ b/evaluation/datasets/normalized_kb/KB-PHISH-HEADER-CHECK.json @@ -0,0 +1,34 @@ +{ + "id": "KB-PHISH-HEADER-CHECK", + "memory_type": "knowledge", + "doc_type": "kb", + "scenario": "phishing", + "title": "Phishing Header Validation Checklist", + "abstract": "Checklist for validating sender identity, domain reputation, and authentication results in suspected phishing emails.", + "key_points": [ + "Review SPF, DKIM, and DMARC alignment.", + "Compare display name, envelope sender, and reply-to anomalies.", + "Check domain age and known-good communication history." + ], + "investigation_guidance": [ + "Use message trace and header parser.", + "Compare sender domain with vendor allowlist.", + "Escalate lookalike domains even when content appears business-relevant." + ], + "decision_points": [ + "Newly observed domains with failed auth are high-risk.", + "Benign vendor mail often has consistent historical sending patterns." + ], + "related_refs": { + "playbooks": [ + "PB-PHISH-001" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_kb/kb/KB-PHISH-HEADER-CHECK.json", + "tags": [ + "kb", + "phishing", + "email-header" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_kb/PB-O365-LOGIN-001.json b/evaluation/datasets/normalized_kb/PB-O365-LOGIN-001.json new file mode 100644 index 0000000..99d5a31 --- /dev/null +++ b/evaluation/datasets/normalized_kb/PB-O365-LOGIN-001.json @@ -0,0 +1,36 @@ +{ + "id": "PB-O365-LOGIN-001", + "memory_type": "knowledge", + "doc_type": "playbook", + "scenario": "o365_suspicious_login", + "title": "O365 Suspicious Login Investigation Playbook", + "abstract": "Standard investigation steps for suspicious Entra ID sign-ins, impossible travel, MFA abuse, and follow-on mailbox abuse.", + "key_points": [ + "Confirm user travel and business context.", + "Review sign-in logs, device IDs, and user agents.", + "Inspect downstream actions such as inbox rules, app consent, and forwarding." + ], + "investigation_guidance": [ + "Correlate MFA telemetry with sign-in sequence.", + "Check risky sign-ins and risky users views.", + "Revoke sessions and reset credentials when compromise is confirmed." + ], + "decision_points": [ + "Impossible travel alone is insufficient without corroborating evidence.", + "Inbox rule creation after foreign login strongly increases confidence of compromise." 
+ ], + "related_refs": { + "kb": [ + "KB-O365-IMPOSSIBLE-TRAVEL", + "KB-O365-MFA-FATIGUE", + "KB-O365-INBOX-RULE-ABUSE" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_kb/playbooks/PB-O365-LOGIN-001.json", + "tags": [ + "playbook", + "o365", + "login" + ] +} \ No newline at end of file diff --git a/evaluation/datasets/normalized_kb/PB-PHISH-001.json b/evaluation/datasets/normalized_kb/PB-PHISH-001.json new file mode 100644 index 0000000..9a991c7 --- /dev/null +++ b/evaluation/datasets/normalized_kb/PB-PHISH-001.json @@ -0,0 +1,35 @@ +{ + "id": "PB-PHISH-001", + "memory_type": "knowledge", + "doc_type": "playbook", + "scenario": "phishing", + "title": "Phishing Email Investigation Playbook", + "abstract": "Standard investigation steps for suspicious email, credential harvesting, and BEC-like cases.", + "key_points": [ + "Validate sender authentication results.", + "Inspect landing URL and attachment behavior.", + "Check whether the user clicked or submitted credentials." + ], + "investigation_guidance": [ + "Query email telemetry for same sender, subject, or URL.", + "Review mailbox click logs and endpoint browser artifacts.", + "Reset credentials if submission is suspected." + ], + "decision_points": [ + "If sender auth fails and user interaction exists, treat as likely phishing.", + "If destination is allowlisted and communication pattern is expected, investigate false positive path." + ], + "related_refs": { + "kb": [ + "KB-PHISH-HEADER-CHECK", + "KB-CRED-HARVEST-PATTERNS" + ], + "cases": [] + }, + "source_path": "/home/tom/soc_memory_poc/evaluation/datasets/mock_kb/playbooks/PB-PHISH-001.json", + "tags": [ + "playbook", + "phishing", + "email" + ] +} \ No newline at end of file diff --git a/evermemos/README.md b/evermemos/README.md new file mode 100644 index 0000000..2e091e2 --- /dev/null +++ b/evermemos/README.md @@ -0,0 +1,9 @@ +# EverMemOS Layer + +这个目录用于保存长期记忆整理层的工作逻辑。 + +主要职责: + +- 从 episode / process memory 中抽取长期记忆 +- 去重、合并、更新、衰减 +- 反哺 OpenViking 和 Obsidian diff --git a/integrations/hermes/soc-memory-poc/SKILL.md b/integrations/hermes/soc-memory-poc/SKILL.md new file mode 100644 index 0000000..e609232 --- /dev/null +++ b/integrations/hermes/soc-memory-poc/SKILL.md @@ -0,0 +1,314 @@ +--- +name: soc-memory-poc +description: Load this skill whenever Hermes is handling SOC alert triage, phishing investigation, suspicious O365 login analysis, historical case lookup, Obsidian note lookup, case-note generation, or committing high-value SOC findings into the SOC Memory POC. It provides a strict triage workflow using the SOC Memory Gateway for search/write operations, local Obsidian vault search, and local SOC Memory POC scripts for Obsidian case note generation. +version: 1.3.0 +metadata: + hermes: + tags: [soc, memory, openviking, obsidian, incident-response, case-triage, phishing, o365] + related_skills: [hermes-agent] +--- + +# SOC Memory POC + +Use this skill for SOC case workflows only. It is the default procedure for phishing-style alerts, suspicious O365 / Entra ID login cases, historical case comparison, Obsidian knowledge lookup, and case-note generation. 
+ +## Mandatory Trigger Rule + +Load this skill immediately when the user asks Hermes to do any of the following: +- investigate or triage a SOC alert +- find similar phishing or O365 suspicious-login cases +- retrieve related KB or playbook context before concluding a case +- check whether Obsidian already has a related case note or knowledge note +- generate an Obsidian case note from a normalized case +- commit a normalized case or knowledge artifact into the SOC memory system + +If the task is clearly SOC triage related, do not proceed without using this skill. + +## What This Skill Connects To + +This skill assumes: +- SOC Memory POC root: `/home/tom/soc_memory_poc` +- Memory Gateway URL: `http://127.0.0.1:1934` +- Gateway API key: empty by default unless configured otherwise +- Obsidian vault root: `/home/tom/soc_memory_poc/obsidian-vault` + +Override with environment variables when needed: +- `SOC_MEMORY_POC_ROOT` +- `SOC_MEMORY_GATEWAY_URL` +- `SOC_MEMORY_GATEWAY_API_KEY` + +Capabilities: +- search SOC case / knowledge context through the Memory Gateway +- search existing Obsidian notes by case ID, scenario, keywords, or tags +- commit normalized case / knowledge JSON through the Memory Gateway +- generate Obsidian case notes from normalized case JSON + +## Triage Workflow + +Follow this order unless the user explicitly asks for something narrower. + +### Preferred Path For Structured Alerts (Scheme A) + +If the user provides a structured alert summary with fields like user, host, sender, subject, attachment, URL, IP, alert type, or known facts, do **not** manually improvise the final answer from memory search results alone. + +Use the deterministic triage helper first: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/triage_alert.py \ + --scenario phishing \ + --alert-type mail_suspicious_attachment \ + --user alice@corp.example \ + --host FIN-LAPTOP-12 \ + --sender billing@vendor-payments.com \ + --subject "Invoice overdue notice" \ + --attachment invoice_review.html \ + --url https://vendor-payments-login.com/review \ + --ip 198.51.100.20 \ + --summary "Invoice-themed phishing email with HTML attachment and credential harvesting link" \ + --fact "DMARC failed" \ + --fact "User may have clicked the link" +``` + +This script performs: +- case retrieval from the SOC Memory Gateway +- knowledge retrieval from the SOC Memory Gateway +- Obsidian note lookup from the local vault +- final markdown rendering with all required sections populated + +For scheme A, prefer returning the script output with only light cleanup. Do not drop the `关联 Memory Retrieval` or `关联 Obsidian 文档` sections. + +### Preferred Path For Freeform Alerts Or Raw Email Content + +If the user does **not** provide neatly separated fields, or pastes raw email content / ticket text / freeform alert text, do not force them into Scheme A manually. + +Use the unified triage helper: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/triage_email.py --text "From: billing@vendor-payments.com +To: alice@corp.example +Subject: Invoice overdue notice +Attachment: invoice_review.html +User clicked the link after opening the HTML attachment. DMARC failed. Review at https://vendor-payments-login.com/review from IP 198.51.100.20 on host FIN-LAPTOP-12." 
+``` + +Or point it at a file: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/triage_email.py --file /path/to/raw_email.txt +``` + +This helper will: +- infer the most likely scenario and alert type +- extract sender, user, subject, attachment, URL, IP, and host when possible +- carry over important facts like DMARC failure, user click, MFA fatigue, inbox rule, or OAuth consent +- run the deterministic triage pipeline so the final answer still contains `关联 Memory Retrieval` and `关联 Obsidian 文档` + +For non-structured input, prefer this helper over freehand reasoning. + +For all SOC triage inputs, `triage_email.py` is the preferred single entrypoint. It accepts raw text, a file, or optional structured overrides, then calls the deterministic retrieval pipeline. + +### Phase 1: Ground The Case + +First identify: +- scenario: `phishing`, `o365_suspicious_login`, or another SOC scenario +- likely alert type +- short case summary in one sentence +- key observables if available: sender, URL, domain, IP, mailbox, user, hash + +Do not start by writing memory. Start by grounding the case. + +### Phase 2: Retrieve Memory Context Before Judging + +Before concluding the case, search both related history and related knowledge. + +1. Search similar historical cases. +2. Search KB / playbook context. +3. Compare the current case against what comes back. + +Run these separately for better precision. + +Case search example: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/search_context.py \ + --query "invoice phishing html attachment credential harvesting" \ + --kind case --limit 5 +``` + +Knowledge search example: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/search_context.py \ + --query "invoice phishing html attachment credential harvesting" \ + --kind knowledge --limit 5 +``` + +O365 example: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/search_context.py \ + --query "impossible travel MFA fatigue inbox rule oauth consent" \ + --kind knowledge --limit 5 +``` + +Search scopes: +- `case` -> `viking://resources/soc-memory-poc/case` +- `knowledge` -> `viking://resources/soc-memory-poc/knowledge` +- `all` -> `viking://resources/soc-memory-poc` + +### Phase 3: Retrieve Obsidian References + +After memory retrieval, look for related notes in the Obsidian vault so the final answer can reference existing human-readable documentation. + +Example: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/search_obsidian_docs.py \ + --query "invoice phishing html attachment credential harvesting" \ + --scenario phishing \ + --limit 5 +``` + +Use this to surface: +- existing case notes +- related scenario notes +- notes whose names, tags, or content closely match the current case + +When reporting Obsidian references, include at least: +- note title or file name +- relative path under `obsidian-vault/` +- why the note is relevant + +### Phase 4: Produce The Triage Output + +After retrieval, synthesize a result that includes: +- likely verdict or current assessment +- strongest evidence +- closest matching historical cases +- most relevant KB / playbook guidance +- related Obsidian notes +- recommended next investigation or response actions + +Do not just paste raw search output. Summarize why the returned items matter. 
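+
+If gateway retrieval itself behaves unexpectedly during any of the phases above, the underlying call that `search_context.py` and `triage_alert.py` wrap can be reproduced directly. The snippet below is a minimal debugging sketch that mirrors the request shape used by those helpers; the local gateway URL, empty API key, and example query are assumptions to adjust for your environment.
+
+```python
+#!/usr/bin/env python3
+# Debugging sketch: reproduce the raw /api/search request that the helper scripts wrap.
+# The endpoint, payload fields, and X-API-Key header mirror search_context.py; the
+# default URL and empty API key assume the local POC setup.
+import json
+import os
+import urllib.request
+
+gateway = os.environ.get("SOC_MEMORY_GATEWAY_URL", "http://127.0.0.1:1934")
+api_key = os.environ.get("SOC_MEMORY_GATEWAY_API_KEY", "")
+
+payload = {
+    "query": "invoice phishing html attachment credential harvesting",
+    "limit": 10,
+    "uri": "viking://resources/soc-memory-poc/case",  # narrow the scope to case memories
+}
+
+req = urllib.request.Request(
+    gateway.rstrip("/") + "/api/search",
+    data=json.dumps(payload).encode("utf-8"),
+    method="POST",
+)
+req.add_header("Content-Type", "application/json")
+if api_key:
+    req.add_header("X-API-Key", api_key)
+
+with urllib.request.urlopen(req, timeout=30) as resp:
+    hits = json.loads(resp.read().decode("utf-8")).get("results", [])
+
+# Each hit carries a resource URI and a relevance score.
+for hit in hits:
+    print(hit.get("score"), hit.get("uri"))
+```
+
+Prefer the helper scripts for real triage: they additionally canonicalize URIs and deduplicate results before ranking.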
+ +## Final Output Template + +Unless the user asks for a different format, use this structure for final SOC triage answers: + +### 研判结果 +- one short paragraph with the likely verdict / current assessment + +### 关键证据 +- 2 to 5 flat bullets with the strongest evidence + +### 关联 Memory Retrieval +- one flat bullet per retrieved case / knowledge item +- include: ID + short relevance reason +- example: `CASE-2026-0001`: same invoice lure + HTML attachment + credential harvesting flow + +### 关联 Obsidian 文档 +- one flat bullet per note +- include: note name + relative path + one-line relevance reason +- example: `CASE-2026-0001 - Finance user ...md` — `02_Cases/phishing/...` — already documents a near-identical phishing pattern + +### 建议动作 +- 2 to 5 flat bullets with next investigation or response steps + +If no Obsidian note matches, explicitly say `未找到直接关联的 Obsidian 文档`. + +### Phase 5: Generate Case Note When The Case Is Mature Enough + +If the task includes documenting the result, or the case already has a normalized JSON artifact, generate an Obsidian case note. + +Example: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/generate_case_note.py \ + --input /home/tom/soc_memory_poc/evaluation/datasets/normalized_cases/CASE-2026-0001.json \ + --enrich-from-openviking \ + --top-k 3 +``` + +This writes under `obsidian-vault/02_Cases//`. + +Use `--enrich-from-openviking` by default when the gateway is available. + +### Phase 6: Commit Only High-Value Artifacts + +If Hermes has a normalized case or knowledge JSON that is worth preserving, commit it through the Gateway. + +Example: + +```bash +python /home/tom/.hermes/skills/soc-memory-poc/scripts/commit_case_memory.py \ + --input /home/tom/soc_memory_poc/evaluation/datasets/normalized_cases/CASE-2026-0001.json +``` + +Only commit normalized, reusable artifacts. Do not commit raw logs, raw tool traces, or ad hoc chat text. + +## Recommended Defaults By Scenario + +### Phishing + +Default order: +1. search `case` +2. search `knowledge` +3. search related Obsidian notes +4. assess sender auth, lure type, landing page, user interaction +5. generate case note if the case is already structured +6. commit only if the case artifact is normalized and high value + +Good query ingredients: +- lure theme +- attachment type +- credential harvesting +- fake M365 login +- sender domain +- landing URL pattern + +### O365 Suspicious Login + +Default order: +1. search `case` +2. search `knowledge` +3. search related Obsidian notes +4. assess impossible travel, MFA fatigue, inbox rule abuse, OAuth consent, legacy auth +5. generate case note if the case is already structured +6. commit only if the case artifact is normalized and high value + +Good query ingredients: +- impossible travel +- MFA fatigue +- inbox rule +- foreign login +- OAuth consent +- legacy protocol + +## Failure Handling + +If Gateway search fails: +- say explicitly that the SOC Memory Gateway is unavailable +- do not pretend retrieval succeeded +- continue with local reasoning only if the user still wants that + +If Obsidian search fails: +- say explicitly that Obsidian references could not be retrieved +- do not invent note names or paths + +If note generation fails: +- report the failing path or command +- do not claim the note was written + +If commit fails: +- report the URI or file that failed +- do not claim the memory was stored + +## Guardrails + +- Search `case` and `knowledge` separately before concluding a triage result. 
+- Search Obsidian notes after memory retrieval so final output can point to human-readable references. +- Prefer narrow, scenario-specific queries over vague long prompts. +- Do not dump raw investigative process into memory. +- Generate case notes from normalized case JSON, not from freeform chat. +- Commit only high-value, reusable artifacts. +- When Gateway results look noisy, explain that retrieval quality may still need SOC-specific reranking. diff --git a/integrations/hermes/soc-memory-poc/scripts/commit_case_memory.py b/integrations/hermes/soc-memory-poc/scripts/commit_case_memory.py new file mode 100755 index 0000000..6d4f49f --- /dev/null +++ b/integrations/hermes/soc-memory-poc/scripts/commit_case_memory.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +DEFAULT_GATEWAY_URL = os.environ.get("SOC_MEMORY_GATEWAY_URL", "http://127.0.0.1:1934") +DEFAULT_GATEWAY_API_KEY = os.environ.get("SOC_MEMORY_GATEWAY_API_KEY", "") + + +def load_item(path: str | Path) -> dict[str, Any]: + with Path(path).open("r", encoding="utf-8") as f: + return json.load(f) + + +def build_resource_uri(item: dict[str, Any]) -> str: + memory_type = item.get("memory_type") + item_id = item["id"] + if memory_type == "case": + scenario = item.get("scenario", "general") + return f"viking://resources/soc-memory-poc/case/{scenario}/{item_id}.json" + if memory_type == "knowledge": + doc_type = item.get("doc_type", "general") + return f"viking://resources/soc-memory-poc/knowledge/{doc_type}/{item_id}.json" + raise SystemExit(f"Unsupported memory_type: {memory_type}") + + +def post_json(url: str, payload: dict[str, Any], api_key: str = "") -> dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request(url, data=data, method="POST") + req.add_header("Content-Type", "application/json") + if api_key: + req.add_header("X-API-Key", api_key) + with urllib.request.urlopen(req, timeout=60) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Commit a normalized SOC case / knowledge JSON through the Memory Gateway.") + parser.add_argument("--input", required=True, help="Normalized JSON file path") + parser.add_argument("--gateway-url", default=DEFAULT_GATEWAY_URL, help="Memory Gateway base URL") + parser.add_argument("--api-key", default=DEFAULT_GATEWAY_API_KEY, help="Gateway API key if required") + args = parser.parse_args() + + item = load_item(args.input) + payload = { + "uri": build_resource_uri(item), + "content": json.dumps(item, ensure_ascii=False, indent=2), + "resource_type": "json", + } + + try: + result = post_json(args.gateway_url.rstrip("/") + "/api/resource", payload, api_key=args.api_key) + except urllib.error.URLError as exc: + raise SystemExit(f"Gateway resource commit failed: {exc}") from exc + + print(json.dumps(result, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/integrations/hermes/soc-memory-poc/scripts/generate_case_note.py b/integrations/hermes/soc-memory-poc/scripts/generate_case_note.py new file mode 100755 index 0000000..524cb8e --- /dev/null +++ b/integrations/hermes/soc-memory-poc/scripts/generate_case_note.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from pathlib import Path + 
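+# Thin wrapper around the POC's summarize_case_skill generator: it forwards the CLI
+# options and runs the generator with the POC root prepended to PYTHONPATH so its
+# imports resolve; DEFAULT_POC_ROOT below can be overridden via SOC_MEMORY_POC_ROOT.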
+DEFAULT_POC_ROOT = os.environ.get("SOC_MEMORY_POC_ROOT", "/home/tom/soc_memory_poc") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate an Obsidian case note from a normalized SOC case JSON file.") + parser.add_argument("--input", required=True, help="Normalized case JSON path") + parser.add_argument("--output-dir", default=None, help="Override Obsidian output directory") + parser.add_argument("--enrich-from-openviking", action="store_true", help="Enrich with OpenViking recommendations") + parser.add_argument("--top-k", type=int, default=3, help="Recommendation count per type") + parser.add_argument("--poc-root", default=DEFAULT_POC_ROOT, help="SOC Memory POC root") + args = parser.parse_args() + + poc_root = Path(args.poc_root) + script_path = poc_root / "skills" / "summarize_case_skill" / "generate_case_note.py" + if not script_path.exists(): + raise SystemExit(f"SOC Memory POC summarize script not found: {script_path}") + + output_dir = args.output_dir or str(poc_root / "obsidian-vault" / "02_Cases") + cmd = [ + sys.executable, + str(script_path), + "--input", + args.input, + "--output-dir", + output_dir, + "--top-k", + str(args.top_k), + ] + if args.enrich_from_openviking: + cmd.append("--enrich-from-openviking") + + env = os.environ.copy() + existing = env.get("PYTHONPATH", "") + env["PYTHONPATH"] = str(poc_root) + (os.pathsep + existing if existing else "") + subprocess.run(cmd, check=True, env=env) + + +if __name__ == "__main__": + main() diff --git a/integrations/hermes/soc-memory-poc/scripts/search_context.py b/integrations/hermes/soc-memory-poc/scripts/search_context.py new file mode 100755 index 0000000..e5a0aef --- /dev/null +++ b/integrations/hermes/soc-memory-poc/scripts/search_context.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import urllib.error +import urllib.request +from typing import Any + +DEFAULT_GATEWAY_URL = os.environ.get("SOC_MEMORY_GATEWAY_URL", "http://127.0.0.1:1934") +DEFAULT_GATEWAY_API_KEY = os.environ.get("SOC_MEMORY_GATEWAY_API_KEY", "") + +URI_PREFIXES = { + "case": "viking://resources/soc-memory-poc/case", + "knowledge": "viking://resources/soc-memory-poc/knowledge", + "all": "viking://resources/soc-memory-poc", +} + + +def post_json(url: str, payload: dict[str, Any], api_key: str = "") -> dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request(url, data=data, method="POST") + req.add_header("Content-Type", "application/json") + if api_key: + req.add_header("X-API-Key", api_key) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def canonicalize_uri(uri: str) -> str: + if ".json/" in uri: + return uri.split(".json/", 1)[0] + ".json" + return uri + + +def filter_results(results: list[dict[str, Any]], prefix: str) -> list[dict[str, Any]]: + deduped: dict[str, dict[str, Any]] = {} + for item in results: + uri = item.get("uri") or "" + canonical = canonicalize_uri(uri) + if not canonical.startswith(prefix): + continue + score = item.get("score") or 0 + payload = dict(item) + payload["uri"] = canonical + if canonical not in deduped or score > (deduped[canonical].get("score") or 0): + deduped[canonical] = payload + return sorted(deduped.values(), key=lambda entry: entry.get("score") or 0, reverse=True) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Search SOC Memory Gateway for case / knowledge context.") + 
parser.add_argument("--query", required=True, help="Search query") + parser.add_argument("--kind", choices=["case", "knowledge", "all"], default="all", help="SOC resource scope") + parser.add_argument("--limit", type=int, default=5, help="Max results") + parser.add_argument("--gateway-url", default=DEFAULT_GATEWAY_URL, help="Memory Gateway base URL") + parser.add_argument("--api-key", default=DEFAULT_GATEWAY_API_KEY, help="Gateway API key if required") + args = parser.parse_args() + + prefix = URI_PREFIXES[args.kind] + payload = { + "query": args.query, + "limit": max(args.limit * 5, 10), + "uri": prefix, + } + try: + result = post_json(args.gateway_url.rstrip("/") + "/api/search", payload, api_key=args.api_key) + except urllib.error.URLError as exc: + raise SystemExit(f"Gateway search failed: {exc}") from exc + + raw_results = result.get("results", []) + filtered = filter_results(raw_results, prefix) + output = { + "query": args.query, + "kind": args.kind, + "uri_prefix": prefix, + "results": filtered[: args.limit], + "total": len(filtered), + } + print(json.dumps(output, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/integrations/hermes/soc-memory-poc/scripts/search_obsidian_docs.py b/integrations/hermes/soc-memory-poc/scripts/search_obsidian_docs.py new file mode 100755 index 0000000..4f2115f --- /dev/null +++ b/integrations/hermes/soc-memory-poc/scripts/search_obsidian_docs.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import re +from pathlib import Path +from typing import Any + +DEFAULT_POC_ROOT = os.environ.get("SOC_MEMORY_POC_ROOT", "/home/tom/soc_memory_poc") +DEFAULT_VAULT_ROOT = str(Path(DEFAULT_POC_ROOT) / "obsidian-vault") + +TOKEN_RE = re.compile(r"[A-Za-z0-9_./:-]+") +SKIP_DIRS = {"05_Templates"} +SKIP_FILES = {"README.md"} + + +def tokenize(text: str) -> list[str]: + lowered = (text or "").lower() + tokens = TOKEN_RE.findall(lowered) + return [token for token in tokens if len(token) >= 3] + + +def parse_frontmatter(text: str) -> tuple[dict[str, str], str]: + if not text.startswith("---\n"): + return {}, text + parts = text.split("\n---\n", 1) + if len(parts) != 2: + return {}, text + raw_frontmatter = parts[0].splitlines()[1:] + body = parts[1] + data: dict[str, str] = {} + for line in raw_frontmatter: + if ":" not in line: + continue + key, value = line.split(":", 1) + data[key.strip()] = value.strip() + return data, body + + +def extract_title(body: str, fallback: str) -> str: + for line in body.splitlines(): + if line.startswith("# "): + return line[2:].strip() + return fallback + + +def extract_section_text(body: str, heading: str) -> str: + lines = body.splitlines() + marker = f"## {heading}" + collecting = False + collected: list[str] = [] + for line in lines: + if line.strip() == marker: + collecting = True + continue + if collecting and line.startswith("## "): + break + if collecting: + stripped = line.strip() + if stripped: + collected.append(stripped) + return " ".join(collected[:4]).strip() + + +def extract_tags(body: str) -> list[str]: + tags: list[str] = [] + in_tag_section = False + for line in body.splitlines(): + if line.strip() == "## 标签": + in_tag_section = True + continue + if in_tag_section and line.startswith("## "): + break + if in_tag_section: + for token in re.findall(r"#[^\s,]+", line): + tags.append(token) + return tags + + +def score_doc(query: str, tokens: list[str], doc: dict[str, Any]) -> tuple[int, list[str]]: + score = 0 + 
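+    # Heuristic keyword scoring: a case_id hit in the query dominates (+80), a scenario
+    # match adds +20, a full-query substring in the body adds +8, and individual tokens
+    # are weighted title > summary > path > frontmatter > tags > body.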
matched: list[str] = [] + + path_text = f"{doc['relative_path']} {doc['file_name']}".lower() + title_text = doc["title"].lower() + summary_text = doc.get("summary", "").lower() + body_text = doc.get("body", "").lower() + frontmatter_text = " ".join(f"{k}:{v}" for k, v in doc.get("frontmatter", {}).items()).lower() + tags_text = " ".join(doc.get("tags", [])).lower() + + if query and query.lower() in body_text: + score += 8 + matched.append(query.lower()) + + case_id = doc.get("frontmatter", {}).get("case_id", "") + if case_id and case_id.lower() in query.lower(): + score += 80 + matched.append(case_id.lower()) + + scenario = doc.get("frontmatter", {}).get("scenario", "") + if scenario and scenario.lower() in query.lower(): + score += 20 + matched.append(scenario.lower()) + + for token in tokens: + token_hit = False + if token in title_text: + score += 12 + token_hit = True + elif token in summary_text: + score += 7 + token_hit = True + elif token in path_text: + score += 6 + token_hit = True + elif token in frontmatter_text: + score += 5 + token_hit = True + elif token in tags_text: + score += 4 + token_hit = True + elif token in body_text: + score += 1 + token_hit = True + if token_hit and token not in matched: + matched.append(token) + + return score, matched[:8] + + +def load_docs(vault_root: str | Path) -> list[dict[str, Any]]: + vault_root = Path(vault_root) + docs: list[dict[str, Any]] = [] + for path in sorted(vault_root.rglob("*.md")): + rel = path.relative_to(vault_root) + if any(part in SKIP_DIRS for part in rel.parts): + continue + if path.name in SKIP_FILES: + continue + text = path.read_text(encoding="utf-8") + frontmatter, body = parse_frontmatter(text) + docs.append( + { + "file_name": path.name, + "relative_path": str(rel), + "absolute_path": str(path), + "category": rel.parts[0] if rel.parts else "", + "directory": str(rel.parent), + "frontmatter": frontmatter, + "title": extract_title(body, path.stem), + "summary": extract_section_text(body, "告警摘要") or extract_section_text(body, "Summary"), + "tags": extract_tags(body), + "body": body, + } + ) + return docs + + +def main() -> None: + parser = argparse.ArgumentParser(description="Search Obsidian SOC notes and return matching document references.") + parser.add_argument("--query", required=True, help="Search query") + parser.add_argument("--vault-root", default=DEFAULT_VAULT_ROOT, help="Obsidian vault root") + parser.add_argument("--limit", type=int, default=5, help="Maximum results") + parser.add_argument("--scenario", default="", help="Optional scenario filter") + args = parser.parse_args() + + docs = load_docs(args.vault_root) + tokens = tokenize(args.query) + results: list[dict[str, Any]] = [] + + for doc in docs: + scenario = doc.get("frontmatter", {}).get("scenario", "") + if args.scenario and scenario != args.scenario: + continue + score, matched_terms = score_doc(args.query, tokens, doc) + if score <= 0: + continue + results.append( + { + "score": score, + "title": doc["title"], + "file_name": doc["file_name"], + "relative_path": doc["relative_path"], + "directory": doc["directory"], + "category": doc["category"], + "scenario": scenario, + "summary": doc.get("summary", ""), + "tags": doc.get("tags", []), + "matched_terms": matched_terms, + } + ) + + results.sort(key=lambda item: item["score"], reverse=True) + payload = { + "query": args.query, + "vault_root": str(Path(args.vault_root)), + "matched_docs": results[: args.limit], + } + print(json.dumps(payload, ensure_ascii=False, indent=2)) + + +if __name__ == 
"__main__": + main() diff --git a/integrations/hermes/soc-memory-poc/scripts/triage_alert.py b/integrations/hermes/soc-memory-poc/scripts/triage_alert.py new file mode 100644 index 0000000..4a6439a --- /dev/null +++ b/integrations/hermes/soc-memory-poc/scripts/triage_alert.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +DEFAULT_GATEWAY_URL = os.environ.get("SOC_MEMORY_GATEWAY_URL", "http://127.0.0.1:1934") +DEFAULT_GATEWAY_API_KEY = os.environ.get("SOC_MEMORY_GATEWAY_API_KEY", "") +DEFAULT_POC_ROOT = os.environ.get("SOC_MEMORY_POC_ROOT", "/home/tom/soc_memory_poc") +DEFAULT_VAULT_ROOT = str(Path(DEFAULT_POC_ROOT) / "obsidian-vault") + +CASE_URI = "viking://resources/soc-memory-poc/case" +KNOWLEDGE_URI = "viking://resources/soc-memory-poc/knowledge" + + +def post_json(url: str, payload: dict[str, Any], api_key: str = "") -> dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request(url, data=data, method="POST") + req.add_header("Content-Type", "application/json") + if api_key: + req.add_header("X-API-Key", api_key) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def canonicalize_uri(uri: str) -> str: + if ".json/" in uri: + return uri.split(".json/", 1)[0] + ".json" + return uri + + +def filter_results(results: list[dict[str, Any]], prefix: str) -> list[dict[str, Any]]: + deduped: dict[str, dict[str, Any]] = {} + for item in results: + uri = item.get("uri") or "" + canonical = canonicalize_uri(uri) + if not canonical.startswith(prefix): + continue + score = item.get("score") or 0 + payload = dict(item) + payload["uri"] = canonical + if canonical not in deduped or score > (deduped[canonical].get("score") or 0): + deduped[canonical] = payload + return sorted(deduped.values(), key=lambda entry: entry.get("score") or 0, reverse=True) + + +def gateway_search(query: str, uri: str, limit: int, gateway_url: str, api_key: str) -> list[dict[str, Any]]: + payload = {"query": query, "limit": max(limit * 5, 10), "uri": uri} + raw = post_json(gateway_url.rstrip("/") + "/api/search", payload, api_key=api_key) + return filter_results(raw.get("results", []), uri)[:limit] + + +def obsidian_search(query: str, scenario: str, limit: int, vault_root: str) -> dict[str, Any]: + from search_obsidian_docs import load_docs, score_doc, tokenize + + docs = load_docs(vault_root) + tokens = tokenize(query) + results: list[dict[str, Any]] = [] + for doc in docs: + doc_scenario = doc.get("frontmatter", {}).get("scenario", "") + if scenario and doc_scenario != scenario: + continue + score, matched_terms = score_doc(query, tokens, doc) + if score <= 0: + continue + results.append( + { + "score": score, + "title": doc["title"], + "file_name": doc["file_name"], + "relative_path": doc["relative_path"], + "directory": doc["directory"], + "absolute_path": str(Path(vault_root) / doc["relative_path"]), + "summary": doc.get("summary", ""), + "matched_terms": matched_terms, + } + ) + results.sort(key=lambda item: item["score"], reverse=True) + return {"matched_docs": results[:limit]} + + +def build_query(args: argparse.Namespace) -> str: + parts = [ + args.scenario, + args.alert_type, + args.user, + args.host, + args.sender, + args.subject, + args.attachment, + args.url, + args.ip, + args.summary, + ] + parts.extend(args.fact) + return " ".join(part.strip() for part in 
parts if part and part.strip()) + + +def bullet(lines: list[str], fallback: str) -> str: + if not lines: + return f"- {fallback}" + return "\n".join(f"- {line}" for line in lines) + + +def top_results(items: list[dict[str, Any]], limit: int = 3) -> list[dict[str, Any]]: + return items[:limit] + + +def has_fact(args: argparse.Namespace, needle: str) -> bool: + haystacks = [args.summary, args.subject, args.alert_type, *args.fact] + lowered = needle.lower() + return any(lowered in (item or "").lower() for item in haystacks) + + +def summarize_evidence(args: argparse.Namespace) -> list[str]: + evidence: list[str] = [] + if args.subject: + evidence.append(f"邮件主题/诱饵:{args.subject}") + if args.attachment: + evidence.append(f"恶意附件:{args.attachment}") + if args.url: + evidence.append(f"可疑链接:{args.url}") + if args.sender: + evidence.append(f"发件人:{args.sender}") + if args.ip: + evidence.append(f"相关 IP:{args.ip}") + for fact in args.fact[:4]: + evidence.append(fact) + return evidence[:6] + + +def uri_to_id(uri: str) -> str: + return uri.rsplit('/', 1)[-1].replace('.json', '') + + +def infer_assessment(args: argparse.Namespace, case_results: list[dict[str, Any]]) -> str: + top_case = case_results[0] if case_results else None + if args.scenario == "phishing": + if args.url and args.attachment and (has_fact(args, "dmarc failed") or has_fact(args, "clicked")): + base = "当前告警高度符合凭证收割型钓鱼攻击特征,属于高可信 True Positive,且存在凭证泄露风险。" + elif args.url or args.attachment: + base = "当前告警具备明显钓鱼迹象,尤其是附件与落地页组合,倾向于高风险钓鱼事件。" + else: + base = "当前告警呈现出邮件钓鱼模式,但仍需补充落地页、附件和用户交互证据进一步确认。" + elif args.scenario == "o365_suspicious_login": + if has_fact(args, "impossible travel") and (has_fact(args, "mfa fatigue") or has_fact(args, "inbox rule") or has_fact(args, "oauth")): + base = "当前告警高度符合 O365 账号接管链路,属于高可信身份威胁事件。" + else: + base = "当前告警表现为异常身份登录,需要结合登录轨迹、MFA 和邮箱规则进一步确认是否账号接管。" + else: + base = "当前告警具备明显的可疑特征,需要结合历史案例和关联知识继续判断。" + + if top_case: + return base + f" 最相近的历史案例为 `{uri_to_id(top_case.get('uri', ''))}`,说明当前 case 与既有攻击模式存在明显重合。" + return base + + +def format_memory_results(case_results: list[dict[str, Any]], knowledge_results: list[dict[str, Any]]) -> str: + lines: list[str] = [] + for item in top_results(case_results, 2): + uri = item.get("uri", "") + abstract = (item.get("abstract") or "").strip() + snippet = abstract[:140] + "..." if len(abstract) > 140 else abstract + lines.append(f"`{uri_to_id(uri)}`({uri})— {snippet}") + for item in top_results(knowledge_results, 2): + uri = item.get("uri", "") + abstract = (item.get("abstract") or "").strip() + snippet = abstract[:140] + "..." 
if len(abstract) > 140 else abstract + lines.append(f"`{uri_to_id(uri)}`({uri})— {snippet}") + return bullet(lines, "未检索到直接关联的 Memory 条目") + + +def format_obsidian_results(obsidian_docs: list[dict[str, Any]]) -> str: + lines = [] + for doc in top_results(obsidian_docs, 3): + reason = doc.get("summary") or ", ".join(doc.get("matched_terms", [])) or "与当前场景相关" + lines.append( + f"`{doc['file_name']}` — `obsidian-vault/{doc['relative_path']}` " + f"(absolute: `{doc['absolute_path']}`)— {reason}" + ) + return bullet(lines, "未找到直接关联的 Obsidian 文档") + + +def recommend_actions(args: argparse.Namespace, case_results: list[dict[str, Any]]) -> list[str]: + actions: list[str] = [] + if args.scenario == "phishing": + actions.extend([ + "检查用户是否已点击链接或提交凭据,必要时立即重置账号并撤销会话。", + "搜索同主题、同发件人、同 URL 或同附件的邮件是否已投递给其他用户。", + "封锁相关域名、URL 和可疑 IP,并保留附件样本用于沙箱分析。", + "如邮件面向财务或高价值角色,优先排查是否存在 BEC 或后续横向利用。", + ]) + elif args.scenario == "o365_suspicious_login": + actions.extend([ + "复核登录日志、MFA 记录和后续邮箱规则 / OAuth 变更。", + "若确认账号接管迹象,立即重置凭据并撤销所有活跃会话。", + "检查同源 IP、同设备指纹和同时间窗口内的其他用户活动。", + "对邮箱转发、隐藏规则、恶意 OAuth 授权进行专项排查。", + ]) + else: + actions.append("基于当前高风险迹象继续扩充调查和处置。") + if case_results: + actions.append("对照最相近历史案例,复用已有 IOC 和调查路径。") + return actions[:5] + + +def main() -> None: + parser = argparse.ArgumentParser(description="Run a structured SOC triage using memory retrieval and Obsidian lookup.") + parser.add_argument("--scenario", required=True, help="Scenario, e.g. phishing or o365_suspicious_login") + parser.add_argument("--alert-type", default="", help="Alert type") + parser.add_argument("--user", default="", help="Target user") + parser.add_argument("--host", default="", help="Target host") + parser.add_argument("--sender", default="", help="Sender email") + parser.add_argument("--subject", default="", help="Email subject or short title") + parser.add_argument("--attachment", default="", help="Attachment name") + parser.add_argument("--url", default="", help="Suspicious URL") + parser.add_argument("--ip", default="", help="Relevant IP") + parser.add_argument("--summary", default="", help="One-sentence alert summary") + parser.add_argument("--fact", action="append", default=[], help="Additional known fact; repeatable") + parser.add_argument("--gateway-url", default=DEFAULT_GATEWAY_URL, help="Memory Gateway URL") + parser.add_argument("--api-key", default=DEFAULT_GATEWAY_API_KEY, help="Memory Gateway API key") + parser.add_argument("--vault-root", default=DEFAULT_VAULT_ROOT, help="Obsidian vault root") + parser.add_argument("--limit", type=int, default=5, help="Search limit") + args = parser.parse_args() + + query = build_query(args) + + case_results: list[dict[str, Any]] = [] + knowledge_results: list[dict[str, Any]] = [] + obsidian_docs: list[dict[str, Any]] = [] + memory_error = "" + obsidian_error = "" + + try: + case_results = gateway_search(query, CASE_URI, args.limit, args.gateway_url, args.api_key) + knowledge_results = gateway_search(query, KNOWLEDGE_URI, args.limit, args.gateway_url, args.api_key) + except urllib.error.URLError as exc: + memory_error = f"Memory Gateway 不可用:{exc}" + + try: + obsidian_resp = obsidian_search(query, args.scenario, args.limit, args.vault_root) + obsidian_docs = obsidian_resp.get("matched_docs", []) + except Exception as exc: # noqa: BLE001 + obsidian_error = f"Obsidian 检索失败:{exc}" + + lines = [ + "## 研判结果", + infer_assessment(args, case_results), + "", + "## 关键证据", + bullet(summarize_evidence(args), "当前输入只提供了有限证据,需要继续补充调查信息"), + "", + "## 关联 Memory Retrieval", + ] + if 
memory_error: + lines.append(f"- {memory_error}") + else: + lines.append(format_memory_results(case_results, knowledge_results)) + lines.extend([ + "", + "## 关联 Obsidian 文档", + ]) + if obsidian_error: + lines.append(f"- {obsidian_error}") + else: + lines.append(format_obsidian_results(obsidian_docs)) + lines.extend([ + "", + "## 建议动作", + bullet(recommend_actions(args, case_results), "继续补充告警细节后再执行更精确的响应动作"), + ]) + + print("\n".join(lines)) + + +if __name__ == "__main__": + main() diff --git a/integrations/hermes/soc-memory-poc/scripts/triage_email.py b/integrations/hermes/soc-memory-poc/scripts/triage_email.py new file mode 100644 index 0000000..9c29c49 --- /dev/null +++ b/integrations/hermes/soc-memory-poc/scripts/triage_email.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import os +import re +import subprocess +import sys +from pathlib import Path + +SCRIPT_DIR = Path(__file__).resolve().parent +TRIAGE_ALERT = SCRIPT_DIR / "triage_alert.py" + +EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}") +URL_RE = re.compile(r"https?://[^\s<>\"]+") +IP_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b") +HOST_RE = re.compile(r"\b[A-Z]{2,}(?:-[A-Z0-9]+)+\b") +ATTACHMENT_RE = re.compile(r"\b[\w.-]+\.(?:html|htm|pdf|zip|docx|xlsx|eml)\b", re.IGNORECASE) +HEADER_RE = re.compile( + r"^(From|To|Subject|Attachment|URL|IP|Host|User|Alert type|Scenario)\s*:\s*(.+)$", + re.IGNORECASE | re.MULTILINE, +) + + +def first_nonempty(*values: str) -> str: + for value in values: + if value and value.strip(): + return value.strip() + return "" + + +def load_text(args: argparse.Namespace) -> str: + if args.file: + return Path(args.file).read_text(encoding="utf-8") + if args.text: + return args.text + data = sys.stdin.read() + if data.strip(): + return data + return "" + + +def find_header(text: str, name: str) -> str: + for key, value in HEADER_RE.findall(text): + if key.lower() == name.lower(): + return value.strip() + return "" + + +def unique_matches(pattern: re.Pattern[str], text: str) -> list[str]: + seen: list[str] = [] + for match in pattern.findall(text): + if match not in seen: + seen.append(match) + return seen + + +def infer_scenario(text: str, explicit_scenario: str = "", explicit_alert_type: str = "") -> tuple[str, str]: + if explicit_scenario: + return explicit_scenario, explicit_alert_type + + lowered = text.lower() + if any(token in lowered for token in ["impossible travel", "mfa fatigue", "oauth consent", "inbox rule", "entra", "azuread", "sign-in", "signin"]): + alert_type = explicit_alert_type or ("azuread_impossible_travel" if "impossible travel" in lowered else "o365_suspicious_login") + return "o365_suspicious_login", alert_type + + if any(token in lowered for token in ["phishing", "invoice", "attachment", "credential harvest", "fake microsoft 365", "dmarc", "mail_suspicious", "wire transfer"]): + if explicit_alert_type: + return "phishing", explicit_alert_type + if "wire transfer" in lowered or "executive impersonation" in lowered or "bec" in lowered: + return "phishing", "mail_bec_impersonation" + if "link" in lowered and "attachment" not in lowered: + return "phishing", "mail_suspicious_link" + return "phishing", "mail_suspicious_attachment" + + return "phishing", explicit_alert_type + + +def collect_facts(text: str, provided: list[str]) -> list[str]: + facts: list[str] = [] + for fact in provided: + if fact and fact not in facts: + facts.append(fact) + + lowered = text.lower() + fact_patterns = [ + ("DMARC failed", 
["dmarc failed"]), + ("SPF failed", ["spf failed"]), + ("User may have clicked the link", ["clicked", "user clicked"]), + ("Credential submission suspected", ["submitted credentials", "credential submission", "entered credentials"]), + ("Impossible travel observed", ["impossible travel"]), + ("MFA fatigue observed", ["mfa fatigue", "repeated mfa"]), + ("Inbox rule creation observed", ["inbox rule"]), + ("OAuth consent activity observed", ["oauth consent"]), + ] + for label, needles in fact_patterns: + if any(needle in lowered for needle in needles) and label not in facts: + facts.append(label) + + for line in text.splitlines(): + stripped = line.strip("-* \t") + if not stripped or len(stripped) > 160: + continue + lower = stripped.lower() + if any(word in lower for word in ["dmarc", "spf", "clicked", "credential", "impossible travel", "mfa", "inbox rule", "oauth"]): + if stripped not in facts: + facts.append(stripped) + return facts[:8] + + +def build_summary(text: str, subject: str, provided_summary: str = "") -> str: + if provided_summary: + return provided_summary[:240] + if subject: + return subject[:180] + for line in text.splitlines(): + stripped = line.strip() + if len(stripped) >= 20 and ":" not in stripped[:20]: + return stripped[:240] + return text.strip()[:240] + + +def parse_input(args: argparse.Namespace) -> dict[str, str | list[str]]: + text = load_text(args) + scenario, alert_type = infer_scenario(text, args.scenario, args.alert_type) + emails = unique_matches(EMAIL_RE, text) + urls = unique_matches(URL_RE, text) + ips = unique_matches(IP_RE, text) + hosts = unique_matches(HOST_RE, text) + attachments = unique_matches(ATTACHMENT_RE, text) + + sender = first_nonempty(args.sender, find_header(text, "From"), emails[0] if emails else "") + user = first_nonempty(args.user, find_header(text, "User"), find_header(text, "To"), emails[1] if len(emails) > 1 else "") + subject = first_nonempty(args.subject, find_header(text, "Subject")) + attachment = first_nonempty(args.attachment, find_header(text, "Attachment"), attachments[0] if attachments else "") + url = first_nonempty(args.url, find_header(text, "URL"), urls[0] if urls else "") + ip = first_nonempty(args.ip, find_header(text, "IP"), ips[0] if ips else "") + host = first_nonempty(args.host, find_header(text, "Host"), hosts[0] if hosts else "") + summary = build_summary(text, subject, args.summary) + facts = collect_facts(text, args.fact) + + return { + "scenario": scenario, + "alert_type": alert_type, + "user": user, + "host": host, + "sender": sender, + "subject": subject, + "attachment": attachment, + "url": url, + "ip": ip, + "summary": summary, + "facts": facts, + } + + +def run_triage(parsed: dict[str, str | list[str]], limit: int) -> None: + cmd = [ + sys.executable, + str(TRIAGE_ALERT), + "--scenario", str(parsed["scenario"]), + "--alert-type", str(parsed["alert_type"]), + "--user", str(parsed["user"]), + "--host", str(parsed["host"]), + "--sender", str(parsed["sender"]), + "--subject", str(parsed["subject"]), + "--attachment", str(parsed["attachment"]), + "--url", str(parsed["url"]), + "--ip", str(parsed["ip"]), + "--summary", str(parsed["summary"]), + "--limit", str(limit), + ] + for fact in parsed["facts"]: + cmd.extend(["--fact", str(fact)]) + subprocess.run(cmd, check=True, env=os.environ.copy()) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Unified SOC alert/email triage entrypoint with memory and Obsidian retrieval.") + parser.add_argument("--text", help="Raw email, ticket text, or 
freeform alert text") + parser.add_argument("--file", help="Path to a raw email/ticket/alert text file") + parser.add_argument("--scenario", default="", help="Optional scenario override") + parser.add_argument("--alert-type", default="", help="Optional alert type override") + parser.add_argument("--user", default="", help="Optional user override") + parser.add_argument("--host", default="", help="Optional host override") + parser.add_argument("--sender", default="", help="Optional sender override") + parser.add_argument("--subject", default="", help="Optional subject override") + parser.add_argument("--attachment", default="", help="Optional attachment override") + parser.add_argument("--url", default="", help="Optional URL override") + parser.add_argument("--ip", default="", help="Optional IP override") + parser.add_argument("--summary", default="", help="Optional summary override") + parser.add_argument("--fact", action="append", default=[], help="Additional known fact; repeatable") + parser.add_argument("--limit", type=int, default=5, help="Search limit") + args = parser.parse_args() + + parsed = parse_input(args) + run_triage(parsed, args.limit) + + +if __name__ == "__main__": + main() diff --git a/integrations/hermes/soc-memory-poc/scripts/triage_from_text.py b/integrations/hermes/soc-memory-poc/scripts/triage_from_text.py new file mode 100644 index 0000000..9c768c6 --- /dev/null +++ b/integrations/hermes/soc-memory-poc/scripts/triage_from_text.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + +SCRIPT_DIR = Path(__file__).resolve().parent +TRIAGE_EMAIL = SCRIPT_DIR / "triage_email.py" + +if __name__ == "__main__": + subprocess.run([sys.executable, str(TRIAGE_EMAIL), *sys.argv[1:]], check=True, env=os.environ.copy()) diff --git a/memory_gateway/__init__.py b/memory_gateway/__init__.py new file mode 100644 index 0000000..02500d0 --- /dev/null +++ b/memory_gateway/__init__.py @@ -0,0 +1 @@ +"""Memory Gateway 核心模块""" diff --git a/memory_gateway/config.py b/memory_gateway/config.py new file mode 100644 index 0000000..e07a3bb --- /dev/null +++ b/memory_gateway/config.py @@ -0,0 +1,55 @@ +"""配置加载模块""" +import os +from pathlib import Path +from typing import Optional + +import yaml +from pydantic import ValidationError + +from .types import Config, ServerConfig, OpenVikingConfig, MemoryConfig, LoggingConfig + + +def load_config(config_path: Optional[str] = None) -> Config: + """加载配置文件""" + if config_path is None: + config_path = os.environ.get("MEMORY_GATEWAY_CONFIG", "config.yaml") + + config_file = Path(config_path) + + if not config_file.exists(): + # 返回默认配置 + return Config() + + try: + with open(config_file, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + + if data is None: + return Config() + + return Config( + server=ServerConfig(**data.get("server", {})), + openviking=OpenVikingConfig(**data.get("openviking", {})), + memory=MemoryConfig(**data.get("memory", {})), + logging=LoggingConfig(**data.get("logging", {})), + ) + except (ValidationError, yaml.YAMLError) as e: + print(f"配置文件解析错误: {e}") + return Config() + + +def get_config() -> Config: + """获取全局配置(单例)""" + global _config + if _config is None: + _config = load_config() + return _config + + +def set_config(config: Config) -> None: + """设置全局配置""" + global _config + _config = config + + +_config: Optional[Config] = None diff --git a/memory_gateway/openviking_client.py b/memory_gateway/openviking_client.py new file mode 
100644 index 0000000..2328052 --- /dev/null +++ b/memory_gateway/openviking_client.py @@ -0,0 +1,302 @@ +"""OpenViking client wrapper used by the SOC Memory POC.""" +from __future__ import annotations + +import json +import logging +import mimetypes +import tempfile +from pathlib import Path +from typing import Any, Optional + +import httpx + +from .config import get_config +from .types import MemoryEntry, ResourceEntry, SearchResult + +logger = logging.getLogger(__name__) + + +class OpenVikingClient: + """Thin async client for the OpenViking HTTP API.""" + + def __init__( + self, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + timeout: int = 30, + account: str = "default", + user: str = "default", + ): + self.config = get_config() + self.base_url = base_url or self.config.openviking.url + self.api_key = api_key or self.config.openviking.api_key or "your-secret-root-key" + self.timeout = timeout + self.account = account + self.user = user + self._client: Optional[httpx.AsyncClient] = None + + def _get_headers(self) -> dict[str, str]: + headers = {} + if self.api_key: + headers["X-API-Key"] = self.api_key + headers["X-OpenViking-Account"] = self.account + headers["X-OpenViking-User"] = self.user + return headers + + async def _get_client(self) -> httpx.AsyncClient: + if self._client is None: + self._client = httpx.AsyncClient( + base_url=self.base_url, + headers=self._get_headers(), + timeout=self.timeout, + ) + return self._client + + async def close(self): + if self._client: + await self._client.aclose() + self._client = None + + async def health_check(self) -> dict[str, Any]: + client = await self._get_client() + try: + response = await client.get("/health") + response.raise_for_status() + return response.json() + except httpx.HTTPError as e: + logger.error(f"OpenViking 健康检查失败: {e}") + return {"status": "error", "message": str(e)} + + async def search( + self, + query: str, + namespace: Optional[str] = None, + limit: Optional[int] = None, + uri: Optional[str] = None, + ) -> SearchResult: + """Semantic search against OpenViking resources/memories.""" + client = await self._get_client() + + payload: dict[str, Any] = {"query": query} + if limit: + payload["limit"] = limit + + if uri: + payload["uri"] = uri + elif namespace: + payload["uri"] = f"viking://{namespace}" + + try: + response = await client.post("/api/v1/search/search", json=payload) + response.raise_for_status() + data = response.json() + + if data.get("status") != "ok": + logger.warning(f"搜索返回错误: {data.get('error')}") + return SearchResult(results=[], total=0) + + result = data.get("result", {}) + memories = result.get("memories", []) + resources = result.get("resources", []) + + all_results = [] + for m in memories + resources: + all_results.append( + { + "uri": m.get("uri"), + "abstract": m.get("abstract"), + "score": m.get("score"), + "context_type": m.get("context_type"), + } + ) + + return SearchResult(results=all_results, total=result.get("total", len(all_results))) + except httpx.HTTPError as e: + logger.error(f"搜索失败: {e}") + return SearchResult(results=[], total=0) + + async def add_memory( + self, + content: str, + namespace: Optional[str] = None, + memory_type: str = "general", + ) -> dict[str, Any]: + """Add memory via session commit flow.""" + client = await self._get_client() + ns = namespace or self.config.memory.default_namespace or "user/default/memories" + + try: + response = await client.post("/api/v1/sessions", json={"mode": "interactive"}) + response.raise_for_status() + session_data = 
response.json() + + if session_data.get("status") != "ok": + return session_data + + session_id = session_data["result"]["session_id"] + commit_response = await client.post( + f"/api/v1/sessions/{session_id}/commit", + json={ + "messages": [ + { + "role": "user", + "content": f"[{ns}/{memory_type}] {content}", + } + ] + }, + ) + commit_response.raise_for_status() + return commit_response.json() + except httpx.HTTPError as e: + logger.error(f"添加记忆失败: {e}") + raise + + async def _upload_temp_file(self, file_path: str | Path) -> str: + client = await self._get_client() + file_path = Path(file_path) + mime_type = mimetypes.guess_type(file_path.name)[0] or "application/octet-stream" + + with file_path.open("rb") as f: + response = await client.post( + "/api/v1/resources/temp_upload", + files={"file": (file_path.name, f, mime_type)}, + ) + response.raise_for_status() + data = response.json() + result = data.get("result", {}) + if "temp_path" in result: + return result["temp_path"] + if "temp_file_id" in result: + return result["temp_file_id"] + raise KeyError(f"Unexpected temp upload response: {data}") + + async def add_resource( + self, + uri: str, + content: str, + resource_type: str = "text", + wait: bool = False, + ) -> dict[str, Any]: + """Add a text/json resource by uploading a temporary file first. + + OpenViking HTTP API does not accept raw `uri + content` directly. The + client must upload a temp file and then create the resource with `to`. + """ + client = await self._get_client() + suffix_map = { + "json": ".json", + "text": ".txt", + "markdown": ".md", + "md": ".md", + } + suffix = suffix_map.get(resource_type, ".txt") + + with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=suffix, delete=False) as tmp: + tmp.write(content) + tmp_path = Path(tmp.name) + + try: + temp_ref = await self._upload_temp_file(tmp_path) + payload = { + "temp_path": temp_ref, + "to": uri, + "wait": wait, + "source_name": Path(uri).name or tmp_path.name, + "strict": False, + } + response = await client.post("/api/v1/resources", json=payload) + if response.status_code >= 400: + logger.error("添加资源失败响应: %s", response.text) + response.raise_for_status() + return response.json() + except httpx.HTTPError as e: + logger.error(f"添加资源失败: {e}") + raise + finally: + tmp_path.unlink(missing_ok=True) + + async def list_memories( + self, + namespace: Optional[str] = None, + memory_type: Optional[str] = None, + limit: Optional[int] = None, + ) -> list[MemoryEntry]: + client = await self._get_client() + + ns = namespace or "user/default/memories" + if memory_type: + ns = f"{ns}/{memory_type}" + + try: + response = await client.post( + "/api/v1/search/search", + json={"query": "", "uri": f"viking://{ns}", "limit": limit or 10}, + ) + response.raise_for_status() + data = response.json() + + if data.get("status") == "ok": + result = data.get("result", {}) + memories = result.get("memories", []) + return [ + MemoryEntry( + id=m.get("uri", ""), + content=m.get("abstract", ""), + namespace=ns, + memory_type=memory_type or "general", + ) + for m in memories + ] + return [] + except httpx.HTTPError as e: + logger.error(f"列出记忆失败: {e}") + return [] + + async def list_resources( + self, + namespace: Optional[str] = None, + limit: Optional[int] = None, + ) -> list[ResourceEntry]: + client = await self._get_client() + + uri = f"viking://{namespace}" if namespace else "viking://resources" + try: + response = await client.post( + "/api/v1/search/search", + json={"query": "", "uri": uri, "limit": limit or 10}, + ) + 
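+            # An empty query scoped to the target viking:// URI is used here as a
+            # lightweight listing call; only the returned abstracts are kept as the
+            # ResourceEntry content.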
response.raise_for_status() + data = response.json() + + if data.get("status") == "ok": + result = data.get("result", {}) + resources = result.get("resources", []) + return [ + ResourceEntry( + uri=r.get("uri", ""), + content=r.get("abstract", ""), + resource_type="text", + ) + for r in resources + ] + return [] + except httpx.HTTPError as e: + logger.error(f"列出资源失败: {e}") + return [] + + +_client: Optional[OpenVikingClient] = None + + +async def get_openviking_client() -> OpenVikingClient: + global _client + if _client is None: + _client = OpenVikingClient() + return _client + + +async def close_openviking_client(): + global _client + if _client: + await _client.close() + _client = None diff --git a/memory_gateway/server.py b/memory_gateway/server.py new file mode 100644 index 0000000..d083738 --- /dev/null +++ b/memory_gateway/server.py @@ -0,0 +1,387 @@ +"""Memory Gateway MCP Server. + +基于 Model Context Protocol 的记忆网关服务,为局域网内的 AI Agent 提供统一的 OpenViking 访问入口。 +""" +import asyncio +import json +import logging +from contextlib import asynccontextmanager +from typing import Any, Optional + +from fastapi import APIRouter, Depends, FastAPI, Header, HTTPException, Request, status +from fastapi.responses import JSONResponse +from fastapi.middleware.cors import CORSMiddleware +from mcp.server import Server +from mcp.types import TextContent, Tool +from sse_starlette import EventSourceResponse + +from .config import get_config, set_config, Config +from .openviking_client import get_openviking_client, close_openviking_client +from .types import SearchRequest, AddMemoryRequest, AddResourceRequest + +# 配置日志 +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +# 创建 MCP Server +mcp_server = Server("memory-gateway") + + +@mcp_server.list_tools() +async def list_tools() -> list[Tool]: + """列出可用的 MCP 工具""" + return [ + Tool( + name="search", + description="语义搜索记忆和资源", + inputSchema={ + "type": "object", + "properties": { + "query": {"type": "string", "description": "搜索查询"}, + "namespace": {"type": "string", "description": "命名空间(可选)"}, + "limit": {"type": "integer", "description": "返回结果数量(默认10)"}, + "uri": {"type": "string", "description": "资源 URI(可选)"}, + }, + "required": ["query"], + }, + ), + Tool( + name="add_memory", + description="添加新记忆", + inputSchema={ + "type": "object", + "properties": { + "content": {"type": "string", "description": "记忆内容"}, + "namespace": {"type": "string", "description": "命名空间(可选)"}, + "memory_type": {"type": "string", "description": "记忆类型(默认general)"}, + }, + "required": ["content"], + }, + ), + Tool( + name="add_resource", + description="添加资源", + inputSchema={ + "type": "object", + "properties": { + "uri": {"type": "string", "description": "资源 URI"}, + "content": {"type": "string", "description": "资源内容"}, + "resource_type": {"type": "string", "description": "资源类型(默认text)"}, + }, + "required": ["uri", "content"], + }, + ), + Tool( + name="get_status", + description="检查系统状态", + inputSchema={ + "type": "object", + "properties": {}, + }, + ), + Tool( + name="list_memories", + description="列出已存储的记忆", + inputSchema={ + "type": "object", + "properties": { + "namespace": {"type": "string", "description": "命名空间(可选)"}, + "memory_type": {"type": "string", "description": "记忆类型(可选)"}, + "limit": {"type": "integer", "description": "返回数量(默认10)"}, + }, + }, + ), + Tool( + name="list_resources", + description="列出已存储的资源", + inputSchema={ + "type": "object", + "properties": { + "namespace": 
{"type": "string", "description": "命名空间(可选)"}, + "limit": {"type": "integer", "description": "返回数量(默认10)"}, + }, + }, + ), + ] + + +@mcp_server.call_tool() +async def call_tool(name: str, arguments: Any) -> list[TextContent]: + """调用 MCP 工具""" + try: + ov_client = await get_openviking_client() + + if name == "search": + result = await ov_client.search( + query=arguments.get("query"), + namespace=arguments.get("namespace"), + limit=arguments.get("limit"), + uri=arguments.get("uri"), + ) + return [TextContent(type="text", text=str(result.results))] + + elif name == "add_memory": + result = await ov_client.add_memory( + content=arguments.get("content"), + namespace=arguments.get("namespace"), + memory_type=arguments.get("memory_type", "general"), + ) + return [TextContent(type="text", text=str(result))] + + elif name == "add_resource": + result = await ov_client.add_resource( + uri=arguments.get("uri"), + content=arguments.get("content"), + resource_type=arguments.get("resource_type", "text"), + ) + return [TextContent(type="text", text=str(result))] + + elif name == "get_status": + ov_status = await ov_client.health_check() + return [TextContent(type="text", text=f"Memory Gateway: OK\nOpenViking: {ov_status}")] + + elif name == "list_memories": + memories = await ov_client.list_memories( + namespace=arguments.get("namespace"), + memory_type=arguments.get("memory_type"), + limit=arguments.get("limit"), + ) + return [TextContent(type="text", text=str([m.model_dump() for m in memories]))] + + elif name == "list_resources": + resources = await ov_client.list_resources( + namespace=arguments.get("namespace"), + limit=arguments.get("limit"), + ) + return [TextContent(type="text", text=str([r.model_dump() for r in resources]))] + + else: + raise ValueError(f"Unknown tool: {name}") + + except Exception as e: + logger.error(f"工具执行失败: {e}") + return [TextContent(type="text", text=f"Error: {str(e)}")] + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """应用生命周期管理""" + logger.info("Memory Gateway 启动中...") + config = get_config() + logger.info(f"配置加载完成: {config.server.host}:{config.server.port}") + logger.info(f"OpenViking 后端: {config.openviking.url}") + + # 测试 OpenViking 连接 + try: + ov_client = await get_openviking_client() + status = await ov_client.health_check() + logger.info(f"OpenViking 连接状态: {status}") + except Exception as e: + logger.warning(f"OpenViking 连接失败: {e}") + + yield + + logger.info("Memory Gateway 关闭中...") + await close_openviking_client() + + +def verify_api_key(x_api_key: Optional[str] = Header(default=None)) -> None: + """在配置了 API Key 时校验请求头。""" + expected_key = get_config().server.api_key + if not expected_key: + return + if x_api_key != expected_key: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid or missing API key", + ) + + +# FastAPI 应用 +app = FastAPI(title="Memory Gateway", version="0.1.0", lifespan=lifespan) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.get("/health", dependencies=[Depends(verify_api_key)]) +async def health_check(): + """健康检查""" + try: + ov_client = await get_openviking_client() + ov_status = await ov_client.health_check() + return { + "status": "ok", + "gateway": "memory-gateway", + "openviking": ov_status, + } + except Exception as e: + return { + "status": "degraded", + "gateway": "memory-gateway", + "error": str(e), + } + +mcp_router = APIRouter() + + +async def mcp_server_events(request: Request, _: None 
= Depends(verify_api_key)): + """MCP Server-Sent Events 端点 - 使用 stdio 模式模拟""" + async def event_generator(): + # 发送初始化消息 + yield {"event": "initialize", "data": json.dumps({"protocolVersion": "2024-11-05"})} + + # 保持连接 + try: + while True: + await asyncio.sleep(30) + yield {"event": "ping", "data": ""} + except asyncio.CancelledError: + pass + + return EventSourceResponse(event_generator()) + + +mcp_router.add_api_route("/sse", mcp_server_events, methods=["GET"]) + + +# MCP JSON-RPC 端点(简化实现) +async def mcp_rpc(request: Request, _: None = Depends(verify_api_key)): + """处理 MCP JSON-RPC 请求""" + body = await request.json() + + method = body.get("method") + params = body.get("params", {}) + msg_id = body.get("id") + + try: + if method == "tools/list": + tools = await list_tools() + result = { + "tools": [ + { + "name": t.name, + "description": t.description, + "inputSchema": t.inputSchema, + } + for t in tools + ] + } + elif method == "tools/call": + tool_name = params.get("name") + tool_args = params.get("arguments", {}) + result_content = await call_tool_tool(tool_name, tool_args) + result = {"content": [c.model_dump() for c in result_content]} + else: + return JSONResponse( + status_code=400, + content={"jsonrpc": "2.0", "error": {"code": -32601, "message": f"Method not found: {method}"}, "id": msg_id} + ) + + return {"jsonrpc": "2.0", "result": result, "id": msg_id} + + except Exception as e: + logger.error(f"MCP RPC 错误: {e}") + return JSONResponse( + status_code=500, + content={"jsonrpc": "2.0", "error": {"code": -32603, "message": str(e)}, "id": msg_id} + ) + + +async def call_tool_tool(name: str, arguments: dict) -> list[TextContent]: + """调用工具的内部函数""" + return await call_tool(name, arguments) + + +mcp_router.add_api_route("/rpc", mcp_rpc, methods=["POST"]) + + +# 注册 MCP 路由 +app.include_router(mcp_router, prefix="/mcp", tags=["mcp"]) + + +@app.post("/api/search", dependencies=[Depends(verify_api_key)]) +async def api_search(request: SearchRequest): + """REST API: 搜索""" + ov_client = await get_openviking_client() + result = await ov_client.search( + query=request.query, + namespace=request.namespace or get_config().memory.default_namespace, + limit=request.limit or get_config().memory.search_limit, + uri=request.uri, + ) + return {"results": result.results, "total": result.total} + + +@app.post("/api/memory", dependencies=[Depends(verify_api_key)]) +async def api_add_memory(request: AddMemoryRequest): + """REST API: 添加记忆""" + ov_client = await get_openviking_client() + result = await ov_client.add_memory( + content=request.content, + namespace=request.namespace or get_config().memory.default_namespace, + memory_type=request.memory_type, + ) + return result + + +@app.post("/api/resource", dependencies=[Depends(verify_api_key)]) +async def api_add_resource(request: AddResourceRequest): + """REST API: 添加资源""" + ov_client = await get_openviking_client() + result = await ov_client.add_resource( + uri=request.uri, + content=request.content, + resource_type=request.resource_type, + ) + return result + + +def create_app(config: Optional[Config] = None) -> FastAPI: + """创建 FastAPI 应用""" + if config: + set_config(config) + return app + + +# 入口点 +def main(): + """主入口""" + import argparse + import uvicorn + + parser = argparse.ArgumentParser(description="Memory Gateway MCP Server") + parser.add_argument("--config", default="config.yaml", help="配置文件路径") + parser.add_argument("--host", default=None, help="监听地址") + parser.add_argument("--port", type=int, default=None, help="监听端口") + args = 
parser.parse_args() + + # 加载配置 + from .config import load_config as load + config = load(args.config) + if args.host: + config.server.host = args.host + if args.port: + config.server.port = args.port + set_config(config) + + # 启动服务 + uvicorn.run( + app, + host=config.server.host, + port=config.server.port, + log_level=config.logging.level.lower(), + ) + + +if __name__ == "__main__": + main() diff --git a/memory_gateway/types.py b/memory_gateway/types.py new file mode 100644 index 0000000..41d76fa --- /dev/null +++ b/memory_gateway/types.py @@ -0,0 +1,82 @@ +"""类型定义""" +from typing import Optional, Any +from pydantic import BaseModel, Field + + +class ServerConfig(BaseModel): + """服务器配置""" + host: str = "0.0.0.0" + port: int = 1934 + api_key: str = "" + + +class OpenVikingConfig(BaseModel): + """OpenViking 后端配置""" + url: str = "http://localhost:1933" + api_key: str = "" + timeout: int = 30 + + +class MemoryConfig(BaseModel): + """记忆配置""" + default_namespace: str = "soc" + search_limit: int = 10 + + +class LoggingConfig(BaseModel): + """日志配置""" + level: str = "INFO" + format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + + +class Config(BaseModel): + """完整配置""" + server: ServerConfig = Field(default_factory=ServerConfig) + openviking: OpenVikingConfig = Field(default_factory=OpenVikingConfig) + memory: MemoryConfig = Field(default_factory=MemoryConfig) + logging: LoggingConfig = Field(default_factory=LoggingConfig) + + +class SearchRequest(BaseModel): + """搜索请求""" + query: str + namespace: Optional[str] = None + limit: Optional[int] = None + uri: Optional[str] = None + + +class AddMemoryRequest(BaseModel): + """添加记忆请求""" + content: str + namespace: Optional[str] = None + memory_type: Optional[str] = "general" + + +class AddResourceRequest(BaseModel): + """添加资源请求""" + uri: str + content: str + resource_type: Optional[str] = "text" + + +class SearchResult(BaseModel): + """搜索结果""" + results: list[dict[str, Any]] + total: int + + +class MemoryEntry(BaseModel): + """记忆条目""" + id: str + content: str + namespace: str + memory_type: str + created_at: Optional[str] = None + + +class ResourceEntry(BaseModel): + """资源条目""" + uri: str + content: str + resource_type: str + created_at: Optional[str] = None diff --git a/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1001 - Impossible travel login followed by MFA prompt fatigue.md b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1001 - Impossible travel login followed by MFA prompt fatigue.md new file mode 100644 index 0000000..8a294ec --- /dev/null +++ b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1001 - Impossible travel login followed by MFA prompt fatigue.md @@ -0,0 +1,101 @@ +--- +case_id: CASE-2026-1001 +scenario: o365_suspicious_login +alert_type: azuread_impossible_travel +severity: high +verdict: true_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-1001 Impossible travel login followed by MFA prompt fatigue + +## 基本信息 + +- Case ID: CASE-2026-1001 +- 标题: Impossible travel login followed by MFA prompt fatigue +- 告警类型: azuread_impossible_travel +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 真报 +- 严重等级: high + +## 告警摘要 + +User account showed impossible travel between Shanghai and Amsterdam, followed by repeated MFA prompts and successful sign-in. 
+ +## 关键实体 + +- 用户: david@corp.example +- 主机: WS-DAVID-01 +- 邮箱: david@corp.example +- IP: 203.0.113.150, 198.51.100.61 +- 域名: 无 +- 文件 Hash: 无 +- 其他 IOC: 无 + +## 关键证据 + +- Two successful sign-ins from geographically impossible locations within 15 minutes. +- MFA challenge volume increased abnormally before final success. +- User confirmed they did not initiate overseas login. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:User account showed impossible travel between Shanghai and Amsterdam, followed by repeated MFA prompts and successful sign-in. +2. 提取关键证据并交叉验证:Two successful sign-ins from geographically impossible locations within 15 minutes. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:真报。 + +## 结论依据 + +- 结论为真报。 +- 最关键依据:Two successful sign-ins from geographically impossible locations within 15 minutes. +- 补充依据:MFA challenge volume increased abnormally before final success. + +## 处置建议 + +- 复核登录来源、MFA 事件和后续邮箱规则或 OAuth 变更。 +- 若存在账号接管迹象,立即执行会话失效和凭据重置。 + +## 可复用模式 + +- 命中模式: scenario:o365_suspicious_login, alert_type:azuread_impossible_travel +- 误报特征: 无 +- 需关注的变体: 相关标签:o365, login, impossible-travel, mfa-fatigue + +## 关联知识 + +- 关联 Playbook: [[PB-O365-LOGIN-001]] +- 关联 KB: [[KB-O365-IMPOSSIBLE-TRAVEL]], [[KB-O365-MFA-FATIGUE]] +- 关联历史 Case: [[CASE-2026-1005]], [[CASE-2026-1004]] +- 关联实体: [[david@corp.example]], [[WS-DAVID-01]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-1005]] (case score=0.687) This directory contains a single case record documenting a false positive alert triggered by Microsoft 365’s impossible travel detection sys... +- [[CASE-2026-1004]] (case score=0.636) This directory contains a single incident case file related to a suspicious Microsoft 365 login attempt, identified as CASE-2026-1004. The c... + +### 推荐知识条目 + +- [[KB-O365-IMPOSSIBLE-TRAVEL]] (knowledge score=0.69) This directory contains a knowledge base artifact focused on analyzing and validating Microsoft 365 impossible travel alerts—security events... +- [[PB-O365-LOGIN-001]] (knowledge score=0.63) This directory contains a security playbook focused on detecting and responding to suspicious Microsoft Entra ID sign-in activities within M... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/o365_suspicious_login +- #alert/azuread_impossible_travel +- #verdict/true-positive +- #o365 +- #login +- #impossible-travel +- #mfa-fatigue diff --git a/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1002 - Legacy protocol sign-in from unfamiliar IP blocked by policy.md b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1002 - Legacy protocol sign-in from unfamiliar IP blocked by policy.md new file mode 100644 index 0000000..6d7c8f7 --- /dev/null +++ b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1002 - Legacy protocol sign-in from unfamiliar IP blocked by policy.md @@ -0,0 +1,100 @@ +--- +case_id: CASE-2026-1002 +scenario: o365_suspicious_login +alert_type: azuread_legacy_auth_attempt +severity: medium +verdict: false_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-1002 Legacy protocol sign-in from unfamiliar IP blocked by policy + +## 基本信息 + +- Case ID: CASE-2026-1002 +- 标题: Legacy protocol sign-in from unfamiliar IP blocked by policy +- 告警类型: azuread_legacy_auth_attempt +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 误报 +- 严重等级: medium + +## 告警摘要 + +Legacy authentication attempt from a cloud IP was blocked; investigation tied it to an approved migration tool test. + +## 关键实体 + +- 用户: svc-migration@corp.example +- 主机: 无 +- 邮箱: svc-migration@corp.example +- IP: 192.0.2.24 +- 域名: 无 +- 文件 Hash: 无 +- 其他 IOC: 无 + +## 关键证据 + +- The account is a known migration service account. +- Source IP matched approved cloud migration vendor range. +- No successful sign-in occurred due to policy block. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:Legacy authentication attempt from a cloud IP was blocked; investigation tied it to an approved migration tool test. +2. 提取关键证据并交叉验证:The account is a known migration service account. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:误报。 + +## 结论依据 + +- 结论为误报。 +- 最关键依据:The account is a known migration service account. +- 补充依据:Source IP matched approved cloud migration vendor range. + +## 处置建议 + +- 记录误报原因,并更新检测例外或抑制条件。 + +## 可复用模式 + +- 命中模式: scenario:o365_suspicious_login, alert_type:azuread_legacy_auth_attempt +- 误报特征: 本案最终确认为误报,可用于补充抑制条件。 +- 需关注的变体: 相关标签:o365, login, false-positive, legacy-auth + +## 关联知识 + +- 关联 Playbook: [[PB-O365-LOGIN-001]] +- 关联 KB: [[KB-O365-LEGACY-AUTH]], [[KB-O365-IMPOSSIBLE-TRAVEL]] +- 关联历史 Case: [[CASE-2026-1001]], [[CASE-2026-1004]] +- 关联实体: [[svc-migration@corp.example]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-1001]] (case score=0.651) This directory contains a structured security incident case report related to a high-severity event in an Office 365 environment, identified... +- [[CASE-2026-1004]] (case score=0.634) This directory contains a single incident case file related to a suspicious Microsoft 365 login attempt, identified as CASE-2026-1004. The c... + +### 推荐知识条目 + +- [[KB-O365-IMPOSSIBLE-TRAVEL]] (knowledge score=0.626) This directory contains a knowledge base artifact focused on analyzing and validating Microsoft 365 impossible travel alerts—security events... +- [[PB-O365-LOGIN-001]] (knowledge score=0.61) This directory contains a security playbook focused on detecting and responding to suspicious Microsoft Entra ID sign-in activities within M... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/o365_suspicious_login +- #alert/azuread_legacy_auth_attempt +- #verdict/false-positive +- #o365 +- #login +- #false-positive +- #legacy-auth diff --git a/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1003 - Suspicious inbox rule creation after successful foreign login.md b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1003 - Suspicious inbox rule creation after successful foreign login.md new file mode 100644 index 0000000..60e846d --- /dev/null +++ b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1003 - Suspicious inbox rule creation after successful foreign login.md @@ -0,0 +1,101 @@ +--- +case_id: CASE-2026-1003 +scenario: o365_suspicious_login +alert_type: azuread_suspicious_inbox_rule_after_login +severity: high +verdict: true_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-1003 Suspicious inbox rule creation after successful foreign login + +## 基本信息 + +- Case ID: CASE-2026-1003 +- 标题: Suspicious inbox rule creation after successful foreign login +- 告警类型: azuread_suspicious_inbox_rule_after_login +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 真报 +- 严重等级: high + +## 告警摘要 + +An overseas sign-in to Microsoft 365 was followed by inbox rule creation to hide finance-related emails. + +## 关键实体 + +- 用户: emma@corp.example +- 主机: WS-EMMA-07 +- 邮箱: emma@corp.example +- IP: 198.51.100.98 +- 域名: 无 +- 文件 Hash: 无 +- 其他 IOC: 无 + +## 关键证据 + +- Successful sign-in from untrusted ASN. +- Inbox rule moved wire transfer emails to RSS Feeds folder. +- Mailbox audit showed rule creation minutes after login. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:An overseas sign-in to Microsoft 365 was followed by inbox rule creation to hide finance-related emails. +2. 提取关键证据并交叉验证:Successful sign-in from untrusted ASN. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:真报。 + +## 结论依据 + +- 结论为真报。 +- 最关键依据:Successful sign-in from untrusted ASN. +- 补充依据:Inbox rule moved wire transfer emails to RSS Feeds folder. + +## 处置建议 + +- 复核登录来源、MFA 事件和后续邮箱规则或 OAuth 变更。 +- 若存在账号接管迹象,立即执行会话失效和凭据重置。 + +## 可复用模式 + +- 命中模式: scenario:o365_suspicious_login, alert_type:azuread_suspicious_inbox_rule_after_login +- 误报特征: 无 +- 需关注的变体: 相关标签:o365, login, inbox-rule, account-compromise + +## 关联知识 + +- 关联 Playbook: [[PB-O365-LOGIN-001]] +- 关联 KB: [[KB-O365-INBOX-RULE-ABUSE]], [[KB-O365-IMPOSSIBLE-TRAVEL]] +- 关联历史 Case: [[CASE-2026-1005]], [[CASE-2026-1001]] +- 关联实体: [[emma@corp.example]], [[WS-EMMA-07]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-1005]] (case score=0.667) This directory contains a single case record documenting a false positive alert triggered by Microsoft 365’s impossible travel detection sys... +- [[CASE-2026-1001]] (case score=0.666) This document is a structured case report detailing a high-severity security incident involving suspicious login activity in an Office 365 e... + +### 推荐知识条目 + +- [[PB-O365-LOGIN-001]] (knowledge score=0.653) This directory contains a security playbook focused on detecting and responding to suspicious Microsoft Entra ID sign-in activities within M... +- [[KB-O365-IMPOSSIBLE-TRAVEL]] (knowledge score=0.645) This directory contains a knowledge base artifact focused on analyzing and validating Microsoft 365 impossible travel alerts—security events... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/o365_suspicious_login +- #alert/azuread_suspicious_inbox_rule_after_login +- #verdict/true-positive +- #o365 +- #login +- #inbox-rule +- #account-compromise diff --git a/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1004 - Multiple failed logins from residential proxy but no successful access.md b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1004 - Multiple failed logins from residential proxy but no successful access.md new file mode 100644 index 0000000..febb56b --- /dev/null +++ b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1004 - Multiple failed logins from residential proxy but no successful access.md @@ -0,0 +1,101 @@ +--- +case_id: CASE-2026-1004 +scenario: o365_suspicious_login +alert_type: azuread_password_spray_attempt +severity: medium +verdict: uncertain +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-1004 Multiple failed logins from residential proxy but no successful access + +## 基本信息 + +- Case ID: CASE-2026-1004 +- 标题: Multiple failed logins from residential proxy but no successful access +- 告警类型: azuread_password_spray_attempt +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: uncertain +- 严重等级: medium + +## 告警摘要 + +Repeated failed Microsoft 365 sign-in attempts targeted one user from a residential proxy network, with no successful authentication observed. + +## 关键实体 + +- 用户: frank@corp.example +- 主机: 无 +- 邮箱: frank@corp.example +- IP: 203.0.113.201 +- 域名: 无 +- 文件 Hash: 无 +- 其他 IOC: 无 + +## 关键证据 + +- High-volume failed attempts over a short period. +- Source IP attributed to a residential proxy provider. +- No matching successful sign-in or MFA event found. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:Repeated failed Microsoft 365 sign-in attempts targeted one user from a residential proxy network, with no successful authentication observed. +2. 提取关键证据并交叉验证:High-volume failed attempts over a short period. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:uncertain。 + +## 结论依据 + +- 结论为uncertain。 +- 最关键依据:High-volume failed attempts over a short period. +- 补充依据:Source IP attributed to a residential proxy provider. + +## 处置建议 + +- 复核登录来源、MFA 事件和后续邮箱规则或 OAuth 变更。 +- 若存在账号接管迹象,立即执行会话失效和凭据重置。 + +## 可复用模式 + +- 命中模式: scenario:o365_suspicious_login, alert_type:azuread_password_spray_attempt +- 误报特征: 无 +- 需关注的变体: 相关标签:o365, login, password-spray, pending + +## 关联知识 + +- 关联 Playbook: [[PB-O365-LOGIN-001]] +- 关联 KB: [[KB-O365-IMPOSSIBLE-TRAVEL]] +- 关联历史 Case: [[CASE-2026-1001]], [[CASE-2026-1003]] +- 关联实体: [[frank@corp.example]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-1001]] (case score=0.665) This directory contains a structured security incident case report related to a high-severity event in an Office 365 environment, identified... +- [[CASE-2026-1003]] (case score=0.627) This directory contains a structured incident case report focused on a confirmed Microsoft 365 account compromise involving suspicious login... + +### 推荐知识条目 + +- [[PB-O365-LOGIN-001]] (knowledge score=0.614) This directory contains a security playbook focused on detecting and responding to suspicious Microsoft Entra ID sign-in activities within M... +- [[KB-O365-IMPOSSIBLE-TRAVEL]] (knowledge score=0.609) This directory contains a knowledge base artifact focused on analyzing and validating Microsoft 365 impossible travel alerts—security events... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/o365_suspicious_login +- #alert/azuread_password_spray_attempt +- #verdict/uncertain +- #o365 +- #login +- #password-spray +- #pending diff --git a/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1005 - Traveling executive triggered impossible travel but activity was legitimate.md b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1005 - Traveling executive triggered impossible travel but activity was legitimate.md new file mode 100644 index 0000000..cb04d58 --- /dev/null +++ b/obsidian-vault/02_Cases/o365_suspicious_login/CASE-2026-1005 - Traveling executive triggered impossible travel but activity was legitimate.md @@ -0,0 +1,100 @@ +--- +case_id: CASE-2026-1005 +scenario: o365_suspicious_login +alert_type: azuread_impossible_travel +severity: medium +verdict: false_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-1005 Traveling executive triggered impossible travel but activity was legitimate + +## 基本信息 + +- Case ID: CASE-2026-1005 +- 标题: Traveling executive triggered impossible travel but activity was legitimate +- 告警类型: azuread_impossible_travel +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 误报 +- 严重等级: medium + +## 告警摘要 + +Executive account triggered impossible travel due to corporate VPN exit node while the user was on an approved overseas trip. + +## 关键实体 + +- 用户: grace@corp.example +- 主机: VIP-LAPTOP-01 +- 邮箱: grace@corp.example +- IP: 192.0.2.90, 203.0.113.77 +- 域名: 无 +- 文件 Hash: 无 +- 其他 IOC: 无 + +## 关键证据 + +- Approved travel request existed. +- One login originated from corporate VPN exit node. +- Device and user agent were consistent with known user profile. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:Executive account triggered impossible travel due to corporate VPN exit node while the user was on an approved overseas trip. +2. 提取关键证据并交叉验证:Approved travel request existed. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:误报。 + +## 结论依据 + +- 结论为误报。 +- 最关键依据:Approved travel request existed. +- 补充依据:One login originated from corporate VPN exit node. + +## 处置建议 + +- 记录误报原因,并更新检测例外或抑制条件。 + +## 可复用模式 + +- 命中模式: scenario:o365_suspicious_login, alert_type:azuread_impossible_travel +- 误报特征: 本案最终确认为误报,可用于补充抑制条件。 +- 需关注的变体: 相关标签:o365, login, false-positive, travel + +## 关联知识 + +- 关联 Playbook: [[PB-O365-LOGIN-001]] +- 关联 KB: [[KB-O365-IMPOSSIBLE-TRAVEL]] +- 关联历史 Case: [[CASE-2026-1001]], [[CASE-2026-1004]] +- 关联实体: [[grace@corp.example]], [[VIP-LAPTOP-01]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-1001]] (case score=0.684) This directory contains a structured security incident case report related to a high-severity event in an Office 365 environment, identified... +- [[CASE-2026-1004]] (case score=0.63) This directory contains a single incident case file related to a suspicious Microsoft 365 login attempt, identified as CASE-2026-1004. The c... + +### 推荐知识条目 + +- [[KB-O365-IMPOSSIBLE-TRAVEL]] (knowledge score=0.703) This directory contains a knowledge base artifact focused on analyzing and validating Microsoft 365 impossible travel alerts—security events... +- [[PB-O365-LOGIN-001]] (knowledge score=0.626) This directory contains a security playbook focused on detecting and responding to suspicious Microsoft Entra ID sign-in activities within M... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/o365_suspicious_login +- #alert/azuread_impossible_travel +- #verdict/false-positive +- #o365 +- #login +- #false-positive +- #travel diff --git a/obsidian-vault/02_Cases/phishing/CASE-2026-0001 - Finance user received invoice-themed phishing email.md b/obsidian-vault/02_Cases/phishing/CASE-2026-0001 - Finance user received invoice-themed phishing email.md new file mode 100644 index 0000000..90930e4 --- /dev/null +++ b/obsidian-vault/02_Cases/phishing/CASE-2026-0001 - Finance user received invoice-themed phishing email.md @@ -0,0 +1,101 @@ +--- +case_id: CASE-2026-0001 +scenario: phishing +alert_type: mail_suspicious_attachment +severity: high +verdict: true_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-0001 Finance user received invoice-themed phishing email + +## 基本信息 + +- Case ID: CASE-2026-0001 +- 标题: Finance user received invoice-themed phishing email +- 告警类型: mail_suspicious_attachment +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 真报 +- 严重等级: high + +## 告警摘要 + +Finance user received an invoice-themed phishing email containing a malicious HTML attachment that redirected to a credential harvesting page. + +## 关键实体 + +- 用户: alice@corp.example +- 主机: FIN-LAPTOP-12 +- 邮箱: alice@corp.example +- IP: 198.51.100.20 +- 域名: vendor-payments.com, vendor-payments-login.com +- 文件 Hash: sha256:phish0001 +- 其他 IOC: https://vendor-payments-login.com/review, billing@vendor-payments.com + +## 关键证据 + +- Sender domain was newly observed and failed DMARC. +- Attachment redirected to a fake Microsoft 365 login page. +- User clicked the link before mail quarantine completed. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:Finance user received an invoice-themed phishing email containing a malicious HTML attachment that redirected to a credential harvesting page. +2. 提取关键证据并交叉验证:Sender domain was newly observed and failed DMARC. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:真报。 + +## 结论依据 + +- 结论为真报。 +- 最关键依据:Sender domain was newly observed and failed DMARC. +- 补充依据:Attachment redirected to a fake Microsoft 365 login page. + +## 处置建议 + +- 隔离相同主题、发件人或 URL 的邮件样本。 +- 核查用户是否点击或提交凭据,并按需执行凭据重置。 + +## 可复用模式 + +- 命中模式: scenario:phishing, alert_type:mail_suspicious_attachment +- 误报特征: 无 +- 需关注的变体: 相关标签:phishing, email, credential-harvest, finance + +## 关联知识 + +- 关联 Playbook: [[PB-PHISH-001]] +- 关联 KB: [[KB-PHISH-HEADER-CHECK]], [[KB-CRED-HARVEST-PATTERNS]] +- 关联历史 Case: [[CASE-2026-0004]], [[CASE-2026-0002]] +- 关联实体: [[alice@corp.example]], [[FIN-LAPTOP-12]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-0004]] (case score=0.662) This directory contains a structured incident case report related to a phishing attack targeting a shared mailbox via a spoofed OneDrive not... +- [[CASE-2026-0002]] (case score=0.631) This directory contains a single case record detailing the investigation of a suspicious payroll notification email flagged due to a shorten... + +### 推荐知识条目 + +- [[KB-CRED-HARVEST-PATTERNS]] (knowledge score=0.656) This directory contains a structured knowledge base artifact focused on identifying and investigating credential harvesting campaigns, parti... +- [[PB-PHISH-001]] (knowledge score=0.639) This directory contains a phishing email investigation playbook designed to standardize incident response procedures for suspicious emails, ... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/phishing +- #alert/mail_suspicious_attachment +- #verdict/true-positive +- #phishing +- #email +- #credential-harvest +- #finance diff --git a/obsidian-vault/02_Cases/phishing/CASE-2026-0002 - Payroll notification email flagged but determined benign.md b/obsidian-vault/02_Cases/phishing/CASE-2026-0002 - Payroll notification email flagged but determined benign.md new file mode 100644 index 0000000..a2a3e59 --- /dev/null +++ b/obsidian-vault/02_Cases/phishing/CASE-2026-0002 - Payroll notification email flagged but determined benign.md @@ -0,0 +1,100 @@ +--- +case_id: CASE-2026-0002 +scenario: phishing +alert_type: mail_suspicious_link +severity: medium +verdict: false_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-0002 Payroll notification email flagged but determined benign + +## 基本信息 + +- Case ID: CASE-2026-0002 +- 标题: Payroll notification email flagged but determined benign +- 告警类型: mail_suspicious_link +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 误报 +- 严重等级: medium + +## 告警摘要 + +Payroll update email was flagged due to a shortened URL, but the destination was the approved HR vendor portal. + +## 关键实体 + +- 用户: bob@corp.example +- 主机: HR-LAPTOP-03 +- 邮箱: bob@corp.example +- IP: 无 +- 域名: hr-vendor.example +- 文件 Hash: 无 +- 其他 IOC: https://bit.ly/hr-portal-example, notify@hr-vendor.example + +## 关键证据 + +- Sender domain aligned with SPF and DKIM. +- Destination domain matched approved supplier inventory. +- No credential prompt anomaly observed. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:Payroll update email was flagged due to a shortened URL, but the destination was the approved HR vendor portal. +2. 提取关键证据并交叉验证:Sender domain aligned with SPF and DKIM. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:误报。 + +## 结论依据 + +- 结论为误报。 +- 最关键依据:Sender domain aligned with SPF and DKIM. +- 补充依据:Destination domain matched approved supplier inventory. + +## 处置建议 + +- 记录误报原因,并更新检测例外或抑制条件。 + +## 可复用模式 + +- 命中模式: scenario:phishing, alert_type:mail_suspicious_link +- 误报特征: 本案最终确认为误报,可用于补充抑制条件。 +- 需关注的变体: 相关标签:phishing, email, false-positive, vendor + +## 关联知识 + +- 关联 Playbook: [[PB-PHISH-001]] +- 关联 KB: [[KB-PHISH-HEADER-CHECK]], [[KB-CRED-HARVEST-PATTERNS]] +- 关联历史 Case: [[CASE-2026-0004]], [[CASE-2026-0001]] +- 关联实体: [[bob@corp.example]], [[HR-LAPTOP-03]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-0004]] (case score=0.549) This directory contains a structured incident case report related to a phishing attack targeting a shared mailbox via a spoofed OneDrive not... +- [[CASE-2026-0001]] (case score=0.532) This directory contains a structured case report detailing a high-severity phishing incident targeting a finance user via a malicious invoic... + +### 推荐知识条目 + +- [[PB-PHISH-001]] (knowledge score=0.514) This directory contains a phishing email investigation playbook designed to standardize incident response procedures for suspicious emails, ... +- [[KB-CRED-HARVEST-PATTERNS]] (knowledge score=0.494) This directory contains a structured knowledge base artifact focused on identifying and investigating credential harvesting campaigns, parti... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/phishing +- #alert/mail_suspicious_link +- #verdict/false-positive +- #phishing +- #email +- #false-positive +- #vendor diff --git a/obsidian-vault/02_Cases/phishing/CASE-2026-0003 - Executive impersonation email requested urgent wire transfer.md b/obsidian-vault/02_Cases/phishing/CASE-2026-0003 - Executive impersonation email requested urgent wire transfer.md new file mode 100644 index 0000000..a81cc6f --- /dev/null +++ b/obsidian-vault/02_Cases/phishing/CASE-2026-0003 - Executive impersonation email requested urgent wire transfer.md @@ -0,0 +1,101 @@ +--- +case_id: CASE-2026-0003 +scenario: phishing +alert_type: mail_bec_impersonation +severity: high +verdict: true_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-0003 Executive impersonation email requested urgent wire transfer + +## 基本信息 + +- Case ID: CASE-2026-0003 +- 标题: Executive impersonation email requested urgent wire transfer +- 告警类型: mail_bec_impersonation +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 真报 +- 严重等级: high + +## 告警摘要 + +An executive impersonation email targeted finance staff with an urgent wire transfer request from a lookalike domain. + +## 关键实体 + +- 用户: carol@corp.example +- 主机: FIN-LAPTOP-08 +- 邮箱: carol@corp.example +- IP: 203.0.113.45 +- 域名: c0rp-example.com +- 文件 Hash: 无 +- 其他 IOC: ceo@c0rp-example.com + +## 关键证据 + +- Lookalike domain used numeric substitution. +- Language pressure matched prior BEC pattern. +- No historical communication from sender domain. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:An executive impersonation email targeted finance staff with an urgent wire transfer request from a lookalike domain. +2. 提取关键证据并交叉验证:Lookalike domain used numeric substitution. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:真报。 + +## 结论依据 + +- 结论为真报。 +- 最关键依据:Lookalike domain used numeric substitution. +- 补充依据:Language pressure matched prior BEC pattern. + +## 处置建议 + +- 隔离相同主题、发件人或 URL 的邮件样本。 +- 核查用户是否点击或提交凭据,并按需执行凭据重置。 + +## 可复用模式 + +- 命中模式: scenario:phishing, alert_type:mail_bec_impersonation +- 误报特征: 无 +- 需关注的变体: 相关标签:phishing, bec, executive-impersonation + +## 关联知识 + +- 关联 Playbook: [[PB-PHISH-001]] +- 关联 KB: [[KB-CRED-HARVEST-PATTERNS]], [[KB-PHISH-HEADER-CHECK]] +- 关联历史 Case: [[CASE-2026-0001]], [[CASE-2026-0004]] +- 关联实体: [[carol@corp.example]], [[FIN-LAPTOP-08]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-0001]] (case score=0.572) This directory contains a structured case report detailing a high-severity phishing incident targeting a finance user via a malicious invoic... +- [[CASE-2026-0004]] (case score=0.566) This directory contains a structured incident case report related to a phishing attack targeting a shared mailbox via a spoofed OneDrive not... + +### 推荐知识条目 + +- [[PB-PHISH-001]] (knowledge score=0.538) This directory contains a phishing email investigation playbook designed to standardize incident response procedures for suspicious emails, ... +- [[KB-CRED-HARVEST-PATTERNS]] (knowledge score=0.522) This directory contains a structured knowledge base artifact focused on identifying and investigating credential harvesting campaigns, parti... +- [[KB-PHISH-HEADER-CHECK]] (knowledge score=0.512) This directory contains a structured knowledge base document focused on validating phishing emails through detailed analysis of email header... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/phishing +- #alert/mail_bec_impersonation +- #verdict/true-positive +- #phishing +- #bec +- #executive-impersonation diff --git a/obsidian-vault/02_Cases/phishing/CASE-2026-0004 - Shared mailbox received OneDrive lure with HTML attachment.md b/obsidian-vault/02_Cases/phishing/CASE-2026-0004 - Shared mailbox received OneDrive lure with HTML attachment.md new file mode 100644 index 0000000..1303fc1 --- /dev/null +++ b/obsidian-vault/02_Cases/phishing/CASE-2026-0004 - Shared mailbox received OneDrive lure with HTML attachment.md @@ -0,0 +1,100 @@ +--- +case_id: CASE-2026-0004 +scenario: phishing +alert_type: mail_suspicious_attachment +severity: medium +verdict: true_positive +source: soc-memory-poc +openviking_enriched: true +--- + +# CASE-2026-0004 Shared mailbox received OneDrive lure with HTML attachment + +## 基本信息 + +- Case ID: CASE-2026-0004 +- 标题: Shared mailbox received OneDrive lure with HTML attachment +- 告警类型: mail_suspicious_attachment +- 来源系统: SOC Memory POC Mock Dataset +- 时间范围: 待补充 +- 研判人 / Agent: AI Agent Draft +- 最终结论: 真报 +- 严重等级: medium + +## 告警摘要 + +Shared finance mailbox received a fake OneDrive notification with an HTML attachment that led to credential collection. + +## 关键实体 + +- 用户: shared-finance@corp.example +- 主机: 无 +- 邮箱: shared-finance@corp.example +- IP: 198.51.100.87 +- 域名: sharepoint-notify.com +- 文件 Hash: sha256:phish0004 +- 其他 IOC: https://onedrive-review-login.example, noreply@sharepoint-notify.com + +## 关键证据 + +- Attachment rendered a fake Microsoft sign-in page. +- Landing page hosted outside Microsoft IP space. +- Mail body reused branding from previous phishing campaign. + +## 研判过程摘要 + +1. 确认告警场景与核心风险:Shared finance mailbox received a fake OneDrive notification with an HTML attachment that led to credential collection. +2. 提取关键证据并交叉验证:Attachment rendered a fake Microsoft sign-in page. +3. 对照关联 playbook / KB 复核告警模式与处置路径。 +4. 基于关键证据与场景模式完成结论判定:真报。 + +## 结论依据 + +- 结论为真报。 +- 最关键依据:Attachment rendered a fake Microsoft sign-in page. +- 补充依据:Landing page hosted outside Microsoft IP space. + +## 处置建议 + +- 隔离相同主题、发件人或 URL 的邮件样本。 +- 核查用户是否点击或提交凭据,并按需执行凭据重置。 + +## 可复用模式 + +- 命中模式: scenario:phishing, alert_type:mail_suspicious_attachment +- 误报特征: 无 +- 需关注的变体: 相关标签:phishing, email, onedrive-lure + +## 关联知识 + +- 关联 Playbook: [[PB-PHISH-001]] +- 关联 KB: [[KB-CRED-HARVEST-PATTERNS]] +- 关联历史 Case: [[CASE-2026-0001]], [[CASE-2026-0003]] +- 关联实体: [[shared-finance@corp.example]] + +## 自动关联推荐 + +### 推荐历史 Case + +- [[CASE-2026-0001]] (case score=0.675) This directory contains a structured case report detailing a high-severity phishing incident targeting a finance user via a malicious invoic... +- [[CASE-2026-0003]] (case score=0.606) This directory contains a structured incident report for a high-severity phishing attack involving executive impersonation, classified under... + +### 推荐知识条目 + +- [[KB-CRED-HARVEST-PATTERNS]] (knowledge score=0.652) This directory contains a structured knowledge base artifact focused on identifying and investigating credential harvesting campaigns, parti... +- [[PB-PHISH-001]] (knowledge score=0.608) This directory contains a phishing email investigation playbook designed to standardize incident response procedures for suspicious emails, ... 
+ +## Lessons Learned + +- 本案可沉淀为后续同类告警的快速判定参考。 +- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。 + +## 标签 + +- #case +- #scenario/phishing +- #alert/mail_suspicious_attachment +- #verdict/true-positive +- #phishing +- #email +- #onedrive-lure diff --git a/obsidian-vault/05_Templates/case-note-template.md b/obsidian-vault/05_Templates/case-note-template.md new file mode 100644 index 0000000..fe992af --- /dev/null +++ b/obsidian-vault/05_Templates/case-note-template.md @@ -0,0 +1,76 @@ +# Case Note Template + +## 基本信息 + +- Case ID: +- 标题: +- 告警类型: +- 来源系统: +- 时间范围: +- 研判人 / Agent: +- 最终结论: +- 严重等级: + +## 告警摘要 + +一句话概述这次 case 的核心问题。 + +## 关键实体 + +- 用户: +- 主机: +- 邮箱: +- IP: +- 域名: +- 文件 Hash: +- 其他 IOC: + +## 关键证据 + +- 证据 1: +- 证据 2: +- 证据 3: + +## 研判过程摘要 + +只保留对后续复用有价值的关键步骤,不记录所有原始过程。 + +1. +2. +3. + +## 结论依据 + +- 为什么判定为真报 / 误报 / 可疑待定 +- 哪些信号最关键 + +## 处置建议 + +- +- + +## 可复用模式 + +- 命中模式: +- 误报特征: +- 需关注的变体: + +## 关联知识 + +- 关联 Playbook: +- 关联 KB: +- 关联历史 Case: +- 关联实体: + +## Lessons Learned + +- 本案新增了什么可复用经验 +- 哪些规则、知识或流程应更新 + +## 标签 + +- `#case` +- `#alert/...` +- `#verdict/true-positive` +- `#verdict/false-positive` +- `#ttp/...` diff --git a/obsidian-vault/05_Templates/playbook-template.md b/obsidian-vault/05_Templates/playbook-template.md new file mode 100644 index 0000000..a20194b --- /dev/null +++ b/obsidian-vault/05_Templates/playbook-template.md @@ -0,0 +1,59 @@ +# Playbook Template + +## 基本信息 + +- 名称: +- 适用告警类型: +- 场景: +- 最近更新时间: +- 负责人: + +## 场景描述 + +这个 playbook 解决什么问题,适用于哪些前置条件。 + +## 输入信号 + +- 必要信号: +- 可选信号: +- 常见数据源: + +## 调查步骤 + +1. +2. +3. + +## 关键判断点 + +- 什么情况下倾向真报 +- 什么情况下倾向误报 +- 哪些证据最关键 + +## 常见误报模式 + +- +- + +## 常见真报模式 + +- +- + +## 升级 / 处置建议 + +- +- + +## 关联内容 + +- 相关 Case: +- 相关 KB: +- 相关 IOC: +- 相关 TTP: + +## 标签 + +- `#playbook` +- `#alert/...` +- `#ttp/...` diff --git a/obsidian-vault/05_Templates/report-summary-template.md b/obsidian-vault/05_Templates/report-summary-template.md new file mode 100644 index 0000000..6b850cc --- /dev/null +++ b/obsidian-vault/05_Templates/report-summary-template.md @@ -0,0 +1,52 @@ +# Report Summary Template + +## 基本信息 + +- 标题: +- 来源: +- 日期: +- 作者 / 团队: +- 类型: + +## 核心摘要 + +用 3 到 5 句话总结对 SOC 研判最有帮助的内容。 + +## 关键发现 + +- 发现 1: +- 发现 2: +- 发现 3: + +## 关键实体 + +- 攻击者: +- 工具: +- 域名 / IP: +- Hash: +- 邮件主题 / 发件特征: + +## 对 SOC 的实际价值 + +- 对哪些告警类型有帮助 +- 对哪些 playbook 需要更新 +- 对哪些规则或研判路径有启发 + +## 可沉淀记忆 + +- 哪些内容适合作为 Knowledge Memory +- 哪些内容适合作为 Case Pattern + +## 关联内容 + +- 关联 KB: +- 关联 Playbook: +- 关联 Case: +- 关联 TTP: + +## 标签 + +- `#report` +- `#intel` +- `#ttp/...` +- `#campaign/...` diff --git a/obsidian-vault/README.md b/obsidian-vault/README.md new file mode 100644 index 0000000..fd40f58 --- /dev/null +++ b/obsidian-vault/README.md @@ -0,0 +1,15 @@ +# Obsidian Vault + +这个目录用于保存 Obsidian Vault 的推荐骨架。 + +原则: + +- 只存高价值、可人工维护的沉淀 +- 不存全量原始资料 +- 不把 ticket 原文、报告全文直接塞进 Vault + +建议优先建设: + +- `01_Knowledge/` +- `02_Cases/` +- `05_Templates/` diff --git a/pipeline/README.md b/pipeline/README.md new file mode 100644 index 0000000..1085fbe --- /dev/null +++ b/pipeline/README.md @@ -0,0 +1,14 @@ +# Pipeline + +这个目录用于保存知识源接入和数据清洗流程。 + +建议优先接入: + +- 历史 case +- KB / Playbook + +后续再逐步扩展: + +- ticket system +- intel system +- 月报 / 报告 diff --git a/pipeline/jobs/ingest_case.py b/pipeline/jobs/ingest_case.py new file mode 100644 index 0000000..01beee5 --- /dev/null +++ b/pipeline/jobs/ingest_case.py @@ -0,0 +1,41 @@ +"""Batch-ingest mock case files and emit normalized case JSON documents.""" +from __future__ import annotations + +import json +from dataclasses import asdict 
+from pathlib import Path + +from pipeline.transforms.normalize_case import load_and_normalize_case + + +def ingest_cases(input_dir: str | Path, output_dir: str | Path) -> list[Path]: + input_dir = Path(input_dir) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + written: list[Path] = [] + for src in sorted(input_dir.rglob("*.json")): + normalized = load_and_normalize_case(src) + dest = output_dir / f"{normalized.id}.json" + with dest.open("w", encoding="utf-8") as f: + json.dump(asdict(normalized), f, ensure_ascii=False, indent=2) + written.append(dest) + return written + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser(description="Normalize a directory of mock case JSON files.") + parser.add_argument("--input-dir", default="evaluation/datasets/mock_cases", help="Directory containing raw mock case files") + parser.add_argument("--output-dir", default="evaluation/datasets/normalized_cases", help="Directory to write normalized case files") + args = parser.parse_args() + + written = ingest_cases(args.input_dir, args.output_dir) + print(f"normalized_cases={len(written)}") + for path in written: + print(path) + + +if __name__ == "__main__": + main() diff --git a/pipeline/jobs/ingest_kb.py b/pipeline/jobs/ingest_kb.py new file mode 100644 index 0000000..86a8a89 --- /dev/null +++ b/pipeline/jobs/ingest_kb.py @@ -0,0 +1,41 @@ +"""Batch-ingest mock KB/playbook files and emit normalized knowledge JSON documents.""" +from __future__ import annotations + +import json +from dataclasses import asdict +from pathlib import Path + +from pipeline.transforms.normalize_kb import load_and_normalize_kb + + +def ingest_kb(input_dir: str | Path, output_dir: str | Path) -> list[Path]: + input_dir = Path(input_dir) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + written: list[Path] = [] + for src in sorted(input_dir.rglob("*.json")): + normalized = load_and_normalize_kb(src) + dest = output_dir / f"{normalized.id}.json" + with dest.open("w", encoding="utf-8") as f: + json.dump(asdict(normalized), f, ensure_ascii=False, indent=2) + written.append(dest) + return written + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser(description="Normalize a directory of mock KB/playbook JSON files.") + parser.add_argument("--input-dir", default="evaluation/datasets/mock_kb", help="Directory containing raw mock KB/playbook files") + parser.add_argument("--output-dir", default="evaluation/datasets/normalized_kb", help="Directory to write normalized KB/playbook files") + args = parser.parse_args() + + written = ingest_kb(args.input_dir, args.output_dir) + print(f"normalized_kb={len(written)}") + for path in written: + print(path) + + +if __name__ == "__main__": + main() diff --git a/pipeline/transforms/normalize_case.py b/pipeline/transforms/normalize_case.py new file mode 100644 index 0000000..c90c8ec --- /dev/null +++ b/pipeline/transforms/normalize_case.py @@ -0,0 +1,91 @@ +"""Normalize raw mock SOC cases into a retrieval-friendly structure. + +This module is intentionally small and deterministic so it can be used with +mock data before real connectors are available. 
+""" +from __future__ import annotations + +import json +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Any + + +@dataclass +class NormalizedCase: + id: str + memory_type: str + scenario: str + title: str + abstract: str + verdict: str + severity: str + entities: dict[str, list[str]] + observables: dict[str, list[str]] + evidence: list[str] + patterns: list[str] + related_refs: dict[str, list[str]] + source_path: str + tags: list[str] + + +def _derive_patterns(raw_case: dict[str, Any]) -> list[str]: + """Derive a small set of reusable patterns from the case payload.""" + patterns: list[str] = [] + + verdict = raw_case.get("conclusion", {}).get("verdict") + if verdict: + patterns.append(f"verdict:{verdict}") + + scenario = raw_case.get("scenario") + if scenario: + patterns.append(f"scenario:{scenario}") + + alert_type = raw_case.get("alert_type") + if alert_type: + patterns.append(f"alert_type:{alert_type}") + + return patterns + + +def normalize_case(raw_case: dict[str, Any], source_path: str = "") -> NormalizedCase: + """Convert a raw case document into the internal normalized case model.""" + conclusion = raw_case.get("conclusion", {}) + return NormalizedCase( + id=raw_case["case_id"], + memory_type="case", + scenario=raw_case["scenario"], + title=raw_case["title"], + abstract=raw_case.get("summary", ""), + verdict=conclusion.get("verdict", raw_case.get("status", "unknown")), + severity=raw_case.get("severity", "unknown"), + entities=raw_case.get("entities", {}), + observables=raw_case.get("observables", {}), + evidence=raw_case.get("evidence", []), + patterns=_derive_patterns(raw_case), + related_refs=raw_case.get("related_refs", {}), + source_path=source_path, + tags=raw_case.get("tags", []), + ) + + +def load_and_normalize_case(path: str | Path) -> NormalizedCase: + path = Path(path) + with path.open("r", encoding="utf-8") as f: + raw_case = json.load(f) + return normalize_case(raw_case, source_path=str(path)) + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser(description="Normalize a mock SOC case JSON file.") + parser.add_argument("path", help="Path to a raw case JSON file") + args = parser.parse_args() + + normalized = load_and_normalize_case(args.path) + print(json.dumps(asdict(normalized), ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/pipeline/transforms/normalize_kb.py b/pipeline/transforms/normalize_kb.py new file mode 100644 index 0000000..8934a86 --- /dev/null +++ b/pipeline/transforms/normalize_kb.py @@ -0,0 +1,63 @@ +"""Normalize raw mock KB/playbook documents into a retrieval-friendly structure.""" +from __future__ import annotations + +import json +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Any + + +@dataclass +class NormalizedKnowledge: + id: str + memory_type: str + doc_type: str + scenario: str + title: str + abstract: str + key_points: list[str] + investigation_guidance: list[str] + decision_points: list[str] + related_refs: dict[str, list[str]] + source_path: str + tags: list[str] + + +def normalize_kb(raw_doc: dict[str, Any], source_path: str = "") -> NormalizedKnowledge: + """Convert a raw KB or playbook document into the normalized knowledge model.""" + return NormalizedKnowledge( + id=raw_doc["doc_id"], + memory_type="knowledge", + doc_type=raw_doc["doc_type"], + scenario=raw_doc["scenario"], + title=raw_doc["title"], + abstract=raw_doc.get("summary", ""), + key_points=raw_doc.get("key_points", []), + 
investigation_guidance=raw_doc.get("investigation_guidance", []), + decision_points=raw_doc.get("decision_points", []), + related_refs=raw_doc.get("related_refs", {}), + source_path=source_path, + tags=raw_doc.get("tags", []), + ) + + +def load_and_normalize_kb(path: str | Path) -> NormalizedKnowledge: + path = Path(path) + with path.open("r", encoding="utf-8") as f: + raw_doc = json.load(f) + return normalize_kb(raw_doc, source_path=str(path)) + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser(description="Normalize a mock KB or playbook JSON file.") + parser.add_argument("path", help="Path to a raw KB/playbook JSON file") + args = parser.parse_args() + + normalized = load_and_normalize_kb(args.path) + print(json.dumps(asdict(normalized), ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0e44121 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[project] +name = "memory-gateway" +version = "0.1.0" +description = "基于 OpenViking 的统一记忆入口 MCP Server" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "fastapi>=0.109.0", + "sse-starlette>=2.0.0", + "mcp[cli]>=1.1.0", + "httpx>=0.26.0", + "pydantic>=2.5.0", + "pyyaml>=6.0", + "uvicorn>=0.27.0", + "tenacity>=8.2.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-asyncio>=0.23.0", + "ruff>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.uv] +dev-dependencies = [] + +[tool.ruff] +target-version = "py310" diff --git a/skills/README.md b/skills/README.md new file mode 100644 index 0000000..22f14b8 --- /dev/null +++ b/skills/README.md @@ -0,0 +1,17 @@ +# Skills + +建议优先落地的 skills: + +- `ingest_skill` +- `extract_memory_skill` +- `classify_memory_skill` +- `retrieve_context_skill` +- `summarize_case_skill` +- `commit_memory_skill` +- `prune_memory_skill` + +POC 第一阶段建议先做: + +- `retrieve_context_skill` +- `summarize_case_skill` +- `commit_memory_skill` diff --git a/skills/commit_memory_skill/README.md b/skills/commit_memory_skill/README.md new file mode 100644 index 0000000..57c2a1c --- /dev/null +++ b/skills/commit_memory_skill/README.md @@ -0,0 +1,36 @@ +# commit_memory_skill + +这个 skill 负责把标准化后的高价值记忆写回 OpenViking。 + +## 当前阶段职责 + +第一阶段优先把标准化后的 `case` 和 `knowledge` 以 resource 形式写入 OpenViking。 + +原因: + +- 结构化数据适合用 URI 明确组织 +- 相比通过会话提交 `add_memory`,resource 写入更可控 +- 便于后续按 namespace 和 URI 组织 case / knowledge / report + +## 第一阶段输入 + +- 标准化后的 case JSON +- 标准化后的 KB / Playbook JSON + +## 第一阶段输出 + +- OpenViking resource 写入结果 +- 统一 URI 组织的资源 + +## 默认 URI 约定 + +- case: `viking://soc/case//` +- knowledge: `viking://soc/knowledge//` + +## 后续扩展 + +后续可以在 resource 写入稳定后,再增加: + +- 高价值 summary 写入 `memory` +- EverMemOS 提炼结果回灌 +- Obsidian / OpenViking 双写策略 diff --git a/skills/commit_memory_skill/SKILL.md b/skills/commit_memory_skill/SKILL.md new file mode 100644 index 0000000..55b4de1 --- /dev/null +++ b/skills/commit_memory_skill/SKILL.md @@ -0,0 +1,29 @@ +# commit_memory_skill + +## 用途 + +把已经过标准化和筛选的 case / knowledge 内容写入 OpenViking。 + +## 当前默认策略 + +第一阶段只做 resource 写入,不强行做复杂 memory 演化。 + +- `case` -> `viking://soc/case//` +- `knowledge` -> `viking://soc/knowledge//` + +## 输入 + +- 标准化后的 case / knowledge JSON 文件 +- OpenViking 配置(URL / API Key) + +## 输出 + +- 写入结果 +- 目标 URI +- 成功 / 失败状态 + +## 成功标准 + +- 可以把本地标准化样本成功写入 OpenViking +- URI 组织符合 namespace 设计 +- 后续可以被检索和引用 diff --git a/skills/commit_memory_skill/commit_to_openviking.py 
b/skills/commit_memory_skill/commit_to_openviking.py new file mode 100644 index 0000000..7dc5795 --- /dev/null +++ b/skills/commit_memory_skill/commit_to_openviking.py @@ -0,0 +1,89 @@ +"""Commit normalized SOC memory items to OpenViking as structured resources.""" +from __future__ import annotations + +import argparse +import asyncio +import json +from pathlib import Path +from typing import Any + +from memory_gateway.openviking_client import OpenVikingClient + + +def build_resource_uri(item: dict[str, Any]) -> str: + memory_type = item.get("memory_type") + item_id = item["id"] + + if memory_type == "case": + scenario = item.get("scenario", "general") + return f"viking://resources/soc-memory-poc/case/{scenario}/{item_id}.json" + + if memory_type == "knowledge": + doc_type = item.get("doc_type", "general") + return f"viking://resources/soc-memory-poc/knowledge/{doc_type}/{item_id}.json" + + raise ValueError(f"Unsupported memory_type for commit: {memory_type}") + + +def load_item(path: str | Path) -> dict[str, Any]: + path = Path(path) + with path.open("r", encoding="utf-8") as f: + return json.load(f) + + +async def commit_file(path: str | Path, client: OpenVikingClient) -> dict[str, Any]: + item = load_item(path) + uri = build_resource_uri(item) + result = await client.add_resource( + uri=uri, + content=json.dumps(item, ensure_ascii=False, indent=2), + resource_type="json", + wait=False, + ) + return { + "path": str(path), + "uri": uri, + "result": result, + } + + +async def commit_directory(directory: str | Path, client: OpenVikingClient, limit: int | None = None) -> list[dict[str, Any]]: + directory = Path(directory) + paths = sorted(directory.rglob("*.json")) + if limit is not None: + paths = paths[:limit] + + results: list[dict[str, Any]] = [] + for path in paths: + results.append(await commit_file(path, client)) + return results + + +async def main_async(args: argparse.Namespace) -> None: + client = OpenVikingClient() + try: + if args.path: + result = await commit_file(args.path, client) + print(json.dumps(result, ensure_ascii=False, indent=2)) + else: + results = await commit_directory(args.directory, client, limit=args.limit) + print(json.dumps(results, ensure_ascii=False, indent=2)) + finally: + await client.close() + + +def main() -> None: + parser = argparse.ArgumentParser(description="Commit normalized SOC items to OpenViking.") + parser.add_argument("--path", help="Single normalized JSON file to commit") + parser.add_argument("--directory", help="Directory of normalized JSON files to commit") + parser.add_argument("--limit", type=int, default=None, help="Optional limit for directory commits") + args = parser.parse_args() + + if not args.path and not args.directory: + parser.error("Either --path or --directory is required") + + asyncio.run(main_async(args)) + + +if __name__ == "__main__": + main() diff --git a/skills/retrieve_context_skill/README.md b/skills/retrieve_context_skill/README.md new file mode 100644 index 0000000..5d17bff --- /dev/null +++ b/skills/retrieve_context_skill/README.md @@ -0,0 +1,42 @@ +# retrieve_context_skill + +这个 skill 用于根据当前 case 的关键信号,从 OpenViking 或 mock dataset 中召回最相关的上下文。 + +## 目标 + +输入当前 case 的场景、告警类型、IOC、描述,输出一组排序后的相关内容: + +- 相似历史 case +- 相关 KB +- 相关 Playbook +- 关键 decision points + +## 第一阶段输入 + +- `scenario` +- `alert_type` +- `summary` +- `entities` +- `observables` +- `top_k` + +## 第一阶段输出 + +- `matched_cases` +- `matched_knowledge` +- `decision_points` +- `next_actions` + +## 第一阶段检索策略 + +1. 先按 `scenario` 过滤 +2. 再按 `alert_type`、IOC、关键词做匹配 +3. 
再按 evidence / tags 做轻量重排序 +4. 输出 top-k + +## 第一阶段不做 + +- 向量检索 +- 图检索 +- 个性化排序 +- 多源复杂重排 diff --git a/skills/retrieve_context_skill/SKILL.md b/skills/retrieve_context_skill/SKILL.md new file mode 100644 index 0000000..bca0515 --- /dev/null +++ b/skills/retrieve_context_skill/SKILL.md @@ -0,0 +1,39 @@ +# retrieve_context_skill + +## 用途 + +在 SOC case 研判时,为 agent 检索最相关的历史 case 和知识上下文。 + +## 输入 + +- `scenario`: 场景,如 `phishing`、`o365_suspicious_login` +- `alert_type`: 告警类型 +- `summary`: 当前 case 摘要 +- `entities`: 用户、主机、邮箱等 +- `observables`: 域名、IP、URL、Hash 等 +- `top_k`: 期望返回条数 + +## 输出 + +- 相关历史 case 列表 +- 相关 KB / Playbook 列表 +- 关键 evidence / decision points +- 推荐下一步调查动作 + +## 默认检索顺序 + +1. `session/` +2. `soc/case` +3. `soc/knowledge` +4. `agent/` +5. `user/` + +## Mock 阶段工作方式 + +在没有真实数据和完整 OpenViking 检索链路时,先使用 `evaluation/datasets/mock_cases/` 和 `evaluation/datasets/mock_kb/` 做本地检索验证。 + +## 成功标准 + +- 钓鱼 case 能召回钓鱼 playbook 和相似 phishing case +- O365 异常登录 case 能召回登录异常 KB 和相似 case +- 返回结果对人工 reviewer 看起来是“有帮助的上下文”,而不是泛资料堆积 diff --git a/skills/retrieve_context_skill/retrieve_context.py b/skills/retrieve_context_skill/retrieve_context.py new file mode 100644 index 0000000..a2be7fb --- /dev/null +++ b/skills/retrieve_context_skill/retrieve_context.py @@ -0,0 +1,216 @@ +"""Retrieval entrypoint for SOC Memory POC. + +Supports two modes: +- local: retrieve from normalized mock datasets +- openviking: retrieve from OpenViking resource namespaces and filter results +""" +from __future__ import annotations + +import asyncio +import json +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any + +from memory_gateway.openviking_client import OpenVikingClient + +CASE_URI_PREFIX = "viking://resources/soc-memory-poc/case" +KNOWLEDGE_URI_PREFIX = "viking://resources/soc-memory-poc/knowledge" + + +def _load_json_dir(path: str | Path) -> list[dict[str, Any]]: + path = Path(path) + items: list[dict[str, Any]] = [] + for file in sorted(path.rglob("*.json")): + with file.open("r", encoding="utf-8") as f: + items.append(json.load(f)) + return items + + +@dataclass +class RetrievalQuery: + scenario: str + alert_type: str = "" + summary: str = "" + entities: dict[str, list[str]] | None = None + observables: dict[str, list[str]] | None = None + top_k: int = 3 + + +def _flatten_values(data: dict[str, list[str]] | None) -> set[str]: + if not data: + return set() + values: set[str] = set() + for items in data.values(): + values.update(str(item).lower() for item in items) + return values + + +def _score_case(query: RetrievalQuery, item: dict[str, Any]) -> int: + score = 0 + if item.get("scenario") == query.scenario: + score += 50 + + for pattern in item.get("patterns", []): + if query.alert_type and pattern == f"alert_type:{query.alert_type}": + score += 20 + + query_observables = _flatten_values(query.observables) + item_observables = _flatten_values(item.get("observables")) + score += 8 * len(query_observables & item_observables) + + summary = query.summary.lower() + haystacks = [item.get("title", "").lower(), item.get("abstract", "").lower()] + for token in [t for t in summary.split() if len(t) > 4]: + if any(token in text for text in haystacks): + score += 2 + + return score + + +def _score_knowledge(query: RetrievalQuery, item: dict[str, Any]) -> int: + score = 0 + if item.get("scenario") == query.scenario: + score += 40 + + title = item.get("title", "").lower() + abstract = item.get("abstract", "").lower() + for token in [t for t in query.summary.lower().split() if len(t) > 4]: + if 
token in title or token in abstract: + score += 2 + + if query.alert_type and query.alert_type in " ".join(item.get("related_refs", {}).get("cases", [])).lower(): + score += 5 + + return score + + +def retrieve_context_local( + query: RetrievalQuery, + cases_dir: str | Path = "evaluation/datasets/normalized_cases", + knowledge_dir: str | Path = "evaluation/datasets/normalized_kb", +) -> dict[str, Any]: + cases = _load_json_dir(cases_dir) + knowledge = _load_json_dir(knowledge_dir) + + ranked_cases = sorted( + ({"score": _score_case(query, item), "item": item} for item in cases), + key=lambda x: x["score"], + reverse=True, + ) + ranked_knowledge = sorted( + ({"score": _score_knowledge(query, item), "item": item} for item in knowledge), + key=lambda x: x["score"], + reverse=True, + ) + + matched_cases = [entry for entry in ranked_cases if entry["score"] > 0][: query.top_k] + matched_knowledge = [entry for entry in ranked_knowledge if entry["score"] > 0][: query.top_k] + + decision_points: list[str] = [] + next_actions: list[str] = [] + for entry in matched_knowledge: + item = entry["item"] + decision_points.extend(item.get("decision_points", [])) + next_actions.extend(item.get("investigation_guidance", [])) + + return { + "backend": "local", + "query": asdict(query), + "matched_cases": matched_cases, + "matched_knowledge": matched_knowledge, + "decision_points": decision_points[: query.top_k], + "next_actions": next_actions[: query.top_k], + } + + +def _canonicalize_resource_uri(uri: str) -> str: + if ".json/" in uri: + return uri.split(".json/", 1)[0] + ".json" + return uri + + +def _query_text(query: RetrievalQuery) -> str: + parts = [query.scenario, query.alert_type, query.summary] + parts.extend(sorted(_flatten_values(query.observables))) + return " ".join(part for part in parts if part).strip() + + +def _dedupe_openviking_results(results: list[dict[str, Any]], prefix: str) -> list[dict[str, Any]]: + deduped: dict[str, dict[str, Any]] = {} + for item in results: + uri = item.get("uri") or "" + if not uri.startswith(prefix): + continue + canonical_uri = _canonicalize_resource_uri(uri) + score = item.get("score") or 0 + existing = deduped.get(canonical_uri) + payload = { + "uri": canonical_uri, + "abstract": item.get("abstract", ""), + "score": score, + "context_type": item.get("context_type"), + "source_uri": uri, + } + if existing is None or score > existing.get("score", 0): + deduped[canonical_uri] = payload + return sorted(deduped.values(), key=lambda x: x["score"], reverse=True) + + +async def retrieve_context_openviking( + query: RetrievalQuery, + case_uri: str = CASE_URI_PREFIX, + knowledge_uri: str = KNOWLEDGE_URI_PREFIX, +) -> dict[str, Any]: + client = OpenVikingClient() + try: + query_text = _query_text(query) + case_result = await client.search(query=query_text, uri=case_uri, limit=max(query.top_k * 5, 10)) + knowledge_result = await client.search(query=query_text, uri=knowledge_uri, limit=max(query.top_k * 5, 10)) + + matched_cases = _dedupe_openviking_results(case_result.results, case_uri)[: query.top_k] + matched_knowledge = _dedupe_openviking_results(knowledge_result.results, knowledge_uri)[: query.top_k] + + return { + "backend": "openviking", + "query": asdict(query), + "matched_cases": matched_cases, + "matched_knowledge": matched_knowledge, + "decision_points": [], + "next_actions": [], + } + finally: + await client.close() + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser(description="Retrieve SOC context from local datasets or 
OpenViking.") + parser.add_argument("--backend", choices=["local", "openviking"], default="local", help="Retrieval backend") + parser.add_argument("--scenario", required=True, help="Scenario, e.g. phishing or o365_suspicious_login") + parser.add_argument("--alert-type", default="", help="Alert type") + parser.add_argument("--summary", default="", help="Short case summary") + parser.add_argument("--top-k", type=int, default=3, help="Number of results to return") + parser.add_argument("--cases-dir", default="evaluation/datasets/normalized_cases", help="Normalized case dataset directory") + parser.add_argument("--knowledge-dir", default="evaluation/datasets/normalized_kb", help="Normalized knowledge dataset directory") + parser.add_argument("--case-uri", default=CASE_URI_PREFIX, help="OpenViking case URI prefix") + parser.add_argument("--knowledge-uri", default=KNOWLEDGE_URI_PREFIX, help="OpenViking knowledge URI prefix") + args = parser.parse_args() + + query = RetrievalQuery( + scenario=args.scenario, + alert_type=args.alert_type, + summary=args.summary, + top_k=args.top_k, + ) + + if args.backend == "openviking": + result = asyncio.run(retrieve_context_openviking(query, case_uri=args.case_uri, knowledge_uri=args.knowledge_uri)) + else: + result = retrieve_context_local(query, cases_dir=args.cases_dir, knowledge_dir=args.knowledge_dir) + print(json.dumps(result, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/skills/summarize_case_skill/README.md b/skills/summarize_case_skill/README.md new file mode 100644 index 0000000..30d9810 --- /dev/null +++ b/skills/summarize_case_skill/README.md @@ -0,0 +1,17 @@ +# summarize_case_skill + +This skill turns a normalized SOC case record into a reusable Obsidian case note. + +Current scope: +- input: normalized case JSON from `evaluation/datasets/normalized_cases/` +- output: markdown case note under `obsidian-vault/02_Cases/` +- goal: produce a clean analyst-facing note, not a raw process dump + +Typical usage: + +```bash +source /home/tom/OpenViking/.venv/bin/activate +PYTHONPATH=/home/tom/soc_memory_poc python /home/tom/soc_memory_poc/skills/summarize_case_skill/generate_case_note.py \ + --input /home/tom/soc_memory_poc/evaluation/datasets/normalized_cases/CASE-2026-0001.json \ + --output-dir /home/tom/soc_memory_poc/obsidian-vault/02_Cases +``` diff --git a/skills/summarize_case_skill/SKILL.md b/skills/summarize_case_skill/SKILL.md new file mode 100644 index 0000000..c43b065 --- /dev/null +++ b/skills/summarize_case_skill/SKILL.md @@ -0,0 +1,21 @@ +# summarize_case_skill + +## Purpose +Summarize one normalized SOC case into a high-quality Obsidian case note that can be reviewed and maintained by analysts. + +## Inputs +- A normalized case JSON document +- Optional output directory for Obsidian notes + +## Outputs +- One markdown case note per case +- Stable structure aligned with the vault template + +## Guardrails +- Do not dump raw logs or full tool traces +- Keep only reusable evidence, conclusions, and response guidance +- Prefer linked references to playbooks, KBs, and related cases +- Preserve case identifiers and observable values exactly + +## Current implementation +Use `generate_case_note.py` to render a local markdown note from a normalized case. 
diff --git a/skills/summarize_case_skill/generate_case_note.py b/skills/summarize_case_skill/generate_case_note.py new file mode 100644 index 0000000..8c27ddc --- /dev/null +++ b/skills/summarize_case_skill/generate_case_note.py @@ -0,0 +1,346 @@ +"""Generate an Obsidian case note from a normalized SOC case JSON file.""" +from __future__ import annotations + +import argparse +import asyncio +import json +from pathlib import Path +from typing import Any + +from skills.retrieve_context_skill.retrieve_context import RetrievalQuery, retrieve_context_openviking + + +def _load_case(path: str | Path) -> dict[str, Any]: + with Path(path).open("r", encoding="utf-8") as f: + return json.load(f) + + +def _extract_alert_type(patterns: list[str]) -> str: + for pattern in patterns: + if pattern.startswith("alert_type:"): + return pattern.split(":", 1)[1] + return "unknown" + + +def _verdict_label(verdict: str) -> str: + mapping = { + "true_positive": "真报", + "false_positive": "误报", + "suspicious": "可疑待定", + } + return mapping.get(verdict, verdict or "未知") + + +def _join_values(values: list[str]) -> str: + return ", ".join(values) if values else "无" + + +def _bullet_lines(values: list[str], default: str = "- 无") -> str: + if not values: + return default + return "\n".join(f"- {value}" for value in values) + + +def _wikilinks(values: list[str]) -> str: + if not values: + return "无" + return ", ".join(f"[[{value}]]" for value in values) + + +def _uri_to_id(uri: str) -> str: + name = uri.rstrip("/").rsplit("/", 1)[-1] + if name.endswith(".json"): + name = name[:-5] + return name + + +def _derive_process_summary(item: dict[str, Any]) -> list[str]: + steps: list[str] = [] + if item.get("abstract"): + steps.append(f"确认告警场景与核心风险:{item['abstract']}") + if item.get("evidence"): + steps.append(f"提取关键证据并交叉验证:{item['evidence'][0]}") + related = item.get("related_refs", {}) + if related.get("playbooks") or related.get("kb"): + steps.append("对照关联 playbook / KB 复核告警模式与处置路径。") + if item.get("verdict"): + steps.append(f"基于关键证据与场景模式完成结论判定:{_verdict_label(item['verdict'])}。") + return steps[:4] + + +def _derive_disposition(item: dict[str, Any]) -> list[str]: + verdict = item.get("verdict", "") + evidence = item.get("evidence", []) + lines: list[str] = [] + if verdict: + lines.append(f"结论为{_verdict_label(verdict)}。") + if evidence: + lines.append(f"最关键依据:{evidence[0]}") + if len(evidence) > 1: + lines.append(f"补充依据:{evidence[1]}") + return lines + + +def _derive_actions(item: dict[str, Any]) -> list[str]: + scenario = item.get("scenario", "") + verdict = item.get("verdict", "") + actions: list[str] = [] + if scenario == "phishing": + actions.extend([ + "隔离相同主题、发件人或 URL 的邮件样本。", + "核查用户是否点击或提交凭据,并按需执行凭据重置。", + ]) + elif scenario == "o365_suspicious_login": + actions.extend([ + "复核登录来源、MFA 事件和后续邮箱规则或 OAuth 变更。", + "若存在账号接管迹象,立即执行会话失效和凭据重置。", + ]) + else: + actions.append("结合关联 playbook 执行后续处置。") + if verdict == "false_positive": + actions = ["记录误报原因,并更新检测例外或抑制条件。"] + return actions + + +def _derive_reusable_patterns(item: dict[str, Any]) -> tuple[list[str], list[str], list[str]]: + patterns = item.get("patterns", []) + tags = item.get("tags", []) + hit_patterns = [pattern for pattern in patterns if not pattern.startswith("verdict:")] + false_positive_traits = [] + variants = [] + if item.get("verdict") == "false_positive": + false_positive_traits.append("本案最终确认为误报,可用于补充抑制条件。") + if tags: + variants.append("相关标签:" + ", ".join(tags)) + return hit_patterns or ["无"], false_positive_traits or ["无"], variants or ["无"] + + +async 
def _fetch_openviking_recommendations(item: dict[str, Any], top_k: int = 3) -> dict[str, list[dict[str, Any]]]: + query = RetrievalQuery( + scenario=item.get("scenario", "general"), + alert_type=_extract_alert_type(item.get("patterns", [])), + summary=item.get("abstract", ""), + observables=item.get("observables"), + top_k=top_k + 1, + ) + result = await retrieve_context_openviking(query) + + case_entries: list[dict[str, Any]] = [] + for entry in result.get("matched_cases", []): + candidate_id = _uri_to_id(entry.get("uri", "")) + if candidate_id == item.get("id"): + continue + case_entries.append( + { + "id": candidate_id, + "score": round(float(entry.get("score") or 0), 3), + "abstract": entry.get("abstract", ""), + } + ) + if len(case_entries) >= top_k: + break + + knowledge_entries: list[dict[str, Any]] = [] + for entry in result.get("matched_knowledge", []): + knowledge_entries.append( + { + "id": _uri_to_id(entry.get("uri", "")), + "score": round(float(entry.get("score") or 0), 3), + "abstract": entry.get("abstract", ""), + } + ) + if len(knowledge_entries) >= top_k: + break + + return { + "cases": case_entries, + "knowledge": knowledge_entries, + } + + +def _merge_unique(primary: list[str], secondary: list[str]) -> list[str]: + merged: list[str] = [] + for value in primary + secondary: + if value and value not in merged: + merged.append(value) + return merged + + +def _recommendation_lines(entries: list[dict[str, Any]], prefix: str) -> list[str]: + lines: list[str] = [] + for entry in entries: + abstract = entry.get("abstract", "") + abstract = abstract[:140] + "..." if len(abstract) > 140 else abstract + lines.append(f"[[{entry['id']}]] ({prefix} score={entry['score']}) {abstract}") + return lines + + +def render_case_note(item: dict[str, Any], recommendations: dict[str, list[dict[str, Any]]] | None = None) -> str: + case_id = item["id"] + title = item.get("title", case_id) + alert_type = _extract_alert_type(item.get("patterns", [])) + severity = item.get("severity", "unknown") + verdict = _verdict_label(item.get("verdict", "")) + entities = item.get("entities", {}) + observables = item.get("observables", {}) + related = item.get("related_refs", {}) + recommendations = recommendations or {"cases": [], "knowledge": []} + + recommended_cases = [entry["id"] for entry in recommendations.get("cases", [])] + recommended_knowledge = [entry["id"] for entry in recommendations.get("knowledge", [])] + + merged_cases = _merge_unique(related.get("cases", []), recommended_cases) + playbooks = related.get("playbooks", []) + kb_items = related.get("kb", []) + for knowledge_id in recommended_knowledge: + if knowledge_id.startswith("PB-"): + playbooks = _merge_unique(playbooks, [knowledge_id]) + else: + kb_items = _merge_unique(kb_items, [knowledge_id]) + + process_summary = _derive_process_summary(item) + disposition = _derive_disposition(item) + actions = _derive_actions(item) + hit_patterns, false_positive_traits, variants = _derive_reusable_patterns(item) + tags = ["#case", f"#scenario/{item.get('scenario', 'general')}", f"#alert/{alert_type}"] + if item.get("verdict"): + tags.append(f"#verdict/{item['verdict'].replace('_', '-')}") + tags.extend(f"#{tag}" for tag in item.get("tags", [])) + + recommendation_case_lines = _recommendation_lines(recommendations.get("cases", []), "case") + recommendation_knowledge_lines = _recommendation_lines(recommendations.get("knowledge", []), "knowledge") + + lines = [ + "---", + f"case_id: {case_id}", + f"scenario: {item.get('scenario', 'general')}", + 
f"alert_type: {alert_type}", + f"severity: {severity}", + f"verdict: {item.get('verdict', 'unknown')}", + "source: soc-memory-poc", + f"openviking_enriched: {'true' if recommendation_case_lines or recommendation_knowledge_lines else 'false'}", + "---", + "", + f"# {case_id} {title}", + "", + "## 基本信息", + "", + f"- Case ID: {case_id}", + f"- 标题: {title}", + f"- 告警类型: {alert_type}", + f"- 来源系统: SOC Memory POC Mock Dataset", + f"- 时间范围: 待补充", + f"- 研判人 / Agent: AI Agent Draft", + f"- 最终结论: {verdict}", + f"- 严重等级: {severity}", + "", + "## 告警摘要", + "", + item.get("abstract", "无"), + "", + "## 关键实体", + "", + f"- 用户: {_join_values(entities.get('users', []))}", + f"- 主机: {_join_values(entities.get('hosts', []))}", + f"- 邮箱: {_join_values(entities.get('mailboxes', []))}", + f"- IP: {_join_values(observables.get('ips', []))}", + f"- 域名: {_join_values(observables.get('domains', []))}", + f"- 文件 Hash: {_join_values(observables.get('hashes', []))}", + f"- 其他 IOC: {_join_values(observables.get('urls', []) + observables.get('sender_emails', []))}", + "", + "## 关键证据", + "", + _bullet_lines(item.get("evidence", [])), + "", + "## 研判过程摘要", + "", + "\n".join(f"{index}. {step}" for index, step in enumerate(process_summary, start=1)), + "", + "## 结论依据", + "", + _bullet_lines(disposition), + "", + "## 处置建议", + "", + _bullet_lines(actions), + "", + "## 可复用模式", + "", + f"- 命中模式: {_join_values(hit_patterns)}", + f"- 误报特征: {_join_values(false_positive_traits)}", + f"- 需关注的变体: {_join_values(variants)}", + "", + "## 关联知识", + "", + f"- 关联 Playbook: {_wikilinks(playbooks)}", + f"- 关联 KB: {_wikilinks(kb_items)}", + f"- 关联历史 Case: {_wikilinks(merged_cases)}", + f"- 关联实体: {_wikilinks(entities.get('users', []) + entities.get('hosts', []))}", + "", + "## 自动关联推荐", + "", + "### 推荐历史 Case", + "", + _bullet_lines(recommendation_case_lines), + "", + "### 推荐知识条目", + "", + _bullet_lines(recommendation_knowledge_lines), + "", + "## Lessons Learned", + "", + "- 本案可沉淀为后续同类告警的快速判定参考。", + "- 若后续出现相同 lure、同类登录模式或相同关键证据,应优先联想本案与关联知识。", + "", + "## 标签", + "", + _bullet_lines(tags), + "", + ] + return "\n".join(lines) + + +def build_output_path(item: dict[str, Any], output_dir: str | Path) -> Path: + scenario = item.get("scenario", "general") + case_id = item["id"] + safe_title = item.get("title", case_id).replace("/", "-") + return Path(output_dir) / scenario / f"{case_id} - {safe_title}.md" + + +async def generate_case_note_async( + input_path: str | Path, + output_dir: str | Path, + enrich_from_openviking: bool = False, + top_k: int = 3, +) -> Path: + item = _load_case(input_path) + recommendations: dict[str, list[dict[str, Any]]] | None = None + if enrich_from_openviking: + recommendations = await _fetch_openviking_recommendations(item, top_k=top_k) + output_path = build_output_path(item, output_dir) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(render_case_note(item, recommendations=recommendations), encoding="utf-8") + return output_path + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate an Obsidian case note from a normalized case JSON file.") + parser.add_argument("--input", required=True, help="Normalized case JSON path") + parser.add_argument("--output-dir", default="obsidian-vault/02_Cases", help="Obsidian cases output directory") + parser.add_argument("--enrich-from-openviking", action="store_true", help="Retrieve related cases and knowledge from OpenViking") + parser.add_argument("--top-k", type=int, default=3, help="Number of OpenViking recommendations per 
type") + args = parser.parse_args() + + output_path = asyncio.run( + generate_case_note_async( + args.input, + args.output_dir, + enrich_from_openviking=args.enrich_from_openviking, + top_k=args.top_k, + ) + ) + print(output_path) + + +if __name__ == "__main__": + main() diff --git a/tests/test_server.py b/tests/test_server.py new file mode 100644 index 0000000..345014e --- /dev/null +++ b/tests/test_server.py @@ -0,0 +1,170 @@ +import sys +import types + +from fastapi.responses import StreamingResponse +from fastapi.testclient import TestClient + + +def install_test_stubs() -> None: + if "mcp.server" not in sys.modules: + mcp_module = types.ModuleType("mcp") + mcp_server_module = types.ModuleType("mcp.server") + mcp_types_module = types.ModuleType("mcp.types") + + class Server: + def __init__(self, name): + self.name = name + + def list_tools(self): + def decorator(func): + return func + return decorator + + def call_tool(self): + def decorator(func): + return func + return decorator + + class Tool: + def __init__(self, name, description, inputSchema): + self.name = name + self.description = description + self.inputSchema = inputSchema + + class TextContent: + def __init__(self, type, text): + self.type = type + self.text = text + + def model_dump(self): + return {"type": self.type, "text": self.text} + + mcp_server_module.Server = Server + mcp_types_module.Tool = Tool + mcp_types_module.TextContent = TextContent + sys.modules["mcp"] = mcp_module + sys.modules["mcp.server"] = mcp_server_module + sys.modules["mcp.types"] = mcp_types_module + + if "sse_starlette" not in sys.modules: + sse_module = types.ModuleType("sse_starlette") + + class EventSourceResponse(StreamingResponse): + def __init__(self, content, *args, **kwargs): + super().__init__(content, media_type="text/event-stream", *args, **kwargs) + + sse_module.EventSourceResponse = EventSourceResponse + sys.modules["sse_starlette"] = sse_module + + +install_test_stubs() + +from memory_gateway.server import app +from memory_gateway.types import Config, SearchResult, ServerConfig + + +class FakeOVClient: + async def health_check(self): + return {"status": "ok", "backend": "fake"} + + async def search(self, query, namespace=None, limit=None, uri=None): + return SearchResult( + results=[ + { + "uri": "viking://soc/test", + "abstract": query, + "score": 1.0, + "context_type": "memory", + } + ], + total=1, + ) + + async def add_memory(self, content, namespace=None, memory_type="general"): + return { + "status": "ok", + "content": content, + "namespace": namespace, + "memory_type": memory_type, + } + + async def add_resource(self, uri, content, resource_type="text"): + return { + "status": "ok", + "uri": uri, + "content": content, + "resource_type": resource_type, + } + + async def list_memories(self, namespace=None, memory_type=None, limit=None): + return [] + + async def list_resources(self, namespace=None, limit=None): + return [] + + +async def fake_get_openviking_client(): + return FakeOVClient() + + +def build_headers(api_key: str | None): + return {"x-api-key": api_key} if api_key is not None else {} + + +def test_health_requires_api_key(monkeypatch): + monkeypatch.setattr( + "memory_gateway.server.get_config", + lambda: Config(server=ServerConfig(api_key="secret")), + ) + monkeypatch.setattr( + "memory_gateway.server.get_openviking_client", + fake_get_openviking_client, + ) + + with TestClient(app) as client: + response = client.get("/health") + assert response.status_code == 401 + + response = client.get("/health", 
headers=build_headers("secret")) + assert response.status_code == 200 + assert response.json()["openviking"]["status"] == "ok" + + +def test_mcp_rpc_lists_tools_with_api_key(monkeypatch): + monkeypatch.setattr( + "memory_gateway.server.get_config", + lambda: Config(server=ServerConfig(api_key="secret")), + ) + monkeypatch.setattr( + "memory_gateway.server.get_openviking_client", + fake_get_openviking_client, + ) + + with TestClient(app) as client: + response = client.post( + "/mcp/rpc", + json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}, + headers=build_headers("secret"), + ) + assert response.status_code == 200 + payload = response.json() + assert payload["jsonrpc"] == "2.0" + assert len(payload["result"]["tools"]) == 6 + + +def test_search_passes_through_gateway(monkeypatch): + monkeypatch.setattr( + "memory_gateway.server.get_config", + lambda: Config(server=ServerConfig(api_key="")), + ) + monkeypatch.setattr( + "memory_gateway.server.get_openviking_client", + fake_get_openviking_client, + ) + + with TestClient(app) as client: + response = client.post("/api/search", json={"query": "phishing"}) + assert response.status_code == 200 + payload = response.json() + assert payload["total"] == 1 + assert payload["results"][0]["abstract"] == "phishing" diff --git a/uvicorn.yaml b/uvicorn.yaml new file mode 100644 index 0000000..b8935e2 --- /dev/null +++ b/uvicorn.yaml @@ -0,0 +1,4 @@ +host: "0.0.0.0" +port: 1934 +reload: true +log_level: "info"