Initial SOC memory POC implementation

This commit is contained in:
2026-04-27 17:13:06 +08:00
parent fc68581198
commit e6b1520bce
89 changed files with 7610 additions and 1 deletions

View File

@ -0,0 +1,91 @@
"""Normalize raw mock SOC cases into a retrieval-friendly structure.
This module is intentionally small and deterministic so it can be used with
mock data before real connectors are available.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any
@dataclass
class NormalizedCase:
    """Retrieval-friendly representation of a single SOC case.

    Field order is part of the positional constructor and of the ``asdict``
    output, so it must stay stable.
    """

    id: str  # raw ``case_id``
    memory_type: str  # "case" when built by normalize_case
    scenario: str
    title: str
    abstract: str  # raw ``summary``; "" when absent
    verdict: str  # conclusion verdict, else raw ``status``, else "unknown"
    severity: str  # "unknown" when absent
    entities: dict[str, list[str]]
    observables: dict[str, list[str]]
    evidence: list[str]
    patterns: list[str]  # "<kind>:<value>" strings from _derive_patterns
    related_refs: dict[str, list[str]]
    source_path: str  # "" when the case did not come from a file
    tags: list[str]


def _conclusion_of(raw_case: dict[str, Any]) -> dict[str, Any]:
    """Return the case's ``conclusion`` mapping, or ``{}`` if it is not one.

    ``dict.get("conclusion", {})`` only falls back when the key is absent; a
    JSON ``null`` decodes to ``None`` and would crash the later ``.get`` call.
    Anything that is not a dict is therefore treated as "no conclusion".
    """
    conclusion = raw_case.get("conclusion")
    return conclusion if isinstance(conclusion, dict) else {}


def _derive_patterns(raw_case: dict[str, Any]) -> list[str]:
    """Derive a small set of reusable patterns from the case payload.

    Each pattern is a ``"<kind>:<value>"`` string. Kinds are emitted in a
    fixed order (verdict, scenario, alert_type); a kind whose value is missing
    or falsy is skipped. A missing, null, or non-mapping ``conclusion``
    contributes no verdict pattern.
    """
    candidates = (
        ("verdict", _conclusion_of(raw_case).get("verdict")),
        ("scenario", raw_case.get("scenario")),
        ("alert_type", raw_case.get("alert_type")),
    )
    return [f"{kind}:{value}" for kind, value in candidates if value]


def normalize_case(raw_case: dict[str, Any], source_path: str = "") -> NormalizedCase:
    """Convert a raw case document into the internal normalized case model.

    Args:
        raw_case: Decoded case document. ``case_id``, ``scenario`` and
            ``title`` are required; every other key is optional.
        source_path: Where the document came from, stored verbatim.

    Returns:
        The normalized case. The verdict comes from ``conclusion.verdict``,
        falling back to the raw ``status`` and then to ``"unknown"``. A
        ``conclusion`` that is null or not a mapping is treated as absent.

    Raises:
        KeyError: If ``case_id``, ``scenario`` or ``title`` is missing.
    """
    conclusion = _conclusion_of(raw_case)
    return NormalizedCase(
        id=raw_case["case_id"],
        memory_type="case",
        scenario=raw_case["scenario"],
        title=raw_case["title"],
        abstract=raw_case.get("summary", ""),
        verdict=conclusion.get("verdict", raw_case.get("status", "unknown")),
        severity=raw_case.get("severity", "unknown"),
        entities=raw_case.get("entities", {}),
        observables=raw_case.get("observables", {}),
        evidence=raw_case.get("evidence", []),
        patterns=_derive_patterns(raw_case),
        related_refs=raw_case.get("related_refs", {}),
        source_path=source_path,
        tags=raw_case.get("tags", []),
    )
def load_and_normalize_case(path: str | Path) -> NormalizedCase:
    """Read a raw case JSON file (UTF-8) and return its normalized form.

    The path, converted to a string, is recorded as the result's
    ``source_path``.
    """
    case_file = Path(path)
    raw_case = json.loads(case_file.read_text(encoding="utf-8"))
    return normalize_case(raw_case, source_path=str(case_file))
def main() -> None:
    """CLI entry point: normalize one raw case file and print it as JSON."""
    import argparse

    cli = argparse.ArgumentParser(description="Normalize a mock SOC case JSON file.")
    cli.add_argument("path", help="Path to a raw case JSON file")
    case_path = cli.parse_args().path

    # ensure_ascii=False keeps non-ASCII text readable in the printed output.
    payload = asdict(load_and_normalize_case(case_path))
    print(json.dumps(payload, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,63 @@
"""Normalize raw mock KB/playbook documents into a retrieval-friendly structure."""
from __future__ import annotations
import json
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any
@dataclass
class NormalizedKnowledge:
    """Retrieval-friendly representation of one KB or playbook document.

    Field order is part of the positional constructor and of the ``asdict``
    output, so it must stay stable.
    """

    # Identity and classification.
    id: str  # raw ``doc_id``
    memory_type: str  # "knowledge" when built by normalize_kb
    doc_type: str
    scenario: str

    # Human-readable summary.
    title: str
    abstract: str  # raw ``summary``; "" when absent

    # Guidance content; each list is empty when the raw key is absent.
    key_points: list[str]
    investigation_guidance: list[str]
    decision_points: list[str]

    # Cross-references and provenance.
    related_refs: dict[str, list[str]]
    source_path: str  # "" when the document did not come from a file
    tags: list[str]
def normalize_kb(raw_doc: dict[str, Any], source_path: str = "") -> NormalizedKnowledge:
    """Build the normalized knowledge model from a raw KB or playbook document.

    ``doc_id``, ``doc_type``, ``scenario`` and ``title`` are required and raise
    ``KeyError`` when missing. All other keys are optional and fall back to an
    empty string, list or dict. ``source_path`` is stored verbatim.
    """
    # Required keys are read first, in field order, so the first missing one
    # is the one reported by the KeyError.
    required = {
        "id": raw_doc["doc_id"],
        "doc_type": raw_doc["doc_type"],
        "scenario": raw_doc["scenario"],
        "title": raw_doc["title"],
    }
    optional = {
        "abstract": raw_doc.get("summary", ""),
        "key_points": raw_doc.get("key_points", []),
        "investigation_guidance": raw_doc.get("investigation_guidance", []),
        "decision_points": raw_doc.get("decision_points", []),
        "related_refs": raw_doc.get("related_refs", {}),
        "tags": raw_doc.get("tags", []),
    }
    return NormalizedKnowledge(
        memory_type="knowledge",
        source_path=source_path,
        **required,
        **optional,
    )
def load_and_normalize_kb(path: str | Path) -> NormalizedKnowledge:
    """Read a raw KB/playbook JSON file (UTF-8) and return its normalized form.

    The path, converted to a string, is recorded as the result's
    ``source_path``.
    """
    doc_file = Path(path)
    raw_doc = json.loads(doc_file.read_text(encoding="utf-8"))
    return normalize_kb(raw_doc, source_path=str(doc_file))
def main() -> None:
    """CLI entry point: normalize one raw KB/playbook file and print it as JSON."""
    import argparse

    cli = argparse.ArgumentParser(description="Normalize a mock KB or playbook JSON file.")
    cli.add_argument("path", help="Path to a raw KB/playbook JSON file")
    doc_path = cli.parse_args().path

    # ensure_ascii=False keeps non-ASCII text readable in the printed output.
    payload = asdict(load_and_normalize_kb(doc_path))
    print(json.dumps(payload, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()