92 lines
2.7 KiB
Python
92 lines
2.7 KiB
Python
"""Normalize raw mock SOC cases into a retrieval-friendly structure.
|
|
|
|
This module is intentionally small and deterministic so it can be used with
|
|
mock data before real connectors are available.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass, asdict
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
@dataclass
class NormalizedCase:
    """Normalized, retrieval-friendly form of one raw SOC case.

    Instances are plain mutable records built by ``normalize_case``. All
    fields are required and keep the declaration order below, which is also
    the key order produced by ``dataclasses.asdict``.
    """

    # Identity and classification.
    id: str  # taken from the raw document's "case_id"
    memory_type: str  # normalize_case always sets this to "case"
    scenario: str

    # Human-readable description.
    title: str
    abstract: str  # taken from the raw document's "summary"

    # Outcome of the case.
    verdict: str  # conclusion verdict, else the raw "status", else "unknown"
    severity: str

    # Structured details, copied from the raw document under the same keys.
    entities: dict[str, list[str]]
    observables: dict[str, list[str]]
    evidence: list[str]

    # "<kind>:<value>" strings produced by _derive_patterns.
    patterns: list[str]

    related_refs: dict[str, list[str]]

    # Path of the file the case was loaded from ("" when not loaded from disk).
    source_path: str

    tags: list[str]
|
|
|
|
|
|
def _derive_patterns(raw_case: dict[str, Any]) -> list[str]:
    """Derive a small set of reusable patterns from the case payload.

    Each pattern is a ``"<kind>:<value>"`` string. Patterns are emitted in a
    fixed order (verdict, scenario, alert type), and a kind is skipped when
    its value is missing or falsy.

    Args:
        raw_case: Raw case document. Reads ``conclusion["verdict"]``,
            ``scenario`` and ``alert_type``.

    Returns:
        The derived pattern strings; empty when none of the fields is set.
    """
    # A conclusion that is JSON null (or any non-object) is treated as absent
    # rather than crashing on ``.get``.
    conclusion = raw_case.get("conclusion")
    verdict = conclusion.get("verdict") if isinstance(conclusion, dict) else None

    labelled_values = (
        ("verdict", verdict),
        ("scenario", raw_case.get("scenario")),
        ("alert_type", raw_case.get("alert_type")),
    )
    return [f"{label}:{value}" for label, value in labelled_values if value]
|
|
|
|
|
|
def normalize_case(raw_case: dict[str, Any], source_path: str = "") -> NormalizedCase:
    """Convert a raw case document into the internal normalized case model.

    Args:
        raw_case: Raw case document. ``case_id``, ``scenario`` and ``title``
            are required; every other key is optional.
        source_path: Where the document came from; stored unchanged.

    Returns:
        The normalized case, with ``memory_type`` set to ``"case"`` and
        ``patterns`` derived by ``_derive_patterns``.

    Raises:
        KeyError: If ``case_id``, ``scenario`` or ``title`` is missing.
    """
    # Guard the verdict lookup against a null / non-object conclusion.
    conclusion = raw_case.get("conclusion")
    if not isinstance(conclusion, dict):
        conclusion = {}

    # Fall back step by step so a null or empty verdict never ends up in the
    # ``str``-typed field: conclusion verdict -> raw status -> "unknown".
    verdict = conclusion.get("verdict") or raw_case.get("status") or "unknown"

    def optional(key: str, fallback: Any) -> Any:
        # An explicit JSON null counts as "not provided".
        value = raw_case.get(key)
        return fallback if value is None else value

    return NormalizedCase(
        id=raw_case["case_id"],
        memory_type="case",
        scenario=raw_case["scenario"],
        title=raw_case["title"],
        abstract=optional("summary", ""),
        verdict=verdict,
        severity=optional("severity", "unknown"),
        entities=optional("entities", {}),
        observables=optional("observables", {}),
        evidence=optional("evidence", []),
        patterns=_derive_patterns(raw_case),
        related_refs=optional("related_refs", {}),
        source_path=source_path,
        tags=optional("tags", []),
    )
|
|
|
|
|
|
def load_and_normalize_case(path: str | Path) -> NormalizedCase:
    """Read a raw case JSON file and return its normalized form.

    Args:
        path: Location of a UTF-8 encoded JSON file holding one raw case.

    Returns:
        The normalized case; its ``source_path`` is ``str(Path(path))``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or its top-level value is not a JSON object.
        KeyError: Propagated from ``normalize_case`` when a required key is
            missing.
    """
    case_file = Path(path)
    with case_file.open("r", encoding="utf-8") as handle:
        raw_case = json.load(handle)

    # normalize_case needs a mapping; reject lists/scalars here with a message
    # that names the offending file instead of failing later on ``.get``.
    if not isinstance(raw_case, dict):
        raise ValueError(
            f"{case_file}: expected a JSON object at the top level, "
            f"got {type(raw_case).__name__}"
        )

    return normalize_case(raw_case, source_path=str(case_file))
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: normalize one case file and print it as JSON."""
    # Local import: argparse is only used by this entry point.
    import argparse

    cli = argparse.ArgumentParser(description="Normalize a mock SOC case JSON file.")
    cli.add_argument("path", help="Path to a raw case JSON file")
    case_path = cli.parse_args().path

    # Dump the dataclass as indented JSON, keeping non-ASCII text readable.
    case_dict = asdict(load_and_normalize_case(case_path))
    print(json.dumps(case_dict, ensure_ascii=False, indent=2))
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|