"""Normalize raw mock SOC cases into a retrieval-friendly structure. This module is intentionally small and deterministic so it can be used with mock data before real connectors are available. """ from __future__ import annotations import json from dataclasses import dataclass, asdict from pathlib import Path from typing import Any @dataclass class NormalizedCase: id: str memory_type: str scenario: str title: str abstract: str verdict: str severity: str entities: dict[str, list[str]] observables: dict[str, list[str]] evidence: list[str] patterns: list[str] related_refs: dict[str, list[str]] source_path: str tags: list[str] def _derive_patterns(raw_case: dict[str, Any]) -> list[str]: """Derive a small set of reusable patterns from the case payload.""" patterns: list[str] = [] verdict = raw_case.get("conclusion", {}).get("verdict") if verdict: patterns.append(f"verdict:{verdict}") scenario = raw_case.get("scenario") if scenario: patterns.append(f"scenario:{scenario}") alert_type = raw_case.get("alert_type") if alert_type: patterns.append(f"alert_type:{alert_type}") return patterns def normalize_case(raw_case: dict[str, Any], source_path: str = "") -> NormalizedCase: """Convert a raw case document into the internal normalized case model.""" conclusion = raw_case.get("conclusion", {}) return NormalizedCase( id=raw_case["case_id"], memory_type="case", scenario=raw_case["scenario"], title=raw_case["title"], abstract=raw_case.get("summary", ""), verdict=conclusion.get("verdict", raw_case.get("status", "unknown")), severity=raw_case.get("severity", "unknown"), entities=raw_case.get("entities", {}), observables=raw_case.get("observables", {}), evidence=raw_case.get("evidence", []), patterns=_derive_patterns(raw_case), related_refs=raw_case.get("related_refs", {}), source_path=source_path, tags=raw_case.get("tags", []), ) def load_and_normalize_case(path: str | Path) -> NormalizedCase: path = Path(path) with path.open("r", encoding="utf-8") as f: raw_case = json.load(f) return normalize_case(raw_case, source_path=str(path)) def main() -> None: import argparse parser = argparse.ArgumentParser(description="Normalize a mock SOC case JSON file.") parser.add_argument("path", help="Path to a raw case JSON file") args = parser.parse_args() normalized = load_and_normalize_case(args.path) print(json.dumps(asdict(normalized), ensure_ascii=False, indent=2)) if __name__ == "__main__": main()