Initial SOC memory POC implementation
This commit is contained in:
201
integrations/hermes/soc-memory-poc/scripts/triage_email.py
Normal file
201
integrations/hermes/soc-memory-poc/scripts/triage_email.py
Normal file
@ -0,0 +1,201 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Directory holding this script; the alert triage script is resolved next to it.
SCRIPT_DIR = Path(__file__).resolve().parent
TRIAGE_ALERT = SCRIPT_DIR / "triage_alert.py"

# Indicator extractors applied to the raw email/ticket text.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
URL_RE = re.compile(r"https?://[^\s<>\"]+")
# Dotted quad; octets are not range-checked, so "999.1.1.1" also matches.
IP_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
# Upper-case, hyphenated tokens such as "WS-FIN-01".
HOST_RE = re.compile(r"\b[A-Z]{2,}(?:-[A-Z0-9]+)+\b")
ATTACHMENT_RE = re.compile(r"\b[\w.-]+\.(?:html|htm|pdf|zip|docx|xlsx|eml)\b", re.IGNORECASE)
# "Key: value" lines. Only spaces/tabs are allowed around the colon: with
# "\s*" an empty header ("Subject:") would swallow the newline and capture the
# following line as its value, hiding that line's own header.
HEADER_RE = re.compile(
    r"^(From|To|Subject|Attachment|URL|IP|Host|User|Alert type|Scenario)[ \t]*:[ \t]*(.+)$",
    re.IGNORECASE | re.MULTILINE,
)
|
||||
|
||||
|
||||
def first_nonempty(*values: str) -> str:
    """Return the first argument with non-whitespace content, stripped.

    Falsy arguments and whitespace-only strings are skipped. Returns ""
    when no argument qualifies.
    """
    trimmed = (candidate.strip() for candidate in values if candidate)
    return next((item for item in trimmed if item), "")
|
||||
|
||||
|
||||
def load_text(args: argparse.Namespace) -> str:
    """Return the raw text to triage.

    Sources are tried in order: ``args.file`` (read from disk), ``args.text``
    (used verbatim), then standard input. Returns "" when stdin holds only
    whitespace.

    Raises:
        OSError: if ``args.file`` is set but cannot be read.
    """
    if args.file:
        # Saved emails are frequently not valid UTF-8 (legacy 8-bit charsets).
        # Substitute undecodable bytes instead of aborting the whole triage
        # run with UnicodeDecodeError.
        return Path(args.file).read_text(encoding="utf-8", errors="replace")
    if args.text:
        return args.text
    data = sys.stdin.read()
    return data if data.strip() else ""
|
||||
|
||||
|
||||
def find_header(text: str, name: str) -> str:
    """Return the stripped value of the first ``name:`` header line in *text*.

    Header names are compared case-insensitively. Only the header names
    enumerated in ``HEADER_RE`` can be found. Returns "" when no such header
    line is present.
    """
    wanted = name.lower()
    hits = (
        raw_value.strip()
        for header, raw_value in HEADER_RE.findall(text)
        if header.lower() == wanted
    )
    return next(hits, "")
|
||||
|
||||
|
||||
def unique_matches(pattern: re.Pattern[str], text: str) -> list[str]:
    """Return every match of *pattern* in *text*, de-duplicated.

    The order of first occurrence is preserved. Items are whatever
    ``pattern.findall`` yields (strings, or tuples for multi-group patterns).
    """
    # dict keys keep insertion order and are unique, giving an ordered
    # de-duplication in linear time instead of a list scan per match.
    return list(dict.fromkeys(pattern.findall(text)))
|
||||
|
||||
|
||||
def infer_scenario(text: str, explicit_scenario: str = "", explicit_alert_type: str = "") -> tuple[str, str]:
    """Guess the ``(scenario, alert_type)`` pair for a piece of alert text.

    An explicit scenario short-circuits inference and is returned together
    with the explicit alert type as given. Otherwise keywords in *text*
    (case-insensitive) select either the ``o365_suspicious_login`` scenario
    or, by default, ``phishing``. An explicit alert type always overrides the
    inferred one; when nothing matches, the alert type is returned as passed
    (normally "").
    """
    if explicit_scenario:
        return explicit_scenario, explicit_alert_type

    lowered = text.lower()

    login_phrases = (
        "impossible travel",
        "mfa fatigue",
        "oauth consent",
        "inbox rule",
        "azuread",
        "sign-in",
        "signin",
    )
    # "entra" is matched as a whole word: a plain substring test also fires on
    # everyday words such as "central" or "entrance" and misroutes the alert.
    mentions_login = any(phrase in lowered for phrase in login_phrases) or (
        re.search(r"\bentra\b", lowered) is not None
    )
    if mentions_login:
        if explicit_alert_type:
            alert_type = explicit_alert_type
        elif "impossible travel" in lowered:
            alert_type = "azuread_impossible_travel"
        else:
            alert_type = "o365_suspicious_login"
        return "o365_suspicious_login", alert_type

    # Everything below is the phishing scenario; a caller-supplied alert type wins.
    if explicit_alert_type:
        return "phishing", explicit_alert_type

    # BEC indicators are evaluated on their own rather than only after some
    # other phishing keyword matched. "bec" is matched as a whole word so that
    # "because" / "become" do not count.
    mentions_bec = (
        "wire transfer" in lowered
        or "executive impersonation" in lowered
        or re.search(r"\bbec\b", lowered) is not None
    )
    if mentions_bec:
        return "phishing", "mail_bec_impersonation"

    phishing_phrases = (
        "phishing",
        "invoice",
        "attachment",
        "credential harvest",
        "fake microsoft 365",
        "dmarc",
        "mail_suspicious",
    )
    if any(phrase in lowered for phrase in phishing_phrases):
        if "link" in lowered and "attachment" not in lowered:
            return "phishing", "mail_suspicious_link"
        return "phishing", "mail_suspicious_attachment"

    # No recognizable keyword: default scenario, alert type left unset.
    return "phishing", explicit_alert_type
|
||||
|
||||
|
||||
def collect_facts(text: str, provided: list[str], max_facts: int = 8) -> list[str]:
    """Assemble a short, de-duplicated list of notable facts about an alert.

    Facts are gathered in three passes, in this order:

    1. caller-supplied facts from *provided* (empty entries skipped);
    2. canned labels for known phrases found anywhere in *text*;
    3. individual lines of *text* (bullet markers stripped, at most 160
       characters) that mention one of the tracked keywords.

    Duplicates are dropped case-insensitively, so a line reading
    "dmarc failed" does not take a second slot next to the "DMARC failed"
    label. The first spelling seen is kept.

    Args:
        text: Raw alert/email text to scan.
        provided: Facts already known to the caller; not modified.
        max_facts: Maximum number of facts returned (default 8).

    Returns:
        At most *max_facts* facts in discovery order.
    """
    facts: list[str] = []
    seen: set[str] = set()

    def _remember(fact: str) -> None:
        key = fact.casefold()
        if key not in seen:
            seen.add(key)
            facts.append(fact)

    for fact in provided:
        if fact:
            _remember(fact)

    lowered = text.lower()
    fact_patterns = (
        ("DMARC failed", ("dmarc failed",)),
        ("SPF failed", ("spf failed",)),
        ("User may have clicked the link", ("clicked", "user clicked")),
        ("Credential submission suspected", ("submitted credentials", "credential submission", "entered credentials")),
        ("Impossible travel observed", ("impossible travel",)),
        ("MFA fatigue observed", ("mfa fatigue", "repeated mfa")),
        ("Inbox rule creation observed", ("inbox rule",)),
        ("OAuth consent activity observed", ("oauth consent",)),
    )
    for label, needles in fact_patterns:
        if any(needle in lowered for needle in needles):
            _remember(label)

    line_keywords = ("dmarc", "spf", "clicked", "credential", "impossible travel", "mfa", "inbox rule", "oauth")
    for line in text.splitlines():
        # Drop list-bullet characters and padding from both ends of the line.
        stripped = line.strip("-* \t")
        if not stripped or len(stripped) > 160:
            continue
        lower = stripped.lower()
        if any(word in lower for word in line_keywords):
            _remember(stripped)

    return facts[:max_facts]
|
||||
|
||||
|
||||
def build_summary(text: str, subject: str, provided_summary: str = "") -> str:
    """Pick a one-line summary for the alert.

    Preference order: the caller's summary (cut to 240 chars), the subject
    (cut to 180 chars), the first line of *text* that is at least 20
    characters long and has no colon within its first 20 characters (cut to
    240), and finally the start of the stripped text (cut to 240).
    """
    if provided_summary:
        return provided_summary[:240]
    if subject:
        return subject[:180]

    candidates = (raw_line.strip() for raw_line in text.splitlines())
    # A colon near the start suggests a "Key: value" header line, not prose.
    prose = next(
        (cand for cand in candidates if len(cand) >= 20 and ":" not in cand[:20]),
        None,
    )
    if prose is not None:
        return prose[:240]
    return text.strip()[:240]
|
||||
|
||||
|
||||
def parse_input(args: argparse.Namespace) -> dict[str, str | list[str]]:
    """Turn CLI arguments plus raw text into the field dict used for triage.

    For each indicator field the value is the first non-empty of: the CLI
    override, the matching ``Key:`` header in the text, and the first regex
    match in the text. ``user`` additionally falls back to the ``To`` header
    and then to the second e-mail address found; ``subject`` has no regex
    fallback.
    """

    def nth(items: list[str], index: int = 0) -> str:
        # Element at *index*, or "" when the list is too short.
        return items[index] if len(items) > index else ""

    raw = load_text(args)

    def header(name: str) -> str:
        return find_header(raw, name)

    scenario, alert_type = infer_scenario(raw, args.scenario, args.alert_type)
    addresses = unique_matches(EMAIL_RE, raw)
    subject = first_nonempty(args.subject, header("Subject"))

    return {
        "scenario": scenario,
        "alert_type": alert_type,
        "user": first_nonempty(args.user, header("User"), header("To"), nth(addresses, 1)),
        "host": first_nonempty(args.host, header("Host"), nth(unique_matches(HOST_RE, raw))),
        "sender": first_nonempty(args.sender, header("From"), nth(addresses)),
        "subject": subject,
        "attachment": first_nonempty(
            args.attachment, header("Attachment"), nth(unique_matches(ATTACHMENT_RE, raw))
        ),
        "url": first_nonempty(args.url, header("URL"), nth(unique_matches(URL_RE, raw))),
        "ip": first_nonempty(args.ip, header("IP"), nth(unique_matches(IP_RE, raw))),
        "summary": build_summary(raw, subject, args.summary),
        "facts": collect_facts(raw, args.fact),
    }
|
||||
|
||||
|
||||
def run_triage(parsed: dict[str, str | list[str]], limit: int) -> None:
    """Run the sibling ``triage_alert.py`` script with the parsed fields.

    Each scalar field is passed as ``--flag value``, followed by ``--limit``
    and one ``--fact`` pair per collected fact. The child runs under the
    current interpreter with a copy of this process's environment.

    Raises:
        subprocess.CalledProcessError: if the child exits with a non-zero status.
    """
    flag_for_field = (
        ("--scenario", "scenario"),
        ("--alert-type", "alert_type"),
        ("--user", "user"),
        ("--host", "host"),
        ("--sender", "sender"),
        ("--subject", "subject"),
        ("--attachment", "attachment"),
        ("--url", "url"),
        ("--ip", "ip"),
        ("--summary", "summary"),
    )

    command = [sys.executable, str(TRIAGE_ALERT)]
    for flag, field in flag_for_field:
        command += [flag, str(parsed[field])]
    command += ["--limit", str(limit)]
    for fact in parsed["facts"]:
        command += ["--fact", str(fact)]

    # Argument list (no shell), so text taken from the email is never
    # interpreted by a shell.
    subprocess.run(command, check=True, env=os.environ.copy())
|
||||
|
||||
|
||||
def main() -> None:
    """Parse command-line options, extract alert fields, and launch triage."""
    cli = argparse.ArgumentParser(
        description="Unified SOC alert/email triage entrypoint with memory and Obsidian retrieval."
    )
    cli.add_argument("--text", help="Raw email, ticket text, or freeform alert text")
    cli.add_argument("--file", help="Path to a raw email/ticket/alert text file")

    # Optional per-field overrides; all default to "" meaning "infer from the text".
    overrides = (
        ("--scenario", "scenario"),
        ("--alert-type", "alert type"),
        ("--user", "user"),
        ("--host", "host"),
        ("--sender", "sender"),
        ("--subject", "subject"),
        ("--attachment", "attachment"),
        ("--url", "URL"),
        ("--ip", "IP"),
        ("--summary", "summary"),
    )
    for flag, label in overrides:
        cli.add_argument(flag, default="", help=f"Optional {label} override")

    cli.add_argument("--fact", action="append", default=[], help="Additional known fact; repeatable")
    cli.add_argument("--limit", type=int, default=5, help="Search limit")

    options = cli.parse_args()
    run_triage(parse_input(options), options.limit)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user