#!/usr/bin/env python3
# Unified SOC alert/email triage entrypoint.
#
# Reads raw email / ticket / alert text (from --file, --text, or stdin),
# extracts indicators (sender, user, subject, attachment, URL, IP, host),
# infers a scenario and alert type, collects notable facts, and then invokes
# the sibling ``triage_alert.py`` script with those values.
from __future__ import annotations

import argparse
import os
import re
import subprocess
import sys
from pathlib import Path

SCRIPT_DIR = Path(__file__).resolve().parent
TRIAGE_ALERT = SCRIPT_DIR / "triage_alert.py"

EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
URL_RE = re.compile(r"https?://[^\s<>\"]+")
IP_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
HOST_RE = re.compile(r"\b[A-Z]{2,}(?:-[A-Z0-9]+)+\b")
ATTACHMENT_RE = re.compile(r"\b[\w.-]+\.(?:html|htm|pdf|zip|docx|xlsx|eml)\b", re.IGNORECASE)
# Only horizontal whitespace is allowed around the colon: ``\s`` would also
# match a newline, so an empty header ("Subject:") would swallow the next line
# as its value and hide that next header from later lookups.
HEADER_RE = re.compile(
    r"^(From|To|Subject|Attachment|URL|IP|Host|User|Alert type|Scenario)[ \t]*:[ \t]*(.+)$",
    re.IGNORECASE | re.MULTILINE,
)

# Plain substrings that mark an identity / sign-in style alert.
_O365_TOKENS = (
    "impossible travel",
    "mfa fatigue",
    "oauth consent",
    "inbox rule",
    "azuread",
)
# "entra" and "signin" are substrings of common words ("central", "entrance",
# "signing", "designing"), so they are matched with guards instead of a bare
# ``in`` test. "sign-in"/"signin" may still appear inside identifiers such as
# "SigninLogs", hence no word boundary on that alternative.
_O365_AMBIGUOUS_RE = re.compile(r"\bentra(?:_?id)?\b|sign-?in(?!g)")

# Plain substrings that mark a phishing style alert.
_PHISHING_TOKENS = (
    "phishing",
    "invoice",
    "attachment",
    "credential harvest",
    "fake microsoft 365",
    "dmarc",
    "mail_suspicious",
    "wire transfer",
    "executive impersonation",
)
# "bec" must be a standalone word, otherwise "because"/"become" match.
_BEC_RE = re.compile(r"\bbec\b")

# (label, needles): the label is recorded when any needle occurs in the
# lower-cased text.
_FACT_PATTERNS = (
    ("DMARC failed", ("dmarc failed",)),
    ("SPF failed", ("spf failed",)),
    ("User may have clicked the link", ("clicked",)),
    (
        "Credential submission suspected",
        ("submitted credentials", "credential submission", "entered credentials"),
    ),
    ("Impossible travel observed", ("impossible travel",)),
    ("MFA fatigue observed", ("mfa fatigue", "repeated mfa")),
    ("Inbox rule creation observed", ("inbox rule",)),
    ("OAuth consent activity observed", ("oauth consent",)),
)
# Lines mentioning any of these words are kept verbatim as facts.
_FACT_LINE_KEYWORDS = (
    "dmarc",
    "spf",
    "clicked",
    "credential",
    "impossible travel",
    "mfa",
    "inbox rule",
    "oauth",
)
_MAX_FACTS = 8

# Sentence punctuation that URL_RE captures when a URL ends a sentence.
_URL_TRAILING_PUNCT = ".,;:!?'"

# (command-line flag, key in the parsed dict), in the order they are passed on.
_TRIAGE_FIELDS = (
    ("--scenario", "scenario"),
    ("--alert-type", "alert_type"),
    ("--user", "user"),
    ("--host", "host"),
    ("--sender", "sender"),
    ("--subject", "subject"),
    ("--attachment", "attachment"),
    ("--url", "url"),
    ("--ip", "ip"),
    ("--summary", "summary"),
)


def first_nonempty(*values: str) -> str:
    """Return the first value that is non-blank, stripped; ``""`` if none is."""
    for value in values:
        if value and value.strip():
            return value.strip()
    return ""


def load_text(args: argparse.Namespace) -> str:
    """Return the input text from ``args.file``, ``args.text`` or stdin.

    Precedence is file, then inline text, then stdin. Whitespace-only stdin
    yields ``""``.

    Raises:
        OSError: if ``args.file`` cannot be read.
    """
    if args.file:
        # Raw mail is frequently not valid UTF-8; replace undecodable bytes
        # rather than aborting the whole triage on a UnicodeDecodeError.
        return Path(args.file).read_text(encoding="utf-8", errors="replace")
    if args.text:
        return args.text
    data = sys.stdin.read()
    return data if data.strip() else ""


def find_header(text: str, name: str) -> str:
    """Return the stripped value of the first ``name:`` header line in *text*.

    The header name is compared case-insensitively. Only the header names
    listed in ``HEADER_RE`` are recognised. Returns ``""`` when absent.
    """
    wanted = name.lower()
    for key, value in HEADER_RE.findall(text):
        if key.lower() == wanted:
            return value.strip()
    return ""


def _dedupe(items) -> list[str]:
    """Return *items* without duplicates, keeping first-seen order."""
    return list(dict.fromkeys(items))


def unique_matches(pattern: re.Pattern[str], text: str) -> list[str]:
    """Return every distinct match of *pattern* in *text*, in first-seen order."""
    return _dedupe(pattern.findall(text))


def _contains_any(haystack: str, needles) -> bool:
    """Return True when any of *needles* is a substring of *haystack*."""
    return any(needle in haystack for needle in needles)


def infer_scenario(text: str, explicit_scenario: str = "", explicit_alert_type: str = "") -> tuple[str, str]:
    """Choose a ``(scenario, alert_type)`` pair for *text*.

    An explicit scenario always wins and is returned together with the explicit
    alert type unchanged. Otherwise the lower-cased text is checked for
    identity / sign-in markers first, then for phishing markers. An explicit
    alert type overrides the inferred one within either scenario. When nothing
    matches the result is ``("phishing", explicit_alert_type)``.
    """
    if explicit_scenario:
        return explicit_scenario, explicit_alert_type

    lowered = text.lower()

    if _contains_any(lowered, _O365_TOKENS) or _O365_AMBIGUOUS_RE.search(lowered):
        if explicit_alert_type:
            alert_type = explicit_alert_type
        elif "impossible travel" in lowered:
            alert_type = "azuread_impossible_travel"
        else:
            alert_type = "o365_suspicious_login"
        return "o365_suspicious_login", alert_type

    # BEC markers select the phishing scenario on their own; previously they
    # were only consulted when some other phishing token happened to be present.
    is_bec = (
        "wire transfer" in lowered
        or "executive impersonation" in lowered
        or _BEC_RE.search(lowered) is not None
    )
    if is_bec or _contains_any(lowered, _PHISHING_TOKENS):
        if explicit_alert_type:
            return "phishing", explicit_alert_type
        if is_bec:
            return "phishing", "mail_bec_impersonation"
        if "link" in lowered and "attachment" not in lowered:
            return "phishing", "mail_suspicious_link"
        return "phishing", "mail_suspicious_attachment"

    return "phishing", explicit_alert_type


def collect_facts(text: str, provided: list[str]) -> list[str]:
    """Build the list of facts for the triage call, capped at eight entries.

    Order: caller-provided facts, then canned labels whose needles occur in the
    text, then short lines (at most 160 characters after stripping list
    markers) that mention one of the fact keywords. Exact duplicates are
    dropped.
    """
    facts: list[str] = []

    def remember(fact: str) -> None:
        if fact not in facts:
            facts.append(fact)

    for fact in provided:
        if fact:
            remember(fact)

    lowered = text.lower()
    for label, needles in _FACT_PATTERNS:
        if _contains_any(lowered, needles):
            remember(label)

    for line in text.splitlines():
        # Drop bullet markers and surrounding blanks from list-style lines.
        stripped = line.strip("-* \t")
        if not stripped or len(stripped) > 160:
            continue
        if _contains_any(stripped.lower(), _FACT_LINE_KEYWORDS):
            remember(stripped)

    return facts[:_MAX_FACTS]


def build_summary(text: str, subject: str, provided_summary: str = "") -> str:
    """Return a short summary for the alert.

    Preference order: the provided summary (first 240 characters), the subject
    (first 180 characters), the first line of at least 20 characters that has
    no colon within its first 20 characters (so header-like lines are skipped),
    and finally the first 240 characters of the stripped text.
    """
    if provided_summary:
        return provided_summary[:240]
    if subject:
        return subject[:180]
    for line in text.splitlines():
        stripped = line.strip()
        if len(stripped) >= 20 and ":" not in stripped[:20]:
            return stripped[:240]
    return text.strip()[:240]


def _is_valid_ipv4(candidate: str) -> bool:
    """Return True when every dotted octet of *candidate* is at most 255."""
    return all(int(octet) <= 255 for octet in candidate.split("."))


def parse_input(args: argparse.Namespace) -> dict[str, str | list[str]]:
    """Extract the triage fields from the input described by *args*.

    For each field the explicit command-line override wins, then the matching
    header line, then the first pattern match found anywhere in the text. The
    user additionally falls back from the ``User`` header to the ``To`` header
    and then to the second e-mail address found.

    Returns:
        A dict with the string keys ``scenario``, ``alert_type``, ``user``,
        ``host``, ``sender``, ``subject``, ``attachment``, ``url``, ``ip``,
        ``summary`` and the list-valued key ``facts``.
    """
    text = load_text(args)
    scenario, alert_type = infer_scenario(text, args.scenario, args.alert_type)

    emails = unique_matches(EMAIL_RE, text)
    # A URL that ends a sentence drags the punctuation along; trim it.
    urls = _dedupe(
        trimmed
        for trimmed in (match.rstrip(_URL_TRAILING_PUNCT) for match in URL_RE.findall(text))
        if trimmed
    )
    # IP_RE accepts any 1-3 digit groups; discard impossible octets (> 255).
    ips = [ip for ip in unique_matches(IP_RE, text) if _is_valid_ipv4(ip)]
    hosts = unique_matches(HOST_RE, text)
    attachments = unique_matches(ATTACHMENT_RE, text)

    sender = first_nonempty(args.sender, find_header(text, "From"), emails[0] if emails else "")
    user = first_nonempty(
        args.user,
        find_header(text, "User"),
        find_header(text, "To"),
        emails[1] if len(emails) > 1 else "",
    )
    subject = first_nonempty(args.subject, find_header(text, "Subject"))
    attachment = first_nonempty(
        args.attachment,
        find_header(text, "Attachment"),
        attachments[0] if attachments else "",
    )
    url = first_nonempty(args.url, find_header(text, "URL"), urls[0] if urls else "")
    ip = first_nonempty(args.ip, find_header(text, "IP"), ips[0] if ips else "")
    host = first_nonempty(args.host, find_header(text, "Host"), hosts[0] if hosts else "")

    summary = build_summary(text, subject, args.summary)
    facts = collect_facts(text, args.fact)

    return {
        "scenario": scenario,
        "alert_type": alert_type,
        "user": user,
        "host": host,
        "sender": sender,
        "subject": subject,
        "attachment": attachment,
        "url": url,
        "ip": ip,
        "summary": summary,
        "facts": facts,
    }


def _build_triage_command(parsed: dict[str, str | list[str]], limit: int) -> list[str]:
    """Return the argv list used to invoke ``triage_alert.py``."""
    # Values come from untrusted mail/alert text. Passing them as
    # ``--option=value`` keeps a value such as "--help" or "-x" from being
    # read as an option by the child.
    # NOTE(review): assumes triage_alert.py accepts argparse-style
    # ``--option=value`` arguments - confirm against that script.
    cmd = [sys.executable, str(TRIAGE_ALERT)]
    cmd.extend(f"{flag}={parsed[key]}" for flag, key in _TRIAGE_FIELDS)
    cmd.append(f"--limit={limit}")

    facts = parsed["facts"]
    if isinstance(facts, str):
        # Guard against iterating a lone string character by character.
        facts = [facts]
    cmd.extend(f"--fact={fact}" for fact in facts)
    return cmd


def run_triage(parsed: dict[str, str | list[str]], limit: int) -> None:
    """Run ``triage_alert.py`` with the parsed fields.

    The child is started with the current interpreter and a copy of the
    current environment.

    Raises:
        subprocess.CalledProcessError: if the child exits with a non-zero code.
    """
    subprocess.run(_build_triage_command(parsed, limit), check=True, env=os.environ.copy())


def main() -> None:
    """Parse command-line options, extract the triage fields and run the triage."""
    parser = argparse.ArgumentParser(
        description="Unified SOC alert/email triage entrypoint with memory and Obsidian retrieval."
    )
    parser.add_argument("--text", help="Raw email, ticket text, or freeform alert text")
    parser.add_argument("--file", help="Path to a raw email/ticket/alert text file")
    parser.add_argument("--scenario", default="", help="Optional scenario override")
    parser.add_argument("--alert-type", default="", help="Optional alert type override")
    parser.add_argument("--user", default="", help="Optional user override")
    parser.add_argument("--host", default="", help="Optional host override")
    parser.add_argument("--sender", default="", help="Optional sender override")
    parser.add_argument("--subject", default="", help="Optional subject override")
    parser.add_argument("--attachment", default="", help="Optional attachment override")
    parser.add_argument("--url", default="", help="Optional URL override")
    parser.add_argument("--ip", default="", help="Optional IP override")
    parser.add_argument("--summary", default="", help="Optional summary override")
    parser.add_argument("--fact", action="append", default=[], help="Additional known fact; repeatable")
    parser.add_argument("--limit", type=int, default=5, help="Search limit")
    args = parser.parse_args()

    try:
        parsed = parse_input(args)
    except OSError as exc:
        # Unreadable input is a usage problem, not a crash worth a traceback.
        parser.error(f"cannot read input: {exc}")

    try:
        run_triage(parsed, args.limit)
    except subprocess.CalledProcessError as exc:
        # The child already reported its own failure; propagate its exit code.
        sys.exit(exc.returncode)


if __name__ == "__main__":
    main()