Initial SOC memory POC implementation

This commit is contained in:
2026-04-27 17:13:06 +08:00
parent fc68581198
commit e6b1520bce
89 changed files with 7610 additions and 1 deletions

View File

@@ -0,0 +1,201 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import subprocess
import sys
from pathlib import Path
# Directory holding this script; the triage helper is resolved relative to it.
SCRIPT_DIR = Path(__file__).resolve().parent
# Script that run_triage() launches as a subprocess.
TRIAGE_ALERT = SCRIPT_DIR / "triage_alert.py"

# Patterns used to pull observables out of free-form alert/email text.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
URL_RE = re.compile(r"https?://[^\s<>\"]+")
# Each octet is restricted to 0-255 (leading zeros tolerated) so that strings
# such as "999.1.1.1" are not reported as IPv4 addresses.
IP_RE = re.compile(
    r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d?\d)\b"
)
# Upper-case tokens made of hyphen-separated parts, e.g. "WS-FIN-01".
HOST_RE = re.compile(r"\b[A-Z]{2,}(?:-[A-Z0-9]+)+\b")
ATTACHMENT_RE = re.compile(r"\b[\w.-]+\.(?:html|htm|pdf|zip|docx|xlsx|eml)\b", re.IGNORECASE)
# "Key: value" lines. Only spaces/tabs are allowed around the colon and the
# value must start with a non-whitespace character: with ``\s*`` an empty
# header ("Subject:" followed by a newline) would swallow the NEXT line as
# its value and hide that line's own header from later lookups.
HEADER_RE = re.compile(
    r"^(From|To|Subject|Attachment|URL|IP|Host|User|Alert type|Scenario)[ \t]*:[ \t]*(\S.*)$",
    re.IGNORECASE | re.MULTILINE,
)
def first_nonempty(*values: str) -> str:
    """Return the first argument with non-whitespace content, stripped.

    Falsy arguments (empty strings, ``None``) and whitespace-only strings are
    skipped. An empty string is returned when no argument qualifies.
    """
    trimmed = (candidate.strip() for candidate in values if candidate)
    return next((item for item in trimmed if item), "")
def load_text(args: argparse.Namespace) -> str:
    """Return the raw text to triage.

    Sources are tried in order: the file named by ``args.file`` (read as
    UTF-8), then ``args.text``, then everything available on standard input.
    Standard input that is empty or whitespace-only yields an empty string.
    """
    if args.file:
        return Path(args.file).read_text(encoding="utf-8")
    if args.text:
        return args.text
    piped = sys.stdin.read()
    return piped if piped.strip() else ""
def find_header(text: str, name: str) -> str:
    """Return the stripped value of the first ``name:`` header line in *text*.

    Header names are compared case-insensitively. Only lines recognised by
    ``HEADER_RE`` are considered. Returns an empty string when no such
    header is present.
    """
    wanted = name.lower()
    values = (
        hit.group(2).strip()
        for hit in HEADER_RE.finditer(text)
        if hit.group(1).lower() == wanted
    )
    return next(values, "")
def unique_matches(pattern: re.Pattern[str], text: str) -> list[str]:
    """Return every match of *pattern* in *text*, de-duplicated.

    Matches are returned in order of first appearance. ``dict.fromkeys``
    preserves insertion order, so de-duplication takes one linear pass
    instead of a list-membership scan for every match.
    """
    return list(dict.fromkeys(pattern.findall(text)))
def _mentions_word(lowered: str, word: str) -> bool:
    """Return True when *word* occurs in *lowered* as a whole word."""
    return re.search(rf"\b{re.escape(word)}\b", lowered) is not None


def infer_scenario(text: str, explicit_scenario: str = "", explicit_alert_type: str = "") -> tuple[str, str]:
    """Classify *text* into a ``(scenario, alert_type)`` pair.

    Args:
        text: Raw alert/email text to inspect (matched case-insensitively).
        explicit_scenario: When non-empty, returned unchanged together with
            ``explicit_alert_type``; no inference is performed.
        explicit_alert_type: When non-empty, used instead of any inferred
            alert type.

    Returns:
        ``("o365_suspicious_login", ...)`` when sign-in related phrases are
        present, otherwise ``("phishing", ...)``. The alert type is an empty
        string when nothing in the text allows one to be inferred and no
        explicit alert type was given.
    """
    if explicit_scenario:
        return explicit_scenario, explicit_alert_type

    lowered = text.lower()

    login_phrases = (
        "impossible travel",
        "mfa fatigue",
        "oauth consent",
        "inbox rule",
        "azuread",
        "sign-in",
        "signin",
    )
    # "entra" is matched as a whole word: as a plain substring it also occurs
    # in everyday words such as "central" or "entrance", which would
    # misclassify unrelated mail alerts as sign-in alerts.
    if any(phrase in lowered for phrase in login_phrases) or _mentions_word(lowered, "entra"):
        if explicit_alert_type:
            return "o365_suspicious_login", explicit_alert_type
        if "impossible travel" in lowered:
            return "o365_suspicious_login", "azuread_impossible_travel"
        return "o365_suspicious_login", "o365_suspicious_login"

    phishing_phrases = (
        "phishing",
        "invoice",
        "attachment",
        "credential harvest",
        "fake microsoft 365",
        "dmarc",
        "mail_suspicious",
        "wire transfer",
    )
    if any(phrase in lowered for phrase in phishing_phrases):
        if explicit_alert_type:
            return "phishing", explicit_alert_type
        # "bec" is matched as a whole word so that "because", "become", etc.
        # do not trigger the BEC classification.
        if (
            "wire transfer" in lowered
            or "executive impersonation" in lowered
            or _mentions_word(lowered, "bec")
        ):
            return "phishing", "mail_bec_impersonation"
        if "link" in lowered and "attachment" not in lowered:
            return "phishing", "mail_suspicious_link"
        return "phishing", "mail_suspicious_attachment"

    # Nothing recognisable: default to the phishing scenario.
    return "phishing", explicit_alert_type
def collect_facts(text: str, provided: list[str]) -> list[str]:
    """Assemble up to eight de-duplicated facts about the alert.

    Facts are gathered in three passes, in this order:

    1. Non-empty entries from *provided*.
    2. Canned labels whose trigger phrases occur anywhere in *text*
       (case-insensitive).
    3. Individual lines of *text* (bullet characters and surrounding
       whitespace trimmed, at most 160 characters long) that mention one of
       the indicator keywords.

    Returns:
        The first eight collected facts, in insertion order.
    """
    collected: list[str] = []

    def remember(item: str) -> None:
        # Keep first-seen order and drop exact duplicates.
        if item not in collected:
            collected.append(item)

    for given in provided:
        if given:
            remember(given)

    haystack = text.lower()
    labelled_triggers = (
        ("DMARC failed", ("dmarc failed",)),
        ("SPF failed", ("spf failed",)),
        ("User may have clicked the link", ("clicked", "user clicked")),
        ("Credential submission suspected", ("submitted credentials", "credential submission", "entered credentials")),
        ("Impossible travel observed", ("impossible travel",)),
        ("MFA fatigue observed", ("mfa fatigue", "repeated mfa")),
        ("Inbox rule creation observed", ("inbox rule",)),
        ("OAuth consent activity observed", ("oauth consent",)),
    )
    for label, triggers in labelled_triggers:
        if any(trigger in haystack for trigger in triggers):
            remember(label)

    keywords = ("dmarc", "spf", "clicked", "credential", "impossible travel", "mfa", "inbox rule", "oauth")
    for raw_line in text.splitlines():
        candidate = raw_line.strip("-* \t")
        if not candidate or len(candidate) > 160:
            continue
        lowered_line = candidate.lower()
        if any(keyword in lowered_line for keyword in keywords):
            remember(candidate)

    return collected[:8]
def build_summary(text: str, subject: str, provided_summary: str = "") -> str:
    """Produce a short summary string for the alert.

    Preference order:

    1. *provided_summary*, truncated to 240 characters.
    2. *subject*, truncated to 180 characters.
    3. The first line of *text* that is at least 20 characters long after
       stripping and has no ``:`` within its first 20 characters (which
       skips "Key: value" header lines), truncated to 240 characters.
    4. The stripped *text* itself, truncated to 240 characters.
    """
    if provided_summary:
        return provided_summary[:240]
    if subject:
        return subject[:180]

    stripped_lines = (line.strip() for line in text.splitlines())
    prose = next(
        (line for line in stripped_lines if len(line) >= 20 and ":" not in line[:20]),
        None,
    )
    if prose is not None:
        return prose[:240]
    return text.strip()[:240]
def parse_input(args: argparse.Namespace) -> dict[str, str | list[str]]:
    """Turn CLI arguments plus raw text into the fields passed to triage.

    Every field is resolved with the same precedence: the explicit command
    line override first, then a matching "Key: value" header in the text,
    then the first regex match of that kind found anywhere in the text.
    For ``user`` the fallback is the *second* e-mail address found, since
    the first one is used as the sender fallback.

    Returns:
        A dict with the keys ``scenario``, ``alert_type``, ``user``,
        ``host``, ``sender``, ``subject``, ``attachment``, ``url``, ``ip``,
        ``summary`` (all strings) and ``facts`` (a list of strings).
    """
    text = load_text(args)
    scenario, alert_type = infer_scenario(text, args.scenario, args.alert_type)

    def header(name: str) -> str:
        return find_header(text, name)

    def pick(found: list[str], index: int = 0) -> str:
        # Element at *index*, or "" when the list is too short.
        return found[index] if len(found) > index else ""

    emails = unique_matches(EMAIL_RE, text)
    subject = first_nonempty(args.subject, header("Subject"))

    return {
        "scenario": scenario,
        "alert_type": alert_type,
        "user": first_nonempty(args.user, header("User"), header("To"), pick(emails, 1)),
        "host": first_nonempty(args.host, header("Host"), pick(unique_matches(HOST_RE, text))),
        "sender": first_nonempty(args.sender, header("From"), pick(emails)),
        "subject": subject,
        "attachment": first_nonempty(
            args.attachment, header("Attachment"), pick(unique_matches(ATTACHMENT_RE, text))
        ),
        "url": first_nonempty(args.url, header("URL"), pick(unique_matches(URL_RE, text))),
        "ip": first_nonempty(args.ip, header("IP"), pick(unique_matches(IP_RE, text))),
        "summary": build_summary(text, subject, args.summary),
        "facts": collect_facts(text, args.fact),
    }
def run_triage(parsed: dict[str, str | list[str]], limit: int) -> None:
    """Run ``triage_alert.py`` as a subprocess with the parsed fields.

    Args:
        parsed: Mapping produced by ``parse_input``; must contain the keys
            ``scenario``, ``alert_type``, ``user``, ``host``, ``sender``,
            ``subject``, ``attachment``, ``url``, ``ip``, ``summary`` and
            ``facts``.
        limit: Value forwarded as ``--limit``.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status (``check=True``).
    """
    option_keys = (
        ("--scenario", "scenario"),
        ("--alert-type", "alert_type"),
        ("--user", "user"),
        ("--host", "host"),
        ("--sender", "sender"),
        ("--subject", "subject"),
        ("--attachment", "attachment"),
        ("--url", "url"),
        ("--ip", "ip"),
        ("--summary", "summary"),
    )
    cmd = [sys.executable, str(TRIAGE_ALERT)]
    # Values are extracted from untrusted email/alert text. Passed as a
    # separate argv item, a value beginning with "-" (e.g. a subject of
    # "-- URGENT --") would be read as an option by the child parser; the
    # ``--flag=value`` form keeps it bound to its flag.
    # NOTE(review): assumes triage_alert.py parses arguments with argparse
    # (or another parser that accepts ``--flag=value``) — confirm.
    cmd.extend(f"{flag}={parsed[key]}" for flag, key in option_keys)
    cmd.append(f"--limit={limit}")
    cmd.extend(f"--fact={fact}" for fact in parsed["facts"])
    subprocess.run(cmd, check=True, env=os.environ.copy())
def main() -> None:
    """Parse command line options, extract alert fields and run triage."""
    parser = argparse.ArgumentParser(
        description="Unified SOC alert/email triage entrypoint with memory and Obsidian retrieval."
    )
    parser.add_argument("--text", help="Raw email, ticket text, or freeform alert text")
    parser.add_argument("--file", help="Path to a raw email/ticket/alert text file")

    # Optional string overrides; each defaults to "" so downstream code can
    # treat "not given" and "empty" alike. Registration order is kept
    # because it determines the order shown by --help.
    string_overrides = (
        ("--scenario", "Optional scenario override"),
        ("--alert-type", "Optional alert type override"),
        ("--user", "Optional user override"),
        ("--host", "Optional host override"),
        ("--sender", "Optional sender override"),
        ("--subject", "Optional subject override"),
        ("--attachment", "Optional attachment override"),
        ("--url", "Optional URL override"),
        ("--ip", "Optional IP override"),
        ("--summary", "Optional summary override"),
    )
    for flag, help_text in string_overrides:
        parser.add_argument(flag, default="", help=help_text)

    parser.add_argument("--fact", action="append", default=[], help="Additional known fact; repeatable")
    parser.add_argument("--limit", type=int, default=5, help="Search limit")

    options = parser.parse_args()
    run_triage(parse_input(options), options.limit)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()