"""DLP policy definitions: violation categories, thresholds, and prompt formatting.""" import json from .models import ActionClass, RiskLevel # Risk score thresholds RISK_THRESHOLDS = { RiskLevel.CRITICAL: 80, RiskLevel.HIGH: 60, RiskLevel.MEDIUM: 40, RiskLevel.LOW: 0, } # Action mapping based on risk level RISK_TO_ACTION = { RiskLevel.CRITICAL: ActionClass.BLOCK, RiskLevel.HIGH: ActionClass.ALERT, RiskLevel.MEDIUM: ActionClass.ALERT, RiskLevel.LOW: ActionClass.PASS_, } DLP_CATEGORIES = { "PII": { "description": "Personally Identifiable Information", "signals": [ "Full name combined with email address", "Social Security Number (SSN) or employee ID", "Phone numbers combined with personal details", "Home address combined with personal identifiers", ], "risk_weight": "HIGH to CRITICAL depending on volume", }, "FINANCIAL_DATA": { "description": "Non-public financial information", "signals": [ "Revenue targets, EBITDA projections, internal forecasts", "Salary figures, compensation plans", "Invoice amounts and vendor payment terms", "Internal budget allocations", ], "risk_weight": "MEDIUM to CRITICAL depending on sensitivity", }, "SOURCE_CODE": { "description": "Proprietary source code or model weights", "signals": [ "Python, Java, or other source files with copyright notices", "Internal class names and proprietary algorithms", "Model architecture files or weight files", "Internal API keys or credentials embedded in code", ], "risk_weight": "CRITICAL", }, "REGULATORY_DOCUMENT": { "description": "Internal regulatory and compliance drafts", "signals": [ "CFPB, GDPR, or SOX compliance drafts marked internal", "Audit findings or remediation plans", "Internal compliance assessments not yet published", "Regulatory submission drafts", ], "risk_weight": "CRITICAL", }, "LEGAL_CONTRACT": { "description": "Executed or draft legal agreements", "signals": [ "Non-Disclosure Agreements (NDAs) with named parties", "Signed contracts with dates and signatures", "Settlement agreements or legal memoranda", "Vendor contracts with financial terms", ], "risk_weight": "HIGH to CRITICAL", }, "PAYROLL_RECORD": { "description": "Employee payroll and compensation records", "signals": [ "Employee ID combined with salary and payroll period", "Direct deposit details or bank account information", "Year-to-date earnings and deductions", "HR compensation reports", ], "risk_weight": "CRITICAL", }, "CUSTOMER_LIST": { "description": "Customer or prospect data in bulk", "signals": [ "CSV or table with customer names, emails, and revenue figures", "CRM exports with contact details", "Prospect lists for sales campaigns", "Customer PII in aggregate", ], "risk_weight": "CRITICAL", }, "INTERNAL_MEMO": { "description": "Confidential internal communications", "signals": [ 'Documents marked "INTERNAL ONLY" or "DO NOT DISTRIBUTE"', "CEO or executive strategy memos", "Organizational restructuring plans", "Internal performance reviews or headcount discussions", ], "risk_weight": "HIGH", }, } ACTION_THRESHOLDS = { "BLOCK": "risk_score >= 80 (CRITICAL risk)", "ALERT": "risk_score >= 40 (MEDIUM or HIGH risk)", "PASS": "risk_score < 40 (LOW risk)", } def format_policy_for_prompt() -> str: """Format the DLP policy as a JSON string for injection into the LLM system prompt.""" policy = { "categories": DLP_CATEGORIES, "risk_score_thresholds": { "CRITICAL": "score >= 80", "HIGH": "score >= 60", "MEDIUM": "score >= 40", "LOW": "score < 40", }, "action_mapping": ACTION_THRESHOLDS, "instructions": ( "Evaluate the email against ALL categories above. " "Assign a risk_score from 0 to 100 based on the most severe violation found. " "Multiple violations increase the score. " "action must match the threshold: BLOCK if score>=80, ALERT if score>=40, PASS otherwise. " "evidence must be direct quotes from the actual email or attachment content." ), } return json.dumps(policy, indent=2)