# File: email-dlp/email_dlp/policy.py
# Last modified: 2026-03-20 10:28:28 +08:00
# 133 lines, 4.7 KiB, Python

"""DLP policy definitions: violation categories, thresholds, and prompt formatting."""
import json
from .models import ActionClass, RiskLevel
# Inclusive lower bound of risk_score for each risk level.
# A score is classified as the highest level whose threshold it meets.
RISK_THRESHOLDS = {
    RiskLevel.CRITICAL: 80,  # score >= 80
    RiskLevel.HIGH: 60,      # 60 <= score < 80
    RiskLevel.MEDIUM: 40,    # 40 <= score < 60
    RiskLevel.LOW: 0,        # score < 40
}
# Enforcement action taken for each risk level.
# CRITICAL is hard-blocked; HIGH and MEDIUM raise an alert; LOW passes through.
RISK_TO_ACTION = {
    RiskLevel.CRITICAL: ActionClass.BLOCK,
    RiskLevel.HIGH: ActionClass.ALERT,
    RiskLevel.MEDIUM: ActionClass.ALERT,
    RiskLevel.LOW: ActionClass.PASS_,
}
# Violation categories the LLM is asked to screen for. Each entry carries a
# human-readable description, example detection signals, and a qualitative
# risk weight. Insertion order is preserved in the serialized policy JSON.
DLP_CATEGORIES = {
    "PII": dict(
        description="Personally Identifiable Information",
        signals=[
            "Full name combined with email address",
            "Social Security Number (SSN) or employee ID",
            "Phone numbers combined with personal details",
            "Home address combined with personal identifiers",
        ],
        risk_weight="HIGH to CRITICAL depending on volume",
    ),
    "FINANCIAL_DATA": dict(
        description="Non-public financial information",
        signals=[
            "Revenue targets, EBITDA projections, internal forecasts",
            "Salary figures, compensation plans",
            "Invoice amounts and vendor payment terms",
            "Internal budget allocations",
        ],
        risk_weight="MEDIUM to CRITICAL depending on sensitivity",
    ),
    "SOURCE_CODE": dict(
        description="Proprietary source code or model weights",
        signals=[
            "Python, Java, or other source files with copyright notices",
            "Internal class names and proprietary algorithms",
            "Model architecture files or weight files",
            "Internal API keys or credentials embedded in code",
        ],
        risk_weight="CRITICAL",
    ),
    "REGULATORY_DOCUMENT": dict(
        description="Internal regulatory and compliance drafts",
        signals=[
            "CFPB, GDPR, or SOX compliance drafts marked internal",
            "Audit findings or remediation plans",
            "Internal compliance assessments not yet published",
            "Regulatory submission drafts",
        ],
        risk_weight="CRITICAL",
    ),
    "LEGAL_CONTRACT": dict(
        description="Executed or draft legal agreements",
        signals=[
            "Non-Disclosure Agreements (NDAs) with named parties",
            "Signed contracts with dates and signatures",
            "Settlement agreements or legal memoranda",
            "Vendor contracts with financial terms",
        ],
        risk_weight="HIGH to CRITICAL",
    ),
    "PAYROLL_RECORD": dict(
        description="Employee payroll and compensation records",
        signals=[
            "Employee ID combined with salary and payroll period",
            "Direct deposit details or bank account information",
            "Year-to-date earnings and deductions",
            "HR compensation reports",
        ],
        risk_weight="CRITICAL",
    ),
    "CUSTOMER_LIST": dict(
        description="Customer or prospect data in bulk",
        signals=[
            "CSV or table with customer names, emails, and revenue figures",
            "CRM exports with contact details",
            "Prospect lists for sales campaigns",
            "Customer PII in aggregate",
        ],
        risk_weight="CRITICAL",
    ),
    "INTERNAL_MEMO": dict(
        description="Confidential internal communications",
        signals=[
            'Documents marked "INTERNAL ONLY" or "DO NOT DISTRIBUTE"',
            "CEO or executive strategy memos",
            "Organizational restructuring plans",
            "Internal performance reviews or headcount discussions",
        ],
        risk_weight="HIGH",
    ),
}
# Human-readable mapping from action name to the score condition that
# triggers it; injected verbatim into the LLM prompt as "action_mapping".
ACTION_THRESHOLDS = {
    "BLOCK": "risk_score >= 80 (CRITICAL risk)",
    "ALERT": "risk_score >= 40 (MEDIUM or HIGH risk)",
    "PASS": "risk_score < 40 (LOW risk)",
}
def format_policy_for_prompt() -> str:
    """Serialize the full DLP policy as pretty-printed JSON for the LLM system prompt.

    Bundles the category catalog, the numeric score-to-level thresholds,
    the level-to-action mapping, and the evaluation instructions into one
    JSON document (2-space indent) that the caller embeds in the prompt.
    """
    # Textual form of the score bands; must stay in sync with RISK_THRESHOLDS.
    score_bands = {
        "CRITICAL": "score >= 80",
        "HIGH": "score >= 60",
        "MEDIUM": "score >= 40",
        "LOW": "score < 40",
    }
    # Evaluation rules the model must follow when scoring an email.
    guidance = (
        "Evaluate the email against ALL categories above. "
        "Assign a risk_score from 0 to 100 based on the most severe violation found. "
        "Multiple violations increase the score. "
        "action must match the threshold: BLOCK if score>=80, ALERT if score>=40, PASS otherwise. "
        "evidence must be direct quotes from the actual email or attachment content."
    )
    policy = {
        "categories": DLP_CATEGORIES,
        "risk_score_thresholds": score_bands,
        "action_mapping": ACTION_THRESHOLDS,
        "instructions": guidance,
    }
    return json.dumps(policy, indent=2)