Initial commit
This commit is contained in:
132
email_dlp/policy.py
Normal file
132
email_dlp/policy.py
Normal file
@ -0,0 +1,132 @@
|
||||
"""DLP policy definitions: violation categories, thresholds, and prompt formatting."""
|
||||
|
||||
import json
|
||||
|
||||
from .models import ActionClass, RiskLevel
|
||||
|
||||
# Risk score thresholds.
# Each value is the lower bound of the risk_score band for that level; the
# same numbers are spelled out as text in the policy sent to the LLM
# (CRITICAL >= 80, HIGH >= 60, MEDIUM >= 40, LOW below 40).
# Entries are listed from most to least severe.
RISK_THRESHOLDS = {
    RiskLevel.CRITICAL: 80,
    RiskLevel.HIGH: 60,
    RiskLevel.MEDIUM: 40,
    RiskLevel.LOW: 0,
}
|
||||
|
||||
# Action mapping based on risk level.
# CRITICAL is the only level that blocks; HIGH and MEDIUM both raise an
# alert; LOW is let through.
RISK_TO_ACTION = {
    RiskLevel.CRITICAL: ActionClass.BLOCK,
    RiskLevel.HIGH: ActionClass.ALERT,
    RiskLevel.MEDIUM: ActionClass.ALERT,
    RiskLevel.LOW: ActionClass.PASS_,
}
|
||||
|
||||
# Catalogue of DLP violation categories, keyed by category identifier.
# Every entry carries the same three fields:
#   "description" - short human-readable name of the category
#   "signals"     - list of example indicators for the category
#   "risk_weight" - free-text hint about the expected risk level
# format_policy_for_prompt() embeds this mapping verbatim under "categories".
DLP_CATEGORIES = {
    # Personal data about individuals.
    "PII": {
        "description": "Personally Identifiable Information",
        "signals": [
            "Full name combined with email address",
            "Social Security Number (SSN) or employee ID",
            "Phone numbers combined with personal details",
            "Home address combined with personal identifiers",
        ],
        "risk_weight": "HIGH to CRITICAL depending on volume",
    },
    # Financial figures that are not public.
    "FINANCIAL_DATA": {
        "description": "Non-public financial information",
        "signals": [
            "Revenue targets, EBITDA projections, internal forecasts",
            "Salary figures, compensation plans",
            "Invoice amounts and vendor payment terms",
            "Internal budget allocations",
        ],
        "risk_weight": "MEDIUM to CRITICAL depending on sensitivity",
    },
    # Proprietary code, models, and embedded secrets.
    "SOURCE_CODE": {
        "description": "Proprietary source code or model weights",
        "signals": [
            "Python, Java, or other source files with copyright notices",
            "Internal class names and proprietary algorithms",
            "Model architecture files or weight files",
            "Internal API keys or credentials embedded in code",
        ],
        "risk_weight": "CRITICAL",
    },
    # Compliance and regulatory working documents.
    "REGULATORY_DOCUMENT": {
        "description": "Internal regulatory and compliance drafts",
        "signals": [
            "CFPB, GDPR, or SOX compliance drafts marked internal",
            "Audit findings or remediation plans",
            "Internal compliance assessments not yet published",
            "Regulatory submission drafts",
        ],
        "risk_weight": "CRITICAL",
    },
    # Legal agreements, whether signed or still in draft.
    "LEGAL_CONTRACT": {
        "description": "Executed or draft legal agreements",
        "signals": [
            "Non-Disclosure Agreements (NDAs) with named parties",
            "Signed contracts with dates and signatures",
            "Settlement agreements or legal memoranda",
            "Vendor contracts with financial terms",
        ],
        "risk_weight": "HIGH to CRITICAL",
    },
    # Payroll and compensation data tied to employees.
    "PAYROLL_RECORD": {
        "description": "Employee payroll and compensation records",
        "signals": [
            "Employee ID combined with salary and payroll period",
            "Direct deposit details or bank account information",
            "Year-to-date earnings and deductions",
            "HR compensation reports",
        ],
        "risk_weight": "CRITICAL",
    },
    # Bulk customer or prospect data.
    "CUSTOMER_LIST": {
        "description": "Customer or prospect data in bulk",
        "signals": [
            "CSV or table with customer names, emails, and revenue figures",
            "CRM exports with contact details",
            "Prospect lists for sales campaigns",
            "Customer PII in aggregate",
        ],
        "risk_weight": "CRITICAL",
    },
    # Confidential communications meant to stay inside the company.
    "INTERNAL_MEMO": {
        "description": "Confidential internal communications",
        "signals": [
            'Documents marked "INTERNAL ONLY" or "DO NOT DISTRIBUTE"',
            "CEO or executive strategy memos",
            "Organizational restructuring plans",
            "Internal performance reviews or headcount discussions",
        ],
        "risk_weight": "HIGH",
    },
}
|
||||
|
||||
# Human-readable description of the score range behind each action.
# These are descriptive strings only (nothing here is evaluated);
# format_policy_for_prompt() embeds the mapping under "action_mapping".
ACTION_THRESHOLDS = {
    "BLOCK": "risk_score >= 80 (CRITICAL risk)",
    "ALERT": "risk_score >= 40 (MEDIUM or HIGH risk)",
    "PASS": "risk_score < 40 (LOW risk)",
}
|
||||
|
||||
|
||||
def format_policy_for_prompt() -> str:
    """Serialize the DLP policy to JSON for injection into the LLM system prompt.

    The payload contains, in this order: the category catalogue
    (``DLP_CATEGORIES``), a textual score band for each risk level, the
    action descriptions (``ACTION_THRESHOLDS``), and the scoring
    instructions for the model.

    Returns:
        The policy as a JSON document indented by two spaces.
    """
    # Textual score bands shown to the model, one per risk level.
    score_bands = {
        "CRITICAL": "score >= 80",
        "HIGH": "score >= 60",
        "MEDIUM": "score >= 40",
        "LOW": "score < 40",
    }

    # Sentences are kept as separate literals (each carrying its own
    # trailing space) and concatenated into a single instruction string.
    guidance = "".join(
        (
            "Evaluate the email against ALL categories above. ",
            "Assign a risk_score from 0 to 100 based on the most severe violation found. ",
            "Multiple violations increase the score. ",
            "action must match the threshold: BLOCK if score>=80, ALERT if score>=40, PASS otherwise. ",
            "evidence must be direct quotes from the actual email or attachment content.",
        )
    )

    payload = {
        "categories": DLP_CATEGORIES,
        "risk_score_thresholds": score_bands,
        "action_mapping": ACTION_THRESHOLDS,
        "instructions": guidance,
    }
    return json.dumps(payload, indent=2)
|
||||
Reference in New Issue
Block a user