# Listing metadata: 133 lines, 4.7 KiB, Python
"""DLP policy definitions: violation categories, thresholds, and prompt formatting."""
|
|
|
|
import json
|
|
|
|
from .models import ActionClass, RiskLevel
|
|
|
|
# Risk score thresholds
# Each value is the lowest risk_score at which the level applies (the prompt
# text in this module describes them as "score >= N"). Levels are listed from
# most to least severe.
RISK_THRESHOLDS = {
    RiskLevel.CRITICAL: 80,
    RiskLevel.HIGH: 60,
    RiskLevel.MEDIUM: 40,
    RiskLevel.LOW: 0,
}
|
|
|
|
# Action mapping based on risk level
# Only CRITICAL blocks; HIGH and MEDIUM both map to ALERT; LOW passes.
RISK_TO_ACTION = {
    RiskLevel.CRITICAL: ActionClass.BLOCK,
    RiskLevel.HIGH: ActionClass.ALERT,
    RiskLevel.MEDIUM: ActionClass.ALERT,
    RiskLevel.LOW: ActionClass.PASS_,
}
|
|
|
|
# Violation categories, keyed by category code. Every entry has the same shape:
#   "description": short human-readable name of the category
#   "signals":     list of example indicators that content belongs to it
#   "risk_weight": free-text guidance on the expected risk level (not a number)
# The whole mapping is serialized verbatim into the LLM prompt, so every value
# must stay JSON-serializable.
DLP_CATEGORIES = {
    "PII": {
        "description": "Personally Identifiable Information",
        "signals": [
            "Full name combined with email address",
            "Social Security Number (SSN) or employee ID",
            "Phone numbers combined with personal details",
            "Home address combined with personal identifiers",
        ],
        "risk_weight": "HIGH to CRITICAL depending on volume",
    },
    "FINANCIAL_DATA": {
        "description": "Non-public financial information",
        "signals": [
            "Revenue targets, EBITDA projections, internal forecasts",
            "Salary figures, compensation plans",
            "Invoice amounts and vendor payment terms",
            "Internal budget allocations",
        ],
        "risk_weight": "MEDIUM to CRITICAL depending on sensitivity",
    },
    "SOURCE_CODE": {
        "description": "Proprietary source code or model weights",
        "signals": [
            "Python, Java, or other source files with copyright notices",
            "Internal class names and proprietary algorithms",
            "Model architecture files or weight files",
            "Internal API keys or credentials embedded in code",
        ],
        "risk_weight": "CRITICAL",
    },
    "REGULATORY_DOCUMENT": {
        "description": "Internal regulatory and compliance drafts",
        "signals": [
            "CFPB, GDPR, or SOX compliance drafts marked internal",
            "Audit findings or remediation plans",
            "Internal compliance assessments not yet published",
            "Regulatory submission drafts",
        ],
        "risk_weight": "CRITICAL",
    },
    "LEGAL_CONTRACT": {
        "description": "Executed or draft legal agreements",
        "signals": [
            "Non-Disclosure Agreements (NDAs) with named parties",
            "Signed contracts with dates and signatures",
            "Settlement agreements or legal memoranda",
            "Vendor contracts with financial terms",
        ],
        "risk_weight": "HIGH to CRITICAL",
    },
    "PAYROLL_RECORD": {
        "description": "Employee payroll and compensation records",
        "signals": [
            "Employee ID combined with salary and payroll period",
            "Direct deposit details or bank account information",
            "Year-to-date earnings and deductions",
            "HR compensation reports",
        ],
        "risk_weight": "CRITICAL",
    },
    "CUSTOMER_LIST": {
        "description": "Customer or prospect data in bulk",
        "signals": [
            "CSV or table with customer names, emails, and revenue figures",
            "CRM exports with contact details",
            "Prospect lists for sales campaigns",
            "Customer PII in aggregate",
        ],
        "risk_weight": "CRITICAL",
    },
    "INTERNAL_MEMO": {
        "description": "Confidential internal communications",
        "signals": [
            'Documents marked "INTERNAL ONLY" or "DO NOT DISTRIBUTE"',
            "CEO or executive strategy memos",
            "Organizational restructuring plans",
            "Internal performance reviews or headcount discussions",
        ],
        "risk_weight": "HIGH",
    },
}
|
|
|
|
# Human-readable description of when each action applies; embedded in the LLM
# prompt under "action_mapping". The numeric cutoffs in these strings restate
# RISK_THRESHOLDS / RISK_TO_ACTION as prose, so keep them in sync by hand.
ACTION_THRESHOLDS = {
    "BLOCK": "risk_score >= 80 (CRITICAL risk)",
    "ALERT": "risk_score >= 40 (MEDIUM or HIGH risk)",
    "PASS": "risk_score < 40 (LOW risk)",
}
|
|
|
|
|
|
def format_policy_for_prompt() -> str:
    """Format the DLP policy as a JSON string for injection into the LLM system prompt.

    Returns:
        A JSON document (2-space indent) with four top-level keys:
        ``categories``, ``risk_score_thresholds``, ``action_mapping`` and
        ``instructions``.
    """
    policy = {
        "categories": DLP_CATEGORIES,
        # Thresholds are spelled out as text for the model; the numbers are
        # written by hand here and are not derived from RISK_THRESHOLDS.
        "risk_score_thresholds": {
            "CRITICAL": "score >= 80",
            "HIGH": "score >= 60",
            "MEDIUM": "score >= 40",
            "LOW": "score < 40",
        },
        "action_mapping": ACTION_THRESHOLDS,
        # Adjacent literals are concatenated into one string; the trailing
        # space on each fragment separates the sentences.
        "instructions": (
            "Evaluate the email against ALL categories above. "
            "Assign a risk_score from 0 to 100 based on the most severe violation found. "
            "Multiple violations increase the score. "
            "action must match the threshold: BLOCK if score>=80, ALERT if score>=40, PASS otherwise. "
            "evidence must be direct quotes from the actual email or attachment content."
        ),
    }
    return json.dumps(policy, indent=2)
|