Add persisted LLM audit logging
This commit is contained in:
186
app-instance/backend/nanobot/llm_audit.py
Normal file
186
app-instance/backend/nanobot/llm_audit.py
Normal file
@ -0,0 +1,186 @@
|
||||
"""Structured LLM audit logging persisted in backend storage."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from nanobot.utils.helpers import get_logs_path
|
||||
|
||||
# Default number of characters kept by ``_truncate_text`` before the
# truncation marker is appended.
_MAX_TEXT_PREVIEW = 1000
# Larger character budget used when truncating traceback text.
_MAX_TRACEBACK_PREVIEW = 8000
# Placeholder stored in place of any value whose key looks like a secret.
_REDACTED = "***REDACTED***"
# Lower-cased key names whose values must not be persisted. Both snake_case
# and header-style (hyphenated) spellings are listed so that a plain
# ``key.lower() in _SENSITIVE_KEYS`` lookup catches either form.
_SENSITIVE_KEYS = {
    "api_key",
    "api-key",
    "authorization",
    "proxy_authorization",
    "proxy-authorization",
    "x_api_key",
    "x-api-key",
    "token",
    "access_token",
    "refresh_token",
    "secret",
    "password",
}
|
||||
|
||||
|
||||
def get_llm_audit_log_path() -> Path:
    """Build the location of the ``llm_audit.jsonl`` file.

    Returns:
        The path ``llm_audit.jsonl`` inside the directory reported by
        ``get_logs_path()``.
    """
    logs_dir = get_logs_path()
    return logs_dir / "llm_audit.jsonl"
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
    """Return the current time as a timezone-aware UTC ISO 8601 string."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
|
||||
|
||||
|
||||
def _truncate_text(text: str, limit: int = _MAX_TEXT_PREVIEW) -> str:
    """Shorten ``text`` to at most ``limit`` characters plus a marker.

    Args:
        text: The string to shorten.
        limit: Number of leading characters to keep.

    Returns:
        ``text`` unchanged when it already fits within ``limit``; otherwise
        its first ``limit`` characters followed by ``"...(truncated)"``.
    """
    fits = len(text) <= limit
    return text if fits else f"{text[:limit]}...(truncated)"
|
||||
|
||||
|
||||
def _redact_value(key: str, value: Any) -> Any:
    """Return ``value``, or the redaction marker when ``key`` names a secret.

    The key is matched against ``_SENSITIVE_KEYS`` case-insensitively, with
    surrounding whitespace ignored and ``-`` treated as equivalent to ``_``,
    so header spellings such as ``Proxy-Authorization`` match the snake_case
    entries.

    Args:
        key: Name the value is stored under.
        value: The value to inspect.

    Returns:
        ``_REDACTED`` when the key is sensitive and ``value`` is not ``None``;
        otherwise ``value`` unchanged.
    """
    if value is None:
        # Nothing to hide; ``None`` passes through for any key.
        return value
    lowered = key.strip().lower()
    # Check both the literal spelling and its underscore-normalized form.
    candidates = {lowered, lowered.replace("-", "_")}
    if not candidates.isdisjoint(_SENSITIVE_KEYS):
        return _REDACTED
    return value
|
||||
|
||||
|
||||
def redact_mapping(mapping: dict[str, Any] | None) -> dict[str, Any]:
|
||||
"""Redact common secret-like keys in a mapping."""
|
||||
if not mapping:
|
||||
return {}
|
||||
sanitized: dict[str, Any] = {}
|
||||
for key, value in mapping.items():
|
||||
if isinstance(value, dict):
|
||||
sanitized[key] = redact_mapping(value)
|
||||
continue
|
||||
if isinstance(value, list):
|
||||
sanitized[key] = [
|
||||
redact_mapping(item) if isinstance(item, dict) else item
|
||||
for item in value
|
||||
]
|
||||
continue
|
||||
sanitized[key] = _redact_value(str(key), value)
|
||||
return sanitized
|
||||
|
||||
|
||||
def summarize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Build a compact summary of prompt messages for the audit log.

    For each message the summary records its position and ``role``, the
    optional ``name`` / ``tool_call_id`` fields when present, a description
    of ``content`` (kind, size and a truncated preview) and, when the message
    carries a non-empty ``tool_calls`` list, a summary of those calls.

    Content previews are truncated but not passed through redaction.

    Args:
        messages: Chat messages as dicts. ``None`` is treated as an empty
            list, and an entry that is not a dict is summarized by its
            truncated ``str()`` form instead of raising.

    Returns:
        One summary dict per input message, in input order.
    """
    summary: list[dict[str, Any]] = []
    for idx, msg in enumerate(messages or []):
        if not isinstance(msg, dict):
            # Keep the audit trail intact even for unexpected message shapes.
            summary.append({"index": idx, "repr": _truncate_text(str(msg))})
            continue

        item: dict[str, Any] = {"index": idx, "role": msg.get("role")}
        for optional_key in ("name", "tool_call_id"):
            if optional_key in msg:
                item[optional_key] = msg.get(optional_key)

        content = msg.get("content")
        if content is None:
            item["content_kind"] = "none"
        elif isinstance(content, str):
            item["content_kind"] = "text"
            item["content_length"] = len(content)
            item["content_preview"] = _truncate_text(content)
        elif isinstance(content, list):
            item["content_kind"] = "blocks"
            item["content_blocks"] = len(content)
            try:
                # default=str keeps non-JSON block values (bytes, objects)
                # from aborting the summary.
                rendered = json.dumps(content, ensure_ascii=False, default=str)
            except (TypeError, ValueError):
                # e.g. unsupported dict keys or circular references.
                rendered = str(content)
            item["content_preview"] = _truncate_text(rendered)
        else:
            rendered = str(content)
            item["content_kind"] = type(content).__name__
            item["content_length"] = len(rendered)
            item["content_preview"] = _truncate_text(rendered)

        tool_calls = msg.get("tool_calls")
        if isinstance(tool_calls, list) and tool_calls:
            item["tool_calls"] = summarize_tool_calls(tool_calls)

        summary.append(item)
    return summary
|
||||
|
||||
|
||||
def _render_tool_arguments(arguments: Any) -> str:
    """Render tool-call arguments as a truncated preview string."""
    if arguments is None:
        return ""
    if isinstance(arguments, (dict, list)):
        try:
            # Structured arguments are shown as JSON rather than Python repr.
            arguments = json.dumps(arguments, ensure_ascii=False, default=str)
        except (TypeError, ValueError):
            # Fall through to the plain str() rendering below.
            pass
    return _truncate_text(str(arguments))


def summarize_tool_calls(tool_calls: list[Any]) -> list[dict[str, Any]]:
    """Summarize outgoing or incoming tool calls.

    Three shapes are recognized, checked in this order:

    * objects exposing a ``function`` attribute: ``id`` is read from the
      call, ``name`` and ``arguments`` from the ``function`` attribute;
    * dicts: ``id`` is read from the dict, ``name`` and ``arguments`` from
      its ``"function"`` entry when that entry is itself a dict;
    * anything else: recorded as a truncated ``str()`` under ``"repr"``.

    Missing or ``None`` arguments produce an empty preview for both the
    object and the dict shape.

    Args:
        tool_calls: The tool calls to summarize; ``None`` is treated as an
            empty list.

    Returns:
        One summary dict per tool call, in input order.
    """
    summary: list[dict[str, Any]] = []
    for idx, tool_call in enumerate(tool_calls or []):
        if hasattr(tool_call, "function"):
            function = getattr(tool_call, "function")
            summary.append({
                "index": idx,
                "id": getattr(tool_call, "id", None),
                "name": getattr(function, "name", None),
                "arguments_preview": _render_tool_arguments(
                    getattr(function, "arguments", None)
                ),
            })
        elif isinstance(tool_call, dict):
            fn = tool_call.get("function")
            if not isinstance(fn, dict):
                fn = {}
            summary.append({
                "index": idx,
                "id": tool_call.get("id"),
                "name": fn.get("name"),
                "arguments_preview": _render_tool_arguments(fn.get("arguments")),
            })
        else:
            summary.append({
                "index": idx,
                "repr": _truncate_text(str(tool_call)),
            })
    return summary
|
||||
|
||||
|
||||
def summarize_tools(tools: list[dict[str, Any]] | None) -> list[dict[str, Any]]:
|
||||
"""Summarize tool definitions sent to the provider."""
|
||||
if not tools:
|
||||
return []
|
||||
summary: list[dict[str, Any]] = []
|
||||
for idx, tool in enumerate(tools):
|
||||
function = tool.get("function") if isinstance(tool, dict) else None
|
||||
entry = {
|
||||
"index": idx,
|
||||
"type": tool.get("type") if isinstance(tool, dict) else None,
|
||||
}
|
||||
if isinstance(function, dict):
|
||||
entry["name"] = function.get("name")
|
||||
params = function.get("parameters")
|
||||
if params is not None:
|
||||
entry["parameters_preview"] = _truncate_text(json.dumps(params, ensure_ascii=False))
|
||||
else:
|
||||
entry["preview"] = _truncate_text(str(tool))
|
||||
summary.append(entry)
|
||||
return summary
|
||||
|
||||
|
||||
def write_llm_audit_event(event: dict[str, Any]) -> None:
    """Append one JSONL audit event to backend storage.

    The event is written as a single JSON line to the path returned by
    ``get_llm_audit_log_path()``, with a ``"ts"`` UTC timestamp added. A
    ``"ts"`` key already present in ``event`` takes precedence because the
    event's keys are merged after the generated timestamp.

    Writing is best-effort: any failure while resolving the path, creating
    the directory, serializing or writing is logged as a warning and not
    raised to the caller.

    Args:
        event: Fields to record. Values that are not JSON-serializable are
            stored as their ``str()`` form instead of dropping the event.
    """
    payload = {
        "ts": _utc_now_iso(),
        **(event or {}),
    }
    try:
        # Serialize before touching the filesystem so that a bad payload
        # never creates or opens the log file.
        line = json.dumps(payload, ensure_ascii=False, default=str)
        path = get_llm_audit_log_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except Exception as exc:
        # Audit logging must not take down the calling request path.
        logger.warning("Failed to persist LLM audit log: {}", exc)
|
||||
|
||||
|
||||
def summarize_exception(exc: BaseException) -> dict[str, str]:
    """Reduce an exception to its class name and message text.

    Args:
        exc: The exception to describe.

    Returns:
        A dict with ``"type"`` (the exception class name) and ``"message"``
        (the result of ``str(exc)``).
    """
    class_name = type(exc).__name__
    message = str(exc)
    return dict(type=class_name, message=message)
|
||||
|
||||
|
||||
def truncate_traceback(text: str) -> str:
    """Truncate traceback text using the ``_MAX_TRACEBACK_PREVIEW`` limit.

    Args:
        text: The traceback text to shorten.

    Returns:
        The result of ``_truncate_text`` applied with the traceback limit.
    """
    shortened = _truncate_text(text, limit=_MAX_TRACEBACK_PREVIEW)
    return shortened
|
||||
Reference in New Issue
Block a user