# beaver_project/app-instance/backend-old/nanobot/llm_audit.py

"""Structured LLM audit logging persisted in backend storage."""

from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from loguru import logger

from nanobot.utils.helpers import get_logs_path

# Default character budget for text previews (default limit of _truncate_text).
_MAX_TEXT_PREVIEW = 1000
# Larger character budget applied to traceback text (see truncate_traceback).
_MAX_TRACEBACK_PREVIEW = 8000
# Placeholder stored in place of any value found under a sensitive key.
_REDACTED = "***REDACTED***"
# Lowercase key names whose values are replaced by _REDACTED.
_SENSITIVE_KEYS = {
    # Generic credential names.
    "password",
    "secret",
    "token",
    "access_token",
    "refresh_token",
    # API-key spellings.
    "api_key",
    "x_api_key",
    "x-api-key",
    # Authorization-style names.
    "authorization",
    "proxy_authorization",
}


def get_llm_audit_log_path() -> Path:
    """Locate the JSONL file that LLM audit events are appended to.

    Returns:
        ``llm_audit.jsonl`` directly inside the directory returned by
        ``get_logs_path()``.
    """
    logs_dir = get_logs_path()
    return logs_dir / "llm_audit.jsonl"


def _utc_now_iso() -> str:
    """Return the current time as a timezone-aware UTC ISO 8601 string."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()


def _truncate_text(text: str, limit: int = _MAX_TEXT_PREVIEW) -> str:
    """Clip *text* to a bounded preview.

    Args:
        text: The string to shorten.
        limit: Maximum number of characters kept from *text*.

    Returns:
        *text* unchanged when it fits within *limit*; otherwise its first
        *limit* characters followed by the marker ``...(truncated)``.
    """
    fits = len(text) <= limit
    return text if fits else text[:limit] + "...(truncated)"


def _redact_value(key: str, value: Any) -> Any:
    """Mask *value* when *key* names a credential.

    The key is matched case-insensitively against ``_SENSITIVE_KEYS``. It is
    also matched with ``-`` treated as ``_`` so that HTTP header spellings
    such as ``Proxy-Authorization`` or ``Access-Token`` hit their underscore
    entries instead of slipping through unredacted.

    Args:
        key: Name under which *value* is stored.
        value: The value to inspect.

    Returns:
        ``_REDACTED`` when the key is sensitive and *value* is not ``None``;
        otherwise *value* unchanged.
    """
    if value is None:
        # Nothing to hide; keeping None shows the field was present but unset.
        return value
    lowered = key.lower()
    # Try the raw spelling first so hyphenated entries in the set still match,
    # then the underscore-normalized spelling for header-style names.
    if lowered in _SENSITIVE_KEYS or lowered.replace("-", "_") in _SENSITIVE_KEYS:
        return _REDACTED
    return value


def _redact_nested(value: Any) -> Any:
    """Sanitize dicts reachable through *value*, descending into nested lists."""
    if isinstance(value, dict):
        return redact_mapping(value)
    if isinstance(value, list):
        return [_redact_nested(item) for item in value]
    return value


def redact_mapping(mapping: dict[str, Any] | None) -> dict[str, Any]:
    """Return a copy of *mapping* with secret-like values redacted.

    Every key is first passed to ``_redact_value``. When that helper masks
    the value, the mask is stored as-is, even if the original value was a
    dict or list, so structured credentials never reach the result. Values
    under other keys are copied; dicts are sanitized recursively, including
    dicts nested inside lists of lists.

    Args:
        mapping: The mapping to sanitize; ``None`` or empty yields ``{}``.

    Returns:
        A new dict; the input mapping is not modified.
    """
    if not mapping:
        return {}
    sanitized: dict[str, Any] = {}
    for key, value in mapping.items():
        masked = _redact_value(str(key), value)
        if masked is not value:
            # Sensitive key: hide the whole value rather than recursing into
            # it, otherwise nested credential fields would leak.
            sanitized[key] = masked
        else:
            sanitized[key] = _redact_nested(value)
    return sanitized


def summarize_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Condense prompt messages into compact, bounded summaries.

    Each summary holds the message's position and role, its ``name`` and
    ``tool_call_id`` when those keys are present, a description of the
    content (kind, size, truncated preview) and, for a non-empty
    ``tool_calls`` list, the output of ``summarize_tool_calls``.

    Args:
        messages: Message dicts to summarize.

    Returns:
        One summary dict per message, in input order.
    """
    digests: list[dict[str, Any]] = []
    for position, message in enumerate(messages):
        digest: dict[str, Any] = {"index": position, "role": message.get("role")}

        # Optional identity fields are copied only when the message has them.
        for field in ("name", "tool_call_id"):
            if field in message:
                digest[field] = message.get(field)

        body = message.get("content")
        if isinstance(body, str):
            digest.update({
                "content_kind": "text",
                "content_length": len(body),
                "content_preview": _truncate_text(body),
            })
        elif isinstance(body, list):
            # Block content is previewed through its JSON rendering.
            digest.update({
                "content_kind": "blocks",
                "content_blocks": len(body),
                "content_preview": _truncate_text(json.dumps(body, ensure_ascii=False)),
            })
        elif body is None:
            digest["content_kind"] = "none"
        else:
            # Any other type is described by its type name and str() form.
            as_text = str(body)
            digest.update({
                "content_kind": type(body).__name__,
                "content_length": len(as_text),
                "content_preview": _truncate_text(as_text),
            })

        calls = message.get("tool_calls")
        if isinstance(calls, list) and calls:
            digest["tool_calls"] = summarize_tool_calls(calls)

        digests.append(digest)
    return digests


def summarize_tool_calls(tool_calls: list[Any]) -> list[dict[str, Any]]:
    """Summarize outgoing or incoming tool calls.

    Three shapes are accepted per entry:

    * an object exposing a ``function`` attribute (``id``, ``function.name``
      and ``function.arguments`` are read with ``getattr``);
    * a dict with an optional ``function`` dict;
    * anything else, which is summarized by its truncated ``str()`` form
      under the ``repr`` key.

    For the first two shapes, missing or ``None`` arguments produce an empty
    ``arguments_preview``; both shapes share the same rendering so a dict
    carrying ``"arguments": None`` is no longer previewed as ``"None"``.

    Args:
        tool_calls: Tool call entries in any of the shapes above.

    Returns:
        One summary dict per entry, in input order.
    """
    summary: list[dict[str, Any]] = []
    for idx, tool_call in enumerate(tool_calls):
        if hasattr(tool_call, "function"):
            function = getattr(tool_call, "function")
            call_id = getattr(tool_call, "id", None)
            name = getattr(function, "name", None)
            arguments = getattr(function, "arguments", None)
        elif isinstance(tool_call, dict):
            function = tool_call.get("function")
            # A non-dict "function" entry is treated as absent.
            fn = function if isinstance(function, dict) else {}
            call_id = tool_call.get("id")
            name = fn.get("name")
            arguments = fn.get("arguments")
        else:
            summary.append({
                "index": idx,
                "repr": _truncate_text(str(tool_call)),
            })
            continue

        summary.append({
            "index": idx,
            "id": call_id,
            "name": name,
            "arguments_preview": _truncate_text("" if arguments is None else str(arguments)),
        })
    return summary


def summarize_tools(tools: list[dict[str, Any]] | None) -> list[dict[str, Any]]:
    """Describe the tool definitions sent to the provider.

    Args:
        tools: Tool definitions; ``None`` or empty yields ``[]``.

    Returns:
        One record per tool with its position and ``type``. When the tool
        has a ``function`` dict, the record carries the function name and,
        if parameters are given, a truncated JSON preview of them; otherwise
        it carries a truncated ``str()`` preview of the whole tool.
    """
    described: list[dict[str, Any]] = []
    for position, spec in enumerate(tools or []):
        is_mapping = isinstance(spec, dict)
        fn_spec = spec.get("function") if is_mapping else None
        record = {
            "index": position,
            "type": spec.get("type") if is_mapping else None,
        }
        if not isinstance(fn_spec, dict):
            # No usable function section: fall back to a raw preview.
            record["preview"] = _truncate_text(str(spec))
        else:
            record["name"] = fn_spec.get("name")
            schema = fn_spec.get("parameters")
            if schema is not None:
                record["parameters_preview"] = _truncate_text(
                    json.dumps(schema, ensure_ascii=False)
                )
        described.append(record)
    return described


def write_llm_audit_event(event: dict[str, Any]) -> None:
    """Append one JSONL audit event to backend storage.

    The event is written as a single JSON line with a ``ts`` field holding
    the current UTC timestamp. Keys in *event* take precedence over that
    default, so an event may supply its own ``ts``.

    Persisting is best-effort: failures while serializing the event,
    resolving the log path, creating the log directory or writing the file
    are logged as a warning and never raised to the caller.

    Args:
        event: JSON-serializable fields describing the audit event.
    """
    payload = {
        "ts": _utc_now_iso(),
        **event,
    }
    try:
        # Serialize before touching the filesystem so an unserializable
        # event leaves nothing behind.
        line = json.dumps(payload, ensure_ascii=False) + "\n"
        path = get_llm_audit_log_path()
        # Directory creation sits inside the try so that a permissions or
        # disk error cannot propagate out of this best-effort logger.
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("a", encoding="utf-8") as fh:
            fh.write(line)
    except Exception as exc:
        logger.warning("Failed to persist LLM audit log: {}", exc)


def summarize_exception(exc: BaseException) -> dict[str, str]:
    """Reduce an exception to its class name and message text.

    Args:
        exc: The exception to describe.

    Returns:
        A dict with ``type`` (the exception class name) and ``message``
        (the result of ``str(exc)``).
    """
    kind = type(exc).__name__
    text = str(exc)
    return {"type": kind, "message": text}


def truncate_traceback(text: str) -> str:
    """Clip traceback text to the ``_MAX_TRACEBACK_PREVIEW`` character budget.

    Args:
        text: Traceback text to shorten.

    Returns:
        The result of ``_truncate_text`` applied with the traceback limit.
    """
    return _truncate_text(text, limit=_MAX_TRACEBACK_PREVIEW)