# beaver_project/app-instance/backend-old/nanobot/utils/helpers.py

"""nanobot 通用工具函数集合。

这个文件放的是“跨模块都会用到的小函数”，特点是：
- 逻辑简单、无副作用或副作用可预期
- 不依赖复杂业务对象
- 主要负责路径处理、字符串处理、时间格式等基础能力
"""

import shutil
from datetime import datetime
from pathlib import Path


def ensure_dir(path: Path) -> Path:
    """Create ``path`` as a directory if it is missing and hand it back.

    Centralizes the "make the directory before writing into it" pattern so
    call sites do not each repeat their own ``mkdir`` logic.

    Args:
        path: Target directory.

    Returns:
        The very same ``path`` object, which makes the helper convenient to
        use inline or in an assignment.
    """
    # exist_ok keeps repeated calls harmless; parents builds any missing
    # ancestor directories along the way.
    path.mkdir(exist_ok=True, parents=True)
    return path


def get_data_path() -> Path:
    """Return the global nanobot data directory (``~/.nanobot``).

    The directory is created on demand, so it exists once this call
    returns. According to the original notes it typically holds
    ``config.json``, history data and runtime cache/state files.
    """
    data_dir = Path.home() / ".nanobot"
    # Route through ensure_dir so callers never see a missing directory.
    return ensure_dir(data_dir)


def get_logs_path() -> Path:
    """Return the backend log directory (``~/.nanobot/logs``), creating it if needed."""
    logs_dir = get_data_path() / "logs"
    return ensure_dir(logs_dir)


def get_legacy_cron_store_path() -> Path:
    """Return the legacy global cron store path (``~/.nanobot/cron/jobs.json``).

    Only the global data directory is created as a side effect of
    ``get_data_path()``; the ``cron`` folder and the file itself are not.
    """
    data_root = get_data_path()
    return data_root / "cron" / "jobs.json"


def get_workspace_path(workspace: str | None = None) -> Path:
    """Resolve the workspace directory and make sure it exists.

    Args:
        workspace: Optional workspace location. When given (and non-empty)
            it is used as-is after expanding a leading ``~``; otherwise the
            default ``~/.nanobot/workspace`` is used.

    Returns:
        The resolved workspace path; the directory is created if missing.
    """
    # An explicit choice by the caller wins over the default location.
    resolved = (
        Path(workspace).expanduser()
        if workspace
        else Path.home() / ".nanobot" / "workspace"
    )
    # Creating the directory here spares downstream writers a
    # "No such file or directory" failure.
    return ensure_dir(resolved)


def get_workspace_state_path(workspace: Path | str | None = None) -> Path:
    """Return the per-workspace runtime state directory (``<workspace>/state``).

    Args:
        workspace: Workspace location. A ``Path`` is expanded (``~``) and
            created directly; a string or ``None`` is delegated to
            ``get_workspace_path``.

    Returns:
        The ``state`` directory under the workspace, created if missing.
    """
    if not isinstance(workspace, Path):
        # Strings and None share the default-resolution logic.
        root = get_workspace_path(workspace)
    else:
        root = ensure_dir(workspace.expanduser())
    state_dir = root / "state"
    return ensure_dir(state_dir)


def get_cron_store_path(workspace: Path | str | None = None) -> Path:
    """Return the per-workspace cron store path, migrating the legacy store if needed.

    The store lives at ``<workspace>/state/cron/jobs.json``. If that file
    does not exist yet but the legacy global store does, the legacy file is
    moved into place.

    Args:
        workspace: Workspace location, forwarded to
            ``get_workspace_state_path``.

    Returns:
        The workspace-level store path, or the legacy path when the
        migration attempt raised.
    """
    cron_dir = get_workspace_state_path(workspace) / "cron"
    cron_dir.mkdir(parents=True, exist_ok=True)
    target = cron_dir / "jobs.json"

    legacy = get_legacy_cron_store_path()
    # Nothing to migrate: either the new store is already there or there is
    # no legacy store to bring over.
    if target.exists() or not legacy.exists():
        return target

    try:
        shutil.move(str(legacy), str(target))
    except Exception:
        # Best effort: keep pointing at the legacy file so existing jobs do
        # not appear to vanish when the move fails.
        return legacy
    return target


def get_sessions_path() -> Path:
    """Return the session persistence directory (``~/.nanobot/sessions``).

    Sessions are kept under the global data directory rather than inside a
    workspace, so session history is retained when the workspace changes.
    """
    sessions_dir = get_data_path() / "sessions"
    return ensure_dir(sessions_dir)


def get_skills_path(workspace: Path | None = None) -> Path:
    """Return the ``skills`` directory inside a workspace.

    Args:
        workspace: Optional workspace path. When omitted, the default
            workspace from ``get_workspace_path()`` is used.

    Returns:
        ``<workspace>/skills``; the directory is created if missing.
    """
    if workspace:
        root = workspace
    else:
        # No workspace supplied: fall back to the default one.
        root = get_workspace_path()
    skills_dir = root / "skills"
    return ensure_dir(skills_dir)


def timestamp() -> str:
    """Return the current local time as an ISO 8601 string.

    Example output: ``2026-02-24T11:08:00.123456``. The value is naive
    (no timezone offset) because it comes from ``datetime.now()`` without
    a ``tz`` argument.
    """
    current = datetime.now()
    return current.isoformat()


def truncate_string(s: str, max_len: int = 100, suffix: str = "...") -> str:
    """Truncate ``s`` so the result never exceeds ``max_len`` characters.

    Rules:
    - ``len(s) <= max_len``: ``s`` is returned unchanged.
    - ``len(s) > max_len`` and ``max_len >= len(suffix)``: ``s`` is cut and
      ``suffix`` appended, giving a result of exactly ``max_len`` characters.
    - ``max_len < len(suffix)``: there is no room for the suffix, so ``s``
      is hard-cut to ``max_len`` characters without it. A negative
      ``max_len`` is treated as 0.

    Args:
        s: Text to shorten.
        max_len: Maximum length of the returned string.
        suffix: Marker appended when text was removed.

    Returns:
        The original or shortened string, at most ``max(max_len, 0)``
        characters long.
    """
    if len(s) <= max_len:
        return s

    keep = max_len - len(suffix)
    if keep < 0:
        # A negative slice bound would count from the END of the string and
        # yield a result longer than max_len, so cut hard instead.
        return s[: max(max_len, 0)]
    # Reserve room for the suffix so the total length stays at max_len.
    return s[:keep] + suffix


def safe_filename(name: str) -> str:
    """Turn an arbitrary string into a reasonably safe file-name fragment.

    Each of the characters ``< > : " / \\ | ? *`` (commonly rejected in file
    names, notably on Windows) is replaced by ``_``; leading and trailing
    whitespace is then stripped.

    Typical use: converting session keys or other dynamic strings into
    names that can be written to disk.

    Args:
        name: Raw text to sanitize.

    Returns:
        The sanitized text.
    """
    # One translation table maps every unsafe character to an underscore.
    table = str.maketrans(dict.fromkeys('<>:"/\\|?*', "_"))
    cleaned = name.translate(table)
    # Trim surrounding whitespace so the file name stays easy to recognize.
    return cleaned.strip()


def parse_session_key(key: str) -> tuple[str, str]:
    """Split a session key into ``(channel, chat_id)``.

    Args:
        key: Session key of the form ``"channel:chat_id"``.

    Returns:
        The pair ``(channel, chat_id)``.

    Raises:
        ValueError: If ``key`` contains no ``:`` separator.
    """
    # Split at the first colon only, so a chat_id that itself contains
    # colons stays intact:
    # "system:telegram:12345" -> ("system", "telegram:12345")
    channel, separator, chat_id = key.partition(":")
    if not separator:
        raise ValueError(f"Invalid session key: {key}")
    return channel, chat_id