This commit is contained in:
33
.github/workflows/ci-cd.yml
vendored
Normal file
33
.github/workflows/ci-cd.yml
vendored
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# Builds the service image from ./Dockerfile and pushes it to the Harbor
# registry on every push to main, or on manual dispatch.
name: Build container

env:
  # Image tag; bump manually, otherwise each run overwrites the same tag.
  VERSION: 0.0.1
  # REGISTRY is used for login; REGISTRY_NAME (host only) prefixes the image tag.
  REGISTRY: https://harbor.bwgdi.com
  REGISTRY_NAME: harbor.bwgdi.com
  REGISTRY_PATH: library
  DOCKER_NAME: fun-asr

on:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  build-docker:
    runs-on: builder-ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # The target is the Harbor registry configured above, not Docker Hub.
      - name: Login to Harbor registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.BWGDI_NAME }}
          password: ${{ secrets.BWGDI_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ env.REGISTRY_NAME }}/${{ env.REGISTRY_PATH }}/${{ env.DOCKER_NAME }}:${{ env.VERSION }}
|
||||||
38
.gitignore
vendored
Normal file
38
.gitignore
vendored
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# Python-generated files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
build/
dist/
wheels/
*.egg-info/

# Unit test / coverage reports
.pytest_cache/
.coverage
htmlcov/
coverage.xml

# Logs
*.log
log/*.log

# Virtual environments
.venv/
venv/
env/

# IDE settings
.vscode/
.idea/

# OS generated files
.DS_Store

# Generated files
*.wav
*.pdf

*.lock
# Keep the uv lockfile under version control: the Dockerfile copies uv.lock
# into the image, so it must be present in the build context.
!uv.lock
|
||||||
21
Dockerfile
Normal file
21
Dockerfile
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
FROM python:3.12-slim

# Install ffmpeg (presumably needed for audio decoding - confirm) and drop the
# apt package lists in the same layer to keep the image small.
RUN apt-get update && apt-get -y install \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Bring in the uv / uvx binaries from the official uv image.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Create app directory
WORKDIR /app

# Copy dependency definition files.
# The trailing glob makes uv.lock optional: the repository's .gitignore
# excludes *.lock, and a plain "uv.lock" source would abort the build when the
# file is missing from the context. Without a lockfile, uv resolves versions
# from pyproject.toml at build time.
COPY pyproject.toml uv.lock* ./

# Install dependencies (long HTTP timeout for large wheel downloads)
ENV UV_HTTP_TIMEOUT=1200
RUN uv sync

# Copy the rest of the application
COPY . .

EXPOSE 5000
CMD [ "uv", "run", "api.py" ]
|
||||||
133
api.py
Normal file
133
api.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import uuid
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
||||||
|
|
||||||
|
# 导入两种模式需要的库
|
||||||
|
from funasr import AutoModel
|
||||||
|
from model import FunASRNano
|
||||||
|
from tools.utils import load_audio
|
||||||
|
|
||||||
|
app = FastAPI(title="FunASR Dual-Mode API")

# --- Runtime configuration ---
# Device preference order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda:0"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Model location can be overridden through the MODEL_DIR environment variable.
MODEL_DIR = os.environ.get("MODEL_DIR", "/models/Fun-ASR-Nano-2512")
# Uploaded audio is staged here for the duration of a request.
TEMP_DIR = "./temp_audio"
os.makedirs(TEMP_DIR, exist_ok=True)

# --- Global model initialisation (runs once at import time) ---
print("正在加载 AutoModel (Mode 1)...")
_auto_model_options = {
    "model": MODEL_DIR,
    "trust_remote_code": True,
    "vad_model": "fsmn-vad",
    "vad_kwargs": {"max_single_segment_time": 30000},
    "device": device,
    "hub": "ms",
}
model_auto = AutoModel(**_auto_model_options)

print("正在加载 Direct Model (Mode 2)...")
_direct_loaded = FunASRNano.from_pretrained(model=MODEL_DIR, device=device)
model_direct, direct_kwargs = _direct_loaded
# The tokenizer (if the loader supplied one) is used by the chunked endpoint.
tokenizer = direct_kwargs.get("tokenizer")
model_direct.eval()
|
||||||
|
|
||||||
|
|
||||||
|
# --- 接口 1: Using FunASR for Inference ---
|
||||||
|
@app.post("/inference/funasr")
async def inference_funasr(
    file: UploadFile = File(...),
    language: str = Form("中文"),
    itn: str = Form("true"),
    hotwords: str = Form(""),
):
    """Transcribe an uploaded audio file through the global ``model_auto``.

    Args:
        file: Uploaded audio; staged to a temporary file for the model.
        language: Language name forwarded to ``model_auto.generate``.
        itn: Textual flag; "true", "1" or "t" (case-insensitive) enables
            inverse text normalization, anything else disables it.
        hotwords: Hotword string; an empty value is forwarded as ``None``.

    Returns:
        ``{"status": "success", "text": <transcript>}``. The text is empty
        when the model returns no result entries.

    Raises:
        HTTPException: 500 with the error message when inference fails.
    """
    temp_path = save_temp_file(file)
    try:
        # Form values may arrive wrapped in whitespace or literal double
        # quotes; normalise all three text fields the same way.
        is_itn = itn.strip().strip('"').lower() in {"true", "1", "t"}
        clean_lang = language.strip().strip('"')
        clean_hw = hotwords.strip().strip('"')

        # Key point: no cache is passed, and empty hotwords become None.
        res = model_auto.generate(
            input=temp_path,
            batch_size=1,
            hotwords=clean_hw or None,
            language=clean_lang,
            itn=is_itn,
        )

        # Guard against an empty result list instead of failing with an
        # IndexError that would surface as an opaque 500.
        text = res[0]["text"] if res else ""
        return {"status": "success", "text": text}
    except Exception as e:
        import traceback

        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always delete the staged upload, on success and on failure.
        remove_temp_file(temp_path)
|
||||||
|
|
||||||
|
|
||||||
|
# --- 接口 2: Direct Inference ---
|
||||||
|
@app.post("/inference/direct")
async def inference_direct(
    file: UploadFile = File(...),
    chunk_mode: bool = Form(False),  # enable the chunked decoding loop
):
    """Run inference by calling the global ``model_direct`` directly.

    Args:
        file: Uploaded audio; staged to a temporary file for the model.
        chunk_mode: When false, the whole file is decoded in one call. When
            true, the audio is re-decoded on growing prefixes in 0.72 s
            steps, feeding each step's text back in as ``prev_text``.

    Returns:
        ``{"status": "success", "mode": "direct", "text": ...}``. In standard
        mode ``text`` is the first result entry exactly as the model returned
        it; in chunk mode it is the final transcript string.

    Raises:
        HTTPException: 500 with the error message when inference fails.
    """
    temp_path = save_temp_file(file)
    try:
        if not chunk_mode:
            # Mode A: standard single-pass inference.
            res = model_direct.inference(data_in=[temp_path], **direct_kwargs)
            text = res[0][0]
        else:
            # Mode B: simulated streaming over cumulative prefixes.
            duration = sf.info(temp_path).duration
            chunk_size = 0.72
            cum_durations = np.arange(chunk_size, duration + chunk_size, chunk_size)
            last_idx = len(cum_durations) - 1
            prev_text = ""

            for idx, cum_duration in enumerate(cum_durations):
                audio, _rate = load_audio(temp_path, 16000, duration=round(cum_duration, 3))
                step_res = model_direct.inference(
                    [torch.tensor(audio).to(device)],
                    prev_text=prev_text,
                    **direct_kwargs,
                )
                prev_text = step_res[0][0]["text"]

                # For every step but the last, drop the trailing 5 tokens of
                # the hypothesis before feeding it back. Cutting tokens can
                # split a multi-byte character, which decodes to U+FFFD; strip
                # it so it does not leak into the next prompt. (The previous
                # ``replace("", "")`` was a no-op.)
                if idx != last_idx and tokenizer:
                    kept_tokens = tokenizer.encode(prev_text)[:-5]
                    prev_text = tokenizer.decode(kept_tokens).replace("\ufffd", "")

            text = prev_text

        return {"status": "success", "mode": "direct", "text": text}
    except Exception as e:
        # Log the traceback, matching the /inference/funasr endpoint.
        import traceback

        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always delete the staged upload, on success and on failure.
        remove_temp_file(temp_path)
|
||||||
|
|
||||||
|
|
||||||
|
# --- 工具函数 ---
|
||||||
|
def save_temp_file(upload_file):
    """Copy an uploaded file into TEMP_DIR and return the path written.

    Args:
        upload_file: Object exposing ``filename`` (str or None) and ``file``
            (a readable binary file object), e.g. a FastAPI ``UploadFile``.

    Returns:
        Path of the new file: a random UUID name that keeps the extension of
        the original filename, if it had one.
    """
    # ``filename`` can be None when the client sends no name; fall back to an
    # empty string so splitext yields "" instead of raising TypeError.
    ext = os.path.splitext(upload_file.filename or "")[1]
    path = os.path.join(TEMP_DIR, f"{uuid.uuid4()}{ext}")
    with open(path, "wb") as buffer:
        shutil.copyfileobj(upload_file.file, buffer)
    return path
|
||||||
|
|
||||||
|
def remove_temp_file(path):
    """Delete the file at ``path``; do nothing if it is already gone.

    Uses try/except rather than an exists() check so that a file removed
    between the check and the delete cannot raise FileNotFoundError.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 5000.
    from uvicorn import run as _serve

    _serve(app, host="0.0.0.0", port=5000)
|
||||||
27
pyproject.toml
Normal file
27
pyproject.toml
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
[project]
name = "Fun-ASR"
version = "0.1.0"
description = "语音识别/处理相关项目"
readme = "README.md"
requires-python = ">=3.12,<3.13"
dependencies = [
    "torch>=2.9.0",
    "torchaudio>=2.9.0",
    "transformers>=4.51.3",
    "funasr>=1.3.0",
    "zhconv",
    "whisper_normalizer",
    "pyopenjtalk-plus",
    "compute-wer",
    "openai-whisper",
    "python-multipart==0.0.20",
    "fastapi>=0.128.0",
    "uvicorn>=0.40.0",
    # Imported directly by api.py; declared explicitly rather than relying on
    # them arriving as transitive dependencies.
    "numpy",
    "soundfile",
]

[tool.uv]
package = false  # this project is an application, not an installable library

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
|
||||||
147
readme_bw.md
Normal file
147
readme_bw.md
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
# FunASR Dual-Mode API
|
||||||
|
|
||||||
|
This is a speech recognition (ASR) service built on FastAPI, integrating two inference modes of FunASR to provide flexible speech transcription capabilities.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
The service provides two main inference interfaces:
|
||||||
|
|
||||||
|
1. **AutoModel Mode (`/inference/funasr`)**:
|
||||||
|
* Uses the `funasr.AutoModel` high-level interface.
|
||||||
|
* Integrates VAD (Voice Activity Detection).
|
||||||
|
* Supports Hotwords enhancement.
|
||||||
|
* Supports ITN (Inverse Text Normalization).
|
||||||
|
* Supports multi-language configuration.
|
||||||
|
|
||||||
|
2. **Direct Model Mode (`/inference/direct`)**:
|
||||||
|
* Directly calls the underlying `FunASRNano` model.
|
||||||
|
* Supports standard full inference.
|
||||||
|
* Supports simulated streaming/chunk inference (Chunk Mode) for testing the model's incremental decoding capabilities.
|
||||||
|
|
||||||
|
## Environment Setup
|
||||||
|
|
||||||
|
### Dependency Installation
|
||||||
|
|
||||||
|
This project uses `uv` for dependency management. Please ensure `uv` is installed, then run the following command in the project root directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv sync
|
||||||
|
```
|
||||||
|
|
||||||
|
### Model Configuration
|
||||||
|
|
||||||
|
The default model path is configured as `/models/Fun-ASR-Nano-2512`. If your model is located elsewhere, please set the environment variable `MODEL_DIR`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export MODEL_DIR="/your/absolute/path/to/model"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Start Service
|
||||||
|
|
||||||
|
You can start the service directly using the uv script (default port 5000):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run api.py
|
||||||
|
```
|
||||||
|
|
||||||
|
The service will automatically detect the computing device (CUDA > MPS > CPU) upon startup.
|
||||||
|
|
||||||
|
### Docker Startup
|
||||||
|
|
||||||
|
If deploying with Docker, you can refer to the following command. You can specify a custom model path using `-e MODEL_DIR`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run -d --restart always -p 5000:5000 --gpus "device=1" \
|
||||||
|
-e MODEL_DIR="/models/Fun-ASR-Nano-2512" \
|
||||||
|
--mount type=bind,source=/your/path/model/Fun-ASR-Nano-2512,target=/models/Fun-ASR-Nano-2512 \
|
||||||
|
harbor.bwgdi.com/library/fun-asr:0.0.1
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Documentation
|
||||||
|
|
||||||
|
### 1. FunASR Standard Inference Interface
|
||||||
|
|
||||||
|
* **URL**: `/inference/funasr`
|
||||||
|
* **Method**: `POST`
|
||||||
|
* **Content-Type**: `multipart/form-data`
|
||||||
|
|
||||||
|
| Parameter Name | Type | Required | Default | Description |
|
||||||
|
| :--- | :--- | :--- | :--- | :--- |
|
||||||
|
| `file` | File | Yes | - | Audio file |
|
||||||
|
| `language` | String | No | "中文" | Target language |
|
||||||
|
| `itn` | String | No | "true" | Whether to enable Inverse Text Normalization (true/false) |
|
||||||
|
| `hotwords` | String | No | "" | List of hotwords to improve recognition rate of specific vocabulary |
|
||||||
|
|
||||||
|
**Example**:
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://127.0.0.1:5000/inference/funasr" \
|
||||||
|
-F "file=@/path/to/audio.wav" \
|
||||||
|
-F "hotwords=开放时间"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Direct Underlying Inference Interface
|
||||||
|
|
||||||
|
* **URL**: `/inference/direct`
|
||||||
|
* **Method**: `POST`
|
||||||
|
* **Content-Type**: `multipart/form-data`
|
||||||
|
|
||||||
|
| Parameter Name | Type | Required | Default | Description |
|
||||||
|
| :--- | :--- | :--- | :--- | :--- |
|
||||||
|
| `file` | File | Yes | - | Audio file |
|
||||||
|
| `chunk_mode` | Boolean | No | False | Whether to enable chunk simulation mode (true/false) |
|
||||||
|
|
||||||
|
**Example**:
|
||||||
|
```bash
|
||||||
|
# Enable chunk simulation mode
|
||||||
|
curl -X POST "http://127.0.0.1:5000/inference/direct" \
|
||||||
|
-F "file=@/path/to/audio.wav" \
|
||||||
|
-F "chunk_mode=true"
|
||||||
|
```
|
||||||
|
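**Response** (standard mode, `chunk_mode=false`; with `chunk_mode=true` the `text` field is a plain string containing only the transcript):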
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "success",
|
||||||
|
"mode": "direct",
|
||||||
|
"text": {
|
||||||
|
"key": "rand_key_WgNZq6ITZM5jt",
|
||||||
|
"text": "你好。",
|
||||||
|
"text_tn": "你好",
|
||||||
|
"label": "null",
|
||||||
|
"ctc_text": "你好",
|
||||||
|
"ctc_timestamps": [
|
||||||
|
{
|
||||||
|
"token": "你",
|
||||||
|
"start_time": 1.8,
|
||||||
|
"end_time": 1.86,
|
||||||
|
"score": 0.908
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"token": "好",
|
||||||
|
"start_time": 2.16,
|
||||||
|
"end_time": 2.22,
|
||||||
|
"score": 0.988
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timestamps": [
|
||||||
|
{
|
||||||
|
"token": "你",
|
||||||
|
"start_time": 1.8,
|
||||||
|
"end_time": 1.86,
|
||||||
|
"score": 0.908
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"token": "好",
|
||||||
|
"start_time": 2.16,
|
||||||
|
"end_time": 2.22,
|
||||||
|
"score": 0.988
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"token": "。",
|
||||||
|
"start_time": 2.88,
|
||||||
|
"end_time": 2.94,
|
||||||
|
"score": 0.0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
147
readme_bw_zh.md
Normal file
147
readme_bw_zh.md
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
# FunASR Dual-Mode API
|
||||||
|
|
||||||
|
这是一个基于 FastAPI 构建的语音识别(ASR)服务,集成了 FunASR 的两种推理模式,旨在提供灵活的语音转写能力。
|
||||||
|
|
||||||
|
## 功能特性
|
||||||
|
|
||||||
|
服务提供了两个主要的推理接口:
|
||||||
|
|
||||||
|
1. **AutoModel 模式 (`/inference/funasr`)**:
|
||||||
|
* 使用 `funasr.AutoModel` 高级接口。
|
||||||
|
* 集成 VAD(语音活动检测)。
|
||||||
|
* 支持热词(Hotwords)增强。
|
||||||
|
* 支持 ITN(逆文本标准化)。
|
||||||
|
* 支持多语言配置。
|
||||||
|
|
||||||
|
2. **Direct Model 模式 (`/inference/direct`)**:
|
||||||
|
* 直接调用底层 `FunASRNano` 模型。
|
||||||
|
* 支持普通全量推理。
|
||||||
|
* 支持模拟流式/分片推理(Chunk Mode),用于测试模型的增量解码能力。
|
||||||
|
|
||||||
|
## 环境准备
|
||||||
|
|
||||||
|
### 依赖安装
|
||||||
|
|
||||||
|
本项目使用 `uv` 进行依赖管理。请确保已安装 `uv`,然后在项目根目录下运行:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv sync
|
||||||
|
```
|
||||||
|
|
||||||
|
### 模型配置
|
||||||
|
|
||||||
|
默认模型路径配置为 `/models/Fun-ASR-Nano-2512`。如果你的模型在其他位置,请设置环境变量 `MODEL_DIR`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export MODEL_DIR="/你的/模型/绝对路径"
|
||||||
|
```
|
||||||
|
|
||||||
|
## 启动服务
|
||||||
|
|
||||||
|
可以直接运行 uv 脚本启动(默认端口 5000):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run api.py
|
||||||
|
```
|
||||||
|
|
||||||
|
服务启动时会自动检测计算设备(CUDA > MPS > CPU)。
|
||||||
|
|
||||||
|
### Docker 启动
|
||||||
|
|
||||||
|
若使用 Docker 部署,可参考以下命令。如需自定义模型路径,可通过 `-e MODEL_DIR` 指定:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run -d --restart always -p 5000:5000 --gpus "device=1" \
|
||||||
|
-e MODEL_DIR="/models/Fun-ASR-Nano-2512" \
|
||||||
|
--mount type=bind,source=/your/path/model/Fun-ASR-Nano-2512,target=/models/Fun-ASR-Nano-2512 \
|
||||||
|
harbor.bwgdi.com/library/fun-asr:0.0.1
|
||||||
|
```
|
||||||
|
|
||||||
|
## 接口文档
|
||||||
|
|
||||||
|
### 1. FunASR 标准推理接口
|
||||||
|
|
||||||
|
* **URL**: `/inference/funasr`
|
||||||
|
* **Method**: `POST`
|
||||||
|
* **Content-Type**: `multipart/form-data`
|
||||||
|
|
||||||
|
| 参数名 | 类型 | 必填 | 默认值 | 说明 |
|
||||||
|
| :--- | :--- | :--- | :--- | :--- |
|
||||||
|
| `file` | File | 是 | - | 音频文件 |
|
||||||
|
| `language` | String | 否 | "中文" | 目标语言 |
|
||||||
|
| `itn` | String | 否 | "true" | 是否开启逆文本标准化 (true/false) |
|
||||||
|
| `hotwords` | String | 否 | "" | 热词列表,用于提升特定词汇识别率 |
|
||||||
|
|
||||||
|
**示例**:
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://127.0.0.1:5000/inference/funasr" \
|
||||||
|
-F "file=@/path/to/audio.wav" \
|
||||||
|
-F "hotwords=开放时间"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Direct 底层推理接口
|
||||||
|
|
||||||
|
* **URL**: `/inference/direct`
|
||||||
|
* **Method**: `POST`
|
||||||
|
* **Content-Type**: `multipart/form-data`
|
||||||
|
|
||||||
|
| 参数名 | 类型 | 必填 | 默认值 | 说明 |
|
||||||
|
| :--- | :--- | :--- | :--- | :--- |
|
||||||
|
| `file` | File | 是 | - | 音频文件 |
|
||||||
|
| `chunk_mode` | Boolean | 否 | False | 是否开启分片模拟模式 (true/false) |
|
||||||
|
|
||||||
|
**示例**:
|
||||||
|
```bash
|
||||||
|
# 开启分片模拟模式
|
||||||
|
curl -X POST "http://127.0.0.1:5000/inference/direct" \
|
||||||
|
-F "file=@/path/to/audio.wav" \
|
||||||
|
-F "chunk_mode=true"
|
||||||
|
```
|
||||||
|
**返回**(标准模式 `chunk_mode=false` 的示例;当 `chunk_mode=true` 时,`text` 字段为仅包含识别文本的字符串):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "success",
|
||||||
|
"mode": "direct",
|
||||||
|
"text": {
|
||||||
|
"key": "rand_key_WgNZq6ITZM5jt",
|
||||||
|
"text": "你好。",
|
||||||
|
"text_tn": "你好",
|
||||||
|
"label": "null",
|
||||||
|
"ctc_text": "你好",
|
||||||
|
"ctc_timestamps": [
|
||||||
|
{
|
||||||
|
"token": "你",
|
||||||
|
"start_time": 1.8,
|
||||||
|
"end_time": 1.86,
|
||||||
|
"score": 0.908
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"token": "好",
|
||||||
|
"start_time": 2.16,
|
||||||
|
"end_time": 2.22,
|
||||||
|
"score": 0.988
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timestamps": [
|
||||||
|
{
|
||||||
|
"token": "你",
|
||||||
|
"start_time": 1.8,
|
||||||
|
"end_time": 1.86,
|
||||||
|
"score": 0.908
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"token": "好",
|
||||||
|
"start_time": 2.16,
|
||||||
|
"end_time": 2.22,
|
||||||
|
"score": 0.988
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"token": "。",
|
||||||
|
"start_time": 2.88,
|
||||||
|
"end_time": 2.94,
|
||||||
|
"score": 0.0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
@ -1,5 +1,4 @@
|
|||||||
torch>=2.9.0
|
torchaudio
|
||||||
torchaudio>=2.9.0
|
|
||||||
transformers>=4.51.3
|
transformers>=4.51.3
|
||||||
funasr>=1.3.0
|
funasr>=1.3.0
|
||||||
zhconv
|
zhconv
|
||||||
@ -7,3 +6,6 @@ whisper_normalizer
|
|||||||
pyopenjtalk-plus
|
pyopenjtalk-plus
|
||||||
compute-wer
|
compute-wer
|
||||||
openai-whisper
|
openai-whisper
|
||||||
|
fastapi
|
||||||
|
uvicorn
|
||||||
|
python-multipart==0.0.20
|
||||||
Reference in New Issue
Block a user