feat: tts

This commit is contained in:
superobk
2024-03-19 17:33:09 +08:00
parent 2dccf5e78d
commit f2d6b9e526
90 changed files with 533580 additions and 5 deletions

View File

@ -0,0 +1,142 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import IPython.display as ipd\n",
"\n",
"import os\n",
"import sys\n",
"import json\n",
"import math\n",
"import torch\n",
"from torch import nn\n",
"from torch.nn import functional as F\n",
"from torch.utils.data import DataLoader\n",
"\n",
"# Python has no `import ../module` syntax. The project modules used below are\n",
"# expected one directory above this notebook, so that directory is put on\n",
"# sys.path and the modules are imported by their plain names.\n",
"# NOTE(review): assumes the kernel's working directory is the notebook's own\n",
"# folder (the Jupyter default) - adjust PROJECT_ROOT if it is not.\n",
"PROJECT_ROOT = os.path.abspath(\"..\")\n",
"if PROJECT_ROOT not in sys.path:\n",
"    # Insert at the front so these project modules win over any installed\n",
"    # packages that happen to share a name such as `utils` or `text`.\n",
"    sys.path.insert(0, PROJECT_ROOT)\n",
"\n",
"import commons\n",
"import utils\n",
"from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate\n",
"from models import SynthesizerTrn\n",
"from text.symbols import symbols\n",
"from text import text_to_sequence\n",
"\n",
"from scipy.io.wavfile import write\n",
"\n",
"\n",
"def get_text(text, hps):\n",
"    \"\"\"Turn a text string into a LongTensor of symbol ids.\n",
"\n",
"    `text` is converted by `text_to_sequence` using the cleaners listed in\n",
"    `hps.data.text_cleaners`. When `hps.data.add_blank` is truthy, the\n",
"    resulting sequence is additionally passed through\n",
"    `commons.intersperse(sequence, 0)` before being wrapped in a tensor.\n",
"    \"\"\"\n",
"    token_ids = text_to_sequence(text, hps.data.text_cleaners)\n",
"    if hps.data.add_blank:\n",
"        token_ids = commons.intersperse(token_ids, 0)\n",
"    return torch.LongTensor(token_ids)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#############################################################\n",
"# #\n",
"# Single Speakers #\n",
"# #\n",
"#############################################################"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Single-speaker setup: read the hyper-parameters, build the synthesizer on the\n",
"# GPU, switch it to eval mode and load the trained weights.\n",
"# Replace the two quoted paths below with your own config path and model path.\n",
"hps = utils.get_hparams_from_file(\"configs/XXX.json\")\n",
"net_g = SynthesizerTrn(\n",
"    len(symbols),\n",
"    hps.data.filter_length // 2 + 1,\n",
"    hps.train.segment_size // hps.data.hop_length,\n",
"    **hps.model,\n",
")\n",
"net_g = net_g.cuda()\n",
"_ = net_g.eval()\n",
"\n",
"# No optimizer is restored (third argument is None); only net_g is passed in.\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Single-speaker export and smoke test: trace the model to TorchScript, save\n",
"# it, then synthesize the same sentence with the eager model and play it back.\n",
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
"    x_tst = stn_tst.cuda().unsqueeze(0)\n",
"    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
"    # A single-speaker model has no speaker id, so only the text inputs are\n",
"    # used as example inputs for tracing. `sid` exists only in the\n",
"    # multi-speaker section and must not be referenced here, otherwise this\n",
"    # cell raises NameError on a fresh kernel.\n",
"    # NOTE(review): assumes net_g.forward accepts (x, x_lengths) without a\n",
"    # speaker id - confirm against the project's models.py.\n",
"    traced_mod = torch.jit.trace(net_g, (x_tst, x_tst_lengths))\n",
"    torch.jit.save(traced_mod, \"OUTPUTLIBTORCHMODEL.pt\")\n",
"    audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#############################################################\n",
"# #\n",
"# Multiple Speakers #\n",
"# #\n",
"#############################################################"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Multi-speaker setup: same as the single-speaker case, but the synthesizer is\n",
"# also given the number of speakers from the config.\n",
"# Replace the two quoted paths below with your own config path and model path.\n",
"hps = utils.get_hparams_from_file(\"./configs/XXX.json\")\n",
"net_g = SynthesizerTrn(\n",
"    len(symbols),\n",
"    hps.data.filter_length // 2 + 1,\n",
"    hps.train.segment_size // hps.data.hop_length,\n",
"    n_speakers=hps.data.n_speakers,\n",
"    **hps.model,\n",
")\n",
"net_g = net_g.cuda()\n",
"_ = net_g.eval()\n",
"\n",
"# No optimizer is restored (third argument is None); only net_g is passed in.\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Multi-speaker export and smoke test: trace the model to TorchScript, save it,\n",
"# then synthesize the same sentence for one speaker id and play it back.\n",
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
"    sid = torch.LongTensor([4]).cuda()  # speaker id handed to trace and infer\n",
"    x_tst = stn_tst.unsqueeze(0).cuda()\n",
"    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
"\n",
"    traced_mod = torch.jit.trace(net_g, (x_tst, x_tst_lengths, sid))\n",
"    torch.jit.save(traced_mod, \"OUTPUTLIBTORCHMODEL.pt\")\n",
"\n",
"    audio = net_g.infer(\n",
"        x_tst,\n",
"        x_tst_lengths,\n",
"        sid=sid,\n",
"        noise_scale=0.667,\n",
"        noise_scale_w=0.8,\n",
"        length_scale=1,\n",
"    )[0][0, 0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}