feat: tts

This commit is contained in:
superobk
2024-03-19 17:33:09 +08:00
parent 2dccf5e78d
commit f2d6b9e526
90 changed files with 533580 additions and 5 deletions

View File

@ -10,7 +10,7 @@
| python | SpeechRecognition | https://pypi.org/project/SpeechRecognition/ | pip install SpeechRecognition |
| python | gtts | https://pypi.org/project/gTTS/ | pip install gTTS |
## Start
Dev
Dev rh
```bash
uvicorn main:app --reload
```

View File

@ -4,6 +4,7 @@ from fastapi import FastAPI, Request, status
from fastapi.responses import JSONResponse
from src.blackbox.blackbox_factory import BlackboxFactory
app = FastAPI()
blackbox_factory = BlackboxFactory()

View File

@ -6,7 +6,6 @@ from fastapi.responses import JSONResponse
from .rapid_paraformer.utils import read_yaml
from .rapid_paraformer import RapidParaformer
from .asr_service import ASRService
from ..blackbox.blackbox import Blackbox
class ASR(Blackbox):

View File

@ -8,7 +8,7 @@ class Blackbox(ABC):
the methods processing, valid and fast_api_handler.
If implemented correctly, the blackbox class can be used in the main.py file
"""
def __init__(self, config: any) -> None:
def __init__(self, *args, **kwargs) -> None:
pass
"""

View File

@ -1,3 +1,4 @@
from .tts import TTS
from ..asr.asr import ASR
from .audio_to_text import AudioToText
from .blackbox import Blackbox
@ -8,8 +9,8 @@ from .text_to_audio import TextToAudio
class BlackboxFactory:
def __init__(self) -> None:
self.tts = TTS()
self.asr = ASR("./.env.yaml")
pass
def create_blackbox(self, blackbox_name: str, blackbox_config: dict) -> Blackbox:
if blackbox_name == "audio_to_text":
@ -20,4 +21,6 @@ class BlackboxFactory:
return Calculator(blackbox_config)
if blackbox_name == "asr":
return self.asr
if blackbox_name == "tts":
return self.tts
raise ValueError("Invalid blockbox type")

36
src/blackbox/tts.py Normal file
View File

@ -0,0 +1,36 @@
import io
from typing import Any, Coroutine
from fastapi import Request, Response, status
from fastapi.responses import JSONResponse
from .blackbox import Blackbox
from tts.tts_service import TTService
class TTS(Blackbox):
    """Text-to-speech blackbox.

    Wraps a VITS-based ``TTService`` and exposes it through the common
    ``Blackbox`` interface (``processing`` / ``valid`` / ``fast_api_handler``).
    """

    def __init__(self) -> None:
        # Known voice presets: name -> [config json, checkpoint, character id, speed].
        config = {
            'paimon': ['resources/tts/models/paimon6k.json', 'resources/tts/models/paimon6k_390k.pth', 'character_paimon', 1],
            'yunfei': ['resources/tts/models/yunfeimix2.json', 'resources/tts/models/yunfeimix2_53k.pth', 'character_yunfei', 1.1],
            'catmaid': ['resources/tts/models/catmix.json', 'resources/tts/models/catmix_107k.pth', 'character_catmaid', 1.2]
        }
        # NOTE: the served voice is currently hard-wired to 'catmaid'.
        self.tts_service = TTService(*config['catmaid'])
        super().__init__(config)

    def processing(self, text: str) -> io.BytesIO:
        """Synthesize *text* and return an in-memory WAV stream."""
        audio = self.tts_service.read(text)
        return audio

    def valid(self, txt: Any) -> bool:
        """Return True when *txt* is a string the TTS engine can read."""
        return isinstance(txt, str)

    async def fast_api_handler(self, request: Request) -> Response:
        """POST handler: expects JSON ``{"text": ...}``; responds with a WAV attachment."""
        try:
            data = await request.json()
        except Exception:  # malformed body / wrong content type; was a bare except
            return JSONResponse(content={"error": "json parse error"}, status_code=status.HTTP_400_BAD_REQUEST)
        text = data.get("text")
        if text is None:
            return JSONResponse(content={"error": "text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
        if not self.valid(text):
            # Reject non-string payloads (e.g. numbers) instead of crashing inside the engine.
            return JSONResponse(content={"error": "text must be a string"}, status_code=status.HTTP_400_BAD_REQUEST)
        by = self.processing(text)
        return Response(content=by.read(), media_type="audio/wav", headers={"Content-Disposition": "attachment; filename=audio.wav"})

BIN
test_data/how-are-you.wav Normal file

Binary file not shown.

64
tts/tts_service.py Normal file
View File

@ -0,0 +1,64 @@
import io
import sys
import time
sys.path.append('tts/vits')
import numpy as np
import soundfile
import os
os.environ["PYTORCH_JIT"] = "0"
import torch
import tts.vits.commons as commons
import tts.vits.utils as utils
from tts.vits.models import SynthesizerTrn
from tts.vits.text.symbols import symbols
from tts.vits.text import text_to_sequence
import logging
logging.getLogger().setLevel(logging.INFO)
logging.basicConfig(level=logging.INFO)
from pydub import AudioSegment
class TTService():
    """CPU-only VITS synthesizer: loads a checkpoint once, then renders text to WAV bytes."""

    def __init__(self, cfg, model, char, speed):
        logging.info('Initializing TTS Service for %s...' % char)
        self.hps = utils.get_hparams_from_file(cfg)
        self.speed = speed
        spec_channels = self.hps.data.filter_length // 2 + 1
        segment_frames = self.hps.train.segment_size // self.hps.data.hop_length
        self.net_g = SynthesizerTrn(
            len(symbols),
            spec_channels,
            segment_frames,
            **self.hps.model).cpu()
        _ = self.net_g.eval()
        _ = utils.load_checkpoint(model, self.net_g, None)

    def get_text(self, text, hps):
        """Convert *text* into a LongTensor of symbol ids (blank-interspersed if configured)."""
        seq = text_to_sequence(text, hps.data.text_cleaners)
        if hps.data.add_blank:
            seq = commons.intersperse(seq, 0)
        return torch.LongTensor(seq)

    def read(self, text, format="wav") -> io.BytesIO:
        """Synthesize *text* and return a rewound in-memory audio buffer in *format*."""
        cleaned = text.replace('~', '')
        ids = self.get_text(cleaned, self.hps)
        with torch.no_grad():
            batch = ids.cpu().unsqueeze(0)
            lengths = torch.LongTensor([ids.size(0)]).cpu()
            inferred = self.net_g.infer(batch, lengths, noise_scale=.667, noise_scale_w=0.2, length_scale=self.speed)
            audio = inferred[0][0, 0].data.cpu().float().numpy()
        buf = io.BytesIO()
        soundfile.write(buf, audio, self.hps.data.sampling_rate, format=format)
        buf.seek(0)
        return buf

26
tts/vits/.dockerignore Normal file
View File

@ -0,0 +1,26 @@
**/__pycache__
**/.venv
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/bin
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
README.md

14
tts/vits/.gitignore vendored Normal file
View File

@ -0,0 +1,14 @@
DUMMY1
DUMMY2
DUMMY3
logs
__pycache__
.ipynb_checkpoints
.*.swp
build
*.c
monotonic_align/monotonic_align
/.vs/vits/FileContentIndex
configs/dracu_japanese_base2.json
configs/tolove_japanese_base2.json

View File

@ -0,0 +1,3 @@
{
"CurrentProjectSetting": null
}

View File

@ -0,0 +1,9 @@
{
"ExpandedNodes": [
"",
"\\filelists",
"\\text"
],
"SelectedNode": "\\text\\symbols.py",
"PreviewInSolutionExplorer": false
}

BIN
tts/vits/.vs/slnx.sqlite Normal file

Binary file not shown.

BIN
tts/vits/.vs/vits/v17/.suo Normal file

Binary file not shown.

21
tts/vits/Dockerfile Normal file
View File

@ -0,0 +1,21 @@
# For more information, please refer to https://aka.ms/vscode-docker-python
FROM python:3.7-slim
# Keeps Python from generating .pyc files in the container
ENV PYTHONDONTWRITEBYTECODE=1
# Turns off buffering for easier container logging
ENV PYTHONUNBUFFERED=1
# Install pip requirements
COPY requirements.txt .
RUN apt-get update
RUN apt-get install -y vim
RUN apt-get install -y gcc
RUN apt-get install -y g++
RUN apt-get install -y cmake
RUN python -m pip install -r requirements.txt
WORKDIR /content
COPY . /content

21
tts/vits/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Jaehyeon Kim
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,121 @@
#include <codecvt>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>
#include <vector>

#include <direct.h>

#include <torch/script.h>
#include <torch/torch.h>
typedef int64_t int64;
namespace Shirakana {

    // RIFF/WAVE header for 16-bit mono PCM at 22050 Hz.
    // Fixed-width field types are required for the 44-byte on-disk layout:
    // the original used `long int`, which is 8 bytes on LP64 platforms
    // (Linux/macOS x86-64) and would corrupt the header there.
    struct WavHead {
        char RIFF[4];
        int32_t size0;
        char WAVE[4];
        char FMT[4];
        int32_t size1;
        int16_t fmttag;
        int16_t channel;
        int32_t samplespersec;
        int32_t bytepersec;
        int16_t blockalign;
        int16_t bitpersamples;
        char DATA[4];
        int32_t size2;
    };

    // Write `size` samples of 16-bit mono PCM to `filename` as a WAV file.
    // Returns 0 on success, -1 when the output file cannot be opened.
    int conArr2Wav(int64 size, int16_t* input, const char* filename) {
        WavHead head = { {'R','I','F','F'},0,{'W','A','V','E'},{'f','m','t',' '},16,
                        1,1,22050,22050 * 2,2,16,{'d','a','t','a'},
                        0 };
        head.size0 = (int32_t)(size * 2 + 36); // total file size minus the 8-byte RIFF prefix
        head.size2 = (int32_t)(size * 2);      // raw PCM payload size in bytes
        std::ofstream ocout;
        char* outputData = (char*)input;
        ocout.open(filename, std::ios::out | std::ios::binary);
        if (!ocout.is_open()) {
            return -1; // previously failed silently and still reported success
        }
        ocout.write((char*)&head, 44);
        ocout.write(outputData, (int32_t)(size * 2));
        ocout.close();
        return 0;
    }

    // UTF-8 <-> wide-string helpers.
    // NOTE(review): std::codecvt_utf8 is deprecated since C++17; kept because
    // this file currently targets toolchains where it is still available.
    inline std::wstring to_wide_string(const std::string& input)
    {
        std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
        return converter.from_bytes(input);
    }

    inline std::string to_byte_string(const std::wstring& input)
    {
        std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
        return converter.to_bytes(input);
    }
}
#define val const auto
// Interactive VITS inference driver. Reads a whitespace-separated command
// stream from stdin:
//   "model <path>"                      load a TorchScript model
//   "line <id> <id> ... endline[<spk>]" synthesize one line of symbol ids
//   "endinfer"                          flush accumulated PCM to temp\tmp.wav
//   "end"                               exit
int main()
{
    torch::jit::Module Vits;
    std::string buffer;
    std::vector<int64> text;     // symbol ids of the line currently being read
    std::vector<int16_t> data;   // accumulated 16-bit PCM across lines
    while(true)
    {
        while (true)
        {
            std::cin >> buffer;
            if (buffer == "end")
                return 0;
            if(buffer == "model")
            {
                // Next token is the TorchScript checkpoint path.
                std::cin >> buffer;
                Vits = torch::jit::load(buffer);
                continue;
            }
            if (buffer == "endinfer")
            {
                // Flush everything synthesized so far into one WAV file.
                // NOTE(review): "temp\\tmp.wav" assumes an existing Windows-style temp dir.
                Shirakana::conArr2Wav(data.size(), data.data(), "temp\\tmp.wav");
                data.clear();
                std::cout << "endofinfe";
                continue;
            }
            if (buffer == "line")
            {
                // Collect integer tokens until a token containing "endline".
                std::cin >> buffer;
                while (buffer.find("endline")==std::string::npos)
                {
                    text.push_back(std::atoi(buffer.c_str()));
                    std::cin >> buffer;
                }
                // from_blob does not copy: `text` must stay alive until forward() runs.
                val InputTensor = torch::from_blob(text.data(), { 1,static_cast<int64>(text.size()) }, torch::kInt64);
                std::array<int64, 1> TextLength{ static_cast<int64>(text.size()) };
                val InputTensor_length = torch::from_blob(TextLength.data(), { 1 }, torch::kInt64);
                std::vector<torch::IValue> inputs;
                inputs.push_back(InputTensor);
                inputs.push_back(InputTensor_length);
                if (buffer.length() > 7)
                {
                    // "endline<N>" carries an optional trailing speaker id.
                    std::array<int64, 1> speakerIndex{ (int64)atoi(buffer.substr(7).c_str()) };
                    inputs.push_back(torch::from_blob(speakerIndex.data(), { 1 }, torch::kLong));
                }
                // Scale float samples toward the int16 range (32276, slightly below 32767).
                val output = Vits.forward(inputs).toTuple()->elements()[0].toTensor().multiply(32276.0F);
                val outputSize = output.sizes().at(2);
                val floatOutput = output.data_ptr<float>();
                // NOTE(review): allocates sizeof(float) per int16 sample (2x the needed size),
                // and std::exception(const char*) below is an MSVC extension — not portable.
                int16_t* outputTmp = (int16_t*)malloc(sizeof(float) * outputSize);
                if (outputTmp == nullptr) {
                    throw std::exception("内存不足");
                }
                // Truncate each float sample to int16.
                for (int i = 0; i < outputSize; i++) {
                    *(outputTmp + i) = (int16_t) * (floatOutput + i);
                }
                data.insert(data.end(), outputTmp, outputTmp+outputSize);
                free(outputTmp);
                text.clear();
                std::cout << "endofline";
            }
        }
    }
    //model S:\VSGIT\ShirakanaTTSUI\build\x64\Release\Mods\AtriVITS\AtriVITS_LJS.pt
}

View File

@ -0,0 +1,142 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import IPython.display as ipd\n",
"\n",
"import os\n",
"import json\n",
"import math\n",
"import torch\n",
"from torch import nn\n",
"from torch.nn import functional as F\n",
"from torch.utils.data import DataLoader\n",
"\n",
    "import commons\n",
    "import utils\n",
    "from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate\n",
    "from models import SynthesizerTrn\n",
    "from text.symbols import symbols\n",
    "from text import text_to_sequence\n",
"\n",
"from scipy.io.wavfile import write\n",
"\n",
"\n",
"def get_text(text, hps):\n",
" text_norm = text_to_sequence(text, hps.data.text_cleaners)\n",
" if hps.data.add_blank:\n",
" text_norm = commons.intersperse(text_norm, 0)\n",
" text_norm = torch.LongTensor(text_norm)\n",
" return text_norm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#############################################################\n",
"# #\n",
"# Single Speakers #\n",
"# #\n",
"#############################################################"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hps = utils.get_hparams_from_file(\"configs/XXX.json\") #将\"\"内的内容修改为你的模型路径与config路径\n",
"net_g = SynthesizerTrn(\n",
" len(symbols),\n",
" hps.data.filter_length // 2 + 1,\n",
" hps.train.segment_size // hps.data.hop_length,\n",
" **hps.model).cuda()\n",
"_ = net_g.eval()\n",
"\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
" x_tst = stn_tst.cuda().unsqueeze(0)\n",
" x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
" traced_mod = torch.jit.trace(net_g,(x_tst, x_tst_lengths,sid))\n",
" torch.jit.save(traced_mod,\"OUTPUTLIBTORCHMODEL.pt\")\n",
" audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#############################################################\n",
"# #\n",
"# Multiple Speakers #\n",
"# #\n",
"#############################################################"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hps = utils.get_hparams_from_file(\"./configs/XXX.json\") #将\"\"内的内容修改为你的模型路径与config路径\n",
"net_g = SynthesizerTrn(\n",
" len(symbols),\n",
" hps.data.filter_length // 2 + 1,\n",
" hps.train.segment_size // hps.data.hop_length,\n",
" n_speakers=hps.data.n_speakers,\n",
" **hps.model).cuda()\n",
"_ = net_g.eval()\n",
"\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
" x_tst = stn_tst.cuda().unsqueeze(0)\n",
" x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
" sid = torch.LongTensor([4]).cuda()\n",
" traced_mod = torch.jit.trace(net_g,(x_tst, x_tst_lengths,sid))\n",
" torch.jit.save(traced_mod,\"OUTPUTLIBTORCHMODEL.pt\")\n",
" audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

68
tts/vits/README.md Normal file
View File

@ -0,0 +1,68 @@
# How to use
(Suggestion) Python == 3.7
## Clone this repository
```sh
git clone https://github.com/CjangCjengh/vits.git
```
## Choose cleaners
- Fill "text_cleaners" in config.json
- Edit text/symbols.py
- Remove unnecessary imports from text/cleaners.py
## Install requirements
```sh
pip install -r requirements.txt
```
## Create datasets
### Single speaker
"n_speakers" should be 0 in config.json
```
path/to/XXX.wav|transcript
```
- Example
```
dataset/001.wav|こんにちは。
```
### Multiple speakers
Speaker id should start from 0
```
path/to/XXX.wav|speaker id|transcript
```
- Example
```
dataset/001.wav|0|こんにちは。
```
## Preprocess
If you have done this, set "cleaned_text" to true in config.json
```sh
# Single speaker
python preprocess.py --text_index 1 --filelists path/to/filelist_train.txt path/to/filelist_val.txt
# Multiple speakers
python preprocess.py --text_index 2 --filelists path/to/filelist_train.txt path/to/filelist_val.txt
```
## Build monotonic alignment search
```sh
cd monotonic_align
python setup.py build_ext --inplace
cd ..
```
## Train
```sh
# Single speaker
python train.py -c <config> -m <folder>
# Multiple speakers
python train_ms.py -c <config> -m <folder>
```
## Inference
### Online
See [inference.ipynb](inference.ipynb)
### Offline
See [MoeGoe](https://github.com/CjangCjengh/MoeGoe)
# Running in Docker
```sh
docker run -itd --gpus all --name "Container name" -e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=all "Image name"
```

303
tts/vits/attentions.py Normal file
View File

@ -0,0 +1,303 @@
import copy
import math
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
import commons
import modules
from modules import LayerNorm
class Encoder(nn.Module):
    """Transformer encoder stack: n_layers of [self-attention -> LayerNorm -> FFN -> LayerNorm]
    with residual connections and windowed relative-position self-attention."""

    def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., window_size=4, **kwargs):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.window_size = window_size  # relative-attention window radius

        self.drop = nn.Dropout(p_dropout)
        self.attn_layers = nn.ModuleList()
        self.norm_layers_1 = nn.ModuleList()
        self.ffn_layers = nn.ModuleList()
        self.norm_layers_2 = nn.ModuleList()
        for i in range(self.n_layers):
            self.attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, window_size=window_size))
            self.norm_layers_1.append(LayerNorm(hidden_channels))
            self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout))
            self.norm_layers_2.append(LayerNorm(hidden_channels))

    def forward(self, x, x_mask):
        # x: [b, hidden_channels, t]; x_mask: [b, 1, t] with 1 at valid frames.
        # Pairwise mask: pair (i, j) is attendable only when both positions are valid.
        attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
        x = x * x_mask
        for i in range(self.n_layers):
            # Post-norm residual: attention sublayer, then feed-forward sublayer.
            y = self.attn_layers[i](x, x, attn_mask)
            y = self.drop(y)
            x = self.norm_layers_1[i](x + y)
            y = self.ffn_layers[i](x, x_mask)
            y = self.drop(y)
            x = self.norm_layers_2[i](x + y)
        x = x * x_mask
        return x
class Decoder(nn.Module):
    """Transformer decoder stack: n_layers of [causal self-attention -> cross-attention
    -> causal FFN], each sublayer followed by residual + LayerNorm."""

    def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., proximal_bias=False, proximal_init=True, **kwargs):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.proximal_bias = proximal_bias      # bias self-attention toward nearby positions
        self.proximal_init = proximal_init      # initialize key proj from query proj

        self.drop = nn.Dropout(p_dropout)
        self.self_attn_layers = nn.ModuleList()
        self.norm_layers_0 = nn.ModuleList()
        self.encdec_attn_layers = nn.ModuleList()
        self.norm_layers_1 = nn.ModuleList()
        self.ffn_layers = nn.ModuleList()
        self.norm_layers_2 = nn.ModuleList()
        for i in range(self.n_layers):
            self.self_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, proximal_bias=proximal_bias, proximal_init=proximal_init))
            self.norm_layers_0.append(LayerNorm(hidden_channels))
            self.encdec_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout))
            self.norm_layers_1.append(LayerNorm(hidden_channels))
            # causal=True keeps the FFN from leaking future frames.
            self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout, causal=True))
            self.norm_layers_2.append(LayerNorm(hidden_channels))

    def forward(self, x, x_mask, h, h_mask):
        """
        x: decoder input
        h: encoder output
        """
        # Lower-triangular causal mask for self-attention; combined validity mask for cross-attention.
        self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype)
        encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
        x = x * x_mask
        for i in range(self.n_layers):
            # Post-norm residual blocks: self-attn -> cross-attn -> FFN.
            y = self.self_attn_layers[i](x, x, self_attn_mask)
            y = self.drop(y)
            x = self.norm_layers_0[i](x + y)
            y = self.encdec_attn_layers[i](x, h, encdec_attn_mask)
            y = self.drop(y)
            x = self.norm_layers_1[i](x + y)
            y = self.ffn_layers[i](x, x_mask)
            y = self.drop(y)
            x = self.norm_layers_2[i](x + y)
        x = x * x_mask
        return x
class MultiHeadAttention(nn.Module):
    """Multi-head attention over [b, channels, t] tensors with optional
    windowed relative-position embeddings, proximal bias, and local
    (block-limited) attention. Inputs/outputs are channel-first 1-D sequences."""

    def __init__(self, channels, out_channels, n_heads, p_dropout=0., window_size=None, heads_share=True, block_length=None, proximal_bias=False, proximal_init=False):
        super().__init__()
        assert channels % n_heads == 0

        self.channels = channels
        self.out_channels = out_channels
        self.n_heads = n_heads
        self.p_dropout = p_dropout
        self.window_size = window_size      # relative-attention window radius (self-attn only)
        self.heads_share = heads_share      # share relative embeddings across heads
        self.block_length = block_length    # limit attention to a local band if set
        self.proximal_bias = proximal_bias
        self.proximal_init = proximal_init
        self.attn = None                    # last attention weights, kept for inspection

        self.k_channels = channels // n_heads
        self.conv_q = nn.Conv1d(channels, channels, 1)
        self.conv_k = nn.Conv1d(channels, channels, 1)
        self.conv_v = nn.Conv1d(channels, channels, 1)
        self.conv_o = nn.Conv1d(channels, out_channels, 1)
        self.drop = nn.Dropout(p_dropout)

        if window_size is not None:
            n_heads_rel = 1 if heads_share else n_heads
            rel_stddev = self.k_channels**-0.5
            self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
            self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)

        nn.init.xavier_uniform_(self.conv_q.weight)
        nn.init.xavier_uniform_(self.conv_k.weight)
        nn.init.xavier_uniform_(self.conv_v.weight)
        if proximal_init:
            # Start keys identical to queries so early attention is sharp/diagonal.
            with torch.no_grad():
                self.conv_k.weight.copy_(self.conv_q.weight)
                self.conv_k.bias.copy_(self.conv_q.bias)

    def forward(self, x, c, attn_mask=None):
        # x: query source; c: key/value source (x == c for self-attention).
        q = self.conv_q(x)
        k = self.conv_k(c)
        v = self.conv_v(c)
        x, self.attn = self.attention(q, k, v, mask=attn_mask)
        x = self.conv_o(x)
        return x

    def attention(self, query, key, value, mask=None):
        # reshape [b, d, t] -> [b, n_h, t, d_k]
        b, d, t_s, t_t = (*key.size(), query.size(2))
        query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
        key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
        value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)

        # Scaled dot-product scores: [b, n_h, t_t, t_s].
        scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1))
        if self.window_size is not None:
            assert t_s == t_t, "Relative attention is only available for self-attention."
            key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
            rel_logits = self._matmul_with_relative_keys(query /math.sqrt(self.k_channels), key_relative_embeddings)
            scores_local = self._relative_position_to_absolute_position(rel_logits)
            scores = scores + scores_local
        if self.proximal_bias:
            assert t_s == t_t, "Proximal bias is only available for self-attention."
            scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device, dtype=scores.dtype)
        if mask is not None:
            # -1e4 (not -inf) keeps the masked softmax numerically safe in fp16.
            scores = scores.masked_fill(mask == 0, -1e4)
            if self.block_length is not None:
                assert t_s == t_t, "Local attention is only available for self-attention."
                block_mask = torch.ones_like(scores).triu(-self.block_length).tril(self.block_length)
                scores = scores.masked_fill(block_mask == 0, -1e4)
        p_attn = F.softmax(scores, dim=-1) # [b, n_h, t_t, t_s]
        p_attn = self.drop(p_attn)
        output = torch.matmul(p_attn, value)
        if self.window_size is not None:
            # Add relative-position contribution to the values as well.
            relative_weights = self._absolute_position_to_relative_position(p_attn)
            value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s)
            output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings)
        output = output.transpose(2, 3).contiguous().view(b, d, t_t) # [b, n_h, t_t, d_k] -> [b, d, t_t]
        return output, p_attn

    def _matmul_with_relative_values(self, x, y):
        """
        x: [b, h, l, m]
        y: [h or 1, m, d]
        ret: [b, h, l, d]
        """
        ret = torch.matmul(x, y.unsqueeze(0))
        return ret

    def _matmul_with_relative_keys(self, x, y):
        """
        x: [b, h, l, d]
        y: [h or 1, m, d]
        ret: [b, h, l, m]
        """
        ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
        return ret

    def _get_relative_embeddings(self, relative_embeddings, length):
        # Slice (or pad) the learned window of relative embeddings to 2*length-1 offsets.
        max_relative_position = 2 * self.window_size + 1
        # Pad first before slice to avoid using cond ops.
        pad_length = max(length - (self.window_size + 1), 0)
        slice_start_position = max((self.window_size + 1) - length, 0)
        slice_end_position = slice_start_position + 2 * length - 1
        if pad_length > 0:
            padded_relative_embeddings = F.pad(
                relative_embeddings,
                commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]))
        else:
            padded_relative_embeddings = relative_embeddings
        used_relative_embeddings = padded_relative_embeddings[:,slice_start_position:slice_end_position]
        return used_relative_embeddings

    def _relative_position_to_absolute_position(self, x):
        """
        x: [b, h, l, 2*l-1]
        ret: [b, h, l, l]
        """
        batch, heads, length, _ = x.size()
        # Concat columns of pad to shift from relative to absolute indexing.
        x = F.pad(x, commons.convert_pad_shape([[0,0],[0,0],[0,0],[0,1]]))
        # Concat extra elements so to add up to shape (len+1, 2*len-1).
        x_flat = x.view([batch, heads, length * 2 * length])
        x_flat = F.pad(x_flat, commons.convert_pad_shape([[0,0],[0,0],[0,length-1]]))
        # Reshape and slice out the padded elements.
        x_final = x_flat.view([batch, heads, length+1, 2*length-1])[:, :, :length, length-1:]
        return x_final

    def _absolute_position_to_relative_position(self, x):
        """
        x: [b, h, l, l]
        ret: [b, h, l, 2*l-1]
        """
        batch, heads, length, _ = x.size()
        # padd along column
        x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length-1]]))
        x_flat = x.view([batch, heads, length**2 + length*(length -1)])
        # add 0's in the beginning that will skew the elements after reshape
        x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
        x_final = x_flat.view([batch, heads, length, 2*length])[:,:,:,1:]
        return x_final

    def _attention_bias_proximal(self, length):
        """Bias for self-attention to encourage attention to close positions.
        Args:
          length: an integer scalar.
        Returns:
          a Tensor with shape [1, 1, length, length]
        """
        r = torch.arange(length, dtype=torch.float32)
        diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
        return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)
class FFN(nn.Module):
    """Position-wise feed-forward block (two 1-D convolutions) with optional causal padding."""

    def __init__(self, in_channels, out_channels, filter_channels, kernel_size, p_dropout=0., activation=None, causal=False):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_channels = filter_channels
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.activation = activation
        self.causal = causal
        # Causal mode pads only on the left so no future frame leaks in.
        self.padding = self._causal_padding if causal else self._same_padding
        self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size)
        self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size)
        self.drop = nn.Dropout(p_dropout)

    def forward(self, x, x_mask):
        h = self.conv_1(self.padding(x * x_mask))
        if self.activation == "gelu":
            h = h * torch.sigmoid(1.702 * h)  # fast GELU approximation
        else:
            h = torch.relu(h)
        h = self.drop(h)
        h = self.conv_2(self.padding(h * x_mask))
        return h * x_mask

    def _causal_padding(self, x):
        if self.kernel_size == 1:
            return x
        spec = [[0, 0], [0, 0], [self.kernel_size - 1, 0]]
        return F.pad(x, commons.convert_pad_shape(spec))

    def _same_padding(self, x):
        if self.kernel_size == 1:
            return x
        left = (self.kernel_size - 1) // 2
        right = self.kernel_size // 2
        return F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [left, right]]))

1
tts/vits/colab.ipynb Normal file
View File

@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","metadata":{"id":"65CU-n-JHhbY"},"source":["# Clone repository"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"elapsed":198272,"status":"ok","timestamp":1659461977037,"user":{"displayName":"章政","userId":"17693849672782836082"},"user_tz":-480},"id":"i_0vZ-OjHVNu","outputId":"52655c4e-699c-465a-ce12-9cef24aa8a1e"},"outputs":[],"source":["!git clone https://github.com/CjangCjengh/vits.git\n","%cd vits\n","!pip install -r requirements.txt\n"]},{"cell_type":"markdown","metadata":{"id":"G0iLn2JxKYhl"},"source":["# Mount Google Drive"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2801,"status":"ok","timestamp":1659464479547,"user":{"displayName":"章政","userId":"17693849672782836082"},"user_tz":-480},"id":"ZOgjdsQgKTfD","outputId":"ba0ce34b-45f6-43ea-af98-98e6007a5351"},"outputs":[],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"markdown","metadata":{"id":"UZ9maSoUmHaS"},"source":["# Unpack dataset"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":191445,"status":"ok","timestamp":1659462201164,"user":{"displayName":"章政","userId":"17693849672782836082"},"user_tz":-480},"id":"N3a-FsHghwXS","outputId":"979fe3a3-49ba-4f0d-cc71-fc68b8ec0db7"},"outputs":[],"source":["!sudo apt-get install p7zip-full\n","!7z x ../drive/MyDrive/dataset.zip"]},{"cell_type":"markdown","metadata":{"id":"LY9d2hgjmYUF"},"source":["# 
Alignment"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4802,"status":"ok","timestamp":1659462298052,"user":{"displayName":"章政","userId":"17693849672782836082"},"user_tz":-480},"id":"LOsV22D8IUTS","outputId":"8b9b7c35-e869-4af5-a8eb-9e6eee11fa7c"},"outputs":[],"source":["%cd monotonic_align\n","!python setup.py build_ext --inplace\n","%cd .."]},{"cell_type":"markdown","metadata":{"id":"gjIAR_UsmPEz"},"source":["# Train"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":245907,"status":"ok","timestamp":1659464394347,"user":{"displayName":"章政","userId":"17693849672782836082"},"user_tz":-480},"id":"ltU2JXpxIh-K","outputId":"13573700-ca5d-4bd1-ebbc-966c059e1327"},"outputs":[],"source":["!python train_ms.py -c configs/yuzusoft_base.json -m yuzusoft_base"]}],"metadata":{"accelerator":"GPU","colab":{"authorship_tag":"ABX9TyMUFamkuoxXK2DqqYNB4cPL","collapsed_sections":[],"mount_file_id":"1twKgqwggarlmTzNEjCsuldastf_zMIkF","name":"vits.ipynb","provenance":[]},"gpuClass":"standard","kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}

161
tts/vits/commons.py Normal file
View File

@ -0,0 +1,161 @@
import math
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
def init_weights(m, mean=0.0, std=0.01):
    """Gaussian-initialize the weights of conv modules; leave other modules untouched."""
    if m.__class__.__name__.find("Conv") != -1:
        m.weight.data.normal_(mean, std)
def get_padding(kernel_size, dilation=1):
    """Return the "same" padding for a dilated 1-D convolution."""
    effective_reach = kernel_size * dilation - dilation
    return int(effective_reach / 2)
def convert_pad_shape(pad_shape):
    """Flatten [[l0, r0], ...] (outermost dim first) into F.pad's innermost-first flat list."""
    flat = []
    for pair in reversed(pad_shape):
        flat.extend(pair)
    return flat
def intersperse(lst, item):
    """Return lst with *item* inserted before, between, and after every element."""
    out = [item]
    for value in lst:
        out.append(value)
        out.append(item)
    return out
def kl_divergence(m_p, logs_p, m_q, logs_q):
    """KL(P||Q) between diagonal Gaussians given means and log-standard-deviations."""
    result = (logs_q - logs_p) - 0.5
    result = result + 0.5 * (torch.exp(2. * logs_p) + ((m_p - m_q) ** 2)) * torch.exp(-2. * logs_q)
    return result
def rand_gumbel(shape):
    """Sample from the Gumbel distribution, protected from log(0) overflows."""
    uniform = torch.rand(shape) * 0.99998 + 0.00001  # keep strictly inside (0, 1)
    return -torch.log(-torch.log(uniform))
def rand_gumbel_like(x):
    """Gumbel sample matching x's shape, dtype and device."""
    return rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device)
def slice_segments(x, ids_str, segment_size=4):
    """Gather a length-`segment_size` time slice from each batch element of x
    ([b, d, t]), starting at per-element offsets ids_str."""
    out = torch.zeros_like(x[:, :, :segment_size])
    for b in range(x.size(0)):
        start = ids_str[b]
        out[b] = x[b, :, start:start + segment_size]
    return out
def rand_slice_segments(x, x_lengths=None, segment_size=4):
    """Pick a random valid start per batch element and slice segments from x ([b, d, t]).

    Returns (segments, start_indices)."""
    b, d, t = x.size()
    if x_lengths is None:
        x_lengths = t
    max_start = x_lengths - segment_size + 1
    ids_str = (torch.rand([b]).to(device=x.device) * max_start).to(dtype=torch.long)
    return slice_segments(x, ids_str, segment_size), ids_str
def get_timing_signal_1d(
    length, channels, min_timescale=1.0, max_timescale=1.0e4):
    """Sinusoidal positional-encoding signal of shape [1, channels, length].

    First half of the channels hold sines, second half cosines, over a geometric
    progression of timescales from min_timescale to max_timescale
    ("Attention Is All You Need" style).
    """
    position = torch.arange(length, dtype=torch.float)
    num_timescales = channels // 2
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        (num_timescales - 1))
    inv_timescales = min_timescale * torch.exp(
        torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment)
    scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1)
    signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0)
    # Zero-pad one extra channel when `channels` is odd.
    signal = F.pad(signal, [0, 0, 0, channels % 2])
    signal = signal.view(1, channels, length)
    return signal
def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4):
    """Add a sinusoidal timing signal to x ([b, channels, length])."""
    signal = get_timing_signal_1d(
        x.size(2), x.size(1), min_timescale, max_timescale)
    return x + signal.to(dtype=x.dtype, device=x.device)
def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1):
    """Concatenate a sinusoidal timing signal onto x along `axis` (default: channels)."""
    signal = get_timing_signal_1d(
        x.size(2), x.size(1), min_timescale, max_timescale)
    return torch.cat([x, signal.to(dtype=x.dtype, device=x.device)], axis)
def subsequent_mask(length):
    """Causal attention mask of shape [1, 1, length, length]: lower triangle is 1."""
    ones = torch.ones(length, length)
    return torch.tril(ones)[None, None]
@torch.jit.script
def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
    # Gated activation: sum the inputs, split along the channel axis into a
    # tanh half and a sigmoid half, and multiply the two halves elementwise.
    # n_channels arrives as a tensor and is indexed for its scalar value —
    # presumably to keep the function TorchScript-compatible; confirm at call sites.
    n_channels_int = n_channels[0]
    in_act = input_a + input_b
    t_act = torch.tanh(in_act[:, :n_channels_int, :])
    s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
    acts = t_act * s_act
    return acts
def convert_pad_shape(pad_shape):
    # NOTE(review): duplicate of the earlier convert_pad_shape in this module;
    # kept because both definitions are referenced, but consider deduplicating.
    """Flatten [[before, after], ...] (reversed) into the flat list F.pad expects."""
    flat = []
    for pair in pad_shape[::-1]:
        flat.extend(pair)
    return flat
def shift_1d(x):
    """Shift x one step to the right along the last axis, zero-filling position 0."""
    padded = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))
    return padded[:, :, :-1]
def sequence_mask(length, max_length=None):
    """Boolean mask of shape [b, max_length]: True where position < length[b]."""
    if max_length is None:
        max_length = length.max()
    positions = torch.arange(max_length, dtype=length.dtype, device=length.device)
    return positions[None, :] < length[:, None]
def generate_path(duration, mask):
    """Build a hard monotonic alignment path from per-token durations.

    duration: [b, 1, t_x] integer duration of each text token
    mask: [b, 1, t_y, t_x] valid-position mask
    Returns a [b, 1, t_y, t_x] 0/1 path: frame y is assigned to token x iff
    cum_duration[x-1] <= y < cum_duration[x].

    Fix: removed the unused local `device` present in the original.
    """
    b, _, t_y, t_x = mask.shape
    cum_duration = torch.cumsum(duration, -1)
    cum_duration_flat = cum_duration.view(b * t_x)
    # Mask of "frame y < cumulative end of token x", per (batch, token).
    path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)
    path = path.view(b, t_x, t_y)
    # Differencing consecutive cumulative masks isolates each token's own frames.
    path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1]
    path = path.unsqueeze(1).transpose(2, 3) * mask
    return path
def clip_grad_value_(parameters, clip_value, norm_type=2):
    """Clamp gradients in place to [-clip_value, clip_value] and return the
    total gradient norm (computed on the pre-clamp gradients).

    parameters: a tensor or an iterable of tensors; entries without a .grad
    are skipped. clip_value may be None, in which case only the norm is returned.
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    with_grads = [p for p in parameters if p.grad is not None]
    norm_type = float(norm_type)
    if clip_value is not None:
        clip_value = float(clip_value)
    accumulated = 0.0
    for p in with_grads:
        accumulated += p.grad.data.norm(norm_type).item() ** norm_type
        if clip_value is not None:
            p.grad.data.clamp_(min=-clip_value, max=clip_value)
    return accumulated ** (1. / norm_type)

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/juzi_train_filelist.txt.cleaned",
"validation_files":"filelists/juzi_val_filelist.txt.cleaned",
"text_cleaners":["chinese_cleaners"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 8,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["\u5c0f\u8338", "\u5510\u4e50\u541f", "\u5c0f\u6bb7", "\u82b1\u73b2", "\u8bb8\u8001\u5e08", "\u90b1\u7433", "\u4e03\u4e00", "\u516b\u56db"],
"symbols": ["_", "\uff0c", "\u3002", "\uff01", "\uff1f", "\u2014", "\u2026", "\u3105", "\u3106", "\u3107", "\u3108", "\u3109", "\u310a", "\u310b", "\u310c", "\u310d", "\u310e", "\u310f", "\u3110", "\u3111", "\u3112", "\u3113", "\u3114", "\u3115", "\u3116", "\u3117", "\u3118", "\u3119", "\u311a", "\u311b", "\u311c", "\u311d", "\u311e", "\u311f", "\u3120", "\u3121", "\u3122", "\u3123", "\u3124", "\u3125", "\u3126", "\u3127", "\u3128", "\u3129", "\u02c9", "\u02ca", "\u02c7", "\u02cb", "\u02d9", " "]
}

View File

@ -0,0 +1,54 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/cjke_train_filelist.txt.cleaned",
"validation_files":"filelists/cjke_val_filelist.txt.cleaned",
"text_cleaners":["cjke_cleaners2"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 2891,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"symbols": ["_", ",", ".", "!", "?", "-", "~", "\u2026", "N", "Q", "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "s", "t", "u", "v", "w", "x", "y", "z", "\u0251", "\u00e6", "\u0283", "\u0291", "\u00e7", "\u026f", "\u026a", "\u0254", "\u025b", "\u0279", "\u00f0", "\u0259", "\u026b", "\u0265", "\u0278", "\u028a", "\u027e", "\u0292", "\u03b8", "\u03b2", "\u014b", "\u0266", "\u207c", "\u02b0", "`", "^", "#", "*", "=", "\u02c8", "\u02cc", "\u2192", "\u2193", "\u2191", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/cjks_train_filelist.txt.cleaned",
"validation_files":"filelists/cjks_val_filelist.txt.cleaned",
"text_cleaners":["cjks_cleaners"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 24,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["\u7dbe\u5730\u5be7\u3005", "\u671d\u6b66\u82b3\u4e43", "\u5728\u539f\u4e03\u6d77", "\u30eb\u30a4\u30ba", "\u91d1\u8272\u306e\u95c7", "\u30e2\u30e2", "\u7d50\u57ce\u7f8e\u67d1", "\u5c0f\u8338", "\u5510\u4e50\u541f", "\u5c0f\u6bb7", "\u82b1\u73b2", "\u516b\u56db", "\uc218\uc544", "\ubbf8\ubbf8\ub974", "\uc544\ub9b0", "\uc720\ud654", "\uc5f0\ud654", "SA1", "SA2", "SA3", "SA4", "SA5", "SA6", ""],
"symbols": ["_", ",", ".", "!", "?", "-", "~", "\u2026", "N", "Q", "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "s", "t", "u", "v", "w", "x", "y", "z", "\u0283", "\u02a7", "\u02a5", "\u02a6", "\u026f", "\u0279", "\u0259", "\u0265", "\u00e7", "\u0278", "\u027e", "\u03b2", "\u014b", "\u0266", "\u02d0", "\u207c", "\u02b0", "`", "^", "#", "*", "=", "\u2192", "\u2193", "\u2191", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/train_filelist.txt.cleaned",
"validation_files":"filelists/val_filelist.txt.cleaned",
"text_cleaners":["japanese_cleaners"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 7,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["\u7dbe\u5730\u5be7\u3005", "\u56e0\u5e61\u3081\u3050\u308b", "\u671d\u6b66\u82b3\u4e43", "\u5e38\u9678\u8309\u5b50", "\u30e0\u30e9\u30b5\u30e1", "\u978d\u99ac\u5c0f\u6625", "\u5728\u539f\u4e03\u6d77"],
"symbols": ["_", ",", ".", "!", "?", "-", "A", "E", "I", "N", "O", "Q", "U", "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "y", "z", "\u0283", "\u02a7", "\u2193", "\u2191", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/hamidashi_train_filelist.txt.cleaned",
"validation_files":"filelists/hamidashi_val_filelist.txt.cleaned",
"text_cleaners":["japanese_cleaners2"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 8,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["\u548c\u6cc9\u5983\u611b", "\u5e38\u76e4\u83ef\u4e43", "\u9326\u3042\u3059\u307f", "\u938c\u5009\u8a69\u685c", "\u7adc\u9591\u5929\u68a8", "\u548c\u6cc9\u91cc", "\u65b0\u5ddd\u5e83\u5922", "\u8056\u8389\u3005\u5b50"],
"symbols": ["_", ",", ".", "!", "?", "-", "~", "\u2026", "A", "E", "I", "N", "O", "Q", "U", "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "y", "z", "\u0283", "\u02a7", "\u02a6", "\u2193", "\u2191", " "]
}

View File

@ -0,0 +1,54 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 20000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/train_filelist.txt.cleaned",
"validation_files":"filelists/val_filelist.txt.cleaned",
"text_cleaners":["japanese_cleaners2"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 0,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false
},
"speakers": ["\u30eb\u30a4\u30ba"],
"symbols": ["_", ",", ".", "!", "?", "-", "~", "\u2026", "A", "E", "I", "N", "O", "Q", "U", "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "y", "z", "\u0283", "\u02a7", "\u02a6", "\u2193", "\u2191", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/fox_train_filelist.txt.cleaned",
"validation_files":"filelists/fox_val_filelist.txt.cleaned",
"text_cleaners":["korean_cleaners"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 6,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["\uc218\uc544", "\ubbf8\ubbf8\ub974", "\uc544\ub9b0", "\uc5f0\ud654", "\uc720\ud654", "\uc120\ubc30"],
"symbols": ["_", ",", ".", "!", "?", "\u2026", "~", "\u3131", "\u3134", "\u3137", "\u3139", "\u3141", "\u3142", "\u3145", "\u3147", "\u3148", "\u314a", "\u314b", "\u314c", "\u314d", "\u314e", "\u3132", "\u3138", "\u3143", "\u3146", "\u3149", "\u314f", "\u3153", "\u3157", "\u315c", "\u3161", "\u3163", "\u3150", "\u3154", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/sanskrit_train_filelist.txt.cleaned",
"validation_files":"filelists/sanskrit_val_filelist.txt.cleaned",
"text_cleaners":["sanskrit_cleaners"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 27,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["Male 1", "Male 2", "Male 3", "Male 4 (Malayalam)", "Male 5", "Male 6", "Male 7", "Male 8 (Kannada)", "Female 1 (Tamil)", "Male 9 (Kannada)", "Female 2 (Marathi)", "Female 3 (Marathi)", "Female 4 (Marathi)", "Female 5 (Telugu)", "Female 6 (Telugu)", "Male 10 (Kannada)", "Male 11 (Kannada)", "Male 12", "Male 13", "Male 14", "Male 15", "Female 7", "Male 16 (Malayalam)", "Male 17 (Tamil)", "Male 18 (Hindi)", "Male 19 (Telugu)", "Male 20 (Hindi)"],
"symbols": ["_", "\u0964", "\u0901", "\u0902", "\u0903", "\u0905", "\u0906", "\u0907", "\u0908", "\u0909", "\u090a", "\u090b", "\u090f", "\u0910", "\u0913", "\u0914", "\u0915", "\u0916", "\u0917", "\u0918", "\u0919", "\u091a", "\u091b", "\u091c", "\u091d", "\u091e", "\u091f", "\u0920", "\u0921", "\u0922", "\u0923", "\u0924", "\u0925", "\u0926", "\u0927", "\u0928", "\u092a", "\u092b", "\u092c", "\u092d", "\u092e", "\u092f", "\u0930", "\u0932", "\u0933", "\u0935", "\u0936", "\u0937", "\u0938", "\u0939", "\u093d", "\u093e", "\u093f", "\u0940", "\u0941", "\u0942", "\u0943", "\u0944", "\u0947", "\u0948", "\u094b", "\u094c", "\u094d", "\u0960", "\u0962", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/zaonhe_train_filelist.txt.cleaned",
"validation_files":"filelists/zaonhe_val_filelist.txt.cleaned",
"text_cleaners":["shanghainese_cleaners"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 2,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["1", "2"],
"symbols": ["_", ",", ".", "!", "?", "\u2026", "a", "b", "d", "f", "g", "h", "i", "k", "l", "m", "n", "o", "p", "s", "t", "u", "v", "y", "z", "\u00f8", "\u014b", "\u0235", "\u0251", "\u0254", "\u0255", "\u0259", "\u0264", "\u0266", "\u026a", "\u027f", "\u0291", "\u0294", "\u02b0", "\u0303", "\u0329", "\u1d00", "\u1d07", "1", "5", "6", "7", "8", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/zero_train_filelist.txt.cleaned",
"validation_files":"filelists/zero_val_filelist.txt.cleaned",
"text_cleaners":["japanese_cleaners2"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 26,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["\u30eb\u30a4\u30ba", "\u30c6\u30a3\u30d5\u30a1\u30cb\u30a2", "\u30a4\u30eb\u30af\u30af\u30a5", "\u30a2\u30f3\u30ea\u30a8\u30c3\u30bf", "\u30bf\u30d0\u30b5", "\u30b7\u30a8\u30b9\u30bf", "\u30cf\u30eb\u30ca", "\u5c11\u5973\u30ea\u30b7\u30e5", "\u30ea\u30b7\u30e5", "\u30a2\u30ad\u30ca", "\u30af\u30ea\u30b9", "\u30ab\u30c8\u30ec\u30a2", "\u30a8\u30ec\u30aa\u30ce\u30fc\u30eb", "\u30e2\u30f3\u30e2\u30e9\u30f3\u30b7\u30fc", "\u30ea\u30fc\u30f4\u30eb", "\u30ad\u30e5\u30eb\u30b1", "\u30a6\u30a7\u30b6\u30ea\u30fc", "\u30b5\u30a4\u30c8", "\u30ae\u30fc\u30b7\u30e5", "\u30b3\u30eb\u30d9\u30fc\u30eb", "\u30aa\u30b9\u30de\u30f3", "\u30c7\u30eb\u30d5\u30ea\u30f3\u30ac\u30fc", "\u30c6\u30af\u30b9\u30c8", "\u30c0\u30f3\u30d7\u30ea\u30e1", "\u30ac\u30ec\u30c3\u30c8", "\u30b9\u30ab\u30ed\u30f3"],
"symbols": ["_", ",", ".", "!", "?", "-", "~", "\u2026", "A", "E", "I", "N", "O", "Q", "U", "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "y", "z", "\u0283", "\u02a7", "\u02a6", "\u2193", "\u2191", " "]
}

View File

@ -0,0 +1,55 @@
{
"train": {
"log_interval": 200,
"eval_interval": 1000,
"seed": 1234,
"epochs": 10000,
"learning_rate": 2e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/mix_train_filelist.txt.cleaned",
"validation_files":"filelists/mix_val_filelist.txt.cleaned",
"text_cleaners":["zh_ja_mixture_cleaners"],
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": true,
"n_speakers": 5,
"cleaned_text": true
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 256
},
"speakers": ["\u7dbe\u5730\u5be7\u3005", "\u5728\u539f\u4e03\u6d77", "\u5c0f\u8338", "\u5510\u4e50\u541f"],
"symbols": ["_", ",", ".", "!", "?", "-", "~", "\u2026", "A", "E", "I", "N", "O", "Q", "U", "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "y", "z", "\u0283", "\u02a7", "\u02a6", "\u026f", "\u0279", "\u0259", "\u0265", "\u207c", "\u02b0", "`", "\u2192", "\u2193", "\u2191", " "]
}

392
tts/vits/data_utils.py Normal file
View File

@ -0,0 +1,392 @@
import time
import os
import random
import numpy as np
import torch
import torch.utils.data
import commons
from mel_processing import spectrogram_torch
from utils import load_wav_to_torch, load_filepaths_and_text
from text import text_to_sequence, cleaned_text_to_sequence
class TextAudioLoader(torch.utils.data.Dataset):
    """Single-speaker dataset.

    1) loads audio, text pairs
    2) normalizes text and converts them to sequences of integers
    3) computes spectrograms from audio files.
    """

    def __init__(self, audiopaths_and_text, hparams):
        self.audiopaths_and_text = load_filepaths_and_text(audiopaths_and_text)
        self.text_cleaners = hparams.text_cleaners
        self.max_wav_value = hparams.max_wav_value
        self.sampling_rate = hparams.sampling_rate
        self.filter_length = hparams.filter_length
        self.hop_length = hparams.hop_length
        self.win_length = hparams.win_length
        self.cleaned_text = getattr(hparams, "cleaned_text", False)
        self.add_blank = hparams.add_blank
        self.min_text_len = getattr(hparams, "min_text_len", 1)
        self.max_text_len = getattr(hparams, "max_text_len", 190)

        # Fixed seed so every process shuffles the file list identically.
        random.seed(1234)
        random.shuffle(self.audiopaths_and_text)
        self._filter()

    def _filter(self):
        """Drop entries whose text length is out of bounds & store spec lengths."""
        # Spectrogram lengths are kept for bucketing:
        # wav_length ~= file_size / (wav_channels * Bytes per dim) = file_size / (1 * 2)
        # spec_length = wav_length // hop_length
        audiopaths_and_text_new = []
        lengths = []
        for audiopath, text in self.audiopaths_and_text:
            if self.min_text_len <= len(text) <= self.max_text_len:
                audiopaths_and_text_new.append([audiopath, text])
                lengths.append(os.path.getsize(audiopath) // (2 * self.hop_length))
        self.audiopaths_and_text = audiopaths_and_text_new
        self.lengths = lengths

    def get_audio_text_pair(self, audiopath_and_text):
        """Return (text_ids, spectrogram, normalized_audio) for one entry."""
        # separate filename and text
        audiopath, text = audiopath_and_text[0], audiopath_and_text[1]
        text = self.get_text(text)
        spec, wav = self.get_audio(audiopath)
        return (text, spec, wav)

    def get_audio(self, filename):
        """Load a wav, normalize it, and return (spectrogram, audio_norm).

        The spectrogram is cached beside the wav as ``*.spec.pt``.
        Raises ValueError if the file's sample rate differs from the config.
        """
        audio, sampling_rate = load_wav_to_torch(filename)
        if sampling_rate != self.sampling_rate:
            # Fix: the original format string had three placeholders but only
            # two arguments, so raising it produced IndexError instead of the
            # intended ValueError message.
            raise ValueError("{} SR doesn't match target {} SR".format(
                sampling_rate, self.sampling_rate))
        audio_norm = audio / self.max_wav_value
        audio_norm = audio_norm.unsqueeze(0)
        spec_filename = filename.replace(".wav", ".spec.pt")
        if os.path.exists(spec_filename):
            spec = torch.load(spec_filename)
        else:
            spec = spectrogram_torch(audio_norm, self.filter_length,
                self.sampling_rate, self.hop_length, self.win_length,
                center=False)
            spec = torch.squeeze(spec, 0)
            torch.save(spec, spec_filename)
        return spec, audio_norm

    def get_text(self, text):
        """Convert raw or pre-cleaned text into a LongTensor of symbol ids."""
        if self.cleaned_text:
            text_norm = cleaned_text_to_sequence(text)
        else:
            text_norm = text_to_sequence(text, self.text_cleaners)
        if self.add_blank:
            # Interleave blank tokens (id 0) between symbols.
            text_norm = commons.intersperse(text_norm, 0)
        text_norm = torch.LongTensor(text_norm)
        return text_norm

    def __getitem__(self, index):
        return self.get_audio_text_pair(self.audiopaths_and_text[index])

    def __len__(self):
        return len(self.audiopaths_and_text)
class TextAudioCollate():
    """Zero-pads model inputs and targets to the batch maxima."""

    def __init__(self, return_ids=False):
        self.return_ids = return_ids

    def __call__(self, batch):
        """Collate a batch of [text_normalized, spec_normalized, wav_normalized].

        Entries are reordered by decreasing spectrogram length and every
        tensor is right-zero-padded to the longest entry in the batch.
        """
        _, order = torch.sort(
            torch.LongTensor([x[1].size(1) for x in batch]),
            dim=0, descending=True)

        n = len(batch)
        max_text_len = max(len(x[0]) for x in batch)
        max_spec_len = max(x[1].size(1) for x in batch)
        max_wav_len = max(x[2].size(1) for x in batch)

        text_lengths = torch.zeros(n, dtype=torch.long)
        spec_lengths = torch.zeros(n, dtype=torch.long)
        wav_lengths = torch.zeros(n, dtype=torch.long)
        text_padded = torch.zeros(n, max_text_len, dtype=torch.long)
        spec_padded = torch.zeros(n, batch[0][1].size(0), max_spec_len)
        wav_padded = torch.zeros(n, 1, max_wav_len)

        for i, idx in enumerate(order):
            text, spec, wav = batch[idx]
            text_padded[i, :text.size(0)] = text
            text_lengths[i] = text.size(0)
            spec_padded[i, :, :spec.size(1)] = spec
            spec_lengths[i] = spec.size(1)
            wav_padded[i, :, :wav.size(1)] = wav
            wav_lengths[i] = wav.size(1)

        if self.return_ids:
            return (text_padded, text_lengths, spec_padded, spec_lengths,
                    wav_padded, wav_lengths, order)
        return (text_padded, text_lengths, spec_padded, spec_lengths,
                wav_padded, wav_lengths)
"""Multi speaker version"""
class TextAudioSpeakerLoader(torch.utils.data.Dataset):
    """Multi-speaker dataset.

    1) loads audio, speaker_id, text pairs
    2) normalizes text and converts them to sequences of integers
    3) computes spectrograms from audio files.
    """

    def __init__(self, audiopaths_sid_text, hparams):
        self.audiopaths_sid_text = load_filepaths_and_text(audiopaths_sid_text)
        self.text_cleaners = hparams.text_cleaners
        self.max_wav_value = hparams.max_wav_value
        self.sampling_rate = hparams.sampling_rate
        self.filter_length = hparams.filter_length
        self.hop_length = hparams.hop_length
        self.win_length = hparams.win_length
        self.cleaned_text = getattr(hparams, "cleaned_text", False)
        self.add_blank = hparams.add_blank
        self.min_text_len = getattr(hparams, "min_text_len", 1)
        self.max_text_len = getattr(hparams, "max_text_len", 190)

        # Fixed seed so every process shuffles the file list identically.
        random.seed(1234)
        random.shuffle(self.audiopaths_sid_text)
        self._filter()

    def _filter(self):
        """Drop entries whose text length is out of bounds & store spec lengths."""
        # Spectrogram lengths are kept for bucketing:
        # wav_length ~= file_size / (wav_channels * Bytes per dim) = file_size / (1 * 2)
        # spec_length = wav_length // hop_length
        audiopaths_sid_text_new = []
        lengths = []
        for audiopath, sid, text in self.audiopaths_sid_text:
            if self.min_text_len <= len(text) <= self.max_text_len:
                audiopaths_sid_text_new.append([audiopath, sid, text])
                lengths.append(os.path.getsize(audiopath) // (2 * self.hop_length))
        self.audiopaths_sid_text = audiopaths_sid_text_new
        self.lengths = lengths

    def get_audio_text_speaker_pair(self, audiopath_sid_text):
        """Return (text_ids, spectrogram, normalized_audio, speaker_id) for one entry."""
        # separate filename, speaker_id and text
        audiopath, sid, text = audiopath_sid_text[0], audiopath_sid_text[1], audiopath_sid_text[2]
        text = self.get_text(text)
        spec, wav = self.get_audio(audiopath)
        sid = self.get_sid(sid)
        return (text, spec, wav, sid)

    def get_audio(self, filename):
        """Load a wav, normalize it, and return (spectrogram, audio_norm).

        The spectrogram is cached beside the wav as ``*.spec.pt``.
        Raises ValueError if the file's sample rate differs from the config.
        """
        audio, sampling_rate = load_wav_to_torch(filename)
        if sampling_rate != self.sampling_rate:
            # Fix: the original format string had three placeholders but only
            # two arguments, so raising it produced IndexError instead of the
            # intended ValueError message.
            raise ValueError("{} SR doesn't match target {} SR".format(
                sampling_rate, self.sampling_rate))
        audio_norm = audio / self.max_wav_value
        audio_norm = audio_norm.unsqueeze(0)
        spec_filename = filename.replace(".wav", ".spec.pt")
        if os.path.exists(spec_filename):
            spec = torch.load(spec_filename)
        else:
            spec = spectrogram_torch(audio_norm, self.filter_length,
                self.sampling_rate, self.hop_length, self.win_length,
                center=False)
            spec = torch.squeeze(spec, 0)
            torch.save(spec, spec_filename)
        return spec, audio_norm

    def get_text(self, text):
        """Convert raw or pre-cleaned text into a LongTensor of symbol ids."""
        if self.cleaned_text:
            text_norm = cleaned_text_to_sequence(text)
        else:
            text_norm = text_to_sequence(text, self.text_cleaners)
        if self.add_blank:
            # Interleave blank tokens (id 0) between symbols.
            text_norm = commons.intersperse(text_norm, 0)
        text_norm = torch.LongTensor(text_norm)
        return text_norm

    def get_sid(self, sid):
        """Wrap the (string) speaker id into a 1-element LongTensor."""
        sid = torch.LongTensor([int(sid)])
        return sid

    def __getitem__(self, index):
        return self.get_audio_text_speaker_pair(self.audiopaths_sid_text[index])

    def __len__(self):
        return len(self.audiopaths_sid_text)
class TextAudioSpeakerCollate():
    """Zero-pads model inputs and targets; multi-speaker variant (adds sid)."""

    def __init__(self, return_ids=False):
        self.return_ids = return_ids

    def __call__(self, batch):
        """Collate a batch of [text_normalized, spec_normalized, wav_normalized, sid].

        Entries are reordered by decreasing spectrogram length and every
        tensor is right-zero-padded to the longest entry in the batch.
        """
        _, order = torch.sort(
            torch.LongTensor([x[1].size(1) for x in batch]),
            dim=0, descending=True)

        n = len(batch)
        max_text_len = max(len(x[0]) for x in batch)
        max_spec_len = max(x[1].size(1) for x in batch)
        max_wav_len = max(x[2].size(1) for x in batch)

        text_lengths = torch.zeros(n, dtype=torch.long)
        spec_lengths = torch.zeros(n, dtype=torch.long)
        wav_lengths = torch.zeros(n, dtype=torch.long)
        sid = torch.zeros(n, dtype=torch.long)
        text_padded = torch.zeros(n, max_text_len, dtype=torch.long)
        spec_padded = torch.zeros(n, batch[0][1].size(0), max_spec_len)
        wav_padded = torch.zeros(n, 1, max_wav_len)

        for i, idx in enumerate(order):
            text, spec, wav, speaker = batch[idx]
            text_padded[i, :text.size(0)] = text
            text_lengths[i] = text.size(0)
            spec_padded[i, :, :spec.size(1)] = spec
            spec_lengths[i] = spec.size(1)
            wav_padded[i, :, :wav.size(1)] = wav
            wav_lengths[i] = wav.size(1)
            sid[i] = speaker

        if self.return_ids:
            return (text_padded, text_lengths, spec_padded, spec_lengths,
                    wav_padded, wav_lengths, sid, order)
        return (text_padded, text_lengths, spec_padded, spec_lengths,
                wav_padded, wav_lengths, sid)
class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
    """
    Maintain similar input lengths in a batch.
    Length groups are specified by boundaries.
    Ex) boundaries = [b1, b2, b3] -> any batch is included either {x | b1 < length(x) <= b2} or {x | b2 < length(x) <= b3}.
    It removes samples which are not included in the boundaries.
    Ex) boundaries = [b1, b2, b3] -> any x s.t. length(x) <= b1 or length(x) > b3 are discarded.
    """
    def __init__(self, dataset, batch_size, boundaries, num_replicas=None, rank=None, shuffle=True):
        """
        Args:
            dataset: dataset exposing a ``lengths`` sequence (one length per sample)
                and ``__len__`` (required by DistributedSampler).
            batch_size: number of samples per batch on each replica.
            boundaries: sorted bucket edges; sample i belongs to the bucket whose
                half-open interval (boundaries[k], boundaries[k+1]] contains lengths[i].
            num_replicas, rank, shuffle: forwarded to DistributedSampler.
        """
        super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
        self.lengths = dataset.lengths
        self.batch_size = batch_size
        self.boundaries = boundaries
        self.buckets, self.num_samples_per_bucket = self._create_buckets()
        # Padded sample count across all replicas; each replica yields its share.
        self.total_size = sum(self.num_samples_per_bucket)
        self.num_samples = self.total_size // self.num_replicas

    def _create_buckets(self):
        """Assign sample indices to length buckets and compute padded bucket sizes.

        Returns:
            (buckets, num_samples_per_bucket): ``buckets[k]`` is the list of sample
            indices in bucket k; each entry of ``num_samples_per_bucket`` is the
            bucket length rounded up to a multiple of num_replicas * batch_size so
            every replica receives the same number of full batches.
        """
        buckets = [[] for _ in range(len(self.boundaries) - 1)]
        for i in range(len(self.lengths)):
            length = self.lengths[i]
            idx_bucket = self._bisect(length)
            if idx_bucket != -1:  # -1: length outside all boundaries -> sample discarded
                buckets[idx_bucket].append(i)
        # Drop empty buckets, iterating backwards so pops don't shift pending indices.
        # Bug fix: the loop now includes index 0. Previously an empty first bucket
        # survived and caused a ZeroDivisionError (rem // len_bucket) in __iter__.
        for i in range(len(buckets) - 1, -1, -1):
            if len(buckets[i]) == 0:
                buckets.pop(i)
                self.boundaries.pop(i + 1)
        num_samples_per_bucket = []
        for i in range(len(buckets)):
            len_bucket = len(buckets[i])
            total_batch_size = self.num_replicas * self.batch_size
            # Round the bucket size up to a multiple of the global batch size.
            rem = (total_batch_size - (len_bucket % total_batch_size)) % total_batch_size
            num_samples_per_bucket.append(len_bucket + rem)
        return buckets, num_samples_per_bucket

    def __iter__(self):
        # Deterministically shuffle based on epoch so all replicas agree on the order.
        g = torch.Generator()
        g.manual_seed(self.epoch)
        indices = []
        if self.shuffle:
            for bucket in self.buckets:
                indices.append(torch.randperm(len(bucket), generator=g).tolist())
        else:
            for bucket in self.buckets:
                indices.append(list(range(len(bucket))))
        batches = []
        for i in range(len(self.buckets)):
            bucket = self.buckets[i]
            len_bucket = len(bucket)
            ids_bucket = indices[i]
            num_samples_bucket = self.num_samples_per_bucket[i]
            # Add extra samples (by repeating the bucket) to make it evenly divisible.
            rem = num_samples_bucket - len_bucket
            ids_bucket = ids_bucket + ids_bucket * (rem // len_bucket) + ids_bucket[:(rem % len_bucket)]
            # Subsample: each replica takes an interleaved slice of the bucket.
            ids_bucket = ids_bucket[self.rank::self.num_replicas]
            # Batching: emit only full batches of size batch_size.
            for j in range(len(ids_bucket) // self.batch_size):
                batch = [bucket[idx] for idx in ids_bucket[j * self.batch_size:(j + 1) * self.batch_size]]
                batches.append(batch)
        if self.shuffle:
            batch_ids = torch.randperm(len(batches), generator=g).tolist()
            batches = [batches[i] for i in batch_ids]
        self.batches = batches
        assert len(self.batches) * self.batch_size == self.num_samples
        return iter(self.batches)

    def _bisect(self, x, lo=0, hi=None):
        """Binary-search the bucket index k with boundaries[k] < x <= boundaries[k+1]; -1 if none."""
        if hi is None:
            hi = len(self.boundaries) - 1
        if hi > lo:
            mid = (hi + lo) // 2
            if self.boundaries[mid] < x and x <= self.boundaries[mid + 1]:
                return mid
            elif x <= self.boundaries[mid]:
                return self._bisect(x, lo, mid)
            else:
                return self._bisect(x, mid + 1, hi)
        else:
            return -1

    def __len__(self):
        # Number of batches this replica yields per epoch.
        return self.num_samples // self.batch_size

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,171 @@
voistock/5fc85bdd3e4186174f039dc3.wav|1913|[EN]Violetgrass is a plant that enjoys a moist environment and is best picked after it rains. If you should choose to pick any, be sure to store them appropriately.[EN]
voistock/5d5cbdbd3e418673514e7963.wav|980|[JA]やあ、助けを求めてるのはあんたか?私はボルチモア、通りすがりの重巡洋艦だ…と冗談はここまでにして…ボルチモア、着任した。指揮官、よろしく。[JA]
voistock/5db01fb23e418601be759a0a.wav|1105|[JA]なんでこんなこと…するのかって?[JA]
voistock/5d0cc70e3e4186510a13facb.wav|199|[JA]シャアッ![JA]
voistock/5fce68ac3e41860bff49cdd2.wav|1836|[EN]Whenceforth springs this call to mirth and misery, you ask?[EN]
voistock/5d4290093e4186733d61a4c2.wav|1600|[JA]見てください!こちらにはみずがめ座、そちらに見えるのが、おひつじ座そしてあちらが、やぎ座ですわ[JA]
voistock/5c9c8c0f3e41865571121807.wav|904|[JA]ちょっ……。あんたたち、何言ってんの!?[JA]
voistock/5caee0cd3e4186128b6760da.wav|902|[JA]でも……、きちんとしなきゃだめって言われた。[JA]
voistock/5c75216c3e4186273b657817.wav|253|[JA]強さに果てはない[JA]
voistock/5e54fff53e41867d0c1d23dc.wav|2809|[JA]プログラミング全然わかんないよ~![JA]
voistock/6253f8433e41861e59799f62.wav|2686|[JA]高校生活が始まり、最初に仲良くなったのは隣の席の阿波連さん最初は距離がはかれなくて戸惑うことがあったけど…最近だんだん彼女のことが少し分かってきた気がする[JA]
voistock/5d0c6b7d3e41865d7e564e8b.wav|280|[JA]んー、そりゃあ私は天才だからね。多くの人間に乞われ、求められたよ?……でも、その……なんだ? ここまで一人の人間に求められたのは、……間違いなく、これが初めてみたいなんだ[JA]
voistock/604b23b73e4186746c333db2.wav|1279|[JA]ツナ缶の味だ!はぐはぐっ!おわっ、今度はチーズオムレツ![JA]
voistock/5d15ce583e41865ac6038962.wav|128|[JA]作業がある。失礼[JA]
voistock/5d4186243e418673d87540f0.wav|1588|[JA]あっ、ごはんもちょうどたけたみたい♪にこにーがとくせいおにぎりをつくってあげるわね♪ボールに、ごはんとしおをいれて……と[JA]
voistock/5e26ed3f3e418644f941f4c4.wav|340|[JA]ちょっとご相談が、マスター。あの……葛飾北斎さんって画家の方、ずっと付いてくるんです。[JA]
voistock/5d418a733e418673d875426a.wav|1575|[JA]そのことをこれからも忘れず、みんな対等で……μ'sのますますの活躍を目指して日々精進に努めていきたいですね[JA]
voistock/6082815e3e418624672eef24.wav|1276|[JA]……そしたら、許してやらないことも、ない。[JA]
voistock/5fe2dbf33e41863fd745d144.wav|1291|[JA]なんだあのヤバイの!?[JA]
voistock/605190733e418665281cf275.wav|1291|[JA]…………言えません。[JA]
voistock/5ae944883e41867b2911ae26.wav|2039|[JA]おまえ…どうしてここへ?大丈夫か?危険な目に遭ったりは…そうか、よかった[JA]
voistock/5d15cfdd3e41865ac60389b8.wav|139|[JA]そうか……もう礼拝の時間か――[JA]
voistock/5d50db0d3e41866b8c4a6723.wav|977|[JA]指揮官さんと一緒にいると、心が温まりますわ。そう、私の大好きな紅茶によく似ています。永遠にこの時が続いて欲しいって思ってしまうのですが、欲張りでしょうか?[JA]
voistock/5d2872223e4186471e5549c4.wav|2782|[JA]お姉様とミヌゥが出るまでもなく、私が片付けてしまいたいね。[JA]
voistock/61f0ffa23e4186406e163a85.wav|1273|[JA]ご主人様は、学園長の秘密がお望みなのですか?[JA]
voistock/5bab39b43e4186363b7a5cde.wav|897|[JA]それ、ツッコミか……?[JA]
voistock/5d419aa13e418673d8754816.wav|1572|[JA]掴み取りに行きましょう!一緒に![JA]
voistock/5d42c1743e4186180544e1ce.wav|1578|[JA]なーんちゃって!それだけで終わるマリーではありません♪[JA]
voistock/5d42aea93e4186180544d76e.wav|1592|[JA]いい? この話は私とあなただけの秘密だからねうっかりしゃべったりしないでよ?…ついでだからもう少し話してあげるわ[JA]
voistock/5eb3d9ce3e418605a21669a7.wav|2471|[JA]お帰りなさいませ、ご主人様![JA]
voistock/61d675d03e418639955b2bd2.wav|1886|[JA]山を下りた時に様々な料理を食べたが、味はあまり覚えていない。ただ唯一忘れられないのは、かつて暮らした家で食べた一杯の麺…いや、これは覚えておくべきことではない。そう…覚えておくべきことではないんだ…[JA]
voistock/5d417efd3e418673d8753e64.wav|1572|[JA]ブックカバーを掛けておけばそれほど周囲の目を気にせず読めますし、大好きな本がすぐ手に取れる距離にある安心感と幸福感にも浸れます![JA]
voistock/62f486233e41862eae7d5514.wav|221|[JA]サンチョ「あの特異点での戦いをクリアしたマスターさんならば、言っても構わないでしょう。私はサンチョ・パンサですが、物語に出てくる、他の人物の統合概念でもあります。そして、彼が打ち勝てなかった、唯一の存在。現実という壁でもあるのです」[JA]
voistock/62f485953e41861abd0f64b0.wav|129|[JA]ふふっ…怖いもの知らず[JA]
voistock/5ca56ada3e41866dd26dab1c.wav|2810|[JA]うふふ・・・今日は大役だったもの。緊張の糸が切れたのかしらねー[JA]
voistock/606449473e418673de5fdce2.wav|1788|[EN]I have no idea.[EN]
voistock/5d09e5153e418622cf4659f9.wav|2833|[JA]海の家で食べる焼きそばって最高だよね~。そこで!その焼きそばを…餡かけ中華焼きそばにしたらどうかな?ビジネスチャンスだね![JA]
voistock/5d143ecf3e41867a9e7d2a46.wav|294|[JA]こりゃ。まめに市井の様子を見に行くのも為政者の務めじゃぞ[JA]
voistock/5f8684c43e41860fb7361412.wav|42|[JA]もう年末か。来年もよろしく[JA]
voistock/611f7bb03e41865c9668baee.wav|115|[JA]イベントは逃げませんが、終わります。悔いの無いよう、楽しんでおくべきかと[JA]
voistock/5d54b9e33e41865ff21bead8.wav|365|[JA]拙えおれの腕前で、お役に立てれば重畳サ。猫のノミ取り耳掃除、なんでもござれだァ。信州信濃の新蕎麦よりも、俺はマスターの側がいい、ってね。[JA]
voistock/5d42932d3e4186733d61a6b2.wav|1580|[JA]もうっ、なんで笑ってるにゃ~凛、誰かをケガさせちゃったかと思って、びっくりしたのに~でも、あなたが大丈夫でよかったにゃ[JA]
voistock/5d419b943e418673d875487a.wav|1578|[JA]そんな時は、マリーとアメリカを旅してみない?[JA]
voistock/5d3abd6e3e4186121716ecfb.wav|2805|[JA]実は水着というのは恐ろしく運動性能がいいかもしれないな。[JA]
voistock/5d0b24273e41863a640b6136.wav|229|[JA]余の見せ場だな![JA]
voistock/6134c88f3e41863b7a79e693.wav|1834|[EN]Paimon thinks she's pretty strange...[EN]
voistock/5c247ff03e41861753655ec8.wav|256|[JA]了解です[JA]
voistock/5d394d233e41862322197da7.wav|2160|[JA]ここの温泉に入れば、誰だって元気になるってもんさ!皆の笑顔が俺の力の源だ[JA]
voistock/5bbd5e6d3e4186395703898c.wav|2800|[JA]うわ!今のって魔女の反応ですよね。魔女がいるとみんなが危ないって知ってますから、お店のお手伝いの前にかたずけに行かないと[JA]
voistock/5bcd87d53e41866b287d6671.wav|2465|[JA]ったく、例によっていつものごとく目の前のことしか見えておらんな、この従僕は[JA]
voistock/62f486bb3e41863b6d4083ce.wav|174|[JA]ぐうぅぅぅぅっ![JA]
voistock/5d47d64b3e41866ad86e17ef.wav|996|[JA]しーきーかーん~誰も、戻ってないよ~[JA]
voistock/5d3049703e4186202e2be363.wav|2023|[JA]あれ?終わっちゃった?[JA]
voistock/5f741b0f3e418658246d5ba2.wav|30|[JA]カズ、決めるよ![JA]
voistock/611b1e823e4186190a779882.wav|2620|[JA]俺たちを創った奴はこう考えた。色んな奴がいた方が面白いってな。君たちユミルの民も同じさ。俺たちは求められたから存在する[JA]
voistock/5d42b2223e4186180544d96d.wav|1582|[JA]それなら、本番までにばっちり間に合うんやないかな[JA]
voistock/5d09d7fe3e418603a11f266a.wav|2786|[JA]アリナが作ったインスタレーション。参加してくれたよね。あれ無題だったけど、最終日にタイトルを付けた訳。[JA]
voistock/6114d29c3e41861e9e6222e7.wav|235|[JA]とぅ!ぜぇいっ![JA]
voistock/5c6679b33e418653ab2f91c6.wav|227|[JA]メジェド様に近ずいてゆく…私に何をせよとおっしゃるのですか?ネジェド様…[JA]
voistock/5df999983e418625a6300644.wav|2504|[JA]行くねん!定春![JA]
voistock/5fd1e4f43e41864f902a4a72.wav|1796|[EN]Oh right, you're still underage. Haha, such a shame, it truly is.[EN]
voistock/611f82543e41862d6a5cd47f.wav|228|[JA]エンジン「嫌いなもの?特にないよ。あー、でもキャプテン野郎はムカつくなー。あいつ、港に出るとこっそり甘いもの食べに行くんだぜ。アタシが岩塩舐めてエンジンの面倒見てるっていうのに。不公平にも程があるよな!?」[JA]
voistock/5f97ad543e418620d0424252.wav|29|[JA]今日の晩ご飯は恵方巻きを作ろうって言ってるんだけど、いっぱい具を詰めておっきいのがいいよね!あ、でも、みんな最後まで静かに食べきれるかなぁ[JA]
voistock/61c1503d3e41865afd0cef13.wav|1903|[EN]Hey! Raiden Shogun, is that you? Well guess what — you're not the boss of me. Uh-huh, that's right, you heard me! Go ahead, strike me down![EN]
voistock/62b59a9f3e41865a9851d662.wav|602|[JA]ボンドマンのピストル何パスのやつ?[JA]
voistock/61024df83e41860d3f361752.wav|1284|[JA]なんだあの3馬鹿以外にも、頭にイソギンチャクを生やした奴らがたくさんやがるぞ[JA]
voistock/5d42dd5b3e4186180544f167.wav|1578|[JA]いつか、Aqoursのステージをみんなに観てもらって、みんなにキラキラハッピースマイルを届けたいって思っているわ[JA]
voistock/61836b983e41863cb64eceb2.wav|1827|[EN]Here's backup.[EN]
voistock/5ea8c53a3e418644af2625e4.wav|325|[JA]うっ……ううっ!ごめん、ごめん!ちょっと待った!二人きりの時に密着するの、禁止~![JA]
voistock/5da951d33e41862a5821b4a2.wav|2086|[JA]早起き…うっ…つらい…お前も早く起きろ![JA]
voistock/5db03f3d3e4186058b39a500.wav|2653|[JA]削り殺し!手間をかけさせるな![JA]
voistock/5d429f573e4186733d61ae3f.wav|1582|[JA]みらいのじぶんからメッセージをうけとる…みらいのじぶんのすがたをカードにうつすんだねおもしろいなぁ~[JA]
voistock/5d54bd2b3e41865fea61b805.wav|165|[JA]でも、今から予告状を出すとすれば…[JA]
voistock/5dab4a273e41866cb40fed73.wav|413|[JA]超尾獣螺旋手裏剣[JA]
voistock/5c496d313e4186140613c012.wav|2154|[JA]さあ、もっと苦しめ…![JA]
voistock/5d42e42b3e4186180544f4e8.wav|1582|[JA]あ、でも暗闇の中で走っていくのとか、水の上を歩いちゃうのとかも楽しそうやな……[JA]
voistock/5d42900d3e4186733d61a4c5.wav|1600|[JA]それじゃあ、あなたは何座になりますの?ほら、こちらに座って一緒に探してみましょう♪[JA]
voistock/5aeab58b3e41861abe14fb62.wav|2067|[JA]この力も、姿も、全て貴方のものです[JA]
voistock/5d08b38f3e41867f547dc1a8.wav|201|[JA]無念はない。ありがとう。[JA]
voistock/5d42dd933e4186180544f192.wav|1600|[JA]はじめまして、黒澤ダイヤと申しますここ浦の星女学院で生徒会長をしておりますわ[JA]
voistock/60f136ad3e41860f0e1c1b75.wav|1302|[JA]何故……?理由なんか聞いてどうする。俺を叱って、慰めてくれるって?[JA]
voistock/5d419dde3e418673d875493e.wav|1589|[JA]キミの好きな星って、どれかしら?私は……あっ! あれを見てあの明るい一等星、分かる?[JA]
voistock/5d42dda23e4186180544f19b.wav|1600|[JA]うふふっ私も、もしかしたら既にスクールアイドルの魔法にかかっているのかもしれませんね♪[JA]
voistock/5d0c8d173e418674941f8039.wav|150|[JA]おお、エリザよ。死んでしまうとは……ばたっ[JA]
voistock/61c044c93e41861c8472d972.wav|1902|[JA]虫相撲をなめるんじゃねぇ!汗と涙の一騎打ちを乗り越え、自らを強くする…これぞ人生ってもんよ![JA]
voistock/5d429bf83e4186733d61ac54.wav|1586|[JA]えっ、そんなところにリトルデーモンがいるのかって?いっ、いるわよ!リトルデーモンは下界だけじゃなく地獄にもいるんだから![JA]
voistock/5aed81863e418643ae44971c.wav|782|[JA]ガーッハッハッハ[JA]
voistock/5c2327bd3e4186243344bb4d.wav|154|[JA]ぜひじっくりとお話ししたいわ[JA]
voistock/5d5cc1843e4186077d72daa8.wav|1004|[JA]ときには派手にやることも必要です…![JA]
voistock/5d42965c3e4186733d61a8c9.wav|1573|[JA]マシュマロだって、手作りできるよ〜え、材料が想像つかない?[JA]
voistock/5d42e65f3e4186180544f60b.wav|1575|[JA]……あなたも穂乃果に言ってやってください1000と言われたのに、100泳いで一人で勝手におしまいにしてしまうんですから[JA]
voistock/5df74fe23e4186245c282d02.wav|857|[JA]あれー?先輩どうしたんですかー?[JA]
voistock/5d0b6ab43e418638bd40103a.wav|388|[JA]血路を開きます[JA]
voistock/6018b6da3e41865e411c94e2.wav|2718|[JA]温かい…ごめんなさい!全部…全部…僕が悪かったんだ。父さんと母さんと…同じ所へ行かないよね…?[JA]
voistock/5f7d48a33e418662fe7c93f2.wav|41|[JA]ワクワクが止まらないよ~!![JA]
voistock/630dae523e418665247b3222.wav|1344|[JA]うまく逃げるんだ!!ぜったいセルに吸収されないでくれ。たのむ!![JA]
voistock/5e2fabcf3e41867d9a77ef7b.wav|2801|[JA]お前さんのこと考えながら座禅してたから、狭窄で叩かれまくったんだぞ~![JA]
voistock/5d4178dd3e418673d8753c09.wav|1596|[JA]あんなに飛ばせるなんて、きっとあなたにはビー玉を飛ばす才能があるんだね![JA]
voistock/5d0cd0a53e418659c0492ae5.wav|121|[JA]円卓の騎士たちにもおみやげを買ってあげたいところですね。皆お揃いのアロハシャツなどどうでしょう?喜ぶ姿が目に浮かぶようです[JA]
voistock/5d417c8d3e418673d8753d8e.wav|1592|[JA]それじゃあお礼になってない?もう、私がいいって言ってるんだからいいのよ![JA]
voistock/5d0b23693e41863a640b610e.wav|250|[JA]おらおらおら、どしたどした![JA]
voistock/60af554d3e41867e7604eb1c.wav|1289|[JA]君たちに頼みたいことがあって来ました。立ち話もなんですから、中へ。[JA]
voistock/5db91be13e41865ffc326dc0.wav|2794|[JA]私も子供たちに混じってお菓子もらえるかな[JA]
voistock/6321789c3e41862997182491.wav|204|[JA]よくやるものだ。呆れたぞ。……いや、褒めているのだ。本当に。よく私に付き合ってくれる[JA]
voistock/5d0cd0d43e418659c0492aed.wav|121|[JA]面白みのない提案ではありませんでしたか?……感謝します、マスター[JA]
voistock/614067333e4186651d77e8a2.wav|1857|[EN]Hey![EN]
voistock/603887913e41865a00249884.wav|2776|[EN]I think I'm gonna use my free time to make some new clothes. It'll be gothic lolita style. Maybe I'll add some bandages, fake blood, and scars to give it that extra gurokawa feeling I'm so into![EN]
voistock/5f9ff8d13e4186093260bdf5.wav|2876|[JA]ついこないだ入ったやつが寝ぼけてんじゃねーよそもそも練習中だって俺に勝てねえのにどういうつもりだ[JA]
voistock/60e2c3e03e41861eba386192.wav|1278|[JA]なんだ!?急に大声出して。[JA]
voistock/61c066a93e41863d0300e072.wav|1902|[JA]疲れたんなら寝ろ、無理はするな。俺様か?俺様は目を開けてても寝れるんだ、へへへへッ。[JA]
voistock/5d42d5573e4186180544ed23.wav|1600|[JA]Aqoursを多くの人に知っていただくために、ここで完璧なパフォーマンスをしておきたいところです[JA]
voistock/5d42bbb63e4186180544decc.wav|1586|[JA]あなたにも分けてあげるわ一緒に月夜を、祝いましょ♪[JA]
voistock/5a9c968c3e41865b621d249b.wav|1580|[JA]凛がもう一回、ギュ!ってしてあげるにゃ![JA]
voistock/5d42b8ad3e4186180544dd30.wav|1601|[JA]な、な、何か、何か、えっと……あっ、な、何も無い……![JA]
voistock/611f7a123e41863acf22d3aa.wav|318|[JA]レベルアップですか。悪くありません。身体がビルドされるのは良い事です[JA]
voistock/5e27b9bb3e41860f4e19ac6f.wav|261|[JA]教えてくれてありがたいっす、マスター![JA]
voistock/5db7f1903e418620c94517e5.wav|2471|[JA]こんなのは大体見ればわかるでしょ?[JA]
voistock/5d0b17623e4186656a1e9670.wav|240|[JA]大英雄と呼ばれた私は確かに、多くの邪悪な怪物を打ち倒したが、私一人ではエリンを守り抜く事はできなかったろう……。我が騎士たちあってこそだ[JA]
voistock/5f4efe1b3e418606150a88fd.wav|343|[JA]これも一つの夏の怪談。人魚の戯れ、お付き合い下さいませ[JA]
voistock/5d3010a43e4186026d1c358d.wav|1759|[JA]はぁ、ま、それはともかく彼女のあの見事な絵を描きあげる創作へのモチベーションが幼馴染との仲を引き裂いた世間への復讐だったなんてね。[JA]
voistock/5fce096e3e41860a1d5742b3.wav|1836|[EN]Ahh—Atchoo!!![EN]
voistock/5d0b28f43e418651b94ae52d.wav|322|[JA]頑張ります![JA]
voistock/63217d463e418641f1166a1f.wav|237|[JA]休憩です[JA]
voistock/5e54ffe73e41867d0c1d23d8.wav|2809|[JA]話しかける勇気があれば、編み物の話とか…できるのかな~?[JA]
voistock/5acdd3073e41860b620bcda7.wav|540|[JA]初めから全開だ![JA]
voistock/5e7c27e03e41866f8f150c96.wav|595|[JA]普通に…、友達として、話してくれたらいいよ。最初はぎこちないと思うし、何日もかかるかもしれないけど…![JA]
voistock/6143088d3e41865634733c04.wav|1916|[EN]I'm fearful because of what I witnessed five hundred years ago — her demise and that thing. But, if... If it were you, everyone would be safe. You would've saved the world. Just like I cherish the memories of everything in Inazuma, if you remember me, I'll live forever.[EN]
voistock/5d429d5e3e4186733d61ad38.wav|1578|[JA]ひたすら辞書と教科書を見比べラリーするなんて退屈でおかしくなっちゃう![JA]
voistock/5d0c8cbd3e418674941f8021.wav|176|[JA]わわっ、すっごーい!頑張ったのね、やるぅ~[JA]
voistock/5eb2f4753e418604be7cd0b4.wav|2474|[JA]今日はここにお出かけするんだね…?[JA]
voistock/611f7dad3e4186041176b738.wav|304|[JA]ここに至るはあらゆる収斂……。縁を斬り、定めを斬り、業を斬り、我をも断たん、『無元の剣製』。即ち──宿業からの解放なり![JA]
voistock/5d418dff3e418673d87543af.wav|1592|[JA]……ふふっ♪ 今日は来てくれてありがとうもうすぐ乾杯の時間だから、なにか飲み物を選んでね[JA]
voistock/60ffac5c3e418662c76e9dd5.wav|1276|[JA]あっ、監督生!お前、監督生ならちゃんと寮生の管理しろ![JA]
voistock/5f6adb5a3e4186330e2d0402.wav|1258|[JA]「皐月葵」です。動物のお世話ならお任せください。……なんちゃって[JA]
voistock/62f4857a3e41861abd0f6471.wav|129|[JA]ふっ、やっ、とりゃー![JA]
voistock/5d37ab263e41865838251032.wav|925|[JA]私は悪い子じゃないですわよ。ただね、あなたに出会うまでは本気になったことないだけ。もうここまで話してやったから、最後まで付き合ってくれますよね?こ・ぶ・た・ちゃん?[JA]
voistock/612355843e41861e64636ab2.wav|654|[JA]料理なんて、レシピさえ把握すれば誰でも出来るもの。分量とタイミングを間違えなければいいの。そっ、変な所で突っかかったりしなければ、ね[JA]
voistock/62f4886f3e418656c835bd85.wav|331|[JA]化けて出てやるぅー[JA]
voistock/6075359f3e4186337a3215a2.wav|1782|[EN]ou have come here, you have seen our secrets. For this, you must pay the price.[EN]
voistock/5d42e11e3e4186180544f35e.wav|1575|[JA]またこういったアトラクションがあったらいっしょに行きませんか?あなたとなら、どんなところも大丈夫な気がするんです![JA]
voistock/5d42db5c3e4186180544f069.wav|1596|[JA]もちろん、その思い出の中にあなたもいるよずっとずっとμ'sのみんなを支えてくれたよね[JA]
voistock/601ba30c3e41867247377052.wav|1212|[JA]最後まで希望を捨てちゃいかん。あきらめたら、そこで試合終了だよ[JA]
voistock/5d42d9a23e4186180544ef78.wav|1596|[JA]一番大事なのは、ファンのみんなに喜んでもらうことだから……[JA]
voistock/6215e4af3e4186482e6674d3.wav|413|[JA]だってばよ[JA]
voistock/6216dfc63e418637122aa398.wav|258|[JA]こんにちは、ディルムッド・オディナ。槍の調子はどうでしょうか?あなたには、一つだけ──略奪愛は良くないと思います[JA]
voistock/5d41981c3e418673d8754730.wav|1578|[JA]あなたも一緒にやってみるうふふっ、9人でもなかなか決着がつかないから10人だと益々エキサイティングなじゃんけんになりそうね[JA]
voistock/5d105edd3e41860b1a5ecbd3.wav|102|[JA]はぁ……。隙を見て豚にしていじめる予定でしたけど、プラン変更です。物好きな変態マスターとかねーですから、もう少しだけサーヴァントごっこをしてあげます。ちゃあんとお世話してあげますから良き人類であってくださいね?まぁ、それはそれとして最後は豚にしますけどぉ。何故?って女神ペレの恋人は……[JA]
voistock/5fc418423e418629d762cc03.wav|1412|[JA]自分のことなら家なんかなくても俺が決めてやる[JA]
voistock/615eb3ae3e41861c717f9e82.wav|1872|[EN]Almost. Got. Away...[EN]
voistock/5d42b5b63e4186180544db82.wav|1588|[JA]うふふっ、言わなくてもわかってるわよ~…えっ、違う? いつものアレをやって…?も~、そんなに中毒性が高かったかしら[JA]
voistock/5f4498783e418663cf4a92e3.wav|114|[JA]本当は恥ずかしいのだけれど、マスターが気に入ってくださったら嬉しいなぁ。[JA]
voistock/5c4707083e418649c730d47d.wav|338|[JA]ふん…奇異な縁もあったものよ。三厳であればさと楽しげに笑ったろうが、私には敵うべくもない[JA]
voistock/629c4fa43e41865740330b32.wav|2220|[JA]聞いた?ウチの機転のおかげで助かったんだよ!今回は迷惑かけずにみんなを救えた!うふふ。謙遜はしないよ。ウチってばさすがっ![JA]
voistock/5d0b0fdd3e4186656a1e9571.wav|117|[JA]では、こちらを[JA]
voistock/5c208ebd3e4186084b621344.wav|375|[JA]エ・ク・ス、、、カリバー![JA]
voistock/6010f60e3e41860eda6c8d64.wav|1276|[JA]あははっ。頑張れよ、監督生どの![JA]
voistock/5d4178cd3e418673d8753c02.wav|1596|[JA]ラムネ飲み終わったから海未ちゃんとことりちゃんと、ビー玉で遊んでたんだ[JA]
voistock/5d42db743e4186180544f074.wav|1596|[JA]さあ、練習始めるよ~!! あれ、みんなは!?えっ、もうジョギング行っちゃったの!?それを早く言ってよ~!![JA]
voistock/5d1445d83e4186650c670e71.wav|152|[JA]私にあるのは可愛げのない筋肉と、面倒くさい女王の矜持だけだぞ。こんな愛想も面白みもないサーヴァントにかかずらっていていいのか。物好きな人間だな、貴様は[JA]
voistock/5ae00dc43e418645b2578116.wav|2094|[JA]おおっ!?プレゼントか!?[JA]
voistock/5bac510c3e4186624f2359a5.wav|2799|[JA]人に見られたくない物ってたくさんあるよね[JA]
voistock/5f98fc733e41866cae309722.wav|36|[JA]あけましておめでとう。年の初めは気合いが入るな[JA]
voistock/5d089de23e418635ac221845.wav|256|[JA]お任せください![JA]
voistock/5d42b0c23e4186180544d89f.wav|1580|[JA]でも、こうしてショートパンツを隠しちゃうと~…かよちんのスカートにそっくりなんだよ♪ちょっとお揃いってカンジでテンション上がるにゃ~♪[JA]
voistock/5db826bb3e418643f672da3c.wav|2474|[JA]この子は知らない。[JA]
voistock/62b3e53c3e4186523426a8ef.wav|1851|[JA]たとえラーメンや餃子などは満腹感を得られるものを食べた直後だとしても最後はやはりご飯いっぱいで閉めた[JA]
voistock/5d42e5fe3e4186180544f5d5.wav|1596|[JA]100じゃなくて、1000泳がなくちゃいけなかったんだってちゃんと先生の話を聞きなさいって言われちゃった……反省してます……[JA]
voistock/5d4189413e418673d8754209.wav|1600|[JA]ですから、本物の花であっても難しいということはないでしょう?[JA]
voistock/5f8697903e41864c6726c723.wav|42|[JA]行くぞ、北門[JA]
voistock/5ca48f2a3e418637bb64af13.wav|2810|[JA]これで作戦通りねー[JA]
voistock/5ffc28a33e418642b56cacd2.wav|2596|[JA]じゃあ任せる。だが無理はするなよ![JA]

View File

@ -0,0 +1,171 @@
voistock/5fc85bdd3e4186174f039dc3.wav|1913|violetgɹass* ɪz ə plænt ðət ˌɛnˈdʒɔɪz ə mɔɪst ɪnˈvaɪɹənmənt ənd ɪz bɛst pɪkt ˈæftəɹ ɪt ɹeɪnz. ɪf ju ʃʊd tʃuz tɪ pɪk ˈɛni, bi ʃʊɹ tɪ stɔɹ ðɛm əˈpɹoʊpɹiɪtli.
voistock/5d5cbdbd3e418673514e7963.wav|980|ja↓a, ta↑sɯ*ke↓o mo↑tomete↓ɾɯ no↑wa a↓ntaka? wa↑taʃiwa bo↑ɾɯtʃi↓moa, to↑oɾisɯgaɾino dʑɯɯɯ↓Njookanda… t o dʑo↑oda↓Nwa ko↑koma↓den^i ʃi*↑te… bo↑ɾɯtʃi↓moa, tʃa↑kɯn^iN ʃi*↑ta. ʃi*↑ki↓kaN, jo↑ɾoʃi*kɯ.
voistock/5db01fb23e418601be759a0a.wav|1105|na↓nde ko↑nna ko↑to… sɯ↑ɾɯ no↑kat#te?
voistock/5d0cc70e3e4186510a13facb.wav|199|ʃ a a↓Q!
voistock/5fce68ac3e41860bff49cdd2.wav|1836|whencefoɹth* spəɹɪŋz ðɪs kɔɫ tɪ məɹθ ənd ˈmɪzəɹi, ju æsk?
voistock/5d4290093e4186733d61a4c2.wav|1600|mi↓te kɯ↑dasa↓i! ko↑tʃiɾan^iwa mi↑zɯgameza, so↑tʃiɾan^i mi↑e↓ɾɯ no↑ga, o↑hi*tsɯdʑiza so↑ʃi*te a↑tʃiɾaga, ja↑gizade↓sɯ*wa.
voistock/5c9c8c0f3e41865571121807.wav|904|tʃo↓Q……. a↑nta↓tatʃi, na↓n^i i↑t#te n↓no!?
voistock/5caee0cd3e4186128b6760da.wav|902|de↓mo……, ki*↑tʃi↓nto ʃi↑nakja da↑me↓t#te i↑waɾeta.
voistock/5c75216c3e4186273b657817.wav|253|tsɯ↑jo↓san^i ha↑te↓wa na↓i.
voistock/5e54fff53e41867d0c1d23dc.wav|2809|pɯ↑ɾogɯ↓ɾamiŋgɯ ze↑NzeN wa↑ka↓N na↓ijo~!
voistock/6253f8433e41861e59799f62.wav|2686|ko↑okoose↓ekatsɯga ha↑dʑimaɾi, sa↑iʃon^i na↓kajokɯ na↓t#ta no↑wa to↑naɾino se↓kino a↑haɾeNsaN sa↑iʃowa kjo↓ɾiga ha↑kaɾe↓nakɯ*te to↑mado↓ɯ ko↑to↓ga a↓t#takedo… sa↑ikiN da↑ndaN ka↓nodʑono ko↑to↓ga sɯ*↑ko↓ʃi wa↑ka↓t#te ki↓ta ki↑ga sɯ↑ɾɯ.
voistock/5d0c6b7d3e41865d7e564e8b.wav|280|N↑N, so↑ɾja a↓wataʃiwa te↑Nsaida↓kaɾane. o↓okɯno n^i↑ŋgenn^i ko↓waɾe, mo↑tome↓ɾaɾetajo?…… de↓mo, so↑no…… na↓nda? ko↑koma↓de hi*↑to↓ɾino n^i↑ŋgenn^i mo↑tome↓ɾaɾeta no↑wa,…… ma↑tʃiga↓inakɯ, ko↑ɾega ha↑dʑi↓mete mi↓taina n↓da.
voistock/604b23b73e4186746c333db2.wav|1279|tsɯ↑nakanno a↑dʑida! ha↓gɯhagɯ Q! o↑waQ, ko↓ndowa tʃi↑izɯo↓mɯɾetsɯ!
voistock/5d15ce583e41865ac6038962.wav|128|sa↓gjooga a↓ɾɯ. ʃi*↑tsɯ↓ɾee.
voistock/5d4186243e418673d87540f0.wav|1588|a↓Q, go↓hammo tʃo↑odo ta↑ke↓ta mi↓tai n^i↑ko↓n^iiga to↑kɯ*seion^i↓giɾio tsɯ*↑kɯ↓t#te a↑geɾɯ↓wane bo↑oɾɯn^i, go↓hanto ʃi↑o↓o i↑ɾete…… t o.
voistock/5e26ed3f3e418644f941f4c4.wav|340|tʃo↓t#to go↑so↓odaŋga, ma↓sɯ*taa. a↑no…… ka↑tsɯ↓ʃi*ka ho↓kɯ*saisant#te ga↑kano ho↓o, zɯ↑t#to tsɯ↓ite kɯ↓ɾɯ n↓desɯ*.
voistock/5d418a733e418673d875426a.wav|1575|so↑no ko↑to↓o ko↑ɾekaɾamo wa↑sɯɾe↓zɯ, mi↑nna ta↑itoode……m' e↓sɯ n o ma↑sɯ↓masɯno ka↑tsɯjakɯo me↑za↓ʃi*te çi↓bi ʃo↓odʑinn^i tsɯ*↑tome↓te i↑ki*ta↓idesɯ*ne.
voistock/6082815e3e418624672eef24.wav|1276|…… so↑ʃi*ta↓ɾa, jɯ↑ɾɯ↓ʃi*te ja↑ɾanai ko↑to↓mo, na↓i.
voistock/5fe2dbf33e41863fd745d144.wav|1291|na↓nda a↑no ja↑ba↓ino!?
voistock/605190733e418665281cf275.wav|1291|………… i↑emase↓N.
voistock/5ae944883e41867b2911ae26.wav|2039|o↑mae… do↓oʃi*te ko↑koe? da↑idʑo↓obɯka? ki*↑kenna me↓n^i a↑t#ta↓ɾiwa… so↑oka, jo↓kat#ta.
voistock/5d15cfdd3e41865ac60389b8.wav|139|so↑oka…… mo↓o ɾe↑ehaino dʑi↑kaŋka----
voistock/5d50db0d3e41866b8c4a6723.wav|977|ʃi*↑ki↓kaNsanto i↑sʃon^i i↑ɾɯto, ko↑ko↓ɾoga a↑tatamaɾima↓sɯ*wa. so↑o, wa↑taʃino da↓isɯ*kina ko↑otʃan^i jo↓kɯ n^i↑te i↑ma↓sɯ*. e↑eenn^i ko↑no to↑ki↓ga tsɯ↑zɯitehoʃiit#te o↑mo↓t#te ʃi↑maɯ no↑de↓sɯ*ga, jo↑kɯba↓ɾideʃooka?
voistock/5d2872223e4186471e5549c4.wav|2782|o↓anesamato mi↓nɯɯga de↓ɾɯmademo na↓kɯ, wa↑taʃiga ka↑tazɯke↓te ʃi↑maita↓ine.
voistock/61f0ffa23e4186406e163a85.wav|1273|go↑ʃɯ↓dʑiNsamawa, ga↑kɯen^↓tʃoono çi↑mitsɯga o↑no↓zomina no↑de↓sɯ*ka?
voistock/5bab39b43e4186363b7a5cde.wav|897|so↑ɾe, tsɯ↑k#komika……?
voistock/5d419aa13e418673d8754816.wav|1572|tsɯ*↑kamidoɾin^i i↑kimaʃo↓o! i↑sʃon^i!
voistock/5d42c1743e4186180544e1ce.wav|1578|na↓aN tʃa↑t#te! so↑ɾedakede o↑waɾɯ ma↑ɾiidewaaɾimase↓N.
voistock/5d42aea93e4186180544d76e.wav|1592|i↓i? ko↑no ha↑naʃi↓wa wa↑taʃi*to a↑natadakeno çi↑mitsɯda↓kaɾane ɯ↑k#ka↓ɾi ʃa↑bet#ta↓ɾi ʃi↑na↓idejo?… tsɯ↑ide da↓kaɾa mo↑osɯ*ko↓ʃi* ha↑na↓ʃi*te a↑geɾɯ↓wa.
voistock/5eb3d9ce3e418605a21669a7.wav|2471|o↑ka↓eɾi na↑saima↓se, go↑ʃɯ↓dʑiNsama!
voistock/61d675d03e418639955b2bd2.wav|1886|ja↑ma↓o o↑ɾi↓ta to↑ki↓n^i sa↑ma↓zamana ɾjo↓oɾio ta↑be↓taga, a↑dʑiwa a↑maɾi o↑boe↓te i↑nai. ta↓da jɯ↓iitsɯ wa↑sɯɾeɾaɾenai no↑wa, ka↓tsɯ*te kɯ↑ɾaʃi*ta i↑e↓de ta↑be↓ta i↑p#↓paino me↓N… i↑ja, ko↑ɾewa o↑boe↓te o↑kɯbe↓ki* ko↑to↓dewa na↓i. so↑o… o↑boe↓te o↑kɯbe↓ki* ko↑to↓dewa na↓i n↓da…
voistock/5d417efd3e418673d8753e64.wav|1572|bɯ↑k#kɯ*ka↓baao ka↑ke↓te o↑ke↓ba so↑ɾehodo ʃɯɯino me↓o ki↑n^i se↓zɯ jo↑mema↓sɯ*ʃi, da↓isɯ*kina ho↓ŋga sɯ↓gɯ te↓n^i to↑ɾe↓ɾɯ kjo↓ɾin^i a↓ɾɯ a↑Nʃiŋ↓kanto ko↑oɸɯ*kɯ↓kann^imo hi*↑taɾema↓sɯ*!
voistock/62f486233e41862eae7d5514.wav|221|sa↓n^tʃo[ a↑no to↑kɯi↓tendeno ta↑takaio kɯ↑ɾi↓a ʃi*↑ta ma↓sɯ*taasannaɾaba, i↑t#temo ka↑mawa↓naideʃoo. wa↑taʃiwa sa↓n^tʃopa↓Nsadesɯ*ga, mo↑noga↓taɾin^i de↑te kɯ↓ɾɯ, ta↓no dʑi↓mbɯtsɯno to↑ogooga↓inendemo a↑ɾima↓sɯ*. so↑ʃi*te, ka↓ɾega ɯ↑tʃi*kate↓nakat#ta, jɯ↓iitsɯno so↑Nzai. ge↑n^dʑitsɯ*to i↑ɯ ka↑bedemo a↓ɾɯ no↑de↓sɯ*].
voistock/62f485953e41861abd0f64b0.wav|129|ɸ ɯ ɸɯ↓Q… ko↑wa↓i mo↑no ʃi↑ɾa↓zɯ.
voistock/5ca56ada3e41866dd26dab1c.wav|2810|ɯ↑ɸɯɸɯkjo↓owa ta↑ijakɯdat#ta mo↑no. ki↑n^tʃoono i↓toga ki↑ɾe↓ta no↑kaʃiɾanee.
voistock/606449473e418673de5fdce2.wav|1788|aɪ hæv noʊ aɪˈdiə.
voistock/5d09e5153e418622cf4659f9.wav|2833|ɯ↓mino i↑e↓de ta↑be↓ɾɯ ja↑ki*sobat#te sa↑ikoodajone~. so↑kode! so↑no ja↑ki*sobao… a↑ŋkaketʃɯɯka↓jaki*soban^i ʃi*↑ta↓ɾa do↓okana? bi↑dʑinesɯ*tʃa↓Nsɯdane!
voistock/5d143ecf3e41867a9e7d2a46.wav|294|ko↓ɾja. ma↑men^i ʃi↓seino jo↑osɯo mi↑n^i i↑kɯ no↑mo i↑se↓eʃano tsɯ*↑tome↓dʑazo.
voistock/5f8684c43e41860fb7361412.wav|42|mo↓o ne↑mmatsɯ*ka. ɾa↑inemmo jo↑ɾoʃi*kɯ.
voistock/611f7bb03e41865c9668baee.wav|115|i↑bentowa n^i↑gemase↓ŋga, o↑waɾima↓sɯ*. kɯ↓ino na↓i jo↓o, ta↑noʃi↓nde o↑kɯbe↓ki*kato.
voistock/5d54b9e33e41865ff21bead8.wav|365|tsɯ*↑tane↓e o↑ɾeno ɯ↑demaede, o↑jakɯn^i ta↑te↓ɾeba tʃo↑odʑoosa. ne↓kono no↑mi↓toɾi mi↑miso↓odʑi, na↓ndemogozaɾeda a. ʃi↑Nʃɯɯʃi↓nanono ʃi↑Nso↓bajoɾimo, o↑ɾewa ma↓sɯ*taano ga↑waga i↓i, t#↑tene.
voistock/5d42932d3e4186733d61a6b2.wav|1580|mo↓o Q, na↓nde wa↑ɾat#te↓ɾɯn^a~ ɾi↓N, da↓ɾekao ke↑ga sa↑setʃat#ta↓kato o↑mo↓t#te, bi↑k#kɯ↓ɾi ʃi*↑ta↓non^i~ de↓mo, a↑na↓taga da↑idʑo↓obɯde jo↓kat#tan^a.
voistock/5d419b943e418673d875487a.wav|1578|so↑nna to↑ki↓wa, ma↓ɾiito a↑meɾikao ta↑bi ʃi*↑te mi↓nai?
voistock/5d3abd6e3e4186121716ecfb.wav|2805|dʑi↑tsɯ↓wa mi↑zɯgito i↑ɯ no↑wa o↑soɾoʃi↓kɯ ɯ↑ndoose↓enooga i↓ikamo ʃi↑ɾenai↓na.
voistock/5d0b24273e41863a640b6136.wav|229|jo↓no mi↑seba↓dana!
voistock/6134c88f3e41863b7a79e693.wav|1834|paimon* θɪŋks ʃiz ˈɪti stɹeɪndʒ…
voistock/5c247ff03e41861753655ec8.wav|256|ɾjo↑okaide↓sɯ*.
voistock/5d394d233e41862322197da7.wav|2160|ko↑kono o↑Nsenn^i ha↓iɾeba, da↓ɾedat#te ge↓ŋkin^i na↓ɾɯt#te mo↓Nsa! mi↑nano e↓gaoga o↑ɾeno tʃi*↑kaɾa↓no mi↑namotoda.
voistock/5bbd5e6d3e4186395703898c.wav|2800|ɯ↑wa! i↓manot#te ma↓dʑono ha↑nnoode↓sɯ*jone. ma↓dʑoga i↑ɾɯto mi↑nna↓ga a↑bɯnait#te ʃi↑t#tema↓sɯ*kaɾa, o↑miseno o↑tetsɯda↓ino ma↓en^i ka↑tazɯken^i i↑kanaito.
voistock/5bcd87d53e41866b287d6671.wav|2465|t#↑takɯ, ɾe↓en^i jo↑t#te i↓tsɯmonogotokɯ me↑noma↓eno ko↑to↓ʃi*ka mi↑e↓te o↑ɾa↓nna, ko↑no dʑɯɯbokɯwa.
voistock/62f486bb3e41863b6d4083ce.wav|174|gɯɯɯɯɯɯQ!
voistock/5d47d64b3e41866ad86e17ef.wav|996|ʃi↑ikiikaaN~ da↓ɾemo, mo↑dot#te↓naijo~
voistock/5d3049703e4186202e2be363.wav|2023|a↑ɾe? o↑wat#tʃat#ta?
voistock/5f741b0f3e418658246d5ba2.wav|30|ka↓zɯ, ki↑meɾɯjo!
voistock/611b1e823e4186190a779882.wav|2620|o↑ɾe↓tatʃio tsɯ*↑kɯ↓t#ta ja↓tsɯwa ko↓o ka↑ŋga↓eta. i↑ɾonna ja↓tsɯga i↑ta ho↓oga o↑moʃiɾo↓it#tena. ki↑mi↓tatʃijɯmiɾɯno mi↓mmo o↑nadʑi↓sa. o↑ɾe↓tatʃiwa mo↑tome↓ɾaɾetakaɾa so↑Nzai sɯ↑ɾɯ.
voistock/5d42b2223e4186180544d96d.wav|1582|so↑ɾena↓ɾa, ho↑mbamma↓den^i ba↑t#tʃi↓ɾi ma↑n^ia↓ɯ N↑ja↓naikana.
voistock/5d09d7fe3e418603a11f266a.wav|2786|a↑ɾi na↓ga tsɯ*↑kɯ↓t#ta i↑Nsɯ*taɾe↓eʃoN. sa↑ŋka ʃi*↑te kɯ↑ɾeta↓jone. a↑ɾemɯ↓daidat#takedo, sa↑iʃɯɯbin^i ta↓itoɾɯo tsɯ*↑ke↓ta wa↓ke.
voistock/6114d29c3e41861e9e6222e7.wav|235|t ɯ! z e e↓iQ!
voistock/5c6679b33e418653ab2f91c6.wav|227|me↓dʑedosaman^i ki↑Nzɯite jɯ↑kɯ… wa↑taʃin^i na↓n^io se↓joto o↑sʃa↓ɾɯ no↑de↓sɯ*ka? n e dʑe↓dosama…
voistock/5df999983e418625a6300644.wav|2504|i↑kɯ↓neN! te↑e↓haɾɯ!
voistock/5fd1e4f43e41864f902a4a72.wav|1796|oʊ ɹaɪt, jʊɹ stɪɫ ˈəndəɹɪdʒ. haha*, sətʃ ə ʃeɪm, ɪt ˈtɹuli ɪz.
voistock/611f82543e41862d6a5cd47f.wav|228|e↓n^dʑiN[ ki↑ɾaina mo↑no? to↓kɯn^i na↓ijo. a↓a, de↓mo kja↑pɯ*teNja↓ɾoowa mɯ↑katsɯ*kɯna↓a. a↑itsɯ, mi↑naton^i de↓ɾɯto ko↑sso↓ɾi a↑mai mo↑no ta↑ben^i i↑kɯ n↓daze. a↑taʃiga ga↑NeN na↑me↓te e↓n^dʑinno me↑ndo↓o mi↑te↓ɾɯt#te i↑ɯ↓non^i. ɸɯ*↑ko↓oheen^imo ho↑doga a↓ɾɯjona!?].
voistock/5f97ad543e418620d0424252.wav|29|kjo↓ono ba↑N go↓haNwa e↑hoomakio tsɯ*↑kɯ↓ɾoot#te i↑t#te↓ɾɯ n↓dakedo, i↓p#pai gɯ↑o tsɯ↑me↓te o↑k#ki↓i no↑ga i↓ijone! a, de↓mo, mi↑nnasa↓igomade ʃi↓zɯkan^i ta↑bekiɾe↓ɾɯkanaa.
voistock/61c1503d3e41865afd0cef13.wav|1903|heɪ! ɹaiden* ˈʃoʊgən, ɪz ðət ju? wɛɫ gɛs wət jʊɹ nɑt ðə bɔs əv mi. ˈəˈhə, ðæts ɹaɪt, ju həɹd mi! goʊ əˈhɛd, stɹaɪk mi daʊn!
voistock/62b59a9f3e41865a9851d662.wav|602|bo↑ndo↓manno pi*↑sɯtoɾɯnam↓pasɯno ja↓tsɯ?
voistock/61024df83e41860d3f361752.wav|1284|na↓nda!? a↑no↓saN ba↓ka i↓gain^imo, a↑tama↓n^i i↑sogi↓n^tʃakɯo ha↑ja↓ʃi*ta ja↓tsɯɾaga ta↑kɯ*saNjagaɾɯ↓zo!
voistock/5d42dd5b3e4186180544f167.wav|1578|i↓tsɯ*ka, e↑ekjɯɯoojɯɯaaɾɯesɯno sɯ*↑te↓edʑio mi↑nna↓n^i mi↓te mo↑ɾat#te, mi↑nna↓n^i ki↓ɾakiɾa ha↓p#pii sɯ↑ma↓iɾɯo to↑doketa↓it#te o↑mo↓t#te i↑ɾɯ↓wa.
voistock/61836b983e41863cb64eceb2.wav|1827|hɪɹz ˈbæˌkəp.
voistock/5ea8c53a3e418644af2625e4.wav|325|ɯ↓Q…… ɯɯQ! go↑meN, go↑meN! tʃo↓t#to ma↓t#ta! ɸɯ*↑taɾikiɾino to↑ki↓n^i mi↑t#tʃakɯ* sɯ↑ɾɯ↓no, ki↑Nʃi~!
voistock/5da951d33e41862a5821b4a2.wav|2086|ha↑ja↓oki… ɯ↓Q… tsɯ↑ɾai… o↑maemo ha↓jakɯokiɾo!
voistock/5db03f3d3e4186058b39a500.wav|2653|ke↑zɯɾi↓goɾoʃi! te↑ma↓o ka↑kesase↓ɾɯna!
voistock/5d429f573e4186733d61ae3f.wav|1582|mi↓ɾaino dʑi↑bɯŋkaɾa me↓sseedʑio ɯ↑keto↓ɾɯ… mi↓ɾaino dʑi↑bɯnno sɯ↓gatao ka↓adon^i ɯ↑tsɯ↓sɯ n↓dane o↑moʃiɾo↓inaa~
voistock/5d54bd2b3e41865fea61b805.wav|165|de↓mo, i↓makaɾa jo↑kokɯdʑooo da↓sɯ*to sɯ↑ɾe↓ba…
voistock/5dab4a273e41866cb40fed73.wav|413|tʃo↑oodʑɯɯ ɾa↑seNʃɯ↓ɾikeN.
voistock/5c496d313e4186140613c012.wav|2154|sa↓a, mo↓t#to kɯ↑ɾɯʃi↓me…!
voistock/5d42e42b3e4186180544f4e8.wav|1582|a, de↓mo kɯ↑ɾajamino na↓kade ha↑ʃi↓t#te i↑kɯ no↑toka, mi↑zɯno↓ɯeo a↑ɾɯitʃaɯ no↑tokamo ta↑noʃi↓soojana……
voistock/5d42900d3e4186733d61a4c5.wav|1600|so↑ɾedʑa↓a, a↑na↓tawa na↑Nzan^i na↑ɾima↓sɯ*no? ho↓ɾa, ko↑tʃiɾan^i sɯ↑wat#te i↑sʃon^i sa↑gaʃi*te mi↑maʃo↓o.
voistock/5aeab58b3e41861abe14fb62.wav|2067|ko↑no tʃi*↑kaɾa↓mo, sɯ↓gatamo, sɯ↓bete a↑na↓tano mo↑no↓desɯ*.
voistock/5d08b38f3e41867f547dc1a8.wav|201|mɯ↓neNwa na↓i. a↑ɾi↓gatoo.
voistock/5d42dd933e4186180544f192.wav|1600|ha↑dʑimema↓ʃi*te, kɯ↑ɾosawa da↓ijato mo↑oʃima↓sɯ* ko↑koɯɾano ho↑ʃidʑogakɯ↓inde se↑etoka↓itʃooo ʃi*↑te o↑ɾima↓sɯ*wa.
voistock/60f136ad3e41860f0e1c1b75.wav|1302|na↓ze……? ɾi↑jɯɯna↓ŋka ki↑ite do↓o sɯ↑ɾɯ. o↑ɾeo ʃi*↑kat#te, na↑gɯsamete kɯ↑ɾeɾɯt#te?
voistock/5d419dde3e418673d875493e.wav|1589|ki↑mino sɯ*↑ki↓na ho↑ʃit#te, do↓ɾekaʃiɾa? wa↑taʃiwa…… a↓Q! a↑ɾeo mi↓te a↑noakaɾɯi i↑t#to↓osee, wa↑ka↓ɾɯ?
voistock/5d42dda23e4186180544f19b.wav|1600|ɯ↑ɸɯ ɸɯ↓Q wa↑taʃimo, mo↓ʃi*ka ʃi*↑ta↓ɾa sɯ↓den^i sɯ*↑kɯɯɾɯa↓idoɾɯno ma↑hoon^i ka↑ka↓t#te i↑ɾɯ no↑ka↓mo ʃi↑ɾemase↓nne.
voistock/5d0c8d173e418674941f8039.wav|150|o↓o, e↓ɾizajo. ʃi↑nde ʃi↑maɯtowa…… ba↑ta↓Q.
voistock/61c044c93e41861c8472d972.wav|1902|mɯ↑ʃi*sɯ↓mooo na↑me↓ɾɯ n^↓dʑanee! a↓seto na↓midano i↑k#ki↓ɯtʃio no↑ɾikoe, mi↓zɯkaɾao tsɯ↑jo↓kɯ*sɯɾɯ… ko↑ɾezo dʑi↓Nseet#temoNjo!
voistock/5d429bf83e4186733d61ac54.wav|1586|e↓Q, so↑nna to↑koɾon^i ɾi↓toɾɯ de↓emoŋga i↑ɾɯ no↑kat#te? i↑Q, i↑ɾɯ↓wajo! ɾi↓toɾɯ de↓emoNwa ge↑kaidakedʑanakɯ dʑi↑gokɯn^imo i↑ɾɯ n↓dakaɾa!
voistock/5aed81863e418643ae44971c.wav|782|ga↓aQ ha↑Qha↓Qha.
voistock/5c2327bd3e4186243344bb4d.wav|154|ze↓çi dʑi↑k#kɯ↓ɾito o↑hanaʃi ʃi*↑ta↓iwa.
voistock/5d5cc1843e4186077d72daa8.wav|1004|to↑ki↓n^iwa ha↑de↓n^i ja↑ɾɯ ko↑to↓mo hi*↑tsɯjoode↓sɯ*…!
voistock/5d42965c3e4186733d61a8c9.wav|1573|ma↑ʃɯmaɾodat#te, te↑zɯ↓kɯɾi de↑ki↓ɾɯjo~ e, za↑iɾjo↓oga so↑ozoo tsɯ*↑ka↓nai?
voistock/5d42e65f3e4186180544f60b.wav|1575|…… a↑na↓tamo h on i i↑t#te ja↓t#te kɯ↑dasa↓i se↓N e↓mɯ t o i↑waɾeta↓non^i, ça↑kɯ e↓mɯ o↑jo↓ide hi*↑to↓ɾide ka↑t#ten^i o↑ʃimain^i ʃi*↑te ʃi↑maɯ n↓desɯ*kaɾa.
voistock/5df74fe23e4186245c282d02.wav|857|a↓ɾee? se↑mpai do↓o ʃi*↑ta n↓desɯ*kaa?
voistock/5d0b6ab43e418638bd40103a.wav|388|ke↓tsɯɾoo çi↑ɾakima↓sɯ*.
voistock/6018b6da3e41865e411c94e2.wav|2718|a↑tataka↓i… go↑mennasa↓i! ze↓mbɯ… ze↓mbɯ… bo↓kɯga wa↑ɾɯ↓kat#ta n↓da. to↓osanto ka↓asanto… o↑nadʑi to↑koɾoe i↑kanaijone…?
voistock/5f7d48a33e418662fe7c93f2.wav|41|wa↓kɯwakɯga to↑maɾanaijo~!!
voistock/630dae523e418665247b3222.wav|1344|ɯ↑ma↓kɯn^igeɾɯ n↓da!! ze↑t#tai se↓ɾɯn^i kjɯɯʃɯɯ sa↑ɾena↓ide kɯ↑ɾe. ta↑no↓mɯ!!
voistock/5e2fabcf3e41867d9a77ef7b.wav|2801|o↑maesanno ko↑to ka↑ŋgaena↓gaɾa za↑zeN ʃi*↑teta↓kaɾa, kjo↑osakɯde ta↑ta↓kaɾemakɯt#ta n↓dazo~!
voistock/5d4178dd3e418673d8753c09.wav|1596|a↑nnan^i to↑baseɾɯ↓nante, ki↑t#to a↑na↓tan^iwa bi↑idamao to↑basɯ sa↑inooga a↓ɾɯ n↓dane!
voistock/5d0cd0a53e418659c0492ae5.wav|121|e↑ntakɯno ki*↑ʃi↓tatʃin^imo o↑mijageo ka↑t#te a↑geta↓i to↑koɾode↓sɯ*ne. mi↑na o↑so↓ɾoino a↑ɾohaʃa↓tsɯnado do↓odeʃoo? jo↑ɾoko↓bɯ sɯ↓gataga me↓n^i ɯ↑kabɯ jo↓odesɯ*.
voistock/5d417c8d3e418673d8753d8e.wav|1592|so↑ɾedʑa↓a o↑ɾeen^i na↑t#te↓nai? mo↓o, wa↑taʃiga i↓it#te i↑t#te↓ɾɯ n↓dakaɾa i↓i no↑jo!
voistock/5d0b23693e41863a640b610e.wav|250|o↑ɾaoɾa↓oɾa, do↑ʃi*tado↓ʃi*ta!
voistock/60af554d3e41867e7604eb1c.wav|1289|ki↑mi↓tatʃin^i ta↑nomita↓i ko↑to↓ga a↓t#te ki↑ma↓ʃi*ta. ta↑tʃiba↓naʃimo na↓ndesɯ*kaɾa, na↓kae.
voistock/5db91be13e41865ffc326dc0.wav|2794|wa↑taʃimo ko↑domo↓tatʃin^i ma↑dʑi↓t#te o↓kaʃi mo↑ɾaeɾɯ↓kana.
voistock/6321789c3e41862997182491.wav|204|jo↓kɯ ja↑ɾɯ mo↑no↓da. a↑kiɾeta↓zo……. i↑ja, ho↑me↓te i↑ɾɯ no↑da. ho↑ntoon^i. jo↓kɯ wa↑taʃin^i tsɯ*↑kia↓t#te kɯ↑ɾeɾɯ.
voistock/5d0cd0d43e418659c0492aed.wav|121|o↑moʃiɾomino na↓i te↑eandewaaɾimase↓ndeʃi*taka?…… ka↓Nʃa ʃi↑ma↓sɯ*, ma↓sɯ*taa.
voistock/614067333e4186651d77e8a2.wav|1857|heɪ!
voistock/603887913e41865a00249884.wav|2776|aɪ θɪŋk əm ˈgɑnə juz maɪ fɹi taɪm tɪ meɪk səm nu kloʊðz. ˈɪtəɫ bi ˈgɑθɪk loʊˈlitə staɪɫ. ˈmeɪbi aɪɫ æd səm ˈbændɪɪz, feɪk bləd, ənd skɑɹz tɪ gɪv ɪt ðət ˈɛkstɹə guɹokawa* ˈfilɪŋ əm soʊ ˈɪntu!
voistock/5f9ff8d13e4186093260bdf5.wav|2876|tsɯ↓i ko↑naida ha↓it#ta ja↓tsɯga ne↑boke↓te n^↑dʑa↓neejo so↓mosomo ɾe↑Nʃɯɯɯɯdat#te o↑ɾen^i ka↑te↓neenon^i do↓ojɯɯ tsɯ↑moɾida.
voistock/60e2c3e03e41861eba386192.wav|1278|na↓nda!? kjɯɯn^i o↑ogo↓e da↓ʃi*te.
voistock/61c066a93e41863d0300e072.wav|1902|tsɯ*↑kaɾe↓ta n↓naɾa ne↑ɾo, mɯ↓ɾiwa sɯ↑ɾɯ↓na. o↑ɾesamaka? o↑ɾesamawa me↓o a↑ke↓tetemo ne↑ɾeɾɯ n↓da, he↑heheheQ.
voistock/5d42d5573e4186180544ed23.wav|1600|e↑ekjɯɯoojɯɯaaɾɯesɯo o↓okɯno hi*↑ton^i ʃi↑t#te i↑tadakɯ* ta↑me↓n^i, ko↑kode ka↑mpekina pa↑ɸo↓omaNsɯo ʃi*↑te o↑ki*ta↓i to↑koɾode↓sɯ*.
voistock/5d42bbb63e4186180544decc.wav|1586|a↑na↓tan^imo wa↑ke↓te a↑geɾɯ↓wa i↑sʃon^i tsɯ*↑ki↓joo, i↑waimaʃo.
voistock/5a9c968c3e41865b621d249b.wav|1580|ɾi↓ŋga mo↓o i↑k#↓kai, gj ɯ! t#↑te ʃi*↑te a↑geɾɯ↓n^a!
voistock/5d42b8ad3e4186180544dd30.wav|1601|n a, n a, na↓n^ika, na↓n^ika, e↑t#to…… a↓Q, n a, na↓n^imo na↓i……!
voistock/611f7a123e41863acf22d3aa.wav|318|ɾe↑beɾɯa↓p#pɯdesɯ*ka. wa↑ɾɯkɯaɾimase↓N. ʃi↓ntaiga bi↓ɾɯdo sa↑ɾeɾɯ no↑wa jo↓i ko↑to↓desɯ*.
voistock/5e27b9bb3e41860f4e19ac6f.wav|261|o↑ʃiete kɯ↑ɾete a↑ɾigata↓issɯ, ma↓sɯ*taa!
voistock/5db7f1903e418620c94517e5.wav|2471|ko↑nna no↑wa da↑itai mi↓ɾeba wa↑ka↓ɾɯdeʃo?
voistock/5d0b17623e4186656a1e9670.wav|240|da↑ie↓ejɯɯto jo↑baɾeta wa↑taʃiwa ta↓ʃi*kan^i, o↓okɯno dʑa↑akɯna ka↑ibɯtsɯo ɯ↑tʃi*tao↓ʃi*taga, wa↑taʃi* çi↑to↓ɾidewa e↑ɾi↓No ma↑moɾinɯ↓kɯ* ko↑to↓wa de↑kinakat#ta↓ɾoo……. wa↓ga ki*↑ʃi↓tatʃi a↓t#tekosoda.
voistock/5f4efe1b3e418606150a88fd.wav|343|ko↑ɾemo hi*↑to↓tsɯno na↑tsɯ↓no ka↑idaN. n^i↓ŋgjono dʑa↑ɾe, o↑tsɯ↓kiai kɯ↑dasaima↓se.
voistock/5d3010a43e4186026d1c358d.wav|1759|ha↓a, m a, so↑ɾewa to↓mokakɯ* ka↓nodʑono a↑no mi↓gotona e↓o e↑gakiage↓ɾɯ so↑osakɯeno mo↑tʃibeeʃoŋga o↑sanana↓dʑimitono na↓kao hi*↑kisa↓ita se↓keNeno ɸɯ*↑kɯʃɯɯdat#ta↓nantene.
voistock/5fce096e3e41860a1d5742b3.wav|1836|ahh--atchoo*!!!
voistock/5d0b28f43e418651b94ae52d.wav|322|ga↑mbaɾima↓sɯ*!
voistock/63217d463e418641f1166a1f.wav|237|kjɯɯkeede↓sɯ*.
voistock/5e54ffe73e41867d0c1d23d8.wav|2809|ha↑naʃi*kake↓ɾɯ jɯɯkiga a↑ɾe↓ba, a↑mi↓monono ha↑naʃi↓toka… de↑ki↓ɾɯ no↑kana~?
voistock/5acdd3073e41860b620bcda7.wav|540|ha↑dʑimekaɾa ze↑ŋkaida!
voistock/5e7c27e03e41866f8f150c96.wav|595|ɸɯ*↑tsɯɯn^i…, to↑modatʃi*to ʃi*↑te, ha↑na↓ʃi*te kɯ↑ɾeta↓ɾa i↓ijo. sa↑iʃowa gi↑kotʃinaito o↑mo↓ɯʃi, na↑n↓n^itʃimo ka↑ka↓ɾɯkamo ʃi↑ɾenai↓kedo…!
voistock/6143088d3e41865634733c04.wav|1916|əm ˈfɪɹfəɫ bɪˈkəz əv wət aɪ ˈwɪtnəst faɪv ˈhənəɹd jɪɹz əˈgoʊ həɹ dɪˈmaɪz ənd ðət θɪŋ. bət, ɪf… ɪf ɪt wəɹ ju, ˈɛvɹiˌwən wʊd bi seɪf. ju ˈwʊdəv seɪvd ðə wəɹɫd. dʒɪst laɪk aɪ ˈtʃɛɹɪʃ ðə ˈmɛməɹiz əv ˈɛvɹiˌθɪŋ ɪn inazuma*, ɪf ju ɹɪˈmɛmbəɹ mi, aɪɫ lɪv fəɹˈɛvəɹ.
voistock/5d429d5e3e4186733d61ad38.wav|1578|hi*↑tasɯɾa dʑi↓ʃoto kjo↑oka↓ʃoo mi↑kɯɾabe ɾa↓ɾii sɯ↑ɾɯ↓nante ta↑ikɯ*tsɯde o↑kaʃi*kɯnat#tʃaɯ!
voistock/5d0c8cbd3e418674941f8021.wav|176|w a wa↓Q, sɯ↑k#go↓oi! ga↑mba↓t#tanone, ja↑ɾɯɯ~
voistock/5eb2f4753e418604be7cd0b4.wav|2474|kjo↓owa ko↑kon^i o↑dekake sɯ↑ɾɯ n↓dane…?
voistock/611f7dad3e4186041176b738.wav|304|ko↑kon^i i↑ta↓ɾɯwa a↑ɾajɯ↓ɾɯ ʃɯɯɾeN……. e↓No ki↓ɾi, sa↑dame↓o ki↓ɾi, go↓oo ki↓ɾi, ga↑omo ta↑ta↓N,{ mɯ↑motono ke↑Nsee}. sɯ↑na↓watʃi-- ʃɯ*↑kɯgookaɾano ka↑ihoona↓ɾi!
voistock/5d418dff3e418673d87543af.wav|1592|…… ɸ ɯ ɸɯ↓Q kjo↓owa ki↓te kɯ↑ɾete a↑ɾi↓gatoo mo↑osɯ↓gɯ ka↑mpaino dʑi↑kanda↓kaɾa, na↓n^ika no↑mi↓monoo e↑ɾa↓ndene.
voistock/60ffac5c3e418662c76e9dd5.wav|1276|a↓Q, ka↑ntokɯ↓see! o↑mae, ka↑ntokɯ↓seenaɾa tʃa↑nto ɾjo↑oseeno ka↓Nɾi ʃi↑ɾo!
voistock/5f6adb5a3e4186330e2d0402.wav|1258|[ sa↑tsɯ*kia↓oi] de↓sɯ*. do↑obɯtsɯno o↑se↓wanaɾa o↑makasekɯdasai……. na↓N tʃa↑t#te.
voistock/62f4857a3e41861abd0f6471.wav|129|ɸɯ↓Q, ja↓Q, to↓ɾjaa!
voistock/5d37ab263e41865838251032.wav|925|wa↑taʃiwa wa↑ɾɯ↓i ko↑dʑanaidesɯ*↓wajo. ta↓dane, a↑na↓tan^i de↑a↓ɯmadewa ho↑ŋkin^i na↓t#ta ko↑to na↓idake. mo↓o ko↑koma↓de ha↑na↓ʃi*te ja↓t#takaɾa, sa↓igomade tsɯ*↑kia↓t#te kɯ↑ɾema↓sɯ*jone? k ob ɯtatʃaN?
voistock/612355843e41861e64636ab2.wav|654|ɾjo↓oɾinante, ɾe↓ʃi*pisae ha↑akɯ* sɯ↑ɾe↓ba da↓ɾedemo de↑ki↓ɾɯ mo↑no. bɯ↑Nɾjo↓oto ta↑imiŋgɯo ma↑tʃigae↓nakeɾeba i↓i n o. so↓Q, he↓nna to↑koɾode tsɯ↑k#kakat#ta↓ɾi ʃi↑nakeɾe↓ba, n e.
voistock/62f4886f3e418656c835bd85.wav|331|ba↑ke↓te de↑te ja↑ɾɯɯɯ.
voistock/6075359f3e4186337a3215a2.wav|1782|u hæv kəm hiɹ, ju hæv sin ɑɹ ˈsikɹɪts. fəɹ ðɪs, ju məst peɪ ðə pɹaɪs.
voistock/5d42e11e3e4186180544f35e.wav|1575|ma↑ta ko↓o i↑t#ta a↑toɾa↓kɯ*ʃoŋga a↓t#taɾa i↑sʃon^i i↑kimase↓ŋka? a↑na↓tatonaɾa, do↓nna to↑koɾomo da↑idʑo↓obɯna ki↑ga sɯ↑ɾɯ n↓desɯ*!
voistock/5d42db5c3e4186180544f069.wav|1596|mo↑tʃi↓ɾoN, so↑no o↑moideno na↓kan^i a↑na↓tamo i↑ɾɯjo zɯ↑t#to zɯ↑t#tom' e↓sɯ n o mi↑nna↓o sa↑sae↓te kɯ↑ɾeta↓jone.
voistock/601ba30c3e41867247377052.wav|1212|sa↓igomade ki↑booo sɯ*↑tetʃa i↑ka↓N. a↑kiɾame↓taɾa, so↑kode ʃi↑aiʃɯɯɾjoodajo.
voistock/5d42d9a23e4186180544ef78.wav|1596|i↑tʃi↓baN da↑idʑina no↑wa, ɸa↓nno mi↑nna↓n^i jo↑ɾoko↓nde mo↑ɾaɯ ko↑to↓dakaɾa……
voistock/6215e4af3e4186482e6674d3.wav|413|da↑t#tebajo.
voistock/6216dfc63e418637122aa398.wav|258|ko↑nn^itʃiwa, di↓ɾɯ mɯ↓Q d oo↓dina. ja↑ɾino tʃo↑oʃiwa do↓odeʃooka? a↑na↓tan^iwa, hi*↑totsɯdake-- ɾja↑kɯdatsɯ↓aiwa jo↓kɯnaito o↑moima↓sɯ*.
voistock/5d41981c3e418673d8754730.wav|1578|a↑na↓tamo i↑sʃon^i ja↓t#te mi↓ɾɯ? ɯ↑ɸɯ ɸɯ↓Q, kjɯɯ↓n^indemo na↑kanaka ke↑t#tʃakɯga tsɯ*↑ka↓naikaɾa dʑɯɯ↓n^indato ma↑sɯ↓masɯ e↑ki*sa↓itiŋgɯna dʑa↓ŋkenn^i na↑ɾi↓soone!
voistock/5d105edd3e41860b1a5ecbd3.wav|102|ha↓a……. sɯ*↑kio mi↓te bɯ↑tan^i ʃi*↑te i↑dʑimeɾɯ jo↑teedeʃi*ta↓kedo, pɯ↑ɾaNhe↓ŋkoodesɯ*. mo↑nozɯ↓kina he↑ntaima↓sɯ*taatokaneedesɯ*kaɾa, mo↑osɯ*ko↓ʃidake sa↑abantogo↓k#koo ʃi*↑te a↑gema↓sɯ*. tʃa↑anto o↑se↓wa ʃi*↑te a↑gema↓sɯ*kaɾa jo↓ki dʑi↓Nɾɯideat#te kɯ↑dasa↓ine? ma↓a, so↑ɾewa so↑ɾeto ʃi*↑te sa↓igowa bɯ↑tan^i ʃi↑ma↓sɯ*kedo o. na↓ze? t#↑te me↑gami↓peɾeno ko↑ibitowa……
voistock/5fc418423e418629d762cc03.wav|1412|dʑi↑bɯnno ko↑to↓naɾa i↑e↓naŋka na↓kɯ*temo o↑ɾega ki↑mete ja↑ɾɯ.
voistock/615eb3ae3e41861c717f9e82.wav|1872|ˈɔlˌmoʊst. gɑt. əˈweɪ
voistock/5d42b5b63e4186180544db82.wav|1588|ɯ↑ɸɯ ɸɯ↓Q, i↑wanakɯ*temo wa↑kat#te↓ɾɯwajo~… e↓Q, tʃi↑gaɯ? i↓tsɯmono a↑ɾeo ja↓t#te…? m o~, so↑nnan^i tʃɯɯdokɯ*seega ta↑ka↓kat#takaʃiɾa.
voistock/5f4498783e418663cf4a92e3.wav|114|ho↑ntoowa ha↑zɯkaʃi↓i no↑da↓keɾedo, ma↓sɯ*taaga ki↑n^ii↓t#te kɯ↑dasa↓t#taɾa ɯ↑ɾeʃi↓inaa.
voistock/5c4707083e418649c730d47d.wav|338|ɸɯ↑N… ki↓ina e↓mmo a↓t#ta mo↑no↓jo. mi↑tsɯ↓joʃideaɾeba sa↑to ta↑noʃigen^i wa↑ɾat#ta↓ɾooga, wa↑taʃin^iwa ka↑na↓ɯbekɯmo na↓i.
voistock/629c4fa43e41865740330b32.wav|2220|ki↑ita? ɯ↑tʃino ki*↑tenno o↑kagede ta↑sɯ*ka↓t#ta n↓dajo! ko↓ŋkaiwa me↓ewakɯ* ka↓kezɯn^i mi↑nna↓o sɯ*↑kɯeta! ɯ↑ɸɯɸɯ. ke↑NsoNwa ʃi↑naijo. ɯ↑tʃit#tebasasɯgaQ!
voistock/5d0b0fdd3e4186656a1e9571.wav|117|de↓wa, ko↑tʃiɾao.
voistock/5c208ebd3e4186084b621344.wav|375|ek ɯs ɯ,,, ka↑ɾi↓baa!
voistock/6010f60e3e41860eda6c8d64.wav|1276|a↑hahaQ. ga↑mba↓ɾejo, ka↑ntokɯ↓seedono!
voistock/5d4178cd3e418673d8753c02.wav|1596|ɾa↑mɯne no↓miowat#takaɾa ɯ↓mi m i tʃa↑nto ko↑to↓ɾi tʃa↑nto, bi↑idamade a↑sonde ta↓nda.
voistock/5d42db743e4186180544f074.wav|1596|sa↓a, ɾe↑Nʃɯɯ ha↑dʑimeɾɯjo~!! a↑ɾe, mi↑nna↓wa!? e↓Q, mo↓o dʑo↑giŋgɯ i↑t#tʃat#ta↓no!? so↑ɾeo ha↓jakɯit#tejo~!!
voistock/5d1445d83e4186650c670e71.wav|152|wa↑taʃin^i a↓ɾɯ no↑wa ka↑waigeno na↓i ki↓nn^ikɯ*to, me↑ndookɯ*sa↓i dʑo↑o↓ono kjo↑odʑidakeda↓zo. ko↑nna a↑iso↓mo o↑moʃiɾomimonai sa↓abanton^i ka↑kazɯɾa↓t#te i↑teii no↑ka. mo↑nozɯ↓kina n^i↑ŋgenda↓na, ki*↑samawa.
voistock/5ae00dc43e418645b2578116.wav|2094|o↑o↓Q!? pɯ↑ɾe↓zentoka!?
voistock/5bac510c3e4186624f2359a5.wav|2799|hi*↑ton^i mi↑ɾaɾetakɯ↓nai mo↑no↓t#te ta↑kɯ*saN a↓ɾɯjone.
voistock/5f98fc733e41866cae309722.wav|36|a↑kema↓ʃi*te o↑medetoo. to↑ʃi↓no ha↑dʑimewa ki↑aiga ha↓iɾɯna.
voistock/5d089de23e418635ac221845.wav|256|o↑makasekɯdasai!
voistock/5d42b0c23e4186180544d89f.wav|1580|de↓mo, ko↑oʃi*te ʃo↑otopa↓ntsɯo ka↑kɯ*ʃitʃaɯ↓to~… ka↓jotʃinno sɯ*↑ka↓aton^i so↑k#kɯ↓ɾina n↓dajo tʃo↓t#to o↑so↓ɾoit#te ka↑n^dʑide te↓NʃoN a↑gaɾɯ↓n^a~
voistock/5db826bb3e418643f672da3c.wav|2474|ko↑no ko↑wa ʃi↑ɾanai.
voistock/62b3e53c3e4186523426a8ef.wav|1851|ta↑toe ɾa↓ameNja gjo↑ozana↓dowa ma↑mpɯ*kɯ↓kaNo e↑ɾaɾe↓ɾɯ mo↑no↓o ta↑be↓ta tʃo↓kɯgodato ʃi*↑temo sa↓igowa ja↑ha↓ɾi go↑haNi↓p#paide ʃi↑me↓ta.
voistock/5d42e5fe3e4186180544f5d5.wav|1596|ça↑kɯ e↓mɯ dʑa↑nakɯ*te, se↓N e↓mɯ o↑joga↓nakɯ*tʃa i↑kenakat#ta n↓dat#te tʃa↑nto se↑Nse↓eno ha↑naʃi↓o ki*↑kinasait#te i↑waɾetʃat#ta…… ha↑Nsee ʃi*↑tema↓sɯ*……
voistock/5d4189413e418673d8754209.wav|1600|de↓sɯ*kaɾa, ho↑mmonono ha↑na↓deat#temo mɯ↑zɯkaʃi↓ito i↑ɯ ko↑to↓wanaideʃoo?
voistock/5f8697903e41864c6726c723.wav|42|i↑kɯ↓zo, ki*↑tamoN.
voistock/5ca48f2a3e418637bb64af13.wav|2810|ko↑ɾede sa↑kɯ*sendo↓oɾinee.
voistock/5ffc28a33e418642b56cacd2.wav|2596|dʑa↓a ma↑kase↓ɾɯ. da↓ga mɯ↓ɾiwa sɯ↑ɾɯ↓najo!

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,100 @@
cjks/myvoice/00148.wav|23|[ZH]之前有一次,我[ZH][JA]病気にかかった[JA][ZH]……但哥哥没看出来……[ZH]
cjks/louise/0_10880.wav|3|[JA]すごい気合ね。……わたしももっとがんばるべきかしら。[JA]
cjks/SA1/sp009-003012.wav|17|[SA]तादृशैः लिखितम् नेत्रे निमिल्य कण्ठस्थीकृतवान् अस्ति भवान्[SA]
cjks/yos/yos113_077.wav|1|[JA]おやすみなさい[JA]
cjks/sua/0_s_1695.wav|12|[KO]지금은, 아무 말도 하지 말아요.[KO]
cjks/nen/nen112_129.wav|0|[JA]でも、そんな恥ずかしいことを言わないで下さい。照れますよぅ……[JA]
cjks/sua/0_s_1626.wav|12|[KO]도… 도련님… 이… 이러시면 아니 되어요… 소녀는… 소녀느은…….[KO]
cjks/sua/1_s_1032.wav|12|[KO]아…….[KO]
cjks/mimiru/0_m_395.wav|13|[KO]내 경계점이 아니었으면 방금 도령 얼굴에 구멍 났어!!![KO]
cjks/nen/nen102_009.wav|0|[JA]いえ、おそらく病気ではないと思うんですが……今朝から少し気持ちが悪くて……ずっと家で大人しく寝ていました[JA]
cjks/louise/2_09455.wav|3|[JA]あらそう?じゃあ、みんなが一斉にスカートをたくし上げたらサイトは誰を見るのかしら?[JA]
cjks/mom/MS01_MOM-0517.wav|5|[JA]……大好きです、リトさん[JA]
cjks/yam/MS04_YAM-0011.wav|4|[JA]……悪気のないことは、わかっています。しかし、悪気がないからこそ、真の脅威とも言えるのです[JA]
cjks/louise/2_07659.wav|3|[JA]それは、あの、あれだ、多分![JA]
cjks/louise/1_07889.wav|3|[JA]うるさいっ!少しは自分で考えなさいっ!このバカ犬っ![JA]
cjks/mom/SS01_MOM-0066.wav|5|[JA]い、いいから早く!追いつかれてしまいます!![JA]
cjks/louise/0_11529.wav|3|[JA]あ、あんた誰よ!?人の部屋に勝手に入って![JA]
cjks/louise/0_12023.wav|3|[JA]あ、あは、あはははは……![JA]
cjks/SA2/sp011-000175.wav|18|[SA]अस्तु नाम इति चिन्तयता शिवराजेन सः मार्गः एव निश्चितः पलायनाय[SA]
cjks/louise/0_09835.wav|3|[JA]……サイト。授業が終わったら、わたしにサムライについて教えなさい。[JA]
cjks/sua/1_s_980.wav|12|[KO]우후후후후후후후후~[KO]
cjks/louise/1_05594.wav|3|[JA]はあ、はあ、はあ……。[JA]
cjks/mik/MS10_MIK-0077.wav|6|[JA]ララさんの料理、すっごく味が濃そうだったからね。これぐらい薄味の方がいいと思って[JA]
cjks/louise/0_10726.wav|3|[JA]バカっ![JA]
cjks/louise/2_05665.wav|3|[JA]いえ、わたしの顔をじっと見ているようでしたから。なにかおかしなところがあるんでしょうか?[JA]
cjks/mimiru/0_m_198.wav|13|[KO]애를 반쯤 잡았던데?[KO]
cjks/nan/nan212_004.wav|2|[JA]無理せず、体調不良で休んだ方がいいんじゃない?[JA]
cjks/louise/0_10563.wav|3|[JA]ああああっ!ま、また!あんたはまたっ![JA]
cjks/yuhwa/1_u_1020.wav|15|[KO]됐고. 연화 언니로 변신해 보세요.[KO]
cjks/SA6/sp015-000374.wav|22|[SA]तेन तु वरदक्षिणाम् विना एव सा परिणेतव्या[SA]
cjks/mik/MS10_MIK-0031.wav|6|[JA]はぁ、家まで来るなんて……[JA]
cjks/louise/0_10836.wav|3|[JA]今度は涙ぐんでる。[JA]
cjks/sua/1_s_517.wav|12|[KO]소녀는 먼저 나가 있을게요~[KO]
cjks/SA5/sp014-000018.wav|21|[SA]पुनश्च तत् जेतुम् शक्यम्[SA]
cjks/yos/yos101_027.wav|1|[JA]誰も、そこまでは言ってません……っ[JA]
cjks/nan/nan009_067.wav|2|[JA]確認したけど、ネットワーク上のセキュリティも強化された様子はないよ[JA]
cjks/arin/1_a_361.wav|14|[KO]으… 응!!![KO]
cjks/sua/0_s_1813.wav|12|[KO]오빠 그렇게밖에 못해?[KO]
cjks/nen/nen117_195.wav|0|[JA]んんんんんぅぅぅぅぅ……ぅぅッ!はっ、はーっ、はーっ……はぁぁーーー……[JA]
cjks/louise/0_10978.wav|3|[JA]でしょう?だから、どんなことをすれば運動能力のアップにつながるのか聞いてきたの。[JA]
cjks/SA1/sp009-002328.wav|17|[SA]इति विविधाः भावनाः वर्षाकाले नदीप्रवाहाः इव तस्य हृदयम् पूरयन्ति स्म[SA]
cjks/louise/0_14038.wav|3|[JA]どうして、そんなにふらふらするの!?どうして、他の子に目が行くの!わたしだけを見ていればそれで充分でしょう!?[JA]
cjks/mik/MS03_MIK-0004.wav|6|[JA]だとすると……今まで体験してきたことをもう一度やれば、他の記憶も戻るかも?[JA]
cjks/sua/0_s_2370.wav|12|[KO]그러니까아아아…….[KO]
cjks/nan/nan211_191.wav|2|[JA]ダメ、ダメだよ、言わないで……嬉しくて、体、熱くなっちゃってっ[JA]
cjks/rong/cvrong06054.wav|7|[ZH]……嗯。[ZH]
cjks/nen/nen016_043.wav|0|[JA]ありがとうございます、大変参考になりました[JA]
cjks/nan/nan004_002.wav|2|[JA]それで用事ってなに?[JA]
cjks/sua/1_s_561.wav|12|[KO]아뇨!! 도련님!![KO]
cjks/yos/yos113_212.wav|1|[JA]はぁ、はぁ……んっ、はい、気持ちいいです、はぁ、はぁ、んっ、んんぁぁぁっ![JA]
cjks/yam/MS03_YAM-0038.wav|4|[JA]はい[JA]
cjks/louise/0_10485.wav|3|[JA]ちょ、ちょっと!なんでこれで解決したみたいな雰囲気になってるの!?[JA]
cjks/hl/ch11_hl_011.wav|10|[ZH]不震惊一下吗?[ZH]
cjks/nan/nan206_018.wav|2|[JA]驚きの顔。まさかサトル君からデートに誘ってくれるなんて……[JA]
cjks/yuhwa/1_u_1012.wav|15|[KO]어… 응… 언니야.[KO]
cjks/louise/2_06835.wav|3|[JA]……わかったわよ。[JA]
cjks/nen/nen117_088.wav|0|[JA]ちゅ、じゅるる……んむ、ちゅ、ちゅ、れろれろれろれろ……んんっ、はぁ、はぁ、はぁ、もうトロトロが治まらない[JA]
cjks/louise/1_07723.wav|3|[JA]まず、頭の上にひとつ乗せて、と。……ん、両手にもひとつずつ持ってるのよ。[JA]
cjks/nan/nan202_051.wav|2|[JA]え!?[JA]
cjks/nen/nen406_007.wav|0|[JA]でも最近、私は回収を行っていないんです[JA]
cjks/yos/yos102_060.wav|1|[JA]なんでしょうか?[JA]
cjks/louise/1_07509.wav|3|[JA]覚悟はいいわね?[JA]
cjks/nen/nen113_206.wav|0|[JA]もうすぐ当たります?当たっちゃいますか……はぁ、はぁ、はぁ……[JA]
cjks/sua/0_s_477.wav|12|[KO]!!![KO]
cjks/nen/nen115_178.wav|0|[JA]あ、は、はい……大丈夫、なんですが……今ちょっと、カリが、気持ちいいところに引っかかって……[JA]
cjks/louise/1_06499.wav|3|[JA]あんたなんか、あんたなんかっ……。[JA]
cjks/nen/nen104_083.wav|0|[JA]―――[JA]
cjks/nen/nen109_046.wav|0|[JA]よかった……もう、他には何も思いつきません。それぐらい満喫できました[JA]
cjks/SA4/sp013-000151.wav|20|[SA]तत्रत्यः नायकः परमायोग्यः उत्कोचशीलः च[SA]
cjks/yos/yos115_180.wav|1|[JA]それは、そうなんですけど……んっ、んひっ!?あっあっあっ、だめ、今は動いちゃだめですよっ[JA]
cjks/arin/0_a_250.wav|14|[KO]응……![KO]
cjks/SA2/sp011-000255.wav|18|[SA]तेन सह[SA]
cjks/louise/2_07456.wav|3|[JA]サイト。ま〜たあんたはぼうっとして![JA]
cjks/arin/0_a_156.wav|14|[KO]애초에 수명이 남은 사람은 손대면 안 되는데!![KO]
cjks/nen/nen113_093.wav|0|[JA]あむ……[JA]
cjks/SA5/sp014-000357.wav|21|[SA]खानेन लिखितम् पत्रम् कृष्णाजीपन्तः शिवमहाराजाय दत्तवान्[SA]
cjks/louise/2_06748.wav|3|[JA]それじゃあ、あんたには他に何かいい案があるの?[JA]
cjks/yuhwa/1_u_1048.wav|15|[KO]응~! 내가 언니야 동생아!!![KO]
cjks/yeonhwa/1_y_36.wav|16|[KO]응? 왜?[KO]
cjks/yos/yos110_061.wav|1|[JA]ぅぅ~……セックスって、難しいものなんですね……[JA]
cjks/louise/2_07111.wav|3|[JA]まったく、どこに隠したのかしらね。[JA]
cjks/louise/1_07285.wav|3|[JA]それに、こないだもそうだったけど、虚無の魔法は詠唱に時間がかかるわ。[JA]
cjks/sua/0_s_629.wav|12|[KO]판소리에요?![KO]
cjks/louise/2_08049.wav|3|[JA]わたしも胸を大きくできるかしら?[JA]
cjks/nen/nen211_017.wav|0|[JA]はい。状況からすると、保科君と因幡さんの距離が近づくごとに[JA]
cjks/louise/0_10002.wav|3|[JA]か、かん違いしないでよ!?別に嬉しかったわけじゃないんだからね![JA]
cjks/yos/yos100_054.wav|1|[JA]――初めてできた友達ですから[JA]
cjks/yos/yos106_085.wav|1|[JA]んっ、んんーーーー……[JA]
cjks/nen/nen410_024.wav|0|[JA]それならいいんですが[JA]
cjks/louise/0_14257.wav|3|[JA]‥‥‥‥。[JA]
cjks/louise/0_10520.wav|3|[JA]でしょう?クリス、大丈夫かしら。[JA]
cjks/mimiru/1_m_581.wav|13|[KO]희생당한 입장에선 이해해줄 의무가 없지?[KO]
cjks/SA5/sp014-000337.wav|21|[SA]तुळ्जापुरस्य पण्ढरापुरस्य कोल्हापुरस्य च विध्वंसेन क्रोधतप्ताः ते प्रतापदुर्गस्य मार्गम् आश्रितवन्तः आसन्[SA]
cjks/SA1/sp009-001884.wav|17|[SA]पशवः हृष्टाः पुष्टाङ्गाश्च गोचरीभवन्ति स्म[SA]
cjks/SA1/sp009-000926.wav|17|[SA]विद्यया लोकम् जेतुम् यः शक्तः भविष्यति तस्य पोषणाय स्तोकम् अपि धनम् अस्माकम् नास्ति[SA]
cjks/louise/0_13719.wav|3|[JA]さあ、姫さま。積もる話もありますし、こんな犬は置いて、お庭でゆっくりしましょう。[JA]
cjks/nen/nen117_226.wav|0|[JA]んぐぅぅ……ぅぅぁあっ、はぁ、はぁ、はぁ、んっ、んんーーーッ……はぁ、はぁ、はぁ、あっ、あっ、あっあっあっあっあっあああああああ――[JA]
cjks/SA6/sp015-000060.wav|22|[SA]कालान्तरे सः बकः वृद्धः अभवत्[SA]
cjks/nen/nen019_028.wav|0|[JA]お、思い出させないで下さいっ[JA]
cjks/louise/1_07186.wav|3|[JA]そんなの他の人に頼めばいいじゃない?[JA]

View File

@ -0,0 +1,100 @@
cjks/myvoice/00148.wav|23|ʦ`⁼ɹ`→ʧʰen↑ jou↓↑ i→ʦʰɹ↓, wo↓↑ bjo↑okin^i ka↑ka↓t#ta …… t⁼an↓ k⁼ə→k⁼ə→ mei↑ kʰan↓ ʦ`ʰu→lai↑……
cjks/louise/0_10880.wav|3|sɯ↑go↓i ki↑aine……. wa↑taʃimo mo↓t#to ga↑mba↓ɾɯbeki*kaʃiɾa.
cjks/SA1/sp009-003012.wav|17|t⁼aːd⁼ɹ`ʃəihi* likʰit⁼əm neːt⁼ɾeː nimiljə k⁼ən`t⁼`ʰəstʰiːk⁼ɹ`t⁼əvaːn əst⁼i bʰəvaːn.
cjks/yos/yos113_077.wav|1|o↑jasɯminasai.
cjks/sua/0_s_1695.wav|12|ʧigɯmɯn, amu mal`do haʥi maɾajo.
cjks/nen/nen112_129.wav|0|de↓mo, so↑nna ha↑zɯkaʃi↓i ko↑to↓o i↑wana↓ide kɯ↑dasa↓i. te↑ɾema↓sɯ* jo↓ɯ……
cjks/sua/0_s_1626.wav|12|to… toɾjənnim… i… iɾəʃʰimjən ani tweəjo.
cjks/sua/1_s_1032.wav|12|a…….
cjks/mimiru/0_m_395.wav|13|ne kjəŋgjeʥəmi.
cjks/nen/nen102_009.wav|0|i↓e, o↑so↓ɾakɯ bjo↑okidewa na↓ito o↑mo↓ɯ n↓desɯ*ga…… ke↓sakaɾa sɯ*↑ko↓ʃi* ki↑moʧiga wa↑ɾɯ↓kɯ*te…… zɯ↑t#to i↑e↓de o↑tonaʃi↓kɯnete i↑ma↓ʃi*ta.
cjks/louise/2_09455.wav|3|a↑ɾasoː? ʥa↓a, mi↑nna↓ga i↑sseːn^i sɯ*↑ka↓atoː ta↑kɯ*ʃiage↓taɾa sa↑itowa da↓ɾeo mi↓ɾɯ no↑kaʃiɾa?
cjks/mom/MS01_MOM-0517.wav|5|…… da↓isɯ*kidesɯ*, ɾi↓tosaN.
cjks/yam/MS04_YAM-0011.wav|4|…… wa↑ɾɯgi↓no na↓i ko↑to↓wa, wa↑ka↓t#te i↑ma↓sɯ*. ʃi*↑ka↓ʃi, wa↑ɾɯgi↓ga na↓ikaɾakoso, ʃi↓nno kjo↓oitomo i↑eɾɯ no↑de↓sɯ*.
cjks/louise/2_07659.wav|3|so↑ɾewa, a↑no, a↑ɾeda, ta↓bɯN!
cjks/louise/1_07889.wav|3|ɯ↑ɾɯsa↓iQ! sɯ*↑ko↓ʃiwa ʥi↑bɯnde ka↑ŋga↓enasaiQ! ko↑no ba↑ka↓inɯQ!
cjks/mom/SS01_MOM-0066.wav|5|i, i↓ikaɾa ha↓jakɯ! o↑iʦɯ↓kaɾete ʃi↑maima↓sɯ*!!
cjks/louise/0_11529.wav|3|a, a↑nta↓daɾejo!? hi*↑tono he↑ja↓n^i ka↑t#ten^i ha↓it#te!
cjks/louise/0_12023.wav|3|a, a↓wa, a↑hahahaha……!
cjks/SA2/sp011-000175.wav|18|əst⁼u naːmə it⁼i ʧ⁼int⁼əjət⁼aː ʃivəɾaːʥ⁼eːnə səhə* maːɾg⁼əhə* eːvə niʃʧ⁼it⁼əhə* p⁼əlaːjənaːjə.
cjks/louise/0_09835.wav|3|…… sa↑ito. ʥɯ↓gjoːga o↑wat#ta↓ɾa, wa↑taʃin^i sa↑mɯɾain^i ʦɯ↓ite o↑ʃienasai.
cjks/sua/1_s_980.wav|12|uβuβuβuβuβuβuβuβu~
cjks/louise/1_05594.wav|3|ha↓a, ha↓a, ha↓a…….
cjks/mik/MS10_MIK-0077.wav|6|ɾa↓ɾasanno ɾjo↓oɾi, sɯ↑k#go↓kɯ a↑ʥiga ko↓soːdat#takaɾane. ko↑ɾegɯ↓ɾai ɯ↑sɯ↓aʥino ho↓oga i↓ito o↑mo↓t#te.
cjks/louise/0_10726.wav|3|ba↑kaQ!
cjks/louise/2_05665.wav|3|i↓e, wa↑taʃino ka↑oː ʥi↑t#to mi↓te i↑ɾɯ jo↓odeʃi*takaɾa. na↓n^ika o↑ka↓ʃina to↑koɾoga a↓ɾɯ n↓deʃoːka?
cjks/mimiru/0_m_198.wav|13|eɾɯl` panʧ=ɯm ʧabat#t=ənde?
cjks/nan/nan212_004.wav|2|mɯ↓ɾi se↓zɯ, ta↑iʧoːɸɯ↓ɾjoːde ja↑sɯ↓nda ho↓oga i↓in^ʥanai?
cjks/louise/0_10563.wav|3|a↑aː↓aQ! m a, ma↑ta! a↓ntawa ma↑taQ!
cjks/yuhwa/1_u_1020.wav|15|twet#k=o. jənβwa ənniɾo pjənʃʰinɦe posʰejo.
cjks/SA6/sp015-000374.wav|22|t⁼eːnə t⁼u vəɾəd⁼ək⁼s`in`aːm vinaː eːvə saː p⁼əɾin`eːt⁼əvjaː.
cjks/mik/MS10_MIK-0031.wav|6|ha↓a, i↑e↓made kɯ↓ɾɯnante……
cjks/louise/0_10836.wav|3|ko↓ndowa na↑midagɯnde↓ɾɯ.
cjks/sua/1_s_517.wav|12|sʰon^jənɯn mənʥə naga is=ɯl`gejo~
cjks/SA5/sp014-000018.wav|21|p⁼unəʃʧ⁼ə t⁼ət⁼ ʥ⁼eːt⁼um ʃək⁼jəm.
cjks/yos/yos101_027.wav|1|da↓ɾemo, so↑koma↓dewa i↑t#temase↓N…… Q.
cjks/nan/nan009_067.wav|2|ka↑kɯn^iN ʃi*↑ta↓kedo, ne↑t#towaːkɯʥoːno se↑kjɯ↓ɾitimo kjo↓oka sa↑ɾeta jo↑osɯwa na↓ijo.
cjks/arin/1_a_361.wav|14|ɯ… ɯŋ!!!
cjks/sua/0_s_1813.wav|12|op=a kɯɾəkʰebak=e motʰe?
cjks/nen/nen117_195.wav|0|N↑Nː↓Nː ɯ↓ɯːːː…… ɯɯQ! ha↓Q, wa↑aQ, wa↑aQ…… ha↓aːːːː……
cjks/louise/0_10978.wav|3|de↓ʃoː? da↓kaɾa, do↓nna ko↑to↓o sɯ↑ɾe↓ba ɯ↑ndoːno↓oɾjokɯno a↓p#pɯn^i ʦɯ↑nagaɾɯ no↑ka ki↑ite ki↓tano.
cjks/SA1/sp009-002328.wav|17|it⁼i vividʰaːha* bʰaːvənaːha* vəɾs`aːk⁼aːleː nəd⁼iːp⁼ɾəvaːɦaːha* ivə t⁼əsjə ɦɹ`d⁼əjəm p⁼uːɾəjənt⁼i smə.
cjks/louise/0_14038.wav|3|do↓oʃi*te, so↑nnan^i ɸɯ↓ɾaɸɯɾa sɯ↑ɾɯ↓no!? do↓oʃi*te, ta↓no ko↑n^i me↓ga i↑kɯ↓no! wa↑taʃidakeo mi↓te i↑ɾe↓ba so↑ɾede ʥɯɯbɯ↓ndeʃoː!?
cjks/mik/MS03_MIK-0004.wav|6|da↓tosɯɾɯto…… i↑mama↓de ta↑ikeN ʃi*↑te ki↓ta ko↑to↓o mo↑oiʧido ja↓ɾeba, ta↓no ki↑okɯmo mo↑do↓ɾɯkamo?
cjks/sua/0_s_2370.wav|12|kɯɾənik=aaaa…….
cjks/nan/nan211_191.wav|2|da↑me, da↑me↓dajo, i↑wana↓ide…… ɯ↑ɾeʃi↓kɯ*te, ka↑ɾada, a↑ʦɯ↓kɯnat#ʧat#te Q.
cjks/rong/cvrong06054.wav|7|…… ən↑.
cjks/nen/nen016_043.wav|0|a↑ɾi↓gatoː go↑zaima↓sɯ*, ta↑iheN sa↑ŋkoːn^i na↑ɾima↓ʃi*ta.
cjks/nan/nan004_002.wav|2|so↑ɾede jo↑oʥit#te na↓n^i?
cjks/sua/1_s_561.wav|12|an^jo!! toɾjənnim!!
cjks/yos/yos113_212.wav|1|ha↓a, ha↓a…… N↓Q, ha↓i, ki↑moʧiː↓idesɯ*, ha↓a, ha↓a, N↓Q, N↓NaːːQ!
cjks/yam/MS03_YAM-0038.wav|4|ha↓i.
cjks/louise/0_10485.wav|3|ʧ o, ʧo↓t#to! na↓nde ko↑ɾede ka↑ikeʦɯ ʃi*↑ta mi↓taina ɸɯ↑Ni↓kin^i na↑t#te↓ɾɯno!?
cjks/hl/ch11_hl_011.wav|10|p⁼u↓ ʦ`⁼ən↓ʧ⁼iŋ→ i→ʃa↓ ma?
cjks/nan/nan206_018.wav|2|o↑doɾokino ka↑o. ma↓saka sa↑toɾɯkɯŋkaɾa de↓eton^i sa↑sot#te kɯ↑ɾeɾɯ↓nante……
cjks/yuhwa/1_u_1012.wav|15|ə… ɯŋ… ənnija.
cjks/louise/2_06835.wav|3|…… wa↑ka↓t#tawajo.
cjks/nen/nen117_088.wav|0|ʧ ɯ, ʥ ɯ ɾɯ↓ɾɯ…… m↓mɯ, ʧ ɯ, ʧ ɯ, ɾe↓ɾoɾeɾo ɾe↓ɾoɾeɾo…… N↓NQ, ha↓a, ha↓a, ha↓a, mo↓o to↑ɾo↓toɾoga o↑samaɾa↓nai.
cjks/louise/1_07723.wav|3|ma↓zɯ, a↑tama↓no ɯ↑en^i hi*↑to↓ʦɯ no↑sete, t o……. N, ɾjo↑oten^imo hi*↑toʦɯzɯ↓ʦɯ mo↑t#te↓ɾɯnojo.
cjks/nan/nan202_051.wav|2|e!?
cjks/nen/nen406_007.wav|0|de↓mo sa↑ikiN, wa↑taʃiwa ka↑iʃɯːo i↑t#te i↑nai n↓desɯ*.
cjks/yos/yos102_060.wav|1|na↓ndeʃoːka?
cjks/louise/1_07509.wav|3|ka↓kɯgowa i↓iwane?
cjks/nen/nen113_206.wav|0|mo↑osɯ↓gɯ a↑taɾima↓sɯ*? a↑tat#ʧa i↑ma↓sɯ*ka…… ha↓a, ha↓a, ha↓a……
cjks/sua/0_s_477.wav|12|!!!
cjks/nen/nen115_178.wav|0|a, w a, ha↓i…… da↑iʥo↓obɯ, na↓ndesɯ*ga…… i↓ma ʧo↓t#to, ka↓ɾiga, ki↑moʧiː↓i to↑koɾon^i çi↑k#kaka↓t#te……
cjks/louise/1_06499.wav|3|a↑ntana↓ŋka, a↓ntanaŋkaQ…….
cjks/nen/nen104_083.wav|0|------
cjks/nen/nen109_046.wav|0|jo↓kat#ta…… mo↓o, ta↓n^iwa na↓n^imo o↑moiʦɯ*kimase↓N. so↑ɾegɯ↓ɾai ma↑ŋki*ʦɯ de↑kima↓ʃi*ta.
cjks/SA4/sp013-000151.wav|20|t⁼ət⁼ɾət⁼jəhə* naːjək⁼əhə* p⁼əɾəmaːjoːg⁼jəhə* ut⁼k⁼oːʧ⁼əʃiːləhə* ʧ⁼ə.
cjks/yos/yos115_180.wav|1|so↑ɾewa, so↑ona n↓desɯ*kedo…… N↓Q, N çi↓Q!? a↑QaQ↓aQ, da↑me, i↓mawa ɯ↑go↓iʧa da↑me↓desɯ*jo Q.
cjks/arin/0_a_250.wav|14|ɯŋ……!
cjks/SA2/sp011-000255.wav|18|t⁼eːnə səɦə.
cjks/louise/2_07456.wav|3|sa↑ito. m a~ t a a↓ntawa bo↑ot#to ʃi*↑te!
cjks/arin/0_a_156.wav|14|eʧʰoe sʰumjəŋi namɯn sʰaɾamɯn sʰondemjən an twenɯnde.
cjks/nen/nen113_093.wav|0|a↓mɯ……
cjks/SA5/sp014-000357.wav|21|kʰaːneːnə likʰit⁼əm p⁼ət⁼ɾəm k⁼ɹ`s`n`aːʥ⁼iːp⁼ənt⁼əhə* ʃivəməɦaːɾaːʥ⁼aːjə d⁼ət⁼t⁼əvaːn.
cjks/louise/2_06748.wav|3|so↑ɾeʥa↓a, a↓ntan^iwa ho↓kan^i na↓n^ika i↓i a↓ŋga a↓ɾɯno?
cjks/yuhwa/1_u_1048.wav|15|ɯŋ~! nega ənnija toŋsʰeŋa!!!
cjks/yeonhwa/1_y_36.wav|16|ɯŋ? we?
cjks/yos/yos110_061.wav|1|ɯ↓ɯ~…… se↓k#kɯ*sɯt#te, mɯ↑zɯkaʃi↓i mo↑no↓na n↓desɯ*ne……
cjks/louise/2_07111.wav|3|ma↑t#takɯ, do↓kon^i ka↑kɯ*ʃi↓ta no↑kaʃiɾane.
cjks/louise/1_07285.wav|3|so↑ɾen^i, ko↑naida↓mo so↑odat#ta↓kedo, kjo↓mɯno ma↑hoːwa e↑eʃoːn^i ʥi↑kaŋga ka↑ka↓ɾɯwa.
cjks/sua/0_s_629.wav|12|pʰansʰoɾiejo?!
cjks/louise/2_08049.wav|3|wa↑taʃimo mɯ↑ne↓o o↓oki*kɯdekiɾɯkaʃiɾa?
cjks/nen/nen211_017.wav|0|ha↓i. ʥo↑okjoːkaɾa sɯ↑ɾɯto, ho↓ʃinakɯnto i↓nabasanno kjo↓ɾiga ʧi*↑kazɯ↓kɯ go↑ton^i.
cjks/louise/0_10002.wav|3|k a, ka↑n^ʧi↓gai ʃi↑na↓idejo!? be↑ʦɯn^i ɯ↑ɾeʃi↓kat#ta wa↓keʥanai n↓dakaɾane!
cjks/yos/yos100_054.wav|1|---- ha↑ʥi↓mete de↑ki↓ta to↑modaʧide↓sɯ*kaɾa.
cjks/yos/yos106_085.wav|1|N↓Q, N↓Nːːːː……
cjks/nen/nen410_024.wav|0|so↑ɾena↓ɾa i↓i n↓desɯ*ga.
cjks/louise/0_14257.wav|3|………
cjks/louise/0_10520.wav|3|de↓ʃoː? kɯ↓ɾisɯ, da↑iʥo↓obɯkaʃiɾa.
cjks/mimiru/1_m_581.wav|13|çisʰeŋdaŋɦan ip#ʧ=aŋesʰən iɦeɦeʥul` ɯimuga əp#ʧ=i?
cjks/SA5/sp014-000337.wav|21|t⁼ul`ʥ⁼aːp⁼uɾəsjə p⁼ən`d⁼`ʰəɾaːp⁼uɾəsjə k⁼oːlɦaːp⁼uɾəsjə ʧ⁼ə vidʰvə^seːnə k⁼ɾoːdʰət⁼əp⁼t⁼aːha* t⁼eː p⁼ɾət⁼aːp⁼əd⁼uɾg⁼əsjə maːɾg⁼əm aːʃɾit⁼əvənt⁼əhə* aːsən.
cjks/SA1/sp009-001884.wav|17|p⁼əʃəvəhə* ɦɹ`s`t⁼`⁼aːha* p⁼us`t⁼`⁼aːŋg⁼aːʃʧ⁼ə g⁼oːʧ⁼əɾiːbʰəvənt⁼i smə.
cjks/SA1/sp009-000926.wav|17|vid⁼jəjaː loːk⁼əm ʥ⁼eːt⁼um jəhə* ʃək⁼t⁼əhə* bʰəvis`jət⁼i t⁼əsjə p⁼oːs`ən`aːjə st⁼oːk⁼əm əp⁼i dʰənəm əsmaːk⁼əm naːst⁼i.
cjks/louise/0_13719.wav|3|sa↓a, çi↓mesama. ʦɯ↑mo↓ɾɯ ha↑naʃi↓mo a↑ɾima↓sɯ*ʃi, ko↑nna i↑nɯ↓wa o↑ite, o↓n^iwade jɯ↑k#kɯ↓ɾi ʃi↑maʃo↓o.
cjks/nen/nen117_226.wav|0|ŋ↓gɯːː…… ɯɯa↓aQ, ha↓a, ha↓a, ha↓a, N↓Q, N↓NːːːQ…… ha↓a, ha↓a, ha↓a, a↓Q, a↓Q, a↑QaQaQaQaQaːːːː↓aː----
cjks/SA6/sp015-000060.wav|22|k⁼aːlaːnt⁼əɾeː səhə* b⁼ək⁼əhə* vɹ`d⁼dʰəhə* əbʰəvət⁼.
cjks/nen/nen019_028.wav|0|o, o↑moida↓sasenaide kɯ↑dasaiQ.
cjks/louise/1_07186.wav|3|so↑nna↓no ho↑kano hi*↑ton^i ta↑no↓meba i↓iʥanai?

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,89 @@
fox_data/origin/s_1.wav|0|도련…… 님……?
fox_data/origin/s_100.wav|0|네!
fox_data/origin/s_200.wav|0|뭐든지 말씀해보세요~
fox_data/origin/s_347.wav|0|도련님한테 이게 무슨 짓이냐니까?
fox_data/origin/s_287.wav|0|소녀가 흉계를 꾸미고 있었다고 생각했나요?
fox_data/origin/m_58.wav|1|반했냐? 어?
fox_data/origin/s_434.wav|0|들어가요. 소녀만 볼 거예요.
fox_data/origin/s_715.wav|0|…….
fox_data/origin/s_509.wav|0|…….
fox_data/origin/s_609.wav|0|…!!!
fox_data/origin/a_47.wav|2|혹시 영능력자라거나…….
fox_data/origin/s_815.wav|0|에헤헤헤헤~
fox_data/origin/a_221.wav|2|……확신은 못하겠어.
fox_data/origin/m_225.wav|1|내게 그런 말 할 자격은 없겠지…….
fox_data/origin/s_953.wav|0|소녀가 또 쓸데없는 짓을…….
fox_data/origin/a_154.wav|2|생각해봐!! 널 어떻게 할 거였으면 진작에 했겠지!!
fox_data/origin/s_1099.wav|0|도련님……
fox_data/origin/a_314.wav|2|응응!!!
fox_data/origin/s_1121.wav|0|뭘 하시는 건가요?
fox_data/origin/a_352.wav|2|아! 미… 미안해!! 내가 멋대로…….
fox_data/origin/m_424.wav|1|다만, 그 법기는 실체가 이쪽에 있는 물건이 아니라서…….
fox_data/origin/s_1273.wav|0|우후후~
fox_data/origin/s_1393.wav|0|어서요!
fox_data/origin/m_466.wav|1|하…….
fox_data/origin/s_1440.wav|0|계속 말씀드렸잖아요
fox_data/origin/a_521.wav|2|아침에 일어났는데, 날 감시하던 분신도 없구.
fox_data/origin/a_550.wav|2|세은아? 왜 그래?!!
fox_data/origin/s_1546.wav|0|침실에 갈 때까지 참으셔야 해요.
fox_data/origin/s_1646.wav|0|하아… 하아… 하아…….
fox_data/origin/m_589.wav|1|도령이 제일 잘못한 건…….
fox_data/origin/m_546.wav|1|나 너 싫어하지 않아.
fox_data/origin/s_1807.wav|0|흥.
fox_data/origin/j_30.wav|2|사람을 홀려야 하는 여우 입장에서는, 최악의 상대가 아닌지요?
fox_data/origin/m_642.wav|1|허접하긴 해도, 한 번 보고 거기까지 따라 하는 건 천부적인 재능이야.
fox_data/origin/a_711.wav|2|크윽…! 저… 저승사자는 이런 협박에 굴복하지 않는다!!
fox_data/origin/s_2013.wav|0|아, 고마워요.
fox_data/origin/a_736.wav|2|그래서 한번 해 봤는데… 진짜 되네요?
fox_data/origin/s_2215.wav|0|우후후~
fox_data/origin/s_2292.wav|0|…….
fox_data/origin/m_862.wav|1|산신령이야~~
fox_data/origin/s_2368.wav|0|소녀, 너무 기뻐서… 더… 더 이상 여한이 없긴 한데…….
fox_data/origin/a_826.wav|2|아니… 그런건 아닌 것 같아.
fox_data/origin/s_2461.wav|0|도련님? 좀 더 말해보세요.
fox_data/origin/a_865.wav|2|그… 그… 글쎄요?! 저… 전 아무것도 예상 안 했어요!!
fox_data/hwa/s_55.wav|0|하지마아아안…….
fox_data/hwa/s_86.wav|0|수아예요.
fox_data/hwa/s_114.wav|0|여우의 싸움에서 이런 건 당연한 것이옵니다~
fox_data/hwa/u_69.wav|4|언니…….
fox_data/hwa/u_121.wav|4|인간의 육체와 영혼을 취했을 때의 대가라고나 할까…….
fox_data/hwa/s_163.wav|0|도…… 련님…….
fox_data/hwa/m_82.wav|1|헹.
fox_data/hwa/m_102.wav|1|뭐… 안전하게 해제할 만한 녀석이 있긴 한데…….
fox_data/hwa/y_115.wav|3|힘들 거 없잖아? 눈 딱 감고 한 번만 하면 되는걸?
fox_data/hwa/s_302.wav|0|후훗…….
fox_data/hwa/y_164.wav|3|…….
fox_data/hwa/y_187.wav|3|그렇긴 한데~?
fox_data/hwa/y_207.wav|3|아 참.
fox_data/hwa/s_457.wav|0|어쩌면, 가족이 되었을지도 몰라요.
fox_data/hwa/u_416.wav|4|그게 뭔가요 인간아?
fox_data/hwa/m_11.wav|1|하아… 그래.
fox_data/hwa/u_49.wav|4|어디서 눈을 부라려요. 가문에서 도태된 실패작이 개기는 게 아니에요.
fox_data/hwa/u_67.wav|4|…놔요.
fox_data/hwa/u_433.wav|4|취미?
fox_data/hwa/k_113.wav|5|처음엔, 별 일 아니었어.
fox_data/hwa/u_467.wav|4|뭘 멋대로 꼬리에 손대고 앉았나요.
fox_data/hwa/k_223.wav|5|스스로 계속 머물고 있었던 거냐?
fox_data/hwa/s_553.wav|0|도련님?
fox_data/hwa/a_209.wav|2|미… 미안해!!
fox_data/hwa/s_655.wav|0|이게… 소녀인걸요?
fox_data/hwa/a_253.wav|2|…….
fox_data/hwa/s_685.wav|0|아하하하하하~!!
fox_data/hwa/s_740.wav|0|왜~?
fox_data/hwa/s_841.wav|0|도련님이라면 선을 지키실 거라고. 소녀, 믿고 있사옵니다.
fox_data/hwa/m_451.wav|1|너 설마 나를 제일 먼저 불러낸 거야?!
fox_data/hwa/a_438.wav|2|아까 내가 했던… 부끄러운 말은…….
fox_data/hwa/a_469.wav|2|그리고, 영력 타입을 봤을 때…… 절대로 저 여우가 설치한 게 아냐!
fox_data/hwa/k_286.wav|5|일부 저승사자나 신령의 기술로 가공하면…….
fox_data/hwa/k_338.wav|5|상부에 비상이 걸리고… 즉시 중지 명령이 내려왔어.
fox_data/hwa/u_644.wav|4|……아.
fox_data/hwa/u_722.wav|4|…….
fox_data/hwa/u_778.wav|4|내 불꽃을 보고 예쁘다고 했잖아!!!
fox_data/hwa/u_867.wav|4|여기서 나가!!!!!!!!!!!!!!!
fox_data/hwa/y_323.wav|3|먼지투성이가 되어있던 날 불러준. 당신의 마음은.
fox_data/hwa/u_957.wav|4|수아를 '언니'라고 부르는 건 아직 힘들고.
fox_data/hwa/s_1041.wav|0|이 세상 누구보다도.
fox_data/hwa/m_602.wav|1|…….
fox_data/hwa/y_334.wav|3|하지만 이 영혼과 인격…… 뭐. 진짜 영혼인건 아니지만.
fox_data/hwa/a_587.wav|2|으… 응?!
fox_data/hwa/m_632.wav|1|아니. 도령.

View File

@ -0,0 +1,89 @@
fox_data/origin/s_1.wav|0|ㄷㅗㄹㅣㅓㄴ…… ㄴㅣㅁ……?
fox_data/origin/s_100.wav|0|ㄴㅔ!
fox_data/origin/s_200.wav|0|ㅁㅜㅓㄷㅡㄴㅈㅣ ㅁㅏㄹㅆㅡㅁㅎㅐㅂㅗㅅㅔㅇㅣㅗ~
fox_data/origin/s_347.wav|0|ㄷㅗㄹㅣㅓㄴㄴㅣㅁㅎㅏㄴㅌㅔ ㅇㅣㄱㅔ ㅁㅜㅅㅡㄴ ㅈㅣㅅㅇㅣㄴㅣㅏㄴㅣㄲㅏ?
fox_data/origin/s_287.wav|0|ㅅㅗㄴㅣㅓㄱㅏ ㅎㅣㅜㅇㄱㅣㅔㄹㅡㄹ ㄲㅜㅁㅣㄱㅗ ㅇㅣㅆㅇㅓㅆㄷㅏㄱㅗ ㅅㅐㅇㄱㅏㄱㅎㅐㅆㄴㅏㅇㅣㅗ?
fox_data/origin/m_58.wav|1|ㅂㅏㄴㅎㅐㅆㄴㅣㅏ? ㅇㅓ?
fox_data/origin/s_434.wav|0|ㄷㅡㄹㅇㅓㄱㅏㅇㅣㅗ. ㅅㅗㄴㅣㅓㅁㅏㄴ ㅂㅗㄹ ㄱㅓㅇㅣㅔㅇㅣㅗ.
fox_data/origin/s_715.wav|0|…….
fox_data/origin/s_509.wav|0|…….
fox_data/origin/s_609.wav|0|…!!!
fox_data/origin/a_47.wav|2|ㅎㅗㄱㅅㅣ ㅇㅣㅓㅇㄴㅡㅇㄹㅣㅓㄱㅈㅏㄹㅏㄱㅓㄴㅏ…….
fox_data/origin/s_815.wav|0|ㅇㅔㅎㅔㅎㅔㅎㅔㅎㅔ~
fox_data/origin/a_221.wav|2|……ㅎㅗㅏㄱㅅㅣㄴㅇㅡㄴ ㅁㅗㅅㅎㅏㄱㅔㅆㅇㅓ.
fox_data/origin/m_225.wav|1|ㄴㅐㄱㅔ ㄱㅡㄹㅓㄴ ㅁㅏㄹ ㅎㅏㄹ ㅈㅏㄱㅣㅓㄱㅇㅡㄴ ㅇㅓㅂㅅㄱㅔㅆㅈㅣ…….
fox_data/origin/s_953.wav|0|ㅅㅗㄴㅣㅓㄱㅏ ㄸㅗ ㅆㅡㄹㄷㅔㅇㅓㅂㅅㄴㅡㄴ ㅈㅣㅅㅇㅡㄹ…….
fox_data/origin/a_154.wav|2|ㅅㅐㅇㄱㅏㄱㅎㅐㅂㅗㅏ!! ㄴㅓㄹ ㅇㅓㄸㅓㅎㄱㅔ ㅎㅏㄹ ㄱㅓㅇㅣㅓㅆㅇㅡㅁㅣㅓㄴ ㅈㅣㄴㅈㅏㄱㅇㅔ ㅎㅐㅆㄱㅔㅆㅈㅣ!!
fox_data/origin/s_1099.wav|0|ㄷㅗㄹㅣㅓㄴㄴㅣㅁ……
fox_data/origin/a_314.wav|2|ㅇㅡㅇㅇㅡㅇ!!!
fox_data/origin/s_1121.wav|0|ㅁㅜㅓㄹ ㅎㅏㅅㅣㄴㅡㄴ ㄱㅓㄴㄱㅏㅇㅣㅗ?
fox_data/origin/a_352.wav|2|ㅇㅏ! ㅁㅣ… ㅁㅣㅇㅏㄴㅎㅐ!! ㄴㅐㄱㅏ ㅁㅓㅅㄷㅐㄹㅗ…….
fox_data/origin/m_424.wav|1|ㄷㅏㅁㅏㄴ, ㄱㅡ ㅂㅓㅂㄱㅣㄴㅡㄴ ㅅㅣㄹㅊㅔㄱㅏ ㅇㅣㅉㅗㄱㅇㅔ ㅇㅣㅆㄴㅡㄴ ㅁㅜㄹㄱㅓㄴㅇㅣ ㅇㅏㄴㅣㄹㅏㅅㅓ…….
fox_data/origin/s_1273.wav|0|ㅇㅜㅎㅜㅎㅜ~
fox_data/origin/s_1393.wav|0|ㅇㅓㅅㅓㅇㅣㅗ!
fox_data/origin/m_466.wav|1|ㅎㅏ…….
fox_data/origin/s_1440.wav|0|ㄱㅣㅔㅅㅗㄱ ㅁㅏㄹㅆㅡㅁㄷㅡㄹㅣㅓㅆㅈㅏㄴㅎㅇㅏㅇㅣㅗ.
fox_data/origin/a_521.wav|2|ㅇㅏㅊㅣㅁㅇㅔ ㅇㅣㄹㅇㅓㄴㅏㅆㄴㅡㄴㄷㅔ, ㄴㅏㄹ ㄱㅏㅁㅅㅣㅎㅏㄷㅓㄴ ㅂㅜㄴㅅㅣㄴㄷㅗ ㅇㅓㅂㅅㄱㅜ.
fox_data/origin/a_550.wav|2|ㅅㅔㅇㅡㄴㅇㅏ? ㅇㅗㅐ ㄱㅡㄹㅐ?!!
fox_data/origin/s_1546.wav|0|ㅊㅣㅁㅅㅣㄹㅇㅔ ㄱㅏㄹ ㄸㅐㄲㅏㅈㅣ ㅊㅏㅁㅇㅡㅅㅣㅓㅇㅣㅏ ㅎㅐㅇㅣㅗ.
fox_data/origin/s_1646.wav|0|ㅎㅏㅇㅏ… ㅎㅏㅇㅏ… ㅎㅏㅇㅏ…….
fox_data/origin/m_589.wav|1|ㄷㅗㄹㅣㅓㅇㅇㅣ ㅈㅔㅇㅣㄹ ㅈㅏㄹㅁㅗㅅㅎㅏㄴ ㄱㅓㄴ…….
fox_data/origin/m_546.wav|1|ㄴㅏ ㄴㅓ ㅅㅣㄹㅎㅇㅓㅎㅏㅈㅣ ㅇㅏㄴㅎㅇㅏ.
fox_data/origin/s_1807.wav|0|ㅎㅡㅇ.
fox_data/origin/j_30.wav|2|ㅅㅏㄹㅏㅁㅇㅡㄹ ㅎㅗㄹㄹㅣㅓㅇㅣㅏ ㅎㅏㄴㅡㄴ ㅇㅣㅓㅇㅜ ㅇㅣㅂㅈㅏㅇㅇㅔㅅㅓㄴㅡㄴ, ㅊㅗㅣㅇㅏㄱㅇㅡㅣ ㅅㅏㅇㄷㅐㄱㅏ ㅇㅏㄴㅣㄴㅈㅣㅇㅣㅗ?
fox_data/origin/m_642.wav|1|ㅎㅓㅈㅓㅂㅎㅏㄱㅣㄴ ㅎㅐㄷㅗ, ㅎㅏㄴ ㅂㅓㄴ ㅂㅗㄱㅗ ㄱㅓㄱㅣㄲㅏㅈㅣ ㄸㅏㄹㅏ ㅎㅏㄴㅡㄴ ㄱㅓㄴ ㅊㅓㄴㅂㅜㅈㅓㄱㅇㅣㄴ ㅈㅐㄴㅡㅇㅇㅣㅇㅣㅏ.
fox_data/origin/a_711.wav|2|ㅋㅡㅇㅡㄱ…! ㅈㅓ… ㅈㅓㅅㅡㅇㅅㅏㅈㅏㄴㅡㄴ ㅇㅣㄹㅓㄴ ㅎㅣㅓㅂㅂㅏㄱㅇㅔ ㄱㅜㄹㅂㅗㄱㅎㅏㅈㅣ ㅇㅏㄴㅎㄴㅡㄴㄷㅏ!!
fox_data/origin/s_2013.wav|0|ㅇㅏ, ㄱㅗㅁㅏㅇㅜㅓㅇㅣㅗ.
fox_data/origin/a_736.wav|2|ㄱㅡㄹㅐㅅㅓ ㅎㅏㄴㅂㅓㄴ ㅎㅐ ㅂㅗㅏㅆㄴㅡㄴㄷㅔ… ㅈㅣㄴㅉㅏ ㄷㅗㅣㄴㅔㅇㅣㅗ?
fox_data/origin/s_2215.wav|0|ㅇㅜㅎㅜㅎㅜ~
fox_data/origin/s_2292.wav|0|…….
fox_data/origin/m_862.wav|1|ㅅㅏㄴㅅㅣㄴㄹㅣㅓㅇㅇㅣㅇㅣㅏ~~
fox_data/origin/s_2368.wav|0|ㅅㅗㄴㅣㅓ, ㄴㅓㅁㅜ ㄱㅣㅃㅓㅅㅓ… ㄷㅓ… ㄷㅓ ㅇㅣㅅㅏㅇ ㅇㅣㅓㅎㅏㄴㅇㅣ ㅇㅓㅂㅅㄱㅣㄴ ㅎㅏㄴㄷㅔ…….
fox_data/origin/a_826.wav|2|ㅇㅏㄴㅣ… ㄱㅡㄹㅓㄴㄱㅓㄴ ㅇㅏㄴㅣㄴ ㄱㅓㅅ ㄱㅏㅌㅇㅏ.
fox_data/origin/s_2461.wav|0|ㄷㅗㄹㅣㅓㄴㄴㅣㅁ? ㅈㅗㅁ ㄷㅓ ㅁㅏㄹㅎㅐㅂㅗㅅㅔㅇㅣㅗ.
fox_data/origin/a_865.wav|2|ㄱㅡ… ㄱㅡ… ㄱㅡㄹㅆㅔㅇㅣㅗ?! ㅈㅓ… ㅈㅓㄴ ㅇㅏㅁㅜㄱㅓㅅㄷㅗ ㅇㅣㅔㅅㅏㅇ ㅇㅏㄴ ㅎㅐㅆㅇㅓㅇㅣㅗ!!
fox_data/hwa/s_55.wav|0|ㅎㅏㅈㅣㅁㅏㅇㅏㅇㅏㅇㅏㄴ…….
fox_data/hwa/s_86.wav|0|ㅅㅜㅇㅏㅇㅣㅔㅇㅣㅗ.
fox_data/hwa/s_114.wav|0|ㅇㅣㅓㅇㅜㅇㅡㅣ ㅆㅏㅇㅜㅁㅇㅔㅅㅓ ㅇㅣㄹㅓㄴ ㄱㅓㄴ ㄷㅏㅇㅇㅣㅓㄴㅎㅏㄴ ㄱㅓㅅㅇㅣㅇㅗㅂㄴㅣㄷㅏ~
fox_data/hwa/u_69.wav|4|ㅇㅓㄴㄴㅣ…….
fox_data/hwa/u_121.wav|4|ㅇㅣㄴㄱㅏㄴㅇㅡㅣ ㅇㅣㅜㄱㅊㅔㅇㅗㅏ ㅇㅣㅓㅇㅎㅗㄴㅇㅡㄹ ㅊㅜㅣㅎㅐㅆㅇㅡㄹ ㄸㅐㅇㅡㅣ ㄷㅐㄱㅏㄹㅏㄱㅗㄴㅏ ㅎㅏㄹㄲㅏ…….
fox_data/hwa/s_163.wav|0|ㄷㅗ…… ㄹㅣㅓㄴㄴㅣㅁ…….
fox_data/hwa/m_82.wav|1|ㅎㅔㅇ.
fox_data/hwa/m_102.wav|1|ㅁㅜㅓ… ㅇㅏㄴㅈㅓㄴㅎㅏㄱㅔ ㅎㅐㅈㅔㅎㅏㄹ ㅁㅏㄴㅎㅏㄴ ㄴㅣㅓㅅㅓㄱㅇㅣ ㅇㅣㅆㄱㅣㄴ ㅎㅏㄴㄷㅔ…….
fox_data/hwa/y_115.wav|3|ㅎㅣㅁㄷㅡㄹ ㄱㅓ ㅇㅓㅂㅅㅈㅏㄴㅎㅇㅏ? ㄴㅜㄴ ㄸㅏㄱ ㄱㅏㅁㄱㅗ ㅎㅏㄴ ㅂㅓㄴㅁㅏㄴ ㅎㅏㅁㅣㅓㄴ ㄷㅗㅣㄴㅡㄴㄱㅓㄹ?
fox_data/hwa/s_302.wav|0|ㅎㅜㅎㅜㅅ…….
fox_data/hwa/y_164.wav|3|…….
fox_data/hwa/y_187.wav|3|ㄱㅡㄹㅓㅎㄱㅣㄴ ㅎㅏㄴㄷㅔ~?
fox_data/hwa/y_207.wav|3|ㅇㅏ ㅊㅏㅁ.
fox_data/hwa/s_457.wav|0|ㅇㅓㅉㅓㅁㅣㅓㄴ, ㄱㅏㅈㅗㄱㅇㅣ ㄷㅗㅣㅇㅓㅆㅇㅡㄹㅈㅣㄷㅗ ㅁㅗㄹㄹㅏㅇㅣㅗ.
fox_data/hwa/u_416.wav|4|ㄱㅡㄱㅔ ㅁㅜㅓㄴㄱㅏㅇㅣㅗ ㅇㅣㄴㄱㅏㄴㅇㅏ?
fox_data/hwa/m_11.wav|1|ㅎㅏㅇㅏ… ㄱㅡㄹㅐ.
fox_data/hwa/u_49.wav|4|ㅇㅓㄷㅣㅅㅓ ㄴㅜㄴㅇㅡㄹ ㅂㅜㄹㅏㄹㅣㅓㅇㅣㅗ. ㄱㅏㅁㅜㄴㅇㅔㅅㅓ ㄷㅗㅌㅐㄷㅗㅣㄴ ㅅㅣㄹㅍㅐㅈㅏㄱㅇㅣ ㄱㅐㄱㅣㄴㅡㄴ ㄱㅔ ㅇㅏㄴㅣㅇㅔㅇㅣㅗ.
fox_data/hwa/u_67.wav|4|…ㄴㅗㅏㅇㅣㅗ.
fox_data/hwa/u_433.wav|4|ㅊㅜㅣㅁㅣ?
fox_data/hwa/k_113.wav|5|ㅊㅓㅇㅡㅁㅇㅔㄴ, ㅂㅣㅓㄹ ㅇㅣㄹ ㅇㅏㄴㅣㅇㅓㅆㅇㅓ.
fox_data/hwa/u_467.wav|4|ㅁㅜㅓㄹ ㅁㅓㅅㄷㅐㄹㅗ ㄲㅗㄹㅣㅇㅔ ㅅㅗㄴㄷㅐㄱㅗ ㅇㅏㄴㅈㅇㅏㅆㄴㅏㅇㅣㅗ.
fox_data/hwa/k_223.wav|5|ㅅㅡㅅㅡㄹㅗ ㄱㅣㅔㅅㅗㄱ ㅁㅓㅁㅜㄹㄱㅗ ㅇㅣㅆㅇㅓㅆㄷㅓㄴ ㄱㅓㄴㅣㅏ?
fox_data/hwa/s_553.wav|0|ㄷㅗㄹㅣㅓㄴㄴㅣㅁ?
fox_data/hwa/a_209.wav|2|ㅁㅣ… ㅁㅣㅇㅏㄴㅎㅐ!!
fox_data/hwa/s_655.wav|0|ㅇㅣㄱㅔ… ㅅㅗㄴㅣㅓㅇㅣㄴㄱㅓㄹㅇㅣㅗ?
fox_data/hwa/a_253.wav|2|…….
fox_data/hwa/s_685.wav|0|ㅇㅏㅎㅏㅎㅏㅎㅏㅎㅏㅎㅏ~!!
fox_data/hwa/s_740.wav|0|ㅇㅗㅐ~?
fox_data/hwa/s_841.wav|0|ㄷㅗㄹㅣㅓㄴㄴㅣㅁㅇㅣㄹㅏㅁㅣㅓㄴ ㅅㅓㄴㅇㅡㄹ ㅈㅣㅋㅣㅅㅣㄹ ㄱㅓㄹㅏㄱㅗ. ㅅㅗㄴㅣㅓ, ㅁㅣㄷㄱㅗ ㅇㅣㅆㅅㅏㅇㅗㅂㄴㅣㄷㅏ.
fox_data/hwa/m_451.wav|1|ㄴㅓ ㅅㅓㄹㅁㅏ ㄴㅏㄹㅡㄹ ㅈㅔㅇㅣㄹ ㅁㅓㄴㅈㅓ ㅂㅜㄹㄹㅓㄴㅐㄴ ㄱㅓㅇㅣㅏ?!
fox_data/hwa/a_438.wav|2|ㅇㅏㄲㅏ ㄴㅐㄱㅏ ㅎㅐㅆㄷㅓㄴ… ㅂㅜㄲㅡㄹㅓㅇㅜㄴ ㅁㅏㄹㅇㅡㄴ…….
fox_data/hwa/a_469.wav|2|ㄱㅡㄹㅣㄱㅗ, ㅇㅣㅓㅇㄹㅣㅓㄱ ㅌㅏㅇㅣㅂㅇㅡㄹ ㅂㅗㅏㅆㅇㅡㄹ ㄸㅐ…… ㅈㅓㄹㄷㅐㄹㅗ ㅈㅓ ㅇㅣㅓㅇㅜㄱㅏ ㅅㅓㄹㅊㅣㅎㅏㄴ ㄱㅔ ㅇㅏㄴㅣㅏ!
fox_data/hwa/k_286.wav|5|ㅇㅣㄹㅂㅜ ㅈㅓㅅㅡㅇㅅㅏㅈㅏㄴㅏ ㅅㅣㄴㄹㅣㅓㅇㅇㅡㅣ ㄱㅣㅅㅜㄹㄹㅗ ㄱㅏㄱㅗㅇㅎㅏㅁㅣㅓㄴ…….
fox_data/hwa/k_338.wav|5|ㅅㅏㅇㅂㅜㅇㅔ ㅂㅣㅅㅏㅇㅇㅣ ㄱㅓㄹㄹㅣㄱㅗ… ㅈㅡㄱㅅㅣ ㅈㅜㅇㅈㅣ ㅁㅣㅓㅇㄹㅣㅓㅇㅇㅣ ㄴㅐㄹㅣㅓㅇㅗㅏㅆㅇㅓ.
fox_data/hwa/u_644.wav|4|……ㅇㅏ.
fox_data/hwa/u_722.wav|4|…….
fox_data/hwa/u_778.wav|4|ㄴㅐ ㅂㅜㄹㄲㅗㅊㅇㅡㄹ ㅂㅗㄱㅗ ㅇㅣㅔㅃㅡㄷㅏㄱㅗ ㅎㅐㅆㅈㅏㄴㅎㅇㅏ!!!
fox_data/hwa/u_867.wav|4|ㅇㅣㅓㄱㅣㅅㅓ ㄴㅏㄱㅏ!!!!!!!!!!!!!!!
fox_data/hwa/y_323.wav|3|ㅁㅓㄴㅈㅣㅌㅜㅅㅓㅇㅇㅣㄱㅏ ㄷㅗㅣㅇㅓㅇㅣㅆㄷㅓㄴ ㄴㅏㄹ ㅂㅜㄹㄹㅓㅈㅜㄴ. ㄷㅏㅇㅅㅣㄴㅇㅡㅣ ㅁㅏㅇㅡㅁㅇㅡㄴ.
fox_data/hwa/u_957.wav|4|ㅅㅜㅇㅏㄹㅡㄹ 'ㅇㅓㄴㄴㅣ'ㄹㅏㄱㅗ ㅂㅜㄹㅡㄴㅡㄴ ㄱㅓㄴ ㅇㅏㅈㅣㄱ ㅎㅣㅁㄷㅡㄹㄱㅗ.
fox_data/hwa/s_1041.wav|0|ㅇㅣ ㅅㅔㅅㅏㅇ ㄴㅜㄱㅜㅂㅗㄷㅏㄷㅗ.
fox_data/hwa/m_602.wav|1|…….
fox_data/hwa/y_334.wav|3|ㅎㅏㅈㅣㅁㅏㄴ ㅇㅣ ㅇㅣㅓㅇㅎㅗㄴㄱㅗㅏ ㅇㅣㄴㄱㅣㅓㄱ…… ㅁㅜㅓ. ㅈㅣㄴㅉㅏ ㅇㅣㅓㅇㅎㅗㄴㅇㅣㄴㄱㅓㄴ ㅇㅏㄴㅣㅈㅣㅁㅏㄴ.
fox_data/hwa/a_587.wav|2|ㅇㅡ… ㅇㅡㅇ?!
fox_data/hwa/m_632.wav|1|ㅇㅏㄴㅣ. ㄷㅗㄹㅣㅓㅇ.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,60 @@
mixdata/nen001_001.wav|0|[JA]はい?呼びました?[JA]
mixdata/nen003_002.wav|0|[JA]保科君を待っていたんです。一緒に来て欲しいところがありまして[JA]
mixdata/nen006_015.wav|0|[JA]それで、具体的にはどうすればいいんですか?[JA]
mixdata/nen009_072.wav|0|[JA]はい、私は一人暮らしですから何の問題もありません[JA]
mixdata/nen012_013.wav|0|[JA]一分一秒を争っているわけじゃありませんから、帰る間際でも大丈夫でしょう[JA]
mixdata/nen016_049.wav|0|[JA]では、基本的にはこの流れで[JA]
mixdata/nen020_121.wav|0|[JA]私も自分なりに満足できて……嬉しく思っています[JA]
mixdata/nen102_009.wav|0|[JA]いえ、おそらく病気ではないと思うんですが……今朝から少し気持ちが悪くて……ずっと家で大人しく寝ていました[JA]
mixdata/nen103_081.wav|0|[JA]あ……わ、私も忘れてしまって……本当にすみません[JA]
mixdata/nen104_004.wav|0|[JA]……照れますね[JA]
mixdata/nen104_204.wav|0|[JA]んちゅ、んっ……んん……んちゅっ、ちゅ、ちゅっ……っ[JA]
mixdata/nen105_029.wav|0|[JA]保科君……[JA]
mixdata/nen106_021.wav|0|[JA]………[JA]
mixdata/nen107_059.wav|0|[JA]イ、イチャイチャ……ですか[JA]
mixdata/nen109_011.wav|0|[JA]そうですね……カラオケに、ボウリング、プリクラも……[JA]
mixdata/nen111_065.wav|0|[JA]い、今からテーブルの角で、オナニーをしますから……もう私に残された道はこれしかないんですっ[JA]
mixdata/nen112_136.wav|0|[JA]うん。甘くて、もちもちで、美味しいです[JA]
mixdata/nen112_338.wav|0|[JA]んっ、んふぅーっ……んっ、ふぅーっ、んぅぅぅ……ふぅーっ、ふぅーっ……[JA]
mixdata/nen113_312.wav|0|[JA]んぁぁ!はぁっ、はぁっ、はぁっ、はぁぁあ……しあわせ、私、幸せです……んっ、んんんっ[JA]
mixdata/nen114_021.wav|0|[JA]ハロウィンパーティーですか……なんだか懐かしいですね[JA]
mixdata/nen115_069.wav|0|[JA]んんぅぅぅぅぅーーーーー……ああぁあっ!ずるい、そのグリグリずるいですよぉ……あっ、あっ、こんなの、むりぃぃ……[JA]
mixdata/nen115_269.wav|0|[JA]それは……[JA]
mixdata/nen117_147.wav|0|[JA]んっ、んんっ、あむあむ……ぢゅぷ、ぢゅるるる……んぽくぽ、じゅるるるっ[JA]
mixdata/nen203_106.wav|0|[JA]……………………[JA]
mixdata/nen210_035.wav|0|[JA]……そうですか[JA]
mixdata/nen303_010.wav|0|[JA]そうだったんですか?なら、椎葉さんに気をつけてもらう必要はなかったかもしれませんね[JA]
mixdata/nen318_002.wav|0|[JA]頼みます、七緒[JA]
mixdata/nen411_003.wav|0|[JA]私たちはまだ卒業しませんから[JA]
mixdata/nan001_060.wav|1|[JA]……?な、なんですか?[JA]
mixdata/nan002_090.wav|1|[JA]それじゃあ撤収?[JA]
mixdata/nan009_023.wav|1|[JA]わたしの人見知りがそう簡単に直るわけないよ。あまり侮らないでくれる?[JA]
mixdata/nan012_085.wav|1|[JA]そっ、それは……ち、がうんです、わたしは別にっ[JA]
mixdata/nan108_097.wav|1|[JA]お父さんに報告は……どうするの?[JA]
mixdata/nan201_104.wav|1|[JA]はいとれた[JA]
mixdata/nan202_145.wav|1|[JA]お兄ちゃん……お兄ちゃん……お兄ちゃん……お兄ちゃん……お兄ちゃん……んっ、んぃぃぃっ!?[JA]
mixdata/nan203_151.wav|1|[JA]ぇっ……な、なに?どうしたの急に[JA]
mixdata/nan204_056.wav|1|[JA]後で部屋にお邪魔しても……いいかな?勉強……教えて欲しいんだけど[JA]
mixdata/nan205_025.wav|1|[JA]お兄ちゃん……わたしは本気で、お兄ちゃんのことが好き[JA]
mixdata/nan205_226.wav|1|[JA]ホント?ありがとう[JA]
mixdata/nan207_079.wav|1|[JA]あぁっ!?そんな、強引にしたら破けちゃうってば……っ![JA]
mixdata/nan208_045.wav|1|[JA]でも嬉しいよ……えへへ[JA]
mixdata/nan210_062.wav|1|[JA]ありがとう、えへへ[JA]
mixdata/nan210_263.wav|1|[JA]太いの擦れてるのが、よくわかるから……あっ、あっ、あぁぁ……あぁぁ…それっ、すご……あああぁぁ……![JA]
mixdata/nan211_102.wav|1|[JA]んっ、らって……先走り汁、お兄ちゃんの匂いと味がするんだもん、れろれろ[JA]
mixdata/nan212_087.wav|1|[JA]それは、だって……んんっ!?自分でも、わかんないけど……はぁ、はぁ……やり方を説明しながらの方が、恥ずかしくて……んんぁぁぁ……っ![JA]
mixdata/nan305_019.wav|1|[JA]しきべ先輩は、こっちに向かったみたいだね![JA]
mixdata/nan313_004.wav|1|[JA]認識してなかっただけで、今までも普通に参加してましたけどね[JA]
mixdata/nan416_014.wav|1|[JA]どういう意味?[JA]
mixdata/nan504_035.wav|1|[JA]ああ……そっか[JA]
mixdata/cvrong02044.wav|2|[ZH]那、那那那怎么办!?[ZH]
mixdata/cvrong03085.wav|2|[ZH]哥——哥哥![ZH]
mixdata/cvrong04156.wav|2|[ZH]但中途,又觉得很高兴。[ZH]
mixdata/cvrong06018.wav|2|[ZH]对女孩子来说区别很大的。[ZH]
mixdata/cvrong10003.wav|2|[ZH]那……再等两天呢?[ZH]
mixdata/cvrong15062.wav|2|[ZH]哥哥想试试看吗?[ZH]
mixdata/day00_tang_016.wav|3|[ZH]来笑一个,像这样——嘻嘻。[ZH]
mixdata/day03_tang_023.wav|3|[ZH]注意调动腰腹力量,记得感受下腰突然抽动的那股劲儿,明白吗?[ZH]
mixdata/day05_tang_054.wav|3|[ZH]男朋友的和我自己的,味道肯定有区别啦。[ZH]
mixdata/day07_tang_072.wav|3|[ZH]…………[ZH]
mixdata/day09_tang_032.wav|3|[ZH]……嗯。[ZH]

View File

@ -0,0 +1,60 @@
mixdata/nen001_001.wav|0|ha↓i? yo↑bima↓ʃIta?
mixdata/nen003_002.wav|0|ho↓ʃinakɯNo ma↓Qte i↑ta N↓desU. i↑Qʃoni ki↓tehoʃii to↑koroga a↑rima↓ʃIte.
mixdata/nen006_015.wav|0|so↑rede, gɯ↑tai↓tekiniwa do↓o sɯ↑re↓ba i↓i N↓desUka?
mixdata/nen009_072.wav|0|ha↓i, wa↑taʃiwa hI↑torigɯ↓raʃidesUkara na↓nino mo↑Ndaimoarimase↓N.
mixdata/nen012_013.wav|0|i↑Q↓pɯN i↑ʧi↓byooo a↑raso↓Qte i↑rɯ wa↑kejaarimase↓Nkara, ka↓erɯ ma↓giwademo da↑ijo↓obɯdeʃoo.
mixdata/nen016_049.wav|0|de↓wa, kI↑hoN↓tekiniwa ko↑no na↑gare↓de.
mixdata/nen020_121.wav|0|wa↑taʃimo ji↑bɯNna↓rini ma↓Nzokɯ de↑ki↓te…… ɯ↑reʃi↓kɯomoQte i↑ma↓sU.
mixdata/nen102_009.wav|0|i↓e, o↑so↓rakɯ byo↑okidewa na↓ito o↑mo↓ɯ N↓desUga…… ke↓sakara sU↑ko↓ʃI ki↑moʧiga wa↑rɯ↓kUte…… zɯ↑Qto i↑e↓de o↑tonaʃi↓kɯnete i↑ma↓ʃIta.
mixdata/nen103_081.wav|0|a…… w a, wa↑taʃimo wa↑sɯrete ʃi↑ma↓Qte…… ho↑Ntooni sɯ↑mimase↓N.
mixdata/nen104_004.wav|0|…… te↑rema↓sUne.
mixdata/nen104_204.wav|0|N ʧ ɯ, N↓Q…… N↓N…… N ʧɯ↓Q, ʧ ɯ, ʧɯ↓Q…… Q.
mixdata/nen105_029.wav|0|ho↓ʃinakɯN……
mixdata/nen106_021.wav|0|………
mixdata/nen107_059.wav|0|i, i↓ʧaiʧa…… de↓sUka.
mixdata/nen109_011.wav|0|so↓odesUne…… ka↑raokeni, bo↑ɯriNgɯ, pɯ↑rikɯramo……
mixdata/nen111_065.wav|0|i, i↓makara te↑ebɯrɯno ka↓kɯde, o↓naniio ʃi↑ma↓sUkara…… mo↓o wa↑taʃini no↑ko↓sareta mi↑ʧiwa ko↑reʃi↓ka na↓iNde sɯ↓Q.
mixdata/nen112_136.wav|0|ɯ↓N. a↑makUte, mo↓ʧi mo↑ʧide, o↑iʃi↓idesU.
mixdata/nen112_338.wav|0|N↓Q, N↓fɯɯɯQ…… N↓Q, fɯɯɯQ, N↓ɯɯɯ…… fɯɯɯQ, fɯɯɯQ……
mixdata/nen113_312.wav|0|N↓aa! ha↓aQ, ha↓aQ, ha↓aQ, ha↓aaa…… ʃi↑awase, wa↑taʃi, ʃi↑awasede↓sU…… N↓Q, N↓NNQ.
mixdata/nen114_021.wav|0|ha↑rowiNpa↓atiidesUka…… na↓Ndaka na↑ʦUkaʃi↓idesUne.
mixdata/nen115_069.wav|0|N↓Nɯɯɯɯɯɯɯɯɯɯ…… a↑aa↓aQ! zɯ↑rɯ↓i, so↑no gɯ↓rigɯri zɯ↑rɯ↓idesUyo o…… a↓Q, a↓Q, ko↑Nnano, mɯ↓riii……
mixdata/nen115_269.wav|0|so↑rewa……
mixdata/nen117_147.wav|0|N↓Q, N↓NQ, a↓mɯamɯ…… ji↓yɯpɯ, ji↓yɯ rɯ↑rɯrɯ…… N↓pokUpo, jɯ↑rɯ rɯ↓rɯ Q.
mixdata/nen203_106.wav|0|……………………
mixdata/nen210_035.wav|0|…… so↑odesU↓ka.
mixdata/nen303_010.wav|0|so↑odaQta N↓desUka? na↓ra, ʃi↑ibasaNni ki↑o ʦU↑ke↓te mo↑raɯ hI↑ʦɯyoowa na↓kaQtakamo ʃi↑remase↓Nne.
mixdata/nen318_002.wav|0|ta↑nomima↓sU, na↑naito↓gɯʧi.
mixdata/nen411_003.wav|0|wa↑taʃi↓taʧiwa ma↓da so↑ʦɯgyoo ʃi↑mase↓Nkara.
mixdata/nan001_060.wav|1|……? n a, na↓NdesUka?
mixdata/nan002_090.wav|1|so↑reja↓a te↑Qʃɯɯ?
mixdata/nan009_023.wav|1|wa↑taʃino hI↑tomiʃiriga so↑o ka↑NtaNni na↑orɯwake↓naiyo. a↑mari a↑nado↓ranaide kɯ↑rerɯ?
mixdata/nan012_085.wav|1|so↓Q, so↑rewa…… ʧ i, g a ɯ↓NdesU, wa↑taʃiwa be↑ʦɯni Q.
mixdata/nan108_097.wav|1|o↑to↓osaNni ho↑okokɯwa…… do↓o sɯ↑rɯ↓no?
mixdata/nan201_104.wav|1|ha↓itoreta.
mixdata/nan202_145.wav|1|o↑ni↓iʧaN…… o↑ni↓iʧaN…… o↑ni↓iʧaN…… o↑ni↓iʧaN…… o↑ni↓iʧaN…… N↓Q, N↓iiiQ!?
mixdata/nan203_151.wav|1|e↓Q…… n a, na↓ni? do↓o ʃI↑ta↓no kyɯɯni.
mixdata/nan204_056.wav|1|a↓tode he↑ya↓ni o↓jama ʃI↑temo…… i↓ikana? be↑Nkyoo…… o↑ʃietehoʃii N↓dakedo.
mixdata/nan205_025.wav|1|o↑ni↓iʧaN…… wa↑taʃiwa ho↑Nkide, o↑ni↓iʧaNno ko↑to↓ga sU↑ki.
mixdata/nan205_226.wav|1|ho↑Nto? a↑ri↓gatoo.
mixdata/nan207_079.wav|1|a↓aQ!? so↑Nna, go↑oiNni ʃI↑ta↓ra ya↑bɯkeʧaɯ↓Qteba…… Q!
mixdata/nan208_045.wav|1|de↓mo ɯ↑reʃi↓iyo…… e↑hehe.
mixdata/nan210_062.wav|1|a↑ri↓gatoo, e↑hehe.
mixdata/nan210_263.wav|1|fU↑to↓i n o sɯ↑rete↓rɯ no↑ga, yo↓kɯ wa↑ka↓rɯkara…… a↓Q, a↓Q, a↓aa…… a↓aa… so↑reQ, sɯ↑go…… a↑a↓aaa……!
mixdata/nan211_102.wav|1|N↓Q, ra↓Qte…… sa↑kibaʃiri↓jirɯ, o↑ni↓iʧaNno ni↑o↓ito a↑jiga sɯ↑rɯ N↓damoN, re↓rorero.
mixdata/nan212_087.wav|1|so↑rewa, da↓Qte…… N↓NQ!? ji↑bɯNdemo, wa↑kaN↓naikedo…… ha↓a, ha↓a…… ya↑rikatao se↑ʦɯmee ʃi↑na↓garano ho↓oga, ha↑zɯkaʃi↓kUte…… N↓Naaa…… Q!
mixdata/nan305_019.wav|1|ʃI↑ki↓be se↑Npaiwa, ko↑Qʧi↓ni mɯ↑kaQta mi↓taidane!
mixdata/nan313_004.wav|1|ni↑NʃIki ʃI↑tenakaQta↓dakede, i↑mama↓demo fU↑ʦɯɯni sa↑Nka ʃI↑tema↓ʃItakedone.
mixdata/nan416_014.wav|1|do↓oyɯɯ i↓mi?
mixdata/nan504_035.wav|1|a↓a…… so↓Qka.
mixdata/cvrong02044.wav|2|na↓, na↓ na↓ na↓ ʦ⁼əNN↓↑məp⁼aNN↓!?
mixdata/cvrong03085.wav|2|k⁼ə→-- k⁼ə→k⁼ə→!
mixdata/cvrong04156.wav|2|t⁼aNN↓ ʦ`⁼uNg→tʰu↑, you↓ ʧ⁼ɥe↑t⁼ə həNN↓↑ k⁼au→ʃiNg↓.
mixdata/cvrong06018.wav|2|t⁼wei↓ nɥ↓↑hai↑ʦ⁼ lai↑s`wo→ ʧʰɥ→p⁼ye↑ həNN↓↑t⁼a↓ t⁼ə.
mixdata/cvrong10003.wav|2|na↓…… ʦ⁼ai↓ t⁼əNg↓↑ lyaNg↓↑tʰyeNN→ nə?
mixdata/cvrong15062.wav|2|k⁼ə→k⁼ə→ ʃyaNg↓↑ s`ɹ`↓s`ɹ`↓kʰaNN↓ ma?
mixdata/day00_tang_016.wav|3|lai↑ ʃyau↓ i↑k⁼ə↓, ʃyaNg↓ ʦ`⁼ə↓yaNg↓-- ʃi→ʃi→.
mixdata/day03_tang_023.wav|3|ʦ`⁼u↓i↓ t⁼yau↓t⁼uNg↓ yau→fu↓ li↓lyaNg↓, ʧ⁼i↓t⁼ə k⁼aNN↓↑s`ou↓ ʃya↓yau→ tʰu→ɹ`aNN↑ ʦ`ʰou→t⁼uNg↓ t⁼ə na↓k⁼u↓↑ ʧ⁼iNN↓əɹ`, miNg↑p⁼ai↑ ma?
mixdata/day05_tang_054.wav|3|naNN↑pʰəNg↑you↓↑ t⁼ə hə↑ wo↓↑ ʦ⁼ɹ↓ʧ⁼i↓↑ t⁼ə, wei↓t⁼au↓ kʰəNN↓↑t⁼iNg↓ you↓↑ ʧʰɥ→p⁼ye↑ la.
mixdata/day07_tang_072.wav|3|…………
mixdata/day09_tang_032.wav|3|…… əNN↑.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,102 @@
sanskrit/sp005/sp005-000920.wav|4|तथा प्राणादीनामपि संहतत्वात् भवितुम् अर्हति
sanskrit/sp007/sp007-002357.wav|6|तेषाम् मूर्खताम् दृष्ट्वा हसन् व्याधः जालसहिततया सर्वान् शुकान् नीतवान्
sanskrit/sp003/sp003-001418.wav|2|पुरा किल पुरञ्जयो नाम साक्षाद्भगवतः विष्णोः अंशावतारः कश्चित् ऐक्ष्वाकः राजा देवैः सह
sanskrit/sp003/sp003-000245.wav|2|मणिरिव
sanskrit/sp001/sp001-003521.wav|0|न तु उपभोगफलामिति भावः
sanskrit/sp003/sp003-001276.wav|2|पद्धती इव
sanskrit/sp001/sp001-001708.wav|0|घोरम् भीमम् जन्यम् युद्धम् अभूत्
sanskrit/sp001/sp001-006005.wav|0|तेन आघूर्णिताभ्याम् उद्भ्रान्ताभ्याम् नेत्राभ्याम् शोभते इति
sanskrit/sp005/sp005-000966.wav|4|तथा आत्मनि सर्वो लोकः क्रियाकारकफलात्मकम् विज्ञानम्
sanskrit/sp003/sp003-005906.wav|2|अमोघम् धनुः यस्य सः अमोघधन्वा
sanskrit/sp001/sp001-000767.wav|0|प्रकामभुजः पर्याप्तभोजिनो न भवन्ति नूनम् सत्यम्
sanskrit/sp019/sp019-000263.wav|18|विद्यालयसमाप्तिवेलायाम् पुनः आगत्य ताम् गृहम् नयति स्म
sanskrit/sp003/sp003-002498.wav|2|सपर्यया अभ्यगात् प्रत्युद्गतवान्
sanskrit/sp021/sp021-000567.wav|20|भवतः कार्यात् अपि मम एतत् कार्यम् वरम्
sanskrit/sp014/sp014-000416.wav|13|चिन्तयतः तस्य महान् आनन्दः
sanskrit/sp011/sp011-000356.wav|10|साधु साधु जामाता नाम एतादृशः स्यात् इति स्वगतम् वदन् मनसि एव तम् अश्लाघत सः
sanskrit/sp001/sp001-007311.wav|0|यत् यस्मात् कारणात् जगताम् अध्यक्षेण अधिपेन त्वया वयम् मनोरथस्य अभिलाषस्य
sanskrit/sp003/sp003-005915.wav|2|तस्य क्षेमधन्वनोऽपि समरे अनीकिनीनाम् चमूनाम् अग्रयायी देवप्रतिमः इन्द्रादिकल्पः सुतः अभूत्
sanskrit/sp001/sp001-003879.wav|0|उदधेः कूले निविष्टम् तम् रामम्
sanskrit/sp001/sp001-010707.wav|0|दिवसश्च पृथिव्याम् इत्यत्र चकारात् यावादेशः
sanskrit/sp004/sp004-001504.wav|3|सङ्गेनेति
sanskrit/sp020/sp020-000398.wav|19|अत्र स्थितम् सरोवरम् दृष्ट्वा वयम् किञ्चित्कालम् स्थितवन्तः
sanskrit/sp019/sp019-000627.wav|18|अन्यः अपहरणकारः अगर्जत्
sanskrit/sp003/sp003-005685.wav|2|शक्येष्वेवाभवद्यात्रा इत्यादिना उक्तम् अर्थम् सोपस्कारमाह
sanskrit/sp002/sp002-001759.wav|1|स्वभावाकृतिसौन्दर्यवपुषि श्लोकशब्दयोरिति विश्वः
sanskrit/sp027/sp027-000218.wav|26|अहम् मूर्खाय मार्गम् न ददामि इति उक्तवान् प्रथमः
sanskrit/sp008/sp008-001203.wav|7|सक्तिः व्यसनम् व्यस्यत्येनम् श्रेयसः इति अविरुद्धा प्रतिपत्तिः न्याय्या
sanskrit/sp025/sp025-000738.wav|24|भूयांसो धन्यवादाः
sanskrit/sp002/sp002-001450.wav|1|यैः तीर्थजलैः नियमाभिषेकः नित्यस्नानम् निर्वर्त्यते निष्पाद्यते
sanskrit/sp001/sp001-009004.wav|0|तस्या इति
sanskrit/sp008/sp008-000842.wav|7|अनुभवकर्म विषयतामापन्नम् अन्यस्वरूपेण प्रतिलब्धात्मकम्
sanskrit/sp006/sp006-000792.wav|5|अश्रेयस्करम् च कर्म कुरु इति माम् प्रतिपादयति तत् किम् नु कारणम् इति
sanskrit/sp007/sp007-002169.wav|6|रानडेवर्यः किमपिवचनम् न अवदत्
sanskrit/sp009/sp009-001640.wav|8|तव सुतः अभूतपूर्वः ज्ञानी भविष्यति
sanskrit/sp007/sp007-001725.wav|6|शुश्रूषा क्रियताम् तावत् स हि धर्मस्सनातनः
sanskrit/sp008/sp008-000358.wav|7|तच्च तस्य ऐश्वर्यम् साम्यातिशय विनिर्मुक्तम्
sanskrit/sp001/sp001-003296.wav|0|शूराणाम् परकीयम् ऐश्वर्यम् एव असह्यम्
sanskrit/sp009/sp009-001253.wav|8|स्वोचितयोः पीठयोः कहोडयज्ञदत्तौ उपविष्टौ
sanskrit/sp001/sp001-008229.wav|0|मातृभिः ब्राह्मीप्रभृतिभिः सप्तमातृकाभिः
sanskrit/sp005/sp005-000614.wav|4|अत एव च गन्तॄणाम् सर्वगतिमताम् संसारिणाम् सा परा प्रकृष्टा गतिः
sanskrit/sp006/sp006-000184.wav|5|तद्द्रष्टुम् शीलम् येषाम् ते तत्त्वदर्शिनः तैः तत्त्वदर्शिभिः
sanskrit/sp007/sp007-000209.wav|6|तदागमनवेगतः धूलिः सर्वत्र व्याप्ता आसीत्
sanskrit/sp007/sp007-002856.wav|6|सः प्रतिदिनम् पश्यति स्म माता साहाय्यार्थम् प्रत्यहम् प्रतिवेशिनः गृहम् गच्छति इति
sanskrit/sp004/sp004-000145.wav|3|तथा सत्वरैश्च नाकिलोकैः इन्द्रादिलोकैः किरीटेषु बद्धाः अञ्जलयः
sanskrit/sp001/sp001-010073.wav|0|गतश्रियमिति
sanskrit/sp001/sp001-005998.wav|0|स्वजग्धशेषम् ददावित्यर्थः
sanskrit/sp025/sp025-000838.wav|24|यत्र अरण्यम् भवति पाद मार्गो भवति तस्मिन् काष्ठ भारवाहिन्यः महिलाः प्रचलन्त्यः दृश्यन्ते
sanskrit/sp002/sp002-002215.wav|1|सम्भ्रमादिति भावः
sanskrit/sp001/sp001-002763.wav|0|नाशितः पक्षः सहायो बलम् च येन सः तथोक्तः
sanskrit/sp003/sp003-003770.wav|2|किन्तु मे मम लोकापवादः बलवान् मतः
sanskrit/sp023/sp023-000487.wav|22|कथम् यत्त्वम् ज्ञानम् मन्यसे स सत्त्वधर्मः
sanskrit/sp006/sp006-001293.wav|5|स दृष्टादृष्टेष्टविषयाशीर्विवर्जिततया दृष्टादृष्टार्थे कर्मणि प्रयोजनम् अपश्यन् ससाधनम् कर्म सन्न्यस्य
sanskrit/sp006/sp006-000626.wav|5|आत्मवश्यैः आत्मनो वश्यानि वशीभूतानि तैः आत्मवश्यैः
sanskrit/sp001/sp001-000058.wav|0|कवीनाम् यशः काव्यनिर्माणेन जातम् तत्प्रार्थनाशीलोऽहम् प्रांशुना उन्नतपुरुषेण लभ्ये प्राप्ये फले फलविषये लोभात् उद्बाहुः
sanskrit/sp008/sp008-000207.wav|7|अक्लिष्टच्छिद्रेषु क्लिष्टा इति
sanskrit/sp003/sp003-000195.wav|2|दिशः प्रसेदुः प्रसन्ना बभूवुः
sanskrit/sp008/sp008-000884.wav|7|षडविशेषाः
sanskrit/sp002/sp002-000187.wav|1|यस्य मच्चरणस्य परिकर्मणि प्रसाधने
sanskrit/sp001/sp001-007004.wav|0|उवाचेत्यर्थः
sanskrit/sp008/sp008-001021.wav|7|न च एषाम् विप्रलीनानाम् पुनरस्ति उत्पादः प्रयोजनाभावात् इति
sanskrit/sp016/sp016-000333.wav|15|यतो ब्रह्मविद्याम् ज्ञात्वा संसारान्मुच्येयम् इत्याशंससे
sanskrit/sp003/sp003-005597.wav|2|अत्र षष्ठ्यर्थलक्षणया रात्रिन्दिवमिति
sanskrit/sp020/sp020-000542.wav|19|पुत्र वने लोध्रवृक्षः अस्ति
sanskrit/sp004/sp004-001209.wav|3|अगाधः अतलस्पर्शः
sanskrit/sp003/sp003-000544.wav|2|इतीति
sanskrit/sp003/sp003-005547.wav|2|तस्मिन् अतिथौ तु नभस्ये भाद्रपदे मासे ताः इव नद्यः इव भूयसीम् वृद्धिम् अभ्युदयम् आययुः
sanskrit/sp001/sp001-004319.wav|0|तदुक्तम् आरोप्यमाणस्य प्रकृतोपयोगित्वे परिणामः इति
sanskrit/sp001/sp001-011471.wav|0|तथा अविरतैः निरन्तरभवद्भिः
sanskrit/sp001/sp001-011575.wav|0|नभस्तः आकाशसकाशात्
sanskrit/sp009/sp009-000595.wav|8|सः ज्ञानिना जेतव्यः इति च विशेषेण बोधितवान्
sanskrit/sp001/sp001-004150.wav|0|अस्मिन्सर्गे प्रायेण वृत्तम् उपजातिः
sanskrit/sp001/sp001-001818.wav|0|मेदिनी भूमिः महिषी च ध्वन्यते
sanskrit/sp001/sp001-003000.wav|0|शिञ्जितेति कर्तरि क्तः
sanskrit/sp001/sp001-007683.wav|0|इमम् देशम् उद्दिश्य इदम् प्रयाणमित्यर्थः
sanskrit/sp001/sp001-010139.wav|0|निसर्गः शीलसर्गयोः इति विश्वः
sanskrit/sp021/sp021-000155.wav|20|अरुणकुमारः
sanskrit/sp001/sp001-002465.wav|0|यतो हेतोः मुनिः असमाप्तविधिः असमाप्तक्रतुः ततः तव
sanskrit/sp004/sp004-000646.wav|3|त्रयाणाम् अभेदस्यैव इष्टार्थकरत्वात्
sanskrit/sp017/sp017-000132.wav|16|उतो त्वस्मै तन्वम् विसस्रे तनुम् विवृणुते
sanskrit/sp006/sp006-000963.wav|5|अन्यस्य वा लोकसङ्ग्रहम् मुक्त्वा ततः तस्य आत्मविदः इदम् उपदिश्यते
sanskrit/sp003/sp003-003583.wav|2|उत्तोरणाम् अन्वयराजधानीम् अयोध्याम् विवेश प्रविष्टवान्
sanskrit/sp007/sp007-001241.wav|6|शृणु मद्वाक्यम्
sanskrit/sp018/sp018-000183.wav|17|भूयसा तु निर्देशो यत्र काले तम् कालम् इति आम्रवणवत्
sanskrit/sp011/sp011-000268.wav|10|बाजिप्रभुः प्रस्थितवान्
sanskrit/sp003/sp003-004526.wav|2|अथेति
sanskrit/sp008/sp008-001055.wav|7|एवम् नवकारणानि
sanskrit/sp002/sp002-002434.wav|1|पादस्यपद् इत्यादिना पदादेशः
sanskrit/sp006/sp006-001863.wav|5|वेत्ति तत् ईदृशम् सुखम् अनुभवति यत्र यस्मिन् काले
sanskrit/sp003/sp003-005736.wav|2|अयस्कान्तः मणिविशेषः आयसम् लोहविकारमिव चकर्ष
sanskrit/sp001/sp001-010886.wav|0|तदनुभवहेतुकः उत्साहः इति यावत्
sanskrit/sp001/sp001-010149.wav|0|प्रदक्षिणीकृत्य कृताञ्जलिः सन्
sanskrit/sp005/sp005-000108.wav|4|प्रीतिसमन्वितः तव पिता
sanskrit/sp001/sp001-002485.wav|0|चरत इति
sanskrit/sp001/sp001-002316.wav|0|त्वामनु त्वया सह
sanskrit/sp003/sp003-006531.wav|2|सौधस्य जालानि गवाक्षाः तानि एव विवराणि तैः व्यलोकयत्
sanskrit/sp008/sp008-000703.wav|7|सर्वस्य प्राणिनः इयम् आत्माशीः नित्या भवति मा न भूवम् भूयासमिति
sanskrit/sp002/sp002-002204.wav|1|तावत् आलोकमार्गप्राप्तिपर्यन्तम् बन्धुम् बन्धनार्थम् न सम्भावितो न चिन्तित एव
sanskrit/sp020/sp020-000362.wav|19|गृहजनाः अपि जागरिताः अभवन्
sanskrit/sp006/sp006-001514.wav|5|ज्ञानासिना शोकमोहादिदोषहरम् सम्यग्दर्शनम् ज्ञानम्
sanskrit/sp001/sp001-000512.wav|0|तोरणम् बहिर्द्वारम्
sanskrit/sp001/sp001-004153.wav|0|उपेन्द्रवज्राजतजास्ततो गौ
sanskrit/sp025/sp025-000500.wav|24|जनपद स्तरे त्रयः राज्यस्तरे च त्रयः एतेभ्यः पुरस्काराः प्रदास्यन्ते

View File

@ -0,0 +1,102 @@
sanskrit/sp005/sp005-000920.wav|4|तथा प्राणादीनामपि संहतत्वात् भवितुम् अर्हति ।
sanskrit/sp007/sp007-002357.wav|6|तेषाम् मूर्खताम् दृष्ट्वा हसन् व्याधः जालसहिततया सर्वान् शुकान् नीतवान् ।
sanskrit/sp003/sp003-001418.wav|2|पुरा किल पुरञ्जयो नाम साक्षाद्भगवतः विष्णोः अंशावतारः कश्चित् ऐक्ष्वाकः राजा देवैः सह ।
sanskrit/sp003/sp003-000245.wav|2|मणिरिव ।
sanskrit/sp001/sp001-003521.wav|0|न तु उपभोगफलामिति भावः ।
sanskrit/sp003/sp003-001276.wav|2|पद्धती इव ।
sanskrit/sp001/sp001-001708.wav|0|घोरम् भीमम् जन्यम् युद्धम् अभूत् ।
sanskrit/sp001/sp001-006005.wav|0|तेन आघूर्णिताभ्याम् उद्भ्रान्ताभ्याम् नेत्राभ्याम् शोभते इति ।
sanskrit/sp005/sp005-000966.wav|4|तथा आत्मनि सर्वो लोकः क्रियाकारकफलात्मकम् विज्ञानम् ।
sanskrit/sp003/sp003-005906.wav|2|अमोघम् धनुः यस्य सः अमोघधन्वा ।
sanskrit/sp001/sp001-000767.wav|0|प्रकामभुजः पर्याप्तभोजिनो न भवन्ति नूनम् सत्यम् ।
sanskrit/sp019/sp019-000263.wav|18|विद्यालयसमाप्तिवेलायाम् पुनः आगत्य ताम् गृहम् नयति स्म ।
sanskrit/sp003/sp003-002498.wav|2|सपर्यया अभ्यगात् प्रत्युद्गतवान् ।
sanskrit/sp021/sp021-000567.wav|20|भवतः कार्यात् अपि मम एतत् कार्यम् वरम् ।
sanskrit/sp014/sp014-000416.wav|13|चिन्तयतः तस्य महान् आनन्दः ।
sanskrit/sp011/sp011-000356.wav|10|साधु साधु जामाता नाम एतादृशः स्यात् इति स्वगतम् वदन् मनसि एव तम् अश्लाघत सः ।
sanskrit/sp001/sp001-007311.wav|0|यत् यस्मात् कारणात् जगताम् अध्यक्षेण अधिपेन त्वया वयम् मनोरथस्य अभिलाषस्य ।
sanskrit/sp003/sp003-005915.wav|2|तस्य क्षेमधन्वनोऽपि समरे अनीकिनीनाम् चमूनाम् अग्रयायी देवप्रतिमः इन्द्रादिकल्पः सुतः अभूत् ।
sanskrit/sp001/sp001-003879.wav|0|उदधेः कूले निविष्टम् तम् रामम् ।
sanskrit/sp001/sp001-010707.wav|0|दिवसश्च पृथिव्याम् इत्यत्र चकारात् यावादेशः ।
sanskrit/sp004/sp004-001504.wav|3|सङ्गेनेति ।
sanskrit/sp020/sp020-000398.wav|19|अत्र स्थितम् सरोवरम् दृष्ट्वा वयम् किञ्चित्कालम् स्थितवन्तः ।
sanskrit/sp019/sp019-000627.wav|18|अन्यः अपहरणकारः अगर्जत् ।
sanskrit/sp003/sp003-005685.wav|2|शक्येष्वेवाभवद्यात्रा इत्यादिना उक्तम् अर्थम् सोपस्कारमाह ।
sanskrit/sp002/sp002-001759.wav|1|स्वभावाकृतिसौन्दर्यवपुषि श्लोकशब्दयोरिति विश्वः ।
sanskrit/sp027/sp027-000218.wav|26|अहम् मूर्खाय मार्गम् न ददामि इति उक्तवान् प्रथमः ।
sanskrit/sp008/sp008-001203.wav|7|सक्तिः व्यसनम् व्यस्यत्येनम् श्रेयसः इति अविरुद्धा प्रतिपत्तिः न्याय्या ।
sanskrit/sp025/sp025-000738.wav|24|भूयांसो धन्यवादाः ।
sanskrit/sp002/sp002-001450.wav|1|यैः तीर्थजलैः नियमाभिषेकः नित्यस्नानम् निर्वर्त्यते निष्पाद्यते ।
sanskrit/sp001/sp001-009004.wav|0|तस्या इति ।
sanskrit/sp008/sp008-000842.wav|7|अनुभवकर्म विषयतामापन्नम् अन्यस्वरूपेण प्रतिलब्धात्मकम् ।
sanskrit/sp006/sp006-000792.wav|5|अश्रेयस्करम् च कर्म कुरु इति माम् प्रतिपादयति तत् किम् नु कारणम् इति ।
sanskrit/sp007/sp007-002169.wav|6|रानडेवर्यः किमपिवचनम् न अवदत् ।
sanskrit/sp009/sp009-001640.wav|8|तव सुतः अभूतपूर्वः ज्ञानी भविष्यति ।
sanskrit/sp007/sp007-001725.wav|6|शुश्रूषा क्रियताम् तावत् स हि धर्मस्सनातनः ।
sanskrit/sp008/sp008-000358.wav|7|तच्च तस्य ऐश्वर्यम् साम्यातिशय विनिर्मुक्तम् ।
sanskrit/sp001/sp001-003296.wav|0|शूराणाम् परकीयम् ऐश्वर्यम् एव असह्यम् ।
sanskrit/sp009/sp009-001253.wav|8|स्वोचितयोः पीठयोः कहोडयज्ञदत्तौ उपविष्टौ ।
sanskrit/sp001/sp001-008229.wav|0|मातृभिः ब्राह्मीप्रभृतिभिः सप्तमातृकाभिः ।
sanskrit/sp005/sp005-000614.wav|4|अत एव च गन्तॄणाम् सर्वगतिमताम् संसारिणाम् सा परा प्रकृष्टा गतिः ।
sanskrit/sp006/sp006-000184.wav|5|तद्द्रष्टुम् शीलम् येषाम् ते तत्त्वदर्शिनः तैः तत्त्वदर्शिभिः ।
sanskrit/sp007/sp007-000209.wav|6|तदागमनवेगतः धूलिः सर्वत्र व्याप्ता आसीत् ।
sanskrit/sp007/sp007-002856.wav|6|सः प्रतिदिनम् पश्यति स्म माता साहाय्यार्थम् प्रत्यहम् प्रतिवेशिनः गृहम् गच्छति इति ।
sanskrit/sp004/sp004-000145.wav|3|तथा सत्वरैश्च नाकिलोकैः इन्द्रादिलोकैः किरीटेषु बद्धाः अञ्जलयः ।
sanskrit/sp001/sp001-010073.wav|0|गतश्रियमिति ।
sanskrit/sp001/sp001-005998.wav|0|स्वजग्धशेषम् ददावित्यर्थः ।
sanskrit/sp025/sp025-000838.wav|24|यत्र अरण्यम् भवति पाद मार्गो भवति तस्मिन् काष्ठ भारवाहिन्यः महिलाः प्रचलन्त्यः दृश्यन्ते ।
sanskrit/sp002/sp002-002215.wav|1|सम्भ्रमादिति भावः ।
sanskrit/sp001/sp001-002763.wav|0|नाशितः पक्षः सहायो बलम् च येन सः तथोक्तः ।
sanskrit/sp003/sp003-003770.wav|2|किन्तु मे मम लोकापवादः बलवान् मतः ।
sanskrit/sp023/sp023-000487.wav|22|कथम् यत्त्वम् ज्ञानम् मन्यसे स सत्त्वधर्मः ।
sanskrit/sp006/sp006-001293.wav|5|स दृष्टादृष्टेष्टविषयाशीर्विवर्जिततया दृष्टादृष्टार्थे कर्मणि प्रयोजनम् अपश्यन् ससाधनम् कर्म सन्न्यस्य ।
sanskrit/sp006/sp006-000626.wav|5|आत्मवश्यैः आत्मनो वश्यानि वशीभूतानि तैः आत्मवश्यैः ।
sanskrit/sp001/sp001-000058.wav|0|कवीनाम् यशः काव्यनिर्माणेन जातम् तत्प्रार्थनाशीलोऽहम् प्रांशुना उन्नतपुरुषेण लभ्ये प्राप्ये फले फलविषये लोभात् उद्बाहुः ।
sanskrit/sp008/sp008-000207.wav|7|अक्लिष्टच्छिद्रेषु क्लिष्टा इति ।
sanskrit/sp003/sp003-000195.wav|2|दिशः प्रसेदुः प्रसन्ना बभूवुः ।
sanskrit/sp008/sp008-000884.wav|7|षडविशेषाः ।
sanskrit/sp002/sp002-000187.wav|1|यस्य मच्चरणस्य परिकर्मणि प्रसाधने ।
sanskrit/sp001/sp001-007004.wav|0|उवाचेत्यर्थः ।
sanskrit/sp008/sp008-001021.wav|7|न च एषाम् विप्रलीनानाम् पुनरस्ति उत्पादः प्रयोजनाभावात् इति ।
sanskrit/sp016/sp016-000333.wav|15|यतो ब्रह्मविद्याम् ज्ञात्वा संसारान्मुच्येयम् इत्याशंससे ।
sanskrit/sp003/sp003-005597.wav|2|अत्र षष्ठ्यर्थलक्षणया रात्रिन्दिवमिति ।
sanskrit/sp020/sp020-000542.wav|19|पुत्र वने लोध्रवृक्षः अस्ति ।
sanskrit/sp004/sp004-001209.wav|3|अगाधः अतलस्पर्शः ।
sanskrit/sp003/sp003-000544.wav|2|इतीति ।
sanskrit/sp003/sp003-005547.wav|2|तस्मिन् अतिथौ तु नभस्ये भाद्रपदे मासे ताः इव नद्यः इव भूयसीम् वृद्धिम् अभ्युदयम् आययुः ।
sanskrit/sp001/sp001-004319.wav|0|तदुक्तम् आरोप्यमाणस्य प्रकृतोपयोगित्वे परिणामः इति ।
sanskrit/sp001/sp001-011471.wav|0|तथा अविरतैः निरन्तरभवद्भिः ।
sanskrit/sp001/sp001-011575.wav|0|नभस्तः आकाशसकाशात् ।
sanskrit/sp009/sp009-000595.wav|8|सः ज्ञानिना जेतव्यः इति च विशेषेण बोधितवान् ।
sanskrit/sp001/sp001-004150.wav|0|अस्मिन्सर्गे प्रायेण वृत्तम् उपजातिः ।
sanskrit/sp001/sp001-001818.wav|0|मेदिनी भूमिः महिषी च ध्वन्यते ।
sanskrit/sp001/sp001-003000.wav|0|शिञ्जितेति कर्तरि क्तः ।
sanskrit/sp001/sp001-007683.wav|0|इमम् देशम् उद्दिश्य इदम् प्रयाणमित्यर्थः ।
sanskrit/sp001/sp001-010139.wav|0|निसर्गः शीलसर्गयोः इति विश्वः ।
sanskrit/sp021/sp021-000155.wav|20|अरुणकुमारः ।
sanskrit/sp001/sp001-002465.wav|0|यतो हेतोः मुनिः असमाप्तविधिः असमाप्तक्रतुः ततः तव ।
sanskrit/sp004/sp004-000646.wav|3|त्रयाणाम् अभेदस्यैव इष्टार्थकरत्वात् ।
sanskrit/sp017/sp017-000132.wav|16|उतो त्वस्मै तन्वम् विसस्रे तनुम् विवृणुते ।
sanskrit/sp006/sp006-000963.wav|5|अन्यस्य वा लोकसङ्ग्रहम् मुक्त्वा ततः तस्य आत्मविदः इदम् उपदिश्यते ।
sanskrit/sp003/sp003-003583.wav|2|उत्तोरणाम् अन्वयराजधानीम् अयोध्याम् विवेश प्रविष्टवान् ।
sanskrit/sp007/sp007-001241.wav|6|शृणु मद्वाक्यम् ।
sanskrit/sp018/sp018-000183.wav|17|भूयसा तु निर्देशो यत्र काले तम् कालम् इति आम्रवणवत् ।
sanskrit/sp011/sp011-000268.wav|10|बाजिप्रभुः प्रस्थितवान् ।
sanskrit/sp003/sp003-004526.wav|2|अथेति ।
sanskrit/sp008/sp008-001055.wav|7|एवम् नवकारणानि ।
sanskrit/sp002/sp002-002434.wav|1|पादस्यपद् इत्यादिना पदादेशः ।
sanskrit/sp006/sp006-001863.wav|5|वेत्ति तत् ईदृशम् सुखम् अनुभवति यत्र यस्मिन् काले ।
sanskrit/sp003/sp003-005736.wav|2|अयस्कान्तः मणिविशेषः आयसम् लोहविकारमिव चकर्ष ।
sanskrit/sp001/sp001-010886.wav|0|तदनुभवहेतुकः उत्साहः इति यावत् ।
sanskrit/sp001/sp001-010149.wav|0|प्रदक्षिणीकृत्य कृताञ्जलिः सन् ।
sanskrit/sp005/sp005-000108.wav|4|प्रीतिसमन्वितः तव पिता ।
sanskrit/sp001/sp001-002485.wav|0|चरत इति ।
sanskrit/sp001/sp001-002316.wav|0|त्वामनु त्वया सह ।
sanskrit/sp003/sp003-006531.wav|2|सौधस्य जालानि गवाक्षाः तानि एव विवराणि तैः व्यलोकयत् ।
sanskrit/sp008/sp008-000703.wav|7|सर्वस्य प्राणिनः इयम् आत्माशीः नित्या भवति मा न भूवम् भूयासमिति ।
sanskrit/sp002/sp002-002204.wav|1|तावत् आलोकमार्गप्राप्तिपर्यन्तम् बन्धुम् बन्धनार्थम् न सम्भावितो न चिन्तित एव ।
sanskrit/sp020/sp020-000362.wav|19|गृहजनाः अपि जागरिताः अभवन् ।
sanskrit/sp006/sp006-001514.wav|5|ज्ञानासिना शोकमोहादिदोषहरम् सम्यग्दर्शनम् ज्ञानम् ।
sanskrit/sp001/sp001-000512.wav|0|तोरणम् बहिर्द्वारम् ।
sanskrit/sp001/sp001-004153.wav|0|उपेन्द्रवज्राजतजास्ततो गौ ।
sanskrit/sp025/sp025-000500.wav|24|जनपद स्तरे त्रयः राज्यस्तरे च त्रयः एतेभ्यः पुरस्काराः प्रदास्यन्ते ।

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,91 @@
zaonhe/ssvAzVd.wav|0|ȵiɪʔ8 təʔ7iɔ1 ɕi5.
zaonhe/sKxBzXK.wav|0|pʰᴀʔ7tsɔ5, tsəʔ7 vəʔ8 ku5 zɿ6 ŋu1 iɪʔ7 ɦɑ̃6 ȵiɪʔ8 ɦy6 ᴇ5hɔ5.
zaonhe/sbwFzrE.wav|0|zɿ6 ku5 zən6kuɑ̃1 ɦiɤ6 tsʰɔ1pʰiɔ5 kɤ5, ŋu1 iɔ1 tɔ5 ɤ1 tsɤ1 tɕʰi5 ly6 ɦiɤ6.
zaonhe/ciau5.wav|1|tɕiɔ5.
zaonhe/ue5.wav|1|uᴇ5.
zaonhe/phi1.wav|1|pʰi1.
zaonhe/sUzqztf.wav|0|iɪʔ7ɦəʔ8 zɑ̃6pø5tʰi1 zɿ6 in1tʰi1.
zaonhe/s6yrzcA.wav|0|zɑ̃6 hᴇ5 gəʔ8 kʰuᴇ5 di6fɑ̃1, hᴇ5nᴇ6 ŋᴀ6 ɦiã6 ɦiã6 ɕiɔ5tsʰᴇ5 zᴇ6ɦiɤ6.
zaonhe/ssxoz7p.wav|0|tən1 kʰᴇ1 vəʔ8 liã6 liɔ6, noŋ6 pɑ̃1 ŋu1 lᴇ6kʰø1 kʰø1hɔ5 vᴀ6?
zaonhe/s0x6zIy.wav|0|ŋu1 iɔ1 tsɤ5liɔ6, ŋu1 zɿ6 lᴇ6 pɑ̃1 noŋ6 tsᴇ1ɦuᴇ6 ɦəʔ8.
zaonhe/sdypzlx.wav|0|kᴀ5 tɕy5 ɦəʔ8, tɕy5 təʔ7iɪʔ7 tʰᴀʔ7 ɦu6du6! tɕy5 liɔ6 oʔ7li6ɕiã5 ɦᴀ6 vəʔ8 zən6təʔ7 liɔ6!
zaonhe/s1y3zw0.wav|0|ȵiã6 ᴀ1lᴀ1 ɕi1 kʰø1kʰø1 tsᴇ1kɑ̃5.
zaonhe/sYxszc9.wav|0|ɦi6lᴀ1 liã6kᴀ1dɤ6 ɦəʔ8 ᴇ5 ʑin6, i5 tɕin1 sən1 liɔ6 iɪʔ7ɦəʔ8 dᴇ6kᴀ1.
zaonhe/sYuUzbW.wav|0|tɕi1ti5 tsoŋ1 tɔ5 zɑ̃6 hᴇ5?
zaonhe/s6yBzOi.wav|0|iɔ1məʔ8 tɕi5 “dəʔ8 kʰuᴀ5 ɦiɤ6 dʑi6 ”min6tsɔ1 ʑiɤ6 kʰu5i5 tɔ5.
zaonhe/iu1.wav|1|y1.
zaonhe/slyDzTc.wav|0|ɦi6 zᴇ6 zəʔ8bi6 kɔ1 zən6min6bi6 ɦəʔ8 dᴇ6ɦuø6sᴇ1 zɿ6 tu1sɔ5?
zaonhe/scu8zMj.wav|0|i1 ɕin1li6 lɔ6 nᴇ6ku5 ɦəʔ8.
zaonhe/s8xkzcW.wav|0|ŋu1 tɔ5 koʔ7zᴀʔ8 mᴇ1hɔ5 ɦəʔ8.
zaonhe/slzBzPx.wav|0|zən6kuɑ̃1 ɦᴇ6tsɔ5 lᴀʔ8 hᴇ5 lᴇ, dᴀ6kᴀ1 zu6ɦo6 lᴇ6 gᴀ6 gᴀ6sᴇ1ɦu6 vᴀ6.
zaonhe/s8t5zt6.wav|0|tɕʰi5ȵiɪʔ8kuø5li6.
zaonhe/san1.wav|1|sã1.
zaonhe/fi1.wav|1|fi1.
zaonhe/sBvDz4d.wav|0|lɔ6 vəʔ8 hɔ5i5sɿ1 ɦəʔ8, məʔ8 dʑiɪʔ8 zɿ6 ɦuᴇ6ɕin5 pəʔ7 noŋ6.
zaonhe/styKzDX.wav|0|ɕi1sã1, tɕʰin5 noŋ6 nᴇ1 ɦu6tsɔ5 ɦoʔ8tsᴇ5 sən1vən6 tsən5 ȵiã6 ŋu1 tən1 tɕi5 iɪʔ7ɕiᴀ5.
zaonhe/s5ygzGR.wav|0|zø6bi6 nᴀ6nən6 tsɤ5, nᴀ6nən6 kʰuᴀ5 ʑiɤ6 nᴀ6nən6 kʰᴇ1!
zaonhe/svvTzwY.wav|0|tɕi5tɕi5.
zaonhe/s9vfzrR.wav|0|tən1loŋ6.
zaonhe/sqvRzMP.wav|0|ɦɤ6tʰi1 li6pᴀ5iɪʔ7.
zaonhe/s6xEz0o.wav|0|kəʔ7 noŋ6 ɦo6pø5tʰi1 tɔ5 ku5 tɕi1ɦəʔ8 di6fɑ̃1?
zaonhe/then1.wav|1|tʰən1.
zaonhe/sfuYzgI.wav|0|tɕin1 zɔ6 vəʔ8ɕiɔ5təʔ7 fɑ̃5 sᴀ5 di6 in5?
zaonhe/slvszVc.wav|0|vəʔ8 ku5ɦo6 tʰi1 iɪʔ7 ku5, ʑiɤ6 tɔ5 zɑ̃6 hᴇ5 tsø5 hɔ5ɦəʔ8 tɕi5 tɕiɪʔ7.
zaonhe/sLxzzJP.wav|0|ɦiɤ6 ti5 oʔ7ɕin1, dᴇ6zɿ6 tʰu5 vəʔ8 tsʰəʔ7 lᴇ6.
zaonhe/sXykzt7.wav|0|gəʔ8tᴀʔ7 tʰɑ̃5fᴀʔ7 ɦiɤ6 ɦiɤ6li6 tsɿ5 tʰɑ̃5, vɤ6 li6 tsɿ5 tʰɑ̃5, zəʔ8 pᴇ5 tʰɑ̃5, kɑ̃1 sɿ1 tʰɑ̃5.
zaonhe/phaon5.wav|1|pʰɑ̃5.
zaonhe/din6.wav|1|din6.
zaonhe/s1t6zsG.wav|0|noŋ6iɔ1 dʑiɤ6 ɦəʔ8 koŋ1 tsɿ1 tu1sɔ5?
zaonhe/sPw9zOu.wav|0|tsoŋ1kᴀ5.
zaonhe/smuEzBp.wav|0|ŋu1 ɕiã5 pɑ̃1 i1 iɪʔ7 dɔ6 tɕʰi5sɿ1 tɕi5 koŋ1 ɦiø6.
zaonhe/zhion6.wav|1|ʑioŋ6.
zaonhe/szxqzmz.wav|0|pɑ̃1 ŋu1 kʰᴇ1tsã1 fᴀʔ7pʰiɔ5.
zaonhe/sFw6zTY.wav|0|noŋ6 min6.
zaonhe/s9xHzO4.wav|0|ɕi1sã1, tɕʰin5 noŋ6 zɿ6 ŋu1 tɕiᴀ5zɔ6 iɪʔ7ɕiᴀ5, noŋ6ɦiɤ6 sᴀ5ku5 tsø1tsã5?
zaonhe/sfyQzGP.wav|0|iɔ1vəʔ8iɔ1 zɑ̃6 kɔ1 kᴀ5 tsɤ5?
zaonhe/sbuvzS4.wav|0|fɤ5tsəʔ7 min6ȵi6 kɔ1 kʰɔ5 ɦiɤ6iɔ1 loʔ8 kʰoŋ1 liɔ6.
zaonhe/khu1.wav|1|kʰu1.
zaonhe/khau5.wav|1|kʰɔ5.
zaonhe/ni1.wav|1|ni1.
zaonhe/sRugzfm.wav|0|ŋu1ɦuᴇ6 zɿ6 lᴀʔ8 lᴀʔ8 oʔ7li6 kʰø1kʰø1 di6 zɿ6, zɑ̃6 zɑ̃6 mɑ̃6 sø5 liɔ6.
zaonhe/sxxnzz8.wav|0|noŋ6iɔ1 soŋ5 tɔ5 ᴀ1li6 tᴀʔ7?
zaonhe/theu5.wav|1|tʰɤ5.
zaonhe/sqwYzUM.wav|0|həʔ7mɔ1.
zaonhe/en1.wav|1|ən1.
zaonhe/toe1.wav|1|tø1.
zaonhe/sixzzd5.wav|0|ŋu1 vəʔ8lᴇ6 məʔ8 sᴀ5ȵin6 lᴇ6ᴀ1?
zaonhe/ngoq8.wav|1|ŋoʔ8.
zaonhe/sSxOzj6.wav|0|ŋᴀ6sã1 tɕin1ȵi6 kʰɔ5 dᴀ6ɦoʔ8 tɕʰyɪʔ7 liɔ6 tɕi1fən1 məʔ8 kʰɔ5tɕin5 zoŋ6ti5du6ɦiᴀʔ8.
zaonhe/pho5.wav|1|pʰo5.
zaonhe/phaq7.wav|1|pʰᴀʔ7.
zaonhe/sFuFzOG.wav|0|tsən1 gɑ̃6.
zaonhe/tiq7.wav|1|tiɪʔ7.
zaonhe/suw8zCq.wav|0|ɔ5 zɔ6 ɦin6.
zaonhe/heu1.wav|1|hɤ1.
zaonhe/s6zhz1Z.wav|0|ɦuᴇ6sᴀ5 vəʔ8 tʰin1 ŋu1ɦəʔ8 ɦi6ɦo6?
zaonhe/sFzMzZ1.wav|0|ɕiɔ5tɕi5, ŋu1 ɕiã5 mᴀ6 iɪʔ7 dʑi6 ɕi1tsɑ̃1, ɦiã6tsɿ5 hɔ5iɪʔ7ŋᴇ6 ɦəʔ8.
zaonhe/sZz7z37.wav|0|noŋ6 ᴀ1 zɿ6 vəʔ8 tsən5 bᴇ6 tɕʰi5?
zaonhe/saon5.wav|1|sɑ̃5.
zaonhe/suwuztc.wav|0|“lɔ6 zɑ̃6 hᴇ5 ”pʰᴀ5 tᴇ5 li6, tɕi1ɦəʔ8 lɔ6 kʰəʔ7lᴀ1 vəʔ8 dᴇ6 tsʰɑ̃5 tɕʰi5 bin6 dᴇ6 sən1tɕʰioʔ7.
zaonhe/suzSzXz.wav|0|ŋu1ɦəʔ8 ȵi6tsɿ5 tsã5dᴀ6 liɔ6 ɕi1 mɑ̃6 tsu5 i1sã1.
zaonhe/sPyDzsJ.wav|0|noŋ6iɔ1 kø1 dᴀ6 ɦᴇ6zɿ6 sᴀʔ7 dᴀ6 ᴀ1?
zaonhe/sMyczkV.wav|0|hɔ5 hɔ5 hɔ5. noŋ6 vən6liã6 iɔ1 tsʰən1 tsʰən1 tsoʔ7.
zaonhe/syzAzgU.wav|0|gəʔ8ŋᴇ6 zɿ6 ᴀ1lᴀ1ɦəʔ8 kuɑ̃1bø6.
zaonhe/sVyJzAC.wav|0|ɦiɤ6ɦəʔ8, ɕi1sã1 noŋ6 tɕi1ɦuᴇ6?
zaonhe/ssvAzhm.wav|0|ȵyø6 lᴇ6 noŋ6ɦiɤ6 kᴀ5 hɔ5ɦəʔ8 koŋ1 dʑy6 pɑ̃1 mɑ̃6. mɑ̃6 tsɿ5 zɿ6 tu1sɔ5?
zaonhe/chiuq7.wav|1|tɕʰyɪʔ7.
zaonhe/sOwbzmy.wav|0|iɪʔ7tɕʰyø1 tɤ1 ɦuᴇ6lᴇ6, ɦᴇ6hɔ5 kʰø1kʰø1 ŋᴀ6 tʰᴇ1 dʑin6 dᴇ6 tɕi5tsoʔ7 ɦiᴀ6tɕin5.
zaonhe/waq8.wav|1|ɦuᴀʔ8.
zaonhe/sNyvzYo.wav|0|uᴇ1, tɕʰin5 vən6 ɦi6 zᴇ6 tʰəʔ7 ŋu1 mo6zɑ̃6 soŋ5 iɪʔ7 dəʔ8kɔ5tsɿ5 hɔ5vᴀ6?
zaonhe/sftkzhl.wav|0|noŋ6 vəʔ8iɔ1 tɕiɪʔ7, vəʔ8iɔ1 kʰoʔ7.
zaonhe/sdvRz8O.wav|0|tʰəʔ7 vᴇ6liɔ6.
zaonhe/sByhzVy.wav|0|tɕʰin5 mən6noŋ6 ᴀ1 zɿ6 loʔ8 min5 sɿ1ti5 ɦəʔ8 lɔ6 pᴇ5?
zaonhe/s8zIzoy.wav|0|ʑiᴀ6ʑiᴀ6noŋ6 tsoŋ1 sᴇ1 poʔ7lu6 ɦŋ̩6zəʔ8 liã6 ɦɔ6 lᴀʔ8 lᴀʔ8 ᴀ1li6 tᴀʔ7?
zaonhe/sxtgzaH.wav|0|tʰi1tɕʰi5 ɦy6 pɔ5 iɪʔ7 pᴀʔ7 ȵiᴇ6 iɪʔ7.
zaonhe/sdzJzbT.wav|0|ɦᴇ6iɔ1 iɪʔ7ŋᴇ6ŋᴇ6.
zaonhe/sXv9zzv.wav|0|soʔ7soʔ7.
zaonhe/svypz4d.wav|0|ŋu1 iɔ1iɪʔ7 səʔ7 iɪʔ7 tʰin1 ɦəʔ8 fɑ̃1tsɿ5, mi6 tɕiɪʔ7 lᴀʔ8 sɿ5zəʔ8 ɦəʔ8 bin6 fɑ̃1 tsu5 ɦiɤ6.
zaonhe/stvizBR.wav|0|uᴇ1, noŋ6 hɔ5, ŋu1 ɕiã5 ʑin6 liɤ6 ɕi1sã1.
zaonhe/s7vJzSo.wav|0|noŋ6 tsø5 dʑin6 nᴀ6nən6?
zaonhe/sTvnzn8.wav|0|pᴇ5tən5.
zaonhe/tsa5.wav|1|tsᴀ5.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,118 @@
zero_data/maigo/19272.wav|5|そ、それは結構です。
zero_data/muma/10614.wav|0|ほ、ほんと!あんたのご主人様やるのも大変なんだからね!
zero_data/muma/22126.wav|17|‥‥‥‥。
zero_data/muma/21730.wav|17|だから、寂しいって思うのはごくたまにだよ。
zero_data/maigo/09248.wav|0|そういうこと。文句も禁止よ。
zero_data/maigo/18699.wav|5|サイトさーん。女王陛下ー。
zero_data/muma/01386.wav|10|さあ、サイト。他の場所も案内してくれ。まだいろいろあるんだろう?
zero_data/koakuma/09135.wav|17|え、あの……その……。
zero_data/muma/23248.wav|17|当然だろ?友達が困ってんのを見過ごせるかよ。
zero_data/maigo/16764.wav|17|はぁ?そんな簡単なわけないだろ?
zero_data/maigo/19571.wav|4|試す?
zero_data/muma/18602.wav|17|おい、ルイズ。
zero_data/koakuma/12890.wav|17|でも、キュルケ。どうしてそんなに今回の事件に真剣なんだ?
zero_data/maigo/02307.wav|23|でも、シエスタはリンゴを求めた……。これはすがろうと思ったからじゃないのかい?
zero_data/maigo/15359.wav|17|う〜ん……。
zero_data/maigo/18898.wav|5|こう見えて……、ううん、こう見えなくても、けっこうけだものなんですよ!
zero_data/maigo/14401.wav|17|ルイズ、どうしたんだ?何でしゃべらないんだ?
zero_data/maigo/17308.wav|17|いえいえいえ…。そんなことは……。
zero_data/koakuma/11214.wav|17|言われてみれば狭かったかもなぁ。
zero_data/koakuma/00419.wav|3|ミス・ウェザリーにも与えられていたなら、彼女はこのようなことをしなかったのでしょうか。
zero_data/muma/06156.wav|21|簡単に言えばそういうこった。
zero_data/muma/06012.wav|21|おい!無視すんなよ、相棒。
zero_data/koakuma/01445.wav|18|ぼく達はただ、ウェザリーに買い物を頼まれただけだ!まったく!
zero_data/maigo/04263.wav|2|あー、お着替え?なら、イルククゥがお手伝いするのね!
zero_data/maigo/11337.wav|17|こう……。色気で男に貢がせてー、みたいな。
zero_data/maigo/12886.wav|17|は、はは……。血、吸われすぎたかも……。とりあえず、次、行こうぜ……。
zero_data/muma/22888.wav|17|じゃあ、俺が最近見てた夢は、リシュに見せられてたってことか?
zero_data/koakuma/02543.wav|6|ふふっ。そういえば、お芝居楽しかったね。
zero_data/koakuma/01067.wav|19|いや、すまない……。つい熱くなってしまったな。
zero_data/maigo/08463.wav|0|ドラゴンは頭のいい生き物よ。ちゃんとわかってくれたわ。
zero_data/koakuma/15433.wav|4|……そう。
zero_data/koakuma/05982.wav|0|……。
zero_data/maigo/07120.wav|0|きゃっ!なによいきなり。頑張るに決まってるじゃない。
zero_data/muma/06216.wav|21|ブシの娘っ子の一族はそのサキュバスの封印を代々行っていた一族なんだとよ。
zero_data/koakuma/06034.wav|0|わ、わたしへのお礼はどうなってるのよ!
zero_data/muma/12977.wav|0|キュルケッ!あ、あ、あんた、どういうつもりなの!?
zero_data/maigo/09021.wav|0|と、とにかく、早くしてよね。
zero_data/muma/22882.wav|17|よ、よくわからんがわかった!
zero_data/maigo/02646.wav|21|なんでぇ、決闘でもするのか?
zero_data/muma/14449.wav|0|‥‥‥‥。
zero_data/koakuma/04365.wav|20|では、わしが探しに行ってくるぞハルナ。これからは気を付けてくだされよ。
zero_data/koakuma/03217.wav|15|ルイズが何も言わないのは、黙認するって意味よ、きっと。
zero_data/maigo/06761.wav|0|絶対、リーダーになるんだから。
zero_data/maigo/04769.wav|13|わたしの調合機材……流れ着いてないかしらねぇ。
zero_data/maigo/12317.wav|17|そんな娘に対して、何も思わないはずないだろ?
zero_data/maigo/08726.wav|0|いつもの服ですか?わたしはいつもこの服装ですが。それとも、本日は別の服を御所望ですか?
zero_data/muma/21420.wav|17|おーおー。今日も叫んでるなぁ。
zero_data/maigo/16254.wav|17|たとえるなら、丘と平原……。テファは最強の山脈。
zero_data/maigo/20883.wav|1|それ、本当に大丈夫なんですか?
zero_data/maigo/12493.wav|17|きみも五人衆の一人か?
zero_data/koakuma/04174.wav|13|もう、しょうがないんだから。わたしももちろん手伝うわよ。
zero_data/muma/12338.wav|0|なっ!なななな!なにしてるのよあんたたちぃ〜!
zero_data/koakuma/13144.wav|17|ほんっと、優等生だよなあ。
zero_data/muma/04325.wav|15|慣れよ、慣れ。
zero_data/maigo/18626.wav|5|しらゆきひめ?サイトさんが話してくれた……。
zero_data/maigo/16355.wav|17|それくらいなら……。
zero_data/muma/10263.wav|0|トリステインの貴族ともあろう者が、ははは、はしたないわっ!
zero_data/maigo/08475.wav|0|さっき言ったでしょ。陛下の言葉を疑うつもりはないって!
zero_data/muma/05017.wav|15|ま、いいわ。それじゃ、洗濯頑張ってね。
zero_data/maigo/04281.wav|2|りんご。サイト、知らない?
zero_data/maigo/15715.wav|17|多分ね。今もずきずき痛むし。
zero_data/maigo/19844.wav|4|大丈夫、玉砕して帰ってくる。
zero_data/maigo/17141.wav|17|は?いや、別に失礼なことはしてないと思うけど。
zero_data/maigo/13905.wav|17|うわぁっ、イルククゥ!?
zero_data/muma/01101.wav|3|ちょ!ちょっとあんた!なに、姫さまの頭をなでてるのよ!!無礼にも程があるわ!!
zero_data/muma/07879.wav|20|ミス・ヴァリエール。落ち着いて考えてみなさい。
zero_data/maigo/20265.wav|22|考えてもみろ。ルイズの魔力を吸い取った古き本を、全て完結させたわけじゃない。
zero_data/maigo/09917.wav|0|学校で会った時に話せばいいのに、不思議と、会話が続いてしまうの。
zero_data/koakuma/14862.wav|5|分かりましたわ、ミス・ツェルプストー。どうぞお手柔らかにお願いします。
zero_data/maigo/19651.wav|4|……これ。
zero_data/muma/10903.wav|0|べ、別にあんたが誰と仲良くしてようと、わたしには全然、そりゃもう全っ然関係ないんだから!!
zero_data/maigo/00303.wav|3|使い魔さんの前で、歌ってみてもいいですか?
zero_data/muma/09077.wav|8|それから、ルイズの部屋で一緒に暮らして……。楽しかったぁ。
zero_data/koakuma/06891.wav|0|なんでって……。そんなの、決まってるじゃない。使い魔の不始末はご主人様の責任だもの。
zero_data/maigo/10404.wav|17|そっか。ありがとう。じゃあ……行こうか。
zero_data/koakuma/12886.wav|17|いや、まったく関係ないだろ。
zero_data/maigo/16983.wav|17|それは……。
zero_data/maigo/02441.wav|23|でも、もういいんだ。ルイズ以上に、相応しい相手を見つけたからね。
zero_data/muma/13845.wav|0|ちょ、ちょっと、サイト!なにやってるの!
zero_data/muma/12623.wav|0|い、1ページだけだからね!!
zero_data/maigo/00387.wav|3|そうですね。あと、もう一押し……かしら。
zero_data/koakuma/09889.wav|17|タバサがあんなに励ましてくれるなんて。頑張らないとな。
zero_data/muma/14105.wav|0|ほんっとに犬ね!
zero_data/muma/15912.wav|17|なにがあったってわけじゃなくてさ。ちょっと、クリスのことを考えてたんだ。
zero_data/muma/12629.wav|0|シエスタ。サイトを確保。
zero_data/maigo/17429.wav|17|まあ、やっぱり姫さまのほうが……。
zero_data/maigo/09744.wav|0|でも、そんな男の子にもわかってもらいたい時ってあるじゃない?
zero_data/maigo/14477.wav|17|……あれ?そういえばシエスタは?
zero_data/muma/13509.wav|0|姫さまが下賎の者にお手を触れるなんて!いけません!
zero_data/muma/01668.wav|10|わたしはやはり異端なのだろうな。だから、同じような立場にあるサイトとしか友になれないのか……。
zero_data/muma/08518.wav|8|ロクに喋りもしない、なに考えてるかもわからない。あんな女のどこがいいの?
zero_data/koakuma/02170.wav|6|あ、そうですね。ちょ、ちょーどいい加減かなー、なぁんて。
zero_data/maigo/04925.wav|13|これから派遣される討伐隊には、山にいるっていう情報で十分だと思うけど。
zero_data/muma/16666.wav|17|あ、い、いや!小粋なジョークだから。
zero_data/maigo/08969.wav|0|サイト……。
zero_data/maigo/14158.wav|17|げっ!ルイズ!?それにシエスタ!どうしてここに!?
zero_data/muma/28307.wav|4|だから、メイド勝負の裏ルールを適用することにした。
zero_data/maigo/19047.wav|5|そうですよ、リラックスしてください。
zero_data/maigo/07046.wav|0|だ、大丈夫よ。見つからなければいいんだから。
zero_data/muma/11412.wav|0|‥‥‥‥。
zero_data/maigo/04315.wav|2|え!?あ、ああ、ただの例え、例えなのね!
zero_data/muma/07739.wav|24|今、サキュバスは夢の世界を封鎖しているはず。その証拠に昨日は妙な夢を見ていないでしょ?
zero_data/muma/04548.wav|15|タバサ!?あなたまで、その格好なに!?
zero_data/muma/07865.wav|20|悪くはない意見じゃがの、ミス・リシュは働かせるにはまだ少々頼りない年頃じゃ。
zero_data/muma/08105.wav|8|むふふふーん♪
zero_data/muma/08123.wav|8|うふふ、これであたしの声がよく聞こえるわよね。
zero_data/muma/12055.wav|0|……サイトのこと、少しくらいは必要だって思ってるわよ。
zero_data/muma/23407.wav|17|じゃあ、許可出せよ。しばらく勝手な行動を取ってもいいって。
zero_data/maigo/08725.wav|0|はい?何かおかしいところがあるでしょうか?
zero_data/muma/10124.wav|0|……もう。
zero_data/muma/08004.wav|20|ほっほっほ!まあ若い頃は、そうやって血気に走るのも悪くなかろうて。
zero_data/muma/05410.wav|15|落ちついて!リシュ、落ちついて、まずは離れて気を静めて!
zero_data/maigo/02635.wav|21|確かにおせえな。
zero_data/muma/00447.wav|3|クリス……。やはり出たのですか。
zero_data/muma/25588.wav|5|あの……その……。こういう言い方をするのはよくないって、わかってるんですけど……。
zero_data/muma/27940.wav|4|青汁。
zero_data/koakuma/08041.wav|17|じゃあ……。
zero_data/koakuma/01802.wav|6|うん、いつまでもこの学院で……。ルイズさん達の世話になってるのもどうかなって思って。

View File

@ -0,0 +1,118 @@
zero_data/maigo/19272.wav|5|s o, so↑rewa ke↓QkoodesU.
zero_data/muma/10614.wav|0|h o, ho↑Nto! a↓Ntano go↑ʃu↓jiNsama ya↑ru no↑mo ta↑iheNna N↓dakarane!
zero_data/muma/22126.wav|17|………
zero_data/muma/21730.wav|17|da↓kara, sa↑biʃi↓iQte o↑mo↓u no↑wa go↓kU ta↑manidayo.
zero_data/maigo/09248.wav|0|so↑oyuu ko↑to. mo↓Nkumo ki↑Nʃiyo.
zero_data/maigo/18699.wav|5|sa↑ito↓saaN. jo↑oohe↓ekaa.
zero_data/muma/01386.wav|10|sa↓a, sa↑ito. ta↓no ba↑ʃomo a↑Nna↓i ʃI↑te ku↑re. ma↓da i↑roiro a↓ru N↓daroo?
zero_data/koakuma/09135.wav|17|e, a↑no…… so↑no…….
zero_data/muma/23248.wav|17|to↑ozeNdaro? to↑modaʧiga ko↑ma↓QteN no↑o mi↑sugose↓rukayo.
zero_data/maigo/16764.wav|17|ha↓a? so↑Nna ka↑NtaNna wa↑ke↓naidaro?
zero_data/maigo/19571.wav|4|ta↑me↓su?
zero_data/muma/18602.wav|17|o↓i, ru↓izu.
zero_data/koakuma/12890.wav|17|de↓mo, kyu↓ruke. do↓oʃIte so↑Nnani ko↓Nkaino ji↓keNni ʃi↑NkeNna N↓da?
zero_data/maigo/02307.wav|23|de↓mo, ʃi↑e↓sUtawa ri↑Ngoo mo↑tome↓ta……. ko↑rewa su↑garooto o↑mo↓Qtakarajanai no↑ka↓i?
zero_data/maigo/15359.wav|17|u~ N…….
zero_data/maigo/18898.wav|5|ko↑o mi↑e↓te……, u↓uN, ko↑o mi↑e↓nakUtemo, ke↓Qkoo ke↑damonona N↓desUyo!
zero_data/maigo/14401.wav|17|ru↓izu, do↓o ʃI↑ta↓Nda? na↓Nde ʃa↑bera↓nai N↓da?
zero_data/maigo/17308.wav|17|i↓e i↓e i↓e…. so↑Nna ko↑to↓wa…….
zero_data/koakuma/11214.wav|17|i↑warete mi↓reba se↑ma↓kaQtakamonaa.
zero_data/koakuma/00419.wav|3|mi↓suwe↓zariinimo a↑taerarete i↑ta↓nara, ka↓nojowa ko↑no yo↓ona ko↑to↓o ʃi↑nakaQta no↑deʃoo↓ka.
zero_data/muma/06156.wav|21|ka↑NtaNni i↑e↓ba so↑oyuu ko↓Qta.
zero_data/muma/06012.wav|21|o↓i! mu↓ʃI su↓Nnayo, a↑iboo.
zero_data/koakuma/01445.wav|18|bo↑ku↓taʧiwa ta↓da, we↓zariini ka↑imonoo ta↑no↓maretadakeda! ma↑Qtaku!
zero_data/maigo/04263.wav|2|a↓a, o↑ki↓gae? na↓ra, i↑ruku↓kuuga o↑te↓ʦudai su↑ru no↑ne!
zero_data/maigo/11337.wav|17|ko↓o……. i↑roke↓de o↑toko↓ni mi↑ʦu↓gasetee, mi↓taina.
zero_data/maigo/12886.wav|17|w a, wa↑wa……. ʧ i, su↑waresugita↓kamo……. to↑ria↓ezu, ʦu↑gi, i↑koo↓ze…….
zero_data/muma/22888.wav|17|ja↓a, o↑rega sa↑ikiN mi↓teta yu↑me↓wa, ri↓ʃuni mi↑se↓rarete ta↓Qte ko↑to↓ka?
zero_data/koakuma/02543.wav|6|f u fu↓Q. so↑o i↓eba, o↑ʃi↓bai ta↑noʃi↓kaQtane.
zero_data/koakuma/01067.wav|19|i↑ya, su↑ma↓nai……. ʦu↓i a↑ʦu↓kunaQte ʃi↑ma↓Qtana.
zero_data/maigo/08463.wav|0|do↓ragoNwa a↑tama↓no i↓i i↑ki↓monoyo. ʧa↑Nto wa↑ka↓Qte ku↑reta↓wa.
zero_data/koakuma/15433.wav|4|…… so↑o.
zero_data/koakuma/05982.wav|0|…….
zero_data/maigo/07120.wav|0|kya↓Q! na↓niyo i↑kinari. ga↑Nbaruni ki↑maQte↓rujanai.
zero_data/muma/06216.wav|21|bu↑ʃi↓no mu↑sumeQkono i↑ʧi↓zokuwa so↑no sa↓kyubasuno fu↑uiNo da↓idai i↑Qte i↑ta i↑ʧi↓zokuna N↓datoyo.
zero_data/koakuma/06034.wav|0|w a, wa↑taʃieno o↑reewa do↓o na↑Qte↓runoyo!
zero_data/muma/12977.wav|0|kyu↓rukeQ! a, a, a↓Nta, do↓oyuu ʦu↑morina↓no!?
zero_data/maigo/09021.wav|0|t o, to↓nikaku, ha↓yakuʃIteyone.
zero_data/muma/22882.wav|17|y o, yo↓ku wa↑kara↓Nga wa↑ka↓Qta!
zero_data/maigo/02646.wav|21|na↓Nde e, ke↑Qtoode↓mo su↑ru no↑ka?
zero_data/muma/14449.wav|0|………
zero_data/koakuma/04365.wav|20|de↓wa, wa↑ʃiga sa↑gaʃini i↑Qte ku↓ruzo ha↓runa. ko↑rekarawa ki↑o ʦU↑ke↓te ku↑dasa↓reyo.
zero_data/koakuma/03217.wav|15|ru↓izuga na↓nimo i↑wanai no↑wa, mo↑kuniN su↑ruQte i↓miyo, ki↑Qto.
zero_data/maigo/06761.wav|0|ze↑Qtai, ri↓idaani na↓ru N↓dakara.
zero_data/maigo/04769.wav|13|wa↑taʃino ʧo↑ogooki↓zai…… na↑gareʦuite↓naikaʃiranee.
zero_data/maigo/12317.wav|17|so↑Nna mu↑sume↓ni ta↓iʃIte, na↓nimo o↑mowa↓nai ha↑zu na↓idaro?
zero_data/maigo/08726.wav|0|i↓ʦumono fU↑ku↓desUka? wa↑taʃiwa i↓ʦumo ko↑no fU↑kusoode↓sUga. so↑reto↓mo, ho↓Njiʦuwa be↑ʦuno fU↑ku↓o go↑ʃo↓moodesUka?
zero_data/muma/21420.wav|17|o↑o o↑o. kyo↓omo sa↑keNde↓runaa.
zero_data/maigo/16254.wav|17|ta↑toe↓runara, o↑kato he↑egeN……. te↓fawa sa↑ikyoono sa↑Nmyaku.
zero_data/maigo/20883.wav|1|so↑re, ho↑Ntooni da↑ijo↓obuna N↓desUka?
zero_data/maigo/12493.wav|17|ki↑mimo go↓niN ʃu↓uno hI↑to↓rika?
zero_data/koakuma/04174.wav|13|mo↓o, ʃo↑oganai N↓dakara. wa↑taʃimo mo↑ʧi↓roN te↑ʦuda↓uwayo.
zero_data/muma/12338.wav|0|na↓Q! na↓nanana! na↓ni ʃI↑te↓runoyo a↑Nta↓taʧii~!
zero_data/koakuma/13144.wav|17|ho↑NQto, yu↑uto↓oseedayonaa.
zero_data/muma/04325.wav|15|na↑re↓yo, na↑re.
zero_data/maigo/18626.wav|5|ʃi↑ra↓yukI hi↑me? sa↑itosaNga ha↑na↓ʃIte ku↑reta…….
zero_data/maigo/16355.wav|17|so↑reku↓rainara…….
zero_data/muma/10263.wav|0|to↑risUte↓iNno ki↓zokUtomo a↑ro↓oʃaga, wa↑wawa, ha↑ʃItanai↓waQ!
zero_data/maigo/08475.wav|0|sa↓Qki i↑Qta↓deʃo. he↓ekano ko↑toba↓o u↑tagau ʦu↑moriwa na↓iQte!
zero_data/muma/05017.wav|15|m a, i↓iwa. so↑reja, se↑Ntaku ga↑Nba↓Qtene.
zero_data/maigo/04281.wav|2|ri↑Ngo. sa↑ito, ʃi↑ranai?
zero_data/maigo/15715.wav|17|ta↓buNne. i↓mamo zu↓kizuki i↑ta↓muʃi.
zero_data/maigo/19844.wav|4|da↑ijo↓obu, gyo↑kUsai ʃI↑te ka↓eQte ku↓ru.
zero_data/maigo/17141.wav|17|w a? i↑ya, be↑ʦuni ʃI↑ʦu↓reena ko↑to↓wa ʃI↑tenaito o↑mo↓ukedo.
zero_data/maigo/13905.wav|17|u↓waaQ, i↑ruku↓kuu!?
zero_data/muma/01101.wav|3|ʧ o! ʧo↓Qto a↓Nta! na↓ni, hi↓mesamano a↑tama↓o na↑dete↓runoyo!! bu↓reenimo ho↑doga a↓ruwa!!
zero_data/muma/07879.wav|20|mi↓suba↑rie↓eru. o↑ʧIʦuite ka↑Nga↓ete mi↓nasai.
zero_data/maigo/20265.wav|22|ka↑Nga↓etemo mi↓ro. ru↓izuno ma↓ryokuo su↑ito↓Qta fu↑ru↓kI ho↓No, su↓bete ka↑NkeʦU sa↑seta wa↓kejanai.
zero_data/maigo/09917.wav|0|ga↑Qkoode a↓Qta to↑ki↓ni ha↑na↓seba i↓i no↑ni, fU↑ʃigito, ka↑iwaga ʦu↑zuite ʃi↑mau↓no.
zero_data/koakuma/14862.wav|5|wa↑karima↓ʃItawa, mi↓suʦe↑rupUsu↓too. do↓ozo o↓te ya↑wa↓rakani o↑negai ʃi↑ma↓sU.
zero_data/maigo/19651.wav|4|…… ko↑re.
zero_data/muma/10903.wav|0|b e, be↑ʦuni a↓Ntaga da↓reto na↓kayoku ʃI↑teyooto, wa↑taʃiniwa ze↑NzeN, so↑rya mo↓o ze↑NQzeN ka↑Nkee na↓i N↓dakara!!
zero_data/maigo/00303.wav|3|ʦU↑kai↓masaNno ma↓ede, u↑taQte mi↓temo i↓idesUka?
zero_data/muma/09077.wav|8|so↑rekara, ru↓izuno he↑ya↓de i↑Qʃoni ku↑raʃIte……. ta↑noʃi↓kaQta a.
zero_data/koakuma/06891.wav|0|na↓NdeQte……. so↑Nna n o, ki↑maQte↓rujanai. ʦU↑kai↓mano fU↑ʃi↓maʦuwa go↑ʃu↓jiNsamano se↑kiniNda mo↑no.
zero_data/maigo/10404.wav|17|so↓Qka. a↑ri↓gatoo. ja↓a…… i↑koo↓ka.
zero_data/koakuma/12886.wav|17|i↑ya, ma↑QtakU ka↑Nkee na↓idaro.
zero_data/maigo/16983.wav|17|so↑rewa…….
zero_data/maigo/02441.wav|23|de↓mo, mo↓o i↓i N↓da. ru↑izui↓jooni, fU↑sawaʃi↓i a↑ite↓o mi↑ʦUketa↓karane.
zero_data/muma/13845.wav|0|ʧ o, ʧo↓Qto, sa↑ito! na↓ni ya↑Qte↓runo!
zero_data/muma/12623.wav|0|i, i↑ʧIpeejidakeda↓karane!!
zero_data/maigo/00387.wav|3|so↓odesUne. a↓to, mo↓o i↑ʧioʃi…… ka↓ʃira.
zero_data/koakuma/09889.wav|17|ta↓basaga a↑Nnani ha↑gema↓ʃIte ku↑reru↓naNte. ga↑Nbara↓naitona.
zero_data/muma/14105.wav|0|ho↑NQtoni i↑nu↓ne!
zero_data/muma/15912.wav|17|na↓niga a↓QtaQte wa↓kejanakUtesa. ʧo↓Qto, ku↓risuno ko↑to↓o ka↑Nga↓ete ta↓Nda.
zero_data/muma/12629.wav|0|ʃi↑e↓sUta. sa↑itoo ka↓kUho.
zero_data/maigo/17429.wav|17|ma↓a, ya↑Qpa↓ri hi↓mesamano ho↓oga…….
zero_data/maigo/09744.wav|0|de↓mo, so↑Nna o↑toko↓nokonimo wa↑ka↓Qte mo↑raita↓i to↑ki↓Qte a↓rujanai?
zero_data/maigo/14477.wav|17|…… a↑re? so↑o i↓eba ʃi↑e↓sUtawa?
zero_data/muma/13509.wav|0|hi↓mesamaga ge↑seNno mo↑no↓ni o↓teo fu↑reru↓naNte! i↑kemase↓N!
zero_data/muma/01668.wav|10|wa↑taʃiwa ya↑ha↓ri i↑taNna no↑daroo↓na. da↓kara, o↑naji yo↓ona ta↓ʧibani a↓ru sa↑itotoʃIka to↓moni na↑re↓nai no↑ka…….
zero_data/muma/08518.wav|8|ro↑kuni ʃa↑be↓rimo ʃi↑nai, na↓ni ka↑Ngaete↓rukamo wa↑kara↓nai. a↑Nna o↑Nna↓no do↓koga i↓ino?
zero_data/koakuma/02170.wav|6|a, so↓odesUne. ʧ o, ʧo↑odo i↑ikageNkanaa, na↓a N↑te.
zero_data/maigo/04925.wav|13|ko↑rekara ha↑keN sa↑reru to↑obaʦUtainiwa, ya↑ma↓ni i↑ruQte i↓u jo↑ohoode ju↑ubu↓Ndato o↑mo↓ukedo.
zero_data/muma/16666.wav|17|a, i, i↑ya! ko↑ikina jo↓okudakara.
zero_data/maigo/08969.wav|0|sa↑ito…….
zero_data/maigo/14158.wav|17|ge↓Q! ru↓izu!? so↑reni ʃi↑e↓sUta! do↓oʃIte ko↑koni!?
zero_data/muma/28307.wav|4|da↓kara, me↑idoʃo↓obuno u↑raru↓uruo te↑kiyoo su↑ru ko↑to↓ni ʃI↑ta.
zero_data/maigo/19047.wav|5|so↑odesUyo, ri↑ra↓QkUsu ʃI↑te ku↑dasa↓i.
zero_data/maigo/07046.wav|0|d a, da↑ijo↓obuyo. mi↑ʦUkaranakere↓ba i↓i N↓dakara.
zero_data/muma/11412.wav|0|………
zero_data/maigo/04315.wav|2|e!? a, a↓a, ta↓dano re↑e↓e, ta↑toe↓nanone!
zero_data/muma/07739.wav|24|i↓ma, sa↓kyubasuwa yu↑me↓no se↓kaio fu↑usa ʃI↑te i↑ru ha↑zu. so↑no ʃo↑okoni ki↑no↓owa myo↓ona yu↑me↓o mi↓te i↑nai↓deʃo?
zero_data/muma/04548.wav|15|ta↓basa!? a↑na↓tamade, so↑no ka↑Qkoo↓nani!?
zero_data/muma/07865.wav|20|wa↑ru↓kuwa na↓i i↓keNjagano, mi↓suri↓ʃuwa ha↑tarakaseruniwa ma↓da ʃo↓oʃoo ta↑yorina↓i to↑ʃigoroja.
zero_data/muma/08105.wav|8|mu↓fufufuuN.
zero_data/muma/08123.wav|8|u↑fufu, ko↑rede a↑taʃino ko↓ega yo↓kU ki↑koeru↓wayone.
zero_data/muma/12055.wav|0|…… sa↑itono ko↑to, sU↑ko↓ʃIkuraiwa hI↑ʦuyoodaQte o↑moQte↓ruwayo.
zero_data/muma/23407.wav|17|ja↓a, kyo↑kaʃu↓Qseyo. ʃi↑ba↓rakU ka↑Qtena ko↑odooo to↓Qtemo i↓iQte.
zero_data/maigo/08725.wav|0|ha↓i? na↓nika o↑kaʃi↓i to↑koroga a↓rudeʃooka?
zero_data/muma/10124.wav|0|…… mo↓o.
zero_data/muma/08004.wav|20|ho↓QhoQ h o! ma↓a wa↑ka↓i ko↓rowa, so↑o ya↓Qte ke↓Qkini ha↑ʃi↓ru no↑mo wa↑ru↓kunakaroote.
zero_data/muma/05410.wav|15|o↑ʧIʦuite! ri↓ʃu, o↑ʧIʦuite, ma↓zuwa ha↓narete ki↑o ʃi↑zumete!
zero_data/maigo/02635.wav|21|ta↓ʃIkani o↑seena.
zero_data/muma/00447.wav|3|ku↓risu……. ya↑ha↓ri de↑ta no↑de↓sUka.
zero_data/muma/25588.wav|5|a↑no…… so↑no……. ko↑oyuu i↑ikatao su↑ru no↑wa yo↓kunaiQte, wa↑kaQte↓ru N↓desUkedo…….
zero_data/muma/27940.wav|4|a↑o↓jiru.
zero_data/koakuma/08041.wav|17|ja↓a…….
zero_data/koakuma/01802.wav|6|u↓N, i↓ʦumademo ko↑no ga↑kuiNde……. ru↑izusaN↓taʧino se↑wa↓ni na↑Qte↓ru no↑mo do↓oka na↓Qte o↑mo↓Qte.

205
tts/vits/inference.ipynb Normal file
View File

@ -0,0 +1,205 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import IPython.display as ipd\n",
"\n",
"import os\n",
"import json\n",
"import math\n",
"import torch\n",
"from torch import nn\n",
"from torch.nn import functional as F\n",
"from torch.utils.data import DataLoader\n",
"\n",
"import commons\n",
"import utils\n",
"from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate\n",
"from models import SynthesizerTrn\n",
"from text.symbols import symbols\n",
"from text import text_to_sequence\n",
"\n",
"from scipy.io.wavfile import write\n",
"\n",
"\n",
"def get_text(text, hps):\n",
" text_norm = text_to_sequence(text, hps.data.text_cleaners)\n",
" if hps.data.add_blank:\n",
" text_norm = commons.intersperse(text_norm, 0)\n",
" text_norm = torch.LongTensor(text_norm)\n",
" return text_norm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Single Speaker"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hps = utils.get_hparams_from_file(\"configs/XXX.json\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"net_g = SynthesizerTrn(\n",
" len(symbols),\n",
" hps.data.filter_length // 2 + 1,\n",
" hps.train.segment_size // hps.data.hop_length,\n",
" **hps.model).cuda()\n",
"_ = net_g.eval()\n",
"\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
" x_tst = stn_tst.cuda().unsqueeze(0)\n",
" x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
" audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Multiple Speakers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hps = utils.get_hparams_from_file(\"./configs/XXX.json\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"net_g = SynthesizerTrn(\n",
" len(symbols),\n",
" hps.data.filter_length // 2 + 1,\n",
" hps.train.segment_size // hps.data.hop_length,\n",
" n_speakers=hps.data.n_speakers,\n",
" **hps.model).cuda()\n",
"_ = net_g.eval()\n",
"\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
" x_tst = stn_tst.cuda().unsqueeze(0)\n",
" x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
" sid = torch.LongTensor([4]).cuda()\n",
" audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Voice Conversion"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset = TextAudioSpeakerLoader(hps.data.validation_files, hps.data)\n",
"collate_fn = TextAudioSpeakerCollate()\n",
"loader = DataLoader(dataset, num_workers=8, shuffle=False,\n",
" batch_size=1, pin_memory=True,\n",
" drop_last=True, collate_fn=collate_fn)\n",
"data_list = list(loader)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with torch.no_grad():\n",
" x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cuda() for x in data_list[0]]\n",
" sid_tgt1 = torch.LongTensor([1]).cuda()\n",
" sid_tgt2 = torch.LongTensor([2]).cuda()\n",
" sid_tgt3 = torch.LongTensor([4]).cuda()\n",
" audio1 = net_g.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0,0].data.cpu().float().numpy()\n",
" audio2 = net_g.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt2)[0][0,0].data.cpu().float().numpy()\n",
" audio3 = net_g.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt3)[0][0,0].data.cpu().float().numpy()\n",
"print(\"Original SID: %d\" % sid_src.item())\n",
"ipd.display(ipd.Audio(y[0].cpu().numpy(), rate=hps.data.sampling_rate, normalize=False))\n",
"print(\"Converted SID: %d\" % sid_tgt1.item())\n",
"ipd.display(ipd.Audio(audio1, rate=hps.data.sampling_rate, normalize=False))\n",
"print(\"Converted SID: %d\" % sid_tgt2.item())\n",
"ipd.display(ipd.Audio(audio2, rate=hps.data.sampling_rate, normalize=False))\n",
"print(\"Converted SID: %d\" % sid_tgt3.item())\n",
"ipd.display(ipd.Audio(audio3, rate=hps.data.sampling_rate, normalize=False))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.9 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
},
"vscode": {
"interpreter": {
"hash": "c15292341d300295ca9f634d04c483f667a0c1d5ee0c309c2ac4e312cce8b8df"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}

61
tts/vits/losses.py Normal file
View File

@ -0,0 +1,61 @@
import torch
from torch.nn import functional as F
import commons
def feature_loss(fmap_r, fmap_g):
    """L1 feature-matching loss between discriminator feature maps.

    Args:
        fmap_r: per-discriminator lists of feature tensors from real audio.
        fmap_g: matching lists of feature tensors from generated audio.

    Returns:
        Scalar tensor: 2 * sum of mean absolute differences over all layers.
        Real features are detached so the loss only trains the generator.
    """
    total = 0
    for real_layers, gen_layers in zip(fmap_r, fmap_g):
        for real_feat, gen_feat in zip(real_layers, gen_layers):
            diff = torch.abs(real_feat.float().detach() - gen_feat.float())
            total = total + torch.mean(diff)
    return total * 2
def discriminator_loss(disc_real_outputs, disc_generated_outputs):
    """Least-squares GAN loss for the discriminator side.

    Real outputs are pushed toward 1, generated outputs toward 0.

    Returns:
        (total_loss, r_losses, g_losses) where the lists hold the per-
        discriminator real/fake loss values as Python floats.
    """
    total = 0
    r_losses = []
    g_losses = []
    for real_out, gen_out in zip(disc_real_outputs, disc_generated_outputs):
        real_out = real_out.float()
        gen_out = gen_out.float()
        real_term = torch.mean((1 - real_out) ** 2)
        gen_term = torch.mean(gen_out ** 2)
        total = total + (real_term + gen_term)
        r_losses.append(real_term.item())
        g_losses.append(gen_term.item())
    return total, r_losses, g_losses
def generator_loss(disc_outputs):
    """Least-squares GAN loss for the generator side.

    Generated outputs are pushed toward 1 (fooling each discriminator).

    Returns:
        (total_loss, gen_losses) where gen_losses keeps the per-discriminator
        loss tensors (not floats, so they stay on the graph for logging).
    """
    gen_losses = []
    total = 0
    for out in disc_outputs:
        term = torch.mean((1 - out.float()) ** 2)
        gen_losses.append(term)
        total = total + term
    return total, gen_losses
def kl_loss(z_p, logs_q, m_p, logs_p, z_mask):
    """Masked KL divergence between posterior samples and the prior.

    Args:
        z_p, logs_q: posterior sample and its log-std, each [b, h, t_t].
        m_p, logs_p: prior mean and log-std, each [b, h, t_t].
        z_mask: validity mask broadcast over the tensors.

    Returns:
        Scalar tensor: KL summed over masked positions, normalized by the
        number of valid positions.
    """
    z_p, logs_q = z_p.float(), logs_q.float()
    m_p, logs_p = m_p.float(), logs_p.float()
    z_mask = z_mask.float()

    pointwise = (logs_p - logs_q - 0.5
                 + 0.5 * ((z_p - m_p) ** 2) * torch.exp(-2. * logs_p))
    return torch.sum(pointwise * z_mask) / torch.sum(z_mask)

112
tts/vits/mel_processing.py Normal file
View File

@ -0,0 +1,112 @@
import math
import os
import random
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.data
import numpy as np
import librosa
import librosa.util as librosa_util
from librosa.util import normalize, pad_center, tiny
from scipy.signal import get_window
from scipy.io.wavfile import read
from librosa.filters import mel as librosa_mel_fn
MAX_WAV_VALUE = 32768.0
def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    """Log-compress magnitudes: log(clamp(x, min=clip_val) * C).

    PARAMS
    ------
    C: compression factor
    clip_val: floor applied before the log to avoid log(0)
    """
    clamped = torch.clamp(x, min=clip_val)
    return torch.log(clamped * C)
def dynamic_range_decompression_torch(x, C=1):
    """Invert dynamic_range_compression_torch: exp(x) / C.

    PARAMS
    ------
    C: compression factor used to compress
    """
    expanded = torch.exp(x)
    return expanded / C
def spectral_normalize_torch(magnitudes):
    """Log-compress spectrogram magnitudes (thin wrapper kept for naming symmetry)."""
    return dynamic_range_compression_torch(magnitudes)
def spectral_de_normalize_torch(magnitudes):
    """Undo spectral_normalize_torch (thin wrapper kept for naming symmetry)."""
    return dynamic_range_decompression_torch(magnitudes)
# Per-(size, dtype, device) caches so filter banks / windows are built once.
mel_basis = {}
hann_window = {}


def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
    """Linear-magnitude STFT spectrogram of a batch of waveforms.

    Args:
        y: [b, samples] waveform, expected in [-1, 1] (out-of-range values are
            only reported, not clipped).
        n_fft, hop_size, win_size: STFT parameters.
        sampling_rate: unused here; kept for signature parity with
            mel_spectrogram_torch.
        center: passed through to torch.stft; padding below assumes False.

    Returns:
        [b, n_fft // 2 + 1, frames] magnitude spectrogram.
    """
    if torch.min(y) < -1.:
        print('min value is ', torch.min(y))
    if torch.max(y) > 1.:
        print('max value is ', torch.max(y))

    global hann_window
    dtype_device = str(y.dtype) + '_' + str(y.device)
    wnsize_dtype_device = str(win_size) + '_' + dtype_device
    if wnsize_dtype_device not in hann_window:
        hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)

    # Manual reflect padding so frame centers line up with hop boundaries.
    y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode='reflect')
    y = y.squeeze(1)

    # Fix: the implicit real-valued STFT output is deprecated (and removed in
    # newer torch). return_complex=True + view_as_real reproduces the original
    # (..., 2) real/imag layout exactly.
    spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
                      center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)
    spec = torch.view_as_real(spec)

    # Epsilon under the sqrt keeps the gradient finite at zero magnitude.
    spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
    return spec
def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
    """Project a linear spectrogram onto a cached mel filter bank and log-compress.

    Args:
        spec: [b, n_fft // 2 + 1, frames] linear-magnitude spectrogram.
        n_fft, num_mels, sampling_rate, fmin, fmax: mel filter-bank parameters.

    Returns:
        [b, num_mels, frames] log-compressed mel spectrogram.
    """
    global mel_basis
    dtype_device = str(spec.dtype) + '_' + str(spec.device)
    fmax_dtype_device = str(fmax) + '_' + dtype_device
    if fmax_dtype_device not in mel_basis:
        # Fix: librosa >= 0.10 made librosa.filters.mel keyword-only, so the
        # old positional call raises TypeError there; keyword arguments also
        # work on older librosa releases.
        mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
        mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device)
    spec = torch.matmul(mel_basis[fmax_dtype_device], spec)
    spec = spectral_normalize_torch(spec)
    return spec
def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False):
    """Waveform -> log-compressed mel spectrogram in one pass.

    Equivalent to spectrogram_torch followed by spec_to_mel_torch, sharing the
    same module-level mel_basis / hann_window caches.

    Args:
        y: [b, samples] waveform, expected in [-1, 1] (out-of-range values are
            only reported, not clipped).

    Returns:
        [b, num_mels, frames] log-compressed mel spectrogram.
    """
    if torch.min(y) < -1.:
        print('min value is ', torch.min(y))
    if torch.max(y) > 1.:
        print('max value is ', torch.max(y))

    global mel_basis, hann_window
    dtype_device = str(y.dtype) + '_' + str(y.device)
    fmax_dtype_device = str(fmax) + '_' + dtype_device
    wnsize_dtype_device = str(win_size) + '_' + dtype_device
    if fmax_dtype_device not in mel_basis:
        # Fix: librosa >= 0.10 made librosa.filters.mel keyword-only; keyword
        # arguments also work on older releases, positional calls do not.
        mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
        mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device)
    if wnsize_dtype_device not in hann_window:
        hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)

    y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode='reflect')
    y = y.squeeze(1)

    # Fix: explicit complex STFT + view_as_real replaces the deprecated
    # real-valued output while keeping the original (..., 2) layout.
    spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
                      center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)
    spec = torch.view_as_real(spec)
    spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)

    spec = torch.matmul(mel_basis[fmax_dtype_device], spec)
    spec = spectral_normalize_torch(spec)
    return spec

534
tts/vits/models.py Normal file
View File

@ -0,0 +1,534 @@
import copy
import math
import torch
from torch import nn
from torch.nn import functional as F
import commons
import modules
import attentions
import monotonic_align
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from commons import init_weights, get_padding
class StochasticDurationPredictor(nn.Module):
    """Flow-based stochastic duration predictor (VITS).

    In the forward (training) direction it returns the negative log-likelihood
    of the ground-truth durations ``w`` under a normalizing flow conditioned on
    the detached text encoding; in the reverse direction it samples
    log-durations from Gaussian noise.
    """

    def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, n_flows=4, gin_channels=0):
        super().__init__()
        filter_channels = in_channels  # it needs to be removed from future version.
        self.in_channels = in_channels
        self.filter_channels = filter_channels
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.n_flows = n_flows
        self.gin_channels = gin_channels

        # Main flow stack over the 2-channel duration representation; each
        # ConvFlow is followed by a Flip so both channels get transformed.
        self.log_flow = modules.Log()
        self.flows = nn.ModuleList()
        self.flows.append(modules.ElementwiseAffine(2))
        for i in range(n_flows):
            self.flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3))
            self.flows.append(modules.Flip())

        # Posterior network: only used in the forward (training) direction.
        self.post_pre = nn.Conv1d(1, filter_channels, 1)
        self.post_proj = nn.Conv1d(filter_channels, filter_channels, 1)
        self.post_convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout)
        self.post_flows = nn.ModuleList()
        self.post_flows.append(modules.ElementwiseAffine(2))
        for i in range(4):
            self.post_flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3))
            self.post_flows.append(modules.Flip())

        # Projections for the conditioning features and optional extra embedding.
        self.pre = nn.Conv1d(in_channels, filter_channels, 1)
        self.proj = nn.Conv1d(filter_channels, filter_channels, 1)
        self.convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout)
        if gin_channels != 0:
            self.cond = nn.Conv1d(gin_channels, filter_channels, 1)

    def forward(self, x, x_mask, w=None, g=None, reverse=False, noise_scale=1.0):
        """Compute duration NLL (reverse=False) or sample log-durations (reverse=True).

        Args:
            x: conditioning features [b, in_channels, t]; detached here so this
                loss does not backprop into their producer.
            x_mask: [b, 1, t] validity mask.
            w: ground-truth durations [b, 1, t]; required when reverse is False.
            g: optional global conditioning — NOTE(review): presumably a
                speaker embedding; confirm against callers.
            reverse: selects training (NLL) vs. sampling mode.
            noise_scale: std multiplier for the latent noise in reverse mode.
        """
        x = torch.detach(x)  # stop gradients into the conditioning network
        x = self.pre(x)
        if g is not None:
            g = torch.detach(g)
            x = x + self.cond(g)
        x = self.convs(x, x_mask)
        x = self.proj(x) * x_mask

        if not reverse:
            flows = self.flows
            assert w is not None

            # Variational posterior: encode w, run post_flows on noise e_q,
            # accumulating the log-determinants of each transform.
            logdet_tot_q = 0
            h_w = self.post_pre(w)
            h_w = self.post_convs(h_w, x_mask)
            h_w = self.post_proj(h_w) * x_mask
            e_q = torch.randn(w.size(0), 2, w.size(2)).to(device=x.device, dtype=x.dtype) * x_mask
            z_q = e_q
            for flow in self.post_flows:
                z_q, logdet_q = flow(z_q, x_mask, g=(x + h_w))
                logdet_tot_q += logdet_q
            z_u, z1 = torch.split(z_q, [1, 1], 1)
            u = torch.sigmoid(z_u) * x_mask  # u in (0, 1): dequantization offset
            z0 = (w - u) * x_mask
            logdet_tot_q += torch.sum((F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1, 2])
            # log q(z): standard-normal log-density of e_q minus the flow log-dets.
            logq = torch.sum(-0.5 * (math.log(2 * math.pi) + (e_q ** 2)) * x_mask, [1, 2]) - logdet_tot_q

            # Likelihood of (z0, z1) under the main flow stack.
            logdet_tot = 0
            z0, logdet = self.log_flow(z0, x_mask)
            logdet_tot += logdet
            z = torch.cat([z0, z1], 1)
            for flow in flows:
                z, logdet = flow(z, x_mask, g=x, reverse=reverse)
                logdet_tot = logdet_tot + logdet
            nll = torch.sum(0.5 * (math.log(2 * math.pi) + (z ** 2)) * x_mask, [1, 2]) - logdet_tot
            return nll + logq  # [b]
        else:
            # Invert the flow: run it backwards on scaled Gaussian noise.
            flows = list(reversed(self.flows))
            flows = flows[:-2] + [flows[-1]]  # remove a useless vflow
            z = torch.randn(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) * noise_scale
            for flow in flows:
                z = flow(z, x_mask, g=x, reverse=reverse)
            z0, z1 = torch.split(z, [1, 1], 1)
            logw = z0  # first channel holds the predicted log-durations
            return logw
class DurationPredictor(nn.Module):
    """Deterministic duration predictor: two masked conv+ReLU+LayerNorm+dropout
    stages followed by a 1x1 projection to a single duration channel.
    """

    def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, gin_channels=0):
        super().__init__()

        self.in_channels = in_channels
        self.filter_channels = filter_channels
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.gin_channels = gin_channels

        self.drop = nn.Dropout(p_dropout)
        # padding=kernel_size//2 keeps the time dimension unchanged.
        self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size, padding=kernel_size // 2)
        self.norm_1 = modules.LayerNorm(filter_channels)
        self.conv_2 = nn.Conv1d(filter_channels, filter_channels, kernel_size, padding=kernel_size // 2)
        self.norm_2 = modules.LayerNorm(filter_channels)
        self.proj = nn.Conv1d(filter_channels, 1, 1)

        if gin_channels != 0:
            self.cond = nn.Conv1d(gin_channels, in_channels, 1)

    def forward(self, x, x_mask, g=None):
        """Predict per-step durations.

        Args:
            x: input features [b, in_channels, t]; detached here so this head
                does not backprop into their producer.
            x_mask: [b, 1, t] validity mask, re-applied after every stage.
            g: optional global conditioning — NOTE(review): presumably a
                speaker embedding; confirm against callers.

        Returns:
            [b, 1, t] masked duration predictions.
        """
        x = torch.detach(x)
        if g is not None:
            g = torch.detach(g)
            x = x + self.cond(g)
        x = self.conv_1(x * x_mask)
        x = torch.relu(x)
        x = self.norm_1(x)
        x = self.drop(x)
        x = self.conv_2(x * x_mask)
        x = torch.relu(x)
        x = self.norm_2(x)
        x = self.drop(x)
        x = self.proj(x * x_mask)
        return x * x_mask
class TextEncoder(nn.Module):
    """Text (prior) encoder: embeds token ids, runs a self-attention encoder,
    and projects to the mean and log-std of the prior distribution.
    """

    def __init__(self,
                 n_vocab,
                 out_channels,
                 hidden_channels,
                 filter_channels,
                 n_heads,
                 n_layers,
                 kernel_size,
                 p_dropout):
        super().__init__()
        self.n_vocab = n_vocab
        self.out_channels = out_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout

        self.emb = nn.Embedding(n_vocab, hidden_channels)
        # Init std h**-0.5 balances the sqrt(h) scaling applied in forward.
        nn.init.normal_(self.emb.weight, 0.0, hidden_channels**-0.5)

        self.encoder = attentions.Encoder(
            hidden_channels,
            filter_channels,
            n_heads,
            n_layers,
            kernel_size,
            p_dropout)
        # Single projection emits both mean and log-std (2 * out_channels).
        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)

    def forward(self, x, x_lengths):
        """Encode token ids.

        Args:
            x: [b, t] token ids.
            x_lengths: [b] valid lengths used to build the mask.

        Returns:
            (hidden states [b, h, t], prior mean, prior log-std, mask [b, 1, t]).
        """
        x = self.emb(x) * math.sqrt(self.hidden_channels)  # [b, t, h]
        x = torch.transpose(x, 1, -1)  # [b, h, t]
        x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)

        x = self.encoder(x * x_mask, x_mask)
        stats = self.proj(x) * x_mask

        m, logs = torch.split(stats, self.out_channels, dim=1)
        return x, m, logs, x_mask
class ResidualCouplingBlock(nn.Module):
    """Invertible flow: a stack of residual coupling layers, each followed by a
    channel Flip so both halves of the channels get transformed over the stack.
    """

    def __init__(self,
                 channels,
                 hidden_channels,
                 kernel_size,
                 dilation_rate,
                 n_layers,
                 n_flows=4,
                 gin_channels=0):
        super().__init__()
        self.channels = channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.n_flows = n_flows
        self.gin_channels = gin_channels

        self.flows = nn.ModuleList()
        for i in range(n_flows):
            # mean_only=True: each coupling layer shifts (no scaling), so its
            # log-determinant contribution is trivial.
            self.flows.append(modules.ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels, mean_only=True))
            self.flows.append(modules.Flip())

    def forward(self, x, x_mask, g=None, reverse=False):
        """Apply the flow forward (reverse=False) or inverted (reverse=True).

        Args:
            x: [b, channels, t] input.
            x_mask: [b, 1, t] validity mask.
            g: optional global conditioning passed to each coupling layer.
        """
        if not reverse:
            for flow in self.flows:
                # Second return value (per-layer log-det) is discarded here.
                x, _ = flow(x, x_mask, g=g, reverse=reverse)
        else:
            # Inversion runs the layers in the opposite order.
            for flow in reversed(self.flows):
                x = flow(x, x_mask, g=g, reverse=reverse)
        return x
class PosteriorEncoder(nn.Module):
    """Posterior encoder q(z | y): WN-based encoder over spectrogram frames.

    Returns a reparameterized latent sample plus the posterior mean/log-std
    and the frame-validity mask.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels,
                 kernel_size,
                 dilation_rate,
                 n_layers,
                 gin_channels=0):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.gin_channels = gin_channels

        self.pre = nn.Conv1d(in_channels, hidden_channels, 1)
        self.enc = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels)
        # One projection split into (mean, log-std) along the channel axis.
        self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)

    def forward(self, x, x_lengths, g=None):
        """x: [b, in_channels, t]; g: optional global conditioning [b, gin, 1]."""
        x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
        x = self.pre(x) * x_mask
        x = self.enc(x, x_mask, g=g)
        stats = self.proj(x) * x_mask
        m, logs = torch.split(stats, self.out_channels, dim=1)
        # Reparameterization trick: z ~ N(m, exp(logs)^2), masked to valid frames.
        z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask
        return z, m, logs, x_mask
class Generator(torch.nn.Module):
    """HiFi-GAN-style decoder: transposed-conv upsampling stack with
    multi-receptive-field residual blocks, producing a waveform via tanh."""

    def __init__(self, initial_channel, resblock, resblock_kernel_sizes, resblock_dilation_sizes, upsample_rates, upsample_initial_channel, upsample_kernel_sizes, gin_channels=0):
        super(Generator, self).__init__()
        self.num_kernels = len(resblock_kernel_sizes)
        self.num_upsamples = len(upsample_rates)
        self.conv_pre = Conv1d(initial_channel, upsample_initial_channel, 7, 1, padding=3)
        # `resblock` arrives as the string '1' or '2', selecting the variant.
        resblock = modules.ResBlock1 if resblock == '1' else modules.ResBlock2

        self.ups = nn.ModuleList()
        for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
            # Channel count halves at every upsampling stage.
            self.ups.append(weight_norm(
                ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)),
                                k, u, padding=(k-u)//2)))

        # One resblock per (upsample stage, kernel size); outputs are averaged.
        self.resblocks = nn.ModuleList()
        for i in range(len(self.ups)):
            ch = upsample_initial_channel//(2**(i+1))
            for j, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)):
                self.resblocks.append(resblock(ch, k, d))

        self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
        self.ups.apply(init_weights)

        if gin_channels != 0:
            # Global conditioning (e.g. speaker embedding), added after conv_pre.
            self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1)

    def forward(self, x, g=None):
        x = self.conv_pre(x)
        if g is not None:
            x = x + self.cond(g)

        for i in range(self.num_upsamples):
            x = F.leaky_relu(x, modules.LRELU_SLOPE)
            x = self.ups[i](x)
            # Average the parallel resblocks belonging to this stage.
            xs = None
            for j in range(self.num_kernels):
                if xs is None:
                    xs = self.resblocks[i*self.num_kernels+j](x)
                else:
                    xs += self.resblocks[i*self.num_kernels+j](x)
            x = xs / self.num_kernels
        x = F.leaky_relu(x)
        x = self.conv_post(x)
        x = torch.tanh(x)

        return x

    def remove_weight_norm(self):
        # Fuse weight norm into plain weights for inference/export.
        print('Removing weight norm...')
        for l in self.ups:
            remove_weight_norm(l)
        for l in self.resblocks:
            l.remove_weight_norm()
class DiscriminatorP(torch.nn.Module):
    """Period discriminator: folds the waveform into [b, 1, t/period, period]
    and applies a 2D conv stack along the time axis.

    Returns the flattened final logits and the per-layer feature maps used
    for the feature-matching loss.
    """

    def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
        super(DiscriminatorP, self).__init__()
        self.period = period
        self.use_spectral_norm = use_spectral_norm
        # Idiom fix: pick the normalization wrapper directly instead of
        # comparing against False (flake8 E712).
        norm_f = spectral_norm if use_spectral_norm else weight_norm
        self.convs = nn.ModuleList([
            norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
            norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
            norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
            norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
            norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(get_padding(kernel_size, 1), 0))),
        ])
        self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))

    def forward(self, x):
        fmap = []

        # 1d to 2d: right-pad so t is a multiple of the period, then fold.
        b, c, t = x.shape
        if t % self.period != 0:  # pad first
            n_pad = self.period - (t % self.period)
            x = F.pad(x, (0, n_pad), "reflect")
            t = t + n_pad
        x = x.view(b, c, t // self.period, self.period)

        for l in self.convs:
            x = l(x)
            x = F.leaky_relu(x, modules.LRELU_SLOPE)
            fmap.append(x)
        x = self.conv_post(x)
        fmap.append(x)
        x = torch.flatten(x, 1, -1)

        return x, fmap
class DiscriminatorS(torch.nn.Module):
    """Scale discriminator: 1D grouped-conv stack over the raw waveform.

    Returns the flattened final logits and the per-layer feature maps used
    for the feature-matching loss.
    """

    def __init__(self, use_spectral_norm=False):
        super(DiscriminatorS, self).__init__()
        # Idiom fix: pick the normalization wrapper directly instead of
        # comparing against False (flake8 E712).
        norm_f = spectral_norm if use_spectral_norm else weight_norm
        self.convs = nn.ModuleList([
            norm_f(Conv1d(1, 16, 15, 1, padding=7)),
            norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)),
            norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)),
            norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)),
            norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)),
            norm_f(Conv1d(1024, 1024, 5, 1, padding=2)),
        ])
        self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))

    def forward(self, x):
        fmap = []

        for l in self.convs:
            x = l(x)
            x = F.leaky_relu(x, modules.LRELU_SLOPE)
            fmap.append(x)
        x = self.conv_post(x)
        fmap.append(x)
        x = torch.flatten(x, 1, -1)

        return x, fmap
class MultiPeriodDiscriminator(torch.nn.Module):
    """Ensemble of one scale discriminator and five period discriminators.

    forward() runs every sub-discriminator on the real (y) and generated
    (y_hat) waveforms and collects outputs and feature maps for both.
    """

    def __init__(self, use_spectral_norm=False):
        super(MultiPeriodDiscriminator, self).__init__()
        periods = [2, 3, 5, 7, 11]
        discriminators = [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
        discriminators.extend(
            DiscriminatorP(p, use_spectral_norm=use_spectral_norm) for p in periods)
        self.discriminators = nn.ModuleList(discriminators)

    def forward(self, y, y_hat):
        y_d_rs, y_d_gs = [], []
        fmap_rs, fmap_gs = [], []
        for disc in self.discriminators:
            real_out, real_fmap = disc(y)
            gen_out, gen_fmap = disc(y_hat)
            y_d_rs.append(real_out)
            fmap_rs.append(real_fmap)
            y_d_gs.append(gen_out)
            fmap_gs.append(gen_fmap)

        return y_d_rs, y_d_gs, fmap_rs, fmap_gs
class SynthesizerTrn(nn.Module):
    """
    Synthesizer for Training

    End-to-end VITS model: text (prior) encoder, posterior encoder,
    normalizing flow, duration predictor and HiFi-GAN-style decoder.
    """

    def __init__(self,
                 n_vocab,
                 spec_channels,
                 segment_size,
                 inter_channels,
                 hidden_channels,
                 filter_channels,
                 n_heads,
                 n_layers,
                 kernel_size,
                 p_dropout,
                 resblock,
                 resblock_kernel_sizes,
                 resblock_dilation_sizes,
                 upsample_rates,
                 upsample_initial_channel,
                 upsample_kernel_sizes,
                 n_speakers=0,
                 gin_channels=0,
                 use_sdp=True,
                 **kwargs):
        super().__init__()
        self.n_vocab = n_vocab
        self.spec_channels = spec_channels
        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.segment_size = segment_size
        self.n_speakers = n_speakers
        self.gin_channels = gin_channels
        # use_sdp selects the stochastic vs. deterministic duration predictor.
        self.use_sdp = use_sdp

        self.enc_p = TextEncoder(n_vocab,
                                 inter_channels,
                                 hidden_channels,
                                 filter_channels,
                                 n_heads,
                                 n_layers,
                                 kernel_size,
                                 p_dropout)
        self.dec = Generator(inter_channels, resblock, resblock_kernel_sizes, resblock_dilation_sizes, upsample_rates, upsample_initial_channel, upsample_kernel_sizes, gin_channels=gin_channels)
        self.enc_q = PosteriorEncoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels)
        self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels)

        if use_sdp:
            self.dp = StochasticDurationPredictor(hidden_channels, 192, 3, 0.5, 4, gin_channels=gin_channels)
        else:
            self.dp = DurationPredictor(hidden_channels, 256, 3, 0.5, gin_channels=gin_channels)

        # NOTE(review): emb_g exists only when n_speakers > 1, but forward()
        # and infer() consult `n_speakers > 0` — n_speakers == 1 would raise
        # AttributeError; confirm configs only use 0 or >1 speakers.
        if n_speakers > 1:
            self.emb_g = nn.Embedding(n_speakers, gin_channels)

    def forward(self, x, x_lengths, y, y_lengths, sid=None):
        """Training pass.

        x: [b, t_text] symbol IDs; y: [b, spec_channels, t_spec] spectrograms;
        sid: optional [b] speaker IDs. Returns the decoded waveform slice, the
        duration loss term, the alignment, slice indices, masks, and the
        latent statistics needed by the KL loss.
        """
        x, m_p, logs_p, x_mask = self.enc_p(x, x_lengths)
        if self.n_speakers > 0:
            g = self.emb_g(sid).unsqueeze(-1)  # [b, h, 1]
        else:
            g = None

        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
        z_p = self.flow(z, y_mask, g=g)

        with torch.no_grad():
            # negative cross-entropy between z_p and every prior frame,
            # expanded term-by-term to avoid materializing full Gaussians
            s_p_sq_r = torch.exp(-2 * logs_p)  # [b, d, t]
            neg_cent1 = torch.sum(-0.5 * math.log(2 * math.pi) - logs_p, [1], keepdim=True)  # [b, 1, t_s]
            neg_cent2 = torch.matmul(-0.5 * (z_p ** 2).transpose(1, 2), s_p_sq_r)  # [b, t_t, d] x [b, d, t_s] = [b, t_t, t_s]
            neg_cent3 = torch.matmul(z_p.transpose(1, 2), (m_p * s_p_sq_r))  # [b, t_t, d] x [b, d, t_s] = [b, t_t, t_s]
            neg_cent4 = torch.sum(-0.5 * (m_p ** 2) * s_p_sq_r, [1], keepdim=True)  # [b, 1, t_s]
            neg_cent = neg_cent1 + neg_cent2 + neg_cent3 + neg_cent4

            # Monotonic alignment search over the likelihood matrix.
            attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
            attn = monotonic_align.maximum_path(neg_cent, attn_mask.squeeze(1)).unsqueeze(1).detach()

        # Per-phoneme durations are the column sums of the hard alignment.
        w = attn.sum(2)
        if self.use_sdp:
            l_length = self.dp(x, x_mask, w, g=g)
            l_length = l_length / torch.sum(x_mask)
        else:
            logw_ = torch.log(w + 1e-6) * x_mask
            logw = self.dp(x, x_mask, g=g)
            l_length = torch.sum((logw - logw_)**2, [1, 2]) / torch.sum(x_mask)  # for averaging

        # expand prior statistics from text frames to spectrogram frames
        m_p = torch.matmul(attn.squeeze(1), m_p.transpose(1, 2)).transpose(1, 2)
        logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose(1, 2)

        # Decode only a random slice of z to bound memory during training.
        z_slice, ids_slice = commons.rand_slice_segments(z, y_lengths, self.segment_size)
        o = self.dec(z_slice, g=g)
        return o, l_length, attn, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)

    def infer(self, x, x_lengths, sid=None, noise_scale=1, length_scale=1, noise_scale_w=1., max_len=None):
        """Inference pass: predict durations, expand the prior, invert the
        flow, and decode the waveform (optionally truncated to max_len frames)."""
        x, m_p, logs_p, x_mask = self.enc_p(x, x_lengths)
        if self.n_speakers > 0:
            g = self.emb_g(sid).unsqueeze(-1)  # [b, h, 1]
        else:
            g = None

        if self.use_sdp:
            logw = self.dp(x, x_mask, g=g, reverse=True, noise_scale=noise_scale_w)
        else:
            logw = self.dp(x, x_mask, g=g)
        # length_scale > 1 slows speech down; durations are rounded up per frame.
        w = torch.exp(logw) * x_mask * length_scale
        w_ceil = torch.ceil(w)
        y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
        y_mask = torch.unsqueeze(commons.sequence_mask(y_lengths, None), 1).to(x_mask.dtype)
        attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
        attn = commons.generate_path(w_ceil, attn_mask)

        m_p = torch.matmul(attn.squeeze(1), m_p.transpose(1, 2)).transpose(1, 2)  # [b, t', t], [b, t, d] -> [b, d, t']
        logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose(1, 2)  # [b, t', t], [b, t, d] -> [b, d, t']

        # Sample from the expanded prior, then invert the flow into z-space.
        z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
        z = self.flow(z_p, y_mask, g=g, reverse=True)
        o = self.dec((z * y_mask)[:, :, :max_len], g=g)
        return o, attn, y_mask, (z, z_p, m_p, logs_p)

    def voice_conversion(self, y, y_lengths, sid_src, sid_tgt):
        """Convert a source speaker's audio to a target speaker by routing the
        latent through the flow with source then target conditioning."""
        assert self.n_speakers > 0, "n_speakers have to be larger than 0."
        g_src = self.emb_g(sid_src).unsqueeze(-1)
        g_tgt = self.emb_g(sid_tgt).unsqueeze(-1)
        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g_src)
        z_p = self.flow(z, y_mask, g=g_src)
        z_hat = self.flow(z_p, y_mask, g=g_tgt, reverse=True)
        o_hat = self.dec(z_hat * y_mask, g=g_tgt)
        return o_hat, y_mask, (z, z_p, z_hat)

390
tts/vits/modules.py Normal file
View File

@ -0,0 +1,390 @@
import copy
import math
import numpy as np
import scipy
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm
import commons
from commons import init_weights, get_padding
from transforms import piecewise_rational_quadratic_transform
LRELU_SLOPE = 0.1
class LayerNorm(nn.Module):
    """Layer normalization over the channel axis of [b, channels, t] tensors."""

    def __init__(self, channels, eps=1e-5):
        super().__init__()
        self.channels = channels
        self.eps = eps
        self.gamma = nn.Parameter(torch.ones(channels))
        self.beta = nn.Parameter(torch.zeros(channels))

    def forward(self, x):
        # Move channels last, normalize over them, then restore the layout.
        y = x.transpose(1, -1)
        y = F.layer_norm(y, (self.channels,), self.gamma, self.beta, self.eps)
        return y.transpose(1, -1)
class ConvReluNorm(nn.Module):
    """Stack of Conv1d -> LayerNorm -> ReLU -> Dropout with a zero-initialized
    residual projection (the block starts as the identity mapping)."""

    def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout):
        super().__init__()
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.p_dropout = p_dropout
        assert n_layers > 1, "Number of layers should be larger than 0."

        self.conv_layers = nn.ModuleList()
        self.norm_layers = nn.ModuleList()
        self.conv_layers.append(
            nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
        self.norm_layers.append(LayerNorm(hidden_channels))
        self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout))
        for _ in range(n_layers - 1):
            self.conv_layers.append(
                nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
            self.norm_layers.append(LayerNorm(hidden_channels))
        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
        self.proj.weight.data.zero_()
        self.proj.bias.data.zero_()

    def forward(self, x, x_mask):
        residual_input = x
        for conv, norm in zip(self.conv_layers, self.norm_layers):
            x = self.relu_drop(norm(conv(x * x_mask)))
        return (residual_input + self.proj(x)) * x_mask
class DDSConv(nn.Module):
    """
    Dialted and Depth-Separable Convolution
    """

    def __init__(self, channels, kernel_size, n_layers, p_dropout=0.):
        super().__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.p_dropout = p_dropout

        self.drop = nn.Dropout(p_dropout)
        self.convs_sep = nn.ModuleList()
        self.convs_1x1 = nn.ModuleList()
        self.norms_1 = nn.ModuleList()
        self.norms_2 = nn.ModuleList()
        for layer_idx in range(n_layers):
            # Dilation grows geometrically so the receptive field covers
            # kernel_size ** n_layers positions.
            dilation = kernel_size ** layer_idx
            padding = (kernel_size * dilation - dilation) // 2
            self.convs_sep.append(nn.Conv1d(
                channels, channels, kernel_size,
                groups=channels, dilation=dilation, padding=padding))
            self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
            self.norms_1.append(LayerNorm(channels))
            self.norms_2.append(LayerNorm(channels))

    def forward(self, x, x_mask, g=None):
        if g is not None:
            x = x + g
        for sep_conv, pointwise, norm_a, norm_b in zip(
                self.convs_sep, self.convs_1x1, self.norms_1, self.norms_2):
            h = sep_conv(x * x_mask)
            h = F.gelu(norm_a(h))
            h = pointwise(h)
            h = F.gelu(norm_b(h))
            x = x + self.drop(h)
        return x * x_mask
class WN(torch.nn.Module):
    """Non-causal WaveNet block: dilated convolutions with gated tanh/sigmoid
    activations and residual/skip connections, optionally conditioned on a
    global vector g (projected once and sliced per layer)."""

    def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0):
        super(WN, self).__init__()
        assert(kernel_size % 2 == 1)
        self.hidden_channels = hidden_channels
        # Bug fix: the original line ended with a stray comma, which stored
        # kernel_size as a one-element tuple instead of an int.
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.gin_channels = gin_channels
        self.p_dropout = p_dropout

        self.in_layers = torch.nn.ModuleList()
        self.res_skip_layers = torch.nn.ModuleList()
        self.drop = nn.Dropout(p_dropout)

        if gin_channels != 0:
            # One shared 1x1 conv produces the conditioning for all layers.
            cond_layer = torch.nn.Conv1d(gin_channels, 2*hidden_channels*n_layers, 1)
            self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight')

        for i in range(n_layers):
            dilation = dilation_rate ** i
            padding = int((kernel_size * dilation - dilation) / 2)
            in_layer = torch.nn.Conv1d(hidden_channels, 2*hidden_channels, kernel_size,
                                       dilation=dilation, padding=padding)
            in_layer = torch.nn.utils.weight_norm(in_layer, name='weight')
            self.in_layers.append(in_layer)

            # last one is not necessary
            if i < n_layers - 1:
                res_skip_channels = 2 * hidden_channels
            else:
                res_skip_channels = hidden_channels

            res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
            res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name='weight')
            self.res_skip_layers.append(res_skip_layer)

    def forward(self, x, x_mask, g=None, **kwargs):
        output = torch.zeros_like(x)
        n_channels_tensor = torch.IntTensor([self.hidden_channels])

        if g is not None:
            g = self.cond_layer(g)

        for i in range(self.n_layers):
            x_in = self.in_layers[i](x)
            if g is not None:
                # Slice this layer's share of the pre-computed conditioning.
                cond_offset = i * 2 * self.hidden_channels
                g_l = g[:, cond_offset:cond_offset+2*self.hidden_channels, :]
            else:
                g_l = torch.zeros_like(x_in)

            acts = commons.fused_add_tanh_sigmoid_multiply(
                x_in,
                g_l,
                n_channels_tensor)
            acts = self.drop(acts)

            res_skip_acts = self.res_skip_layers[i](acts)
            if i < self.n_layers - 1:
                # First half feeds the residual path, second half the skip sum.
                res_acts = res_skip_acts[:, :self.hidden_channels, :]
                x = (x + res_acts) * x_mask
                output = output + res_skip_acts[:, self.hidden_channels:, :]
            else:
                output = output + res_skip_acts
        return output * x_mask

    def remove_weight_norm(self):
        # Fuse weight norm into plain weights for inference/export.
        if self.gin_channels != 0:
            torch.nn.utils.remove_weight_norm(self.cond_layer)
        for l in self.in_layers:
            torch.nn.utils.remove_weight_norm(l)
        for l in self.res_skip_layers:
            torch.nn.utils.remove_weight_norm(l)
class ResBlock1(torch.nn.Module):
    """HiFi-GAN residual block, variant 1: three (dilated conv, plain conv)
    pairs, each wrapped in a leaky-ReLU residual connection."""

    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
        super(ResBlock1, self).__init__()
        # Dilated branch convolutions.
        self.convs1 = nn.ModuleList([
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                               padding=get_padding(kernel_size, dilation[0]))),
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                               padding=get_padding(kernel_size, dilation[1]))),
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
                               padding=get_padding(kernel_size, dilation[2])))
        ])
        self.convs1.apply(init_weights)
        # Non-dilated follow-up convolutions, paired one-to-one with convs1.
        self.convs2 = nn.ModuleList([
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                               padding=get_padding(kernel_size, 1))),
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                               padding=get_padding(kernel_size, 1))),
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                               padding=get_padding(kernel_size, 1)))
        ])
        self.convs2.apply(init_weights)

    def forward(self, x, x_mask=None):
        for c1, c2 in zip(self.convs1, self.convs2):
            xt = F.leaky_relu(x, LRELU_SLOPE)
            if x_mask is not None:
                xt = xt * x_mask
            xt = c1(xt)
            xt = F.leaky_relu(xt, LRELU_SLOPE)
            if x_mask is not None:
                xt = xt * x_mask
            xt = c2(xt)
            x = xt + x
        if x_mask is not None:
            x = x * x_mask
        return x

    def remove_weight_norm(self):
        # Fuse weight norm into plain weights for inference/export.
        for l in self.convs1:
            remove_weight_norm(l)
        for l in self.convs2:
            remove_weight_norm(l)
class ResBlock2(torch.nn.Module):
    """HiFi-GAN residual block, variant 2 (the lighter one): two dilated
    convolutions, each wrapped in a leaky-ReLU residual connection."""

    def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
        super(ResBlock2, self).__init__()
        self.convs = nn.ModuleList([
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=d,
                               padding=get_padding(kernel_size, d)))
            for d in dilation
        ])
        self.convs.apply(init_weights)

    def forward(self, x, x_mask=None):
        for conv in self.convs:
            h = F.leaky_relu(x, LRELU_SLOPE)
            if x_mask is not None:
                h = h * x_mask
            x = conv(h) + x
        if x_mask is not None:
            x = x * x_mask
        return x

    def remove_weight_norm(self):
        for conv in self.convs:
            remove_weight_norm(conv)
class Log(nn.Module):
    """Invertible log flow: y = log(clamp(x, 1e-5)) forward, x = exp(y) inverse."""

    def forward(self, x, x_mask, reverse=False, **kwargs):
        if reverse:
            return torch.exp(x) * x_mask
        y = torch.log(torch.clamp_min(x, 1e-5)) * x_mask
        # d/dx log(x) = 1/x, so the log-determinant is the sum of -log(x).
        logdet = torch.sum(-y, [1, 2])
        return y, logdet
class Flip(nn.Module):
    """Invertible channel-reversal flow (its own inverse; zero log-determinant)."""

    def forward(self, x, *args, reverse=False, **kwargs):
        flipped = torch.flip(x, [1])
        if reverse:
            return flipped
        logdet = torch.zeros(flipped.size(0)).to(dtype=flipped.dtype, device=flipped.device)
        return flipped, logdet
class ElementwiseAffine(nn.Module):
    """Per-channel affine flow y = m + exp(logs) * x; identity at initialization."""

    def __init__(self, channels):
        super().__init__()
        self.channels = channels
        self.m = nn.Parameter(torch.zeros(channels, 1))
        self.logs = nn.Parameter(torch.zeros(channels, 1))

    def forward(self, x, x_mask, reverse=False, **kwargs):
        if reverse:
            return (x - self.m) * torch.exp(-self.logs) * x_mask
        y = (self.m + torch.exp(self.logs) * x) * x_mask
        # log|dy/dx| = logs per element; sum over masked positions.
        logdet = torch.sum(self.logs * x_mask, [1, 2])
        return y, logdet
class ResidualCouplingLayer(nn.Module):
    """Affine coupling layer: transforms the second half of the channels
    conditioned on the first half (via a WN network).

    With mean_only=True only a shift is predicted (log-scale fixed at 0),
    making the transform volume-preserving.
    """

    def __init__(self,
                 channels,
                 hidden_channels,
                 kernel_size,
                 dilation_rate,
                 n_layers,
                 p_dropout=0,
                 gin_channels=0,
                 mean_only=False):
        assert channels % 2 == 0, "channels should be divisible by 2"
        super().__init__()
        self.channels = channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.half_channels = channels // 2
        self.mean_only = mean_only

        self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
        self.enc = WN(hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=p_dropout, gin_channels=gin_channels)
        # Zero-initialized projection so the flow starts as the identity.
        self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1)
        self.post.weight.data.zero_()
        self.post.bias.data.zero_()

    def forward(self, x, x_mask, g=None, reverse=False):
        """Returns (x, logdet) in the forward direction, or x alone when reverse=True."""
        x0, x1 = torch.split(x, [self.half_channels]*2, 1)
        h = self.pre(x0) * x_mask
        h = self.enc(h, x_mask, g=g)
        stats = self.post(h) * x_mask
        if not self.mean_only:
            m, logs = torch.split(stats, [self.half_channels]*2, 1)
        else:
            m = stats
            logs = torch.zeros_like(m)

        if not reverse:
            x1 = m + x1 * torch.exp(logs) * x_mask
            x = torch.cat([x0, x1], 1)
            logdet = torch.sum(logs, [1, 2])
            return x, logdet
        else:
            x1 = (x1 - m) * torch.exp(-logs) * x_mask
            x = torch.cat([x0, x1], 1)
            return x
class ConvFlow(nn.Module):
    """Spline coupling flow: predicts rational-quadratic spline parameters for
    the second half of the channels from the first half."""

    def __init__(self, in_channels, filter_channels, kernel_size, n_layers, num_bins=10, tail_bound=5.0):
        super().__init__()
        self.in_channels = in_channels
        self.filter_channels = filter_channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.num_bins = num_bins
        self.tail_bound = tail_bound
        self.half_channels = in_channels // 2

        self.pre = nn.Conv1d(self.half_channels, filter_channels, 1)
        self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.)
        # Per channel: num_bins widths + num_bins heights + (num_bins - 1) derivatives.
        self.proj = nn.Conv1d(filter_channels, self.half_channels * (num_bins * 3 - 1), 1)
        # Zero init so the flow starts near the identity transform.
        self.proj.weight.data.zero_()
        self.proj.bias.data.zero_()

    def forward(self, x, x_mask, g=None, reverse=False):
        x0, x1 = torch.split(x, [self.half_channels]*2, 1)
        h = self.pre(x0)
        h = self.convs(h, x_mask, g=g)
        h = self.proj(h) * x_mask

        b, c, t = x0.shape
        h = h.reshape(b, c, -1, t).permute(0, 1, 3, 2)  # [b, cx?, t] -> [b, c, t, ?]

        # Scale widths/heights down to keep initial spline parameters small.
        unnormalized_widths = h[..., :self.num_bins] / math.sqrt(self.filter_channels)
        unnormalized_heights = h[..., self.num_bins:2*self.num_bins] / math.sqrt(self.filter_channels)
        unnormalized_derivatives = h[..., 2 * self.num_bins:]

        x1, logabsdet = piecewise_rational_quadratic_transform(x1,
            unnormalized_widths,
            unnormalized_heights,
            unnormalized_derivatives,
            inverse=reverse,
            tails='linear',
            tail_bound=self.tail_bound
        )

        x = torch.cat([x0, x1], 1) * x_mask
        logdet = torch.sum(logabsdet * x_mask, [1, 2])
        if not reverse:
            return x, logdet
        else:
            return x

View File

@ -0,0 +1,19 @@
import numpy as np
import torch
from .monotonic_align.core import maximum_path_c
def maximum_path(neg_cent, mask):
    """ Cython optimized version.
    neg_cent: [b, t_t, t_s]
    mask: [b, t_t, t_s]

    Returns a hard monotonic alignment path (0/1) with the same shape,
    on the input's original device and dtype.
    """
    device = neg_cent.device
    dtype = neg_cent.dtype
    # The Cython kernel operates on contiguous float32/int32 numpy buffers.
    neg_cent = neg_cent.data.cpu().numpy().astype(np.float32)
    path = np.zeros(neg_cent.shape, dtype=np.int32)

    # Per-item valid lengths along each axis, recovered from the mask.
    t_t_max = mask.sum(1)[:, 0].data.cpu().numpy().astype(np.int32)
    t_s_max = mask.sum(2)[:, 0].data.cpu().numpy().astype(np.int32)
    maximum_path_c(path, neg_cent, t_t_max, t_s_max)
    return torch.from_numpy(path).to(device=device, dtype=dtype)

View File

@ -0,0 +1,42 @@
cimport cython
from cython.parallel import prange
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil:
    # Monotonic-alignment dynamic program for one batch element.
    # Forward pass: value[y, x] accumulates the best path score ending at
    # (y, x); backward pass writes the argmax path into `path` as 0/1 flags.
    cdef int x
    cdef int y
    cdef float v_prev
    cdef float v_cur
    cdef float tmp
    cdef int index = t_x - 1

    for y in range(t_y):
        # Only visit cells reachable under the monotonic/boundary constraints.
        for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
            if x == y:
                # On the diagonal there is no "stay at x" predecessor.
                v_cur = max_neg_val
            else:
                v_cur = value[y-1, x]
            if x == 0:
                if y == 0:
                    v_prev = 0.
                else:
                    v_prev = max_neg_val
            else:
                v_prev = value[y-1, x-1]
            value[y, x] += max(v_prev, v_cur)

    # Backtrack from the last column; step left when the diagonal move wins.
    for y in range(t_y - 1, -1, -1):
        path[y, index] = 1
        if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]):
            index = index - 1
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil:
    # Batched entry point: runs maximum_path_each over the batch in parallel
    # (prange releases the GIL across batch items).
    cdef int b = paths.shape[0]
    cdef int i
    for i in prange(b, nogil=True):
        maximum_path_each(paths[i], values[i], t_ys[i], t_xs[i])

View File

@ -0,0 +1,9 @@
from distutils.core import setup
from Cython.Build import cythonize
import numpy
# Builds the Cython monotonic-alignment extension; run from this directory:
#   python setup.py build_ext --inplace
setup(
    name = 'monotonic_align',
    ext_modules = cythonize("core.pyx"),
    include_dirs=[numpy.get_include()]
)

25
tts/vits/preprocess.py Normal file
View File

@ -0,0 +1,25 @@
import argparse
import text
from utils import load_filepaths_and_text
if __name__ == '__main__':
    # Pre-clean the text column of the given filelists so training can use
    # cleaned_text_to_sequence directly; writes "<filelist>.<out_extension>".
    parser = argparse.ArgumentParser()
    parser.add_argument("--out_extension", default="cleaned")
    parser.add_argument("--text_index", default=1, type=int)
    parser.add_argument("--filelists", nargs="+", default=["filelists/ljs_audio_text_val_filelist.txt", "filelists/ljs_audio_text_test_filelist.txt"])
    parser.add_argument("--text_cleaners", nargs="+", default=["english_cleaners2"])

    args = parser.parse_args()

    for filelist in args.filelists:
        print("START:", filelist)
        filepaths_and_text = load_filepaths_and_text(filelist)
        # Replace the text column in place with its cleaned form.
        for i in range(len(filepaths_and_text)):
            original_text = filepaths_and_text[i][args.text_index]
            cleaned_text = text._clean_text(original_text, args.text_cleaners)
            filepaths_and_text[i][args.text_index] = cleaned_text

        new_filelist = filelist + "." + args.out_extension
        with open(new_filelist, "w", encoding="utf-8") as f:
            f.writelines(["|".join(x) + "\n" for x in filepaths_and_text])

17
tts/vits/requirements.txt Normal file
View File

@ -0,0 +1,17 @@
Cython==0.29.21
librosa==0.8.0
matplotlib
numpy
scipy
unidecode==1.3.4
jamo==0.4.1
pypinyin==0.44.0
jieba==0.42.1
protobuf==3.19.0
cn2an==0.5.17
inflect==6.0.0
eng_to_ipa==0.0.2
ko_pron==1.3
indic_transliteration==2.3.37
num_thai==0.0.5
opencc==1.1.1

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

19
tts/vits/text/LICENSE Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2017 Keith Ito
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

56
tts/vits/text/__init__.py Normal file
View File

@ -0,0 +1,56 @@
""" from https://github.com/keithito/tacotron """
from text import cleaners
from text.symbols import symbols
# Mappings from symbol to numeric ID and vice versa
# (built once at import time from text.symbols.symbols):
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}
def text_to_sequence(text, cleaner_names):
    '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
    Args:
      text: string to convert to a sequence
      cleaner_names: names of the cleaner functions to run the text through
    Returns:
      List of integers corresponding to the symbols in the text
    '''
    clean_text = _clean_text(text, cleaner_names)
    # Characters without an entry in the symbol table are silently dropped.
    return [_symbol_to_id[symbol] for symbol in clean_text if symbol in _symbol_to_id]
def cleaned_text_to_sequence(cleaned_text):
    '''Converts an already-cleaned string of text to a sequence of symbol IDs.
    Args:
      cleaned_text: string to convert to a sequence
    Returns:
      List of integers corresponding to the symbols in the text
    '''
    # Same lookup as text_to_sequence, minus the cleaning step.
    return [_symbol_to_id[symbol] for symbol in cleaned_text if symbol in _symbol_to_id]
def sequence_to_text(sequence):
    '''Converts a sequence of IDs back to a string'''
    # str.join builds the result in one pass; the original per-symbol `+=`
    # concatenation is quadratic in the worst case.
    return ''.join(_id_to_symbol[symbol_id] for symbol_id in sequence)
def _clean_text(text, cleaner_names):
    '''Applies each named cleaner function from text.cleaners to the text, in order.'''
    for cleaner_name in cleaner_names:
        cleaner_fn = getattr(cleaners, cleaner_name)
        if not cleaner_fn:
            raise Exception('Unknown cleaner: %s' % cleaner_name)
        text = cleaner_fn(text)
    return text

View File

@ -0,0 +1,59 @@
import re
import cn2an
import opencc
converter = opencc.OpenCC('jyutjyu')
# List of (Latin alphabet, ipa) pairs:
# Each uppercase letter is matched by a compiled regex and mapped to the
# Cantonese IPA reading of the letter name.
# NOTE(review): the entries for 'M', 'N', 'Q' and 'Y' look truncated — they
# lack the tone letters present on the other entries; verify against the
# upstream table, as this copy may have lost characters in transit.
_latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
    ('A', 'ei˥'),
    ('B', 'biː˥'),
    ('C', 'siː˥'),
    ('D', 'tiː˥'),
    ('E', 'iː˥'),
    ('F', 'e˥fuː˨˩'),
    ('G', 'tsiː˥'),
    ('H', 'ɪk̚˥tsʰyː˨˩'),
    ('I', 'ɐi˥'),
    ('J', 'tsei˥'),
    ('K', 'kʰei˥'),
    ('L', 'e˥llou˨˩'),
    ('M', 'ɛː'),
    ('N', 'ɛː'),
    ('O', 'ou˥'),
    ('P', 'pʰiː˥'),
    ('Q', 'kʰiː'),
    ('R', 'aː˥lou˨˩'),
    ('S', 'ɛː˥siː˨˩'),
    ('T', 'tʰiː˥'),
    ('U', 'juː˥'),
    ('V', 'wiː˥'),
    ('W', 'tʊk̚˥piː˥juː˥'),
    ('X', 'ɪk̚˥siː˨˩'),
    ('Y', 'waː'),
    ('Z', 'iː˨sɛːt̚˥')
]]
def number_to_cantonese(text):
    """Spell out Arabic numerals (including decimals) as Chinese numerals."""
    def _spell_out(match):
        return cn2an.an2cn(match.group())
    return re.sub(r'\d+(?:\.?\d+)?', _spell_out, text)
def latin_to_ipa(text):
    """Replace uppercase Latin letters with their Cantonese IPA letter names."""
    result = text
    for pattern, ipa_reading in _latin_to_ipa:
        result = re.sub(pattern, ipa_reading, result)
    return result
def cantonese_to_ipa(text):
    """Convert Cantonese text to an IPA transcription with ASCII punctuation."""
    text = number_to_cantonese(text.upper())
    text = converter.convert(text).replace('-', '').replace('$', ' ')
    text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group())+' ', text)
    # Bug fix: the fullwidth punctuation characters (,?!) had been stripped
    # from this copy, leaving three identical patterns r'\s*\s*' that match the
    # empty string everywhere (inserting ', '/'? '/'! ' between every pair of
    # characters) and a [、;:] substitution that deleted instead of normalizing.
    # Restored the intended fullwidth-to-ASCII punctuation normalization.
    text = re.sub(r'[、;:]', ',', text)
    text = re.sub(r'\s*,\s*', ', ', text)
    text = re.sub(r'\s*。\s*', '. ', text)
    text = re.sub(r'\s*?\s*', '? ', text)
    text = re.sub(r'\s*!\s*', '! ', text)
    text = re.sub(r'\s*$', '', text)
    return text

128
tts/vits/text/cleaners.py Normal file
View File

@ -0,0 +1,128 @@
import re
# from text.japanese import japanese_to_romaji_with_accent, japanese_to_ipa, japanese_to_ipa2, japanese_to_ipa3
# from text.korean import latin_to_hangul, number_to_hangul, divide_hangul, korean_to_lazy_ipa, korean_to_ipa
from text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
# from text.sanskrit import devanagari_to_ipa
# from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2
# from text.thai import num_to_thai, latin_to_thai
# from text.shanghainese import shanghainese_to_ipa
# from text.cantonese import cantonese_to_ipa
# from text.ngu_dialect import ngu_dialect_to_ipa
# def japanese_cleaners(text):
# text = japanese_to_romaji_with_accent(text)
# text = re.sub(r'([A-Za-z])$', r'\1.', text)
# return text
#
#
# def japanese_cleaners2(text):
# return japanese_cleaners(text).replace('ts', 'ʦ').replace('...', '…')
#
#
# def korean_cleaners(text):
# '''Pipeline for Korean text'''
# text = latin_to_hangul(text)
# text = number_to_hangul(text)
# text = divide_hangul(text)
# text = re.sub(r'([\u3131-\u3163])$', r'\1.', text)
# return text
#
def chinese_cleaners(text):
    '''Pipeline for Chinese text'''
    for transform in (number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo):
        text = transform(text)
    # Ensure a trailing tone mark is followed by a Chinese full stop.
    return re.sub(r'([ˉˊˇˋ˙])$', r'\1。', text)
def zh_ja_mixture_cleaners(text):
    """Clean mixed text with [ZH]...[ZH] and [JA]...[JA] tagged spans.

    NOTE(review): japanese_to_romaji_with_accent comes from the commented-out
    `text.japanese` import above, so any [JA] span currently raises NameError
    — confirm whether [JA] input is expected in this deployment.
    """
    text = re.sub(r'\[ZH\](.*?)\[ZH\]',
                  lambda x: chinese_to_romaji(x.group(1))+' ', text)
    text = re.sub(r'\[JA\](.*?)\[JA\]', lambda x: japanese_to_romaji_with_accent(
        x.group(1)).replace('ts', 'ʦ').replace('u', 'ɯ').replace('...', '')+' ', text)
    text = re.sub(r'\s+$', '', text)
    # Append a period when the string does not already end in punctuation.
    text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
    return text
# def sanskrit_cleaners(text):
# text = text.replace('॥', '।').replace('ॐ', 'ओम्')
# text = re.sub(r'([^।])$', r'\1।', text)
# return text
#
#
# def cjks_cleaners(text):
# text = re.sub(r'\[ZH\](.*?)\[ZH\]',
# lambda x: chinese_to_lazy_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[JA\](.*?)\[JA\]',
# lambda x: japanese_to_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[KO\](.*?)\[KO\]',
# lambda x: korean_to_lazy_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[SA\](.*?)\[SA\]',
# lambda x: devanagari_to_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[EN\](.*?)\[EN\]',
# lambda x: english_to_lazy_ipa(x.group(1))+' ', text)
# text = re.sub(r'\s+$', '', text)
# text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
# return text
#
#
# def cjke_cleaners(text):
# text = re.sub(r'\[ZH\](.*?)\[ZH\]', lambda x: chinese_to_lazy_ipa(x.group(1)).replace(
# 'ʧ', 'tʃ').replace('ʦ', 'ts').replace('ɥan', 'ɥæn')+' ', text)
# text = re.sub(r'\[JA\](.*?)\[JA\]', lambda x: japanese_to_ipa(x.group(1)).replace('ʧ', 'tʃ').replace(
# 'ʦ', 'ts').replace('ɥan', 'ɥæn').replace('ʥ', 'dz')+' ', text)
# text = re.sub(r'\[KO\](.*?)\[KO\]',
# lambda x: korean_to_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[EN\](.*?)\[EN\]', lambda x: english_to_ipa2(x.group(1)).replace('ɑ', 'a').replace(
# 'ɔ', 'o').replace('ɛ', 'e').replace('ɪ', 'i').replace('ʊ', 'u')+' ', text)
# text = re.sub(r'\s+$', '', text)
# text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
# return text
#
#
# def cjke_cleaners2(text):
# text = re.sub(r'\[ZH\](.*?)\[ZH\]',
# lambda x: chinese_to_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[JA\](.*?)\[JA\]',
# lambda x: japanese_to_ipa2(x.group(1))+' ', text)
# text = re.sub(r'\[KO\](.*?)\[KO\]',
# lambda x: korean_to_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[EN\](.*?)\[EN\]',
# lambda x: english_to_ipa2(x.group(1))+' ', text)
# text = re.sub(r'\s+$', '', text)
# text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
# return text
#
#
# def thai_cleaners(text):
# text = num_to_thai(text)
# text = latin_to_thai(text)
# return text
#
#
# def shanghainese_cleaners(text):
# text = shanghainese_to_ipa(text)
# text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
# return text
#
#
# def chinese_dialect_cleaners(text):
# text = re.sub(r'\[ZH\](.*?)\[ZH\]',
# lambda x: chinese_to_ipa2(x.group(1))+' ', text)
# text = re.sub(r'\[JA\](.*?)\[JA\]',
# lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
# text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
# '˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
# text = re.sub(r'\[GD\](.*?)\[GD\]',
# lambda x: cantonese_to_ipa(x.group(1))+' ', text)
# text = re.sub(r'\[EN\](.*?)\[EN\]',
# lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
# text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
# 1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
# text = re.sub(r'\s+$', '', text)
# text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
# return text

188
tts/vits/text/english.py Normal file
View File

@ -0,0 +1,188 @@
""" from https://github.com/keithito/tacotron """
'''
Cleaners are transformations that run over the input text at both training and eval time.
Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
hyperparameter. Some cleaners are English-specific. You'll typically want to use:
1. "english_cleaners" for English text
2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
the symbols in symbols.py to match your data).
'''
# Regular expression matching whitespace:
import re
import inflect
from unidecode import unidecode
import eng_to_ipa as ipa
_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_number_re = re.compile(r'[0-9]+')
# List of (regular expression, replacement) pairs for abbreviations:
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
('mrs', 'misess'),
('mr', 'mister'),
('dr', 'doctor'),
('st', 'saint'),
('co', 'company'),
('jr', 'junior'),
('maj', 'major'),
('gen', 'general'),
('drs', 'doctors'),
('rev', 'reverend'),
('lt', 'lieutenant'),
('hon', 'honorable'),
('sgt', 'sergeant'),
('capt', 'captain'),
('esq', 'esquire'),
('ltd', 'limited'),
('col', 'colonel'),
('ft', 'fort'),
]]
# List of (ipa, lazy ipa) pairs:
_lazy_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
('r', 'ɹ'),
('æ', 'e'),
('ɑ', 'a'),
('ɔ', 'o'),
('ð', 'z'),
('θ', 's'),
('ɛ', 'e'),
('ɪ', 'i'),
('ʊ', 'u'),
('ʒ', 'ʥ'),
('ʤ', 'ʥ'),
('ˈ', ''),
]]
# List of (ipa, lazy ipa2) pairs:
_lazy_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [
('r', 'ɹ'),
('ð', 'z'),
('θ', 's'),
('ʒ', 'ʑ'),
('ʤ', ''),
('ˈ', ''),
]]
# List of (ipa, ipa2) pairs
_ipa_to_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [
('r', 'ɹ'),
('ʤ', ''),
('ʧ', '')
]]
def expand_abbreviations(text):
    """Replace abbreviations ("mr.", "dr.", ...) with their spoken forms."""
    for pattern, spoken in _abbreviations:
        text = re.sub(pattern, spoken, text)
    return text
def collapse_whitespace(text):
    """Collapse each run of whitespace characters into a single space."""
    return re.compile(r'\s+').sub(' ', text)
def _remove_commas(m):
return m.group(1).replace(',', '')
def _expand_decimal_point(m):
return m.group(1).replace('.', ' point ')
def _expand_dollars(m):
match = m.group(1)
parts = match.split('.')
if len(parts) > 2:
return match + ' dollars' # Unexpected format
dollars = int(parts[0]) if parts[0] else 0
cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
if dollars and cents:
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
cent_unit = 'cent' if cents == 1 else 'cents'
return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
elif dollars:
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
return '%s %s' % (dollars, dollar_unit)
elif cents:
cent_unit = 'cent' if cents == 1 else 'cents'
return '%s %s' % (cents, cent_unit)
else:
return 'zero dollars'
def _expand_ordinal(m):
    """re.sub callback: spell out an ordinal ("2nd" -> "second") via inflect."""
    ordinal = m.group(0)
    return _inflect.number_to_words(ordinal)
def _expand_number(m):
    """re.sub callback: spell out an integer, reading 1001-2999 as years."""
    value = int(m.group(0))
    if not 1000 < value < 3000:
        return _inflect.number_to_words(value, andword='')
    if value == 2000:
        return 'two thousand'
    if 2000 < value < 2010:
        return 'two thousand ' + _inflect.number_to_words(value % 100)
    if value % 100 == 0:
        return _inflect.number_to_words(value // 100) + ' hundred'
    # Year style: "nineteen eighty-four" (pairs of digits, zero read as "oh").
    return _inflect.number_to_words(
        value, andword='', zero='oh', group=2).replace(', ', ' ')
def normalize_numbers(text):
    """Expand every numeric pattern: commas, currency, decimals, ordinals, ints."""
    substitutions = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r'\1 pounds'),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, repl in substitutions:
        text = re.sub(pattern, repl, text)
    return text
def mark_dark_l(text):
    """Rewrite coda /l/ as dark-l 'ɫ' when no vowel follows before a space/end."""
    return re.sub(r'l([^aeiouæɑɔəɛɪʊ ]*(?: |$))', r'ɫ\1', text)
def english_to_ipa(text):
    """Convert English text to IPA phonemes via eng_to_ipa.

    Steps: ASCII-fold and lowercase, expand abbreviations, normalise
    numbers, then convert and collapse whitespace.
    """
    text = unidecode(text).lower()
    text = expand_abbreviations(text)
    text = normalize_numbers(text)
    phonemes = ipa.convert(text)
    phonemes = collapse_whitespace(phonemes)
    return phonemes
def english_to_lazy_ipa(text):
    """IPA with the simplified ("lazy") symbol set from _lazy_ipa."""
    result = english_to_ipa(text)
    for pattern, simple in _lazy_ipa:
        result = re.sub(pattern, simple, result)
    return result
def english_to_ipa2(text):
    """IPA variant with dark-l marking plus the _ipa_to_ipa2 substitutions."""
    result = mark_dark_l(english_to_ipa(text))
    for pattern, repl in _ipa_to_ipa2:
        result = re.sub(pattern, repl, result)
    return result.replace('...', '')
def english_to_lazy_ipa2(text):
    """IPA with the alternative simplified symbol set from _lazy_ipa2."""
    result = english_to_ipa(text)
    for pattern, simple in _lazy_ipa2:
        result = re.sub(pattern, simple, result)
    return result

153
tts/vits/text/japanese.py Normal file
View File

@ -0,0 +1,153 @@
import re
from unidecode import unidecode
import pyopenjtalk
# Regular expression matching Japanese without punctuation marks:
_japanese_characters = re.compile(
r'[A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
# Regular expression matching non-Japanese characters or punctuation marks:
_japanese_marks = re.compile(
r'[^A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
# List of (symbol, Japanese) pairs for marks:
_symbols_to_japanese = [(re.compile('%s' % x[0]), x[1]) for x in [
('', 'パーセント')
]]
# List of (romaji, ipa) pairs for marks:
_romaji_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
('ts', 'ʦ'),
('u', 'ɯ'),
('j', 'ʥ'),
('y', 'j'),
('ni', 'n^i'),
('nj', 'n^'),
('hi', 'çi'),
('hj', 'ç'),
('f', 'ɸ'),
('I', 'i*'),
('U', 'ɯ*'),
('r', 'ɾ')
]]
# List of (romaji, ipa2) pairs for marks:
_romaji_to_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [
('u', 'ɯ'),
('ʧ', ''),
('j', ''),
('y', 'j'),
('ni', 'n^i'),
('nj', 'n^'),
('hi', 'çi'),
('hj', 'ç'),
('f', 'ɸ'),
('I', 'i*'),
('U', 'ɯ*'),
('r', 'ɾ')
]]
# List of (consonant, sokuon) pairs:
_real_sokuon = [(re.compile('%s' % x[0]), x[1]) for x in [
(r'Q([↑↓]*[kg])', r'k#\1'),
(r'Q([↑↓]*[tdjʧ])', r't#\1'),
(r'Q([↑↓]*[sʃ])', r's\1'),
(r'Q([↑↓]*[pb])', r'p#\1')
]]
# List of (consonant, hatsuon) pairs:
_real_hatsuon = [(re.compile('%s' % x[0]), x[1]) for x in [
(r'N([↑↓]*[pbm])', r'm\1'),
(r'N([↑↓]*[ʧʥj])', r'n^\1'),
(r'N([↑↓]*[tdn])', r'n\1'),
(r'N([↑↓]*[kg])', r'ŋ\1')
]]
def symbols_to_japanese(text):
    """Spell out symbols (e.g. '%') with their Japanese readings."""
    for pattern, reading in _symbols_to_japanese:
        text = re.sub(pattern, reading, text)
    return text
def japanese_to_romaji_with_accent(text):
    '''Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html'''
    # NOTE(review): the two `text += ''` lines under "Falling"/"Rising" append
    # nothing — upstream appends accent arrows (presumably '↓' / '↑'), which
    # appear to have been stripped in transit. Restore before relying on accent.
    text = symbols_to_japanese(text)
    # Split into Japanese runs and the non-Japanese marks between them.
    sentences = re.split(_japanese_marks, text)
    marks = re.findall(_japanese_marks, text)
    text = ''
    for i, sentence in enumerate(sentences):
        if re.match(_japanese_characters, sentence):
            if text != '':
                text += ' '
            # Full-context HTS labels carry phoneme + accent info per frame.
            labels = pyopenjtalk.extract_fullcontext(sentence)
            for n, label in enumerate(labels):
                phoneme = re.search(r'\-([^\+]*)\+', label).group(1)
                if phoneme not in ['sil', 'pau']:
                    text += phoneme.replace('ch', 'ʧ').replace('sh',
                                                               'ʃ').replace('cl', 'Q')
                else:
                    continue
                # n_moras = int(re.search(r'/F:(\d+)_', label).group(1))
                a1 = int(re.search(r"/A:(\-?[0-9]+)\+", label).group(1))
                a2 = int(re.search(r"\+(\d+)\+", label).group(1))
                a3 = int(re.search(r"\+(\d+)/", label).group(1))
                if re.search(r'\-([^\+]*)\+', labels[n + 1]).group(1) in ['sil', 'pau']:
                    a2_next = -1
                else:
                    a2_next = int(
                        re.search(r"\+(\d+)\+", labels[n + 1]).group(1))
                # Accent phrase boundary
                if a3 == 1 and a2_next == 1:
                    text += ' '
                # Falling
                elif a1 == 0 and a2_next == a2 + 1:
                    text += ''
                # Rising
                elif a2 == 1 and a2_next == 2:
                    text += ''
        if i < len(marks):
            text += unidecode(marks[i]).replace(' ', '')
    return text
def get_real_sokuon(text):
    """Resolve the placeholder 'Q' (sokuon) into its assimilated consonant."""
    for pattern, resolved in _real_sokuon:
        text = re.sub(pattern, resolved, text)
    return text
def get_real_hatsuon(text):
    """Resolve the placeholder 'N' (hatsuon) into its assimilated nasal."""
    for pattern, resolved in _real_hatsuon:
        text = re.sub(pattern, resolved, text)
    return text
def japanese_to_ipa(text):
    """Romaji-with-accent output mapped into IPA, long vowels marked with 'ː'."""
    # NOTE(review): replacing '...' with '' deletes ellipses outright — upstream
    # may have used '…' as the target; confirm.
    text = japanese_to_romaji_with_accent(text).replace('...', '')
    # Collapse repeated vowels into vowel + length marks.
    text = re.sub(
        r'([aiueo])\1+', lambda x: x.group(0)[0]+'ː'*(len(x.group(0))-1), text)
    text = get_real_sokuon(text)
    text = get_real_hatsuon(text)
    for regex, replacement in _romaji_to_ipa:
        text = re.sub(regex, replacement, text)
    return text
def japanese_to_ipa2(text):
    """Second IPA variant: same pipeline, but using the _romaji_to_ipa2 table."""
    text = japanese_to_romaji_with_accent(text).replace('...', '')
    text = get_real_sokuon(text)
    text = get_real_hatsuon(text)
    for regex, replacement in _romaji_to_ipa2:
        text = re.sub(regex, replacement, text)
    return text
def japanese_to_ipa3(text):
    """Third IPA variant built on japanese_to_ipa2 with diacritic rewrites."""
    text = japanese_to_ipa2(text).replace('n^', 'ȵ').replace(
        'ʃ', 'ɕ').replace('*', '\u0325').replace('#', '\u031a')
    # Collapse repeated vowels into vowel + length marks.
    text = re.sub(
        r'([aiɯeo])\1+', lambda x: x.group(0)[0]+'ː'*(len(x.group(0))-1), text)
    # NOTE(review): r'\' below is a syntax error — the replacement content
    # (upstream inserts an aspiration/glottal marker after the group, e.g.
    # r'\1ʔ'-style) appears to have been stripped in transit; restore before use.
    text = re.sub(r'((?:^|\s)(?:ts|tɕ|[kpt]))', r'\', text)
    return text

210
tts/vits/text/korean.py Normal file
View File

@ -0,0 +1,210 @@
import re
from jamo import h2j, j2hcj
import ko_pron
# This is a list of Korean classifiers preceded by pure Korean numerals.
_korean_classifiers = '군데 권 개 그루 닢 대 두 마리 모 모금 뭇 발 발짝 방 번 벌 보루 살 수 술 시 쌈 움큼 정 짝 채 척 첩 축 켤레 톨 통'
# List of (hangul, hangul divided) pairs:
_hangul_divided = [(re.compile('%s' % x[0]), x[1]) for x in [
('', 'ㄱㅅ'),
('', 'ㄴㅈ'),
('', 'ㄴㅎ'),
('', 'ㄹㄱ'),
('', 'ㄹㅁ'),
('', 'ㄹㅂ'),
('', 'ㄹㅅ'),
('', 'ㄹㅌ'),
('', 'ㄹㅍ'),
('', 'ㄹㅎ'),
('', 'ㅂㅅ'),
('', 'ㅗㅏ'),
('', 'ㅗㅐ'),
('', 'ㅗㅣ'),
('', 'ㅜㅓ'),
('', 'ㅜㅔ'),
('', 'ㅜㅣ'),
('', 'ㅡㅣ'),
('', 'ㅣㅏ'),
('', 'ㅣㅐ'),
('', 'ㅣㅓ'),
('', 'ㅣㅔ'),
('', 'ㅣㅗ'),
('', 'ㅣㅜ')
]]
# List of (Latin alphabet, hangul) pairs:
_latin_to_hangul = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
('a', '에이'),
('b', ''),
('c', ''),
('d', ''),
('e', ''),
('f', '에프'),
('g', ''),
('h', '에이치'),
('i', '아이'),
('j', '제이'),
('k', '케이'),
('l', ''),
('m', ''),
('n', ''),
('o', ''),
('p', ''),
('q', ''),
('r', '아르'),
('s', '에스'),
('t', ''),
('u', ''),
('v', '브이'),
('w', '더블유'),
('x', '엑스'),
('y', '와이'),
('z', '제트')
]]
# List of (ipa, lazy ipa) pairs:
_ipa_to_lazy_ipa = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
('t͡ɕ','ʧ'),
('d͡ʑ','ʥ'),
('ɲ','n^'),
('ɕ','ʃ'),
('ʷ','w'),
('ɭ','l`'),
('ʎ','ɾ'),
('ɣ','ŋ'),
('ɰ','ɯ'),
('ʝ','j'),
('ʌ','ə'),
('ɡ','g'),
('\u031a','#'),
('\u0348','='),
('\u031e',''),
('\u0320',''),
('\u0339','')
]]
def latin_to_hangul(text):
    """Transliterate Latin letters into their Hangul letter names."""
    for pattern, hangul in _latin_to_hangul:
        text = re.sub(pattern, hangul, text)
    return text
def divide_hangul(text):
    """Decompose Hangul syllables into compatibility-jamo sequences."""
    jamo_text = j2hcj(h2j(text))
    for pattern, divided in _hangul_divided:
        jamo_text = re.sub(pattern, divided, jamo_text)
    return jamo_text
def hangul_number(num, sino=True):
    '''Reference https://github.com/Kyubyong/g2pK'''
    # NOTE(review): many string literals in this function are empty ('') where
    # upstream g2pK has Hangul magnitude/zero characters (십/백/천/만/영, ...).
    # They appear to have been stripped in transit — restore before relying on
    # this function's output.
    num = re.sub(',', '', num)
    if num == '0':
        return ''
    if not sino and num == '20':
        return '스무'
    digits = '123456789'
    names = '일이삼사오육칠팔구'
    digit2name = {d: n for d, n in zip(digits, names)}
    # Native-Korean modifier and tens readings (used when sino=False).
    modifiers = '한 두 세 네 다섯 여섯 일곱 여덟 아홉'
    decimals = '열 스물 서른 마흔 쉰 예순 일흔 여든 아흔'
    digit2mod = {d: mod for d, mod in zip(digits, modifiers.split())}
    digit2dec = {d: dec for d, dec in zip(digits, decimals.split())}
    spelledout = []
    for i, digit in enumerate(num):
        i = len(num) - i - 1  # positional power of ten (ones place == 0)
        if sino:
            if i == 0:
                name = digit2name.get(digit, '')
            elif i == 1:
                name = digit2name.get(digit, '') + ''
                name = name.replace('일십', '')
        else:
            if i == 0:
                name = digit2mod.get(digit, '')
            elif i == 1:
                name = digit2dec.get(digit, '')
        if digit == '0':
            if i % 4 == 0:
                last_three = spelledout[-min(3, len(spelledout)):]
                if ''.join(last_three) == '':
                    spelledout.append('')
                    continue
            else:
                spelledout.append('')
                continue
        if i == 2:
            name = digit2name.get(digit, '') + ''
            name = name.replace('일백', '')
        elif i == 3:
            name = digit2name.get(digit, '') + ''
            name = name.replace('일천', '')
        elif i == 4:
            name = digit2name.get(digit, '') + ''
            name = name.replace('일만', '')
        elif i == 5:
            name = digit2name.get(digit, '') + ''
            name = name.replace('일십', '')
        elif i == 6:
            name = digit2name.get(digit, '') + ''
            name = name.replace('일백', '')
        elif i == 7:
            name = digit2name.get(digit, '') + ''
            name = name.replace('일천', '')
        elif i == 8:
            name = digit2name.get(digit, '') + ''
        elif i == 9:
            name = digit2name.get(digit, '') + ''
        elif i == 10:
            name = digit2name.get(digit, '') + ''
        elif i == 11:
            name = digit2name.get(digit, '') + ''
        elif i == 12:
            name = digit2name.get(digit, '') + ''
        elif i == 13:
            name = digit2name.get(digit, '') + ''
        elif i == 14:
            name = digit2name.get(digit, '') + ''
        elif i == 15:
            name = digit2name.get(digit, '') + ''
        spelledout.append(name)
    return ''.join(elem for elem in spelledout)
def number_to_hangul(text):
    '''Reference https://github.com/Kyubyong/g2pK

    Spell out numbers: native-Korean reading before a known classifier,
    Sino-Korean otherwise; leftover digits are read digit-by-digit.
    '''
    for number, classifier in set(re.findall(r'(\d[\d,]*)([\uac00-\ud71f]+)', text)):
        use_native = (classifier[:2] in _korean_classifiers
                      or classifier[0] in _korean_classifiers)
        spelledout = hangul_number(number, sino=not use_native)
        text = text.replace(f'{number}{classifier}', f'{spelledout}{classifier}')
    # digit by digit for remaining digits
    for d, n in zip('0123456789', '영일이삼사오육칠팔구'):
        text = text.replace(d, n)
    return text
def korean_to_lazy_ipa(text):
    """Korean text -> rough IPA via ko_pron plus the _ipa_to_lazy_ipa table."""
    text = latin_to_hangul(text)
    text = number_to_hangul(text)
    # Take the first of ko_pron's alternative pronunciations.
    text = re.sub('[\uac00-\ud7af]+',
                  lambda x: ko_pron.romanise(x.group(0), 'ipa').split('] ~ [')[0],
                  text)
    for pattern, lazy in _ipa_to_lazy_ipa:
        text = re.sub(pattern, lazy, text)
    return text
def korean_to_ipa(text):
    """Full-IPA variant of korean_to_lazy_ipa.

    NOTE(review): both replace() replacements are '' here, so 'ʧ' and 'ʥ' are
    simply deleted — upstream maps them to richer affricate symbols
    (presumably 'tɕ' / 'dʑ'); the replacement strings appear stripped. Confirm.
    """
    text = korean_to_lazy_ipa(text)
    return text.replace('ʧ', '').replace('ʥ', '')

326
tts/vits/text/mandarin.py Normal file
View File

@ -0,0 +1,326 @@
import os
import sys
import re
from pypinyin import lazy_pinyin, BOPOMOFO
import jieba
import cn2an
import logging
# List of (Latin alphabet, bopomofo) pairs:
_latin_to_bopomofo = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
('a', 'ㄟˉ'),
('b', 'ㄅㄧˋ'),
('c', 'ㄙㄧˉ'),
('d', 'ㄉㄧˋ'),
('e', 'ㄧˋ'),
('f', 'ㄝˊㄈㄨˋ'),
('g', 'ㄐㄧˋ'),
('h', 'ㄝˇㄑㄩˋ'),
('i', 'ㄞˋ'),
('j', 'ㄐㄟˋ'),
('k', 'ㄎㄟˋ'),
('l', 'ㄝˊㄛˋ'),
('m', 'ㄝˊㄇㄨˋ'),
('n', 'ㄣˉ'),
('o', 'ㄡˉ'),
('p', 'ㄆㄧˉ'),
('q', 'ㄎㄧㄡˉ'),
('r', 'ㄚˋ'),
('s', 'ㄝˊㄙˋ'),
('t', 'ㄊㄧˋ'),
('u', 'ㄧㄡˉ'),
('v', 'ㄨㄧˉ'),
('w', 'ㄉㄚˋㄅㄨˋㄌㄧㄡˋ'),
('x', 'ㄝˉㄎㄨˋㄙˋ'),
('y', 'ㄨㄞˋ'),
('z', 'ㄗㄟˋ')
]]
# List of (bopomofo, romaji) pairs:
_bopomofo_to_romaji = [(re.compile('%s' % x[0]), x[1]) for x in [
('ㄅㄛ', 'p⁼wo'),
('ㄆㄛ', 'pʰwo'),
('ㄇㄛ', 'mwo'),
('ㄈㄛ', 'fwo'),
('', 'p⁼'),
('', ''),
('', 'm'),
('', 'f'),
('', 't⁼'),
('', ''),
('', 'n'),
('', 'l'),
('', 'k⁼'),
('', ''),
('', 'h'),
('', 'ʧ⁼'),
('', 'ʧʰ'),
('', 'ʃ'),
('', 'ʦ`⁼'),
('', 'ʦ`ʰ'),
('', 's`'),
('', 'ɹ`'),
('', 'ʦ⁼'),
('', 'ʦʰ'),
('', 's'),
('', 'a'),
('', 'o'),
('', 'ə'),
('', 'e'),
('', 'ai'),
('', 'ei'),
('', 'au'),
('', 'ou'),
('ㄧㄢ', 'yeNN'),
('', 'aNN'),
('ㄧㄣ', 'iNN'),
('', 'əNN'),
('', 'aNg'),
('ㄧㄥ', 'iNg'),
('ㄨㄥ', 'uNg'),
('ㄩㄥ', 'yuNg'),
('', 'əNg'),
('', 'əɻ'),
('', 'i'),
('', 'u'),
('', 'ɥ'),
('ˉ', ''),
('ˊ', ''),
('ˇ', '↓↑'),
('ˋ', ''),
('˙', ''),
('', ','),
('', '.'),
('', '!'),
('', '?'),
('', '-')
]]
# List of (romaji, ipa) pairs:
_romaji_to_ipa = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
('ʃy', 'ʃ'),
('ʧʰy', 'ʧʰ'),
('ʧ⁼y', 'ʧ⁼'),
('NN', 'n'),
('Ng', 'ŋ'),
('y', 'j'),
('h', 'x')
]]
# List of (bopomofo, ipa) pairs:
_bopomofo_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
('ㄅㄛ', 'p⁼wo'),
('ㄆㄛ', 'pʰwo'),
('ㄇㄛ', 'mwo'),
('ㄈㄛ', 'fwo'),
('', 'p⁼'),
('', ''),
('', 'm'),
('', 'f'),
('', 't⁼'),
('', ''),
('', 'n'),
('', 'l'),
('', 'k⁼'),
('', ''),
('', 'x'),
('', 'tʃ⁼'),
('', 'tʃʰ'),
('', 'ʃ'),
('', 'ts`⁼'),
('', 'ts`ʰ'),
('', 's`'),
('', 'ɹ`'),
('', 'ts⁼'),
('', 'tsʰ'),
('', 's'),
('', 'a'),
('', 'o'),
('', 'ə'),
('', 'ɛ'),
('', 'aɪ'),
('', 'eɪ'),
('', 'ɑʊ'),
('', ''),
('ㄧㄢ', 'jɛn'),
('ㄩㄢ', 'ɥæn'),
('', 'an'),
('ㄧㄣ', 'in'),
('ㄩㄣ', 'ɥn'),
('', 'ən'),
('', 'ɑŋ'),
('ㄧㄥ', ''),
('ㄨㄥ', 'ʊŋ'),
('ㄩㄥ', 'jʊŋ'),
('', 'əŋ'),
('', 'əɻ'),
('', 'i'),
('', 'u'),
('', 'ɥ'),
('ˉ', ''),
('ˊ', ''),
('ˇ', '↓↑'),
('ˋ', ''),
('˙', ''),
('', ','),
('', '.'),
('', '!'),
('', '?'),
('', '-')
]]
# List of (bopomofo, ipa2) pairs:
_bopomofo_to_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [
('ㄅㄛ', 'pwo'),
('ㄆㄛ', 'pʰwo'),
('ㄇㄛ', 'mwo'),
('ㄈㄛ', 'fwo'),
('', 'p'),
('', ''),
('', 'm'),
('', 'f'),
('', 't'),
('', ''),
('', 'n'),
('', 'l'),
('', 'k'),
('', ''),
('', 'h'),
('', ''),
('', 'tɕʰ'),
('', 'ɕ'),
('', ''),
('', 'tʂʰ'),
('', 'ʂ'),
('', 'ɻ'),
('', 'ts'),
('', 'tsʰ'),
('', 's'),
('', 'a'),
('', 'o'),
('', 'ɤ'),
('', 'ɛ'),
('', 'aɪ'),
('', 'eɪ'),
('', 'ɑʊ'),
('', ''),
('ㄧㄢ', 'jɛn'),
('ㄩㄢ', 'yæn'),
('', 'an'),
('ㄧㄣ', 'in'),
('ㄩㄣ', 'yn'),
('', 'ən'),
('', 'ɑŋ'),
('ㄧㄥ', ''),
('ㄨㄥ', 'ʊŋ'),
('ㄩㄥ', 'jʊŋ'),
('', 'ɤŋ'),
('', 'əɻ'),
('', 'i'),
('', 'u'),
('', 'y'),
('ˉ', '˥'),
('ˊ', '˧˥'),
('ˇ', '˨˩˦'),
('ˋ', '˥˩'),
('˙', ''),
('', ','),
('', '.'),
('', '!'),
('', '?'),
('', '-')
]]
def number_to_chinese(text):
    """Rewrite every Arabic number in *text* as Chinese numerals via cn2an."""
    for number in re.findall(r'\d+(?:\.?\d+)?', text):
        text = text.replace(number, cn2an.an2cn(number), 1)
    return text
def chinese_to_bopomofo(text):
    """Segment with jieba and convert each Chinese word to bopomofo.

    NOTE(review): the replace() targets on the first line are empty (upstream
    normalises CJK punctuation such as 、；： to ，) and the r'\' replacement
    below is a syntax error (upstream appends a neutral-tone marker after a
    final bopomofo letter). Both look like extraction damage — restore before use.
    """
    text = text.replace('', '').replace('', '').replace('', '')
    words = jieba.lcut(text, cut_all=False)
    text = ''
    for word in words:
        bopomofos = lazy_pinyin(word, BOPOMOFO)
        if not re.search('[\u4e00-\u9fff]', word):
            text += word
            continue
        for i in range(len(bopomofos)):
            bopomofos[i] = re.sub(r'([\u3105-\u3129])$', r'\', bopomofos[i])
        if text != '':
            text += ' '
        text += ''.join(bopomofos)
    return text
def latin_to_bopomofo(text):
    """Transliterate Latin letters into bopomofo letter readings."""
    for pattern, bopomofo in _latin_to_bopomofo:
        text = re.sub(pattern, bopomofo, text)
    return text
def bopomofo_to_romaji(text):
    """Apply the _bopomofo_to_romaji substitution table to *text*."""
    for pattern, romaji in _bopomofo_to_romaji:
        text = re.sub(pattern, romaji, text)
    return text
def bopomofo_to_ipa(text):
    """Apply the _bopomofo_to_ipa substitution table to *text*."""
    for pattern, ipa_sym in _bopomofo_to_ipa:
        text = re.sub(pattern, ipa_sym, text)
    return text
def bopomofo_to_ipa2(text):
    """Apply the _bopomofo_to_ipa2 substitution table to *text*."""
    for pattern, ipa_sym in _bopomofo_to_ipa2:
        text = re.sub(pattern, ipa_sym, text)
    return text
def chinese_to_romaji(text):
    """Chinese text -> romaji-like phonemes (numbers, bopomofo, romaji stages)."""
    text = number_to_chinese(text)
    text = chinese_to_bopomofo(text)
    text = latin_to_bopomofo(text)
    text = bopomofo_to_romaji(text)
    # Glide rewrites: i+vowel -> y+vowel, u+vowel -> w+vowel.
    text = re.sub('i([aoe])', r'y\1', text)
    text = re.sub('u([aoəe])', r'w\1', text)
    text = re.sub('([ʦsɹ]`[⁼ʰ]?)([→↓↑ ]+|$)',
                  r'\1ɹ`\2', text).replace('ɻ', 'ɹ`')
    # NOTE(review): r'\\2' below drops group 1 and emits a literal backslash —
    # upstream inserts a syllabic marker between the groups (r'\1…\2' form);
    # looks like extraction damage. Restore before use.
    text = re.sub('([ʦs][⁼ʰ]?)([→↓↑ ]+|$)', r'\\2', text)
    return text
def chinese_to_lazy_ipa(text):
    """Romaji output mapped through the _romaji_to_ipa simplification table."""
    result = chinese_to_romaji(text)
    for pattern, ipa_sym in _romaji_to_ipa:
        result = re.sub(pattern, ipa_sym, result)
    return result
def chinese_to_ipa(text):
    """Chinese text -> IPA (numbers, bopomofo, then the _bopomofo_to_ipa table)."""
    text = number_to_chinese(text)
    text = chinese_to_bopomofo(text)
    text = latin_to_bopomofo(text)
    text = bopomofo_to_ipa(text)
    # Glide rewrites: i+vowel -> j+vowel, u+vowel -> w+vowel.
    text = re.sub('i([aoe])', r'j\1', text)
    text = re.sub('u([aoəe])', r'w\1', text)
    text = re.sub('([sɹ]`[⁼ʰ]?)([→↓↑ ]+|$)',
                  r'\1ɹ`\2', text).replace('ɻ', 'ɹ`')
    # NOTE(review): r'\\2' below drops group 1 and emits a literal backslash —
    # upstream inserts a syllabic marker (r'\1…\2' form); extraction damage.
    text = re.sub('([s][⁼ʰ]?)([→↓↑ ]+|$)', r'\\2', text)
    return text
def chinese_to_ipa2(text):
    """Chinese text -> second IPA variant using the _bopomofo_to_ipa2 table."""
    text = number_to_chinese(text)
    text = chinese_to_bopomofo(text)
    text = latin_to_bopomofo(text)
    text = bopomofo_to_ipa2(text)
    text = re.sub(r'i([aoe])', r'j\1', text)
    text = re.sub(r'u([aoəe])', r'w\1', text)
    # NOTE(review): r'\\2' below drops group 1 — upstream inserts the apical
    # vowel (compare the 'ɿ' insertion on the next line); extraction damage.
    text = re.sub(r'([ʂɹ]ʰ?)([˩˨˧˦˥ ]+|$)', r'\\2', text)
    text = re.sub(r'(sʰ?)([˩˨˧˦˥ ]+|$)', r'\1ɿ\2', text)
    return text

View File

@ -0,0 +1,30 @@
import re
import opencc
dialects = {'SZ': 'suzhou', 'WX': 'wuxi', 'CZ': 'changzhou', 'HZ': 'hangzhou',
'SX': 'shaoxing', 'NB': 'ningbo', 'JJ': 'jingjiang', 'YX': 'yixing',
'JD': 'jiading', 'ZR': 'zhenru', 'PH': 'pinghu', 'TX': 'tongxiang',
'JS': 'jiashan', 'HN': 'xiashi', 'LP': 'linping', 'XS': 'xiaoshan',
'FY': 'fuyang', 'RA': 'ruao', 'CX': 'cixi', 'SM': 'sanmen',
'TT': 'tiantai', 'WZ': 'wenzhou', 'SC': 'suichang', 'YB': 'youbu'}
converters = {}
for dialect in dialects.values():
try:
converters[dialect] = opencc.OpenCC(dialect)
except:
pass
def ngu_dialect_to_ipa(text, dialect):
    """Convert text to IPA for one Wu ("Ngu") dialect given its two-letter code.

    Raises KeyError for unknown codes, and also when the dialect's OpenCC
    converter failed to load at import time.
    """
    dialect = dialects[dialect]
    # '-' joins syllables and '$' marks word boundaries in the converter output.
    text = converters[dialect].convert(text).replace('-', '').replace('$', ' ')
    text = re.sub(r'[、;:]', '', text)
    # NOTE(review): as in the sibling modules, the r'\s*\s*' patterns mapping to
    # ', ', '? ', '! ' have lost their CJK punctuation (，？！) — restore them.
    text = re.sub(r'\s*\s*', ', ', text)
    text = re.sub(r'\s*。\s*', '. ', text)
    text = re.sub(r'\s*\s*', '? ', text)
    text = re.sub(r'\s*\s*', '! ', text)
    text = re.sub(r'\s*$', '', text)
    return text

62
tts/vits/text/sanskrit.py Normal file
View File

@ -0,0 +1,62 @@
import re
from indic_transliteration import sanscript
# List of (iast, ipa) pairs:
_iast_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
('a', 'ə'),
('ā', 'aː'),
('ī', 'iː'),
('ū', 'uː'),
('', 'ɹ`'),
('', 'ɹ`ː'),
('', 'l`'),
('', 'l`ː'),
('e', 'eː'),
('o', 'oː'),
('k', 'k⁼'),
('k⁼h', ''),
('g', 'g⁼'),
('g⁼h', ''),
('', 'ŋ'),
('c', 'ʧ⁼'),
('ʧ⁼h', 'ʧʰ'),
('j', 'ʥ⁼'),
('ʥ⁼h', 'ʥʰ'),
('ñ', 'n^'),
('', 't`⁼'),
('t`⁼h', 't`ʰ'),
('', 'd`⁼'),
('d`⁼h', 'd`ʰ'),
('', 'n`'),
('t', 't⁼'),
('t⁼h', ''),
('d', 'd⁼'),
('d⁼h', ''),
('p', 'p⁼'),
('p⁼h', ''),
('b', 'b⁼'),
('b⁼h', ''),
('y', 'j'),
('ś', 'ʃ'),
('', 's`'),
('r', 'ɾ'),
('', 'l`'),
('h', 'ɦ'),
("'", ''),
('~', '^'),
('', '^')
]]
def devanagari_to_ipa(text):
    """Devanagari -> IPA via IAST transliteration plus the _iast_to_ipa table.

    NOTE(review): the first replace() has an EMPTY target — upstream replaces
    'ॐ' with 'ओम्'. With an empty target, str.replace inserts 'ओम्' between
    every character of the input, which is certainly not intended; the source
    character was stripped in transit. Fix before use.
    """
    text = text.replace('', 'ओम्')
    text = re.sub(r'\s*।\s*$', '.', text)
    text = re.sub(r'\s*।\s*', ', ', text)
    text = re.sub(r'\s*॥', '.', text)
    text = sanscript.transliterate(text, sanscript.DEVANAGARI, sanscript.IAST)
    for regex, replacement in _iast_to_ipa:
        text = re.sub(regex, replacement, text)
    # Visarga: move 'h' after the consonant and mark devoicing with '*'.
    text = re.sub('(.)[`ː]*ḥ', lambda x: x.group(0)
                  [:-1]+'h'+x.group(1)+'*', text)
    return text

View File

@ -0,0 +1,64 @@
import re
import cn2an
import opencc
converter = opencc.OpenCC('zaonhe')
# List of (Latin alphabet, ipa) pairs:
_latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
('A', ''),
('B', 'bi'),
('C', 'si'),
('D', 'di'),
('E', 'i'),
('F', 'ᴇf'),
('G', 'dʑi'),
('H', 'ᴇtɕʰ'),
('I', 'ᴀi'),
('J', 'dʑᴇ'),
('K', 'kʰᴇ'),
('L', 'ᴇl'),
('M', 'ᴇm'),
('N', 'ᴇn'),
('O', 'o'),
('P', 'pʰi'),
('Q', 'kʰiu'),
('R', 'ᴀl'),
('S', 'ᴇs'),
('T', 'tʰi'),
('U', 'ɦiu'),
('V', 'vi'),
('W', 'dᴀbɤliu'),
('X', 'ᴇks'),
('Y', 'uᴀi'),
('Z', 'zᴇ')
]]
def _number_to_shanghainese(num):
    """Convert one digit string to Shanghainese-read Chinese numerals.

    NOTE(review): the first replace() has an empty replacement and the last has
    both target and replacement empty — upstream applies dialect rewrites
    ('一十'->'十' style); the characters appear stripped in transit. Confirm.
    """
    num = cn2an.an2cn(num).replace('一十', '').replace('二十', '廿').replace('', '')
    # Read '两' as '二' after tens (except when preceded by 三..九).
    return re.sub(r'((?:^|[^三四五六七八九])十|廿)两', r'\1二', num)
def number_to_shanghainese(text):
    """Rewrite every number in *text* using _number_to_shanghainese."""
    return re.sub(r'\d+(?:\.?\d+)?',
                  lambda m: _number_to_shanghainese(m.group()), text)
def latin_to_ipa(text):
    """Map uppercase Latin letters to their Shanghainese IPA readings."""
    for pattern, reading in _latin_to_ipa:
        text = re.sub(pattern, reading, text)
    return text
def shanghainese_to_ipa(text):
    """Convert Shanghainese text to an IPA-like string.

    Pipeline: spell out digits, run the OpenCC 'zaonhe' converter, expand
    Latin letters via latin_to_ipa, then normalise punctuation/whitespace.
    """
    text = number_to_shanghainese(text.upper())
    # '-' joins syllables and '$' marks word boundaries in the converter output.
    text = converter.convert(text).replace('-', '').replace('$', ' ')
    text = re.sub(r'[A-Z]', lambda x: latin_to_ipa(x.group()) + ' ', text)
    text = re.sub(r'[、;:]', '', text)
    # NOTE(review): the three r'\s*\s*' patterns below lost their CJK
    # punctuation (，？！) in transit — restore before relying on this.
    text = re.sub(r'\s*\s*', ', ', text)
    text = re.sub(r'\s*。\s*', '. ', text)
    text = re.sub(r'\s*\s*', '? ', text)
    text = re.sub(r'\s*\s*', '! ', text)
    text = re.sub(r'\s*$', '', text)
    return text

75
tts/vits/text/symbols.py Normal file
View File

@ -0,0 +1,75 @@
'''
Defines the set of symbols used in text input to the model.
'''
'''# japanese_cleaners
_pad = '_'
_punctuation = ',.!?-'
_letters = 'AEINOQUabdefghijkmnoprstuvwyzʃʧ↓↑ '
'''
#
# # japanese_cleaners2
# _pad = '_'
# _punctuation = ',.!?-~…'
# _letters = 'AEINOQUabdefghijkmnoprstuvwyzʃʧʦ↓↑ '
'''# korean_cleaners
_pad = '_'
_punctuation = ',.!?…~'
_letters = 'ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉㅏㅓㅗㅜㅡㅣㅐㅔ '
'''
# chinese_cleaners
_pad = '_'
_punctuation = ',。!?—…「」'
_letters = 'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩˉˊˇˋ˙ '
'''# zh_ja_mixture_cleaners
_pad = '_'
_punctuation = ',.!?-~…'
_letters = 'AEINOQUabdefghijklmnoprstuvwyzʃʧʦɯɹəɥ⁼ʰ`→↓↑ '
'''
'''# sanskrit_cleaners
_pad = '_'
_punctuation = ''
_letters = 'ँंःअआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसहऽािीुूृॄेैोौ्ॠॢ '
'''
'''# cjks_cleaners
_pad = '_'
_punctuation = ',.!?-~…'
_letters = 'NQabdefghijklmnopstuvwxyzʃʧʥʦɯɹəɥçɸɾβŋɦː⁼ʰ`^#*=→↓↑ '
'''
'''# thai_cleaners
_pad = '_'
_punctuation = '.!? '
_letters = 'กขฃคฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลวศษสหฬอฮฯะัาำิีึืุูเแโใไๅๆ็่้๊๋์'
'''
'''# cjke_cleaners2
_pad = '_'
_punctuation = ',.!?-~…'
_letters = 'NQabdefghijklmnopstuvwxyzɑæʃʑçɯɪɔɛɹðəɫɥɸʊɾʒθβŋɦ⁼ʰ`^#*=ˈˌ→↓↑ '
'''
'''# shanghainese_cleaners
_pad = '_'
_punctuation = ',.!?…'
_letters = 'abdfghiklmnopstuvyzøŋȵɑɔɕəɤɦɪɿʑʔʰ̩̃ᴀᴇ15678 '
'''
'''# chinese_dialect_cleaners
_pad = '_'
_punctuation = ',.!?~…─'
_letters = '#Nabdefghijklmnoprstuvwxyzæçøŋœȵɐɑɒɓɔɕɗɘəɚɛɜɣɤɦɪɭɯɵɷɸɻɾɿʂʅʊʋʌʏʑʔʦʮʰʷˀː˥˦˧˨˩̥̩̃̚ᴀᴇ↑↓∅ⱼ '
'''
# Export all symbols:
symbols = [_pad] + list(_punctuation) + list(_letters)
# Special symbol ids
SPACE_ID = symbols.index(" ")

44
tts/vits/text/thai.py Normal file
View File

@ -0,0 +1,44 @@
import re
from num_thai.thainumbers import NumThai
num = NumThai()
# List of (Latin alphabet, Thai) pairs:
_latin_to_thai = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
('a', 'เอ'),
('b','บี'),
('c','ซี'),
('d','ดี'),
('e','อี'),
('f','เอฟ'),
('g','จี'),
('h','เอช'),
('i','ไอ'),
('j','เจ'),
('k','เค'),
('l','แอล'),
('m','เอ็ม'),
('n','เอ็น'),
('o','โอ'),
('p','พี'),
('q','คิว'),
('r','แอร์'),
('s','เอส'),
('t','ที'),
('u','ยู'),
('v','วี'),
('w','ดับเบิลยู'),
('x','เอ็กซ์'),
('y','วาย'),
('z','ซี')
]]
def num_to_thai(text):
    """Replace each number (with optional commas/decimals) by its Thai spelling."""
    def spell(match):
        value = float(match.group(0).replace(',', ''))
        return ''.join(num.NumberToTextThai(value))
    return re.sub(r'(?:\d+(?:,?\d+)?)+(?:\.\d+(?:,?\d+)?)?', spell, text)
def latin_to_thai(text):
    """Transliterate Latin letters into Thai letter names."""
    for pattern, thai in _latin_to_thai:
        text = re.sub(pattern, thai, text)
    return text

301
tts/vits/train.py Normal file
View File

@ -0,0 +1,301 @@
import os
import json
import argparse
import itertools
import math
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.multiprocessing as mp
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.cuda.amp import autocast, GradScaler
import librosa
import logging
logging.getLogger('numba').setLevel(logging.WARNING)
import commons
import utils
from data_utils import (
TextAudioLoader,
TextAudioCollate,
DistributedBucketSampler
)
from models import (
SynthesizerTrn,
MultiPeriodDiscriminator,
)
from losses import (
generator_loss,
discriminator_loss,
feature_loss,
kl_loss
)
from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from text.symbols import symbols
torch.backends.cudnn.benchmark = True
global_step = 0
def main():
    """Assume Single Node Multi GPUs Training Only"""
    assert torch.cuda.is_available(), "CPU training is not allowed."
    n_gpus = torch.cuda.device_count()
    # Rendezvous endpoint for torch.distributed (env:// init, single node).
    os.environ['MASTER_ADDR'] = 'localhost'
    # NOTE(review): port 8000 commonly collides with local dev servers — confirm.
    os.environ['MASTER_PORT'] = '8000'
    hps = utils.get_hparams()
    # Spawn one training process per visible GPU; each runs run(rank, n_gpus, hps).
    mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps,))
def run(rank, n_gpus, hps):
    """Per-process (per-GPU) VITS training entry point spawned by main().

    Sets up the NCCL process group, data loaders, DDP-wrapped generator and
    discriminator, optimizers/schedulers, resumes from the latest checkpoint
    when present, then loops over epochs via train_and_evaluate.
    """
    global global_step
    if rank == 0:
        # Only rank 0 logs and writes TensorBoard summaries.
        logger = utils.get_logger(hps.model_dir)
        logger.info(hps)
        utils.check_git_hash(hps.model_dir)
        writer = SummaryWriter(log_dir=hps.model_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
    dist.init_process_group(backend='nccl', init_method='env://', world_size=n_gpus, rank=rank)
    torch.manual_seed(hps.train.seed)
    torch.cuda.set_device(rank)
    train_dataset = TextAudioLoader(hps.data.training_files, hps.data)
    # Bucket sampler groups utterances by length to reduce padding waste.
    train_sampler = DistributedBucketSampler(
        train_dataset,
        hps.train.batch_size,
        [32,300,400,500,600,700,800,900,1000],
        num_replicas=n_gpus,
        rank=rank,
        shuffle=True)
    collate_fn = TextAudioCollate()
    train_loader = DataLoader(train_dataset, num_workers=8, shuffle=False, pin_memory=True,
                              collate_fn=collate_fn, batch_sampler=train_sampler)
    if rank == 0:
        # Evaluation runs on rank 0 only.
        eval_dataset = TextAudioLoader(hps.data.validation_files, hps.data)
        eval_loader = DataLoader(eval_dataset, num_workers=8, shuffle=False,
                                 batch_size=hps.train.batch_size, pin_memory=True,
                                 drop_last=False, collate_fn=collate_fn)
    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        **hps.model).cuda(rank)
    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
    optim_g = torch.optim.AdamW(
        net_g.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps)
    optim_d = torch.optim.AdamW(
        net_d.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps)
    net_g = DDP(net_g, device_ids=[rank])
    net_d = DDP(net_d, device_ids=[rank])
    try:
        # Resume both nets from the newest G_*/D_* checkpoints when available.
        _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g)
        _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d)
        global_step = (epoch_str - 1) * len(train_loader)
    except:
        # NOTE(review): bare except silently treats ANY failure (including a
        # corrupt checkpoint or KeyboardInterrupt) as "start fresh" — consider
        # narrowing to the expected load errors.
        epoch_str = 1
        global_step = 0
    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str-2)
    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str-2)
    scaler = GradScaler(enabled=hps.train.fp16_run)
    for epoch in range(epoch_str, hps.train.epochs + 1):
        if rank==0:
            train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, eval_loader], logger, [writer, writer_eval])
        else:
            train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, None], None, None)
        scheduler_g.step()
        scheduler_d.step()
def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers):
    """Run one training epoch for the VITS generator/discriminator pair.

    Args:
        rank: CUDA device / process rank; rank 0 also logs, evaluates and checkpoints.
        epoch: 1-based epoch index, also used to reseed the bucket sampler.
        hps: hyper-parameter namespace (hps.train.*, hps.data.*).
        nets: [net_g, net_d] — DDP-wrapped generator and multi-period discriminator.
        optims: [optim_g, optim_d] AdamW optimizers.
        schedulers: [scheduler_g, scheduler_d] (stepped by the caller, not here).
        scaler: GradScaler for mixed-precision (fp16) training.
        loaders: [train_loader, eval_loader]; eval_loader is None on rank != 0.
        logger: logger on rank 0, None otherwise.
        writers: [writer, writer_eval] TensorBoard writers on rank 0, None otherwise.
    """
    net_g, net_d = nets
    optim_g, optim_d = optims
    scheduler_g, scheduler_d = schedulers  # NOTE(review): unpacked but unused in this function
    train_loader, eval_loader = loaders
    if writers is not None:
        writer, writer_eval = writers

    # Deterministic per-epoch reshuffle of the bucketed batches.
    train_loader.batch_sampler.set_epoch(epoch)
    global global_step

    net_g.train()
    net_d.train()
    for batch_idx, (x, x_lengths, spec, spec_lengths, y, y_lengths) in enumerate(train_loader):
        x, x_lengths = x.cuda(rank, non_blocking=True), x_lengths.cuda(rank, non_blocking=True)
        spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
        y, y_lengths = y.cuda(rank, non_blocking=True), y_lengths.cuda(rank, non_blocking=True)

        with autocast(enabled=hps.train.fp16_run):
            # Generator forward: synthesized audio slice plus flow/posterior statistics.
            y_hat, l_length, attn, ids_slice, x_mask, z_mask,\
            (z, z_p, m_p, logs_p, m_q, logs_q) = net_g(x, x_lengths, spec, spec_lengths)

            # Mel target: full-utterance mel from the linear spectrogram, then the
            # slice matching the randomly sampled training segment.
            mel = spec_to_mel_torch(
                spec,
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.mel_fmin,
                hps.data.mel_fmax)
            y_mel = commons.slice_segments(mel, ids_slice, hps.train.segment_size // hps.data.hop_length)
            y_hat_mel = mel_spectrogram_torch(
                y_hat.squeeze(1),
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.hop_length,
                hps.data.win_length,
                hps.data.mel_fmin,
                hps.data.mel_fmax
            )

            y = commons.slice_segments(y, ids_slice * hps.data.hop_length, hps.train.segment_size)  # slice

            # Discriminator step: real waveform slice vs. detached fake.
            y_d_hat_r, y_d_hat_g, _, _ = net_d(y, y_hat.detach())
            with autocast(enabled=False):
                loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(y_d_hat_r, y_d_hat_g)
                loss_disc_all = loss_disc
        optim_d.zero_grad()
        scaler.scale(loss_disc_all).backward()
        scaler.unscale_(optim_d)
        grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None)
        scaler.step(optim_d)

        with autocast(enabled=hps.train.fp16_run):
            # Generator step: adversarial + feature-matching + mel + duration + KL losses.
            y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(y, y_hat)
            with autocast(enabled=False):
                loss_dur = torch.sum(l_length.float())
                loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
                loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl

                loss_fm = feature_loss(fmap_r, fmap_g)
                loss_gen, losses_gen = generator_loss(y_d_hat_g)
                loss_gen_all = loss_gen + loss_fm + loss_mel + loss_dur + loss_kl
        optim_g.zero_grad()
        scaler.scale(loss_gen_all).backward()
        scaler.unscale_(optim_g)
        grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None)
        scaler.step(optim_g)
        scaler.update()

        if rank==0:
            if global_step % hps.train.log_interval == 0:
                # TensorBoard scalars (losses, lr, grad norms) and diagnostic images.
                lr = optim_g.param_groups[0]['lr']
                losses = [loss_disc, loss_gen, loss_fm, loss_mel, loss_dur, loss_kl]
                logger.info('Train Epoch: {} [{:.0f}%]'.format(
                    epoch,
                    100. * batch_idx / len(train_loader)))
                logger.info([x.item() for x in losses] + [global_step, lr])

                scalar_dict = {"loss/g/total": loss_gen_all, "loss/d/total": loss_disc_all, "learning_rate": lr, "grad_norm_d": grad_norm_d, "grad_norm_g": grad_norm_g}
                scalar_dict.update({"loss/g/fm": loss_fm, "loss/g/mel": loss_mel, "loss/g/dur": loss_dur, "loss/g/kl": loss_kl})

                scalar_dict.update({"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)})
                scalar_dict.update({"loss/d_r/{}".format(i): v for i, v in enumerate(losses_disc_r)})
                scalar_dict.update({"loss/d_g/{}".format(i): v for i, v in enumerate(losses_disc_g)})
                image_dict = {
                    "slice/mel_org": utils.plot_spectrogram_to_numpy(y_mel[0].data.cpu().numpy()),
                    "slice/mel_gen": utils.plot_spectrogram_to_numpy(y_hat_mel[0].data.cpu().numpy()),
                    "all/mel": utils.plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
                    "all/attn": utils.plot_alignment_to_numpy(attn[0,0].data.cpu().numpy())
                }
                utils.summarize(
                    writer=writer,
                    global_step=global_step,
                    images=image_dict,
                    scalars=scalar_dict)

            if global_step % hps.train.eval_interval == 0:
                # Periodic eval + checkpoint; the checkpoints from 2000 steps back are pruned.
                evaluate(hps, net_g, eval_loader, writer_eval)
                utils.save_checkpoint(net_g, optim_g, hps.train.learning_rate, epoch, os.path.join(hps.model_dir, "G_{}.pth".format(global_step)))
                utils.save_checkpoint(net_d, optim_d, hps.train.learning_rate, epoch, os.path.join(hps.model_dir, "D_{}.pth".format(global_step)))
                old_g=os.path.join(hps.model_dir, "G_{}.pth".format(global_step-2000))
                old_d=os.path.join(hps.model_dir, "D_{}.pth".format(global_step-2000))
                if os.path.exists(old_g):
                    os.remove(old_g)
                if os.path.exists(old_d):
                    os.remove(old_d)
        global_step += 1

    if rank == 0:
        logger.info('====> Epoch: {}'.format(epoch))
def evaluate(hps, generator, eval_loader, writer_eval):
    """Synthesize one validation example and log mel/audio summaries.

    Only the first item of the first validation batch is used; synthesis runs
    through `generator.module.infer` (the DDP-wrapped model) with max_len=1000.
    Ground-truth mel/audio are logged once, when global_step == 0.
    """
    generator.eval()
    with torch.no_grad():
        for batch_idx, (x, x_lengths, spec, spec_lengths, y, y_lengths) in enumerate(eval_loader):
            x, x_lengths = x.cuda(0), x_lengths.cuda(0)
            spec, spec_lengths = spec.cuda(0), spec_lengths.cuda(0)
            y, y_lengths = y.cuda(0), y_lengths.cuda(0)

            # remove else
            # Keep only the first utterance of the batch.
            x = x[:1]
            x_lengths = x_lengths[:1]
            spec = spec[:1]
            spec_lengths = spec_lengths[:1]
            y = y[:1]
            y_lengths = y_lengths[:1]
            break  # only the first batch is evaluated
        y_hat, attn, mask, *_ = generator.module.infer(x, x_lengths, max_len=1000)
        # Audio sample count recovered from the frame-level output mask.
        y_hat_lengths = mask.sum([1,2]).long() * hps.data.hop_length

        mel = spec_to_mel_torch(
            spec,
            hps.data.filter_length,
            hps.data.n_mel_channels,
            hps.data.sampling_rate,
            hps.data.mel_fmin,
            hps.data.mel_fmax)
        y_hat_mel = mel_spectrogram_torch(
            y_hat.squeeze(1).float(),
            hps.data.filter_length,
            hps.data.n_mel_channels,
            hps.data.sampling_rate,
            hps.data.hop_length,
            hps.data.win_length,
            hps.data.mel_fmin,
            hps.data.mel_fmax
        )
    image_dict = {
        "gen/mel": utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy())
    }
    audio_dict = {
        "gen/audio": y_hat[0,:,:y_hat_lengths[0]]
    }
    if global_step == 0:
        # Log the ground-truth reference only once.
        image_dict.update({"gt/mel": utils.plot_spectrogram_to_numpy(mel[0].cpu().numpy())})
        audio_dict.update({"gt/audio": y[0,:,:y_lengths[0]]})

    utils.summarize(
        writer=writer_eval,
        global_step=global_step,
        images=image_dict,
        audios=audio_dict,
        audio_sampling_rate=hps.data.sampling_rate
    )
    generator.train()
# Script entry point: spawns one training process per GPU via main().
if __name__ == "__main__":
    main()

305
tts/vits/train_ms.py Normal file
View File

@ -0,0 +1,305 @@
import os
import json
import argparse
import itertools
import math
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.multiprocessing as mp
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.cuda.amp import autocast, GradScaler

import librosa
import logging

# Silence numba's very chatty DEBUG output (pulled in via librosa).
logging.getLogger('numba').setLevel(logging.WARNING)

# Project-local training utilities and model/loss definitions.
import commons
import utils
from data_utils import (
    TextAudioSpeakerLoader,
    TextAudioSpeakerCollate,
    DistributedBucketSampler
)
from models import (
    SynthesizerTrn,
    MultiPeriodDiscriminator,
)
from losses import (
    generator_loss,
    discriminator_loss,
    feature_loss,
    kl_loss
)
from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
from text.symbols import symbols

# Bucketed batches give mostly fixed input sizes, so cudnn autotuning pays off.
torch.backends.cudnn.benchmark = True
# Global optimizer-step counter shared by logging, eval and checkpoint naming.
global_step = 0
def main():
    """Assume Single Node Multi GPUs Training Only"""
    assert torch.cuda.is_available(), "CPU training is not allowed."

    n_gpus = torch.cuda.device_count()
    # NOTE(review): fixed rendezvous address/port; port 8000 may collide with
    # other services on this host — confirm before running multiple jobs.
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '8000'

    hps = utils.get_hparams()
    # One training process per visible GPU; each gets (rank, n_gpus, hps).
    mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps,))
def run(rank, n_gpus, hps):
    """Per-process training entry point (spawned once per GPU).

    Sets up the NCCL process group, length-bucketed data loaders, the
    multi-speaker generator + multi-period discriminator, AdamW optimizers and
    exponential LR schedulers, then resumes from the latest checkpoint in
    hps.model_dir when one exists before looping over epochs.

    Args:
        rank: GPU index for this process; rank 0 owns logging/eval/checkpoints.
        n_gpus: world size (number of spawned processes).
        hps: hyper-parameter namespace from utils.get_hparams().
    """
    global global_step
    if rank == 0:
        logger = utils.get_logger(hps.model_dir)
        logger.info(hps)
        utils.check_git_hash(hps.model_dir)
        writer = SummaryWriter(log_dir=hps.model_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))

    dist.init_process_group(backend='nccl', init_method='env://', world_size=n_gpus, rank=rank)
    torch.manual_seed(hps.train.seed)
    torch.cuda.set_device(rank)

    train_dataset = TextAudioSpeakerLoader(hps.data.training_files, hps.data)
    # Bucket boundaries (in frames) keep batch members of similar length together.
    train_sampler = DistributedBucketSampler(
        train_dataset,
        hps.train.batch_size,
        [32,300,400,500,600,700,800,900,1000],
        num_replicas=n_gpus,
        rank=rank,
        shuffle=True)
    collate_fn = TextAudioSpeakerCollate()
    train_loader = DataLoader(train_dataset, num_workers=8, shuffle=False, pin_memory=True,
        collate_fn=collate_fn, batch_sampler=train_sampler)
    if rank == 0:
        eval_dataset = TextAudioSpeakerLoader(hps.data.validation_files, hps.data)
        eval_loader = DataLoader(eval_dataset, num_workers=8, shuffle=False,
            batch_size=hps.train.batch_size, pin_memory=True,
            drop_last=False, collate_fn=collate_fn)

    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model).cuda(rank)
    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
    optim_g = torch.optim.AdamW(
        net_g.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps)
    optim_d = torch.optim.AdamW(
        net_d.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps)
    net_g = DDP(net_g, device_ids=[rank])
    net_d = DDP(net_d, device_ids=[rank])

    try:
        _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g)
        _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d)
        global_step = (epoch_str - 1) * len(train_loader)
    except Exception:
        # Was a bare `except:` that also swallowed KeyboardInterrupt/SystemExit.
        # Any failure to locate or load a checkpoint means training from scratch.
        epoch_str = 1
        global_step = 0

    # last_epoch = epoch_str - 2 because ExponentialLR counts from -1.
    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str-2)
    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str-2)

    scaler = GradScaler(enabled=hps.train.fp16_run)

    for epoch in range(epoch_str, hps.train.epochs + 1):
        if rank==0:
            train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, eval_loader], logger, [writer, writer_eval])
        else:
            # Non-zero ranks train without eval loader, logger or writers.
            train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, None], None, None)
        scheduler_g.step()
        scheduler_d.step()
def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers):
    """Run one multi-speaker training epoch for the generator/discriminator pair.

    Identical structure to the single-speaker trainer, with a per-utterance
    `speakers` id tensor threaded through the generator forward pass.

    Args:
        rank: CUDA device / process rank; rank 0 also logs, evaluates and checkpoints.
        epoch: 1-based epoch index, also used to reseed the bucket sampler.
        hps: hyper-parameter namespace (hps.train.*, hps.data.*).
        nets: [net_g, net_d] — DDP-wrapped generator and multi-period discriminator.
        optims: [optim_g, optim_d] AdamW optimizers.
        schedulers: [scheduler_g, scheduler_d] (stepped by the caller, not here).
        scaler: GradScaler for mixed-precision (fp16) training.
        loaders: [train_loader, eval_loader]; eval_loader is None on rank != 0.
        logger: logger on rank 0, None otherwise.
        writers: [writer, writer_eval] TensorBoard writers on rank 0, None otherwise.
    """
    net_g, net_d = nets
    optim_g, optim_d = optims
    scheduler_g, scheduler_d = schedulers  # NOTE(review): unpacked but unused in this function
    train_loader, eval_loader = loaders
    if writers is not None:
        writer, writer_eval = writers

    # Deterministic per-epoch reshuffle of the bucketed batches.
    train_loader.batch_sampler.set_epoch(epoch)
    global global_step

    net_g.train()
    net_d.train()
    for batch_idx, (x, x_lengths, spec, spec_lengths, y, y_lengths, speakers) in enumerate(train_loader):
        x, x_lengths = x.cuda(rank, non_blocking=True), x_lengths.cuda(rank, non_blocking=True)
        spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
        y, y_lengths = y.cuda(rank, non_blocking=True), y_lengths.cuda(rank, non_blocking=True)
        speakers = speakers.cuda(rank, non_blocking=True)

        with autocast(enabled=hps.train.fp16_run):
            # Generator forward conditioned on speaker ids.
            y_hat, l_length, attn, ids_slice, x_mask, z_mask,\
            (z, z_p, m_p, logs_p, m_q, logs_q) = net_g(x, x_lengths, spec, spec_lengths, speakers)

            # Mel target: full-utterance mel, then the slice matching the sampled segment.
            mel = spec_to_mel_torch(
                spec,
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.mel_fmin,
                hps.data.mel_fmax)
            y_mel = commons.slice_segments(mel, ids_slice, hps.train.segment_size // hps.data.hop_length)
            y_hat_mel = mel_spectrogram_torch(
                y_hat.squeeze(1),
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.hop_length,
                hps.data.win_length,
                hps.data.mel_fmin,
                hps.data.mel_fmax
            )

            y = commons.slice_segments(y, ids_slice * hps.data.hop_length, hps.train.segment_size)  # slice

            # Discriminator step: real waveform slice vs. detached fake.
            y_d_hat_r, y_d_hat_g, _, _ = net_d(y, y_hat.detach())
            with autocast(enabled=False):
                loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(y_d_hat_r, y_d_hat_g)
                loss_disc_all = loss_disc
        optim_d.zero_grad()
        scaler.scale(loss_disc_all).backward()
        scaler.unscale_(optim_d)
        grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None)
        scaler.step(optim_d)

        with autocast(enabled=hps.train.fp16_run):
            # Generator step: adversarial + feature-matching + mel + duration + KL losses.
            y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(y, y_hat)
            with autocast(enabled=False):
                loss_dur = torch.sum(l_length.float())
                loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
                loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl

                loss_fm = feature_loss(fmap_r, fmap_g)
                loss_gen, losses_gen = generator_loss(y_d_hat_g)
                loss_gen_all = loss_gen + loss_fm + loss_mel + loss_dur + loss_kl
        optim_g.zero_grad()
        scaler.scale(loss_gen_all).backward()
        scaler.unscale_(optim_g)
        grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None)
        scaler.step(optim_g)
        scaler.update()

        if rank==0:
            if global_step % hps.train.log_interval == 0:
                # TensorBoard scalars (losses, lr, grad norms) and diagnostic images.
                lr = optim_g.param_groups[0]['lr']
                losses = [loss_disc, loss_gen, loss_fm, loss_mel, loss_dur, loss_kl]
                logger.info('Train Epoch: {} [{:.0f}%]'.format(
                    epoch,
                    100. * batch_idx / len(train_loader)))
                logger.info([x.item() for x in losses] + [global_step, lr])

                scalar_dict = {"loss/g/total": loss_gen_all, "loss/d/total": loss_disc_all, "learning_rate": lr, "grad_norm_d": grad_norm_d, "grad_norm_g": grad_norm_g}
                scalar_dict.update({"loss/g/fm": loss_fm, "loss/g/mel": loss_mel, "loss/g/dur": loss_dur, "loss/g/kl": loss_kl})

                scalar_dict.update({"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)})
                scalar_dict.update({"loss/d_r/{}".format(i): v for i, v in enumerate(losses_disc_r)})
                scalar_dict.update({"loss/d_g/{}".format(i): v for i, v in enumerate(losses_disc_g)})
                image_dict = {
                    "slice/mel_org": utils.plot_spectrogram_to_numpy(y_mel[0].data.cpu().numpy()),
                    "slice/mel_gen": utils.plot_spectrogram_to_numpy(y_hat_mel[0].data.cpu().numpy()),
                    "all/mel": utils.plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
                    "all/attn": utils.plot_alignment_to_numpy(attn[0,0].data.cpu().numpy())
                }
                utils.summarize(
                    writer=writer,
                    global_step=global_step,
                    images=image_dict,
                    scalars=scalar_dict)

            if global_step % hps.train.eval_interval == 0:
                # Periodic eval + checkpoint; the checkpoints from 2000 steps back are pruned.
                evaluate(hps, net_g, eval_loader, writer_eval)
                utils.save_checkpoint(net_g, optim_g, hps.train.learning_rate, epoch, os.path.join(hps.model_dir, "G_{}.pth".format(global_step)))
                utils.save_checkpoint(net_d, optim_d, hps.train.learning_rate, epoch, os.path.join(hps.model_dir, "D_{}.pth".format(global_step)))
                old_g=os.path.join(hps.model_dir, "G_{}.pth".format(global_step-2000))
                old_d=os.path.join(hps.model_dir, "D_{}.pth".format(global_step-2000))
                if os.path.exists(old_g):
                    os.remove(old_g)
                if os.path.exists(old_d):
                    os.remove(old_d)
        global_step += 1

    if rank == 0:
        logger.info('====> Epoch: {}'.format(epoch))
def evaluate(hps, generator, eval_loader, writer_eval):
    """Synthesize one multi-speaker validation example and log mel/audio summaries.

    Only the first item of the first validation batch is used; synthesis runs
    through `generator.module.infer` (the DDP-wrapped model) conditioned on the
    speaker id, with max_len=1000. Ground truth is logged once, at global_step == 0.
    """
    generator.eval()
    with torch.no_grad():
        for batch_idx, (x, x_lengths, spec, spec_lengths, y, y_lengths, speakers) in enumerate(eval_loader):
            x, x_lengths = x.cuda(0), x_lengths.cuda(0)
            spec, spec_lengths = spec.cuda(0), spec_lengths.cuda(0)
            y, y_lengths = y.cuda(0), y_lengths.cuda(0)
            speakers = speakers.cuda(0)

            # remove else
            # Keep only the first utterance of the batch.
            x = x[:1]
            x_lengths = x_lengths[:1]
            spec = spec[:1]
            spec_lengths = spec_lengths[:1]
            y = y[:1]
            y_lengths = y_lengths[:1]
            speakers = speakers[:1]
            break  # only the first batch is evaluated
        y_hat, attn, mask, *_ = generator.module.infer(x, x_lengths, speakers, max_len=1000)
        # Audio sample count recovered from the frame-level output mask.
        y_hat_lengths = mask.sum([1,2]).long() * hps.data.hop_length

        mel = spec_to_mel_torch(
            spec,
            hps.data.filter_length,
            hps.data.n_mel_channels,
            hps.data.sampling_rate,
            hps.data.mel_fmin,
            hps.data.mel_fmax)
        y_hat_mel = mel_spectrogram_torch(
            y_hat.squeeze(1).float(),
            hps.data.filter_length,
            hps.data.n_mel_channels,
            hps.data.sampling_rate,
            hps.data.hop_length,
            hps.data.win_length,
            hps.data.mel_fmin,
            hps.data.mel_fmax
        )
    image_dict = {
        "gen/mel": utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy())
    }
    audio_dict = {
        "gen/audio": y_hat[0,:,:y_hat_lengths[0]]
    }
    if global_step == 0:
        # Log the ground-truth reference only once.
        image_dict.update({"gt/mel": utils.plot_spectrogram_to_numpy(mel[0].cpu().numpy())})
        audio_dict.update({"gt/audio": y[0,:,:y_lengths[0]]})

    utils.summarize(
        writer=writer_eval,
        global_step=global_step,
        images=image_dict,
        audios=audio_dict,
        audio_sampling_rate=hps.data.sampling_rate
    )
    generator.train()
# Script entry point: spawns one training process per GPU via main().
if __name__ == "__main__":
    main()

193
tts/vits/transforms.py Normal file
View File

@ -0,0 +1,193 @@
import torch
from torch.nn import functional as F

import numpy as np

# Floors keeping spline bin widths/heights and knot derivatives strictly
# positive, so the rational-quadratic transform stays monotonic and invertible.
DEFAULT_MIN_BIN_WIDTH = 1e-3
DEFAULT_MIN_BIN_HEIGHT = 1e-3
DEFAULT_MIN_DERIVATIVE = 1e-3
def piecewise_rational_quadratic_transform(inputs,
                                           unnormalized_widths,
                                           unnormalized_heights,
                                           unnormalized_derivatives,
                                           inverse=False,
                                           tails=None,
                                           tail_bound=1.,
                                           min_bin_width=DEFAULT_MIN_BIN_WIDTH,
                                           min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
                                           min_derivative=DEFAULT_MIN_DERIVATIVE):
    """Dispatch to the constrained or tail-handling rational-quadratic spline.

    With tails=None the inputs must already lie inside the spline domain and
    the constrained spline is used; otherwise the unconstrained variant applies
    identity tails outside [-tail_bound, tail_bound]. Returns
    (outputs, logabsdet), both shaped like `inputs`.
    """
    shared_kwargs = dict(
        inputs=inputs,
        unnormalized_widths=unnormalized_widths,
        unnormalized_heights=unnormalized_heights,
        unnormalized_derivatives=unnormalized_derivatives,
        inverse=inverse,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative,
    )
    if tails is None:
        return rational_quadratic_spline(**shared_kwargs)
    return unconstrained_rational_quadratic_spline(
        tails=tails, tail_bound=tail_bound, **shared_kwargs)
def searchsorted(bin_locations, inputs, eps=1e-6):
    """Return, for each input, the index of the bin it falls into.

    NOTE: mutates `bin_locations` in place — the last edge is nudged up by
    `eps` so inputs equal to the top boundary land in the final bin.
    """
    bin_locations[..., -1] += eps
    hits = inputs[..., None] >= bin_locations
    return hits.sum(dim=-1) - 1
def unconstrained_rational_quadratic_spline(inputs,
                                            unnormalized_widths,
                                            unnormalized_heights,
                                            unnormalized_derivatives,
                                            inverse=False,
                                            tails='linear',
                                            tail_bound=1.,
                                            min_bin_width=DEFAULT_MIN_BIN_WIDTH,
                                            min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
                                            min_derivative=DEFAULT_MIN_DERIVATIVE):
    """Rational-quadratic spline with identity (linear) tails outside [-tail_bound, tail_bound].

    Inputs inside the interval go through the constrained spline; inputs
    outside are passed through unchanged with zero log-abs-det. Returns
    (outputs, logabsdet), both shaped like `inputs`.
    """
    inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
    outside_interval_mask = ~inside_interval_mask

    outputs = torch.zeros_like(inputs)
    logabsdet = torch.zeros_like(inputs)

    if tails == 'linear':
        # Pad so boundary knots have derivatives; the constant is softplus^-1 of
        # (1 - min_derivative), making the effective boundary derivative exactly 1
        # — continuous with the identity tails.
        unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
        constant = np.log(np.exp(1 - min_derivative) - 1)
        unnormalized_derivatives[..., 0] = constant
        unnormalized_derivatives[..., -1] = constant

        # Identity transform (log-det 0) outside the spline interval.
        outputs[outside_interval_mask] = inputs[outside_interval_mask]
        logabsdet[outside_interval_mask] = 0
    else:
        raise RuntimeError('{} tails are not implemented.'.format(tails))

    # Constrained spline applied only to the in-interval elements.
    outputs[inside_interval_mask], logabsdet[inside_interval_mask] = rational_quadratic_spline(
        inputs=inputs[inside_interval_mask],
        unnormalized_widths=unnormalized_widths[inside_interval_mask, :],
        unnormalized_heights=unnormalized_heights[inside_interval_mask, :],
        unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :],
        inverse=inverse,
        left=-tail_bound, right=tail_bound, bottom=-tail_bound, top=tail_bound,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative
    )

    return outputs, logabsdet
def rational_quadratic_spline(inputs,
                              unnormalized_widths,
                              unnormalized_heights,
                              unnormalized_derivatives,
                              inverse=False,
                              left=0., right=1., bottom=0., top=1.,
                              min_bin_width=DEFAULT_MIN_BIN_WIDTH,
                              min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
                              min_derivative=DEFAULT_MIN_DERIVATIVE):
    """Monotonic rational-quadratic spline mapping [left, right] -> [bottom, top].

    Bin widths/heights come from softmax-normalized parameters (floored by
    min_bin_width/min_bin_height); knot derivatives from softplus (floored by
    min_derivative). Returns (outputs, logabsdet); with inverse=True the
    y->x inverse and negated log-det are returned.
    """
    if torch.min(inputs) < left or torch.max(inputs) > right:
        raise ValueError('Input to a transform is not within its domain')

    num_bins = unnormalized_widths.shape[-1]

    if min_bin_width * num_bins > 1.0:
        raise ValueError('Minimal bin width too large for the number of bins')
    if min_bin_height * num_bins > 1.0:
        raise ValueError('Minimal bin height too large for the number of bins')

    # Knot x-positions: normalized widths cumulated and rescaled to [left, right].
    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode='constant', value=0.0)
    cumwidths = (right - left) * cumwidths + left
    cumwidths[..., 0] = left
    cumwidths[..., -1] = right
    widths = cumwidths[..., 1:] - cumwidths[..., :-1]

    # Strictly positive knot derivatives.
    derivatives = min_derivative + F.softplus(unnormalized_derivatives)

    # Knot y-positions: same construction on the output axis.
    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = torch.cumsum(heights, dim=-1)
    cumheights = F.pad(cumheights, pad=(1, 0), mode='constant', value=0.0)
    cumheights = (top - bottom) * cumheights + bottom
    cumheights[..., 0] = bottom
    cumheights[..., -1] = top
    heights = cumheights[..., 1:] - cumheights[..., :-1]

    # Locate each input's bin: by y-knots when inverting, x-knots otherwise.
    if inverse:
        bin_idx = searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = searchsorted(cumwidths, inputs)[..., None]

    # Gather the per-input bin parameters.
    input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_bin_widths = widths.gather(-1, bin_idx)[..., 0]

    input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
    delta = heights / widths  # average slope of each bin
    input_delta = delta.gather(-1, bin_idx)[..., 0]

    input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
    input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]

    input_heights = heights.gather(-1, bin_idx)[..., 0]

    if inverse:
        # Invert by solving a*theta^2 + b*theta + c = 0 for theta in [0, 1],
        # using the numerically stable root form 2c / (-b - sqrt(b^2 - 4ac)).
        a = (((inputs - input_cumheights) * (input_derivatives
                                             + input_derivatives_plus_one
                                             - 2 * input_delta)
              + input_heights * (input_delta - input_derivatives)))
        b = (input_heights * input_derivatives
             - (inputs - input_cumheights) * (input_derivatives
                                              + input_derivatives_plus_one
                                              - 2 * input_delta))
        c = - input_delta * (inputs - input_cumheights)

        discriminant = b.pow(2) - 4 * a * c
        assert (discriminant >= 0).all()

        root = (2 * c) / (-b - torch.sqrt(discriminant))
        outputs = root * input_bin_widths + input_cumwidths

        theta_one_minus_theta = root * (1 - root)
        denominator = input_delta + ((input_derivatives + input_derivatives_plus_one - 2 * input_delta)
                                     * theta_one_minus_theta)
        derivative_numerator = input_delta.pow(2) * (input_derivatives_plus_one * root.pow(2)
                                                     + 2 * input_delta * theta_one_minus_theta
                                                     + input_derivatives * (1 - root).pow(2))
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        # Inverse direction: negate the forward log-det.
        return outputs, -logabsdet
    else:
        # Forward evaluation of the rational quadratic on theta in [0, 1].
        theta = (inputs - input_cumwidths) / input_bin_widths
        theta_one_minus_theta = theta * (1 - theta)

        numerator = input_heights * (input_delta * theta.pow(2)
                                     + input_derivatives * theta_one_minus_theta)
        denominator = input_delta + ((input_derivatives + input_derivatives_plus_one - 2 * input_delta)
                                     * theta_one_minus_theta)
        outputs = input_cumheights + numerator / denominator

        derivative_numerator = input_delta.pow(2) * (input_derivatives_plus_one * theta.pow(2)
                                                     + 2 * input_delta * theta_one_minus_theta
                                                     + input_derivatives * (1 - theta).pow(2))
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        return outputs, logabsdet

258
tts/vits/utils.py Normal file
View File

@ -0,0 +1,258 @@
import os
import glob
import sys
import argparse
import logging
import json
import subprocess
import numpy as np
from scipy.io.wavfile import read
import torch

# Flipped to True once matplotlib has been imported and forced to the Agg backend.
MATPLOTLIB_FLAG = False

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# Module-level logger used by the helpers below. Starts as the `logging` module
# itself (which exposes info()/warn()) and is rebound to a real Logger by get_logger().
logger = logging
def load_checkpoint(checkpoint_path, model, optimizer=None):
    """Load model (and optionally optimizer) state from a checkpoint file.

    Parameters missing from the checkpoint keep their current values, so a
    partially matching checkpoint can still warm-start the model. DDP-wrapped
    models are unwrapped via `.module` transparently.

    Args:
        checkpoint_path: path to a torch.save()d dict with keys
            'model', 'iteration', 'learning_rate' and optionally 'optimizer'.
        model: module (possibly DDP-wrapped) to load weights into.
        optimizer: optimizer whose state is restored when given.

    Returns:
        (model, optimizer, learning_rate, iteration)
    """
    assert os.path.isfile(checkpoint_path)
    checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
    iteration = checkpoint_dict['iteration']
    learning_rate = checkpoint_dict['learning_rate']
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint_dict['optimizer'])
    saved_state_dict = checkpoint_dict['model']
    if hasattr(model, 'module'):
        state_dict = model.module.state_dict()
    else:
        state_dict = model.state_dict()
    new_state_dict = {}
    for k, v in state_dict.items():
        try:
            new_state_dict[k] = saved_state_dict[k]
        except KeyError:
            # Was a bare `except:`; only a missing checkpoint key is expected here.
            # Fall back to the model's current value for that parameter.
            logger.info("%s is not in the checkpoint" % k)
            new_state_dict[k] = v
    if hasattr(model, 'module'):
        model.module.load_state_dict(new_state_dict)
    else:
        model.load_state_dict(new_state_dict)
    logger.info("Loaded checkpoint '{}' (iteration {})".format(
        checkpoint_path, iteration))
    return model, optimizer, learning_rate, iteration
def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
    """Serialize model weights, optimizer state and training progress to disk.

    The saved dict mirrors what load_checkpoint() expects:
    {'model', 'iteration', 'optimizer', 'learning_rate'}.
    """
    logger.info("Saving model and optimizer state at iteration {} to {}".format(
        iteration, checkpoint_path))
    # Unwrap DistributedDataParallel before exporting parameters.
    state_dict = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()
    payload = {
        'model': state_dict,
        'iteration': iteration,
        'optimizer': optimizer.state_dict(),
        'learning_rate': learning_rate,
    }
    torch.save(payload, checkpoint_path)
def summarize(writer, global_step, scalars=None, histograms=None, images=None, audios=None, audio_sampling_rate=22050):
    """Write grouped scalar/histogram/image/audio summaries to TensorBoard.

    Args:
        writer: SummaryWriter-like object.
        global_step: step index recorded with every summary.
        scalars, histograms, images, audios: optional tag -> value mappings.
        audio_sampling_rate: sample rate attached to every audio summary.
    """
    # None defaults replace the original mutable `{}` defaults (shared-instance pitfall).
    for k, v in (scalars or {}).items():
        writer.add_scalar(k, v, global_step)
    for k, v in (histograms or {}).items():
        writer.add_histogram(k, v, global_step)
    for k, v in (images or {}).items():
        # Images are produced as HxWxC uint8 arrays by the plot helpers.
        writer.add_image(k, v, global_step, dataformats='HWC')
    for k, v in (audios or {}).items():
        writer.add_audio(k, v, global_step, audio_sampling_rate)
def latest_checkpoint_path(dir_path, regex="G_*.pth"):
    """Return the checkpoint in `dir_path` with the highest step number.

    Matching files are ranked by the integer formed from all digits in their
    path (so G_2000.pth outranks G_900.pth). Raises IndexError when no file
    matches `regex`.
    """
    f_list = glob.glob(os.path.join(dir_path, regex))
    f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
    # Removed a leftover debug print() of the chosen path.
    return f_list[-1]
def plot_spectrogram_to_numpy(spectrogram):
    """Render a spectrogram (channels x frames) to an HxWx3 uint8 RGB array."""
    global MATPLOTLIB_FLAG
    if not MATPLOTLIB_FLAG:
        # One-time headless matplotlib setup: Agg backend, quiet matplotlib logging.
        import matplotlib
        matplotlib.use("Agg")
        MATPLOTLIB_FLAG = True
        mpl_logger = logging.getLogger('matplotlib')
        mpl_logger.setLevel(logging.WARNING)
    import matplotlib.pylab as plt
    import numpy as np

    fig, ax = plt.subplots(figsize=(10,2))
    im = ax.imshow(spectrogram, aspect="auto", origin="lower",
                   interpolation='none')
    plt.colorbar(im, ax=ax)
    plt.xlabel("Frames")
    plt.ylabel("Channels")
    plt.tight_layout()

    fig.canvas.draw()
    # np.fromstring on binary data is deprecated; np.frombuffer is the
    # supported equivalent (yields a read-only view of the canvas buffer).
    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close()
    return data
def plot_alignment_to_numpy(alignment, info=None):
    """Render an attention alignment matrix to an HxWx3 uint8 RGB array.

    Args:
        alignment: 2D array (encoder x decoder timesteps); transposed for display.
        info: optional extra text appended to the x-axis label.
    """
    global MATPLOTLIB_FLAG
    if not MATPLOTLIB_FLAG:
        # One-time headless matplotlib setup shared with plot_spectrogram_to_numpy.
        import matplotlib
        matplotlib.use("Agg")
        MATPLOTLIB_FLAG = True
        mpl_logger = logging.getLogger('matplotlib')
        mpl_logger.setLevel(logging.WARNING)
    import matplotlib.pylab as plt
    import numpy as np

    fig, ax = plt.subplots(figsize=(6, 4))
    im = ax.imshow(alignment.transpose(), aspect='auto', origin='lower',
                   interpolation='none')
    fig.colorbar(im, ax=ax)
    xlabel = 'Decoder timestep'
    if info is not None:
        xlabel += '\n\n' + info
    plt.xlabel(xlabel)
    plt.ylabel('Encoder timestep')
    plt.tight_layout()

    fig.canvas.draw()
    # np.fromstring on binary data is deprecated; np.frombuffer is the
    # supported equivalent (yields a read-only view of the canvas buffer).
    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close()
    return data
def load_wav_to_torch(full_path):
    """Read a WAV file and return (float32 audio tensor, sampling rate)."""
    rate, samples = read(full_path)
    # Cast to float32 first (copies), then wrap as a torch tensor.
    audio = torch.from_numpy(samples.astype(np.float32))
    return audio, rate
def load_filepaths_and_text(filename, split="|"):
    """Parse a metadata file into rows of fields, one row per stripped line."""
    with open(filename, encoding='utf-8') as handle:
        return [line.strip().split(split) for line in handle]
def get_hparams(init=True):
    """Parse CLI args, materialize the model directory, and load hyper-parameters.

    With init=True the file named by --config is copied to
    <model_dir>/config.json; with init=False the previously saved copy is read
    back instead. Returns an HParams tree with .model_dir attached.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', type=str, default="./configs/base.json",
                        help='JSON file for configuration')
    parser.add_argument('-m', '--model', type=str, required=True,
                        help='Model name')

    args = parser.parse_args()
    # NOTE(review): Colab-style hard-coded parent directory ("../drive/MyDrive");
    # confirm before running outside that environment.
    model_dir = os.path.join("../drive/MyDrive", args.model)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    config_path = args.config
    config_save_path = os.path.join(model_dir, "config.json")
    if init:
        # Snapshot the chosen config into the model dir for reproducibility.
        with open(config_path, "r") as f:
            data = f.read()
        with open(config_save_path, "w") as f:
            f.write(data)
    else:
        with open(config_save_path, "r") as f:
            data = f.read()
    config = json.loads(data)

    hparams = HParams(**config)
    hparams.model_dir = model_dir
    return hparams
def get_hparams_from_dir(model_dir):
    """Load the config.json previously snapshotted in `model_dir` as HParams."""
    config_save_path = os.path.join(model_dir, "config.json")
    with open(config_save_path, "r") as config_file:
        config = json.load(config_file)

    hparams = HParams(**config)
    hparams.model_dir = model_dir
    return hparams
def get_hparams_from_file(config_path):
    """Load an arbitrary JSON config file as an HParams tree."""
    with open(config_path, "r") as config_file:
        config = json.load(config_file)
    return HParams(**config)
def check_git_hash(model_dir):
    """Record or compare the repo HEAD hash against <model_dir>/githash.

    On first run the current hash is saved; on later runs a warning is logged
    when the saved and current hashes differ. Skipped (with a warning) when the
    source tree is not a git checkout.
    """
    source_dir = os.path.dirname(os.path.realpath(__file__))
    if not os.path.exists(os.path.join(source_dir, ".git")):
        # logger.warn is deprecated in the logging API; use warning().
        logger.warning("{} is not a git repository, therefore hash value comparison will be ignored.".format(
            source_dir
        ))
        return

    cur_hash = subprocess.getoutput("git rev-parse HEAD")

    path = os.path.join(model_dir, "githash")
    if os.path.exists(path):
        # Use context managers so the file handles are closed deterministically.
        with open(path) as f:
            saved_hash = f.read()
        if saved_hash != cur_hash:
            logger.warning("git hash values are different. {}(saved) != {}(current)".format(
                saved_hash[:8], cur_hash[:8]))
    else:
        with open(path, "w") as f:
            f.write(cur_hash)
def get_logger(model_dir, filename="train.log"):
    """Build a DEBUG-level file logger under model_dir and publish it module-wide.

    Rebinds the module-global `logger`, so the other helpers in this module log
    through the same file handler. The model directory is created if missing.
    """
    global logger
    os.makedirs(model_dir, exist_ok=True)

    log = logging.getLogger(os.path.basename(model_dir))
    log.setLevel(logging.DEBUG)

    file_handler = logging.FileHandler(os.path.join(model_dir, filename))
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(
        logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s"))
    log.addHandler(file_handler)

    logger = log
    return log
class HParams():
    """Attribute-style wrapper over a (possibly nested) configuration dict.

    Nested dicts are converted to HParams recursively, so values can be read
    as h.train.batch_size or h['train']['batch_size'] interchangeably. Also
    supports len(), `in`, and dict-like keys()/items()/values().
    """

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            # isinstance (was `type(v) == dict`) also converts dict subclasses,
            # e.g. OrderedDict from json.loads(object_pairs_hook=...).
            if isinstance(v, dict):
                v = HParams(**v)
            self[k] = v

    def keys(self):
        return self.__dict__.keys()

    def items(self):
        return self.__dict__.items()

    def values(self):
        return self.__dict__.values()

    def __len__(self):
        return len(self.__dict__)

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        return setattr(self, key, value)

    def __contains__(self, key):
        return key in self.__dict__

    def __repr__(self):
        return self.__dict__.__repr__()