# jarvis-models

Mirror of https://github.com/BoardWare-Genius/jarvis-models.git
## Conda Environment and Python Library Requirements
```bash
conda create -n jarvis-models python==3.10.11
pip install -r sample/requirement_out_of_pytorch.txt
pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
```
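After installing, a quick check (an illustrative sketch, not a script shipped with the repo) can confirm that the CUDA 11.8 build of PyTorch is active:

```python
# Sanity check: report the installed PyTorch build and whether CUDA is usable.
# Illustrative only -- not part of jarvis-models.
import torch

print(torch.__version__)          # expected: 2.0.1+cu118
print(torch.cuda.is_available())  # True on a machine with a working CUDA 11.8 setup
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
```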
## More Dependencies
| System | Package | Website | Install command |
|---|---|---|---|
| python | filetype | https://pypi.org/project/filetype/ | pip install filetype |
| python | FastAPI | https://fastapi.tiangolo.com/ | pip install fastapi |
| python | python-multipart | https://pypi.org/project/python-multipart/ | pip install python-multipart |
| python | uvicorn | https://www.uvicorn.org/ | pip install "uvicorn[standard]" |
| python | SpeechRecognition | https://pypi.org/project/SpeechRecognition/ | pip install SpeechRecognition |
| python | gTTS | https://pypi.org/project/gTTS/ | pip install gTTS |
| python | PyYAML | https://pypi.org/project/PyYAML/ | pip install PyYAML |
| python | injector | https://github.com/python-injector/injector | pip install injector |
| python | langchain | https://github.com/langchain-ai/langchain | pip install langchain |
| python | chromadb | https://docs.trychroma.com/getting-started | pip install chromadb |
| python | lagent | https://github.com/InternLM/lagent/blob/main/README.md | pip install lagent |
| python | sentence_transformers | https://github.com/UKPLab/sentence-transformers | pip install sentence_transformers |
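Once these packages are installed, a short check (an illustrative sketch; note that the importable module names differ from some pip package names) can confirm that everything imports cleanly:

```python
# Verify that each extra dependency can be imported.
# pip names and import names differ for some packages:
# python-multipart -> multipart, SpeechRecognition -> speech_recognition,
# gTTS -> gtts, PyYAML -> yaml.
import importlib

modules = [
    "filetype", "fastapi", "multipart", "uvicorn", "speech_recognition",
    "gtts", "yaml", "injector", "langchain", "chromadb", "lagent",
    "sentence_transformers",
]
for name in modules:
    try:
        importlib.import_module(name)
        print(f"{name}: OK")
    except ImportError as exc:
        print(f"{name}: missing ({exc})")
```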
## Start

Start the jarvis-models service via:

```bash
uvicorn main:app --reload
```

or

```bash
python main.py
```
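For reference, `python main.py` presumably wraps `uvicorn.run` around the same FastAPI app; the sketch below is a minimal stand-in under that assumption (the repository's actual main.py may wire things differently, e.g. reading the host and port from `.env.yaml`):

```python
# main.py -- minimal sketch, NOT the repository's actual entry point.
# Assumes a FastAPI app object named `app`, served by uvicorn, with the
# host/port defaulting to the values shown in the .env.yaml example below.
import uvicorn
from fastapi import FastAPI

app = FastAPI(title="jarvis-models")

@app.get("/health")
def health() -> dict:
    # Hypothetical endpoint, used here only to show the app is up.
    return {"status": "ok"}

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
```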
## Configuration

Create a `.env.yaml` file at the root of jarvis-models and copy in the following YAML configuration:
```yaml
env:
  version: 0.0.1
  host: 0.0.0.0
  port: 8000
  log:
    level: debug
    time_format: "%Y-%m-%d %H:%M:%S"
    filename: "D:/Workspace/Logging/jarvis/jarvis-models.log"
    loki:
      url: "https://loki.bwgdi.com/loki/api/v1/push"
      labels:
        app: jarvis
        env: dev
        location: "gdi"
        layer: models
melotts:
  mode: local # or docker
  url: http://10.6.44.141:18080/convert/tts
  speed: 0.9
  device: 'cuda:0'
  language: 'ZH'
  speaker: 'ZH'
cosyvoicetts:
  mode: local # or docker
  url: http://10.6.44.141:18080/convert/tts
  speed: 0.9
  device: 'cuda:0'
  language: '粤语女'
  speaker: 'ZH'
sovitstts:
  mode: docker
  url: http://10.6.80.90:9880/tts
  speed: 0.9
  device: 'cuda:0'
  language: 'ZH'
  speaker: 'ZH'
  text_lang: "yue"
  ref_audio_path: "output/slicer_opt/Ricky-Wong/Ricky-Wong-3-Mins.wav_0006003840_0006134080.wav"
  prompt_lang: "yue"
  prompt_text: "你失敗咗點算啊?你而家安安穩穩,點解要咁樣做呢?"
  text_split_method: "cut5"
  batch_size: 1
  media_type: "wav"
  streaming_mode: True
sensevoiceasr:
  mode: local # or docker
  url: http://10.6.44.141:18080/convert/tts
  speed: 0.9
  device: 'cuda:0'
  language: '粤语女'
  speaker: 'ZH'
tesou:
  url: http://120.196.116.194:48891/chat/
TokenIDConverter:
  token_path: src/asr/resources/models/token_list.pkl
  unk_symbol: <unk>
CharTokenizer:
  symbol_value:
  space_symbol: <space>
  remove_non_linguistic_symbols: false
WavFrontend:
  cmvn_file: src/asr/resources/models/am.mvn
  frontend_conf:
    fs: 16000
    window: hamming
    n_mels: 80
    frame_length: 25
    frame_shift: 10
    lfr_m: 7
    lfr_n: 6
    filter_length_max: -.inf
    dither: 0.0
Model:
  model_path: src/asr/resources/models/model.onnx
  use_cuda: false
  CUDAExecutionProvider:
    device_id: 0
    arena_extend_strategy: kNextPowerOfTwo
    cudnn_conv_algo_search: EXHAUSTIVE
    do_copy_in_default_stream: true
  batch_size: 3
blackbox:
  lazyloading: true
vlms:
  urls:
    qwen_vl: http://10.6.80.87:8000
    qwen2_vl: http://10.6.80.87:23333
    qwen2_vl_72b: http://10.6.80.91:23333
path:
  chroma_rerank_embedding_model: /media/verachen/e0f7a88c-ad43-4736-8829-4d06e5ed8f4f/model/BAAI
  cosyvoice_path: /media/verachen/e0f7a88c-ad43-4736-8829-4d06e5ed8f4f/Workspace/CosyVoice
  cosyvoice_model_path: /media/verachen/e0f7a88c-ad43-4736-8829-4d06e5ed8f4f/model/Voice/CosyVoice/pretrained_models
  sensevoice_model_path: /media/verachen/e0f7a88c-ad43-4736-8829-4d06e5ed8f4f/model/Voice/SenseVoice/SenseVoiceSmall
```
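jarvis-models presumably loads this file at startup. As a quick way to confirm the file parses, the following PyYAML sketch (illustrative only; the key paths assume the nesting shown above, with the model sections at the top level) prints a few values:

```python
# Parse .env.yaml and print a few values -- illustrative only.
# Key paths assume the nesting shown in the example above.
import yaml

with open(".env.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

print(config["env"]["host"], config["env"]["port"])   # 0.0.0.0 8000
print(config["melotts"]["device"])                    # cuda:0
print(config["vlms"]["urls"]["qwen2_vl"])             # http://10.6.80.87:23333
```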