fix: uv dependency
All checks were successful
Build container / build-docker (push) Successful in 24m10s
All checks were successful
Build container / build-docker (push) Successful in 24m10s
This commit is contained in:
@ -7,6 +7,7 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN uv sync
|
RUN uv sync
|
||||||
|
RUN uv sync --extra vllm
|
||||||
|
|
||||||
ENV ASR_MODEL_PATH="Qwen/Qwen3-ASR-1.7B"
|
ENV ASR_MODEL_PATH="Qwen/Qwen3-ASR-1.7B"
|
||||||
|
|
||||||
|
|||||||
@ -19,14 +19,14 @@ https://github.com/QwenLM/Qwen3-ASR
|
|||||||
# Start
|
# Start
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker pull harbor.bwgdi.com/library/qwen3asr:0.0.1
|
docker pull harbor.bwgdi.com/library/qwen3-asr:0.0.1
|
||||||
|
|
||||||
# Run with custom model path
|
# Run with custom model path
|
||||||
# -e ASR_MODEL_PATH: Model name or local path inside container
|
# -e ASR_MODEL_PATH: Model name or local path inside container
|
||||||
docker run -d --restart always -p 8000:8000 --gpus all \
|
docker run -d --restart always -p 5051:5000 --gpus all \
|
||||||
-e ASR_MODEL_PATH="Qwen/Qwen3-ASR-1.7B" \
|
-e ASR_MODEL_PATH="Qwen/Qwen3-ASR-1.7B" \
|
||||||
--mount type=bind,source=/path/to/your/models,target=/models \
|
--mount type=bind,source=/path/to/your/models,target=/models \
|
||||||
harbor.bwgdi.com/library/qwen3asr:0.0.3
|
harbor.bwgdi.com/library/qwen3-asr:0.0.1
|
||||||
```
|
```
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
|
|||||||
@ -173,7 +173,7 @@ def parse_args():
|
|||||||
p = argparse.ArgumentParser(description="Qwen3-ASR Unified API (streaming + non-streaming)")
|
p = argparse.ArgumentParser(description="Qwen3-ASR Unified API (streaming + non-streaming)")
|
||||||
p.add_argument("--asr-model-path", default="Qwen/Qwen3-ASR-1.7B", help="Model name or local path")
|
p.add_argument("--asr-model-path", default="Qwen/Qwen3-ASR-1.7B", help="Model name or local path")
|
||||||
p.add_argument("--host", default="0.0.0.0")
|
p.add_argument("--host", default="0.0.0.0")
|
||||||
p.add_argument("--port", type=int, default=8000)
|
p.add_argument("--port", type=int, default=5000)
|
||||||
p.add_argument("--gpu-memory-utilization", type=float, default=0.8)
|
p.add_argument("--gpu-memory-utilization", type=float, default=0.8)
|
||||||
p.add_argument("--max-new-tokens", type=int, default=32,
|
p.add_argument("--max-new-tokens", type=int, default=32,
|
||||||
help="Max new tokens per call (streaming). Use larger value for non-streaming.")
|
help="Max new tokens per call (streaming). Use larger value for non-streaming.")
|
||||||
|
|||||||
Reference in New Issue
Block a user