diff --git a/README.md b/README.md
index 2211ce4..974607c 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 <p>
 
 <p align="center">
-&nbsp&nbsp🤗 <a href="https://huggingface.co/collections/Qwen/qwen3-asr">Hugging Face</a>&nbsp&nbsp | &nbsp&nbsp🤖 <a href="https://modelscope.cn/collections/Qwen/Qwen3-ASR">ModelScope</a>&nbsp&nbsp | &nbsp&nbsp📑 <a href="https://qwen.ai/blog?id=qwen3asr">Blog</a>&nbsp&nbsp | &nbsp&nbsp📑 <a href="https://github.com/QwenLM/Qwen3-ASR/blob/main/assets/Qwen3_ASR.pdf">Paper</a>&nbsp&nbsp
+&nbsp&nbsp🤗 <a href="https://huggingface.co/collections/Qwen/qwen3-asr">Hugging Face</a>&nbsp&nbsp | &nbsp&nbsp🤖 <a href="https://modelscope.cn/collections/Qwen/Qwen3-ASR">ModelScope</a>&nbsp&nbsp | &nbsp&nbsp📑 <a href="https://qwen.ai/blog?id=qwen3asr">Blog</a>&nbsp&nbsp | &nbsp&nbsp📑 <a href="https://arxiv.org/abs/2601.21337">Paper</a>&nbsp&nbsp
 <br>
 🖥️ <a href="https://huggingface.co/spaces/Qwen/Qwen3-ASR">Hugging Face Demo</a>&nbsp&nbsp | &nbsp&nbsp 🖥️ <a href="https://modelscope.cn/studios/Qwen/Qwen3-ASR">ModelScope Demo</a>&nbsp&nbsp | &nbsp&nbsp💬 <a href="https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png">WeChat (微信)</a>&nbsp&nbsp | &nbsp&nbsp🫨 <a href="https://discord.gg/CV4E9rpNSD">Discord</a>&nbsp&nbsp | &nbsp&nbsp📑 <a href="https://help.aliyun.com/zh/model-studio/qwen-speech-recognition">API</a>
 
@@ -41,7 +41,7 @@ We release **Qwen3-ASR**, a family that includes two powerful all-in-one speech
 - [Fine Tuning](#fine-tuning)
 - [Docker](#docker)
 - [Evaluation](#evaluation)
-<!-- - [Citation](#citation) -->
+- [Citation](#citation)
 
 
 ## Overview
@@ -1420,18 +1420,18 @@ During evaluation, we ran inference for all models with `dtype=torch.bfloat16` a
 </details>
 
 
-<!-- ## Citation
+## Citation
 
 If you find our paper and code useful in your research, please consider giving a star :star: and citation :pencil: :)
 
 ```BibTeX
 @article{Qwen3-ASR,
   title={Qwen3-ASR Technical Report},
-  author={},
-  journal={arXiv preprint arXiv:},
+  author={Xian Shi and Xiong Wang and Zhifang Guo and Yongqi Wang and Pei Zhang and Xinyu Zhang and Zishan Guo and Hongkun Hao and Yu Xi and Baosong Yang and Jin Xu and Jingren Zhou and Junyang Lin},
+  journal={arXiv preprint arXiv:2601.21337},
   year={2026}
 }
-``` -->
+```
 
 
 ## Star History
diff --git a/pyproject.toml b/pyproject.toml
index 93ea350..66d0f88 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "qwen-asr"
-version = "0.0.4"
+version = "0.0.5"
 description = "Qwen-ASR python package"
 readme = "README.md"
 requires-python = ">=3.9"
diff --git a/qwen_asr/inference/qwen3_asr.py b/qwen_asr/inference/qwen3_asr.py
index 92608f0..d997ba3 100644
--- a/qwen_asr/inference/qwen3_asr.py
+++ b/qwen_asr/inference/qwen3_asr.py
@@ -733,8 +733,18 @@ class Qwen3ASRModel:
                 prefix = ""
             else:
                 cur_ids = self.processor.tokenizer.encode(state._raw_decoded)
-                end_idx = max(1, len(cur_ids) - int(state.unfixed_token_num))
-                prefix = self.processor.tokenizer.decode(cur_ids[:end_idx])
+                k = int(state.unfixed_token_num)
+                while True:
+                    end_idx = max(0, len(cur_ids) - k)
+                    prefix = self.processor.tokenizer.decode(cur_ids[:end_idx]) if end_idx > 0 else ""
+                    try:
+                        prefix.encode("utf-8").decode("utf-8")
+                        break
+                    except UnicodeError:
+                        if end_idx == 0:
+                            prefix = ""
+                            break
+                        k += 1
 
             prompt = state.prompt_raw + prefix