update example data

This commit is contained in:
pengzhendong
2026-01-03 23:11:15 +08:00
parent 2e31abe02d
commit 6ed053b0f6
8 changed files with 185 additions and 31 deletions

View File

@ -1 +1,20 @@
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!/path/to/wav<|endofspeech|>"}, {"role": "assistant", "content": "content of /path/to/wav"}], "speech_length": 42, "text_length": 42}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0002.wav<|endofspeech|>"}, {"role": "assistant", "content": "几点了?"}], "speech_length": 145, "text_length": 3}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0001.wav<|endofspeech|>"}, {"role": "assistant", "content": "Margaret said Mister Hale, as he returned from showing his guest downstairs, I could not help watching your face with some anxiety when Mister Thornton made his confession of having been a shop boy."}], "speech_length": 1261, "text_length": 38}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0004.wav<|endofspeech|>"}, {"role": "assistant", "content": "Number ten. Fresh Nelly is waiting on you. Good night, husband."}], "speech_length": 520, "text_length": 16}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0002.wav<|endofspeech|>"}, {"role": "assistant", "content": "After early nightfall, the yellow lamps would light up here and there the squalid quarter of the brothels."}], "speech_length": 661, "text_length": 24}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0126.wav<|endofspeech|>"}, {"role": "assistant", "content": "因此,土地储备至关重要。"}], "speech_length": 348, "text_length": 6}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0002.wav<|endofspeech|>"}, {"role": "assistant", "content": "You don't mean that you thought me so silly."}], "speech_length": 282, "text_length": 11}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0001.wav<|endofspeech|>"}, {"role": "assistant", "content": "Stuff it into you, his belly counselled him."}], "speech_length": 326, "text_length": 11}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0127.wav<|endofspeech|>"}, {"role": "assistant", "content": "中原地产首席分析师张大伟说。"}], "speech_length": 443, "text_length": 9}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0001.wav<|endofspeech|>"}, {"role": "assistant", "content": "换一首歌。"}], "speech_length": 197, "text_length": 4}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0124.wav<|endofspeech|>"}, {"role": "assistant", "content": "为了规避三四线城市明显过剩的市场风险,"}], "speech_length": 522, "text_length": 11}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0003.wav<|endofspeech|>"}, {"role": "assistant", "content": "Hello, Bertie, any good in your mind?"}], "speech_length": 266, "text_length": 11}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0125.wav<|endofspeech|>"}, {"role": "assistant", "content": "标杆房企必然调整市场战略。"}], "speech_length": 429, "text_length": 7}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0000.wav<|endofspeech|>"}, {"role": "assistant", "content": "There's iron, they say, in all our blood, and a grain or two perhaps is good; but his, he makes me harshly feel, has got a little too much of steel. Anon."}], "speech_length": 1498, "text_length": 43}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0006.wav<|endofspeech|>"}, {"role": "assistant", "content": "The dull light fell more faintly upon the page whereon another equation began to unfold itself slowly and to spread abroad its widening tail."}], "speech_length": 1054, "text_length": 27}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0008.wav<|endofspeech|>"}, {"role": "assistant", "content": "The chaos in which his ardour extinguished itself was a cold indifferent knowledge of himself."}], "speech_length": 671, "text_length": 18}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0003.wav<|endofspeech|>"}, {"role": "assistant", "content": "早上好。"}], "speech_length": 147, "text_length": 3}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0005.wav<|endofspeech|>"}, {"role": "assistant", "content": "The music came nearer, and he recalled the words, the words of Shelley's fragment upon the moon, wandering companionless, pale for weariness."}], "speech_length": 962, "text_length": 30}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0004.wav<|endofspeech|>"}, {"role": "assistant", "content": "His statement of having been a shop boy was the thing I liked best of all."}], "speech_length": 446, "text_length": 17}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0007.wav<|endofspeech|>"}, {"role": "assistant", "content": "A cold lucid indifference reigned in his soul."}], "speech_length": 426, "text_length": 11}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0129.wav<|endofspeech|>"}, {"role": "assistant", "content": "也助推了土地市场的火爆。"}], "speech_length": 357, "text_length": 7}

20
data/train_text.txt Normal file
View File

@ -0,0 +1,20 @@
1089-134686-0004 Number ten. Fresh Nelly is waiting on you. Good night, husband.
BAC009S0764W0127 中原地产首席分析师张大伟说。
1089-134686-0002 After early nightfall, the yellow lamps would light up here and there the squalid quarter of the brothels.
1688-142285-0001 Margaret said Mister Hale, as he returned from showing his guest downstairs, I could not help watching your face with some anxiety when Mister Thornton made his confession of having been a shop boy.
1089-134686-0001 Stuff it into you, his belly counselled him.
1688-142285-0002 You don't mean that you thought me so silly.
IT0011W0002 几点了?
BAC009S0764W0126 因此,土地储备至关重要。
BAC009S0764W0125 标杆房企必然调整市场战略。
IT0011W0001 换一首歌。
1688-142285-0000 There's iron, they say, in all our blood, and a grain or two perhaps is good; but his, he makes me harshly feel, has got a little too much of steel. Anon.
BAC009S0764W0124 为了规避三四线城市明显过剩的市场风险,
1089-134686-0003 Hello, Bertie, any good in your mind?
1089-134686-0006 The dull light fell more faintly upon the page whereon another equation began to unfold itself slowly and to spread abroad its widening tail.
IT0011W0003 早上好。
1089-134686-0008 The chaos in which his ardour extinguished itself was a cold indifferent knowledge of himself.
BAC009S0764W0129 也助推了土地市场的火爆。
1089-134686-0005 The music came nearer, and he recalled the words, the words of Shelley's fragment upon the moon, wandering companionless, pale for weariness.
1089-134686-0007 A cold lucid indifference reigned in his soul.
1688-142285-0004 His statement of having been a shop boy was the thing I liked best of all.

20
data/train_wav.scp Normal file
View File

@ -0,0 +1,20 @@
1089-134686-0004 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0004.wav
BAC009S0764W0127 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0127.wav
1089-134686-0002 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0002.wav
1688-142285-0001 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0001.wav
1089-134686-0001 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0001.wav
1688-142285-0002 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0002.wav
IT0011W0002 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0002.wav
BAC009S0764W0126 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0126.wav
BAC009S0764W0125 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0125.wav
IT0011W0001 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0001.wav
1688-142285-0000 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0000.wav
BAC009S0764W0124 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0124.wav
1089-134686-0003 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0003.wav
1089-134686-0006 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0006.wav
IT0011W0003 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0003.wav
1089-134686-0008 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0008.wav
BAC009S0764W0129 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0129.wav
1089-134686-0005 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0005.wav
1089-134686-0007 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0007.wav
1688-142285-0004 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0004.wav

View File

@ -1 +1,10 @@
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!/path/to/wav<|endofspeech|>"}, {"role": "assistant", "content": "content of /path/to/wav"}], "speech_length": 42, "text_length": 42}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0000.wav<|endofspeech|>"}, {"role": "assistant", "content": "He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick peppered flour-fattened sauce."}], "speech_length": 1042, "text_length": 37}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0004.wav<|endofspeech|>"}, {"role": "assistant", "content": "放歌。"}], "speech_length": 131, "text_length": 3}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0130.wav<|endofspeech|>"}, {"role": "assistant", "content": "北京仅新增住宅土地供应十宗。"}], "speech_length": 523, "text_length": 9}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0005.wav<|endofspeech|>"}, {"role": "assistant", "content": "放首歌。"}], "speech_length": 155, "text_length": 4}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0121.wav<|endofspeech|>"}, {"role": "assistant", "content": "甚至出现交易几乎停滞的情况。"}], "speech_length": 418, "text_length": 7}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0009.wav<|endofspeech|>"}, {"role": "assistant", "content": "At most, by an alms given to a beggar whose blessing he fled from, he might hope wearily to win for himself some measure of actual grace."}], "speech_length": 1056, "text_length": 33}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0123.wav<|endofspeech|>"}, {"role": "assistant", "content": "但因为聚集了过多公共资源,"}], "speech_length": 398, "text_length": 7}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0003.wav<|endofspeech|>"}, {"role": "assistant", "content": "I really liked that account of himself better than anything else he said."}], "speech_length": 504, "text_length": 14}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0128.wav<|endofspeech|>"}, {"role": "assistant", "content": "一线城市土地供应量减少,"}], "speech_length": 355, "text_length": 6}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "语音转写:<|startofspeech|>!https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0122.wav<|endofspeech|>"}, {"role": "assistant", "content": "一二线城市虽然也处于调整中,"}], "speech_length": 410, "text_length": 9}

10
data/val_text.txt Normal file
View File

@ -0,0 +1,10 @@
1688-142285-0003 I really liked that account of himself better than anything else he said.
BAC009S0764W0130 北京仅新增住宅土地供应十宗。
BAC009S0764W0123 但因为聚集了过多公共资源,
IT0011W0004 放歌。
IT0011W0005 放首歌。
1089-134686-0009 At most, by an alms given to a beggar whose blessing he fled from, he might hope wearily to win for himself some measure of actual grace.
1089-134686-0000 He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick peppered flour-fattened sauce.
BAC009S0764W0121 甚至出现交易几乎停滞的情况。
BAC009S0764W0128 一线城市土地供应量减少,
BAC009S0764W0122 一二线城市虽然也处于调整中,

10
data/val_wav.scp Normal file
View File

@ -0,0 +1,10 @@
1688-142285-0003 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1688-142285-0003.wav
BAC009S0764W0130 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0130.wav
BAC009S0764W0123 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0123.wav
IT0011W0004 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0004.wav
IT0011W0005 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/IT0011W0005.wav
1089-134686-0009 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0009.wav
1089-134686-0000 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/1089-134686-0000.wav
BAC009S0764W0121 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0121.wav
BAC009S0764W0128 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0128.wav
BAC009S0764W0122 https://modelscope.cn/datasets/FunAudioLLM/funasr-demo/resolve/master/audios/BAC009S0764W0122.wav

View File

@ -1,4 +1,5 @@
import torch
from model import FunASRNano

View File

@ -1,11 +1,15 @@
import argparse
import json
import os
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO
from typing import Dict, Optional, Tuple
from urllib.request import urlopen
import soundfile as sf
from modelscope import AutoTokenizer
from tqdm import tqdm
def parse_args():
@ -13,42 +17,43 @@ def parse_args():
parser.add_argument("--scp-file", type=str, required=True)
parser.add_argument("--transcript-file", type=str, required=True)
parser.add_argument("--jsonl-file", type=str, required=True)
parser.add_argument("--max-workers", type=int, default=8,
help="Number of concurrent workers (default: 8)")
return parser.parse_args()
def main():
args = parse_args()
scp_file = args.scp_file
transcript_file = args.transcript_file
jsonl_file = args.jsonl_file
class LineProcessor:
def __init__(self, tokenizer):
self.tokenizer = tokenizer
self.lock = threading.Lock()
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
f = open(jsonl_file, "w")
with open(scp_file, "r") as f1, open(transcript_file, "r") as f2:
for line1, line2 in zip(f1, f2):
line1, line2 = line1.strip(), line2.strip()
if not line1 or not line2:
continue
parts1, parts2 = line1.split(maxsplit=1), line2.split(maxsplit=1)
if len(parts1) != 2 or len(parts2) != 2:
continue
utt1, utt2 = parts1[0], parts2[0]
wav_path, text = parts1[1], parts2[1]
if utt1 != utt2:
print(f"UTT mismatch, skip: {utt1} vs {utt2}")
continue
# TODO: avoid downloading the total audio file to memory
def process_line(self, line_pair: Tuple[str, str]) -> Optional[Dict]:
line1, line2 = line_pair
line1, line2 = line1.strip(), line2.strip()
if not line1 or not line2:
return None
parts1, parts2 = line1.split(maxsplit=1), line2.split(maxsplit=1)
if len(parts1) != 2 or len(parts2) != 2:
return None
utt1, utt2 = parts1[0], parts2[0]
wav_path, text = parts1[1], parts2[1]
if utt1 != utt2:
return {"error": f"UTT mismatch: {utt1} vs {utt2}"}
try:
if wav_path.startswith("http"):
response = urlopen(wav_path)
if response.status != 200:
print(f"WAV path not found, skip: {wav_path}")
continue
return {"error": f"WAV not found: {wav_path}"}
audio_file = BytesIO(response.read())
duration = sf.info(audio_file).duration
else:
if not os.path.exists(wav_path):
print(f"WAV path not found, skip: {wav_path}")
continue
return {"error": f"WAV not found: {wav_path}"}
duration = sf.info(wav_path).duration
data = {
@ -58,11 +63,71 @@ def main():
{"role": "assistant", "content": text}
],
"speech_length": int((duration * 1000 - 25) // 10 + 1),
"text_length": len(tokenizer.tokenize(text))
"text_length": len(self.tokenizer.tokenize(text))
}
json.dump(data, f, ensure_ascii=False)
f.write("\n")
f.close()
return {"success": data, "utt": utt1}
except Exception as e:
return {"error": f"Error processing {wav_path}: {str(e)}"}
def main():
    """Build a chat-style JSONL file from a wav.scp file and a transcript file.

    Reads the two input files named on the command line, pairs their lines
    positionally, and hands each pair to ``LineProcessor.process_line`` on a
    thread pool of ``--max-workers`` threads.  Every result carrying a
    ``"success"`` payload is written as one JSON object per line to the
    ``--jsonl-file`` path; results carrying an ``"error"`` message are counted
    and summarised on stdout when processing finishes.

    Records are written in completion order, so the output order may differ
    from the input order.
    """
    args = parse_args()

    # The transcripts can contain non-ASCII text, so pin UTF-8 instead of
    # relying on the platform's default locale encoding.
    with open(args.scp_file, "r", encoding="utf-8") as f1, \
            open(args.transcript_file, "r", encoding="utf-8") as f2:
        scp_lines = f1.readlines()
        transcript_lines = f2.readlines()

    if len(scp_lines) != len(transcript_lines):
        print(f"Warning: Line count mismatch - scp: {len(scp_lines)}, transcript: {len(transcript_lines)}")

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
    processor = LineProcessor(tokenizer)

    # zip() stops at the shorter input, so surplus lines in the longer file
    # are ignored (the warning above is the only notice).
    data_pairs = list(zip(scp_lines, transcript_lines))

    processed_count = 0
    failed_count = 0
    error_messages = []

    with tqdm(total=len(data_pairs), desc="Processing") as pbar:
        with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # ensure_ascii=False emits raw non-ASCII characters, so the output
            # file must be UTF-8 as well.
            with open(args.jsonl_file, "w", encoding="utf-8") as f_out:
                futures = [
                    executor.submit(processor.process_line, pair)
                    for pair in data_pairs
                ]

                for future in as_completed(futures):
                    result = future.result()

                    if result and "success" in result:
                        with processor.lock:
                            json.dump(result["success"], f_out, ensure_ascii=False)
                            f_out.write("\n")
                        processed_count += 1
                    elif result and "error" in result:
                        failed_count += 1
                        error_messages.append(result["error"])
                    # Any other result (e.g. a falsy one) is counted as
                    # neither processed nor failed.

                    pbar.update(1)
                    pbar.set_postfix({
                        "processed": processed_count,
                        "failed": failed_count
                    })

    print(f"\nProcessing completed:")
    print(f" Total lines: {len(data_pairs)}")
    print(f" Successfully processed: {processed_count}")
    print(f" Failed: {failed_count}")

    if error_messages:
        # Show at most ten messages; only the heading and the trailing
        # "more errors" line depend on how many there were in total.
        hidden = len(error_messages) - 10
        if hidden <= 0:
            print(f"\nSample errors:")
        else:
            print(f"\nFirst 10 errors:")
        for error in error_messages[:10]:
            print(f" - {error}")
        if hidden > 0:
            print(f" ... and {hidden} more errors")
if __name__ == "__main__":