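Why?
The model expects its audio input at a 16 kHz sample rate with a single (mono) channel, so the audio files have to be converted first.
So: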
import os
import subprocess
from pathlib import Path

from pydub import AudioSegment
# Conversion via pydub
def convert_to_wav(src_path, dst_path, audio_format):
    """Re-encode an audio file as 16 kHz mono WAV using pydub.

    Args:
        src_path: Path of the audio file to read.
        dst_path: Path of the WAV file to write.
        audio_format: Format name handed to ``AudioSegment.from_file`` as
            its ``format`` argument (for example ``"mp3"``).
    """
    segment = AudioSegment.from_file(src_path, format=audio_format)
    # Resample first, then down-mix, in the same order as the chained form.
    resampled = segment.set_frame_rate(16000)
    mono = resampled.set_channels(1)
    mono.export(dst_path, format="wav")
# Conversion via the ffmpeg command-line tool
def cmd_convert_to_wav(src_path, dst_path):
    """Convert an audio file to 16 kHz mono WAV by running ``ffmpeg``.

    The command is handed to ``subprocess.run`` as an argument list, so no
    shell ever parses the paths: quotes, spaces, ``;`` or ``$`` in a file
    name are passed to ffmpeg literally instead of breaking (or extending)
    the command line.

    Args:
        src_path: Path of the audio file to read.
        dst_path: Path of the WAV file to write. An existing file is
            overwritten (``-y``).

    Returns:
        ffmpeg's exit status (``0`` on success), or ``127`` when the
        ``ffmpeg`` executable could not be started at all. The function
        does not raise for either kind of failure, so callers using it as
        a best-effort fallback keep running.
    """
    command = [
        "ffmpeg",
        "-y",
        "-i", str(src_path),
        "-ar", "16000",
        "-ac", "1",
        "-f", "wav",
        str(dst_path),
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as exc:
        # Typically FileNotFoundError: ffmpeg is not installed / not on PATH.
        print(f"无法启动ffmpeg: {exc}")
        return 127
    return completed.returncode
def _normalize_suffixes(suffixes):
    """Return ``suffixes`` as a set of lower-case, dot-prefixed extensions."""
    if isinstance(suffixes, str):
        # A bare string would otherwise be iterated character by character.
        suffixes = [suffixes]
    normalized = set()
    for suffix in suffixes:
        suffix = suffix.lower()
        if suffix and not suffix.startswith('.'):
            suffix = '.' + suffix
        normalized.add(suffix)
    return normalized


def batch_convert_to_wav(directory, filter_suffix=None, convert_suffix=None):
    """Convert the matching audio files directly inside ``directory`` to WAV.

    Each regular file whose extension is listed in ``convert_suffix`` and
    not in ``filter_suffix`` is converted with ``convert_to_wav``; if that
    raises, ``cmd_convert_to_wav`` is tried instead. The result is written
    next to the source with a ``.wav`` extension (a ``.wav`` source is
    therefore replaced by its converted version). Sub-directories are not
    descended into.

    Args:
        directory: Directory to scan.
        filter_suffix: Extensions to skip. Defaults to none.
        convert_suffix: Extensions to convert. Defaults to
            ``['.m4a', '.mp3', '.wav']``.
            Both suffix arguments are matched case-insensitively and may be
            given with or without the leading dot.

    Returns:
        List of the ``.wav`` paths that were written successfully.

    Raises:
        OSError: If ``directory`` cannot be listed (for example it does not
            exist).
    """
    skipped = _normalize_suffixes(filter_suffix or [])
    wanted = _normalize_suffixes(convert_suffix or ['.m4a', '.mp3', '.wav'])
    directory = Path(directory)
    converted = []
    # Snapshot the listing before converting: the loop creates files in this
    # very directory, and a live iterator may or may not report them.
    for file in sorted(directory.iterdir()):
        if not file.is_file():
            continue
        suffix = file.suffix.lower()
        if suffix in skipped or suffix not in wanted:
            continue
        wav_path = file.with_suffix('.wav')
        # Convert into a scratch file and move it into place afterwards. For
        # a ``.wav`` source the destination IS the source, and a converter
        # must never read from and write to the same file; it also keeps a
        # failed conversion from clobbering an existing file.
        tmp_path = wav_path.with_name(wav_path.name + '.part')
        print(f"正在处理: {file}")
        status = None
        try:
            convert_to_wav(str(file), str(tmp_path), audio_format=suffix.lstrip('.'))
        except Exception as e:
            print(f"pydub转换失败: {e},尝试ffmpeg...")
            # Drop any partial output so it cannot be mistaken for a result.
            if tmp_path.exists():
                tmp_path.unlink()
            status = cmd_convert_to_wav(str(file), str(tmp_path))
        # The fallback may return nothing or an exit status; a non-zero
        # status, or a missing/empty scratch file, means nothing usable
        # was produced.
        succeeded = (
            status in (None, 0)
            and tmp_path.is_file()
            and tmp_path.stat().st_size > 0
        )
        if succeeded:
            tmp_path.replace(wav_path)
            converted.append(wav_path)
        else:
            if tmp_path.exists():
                tmp_path.unlink()
            print(f"转换失败,已跳过: {file}")
    return converted
if __name__ == "__main__":
    # Convert audio to WAV, mono, 16 kHz.
    # Directory to scan; fill in before running. It is handed to
    # batch_convert_to_wav unchanged, which wraps it in Path().
    _dir = r""
    excluded_suffixes = []
    wanted_suffixes = ['.m4a', '.mp3', '.wav']
    batch_convert_to_wav(
        _dir,
        filter_suffix=excluded_suffixes,
        convert_suffix=wanted_suffixes,
    )