#!/bin/bash
# Copyright 2012 Vassil Panayotov
# Apache 2.0
# Downloads and extracts the data from VoxForge website
# Pull in the project environment. path.sh is expected to define DATA_ROOT,
# the directory under which the downloaded and extracted data is stored.
source path.sh

# Every path below is derived from DATA_ROOT. Abort early when it is missing:
# an empty value would turn DATA_TGZ into "/tgz", which the optional "rm -rf"
# at the end of this script would then target.
: "${DATA_ROOT:?DATA_ROOT must be set (normally by path.sh)}"

DATA_SRC="http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/Audio/Main/16kHz_16bit"
DATA_TGZ="${DATA_ROOT}/tgz"
DATA_EXTRACT="${DATA_ROOT}/extracted"

# delete the archive directory after everything is extracted?
deltgz=false

# give the user the opportunity to change behaviour from the command line,
# e.g. --deltgz true
source utils/parse_options.sh

mkdir -p -- "${DATA_TGZ}" || {
  echo "Cannot create directory ${DATA_TGZ}" >&2
  exit 1
}

# Check that the executables needed by this script are present on the system
command -v wget >/dev/null 2>&1 || {
  echo '"wget" is needed but not found!' >&2
  exit 1
}

# NOTE(review): the page this script was scraped from titles it
# "12G voxforge1 speech data source download script", so the download is
# presumably around 12 GB - confirm before running on a small disk.
echo "--- Starting VoxForge data download (may take some time) ..."
# wget options:
#   -r -l 1 -np    recurse one level below DATA_SRC, never ascend to the parent
#   -A tgz         keep only files ending in "tgz"
#   -nd            do not recreate the remote directory hierarchy locally
#   -P DIR         save everything under DIR
#   -N -c          skip files that are already up to date, resume partial ones
#   -e robots=off  ignore robots.txt
wget -P "${DATA_TGZ}" -l 1 -N -nd -c -e robots=off -A tgz -r -np "${DATA_SRC}" || {
  echo 'WGET error!' >&2
  exit 1
}

mkdir -p -- "${DATA_EXTRACT}" || {
  echo "Cannot create directory ${DATA_EXTRACT}" >&2
  exit 1
}

echo "--- Starting VoxForge archives extraction ..."
failed=0
for a in "${DATA_TGZ}"/*.tgz; do
  # When nothing matches, the glob is left as a literal string; skip it.
  [[ -e "${a}" ]] || continue
  if ! tar -C "${DATA_EXTRACT}" -xf "${a}"; then
    echo "Failed to extract ${a}" >&2
    failed=$((failed + 1))
  fi
done

# Never delete the archives when some of them could not be unpacked, otherwise
# the only copy of that data would be lost.
if (( failed > 0 )); then
  echo "${failed} archive(s) could not be extracted; keeping ${DATA_TGZ}" >&2
  exit 1
fi

# Compare against the literal "true" instead of executing the option value
# as a command.
if [[ "${deltgz}" == "true" ]]; then
  rm -rf -- "${DATA_TGZ:?}"
fi
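# Most recently recommended article, published 2024-07-17 15:32:28
# This article presents a bash script that downloads and extracts data from the VoxForge website. The script defines where the data is stored and checks that required executables such as wget are installed. It then uses wget to download every .tgz archive under the given remote directory into local storage and extracts those archives.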
# 2306
#
# Collapsed comments
# Why were they collapsed?



