在国内快速、高效地下载 Hugging Face 上的各种大语言模型

落难Coder

已于 2024-09-05 19:20:25 修改

阅读量703

点赞数 1

分类专栏：LLMs　文章标签：语言模型、自然语言处理、大语言模型、LLM、llama

于 2024-09-05 18:03:53 首次发布

本文链接：https://blog.youkuaiyun.com/u014297502/article/details/141936113

版权

预先安装：

apt install aria2
# sudo apt install aria2

apt install git-lfs
# sudo apt install git-lfs

下载hfd

wget https://hf-mirror.com/hfd/hfd.sh

chmod a+x hfd.sh

设置环境变量

Linux（bash 等 POSIX shell）

export HF_ENDPOINT=https://hf-mirror.com

Windows（PowerShell）

$env:HF_ENDPOINT = "https://hf-mirror.com"

下载模型

./hfd.sh gpt2 --tool aria2c -x 4

下载数据集

./hfd.sh wikitext --dataset --tool aria2c -x 4

本文参考：https://hf-mirror.com/

以下是我自己保存的一份 hfd 脚本（hfd.sh），以备不时之需：

#!/usr/bin/env bash
# ANSI colour escape sequences for terminal messages. They are stored with
# literal backslashes and only turn into real escapes when they are placed
# inside a printf format string.
NC='\033[0m'          # reset to the terminal's default colour
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# SIGINT (Ctrl-C) handler: print the "download interrupted" hint in yellow,
# then terminate the script with exit status 1.
handle_interrupt() {
    # The colour variables are deliberately part of the format string so
    # that printf interprets their \033 escapes.
    printf "${YELLOW}\nDownload interrupted. If you re-run the command, you can resume the download from the breakpoint.\n${NC}"
    exit 1
}
trap handle_interrupt INT

#######################################
# Print the hfd usage text and terminate the script.
# Arguments: none
# Outputs:   usage line, description, parameter list and examples on stdout
# Returns:   does not return; exits the shell with status 1
#######################################
display_help() {
    # Quoted delimiter: the help text is emitted literally, with no
    # parameter or command expansion.
    cat <<'EOF'
Usage:
  hfd <repo_id> [--include include_pattern] [--exclude exclude_pattern] [--hf_username username] [--hf_token token] [--tool aria2c|wget] [-x threads] [--dataset] [--local-dir path]    

Description:
  Downloads a model or dataset from Hugging Face using the provided repo ID.

Parameters:
  repo_id        The Hugging Face repo ID in the format 'org/repo_name'.
  --include       (Optional) Flag to specify a string pattern to include files for downloading.
  --exclude       (Optional) Flag to specify a string pattern to exclude files from downloading.
  include/exclude_pattern The pattern to match against filenames, supports wildcard characters. e.g., '--exclude *.safetensor', '--include vae/*'.
  --hf_username   (Optional) Hugging Face username for authentication. **NOT EMAIL**.
  --hf_token      (Optional) Hugging Face token for authentication.
  --tool          (Optional) Download tool to use. Can be aria2c (default) or wget.
  -x              (Optional) Number of download threads for aria2c. Defaults to 4.
  --dataset       (Optional) Flag to indicate downloading a dataset.
  --local-dir     (Optional) Local directory path where the model or dataset will be stored.

Example:
  hfd bigscience/bloom-560m --exclude *.safetensors
  hfd meta-llama/Llama-2-7b --hf_username myuser --hf_token mytoken -x 4
  hfd lavita/medical-qa-shared-task-v1-toy --dataset
EOF
    exit 1
}

# The first positional argument is the repo ID; drop it so that only the
# option arguments remain in "$@" for the parsing loop that follows.
MODEL_ID="$1"
shift

# Option defaults. HF_ENDPOINT keeps any non-empty value already present in
# the environment and otherwise falls back to the official endpoint.
THREADS=4
TOOL="aria2c"
: "${HF_ENDPOINT:=https://huggingface.co}"

while [[ $# -gt 0 ]]; do
    case $1 in
        --include) INCLUDE_PATTERN="$2"; shift 2 ;;
        --exclude) EXCLUDE_PATTERN="$2"; shift 2 ;;
        --hf_username) HF_USERNAME="$2"; shift 2 ;;
        --hf_token) HF_TOKEN="$2"; shift 2 ;;
        --tool) TOOL="$2"; shift 2 ;;
        -x)