下载npm依赖，通过package-lock.json

小小码农如此不堪

已于 2024-12-09 15:06:58 修改

阅读量413

点赞数 2

文章标签： npm json python

于 2024-12-06 16:45:39 首次发布

本文链接：https://blog.youkuaiyun.com/weixin_43944102/article/details/144295653

版权

下载npm依赖，通过package-lock.json

说明

使用 Nexus 私服时，可用本脚本根据 package-lock.json 下载 npm 离线依赖包，再将下载得到的文件上传至 Nexus。

使用

python downloadNpmPackage.py -f package-lock.json -o ./npm

或指定一个文件夹（脚本会递归读取该文件夹下所有 .json 文件）

python downloadNpmPackage.py -i ./xxx -o ./npm

import os
import sys
import requests
import random

requests.packages.urllib3.disable_warnings()
import json
import time
from concurrent.futures import ThreadPoolExecutor
import getopt

# Registry mirrors to query; get_download_url picks one at random per request.
repository = [
    "https://registry.npmmirror.com",
    "https://mirrors.cloud.tencent.com/npm",
    "https://mirrors.huaweicloud.com/repository/npm",
]

# Retry limit consulted by do_download when a download fails.
retry_times = 3

# Shared executor with up to 20 worker threads.
pool = ThreadPoolExecutor(20)

# Packages collected from the lock files. get_json_data adds
# (name, version, tarball file name) tuples.
download_data = set()
# Packages that could not be downloaded. do_download adds
# (name, version, output file name) tuples.
download_fail = set()

# Command-line options are parsed at import time:
#   -f / --file        a single lock file
#   -i / --input_dir   a directory searched for .json files
#   -o / --output_dir  where downloaded files are written
try:
    options, args = getopt.getopt(sys.argv[1:], "f:i:o:", ["output_dir=", "file=", "input_dir="])
except getopt.GetoptError:
    # Only argument errors are handled here; anything else should surface.
    print("参数错误")
    # Non-zero status so calling scripts can detect the failure.
    sys.exit(2)


def _tarball_entry(name, info):
    """Return ``(name, version, tarball_file_name)`` for a lock entry, or ``None``.

    ``None`` is returned when the entry is not a mapping or has no usable
    ``version`` / ``resolved`` values.
    """
    if not isinstance(info, dict):
        return None
    version = info.get('version')
    resolved = info.get('resolved')
    if not isinstance(version, str) or not isinstance(resolved, str):
        return None
    # Last path segment of the resolved URL, with any query string removed.
    return (name, version, resolved.split('/')[-1].split('?')[0])


def _collect_dependencies(dependencies):
    """Walk a nested ``dependencies`` mapping and record every usable entry."""
    if not isinstance(dependencies, dict):
        return
    for pkg, info in dependencies.items():
        entry = _tarball_entry(pkg, info)
        if entry is None:
            # Entries without version/resolved are skipped together with
            # anything nested below them.
            continue
        download_data.add(entry)
        _collect_dependencies(info.get('dependencies'))


def _collect_packages(packages):
    """Record usable entries from a flat ``packages`` mapping keyed by install path."""
    if not isinstance(packages, dict):
        return
    for location, info in packages.items():
        if not isinstance(info, dict):
            continue
        name = info.get('name')
        if not isinstance(name, str) or not name:
            # Keys look like "node_modules/a/node_modules/@scope/b"; the
            # package name is whatever follows the last "node_modules/".
            name = location.rpartition('node_modules/')[2]
        if not name:
            continue  # the root project entry is keyed by ""
        entry = _tarball_entry(name, info)
        if entry is not None:
            download_data.add(entry)


def get_json_data(data):
    """Collect downloadable packages from parsed lock-file JSON into ``download_data``.

    Every collected item is a ``(name, version, tarball_file_name)`` tuple.

    Two layouts are understood:

    * ``dependencies``: mapping of package name to entry, with nested
      ``dependencies`` walked recursively.
    * ``packages``: flat mapping keyed by install location, as written by
      npm lockfileVersion 2 and 3.

    Input that is not a dict, or that has neither section, is ignored instead
    of raising, so unrelated JSON files can be passed safely.

    Args:
        data: Parsed JSON content of a lock file.
    """
    if not isinstance(data, dict):
        return
    _collect_dependencies(data.get('dependencies'))
    _collect_packages(data.get('packages'))

def get_download_url(name, version):
    """Return the tarball URL of ``name`` at ``version`` from a randomly chosen mirror.

    The package metadata document is fetched from ``<mirror>/<name>/`` and the
    URL is read from ``versions[version].dist.tarball``.

    Args:
        name: Package name as it appears in the lock file.
        version: Exact version string to look up.

    Returns:
        The tarball URL given by the registry metadata.

    Raises:
        RuntimeError: If the registry does not answer with HTTP 200, or the
            requested version is not listed in the metadata.
    """
    # NOTE(review): verify=False disables TLS certificate verification.
    response = requests.get(random.choice(repository)+"/"+name+"/", timeout=60, verify=False)
    if response.status_code != 200:
        # A real exception type is required; raising a plain str is a TypeError.
        raise RuntimeError(f"{name}获取依赖地址失败")
    data = json.loads(response.text)
    versions = data.get('versions') if isinstance(data, dict) else None
    # Fail when the section is missing OR the version is not listed in it.
    if not isinstance(versions, dict) or not versions.get(version):
        raise RuntimeError(f"{name}版本{version}不存在")
    return versions[version]['dist']['tarball']




def do_download(name, version ,tgz_name, current_try_time):
    """Download one package tarball into ``down_url``, retrying on failure.

    The output file is named ``<name with '/' replaced by '_'>_<version>.<ext>``,
    where ``<ext>`` is the text after the last dot of ``tgz_name``. A file that
    already exists is left untouched. Once the attempt counter reaches
    ``retry_times`` the package is recorded in ``download_fail``.

    Args:
        name: Package name.
        version: Package version.
        tgz_name: Tarball file name; only its extension is used.
        current_try_time: Number of failed attempts so far (0 for a first call).
    """
    file_name = name.replace('/','_')+"_"+version+"."+tgz_name.split('.')[-1]
    output_path = os.path.join(down_url, file_name)
    # Data is streamed here first, so an interrupted transfer never leaves a
    # truncated file under the final name (it would be reported as "已存在").
    partial_path = output_path + ".part"

    while True:
        try:
            if os.path.exists(output_path):
                print(f'依赖{file_name}已存在')
                return
            print(f'依赖{file_name}开始下载')
            # NOTE(review): verify=False disables TLS certificate verification.
            with requests.get(get_download_url(name, version),
                              stream=True, timeout=60, verify=False) as response:
                # Reject 4xx/5xx answers instead of saving the error page.
                response.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=4096):
                        f.write(chunk)
            os.replace(partial_path, output_path)
            return
        except Exception:
            # Best-effort cleanup of whatever was written before the failure.
            try:
                os.remove(partial_path)
            except OSError:
                pass
            current_try_time += 1
            if current_try_time >= retry_times:
                download_fail.add((name, version ,file_name))
                print(f'依赖{name} {version}下载失败')
                return
            print(f'【第{current_try_time}次】尝试重新下载依赖 {name} {version}')
            # Wait one second before the next attempt.
            time.sleep(1)


# Default output directory; replaced by the value of -o / --output_dir.
down_url = './npm-package'

if __name__ == '__main__':
    # Step 1: read every lock file named on the command line.
    for option, value in options:
        if option in ("-f", "--file"):
            # Explicit UTF-8 so reading does not depend on the platform's
            # locale default.
            with open(value, 'r', encoding='utf-8') as f:
                get_json_data(json.load(f))
        elif option in ("-o", "--output_dir"):
            down_url = value
        elif option in ("-i", "--input_dir"):
            for root, _dirs, files in os.walk(value):
                for file in files:
                    if not file.endswith('.json'):
                        continue
                    json_path = os.path.join(root, file)
                    # A directory scan can hit arbitrary JSON files; one bad
                    # file should not abort the whole run.
                    try:
                        with open(json_path, 'r', encoding='utf-8') as f:
                            get_json_data(json.load(f))
                    except (OSError, ValueError, KeyError, TypeError) as err:
                        print(f"跳过无法解析的文件 {json_path}: {err!r}")

    # Step 2: make sure the output directory exists (no check-then-create race).
    os.makedirs(down_url, exist_ok=True)

    # Step 3: queue one download task per collected package and wait for all.
    print(f"下载总数量：{len(download_data)}")
    for e in download_data:
        pool.submit(do_download, e[0],e[1],e[2], 0)

    pool.shutdown(wait=True)

    # Step 4: report the failures and persist them next to the downloads.
    print(f"下载失败总数量：{len(download_fail)}")
    fail_path = down_url + "/download_fail_tgz.txt"
    with open(fail_path, 'w', encoding='utf-8') as file:
        file.write(str(download_fail))
    print("下载失败路径" + fail_path)