"""
下载npm依赖,通过package-lock.json

说明:
    使用Nexus时,导出npm离线依赖包上传至Nexus

使用:
    python downloadNpmPackage.py -f package-lock.json -o ./npm
或指定一个文件夹:
    python downloadNpmPackage.py -i ./xxx -o ./npm
"""
import os
import sys
import requests
import random
requests.packages.urllib3.disable_warnings()
import json
import time
from concurrent.futures import ThreadPoolExecutor
import getopt
# Registries to query; get_download_url picks one at random per lookup.
repository = [
    "https://registry.npmmirror.com",
    "https://mirrors.cloud.tencent.com/npm",
    "https://mirrors.huaweicloud.com/repository/npm",
]

# Attempt limit per package: do_download gives up once its attempt counter
# (started at 0 by the main loop) reaches this value.
retry_times = 3
# Thread pool with 20 workers used to run the downloads.
pool = ThreadPoolExecutor(20)
# Packages to download, stored as (name, version, tarball file name) tuples.
download_data = set()
# Packages that could not be downloaded, as (name, version, output file name).
download_fail = set()

try:
    options, args = getopt.getopt(sys.argv[1:], "f:i:o:", ["output_dir=", "file=", "input_dir="])
except getopt.GetoptError:
    # Only a malformed command line is handled here; report it and exit with
    # a non-zero status so calling scripts can detect the usage error.
    print("参数错误")
    sys.exit(2)
def get_json_data(data):
    """Collect downloadable packages from a parsed lock-file tree.

    Walks ``data['dependencies']`` recursively and adds one
    ``(package name, version, tarball file name)`` tuple to the module-level
    ``download_data`` set for every entry that has both a ``version`` and a
    ``resolved`` URL.  The tarball file name is the last path segment of the
    ``resolved`` URL with any query string removed.

    Entries without ``version``/``resolved`` are skipped together with their
    nested dependencies.  Input that has no ``dependencies`` mapping (or is
    not a mapping at all) contributes nothing instead of raising.

    Args:
        data: Parsed JSON value, normally the dict loaded from a
            package-lock.json file or one of its dependency entries.
    """
    if not isinstance(data, dict):
        return
    dependencies = data.get('dependencies')
    if not isinstance(dependencies, dict):
        return

    for pkg, info in dependencies.items():
        # package.json style entries map a name to a plain version string;
        # those carry no download URL and are ignored.
        if not isinstance(info, dict):
            continue
        version = info.get('version')
        resolved = info.get('resolved')
        if not isinstance(version, str) or not isinstance(resolved, str):
            continue
        tgz_name = resolved.split('/')[-1].split('?')[0]
        download_data.add((pkg, version, tgz_name))
        # Nested dependency trees use the same layout, so recurse into them.
        get_json_data(info)
def get_download_url(name, version):
    """Return the tarball URL of ``name`` at ``version`` from a registry.

    Fetches the package metadata document from a randomly chosen entry of
    ``repository`` and reads ``versions[version]['dist']['tarball']`` from it.

    Args:
        name: Package name used as the registry path segment.
        version: Exact version string to look up.

    Returns:
        The tarball URL recorded in the registry metadata.

    Raises:
        RuntimeError: If the registry does not answer with HTTP 200, or the
            metadata contains no entry for ``version``.
    """
    response = requests.get(random.choice(repository) + "/" + name + "/", timeout=60, verify=False)
    if response.status_code != 200:
        # A real exception type is required: raising a bare string is itself
        # a TypeError and would hide this message.
        raise RuntimeError(f"{name}获取依赖地址失败")
    data = json.loads(response.text)
    versions = data.get('versions') or {}
    if version not in versions:
        raise RuntimeError(f"{name}版本{version}不存在")
    return versions[version]['dist']['tarball']
def do_download(name, version, tgz_name, current_try_time):
    """Download one package tarball into ``down_url``, retrying on failure.

    The file is saved as ``<name with '/' replaced by '_'>_<version>.<ext>``
    where ``<ext>`` is the text after the last dot of ``tgz_name``.  If that
    file already exists the package is treated as downloaded and skipped.

    Data is streamed to a temporary ``.part`` file and only renamed to the
    final name once the transfer has completed, so an interrupted attempt
    can never be mistaken for a finished download on the next try.

    Args:
        name: Package name as listed in the lock file.
        version: Exact version to download.
        tgz_name: Tarball file name; only its extension is used.
        current_try_time: Number of attempts already made (the main loop
            passes 0).

    Returns:
        None.  Once the attempt counter reaches ``retry_times`` the package
        is added to ``download_fail`` as ``(name, version, output file name)``.
    """
    extension = tgz_name.split('.')[-1]
    output_url = name.replace('/', '_') + "_" + version + "." + extension
    output_dir = os.path.join(down_url, output_url)
    partial_path = output_dir + ".part"

    if os.path.exists(output_dir):
        print(f'依赖{output_url}已存在')
        return

    while True:
        try:
            print(f'依赖{output_url}开始下载')
            with requests.get(get_download_url(name, version),
                              stream=True, timeout=60, verify=False) as response:
                # Treat HTTP errors as failures instead of saving the error
                # page under the tarball's name.
                response.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for data in response.iter_content(chunk_size=4096):
                        f.write(data)
            os.replace(partial_path, output_dir)
            return
        except Exception:
            # Drop whatever was written so far; a leftover partial file must
            # not survive into the next attempt or a later run.
            try:
                os.remove(partial_path)
            except OSError:
                pass
            current_try_time += 1
            if current_try_time >= retry_times:
                download_fail.add((name, version, output_url))
                print(f'依赖{name} {version}下载失败')
                return
            print(f'【第{current_try_time}次】尝试重新下载依赖 {name} {version}')
            # Wait one second before retrying.
            time.sleep(1)
# Default output directory; replaced by the value of -o/--output_dir.
down_url = './npm-package'


def _load_lock_file(path):
    """Parse the JSON file at ``path`` and pass the result to get_json_data."""
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default text encoding.
    with open(path, 'r', encoding='utf-8') as f:
        get_json_data(json.load(f))


if __name__ == '__main__':
    for option, value in options:
        if option in ("-f", "--file"):
            _load_lock_file(value)
        elif option in ("-o", "--output_dir"):
            down_url = value
        elif option in ("-i", "--input_dir"):
            for root, dirs, files in os.walk(value):
                for file in files:
                    if not file.endswith('.json'):
                        continue
                    json_path = os.path.join(root, file)
                    # The walk picks up every .json file, including ones that
                    # are not lock files or not valid JSON; skip those with a
                    # message instead of aborting the whole run.
                    try:
                        _load_lock_file(json_path)
                    except (ValueError, KeyError, TypeError, AttributeError) as err:
                        print(f"跳过无法解析的文件 {json_path}: {err!r}")

    os.makedirs(down_url, exist_ok=True)

    print(f"下载总数量:{len(download_data)}")
    for e in download_data:
        pool.submit(do_download, e[0], e[1], e[2], 0)
    # Block until every queued download has finished or given up.
    pool.shutdown(wait=True)

    print(f"下载失败总数量:{len(download_fail)}")
    fail_path = down_url + "/download_fail_tgz.txt"
    with open(fail_path, 'w', encoding='utf-8') as file:
        file.write(str(download_fail))
    print("下载失败路径" + fail_path)