Python 爬虫：根据哨兵1号（Sentinel-1）数据批量下载对应的精密轨道（POEORB）文件。代码参考自他人，但出处已经记不清了。

本文链接：https://blog.youkuaiyun.com/wangtimer/article/details/106773528

所需的几个包都在代码开头导入。之所以需要 get_yestoday 函数，是因为轨道文件的命名规则：如果你的数据是10月5日的，对应轨道文件的有效期就是10月4日至10月6日，因此要用数据日期的前一天（即10月4日）作为 validity_start 去检索。

import urllib.parse
import urllib.request
import os, sys, re
from bs4 import BeautifulSoup
import datetime
import time

# Input/output locations and the listing of the input directory.
# Edit both paths for your own machine.
# NOTE: these are raw strings, so every "\\" below is two literal backslashes
# in the resulting path (presumably tolerated by Windows as a repeated
# separator — verify on your system).
# dir_path: directory that holds the Sentinel-1 product archives.
dir_path = r"E:\\sar\\170"
# out_path: directory where the downloaded EOF files are stored.
out_path = r"E:\\sar\\EOF\\"
# Names of all entries (files and folders) found directly in dir_path.
dirs = os.listdir( dir_path )

def get_yestoday(mytime):
    """Return the calendar day before *mytime*.

    Args:
        mytime: A date string whose first eight characters are ``YYYYMMDD``.

    Returns:
        The previous day, formatted as ``YYYYMMDD``.

    Raises:
        ValueError: If the leading characters do not form a valid date.
    """
    year, month, day = (
        int(mytime[start:stop]) for start, stop in ((0, 4), (4, 6), (6, 8))
    )
    previous_day = datetime.date(year, month, day) - datetime.timedelta(days=1)
    return previous_day.strftime('%Y%m%d')


# For every Sentinel-1 product archive (*.zip) in the input directory, query
# the orbit-file index page and download each .EOF file it links to.
for file in dirs:
    if not file.endswith(".zip"):
        continue

    # The first 8-digit run in the product name is taken as its date (YYYYMMDD).
    date_match = re.search(r"\d{8}", file)
    if date_match is None:
        # Not a recognisable product name; skip it instead of crashing.
        print("skip %s: no 8-digit date found in file name" % file)
        continue

    # The index is searched by the day BEFORE the product date, because that
    # is the validity start of the orbit file covering the product.
    date = get_yestoday(date_match.group(0))
    date = "-".join((date[:4], date[4:6], date[6:]))  # YYYYMMDD -> YYYY-MM-DD
    print(date)

    url_param_json = {
        'sentinel1__mission': file[0:3],  # first three characters of the name
        'validity_start': date,
    }
    url_param = urllib.parse.urlencode(url_param_json)  # encoded query string
    # NOTE(review): confirm this endpoint is still served before relying on it.
    url = 'https://qc.sentinel1.eo.esa.int/aux_poeorb/?%s' % url_param

    # Close the HTTP response as soon as the page has been parsed.
    with urllib.request.urlopen(url) as html:
        dom = BeautifulSoup(html, "html.parser")

    # Only anchors that carry an href are considered (anchors without one
    # would raise KeyError); relative links are resolved against the page URL.
    eof_lists = [
        urllib.parse.urljoin(url, a['href'])
        for a in dom.find_all("a", href=True)
        if a['href'].endswith('.EOF')
    ]

    for eof in eof_lists:
        print(eof)
        # Use the last path component of the link as the local file name
        # rather than a fixed character offset, which only fits one URL layout.
        filename = urllib.parse.urlparse(eof).path.rsplit('/', 1)[-1]
        urllib.request.urlretrieve(eof, os.path.join(out_path, filename))