import xlrd
import xlwt
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
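
# Batch-query EMS tracking numbers against the internal tracking page at
# http://10.3.10.83/ems:
#   get_nums()  reads the mail numbers from D:\EMS83Search\Emsnum.xls,
#   get_info()  looks each one up via headless Chrome plus requests,
#   get_excel() writes the results to a timestamped .xls workbook.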
mailnums = []  # mail numbers read from the input workbook
# Path to the local chromedriver binary (raw string avoids backslash escapes).
path = r'D:\EMS83Search\Chrome\chromedriver.exe'
options = webdriver.ChromeOptions()
options.add_argument('--headless')                          # no visible window
options.add_argument('--disable-gpu')
options.add_argument('blink-settings=imagesEnabled=false')  # skip images for speed
options.add_argument('--ignore-certificate-errors')
options.add_argument('--log-level=3')                       # Chrome accepts 0-3; 3 = FATAL
options.add_argument('--disable-infobars')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-logging')
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                     'AppleWebKit/537.36 (KHTML, like Gecko) '
                     'Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763')
# Parallel result lists: index n of every list describes the same mail item.
nums = []      # mail number (column 0 of the output)
sjtimes = []   # posting date
sjplaces = []  # posting place
status = []    # latest tracking status
lstimes = []   # latest scan time
lsplaces = []  # latest scan place
def get_nums():
    """Read every mail number from column 0 of the input workbook."""
    book = xlrd.open_workbook(r'D:\EMS83Search\Emsnum.xls')
    sheet = book.sheets()[0]
    for row in range(sheet.nrows):
        mailnums.append(int(sheet.row_values(row)[0]))
def get_info():
    """Submit each mail number to the tracking page and scrape the results."""
    count = 1
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/71.0.3578.80 Safari/537.36',
        'Connection': 'keep-alive',
    }
    browser = webdriver.Chrome(executable_path=path, options=options)
    for i in mailnums:
        try:
            browser.get('http://10.3.10.83/ems/newsystem/thesecond/ttq/ttqMailquery.jsp')
            browser.implicitly_wait(5)
            num_input = browser.find_element_by_id('mailNum')
            num_input.send_keys(i)
            num_input.send_keys(Keys.ENTER)
            # Find the result form that targets the ajaxFrameArea iframe and
            # rebuild its absolute URL so requests can fetch the page directly.
            soup = BeautifulSoup(browser.page_source, 'html.parser')
            form = soup.find(attrs={'target': 'ajaxFrameArea'})
            # The first 8 characters of the form action are a relative prefix
            # that is replaced by the base URL.
            result_url = 'http://10.3.10.83/ems' + form['action'][8:]
            rsp = requests.get(result_url, headers=headers)
            soup = BeautifulSoup(rsp.text, 'html.parser').body
            # Posting date and place are identified by their table-cell widths.
            sjtime = soup.find(attrs={'width': '47%'})
            sjplace = soup.find(attrs={'width': '25%'})
            sjtimes.append(sjtime.text)
            sjplaces.append(sjplace.text)
            # First cell of the track-status table carries the mail number.
            track = soup.find(attrs={'id': 'frameTrackStatueInfo'})
            nums.append(track.td.string)
            # Latest track row in the live page: time (td[3]), status (td[5]),
            # place (td[7]); the slices drop fixed label prefixes.
            qq = browser.find_element_by_xpath('//*[@id="trackStatueInfo"]/table/tbody/tr/td[3]')
            lstimes.append(qq.text)
            qq2 = browser.find_element_by_xpath('//*[@id="trackStatueInfo"]/table/tbody/tr/td[5]')
            qq3 = browser.find_element_by_xpath('//*[@id="trackStatueInfo"]/table/tbody/tr/td[7]')
            status.append(qq2.text[6:])
            lsplaces.append(qq3.text[3:])
        except Exception:
            # Keep the six lists aligned by recording placeholders for this
            # mail number, then restart the browser session.
            nums.append(i)
            sjtimes.append('Bad mail number??')
            sjplaces.append('Network trouble??')
            lstimes.append('A-shares dropped again??')
            status.append('Not feeling great today??')
            lsplaces.append('Our nexus is under attack???')
            try:
                browser.quit()  # discard the possibly wedged session
            except Exception:
                pass
            browser = webdriver.Chrome(executable_path=path, options=options)
        print(f'Querying item {count}: {i}')
        count += 1
    browser.quit()
def get_excel():
    """Write the collected results to a timestamped .xls workbook."""
    wjtime = time.strftime('%Y-%m-%d_%H-%M', time.localtime())
    # xlwt only writes the legacy .xls format, so name the file accordingly.
    filename = f'{wjtime}---{len(nums)} items.xls'
    wb = xlwt.Workbook()
    sheet1 = wb.add_sheet('Query Results')
    # One header plus one result list per column, in fixed order.
    columns = [
        ('Mail Number', [str(v) for v in nums]),
        ('Posting Date', sjtimes),
        ('Posting Place', sjplaces),
        ('Last Time', lstimes),
        ('Last Place', lsplaces),
        ('Last Status', status),
    ]
    for col, (header, values) in enumerate(columns):
        sheet1.write(0, col, header)
        for row, value in enumerate(values, start=1):
            sheet1.write(row, col, value)
    wb.save(rf'D:\EMS83Search\{filename}')
if __name__ == '__main__':
    get_nums()
    get_info()
    get_excel()
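
# Note: the Selenium calls above follow the Selenium 3 API (executable_path,
# find_element_by_id / find_element_by_xpath); Selenium 4 replaced these with
# Service(...) and find_element(By.ID, ...). Reading the .xls input relies on
# xlrd, and writing the .xls output on xlwt.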