&&&&&
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
from xlwt import Workbook
import time
import datetime
import sys
# Python 2-only hack: switch the interpreter-wide default codec used for
# implicit str <-> unicode conversions to UTF-8. ``reload`` is a builtin only
# on Python 2, and ``sys`` has to be reloaded first because
# ``setdefaultencoding`` is removed from the module during interpreter startup.
# NOTE(review): this pair of statements cannot run on Python 3 -- confirm the
# target interpreter before porting.
reload(sys)
sys.setdefaultencoding('utf-8')
def _parse_row(cells):
    """Return the 13 column values of one table row, or None to skip the row.

    Each value is the first whitespace-separated token of the cell's text.
    Columns 8 and 12 may be missing or blank and then become ''; any other
    missing or blank column disqualifies the row (e.g. rows without <td>).
    """
    optional_columns = (8, 12)
    record = []
    for index in range(13):
        tokens = cells[index].text.split() if index < len(cells) else []
        if tokens:
            record.append(tokens[0])
        elif index in optional_columns:
            record.append('')
        else:
            return None
    return record


def shuju(url,date,page):
    """
    Fetch one page of automatic monitoring data for a given date.

    POSTs ``startTime=date`` and ``pageIndex=page`` to *url*, parses the
    response as HTML and turns every usable ``<tr>`` into a list of 13
    strings (one per column, in table order).

    Args:
        url: address the form data is posted to.
        date: value sent as ``startTime``.
        page: value sent as ``pageIndex``.

    Returns:
        A list of 13-item lists, one per table row that could be parsed.
        Rows lacking a required cell are silently skipped.

    Raises:
        requests.RequestException: if the request fails twice in a row.
    """
    fromdata = {
        "startTime": date,
        "pageIndex": page,
    }
    try:
        response = requests.post(url, data=fromdata)
    except requests.RequestException:
        # Retry once after a short pause; a second failure propagates to the
        # caller. Only request errors are retried so that Ctrl-C still works.
        time.sleep(2)
        response = requests.post(url, data=fromdata)
    soup = BeautifulSoup(response.text, 'lxml')

    datalist = []
    for row in soup.find_all('tr'):
        record = _parse_row(row.find_all('td'))
        if record is not None:
            datalist.append(record)
    return datalist
def pageNumber(url,date):
    """
    Return the page count and the company name for one day of monitoring data.

    POSTs ``startTime=date`` with an empty ``pageIndex`` to *url* and reads
    two elements from the returned HTML: the ``span.clr_b.ver_mid`` pager
    text and the ``div.com_tit_new.f_22.clr_3`` title.

    Args:
        url: address the form data is posted to.
        date: value sent as ``startTime``.

    Returns:
        A ``(number, compname)`` tuple. ``number`` is a string holding the
        run of digits that follows the first '/' in the pager text
        (presumably "current/total" -- confirm against the live page);
        callers convert it with ``int()``. ``compname`` is the ``.string``
        of the title div.

    Raises:
        requests.RequestException: if the request fails twice in a row.
    """
    fromdata = {
        "startTime": date,
        "pageIndex": "",
    }
    try:
        response = requests.post(url, data=fromdata)
    except requests.RequestException:
        # Retry once after a short pause; a second failure propagates.
        time.sleep(1)
        response = requests.post(url, data=fromdata)
    soup = BeautifulSoup(response.text, 'lxml')

    total_text = soup.find('span', class_="clr_b ver_mid").string.split('/')[1]
    # Take every leading digit, not just the first character: the previous
    # ``[1][0]`` turned a total of e.g. "12" into "1" and dropped pages.
    stripped = total_text.strip()
    digit_count = 0
    while digit_count < len(stripped) and stripped[digit_count].isdigit():
        digit_count += 1
    if digit_count:
        number = stripped[:digit_count]
    else:
        # No digits found: keep the historical result (first character) so
        # behavior on unexpected pager text is unchanged.
        number = total_text[0]

    compname = soup.find('div', class_="com_tit_new f_22 clr_3").string
    return number, compname
def Date_list_generation(start,end):
    """
    Build the list of 'YYYY-MM-DD' strings for the days after *start* up to
    and including *end*.

    Both arguments are passed through ``str()`` and parsed as '%Y-%m-%d'.
    The result is empty when *end* is not later than *start*.
    NOTE(review): *start* itself is never part of the result -- confirm
    that this is intended.
    """
    day_format = '%Y-%m-%d'
    first_day = datetime.datetime.strptime(str(start), day_format)
    last_day = datetime.datetime.strptime(str(end), day_format)
    # Both values are midnight timestamps, so the difference is whole days.
    day_count = (last_day - first_day).days
    return [
        (first_day + datetime.timedelta(days=offset)).strftime(day_format)
        for offset in range(1, day_count + 1)
    ]
def pao(start,end,url):
    """
    Download the monitoring data for a date range and save it as an .xls file.

    For every date produced by ``Date_list_generation(start, end)`` the page
    count is looked up with ``pageNumber`` and each page is fetched with
    ``shuju``. All rows are written to one sheet under a fixed header row,
    and the workbook is saved as ``"<compname>_<start>_<end>.xls"`` (relative
    path), where ``compname`` comes from the last ``pageNumber`` call.

    Args:
        start: first bound of the date range, 'YYYY-MM-DD'.
        end: second bound of the date range, 'YYYY-MM-DD'.
        url: address of the monitoring page to query.

    Raises:
        ValueError: if the date range yields no dates; previously this
            surfaced as a NameError on ``compname`` just before saving.
    """
    dates = Date_list_generation(start, end)
    if not dates:
        raise ValueError(
            "no dates to fetch between %s and %s" % (start, end))

    headers = (
        u'序号',
        u'监测点位',
        u'监测时间',
        u'监测项目',
        u'监测结果',
        u'标准限值',
        u'单位',
        u'是否达标',
        u'超标倍数',
        u'评价标准',
        u'排放去向',
        u'排放方式',
        u'备注',
    )
    book = Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('Sheet 1')
    for column, title in enumerate(headers):
        sheet1.write(0, column, title)

    rows = []
    for date in dates:
        pagenumber, compname = pageNumber(url, date)
        for page in range(1, int(pagenumber) + 1):
            try:
                page_rows = shuju(url, date, page)
            except Exception:
                # Best effort: report the failed page and move on. Nothing is
                # appended, so a failure no longer re-adds the previous
                # page's rows (or hits an unbound name on the first page).
                print(page)
                continue
            # Single formatted argument prints "date page" on Python 2 and 3.
            print("%s %s" % (date, page))
            rows.extend(page_rows)
            time.sleep(0.8)  # pause between page requests
        time.sleep(0.8)  # pause before querying the next date

    # Data rows start at sheet row 1, directly below the header row.
    for row_index, row in enumerate(rows, 1):
        for column, value in enumerate(row):
            sheet1.write(row_index, column, value)

    tablename = "%s_%s_%s.xls" % (compname, start, end)
    book.save(tablename)
if __name__ == "__main__":
    # Script entry point: export one company's monitoring data for a fixed
    # date range. The page address and both date bounds are hard-coded here.
    url = "http://58.30.229.134/monitor-pub/org_zdjc/e3e8b6b7-578a-4982-93bf-5484c49b3e5e.do"
    start = "2017-05-01"
    end = "2017-06-01"
    pao(start=start, end=end, url=url)
&&&&&