import datetime
import pandas
import requests
import json
import pymongo
import time
import numpy as np
import dateutil.parser
def request(year, month, codenum, timeout=10):
    """Fetch one month of calendar forecast data for a city code.

    Args:
        year: Year as a string, e.g. ``"2021"``.
        month: Month as a string. It is inserted into the URL verbatim, so
            the caller is responsible for zero-padding (e.g. ``"04"``).
        codenum: City code, used both in the data URL and in the
            ``Referer`` header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``requests.Response`` returned by the GET request.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    url = "http://d1.weather.com.cn/calendar_new/{0}/{1}_{0}{2}.html".format(
        year, codenum, month
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
        # NOTE(review): presumably the data host checks the referer - confirm.
        "Referer": "http://www.weather.com.cn/weather40d/{}.shtml".format(codenum),
    }
    return requests.get(url, headers=headers, timeout=timeout)
def parse(res):
    """Decode a response body as UTF-8 and parse it as JSON.

    The first 11 characters of the decoded text are discarded before
    parsing (presumably a JavaScript assignment prefix such as
    ``var fc40 = `` - TODO confirm against a live response).

    Args:
        res: Object exposing the raw body bytes as ``res.content``.

    Returns:
        The Python object produced by ``json.loads`` for the remaining text.
    """
    prefix_length = 11
    body_text = res.content.decode("utf-8")
    return json.loads(body_text[prefix_length:])
def date_now():
    """Return the current local date and time as a naive ``datetime``."""
    return datetime.datetime.now()
def date_40(today):
    """Return the date 40 days after ``today`` formatted as ``YYYY-MM-DD``.

    Args:
        today: A ``datetime`` (or ``date``) to count forward from.

    Returns:
        The shifted date as a ``'%Y-%m-%d'`` string.
    """
    horizon = datetime.timedelta(days=40)
    return (today + horizon).strftime('%Y-%m-%d')
def save(list, local):
    """Insert the forecast entries for the next 40 days into MongoDB.

    Each raw entry is reduced to the fields in ``field_names`` (re-keyed
    with their Chinese labels), tagged with the region, and its date is
    normalised to ``YYYY-MM-DD``. Entries dated from today up to 40 days
    ahead (inclusive) are upserted into the module-level ``forecast``
    collection; a record is only written when no identical document
    already exists.

    Args:
        list: Iterable of raw forecast dicts. Every entry must contain all
            keys of ``field_names``, otherwise ``KeyError`` is raised. The
            parameter name shadows the builtin and is kept only for caller
            compatibility.
        local: Region name stored with every record under the key "地区".

    Returns:
        None.
    """
    # Source key -> stored (Chinese) field name: date, max temperature,
    # min temperature, precipitation probability, festival, weekday,
    # publish time, historical average max/min temperature, weather.
    field_names = {'date': '日期', 'max': '最高温度', 'min': '最低温度', 'hgl': '降水概率', 'fe': '节日', 'wk': '星期', 'time': '发布时间',
                   'hmax': '历史平均最高温度', 'hmin': '历史平均最低温度', 'w1': '天气'}

    # The accepted date window does not depend on the entry, so compute it
    # once. ISO-formatted dates compare correctly as plain strings.
    now = date_now()
    window_start = now.strftime('%Y-%m-%d')
    window_end = date_40(now)

    for raw in list:
        record = {label: raw[key] for key, label in field_names.items()}

        # When either forecast temperature is missing, fall back to the
        # historical averages for both.
        if record['最高温度'] == '' or record['最低温度'] == '':
            record['最高温度'] = record['历史平均最高温度']
            record['最低温度'] = record['历史平均最低温度']

        record["地区"] = local
        record["日期"] = dateutil.parser.parse(record["日期"]).strftime('%Y-%m-%d')

        if window_start <= record["日期"] <= window_end:
            # Collection.update() was removed in PyMongo 4; update_one with
            # upsert=True is the equivalent single-document upsert. Using
            # the whole record as the filter means it is inserted only if
            # no identical document is stored yet.
            forecast.update_one(record, {'$setOnInsert': record}, upsert=True)
# Load the region sheet once at import time. With header=None the first
# spreadsheet row is kept as data row 0 and columns are labelled 0, 1, ...
df = pandas.read_excel(r"salecity.xlsx", header=None, engine='openpyxl')
max_row = len(df.index)
# Report how many rows (regions) the sheet contains.
print("表格地区数:", max_row)
def messcol_2(num_row, num_col):
    """Return the content of one cell of the module-level ``df``.

    Args:
        num_row: Row label of the cell (looked up through ``df.loc``).
        num_col: Column label of the cell.

    Returns:
        The cell value after a round trip through ``ndarray.tolist()``.
    """
    # Selecting with one-element lists yields a DataFrame (1x1 when the
    # labels are unique) rather than a scalar.
    one_cell = df.loc[[num_row], [num_col]]
    return np.array(one_cell).tolist()[0][0]
if __name__ == '__main__':
    # Script entry point: download the forecast months for every region in
    # the sheet, store them in MongoDB, then export the collection to Excel.
    year = "2021"
    month = 4

    # Connect to MongoDB on localhost:27017. The context manager closes the
    # client even when a download or parse step raises.
    with pymongo.MongoClient('localhost', 27017) as client:
        test = client['test']  # database "test"
        # Must remain a module-level name: save() reads ``forecast`` as a
        # global.
        forecast = test['forecast416']

        # Row 0 is skipped - presumably a header row, since the sheet is
        # read with header=None; confirm against salecity.xlsx.
        for i in range(1, max_row):
            url = str(messcol_2(i, 1))    # second column: city code
            local = str(messcol_2(i, 0))  # first column: region name

            # Months from ``month`` up to and including 5.
            for j in range(month, 6):
                months = str(j).zfill(2)  # months below 10 are zero-padded
                par = parse(request(year, months, url))
                save(par, local)

        data = pandas.DataFrame(list(forecast.find()))
        # The ``encoding`` argument was removed from DataFrame.to_excel in
        # pandas 2.0 and now raises TypeError, so it is no longer passed.
        data.to_excel('data.xlsx', index=False)