# NOTE: the scraped data is incomplete because of the source website.
# -*- coding: utf-8 -*-
# @Time : 2021/6/5 16:24
# @Author : wangzhengxiang
# @File : lishitianqi.py
# @Software : PyCharm
import datetime
import mysql.connector
import requests
from bs4 import BeautifulSoup
def savedata(strpath, params=None, *, many=False):
    """Run one SQL statement against the local MySQL database and commit it.

    A new connection is opened on every call. The cursor and the connection
    are always closed again, even when executing or committing fails; any
    exception raised by the connector propagates to the caller.

    Args:
        strpath: The SQL text to execute (despite the name, this is a
            statement, not a file path).
        params: Optional values bound to the ``%s`` placeholders of
            ``strpath`` by the connector. With ``many=True`` this must be a
            sequence of such value tuples, one per row.
        many: Keyword-only. When true, the statement is run for every entry
            of ``params`` through ``cursor.executemany``.
    """
    mydb = mysql.connector.connect(
        host="localhost",
        user="root",
        passwd="yourpasswd",
        database="mydatabase",
    )
    try:
        mycursor = mydb.cursor()
        try:
            if many:
                mycursor.executemany(strpath, params)
            elif params is None:
                # Keep the exact call shape existing callers rely on.
                mycursor.execute(strpath)
            else:
                mycursor.execute(strpath, params)
            mydb.commit()
        finally:
            mycursor.close()
    finally:
        mydb.close()


def askurl(urlport):
    """Download one history page and insert its rows into ``weathers``.

    The page is selected by interpolating ``urlport`` into the URL. Every
    ``li>div.th200`` cell is combined, by position, with the four
    ``li>div.th140`` cells that belong to it, and the resulting rows are
    written with a single parameterized batch insert.

    Args:
        urlport: The page key placed before ``.html`` in the URL; the script
            entry point passes year and month as ``YYYYMM``.
    """
    # The city is hard-coded in the address; edit it here and open the page
    # in a browser to confirm the address is valid.
    url_ = "http://lishi.tianqi.com/xinxiang/%s.html" % urlport
    headers_ = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
    }
    # Without a timeout a stalled server would block the script forever.
    response_ = requests.get(url_, headers=headers_, timeout=30)
    bs = BeautifulSoup(response_.text, "html.parser")

    date_cells = bs.select("li>div.th200")
    value_cells = bs.select("li>div.th140")
    # Only build rows that have all four value cells, so a page whose cell
    # counts do not line up cannot raise IndexError half-way through.
    row_count = min(len(date_cells), len(value_cells) // 4)

    rows = []
    for i in range(row_count):
        high, low, weather, wind = value_cells[4 * i:4 * i + 4]
        rows.append((
            # First ten characters only -- presumably the YYYY-MM-DD date.
            date_cells[i].text[0:10],
            # The final character is dropped -- presumably the unit suffix;
            # verify against the page markup.
            high.text[0:-1],
            low.text[0:-1],
            weather.text,
            wind.text,
        ))

    if rows:
        # Placeholders let the connector quote the scraped text, so a quote
        # character in the page can neither break nor alter the statement.
        savedata(
            "INSERT INTO weathers (time, high_tem, low_tem, weather, wind) "
            "VALUES (%s, %s, %s, %s, %s)",
            rows,
            many=True,
        )
def month_keys(firstyear, today):
    """Return the ``YYYYMM`` page keys to scrape, oldest first.

    Every month of each year from ``firstyear`` up to (but excluding)
    ``today``'s year is listed, followed by January through ``today``'s
    month of the current year. If ``firstyear`` is not before the current
    year, only the current year's months are returned.

    Args:
        firstyear: First year to include, as an int.
        today: A ``datetime.date`` marking the last month to include.

    Returns:
        A list of strings such as ``"202101"``.
    """
    keys = [
        "%s%02d" % (year, month)
        for year in range(firstyear, today.year)
        for month in range(1, 13)
    ]
    # Months are 1-based and the current month is included; starting the
    # range at 0 would request the non-existent month "00" and stop one
    # month early.
    keys.extend(
        "%s%02d" % (today.year, month) for month in range(1, today.month + 1)
    )
    return keys


if __name__ == '__main__':
    firstyear = int(input("请输入起始年份:"))
    for url in month_keys(firstyear, datetime.date.today()):
        askurl(url)
# Table creation statement
# mycursor.execute("""create table weathers (
# id int primary key auto_increment,
# time varchar(100),
# high_tem varchar(100),
# low_tem varchar(100),
# weather varchar(255),
# wind varchar(255))
# """)
# mycursor.execute("truncate table weathers")
# mydb.commit()