import mysql.connector # 导入库
import re
import urllib.request
from lxml import etree
import urllib3
# coding:utf-8
import requests
import json
import time # 定时
from datetime import datetime, timedelta # 定时
urllib3.disable_warnings()
# 数据库
# Connection settings passed as keyword arguments to mysql.connector.connect().
# host, user and password are left blank here and must be filled in before use.
config = {
    "host": "",  # database server address (original note: default 127.0.0.1)
    "user": "",  # login user name
    "password": "",  # login password
    "port": 3306,  # TCP port (original note: default 3306)
    "database": "ztbu_notice",  # schema name
    "charset": "utf8mb4",  # connection character set
}
# cnn = mysql.connector.connect(**config) # 建立MySQL连接
# cursor = cnn.cursor() # 获得游标
# 获取列表数据
def acquire_link_data():
    """Poll every row of ``link_data`` and push notices whose headline changed.

    For each row, the page whose URL is stored in column 4 is scraped with
    ``acquire_link_webdata``. When the scraped headline differs from the value
    stored in column 5, the row is updated via ``update_link_data`` and the
    notice is sent to subscribers via ``acquire_notice_list_user_send``, both
    keyed by column 1. Otherwise a "no update" line is printed using column 2
    as the label.

    NOTE(review): column meanings (1 = link code, 2 = display name, 4 = URL,
    5 = last headline) are inferred from how the values are used here; confirm
    against the table schema.
    """
    # Fetch everything up front and release the connection immediately; the
    # helpers called below open their own connections.
    cnn = mysql.connector.connect(**config)
    try:
        cursor = cnn.cursor()
        try:
            cursor.execute("select * from link_data")
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        cnn.close()

    for row in rows:
        try:
            link, headline, date, text = acquire_link_webdata(str(row[4]))
        except Exception as exc:
            # Per-item boundary: one unreachable or unparsable page must not
            # stop the remaining pages from being checked in this pass.
            print(f"{row[2]},获取失败:{exc!r}")
            continue

        if row[5] != headline:
            # Persist the new headline first, then notify subscribers.
            update_link_data(row[1], headline, date)
            acquire_notice_list_user_send(row[1], headline, link, date, text)
        else:
            print(f"{row[2]},无更新!")
# 获取用户数据
def acquire_notice_list_user_send(linkcode, headline, link, date, text):
    """Send one notice to every ``notice_list`` row whose ``linkcode`` column is 1.

    Args:
        linkcode: Name of the ``notice_list`` column used as the subscription
            flag for this source.
        headline: Notice title forwarded to ``massage_send``.
        link: Notice URL forwarded to ``massage_send``.
        date: Notice date forwarded to ``massage_send``.
        text: Notice body forwarded to ``massage_send``.

    Columns 1, 2 and 3 of each matching row are passed to ``massage_send`` as
    the user id, push channel selector and push token respectively.
    """
    # A column name cannot be bound as a query parameter, so quote it as a
    # MySQL identifier (doubling embedded backticks) instead of splicing the
    # raw value into the statement.
    column = "`" + str(linkcode).replace("`", "``") + "`"

    cnn = mysql.connector.connect(**config)
    try:
        cursor = cnn.cursor()
        try:
            cursor.execute(f"select * from notice_list where {column} = 1")
            subscribers = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        cnn.close()

    for user in subscribers:
        massage_send(user[1], user[2], user[3], headline, link, date, text)
        # Short pause between consecutive pushes.
        time.sleep(1)
# 更新数据库
def update_link_data(linkcode, Title, Time):
    """Store the latest headline and date for the rows matching ``linkcode``.

    Args:
        linkcode: Code fragment; rows whose ``code`` column contains it
            (``LIKE '%linkcode%'``) are updated.
        Title: New value for the ``Title`` column.
        Time: New value for the ``Time`` column.

    Returns:
        True once the update has been committed.
    """
    cnn = mysql.connector.connect(**config)
    try:
        cursor = cnn.cursor()
        try:
            # Title comes from a scraped web page, so it is bound as a
            # parameter: a quote character in a headline must not be able to
            # break or alter the statement.
            cursor.execute(
                "update link_data set Title=%s, Time=%s where code like %s",
                (Title, Time, f"%{linkcode}%"),
            )
            cnn.commit()
        finally:
            cursor.close()
    finally:
        cnn.close()

    print(f"高校:{linkcode}保留数据更新成功!")
    return True
# 获取网页数据
def acquire_link_webdata(url):
    """Scrape the newest notice from a list page and fetch its body text.

    Args:
        url: Address of the notice list page.

    Returns:
        A ``(link, headline, date, text)`` tuple for the first notice matched
        on the list page. ``link`` is resolved against ``url`` so a relative
        href becomes absolute. ``text`` is the concatenated text nodes of the
        ``layout_txtcontent_content`` div, followed by an HTML anchor plus a
        plain-text name line for each attachment link found in the article.

    Raises:
        IndexError: If the list page contains no entry matching the expected
            markup.
    """
    from urllib.parse import urljoin

    header = {
        'Connection': 'close',
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36",
    }

    response = requests.get(url=url, headers=header, timeout=10, verify=False)
    response.encoding = 'utf-8'
    notices = re.findall(
        r'<a href="(.*?)" target="_blank" title="(.*?)">(.*?)</a><span class="datatime">(.*?)</span>',
        response.text,
    )
    if not notices:
        # Same exception type as indexing the empty list, but with context.
        raise IndexError(f"no notice entry found on list page: {url}")
    href, headline, _anchor_text, date = notices[0]
    # A relative href would make urllib reject the request; absolute hrefs are
    # returned unchanged by urljoin.
    link = urljoin(url, href)

    # Fetch the article itself; the timeout matches the list-page request and
    # the context manager closes the response.
    request = urllib.request.Request(url=link, headers=header)
    with urllib.request.urlopen(request, timeout=10) as page:
        content = page.read().decode('utf-8')

    parse_html = etree.HTML(content)
    # xpath() returns a list of text nodes.
    parts = list(parse_html.xpath('//div[@class="layout_txtcontent_content"]//text()'))

    # Attachment anchors are recognised by their exact inline style.
    attachments = re.findall(
        r'<a href="(.*?)" target="_blank" style="font-family: 微软雅黑; font-size: 14pt;">(.*?)</a>',
        content,
    )
    for attachment_href, attachment_name in attachments:
        parts.append(f'<a href="{urljoin(link, attachment_href)}">{attachment_name}</a><br>')
        parts.append(attachment_name + "\n")

    text = "".join(parts)
    return link, headline, date, text
# 推送
def massage_send(user_id, push_select, push_token, web_head, web_link, web_time, web_text):
    """Push one notice to one user through the channel chosen by ``push_select``.

    Args:
        user_id: Identifier printed in the status output.
        push_select: Channel selector string: "1" pushplus, "2" WxPusher,
            "3" 酷推, "4" Hi提醒, "5" 方糖. Any other value is reported as a
            failure.
        push_token: Channel credential. For WxPusher the format is
            ``appToken#uid``.
        web_head: Notice title.
        web_link: Notice URL (only sent to WxPusher).
        web_time: Notice date (not used by any channel).
        web_text: Notice body.

    Returns:
        None. The outcome is printed; network and response-format errors are
        reported as a failure instead of being raised, so one failing
        recipient does not stop the caller's loop.
    """
    if push_select == "2":
        # Validate the "appToken#uid" format before doing any I/O.
        token_parts = push_token.split("#")
        if len(token_parts) < 2:
            print(f"用户:{user_id} 推送失败!\n")
            return

    name = None
    res = None
    try:
        if push_select == "1":
            name = "pushplus"
            payload = {
                "token": push_token,
                "title": web_head,
                "content": web_text,
                "template": "html",
            }
            res = requests.post("http://www.pushplus.plus/send/", data=payload, timeout=10)
        elif push_select == "2":
            name = "Wxpusher"
            payload = {
                "appToken": token_parts[0],
                "content": web_text,
                "summary": web_head,
                "contentType": 2,  # 1 = plain text, 2 = HTML body fragment, 3 = markdown
                "uids": [token_parts[1]],
                "url": web_link,  # optional link to the original notice
                "verifyPay": False,  # do not restrict delivery to paid subscribers
            }
            # json= serialises the body and sets Content-Type itself, so the
            # hand-written Content-Length/Host/Accept-Encoding headers are
            # not needed.
            res = requests.post(
                "https://wxpusher.zjiecode.com/api/send/message",
                json=payload,
                timeout=10,
                verify=False,
            ).json()["data"][0]["status"]
        elif push_select == "3":
            name = "酷推"
            body = web_head + "\n" + web_text
            res = requests.post(
                url="https://push.xuthus.cc/send/" + push_token,
                data=body.encode('utf-8'),
                timeout=10,
                verify=False,
            ).text
        elif push_select == "4":
            name = "Hi提醒"
            # params= URL-encodes the values; raw interpolation broke on
            # '&', '#', spaces and newlines in the title or body.
            res = requests.post(
                "https://api.caicenter.com/send.php",
                params={
                    "appkey": push_token,
                    "title": web_head,
                    "describes": web_text,
                    "channel": 1,
                },
                timeout=10,
                verify=False,
            )
        elif push_select == "5":
            name = "方糖"
            # Sent as an encoded form body so long or special-character text
            # is neither mangled nor limited by URL length.
            res = requests.post(
                f"https://sctapi.ftqq.com/{push_token}.send",
                data={"title": web_head, "desp": web_text},
                timeout=10,
                verify=False,
            )
    except (requests.RequestException, ValueError, LookupError, TypeError) as exc:
        # Network failure, non-JSON reply, or an unexpected reply structure.
        print(f"用户:{user_id} 推送失败!{exc!r}\n")
    else:
        if name is None:
            # Unknown channel selector.
            print(f"用户:{user_id} 推送失败!\n")
        else:
            print(f"用户:{user_id}\n状态:{res}推送成功!\n推送方式:{name}\n")
    # Throttle between pushes.
    time.sleep(2)
# 主程序
def main():
    """Announce the start of a pass and run one check of all tracked pages."""
    print("开始启动!")
    acquire_link_data()
# 定时
def timing(times):
    """Run ``main()`` repeatedly, ``times`` seconds apart, forever.

    Args:
        times: Interval in seconds between the start of consecutive runs.

    The first run is due ``times`` seconds after a fixed reference time
    (2022-12-27 15:00:00), so it fires immediately whenever the local clock
    is already past that point. Each later run is scheduled ``times`` seconds
    after the moment the previous run started. This function never returns.
    """
    print(f"定时任务已启动:{times}秒")
    interval = timedelta(seconds=times)
    # Fixed reference start time.
    start_time = datetime.strptime('2022-12-27 15:00:00', '%Y-%m-%d %H:%M:%S')
    next_run_time = start_time + interval
    run_count = 1
    while True:
        current_time = datetime.now()
        if current_time < next_run_time:
            # Not due yet: wait a second and check again.
            time.sleep(1)
            continue

        nowtime = "{}年{}月{}日".format(current_time.year, current_time.month, current_time.day)
        try:
            main()
        except Exception as exc:
            # Top-level boundary of a long-running scheduler: report the
            # failed run and keep going instead of stopping for good.
            print(f"{nowtime} 第{run_count}次执行失败:{exc!r}")
        else:
            print(f"{nowtime} 第{run_count}次执行")
        run_count += 1
        # Schedule relative to when this run started, not when it finished.
        next_run_time = current_time + interval
if __name__ == "__main__":
    # Start the scheduler with a 30-second interval.
    timing(30)
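# Article title: "Python爬取ZTBU官网公告获取及推送"
# (Fetching and pushing ZTBU official-site notices with Python)
# The lines below are web-page residue from the source blog, not part of the script:
# 最新推荐文章于 2025-12-05 17:02:52 发布
# 29万+
#
# 被折叠的 条评论
# 为什么被折叠?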



