这是一个仅为了娱乐写的小型爬虫.
可以获取某漫画网站的每日更新,如果想看的作品有更新则语音播报.
运行环境:
Python3.7
beautifulsoup4 4.6.3
pywin32 224
requests (get_update.py 中用于发送网络请求,原文未注明版本)
文件结构:
file文件夹下的Collection.txt保存了想看的作品名称(每行一个作品名,UTF-8编码)
run.py调用get_update获取更新列表,speak判断是否有想看的更新并合成语音提醒.
源代码:
run.py
from get_update import GetUpdate
import speak
# Entry script: fetch the update list, then either announce or just print the
# titles that are also in the user's collection.

# Chrome-style User-Agent header sent with every page request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'}
name = []
# int() raises ValueError when the answer is not a whole number.
pages = int(input('输入获取页数:'))
dmzj = GetUpdate(pages, name, headers)
name = dmzj.update_get()
# Normalise the answer so "y" or " Y " also enable speech; an exact 'Y' match
# used to be required and anything else silently fell back to text output.
if input('需要语音播报吗?(Y/N)').strip().upper() == 'Y':
    speak.want_spk(name)
else:
    speak.just_show(name)
get_update.py
import requests
from bs4 import BeautifulSoup
class GetUpdate:
    """Collect comic titles from the site's paginated "update" listing.

    Attributes:
        pages: number of listing pages to fetch, starting at page 1.
        name: list that found titles are appended to; it is mutated in place.
        headers: HTTP headers sent with every request.
        timeout: seconds to wait for each HTTP request before giving up.
    """

    def __init__(self, pages, name, headers, timeout=10):
        self.pages = pages
        self.name = name
        self.headers = headers
        self.timeout = timeout

    def update_get(self):
        """Fetch pages 1..self.pages and append every title found to self.name.

        Titles are the first child of each ``<a target="_blank">`` found
        inside an element with class ``pictext``.

        Returns:
            self.name, i.e. the same list object given to the constructor.
            With ``pages`` <= 0 no request is made and the list is returned
            unchanged.
        """
        for page in range(1, self.pages + 1):
            # The real site address is deliberately not published here;
            # replace the host below if you know it.
            url = 'https://manhua.xxxx.com/update_' + str(page) + '.shtml'
            # Without a timeout requests can block forever on a stalled
            # connection, which would freeze the whole script.
            response = requests.get(url, headers=self.headers,
                                    timeout=self.timeout)
            soup = BeautifulSoup(response.content, 'html.parser')
            for block in soup.select(r'.pictext'):
                for link in block.find_all('a', {'target': '_blank'}):
                    # An anchor without children has no title to record;
                    # indexing contents[0] on it would raise IndexError.
                    if link.contents:
                        self.name.append(str(link.contents[0]))
        return self.name
import win32com.client
import time
def spk(name):
    """Read out an intro phrase followed by every entry of ``name``.

    Speech goes through the Windows "SAPI.SpVoice" COM object. After every
    second entry the function sleeps for one second.
    """
    voice = win32com.client.Dispatch("SAPI.SpVoice")
    voice.Speak('今日更新漫画有:')
    for position, title in enumerate(name):
        voice.Speak(title)
        # Odd zero-based positions are the 2nd, 4th, ... entries.
        if position % 2 == 1:
            time.sleep(1)
def want_spk(name):
    """Speak and print every entry of ``name`` that is in the collection file.

    The collection is read from ./file/Collection.txt (UTF-8), one title per
    line. A spoken header is emitted once, right before the first match;
    when nothing matches, nothing is spoken or printed.

    Args:
        name: iterable of updated titles; assumed to be strings, since they
            are looked up in a set of lines read from the file.
    """
    voice = win32com.client.Dispatch("SAPI.SpVoice")
    # The context manager closes the file; previously the handle was never
    # closed.
    with open('./file/Collection.txt', 'r', encoding='utf-8') as collection_file:
        # A set gives constant-time membership tests inside the loop below.
        wanted = set(collection_file.read().splitlines())
    header_spoken = False
    for title in name:
        if title not in wanted:
            continue
        if not header_spoken:
            voice.Speak('您收藏的漫画有更新:')
            header_spoken = True
        voice.Speak(title)
        print(title)
def just_show(name):
    """Print every entry of ``name`` that is in the collection file.

    The collection is read from ./file/Collection.txt (UTF-8), one title per
    line. Matches are printed in the order they appear in ``name``, and a
    title that occurs twice in ``name`` is printed twice.

    Args:
        name: iterable of updated titles; assumed to be strings, since they
            are looked up in a set of lines read from the file.
    """
    # The context manager closes the file; previously the handle was never
    # closed.
    with open('./file/Collection.txt', 'r', encoding='utf-8') as collection_file:
        # A set gives constant-time membership tests inside the loop below.
        wanted = set(collection_file.read().splitlines())
    for title in name:
        if title in wanted:
            print(title)