import re
import requests
from bs4 import BeautifulSoup
def getHTML(url):
    """Download *url* and return the response body as text.

    The request uses a 30-second timeout. The response encoding is set from
    ``apparent_encoding`` (the encoding guessed from the body) before the
    text is decoded.

    Args:
        url: Address to fetch.

    Returns:
        The decoded body, or ``""`` if the request fails or the server
        answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: any network/HTTP failure yields an empty page.
        # Only request errors are caught so Ctrl-C and programming errors
        # are not silently swallowed.
        return ""
    r.encoding = r.apparent_encoding
    return r.text


def getContent(url):
    """Fetch *url* and return its ``h3``, ``label`` and ``span`` elements.

    The page is parsed with the ``html.parser`` backend. The URL and the
    matched elements are printed as progress output.

    Args:
        url: Address of the page to scrape.

    Returns:
        The result of ``soup.select('h3,label,span')``; empty when the page
        could not be fetched, because ``getHTML`` returns ``""`` then.
    """
    html = getHTML(url)
    print("url=", url)
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.select('h3,label,span')
    # Output format kept as-is: prints the literal "%s" followed by the list.
    print("%s", title)
    return title
def saveFile(text, path='novel.txt'):
    """Append the text of every non-empty element in *text* to a file.

    Args:
        text: Iterable of elements that support ``len()`` and
            ``get_text()`` (``main`` passes the result of ``getContent``).
        path: File to append to, opened as UTF-8. Defaults to
            ``'novel.txt'`` in the current working directory.

    Each element whose length is greater than zero contributes one line
    (its ``get_text()`` followed by a newline) and is echoed to stdout.
    """
    # ``with`` guarantees the file is closed even if get_text() or the
    # write raises part-way through.
    with open(path, 'a', encoding='utf-8') as f:
        for t in text:
            # Explicit length test on purpose: the skip rule is "zero
            # length", which is not necessarily the same as falsiness for
            # the element type.
            if len(t) > 0:
                f.write(t.get_text() + "\n")
                print("456", t)


# Values substituted into the ``tikubh`` query parameter, in crawl order.
_TIKU_IDS = (
    1436, 1467, 1471, 1484, 1485, 1486, 4199, 4200, 16719, 16937,
    17076, 17251, 17886, 18186, 18219, 18263, 18264, 18638, 20244,
)

# Page numbers 0 .. _PAGES_PER_ID - 1 are requested for every id.
_PAGES_PER_ID = 100


def main():
    """Crawl every page of every configured id and append it to the file.

    For each id in ``_TIKU_IDS`` and each page number below
    ``_PAGES_PER_ID``, the page URL is built, scraped with ``getContent``
    and the result is passed to ``saveFile``. Progress is printed to stdout.
    """
    for j in _TIKU_IDS:
        print("j=", j)
        url01 = ('http://10.1.20.14/redir.php?catalog_id=6&cmd=learning&tikubh='
                 + str(j) + '&page=')
        print(url01)
        for i in range(_PAGES_PER_ID):
            url02 = url01 + str(i)
            print(url02)
            text = getContent(url02)
            print(text)
            saveFile(text)


# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()