import urllib.request
import urllib.error
import re
headers = ("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
urllib.request.install_opener(opener)
for i in range(1,2):
url = "https://www.qiushibaike.com/8hr/page/"+str(i)+"/"
pagedata = urllib.request.urlopen(url).read().decode("utf-8","ignore")
pattern = '<div class="content">.*?<span>(.*?)</span>(.*?)</div>'
datalist = re.compile(pattern,re.S).findall(pagedata)
for j in range(0,len(datalist)):
print("第"+str(i)+"页第"+str(j)+"个段子内容是:")
print(datalist[j])