import requests
from lxml import etree
import time
'''
黑板爬虫闯关
网址:http://www.heibanke.com/lesson/crawler_ex00/
'''
start_time = time.time()
start_url = "http://www.heibanke.com/lesson/crawler_ex00/"
res = requests.get(start_url)
res.encoding = "utf8"
html = etree.HTML(res.content)
Num = html.xpath("//h3/text()")[0][-5:]
print(Num)
while Num:
url = "http://www.heibanke.com/lesson/crawler_ex00/" + str(Num) + "/"
print(url)
res = requests.get(url)
res.encoding = "utf8"
html = etree.HTML(res.content)
Num = html.xpath("//h3/text()")[0]
N = []
for i in Num:
if i.isdigit() == True:
N.append(i)
Num = "".join(N)
print(Num)
end_time = time.time()
print(end_time-start_time)