《2018年7月14日》【连续285天】
标题:XPath库练习;
内容:
试着用 XPath 写一个爬虫,爬取 B 站 UP 主发布过的视频信息,
先写了一个框架:
import requests
from lxml import etree
from requests.exceptions import RequestException
# Root of bilibili user-space URLs; find_video appends "/<uid>/..." to it.
baseurl ="http://space.bilibili.com"
def find_video(uid, page):
    """Fetch the HTML of one page of an uploader's video list.

    Args:
        uid: The uploader's user id; converted with ``str()``.
        page: Page number placed in the ``page`` parameter of the URL.

    Returns:
        The response body as text when the server answers 200, otherwise
        ``None`` (non-200 status, timeout, or any other request failure).
    """
    # NOTE(review): everything after '#' is a URL fragment, which HTTP clients
    # do not send to the server, so tid/page/order probably never reach
    # bilibili and the list is rendered client-side. Confirm, and consider
    # switching to a real query string or a JSON API endpoint.
    url = (baseurl + '/' + str(uid) + '/#/video?tid=0&page=' + str(page)
           + "&keyword=&order=pubdate")
    # The header value must be only the agent string; the original mistakenly
    # repeated the "User-Agent: " header name inside the value.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) '
                      'Gecko/20100101 Firefox/4.0.1',
    }
    try:
        # Without a timeout requests may block forever on a stalled socket;
        # a timeout surfaces as a RequestException subclass handled below.
        response = requests.get(url, headers=headers, timeout=10)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
def print_video(text):
    """Parse a video-list HTML page and print the title link targets.

    Args:
        text: HTML source as a string, or ``None``/empty when the fetch
            failed (``find_video`` returns ``None`` on failure).

    Prints the list of ``href`` values of ``<a>`` elements whose class
    contains "title" inside ``<ul class="clearfix cube-list">``; prints an
    empty list when there is nothing to parse or nothing matches.
    """
    result = []
    if text:
        # etree.parse() expects a filename/URL/file object, so passing the
        # page source to it fails; etree.HTML() parses an in-memory string.
        html = etree.HTML(text)
        # Guard against a None root, which lxml may return for input with no
        # parsable content.
        if html is not None:
            result = html.xpath(
                '//ul[@class="clearfix cube-list"]/li/a[contains(@class,"title")]/@href'
            )
    print(result)
def main(uid):
    """Fetch the first video-list page for ``uid`` and print its links."""
    first_page_html = find_video(uid, 1)
    print_video(first_page_html)
if __name__ == "__main__":
    # Run the scraper against a sample uploader id when executed as a script.
    main("11357018")
出现了bug,排查中;