import requests
from fake_useragent import UserAgent
# Shared fake_useragent generator; its `.random` attribute supplies the
# User-Agent header value sent with the request below.
agent = UserAgent()
# lxml's etree is needed for the XPath queries used below.
from lxml import etree

# --- Download ---
# Search-result URL for "python" jobs on 51job.
# NOTE: the query string must contain "&degreefrom=99"; the "&deg" prefix is
# easily mangled into the "°" character by HTML-entity decoding, which
# corrupts both the `cotype` and `degreefrom` parameters.
url = "http://search.51job.com/list/010000%252C020000%252C180200%252C200200,000000,0000,00,9,99,python,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare="
response = requests.get(
    url,
    headers={"User-Agent": agent.random},
    # Without a timeout, requests waits forever on an unresponsive server.
    timeout=10,
)
# Set the decoding used by `response.text` to the encoding requests detects
# from the body itself, instead of the one derived from the HTTP headers.
response.encoding = response.apparent_encoding

# --- Parse ---
# `root` is the root element of the parsed HTML page, i.e. the page itself.
root = etree.HTML(response.text)
#用xpath返回的是一个列表
div_list = root.xpath('//div[@class="dw_t