# coding: utf-8
# __author__ = 'wang'
"""Demonstrate CSS-selector queries on an HTML page with lxml.

The ``cssselect`` method used below needs the separate ``cssselect``
package to be installed alongside lxml.
"""
import requests
from lxml import etree

# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get('https://www.baidu.com', timeout=10)

# HTMLParser is the one-shot parser; HTMLPullParser is meant for incremental,
# event-driven parsing and is not needed when the whole document is in memory.
html = etree.HTML(response.content, parser=etree.HTMLParser(encoding='utf-8'))

res = html.cssselect('a.mnav')
for tag in res:
    print(tag.text)
    # Read an attribute value from the tag: ``attrib`` is a dict-like mapping
    # built from every attribute in the start tag (name, class, id, href,
    # src, ...), so values are looked up by key. ``get`` avoids a KeyError
    # when the attribute is missing.
    print(tag.attrib.get('href'))

# NOTE(review): these selectors look like forum-post markup (post container,
# author link, post body) rather than anything on the page fetched above, so
# this loop presumably matches nothing here -- confirm the intended URL.
# The class name must start with a letter: ``div.1_post`` is not valid CSS.
div_obj_list = html.cssselect('div.l_post')
for div_tag in div_obj_list:
    author_tags = div_tag.cssselect('a.p_author_name')
    content_tags = div_tag.cssselect('div.d_post_content')
    # Skip posts that lack either element instead of failing on ``[0]``.
    if not author_tags or not content_tags:
        continue
    username = author_tags[0].text
    content = content_tags[0].text
    print(username)
    print(content)
lxml 的 CSS 选择器(cssselect)用法
最新推荐文章于 2025-07-05 14:41:56 发布