from bs4 import BeautifulSoup
# Fetch the page markup; GetHtmlText and url are expected to be defined elsewhere.
Text = GetHtmlText(url)
# Parse the markup with the 'html.parser' backend, spelling out the
# constructor arguments by keyword.
soup = BeautifulSoup(markup=Text, features='html.parser')
(1)获取标签的内容
# Accessing a tag name as an attribute of the soup selects the first tag with
# that name; .string then reads that tag's text content.
soup.p.string   # content of the first <p> tag
soup.h2.string  # content of the first <h2> tag
(2)获取标签的属性
# Tag attributes are read with dictionary-style indexing on the tag.
soup.p['id']  # 'id' attribute of the first <p> tag
soup.p.attrs['id']  # this spelling also works: index the tag's .attrs mapping
soup.p['class']  # 'class' attribute of the first <p> tag
soup.a['href']  # 'href' attribute of the first <a> tag
可以嵌套选择:用连续的点号逐层选取内层标签
# Dotted access can be chained: take the first <div>, then the first <p>
# inside it, then read that tag's content.
soup.div.p.string
find_all 方法会以列表(ResultSet)形式返回所有匹配的标签
# Print every <li> tag in the document as one list.
print(soup.find_all('li'))

# Print the content of each <li> tag, one per line.
for a in soup.find_all('li'):
    print(a.string)

# For each <ul>, print the list of <li> tags found inside it
# (find_all can be called on a tag as well as on the whole soup).
for ul in soup.find_all('ul'):
    print(ul.find_all('li'))

# Filter by attribute value only: all tags whose id attribute is '1'.
soup.find_all(attrs={'id': '1'})
# Filter by tag name and attribute: all <li> tags whose name attribute is 'a'.
soup.find_all('li', attrs={'name': 'a'})