from bs4 import BeautifulSoup,Tag,CData
import re
# Walk-through of basic Beautiful Soup usage: parse a small HTML document,
# navigate the resulting tree, search it, and finally modify it in place.

# The sample document, kept as fragments that are joined before parsing.
doc = [
    '<html><head><title>Page title</title></head>',
    '<body><p id="firstpara" align="center">This is paragraph <b>one</b>.</p>',
    '<p id="secondpara" align="blah">This is paragraph <b>two</b>.</p>',
    '</body></html>',
]

# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so the tree could differ between machines.
# 'html.parser' is backed by the standard library, so no extra dependency.
soup = BeautifulSoup(''.join(doc), 'html.parser')

# prettify is a method; it has to be called to get the indented markup.
print(soup.prettify())

# --- Navigating down the tree through .contents ---
print(soup.contents[0].name)
print(soup.contents[0].contents[0].name)
print(soup.contents[0].contents[0].contents[0].string)

# --- Navigating from a tag to its parent, next element and siblings ---
head = soup.contents[0].contents[0]
print(head.parent.name)
print(head.next_element)
print(head.next_sibling.name)
print(head.next_sibling.contents[0])
# Step sideways from the first child of <body> to the one that follows it.
print(head.next_sibling.contents[0].next_sibling)

# --- Searching: find_all returns every match, find only the first ---
print(soup.find_all('p', align="center"))
print(soup.find('p', align="center"))
# Calling the soup object directly is shorthand for find_all.
print(soup('p', align="center")[0]['id'])
print(soup.find('p').b.string)
print(soup('p')[1].b.string)

# --- Modifying the tree ---
titleTag = soup.html.head.title
titleTag['id'] = 'theTitle'  # attributes are set like dictionary items
print(titleTag)
titleTag.contents[0].replace_with("new title")
# Replace the first <p> with the first <b>, which is nested inside that <p>.
soup.p.replace_with(soup.b)
print(soup.prettify())
# Insert a plain string as the first child of <body>.
soup.body.insert(0, "this page used to have")
# BeautifulSoup object methods
# Latest recommended article published on 2025-02-03 20:57:51