1.
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
# Sample document assembled from adjacent string literals; the resulting
# value is one single-line string that starts with a space.
html_sample = (
    ' '
    '<html> '
    '<body> '
    '<h1 id = "title">Hello World</h1> '
    '<a href ="#" class="link">this is link1</a> '
    '<a href = "# link2" Class = "link">This is link2</a> '
    '</body> '
    '</html>'
)
soup = BeautifulSoup(html_sample, features='html.parser')
# Select every <h1> element with a CSS selector.
header = soup.select('h1')
# Show, in order: the whole result, its first tag, and that tag's text.
for shown in (header, header[0], header[0].text):
    print(shown)
运行结果为:
[<h1 id="title">Hello World</h1>]
<h1 id="title">Hello World</h1>
Hello World
2.
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
# Sample document: one <h1> heading followed by two <a> links.
html_sample = ' \
<html> \
<body> \
<h1 id = "title">Hello World</h1> \
<a href ="#" class="link">this is link1</a> \
<a href = "# link2" Class = "link">This is link2</a> \
</body> \
</html>'
soup = BeautifulSoup(html_sample,'html.parser')
# Select every <a> element with a CSS selector.
alink = soup.select('a')
print(alink)
# Print each matched tag on its own line; the loop body must be indented,
# otherwise Python rejects the script with an IndentationError.
for link in alink:
    print(link)
运行结果为:
[<a class="link" href="#">this is link1</a>, <a class="link" href="# link2">This is link2</a>]
<a class="link" href="#">this is link1</a>
<a class="link" href="# link2">This is link2</a>
3.
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
# Sample document: one <h1> heading followed by two <a> links.
html_sample = ' \
<html> \
<body> \
<h1 id = "title">Hello World</h1> \
<a href ="#" class="link">this is link1</a> \
<a href = "# link2" Class = "link">This is link2</a> \
</body> \
</html>'
soup = BeautifulSoup(html_sample,'html.parser')
# Select every <a> element with a CSS selector.
alink = soup.select('a')
print(alink)
for link in alink:
    # .text yields only the text inside the tag, without the markup.
    print(link.text)
运行结果为:
[<a class="link" href="#">this is link1</a>, <a class="link" href="# link2">This is link2</a>]
this is link1
This is link2