from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Fetch the page at `url`, then print the absolute target and the text of
# every hyperlink found in it, followed by the number of links printed.
url = 'https://so.gushiwen.cn/gushi/tangshi.aspx'
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, timeout=10)
# Force UTF-8 decoding of the body instead of relying on requests' guess.
r.encoding = "utf8"
html = r.text
print(r.status_code)
plist = []  # not used in this section; kept in case later code relies on it
soup = BeautifulSoup(html, 'html.parser')
links = soup.find_all('strong')  # not used in this section; kept as above
i = 0
poetrys = soup.find_all('a')
for poetry in poetrys:
    # Anchors without an href (e.g. named anchors) would raise KeyError on
    # poetry['href'], so look the attribute up safely and skip empty ones.
    href = poetry.get('href')
    if not href:
        continue
    i += 1
    # urljoin resolves relative hrefs against the page URL and leaves
    # already-absolute hrefs untouched, unlike plain string concatenation.
    print(urljoin(url, href), poetry.text)
print(i)
# Use the BeautifulSoup library to scrape data from a web page.