import requests
from bs4 import BeautifulSoup
# Candidate page URLs; the trailing "#p1".."#p9" is intended to select pages 1-9.
# NOTE(review): "#pN" is a URL fragment — it is never transmitted to the server,
# so all nine requests presumably return the same front page (cnblogs paging
# looks client-side/JS-driven). TODO: confirm the real pagination endpoint.
urls = [f'https://www.cnblogs.com/#p{i}' for i in range(1, 10)]
# Request every page URL and return the fetched content.
def crew():
    """Fetch every URL in ``urls`` and return all response bodies as one string.

    Bug fix: the original ``return r.text`` was inside the loop, so only the
    first URL was ever requested. Joining the pages preserves the single-``str``
    return type that ``parse()`` and the ``__main__`` block rely on while
    actually covering every URL.

    Returns:
        str: the HTML of all fetched pages, newline-joined.
    """
    pages = []
    for url in urls:
        # Timeout so one stalled server cannot hang the whole crawl.
        r = requests.get(url, timeout=10)
        pages.append(r.text)
    return '\n'.join(pages)
# Parse the HTML returned by crew() and pull out the matching entries.
def parse(html):
    """Extract (href, title) pairs from post-title anchors in *html*.

    Args:
        html: an HTML document as a string.

    Returns:
        list[tuple[str, str]]: one ``(href, link text)`` tuple per anchor
        carrying the ``post-item-title`` class.
    """
    soup = BeautifulSoup(html, 'html.parser')
    results = []
    for anchor in soup.find_all('a', class_='post-item-title'):
        results.append((anchor['href'], anchor.get_text()))
    return results
def main():
    """Crawl the configured pages and print each (href, title) pair found."""
    for entry in parse(crew()):
        print(entry)


if __name__ == '__main__':
    main()