import requests
from bs4 import BeautifulSoup
def url_list2():
    """Crawl listing pages 1-4 of laod.cn/news and parse each linked article.

    For every listing page, each ``<article>`` element carrying the CSS class
    ``wow`` is inspected; the ``href`` of its first ``<a>`` is handed to
    ``contentparse`` and the resulting dict is collected. Each article URL is
    printed as it is processed, and the full result list is printed once the
    crawl is finished.

    Returns:
        list: the dicts returned by ``contentparse``, in crawl order.
    """
    content = []
    for page in range(1, 5):
        listing_url = 'https://laod.cn/news/page/' + str(page)
        res = requests.get(listing_url)
        soup = BeautifulSoup(res.text, "lxml")
        for article in soup.find_all('article', class_='wow'):
            link = article.find('a')
            # An <article> without an anchor (or an anchor without href) would
            # otherwise raise TypeError/KeyError and abort the whole crawl.
            if link is None or not link.get('href'):
                continue
            url = link['href']
            content_dict = contentparse(url)
            content.append(content_dict)
            print(url)
    print(content)
    # The original returned the undefined name ``content2`` (NameError).
    return content
def contentparse(url):
    """Download one article page and pull out its title, date and body.

    Args:
        url: address of the article page to fetch.

    Returns:
        dict: ``'title'`` is the text of the ``h1.entry-title`` element,
        ``'time'`` is characters 15-24 of the text of the ``ul.spostinfo``
        element (presumably the publication date -- verify against the live
        page), and ``'context'`` is the ``div.single-content`` element itself
        (a BeautifulSoup node, not plain text), or ``None`` if it is absent.
    """
    response = requests.get(url)
    page = BeautifulSoup(response.text, 'lxml')

    heading = page.find('h1', attrs={'class': 'entry-title'})
    post_info = page.find('ul', attrs={'class': 'spostinfo'})
    body = page.find('div', attrs={'class': 'single-content'})

    info_text = post_info.get_text()
    return {
        'title': heading.get_text(),
        # Fixed-offset slice into the post-info text.
        'time': info_text[15:25],
        'context': body,
    }
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    url_list2()
python requests简易爬取资讯
最新推荐文章于 2025-07-01 17:30:16 发布