import requests
import json
from bs4 import BeautifulSoup
# Script: POST a JSON query to the cnblogs post-list endpoint, parse the
# returned markup as HTML, and print the title and link of every post found.

url = 'https://www.cnblogs.com/AggSite/AggSitePostList'

# Query sent as the JSON request body.
# NOTE(review): PageIndex presumably selects which result page is returned and
# TotalPostCount the overall listing size — confirm against the site's API.
data = {
    "CategoryType": "SiteHome",
    "ParentCategoryId": 0,
    "CategoryId": 808,
    "PageIndex": 3,
    "TotalPostCount": 2000,
    "ItemListActionName": "AggSitePostList",
}

# The body is serialized with json.dumps below, so the content type is
# declared as JSON explicitly; the user-agent is a desktop browser string.
headers = {
    'Content-Type': 'application/json',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
}

# requests applies no timeout by default, so a stalled connection would hang
# the script forever; bound the wait (raises requests.exceptions.Timeout).
response = requests.post(
    url=url,
    data=json.dumps(data),
    headers=headers,
    timeout=10,
)
code = response.status_code
print(code)
text = response.text

soup = BeautifulSoup(text, 'html.parser')
articles = soup.find_all('article', class_='post-item')
for article in articles:
    link = article.find('a', class_='post-item-title')
    # find() returns None when the article has no matching title anchor;
    # skip such entries instead of crashing on link.get_text().
    if link is None:
        continue
    # An anchor without an href has nothing to print as a link; skip it
    # rather than raising KeyError.
    href = link.get('href')
    if href is None:
        continue
    title = link.get_text()
    print(title, href)
# Python cnblogs (博客园) crawler, part 02
# Latest recommended article published on 2025-04-02 16:54:32