from bs4 import BeautifulSoup
import requests
import json
import time
import logging
# Configure logging once at import time: INFO level, with each record
# carrying a timestamp, the logger name, the level name and the message.
_LOGGING_OPTIONS = {
    "level": logging.INFO,
    "format": "%(asctime)s %(name)s %(levelname)s %(message)s",
    "datefmt": "%Y-%m-%d %H:%M:%S %a",
}
logging.basicConfig(**_LOGGING_OPTIONS)

# Module-level logger named after this module.
log = logging.getLogger(__name__)
class AccounZhihu:
    """Scraper for the Zhihu billboard (hot list) page.

    The page embeds its state as JSON inside a ``<script id="js-initialData">``
    tag; :meth:`zhihu` downloads the page, decodes that JSON and returns the
    entries found under ``initialState.topstory.hotList``.
    """

    _BILLBOARD_URL = "https://www.zhihu.com/billboard"

    # Browser-like headers sent with the page request.
    _HEADERS = {
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'sec-ch-ua-platform': "Windows",
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'User-Agent': 'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    }

    def __init__(self):
        pass

    def zhihu(self, timeout=30):
        """Fetch the billboard page and return its hot-list entries.

        This method does not raise; every failure is logged and reported
        through the returned dict.

        Args:
            timeout: Timeout in seconds passed to ``requests.get``.

        Returns:
            On success ``{'data': [...], 'status': 200}``, where each entry
            has the keys ``index``, ``answerCount``, ``titleText``,
            ``excerptText``, ``imageURL``, ``metricsText`` and ``linkURL``.
            When the hot list is missing or empty,
            ``{'data': [], 'status': 404}``.
            On failure ``{'error': <message>, 'status': <code>}`` with code
            ``1001`` (non-200 HTTP response), ``1002`` (the request itself
            failed) or ``1000`` (the page could not be decoded or parsed).
        """
        try:
            response = requests.get(
                self._BILLBOARD_URL, headers=self._HEADERS, timeout=timeout
            )
            if response.status_code != 200:
                log.warning(
                    "Zhihu billboard request returned HTTP %s",
                    response.status_code,
                )
                return {'error': f"error {response.text}", 'status': 1001}
            page_html = response.text
        except Exception as e:
            # Boundary handler: callers expect an error dict, never a raise.
            log.exception("Zhihu billboard request failed")
            return {'error': f"请求发生错误:{str(e)}", 'status': 1002}

        try:
            return self._parse_billboard(page_html)
        except Exception as e:
            # Covers unexpected page structure, e.g. an entry missing a field.
            log.exception("Failed to parse the Zhihu billboard page")
            return {'error': f"JSON解析失败:{str(e)}", 'status': 1000}

    def _parse_billboard(self, page_html):
        """Extract the hot list from the page HTML and build the result dict."""
        soup = BeautifulSoup(page_html, "html.parser")
        script_tag = soup.find('script', {'id': 'js-initialData'})
        try:
            # ``.string`` yields the raw script payload; ``.text`` returns an
            # empty string for <script> tags on some Beautiful Soup releases.
            # AttributeError: tag not found; TypeError: tag has no single
            # string child; ValueError: payload is not valid JSON.
            parsed_data = json.loads(script_tag.string)
        except (AttributeError, TypeError, ValueError):
            log.warning(
                "Could not decode the js-initialData script payload",
                exc_info=True,
            )
            return {'error': "解码失败,请检查编码是否正确", 'status': 1000}

        hot_list = (
            parsed_data.get("initialState", {})
            .get("topstory", {})
            .get("hotList", [])
        )
        if not hot_list:
            return {'data': [], 'status': 404}

        entries = [
            self._to_entry(index, item) for index, item in enumerate(hot_list)
        ]
        return {'data': entries, 'status': 200}

    @staticmethod
    def _to_entry(index, item):
        """Flatten one raw hot-list item into the dict returned to callers."""
        answer_count = item["feedSpecific"]["answerCount"]
        target = item["target"]
        return {
            "index": index,
            "answerCount": answer_count,
            "titleText": target["titleArea"]["text"],
            "excerptText": target["excerptArea"]["text"],
            "imageURL": target["imageArea"]["url"],
            "metricsText": target["metricsArea"]["text"],
            "linkURL": target["link"]["url"],
        }
# Script entry point: run the scraper once and print the resulting dict.
if __name__ == "__main__":
    print(AccounZhihu().zhihu())