尝试了一下知乎首页爬虫:
import re
import requests
from urllib import parse
# "首页链接" = "homepage links": accumulator list, presumably filled with
# scraped homepage article URLs later in the file — not visible in this chunk.
首页链接 = []
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
'cookie': '_zap=d87a4437-7623-4ed0-9aea-8e3db9f0a2a7; _xsrf=WE09KPNwIEZZSwM7t95gSxiU0bH5VZfe; d_c0="AJBsNreRQBCPTnUIjLaLd2xSWLQtcZGToV8=|1571991072"; z_c0=Mi4xYjg4QUJBQUFBQUFBa0d3MnQ1RkFFQmNBQUFCaEFsVk5LUHlmWGdCNnpNN3Zta1NNeTJsdldITUgyU05ySERaNnJ3|1571991080|b93528ba749b936e200ca7a5d85c9653f2e0f932; tst=r; __utmv=51854390.100--|2=registration_date=20170128=1^3=entry_date=20170128=1; __utma=51854390.216570059.1573461536.1573461536.1573541524.2; __utmz=51854390.1573541524.2.2.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; q_c1=75d1f2e82fb3418ca72dd2006a184a02|15762