"""
Print the `data-share` links found on a Sogou WeChat search results page.

Originally created as a temporary script in the Spyder editor.
"""
import requests
from requests.exceptions import ConnectionError
from pyquery import PyQuery as pq
# Search URL template: the first placeholder is the query keyword, the
# second is the result page number.
base_url = 'https://weixin.sogou.com/weixin?query={}&s_from=input&type=2&page={}&ie=utf8'

# Request headers sent with every search request.
headers = {
    'Cookie': 'SMYUV=156867c543fede325; sct=2',
    'Host': 'weixin.sogou.com',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
}
def get_html(url, max_retries=3):
    """Fetch ``url`` and return the response body, or ``None`` on failure.

    The request is sent with the module-level ``headers`` and with
    redirects disabled, so a 302 answer is reported instead of followed.

    Args:
        url: Address to request.
        max_retries: Number of extra attempts made after a connection
            error or timeout before giving up. Negative values are
            treated as 0.

    Returns:
        The response text when the server answers with status 200;
        ``None`` for any other status or when every attempt failed.
    """
    for _ in range(max(0, max_retries) + 1):
        try:
            response = requests.get(
                url,
                allow_redirects=False,
                headers=headers,
                # Without a timeout a stalled server would block forever.
                timeout=10,
            )
        except (ConnectionError, requests.exceptions.Timeout):
            # Transient network failure: retry, but only a bounded number
            # of times so a dead host cannot recurse/loop indefinitely.
            continue

        status = response.status_code
        if status == 200:
            # Report the status before returning so it is actually printed.
            print('200')
            return response.text
        if status in (302, 404):
            print(status)
        return None

    # All attempts ended in a connection error or timeout.
    return None
def get_index(keyword, page):
    """Print and return the ``data-share`` links of one search result page.

    Args:
        keyword: Search term substituted into ``base_url``.
        page: Result page number substituted into ``base_url``.

    Returns:
        A list with the ``data-share`` attribute of every anchor matched
        by ``.news-list li .txt-box h3 a`` (entries are ``None`` for
        anchors lacking the attribute). The list is empty when the page
        could not be fetched.
    """
    url = base_url.format(keyword, page)
    html = get_html(url)
    if not html:
        # get_html yields None for non-200 answers; there is nothing to
        # parse, and handing None/'' to PyQuery would not give usable markup.
        return []

    doc = pq(html)
    # .items() is a one-shot generator, so materialise the links once and
    # reuse the list for both printing and the return value.
    links = [
        anchor.attr('data-share')
        for anchor in doc('.news-list li .txt-box h3 a').items()
    ]
    for link in links:
        print(link)
    return links
# Script entry point: runs only when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    get_index(keyword='风景', page=10)