如何使用python查看百度指数

直接上代码
运行前,请将 cookie 替换为自己登录百度账号后浏览器中的 Cookie 值,并将 key 修改为要查询的关键词列表。



import requests
import json
from datetime import date, timedelta
import pandas as pd
from odps import ODPS
import time



# Keywords to look up. The whole list is sent in one request, but
# DownloadBaiDuIndex.GetIndex only tabulates the first entry.
key = ["特斯拉"]

# Cookie header value of a logged-in Baidu account; replace this placeholder
# with your own before running.
cookie = "自己百度账号登录后的cookie"

class DownloadBaiDuIndex(object):
    """Download and decode Baidu Index series for keywords in one area.

    The search endpoint returns obfuscated series together with a ``uniqid``;
    a second endpoint returns the substitution key for that ``uniqid``, which
    ``decrypt`` applies to recover the comma-separated values.
    """

    # Request-signature header value shipped with the original script.
    # NOTE(review): the leading fields look like millisecond timestamps, so
    # this token may go stale -- confirm, and pass ``cipher_text`` to override.
    DEFAULT_CIPHER_TEXT = "1656572408684_1656582701256_Nvm1pABkNsfD7V9VhZSzzFiFKylr3l5NR3YDrmHmH9yfFicm+Z9kmmwKVqVV6unvzAEh5hgXmgelP+OyOeaK8F21LyRVX1BDjxm+ezsglwoe1yfp6lEpuvu5Iggg1dz3PLF8e2II0e80ocXeU0jQFBhSbnB2wjhKl57JggTej12CzuL+h9eeVWdaMO4DSBWU2XX6PfbN8pv9+cdfFhVRHCzb0BJBU3iccoFczwNQUvzLn0nZsu0YPtG5DxDkGlRlZrCfKMtqKAe1tXQhg3+Oww4N3CQUM+6A/tKZA7jfRE6CGTFetC7QQyKlD7nxabkQ5CReAhFYAFAVYJ+sEqmY5pke8s3+RZ6jR7ASOih6Afl35EArbJzzLpnNPgrPCHoJiDUlECJveul7P5vvXl/O/Q=="

    # Seconds to wait for each HTTP response before giving up, so a stalled
    # connection cannot hang the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # Names of the three series present in every ``userIndexes`` entry.
    SERIES_NAMES = ("all", "pc", "wise")

    def __init__(self, cookie, cipher_text=None):
        """Store the session cookie and build the request headers.

        Args:
            cookie: ``Cookie`` header value of a logged-in Baidu account.
            cipher_text: Optional ``Cipher-Text`` header value; defaults to
                ``DEFAULT_CIPHER_TEXT``.
        """
        self.cookie = cookie
        if cipher_text is None:
            cipher_text = self.DEFAULT_CIPHER_TEXT
        self.headers = {
            "Connection": "keep-alive",
            "Accept": "application/json, text/plain, */*",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Referer": "https://index.baidu.com/v2/main/index.html",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cookie": self.cookie,
            "Host": "index.baidu.com",
            "X-Requested-With": "XMLHttpRequest",
            "Cipher-Text": cipher_text,
        }

    def decrypt(self, ptbk, index_data):
        """Decode ``index_data`` with the substitution key ``ptbk``.

        The first half of ``ptbk`` lists the cipher characters and the second
        half lists, position by position, the characters they stand for.

        Raises:
            KeyError: If ``index_data`` contains a character not in the key.
        """
        n = len(ptbk) // 2
        mapping = dict(zip(ptbk[:n], ptbk[n:]))
        return "".join(mapping[ch] for ch in index_data)

    @staticmethod
    def _extract_data(body, expected_type, what):
        """Return ``body['data']`` or raise a descriptive ``ValueError``."""
        data = body.get("data") if isinstance(body, dict) else None
        if not data or not isinstance(data, expected_type):
            raise ValueError(
                f"Baidu Index {what} request returned no usable data: {body!r}"
            )
        return data

    def get_index_data_json(self, city, keys, start=None, end=None):
        """Fetch and decode the index series for ``keys`` in area ``city``.

        Args:
            city: Area code sent as the ``area`` query parameter.
            keys: Iterable of keyword strings.
            start: Start date string sent as ``startDate``; omitted from the
                query when ``None``.
            end: End date string sent as ``endDate``; omitted when ``None``.

        Returns:
            A dict with ``"startDate"`` and ``"endDate"`` plus one entry per
            returned keyword, each mapping ``"all"``, ``"pc"`` and ``"wise"``
            to a list of value strings.

        Raises:
            requests.RequestException: On network failure, timeout or an HTTP
                error status.
            ValueError: If a response carries no usable ``data`` payload.
        """
        # json.dumps keeps spaces and quotes inside keywords intact, which the
        # previous str()/replace() approach corrupted.
        words = json.dumps(
            [[{"name": key, "wordType": 1}] for key in keys],
            ensure_ascii=False,
            separators=(",", ":"),
        )
        # HTTPS so the account cookie is not sent in clear text; ``params``
        # lets requests percent-encode the values.
        res = requests.get(
            "https://index.baidu.com/api/SearchApi/index",
            params={"area": city, "word": words, "startDate": start, "endDate": end},
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        res.raise_for_status()
        data = self._extract_data(res.json(), dict, "search")

        res = requests.get(
            "https://index.baidu.com/Interface/ptbk",
            params={"uniqid": data["uniqid"]},
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        res.raise_for_status()
        ptbk = self._extract_data(res.json(), str, "ptbk")

        result = {"startDate": start, "endDate": end}
        for user_index in data["userIndexes"]:
            name = user_index["word"][0]["name"]
            result[name] = {
                series: self.decrypt(ptbk, user_index[series]["data"]).split(",")
                for series in self.SERIES_NAMES
            }
        return result

    def GetIndex(self, city, keys, start=None, end=None, pause=1):
        """Return the series of ``keys[0]`` as a date-indexed DataFrame.

        Each row's ``all``, ``wise`` and ``pc`` values are printed, then the
        call sleeps for ``pause`` seconds before returning.

        Args:
            city: Area code forwarded to ``get_index_data_json``.
            keys: Keyword list; only the first keyword is tabulated.
            start: Start date string; defaults to eight days before today.
            end: End date string; defaults to two days before today.
            pause: Seconds to sleep after a successful download.

        Returns:
            A DataFrame with columns ``all``, ``pc`` and ``wise`` indexed by
            the daily range ``start``..``end``. If the download or tabulation
            fails, the error is printed and an empty DataFrame with those
            columns is returned.
        """
        today = date.today()
        if start is None:
            start = str(today - timedelta(days=8))
        if end is None:
            end = str(today - timedelta(days=2))

        try:
            series = self.get_index_data_json(city=city, keys=keys, start=start, end=end)
            frame = pd.DataFrame(series[keys[0]])
            frame.index = pd.date_range(start=start, end=end)
        except Exception as e:
            # Best-effort boundary: report and hand back an empty frame so a
            # caller looping over many areas keeps going. Unlike the previous
            # ``finally: return``, KeyboardInterrupt/SystemExit still propagate.
            print(e)
            return pd.DataFrame({'all': [], 'pc': [], 'wise': []})

        for _, row in frame.iterrows():  # walk the frame row by row
            print(str(row['all']))
            print(str(row['wise']))
            print(str(row['pc']))
        if pause:
            time.sleep(pause)
        return frame



# Build the client with the logged-in account's cookie.
downloadbaiduindex = DownloadBaiDuIndex(cookie=cookie)


# Area codes to query, one GetIndex call per code.
# For a quick trial run, shorten this to e.g. [95,94,133,195,196,197].
city_d = [
    95,94,133,195,196,197,198,199,200,201,202,203,204,205,207,208,209,210,211,212,213,
    168,262,263,264,265,266,268,370,371,373,374,375,376,378,379,380,381,667,
    97,96,98,99,100,101,102,103,104,106,107,108,109,111,112,113,114,291,417,457,479,
    125,126,127,156,157,158,159,160,161,162,163,169,172,
    28,30,31,32,33,34,35,36,37,38,39,40,41,42,73,74,687,
    138,134,135,149,287,288,289,303,304,305,306,
    50,51,52,53,54,55,56,87,253,
    152,153,295,297,300,301,302,319,320,322,323,324,359,
    1,76,77,78,79,80,81,82,83,84,85,86,88,352,353,356,366,
    165,271,272,273,274,275,276,277,278,401,
    141,143,144,145,146,147,148,259,261,292,293,
    150,29,151,215,216,217,218,219,220,221,222,223,224,225,
    154,155,191,194,270,407,408,410,525,
    117,123,124,334,335,337,339,342,350,437,438,666,668,669,671,672,
    467,280,310,311,312,315,317,318,383,384,386,499,520,563,653,661,692,693,
    90,89,91,92,93,118,119,128,129,130,131,132,506,665,
    231,227,228,229,230,232,233,234,235,236,237,
    43,44,45,46,47,48,49,65,66,67,68,226,269,405,
    5,6,7,8,9,10,115,136,137,246,256,189,
    173,174,175,176,177,178,179,181,182,183,184,185,186,187,188,391,
    20,13,14,15,16,17,19,21,22,25,331,333,
    166,281,282,283,284,285,286,307,308,309,343,344,346,673,
    239,241,242,243,244,456,582,670,674,675,679,680,681,683,684,686,689,690,
    2,3,4,59,61,422,424,426,588,
    140,395,396,472,480,
    139,608,652,659,676,682,685,688,
    466,516,655,656,677,678,691,
    911,910,904,923,
]
for city_code in city_d:
    # GetIndex prints each day's values itself; ``data`` keeps only the most
    # recent area's frame.
    data = downloadbaiduindex.GetIndex(
        city=str(city_code), keys=key, start='2022-01-01', end='2022-09-12'
    )



城市编码对应城市名称见:https://blog.youkuaiyun.com/qq_38524532/article/details/126874267
注意:如果爬取过于频繁,百度账号在百度指数中的查询功能可能会被封禁,之后将查不到任何数据,请控制请求频率。

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值