微博同城热搜抓取逻辑(333城市)

接口需要从移动端拿

安装模拟器,配合 Fiddler 抓包

import re
import sys
import time

import pymysql
import requests



# Fetch each city's coordinate pair and query its same-city hot search
def get_ll(timeout=10):
    """Walk the Weibo city index and call ``get_response`` once per city.

    The index is fetched with a single GET request. For every city entry
    found under ``data.cards[3:]`` -> ``card_group`` -> ``group``, the last
    two ``_``-separated fields of the entry's ``scheme`` URL are passed,
    together with the entry's ``title_sub``, to ``get_response``.

    Args:
        timeout: Seconds handed to ``requests.get`` for the index request,
            so a stalled connection raises instead of blocking forever.

    Raises:
        requests.exceptions.RequestException: If the index request fails or
            times out.
        KeyError, IndexError: If the response does not have the expected
            structure.
    """
    # Coordinate (city index) endpoint
    index_url = 'https://m.weibo.cn/api/container/getIndex?uid=1887387237&wm=9006_2001&from=10A8195010&sourcetype=weixin&display=0&retcode=6102&containerid=2306860024'
    resp = requests.get(index_url, timeout=timeout)
    cards = resp.json()['data']['cards']
    # The first three cards are skipped.
    # NOTE(review): presumably they are not city listings — confirm against
    # a live response.
    for card in cards[3:]:
        for row in card['card_group']:
            for city in row['group']:
                # The scheme URL ends in "..._<l1>_<l2>"; get_response puts
                # l1 into ``lon`` and l2 into ``lat``.
                parts = city['scheme'].split('_')
                city_name = city['title_sub']
                l1, l2 = parts[-2], parts[-1]
                get_response(city_name, l1, l2)
                # Author's observations on rate limiting:
                # - sleeping 1 s per request keeps the IP from being banned;
                # - sleeping 0.5-1 s gets the IP banned after ~400 requests;
                # - no sleep gets the IP banned after ~200 requests.
                time.sleep(1)


def get_response(city_name, l1, l2, timeout=10):
    """Request the same-city hot-search page for one city and print a count.

    Prints ``city_name`` followed by the number of times the literal text
    ``"desc"`` occurs in the response body.

    Args:
        city_name: Label printed next to the count.
        l1: Value substituted into the ``lon`` query parameter.
        l2: Value substituted into the ``lat`` query parameter.
        timeout: Seconds handed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.

    If the response body is shorter than 10 characters, diagnostics are
    printed and ``exit()`` is called.
    """
    headers = {
        'User-Agent': 'PRO 6 Plus_5.1.1_weibo_9.9.3_weibolite'
    }
    # Minimal same-city hot-search endpoint.
    # NOTE(review): the ``gsid`` value looks like a session credential that
    # is hard-coded here — consider loading it from configuration instead.
    base_url = 'https://api.weibo.cn/2/page?extparam=pos=0_0&lon={}&lat={}&c=weibolite&s=bbbbbbbb&from=3799395010&gsid=_2A25yRxAdDeRxGeBO6lYS9CzEyj6IHXVvVSTVrDV6PUJbkdANLVbAkWpNSiLhtqAmtyzao5wyTR0CInrjrcWA35NC&containerid=106003type=25&t=3&disable_hot=1&filter_type=region'
    url = base_url.format(l1, l2)
    body = requests.get(url, headers=headers, timeout=timeout).text
    # Author's note: a failed fetch comes back as a 404 page while the
    # status code and response look normal, so no exception is raised.
    # The body length is checked instead and the program is forced to exit.
    if len(body) < 10:
        print('error')
        print(body)
        print('----')
        exit()
    # Counting a fixed substring needs no regex.
    print(city_name, body.count('"desc"'))



def exit():
    """Terminate the program by raising ``SystemExit`` with no exit code."""
    raise SystemExit


if __name__ == '__main__':
    # Crawl round after round; the first ordinary exception is printed and
    # ends the loop.
    while True:
        try:
            # Author's note: one pass over the 333 cities takes roughly
            # 7 minutes 40 seconds.
            began = int(time.time())
            get_ll()
            finished = int(time.time())
            elapsed_minutes = (finished - began) / 60
            print('时间:', str(elapsed_minutes))
        except Exception as err:
            print(err)
            break

 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值