Python爬虫:通过包名从APKPure批量下载APK

本文介绍了一种使用Python从APKPure网站自动化批量下载安卓应用的方法。通过解析配置文件中的包名列表,构造请求头,获取应用下载页面,解析下载链接并下载应用。此过程涉及网络请求、BeautifulSoup解析、异常处理等关键技术。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

运行前需要在脚本的运行目录下准备一个名为 apkpure.txt 的文件,里面写入需要下载的应用包名,每行一个包名(例如 com.android.vending)。

import logging
import re
import socket
import sys
import urllib
import urllib.parse
import urllib.request

import requests

sys.path.append("..")
socket.setdefaulttimeout(30)
from bs4 import BeautifulSoup

# Base URL of the APKPure site; relative links scraped from its pages are
# joined against this value with urllib.parse.urljoin.
_root_url="https://apkpure.com" # search pages: /cn/search?q=<package>, e.g. com.android.vending

class ApkPureBy:
    """Download APK files from APKPure by package name.

    Package names are read from ``apkpure.txt`` in the current working
    directory, one name per line.  For every name the search page is
    fetched, the download link is scraped from the result pages and the
    APK is saved as ``<package>.apk`` in the current working directory.
    """

    # Browser-like User-Agent sent with every HTTP request.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
    # Search endpoint; the package name is appended as the query.
    _SEARCH_URL = "https://apkpure.com/cn/search?q="
    # Configuration file listing the packages to download.
    _APP_LIST_FILE = "apkpure.txt"
    # Seconds to wait for a page request before giving up.
    _REQUEST_TIMEOUT = 30
    # Number of additional download attempts after the first one fails.
    _MAX_RETRIES = 5

    def __init__(self):
        """Configure file logging (``runtime.log``) and the request headers."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s %(levelname)s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename='runtime.log',
            filemode='a+'
        )
        self.logger = logging.getLogger()
        self.header = {'User-Agent': self._USER_AGENT}

    def getAppPages(self):
        """Return the search-page URL for every configured package name."""
        return [self._SEARCH_URL + appname for appname in self.getAppList()]

    def getAppList(self):
        """Return the package names listed in ``apkpure.txt``.

        Surrounding whitespace (including ``\\r\\n`` line endings) is removed
        and blank lines are skipped, so an empty line never turns into an
        empty package name.

        Raises:
            OSError: if ``apkpure.txt`` cannot be opened.
        """
        # The context manager closes the file even if reading fails.
        with open(self._APP_LIST_FILE, 'r') as apptxt:
            stripped = (line.strip() for line in apptxt)
            return [name for name in stripped if name]

    def getAllName(self):
        """Return all configured package names as one ``#``-terminated string.

        Example: ``"com.a#com.b#"``.  The names come from ``getAppList`` so
        both methods always agree on content and order.
        """
        return "".join(name + "#" for name in self.getAppList())

    def getDownLoadUrl(self, path=".\\"):
        """Resolve and download the APK for every configured package.

        Args:
            path: accepted for backward compatibility; it is not used, and
                files are written to the current working directory.
        """
        # urlretrieve() uses the globally installed opener, so give it the
        # same browser-like User-Agent as the page requests.
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-Agent', self._USER_AGENT)]
        urllib.request.install_opener(opener)

        app_lists = self.getAppList()
        requested = set(app_lists)
        for appname in app_lists:
            try:
                packagename, download = self._find_download(appname, requested)
            except Exception:
                # Scraping is best-effort: a missing element or a network
                # error for one app must not stop the remaining downloads.
                self.logger.exception("异常app网页/网络异常" + appname)
                continue
            if download is None:
                print("apkpure无对应app:" + appname)
                self.logger.info("apkpure no app:" + appname)
                continue
            self.auto_down(download, packagename + '.apk')

    def _fetch_soup(self, url):
        """Fetch *url* with the configured headers and return the parsed HTML."""
        response = requests.get(
            url, headers=self.header, timeout=self._REQUEST_TIMEOUT
        )
        return BeautifulSoup(response.text, "html.parser")

    def _find_download(self, appname, requested):
        """Scrape the direct download link for *appname*.

        Returns:
            ``(packagename, download_url)``; ``download_url`` is ``None``
            when the first search result is not one of the *requested*
            package names.

        Raises:
            Exception: any network error, or ``TypeError``/``IndexError``
                when an expected page element is missing.
        """
        search_soup = self._fetch_soup(self._SEARCH_URL + appname)
        download_link = search_soup.find(class_="more-down")["href"]
        # NOTE(review): assumes the href has the package name as its fourth
        # '/'-separated component - confirm against the live site.
        packagename = download_link.split('/')[3]
        # Exact membership test; a substring test would accept e.g. "com.foo"
        # merely because "com.foo.bar" was requested.
        if packagename not in requested:
            return packagename, None

        download_url = urllib.parse.urljoin(_root_url, str(download_link))
        app_soup = self._fetch_soup(download_url)
        download_link_page = app_soup.find(class_=" da")["href"]
        download_page = urllib.parse.urljoin(_root_url, str(download_link_page))

        detail_soup = self._fetch_soup(download_page + '/download?from=details')
        return packagename, detail_soup.find(id="download_link")["href"]

    def auto_down(self, url, filename):
        """Download *url* to *filename*, retrying up to five times on failure.

        Args:
            url: direct download URL of the file.
            filename: destination path of the downloaded file.

        Returns:
            ``True`` when the download succeeded, ``False`` when the initial
            attempt and all retries failed.
        """
        display_name = filename.split("\\")[-1]
        print("正在下载: " + display_name)
        self.logger.info("正在下载: " + display_name)

        # One initial attempt plus _MAX_RETRIES retries; stop at the first
        # success instead of looping (and re-downloading) forever.
        for _attempt in range(1 + self._MAX_RETRIES):
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception:  # includes socket.timeout
                continue
            # Only reached once the whole file has been written.
            print("下载完成: " + display_name)
            self.logger.info("下载完成: " + display_name)
            return True

        print("应用下载5次失败:" + filename)
        self.logger.info("应用下载5次失败:" + filename)
        return False


if __name__=="__main__":
    # Script entry point: download every package listed in apkpure.txt.
    # A distinct instance name keeps the ApkPureBy class itself accessible
    # (the original rebound the class name to the instance).
    downloader = ApkPureBy()
    downloader.getDownLoadUrl()
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值