Python 爬虫：通过包名从 APKPure 爬取并下载 APK

最新推荐文章于 2025-06-13 08:15:00 发布

原创 · 最新推荐文章于 2025-06-13 08:15:00 发布 · 2.1k 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#python

python 专栏收录该内容

6 篇文章

订阅专栏

本文介绍了一种使用Python从APKPure网站自动化批量下载安卓应用的方法。通过解析配置文件中的包名列表，构造请求头，获取应用下载页面，解析下载链接并下载应用。此过程涉及网络请求、BeautifulSoup解析、异常处理等关键技术。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

运行前需要在脚本的工作目录下准备一个名为 apkpure.txt 的文件，其中写入需要下载的应用包名，每行一个包名。

import logging
import re
import socket
import sys
import urllib
import urllib.parse
import urllib.request

import requests

sys.path.append("..")
socket.setdefaulttimeout(30)
from bs4 import BeautifulSoup

# Base URL of the APKPure site; relative links scraped from its pages are
# joined onto it. Example search path: /cn/search?q=com.android.vending
_root_url="https://apkpure.com"

class ApkPureBy:
    """Download APKs from APKPure by package name.

    The package names are read from ``apkpure.txt`` in the current working
    directory, one package name per line.
    """

    def __init__(self):
        """Configure root logging to ``runtime.log`` and set the HTTP headers."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s %(levelname)s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename='runtime.log',
            filemode='a+'
        )
        self.logger = logging.getLogger()
        self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'}

    def getAppPages(self):
        """Return the APKPure search-page URL for every configured package.

        The URLs are in the same order as the names from ``getAppList``.
        """
        return [
            "https://apkpure.com/cn/search?q=" + appname
            for appname in self.getAppList()
        ]

    def getAppList(self):
        """Return the package names listed in ``apkpure.txt``.

        Surrounding whitespace (including ``\\r`` from Windows line endings)
        is stripped and blank lines are skipped, so an empty line never
        turns into a search for an empty package name.

        Raises:
            OSError: if ``apkpure.txt`` cannot be opened.
        """
        # The context manager guarantees the file is closed even on error.
        with open("apkpure.txt", 'r') as apptxt:
            stripped_lines = (line.strip() for line in apptxt)
            return [name for name in stripped_lines if name]

    def getAllName(self):
        """Return all configured package names as one ``#``-terminated string.

        Each name is followed by ``#`` (e.g. ``"com.a#com.b#"``); an empty
        configuration yields ``""``.
        """
        return "".join(name + "#" for name in self.getAppList())

    def getDownLoadUrl(self, path=".\\"):
        """Resolve the download link of every configured package and fetch it.

        For each package the search page is loaded, the first result's
        download page is followed, and the final link is handed to
        ``auto_down``. A failure for one package is logged and does not stop
        the remaining packages.

        Args:
            path: Currently unused; kept for backward compatibility. Files
                are saved as ``<package>.apk`` relative to the working
                directory.
        """
        # urlretrieve() uses the globally installed opener, so give it the
        # same User-Agent as the requests calls below.
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-Agent', self.header['User-Agent'])]
        urllib.request.install_opener(opener)

        urls = self.getAppPages()
        app_names = self.getAppList()
        # Exact-match lookup; a substring test on the joined string would
        # accept "com.foo" when only "com.foo.bar" was requested.
        requested = set(app_names)

        for url, except_app in zip(urls, app_names):
            # Fallback for the error log when parsing fails before the real
            # package name has been extracted from the page.
            packagename = except_app
            try:
                wbdata = requests.get(url, headers=self.header).text
                soup = BeautifulSoup(wbdata, "html.parser")
                download_link = soup.find(class_="more-down")["href"]
                download_url = urllib.parse.urljoin(_root_url, str(download_link))
                # The package name is taken from the 4th "/"-separated part
                # of the link.
                packagename = download_link.split('/')[3]
                if packagename not in requested:
                    print("apkpure无对应app:" + except_app)
                    self.logger.info("apkpure no app：%s", except_app)
                    continue

                downurl_data = requests.get(download_url, headers=self.header).text
                soup2 = BeautifulSoup(downurl_data, "html.parser")
                # NOTE(review): the class selector has a leading space; kept
                # as in the original - confirm against the live page markup.
                download_link_page = soup2.find(class_=" da")["href"]
                download_page = urllib.parse.urljoin(_root_url, str(download_link_page))

                app_data = download_page + '/download?from=details'
                detail_data = requests.get(app_data, headers=self.header).text
                soup3 = BeautifulSoup(detail_data, "html.parser")
                download = soup3.find(id="download_link")["href"]
                self.auto_down(download, packagename + '.apk')
            except Exception:
                # Covers network errors and unexpected page layouts (e.g.
                # find() returning None); the traceback goes to the log.
                self.logger.exception("异常app网页/网络异常%s", packagename)

    def auto_down(self, url, filename):
        """Download ``url`` to ``filename``, retrying up to 5 times on failure.

        One initial attempt is made; if it fails, up to five retries follow.
        The method returns as soon as one attempt succeeds, and reports the
        failure once all attempts are exhausted.

        Args:
            url: Direct download URL.
            filename: Destination file path.
        """
        display_name = filename.split("\\")[-1]
        print("正在下载: " + display_name)
        self.logger.info("正在下载: %s", display_name)

        max_retries = 5
        for attempt in range(1, max_retries + 2):
            try:
                urllib.request.urlretrieve(url, filename)
            except Exception as exc:
                self.logger.warning(
                    "download attempt %d failed for %s: %s",
                    attempt, filename, exc,
                )
                continue
            # The completion message is only emitted after the whole file
            # has been retrieved.
            print("下载完成: " + display_name)
            self.logger.info("下载完成: %s", display_name)
            return

        print("应用下载5次失败：" + filename)
        self.logger.info("应用下载5次失败：%s", filename)



if __name__=="__main__":
    # Bind the instance to its own name so the ApkPureBy class itself is not
    # shadowed (and made unusable) after this line runs.
    downloader = ApkPureBy()
    downloader.getDownLoadUrl()