抓取豌豆荚APP的信息

最新推荐文章于 2025-01-08 00:48:21 发布

weixin_30296405

最新推荐文章于 2025-01-08 00:48:21 发布

阅读量610

点赞数

CC 4.0 BY-SA版权

文章标签：爬虫 python

原文链接：http://www.cnblogs.com/friendg/p/11129678.html

import time
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver import ActionChains  # 破解滑动验证码的时候用的 可以拖动图片
from selenium.webdriver.common.by import By  # 按照什么方式查找，By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
from selenium.webdriver.support import expected_conditions as EC  # 和下面WebDriverWait一起用的
from selenium.webdriver.support.wait import WebDriverWait  # 等待页面加载某些元素
from bs4 import BeautifulSoup
# Scrape the app cards listed on a wandoujia.com category page and append one
# formatted record per app (name, detail link, download count, size) to
# ``app.txt``, echoing each record to stdout as it goes.

# Start a Chrome session; it is shut down in the ``finally`` clause below.
driver = webdriver.Chrome()

try:
    # Element lookups will poll for up to 10 seconds before raising.
    driver.implicitly_wait(10)
    driver.get('https://www.wandoujia.com/category/6001')

    # The find_element(s)_by_* helpers were removed in Selenium 4.3, so the
    # By-based locator API is used for every lookup.
    app_list = driver.find_elements(By.CLASS_NAME, 'card')

    # ``num`` is the 1-based position of the card; it is both printed in the
    # record and used to index the matching <li> in the size XPath below.
    for num, app in enumerate(app_list, start=1):
        # App name
        app_name = app.find_element(By.CSS_SELECTOR, '.app-title-h2').text

        # Link to the app's detail page
        detail_url = app.find_element(
            By.CSS_SELECTOR, '.icon-wrap a'
        ).get_attribute('href')

        # Download count
        download_num = app.find_element(By.CLASS_NAME, 'install-count').text

        # App size.
        # NOTE(review): this XPath starts with "//", so it is evaluated
        # against the whole document rather than relative to ``app``; it
        # presumably relies on the n-th "card" being the n-th <li> under
        # #j-tag-list — confirm against the page markup.
        size_xpath = (
            '//*[@id="j-tag-list"]/li[' + str(num) + ']/div[2]/div[1]/span[3]'
        )
        app_size = app.find_element(By.XPATH, size_xpath).text

        app_content = f'''
            num: {num}
            app名称: {app_name}
            app链接: {detail_url}
            appx下载量: {download_num}
            app大小: {app_size}
         \n 
         '''
        print(app_content)

        # Append mode: records from earlier runs are kept, not overwritten.
        with open('app.txt', 'a', encoding='utf-8') as f:
            f.write(app_content)
            print('app信息写入成功!')
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()