# python selenium 爬取一个网站 (scraping a website with Python + Selenium)

from selenium import webdriver
import time
import pandas as pd


# Single Chrome session used by the whole script; ai() and the category loop
# read it as a global, and it is closed by driver.quit() at the end.
driver = webdriver.Chrome()
# Accumulates one dict per successfully scraped listing; ai() appends to it
# and the export step at the bottom of the script reads it.
content_list = []

def ai():
    """Scrape every listing on the search-results page currently loaded.

    Reads three module-level globals: ``driver`` (the browser session),
    ``Classification`` (the category label the caller is currently crawling)
    and ``content_list`` (the shared result list).  For each child ``div`` of
    the ``SearchResultsListings`` element a dict with the keys ``review``,
    ``name``, ``developer``, ``score``, ``Classification`` and ``url`` is
    built and appended to ``content_list``.

    A listing whose fields cannot all be read is skipped: the exception is
    printed and the partially filled dict is discarded.

    Returns:
        None.  Results are delivered through ``content_list``.
    """
    # Local import: the ``find_element(s)_by_*`` helpers used previously were
    # removed in Selenium 4.3, while ``find_element(By.<kind>, ...)`` exists in
    # both old and new releases.
    from selenium.webdriver.common.by import By

    listings_xpath = "//*[@id='SearchResultsListings']/div"
    cards = driver.find_elements(By.XPATH, listings_xpath)

    # XPath positions are 1-based, hence start=1.
    for position, card in enumerate(cards, start=1):
        # The field XPaths start with "//", so they are evaluated against the
        # whole document; the positional index is what ties them to this card.
        card_xpath = "{}[{}]".format(listings_xpath, position)
        item = {}
        try:
            review = card.find_element(By.XPATH, card_xpath + "//span[2]").text
            # Keep the first line only and drop its leading character
            # (presumably an opening parenthesis -- TODO confirm against the page).
            item["review"] = review.split("\n")[0][1:]
            time.sleep(0.5)

            item["name"] = card.find_element(By.XPATH, card_xpath + "/div/a/h2").text
            time.sleep(0.5)

            developer = card.find_element(
                By.CLASS_NAME, "ui-app-card__developer-name"
            ).text
            # The text is expected to contain "by <developer>"; if it does not,
            # the IndexError is caught below and the card is skipped.
            item["developer"] = developer.split("by ")[1]
            time.sleep(0.5)

            score = card.find_element(
                By.XPATH, card_xpath + "//div[2]/span[1]"
            ).text
            item["score"] = score.split("\n")[0]
            time.sleep(0.5)

            # Category label set by the module-level crawl loop.
            item["Classification"] = Classification
            time.sleep(0.5)

            href = card.find_element(By.TAG_NAME, "a").get_attribute("href")
            # Strip the query string so only the listing's base URL is stored.
            item["url"] = str(href).split("?")[0]

            print(item)
            content_list.append(item)
        except Exception as e:
            # Best-effort scraping: report the problem and move on to the next card.
            print(e)

# Divisor used to turn a result count into a page count (presumably the number
# of listings the site shows per search page -- TODO confirm).
RESULTS_PER_PAGE = 24

# One entry per category to crawl:
#   (label stored in every scraped row, category slug used in the URL,
#    result count used to derive the number of pages).
# A count of None means the category is fetched with a single request whose
# URL carries no "page" parameter.
CATEGORIES = [
    ("conversion", "conversion", 363),
    ("marketing", "marketing", 220),
    ("Store design", "store-design", 196),
    ("Store management", "store-management", 114),
    ("Customer service", "customer-service", 104),
    ("Merchandising", "merchandising", 80),
    ("Sourcing and selling products", "sourcing-and-selling-products", 76),
    ("Fulfillment", "fulfillment", 67),
    ("Shipping and delivery", "shipping-and-delivery", None),
]

# Kept for backward compatibility: the plain list of category labels.
Classifications = [label for label, _slug, _count in CATEGORIES]

SEARCH_URL_PREFIX = (
    "https://apps.shopify.com/search?app_integration_pos=off"
    "&app_integration_shopify_checkout=off&category="
)
SEARCH_URL_SUFFIX = "&pricing=all&q=review&requirements=off&sort_by=relevance"


def _search_url(slug, page=None):
    """Return the search URL for a category slug, optionally for one page."""
    page_part = "" if page is None else "&page={}".format(page)
    return SEARCH_URL_PREFIX + slug + page_part + SEARCH_URL_SUFFIX


# The loop variable must be named ``Classification``: ai() reads it as a
# global to tag every row it scrapes.
for Classification, category_slug, result_count in CATEGORIES:
    if result_count is None:
        pages = [None]
    else:
        # Ceiling division, so a count that is an exact multiple of the page
        # size does not request an extra page.
        page_count = -(-result_count // RESULTS_PER_PAGE)
        pages = range(1, page_count + 1)

    for page in pages:
        url = _search_url(category_slug, page)
        driver.get(url)
        # Fixed pause after navigation before scraping (presumably to let the
        # results finish rendering).
        time.sleep(8)
        ai()

# Extract the scraped fields and write them to an Excel workbook.
# NOTE(review): this pause precedes purely in-memory work; it is kept as-is,
# confirm whether it is still needed.
time.sleep(10)

# Maps each key of the dicts in content_list to its column header in the
# sheet; the dict order determines the column order.
COLUMN_HEADERS = {
    "review": 'review数量',
    "name": '名称',
    "developer": '开发者名称',
    "score": '评分',
    "Classification": '分类',
    "url": 'URL',
}

try:
    data = pd.DataFrame({
        header: [row[key] for row in content_list]
        for key, header in COLUMN_HEADERS.items()
    })
    data.to_excel("shopify.xlsx")
finally:
    # Always close the browser, even if building or writing the sheet fails,
    # so no Chrome process is left running.
    driver.quit()


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值