from selenium import webdriver
import time
import pandas as pd
# Shared Chrome session; the scraping code below uses it for every page load
# and element lookup, and quits it at the end of the run.
driver = webdriver.Chrome()
# Accumulates one dict per scraped app card; exported to Excel at the end.
content_list = []
def ai(field_pause=0.5):
    """Scrape every app card on the page currently loaded in ``driver``.

    For each ``div`` directly under the ``SearchResultsListings`` element a
    dict with the keys ``review``, ``name``, ``developer``, ``score``,
    ``Classification`` and ``url`` is built, printed and appended to the
    module-level ``content_list``.

    The category label is read from the module-level name ``Classification``,
    which the caller must set before invoking this function.

    Args:
        field_pause: Seconds to sleep after each extracted field.  Defaults
            to 0.5, the pause the scraper has always used.

    A card whose extraction fails at any step is reported with ``print`` and
    skipped; nothing is appended for it and the remaining cards are still
    processed.
    """
    # Local import: ``find_element(By.…)`` is available in Selenium 3 and 4,
    # whereas the ``find_element_by_*`` helpers were removed in Selenium 4.
    from selenium.webdriver.common.by import By

    cards = driver.find_elements(By.XPATH, "//*[@id='SearchResultsListings']/div")
    # XPath positions are 1-based.  The expressions below start with ``//``,
    # so they are evaluated against the whole document and need the card's
    # position to address the right listing.
    for position, card in enumerate(cards, start=1):
        item = {}
        try:
            review = card.find_element(
                By.XPATH,
                "//*[@id='SearchResultsListings']/div[{}]//span[2]".format(position),
            ).text
            # Keep the first line only and drop its leading character
            # (presumably an opening parenthesis -- confirm against the page).
            item["review"] = review.split("\n")[0][1:]
            time.sleep(field_pause)

            item["name"] = card.find_element(
                By.XPATH,
                "//*[@id='SearchResultsListings']/div[{}]/div/a/h2".format(position),
            ).text
            time.sleep(field_pause)

            developer = card.find_element(
                By.CLASS_NAME, "ui-app-card__developer-name"
            ).text
            # The text is expected to contain "by <developer>"; a missing
            # "by " raises IndexError and the card is skipped below.
            item["developer"] = developer.split("by ")[1]
            time.sleep(field_pause)

            score = card.find_element(
                By.XPATH,
                "//*[@id='SearchResultsListings']/div[{}]//div[2]/span[1]".format(position),
            ).text
            item["score"] = score.split("\n")[0]
            time.sleep(field_pause)

            # Module-level name set by the caller's category loop.
            item["Classification"] = Classification
            time.sleep(field_pause)

            href = card.find_element(By.TAG_NAME, "a").get_attribute("href")
            # Strip the query string so only the app's base URL is stored.
            item["url"] = str(href).split("?")[0]

            print(item)
            content_list.append(item)
        except Exception as e:
            # Best effort: one malformed card must not abort the whole page.
            print(e)
# Category label -> (URL slug, hard-coded result count used to derive the
# number of pages).  A count of None means the category is fetched with a
# single request whose URL carries no ``page`` parameter.
CATEGORY_TABLE = {
    "conversion": ("conversion", 363),
    "marketing": ("marketing", 220),
    "Store design": ("store-design", 196),
    "Store management": ("store-management", 114),
    "Customer service": ("customer-service", 104),
    "Merchandising": ("merchandising", 80),
    "Sourcing and selling products": ("sourcing-and-selling-products", 76),
    "Fulfillment": ("fulfillment", 67),
    "Shipping and delivery": ("shipping-and-delivery", None),
}
Classifications = list(CATEGORY_TABLE)

# Divisor of the page-count formula (presumably the number of results the
# site shows per page -- confirm against the live search page).
RESULTS_PER_PAGE = 24
# Seconds to wait after each navigation before scraping the page.
PAGE_LOAD_WAIT = 8

SEARCH_URL = (
    "https://apps.shopify.com/search?app_integration_pos=off"
    "&app_integration_shopify_checkout=off&category={slug}{page}"
    "&pricing=all&q=review&requirements=off&sort_by=relevance"
)

# (Excel column header, key in the scraped item dict), in output order.
EXPORT_COLUMNS = (
    ("review数量", "review"),
    ("名称", "name"),
    ("开发者名称", "developer"),
    ("评分", "score"),
    ("分类", "Classification"),
    ("URL", "url"),
)

try:
    # ``Classification`` must stay a module-level name: ai() reads it to
    # label every scraped item.
    for Classification in Classifications:
        slug, result_count = CATEGORY_TABLE[Classification]
        if result_count is None:
            page_params = [""]
        else:
            # Ceiling division: avoids requesting a trailing empty page when
            # the count is an exact multiple of RESULTS_PER_PAGE.
            page_total = (result_count + RESULTS_PER_PAGE - 1) // RESULTS_PER_PAGE
            page_params = ["&page={}".format(n) for n in range(1, page_total + 1)]

        for page_param in page_params:
            url = SEARCH_URL.format(slug=slug, page=page_param)
            driver.get(url)
            time.sleep(PAGE_LOAD_WAIT)
            ai()

    # Extract the collected data and write it to an Excel file.
    time.sleep(10)
    data = pd.DataFrame(
        {
            header: [entry[key] for entry in content_list]
            for header, key in EXPORT_COLUMNS
        }
    )
    data.to_excel("shopify.xlsx")
finally:
    # Always close the browser, even if scraping or the export fails.
    driver.quit()