序言
本文的目标是采集某某买菜小程序的商品数据,通过分析这些数据筛选出优质商品,方便选品。
原创代码如下:
import json
import os
import time

from appium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

from base.base_root import BaseRoot

PLATFORM = 'Android'
deviceName = '127.0.0.1:62001'
app_package = 'com.tencent.mm'
app_activity = '.ui.LauncherUI'
driver_server = 'http://127.0.0.1:4723/wd/hub'

# Appium context names used when hopping between the native shell and the
# mini-program webview.
NATIVE_CONTEXT = 'NATIVE_APP'
WEBVIEW_CONTEXT = 'WEBVIEW_com.tencent.mm:appbrand0'

# Label of the mini program entry that getContent() clicks.
MINI_PROGRAM_NAME = 'mou买菜'

# Common XPath prefix of the n-th product card in the "mall-list" container;
# %s is the 1-based card index.
_ITEM_XPATH = ('//*[@id="mall-list"]/wx-_r/wx-v/wx-v/wx-_c/wx-v/wx-_e[%s]'
               '/wx-v/wx-_b/wx-_a/wx-v/wx-_a/wx-v/wx-_b/wx-v/wx-v')

# Field-specific XPath tails, relative to _ITEM_XPATH.
_NAME_SUFFIX = 'wx-v[2]/wx-v[1]/wx-text/span[2]'
_PRICE_SUFFIX = 'wx-v[2]/wx-v[2]/wx-v[1]/wx-v/wx-text[2]/span[2]'
_IMG_SUFFIX = 'wx-v[1]/wx-_a/wx-image'
_BUY_SUFFIX = 'wx-v[2]/wx-_c[2]/wx-v/wx-v/wx-v[2]/wx-text/span[2]'
_BUY_FALLBACK_SUFFIX = 'wx-v[2]/wx-_c/wx-v/wx-v/wx-v[2]/wx-text/span[2]'
# getData2 historically used a slightly shorter tail for the buyer text.
_BUY_PERSON_SUFFIX_ALT = 'wx-v[2]/wx-_c[2]/wx-v/wx-v/wx-text/span[2]'


def _item_xpath(index, suffix):
    """Return the full XPath of one field of the product card at *index*."""
    return _ITEM_XPATH % index + '/' + suffix


class Moments():
    """Drive WeChat through Appium and print product data from a mini program.

    Creating an instance opens an Appium session against ``driver_server``
    immediately; the caller is responsible for calling ``self.driver.quit()``.
    """

    def __init__(self):
        self.desired_caps = {
            'platformName': PLATFORM,
            'deviceName': deviceName,
            # Use the built-in input method; set True when typing Chinese.
            'unicodeKeyboard': True,
            # Restore the original input method when the run finishes.
            'resetKeyboard': True,
            # Do not reset the app.
            'noReset': True,
            'newCommandTimeout': 60000,
            'appPackage': app_package,
            'appActivity': app_activity,
        }
        # Path of the chromedriver binary used for the webview.
        chromedriverpath = BaseRoot.root_path + '/vendor/chromedriver_win32_v92.0.4515.43.exe'
        self.desired_caps['chromedriverExecutable'] = chromedriverpath
        # To attach to a WeChat mini program, androidProcess must be set here.
        # Find the pid with:  adb shell dumpsys activity top | findstr ACTIVITY
        # then look up the process name with:  adb shell ps <pid>
        self.desired_caps['chromeOptions'] = {'androidProcess': 'com.tencent.mm:appbrand0'}
        self.driver = webdriver.Remote(driver_server, self.desired_caps)
        print("启动微信应用程序")
        time.sleep(3)
        self.wait = WebDriverWait(self.driver, 300)
        self.size = self.driver.get_window_size()

    # ------------------------------------------------------------------
    # Swipe helpers
    # ------------------------------------------------------------------
    def _swipe_vertical(self, start_ratio, end_ratio, duration, times):
        """Swipe along the vertical centre line *times* times.

        *start_ratio* / *end_ratio* are fractions of the window height.
        Coordinates are truncated to whole pixels before being sent.
        """
        x = int(self.size['width'] * 0.5)
        start_y = int(self.size['height'] * start_ratio)
        end_y = int(self.size['height'] * end_ratio)
        for _ in range(times):
            self.driver.swipe(x, start_y, x, end_y, duration)

    def swipUp(self, t=600, n=1):
        """Swipe up from 75% to 25% of the height, *n* times, *t* ms each."""
        self._swipe_vertical(0.75, 0.25, t, n)

    def swipSmallUp(self, t=200, n=1):
        """Swipe up from 75% to 50% of the height, *n* times, *t* ms each."""
        self._swipe_vertical(0.75, 0.50, t, n)

    def swipSmallUp2(self, t=100, n=1):
        """Swipe up from 75% to 25% of the height with a shorter default *t*."""
        self._swipe_vertical(0.75, 0.25, t, n)

    def _swipe_in_native(self, swipe, note, settle=0, pause=1, after=0):
        """Switch to the native context, run *swipe*, return to the webview.

        *note* is the progress message printed before swiping. *settle*,
        *pause* and *after* are sleeps (seconds) after entering the native
        context, after the swipe, and after re-entering the webview.
        """
        print("切换进入主应用")
        print(note)
        self.driver.switch_to.context(NATIVE_CONTEXT)
        if settle:
            time.sleep(settle)
        swipe()
        time.sleep(pause)
        print("切换进入webview")
        self.driver.switch_to.context(WEBVIEW_CONTEXT)
        if after:
            time.sleep(after)

    # ------------------------------------------------------------------
    # Element helpers
    # ------------------------------------------------------------------
    def _text_or_empty(self, xpath, missing_message):
        """Return the element's text, or '' (after printing) if lookup fails."""
        try:
            return self.driver.find_element(By.XPATH, xpath).text
        except Exception:
            # Best effort: these fields are optional on a product card.
            print(missing_message)
            return ''

    def _print_goods_details(self, index, buy_person_suffix):
        """Print price, image URL and purchase info of the card at *index*.

        Price and image lookups are not guarded; a missing element raises
        whatever ``find_element`` raises.
        """
        goods_price = self.driver.find_element(
            By.XPATH, _item_xpath(index, _PRICE_SUFFIX)).text
        print(goods_price)

        goods_img = self.driver.find_element(
            By.XPATH, _item_xpath(index, _IMG_SUFFIX)).get_attribute('src')
        print(goods_img)

        # "How many people nearby bought this": try the primary location,
        # then the fallback location.
        goods_buy_info = self._text_or_empty(
            _item_xpath(index, _BUY_SUFFIX), "无goods_buy_1")
        if goods_buy_info == '':
            goods_buy_info = self._text_or_empty(
                _item_xpath(index, _BUY_FALLBACK_SUFFIX), "无goods_buy_2")
        print("goods_buy_info=" + str(goods_buy_info))

        goods_buy_person_info = self._text_or_empty(
            _item_xpath(index, buy_person_suffix), "无goods_buy_person_info")
        print("goods_buy_person_info=" + str(goods_buy_person_info))

    # ------------------------------------------------------------------
    # Scraping
    # ------------------------------------------------------------------
    def getData(self, i: int):
        """Print the data of the *i*-th product card, scrolling if needed.

        Returns 1 when the card was read and 0 when its name element cannot
        be found even after scrolling (the caller uses 0 as the stop signal).
        """
        print("切换进入webview")
        self.driver.switch_to.context(WEBVIEW_CONTEXT)

        name_xpath = _item_xpath(i, _NAME_SUFFIX)
        # Only scroll when the card's name cannot be read yet.
        if str(self._text_or_empty(name_xpath, "无goods_name")):
            print("有内容无需滑动")
        else:
            self._swipe_in_native(self.swipSmallUp2, "上滑睡3秒")
            if not str(self._text_or_empty(name_xpath, "无goods_name")):
                self._swipe_in_native(self.swipSmallUp, "上滑睡3秒")

        print('')
        print(f"###################打印第{i}次滑动开始############################################")
        try:
            goods_name = self.driver.find_element(By.XPATH, name_xpath).text
        except Exception:
            return 0
        print(goods_name)

        self._print_goods_details(i, _BUY_SUFFIX)
        print(f"###################打印第{i}次滑动结束############################################")
        return 1

    def getData2(self, j: int):
        """Scroll once, then print the two product cards of page *j*.

        Cards ``2*j - 1`` and ``2*j`` are read. If a card's name is missing,
        one more scroll is performed and the lookup is retried unguarded.
        """
        self._swipe_in_native(self.swipSmallUp2, "滑动两个产品长度上滑睡4秒",
                              settle=1, pause=4, after=1)
        print()
        print(f"###################打印第{j}次滑动开始############################################")
        for i in range(j * 2 - 1, j * 2 + 1):
            print(f"-------------打印第{i}产品集合开始---------------")
            name_xpath = _item_xpath(i, _NAME_SUFFIX)
            try:
                goods_name = self.driver.find_element(By.XPATH, name_xpath).text
            except Exception:
                # No data yet: scroll again and retry.
                print("无产品数据进行再滑动两次")
                self._swipe_in_native(self.swipSmallUp2, "滑动两个产品长度上滑睡4秒",
                                      settle=1, pause=4, after=1)
                goods_name = self.driver.find_element(By.XPATH, name_xpath).text
            print(str(goods_name))

            # The buyer XPath used to hard-code card 19 and then apply "% i"
            # to a string without a placeholder, which raised TypeError.
            self._print_goods_details(i, _BUY_PERSON_SUFFIX_ALT)
            print(f"-------------打印第{i}产品集合结束---------------")
        print(f"###################打印第{j}次滑动结束############################################")

    def getContent(self):
        """Open the mini program and print every product card until none is left."""
        print('time sleep 30 second')
        time.sleep(30)
        print('getContent')

        # Example window size: {'width': 900, 'height': 1600}
        print('下拉')
        x = int(self.size['width'] * 0.5)
        self.driver.swipe(x, int(self.size['height'] * 0.4),
                          x, int(self.size['height'] * 0.9))
        print('time sleep 4 second')
        time.sleep(4)

        print('查找买菜小程序并且点击')
        # NOTE(review): the previous locator "//*[@te'mou买菜']" was not valid
        # XPath; matching on the @text attribute is assumed to be the intent.
        self.driver.find_element(
            By.XPATH, "//*[@text='%s']" % MINI_PROGRAM_NAME).click()
        print('time sleep 4 second')
        time.sleep(4)

        contexts = self.driver.contexts
        print(contexts)
        time.sleep(2)
        self.driver.switch_to.context(WEBVIEW_CONTEXT)
        time.sleep(2)
        handles = self.driver.window_handles
        print(handles)
        time.sleep(1)
        print('打印当前handle')
        print(self.driver.current_window_handle)

        self._swipe_in_native(self.swipSmallUp, "上滑睡4秒",
                              settle=1, pause=4, after=1)
        time.sleep(1)

        for i in range(1, 2000):
            # getData returns 0 once a card's name can no longer be found.
            if self.getData(i) == 0:
                break
        print("采集结束")
        print("睡眠600秒")
        time.sleep(600)


if __name__ == '__main__':
    action = Moments()
    try:
        action.getContent()
    finally:
        # Always end the Appium session, even if scraping raised.
        action.driver.quit()
特别声明:代码只为了和大家学习与交流,切勿攻击服务器,遵守国家法律法规。欢迎私聊讨论。