#!/usr/bin/python # -*- coding:utf8 -*- import time import random import os import re import xlwt import requests import numpy as np import xlsxwriter import pandas as pd print os.getcwd() company = pd.read_excel('E:\***\company.xlsx') num1 = company.shape[0] num2 = company.shape[1] from selenium import webdriver driver = webdriver.Chrome() driver.get('**********') for i in range(num1): time.sleep(1) driver.find_element_by_id("live-search").send_keys(company[u"公司名称"][i]) time.sleep(1) driver.find_element_by_xpath( "/html/body/div[1]/div[1]/div[1]/div[2]/div/div[2]/div[2]/div/div[1]/div[1]/div").click() time.sleep(2) driver.find_element_by_xpath( "/html/body/div[2]/div[1]/div/div/div[1]/div[3]/div[1]/div[2]/div[1]/div[1]/a/span/em").click() print driver.current_url # 获得当前窗口 nowhandle = driver.current_window_handle # 获得所有窗口 allhandles = driver.window_handles # 循环判断窗口是否为当前窗口 for handle in allhandles: if handle != nowhandle: driver.switch_to_window(handle) print 'now register window!' print driver.current_url #电话 test1 = driver.find_element_by_xpath("/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[2]/div[1]/span[2]").text test1 = test1.encode("utf-8") company[u"电话"][i] = test1 #邮箱 test2 = driver.find_element_by_xpath("/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[2]/div[2]/span[2]").text company[u"邮箱"][i]=test2 #网址 test3 = driver.find_element_by_xpath("/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[3]/div[1]/a").text company[u"网址"][i] = test3 #地址 test4 = driver.find_element_by_xpath("/html/body/div[2]/div[1]/div/div/div/div[1]/div[2]/div[2]/div/div[3]/div[2]/span[2]").text company[u"地址"][i] = test4 # 获得当前窗口 nowhandle = driver.current_window_handle # 获得所有窗口 allhandles = driver.window_handles # 循环判断窗口是否为当前窗口 for handle in allhandles: if handle == nowhandle: driver.close() print 'now register window!' for handle in allhandles: if handle != nowhandle: driver.switch_to_window(handle) print 'register window!' driver.back() time.sleep(3) print company
Python+Selenium进行网页多窗口切换爬虫
最新推荐文章于 2024-05-31 21:24:28 发布