使用谷歌浏览器驱动(ChromeDriver)的爬虫技术:抓取网页数据

package com.dataspider.test;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.By;
import org.openqa.selenium.UnexpectedAlertBehaviour;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;

import com.common.ibatis.BatisManager;
import com.ibatis.dao.client.DaoManager;

public class MonitorDiscountPrice {
	
	DaoManager daoManager = BatisManager.getDaomanager();
	
	
	/**
	 * 监控折扣价
	 */
	public static void MonitorDiscountPrice(){ 
		Map<String, String> productList=new HashMap<String, String>(); //新建数组
		System.setProperty("webdriver.chrome.driver", "chromedriver.exe"); //加载驱动
//		List urlList = getProductUrl();
		List<String> url =new ArrayList<String>() ;
		url.add(0, "https://www.aliexpress.com/item/-/32850800904.html");
		url.add(1, "https://www.aliexpress.com/item/-/32850800904.html");
		url.add(2, "https://www.aliexpress.com/item/-/32850800904.html");
		url.add(3, "https://www.aliexpress.com/item/-/32850800904.html");
		WebDriver driver=getDriver();
		for (int i = 0; i < url.size(); i++) {
			String pageText = getPageText(url.get(i),driver);
			Document doc = Jsoup.parse(pageText);
			if (doc !=null) {
				productList=getPageProductInfo(doc);
				productList.get("price");
				productList.get("percentage");
				productList.get("dayTime");
			}
		}
		driver.quit();
	
	}
	
	/**
	 * 获取页面产品折扣价,折扣率,折扣时间(aliexpress)
	 * @param doc
	 * @return
	 */
	private static Map<String,String> getPageProductInfo(Document doc) {
		Element   price1 = doc.getElementById("j-sku-discount-price");
		Elements  percentages= doc.getElementsByClass("p-discount-rate");
		Elements  dayTimes = doc.getElementsByClass("p-eventtime-left");
		String percentage = percentages.get(0).text();
		String dayTime = dayTimes.get(0).text();
		String price = price1.text();
		if (percentage.isEmpty()) {
			percentage = " <%> is not exist !";
		}
		if (dayTime.isEmpty()) {
			dayTime = " <dayTime> is not exist !";
		}
		if (price.isEmpty()) {
			price = "the price is not exist !";
		}
		Map<String, String> map = new HashMap<String, String>();
		map.put("price", price);
		map.put("percentage", percentage);
		map.put("dayTime", dayTime);

		return map;
	}

	/**
	 * 获取谷歌驱动(设置所需属性)
	 * @return
	 */
	private static WebDriver getDriver(){
		ChromeOptions chromeOptions = new ChromeOptions();
		chromeOptions.addArguments("disable-extensions"); // 禁用扩展
		chromeOptions.addArguments("chrome.switches","--disable-extensions");// 开关
		chromeOptions.addArguments("disable-infobars");// 禁用信息栏
		DesiredCapabilities cp = DesiredCapabilities.chrome(); // 所需的功能
		cp.setCapability(ChromeOptions.CAPABILITY, chromeOptions); // 性能
		cp.setCapability(CapabilityType.UNEXPECTED_ALERT_BEHAVIOUR, UnexpectedAlertBehaviour.ACCEPT);// 接受--意外警报行为
		WebDriver driver = new ChromeDriver(cp);
		return driver;
	}
	/**
	 * 获取页面源码
	 * @param url
	 * @param driver
	 * @return
	 */
	private static String getPageText(String url,WebDriver driver){
		driver.manage().timeouts().pageLoadTimeout(1*60, TimeUnit.SECONDS);
		String text = "";
		try{
			Thread.sleep(3000);
			driver.navigate().to(url); //直接跳转这个页面不受第一个页面影响
			WebDriverWait wait = new WebDriverWait(driver, 60);
			wait.until(new ExpectedCondition<WebElement>() {
				public WebElement apply(WebDriver d) {
					return d.findElement(By.id("j-detail-page"));
				}
			});
		}catch(Exception e){
			System.out.println("页面链接:"+url);
			System.out.println("异常信息:"+e.getMessage());
			driver.navigate().refresh();
			try {
				Thread.sleep(3000);
			} catch (Exception e1) {
				e1.printStackTrace();
			}
		}finally{
				text=driver.getPageSource();//-----返回页面源代码
		}
		return text;	
		}
	
	/**
	 * 数据库获取产品链接url
	 * @return
	 */
	private static String getProductUrl(){
		
		return null;
	}
	
	
	public static void main(String[] args) {
		
		MonitorDiscountPrice();
		
	}
	
	
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Lying~

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值