import java.net.HttpURLConnection;
import java.net.URL;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class GetPingStatusWithExecutorService {

    private static final int MYTHREADS = 10;

    public static void main(String[] args) throws Exception {
        String[] hostList = { "http://bing.com", "http://baidu.com", "http://163.com", "http://sina.com",
                "http://sohu.com", "http://weibo.com", "http://jd.com", "http://taobao.com", "http://dangdang.com" };
        ExecutorService executor = Executors.newFixedThreadPool(MYTHREADS);
        for (int i = 0; i < hostList.length; i++) {
            String url = hostList[i];
            Runnable worker = new MyRunnable(url);
            executor.execute(worker);
        }
        executor.shutdown();
        // Wait until all tasks have finished
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        System.out.println("\nFinished all threads");
    }

    public static class MyRunnable implements Runnable {
        private final String url;

        MyRunnable(String url) {
            this.url = url;
        }

        @Override
        public void run() {
            String result = "";
            int code = 200;
            try {
                URL siteURL = new URL(url);
                HttpURLConnection connection = (HttpURLConnection) siteURL.openConnection();
                connection.setRequestMethod("GET");
                connection.setConnectTimeout(3000);
                connection.connect();
                code = connection.getResponseCode();
                if (code == 200) {
                    result = "-> Green <-\t" + "Code: " + code;
                } else {
                    result = "-> Yellow <-\t" + "Code: " + code;
                }
            } catch (Exception e) {
                result = "-> Red <-\t" + "Wrong domain - Exception: " + e.getMessage();
            }
            System.out.println(url + "\t\tStatus:" + result);
        }
    }
}
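If you want the status lines back in the calling thread instead of printing them from each worker, the same pool can run Callable tasks through invokeAll, which blocks until every task completes and returns the futures in submission order. The following is a minimal sketch of that variant; the class name GetPingStatusWithInvokeAll and the checkStatus helper are illustrative only, the host list is trimmed for brevity, and the GET-and-read-response-code logic is the same as in MyRunnable above.

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class GetPingStatusWithInvokeAll {

    public static void main(String[] args) throws Exception {
        String[] hostList = { "http://bing.com", "http://baidu.com", "http://163.com" };
        ExecutorService executor = Executors.newFixedThreadPool(10);

        // Wrap each URL check in a Callable that returns the status line instead of printing it
        List<Callable<String>> tasks = new ArrayList<>();
        for (String url : hostList) {
            tasks.add(() -> checkStatus(url));
        }

        // invokeAll blocks until all tasks finish and returns the futures in submission order
        List<Future<String>> results = executor.invokeAll(tasks);
        for (Future<String> f : results) {
            System.out.println(f.get());
        }
        executor.shutdown();
    }

    // Illustrative helper: same GET-and-read-response-code logic as MyRunnable above
    private static String checkStatus(String url) {
        try {
            HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(3000);
            connection.connect();
            return url + "\t\tCode: " + connection.getResponseCode();
        } catch (Exception e) {
            return url + "\t\tException: " + e.getMessage();
        }
    }
}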
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
public class ScrapeWebExecutorService {
    /**
     * @param urlFile    Path of the file containing the URLs to be scraped
     * @param outputFile File where the scrape results will be written
     * @throws InterruptedException
     * @throws ExecutionException
     * @throws TimeoutException
     * @throws IOException
     */
    public static void scrapeURLs(String urlFile, String outputFile)
            throws InterruptedException, ExecutionException, TimeoutException, IOException {
        Path p = Paths.get(urlFile);
        List<String> lines = Files.readAllLines(p, StandardCharsets.UTF_8);
        ExecutorService executorService = Executors.newFixedThreadPool(10);
        Map<Future<String>, String> tasks = new LinkedHashMap<>();
        for (String line : lines) {
            Callable<String> callable = new Callable<String>() {
                @Override
                public String call() throws Exception {
                    return scrapeIndividualURls(line);
                }
            };
            //
            // Submit the task to executorService; at this point the scraping starts
            //
            Future<String> future = executorService.submit(callable);
            tasks.put(future, line);
        }
        //
        // For each task, iterate and get the content; write the content to the output file
        //
        tasks.forEach((future, url) -> {
            try {
                String content = future.get(120, TimeUnit.SECONDS);
                writeToFile(url, content, outputFile);
            } catch (InterruptedException | ExecutionException | TimeoutException | IOException e) {
                e.printStackTrace();
                try {
                    writeToFile(url, "Not Found", outputFile);
                } catch (IOException e1) {
                    e1.printStackTrace();
                }
            }
        });
        executorService.shutdown();
    }
    /**
     * Scrape a single URL and return its content.
     *
     * @param urlstr the URL to scrape
     * @return the page content, or an empty string if the request failed
     */
    public static String scrapeIndividualURls(String urlstr) {
        StringBuilder contentb = new StringBuilder();
        try {
            // Get the URL content
            URL url = new URL(urlstr);
            // Create a URL connection object
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            // Set the configuration parameters.
            // Note the read timeout of 30 seconds; this is quite important when scraping URLs,
            // because a slow server would otherwise block the worker thread indefinitely.
            conn.setConnectTimeout(100000);
            conn.setReadTimeout(30000);
            conn.connect();
            // Read either the normal response body or, for HTTP errors (>= 400), the error stream;
            // getErrorStream() can return null when the server sent no error body
            InputStream in = conn.getResponseCode() >= 400 ? conn.getErrorStream() : conn.getInputStream();
            if (in != null) {
                BufferedReader br = new BufferedReader(new InputStreamReader(in));
                String inputLine;
                while ((inputLine = br.readLine()) != null) {
                    contentb.append(inputLine);
                    contentb.append("\n");
                }
                br.close();
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return contentb.toString();
    }
    /**
     * Append a single result line to the output file.
     *
     * @param url        the URL that was scraped
     * @param value      the scraped content, or null if nothing was retrieved
     * @param outputFile file the result is appended to
     * @throws IOException
     */
    private static void writeToFile(String url, String value, String outputFile) throws IOException {
        // try-with-resources ensures the writer is closed even if the write fails
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(outputFile), true))) {
            if (value != null) {
                bw.write(url + "\t" + value + "\n");
            } else {
                bw.write(url + "\t" + "Not Found" + "\n");
            }
        }
    }
    public static void main(String[] args)
            throws IOException, InterruptedException, ExecutionException, TimeoutException {
        scrapeURLs("f:\\urls.txt", "f:\\urlsout.txt");
    }
}
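The Future-per-URL map above waits on each future in submission order, so one slow page can hold up results that are already finished. When the order of the output does not matter, an ExecutorCompletionService hands futures back as tasks complete. The following is a minimal sketch under that assumption; the class name ScrapeWebCompletionService is illustrative only, it reuses scrapeIndividualURls and the f:\urls.txt path from the listing above, and it simply prints results in completion order instead of writing them to a file.

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ScrapeWebCompletionService {

    public static void main(String[] args) throws Exception {
        List<String> urls = Files.readAllLines(Paths.get("f:\\urls.txt"), StandardCharsets.UTF_8);
        ExecutorService executor = Executors.newFixedThreadPool(10);
        CompletionService<String> completionService = new ExecutorCompletionService<>(executor);

        // Submit one scraping task per URL; completed futures are queued as they finish
        for (String url : urls) {
            completionService.submit(() -> url + "\t" + ScrapeWebExecutorService.scrapeIndividualURls(url));
        }

        // take() blocks until the next task completes, yielding results in completion order
        // rather than submission order
        for (int i = 0; i < urls.size(); i++) {
            Future<String> done = completionService.take();
            System.out.println(done.get());
        }
        executor.shutdown();
    }
}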