import java.net.HttpURLConnection;
import java.net.URL;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class GetPingStatusWithExecutorService {

    private static final int MYTHREADS = 10;

    public static void main(String[] args) throws Exception {
        String[] hostList = { "http://bing.com", "http://baidu.com", "http://163.com", "http://sina.com",
                "http://sohu.com", "http://weibo.com", "http://jd.com", "http://taobao.com", "http://dangdang.com" };
        ExecutorService executor = Executors.newFixedThreadPool(MYTHREADS);
        for (int i = 0; i < hostList.length; i++) {
            String url = hostList[i];
            Runnable worker = new MyRunnable(url);
            executor.execute(worker);
        }
        executor.shutdown();
        // Wait until all tasks have finished
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        System.out.println("\nFinished all threads");
    }

    public static class MyRunnable implements Runnable {
        private final String url;

        MyRunnable(String url) {
            this.url = url;
        }

        @Override
        public void run() {
            String result = "";
            int code = 200;
            try {
                URL siteURL = new URL(url);
                HttpURLConnection connection = (HttpURLConnection) siteURL.openConnection();
                connection.setRequestMethod("GET");
                connection.setConnectTimeout(3000);
                connection.connect();
                code = connection.getResponseCode();
                if (code == 200) {
                    result = "-> Green <-\t" + "Code: " + code;
                } else {
                    result = "-> Yellow <-\t" + "Code: " + code;
                }
            } catch (Exception e) {
                result = "-> Red <-\t" + "Wrong domain - Exception: " + e.getMessage();
            }
            System.out.println(url + "\t\tStatus:" + result);
        }
    }
}
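If you want the status lines back in the calling thread instead of printing them from each worker, the same pool can run Callable tasks through invokeAll, which blocks until every task completes and returns the futures in submission order. The following is a minimal sketch of that variant; the class name GetPingStatusWithInvokeAll and the checkStatus helper are illustrative only, the host list is trimmed for brevity, and the GET-and-read-response-code logic is the same as in MyRunnable above.

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class GetPingStatusWithInvokeAll {

    public static void main(String[] args) throws Exception {
        String[] hostList = { "http://bing.com", "http://baidu.com", "http://163.com" };
        ExecutorService executor = Executors.newFixedThreadPool(10);

        // Wrap each URL check in a Callable that returns the status line instead of printing it
        List<Callable<String>> tasks = new ArrayList<>();
        for (String url : hostList) {
            tasks.add(() -> checkStatus(url));
        }

        // invokeAll blocks until all tasks finish and returns the futures in submission order
        List<Future<String>> results = executor.invokeAll(tasks);
        for (Future<String> f : results) {
            System.out.println(f.get());
        }
        executor.shutdown();
    }

    // Illustrative helper: same GET-and-read-response-code logic as MyRunnable above
    private static String checkStatus(String url) {
        try {
            HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(3000);
            connection.connect();
            return url + "\t\tCode: " + connection.getResponseCode();
        } catch (Exception e) {
            return url + "\t\tException: " + e.getMessage();
        }
    }
}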
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
public class ScrapeWebExecutorService {
    /**
     * @param urlFile    Path of the file containing the URLs to be scraped
     * @param outputFile File where the scrape results will be written
     * @throws InterruptedException
     * @throws ExecutionException
     * @throws TimeoutException
     * @throws IOException
     */
    public static void scrapeURLs(String urlFile, String outputFile)
            throws InterruptedException, ExecutionException, TimeoutException, IOException {
        Path p = Paths.get(urlFile);
        List<String> lines = Files.readAllLines(p, StandardCharsets.UTF_8);
        ExecutorService executorService = Executors.newFixedThreadPool(10);
        Map<Future<String>, String> tasks = new LinkedHashMap<>();
        for (String line : lines) {
            Callable<String> callable = new Callable<String>() {
                @Override
                public String call() throws Exception {
                    return scrapeIndividualURls(line);
                }
            };
            //
            // Submit the task to executorService; at this point the scraping starts
            //
            Future<String> future = executorService.submit(callable);
            tasks.put(future, line);
        }
        //
        // For each task, iterate and get the content; write the content to the output file
        //
        tasks.forEach((future, url) -> {
            try {
                String content = future.get(120, TimeUnit.SECONDS);
                writeToFile(url, content, outputFile);
            } catch (InterruptedException | ExecutionException | TimeoutException | IOException e) {
                e.printStackTrace();
                try {
                    writeToFile(url, "Not Found", outputFile);
                } catch (IOException e1) {
                    e1.printStackTrace();
                }
            }
        });
        executorService.shutdown();
    }
    /**
     * Scrape a single URL and return its content.
     *
     * @param urlstr the URL to scrape
     * @return the page content, or an empty string if the request failed
     */
    public static String scrapeIndividualURls(String urlstr) {
        StringBuilder contentb = new StringBuilder();
        try {
            // Get the URL content
            URL url = new URL(urlstr);
            // Create a URL connection object
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            // Set the configuration parameters.
            // Note the read timeout of 30 seconds; this is quite important when scraping URLs,
            // because a slow server would otherwise block the worker thread indefinitely.
            conn.setConnectTimeout(100000);
            conn.setReadTimeout(30000);
            conn.connect();
            // Read either the normal response body or, for HTTP errors (>= 400), the error stream;
            // getErrorStream() can return null when the server sent no error body
            InputStream in = conn.getResponseCode() >= 400 ? conn.getErrorStream() : conn.getInputStream();
            if (in != null) {
                BufferedReader br = new BufferedReader(new InputStreamReader(in));
                String inputLine;
                while ((inputLine = br.readLine()) != null) {
                    contentb.append(inputLine);
                    contentb.append("\n");
                }
                br.close();
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return contentb.toString();
    }
    /**
     * Append a single result line to the output file.
     *
     * @param url        the URL that was scraped
     * @param value      the scraped content, or null if nothing was retrieved
     * @param outputFile file the result is appended to
     * @throws IOException
     */
    private static void writeToFile(String url, String value, String outputFile) throws IOException {
        // try-with-resources ensures the writer is closed even if the write fails
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(outputFile), true))) {
            if (value != null) {
                bw.write(url + "\t" + value + "\n");
            } else {
                bw.write(url + "\t" + "Not Found" + "\n");
            }
        }
    }
    public static void main(String[] args)
            throws IOException, InterruptedException, ExecutionException, TimeoutException {
        scrapeURLs("f:\\urls.txt", "f:\\urlsout.txt");
    }
}
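The Future-per-URL map above waits on each future in submission order, so one slow page can hold up results that are already finished. When the order of the output does not matter, an ExecutorCompletionService hands futures back as tasks complete. The following is a minimal sketch under that assumption; the class name ScrapeWebCompletionService is illustrative only, it reuses scrapeIndividualURls and the f:\urls.txt path from the listing above, and it simply prints results in completion order instead of writing them to a file.

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ScrapeWebCompletionService {

    public static void main(String[] args) throws Exception {
        List<String> urls = Files.readAllLines(Paths.get("f:\\urls.txt"), StandardCharsets.UTF_8);
        ExecutorService executor = Executors.newFixedThreadPool(10);
        CompletionService<String> completionService = new ExecutorCompletionService<>(executor);

        // Submit one scraping task per URL; completed futures are queued as they finish
        for (String url : urls) {
            completionService.submit(() -> url + "\t" + ScrapeWebExecutorService.scrapeIndividualURls(url));
        }

        // take() blocks until the next task completes, yielding results in completion order
        // rather than submission order
        for (int i = 0; i < urls.size(); i++) {
            Future<String> done = completionService.take();
            System.out.println(done.get());
        }
        executor.shutdown();
    }
}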