import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
/**
 * Static helpers for simple HTTP GET/POST calls over {@link HttpURLConnection}, plus a few
 * regex-based HTML extraction utilities.
 *
 * <p>Typical use: obtain a session cookie with
 * {@code getCookieByHeaders(getUrlConnection(homeUrl).getHeaderFields())} and pass it to
 * {@link #sendGet(String, String, String, String)}.
 *
 * <p>No connect or read timeout is configured on the connections created here.
 */
public class HttpUtils {

  /** Encoding used by the two-argument {@code sendGet}/{@code sendPost} overloads. */
  private static final String DEFAULT_ENCODING = "gb2312";

  /** Minimum width of the string produced by {@link #toFenAndPending(float)}. */
  private static final int FEN_WIDTH = 12;

  /** Flags shared by all HTML patterns: ignore case and let {@code .} match line breaks. */
  private static final int HTML_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

  /** Matches an anchor tag whose first attribute is {@code href}; group 1 is the link target. */
  private static final Pattern A_HREF =
      Pattern.compile("<a\\s*href=\"([^<>\"]*)\"[^<>]*>", HTML_FLAGS);

  /** Extracts the charset parameter from a Content-Type header value. */
  private static final Pattern CONTENT_TYPE_CHARSET =
      Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

  /**
   * Opens (without connecting) a connection to {@code url} with the Host and Referer request
   * headers preset.
   *
   * @param url absolute URL to open
   * @return the unconnected connection
   * @throws IOException if the URL is malformed or the connection cannot be created
   */
  public static URLConnection getUrlConnection(String url) throws IOException {
    URL target = new URL(url);
    URLConnection conn = target.openConnection();
    setHostHeaders(conn, target);
    return conn;
  }

  /**
   * Opens (without connecting) a connection to {@code url} with the common request headers, the
   * optional cookie, and the Host and Referer headers preset.
   *
   * @param url absolute URL to open
   * @param encode value for the Accept-Charset request header; skipped when blank
   * @param cookie value for the Cookie request header; skipped when blank
   * @return the unconnected connection
   * @throws IOException if the URL is malformed or the connection cannot be created
   */
  public static URLConnection getUrlConnection(String url, String encode, String cookie)
      throws IOException {
    URL target = new URL(url);
    URLConnection conn = target.openConnection();
    setProperty(encode, conn, cookie);
    setHostHeaders(conn, target);
    return conn;
  }

  /**
   * Connects to {@code url} and returns the response header fields.
   *
   * @param url absolute URL to request
   * @return the response headers as reported by {@link URLConnection#getHeaderFields()}
   * @throws IOException if the URL is malformed or the connection cannot be created
   */
  public static Map<String, List<String>> getHeaderFields(String url) throws IOException {
    return getUrlConnection(url).getHeaderFields();
  }

  /**
   * Builds a Cookie request-header value from the Set-Cookie response headers.
   *
   * <p>Only the leading {@code name=value} pair of every Set-Cookie line is kept (attributes such
   * as Path or Expires are dropped) and the pairs are joined with {@code "; "}. The header name is
   * matched case-insensitively.
   *
   * @param headers response headers, for example from {@link #getHeaderFields(String)}; may be
   *     null
   * @return the cookie string, or an empty string when no Set-Cookie header is present
   */
  public static String getCookieByHeaders(Map<String, List<String>> headers) {
    if (headers == null) {
      return "";
    }
    StringBuilder cookies = new StringBuilder();
    for (Map.Entry<String, List<String>> header : headers.entrySet()) {
      // The status line is stored under a null key; equalsIgnoreCase(null) is simply false.
      if (!"Set-Cookie".equalsIgnoreCase(header.getKey()) || header.getValue() == null) {
        continue;
      }
      for (String setCookie : header.getValue()) {
        if (setCookie == null) {
          continue;
        }
        int attributesStart = setCookie.indexOf(';');
        String pair =
            (attributesStart < 0 ? setCookie : setCookie.substring(0, attributesStart)).trim();
        if (pair.length() == 0) {
          continue;
        }
        if (cookies.length() > 0) {
          cookies.append("; ");
        }
        cookies.append(pair);
      }
    }
    return cookies.toString();
  }

  /**
   * Reads the whole stream as text using the platform default charset and closes it.
   *
   * <p>Line terminators are dropped: the lines are concatenated without a separator.
   *
   * @param inStream stream to read; closed before returning
   * @return the stream content with line terminators removed
   * @throws IOException if reading fails
   */
  public static String streamToString(InputStream inStream) throws IOException {
    return readAll(inStream, Charset.defaultCharset());
  }

  /**
   * Reads the whole stream as text using the given charset and closes it.
   *
   * <p>Line terminators are dropped: the lines are concatenated without a separator.
   *
   * @param inStream stream to read; closed before returning
   * @param charsetName charset used to decode the bytes; the platform default when blank
   * @return the stream content with line terminators removed
   * @throws UnsupportedEncodingException if {@code charsetName} is not a supported charset
   * @throws IOException if reading fails
   */
  public static String streamToString(InputStream inStream, String charsetName)
      throws IOException {
    if (isBlank(charsetName)) {
      return readAll(inStream, Charset.defaultCharset());
    }
    Charset charset;
    try {
      charset = Charset.forName(charsetName);
    } catch (IllegalArgumentException e) {
      UnsupportedEncodingException unsupported = new UnsupportedEncodingException(charsetName);
      unsupported.initCause(e);
      throw unsupported;
    }
    return readAll(inStream, charset);
  }

  /**
   * Sends a GET request to the given URL.
   *
   * @param url URL to request
   * @param param request parameters in the form {@code name1=value1&name2=value2}
   * @param encoding value for the Accept-Charset request header
   * @return the response body, or an empty string if the request failed
   */
  public static String sendGet(String url, String param, String encoding) {
    return sendGet(url, param, encoding, "");
  }

  /**
   * Sends a GET request to the given URL, optionally with a Cookie header.
   *
   * <p>Failures are reported on {@code System.err} and result in an empty string; no exception is
   * propagated. The response is decoded with the charset declared in the response Content-Type
   * header, or the platform default charset when none is declared.
   *
   * @param url URL to request
   * @param param request parameters in the form {@code name1=value1&name2=value2}
   * @param encoding value for the Accept-Charset request header
   * @param cookie value for the Cookie request header; skipped when blank
   * @return the response body, or an empty string if the request failed
   */
  public static String sendGet(String url, String param, String encoding, String cookie) {
    try {
      URL target = new URL(concatUrl(url, param));
      HttpURLConnection conn = (HttpURLConnection) target.openConnection();
      setProperty(encoding, conn, cookie);
      setHostHeaders(conn, target);
      conn.setRequestMethod("GET");
      conn.connect();
      return readResponse(conn);
    } catch (Exception e) {
      // Best effort by contract: callers receive "" instead of an exception.
      e.printStackTrace();
      System.err.println("Send GET http request exception!" + e.getMessage());
      return "";
    }
  }

  /**
   * Sends a POST request to the given URL.
   *
   * @param url URL to request
   * @param param request body in the form {@code name1=value1&name2=value2}
   * @param encoding charset used to encode the body and sent as Accept-Charset
   * @return the response body
   * @throws IOException if the URL is empty or the request fails
   */
  public static String sendPost(String url, String param, String encoding) throws IOException {
    return sendPost(url, param, encoding, "");
  }

  /**
   * Sends a POST request to the given URL, optionally with a Cookie header.
   *
   * <p>The response is decoded with the charset declared in the response Content-Type header, or
   * the platform default charset when none is declared.
   *
   * @param url URL to request
   * @param param request body in the form {@code name1=value1&name2=value2}; null sends an empty
   *     body
   * @param encoding charset used to encode the body and sent as Accept-Charset; gb2312 when blank
   * @param cookie value for the Cookie request header; skipped when blank
   * @return the response body
   * @throws IOException if the URL is empty or the request fails; unexpected runtime failures are
   *     wrapped as the cause
   */
  public static String sendPost(String url, String param, String encoding, String cookie)
      throws IOException {
    if (url == null || url.length() == 0) {
      throw new IOException("The request url is not allow empty or null!");
    }
    String charsetName = isBlank(encoding) ? DEFAULT_ENCODING : encoding;
    try {
      URL target = new URL(url);
      HttpURLConnection conn = (HttpURLConnection) target.openConnection();
      setProperty(charsetName, conn, cookie);
      setHostHeaders(conn, target);
      conn.setRequestMethod("POST");
      conn.setDoOutput(true);
      conn.setDoInput(true);
      // A plain Writer (unlike PrintWriter) surfaces write failures as IOException.
      try (Writer body = new OutputStreamWriter(conn.getOutputStream(), charsetName)) {
        body.write(param == null ? "" : param);
      }
      return readResponse(conn);
    } catch (RuntimeException e) {
      // Keep the declared contract: callers only need to handle IOException.
      throw new IOException(e);
    }
  }

  /**
   * Sends a GET request using the default encoding gb2312.
   *
   * @param url URL to request
   * @param param request parameters in the form {@code name1=value1&name2=value2}
   * @return the response body, or an empty string if the request failed
   */
  public static String sendGet(String url, String param) {
    return sendGet(url, param, DEFAULT_ENCODING);
  }

  /**
   * Sends a POST request using the default encoding gb2312.
   *
   * @param url URL to request
   * @param param request body in the form {@code name1=value1&name2=value2}
   * @return the response body
   * @throws IOException if the URL is empty or the request fails
   */
  public static String sendPost(String url, String param) throws IOException {
    return sendPost(url, param, DEFAULT_ENCODING);
  }

  /**
   * Converts an amount to fen (hundredths) and left-pads it with zeros to 12 characters.
   *
   * <p>The conversion uses the shortest decimal text of the float, so {@code 0.53f} yields 53
   * rather than 52. Digits beyond the second decimal place are truncated. A negative amount keeps
   * its sign in front of the padding; values wider than 12 characters are returned unpadded.
   *
   * @param input the amount to convert
   * @return the amount in fen, at least 12 characters long
   * @throws IllegalArgumentException if {@code input} is NaN or infinite
   */
  public static String toFenAndPending(float input) {
    if (Float.isNaN(input) || Float.isInfinite(input)) {
      throw new IllegalArgumentException("Amount must be a finite number: " + input);
    }
    // toBigInteger() discards any remaining fraction, i.e. truncates toward zero.
    BigInteger fen = new BigDecimal(Float.toString(input)).movePointRight(2).toBigInteger();
    String digits = fen.abs().toString();
    StringBuilder padded = new StringBuilder(FEN_WIDTH);
    if (fen.signum() < 0) {
      padded.append('-');
    }
    for (int width = padded.length() + digits.length(); width < FEN_WIDTH; width++) {
      padded.append('0');
    }
    return padded.append(digits).toString();
  }

  /**
   * Returns the first match of {@code pattern} in {@code input}.
   *
   * <p>The pattern is compiled case-insensitively with DOTALL. When it contains capturing groups
   * the last group is returned, otherwise the whole match.
   *
   * @param input text to search
   * @param pattern regular expression to apply
   * @return the matched text, or {@code input} itself when nothing matches
   */
  public static String filterHtml(String input, String pattern) {
    Matcher m = Pattern.compile(pattern, HTML_FLAGS).matcher(input);
    if (m.find()) {
      return m.group(m.groupCount());
    }
    return input;
  }

  /**
   * Extracts the span from the first {@code <form } to the last {@code </form>} in the text.
   *
   * @param result HTML text to search
   * @return the form markup, or {@code result} itself when no form is found
   */
  public static String getFormTagHtml(String result) {
    return HttpUtils.filterHtml(result, "<form [\\s\\S]*</form>");
  }

  /**
   * Returns the {@code value} attribute of the first {@code <input>} tag whose {@code name}
   * attribute equals {@code key}.
   *
   * <p>Both attributes must be double-quoted and the value must be non-empty. The key is matched
   * literally, so regex metacharacters in it (for example {@code items[0]}) are safe.
   *
   * @param input the HTML source
   * @param key the input name to look for
   * @return the value of the matching input, or {@code input} itself when nothing matches
   */
  public static String getHtmlInput(String input, String key) {
    String pattern =
        "<input\\s*(?=[^>]*name=\""
            + Pattern.quote(String.valueOf(key))
            + "\")(?=[^>]*value=\"([^\"]+)\")[^>]+>";
    Matcher m = Pattern.compile(pattern, HTML_FLAGS).matcher(input);
    if (m.find()) {
      return m.group(m.groupCount());
    }
    return input;
  }

  /**
   * Returns the {@code href} target of the first anchor tag that starts with {@code <a href="}.
   *
   * @param input HTML text to search
   * @return the link target, or {@code input} itself when no such anchor is found
   */
  public static String filterAhref(String input) {
    Matcher m = A_HREF.matcher(input);
    if (m.find()) {
      return m.group(1);
    }
    return input;
  }

  /**
   * Appends a query string to a URL, choosing {@code ?} or {@code &} as the separator.
   *
   * <p>No separator is added when the URL already ends with {@code ?} or {@code &}.
   *
   * @param url base URL; must not be null when {@code param} is not blank
   * @param param query string in the form {@code name1=value1&name2=value2}; ignored when blank
   * @return the combined URL, or {@code url} unchanged when {@code param} is blank
   */
  public static String concatUrl(String url, String param) {
    if (isBlank(param)) {
      return url;
    }
    if (url.indexOf('?') < 0) {
      return url + "?" + param;
    }
    if (url.endsWith("?") || url.endsWith("&")) {
      return url + param;
    }
    return url + "&" + param;
  }

  /**
   * Joins the map entries into a {@code key=value&key=value} string.
   *
   * <p>Keys and values are appended verbatim; no URL encoding is applied.
   *
   * @param maps parameters to join; may be null
   * @return the joined string, or null when the map is null or empty
   */
  public static String getRequestParam(Map<String, Object> maps) {
    if (maps == null || maps.isEmpty()) {
      return null;
    }
    StringBuilder query = new StringBuilder();
    for (Map.Entry<String, Object> entry : maps.entrySet()) {
      if (query.length() > 0) {
        query.append('&');
      }
      query.append(entry.getKey()).append('=').append(entry.getValue());
    }
    return query.toString();
  }

  /** Returns the host of the URL, followed by {@code :port} when a port is given explicitly. */
  private static String getHostByUrl(URL url) {
    String host = url.getHost() == null ? "" : url.getHost();
    int port = url.getPort();
    return port < 0 ? host : host + ":" + port;
  }

  /** Sets the Host and Referer request headers; the Referer scheme is always {@code http://}. */
  private static void setHostHeaders(URLConnection conn, URL url) {
    String host = getHostByUrl(url);
    conn.setRequestProperty("Host", host);
    conn.setRequestProperty("Referer", "http://" + host);
  }

  /** Sets the Accept-Charset, Accept, Connection, user-agent and optional Cookie headers. */
  private static void setProperty(String encoding, URLConnection conn, String cookie) {
    if (!isBlank(encoding)) {
      conn.setRequestProperty("Accept-Charset", encoding);
    }
    conn.setRequestProperty(
        "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    conn.setRequestProperty("Connection", "keep-alive");
    conn.setRequestProperty("user-agent", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1;SV1)");
    if (!isBlank(cookie)) {
      conn.setRequestProperty("Cookie", cookie);
    }
  }

  /** Reads the response body, decoding with the declared charset or the platform default. */
  private static String readResponse(URLConnection conn) throws IOException {
    InputStream body = conn.getInputStream();
    Charset declared = declaredCharset(conn.getContentType());
    return readAll(body, declared != null ? declared : Charset.defaultCharset());
  }

  /** Returns the supported charset named in a Content-Type value, or null when there is none. */
  private static Charset declaredCharset(String contentType) {
    if (contentType == null) {
      return null;
    }
    Matcher m = CONTENT_TYPE_CHARSET.matcher(contentType);
    if (!m.find()) {
      return null;
    }
    try {
      return Charset.forName(m.group(1));
    } catch (IllegalArgumentException unsupported) {
      // Unknown or illegal charset name: let the caller fall back to its default.
      return null;
    }
  }

  /** Reads every line of the stream, concatenates them without separators, and closes it. */
  private static String readAll(InputStream inStream, Charset charset) throws IOException {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(inStream, charset))) {
      StringBuilder text = new StringBuilder();
      String line;
      while ((line = reader.readLine()) != null) {
        text.append(line);
      }
      return text.toString();
    }
  }

  /** Returns true when the string is null, empty, or contains only whitespace. */
  private static boolean isBlank(String s) {
    if (s == null) {
      return true;
    }
    for (int i = 0; i < s.length(); i++) {
      if (!Character.isWhitespace(s.charAt(i))) {
        return false;
      }
    }
    return true;
  }
}
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for simple HTTP GET/POST calls over {@link HttpURLConnection}, plus a few
 * regex-based HTML extraction utilities.
 *
 * <p>Typical use: obtain a session cookie with
 * {@code getCookieByHeaders(getUrlConnection(homeUrl).getHeaderFields())} and pass it to
 * {@link #sendGet(String, String, String, String)}.
 *
 * <p>No connect or read timeout is configured on the connections created here.
 */
public class HttpUtils {

  /** Encoding used by the two-argument {@code sendGet}/{@code sendPost} overloads. */
  private static final String DEFAULT_ENCODING = "gb2312";

  /** Minimum width of the string produced by {@link #toFenAndPending(float)}. */
  private static final int FEN_WIDTH = 12;

  /** Flags shared by all HTML patterns: ignore case and let {@code .} match line breaks. */
  private static final int HTML_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

  /** Matches an anchor tag whose first attribute is {@code href}; group 1 is the link target. */
  private static final Pattern A_HREF =
      Pattern.compile("<a\\s*href=\"([^<>\"]*)\"[^<>]*>", HTML_FLAGS);

  /** Extracts the charset parameter from a Content-Type header value. */
  private static final Pattern CONTENT_TYPE_CHARSET =
      Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

  /**
   * Opens (without connecting) a connection to {@code url} with the Host and Referer request
   * headers preset.
   *
   * @param url absolute URL to open
   * @return the unconnected connection
   * @throws IOException if the URL is malformed or the connection cannot be created
   */
  public static URLConnection getUrlConnection(String url) throws IOException {
    URL target = new URL(url);
    URLConnection conn = target.openConnection();
    setHostHeaders(conn, target);
    return conn;
  }

  /**
   * Opens (without connecting) a connection to {@code url} with the common request headers, the
   * optional cookie, and the Host and Referer headers preset.
   *
   * @param url absolute URL to open
   * @param encode value for the Accept-Charset request header; skipped when blank
   * @param cookie value for the Cookie request header; skipped when blank
   * @return the unconnected connection
   * @throws IOException if the URL is malformed or the connection cannot be created
   */
  public static URLConnection getUrlConnection(String url, String encode, String cookie)
      throws IOException {
    URL target = new URL(url);
    URLConnection conn = target.openConnection();
    setProperty(encode, conn, cookie);
    setHostHeaders(conn, target);
    return conn;
  }

  /**
   * Connects to {@code url} and returns the response header fields.
   *
   * @param url absolute URL to request
   * @return the response headers as reported by {@link URLConnection#getHeaderFields()}
   * @throws IOException if the URL is malformed or the connection cannot be created
   */
  public static Map<String, List<String>> getHeaderFields(String url) throws IOException {
    return getUrlConnection(url).getHeaderFields();
  }

  /**
   * Builds a Cookie request-header value from the Set-Cookie response headers.
   *
   * <p>Only the leading {@code name=value} pair of every Set-Cookie line is kept (attributes such
   * as Path or Expires are dropped) and the pairs are joined with {@code "; "}. The header name is
   * matched case-insensitively.
   *
   * @param headers response headers, for example from {@link #getHeaderFields(String)}; may be
   *     null
   * @return the cookie string, or an empty string when no Set-Cookie header is present
   */
  public static String getCookieByHeaders(Map<String, List<String>> headers) {
    if (headers == null) {
      return "";
    }
    StringBuilder cookies = new StringBuilder();
    for (Map.Entry<String, List<String>> header : headers.entrySet()) {
      // The status line is stored under a null key; equalsIgnoreCase(null) is simply false.
      if (!"Set-Cookie".equalsIgnoreCase(header.getKey()) || header.getValue() == null) {
        continue;
      }
      for (String setCookie : header.getValue()) {
        if (setCookie == null) {
          continue;
        }
        int attributesStart = setCookie.indexOf(';');
        String pair =
            (attributesStart < 0 ? setCookie : setCookie.substring(0, attributesStart)).trim();
        if (pair.length() == 0) {
          continue;
        }
        if (cookies.length() > 0) {
          cookies.append("; ");
        }
        cookies.append(pair);
      }
    }
    return cookies.toString();
  }

  /**
   * Reads the whole stream as text using the platform default charset and closes it.
   *
   * <p>Line terminators are dropped: the lines are concatenated without a separator.
   *
   * @param inStream stream to read; closed before returning
   * @return the stream content with line terminators removed
   * @throws IOException if reading fails
   */
  public static String streamToString(InputStream inStream) throws IOException {
    return readAll(inStream, Charset.defaultCharset());
  }

  /**
   * Reads the whole stream as text using the given charset and closes it.
   *
   * <p>Line terminators are dropped: the lines are concatenated without a separator.
   *
   * @param inStream stream to read; closed before returning
   * @param charsetName charset used to decode the bytes; the platform default when blank
   * @return the stream content with line terminators removed
   * @throws UnsupportedEncodingException if {@code charsetName} is not a supported charset
   * @throws IOException if reading fails
   */
  public static String streamToString(InputStream inStream, String charsetName)
      throws IOException {
    if (isBlank(charsetName)) {
      return readAll(inStream, Charset.defaultCharset());
    }
    Charset charset;
    try {
      charset = Charset.forName(charsetName);
    } catch (IllegalArgumentException e) {
      UnsupportedEncodingException unsupported = new UnsupportedEncodingException(charsetName);
      unsupported.initCause(e);
      throw unsupported;
    }
    return readAll(inStream, charset);
  }

  /**
   * Sends a GET request to the given URL.
   *
   * @param url URL to request
   * @param param request parameters in the form {@code name1=value1&name2=value2}
   * @param encoding value for the Accept-Charset request header
   * @return the response body, or an empty string if the request failed
   */
  public static String sendGet(String url, String param, String encoding) {
    return sendGet(url, param, encoding, "");
  }

  /**
   * Sends a GET request to the given URL, optionally with a Cookie header.
   *
   * <p>Failures are reported on {@code System.err} and result in an empty string; no exception is
   * propagated. The response is decoded with the charset declared in the response Content-Type
   * header, or the platform default charset when none is declared.
   *
   * @param url URL to request
   * @param param request parameters in the form {@code name1=value1&name2=value2}
   * @param encoding value for the Accept-Charset request header
   * @param cookie value for the Cookie request header; skipped when blank
   * @return the response body, or an empty string if the request failed
   */
  public static String sendGet(String url, String param, String encoding, String cookie) {
    try {
      URL target = new URL(concatUrl(url, param));
      HttpURLConnection conn = (HttpURLConnection) target.openConnection();
      setProperty(encoding, conn, cookie);
      setHostHeaders(conn, target);
      conn.setRequestMethod("GET");
      conn.connect();
      return readResponse(conn);
    } catch (Exception e) {
      // Best effort by contract: callers receive "" instead of an exception.
      e.printStackTrace();
      System.err.println("Send GET http request exception!" + e.getMessage());
      return "";
    }
  }

  /**
   * Sends a POST request to the given URL.
   *
   * @param url URL to request
   * @param param request body in the form {@code name1=value1&name2=value2}
   * @param encoding charset used to encode the body and sent as Accept-Charset
   * @return the response body
   * @throws IOException if the URL is empty or the request fails
   */
  public static String sendPost(String url, String param, String encoding) throws IOException {
    return sendPost(url, param, encoding, "");
  }

  /**
   * Sends a POST request to the given URL, optionally with a Cookie header.
   *
   * <p>The response is decoded with the charset declared in the response Content-Type header, or
   * the platform default charset when none is declared.
   *
   * @param url URL to request
   * @param param request body in the form {@code name1=value1&name2=value2}; null sends an empty
   *     body
   * @param encoding charset used to encode the body and sent as Accept-Charset; gb2312 when blank
   * @param cookie value for the Cookie request header; skipped when blank
   * @return the response body
   * @throws IOException if the URL is empty or the request fails; unexpected runtime failures are
   *     wrapped as the cause
   */
  public static String sendPost(String url, String param, String encoding, String cookie)
      throws IOException {
    if (url == null || url.length() == 0) {
      throw new IOException("The request url is not allow empty or null!");
    }
    String charsetName = isBlank(encoding) ? DEFAULT_ENCODING : encoding;
    try {
      URL target = new URL(url);
      HttpURLConnection conn = (HttpURLConnection) target.openConnection();
      setProperty(charsetName, conn, cookie);
      setHostHeaders(conn, target);
      conn.setRequestMethod("POST");
      conn.setDoOutput(true);
      conn.setDoInput(true);
      // A plain Writer (unlike PrintWriter) surfaces write failures as IOException.
      try (Writer body = new OutputStreamWriter(conn.getOutputStream(), charsetName)) {
        body.write(param == null ? "" : param);
      }
      return readResponse(conn);
    } catch (RuntimeException e) {
      // Keep the declared contract: callers only need to handle IOException.
      throw new IOException(e);
    }
  }

  /**
   * Sends a GET request using the default encoding gb2312.
   *
   * @param url URL to request
   * @param param request parameters in the form {@code name1=value1&name2=value2}
   * @return the response body, or an empty string if the request failed
   */
  public static String sendGet(String url, String param) {
    return sendGet(url, param, DEFAULT_ENCODING);
  }

  /**
   * Sends a POST request using the default encoding gb2312.
   *
   * @param url URL to request
   * @param param request body in the form {@code name1=value1&name2=value2}
   * @return the response body
   * @throws IOException if the URL is empty or the request fails
   */
  public static String sendPost(String url, String param) throws IOException {
    return sendPost(url, param, DEFAULT_ENCODING);
  }

  /**
   * Converts an amount to fen (hundredths) and left-pads it with zeros to 12 characters.
   *
   * <p>The conversion uses the shortest decimal text of the float, so {@code 0.53f} yields 53
   * rather than 52. Digits beyond the second decimal place are truncated. A negative amount keeps
   * its sign in front of the padding; values wider than 12 characters are returned unpadded.
   *
   * @param input the amount to convert
   * @return the amount in fen, at least 12 characters long
   * @throws IllegalArgumentException if {@code input} is NaN or infinite
   */
  public static String toFenAndPending(float input) {
    if (Float.isNaN(input) || Float.isInfinite(input)) {
      throw new IllegalArgumentException("Amount must be a finite number: " + input);
    }
    // toBigInteger() discards any remaining fraction, i.e. truncates toward zero.
    BigInteger fen = new BigDecimal(Float.toString(input)).movePointRight(2).toBigInteger();
    String digits = fen.abs().toString();
    StringBuilder padded = new StringBuilder(FEN_WIDTH);
    if (fen.signum() < 0) {
      padded.append('-');
    }
    for (int width = padded.length() + digits.length(); width < FEN_WIDTH; width++) {
      padded.append('0');
    }
    return padded.append(digits).toString();
  }

  /**
   * Returns the first match of {@code pattern} in {@code input}.
   *
   * <p>The pattern is compiled case-insensitively with DOTALL. When it contains capturing groups
   * the last group is returned, otherwise the whole match.
   *
   * @param input text to search
   * @param pattern regular expression to apply
   * @return the matched text, or {@code input} itself when nothing matches
   */
  public static String filterHtml(String input, String pattern) {
    Matcher m = Pattern.compile(pattern, HTML_FLAGS).matcher(input);
    if (m.find()) {
      return m.group(m.groupCount());
    }
    return input;
  }

  /**
   * Extracts the span from the first {@code <form } to the last {@code </form>} in the text.
   *
   * @param result HTML text to search
   * @return the form markup, or {@code result} itself when no form is found
   */
  public static String getFormTagHtml(String result) {
    return HttpUtils.filterHtml(result, "<form [\\s\\S]*</form>");
  }

  /**
   * Returns the {@code value} attribute of the first {@code <input>} tag whose {@code name}
   * attribute equals {@code key}.
   *
   * <p>Both attributes must be double-quoted and the value must be non-empty. The key is matched
   * literally, so regex metacharacters in it (for example {@code items[0]}) are safe.
   *
   * @param input the HTML source
   * @param key the input name to look for
   * @return the value of the matching input, or {@code input} itself when nothing matches
   */
  public static String getHtmlInput(String input, String key) {
    String pattern =
        "<input\\s*(?=[^>]*name=\""
            + Pattern.quote(String.valueOf(key))
            + "\")(?=[^>]*value=\"([^\"]+)\")[^>]+>";
    Matcher m = Pattern.compile(pattern, HTML_FLAGS).matcher(input);
    if (m.find()) {
      return m.group(m.groupCount());
    }
    return input;
  }

  /**
   * Returns the {@code href} target of the first anchor tag that starts with {@code <a href="}.
   *
   * @param input HTML text to search
   * @return the link target, or {@code input} itself when no such anchor is found
   */
  public static String filterAhref(String input) {
    Matcher m = A_HREF.matcher(input);
    if (m.find()) {
      return m.group(1);
    }
    return input;
  }

  /**
   * Appends a query string to a URL, choosing {@code ?} or {@code &} as the separator.
   *
   * <p>No separator is added when the URL already ends with {@code ?} or {@code &}.
   *
   * @param url base URL; must not be null when {@code param} is not blank
   * @param param query string in the form {@code name1=value1&name2=value2}; ignored when blank
   * @return the combined URL, or {@code url} unchanged when {@code param} is blank
   */
  public static String concatUrl(String url, String param) {
    if (isBlank(param)) {
      return url;
    }
    if (url.indexOf('?') < 0) {
      return url + "?" + param;
    }
    if (url.endsWith("?") || url.endsWith("&")) {
      return url + param;
    }
    return url + "&" + param;
  }

  /**
   * Joins the map entries into a {@code key=value&key=value} string.
   *
   * <p>Keys and values are appended verbatim; no URL encoding is applied.
   *
   * @param maps parameters to join; may be null
   * @return the joined string, or null when the map is null or empty
   */
  public static String getRequestParam(Map<String, Object> maps) {
    if (maps == null || maps.isEmpty()) {
      return null;
    }
    StringBuilder query = new StringBuilder();
    for (Map.Entry<String, Object> entry : maps.entrySet()) {
      if (query.length() > 0) {
        query.append('&');
      }
      query.append(entry.getKey()).append('=').append(entry.getValue());
    }
    return query.toString();
  }

  /** Returns the host of the URL, followed by {@code :port} when a port is given explicitly. */
  private static String getHostByUrl(URL url) {
    String host = url.getHost() == null ? "" : url.getHost();
    int port = url.getPort();
    return port < 0 ? host : host + ":" + port;
  }

  /** Sets the Host and Referer request headers; the Referer scheme is always {@code http://}. */
  private static void setHostHeaders(URLConnection conn, URL url) {
    String host = getHostByUrl(url);
    conn.setRequestProperty("Host", host);
    conn.setRequestProperty("Referer", "http://" + host);
  }

  /** Sets the Accept-Charset, Accept, Connection, user-agent and optional Cookie headers. */
  private static void setProperty(String encoding, URLConnection conn, String cookie) {
    if (!isBlank(encoding)) {
      conn.setRequestProperty("Accept-Charset", encoding);
    }
    conn.setRequestProperty(
        "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    conn.setRequestProperty("Connection", "keep-alive");
    conn.setRequestProperty("user-agent", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1;SV1)");
    if (!isBlank(cookie)) {
      conn.setRequestProperty("Cookie", cookie);
    }
  }

  /** Reads the response body, decoding with the declared charset or the platform default. */
  private static String readResponse(URLConnection conn) throws IOException {
    InputStream body = conn.getInputStream();
    Charset declared = declaredCharset(conn.getContentType());
    return readAll(body, declared != null ? declared : Charset.defaultCharset());
  }

  /** Returns the supported charset named in a Content-Type value, or null when there is none. */
  private static Charset declaredCharset(String contentType) {
    if (contentType == null) {
      return null;
    }
    Matcher m = CONTENT_TYPE_CHARSET.matcher(contentType);
    if (!m.find()) {
      return null;
    }
    try {
      return Charset.forName(m.group(1));
    } catch (IllegalArgumentException unsupported) {
      // Unknown or illegal charset name: let the caller fall back to its default.
      return null;
    }
  }

  /** Reads every line of the stream, concatenates them without separators, and closes it. */
  private static String readAll(InputStream inStream, Charset charset) throws IOException {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(inStream, charset))) {
      StringBuilder text = new StringBuilder();
      String line;
      while ((line = reader.readLine()) != null) {
        text.append(line);
      }
      return text.toString();
    }
  }

  /** Returns true when the string is null, empty, or contains only whitespace. */
  private static boolean isBlank(String s) {
    if (s == null) {
      return true;
    }
    for (int i = 0; i < s.length(); i++) {
      if (!Character.isWhitespace(s.charAt(i))) {
        return false;
      }
    }
    return true;
  }
}