package com.xxx.xx;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
// Utility class for calling web (HTTP) interfaces
/**
 * Utility for sending a single HTTP request through {@link HttpURLConnection}
 * and returning the response body as text.
 */
public class HttpAccessUtil
{
    /** Content-Type sent when the caller passes a blank {@code mimeTypes}. */
    private static final String DEFAULT_CONTENT_TYPE = "text/plain;charset=UTF-8";

    /** Charset used when the caller passes a blank {@code chartSet}. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Logger for request failures (fully qualified so no extra import is required). */
    private static final java.util.logging.Logger log =
        java.util.logging.Logger.getLogger(HttpAccessUtil.class.getName());

    /**
     * Sends an HTTP request and returns the response body.
     *
     * <p>The response is read line by line and the lines are concatenated
     * without their line terminators, so a multi-line body comes back as a
     * single line.
     *
     * @param requestUrl       address to call
     * @param chartSet         charset name used to encode {@code attachments} and to decode
     *                         the response; UTF-8 is used when blank
     * @param mimeTypes        value of the {@code Content-Type} request header;
     *                         {@code text/plain;charset=UTF-8} is used when blank
     * @param requestMethod    HTTP method (GET, POST)
     * @param connTimeoutMills connect timeout in milliseconds
     * @param readTimeoutMills read timeout in milliseconds
     * @param attachments      optional request body, either a single string such as
     *                         {"json":"value"} or parameters in A=a&amp;B=b form; nothing is
     *                         written when it is blank
     *
     * @return the response body with line terminators removed
     * @throws Exception any failure while connecting, writing or reading; it is logged and rethrown
     */
    public static String httpRequest(String requestUrl, String chartSet, String mimeTypes,
                                     String requestMethod, int connTimeoutMills,
                                     int readTimeoutMills, String attachments)
        throws Exception
    {
        String charset = StringUtils.isNotBlank(chartSet) ? chartSet : DEFAULT_CHARSET;
        String contentType = StringUtils.isNotBlank(mimeTypes) ? mimeTypes : DEFAULT_CONTENT_TYPE;
        boolean hasBody = StringUtils.isNotBlank(attachments);

        HttpURLConnection httpUrlConn = null;
        InputStream inputStream = null;
        InputStreamReader inputStreamReader = null;
        BufferedReader bufferedReader = null;
        OutputStream outputStream = null;
        StringBuilder buffer = new StringBuilder();
        try
        {
            URL url = new URL(requestUrl);
            httpUrlConn = (HttpURLConnection)url.openConnection();
            httpUrlConn.setRequestProperty("Content-Type", contentType);
            httpUrlConn.setConnectTimeout(connTimeoutMills);
            httpUrlConn.setReadTimeout(readTimeoutMills);
            // Output is only enabled when there is a body to send.
            httpUrlConn.setDoOutput(hasBody);
            httpUrlConn.setDoInput(true);
            httpUrlConn.setUseCaches(false);
            httpUrlConn.setRequestMethod(requestMethod);
            httpUrlConn.connect();

            if (hasBody)
            {
                outputStream = httpUrlConn.getOutputStream();
                // Encode with the caller's charset so non-ASCII text is not garbled.
                outputStream.write(attachments.getBytes(charset));
                outputStream.flush();
                // The body must be closed before the response is requested.
                outputStream.close();
            }

            inputStream = httpUrlConn.getInputStream();
            inputStreamReader = new InputStreamReader(inputStream, charset);
            bufferedReader = new BufferedReader(inputStreamReader);
            String line;
            while ((line = bufferedReader.readLine()) != null)
            {
                buffer.append(line);
            }
        }
        catch (Exception e)
        {
            log.log(java.util.logging.Level.SEVERE, requestMethod + " " + requestUrl + " error ", e);
            throw e;
        }
        finally
        {
            // closeQuietly tolerates null and swallows close failures, so no extra guard is needed.
            IOUtils.closeQuietly(bufferedReader);
            IOUtils.closeQuietly(inputStreamReader);
            IOUtils.closeQuietly(inputStream);
            IOUtils.closeQuietly(outputStream);
            if (httpUrlConn != null)
            {
                httpUrlConn.disconnect();
            }
        }
        return buffer.toString();
    }
}
// Web page scraping class
package com.xxx.xx;
import com.xxx.xx.HttpAccessUtil;
import net.sf.json.JSONArray;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// Fetches the price and comment information of a product from a JD web page
public class testFromWeb
{
//
public Map<String,String> getNeedMessageFromJd(String url)
{
int i=(int)(Math.random()*1000);
//建立一个访问延迟
while(i!=0)
{
i--;
}
Document doc=Jsoup.connect(url).userAgent("Mozilla").get();
String htmlString = doc.select("span.p-price").html();
String strStart="<span>¥</span><span class=\"price J-p-";
String strEnd="\"></span>";
//获取商品Id
String productId=htmlString.substring(htmlString.indexOf(strStart)+strStart.length,htmlString.lastIndexOf(strEnd));
//获取商品价格
JSONArray json=new JSONArray();
json = json.fromObject(HttpAccessUtil.httpRequest("http://p.3.cn/prices/mgets?skuIds="+productId,
"text/plain;charset=UTF-8", "POST", 100000, 100000, null));
String productPrice = json.getJSONObject(0).get("op").toString();
//获取评论
String commentStr =HttpAccessUtil.httpRequest( "https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv56668&productId="+productId+"&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1", "GBK", "text/plain;charset=GBK",
"POST", 100000, 100000, null)
strStart="\"hotCommentTagStatistics\"";
commentStr=commentStr.substring(commentStr.indexOf(strStart)-1);
commentStr=commentStr.substring(commentStr.indexOf("]")+1);
commentStr="[{"+commentStr+"}]";
json=JSONArray.fromObject(commentStr);
List<Map<String,Object>> modellList=(List<Map<String,Object>>)json.fromObject(commentStr).getJSONObject(0).get("hotCommentTagStatistics");
//获取评论
StringBuffer sb=new StringBuffer();
int count=0;
for(Map<String,Object> map:modellList)
{
if(count==0){
if(map.containsKey("name"))
{
sb.append(map.get("name").toString());
}
if(map.containsKey("count"))
{
sb.append("|"+map.get("count").toString());
}
}else
{
if(map.containsKey("name"))
{
sb.append(","+map.get("name").toString());
}
if(map.containsKey("count"))
{
sb.append("|"+map.get("count").toString());
}
}
count++;
}
Map<String,String> mapResult=new HashMap<String,String>();
mapResult.put("price",productPrice);
mapResult.put("comment",sb);
return mapResult;
}
}
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
// Utility class for calling web (HTTP) interfaces
/**
 * Utility for sending a single HTTP request through {@link HttpURLConnection}
 * and returning the response body as text.
 */
public class HttpAccessUtil
{
    /** Content-Type sent when the caller passes a blank {@code mimeTypes}. */
    private static final String DEFAULT_CONTENT_TYPE = "text/plain;charset=UTF-8";

    /** Charset used when the caller passes a blank {@code chartSet}. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Logger for request failures (fully qualified so no extra import is required). */
    private static final java.util.logging.Logger log =
        java.util.logging.Logger.getLogger(HttpAccessUtil.class.getName());

    /**
     * Sends an HTTP request and returns the response body.
     *
     * <p>The response is read line by line and the lines are concatenated
     * without their line terminators, so a multi-line body comes back as a
     * single line.
     *
     * @param requestUrl       address to call
     * @param chartSet         charset name used to encode {@code attachments} and to decode
     *                         the response; UTF-8 is used when blank
     * @param mimeTypes        value of the {@code Content-Type} request header;
     *                         {@code text/plain;charset=UTF-8} is used when blank
     * @param requestMethod    HTTP method (GET, POST)
     * @param connTimeoutMills connect timeout in milliseconds
     * @param readTimeoutMills read timeout in milliseconds
     * @param attachments      optional request body, either a single string such as
     *                         {"json":"value"} or parameters in A=a&amp;B=b form; nothing is
     *                         written when it is blank
     *
     * @return the response body with line terminators removed
     * @throws Exception any failure while connecting, writing or reading; it is logged and rethrown
     */
    public static String httpRequest(String requestUrl, String chartSet, String mimeTypes,
                                     String requestMethod, int connTimeoutMills,
                                     int readTimeoutMills, String attachments)
        throws Exception
    {
        String charset = StringUtils.isNotBlank(chartSet) ? chartSet : DEFAULT_CHARSET;
        String contentType = StringUtils.isNotBlank(mimeTypes) ? mimeTypes : DEFAULT_CONTENT_TYPE;
        boolean hasBody = StringUtils.isNotBlank(attachments);

        HttpURLConnection httpUrlConn = null;
        InputStream inputStream = null;
        InputStreamReader inputStreamReader = null;
        BufferedReader bufferedReader = null;
        OutputStream outputStream = null;
        StringBuilder buffer = new StringBuilder();
        try
        {
            URL url = new URL(requestUrl);
            httpUrlConn = (HttpURLConnection)url.openConnection();
            httpUrlConn.setRequestProperty("Content-Type", contentType);
            httpUrlConn.setConnectTimeout(connTimeoutMills);
            httpUrlConn.setReadTimeout(readTimeoutMills);
            // Output is only enabled when there is a body to send.
            httpUrlConn.setDoOutput(hasBody);
            httpUrlConn.setDoInput(true);
            httpUrlConn.setUseCaches(false);
            httpUrlConn.setRequestMethod(requestMethod);
            httpUrlConn.connect();

            if (hasBody)
            {
                outputStream = httpUrlConn.getOutputStream();
                // Encode with the caller's charset so non-ASCII text is not garbled.
                outputStream.write(attachments.getBytes(charset));
                outputStream.flush();
                // The body must be closed before the response is requested.
                outputStream.close();
            }

            inputStream = httpUrlConn.getInputStream();
            inputStreamReader = new InputStreamReader(inputStream, charset);
            bufferedReader = new BufferedReader(inputStreamReader);
            String line;
            while ((line = bufferedReader.readLine()) != null)
            {
                buffer.append(line);
            }
        }
        catch (Exception e)
        {
            log.log(java.util.logging.Level.SEVERE, requestMethod + " " + requestUrl + " error ", e);
            throw e;
        }
        finally
        {
            // closeQuietly tolerates null and swallows close failures, so no extra guard is needed.
            IOUtils.closeQuietly(bufferedReader);
            IOUtils.closeQuietly(inputStreamReader);
            IOUtils.closeQuietly(inputStream);
            IOUtils.closeQuietly(outputStream);
            if (httpUrlConn != null)
            {
                httpUrlConn.disconnect();
            }
        }
        return buffer.toString();
    }
}
// Web page scraping class
package com.xxx.xx;
import com.xxx.xx.HttpAccessUtil;
import net.sf.json.JSONArray;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// Fetches the price and comment information of a product from a JD web page
public class testFromWeb
{
//
public Map<String,String> getNeedMessageFromJd(String url)
{
int i=(int)(Math.random()*1000);
//建立一个访问延迟
while(i!=0)
{
i--;
}
Document doc=Jsoup.connect(url).userAgent("Mozilla").get();
String htmlString = doc.select("span.p-price").html();
String strStart="<span>¥</span><span class=\"price J-p-";
String strEnd="\"></span>";
//获取商品Id
String productId=htmlString.substring(htmlString.indexOf(strStart)+strStart.length,htmlString.lastIndexOf(strEnd));
//获取商品价格
JSONArray json=new JSONArray();
json = json.fromObject(HttpAccessUtil.httpRequest("http://p.3.cn/prices/mgets?skuIds="+productId,
"text/plain;charset=UTF-8", "POST", 100000, 100000, null));
String productPrice = json.getJSONObject(0).get("op").toString();
//获取评论
String commentStr =HttpAccessUtil.httpRequest( "https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv56668&productId="+productId+"&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1", "GBK", "text/plain;charset=GBK",
"POST", 100000, 100000, null)
strStart="\"hotCommentTagStatistics\"";
commentStr=commentStr.substring(commentStr.indexOf(strStart)-1);
commentStr=commentStr.substring(commentStr.indexOf("]")+1);
commentStr="[{"+commentStr+"}]";
json=JSONArray.fromObject(commentStr);
List<Map<String,Object>> modellList=(List<Map<String,Object>>)json.fromObject(commentStr).getJSONObject(0).get("hotCommentTagStatistics");
//获取评论
StringBuffer sb=new StringBuffer();
int count=0;
for(Map<String,Object> map:modellList)
{
if(count==0){
if(map.containsKey("name"))
{
sb.append(map.get("name").toString());
}
if(map.containsKey("count"))
{
sb.append("|"+map.get("count").toString());
}
}else
{
if(map.containsKey("name"))
{
sb.append(","+map.get("name").toString());
}
if(map.containsKey("count"))
{
sb.append("|"+map.get("count").toString());
}
}
count++;
}
Map<String,String> mapResult=new HashMap<String,String>();
mapResult.put("price",productPrice);
mapResult.put("comment",sb);
return mapResult;
}
}