网络爬虫学习2

package com.li.bean;

import java.util.HashMap;

import org.apache.http.Header;
import org.apache.http.HttpEntity;

/**
 * Mutable holder for the pieces of an HTTP exchange that the crawler keeps:
 * a cookie string, the status code, the response headers indexed by header
 * name, and the response entity.
 */
public class Result {

    private String cookie;
    private int statusCode;
    private HashMap<String, Header> headersByName;
    private HttpEntity httpEntity;

    /** @return the stored cookie string, or {@code null} if none was set */
    public String getCookie() {
        return this.cookie;
    }

    /** @param cookie the cookie string to store */
    public void setCookie(String cookie) {
        this.cookie = cookie;
    }

    /** @return the stored HTTP status code */
    public int getStatusCode() {
        return this.statusCode;
    }

    /** @param statusCode the HTTP status code to store */
    public void setStatusCode(int statusCode) {
        this.statusCode = statusCode;
    }

    /**
     * @return the headers keyed by their exact name, or {@code null} if
     *         {@link #setHeaders(Header[])} has not been called yet
     */
    public HashMap<String, Header> getHeaders() {
        return this.headersByName;
    }

    /**
     * Replaces the header index with one built from the given array. When a
     * name occurs more than once, the last header with that name wins.
     *
     * @param headers the headers to index by name
     */
    public void setHeaders(Header[] headers) {
        this.headersByName = new HashMap<String, Header>();
        for (int i = 0; i < headers.length; i++) {
            Header current = headers[i];
            this.headersByName.put(current.getName(), current);
        }
    }

    /** @return the stored response entity, or {@code null} if none was set */
    public HttpEntity getHttpEntity() {
        return this.httpEntity;
    }

    /** @param httpEntity the response entity to store */
    public void setHttpEntity(HttpEntity httpEntity) {
        this.httpEntity = httpEntity;
    }
}


package com.li.main; import java.io.IOException; import java.net.URLEncoder; import java.util.HashMap; import java.util.Map; import org.apache.http.client.ClientProtocolException; import com.li.bean.Result; import com.li.utli.SendRequest; public class 优快云 { public static void downLoadAndAcomment(String cookie,String downLoadUrl,String acommentLink) throws ClientProtocolException, IOException { String referer[] = downLoadUrl.split("/"); Map<String,String> headers = new HashMap<String,String>(); headers.put("Cookie", cookie); headers.put("Referer", "http://d.download.youkuaiyun.com/down/"+referer[7]+"/"+referer[8]); headers.put("Host", "d.download.youkuaiyun.com"); headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0"); Map<String,String> parameters = new HashMap<String,String>(); parameters.put("dx", "ds"); Result res = SendRequest.sendPost(downLoadUrl, headers, parameters,"utf-8"); String location = res.getHeaders().get("Location").getValue(); SendRequest.sendGet(location, headers, null, "utf-8",true); headers.put("Referer",acommentLink); headers.put("Host","review.youkuaiyun.com"); Map<String,String> acommentParameters = new HashMap<String,String>(); acommentParameters.put("rt", "3"); acommentParameters.put("title",URLEncoder.encode("优快云 下载频道", "utf-8").replaceAll("\\+", "%20")); acommentParameters.put("description", "%E8%B5%84%E6%BA%90%E6%8C%BA%E4%B8%8D%E9%94%99%E7%9A%84%EF%BC%81%EF%BC%81"); acommentParameters.put("rating", "4"); acommentParameters.put("url", acommentLink); acommentParameters.put("extended", referer[8]); acommentParameters.put("jsoncallback", "jsonp1314023443336"); acommentParameters.put("_", "1314023459837"); SendRequest.sendGet("http://review.youkuaiyun.com/rest/v1/reviews/add", headers, acommentParameters, "utf-8"); Map<String,String> acommentheaders = new HashMap<String,String>(); acommentheaders.put("Cookie", cookie); 
SendRequest.sendGet("http://download.youkuaiyun.com/index.php/rest/users/addscoreByratings/"+referer[7]+"&jsoncallback=jsonp1314023443336?_=1314023459837", acommentheaders, null, "utf-8"); } public static String testAccount(String name, String password, Map<String, String> params) throws ClientProtocolException, IOException { Map<String,String> parameters = new HashMap<String,String>(); parameters.put("t", "log"); parameters.put("u", name); parameters.put("p", password); parameters.put("remember", "0"); parameters.put("f", "http://passport.youkuaiyun.com/account/login"); String cookie = SendRequest.sendGet("http://passport.youkuaiyun.com/ajax/accounthandler.ashx", null, parameters, "utf-8").getCookie(); return cookie; } }


package com.li.main;

import javax.swing.JOptionPane;

import com.li.utli.优快云UrlExtract;

/**
 * Program entry point: asks for credentials, attempts a login, and starts the
 * crawl when the login cookie looks valid.
 */
public class Main {

    /**
     * Prompts for user name and password (in that order), logs in, and either
     * reports a failure and exits with status 1 or starts the crawl.
     *
     * @param args ignored
     * @throws Exception whatever the login or the crawl propagates
     */
    public static void main(String[] args) throws Exception {
        String userName = JOptionPane.showInputDialog("请输入你的用户名");
        String password = JOptionPane.showInputDialog("请输入你的密码");
        String cookie = 优快云.testAccount(userName, password, null);

        // A cookie string without "UserInfo" is treated as a failed login.
        boolean loginFailed = !cookie.contains("UserInfo");
        if (loginFailed) {
            JOptionPane.showMessageDialog(null, "用户名或密码错误");
            System.exit(1);
        }

        优快云UrlExtract.cSDNShuFen(cookie);
    }
}


package com.li.utli; import java.util.List; import org.apache.http.util.EntityUtils; import com.li.bean.Result; import com.li.main.优快云; public class 优快云UrlExtract { public static void cSDNShuFen(String cookie) throws Exception{ long startt = System.currentTimeMillis(); Result result = SendRequest.sendGet("http://download.youkuaiyun.com/", null, null,"utf-8" ); String sort = EntityUtils.toString(result.getHttpEntity(),"utf-8"); List<String> sortList = HtmlParse.prase(sort, "/sort/class/\\d{5}"); for (int i = 10; i < sortList.size(); i++) { try{ Result classResult = SendRequest.sendGet("http://download.youkuaiyun.com"+sortList.get(i)+"/1", null, null, "utf-8"); String classsAll = EntityUtils.toString(classResult.getHttpEntity(),"utf-8"); String page = HtmlParse.prase(classsAll,"<a href=\"http://download.youkuaiyun.com/sort/class/\\d+/\\d+\">末页</a>" , 1).get(0); page = page.split("\"")[1]; page = page.substring(page.lastIndexOf("/")+1); for (int j = 1; j <=Integer.parseInt(page); j++) { classResult = SendRequest.sendGet("http://download.youkuaiyun.com"+sortList.get(i)+"/"+j, null, null, "utf-8"); classsAll = EntityUtils.toString(classResult.getHttpEntity(),"utf-8"); List<String> sorce = HtmlParse.prase(classsAll, "http://download.youkuaiyun.com/source/\\d+"); for (String string : sorce) { String addressHtml = null; String address = null; String acommentLink =null; Result downResult = null; String down = null; Result addressResult = null; try { downResult = SendRequest.sendGet(string, null, null, "utf-8"); down =EntityUtils.toString(downResult.getHttpEntity(),"utf-8"); down = HtmlParse.prase(down, "http://d.download.youkuaiyun.com/down/\\d+/.{3,21}>",1).get(0); down = down.substring(0,down.length()-2); addressResult = SendRequest.sendGet(down, null, null, "utf-8"); addressHtml= EntityUtils.toString(addressResult.getHttpEntity(),"utf-8"); address = HtmlParse.prase(addressHtml, 
"http://d.download.youkuaiyun.com/index.php/new/download/dodownload/\\d+/.{3,21}/\\w+",1).get(0); acommentLink = HtmlParse.prase(addressHtml, "http://download.youkuaiyun.com/source/\\d+",1).get(0); 优快云.downLoadAndAcomment(cookie,address,acommentLink); } catch (Exception e) { continue; } } } }catch (Exception e) { continue; } } long end = System.currentTimeMillis(); System.out.println("耗时"+(end-startt)); } }


package com.li.utli; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class HtmlParse { public static List<String> prase(String html,String regex,int number){ Pattern patten = Pattern.compile(regex); Matcher mat = patten.matcher(html); List<String> list = new ArrayList<String>(); while(mat.find()) { if(number==-1){ list.add(mat.group()); continue; } if(number>0){ list.add(mat.group()); number--; }else{ break; } } return list; } public static List<String> prase(String html,String regex){ return prase(html, regex, -1); } }


package com.li.utli; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.cookie.Cookie; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicHeader; import org.apache.http.message.BasicNameValuePair; import com.li.bean.Result; public class SendRequest { public static Result sendGet(String url,Map<String,String> headers,Map<String,String> params,String encoding,boolean duan) throws ClientProtocolException, IOException{ DefaultHttpClient client = new DefaultHttpClient(); url = url+(null==params?"":assemblyParameter(params)); HttpGet hp = new HttpGet(url); if(null!=headers)hp.setHeaders(assemblyHeader(headers)); HttpResponse response = client.execute(hp); if(duan==true) hp.abort(); HttpEntity entity = response.getEntity(); Result result= new Result(); result.setCookie(assemblyCookie(client.getCookieStore().getCookies())); result.setStatusCode(response.getStatusLine().getStatusCode()); result.setHeaders(response.getAllHeaders()); result.setHttpEntity(entity); return result; } public static Result sendGet(String url,Map<String,String> headers,Map<String,String> params,String encoding) throws ClientProtocolException, IOException{ return sendGet(url, headers, params, encoding,false); } public static Result sendPost(String url,Map<String,String> headers,Map<String,String> params,String encoding) throws ClientProtocolException, IOException{ DefaultHttpClient client = new DefaultHttpClient(); HttpPost post = new HttpPost(url); List<NameValuePair> list = new ArrayList<NameValuePair>(); for (String temp : params.keySet()) { 
list.add(new BasicNameValuePair(temp,params.get(temp))); } post.setEntity(new UrlEncodedFormEntity(list,encoding)); if(null!=headers)post.setHeaders(assemblyHeader(headers)); HttpResponse response = client.execute(post); HttpEntity entity = response.getEntity(); Result result = new Result(); result.setStatusCode(response.getStatusLine().getStatusCode()); result.setHeaders(response.getAllHeaders()); result.setCookie(assemblyCookie(client.getCookieStore().getCookies())); result.setHttpEntity(entity); return result ; } public static Header[] assemblyHeader(Map<String,String> headers){ Header[] allHeader= new BasicHeader[headers.size()]; int i = 0; for (String str :headers.keySet()) { allHeader[i] = new BasicHeader(str,headers.get(str)); i++; } return allHeader; } public static String assemblyCookie(List<Cookie> cookies){ StringBuffer sbu = new StringBuffer(); for (Cookie cookie : cookies) { sbu.append(cookie.getName()).append("=").append(cookie.getValue()).append(";"); } if(sbu.length()>0)sbu.deleteCharAt(sbu.length()-1); return sbu.toString(); } public static String assemblyParameter(Map<String,String> parameters){ String para = "?"; for (String str :parameters.keySet()) { para+=str+"="+parameters.get(str)+"&"; } return para.substring(0,para.length()-1); } }


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值