Java抓取带验证码登录后的页面

 

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Small HTTP helper built on {@link HttpURLConnection} that demonstrates fetching a page which sits
 * behind a captcha-protected login: download the captcha image, replay the cookies sent with it,
 * post the login form and finally request the protected page.
 *
 * <p>All public methods report failure through their return value ({@code null}, {@code false} or an
 * empty string) and write a diagnostic line to {@code System.err}.
 */
public class CookieUtil {

    public final static String CONTENT_TYPE = "Content-Type";

    /** Charset used to decode a response that does not declare a usable one. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Maximum number of redirects {@link #curl} follows, so a redirect loop cannot recurse forever. */
    private static final int MAX_REDIRECTS = 10;

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Runs the demo flow against the hard-coded local server: fetch the captcha, ask the user to type
     * it in, log in, then download the role list page to {@code D:/sss.txt}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Step 1: download the captcha image; its response headers are the source of the cookies.
        Content content = getRandom("GET", "http://localhost:9090/w/random.action", null, null, false, "d:/");
        if (content == null) {
            System.err.println("Could not fetch the captcha image; aborting.");
            return;
        }

        // Step 2: turn the Set-Cookie response headers into a Cookie request header for later calls.
        Map<String, String> resmap = new HashMap<String, String>();
        List<String> setCookies = findHeader(content.getHeaders(), "Set-Cookie");
        if (setCookies != null) {
            String cookieHeader = buildCookieHeader(setCookies);
            System.out.println("sb.toString() = " + cookieHeader);
            resmap.put("Cookie", cookieHeader);
        }

        // Step 3: the user looks at the saved image and types the captcha text.
        String a = "";
        System.out.print("请输入验证码:");
        BufferedReader strin = new BufferedReader(new InputStreamReader(System.in));
        try {
            String line = strin.readLine();
            // readLine() yields null at end of input; keep the empty default in that case.
            if (line != null) {
                a = line.trim();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("输入的数是:" + a);

        // Step 4: post the login form with the cookies collected in step 2.
        String userCode = "admin";
        String password = "123456";
        String loginUrl = "http://localhost:9090/w/login.action";
        String rateReviewUrl = "http://localhost:9090/w/system/role_list.action";
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("userCode", userCode);
        paramMap.put("password", password);
        paramMap.put("random", a);
        content = curl("POST", loginUrl, paramMap, resmap, false, "");
        // The parentheses matter: without them the concatenation is evaluated before the null test.
        System.out.println("第一次 content.getBody()= " + (content == null ? "no body" : content.getBody()));

        // Step 5: request the protected page with the same cookies and save it.
        content = curl("POST", rateReviewUrl, new HashMap<String, String>(), resmap, false, "");
        if (content != null) {
            inFile(content.getBody(), "D:/sss.txt");
        }
        System.out.println("第二次content.getBody() = " + (content == null ? "no body" : content.getBody()));
    }

    /**
     * Sends one HTTP request and returns the response, following 301/302 redirects manually
     * (each hop is re-sent as a POST with the same parameters and headers).
     *
     * @param method             "GET" or "POST" in any letter case; {@code null} or any other value means POST
     * @param sUrl               URL to request
     * @param paramMap           form fields sent as the URL-encoded body of a POST; may be {@code null}
     * @param requestHeaderMap   extra request headers (for example the Cookie header); may be {@code null}
     * @param isOnlyReturnHeader when {@code true} the body is not read and the returned body is empty
     * @param path               unused; kept so existing callers keep compiling
     * @return the response for status 200/201, or {@code null} for any other status, on too many
     *         redirects, or on any error
     */
    public static Content curl(String method,
            String sUrl,
            Map<String, String> paramMap,
            Map<String, String> requestHeaderMap,
            boolean isOnlyReturnHeader,
            String path) {
        return curl(method, sUrl, paramMap, requestHeaderMap, isOnlyReturnHeader, path, MAX_REDIRECTS);
    }

    /** Implementation of {@link #curl} that additionally tracks how many redirects may still be followed. */
    private static Content curl(String method,
            String sUrl,
            Map<String, String> paramMap,
            Map<String, String> requestHeaderMap,
            boolean isOnlyReturnHeader,
            String path,
            int redirectsLeft) {
        System.out.println("-------------" + sUrl + "-------------------");
        HttpURLConnection connection = null;
        try {
            URL url = new URL(sUrl);
            connection = openConnection(url, normalizeMethod(method), paramMap, requestHeaderMap);
            int responseCode = connection.getResponseCode();

            if (responseCode == HttpURLConnection.HTTP_MOVED_PERM
                    || responseCode == HttpURLConnection.HTTP_MOVED_TEMP) {
                String location = connection.getHeaderField("Location");
                if (location == null) {
                    System.err.println("Redirect without a Location header from " + sUrl);
                    return null;
                }
                if (redirectsLeft <= 0) {
                    System.err.println("Too many redirects, giving up at " + sUrl);
                    return null;
                }
                // Location may be relative, so resolve it against the URL that was just requested.
                String target = new URL(url, location).toExternalForm();
                return curl("POST", target, paramMap, requestHeaderMap, isOnlyReturnHeader, path,
                        redirectsLeft - 1);
            }

            if (responseCode == HttpURLConnection.HTTP_OK
                    || responseCode == HttpURLConnection.HTTP_CREATED) {
                byte[] bytes = new byte[0];
                if (!isOnlyReturnHeader) {
                    bytes = readFully(connection.getInputStream());
                }
                String body = new String(bytes, responseEncoding(connection));
                return new Content(sUrl, body, connection.getHeaderFields());
            }
            return null;
        } catch (Exception e) {
            // Callers rely on null meaning failure, so report the cause here instead of rethrowing.
            System.err.println("Request to " + sUrl + " failed: " + e);
            return null;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Requests the captcha image and stores the response body in the file {@code code.bmp} inside
     * {@code path}. Redirects are not followed.
     *
     * @param method             "GET" or "POST" in any letter case; {@code null} or any other value means POST
     * @param sUrl               URL of the captcha image
     * @param paramMap           form fields sent as the body of a POST; may be {@code null}
     * @param requestHeaderMap   extra request headers; may be {@code null}
     * @param isOnlyReturnHeader when {@code true} the image is not downloaded
     * @param path               directory that receives {@code code.bmp}; {@code null} makes the file
     *                           name relative to the working directory
     * @return a {@link Content} with an empty body and the response headers for status 200/201,
     *         or {@code null} for any other status or on any error
     */
    public static Content getRandom(String method,
            String sUrl,
            Map<String, String> paramMap,
            Map<String, String> requestHeaderMap,
            boolean isOnlyReturnHeader,
            String path) {
        HttpURLConnection connection = null;
        try {
            connection = openConnection(new URL(sUrl), normalizeMethod(method), paramMap, requestHeaderMap);
            int responseCode = connection.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK
                    && responseCode != HttpURLConnection.HTTP_CREATED) {
                return null;
            }
            if (!isOnlyReturnHeader) {
                saveToFile(connection.getInputStream(), new File(path, "code.bmp"));
            }
            // The image bytes go to disk, so the in-memory body stays empty; only headers matter here.
            return new Content(sUrl, "", connection.getHeaderFields());
        } catch (Exception e) {
            System.err.println("Captcha request to " + sUrl + " failed: " + e);
            return null;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Extracts the charset named by a Content-Type header value.
     *
     * @param contentType header value such as {@code text/html; charset=GBK}; may be {@code null}
     * @return {@code null} when {@code contentType} is {@code null}; otherwise the declared charset
     *         if this JVM supports it (the last one wins when several are declared), else "UTF-8"
     */
    public static String getEncodingFromContentType(String contentType) {
        if (contentType == null) {
            return null;
        }
        String encoding = null;
        StringTokenizer tok = new StringTokenizer(contentType, ";");
        if (tok.hasMoreTokens()) {
            tok.nextToken(); // the first token is the media type itself, not a parameter
            while (tok.hasMoreTokens()) {
                String assignment = tok.nextToken().trim();
                int eqIdx = assignment.indexOf('=');
                if (eqIdx == -1) {
                    continue;
                }
                String varName = assignment.substring(0, eqIdx).trim();
                if (!"charset".equalsIgnoreCase(varName)) {
                    continue;
                }
                String varValue = assignment.substring(eqIdx + 1).trim();
                // Strip surrounding quotes; the length test keeps a lone '"' from breaking substring().
                if (varValue.length() >= 2 && varValue.startsWith("\"") && varValue.endsWith("\"")) {
                    varValue = varValue.substring(1, varValue.length() - 1).trim();
                }
                if (isSupportedCharset(varValue)) {
                    encoding = varValue;
                }
            }
        }
        return encoding == null ? DEFAULT_ENCODING : encoding;
    }

    /**
     * Writes text to a file, replacing any previous content. The platform default charset is used.
     *
     * @param content text to write
     * @param path    target file
     * @return {@code true} when everything was written; {@code false} when an argument is
     *         {@code null} or the file could not be opened or written
     */
    public static boolean inFile(String content, String path) {
        if (content == null || path == null) {
            return false;
        }
        PrintWriter out = null;
        try {
            out = new PrintWriter(new FileWriter(new File(path)));
            out.write(content);
            out.flush();
            // PrintWriter never throws on write errors; checkError() is the only way to notice them.
            return !out.checkError();
        } catch (IOException e) {
            System.err.println("Could not write " + path + ": " + e);
            return false;
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Downloads a page with a plain GET, decodes it as UTF-8 and returns it with every line
     * terminated by {@code "\n"}. The response status code is printed to standard output.
     *
     * @param httpurl URL of the page
     * @return the page text, or an empty string on any error
     */
    public static String getHtmlReadLine(String httpurl) {
        HttpURLConnection connection = null;
        BufferedReader reader = null;
        try {
            connection = (HttpURLConnection) new URL(httpurl).openConnection();
            connection.connect();
            System.out.println(connection.getResponseCode());
            reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"));

            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line).append("\n");
            }
            return page.toString();
        } catch (Exception e) {
            System.err.println("Could not read " + httpurl + ": " + e);
            return "";
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when closing fails; the result is already decided.
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Maps any spelling of GET to "GET" and everything else, including {@code null}, to "POST". */
    private static String normalizeMethod(String method) {
        return "GET".equalsIgnoreCase(method) ? "GET" : "POST";
    }

    /**
     * Opens and configures a connection, applies the extra headers and, for a POST, sends the form
     * body. The connection is disconnected again if anything fails before it is returned.
     */
    private static HttpURLConnection openConnection(URL url, String method, Map<String, String> paramMap,
            Map<String, String> requestHeaderMap) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        boolean ready = false;
        try {
            connection.setRequestMethod(method);
            connection.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5");
            // Redirects are handled by the caller, not by HttpURLConnection.
            connection.setInstanceFollowRedirects(false);
            connection.setDoInput(true);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.setUseCaches(false);

            if (requestHeaderMap != null) {
                for (Map.Entry<String, String> entry : requestHeaderMap.entrySet()) {
                    if (entry.getKey() != null && entry.getValue() != null) {
                        connection.setRequestProperty(entry.getKey(), entry.getValue());
                    }
                }
            }

            if ("POST".equals(method)) {
                byte[] postContent = encodeForm(paramMap);
                connection.setDoOutput(true);
                connection.setFixedLengthStreamingMode(postContent.length);
                OutputStream postOut = connection.getOutputStream();
                try {
                    postOut.write(postContent);
                    postOut.flush();
                } finally {
                    postOut.close();
                }
            } else {
                connection.connect();
            }
            ready = true;
            return connection;
        } finally {
            if (!ready) {
                connection.disconnect();
            }
        }
    }

    /**
     * Encodes form fields as {@code name=value} pairs joined by '&amp;', URL-encoded as UTF-8.
     * A {@code null} map gives an empty body, {@code null} names are skipped and {@code null}
     * values are sent as empty strings.
     */
    private static byte[] encodeForm(Map<String, String> paramMap) throws IOException {
        StringBuilder form = new StringBuilder();
        if (paramMap != null) {
            for (Map.Entry<String, String> entry : paramMap.entrySet()) {
                if (entry.getKey() == null) {
                    continue;
                }
                if (form.length() > 0) {
                    form.append('&');
                }
                String value = entry.getValue() == null ? "" : entry.getValue();
                form.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
                form.append('=');
                form.append(URLEncoder.encode(value, "UTF-8"));
            }
        }
        return form.toString().getBytes("UTF-8");
    }

    /** Reads the stream to its end and closes it, even when reading fails. */
    private static byte[] readFully(InputStream in) throws IOException {
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] buf = new byte[4096];
            int count;
            while ((count = in.read(buf)) != -1) {
                collected.write(buf, 0, count);
            }
            return collected.toByteArray();
        } finally {
            in.close();
        }
    }

    /** Copies the stream into the target file and closes both, even when copying fails. */
    private static void saveToFile(InputStream in, File target) throws IOException {
        try {
            OutputStream out = new FileOutputStream(target);
            try {
                byte[] buffer = new byte[4096];
                int count;
                while ((count = in.read(buffer)) != -1) {
                    out.write(buffer, 0, count);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /** Returns the charset declared by the response, or UTF-8 when there is no Content-Type header. */
    private static String responseEncoding(HttpURLConnection connection) {
        String encoding = getEncodingFromContentType(connection.getHeaderField(CONTENT_TYPE));
        return encoding == null ? DEFAULT_ENCODING : encoding;
    }

    /** Like {@link Charset#isSupported(String)}, but answers {@code false} for an illegal charset name. */
    private static boolean isSupportedCharset(String name) {
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException e) {
            return false;
        }
    }

    /**
     * Looks up a response header by name, ignoring letter case because HTTP header names are
     * case-insensitive. Returns {@code null} when the header is absent.
     */
    private static List<String> findHeader(Map<String, List<String>> headers, String name) {
        if (headers == null) {
            return null;
        }
        for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
            // The header map can contain a null key (the status line); equalsIgnoreCase copes with it.
            if (name.equalsIgnoreCase(entry.getKey())) {
                return entry.getValue();
            }
        }
        return null;
    }

    /**
     * Builds a Cookie request header value from Set-Cookie response header values: keeps the
     * leading {@code name=value} of each entry, drops attributes such as Path, and joins the
     * pairs with {@code "; "}. Entries without '=' are ignored.
     */
    private static String buildCookieHeader(List<String> setCookieValues) {
        StringBuilder cookies = new StringBuilder();
        for (String val : setCookieValues) {
            if (val == null) {
                continue;
            }
            int eq = val.indexOf('=');
            if (eq == -1) {
                continue;
            }
            int semi = val.indexOf(';', eq);
            String pair = semi == -1 ? val : val.substring(0, semi);
            if (cookies.length() > 0) {
                cookies.append("; ");
            }
            cookies.append(pair.trim());
        }
        return cookies.toString();
    }
}

/**
 * Holder for one HTTP response: the URL that was requested, the decoded body text and the
 * response headers.
 */
class Content {
    private final String url;
    private final String body;
    private final Map<String, List<String>> headers;

    /**
     * @param url     URL that was requested
     * @param body    response body as text
     * @param headers response headers keyed by header name; {@code null} is stored as an empty map
     */
    public Content(String url, String body, Map<String, List<String>> headers) {
        this.url = url;
        this.body = body;
        // Substitute an empty map so callers of getHeaders() never have to null-check the result.
        this.headers = headers != null ? headers : new HashMap<String, List<String>>();
    }

    /** @return the URL passed to the constructor */
    public String getUrl() {
        return url;
    }

    /** @return the body passed to the constructor */
    public String getBody() {
        return body;
    }

    /** @return the response headers; never {@code null} */
    public Map<String, List<String>> getHeaders() {
        return headers;
    }

}


评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值