由于生活中需要对网站上的某张图片投票决定排名,无可避免地想起了做个自动投票机器人。
经过一天的尝试,先后试了三种方案:
(1)保存投票页面到本地,分析代码,直接用 js 和 ajax 循环提交,每次循环中间随机休眠几秒。一开始居然有效,不过好景不长,第二天就被网站改了页面,不能直接提交了。
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>
自动投票机器人一
</title>
<link rel="stylesheet" href="style.css" type="text/css" media="screen">
<script type="text/javascript">
// Number of vote requests sent so far; vote() increments it and writes it
// into the element with id "counter".
var counters = 0;
// Shared request object, (re)assigned by S_xmlhttprequest().
var xmlHttp;
// Creates a fresh request object and stores it in the global xmlHttp.
// The ActiveX variant is tried first; the native XMLHttpRequest is used only
// when ActiveXObject is not available. If neither exists, xmlHttp is left
// untouched.
function S_xmlhttprequest() {
    if (window.ActiveXObject) {
        xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
        return;
    }
    if (window.XMLHttpRequest) {
        xmlHttp = new XMLHttpRequest();
    }
}
// Sends one asynchronous vote request, bumps the on-page counter and schedules
// the next vote after a random delay (see nextTime()).
//
// Changes from the first draft:
//  - the entry id is a local variable instead of an implicit global `url`;
//  - the modal "vote succeeded" alert is gone: it paused the timer loop until
//    someone clicked OK and was shown before any response had arrived;
//  - a missing "counter" element no longer throws, which used to prevent the
//    next vote from ever being scheduled;
//  - setTimeout receives the function itself rather than a string to eval.
function vote() {
    var id = 1; // id of the entry to vote for
    S_xmlhttprequest();
    // A random number is appended to the query string (presumably to defeat
    // caching of the GET request).
    xmlHttp.open("GET", "http://www.xxxx.com/work/vote.php?id=" + id + "&" + Math.random(), true);
    xmlHttp.send(null);
    counters++;
    var counterBox = document.getElementById("counter");
    if (counterBox) {
        counterBox.innerText = counters;
    }
    setTimeout(vote, nextTime());
}
// Body onload handler: starts the voting loop one second after the page loads.
function Workspace_OnLoad() {
    setTimeout(function () {
        vote();
    }, 1000);
}
// Returns the delay before the next vote: a random whole number of
// milliseconds between 0 and 60000 inclusive.
function nextTime() {
    return Math.round(60000 * Math.random());
}
</script>
</head>
<body class="bgcolor" onload="Workspace_OnLoad();">
<div align="center">
<table class="bgjpg" align="center" border="0" cellpadding="0" cellspacing="0"
width="100%">
<tbody>
<tr height="10">
<td>
</td>
</tr>
<tr>
<td height="30">
<!-- vote() looks up the element with id "counter" and writes the number of
     requests sent so far into it; without this element that lookup is null. -->
<span id="counter">0</span>
</td>
</tr>
</tbody>
</table>
</div>
</body>
</html>
(2)发现需要提交表单到服务器,而不是直接发送 GET 请求,并且有 cookie 检查校验码,尝试用 java 编程,好像没成功,但理论上可行。
- 获取校验码图片
- tesseract OCR 解析校验码图片
- 尝试提交模拟表单
- 需要的 jar 包和 Tesseract OCR 可在网上搜索下载。
package A;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;
import com.overseas.ocr.ImageFilter;
import com.overseas.ocr.ImageIOHelper;
import com.overseas.sys.InitEnv;
import com.overseas.util.PicUtil;
import net.sourceforge.tess4j.*;
/**
 * Command-line experiment: in a loop, fetch the CAPTCHA image, run it through
 * Tesseract OCR and post the recognised code to the vote form.
 */
public class TesseractExample {

    /** Number of characters in the CAPTCHA code submitted with each vote. */
    private static final int CODE_LENGTH = 4;

    /** Scratch file the pre-processed CAPTCHA image is written to before OCR. */
    private static final String OCR_IMAGE_PATH = "C:\\temp\\ocr.tiff";

    /** Count of vote requests answered with HTTP 200 whose body could be read. */
    public int success = 0;

    /**
     * Starts the voting loop.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            TesseractExample t = new TesseractExample();
            t.doit();
        } catch (Exception e) {
            // Print the whole trace: getMessage() alone is null for many exceptions.
            e.printStackTrace();
        }
    }

    /**
     * Runs up to one million vote attempts. Each attempt sleeps a random time
     * below 50 seconds, OCRs a fresh CAPTCHA and, when the OCR result starts
     * with four digits, submits them. The loop stops early when the thread is
     * interrupted.
     */
    private void doit() {
        int counter = 1000000;
        for (int i = 0; i < counter; i++) {
            try {
                Thread.sleep((int) (Math.random() * 50000));
            } catch (InterruptedException e) {
                // Restore the flag and stop instead of spinning through the loop.
                Thread.currentThread().interrupt();
                return;
            }
            String decodeText = decode();
            System.out.println("Start excute..." + i + "=====" + decodeText);
            String code = extractCode(decodeText);
            if (code == null) {
                // OCR failed or did not yield a usable code; try again next round.
                continue;
            }
            try {
                vote(code);
                System.out.println("call vote..." + code);
            } catch (RuntimeException e) {
                // One failed attempt must not end the loop, but should be visible.
                e.printStackTrace();
            }
        }
    }

    /**
     * Extracts the CAPTCHA code from raw OCR output.
     *
     * <p>The code is kept as text so that leading zeros survive; converting it
     * to an int and back would turn "0123" into "123".
     *
     * @param ocrText raw OCR output, may be {@code null}
     * @return the first {@value #CODE_LENGTH} characters of the trimmed text
     *         when they are all ASCII digits, otherwise {@code null}
     */
    private static String extractCode(String ocrText) {
        if (ocrText == null) {
            return null;
        }
        String text = ocrText.trim();
        if (text.length() < CODE_LENGTH) {
            return null;
        }
        String code = text.substring(0, CODE_LENGTH);
        for (int i = 0; i < code.length(); i++) {
            char ch = code.charAt(i);
            if (ch < '0' || ch > '9') {
                return null;
            }
        }
        return code;
    }

    /**
     * Posts the vote form with the given CAPTCHA code and prints the outcome.
     * The connection is released on every path, including redirects and I/O
     * failures.
     *
     * @param code CAPTCHA code to send in the {@code numtext} field
     */
    private void vote(String code) {
        HttpClient httpClient = new HttpClient();
        String url = "http://www.xxxx.com/work/chick.php?id=1";
        PostMethod postMethod = new PostMethod(url);
        // Values for the individual form fields.
        // NOTE(review): the non-ASCII submit label is encoded with the method's
        // request charset; confirm that it matches what the site expects.
        NameValuePair[] data = {
            new NameValuePair("numtext", code),
            new NameValuePair("submit", "确认"),
        };
        postMethod.setRequestBody(data);
        try {
            int statusCode = httpClient.executeMethod(postMethod);
            // HttpClient does not automatically follow 301/302 redirects for
            // requests such as POST and PUT, so they are only reported here.
            if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY
                    || statusCode == HttpStatus.SC_MOVED_TEMPORARILY) {
                Header locationHeader = postMethod.getResponseHeader("location");
                if (locationHeader != null) {
                    System.out.println("The page was redirected to:" + locationHeader.getValue());
                } else {
                    System.err.println("Location field value is null.");
                }
                return;
            }
            System.out.println(postMethod.getStatusLine());
            String body = postMethod.getResponseBodyAsString();
            if (statusCode == HttpStatus.SC_OK) {
                // HTTP 200 only means the request was served; whether the vote
                // was accepted has to be read from the body printed below.
                System.out.println("Success! ooooooooooooooooo" + success++);
            }
            System.out.println("======================================================");
            System.out.println(utf8Togb2312(body == null ? "" : body));
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            postMethod.releaseConnection();
        }
    }

    /**
     * Fetches a CAPTCHA image, converts it with the project's image helpers,
     * writes it to {@link #OCR_IMAGE_PATH} and runs Tesseract on that file.
     *
     * @return the raw OCR text, or {@code null} when any step fails
     */
    private String decode() {
        String url = "http://www.xxxx.com/work/che.php?" + Math.random();
        String protocol = url.startsWith("https") ? "https" : "http";
        InputStream instream = InitEnv.class.getResourceAsStream(InitEnv.CERTPATH);
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            // Project helper; presumably downloads the image at url into outputStream.
            PicUtil.getPic(protocol, url, 80, "", instream, outputStream);
            byte[] b = outputStream.toByteArray();
            ImageFilter imageFilter = new ImageFilter(new ByteArrayInputStream(b));
            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            ImageIOHelper.createImage(imageFilter.changeGrey(), byteArrayOutputStream);
            File file = new File(OCR_IMAGE_PATH);
            // FileOutputStream creates the file when needed; close it before OCR
            // so the handle is not leaked once per loop iteration.
            FileOutputStream to = new FileOutputStream(file);
            try {
                byteArrayOutputStream.writeTo(to);
            } finally {
                to.close();
            }
            Tesseract instance = Tesseract.getInstance();
            return instance.doOCR(file);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (instream != null) {
                try {
                    // Harmless if the helper has already closed the stream.
                    instream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failed close here.
                }
            }
        }
    }

    /**
     * Makes a response body readable for printing: {@code '+'} becomes a
     * space, well-formed {@code %XX} escapes become the character with that
     * code, and the result is then re-read as UTF-8 when it consists purely of
     * characters up to U+00FF (i.e. when it can be a byte-per-char view of
     * UTF-8 data).
     *
     * <p>Malformed or truncated {@code %} sequences are kept literally, so
     * ordinary HTML such as {@code width="100%"} no longer aborts the
     * conversion. Text that already contains characters above U+00FF is
     * returned without the byte round-trip, which would replace them with
     * {@code '?'}.
     *
     * @param str text to convert, must not be {@code null}
     * @return the converted text, never {@code null}
     */
    private String utf8Togb2312(String str) {
        StringBuilder sb = new StringBuilder(str.length());
        boolean byteSized = true;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c == '+') {
                c = ' ';
            } else if (c == '%' && i + 2 < str.length()) {
                int hi = Character.digit(str.charAt(i + 1), 16);
                int lo = Character.digit(str.charAt(i + 2), 16);
                if (hi >= 0 && lo >= 0) {
                    c = (char) (hi * 16 + lo);
                    i += 2;
                }
            }
            if (c > 0xFF) {
                byteSized = false;
            }
            sb.append(c);
        }
        String result = sb.toString();
        if (!byteSized) {
            return result;
        }
        // Undo conversion to external encoding.
        byte[] inputBytes = result.getBytes(java.nio.charset.Charset.forName("ISO-8859-1"));
        return new String(inputBytes, java.nio.charset.Charset.forName("UTF-8"));
    }
}
(3)既然前面两种方法都没办法成功,尝试用第三种方法吧,比较复杂,但理论上一定可用(做出来后才发现,投票时间已经结束,无法验证)
- greasemonkey
- ajax 取得图片
- 上传到图片解析服务器(需要搭建专门 OCR 解析服务器)
- 从解析服务器返回解析后的校验码
- 填充表单校验码,提交
// ==UserScript==
// @name AutoVote
// @namespace autovote
// @include http://www.xxxx.com/work/workshow.php?id=1
// @include http://www.xxxx.com/work/index.php
// @include http://www.xxxx.com/work/show.php
// ==/UserScript==
// Page that start() navigates to before doing anything else.
var VOTE_URL = 'http://www.xxxx.com/work/workshow.php?id=1';
// Base URL of the CAPTCHA image; load_image() appends a random number to it.
var IMG_URL = 'http://www.xxxx.com/work/che.php?';
// Endpoint that upload() posts the image bytes to for OCR.
var DECODE_SERVER_URL = 'http://localhost/decode/image';
// start() and load_image() do nothing once counter exceeds this value.
var MAX_COUNT = 10;
// Number of CAPTCHA downloads started by this script instance.
// NOTE(review): this is a plain variable, so it presumably starts from 0 again
// whenever the script is re-run after a page load - confirm the limit works.
var counter = 0;
// Entry point of the vote flow.
//  1. When the browser is not on the vote page, navigate there and stop; the
//     rest of the flow only makes sense on that page.
//  2. Reveal the element with id "title" (when present) and fetch a CAPTCHA
//     via load_image(); the decoded value is filled in and the form submitted
//     by the callbacks further down the chain.
// Opening a new window and closing the current one were planned as later
// steps but are not implemented.
function start()
{
    if (counter > MAX_COUNT) {
        return;
    }
    if (document.location.href != VOTE_URL) {
        document.location.href = VOTE_URL;
        // Navigation has been requested; do not keep working on this page.
        return;
    }
    var title = document.getElementById('title');
    if (title) {
        title.style.display = "block";
    }
    load_image();
}
// Downloads a fresh CAPTCHA image and hands its raw data to decode_image().
// Does nothing once counter has exceeded MAX_COUNT.
//
// The blocking debug alert that used to sit here was removed: it stopped the
// automated flow until someone dismissed the dialog. The stray argument to
// Math.random(), which the function ignores, was dropped as well.
function load_image()
{
    if (counter > MAX_COUNT) {
        return;
    }
    counter++;
    var imageSrc = IMG_URL + Math.random();
    GM_xmlhttpRequest({
        method: 'GET',
        url: imageSrc,
        // Ask for the response as x-user-defined text so that data_array()
        // can recover each byte from the low 8 bits of a character code.
        overrideMimeType: 'text/plain; charset=x-user-defined',
        onload: function(response) { decode_image(response.responseText); }
    });
}
// Puts a placeholder into the "numtext" field and sends the downloaded image
// data to the decode server via upload().
function decode_image(data)
{
    document.getElementById('numtext').value = 'working...';
    upload(data);
}
// Posts the image bytes to DECODE_SERVER_URL as a form field named "data"
// holding comma-separated decimal byte values; the response text is passed on
// to submit_form().
function upload(data) {
    var payload = 'data=' + data_array(data).join(',');
    GM_xmlhttpRequest({
        method: 'POST',
        headers: {'Content-type': 'application/x-www-form-urlencoded'},
        url: DECODE_SERVER_URL,
        data: payload,
        onload: function(reply) { submit_form(reply.responseText); }
    });
}
// Handles the decode server's answer.
// A reply equal to -1 means no code was recognised: a new CAPTCHA is requested
// and nothing is submitted (previously the flow fell through and submitted
// "-1" as the code). Any other reply is written into the "numtext" field and
// the first form on the page is submitted.
function submit_form(data)
{
    if (data == -1) {
        load_image();
        return;
    }
    var textbox = document.getElementById('numtext');
    var form1 = document.forms[0];
    if (!textbox || !form1) {
        // The vote form is not on this page; there is nothing to submit.
        return;
    }
    textbox.value = data;
    form1.submit();
}
// Converts a string into an array holding the low 8 bits of each character
// code, i.e. one value between 0 and 255 per character.
function data_array(data)
{
    var bytes = [];
    var i = 0;
    while (i < data.length) {
        bytes.push(data.charCodeAt(i) & 0xff);
        i++;
    }
    return bytes;
}
// Kick off the vote flow as soon as the script is evaluated.
start();
import java.util.Map;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import javaplus.base.BaseRestSpringController;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.validation.Valid;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.springframework.beans.propertyeditors.CustomDateEditor;
import org.springframework.stereotype.Controller;
import org.springframework.ui.ModelMap;
import org.springframework.validation.BindingResult;
import org.springframework.validation.Validator;
import org.springframework.web.bind.WebDataBinder;
import org.springframework.web.bind.annotation.InitBinder;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.webframework.page.Page;
import org.webframework.web.scope.Flash;
import java.util.*;
import javaplus.base.*;
import javaplus.util.*;
import org.webframework.util.*;
import org.webframework.web.util.*;
import org.webframework.page.*;
import org.webframework.page.impl.*;
import com.company.project.model.*;
import com.company.project.dao.*;
import com.company.project.service.*;
import com.company.project.vo.query.*;
import com.overseas.ocr.ImageFilter;
import com.overseas.ocr.ImageIOHelper;
/**
 * Spring MVC controller that accepts CAPTCHA image bytes posted as
 * comma-separated decimal values, runs them through Tesseract OCR and answers
 * with the recognised code, or {@code "-1"} when no code could be produced.
 *
 * @author mmm email:mmm(a)gmail.com
 * @version 1.0
 * @since 1.0
 */
@Controller
@RequestMapping("/decode")
public class DecodeImageController extends BaseRestSpringController<UserInfo,java.lang.Long>{

    /** Value returned to the client whenever no code could be produced. */
    private static final String FAILURE = "-1";

    /** Number of characters of OCR output returned as the code. */
    private static final int CODE_LENGTH = 4;

    private UserInfoManager userInfoManager;

    private final String LIST_ACTION = "redirect:/userinfo";

    /**
     * Setter used by Spring autowiring to inject the manager; the property
     * name is case-sensitive.
     */
    public void setUserInfoManager(UserInfoManager manager) {
        this.userInfoManager = manager;
    }

    /** Registers a {@code yyyy-MM-dd} editor used when binding bean properties of type Date. */
    @InitBinder
    public void initBinder(WebDataBinder binder) {
        binder.registerCustomEditor(Date.class, new CustomDateEditor(new SimpleDateFormat("yyyy-MM-dd"), true));
    }

    /**
     * Runs before the handler methods of this controller because of
     * {@code @ModelAttribute}; a place for shared values such as enum values
     * or other initialisation. Currently stores the current time as "now".
     */
    @ModelAttribute
    public void init(ModelMap model) {
        model.put("now", new java.sql.Timestamp(System.currentTimeMillis()));
    }

    /**
     * Decodes an uploaded CAPTCHA image.
     *
     * <p>Every failure (empty or malformed data, image conversion error, OCR
     * error, too little recognised text) yields {@code "-1"}, the value the
     * method starts out with as its failure marker.
     *
     * <p>NOTE(review): the String is returned without {@code @ResponseBody};
     * depending on the project's MVC configuration it may be treated as a view
     * name rather than the response body - confirm.
     *
     * @param request the current request (unused)
     * @param data image bytes as comma-separated decimal values
     * @return the recognised code, or {@code "-1"} on failure
     * @throws Exception declared for compatibility with existing callers
     */
    @RequestMapping(value="/image",method=RequestMethod.POST)
    public String upload(HttpServletRequest request ,
            @RequestParam("data") String data ) throws Exception {
        // length() instead of == "": the latter compares references, not content.
        if (data == null || data.length() == 0) {
            return FAILURE;
        }
        File file = null;
        try {
            file = createTempFile(convert(data));
            if (file == null) {
                return FAILURE;
            }
            String code = decode(file);
            return code != null ? code : FAILURE;
        } catch (NumberFormatException e) {
            // Malformed payload: report it as an ordinary decode failure.
            e.printStackTrace();
            return FAILURE;
        } finally {
            if (file != null && !file.delete()) {
                file.deleteOnExit();
            }
        }
    }

    /**
     * Parses comma-separated decimal values into bytes. Both the unsigned
     * (0..255) and the signed (-128..127) spelling of a byte are accepted;
     * {@code Byte.parseByte} alone would reject every value above 127.
     *
     * @param s comma-separated decimal byte values
     * @return the parsed bytes
     * @throws NumberFormatException if an item is not a number or is out of range
     */
    private static byte[] convert(String s) {
        String[] split = s.split(",");
        byte[] c = new byte[split.length];
        for (int i = 0; i < split.length; i++) {
            int value = Integer.parseInt(split[i].trim());
            if (value < Byte.MIN_VALUE || value > 0xFF) {
                throw new NumberFormatException("Byte value out of range: " + value);
            }
            c[i] = (byte) value;
        }
        return c;
    }

    /**
     * Runs Tesseract OCR on the given image file.
     *
     * @param file image to recognise
     * @return the first four characters of the trimmed OCR text, or
     *         {@code null} when OCR fails or yields fewer than four characters
     */
    private String decode(File file){
        try {
            Tesseract instance = Tesseract.getInstance();
            String decodeText = instance.doOCR(file);
            if (decodeText == null) {
                return null;
            }
            decodeText = decodeText.trim();
            if (decodeText.length() < CODE_LENGTH) {
                return null;
            }
            return decodeText.substring(0, CODE_LENGTH);
        } catch (TesseractException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Converts the raw image bytes with the project's image helpers and writes
     * the result to a new, uniquely named temporary {@code .tiff} file, so
     * that concurrent requests do not overwrite each other's image. The caller
     * is responsible for deleting the returned file.
     *
     * @param b raw image bytes
     * @return the written file, or {@code null} when conversion or writing fails
     */
    private File createTempFile(byte[] b){
        File file = null;
        try {
            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            ImageFilter imageFilter = new ImageFilter(new ByteArrayInputStream(b));
            ImageIOHelper.createImage(imageFilter.changeGrey(), byteArrayOutputStream);
            file = File.createTempFile("ocr", ".tiff");
            // Close the stream before the file is handed to OCR.
            FileOutputStream to = new FileOutputStream(file);
            try {
                byteArrayOutputStream.writeTo(to);
            } finally {
                to.close();
            }
            return file;
        } catch (Exception e) {
            e.printStackTrace();
            if (file != null) {
                file.delete();
            }
            return null;
        }
    }
}
第三种方法在理论上可行,但要注意获取图片后,要分析返回的数据,或许其中不仅仅是图片数据,还包含了 cookie 的写入,需要针对每一个投票网站具体分析。
***然而始料未及的是,写了许多代码,在调试的时候才突然发现,取得校验图片的数据中,末尾包含了明文的 4 位校验码,NND 不知是程序员留的后门还是写入 cookie 需要,忽然发现自己很失败,被狗血了。
从而说明分析获得的数据是多么重要了。
天不负苦心人,投票服务器又开了,是时候上场了,一台机器一小时刷了上万票,有需要即可多开几台机器,要多少票都是如此简单,果然不是一般的爽。(不敢刷太多,树大招风(:)