花了一下午写了段小代码,把ZOJ各题的AC数与AC率爬下来存到数据库,这样排个序就能知道哪些是水题、哪些是难题了。
正则表达式还是不熟啊,解析的方法有点土,将就着用了。
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.xhldtc.db.DBUtils;
/**
 * Downloads the ZOJ problem-list pages, cuts the problem table into one HTML
 * fragment per table row, extracts five text fields from every row and inserts
 * them into the {@code zoj_problem_info} table
 * (columns {@code id, title, ratio, accepted, alls}).
 *
 * <p>Parsing is deliberately simple string/regex work, not a real HTML parser;
 * rows that do not yield exactly five fields are reported and skipped instead
 * of being written with shifted or stale column values.
 */
public class CollectData
{
    /** Number of list pages to download; pages are numbered starting at 1. */
    private static final int PAGE_COUNT = 28;

    /** Address of a problem-list page; the page number is appended. */
    private static final String PAGE_URL =
            "http://acm.zju.edu.cn/onlinejudge/showProblems.do?contestId=1&pageNumber=";

    /** Closing tag of a table row. */
    private static final String ROW_END = "</tr>";

    /** Text that marks the border between two adjacent rows. */
    private static final String ROW_BOUNDARY = "</tr><tr";

    /** Number of placeholders in the insert statement. */
    private static final int COLUMN_COUNT = 5;

    /** 1-based position of the ratio column, the only one carrying a '%' suffix. */
    private static final int RATIO_COLUMN = 3;

    /** Number of queued rows after which the batch is sent to the database. */
    private static final int BATCH_SIZE = 500;

    /**
     * Captures (group 2) every run of text that sits between '&gt;' and '&lt;'
     * and contains none of '&lt;', ')' or '/'.
     * NOTE(review): text containing ')' or '/' (for example inside a title) is
     * not captured at all, so such a row ends up with fewer than five fields.
     */
    private static final Pattern CELL_TEXT = Pattern.compile("(\\>)([^\\<\\)\\/]+)(\\<)");

    /** Connection used by {@link #getProblemInfoAndSave(ArrayList)}; set in {@link #main(String[])}. */
    static Connection conn;

    /**
     * Fetches every list page, collects the row fragments and stores them.
     *
     * @param args unused
     * @throws Exception if a page cannot be downloaded or the connection cannot be obtained
     */
    public static void main(String[] args) throws Exception
    {
        ArrayList<String> rows = new ArrayList<String>();
        for (int page = 1; page <= PAGE_COUNT; page++)
        {
            URL url = new URL(PAGE_URL + page);
            // NOTE(review): the stream is decoded with the platform default
            // charset; confirm it matches the encoding the site serves.
            BufferedReader br = new BufferedReader(new InputStreamReader(
                    url.openStream()));
            try
            {
                rows.addAll(getEachRow(getContent(br)));
            }
            finally
            {
                // Close the reader even when reading the page fails.
                br.close();
            }
        }
        conn = DBUtils.getMysqlConnection();
        try
        {
            getProblemInfoAndSave(rows);
        }
        finally
        {
            // Release the connection even when saving throws unexpectedly.
            DBUtils.closeConnection(conn);
        }
    }

    /**
     * Reads the page line by line and returns the body of the problem table as
     * one string: all trimmed lines found after the line
     * {@code <table class="list">} and before the next line {@code </table>},
     * concatenated without separators, with everything up to and including the
     * first {@code </tr>} (the first row) removed.
     *
     * @param br reader positioned at the start of the page
     * @return the concatenated rows after the first one, or an empty string if
     *         the table or its first {@code </tr>} is not found
     * @throws IOException if reading from {@code br} fails
     */
    static String getContent(BufferedReader br) throws IOException
    {
        boolean isContent = false;
        StringBuilder sb = new StringBuilder();
        String raw;
        // readLine() returns null at end of stream; stop there instead of
        // dereferencing null when the closing tag never shows up.
        while ((raw = br.readLine()) != null)
        {
            String line = raw.trim();
            if (isContent)
            {
                if ("</table>".equalsIgnoreCase(line))
                {
                    break;
                }
                sb.append(line);
            }
            else if ("<table class=\"list\">".equalsIgnoreCase(line))
            {
                isContent = true;
            }
        }
        int firstRowEnd = sb.indexOf(ROW_END);
        if (firstRowEnd < 0)
        {
            // No complete first row: there is nothing usable to return.
            return "";
        }
        return sb.substring(firstRowEnd + ROW_END.length());
    }

    /**
     * Splits the concatenated table body into one fragment per row. A cut is
     * made right after each {@code </tr>} that is immediately followed by
     * {@code <tr}; the text after the last cut forms the final fragment.
     *
     * @param s table body as produced by {@link #getContent(BufferedReader)}
     * @return the row fragments in document order; empty when {@code s} is
     *         {@code null} or empty
     */
    static ArrayList<String> getEachRow(String s)
    {
        ArrayList<String> rows = new ArrayList<String>();
        if (s == null || s.length() == 0)
        {
            return rows;
        }
        int begin = 0;
        int boundary = s.indexOf(ROW_BOUNDARY, begin);
        while (boundary != -1)
        {
            int end = boundary + ROW_END.length();
            rows.add(s.substring(begin, end));
            begin = end;
            boundary = s.indexOf(ROW_BOUNDARY, begin);
        }
        rows.add(s.substring(begin));
        return rows;
    }

    /**
     * Extracts the fields of every row fragment and inserts them through
     * {@link #conn} in batches of {@value #BATCH_SIZE}. Fragments that do not
     * yield exactly {@value #COLUMN_COUNT} fields are reported on standard
     * error and skipped. An {@link SQLException} is printed and ends the save;
     * it is not rethrown.
     *
     * @param array row fragments as produced by {@link #getEachRow(String)}
     */
    static void getProblemInfoAndSave(ArrayList<String> array)
    {
        PreparedStatement pstms = null;
        try
        {
            pstms = conn
                    .prepareStatement("insert into zoj_problem_info(id,title,ratio,accepted,alls)values(?,?,?,?,?)");
            int count = 0;
            for (String s : array)
            {
                ArrayList<String> fields = extractFields(s);
                if (fields.size() != COLUMN_COUNT)
                {
                    // Binding fewer values would leave placeholders unset or
                    // carrying the previous row's values; binding more would
                    // fail with an out-of-range parameter index.
                    System.err.println("Skipping row with " + fields.size()
                            + " fields (expected " + COLUMN_COUNT + "): " + s);
                    continue;
                }
                for (int i = 0; i < COLUMN_COUNT; i++)
                {
                    pstms.setString(i + 1, fields.get(i));
                }
                pstms.addBatch();
                count++;
                if (count % BATCH_SIZE == 0)
                {
                    pstms.executeBatch();
                }
            }
            // Flush whatever is left over from the last partial batch.
            pstms.executeBatch();
            System.out.println("Save success!");
        }
        catch (SQLException e)
        {
            e.printStackTrace();
        }
        finally
        {
            DBUtils.closeStatement(pstms);
        }
    }

    /**
     * Returns the text fields found in one row fragment, in order of
     * appearance. In the ratio field everything from the first '%' onwards is
     * dropped; other fields are returned unchanged so that a '%' elsewhere
     * (for example in a title) is preserved.
     */
    private static ArrayList<String> extractFields(String row)
    {
        ArrayList<String> fields = new ArrayList<String>();
        Matcher matcher = CELL_TEXT.matcher(row);
        while (matcher.find())
        {
            String text = matcher.group(2);
            if (fields.size() + 1 == RATIO_COLUMN)
            {
                int percent = text.indexOf('%');
                if (percent != -1)
                {
                    text = text.substring(0, percent);
                }
            }
            fields.add(text);
        }
        return fields;
    }
}