利用HttpURLConnection抓取网页取名

本文介绍了一个用Java编写的多线程程序,该程序利用HttpURLConnection从特定网站抓取页面,并从中解析出姓名及其对应的评分。程序通过遍历一系列姓名组合,筛选出评分较高的名字。

闲来无事,利用Java 的HttpURLConnection,使用多线程来抓取网页,计算名字的分数。

仅供娱乐。

程序如下:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.log4j.Logger;
/**
 * Multi-threaded scraper that queries a name-scoring web page for a range of
 * characters and prints the names whose score reaches a threshold.
 *
 * <p>The page fetch ({@link #read}) and the text extraction ({@link #find})
 * are plain static helpers; {@link CallThread} walks one slice of the
 * character range and {@link #main} splits the whole range into slices.
 *
 * <p>NOTE(review): several string/char literals in this file (the character
 * range in {@code main}, the surname in {@code CallThread.run}, the
 * {@code sex=} query value and the HTML markers) contain blanks where
 * non-ASCII text or tighter spacing would be expected. They are kept exactly
 * as found; verify them against the live page before relying on the output.
 */
public class XingMing {

    static final Logger log = Logger.getLogger(XingMing.class);

    /** Connect/read timeout, so that a stalled server cannot block a worker forever. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Downloads the document behind {@code urlStr} and returns it as one
     * string with the line terminators removed.
     *
     * <p>The response bytes are decoded with the platform default charset.
     *
     * @param urlStr absolute HTTP URL to fetch
     * @return the page text, or {@code null} when the URL is malformed or any
     *         I/O error (including a timeout) occurs
     */
    public static String read(String urlStr) {
        final HttpURLConnection connection;
        try {
            connection = (HttpURLConnection) new URL(urlStr).openConnection();
        } catch (MalformedURLException e) {
            log.warn("Malformed URL: " + urlStr, e);
            return null;
        } catch (IOException e) {
            log.debug("Cannot open connection to " + urlStr, e);
            return null;
        }
        connection.setConnectTimeout(TIMEOUT_MILLIS);
        connection.setReadTimeout(TIMEOUT_MILLIS);

        BufferedReader reader = null;
        try {
            connection.connect();
            reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
            return page.toString();
        } catch (IOException e) {
            // Callers retry on null, so a failed fetch is only worth a debug entry.
            log.debug("Failed to read " + urlStr, e);
            // Drop the socket instead of leaving a half-read connection around.
            connection.disconnect();
            return null;
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Returns the text located between the first occurrence of
     * {@code beginStr} and the next occurrence of {@code endStr} after it.
     *
     * @param str      text to search
     * @param beginStr marker that precedes the wanted text
     * @param endStr   marker that follows the wanted text
     * @return the enclosed text (possibly empty), or {@code null} when either
     *         marker is missing or any argument is {@code null}
     */
    public static String find(String str, String beginStr, String endStr) {
        if (str == null || beginStr == null || endStr == null) {
            return null;
        }
        int begin = str.indexOf(beginStr);
        if (begin == -1) {
            return null;
        }
        int contentStart = begin + beginStr.length();
        int contentEnd = str.indexOf(endStr, contentStart);
        if (contentEnd == -1) {
            return null;
        }
        return str.substring(contentStart, contentEnd);
    }

    /**
     * Extracts the analysed name from a result page.
     *
     * <p>Sample of the markup being matched: {@code value= 我的姓名『 XX 』的分析:}
     *
     * @param source HTML of the result page
     * @param name   currently unused
     * @return the text between the two markers, or {@code null} if not found
     */
    public static String findName(String source, String name) {
        // NOTE(review): the blanks inside these markers may be a publishing
        // artefact - confirm against the real page source.
        return find(source, "value= 我的姓名『 " , " 』的分析 " );
    }

    /**
     * Extracts the score text from a result page.
     *
     * <p>Sample of the markup being matched (as recorded by the original author):
     * {@code <font size=3> 姓名评分: </font><font color=0000ff size=5FONT-SIZE: 10pt;">
     * BT, 楷体 ">99.5</font>}
     *
     * @param source HTML of the result page
     * @param name   currently unused
     * @return the text between the opening marker and the next
     *         {@code </font>}, or {@code null} if not found
     */
    public static String findScore(String source, String name) {
        // NOTE(review): this opening marker does not match the sample above
        // character for character - confirm against the real page source.
        return find(
                source,
                "<font size=3> 姓名评分: </font><font color=0000ff size=5 BT, 楷体 \">" ,
                "</font>" );
    }

    /**
     * Splits the inclusive range {@code firstChar..lastChar} into at most
     * {@code maxThread} contiguous slices and starts one {@link CallThread}
     * per slice. Every character of the range belongs to exactly one slice.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; not thrown by this method
     */
    public static void main(String[] args) throws IOException {
        final char firstChar = ' ' ;
        final char lastChar = ' ' ;
        // Upper bound on the number of worker threads; more threads speed up the scan.
        int maxThread = 100;

        int total = lastChar - firstChar + 1;
        if (total <= 0) {
            log.warn("Empty character range, nothing to do");
            return;
        }
        // Never start more threads than there are characters to process.
        int threads = Math.min(maxThread, total);
        int baseSize = total / threads;
        // The first 'remainder' slices take one extra character so nothing is dropped.
        int remainder = total % threads;

        int next = firstChar;
        for (int i = 0; i < threads; i++) {
            int size = baseSize + (i < remainder ? 1 : 0);
            char start = (char) next;
            char end = (char) (next + size - 1);
            // The character printed right after 'end' is the one following the slice.
            System.out.println( " 开启 " + (i + 1) + " 处理 :" + start + "-" + end
                    + (char) (end + 1));
            new CallThread(start, end).start();
            next += size;
        }
    }

    /**
     * Worker that queries the scoring page for every character in the
     * inclusive range {@code start..end}.
     */
    static class CallThread extends Thread {

        /** How many times a page fetch is attempted before the name is skipped. */
        private static final int MAX_ATTEMPTS = 5;

        /** Scores at or above this value are printed to standard output. */
        private static final float SCORE_THRESHOLD = 90;

        private final char start ;
        private final char end ;
        /** Label of the form {@code start-end}, used as prefix in progress output. */
        private final String info ;

        CallThread( char start, char end) {
            this.start = start;
            this.end = end;
            this.info = this.start + "-" + this.end ;
        }

        /**
         * Fetches the result page for each candidate name, logs the parsed
         * name and score, and prints candidates whose score reaches
         * {@link #SCORE_THRESHOLD}.
         */
        @Override
        public void run() {
            final char youname1 = ' ' ;
            final String url = "http://www.xingming.net/cmjg-mz.asp?sex= &youname1="
                    + youname1 + "&youname2=" ;

            // An int counter is used so the loop still terminates when end is '\uffff'.
            for (int code = start; code <= end; code++) {
                final char current = (char) code;
                final int processed = code - start;

                // Given-name rule; adapt as desired, e.g. " " + current
                // or "" + current + current.
                String youname2 = current + " " ;

                String webinfo = null;
                for (int attempt = 0; attempt < MAX_ATTEMPTS && webinfo == null; attempt++) {
                    webinfo = XingMing.read(url + youname2);
                }
                if (webinfo == null) {
                    log.warn( " 获取名字 [" + youname1 + youname2 + "] 失败 " );
                    continue;
                }

                String webName = XingMing.findName(webinfo, "[" + youname2 + "]" );
                String webScore = XingMing.findScore(webinfo, "[" + youname2 + "]" );

                if (webScore != null) {
                    try {
                        if (Float.parseFloat(webScore) >= SCORE_THRESHOLD) {
                            System.out.println(youname2 + ":" + webName + ":" + webScore);
                        }
                    } catch (NumberFormatException e) {
                        // The page did not contain a numeric score; the raw
                        // text is still written to the log below.
                        log.debug("Unparseable score [" + webScore + "]", e);
                    }
                }

                log.info( this.info + ":" + webName + ":" + webScore);
                if (processed % 100 == 0) {
                    System.out.println( this.info + " 处理了 " + processed + " " );
                }
            }
            System.out.println( this.info + " 结束了 ....." );
        }
    }
}
最新程序:
最终版宝宝取名程序,java版,我宝名字已经确定。

http://blog.youkuaiyun.com/z3h/archive/2008/01/16/2047420.aspx

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值