package fund.code;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
/**
* SnatchDataTool: shared utility class for fetching web pages and extracting data from them.
* Created 2008-2-20.
*
* @author Duan gaohui
*/
public class SnatchDataTool {
    // Log4j is configured from a properties file located under the absolute
    // path of the empty abstract pathname (new File("")).
    static File file = new File("");
    static String path = file.getAbsolutePath();
    static String filePath = path+"//bin//fund//common//log4j.properties";
    // The logger must be named after this class. SnatchDataTool.class.getClass()
    // would evaluate to java.lang.Class and mis-name the logger.
    static Logger logger=Logger.getLogger(SnatchDataTool.class);

    /**
     * Downloads the page at {@code urlStr} and returns the lines found between two
     * marker lines, concatenated without line separators.
     *
     * <p>Each line is compared with the markers after all whitespace has been removed
     * from both sides. A counter is incremented every time a line equals
     * {@code startStr}; a line is appended to the result while that counter is greater
     * than {@code b}, so the start-marker line that pushes the counter past {@code b}
     * is itself included. Reading stops at the first line equal to {@code endStr}
     * (which is not included), even if no start marker has been seen yet.
     *
     * <p>The response is decoded with the platform default charset. Log4j is
     * (re)configured from {@code filePath} on every call.
     *
     * @param urlStr   address of the page to read; must be an HTTP(S) URL
     * @param startStr line that marks the start of the wanted region; {@code null} never matches
     * @param endStr   line that ends the read; {@code null} never matches
     * @param b        number of start-marker occurrences to skip before collecting
     * @return the collected lines; if the URL is malformed or an I/O error occurs the
     *         failure is logged and whatever was collected so far (possibly the empty
     *         string) is returned
     */
    public static String ReadWeb(String urlStr,String startStr,String endStr,int b) {
        PropertyConfigurator.configure(filePath);
        StringBuilder buf = new StringBuilder();
        String startMarker = stripWhitespace(startStr);
        String endMarker = stripWhitespace(endStr);
        HttpURLConnection connection = null;
        BufferedReader reader = null;
        try {
            URL url = new URL(urlStr);
            connection = (HttpURLConnection) url.openConnection();
            connection.setConnectTimeout(30000);
            connection.setReadTimeout(30000);
            connection.connect();
            InputStream in = connection.getInputStream();
            reader = new BufferedReader(new InputStreamReader(in));
            int startsSeen = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String txt = stripWhitespace(line);
                if (txt.equals(startMarker)) {
                    startsSeen++;
                }
                if (txt.equals(endMarker)) {
                    break;
                }
                if (startsSeen > b) {
                    buf.append(line);
                }
            }
        } catch (MalformedURLException e) {
            logger.debug("Error:"+e.getMessage(), e);
        } catch (IOException e) {
            logger.info("URL="+urlStr+"网络地址打开时连接超时.", e);
        } finally {
            // Release the stream and the connection on every path, including failures.
            closeQuietly(reader);
            if (connection != null) {
                connection.disconnect();
            }
        }
        return buf.toString();
    }

    /**
     * Returns the text between the first occurrence of {@code beginStr} and the first
     * occurrence of {@code endStr} that follows it.
     *
     * @param str      text to search
     * @param beginStr marker preceding the wanted text (not included in the result)
     * @param endStr   marker following the wanted text (not included in the result)
     * @return the text between the markers, or {@code null} if any argument is
     *         {@code null}, {@code beginStr} is absent, or {@code endStr} does not
     *         occur after {@code beginStr}
     */
    public static String Find(String str,String beginStr, String endStr) {
        if (str == null || beginStr == null || endStr == null) {
            return null;
        }
        int begin = str.indexOf(beginStr);
        if (begin == -1) {
            return null;
        }
        int contentStart = begin + beginStr.length();
        // Search only after the begin marker so an earlier (or overlapping) occurrence
        // of the end marker cannot produce an inverted substring range.
        int contentEnd = str.indexOf(endStr, contentStart);
        if (contentEnd == -1) {
            return null;
        }
        return str.substring(contentStart, contentEnd);
    }

    /**
     * Returns the tail of {@code str} starting at the first occurrence of
     * {@code beginStr}; the marker itself is included in the result.
     *
     * @param str      text to search
     * @param beginStr marker at which the returned text starts
     * @return the text from the marker to the end of {@code str}, or {@code null} if
     *         either argument is {@code null} or the marker is absent
     */
    public static String Finds(String str,String beginStr) {
        if (str == null || beginStr == null) {
            return null;
        }
        int begin = str.indexOf(beginStr);
        if (begin == -1) {
            return null;
        }
        return str.substring(begin);
    }

    /**
     * Returns up to {@code end} characters that immediately follow the first
     * occurrence of {@code beginStr}.
     *
     * @param str      text to search
     * @param beginStr marker preceding the wanted text (not included in the result)
     * @param end      maximum number of characters to take after the marker
     * @return the characters following the marker, shortened if {@code str} ends
     *         first; {@code null} if {@code str} or {@code beginStr} is {@code null},
     *         the marker is absent, or {@code end} is negative
     */
    public static String InterceptStr(String str,String beginStr,int end) {
        if (str == null || beginStr == null || end < 0) {
            return null;
        }
        int found = str.indexOf(beginStr);
        // Test for "not found" before adding the marker length; afterwards the
        // -1 sentinel can no longer be recognised.
        if (found == -1) {
            return null;
        }
        int from = found + beginStr.length();
        int remaining = str.length() - from;
        int to = (end > remaining) ? str.length() : from + end;
        return str.substring(from, to);
    }

    /**
     * Strips HTML tags and all whitespace from {@code htmlStr}.
     *
     * <p>Every character from a {@code '<'} up to and including the next {@code '>'}
     * is dropped; a stray {@code '>'} outside a tag is dropped as well. Whitespace is
     * then removed from what remains.
     *
     * @param htmlStr HTML fragment, may be {@code null}
     * @return the remaining text without tags or whitespace, or {@code null} if
     *         {@code htmlStr} is {@code null}
     */
    public static String removeHtml(String htmlStr){
        if (htmlStr == null) {
            return null;
        }
        StringBuilder text = new StringBuilder(htmlStr.length());
        boolean outsideTag = true;
        for (int i = 0; i < htmlStr.length(); i++) {
            char c = htmlStr.charAt(i);
            if (c == '<') {
                outsideTag = false;
            } else if (c == '>') {
                outsideTag = true;
            } else if (outsideTag) {
                text.append(c);
            }
        }
        // NOTE(review): the previous code first removed a literal space character and
        // then attempted to remove whitespace. U+00A0 is included here in case that
        // literal was a non-breaking space - confirm.
        return text.toString().replaceAll("[\\s\\u00A0]+", "");
    }

    /** Returns {@code s} with all whitespace removed, or {@code null} if {@code s} is {@code null}. */
    private static String stripWhitespace(String s) {
        return (s == null) ? null : s.replaceAll("\\s+", "");
    }

    /** Closes {@code reader} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails; the data has already been read.
        }
    }
}