package cn.tongdun.preserver.etl;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small batch tool: reads request URLs from a text file (one per line), issues an
 * HTTP GET for each, and extracts a number from every response body.
 *
 * <p>Expected response format:
 * {@code {"company1":"...","company2":"...","similarity":38}}.
 * The extracted value is the last run of digits in the body, which for the format
 * above is the {@code similarity} value ({@code 38} in this example).
 */
public class test2 {

    /** Input file containing one request URL per line. */
    private static final String INPUT_FILE = "C:\\Users\\86184\\Desktop\\test.txt";

    /** Charset used to decode the input file. */
    private static final String INPUT_CHARSET = "gbk";

    /**
     * Requests every URL listed in the input file, extracts the last number from
     * each response and finally prints all extracted values, one per line.
     *
     * @param args unused
     * @throws IOException if an HTTP request fails or the client cannot be closed
     */
    public static void main(String[] args) throws IOException {
        // Extracted values, one entry per requested URL.
        List<String> valueStrList = new ArrayList<>();
        // Every line of the input file.
        List<String> fileList = readerFile();

        // try-with-resources guarantees the client (and its connection pool) is
        // released even when a request throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            for (String line : fileList) {
                // Blank lines and stray whitespace are not valid request URIs.
                String url = line.trim();
                if (url.isEmpty()) {
                    continue;
                }
                String body = fetch(httpClient, url);
                String returnValue = regx("\\d+", body);
                if (returnValue == null) {
                    System.err.println("No number found in response for: " + url);
                }
                valueStrList.add(returnValue);
            }
        }

        // Print all results.
        for (String value : valueStrList) {
            System.out.println(value);
        }
    }

    /**
     * Executes an HTTP GET and returns the response body as text.
     *
     * @param httpClient client used to execute the request
     * @param url        absolute URL to request
     * @return the response body, or an empty string when the response has no entity
     * @throws IOException if the request or reading the body fails
     */
    private static String fetch(CloseableHttpClient httpClient, String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        // Closing the response releases the underlying connection, also on failure.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            HttpEntity entity = response.getEntity();
            // A response without a body has a null entity; treat it as empty text.
            String str = (entity == null) ? "" : EntityUtils.toString(entity, "utf-8");
            System.out.println("网页内容:"+str);
            return str;
        }
    }

    /**
     * Reads the default input file and returns its lines.
     *
     * @return the lines read; on a read failure the lines read so far
     *         (possibly none) are returned
     */
    public static List<String> readerFile() {
        return readerFile(INPUT_FILE, INPUT_CHARSET);
    }

    /**
     * Reads every line of the given file into a list and prints the list.
     *
     * <p>Read failures are reported via a stack trace and do not propagate; the
     * lines read before the failure are still returned.
     *
     * @param path        path of the file to read
     * @param charsetName name of the charset used to decode the file
     * @return the lines read, in file order; never {@code null}
     */
    public static List<String> readerFile(String path, String charsetName) {
        List<String> fileStrList = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), charsetName))) {
            String line;
            while ((line = br.readLine()) != null) {
                fileStrList.add(line);
            }
        } catch (IOException e) {
            // Best effort: keep whatever was read before the failure.
            e.printStackTrace();
        }
        System.out.println("读取的数据是:"+fileStrList.toString());
        return fileStrList;
    }

    /**
     * Returns the last substring of {@code str} that matches {@code reg}.
     *
     * @param reg regular expression to search for
     * @param str text to search
     * @return the last match, or {@code null} if {@code str} contains no match
     */
    public static String regx(String reg, String str) {
        Matcher m = Pattern.compile(reg).matcher(str);
        String last = null;
        // Only the final match is needed, so there is no reason to collect them all.
        while (m.find()) {
            last = m.group();
        }
        return last;
    }
}