对txt操作核心代码
package util;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
/**
 * Scrapes a novel from its index page and appends the whole book to a local text file.
 */
public class StoreTxt {
    /** Line separator written after every piece of text in the output. */
    private static final String NEW_LINE = "\r\n";

    /** Site root that is prepended to the chapter hrefs found on the index page. */
    private static final String SITE_ROOT = "http://www.biquge.com.tw";

    /**
     * Fetches the index page at {@code url}, extracts the title, author line and
     * introduction, then fetches every chapter linked from {@code #list dl dd a} and
     * appends everything to {@code D:\纣临.txt}. The collected text is also echoed
     * to standard output.
     *
     * @param url address of the book's index page
     * @throws IOException if an expected element is missing from a fetched page
     *                     (for example because the download failed and an empty
     *                     document was parsed), or if writing the file fails
     */
    public static void operateTxt(String url) throws IOException {
        String content = HttpClientUtil.testHttpClient(url);
        Document doc = Jsoup.parse(content);
        File file = new File("D:\\纣临.txt");

        // Collect everything in a builder: concatenating Strings inside the chapter
        // loop would re-copy the whole book for every chapter.
        StringBuilder all = new StringBuilder();

        String h1 = firstText(doc.getElementsByTag("h1"), "book title <h1>", url);
        System.out.println("题目:" + h1);
        all.append("书名:").append(h1).append(NEW_LINE);

        String author = firstText(doc.select("#info p"), "author line (#info p)", url);
        all.append(author).append(NEW_LINE);
        System.out.println(author);

        Element introElement = doc.getElementById("intro");
        if (introElement == null) {
            throw new IOException("Missing introduction (#intro) on page " + url);
        }
        String intro = introElement.text();
        System.out.println("简介" + intro);
        all.append("简介").append(intro).append(NEW_LINE);

        Elements hrefElements = doc.select("#list dl dd a");
        for (Element e : hrefElements) {
            String urlIndex = SITE_ROOT + e.attr("href");
            String contentIndex = HttpClientUtil.testHttpClient(urlIndex);
            Document docIndex = Jsoup.parse(contentIndex);

            String chapter = firstText(docIndex.getElementsByTag("h1"), "chapter title <h1>", urlIndex);
            // Every run of whitespace in the chapter body becomes a line break.
            String textsIndex = docIndex.select("#content").text().replaceAll("\\s+", "\r\n");

            all.append(chapter).append(NEW_LINE).append(textsIndex).append(NEW_LINE);
        }

        String output = all.toString();
        // Second constructor argument 'true' opens the file in append mode.
        // try-with-resources flushes and closes the writer even when write() fails.
        try (FileWriter fileWriter = new FileWriter(file, true)) {
            fileWriter.write(output);
        }
        System.out.println(output);
    }

    /**
     * Returns the text of the first element in {@code elements}.
     *
     * @param elements result of a DOM query
     * @param what     human-readable description of what was being looked up
     * @param pageUrl  page the query ran against, used only for the error message
     * @throws IOException if the query matched nothing
     */
    private static String firstText(Elements elements, String what, String pageUrl) throws IOException {
        if (elements.isEmpty()) {
            throw new IOException("Missing " + what + " on page " + pageUrl);
        }
        return elements.get(0).text();
    }
}
httpclient工具类
package util;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * Minimal HTTP helper built on Apache HttpClient for downloading pages as text.
 */
class HttpClientUtil {
    /**
     * Single client shared by all requests. Creating a new client per call (as was
     * done before) leaves one unclosed client behind for every request, and left
     * this field null for anyone calling {@link #getResponse(String)} first.
     */
    private static final CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Performs a GET request and returns the response body as a string.
     * "GBK" is passed to {@code EntityUtils.toString} as the charset argument.
     *
     * @param url address to fetch
     * @return the response body, or an empty string if the request failed with an
     *         {@link IOException} or the response carried no entity
     */
    public static String testHttpClient(String url) {
        String content = "";
        // try-with-resources closes the response even if reading the entity fails.
        try (CloseableHttpResponse response = getResponse(url)) {
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                content = EntityUtils.toString(entity, "GBK");
            }
        } catch (IOException e) {
            // Best effort: report the failure and let the caller see an empty body.
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Executes a GET request against {@code url} with the shared client.
     *
     * @param url address to fetch
     * @return the open response; the caller is responsible for closing it
     * @throws IOException if executing the request fails
     */
    public static CloseableHttpResponse getResponse(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        return httpClient.execute(httpGet);
    }
}
main方法入口
import util.StoreTxt;
import java.io.IOException;
/**
 * Program entry point: runs the scraper against one fixed book index page.
 */
public class Main {
    /**
     * Invokes {@code StoreTxt.operateTxt} for the hard-coded index URL and prints
     * the stack trace if it fails with an {@link IOException}.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        final String indexUrl = "http://www.biquge.com.tw/17_17380/";
        try {
            StoreTxt.operateTxt(indexUrl);
        } catch (IOException failure) {
            // Report the failure; main then returns normally.
            failure.printStackTrace();
        }
    }
}
出现问题及解决
对txt操作问题:FileWriter 默认会覆盖文件中已有的内容;构造方法的第二个参数传 true 表示以追加方式写入:
fileWriter = new FileWriter(file, true);
txt简写:
FileWriter fileWriter = new FileWriter(file, true);
fileWriter.write(all);
fileWriter.flush();
fileWriter.close();
System.out.println(all);
写入txt的简化做法:String all=""; // 先把全部内容拼接到 all 中,最后一次性写入txt
可改进之处:在循环中反复拼接 String 效率较低,应改用 StringBuffer(或 StringBuilder)。