/**
*
*/
package com.pan.tools;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.ResourceBundle;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
/**
 * Fetches the movie poster list from the page configured under the
 * {@code douban} key of the {@code xmlCN} resource bundle and writes it out as
 * an XML file whose location is given by the {@code xmlPath} and
 * {@code fileName} keys of the same bundle.
 *
 * @author Javay
 *
 * 2012-9-7 3:13:10 PM
 */
public class MovieRssCNGenerator {

    /** Configuration bundle; the keys read by this class are {@code douban}, {@code xmlPath} and {@code fileName}. */
    private final ResourceBundle bundle = ResourceBundle.getBundle("xmlCN");

    /** Maximum number of fetch attempts before giving up. */
    private static final int RETRY_TIME = 3;

    /** Pause between two fetch attempts, in milliseconds. */
    private static final long RETRY_DELAY_MILLIS = 3000L;

    /**
     * Formats the current time as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @return the formatted current date and time
     */
    public static String getDateTime() {
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    }

    /**
     * Downloads the configured page and extracts the poster entries.
     *
     * <p>The request is attempted up to {@link #RETRY_TIME} times, sleeping
     * between attempts, whenever an {@link IOException} occurs.
     *
     * @return the {@code li.poster} elements found inside the first
     *         {@code div.screening-bd} element, or {@code null} when every
     *         attempt failed, the container element is missing from the page,
     *         or the thread was interrupted while waiting to retry
     */
    public Elements getDoubanMovieSlidePic() {
        for (int attempt = 1; attempt <= RETRY_TIME; attempt++) {
            try {
                // NOTE(review): the "query"/"auth" values look like placeholder
                // request parameters - confirm the target page really needs them.
                org.jsoup.nodes.Document page = Jsoup
                        .connect(bundle.getString("douban"))
                        .data("query", "Java")
                        .userAgent("Mozilla")
                        .cookie("auth", "token")
                        .timeout(20000)
                        .post();
                org.jsoup.nodes.Element screeningBody = page.select("div.screening-bd").first();
                if (screeningBody == null) {
                    // first() yields null when nothing matched; retrying cannot
                    // help, so report the failure through the null return value.
                    System.out.println("页面中未找到 div.screening-bd 节点,无法解析海报列表。");
                    return null;
                }
                return screeningBody.select("li.poster");
            } catch (IOException e) {
                if (attempt >= RETRY_TIME) {
                    // Last attempt: surface the cause instead of dropping it.
                    e.printStackTrace();
                    break;
                }
                System.out.println("请求超时,进行第" + attempt + "次重连。");
                try {
                    Thread.sleep(RETRY_DELAY_MILLIS);
                } catch (InterruptedException interrupted) {
                    // Restore the interrupt status for the caller and stop retrying.
                    Thread.currentThread().interrupt();
                    return null;
                }
            }
        }
        return null;
    }

    /**
     * Builds the movie XML document from the scraped entries and writes it to
     * {@code <xmlPath>/<fileName>.xml}, creating the output directory when it
     * does not exist yet.
     *
     * @param items the poster elements to export; when {@code null} nothing is
     *              written and a failure message is printed
     */
    public void createXMLDoc(Elements items) {
        if (items == null) {
            System.out.println("数据读取失败!程序终止!");
            return;
        }

        Document doc = buildDocument(items);

        String directory = bundle.getString("xmlPath");
        String fileName = bundle.getString("fileName");

        File outputDir = new File(directory);
        if (!outputDir.exists()) {
            System.out.println("目录不存在,创建一个新的文件输出路径: " + outputDir);
            // mkdirs() also returns false when another process created the
            // directory in the meantime, hence the extra isDirectory() check.
            if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
                System.out.println("无法创建输出目录: " + outputDir);
                return;
            }
        }

        // Resolve the file against the directory so the result stays inside it
        // whether or not xmlPath ends with a path separator.
        File target = new File(outputDir, fileName + ".xml");
        if (writeDocument(doc, target)) {
            System.out.println(fileName + "文件输出完毕!");
        }
    }

    /**
     * Converts the scraped poster elements into a {@code <movies>} document with
     * one {@code <movie>} child (title, link, pic) per element.
     */
    private Document buildDocument(Elements items) {
        Document doc = DocumentHelper.createDocument();
        doc.addComment("panmay.com" + getDateTime());
        Element root = doc.addElement("movies");

        for (org.jsoup.nodes.Element item : items) {
            Elements images = item.select("img");
            Element movie = root.addElement("movie");

            movie.addElement("title").setText(images.attr("alt").trim());
            movie.addElement("link").setText(item.select("a").attr("href"));

            // Prefer the data-original attribute; fall back to src when it is
            // absent or blank.
            String picUrl = images.attr("data-original").trim();
            if (picUrl.isEmpty()) {
                picUrl = images.attr("src").trim();
            }
            movie.addElement("pic").setText(picUrl);
        }
        return doc;
    }

    /**
     * Serializes {@code doc} to {@code target} as pretty-printed UTF-8 XML.
     *
     * @return {@code true} when the document was written and the writer closed
     *         without error, {@code false} otherwise
     */
    private boolean writeDocument(Document doc, File target) {
        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("UTF-8");

        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(target);
            XMLWriter writer = new XMLWriter(fos, format);
            writer.write(doc);
            // Closing the writer pushes any buffered output before success is reported.
            writer.close();
            return true;
        } catch (IOException e) {
            // Also covers FileNotFoundException and UnsupportedEncodingException,
            // which are both IOException subclasses.
            e.printStackTrace();
            return false;
        } finally {
            // Release the stream even when the writer could not be created or
            // writing failed part-way; closing an already closed stream is harmless.
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if the final close fails.
                }
            }
        }
    }

    /**
     * Fetches the poster list and exports it to XML.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MovieRssCNGenerator robot = new MovieRssCNGenerator();
        robot.createXMLDoc(robot.getDoubanMovieSlidePic());
    }
}
// Reposted from: https://my.oschina.net/panjavay/blog/77416