/**
 * Reads an RSS feed, prints the basic fields of every entry and stores each
 * entry in the database through {@code DBOPerate.insert_rss}.
 */
public class TestParse {
    // Formats an entry's publication date as yyyy-MM-dd before it is stored.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    /**
     * Fetches the RSS feed at {@code rss}, prints every entry and inserts it
     * into the {@code baidunews.rss} table, tagged with {@code keyword}.
     *
     * <p>Failures are reported with {@code printStackTrace()} and never
     * propagated. A failure on a single entry skips only that entry; a failure
     * while fetching or parsing the feed ends the whole run.
     *
     * @param rss     URL of the RSS feed to read
     * @param keyword keyword recorded with every stored entry
     */
    public void parseRss(String rss, String keyword) {
        // Database table
        String table = "baidunews.rss";
        XmlReader reader = null;
        try {
            URL url = new URL(rss);
            // Read the RSS source
            reader = new XmlReader(url);
            System.out.println("Rss源的编码格式为:" + reader.getEncoding());
            SyndFeedInput input = new SyndFeedInput();
            // Build the SyndFeed object, i.e. all the information in the RSS source
            SyndFeed feed = input.build(reader);
            // List of the news items contained in the feed
            List<?> entries = feed.getEntries();
            // Process every item
            for (Object item : entries) {
                try {
                    storeEntry((SyndEntry) item, keyword, table);
                } catch (Exception e) {
                    // One malformed entry or failed insert must not drop the remaining entries.
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the stream opened by XmlReader.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints one feed entry and inserts it into {@code table}.
     *
     * <p>A missing description or publication date is stored as an empty
     * string instead of raising a {@code NullPointerException}.
     * NOTE(review): confirm that DBOPerate.insert_rss accepts an empty time value.
     *
     * @throws Exception whatever {@code DBOPerate.insert_rss} raises; handled by the caller
     */
    private void storeEntry(SyndEntry entry, String keyword, String table) throws Exception {
        // Title, link, summary and time are the basic parts of an RSS item
        System.out.println("标题:" + entry.getTitle());
        System.out.println("连接地址:" + entry.getLink());
        SyndContent description = entry.getDescription();
        String summary = (description == null) ? "" : description.getValue();
        System.out.println("标题简介:" + summary);
        Date date = entry.getPublishedDate();
        System.out.println("发布时间:" + date);
        // The following parts of an RSS item are optional
        System.out.println("标题的作者:" + entry.getAuthor());
        String formattedDate = (date == null) ? "" : sdf.format(date);

        // A fresh object per entry, so no field value can leak from a previous entry.
        result row = new result();
        row.setTime(formattedDate);
        row.setAuthor(entry.getAuthor());
        row.setDescription(summary);
        row.setTitle(entry.getTitle());
        row.setUrl(entry.getLink());
        row.setKeyword(keyword);
        // Insert into the database
        DBOPerate.insert_rss(row, table);
    }

    /**
     * Builds the Baidu News RSS URL for a keyword and stores the feed's entries.
     */
    public static void main(String[] args) throws IOException,
            InterruptedException {
        // Choose the keyword you want
        String keyword = "****";
        TestParse tp = new TestParse();
        String s1 = "http://news.baidu.com/ns?word=";
        String s2 = "&tn=newsrss&sr=0&cl=2&rn=100&ct=0";
        // The RSS URL for a keyword has a fixed format, so it can simply be concatenated;
        // this avoids simulating a browser click to obtain the URL.
        // NOTE(review): the keyword is inserted unencoded; a keyword containing spaces or
        // non-ASCII characters probably needs URL-encoding in the charset the site expects.
        String feed = s1 + keyword + s2;
        System.out.println(feed);
        tp.parseRss(feed, keyword);
    }
}