参考此处相关,http://blog.youkuaiyun.com/lmj623565791/article/details/23532797
1、采用GET方法
2、了解设计模式相关
3、对主方法进行封装进行二次jar使用
package com.zhy.biz;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhy.bean.CommonException;
import com.zhy.bean.NewsItem;
import com.zhy.csdn.DataUtil;
import com.zhy.csdn.URLUtil;
/**
* 处理NewItem的业务类
* @author zhy
*
*/
public class NewsItemBiz
{
/**
* 业界、移动、云计算
*
* @param htmlStr
* @return
* @throws CommonException
*/
public List<NewsItem> getNewsItems( int newsType , int currentPage) throws CommonException
{
String urlStr = URLUtil.generateUrl(newsType, currentPage);
String htmlStr = DataUtil.doGet(urlStr);
List<NewsItem> newsItems = new ArrayList<NewsItem>();
NewsItem newsItem = null;
Document doc = Jsoup.parse(htmlStr);
Elements units = doc.getElementsByClass("unit");
for (int i = 0; i < units.size(); i++)
{
newsItem = new NewsItem();
newsItem.setNewsType(newsType);
Element unit_ele = units.get(i);
Element h1_ele = unit_ele.getElementsByTag("h1").get(0);
Element h1_a_ele = h1_ele.child(0);
String title = h1_a_ele.text();
String href = h1_a_ele.attr("href");
newsItem.setLink(href);
newsItem.setTitle(title);
Element h4_ele = unit_ele.getElementsByTag("h4").get(0);
Element ago_ele = h4_ele.getElementsByClass("ago").get(0);
String date = ago_ele.text();
newsItem.setDate(date);
Element dl_ele = unit_ele.getElementsByTag("dl").get(0);// dl
Element dt_ele = dl_ele.child(0);// dt
try
{// 可能没有图片
Element img_ele = dt_ele.child(0);
String imgLink = img_ele.child(0).attr("src");
newsItem.setImgLink(imgLink);
} catch (IndexOutOfBoundsException e)
{
}
Element content_ele = dl_ele.child(1);// dd
String content = content_ele.text();
newsItem.setContent(content);
newsItems.add(newsItem);
}
return newsItems;
}
}