- package ivyy.taobao.com.domain.xml;
- import ivyy.taobao.com.utils.GlobalConstants;
- import java.net.URL;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- /**
- *@Author:liangjilong
- *@Date:2015-1-4
- *@Email:jilongliang@sina.com
- *@Version:1.0
- *@Description这个是通过jsoup处理的
- */
- public class SinaNew {
- public static void main(String[] args)throws Exception {
- String requestURL = GlobalConstants.getUrl(2, "xml");
- org.jsoup.nodes.Document doc=Jsoup.parse(new URL(requestURL), 3000);
- // String html=doc.html();
- Elements items=doc.select("item");//获取item(item具有多个节点)
- String title = "", url = "", keywords = "", img = "", media_name = "";
- int i=1;
- for (Element its : items) {
- title=its.select("title").html();
- url=its.select("url").html();
- keywords=its.select("keywords").html();
- img=its.select("img").html();
- media_name=its.select("media_name").html();
- String newsText=GlobalConstants.getNewsContent(url);//处理新闻内容
- //System.out.println(title + "\n" + url + "\n" + keywords + "\n"+ url + "\n" + media_name);
- System.out.println("==================第"+i+"篇=================="+newsText);
- i++;
- }
- }
- }
源代码:http://download.youkuaiyun.com/detail/jilongliang/8324543
- package ivyy.taobao.com.utils;
- import java.net.URL;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- /**
- *@Author:liangjilong
- *@Date:2015-1-4
- *@Email:jilongliang@sina.com
- *@Version:1.0
- *@Description
- */
- public class GlobalConstants {
- /***
- * 获取url连接
- * @param page第几页
- * @param format格式(XML、JSON)
- * @return
- */
- public static String getUrl(Integer page,String format){
- StringBuffer buffer=new StringBuffer("http://api.roll.news.sina.com.cn/zt_list?channel=news");
- String url="";
- buffer.append("&cat_1=shxw");//显示新闻
- buffer.append("&cat_2==zqsk||=qwys||=shwx||=fz-shyf");
- buffer.append("&level==1||=2");//级别
- buffer.append("&show_ext=1");
- buffer.append("&show_all=1");//显示所有
- buffer.append("&show_num=22");//显示多少条
- buffer.append("&tag=1");
- buffer.append("&format="+format);
- buffer.append("&page="+page);
- buffer.append("&callback=newsloader");
- url=buffer.toString();
- return url;
- }
- /***
- * 获取文章的内容
- * 从新浪的网页分析,通过文章body的id就可以拿到相应的文章内容..
- * @param url
- * @return
- */
- public static String getNewsContent(String url) throws Exception{
- Document doc=Jsoup.parse(new URL(url), 3000);
- if(doc!=null){
- String artibody=doc.getElementById("artibody").html();//通过网页的html的id去拿到新闻内容artibody
- return artibody;
- }else{
- return "网络异常";
- }
- }
- }
- package ivyy.taobao.com.utils;
- import java.io.BufferedReader;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.net.HttpURLConnection;
- import java.net.URL;
- /**
- *@Author:liangjilong
- *@Date:2015-1-4
- *@Email:jilongliang@sina.com
- *@Version:1.0
- *@Description
- */
public class HttpRequestUtils {
    /**
     * Sends an HTTP request and returns the response body as a string.
     *
     * <p>The body is decoded as UTF-8 and read line by line; the lines are
     * concatenated without their line terminators. Redirects are followed and
     * caches are bypassed. No request body is sent.
     *
     * <p>This method never throws: any failure (malformed URL, non-HTTP URL,
     * unsupported method, I/O error) is printed via {@code printStackTrace()}
     * and whatever was read up to that point (possibly an empty string) is
     * returned.
     *
     * @param requestUrl address to request
     * @param method     HTTP method to use, e.g. {@code "GET"} or {@code "POST"}
     * @return the response body with line terminators removed, or the part read
     *         before a failure occurred
     */
    public static String HttpURLConnRequest(String requestUrl, String method) {
        StringBuilder buffer = new StringBuilder();
        HttpURLConnection httpUrlConn = null;
        BufferedReader bufferedReader = null;
        try {
            URL url = new URL(requestUrl);
            httpUrlConn = (HttpURLConnection) url.openConnection();
            httpUrlConn.setDoInput(true);
            httpUrlConn.setRequestMethod(method);
            httpUrlConn.setUseCaches(false);
            httpUrlConn.setInstanceFollowRedirects(true); // follow redirects
            httpUrlConn.connect();

            // Turn the response stream into a string.
            bufferedReader = new BufferedReader(
                    new InputStreamReader(httpUrlConn.getInputStream(), "utf-8"));
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                buffer.append(line);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release resources on every path, not only after a successful read.
            if (bufferedReader != null) {
                try {
                    bufferedReader.close(); // also closes the wrapped streams
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (httpUrlConn != null) {
                httpUrlConn.disconnect();
            }
        }
        return buffer.toString();
    }
}