Document类
一、类结构
org.jsoup.nodes
Class Document
org.jsoup.nodes.Document
All Implemented Interfaces: Cloneable
public class Document
extends Element
一个HTML文档
二、方法
Constructor Summary | ||
| ||
Method Summary | ||
body() | ||
clone() | ||
createElement(String tagName) | ||
static Document | createShell(String baseUri) | |
head() | ||
nodeName() | ||
normalise() | ||
outerHtml() | ||
outputSettings() | ||
outputSettings(Document.OutputSettings outputSettings) | ||
quirksMode(Document.QuirksMode quirksMode) | ||
title() 获取Document的title值 | ||
void | title(String title) 设置Document的title值 | |
三、实例
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
- public class JsoupDocument {
- private static Document doc;
- private static Document shell;
- public static void main(String[] args) {
- try {
- doc = Jsoup.connect("http://www.baidu.com").get();
- //System.out.println(doc);
- //Body();
- //Clone();
- //CreateElement();
- //CreateShell();
- //Head();
- //NodeName();
- //OuterHtml();
- //Text("你好");
- //Text();
- //Title();
- Title("伊诺克Eliot");
- } catch (Exception e) {
- // TODO Auto-generatedcatch block
- e.printStackTrace();
- }
- }
- //获取body
- private static void Body(){
- System.out.println(doc.body());
- }
- //克隆Document
- private static void Clone(){
- Document clone = doc.clone();
- System.out.println(clone);
- }
- //创建一个节点,例如:<div></div>
- private static void CreateElement(){
- Element e = doc.createElement("div");
- System.out.println(e);
- }
- //创建一个整体框架
- /*
- 输出:
- <html>
- <head></head>
- <body></body>
- </html>
- */
- private static void CreateShell(){
- shell = Document.createShell("http://www.baidu.com");
- System.out.println(shell);
- }
- //获取Head
- /*
- * 输出:
- * <head>
- <meta http-equiv="content-type"content="text/html;charset=utf-8" />
- <title>百度一下,你就知道</title>
- <style>html,body{height:100%}.....省略.... </style>
- <script>functionh(obj){obj.style.behavior='url(#default#homepage)';vara = obj.setHomePage('http://www.baidu.com/');}</script>
- </head>
- * */
- private static void Head(){
- Element e = doc.head();
- System.out.println(e);
- }
- //获取节点名
- private static void NodeName(){
- String name = doc.body().nodeName();
- System.out.println(name);
- name = doc.body().getElementById("content").nodeName();
- System.out.println(name);
- name = doc.body().getElementById("content").getElementById("u").nodeName();
- System.out.println(name);
- }
- //获取节点的html
- /*
- * 输出:
- * <divid="u">
- <a href="http://www.baidu.com/gaoji/preferences.html"name="tj_setting">搜索设置</a>|
- <a href="https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F" name="tj_login" id="lb"onclick="return false;">登录</a> <a href="https://passport.baidu.com/v2/?reg®Type=1&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F" target="_blank"name="tj_reg" class="reg">注册</a>
- </div>
- * */
- private static void OuterHtml(){
- String html = doc.body().getElementById("content").getElementById("u").outerHtml();
- //Element html =doc.body().getElementById("content").getElementById("u");
- System.out.println(html);
- }
- //设置节点或者Document的text值
- //设置Document的text值时,body中的节点将全部被删除
- private static void Text(String text){
- //设置节点的text
- Element e = doc.body().getElementById("content").getElementById("u").select("a").get(0).text(text);
- System.out.println(e);
- //设置Document的text值
- doc.text(text);
- System.out.println(doc);
- }
- //获取节点或者Document的text值
- private static void Text(){
- String text = doc.body().getElementById("content").getElementById("u").select("a").get(0).text();
- System.out.println(text);
- }
- //设置Document的title值
- private static void Title(String title){
- doc.title(title);
- System.out.println(doc);
- }
- //获取Document的title值
- private static void Title(){
- String text = doc.title();
- System.out.println(text);
- }
- }
四、总结
Document类主要是一些有关HTML文档的方法,包括获得HTML文档的body、头Head、整体框架Shell、标题Title、Text(其实就是body的内容)、HTML文档的拷贝,函数作用的主体大多数为Document。可以更改Title、Text以及HTML文档中某个节点的text值。
本文为Eliot原创,转载请注明出处:http://blog.youkuaiyun.com/xyw_eliot/article/details/9159857
版权声明:本文为博主原创文章,未经博主允许不得转载。