程序一:
import java.io.File;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small jsoup demo: scrapes links from a download listing page and passes each one to
 * {@link crawldata#saveUrl(String, String, String)}.
 *
 * <p>{@link #main(String[])} runs {@link #parseUrl()}. {@link #parseString()} and
 * {@link #parseFile()} are stand-alone experiments that only print to standard output.
 */
public class crawltest1 {
    /** Receives every scraped link (name, address, source site). */
    private final crawldata op;

    /** Creates the crawler together with its {@link crawldata} sink. */
    public crawltest1() {
        this.op = new crawldata();
    }

    /**
     * Entry point: runs the listing-page crawl.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        crawltest1 t = new crawltest1();
        t.parseUrl();
    }

    /**
     * Parses a string literal with {@code Jsoup.parse(String)} and prints the resulting document.
     *
     * <p>Note: {@code Jsoup.parse(String)} treats its argument as HTML markup; it does not fetch
     * anything, so the literal below is parsed as page text. Use {@code Jsoup.connect(...).get()}
     * (as {@link #parseUrl()} does) to download a page.
     */
    public void parseString() {
        String url = "http://……"; // placeholder for the source website of the article
        Document doc = Jsoup.parse(url);
        System.out.println(doc);
    }

    /**
     * Downloads the listing page, selects every anchor whose {@code href} matches the regex
     * {@code www}, prints each link and stores it through {@link crawldata}.
     *
     * <p>Link addresses are resolved with {@code absUrl("href")} against the page's base URI,
     * so absolute hrefs are kept as they are and relative ones are expanded correctly. Prefixing
     * the site root by hand would corrupt hrefs that are already absolute. Anchors whose href
     * cannot be turned into a valid URL are skipped.
     *
     * <p>An {@link IOException} raised while fetching the page is printed and not propagated.
     */
    public void parseUrl() {
        try {
            String docWeb = "http://www.…….cn";
            Document doc = Jsoup.connect("http://www.…….cn/download/do_list.jsp?TYPE=1").get();
            Elements hrefs = doc.select("a[href~=www]");
            for (Element href : hrefs) {
                String docLink = href.absUrl("href");
                if (docLink.isEmpty()) {
                    // absUrl returns "" when the attribute cannot be resolved to a URL.
                    continue;
                }
                String docName = href.text();
                System.out.println(" address=" + docLink + " " + " name=" + docName);
                op.saveUrl(docName, docLink, docWeb);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the local file {@code d:\abc\input.html} as UTF-8 and prints the anchors that sit
     * directly inside a {@code td} whose title starts with {@code IA} and whose href starts with
     * {@code javascript:view}: first the elements themselves, then their inner HTML.
     *
     * <p>An {@link IOException} raised while reading the file is printed and not propagated.
     */
    public void parseFile() {
        try {
            File input = new File("d:\\abc\\input.html");
            Document doc = Jsoup.parse(input, "UTF-8");
            // Extract all of the code/ID links.
            Elements codes = doc.body().select("td[title^=IA] > a[href^=javascript:view]");
            System.out.println(codes);
            System.out.println("------------------");
            System.out.println(codes.html());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
程序二:
import java.io.File;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * jsoup experiment that looks for the pagination control on the download listing page: it
 * fetches the page and prints every element whose own text contains "尾页" ("last page").
 */
public class testpage {
    /** Storage helper; created by the constructor but not used by {@link #parseUrl()}. */
    private final crawldata op;

    /** Creates the test page helper together with its {@link crawldata} instance. */
    public testpage() {
        this.op = new crawldata();
    }

    /**
     * Entry point: runs this class's {@link #parseUrl()}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Instantiate testpage itself; creating a crawltest1 here would run the other crawler.
        testpage t = new testpage();
        t.parseUrl();
    }

    /**
     * Downloads the listing page and prints the elements selected by
     * {@code :containsOwn(尾页)}, i.e. elements whose directly owned text contains "尾页".
     *
     * <p>An {@link IOException} raised while fetching the page is printed and not propagated.
     */
    public void parseUrl() {
        try {
            Document doc = Jsoup.connect("http://www.……cn/download/do_list.jsp?TYPE=1").get();
            Elements href2 = doc.select(":containsOwn(尾页)");
            System.out.println(href2.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
程序三:
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Minimal JDBC helper that stores scraped links in the {@code docfirst} table of the local
 * MySQL database {@code test1}.
 */
public class crawldata {
    /**
     * Loads the MySQL JDBC driver class and opens a new connection to
     * {@code jdbc:mysql://localhost/test1} with the credentials hard-coded below.
     *
     * @return a new open connection; the caller is responsible for closing it
     * @throws Exception if the driver class cannot be loaded or the connection cannot be opened
     */
    public static Connection getConnection() throws Exception {
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://localhost/test1";
        String username = "root";
        String password = "********";
        Class.forName(driver);
        return DriverManager.getConnection(url, username, password);
    }

    /**
     * Inserts one row into {@code docfirst(docname, docaddress, docweb)}.
     *
     * <p>The values are bound as statement parameters rather than concatenated into the SQL
     * text, so names or addresses containing quotes can neither break nor alter the statement.
     * The statement and connection are closed on every path by try-with-resources.
     *
     * <p>Failures are reported on standard error and not propagated to the caller.
     *
     * @param name link text to store in {@code docname}
     * @param url  link address to store in {@code docaddress}
     * @param web  source site to store in {@code docweb}
     */
    public void saveUrl(String name, String url, String web) {
        String sql = "INSERT INTO docfirst(docname,docaddress,docweb) VALUES(?,?,?)";
        try (Connection conn = getConnection();
                PreparedStatement stmt = conn.prepareStatement(sql)) {
            stmt.setString(1, name);
            stmt.setString(2, url);
            stmt.setString(3, web);
            stmt.executeUpdate();
        } catch (SQLException ex) {
            System.err.println("SQLException: " + ex.getMessage());
            System.err.println("SQLState: " + ex.getSQLState());
            System.err.println("Message: " + ex.getMessage());
            System.err.println("Vendor error code: " + ex.getErrorCode());
        } catch (Exception e) {
            // getConnection() declares Exception (e.g. the driver class is missing).
            System.err.println("Exception: " + e.getMessage());
        }
    }
}