// Java crawler that scrapes table data from a web page
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.util.Properties;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class Pachong implements Runnable {
    public void run() {
        String Rpt_date = null;
        double price = 0;
        // Target page URL
        String url = "http://www.sse.net.cn/index/singleIndex?indexType=cbcfi";
        try {
            Document doc = Jsoup.connect(url).get();
            // Select the first table on the page
            Element element = doc.select("table").first();
            Elements els = element.select("tr");
            for (Element el : els) {
                Elements ele = el.select("td");
                for (Element elem : ele) {
                    // A cell like "本期2024-01-01": strip the two-character
                    // "本期" ("current period") prefix to get the report date
                    if (elem.text().contains("本期")) {
                        Rpt_date = elem.text().substring(3);
                    }
                    // Row for the Qinhuangdao-Guangzhou (60-70k DWT) route:
                    // the price sits in the 4th cell of that row
                    if (elem.text().equals("秦皇岛-广州(6-7万DWT)")) {
                        price = Double.parseDouble(el.select("td").get(3).text());
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Save the scraped data to a MySQL database;
        // connection settings come from config.property on the classpath
        InputStream in = Pachong.class.getClassLoader().getResourceAsStream("config.property");
        String dbURL = "";
        String userName = "";
        String userPwd = "";
        Properties properties = new Properties();
        try {
            properties.load(in);
            dbURL = properties.getProperty("dburl");
            userName = properties.getProperty("dbuser");
            userPwd = properties.getProperty("dbpwd");
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            Connection dbConn = DriverManager.getConnection(dbURL, userName, userPwd);
            Statement statement = dbConn.createStatement();
            String query = "SQL语句"; // placeholder in the original; replace with the actual INSERT statement
            statement.addBatch(query);
            statement.executeBatch();
            statement.close();
            dbConn.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
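The row-scanning logic above can be sketched without Jsoup or a live page: each tr becomes a list of cell strings, and the price is read from the cell at index 3 of the row whose first cell matches the route name. This is a simplified stand-in for illustration only; the sample numbers and helper names are made up, not from the original page.

```java
import java.util.Arrays;
import java.util.List;

public class TableScanSketch {
    // Mimics the crawler: find the row whose first cell equals the route
    // name, then parse the value in the 4th cell (index 3) as a double.
    static double findPrice(List<List<String>> rows, String route) {
        for (List<String> cells : rows) {
            if (cells.size() > 3 && cells.get(0).equals(route)) {
                return Double.parseDouble(cells.get(3));
            }
        }
        return 0; // same fallback as the crawler's initial value
    }

    public static void main(String[] args) {
        // Fake table rows standing in for the parsed <tr>/<td> elements
        List<List<String>> rows = Arrays.asList(
                Arrays.asList("其他航线", "a", "b", "10.0"),
                Arrays.asList("秦皇岛-广州(6-7万DWT)", "x", "y", "31.5"));
        System.out.println(findPrice(rows, "秦皇岛-广州(6-7万DWT)"));
    }
}
```

The real code additionally guards nothing against a missing cell, which is why index-based access like get(3) is worth wrapping in a size check as done here.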
This post presents a Java crawler that uses the Jsoup library to fetch table data from a given URL, locating specific values by walking the parsed HTML elements. It also shows how to read connection settings from a properties file and persist the scraped data to a MySQL database.
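The config.property file read by the crawler is a plain key=value file. A minimal sketch of loading it, using the key names dburl, dbuser, and dbpwd taken from the code (the sample values here are made up):

```java
import java.io.StringReader;
import java.util.Properties;

public class ConfigSketch {
    public static void main(String[] args) throws Exception {
        // Sample contents of config.property (illustrative values only)
        String sample = "dburl=jdbc:mysql://localhost:3306/test\n"
                      + "dbuser=root\n"
                      + "dbpwd=secret\n";
        Properties p = new Properties();
        p.load(new StringReader(sample));  // in the crawler, a classpath InputStream is used instead
        System.out.println(p.getProperty("dburl"));
        System.out.println(p.getProperty("dbuser"));
    }
}
```

Using getProperty (as sketched here) rather than casting the result of get is slightly safer: it returns null for a missing key instead of risking a ClassCastException on non-string values.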