1.jar包: /org/jsoup/jsoup/1.8.2/jsoup-1.8.2.jar
2.java爬虫教程: http://blog.youkuaiyun.com/column/details/jsoup.html
-- 3. Database table structure
--
-- bank_list holds one row per bank: its Chinese name, its English short code
-- and the URL of its icon. Rows are soft-deleted through `status`.
-- The unique key (name, ename, status) is what ON DUPLICATE KEY UPDATE
-- upserts collide on.
CREATE TABLE `bank_list` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(50) DEFAULT '' COMMENT '银行的中文名字',
  `ename` varchar(20) DEFAULT '' COMMENT '英文名字',
  `icon` varchar(1024) DEFAULT '' COMMENT '图标地址',
  `status` tinyint(1) DEFAULT '1' COMMENT '状态 1正常(默认) 0删除',
  -- Creation time: filled in once when the row is inserted. It deliberately has
  -- no ON UPDATE CURRENT_TIMESTAMP clause; with that clause MySQL would reset
  -- the value whenever any other column of the row changes, so the creation
  -- time would be lost on the first update or upsert.
  `cts` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  -- Modification time: refreshed automatically on every change to the row.
  `uts` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
  PRIMARY KEY (`id`),
  UNIQUE KEY `idx_name_ename_status` (`name`,`ename`,`status`),
  KEY `idx_cts` (`cts`)
  -- NOTE(review): AUTO_INCREMENT=165 looks like residue from dumping an
  -- existing table; confirm whether new tables should really start there.
) ENGINE=InnoDB AUTO_INCREMENT=165 DEFAULT CHARSET=utf8 COMMENT='银行列表';
4.java代码: import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class Main { public static void main(String[] args) throws Exception{ //1.获取阿里银行合作伙伴页面 Document doc = Jsoup.connect("http://ab.alipay.com/i/yinhang.htm").get(); Element body = doc.body(); //2.获取对应数据的标签 Elements es = body.select("li#ap-a-cnt-bank>ul>li>a>span"); //3.遍历标签中的数据 for (Element e:es){ String name = e.attr("title"); String eName = e.attr("class"); //4.获取银行icon地址 String url = "https://apimg.alipay.com/combo.png?d=cashier&t="; if (eName!=null && eName!="") eName = eName.substring(eName.indexOf(" ")+1); url = url+eName; //5.打印sql System.out.println("insert into bank_list(name,ename,icon,cts,uts) values('"+name+"','"+eName+"','"+url+"',now(),now()) ON DUPLICATE KEY UPDATE uts=now();"); } } }
-- 5. Result
--
-- Generated upserts, one statement per bank. Each inserts the bank's name,
-- short code and icon URL; when the row collides with an existing unique key
-- only the modification time `uts` is refreshed.
INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('国家开发银行', 'CDB', 'https://apimg.alipay.com/combo.png?d=cashier&t=CDB', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('中国工商银行', 'ICBC', 'https://apimg.alipay.com/combo.png?d=cashier&t=ICBC', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('中国农业银行', 'ABC', 'https://apimg.alipay.com/combo.png?d=cashier&t=ABC', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('中国银行', 'BOC', 'https://apimg.alipay.com/combo.png?d=cashier&t=BOC', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('中国建设银行', 'CCB', 'https://apimg.alipay.com/combo.png?d=cashier&t=CCB', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('中国邮政储蓄银行', 'PSBC', 'https://apimg.alipay.com/combo.png?d=cashier&t=PSBC', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('交通银行', 'COMM', 'https://apimg.alipay.com/combo.png?d=cashier&t=COMM', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('招商银行', 'CMB', 'https://apimg.alipay.com/combo.png?d=cashier&t=CMB', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('上海浦东发展银行', 'SPDB', 'https://apimg.alipay.com/combo.png?d=cashier&t=SPDB', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('兴业银行', 'CIB', 'https://apimg.alipay.com/combo.png?d=cashier&t=CIB', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('华夏银行', 'HXBANK', 'https://apimg.alipay.com/combo.png?d=cashier&t=HXBANK', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('广东发展银行', 'GDB', 'https://apimg.alipay.com/combo.png?d=cashier&t=GDB', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();

INSERT INTO bank_list (name, ename, icon, cts, uts)
VALUES ('中国民生银行', 'CMBC', 'https://apimg.alipay.com/combo.png?d=cashier&t=CMBC', NOW(), NOW())
ON DUPLICATE KEY UPDATE uts = NOW();