需求:爬取二手房信息(房天下、58同城)
已开源到 GitHub(项目地址),基于 Spring Boot,使用 IDEA 开发
导入依赖
<!-- jsoup: used by the crawler tasks to download listing pages and select elements from the HTML -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
数据放入 Redis 中,引入 Redis 依赖
<!-- Spring Boot Redis starter; no version is given here (presumably managed by the Spring Boot parent POM - confirm) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
开启定时任务
/**
 * Application entry point.
 *
 * <p>{@code @EnableScheduling} switches on scheduled-task processing, which the
 * {@code @Scheduled} crawler methods in this project rely on.
 */
@SpringBootApplication
@EnableScheduling
public class MqApplication {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments, handed to Spring unchanged
     */
    public static void main(String[] args) {
        SpringApplication.run(MqApplication.class, args);
    }
}
实体类
// Plain data holder for one scraped housing listing; the crawler tasks fill it
// through setters and store its toString() output in Redis.
public class RoomInfo58 {
// Listing identifier assigned by the crawler (also used for de-duplication).
private String id;
// Listing title text.
private String title;
// Free-text description of the unit as scraped from the list page.
private String style;
// Location text as scraped from the list page.
private String position;
// Contact name shown on the listing.
private String name;
// Price text as scraped from the list page.
private String price;
// Filled by the crawlers from the page's "unit" / "danjia" element.
private String area;
// Contact phone text taken from the detail page.
private String phone;
// Name of the site the listing came from.
private String comefrom;
// NOTE(review): declared as String, but the crawler tasks call setCreatetime(new Date()) - confirm the setter's parameter type.
private String createtime;
// Detail page URL of the listing.
private String url;
// Not populated by the crawler code shown alongside this class.
private String sendtime;
// getters and setters omitted
工作类
1. 58同城
/**
 * Scheduled crawler for the 58.com second-hand housing list page.
 *
 * <p>Every run downloads the list page, and for each listing that has not been
 * seen before (tracked in the Redis set {@code "ids"}) downloads the detail page
 * and pushes the resulting {@link RoomInfo58} onto the Redis list {@code "roominfo"}.
 *
 * <p>Created by daitian on 2017/5/31.
 */
@Component
public class TaskTest {

    /** Offsets of the listing id inside the {@code logr} attribute of a list item. */
    private static final int ID_START = 19;
    private static final int ID_END = 33;

    @Autowired
    JedisCluster jedisCluster;

    /** List page that is polled for new listings. */
    String one = "http://ty.58.com/ershoufang/0/";

    /** Detail page URL template; the placeholder {@code 11111} is replaced by the listing id. */
    String ones = "http://ty.58.com/ershoufang/11111x.shtml";

    // Manual reset helper, kept disabled: clears the stored listings and the seen-id set.
    // @Scheduled(fixedRate = 1000)
    // public void reportCurrentTimes() {
    // jedisCluster.del("roominfo");
    // jedisCluster.del("ids");
    // System.out.println("操作成功!");
    // }

    /**
     * Polls the list page and stores every listing not seen before.
     *
     * <p>A failure while processing one listing no longer aborts the remaining
     * ones, and the listing's id is released again so that it is retried on the
     * next run instead of being silently lost.
     */
    @Scheduled(fixedRate = 10000)
    public void tongcheng() {
        try {
            Document document = Jsoup.connect(one).get();
            Elements items = document.getElementsByClass("house-list-wrap").select("li");
            for (Element item : items) {
                String id = extractId(item);
                if (id == null) {
                    // List item without a usable "logr" attribute; nothing to identify it by.
                    continue;
                }
                // SADD reports 0 when the id is already in the set, i.e. the listing was handled before.
                Long added = jedisCluster.sadd("ids", id);
                if (added == null || added == 0) {
                    continue;
                }
                try {
                    RoomInfo58 roomInfo58 = buildRoomInfo(item, id);
                    jedisCluster.lpush("roominfo", roomInfo58.toString());
                } catch (Exception e) {
                    // Undo the "seen" marker so the listing is picked up again next run.
                    jedisCluster.srem("ids", id);
                    e.printStackTrace();
                }
            }
            // jedisCluster.ltrim("roominfo",0,10000);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns the listing id cut out of the item's {@code logr} attribute, or {@code null} if it is too short. */
    private static String extractId(Element item) {
        String logr = item.attr("logr");
        if (logr.length() < ID_END) {
            return null;
        }
        return logr.substring(ID_START, ID_END);
    }

    /** Builds the entity from the list item and the phone number found on the listing's detail page. */
    private RoomInfo58 buildRoomInfo(Element item, String id) throws Exception {
        RoomInfo58 roomInfo58 = new RoomInfo58();
        roomInfo58.setId(id);
        roomInfo58.setComefrom("58同城");
        roomInfo58.setCreatetime(new Date());
        roomInfo58.setTitle(item.select("h2").text());

        Elements baseInfo = item.select("p[class=baseinfo]");
        roomInfo58.setStyle(textAt(baseInfo, 0));
        roomInfo58.setPosition(textAt(baseInfo, 1));

        roomInfo58.setName(item.select("span[class=jjrname-outer]").text());
        roomInfo58.setPrice(item.select("p[class=sum]").text());
        roomInfo58.setArea(item.select("p[class=unit]").text());

        String url = ones.replace("11111", id);
        roomInfo58.setUrl(url);

        // The phone number is only available on the detail page.
        Document doc = Jsoup.connect(url).get();
        roomInfo58.setPhone(doc.select("p[class=phone-num]").text());
        // TODO: if the phone number is missing, store the address instead.
        return roomInfo58;
    }

    /** Returns the text of the element at {@code index}, or an empty string when there is no such element. */
    private static String textAt(Elements elements, int index) {
        return index < elements.size() ? elements.get(index).text() : "";
    }
}
2. 房天下
/**
 * Scheduled crawler for the fang.com second-hand housing list page.
 *
 * <p>Every run downloads the list page, and for each listing whose link has not
 * been seen before (tracked in the Redis set {@code "ids"}) downloads the detail
 * page and pushes the resulting {@link RoomInfo58} onto the Redis list
 * {@code "roominfo"}.
 *
 * <p>Created by daitian on 2017/6/1.
 */
@Component
public class TaskFangTest {

    @Autowired
    JedisCluster jedisCluster;

    /** List page that is polled for new listings. */
    String fang = "http://esf.taiyuan.fang.com/house/a211-h316/";

    /** Site root that relative listing links are resolved against. */
    String fangs = "http://esf.taiyuan.fang.com/";

    /**
     * Polls the list page and stores every listing not seen before.
     *
     * <p>A failure while processing one listing no longer aborts the remaining
     * ones, and the listing's id is released again so that it is retried on the
     * next run instead of being silently lost.
     */
    @Scheduled(fixedRate = 10000)
    public void fang() {
        try {
            Document document = Jsoup.connect(fang).get();
            Elements listings = document.getElementsByClass("houseList").select("dl");
            for (Element listing : listings) {
                // The listing's link doubles as its id.
                String id = listing.select("dt[class=img rel floatl]").select("a").attr("href");
                if (id.isEmpty()) {
                    // No link found: nothing to de-duplicate on and no detail page to fetch.
                    continue;
                }
                // SADD reports 0 when the id is already in the set, i.e. the listing was handled before.
                Long added = jedisCluster.sadd("ids", id);
                if (added == null || added == 0) {
                    continue;
                }
                try {
                    RoomInfo58 roomInfo58 = buildRoomInfo(listing, id);
                    jedisCluster.lpush("roominfo", roomInfo58.toString());
                } catch (Exception e) {
                    // Undo the "seen" marker so the listing is picked up again next run.
                    jedisCluster.srem("ids", id);
                    e.printStackTrace();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds the entity from the list entry and the contact data found on the listing's detail page. */
    private RoomInfo58 buildRoomInfo(Element listing, String id) throws IOException {
        RoomInfo58 roomInfo58 = new RoomInfo58();
        roomInfo58.setId(id);
        roomInfo58.setComefrom("房天下");
        roomInfo58.setCreatetime(new Date());
        roomInfo58.setTitle(listing.select("p[class=title]").text());

        // NOTE(review): "�O" is presumably how the square-metre sign shows up in the
        // decoded page text - confirm before touching the literal.
        Element areaLine = listing.select("div[class=area alignR]").select("p").first();
        String areaText = areaLine == null ? "" : areaLine.text().replace("�O", "m2");
        roomInfo58.setStyle(listing.select("p[class=mt12]").text() + areaText);

        roomInfo58.setPosition(listing.select("p[class=mt10]").text());
        roomInfo58.setPrice(listing.select("p[class=mt5 alignR]").text());
        roomInfo58.setArea(listing.select("p[class=danjia alignR mt5]").text().replace("�O", "m2"));

        String url = detailUrl(id);
        roomInfo58.setUrl(url);

        // Contact name and phone number are only available on the detail page.
        Document doc = Jsoup.connect(url).get();
        Elements contact = doc.getElementsByClass("bookTel");
        roomInfo58.setPhone(contact.select("strong").text());
        roomInfo58.setName(contact.select("a").text().replace("业主", ""));
        return roomInfo58;
    }

    /** Resolves a listing link against the site root without producing a double slash. */
    private String detailUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        if (fangs.endsWith("/") && href.startsWith("/")) {
            return fangs + href.substring(1);
        }
        return fangs + href;
    }
}
获取数据(RestController)
/**
 * Reads the complete Redis list {@code "roominfo"} (range 0 to -1) and returns
 * it rendered with the list's {@code toString()}.
 *
 * @return string form of all stored entries
 */
@GetMapping()
public String taskTest() {
    return jedisCluster.lrange("roominfo", 0, -1).toString();
}