爬取二手房信息

需求:爬取二手房信息(房天下、58同城)

已开源到 GitHub(项目地址),基于 Spring Boot,使用 IDEA 开发

导入依赖

<!-- jsoup: used by the scraper tasks below to fetch pages (Jsoup.connect) and query them with CSS selectors. -->
<!-- NOTE(review): 1.10.2 is an old release; check for a newer version before reusing this snippet. -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>

数据放入 Redis 中,引入 Redis 依赖

<!-- Spring Boot Redis starter; no version is given here, so it is presumably managed by the Spring Boot parent/BOM. -->
<!-- NOTE(review): the task classes below inject a JedisCluster; confirm that a JedisCluster bean is actually defined in the project. -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>

开启定时任务

/**
 * Application entry point.
 *
 * <p>{@code @EnableScheduling} switches on processing of {@code @Scheduled}
 * methods, which the scraper tasks in this project rely on.
 */
@EnableScheduling
@SpringBootApplication
public class MqApplication {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments, handed to Spring unchanged
     */
    public static void main(String[] args) {
        SpringApplication.run(MqApplication.class, args);
    }
}

实体类

/**
 * One scraped second-hand housing listing. Despite the name it is used for
 * listings from both 58.com and fang.com (see TaskTest and TaskFangTest).
 *
 * NOTE(review): the tasks push {@code toString()} of this object into Redis;
 * confirm that toString() is overridden to produce a useful representation.
 */
public class RoomInfo58 {
    // Listing identifier: a substring of the "logr" attribute for 58.com,
    // the listing link's href for fang.com. Also used for de-duplication.
    private String id;
    // Listing headline text.
    private String title;
    // Layout / basic-info text scraped from the listing row.
    private String style;
    // Location text scraped from the listing row.
    private String position;
    // Contact name (agent or owner) as shown on the page.
    private String name;
    // Total price text.
    private String price;
    // Filled from the unit-price element by both tasks.
    private String area;
    // Phone number text taken from the listing's detail page.
    private String phone;
    // Source site label; the tasks set "58同城" or "房天下".
    private String comefrom;
    // NOTE(review): declared as String, but the tasks call setCreatetime(new Date());
    // confirm the setter's parameter type.
    private String createtime;
    // Detail-page URL of the listing.
    private String url;
    // Not set by any of the scraper code shown here.
    private String sendtime;

    // Getters and setters (and the class's closing brace) are omitted in the original article.

工作类
1. 58

/**
 * Scheduled scraper for second-hand housing listings on 58.com.
 *
 * <p>Every 10 seconds it loads the listing page, and for each listing whose id
 * has not been seen before it loads the detail page for the phone number and
 * pushes the resulting {@link RoomInfo58} (as {@code toString()}) onto the
 * Redis list {@code "roominfo"}. Seen ids are tracked in the Redis set
 * {@code "ids"}.
 *
 * <p>Each listing is processed independently: a malformed row or a failed
 * detail-page request is logged and skipped without aborting the rest of the
 * batch, and the id of a listing that could not be stored is removed from
 * {@code "ids"} again so it is retried on a later run.
 *
 * <p>Development hint kept from the original: to reset the scraper state,
 * delete the Redis keys {@code "roominfo"} and {@code "ids"}.
 *
 * Created by daitian on 2017/5/31.
 */
@Component
public class TaskTest {
    @Autowired
    JedisCluster jedisCluster;
    /** Listing page that is polled for the newest entries. */
    String one = "http://ty.58.com/ershoufang/0/";
    /** Detail-page URL template; the placeholder "11111" is replaced by the listing id. */
    String ones = "http://ty.58.com/ershoufang/11111x.shtml";

    /**
     * Polls the listing page and stores every listing not seen before.
     * Never throws: failures are printed and the affected listing is skipped.
     */
    @Scheduled(fixedRate = 10000)
    public void tongcheng() {
        Elements items;
        try {
            // Fetch the newest listings.
            Document document = Jsoup.connect(one).get();
            items = document.getElementsByClass("house-list-wrap").select("li");
        } catch (Exception e) {
            e.printStackTrace();
            return;
        }
        for (Element item : items) {
            try {
                storeIfNew(item);
            } catch (Exception e) {
                // Keep going: one bad listing must not abort the whole batch.
                e.printStackTrace();
            }
        }
        // NOTE: "roominfo" grows without bound; the original article left
        // jedisCluster.ltrim("roominfo", 0, 10000) disabled here.
    }

    /** Stores one listing unless its id is already recorded in the "ids" set. */
    private void storeIfNew(Element item) throws Exception {
        String logr = item.attr("logr");
        if (logr.length() < 33) {
            // Not a listing row (no usable "logr" attribute), so there is no id to extract.
            return;
        }
        String id = logr.substring(19, 33);
        Long added = jedisCluster.sadd("ids", id);
        if (added == null || added == 0) {
            // The id already exists: this listing was handled on an earlier run.
            return;
        }
        try {
            jedisCluster.lpush("roominfo", buildRoomInfo(item, id).toString());
        } catch (Exception e) {
            // Un-mark the id so that the listing is retried on the next run.
            try {
                jedisCluster.srem("ids", id);
            } catch (RuntimeException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
    }

    /** Builds the listing from its row on the listing page plus the phone number on its detail page. */
    private RoomInfo58 buildRoomInfo(Element item, String id) throws Exception {
        RoomInfo58 roomInfo58 = new RoomInfo58();
        roomInfo58.setId(id);
        roomInfo58.setComefrom("58同城");
        roomInfo58.setCreatetime(new Date());
        roomInfo58.setTitle(item.select("h2").text());
        Elements baseInfo = item.select("p[class=baseinfo]");
        roomInfo58.setStyle(textAt(baseInfo, 0));
        roomInfo58.setPosition(textAt(baseInfo, 1));
        roomInfo58.setName(item.select("span[class=jjrname-outer]").text());
        roomInfo58.setPrice(item.select("p[class=sum]").text());
        roomInfo58.setArea(item.select("p[class=unit]").text());
        String url = ones.replace("11111", id);
        roomInfo58.setUrl(url);
        // The phone number is only available on the detail page.
        Document doc = Jsoup.connect(url).get();
        roomInfo58.setPhone(doc.select("p[class=phone-num]").text());
        // TODO: if the phone number is missing, store the address instead.
        return roomInfo58;
    }

    /** Returns the text of the element at {@code index}, or "" when there is no such element. */
    private static String textAt(Elements elements, int index) {
        return index < elements.size() ? elements.get(index).text() : "";
    }
}
  2. 房天下
/**
 * Scheduled scraper for second-hand housing listings on fang.com.
 *
 * <p>Every 10 seconds it loads the listing page, and for each listing whose
 * link has not been seen before it loads the detail page for the contact data
 * and pushes the resulting {@link RoomInfo58} (as {@code toString()}) onto the
 * Redis list {@code "roominfo"}. Seen links are tracked in the Redis set
 * {@code "ids"}.
 *
 * <p>Each listing is processed independently: a malformed row or a failed
 * detail-page request is logged and skipped without aborting the rest of the
 * batch, and the id of a listing that could not be stored is removed from
 * {@code "ids"} again so it is retried on a later run.
 *
 * Created by daitian on 2017/6/1.
 */
@Component
public class TaskFangTest {
    @Autowired
    JedisCluster jedisCluster;
    /** Listing page that is polled for the newest entries. */
    String fang = "http://esf.taiyuan.fang.com/house/a211-h316/";
    /** Prefix put in front of a listing's href to form its detail-page URL. */
    String fangs = "http://esf.taiyuan.fang.com/";

    /**
     * Polls the listing page and stores every listing not seen before.
     * Never throws: failures are printed and the affected listing is skipped.
     */
    @Scheduled(fixedRate = 10000)
    public void fang() {
        Elements listings;
        try {
            // Fetch the newest listings.
            Document document = Jsoup.connect(fang).get();
            listings = document.getElementsByClass("houseList").select("dl");
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        for (Element listing : listings) {
            try {
                storeIfNew(listing);
            } catch (Exception e) {
                // Keep going: one bad listing must not abort the whole batch.
                e.printStackTrace();
            }
        }
    }

    /** Stores one listing unless its link is already recorded in the "ids" set. */
    private void storeIfNew(Element listing) throws Exception {
        String id = listing.select("dt[class=img rel floatl]").select("a").attr("href");
        if (id.isEmpty()) {
            // No link means there is no detail page to fetch and nothing to de-duplicate on.
            return;
        }
        Long added = jedisCluster.sadd("ids", id);
        if (added == null || added == 0) {
            // The id already exists: this listing was handled on an earlier run.
            return;
        }
        try {
            jedisCluster.lpush("roominfo", buildRoomInfo(listing, id).toString());
        } catch (Exception e) {
            // Un-mark the id so that the listing is retried on the next run.
            try {
                jedisCluster.srem("ids", id);
            } catch (RuntimeException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
    }

    /** Builds the listing from its row on the listing page plus the contact data on its detail page. */
    private RoomInfo58 buildRoomInfo(Element listing, String id) throws IOException {
        RoomInfo58 roomInfo58 = new RoomInfo58();
        roomInfo58.setId(id);
        roomInfo58.setComefrom("房天下");
        roomInfo58.setCreatetime(new Date());
        roomInfo58.setTitle(listing.select("p[class=title]").text());
        // The first <p> of the area column may be absent; treat that as empty text.
        Element areaLine = listing.select("div[class=area alignR]").select("p").first();
        String areaText = areaLine == null ? "" : fixSquareMetre(areaLine.text());
        roomInfo58.setStyle(listing.select("p[class=mt12]").text() + areaText);
        roomInfo58.setPosition(listing.select("p[class=mt10]").text());
        roomInfo58.setPrice(listing.select("p[class=mt5 alignR]").text());
        roomInfo58.setArea(fixSquareMetre(listing.select("p[class=danjia alignR mt5]").text()));
        String url = fangs + id;
        roomInfo58.setUrl(url);
        // Phone number and contact name are only available on the detail page.
        Document doc = Jsoup.connect(url).get();
        Elements contact = doc.getElementsByClass("bookTel");
        roomInfo58.setPhone(contact.select("strong").text());
        roomInfo58.setName(contact.select("a").text().replaceAll("业主", ""));
        return roomInfo58;
    }

    /**
     * Replaces the garbled unit token with "m2".
     * The search literal is kept exactly as in the original code; presumably it is
     * how the site's square-metre sign arrives after charset decoding (TODO confirm).
     */
    private static String fixSquareMetre(String text) {
        return text.replaceAll("�O", "m2");
    }
}

获取数据 restController

/**
 * Returns every entry of the Redis list "roominfo" (index 0 through -1),
 * rendered with the list's own {@code toString()}.
 *
 * @return the string form of the complete "roominfo" list
 */
@GetMapping()
    public String taskTest() {
        return jedisCluster.lrange("roominfo", 0, -1).toString();
    }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值