爬取赶集网二手房信息(详细代码)

该网站基本没有反爬措施,下面直接给出完整代码。

import requests
from lxml import etree
import re
import xlwt
import time

# Request headers used for every page fetch: a desktop Chrome User-Agent
# and the Host of the site being crawled.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/72.0.3626.109 Safari/537.36'
    ),
    'Host': 'nb.ganji.com',
}


# Create the Excel workbook and fill row 0 with the column captions.
f = xlwt.Workbook(encoding='utf_8')
sheet01 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)
for _col, _caption in enumerate(
        ('名称', '厅室', '面积', '朝向', '楼层', '小区', '联系人', '价格', '单价')):
    sheet01.write(0, _col, _caption)
# Index of the next data row to write (row 0 holds the captions).
num = 1

# Crawl listing pages 1-70 and append one spreadsheet row per listing.
#
# The workbook is saved in a ``finally`` clause so that rows collected before
# a network or parsing failure are still written to disk.
_whitespace = re.compile(r"\s")  # compiled once; used to strip all whitespace
try:
    for x in range(1, 71):
        url = 'http://nb.ganji.com/ershoufang/pn%d/' % x
        # ``headers`` must be passed by keyword: the second positional
        # parameter of requests.get() is ``params`` (the query string), so a
        # positional dict would never be sent as HTTP headers.
        # The timeout keeps a stalled connection from hanging the crawl.
        response = requests.get(url, headers=headers, timeout=10)
        result = response.text

        html = etree.HTML(result, etree.HTMLParser())
        # Select every second-hand-house listing <div> on the page.
        divs = html.xpath("//div[@class='f-list js-tips-list']/div[contains(@class,'ershoufang-list')]")
        # Walk each listing and pull out the fields to record.
        for div in divs:
            titles = div.xpath(".//dd[contains(@class,'title')]/a/@title")
            info = div.xpath(".//dd[contains(@class,'size')]//text()")
            unit_prices = div.xpath(".//dd[contains(@class,'info')]/div[@class='time']/text()")
            # Skip cards that lack the expected fields instead of aborting
            # the whole crawl with an IndexError.
            if not titles or not unit_prices or len(info) < 11:
                continue

            title = titles[0]
            # The wanted values are read from every third text node starting
            # at index 1 (presumably the nodes in between are separators or
            # whitespace — confirm against the live page markup).
            rooms = info[1]
            area = info[4]
            orientation = info[7]
            floor = info[10]
            address = "".join(div.xpath(".//dd[contains(@class,'address')][1]//text()"))
            address = _whitespace.sub("", address)
            host = "".join(div.xpath(".//dd[contains(@class,'address')][2]//text()"))
            host = _whitespace.sub("", host)
            price = "".join(div.xpath(".//dd[contains(@class,'info')]/div[@class='price']/span/text()"))
            unit_price = unit_prices[0]

            # Column order matches the captions written to row 0.
            row = (title, rooms, area, orientation, floor,
                   address, host, price, unit_price)
            for col, value in enumerate(row):
                sheet01.write(num, col, value)
            num = num + 1
        time.sleep(1)  # pause between pages
finally:
    f.save("二手房"+'.xls')


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值