5i5j的房屋出租爬取

本文介绍了一个使用Python进行网页爬取的实际案例,通过requests和lxml.etree模块从5i5j网站抓取北京回龙观地区的租房信息,并将抓取到的数据(包括地址、房间描述及价格)存储到MySQL数据库中。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# Import modules
import requests
from lxml import etree
import time
# Import the project's MySQL wrapper class
from mysql import MysqlHelper
# Instantiate the MySQL helper; wujia() uses it to run the insert statement
mc = MysqlHelper()
def wujia(url, headers, pages=20, delay=0, timeout=10):
    """Scrape rental listings page by page and store every listing in MySQL.

    For each page number from 1 to ``pages`` the URL template is filled in,
    the page is fetched and parsed, and every ``<li>`` under
    ``ul.pList`` is turned into one ``(dizhi, room_data, money)`` row that
    is inserted through the module-level ``mc`` helper.

    Args:
        url: URL template containing one ``{}`` placeholder for the page
            number.
        headers: Mapping of HTTP request headers sent with every request.
        pages: Number of listing pages to fetch (default 20, as before).
        delay: Seconds to sleep after each page; 0 disables the pause.
        timeout: Per-request timeout in seconds passed to ``requests.get``.
    """
    # NOTE(review): the placeholders are wrapped in double quotes. If
    # MysqlHelper hands the parameters to a DB-API cursor, the driver adds
    # its own quoting as well -- confirm against MysqlHelper before changing.
    sql = 'insert into 5i5j(dizhi,room_data,money) values ("%s","%s","%s")'

    for page in range(1, pages + 1):
        fullurl = url.format(page)
        response = requests.get(fullurl, headers=headers, timeout=timeout)
        tree = etree.HTML(response.text)

        for item in tree.xpath('//ul[@class="pList"]/li'):
            dizhi_nodes = item.xpath('./div[2]/div/p[2]/text()')
            room_nodes = item.xpath('./div[2]/div[1]/p[1]/text()')
            money_nodes = item.xpath('./div[2]/div[1]/div//strong/text()')

            # Entries without a description or a price cannot be stored as a
            # listing row; skip them instead of failing on an empty result.
            if not room_nodes or not money_nodes:
                continue
            try:
                money = int(money_nodes[0])
            except ValueError:
                # Price text is not a plain integer; skip this entry.
                continue

            # Address
            if dizhi_nodes:
                dizhi = dizhi_nodes[0].replace('·', '').strip()
                print(dizhi)
            else:
                # Store an empty string rather than the empty XPath result
                # list when the address is missing.
                dizhi = ''
                print('无地址')

            # Room description
            room_data = room_nodes[0].replace(' ', '').replace('·', '')
            print(room_data)

            # Monthly price
            print(money)

            # Insert inside the loop so every listing is saved, not only the
            # last one that happened to be parsed.
            mc.execute_modify_sql(sql, (dizhi, room_data, money))

        if delay:
            time.sleep(delay)
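# Pause between requests (currently disabled)
# time.sleep(3)
# Run the scraper, passing in the URL template and the request headers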
if __name__ == "__main__":
    # Listing-page URL template; wujia() fills "{}" with the page number.
    url = "https://bj.5i5j.com/zufang/huilongguan/n{}/"

    # Browser-like request headers. The Cookie value is split into adjacent
    # string literals (one cookie per line) that concatenate to one string.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        # Disabled: 'Accept-Encoding': 'gzip, deflate, br',
        # Disabled: 'Accept-Language': 'zh-CN,zh;q=0.9',
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": (
            "_Jo0OQK=1349C4397FAF634DC28494B7372F7EE6A6BAFA7626819FB7027758023F7044ABBF04A8994B1C24AC6546C9F8AD8FBB6099D9577A4343988ABD03B1C034CCCF5A512DE8682CA7D10E3B498FB9E3C853EFEE298FB9E3C853EFEE215D8BEE34E43E5C0GJ1Z1IQ==; "
            "_ga=GA1.2.1741003594.1534567087; "
            "_gid=GA1.2.429105984.1534567087; "
            "yfx_c_g_u_id_10000001=_ck18081812380714557549321883736; "
            "PHPSESSID=a1eros861f9teh5n4r6lv6f30j; "
            "Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534567088,1534572065; "
            "zufang_BROWSES=41290006; "
            "domain=bj; "
            "yfx_f_l_v_t_10000001=f_t_1534567087448__r_t_1534567087448__v_t_1534591789846__r_c_0; "
            "Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534591798"
        ),
        "Host": "bj.5i5j.com",
        "Referer": "https://bj.5i5j.com/zufang/huilongguan/n2/",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    }

    wujia(url, headers)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值