lianjia 0.01

import requests
from lxml import etree
import pandas as pd
# Search-result page for second-hand housing listings that the script scrapes.
url = 'https://hhht.lianjia.com/ershoufang/rs%E5%8C%97%E4%BA%AC/'

# Browser-like User-Agent sent with the listing request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Output columns, in the order they are written to the CSV.
COLUMNS = [
    'Title',
    'District',
    'Region',
    'Layout',
    'size',
    'Direction',
    'Renovation',
]


def first_text(node, path):
    """Return the first result of ``node.xpath(path)``, stripped.

    ``xpath()`` returns a list; an empty string is returned when nothing
    matches so that one missing field does not abort the whole scrape.
    """
    matches = node.xpath(path)
    return matches[0].strip() if matches else ''


def split_house_info(info):
    """Split a ``'|'``-separated house-info string into named fields.

    The first four segments are taken, in order, as Layout, size,
    Direction and Renovation.  Missing segments become empty strings, and
    the ``'平米'`` unit suffix is removed from the size.

    Args:
        info: The raw house-info text, e.g. ``'a | 89.5平米 | c | d'``.

    Returns:
        A dict with the keys ``Layout``, ``size``, ``Direction`` and
        ``Renovation``.
    """
    parts = [part.strip() for part in info.split('|')]
    # Pad so that short or empty strings still unpack into four fields.
    parts += [''] * (4 - len(parts))
    layout, size, direction, renovation = parts[:4]
    return {
        'Layout': layout,
        'size': size.replace('平米', '').strip(),
        'Direction': direction,
        'Renovation': renovation,
    }


def parse_listings(html):
    """Extract one row dict per listing ``<li>`` from the page HTML.

    Args:
        html: The page source as text.

    Returns:
        A list of dicts keyed by the names in ``COLUMNS``; empty when no
        listing elements are found.
    """
    tree = etree.HTML(html)
    if tree is None:
        return []

    rows = []
    # NOTE(review): XPath attribute matching is case-sensitive -- confirm
    # that 'sellListcontent' is the exact class used by the live page.
    for li in tree.xpath('//ul[@class="sellListcontent"]/li'):
        row = {
            'Title': first_text(li, './div[1]/div[1]/a/text()'),
            'District': first_text(li, './div[2]/div[1]/a[1]/text()'),
            'Region': first_text(li, './div[2]/div[1]/a[2]/text()'),
        }
        # NOTE(review): this path only matches a div that is a direct child
        # of the <li> with class exactly "houseInfo" -- verify against the
        # page markup.
        info = first_text(li, './div[@class="houseInfo"]/span/text()')
        row.update(split_house_info(info))
        # Extraction of the remaining fields is omitted here.
        rows.append(row)
    return rows


def main():
    """Download the listing page, parse it and write ``lianjia.csv``."""
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of parsing an error page.
    res.raise_for_status()
    rows = parse_listings(res.text)
    # Passing ``columns`` fixes the column order and keeps the header row
    # even when no listings were found.
    df = pd.DataFrame(rows, columns=COLUMNS)
    df.to_csv('lianjia.csv', index=False)


if __name__ == '__main__':
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值