import requests
from lxml import etree
import pandas as pd
# Search-results page to scrape and the browser-like header sent with the request.
url = 'https://hhht.lianjia.com/ershoufang/rs%E5%8C%97%E4%BA%AC/'
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    )
}

# Column order of the generated CSV.
COLUMNS = ['Title', 'District', 'Region', 'Layout', 'Size', 'Direction', 'Renovation']

# Separator between the fields of the "houseInfo" summary string.
HOUSE_INFO_SEPARATOR = '|'

# NOTE(review): XPath attribute matching is case-sensitive and these paths
# are tied to the site's markup; verify them against the live page.
LISTING_XPATH = '//ul[@class="sellListcontent"]/li'
TITLE_XPATH = './div[1]/div[1]/a/text()'
DISTRICT_XPATH = './div[2]/div[1]/a[1]/text()'
REGION_XPATH = './div[2]/div[1]/a[2]/text()'
HOUSE_INFO_XPATH = './div[@class="houseInfo"]/span/text()'


def split_house_info(info):
    """Split a house-info summary into its first four fields.

    Args:
        info: Summary string whose fields are separated by ``|``.

    Returns:
        A ``(layout, size, direction, renovation)`` tuple of stripped
        strings. The ``平米`` unit suffix is removed from ``size``.
        Fields missing from ``info`` are returned as empty strings and
        fields beyond the fourth are ignored.
    """
    parts = [part.strip() for part in info.split(HOUSE_INFO_SEPARATOR)]
    # Pad short input so the unpacking below never fails.
    parts += [''] * (4 - len(parts))
    layout, size, direction, renovation = parts[:4]
    return layout, size.replace('平米', '').strip(), direction, renovation


def first_text(node, path):
    """Return the first text match of ``path`` under ``node``, or ``''``.

    ``xpath()`` returns a list of matches, so string methods cannot be
    applied to its result directly.
    """
    matches = node.xpath(path)
    return matches[0].strip() if matches else ''


def parse_listings(html):
    """Extract one row dict per listing element found in ``html``.

    Args:
        html: The page markup as text.

    Returns:
        A list of dicts keyed by the names in ``COLUMNS``; empty when no
        listing element matches ``LISTING_XPATH``.
    """
    tree = etree.HTML(html)
    rows = []
    for li in tree.xpath(LISTING_XPATH):
        layout, size, direction, renovation = split_house_info(
            first_text(li, HOUSE_INFO_XPATH)
        )
        # Extraction of the remaining fields is omitted here.
        rows.append({
            'Title': first_text(li, TITLE_XPATH),
            'District': first_text(li, DISTRICT_XPATH),
            'Region': first_text(li, REGION_XPATH),
            'Layout': layout,
            'Size': size,
            'Direction': direction,
            'Renovation': renovation,
        })
    return rows


def main():
    """Download the results page and write the listings to ``lianjia.csv``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Send the browser-like headers and bound the wait so the script cannot hang.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    rows = parse_listings(res.text)
    df = pd.DataFrame(rows, columns=COLUMNS)
    # index=False is a to_csv option: it keeps the row index out of the file.
    df.to_csv('lianjia.csv', index=False)


if __name__ == '__main__':
    main()
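# --- Web-page residue captured together with the snippet (not part of the script) ---
# lianjia 0.01
# Latest recommended article published on 2025-11-29 16:24:14
# 1116
#
# Collapsed comments (the count is missing from the capture)
# Why were they collapsed?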



