"""Fetch the listing page at ``url``, extract one record per listing, and save them as CSV."""

import requests
from lxml import etree
import pandas as pd

url = 'https://hhht.lianjia.com/ershoufang/rs%E5%8C%97%E4%BA%AC/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}

# NOTE(review): XPath attribute matching is case-sensitive. If no rows are
# found, verify this class name against the live page markup (it may be
# spelled "sellListContent") -- TODO confirm.
LISTING_XPATH = '//ul[@class="sellListcontent"]/li'
HOUSE_INFO_XPATH = './div[@class="houseInfo"]/span/text()'

# Column order of the generated CSV.
COLUMNS = ['Title', 'District', 'Region', 'Layout', 'Size', 'Direction',
           'Renovation']


def _first_text(nodes):
    """Return the first XPath text result, stripped, or '' if there is none."""
    return nodes[0].strip() if nodes else ''


def _house_info_parts(li):
    """Return the '|'-separated fields of a listing's houseInfo text.

    ``xpath`` returns a list of text nodes, so they are joined into one string
    before splitting; each field is stripped of surrounding whitespace.
    """
    text = ''.join(li.xpath(HOUSE_INFO_XPATH))
    return [part.strip() for part in text.split('|')]


def _part(parts, index):
    """Return ``parts[index]``, or '' when the listing has fewer fields."""
    return parts[index] if index < len(parts) else ''


def parse_listing(li):
    """Build one record (a dict keyed by ``COLUMNS``) from a listing ``<li>``.

    Missing elements yield empty strings instead of raising, so a single
    incomplete listing does not abort the whole run.
    """
    info = _house_info_parts(li)
    # Extraction of other fields omitted here.
    return {
        'Title': _first_text(li.xpath('./div[1]/div[1]/a/text()')),
        'District': _first_text(li.xpath('./div[2]/div[1]/a[1]/text()')),
        'Region': _first_text(li.xpath('./div[2]/div[1]/a[2]/text()')),
        'Layout': _part(info, 0),
        # Drop the unit suffix so only the numeric part of the area remains.
        'Size': _part(info, 1).replace('平米', ''),
        'Direction': _part(info, 2),
        'Renovation': _part(info, 3),
    }


def main():
    """Download the page, parse every listing, and write ``lianjia.csv``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    # Send the browser-like headers defined above and never wait indefinitely.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()

    # Parse the response body (not the Response object); passing bytes lets
    # the HTML parser pick up the encoding declared in the document.
    tree = etree.HTML(res.content)
    li_list = tree.xpath(LISTING_XPATH) if tree is not None else []

    # One dict per listing, so every row is kept rather than only the last.
    rows = [parse_listing(li) for li in li_list]

    df = pd.DataFrame(rows, columns=COLUMNS)
    # index=False belongs to to_csv: it omits the row-index column.
    df.to_csv('lianjia.csv', index=False)


if __name__ == '__main__':
    main()