from selenium import webdriver
import time
from numpy import *
import pandas as pd
# Scrape the 58.com housing-price pages for a fixed list of district ids and
# years, average the prices listed on each page, and write one row per
# (district, year) to an Excel file.
#
# NOTE(review): this keeps the Selenium 3 style calls the original used
# (`executable_path=` and `find_elements_by_xpath`); newer Selenium 4 releases
# appear to have removed both -- confirm against the installed version.

# Raw string so the backslashes in the Windows path are never treated as
# escape sequences.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver'
PAGE_URL = "https://www.58.com/fangjiawang/shi-{}-100/qy-{}/"
PRICE_XPATH = "//*[@id='main']/div/div[4]/div[3]/div/div/div[1]/div[2]/div[2]/ul/li/a/span"

# District ids substituted into the `qy-{}` part of the URL.
diqu = [1025, 1028, 1033, 1026, 1034, 1031, 1030, 1032,
        1024, 1023, 1037, 1027, 1035, 1029, 1038, 1036]
# Years substituted into the `shi-{}-100` part of the URL.
list_year = [2021, 2020, 2019, 2018, 2017, 2016, 2015,
             2014, 2013, 2012, 2011, 2010, 2009, 2008]


def parse_price(text):
    """Return the integer price contained in *text*, or None if unparseable.

    The last three characters are dropped before conversion, as the original
    slice ``text[0:-3]`` did (presumably a unit suffix -- TODO confirm against
    the live page).
    """
    try:
        return int(text[0:-3])
    except ValueError:
        return None


def average_price(texts):
    """Return the mean of the parseable prices in *texts*.

    Entries that cannot be parsed are ignored.  Returns None when no entry
    yields a price, instead of dividing by zero.
    """
    prices = [p for p in (parse_price(t) for t in texts) if p is not None]
    if not prices:
        return None
    return sum(prices) / len(prices)


def scrape_page(driver, year, di):
    """Load the page for one district/year and return its result record.

    Returns a dict with keys ``"di"``, ``"year"`` and ``"avg"``; ``"avg"`` is
    None when the page yields no usable price.
    """
    driver.get(PAGE_URL.format(year, di))
    # Fixed wait after navigation, presumably so the page can finish rendering
    # before the price elements are read.
    time.sleep(3)
    spans = driver.find_elements_by_xpath(PRICE_XPATH)
    return {"di": di, "year": year, "avg": average_price([s.text for s in spans])}


def main():
    """Scrape every district/year combination and save the averages to 1.xlsx."""
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
    content_list = []
    try:
        for di in diqu:
            for year in list_year:
                item = scrape_page(driver, year, di)
                print(item)
                content_list.append(item)
                # NOTE(review): the original indentation was lost, so it is
                # unclear whether this pause ran per page or per district; it
                # is kept per page, the more conservative reading.
                time.sleep(3)
    finally:
        # Always close the browser, even if a page load or lookup fails.
        driver.quit()

    data = pd.DataFrame({
        '地区': [row["di"] for row in content_list],
        '年份': [row["year"] for row in content_list],
        '平均房价': [row["avg"] for row in content_list],
    })
    data.to_excel("1.xlsx")


if __name__ == "__main__":
    main()
该代码使用 Selenium 爬取58同城房价网上16个指定地区在2008年至2021年间各年份的房价数据，对每个地区、每个年份页面上列出的价格取平均值，并将结果（地区、年份、平均房价）保存到 Excel 文件 1.xlsx 中，便于了解各地区房价的历年走势。
801

被折叠的 条评论
为什么被折叠?



