# Exploratory preparation of the housing dataset: load the CSV, make a
# stratified train/test split, visualize and correlate the training set,
# derive ratio features, then separate the predictors from the target.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit

################### Get data #######################
house_data = pd.read_csv("./housing.csv")

# Divide by 1.5 to limit the number of income categories.
house_data["income_cat"] = np.ceil(house_data["median_income"] / 1.5)
# Label those above 5 as 5.
# The result is assigned back rather than using ``.where(..., inplace=True)``
# on the selected column: that form is a chained in-place update on a
# temporary Series, which triggers warnings on recent pandas and does not
# write through to the DataFrame under Copy-on-Write.
house_data["income_cat"] = house_data["income_cat"].where(
    house_data["income_cat"] < 5, 5.0
)

# Stratify on the income category so both sets keep its distribution.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
# n_splits=1, so there is exactly one (train, test) pair to take.
train_index, test_index = next(split.split(house_data, house_data["income_cat"]))
# split() yields positional indices, so rows are selected by position.
strat_train_set = house_data.iloc[train_index]
strat_test_set = house_data.iloc[test_index]

# Alternative: a purely random (non-stratified) split.
# from sklearn.model_selection import train_test_split
# train_set, test_set = train_test_split(house_data, test_size=0.2, random_state=16)

# The helper category was only needed for the split itself.
strat_train_set = strat_train_set.drop("income_cat", axis=1)
strat_test_set = strat_test_set.drop("income_cat", axis=1)

# From here on, explore a copy of the training set only.
house_data = strat_train_set.copy()

############### Visualize ########################
house_data.hist(bins=50, figsize=(15, 10))
house_data.plot(kind="scatter", x="longitude", y="latitude", alpha=0.1)
# Marker size follows population; marker color follows median house value.
house_data.plot(
    kind="scatter",
    x="longitude",
    y="latitude",
    alpha=0.3,
    s=house_data["population"] / 100,
    label="population",
    c=house_data["median_house_value"],
    cmap=plt.get_cmap("jet"),
    colorbar=True,
)
plt.legend()
# plt.show()

# Correlate numeric columns only: pandas >= 2.0 raises on non-numeric columns
# instead of silently skipping them.
# NOTE(review): housing.csv is presumed to contain at least one text column
# (e.g. an ocean-proximity label) -- confirm against the actual file.
corr_matrix = house_data.select_dtypes(include=[np.number]).corr()
a = corr_matrix["median_house_value"].sort_values(ascending=False)

# Ratio features derived from the raw per-district totals.
house_data["rooms_per_households"] = (
    house_data["total_rooms"] / house_data["households"]
)
house_data["bedrooms_per_room"] = (
    house_data["total_bedrooms"] / house_data["total_rooms"]
)
house_data["population_per_households"] = (
    house_data["population"] / house_data["households"]
)
# print(house_data.info())

# Re-run the correlation so the derived features are included.
corr_matrix = house_data.select_dtypes(include=[np.number]).corr()
b = corr_matrix["median_house_value"].sort_values(ascending=False)
# print(b)

################### Prepare data ####################
# Predictors only: drop the target column from the training set.
# NOTE(review): the original statement was cut off right after ``axis``; it is
# completed as ``axis=1`` to match the column drops above -- confirm.
house_data = strat_train_set.drop("median_house_value", axis=1)
# housing price
# Latest recommended article published on 2025-03-14 16:36:18