洛杉矶房价预测实战-CSDN博客

本文链接：https://blog.csdn.net/oppo603/article/details/100799355

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Suppress all warning messages for the rest of the session.
# NOTE(review): this filter is global, so it also hides deprecation and
# numerical warnings raised by the libraries used below.
import warnings
warnings.filterwarnings("ignore")

# Load the housing table from CSV and keep the 'SalePrice' column as the
# regression target.
train = pd.read_csv('datas/house_data.csv')
y = train['SalePrice']
# Notebook-style display of the (rows, columns) shape of the raw table.
train.shape

(1460, 82)

# Build the feature table by dropping the 'Id' column and the target column
# 'SalePrice' (the target is already held separately in `y`).
train1 = train.drop(['Id', 'SalePrice'], axis=1)
train1.shape

(1460, 80)

# Convert to one-hot form: categorical columns are replaced by numeric
# indicator columns and the original categorical columns are removed.
# reset_index(drop=True) renumbers the rows from 0 and discards the old index.
X = pd.get_dummies(train1).reset_index(drop=True)
X.head()

	MSSubClass	LotFrontage	LotArea	OverallQual	OverallCond	YearBuilt	YearRemodAdd	MasVnrArea	BsmtFinSF1	...	SaleType_WD	SaleCondition_Abnorml	SaleCondition_Normal
0	60	65.0	8450	7	5	2003	2003	196.0	706	...	1	0	1
1	20	80.0	9600	6	8	1976	1976	0.0	978	...	1	0	1
2	60	68.0	11250	7	5	2001	2002	162.0	486	...	1	0	1
3	70	60.0	9550	7	5	1915	1970	0.0	216	...	1	1	0
4	60	84.0	14260	8	5	2000	2000	350.0	655	...	1	0	1

5 rows × 303 columns

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle, and therefore the split, reproducible.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state=123)

X_train.shape

(1168, 303)

X_test.shape

(292, 303)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error # mean squared error (MSE), not variance

# Linear regression model with default settings.
lm=LinearRegression()

# Fit on the training split only; the test split is kept for evaluation.
lm.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

# Predict sale prices for the held-out test features.
pred=lm.predict(X_test)

# RMSE between the natural logs of the actual and predicted prices.
# NOTE(review): np.log is undefined for non-positive values and a plain linear
# model can predict them — confirm every entry of `pred` is > 0.
np.sqrt(mean_squared_error(np.log(y_test), np.log(pred)))

0.12627809622157107

# RMSE on the original (un-logged) price scale, in the same units as SalePrice.
np.sqrt(mean_squared_error(y_test, pred))

8.房价预测基础线性回归