#%matplotlib inline  # notebook magic used to display figures inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets
# Load the Boston housing data and look at its main attributes.
# NOTE(review): `load_boston` was removed in scikit-learn 1.2 — confirm the
# pinned scikit-learn version still provides it.
Boston = datasets.load_boston()
Boston.data    # feature values (used as X further down)
Boston.target  # target values (used as y further down)
Boston.DESCR   # descriptive notes shipped with the dataset

# Put the features into a DataFrame with named columns; preview 20 rows.
Boston_df = pd.DataFrame(data=Boston.data, columns=Boston.feature_names)
Boston_df.head(n=20)

# Load the digits data, inspect one image array and draw another one.
digits = datasets.load_digits()
digits.images[100]
plt.matshow(digits.images[1208])
plt.show()
# --- Instantiate (still unfitted) estimators --------------------------------
from sklearn import neighbors
nei = neighbors.NearestNeighbors()  # nearest-neighbours estimator, not fitted

from sklearn import preprocessing
std = preprocessing.StandardScaler()  # standardisation transformer
std

from sklearn import linear_model
reg = linear_model.LinearRegression()
reg
from sklearn import linear_model

# --- Standardise the Boston features ----------------------------------------
std.get_params()
std.fit(Boston.data)  # learn the per-feature statistics
std.mean_
std.var_
ZX = std.transform(Boston.data)
ZX[:2]
# Fit and transform in a single call; shown for comparison with ZX[:2] above.
std.fit_transform(Boston.data)[:2]

# --- Fit the linear regression on the raw features --------------------------
reg.fit(Boston.data, Boston.target)
reg.coef_
pred = reg.predict(Boston.data)
pred[:10]
reg.score(Boston.data, Boston.target)  # score on the training data itself
# Save the fitted models to external files with joblib, then load one back.
# NOTE(review): `sklearn.externals.joblib` was removed in scikit-learn 0.23;
# on newer releases this line has to become `import joblib` — confirm the
# pinned scikit-learn version.
from sklearn.externals import joblib
# The paths must be delimited by plain ASCII quotes; typographic quotes are a
# SyntaxError in Python.
joblib.dump(std, 'f:/datamodel/std.pkl')
joblib.dump(reg, 'f:/datamodel/reg.pkl')
reg2 = joblib.load('f:/datamodel/reg.pkl')  # read the saved model file back in
reg2.coef_
# Reference notes for sklearn.preprocessing.scale (kept as a comment because
# the annotated signature is not valid Python):
#
#   sklearn.preprocessing.scale(
#       X,                 # {array-like, sparse matrix}: the data to transform
#       axis=0,            # compute mean / standard deviation and transform
#                          # per column (0) or per sample (1)
#       with_mean=True,    # whether to centre the data
#       with_std=True,     # whether to scale to unit standard deviation
#       copy=True,
#   )
from sklearn import preprocessing

# Column-wise standardisation of the feature DataFrame.
Boston_scaled = preprocessing.scale(Boston_df)
Boston_scaled
Boston_scaled.mean(axis=0)
Boston_scaled.std(axis=0)

# Row-wise (axis=1) standardisation of the already column-scaled data.
Boston_scaled_all = preprocessing.scale(Boston_scaled, axis=1)
Boston_scaled_all
Boston_scaled_all.mean(axis=0)
Boston_scaled_all.std(axis=0)
Boston_scaled_all.mean()

# The function is applied to the one-dimensional target as well.
preprocessing.scale(Boston.target)
# std = preprocessing.StandardScaler()
# (`std` is the StandardScaler instance created earlier in this script; here
# it is re-fitted on the DataFrame.)
std.fit(Boston_df)
std.mean_, std.scale_
std.transform(Boston_df)
std.transform(Boston_df[:3])  # a fitted scaler can transform any subset of rows

# class sklearn.preprocessing.MinMaxScaler(feature_range)
# Rescale every feature into the range [1, 10].
scaler = preprocessing.MinMaxScaler(feature_range=(1, 10))
scaler.fit_transform(Boston_df)

# MaxAbsScaler has no feature range: its only constructor parameter is `copy`
# (keyword-only in current scikit-learn), so it is created without arguments.
scaler_1 = preprocessing.MaxAbsScaler()
scaler_1.fit_transform(Boston_df)
help(preprocessing.MaxAbsScaler)
# Reference notes for sklearn.preprocessing.normalize (kept as a comment
# because the annotated signature is not valid Python):
#
#   sklearn.preprocessing.normalize(
#       X, axis=1, copy=True,
#       norm='l2',           # 'l1', 'l2' or 'max': the norm used to normalise
#       return_norm=False,   # whether to also return the norms that were used
#   )
x = [[-1, -1, 2]]
# With return_norm=True the result is a pair: (normalised data, norms).
x_normalized = preprocessing.normalize(x, norm='l2', return_norm=True)
x_normalized
# Manual check of the first component: -1 divided by the L2 norm of the row,
# sqrt(1 + 1 + 4) = sqrt(6) ≈ 2.44948974.
-1 / 2.44948974
# Reference notes for sklearn.preprocessing.robust_scale (kept as a comment
# because the original signature sketch was incomplete and not valid Python):
#
#   sklearn.preprocessing.robust_scale(
#       X, axis=0, with_centering=True, with_scaling=True,
#       quantile_range=(25.0, 75.0),   # documented default — the original
#                                      # note left this value blank
#   )
preprocessing.robust_scale(Boston_df)