import numpy as np
# 归一化
class StandardScaler:
    """Standardize the columns of a 2-D array to zero mean and unit deviation.

    Call :meth:`fit` on training data first, then :meth:`transform` on any
    array with the same number of columns.
    """

    def __init__(self):
        """Create an unfitted scaler; both statistics start as ``None``."""
        # Per-column mean learned by fit().
        self.mean_ = None
        # Per-column standard deviation learned by fit(); zero deviations
        # are stored as 1.0 so that transform() never divides by zero.
        self.scale_ = None

    def fit(self, x):
        """Learn the per-column mean and standard deviation of ``x``.

        Args:
            x: 2-D NumPy array of shape (n_samples, n_features).

        Returns:
            The scaler itself, so calls can be chained.

        Raises:
            AssertionError: if ``x`` is not 2-dimensional.
        """
        assert x.ndim == 2, "The dimension of x must be 2"
        self.mean_ = np.mean(x, axis=0)
        scale = np.std(x, axis=0)
        # A constant column has zero deviation; dividing by it would produce
        # NaN/inf, so scale such columns by 1.0 (they become all zeros).
        scale[scale == 0] = 1.0
        self.scale_ = scale
        return self

    def transform(self, x):
        """Return a standardized float copy of ``x``.

        Args:
            x: 2-D NumPy array with as many columns as the fitted data.

        Returns:
            A new float array of the same shape holding
            ``(x - mean_) / scale_`` computed column by column.

        Raises:
            AssertionError: if the scaler is unfitted, ``x`` is not
                2-dimensional, or the column count does not match.
        """
        assert self.scale_ is not None and self.mean_ is not None, "在归一化前要先fit"
        assert x.ndim == 2, "The dimension of x must be 2"
        assert x.shape[1] == len(self.scale_), "x的列数,特征数要和方差数同"
        # Broadcasting applies each column's statistics to that column; the
        # result is always returned with the default float dtype.
        return np.asarray((x - self.mean_) / self.scale_, dtype=float)
#!/C:/USers/C/PycharmProjects/Machine_Learning_1
# -*- coding:utf-8 -*-# ModelSelectation.py
import numpy as np
from sklearn import datasets
def train_test_split(x, y, test_ratio=0.2, seed=None):
    """Randomly split samples and labels into a training and a test part.

    Args:
        x: NumPy array of samples, indexed along its first axis.
        y: NumPy array of labels with the same first-axis length as ``x``.
        test_ratio: fraction of the samples (between 0 and 1) placed in the
            test part; the count is truncated to an integer.
        seed: optional seed that makes the shuffle reproducible.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        AssertionError: if the sample counts differ or ``test_ratio`` lies
            outside ``[0, 1]``.
    """
    assert x.shape[0] == y.shape[0], \
        "样本个数要同"
    assert 0 <= test_ratio <= 1.0, \
        "test_ratio must be valid"
    if seed is not None:
        # A private generator yields the same permutation as seeding the
        # global one, without resetting the caller's global random state.
        shuffle_index = np.random.RandomState(seed).permutation(len(x))
    else:
        shuffle_index = np.random.permutation(len(x))
    # The first len(x) * test_ratio shuffled indices form the test part,
    # the remainder forms the training part.
    test_size = int(len(x) * test_ratio)
    test_index = shuffle_index[:test_size]
    train_index = shuffle_index[test_size:]
    return x[train_index], y[train_index], x[test_index], y[test_index]
# The code above defines a class for standardization and a function for the
# train/test split. However, calling them as follows fails:
# Usage example that reproduces the error: load the iris data, split it,
# then try to fit the scaler.
from sklearn import datasets
from Standardization import StandardScaler
iris=datasets.load_iris()
x=iris.data
y=iris.target
from ModelSelectation import train_test_split
# Split with the default test_ratio and no seed.
x_train,y_train,x_test,y_test=train_test_split(x,y)
# Incorrect on purpose: fit is called on the class itself rather than on an
# instance, so no `self` is bound and Python raises a TypeError.
StandardScaler.fit(x_train)
Traceback (most recent call last):
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-32-23221b0e8939>", line 1, in <module>
StandardScaler.fit(x_train)
StandardScaler.fit(x=x_train)
Traceback (most recent call last):
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-33-783e331fdef3>", line 1, in <module>
StandardScaler.fit(x=x_train)
TypeError: fit() missing 1 required positional argument: 'self'
出现这种情况,是因为调用方法前没有先创建该类的实例:fit 是实例方法,直接通过类名调用时不会自动传入 self。只要先创建一个实例,再通过实例调用 fit 就可以了。
# Create an instance first so that `self` is bound when fit is called.
s=StandardScaler()
s.fit(x_train)
Out[35]: <Standardization.StandardScaler at 0x203d5ab0710>
本文介绍了一个自定义的数据预处理类,用于实现数据的归一化处理,并提供了一个训练集与测试集划分的方法。通过实例演示了如何使用这些自定义类进行数据预处理。
2929

被折叠的 条评论
为什么被折叠?



