

# 1. Data normalization file: normailize.py (note: filename/function carry a typo of "normalize")
import numpy as np
def normailize(features):
    """Standardize features column-wise to zero mean and unit deviation.

    Parameters
    ----------
    features : array-like, shape (num_examples, num_features)
        Raw feature matrix.

    Returns
    -------
    (features_normalized, features_mean, features_deviation)
        The standardized copy of the input plus the per-column mean and
        standard deviation used, so callers can apply the same transform
        to new data.
    """
    features_normalized = np.copy(features).astype(float)
    # Per-column mean
    features_mean = np.mean(features, 0)
    # Per-column standard deviation
    features_deviation = np.std(features, 0)
    # Center only when there is more than one sample; with a single row the
    # mean equals the row and centering would zero everything out.
    if features.shape[0] > 1:
        features_normalized -= features_mean
    # Guard against division by zero for constant columns.
    features_deviation[features_deviation == 0] = 1
    # BUG FIX: original line was `features_normalized/features_normalized/
    # features_deviation`, a discarded expression; the in-place divide below
    # is what actually standardizes the data.
    features_normalized /= features_deviation
    return features_normalized, features_mean, features_deviation
# 2. Preprocessing file: prepare_for_training.py
import numpy as np
import normalize
#from .generate_sinusoids import generate_sinusoids
#from .generate_polynomials import generate_polynomials
#def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
def prepare_for_training(data, normalize_data=True):
    """Preprocess a data matrix for training.

    Optionally standardizes the features, then prepends a bias column of
    ones.

    Parameters
    ----------
    data : ndarray, shape (num_examples, num_features)
        Raw feature matrix.
    normalize_data : bool, default True
        Whether to standardize the features before adding the bias column.

    Returns
    -------
    (data_processed, features_mean, features_deviation)
        Processed matrix of shape (num_examples, num_features + 1); mean and
        deviation are 0 when normalization was skipped.
    """
    # Total number of samples
    num_examples = data.shape[0]
    data_processed = np.copy(data)
    # Defaults when normalization is skipped
    features_mean = 0
    features_deviation = 0
    if normalize_data:
        # NOTE(review): `import normalize` at the top of this file binds the
        # *module*, so this call raises TypeError as written; it needs
        # `from normalize import normailize as normalize` (the function name
        # in normailize.py is misspelled) — confirm against the sibling file.
        (data_processed,
         features_mean,
         features_deviation) = normalize(data_processed)
    # Prepend a column of ones for the bias/intercept term.
    data_processed = np.hstack((np.ones((num_examples, 1)), data_processed))
    return data_processed, features_mean, features_deviation
# 3. linear_regression.py — implementation of the core algorithm
import numpy as np
import prepare_for_training
class LinearRegression:
    """Linear regression trained with batch gradient descent."""

    def __init__(self, data, labels, normalize_data=True):
        """
        1. Preprocess the training data.
        2. Record the number of features.
        3. Initialize the parameter matrix theta.

        Parameters
        ----------
        data : ndarray, shape (num_examples, num_features)
        labels : ndarray, shape (num_examples, 1)
        normalize_data : bool, default True
            Whether to standardize features during preprocessing.
        """
        # NOTE(review): `import prepare_for_training` binds a module, not a
        # function; this call needs
        # `from prepare_for_training import prepare_for_training` — confirm.
        # BUG FIX: original hard-coded normalize_data=True here, silently
        # ignoring the constructor argument.
        (data_processed,
         features_mean,
         features_deviation) = prepare_for_training(data, normalize_data=normalize_data)
        self.data = data_processed
        self.labels = labels
        self.features_mean = features_mean
        self.features_deviation = features_deviation
        self.normalize_data = normalize_data
        num_features = self.data.shape[1]
        # BUG FIX: np.zeros takes the shape as one tuple; the original
        # np.zeros(num_features, 1) passed 1 as the dtype argument.
        self.theta = np.zeros((num_features, 1))

    @staticmethod
    def hypothesis(data, theta):
        """Return the predictions ``data @ theta``."""
        predictions = np.dot(data, theta)
        return predictions

    def gradiant_step(self, alpha):
        """Perform one gradient-descent update of theta (matrix form).

        alpha is the learning rate.
        """
        num_examples = self.data.shape[0]
        # Residuals: predictions minus true labels.
        delta = LinearRegression.hypothesis(self.data, self.theta) - self.labels
        self.theta = self.theta - alpha * (1 / num_examples) * np.dot(delta.T, self.data).T

    def cost_function(self, data=None, labels=None):
        """Half mean-squared-error cost: (1/2m) * ||X@theta - y||^2.

        data/labels default to the stored training set, so existing
        zero-argument callers keep working; passing them lets helpers like
        get_cost evaluate the model on other data.
        """
        if data is None:
            data = self.data
        if labels is None:
            labels = self.labels
        num_examples = data.shape[0]
        delta = LinearRegression.hypothesis(data, self.theta) - labels
        cost = (1 / 2) * np.dot(delta.T, delta) / num_examples
        # cost is a 1x1 matrix; unwrap to a scalar.
        return cost[0][0]

    def gradiant_descent(self, alpha, num_iteration):
        """Run num_iteration gradient steps, recording the cost after each."""
        cost_history = []
        for _ in range(num_iteration):
            self.gradiant_step(alpha)
            cost_history.append(self.cost_function())
        return cost_history

    def train(self, alpha, num_iteration=500):
        """Train the model; return the learned theta and the cost history."""
        cost_history = self.gradiant_descent(alpha, num_iteration)
        return self.theta, cost_history

    '''Helper functions below are not part of model training'''

    def get_cost(self, data, labels=None):
        """Cost of the trained model on new data (preprocessed identically).

        labels defaults to the stored training labels for backward
        compatibility with the original single-argument call.
        """
        data_processed = prepare_for_training(data, self.normalize_data)[0]
        # BUG FIX: original passed data_processed to a zero-argument
        # cost_function, which raised TypeError; forward both pieces now.
        return self.cost_function(data_processed, labels)

    def get_predict(self, data):
        """Predict regression values for new data using the trained theta."""
        data_processed = prepare_for_training(data, self.normalize_data)[0]
        predictions = LinearRegression.hypothesis(data_processed, self.theta)
        return predictions