1.可以通过np.matrix()函数将一个变量转换为numpy型矩阵
2.在Python的NumPy库中,矩阵和一维向量相乘(使用 @ 或 np.matmul)时,会自动对向量进行转换:向量在右侧时被当作列向量(一列的矩阵),在左侧时被当作行向量(一行的矩阵);运算结束后,临时补上的那一维会被去掉。
3.关于维度的一些辨析
# Find, for each sample, the position of the largest predicted probability.
h_argmax = np.argmax(h, axis=1)
# For a 2-D h, axis=1 scans along each row and returns the column index
# of that row's maximum.
2.二维数组或者矩阵中,axis=1
表示沿着列的方向(横向)进行运算,即对每一行分别计算;相应地,axis=0 表示沿着行的方向(纵向),即对每一列分别计算。
3.a.reshape(-1)
表示展平为一维数组(形状为 (n,)),而不是 1×n 的二维行矩阵;如果需要一行的二维矩阵,应使用 a.reshape(1, -1)。
4.x_categ.reshape((shape)+(n_out,))是通过reshape确保x_categ是一个行数为shape[0]、列数为n_out的矩阵。这里的(shape)+(n_out,)
实现的是元组的拼接。其中,shape = (14, ),n_out = 10,拼接后的结果是 (14, 10)。
5.标量和矢量相减,会自动扩展到矢量的尺寸再运算
# Element-wise product of (1 - y) and log(1 - h) for row i; the scalar 1 is
# broadcast to the size of y[i,:] and h[i,:] before the subtraction.
# NOTE(review): presumably the second term of a cross-entropy cost — confirm.
second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
6.获取.mat文件里面的每一个属性的数据有2种方式:
一种方式是:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
# Load the MATLAB file; loadmat returns a dict keyed by variable name.
data = loadmat('ex4data1.mat')
data
# Pull the individual arrays out of the dict by key.
X = data['X']
y = data['y']
X.shape, y.shape  # inspect the dimensions
另一种方式是:
import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def load_data():
    """Load the ex5 data set and flatten every array to 1-D.

    Reads ``ex5data1.mat`` from the current working directory and ravels
    the ``X``, ``y``, ``Xval``, ``yval``, ``Xtest`` and ``ytest`` entries.
    The original note records ``d['X']`` as having shape (12, 1); pandas
    has trouble building a DataFrame from that 2-D ndarray, hence the
    ravel.

    Returns:
        list: ``[X, y, Xval, yval, Xtest, ytest]`` as 1-D ndarrays, in
        that order, suitable for tuple unpacking.
    """
    d = sio.loadmat('ex5data1.mat')
    keys = ('X', 'y', 'Xval', 'yval', 'Xtest', 'ytest')
    # Return a real list rather than a lazy ``map`` object so the result
    # can be indexed, measured with len() and iterated more than once.
    return [np.ravel(d[key]) for key in keys]
# Unpack the six arrays in the order load_data returns them.
X, y, Xval, yval, Xtest, ytest = load_data()
7.矩阵和向量或者数组相乘的时候,会自动把数组或向量调整为矩阵运算所需要的维度:
import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import loadmat
# Load the raw file and inspect what loadmat produced for the 'X' entry:
# its contents, its shape and its Python type.
data = loadmat('ex5data1.mat')
data
data['X']
data['X'].shape
type(data['X'])
def load_data():
    """Load the ex5 data set and flatten every array to 1-D.

    Reads ``ex5data1.mat`` from the current working directory and ravels
    the ``X``, ``y``, ``Xval``, ``yval``, ``Xtest`` and ``ytest`` entries.
    The original note records ``d['X']`` as having shape (12, 1); pandas
    has trouble building a DataFrame from that 2-D ndarray, hence the
    ravel.

    Returns:
        list: ``[X, y, Xval, yval, Xtest, ytest]`` as 1-D ndarrays, in
        that order, suitable for tuple unpacking.
    """
    d = sio.loadmat('ex5data1.mat')
    keys = ('X', 'y', 'Xval', 'yval', 'Xtest', 'ytest')
    # Return a real list rather than a lazy ``map`` object so the result
    # can be indexed, measured with len() and iterated more than once.
    return [np.ravel(d[key]) for key in keys]
X, y, Xval, yval, Xtest, ytest = load_data()
X, X.shape
# Reshape each 1-D feature vector into a single column, then insert a column
# of ones at index 0 (the bias/intercept column) for X, Xval and Xtest.
X, Xval, Xtest = [np.insert(x.reshape(x.shape[0], 1), 0, np.ones(x.shape[0]), axis=1) for x in (X, Xval, Xtest)]
def cost(theta, X, y):
    """Return the unregularized squared-error cost for linear regression.

    Computes ``sum((X @ theta - y) ** 2) / (2 * m)`` where ``m`` is the
    number of rows of ``X``.

    Args:
        theta: Parameter vector with one entry per column of ``X``.
        X: Data matrix, one sample per row.
        y: Target values, one per sample.

    Returns:
        The cost at the given ``theta``.
    """
    # Number of samples.
    m = X.shape[0]

    # Residuals between predictions and targets. For a 1-D theta the
    # transpose is a no-op, so ``X @ theta`` would give the same result.
    residual = X @ theta.T - y

    # ``@`` is ordinary matrix multiplication; for a 1-D residual this is
    # the sum of squared errors.
    square_sum = residual.T @ residual
    return square_sum / (2 * m)
# Evaluate the cost at an all-ones parameter vector, one entry per column of X.
theta = np.ones(X.shape[1])
cost(theta, X, y)
8.实际工作中,如果欠拟合,又没有那么多数据的话,会考虑用多项式特征进行维度拓展。这个时候如果过拟合的话,就要加正则项进行控制。一般在训练集上,用一组lambda(λ)分别进行正则化训练,然后在验证集上选择代价最小的那一个lambda作为最终的选择。
def prepare_poly_data(*args, power):
    """Expand, normalize and add a bias column to each input data set.

    Args:
        *args: Feature vectors to process (keep feeding in X, Xval or
            Xtest); results come back in the same order.
        power: Highest polynomial degree, passed to ``poly_features``.

    Returns:
        list: One 2-D ndarray per input, whose first column is all ones
        and whose remaining columns are the normalized polynomial
        features.
    """
    def prepare(x):
        # Feature mapping: x -> x**1 ... x**power.
        df = poly_features(x, power=power)

        # Normalize, then convert to an ndarray so a column can be
        # inserted below. ``DataFrame.as_matrix()`` was removed in
        # pandas 1.0; ``to_numpy()`` is its replacement.
        # NOTE(review): each data set is normalized with its own
        # mean/std, not the training set's — confirm this is intended.
        ndarr = normalize_feature(df).to_numpy()

        # Add the bias term as the first column.
        return np.insert(ndarr, 0, np.ones(ndarr.shape[0]), axis=1)

    return [prepare(x) for x in args]
def poly_features(x, power, as_ndarray=False):
    """Map ``x`` to the polynomial features ``x**1 ... x**power``.

    Args:
        x: Input values; each is raised element-wise to every power.
        power: Highest exponent to generate (inclusive).
        as_ndarray: If true, return a plain ndarray instead of a
            DataFrame.

    Returns:
        A DataFrame with columns ``f1`` ... ``f{power}``, or the same
        data as an ndarray when ``as_ndarray`` is true.
    """
    data = {'f{}'.format(i): np.power(x, i) for i in range(1, power + 1)}
    df = pd.DataFrame(data)
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; use to_numpy().
    return df.to_numpy() if as_ndarray else df
def normalize_feature(df):
    """Standardize every column of ``df`` to zero mean and unit std.

    Each column has its own mean subtracted and is divided by its own
    standard deviation (pandas' ``Series.std``).

    Args:
        df: DataFrame of numeric feature columns.

    Returns:
        A DataFrame of the same shape with each column standardized.
    """
    def _standardize(column):
        return (column - column.mean()) / column.std()

    # apply() runs along axis 0 by default, i.e. once per column.
    return df.apply(_standardize)