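公式

Softmax 回归中,样本 $x$ 属于类别 $k$ 的条件概率为

$$P(y=k \mid x;\theta)=\frac{\exp(\theta_k^{T}x)}{\sum_{j=1}^{K}\exp(\theta_j^{T}x)}$$

损失函数为平均负对数似然

$$J(\theta)=-\frac{1}{M}\sum_{i=1}^{M}\log P(y_i \mid x_i;\theta)$$

参数按梯度方向更新($\alpha$ 为学习率)

$$\theta_k \leftarrow \theta_k+\frac{\alpha}{M}\sum_{i=1}^{M}\bigl(1\{y_i=k\}-P(y=k \mid x_i;\theta)\bigr)\,x_i$$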
实现
- 加载数据
def loadData():
    """Load the training set from the whitespace-separated file ``testSet``.

    Every non-blank line must contain two feature values followed by a
    label. A constant ``1.0`` is prepended to each feature row (it serves as
    the intercept input of the linear model).

    Returns:
        A ``(train_x, train_y)`` tuple: ``train_x`` is a list of
        ``[1.0, feature0, feature1]`` rows and ``train_y`` is the list of
        labels converted to ``float``.

    Raises:
        IOError/OSError: if ``testSet`` cannot be opened.
        ValueError, IndexError: if a non-blank line is malformed.
    """
    train_x = []
    train_y = []
    # ``with`` guarantees the handle is closed, even when a line fails to parse.
    with open('testSet') as fileIn:
        for line in fileIn:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            train_x.append([1.0, float(lineArr[0]), float(lineArr[1])])
            train_y.append(float(lineArr[2]))
    return train_x, train_y
- 训练
def train(self, x, y):
    """Fit ``self.theta`` with batch gradient steps and plot the loss curve.

    Args:
        x: Training samples, one row per sample, in any form accepted by
            ``np.asmatrix`` (e.g. a list of feature lists).
        y: One label per sample. Each label is converted with ``int()`` and
            used as a row index into the probability matrix, so labels are
            expected to be ``0 .. label_num - 1``.

    Side effects:
        Sets ``label_num``, ``dataMat``, ``labelMat``, ``feature_num``,
        ``theta`` and ``alpha`` on the instance, reads ``self.run_times``,
        and opens a matplotlib window showing the per-iteration loss.
    """
    self.label_num = len(set(y))
    # ``np.asmatrix`` instead of ``np.mat``: the alias was removed in NumPy 2.0.
    self.dataMat = np.asmatrix(x)
    self.labelMat = np.asmatrix(y).transpose()
    M, self.feature_num = np.shape(self.dataMat)
    self.theta = np.asmatrix(np.ones((self.label_num, self.feature_num)))

    # The row index of every sample's true class never changes between
    # iterations, so compute it once instead of once per epoch.
    label_rows = [int(self.labelMat[m, 0]) for m in range(M)]

    loss = []
    for kk in range(self.run_times):
        prob = self.condProb(self.dataMat)
        loss.append(self.cost(prob, y) / M)

        # Step size shrinks linearly from 0.101 on the first iteration
        # towards 0.001 on the last.
        self.alpha = 0.001 + 0.1 * (self.run_times - kk) / self.run_times

        # residual[k, m] = 1{label of sample m == k} - prob[k, m]
        residual = -prob
        for m, row in enumerate(label_rows):
            residual[row, m] += 1.0

        self.theta = self.theta + self.alpha * residual * self.dataMat / M

    import matplotlib.pyplot as plt
    # ``range`` (not the Python-2-only ``xrange``) keeps this runnable on
    # Python 3; ``list`` gives matplotlib a plain sequence on both versions.
    plt.plot(list(range(self.run_times)), loss)
    plt.show()
def condProb(self, x):
    """Return the softmax class probabilities for every sample in ``x``.

    Args:
        x: Sample matrix with one row per sample; its column count must
            match the column count of ``self.theta``.

    Returns:
        A matrix indexed as ``[class, sample]`` whose columns each sum to 1.
    """
    scores = self.theta * x.T
    # Subtracting each column's maximum leaves the softmax value unchanged
    # but keeps ``np.exp`` from overflowing to inf (which produced NaN after
    # the division) when scores are large.
    scores = scores - np.max(scores, axis=0)
    prob = np.exp(scores)
    # The 1 x M row of column sums broadcasts over all class rows.
    prob = prob / np.sum(prob, axis=0)
    return prob
def predict(self, x):
    """Return whatever ``condProb`` computes for ``x`` (class probabilities).

    No arg-max is applied here; the caller receives the full probability
    matrix and picks the class itself.
    """
    return self.condProb(x)
def cost(self, prob, y):
    """Return the summed negative log-probability of the true labels.

    Args:
        prob: 2-D probabilities indexed as ``prob[class, sample]``.
        y: One label per sample; each is converted with ``int()`` and used
            as the class (row) index.

    Returns:
        ``-sum(log(prob[y[i], i]))`` over all samples. The value is a sum,
        not an average, and is ``0.0`` when ``y`` is empty.
    """
    costs = 0.0
    # ``enumerate`` replaces ``xrange(len(y))``: ``xrange`` does not exist on
    # Python 3, so the original raised NameError there.
    for i, label in enumerate(y):
        costs += np.log(prob[int(label), i])
    return -costs
实现结果
准确度:0.96