A few days ago an outrageous "kindergarten problem" was going around on Weibo, shown in the image below. Plenty of people on Weibo were guessing at the answer, and, more interestingly, quite a few suggested solving it with machine learning. That appealed to me and fits the way I like to learn, so I decided to give it a try. The code is as follows:
```python
#!/usr/bin/python
# -*- encoding: utf-8 -*-
'''
Created on 2012-6-22

@author: zhangcheng
'''
from numpy import zeros, ones, array, mean, std


def feature_normalize(X):
    """Scale each column to zero mean and unit variance."""
    mean_r = []
    std_r = []
    X_norm = 1.0 * X  # float copy, so the normalized values are not truncated to ints
    n_c = X.shape[1]  # number of columns
    for i in range(n_c):
        m = mean(X[:, i])
        s = std(X[:, i])
        mean_r.append(m)
        std_r.append(s)
        X_norm[:, i] = (X[:, i] - m) / s
    return X_norm, mean_r, std_r


def compute_cost(X, y, theta):
    """Squared-error cost J(theta) for linear regression."""
    m = y.size
    predictions = X.dot(theta)
    sq_errors = predictions - y
    J = (1.0 / (2 * m)) * sq_errors.T.dot(sq_errors)
    return J


def gradient_descent(X, y, theta, alpha, num_iters):
    """Batch gradient descent with learning rate alpha."""
    m = y.size
    J_history = zeros(shape=(num_iters, 1))
    for i in range(num_iters):
        predictions = X.dot(theta)
        theta_size = theta.size
        for it in range(theta_size):
            tmp = X[:, it]
            tmp.shape = (m, 1)
            errors_x1 = (predictions - y) * tmp
            theta[it][0] = theta[it][0] - alpha * (1.0 / m) * errors_x1.sum()
        J_history[i, 0] = compute_cost(X, y, theta)
    return theta, J_history


# Each row: counts of the digits 0-9 in the four-digit number (columns 0-9),
# followed by the puzzle's answer for that number (column 10).
data = array([
    [0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # 7111
    [1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6],  # 8809
    [0, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0],  # 2172
    [0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4],  # 6666
    [0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 1111
    [0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0],  # 2222
    [0, 0, 1, 0, 0, 0, 2, 1, 0, 0, 2],  # 7662
    [0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 1],  # 9313
    [4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4],  # 0000
    [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0],  # 5555
    [0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 3],  # 8193
    [1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 5],  # 8096
    [0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 3],  # 4398
    [0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1],  # 9475
    [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 4],  # 9038
    [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 2]   # 3148
])
X = data[:, :10]
y = data[:, 10]
m = y.size
y.shape = (m, 1)

x, mean_r, std_r = feature_normalize(X)
#x = X
it = ones(shape=(m, 11))  # design matrix: intercept column plus the normalized features
it[:, 1:11] = x

iters = 300
alpha = 0.01
theta = zeros(shape=(11, 1))
theta, J_history = gradient_descent(it, y, theta, alpha, iters)
print theta, J_history

#print array([1.0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1]).dot(theta)  # 8920
#print array([1.0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0]).dot(theta)  # 6742
# prediction for 8829 (one 2, two 8s, one 9)
print array([1.0, 0, 0, (1 - mean_r[2]) / std_r[2], 0, 0, 0, 0, 0, (2 - mean_r[8]) / std_r[8], (1 - mean_r[9]) / std_r[9]]).dot(theta)
# prediction for 2809 (one 0, one 2, one 8, one 9)
print array([1.0, (1 - mean_r[0]) / std_r[0], 0, (1 - mean_r[2]) / std_r[2], 0, 0, 0, 0, 0, (1 - mean_r[8]) / std_r[8], (1 - mean_r[9]) / std_r[9]]).dot(theta)
```
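The hand-built feature vectors in the last two print statements are easy to get wrong: strictly speaking, the digits that do not appear should also be normalized as (0 - mean) / std rather than left at 0, since that is how the training features were scaled. As a small sketch of a cleaner way to query the model, here is a hypothetical helper (the name `predict_number` is my own, not part of the original code) that builds the digit-count features for any number from the fitted `mean_r`, `std_r`, and `theta` above:

```python
from numpy import array

# Hypothetical helper (my addition): encode a number as counts of the digits
# 0-9, normalize every column with the training mean/std computed above,
# prepend the intercept term, and apply the learned theta.
def predict_number(number, theta, mean_r, std_r):
    counts = [0] * 10
    for ch in str(number):
        counts[int(ch)] += 1
    # normalize all ten counts, including the zero counts
    features = [1.0] + [(counts[d] - mean_r[d]) / std_r[d] for d in range(10)]
    return array(features).dot(theta)

print predict_number(8829, theta, mean_r, std_r)  # same query as the first prediction above
```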
A few points worth noting:
- On my first pass I did not normalize the features, and convergence took noticeably more iterations. Feature normalization reduces the number of iterations needed.
- The implementation mainly relies on NumPy, a very powerful tool; its N-dimensional array is an extremely capable data structure.
- I did not use any criterion to decide when to stop iterating; alpha is simply fixed at 0.01, with no tuning or other optimization tricks (a sketch of a possible convergence check is given after this list).
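As a follow-up on that last point, here is a minimal sketch of what a stopping criterion could look like. It is my own addition rather than part of the original code: it reuses `compute_cost` from the script above and stops as soon as an iteration improves the cost by less than a tolerance. The name `gradient_descent_early_stop` and the default `tol=1e-9` are arbitrary choices for illustration; it could be called in place of `gradient_descent` with the same arguments.

```python
# Sketch (my addition): gradient descent that stops once the cost barely
# improves, instead of always running a fixed number of iterations.
# Relies on compute_cost defined in the script above.
def gradient_descent_early_stop(X, y, theta, alpha, max_iters, tol=1e-9):
    m = y.size
    J_history = []
    prev_cost = compute_cost(X, y, theta)[0, 0]  # cost before any update
    for i in range(max_iters):
        predictions = X.dot(theta)
        # same per-parameter update as the original inner loop
        for j in range(theta.size):
            col = X[:, j].reshape(m, 1)
            theta[j][0] -= alpha * (1.0 / m) * ((predictions - y) * col).sum()
        cost = compute_cost(X, y, theta)[0, 0]
        J_history.append(cost)
        if prev_cost - cost < tol:  # improvement too small: stop early
            break
        prev_cost = cost
    return theta, J_history
```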
Overall the results are quite good. A small puzzle like this has raised my interest in machine learning, and I plan to play with these models more in the future. There are also two other implementations:
[End]