DataScience_笔记_6-CSDN博客

回归-代码

import numpy as np
from matplotlib import pyplot as plt


def PolyFun(x, coef):
    """Evaluate a polynomial at ``x`` from ascending-order coefficients.

    Computes ``coef[0] + coef[1] * x + coef[2] * x**2 + ...`` for however
    many coefficients are supplied (the degree is ``len(coef) - 1``), so the
    function is no longer tied to cubic polynomials.

    Args:
        x: Scalar or array-like of evaluation points.
        coef: Sequence of coefficients, lowest degree first. Note that this
            is the reverse of the order returned by ``np.polyfit``.

    Returns:
        The polynomial values, with the same shape as ``x``. An empty
        ``coef`` yields zeros (the zero polynomial).
    """
    x = np.asarray(x)
    # Start from zeros shaped like x so the result always broadcasts to x,
    # then accumulate terms in ascending degree (same order as a hand-written
    # c0 + c1*x + c2*x^2 + ... expression).
    y = np.zeros_like(x)
    for power, c in enumerate(coef):
        y = y + c * np.power(x, power)
    return y


# Ground-truth coefficients, lowest degree first: 5 + 2x + 3x^2 + x^3.
coef = np.array([5, 2, 3, 1])
# Dense evaluation of the true polynomial on [-1, 1]; not used further below.
x = np.linspace(-1, 1, 1000)
y = PolyFun(x, coef)


# Noise-free dataset of (x, f(x)) pairs with x sampled by np.random.rand.
n_samples = 10000
n_train = 8000
x_data = np.random.rand(n_samples)
data = np.column_stack((x_data, PolyFun(x_data, coef)))

# Slice ONE permutation so the train and test sets are disjoint. Drawing two
# independent permutations would let test samples leak into the training set.
shuffled_idx = np.random.permutation(n_samples)
train_idx = shuffled_idx[:n_train]
test_idx = shuffled_idx[n_train:]

train_data = data[train_idx]
test_data = data[test_idx]

# np.polyfit returns coefficients highest degree first, whereas PolyFun reads
# them lowest degree first, so reverse the fitted vector before using it.
pred_coff = np.polyfit(train_data[:, 0], train_data[:, 1], deg=3)[::-1]

y_pred = PolyFun(test_data[:, 0], pred_coff)

# Mean squared error between predictions and the true values on the test set.
error = np.mean(np.square(y_pred - test_data[:, 1]))

print("The prediction error is: {:.4f}".format(error))

# Sort by x before drawing: a line plot over shuffled x values zig-zags
# across the figure instead of tracing the curve.
plot_order = np.argsort(test_data[:, 0])
x_sorted = test_data[plot_order, 0]
plt.plot(x_sorted, y_pred[plot_order], 'b-', label='Prediction')
plt.plot(x_sorted, test_data[plot_order, 1], 'g-', label='Ground Truth')
plt.title("Polynomial function $x^3+3x^2+2x+5$")
plt.legend()
plt.show()

这段代码展示了如何使用 Python 和 NumPy 库进行多项式函数的拟合和预测。

代码定义了一个名为 PolyFun 的函数，计算给定多项式系数的多项式函数值。
通过生成随机输入数据和对应的多项式函数值来创建一个数据集。
代码将数据集随机打乱，并将其划分为训练集和测试集。对于训练集数据，代码使用 np.polyfit 函数拟合了一个三次多项式模型。
在测试集上，代码利用拟合的模型进行预测，并计算了预测误差。
代码使用 matplotlib 库绘制了测试集上的预测结果和实际结果的曲线图。

总而言之，这段代码展示了多项式函数拟合和预测的基本过程，包括数据生成、模型训练、预测和误差计算。

监督学习-分类-分类器

K-Nearest Neighbors(KNN) 算法演示-代码

import numpy as np
import numpy.random as nr

# Two 2-D Gaussian classes, 100 samples each, given as (mean, covariance, size).
class_1 = nr.multivariate_normal(np.array([0, 0]), np.array([[3, 0], [0, 2]]), 100)
class_2 = nr.multivariate_normal(np.array([1, 1]), np.array([[4, 0], [0, 1]]), 100)
label_1 = np.zeros(class_1.shape[0])  # class 1 -> label 0
label_2 = np.ones(class_2.shape[0])  # class 2 -> label 1

data = np.concatenate([class_1, class_2], axis=0)
label = np.concatenate([label_1, label_2], axis=0)

# 80/20 split of a single shuffled index array. The test slice runs to the
# end of the array; slicing with ``:-1`` would silently drop the last sample.
idx = nr.permutation(data.shape[0])
split_at = round(len(idx) * 0.8)
train_idx = idx[:split_at]
test_idx = idx[split_at:]

train_data = data[train_idx]
train_label = label[train_idx]
test_data = data[test_idx]
test_label = label[test_idx]


def KNN(x, train_data, train_label, d, K):
    """Classify ``x`` by majority vote among its K nearest training samples.

    Args:
        x: Query point, array-like of shape (n_features,).
        train_data: Training samples, array-like of shape
            (n_samples, n_features).
        train_label: Label of each training sample, array-like of shape
            (n_samples,).
        d: Name of the distance metric. Only "Euclidean Distance" is
            supported.
        K: Number of nearest neighbours that vote. If K exceeds the number
            of training samples, all samples vote.

    Returns:
        The label that occurs most often among the K nearest neighbours.
        Ties are resolved in favour of the smallest label, because
        ``np.unique`` returns labels sorted and ``np.argmax`` picks the
        first maximum.

    Raises:
        ValueError: If ``d`` is not a supported metric, ``K`` is less than 1,
            or ``train_data`` is empty. Without these checks the function
            would fail later with an unbound result variable.
    """
    if d != "Euclidean Distance":
        raise ValueError("Unsupported distance metric: {!r}".format(d))
    if K < 1:
        raise ValueError("K must be a positive integer, got {!r}".format(K))

    train_data = np.asarray(train_data)
    train_label = np.asarray(train_label)
    if train_data.shape[0] == 0:
        raise ValueError("train_data must contain at least one sample")

    # One vectorized call computes the distance from x to every training row.
    distances = np.linalg.norm(train_data - np.asarray(x), axis=1)
    KNN_idx = np.argsort(distances)[:K]

    # Count label occurrences among the neighbours and return the mode.
    labels, counts = np.unique(train_label[KNN_idx], return_counts=True)
    return labels[np.argmax(counts)]


# Classify every test sample with 10-nearest-neighbours and tally the hits.
count_correct = 0
for x_t, l_t in zip(test_data, test_label):
    pred_label = KNN(x_t, train_data, train_label, "Euclidean Distance", K=10)
    if pred_label == l_t:
        count_correct += 1

    print("pred: {}, real: {}".format(pred_label, l_t))

# Report the overall result so the tally is actually visible; skip the ratio
# when the test set is empty to avoid dividing by zero.
if len(test_label) > 0:
    print("accuracy: {:.4f} ({}/{})".format(
        count_correct / len(test_label), count_correct, len(test_label)))

这段代码实现了 K-Nearest Neighbors (KNN) 分类算法。
它生成两个不同分布的数据类别，并将其分配为训练集和测试集( train 和 test )。
然后，对于测试集中的每个数据点，使用KNN算法根据最近的邻居进行分类预测。
最后，统计分类正确的数量并输出预测结果和真实标签。