Linear Regression
Univariate Linear Regression with Gradient Descent
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Set a random seed so the results are reproducible
np.random.seed(0)
# Generate the independent variable x: 100 points drawn uniformly from [0, 100]
x = np.random.uniform(0, 100, 100)
# Assume a linear relationship between y and x, plus some noise:
# y = 3 * x + 5 + noise, with noise ~ N(0, 10^2)
noise = np.random.normal(0, 10, 100)
y = 3 * x + 5 + noise
# Store the data in a DataFrame
data = pd.DataFrame({
    'X': x, 'Y': y})
# Save the raw data to a CSV file
data.to_csv('linear_regression_data.csv', index=False)
data.insert(0, 'ones', 1)  # add a column of ones for the intercept term (x0 = 1)
# Scatter plot of the data
plt.scatter(x, y)
plt.title('Scatter Plot of Linear Regression Data')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.show()
# Data preparation
X = data.iloc[:,0:-1]
X.head()
X = X.values
X.shape
y = data.iloc[:,-1]
y.head()
y = y.values
y.shape
y = y.reshape(100,1)
y.shape
# Compute the value of the cost function J(θ)
def cost_func(X, y, theta):
    inner = np.power(X @ theta - y, 2)
    return np.sum(inner) / (2 * len(X))
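For reference, cost_func implements the standard least-squares cost used throughout this walkthrough; in LaTeX notation, with m = len(X) and each row x^{(i)} of X carrying a leading 1 for the intercept:

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( x^{(i)} \theta - y^{(i)} \right)^2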
# Initialize theta (2x1) to zeros
theta = np.zeros((2, 1))
print(theta)
cost0 = cost_func(X, y, theta)
print("cost0:")
print(cost0)
# Learning rate and number of iterations; x is left unscaled (up to 100),
# so a very small alpha is needed to keep the updates stable
alpha = 0.000001
count = 1000000
# Batch gradient descent
def gradient_descent(X, y, theta, alpha, count):
    costs = []
    for i in range(count):
        # simultaneous vectorized update of all parameters
        theta = theta - (X.T @ (X @ theta - y)) * alpha / len(X)
        nowcost = cost_func(X, y, theta)
        costs.append(nowcost)
        if i % 100000 == 0:  # log the cost periodically
            print(nowcost)
    return theta, costs
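The line inside the loop is the vectorized form of the batch gradient-descent update, applied to all parameters simultaneously:

\theta := \theta - \frac{\alpha}{m} X^{\top} (X\theta - y)

Because the whole training set enters every update, this is batch (not stochastic) gradient descent.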
theta_ans, cost_ans = gradient_descent(X, y, theta, alpha, count)
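As a sanity check (an addition, not part of the original walkthrough), the least-squares solution can also be computed in closed form with the normal equation; gradient descent should land close to it, and both should be near the true parameters b = 5, k = 3 used to generate the data:

# Closed-form least-squares solution: theta = (X^T X)^(-1) X^T y
theta_closed = np.linalg.inv(X.T @ X) @ X.T @ y
print("normal equation:", theta_closed.ravel())   # expect roughly [5, 3]
print("gradient descent:", theta_ans.ravel())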
# Plot the cost curve
fig, ax = plt.subplots()
ax.plot(np.arange(count), cost_ans)
ax.set(xlabel='count', ylabel='cost')
plt.show()
# Plot the fitted line against the data
x_line = np.linspace(x.min(), x.max(), 100)          # grid over the range of x
y_line = theta_ans[0, 0] + theta_ans[1, 0] * x_line  # intercept is theta[0, 0], slope is theta[1, 0]
print("b:")
print(theta_ans[0, 0])
print("k:")
print(theta_ans[1, 0])
fig, ax = plt.subplots()
ax.scatter(X[:, 1], y, label='training')       # the raw x values are the second column of X
ax.plot(x_line, y_line, 'r', label='predict')  # fitted line
ax.legend()
ax.set(xlabel='X', ylabel='Y')
plt.show()
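For comparison (a sketch added here, not part of the original code), scikit-learn's LinearRegression fits the same univariate model directly; its intercept and slope should roughly agree with theta_ans:

from sklearn.linear_model import LinearRegression
reg = LinearRegression()                      # fits the intercept itself, so the ones column is not needed
reg.fit(X[:, 1].reshape(-1, 1), y)            # train on the raw x column only
print("sklearn intercept:", reg.intercept_)   # expect roughly 5
print("sklearn slope:", reg.coef_)            # expect roughly 3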
Multivariate Linear Regression with Gradient Descent
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
np.random.seed(0)
# Dataset size
num_samples = 1000
# Generate the features
area = np.random.normal(loc=1500, scale=300, size=num_samples)  # house area: mean 1500, std 300
year = np.random.randint(1950, 2023, size=num_samples)          # build year: between 1950 and 2022
num_rooms = np.random.randint(2, 6, size=num_samples)           # number of rooms: between 2 and 5
# Generate the target variable (price), assuming the linear relationship
# price = 100 * area + 500 * (2022 - year) - 300 * num_rooms + noise
noise = np.random.normal(loc=0, scale=10000, size=num_samples)  # additive noise
price = 100 * area + 500 * (2022 - year) - 300 * num_rooms + noise
# Z-score standardization
def z_score_normalization(feature):
    mean = np.mean(feature)
    std = np.std(feature)
    normalized_feature = (feature - mean) / std
    return normalized_feature
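The hand-rolled z_score_normalization is equivalent to scikit-learn's StandardScaler; a minimal sketch of the same transform with that class (an alternative, not what the code below uses):

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# fit_transform expects a 2-D array, hence the reshape; the result equals (feature - mean) / std
area_scaled = scaler.fit_transform(area.reshape(-1, 1)).ravel()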
# Apply z-score standardization to each feature
area_normalized = z_score_normalization(area)
year_normalized = z_score_normalization(year)
num_rooms_normalized = z_score_normalization(num_rooms)
# Print the standardized features
print("Normalized Area:", area_normalized)
print("Normalized Year:", year_normalized)
print("Normalized Number of Rooms:", num_rooms_normalized)
# Build the DataFrame
data = pd.DataFrame({
    'Area': area_normalized,
    'Year': year_normalized,
    'NumRooms': num_rooms_normalized,
    'Price': price
})
# Save the dataset to a file
data.to_csv('linear_regression_data1.csv', index=False)
data.insert(0, 'ones', 1)  # x0 = 1, the intercept column
# Data preparation
X = data.iloc[:, :-1].values                # feature matrix
y = data.iloc[:, -1].values.reshape(-1, 1)  # target column
# Split into training and test sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Compute the value of the cost function J(θ) (same definition as the univariate case)
def cost_func(X, y, theta):
    inner = (X @ theta - y) ** 2
    return np.sum(inner) / (2 * len(X))
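A minimal sketch of how training might continue on the standardized features, assuming the same vectorized update rule as the univariate case (alpha, count, and the variable names below are illustrative choices, not from the original):

# Hypothetical continuation: train on X_train / y_train with the same batch update
theta = np.zeros((X_train.shape[1], 1))  # 4 parameters: intercept + 3 features
alpha, count = 0.01, 10000               # assumed settings; standardized features tolerate a larger alpha
for i in range(count):
    theta = theta - (X_train.T @ (X_train @ theta - y_train)) * alpha / len(X_train)
print("train cost:", cost_func(X_train, y_train, theta))
print("test cost:", cost_func(X_test, y_test, theta))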