%%Machine learning From Andrew with Matlab ex1
%%By youknowwho3_3 in 优快云 #GirlsHelpGirls#DOUBANEZU
%1.gradientDescent to find the minimizing theta
%2.computeCost to evaluate the cost function J at that theta
%3.Plot the linear regression fit given by that theta
%%
%%plot data
load("ex1data1.txt");
data=ex1data1;
x=data(:,1);%suoyou hang de di yi lie-the first row in every line
y=data(:,2);%suoyou hang de di er lie-the second row in every line
plot(x,y,'rx','MarkerSize',7)
xlabel('Population of City in 10,000s')
ylabel('Profit in $10,000s')
%%implementation
m = length(x); % number of training examples
X = [ones(m,1),data(:,1)]; % Add a column of ones to x ####X0=1####
theta = zeros(2, 1); % initialize fitting parameters
iterations = 1500; % number of iterations
alpha = 0.01;
%%call gradientDescent to compute the minimizing theta
theta=gradientDescent(X, y, theta, alpha, iterations);
%fprintf('Theta computed from gradient descent:\n%f,\n%f',theta(1),theta(2))
% Plot the linear fit
hold on; % keep previous plot visible
plot(X(:,2), X*theta,'b-')
legend('Training data', 'Linear regression')
hold off % don't overlay any more plots on this figure
%%call computeCost to evaluate J at the fitted theta
J=computeCost(X, y, theta);
%%CostFunction J(theta)
%{
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
CostFunction J(theta) = 1/(2m)*sum((h(x_i)-y_i)^2)
Linear regression h(x) = theta'*x = theta0*x0 + theta1*x1 + ...
coding: h(x) = X*theta; %size(h) = 97*1
So J(theta) = 1/(2*m)*sum((X*theta-y).^2)
%}
function J=computeCost(X, y, theta)
m=length(y);
h=X*theta; %hypothesis for every training example, size 97*1
J=1/(2*m)*sum((h-y).^2);
%{
matrix refresher:
a = [1;2;3]              % size(a) = 3*1
b = [2,3]                % size(b) = 1*2
a*b  = [2 3; 4 6; 6 9]   % size(a*b) = 3*2
b*a fails (inner dimensions do not match)
b.*a = [2 3; 4 6; 6 9]   % element-wise product with implicit expansion
b'*a fails; theta here has the size of b'
%}
%{
h(x) = theta0*x0 + theta1*x1 + ...; theta has size n*1
theta  = [0; 0]          % size(theta)  = 2*1
theta' = [0 0]           % size(theta') = 1*2
X = [1.0000  6.1101
     1.0000  5.5277
     1.0000  8.5186
     ...
     1.0000  8.2934
     1.0000 13.3940
     1.0000  5.4369]     % size(X) = 97*2
Soooo, theta'*X fails because the dimensions do not match,
but X*theta gives a 97*1 vector, which is h(X); and that is the important point.
In the formula h = theta0*x0 + theta1*x1 + ... = theta'*x, each element of theta
multiplies the matching element of one example x; stacking all 97 examples,
X*theta performs exactly that multiply-and-sum for every row at once:
X*theta = [0; 0; ...; 0]  % size(X*theta) = 97*1
so X*theta is the way to compute theta(i)*x(i) for every example,
and h is a 97*1 vector.
%}
end
%%Gradient Descent
%{
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Gradient Descent: theta_j := theta_j - alpha*(1/m)*sum[(h(x_i)-y_i)*x_j(i)]
coding: theta := theta - alpha*(1/m)*((X*theta-y)'*X)', i.e. theta - alpha*(1/m)*X'*(X*theta-y)
For
size(X*theta-y) = 97*1
size(X) = 97*2, so size(X') = 2*97
size(X'*(X*theta-y)) = 2*1, and it is exactly the vector of sums sum[(h(x_i)-y_i)*x_j(i)]
size(alpha*(1/m)*X'*(X*theta-y)) = 2*1
and theta = [0; 0] initially, so size(theta) = 2*1
Soooo, theta := theta - alpha*(1/m)*X'*(X*theta-y) keeps theta at size 2*1
while moving it toward the minimizing theta;
and it still works when theta is n*1,
because size(X) is always m*n.
So we get the minimizing theta,
and we pass that theta to the cost function J(theta) to get the minimum cost J.
%}
function theta=gradientDescent(X, y, theta, alpha, iterations)
m=length(y);
for i=1:iterations
    theta=theta-alpha*(1/m)*((X*theta-y)'*X)'; %vectorized update of every theta_j simultaneously
end
end
%expected result: theta=[-3.630291439404359;1.166362350335582]
%expected result: J=4.483
#The following is the Python version. Note that a lot is different; given future needs, it is recommended to become proficient in Python.
'''
this is ex1.1, finished in Python
started on 24th April 2023
youknowwho
'''
#################
#################
#The biggest difference between Python and MATLAB: matrices must be created explicitly; data is not a matrix just because it was loaded
#################
##################
import numpy as np
import matplotlib.pyplot as plt
import torch
#cost function definition:
'''
CostFunction J(theta) = 1/(2m)*sum((h(x_i)-y_i)^2)
Linear regression h(x) = theta'*x = theta0*x0 + theta1*x1 + ...
coding: h_x = X*theta  #size(h_x) = m*1
So J(theta) = 1/(2*m)*np.sum((X*theta-y)**2)
'''
def computeCost(X,y,theta):
    m = len(y)
    h_x = X*theta  #hypothesis, shape (m,1); theta is passed in as np.matrix below, so '*' is a matrix product
    J_theta = 1/(2*m) * np.sum(np.power((h_x - y),2))
    return J_theta
#gradient descent definition:
'''
Gradient Descent: theta_j := theta_j - alpha*(1/m)*sum[(h(x_i)-y_i)*x_j(i)]
coding: theta = theta - (alpha/m) * X.T*(X*theta - y)
'''
def gradientDescent(X,y,theta,alpha,iterations):
    m = len(y)
    #h_x=X*theta #size(h_x)=m*1
    #print(X.T * (h_x-y)) #size(X.T)=2*m
    for i in range(iterations):
        theta = theta - (alpha/m) * (X.T * (X*theta-y))  #h_x must NOT be precomputed outside the loop: theta changes every iteration
        #size(X)=m*2
        #size(theta)=2*1
    return theta
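
#(Optional, hypothetical helper, not in the original exercise: same update rule as
# gradientDescent above, but it also records J(theta) after every iteration so that
# convergence can be checked or plotted.)
def gradientDescentWithHistory(X,y,theta,alpha,iterations):
    m = len(y)
    J_history = np.zeros(iterations)
    for i in range(iterations):
        theta = theta - (alpha/m) * (X.T * (X*theta-y))  #same vectorized update as above
        J_history[i] = computeCost(X,y,theta)            #the cost should decrease every iteration
    return theta, J_history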
#A = np.eye(5)
data = np.loadtxt('ex1data1.txt', delimiter=',')
#here it is different from matlab:
#np.load('') only loads .npy files, so np.loadtxt is used instead
#and the delimiter ',' must be passed explicitly
X=data[:,0] #shape (m,)
X=X.reshape(len(X),1) #shape (m,1)
y=data[:,1]
y=y.reshape(len(y),1)
##columns are counted from 0 here, also unlike matlab
plt.scatter(X, y, c='r', marker='x',label='Training Data') ##scatter plots individual points, plot draws connected lines
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
#plt.show()
m = len(X) #number of training examples
# X = m*1; X has m rows and 1 column
###################### here Python differs from MATLAB #########################
X = np.concatenate((np.ones((m, 1)), X), axis=1)
##################### prepend a column of ones to X ##########################
theta = np.zeros((2,1))
theta = np.matrix(theta) ###############theta must be converted to np.matrix so that '*' means matrix multiplication###############
#theta has shape (2,1)
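#(Added sanity check, not part of the original script: verify the shapes discussed above
# before running gradient descent.)
assert X.shape == (m, 2)           #column of ones plus the population column
assert theta.shape == (2, 1)       #parameter vector
assert (X*theta).shape == (m, 1)   #h(x) for every training example; '*' is a matrix product because theta is np.matrix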
iterations = 1500
alpha = 0.01
print(computeCost(X, y, theta))
print(computeCost(X, y, np.matrix([[-1],[2]])))
theta=gradientDescent(X,y,theta,alpha,iterations)
plt.plot(X[:,1],X*theta,label='Linear Regression')
plt.legend() #show the labels
plt.show()
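#(Added usage sketch, assuming the fitted theta from above: predict profit for
# populations of 35,000 and 70,000, i.e. x = 3.5 and 7.0 in units of 10,000s.)
predict1 = np.matrix([1, 3.5]) * theta   #(1,2) * (2,1) -> (1,1)
predict2 = np.matrix([1, 7.0]) * theta
print('predicted profit in $10,000s:', predict1[0,0], predict2[0,0])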