% Step-by-step tutorial: walkthrough of the MATLAB code for Andrew Ng's Machine Learning ex2 (优快云 blog)
%
% Article link: https://blog.youkuaiyun.com/weixin_41460195/article/details/126910600
%%Machine learning From Andrew with Matlab ex2
%%By youknowwho3_3 in 优快云 #GirlsHelpGirls#DOUBANEZU
%%logistic regression's
%1.sigmoid function
%2.compute costFunction for logistic regression
%3.Gradient for logistic regression
%4.predict function
%5.compute cost for regularized LR
%6.Gradient for regularized Logistic regression
%%

%%%%%%%%%%%% Logistic regression %%%%%%%%%%%
% Plot data
data=load("ex2data1.txt");
x = data(:, [1, 2]); 
y = data(:, 3);
% Columns 1-2 (x) are the two exam scores; column 3 (y) is the 0/1 label
% (1 = admitted, 0 = not admitted).

% Plot the data with + indicating (y = 1) examples and o indicating (y = 0) examples.
plotData(x, y);
% Labels and Legend
xlabel('Exam 1 score')
ylabel('Exam 2 score')
% Specified in plot order: plotData draws the y = 1 points first, then y = 0.
legend('Admitted', 'Not admitted')

% Implementation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Cost function & gradient for logistic regression
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Setup the data matrix appropriately
[m, n] = size(x);
% Add intercept term to X
X = [ones(m, 1) x];
% Initialize the fitting parameters
initial_theta = zeros(n + 1, 1);
[cost,grad]=costFunction(initial_theta,X,y);
fprintf('TheValueOfCost: %f\n',cost) 
disp('Gradient at initial theta (zeros):'); 
disp(grad);
% Output recorded by the author:
%TheValueOfCost: 0.693147
%Gradient at initial theta (zeros):
%   -0.1000
%  -12.0092
%  -11.2628

% Call fminunc, supplying our own gradient ('GradObj' = 'on')
options = optimset('GradObj', 'on', 'MaxIter', 400);
[theta, Cost] = fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);
% Report the optimised cost returned by fminunc (Cost), not the
% initial-theta cost computed above (cost).
fprintf('Cost at theta found by fminunc: %f\n', Cost);
disp('theta:');disp(theta);

% Plot Boundary 
plotDecisionBoundary(theta, X, y);
% Add some labels 
hold on;
% Labels and Legend 
xlabel('Exam 1 score')
ylabel('Exam 2 score')
% Specified in plot order
legend('Admitted', 'Not admitted')
hold off;

%  Predict probability of admission for a student with score 45 on exam 1 and score 85 on exam 2 
prob = sigmoid([1 45 85] * theta); % hypothesis evaluated at the new sample (leading 1 = intercept)
fprintf('For a student with scores 45 and 85, we predict an admission probability of %f\n\n', prob);

% Compute accuracy on our training set
p = predict(theta, X);
fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100);

%%%%%%%% Regularized logistic regression %%%%%%%%%
% ex2data2.txt: columns 1-2 are the two inputs, column 3 is the 0/1 label.
data = load('ex2data2.txt');
X = data(:, 1:2);
y = data(:, 3);

% Scatter plot of the raw samples, then annotate the axes.
plotData(X, y);
hold on;
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
% Legend entries follow plot order (y = 1 is drawn first).
legend('y = 1', 'y = 0')
hold off;

% Expand the two inputs into polynomial features.
% mapFeature prepends the column of ones, so no separate intercept column is added here.
X = mapFeature(X(:, 1), X(:, 2));

% --- Check costFunctionReg at theta = 0 with lambda = 1 ---
lambda = 1;
initial_theta = zeros(size(X, 2), 1);
[cost, grad] = costFunctionReg(initial_theta, X, y, lambda);

fprintf('Cost at initial theta (zeros): %f\n', cost);
fprintf('Expected cost (approx): 0.693\n');
fprintf('Gradient at initial theta (zeros) - first five values only:\n');
fprintf(' %f \n', grad(1:5));
fprintf('Expected gradients (approx) - first five values only:\n');
fprintf(' 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115\n');

% --- Check costFunctionReg at theta = all ones with lambda = 10 ---
test_theta = ones(size(X, 2), 1);
[cost, grad] = costFunctionReg(test_theta, X, y, 10);

fprintf('\nCost at test theta (with lambda = 10): %f\n', cost);
fprintf('Expected cost (approx): 3.16\n');
fprintf('Gradient at test theta - first five values only:\n');
fprintf(' %f \n', grad(1:5));
fprintf('Expected gradients (approx) - first five values only:\n');
fprintf(' 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n');

%%function costFunctionReg
%{
Formula:
  Cost function:
      J = 1/m * sum(i=1:m)[ -yi*log(h(xi)) - (1-yi)*log(1-h(xi)) ] + lambda/(2*m) * sum(j=1:n)( theta(j)^2 )
  Gradient (partial derivative of J with respect to theta(j)):
      j = 0 : dJ/dtheta(j) = 1/m * sum(i=1:m)[ (h(xi)-yi) * xi(j) ]
      j >= 1: dJ/dtheta(j) = 1/m * sum(i=1:m)[ (h(xi)-yi) * xi(j) ] + lambda/m * theta(j)
  The intercept theta(0) is not regularized; with MATLAB's 1-based indexing it is theta(1).
Code:
        z=X*theta
        h=sigmoid(z)
        J=1/m*sum(-y.*log(h)-(1-y).*log(1-h))+lambda/(2*m)*sum(theta(2:end).^2)
        j=0  grad(1) = (1/m)* (X(:,1)'*(h-y));
        j>=1 grad(2:end) = (1/m)* (X(:,2:end)'*(h-y))+(lambda/m)*theta(2:end);
%}
function [J, grad] = costFunctionReg(theta, X, y, lambda)
% COSTFUNCTIONREG Regularized logistic-regression cost and gradient.
%   [J, grad] = COSTFUNCTIONREG(theta, X, y, lambda) returns the cost J and
%   its gradient grad (same size as theta) evaluated at theta for the
%   design matrix X, labels y and regularization weight lambda.
%   theta(1) is excluded from the penalty term.
numSamples = length(y);
hypothesis = sigmoid(X * theta);          % one prediction per row of X
residual = hypothesis - y;

% Cross-entropy term plus the L2 penalty on theta(2:end).
dataCost = 1/numSamples*sum(-y.*log(hypothesis)-(1-y).*log(1-hypothesis));
penalty = lambda/(2*numSamples)*sum(theta(2:end).^2);
J = dataCost + penalty;

% Gradient: first entry without penalty, remaining entries with penalty.
grad = zeros(size(theta));
grad(1) = (1/numSamples) * (X(:, 1)' * residual);
grad(2:end) = (1/numSamples) * (X(:, 2:end)' * residual) + (lambda/numSamples) * theta(2:end);

end




%%function mapFeature
function out = mapFeature(X1, X2, degree)
% MAPFEATURE Feature mapping function to polynomial features
%   out = MAPFEATURE(X1, X2) maps the two input features to all polynomial
%   terms of X1 and X2 up to the sixth degree.
%   out = MAPFEATURE(X1, X2, degree) uses the given maximum degree instead
%   (default: 6).
%
%   The first column of out is all ones. The remaining columns are, in
%   order: X1, X2, X1.^2, X1.*X2, X2.^2, X1.^3, X1.^2.*X2, ...
%   i.e. for each total degree i = 1..degree the terms
%   X1.^(i-j) .* X2.^j for j = 0..i, giving (degree+1)*(degree+2)/2 columns.
%
%   Inputs X1, X2 must be column vectors (or scalars) of the same size.
if nargin < 3
    degree = 6;
end

% Preallocate the full result instead of growing it one column at a time.
numTerms = (degree + 1) * (degree + 2) / 2;
out = ones(size(X1, 1), numTerms);

col = 1;   % column 1 stays as the all-ones intercept column
for i = 1:degree
    for j = 0:i
        col = col + 1;
        out(:, col) = (X1.^(i-j)).*(X2.^j);
    end
end

end

%%Predict Function
function p = predict(theta, X)
% PREDICT Classify each row of X with the logistic-regression parameters theta.
%   p = PREDICT(theta, X) returns a logical column with one entry per row of
%   X: true where sigmoid(X*theta) is at least 0.5, false otherwise.
probability = sigmoid(X * theta);
p = (probability >= 0.5);
end

%%plotDecisionBoundary Function (prepared)
function plotDecisionBoundary(theta, X, y)
%PLOTDECISIONBOUNDARY Plot the samples together with the decision boundary
%defined by theta.
%   PLOTDECISIONBOUNDARY(theta, X, y) draws columns 2 and 3 of X via
%   plotData (+ for positive, o for negative examples) and overlays the
%   boundary. X is either
%   1) an Mx3 matrix (or narrower) whose first column is the all-ones
%      intercept column: the boundary is drawn as a straight line, or
%   2) an MxN matrix with N > 3 whose first column is all ones: the
%      boundary is drawn as the zero contour of mapFeature(u, v)*theta.

plotData(X(:, 2:3), y);
hold on

if size(X, 2) > 3
    % Non-linear case: evaluate mapFeature(u, v)*theta on a fixed grid.
    gridU = linspace(-1, 1.5, 50);
    gridV = linspace(-1, 1.5, 50);
    numU = length(gridU);
    numV = length(gridV);

    score = zeros(numU, numV);
    for row = 1:numU
        for col = 1:numV
            score(row, col) = mapFeature(gridU(row), gridV(col)) * theta;
        end
    end

    % contour needs the transposed grid; [0, 0] selects only the level 0.
    contour(gridU, gridV, score', [0, 0], 'LineWidth', 2)
else
    % Linear case: two x-coordinates are enough to draw the line.
    lineX = [min(X(:, 2)) - 2, max(X(:, 2)) + 2];

    % Solve theta(1) + theta(2)*x + theta(3)*y = 0 for y.
    lineY = (-1./theta(3)).*(theta(2).*lineX + theta(1));

    plot(lineX, lineY)

    % Legend and axis limits specific to the exercise.
    legend('Admitted', 'Not admitted', 'Decision Boundary')
    axis([30, 100, 30, 100])
end
hold off

end

%%plotData Function 
function plotData(X, y)
% PLOTDATA Scatter-plot the first two columns of X, split by label y.
%   Rows with y == 1 are drawn as black '+' markers; rows with y == 0 are
%   drawn as black circles filled in yellow.

% Logical masks selecting the positive and negative examples.
isPositive = (y == 1);
isNegative = (y == 0);

plot(X(isPositive, 1), X(isPositive, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7);
% Hold the axes so the second plot call adds to the first instead of
% replacing it, then release the hold afterwards.
hold on;
plot(X(isNegative, 1), X(isNegative, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);
hold off;
end

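% Cost function of logistic regression
% Formula:  J(theta) = 1/m * sum(i=1:m)[ -yi*log(h(xi)) - (1-yi)*log(1-h(xi)) ]
%           gradient of J with respect to theta: grad = 1/m * sum(i=1:m)[ (h(xi)-yi)*xi ]
%           h(x) = g(theta'*x)
%           g(z) = 1/(1+e^(-z))
% For coding (shapes below are for the 100-sample, 3-column X of ex2data1):
%           h = sigmoid(X*theta)              size(X*theta) = 100*1
%           sigmoid(z) = 1./(1+exp(-z))
%                   size(1./(1+exp(-X*theta))) = size(h) = 100*1
%                   Without the dot, 1/(1+exp(-X*theta)) is a matrix division
%                   with size 1*100, which is not what we want.
%           J = 1/m*sum(-y.*log(h)-(1-y).*log(1-h))
%                   size(log(h)) = 100*1
%                   size(y) = 100*1, so y.*log(h) multiplies element by element.
%                   y'*log(h) would be 1*1 (already summed), so inside sum(...)
%                   the element-wise form y.*log(h) is used.
%           grad = 1/m*X'*(h-y)   size(X') = 3*100, size(h-y) = 100*1
%           size(grad) = 3*1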
function [J, Grad] = costFunction(theta, X, y)
% COSTFUNCTION Unregularized logistic-regression cost and gradient.
%   [J, Grad] = COSTFUNCTION(theta, X, y) returns the cost J and the
%   gradient Grad evaluated at theta for design matrix X and labels y.
numSamples = length(y);
hypothesis = sigmoid(X * theta);
residual = hypothesis - y;

J = 1/numSamples*sum(-y.*log(hypothesis)-(1-y).*log(1-hypothesis));
Grad = 1/numSamples*X'*residual;
end
%%sigmoid function
%Formula:h(X)=g(theta'*X)=1/(1+e^(-theta'*X))
%   Coding:h=1./(1+exp(-X*theta))
function g = sigmoid(z)
% SIGMOID Element-wise logistic function.
%   g = SIGMOID(z) returns 1 ./ (1 + exp(-z)), computed element by element,
%   so g has the same size as z.
denominator = 1 + exp(-z);
g = 1 ./ denominator;
end