// Builds a logistic-regression model from a training set, then predicts the outcome of university admission applications.
#include <math.h>
#include <stdlib.h>

#include <cmath>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>
// Learning rate: the factor every gradient term is multiplied by in getNewTheta.
#define alpha 0.01
// NOTE(review): these file-scope using-directives are what let the code below
// write vector, string, cout and Point3d without a namespace qualifier.
using namespace cv;
using namespace std;
// Reads the labelled training set from a comma-separated text file.
//
// Every line is expected to carry three comma-separated numbers. The first
// two are stored, divided by 10, in x and y; the third is stored unchanged
// in z (the code elsewhere compares it against 0/1 as the class label).
// At most the first 100 lines of the file are consumed. Lines with fewer
// than three fields (including blank lines) are skipped instead of being
// passed to atof as a null pointer.
//
// @param trainingPath path of the training file.
// @return the parsed samples; empty when the file cannot be opened.
vector<Point3d> getTraining(string trainingPath) {
    const int maxLines = 100;  // same cap as the fixed-size training set
    vector<Point3d> result;

    ifstream fileIn(trainingPath, ios_base::in);
    if (!fileIn.is_open()) {
        cout << "读取文件失败" << endl;
        return result;
    }

    string line;
    for (int lineNo = 0; lineNo < maxLines && getline(fileIn, line); lineNo++) {
        // strtok writes into its input, so tokenise a private NUL-terminated copy.
        vector<char> buffer(line.begin(), line.end());
        buffer.push_back('\0');

        double num[3];
        bool complete = true;
        char* token = strtok(buffer.data(), ",");
        for (int field = 0; field < 3; field++) {
            if (token == nullptr) {
                complete = false;
                break;
            }
            num[field] = atof(token);
            token = strtok(nullptr, ",");
        }
        if (!complete)
            continue;

        // The two features are scaled down by 10 to keep later computations
        // within floating-point range (per the original author's note).
        result.push_back(Point3d(num[0] / 10, num[1] / 10, num[2]));
    }
    // fileIn is closed by its destructor.
    return result;
}
// Reads the unlabelled test set from a tab-separated text file.
//
// Every line is expected to carry at least three tab-separated fields.
// The first field is stored in x and the third in y, both unscaled;
// z is set to -1.
// NOTE(review): the second field is read and then discarded, exactly as the
// original code did - confirm against the actual file layout that this is
// the intended column selection.
// At most the first 100 lines of the file are consumed. Lines with fewer
// than three fields (including blank lines) are skipped instead of being
// passed to atof as a null pointer.
//
// @param testingPath path of the test file.
// @return the parsed samples; empty when the file cannot be opened.
vector<Point3d> getTesting(string testingPath) {
    const int maxLines = 100;  // same cap as the original fixed-count loop
    vector<Point3d> result;

    ifstream fileIn(testingPath, ios_base::in);
    if (!fileIn.is_open()) {
        cout << "读取文件失败" << endl;
        return result;
    }

    string line;
    for (int lineNo = 0; lineNo < maxLines && getline(fileIn, line); lineNo++) {
        // strtok writes into its input, so tokenise a private NUL-terminated copy.
        vector<char> buffer(line.begin(), line.end());
        buffer.push_back('\0');

        char* first = strtok(buffer.data(), "\t");
        char* second = (first != nullptr) ? strtok(nullptr, "\t") : nullptr;
        char* third = (second != nullptr) ? strtok(nullptr, "\t") : nullptr;
        if (third == nullptr)
            continue;

        result.push_back(Point3d(atof(first), atof(third), -1));
    }
    // fileIn is closed by its destructor.
    return result;
}
// Evaluates the linear model (theta transposed times X) for one sample:
// theta.x is the constant term, theta.y weights sample.x and theta.z
// weights sample.y. sample.z is not used.
double getProduct(Point3d sample, Point3d theta) {
    const double firstTerm = theta.y * sample.x;
    const double secondTerm = theta.z * sample.y;
    return theta.x + firstTerm + secondTerm;
}
// Sigmoid / logistic function: exp(z) / (1 + exp(z)).
//
// For large positive z, exp(z) overflows to +infinity and the quotient would
// be inf / inf = NaN; that case returns the mathematical limit 1.0 instead.
// Every input whose exp(z) is finite yields the same value as the plain
// formula.
double sigmoid(double z) {
    const double expZ = std::exp(z);
    if (std::isinf(expZ))
        return 1.0;
    return expZ / (1 + expZ);
}
// Per-sample cost of the model's prediction h = sigmoid(getProduct(sample, theta)):
// -log10(1 - h) when the label sample.z equals 0, otherwise -log10(h).
double costFunction(Point3d sample, Point3d theta) {
    const double predicted = sigmoid(getProduct(sample, theta));
    const double likelihood = (sample.z == 0) ? (1 - predicted) : predicted;
    return -log10(likelihood);
}
// Performs one batch gradient-descent step and returns the updated theta.
//
// For every training sample the prediction error (h - label) is accumulated:
//   - unweighted for theta.x, the constant term of getProduct;
//   - weighted by sample.x for theta.y;
//   - weighted by sample.y for theta.z.
// Each term is multiplied by the learning rate alpha. The sums are not
// divided by the sample count, so the effective step grows with the size of
// the training set.
//
// @param oldTheta parameters before the step.
// @param training labelled samples (x, y = features, z = label).
// @return oldTheta minus the accumulated, alpha-scaled gradient.
Point3d getNewTheta(Point3d oldTheta, const vector<Point3d>& training) {
    double sumX = 0, sumY = 0, sumZ = 0;
    for (const Point3d& sample : training) {
        const double error = sigmoid(getProduct(sample, oldTheta)) - sample.z;
        // The constant term's input is implicitly 1, so its gradient is the
        // bare error; multiplying by sample.x here would merely duplicate
        // the theta.y update.
        sumX += error * alpha;
        sumY += error * sample.x * alpha;
        sumZ += error * sample.y * alpha;
    }
    return Point3d(oldTheta.x - sumX, oldTheta.y - sumY, oldTheta.z - sumZ);
}
// Classifies every training sample with finalTheta (predicting 1 when the
// sigmoid output is at least 0.5, else 0) and prints the fraction of samples
// whose prediction equals the stored label z.
void compareTraining(vector<Point3d> training, Point3d finalTheta) {
    int matches = 0;
    for (const Point3d& sample : training) {
        const int predicted = (sigmoid(getProduct(sample, finalTheta)) >= 0.5) ? 1 : 0;
        if (predicted == sample.z)
            ++matches;
    }
    const double accuracy = static_cast<double>(matches) / training.size();
    cout << "自测准确率为 " << accuracy << endl;
}
// Classifies every test sample with finalTheta and prints a table of the
// results followed by the totals for each outcome.
//
// The x and y values are divided by 10 before being fed to the model, which
// mirrors the scaling getTraining applies to the training data; the table
// shows the unscaled values. A sigmoid output of at least 0.5 is reported
// as result 1, anything lower as 0.
void compareTesting(vector<Point3d> testing, Point3d finalTheta) {
    int admitted = 0;
    int rejected = 0;
    cout << endl << "Num\t\tGrade1\t\tGrade2\t\tResult" << endl;

    int ordinal = 0;
    for (const Point3d& applicant : testing) {
        ++ordinal;
        const Point3d scaled(applicant.x / 10, applicant.y / 10, 0);
        const bool accepted = sigmoid(getProduct(scaled, finalTheta)) >= 0.5;
        if (accepted)
            ++admitted;
        else
            ++rejected;
        cout << "testing " << ordinal << "\t" << applicant.x << "\t\t" << applicant.y << "\t\t" << (accepted ? 1 : 0) << endl;
    }

    cout << endl << admitted << " students can enter the university" << endl
         << rejected << " students can't enter the university" << endl;
}
int main() {
string trainingPath = "C:\\Users\\Administrator\\Desktop\\LR_TrainingSet.txt";
string testingPath = "C:\\Users\\Administrator\\Desktop\\LR_TestingSet.txt";
// 读入训练集(100个样本
vector<Point3d> training = getTraining(trainingPath);
vector<Point3d> testing = getTesting(testingPath);
// 建立模型
Point3d theta(3, 11, 9);
cout << "alpha = " << alpha << endl;
cout << "initial_theta\t" << theta.x << "\t" << theta.y << "\t" << theta.z << endl;
Point3d oldTheta, finalTheta;
double sumJ, oldJ, J = 0, minJ = 99999, num = 0;
while (1) {
//cout << "第" << num++ << "次循环" << endl;
num++;
sumJ = 0;
for (int i = 0; i < training.size(); i++) {
sumJ += costFunction(training[i], theta); // J(θ)之和
}
//system("pause");
//cout << "sumJ " << sumJ << endl;
oldJ = J;
J = sumJ / training.size(); // J(θ)
if (num == 1)
minJ = J;
// 不停迭代得到minJ(θ)
if (J < minJ) {
minJ = J;
finalTheta = theta;
}
// 当迭代次数超过500时跳出
if (num > 500)
break;
oldTheta = theta;
// 得到新的theta
theta = getNewTheta(oldTheta, training);
}
// 输出最终的theta
cout << "final_theta\t" << finalTheta.x << " " << finalTheta.y << " " << finalTheta.z << endl;
// 检验自测准确率
compareTraining(training, finalTheta);
// 检测测试集的预测结果
compareTesting(testing, finalTheta);
system("pause");
}