通过训练集预测大学申请结果

最新推荐文章于 2024-11-10 16:01:31 发布

awenst

最新推荐文章于 2024-11-10 16:01:31 发布

阅读量331

点赞数

CC 4.0 BY-SA版权

分类专栏：医学影像大数据分析

本文链接：https://blog.youkuaiyun.com/awenst/article/details/62047282

医学影像大数据分析专栏收录该内容

2 篇文章

订阅专栏

本文介绍了一个使用梯度下降法优化参数θ的逻辑回归模型，该模型用于预测大学生的入学申请结果。通过对训练集进行多次迭代，找到使成本函数最小化的θ值，并以此模型对测试集进行预测。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

读取训练集并建立逻辑回归模型，然后用该模型预测大学入学申请的结果。

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>
// Learning rate (step size) multiplied into every gradient term in getNewTheta.
#define alpha 0.01

// Bring OpenCV names (e.g. Point3d) and standard-library names into scope unqualified.
using namespace cv;
using namespace std;

// Reads the training set from a comma-separated text file.
//
// Each usable line holds three numeric fields: "score1,score2,label".
// The two scores are divided by 10 (feature scaling that keeps the sigmoid
// input in a comfortable numeric range); the label is stored unchanged in z.
//
// Every line of the file is read (no fixed sample count). Lines with fewer
// than three non-empty fields are skipped instead of being parsed, so blank
// lines or a short file cannot lead to a null-pointer dereference.
//
// Returns an empty vector (after printing a message) if the file cannot be
// opened.
vector<Point3d> getTraining(string trainingPath) {
	vector<Point3d> result;
	ifstream fileIn(trainingPath, ios_base::in);
	if (!fileIn.is_open()) {
		cout << "读取文件失败" << endl;
		return result;
	}

	string line;
	while (getline(fileIn, line)) {
		istringstream fields(line);
		string field;
		double num[3];
		int parsed = 0;
		while (parsed < 3 && getline(fields, field, ',')) {
			// Ignore empty fields so that consecutive delimiters collapse,
			// matching the tokenising behaviour of strtok.
			if (field.empty())
				continue;
			num[parsed++] = atof(field.c_str());
		}
		if (parsed < 3)
			continue; // malformed or blank line

		result.push_back(Point3d(num[0] / 10, num[1] / 10, num[2]));
	}
	// fileIn is closed by its destructor.
	return result;
}

// Reads the test set from a tab-separated text file.
//
// For every line that has at least three non-empty tab-separated fields, the
// first field becomes x and the third field becomes y; the second field is
// read and discarded, exactly as the original parser did.
// NOTE(review): presumably the file carries an extra middle column — confirm
// against the actual LR_TestingSet.txt layout.
// z is set to -1 as an "unknown label" placeholder. Values are NOT rescaled
// here; compareTesting divides them by 10 before classifying.
//
// Every line of the file is read (no fixed sample count); lines with fewer
// than three fields are skipped rather than dereferencing a missing token.
//
// Returns an empty vector (after printing a message) if the file cannot be
// opened.
vector<Point3d> getTesting(string testingPath) {
	vector<Point3d> result;
	ifstream fileIn(testingPath, ios_base::in);
	if (!fileIn.is_open()) {
		cout << "读取文件失败" << endl;
		return result;
	}

	string line;
	while (getline(fileIn, line)) {
		istringstream fields(line);
		string field;
		vector<string> tokens;
		while (tokens.size() < 3 && getline(fields, field, '\t')) {
			// Ignore empty fields so that consecutive tabs collapse,
			// matching the tokenising behaviour of strtok.
			if (!field.empty())
				tokens.push_back(field);
		}
		if (tokens.size() < 3)
			continue; // malformed or blank line

		result.push_back(Point3d(atof(tokens[0].c_str()), atof(tokens[2].c_str()), -1));
	}
	// fileIn is closed by its destructor.
	return result;
}

// Computes the linear score theta^T * X for one sample, where X = (1, sample.x, sample.y):
// theta.x is the intercept, theta.y weights sample.x and theta.z weights sample.y.
// sample.z is not used.
double getProduct(Point3d sample, Point3d theta) {
	const double intercept = theta.x;
	const double firstTerm = theta.y * sample.x;
	const double secondTerm = theta.z * sample.y;
	return intercept + firstTerm + secondTerm;
}

// Logistic (sigmoid) function: 1 / (1 + e^-z), mapping any real z into [0, 1].
//
// The two branches are algebraically identical but each only ever evaluates
// exp() of a non-positive argument, so the exponential cannot overflow.
// The naive form exp(z) / (1 + exp(z)) yields inf / inf = NaN for large z.
double sigmoid(double z) {
	if (z >= 0) {
		return 1.0 / (1.0 + exp(-z));
	}
	const double e = exp(z);
	return e / (1.0 + e);
}

// Per-sample logistic-regression cost (cross-entropy, natural logarithm):
//   label == 0 : -ln(1 - h)
//   otherwise  : -ln(h)
// where h = sigmoid(theta^T * X) and the label is sample.z.
//
// The natural log is used because the update in getNewTheta, (h - y) * x, is
// the gradient of the natural-log cross-entropy.
//
// The value is computed directly from the linear score z rather than from h:
//   -ln(1 - h) = ln(1 + e^z)  = softplus(z)
//   -ln(h)     = ln(1 + e^-z) = softplus(-z)
// and softplus(t) is evaluated as max(t, 0) + ln(1 + e^-|t|). This stays
// finite even when the sigmoid saturates to exactly 0 or 1, where taking the
// logarithm of h or 1 - h would give infinity.
double costFunction(Point3d sample, Point3d theta) {
	const double z = getProduct(sample, theta);
	const double t = (sample.z == 0) ? z : -z;
	const double positivePart = (t > 0) ? t : 0.0;
	return positivePart + log1p(exp(-fabs(t)));
}

// Performs one batch gradient-descent step and returns the updated theta.
//
// For every training sample the prediction error (h - y) is accumulated
// against each feature of X = (1, sample.x, sample.y):
//   theta.x (intercept) <- error * 1
//   theta.y             <- error * sample.x
//   theta.z             <- error * sample.y
// The summed gradient (not averaged over the sample count) is scaled by the
// learning rate `alpha` and subtracted from oldTheta.
//
// oldTheta : parameters before the step.
// training : samples with features in x/y and the 0/1 label in z.
Point3d getNewTheta(Point3d oldTheta, const vector<Point3d>& training) {
	double gradIntercept = 0, gradX = 0, gradY = 0;
	for (const Point3d& sample : training) {
		const double error = sigmoid(getProduct(sample, oldTheta)) - sample.z;
		gradIntercept += error; // the intercept's feature is the constant 1
		gradX += error * sample.x;
		gradY += error * sample.y;
	}
	return Point3d(oldTheta.x - alpha * gradIntercept,
		oldTheta.y - alpha * gradX,
		oldTheta.z - alpha * gradY);
}

// Classifies every training sample with finalTheta (predicting 1 when the
// sigmoid output is at least 0.5, otherwise 0) and prints the fraction of
// samples whose prediction equals the stored label (sample.z).
void compareTraining(vector<Point3d> training, Point3d finalTheta) {
	int hits = 0;
	for (const Point3d& sample : training) {
		const int predicted = (sigmoid(getProduct(sample, finalTheta)) >= 0.5) ? 1 : 0;
		if (predicted == sample.z) {
			++hits;
		}
	}
	const double accuracy = (double)hits / training.size();
	cout << "自测准确率为 " << accuracy << endl;
}

// Classifies every test sample with finalTheta and prints one row per sample
// (running number, the two raw scores, predicted result 1/0), followed by the
// totals of predicted admissions and rejections.
// The raw scores are divided by 10 before classification, the same scaling
// getTraining applies to the training features.
void compareTesting(vector<Point3d> testing, Point3d finalTheta) {
	int admitted = 0;
	int rejected = 0;
	int row = 0;

	cout << endl << "Num\t\tGrade1\t\tGrade2\t\tResult" << endl;
	for (const Point3d& applicant : testing) {
		++row;

		Point3d scaled;
		scaled.x = applicant.x / 10;
		scaled.y = applicant.y / 10;

		const bool accepted = sigmoid(getProduct(scaled, finalTheta)) >= 0.5;
		if (accepted) {
			++admitted;
		}
		else {
			++rejected;
		}

		cout << "testing " << row << "\t" << applicant.x << "\t\t" << applicant.y << "\t\t" << (accepted ? 1 : 0) << endl;
	}

	cout << endl << admitted << " students can enter the university" << endl
		<< rejected << " students can't enter the university" << endl;
}

int main() {
	string trainingPath = "C:\\Users\\Administrator\\Desktop\\LR_TrainingSet.txt";
	string testingPath = "C:\\Users\\Administrator\\Desktop\\LR_TestingSet.txt";

	// 读入训练集(100个样本
	vector<Point3d> training = getTraining(trainingPath);
	vector<Point3d> testing = getTesting(testingPath);

	// 建立模型
	Point3d theta(3, 11, 9);
	cout << "alpha = " << alpha << endl;
	cout << "initial_theta\t" << theta.x << "\t" << theta.y << "\t" << theta.z << endl;
	Point3d oldTheta, finalTheta;
	double sumJ, oldJ, J = 0, minJ = 99999, num = 0;
	while (1) {
		//cout << "第" << num++ << "次循环" << endl;
		num++;
		sumJ = 0;
		for (int i = 0; i < training.size(); i++) {
			sumJ += costFunction(training[i], theta); // J(θ)之和
		}
		//system("pause");
		//cout << "sumJ " << sumJ << endl;
		oldJ = J;
		J = sumJ / training.size(); // J(θ)
		if (num == 1)
			minJ = J;
		// 不停迭代得到minJ(θ)
		if (J < minJ) {
			minJ = J;
			finalTheta = theta;
		}

		// 当迭代次数超过500时跳出
		if (num > 500)
			break;

		oldTheta = theta;
		// 得到新的theta
		theta = getNewTheta(oldTheta, training);
	}

	// 输出最终的theta
	cout << "final_theta\t" << finalTheta.x << "  " << finalTheta.y << "  " << finalTheta.z << endl;

	// 检验自测准确率
	compareTraining(training, finalTheta);
	// 检测测试集的预测结果
	compareTesting(testing, finalTheta);
	system("pause");
}