R语言机器学习（Machine Learning）代码与研究

最新推荐文章于 2020-08-12 19:16:31 发布

原创

最新推荐文章于 2020-08-12 19:16:31 发布 · 1k 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#R #ML

本文详细探讨了R语言在机器学习领域的应用，包括数据预处理、模型训练、评估与调优等方面，通过实例代码展示如何使用R进行机器学习项目。读者将了解R中的各种机器学习库，如caret、randomForest等，并掌握如何整合这些工具解决实际问题。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

#### packages
	install.packages("ggplot2")
	install.packages("ROCR")
	install.packages("glmnet")
	install.packages("Metrics")
	install.packages("DMwR")
	install.packages("Rcpp")


	library(ggplot2)
	library(ROCR)
	library(glmnet)
	library(Metrics)
	#### Input ----
	# Read the marketing data set (path is relative to the working directory)
	# and take a first look at its leading rows and per-column summaries.
	marketing <- read.csv(file = "marketing.csv")

	head(x = marketing)
	summary(object = marketing)


	#### Data Visualization #############################

	### Average age for each occupation
	# One bar per `job`; the bar height is the mean of `age` within that job.
	# The summary function is passed as `fun`, which replaces the `fun.y`
	# argument deprecated in ggplot2 3.3.0.
	ggplot(marketing, aes(job, age)) +
	  geom_bar(
	    stat = "summary", fun = "mean",
	    color = "black", fill = "grey", width = 0.5
	  ) +
	  theme_bw() +
	  labs(y = "Age", title = "Age Distribution") +
	  # hjust = 0.5 centres the title (and a subtitle, should one be added).
	  theme(
	    plot.title = element_text(hjust = 0.5),
	    plot.subtitle = element_text(hjust = 0.5)
	  )

	# Mean `age` per `job`, with the bars filled according to the `y` column.
	# `fun` is the current name of the summary-function argument (`fun.y` was
	# deprecated in ggplot2 3.3.0).
	ggplot(marketing, aes(job, age, fill = y)) +
	  geom_bar(stat = "summary", fun = "mean", width = 0.5) +
	  theme_bw() +
	  labs(y = "Age", title = "Age Distribution")

	# Mean `age` per `job`, filled by `y`, drawn as one panel per `marital` value.
	# Every layer is chained with `+` so that the theme and labels are actually
	# applied to this plot. `fun` is the current name of the summary-function
	# argument (`fun.y` was deprecated in ggplot2 3.3.0).
	ggplot(marketing, aes(job, age, fill = y)) +
	  geom_bar(stat = "summary", fun = "mean", width = 0.5) +
	  facet_wrap(~ marital) +
	  theme_bw() +
	  labs(y = "Age", title = "Age Distribution")


	#### Data preparation ----
	## Training and Testing
	# Split each class of `y` separately (70% train, 30% test) so that both
	# partitions keep roughly the same yes/no mix as the full data set.
	data_y <- marketing[marketing$y == "yes", ]
	data_n <- marketing[marketing$y == "no", ]

	set.seed(1234) # fixed seed so the split is reproducible
	ysub <- sample(nrow(data_y), floor(nrow(data_y) * 0.7))
	nsub <- sample(nrow(data_n), floor(nrow(data_n) * 0.7))

	train_yes <- data_y[ysub, ]
	train_no <- data_n[nsub, ]

	# Hold out the rows that were not sampled. The complement is built with
	# setdiff() rather than `[-idx, ]` because a negative index of length zero
	# selects no rows at all instead of every row.
	test_yes <- data_y[setdiff(seq_len(nrow(data_y)), ysub), ]
	test_no <- data_n[setdiff(seq_len(nrow(data_n)), nsub), ]

	# Stack the two classes and recode the outcome: 1 for "yes", 0 otherwise.
	train <- rbind(train_yes, train_no)
	train$y <- ifelse(train$y == "yes", 1, 0)
	test <- rbind(test_yes, test_no)
	test$y <- ifelse(test$y == "yes", 1, 0)

	# Sanity check: this is 0 when every row of `marketing` ended up in exactly
	# one of `train` / `test`.
	nrow(marketing) - nrow(train) - nrow(test)
	# Class proportions in the training partition.
	print(prop.table(table(train$y)))

	#### Explore SMOTE
	library(DMwR)

	# Class sizes in the training split: X counts the rows of `train_no`,
	# Y counts the rows of `train_yes`.
	X <- nrow(train_no)
	Y <- nrow(train_yes)

	# Over-/under-sampling percentages passed to SMOTE(), both derived from
	# the two class sizes above.
	perc.over <- 100 * (X - Y) / Y
	perc.under <- 100 * X / (X - Y)

	# Turn the 0/1 outcome into a factor before calling SMOTE()
	# (presumably SMOTE() expects a factor target -- confirm against its docs).
	train$y <- as.factor(train$y)
	train_bal <- SMOTE(
	  form = y ~ ., data = train,
	  perc.over = perc.over, perc.under = perc.under
	)

	# Class proportions after resampling.
	print(prop.table(table(train_bal$y)))



	################## Model result function
	modelperf<- function(ypredict, ytrue, cutoff) {
	library(ROCR)
	##
	ypredict <- as.numeric(ypredict)
	ytrue<- as.numeric(as.character(ytrue))
	yresult<- ifelse(ypredict > cutoff, 1,0)
	accuracy <- 1 - mean(yresult != ytrue)

	ypredict<- as.numeric(ypredict)