#################################################
# 1. Inspect and set the current working directory
#################################################
# Show the working directory before the change.
getwd()
# Switch to the project folder; relative paths used later in this script
# (for example "Data/...") are resolved against it.
work_dir <- "D:/MyR_New"
setwd(work_dir)
# Show the working directory again to confirm the change.
getwd()
#################################################
# 2. Read the data
#################################################
# Read the CET-4/CET-6 score workbook (Excel) into `cet201712`.
# The original assignment had no right-hand side, so R parsed it together with
# the next line as `cet201712 <- length(cet201712)` and stopped with an error.
# readxl is installed only when it is missing.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
# NOTE(review): the workbook's file name is not given anywhere in this script,
# so it is picked interactively; replace file.choose() with the real path once
# it is known.
# as.data.frame() converts the tibble returned by read_excel() into a plain
# data frame, so that `cet201712[rows, "score"]` below yields a vector.
cet201712 <- as.data.frame(readxl::read_excel(file.choose()))
# Quick structural checks: number of columns, dimensions and column names.
length(cet201712)
dim(cet201712)
names(cet201712)
# Per-column summary plus the first and last rows.
summary(cet201712)
head(cet201712)
tail(cet201712)
# First ten scores and the overall mean score.
cet201712[1:10, "score"]
mean(cet201712$score)
# Open the full table, then the number of records per school, in the viewer.
View(cet201712)
View(table(cet201712$school))
#######################################################
# 3. Simple data cleaning
# http://blog.sina.com.cn/s/blog_13b7ba9b00102x42m.html
#######################################################
# Flag student IDs that have already appeared earlier in the table.
# NOTE(review): the column is spelled "StuID" here but "stuid" in a later
# column selection of this script -- confirm which spelling the data uses.
duplicated(cet201712$StuID)
# Keep only the first occurrence of every fully identical row.
cet201712 <- cet201712[!duplicated(cet201712), , drop = FALSE]
# Replace every missing value in the table with 0 ...
cet201712[is.na(cet201712)] <- 0
# ... and drop rows that still contain a missing value (after the replacement
# above there should be none left).
cet201712 <- na.omit(cet201712)
# Pull the major column out as a plain vector and show its class.
major <- as.vector(cet201712$major)
class(major)
# trim() comes from the raster package; install it only when it is missing
# instead of re-downloading it on every run of the script.
if (!requireNamespace("raster", quietly = TRUE)) {
  install.packages("raster")
}
library(raster)
# Apply trim() to the major names (as character) and write them back.
Major <- trim(as.character(cet201712$major))
cet201712$major <- Major
# More involved data cleaning
# https://www.cnblogs.com/payton/p/4894747.html
# Table size before removing zero scores.
dim(cet201712)
# Number of records whose score is exactly 0.
sum(cet201712$score == 0, na.rm = TRUE)
# Install dplyr only when it is missing instead of on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# Keep only the records with a non-zero score.
cet201712 <- filter(cet201712, score != 0)
# Table size after the filter.
dim(cet201712)
#############################################################
# 4. Data processing with the dplyr and plyr packages
# https://blog.csdn.net/c1z2w3456789/article/details/50899197
# https://blog.csdn.net/G090909/article/details/50769701
#############################################################
# Install plyr only when it is missing instead of on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
# NOTE(review): plyr is attached after dplyr in this script, so functions
# exported by both packages resolve to plyr's versions from here on.
library("plyr")
# Several summary statistics of all scores in a single call.
each(max, min, median, sd)(cet201712$score)
# Score vectors of the two exam types.
cet4_scores <- cet201712[which(cet201712$cettype == "CET4"), "score"]
cet6_scores <- cet201712[which(cet201712$cettype == "CET6"), "score"]
# Extended statistics per exam type.
# NOTE(review): each() is deliberately re-created for every call, as in the
# original statements; reusing one combined function across inputs was not
# verified with multi-valued functions such as quantile.
each(max, min, median, mean, sd, quantile, IQR)(cet4_scores)
each(max, min, median, mean, sd, quantile, IQR)(cet6_scores)
# Keep the CET-4 statistics, inspect their class, then turn them into a
# data frame.
Temp <- each(max, min, median, mean, sd, quantile, IQR)(cet4_scores)
class(Temp)
Temp <- as.data.frame(Temp)
Temp
names(Temp)
# Show the CET-6 statistics in the viewer.
View(each(max, min, median, mean, sd, quantile, IQR)(cet6_scores))
# Compare the CET-4 score distribution across schools: one viewer per school.
# The school names are matched against the `school` column exactly as written.
cet4_school_names <- c(
  "求索荣誉学院",
  "国际商学院",
  "国际传媒学院",
  "涉外法政学院",
  "英语学院",
  "日语学院",
  "欧洲语言文化学院",
  "亚非语学院",
  "国际交流学院"
)
for (school_name in cet4_school_names) {
  school_rows <- which(
    cet201712$cettype == "CET4" & cet201712$school == school_name
  )
  # A new each() combinator is built on every iteration, as in the original
  # one-statement-per-school version.
  # The explicit title lets the viewers be told apart by school.
  View(
    each(max, min, median, mean, sd, quantile, IQR)(
      cet201712[school_rows, "score"]
    ),
    title = school_name
  )
}
# Data aggregation
# https://blog.csdn.net/G090909/article/details/50769701
# summarize_each() and funs() are deprecated in dplyr, so the grouped mean is
# computed with summarise() instead. The call is namespaced because plyr is
# attached after dplyr in this script and exports a summarise() of its own.
# Mean score per school, printed and then shown in the viewer.
group_by(cet201712, school) %>%
  dplyr::summarise(score = mean(score))
View(
  group_by(cet201712, school) %>%
    dplyr::summarise(score = mean(score))
)
# Mean CET-4 score per grade.
Temp4 <- group_by(cet201712[which(cet201712$cettype == "CET4"), ], grade) %>%
  dplyr::summarise(score = mean(score))
Temp4
class(Temp4)
names(Temp4) <- c("年级", "四级平均分")
View(Temp4)
# Mean CET-6 score per grade.
Temp6 <- group_by(cet201712[which(cet201712$cettype == "CET6"), ], grade) %>%
  dplyr::summarise(score = mean(score))
Temp6
names(Temp6) <- c("年级", "六级平均分")
View(Temp6)
# Number of records per grade, for CET-4 and then CET-6.
table(cet201712$grade[which(cet201712$cettype == "CET4")])
table(cet201712$grade[which(cet201712$cettype == "CET6")])
# Number of CET-4 records per language type, printed.
table(cet201712$lantype[which(cet201712$cettype == "CET4")])
# The same counts in the viewer, for CET-4 and then CET-6.
View(table(cet201712$lantype[which(cet201712$cettype == "CET4")]))
View(table(cet201712$lantype[which(cet201712$cettype == "CET6")]))
# Selected columns of the CET-6 records whose grade equals 17.
View(
  subset(
    cet201712,
    cettype == "CET6" & grade == 17,
    select = c("stuid", "firstyear", "grade", "school", "class", "score")
  )
)
# Mean CET-4 score per grade, computed with base aggregate().
cet4_idx <- which(cet201712$cettype == "CET4")
aggregate(cet201712[cet4_idx, "score"], list(cet201712$grade[cet4_idx]), mean)
# Mean score per school over all records, printed and then shown in the viewer.
aggregate(cet201712$score, list(cet201712$school), mean)
View(aggregate(cet201712$score, list(cet201712$school), mean))
# Row masks, score vectors and grouping keys for each exam type.
is_cet4 <- cet201712$cettype == "CET4"
is_cet6 <- cet201712$cettype == "CET6"
cet4_score_vec <- cet201712[is_cet4, "score"]
cet6_score_vec <- cet201712[is_cet6, "score"]
cet4_school_key <- list(cet201712[is_cet4, "school"])
cet6_school_key <- list(cet201712[is_cet6, "school"])
# CET-4 per school: unsorted mean, then mean and median sorted in descending
# order (aggregate() names the value column `x`).
View(aggregate(cet4_score_vec, cet4_school_key, mean))
View(arrange(aggregate(cet4_score_vec, cet4_school_key, mean), desc(x)))
View(arrange(aggregate(cet4_score_vec, cet4_school_key, median), desc(x)))
# Keep the sorted per-school means of both exam types.
Temp4 <- arrange(aggregate(cet4_score_vec, cet4_school_key, mean), desc(x))
Temp6 <- arrange(aggregate(cet6_score_vec, cet6_school_key, mean), desc(x))
Temp4
# Give both tables readable column names, then show them in the viewer.
names(Temp4) <- c("学院", "四级平均数")
names(Temp6) <- c("学院", "六级平均数")
View(Temp4)
View(Temp6)
# Mean score per language type for each exam, sorted in descending order
# (aggregate() names the value column `x`).
Temp4.lan <- arrange(
  aggregate(
    cet201712[cet201712$cettype == "CET4", "score"],
    list(cet201712[cet201712$cettype == "CET4", "lantype"]),
    mean
  ),
  desc(x)
)
Temp6.lan <- arrange(
  aggregate(
    cet201712[cet201712$cettype == "CET6", "score"],
    list(cet201712[cet201712$cettype == "CET6", "lantype"]),
    mean
  ),
  desc(x)
)
Temp4.lan
# Readable column names for both tables.
names(Temp4.lan) <- c("语种", "四级平均数")
names(Temp6.lan) <- c("语种", "六级平均数")
dim(Temp4.lan)
dim(Temp6.lan)
View(Temp4.lan)
View(Temp6.lan)
# write.csv() cannot create missing folders, so make sure the output
# directory exists before writing.
if (!dir.exists("Data")) {
  dir.create("Data")
}
write.csv(Temp4.lan, "Data/Tjfsu_CET4_201712.SortByLanguage.csv")
write.csv(Temp6.lan, "Data/Tjfsu_CET6_201712.SortByLanguage.csv")
# Combining data sets by rows and by columns
# Three basic functions:
# combine columns: merge, cbind
# combine rows: rbind
# http://blog.sina.com.cn/s/blog_72512a1d0102xq6b.html
# reshape is a more powerful tool for reshaping data frames
# When merge() combines columns, the variable shared by both data sets has to
# be named.
Temp <- merge(Temp4, Temp6, by = "学院")
Temp
class(Temp)
# Add a column directly: the average of the CET-4 and CET-6 means.
Temp[["四六级平均数"]] <- (Temp[["四级平均数"]] + Temp[["六级平均数"]]) / 2
# Sort by the combined result, highest first; ordering by the average gives
# the same order as ordering by the sum of the two means.
Temp <- arrange(Temp, desc(四六级平均数))
Temp
View(Temp)
# Merge the per-language CET-4 and CET-6 means on the shared language column.
Temp.lan <- merge(Temp4.lan, Temp6.lan, by = "语种")
Temp.lan
class(Temp.lan)
# Add a column directly: the average of the CET-4 and CET-6 means.
Temp.lan[, "四六级平均数"] <- (Temp.lan[, "四级平均数"] + Temp.lan[, "六级平均数"]) / 2
# Sort by the sum of the two means, highest first.
Temp.lan <- arrange(Temp.lan, desc(四级平均数 + 六级平均数))
dim(Temp.lan)
Temp.lan
# The stray text that followed this call was a syntax error that stopped the
# whole script from parsing; it has been removed.
View(Temp.lan)