###=====dataset ======
# Read the CSV file; the first column supplies the row names.
# NOTE(review): ".csv" looks like a placeholder path -- confirm the real file.
dataset <- read.csv(".csv", header = TRUE, row.names = 1)
# Convert the response variable to a factor so it is treated as a class label.
dataset$y <- as.factor(dataset$y)
summary(dataset$y)
#=== down-sampling ====
# Rows with y == 1 (controls). which() drops NA comparisons, so a missing y
# cannot leak into the subset as an all-NA row.
subset1 <- dataset[which(dataset$y == 1), ] #controls
# Row names of the rows with y == 2 (cases), again ignoring missing y.
row.name <- rownames(dataset)[which(dataset$y == 2)]
# Fail with a clear message when there is nothing to draw the cases from.
if (nrow(subset1) > 0L && length(row.name) == 0L) {
  stop("no rows with y == 2 to resample from", call. = FALSE)
}
# Draw as many case rows as there are controls. Sampling is done WITH
# replacement, so the same case row can be selected more than once.
# The draw is random; call set.seed() beforehand if it must be reproducible.
resample <- sample(row.name, nrow(subset1), replace = TRUE) #resampling
subset2 <- dataset[resample, ] #cases
# Stack controls and resampled cases into one data set.
dataset <- rbind(subset1, subset2)
# Both classes now have the same number of rows.
dim(dataset)