setwd('H:\\Puberty_SEM\\MR_GWAS')
library(TwoSampleMR)
library(dplyr)
files = c('ukb-a-25')
files[1]
这里用计算继续F值,但是由于要用到samplesize,所以先用if语句判断一下数据中是否含有samplesize值,如果是NA值,就跳过,这样就减少了观看每个文档的时间;这条命令也适用于其他的很多种场景中;所以简单记录一下;
for (i in seq_along(files)){
exposure_dat <- extract_instruments(outcomes = files[i])
if(any(is.na(exposure_dat$samplesize.exposure))){
print(paste("列", "column_name", "包含NA值,跳过这个步骤。"))
}else{
exposure_dat = mutate(exposure_dat,R=get_r_from_bsen(exposure_dat$beta.exposure,
exposure_dat$se.exposure,
exposure_dat$samplesize.exposure))
exposure_dat = mutate(exposure_dat,F=(exposure_dat$samplesize.exposure-2)*((R*R)/(1-R*R)))
exposure_dat = exposure_dat %>% filter(F>10)
}
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = "PSY_factor.txt",
sep = "\t",
snp_col = "rsID",
beta_col = "BETA",
se_col = "SE",
effect_allele_col = "EA",
other_allele_col = "NEA",
pval_col = "P",
samplesize_col = "N"
)
dat <- harmonise_data(
exposure_dat = exposure_dat,
outcome_dat = outcome_dat
)
filename1 <- paste0("dat_",files[i], ".txt")#这里给输出文件命名一下
write.table(dat,file=filename1,row.names = F,sep = '\t',quote = F)
res <- mr(dat)
res = generate_odds_ratios(res)
filename2 <- paste0("res_",files[i], ".txt")
write.table(res,file=filename2,row.names = F,sep = '\t',quote = F)
het <- mr_heterogeneity(dat)
filename3 <- paste0("het_",files[i], ".txt")
write.table(res,file=filename3,row.names = F,sep = '\t',quote = F)
plt <- mr_pleiotropy_test(dat)
filename4 <- paste0("plt_",files[i], ".txt")
write.table(res,file=filename4,row.names = F,sep = '\t',quote = F)
#这里一样用了一个if循环判断一下
if(any(is.na(exposure_dat$samplesize.exposure))){
print(paste("列", "column_name", "包含NA值,跳过这个步骤。"))
}else{
out <- directionality_test(dat)
filename5 <- paste0("out_",files[i], ".txt")
}
write.table(res,file=filename5,row.names = F,sep = '\t',quote = F)
}
##批量提取某GWAS数据中多个snp 1Mb范围内p值小于5e-8的所有snplibrary(data.table)
library(dplyr)
df = fread('age of menarche.txt',header = T)
files = c(##多个snp名,进行循环
)
files[1]
for (i in seq_along(files)){
df1 = df[(df$RSID==files[i]),] #df 数据框中RSID列等于files的那一行
a = df1$CHR #df 数据框中RSID列等于files的那一行,的CHR
a = as.numeric(a)
b = df1$POS
b = as.numeric(b)
subset_df = subset(df,CHR==a & POS<(b+500000) & POS>(b-500000) & P<5e-8) #提取df数据框中CHR==a,POS 1Mb范围内P值小于5e-8的SNP
filename1 <- paste0("voice_",files[i], ".txt")
write.table(subset_df,file = filename1,row.names = F,sep = '\t',quote = F)
}
#RSID获取其chr和bp
library(dplyr)
library(data.table)
library(tidyr)
match = fread("snp150_hg19.txt",header=T,check.names=F,sep="\t")#用hg19作为reference 进行转换
files = c(#多个GWAS数据
)
files[1]
for (i in seq_along(files)){
tes = fread(files[i],header=T,check.names=F,sep="\t")
colnames(tes)[1] = 'name'#因为snp150_hg19.txt数据中列名为name,所以转换以下
need=dplyr::left_join(tes,match,by="name")
need = na.omit(need)
need=temp%>%separate(`chromosome:start`,c('CHR','BP'),'[:]')
colnames(need)=c('SNP','A1','A2','Freq','BETA','SE','P','N','CHR','B