MR分析（主要记录一下，有多个分析时，一些循环的代码，以及分析前数据清洗准备工作）

onlooker_zsq

已于 2024-06-28 17:12:37 修改

阅读量842

点赞数 6

CC 4.0 BY-SA版权

文章标签： r语言

于 2024-04-24 14:47:37 首次发布

本文链接：https://blog.youkuaiyun.com/onlookerq/article/details/137714288

setwd('H:\\Puberty_SEM\\MR_GWAS')
library(TwoSampleMR)
library(dplyr)
files = c('ukb-a-25')
files[1]

这里用计算继续F值，但是由于要用到samplesize，所以先用if语句判断一下数据中是否含有samplesize值，如果是NA值，就跳过，这样就减少了观看每个文档的时间；这条命令也适用于其他的很多种场景中；所以简单记录一下；

for (i in seq_along(files)){
exposure_dat <- extract_instruments(outcomes = files[i])
if(any(is.na(exposure_dat$samplesize.exposure))){
  print(paste("列", "column_name", "包含NA值，跳过这个步骤。"))
}else{
exposure_dat = mutate(exposure_dat,R=get_r_from_bsen(exposure_dat$beta.exposure,
                                                     exposure_dat$se.exposure,
                                                     exposure_dat$samplesize.exposure))
exposure_dat = mutate(exposure_dat,F=(exposure_dat$samplesize.exposure-2)*((R*R)/(1-R*R)))
exposure_dat = exposure_dat %>% filter(F>10)
}
outcome_dat <- read_outcome_data(
  snps = exposure_dat$SNP,
  filename = "PSY_factor.txt",
  sep = "\t",
  snp_col = "rsID",
  beta_col = "BETA",
  se_col = "SE",
  effect_allele_col = "EA",
  other_allele_col = "NEA",
  pval_col = "P",
  samplesize_col = "N"
)
dat <- harmonise_data(
  exposure_dat = exposure_dat, 
  outcome_dat = outcome_dat
)
filename1 <- paste0("dat_",files[i], ".txt")#这里给输出文件命名一下
write.table(dat,file=filename1,row.names = F,sep = '\t',quote = F)
res <- mr(dat)
res = generate_odds_ratios(res)
filename2 <- paste0("res_",files[i], ".txt")
write.table(res,file=filename2,row.names = F,sep = '\t',quote = F)
het <- mr_heterogeneity(dat)
filename3 <- paste0("het_",files[i], ".txt")
write.table(res,file=filename3,row.names = F,sep = '\t',quote = F)
plt <- mr_pleiotropy_test(dat)
filename4 <- paste0("plt_",files[i], ".txt")
write.table(res,file=filename4,row.names = F,sep = '\t',quote = F)
#这里一样用了一个if循环判断一下
if(any(is.na(exposure_dat$samplesize.exposure))){
  print(paste("列", "column_name", "包含NA值，跳过这个步骤。"))
}else{
out <- directionality_test(dat)
filename5 <- paste0("out_",files[i], ".txt")
}
write.table(res,file=filename5,row.names = F,sep = '\t',quote = F)
}

##批量提取某GWAS数据中多个snp 1Mb范围内p值小于5e-8的所有snplibrary(data.table)

library(dplyr)
df = fread('age of menarche.txt',header = T)
files = c(##多个snp名，进行循环
)
files[1]
for (i in seq_along(files)){
  df1 = df[(df$RSID==files[i]),] #df 数据框中RSID列等于files的那一行
  a = df1$CHR  #df 数据框中RSID列等于files的那一行，的CHR
  a = as.numeric(a)
  b = df1$POS
  b = as.numeric(b)
  subset_df = subset(df,CHR==a & POS<(b+500000) & POS>(b-500000) & P<5e-8) #提取df数据框中CHR==a，POS 1Mb范围内P值小于5e-8的SNP
  filename1 <- paste0("voice_",files[i], ".txt")
  write.table(subset_df,file = filename1,row.names = F,sep = '\t',quote = F)
}

#RSID获取其chr和bp

library(dplyr)
library(data.table)
library(tidyr)
match = fread("snp150_hg19.txt",header=T,check.names=F,sep="\t")#用hg19作为reference 进行转换
files = c(#多个GWAS数据
)
files[1]
for (i in seq_along(files)){
tes = fread(files[i],header=T,check.names=F,sep="\t") 
colnames(tes)[1] = 'name'#因为snp150_hg19.txt数据中列名为name，所以转换以下
need=dplyr::left_join(tes,match,by="name")
need = na.omit(need)
need=temp%>%separate(`chromosome:start`,c('CHR','BP'),'[:]') 
colnames(need)=c('SNP','A1','A2','Freq','BETA','SE','P','N','CHR','B