rds转换tsv_将rds数据转为tsv-优快云博客

本文链接：https://blog.youkuaiyun.com/weixin_71847005/article/details/144253158


rm(list=ls())

setwd("F:/412种菌群数据")

GM_id_table = read.table("412GMname.txt",header =T,sep = "\t")

GM_ID = as.vector(GM_id_table $id)

#如果从csv文件读取id
# MR_result_data = fread("idname.csv")
# result_ID = as.vector(MR_result_data$id)

GM_ID = GM_ID [1:412]

GM_ID

setwd("F:/412种菌群数据/412kind_of_GM_by_in_xianyu")

library(data.table)

for (i in GM_ID){
  setwd("F:/412种菌群数据/412kind_of_GM_by_in_xianyu") #读取文件的位置
  orgin<-readRDS(paste0(i,".rds"))
  setwd("F:/412种菌群数据/412kind_of_GM_tsv") #保存文件的位置
  change<-fwrite(orgin,file = paste0(i,".tsv.gz"),quote = F,row.names = F)
}

vcf转换csv


rm(list=ls())

library(VariantAnnotation)
library(gwasvcf)
library(ieugwasr)
library(gwasglue)

setwd("F:/孟德尔测试")

#读取要转换的文件(下载下来直接读进来)
spesis<-readVcf("ieu-b-4980.vcf")

#开始转换，格式化为MR标准文件
spesis_data <- gwasvcf_to_TwoSampleMR(spesis)

#保存为csv格式
write.csv(spesis_data,"spesis_data.csv",row.names = FALSE)

#读入并查看
data=fread("spesis_data.csv")
View(data)

秃头法



setwd("F:/孟德尔测试/spesis_data")

library(ieugwasr) 
library(data.table) 
library(vroom)

spesis1 <- vroom(file.choose())#这个文件先用一个莫名的软件处理过

#把p值转为数值型变量（"pval"是表格对应p值的列名）
spesis1$pval <- as.numeric(spesis1$pval)
#提取自己P阈值的SNP用于后续clump
spesis2 <- spesis1[which(spesis1$pval < 5e-8),] 
spesis3 <- spesis1[which(spesis1$pval < 1e-7),] 
spesis4 <- spesis1[which(spesis1$pval < 1e-6),] 

#填入自己SNP名称对应MarkerName和p值的表头这两列
spesis4_iv <- spesis4[,c("SNP", "pval")] 
#修改列名，列名必须为rsid和pval
colnames(spesis4_iv) <- c("rsid", "pval" ) 
#去除连锁不平衡
a <- ld_clump_local( dat = spesis4_iv, clump_kb = 10000, clump_r2 = 0.001, clump_p = 1, 
                     bfile = "F:/孟德尔测试/spesis_data/LD/data_maf0.01_rs_ref/data_maf0.01_rs_ref",#修改为所在地址 
                     plink_bin = "F:/孟德尔测试/spesis_data/LD/plink_win64_20231018/plink.exe")#修改为所在地址 

#把去除连锁不平衡后的snp的其他列加回去
#by.x = "rsid"：指定第一个数据框a中用于合并的列名为rsid。
#by.y = "SNP"：指定第二个数据框spesis4中用于合并的列名为SNP。
#all.x = TRUE：这个参数告诉merge函数保留第一个数据框a中的所有行，即使在第二个数据框spesis4中没有匹配的行。
#如果all.x设置为FALSE（默认值），那么只有在两个数据框中都有匹配的行才会被保留
spesis_clump_data <- merge(a, spesis4, by.x = "rsid", by.y = "SNP", all.x = TRUE)


write.table(spesis_clump_data,"spesisclump.txt",sep = "\t",quote = F,row.names = F)
#write.csv(spesis_clump_data, file="spesis_clump_data.csv", row.names=F)






#设置工作位置
setwd("E:\\桌面DESK\\MR示范") 
#读出表头，方便下一步从表头复制列名
head(exp_clump_data)
#读入暴露和结局snp数据（以下分别以snp和txt格式演示）
exp_dat <- read_exposure_data(
  filename = 'BCexpclump.txt',
  sep = "\t",#txt格式与csv格式区别在这里
  snp_col = "rsid",
  beta_col = "beta",
  se_col = "se",
  effect_allele_col = "effect allele",
  other_allele_col = "other allele",
  eaf_col = "eaf",
  pval_col = "pval.x",samplesize_col = "samplesize")
#在读取数据后，这行代码在exp_dat数据框中创建或修改了一个名为exposure的新列，并将其所有值设置为字符串"SLE"。
#这可能是为了标记暴露数据集是与系统性红斑狼疮（Systemic Lupus Erythematosus, SLE）相关的。
exp_dat$exposure <- "SLE"

outcome_dat<-read_outcome_data(snps = exp_dat$SNP,
                               filename = "convert_ieu-a-1126.vcf.gz.csv",
                               sep = ",",
                               snp_col = "SNP",
                               beta_col = "beta",
                               se_col = "se",
                               effect_allele_col = "effect allele",
                               other_allele_col = "other allele",
                               eaf_col = "eaf",
                               pval_col = "pval")


#暴露与结局取交集
dat<-harmonise_data(exposure_dat = exp_dat,outcome_dat = outcome_dat)
#剔除SNP
dat2 <- subset(dat,SNP !="rs6679677")
#计算P值
res<- mr(dat2)
res
OR<-generate_odds_ratios(res)
OR
#保存dat和结果数据
write.csv(dat , file="dat.csv")
write.csv(OR , file="OR.csv")
#MRPRESSO检验 
library(MRPRESSO)
mr_presso(BetaOutcome ="beta.outcome", BetaExposure = "beta.exposure", SdOutcome ="se.outcome", SdExposure = "se.exposure", 
          OUTLIERtest = TRUE,DISTORTIONtest = TRUE, data = dat, NbDistribution = 1000,  
          SignifThreshold = 0.05)
#异质性检验
mr_heterogeneity(dat)
#多效性检验
mr_pleiotropy_test(dat)
#绘图
#散点图
mr_scatter_plot(res,dat)
#漏斗图
mr_funnel_plot(singlesnp_results = mr_singlesnp(dat))
#留一法
mr_leaveoneout_plot(leaveoneout_results = mr_leaveoneout(dat))
#森林图
res_single <- mr_singlesnp(dat)
mr_forest_plot(res_single)