# 加载必要的库
library(readxl)
library(tidyverse)
library(Rtsne)
setwd("D:/更年期项目/基于UR、VA样本分组看表型差异/3个分组/t-SNE图")
# 导入表格
merged_df <- read.csv("filtered_data_GUT.csv")
site <- unique(merged_df$site)
# 过滤掉存在率较低的种类和总丰度为零的样本
# merged_df[merged_df < 0.000001] <- 0
# presence_count <- colSums(merged_df[, -c(1:8)] > 0)
# presence_rate <- presence_count / nrow(merged_df)
# merged_df <- merged_df[, c(TRUE, TRUE,TRUE, TRUE, TRUE, presence_rate >= 0.01), drop = FALSE]
# merged_df <- merged_df[rowSums(merged_df[,-c(1:5)]) != 0, ]
# 筛选出good和Bad组
merged_df <- merged_df %>%
filter(group_2 %in% c("good","bad"))
merged_df <- merged_df %>%
select(-c(group_3,group_1)) %>%
rename(group = group_2)
# 获取所有时间点
time_points <- unique(merged_df$time)
# 自定义调色板
palette <- c("good" = "#9370DB", "bad" = "#FFD966")
# 循环绘制每个时间点的 t-SNE 图
for (time in time_points) {
# 过滤出当前时间点的数据
time_data <- merged_df %>%
filter(time == !!time)
# 计算当前样本数量
current_num_samples <- nrow(time_data)
# 动态设置 perplexity
perplexity_value <- min(30, max(1, current_num_samples / 3))
# 运行 t-SNE
tsne_result <- Rtsne(time_data[,-c(1:8)], dims = 2, check_duplicates = FALSE, perplexity = perplexity_value, verbose = TRUE, max_iter = 500)
# 匹配分组标签并创建 t-SNE 数据框
group_labels <- time_data$group
tsne_data <- data.frame(tsne_result$Y, Group = group_labels)
colnames(tsne_data) <- c("Dim1", "Dim2", "Group")
# 提取两个分组的数据
group1_data <- tsne_data[tsne_data$Group == "good", ]
group2_data <- tsne_data[tsne_data$Group == "bad", ]
# 对两个维度分别进行 t 检验
t_test_dim1 <- t.test(group1_data$Dim1, group2_data$Dim1)
t_test_dim2 <- t.test(group1_data$Dim2, group2_data$Dim2)
# 获取 p 值并计算综合 p 值
p_values_t <- c(t_test_dim1$p.value, t_test_dim2$p.value)
fisher_stat <- -2 * sum(log(p_values_t))
df <- 2 * length(p_values_t)
p_value_combined_t <- 1 - pchisq(fisher_stat, df)
# 绘制 t-SNE 图
p <- ggplot(tsne_data, aes(x = Dim1, y = Dim2, color = Group)) +
geom_point(alpha = 0.6, size = 3) +
scale_color_manual(values = palette) +
labs(title = paste("t-SNE Plot for", site, "Time:", time),
x = "Dimension 1",
y = "Dimension 2") +
theme_minimal() +
stat_ellipse(data = group1_data, aes(color = Group), alpha = 0.5, level = 0.95) +
stat_ellipse(data = group2_data, aes(color = Group), alpha = 0.5, level = 0.95) +
annotate("text",
x = mean(tsne_data$Dim1),
y = min(tsne_data$Dim2) - 2,
label = paste("t-test (combined) p:", sprintf("%.4e", p_value_combined_t)),
hjust = 0.5,
vjust = 1,
size = 3,
color = "black")
# 显示图形
print(p)
print(p_value_combined_t)
# 保存图形
ggsave(filename = paste0("tsne_plot_site ",site, time, ".pdf"), plot = p, width = 5, height = 4)
}
最新发布