library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
# GO enrichment dot plots ----
# For every "*.xls" file in the working directory (parsed with read.delim,
# i.e. as tab-delimited text), keep the terms with FDR < 0.05, take the 10
# smallest-P-value terms per Class, and save a faceted dot plot as
# "<file name without .xls>.png".

# Columns (original header names) the script reads from each input file.
required_cols <- c(
  "Class",
  "GO_Name",
  "GO_ID",
  "P_value",
  "corrected p-value(BH method)",
  "EnrichmentScore",
  "HitsGenesCountsInSelectedSet"
)

xls_files <- list.files(pattern = "\\.xls$")

for (xls_file in xls_files) {
  # check.names = FALSE keeps headers such as "corrected p-value(BH method)"
  # verbatim instead of mangling them into syntactic names.
  go_data <- read.delim(
    xls_file,
    header = TRUE,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

  # Skip (with a warning) files that lack an expected column, so one
  # malformed file does not abort the whole batch.
  missing_cols <- setdiff(required_cols, names(go_data))
  if (length(missing_cols) > 0) {
    warning(
      "Skipping ", xls_file, ": missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
    next
  }

  clean_data <- go_data %>%
    rename(
      GO_term = "GO_Name",
      Pvalue = "P_value",
      FDR = "corrected p-value(BH method)"
    ) %>%
    mutate(
      Pvalue = as.numeric(Pvalue),
      FDR = as.numeric(FDR),
      EnrichmentScore = as.numeric(EnrichmentScore)
    ) %>%
    # filter() also drops rows whose FDR is NA.
    filter(FDR < 0.05) %>%
    # Every remaining row has FDR < 0.05, so only two levels are possible.
    mutate(Significance = ifelse(FDR < 0.01, "**", "*"))

  if (nrow(clean_data) == 0) {
    message("No significant terms in ", xls_file, " skipping...")
    next
  }

  # arrange() sorts the whole table by Pvalue; the grouping is retained, so
  # slice_head() then keeps the first 10 rows (smallest P-values) per Class.
  all_significant <- clean_data %>%
    group_by(Class) %>%
    arrange(Pvalue) %>%
    slice_head(n = 10) %>%
    ungroup() %>%
    mutate(
      # Wrap only the term name, then append the ID. Wrapping the combined
      # string would collapse the explicit "\n" (str_wrap normalizes
      # whitespace) and reflow the ID into the term text.
      Term_label = paste0(str_wrap(GO_term, width = 40), "\n(", GO_ID, ")")
    )

  p <- ggplot(
    all_significant,
    aes(
      x = EnrichmentScore,
      # Factor levels follow ascending Pvalue.
      y = reorder(Term_label, Pvalue),
      color = -log10(FDR),
      size = HitsGenesCountsInSelectedSet
    )
  ) +
    geom_point() +
    # One panel row per Class; free y scale/space so each panel only shows
    # (and is sized for) its own terms.
    facet_grid(Class ~ ., scales = "free_y", space = "free") +
    scale_color_gradient(low = "blue", high = "red") +
    labs(
      title = "GO Term Enrichment Analysis",
      x = "Enrichment Score",
      y = "GO Term",
      color = "-log10(FDR)",
      size = "Gene Count"
    ) +
    theme_bw() +
    theme(
      strip.text.y = element_text(angle = 0),
      axis.text.y = element_text(size = 9),
      panel.spacing = unit(0.5, "lines")
    )

  output_name <- sub("\\.xls$", "", xls_file)
  ggsave(
    filename = paste0(output_name, ".png"),
    plot = p,
    width = 12,
    height = 8,
    dpi = 300
  )
}