-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgo_functions.R
154 lines (125 loc) · 4.34 KB
/
go_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Objects needed for GO analysis
TERM2NAME <- readRDS("TERM2NAME.rds")
#TERM2GENE <- readRDS("TERM2GENE.rds")
TERM2GENE <- readRDS("TERM2GENE_Fattel_2024.rds")
# Gather all Terms for the GO_search function
TERM2NAME_ALL <- do.call("rbind", TERM2NAME)
# Function to perform ego, provide as argument a vector containing geneID (characters type)
ego_analysis <- function(vector_geneID){
require(tidyverse)
require(clusterProfiler)
require(DOSE)
require(enrichplot)
require(xlsx)
require(readxl)
require(AnnotationDbi)
require(GO.db)
# Keep in each data frames only the defined ontology (BP, CC, or MF)
TERM2GENE_BP <- TERM2GENE %>% dplyr::filter(GO %in% TERM2NAME$BP$go_id)
TERM2GENE_CC <- TERM2GENE %>% dplyr::filter(GO %in% TERM2NAME$CC$go_id)
TERM2GENE_MF <- TERM2GENE %>% dplyr::filter(GO %in% TERM2NAME$MF$go_id)
# Check if all objects are loaded
if(!exists("TERM2GENE")){stop("TERM2GENE object not loaded")}
if(!exists("TERM2NAME")){stop("TERM2NAME object not loaded")}
# Perform enrichment analysis on each ontology separately
ego_BP <-enricher(
gene=vector_geneID,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = 500,
qvalueCutoff =0.2,
TERM2GENE=TERM2GENE_BP,
TERM2NAME = TERM2NAME$BP
)
ego_CC <-enricher(
gene=vector_geneID,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = 500,
qvalueCutoff =0.2,
TERM2GENE=TERM2GENE_CC,
TERM2NAME = TERM2NAME$CC
)
ego_MF <-enricher(
gene=vector_geneID,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = 500,
qvalueCutoff =0.2,
TERM2GENE=TERM2GENE_MF,
TERM2NAME = TERM2NAME$MF
)
# Create a list of ego results
ego_list <- list(ego_BP = ego_BP, ego_CC = ego_CC, ego_MF = ego_MF)
# Create variable Enrichment factor (Count / number of gene in the given GO)
ego_list2 <- lapply(ego_list, function(x) mutate(x, richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio))))
# Create variable Fold enrichment (GeneRatio / BgRatio)
ego_final <- lapply(ego_list2, function(x) mutate(x, FoldEnrich = parse_ratio(GeneRatio) / parse_ratio(BgRatio)))
# Return final list of enrichResult class objects
return(ego_final)
}
# Additional function to explore clusterProfile enrich output
go_search <- function(method=c("gene2GO","GO2gene","GO2term","term2GO","GO2ontology"),id){
if(method=="gene2GO"){
GO <- TERM2GENE %>% dplyr::filter(gene==id) %>% pull(GO)
df <- TERM2NAME_ALL %>% dplyr::filter(go_id %in% GO)
return(df)
}
if(method=="GO2gene"){
df <- TERM2GENE %>% dplyr::filter(GO==id)
return(df)
}
if(method=="GO2term"){
df <-TERM2NAME_ALL %>% dplyr::filter(go_id==id)
return(df)
}
if(method=="term2GO"){
df <- TERM2NAME_ALL %>% dplyr::filter(Term==id)
return(df)
}
if(method=="GO2ontology"){
if(sum(TERM2NAME$BP$go_id==id)==1){
message("BP") }
else if (sum(TERM2NAME$CC$go_id==id)==1){
message("CC") }
else if (sum(TERM2NAME$MF$go_id==id)==1){
message("MF") } else {
message("GO ID not found in any ontology")
}
}
}
# Testing examples for go_search function
#go_search(method="gene2GO", "Zm00001eb248920")
#go_search(method="GO2gene", "GO:0000011")
#go_search(method="GO2term", "GO:0000062")
#go_search(method="term2GO", "reproduction")
#go_search(method="GO2ontology", "GO:0000062")
# Function to convert list of enrichResult objects into one dataframe
enrichResult2dataframe <- function(output_ego_analysis){
if(!is.null(output_ego_analysis$ego_BP)){
df_BP <- as.data.frame(output_ego_analysis$ego_BP@result)
df_BP$ontology <- "BP"
}
if(!is.null(output_ego_analysis$ego_CC)){
df_CC <- as.data.frame(output_ego_analysis$ego_CC@result)
df_CC$ontology <- "CC"
}
if(!is.null(output_ego_analysis$ego_MF)){
df_MF <- as.data.frame(output_ego_analysis$ego_MF@result)
df_MF$ontology <- "MF"
}
# Bind all. Use get0 so that there is no error when binding non-existing
# data frames
df_ego <- rbind(get0("df_BP"), get0("df_CC"), get0("df_MF"))
# Check if df_ego contains anything
if(!is.null(df_ego)){
# Put ontology in first column
df_ego <- df_ego %>% dplyr::select(ontology, everything())
return(df_ego)
} else {
message("No ontology contained a significant hit.")
}
}