1
1
library(magrittr )
2
2
library(biomaRt )
3
+ library(R6 )
3
4
5
+ # Placeholder for maybe_create_directory
6
#' Ensure that a directory exists.
#'
#' Checks `filepath` with `dir.exists()` and, when it is not an existing
#' directory, creates it with `dir.create(recursive = TRUE)` so that any
#' missing parent directories are created as well.
#'
#' @param filepath Path of the directory that should exist.
#' @return `NULL` (invisibly) when the directory already exists; otherwise
#'   the value returned by `dir.create()`.
maybe_create_directory <- function(filepath) {
  # Nothing to do when the directory is already there.
  if (dir.exists(filepath)) {
    return(invisible(NULL))
  }
  dir.create(filepath, recursive = TRUE)
}
11
+
12
+ # Placeholder for log_message
13
#' Print an informational log line.
#'
#' Builds the text with `paste0()` (prefix, `message`, newline suffix) and
#' writes it with `cat()`.
#'
#' @param message Text to log.
#' @return The value of `cat()`, i.e. `NULL`, invisibly.
log_message <- function(message) {
  # NOTE(review): the padding spaces inside the two literals below look like
  # an artifact of how this file was extracted -- confirm against the
  # repository before relying on the exact output format.
  log_line <- paste0(" [INFO] ", message, " \n ")
  cat(log_line)
}
4
16
5
17
# ' BioMartData R6 class
6
18
# '
@@ -30,7 +42,7 @@ library(biomaRt)
30
42
# ' biomart$get_data(chromosomes = c("1", "2"), filepath = ".")
31
43
# '
32
44
# ' @export
33
- BioMartData <- R6 :: R6Class(
45
+ BioMartData <- R6Class(
34
46
" BioMartData" ,
35
47
public = list (
36
48
ensembl = NULL ,
@@ -48,30 +60,28 @@ BioMartData <- R6::R6Class(
48
60
self $ chromosomes <- getBM(attributes = ' chromosome_name' , mart = self $ ensembl )[, 1 ]
49
61
},
50
62
51
-
52
63
# ' @description Fetch gene data from Ensembl BioMart.
53
64
# ' @param chromosomes Character vector (default NULL). The chromosomes for which data will be fetched.
54
65
# ' @param filepath Character string (default PROTEOMES_PATH). Path where the fetched data will be stored.
55
66
get_data = function (chromosomes = NULL , filepath = PROTEOMES_PATH ) {
56
-
57
67
maybe_create_directory(filepath )
58
-
68
+
59
69
if (! is.null(chromosomes )) {
60
70
chromosomes <- intersect(chromosomes , self $ chromosomes )
61
71
} else {
62
72
chromosomes <- self $ chromosomes
63
73
}
64
-
74
+
65
75
log_message(" Fetching data for chromosomes..." )
66
76
data_list <- list ()
67
77
file_list <- list () # Create a list to hold the filenames
68
-
78
+
69
79
for (chrom in chromosomes ) {
70
80
log_message(paste(" Fetching data for chromosome" , chrom ))
71
81
tryCatch({
72
82
annotLookup <- getBM(
73
83
filters = c(" chromosome_name" ),
74
- values = list (chromosome ),
84
+ values = list (chrom ),
75
85
mart = self $ ensembl ,
76
86
attributes = c(
77
87
' external_gene_name' ,
@@ -83,27 +93,27 @@ BioMartData <- R6::R6Class(
83
93
),
84
94
uniqueRows = TRUE
85
95
)
86
-
96
+
87
97
annotLookup <- subset(annotLookup , uniprot_gn_id != ' ' )
88
98
filename <- file.path(filepath , paste0(" annotLookup_" , self $ dataset , " _" , chrom , " .csv" ))
89
99
write.csv(annotLookup , filename , row.names = FALSE )
90
100
data_list [[chrom ]] <- annotLookup
91
101
file_list <- c(file_list , filename ) # Add the filename to the list
92
102
},
93
103
error = function (e ) {
94
- log_message(paste(" Failed to fetch data for chromosome" , chromosome , " due to error:" , e ))
104
+ log_message(paste(" Failed to fetch data for chromosome" , chrom , " due to error:" , e ))
95
105
})
96
106
}
97
-
107
+
98
108
log_message(" Data fetching completed." )
99
-
109
+
100
110
self $ combined_data <- do.call(rbind , data_list )
101
111
self $ combined_data <- self $ combined_data [! (self $ combined_data $ peptide == " Sequence unavailable" ), ]
102
-
112
+
103
113
self $ combined_data <- self $ combined_data [, c(" external_gene_name" , " uniprot_gn_id" , " chromosome_name" , " start_position" , " end_position" , " peptide" )]
104
114
105
115
colnames(self $ combined_data ) <- c(" GeneName" , " SwissID" , " chromosome" , " start" , " end" , " seq" )
106
-
116
+
107
117
combined_filename <- file.path(filepath , paste0(self $ dataset , " _combined.csv" ))
108
118
file_list <- c(file_list , combined_filename )
109
119
write.csv(self $ combined_data , combined_filename , row.names = FALSE )
@@ -112,18 +122,17 @@ BioMartData <- R6::R6Class(
112
122
quoted_values <- paste0(' "' , column , ' "' )
113
123
return (quoted_values )
114
124
})
115
-
125
+
116
126
quoted_filename <- file.path(filepath , paste0(self $ dataset , " .csv" ))
117
127
write.csv(self $ combined_data , quoted_filename , row.names = FALSE , quote = FALSE )
118
128
119
129
# Remove the temporary files
120
130
log_message(paste(" Removing temporary files:" , file_list ))
121
-
131
+
122
132
full_file_paths <- normalizePath(file.path(file_list ), mustWork = FALSE )
123
133
file.remove(full_file_paths )
124
134
125
135
log_message(" Downloading proteome completed." )
126
-
127
136
}
128
137
)
129
138
)
0 commit comments