|
| 1 | +#!/usr/bin/env Rscript |
| 2 | + |
| 3 | +exchangeable_loom_version <- '3.0.0alpha' |
| 4 | + |
| 5 | +isExchangeableLoom <- function(h5f) { |
| 6 | + attrs <- h5readAttributes(h5f, '/') |
| 7 | + version <- attrs[['LOOM_SPEC_VERSION']] |
| 8 | + return(!is.null(version) && version == exchangeable_loom_version) |
| 9 | +} |
| 10 | + |
| 11 | +readDimNames <- function(h5f) { |
| 12 | + cell_attr <- h5readAttributes(h5f, '/col_attrs')[['CellID']] |
| 13 | + gene_attr <- h5readAttributes(h5f, '/row_attrs')[['Gene']] |
| 14 | + source <- h5readAttributes(h5f, '/')[['created_from']] |
| 15 | + if (!is.null(source) && source == 'anndata') { |
| 16 | + if (is.null(cell_attr)) |
| 17 | + cell_attr <- 'obs_names' |
| 18 | + if (is.null(gene_attr)) |
| 19 | + gene_attr <- 'var_names' |
| 20 | + } |
| 21 | + return(list(col=cell_attr, row=gene_attr)) |
| 22 | +} |
| 23 | + |
| 24 | +readManifest <- function(h5f) { |
| 25 | + tryCatch({ |
| 26 | + manifest <- data.frame(t(h5read(h5f, '/global/manifest')), stringsAsFactors=FALSE) |
| 27 | + colnames(manifest) <- c('loom_path', 'dtype', 'anndata_path', 'sce_path') |
| 28 | + return(manifest) |
| 29 | + }, |
| 30 | + error=function(e) { |
| 31 | + manifest <- NULL |
| 32 | + return(manifest) |
| 33 | + }) |
| 34 | +} |
| 35 | + |
| 36 | +nestedEnvAsList <- function(x) { |
| 37 | + out <- as.list(x) |
| 38 | + lapply(out, function(e) if (is.environment(e)) nestedEnvAsList(e) else e) |
| 39 | +} |
| 40 | + |
| 41 | +nestedEnv <- function(root_env, paths, v) { |
| 42 | + n <- length(paths) |
| 43 | + var <- paths[1] |
| 44 | + if (n == 1) { |
| 45 | + root_env[[var]] <- v |
| 46 | + } else { |
| 47 | + if (is.null(root_env[[var]])) |
| 48 | + root_env[[var]] <- new.env(parent=emptyenv()) |
| 49 | + nestedEnv(root_env[[var]], paths[2:n], v) |
| 50 | + } |
| 51 | + invisible() |
| 52 | +} |
| 53 | + |
| 54 | +flattenNestedListToEnv <- function(x, e, prefix=NULL) { |
| 55 | + entry_names <- names(x) |
| 56 | + if (is.null(entry_names)) entry_names <- seq(length(x)) |
| 57 | + sapply(entry_names, function(name) { |
| 58 | + if (is.null(prefix)) { |
| 59 | + full_name <- name |
| 60 | + } else { |
| 61 | + full_name <- paste(prefix, name, sep='__') |
| 62 | + } |
| 63 | + if (is.list(x[[name]])) { |
| 64 | + flattenNestedListToEnv(x[[name]], e, full_name) |
| 65 | + } else { |
| 66 | + e[[full_name]] <- x[[name]] |
| 67 | + } |
| 68 | + }) |
| 69 | + invisible() |
| 70 | +} |
| 71 | + |
| 72 | +makeManifest <- function(entries, dtype='array', loom_prefix='/global/', anndata_prefix='/uns/', |
| 73 | + sce_prefix='@metadata$') { |
| 74 | + n <- length(entries) |
| 75 | + if (is.list(entries)) { |
| 76 | + entry_names <- names(entries) |
| 77 | + is_scalar <- sapply(entries, function(x) is.vector(x) && length(x) == 1) |
| 78 | + dtypes <- ifelse(is_scalar, 'scalar', 'array') |
| 79 | + } else { |
| 80 | + entry_names <- entries |
| 81 | + dtypes <- rep(dtype, n) |
| 82 | + } |
| 83 | + loom_paths <- paste0(loom_prefix, entry_names) |
| 84 | + if (endsWith(loom_prefix, '[')) |
| 85 | + loom_paths <- paste0(loom_paths, ']') |
| 86 | + if (is.null(anndata_prefix)) { |
| 87 | + anndata_paths <- rep('', n) |
| 88 | + } else { |
| 89 | + if (anndata_prefix == '/obsm/X_') { |
| 90 | + ad_names <- tolower(entry_names) |
| 91 | + } else { |
| 92 | + ad_names <- gsub('__', '/', entry_names) |
| 93 | + } |
| 94 | + anndata_paths <- paste0(anndata_prefix, ad_names) |
| 95 | + } |
| 96 | + if (startsWith(sce_prefix, '@metadata$')) |
| 97 | + entry_names <- gsub('__', '$', entry_names) |
| 98 | + sce_paths <- paste0(sce_prefix, entry_names) |
| 99 | + return(data.frame(loom_path=loom_paths, dtype=dtypes, anndata_path=anndata_paths, |
| 100 | + sce_path=sce_paths, stringsAsFactors=FALSE)) |
| 101 | +} |
| 102 | + |
| 103 | +readExchangeableLoom <- function(filename, backed=TRUE) { |
| 104 | + stopifnot(file.exists(filename), H5Fis_hdf5(filename)) |
| 105 | + h5f <- H5Fopen(filename, flag='H5F_ACC_RDONLY') |
| 106 | + if (!isExchangeableLoom(h5f)) { |
| 107 | + H5Fclose(h5f) |
| 108 | + return(LoomExperiment::import(filename, type='SingleCellLoomExperiment')) |
| 109 | + } |
| 110 | + dim_names <- readDimNames(h5f) |
| 111 | + manifest <- readManifest(h5f) |
| 112 | + h5closeAll() |
| 113 | + |
| 114 | + suppressWarnings(scle <- LoomExperiment::import( |
| 115 | + filename, |
| 116 | + type='SingleCellLoomExperiment', |
| 117 | + rownames_attr=dim_names$row, |
| 118 | + colnames_attr=dim_names$col)) |
| 119 | + if (!backed) { |
| 120 | + for (i in seq_along(assays(scle))) { |
| 121 | + assays(scle)[[i]] <- as(assays(scle)[[i]], 'dgCMatrix') |
| 122 | + } |
| 123 | + } |
| 124 | + |
| 125 | + h5f <- H5Fopen(filename, flag='H5F_ACC_RDONLY') |
| 126 | + |
| 127 | + # Add appropriate assay name |
| 128 | + mx_attrs <- h5readAttributes(h5f, '/matrix') |
| 129 | + if ('assay' %in% names(mx_attrs)) { |
| 130 | + names(assays(scle))[1] <- mx_attrs['assay'] |
| 131 | + } else { |
| 132 | + names(assays(scle))[1] <- 'counts' |
| 133 | + } |
| 134 | + |
| 135 | + if (!is.null(manifest)) { |
| 136 | + |
| 137 | + # Graphs are already handled by import(), just record entries |
| 138 | + is_graph <- (startsWith(manifest$loom_path, '/col_graphs/') | |
| 139 | + startsWith(manifest$loom_path, '/row_graphs/')) |
| 140 | + |
| 141 | + # Handle reducedDims |
| 142 | + is_rd <- startsWith(manifest$loom_path, '/global/reducedDims__') |
| 143 | + rd_paths <- manifest$loom_path[is_rd] |
| 144 | + names(rd_paths) <- sub('^@reducedDims@listData[$]', '', manifest$sce_path[is_rd]) |
| 145 | + reducedDims(scle) <- SimpleList(lapply(rd_paths, function(path) { |
| 146 | + mat <- t(h5read(h5f, path)) |
| 147 | + rownames(mat) <- colnames(scle) |
| 148 | + mat |
| 149 | + })) |
| 150 | + |
| 151 | + # Handle global attributes |
| 152 | + is_attr <- startsWith(manifest$loom_path, '/.attrs[') |
| 153 | + src_paths <- manifest$loom_path[is_attr] |
| 154 | + tgt_paths <- manifest$sce_path[is_attr] |
| 155 | + attr_names <- substr(src_paths, 9, nchar(src_paths)-1) |
| 156 | + global_attrs <- h5readAttributes(h5f, '/') |
| 157 | + mtdt <- new.env(parent=emptyenv(), size=length(tgt_paths)) |
| 158 | + for (i in seq_along(attr_names)) { |
| 159 | + v <- global_attrs[[ attr_names[i] ]] |
| 160 | + paths <- unlist(strsplit(sub('^@metadata[$]', '', tgt_paths[i]), '$', fixed=TRUE)) |
| 161 | + nestedEnv(mtdt, paths, v) |
| 162 | + scle@metadata[[ attr_names[i] ]] <- NULL |
| 163 | + } |
| 164 | + |
| 165 | + # Handle global datasets |
| 166 | + is_ds <- (!(is_graph | is_rd | is_attr) & manifest$sce_path != '') |
| 167 | + src_paths <- manifest$loom_path[is_ds] |
| 168 | + tgt_paths <- manifest$sce_path[is_ds] |
| 169 | + for (i in seq_along(tgt_paths)) { |
| 170 | + v <- h5read(h5f, src_paths[i]) |
| 171 | + paths <- unlist(strsplit(sub('^@metadata[$]', '', tgt_paths[i]), '$', fixed=TRUE)) |
| 172 | + nestedEnv(mtdt, paths, v) |
| 173 | + } |
| 174 | + mtdt <- nestedEnvAsList(mtdt) |
| 175 | + for (name in names(mtdt)) { |
| 176 | + scle@metadata[[name]] <- mtdt[[name]] |
| 177 | + } |
| 178 | + } |
| 179 | + h5closeAll() |
| 180 | + |
| 181 | + return(as(scle, 'SingleCellExperiment')) |
| 182 | +} |
| 183 | + |
| 184 | +writeExchangeableLoom <- function(sce, filename, main_layer=NULL, return_manifest=FALSE) { |
| 185 | + scle <- LoomExperiment::SingleCellLoomExperiment(sce) |
| 186 | + |
| 187 | + # Clean rowData and colData |
| 188 | + row_fct_idx <- sapply(rowData(scle), is.factor) |
| 189 | + rowData(scle)[row_fct_idx] <- lapply( |
| 190 | + rowData(scle)[row_fct_idx], function(x) type.convert(as.character(x), as.is=TRUE)) |
| 191 | + col_fct_idx <- sapply(colData(scle), is.factor) |
| 192 | + colData(scle)[col_fct_idx] <- lapply( |
| 193 | + colData(scle)[col_fct_idx], function(x) type.convert(as.character(x), as.is=TRUE)) |
| 194 | + |
| 195 | + # Handle reducedDims. Move embeddings out of reducedDims so they don't get |
| 196 | + # written to unwanted location by export(). |
| 197 | + rdims <- reducedDims(scle) |
| 198 | + reducedDims(scle) <- SimpleList() |
| 199 | + if (!isEmpty(rdims)) { |
| 200 | + rdim_manifest <- makeManifest( |
| 201 | + names(rdims), |
| 202 | + dtype='array', |
| 203 | + loom_prefix='/global/reducedDims__', |
| 204 | + anndata_prefix='/obsm/X_', |
| 205 | + sce_prefix='@reducedDims@listData$') |
| 206 | + } else { |
| 207 | + rdim_manifest <- NULL |
| 208 | + } |
| 209 | + |
| 210 | + # Handle graphs. They get written by export() but we still need to record the paths. |
| 211 | + if (!isEmpty(colGraphs(scle))) { |
| 212 | + colgraph_manifest <- makeManifest( |
| 213 | + names(colGraphs(scle)), |
| 214 | + dtype='graph', |
| 215 | + loom_prefix='/col_graphs/', |
| 216 | + anndata_prefix='/uns/', |
| 217 | + sce_prefix='@colGraphs$') |
| 218 | + } else { |
| 219 | + colgraph_manifest <- NULL |
| 220 | + } |
| 221 | + if (!isEmpty(rowGraphs(scle))) { |
| 222 | + rowgraph_manifest <- makeManifest( |
| 223 | + names(rowGraphs(scle)), |
| 224 | + dtype='graph', |
| 225 | + loom_prefix='/row_graphs/', |
| 226 | + anndata_prefix=NULL, |
| 227 | + sce_prefix='@rowGraphs$') |
| 228 | + } else { |
| 229 | + rowgraph_manifest <- NULL |
| 230 | + } |
| 231 | + |
| 232 | + # Handle metadata. Flatten nested lists to make export() happy. Scalars go |
| 233 | + # to /.attrs, others go to /global |
| 234 | + if (length(metadata(scle)) > 0) { |
| 235 | + mtdt <- new.env(parent=emptyenv()) |
| 236 | + flattenNestedListToEnv(scle@metadata, mtdt) |
| 237 | + mtdt <- lapply(mtdt, function(x) { |
| 238 | + if (!is.numeric(x) && !is.character(x) && !is.logical(x)) |
| 239 | + x <- type.convert(as.character(x), as.is=TRUE) |
| 240 | + if ('class' %in% names(attributes(x))) |
| 241 | + attributes(x)$class <- NULL |
| 242 | + x |
| 243 | + }) |
| 244 | + is_attr <- rep(FALSE, length(mtdt)) |
| 245 | + names(is_attr) <- names(mtdt) |
| 246 | + for (i in seq_along(mtdt)) { |
| 247 | + x <- mtdt[[i]] |
| 248 | + if ((is.vector(x) || is.array(x)) && (length(x) == 1)) { |
| 249 | + is_attr[i] <- TRUE |
| 250 | + mtdt[[i]] <- x[1] |
| 251 | + } |
| 252 | + } |
| 253 | + is_attr <- is_attr & !grepl('__', names(mtdt)) |
| 254 | + |
| 255 | + # Let export handle attributes |
| 256 | + metadata(scle) <- mtdt[is_attr] |
| 257 | + |
| 258 | + excluded_from_manifest <- c('LOOM_SPEC_VERSION', 'CreationDate', 'last_modified', |
| 259 | + 'CreatedWith', 'LoomExperiment-class', 'created_from', |
| 260 | + 'last_modified_by') |
| 261 | + attr_names <- names(metadata(scle)) |
| 262 | + attr_names <- attr_names[!attr_names %in% excluded_from_manifest] |
| 263 | + if (length(attr_names) > 0) { |
| 264 | + attr_manifest <- makeManifest( |
| 265 | + attr_names, |
| 266 | + dtype='scalar', |
| 267 | + loom_prefix='/.attrs[', |
| 268 | + anndata_prefix='/uns/', |
| 269 | + sce_prefix='@metadata$') |
| 270 | + } else { |
| 271 | + attr_manifest <- NULL |
| 272 | + } |
| 273 | + |
| 274 | + datasets <- mtdt[!is_attr] |
| 275 | + if (length(datasets) > 0) { |
| 276 | + dts_manifest <- makeManifest( |
| 277 | + datasets, |
| 278 | + dtype=NULL, |
| 279 | + loom_prefix='/global/', |
| 280 | + anndata_prefix='/uns/', |
| 281 | + sce_prefix='@metadata$') |
| 282 | + } else { |
| 283 | + dts_manifest <- NULL |
| 284 | + } |
| 285 | + } else { |
| 286 | + attr_manifest <- NULL |
| 287 | + dts_manifest <- NULL |
| 288 | + datasets <- list() |
| 289 | + } |
| 290 | + |
| 291 | + manifest <- rbind(attr_manifest, dts_manifest, rdim_manifest, colgraph_manifest, rowgraph_manifest) |
| 292 | + |
| 293 | + # Write to loom by LoomExperiment::export |
| 294 | + if (file.exists(filename)) |
| 295 | + file.remove(filename) |
| 296 | + suppressWarnings(export( |
| 297 | + scle, |
| 298 | + filename, |
| 299 | + matrix=ifelse(!is.null(main_layer) && main_layer %in% assayNames(scle), |
| 300 | + main_layer, assayNames(scle)[1]), |
| 301 | + colnames_attr='obs_names', |
| 302 | + rownames_attr='var_names')) |
| 303 | + h5closeAll() |
| 304 | + |
| 305 | + # Write extra bits |
| 306 | + h5f <- H5Fopen(filename) |
| 307 | + |
| 308 | + # Write extra global attributes |
| 309 | + h5writeAttribute(exchangeable_loom_version, h5f, 'LOOM_SPEC_VERSION') |
| 310 | + h5writeAttribute('sce', h5f, 'created_from') |
| 311 | + |
| 312 | + # Write column names of 'CellID' and 'Gene' as attributes of '/col_attrs' and '/row_attrs' |
| 313 | + h5g_ca <- H5Gopen(h5f, '/col_attrs') |
| 314 | + h5writeAttribute('obs_names', h5g_ca, 'CellID') |
| 315 | + h5g_ra <- H5Gopen(h5f, '/row_attrs') |
| 316 | + h5writeAttribute('var_names', h5g_ra, 'Gene') |
| 317 | + |
| 318 | + # Write primary asssay name as attribute of '/matrix' |
| 319 | + h5d_mx <- H5Dopen(h5f, '/matrix') |
| 320 | + h5writeAttribute('assay', h5d_mx, names(assays(scle))[1]) |
| 321 | + |
| 322 | + # Write manifest |
| 323 | + h5createGroup(h5f, '/global') |
| 324 | + if (!is.null(manifest)) { |
| 325 | + manifest <- manifest[order(manifest$dtype, manifest$loom_path), ] |
| 326 | + h5write(t(manifest), h5f, '/global/manifest') |
| 327 | + } |
| 328 | + |
| 329 | + # Write reducedDims |
| 330 | + for (i in seq_along(rdims)) { |
| 331 | + rdim <- rdims[[i]] |
| 332 | + loom_path <- rdim_manifest$loom_path[i] |
| 333 | + h5write(t(rdim), h5f, loom_path) |
| 334 | + } |
| 335 | + |
| 336 | + # Write extra global datasets |
| 337 | + for (i in seq_along(datasets)) { |
| 338 | + dts <- datasets[[i]] |
| 339 | + loom_path <- dts_manifest$loom_path[i] |
| 340 | + h5write(dts, h5f, loom_path) |
| 341 | + } |
| 342 | + |
| 343 | + # Remove '/col_attrs/reducedDims' to make anndata happy |
| 344 | + h5delete(h5f, '/col_attrs/reducedDims') |
| 345 | + h5closeAll() |
| 346 | + |
| 347 | + if (return_manifest) { |
| 348 | + return(manifest) |
| 349 | + } else { |
| 350 | + invisible() |
| 351 | + } |
| 352 | +} |
| 353 | + |
0 commit comments