Skip to content

Commit d760753

Browse files
committed
Forgot the actual functions...
1 parent 83b8238 commit d760753

File tree

1 file changed

+353
-0
lines changed

1 file changed

+353
-0
lines changed

R/loom_utils.R

Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
#!/usr/bin/env Rscript
2+
3+
exchangeable_loom_version <- '3.0.0alpha'
4+
5+
isExchangeableLoom <- function(h5f) {
6+
attrs <- h5readAttributes(h5f, '/')
7+
version <- attrs[['LOOM_SPEC_VERSION']]
8+
return(!is.null(version) && version == exchangeable_loom_version)
9+
}
10+
11+
readDimNames <- function(h5f) {
12+
cell_attr <- h5readAttributes(h5f, '/col_attrs')[['CellID']]
13+
gene_attr <- h5readAttributes(h5f, '/row_attrs')[['Gene']]
14+
source <- h5readAttributes(h5f, '/')[['created_from']]
15+
if (!is.null(source) && source == 'anndata') {
16+
if (is.null(cell_attr))
17+
cell_attr <- 'obs_names'
18+
if (is.null(gene_attr))
19+
gene_attr <- 'var_names'
20+
}
21+
return(list(col=cell_attr, row=gene_attr))
22+
}
23+
24+
readManifest <- function(h5f) {
25+
tryCatch({
26+
manifest <- data.frame(t(h5read(h5f, '/global/manifest')), stringsAsFactors=FALSE)
27+
colnames(manifest) <- c('loom_path', 'dtype', 'anndata_path', 'sce_path')
28+
return(manifest)
29+
},
30+
error=function(e) {
31+
manifest <- NULL
32+
return(manifest)
33+
})
34+
}
35+
36+
nestedEnvAsList <- function(x) {
37+
out <- as.list(x)
38+
lapply(out, function(e) if (is.environment(e)) nestedEnvAsList(e) else e)
39+
}
40+
41+
nestedEnv <- function(root_env, paths, v) {
42+
n <- length(paths)
43+
var <- paths[1]
44+
if (n == 1) {
45+
root_env[[var]] <- v
46+
} else {
47+
if (is.null(root_env[[var]]))
48+
root_env[[var]] <- new.env(parent=emptyenv())
49+
nestedEnv(root_env[[var]], paths[2:n], v)
50+
}
51+
invisible()
52+
}
53+
54+
flattenNestedListToEnv <- function(x, e, prefix=NULL) {
55+
entry_names <- names(x)
56+
if (is.null(entry_names)) entry_names <- seq(length(x))
57+
sapply(entry_names, function(name) {
58+
if (is.null(prefix)) {
59+
full_name <- name
60+
} else {
61+
full_name <- paste(prefix, name, sep='__')
62+
}
63+
if (is.list(x[[name]])) {
64+
flattenNestedListToEnv(x[[name]], e, full_name)
65+
} else {
66+
e[[full_name]] <- x[[name]]
67+
}
68+
})
69+
invisible()
70+
}
71+
72+
makeManifest <- function(entries, dtype='array', loom_prefix='/global/', anndata_prefix='/uns/',
73+
sce_prefix='@metadata$') {
74+
n <- length(entries)
75+
if (is.list(entries)) {
76+
entry_names <- names(entries)
77+
is_scalar <- sapply(entries, function(x) is.vector(x) && length(x) == 1)
78+
dtypes <- ifelse(is_scalar, 'scalar', 'array')
79+
} else {
80+
entry_names <- entries
81+
dtypes <- rep(dtype, n)
82+
}
83+
loom_paths <- paste0(loom_prefix, entry_names)
84+
if (endsWith(loom_prefix, '['))
85+
loom_paths <- paste0(loom_paths, ']')
86+
if (is.null(anndata_prefix)) {
87+
anndata_paths <- rep('', n)
88+
} else {
89+
if (anndata_prefix == '/obsm/X_') {
90+
ad_names <- tolower(entry_names)
91+
} else {
92+
ad_names <- gsub('__', '/', entry_names)
93+
}
94+
anndata_paths <- paste0(anndata_prefix, ad_names)
95+
}
96+
if (startsWith(sce_prefix, '@metadata$'))
97+
entry_names <- gsub('__', '$', entry_names)
98+
sce_paths <- paste0(sce_prefix, entry_names)
99+
return(data.frame(loom_path=loom_paths, dtype=dtypes, anndata_path=anndata_paths,
100+
sce_path=sce_paths, stringsAsFactors=FALSE))
101+
}
102+
103+
readExchangeableLoom <- function(filename, backed=TRUE) {
104+
stopifnot(file.exists(filename), H5Fis_hdf5(filename))
105+
h5f <- H5Fopen(filename, flag='H5F_ACC_RDONLY')
106+
if (!isExchangeableLoom(h5f)) {
107+
H5Fclose(h5f)
108+
return(LoomExperiment::import(filename, type='SingleCellLoomExperiment'))
109+
}
110+
dim_names <- readDimNames(h5f)
111+
manifest <- readManifest(h5f)
112+
h5closeAll()
113+
114+
suppressWarnings(scle <- LoomExperiment::import(
115+
filename,
116+
type='SingleCellLoomExperiment',
117+
rownames_attr=dim_names$row,
118+
colnames_attr=dim_names$col))
119+
if (!backed) {
120+
for (i in seq_along(assays(scle))) {
121+
assays(scle)[[i]] <- as(assays(scle)[[i]], 'dgCMatrix')
122+
}
123+
}
124+
125+
h5f <- H5Fopen(filename, flag='H5F_ACC_RDONLY')
126+
127+
# Add appropriate assay name
128+
mx_attrs <- h5readAttributes(h5f, '/matrix')
129+
if ('assay' %in% names(mx_attrs)) {
130+
names(assays(scle))[1] <- mx_attrs['assay']
131+
} else {
132+
names(assays(scle))[1] <- 'counts'
133+
}
134+
135+
if (!is.null(manifest)) {
136+
137+
# Graphs are already handled by import(), just record entries
138+
is_graph <- (startsWith(manifest$loom_path, '/col_graphs/') |
139+
startsWith(manifest$loom_path, '/row_graphs/'))
140+
141+
# Handle reducedDims
142+
is_rd <- startsWith(manifest$loom_path, '/global/reducedDims__')
143+
rd_paths <- manifest$loom_path[is_rd]
144+
names(rd_paths) <- sub('^@reducedDims@listData[$]', '', manifest$sce_path[is_rd])
145+
reducedDims(scle) <- SimpleList(lapply(rd_paths, function(path) {
146+
mat <- t(h5read(h5f, path))
147+
rownames(mat) <- colnames(scle)
148+
mat
149+
}))
150+
151+
# Handle global attributes
152+
is_attr <- startsWith(manifest$loom_path, '/.attrs[')
153+
src_paths <- manifest$loom_path[is_attr]
154+
tgt_paths <- manifest$sce_path[is_attr]
155+
attr_names <- substr(src_paths, 9, nchar(src_paths)-1)
156+
global_attrs <- h5readAttributes(h5f, '/')
157+
mtdt <- new.env(parent=emptyenv(), size=length(tgt_paths))
158+
for (i in seq_along(attr_names)) {
159+
v <- global_attrs[[ attr_names[i] ]]
160+
paths <- unlist(strsplit(sub('^@metadata[$]', '', tgt_paths[i]), '$', fixed=TRUE))
161+
nestedEnv(mtdt, paths, v)
162+
scle@metadata[[ attr_names[i] ]] <- NULL
163+
}
164+
165+
# Handle global datasets
166+
is_ds <- (!(is_graph | is_rd | is_attr) & manifest$sce_path != '')
167+
src_paths <- manifest$loom_path[is_ds]
168+
tgt_paths <- manifest$sce_path[is_ds]
169+
for (i in seq_along(tgt_paths)) {
170+
v <- h5read(h5f, src_paths[i])
171+
paths <- unlist(strsplit(sub('^@metadata[$]', '', tgt_paths[i]), '$', fixed=TRUE))
172+
nestedEnv(mtdt, paths, v)
173+
}
174+
mtdt <- nestedEnvAsList(mtdt)
175+
for (name in names(mtdt)) {
176+
scle@metadata[[name]] <- mtdt[[name]]
177+
}
178+
}
179+
h5closeAll()
180+
181+
return(as(scle, 'SingleCellExperiment'))
182+
}
183+
184+
writeExchangeableLoom <- function(sce, filename, main_layer=NULL, return_manifest=FALSE) {
185+
scle <- LoomExperiment::SingleCellLoomExperiment(sce)
186+
187+
# Clean rowData and colData
188+
row_fct_idx <- sapply(rowData(scle), is.factor)
189+
rowData(scle)[row_fct_idx] <- lapply(
190+
rowData(scle)[row_fct_idx], function(x) type.convert(as.character(x), as.is=TRUE))
191+
col_fct_idx <- sapply(colData(scle), is.factor)
192+
colData(scle)[col_fct_idx] <- lapply(
193+
colData(scle)[col_fct_idx], function(x) type.convert(as.character(x), as.is=TRUE))
194+
195+
# Handle reducedDims. Move embeddings out of reducedDims so they don't get
196+
# written to unwanted location by export().
197+
rdims <- reducedDims(scle)
198+
reducedDims(scle) <- SimpleList()
199+
if (!isEmpty(rdims)) {
200+
rdim_manifest <- makeManifest(
201+
names(rdims),
202+
dtype='array',
203+
loom_prefix='/global/reducedDims__',
204+
anndata_prefix='/obsm/X_',
205+
sce_prefix='@reducedDims@listData$')
206+
} else {
207+
rdim_manifest <- NULL
208+
}
209+
210+
# Handle graphs. They get written by export() but we still need to record the paths.
211+
if (!isEmpty(colGraphs(scle))) {
212+
colgraph_manifest <- makeManifest(
213+
names(colGraphs(scle)),
214+
dtype='graph',
215+
loom_prefix='/col_graphs/',
216+
anndata_prefix='/uns/',
217+
sce_prefix='@colGraphs$')
218+
} else {
219+
colgraph_manifest <- NULL
220+
}
221+
if (!isEmpty(rowGraphs(scle))) {
222+
rowgraph_manifest <- makeManifest(
223+
names(rowGraphs(scle)),
224+
dtype='graph',
225+
loom_prefix='/row_graphs/',
226+
anndata_prefix=NULL,
227+
sce_prefix='@rowGraphs$')
228+
} else {
229+
rowgraph_manifest <- NULL
230+
}
231+
232+
# Handle metadata. Flatten nested lists to make export() happy. Scalars go
233+
# to /.attrs, others go to /global
234+
if (length(metadata(scle)) > 0) {
235+
mtdt <- new.env(parent=emptyenv())
236+
flattenNestedListToEnv(scle@metadata, mtdt)
237+
mtdt <- lapply(mtdt, function(x) {
238+
if (!is.numeric(x) && !is.character(x) && !is.logical(x))
239+
x <- type.convert(as.character(x), as.is=TRUE)
240+
if ('class' %in% names(attributes(x)))
241+
attributes(x)$class <- NULL
242+
x
243+
})
244+
is_attr <- rep(FALSE, length(mtdt))
245+
names(is_attr) <- names(mtdt)
246+
for (i in seq_along(mtdt)) {
247+
x <- mtdt[[i]]
248+
if ((is.vector(x) || is.array(x)) && (length(x) == 1)) {
249+
is_attr[i] <- TRUE
250+
mtdt[[i]] <- x[1]
251+
}
252+
}
253+
is_attr <- is_attr & !grepl('__', names(mtdt))
254+
255+
# Let export handle attributes
256+
metadata(scle) <- mtdt[is_attr]
257+
258+
excluded_from_manifest <- c('LOOM_SPEC_VERSION', 'CreationDate', 'last_modified',
259+
'CreatedWith', 'LoomExperiment-class', 'created_from',
260+
'last_modified_by')
261+
attr_names <- names(metadata(scle))
262+
attr_names <- attr_names[!attr_names %in% excluded_from_manifest]
263+
if (length(attr_names) > 0) {
264+
attr_manifest <- makeManifest(
265+
attr_names,
266+
dtype='scalar',
267+
loom_prefix='/.attrs[',
268+
anndata_prefix='/uns/',
269+
sce_prefix='@metadata$')
270+
} else {
271+
attr_manifest <- NULL
272+
}
273+
274+
datasets <- mtdt[!is_attr]
275+
if (length(datasets) > 0) {
276+
dts_manifest <- makeManifest(
277+
datasets,
278+
dtype=NULL,
279+
loom_prefix='/global/',
280+
anndata_prefix='/uns/',
281+
sce_prefix='@metadata$')
282+
} else {
283+
dts_manifest <- NULL
284+
}
285+
} else {
286+
attr_manifest <- NULL
287+
dts_manifest <- NULL
288+
datasets <- list()
289+
}
290+
291+
manifest <- rbind(attr_manifest, dts_manifest, rdim_manifest, colgraph_manifest, rowgraph_manifest)
292+
293+
# Write to loom by LoomExperiment::export
294+
if (file.exists(filename))
295+
file.remove(filename)
296+
suppressWarnings(export(
297+
scle,
298+
filename,
299+
matrix=ifelse(!is.null(main_layer) && main_layer %in% assayNames(scle),
300+
main_layer, assayNames(scle)[1]),
301+
colnames_attr='obs_names',
302+
rownames_attr='var_names'))
303+
h5closeAll()
304+
305+
# Write extra bits
306+
h5f <- H5Fopen(filename)
307+
308+
# Write extra global attributes
309+
h5writeAttribute(exchangeable_loom_version, h5f, 'LOOM_SPEC_VERSION')
310+
h5writeAttribute('sce', h5f, 'created_from')
311+
312+
# Write column names of 'CellID' and 'Gene' as attributes of '/col_attrs' and '/row_attrs'
313+
h5g_ca <- H5Gopen(h5f, '/col_attrs')
314+
h5writeAttribute('obs_names', h5g_ca, 'CellID')
315+
h5g_ra <- H5Gopen(h5f, '/row_attrs')
316+
h5writeAttribute('var_names', h5g_ra, 'Gene')
317+
318+
# Write primary asssay name as attribute of '/matrix'
319+
h5d_mx <- H5Dopen(h5f, '/matrix')
320+
h5writeAttribute('assay', h5d_mx, names(assays(scle))[1])
321+
322+
# Write manifest
323+
h5createGroup(h5f, '/global')
324+
if (!is.null(manifest)) {
325+
manifest <- manifest[order(manifest$dtype, manifest$loom_path), ]
326+
h5write(t(manifest), h5f, '/global/manifest')
327+
}
328+
329+
# Write reducedDims
330+
for (i in seq_along(rdims)) {
331+
rdim <- rdims[[i]]
332+
loom_path <- rdim_manifest$loom_path[i]
333+
h5write(t(rdim), h5f, loom_path)
334+
}
335+
336+
# Write extra global datasets
337+
for (i in seq_along(datasets)) {
338+
dts <- datasets[[i]]
339+
loom_path <- dts_manifest$loom_path[i]
340+
h5write(dts, h5f, loom_path)
341+
}
342+
343+
# Remove '/col_attrs/reducedDims' to make anndata happy
344+
h5delete(h5f, '/col_attrs/reducedDims')
345+
h5closeAll()
346+
347+
if (return_manifest) {
348+
return(manifest)
349+
} else {
350+
invisible()
351+
}
352+
}
353+

0 commit comments

Comments
 (0)