From 7fc9a093c689702f0561fb12027b196693aca2d5 Mon Sep 17 00:00:00 2001 From: Guangchuang Yu Date: Thu, 15 Aug 2024 17:47:14 +0800 Subject: [PATCH] update mh --- mh-mapping.R | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/mh-mapping.R b/mh-mapping.R index 6acd730..4eb8019 100644 --- a/mh-mapping.R +++ b/mh-mapping.R @@ -6,15 +6,44 @@ # +read.sssom <- function(file) { + read.delim(file, comment.char = "#") +} + + dir <- 'mh_mapping_initiative/mappings' +hpo2do <- read.sssom(file.path(dir, 'hp_doid_pistoia.sssom.tsv')) + +hpo2do <- hpo2do[, c(1,3)] |> + setNames(c("HPO", "DO")) |> + unique() + +dim(hpo2do) +head(hpo2do) + + +hpo2omim <- read.sssom('phenotype.hpoa')[, c("hpo_id", "database_id")] |> + setNames(c("hpo_id", "omim_id")) +do2omim <- read.sssom("OMIMinDO.tsv")[, c("id", "xrefs")] |> + setNames(c("do_id", "omim_id")) +do2omim[,2] <- sprintf("O%s", do2omim[,2]) +head(do2omim) + +# too large, the relationship may not be true +hpo2do2 <- merge(hpo2omim, do2omim, by='omim_id')[,c("hpo_id", "do_id")] |> unique() + + + + + read_mpo2hpo <- function(dir) { ff <- list.files(path=dir, pattern="^mp_hp", full.names=TRUE) res <- lapply(ff, function(f) { - x <- read.delim(f, comment.char="#") + x <- read.sssom(f) x <- x[, c("subject_id", "object_id")] x <- x[x[,1] != "sssom:NoTermFound", ] x <- x[x[,2] != "sssom:NoTermFound", ]