Skip to content

Commit

Permalink
update to match new MultiEWCE
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Jan 24, 2024
1 parent d4292e0 commit 59cf10c
Show file tree
Hide file tree
Showing 12 changed files with 93 additions and 35 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(add_disease_genes)
export(add_evidence)
export(add_gene_frequency)
export(add_genes)
export(add_gpt_annotations)
export(add_hpo_definition)
export(add_hpo_id)
export(add_hpo_name)
Expand Down
1 change: 1 addition & 0 deletions R/0docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ NULL
#' @family add_
#' @param agg_by Column to aggregate metadata by.
#' @param add_definitions Add disease definitions using \link{add_mondo}.
#' @param gpt_filters A named list of filters to apply to the GPT annotations.
#' @inheritParams main
#' @inheritParams make_
#' @inheritParams get_
Expand Down
7 changes: 4 additions & 3 deletions R/add_gene_frequency.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
add_gene_frequency <- function(phenotype_to_genes = load_phenotype_to_genes(),
gene_frequency_threshold = NULL,
all.x = TRUE,
allow.cartesian = FALSE,
verbose = TRUE){

# devoptera::args2vars(add_gene_frequency)
# annot <- HPOExplorer::load_phenotype_to_genes("phenotype.hpoa")
frequency <- gene_freq_name <- gene_freq_mean <-
gene_freq_min <- gene_freq_max <- . <- NULL;

phenotype_to_genes <- add_hpo_id(phenos = phenotype_to_genes,
phenotype_to_genes= phenotype_to_genes)
phenotype_to_genes <- add_hpo_id(phenos = phenotype_to_genes)
new_cols <- c("gene_freq_name","gene_freq_min",
"gene_freq_max","gene_freq_mean")
if(!all(new_cols %in% names(phenotype_to_genes))){
Expand All @@ -41,7 +41,8 @@ add_gene_frequency <- function(phenotype_to_genes = load_phenotype_to_genes(),
x = phenotype_to_genes,
y = g2p[,c("hpo_id","gene_symbol","frequency"),with=FALSE],
by = c("hpo_id","gene_symbol"),
all.x = all.x)
all.x = all.x,
allow.cartesian = allow.cartesian)
#### Parse freq data ####
phenotype_to_genes[,gene_freq_name:=mapply(frequency,FUN=function(f){
if(grepl("HP:",f)) get_freq_dict()[f] else f })]
Expand Down
3 changes: 0 additions & 3 deletions R/add_genes.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ add_genes <- function(phenos = NULL,
gene_col = "gene_symbol",
all.x = FALSE,
allow.cartesian = FALSE){
# devoptera::args2vars(add_genes, reassign = TRUE)

#### Prepare gene data ####
phenotype_to_genes <- data.table::copy(phenotype_to_genes)
data.table::setnames(phenotype_to_genes,"disease_id","disease_id",
Expand All @@ -45,7 +43,6 @@ add_genes <- function(phenos = NULL,
}
#### Ensure necessary columns are in phenos ####
phenos <- add_hpo_id(phenos = phenos,
phenotype_to_genes = phenotype_to_genes,
hpo = hpo)
phenos <- add_disease(phenos = phenos,
allow.cartesian = allow.cartesian)
Expand Down
42 changes: 42 additions & 0 deletions R/add_gpt_annotations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#' @describeIn add_ add_
#' Add GPT annotations
#'
#' Add annotations generated with a Large Language Model.
#' Only the columns listed in \code{annot_cols} that are not already present
#' in \code{phenos} are merged in (by \code{hpo_id}), after which
#' \code{gpt_filters} are applied.
#' @param annot GPT annotation data.
#' @param annot_cols Columns to add.
#' @export
#' @examples
#' phenos <- example_phenos()
#' phenos2 <- add_gpt_annotations(phenos)
add_gpt_annotations <- function(phenos,
                                annot = gpt_annot_codify(
                                  reset_tiers_dict=TRUE
                                  )$annot_weighted,
                                annot_cols = names(annot)[
                                  !names(annot) %in% c("hpo_id","hpo_name")
                                  ],
                                gpt_filters=rep(list(NULL),
                                                length(annot_cols))|>
                                  `names<-`(annot_cols),
                                force_new = FALSE){
  # force_new: when TRUE, any `annot_cols` already present in `phenos` are
  # dropped and re-merged from `annot`.
  #### Force new columns ####
  if(force_new){
    messager("Force new. Removing existing annot columns.")
    rm_cols <- intersect(annot_cols, names(phenos))
    if(length(rm_cols)>0){
      # Drop via column selection rather than `:=NULL`, so the caller's
      # data.table is not modified by reference.
      phenos <- phenos[, setdiff(names(phenos), rm_cols), with=FALSE]
    }
  }
  #### Check for existing columns ####
  # Only merge columns that are still missing; merging columns that already
  # exist would create duplicated ".x"/".y" columns.
  missing_cols <- setdiff(annot_cols, names(phenos))
  if(length(missing_cols)==0){
    messager("GPT annotation columns already present. Skipping.")
  } else {
    # Requested columns that do not exist in `annot` cannot be added.
    unavailable <- setdiff(missing_cols, names(annot))
    if(length(unavailable)>0){
      messager("Columns not found in annot, skipping:",
               paste(unavailable, collapse=", "))
    }
    add_cols <- intersect(missing_cols, names(annot))
    if(length(add_cols)>0){
      # Select by name (instead of negating "hpo_name") so that `annot_cols`
      # is honoured and `annot` is not required to contain "hpo_name".
      keep_cols <- unique(c("hpo_id", add_cols))
      phenos <- data.table::merge.data.table(phenos,
                                             annot[, keep_cols, with=FALSE],
                                             by = "hpo_id",
                                             all.x = TRUE)
    }
  }
  #### Filter ####
  phenos <- KGExplorer::filter_dt(dat=phenos,
                                  filters = gpt_filters)
  #### Return #####
  return(phenos)
}
3 changes: 1 addition & 2 deletions R/add_hpo_definition.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
#' @returns A named vector of HPO term descriptions.
#'
#' @export
#' @importFrom stats setNames
#' @importFrom data.table :=
#' @import data.table
#' @examples
#' phenos <- example_phenos()
#' phenos2 <- add_hpo_definition(phenos = phenos)
Expand Down
21 changes: 4 additions & 17 deletions R/add_hpo_id.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,12 @@
#' phenos <- unique(phenotype_to_genes[,c("hpo_id","hpo_name")])
#' phenos2 <- add_hpo_id(phenos=phenos)
add_hpo_id <- function(phenos,
hpo = get_hpo(),
phenotype_to_genes = NULL) {
HPO_term_valid <- hpo_id <- NULL;

hpo = get_hpo()) {
if(!"hpo_id" %in% names(phenos)){
messager("Adding HPO IDs.")
alt_names <- grep("hpo_id","^id$",names(phenos),
value=TRUE, ignore.case = TRUE)
if(length(alt_names)>0){
data.table::setnames(phenos,alt_names[[1]],"hpo_id")
return(phenos)
} else {
if(is.null(phenotype_to_genes)) {
phenotype_to_genes <- load_phenotype_to_genes()
}
phenos <- fix_hpo_ids(dat=phenos,
phenotype_to_genes=phenotype_to_genes)
}
phenos[,HPO_term_valid:=(hpo_id %in% hpo@terms)]
phenos$hpo_id <- map_phenotypes(hpo = hpo,
terms = phenos$hpo_name,
to = "id")
}
return(phenos)
}
Expand Down
4 changes: 1 addition & 3 deletions R/add_hpo_name.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
#' phenos <- example_phenos()
#' phenos2 <- add_hpo_name(phenos=phenos)
add_hpo_name <- function(phenos,
hpo = get_hpo(),
phenotype_to_genes = NULL) {

hpo = get_hpo()) {
if(!"hpo_name" %in% names(phenos)){
messager("Adding HPO names.")
phenos <- add_hpo_id(phenos)
Expand Down
4 changes: 4 additions & 0 deletions R/gpt_annot_codify.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#' @param code_dict Numerical encodings of annotation values.
#' @param tiers_dict Numerical encodings of annotation column.
#' @param keep_congenital_onset Which stages of congenital onset to keep.
#' @param reset_tiers_dict Override \code{tiers_dict} values and set all values
#' to 1. This will ensure that all annotations are unweighted.
#' @inheritParams gpt_annot_check
#' @returns Named list
#'
Expand Down Expand Up @@ -34,13 +36,15 @@ gpt_annot_codify <- function(annot = gpt_annot_read(),
cancer=3,
reduced_fertility=4
),
reset_tiers_dict=FALSE,
keep_congenital_onset=head(names(code_dict),4)
){
# res <- gpt_annot_check(path="~/Downloads/gpt_hpo_annotations.csv")
# annot <- res$annot
severity_score_gpt <- congenital_onset <- hpo_name <- hpo_id <- NULL;

d <- data.table::copy(annot)
if(isTRUE(reset_tiers_dict)) tiers_dict <- lapply(tiers_dict,function(x){1})
#### Ensure only 1 row/hpo_name by simply taking the first ####
if(isTRUE(remove_duplicates)){
d <- d[,utils::head(.SD,1), by=c("hpo_id","hpo_name")]
Expand Down
36 changes: 30 additions & 6 deletions man/add_.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/gpt_annot_codify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/make_.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 59cf10c

Please sign in to comment.