Skip to content

Commit

Permalink
update gpt funcs
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Mar 31, 2024
1 parent 02be201 commit dc1bbf3
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 31 deletions.
5 changes: 4 additions & 1 deletion R/add_gpt_annotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ add_gpt_annotations <- function(phenos,
if(all(annot_cols %in% names(phenos))){
messager("Ancestor columns already present. Skipping.")
}else {
## According to the latest GPT annotations (2024-03-15),
## merging on "hpo_id" yields more annotated results (10724)
## than merging on "hpo_name" (10678).
phenos <- data.table::merge.data.table(phenos,
annot[,-c("hpo_name")],
annot[,-c("hpo_name")][,.SD[1], by="hpo_id"],
by= "hpo_id",
all.x = TRUE)
}
Expand Down
24 changes: 11 additions & 13 deletions R/gpt_annot_codify.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,20 @@ gpt_annot_codify <- function(annot = gpt_annot_read(),
code_dict = c(
"never"=0,
"rarely"=1,
"varies"=2,
"often"=3,
"always"=4
"often"=2,
"always"=3
),
tiers_dict=list(
intellectual_disability=1,
death=1,
impaired_mobility=2,
physical_malformations=2,
blindness=3,
intellectual_disability=5,
death=5,
impaired_mobility=4,
physical_malformations=3,
blindness=4,
sensory_impairments=3,
immunodeficiency=3,
cancer=3,
reduced_fertility=4,
congenital_onset=1
reduced_fertility=1,
congenital_onset=4
),
reset_tiers_dict=FALSE,
filters=list()
Expand All @@ -62,15 +61,14 @@ gpt_annot_codify <- function(annot = gpt_annot_read(),
max_score <-
sum(
max(code_dict, na.rm = TRUE) *
(max(unlist(tiers_dict))+1) - unlist(tiers_dict)
(max(unlist(tiers_dict))*length(tiers_dict))
)
d_coded <- d[,lapply(.SD,FUN=function(x){
unlist(code_dict[tolower(x)])}),.SDcols = cols, by=c("hpo_id","hpo_name")]
d_weighted <- data.table::as.data.table(
lapply(stats::setNames(cols,cols),
function(co){
d_coded[[co]]*
((max(unlist(tiers_dict))+1)-tiers_dict[[co]])
d_coded[[co]]*tiers_dict[[co]]
})
)[,hpo_name:=d_coded$hpo_name][,severity_score_gpt:=(
rowSums(.SD,na.rm = TRUE)/max_score*100),
Expand Down
30 changes: 18 additions & 12 deletions R/gpt_annot_read.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,12 @@
#' @export
#' @examples
#' gpt_annot <- gpt_annot_read()
gpt_annot_read <- function(save_path=file.path(
KGExplorer::cache_dir(package = "HPOExplorer"),
"gpt4_hpo_annotations.csv"
),

phenotype_to_genes = load_phenotype_to_genes(),
gpt_annot_read <- function(save_path=file.path(KGExplorer::cache_dir(package="HPOExplorer"),
"gpt4_hpo_annotations.csv"),
phenotype_to_genes = load_phenotype_to_genes(),
force_new=FALSE,
hpo=get_hpo(),
include_nogenes=TRUE,
hpo=get_hpo(),
include_nogenes=TRUE,
verbose=TRUE){
pheno_count <- hpo_name <- hpo_id <- phenotype <- NULL;

Expand All @@ -35,10 +32,19 @@ include_nogenes=TRUE,
utils::download.file(path, save_path)
# path <- get_data("gpt4_hpo_annotations.csv")
}
d <- data.table::fread(save_path, header = TRUE)
d <- d[!is.na(phenotype)]
data.table::setnames(d,"phenotype","hpo_name")
d <- add_hpo_id(d, hpo = hpo)
{
d <- data.table::fread(save_path, header = TRUE)
d <- d[!is.na(phenotype)]
data.table::setnames(d,"phenotype","hpo_name")
d <- add_hpo_id(d, hpo = hpo)
}
{
#### Add subset with fixed hpo_names ####
# https://github.com/neurogenomics/RareDiseasePrioritisation/issues/31#issuecomment-1989079044
fixmap <- data.table::fread("https://github.com/neurogenomics/RareDiseasePrioritisation/files/14562614/mismatched_hpo_names_fixed.csv")
d <- rbind(d[!hpo_name %in% unique(fixmap$hpo_name)],
fixmap, fill=TRUE)
}
#### Check phenotype names ####
d <- merge(d,
unique(phenotype_to_genes[,c("hpo_id","hpo_name")]),
Expand Down
4 changes: 4 additions & 0 deletions R/map_phenotypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@ map_phenotypes <- function(terms,
to=c("name","id"),
keep_order = TRUE,
ignore_case = TRUE,
ignore_char = eval(formals(
KGExplorer::map_ontology_terms
)$ignore_char),
invert = FALSE){
KGExplorer::map_ontology_terms(terms = terms,
ont = hpo,
to = to,
keep_order = keep_order,
ignore_case = ignore_case,
ignore_char = ignore_char,
invert = invert)
}
2 changes: 1 addition & 1 deletion R/search_hpo.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ search_hpo <- function(hpo = get_hpo(),
return(unique(res))
})
hit_counts <- lapply(query_hits, length)
messager("Number of phenotype gits per query group:")
messager("Number of phenotype hits per query group:")
messager(paste(paste(" -",names(hit_counts)),hit_counts,
collapse = "\n",sep=": "),v=verbose)
return(query_hits)
Expand Down
8 changes: 4 additions & 4 deletions man/gpt_annot_codify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/map_phenotypes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit dc1bbf3

Please sign in to comment.