From ed4b07009bbe2a1af8e73f96a09b2d2976809527 Mon Sep 17 00:00:00 2001 From: "Brian M. Schilder" <34280215+bschilder@users.noreply.github.com> Date: Wed, 22 May 2024 15:55:01 +0100 Subject: [PATCH] add gpt_annot_plot_branches [\nocache] --- NAMESPACE | 1 + R/gpt_annot_plot_branches.R | 74 ++++++++++++++++++ man/gpt_annot_plot_branches.Rd | 45 +++++++++++ tests/testthat/Rplots.pdf | Bin 0 -> 8267 bytes tests/testthat/test-gpt_annot_plot_branches.R | 7 ++ 5 files changed, 127 insertions(+) create mode 100644 R/gpt_annot_plot_branches.R create mode 100644 man/gpt_annot_plot_branches.Rd create mode 100644 tests/testthat/Rplots.pdf create mode 100644 tests/testthat/test-gpt_annot_plot_branches.R diff --git a/NAMESPACE b/NAMESPACE index 76f9c43..024c442 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ export(gpt_annot_class) export(gpt_annot_codify) export(gpt_annot_melt) export(gpt_annot_plot) +export(gpt_annot_plot_branches) export(gpt_annot_read) export(hpo_api) export(hpo_to_matrix) diff --git a/R/gpt_annot_plot_branches.R b/R/gpt_annot_plot_branches.R new file mode 100644 index 0000000..c887a85 --- /dev/null +++ b/R/gpt_annot_plot_branches.R @@ -0,0 +1,74 @@ +#' Plot annotations from GPT: by branch +#' +#' Plot annotations from GPT by ancestral HPO branch. +#' @param gpt_annot Output from \link{gpt_annot_read}. +#' @param metric Annotation metric to use (name of column in \code{gpt_annot}). +#' @param fill_lab Fill label in legend. +#' @param show_plot Show the plot. +#' @inheritParams gpt_annot_check +#' @inheritParams add_ont_lvl +#' @inheritParams add_ancestor +#' @param metric Annotation metric to plot. +#' @returns Named list of plot and data. +#' +#' @export +#' @examples +#' out <- gpt_annot_plot_branches() +gpt_annot_plot_branches <- function(hpo=get_hpo(), + gpt_annot = gpt_annot_read(hpo = hpo), + keep_descendants=NULL, + keep_ont_levels=NULL, + metric="congenital_onset", + fill_lab=gsub("_"," ",metric), + show_plot=TRUE){ + hpo_name <- n <- ancestor_name <- NULL; + + metric <- metric[1] + gpt_annot <- add_ancestor(gpt_annot, + keep_descendants = keep_descendants, + hpo = hpo) + gpt_annot <- add_ont_lvl(gpt_annot, + keep_ont_levels=keep_ont_levels, + hpo = hpo) + branches_dt <- gpt_annot[,list(n=data.table::uniqueN(hpo_name)), + by=c("ancestor_name",metric)] + branches_dt[,c("proportion_always", + "proportion_often"):=list( + sum(n[get(metric) %in% c("always")], na.rm = TRUE)/ + sum(n, na.rm = TRUE), + sum(n[get(metric) %in% c("often")], na.rm = TRUE)/ + sum(n, na.rm = TRUE) + ), + by=c("ancestor_name")] + data.table::setorderv(branches_dt,c("proportion_always","proportion_often"), + c(-1), na.last = TRUE) + branches_dt[,c(metric):=factor( + get(metric), + levels=c("always","often","rarely","never",NA), + ordered = TRUE)] + branches_dt[,ancestor_name:=factor( + ancestor_name, + levels=unique(branches_dt$ancestor_name), + ordered = TRUE)] +# +# ggstatsplot::ggbarstats(branches_dt[!is.na(get(metric))], +# x="ancestor_name", +# y="n",fill=metric) + p <- ggplot2::ggplot(branches_dt[!is.na(get(metric))], + ggplot2::aes(x=ancestor_name, + y=n, + fill=!!ggplot2::sym(metric))) + + ggplot2::geom_bar(stat = "identity",position = "fill") + + ggplot2::scale_fill_brewer(palette = "GnBu", direction = -1) + + ggplot2::scale_y_continuous(labels = scales::percent) + + ggplot2::labs(x="HPO ancestor", + y="Phenotypes", + fill=fill_lab) + + ggplot2::theme_bw() + + ggplot2::theme( + axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, vjust = .5)) + + if(show_plot) methods::show(p) + return(list(data=branches_dt, + plot=p)) +} diff --git a/man/gpt_annot_plot_branches.Rd b/man/gpt_annot_plot_branches.Rd new file mode 100644 index 0000000..0403510 --- /dev/null +++ b/man/gpt_annot_plot_branches.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gpt_annot_plot_branches.R +\name{gpt_annot_plot_branches} +\alias{gpt_annot_plot_branches} +\title{Plot annotations from GPT: by branch} +\usage{ +gpt_annot_plot_branches( + hpo = get_hpo(), + gpt_annot = gpt_annot_read(hpo = hpo), + keep_descendants = NULL, + keep_ont_levels = NULL, + metric = "congenital_onset", + fill_lab = gsub("_", " ", metric), + show_plot = TRUE +) +} +\arguments{ +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} + +\item{gpt_annot}{Output from \link{gpt_annot_read}.} + +\item{keep_descendants}{Terms whose descendants should be kept +(including themselves). + Set to \code{NULL} (default) to skip this filtering step.} + +\item{keep_ont_levels}{Only keep phenotypes at certain \emph{absolute} +ontology levels to keep. +See \link{add_ont_lvl} for details.} + +\item{metric}{Annotation metric to plot.} + +\item{fill_lab}{Fill label in legend.} + +\item{show_plot}{Show the plot.} +} +\value{ +Named list of plot and data. +} +\description{ +Plot annotations from GPT by ancestral HPO branch. +} +\examples{ +out <- gpt_annot_plot_branches() +} diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a5f577880e451d419cb59e6471c68ffef17c054d GIT binary patch literal 8267 zcmd6MXFS_)`?i`z?Ny}&rACQ8TD7UY_o|4{R0t9pYVX~lY80VX@l(6@XpK}gs9DtB z)T$M$eUkphegE(Kd0zbAJ>Spg`{FvTb6w}F^E?iIV+}1)2{CCZ{;-9x#jt^}VS67c z34k~N?);ESQIQI$?gIfKp>R(P5E23q)VM7vE-5W8BPl5%BO@szB}4@@fV=!Z@64e{ z7~abiaL)(i^#BS+0E~Q*UcN}Yo3Rhv#TN|mA>2xfNr_3|vERAi1W1TVNr?*L=|ZSN z@}Ft|Ky!aD2mq)KLV{p$HvrHWoKo&=i7z`}%L)6cIZPq-H;Ce(o&N00H78WhQz-Bs0G6}M*{+eyFdta zBarx>c~GIU%I))d;F%DY%HRzTF0+QIL4gz?mO6V5qENPHun6C zh*am2)Wc8J$8+x#NVSQG_j22~M$ZS|tgNmmTZOnOx8J=uz|di_>$-Ig<1**_NQCzA zY)DqdPu#^5Tt(Z-@&@|X!={xZT^Z$$fdQ*hlJFIv)t=S4S2mAU1{bg=dkh^+* zDl`?END?r;HfJkE1loVHQh5XH8&{O1EItL8bH}mHl+g z*T1FtH2rd-Yy*f#8>~gIR(72)cTdCVx?t|jaA_@K=*o>}+jgT8aSs&Biz$q&+(@Ne z85PFK-+``4#UaO!^oylkNu3}Vq%QbID28aofDVF=_9H6$LQBi)1yiD(2&|ZzG1`}2 z+RwXwTb4TpHghLSa9`SGU$w3Xy#oEX8nj7HfP=*!lt~(?%U0ap<-WsEP?fu8x22%T ziJ&T6j1Xa#W@XMhj5h0%C>~@*pK7Rr4MFB*#QC{Zxx3{SS-i;#Q8FF_b%j;AS0_8_ z3MGoYzJ%SRx`v?ICGJej=?Uk}C6SDmUtykkm*cJnHoVj}=IMIWKC%k*UUoUF`2 z91VY_j{lwdLDDj__h-Q?xCroMkwmd^DGfdq{>@ugiU~1)=4Olz6DqJ?0psgF9TOpB z{;=WaG+Bgyvu!saG*L*Uy&d7rF*zsJ*Qlvx$=`}5DUjjRF@N5Q<8P^{KE5Ov#jDm6 zjHKeVM^8hiDLcM9`jJ_-BiXT}UR>DJ{^{NDKoi_5_fvZf$anL(wmx~m>$&C

oY|# zHO?h&Mq!Nf$-C-%OmT@!{+4<`6tO~<8I!uEqEN;Wsg4D?5~L%`G2V|+zOGVxC7 z=9H~n|D`!g` zJ?Dv~l*%&HAE~mUd6u5e6BsjYZ;>6_t2X77!l8ONyWEw}Sr?C!P63Q9$Y?vzZ^-#e zN60GO%KBc*LVA|R9HmIsZIOqvvTW7*8;&Y8l~mvNdxgg*(jM^&p!~!*#zAHG$Au&*vv0$`ml{Q;|KY2t z_(ufNp_m>ncSStv+r8Ksu;oqX=TRM@+s3I+8Z1A_bv3h)H2L-h&LH-&+Zy_;KVE&J zagKRxmC7qXb0@>II`XaZo31Vn`lP#R4#g;rNUN2TmP20B{)Bt_)+;wQ55tt-_^S`L zGDR+*BkMkr?ce^goEobbf4qm#T^DQnqXS@Cn#Q#w_V5S4z-C}Cz*=FH+uV5wB-qWZ z6?Yo(NngLj5q$a0rnFOMw2imm%U!)yac>(`QO}Y)<4DC@bC(YySWbc53aLYZ8&Z3d zNNZ-JFQOpjRG={ohzySFb!tRAj4h7ZSSUI(n!x6w)9d^vh1P~Xs5TpC8M!yT|E?w! zm!*AJBYacf@}40}YIA_Ffj%l5GJUXX&|0c(|GLH7Y`j9oq24EDtuT{Mf_+e7oD)^c z`dwT99U`UkSWhO{^~c_%;-fmx`4=i+BApSn*)OYQb-^X0`ue;dJ7%fVP;q@K3h8&` zwS|MMtb}`4i*+@HWsQ{s0+MkIjZ2j$Nc7{u=6*olga6gbISv&xrOD8 z@0LM<)E%!J3N@Y4)A0=jA!08{CNV?C*4DOOmTC*tJ#!@)RdeT-T&*TBGU|qa^=A#Y zu2uuZI<6!A%2eQrT`aMSdk8;~g%8%R^&9o84<|`@sI*wy9uAh5j_1eqm4;$6udMl# zW~Krq{H++Vq1?WEYpqF@k*lIREvN%Oik*#pEM=QhgQ#juZF}pir4No}g+rfI5;hGqU(nyrx6j(iy{G!+;Hl)lUn%O=Ghi^rHjDB zGb8V1;j%c497>w2`eFTZxs$6un(EGMm-i?E6(J{W^dnE@x?U+PSxnbfk?v`RPW_0u zAfF>|Gf$p&eBH-8lCUC`sUO-`bV4k^^tBq==G04Cn}~H2%usnbf3A12VW9QVt1ek3 zkhryH{-J+P{5($F3Cul=3hx{zwHGzJjLQryW1*tG&*W97&ok#v{Pv{~X`n&JC>2>u1!aS65&bUS zNrah`;FO`(@xOP9Q~U70cl!S`Q%;u^bBGzuL9lRQ*88pm_((wMPo?k4n5mVY>$r1y zD^n~`(kVY2ddZ%d^&HEH7xjA!9F_dBDV z%;Q~+iUB%o=MhqA;cEx3uP^QJ8cnQD<#_G)*%_IJl@+9CGTLZ$FR@IGxpiiyB{(f~ zlyvD!|DeEZkW!cL+>-r{T=Uk69-_sa3*FPX(lQ}azt~J(j{|4Y229Eo60CobY+!AF23(x?`{p4R(@XG3E*3GyoZXREXC6lgyF zQHmJ*;=n9UZ6EFSb;a|}+bXhJknxUMPCLL4A+_x{(2Oy&kM6Qv(+1t}ND$)p&&mYGOAGO@&vb-Dx?B}!XDdCUW!E_#7dG0!^0E=&~ zsF&72K9D%&YDIu%r?xIox8aCNn|3zR$JEaln&X0@HMM=-&B7WBWv8@X!ja47Q*+|3 zWy|F`Mr9U5**yq~?NT=Ev2+fX#JV%%R3-gkya5Faqt=0{t6V?w;@Uw=tXEib&vM1Y zDjat@&lM@2^A|3%G|z1bp#rUA>mO_HqMtNxL4QrfECws)^uHSR)4Vn`$@hlX_H>z^ z-2W6Mm0flymb@dLr(W3H@yWHtd6Luf1b1K410*=UOC7+s|Ub z<3&{9HxI)zwivlUI?D2DG*w1CdJExybgfJw>3aP&=i$Wa;QFandwsUqdOk$Fure3j zPDpkp^?hKtHvQb_f{wg0=l*(I*tK7->H`PuW$_Y$m#C zIlS^|(!eMiznlDC-#pG{c&%NLG-riZD=iTjf;M=T_>@%XN%#2pcX5rcEMt$~_301hV@Q_j_oc)*Ud3N5p5T7$hwmJ6F3AN3rFNYk&~k*WG~s41 zE^r6J9Dvdr$g0p=9^HF>8mo^JA{Lu99tg066_k{qZUtYFb*y40MaqZia_`+<$asw^f*w3}zm!>L>}rZ?>I9k?8#;y17C393E1N1b(0U z8-wVcX6Uq)4N1Q`e1T0l{=WVrKKMJC;Kuh3wigHI$Y1Z4_e4l|-oYD3&o6#)J*nKr zMa$fE{5CHdZ{X3X?9;Cd>KzLl9pla=wyp|{g-fI&P;{!^!2#idxF#<8Src`b%C4Tz ziZW%l1Sx;muSYeHl?-TzeO(QNA84V5$__d#IR~0WQm38_42#-|9#w=iPZv7u7dOnA zmtIcw)L(FmWfM?R=s86HJQ^pD0P5VBJ$w5#acein`3MqLk& z-=_U~#2WeeX!p%Rn#k@{+TEFG=E21ZSNXC9e#KLkQTcH>j5_S$e6AW5*T8-S7O0oQ zTHs96espc9_(fLS-Lq_1rT5N)r22U^bZvMw3bHZu6{lIxrXv+|>(h{Rd=5OdXYgWJ zuOLIc-36K$FEeacs0!b{FM#FzLqLzD&~8;^=nt<^1?`xqaHe)|s_Vw+>Uq~=>S-?z z!8bt)K}^WtLWAu0BI1hIG0poZl)6UAmUB_xQWjq>7AO1sr|#5zs@f-=82I+vag7}d zWvgElx6aRb@HEX|jK^Pu4S|pOlP~$3u=$gx0qQ|r5CA&>fuAu2IN*64xG$cMkpKV< zATCgl8XN_%6UX1lh)WZGhkv7V2$am<6bzm@0-HgQ06QR_yTWIHppXt!KrK9-15i{1 zYDoel2tj@mLO@Lv@}3!zKwT*+62jqA!r(q;ULf#q1c)%brv{Mu+x%Y?5&CLCL96D{E6On6v1 z@jcNm^ovFtUI`-O^u*O4xCwi9wqE(WR$2V@M!o6pqz(GTb^Cd3!X+(J5I=6&1s-KU z=oI!PBNf^E5d8~~-KB3g7n2ki+Y+BSv9qmS#x+W|34kMzI?*aU@TDsDKtov~!ROz^ zRuE<`Fj4SxLCJ8@MSr3>?cV#lLqO}#15AB%-Tj|!#TaiC$Fd99n?)z3FeS_n3>d5? zbbl77l}qAVkocOxZo@h6L*zPeZo};F!CuXK=c~Meyts#`N1NbCh{O4-=f{389V0`w z*)_Ovokv!|88k$dMUc9Q(-;pZ3z5U-CFACc7_1YJD89PQmo~_ot0~aYv}@s(n@HKW zZ}xhZ(C$LOwVhKmZHW5)zyZ^?>78W|YClE(IZ{I1QO?sfph%Po4dLtwmd{0BNfzhP@d*-<< zu{3~4Q-x$hg=39`se|-g2SWo9-5d$As$eXsx0A$NIP=p>8UQxt(7N2KFT%^6ZoVP+ z&1Jyv&0bfLVc&xSje32q$pgkymH-Mqamu3iwv`Mp zvKl8N`OC?XZ0~z_CFQxr!s|Oyy+{J-w?vabF;r#pDOUk^W4!jW;*WOBx?AZxoT2{HQ2CuKxtT7zOpR$lgOm-(ZD1vVR>Ge+jzP+Mh ztVox|oUPYQFDoym2KE-l8Ty?>y6*kmzC8#|E08YuvT`z-N02Fz@@=HHnlMN-@A;sX zyk3JIO7#ZgVXR@7*Xz)RXlors4MihP{x*ISZPy#JLSZ*}Jb1QlX7f(-wgOUmwOr_Y z6m9fp)I0^yJhM03O>%{;g-Hc>nJq;|1d+n_Ca9uVD{1osbNfHiE3ETS8s6G_C5(=} z+HvJwCbos<`AGBi=%HFkiwP|Sa|Lz(8lQw8WXt!MZ*|8msLz|k3!VwO34+ZdON}6A zjU&DzS{pVS@-*sFMP!4f(85~ygNRxomiYK=-CEro-K0<6EJD}wIKGgqtsh99f=+!; z?^uXh7+JJ!2#m^(ij-E2uDtPovp1?cGC8U>GE*Wo5;QVAQeX0@WOi5unhuS0UwZic zi+tH0lGY=+k8?ZgxkAk9x3NeWtTdmr)r0L}&cUp)p|KyspN2|@Zs#AGzk0ismtESb zeinIDKx=_9xR+a_zFN=^j;88NdDj2N5c$yX`z`HtJ6wc)+tgrym4iqMex@m^~N<@0XQlh>pMO zIhwTpVL#hgw(2(!zbS1Pz1=Eol7NlB`OQt%iN*9g| z*L;7M$4lGk9OMjKr2Y}Ml!KOD={<%{QN2JqNaa>*sU?^tTr5uv70}O{~qI)IEgsXxE+POEb{(?q0quQz2XMg zw%9gP4h5V7{(R>XaMkEWqd@A-ySjb`GdYSG3pu9GewcT#np5W#pB9f5wi!|zOrej2 z?h3TtB}sdh?rpBkP(*nqStqqG8B_DQCcdViFDq@!UY4JVQ3j$ukx<>-Ej=+X656Fv z?I@YK_|hz8UqJ5mv38D6jyT+@apH$T-_0LE`|XZlw(lLj*yY+)zH>qru6{bT7%VL; z3$WU(5VO78)&s|e)kW4p-9rRx#clxu8^v55jGkG3$$VXP7`$nhz!I<7EhpOmdovX~ zDfm^s0lHmOYQU|B;%Cz<*Avm5g|B$F;Zm_ysrQ2>pVvYvKj*;)EI(V$k8MtUtG_O2 zB{l%$p5m%*MNok6^(Lp?N&DVn46-t*PpH(Z*0W`|pKyQ}*VSzV`RX2OCs`RhE_dB3 znjCNTZ?5>h_jTB^*uu*8*t5cE2OH3S+E1Pz9Uqh)q;SS1t0i|7f<0|&)X6WYUGEH@ z4_*(8B?-bNW3i%{qL6a0vI5^9P9NU)&7wkvr+Z*!@Z$P3F5;LQF2r__vZbxs*xA|nv^0a)l9^pCg%?801RKJun)Ea-5dCU+`SG>Carv2-$p>4;FB7 z|EUiD-UNVVzRt+sVle^4uNa0P4}3}Z?cjG`{TTq{JSpzLP!5yOh)-Y zT&ctX5)xG6|9k*h85t=VfGgl1OcF1d6Fz|FKbV9peh>Pen6#_{Ud;XvOim8JZ2Si% zFOT1m{sU8x#IH5~6BCz|_^){4lK-7UPFCW-a>&Wc{hKe+2Ly#deDI1t0B8mcfZ*Q? s0P1+U!tvV~p&vj~I2;KeDDl60gFu3OkiR=4ry#39#m}#4q($|A09El6^8f$< literal 0 HcmV?d00001 diff --git a/tests/testthat/test-gpt_annot_plot_branches.R b/tests/testthat/test-gpt_annot_plot_branches.R new file mode 100644 index 0000000..4c5b41f --- /dev/null +++ b/tests/testthat/test-gpt_annot_plot_branches.R @@ -0,0 +1,7 @@ +test_that("gpt_annot_plot_branches works", { + + out <- gpt_annot_plot_branches() + testthat::expect_true(methods::is(out$plot,"gg")) + testthat::expect_true(methods::is(out$dat,"data.table")) + testthat::expect_true(length(unique(out$dat$ancestor_name))<100) +})