From 2c16ff40d0be13d10938cd449d8d26390327fe5c Mon Sep 17 00:00:00 2001 From: kellijohnson-NOAA Date: Thu, 13 Jun 2024 12:33:28 -0700 Subject: [PATCH] fix(data_mile1): sd of log(x) instead of CV For catch and index data the measurement of uncertainty should be the standard deviation (sd) of the logged observations. The operating model outputs a CV. I added a temporary function in data_mile1.R to calculate the sd from the CV. In the future we will need to, behind the scenes, take the log of the sd so the parameters stay within the bounds, which is what the vignette is doing now. This time series of uncertainty can later be fed to the modules but I did not change the vignette. Close #136 --- R/data_mile1.R | 6 +++++- data-raw/data_mile1.R | 15 ++++++++------- data/data_mile1.rda | Bin 3008 -> 3052 bytes man/data_mile1.Rd | 6 +++++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/R/data_mile1.R b/R/data_mile1.R index 060e2fa00..a986d7604 100644 --- a/R/data_mile1.R +++ b/R/data_mile1.R @@ -24,7 +24,11 @@ #' viable units for the composition data, where the former is the preferred #' unit of measurement.} #' \item{uncertainty}{A real value providing a measurement of uncertainty -#' for value.} +#' for value. For catches and survey indices of abundance this should be +#' the standard deviation of the logged observations if you are using the +#' lognormal distribution to fit your data. For composition data it will +#' be your input sample size. +#' } #' } #' @source \url{www.github.com/Bai-Li-NOAA/Age_Structured_Stock_Assessment_Model_Comparison} "data_mile1" diff --git a/data-raw/data_mile1.R b/data-raw/data_mile1.R index 0d50638ff..9326d30f7 100644 --- a/data-raw/data_mile1.R +++ b/data-raw/data_mile1.R @@ -15,6 +15,10 @@ ############################################################################### # Helper functions and load packages ############################################################################### +cv_2_sd <- function(x) { + sqrt(log(x^2 + 1)) +} + check_ASSAMC <- function() { packages_all <- .packages(all.available = TRUE) if (!"ASSAMC" %in% packages_all) { @@ -25,6 +29,7 @@ check_ASSAMC <- function() { library("ASSAMC") return(TRUE) } + check_ASSAMC() library(dplyr) @@ -52,11 +57,7 @@ landings_data <- data.frame( ), value = returnedom[["em_input"]]$L.obs[[1]], unit = "mt", # metric tons - # TODO: discuss if CV the appropriate input here given that landings will be - # modeled with a lognormal or similar likelihood. Just because previous - # models have used CV doesn't mean we have to continue to use it. - # E.g., `dlnorm(sdlog = )` uses a standard deviation on the log scale. - uncertainty = returnedom[["em_input"]]$cv.L[[1]] + uncertainty = cv_2_sd(returnedom[["em_input"]]$cv.L[[1]]) ) ############################################################################### @@ -76,7 +77,7 @@ index_data <- data.frame( ), value = returnedom[["em_input"]]$surveyB.obs[[1]], unit = "mt", - uncertainty = returnedom[["em_input"]]$cv.survey[[1]] + uncertainty = cv_2_sd(returnedom[["em_input"]]$cv.survey[[1]]) ) ############################################################################### @@ -164,7 +165,7 @@ unlink("FIMS_input_data.csv") usethis::use_data(data_mile1, overwrite = TRUE) rm( - check_ASSAMC, + check_ASSAMC, cv_2_sd, age_data, landings_data, index_data, weightatage_data, timingfishery, weightsfishery, data_mile1, returnedom, diff --git a/data/data_mile1.rda b/data/data_mile1.rda index d631bca5b2d48cd846a055eb4a8d3fa7d206b06b..cfdec993ace65dc4ec55bb36cd3e54754e1963f6 100644 GIT binary patch literal 3052 zcmbtQc{~#i8=sA_8Cy$^B^h%}a?B9F(rgj395a-V<;>M`lvK`!Nv^H3q;1$Nsa&aC z5pokshg^kF^eXD-$m{+6^ZxOE-sAImpXc*@KhN`gpX>7wj|Z9=S{~D+9BVx@8U!e) zi`(4XT-^NgAFy$7eJj}9{P`DdLEL6d!)EBX?AS#KxA!K4?f|M7!F(}kT;;+gHWk{S zSE_e4BZB)*WQ`P%Q~&_}EEgVEu^{(!P%fe0ARa$RhHTm< zB#>3S68ogMYbz+(7SP9XJjPI@u zW=bS0*V{N8BkPJ#Ly1IWmMyCd3#}dJ%*jg{rZgO5-{hGREOKk$Du+Bt7JWdE1H@}=Ia+0r3r>ZC#iLlr@M&=RomlG2;dn+!13qc2d#P3 z(d9Yr_-X;F>@X>j^4jn7+T*~>0cA^`iz>>Sr`|uWdH=@c2JdHI;le~`%1+AH?>@Rr*X*ra(s*>K*5_x7N^rQZ-8QA-7^RIdgMy96u+AHZ#y zc`4XJiJGLsqbBV>d$%dm`EFAN+ug9D6l_90+17ba@hvrJr_RGuey*^P|CQ=*N)HkE zgAL1l{9Ds*$!mtV=atj`U$+|P{?>=)-(2hfL*AC`T&e4W2WH$8Ldd|EWV!x&s;YkT zI}!${Y6~V z%3bw zUT!QJe_iX@pjkAUzlqh3BAd42-^OjA!rEH8j@e1DHRRu_m5u5+?|tlYk7#oS#{(kv zs6u!2s!mBUQYUfFxLCfb6F~h8H2EsyhfAuI7Zib!cotU~tz-$bOp9)>MRlm3#rcnB={EVGW!g29|Jv~!3fK6+B>k45Hen9WY* zL5tS>K9H{l^qP$%!p4QA>m((SDSZs-q=HWDwzigNHz;B-8qpF*=5ssS)Us;pS zO9pmUmW$-;IsZ=GSBSj9;-}0DFamNf?3Om-4wS=x28PGvE`ySE(!-K--oy8RZ6{Za3_uh)Az07A>@_&?Rm^vc`j?lIBG}%)bTAe} zVJj-RHnH`bWpai|j(r|Bf7AGUc#O%~b&g7Va&(N9HlKrX)V2lMG1+X5UC?b*o2ejR zj$MgbbrzdxN#J8xJo|2DMRRk7UB?busxJ2W>F>(WS{U1{@tmiu)!2o4^vjg)*n^%j zW0{ay^GGl;?Jnpfkigd`Buo@<53WrFruB$5`bj1XuibHw>7PGy?G2?R63=Srn>?uS zXl~9h1sE4cS_@#;_Yy-?RxuaBaX zt{)#OcckkJGrh+n-1m4W4m z2cL0vtZtX1<|I4E7afWevvfvO<5w^Pr_(s|d)iX8MoI3oX210ci&M%|7DzLuhp#s+ zJHA6%oj?7&`_H<{J-U^94D+rRHGWTuS-n-Xc#cGm#MKAtu?<0+yc6Q;`Otol;d79H*uwXeDQ#O#Os*zn%V8p z^TTNeb%35?hL(pFN~TA|HAknBK8c12d>_0$4g#U`cABKFdXH<)`fBTHOQFuYZT@_3 z|5nVbTujYUx;>6hLLm`0Tz- literal 3008 zcmbtWXHXMb7ETBtRUy(0h=C+X^U;D7DWM}nkPt#qL^=UPBp?XNBhpJ~AtK#`7K&1& z^Hi!x5nkvW1O(}#B7%xp^xxZ=o!yzeXXeaT&b{CHdG)X2HI(%1rLgv|Epx{J`TgNc zCi4UH@K-z1{c%i~%&)&YGyJ$3Uow5)o@~9(W;>%iKm=I>KzY0OLHV?gX57p5Ee$(w z21cCgt@tS*y+Tx12Y@}sI1)87I)cI@djLQx`-8$!z=$yn<-#6A5ufK_Vk@KG3u1WM)FgX7=@DQnxV=uK78A8pU-}cD01|?=4iDzlTKSM6Sg!F z?weqZ9$t#8)PmCszPt=@4@@ktpZUZUStQ`-DIGGH1kSbbQ$^egB;*8I`6_WM+Ed82 z2+P|r2Zsv;F8S%At7TZH^+}eKWK$Tz6=AQ3i?|%1m`s;;5NXvG+@G6BZsH@W!E;zA zAD`zKg7|mMlBF+8#>+xRbw>2DH@J+RCYaKV=AFwN<=Vo9h81c$&dsDoPq^W(RPD70 zTF_-M-|Y3U9R{3+!aqoSY3bYY8BWGtBv)6bd-5bvuZCH&2B!pyY*$f4Mv2+tPOT85 zlAE*S-r|qK$2PTXWyKakCOWGYo+ae_m2JKdyA-H#?cRsS>n*Y8djiBx#%6u+Zc1vS zED*#83uZFoyW=ei5bBcHWN?1s#Nn8u!9hUZ3nu6TK`RAW+#UbN=MR1BrP?IMw8u6K zlj;gPEE2Wo;GSKyZQ0iCpWF*S!8n)#24t4ECm)ozr#!cK!I!PH92VPH9s1 z7?0U09dbPpnZrej9$R;8cy5)qCmo9|73^v_r|yma(emRFY3%cGauv-Gx@DKu?BcN! zpOYc=|H6K5Q4zbW$}(&hD}CqCabo_{jGOOTGZ+4XOfk}toGQG!CmN_5P+hb^g4RR) zu_^)`_Cgpp#S)olC3BesXLBrOlPyr~8lt96F~l=hP?8(^#>m z@SI(&Z4o^B!NIq_1Lb&uG7mCJulHQL9&L?mC})Ifwfud-K!W8Nfab7t(lQR83`4-A zuk*9S0Y!<{a5uGrA{0ua>x*2dEQ_1-i|QP?(-3K_q9jBjA{iSK^yOo|)G6=M`L+AK& z)09K9IPAfEo?=kD6}xt8TE@yVpMIds3Q$NuL#UIacSKZ zi16l!e#X{OE!rId59PbY#SeMDLKOH|yTDmhszp%FhMvPK5hu8u zycxDPh@GO8AuU7RLCx7TVi#+~iS9a$UYD-M>GU{BjIe;5^n5B^P~O{0qU+EM^TbP8 zk5fe$Y!+F=_N6P>ko`nkO0$u)h)5v>bC%mHstKApT(n`W1ik6g9hRt(d+M^8AT;$P zlw7)7V+a*8R8WQ&7#gr;l1SWSXA_9{pJrT-#<^gfNM~ab2DB<5ulNT5s`G%)oUAt( z^ID@AYz)&oeJ;8z4XDo{d*olrf_i>MwVd-$Hw(cpo%WpgygFCSN}Fuar^9~Sg8b;^ z&u^d#SY-A7lr@{7s!r(j@l^>7-G()b6N(MMiLipaw+dKpiT95N-zt1+c+i~pF#B1z zhmR^OZ)}79>?a;Vl^tcIaInXJ+9se>&~w!HVo+G^L66AQ+{pgcaehFh=;XdQ_xDSU zgjl}2e%S2yL8_+J7?1hZo#)?o7u$1jlH2u@qltx|jvO?!A74E8z47m$JZo)hjj-`= zH+*{yCWCbzNf>2~F>NS`^hFaD%jne&qm_G_X<_g6{BP?E-;RBQa@g`;!txKjo&x8F z7Fz@a43>ZVYpqn{qfD{Q=cD(-;H+2i=C~J}=r+onZxkJiYr%Ex(Ii(!6AKQst5jl{*oMY(lqXS7K@(bt`&!csx+(s<4O%m{!xr(xzrv=}vPS+Y#6x zYV*r*3#Qse3!XADy5-INVLRyPrkNk%Va6bSauS@^W9EyK^{$7^TEq}33+t0oE|tBd z2=&mQ1n$;Nzs5Ot+cRC#e+c7dH@^?!B|rUWx_#f&v+gS-)~jzbXgQ(8r+Q**zxDk@ zsGBw}pA^QsE}bNyrj>;rN$Yu6D?2=BMYj-wTaAg9A>v;9n6A)Zus{t2wsJ{-c%}dH z1^5LHHA3ckZAc5shXgQ6>*_CfYpRUGP$fLwQ+eSGano;s>{ihOz&fDhM;(r2x&zZ_ zU^!N|F!t%R_}|aN#~FvYus#-(-7L4L=%S`w!oH?4lx``29L)#> zfhPH8t}Ac5%Lh2WibbQWnlE>2>zxkx~95cDN{v7-n>&@q+7{Vwr*YtRCjGl)si1T zEX+r#1-tGptW7~{o#)N~QsvO@2$J4G{Y-UyNe+or#Cb*nv0_L(2+CsvJzQ6JZuL1e z(nS8t0lVhBxcAs^oLTNTyskXN<(>Y`PcRtWJ@_Pd9=8_0{`SZU_1BjTUA))Di@Xi*ETnrFBV;xHQZRPN?N;Jt3vdoxJjcWRNyE60#_0sB zX;ELf;zP?(!t)c%n_61h@3iZ*Zm@RVurD^rhY>x>&)v z*e3fvNxSioLD*=VhNb`11XV0*PP2m-qW7skm;rrfO zDpaRo$YoFqkZ6$Sv_r-)Kvo?z-lf0)uM%5H(PyDR*qD6|PlA^r2=OjaR#u&Go#NKCr y>cCFl`8hBZ{?+vD0nX8unk>@GZF~K2^?P8F%`5N$v4G&k6ebv{Tc-Z{vHuH{cXt5* diff --git a/man/data_mile1.Rd b/man/data_mile1.Rd index c27b9c86a..fd5fe1406 100644 --- a/man/data_mile1.Rd +++ b/man/data_mile1.Rd @@ -24,7 +24,11 @@ units for each data type are as follows. \code{mt} is used for \code{index}, viable units for the composition data, where the former is the preferred unit of measurement.} \item{uncertainty}{A real value providing a measurement of uncertainty -for value.} +for value. For catches and survey indices of abundance this should be +the standard deviation of the logged observations if you are using the +lognormal distribution to fit your data. For composition data it will +be your input sample size. +} } } \source{