From c4b22cb68ae0f6c081d55f283e68bdaa07d40577 Mon Sep 17 00:00:00 2001
From: damontoth <Damon.Toth@hsc.utah.edu>
Date: Mon, 4 Nov 2024 17:28:24 -0700
Subject: [PATCH] Added g=0 case to pFinalSizeAndGen.R and added an example of
 this function to vignette #4

---
 R/pFinalSizeAndGen.R      |  4 +++-
 vignettes/H5N1Example.Rmd | 13 ++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/R/pFinalSizeAndGen.R b/R/pFinalSizeAndGen.R
index 040e3fc..da2e22d 100644
--- a/R/pFinalSizeAndGen.R
+++ b/R/pFinalSizeAndGen.R
@@ -9,7 +9,9 @@
 #' @export
 pFinalSizeAndGen <- function(g,n,j,R,k){
   
-  if(g==1){
+  if(g==0){
+    out <- pNextGenSize(n,0,R,k)
+  }else if(g==1){
     out <- pNextGenSize(n,j-n,R,k)*pNextGenSize(j-n,0,R,k)
   }else if(g==2){
     out <- sum(pNextGenSize(n,1:(j-n-1),R,k) * pNextGenSize(1:(j-n-1),(j-n-1):1,R,k) * pNextGenSize((j-n-1):1,0,R,k))
diff --git a/vignettes/H5N1Example.Rmd b/vignettes/H5N1Example.Rmd
index 5189568..d7980e4 100644
--- a/vignettes/H5N1Example.Rmd
+++ b/vignettes/H5N1Example.Rmd
@@ -78,9 +78,9 @@ This calculated the probability that, with $R=0.8$ and $k=0.1$ applied to every
 
 Now let's look at an example of how this function could be applied to study data from actual observed outbreak sizes. The H5N1 strain of influenza is currently of concern because of outbreaks among animals that sometimes cause spillover to humans. If these spillover events start leading to some substantial outbreaks involving human-to-human transmission, how could we use `pFinalSize()` to study these data?
 
-Say there are 30 H5N1 human infections from zoonotic spillover events, i.e. 30 instances of human infection from exposure to animals. In 25 of the spillover events, no subsequent human-to-human occurred. In 3 of the spillover events, exactly one human-to-human transmission occurred from the initially infected human with no further transmission (final size of 2 infections). In one spillover event, there was an outbreak of final size 10 infections, and another spillover event produced an outbreak of final size 50 infections. What values of R and k would best characterize this set of observations?
+Say there are 30 H5N1 human infections from zoonotic spillover events, i.e. 30 instances of human infection from exposure to animals. In 25 of the spillover events, no subsequent human-to-human occurred. In 3 of the spillover events, exactly one human-to-human transmission occurred from the initially infected human with no further transmission (final size of 2 infections). In one spillover event, there was an outbreak of final size 20 infections, and another spillover event produced an outbreak of final size 50 infections. What values of R and k would best characterize this set of observations?
 
-We can make use of the `pFinalSize()` function to create a (log) likelihood function for this dataset of outbreak clusters. First, we write a function called `pClusterSizes()` that calculates the probabilities of the four different final sizes observed (1, 2, 10, and 50) for given values of R and k:
+We can make use of the `pFinalSize()` function to create a (log) likelihood function for this dataset of outbreak clusters. First, we write a function called `pClusterSizes()` that calculates the probabilities of the four different final sizes observed (1, 2, 20, and 50) for given values of R and k:
 
 ```{r}
 pClusterSizes <- function(R,k) pFinalSize(n=1, j=c(1,2,20,50), R=R, k=k)
@@ -91,7 +91,7 @@ The argument `n` to `pFinalSize` is the initial number of cases at the start of
 ```{r}
 pClusterSizes(R = 1.5, k = 1) * 100
 ```
-If the reproduction number `R = 1.5`, and the dispersion parameter `k = 1`, the function calculates a 40% chance that an outbreak of total size 1 (no tranmsission) would occur, a 9.6% chance of exactly size 2, a 0.12% chance of exactly size 10, and less than 0.01% chance of outbreak of exactly size 50. 
+If the reproduction number `R = 1.5`, and the dispersion parameter `k = 1`, the function calculates a 40% chance that an outbreak of total size 1 (no tranmsission) would occur, a 9.6% chance of exactly size 2, a 0.12% chance of exactly size 20, and less than 0.01% chance of outbreak of exactly size 50. 
 
 Next we write a function `logLikData()` to calculate the log likelihood of observing 25, 3, 1, and 1 outbreak(s) of those four sizes, for given values of R and k:
 
@@ -139,6 +139,13 @@ c(sum(finalSize10), pFinalSize(n=1,j=10,R=0.8,k=0.07))
 ```
 The second output above demonstrates that the 9 probabilities sum to the total probability of an outbreak of final size 10 calculated by `pFinalSize`.
 
+Now say we have additional information about our example H5N1 cluster size data set: the size-20 outbreak occurred over 3 total generations of transmission, and the size-50 outbreak occurred over 4 generations. Does this change our estimates for R and k?
+
+```{r}
+pClusterSizeAndGen <- function(R,k) Vectorize(pFinalSizeAndGen)(g=c(0,1,3,4),n=1, j=c(1,2,20,50), R=R, k=k)
+logLikDataGen <- function(R,k) sum(c(25,3,1,1)*log(pClusterSizeAndGen(R,k)))
+optim(f = function(x) ifelse(all(x>0), -logLikDataGen(x[1],x[2]), Inf), par = c(R=1,k=1))$par
+```
 
 ## Risk assessment