diff --git a/template.qmd b/template.qmd index b7cab31..14bb402 100644 --- a/template.qmd +++ b/template.qmd @@ -140,7 +140,7 @@ samples is squared in red.](`r params$tree_ml`){#fig-tree_ml} ### Nucleotide diversity comparison Nucleotide diversity (π) has been calculated for $`r div_values[["boot.reps"]]`$ random -sample subsets of size $`r div_values[["sample.size"]]`$, extracted with replacement +sample subsets of size $`r div_values[["sample.size"]]`$, extracted from the context dataset. The distribution of the nuclotide diversity is assumed to `r div_values[["norm.text"]]` be normal after performing a Shapiro-Wilk test (p-value of $`r div_values[["normal.pvalue"]]`$). diff --git a/workflow/scripts/download_context.R b/workflow/scripts/download_context.R index 6ebee40..88016de 100644 --- a/workflow/scripts/download_context.R +++ b/workflow/scripts/download_context.R @@ -94,7 +94,7 @@ dataframes <- lapply( # Join results metadata <- bind_rows(dataframes) -log_info("Removeing overlapping sequences") +log_info("Removing overlapping sequences") # Checkpoint: remove samples that overlap with target samples according to GISAID ID samples.accids <- sample.metadata %>% pull(snakemake@params[["samples_gisaid_accession_column"]]) @@ -103,10 +103,10 @@ metadata <- metadata %>% filter(!accession_id %in% samples.accids) print(glue("{nrow(metadata)} accession_ids remaining after GISAID ID filter")) # Checkpoint: enforce a minimum number of samples to have at least -# as many possible combinations as bootstrap replicates. +# as many possible combinations as random subsample replicates. # This is done by calculating the root of a function based on the -# formula for calculating combinations with replacement -# for n ≥ r ≥ 0: combinations with replacement = n! / (r! (n-r)!) +# formula for calculating combinations for n ≥ r ≥ 0: +# combinations = n! / (r! (n-r)!) r <- nrow(sample.metadata) min.comb <- snakemake@params[["min_theoretical_combinations"]] solution <- uniroot( diff --git a/workflow/scripts/report/diversity_plot.R b/workflow/scripts/report/diversity_plot.R index 9740a7d..f0dfc65 100644 --- a/workflow/scripts/report/diversity_plot.R +++ b/workflow/scripts/report/diversity_plot.R @@ -60,7 +60,7 @@ diversity <- nuc.div(study_aln) # Perform bootstrap -log_info("Performing bootstraped calculation for nucleotide diversity in oontext samples") +log_info("Performing calculation for nucleotide diversity in context samples") plan(multisession, workers = snakemake@threads) divs <- boot.nd.parallel(gene_ex, length(study_aln), snakemake@params[["bootstrap_reps"]]) plan(sequential)