-
Notifications
You must be signed in to change notification settings - Fork 1
/
DocMSLT.Rmd
947 lines (724 loc) · 47.6 KB
/
DocMSLT.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
---
title: "Proportional multi-state multiple-cohort life table model"
author: "Belen Zapata-Diomedi,Alan Both, Ali Abbas"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
bookdown::html_document2:
toc: true
header-includes:
- \usepackage{setspace}\onehalfspacing
- \usepackage{float}
- \usepackage{pdflscape}
- \newcommand{\blandscape}{\begin{landscape}}
- \newcommand{\elandscape}{\end{landscape}}
biblio-style: apsr
always_allow_html: yes
bibliography: METAHIT.bib
---
```{r load-packages, eval=FALSE, echo = FALSE, warning = FALSE}
library(knitr)
library(kableExtra)
library(plyr)
library(lubridate)
library(ggplot2)
library(ggjoy)
library(ggpubr)
library(plm)
library(PerformanceAnalytics)
library(pander)
library(dplyr)
library(tidyr)
library(tibble)
library(tidyverse)
library(knitr)
library(readxl)
library(stringr)
library(Hmisc)
library(DiagrammeR)
library(tinytex)
library(readr)
library(png)
library(disbayes)
library(stringr)
options(scipen = 999)
#https://bookdown.org/yihui/rmarkdown-cookbook/r-code.html
```
```{r,eval=FALSE, echo = FALSE, warning = FALSE}
rm (list = ls())
setwd("C:/Users/beluz/Documents")
relative_path_execute <- paste0("~/mh-execute")
relative_path_mslt <- paste0("~/mh-mslt")
```
```{r setup, echo = FALSE, warning = FALSE}
## Global options
options(scipen=999)
options("getSymbols.warning4.0"=FALSE)
options("getSymbols.yahoo.warning"=FALSE)
knitr::opts_chunk$set(echo = TRUE, width = 100)
knitr::opts_chunk$set(message = FALSE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(eval = TRUE)
knitr::opts_chunk$set(error = TRUE)
```
# Introduction
The proportional multi-state multiple-cohort life table model (PMSLT) is a population level model (macro) approach to simulate health (and economic) implications of changes in exposure to health risk factors (e.g. physical inactivity, air pollution and diet). The PMSLT has been widely used to simulate outcomes for population level interventions for chronic disease prevention.
The model was developed by Jan Barendregt and colleagues and has been widely used in Australia and New Zeland [-@RN2; -@RN1].
The basic infrastructure of the model consist of three components: (1) Effect size for the intervention of interest (e.g. intervention to urban design that modifies population levels of physical activity); (2) Calculation of the potential impact fraction (PIF) to derive the change in occurrence of disease (incidence/case fatality) attributable to a change in the distribution of the risk factor (e.g. physical activity); and (3) Use of the PMSLT to simulate health (and economic) outcomes attributable to a change in the distribution of health risk factor/s in the population of interest. Figure \@ref(fig:figure1) summaries the basic infrastructure of the model. ITHIM is included in Figure \@ref(fig:figure1) to show that both approaches share in common steps one and two and differ in the mechanisms of calculation of change in health burden.
\newpage
```{r figure1, fig.cap = "Basic ITHIMR infrastructure", echo=FALSE}
knitr::include_graphics("presentation/Figure1.png")
```
**HALYs, QALYs and DALYs**
For the PMSLT model we use the term *health-adjusted life year* (HALY). As *summary measure of population health* it measures both quantity and quality of life, where one HALY represent the equivalent of one year in full health (which could be two years with a quality of life of 0.5, for example). Specific types of HALY are the quality-adjusted life year (QALY) and the disability-adjusted life year (DALY). The QALY derives from economics and was first used in the 1960s as a measure of health gain [@RN16]. The disability-adjusted life-year (DALY) was developed for use in burden of disease studies as a measure of health loss due to disease [@RN17]. Our calculated HALYs are neither QALYs not DALYs, but something in between. They are similar to QALYs in that they represent health gains. However, the main difference is in the calculation of the health-related quality of life component. QALYs use measures of utility weights that traditionally represent individual experiences of health, whereas our estimated HALYs use disability weights linked to specific diseases, which were developed for the Global Burden of Disease study [@RN18]. As discussed in past research [@RN10] the main advantage of using disability weights over utility weights is that disability weights refer to specific diseases rather than health states (which are difficult to link to risk factors-e.g. physical inactivity). We opted to use the more general terms HALYs given that the use of the DALYs terminology may lead to think that our calculations are similar to those in burden of diseases studies. In our study, our model does not explicitly separate years of life lost (YLL) and years lived with disability (YLD) components, but instead calculates the total number of life years gained/lost due to an intervention, adjusted for the average health-related quality of life in those years (by age and sex). In burden of disease studies, DALYs are defined as the sum Years of Life Lost (YLL) and Years Lived with Disability (YLD).
# Proportional multi-state multiple-cohort life table model
Figure \@ref(fig:figure2) is a schematic representation of the PMSLT. The two PMSLT components are the life table and the diseases process for each of the modeled diseases (ADD INJURIES).
The two components are modeled for both the base case (no intervention) and the scenario (with intervention) for each sex and 5-year age cohorts. The life table models the survival for the cohort and includes for a given age interval the probability of dying, the number of people alive, life years and the rate of total prevalent years lived with disability (pYLD total). Total pYLD is the proportion of life that is lost due to diseases and injury (disability) and adjusts life years to derive the measure of Health adjusted life years (HALYs).
For each of the modeled diseases and their disease processes, incidence, prevalence and disease specific mortality are calculated. Transition
probabilities between states are used in the derivation of incidence to adjust the transition from healthy to diseased and case fatality from
disease to dead from the diseases [@RN1] (see Table XX for data inputs).
The PIFs by age and sex cohort (see Figure \@ref(fig:figure1)) modify incidence in the scenario’s
disease process. The PIF measures the proportional change in disease risk when exposure to a risk factor changes [@RN11]. Because type 2
diabetes is a risk factor for ischemic heart disease and stroke [@RN14; @RN15], the
impact of changes in type 2 diabetes due to the scenario are calculated (TO DO). Disability weights (TO DO), which reflect the health loss attributable to disease on a scale from 0 (perfect health) to 1 (equivalent to death) [@RN18] are multiplied by disease prevalence to derive disease specific pYLDs.
The difference between pYLDs and mortality rates between base case and scenarios disease processes are collected in the scenario life table to modify all-cause mortality and pYLD total. Cohorts in the general life table
are modeled from the baseline year (e.g. 2017) until they die or reach the age of 100.
The health outputs are measured as the difference between the base case and scenario disease processes and life table. The model assumes that diseases are independent from one another (i.e. the probability of developing one disease is unrelated to the probability of developing
another) and are independent from all causes of death [@RN2].
For the latest scientific reference for the PMSLT see @RN19.
```{r figure2, fig.cap = "Analytical framework PMSLT", echo=FALSE}
knitr::include_graphics("presentation/Figure5.png")
```
## Disability weights
Disability weights (DW) are derived from disease specific prevalent years lived with disability (YLD) and disease specific prevalence by age group (5 years) and sex. Data for YLDs prevalence is from the Global Burden of Disease (GBD) data for 2017. An age and sex specific-correction was introduced to counteract the effects of accumulating co-morbid illnesses in the older age groups as shown in Equation \@ref(eq:dw). Interpolation was used to derive rates per single year of age from 5-year age intervals.
\begin{equation}
DW_{adjusted} = { { \frac{ { \frac{ { {YLD_{disease}}}} {P_{disease}} } } {1 - YLD_{total}}}}
(\#eq:dw)
\end{equation}
Where YLD~disease~ is the YLD mean number per age and sex for a given disease, P~disease~ is the prevalence as reported in the Global Burden of Disease study for a given disease by age and sex and YLD~total~ is the total YLD rate per age and sex
## PIF for type 2 diabetes
Type 2 diabetes is a risk factor for ischemic heart disease and stroke [@RN14; @RN15; @RN10], hence, we also included the PIFs for stroke and ischemic heart disease as a function of changes in diabetes as shown in Equation \@ref(eq:t2d) and summarized in Table XX for males and females respectively within the PMSLT.
\begin{equation}
PIF = { \frac{ -({ Prevalence_{scenario} - Prevalence_{base case}}) * (RR - 1)} {Prevalence_{base case} * (RR -1) +1}}
(\#eq:t2d)
\end{equation}
where Prevalence~scenario~ represents the prevalence of type 2 diabetes after the scenario, whilst
Prevalence~base case~ represents the prevalence for type 2 diabetes in the base case and Relative Risk (RR) represent the relative risk for stroke or ischemic heart disease respectively.
For ischemic heart disease and stroke, we calculated the combined PIF according to Equation Equation \@ref(eq:combinedPIF) given that these two diseases have more than one risk factor (physical inactivity, air pollution, noise and diabetes) in the model.
\begin{equation}
PIF_{combined} = \prod_{r} 1- (1 - PIF_{risk})
(\#eq:combinedPIF)
\end{equation}
# Generation of inputs for the proportional multi-state life table (PMSLT)
Data inputs for the proportional multi-state life table model (PMSLT) include inputs for: life table (population, mortality rates and prevalent years lived with disability rates for all causes (YLD)), diseases processes (incidence, remission-for cancers only and case fatality) and injuries process (mortality rates and years lived with disability rates-FOR REVISION). Table \@ref(tab:Table1) describes all data inputs for the PMSLT. Each of the PMSLT components and related code are explained below Table \@ref(tab:Table1). Data inputs were generated for the English city regions/Combined Authorities. We used Global Burden of Disease data [@RN4] for all data inputs, and an Disbayes (ADD REF package) to derive case fatality (not available in GBD) from GBD inputs. GBD data is available for rates per 100,000 people, total numbers and percentage by age groups (5-years smallest category) and sex. Percentage represents proportion of the contribution of a diseases to the total burden, we do not use percentages. All data inputs for the PMSLT are by sex and one year age groups, except population numbers which is for 5-year age groups. The model assumes that a cohort (e.g. males aged 40 years in 2019) will have the same observed data data for health for a 50 year old today when they reach the age of 50. This can be addressed with incorporating trends (currently trends applied to ischemic heart disease).
```{r Table1, echo=FALSE, include=TRUE}
pmslt_inputs <- tibble::tribble(
~"Component", ~"Data needs", ~"Input data", ~"Data processing",
"General life table", "Population numbers and mortality rates", "Population and all-cause mortality by age-group (5 years) and sex", "Population numbers by 5-year age groups and sex were derived from GBD rates (per 100,000) and numbers (i.e. number*100,000/rate). For the English city regions data is avaialbe for Lower Tier Local Authority (LTLA) and Upper Tier Local Authority (UTLA). Population numbers for LTLA and UTLA were aggregated to City Regions. Mortality rates per one were derived by diving all-cause mortality rates by population numbers by age and sex. Interpolation was used to derived single-year mortality rates from 5-year data.",
"General life table", "All-cause YLDs", "All-cause pYLDs by 5-year age groups and sex", "Rates per one derived from numbers and population data and interpolated to one-year age groups. Same aggreagation process as for mortality was used for City Regions.",
"Disease life table", "Diseases: Disability weights, incidence, remission and case fatality", "Disease specific pYLDs and prevalence. Case fatality derived with Disbayes. Incidence per year of age is an xx interpolation from GBD data and remission for cancers is from XX", "Section XX (TO DO) outlines Disbayes process. Disability weights were calculated by dividing disease specific pYLDs by prevalence and adjusted by all cause pYLDs for 5-year age groups and sex and then interpolated to 1-year age groups.",
"Injuries: pYLDs and mortality rates", "PYLDs and mortality rates for road injures for pedestrians, cyclist, COMPLETE", "pYLD and mortality numbers by 5-year age group and sex", "Rates per one derived from numbers and population data and interpolated to 1-year age groups",
"Diseases trends", "TO DO","TO DO", "Explained in sections Trends"
)
inputs <- kableExtra::kable(pmslt_inputs, booktabs = T, caption = "PMSLT inputs", longtable = TRUE)
kableExtra::kable_styling(inputs, latex_options = "striped", position = "float_left")
```
## Code
Table \@ref(tab:Table2) is an overview of inputs, outputs and code for the generation of data inputs.
```{r Table2, echo=FALSE, include=TRUE}
inputs_outputs <- tibble::tribble(
~"R file", ~"Function", ~"Input variable", ~"Input file", ~"Output variable", ~"Saved file",
"prepare_DATA.R", "PrepareData", "1) local_goverment_areas 2) lad 3) utla 4) gbd_data", " 1) mh-execute/input/mh_regions_lad_lookup.csv, 2) mh-mslt/input/Local_Authority_District_to_Region_(December_2017)_Lookup_in_England.csv, 3) mh-mslt/input/Lower_Tier_Local_Authority_to_Upper_Tier_Local_Authority_(December_2017)_Lookup_in_England_and_Wales.csv, 4) mh-mslt//input/gbd/GBD2019/METAHIT/", "local_goverment_areas", "NA",
"get_disease_names.R", "GetDiseaseTable", "disease_names_execute", "mh-execute/inputs/dose_response/disease_outcomes_lookup_new.csv", "DISEASE_SHORT_NAMES", "mh-mslt/output/paramters/DISEASE_SHORT_NAMES",
"prepare_GBD.R", "calculateGBDwider", "1) gbd 2) local_government_area", "gbd (calculated with GetGBD) and local_goverment_areas (calculated with GetGEO)", "gbd_wider", "NA",
"prepare_GBD.R", "calculateMSLT" , "1) gbd_wider (calculated with calculateGBDWider) 2) location_disease (calculated with GetDiseaseTable)", "gbd_wider, location (cityregion) and DISEASE_SHORT_NAMES", "mslt_df_wider", "mh-execute/inputs/mslt"
)
inputs_outputs <- kableExtra::kable(inputs_outputs, booktabs = T, caption = " Inputs and outputs", longtable = TRUE)
kableExtra::kable_styling(inputs_outputs, latex_options = "striped", position = "float_left")
```
The below code generates inputs for the English City Regions (Liverpool, Nottingham, Bristol, Northeast, Greater Manchester, Sheffield, West Midlands, Leeds and London).
Raw inputs are in "inputs" folder. Processed data is in "output" folder. Final data save in mh-execute inputs/mslt. Intermediate files are not saved.
### Map local authorities to city regions
Use **prepareData** to map between local authorities and city regions and join to GBD data. For the city regions we add up data available for lower and upper tier local authority areas.
```{r,eval=FALSE}
source("~/mh-mslt/R/prepare_DATA.R")
gbd_data <- PrepareData(
local_goverment_areas= paste0(relative_path_execute, "/inputs/mh_regions_lad_lookup.csv"),
lad= paste0(relative_path_mslt,"/input/Local_Authority_District_to_Region_(December_2017)_Lookup_in_England.csv"),
utla=paste0(relative_path_mslt,"/input/Lower_Tier_Local_Authority_to_Upper_Tier_Local_Authority_(December_2017)_Lookup_in_England_and_Wales.csv"),
gbd_data= paste0(relative_path_mslt, "/input/gbd/GBD2019/METAHIT/"))
head(gbd_data)
```
### Diseases and non-diseases coding table
The disease coding table adds information to *metahit-execute file disease_outcomes_lookup_new.csv* to be able to run the PMSLT. The file disease_outcomes_lookup_new.csv was designed to match relative risks from for the included risk factors (physical activity, air pollution, noise) to diseases **GetDiseaseTable** adds information for all the diseases for which we derived input data, adds road injuries, classifies diseases to be modeled as diseases (chronic diseases such as stroke) and non-diseases (injuries), this is necessary as chronic diseases and injuries are modeled differently. We classified all-cause mortality as non-diseases since we are not including all-cause mortality risk reduction in the PMSLT (MAY CHANGE). Since PIFs have not yet been calculated for all diseases, we also include a variable indicating whether the disease has PIF or not. Lastly, a classification was made for diseases for males only or females only.
The output is saved to be used when running the PMSLT in mh-execute.
```{r,eval=FALSE}
source("~/mh-mslt/R/get_disease_names.R")
DISEASE_SHORT_NAMES <- GetDiseaseTable(gbd_data= paste0(relative_path_mslt, "/input/gbd/GBD2019/METAHIT/"),
disease_names_execute=paste0(relative_path_execute, "/inputs/gbd/new_disease_names.csv")) %>% ### diseases not to include
dplyr::filter(!acronym %in% c("bladder-cancer", "alzheimer's-disease", "esophageal-cancer", "kidney-cancer", "prostate-cancer", "rectum-cancer", "myeloid-leukemia", "Road injuries", "Other road injuries"))
write.csv(DISEASE_SHORT_NAMES, "~/mh-mslt/output/parameters/DISEASE_SHORT_NAMES.csv")
```
## Data processing
Steps and code to generate inputs to run PMSLT from above generated files: gbd, local_goverment_areas and DISEASE_SHORT_NAMES.
### Calculate baseline data by area
Original GBD data tidy up with **GBDwider** for later processing for PMSLT dataframe and disbayes inputs. Generates dataframe for all areas (city regions, UK, countries and English Regions). Inputs are by age (5-year groups) and sex.
Intermediate data.
```{r,eval=FALSE}
source("~/mh-mslt/R/prepare_GBD.R")
gbd_wider <- calculateGBDwider(gbd = gbd_data)
head(gbd_wider)
```
## Calculate baseline data for PMSLT
A PMSLT dataframe of inputs using **CalculateMSLT** is generated from above dataframe by single year of age and adding Disbayes outputs. Disbayes output generated by Chris J. Data saved in input folder of mh-execute.
```{r,eval=FALSE}
source("~/mh-mslt/R/prepare_GBD.R")
mslt_inputs <- list()
index <- 1
for (loc in unique(gbd_wider$area)) {
mslt_inputs[[index]] <- calculateMSLT(gbd_wider, location = loc , disbayes = "~/mh-mslt/input/city regions/Output disbayes/resall_selected.rds")
names(mslt_inputs)[index] <- paste0(loc)
write.csv(mslt_inputs[[index]], file=paste0("~/mh-execute/inputs/mslt/", loc, "_mslt",".csv")) ### save to use in mh-excute
index <- index + 1
}
mslt_all <- mslt_inputs %>% bind_rows()
```
# Check mslt estimates using plots
``````{r,eval=FALSE}
mx <- mslt_all %>%
ggplot(aes(x=age, y=mx, colour=area)) +
geom_line() +
facet_wrap(~ sex) +
theme_classic()
mx
ylds_total <- mslt_all %>%
ggplot(aes(x=age, y=pyld_rate, colour=area)) +
geom_line() +
facet_wrap(~ sex) +
theme_classic()
ylds_total
```
# Code explained to run the PMSLT for one area (e.g. city regions, country);
## Inputs
1) mslt_df: above generated.
2) model parameters: age and sex cohort.
3) pifs: generated in mh-execute
4) relative risks diabetes: from literature
### Define model parameters
```{r, eval=FALSE}
i_age_cohort <- c(17, 22, 27, 32, 37, 42, 47, 52, 57, 62, 67, 72, 77, 82, 87, 92, 97)
i_sex <- c('male', 'female')
```
### Get potential impact fractions from mh-execute
Pifs are used in the scenario and non-disease (road injuries and lower respiratory disease) disease processes to modify incidence of diseases and mortality and yld rates for non-diseases (injuries and lower respiratory diseases).
For now this is place holder. We can generate pifs as an outcome of mh-execute and save in mslt-repo to run independently for each of the city regions.
The code below expands input pifs from 5-year age groups to one-year age groups. This is done to facilitate working with the mslt data in one-year age groups. Names of original pifs are also changed to match mslt inputs.
This file is not saved, and use in later code.
```{r, eval=FALSE}
pif_expanded <- read_csv("~/mh-mslt/input/pif_place_holder.csv") %>% ## PLACE HOLDER (from Aus)
mutate(pif_cyclist_deaths=1.1,
pif_pedestrian_deaths=1.1,
pif_cyclist_ylds=1.1,
pif_pedestrian_ylds=1.1,
pif_motor_deaths=1.1,
pif_motorcyclist_deaths=1.1,
pif_motor_ylds=1.1,
pif_motorcyclist_ylds=1.1,
pif_road_deaths=1.1,
pif_road_ylds=1.1,
pif_lri_ylds=0.03,
pif_lri_deaths=0.03,
pif_copd=0.03) %>%
dplyr::slice(rep(1:dplyr::n(), each = 5)) %>% ### expand to 1-yr group (repeat values)
mutate(age=rep(seq(16,100,1), times = 2))
```
### Get relative risks for diabetes on cardiovascular diseases
Diabetes is a risk factor for cardiovascular diseases. Parametric and probabilistic inputs.
To be included in metahit_functions.R Variables with normal distribution.
```{r, eval=FALSE}
### Parametric input
DIABETES_IHD_RR_F <- 2.82 ## c(2.82, CI (2.35, 3.38) get SD from CI
DIABETES_STROKE_RR_F <- 2.28 ## c(2.28) CI (1.93, 2.69) get SD from CI
DIABETES_IHD_RR_M <- 2.16 ## c(2.16, CI (1.82, 2.56) get SD from CI
DIABETES_STROKE_RR_M <- 1.83 ## c(1.83) CI (1.60, 2.08) get SD from CI
### Probabilistic input
# DIABETES_IHD_RR_F <- c(2.82, GetStDevRR(2.82, 2.35, 3.38))
# DIABETES_STROKE_RR_F <- c(2.28, GetStDevRR(2.28, 1.93, 2.69))
# DIABETES_IHD_RR_M <- c(2.16, GetStDevRR(2.16, 1.82, 2.56))
# DIABETES_STROKE_RR_M <- c(1.83, GetStDevRR(1.83, 1.60, 2.08))
```
### Run base case general life table
Inputs for a general life table are population numbers and mortality rates. The below process life tables with life year, health-adjusted life years, life expectancy and health-adjusted life expectancy for each age and sex cohort for each city region.
This code can be used independently if interested in life table analysis only.
Select an area of interest. Options are: liverpool, nottingham, bristol, northeast, greatermanchester, sheffield westmidlands, leeds, london, United Kingdom, England, East Midlands, East of England, Greater London, North East England, North West England, South East England, South West England , West Midlands, Yorkshire and the Humber, Northern Ireland, Scotland, Wales.
The example is for Bristol City Region.
NOTE: 1) mh-execute functions need updating with mh-mslt code; 2) Mortality trends not included.
```{r,eval=FALSE}
source("~/mh-mslt/R/functions_MSLT.R")
# dataframe of the age and sex cohorts (crossing just does a cross product) for loop below
age_sex_cohorts <- crossing(data.frame(age=i_age_cohort),
data.frame(sex=c('male', 'female'))) %>%
dplyr::mutate(cohort=paste0(age,"_",sex))
### Select inputs for area of interest
mslt_df <- read_csv(paste0(relative_path_execute, "/inputs/mslt/bristol_mslt.csv")) #use read_csv
general_life_table_list_bl <- list()
for (i in 1:nrow(age_sex_cohorts)){
suppressWarnings(
general_life_table_list_bl[[i]] <- RunLifeTable(
in_idata = mslt_df,
in_sex = age_sex_cohorts$sex[i],
in_mid_age = age_sex_cohorts$age[i],
death_rates = NA ## mortality trends data if available, not for now. We would need trends for each city region.
))
names(general_life_table_list_bl)[i] <- age_sex_cohorts$cohort[i]
}
# convert the list of dataframes to single dataframes
general_life_table_bl <- bind_rows(general_life_table_list_bl, .id = "age_group") %>%
mutate(age_group = as.numeric(gsub("_.*","",age_group)))
```
### Run base case disease processes
NOTE: 1) mh-execute functions need updating with mh-mslt code; 2) Mortality trends not included.
```{r,eval=FALSE}
source("~/mh-mslt/R/functions_MSLT.R")
disease_cohorts <- DISEASE_SHORT_NAMES %>%
# Exclude non-diseases, road injuries, and diseases with no pif
dplyr::filter(is_not_dis == 0 & acronym != 'no_pif' & acronym != 'other' ) %>%
dplyr::select(sname,acronym,males,females)
# adding the age and sex cohorts:
age_sex_disease_cohorts <- crossing(age_sex_cohorts,disease_cohorts) %>%
mutate(cohort=paste0(age,'_',sex,'_',sname)) %>%
# Exclude non-male diseases (and non-female if there were any)
filter( (sex=='male' & males==1) | (sex=='female' & females==1)) %>%
dplyr::select(age,sex,sname,acronym,cohort) %>%
# ishd and strk have the prerequisite disease dmt2
mutate(prerequsite=ifelse(sname %in% c("ishd","strk"),paste0(age,"_",sex,"_dmt2"),0)) %>%
# ensuring prequisites are calculated first
arrange(age,sex,prerequsite,sname)
disease_life_table_list_bl <- list()
for (i in 1:nrow(age_sex_disease_cohorts)){
disease_life_table_list_bl[[i]] <- RunDisease(
in_idata = mslt_df,
in_mid_age = age_sex_disease_cohorts$age[i],
in_sex = age_sex_disease_cohorts$sex[i],
in_disease = age_sex_disease_cohorts$sname[i],
incidence_trends = NA,
mortality_trends = NA
)
names(disease_life_table_list_bl)[i] <- age_sex_disease_cohorts$cohort[i]
}
```
### Run base case non-disease processes
```{r, eval=FALSE}
source("~/mh-mslt/R/functions_MSLT.R")
non_disease_cohorts <- DISEASE_SHORT_NAMES %>%
# Exclude non-diseases, road injuries, and diseases with no pif
dplyr::filter(is_not_dis == 1 & acronym != 'no_pif' & acronym != 'other' ) %>%
dplyr::select(sname,acronym,males,females)
# adding the age and sex cohorts:
age_sex_non_disease_cohorts <- crossing(age_sex_cohorts,non_disease_cohorts) %>%
mutate(cohort=paste0(age,'_',sex,'_',sname)) %>%
dplyr::select(age,sex,sname,acronym,cohort)
non_disease_life_table_list_bl <- list()
for (i in 1:nrow(age_sex_non_disease_cohorts )){
non_disease_life_table_list_bl[[i]] <- RunNonDisease(
in_idata = mslt_df,
in_sex = age_sex_non_disease_cohorts$sex[i],
in_mid_age = age_sex_non_disease_cohorts$age[i],
in_non_disease = age_sex_non_disease_cohorts$sname[i]
)
names(non_disease_life_table_list_bl)[i] <- age_sex_non_disease_cohorts$cohort[i]
}
```
### Run scenarios diseases processes
Re run diseases processes with modified incidence rates by the pif.
New incidence rates = bl incidence rates * (1-disease_pif).
```{r,eval=FALSE}
source("~/mh-mslt/R/functions_MSLT.R")
disease_relative_risks <- tribble(
~sex , ~prerequsite, ~disease , ~relative_risk ,
"male" , "dmt2" , "ishd" , DIABETES_IHD_RR_M ,
"female", "dmt2" , "ishd" , DIABETES_IHD_RR_F ,
"male" , "dmt2" , "strk" , DIABETES_STROKE_RR_M,
"female", "dmt2" , "strk" , DIABETES_STROKE_RR_F
)
disease_life_table_list_sc <- list()
for (i in 1:nrow(age_sex_disease_cohorts)){
# i=6
td1_age_sex <- mslt_df %>% ### new mslt dataframe with modified incidence rates
filter(age >= age_sex_disease_cohorts$age[i] & sex == age_sex_disease_cohorts$sex[i])
pif_colname <- paste0('pif_',age_sex_disease_cohorts$acronym[i])
pif_disease <- pif_expanded %>%
filter(age >= age_sex_disease_cohorts$age[i] & sex == age_sex_disease_cohorts$sex[i]) %>%
dplyr::select(age,sex,pif_colname)
# adjustment for diabetes effect on ihd and stroke
if(age_sex_disease_cohorts$prerequsite[i] != 0){
# get name for pif column
target_disease <- paste0("pif_",age_sex_disease_cohorts$acronym[i])
# get prerequisite disease cohort name (i.e., age_sex_dmt2 for diabetes)
dia_col <- age_sex_disease_cohorts$prerequsite[i]
# select relative risk of disease given diabetes (depends on sex, not age)
relative_risk <- disease_relative_risks %>%
filter(sex == age_sex_disease_cohorts$sex[i] &
disease == age_sex_disease_cohorts$sname[i]) %>%
pull(relative_risk)
# (store old pif)
# old_pif <- pif_disease[[target_disease]]
# diabetes pif = - { scenario prevalence - baseline prevalence } * (RR - 1) / { baseline prevalence * (RR - 1) + 1 }
scenario_prevalence <- disease_life_table_list_sc[[dia_col]]$px
baseline_prevalence <- disease_life_table_list_bl[[dia_col]]$px
pif_dia <- -(scenario_prevalence - baseline_prevalence)*(relative_risk-1)/
(baseline_prevalence * (relative_risk-1) + 1)
# modify pif for target disease: new pif = (1 - old pif) * (1 - diabetes pif)
pif_disease[[target_disease]] <- 1- (1-pif_disease[[target_disease]]) * (1-pif_dia)
# print(sum(old_pif-pif_disease[[target_disease]]))
}
incidence_colname <- paste0('incidence_', age_sex_disease_cohorts$sname[i])
new_col <- td1_age_sex%>%pull(incidence_colname) * (1 - (pif_disease%>%pull(pif_colname)))
new_col[is.na(new_col)] <- 0
td1_age_sex[[incidence_colname]] <- new_col
## Instead of idata, feed td to run scenarios. Now all diseases are run again, with the effect of diabetes
## on cardiovascular diseases taken into account.
disease_life_table_list_sc[[i]] <- RunDisease(
in_idata = td1_age_sex,
in_sex = age_sex_disease_cohorts$sex[i],
in_mid_age = age_sex_disease_cohorts$age[i],
in_disease = age_sex_disease_cohorts$sname[i],
incidence_trends = NA,
mortality_trends = NA
)
names(disease_life_table_list_sc)[i] <- age_sex_disease_cohorts$cohort[i]
}
for (cohort in age_sex_disease_cohorts$cohort) {
disease_life_table_list_sc[[cohort]]$diff_inc_disease <-
disease_life_table_list_sc[[cohort]]$incidence_disease - disease_life_table_list_bl[[cohort]]$incidence_disease
disease_life_table_list_sc[[cohort]]$diff_prev_disease <-
disease_life_table_list_sc[[cohort]]$px - disease_life_table_list_bl[[cohort]]$px
disease_life_table_list_sc[[cohort]]$diff_mort_disease <-
disease_life_table_list_sc[[cohort]]$mx - disease_life_table_list_bl[[cohort]]$mx
disease_life_table_list_sc[[cohort]]$diff_pylds_disease <-
(disease_life_table_list_sc[[cohort]]$px - disease_life_table_list_bl[[cohort]]$px) *
(disease_life_table_list_bl[[cohort]]$dw_disease)
}
# convert the list of dataframes to single dataframes
disease_life_table_bl <- bind_rows(disease_life_table_list_bl, .id = "age_sex_disease_cohort") %>%
mutate(age_sex_disease_cohort = as.numeric(gsub("_.*","",age_sex_disease_cohort))) %>%
dplyr::rename(age_group=age_sex_disease_cohort,
cause=disease)
disease_life_table_sc <- bind_rows(disease_life_table_list_sc, .id = "age_sex_disease_cohort") %>%
mutate(age_sex_disease_cohort = as.numeric(gsub("_.*","",age_sex_disease_cohort))) %>%
dplyr::rename(age_group=age_sex_disease_cohort,
cause=disease)
```
### Run scenarios non-diseases processes
Re run non_diseases life tables with modified mortality and ylds rates by the pif.
New rates = bl rates * (1-non_disease_pif).
TO DO: CHECK CALCULATIONS OF NON DISEASE PIFS. CALCULATION SCENARIO_NUMBERS/BASELINE_NUMBER?
```{r, eval=FALSE}
source("~/mh-mslt/R/functions_MSLT.R")
non_disease_life_table_list_sc <- list()
for (i in 1:nrow(age_sex_non_disease_cohorts)){
# i=6
td1_age_sex <- mslt_df %>% ### new mslt dataframe with modified injuries and lri deaths and ylds rates
filter(age >= age_sex_non_disease_cohorts$age[i] & sex == age_sex_non_disease_cohorts$sex[i])
pif_colname_deaths <- paste0('pif_',age_sex_non_disease_cohorts$acronym[i], '_deaths')
pif_colname_ylds <- paste0('pif_',age_sex_non_disease_cohorts$acronym[i], '_ylds')
pif_non_disease <- pif_expanded %>%
filter(age >= age_sex_non_disease_cohorts$age[i] & sex == age_sex_non_disease_cohorts$sex[i]) %>%
dplyr::select(age,sex,pif_colname_deaths,pif_colname_ylds)
death_colname <- paste0('deaths_rate_', age_sex_non_disease_cohorts$sname[i])
ylds_colname <- paste0('ylds_rate_', age_sex_non_disease_cohorts$sname[i])
new_deaths <- td1_age_sex%>%pull(death_colname) * (1 - (pif_non_disease%>%pull(pif_colname_deaths)))
new_deaths[is.na(new_deaths)] <- 0
new_ylds <- td1_age_sex%>%pull(ylds_colname) * (1 - (pif_non_disease%>%pull(pif_colname_ylds)))
new_ylds[is.na(new_ylds)] <- 0
td1_age_sex[[death_colname]] <- new_deaths
td1_age_sex[[ylds_colname]] <- new_ylds
## Instead of idata, feed td to run scenarios. Now all non_diseases are run again
non_disease_life_table_list_sc[[i]] <- RunNonDisease(
in_idata = td1_age_sex,
in_sex = age_sex_non_disease_cohorts$sex[i],
in_mid_age = age_sex_non_disease_cohorts$age[i],
in_non_disease = age_sex_non_disease_cohorts$sname[i]
)
names(non_disease_life_table_list_sc)[i] <- age_sex_non_disease_cohorts$cohort[i]
}
### Difference rates
for (cohort in age_sex_non_disease_cohorts$cohort) {
non_disease_life_table_list_sc[[cohort]]$diff_mort <-
non_disease_life_table_list_sc[[cohort]]$deaths_rate - non_disease_life_table_list_bl[[cohort]]$deaths_rate
non_disease_life_table_list_sc[[cohort]]$diff_pylds <-
non_disease_life_table_list_sc[[cohort]]$ylds_rate - non_disease_life_table_list_bl[[cohort]]$ylds_rate }
# convert the list of dataframes to single dataframes
non_disease_life_table_bl <- bind_rows(non_disease_life_table_list_bl, .id = "age_sex_non_disease_cohort") %>%
mutate(age_sex_non_disease_cohort = as.numeric(gsub("_.*","",age_sex_non_disease_cohort))) %>%
dplyr::rename(age_group=age_sex_non_disease_cohort,
cause=non_disease,
mx=deaths_rate)
non_disease_life_table_sc <- bind_rows(non_disease_life_table_list_sc, .id = "age_sex_non_disease_cohort") %>%
mutate(age_sex_non_disease_cohort = as.numeric(gsub("_.*","",age_sex_non_disease_cohort))) %>%
dplyr::rename(age_group=age_sex_non_disease_cohort,
cause=non_disease,
mx=deaths_rate)
```
### Collect changes in mortality and prevalent years lived with disability.
```{r, eval=FALSE}
### Diseases: Sum mortality rate and pylds change scenarios
mx_pylds_sc_total_disease_df <- disease_life_table_sc %>%
group_by(age_group,sex,age) %>%
dplyr::summarise(mortality_sum=sum(diff_mort_disease,na.rm=T),
pylds_sum=sum(diff_pylds_disease,na.rm=T)) %>%
ungroup() %>%
mutate(age_sex_cohort=paste0(age_group,'_',sex))
### Non-diseases
mx_pylds_sc_total_non_disease_df <- non_disease_life_table_sc %>%
group_by(age_group,sex,age) %>%
dplyr::summarise(mortality_sum=sum(diff_mort,na.rm=T),
pylds_sum=sum(diff_pylds,na.rm=T)) %>%
ungroup() %>%
mutate(age_sex_cohort=paste0(age_group,'_',sex))
```
### Run scenario life table
General life table re-run with modified mortality and Pylds rates
```{r, eval=FALSE}
general_life_table_list_sc <- list()
for (i in 1:nrow(age_sex_cohorts)){
# modify idata's mortality and pyld total for the said scenario
mx_pylds_sc_total_disease_df_cohort <- mx_pylds_sc_total_disease_df %>%
filter(age_sex_cohort==age_sex_cohorts$cohort[i]) %>%
dplyr::select(age,mortality_sum,pylds_sum)
mx_pylds_sc_total_non_disease_df_cohort <- mx_pylds_sc_total_non_disease_df %>%
filter(age_sex_cohort==age_sex_cohorts$cohort[i]) %>%
dplyr::select(age,mortality_sum,pylds_sum)
### Modify rates in static MSLT (pylds are always static, mx can include future trends)
#### With diseases changes in mortality and ylds
td2 <- mslt_df %>%
filter(sex==age_sex_cohorts$sex[i]) %>%
left_join(mx_pylds_sc_total_disease_df_cohort,by="age") %>%
mutate(mx=mx+replace_na(mortality_sum,0),
pyld_rate=pyld_rate+replace_na(pylds_sum,0)) %>%
dplyr::select(-mortality_sum,-pylds_sum)
#### With diseases changes in mortality and ylds
td3 <- td2 %>%
filter(sex==age_sex_cohorts$sex[i]) %>%
left_join(mx_pylds_sc_total_non_disease_df_cohort,by="age") %>%
mutate(mx=mx+replace_na(mortality_sum,0),
pyld_rate=pyld_rate+replace_na(pylds_sum,0)) %>%
dplyr::select(-mortality_sum,-pylds_sum)
# ### Modify death rates with future trends NOT AVAILABLE FOR METAHIT
# td3 <- death_projections %>%
# mutate(cohort=paste(age_cohort, sex, sep = "_")) %>% # variable to match change in mortality rates df
# filter(cohort==age_sex_cohorts$cohort[i]) %>%
# left_join(mx_pylds_sc_total_disease_df_cohort) %>%
# mutate(rate=rate+replace_na(mortality_sum,0))%>%
# dplyr::select(-mortality_sum,-pylds_sum)
suppressWarnings(
general_life_table_list_sc[[i]] <- RunLifeTable(
in_idata = td3,
in_sex = age_sex_cohorts$sex[i],
in_mid_age = age_sex_cohorts$age[i],
death_rates = NA
))
names(general_life_table_list_sc)[i] <- age_sex_cohorts$cohort[i]
}
# convert the list of dataframes to single dataframes
general_life_table_sc <- bind_rows(general_life_table_list_sc, .id = "age_group") %>%
mutate(age_group = as.numeric(gsub("_.*","",age_group)))
```
### Generate outputs data frame
```{r, eval=FALSE}
## In the following list 'output_life_table', 34 data frames are nested per age and sex cohort
## Outputs are generated following the index order of disease life tables baseline and scenarios where ##diabetes is firstcalculated as it impacts on cardiovascular diseases.
#### Diseases life tables
dia_index <- which(DISEASE_SHORT_NAMES$sname=='dmt2')
dia_order <- c(dia_index,c(1:nrow(DISEASE_SHORT_NAMES))[-dia_index])
### Combine diseases and general life tables for scenarios
### Step needed to calculate numbers (rates*people cohort)
scenario_d <- inner_join(disease_life_table_sc %>%
dplyr::select(age_group,sex,age,cause,incidence_disease,mx,px),
general_life_table_sc %>%
dplyr::select(age_group,sex,age,Lx,ex,Lwx,ewx),
by=c("age","sex","age_group")) %>%
mutate(intervention="sc")
baseline_d <- inner_join(disease_life_table_bl %>%
dplyr::select(age_group,sex,age,cause,incidence_disease,mx,px),
general_life_table_bl %>%
dplyr::select(age_group,sex,age,Lx,ex,Lwx,ewx),
by=c("age","sex","age_group")) %>%
mutate(intervention="bl")
disease_combined <- bind_rows(scenario_d,baseline_d) %>%
pivot_wider(names_from = intervention,
values_from = c(incidence_disease,mx,px,Lx,ex,Lwx,ewx)) %>%
mutate(inc_num_bl = incidence_disease_bl*(1-px_bl)*Lx_bl,
inc_num_sc = incidence_disease_sc*(1-px_sc)*Lx_sc,
inc_num_diff = inc_num_sc-inc_num_bl,
mx_num_bl = mx_bl*Lx_bl,
mx_num_sc = mx_sc*Lx_sc,
mx_num_diff = mx_num_sc-mx_num_bl) %>%
dplyr::select(c(-mx_sc, -mx_bl, -px_sc, -px_bl, -Lx_sc, -Lx_bl, -ex_sc, -ex_bl, -Lwx_sc, -Lwx_bl, -ewx_sc, -ewx_bl)) %>% dplyr::select(-starts_with(c( "incidence_"))) %>%
pivot_wider(names_from = cause,
values_from = inc_num_bl:mx_num_diff)
### Non_diseases life tables
scenario_nd <- inner_join(non_disease_life_table_sc %>%
dplyr::select(age_group,sex,age,cause,mx,ylds_rate, diff_mort, diff_pylds),
general_life_table_sc %>%
dplyr::select(age_group,sex,age,Lx),
by=c("age","sex","age_group")) %>%
mutate(intervention="sc")
baseline_nd <- inner_join(non_disease_life_table_sc %>%
dplyr::select(age_group,sex,age,cause,mx,ylds_rate, diff_mort, diff_pylds),
general_life_table_bl %>%
dplyr::select(age_group,sex,age,Lx),
by=c("age","sex","age_group")) %>%
mutate(intervention="bl")
non_disease_combined <- bind_rows(scenario_nd,baseline_nd) %>%
pivot_wider(names_from = intervention,
values_from = c(mx, ylds_rate, Lx,
values_fill=0)) %>%
mutate(ylds_num_bl = ylds_rate_bl*Lx_bl,
ylds_num_sc = ylds_rate_sc*Lx_sc,
ylds_num_diff = ylds_num_sc-ylds_num_bl,
mx_num_bl = mx_bl*Lx_bl,
mx_num_sc = mx_sc*Lx_sc,
mx_num_diff = mx_num_sc-mx_num_bl) %>%
dplyr::select(c(cause, sex, age, age_group, ylds_num_bl, ylds_num_sc, ylds_num_diff, mx_num_bl, mx_num_sc, mx_num_diff)) %>%
pivot_wider(names_from = cause,
values_from = ylds_num_bl:mx_num_diff)
### General life tables
general_lf <- bind_rows(
general_life_table_sc %>%
dplyr::select(age_group,sex,age,Lx,ex,Lwx,ewx) %>%
mutate(intervention="sc"),
general_life_table_bl %>%
dplyr::select(age_group,sex,age,Lx,ex,Lwx,ewx) %>%
mutate(intervention="bl")) %>%
pivot_wider(names_from = intervention,
values_from = c(Lx,ex,Lwx,ewx),
values_fill=0) %>%
mutate(Lx_diff = Lx_sc-Lx_bl,
Lwx_diff = Lwx_sc-Lwx_bl,
ex_diff = ex_sc-ex_bl,
ewx_diff = ewx_sc-ewx_bl)
######## Dataframe with all outputs by age and sex cohort over the simulation years (years of the cohort)
output_df <- inner_join(disease_combined,
general_lf,
by=c("age","sex","age_group")) %>%
inner_join(non_disease_combined,
by=c("age","sex","age_group"))
### Present changes in life expectancy, life years, health-adjusted life years and burden by cause (diseases and non-diseases) by age and sex cohort
```
# Final function to use in mh-execute to run the PMSLT
Generates a list with outputs for: TO DO, there may be too many.
```{r, eval=FALSE, echo=FALSE}
source("~/mh-mslt/R/RunMSLT.R")
outputs <- RunMSLT(
mslt_df=read_csv("~/mh-execute/inputs/mslt/bristol_mslt.csv"),
disease_names=readRDS("~/mh-mslt/output/parameters/DISEASE_SHORT_NAMES.rds"),
i_sex=c("male", "female"),
i_age_cohort=seq(from=17, to=97, by =5),
pif=read_csv("~/mh-mslt/input/pif_place_holder.csv"))
```
# Examples of output
## Tables
```{r, function_hook, echo=FALSE}
LifeYears <- knitr::kable(outputs[["LifeYears"]], booktabs = T, caption = "Life Years", longtable = TRUE)
kableExtra::kable_styling(LifeYears, latex_options = "striped", position = "float_left")
LifeExpectancy <- knitr::kable(outputs[["LifeExpectancy"]], booktabs = T, caption = "Life Expectancy", longtable = TRUE)
kableExtra::kable_styling(LifeExpectancy, latex_options = "striped", position = "float_left")
Diseases <- knitr::kable(outputs[["Cause"]], booktabs = T, caption = "Diseases", longtable = TRUE)
kableExtra::kable_styling(Diseases, latex_options = "striped", position = "float_left")
```
## Graphs
Examples, manual entry of diseases to be changed.
```{r, echo=FALSE}
output_df_agg_sex <- outputs[["output_df_agg_sex"]]
output_df_agg_all <- outputs[["output_df_agg_all"]]
### Incidence
data_f <- dplyr::filter(output_df_agg_sex, sex == "female") %>% dplyr::select("sex", "year", contains("diff"))
data_m <- dplyr::filter(output_df_agg_sex, sex == "male") %>% dplyr::select("sex", "year", contains("diff"))
data_t <- output_df_agg_all %>% dplyr::select("year", contains("diff")) %>%
mutate(sex ="total")
data <- bind_rows(data_f, data_m, data_t)
plot_1 <- data %>%
ggplot(aes(x = year)) +
geom_smooth(aes(y = inc_num_diff_brsc,method = "loess", color="Breast cancer")) +
geom_smooth(aes(y = inc_num_diff_carc, method = "loess", color="Colon cancer")) +
geom_smooth(aes(y = inc_num_diff_dmt2, method = "loess", color="Type 2 diabetes")) +
geom_smooth(aes(y = inc_num_diff_tbalc, method = "loess", color="Lung cancer")) +
geom_smooth(aes(y = inc_num_diff_utrc, method = "loess", color="Uterine cancer")) +
geom_smooth(aes(y = inc_num_diff_ishd, method = "loess", color="Ischemic heart disease")) +
geom_smooth(aes(y = inc_num_diff_strk, method = "loess", color="Stroke")) +
geom_smooth(aes(y = inc_num_diff_copd, method = "loess", color="COPD")) +
labs(x = "Simulation year",
title = paste("Changes in disease incidence over time"),
y = "Numbers") +
labs(color="") +
theme(plot.title = element_text(hjust = 0.5, size = 12,face="bold"),
axis.text=element_text(size=10),
axis.title=element_text(size=10)) +
theme_classic() +
geom_hline(yintercept=0, linetype='dashed', color = 'black')+
facet_wrap(. ~ sex) +
theme(
strip.background = element_blank() ) +
scale_fill_brewer(name = "Sex") +
theme(legend.position = "bottom",
legend.text = element_text(colour = "black", size = 8),
legend.key = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
plot_1
### Deaths
plot_2 <- data %>%
ggplot(aes(x = year)) +
geom_smooth(aes(y = mx_num_diff_brsc,method = "loess", color="Breast cancer")) +
geom_smooth(aes(y = mx_num_diff_carc, method = "loess", color="Colon cancer")) +
geom_smooth(aes(y = mx_num_diff_dmt2, method = "loess", color="Type 2 diabetes")) +
geom_smooth(aes(y = mx_num_diff_tbalc, method = "loess", color="Lung cancer")) +
geom_smooth(aes(y = mx_num_diff_utrc, method = "loess", color="Uterine cancer")) +
geom_smooth(aes(y = mx_num_diff_ishd, method = "loess", color="Ischemic heart disease")) +
geom_smooth(aes(y = mx_num_diff_strk, method = "loess", color="Stroke")) +
geom_smooth(aes(y = mx_num_diff_copd, method = "loess", color="COPD")) +
labs(x = "Simulation year",
title = paste("Changes in disease mortality over time"),
y = "Numbers") +
labs(color="") +
theme(plot.title = element_text(hjust = 0.5, size = 12,face="bold"),
axis.text=element_text(size=10),
axis.title=element_text(size=10)) +
theme_classic() +
geom_hline(yintercept=0, linetype='dashed', color = 'black')+
facet_wrap(. ~ sex) +
theme(
strip.background = element_blank() ) +
scale_fill_brewer(name = "Sex") +
theme(legend.position = "bottom",
legend.text = element_text(colour = "black", size = 8),
legend.key = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
plot_2
#### Graph for changes in life years
data_f <- dplyr::filter(output_df_agg_sex, sex == "female") %>% dplyr::select("sex", "year", "Lx_diff", "Lwx_diff")
data_m <- dplyr::filter(output_df_agg_sex, sex == "male") %>% dplyr::select("sex", "year", "Lx_diff", "Lwx_diff")
data_t <- output_df_agg_all %>% dplyr::select("year", "Lx_diff", "Lwx_diff")
plot_3 <- data_t %>%
ggplot(aes(x = year, y = Lx_diff)) +
geom_smooth(method = "loess", aes(color= "Life years total")) +
geom_smooth(data = data_t, aes(y = Lwx_diff, method = "loess", color= "HALYs total")) +
geom_smooth(data = data_f, aes(y = Lx_diff, method = "loess", color= "Life years female")) +
geom_smooth(data = data_f, aes(y = Lwx_diff, method = "loess", color= "HALYs female")) +
geom_smooth(data = data_m, aes(y = Lx_diff, method = "loess", color= "Life years male")) +
geom_smooth(data = data_m, aes(y = Lwx_diff, method = "loess", color= "HALYs male")) +
labs(x = "Simulation year",
title = paste("Difference life years and health-adjusted life years"),
y = "Numbers") +
labs(color="") +
theme(plot.title = element_text(hjust = 0.5, size = 12,face="bold"),
axis.text=element_text(size=10),
axis.title=element_text(size=10)) +
theme(legend.position = "right",
legend.title = element_blank(),
legend.text = element_text(colour = "black", size = 10),
legend.key = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
theme_classic() +
geom_hline(yintercept=0, linetype='dashed', color = 'black')
plot_3
```