-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathicaria_progress.R
1338 lines (1162 loc) · 47.8 KB
/
icaria_progress.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
library(redcapAPI)
#library(xlsx)
library(openxlsx)
#library(lubridate)
library(stringr)
library(english)
library(echarts4r)
kCRFAZiEvents <- c(
'epipenta1_v0_recru_arm_1', # EPI-Penta1 V0 Recruit AZi/Pbo1
'epimvr1_v4_iptisp4_arm_1', # EPI-MVR1 V4 IPTi-SP4 AZi/Pbo2
'epimvr2_v6_iptisp6_arm_1' # EPI-MVR2 V6 IPTi-SP6 AZi/Pbo3
)
kCRFHHEvent <- c(
'hhafter_1st_dose_o_arm_1', # HH-After 1st dose of AZi/Pbo
'hhafter_2nd_dose_o_arm_1', # HH-After 2nd dose of AZi/Pbo
'hhafter_3rd_dose_o_arm_1', # HH-After 3rd dose of AZi/Pbo
'hhat_18th_month_of_arm_1' # HH-At 18th month of age
)
kCRFNonAZiEvents <- c(
'epipenta2_v1_iptis_arm_1', # 3 EPI-Penta2 V1 IPTi-SP1
'epipenta3_v2_iptis_arm_1', # 4 EPI-Penta3 V2 IPTi-SP2
'epivita_v3_iptisp3_arm_1', # 5 EPI-VitA V3 IPTi-SP3
'epivita_v5_iptisp5_arm_1' # 8 EPI-VitA V5 IPTi-SP5
)
kCRFEvents <- c(
kCRFAZiEvents[1], # EPI-Penta1 V0 Recruit AZi/Pbo1
kCRFHHEvent[1], # HH-After 1st dose of AZi/Pbo
kCRFNonAZiEvents[1], # EPI-Penta2 V1 IPTi-SP1
kCRFNonAZiEvents[2], # EPI-Penta3 V2 IPTi-SP2
kCRFNonAZiEvents[3], # EPI-VitA V3 IPTi-SP3
kCRFAZiEvents[2], # EPI-MVR1 V4 IPTi-SP4 AZi/Pbo2
kCRFHHEvent[2], # HH-After 2nd dose of AZi/Pbo
kCRFNonAZiEvents[4], # EPI-VitA V5 IPTi-SP5
kCRFAZiEvents[3], # EPI-MVR2 V6 IPTi-SP6 AZi/Pbo3
kCRFHHEvent[3], # HH-After 3rd dose of AZi/Pbo
kCRFHHEvent[4] # HH-At 18th month of age
)
kEventsDateVars <- c(
'screening_date', # EPI-Penta1 V0 Recruit AZi/Pbo1
'hh_date', # HH-After 1st dose of AZi/Pbo
'int_date', # EPI-Penta2 V1 IPTi-SP1
'int_date', # EPI-Penta3 V2 IPTi-SP2
'int_date', # EPI-VitA V3 IPTi-SP3
'int_date', # EPI-MVR1 V4 IPTi-SP4 AZi/Pbo2
'hh_date', # HH-After 2nd dose of AZi/Pbo
'int_date', # EPI-VitA V5 IPTi-SP5
'int_date', # EPI-MVR2 V6 IPTi-SP6 AZi/Pbo3
'hh_date', # HH-After 3rd dose of AZi/Pbo
'hh_date' # HH-At 18th month of age
)
kCOHORTIPTiEvents <- c(
'ipti_1__10_weeks_r_arm_1', # IPTi 1 - 10 weeks Recruit
'ipti_2__14_weeks_arm_1', # IPTi 2 - 14 weeks
'ipti_3__9_months_arm_1' # IPTi 3 - 9 months
)
kCOHORTNonIPTiEvents <- c(
'mrv_2__15_months_arm_1', # MRV 2 - 15 months
'after_mrv_2_arm_1' # After MRV 2
)
ReadData <- function(api.url, api.token, variables = NULL) {
#browser()
rcon <- redcapConnection(api.url, api.token)
data <- exportRecords(rcon, factors = F, labels = F, fields = variables)
}
ExportDataAllHealthFacilities <- function(redcap.api.url, redcap.tokens) {
# Export data from each of the ICARIA Health Facility REDCap projects and
# append all data sets in a unique data frame for analisys.
#
# Args:
# redcap.api.url: String representing the URL to access the REDCap API.
# redcap.tokens: List of tokens (Strings) to access each of the ICARIA
# REDCap projects.
#
# Returns:
# Data frame with all the data together of different ICARIA Health
# Facilities.
data <- data.frame()
for (hf in names(redcap.tokens)) {
#browser()
if (hf != "profile" & hf != "cohort") {
print(paste("Extracting data from", hf))
# TODO: The set of variables to be stracted from REDCap projects should be
# predifined in order to improve efficiency
hf.data <- ReadData(redcap.api.url, redcap.tokens[[hf]])
if ((substr(hf, 1, 2) == "HF") & (substr(hf, 5, 5) == ".")) {
hf <- substr(hf, 1, 4)
} else {
hf <- hf
}
hf.data <- cbind(hf= hf, hf.data) ########################################################################################################################################
data <- rbind(data, hf.data)
}
}
# In order to count data by HF (table), we need to encode HF column as factor
data$hf <- as.factor(data$hf)
return(data)
}
ExportDataTrialProfile <- function(redcap.api.url, redcap.tokens) {
# Export data from the Trial Profile REDCap project containing the daily
# aggregated reports of the Screening Log in the ICARIA Health Facilities.
#
# Args:
# redcap.api.url: String representing the URL to access the REDCap API.
# redcap.tokens: List of tokens (Strings) to access each of the ICARIA
# REDCap projects, among them the Trial Profile project.
#
# Returns:
# Data frame with the Trial Profile data.
print("Extracting data from profile")
profile <- ReadData(redcap.api.url, redcap.tokens[['profile']])
return(profile)
}
ExportDataCohort <- function(redcap.api.url, redcap.tokens) {
# Export data from the COHORT ancillary study REDCap project.
#
# Args:
# redcap.api.url: String representing the URL to access the REDCap API.
# redcap.tokens: List of tokens (Strings) to access each of the ICARIA
# REDCap projects, among them the COHORT project.
#
# Returns:
# Data frame with the Trial Profile data.
print("Extracting data from cohort")
cohort <- ReadData(redcap.api.url, redcap.tokens[['cohort']])
# Health Facility IDs are scattered in three variables: hf_bombali,
# hf_port_loko and hf_tonkolili
hf.columns <- c("hf_bombali", "hf_port_loko", "hf_tonkolili")
record.in.hf <- cohort[, c("record_id", hf.columns)]
record.in.hf$hf <- rowSums(record.in.hf[, hf.columns], na.rm = T)
record.in.hf <- record.in.hf[which(record.in.hf$hf != 0), ]
cohort$hf <- lapply(cohort$record_id, function(id) {
record.in.hf$hf[which(record.in.hf$record_id == id)] })
cohort$hf <- as.factor(as.numeric(cohort$hf))
return(cohort)
}
SummarizeData <- function(hf.list, profile, data) {
# Compute the data frame to produce the general progress table.
#
# Args:
# hf.list: List of ICARIA health facilities IDs (integers) to be summarized.
# profile: Data frame containing the trial profile data extracted from the
# ICARIA Trial Profile REDCap project.
# data: Data frame containing the CRF data extracted from the ICARIA
# REDCap projects.
#
# Returns:
# Data frame with all the indicators by health facility to produce the
# general progress table of the report.
profile.sum <- SummarizeProfileData(hf.list, profile)
crf.sum <- SummarizeCRFData(hf.list, data)
# Merge both data frames and convert all columns to the same type: numeric
summary <- data.frame(profile.sum, crf.sum[, -1])
summary$hf.list <- as.character(summary$hf.list) # Remove factors
summary <- as.data.frame(lapply(summary, as.numeric))
return(summary)
}
SummarizeProfileData <- function(hf.list, profile) {
# Compute and returns the sum of the trial profile variables by health
# facility. These variables are:
# (1) n_penta1: Number of children vaccinated with Penta1
# (2) n_approached: Number of children vaccinated with Penta1 and approached
# by the ICARIA nurses
# (3) n_underweight: Number of pre-srceened children not selected for
# screening due to underweight
# (4) n_over_age: Number of pre-screened children not selected for
# screening due to the age
# (5) n_refusal: Number of children going through the informed consent
# process in which their caretakers do not accept to sign
# the informed content form.
# (6) n_consent: Number of children going through the informed consent
# process in which their caretakers do sign the informed
# content form.
#
# Args:
# hf.list: List of ICARIA health facilities IDs (integers) to be summarized.
# profile: Data frame containing the trial profile data extracted from the
# ICARIA Trial Profile REDCap project.
#
# Returns:
# Data frame with one row per health facility and one column per variable to
# be summarized.
# Trial profile variables to be summarized
vars <- c('n_penta1', 'n_approached', 'n_underweight', 'n_over_age',
'n_consent')
# Ordered variables to be visualized in the progress report
ordered.vars <- c('hf.list', 'n_penta1', 'n_approached', 'n_underweight',
'n_over_age', 'n_refusal', 'n_consent')
# Collapse HF IDs in the hf column no matter in which district the HF is
profile$hf <- rowSums(
x = profile[, c("hf_bombali", "hf_tonkolili", "hf_port_loko")],
na.rm = T
)
# Summarize all trial profiles variables by health facility
summary <- data.frame(hf.list)
for (var in vars) {
col <- c()
for (hf in hf.list) {
col[hf] <- sum(profile[which(profile$hf == hf), var], na.rm = T)
}
summary[var] <- col
}
# Compute the number of refusals based on the captured variables
summary$n_refusal <- summary$n_approached - summary$n_underweight -
summary$n_over_age - summary$n_consent
# Reorder columns to rescpect the progress report table design
summary <- summary[, ordered.vars]
return(summary)
}
CountNumberOfResponses <- function(data, var, val, event = NULL, by.hf = T) {
# Count number of concrete responses in a concrete variable either generally
# or by REDCap event and by ICARIA Health Facility.
#
# Args:
# data: Data frame containing the CRF data extracted from the ICARIA REDCap
# projects.
# var: Variable name to count.
# val: String represeting the reponse value to count.
# event: String representing the REDCap event to filter by when counting
# responses.
# by.hf: True/False if result must be disaggregated by ICARIA Health
# Facility or not.
#
# Returns:
# List of occurences by ICARIA Health Facility.
if (nrow(data) == 0)
return(0)
if (is.null(event)){
condition <- which(data[var] == val)
} else {
condition <- which(data['redcap_event_name'] == event & data[var] == val)
}
if (by.hf) {
col <- table(data[condition, c('hf', var)])
} else {
col <- table(data[condition, var])
}
if (length(col) == 0)
return(0)
return(col)
}
GetMigrations <- function(data) {
# Compute and returns a data frame with one row per participant migration
# event. A migration can be an OUT migration, when the participant is leaving
# a health facility catchment area or an IN migration, when the participant
# is coming to a different catchment area in which s/he was recruited. If a
# participant is moving form one ICARIA health facility to another, in this
# case s/he will have two rows here, one OUT migration and one IN migration.
# This data frame is composed by the following columns:
# (1) hf: String representing the ICARIA HF code.
# (2) record_id: Integer representing the REDCap record id of the migrated
# participant.
# (3) mig_date: Date of the migration.
# (4) origin: Integer representing the ID of the origin HF.
# (5) destination: Integer representing the ID of the destination HF.
# (6) in_mig: Boolean indicating whether or not this is an IN
# migration.
# (7) azi2_date: Date of the second (middle) AZi/Pbo dose.
#
# Args:
# data: Data frame containing ALL Health Facility data sets extracted
# from the ICARIA REDCap projects.
#
# Returns:
# Data frame with one row per migration.
# Prepare list of variables to be extracted from the project data frame
origin.prefix <- "mig_origin_hf_"
destination.prefix <- "mig_destination_hf_"
districts <- c("bombali", "port_loko", "tonkolili")
origin.columns <- paste0(origin.prefix, districts)
destination.columns <- paste0(destination.prefix, districts)
mig.columns <- c("hf", "record_id", "mig_reported_date", origin.columns,
destination.columns)
# Extract migrations variables from the project data frame
migrations <- data[which(data$migration_complete == 2), mig.columns]
# Collapse the origin and destination of the migration in just two columns
# independently of the district
migrations$origin <- rowSums(migrations[, origin.columns], na.rm = T)
migrations$destination <- rowSums(migrations[, destination.columns],
na.rm = T)
# Compute if each migration is an IN or OUT migration
migrations$in_mig <-
as.integer(substring(migrations$hf, 3)) == migrations$destination
# Include the dates all the events after recruitment toknow when the
# migration occurred (between which two events).
# TODO: There's no 2nd doses yet. Needs to be tested! (20210721)
merge.columns <- c("hf", "record_id")
for (i in 1:length(kCRFEvents)) {
event.date <- data[
which(data$redcap_event_name == kCRFEvents[i]),
c(merge.columns, kEventsDateVars[i])
]
date.column <- kCRFEvents[i]
colnames(event.date)[3] <- date.column
event.date <- event.date[which(!is.na(event.date[date.column])), ]
migrations <- merge(
x = migrations,
y = event.date,
by = merge.columns,
all.x = T
)
}
# Filter non-relevant columns
relevant.cols <- c("hf", "record_id", "mig_reported_date", "origin",
"destination", "in_mig", kCRFEvents)
migrations <- migrations[, relevant.cols]
return(migrations)
}
SummarizeCRFData <- function(hf.list, data) {
# Compute and returns the sum of the recruitment progress variables by health
# facility. These variables are:
# (1) n_consent: Number of children going through the informed consent
# process in which their caretakers do sign the informed
# content form.
# (2) n_failures: Number of total screening failures
# (3) n_ic_10w: Number of screening failures due to more than 10 weeks
# inclusion criterion.
# (4) n_ic_penta1: Number of screening failures due to non-eligibility to
# receive Penta1 inclusion criterion.
# (5) n_ic_weight: Number of screening failures due to underweight
# inclusion criterion.
# (6) n_ic_res: Number of screening failures due to residency inclusion
# criterion.
# (7) n_ec_study: Number of screening failures due to other study
# exclusion criterion.
# (8) n_ec_allergy: Number of screening failures due to allergies exclusion
# criterion.
# (9) n_ec_disease: Number of screening failures due to serious disease
# exclusion criterion.
# (10) n_ec_illness: Number of screening failures due to acute illness
# exclusion criterion.
# (11) n_random: Number of randomized participants
# (12) n_azi1: Number of 1st AZi/Pbo doses already administered
# (13) n_azi2: Number of 2nd AZi/Pbo doses already administered
# (14) n_azi3: Number of 3rd AZi/Pbo doses already administered
# (15) n_nc: Number of non-compliant participants
# (16) n_wdw: Number of total study withdrawals
# (17) n_wdw_parent: Number of withdrawals by parent request
# (18) n_wdw_inv: Number of withdrawals by investigator request
# (19) n_wdw_mig: Number of withdrawals due to migration
# (20) n_wdw_other: Number of withdrawals due to other reason
# (21) n_deaths: Number of participants who die
#
# Args:
# hf.list: List of ICARIA health facilities IDs (integers) to be summarized.
# data: Data frame containing ALL Health Facility data sets extracted
# from the ICARIA REDCap projects.
#
# Returns:
# Data frame with one row per health facility and one column per variable to
# be summarized.
# CRF variables (equal to 0) to be summarized
vars.to.0 <- c('ic_age_10w', 'ic_age', 'ic_weight', 'ic_residency_now',
'eligible')
# CRF variables (equal to 1) to be summarized
vars.to.1 <- c('screening_consent', 'ec_other_study', 'ec_allergies',
'ec_serious_disease', 'ec_acute_illness', 'eligible')
# CRF variables (equal to 1) by AZi event to be summarized
vars.to.1.event <- c('int_azi')
# Withdrawal reasons
wdw.reasons <- c(
1, # Parent/guardian's request
2, # Investigator's decision
3, # Migration
88 # Other
)
# Ordered variables to be visualized in the progress report
ordered.vars <- c('hf.list', 'screening_consent_1', 'eligible_0',
'ic_age_10w_0', 'ic_age_0', 'ic_weight_0',
'ic_residency_now_0', 'ec_other_study_1', 'ec_allergies_1',
'ec_serious_disease_1', 'ec_acute_illness_1', 'eligible_1',
'epipenta1_v0_recru_arm_1_int_azi_1',
'epimvr1_v4_iptisp4_arm_1_int_azi_1',
'epimvr2_v6_iptisp6_arm_1_int_azi_1', 'nc', 'n_wdw',
'wdrawal_reason_1', 'wdrawal_reason_2', 'wdrawal_reason_3',
'wdrawal_reason_88', 'death_complete')
# Variable names
var.names <- c('hf.list', 'n_consent', 'n_failures', 'n_ic_10w',
'n_ic_penta1', 'n_ic_weight', 'n_ic_res', 'n_ec_study',
'n_ec_allergy', 'n_ec_disease', 'n_ec_illness', 'n_random',
'n_azi1', 'n_azi2', 'n_azi3', 'n_nc', 'n_wdw', 'n_wdw_parent',
'n_wdw_inv', 'n_wdw_mig', 'n_wdw_other', 'n_deaths')
# Summarize all CRF variables by health facility
summary <- data.frame(hf.list)
# Summarize CRF variables in which the value of interest is 0
for (var in vars.to.0) {
column.name <- paste(var, "0", sep = "_")
summary[column.name] <- CountNumberOfResponses(data, var, 0)
}
# Summarize CRF variables in which the value of interest is 1
for (var in vars.to.1) {
column.name <- paste(var, "1", sep = "_")
summary[column.name] <- CountNumberOfResponses(data, var, 1)
}
# Summarize CRF variables by event in which the value of interest is 1
for (var in vars.to.1.event) {
for (event in kCRFAZiEvents) {
column.name <- paste(event, var, "1", sep = "_")
summary[column.name] <- CountNumberOfResponses(data, var, 1, event)
}
}
# Summarize non-compliant participants
nc.column <- 'nc'
data[nc.column] <- startsWith(data$child_fu_status, 'NC@')
summary[nc.column] <- CountNumberOfResponses(data, nc.column, T)
# Summarize withdrawal reasons
for (reason in wdw.reasons) {
var <- 'wdrawal_reason'
column.name <- paste(var, reason, sep = "_")
summary[column.name] <- CountNumberOfResponses(data, var, reason)
}
# Summarize deaths
var <- 'death_complete'
summary[var] <- CountNumberOfResponses(data, var, 2)
# Aggregated columns: n_wdw
summary$n_wdw <- summary$wdrawal_reason_1 + summary$wdrawal_reason_2 +
summary$wdrawal_reason_3 + summary$wdrawal_reason_88
# Apply migrations to summary
migrations <- GetMigrations(data)
# We have always to substract the number of IN Migrations to the following
# summary data frame columns: screening_consent_1 (ICF Signed) and eligible_1
# (Randomized)
summary$in_mig <- as.vector(table(migrations$hf[migrations$in_mig]))
summary$screening_consent_1 <- summary$screening_consent_1 - summary$in_mig
summary$eligible_1 <- summary$eligible_1 - summary$in_mig
# Check when the IN migrations occurred and substract in the AZi/Pbo events
# where the data collection was done in the previous health facility. I.e.
# substract when migration date is after event date
for (azi.event in kCRFAZiEvents) {
# Compute the number of IN migrations per HF that occurred after the event
# date
filter <- migrations$in_mig &
migrations$mig_reported_date > migrations[[azi.event]]
mig.after.event <- migrations[which(filter), ]
summary$in_mig <-
as.vector(table(mig.after.event$hf[mig.after.event$in_mig]))
# Substract this number as this activity occurred in the previous HF
azi.col.name <- paste0(azi.event, "_int_azi_1")
summary[azi.col.name] <- summary[[azi.col.name]] - summary$in_mig
}
# Reorder and rename columns to rescpect the progress report table design
summary <- summary[, ordered.vars]
colnames(summary) <- var.names
return(summary)
}
SummarizeCohortData <- function(hf.list, data) {
# Compute and returns the sum of the recruitment progress variables by health
# facility in the COHORT ancillary study. These variables are:
# (1) n_recruited: Number of children going through the informed consent
# process in which their caretakers do sign the informed
# content form.
# (2) n_ipti1: Number of children who took the 1st dose of IPTi.
# (3) n_ipti2: Number of children who took the 2nd dose of IPTi.
# (4) n_ipti3: Number of children who took the 3rd dose of IPTi.
# (5) n_mrv2: Number of children who took the 2nd dose of MRV.
# (6) n_after_mrv2: Number of children who was visited for the end of follow
# up visit.
#
# Args:
# hf.list: List of ICARIA health facilities IDs (integers) to be summarized.
# data: Data frame containing COHORT study data.
#
# Returns:
# Data frame with one row per health facility and one column per variable to
# be summarized.
# Variable names
var.names <- c('hf.list', 'n_recruited', 'n_ipti1', 'n_ipti2', 'n_ipti3',
'n_mrv2', 'n_after_mrv2')
# Summarize all COHORT variables by health facility
summary <- data.frame(hf.list)
# Get number of recruited participants. In this case, unlike the TRIAL
# projects, we don't have the eligible variable. So we have to check how many
# study number do we have
summary['n_recruited'] <- table(data$hf[which(!is.na(data$study_number))])
# Summarize IPTi events
for (event in kCOHORTIPTiEvents) {
# The Vaccination Status DCI is included in all EPI visits in which IPTi is
# administered according to the Sierra Leona Immunization Program, although
# it is not administered in all visits (I don't know why - missing U5 card?)
var <- 'intervention_complete'
summary[event] <- CountNumberOfResponses(data, var, 2, event)
}
# Summarize NON IPTi events
for (event in kCOHORTNonIPTiEvents) {
# The Clinical History is included in all EPI visits in which IPTi is not
# administered.
var <- 'clinical_history_complete'
summary[event] <- CountNumberOfResponses(data, var, 2, event)
}
# Convert all columns to the same type: numeric
summary$hf.list <- as.character(summary$hf.list) # Remove factors
summary <- as.data.frame(lapply(summary, as.numeric))
# Rename columns to respect the progress report table design
colnames(summary) <- var.names
return(summary)
}
NextWeekDay <- function(date, week.day) {
# Compute the date of the requested next week day since the provided date.
#
# Args:
# date: Date from which the date of the next day of the week will be
# calculated.
# week.day: Integer representing the next day of the week (1 = Sunday,
# 2 = Monday, 3 = Tuesday, 4 = Wednesday, 5 = Thursday, 6 = Friday
# and 7 = Saturday)
#
# Returns:
# Date of the requested next week day since the provided date.
date <- as.Date(date)
diff <- week.day - wday(date)
if( diff < 0 ) {
diff <- diff + 7
}
return(date + diff)
}
GetHealthFacilityTimeSeries <- function(hf.id, hf.data, report.date,
precision = "w", profile = NULL) {
# Compute and returns the status of the recruitment progress variables in a
# time series fashion for concrete ICARIA Health Facility. These variables
# are:
# (1) n_random: Number of randomized participants
# (2) n_in_mig: Number of in-migrated participants
# (3) n_out_mig: Number of out-migrated participants
# (4) n_azi1: Number of 1st AZi/Pbo doses already administered
# (5) n_hh1: Number of household visits already performed for AZi1
# (6) n_penta2: Number of participants who came for Penta2
# (7) n_penta3: Number of participants who came for Penta3
# (8) n_vit_a1: Number of participants who came for 1st Vitamin A dose
# (9) n_azi2: Number of 2nd AZi/Pbo doses already administered
# (10) n_hh2: Number of household visits already performed for AZi2
# (11) n_vit_a2: Number of participants who came for 2nd Vitamin A dose
# (12) n_azi3: Number of 3rd AZi/Pbo doses already administered
# (13) n_hh3: Number of household visits already performed for AZi3
# (14) n_end_fu: Number of participants who completed follow up
# (15) n_wdw: Number of total study withdrawals
# (16) n_deaths: Number of participants who die
#
# Args:
# hf.id: Integer representing the ID of the ICARIA Health Facility.
# hf.data: Data frame containing the Health Facility data set extracted
# from the corresponding ICARIA REDCap project.
# report.date: Date until the time series has to be reported.
# precision: Character indicating the precision of the time series
# (d = daily, w = weekly, m = monthy, y = yearly)
#
# Returns:
# Data frame with one row time point (depending on precision) and one column
# per variable.
# TODO: Implement precision feature. Right now, only weekly prescion is
# coded.
time.series <- data.frame()
# Ordered variables to be visualized in the progress report
profile.vars <- c('n_penta1', 'n_underweight', 'n_over_age', 'n_consent')
ordered.vars <- c(
c('date'),
profile.vars,
c('n_random', 'n_in_mig', 'n_out_mig'),
kCRFEvents,
c('n_wdw', 'n_deaths')
)
# Variable names
var.names <- c('date', 'n_penta1', 'n_underweight', 'n_over_age', 'n_consent',
'n_random', 'n_in_mig', 'n_out_mig', 'n_azi1', 'n_hh1',
'n_penta2', 'n_penta3', 'n_vit_a1', 'n_azi2', 'n_hh2',
'n_vit_a2', 'n_azi3', 'n_hh3', 'n_end_fu', 'n_wdw', 'n_deaths')
kWeekDay <- 2 # Monday 00:00 after starting
start.date <- min(hf.data$screening_date, na.rm = T)
time.point <- NextWeekDay(start.date, kWeekDay)
# Collapse HF IDs in the hf column no matter in which district the HF is and
# keep records of the current HF
if (!is.null(profile)) {
profile$hf <- rowSums(
x = profile[, c("hf_bombali", "hf_tonkolili", "hf_port_loko")],
na.rm = T
)
profile <- profile[which(profile$hf == hf.id), ]
}
while (time.point <= report.date) {
point <- list()
point['date'] <- as.character.Date(time.point)
# If we have profile data, compute pre-screening & screening indicators
if (!is.null(profile)) {
for (var in profile.vars)
point[var] = sum(
profile[which(profile$screening_date < time.point), c(var)],
na.rm = T
)
}
# Randomized participants
point['n_random'] <- CountNumberOfResponses(
data = hf.data[which(hf.data$screening_date < time.point), ],
var = "eligible",
val = 1,
by.hf = F
)
# AZi/Pbo doses
for (azi.event in kCRFAZiEvents) {
point[azi.event] <- CountNumberOfResponses(
data = hf.data[which(hf.data$int_date < time.point), ],
var = "int_azi",
val = 1,
event = azi.event,
by.hf = F
)
}
# Household post-AZi/Pbo supervision visits
for (hh.event in kCRFHHEvent) {
point[hh.event] <- CountNumberOfResponses(
data = hf.data[which(hf.data$hh_date < time.point), ],
var = "hh_child_seen",
val = 1,
event = hh.event,
by.hf = F
)
}
# Non-AZi/Pbo EPI visits
for (epi.visit in kCRFNonAZiEvents) {
point[epi.visit] <- CountNumberOfResponses(
data = hf.data[which(hf.data$int_date < time.point), ],
var = "intervention_complete",
val = 2,
event = epi.visit,
by.hf = F
)
}
# Withdrawals
point['n_wdw'] <- CountNumberOfResponses(
data = hf.data[which(hf.data$wdrawal_date < time.point), ],
var = "withdrawal_complete",
val = 2,
by.hf = F
)
# Deaths
point['n_deaths'] <- CountNumberOfResponses(
data = hf.data[which(hf.data$death_date < time.point), ],
var = "death_complete",
val = 2,
by.hf = F
)
# Out Migrations - Migrations in which the origin is this health facility
mig_origin_cols <- c('mig_origin_hf_bombali',
'mig_origin_hf_port_loko',
'mig_origin_hf_tonkolili')
hf.data$mig_origin_hf <- rowSums(hf.data[, mig_origin_cols], na.rm = T)
point['n_out_mig'] <- CountNumberOfResponses(
data = hf.data[which(hf.data$mig_reported_date < time.point &
hf.data$mig_origin_hf == hf.id), ],
var = "migration_complete",
val = 2,
by.hf = F
)
# In Migrations - Migrations in which the destination is this health
# facility
mig_destination_cols <- c('mig_destination_hf_bombali',
'mig_destination_hf_port_loko',
'mig_destination_hf_tonkolili')
hf.data$mig_destination_hf <- rowSums(hf.data[, mig_destination_cols],
na.rm = T)
point['n_in_mig'] <- CountNumberOfResponses(
data = hf.data[which(hf.data$mig_reported_date < time.point &
hf.data$mig_destination_hf == hf.id), ],
var = "migration_complete",
val = 2,
by.hf = F
)
# Apply migrations to the series point
migrations <- GetMigrations(hf.data)
if (nrow(migrations) > 0) {
# We have always to substract the number of IN Migrations to the indicator
# n_random (Randomized)
point$n_random <- point$n_random - point$n_in_mig
# Check when the IN migration occurred and substract in the events where
# the data collection was done in the previous health facility. I.e.
# substract when migration date is after event date
for (event in kCRFEvents) {
# Compute the number of IN migrations that occurred after the event date
filter <- migrations$mig_reported_date < time.point &
migrations$in_mig & migrations$mig_reported_date > migrations[[event]]
migrations.number <- nrow(migrations[which(filter), ])
# Substract this number as this activity occurred in the previous HF
point[event] <- point[[event]] - migrations.number
}
}
time.series <- rbind(time.series, point, stringsAsFactors = F)
time.point <- NextWeekDay(time.point + 1, kWeekDay)
}
# Reorder and rename columns to rescpect the progress report table design
time.series <- time.series[, ordered.vars]
colnames(time.series) <- var.names
return(time.series)
}
PreVisualizationProcess <- function(df, columns.remove.if.zero) {
# Process data frame before visualizing it.
#
# Args:
# columns.remove.if.zero: List of data frame columns to be removed if all
# the values are zero.
#
# Returns:
# Data frame processed and ready for viasualization
for (column in columns.remove.if.zero) {
if (sum(df[column]) == 0) {
df[column] <- NULL
}
}
return(df)
}
CreateExcelReport <- function(metadata, general.progress, cohort.progress,
time.series, health.facilities) {
# Positions in spreadsheet
kStartColumn <- 1
kStartRow <- 3
# Positions in data frame
kICFLogColumn <- 6
kICFeCRFColumn <- 7
kNumberOfLogIndicators <- 6
# Sizes
kNarrowColumn <- 9
kNormalColumn <- 11
kWideColumn <- 36
kLargeColumn <- 70
kWideRow <- 30
kSmallFont <- 10
kTinyFont <- 9
# Colors
kHEXDarkBlue <- "#44546A"
kHEXBlueGrayLight <- "#E6EEFA"
kHEXBlueGrayLight2 <- "#ACB9CA"
kHEXBlueGrayDark <- "#D6E2F6"
kHEXBlueGrayDark2 <- "#8497B0"
kHEXGray <- "#E7E6E6"
kHEXYellowLight <- "#FFF2CC"
kHEXYellowDark <- "#BF8F00"
kHEXRed <- "#FC0000"
kINDWhite <- 9
# Set districts and health facility names
general.progress$hf.list <- NULL
rownames(general.progress) <- paste(
health.facilities$district[health.facilities$trial],
health.facilities$code[health.facilities$trial],
health.facilities$name[health.facilities$trial]
)
cohort.progress$hf.list <- NULL
rownames(cohort.progress) <- paste(
health.facilities$district[health.facilities$cohort],
health.facilities$code[health.facilities$cohort],
health.facilities$name[health.facilities$cohort]
)
# Super header names
kSuperHeaders <- c("Source: Screening Log", "Source: eCRF")
# Set column names
colnames(general.progress) <- c("Penta1", "Approached", "Underweight",
"Over Age", "Refusals", "ICF Signed",
"ICF Signed", "Screening Failures",
"More than 10w", "No Penta1", "Less than 4kg",
"Catchment Area", "Other Study", "Allergy",
"Disease", "Illness", "Randomized",
"AZi/Pbo1", "AZi/Pbo2", "AZi/Pbo3",
"Non-Compliants", "Withdrawals",
"Parent Request", "Investigator", "Migration",
"Other", "Deaths")
colnames(cohort.progress) <- c("Recruited", "IPTi1", "IPTi2", "IPTi3", "MRV2",
"After MRV2")
# Create the totals rows
total.string <- "Total"
general.progress <- rbind(general.progress, colSums(general.progress))
general.last.row <- nrow(general.progress)
rownames(general.progress)[general.last.row] <- total.string
cohort.progress <- rbind(cohort.progress, colSums(cohort.progress))
cohort.last.row <- nrow(cohort.progress)
rownames(cohort.progress)[cohort.last.row] <- total.string
# Process data frame for visualization: (1) Remove failure and withdrawal
# reason columns if zero
columns.can.be.hidden <- c("More than 10w", "No Penta1", "Less than 4kg",
"Catchment Area", "Other Study", "Allergy",
"Disease", "Illness", "Parent Request",
"Investigator", "Migration", "Other")
general.progress <- PreVisualizationProcess(general.progress,
columns.can.be.hidden)
# Create Excel Work Book
wb <- createWorkbook(type = "xlsx") ####################################################################
# Workbook styles
right.align <- Alignment(
h = "ALIGN_RIGHT",
wrapText = T
)
left.align <- Alignment(
h = "ALIGN_LEFT"
)
center.align <- Alignment(
h = "ALIGN_CENTER"
)
header.background <- Fill(
backgroundColor = kHEXDarkBlue,
foregroundColor = kHEXDarkBlue,
pattern = "SOLID_FOREGROUND"
)
header.font <- Font(
wb = wb,
color = kINDWhite,
isBold = T,
heightInPoints = kSmallFont
)
table.header <- CellStyle(
wb = wb,
alignment = right.align,
fill = header.background,
font = header.font
)
super.header.background.1 <- Fill(
backgroundColor = kHEXBlueGrayDark2,
foregroundColor = kHEXBlueGrayDark2,
pattern = "SOLID_FOREGROUND"
)
super.header.background.2 <- Fill(
backgroundColor = kHEXBlueGrayLight2,
foregroundColor = kHEXBlueGrayLight2,
pattern = "SOLID_FOREGROUND"
)
super.header.font <- Font(
wb = wb,
color = kINDWhite,
isBold = T,
isItalic = T,
heightInPoints = kSmallFont
)
super.header.1 <- CellStyle(
wb = wb,
alignment = center.align,
fill = super.header.background.1,
font = super.header.font
)
super.header.2 <- CellStyle(
wb = wb,
alignment = center.align,
fill = super.header.background.2,
font = super.header.font
)
subcolumn.background.main <- Fill(
backgroundColor = kHEXBlueGrayDark,
foregroundColor = kHEXBlueGrayDark,
pattern = "SOLID_FOREGROUND"
)
subcolumn.background <- Fill(
backgroundColor = kHEXBlueGrayLight,
foregroundColor = kHEXBlueGrayLight,
pattern = "SOLID_FOREGROUND"
)
subcolumn.font <- Font(
wb = wb,
heightInPoints = kTinyFont,
isItalic = T
)
table.subcolumn <- CellStyle(
wb = wb,
fill = subcolumn.background.main
)
totals.background <- Fill(
backgroundColor = kHEXGray,
foregroundColor = kHEXGray,
pattern = "SOLID_FOREGROUND"
)
totals.font <- Font(
wb = wb,
isBold = T
)
table.totals <- CellStyle(
wb = wb,
fill = totals.background,
font = totals.font
)
comments.font <- Font(