forked from rohankhera/mimic-omop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_etl.sql
150 lines (135 loc) · 2.75 KB
/
check_etl.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
-- -----------------------------------------------------------------------------
-- File created - January-9-2018
-- ----------------------------------------------------------------------------
-- --------------------------------------------------
-- Need to install pgTAP
-- http://pgtap.org/
-- --------------------------------------------------
BEGIN;
SELECT plan ( 7 );
SELECT results_eq
(
'
SELECT count(*) FROM admissions;
'
,
'
SELECT count(*) FROM omop.visit_occurrence;
'
,
'Visit_occurrence table -- same number admission'
);
SELECT results_eq
(
'
SELECT COUNT(*)
FROM admissions
WHERE hospital_expire_flag = 1
OR diagnosis ILIKE ''%organ donor%'';
'
,
'
SELECT count(distinct visit_occurrence_id)
FROM omop.visit_occurrence
WHERE discharge_to_concept_id = 4216643
OR discharge_to_concept_id = 4022058;
'
, 'number of hospital admissions who die in-hospital match'
);
SELECT results_eq
(
'
SELECT cast(admission_type as TEXT) as visit_source_value, count(1) FROM admissions group by 1 ORDER BY 2,1 DESC;
'
,
'
SELECT cast (visit_source_value as TEXT), count(1) FROM omop.visit_occurrence group by 1 ORDER BY 2,1 DESC;
'
,
'Visit_occurrence table -- same distribution adm'
);
SELECT results_eq
(
'
SELECT
CAST(
CASE WHEN diagnosis ~* ''organ donor'' THEN ''DEAD/EXPIRED''
ELSE admission_location END
AS TEXT) as admitting_source_value
, count(1)
FROM admissions
GROUP BY 1
ORDER BY 2,1 DESC;
'
,
'
SELECT
CAST(admitting_source_value
AS TEXT) as admitting_source_value
, count(1)
FROM omop.visit_occurrence
GROUP BY 1
ORDER BY 2,1 DESC;
'
,
'Visit_occurrence table -- distribution admit source value'
);
SELECT results_eq
(
'
SELECT
CAST(
CASE WHEN diagnosis ~* ''organ donor'' THEN diagnosis
ELSE discharge_location END
AS TEXT) as discharge_to_source_value
, count(1)
FROM admissions
GROUP BY 1
ORDER BY 2,1 DESC;
'
,
'
SELECT
CAST(
discharge_to_source_value
AS TEXT) as discharge_to_source_value
, count(1)
FROM omop.visit_occurrence
GROUP BY 1
ORDER BY 2,1 DESC;
'
,
'Visit_occurrence table -- repartition discharge_to_source_value'
);
SELECT results_eq
(
'
SELECT count(visit_source_concept_id) FROM omop.visit_occurrence group by visit_source_concept_id order by 1 desc;
'
,
'
SELECT count(visit_source_value) FROM omop.visit_occurrence group by visit_source_value order by 1 desc;
'
,
'Visit_occurrence table -- links checker'
);
-- the same check with timestamp is wrong even before ETL
SELECT results_eq
(
'
select 0::integer;
'
,
'
WITH tmp AS
(
SELECT visit_occurrence_id, CASE WHEN visit_end_date < visit_start_date THEN 1 ELSE 0 END AS abnormal
FROM omop.visit_occurrence
)
SELECT max(abnormal) FROM tmp;
'
,
'Visit_occurrence table -- start_date > end_date'
);
SELECT * FROM finish();
ROLLBACK;