-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathmain.tex
721 lines (596 loc) · 47.2 KB
/
main.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
\documentclass[english]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{babel}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{fancyhdr}
\newcommand{\scidatalogo}{\includegraphics[height=36pt]{static/SciData_logo.jpg}}
\newcommand{\overleaflogo}{\includegraphics[height=36pt]{static/Overleaf-logo-300dpi.png}}
\pagestyle{fancy}
\fancyhf{}
\renewcommand{\headrulewidth}{0pt}
\setlength{\headheight}{40pt}
\lhead{\textsc{\scidatalogo}}
\rhead{\textsc{\overleaflogo}}
\begin{document}
% Data Descriptor Title (110 character maximum, inc. spaces)
\title{MIMIC-III, a freely accessible critical care database}
\author{
Alistair E.W. Johnson\textsuperscript{1{†}},
Tom J. Pollard\textsuperscript{1{†}{*}},
Lu Shen\textsuperscript{2}, \\
Li-wei Lehman\textsuperscript{1},
Mengling Feng\textsuperscript{1,3},
Mohammad Ghassemi\textsuperscript{1}, \\
Benjamin Moody\textsuperscript{1},
Peter Szolovits\textsuperscript{4},
Leo Anthony Celi\textsuperscript{1,2}, \\
Roger G. Mark\textsuperscript{1,2}
}
\maketitle
\thispagestyle{fancy}
1. MIT Institute for Medical Engineering and Science, Massachusetts Institute of Technology, Cambridge, MA, United States. 2. Beth Israel Deaconess Medical Center, Boston, MA, United States. 3. Institute for Infocomm Research, A*STAR, Singapore. 4. Clinical Decision Making Group, Laboratory for Computer Science, Massachusetts Institute of Technology, Cambridge, MA, United States. \\
\\
{*}Corresponding author: Tom Pollard (tpollard@mit.edu). \\
{†}Authors contributed equally.
\begin{abstract} % 170 words
MIMIC-III (``Medical Information Mart for Intensive Care'') is a large, single-center database comprising information relating to patients admitted to critical care units at a large tertiary care hospital. Data includes vital signs, medications, laboratory measurements, observations and notes charted by care providers, fluid balance, procedure codes, diagnostic codes, imaging reports, hospital length of stay, survival data, and more. The database supports applications including academic and industrial research, quality improvement initiatives, and higher education coursework.
\end{abstract}
\section*{Background \& summary} % 700 words
% --- REQUIREMENTS OF THIS SECTION --- %
% (700 words maximum) An overview of the study design, the assay(s)
% performed, and the created data, including any background information
% needed to put this study in the context of previous work and the literature.
% The section should also briefly outline the broader goals that motivated
% the creation of this dataset and the potential reuse value. We also
% encourage authors to include a figure that provides a schematic overview
% of the study and assay(s) design. This section and the other main
% body sections of the manuscript should include citations to the literature
% as needed \cite{cite1, cite2}. References should be included within the
% manuscript file itself as our system cannot accept BibTeX bibliography files.
% Authors who wish to use BibTeX to prepare their references should therefore
% copy the reference list from the .bbl file that BibTeX generates and paste it
% into the main manuscript .tex file (and delete the associated
% \textbackslash{}bibliography and \textbackslash{}bibliographystyle commands).
% ------------------------------------ %
In recent years there has been a concerted move towards the adoption of digital health record systems in hospitals. In the US, for example, the number of non-federal acute care hospitals with basic digital systems increased from 9.4\% to 75.5\% over the 7 year period between 2008 and 2014 \cite{cite1}.
Despite this advance, interoperability of digital systems remains an open issue, leading to challenges in data integration. As a result, the potential that hospital data offers in terms of understanding and improving care is yet to be fully realized. In parallel, the scientific research community is increasingly coming under criticism for the lack of reproducibility of studies \cite{cite2}.
Here we report the release of the MIMIC-III database, an update to the widely-used MIMIC-II database (Data Citation 1). MIMIC-III integrates deidentified, comprehensive clinical data of patients admitted to the Beth Israel Deaconess Medical Center in Boston, Massachusetts, and makes it widely accessible to researchers internationally under a data use agreement (Figure 1). The open nature of the data allows clinical studies to be reproduced and improved in ways that would not otherwise be possible.
Based on our experience with the previous major release of MIMIC (MIMIC-II, released in 2010) we anticipate MIMIC-III to be widely used internationally in areas such as academic and industrial research, quality improvement initiatives, and higher education coursework.
% TP: add examples of usage to the previous paragraph.
% Numerous papers have been published using MIMIC data over the past decade,
% offering insight into areas such as X, X, and X.
% MIMIC-II is currently used in numerous university courses, including ones at
% Stanford, Georgia Tech, and Columbia [ref].
To recognize the increasingly broad usage of MIMIC, we have renamed the full title of the database from ``Multiparameter Intelligent Monitoring in Intensive Care'' to ``Medical Information Mart for Intensive Care''. The MIMIC-III critical care database is unique and notable for the following reasons:
\begin{itemize}
\item it is the only freely accessible critical care database of its kind;
\item the dataset spans more than a decade, with detailed information about individual patient care;
\item analysis is unrestricted once a data use agreement is accepted, enabling clinical research and education around the world.
\end{itemize}
\subsection*{Patient characteristics}
MIMIC-III contains data associated with 53,423 distinct hospital admissions for adult patients (aged 16 years or above) admitted to critical care units between 2001 and 2012. In addition, it contains data for 7870 neonates admitted between 2001 and 2008. The data covers 38,597 distinct adult patients and 49,785 hospital admissions. The median age of adult patients is 65.8 years (Q1--Q3: 52.8--77.8), 55.9\% of patients are male, and in-hospital mortality is 11.5\%. The median length of an ICU stay is 2.1 days (Q1--Q3: 1.2--4.6) and the median length of a hospital stay is 6.9 days (Q1--Q3: 4.1--11.9). A mean of 4579 charted observations ('chartevents') and 380 laboratory measurements ('labevents') are available for each hospital admission. Table \ref{table:patientpopulation} provides a breakdown of the adult population by care unit.
The primary International Classification of Diseases (ICD-9) codes from the patient discharges are listed in Table \ref{table:icddistribution}. The top three codes across hospital admissions for patients aged 16 years and above were:
\begin{itemize}
\item 414.01 ("Coronary atherosclerosis of native coronary artery"), accounting for 7.1\% of all hospital admissions;
\item 038.9 ("Unspecified septicemia"), accounting for 4.2\% of all hospital admissions; and
\item 410.71 ("Subendocardial infarction, initial episode of care"), accounting for 3.6\% of all hospital admissions.
\end{itemize}
% \subsection*{Roadmap}
% To maximise research potential, the database will be iteratively enhanced over subsequent minor releases. For example, we anticipate later versions of the database to incorporate data from the emergency care department of Beth Israel Deaconess Medical Center.
% In the more distant future we seek to create a federated database by linking MIMIC-III with international hospital databases. Progress has been made towards this aim in collaboration with several hospitals in Europe and South America.
\subsection*{Classes of data}
Data available in the MIMIC-III database ranges from time-stamped, nurse-verified physiological measurements made at the bedside to free-text interpretations of imaging studies provided by the radiology department. Table \ref{table:dataclasses} gives an overview of the different classes of data available. Figure 2 shows sample data for a single patient stay in a medical intensive care unit. The patient, who was undergoing a course of chemotherapy at the time of admission, presented with febrile neutropenia, anemia, and thrombocytopenia.
\section*{Methods}
% --- REQUIREMENTS OF THIS SECTION --- %
% The Methods should include detailed text describing any steps or procedures
% used in producing the data, including full descriptions of the experimental
% design, data acquisition assays, and any computational processing (e.g.
% normalization, image feature extraction). Related methods should be grouped
% under corresponding subheadings where possible, and methods should be described
% in enough detail to allow other researchers to interpret and repeat, if required,
% the full study. Specific data outputs should be explicitly referenced via data
% citation (see Data Records and Data Citations, below). Authors should cite
% previous descriptions of the methods under use, but ideally the method
% descriptions should be complete enough for others to understand and reproduce
% the methods and processing steps without referring to associated publications.
% There is no limit to the length of the Methods section.
% ------------------------------------ %
The Laboratory for Computational Physiology at Massachusetts Institute of Technology is an interdisciplinary team of data scientists and practicing physicians. MIMIC-III is the third iteration of the MIMIC critical care database, enabling us to draw upon prior experience with regard to data management and integration \cite{cite3}.
\subsection*{Database development}
The MIMIC-III database was populated with data that had been acquired during routine hospital care, so there was no associated burden on caregivers and no interference with their workflow. Data was downloaded from several sources, including:
\begin{itemize}
\item archives from critical care information systems
\item hospital electronic health record databases
\item Social Security Administration Death Master File
\end{itemize}
Two different critical care information systems were in place over the data collection period: Philips CareVue Clinical Information System (models M2331A and M1215A; Philips Healthcare, Andover, MA) and iMDsoft MetaVision ICU (iMDsoft, Needham, MA). These systems were the source of clinical data such as:
\begin{itemize}
\item time-stamped nurse-verified physiological measurements (for example, hourly documentation of heart rate, arterial blood pressure, or respiratory rate);
\item documented progress notes by care providers;
\item continuous intravenous drip medications and fluid balances.
\end{itemize}
With the exception of data relating to fluid intake, which differed significantly in structure between the CareVue and MetaVision systems, data was merged when building the database tables. Data which could not be merged is given a suffix to denote the data source. For example, inputs for patients monitored with the CareVue system are stored in INPUTEVENTS\_CV, whereas inputs for patients monitored with the MetaVision system are stored in INPUTEVENTS\_MV. Additional information was collected from hospital and laboratory health record systems, including:
\begin{itemize}
\item patient demographics and in-hospital mortality.
\item laboratory test results (for example, hematology, chemistry, and microbiology results).
\item discharge summaries and reports of electrocardiogram and imaging studies.
\item billing-related information such as International Classification of Disease, 9th Edition (ICD-9) codes, Diagnosis Related Group (DRG) codes, and Current Procedural Terminology (CPT) codes.
\end{itemize}
Out-of-hospital mortality dates were obtained using the Social Security Administration Death Master File. A more detailed description of the data is shown in Table \ref{table:patientpopulation}. Physiological waveforms obtained from bedside monitors (such as electrocardiograms, blood pressure waveforms, photoplethysmograms, impedance pneumograms) were obtained for a subset of patients.
Several projects are ongoing to map concepts within the MIMIC database to standardized dictionaries. For example, researchers at the National Library of Medicine National Institutes of Health have mapped laboratory tests and medications in MIMIC-II to LOINC and RxNorm, respectively \cite{abhyankar2012}. Efforts are also underway to transform MIMIC to common data models, such as the Observational Medical Outcomes Partnership Common Data Model, to support the application of standardized tools and methods \cite{cite8}. These developments are progressively incorporated into the MIMIC database where possible.
The project was approved by the Institutional Review Boards of Beth Israel Deaconess Medical Center (Boston, MA) and the Massachusetts Institute of Technology (Cambridge, MA). Requirement for individual patient consent was waived because the project did not impact clinical care and all protected health information was deidentified.
\subsection*{Deidentification}
Before data was incorporated into the MIMIC-III database, it was first deidentified in accordance with Health Insurance Portability and Accountability Act (HIPAA) standards using structured data cleansing and date shifting. The deidentification process for structured data required the removal of all eighteen of the identifying data elements listed in HIPAA, including fields such as patient name, telephone number, address, and dates. In particular, dates were shifted into the future by a random offset for each individual patient in a consistent manner to preserve intervals, resulting in stays which occur sometime between the years 2100 and 2200. Time of day, day of the week, and approximate seasonality were conserved during date shifting. Dates of birth for patients aged over 89 were shifted to obscure their true age and comply with HIPAA regulations: these patients appear in the database with ages of over 300 years.
Protected health information was removed from free text fields, such as diagnostic reports and physician notes, using a rigorously evaluated deidentification system based on extensive dictionary look-ups and pattern-matching with regular expressions \cite{cite5}. The components of this deidentification system are continually expanded as new data is acquired.
\subsection*{Code availability}
% --- REQUIREMENTS OF THIS SECTION --- %
%For all studies using custom code in the generation or processing of datasets,
%a statement must be included here, indicating whether and how the code can be
%accessed, including any restrictions to access. This section should also include
%information on the versions of any software used, if relevant, and any specific
%variables or parameters used to generate, test, or process the current dataset.
% ------------------------------------ %
%TODO: I feel like the buildmimic code should be in the mimic-iii-building repository, not the mimic-code repository We can then detail the building process here - e.g. tested with PostgreSQL v9.4, Oracle v., etc...
% The code used to create the MIMIC-III database from raw hospital exports has been made available in a public archive. https://github.com/MIT-LCP/mimic-iii-building %
The code that underpins the MIMIC-III website and documentation is openly available and contributions from the research community are encouraged: \\ https://github.com/MIT-LCP/mimic-website
A Jupyter Notebook containing the code used to generate the tables and descriptive statistics included in this paper is available at: \\ https://github.com/MIT-LCP/mimic-iii-paper/
% Answer:
% Deidentification code available at github.com/MIT-lcp/deid
\section*{Data records}
% --- REQUIREMENTS OF THIS SECTION --- %
% Please explain each data record associated with this work, including
% the repository where this information is stored, and an overview of
% the data files and their formats. Each external data record should
% be listed in Data Citation section at the end of this template, and
% records should be cited throughout the manuscript as, for example
% (Data Citation 1).
% Tables should be used to support the data records, and should clearly indicate
% the samples and subjects, their provenance, and the experimental manipulations
% performed on each. They should also specify the data output resulting from each
% data-collection or analytical step, should these form part of the archived record.
% Please see the submission guidelines at the \emph{Scientific Data} website, and
% our Word templates for more information on preparing such tables.
% ------------------------------------ %
% Probably the most reasonable format:
% CHARTEVENTS, IOEVENTS, DATETIMEEVENTS from two ICU databases
% LABEVENTS from patient's medical record, spans across all their (?in-network) visits
% ADMISSIONS from hospital level admission/discharge/transfer information
% ICUSTAYEVENTS derived from ADMISSIONS
% DIAGNOSES_ICD from hospital billing database
% PROCEDURES_ICD from hospital billing database
% DRGCODES from hospital billing database
% NOTEEVENTS from hospital note entry database
% POE_MED_ORDER from provider order entry database (hospital wide)
% CALLOUT from hospital discharge planning database
% CAREGIVERS merged from the two ICU databases
% CPTEVENTS from hospital billing database
% D_ICD_DIAGNOSES, D_ICD_PROCEDURES, D_CPT from openly available sources
% D_LABITEMS from the same data that the labs were derived - the ITEMIDs were generated by us
% PATIENTS is derived from ADMISSIONS
% MICROBIOLOGYEVENTS ??
% SERVICES from hospital database
% TRANSFERS is the hospital ADT data
% Include a nice visualization of the data
MIMIC-III is a relational database consisting of 26 tables (Data Citation 1). Tables are linked by identifiers which usually have the suffix "ID". For example, SUBJECT\_ID refers to a unique patient, HADM\_ID refers to a unique admission to the hospital, and ICUSTAY\_ID refers to a unique admission to an intensive care unit.
Charted events such as notes, laboratory tests, and fluid balance are stored in a series of "events" tables. For example the OUTPUTEVENTS table contains all measurements related to output for a given patient, while the LABEVENTS table contains laboratory test results for a patient.
Tables prefixed with “D\_” are dictionary tables and provide definitions for identifiers. For example, every row of CHARTEVENTS is associated with a single ITEMID which represents the concept measured, but it does not contain the actual name of the measurement. By joining CHARTEVENTS and D\_ITEMS on ITEMID, it is possible to identify the concept represented by a given ITEMID. Further detail is provided below.
\subsection*{Data tables}
Developing the MIMIC data model involved balancing simplicity of interpretation against closeness to ground truth. As such, the model is a reflection of underlying data sources, modified over iterations of the MIMIC database in response to user feedback. Table \ref{table:mimictables} describes how data is distributed across the data tables. Care has been taken to avoid making assumptions about the underlying data when carrying out transformations, so MIMIC-III closely represents the raw hospital data.
Broadly speaking, five tables are used to define and track patient stays: ADMISSIONS; PATIENTS; ICUSTAYS; SERVICES; and TRANSFERS. Another five tables are dictionaries for cross-referencing codes against their respective definitions: D\_CPT; D\_ICD\_DIAGNOSES; D\_ICD\_PROCEDURES; D\_ITEMS; and D\_LABITEMS. The remaining tables contain data associated with patient care, such as physiological measurements, caregiver observations, and billing information.
In some cases it would be possible to merge tables---for example, the D\_ICD\_PROCEDURES and CPTEVENTS tables both contain detail relating to procedures and could be combined---but our approach is to keep the tables independent for clarity, since the data sources are significantly different. Rather than combining the tables within the MIMIC data model, we suggest researchers develop database views and transforms as appropriate.
\section*{Technical validation}
% --- REQUIREMENTS OF THIS SECTION --- %
% This section presents any experiments or analyses that are needed
% to support the technical quality of the dataset. This section may
% be supported by up figures and tables, as needed. This is a required
% section; authors must present information justifying the reliability
% of their data.
% ------------------------------------ %
The number of structural changes was minimized to achieve the desired level of deidentification and data schema, helping to ensure that MIMIC-III closely represents the raw data collected within the Beth Israel Deaconess Medical Center.
Best practice for scientific computing was followed where possible \cite{cite4}. Code used to build MIMIC-III was version controlled and developed collaboratively within the laboratory. This approach encouraged and facilitated sharing of readable code and documentation, as well as frequent feedback from colleagues.
Issue tracking is used to ensure that limitations of the data and code are clearly documented and are dealt with as appropriate. The research community is encouraged to report and address issues as they are found, and a system for releasing minor database updates is in place.
% TP: some measures of quality might include:
% TBC...
% Prior to release internal testing. Descriptive analysis conducted to ensure known features
% of the patient population were represented by the data.
% Data imported successfully to PostgreSQL
% All ICUSTAY_ID associated with HADM_ID, all HADM_ID associated with a SUBJECT_ID
% No ages < 0
% Very few DODs < discharge
%Every distinct ICU admission in the database (unique ICUSTAY_ID) is associated with a single hospitalization (HADM_ID), and similarly every distinct hospitalization in the database is associated with a single patient (SUBJECT_ID).
\section*{Usage notes}
% --- REQUIREMENTS OF THIS SECTION --- %
% Brief instructions that may help other researchers reuse this dataset.
% This is an optional section, but strongly encouraged when helpful
% to readers. This may include discussion of software packages that
% are suitable for analyzing the assay data files, suggested downstream
% processing steps (e.g. normalization, etc.), or tips for integrating
% or comparing this with other datasets. If needed, authors are encouraged
% to provide code, programs, or data processing workflows when they may help
% others analyse the data. We encourage authors to archive related code in
% a DOI-issuing archive when possible, but code may also be supplied as
% supplementary information files.
% For studies involving privacy or safety controls on public access
% to the data, this section should describe in detail these controls,
% including how authors can apply to access the data, and what criteria
% will be used to determine who may access the data, and any limitations
% on data use.
% ------------------------------------ %
\subsection*{Data access}
MIMIC-III is provided as a collection of comma-separated value (CSV) files, along with scripts to help with importing the data into database systems including PostgreSQL, MySQL, and MonetDB. As the database contains detailed information regarding the clinical care of patients, it must be treated with appropriate care and respect. Researchers are required to formally request access via a process documented on the MIMIC website \cite{cite-mimic-website}. There are two key steps that must be completed before access is granted:
\begin{enumerate}
\item the researcher must complete a recognized course in protecting human research participants that includes Health Insurance Portability and Accountability Act (HIPAA) requirements.
\item the researcher must sign a data use agreement, which outlines appropriate data usage and security standards, and forbids efforts to identify individual patients.
\end{enumerate}
Approval requires at least a week. Once an application has been approved the researcher will receive emails containing instructions for downloading the database from PhysioNetWorks, a restricted access component of PhysioNet \cite{cite6}.
\subsection*{Example usage}
MIMIC has been used as a basis for coursework in numerous educational institutions, for example in medical analytics courses at Stanford University (course BIOMEDIN215), Massachusetts Institute of Technology (courses HST953 and HST950J/6.872), Georgia Institute of Technology (course CSE8803), University of Texas at Austin (course EE381V), and Columbia University (course G4002), amongst others. MIMIC has also provided the data that underpins a broad range of research studies, which have explored topics such as machine learning approaches for prediction of patient outcomes, clinical implications of blood pressure monitoring techniques, and semantic analysis of unstructured patient notes \cite{mimic-mayaud, mimic-lehman, mimic-velupillai, mimic-abhyankar}.
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3724452/
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3609896/
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4587060/
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4147606/
A series of 'datathons' have been held alongside development of the MIMIC database. These events assemble caregivers, data scientists, and those with domain-specific knowledge with the aim of creating ideas and producing clinically relevant, reproducible research \cite{cite7}. In parallel the events introduce new researchers to MIMIC and provide a platform for continuous review and development of code and research.
Documentation for the MIMIC database is available online \cite{cite-mimic-website}. The content is under continuous development and includes a list of studies that have been carried out using MIMIC. The website includes functionality that enables the research community to directly submit updates and improvements via GitHub.
\subsection*{Collaborative research}
Our experience is that many researchers work independently to produce code for data processing and analysis. We seek to move towards a more collaborative, iterative, and self-checking development process where researchers work together on a shared code base. To facilitate collaboration, a public code repository has been created to encourage researchers to develop and share code collectively: https://github.com/MIT-LCP/mimic-code.
The repository has been seeded with code to calculate commonly utilized variables in critical care research, including severity of illness scores, comorbidity scores, and duration of various treatments such as mechanical ventilation and vasopressor use. We encourage users to incorporate this code into their research, provide improvements, and add new contributions that have potential to benefit the research community as a whole. Over time, we expect the repository to become increasingly vital for researchers working with the MIMIC-III database.
Alongside work on the centralized codebase, we support efforts to transform MIMIC into common data models such as the Observational Medical Outcomes Partnership Common Data Model (OMOP-CDM) \cite{cite8}. Developing these common models may help to facilitate integration with complementary datasets and to enable the application of generalized analytic tools. Important efforts to map concepts to standardized clinical ontologies are also underway.
% http://www.ohdsi.org/data-standardization/the-common-data-model/
% More detail regarding the concepts derived in the repository is available \cite{?}.
% TP: we may want to assign a DOI to the MIMIC Code Repository. This would allow it to be moved in future and also allow us to track citations etc
\section*{Acknowledgements}
% --- REQUIREMENTS OF THIS SECTION --- %
% Text acknowledging non-author contributors. Acknowledgements should
% be brief, and should not include thanks to anonymous referees and
% editors, or effusive comments. Grant or contribution numbers may be
% acknowledged. Author contributions Please describe briefly the contributions
% of each author to this work on a separate line.
% ------------------------------------ %
This research and development was supported by grants NIH-R01-EB017205, NIH-R01-EB001659, and NIH-R01-GM104987 from the National Institutes of Health. The authors would also like to thank Philips Healthcare and staff at the Beth Israel Deaconess Medical Center, Boston, for supporting database development, and Ken Pierce for providing Figure 1.
\section*{Author contributions}
AEWJ, TJP, LS and LW built the MIMIC-III database. All authors gave input into the database development process and contributed to writing the paper.
\section*{Competing financial interests}
The authors declare no competing financial interests.
\section*{Figure legends}
% --- REQUIREMENTS OF THIS SECTION --- %
% Figure should be referred to using a consistent numbering scheme through
% the entire Data Descriptor. For initial submissions, authors may choose
% to supply this document as a single PDF with embedded figures, but
% separate figure image files must be provided for revisions and accepted
% manuscripts. In most cases, a Data Descriptor should not contain more
% than three figures, but more may be allowed when needed. We discourage
% the inclusion of figures in the Supplementary Information \textendash{}
% all key figures should be included here in the main Figure section.
% Figure legends begin with a brief title sentence for the whole figure
% and continue with a short description of what is shown in each panel,
% as well as explaining any symbols used. Legend must total no more
% than 350 words, and may contain literature references.
% ------------------------------------ %
% TP: response to reviewers. Fixed bug to display figure number in text.
\noindent
\textbf{Figure 1}: Overview of the MIMIC-III critical care database. \\
\noindent
\textbf{Figure 2}: Sample data for a single patient stay in a medical intensive care unit. GCS is Glasgow Coma Scale; NIBP is non-invasive blood pressure; and O2 saturation is blood oxygen saturation.
% Tables supporting the Data Descriptor. These can provide summary information
% (sample numbers, demographics, etc.), but they should generally not
% be used to present primary data (i.e. measurements). Tables containing
% primary data should be submitted to an appropriate data repository.
% Tables may be provided within the \LaTeX{} document or as separate
% files (tab-delimited text or Excel files). Legends, where needed,
% should be included here. Generally, a Data Descriptor should have
% fewer than ten Tables, but more may be allowed when needed. Tables
% may be of any size, but only Tables which fit onto a single printed
% page will be included in the PDF version of the article (up to a maximum of three).
% Maximum of three tables included in the PDF (up to 10 in total)
% Table 1:
% Demographics of the database
\begin{center}
\begin{table}
\begin{tabular}{|p{2.4cm}|p{1.2cm}|p{1.2cm}|p{1.2cm}|p{1.2cm}|p{1.2cm}|p{1.2cm}|}
\hline
Critical care unit & CCU & CSRU & MICU & SICU & TSICU & Total \\
\hline
Distinct patients, no. (\% of total admissions) & 5,674 (14.7\%) & 8,091 (20.9\%) & 13,649 (35.4\%) & 6,372 (16.5\%) & 4,811 (12.5\%) & 38,597 (100\%) \\
\hline
Hospital admissions, no. (\% of total admissions) & 7,258 (14.6\%) & 9,156 (18.4\%) & 19,770 (39.7\%) & 8,110 (16.3\%) & 5,491 (11.0\%) & 49,785 (100\%) \\
\hline
Distinct ICU stays, no. (\% of total admissions) & 7,726 (14.5\%) & 9,854 (18.4\%) & 21,087 (39.5\%) & 8,891 (16.6\%) & 5,865 (11.0\%) & 53,423 (100\%) \\
\hline
Age, years, median (Q1-Q3) & 70.1 (58.4-80.5) & 67.6 (57.6-76.7) & 64.9 (51.7-78.2) & 63.6 (51.4-76.5) & 59.9 (42.9-75.7) & 65.8 (52.8-77.8) \\
\hline
Gender, male, \% of unit stays & 4,203 (57.9\%) & 6,000 (65.5\%) & 10,193 (51.6\%) & 4,251 (52.4\%) & 3,336 (60.7\%) & 27,983 (55.9\%) \\
\hline
ICU length of stay, median days (Q1-Q3) & 2.2 (1.2-4.1) & 2.2 (1.2-4.0) & 2.1 (1.2-4.1) & 2.3 (1.3-4.9) & 2.1 (1.2-4.6) & 2.1 (1.2-4.6) \\
\hline
Hospital length of stay, median days (Q1-Q3) & 5.8 (3.1-10.0) & 7.4 (5.2-11.4) & 6.4 (3.7-11.7) & 7.9 (4.4-14.2) & 7.4 (4.1-13.6) & 6.9 (4.1-11.9) \\
\hline
ICU mortality, percent of unit stays & 685 (8.9\%) & 353 (3.6\%) & 2,222 (10.5\%) & 813 (9.1\%) & 492 (8.4\%) & 4,565 (8.5\%) \\
\hline
Hospital mortality, percent of unit stays & 817 (11.3\%) & 424 (4.6\%) & 2,859 (14.5\%) & 1,020 (12.6\%) & 628 (11.4\%) & 5,748 (11.5\%) \\
\hline
\end{tabular}
\caption{Details of the MIMIC-III patient population by first critical care unit on hospital admission for patients aged 16 years and above. CCU is Coronary Care Unit; CSRU is Cardiac Surgery Recovery Unit; MICU is Medical Intensive Care Unit; SICU is Surgical Intensive Care Unit; TSICU is Trauma Surgical Intensive Care Unit.}
\label{table:patientpopulation}
\end{table}
\end{center}
% Table 2:
% Distribution of ICD-9 codes
\begin{center}
\begin{table}
\begin{tabular}{|p{3.9cm}|p{1.25cm}|p{1.25cm}|p{1.25cm}|p{1.25cm}|p{1.25cm}|p{1.25cm}|}
\hline
Critical care unit &
CCU stays, No. (\% by unit) &
CSRU stays, No. (\% by unit) &
MICU stays, No. (\% by unit) &
SICU stays, No. (\% by unit) &
TSICU stays, No. (\% by unit) &
Total stays, No. (\% by unit) \\
\hline
Infectious and parasitic diseases, i.e., septicemia, other infectious and parasitic diseases, etc. (001–139)
& 305 (4.2\%) & 72 (0.8\%) & 3,229 (16.7\%) & 448 (5.6\%) & 152 (2.8\%) & 4,206 (8.6\%) \\
\hline
Neoplasms of digestive organs and intrathoracic organs, etc. (140–239)
& 126 (1.8\%) & 287 (3.2\%) & 1,415 (7.3\%) & 1,225 (15.3\%) & 466 (8.6\%) & 3,519 (7.2\%) \\
\hline
Endocrine, nutritional, metabolic, and immunity (240–279)
& 104 (1.4\%) & 36 (0.4\%) & 985 (5.1\%) & 178 (2.2\%) & 54 (1.0\%) & 1,357 (2.8\%) \\
\hline
Diseases of the circulatory system, i.e., ischemic heart diseases, diseases of pulmonary circulation, dysrhythmias, heart failure, cerebrovascular diseases, etc. (390–459)
& 5,131 (71.4\%) & 7,138 (78.6\%) & 2,638 (13.6\%) & 2,356 (29.5\%) & 684 (12.6\%) & 17,947 (36.6\%) \\
\hline
Pulmonary diseases, i.e., pneumonia and influenza, chronic obstructive pulmonary disease, etc. (460–519)
& 416 (5.8\%) & 141 (1.6\%) & 3,393 (17.5\%) & 390 (4.9\%) & 225 (4.1\%) & 4,565 (9.3\%) \\
\hline
Diseases of the digestive system (520–579)
& 264 (3.7\%) & 157 (1.7\%) & 3,046 (15.7\%) & 1,193 (14.9\%) & 440 (8.1\%) & 5,100 (10.4\%) \\
\hline
Diseases of the genitourinary system, i.e., nephritis, nephrotic syndrome, nephrosis, and other diseases of the genitourinary system (580–629)
& 130 (1.8\%) & 14 (0.2\%) & 738 (3.8\%) & 101 (1.3\%) & 31 (0.6\%) & 1,014 (2.1\%) \\
\hline
Trauma (800–959)
& 97 (1.3\%) & 494 (5.4\%) & 480 (2.5\%) & 836 (10.5\%) & 2,809 (51.7\%) & 4,716 (9.6\%) \\
\hline
Poisoning by drugs and biological substances (960–979)
& 50 (0.7\%) & 2 (0.0\%) & 584 (3.0\%) & 58 (0.7\%) & 11 (0.2\%) & 705 (1.4\%) \\
\hline
Other & 565 (7.9\%) & 739 (8.1\%) & 2,883 (14.9\%) & 1,204 (15.1\%) & 563 (10.4\%) & 5,954 (12.1\%) \\
\hline
Total & 7,188 (14.6\%) & 9,080 (18.5\%) & 19,391 (39.5\%) & 7,989 (16.3\%) & 5,435 (11.1\%) & 49,083 (100\%) \\
\hline
\end{tabular}
\caption{Distribution of primary International Classification of Diseases, 9th Edition (ICD-9) codes by care unit for patients aged 16 years and above. CCU is Coronary Care Unit; CSRU is Cardiac Surgery Recovery Unit; MICU is Medical Intensive Care Unit; SICU is Surgical Intensive Care Unit; TSICU is Trauma Surgical Intensive Care Unit.}
\label{table:icddistribution}
\end{table}
\end{center}
% Table 3:
% Overview of MIMIC-III
\begin{center}
\begin{table}
\begin{tabular}{|l|p{8cm}|}
\hline
Class of data & Description \\
\hline
Billing & Coded data recorded primarily for billing and administrative purposes. Includes Current Procedural Terminology (CPT) codes, Diagnosis-Related Group (DRG) codes, and International Classification of Diseases (ICD) codes. \\
\hline
Descriptive & Demographic detail, admission and discharge times, and dates of death. \\
\hline
Dictionary & Look-up tables for cross referencing concept identifiers (for example, International Classification of Diseases (ICD) codes) with associated labels. \\
\hline
Interventions & Procedures such as dialysis, imaging studies, and placement of lines. \\
\hline
Laboratory & Blood chemistry, hematology, urine analysis, and microbiology test results. \\
\hline
Medications & Administration records of intravenous medications and medication orders. \\
\hline
Notes & Free text notes such as provider progress notes and hospital discharge summaries. \\
\hline
Physiologic & Nurse-verified vital signs, approximately hourly (e.g. heart rate, blood pressure, respiratory rate). \\
\hline
Reports & Free text reports of electrocardiogram and imaging studies. \\
\hline
\end{tabular}
\caption{Classes of data available in the MIMIC-III critical care database.}
\label{table:dataclasses}
\end{table}
\end{center}
% Table 4:
% MIMIC data tables
\begin{center}
\begin{table}
\begin{tabular}{|l|p{10.5cm}|}
\hline
Table name & Description \\
\hline
ADMISSIONS & Every unique hospitalization for each patient in the database (defines HADM\_ID). \\
\hline
CALLOUT & Information regarding when a patient was cleared for ICU discharge and when the patient was actually discharged. \\
\hline
CAREGIVERS & Every caregiver who has recorded data in the database (defines CGID). \\
\hline
CHARTEVENTS & All charted observations for patients. \\
\hline
CPTEVENTS & Procedures recorded as Current Procedural Terminology (CPT) codes. \\
\hline
D\_CPT & High level dictionary of Current Procedural Terminology (CPT) codes. \\
\hline
D\_ICD\_DIAGNOSES & Dictionary of International Statistical Classification of Diseases and Related Health Problems (ICD-9) codes relating to diagnoses. \\
\hline
D\_ICD\_PROCEDURES & Dictionary of International Statistical Classification of Diseases and Related Health Problems (ICD-9) codes relating to procedures. \\
\hline
D\_ITEMS & Dictionary of local codes (`ITEMIDs') appearing in the MIMIC database, except those that relate to laboratory tests. \\
\hline
D\_LABITEMS & Dictionary of local codes (`ITEMIDs') appearing in the MIMIC database that relate to laboratory tests. \\
\hline
DATETIMEEVENTS & All recorded observations which are dates, for example time of dialysis or insertion of lines. \\
\hline
DIAGNOSES\_ICD & Hospital assigned diagnoses, coded using the International Statistical Classification of Diseases and Related Health Problems (ICD) system. \\
\hline
DRGCODES & Diagnosis Related Groups (DRG), which are used by the hospital for billing purposes. \\
\hline
ICUSTAYS & Every unique ICU stay in the database (defines ICUSTAY\_ID). \\
\hline
INPUTEVENTS\_CV & Intake for patients monitored using the Philips CareVue system while in the ICU, e.g. intravenous medications, enteral feeding, etc. \\
\hline
INPUTEVENTS\_MV & Intake for patients monitored using the iMDSoft MetaVision system while in the ICU, e.g. intravenous medications, enteral feeding, etc. \\
\hline
OUTPUTEVENTS & Output information for patients while in the ICU. \\
\hline
LABEVENTS & Laboratory measurements for patients both within the hospital and in outpatient clinics. \\
\hline
MICROBIOLOGYEVENTS & Microbiology culture results and antibiotic sensitivities from the hospital database. \\
\hline
NOTEEVENTS & Deidentified notes, including nursing and physician notes, ECG reports, radiology reports, and discharge summaries. \\
\hline
PATIENTS & Every unique patient in the database (defines SUBJECT\_ID). \\
\hline
PRESCRIPTIONS & Medications ordered for a given patient. \\
\hline
PROCEDUREEVENTS\_MV & Patient procedures for the subset of patients who were monitored in the ICU using the iMDSoft MetaVision system. \\
\hline
PROCEDURES\_ICD & Patient procedures, coded using the International Statistical Classification of Diseases and Related Health Problems (ICD) system. \\
\hline
SERVICES & The clinical service under which a patient is registered. \\
\hline
TRANSFERS & Patient movement from bed to bed within the hospital, including ICU admission and discharge. \\
\hline
\end{tabular}
\caption{An overview of the data tables comprising the MIMIC-III (v1.3) critical care database.}
\label{table:mimictables}
\end{table}
\end{center}
\clearpage
\begin{thebibliography}{99}
\expandafter\ifx\csname url\endcsname\relax
\def\url#1{\texttt{#1}}\fi
\expandafter\ifx\csname urlprefix\endcsname\relax\def\urlprefix{URL }\fi
\providecommand{\bibinfo}[2]{#2}
\providecommand{\eprint}[2][]{\url{#2}}
% TEMPLATE
% \bibitem{cite1}
% \bibinfo{author}{SURNAME, INITIAL.}, \bibinfo{author}{SURNAME, INITIAL.} \&
% \bibinfo{author}{SURNAME, INITIAL.},
% \newblock \bibinfo{title}{{TITLE.}}
% \newblock \emph{\bibinfo{journal}{JOURNAL}}
% \textbf{\bibinfo{volume}{VOLUME}}, \bibinfo{pages}{START--END}
% (\bibinfo{year}{YEAR}).
\bibitem{cite1}
\bibinfo{author}{Charles, D.},
\bibinfo{author}{King, J.}, \bibinfo{author}{Patel, V.} \&
\bibinfo{author}{Furukawa, M.}
\newblock \bibinfo{title}{{Adoption of Electronic Health Record Systems
among U.S. Non-federal Acute Care Hospitals}}.
\newblock \emph{\bibinfo{journal}{ONC Data Brief No. 9}}
(\bibinfo{year}{2013}).
\bibitem{cite2}
\bibinfo{author}{Collins, F.S.} \&
\bibinfo{author}{Tabak, L.A.}
\newblock \bibinfo{title}{{NIH plans to enhance reproducibility}}.
\newblock \emph{\bibinfo{journal}{Nature}}
\textbf{\bibinfo{volume}{505}}, \bibinfo{pages}{612--613}
(\bibinfo{year}{2014}).
\bibitem{cite3}
\bibinfo{author}{Saeed, M.}, \bibinfo{author}{Villarroel, M.}, \bibinfo{author}{Reisner, A.T.}, \bibinfo{author}{Clifford, G.}, \bibinfo{author}{Lehman, L.}, \bibinfo{author}{Moody, G.}, \bibinfo{author}{Heldt, T.}, \bibinfo{author}{Kyaw, T.}, \bibinfo{author}{Moody, B.} \&
\bibinfo{author}{Mark, R.G.},
\newblock \bibinfo{title}{{Multiparameter Intelligent Monitoring in Intensive Care II (MIMIC-II): A public-access intensive care unit database.}}
\newblock \emph{\bibinfo{journal}{Critical Care Medicine}}
\textbf{\bibinfo{volume}{39}}, \bibinfo{pages}{952--960}
(\bibinfo{year}{2011}).
\bibitem{cite4}
\bibinfo{author}{Wilson, G.}, \bibinfo{author}{Aruliah, D.A.},
\bibinfo{author}{Brown, C.T.}, \bibinfo{author}{Chue Hong, N.},
\bibinfo{author}{Davis, M.}, \bibinfo{author}{Guy, R.T.},
\bibinfo{author}{Haddock, S.H.D.}, \bibinfo{author}{Huff, K.D.},
\bibinfo{author}{Mitchell, I.M.}, \bibinfo{author}{Plumbley, M.D.},
\bibinfo{author}{Waugh, B.}, \bibinfo{author}{White, E.P.} \&
\bibinfo{author}{Wilson, P.},
\newblock \bibinfo{title}{{Best practices for scientific computing.}}
\newblock \emph{\bibinfo{journal}{PLOS Biology}}
\textbf{\bibinfo{volume}{12}}, \bibinfo{pages}{e1001745}
(\bibinfo{year}{2014}).
\bibitem{cite5}
\bibinfo{author}{Neamatullah, I.}, \bibinfo{author}{Douglass, M.},
\bibinfo{author}{Lehman, L.}, \bibinfo{author}{Reisner, A.},
\bibinfo{author}{Villarroel, M.}, \bibinfo{author}{Long, W.},
\bibinfo{author}{Szolovits, P.}, \bibinfo{author}{Moody, G.},
\bibinfo{author}{Mark, R.G.} \& \bibinfo{author}{Clifford, G.},
\newblock \bibinfo{title}{{Automated de-identification of free-text medical records.}}
\newblock \emph{\bibinfo{journal}{BMC Medical Informatics and Decision Making}}
\textbf{\bibinfo{volume}{8}}, \bibinfo{pages}{1--32}
(\bibinfo{year}{2008}).
\bibitem{cite6}
\bibinfo{author}{Goldberger, A.L.}, \bibinfo{author}{Amaral, L.A.N.},
\bibinfo{author}{Glass, L.}, \bibinfo{author}{Hausdorff, J.M.},
\bibinfo{author}{Ivanov, P.Ch.}, \bibinfo{author}{Mark, R.G.},
\bibinfo{author}{Mietus, J.E.}, \bibinfo{author}{Moody, G.B.},
\bibinfo{author}{Peng, C.-K.} \& \bibinfo{author}{Stanley, H.E.},
\newblock \bibinfo{title}{{PhysioBank, PhysioToolkit, and PhysioNet.}}
\newblock \emph{\bibinfo{journal}{Circulation}}
\textbf{\bibinfo{volume}{101}}, \bibinfo{pages}{e215--e220}
(\bibinfo{year}{2000}).
\bibitem{cite-mimic-website}
\newblock \bibinfo{title}{{MIMIC-III Critical Care Database: Documentation and Website}}
\newblock \emph{\bibinfo{journal}{http://mimic.physionet.org}}
(\bibinfo{year}{Accessed: March 2016}).
\bibitem{cite7}
\bibinfo{author}{Aboab, J.}, \bibinfo{author}{Celi, L.A.},
\bibinfo{author}{Charlton, P.}, \bibinfo{author}{Feng, M.},
\bibinfo{author}{Ghassemi, M.}, \bibinfo{author}{Marshall, D.C.},
\bibinfo{author}{Mayaud, L.}, \bibinfo{author}{Naumann, T.},
\bibinfo{author}{McCague, N.}, \bibinfo{author}{Paik, K.E.},
\bibinfo{author}{Pollard, T.J.}, \bibinfo{author}{Resche-Rigon, M.},
\bibinfo{author}{Salciccioli, J.D.} \& \bibinfo{author}{Stone, D.J.}
\newblock \bibinfo{title}{{A ``datathon'' model to support cross-disciplinary collaboration.}}
\newblock \emph{\bibinfo{journal}{Science Translational Medicine}}
\textbf{\bibinfo{volume}{8}}, \bibinfo{pages}{333ps8}
(\bibinfo{year}{2016}).
\bibitem{cite8}
\newblock \bibinfo{title}{{Observational Medical Outcomes Partnership Common Data Model Website.}}
\newblock \emph{\bibinfo{journal}{http://www.ohdsi.org/data-standardization/the-common-data-model/}}
(\bibinfo{year}{Accessed: March 2016}).
\bibitem{mimic-mayaud}
\bibinfo{author}{Mayaud, L.}, \bibinfo{author}{Lai, P.S.},
\bibinfo{author}{Clifford, G.}, \bibinfo{author}{Tarassenko, L.},
\bibinfo{author}{Celi, L.A.} \& \bibinfo{author}{Annane, D.}
\newblock \bibinfo{title}{{Dynamic data during hypotensive episode improves mortality predictions among patients with sepsis and hypotension.}}
\newblock \emph{\bibinfo{journal}{Critical Care Medicine}}
\textbf{\bibinfo{volume}{41(4)}}, \bibinfo{pages}{954--962}
(\bibinfo{year}{2013}).
\bibitem{mimic-lehman}
\bibinfo{author}{Lehman, L.H.}, \bibinfo{author}{Saeed, M.},
\bibinfo{author}{Talmor, D.}, \bibinfo{author}{Mark, R.G.} \&
\bibinfo{author}{Malhotra, A.}
\newblock \bibinfo{title}{{Methods of Blood Pressure Measurement in the ICU.}}
\newblock \emph{\bibinfo{journal}{Critical Care Medicine}}
\textbf{\bibinfo{volume}{41(1)}}, \bibinfo{pages}{34--40}
(\bibinfo{year}{2013}).
\bibitem{mimic-velupillai}
\bibinfo{author}{Velupillai, S.}, \bibinfo{author}{Mowery, D.},
\bibinfo{author}{South, B.R.}, \bibinfo{author}{Kvist, M.} \&
\bibinfo{author}{Dalianis, H.}
\newblock \bibinfo{title}{{Recent Advances in Clinical Natural Language Processing in Support of Semantic Analysis.}}
\newblock \emph{\bibinfo{journal}{Yearbook of Medical Informatics}}
\textbf{\bibinfo{volume}{10(1)}}, \bibinfo{pages}{183--193}
(\bibinfo{year}{2015}).
\bibitem{mimic-abhyankar}
\bibinfo{author}{Abhyankar, S.}, \bibinfo{author}{Demner-Fushman, D.},
\bibinfo{author}{Callaghan, F.M.} \& \bibinfo{author}{McDonald, C.J.}
\newblock \bibinfo{title}{{Combining structured and unstructured data to identify a cohort of ICU patients who received dialysis.}}
\newblock \emph{\bibinfo{journal}{J Am Med Inform Assoc}}
\textbf{\bibinfo{volume}{21(5)}}, \bibinfo{pages}{801--807}
(\bibinfo{year}{2014}).
\bibitem{abhyankar2012}
\bibinfo{author}{Abhyankar, S.}, \bibinfo{author}{Demner-Fushman, D.}
\& \bibinfo{author}{McDonald, C.J.}
\newblock \bibinfo{title}{{Standardizing clinical laboratory data for secondary use.}}
\newblock \emph{\bibinfo{journal}{J Biomed Inform}}
\textbf{\bibinfo{volume}{45(4)}}, \bibinfo{pages}{642--650}
(\bibinfo{year}{2012}).
\end{thebibliography}
\section*{Data Citations}
% Bibliographic information for the data records described in the manuscript.
% TP: response to reviewers. Update mimic reference.
1. Pollard, T.J. \& Johnson, A.E.W. The MIMIC-III Clinical Database. \\ http://dx.doi.org/10.13026/C2XW26 (2016).
\end{document}