Merge branch 'main' of https://github.com/alphanumericslab/ecg-image-kit

alphanumericslab · Apr 11, 2024 · fd13a63 · fd13a63
2 parents 36dabf5 + 56b907f
commit fd13a63
Show file tree

Hide file tree

Showing 6 changed files with 93 additions and 45 deletions.
diff --git a/codes/ecg-image-generator/README.md b/codes/ecg-image-generator/README.md
@@ -58,6 +58,7 @@ The basic mode of the tool creates ECG images without distortions. The mode of o
 - `--print_header`: Add text from header file on all the generated images; default: False
 - `--num_columns` : Number of columns of the ECG leads. The default(-1) will plot a single column for 2 lead data and 4 columns for the 12 or any other number of lead data. Default: -1; type: int
 - `--full_mode`: Sets the lead to add at the bottom of the paper ECG as a long strip, obtained from the WFDB record's `.hea` header file; if lead II is not available, the first lead from the header file is plotted instead; default: `'II'`; type: str
+- `--mask_unplotted_samples`: Mask the samples that are not plotted in the image in the generated WFDB signal file; default: False. For example, for the 3x4 format, the code plots 2.5 seconds of each lead on the image but saves the complete signal in the WFDB file. If this flag is set, the part of the signal that is not plotted in the image (in this case, t > 2.5 seconds) is replaced with NaN values in the modified WFDB file.
 - `--num_images`: Number of ECG images to be generated; default: all files in the input directory; type: int
 -   `--deterministic_lead`: Remove lead names from all generated images, default=False.
 - `--random_resolution`: Generate random resolutions of images, if True resolution is randomly picked from the range [50, `-r`] else every image is generated at the `-r` resolution; default: False
@@ -246,8 +247,8 @@ Please include references to the following articles in any publications:
 2. ECG-Image-Kit: A Toolkit for Synthesis, Analysis, and Digitization of Electrocardiogram Images, (2024). URL: https://github.com/alphanumericslab/ecg-image-kit
 
 ## Contributors
-- Kshama Kodthalu Shivashankara, School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, US
 - Deepanshi, Department of Biomedical Informatics, Emory University, GA, US
+- Kshama Kodthalu Shivashankara, School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, US
 - Matthew A Reyna, Department of Biomedical Informatics, Emory University, GA, US
 - Gari D Clifford, Department of Biomedical Informatics, Emory University, GA, US
 - Reza Sameni (contact person), Department of Biomedical Informatics, Emory University, GA, US
@@ -256,4 +257,4 @@ Please include references to the following articles in any publications:
 Please direct any inquiries, bug reports, or requests to join the team to: [ecg-image-kit@dbmi.emory.edu](mailto:ecg-image-kit@dbmi.emory.edu).
 
 
-![Static Badge](https://img.shields.io/badge/ecg_image-kit-blue)
+![Static Badge](https://img.shields.io/badge/ecg_image-kit-blue)
diff --git a/codes/ecg-image-generator/extract_leads.py b/codes/ecg-image-generator/extract_leads.py
@@ -18,7 +18,7 @@
 format_4_by_3 = [["I", "II", "III"], ["aVR", "aVL", "aVF", "AVR", "AVL", "AVF"], ["V1", "V2", "V3"], ["V4", "V5", "V6"]]
 
 # Run script.
-def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,add_bw,show_grid,add_print, start_index = -1, store_configs=False, store_text_bbox=True,key='val',resolution=100,units='inches',papersize='',add_lead_names=True,pad_inches=1,template_file=os.path.join('TemplateFiles','TextFile1.txt'),font_type=os.path.join('Fonts','Times_New_Roman.ttf'),standard_colours=5,full_mode='II',bbox = False,columns=-1):
+def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,add_bw,show_grid, add_print, mask_unplotted_samples = False, start_index = -1, store_configs=False, store_text_bbox=True,key='val',resolution=100,units='inches',papersize='',add_lead_names=True,pad_inches=1,template_file=os.path.join('TemplateFiles','TextFile1.txt'),font_type=os.path.join('Fonts','Times_New_Roman.ttf'),standard_colours=5,full_mode='II',bbox = False,columns=-1):
 
     # Extract a reduced-lead set from each pair of full-lead header and recording files.
     full_header_file = header_file
@@ -70,9 +70,6 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
     if(recording.shape[0]>recording.shape[1]):
        recording = np.transpose(recording)
 
-    if recording.shape[1]/rate < 10:
-        return []
-
     record_dict = create_signal_dictionary(recording,full_leads)
 
     gain_index = 0
@@ -95,27 +92,39 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
             if(len(record_dict[key][start:])<int(rate*abs_lead_step)):
                 end_flag = True
                 nanArray = np.empty(len(record_dict[key][start:]))
-                nanArray[:] = np.nan
+                if mask_unplotted_samples:
+                    nanArray[:] = np.nan
+                else:
+                    nanArray[:] = record_dict[key][start:]
                 if(full_mode!='None' and key==full_mode):
-                    segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
+                    if 'full'+full_mode not in segmented_ecg_data.keys():
+                        segmented_ecg_data['full'+full_mode] = nanArray.tolist()
+                    else:
+                        segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
                 if(key!='full'+full_mode):
-                    segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
+                    if key not in segmented_ecg_data.keys():
+                        segmented_ecg_data[key] = nanArray.tolist()
+                    else:
+                        segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
             else:
-                shilftedStart = start
+                shiftedStart = start
                 if columns == 4 and key in format_4_by_3[1]:
-                    shilftedStart = start + int(rate*lead_length_in_seconds)
+                    shiftedStart = start + int(rate*lead_length_in_seconds)
                 elif columns == 4 and key in format_4_by_3[2]:
-                    shilftedStart = start + int(2*rate*lead_length_in_seconds)
+                    shiftedStart = start + int(2*rate*lead_length_in_seconds)
                 elif columns == 4 and key in format_4_by_3[3]:
-                    shilftedStart = start + int(3*rate*lead_length_in_seconds)
-                end = shilftedStart + int(rate*lead_length_in_seconds)
+                    shiftedStart = start + int(3*rate*lead_length_in_seconds)
+                end = shiftedStart + int(rate*lead_length_in_seconds)
 
                 if(key!='full'+full_mode):
-                    frame[key] = samples_to_volts(record_dict[key][shilftedStart:end],adc[gain_index])
+                    frame[key] = samples_to_volts(record_dict[key][shiftedStart:end],adc[gain_index])
                     frame[key] = center_function(frame[key])
 
-                    nanArray = np.empty((int(shilftedStart - start)))
-                    nanArray[:] = np.nan
+                    nanArray = np.empty((int(shiftedStart - start)))
+                    if mask_unplotted_samples:
+                        nanArray[:] = np.nan
+                    else:
+                        nanArray[:] = record_dict[key][start: shiftedStart]
                     if columns == 4 and key not in format_4_by_3[0]:
                         if key not in segmented_ecg_data.keys():
                             segmented_ecg_data[key] = nanArray.tolist()
@@ -126,8 +135,12 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
                     else:
                         segmented_ecg_data[key] = segmented_ecg_data[key] + frame[key].tolist()
 
-                    nanArray = np.empty((int(abs_lead_step*rate - (end - shilftedStart) - (shilftedStart - start))))
-                    nanArray[:] = np.nan
+                    nanArray = np.empty((int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))))
+                    nanArray_len = int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))
+                    if mask_unplotted_samples:
+                        nanArray[:] = np.nan
+                    else:
+                        nanArray[:] = record_dict[key][end: end+nanArray_len]
                     segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
                 if(full_mode!='None' and key==full_mode):
                     if(len(record_dict[key][start:])>int(rate*10)):
@@ -157,27 +170,41 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
                 if(len(record_dict[key][start:])<int(rate*abs_lead_step)):
                     end_flag = True
                     nanArray = np.empty(len(record_dict[key][start:]))
-                    nanArray[:] = np.nan
+                    if mask_unplotted_samples:
+                        nanArray[:] = np.nan
+                    else:
+                        nanArray[:] = record_dict[key][start:]
+
                     if(full_mode!='None' and key==full_mode):
-                        segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
+                        if 'full'+full_mode not in segmented_ecg_data.keys():
+                            segmented_ecg_data['full'+full_mode] = nanArray.tolist()
+                        else:
+                            segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
                     if(key!='full'+full_mode):
-                        segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
+                        if key not in segmented_ecg_data.keys():
+                            segmented_ecg_data[key] = nanArray.tolist()
+                        else:
+                            segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
                 else:
-                    shilftedStart = start
+                    shiftedStart = start
                     if columns == 4 and key in format_4_by_3[1]:
-                        shilftedStart = start + int(rate*lead_length_in_seconds)
+                        shiftedStart = start + int(rate*lead_length_in_seconds)
                     elif columns == 4 and key in format_4_by_3[2]:
-                        shilftedStart = start + int(2*rate*lead_length_in_seconds)
+                        shiftedStart = start + int(2*rate*lead_length_in_seconds)
                     elif columns == 4 and key in format_4_by_3[3]:
-                        shilftedStart = start + int(3*rate*lead_length_in_seconds)
-                    end = shilftedStart + int(rate*lead_length_in_seconds)
+                        shiftedStart = start + int(3*rate*lead_length_in_seconds)
+                    end = shiftedStart + int(rate*lead_length_in_seconds)
 
                     if(key!='full'+full_mode):
-                        frame[key] = samples_to_volts(record_dict[key][shilftedStart:end],adc[gain_index])
+                        frame[key] = samples_to_volts(record_dict[key][shiftedStart:end],adc[gain_index])
                         frame[key] = center_function(frame[key])
 
-                        nanArray = np.empty((int(shilftedStart - start)))
-                        nanArray[:] = np.nan
+                        nanArray = np.empty((int(shiftedStart - start)))
+                        if mask_unplotted_samples:
+                            nanArray[:] = np.nan
+                        else:
+                            nanArray[:] = record_dict[key][start: shiftedStart]
+
                         if columns == 4 and key not in format_4_by_3[0]:
                             if key not in segmented_ecg_data.keys():
                                 segmented_ecg_data[key] = nanArray.tolist()
@@ -188,8 +215,13 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
                         else:
                             segmented_ecg_data[key] = segmented_ecg_data[key] + frame[key].tolist()
 
-                        nanArray = np.empty((int(abs_lead_step*rate - (end - shilftedStart) - (shilftedStart - start))))
-                        nanArray[:] = np.nan
+                        nanArray = np.empty((int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))))
+                        nanArray_len = int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))
+                        if mask_unplotted_samples:
+                            nanArray[:] = np.nan
+                        else:
+                            nanArray[:] = record_dict[key][end: end+nanArray_len]
+
                         segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
                     if(full_mode!='None' and key==full_mode):
                         if(len(record_dict[key][start:])>int(rate*10)):
@@ -213,7 +245,7 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
     outfile_array = []
 
     name, ext = os.path.splitext(full_header_file)
-    write_wfdb_file(segmented_ecg_data, name, rate, header_file, output_directory, full_mode)
+    write_wfdb_file(segmented_ecg_data, name, rate, header_file, output_directory, full_mode, mask_unplotted_samples)
 
     for i in range(len(ecg_frame)):
         dc = add_dc_pulse.rvs()

diff --git a/codes/ecg-image-generator/gen_ecg_image_from_data.py b/codes/ecg-image-generator/gen_ecg_image_from_data.py
@@ -27,6 +27,7 @@ def get_parser():
     parser.add_argument('-ph','--print_header',action="store_true",default=False)
     parser.add_argument('--num_columns',type=int,default = -1)
     parser.add_argument('--full_mode', type=str,default='II')
+    parser.add_argument('--mask_unplotted_samples', action="store_true", default=False)
 
     parser.add_argument('-l', '--link', type=str, required=False,default='https://www.physionet.org/content/ptbdb/1.0.0/')
     parser.add_argument('-n','--num_words',type=int,required=False,default=5)
@@ -121,7 +122,7 @@ def run_single_file(args):
         else:
             standard_colours = False
 
-        out_array = get_paper_ecg(input_file=filename,header_file=header, start_index=args.start_index, store_configs=args.store_config, store_text_bbox=args.store_text_bounding_box, output_directory=args.output_directory,resolution=resolution,papersize=papersize,add_lead_names=lead,add_dc_pulse=bernoulli_dc,add_bw=bernoulli_bw,show_grid=bernoulli_grid,add_print=bernoulli_add_print,pad_inches=padding,font_type=font,standard_colours=standard_colours,full_mode=args.full_mode,bbox = args.bbox, columns = args.num_columns, seed=args.seed)
+        out_array = get_paper_ecg(input_file=filename,header_file=header, mask_unplotted_samples=args.mask_unplotted_samples, start_index=args.start_index, store_configs=args.store_config, store_text_bbox=args.store_text_bounding_box, output_directory=args.output_directory,resolution=resolution,papersize=papersize,add_lead_names=lead,add_dc_pulse=bernoulli_dc,add_bw=bernoulli_bw,show_grid=bernoulli_grid,add_print=bernoulli_add_print,pad_inches=padding,font_type=font,standard_colours=standard_colours,full_mode=args.full_mode,bbox = args.bbox, columns = args.num_columns, seed=args.seed)
 
         for out in out_array:
             if(args.fully_random):
@@ -172,4 +173,7 @@ def run_single_file(args):
         return len(out_array)
 
 if __name__=='__main__':
+    path = os.path.join(os.getcwd(), sys.argv[0])
+    parentPath = os.path.dirname(path)
+    os.chdir(parentPath)
     run_single_file(get_parser().parse_args(sys.argv[1:]))
diff --git a/codes/ecg-image-generator/gen_ecg_images_from_data_batch.py b/codes/ecg-image-generator/gen_ecg_images_from_data_batch.py
@@ -25,6 +25,7 @@ def get_parser():
     parser.add_argument('-ph','--print_header', action="store_true",default=False)
     parser.add_argument('--num_columns',type=int,default = -1)
     parser.add_argument('--full_mode', type=str,default='II')
+    parser.add_argument('--mask_unplotted_samples', action="store_true", default=False)
 
     parser.add_argument('-l', '--link', type=str, required=False,default='')
     parser.add_argument('-n','--num_words',type=int,required=False,default=5)
@@ -102,11 +103,13 @@ def run(args):
 
             folder_struct_list = full_header_file.split('/')[:-1]
             args.output_directory = os.path.join(original_output_dir, '/'.join(folder_struct_list))
-
             i += run_single_file(args)
 
             if(args.num_images != -1 and i >= args.num_images):
                 break
 
 if __name__=='__main__':
+    path = os.path.join(os.getcwd(), sys.argv[0])
+    parentPath = os.path.dirname(path)
+    os.chdir(parentPath)
     run(get_parser().parse_args(sys.argv[1:]))
diff --git a/codes/ecg-image-generator/helper_functions.py b/codes/ecg-image-generator/helper_functions.py
@@ -250,7 +250,7 @@ def convert_inches_to_volts(inches):
 def convert_inches_to_seconds(inches):
     return float(inches*1.016)
 
-def write_wfdb_file(ecg_frame, filename, rate, header_file, write_dir, full_mode):
+def write_wfdb_file(ecg_frame, filename, rate, header_file, write_dir, full_mode, mask_unplotted_samples):
     full_header = load_header(header_file)
     full_leads = get_leads(full_header)
     full_leads = standardize_leads(full_leads)
@@ -289,3 +289,10 @@ def write_wfdb_file(ecg_frame, filename, rate, header_file, write_dir, full_mode
         for line in header.comments:
             f.write("#" + line)
             f.write("\n")
+
+        if mask_unplotted_samples:
+            f.write("#mask_unplotted_samples: True")
+            f.write("\n")
+        else:
+            f.write("#mask_unplotted_samples: False")
+            f.write("\n")
diff --git a/codes/ecg-image-generator/requirements.txt b/codes/ecg-image-generator/requirements.txt
@@ -2,19 +2,20 @@ beautifulsoup4==4.12.2
 imageio==2.9.0
 imgaug==0.4.0
 imutils==0.5.4
-numpy==1.24.3
-keras==2.13.1
+keras==2.14.0
 opencv_python==4.6.0.66
-pandas==1.4.1
-Pillow==9.5.0
+pillow==9.5.0
 requests==2.21.0
 scikit-image==0.20.0
 scikit-learn==1.3.2
-scipy==1.9.1
+scipy==1.10.0
 seaborn==0.12.2
 spacy==3.0.8
-tensorflow==2.13.0
+tensorflow==2.14.0
 validators==0.18.2
-wfdb==3.4.1
-matplotlib
-html5lib==1.1
+matplotlib==3.8.3
+html5lib==1.1
+joblib==1.3.2
+numpy==1.26.2
+pandas==1.5.3
+wfdb==4.1.2