Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
rsameni committed Apr 11, 2024
2 parents 36dabf5 + 56b907f commit fd13a63
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 45 deletions.
5 changes: 3 additions & 2 deletions codes/ecg-image-generator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ The basic mode of the tool creates ECG images without distortions. The mode of o
- `--print_header`: Add text from header file on all the generated images; default: False
- `--num_columns`: Number of columns in which the ECG leads are plotted. The default (-1) plots a single column for 2-lead data and 4 columns for 12-lead data or any other number of leads. Default: -1; type: int
- `--full_mode`: Sets the lead to add at the bottom of the paper ECG as a long strip, obtained from the WFDB record's `.hea` header file. If lead II is not available, the first lead from the header file is plotted instead; default: `'II'`; type: str
- `--mask_unplotted_samples`: Mask the samples that are not plotted in the images in the generated WFDB signal file; default: False. For example, for the 3x4 format, the code plots 2.5 seconds of each lead on the image and saves the complete signal in the WFDB file. If the flag is set, the code masks the part of the signal not plotted in the image (in this case, t > 2.5 seconds) with NaN values in the modified WFDB file.
- `--num_images`: Number of ECG images to be generated; default: all files in the input directory; type: int
- `--deterministic_lead`: Remove lead names from all generated images, default=False.
- `--random_resolution`: Generate images at random resolutions. If True, the resolution is randomly picked from the range [50, `-r`]; otherwise every image is generated at the `-r` resolution; default: False
Expand Down Expand Up @@ -246,8 +247,8 @@ Please include references to the following articles in any publications:
2. ECG-Image-Kit: A Toolkit for Synthesis, Analysis, and Digitization of Electrocardiogram Images, (2024). URL: https://github.com/alphanumericslab/ecg-image-kit
## Contributors
- Kshama Kodthalu Shivashankara, School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, US
- Deepanshi, Department of Biomedical Informatics, Emory University, GA, US
- Kshama Kodthalu Shivashankara, School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, US
- Matthew A Reyna, Department of Biomedical Informatics, Emory University, GA, US
- Gari D Clifford, Department of Biomedical Informatics, Emory University, GA, US
- Reza Sameni (contact person), Department of Biomedical Informatics, Emory University, GA, US
Expand All @@ -256,4 +257,4 @@ Please include references to the following articles in any publications:
Please direct any inquiries, bug reports or requests for joining the team to: [ecg-image-kit@dbmi.emory.edu](mailto:ecg-image-kit@dbmi.emory.edu).
![Static Badge](https://img.shields.io/badge/ecg_image-kit-blue)
![Static Badge](https://img.shields.io/badge/ecg_image-kit-blue)
94 changes: 63 additions & 31 deletions codes/ecg-image-generator/extract_leads.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
format_4_by_3 = [["I", "II", "III"], ["aVR", "aVL", "aVF", "AVR", "AVL", "AVF"], ["V1", "V2", "V3"], ["V4", "V5", "V6"]]

# Run script.
def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,add_bw,show_grid,add_print, start_index = -1, store_configs=False, store_text_bbox=True,key='val',resolution=100,units='inches',papersize='',add_lead_names=True,pad_inches=1,template_file=os.path.join('TemplateFiles','TextFile1.txt'),font_type=os.path.join('Fonts','Times_New_Roman.ttf'),standard_colours=5,full_mode='II',bbox = False,columns=-1):
def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,add_bw,show_grid, add_print, mask_unplotted_samples = False, start_index = -1, store_configs=False, store_text_bbox=True,key='val',resolution=100,units='inches',papersize='',add_lead_names=True,pad_inches=1,template_file=os.path.join('TemplateFiles','TextFile1.txt'),font_type=os.path.join('Fonts','Times_New_Roman.ttf'),standard_colours=5,full_mode='II',bbox = False,columns=-1):

# Extract a reduced-lead set from each pair of full-lead header and recording files.
full_header_file = header_file
Expand Down Expand Up @@ -70,9 +70,6 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
if(recording.shape[0]>recording.shape[1]):
recording = np.transpose(recording)

if recording.shape[1]/rate < 10:
return []

record_dict = create_signal_dictionary(recording,full_leads)

gain_index = 0
Expand All @@ -95,27 +92,39 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
if(len(record_dict[key][start:])<int(rate*abs_lead_step)):
end_flag = True
nanArray = np.empty(len(record_dict[key][start:]))
nanArray[:] = np.nan
if mask_unplotted_samples:
nanArray[:] = np.nan
else:
nanArray[:] = record_dict[key][start:]
if(full_mode!='None' and key==full_mode):
segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
if 'full'+full_mode not in segmented_ecg_data.keys():
segmented_ecg_data['full'+full_mode] = nanArray.tolist()
else:
segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
if(key!='full'+full_mode):
segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
if key not in segmented_ecg_data.keys():
segmented_ecg_data[key] = nanArray.tolist()
else:
segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
else:
shilftedStart = start
shiftedStart = start
if columns == 4 and key in format_4_by_3[1]:
shilftedStart = start + int(rate*lead_length_in_seconds)
shiftedStart = start + int(rate*lead_length_in_seconds)
elif columns == 4 and key in format_4_by_3[2]:
shilftedStart = start + int(2*rate*lead_length_in_seconds)
shiftedStart = start + int(2*rate*lead_length_in_seconds)
elif columns == 4 and key in format_4_by_3[3]:
shilftedStart = start + int(3*rate*lead_length_in_seconds)
end = shilftedStart + int(rate*lead_length_in_seconds)
shiftedStart = start + int(3*rate*lead_length_in_seconds)
end = shiftedStart + int(rate*lead_length_in_seconds)

if(key!='full'+full_mode):
frame[key] = samples_to_volts(record_dict[key][shilftedStart:end],adc[gain_index])
frame[key] = samples_to_volts(record_dict[key][shiftedStart:end],adc[gain_index])
frame[key] = center_function(frame[key])

nanArray = np.empty((int(shilftedStart - start)))
nanArray[:] = np.nan
nanArray = np.empty((int(shiftedStart - start)))
if mask_unplotted_samples:
nanArray[:] = np.nan
else:
nanArray[:] = record_dict[key][start: shiftedStart]
if columns == 4 and key not in format_4_by_3[0]:
if key not in segmented_ecg_data.keys():
segmented_ecg_data[key] = nanArray.tolist()
Expand All @@ -126,8 +135,12 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
else:
segmented_ecg_data[key] = segmented_ecg_data[key] + frame[key].tolist()

nanArray = np.empty((int(abs_lead_step*rate - (end - shilftedStart) - (shilftedStart - start))))
nanArray[:] = np.nan
nanArray = np.empty((int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))))
nanArray_len = int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))
if mask_unplotted_samples:
nanArray[:] = np.nan
else:
nanArray[:] = record_dict[key][end: end+nanArray_len]
segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
if(full_mode!='None' and key==full_mode):
if(len(record_dict[key][start:])>int(rate*10)):
Expand Down Expand Up @@ -157,27 +170,41 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
if(len(record_dict[key][start:])<int(rate*abs_lead_step)):
end_flag = True
nanArray = np.empty(len(record_dict[key][start:]))
nanArray[:] = np.nan
if mask_unplotted_samples:
nanArray[:] = np.nan
else:
nanArray[:] = record_dict[key][start:]

if(full_mode!='None' and key==full_mode):
segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
if 'full'+full_mode not in segmented_ecg_data.keys():
segmented_ecg_data['full'+full_mode] = nanArray.tolist()
else:
segmented_ecg_data['full'+full_mode] = segmented_ecg_data['full'+full_mode] + nanArray.tolist()
if(key!='full'+full_mode):
segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
if key not in segmented_ecg_data.keys():
segmented_ecg_data[key] = nanArray.tolist()
else:
segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
else:
shilftedStart = start
shiftedStart = start
if columns == 4 and key in format_4_by_3[1]:
shilftedStart = start + int(rate*lead_length_in_seconds)
shiftedStart = start + int(rate*lead_length_in_seconds)
elif columns == 4 and key in format_4_by_3[2]:
shilftedStart = start + int(2*rate*lead_length_in_seconds)
shiftedStart = start + int(2*rate*lead_length_in_seconds)
elif columns == 4 and key in format_4_by_3[3]:
shilftedStart = start + int(3*rate*lead_length_in_seconds)
end = shilftedStart + int(rate*lead_length_in_seconds)
shiftedStart = start + int(3*rate*lead_length_in_seconds)
end = shiftedStart + int(rate*lead_length_in_seconds)

if(key!='full'+full_mode):
frame[key] = samples_to_volts(record_dict[key][shilftedStart:end],adc[gain_index])
frame[key] = samples_to_volts(record_dict[key][shiftedStart:end],adc[gain_index])
frame[key] = center_function(frame[key])

nanArray = np.empty((int(shilftedStart - start)))
nanArray[:] = np.nan
nanArray = np.empty((int(shiftedStart - start)))
if mask_unplotted_samples:
nanArray[:] = np.nan
else:
nanArray[:] = record_dict[key][start: shiftedStart]

if columns == 4 and key not in format_4_by_3[0]:
if key not in segmented_ecg_data.keys():
segmented_ecg_data[key] = nanArray.tolist()
Expand All @@ -188,8 +215,13 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
else:
segmented_ecg_data[key] = segmented_ecg_data[key] + frame[key].tolist()

nanArray = np.empty((int(abs_lead_step*rate - (end - shilftedStart) - (shilftedStart - start))))
nanArray[:] = np.nan
nanArray = np.empty((int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))))
nanArray_len = int(abs_lead_step*rate - (end - shiftedStart) - (shiftedStart - start))
if mask_unplotted_samples:
nanArray[:] = np.nan
else:
nanArray[:] = record_dict[key][end: end+nanArray_len]

segmented_ecg_data[key] = segmented_ecg_data[key] + nanArray.tolist()
if(full_mode!='None' and key==full_mode):
if(len(record_dict[key][start:])>int(rate*10)):
Expand All @@ -213,7 +245,7 @@ def get_paper_ecg(input_file,header_file,output_directory, seed, add_dc_pulse,ad
outfile_array = []

name, ext = os.path.splitext(full_header_file)
write_wfdb_file(segmented_ecg_data, name, rate, header_file, output_directory, full_mode)
write_wfdb_file(segmented_ecg_data, name, rate, header_file, output_directory, full_mode, mask_unplotted_samples)

for i in range(len(ecg_frame)):
dc = add_dc_pulse.rvs()
Expand Down
6 changes: 5 additions & 1 deletion codes/ecg-image-generator/gen_ecg_image_from_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def get_parser():
parser.add_argument('-ph','--print_header',action="store_true",default=False)
parser.add_argument('--num_columns',type=int,default = -1)
parser.add_argument('--full_mode', type=str,default='II')
parser.add_argument('--mask_unplotted_samples', action="store_true", default=False)

parser.add_argument('-l', '--link', type=str, required=False,default='https://www.physionet.org/content/ptbdb/1.0.0/')
parser.add_argument('-n','--num_words',type=int,required=False,default=5)
Expand Down Expand Up @@ -121,7 +122,7 @@ def run_single_file(args):
else:
standard_colours = False

out_array = get_paper_ecg(input_file=filename,header_file=header, start_index=args.start_index, store_configs=args.store_config, store_text_bbox=args.store_text_bounding_box, output_directory=args.output_directory,resolution=resolution,papersize=papersize,add_lead_names=lead,add_dc_pulse=bernoulli_dc,add_bw=bernoulli_bw,show_grid=bernoulli_grid,add_print=bernoulli_add_print,pad_inches=padding,font_type=font,standard_colours=standard_colours,full_mode=args.full_mode,bbox = args.bbox, columns = args.num_columns, seed=args.seed)
out_array = get_paper_ecg(input_file=filename,header_file=header, mask_unplotted_samples=args.mask_unplotted_samples, start_index=args.start_index, store_configs=args.store_config, store_text_bbox=args.store_text_bounding_box, output_directory=args.output_directory,resolution=resolution,papersize=papersize,add_lead_names=lead,add_dc_pulse=bernoulli_dc,add_bw=bernoulli_bw,show_grid=bernoulli_grid,add_print=bernoulli_add_print,pad_inches=padding,font_type=font,standard_colours=standard_colours,full_mode=args.full_mode,bbox = args.bbox, columns = args.num_columns, seed=args.seed)

for out in out_array:
if(args.fully_random):
Expand Down Expand Up @@ -172,4 +173,7 @@ def run_single_file(args):
return len(out_array)

# Script entry point: move into the directory that contains this script, then
# generate the image(s) for a single input file from the parsed CLI arguments.
if __name__=='__main__':
# Build the script's path from the launch directory and argv[0]; if argv[0] is
# already absolute, os.path.join discards the cwd component and returns it as-is.
path = os.path.join(os.getcwd(), sys.argv[0])
# Directory that holds this script.
parentPath = os.path.dirname(path)
# NOTE(review): after this chdir every relative path (including ones passed on
# the command line) resolves against the script's directory. Presumably this is
# so relative resources such as 'TemplateFiles' and 'Fonts' load regardless of
# where the script was launched from -- confirm.
os.chdir(parentPath)
# Parse everything after the script name and run the single-file pipeline.
run_single_file(get_parser().parse_args(sys.argv[1:]))
5 changes: 4 additions & 1 deletion codes/ecg-image-generator/gen_ecg_images_from_data_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def get_parser():
parser.add_argument('-ph','--print_header', action="store_true",default=False)
parser.add_argument('--num_columns',type=int,default = -1)
parser.add_argument('--full_mode', type=str,default='II')
parser.add_argument('--mask_unplotted_samples', action="store_true", default=False)

parser.add_argument('-l', '--link', type=str, required=False,default='')
parser.add_argument('-n','--num_words',type=int,required=False,default=5)
Expand Down Expand Up @@ -102,11 +103,13 @@ def run(args):

folder_struct_list = full_header_file.split('/')[:-1]
args.output_directory = os.path.join(original_output_dir, '/'.join(folder_struct_list))

i += run_single_file(args)

if(args.num_images != -1 and i >= args.num_images):
break

# Script entry point: move into the directory that contains this script, then
# run the batch generation with the parsed CLI arguments.
if __name__=='__main__':
# Build the script's path from the launch directory and argv[0]; if argv[0] is
# already absolute, os.path.join discards the cwd component and returns it as-is.
path = os.path.join(os.getcwd(), sys.argv[0])
# Directory that holds this script.
parentPath = os.path.dirname(path)
# NOTE(review): after this chdir every relative path (including input/output
# directories passed on the command line) resolves against the script's
# directory rather than the launch directory -- confirm this is intended.
os.chdir(parentPath)
# Parse everything after the script name and run the batch pipeline.
run(get_parser().parse_args(sys.argv[1:]))
9 changes: 8 additions & 1 deletion codes/ecg-image-generator/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def convert_inches_to_volts(inches):
def convert_inches_to_seconds(inches):
return float(inches*1.016)

def write_wfdb_file(ecg_frame, filename, rate, header_file, write_dir, full_mode):
def write_wfdb_file(ecg_frame, filename, rate, header_file, write_dir, full_mode, mask_unplotted_samples):
full_header = load_header(header_file)
full_leads = get_leads(full_header)
full_leads = standardize_leads(full_leads)
Expand Down Expand Up @@ -289,3 +289,10 @@ def write_wfdb_file(ecg_frame, filename, rate, header_file, write_dir, full_mode
for line in header.comments:
f.write("#" + line)
f.write("\n")

if mask_unplotted_samples:
f.write("#mask_unplotted_samples: True")
f.write("\n")
else:
f.write("#mask_unplotted_samples: False")
f.write("\n")
19 changes: 10 additions & 9 deletions codes/ecg-image-generator/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@ beautifulsoup4==4.12.2
imageio==2.9.0
imgaug==0.4.0
imutils==0.5.4
numpy==1.24.3
keras==2.13.1
keras==2.14.0
opencv_python==4.6.0.66
pandas==1.4.1
Pillow==9.5.0
pillow==9.5.0
requests==2.21.0
scikit-image==0.20.0
scikit-learn==1.3.2
scipy==1.9.1
scipy==1.10.0
seaborn==0.12.2
spacy==3.0.8
tensorflow==2.13.0
tensorflow==2.14.0
validators==0.18.2
wfdb==3.4.1
matplotlib
html5lib==1.1
matplotlib==3.8.3
html5lib==1.1
joblib==1.3.2
numpy==1.26.2
pandas==1.5.3
wfdb==4.1.2

0 comments on commit fd13a63

Please sign in to comment.