Skip to content

Commit 2bb1bbf

Browse files
committed
metric doc updated. svg support for plot subtool
1 parent efecfe9 commit 2bb1bbf

File tree

5 files changed

+85
-38
lines changed

5 files changed

+85
-38
lines changed

docs/metric.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ To run `metric` refer [here](commands.md/#metric).
55
| Column name | Description |
66
|-----------------------------|----------------------------------------------------------------|
77
| read_id | read_id of the raw signal |
8-
| region | statistics were calculated within this region |
8+
| ref_region | statistics were calculated within this region |
9+
| signal_region | aligned signal region |
910
| total_matches | total number of base to base matches |
1011
| total_deletion_occurrences | total number of deletion occurrences |
1112
| total_insertion_occurrences | total number of insertion occurrences |
@@ -29,3 +30,6 @@ To run `metric` refer [here](commands.md/#metric).
2930
| median_insertion | median of the number of samples in an insertion |
3031
| mean_insertion | mean of the number of samples in an insertion |
3132
| stdev_insertion | standard deviation of the number of samples in an insertion |
33+
| matches | matches array |
34+
| deletions | deletions array |
35+
| insertions | insertions array |

src/metric.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@ def get_metric(args, fout, metric_record, read_id, signal_tuple, sig_algn_data,
5050
match_samples = []
5151
insertion_samples = []
5252
deletion_bases = []
53-
53+
ss_string = []
5454
for i in moves:
55+
ss_string.append(i)
5556
previous_x_coordinate = x_coordinate
5657
if 'D' in i:
5758
i = re.sub('D', '', i)
@@ -86,13 +87,16 @@ def get_metric(args, fout, metric_record, read_id, signal_tuple, sig_algn_data,
8687
break
8788

8889
if sig_algn_data["data_is_rna"] == 1:
89-
region_str = "{}:{}-{}".format(sig_algn_data["ref_name"], sig_algn_data["ref_end"], sig_algn_data["ref_end"] - base_index+1)
90+
ref_region = "{}:{}-{}".format(sig_algn_data["ref_name"], sig_algn_data["ref_end"], sig_algn_data["ref_end"] - base_index+1)
91+
signal_region = "{}-{}".format(x_real[0], x_real[x_coordinate - 1])
9092
# plot_title = f'{sig_algn_data["tag_name"]}[{sig_algn_data["ref_end"]:,}-{sig_algn_data["ref_end"] - base_index+1:,}]{indt}signal: [{int(x_real[0])}-{int(x_real[x_coordinate - 1])}]{indt}deletions(bases): {total_length_deletions} insertions(samples): {total_length_insertions}{indt}{read_id}{indt}signal dir:{draw_data["sig_dir"]}'
9193
else:
92-
region_str = "{}:{}-{}".format(sig_algn_data["ref_name"], sig_algn_data["ref_start"], sig_algn_data["ref_start"] + base_index-1)
94+
ref_region = "{}:{}-{}".format(sig_algn_data["ref_name"], sig_algn_data["ref_start"], sig_algn_data["ref_start"] + base_index-1)
95+
signal_region = "{}-{}".format(x_real[0], x_real[x_coordinate - 1])
9396
# plot_title = f'{sig_algn_data["tag_name"]}[{sig_algn_data["ref_start"]:,}-{sig_algn_data["ref_start"] + base_index-1:,}]{indt}signal: [{int(x_real[0])}-{int(x_real[x_coordinate - 1])}]{indt}deletions(bases): {total_length_deletions} insertions(samples): {total_length_insertions}{indt}{read_id}{indt}signal dir:{draw_data["sig_dir"]}'
9497

95-
metric_record['region'] = region_str
98+
metric_record['ref_region'] = ref_region
99+
metric_record['signal_region'] = signal_region
96100
metric_record['total_matches'] = base_index - total_length_deletions
97101
metric_record['total_deletion_occurrences'] = len(deletion_bases)
98102
metric_record['total_insertion_occurrences'] = len(insertion_samples)
@@ -141,9 +145,10 @@ def get_metric(args, fout, metric_record, read_id, signal_tuple, sig_algn_data,
141145
metric_record['mean_insertion'] = "."
142146
metric_record['stdev_insertion'] = "."
143147

144-
fout.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
148+
fout.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
145149
metric_record['read_id'],
146-
metric_record['region'],
150+
metric_record['ref_region'],
151+
metric_record['signal_region'],
147152
metric_record['total_matches'],
148153
metric_record['total_deletion_occurrences'],
149154
metric_record['total_insertion_occurrences'],
@@ -174,6 +179,9 @@ def get_metric(args, fout, metric_record, read_id, signal_tuple, sig_algn_data,
174179
deletion_bases,
175180
insertion_samples))
176181

182+
if args.extend_0:
183+
fout.write("\t{}".format(ss_string))
184+
177185
fout.write("\n")
178186

179187
def run(args):
@@ -256,14 +264,16 @@ def run(args):
256264
if args.profile != "":
257265
kmer_correction = -1*(plot_utils.search_for_profile_base_shift(args.profile)[0] + plot_utils.search_for_profile_base_shift(args.profile)[1])
258266

259-
metric_header = ("read_id\tregion\t"
267+
metric_header = ("read_id\tref_region\tsignal_region\t"
260268
"total_matches\ttotal_deletion_occurrences\ttotal_insertion_occurrences\t"
261269
"total_length_deletions\ttotal_length_insertions\t"
262270
"min_match\tmax_match\tmode_match\tmedian_matches\tmean_matches\tstdev_matches\t"
263271
"min_deletion\tmax_deletion\tmode_deletion\tmedian_deletion\tmean_deletion\tstdev_deletion\t"
264272
"min_insertion\tmax_insertion\tmode_insertion\tmedian_insertion\tmean_insertion\tstdev_insertion")
265273
if args.extend_0:
266274
metric_header += "\tmatches\tdeletions\tinsertions"
275+
if args.extend_1:
276+
metric_header += "\tss_string"
267277
fout.write("{}\n".format(metric_header))
268278
if use_paf == 1 and plot_sig_ref_flag == 0:
269279
print("Info: Signal to read method using PAF ...")
@@ -343,7 +353,6 @@ def run(args):
343353

344354
# print("plot region: {}-{}\tread_id: {}".format(ref_start, ref_end, read_id))
345355
metric_record['read_id'] = read_id
346-
metric_record['region'] = "{}:{}-{}".format("", ref_start, ref_end)
347356

348357
x = []
349358
x_real = []
@@ -542,7 +551,6 @@ def run(args):
542551
raise Exception("Error: base characters other than A,C,G,T/U,M,R,W,S,Y,K,V,H,D,B,N were detected. Please check your sequence files")
543552

544553
metric_record['read_id'] = read_id
545-
metric_record['region'] = "{}:{}-{}".format(ref_name, ref_start, ref_end)
546554

547555
x = []
548556
x_real = []
@@ -749,7 +757,6 @@ def run(args):
749757
raise Exception("Error: base characters other than A,C,G,T/U,M,R,W,S,Y,K,V,H,D,B,N were detected. Please check your sequence files")
750758

751759
metric_record['read_id'] = read_id
752-
metric_record['region'] = "{}:{}-{}".format(ref_name, ref_start, ref_end)
753760

754761
x = []
755762
x_real = []
@@ -875,6 +882,7 @@ def argparser():
875882
parser.add_argument('--sig_plot_limit', required=False, type=int, default=SIG_PLOT_LENGTH, help="maximum number of signal samples to plot")
876883
parser.add_argument('-o', '--output', required=False, type=str, default="", help="output file")
877884
parser.add_argument('--extend_0', required=False, action='store_true', help="print matches, deletions, insertions arrays")
885+
parser.add_argument('--extend_1', required=False, action='store_true', help="print ss string for the given region")
878886
return parser
879887

880888
if __name__ == "__main__":

src/plot.py

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from bokeh.models import BoxAnnotation, HoverTool, WheelZoomTool, ColumnDataSource, Label, LabelSet, Segment, Toggle, Range1d, FreehandDrawTool, CustomJS
99
from bokeh.layouts import row
1010
from bokeh.colors import RGB
11+
from bokeh.io import export_svg, export_svgs
1112
import pyslow5
1213
import copy
1314
import argparse
@@ -185,14 +186,14 @@ def plot_function(p, read_id, signal_tuple, sig_algn_data, fasta_sequence, base_
185186
toggle_moves.js_link('active', move_annotation_labels, 'visible')
186187

187188
source = ColumnDataSource(data=dict(x=x[:x_coordinate], y=y[:x_coordinate], x_real=x_real[:x_coordinate]))
188-
p.quad(top=y_max, bottom=y_min, left=base_box_details['left'], right=base_box_details['right'], color=base_box_details['fill_color'], alpha=0.75)
189+
p.quad(top=y_max, bottom=y_min, left=base_box_details['left'], right=base_box_details['right'], color=base_box_details['fill_color'], alpha=0.75, visible=draw_data['no_colours'])
189190
p.add_glyph(line_segment_source, glyph)
190191
p.add_layout(base_annotation_labels)
191192
p.add_layout(move_annotation_labels)
192193

193194
p.line('x', 'y', name="sig_plot_line", line_width=2, source=source)
194195
# add a circle renderer with a size, color, and alpha
195-
sample_labels = p.circle(x[:x_coordinate], y[:x_coordinate], radius=draw_data["point_size"], color=sample_label_colors, alpha=0.5)
196+
sample_labels = p.circle(x[:x_coordinate], y[:x_coordinate], radius=draw_data["point_size"], color=sample_label_colors, alpha=0.5, visible=draw_data['no_samples'])
196197
toggle_samples = Toggle(label="samples", button_type="danger", active=True, height=30, width=60)
197198
toggle_samples.js_link('active', sample_labels, 'visible')
198199

@@ -210,27 +211,26 @@ def plot_function(p, read_id, signal_tuple, sig_algn_data, fasta_sequence, base_
210211
p.title = plot_title
211212

212213
if location_plot > (y_max - y_min):
213-
if location_plot > PLOT_X_RANGE:
214-
p.x_range = Range1d(0, PLOT_X_RANGE, bounds=(-1*PLOT_X_PADDING, location_plot+PLOT_X_PADDING))
214+
if location_plot > draw_data['xrange']:
215+
p.x_range = Range1d(0, draw_data['xrange'], bounds=(-1*PLOT_X_PADDING, location_plot+PLOT_X_PADDING))
215216
else:
216217
p.x_range = Range1d(0, location_plot, bounds=(-1*PLOT_X_PADDING, location_plot+PLOT_X_PADDING))
217218

218219
renderer = p.multi_line([[1, 1]], [[1, 1]], line_width=4, alpha=0.4, color='black')
219220
draw_tool = FreehandDrawTool(renderers=[renderer], num_objects=50)
220221
p.add_tools(draw_tool)
221222

222-
x_callback_base_annotation = CustomJS(args=dict(base_annotation_labels=base_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=PLOT_X_RANGE), code="""
223+
x_callback_base_annotation = CustomJS(args=dict(base_annotation_labels=base_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=draw_data['xrange']), code="""
223224
let xzoom = (init_font_size * init_xrange) / (cb_obj.end - cb_obj.start);
224225
base_annotation_labels['text_font_size'] = String(xzoom) + 'pt';
225226
""")
226227
p.x_range.js_on_change('start', x_callback_base_annotation)
227228

228-
x_callback_move_annotation = CustomJS(args=dict(move_annotation_labels=move_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=PLOT_X_RANGE), code="""
229+
x_callback_move_annotation = CustomJS(args=dict(move_annotation_labels=move_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=draw_data['xrange']), code="""
229230
let xzoom = (init_font_size * init_xrange) / (cb_obj.end - cb_obj.start);
230231
move_annotation_labels['text_font_size'] = String(xzoom) + 'pt';
231232
""")
232233
p.x_range.js_on_change('start', x_callback_move_annotation)
233-
234234
layout_ = p, row(toggle_bases, toggle_samples, toggle_moves)
235235
return layout_
236236
def plot_function_fixed_width(p, read_id, signal_tuple, sig_algn_data, fasta_sequence, base_limit, draw_data):
@@ -389,14 +389,14 @@ def plot_function_fixed_width(p, read_id, signal_tuple, sig_algn_data, fasta_seq
389389
fixed_width_x = fixed_width_x[1:]
390390

391391
source = ColumnDataSource(data=dict(x=fixed_width_x[:x_coordinate], y=y[:x_coordinate], x_real=x_real[:x_coordinate]))
392-
p.quad(top=y_max, bottom=y_min, left=base_box_details['left'], right=base_box_details['right'], color=base_box_details['fill_color'], alpha=0.75)
392+
p.quad(top=y_max, bottom=y_min, left=base_box_details['left'], right=base_box_details['right'], color=base_box_details['fill_color'], alpha=0.75, visible=draw_data['no_colours'])
393393
p.add_glyph(line_segment_source, glyph)
394394
p.add_layout(base_annotation_labels)
395395
p.add_layout(move_annotation_labels)
396396

397397
p.line('x', 'y', name="sig_plot_line", line_width=2, source=source)
398398
# add a circle renderer with a size, color, and alpha
399-
sample_labels = p.circle(fixed_width_x[:x_coordinate], y[:x_coordinate], radius=draw_data["point_size"], color=sample_label_colors, alpha=0.5)
399+
sample_labels = p.circle(fixed_width_x[:x_coordinate], y[:x_coordinate], radius=draw_data["point_size"], color=sample_label_colors, alpha=0.5, visible=draw_data['no_samples'])
400400
toggle_samples = Toggle(label="samples", button_type="danger", active=True, height=30, width=60)
401401
toggle_samples.js_link('active', sample_labels, 'visible')
402402

@@ -414,8 +414,8 @@ def plot_function_fixed_width(p, read_id, signal_tuple, sig_algn_data, fasta_seq
414414
p.title = plot_title
415415

416416
if location_plot > (y_max - y_min):
417-
if location_plot > PLOT_X_RANGE:
418-
p.x_range = Range1d(0, PLOT_X_RANGE, bounds=(-1*PLOT_X_PADDING, location_plot+PLOT_X_PADDING))
417+
if location_plot > draw_data['xrange']:
418+
p.x_range = Range1d(0, draw_data['xrange'], bounds=(-1*PLOT_X_PADDING, location_plot+PLOT_X_PADDING))
419419
else:
420420
p.x_range = Range1d(0, location_plot, bounds=(-1*PLOT_X_PADDING, location_plot+PLOT_X_PADDING))
421421
# else:
@@ -425,13 +425,13 @@ def plot_function_fixed_width(p, read_id, signal_tuple, sig_algn_data, fasta_seq
425425
draw_tool = FreehandDrawTool(renderers=[renderer], num_objects=50)
426426
p.add_tools(draw_tool)
427427

428-
x_callback_base_annotation = CustomJS(args=dict(base_annotation_labels=base_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=PLOT_X_RANGE), code="""
428+
x_callback_base_annotation = CustomJS(args=dict(base_annotation_labels=base_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=draw_data['xrange']), code="""
429429
let xzoom = (init_font_size * init_xrange) / (cb_obj.end - cb_obj.start);
430430
base_annotation_labels['text_font_size'] = String(xzoom) + 'pt';
431431
""")
432432
p.x_range.js_on_change('start', x_callback_base_annotation)
433433

434-
x_callback_move_annotation = CustomJS(args=dict(move_annotation_labels=move_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=PLOT_X_RANGE), code="""
434+
x_callback_move_annotation = CustomJS(args=dict(move_annotation_labels=move_annotation_labels, init_font_size=base_annotation_labels.text_font_size[:-2], init_xrange=draw_data['xrange']), code="""
435435
let xzoom = (init_font_size * init_xrange) / (cb_obj.end - cb_obj.start);
436436
move_annotation_labels['text_font_size'] = String(xzoom) + 'pt';
437437
""")
@@ -516,6 +516,9 @@ def run(args):
516516
indt = "\t\t\t\t\t\t\t\t"
517517
draw_data = {}
518518
draw_data["point_size"] = args.point_size
519+
draw_data["no_colours"] = args.no_colours
520+
draw_data["no_samples"] = args.no_samples
521+
draw_data["xrange"] = args.xrange
519522
draw_data["sig_plot_limit"] = args.sig_plot_limit
520523
draw_data["fixed_base_width"] = args.base_width
521524
draw_data["plot_dims"] = {}
@@ -619,8 +622,6 @@ def run(args):
619622

620623
print("plot region: {}-{}\tread_id: {}".format(ref_start, ref_end, read_id))
621624

622-
output_file_name = args.output_dir + "/" + read_id + "_" + args.tag_name + ".html"
623-
624625
x = []
625626
x_real = []
626627
y = []
@@ -709,8 +710,15 @@ def run(args):
709710
else:
710711
layout_ = plot_function(p=p, read_id=read_id, signal_tuple=signal_tuple, sig_algn_data=sig_algn_dic, fasta_sequence=fasta_seq, base_limit=base_limit, draw_data=draw_data)
711712

712-
output_file(output_file_name, title=read_id)
713-
save(layout_)
713+
output_file_name = args.output_dir + "/" + read_id + "_" + args.tag_name
714+
if args.save_svg:
715+
output_file_name += ".svg"
716+
layout_[0].output_backend = "svg"
717+
export_svgs(layout_, filename=output_file_name)
718+
else:
719+
output_file_name += ".html"
720+
output_file(output_file_name, title=read_id)
721+
save(layout_)
714722
print(f'output file: {os.path.abspath(output_file_name)}')
715723

716724
num_plots += 1
@@ -835,8 +843,6 @@ def run(args):
835843
if not bool(re.match('^[ACGTUMRWSYKVHDBN]+$', fasta_seq)):
836844
raise Exception("Error: base characters other than A,C,G,T/U,M,R,W,S,Y,K,V,H,D,B,N were detected. Please check your sequence files")
837845

838-
output_file_name = args.output_dir + "/" + read_id + "_" + args.tag_name + ".html"
839-
840846
x = []
841847
x_real = []
842848
y = []
@@ -934,8 +940,15 @@ def run(args):
934940
else:
935941
layout_ = plot_function(p=p, read_id=read_id, signal_tuple=signal_tuple, sig_algn_data=sig_algn_dic, fasta_sequence=fasta_seq, base_limit=base_limit, draw_data=draw_data)
936942

937-
output_file(output_file_name, title=read_id)
938-
save(layout_)
943+
output_file_name = args.output_dir + "/" + read_id + "_" + args.tag_name
944+
if args.save_svg:
945+
output_file_name += ".svg"
946+
layout_.output_backend = "svg"
947+
export_svgs(layout_, filename=output_file_name)
948+
else:
949+
output_file_name += ".html"
950+
output_file(output_file_name, title=read_id)
951+
save(layout_)
939952
print(f'output file: {os.path.abspath(output_file_name)}')
940953

941954
num_plots += 1
@@ -1055,8 +1068,6 @@ def run(args):
10551068
if not bool(re.match('^[ACGTUMRWSYKVHDBN]+$', fasta_seq)):
10561069
raise Exception("Error: base characters other than A,C,G,T/U,M,R,W,S,Y,K,V,H,D,B,N were detected. Please check your sequence files")
10571070

1058-
output_file_name = args.output_dir + "/" + read_id + "_" + args.tag_name + ".html"
1059-
10601071
x = []
10611072
x_real = []
10621073
y = []
@@ -1151,9 +1162,17 @@ def run(args):
11511162
else:
11521163
layout_ = plot_function(p=p, read_id=read_id, signal_tuple=signal_tuple, sig_algn_data=sig_algn_dic, fasta_sequence=fasta_seq, base_limit=base_limit, draw_data=draw_data)
11531164

1154-
output_file(output_file_name, title=read_id)
1155-
save(layout_)
1165+
output_file_name = args.output_dir + "/" + read_id + "_" + args.tag_name
1166+
if args.save_svg:
1167+
output_file_name += ".svg"
1168+
layout_.output_backend = "svg"
1169+
export_svgs(layout_, filename=output_file_name)
1170+
else:
1171+
output_file_name += ".html"
1172+
output_file(output_file_name, title=read_id)
1173+
save(layout_)
11561174
print(f'output file: {os.path.abspath(output_file_name)}')
1175+
11571176
num_plots += 1
11581177
if num_plots == args.plot_limit:
11591178
break
@@ -1196,6 +1215,10 @@ def argparser():
11961215
parser.add_argument('--sig_plot_limit', required=False, type=int, default=SIG_PLOT_LENGTH, help="maximum number of signal samples to plot")
11971216
parser.add_argument('--bed', required=False, help="bed file with annotations")
11981217
parser.add_argument('--print_bed_labels', required=False, action='store_true', help="draw bed annotations with labels")
1218+
parser.add_argument('--no_colours', required=False, action='store_false', help="hide base colours")
1219+
parser.add_argument('--no_samples', required=False, action='store_false', help="hide sample points")
1220+
parser.add_argument('--save_svg', required=False, action='store_true', help="save as svg. tweak --region and --xrange to capture the necessary part of the plot")
1221+
parser.add_argument('--xrange', required=False, type=int, default=PLOT_X_RANGE, help="initial x range")
11991222
parser.add_argument('-o', '--output_dir', required=True, type=str, default="", help="output dir")
12001223
return parser
12011224

0 commit comments

Comments
 (0)