Merge branch 'publication' of github.com:rpgroup-pboc/vdj into public…

…ation
RPGroup-PBoC · May 19, 2020 · 948f5ac · 948f5ac
2 parents 7ccba79 + a7637f7
commit 948f5ac
Show file tree

Hide file tree

Showing 16 changed files with 372 additions and 102 deletions.
diff --git a/code/figures/Fig_point_mutants.py b/code/figures/Fig_point_mutants.py
diff --git a/code/figures/SiFig_bootstrap_mutant.py b/code/figures/SiFig_bootstrap_mutant.py
@@ -57,7 +57,7 @@
 
 fig, ax = plt.subplots(1, 1, figsize=(5, 5))
 ax.set_ylim([-0.01, 1.01])
-ax.plot(x, y, color='dodgerblue', lw=2)
+ax.step(x, y, color='dodgerblue', lw=2)
 ax.axvline(bs_df['loops_per_bead'].values[0], 0, 1.0, color='grey', lw=2)
 for percentile in col_names:
     ax.fill_betweenx(y_short, bs_df[percentile[0]].values[0], bs_df[[percentile[1]]].values[0],

diff --git a/code/figures/SiFig_coding_flanks.py b/code/figures/SiFig_coding_flanks.py
@@ -2,7 +2,7 @@
 Comparison of Coding Flank Effects and Critical SpacerC1A Mutant
 --------------------------------------------------------------------------------
 Author: Soichi Hirokawa
-Last Modified: September 25,2019
+Last Modified: January 7, 2020
 License: MIT
 
 Description
@@ -62,7 +62,7 @@
     dfs.append(dwell_dist)
 dwell_dist, cut_dist, unloop_dist = dfs
 
-
+#%%
 fig = plt.figure()
 ax = plt.gca()
 ax.get_xaxis().set_ticks([])
@@ -94,20 +94,26 @@
 gs2 = gridspec.GridSpec(1, 3)
 ax_dwell_cut = fig.add_subplot(gs2[0])
 ax_dwell_cut.set_xscale('log')
+ax_dwell_cut.text(18, 0.1, 'cutting\nonly',
+                horizontalalignment='center', fontsize=14)
 ax_dwell_cut.set_xlabel('dwell time [min]', fontsize=16)
 ax_dwell_cut.set_ylabel('ECDF', fontsize=16)
 
 ax_dwell_unloop = fig.add_subplot(gs2[1])
 ax_dwell_unloop.set_xscale('log')
+ax_dwell_unloop.text(18, 0.1, 'unlooping\nonly',
+                horizontalalignment='center', fontsize=14)
 ax_dwell_unloop.set_xlabel('dwell time [min]', fontsize=16)
 ax_dwell_unloop.set_yticklabels([])
 
 ax_dwell_all = fig.add_subplot(gs2[2])
 ax_dwell_all.set_xscale('log')
+ax_dwell_all.text(16, 0.1, 'cutting and\nunlooping',
+                horizontalalignment='center', fontsize=14)
 ax_dwell_all.set_xlabel('dwell time [min]', fontsize=16)
 ax_dwell_all.set_yticklabels([])
 
-gs2.tight_layout(fig, rect=[0, 0, 1.5, 0.8])
+gs2.tight_layout(fig, rect=[0, 0, 1.56, 0.8])
 
 seqs = {'V4-57-1 (ref)':0.25, '12CodC6A':0.5}
 colors = {'V4-57-1 (ref)':'slategrey', '12CodC6A':'dodgerblue'}
@@ -126,13 +132,13 @@
         ax_conf_int.text(seqs[seq] + perc_widths[g] - 0.015, 0.05, str(int(g)) + '%',
                         ha='center', va='top')
 
-    ax_dwell_cut.plot(cut_dist[cut_dist['mutant']==seq]['x'], 
+    ax_dwell_cut.step(cut_dist[cut_dist['mutant']==seq]['x'], 
                         cut_dist[cut_dist['mutant']==seq]['y'],
                         lw=2, color=colors[seq])
-    ax_dwell_unloop.plot(unloop_dist[unloop_dist['mutant']==seq]['x'], 
+    ax_dwell_unloop.step(unloop_dist[unloop_dist['mutant']==seq]['x'], 
                         unloop_dist[unloop_dist['mutant']==seq]['y'],
                         lw=2, color=colors[seq])
-    ax_dwell_all.plot(dwell_dist[dwell_dist['mutant']==seq]['x'], 
+    ax_dwell_all.step(dwell_dist[dwell_dist['mutant']==seq]['x'], 
                         dwell_dist[dwell_dist['mutant']==seq]['y'],
                         lw=2, color=colors[seq])
     ax_posts.plot(cut_posts[cut_posts['mutant']==seq]['probability'],
@@ -162,7 +168,7 @@
 ax_conf_int.set_yticklabels([])
 ax_conf_int.set_facecolor('white')
 
-gs = gridspec.GridSpec(1, 3, hspace=0.2, wspace=0.4)
+gs = gridspec.GridSpec(1, 3, hspace=0.2, wspace=0.6)
 ax_loop = fig.add_subplot(gs[0])
 ax_loop.set_xticklabels([])
 ax_loop.set_xlim([0.125, 0.625])
@@ -178,21 +184,27 @@
 
 gs2 = gridspec.GridSpec(1, 3)
 ax_dwell_cut = fig.add_subplot(gs2[0])
+ax_dwell_cut.text(12, 0.1, 'cutting\nonly', 
+                horizontalalignment='center', fontsize=14)
 ax_dwell_cut.set_xscale('log')
 ax_dwell_cut.set_xlabel('dwell time [min]', fontsize=16)
 ax_dwell_cut.set_ylabel('ECDF', fontsize=16)
 
 ax_dwell_unloop = fig.add_subplot(gs2[1])
+ax_dwell_unloop.text(12, 0.1, 'unlooping\nonly', 
+                horizontalalignment='center', fontsize=14)
 ax_dwell_unloop.set_xscale('log')
 ax_dwell_unloop.set_xlabel('dwell time [min]', fontsize=16)
 ax_dwell_unloop.set_yticklabels([])
 
 ax_dwell_all = fig.add_subplot(gs2[2])
+ax_dwell_all.text(10, 0.1, 'cutting and\nunlooping',
+                horizontalalignment='center', fontsize=14)
 ax_dwell_all.set_xscale('log')
 ax_dwell_all.set_xlabel('dwell time [min]', fontsize=16)
 ax_dwell_all.set_yticklabels([])
 
-gs2.tight_layout(fig, rect=[0, 0, 1.5, 0.8])
+gs2.tight_layout(fig, rect=[0, 0, 1.55, 0.8])
 
 perc_widths = {p:w for p, w in zip(np.sort(loop['percentile'].unique()), np.arange(0.03, 0.21, 0.03))}
 
@@ -209,13 +221,13 @@
         ax_conf_int.text(seqs[seq] + perc_widths[g] - 0.015, 0.05, str(int(g)) + '%',
                         ha='center', va='top')
 
-    ax_dwell_cut.plot(cut_dist[cut_dist['mutant']==seq]['x'], 
+    ax_dwell_cut.step(cut_dist[cut_dist['mutant']==seq]['x'], 
                         cut_dist[cut_dist['mutant']==seq]['y'],
                         lw=2, color=colors[seq])
-    ax_dwell_unloop.plot(unloop_dist[unloop_dist['mutant']==seq]['x'], 
+    ax_dwell_unloop.step(unloop_dist[unloop_dist['mutant']==seq]['x'], 
                         unloop_dist[unloop_dist['mutant']==seq]['y'],
                         lw=2, color=colors[seq])
-    ax_dwell_all.plot(dwell_dist[dwell_dist['mutant']==seq]['x'], 
+    ax_dwell_all.step(dwell_dist[dwell_dist['mutant']==seq]['x'], 
                         dwell_dist[dwell_dist['mutant']==seq]['y'],
                         lw=2, color=colors[seq])
     ax_posts.plot(cut_posts[cut_posts['mutant']==seq]['probability'],
@@ -227,4 +239,6 @@
 
     ax_conf_int.text(seqs[seq] + 0.09, 0.65, seq, ha='center', va='bottom', fontsize=12)
 plt.savefig('../../figures/SiFig_spacerC1A_endogenous_cmparison.pdf', 
-            bbox_inches='tight', facecolor='white')
+            bbox_inches='tight', facecolor='white')
+
+# %%
diff --git a/code/figures/SubFig_reference_bootstrap.py b/code/figures/SubFig_reference_bootstrap.py
@@ -0,0 +1,80 @@
+"""
+Representative Looping Frequency Bootstrap and 95% Confidence Interval
+--------------------------------------------------------------------------------
+Author: Soichi Hirokawa
+Last Modified: January 7, 2020
+License: MIT
+
+Description
+--------------------------------------------------------------------------------
+This script generates the subfigure in the manuscript which shows a 
+representative bootstrap replicate distribution and the 95% confidence interval.
+
+Notes
+--------------------------------------------------------------------------------
+This script is designed to be executed from the `code/figures` directory and uses 
+a relative path to load the necessary CSV files.
+"""
+import numpy as np
+import pandas as pd
+import vdj.io
+import vdj.viz
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+vdj.viz.plotting_style()
+
+
+# Upload V4-57-1 sequence looping dataset
+data = pd.read_csv('../../data/compiled_looping_events.csv', comment='#')
+data = data[(data['mutant']=='WT12rss') & (data['hmgb1']==80) & 
+                (data['salt']=='Mg')]
+
+percentiles = [2.5, 97.5]
+col_names = ["bs_95_low", "bs_95_high"]
+bs_reps = int(1E6)
+bs_df = pd.DataFrame([])
+sampling = np.random.choice(data['n_loops'].values,size=(len(data), bs_reps),
+                            replace=True)
+loop_freq = np.sum(sampling, axis=0) / len(data)
+df_dict = {'mutant':'V4-57-1', 'salt':'Mg', 'hmgb1':80,
+            'n_loops':data['n_loops'].sum(), 'n_beads':len(data),
+            'loops_per_bead':data['n_loops'].sum() / len(data)}
+computed_percentiles = np.percentile(loop_freq, percentiles)
+for i,col in zip(computed_percentiles,col_names):
+    df_dict[col] = i
+
+bs_df = bs_df.append(df_dict, ignore_index=True)
+
+# Form ECDFs
+x = list(np.sort(loop_freq))
+y = list(np.arange(0, bs_reps, 1) / bs_reps)
+y_short = [-1, 2]
+text_perc = '95%'
+
+true_loops_val = y[x.index(bs_df['loops_per_bead'].values[0])]
+#%%
+fig, ax = plt.subplots(1, 1, figsize=(2,4))
+ax.set_xlim([-1, 250000])
+ax.hist(loop_freq, color='tomato', bins=20, zorder=10,
+        orientation='horizontal')
+ax.axhline(bs_df['loops_per_bead'].values[0], 0, true_loops_val, 
+            color='slategrey', ls='--', alpha=0.4, lw=2)
+
+ax.scatter(true_loops_val, bs_df['loops_per_bead'], color='slategrey',
+            s=50, alpha=0.7)
+ax.vlines(true_loops_val, bs_df[col_names[0]].values[0],
+        bs_df[col_names[1]].values[0], alpha=0.7, 
+        ls='-', color='slategrey', lw=3)
+
+_ = ax.set_ylim([loop_freq.min(),loop_freq.max()])
+_ = ax.set_ylabel('bootstrapped\nlooping frequency', fontsize=16)
+_ = ax.set_xlabel('counts', fontsize=16)
+_ = ax.set_ylim([0, 0.6])
+ytick = np.arange(0.0,0.7,0.1)
+_ = ax.set_yticks(ytick)
+_ = ax.set_yticklabels(['%.1f' %n for n in ytick])
+_ = ax.set_xticklabels([])
+
+fig.savefig('../../figures/SubFigXB_reference_bootstrap.pdf',
+            bbox_inches='tight', facecolor='white')
+# %%
diff --git a/code/figures/SubFig_reference_dwell.py b/code/figures/SubFig_reference_dwell.py
@@ -0,0 +1,56 @@
+"""
+Dwell Time Distribution and Quartiles Subfigure
+--------------------------------------------------------------------------------
+Author: Soichi Hirokawa
+Last Modified: January 7, 2020
+License: MIT
+
+Description
+--------------------------------------------------------------------------------
+This script generates the subfigure for the dwell time distribution histogram
+and how the quartiles are identified for subfigure panel C
+
+Notes
+--------------------------------------------------------------------------------
+This script is designed to be executed from the `code/figures` directory and uses 
+a relative path to load the necessary CSV files.
+"""
+import numpy as np
+import pandas as pd
+import vdj.io
+import vdj.viz
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+vdj.viz.plotting_style()
+
+# Load the dwell times
+dwell = pd.read_csv('../../data/compiled_dwell_times.csv', comment='#')
+dwell = dwell[(dwell['salt']=='Mg') & (dwell['hmgb1']==80) & (dwell['mutant']=='WT12rss')]
+
+#%%
+fig,ax = plt.subplots(1, 1, figsize=(9,3))
+bins=np.arange(0, 20, 1.0)
+ax.hist(dwell['dwell_time_min'], color='#e28371', bins=bins)
+ax.plot(dwell['dwell_time_min'].median(), 15, color='dodgerblue', lw=1.25,
+            ms=25, zorder=10, marker='o', markerfacecolor='white')
+ax.hlines(15, dwell['dwell_time_min'].quantile(0.25),
+        dwell['dwell_time_min'].quantile(0.75), color='dodgerblue',
+        lw=5, ls='-', zorder=10)
+ax.text(dwell['dwell_time_min'].median(), 14.6, 'N', fontsize=18, zorder=20,
+        color='dodgerblue', horizontalalignment='center',
+        verticalalignment='center')
+
+_ = ax.set_xticks(np.arange(0, 30, 5))
+_ = ax.set_xticklabels(np.arange(0, 30, 5), fontsize=18)
+_ = ax.set_xlim([0, 20])
+_ = ax.set_yticks(np.arange(0, 40, 10))
+_ = ax.set_yticklabels(np.arange(0, 40, 10), fontsize=18)
+_ = ax.set_ylabel('counts', fontsize=28)
+_ = ax.set_xlabel('time [min]', fontsize=28)
+_ = ax.tick_params(direction='out', length=0, width=2,
+                labelsize=20, right=False, top=False)
+
+fig.savefig('../../figures/SubFigB_reference_dwell_histogram.pdf', bbox_inches='tight',
+            facecolor='white')
+
+# %%
diff --git a/code/figures/SubFig_reference_posterior.py b/code/figures/SubFig_reference_posterior.py
@@ -0,0 +1,72 @@
+"""
+Posterior Distribution, and Mean and SD
+--------------------------------------------------------------------------------
+Author(s): Soichi Hirokawa
+Last Modified: January 7, 2020
+License: MIT
+
+Description
+--------------------------------------------------------------------------------
+This script generates the probability distribution that the reference sequence
+has a cutting probability p_cut and shows how the mean and standard deviation 
+pertain to the full distribution.
+
+Notes
+--------------------------------------------------------------------------------
+This script is designed to be run from the `code/figures` directory. It accesses 
+the proper CSV file through a relative path to the `data` folder
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import vdj.viz
+import vdj.io
+vdj.viz.plotting_style()
+
+# Load all cutting probability estimates taking gaussian approximation.
+cut_data = pd.read_csv('../../data/pooled_cutting_probability.csv', comment='#')
+cut_data = cut_data[(cut_data['hmgb1'] == 80) & (cut_data['salt']=='Mg') & (cut_data['mutant']=='WT12rss')]
+
+# Load the precomputed posterior distributioons
+cut_posts = pd.read_csv('../../data/pooled_cutting_probability_posteriors.csv', 
+                        comment='#')
+cut_posts = cut_posts[(cut_posts['hmgb1']==80) & (cut_posts['salt']=='Mg') & (cut_posts['mutant']=='WT12rss')]
+
+#%%
+fig, ax = plt.subplots(1, 1, figsize=(4.1, 2.3))
+ax.fill_between(cut_posts['probability'], 0, cut_posts['posterior'],
+                color='slategrey', alpha=0.4)
+ax.plot(cut_posts['probability'], cut_posts['posterior'], color='white')
+ax.plot(cut_data['mean'], cut_posts['posterior'].max()/3, zorder=10,
+            color='dodgerblue', marker='o', ms=15, markerfacecolor='white')
+ax.hlines(cut_posts['posterior'].max()/3, cut_data['mean'] - cut_data['std'],
+        cut_data['mean'] + cut_data['std'], color='dodgerblue', zorder=10,
+        lw=4)
+
+_ = ax.set_xlim([0.3, 0.7])
+_ = ax.set_ylim([0.0, cut_posts['posterior'].max()+0.005])
+_ = ax.set_ylabel('$P(p_\mathrm{cut} | n_\mathrm{loops}, n_\mathrm{cuts})$',
+                fontsize=15)
+_ = ax.set_xlabel(r'$p_\mathrm{cut}$', fontsize=15)
+
+bead_stats = '\n'.join((
+    'V4-57-1 12RSS',
+    '(reference)',
+    r'$n_\mathrm{loops}=%i$' % (cut_data['n_loops'], ),
+    r'$n_\mathrm{cuts}=%i$' % (cut_data['n_cuts'], )))
+
+bbox_props = dict(boxstyle='square', edgecolor='k', facecolor='white', alpha=0.5)
+
+#_ = ax.text(0.66, 0.051, bead_stats, fontsize=12, horizontalalignment='center',
+#            verticalalignment='top', bbox=bbox_props)
+_ = ax.text(cut_data['mean'], cut_posts['posterior'].max()/3 + 0.008,
+            r'$\mu \pm \sigma$', fontsize=12, verticalalignment='center',
+            horizontalalignment='center')
+_ = ax.text(cut_data['mean'], cut_posts['posterior'].max()/3-0.0005, 'N',
+            fontsize=13, color='dodgerblue', verticalalignment='center', 
+            horizontalalignment='center', zorder=15)
+
+fig.savefig('../../figures/SubFigX_point_posterior_definition.pdf', bbox_inches='tight',
+            facecolor='white')
+
+# %%
diff --git a/figures/Fig_point_mutation_stickplot.pdf b/figures/Fig_point_mutation_stickplot.pdf
diff --git a/figures/SiFig_coding_flank_reference.pdf b/figures/SiFig_coding_flank_reference.pdf
diff --git a/figures/SiFig_coding_flank_spacerC1A_endogenous_comparison.pdf b/figures/SiFig_coding_flank_spacerC1A_endogenous_comparison.pdf
diff --git a/figures/SiFig_spacerC1A_endogenous_cmparison.pdf b/figures/SiFig_spacerC1A_endogenous_cmparison.pdf
diff --git a/figures/SubFigB_reference_dwell_histogram.pdf b/figures/SubFigB_reference_dwell_histogram.pdf
diff --git a/figures/SubFigXB_reference_bootstrap.pdf b/figures/SubFigXB_reference_bootstrap.pdf
diff --git a/figures/SubFigXC_point_cut.pdf b/figures/SubFigXC_point_cut.pdf
diff --git a/figures/SubFigXC_point_dwell.pdf b/figures/SubFigXC_point_dwell.pdf
diff --git a/figures/SubFigXC_point_loop.pdf b/figures/SubFigXC_point_loop.pdf
diff --git a/figures/SubFigX_point_posterior_definition.pdf b/figures/SubFigX_point_posterior_definition.pdf