Skip to content

Commit

Permalink
Merge branch 'publication' of github.com:rpgroup-pboc/vdj into public…
Browse files Browse the repository at this point in the history
…ation
  • Loading branch information
gchure committed May 19, 2020
2 parents 7ccba79 + a7637f7 commit 948f5ac
Show file tree
Hide file tree
Showing 16 changed files with 372 additions and 102 deletions.
226 changes: 137 additions & 89 deletions code/figures/Fig_point_mutants.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion code/figures/SiFig_bootstrap_mutant.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@

fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.set_ylim([-0.01, 1.01])
ax.plot(x, y, color='dodgerblue', lw=2)
ax.step(x, y, color='dodgerblue', lw=2)
ax.axvline(bs_df['loops_per_bead'].values[0], 0, 1.0, color='grey', lw=2)
for percentile in col_names:
ax.fill_betweenx(y_short, bs_df[percentile[0]].values[0], bs_df[[percentile[1]]].values[0],
Expand Down
38 changes: 26 additions & 12 deletions code/figures/SiFig_coding_flanks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Comparison of Coding Flank Effects and Critical SpacerC1A Mutant
--------------------------------------------------------------------------------
Author: Soichi Hirokawa
Last Modified: September 25,2019
Last Modified: January 7, 2020
License: MIT
Description
Expand Down Expand Up @@ -62,7 +62,7 @@
dfs.append(dwell_dist)
dwell_dist, cut_dist, unloop_dist = dfs


#%%
fig = plt.figure()
ax = plt.gca()
ax.get_xaxis().set_ticks([])
Expand Down Expand Up @@ -94,20 +94,26 @@
gs2 = gridspec.GridSpec(1, 3)
ax_dwell_cut = fig.add_subplot(gs2[0])
ax_dwell_cut.set_xscale('log')
ax_dwell_cut.text(18, 0.1, 'cutting\nonly',
horizontalalignment='center', fontsize=14)
ax_dwell_cut.set_xlabel('dwell time [min]', fontsize=16)
ax_dwell_cut.set_ylabel('ECDF', fontsize=16)

ax_dwell_unloop = fig.add_subplot(gs2[1])
ax_dwell_unloop.set_xscale('log')
ax_dwell_unloop.text(18, 0.1, 'unlooping\nonly',
horizontalalignment='center', fontsize=14)
ax_dwell_unloop.set_xlabel('dwell time [min]', fontsize=16)
ax_dwell_unloop.set_yticklabels([])

ax_dwell_all = fig.add_subplot(gs2[2])
ax_dwell_all.set_xscale('log')
ax_dwell_all.text(16, 0.1, 'cutting and\nunlooping',
horizontalalignment='center', fontsize=14)
ax_dwell_all.set_xlabel('dwell time [min]', fontsize=16)
ax_dwell_all.set_yticklabels([])

gs2.tight_layout(fig, rect=[0, 0, 1.5, 0.8])
gs2.tight_layout(fig, rect=[0, 0, 1.56, 0.8])

seqs = {'V4-57-1 (ref)':0.25, '12CodC6A':0.5}
colors = {'V4-57-1 (ref)':'slategrey', '12CodC6A':'dodgerblue'}
Expand All @@ -126,13 +132,13 @@
ax_conf_int.text(seqs[seq] + perc_widths[g] - 0.015, 0.05, str(int(g)) + '%',
ha='center', va='top')

ax_dwell_cut.plot(cut_dist[cut_dist['mutant']==seq]['x'],
ax_dwell_cut.step(cut_dist[cut_dist['mutant']==seq]['x'],
cut_dist[cut_dist['mutant']==seq]['y'],
lw=2, color=colors[seq])
ax_dwell_unloop.plot(unloop_dist[unloop_dist['mutant']==seq]['x'],
ax_dwell_unloop.step(unloop_dist[unloop_dist['mutant']==seq]['x'],
unloop_dist[unloop_dist['mutant']==seq]['y'],
lw=2, color=colors[seq])
ax_dwell_all.plot(dwell_dist[dwell_dist['mutant']==seq]['x'],
ax_dwell_all.step(dwell_dist[dwell_dist['mutant']==seq]['x'],
dwell_dist[dwell_dist['mutant']==seq]['y'],
lw=2, color=colors[seq])
ax_posts.plot(cut_posts[cut_posts['mutant']==seq]['probability'],
Expand Down Expand Up @@ -162,7 +168,7 @@
ax_conf_int.set_yticklabels([])
ax_conf_int.set_facecolor('white')

gs = gridspec.GridSpec(1, 3, hspace=0.2, wspace=0.4)
gs = gridspec.GridSpec(1, 3, hspace=0.2, wspace=0.6)
ax_loop = fig.add_subplot(gs[0])
ax_loop.set_xticklabels([])
ax_loop.set_xlim([0.125, 0.625])
Expand All @@ -178,21 +184,27 @@

gs2 = gridspec.GridSpec(1, 3)
ax_dwell_cut = fig.add_subplot(gs2[0])
ax_dwell_cut.text(12, 0.1, 'cutting\nonly',
horizontalalignment='center', fontsize=14)
ax_dwell_cut.set_xscale('log')
ax_dwell_cut.set_xlabel('dwell time [min]', fontsize=16)
ax_dwell_cut.set_ylabel('ECDF', fontsize=16)

ax_dwell_unloop = fig.add_subplot(gs2[1])
ax_dwell_unloop.text(12, 0.1, 'unlooping\nonly',
horizontalalignment='center', fontsize=14)
ax_dwell_unloop.set_xscale('log')
ax_dwell_unloop.set_xlabel('dwell time [min]', fontsize=16)
ax_dwell_unloop.set_yticklabels([])

ax_dwell_all = fig.add_subplot(gs2[2])
ax_dwell_all.text(10, 0.1, 'cutting and\nunlooping',
horizontalalignment='center', fontsize=14)
ax_dwell_all.set_xscale('log')
ax_dwell_all.set_xlabel('dwell time [min]', fontsize=16)
ax_dwell_all.set_yticklabels([])

gs2.tight_layout(fig, rect=[0, 0, 1.5, 0.8])
gs2.tight_layout(fig, rect=[0, 0, 1.55, 0.8])

perc_widths = {p:w for p, w in zip(np.sort(loop['percentile'].unique()), np.arange(0.03, 0.21, 0.03))}

Expand All @@ -209,13 +221,13 @@
ax_conf_int.text(seqs[seq] + perc_widths[g] - 0.015, 0.05, str(int(g)) + '%',
ha='center', va='top')

ax_dwell_cut.plot(cut_dist[cut_dist['mutant']==seq]['x'],
ax_dwell_cut.step(cut_dist[cut_dist['mutant']==seq]['x'],
cut_dist[cut_dist['mutant']==seq]['y'],
lw=2, color=colors[seq])
ax_dwell_unloop.plot(unloop_dist[unloop_dist['mutant']==seq]['x'],
ax_dwell_unloop.step(unloop_dist[unloop_dist['mutant']==seq]['x'],
unloop_dist[unloop_dist['mutant']==seq]['y'],
lw=2, color=colors[seq])
ax_dwell_all.plot(dwell_dist[dwell_dist['mutant']==seq]['x'],
ax_dwell_all.step(dwell_dist[dwell_dist['mutant']==seq]['x'],
dwell_dist[dwell_dist['mutant']==seq]['y'],
lw=2, color=colors[seq])
ax_posts.plot(cut_posts[cut_posts['mutant']==seq]['probability'],
Expand All @@ -227,4 +239,6 @@

ax_conf_int.text(seqs[seq] + 0.09, 0.65, seq, ha='center', va='bottom', fontsize=12)
plt.savefig('../../figures/SiFig_spacerC1A_endogenous_cmparison.pdf',
bbox_inches='tight', facecolor='white')
bbox_inches='tight', facecolor='white')

# %%
80 changes: 80 additions & 0 deletions code/figures/SubFig_reference_bootstrap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
Representative Looping Frequency Bootstrap and 95% Confidence Interval
--------------------------------------------------------------------------------
Author: Soichi Hirokawa
Last Modified: January 7, 2020
License: MIT
Description
--------------------------------------------------------------------------------
This script generates the subfigure in the manuscript which shows a
representative bootstrap replicate distribution and the 95% confidence interval.
Notes
--------------------------------------------------------------------------------
This script is designed to be executed from the `code/figures` directory and uses
a relative path to load the necessary CSV files.
"""
import numpy as np
import pandas as pd
import vdj.io
import vdj.viz
import matplotlib.pyplot as plt
import matplotlib.patches as patches
vdj.viz.plotting_style()


# Upload V4-57-1 sequence looping dataset
data = pd.read_csv('../../data/compiled_looping_events.csv', comment='#')
data = data[(data['mutant']=='WT12rss') & (data['hmgb1']==80) &
(data['salt']=='Mg')]

percentiles = [2.5, 97.5]
col_names = ["bs_95_low", "bs_95_high"]
bs_reps = int(1E6)
bs_df = pd.DataFrame([])
sampling = np.random.choice(data['n_loops'].values,size=(len(data), bs_reps),
replace=True)
loop_freq = np.sum(sampling, axis=0) / len(data)
df_dict = {'mutant':'V4-57-1', 'salt':'Mg', 'hmgb1':80,
'n_loops':data['n_loops'].sum(), 'n_beads':len(data),
'loops_per_bead':data['n_loops'].sum() / len(data)}
computed_percentiles = np.percentile(loop_freq, percentiles)
for i,col in zip(computed_percentiles,col_names):
df_dict[col] = i

bs_df = bs_df.append(df_dict, ignore_index=True)

# Form ECDFs
x = list(np.sort(loop_freq))
y = list(np.arange(0, bs_reps, 1) / bs_reps)
y_short = [-1, 2]
text_perc = '95%'

true_loops_val = y[x.index(bs_df['loops_per_bead'].values[0])]
#%%
fig, ax = plt.subplots(1, 1, figsize=(2,4))
ax.set_xlim([-1, 250000])
ax.hist(loop_freq, color='tomato', bins=20, zorder=10,
orientation='horizontal')
ax.axhline(bs_df['loops_per_bead'].values[0], 0, true_loops_val,
color='slategrey', ls='--', alpha=0.4, lw=2)

ax.scatter(true_loops_val, bs_df['loops_per_bead'], color='slategrey',
s=50, alpha=0.7)
ax.vlines(true_loops_val, bs_df[col_names[0]].values[0],
bs_df[col_names[1]].values[0], alpha=0.7,
ls='-', color='slategrey', lw=3)

_ = ax.set_ylim([loop_freq.min(),loop_freq.max()])
_ = ax.set_ylabel('bootstrapped\nlooping frequency', fontsize=16)
_ = ax.set_xlabel('counts', fontsize=16)
_ = ax.set_ylim([0, 0.6])
ytick = np.arange(0.0,0.7,0.1)
_ = ax.set_yticks(ytick)
_ = ax.set_yticklabels(['%.1f' %n for n in ytick])
_ = ax.set_xticklabels([])

fig.savefig('../../figures/SubFigXB_reference_bootstrap.pdf',
bbox_inches='tight', facecolor='white')
# %%
56 changes: 56 additions & 0 deletions code/figures/SubFig_reference_dwell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""
Dwell Time Distribution and Quartiles Subfigure
--------------------------------------------------------------------------------
Author: Soichi Hirokawa
Last Modified: January 7, 2020
License: MIT
Description
--------------------------------------------------------------------------------
This script generates the subfigure for the dwell time distribution histogram
and how the quartiles are identified for subfigure panel C
Notes
--------------------------------------------------------------------------------
This script is designed to be executed from the `code/figures` directory and uses
a relative path to load the necessary CSV files.
"""
import numpy as np
import pandas as pd
import vdj.io
import vdj.viz
import matplotlib.pyplot as plt
import matplotlib.patches as patches
vdj.viz.plotting_style()

# Load the dwell times
dwell = pd.read_csv('../../data/compiled_dwell_times.csv', comment='#')
dwell = dwell[(dwell['salt']=='Mg') & (dwell['hmgb1']==80) & (dwell['mutant']=='WT12rss')]

#%%
fig,ax = plt.subplots(1, 1, figsize=(9,3))
bins=np.arange(0, 20, 1.0)
ax.hist(dwell['dwell_time_min'], color='#e28371', bins=bins)
ax.plot(dwell['dwell_time_min'].median(), 15, color='dodgerblue', lw=1.25,
ms=25, zorder=10, marker='o', markerfacecolor='white')
ax.hlines(15, dwell['dwell_time_min'].quantile(0.25),
dwell['dwell_time_min'].quantile(0.75), color='dodgerblue',
lw=5, ls='-', zorder=10)
ax.text(dwell['dwell_time_min'].median(), 14.6, 'N', fontsize=18, zorder=20,
color='dodgerblue', horizontalalignment='center',
verticalalignment='center')

_ = ax.set_xticks(np.arange(0, 30, 5))
_ = ax.set_xticklabels(np.arange(0, 30, 5), fontsize=18)
_ = ax.set_xlim([0, 20])
_ = ax.set_yticks(np.arange(0, 40, 10))
_ = ax.set_yticklabels(np.arange(0, 40, 10), fontsize=18)
_ = ax.set_ylabel('counts', fontsize=28)
_ = ax.set_xlabel('time [min]', fontsize=28)
_ = ax.tick_params(direction='out', length=0, width=2,
labelsize=20, right=False, top=False)

fig.savefig('../../figures/SubFigB_reference_dwell_histogram.pdf', bbox_inches='tight',
facecolor='white')

# %%
72 changes: 72 additions & 0 deletions code/figures/SubFig_reference_posterior.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
Posterior Distribution, and Mean and SD
--------------------------------------------------------------------------------
Author(s): Soichi Hirokawa
Last Modified: January 7, 2020
License: MIT
Description
--------------------------------------------------------------------------------
This script generates the probability distribution that the reference sequence
has a cutting probability p_cut and shows how the mean and standard deviation
pertain to the full distribution.
Notes
--------------------------------------------------------------------------------
This script is designed to be run from the `code/figures` directory. It accesses
the proper CSV file through a relative path to the `data` folder
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import vdj.viz
import vdj.io
vdj.viz.plotting_style()

# Load all cutting probability estimates taking gaussian approximation.
cut_data = pd.read_csv('../../data/pooled_cutting_probability.csv', comment='#')
cut_data = cut_data[(cut_data['hmgb1'] == 80) & (cut_data['salt']=='Mg') & (cut_data['mutant']=='WT12rss')]

# Load the precomputed posterior distributioons
cut_posts = pd.read_csv('../../data/pooled_cutting_probability_posteriors.csv',
comment='#')
cut_posts = cut_posts[(cut_posts['hmgb1']==80) & (cut_posts['salt']=='Mg') & (cut_posts['mutant']=='WT12rss')]

#%%
fig, ax = plt.subplots(1, 1, figsize=(4.1, 2.3))
ax.fill_between(cut_posts['probability'], 0, cut_posts['posterior'],
color='slategrey', alpha=0.4)
ax.plot(cut_posts['probability'], cut_posts['posterior'], color='white')
ax.plot(cut_data['mean'], cut_posts['posterior'].max()/3, zorder=10,
color='dodgerblue', marker='o', ms=15, markerfacecolor='white')
ax.hlines(cut_posts['posterior'].max()/3, cut_data['mean'] - cut_data['std'],
cut_data['mean'] + cut_data['std'], color='dodgerblue', zorder=10,
lw=4)

_ = ax.set_xlim([0.3, 0.7])
_ = ax.set_ylim([0.0, cut_posts['posterior'].max()+0.005])
_ = ax.set_ylabel('$P(p_\mathrm{cut} | n_\mathrm{loops}, n_\mathrm{cuts})$',
fontsize=15)
_ = ax.set_xlabel(r'$p_\mathrm{cut}$', fontsize=15)

bead_stats = '\n'.join((
'V4-57-1 12RSS',
'(reference)',
r'$n_\mathrm{loops}=%i$' % (cut_data['n_loops'], ),
r'$n_\mathrm{cuts}=%i$' % (cut_data['n_cuts'], )))

bbox_props = dict(boxstyle='square', edgecolor='k', facecolor='white', alpha=0.5)

#_ = ax.text(0.66, 0.051, bead_stats, fontsize=12, horizontalalignment='center',
# verticalalignment='top', bbox=bbox_props)
_ = ax.text(cut_data['mean'], cut_posts['posterior'].max()/3 + 0.008,
r'$\mu \pm \sigma$', fontsize=12, verticalalignment='center',
horizontalalignment='center')
_ = ax.text(cut_data['mean'], cut_posts['posterior'].max()/3-0.0005, 'N',
fontsize=13, color='dodgerblue', verticalalignment='center',
horizontalalignment='center', zorder=15)

fig.savefig('../../figures/SubFigX_point_posterior_definition.pdf', bbox_inches='tight',
facecolor='white')

# %%
Binary file modified figures/Fig_point_mutation_stickplot.pdf
Binary file not shown.
Binary file modified figures/SiFig_coding_flank_reference.pdf
Binary file not shown.
Binary file not shown.
Binary file modified figures/SiFig_spacerC1A_endogenous_cmparison.pdf
Binary file not shown.
Binary file added figures/SubFigB_reference_dwell_histogram.pdf
Binary file not shown.
Binary file added figures/SubFigXB_reference_bootstrap.pdf
Binary file not shown.
Binary file added figures/SubFigXC_point_cut.pdf
Binary file not shown.
Binary file added figures/SubFigXC_point_dwell.pdf
Binary file not shown.
Binary file added figures/SubFigXC_point_loop.pdf
Binary file not shown.
Binary file added figures/SubFigX_point_posterior_definition.pdf
Binary file not shown.

0 comments on commit 948f5ac

Please sign in to comment.