-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'publication' of github.com:rpgroup-pboc/vdj into public…
…ation
- Loading branch information
Showing
16 changed files
with
372 additions
and
102 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
""" | ||
Representative Looping Frequency Bootstrap and 95% Confidence Interval | ||
-------------------------------------------------------------------------------- | ||
Author: Soichi Hirokawa | ||
Last Modified: January 7, 2020 | ||
License: MIT | ||
Description | ||
-------------------------------------------------------------------------------- | ||
This script generates the subfigure in the manuscript which shows a | ||
representative bootstrap replicate distribution and the 95% confidence interval. | ||
Notes | ||
-------------------------------------------------------------------------------- | ||
This script is designed to be executed from the `code/figures` directory and uses | ||
a relative path to load the necessary CSV files. | ||
""" | ||
import numpy as np | ||
import pandas as pd | ||
import vdj.io | ||
import vdj.viz | ||
import matplotlib.pyplot as plt | ||
import matplotlib.patches as patches | ||
vdj.viz.plotting_style() | ||
|
||
|
||
# Load the compiled looping events and keep only the rows for the V4-57-1
# reference condition (mutant 'WT12rss', hmgb1 == 80, salt 'Mg').
data = pd.read_csv('../../data/compiled_looping_events.csv', comment='#')
reference_rows = (
    (data['mutant'] == 'WT12rss')
    & (data['hmgb1'] == 80)
    & (data['salt'] == 'Mg')
)
data = data[reference_rows]
|
||
# Bootstrap the looping frequency: every replicate resamples the per-bead
# loop counts with replacement and divides the resampled total by the number
# of beads.
percentiles = [2.5, 97.5]
col_names = ["bs_95_low", "bs_95_high"]
bs_reps = int(1E6)
n_beads = len(data)
total_loops = data['n_loops'].sum()

# One row per resampled bead, one column per bootstrap replicate.
sampling = np.random.choice(data['n_loops'].values, size=(n_beads, bs_reps),
                            replace=True)
loop_freq = np.sum(sampling, axis=0) / n_beads

# Summary of the measured data plus the bootstrap percentile bounds.
df_dict = {'mutant': 'V4-57-1', 'salt': 'Mg', 'hmgb1': 80,
           'n_loops': total_loops, 'n_beads': n_beads,
           'loops_per_bead': total_loops / n_beads}
computed_percentiles = np.percentile(loop_freq, percentiles)
df_dict.update(zip(col_names, computed_percentiles))

# Build the one-row summary frame directly; `DataFrame.append` was removed in
# pandas 2.0, so appending to an empty frame no longer works.
bs_df = pd.DataFrame([df_dict])

# Form ECDFs
x = np.sort(loop_freq)
y = np.arange(0, bs_reps, 1) / bs_reps
y_short = [-1, 2]
text_perc = '95%'

# ECDF value at the measured looping frequency. `searchsorted(side='left')`
# returns the index of the first replicate >= the measured value, which equals
# the first exact match when one exists, and does not raise when no replicate
# matches the measured value exactly.
true_loops_val = np.searchsorted(
    x, bs_df['loops_per_bead'].values[0], side='left') / bs_reps
#%% | ||
# Scalars pulled out of the one-row bootstrap summary frame.
measured_freq = bs_df['loops_per_bead'].values[0]
ci_low = bs_df[col_names[0]].values[0]
ci_high = bs_df[col_names[1]].values[0]
marker_color = 'slategrey'

fig, ax = plt.subplots(1, 1, figsize=(2, 4))
ax.set_xlim([-1, 250000])

# Horizontal histogram of the bootstrap replicates.
ax.hist(loop_freq, bins=20, color='tomato', orientation='horizontal',
        zorder=10)

# Dashed guide line at the measured looping frequency; its extent is given by
# axhline's xmin/xmax arguments (0 and true_loops_val).
ax.axhline(measured_freq, 0, true_loops_val, color=marker_color, ls='--',
           alpha=0.4, lw=2)

# Marker at the measured value and a vertical bar between the two bootstrap
# percentile bounds.
ax.scatter(true_loops_val, bs_df['loops_per_bead'], color=marker_color,
           s=50, alpha=0.7)
ax.vlines(true_loops_val, ci_low, ci_high, alpha=0.7, ls='-',
          color=marker_color, lw=3)

# Axis limits, labels and ticks. The data-range y-limits set first are
# replaced by the fixed [0, 0.6] range set afterwards.
_ = ax.set_ylim([loop_freq.min(), loop_freq.max()])
_ = ax.set_ylabel('bootstrapped\nlooping frequency', fontsize=16)
_ = ax.set_xlabel('counts', fontsize=16)
_ = ax.set_ylim([0, 0.6])
ytick = np.arange(0.0, 0.7, 0.1)
_ = ax.set_yticks(ytick)
_ = ax.set_yticklabels(['{:.1f}'.format(tick) for tick in ytick])
_ = ax.set_xticklabels([])

fig.savefig('../../figures/SubFigXB_reference_bootstrap.pdf',
            bbox_inches='tight', facecolor='white')
# %% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
""" | ||
Dwell Time Distribution and Quartiles Subfigure | ||
-------------------------------------------------------------------------------- | ||
Author: Soichi Hirokawa | ||
Last Modified: January 7, 2020 | ||
License: MIT | ||
Description | ||
-------------------------------------------------------------------------------- | ||
This script generates the subfigure for the dwell time distribution histogram | ||
and how the quartiles are identified for subfigure panel C | ||
Notes | ||
-------------------------------------------------------------------------------- | ||
This script is designed to be executed from the `code/figures` directory and uses | ||
a relative path to load the necessary CSV files. | ||
""" | ||
import numpy as np | ||
import pandas as pd | ||
import vdj.io | ||
import vdj.viz | ||
import matplotlib.pyplot as plt | ||
import matplotlib.patches as patches | ||
vdj.viz.plotting_style() | ||
|
||
# Load the compiled dwell times and keep only the rows for the reference
# condition (salt 'Mg', hmgb1 == 80, mutant 'WT12rss').
dwell = pd.read_csv('../../data/compiled_dwell_times.csv', comment='#')
reference_rows = (
    (dwell['salt'] == 'Mg')
    & (dwell['hmgb1'] == 80)
    & (dwell['mutant'] == 'WT12rss')
)
dwell = dwell[reference_rows]
|
||
#%% | ||
# Summary statistics drawn on top of the histogram.
dwell_min = dwell['dwell_time_min']
median_dwell = dwell_min.median()
lower_quartile = dwell_min.quantile(0.25)
upper_quartile = dwell_min.quantile(0.75)
marker_height = 15  # y position (in counts) of the median/quartile marker
time_ticks = np.arange(0, 30, 5)
count_ticks = np.arange(0, 40, 10)

fig, ax = plt.subplots(1, 1, figsize=(9, 3))

# Histogram of dwell times in one-unit bins from 0 to 19.
bins = np.arange(0, 20, 1.0)
ax.hist(dwell_min, color='#e28371', bins=bins)

# Median as an open circle, with a bar from the 25th to the 75th percentile
# and an 'N' label placed at the median.
ax.plot(median_dwell, marker_height, color='dodgerblue', lw=1.25,
        ms=25, zorder=10, marker='o', markerfacecolor='white')
ax.hlines(marker_height, lower_quartile, upper_quartile, color='dodgerblue',
          lw=5, ls='-', zorder=10)
ax.text(median_dwell, 14.6, 'N', fontsize=18, zorder=20,
        color='dodgerblue', horizontalalignment='center',
        verticalalignment='center')

# Ticks, limits and labels.
_ = ax.set_xticks(time_ticks)
_ = ax.set_xticklabels(time_ticks, fontsize=18)
_ = ax.set_xlim([0, 20])
_ = ax.set_yticks(count_ticks)
_ = ax.set_yticklabels(count_ticks, fontsize=18)
_ = ax.set_ylabel('counts', fontsize=28)
_ = ax.set_xlabel('time [min]', fontsize=28)
_ = ax.tick_params(direction='out', length=0, width=2,
                   labelsize=20, right=False, top=False)

fig.savefig('../../figures/SubFigB_reference_dwell_histogram.pdf',
            bbox_inches='tight', facecolor='white')
|
||
# %% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
""" | ||
Posterior Distribution, and Mean and SD | ||
-------------------------------------------------------------------------------- | ||
Author(s): Soichi Hirokawa | ||
Last Modified: January 7, 2020 | ||
License: MIT | ||
Description | ||
-------------------------------------------------------------------------------- | ||
This script generates the probability distribution that the reference sequence | ||
has a cutting probability p_cut and shows how the mean and standard deviation | ||
pertain to the full distribution. | ||
Notes | ||
-------------------------------------------------------------------------------- | ||
This script is designed to be run from the `code/figures` directory. It accesses | ||
the proper CSV file through a relative path to the `data` folder | ||
""" | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
import vdj.viz | ||
import vdj.io | ||
vdj.viz.plotting_style() | ||
|
||
# Load all cutting probability estimates taking gaussian approximation, and
# keep only the reference condition (hmgb1 == 80, salt 'Mg', mutant 'WT12rss').
cut_data = pd.read_csv('../../data/pooled_cutting_probability.csv',
                       comment='#')
reference_estimates = (
    (cut_data['hmgb1'] == 80)
    & (cut_data['salt'] == 'Mg')
    & (cut_data['mutant'] == 'WT12rss')
)
cut_data = cut_data[reference_estimates]

# Load the precomputed posterior distributions, filtered the same way.
cut_posts = pd.read_csv(
    '../../data/pooled_cutting_probability_posteriors.csv', comment='#')
reference_posteriors = (
    (cut_posts['hmgb1'] == 80)
    & (cut_posts['salt'] == 'Mg')
    & (cut_posts['mutant'] == 'WT12rss')
)
cut_posts = cut_posts[reference_posteriors]
|
||
#%% | ||
# Height at which the mean +/- std marker is drawn: one third of the
# posterior's maximum.
posterior_peak = cut_posts['posterior'].max()
marker_y = posterior_peak / 3

fig, ax = plt.subplots(1, 1, figsize=(4.1, 2.3))

# Filled posterior curve with a white outline.
ax.fill_between(cut_posts['probability'], 0, cut_posts['posterior'],
                color='slategrey', alpha=0.4)
ax.plot(cut_posts['probability'], cut_posts['posterior'], color='white')

# Mean as an open circle with a horizontal bar spanning mean - std to
# mean + std.
ax.plot(cut_data['mean'], marker_y, zorder=10,
        color='dodgerblue', marker='o', ms=15, markerfacecolor='white')
ax.hlines(marker_y, cut_data['mean'] - cut_data['std'],
          cut_data['mean'] + cut_data['std'], color='dodgerblue', zorder=10,
          lw=4)

_ = ax.set_xlim([0.3, 0.7])
_ = ax.set_ylim([0.0, posterior_peak + 0.005])
# Raw string: '\m' is not a valid escape sequence, so the non-raw form of this
# label triggers an invalid-escape warning even though the text is the same.
_ = ax.set_ylabel(r'$P(p_\mathrm{cut} | n_\mathrm{loops}, n_\mathrm{cuts})$',
                  fontsize=15)
_ = ax.set_xlabel(r'$p_\mathrm{cut}$', fontsize=15)

# Text and box style for the bead-count annotation; the ax.text call that
# draws it is commented out below.
bead_stats = '\n'.join((
    'V4-57-1 12RSS',
    '(reference)',
    r'$n_\mathrm{loops}=%i$' % (cut_data['n_loops'], ),
    r'$n_\mathrm{cuts}=%i$' % (cut_data['n_cuts'], )))

bbox_props = dict(boxstyle='square', edgecolor='k', facecolor='white',
                  alpha=0.5)

#_ = ax.text(0.66, 0.051, bead_stats, fontsize=12, horizontalalignment='center',
#        verticalalignment='top', bbox=bbox_props)
# Label above the marker and an 'N' placed at the marker position.
_ = ax.text(cut_data['mean'], marker_y + 0.008,
            r'$\mu \pm \sigma$', fontsize=12, verticalalignment='center',
            horizontalalignment='center')
_ = ax.text(cut_data['mean'], marker_y - 0.0005, 'N',
            fontsize=13, color='dodgerblue', verticalalignment='center',
            horizontalalignment='center', zorder=15)

fig.savefig('../../figures/SubFigX_point_posterior_definition.pdf',
            bbox_inches='tight', facecolor='white')
|
||
# %% |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.