Skip to content

Commit 8328163

Browse files
authored
Merge pull request #76 from robomics/feature/stripepy-plot
Initial implementation of stripepy plot
2 parents 774cb22 + 93fa236 commit 8328163

20 files changed

+1645
-228
lines changed

.github/workflows/build-dockerfile.yml

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on:
88
push:
99
branches: [main]
1010
paths:
11+
- ".github/workflows/cache-test-datasets.yml"
1112
- ".github/workflows/build-dockerfile.yml"
1213
- "src/**"
1314
- "test/**"
@@ -22,6 +23,7 @@ on:
2223

2324
pull_request:
2425
paths:
26+
- ".github/workflows/cache-test-datasets.yml"
2527
- ".github/workflows/build-dockerfile.yml"
2628
- "src/**"
2729
- "test/**"
@@ -41,17 +43,14 @@ defaults:
4143
run:
4244
shell: bash
4345

44-
env:
45-
TEST_DATASET_ID: 4DNFI9GMP2J8
46-
TEST_DATASET_URL: "https://zenodo.org/records/14283922/files/4DNFI9GMP2J8.stripepy.mcool?download=1"
47-
TEST_DATASET_MD5: "a17d08460c03cf6c926e2ca5743e4888"
48-
TEST_RESULT_FILE_V1_ID: results_4DNFI9GMP2J8_v1.hdf5
49-
TEST_RESULT_FILE_V1_URL: "https://zenodo.org/records/14283922/files/results_4DNFI9GMP2J8_v1.hdf5?download=1"
50-
TEST_RESULT_FILE_V1_MD5: "632b2a7a6e5c1a24dc3635710ed68a80"
51-
5246
jobs:
47+
cache-test-datasets:
48+
name: Cache test dataset
49+
uses: paulsengroup/StripePy/.github/workflows/cache-test-datasets.yml@main
50+
5351
build-dockerfile:
5452
name: Build Dockerfile
53+
needs: [cache-test-datasets]
5554
runs-on: ubuntu-latest
5655
permissions:
5756
contents: "read"
@@ -75,32 +74,13 @@ jobs:
7574
- name: Install build requirements
7675
run: pip install -r requirements.txt
7776

78-
- name: Cache test dataset
79-
id: cache-dset
77+
- name: Restore test dataset
8078
uses: actions/cache/restore@v4
8179
with:
82-
key: test-dataset
83-
path: ${{ env.TEST_DATASET_ID }}.mcool
84-
85-
- name: Download test dataset
86-
if: steps.cache-dset.outputs.cache-hit != 'true'
87-
run: |
88-
mkdir -p test/data/
89-
curl -L '${{ env.TEST_DATASET_URL }}' -o 'test/data/${{ env.TEST_DATASET_ID }}.mcool'
90-
curl -L '${{ env.TEST_RESULT_FILE_V1_URL }}' -o 'test/data/${{ env.TEST_RESULT_FILE_V1_ID }}'
91-
92-
echo "${{ env.TEST_DATASET_MD5 }} test/data/${{ env.TEST_DATASET_ID }}.mcool" > checksum.md5
93-
echo "${{ env.TEST_RESULT_FILE_V1_MD5 }} test/data/${{ env.TEST_RESULT_FILE_V1_ID }}" >> checksum.md5
94-
md5sum -c checksum.md5
95-
96-
- name: Save test dataset
97-
if: steps.cache-dset.outputs.cache-hit != 'true'
98-
uses: actions/cache/save@v4
99-
with:
100-
key: test-dataset
101-
path: |
102-
test/data/${{env.TEST_DATASET_ID }}.mcool
103-
test/data/results*.hdf5
80+
key: ${{ needs.cache-test-datasets.outputs.cache-key }}
81+
path: test/data/
82+
fail-on-cache-miss: true
83+
enableCrossOsArchive: true
10484

10585
- name: Generate build args
10686
id: build-args

.github/workflows/ci.yml

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on:
88
push:
99
branches: [main]
1010
paths:
11+
- ".github/workflows/cache-test-datasets.yml"
1112
- ".github/workflows/ci.yml"
1213
- "src/**"
1314
- "test/**"
@@ -16,6 +17,7 @@ on:
1617

1718
pull_request:
1819
paths:
20+
- ".github/workflows/cache-test-datasets.yml"
1921
- ".github/workflows/ci.yml"
2022
- "src/**"
2123
- "test/**"
@@ -62,17 +64,18 @@ jobs:
6264
6365
return { include: includes }
6466
67+
cache-test-datasets:
68+
name: Cache test datasets
69+
uses: paulsengroup/StripePy/.github/workflows/cache-test-datasets.yml@main
70+
6571
ci:
6672
name: CI
67-
needs: [matrix-factory]
73+
needs: [cache-test-datasets, matrix-factory]
6874
strategy:
6975
fail-fast: false
7076
matrix: ${{ fromJson(needs.matrix-factory.outputs.matrix) }}
7177
runs-on: ${{ matrix.os }}
7278

73-
env:
74-
TEST_DATASET_ID: 4DNFI9GMP2J8
75-
7679
steps:
7780
- uses: actions/checkout@v4
7881
with:
@@ -93,35 +96,13 @@ jobs:
9396
stripepy --help
9497
stripepy --version
9598
96-
- name: Cache test dataset
97-
id: cache-dset
99+
- name: Restore test dataset
98100
uses: actions/cache/restore@v4
99101
with:
100-
key: test-dataset
101-
path: |
102-
test/data/${{env.TEST_DATASET_ID }}.mcool
103-
test/data/results*.hdf5
104-
105-
- name: Download test dataset
106-
if: steps.cache-dset.outputs.cache-hit != 'true'
107-
run: |
108-
mkdir test/data/
109-
stripepy download \
110-
--name ${{ env.TEST_DATASET_ID }} \
111-
--output test/data/${{ env.TEST_DATASET_ID }}.mcool
112-
113-
stripepy download \
114-
--name __results_v1 \
115-
--output test/data/results_4DNFI9GMP2J8_v1.hdf5
116-
117-
- name: Save test dataset
118-
if: steps.cache-dset.outputs.cache-hit != 'true'
119-
uses: actions/cache/save@v4
120-
with:
121-
key: test-dataset
122-
path: |
123-
test/data/${{env.TEST_DATASET_ID }}.mcool
124-
test/data/results*.hdf5
102+
key: ${{ needs.cache-test-datasets.outputs.cache-key }}
103+
path: test/data/
104+
fail-on-cache-miss: true
105+
enableCrossOsArchive: true
125106

126107
- name: Run unit tests
127108
run: |

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,6 @@ cython_debug/
168168
# Custom entries
169169
.DS_Store
170170
output/
171-
*.log
171+
test/data
172172
*.log*
173173
*.bak*

README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ StripePy is organized into a few subcommands:
7070

7171
- `stripepy call`: run the stripe detection algorithm and store the identified stripes in a `.hdf5` file.
7272
- `stripepy view`: take the `result.hdf5` file generated by `stripepy call` and extract stripes in BEDPE format.
73+
- `stripepy plot`: generate various kinds of plots to inspect the stripes identified by `stripepy call`.
7374
- `stripepy download`: download a minified sample dataset suitable to quickly test StripePy.
7475

7576
### Walkthrough
@@ -1108,6 +1109,44 @@ Running the last cell will display a HiGlass window embedded in the Jupyter note
11081109

11091110
![HiGlass window](docs/assets/4DNFI9GMP2J8_chr2_156mbp_higlass_view.png)
11101111

1112+
## Generating plots
1113+
1114+
StripePy comes with a `plot` subcommand that can be used to generate various kinds of plots.
1115+
1116+
`stripepy plot` supports the following subcommands:
1117+
1118+
- `contact-map` (`cm`): plot stripes and other features over the Hi-C matrix
1119+
- `pseudodistribution` (`pd`): plot the pseudo-distribution over the given region of interest
1120+
- `stripe-hist` (`hist`): generate and plot the histograms showing the distribution of the stripe heights and widths
1121+
1122+
`stripepy plot cm` takes as input a Hi-C matrix in `.cool`, `.mcool`, or `.hic` format, and optionally the `.hdf5` file generated by `stripepy call` (this parameter is mandatory when highlighting stripes or stripe seeds).
1123+
1124+
`stripepy plot pd` and `stripepy plot hist` do not require the Hi-C matrix file; they require the `.hdf5` file generated by `stripepy call` instead.
1125+
1126+
All three subcommands support specifying a region of interest through the `--region` option.
1127+
When the commands are run without specifying the region of interest, `stripepy plot cm` and `stripepy plot pd` will generate plots for a random 2.5 Mbp region, while `stripepy plot hist` will generate histograms using data from the entire genome.
1128+
1129+
Example usage:
1130+
1131+
```bash
1132+
# Plot the pseudo-distribution over a region of interest
1133+
stripepy plot pd results.hdf5 /tmp/pseudodistribution.png --region chr2:120100000-122100000
1134+
1135+
# Plot the histograms using genome-wide data
1136+
stripepy plot hist results.hdf5 /tmp/stripe_hist_gw.png
1137+
1138+
# Plot the Hi-C matrix
1139+
stripepy plot cm 4DNFI9GMP2J8.mcool 10000 /tmp/matrix.png
1140+
1141+
# Plot the Hi-C matrix highlighting the stripe seeds
1142+
stripepy plot cm 4DNFI9GMP2J8.mcool 10000 /tmp/matrix_with_seeds.png --stripepy-hdf5 results.hdf5 --highlight-seeds
1143+
1144+
# Plot the Hi-C matrix highlighting the architectural stripes
1145+
stripepy plot cm 4DNFI9GMP2J8.mcool 10000 /tmp/matrix_with_stripes.png --stripepy-hdf5 results.hdf5 --highlight-stripes
1146+
```
1147+
1148+
Some example plots generated with `stripepy plot` can be found in the file `stripepy-plot-test-images.tar.xz` from [doi.org/10.5281/zenodo.14283921](https://doi.org/10.5281/zenodo.14283921).
1149+
11111150
## Getting help
11121151

11131152
For any issues regarding StripePy installation, walkthrough, and output interpretation please open a [discussion](https://github.com/paulsengroup/StripePy/discussions) on GitHub.

src/stripepy/IO.py

Lines changed: 5 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -695,43 +695,6 @@ def format_ticks(ax, x=True, y=True, rotate=True):
695695
ax.tick_params(axis="x", rotation=45)
696696

697697

698-
def HiC(I, RoI, plot_in_bp=False, output_folder=None, file_name=None, title=None, compactify=False):
699-
"""
700-
:param I: Hi-C matrix to be plotted as image and saved
701-
:param RoI: refers to the Region of Interest [RoI[0], RoI[1]]x[RoI[2], RoI[3]]
702-
(e.g., in genomic coordinates)
703-
:param plot_in_bp: if True, labels are set in base pairs w.r.t. the genomic interval in RoI;
704-
if False, labels are set in pixel coordinates
705-
:param output_folder: path to folder where to save the image
706-
:param file_name: name of the file to be created
707-
:param title: title to give to the image
708-
:param compactify: if False, it adds axes ticks, color bars
709-
:return: -
710-
"""
711-
if output_folder is None or file_name is None:
712-
return
713-
714-
fig, ax = plt.subplots(1, 1)
715-
img = ax.matshow(I, vmax=np.amax(I), extent=(RoI[0], RoI[1], RoI[3], RoI[2]), cmap=fruit_punch)
716-
if plot_in_bp:
717-
format_ticks(ax)
718-
719-
if compactify is True:
720-
plt.axis("off")
721-
else:
722-
# plt.colorbar(img)
723-
if title is not None:
724-
fig.suptitle(title)
725-
726-
fig.set_dpi(256)
727-
plt.axis("scaled")
728-
fig.tight_layout()
729-
730-
plt.savefig(f"{output_folder}/{file_name}", bbox_inches="tight")
731-
732-
plt.close()
733-
734-
735698
def pseudodistrib(
736699
pseudo_distrib,
737700
IoI,
@@ -743,6 +706,7 @@ def pseudodistrib(
743706
title=None,
744707
display=False,
745708
):
709+
# TODO remove
746710
"""
747711
:param pseudo_distrib: 1D ndarray representing a scalar function sampled over a uniform mesh
748712
:param IoI: refers to the Interval of Interest [IoI[0], IoI[1]] (e.g., in genomic coordinates)
@@ -808,6 +772,7 @@ def pseudodistrib(
808772
def pseudodistrib_and_HIoIs(
809773
pseudo_distrib, IoIs, resolution, colors=None, output_folder=None, file_name=None, title=None, display=False
810774
):
775+
# TODO remove
811776
"""
812777
:param pseudo_distrib: 1D ndarray representing a scalar function sampled over a uniform mesh
813778
:param IoIs: list of lists, where the innermost lists are pairs of coordinates; the first pair
@@ -824,7 +789,6 @@ def pseudodistrib_and_HIoIs(
824789
:param display: if False, it does not display the plot
825790
:return: -
826791
"""
827-
828792
fig, ax = plt.subplots(1, 1)
829793
for IoI, color in zip(IoIs, colors):
830794
ax.plot(
@@ -834,21 +798,17 @@ def pseudodistrib_and_HIoIs(
834798
linewidth=0.5,
835799
linestyle="solid",
836800
)
837-
838801
ax.xaxis.set_major_formatter(EngFormatter("b"))
839802
if no_frills_in_images is False:
840803
if title is not None:
841804
fig.suptitle(title)
842-
843805
ax.set_xlabel("genomic coordinates (bp)")
844806
ax.set_ylabel("pseudo-distribution")
845807
fig.tight_layout()
846808
ax.grid(True)
847809
# plt.axis('scaled')
848-
849810
if output_folder is not None and file_name is not None:
850811
plt.savefig(f"{output_folder}/{file_name}", bbox_inches="tight")
851-
852812
if display:
853813
plt.show()
854814
plt.close()
@@ -865,7 +825,7 @@ def HiC_and_sites(
865825
file_name=None,
866826
title=None,
867827
display=False,
868-
):
828+
): # TODO remove
869829
"""
870830
:param I: Hi-C matrix to be plotted as image and saved
871831
:param sites: list of locations of interest
@@ -935,6 +895,7 @@ def HiC_and_HIoIs(
935895
title=None,
936896
display=False,
937897
):
898+
# TODO: remove
938899
"""
939900
:param I: Hi-C matrix to be plotted as image and saved
940901
:param HIoIs: list of lists, where the innermost lists are pairs of elements
@@ -1036,6 +997,7 @@ def plot_stripes(
1036997
title=None,
1037998
display=False,
1038999
):
1000+
# TODO remove
10391001
"""
10401002
:param I: Hi-C matrix to be plotted as image and saved
10411003
:param LT_HIoIs: Horizontal Intervals of Interest (lower-triangular part)
@@ -1280,22 +1242,3 @@ def plot_stripes_and_peaks(
12801242
if display:
12811243
plt.show()
12821244
plt.close()
1283-
1284-
1285-
def save_candidates_bedpe(HIoIs, VIoIs, resolution, chr, output_folder, file_name):
1286-
"""
1287-
:param HIoIs: Horizontal Intervals of Interest
1288-
:param VIoIs: Vertical Intervals of Interest
1289-
:param resolution: resolution
1290-
:param chr: chromosome
1291-
:param output_folder: path to folder where to save the image
1292-
:param file_name: name of the file to be created
1293-
:return: -
1294-
"""
1295-
1296-
with open(f"{output_folder}/{file_name}", "w") as f:
1297-
for HIoI, VIoI in zip(HIoIs, VIoIs):
1298-
f.write(
1299-
f"{chr}\t{resolution * HIoI[0]}\t{resolution * HIoI[1]}\t"
1300-
f"{chr}\t{resolution * VIoI[0]}\t{resolution * VIoI[1]}\n"
1301-
)

src/stripepy/cli/call.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,14 @@ def run(
3434
start_global_time = time.time()
3535

3636
# Data loading:
37-
f, chr_starts, chr_ends, bp_lengths = others.cmap_loading(configs_input["contact-map"], configs_input["resolution"])
37+
f, chr_starts, chr_ends, bp_lengths = others.cmap_loading(configs_input["contact_map"], configs_input["resolution"])
3838

3939
# Remove existing folders:
4040
# configs_output["output_folder"] = (
41-
# f"{configs_output['output_folder']}/{configs_input['contact-map'].stem}/{configs_input['resolution']}"
41+
# f"{configs_output['output_folder']}/{configs_input['contact_map'].stem}/{configs_input['resolution']}"
4242
# )
4343
configs_output["output_folder"] = (
44-
configs_output["output_folder"] / configs_input["contact-map"].stem / str(configs_input["resolution"])
44+
configs_output["output_folder"] / configs_input["contact_map"].stem / str(configs_input["resolution"])
4545
)
4646
IO.remove_and_create_folder(configs_output["output_folder"], configs_output["force"])
4747

0 commit comments

Comments
 (0)