Skip to content

Commit

Permalink
Modernize hictkpy usage
Browse files Browse the repository at this point in the history
  • Loading branch information
robomics committed Oct 23, 2024
1 parent db2a389 commit c4ee350
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 18 deletions.
9 changes: 3 additions & 6 deletions src/stripepy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ def main():
configs_input, configs_thresholds, configs_output = cli.parse_args()

# Data loading:
c, chr_starts, chr_ends, bp_lengths = others.cmap_loading(configs_input["contact-map"], configs_input["resolution"])
f, chr_starts, chr_ends, bp_lengths = others.cmap_loading(configs_input["contact-map"], configs_input["resolution"])

# Remove existing folders:
configs_output["output_folder"] = f"{configs_output['output_folder']}/{configs_input['resolution']}"
IO.remove_and_create_folder(configs_output["output_folder"])

# Extract a list of tuples where each tuple is (index, chr), e.g. (2,'chr3'):
c_pairs = others.chromosomes_to_study(list(c.chromosomes().keys()), bp_lengths, MIN_SIZE_CHROMOSOME)
c_pairs = others.chromosomes_to_study(list(f.chromosomes().keys()), bp_lengths, MIN_SIZE_CHROMOSOME)

# Create HDF5 file to store candidate stripes:
hf = h5py.File(f"{configs_output['output_folder']}/results.hdf5", "w")
Expand All @@ -76,10 +76,7 @@ def main():
IO.create_folders_for_plots(f"{configs_output['output_folder']}/plots/{this_chr}")

# Extract current chromosome (only the upper-triangular part is stored by hictkpy!):
I = c.fetch(this_chr).to_coo().tolil()
I += I.T
I.setdiag(I.diagonal() / 2)
I = I.tocsr()
I = f.fetch(this_chr).to_csr("full")

This comment has been minimized.

Copy link
@robomics

robomics Oct 23, 2024

Author Contributor

@rea1991, hictkpy now supports fetching interactions directly in CSR format. It is also possible to mirror interactions below the diagonal (see here for more details).
I believe the new code does the same exact thing as the old code, but it would be good if you could test the new change.


# RoI:
RoI = others.define_RoI(
Expand Down
4 changes: 2 additions & 2 deletions src/stripepy/utils/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ def recoverable_anchors_recognition(
path2GT = f"{base_path}/MoDLE-benchmark/"
file_name = f"{file_name_base}_{contact_density}_{noise}"
path2mcool = f"{base_path}/MoDLE-benchmark/data/{file_name}/"
c = hictkpy.File(f"{path2mcool}{file_name}.mcool::resolutions/{resolution}", resolution)
c = hictkpy.File(f"{path2mcool}{file_name}.mcool", resolution)
c_names = list(c.chromosomes().keys())
c_ids = list(range(len(c_names)))
c_pairs = list(zip(c_ids, c_names))
Expand Down Expand Up @@ -1116,7 +1116,7 @@ def recoverable_anchors_classification(
path2GT = f"{base_path}/MoDLE-benchmark/"
file_name = f"{file_name_base}_{contact_density}_{noise}"
path2mcool = f"{base_path}/MoDLE-benchmark/data/{file_name}/"
c = hictkpy.File(f"{path2mcool}{file_name}.mcool::resolutions/{resolution}", resolution)
c = hictkpy.File(f"{path2mcool}{file_name}.mcool", resolution)
c_names = list(c.chromosomes().keys())
c_ids = list(range(len(c_names)))
c_pairs = list(zip(c_ids, c_names))
Expand Down
19 changes: 9 additions & 10 deletions src/stripepy/utils/others.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,27 @@

def cmap_loading(path, resolution):
# Retrieve metadata:
file_name, file_ext = os.path.splitext(path)
file_format = file_ext.lower()[1:] # Remove the dot from the extension and convert to lowercase

if file_format == "cool" or file_format == "hic":
c = hictkpy.File(path, resolution)
elif file_format == "mcool":
c = hictkpy.File(f"{path}::/resolutions/{resolution}", resolution)
if hictkpy.is_scool_file(path):
raise RuntimeError(".scool files are not currently supported.")
if hictkpy.is_cooler(path):
f = hictkpy.File(path)
if f.resolution() != resolution:
raise RuntimeError(f"error opening file \"{f}\": expected {resolution} resolution, found {f.resolution()}.")
else:
raise ValueError("Unsupported file format: " + file_format)
f = hictkpy.MultiResFile(path)[resolution]

# Retrieve metadata:
chr_starts = [0] # left ends of each chromosome (in matrix coordinates)
chr_ends = [] # right ends of each chromosome (in matrix coordinates)
chr_sizes = [] # integer bp lengths, one per chromosome
for _, bp_length in c.chromosomes().items():
for bp_length in f.chromosomes().values():
chr_sizes.append(bp_length)
chr_ends.append(chr_starts[-1] + int(np.ceil(bp_length / resolution)))
chr_starts.append(chr_ends[-1])
# print(f"{chr_starts[-2]}-{chr_ends[-1]}-{chr_sizes[-1]}")
chr_starts.pop(-1)

return c, chr_starts, chr_ends, chr_sizes
return f, chr_starts, chr_ends, chr_sizes


def chromosomes_to_study(chromosomes, length_in_bp, min_size_allowed):
Expand Down

0 comments on commit c4ee350

Please sign in to comment.