Skip to content

Commit

Permalink
Fixing the issue with parallel runs where one process gets killed, should be caught and handled now
Browse files Browse the repository at this point in the history
  • Loading branch information
fawaz-dabbaghieh committed Jun 20, 2024
1 parent 48deb74 commit c2fafe9
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 357 deletions.
42 changes: 31 additions & 11 deletions gaftools/cli/order_gfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import os
import logging
import time
from collections import defaultdict
from collections import namedtuple, defaultdict
from gaftools.gfa import GFA

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -49,7 +49,8 @@ def run_order_gfa(
chromosome_order=None,
with_sequence=False,
):
if chromosome_order is not None:

if not chromosome_order is None:
chromosome_order = chromosome_order.split(sep=",")

if not os.path.isdir(outdir):
Expand All @@ -66,6 +67,7 @@ def run_order_gfa(
logging.error(f"were not able to create directory {outdir}, OSError")
sys.exit()


logger.info(f"Reading {gfa_filename}")
if with_sequence:
graph = GFA(gfa_filename, low_memory=False)
Expand Down Expand Up @@ -112,8 +114,8 @@ def run_order_gfa(
# Initialize files
# f_gfa = open(outdir+'/'+gfa_filename.split("/")[-1][:-4]+'-'+chromosome+'.gfa', 'w')

scaffold_nodes, inside_nodes, node_order, bo, bubble_count = decompose_and_order(
graph, component_nodes, chromosome, bo
scaffold_nodes, inside_nodes, node_order, bo, bubble_count = (
decompose_and_order(graph, component_nodes, chromosome, bo)
)

# skip a chromosome if something went wrong
Expand All @@ -128,7 +130,12 @@ def run_order_gfa(
)
out_files.append(f_gfa)
f_colors = open(
outdir + os.sep + gfa_filename.split(os.sep)[-1][:-4] + "-" + chromosome + ".csv",
outdir
+ os.sep
+ gfa_filename.split(os.sep)[-1][:-4]
+ "-"
+ chromosome
+ ".csv",
"w",
)
f_colors.write("Name,Color,SN,SO,BO,NO\n")
Expand Down Expand Up @@ -156,19 +163,28 @@ def run_order_gfa(
else:
so_tag = "NA"
f_colors.write(
"{},{},{},{},{},{}\n".format(node_name, color, sn_tag, so_tag, bo_tag, no_tag)
"{},{},{},{},{},{}\n".format(
node_name, color, sn_tag, so_tag, bo_tag, no_tag
)
)

graph.write_gfa(
set_of_nodes=component_nodes, output_file=f_gfa, append=False, order_bo=True
set_of_nodes=component_nodes,
output_file=f_gfa,
append=False,
order_bo=True,
)

f_colors.close()

else:
logger.warning(f"Chromosome {chromosome} was skipped")
final_gfa = (
outdir + os.sep + gfa_filename.split(os.sep)[-1].split(".")[0] + "-complete" + ".gfa"
outdir
+ os.sep
+ gfa_filename.split(os.sep)[-1].split(".")[0]
+ "-complete"
+ ".gfa"
)
with open(final_gfa, "w") as outfile:
# outputting all the S lines first
Expand Down Expand Up @@ -235,8 +251,12 @@ def decompose_and_order(graph, component, component_name, bo_start=0):
logger.info(f" Scaffold graph: {len(scaffold_graph)} nodes")

# Find start/end points of the line by looking for nodes with degree 1
degree_one = [x.id for x in scaffold_graph.nodes.values() if len(x.neighbors()) == 1]
degree_two = [x.id for x in scaffold_graph.nodes.values() if len(x.neighbors()) == 2]
degree_one = [
x.id for x in scaffold_graph.nodes.values() if len(x.neighbors()) == 1
]
degree_two = [
x.id for x in scaffold_graph.nodes.values() if len(x.neighbors()) == 2
]

try:
assert len(degree_one) == 2
Expand Down Expand Up @@ -294,7 +314,7 @@ def count_sn(graph, comp):
"""
counts = defaultdict(int)
for n in comp:
if "SN" not in graph[n].tags:
if not "SN" in graph[n].tags:
continue
counts[graph[n].tags["SN"][1]] += 1
return counts
Expand Down
Loading

0 comments on commit c2fafe9

Please sign in to comment.