diff --git a/data/chrk_ath_12samples_10kb.w100000_S.json b/data/chrk_ath_12samples_10kb.w100000_S.json
index 3f89fd3..94b34f5 100644
--- a/data/chrk_ath_12samples_10kb.w100000_S.json
+++ b/data/chrk_ath_12samples_10kb.w100000_S.json
@@ -1,4 +1,4 @@
-{"odgi_version": 10,"bin_width": 100000,"pangenome_length": 210026186}
+{"odgi_version": 12,"bin_width": 100000,"pangenome_length": 210026186}
 {"bin_id":1}
 {"bin_id":2}
 {"bin_id":3}
diff --git a/matrixcomponent/JSONparser.py b/matrixcomponent/JSONparser.py
index 5b469e7..51ed15b 100644
--- a/matrixcomponent/JSONparser.py
+++ b/matrixcomponent/JSONparser.py
@@ -41,7 +41,7 @@ def process_path(line=None):
         for r in ranges:
             compressed_ranges.extend(r)
 
-        bin = matrix.Bin(b[0], b[1], b[2], compressed_ranges)
+        bin = matrix.Bin(b[0], b[1], b[2], compressed_ranges, 0, b[3])
         p.bins.setdefault(bin.bin_id, bin)
 
 # do the major part of the segmentation.find_dividers() method
diff --git a/matrixcomponent/PangenomeSchematic.py b/matrixcomponent/PangenomeSchematic.py
index 13e77f7..dda5ff1 100644
--- a/matrixcomponent/PangenomeSchematic.py
+++ b/matrixcomponent/PangenomeSchematic.py
@@ -1,4 +1,9 @@
+import gzip
 import json
+import logging
+import os
+import shutil
+
 
 from collections import OrderedDict
 from statistics import mean
@@ -10,11 +15,159 @@
 from dataclasses import dataclass
 
-from matrixcomponent import JSON_VERSION
+from joblib import delayed
+from rdflib import URIRef, Graph, Namespace
+
+from matrixcomponent import JSON_VERSION, ontology
 from matrixcomponent.matrix import Component, Bin, LinkColumn
 from DNASkittleUtils.Contigs import Contig, write_contigs_to_file
 
+LOGGER = logging.getLogger(__name__)
+"""logging.Logger: The logger for this module"""
+
+
+# kept at module level (outside the class) so that joblib workers can pickle and call it
+def write_rdf(schematic, ontology_output_path):
+    zoom_level = ontology.ZoomLevel()
+    zoom_level.zoom_factor = schematic.bin_width
+    zoom_level.ns = URIRef('http://example.org/vg/')
+
+    prev_comp_id = -1
+    cell_counter = 0
+    ocomp_dict = {}
+    obin_dict = {}
+    oposition_dict = {}
+
+    min_bin_id = schematic.first_bin
+    max_bin_id = schematic.last_bin
+
+    for ic, component in enumerate(schematic.components):
+        ocomp = ontology.Component(ic + 1)
+        ocomp.ns = zoom_level.ns_term() + '/'
+        zoom_level.components.append(ocomp)
+
+        # chain the components 1-2-3-...-n as a doubly linked list
+        if prev_comp_id in ocomp_dict:
+            prev_comp = ocomp_dict[prev_comp_id]
+            ocomp.reverse_component_edge = prev_comp.ns_term()
+            prev_comp.forward_component_edge = ocomp.ns_term()
+
+        ocomp_dict[ic] = ocomp
+        prev_comp_id = ic
+
+        obin_ns = ocomp.ns_term() + '/'
+        obin_tmp = ontology.Bin()
+        obin_tmp.ns = obin_ns
+
+        # bins
+        for bins in component.matrix:
+            for bin in bins[1][1]:  # follow the compressed format
+                if bin:
+                    cur_bin_id = bin.bin_id
+                    obin = ontology.Bin()
+                    obin.ns = obin_ns
+                    obin.bin_rank = cur_bin_id
+                    obin_dict[cur_bin_id] = obin
+
+                    if cur_bin_id > min_bin_id:
+                        obin_tmp.bin_rank = cur_bin_id - 1
+                        obin.reverse_bin_edge = obin_tmp.ns_term()  # string value
+                    if cur_bin_id < max_bin_id:
+                        obin_tmp.bin_rank = cur_bin_id + 1
+                        obin.forward_bin_edge = obin_tmp.ns_term()  # string value
+
+                    ocomp.bins.append(obin)
+
+                    cell_counter = cell_counter + 1
+                    ocell = ontology.Cell()
+                    ocell.id = cell_counter
+                    ocell.path_id = schematic.path_names[bin.path_id]  # path_id was saved in populate_component_matrix
+                    ocell.inversion_percent = bin.inversion
+                    ocell.position_percent = bin.position
+
+                    # todo: are begin/end the real bin_ids or the compressed (sparse-list) ones?
+                    cell_ns = URIRef("{0}/".format(ocell.path_id))
+                    for be in range(0, len(bin.nucleotide_ranges), 2):
+                        begin, end = bin.nucleotide_ranges[be], bin.nucleotide_ranges[be + 1]
+                        real_begin = begin if begin else end
+                        real_end = end if end else begin
+
+                        oregion = ontology.Region()
+                        oregion.begin = real_begin
+                        oregion.end = real_end
+                        ocell.cell_region.append(oregion)
+
+                        path = schematic.path_names[bin.path_id]
+                        oposition_begin = ontology.Position(real_begin, begin < end, path, cell_ns)
+                        oposition_end = ontology.Position(real_end, begin < end, path, cell_ns)
+                        oposition_dict[oposition_begin.ns_term()] = oposition_begin
+                        oposition_dict[oposition_end.ns_term()] = oposition_end
+
+                    obin.cells.append(ocell)
+
+    # links between components and their bins
+    LOGGER.info(f"Bin dictionary {obin_dict}")
+
+    olink_dict = {}
+    link_counter = 0
+    for component in schematic.components:
+        # first pass: count the links and create their objects; iterating departures is enough,
+        # since every link in a component's departures appears as an arrival in some other component
+        for link in component.departures:
+            if len(link.participants):
+                link_counter = link_counter + 1
+                olink = ontology.Link()
+                olink.id = link_counter
+                olink_dict[link_counter] = olink
+
+    link_counter = 0
+    for component in schematic.components:
+        # second pass, in the same iteration order: attach the departure/arrival bins
+        # and the participating paths to each link created in the first pass
+        for link in component.departures:
+            if len(link.participants):
+                link_counter = link_counter + 1
+                olink = olink_dict[link_counter]
+
+                if link.upstream in obin_dict:
+                    from_bin = obin_dict[link.upstream]
+                    olink.departure = from_bin.ns_term()
+                else:
+                    LOGGER.info(f"No upstream {link.upstream}")
+
+                if link.downstream in obin_dict:
+                    to_bin = obin_dict[link.downstream]
+                    olink.arrival = to_bin.ns_term()
+                else:
+                    LOGGER.info(f"No downstream {link.downstream}")
+
+                olink.paths = [schematic.path_names[k] for k in link.participants]
+                olink.linkZoomLevel = zoom_level.ns_term()
+                zoom_level.links.append(olink)
+
+    g = Graph()
+    vg = Namespace('http://biohackathon.org/resource/vg#')
+    faldo = Namespace('http://biohackathon.org/resource/faldo#')
+    g.bind('vg', vg)
+    g.bind('faldo', faldo)
+
+    # serialize the whole zoom level - components, bins, cells and links - into the graph
+    zoom_level.add_to_graph(g, vg, faldo)
+    for oposition in oposition_dict.values():
+        oposition.add_to_graph(g, vg, faldo)
+    for path in schematic.path_names:
+        ontology.Path(path).add_to_graph(g, vg, faldo)
+
+    # format='nt' serializes ~10x faster than 'turtle', but the files are ~3x bigger;
+    # compressing them afterwards gives a ~15x disk-space reduction
+    g.serialize(destination=ontology_output_path, format='nt')
+    with open(ontology_output_path, 'rb') as fin:
+        with gzip.open(ontology_output_path + '.gz', 'wb') as fout:
+            shutil.copyfileobj(fin, fout)
+    os.remove(ontology_output_path)
+
+
 @dataclass
 class PangenomeSchematic:
     json_version: int
@@ -37,14 +190,7 @@ def dumper(obj):
                     ranges.append([flat_ranges[i], flat_ranges[i+1]])
                 return [obj.coverage, obj.inversion, ranges]
             if isinstance(obj, LinkColumn):
-                # todo: get rid of this once the JS side can work with sparse containers
-                if self.json_version <= 14:
-                    bools = [False] * len(self.path_names)
-                    for i in obj.participants:
-                        bools[i] = True
-                    return {'upstream':obj.upstream, 'downstream':obj.downstream, 'participants':bools}
-                else:
-                    return {'upstream':obj.upstream, 'downstream':obj.downstream, 'participants':obj.participants.tolist()}
+                return {'upstream': obj.upstream, 'downstream': obj.downstream, 'participants': obj.participants.tolist()}
             if isinstance(obj, set):
                 return list(obj)
             try:
@@ -68,28 +214,7 @@ def update_first_last_bin(self):
             self.first_bin = 1  # these have not been properly initialized
             self.last_bin = self.components[-1].last_bin
 
-    def split_and_write(self, cells_per_file, folder, fasta : Contig, no_adjacent_links):
-        # todo: get rid of this once the JS side can work with sparse containers
-        if self.json_version <= 14:
-            empty = []
-            for comp in self.components:
-                bools = [False] * len(self.path_names)
-                for i in comp.occupants:
-                    bools[i] = True
-                comp.occupants = bools
-
-                matrix = [empty] * len(self.path_names)
-                fb, lb = comp.first_bin, comp.last_bin
-                for item in comp.matrix:
-                    padded = [empty] * (lb - fb + 1)
-                    sliced = item[1]
-                    for id, val in zip(sliced[0], sliced[1]):
-                        padded[id] = val
-                    matrix[item[0]] = padded
-
-                comp.matrix = matrix
-
-
+    def split_and_write(self, cells_per_file, folder, fasta: Contig, no_adjacent_links, ontology_folder, parallel):
         """Splits one Schematic into multiple files with their own unique first and last_bin
         based on the volume of data desired per file specified by cells_per_file.  """
@@ -137,6 +262,22 @@ def split_and_write(self, cells_per_file, folder, fasta : Contig, no_adjacent_li
             c = folder.joinpath(schematic.fasta_filename(i))
             write_contigs_to_file(c, chunk)
 
+        if ontology_folder:
+            # generator of (PangenomeSchematic, output path) pairs - each chunk is built lazily on item access!
+            prepared_schematics = ( (PangenomeSchematic(JSON_VERSION, self.bin_width, self.components[cut:cut_points[i + 1]][0].first_bin,
+                                                        self.components[cut:cut_points[i + 1]][-1].last_bin, self.includes_connectors,
+                                                        self.components[cut:cut_points[i + 1]], self.path_names,
+                                                        self.total_nr_files, self.pangenome_length),
+                                     str(ontology_folder.joinpath(self.ttl_filename(i))) )
+                                    for i, cut in enumerate(cut_points[:-1]) if self.components[cut:cut_points[i + 1]] )
+
+            # TODO: parallel writing is disabled for now - links can span the boundaries between partial schematics, and there is no cross-worker communication yet to stitch them together.
+            # if parallel:
+            #     results = parallel(delayed(write_rdf)(sch, path) for (sch, path) in prepared_schematics)
+            # else:
+            write_rdf(self, str(ontology_folder.joinpath(self.ttl_filename(0))))
+
         return bin2file_mapping
 
     def find_cut_points_in_file_split(self, columns_per_file, column_counts):
@@ -154,12 +295,7 @@ def find_cut_points_in_file_split(self, columns_per_file, column_counts):
 
     def lazy_average_occupants(self):
         """grab four random components and check how many occupants they have"""
         samples = [self.components[int(len(self.components) * (perc/100))] for perc in range(1, 99)]
-
-        # todo: get rid of this once the JS side can work with sparse containers
-        if self.json_version <= 14:
-            avg_paths = mean([sum(x.occupants) for x in samples])
-        else:
-            avg_paths = mean([len(x.occupants) for x in samples])
+        avg_paths = mean([len(x.occupants) for x in samples])
 
         return avg_paths
@@ -172,6 +308,9 @@ def filename(self, nth_file):
 
     def fasta_filename(self, nth_file):
         return f'seq_chunk{self.pad_file_nr(nth_file)}_bin{self.bin_width}.fa'
 
+    def ttl_filename(self, nth_file):
+        return f'seq_chunk{self.pad_file_nr(nth_file)}_bin{self.bin_width}.nt'
+
     def write_index_file(self, folder, bin2file_mapping):
         file_contents = {'bin_width': self.bin_width,
diff --git a/matrixcomponent/matrix.py b/matrixcomponent/matrix.py
index 0c81c00..b2ba9e9 100644
--- a/matrixcomponent/matrix.py
+++ b/matrixcomponent/matrix.py
@@ -12,6 +12,8 @@ class Bin(recordclass.dataobject):
     coverage: float
     inversion: float
     nucleotide_ranges: 'numpy.array'  # List[List[int]] is encoded as a Numpy flat array - this saves memory
+    path_id: int
+    position: float
 
 
 ## Path is all for input files
diff --git a/matrixcomponent/ontology.py b/matrixcomponent/ontology.py
new file mode 100644
index 0000000..d9e7301
--- /dev/null
+++ b/matrixcomponent/ontology.py
@@ -0,0 +1,258 @@
+from typing import List
+from rdflib import Namespace, Graph, Literal, URIRef, RDF, XSD
+
+
+class Path:
+    path: str
+
+    def __init__(self, path):
+        self.path = path
+
+    def ns_term(self):
+        return "path/{0}".format(self.path)  # e.g. path/path1
+
+    def add_to_graph(self, graph: Graph, vg: Namespace, faldo: Namespace) -> None:
+        path = URIRef(self.ns_term())
+
+        # add the object itself
+        graph.add((path, RDF.type, vg.Path))
+
+
+class Position:
+    id: int
+    is_forward: bool
+    path: str
+    ns: URIRef
+
+    def __init__(self, id, is_forward, path, ns):
+        self.id = id
+        self.is_forward = is_forward
+        self.path = path
+        self.ns = ns
+
+    def ns_term(self):
+        return self.ns + "{0}".format(str(self.id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        position = self.ns_term()
+
+        # add the object itself
+        graph.add((position, RDF.type, faldo.ExactPosition))
+        if self.is_forward:
+            graph.add((position, RDF.type, faldo.ForwardStrandPosition))
+        else:
+            graph.add((position, RDF.type, faldo.ReverseStrandPosition))
+
+        # add its properties, recursively if needed
+        graph.add((position, faldo.position, Literal(self.id)))
+        graph.add((position, faldo.reference, URIRef(self.path)))
+
+
+class Region:
+    ns: URIRef
+    begin: int
+    end: int
+
+    def ns_term(self):
+        return self.ns + "{0}-{1}".format(str(self.begin), str(self.end))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        region = self.ns_term()
+
+        # add the object itself
+        graph.add((region, RDF.type, faldo.Region))
+
+        # add its properties, recursively if needed
+        real_begin = self.begin if self.begin else self.end
+        real_end = self.end if self.end else self.begin
+        graph.add((region, faldo.begin, self.ns + str(real_begin)))
+        graph.add((region, faldo.end, self.ns + str(real_end)))
+
+
+class Cell:
+    id: int
+    path_id: str
+    ns: URIRef
+    position_percent: float
+    inversion_percent: float
+    cell_region: List[Region]  # e.g. [[1,4], [7,11]]
+
+    def __init__(self):
+        self.cell_region = []
+
+    def ns_term(self):
+        return self.ns + "cell{0}/path/".format(str(self.path_id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        cell = self.ns_term()
+        inner_ns = URIRef("{0}/path/".format(self.path_id))
+
+        # add the object itself
+        graph.add((cell, RDF.type, vg.Cell))
+
+        # add its properties, recursively if needed
+        graph.add((cell, vg.positionPercent, Literal(self.position_percent, datatype=XSD.double)))
+        graph.add((cell, vg.inversionPercent, Literal(self.inversion_percent, datatype=XSD.double)))
+        for region in self.cell_region:
+            region.ns = inner_ns
+            graph.add((cell, vg.cellRegions, region.ns_term()))  # a cell can have multiple regions
+            region.add_to_graph(graph, vg, faldo)
+
+
+class Bin:
+    ns: URIRef
+    bin_rank: int
+    forward_bin_edge: str
+    reverse_bin_edge: str
+    cells: List[Cell]
+
+    def __init__(self):
+        self.cells = []
+        self.forward_bin_edge = ''
+        self.reverse_bin_edge = ''
+
+    def ns_term(self):
+        return self.ns + "bin{0}".format(str(self.bin_rank))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        bin = self.ns_term()
+        inner_ns = bin + "/"
+
+        # add the object itself
+        graph.add((bin, RDF.type, vg.Bin))
+
+        # add its properties, recursively if needed
+        graph.add((bin, vg.binRank, Literal(self.bin_rank)))
+        for cell in self.cells:
+            cell.ns = inner_ns
+            graph.add((bin, vg.cells, cell.ns_term()))  # a bin can have multiple cells
+            cell.add_to_graph(graph, vg, faldo)
+
+        # add the reference to another object if needed
+        if self.forward_bin_edge:
+            graph.add((bin, vg.forwardBinEdge, URIRef(self.forward_bin_edge)))
+
+        if self.reverse_bin_edge:
+            graph.add((bin, vg.reverseBinEdge, URIRef(self.reverse_bin_edge)))
+
+
+class Link:
+    id: int
+    ns: URIRef
+    arrival: str
+    departure: str
+    paths: List[str]
+    forward_link_edge: int
+    reverse_link_edge: int
+    linkZoomLevel: str
+
+    def __init__(self):
+        self.paths = []
+        self.forward_link_edge = -1
+        self.reverse_link_edge = -1
+        self.arrival = ''
+        self.departure = ''
+        self.linkZoomLevel = ''
+
+    def ns_term(self):
+        return self.ns + "link{0}".format(str(self.id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        link = self.ns_term()
+
+        # add the object itself
+        graph.add((link, RDF.type, vg.Link))
+
+        # add its properties, recursively if needed
+        graph.add((link, vg.linkRank, Literal(self.id)))
+
+        if self.linkZoomLevel:
+            graph.add((link, vg.linkZoomLevel, URIRef(self.linkZoomLevel)))
+        # assert self.arrival and self.departure
+        if self.arrival:
+            graph.add((link, vg.arrival, URIRef(self.arrival)))
+
+        if self.departure:
+            graph.add((link, vg.departure, URIRef(self.departure)))
+
+        for path in self.paths:
+            graph.add((link, vg.linkPaths, URIRef(path)))  # a link can involve multiple paths
+
+        # add the reference to another object if needed (wrapped in URIRef so that
+        # rdflib stores a proper IRI node instead of a plain string)
+        if self.forward_link_edge > -1:
+            graph.add((link, vg.forwardLinkEdge, URIRef("{0}link{1}".format(self.ns, str(self.forward_link_edge)))))
+
+        if self.reverse_link_edge > -1:
+            graph.add((link, vg.reverseLinkEdge, URIRef("{0}link{1}".format(self.ns, str(self.reverse_link_edge)))))
+
+
+class Component:
+    id: int
+    ns: URIRef
+    forward_component_edge: str  # ns_term of the next Component
+    reverse_component_edge: str  # ns_term of the previous Component
+    component_rank: int
+    bins: List[Bin]
+
+    def __init__(self, id):
+        self.id = id
+        self.bins = []
+        self.forward_component_edge = ''
+        self.reverse_component_edge = ''
+
+    def ns_term(self):
+        return self.ns + "component{0}".format(str(self.id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        component = self.ns_term()
+        inner_ns = component + "/"
+
+        # add the object itself
+        graph.add((component, RDF.type, vg.Component))
+
+        # add its properties, recursively if needed
+        graph.add((component, vg.componentRank, Literal(self.id)))
+        for bin in self.bins:
+            bin.ns = inner_ns
+            graph.add((component, vg.bins, bin.ns_term()))  # a component can have multiple bins
+            bin.add_to_graph(graph, vg, faldo)  # add the inner content of each bin
+
+        # add the reference to another object if needed
+        if self.forward_component_edge:
+            graph.add((component, vg.forwardComponentEdge, URIRef(self.forward_component_edge)))
+
+        if self.reverse_component_edge:
+            graph.add((component, vg.reverseComponentEdge, URIRef(self.reverse_component_edge)))
+
+
+class ZoomLevel:
+    id: str
+    ns: URIRef
+    zoom_factor: int
+    components: List[Component]
+    links: List[Link]
+
+    def __init__(self):
+        self.components = []
+        self.links = []
+
+    def ns_term(self):
+        return self.ns + "zoom{0}".format(str(self.zoom_factor))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        zoomfactor = self.ns_term()
+        inner_ns = zoomfactor + "/"
+
+        # add the object itself
+        graph.add((zoomfactor, RDF.type, vg.ZoomLevel))
+
+        # add its properties, recursively if needed
+        graph.add((zoomfactor, vg.zoomFactor, Literal(self.zoom_factor)))
+        for comp in self.components:
+            comp.ns = inner_ns
+            graph.add((zoomfactor, vg.components, comp.ns_term()))
+            comp.add_to_graph(graph, vg, faldo)
+
+        for link in self.links:
+            link.ns = inner_ns
+            link.add_to_graph(graph, vg, faldo)
diff --git a/queries/selectBins1To5OfZoomlevel1.rq b/queries/selectBins1To5OfZoomlevel1.rq
new file mode 100644
index 0000000..a5c1f5c
--- /dev/null
+++ b/queries/selectBins1To5OfZoomlevel1.rq
@@ -0,0 +1,23 @@
+PREFIX vg: <http://biohackathon.org/resource/vg#>
+PREFIX faldo: <http://biohackathon.org/resource/faldo#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT ?cell ?faldoregion ?inversionpercent ?positionpercent ?beginpos ?endpos ?reference
+WHERE {
+  ?zoomlevel a vg:ZoomLevel;
+             vg:components ?components;
+             vg:zoomFactor ?zoomfactor .
+  FILTER(?zoomfactor = 1)
+  ?components vg:bins ?bin .
+  ?bin vg:cells ?cell;
+       vg:binRank ?binrank .
+  FILTER(?binrank < 6 && ?binrank > 0)
+  ?cell vg:cellRegions ?faldoregion;
+        vg:inversionPercent ?inversionpercent;
+        vg:positionPercent ?positionpercent .
+  ?faldoregion faldo:begin ?begin;
+               faldo:end ?end .
+  ?begin faldo:position ?beginpos .
+  ?end faldo:position ?endpos .
+  ?begin faldo:reference ?reference .
+}
\ No newline at end of file
diff --git a/queries/selectLinksFilterBins1To5OfZoomLevel1.rq b/queries/selectLinksFilterBins1To5OfZoomLevel1.rq
new file mode 100644
index 0000000..f76b6a3
--- /dev/null
+++ b/queries/selectLinksFilterBins1To5OfZoomLevel1.rq
@@ -0,0 +1,18 @@
+PREFIX vg: <http://biohackathon.org/resource/vg#>
+PREFIX faldo: <http://biohackathon.org/resource/faldo#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT ?link ?path ?arrivalbinrank ?departurebinrank ?zoomfactor
+WHERE {
+  ?link a vg:Link;
+        vg:arrival ?arrivalbin;
+        vg:departure ?departurebin;
+        vg:linkPaths ?path ;
+        vg:linkZoomLevel ?zoomlevel .
+  ?arrivalbin vg:binRank ?arrivalbinrank .
+  ?departurebin vg:binRank ?departurebinrank .
+  FILTER((?arrivalbinrank < 6 && ?arrivalbinrank > 0)
+      || (?departurebinrank < 6 && ?departurebinrank > 0))
+  ?zoomlevel vg:zoomFactor ?zoomfactor .
+  FILTER(?zoomfactor = 1)
+}
\ No newline at end of file
diff --git a/queries/selectPangenomeSeq1To5.rq b/queries/selectPangenomeSeq1To5.rq
new file mode 100644
index 0000000..4159e0a
--- /dev/null
+++ b/queries/selectPangenomeSeq1To5.rq
@@ -0,0 +1,11 @@
+PREFIX vg: <http://biohackathon.org/resource/vg#>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+SELECT (SUBSTR(group_concat(?sequence; separator=''), 1, 5) as ?panSeq) {
+  SELECT
+    *
+  WHERE {?s a vg:Node;
+            rdf:value ?sequence .
+  }
+  ORDER BY ?s
+}
diff --git a/requirements.txt b/requirements.txt
index a40f53d..f2ac401 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ DNASkittleUtils==1.0.13
 numpy==1.18.2
 pytest==5.4.1
 sortedcontainers==2.1.0
 joblib==0.14.1
 numba==0.48.0
 psutil==5.7.0
-recordclass==0.13.2
\ No newline at end of file
+recordclass==0.13.2
diff --git a/segmentation.py b/segmentation.py
index 8eacfda..b5f3e24 100644
--- a/segmentation.py
+++ b/segmentation.py
@@ -65,6 +65,7 @@ def populate_component_matrix(paths: List[Path], schematic: PangenomeSchematic):
     # this case enforces first_bin == last_bin --- comp.matrix[p] has a single element
     for comp, fr in zip(comp_filtered, from_filtered):
         bin = values[fr]
+        bin.path_id = p  # save for later use in write_rdf
         comp.matrix.append([p, [[0], [bin]]])
         if bin.coverage > 0.1:
             comp.occupants.add(p)
@@ -83,6 +84,8 @@ def populate_component_matrix(paths: List[Path], schematic: PangenomeSchematic):
         sliced = values[fr:to]
         ids = [bin.bin_id - fb for bin in sliced]
         comp.matrix.append([p,[ids, sliced]])
+        for bin in sliced:
+            bin.path_id = p  # save for later use in write_rdf
         if any([bin.coverage > 0.1 for bin in sliced]):
             comp.occupants.add(p)
@@ -279,14 +282,17 @@ def _split_lines(self, text, width):
         return argparse.HelpFormatter._split_lines(self, text, width)
 
 
-def write_files(folder, odgi_fasta: Path, schematic: PangenomeSchematic, no_adjacent_links):
+def write_files(folder, ontology_folder, odgi_fasta: Path, schematic: PangenomeSchematic, no_adjacent_links, parallel):
     os.makedirs(folder, exist_ok=True)  # make directory for all files
+    if ontology_folder:
+        os.makedirs(ontology_folder, exist_ok=True)
 
     fasta = None
     if odgi_fasta:
         fasta = read_contigs(odgi_fasta)[0]
 
-    bin2file_mapping = schematic.split_and_write(args.cells_per_file, folder, fasta, no_adjacent_links)
+    bin2file_mapping = schematic.split_and_write(args.cells_per_file, folder, fasta, no_adjacent_links,
+                                                 ontology_folder, parallel)
 
     schematic.write_index_file(folder, bin2file_mapping)
@@ -343,6 +349,12 @@ def get_arguments():
                         action='store_true',
                         help='Switches off the add_adjacent_connector_column() routine)')
 
+    parser.add_argument('-t', '--do-ttl',
+                        dest='do_ttl',
+                        default=False,
+                        action='store_true',
+                        help='write the ontology output as gzipped N-Triples (.nt.gz)')
+
     args = parser.parse_args()
 
     # file path logic for single or list of files with wildcard *
@@ -391,7 +403,12 @@ def main():
         # this one spits out json and optionally other output files (fasta, ttl)
         path_name = str(bin_width)
         folder_path = osPath(args.output_folder).joinpath(path_name)  # full path
-        write_files(folder_path, args.fasta, schematic, args.no_adjacent_links)
+
+        ontology_folder_path = None
+        if args.do_ttl:
+            ontology_folder_path = osPath(args.output_folder).joinpath(path_name + '-rdf')
+
+        write_files(folder_path, ontology_folder_path, args.fasta, schematic, args.no_adjacent_links, parallel)
 
         LOGGER.info("Finished processing the file " + json_file)
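
Usage sketch (not part of the diff): a minimal way to exercise this branch end to end with rdflib. The input file name, the flag spellings other than --do-ttl, and the exact chunk numbering are assumptions for illustration - see get_arguments() and pad_file_nr() for the authoritative values.

    import gzip
    from rdflib import Graph

    # 1) Produce the RDF (run from the repo root; flag names here are assumed):
    #      python segmentation.py --json-file data/chrk_ath_12samples_10kb.w100000_S.json \
    #                             --output-folder output --do-ttl
    #    With bin_width=100000, main() puts the gzipped N-Triples under
    #    output/100000-rdf/, named by ttl_filename(), e.g. seq_chunk00_bin100000.nt.gz
    #    (the chunk padding comes from pad_file_nr() and may differ).

    # 2) Load the .nt.gz back into a graph and run one of the shipped queries.
    graph = Graph()
    with gzip.open('output/100000-rdf/seq_chunk00_bin100000.nt.gz', 'rt') as fin:
        graph.parse(data=fin.read(), format='nt')

    with open('queries/selectBins1To5OfZoomlevel1.rq') as fin:
        for row in graph.query(fin.read()):
            print(row)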