diff --git a/data/chrk_ath_12samples_10kb.w100000_S.json b/data/chrk_ath_12samples_10kb.w100000_S.json
index 3f89fd3..94b34f5 100644
--- a/data/chrk_ath_12samples_10kb.w100000_S.json
+++ b/data/chrk_ath_12samples_10kb.w100000_S.json
@@ -1,4 +1,4 @@
-{"odgi_version": 10,"bin_width": 100000,"pangenome_length": 210026186}
+{"odgi_version": 12,"bin_width": 100000,"pangenome_length": 210026186}
 {"bin_id":1}
 {"bin_id":2}
 {"bin_id":3}
diff --git a/matrixcomponent/JSONparser.py b/matrixcomponent/JSONparser.py
index 5b469e7..51ed15b 100644
--- a/matrixcomponent/JSONparser.py
+++ b/matrixcomponent/JSONparser.py
@@ -41,7 +41,7 @@ def process_path(line=None):
         for r in ranges:
             compressed_ranges.extend(r)
 
-        bin = matrix.Bin(b[0], b[1], b[2], compressed_ranges)
+        bin = matrix.Bin(b[0], b[1], b[2], compressed_ranges, 0, b[3])
         p.bins.setdefault(bin.bin_id, bin)
 
 # do the major part of the segmentation.find_dividers() method
diff --git a/matrixcomponent/PangenomeSchematic.py b/matrixcomponent/PangenomeSchematic.py
index 13e77f7..dda5ff1 100644
--- a/matrixcomponent/PangenomeSchematic.py
+++ b/matrixcomponent/PangenomeSchematic.py
@@ -1,4 +1,9 @@
+import gzip
 import json
+import logging
+import os
+import shutil
+
 
 from collections import OrderedDict
 from statistics import mean
@@ -10,11 +15,159 @@
 from dataclasses import dataclass
 
-from matrixcomponent import JSON_VERSION
+from joblib import delayed
+from rdflib import URIRef, Graph, Namespace
+
+from matrixcomponent import JSON_VERSION, ontology
 from matrixcomponent.matrix import Component, Bin, LinkColumn
 from DNASkittleUtils.Contigs import Contig, write_contigs_to_file
 
+LOGGER = logging.getLogger(__name__)
+"""logging.Logger: The logger for this module"""
+
+
+# kept at module level (outside the class) so that joblib workers can pickle and call it
+def write_rdf(schematic, ontology_output_path):
+    zoom_level = ontology.ZoomLevel()
+    zoom_level.zoom_factor = schematic.bin_width
+    zoom_level.ns = URIRef('http://example.org/vg/')
+
+    prev_comp_id = -1
+    cell_counter = 0
+    ocomp_dict = {}
+    obin_dict = {}
+    oposition_dict = {}
+
+    min_bin_id = schematic.first_bin
+    max_bin_id = schematic.last_bin
+
+    for ic, component in enumerate(schematic.components):
+        ocomp = ontology.Component(ic + 1)
+        ocomp.ns = zoom_level.ns_term() + '/'
+        zoom_level.components.append(ocomp)
+
+        # chain the components 1-2-3-...-n as a doubly linked list
+        if prev_comp_id in ocomp_dict:
+            prev_comp = ocomp_dict[prev_comp_id]
+            ocomp.reverse_component_edge = prev_comp.ns_term()
+            prev_comp.forward_component_edge = ocomp.ns_term()
+
+        ocomp_dict[ic] = ocomp
+        prev_comp_id = ic
+
+        obin_ns = ocomp.ns_term() + '/'
+        obin_tmp = ontology.Bin()
+        obin_tmp.ns = obin_ns
+
+        # bins
+        for bins in component.matrix:
+            for bin in bins[1][1]:  # follow the compressed format
+                if bin:
+                    cur_bin_id = bin.bin_id
+                    obin = ontology.Bin()
+                    obin.ns = obin_ns
+                    obin.bin_rank = cur_bin_id
+                    obin_dict[cur_bin_id] = obin
+
+                    if cur_bin_id > min_bin_id:
+                        obin_tmp.bin_rank = cur_bin_id - 1
+                        obin.reverse_bin_edge = obin_tmp.ns_term()  # string value
+                    if cur_bin_id < max_bin_id:
+                        obin_tmp.bin_rank = cur_bin_id + 1
+                        obin.forward_bin_edge = obin_tmp.ns_term()  # string value
+
+                    ocomp.bins.append(obin)
+
+                    cell_counter = cell_counter + 1
+                    ocell = ontology.Cell()
+                    ocell.id = cell_counter
+                    ocell.path_id = schematic.path_names[bin.path_id]  # path_id was saved in populate_component_matrix
+                    ocell.inversion_percent = bin.inversion
+                    ocell.position_percent = bin.position
+
+                    # todo: are begin/end the real bin_ids or the compressed (sparse-list) ones?
+                    cell_ns = URIRef("{0}/".format(ocell.path_id))
+                    for be in range(0, len(bin.nucleotide_ranges), 2):
+                        begin, end = bin.nucleotide_ranges[be], bin.nucleotide_ranges[be + 1]
+                        real_begin = begin if begin else end
+                        real_end = end if end else begin
+
+                        oregion = ontology.Region()
+                        oregion.begin = real_begin
+                        oregion.end = real_end
+                        ocell.cell_region.append(oregion)
+
+                        path = schematic.path_names[bin.path_id]
+                        oposition_begin = ontology.Position(real_begin, begin < end, path, cell_ns)
+                        oposition_end = ontology.Position(real_end, begin < end, path, cell_ns)
+                        oposition_dict[oposition_begin.ns_term()] = oposition_begin
+                        oposition_dict[oposition_end.ns_term()] = oposition_end
+
+                    obin.cells.append(ocell)
+
+    # links between components and their bins
+    LOGGER.info(f"Bin dictionary {obin_dict}")
+
+    olink_dict = {}
+    link_counter = 0
+    for component in schematic.components:
+        # first pass: count the links and create their objects; iterating departures is enough,
+        # since every link in a component's departures appears as an arrival in some other component
+        for link in component.departures:
+            if len(link.participants):
+                link_counter = link_counter + 1
+                olink = ontology.Link()
+                olink.id = link_counter
+                olink_dict[link_counter] = olink
+
+    link_counter = 0
+    for component in schematic.components:
+        # second pass, in the same iteration order: attach the departure/arrival bins
+        # and the participating paths to each link created in the first pass
+        for link in component.departures:
+            if len(link.participants):
+                link_counter = link_counter + 1
+                olink = olink_dict[link_counter]
+
+                if link.upstream in obin_dict:
+                    from_bin = obin_dict[link.upstream]
+                    olink.departure = from_bin.ns_term()
+                else:
+                    LOGGER.info(f"No upstream {link.upstream}")
+
+                if link.downstream in obin_dict:
+                    to_bin = obin_dict[link.downstream]
+                    olink.arrival = to_bin.ns_term()
+                else:
+                    LOGGER.info(f"No downstream {link.downstream}")
+
+                olink.paths = [schematic.path_names[k] for k in link.participants]
+                olink.linkZoomLevel = zoom_level.ns_term()
+                zoom_level.links.append(olink)
+
+    g = Graph()
+    vg = Namespace('http://biohackathon.org/resource/vg#')
+    faldo = Namespace('http://biohackathon.org/resource/faldo#')
+    g.bind('vg', vg)
+    g.bind('faldo', faldo)
+
+    # serialize the whole zoom level - components, bins, cells and links - into the graph
+    zoom_level.add_to_graph(g, vg, faldo)
+    for oposition in oposition_dict.values():
+        oposition.add_to_graph(g, vg, faldo)
+    for path in schematic.path_names:
+        ontology.Path(path).add_to_graph(g, vg, faldo)
+
+    # format='nt' serializes ~10x faster than 'turtle', but the files are ~3x bigger;
+    # compressing them afterwards gives a ~15x disk-space reduction
+    g.serialize(destination=ontology_output_path, format='nt')
+    with open(ontology_output_path, 'rb') as fin:
+        with gzip.open(ontology_output_path + '.gz', 'wb') as fout:
+            shutil.copyfileobj(fin, fout)
+    os.remove(ontology_output_path)
+
+
 @dataclass
 class PangenomeSchematic:
     json_version: int
@@ -37,14 +190,7 @@ def dumper(obj):
                     ranges.append([flat_ranges[i], flat_ranges[i+1]])
                 return [obj.coverage, obj.inversion, ranges]
             if isinstance(obj, LinkColumn):
-                # todo: get rid of this once the JS side can work with sparse containers
-                if self.json_version <= 14:
-                    bools = [False] * len(self.path_names)
-                    for i in obj.participants:
-                        bools[i] = True
-                    return {'upstream':obj.upstream, 'downstream':obj.downstream, 'participants':bools}
-                else:
-                    return {'upstream':obj.upstream, 'downstream':obj.downstream, 'participants':obj.participants.tolist()}
+                return {'upstream': obj.upstream, 'downstream': obj.downstream, 'participants': obj.participants.tolist()}
             if isinstance(obj, set):
                 return list(obj)
             try:
@@ -68,28 +214,7 @@ def update_first_last_bin(self):
             self.first_bin = 1  # these have not been properly initialized
             self.last_bin = self.components[-1].last_bin
 
-    def split_and_write(self, cells_per_file, folder, fasta : Contig, no_adjacent_links):
-        # todo: get rid of this once the JS side can work with sparse containers
-        if self.json_version <= 14:
-            empty = []
-            for comp in self.components:
-                bools = [False] * len(self.path_names)
-                for i in comp.occupants:
-                    bools[i] = True
-                comp.occupants = bools
-
-                matrix = [empty] * len(self.path_names)
-                fb, lb = comp.first_bin, comp.last_bin
-                for item in comp.matrix:
-                    padded = [empty] * (lb - fb + 1)
-                    sliced = item[1]
-                    for id, val in zip(sliced[0], sliced[1]):
-                        padded[id] = val
-                    matrix[item[0]] = padded
-
-                comp.matrix = matrix
-
-
+    def split_and_write(self, cells_per_file, folder, fasta: Contig, no_adjacent_links, ontology_folder, parallel):
         """Splits one Schematic into multiple files with their own unique first and last_bin
         based on the volume of data desired per file specified by cells_per_file.  """
@@ -137,6 +262,22 @@ def split_and_write(self, cells_per_file, folder, fasta : Contig, no_adjacent_li
             c = folder.joinpath(schematic.fasta_filename(i))
             write_contigs_to_file(c, chunk)
 
+        if ontology_folder:
+            # generator of (PangenomeSchematic, output path) pairs - each chunk is built lazily on item access!
+            prepared_schematics = ( (PangenomeSchematic(JSON_VERSION, self.bin_width, self.components[cut:cut_points[i + 1]][0].first_bin,
+                                                        self.components[cut:cut_points[i + 1]][-1].last_bin, self.includes_connectors,
+                                                        self.components[cut:cut_points[i + 1]], self.path_names,
+                                                        self.total_nr_files, self.pangenome_length),
+                                     str(ontology_folder.joinpath(self.ttl_filename(i))) )
+                                    for i, cut in enumerate(cut_points[:-1]) if self.components[cut:cut_points[i + 1]] )
+
+            # TODO: parallel writing is disabled for now - links can span the boundaries between partial schematics, and there is no cross-worker communication yet to stitch them together.
+            # if parallel:
+            #     results = parallel(delayed(write_rdf)(sch, path) for (sch, path) in prepared_schematics)
+            # else:
+            write_rdf(self, str(ontology_folder.joinpath(self.ttl_filename(0))))
+
         return bin2file_mapping
 
     def find_cut_points_in_file_split(self, columns_per_file, column_counts):
@@ -154,12 +295,7 @@ def find_cut_points_in_file_split(self, columns_per_file, column_counts):
 
     def lazy_average_occupants(self):
         """grab four random components and check how many occupants they have"""
         samples = [self.components[int(len(self.components) * (perc/100))] for perc in range(1, 99)]
-
-        # todo: get rid of this once the JS side can work with sparse containers
-        if self.json_version <= 14:
-            avg_paths = mean([sum(x.occupants) for x in samples])
-        else:
-            avg_paths = mean([len(x.occupants) for x in samples])
+        avg_paths = mean([len(x.occupants) for x in samples])
 
         return avg_paths
@@ -172,6 +308,9 @@ def filename(self, nth_file):
 
     def fasta_filename(self, nth_file):
         return f'seq_chunk{self.pad_file_nr(nth_file)}_bin{self.bin_width}.fa'
 
+    def ttl_filename(self, nth_file):
+        return f'seq_chunk{self.pad_file_nr(nth_file)}_bin{self.bin_width}.nt'
+
     def write_index_file(self, folder, bin2file_mapping):
         file_contents = {'bin_width': self.bin_width,
diff --git a/matrixcomponent/matrix.py b/matrixcomponent/matrix.py
index 0c81c00..b2ba9e9 100644
--- a/matrixcomponent/matrix.py
+++ b/matrixcomponent/matrix.py
@@ -12,6 +12,8 @@ class Bin(recordclass.dataobject):
     coverage: float
     inversion: float
     nucleotide_ranges: 'numpy.array'  # List[List[int]] is encoded as a Numpy flat array - this saves memory
+    path_id: int
+    position: float
 
 
 ## Path is all for input files
diff --git a/matrixcomponent/ontology.py b/matrixcomponent/ontology.py
new file mode 100644
index 0000000..d9e7301
--- /dev/null
+++ b/matrixcomponent/ontology.py
@@ -0,0 +1,258 @@
+from typing import List
+from rdflib import Namespace, Graph, Literal, URIRef, RDF, XSD
+
+
+class Path:
+    path: str
+
+    def __init__(self, path):
+        self.path = path
+
+    def ns_term(self):
+        return "path/{0}".format(self.path)  # e.g. path/path1
+
+    def add_to_graph(self, graph: Graph, vg: Namespace, faldo: Namespace) -> None:
+        path = URIRef(self.ns_term())
+
+        # add the object itself
+        graph.add((path, RDF.type, vg.Path))
+
+
+class Position:
+    id: int
+    is_forward: bool
+    path: str
+    ns: URIRef
+
+    def __init__(self, id, is_forward, path, ns):
+        self.id = id
+        self.is_forward = is_forward
+        self.path = path
+        self.ns = ns
+
+    def ns_term(self):
+        return self.ns + "{0}".format(str(self.id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        position = self.ns_term()
+
+        # add the object itself
+        graph.add((position, RDF.type, faldo.ExactPosition))
+        if self.is_forward:
+            graph.add((position, RDF.type, faldo.ForwardStrandPosition))
+        else:
+            graph.add((position, RDF.type, faldo.ReverseStrandPosition))
+
+        # add its properties, recursively if needed
+        graph.add((position, faldo.position, Literal(self.id)))
+        graph.add((position, faldo.reference, URIRef(self.path)))
+
+
+class Region:
+    ns: URIRef
+    begin: int
+    end: int
+
+    def ns_term(self):
+        return self.ns + "{0}-{1}".format(str(self.begin), str(self.end))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        region = self.ns_term()
+
+        # add the object itself
+        graph.add((region, RDF.type, faldo.Region))
+
+        # add its properties, recursively if needed
+        real_begin = self.begin if self.begin else self.end
+        real_end = self.end if self.end else self.begin
+        graph.add((region, faldo.begin, self.ns + str(real_begin)))
+        graph.add((region, faldo.end, self.ns + str(real_end)))
+
+
+class Cell:
+    id: int
+    path_id: str
+    ns: URIRef
+    position_percent: float
+    inversion_percent: float
+    cell_region: List[Region]  # e.g. [[1,4], [7,11]]
+
+    def __init__(self):
+        self.cell_region = []
+
+    def ns_term(self):
+        return self.ns + "cell{0}/path/".format(str(self.path_id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        cell = self.ns_term()
+        inner_ns = URIRef("{0}/path/".format(self.path_id))
+
+        # add the object itself
+        graph.add((cell, RDF.type, vg.Cell))
+
+        # add its properties, recursively if needed
+        graph.add((cell, vg.positionPercent, Literal(self.position_percent, datatype=XSD.double)))
+        graph.add((cell, vg.inversionPercent, Literal(self.inversion_percent, datatype=XSD.double)))
+        for region in self.cell_region:
+            region.ns = inner_ns
+            graph.add((cell, vg.cellRegions, region.ns_term()))  # a cell can have multiple regions
+            region.add_to_graph(graph, vg, faldo)
+
+
+class Bin:
+    ns: URIRef
+    bin_rank: int
+    forward_bin_edge: str
+    reverse_bin_edge: str
+    cells: List[Cell]
+
+    def __init__(self):
+        self.cells = []
+        self.forward_bin_edge = ''
+        self.reverse_bin_edge = ''
+
+    def ns_term(self):
+        return self.ns + "bin{0}".format(str(self.bin_rank))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        bin = self.ns_term()
+        inner_ns = bin + "/"
+
+        # add the object itself
+        graph.add((bin, RDF.type, vg.Bin))
+
+        # add its properties, recursively if needed
+        graph.add((bin, vg.binRank, Literal(self.bin_rank)))
+        for cell in self.cells:
+            cell.ns = inner_ns
+            graph.add((bin, vg.cells, cell.ns_term()))  # a bin can have multiple cells
+            cell.add_to_graph(graph, vg, faldo)
+
+        # add the reference to another object if needed
+        if self.forward_bin_edge:
+            graph.add((bin, vg.forwardBinEdge, URIRef(self.forward_bin_edge)))
+
+        if self.reverse_bin_edge:
+            graph.add((bin, vg.reverseBinEdge, URIRef(self.reverse_bin_edge)))
+
+
+class Link:
+    id: int
+    ns: URIRef
+    arrival: str
+    departure: str
+    paths: List[str]
+    forward_link_edge: int
+    reverse_link_edge: int
+    linkZoomLevel: str
+
+    def __init__(self):
+        self.paths = []
+        self.forward_link_edge = -1
+        self.reverse_link_edge = -1
+        self.arrival = ''
+        self.departure = ''
+        self.linkZoomLevel = ''
+
+    def ns_term(self):
+        return self.ns + "link{0}".format(str(self.id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        link = self.ns_term()
+
+        # add the object itself
+        graph.add((link, RDF.type, vg.Link))
+
+        # add its properties, recursively if needed
+        graph.add((link, vg.linkRank, Literal(self.id)))
+
+        if self.linkZoomLevel:
+            graph.add((link, vg.linkZoomLevel, URIRef(self.linkZoomLevel)))
+        # assert self.arrival and self.departure
+        if self.arrival:
+            graph.add((link, vg.arrival, URIRef(self.arrival)))
+
+        if self.departure:
+            graph.add((link, vg.departure, URIRef(self.departure)))
+
+        for path in self.paths:
+            graph.add((link, vg.linkPaths, URIRef(path)))  # a link can involve multiple paths
+
+        # add the reference to another object if needed (wrapped in URIRef so that
+        # rdflib stores a proper IRI node instead of a plain string)
+        if self.forward_link_edge > -1:
+            graph.add((link, vg.forwardLinkEdge, URIRef("{0}link{1}".format(self.ns, str(self.forward_link_edge)))))
+
+        if self.reverse_link_edge > -1:
+            graph.add((link, vg.reverseLinkEdge, URIRef("{0}link{1}".format(self.ns, str(self.reverse_link_edge)))))
+
+
+class Component:
+    id: int
+    ns: URIRef
+    forward_component_edge: str  # ns_term of the next Component
+    reverse_component_edge: str  # ns_term of the previous Component
+    component_rank: int
+    bins: List[Bin]
+
+    def __init__(self, id):
+        self.id = id
+        self.bins = []
+        self.forward_component_edge = ''
+        self.reverse_component_edge = ''
+
+    def ns_term(self):
+        return self.ns + "component{0}".format(str(self.id))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        component = self.ns_term()
+        inner_ns = component + "/"
+
+        # add the object itself
+        graph.add((component, RDF.type, vg.Component))
+
+        # add its properties, recursively if needed
+        graph.add((component, vg.componentRank, Literal(self.id)))
+        for bin in self.bins:
+            bin.ns = inner_ns
+            graph.add((component, vg.bins, bin.ns_term()))  # a component can have multiple bins
+            bin.add_to_graph(graph, vg, faldo)  # add the inner content of each bin
+
+        # add the reference to another object if needed
+        if self.forward_component_edge:
+            graph.add((component, vg.forwardComponentEdge, URIRef(self.forward_component_edge)))
+
+        if self.reverse_component_edge:
+            graph.add((component, vg.reverseComponentEdge, URIRef(self.reverse_component_edge)))
+
+
+class ZoomLevel:
+    id: str
+    ns: URIRef
+    zoom_factor: int
+    components: List[Component]
+    links: List[Link]
+
+    def __init__(self):
+        self.components = []
+        self.links = []
+
+    def ns_term(self):
+        return self.ns + "zoom{0}".format(str(self.zoom_factor))
+
+    def add_to_graph(self, graph: Graph, vg, faldo: Namespace) -> None:
+        zoomfactor = self.ns_term()
+        inner_ns = zoomfactor + "/"
+
+        # add the object itself
+        graph.add((zoomfactor, RDF.type, vg.ZoomLevel))
+
+        # add its properties, recursively if needed
+        graph.add((zoomfactor, vg.zoomFactor, Literal(self.zoom_factor)))
+        for comp in self.components:
+            comp.ns = inner_ns
+            graph.add((zoomfactor, vg.components, comp.ns_term()))
+            comp.add_to_graph(graph, vg, faldo)
+
+        for link in self.links:
+            link.ns = inner_ns
+            link.add_to_graph(graph, vg, faldo)
diff --git a/queries/selectBins1To5OfZoomlevel1.rq b/queries/selectBins1To5OfZoomlevel1.rq
new file mode 100644
index 0000000..a5c1f5c
--- /dev/null
+++ b/queries/selectBins1To5OfZoomlevel1.rq
@@ -0,0 +1,23 @@
+PREFIX vg: <http://biohackathon.org/resource/vg#>
+PREFIX faldo: <http://biohackathon.org/resource/faldo#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT ?cell ?faldoregion ?inversionpercent ?positionpercent ?beginpos ?endpos ?reference
+WHERE {
+  ?zoomlevel a vg:ZoomLevel;
+             vg:components ?components;
+             vg:zoomFactor ?zoomfactor .
+  FILTER(?zoomfactor = 1)
+  ?components vg:bins ?bin .
+  ?bin vg:cells ?cell;
+       vg:binRank ?binrank .
+  FILTER(?binrank < 6 && ?binrank > 0)
+  ?cell vg:cellRegions ?faldoregion;
+        vg:inversionPercent ?inversionpercent;
+        vg:positionPercent ?positionpercent .
+  ?faldoregion faldo:begin ?begin;
+               faldo:end ?end .
+  ?begin faldo:position ?beginpos .
+  ?end faldo:position ?endpos .
+  ?begin faldo:reference ?reference .
+}
\ No newline at end of file
diff --git a/queries/selectLinksFilterBins1To5OfZoomLevel1.rq b/queries/selectLinksFilterBins1To5OfZoomLevel1.rq
new file mode 100644
index 0000000..f76b6a3
--- /dev/null
+++ b/queries/selectLinksFilterBins1To5OfZoomLevel1.rq
@@ -0,0 +1,18 @@
+PREFIX vg: <http://biohackathon.org/resource/vg#>
+PREFIX faldo: <http://biohackathon.org/resource/faldo#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT ?link ?path ?arrivalbinrank ?departurebinrank ?zoomfactor
+WHERE {
+  ?link a vg:Link;
+        vg:arrival ?arrivalbin;
+        vg:departure ?departurebin;
+        vg:linkPaths ?path ;
+        vg:linkZoomLevel ?zoomlevel .
+  ?arrivalbin vg:binRank ?arrivalbinrank .
+  ?departurebin vg:binRank ?departurebinrank .
+  FILTER((?arrivalbinrank < 6 && ?arrivalbinrank > 0)
+      || (?departurebinrank < 6 && ?departurebinrank > 0))
+  ?zoomlevel vg:zoomFactor ?zoomfactor .
+  FILTER(?zoomfactor = 1)
+}
\ No newline at end of file
diff --git a/queries/selectPangenomeSeq1To5.rq b/queries/selectPangenomeSeq1To5.rq
new file mode 100644
index 0000000..4159e0a
--- /dev/null
+++ b/queries/selectPangenomeSeq1To5.rq
@@ -0,0 +1,11 @@
+PREFIX vg: <http://biohackathon.org/resource/vg#>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+SELECT (SUBSTR(group_concat(?sequence; separator=''), 1, 5) as ?panSeq) {
+  SELECT
+    *
+  WHERE {?s a vg:Node;
+            rdf:value ?sequence .
+  }
+  ORDER BY ?s
+}
diff --git a/requirements.txt b/requirements.txt
index a40f53d..f2ac401 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ DNASkittleUtils==1.0.13
 numpy==1.18.2
 pytest==5.4.1
 sortedcontainers==2.1.0
 joblib==0.14.1
 numba==0.48.0
 psutil==5.7.0
-recordclass==0.13.2
\ No newline at end of file
+recordclass==0.13.2
diff --git a/segmentation.py b/segmentation.py
index 8eacfda..b5f3e24 100644
--- a/segmentation.py
+++ b/segmentation.py
@@ -65,6 +65,7 @@ def populate_component_matrix(paths: List[Path], schematic: PangenomeSchematic):
     # this case enforces first_bin == last_bin --- comp.matrix[p] has a single element
     for comp, fr in zip(comp_filtered, from_filtered):
         bin = values[fr]
+        bin.path_id = p  # save for later use in write_rdf
         comp.matrix.append([p, [[0], [bin]]])
         if bin.coverage > 0.1:
             comp.occupants.add(p)
@@ -83,6 +84,8 @@ def populate_component_matrix(paths: List[Path], schematic: PangenomeSchematic):
         sliced = values[fr:to]
         ids = [bin.bin_id - fb for bin in sliced]
         comp.matrix.append([p,[ids, sliced]])
+        for bin in sliced:
+            bin.path_id = p  # save for later use in write_rdf
         if any([bin.coverage > 0.1 for bin in sliced]):
             comp.occupants.add(p)
@@ -279,14 +282,17 @@ def _split_lines(self, text, width):
         return argparse.HelpFormatter._split_lines(self, text, width)
 
 
-def write_files(folder, odgi_fasta: Path, schematic: PangenomeSchematic, no_adjacent_links):
+def write_files(folder, ontology_folder, odgi_fasta: Path, schematic: PangenomeSchematic, no_adjacent_links, parallel):
     os.makedirs(folder, exist_ok=True)  # make directory for all files
+    if ontology_folder:
+        os.makedirs(ontology_folder, exist_ok=True)
 
     fasta = None
     if odgi_fasta:
         fasta = read_contigs(odgi_fasta)[0]
 
-    bin2file_mapping = schematic.split_and_write(args.cells_per_file, folder, fasta, no_adjacent_links)
+    bin2file_mapping = schematic.split_and_write(args.cells_per_file, folder, fasta, no_adjacent_links,
+                                                 ontology_folder, parallel)
 
     schematic.write_index_file(folder, bin2file_mapping)
@@ -343,6 +349,12 @@ def get_arguments():
                         action='store_true',
                         help='Switches off the add_adjacent_connector_column() routine)')
 
+    parser.add_argument('-t', '--do-ttl',
+                        dest='do_ttl',
+                        default=False,
+                        action='store_true',
+                        help='write the ontology output as gzipped N-Triples (.nt.gz)')
+
     args = parser.parse_args()
 
     # file path logic for single or list of files with wildcard *
@@ -391,7 +403,12 @@ def main():
         # this one spits out json and optionally other output files (fasta, ttl)
         path_name = str(bin_width)
         folder_path = osPath(args.output_folder).joinpath(path_name)  # full path
-        write_files(folder_path, args.fasta, schematic, args.no_adjacent_links)
+
+        ontology_folder_path = None
+        if args.do_ttl:
+            ontology_folder_path = osPath(args.output_folder).joinpath(path_name + '-rdf')
+
+        write_files(folder_path, ontology_folder_path, args.fasta, schematic, args.no_adjacent_links, parallel)
 
         LOGGER.info("Finished processing the file " + json_file)
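
Usage sketch (not part of the diff): a minimal way to exercise this branch end to end with rdflib. The input file name, the flag spellings other than --do-ttl, and the exact chunk numbering are assumptions for illustration - see get_arguments() and pad_file_nr() for the authoritative values.

    import gzip
    from rdflib import Graph

    # 1) Produce the RDF (run from the repo root; flag names here are assumed):
    #      python segmentation.py --json-file data/chrk_ath_12samples_10kb.w100000_S.json \
    #                             --output-folder output --do-ttl
    #    With bin_width=100000, main() puts the gzipped N-Triples under
    #    output/100000-rdf/, named by ttl_filename(), e.g. seq_chunk00_bin100000.nt.gz
    #    (the chunk padding comes from pad_file_nr() and may differ).

    # 2) Load the .nt.gz back into a graph and run one of the shipped queries.
    graph = Graph()
    with gzip.open('output/100000-rdf/seq_chunk00_bin100000.nt.gz', 'rt') as fin:
        graph.parse(data=fin.read(), format='nt')

    with open('queries/selectBins1To5OfZoomlevel1.rq') as fin:
        for row in graph.query(fin.read()):
            print(row)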