diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index 2604ee6ef..e4ac92a34 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1277,10 +1277,8 @@ def _add_edge(edges, src, dest): qdb.sql_connection.TRN.add(sql, [self.id]) sql_edges = qdb.sql_connection.TRN.execute_fetchindex() - lineage = nx.DiGraph() - edges = set() - nodes = {} - if sql_edges: + # helper function to reduce code duplication + def _helper(sql_edges, edges, nodes): for jid, pid, cid in sql_edges: if jid not in nodes: nodes[jid] = ('job', @@ -1291,9 +1289,29 @@ def _add_edge(edges, src, dest): nodes[cid] = ('artifact', qdb.artifact.Artifact(cid)) edges.add((nodes[pid], nodes[jid])) edges.add((nodes[jid], nodes[cid])) + + lineage = nx.DiGraph() + edges = set() + nodes = dict() + extra_edges = set() + extra_nodes = dict() + if sql_edges: + _helper(sql_edges, edges, nodes) else: nodes[self.id] = ('artifact', self) lineage.add_node(nodes[self.id]) + # if this is an Analysis we need to check if there are extra + # edges/nodes as there is a chance that there are connections + # between them + if self.analysis is not None: + roots = [a for a in self.analysis.artifacts + if not a.parents and a != self] + for r in roots: + # add the root to the options then their children + extra_nodes[r.id] = ('artifact', r) + qdb.sql_connection.TRN.add(sql, [r.id]) + sql_edges = qdb.sql_connection.TRN.execute_fetchindex() + _helper(sql_edges, extra_edges, extra_nodes) # The code above returns all the jobs that have been successfully # executed. 
We need to add all the jobs that are in all the other @@ -1329,8 +1347,10 @@ def _add_edge(edges, src, dest): # need to check both the input_artifacts and the # pending properties for in_art in n_obj.input_artifacts: - _add_edge(edges, nodes[in_art.id], - nodes[n_obj.id]) + iid = in_art.id + if iid not in nodes and iid in extra_nodes: + nodes[iid] = extra_nodes[iid] + _add_edge(edges, nodes[iid], nodes[n_obj.id]) pending = n_obj.pending for pred_id in pending: diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index b9ba78149..5cd425e23 100644 --- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -1406,6 +1406,42 @@ def test_has_human(self): self.assertTrue(artifact.has_human) + def test_descendants_with_jobs(self): + # let's test that we can connect two artifacts with different roots + # in the same analysis + # 1. make sure there are 3 nodes + a = qdb.artifact.Artifact(8) + self.assertEqual(len(a.descendants_with_jobs.nodes), 3) + self.assertEqual(len(a.analysis.artifacts), 2) + # 2. add a new root and make sure we see it + c = qdb.artifact.Artifact.create( + self.filepaths_root, "BIOM", analysis=a.analysis, + data_type="16S") + self.assertEqual(len(a.analysis.artifacts), 3) + # 3. 
add jobs connecting the new artifact to the other root + # a -> job -> b + # c + # job1 connects b & c + # job2 connects a & c + cmd = qdb.software.Command.create( + qdb.software.Software(1), + "CommandWithMultipleInputs", "", { + 'input_b': ['artifact:["BIOM"]', None], + 'input_c': ['artifact:["BIOM"]', None]}, {'out': 'BIOM'}) + params = qdb.software.Parameters.load( + cmd, values_dict={'input_b': a.children[0].id, 'input_c': c.id}) + job1 = qdb.processing_job.ProcessingJob.create( + qdb.user.User('test@foo.bar'), params) + params = qdb.software.Parameters.load( + cmd, values_dict={'input_b': a.id, 'input_c': c.id}) + job2 = qdb.processing_job.ProcessingJob.create( + qdb.user.User('test@foo.bar'), params) + + jobs = [j[1] for e in a.descendants_with_jobs.edges + for j in e if j[0] == 'job'] + self.assertIn(job1, jobs) + self.assertIn(job2, jobs) + @qiita_test_checker() class ArtifactArchiveTests(TestCase): diff --git a/qiita_pet/handlers/analysis_handlers/base_handlers.py b/qiita_pet/handlers/analysis_handlers/base_handlers.py index bc3de16d0..bd9c208e2 100644 --- a/qiita_pet/handlers/analysis_handlers/base_handlers.py +++ b/qiita_pet/handlers/analysis_handlers/base_handlers.py @@ -196,7 +196,9 @@ def analyisis_graph_handler_get_request(analysis_id, user): # This should never happen, but worth having a useful message raise ValueError('More than one workflow in a single analysis') - return {'edges': edges, 'nodes': nodes, 'workflow': wf_id, + # the list(set()) is to remove any duplicated nodes + return {'edges': list(set(edges)), 'nodes': list(set(nodes)), + 'workflow': wf_id, 'artifacts_being_deleted': artifacts_being_deleted}