From aa860148ad3e5ab499b7c1536fa50d83ad453ba3 Mon Sep 17 00:00:00 2001
From: Antonio Gonzalez
Date: Wed, 10 Jan 2024 13:35:42 -0700
Subject: [PATCH 1/3] fix #3224

---
 qiita_db/artifact.py           | 23 ++++++++++++++++++----
 qiita_db/test/test_artifact.py | 36 ++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py
index 2604ee6ef..878a38759 100644
--- a/qiita_db/artifact.py
+++ b/qiita_db/artifact.py
@@ -1277,10 +1277,8 @@ def _add_edge(edges, src, dest):
             qdb.sql_connection.TRN.add(sql, [self.id])
             sql_edges = qdb.sql_connection.TRN.execute_fetchindex()
 
-            lineage = nx.DiGraph()
-            edges = set()
-            nodes = {}
-            if sql_edges:
+            # helper function to reduce code duplication
+            def _helper(sql_edges, edges, nodes):
                 for jid, pid, cid in sql_edges:
                     if jid not in nodes:
                         nodes[jid] = ('job',
@@ -1291,6 +1289,23 @@ def _add_edge(edges, src, dest):
                         nodes[cid] = ('artifact', qdb.artifact.Artifact(cid))
                     edges.add((nodes[pid], nodes[jid]))
                     edges.add((nodes[jid], nodes[cid]))
+
+            lineage = nx.DiGraph()
+            edges = set()
+            nodes = {}
+            if sql_edges:
+                _helper(sql_edges, edges, nodes)
+                # if this is an Analysis, then we need to also check for
+                # any job/artifact within the Analysis
+                if self.analysis is not None:
+                    roots = [a for a in self.analysis.artifacts
+                             if not a.parents and a != self]
+                    for r in roots:
+                        # add the root to the options then their children
+                        nodes[r.id] = ('artifact', r)
+                        qdb.sql_connection.TRN.add(sql, [r.id])
+                        sql_edges = qdb.sql_connection.TRN.execute_fetchindex()
+                        _helper(sql_edges, edges, nodes)
             else:
                 nodes[self.id] = ('artifact', self)
                 lineage.add_node(nodes[self.id])
diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py
index b9ba78149..5cd425e23 100644
--- a/qiita_db/test/test_artifact.py
+++ b/qiita_db/test/test_artifact.py
@@ -1406,6 +1406,42 @@ def test_has_human(self):
 
         self.assertTrue(artifact.has_human)
 
+    def test_descendants_with_jobs(self):
+        # let's test that we can connect two artifacts with different roots
+        # in the same analysis
+        # 1. make sure there are 3 nodes
+        a = qdb.artifact.Artifact(8)
+        self.assertEqual(len(a.descendants_with_jobs.nodes), 3)
+        self.assertEqual(len(a.analysis.artifacts), 2)
+        # 2. add a new root and make sure we see it
+        c = qdb.artifact.Artifact.create(
+            self.filepaths_root, "BIOM", analysis=a.analysis,
+            data_type="16S")
+        self.assertEqual(len(a.analysis.artifacts), 3)
+        # 3. add jobs connecting the new artifact to the other root
+        # a -> job -> b
+        # c
+        # job1 connects b & c
+        # job2 connects a & c
+        cmd = qdb.software.Command.create(
+            qdb.software.Software(1),
+            "CommandWithMultipleInputs", "", {
+                'input_b': ['artifact:["BIOM"]', None],
+                'input_c': ['artifact:["BIOM"]', None]}, {'out': 'BIOM'})
+        params = qdb.software.Parameters.load(
+            cmd, values_dict={'input_b': a.children[0].id, 'input_c': c.id})
+        job1 = qdb.processing_job.ProcessingJob.create(
+            qdb.user.User('test@foo.bar'), params)
+        params = qdb.software.Parameters.load(
+            cmd, values_dict={'input_b': a.id, 'input_c': c.id})
+        job2 = qdb.processing_job.ProcessingJob.create(
+            qdb.user.User('test@foo.bar'), params)
+
+        jobs = [j[1] for e in a.descendants_with_jobs.edges
+                for j in e if j[0] == 'job']
+        self.assertIn(job1, jobs)
+        self.assertIn(job2, jobs)
+
 
 @qiita_test_checker()
 class ArtifactArchiveTests(TestCase):

From b31066b568a8908eddb42336b6140c15034e5cd5 Mon Sep 17 00:00:00 2001
From: Antonio Gonzalez
Date: Thu, 11 Jan 2024 11:57:32 -0700
Subject: [PATCH 2/3] fixing code

---
 qiita_db/artifact.py | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py
index 878a38759..e4ac92a34 100644
--- a/qiita_db/artifact.py
+++ b/qiita_db/artifact.py
@@ -1292,23 +1292,26 @@ def _helper(sql_edges, edges, nodes):
 
             lineage = nx.DiGraph()
             edges = set()
-            nodes = {}
+            nodes = dict()
+            extra_edges = set()
+            extra_nodes = dict()
             if sql_edges:
                 _helper(sql_edges, edges, nodes)
-                # if this is an Analysis, then we need to also check for
-                # any job/artifact within the Analysis
-                if self.analysis is not None:
-                    roots = [a for a in self.analysis.artifacts
-                             if not a.parents and a != self]
-                    for r in roots:
-                        # add the root to the options then their children
-                        nodes[r.id] = ('artifact', r)
-                        qdb.sql_connection.TRN.add(sql, [r.id])
-                        sql_edges = qdb.sql_connection.TRN.execute_fetchindex()
-                        _helper(sql_edges, edges, nodes)
             else:
                 nodes[self.id] = ('artifact', self)
                 lineage.add_node(nodes[self.id])
+            # if this is an Analysis, we need to check if there are extra
+            # edges/nodes as there is a chance that there are connections
+            # between them
+            if self.analysis is not None:
+                roots = [a for a in self.analysis.artifacts
+                         if not a.parents and a != self]
+                for r in roots:
+                    # add the root to the options then their children
+                    extra_nodes[r.id] = ('artifact', r)
+                    qdb.sql_connection.TRN.add(sql, [r.id])
+                    sql_edges = qdb.sql_connection.TRN.execute_fetchindex()
+                    _helper(sql_edges, extra_edges, extra_nodes)
 
             # The code above returns all the jobs that have been successfully
             # executed. We need to add all the jobs that are in all the other
@@ -1344,8 +1347,10 @@ def _helper(sql_edges, edges, nodes):
                     # need to check both the input_artifacts and the
                     # pending properties
                     for in_art in n_obj.input_artifacts:
-                        _add_edge(edges, nodes[in_art.id],
-                                  nodes[n_obj.id])
+                        iid = in_art.id
+                        if iid not in nodes and iid in extra_nodes:
+                            nodes[iid] = extra_nodes[iid]
+                        _add_edge(edges, nodes[iid], nodes[n_obj.id])
 
                     pending = n_obj.pending
                     for pred_id in pending:

From 4a4a9f0dd60e465ab82cf9dacb1a971e29190121 Mon Sep 17 00:00:00 2001
From: Antonio Gonzalez
Date: Thu, 11 Jan 2024 12:49:29 -0700
Subject: [PATCH 3/3] rm duplicated edges/nodes

---
 qiita_pet/handlers/analysis_handlers/base_handlers.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/qiita_pet/handlers/analysis_handlers/base_handlers.py b/qiita_pet/handlers/analysis_handlers/base_handlers.py
index bc3de16d0..bd9c208e2 100644
--- a/qiita_pet/handlers/analysis_handlers/base_handlers.py
+++ b/qiita_pet/handlers/analysis_handlers/base_handlers.py
@@ -196,7 +196,9 @@ def analyisis_graph_handler_get_request(analysis_id, user):
         # This should never happen, but worth having a useful message
         raise ValueError('More than one workflow in a single analysis')
 
-    return {'edges': edges, 'nodes': nodes, 'workflow': wf_id,
+    # the list(set()) is to remove any duplicated nodes
+    return {'edges': list(set(edges)), 'nodes': list(set(nodes)),
+            'workflow': wf_id,
             'artifacts_being_deleted': artifacts_being_deleted}
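
Note (not part of the patch series): taken together, the three commits make
Artifact.descendants_with_jobs also walk the other root artifacts of the same
analysis (collected into extra_nodes/extra_edges), so jobs that connect
artifacts hanging from different roots end up in a single graph, and the
handler then drops any duplicated entries with list(set(...)). A minimal
standalone sketch of that merge-and-deduplicate idea, using plain networkx
and hypothetical node names ('a', 'b', 'c', 'job', 'job1', 'out') rather than
real Qiita objects:

    import networkx as nx

    # edges found while walking the first root: a -> job -> b
    edges = {('a', 'job'), ('job', 'b')}
    # extra edges found while walking a second root c of the same analysis;
    # job1 consumes both b and c, mirroring the test added in PATCH 1/3
    extra_edges = {('b', 'job1'), ('c', 'job1'), ('job1', 'out')}

    lineage = nx.DiGraph()
    # a set union already removes duplicated edges, so merging both groups
    # yields one connected graph with no repeated entries
    lineage.add_edges_from(edges | extra_edges)

    print(sorted(lineage.nodes))      # ['a', 'b', 'c', 'job', 'job1', 'out']
    print(lineage.number_of_edges())  # 5

Because the node and edge entries built for the template are hashable tuples,
the list(set(...)) added in PATCH 3/3 is enough to drop the duplicates that
can appear when the same job or artifact is reached from more than one root.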