|
1 | 1 | from copy import deepcopy
|
2 | 2 | from collections import defaultdict
|
3 |
| -import ast, json |
| 3 | +import ast, json, uuid |
4 | 4 | from string import Template
|
5 | 5 |
|
6 |
| -class Opportunity: |
7 |
| - def __init__(self, hash, qg, kg, a_i, ai2kg): |
8 |
| - """ |
9 |
| - Define a coalescent opportunity by a hash (the fixed parts of the answers) |
10 |
| - qg: a (qg_id, semantic type) pair |
11 |
| - kg: kg_ids allowed for the combined answers |
12 |
| - a_i: indices into the results saying which answers are combined to give this |
13 |
| - opportunity |
14 |
| - kg2a_i: dict mapping frozenset of kg -> which answer they appear in. Used to filter. |
15 |
| - """ |
16 |
| - self.answer_hash = hash |
17 |
| - self.qg_id = qg[0] |
18 |
| - self.qg_semantic_type = qg[1] |
19 |
| - self.kg_ids = kg |
20 |
| - self.answer_indices = a_i |
21 |
| - self.answerid2kg = ai2kg |
22 |
| - def get_kg_ids(self): |
23 |
| - return self.kg_ids |
24 |
| - def get_qg_id(self): |
25 |
| - return self.qg_id |
26 |
| - def get_qg_semantic_type(self): |
27 |
| - stype = self.qg_semantic_type |
28 |
| - if isinstance(stype, list): |
29 |
| - stype = stype[0] |
30 |
| - return stype |
31 |
| - def get_answer_indices(self): |
32 |
| - return self.answer_indices |
33 |
| - def filter(self,new_kg_ids): |
34 |
| - """We constructed the opportunities without regard to what nodes we have data on. Now, we want to filter |
35 |
| - this opportunity to only answers where we actually have info about the nodes. |
36 |
| - new_kg_ids is a subset of self.kg_ids. If we have an answer where we don't have all the nodes, we want to |
37 |
| - get rid of that answer, and return a new (filtered) opp. If we get rid of all answers, return None""" |
38 |
| - if len(new_kg_ids) == len(self.kg_ids): |
39 |
| - #No filtering required |
40 |
| - return self |
41 |
| - nkgids = set(new_kg_ids) |
42 |
| - new_ai2kg = {} |
43 |
| - for answer_i, kgs in self.answerid2kg.items(): |
44 |
| - keep = True |
45 |
| - for kg_i in kgs: |
46 |
| - if kg_i not in nkgids: |
47 |
| - keep = False |
48 |
| - break |
49 |
| - if keep: |
50 |
| - new_ai2kg[answer_i] = kgs |
51 |
| - if len(new_ai2kg) == 0: |
52 |
| - return None |
53 |
| - #If we removed any answers, we might also need to remove some kg_ids that weren't explicitly filtered |
54 |
| - final_kg_ids = set() |
55 |
| - for kgi in new_ai2kg.values(): |
56 |
| - final_kg_ids.update(kgi) |
57 |
| - return Opportunity(self.answer_hash, (self.qg_id , self.qg_semantic_type ), list(final_kg_ids), list(new_ai2kg.keys()), new_ai2kg) |
| 6 | +### |
| 7 | +# These classes are used to extract the meaning from the TRAPI MCQ query into a more usable form |
| 8 | +### |
| 9 | + |
| 10 | +# TODO: Handle the case where we are not gifted a category for the group node |
class MCQGroupNode:
    """The "group" (set) node of a TRAPI MCQ query graph.

    Scans ``query_graph["nodes"]`` for nodes whose ``set_interpretation`` is
    ``"MANY"`` and records, for each match in turn (so the last match wins):

    * ``curies``        -- the node's ``member_ids``
    * ``qnode_id``      -- the node's key in the query graph
    * ``uuid``          -- the first entry of the node's ``ids``
    * ``semantic_type`` -- the first entry of the node's ``categories``

    If no node matches, none of these attributes is set.
    """

    def __init__(self, query_graph):
        nodes = query_graph["nodes"]
        for node_key in nodes:
            node = nodes[node_key]
            if node.get("set_interpretation", "") != "MANY":
                continue
            self.curies = node["member_ids"]
            self.qnode_id = node_key
            self.uuid = node["ids"][0]
            # A group node without categories raises KeyError here (see the
            # TODO that precedes this class).
            self.semantic_type = node["categories"][0]
| 19 | + |
class MCQEnrichedNode:
    """The non-group node of a TRAPI MCQ query graph.

    Every node whose ``set_interpretation`` is not ``"MANY"`` is visited in
    order, and its key and ``categories`` list are stored as ``qnode_id`` and
    ``semantic_types``; the last such node wins.  If there is no such node the
    attributes are left unset.
    """

    def __init__(self, query_graph):
        for node_key, node in query_graph["nodes"].items():
            if node.get("set_interpretation", "") == "MANY":
                # This is the group node; it is described by MCQGroupNode.
                continue
            self.qnode_id = node_key
            self.semantic_types = node["categories"]
| 26 | + |
class MCQEdge:
    """The query edge of a TRAPI MCQ query graph.

    For each edge in ``query_graph["edges"]`` (the last edge wins) records:

    * ``group_is_subject`` -- True when the edge's subject is the group node
    * ``qedge_id``         -- the edge's key in the query graph
    * ``predicate_only``   -- the first predicate, or ``"biolink:related_to"``
      when the edge lists none
    * ``qualifiers``       -- the ``qualifier_set`` of the first qualifier
      constraint, or ``[]`` when there is none
    * ``predicate``        -- dict with key ``"predicate"`` plus one entry per
      qualifier, keyed by the qualifier's full ``qualifier_type_id``

    If the query graph has no edges, none of these attributes is set.
    """

    def __init__(self, query_graph,groupnode_qnodeid):
        for qedge_id, qedge in query_graph["edges"].items():
            self.group_is_subject = qedge["subject"] == groupnode_qnodeid
            self.qedge_id = qedge_id
            # `or` also covers a present-but-empty/None predicate list, which
            # would otherwise fail on the [0] lookup.
            predicates = qedge.get("predicates") or ["biolink:related_to"]
            self.predicate_only = predicates[0]
            self.predicate = {"predicate": self.predicate_only}
            self.qualifiers = []
            # "qualifier_constraints" is the key Lookup_params reads from the
            # same query edges; the misspelled "qualifiers_constraints" is
            # still accepted so existing callers that relied on it keep working.
            qualifier_constraints = (
                qedge.get("qualifier_constraints")
                or qedge.get("qualifiers_constraints")
                or []
            )
            if qualifier_constraints:
                # Only the first constraint is considered.
                self.qualifiers = qualifier_constraints[0].get("qualifier_set", [])
                for q in self.qualifiers:
                    self.predicate[q["qualifier_type_id"]] = q["qualifier_value"]
| 44 | + |
class MCQDefinition:
    """Bundle the group node, enriched node and edge of a TRAPI MCQ message.

    ``in_message["message"]["query_graph"]`` is parsed into ``group_node``
    (MCQGroupNode), ``enriched_node`` (MCQEnrichedNode) and ``edge`` (MCQEdge,
    built relative to the group node's qnode id).
    """

    def __init__(self,in_message):
        qgraph = in_message["message"]["query_graph"]
        group = MCQGroupNode(qgraph)
        self.group_node = group
        self.enriched_node = MCQEnrichedNode(qgraph)
        # The edge needs the group node's id to decide its direction.
        self.edge = MCQEdge(qgraph, group.qnode_id)
| 51 | + |
| 52 | + |
| 53 | +### |
| 54 | +# These components are about holding the results of Graph enrichment in a TRAPI independent way |
| 55 | +### |
58 | 56 |
|
class NewNode:
    """A node identified by a curie, with its types and an optional name.

    Attributes:
        new_curie: the value passed as ``newnode``.
        newnode_type: the value passed as ``newnodetype``.
        newnode_name: starts as None; filled in later by whoever owns the node.
    """

    def __init__(self, newnode, newnodetype: list[str]):
        self.newnode_name = None
        self.newnode_type = newnodetype
        self.new_curie = newnode
| 62 | + |
class NewEdge:
    """A directed (source, predicate, target) edge with optional provenance.

    ``prov`` only exists on the instance after ``add_prov`` has been called.
    """

    def __init__(self, source, predicate: str, target):
        self.source, self.predicate, self.target = source, predicate, target

    def get_prov_link(self):
        """Return the provenance lookup key "<source> <predicate> <target>"."""
        return "{} {} {}".format(self.source, self.predicate, self.target)

    def get_sym_prov_link(self):
        """Return the key for the reversed edge: "<target> <predicate> <source>"."""
        return "{} {} {}".format(self.target, self.predicate, self.source)

    def add_prov(self, prov):
        """Attach provenance information to this edge."""
        self.prov = prov
| 76 | + |
class Lookup_params:
    """Parameters of a one-hop lookup, extracted from a TRAPI message.

    For each edge of ``in_message["message"]["query_graph"]`` (the last edge
    wins) the node that carries ``ids`` is treated as the input node and the
    other node as the output node.  The subject is checked first, so when both
    nodes carry ids the subject is the input.

    Attributes set (only when the query graph has at least one edge):
        is_source: True when the subject node carries the ids.
        curie: first id of the input node.
        predicate_parts: JSON string (sorted keys) holding ``"predicate"`` (the
            edge's first predicate) plus one entry per qualifier of the first
            qualifier constraint, keyed by the part of ``qualifier_type_id``
            after its first ``":"``.
        input_qnode / output_qnode: query-graph keys of the two nodes.
        output_semantic_type: first category of the output node.
        qedge_id: key of the edge in the query graph.

    Raises:
        KeyError: if the edge has no ``predicates``, references an unknown
            node, or neither node carries ``ids``.
        IndexError: if the output node has no categories.
    """

    def __init__(self, in_message):
        query_graph = in_message.get("message", {}).get("query_graph", {})
        qnodes = query_graph.get("nodes", {})
        for qedge_id, qedges in query_graph.get("edges", {}).items():
            subject_node = qnodes[qedges["subject"]]
            object_node = qnodes[qedges["object"]]
            is_source = bool(subject_node.get("ids", []))
            if is_source:
                # The original bound this to `curies`, leaving `curie` unbound
                # whenever the subject was the pinned node.
                curie = subject_node["ids"][0]
                input_qnode = qedges["subject"]
                output_qnode = qedges["object"]
                semantic_type = object_node.get("categories", [])[0]
            else:
                curie = object_node["ids"][0]
                input_qnode = qedges["object"]
                output_qnode = qedges["subject"]
                semantic_type = subject_node.get("categories", [])[0]
            predicate_parts = {"predicate": qedges["predicates"][0]}
            qualifier_constraints = qedges.get("qualifier_constraints", [])
            if qualifier_constraints:
                # Only the first constraint is considered.
                for q in qualifier_constraints[0].get("qualifier_set", []):
                    # Drop the prefix: "biolink:x_qualifier" -> "x_qualifier".
                    predicate_parts[q["qualifier_type_id"].split(":")[1]] = q["qualifier_value"]
            self.is_source = is_source
            self.curie = curie
            # sort_keys makes the string deterministic for a given predicate.
            self.predicate_parts = json.dumps(predicate_parts, sort_keys=True)
            self.input_qnode = input_qnode
            self.output_qnode = output_qnode
            self.output_semantic_type = semantic_type
            self.qedge_id = qedge_id
| 111 | + |
class Lookup:
    """Result of a one-hop lookup from a single input curie.

    Attributes:
        predicate: predicate used for every edge created here.
        is_source: whether the input curie is the source of those edges.
        link_ids: curies the input node was found to be linked to.
        input_qnode_curie: NewNode for the input curie.
        lookup_links: one Lookup_Links per entry of ``link_ids``, each with a
            ``link_edge`` (NewEdge) and, once ``add_enrichment`` has touched
            it, an ``enrichments`` list.
        link_kg_edges_ids: edge ids collected by ``add_linked_kg_edges_id``.
    """

    def __init__(self, curie, predicate, is_source, node_names, node_types, lookup_ids, params_output_semantic_type=None):
        # params_output_semantic_type is accepted for compatibility with
        # existing callers; it is not used.  The default is None rather than a
        # shared mutable list.
        self.predicate = predicate
        self.is_source = is_source
        self.link_ids = lookup_ids
        # Must exist before add_linked_kg_edges_id can append to it.
        self.link_kg_edges_ids = []

        self.add_input_node(curie, node_types)
        self.add_input_node_name(node_names)
        self.add_links(node_names, node_types)
        self.add_linked_edges(curie, is_source)

    def add_input_node(self, curie, node_types):
        """Create the NewNode for the input curie, typed from ``node_types``
        (None when the curie has no entry)."""
        self.input_qnode_curie = NewNode(curie, node_types.get(curie, None))

    def add_input_node_name(self, node_names):
        """Record the input node's name (None when ``node_names`` has no entry)."""
        name = node_names.get(self.input_qnode_curie.new_curie, None)
        # NewNode exposes the name as `newnode_name`; `name` is also kept
        # because that is the attribute this method originally wrote.
        self.input_qnode_curie.newnode_name = name
        self.input_qnode_curie.name = name

    def add_links(self, nodenames, nodetypes):
        """Build one Lookup_Links per linked curie, with its name and types."""
        self.lookup_links = [
            Lookup_Links(link_id, nodenames.get(link_id), nodetypes.get(link_id))
            for link_id in self.link_ids
        ]

    def add_linked_edges(self, input_node, input_node_is_source):
        """Add edges between the input node (curie) and the curies it was linked to."""
        for link, linked_id in zip(self.lookup_links, self.link_ids):
            if input_node_is_source:
                link.link_edge = NewEdge(input_node, self.predicate, linked_id)
            else:
                link.link_edge = NewEdge(linked_id, self.predicate, input_node)

    def add_linked_kg_edges_id(self, eid):
        """Record the id of an edge, as written in the KG, between the input
        node and one of its linked curies."""
        self.link_kg_edges_ids.append(eid)

    def get_prov_links(self):
        """Return the provenance key of every link edge, in link order."""
        return [link.link_edge.get_prov_link() for link in self.lookup_links]

    def add_provenance(self, prov):
        """Attach provenance to every link edge.

        ``prov`` maps provenance keys to provenance data.  The edge's own key
        is used when it maps to a truthy value; otherwise the reversed
        (symmetric) key is looked up, raising KeyError if that is absent too.
        """
        for link in self.lookup_links:
            edge = link.link_edge
            direct_key = edge.get_prov_link()
            if prov.get(direct_key):
                edge.add_prov(prov[direct_key])
            else:
                edge.add_prov(prov[edge.get_sym_prov_link()])

    def add_enrichment(self, lookup_indices, enriched_node, predicate, is_source, pvalue):
        """Append a Link_enrichment to each link selected by ``lookup_indices``
        (indices into ``lookup_links``)."""
        for index in lookup_indices:
            link = self.lookup_links[index]
            # `enrichments` is created lazily, on a link's first enrichment.
            if not hasattr(link, 'enrichments'):
                link.enrichments = []
            link.enrichments.append(Link_enrichment(enriched_node, predicate, is_source, pvalue))
| 162 | + |
class Lookup_Links:
    """One curie reached by a lookup, with the name and type stored for it.

    Other attributes (e.g. ``link_edge``, ``enrichments``) are attached to
    instances from outside this class.
    """

    def __init__(self, link_id, link_name, link_type):
        self.link_id, self.link_name, self.link_type = link_id, link_name, link_type
| 168 | + |
class Link_enrichment:
    """An enrichment result attached to a lookup link.

    Stores the enriched node, the predicate, the ``is_source`` flag and the
    p-value (as ``p_value``) exactly as they were passed in.
    """

    def __init__(self, enriched_node, predicate, is_source,pvalue):
        self.p_value = pvalue
        self.is_source = is_source
        self.predicate = predicate
        self.enriched_node = enriched_node
| 175 | + |
| 176 | +class Enrichment: |
| 177 | + def __init__(self,p_value,newnode: str, predicate: str, is_source, ndraws, n, total_node_count, curies, node_type: list[str]): |
| 178 | + """Here the curies are the curies that actually link to newnode, not just the input curies.""" |
| 179 | + self.p_value = p_value |
| 180 | + self.linked_curies = curies |
| 181 | + self.enriched_node = None |
| 182 | + self.predicate = predicate |
| 183 | + self.is_source = is_source |
74 | 184 | self.provmap = {}
|
75 |
| - def add_provenance(self,provmap): |
76 |
| - self.provmap = provmap |
77 |
| - def add_extra_node(self,newnode, newnodetype, edge_pred_and_qual, newnode_is,newnode_name): |
| 185 | + self.add_extra_node(newnode, node_type) |
| 186 | + self.add_extra_edges(newnode, predicate, is_source) |
| 187 | + self.counts = [ndraws, n, total_node_count] |
| 188 | + def add_extra_node(self,newnode, newnodetype: list[str]): |
78 | 189 | """Optionally, we can patch by adding a new node, which will share a relationship of
|
79 | 190 | some sort to the curies in self.set_curies. The remaining parameters give the edge_type
|
80 | 191 | of those edges, as well as defining whether the edge points to the newnode (newnode_is = 'target')
|
81 | 192 | or away from it (newnode_is = 'source') """
|
82 |
| - self.added_nodes.append( NewNode(newnode, newnodetype, edge_pred_and_qual, newnode_is, newnode_name) ) |
83 |
| - def apply(self,answers,question,graph,graph_index,patch_no): |
| 193 | + self.enriched_node = NewNode(newnode, newnodetype) |
| 194 | + def add_extra_node_name_and_label(self,name_dict,label_dict): |
| 195 | + self.enriched_node.newnode_name = name_dict.get(self.enriched_node.new_curie, None) |
| 196 | + self.enriched_node.newnode_type = label_dict.get(self.enriched_node.new_curie, []) |
| 197 | + def add_extra_edges(self, newnode, predicate: str, newnode_is_source): |
| 198 | + """Add edges between the newnode (curie) and the curies that they were linked to""" |
| 199 | + if newnode_is_source: |
| 200 | + self.links = [NewEdge(newnode,predicate,curie) for curie in self.linked_curies] |
| 201 | + else: |
| 202 | + self.links = [NewEdge(curie,predicate,newnode) for curie in self.linked_curies] |
| 203 | + def get_prov_links(self): |
| 204 | + return [link.get_prov_link() for link in self.links] |
| 205 | + def add_provenance(self,prov): |
| 206 | + for link in self.links: |
| 207 | + provlink = link.get_prov_link() |
| 208 | + symprovlink = link.get_sym_prov_link() |
| 209 | + if prov.get(provlink): |
| 210 | + link.add_prov(prov[provlink]) |
| 211 | + else: |
| 212 | + link.add_prov(prov[symprovlink]) |
| 213 | + |
| 214 | + #TODO: this should not exist in here any more, we are just making a data class |
| 215 | + def x_apply(self,answers,question,graph,graph_index,patch_no): |
84 | 216 | # Find the answers to combine. It's not necessarily the answer_ids. Those were the
|
85 | 217 | # answers that were originally in the opportunity, but we might have only found commonality
|
86 | 218 | # among a subset of them
|
|
0 commit comments