From d34502ec6b8567fd1fc1bc87f1ea84859a99be14 Mon Sep 17 00:00:00 2001 From: cbizon Date: Thu, 13 Jul 2023 12:09:03 -0400 Subject: [PATCH 1/3] fixed some more affects rules --- src/rules/Notes | 5 ++++ .../check_rule_for_direction.py | 0 .../rules_with_types_cleaned_finalized.json | 24 +++++++++++++++---- 3 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 src/rules/kara_typed_rules/check_rule_for_direction.py diff --git a/src/rules/Notes b/src/rules/Notes index 2e75d00..ace808b 100644 --- a/src/rules/Notes +++ b/src/rules/Notes @@ -9,5 +9,10 @@ The current rules have been created through a somewhat complicated process: * remove subclass rules 5. By hand, I went into kara_typed_rules/rules_with_types_cleaned_finalized.json and changed "biolink:correlated_with" to "biolink:positively_correlated_with" +6. For reasons unclear to me after all this, we still have a bunch of rules of this form: + ?e activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a increased_activity_biolink:affects ?b + For these I go in by hand and edit the rule to be + ?e increased_activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a increased_activity_biolink:affects ?b + Changing both the "Rule" and the edge in "template" by adding the correct direction qualifier This process is obviously roundabout and we are going to clarify it in the next round of rule minings. 
diff --git a/src/rules/kara_typed_rules/check_rule_for_direction.py b/src/rules/kara_typed_rules/check_rule_for_direction.py new file mode 100644 index 0000000..e69de29 diff --git a/src/rules/kara_typed_rules/rules_with_types_cleaned_finalized.json b/src/rules/kara_typed_rules/rules_with_types_cleaned_finalized.json index 468ac16..a013f8b 100644 --- a/src/rules/kara_typed_rules/rules_with_types_cleaned_finalized.json +++ b/src/rules/kara_typed_rules/rules_with_types_cleaned_finalized.json @@ -4863,7 +4863,7 @@ } }, { - "Rule": "?e activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a increased_activity_biolink:affects ?b", + "Rule": "?e increased_activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a increased_activity_biolink:affects ?b", "Head Coverage": "0.000698966", "Std Confidence": "0.192307692", "PCA Confidence": "0.3125", @@ -4909,6 +4909,10 @@ { "qualifier_type_id": "biolink:object_aspect_qualifier", "qualifier_value": "activity_or_abundance" + }, + { + "qualifier_type_id": "biolink:object_direction_qualifier", + "qualifier_value": "increased" } ] } @@ -7751,7 +7755,7 @@ } }, { - "Rule": "?e activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a decreased_activity_biolink:affects ?b", + "Rule": "?e decreased_activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a decreased_activity_biolink:affects ?b", "Head Coverage": "0.000052801", "Std Confidence": "0.104395604", "PCA Confidence": "0.154471545", @@ -7797,6 +7801,10 @@ { "qualifier_type_id": "biolink:object_aspect_qualifier", "qualifier_value": "activity_or_abundance" + }, + { + "qualifier_type_id": "biolink:object_direction_qualifier", + "qualifier_value": "decreased" } ] } @@ -8832,7 +8840,7 @@ } }, { - "Rule": "?e activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a increased_activity_or_abundance_biolink:affects ?b", + "Rule": "?e increased_activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a 
increased_activity_or_abundance_biolink:affects ?b", "Head Coverage": "0.000584971", "Std Confidence": "0.192307692", "PCA Confidence": "0.3125", @@ -8878,6 +8886,10 @@ { "qualifier_type_id": "biolink:object_aspect_qualifier", "qualifier_value": "activity_or_abundance" + }, + { + "qualifier_type_id": "biolink:object_direction_qualifier", + "qualifier_value": "increased" } ] } @@ -11779,7 +11791,7 @@ } }, { - "Rule": "?e activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a decreased_activity_or_abundance_biolink:affects ?b", + "Rule": "?e decreased_activity_or_abundance_biolink:affects ?b ?e biolink:similar_to ?a => ?a decreased_activity_or_abundance_biolink:affects ?b", "Head Coverage": "0.000051974", "Std Confidence": "0.104395604", "PCA Confidence": "0.154471545", @@ -11825,6 +11837,10 @@ { "qualifier_type_id": "biolink:object_aspect_qualifier", "qualifier_value": "activity_or_abundance" + }, + { + "qualifier_type_id": "biolink:object_direction_qualifier", + "qualifier_value": "decreased" } ] } From 26b97091cf4fa4ecc85d4d238b426c3fcc4b5933 Mon Sep 17 00:00:00 2001 From: cbizon Date: Thu, 13 Jul 2023 12:11:26 -0400 Subject: [PATCH 2/3] some diagnostics --- .../check_rule_for_direction.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/rules/kara_typed_rules/check_rule_for_direction.py b/src/rules/kara_typed_rules/check_rule_for_direction.py index e69de29..84e7d15 100644 --- a/src/rules/kara_typed_rules/check_rule_for_direction.py +++ b/src/rules/kara_typed_rules/check_rule_for_direction.py @@ -0,0 +1,22 @@ +import json + +with open("rules_with_types_cleaned_finalized.json","r") as infile: + rules = json.load(infile) + +for key,ruleset in rules.items(): + print(key) + for rule in ruleset: + trapi = rule["template"] + edges = trapi["query_graph"]["edges"] + for edge_id,edge in edges.items(): + if edge["predicates"][0] == "biolink:affects": + try: + qualifiers = edge["qualifier_constraints"][0]["qualifier_set"] + found = 
False + for qualifier in qualifiers: + if qualifier["qualifier_type_id"] == "biolink:object_direction_qualifier": + found = True + if not found: + print(" ",rule["Rule"]) + except: + print(" ", rule["Rule"]) \ No newline at end of file From c637a14b419d0349e8319b1edea223e79c76c92e Mon Sep 17 00:00:00 2001 From: cbizon Date: Thu, 13 Jul 2023 12:12:32 -0400 Subject: [PATCH 3/3] bump --- src/service_aggregator.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/service_aggregator.py b/src/service_aggregator.py index 277244c..a243c63 100644 --- a/src/service_aggregator.py +++ b/src/service_aggregator.py @@ -225,7 +225,7 @@ async def post_with_callback(host_url, query, guid, params=None): # check the response status. if post_response.status_code != 200: # queue isn't needed for failed service call - logger.warning(f"Deleting unneeded queue for {guid}") + logger.warning(f"{guid} POST status: {post_response.status_code}. Deleting unneeded queue.") await delete_queue(guid) # if there is an error this will return a type return post_response @@ -337,6 +337,8 @@ async def check_for_messages(guid, num_queries, num_previously_received=0): num_responses += 1 logger.info(f"{guid}: Strider returned {num_responses} out of {num_queries}.") jr = process_message(message) + with open(f"{guid}_{num_responses}.json","w") as outf: + json.dump(jr,outf,indent=2) if is_end_message(jr): logger.info(f"{guid}: Received complete message from multistrider") complete = True @@ -467,7 +469,6 @@ async def subservice_post(name, url, message, guid, asyncquery=False, params=Non if len(response.json()): #pass it through pydantic for validation and cleaning ret_val = await to_jsonable_dict(PDResponse.parse_obj(response.json()).dict(exclude_none = True)) - except Exception as e: status_code = 500 logger.exception(f"{guid}: ARAGORN Exception {e} translating json from post to {name}") @@ -913,6 +914,10 @@ async def robokop_infer(input_message, guid, question_qnode, 
answer_qnode): max_conns = os.environ.get("MAX_CONNECTIONS", 5) nrules = int(os.environ.get("MAXIMUM_ROBOKOPKG_RULES", 101)) messages = expand_query(input_message, {}, guid) + with open('robokop_infer.txt', 'w') as logfile: + json.dump(input_message, logfile, indent=2) + logfile.write("------\n") + json.dump(messages, logfile, indent=2) lookup_query_graph = messages[0]["message"]["query_graph"] logger.info(f"{guid}: {len(messages)} to send to {automat_url}") result_messages = [] @@ -926,6 +931,7 @@ async def robokop_infer(input_message, guid, question_qnode, answer_qnode): responses = await asyncio.gather(*tasks) + nr = 0 for response in responses: if response.status_code == 200: #Validate and clean @@ -934,6 +940,9 @@ async def robokop_infer(input_message, guid, question_qnode, answer_qnode): num_results = len(rmessage["message"].get("results",[])) logger.info(f"Returned {num_results} results") if num_results > 0 and num_results < 10000: #more than this number of results and you're into noise. + with (open(f"{guid}_r_{nr}.json", 'w')) as outf: + json.dump(rmessage, outf, indent=2) + nr += 1 result_messages.append(rmessage) else: logger.error(f"{guid}: {response.status_code} returned.") @@ -1032,6 +1041,9 @@ async def omnicorp(message, params, guid) -> (dict, int): """ url = f'{os.environ.get("RANKER_URL", "https://aragorn-ranker.renci.org/1.4/")}omnicorp_overlay' + with open("to_corp.json","w") as f: + f.write(json.dumps(message,indent=2)) + rval, omni_status = await subservice_post("omnicorp", url, message, guid) # Omnicorp is not strictly necessary. When we get something other than a 200, @@ -1048,6 +1060,9 @@ async def score(message, params, guid) -> (dict, int): :param guid: :return: """ + with open("to_score.json","w") as f: + f.write(json.dumps(message,indent=2)) + ranker_url = os.environ.get("RANKER_URL", "https://aragorn-ranker.renci.org/1.4/") score_url = f"{ranker_url}score"