final fixes

Querent-ai · May 6, 2024 · f871294
1 parent 7f17935
commit f871294
Show file tree

Hide file tree

Showing 7 changed files with 31 additions and 38 deletions.
diff --git a/querent/core/transformers/gpt_llm_bert_ner_or_fixed_entities_set_ner.py b/querent/core/transformers/gpt_llm_bert_ner_or_fixed_entities_set_ner.py
@@ -335,7 +335,6 @@ async def process_tokens(self, data: IngestedTokens):
             else:
                 filtered_triples, file = result
                 modified_data = GPTLLM.remove_items_from_tuples(filtered_triples)
-                print("Data in GPT------------------------", modified_data[:1])
                 for entity1, context_json, entity2 in modified_data:
                     context_data = json.loads(context_json)
                     context = context_data.get("context", "")
@@ -352,15 +351,12 @@ async def process_tokens(self, data: IngestedTokens):
                             relationships.append(output_tuple)
                 if len(relationships) > 0:
                     if self.fixed_relationships and self.sample_relationships:
-                        print("Both are settttttttttttttttttttt-----")
                         embedding_triples = self.create_emb.generate_embeddings(relationships, relationship_finder=True, generate_embeddings_with_fixed_relationship = True)
                     elif self.sample_relationships:
-                        print("Only Sample Relationships are settttttttttttttttttttttttt-----")
                         embedding_triples = self.create_emb.generate_embeddings(relationships, relationship_finder=True)
                     else:
                         embedding_triples = self.create_emb.generate_embeddings(relationships)
                     if self.sample_relationships:
-                        print("Going to compute scores------------------------------")
                         embedding_triples = self.predicate_context_extractor.update_embedding_triples_with_similarity(self.predicate_json_emb, embedding_triples)
                     for triple in embedding_triples:
                         if not self.termination_event.is_set():
@@ -377,9 +373,7 @@ async def process_tokens(self, data: IngestedTokens):
                 else:
                     return
         except Exception as e:
-            print("Exception in GPT-----------------------", e)
             self.logger.error(f"Invalid {self.__class__.__name__} configuration. Unable to extract predicates using GPT. {e}")
-            raise Exception(f"An error occurred while extracting predicates using GPT: {e}")
 
     async def process_messages(self, data: IngestedMessages):
         raise NotImplementedError
diff --git a/querent/core/transformers/relationship_extraction_llm.py b/querent/core/transformers/relationship_extraction_llm.py
@@ -197,7 +197,6 @@ def extract_relationships(self, triples, fixed_entities = False):
                                                 entity2=predicate.get('entity2_nn_chunk', ''),
                                                 entity1_label=predicate.get('entity1_label', ''), 
                                                 entity2_label=predicate.get('entity2_label', ''))
-                print("This is the prompt -----------------",query )
                 answer_relation = self.qa_system.ask_question(prompt=query, llm=self.qa_system.llm, grammar=self.grammar)
                 try:
                     choices_text = answer_relation['choices'][0]['text']

diff --git a/querent/kg/ner_helperfunctions/ner_llm_transformer.py b/querent/kg/ner_helperfunctions/ner_llm_transformer.py
@@ -382,7 +382,7 @@ def final_ingested_images_tuples(self, filtered_triples, create_embeddings):
         info['predicate_type'] = "has image"
         info['context_embeddings'] = create_embeddings.get_embeddings([info['context']])[0]
         updated_json = json.dumps(info)
-        updated_tuple = (entity, updated_json, second_entity)
+        updated_tuple = (info['entity1_nn_chunk'], updated_json, info['entity2_nn_chunk'])
         return updated_tuple
 
     def remove_duplicates(self, data):

diff --git a/tests/data/llm/predicate_checker/image+predicate.pdf b/tests/data/llm/predicate_checker/image+predicate.pdf
diff --git a/tests/data/llm/predicate_checker/test.pdf b/tests/data/llm/test.pdf
diff --git a/tests/workflows/bert_llm_test_fixed_entities_predicates_workflow.py b/tests/workflows/bert_llm_test_fixed_entities_predicates_workflow.py
@@ -57,33 +57,33 @@
 #             'min_samples': 3,
 #             'cluster_persistence_threshold':0.2
 #         }
-#     # ,fixed_entities = ["university", "greenwood", "liam zheng", "department", "Metroville", "Emily Stanton", "Coach", "health", "training", "atheletes" ]
-#     # ,sample_entities=["organization", "organization", "person",  "department", "city", "person", "person", "method", "method", "person"]
-# #     ,fixed_relationships=[
-# #     "Increase in research funding leads to environmental science focus",
-# #     "Dr. Emily Stanton's advocacy for cleaner energy",
-# #     "University's commitment to reduce carbon emissions",
-# #     "Dr. Stanton's research influences architectural plans",
-# #     "Collaborative project between sociology and environmental sciences",
-# #     "Student government launches mental health awareness workshops",
-# #     "Enhanced fitness programs improve sports teams' performance",
-# #     "Coach Torres influences student-athletes' holistic health",
-# #     "Partnership expands access to digital resources",
-# #     "Interdisciplinary approach enriches academic experience"
-# # ]
-# #             , sample_relationships=[
-# #     "Causal",
-# #     "Contributory",
-# #     "Causal",
-# #     "Influential",
-# #     "Collaborative",
-# #     "Initiative",
-# #     "Beneficial",
-# #     "Influential",
-# #     "Collaborative",
-# #     "Enriching"
-# # ],  
-#             # is_confined_search = True,
+#     ,fixed_entities = ["university", "greenwood", "liam zheng", "department", "Metroville", "Emily Stanton", "Coach", "health", "training", "atheletes" ]
+#     ,sample_entities=["organization", "organization", "person",  "department", "city", "person", "person", "method", "method", "person"]
+#     ,fixed_relationships=[
+#     "Increase in research funding leads to environmental science focus",
+#     "Dr. Emily Stanton's advocacy for cleaner energy",
+#     "University's commitment to reduce carbon emissions",
+#     "Dr. Stanton's research influences architectural plans",
+#     "Collaborative project between sociology and environmental sciences",
+#     "Student government launches mental health awareness workshops",
+#     "Enhanced fitness programs improve sports teams' performance",
+#     "Coach Torres influences student-athletes' holistic health",
+#     "Partnership expands access to digital resources",
+#     "Interdisciplinary approach enriches academic experience"
+# ]
+#             , sample_relationships=[
+#     "Causal",
+#     "Contributory",
+#     "Causal",
+#     "Influential",
+#     "Collaborative",
+#     "Initiative",
+#     "Beneficial",
+#     "Influential",
+#     "Collaborative",
+#     "Enriching"
+# ] 
+#             ,is_confined_search = True
 
 #     ,user_context="Query: Your task is to analyze and interpret the context to construct semantic triples. The above context is from a  university document along with the identified entities using NER. Identify which entity is the subject entity and which is the object entity based on the context, and determine the meaningful relationship or predicate linking the subject entity to the object entity. Also identify the respective subject entity type , object entity and predicate type. Answer:"
 #     )

diff --git a/tests/workflows/gpt_llm_test_fixed_entities_predicates_workflow.py b/tests/workflows/gpt_llm_test_fixed_entities_predicates_workflow.py
@@ -85,10 +85,10 @@
 #     "Influential",
 #     "Collaborative",
 #     "Enriching"
-# ],  
-#             is_confined_search = True,
+# ]  
+#             ,is_confined_search = True
 
-#     user_context="Your task is to analyze and interpret the context to construct semantic triples. The above context is from a  university document along with the identified entities using NER. Identify which entity is the subject entity and which is the object entity based on the context, and determine the meaningful relationship or predicate linking the subject entity to the object entity. Also identify the respective subject entity type , object entity and predicate type. Answer:"
+#     ,user_context="Your task is to analyze and interpret the context to construct semantic triples. The above context is from a  university document along with the identified entities using NER. Identify which entity is the subject entity and which is the object entity based on the context, and determine the meaningful relationship or predicate linking the subject entity to the object entity. Also identify the respective subject entity type , object entity and predicate type. Answer:"
 #     )
 #     llm_instance = GPTLLM(result_queue, bert_llm_config)
 #     class StateChangeCallback(EventCallbackInterface):