Skip to content

Commit

Permalink
final fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ngupta10 committed May 6, 2024
1 parent 7f17935 commit f871294
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,6 @@ async def process_tokens(self, data: IngestedTokens):
else:
filtered_triples, file = result
modified_data = GPTLLM.remove_items_from_tuples(filtered_triples)
- print("Data in GPT------------------------", modified_data[:1])
for entity1, context_json, entity2 in modified_data:
context_data = json.loads(context_json)
context = context_data.get("context", "")
Expand All @@ -352,15 +351,12 @@ async def process_tokens(self, data: IngestedTokens):
relationships.append(output_tuple)
if len(relationships) > 0:
if self.fixed_relationships and self.sample_relationships:
- print("Both are settttttttttttttttttttt-----")
embedding_triples = self.create_emb.generate_embeddings(relationships, relationship_finder=True, generate_embeddings_with_fixed_relationship = True)
elif self.sample_relationships:
- print("Only Sample Relationships are settttttttttttttttttttttttt-----")
embedding_triples = self.create_emb.generate_embeddings(relationships, relationship_finder=True)
else:
embedding_triples = self.create_emb.generate_embeddings(relationships)
if self.sample_relationships:
- print("Going to compute scores------------------------------")
embedding_triples = self.predicate_context_extractor.update_embedding_triples_with_similarity(self.predicate_json_emb, embedding_triples)
for triple in embedding_triples:
if not self.termination_event.is_set():
Expand All @@ -377,9 +373,7 @@ async def process_tokens(self, data: IngestedTokens):
else:
return
except Exception as e:
print("Exception in GPT-----------------------", e)
self.logger.error(f"Invalid {self.__class__.__name__} configuration. Unable to extract predicates using GPT. {e}")
raise Exception(f"An error occurred while extracting predicates using GPT: {e}")

async def process_messages(self, data: IngestedMessages):
raise NotImplementedError
1 change: 0 additions & 1 deletion querent/core/transformers/relationship_extraction_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ def extract_relationships(self, triples, fixed_entities = False):
entity2=predicate.get('entity2_nn_chunk', ''),
entity1_label=predicate.get('entity1_label', ''),
entity2_label=predicate.get('entity2_label', ''))
- print("This is the prompt -----------------",query )
answer_relation = self.qa_system.ask_question(prompt=query, llm=self.qa_system.llm, grammar=self.grammar)
try:
choices_text = answer_relation['choices'][0]['text']
Expand Down
2 changes: 1 addition & 1 deletion querent/kg/ner_helperfunctions/ner_llm_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def final_ingested_images_tuples(self, filtered_triples, create_embeddings):
info['predicate_type'] = "has image"
info['context_embeddings'] = create_embeddings.get_embeddings([info['context']])[0]
updated_json = json.dumps(info)
- updated_tuple = (entity, updated_json, second_entity)
+ updated_tuple = (info['entity1_nn_chunk'], updated_json, info['entity2_nn_chunk'])
return updated_tuple

def remove_duplicates(self, data):
Expand Down
Binary file not shown.
File renamed without changes.
54 changes: 27 additions & 27 deletions tests/workflows/bert_llm_test_fixed_entities_predicates_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,33 +57,33 @@
# 'min_samples': 3,
# 'cluster_persistence_threshold':0.2
# }
# # ,fixed_entities = ["university", "greenwood", "liam zheng", "department", "Metroville", "Emily Stanton", "Coach", "health", "training", "atheletes" ]
# # ,sample_entities=["organization", "organization", "person", "department", "city", "person", "person", "method", "method", "person"]
# # ,fixed_relationships=[
# # "Increase in research funding leads to environmental science focus",
# # "Dr. Emily Stanton's advocacy for cleaner energy",
# # "University's commitment to reduce carbon emissions",
# # "Dr. Stanton's research influences architectural plans",
# # "Collaborative project between sociology and environmental sciences",
# # "Student government launches mental health awareness workshops",
# # "Enhanced fitness programs improve sports teams' performance",
# # "Coach Torres influences student-athletes' holistic health",
# # "Partnership expands access to digital resources",
# # "Interdisciplinary approach enriches academic experience"
# # ]
# # , sample_relationships=[
# # "Causal",
# # "Contributory",
# # "Causal",
# # "Influential",
# # "Collaborative",
# # "Initiative",
# # "Beneficial",
# # "Influential",
# # "Collaborative",
# # "Enriching"
# # ],
# # is_confined_search = True,
# ,fixed_entities = ["university", "greenwood", "liam zheng", "department", "Metroville", "Emily Stanton", "Coach", "health", "training", "atheletes" ]
# ,sample_entities=["organization", "organization", "person", "department", "city", "person", "person", "method", "method", "person"]
# ,fixed_relationships=[
# "Increase in research funding leads to environmental science focus",
# "Dr. Emily Stanton's advocacy for cleaner energy",
# "University's commitment to reduce carbon emissions",
# "Dr. Stanton's research influences architectural plans",
# "Collaborative project between sociology and environmental sciences",
# "Student government launches mental health awareness workshops",
# "Enhanced fitness programs improve sports teams' performance",
# "Coach Torres influences student-athletes' holistic health",
# "Partnership expands access to digital resources",
# "Interdisciplinary approach enriches academic experience"
# ]
# , sample_relationships=[
# "Causal",
# "Contributory",
# "Causal",
# "Influential",
# "Collaborative",
# "Initiative",
# "Beneficial",
# "Influential",
# "Collaborative",
# "Enriching"
# ]
# ,is_confined_search = True

# ,user_context="Query: Your task is to analyze and interpret the context to construct semantic triples. The above context is from a university document along with the identified entities using NER. Identify which entity is the subject entity and which is the object entity based on the context, and determine the meaningful relationship or predicate linking the subject entity to the object entity. Also identify the respective subject entity type , object entity and predicate type. Answer:"
# )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@
# "Influential",
# "Collaborative",
# "Enriching"
- # ],
- # is_confined_search = True,
+ # ]
+ # ,is_confined_search = True

- # user_context="Your task is to analyze and interpret the context to construct semantic triples. The above context is from a university document along with the identified entities using NER. Identify which entity is the subject entity and which is the object entity based on the context, and determine the meaningful relationship or predicate linking the subject entity to the object entity. Also identify the respective subject entity type , object entity and predicate type. Answer:"
+ # ,user_context="Your task is to analyze and interpret the context to construct semantic triples. The above context is from a university document along with the identified entities using NER. Identify which entity is the subject entity and which is the object entity based on the context, and determine the meaningful relationship or predicate linking the subject entity to the object entity. Also identify the respective subject entity type , object entity and predicate type. Answer:"
# )
# llm_instance = GPTLLM(result_queue, bert_llm_config)
# class StateChangeCallback(EventCallbackInterface):
Expand Down

0 comments on commit f871294

Please sign in to comment.