From 5e0fd47a2b7770854d93128cfadd688796814ca1 Mon Sep 17 00:00:00 2001 From: Lenny Bontenakel Date: Fri, 6 Jan 2023 12:04:56 +0100 Subject: [PATCH] init --- README.md | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..cc193d1 --- /dev/null +++ b/README.md @@ -0,0 +1,189 @@ +# Analysis ML2Grow + + +## poc-ai-airflow-bertopic + +### load (endpoint, query) +Executes the query against the endpoint and loads the result into a dataframe. + +### retrain +Retrains the model on the topics. + +### save-topics +Clears the existing topics and then saves the new ones. + +```sparql + PREFIX ext: + + DELETE {{ + GRAPH {{ + <{topic_base_uri}> ext:relevant_words ?words; ext:count ?count; ext:topic_label ?label . + + {joined} + }} + }} + WHERE {{ + GRAPH {{ + <{topic_base_uri}> ext:relevant_words ?words; ext:count ?count; ext:topic_label ?label . + + {joined} + }} + }} +``` + +```sparql + PREFIX ext: + + INSERT {{ + GRAPH {{ + + <{topic_base_uri}> a ext:isTopic; ext:relevant_words {", ".join(relevant_word_uri)} ; ext:count {v['count']} ; ext:topic_label \"{v['topic_label']}\" . + + {joined} + }} + }} +``` + +### save-transform +Takes the BERTopic output and saves it to a SPARQL endpoint. + + +```sparql + PREFIX ext: + + DELETE{{ + GRAPH {{ + <{record['thing']}> ext:HasTopic ?topic_link ; ext:ingestedByMl2GrowSmartRegulationsTopics ?srt . + ?topic_link ext:TopicURI ?topic_uri ; ext:score ?topic_score . + }} + }} + WHERE {{ + GRAPH {{ + <{record['thing']}> ext:HasTopic ?topic_link ; ext:ingestedByMl2GrowSmartRegulationsTopics ?srt . + ?topic_link ext:TopicURI ?topic_uri ; ext:score ?topic_score . + }} + }} +``` + +```sparql + PREFIX ext: + INSERT {{ + GRAPH {{ + <{record['thing']}> ext:HasTopic <{new_uri}> ; ext:ingestedByMl2GrowSmartRegulationsTopics "1" . + + <{new_uri}> a ext:TopicScore; + ext:TopicURI ; + ext:score {record['probability']} . 
+ }} + }} +``` + + + +### transform + + + + +## poc-ai-bertopic + +Initial loading of the model, using RoBERTa as the embedding model. +```topic_model = BERTopic.load("/models/topic.model", embedding_model=model)``` + +Uses this model for data processing and fetching. See specifications for this model: + - + + +## poc-ai-embed + + +## poc-ai-ner +Named entity recognition (NER) + +- + +## poc-ai-text-generation + + +## poc-ai-airflow-dags + +- + +Directed Acyclic Graphs (DAGs), +used to configure and run the Airflow tasks. + +The SPARQL queries for loading data are in the config folder. + +## poc-ai-airflow-embed + +### scripts/save.py + +```sparql + PREFIX ext: + + DELETE{{ + GRAPH {{ + <{file_reference}> ext:searchEmbedding ?embed; ext:ingestedByMl2GrowSmartRegulationsEmbedding ?sre . + }} + }} + WHERE{{ + <{file_reference}> ext:searchEmbedding ?embed; ext:ingestedByMl2GrowSmartRegulationsEmbedding ?sre . + }} +``` + +```sparql + PREFIX ext: + + INSERT {{ + GRAPH {{ + <{file_reference}> ext:searchEmbedding \"{list(record["embedding"])}\". + <{file_reference}> ext:ingestedByMl2GrowSmartRegulationsEmbedding "1". + }} + }} +``` + +## poc-ai-airflow-ner +CLI scripts for Named Entity Recognition + +### scripts/save.py + +```sparql + PREFIX ext: + + + DELETE{{ + GRAPH {{ + <{file_name}> ext:hasNer ?b ; ext:ingestedml2GrowSmartRegulationsNer ?srn . + ?b ext:entity ?ner_type; ext:end ?end; ext:start ?start; ext:word ?word . + }} + }} + WHERE{{ + <{file_name}> ext:hasNer ?b ; ext:ingestedml2GrowSmartRegulationsNer ?srn . + ?b ext:entity ?ner_type; ext:end ?end; ext:start ?start; ext:word ?word . + + }} +``` + + + +## poc-ai-airflow-zeroshot + +### scripts/save.py + +```sparql + PREFIX ext: + + DELETE {{ + GRAPH {{ + <{file_name}> ext:BBC_scoring ?bbc . + <{file_name}> ext:ingestedMl2GrowSmartRegulationsBBC ?sbr . + ?bbc ext:score ?bbc_score . + }} + }} + + WHERE {{ + <{file_name}> ext:BBC_scoring ?bbc . + <{file_name}> ext:ingestedMl2GrowSmartRegulationsBBC ?sbr . + ?bbc ext:score ?bbc_score . + }} +```