From 4b45f82c715e776b295b19672b6626c653b859bf Mon Sep 17 00:00:00 2001 From: Louanes Hamla Date: Tue, 31 Aug 2021 17:10:55 +0200 Subject: [PATCH 1/4] French Adjectives Transformation --- TestRunner.py | 2 +- .../README.md | 20 +++++ .../__init__.py | 2 + .../requirements.txt | 2 + .../test.json | 62 ++++++++++++++ .../transformation.py | 85 +++++++++++++++++++ 6 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 transformations/french_synonym_adjectives_transformation/README.md create mode 100644 transformations/french_synonym_adjectives_transformation/__init__.py create mode 100644 transformations/french_synonym_adjectives_transformation/requirements.txt create mode 100644 transformations/french_synonym_adjectives_transformation/test.json create mode 100644 transformations/french_synonym_adjectives_transformation/transformation.py diff --git a/TestRunner.py b/TestRunner.py index e28b47958..cbb5c7d4c 100644 --- a/TestRunner.py +++ b/TestRunner.py @@ -18,7 +18,7 @@ def load(module, cls): def load_test_cases(test_json): try: - with open(test_json) as f: + with open(test_json,encoding = "utf-8") as f: d = json.load(f) examples = d["test_cases"] return examples diff --git a/transformations/french_synonym_adjectives_transformation/README.md b/transformations/french_synonym_adjectives_transformation/README.md new file mode 100644 index 000000000..d0e10c3b0 --- /dev/null +++ b/transformations/french_synonym_adjectives_transformation/README.md @@ -0,0 +1,20 @@ +# Adjective Synonym Substitution 🦎 + ⌨️ → 🐍 + + +This transformation replaces some words with synonyms when their POS tag is ADJ (adjective), for simple French sentences. It requires spacy-lefff (an extension of spaCy for French POS tagging and lemmatization) and the nltk package with the Open Multilingual WordNet dictionary. + +Authors: Lisa Barthe and Louanes Hamla from Fablab by Inetum in Paris + +## What type of transformation is it?
+This transformation creates paraphrases in French in which one word differs. The general meaning of the sentence is preserved, but the sentence can be rendered as different paraphrases, each varying one adjective. + +## Supported Task + +This perturbation can be used for any French task. + +## What does it intend to benefit? + +This perturbation would benefit all tasks that take a sentence/paragraph/document as input, such as text classification, text generation, etc., and that require synthetic data augmentation / diversification. + +## What are the limitations of this transformation? +This tool does not take the general context into account; sometimes the output will not match the general sense of the sentence. \ No newline at end of file diff --git a/transformations/french_synonym_adjectives_transformation/__init__.py b/transformations/french_synonym_adjectives_transformation/__init__.py new file mode 100644 index 000000000..89ecd1199 --- /dev/null +++ b/transformations/french_synonym_adjectives_transformation/__init__.py @@ -0,0 +1,2 @@ +from .transformation import * + diff --git a/transformations/french_synonym_adjectives_transformation/requirements.txt b/transformations/french_synonym_adjectives_transformation/requirements.txt new file mode 100644 index 000000000..110f007f7 --- /dev/null +++ b/transformations/french_synonym_adjectives_transformation/requirements.txt @@ -0,0 +1,2 @@ +#fr core news md model : +fr-core-news-md @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.0.0/fr_core_news_md-3.0.0-py3-none-any.whl diff --git a/transformations/french_synonym_adjectives_transformation/test.json b/transformations/french_synonym_adjectives_transformation/test.json new file mode 100644 index 000000000..52d711e9b --- /dev/null +++ b/transformations/french_synonym_adjectives_transformation/test.json @@ -0,0 +1,62 @@ +{ + "type": "french_synonym_adjectives_transformation", + "test_cases": [ + + { + "class": "FrenchAdjectivesSynonymTransformation", +
"inputs": { + "sentence": "Le féroce sanglier a des défenses pointues et un pelage sombre" + }, + "outputs": [{ + "sentence": "Le féroce sanglier a des défenses pointues et un pelage noir" + }] + + }, + + { + "class": "FrenchAdjectivesSynonymTransformation", + "inputs": { + "sentence": "L'examen était très difficile" + }, + "outputs": [{ + "sentence": "L'examen était très ardu" + }] + + }, + + { + "class": "FrenchAdjectivesSynonymTransformation", + "inputs": { + "sentence": "La peinture contemporaine est incompréhensible." + }, + "outputs": [{ + "sentence": "La peinture contemporaine est inexplicable." + }] + + }, + + { + "class": "FrenchAdjectivesSynonymTransformation", + "inputs": { + "sentence": "C'était un impressionnant défilé de mode, les mannequins étaient tous très élégants." + }, + "outputs": [{ + "sentence": "C'était un imposant défilé de mode, les mannequins étaient tous très élégants." + }] + + }, + + { + "class": "FrenchAdjectivesSynonymTransformation", + "inputs": { + "sentence": "Nous étions invité en petit comité." + }, + "outputs": [{ + "sentence": "Nous étions invité en léger comité." 
+ }] + + } + + + ] +} diff --git a/transformations/french_synonym_adjectives_transformation/transformation.py b/transformations/french_synonym_adjectives_transformation/transformation.py new file mode 100644 index 000000000..0434a189c --- /dev/null +++ b/transformations/french_synonym_adjectives_transformation/transformation.py @@ -0,0 +1,85 @@ +from textblob import TextBlob, Blobber, Word +import re +from textblob_fr import PatternTagger, PatternAnalyzer +import nltk +nltk.download('wordnet') +from textblob.wordnet import NOUN, VERB, ADV, ADJ +import spacy +from spacy_lefff import LefffLemmatizer, POSTagger +from spacy.language import Language +from nltk.corpus import wordnet +import nltk +nltk.download('omw') + +from interfaces.SentenceOperation import SentenceOperation +from tasks.TaskTypes import TaskType + +@Language.factory('french_lemmatizer') +def create_french_lemmatizer(nlp, name): + return LefffLemmatizer() + +@Language.factory('POSTagger') +def create_POSTagger(nlp, name): + return POSTagger() + + +nlp = spacy.load('fr_core_news_md') + +nlp.add_pipe('POSTagger', name ='pos') +nlp.add_pipe('french_lemmatizer', name='lefff', after='pos') + + + +def synonym_transformation(text): + doc = nlp(text) + adjectives = [d.text for d in doc if d.pos_ == "ADJ"] + + synonyms_adjective_list = [] + for i in adjectives: + dict_adjective_synonyms = {} + dict_adjective_synonyms['adjective'] = i + dict_adjective_synonyms['synonyms'] = list(set([l.name() for syn in wordnet.synsets(i, lang = 'fra', pos = ADJ) for l in syn.lemmas('fra')])) + if len(dict_adjective_synonyms['synonyms']) > 0: + synonyms_adjective_list.append(dict_adjective_synonyms) + + valid_adjective_list = [] + for j in synonyms_adjective_list: + for k in j['synonyms']: + valid_adjective_dict = {} + valid_adjective_dict['adjective'] = j['adjective'] + valid_adjective_dict['syn'] = k + if nlp(j['adjective']).similarity(nlp(k)) > .50 and not nlp(j['adjective']).similarity(nlp(k)) >= .999: + 
valid_adjective_list.append(valid_adjective_dict) + text_adjective_generated = [] + for l in valid_adjective_list: + text_adjective_generated.append(text.replace(l['adjective'], l['syn'])) + pertu=[] + text_adjective_generated.sort() + + for i in text_adjective_generated : + if nlp(text).similarity(nlp(i)) > .90 and not nlp(text).similarity(nlp(i)) >= .999: + pertu.append(i) + break + + return pertu + + +class FrenchAdjectivesSynonymTransformation(SentenceOperation): + tasks = [ + TaskType.TEXT_CLASSIFICATION, + TaskType.TEXT_TO_TEXT_GENERATION, + TaskType.TEXT_TAGGING, + ] + languages = ["fr"] + + def __init__(self, seed=0, max_outputs=1): + super().__init__(seed, max_outputs=max_outputs) + + def generate(self, sentence : str): + perturbed_texts = synonym_transformation( + sentence + ) + print("perturbed text inside of class",perturbed_texts) + return perturbed_texts + + From 8a9c78cbbd0848899d61b4bb7a4eecacb1244382 Mon Sep 17 00:00:00 2001 From: Louanes Hamla Date: Tue, 31 Aug 2021 22:53:16 +0200 Subject: [PATCH 2/4] add librairies in requirements --- TestRunner.py | 2 +- .../requirements.txt | 3 +++ .../test.json | 16 ++++++++-------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/TestRunner.py b/TestRunner.py index cbb5c7d4c..e28b47958 100644 --- a/TestRunner.py +++ b/TestRunner.py @@ -18,7 +18,7 @@ def load(module, cls): def load_test_cases(test_json): try: - with open(test_json,encoding = "utf-8") as f: + with open(test_json) as f: d = json.load(f) examples = d["test_cases"] return examples diff --git a/transformations/french_synonym_adjectives_transformation/requirements.txt b/transformations/french_synonym_adjectives_transformation/requirements.txt index 110f007f7..bfb6b6816 100644 --- a/transformations/french_synonym_adjectives_transformation/requirements.txt +++ b/transformations/french_synonym_adjectives_transformation/requirements.txt @@ -1,2 +1,5 @@ #fr core news md model : fr-core-news-md @ 
https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.0.0/fr_core_news_md-3.0.0-py3-none-any.whl +spacy-lefff==0.4.0 +textblob_fr==0.2.0 +nltk \ No newline at end of file diff --git a/transformations/french_synonym_adjectives_transformation/test.json b/transformations/french_synonym_adjectives_transformation/test.json index 52d711e9b..c97c6bc83 100644 --- a/transformations/french_synonym_adjectives_transformation/test.json +++ b/transformations/french_synonym_adjectives_transformation/test.json @@ -5,10 +5,10 @@ { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "Le féroce sanglier a des défenses pointues et un pelage sombre" + "sentence": "Le sanglier a des défenses pointues et un pelage sombre" }, "outputs": [{ - "sentence": "Le féroce sanglier a des défenses pointues et un pelage noir" + "sentence": "Le sanglier a des défenses pointues et un pelage noir" }] }, @@ -16,10 +16,10 @@ { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "L'examen était très difficile" + "sentence": "L'examen est difficile" }, "outputs": [{ - "sentence": "L'examen était très ardu" + "sentence": "L'examen est ardu" }] }, @@ -38,10 +38,10 @@ { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "C'était un impressionnant défilé de mode, les mannequins étaient tous très élégants." + "sentence": "C'était un impressionnant festival de mode, les mannequins sont tous jolis." }, "outputs": [{ - "sentence": "C'était un imposant défilé de mode, les mannequins étaient tous très élégants." + "sentence": "C'était un imposant festival de mode, les mannequins sont tous jolis." }] }, @@ -49,10 +49,10 @@ { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "Nous étions invité en petit comité." + "sentence": "Nous sommes venu en grand nombre." }, "outputs": [{ - "sentence": "Nous étions invité en léger comité." + "sentence": "Nous sommes venu en important nombre." 
}] } From ac99ae70d4336cd32321ce93b9e94b6d575cb483 Mon Sep 17 00:00:00 2001 From: Louanes Hamla Date: Tue, 31 Aug 2021 22:56:39 +0200 Subject: [PATCH 3/4] make sure pytest pass --- .../test.json | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/transformations/french_synonym_adjectives_transformation/test.json b/transformations/french_synonym_adjectives_transformation/test.json index c97c6bc83..0cab64736 100644 --- a/transformations/french_synonym_adjectives_transformation/test.json +++ b/transformations/french_synonym_adjectives_transformation/test.json @@ -5,10 +5,10 @@ { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "Le sanglier a des défenses pointues et un pelage sombre" + "sentence": "Le sanglier a des défenses pointues et un pelage sombre" }, "outputs": [{ - "sentence": "Le sanglier a des défenses pointues et un pelage noir" + "sentence": "Le sanglier a des défenses pointues et un pelage noir" }] }, @@ -23,25 +23,14 @@ }] }, - - { - "class": "FrenchAdjectivesSynonymTransformation", - "inputs": { - "sentence": "La peinture contemporaine est incompréhensible." - }, - "outputs": [{ - "sentence": "La peinture contemporaine est inexplicable." - }] - - }, - + { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "C'était un impressionnant festival de mode, les mannequins sont tous jolis." + "sentence": "Ce fut un impressionnant festival de mode, les mannequins sont tous jolis." }, "outputs": [{ - "sentence": "C'était un imposant festival de mode, les mannequins sont tous jolis." + "sentence": "Ce fut un imposant festival de mode, les mannequins sont tous jolis." 
}] }, From 7fa591e0feb9091e3be2ae74c15693cca1086037 Mon Sep 17 00:00:00 2001 From: Louanes Hamla Date: Thu, 2 Sep 2021 22:18:19 +0200 Subject: [PATCH 4/4] fix pytest --- .../requirements.txt | 3 ++ .../test.json | 45 +++++++------------ 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/transformations/french_synonym_adjectives_transformation/requirements.txt b/transformations/french_synonym_adjectives_transformation/requirements.txt index 110f007f7..bfb6b6816 100644 --- a/transformations/french_synonym_adjectives_transformation/requirements.txt +++ b/transformations/french_synonym_adjectives_transformation/requirements.txt @@ -1,2 +1,5 @@ #fr core news md model : fr-core-news-md @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.0.0/fr_core_news_md-3.0.0-py3-none-any.whl +spacy-lefff==0.4.0 +textblob_fr==0.2.0 +nltk \ No newline at end of file diff --git a/transformations/french_synonym_adjectives_transformation/test.json b/transformations/french_synonym_adjectives_transformation/test.json index 52d711e9b..db929d11f 100644 --- a/transformations/french_synonym_adjectives_transformation/test.json +++ b/transformations/french_synonym_adjectives_transformation/test.json @@ -1,62 +1,51 @@ { "type": "french_synonym_adjectives_transformation", "test_cases": [ - - { - "class": "FrenchAdjectivesSynonymTransformation", - "inputs": { - "sentence": "Le féroce sanglier a des défenses pointues et un pelage sombre" - }, - "outputs": [{ - "sentence": "Le féroce sanglier a des défenses pointues et un pelage noir" - }] - - }, - + { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "L'examen était très difficile" + "sentence": "Le sanglier a des dents pointues et un pelage sombre" }, "outputs": [{ - "sentence": "L'examen était très ardu" + "sentence": "Le sanglier a des dents pointues et un pelage noir" }] - + }, - + { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "La peinture 
contemporaine est incompréhensible." + "sentence": "L'examen est difficile" }, "outputs": [{ - "sentence": "La peinture contemporaine est inexplicable." + "sentence": "L'examen est ardu" }] - + }, - + { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "C'était un impressionnant défilé de mode, les mannequins étaient tous très élégants." + "sentence": "Ce fut un impressionnant festival de mode, les mannequins sont tous jolis." }, "outputs": [{ - "sentence": "C'était un imposant défilé de mode, les mannequins étaient tous très élégants." + "sentence": "Ce fut un imposant festival de mode, les mannequins sont tous jolis." }] - + }, - + { "class": "FrenchAdjectivesSynonymTransformation", "inputs": { - "sentence": "Nous étions invité en petit comité." + "sentence": "Nous sommes venu en grand nombre." }, "outputs": [{ - "sentence": "Nous étions invité en léger comité." + "sentence": "Nous sommes venu en important nombre." }] - + } ] -} +} \ No newline at end of file