GEM-benchmark · Louanes1 · Aug 31, 2021 · Aug 31, 2021 · Aug 31, 2021 · Sep 2, 2021
diff --git a/transformations/french_synonym_adjectives_transformation/README.md b/transformations/french_synonym_adjectives_transformation/README.md
@@ -0,0 +1,20 @@
+# Adjective Synonym Substitution 🦎  + ⌨️ → 🐍
+
+
+This transformation replaces adjectives (words whose POS tag is ADJ) with synonyms in simple French sentences. It requires spacy-lefff (an extension of spaCy for French POS tagging and lemmatization) and the nltk package with the Open Multilingual WordNet dictionary.
+
+Authors : Lisa Barthe and Louanes Hamla from Fablab by Inetum in Paris
+
+## What type of transformation is it?
+This transformation creates French paraphrases in which one word differs. The general meaning of the sentence is preserved, and a sentence can yield several paraphrases, each varying a single adjective.
+
+## Supported Task
+
+This perturbation can be used for any French task.
+
+## What does it intend to benefit ?
+
+This perturbation would benefit all tasks which have a sentence/paragraph/document as input like text classification, text generation, etc. that requires synthetic data augmentation / diversification.
+
+## What are the limitations of this transformation?
+This tool does not take the general context into account; sometimes the output will not match the general sense of the sentence.
diff --git a/transformations/french_synonym_adjectives_transformation/__init__.py b/transformations/french_synonym_adjectives_transformation/__init__.py
@@ -0,0 +1,2 @@
+from .transformation import *
+
diff --git a/transformations/french_synonym_adjectives_transformation/requirements.txt b/transformations/french_synonym_adjectives_transformation/requirements.txt
@@ -0,0 +1,5 @@
+#fr core news md model :
+fr-core-news-md @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.0.0/fr_core_news_md-3.0.0-py3-none-any.whl
+spacy-lefff==0.4.0
+textblob_fr==0.2.0
+nltk
diff --git a/transformations/french_synonym_adjectives_transformation/test.json b/transformations/french_synonym_adjectives_transformation/test.json
@@ -0,0 +1,40 @@
+{
+  "type": "french_synonym_adjectives_transformation",
+  "test_cases": [
+
+
+    {
+      "class": "FrenchAdjectivesSynonymTransformation",
+      "inputs": {
+        "sentence": "Le sanglier a des dents pointues et un pelage sombre"
+      },
+      "outputs": [{
+        "sentence": "Le sanglier a des dents pointues et un pelage noir"
+
+        }]
+      },
+    {
+      "class": "FrenchAdjectivesSynonymTransformation",
+      "inputs": {
+        "sentence": "Ce fut un impressionnant festival de mode, les mannequins sont tous jolis."
+      },
+      "outputs": [{
+        "sentence": "Ce fut un imposant festival de mode, les mannequins sont tous jolis."
+      }]
+
+    },
+
+    {
+      "class": "FrenchAdjectivesSynonymTransformation",
+      "inputs": {
+        "sentence": "Nous sommes venu en grand nombre."
+      },
+      "outputs": [{
+        "sentence": "Nous sommes venu en important nombre."
+      }]
+
+    }
+
+
+  ]
+}
diff --git a/transformations/french_synonym_adjectives_transformation/transformation.py b/transformations/french_synonym_adjectives_transformation/transformation.py
@@ -0,0 +1,85 @@
+from textblob import TextBlob, Blobber, Word
+import re
+from textblob_fr import PatternTagger, PatternAnalyzer
+import nltk
+nltk.download('wordnet')
+from textblob.wordnet import NOUN, VERB, ADV, ADJ
+import spacy
+from spacy_lefff import LefffLemmatizer, POSTagger
+from spacy.language import Language
+from nltk.corpus import wordnet
+import nltk
+nltk.download('omw') 
+
+from interfaces.SentenceOperation import SentenceOperation
+from tasks.TaskTypes import TaskType
+
+@Language.factory('french_lemmatizer')
+def create_french_lemmatizer(nlp, name):
+    return LefffLemmatizer()
+
+@Language.factory('POSTagger')
+def create_POSTagger(nlp, name):
+    return POSTagger()
+
+
+# Module-level pipeline shared by synonym_transformation(); loaded once at
+# import time from the 'fr_core_news_md' model.
+nlp = spacy.load('fr_core_news_md')
+
+# Add the 'POSTagger' factory component under the name 'pos', then the
+# 'french_lemmatizer' factory component under the name 'lefff', explicitly
+# positioned after 'pos'.
+nlp.add_pipe('POSTagger', name ='pos')
+nlp.add_pipe('french_lemmatizer', name='lefff', after='pos')
+
+
+
+def synonym_transformation(text):    
+	doc = nlp(text)
+	adjectives = [d.text for d in doc if d.pos_ == "ADJ"]
+
+	synonyms_adjective_list = []
+	for i in adjectives:
+		dict_adjective_synonyms = {}
+		dict_adjective_synonyms['adjective'] = i
+		dict_adjective_synonyms['synonyms'] = list(set([l.name() for syn in wordnet.synsets(i, lang = 'fra', pos = ADJ) for l in syn.lemmas('fra')]))
+		if len(dict_adjective_synonyms['synonyms']) > 0:
+			synonyms_adjective_list.append(dict_adjective_synonyms)
+
+	valid_adjective_list = []
+	for j in synonyms_adjective_list:
+		for k in j['synonyms']:
+			valid_adjective_dict = {}
+			valid_adjective_dict['adjective'] = j['adjective']
+			valid_adjective_dict['syn'] = k
+			if nlp(j['adjective']).similarity(nlp(k)) > .50 and not nlp(j['adjective']).similarity(nlp(k)) >= .999:
+				valid_adjective_list.append(valid_adjective_dict)
+	text_adjective_generated = []
+	for l in valid_adjective_list:
+		text_adjective_generated.append(text.replace(l['adjective'], l['syn']))
+	pertu=[]
+	text_adjective_generated.sort()
+
+	for i in text_adjective_generated :
+		if nlp(text).similarity(nlp(i)) > .90 and not nlp(text).similarity(nlp(i)) >= .999:
+			pertu.append(i)
+			break
+
+	return pertu
+
+
+class FrenchAdjectivesSynonymTransformation(SentenceOperation):
+    tasks = [
+        TaskType.TEXT_CLASSIFICATION,
+        TaskType.TEXT_TO_TEXT_GENERATION,
+        TaskType.TEXT_TAGGING,
+    ]
+    languages = ["fr"]
+
+    def __init__(self, seed=0, max_outputs=1):
+        super().__init__(seed, max_outputs=max_outputs)
+
+    def generate(self, sentence : str):
+        perturbed_texts = synonym_transformation(
+            sentence
+        )
+        print("perturbed text inside of class",perturbed_texts)
+        return perturbed_texts
+
+