From 9c863fa3d0e47bb6cba0d2f0def0e93b80f98e44 Mon Sep 17 00:00:00 2001
From: Louanes Hamla <louanes.hamla@inetum.com>
Date: Tue, 31 Aug 2021 16:11:13 +0200
Subject: [PATCH 1/4] Added my french_noun_synonym transformation

---
 TestRunner.py                                 |   2 +-
 .../french_synonym_transformation/README.md   |  20 +++++
 .../french_synonym_transformation/__init__.py |   2 +
 .../requirements.txt                          | Bin 0 -> 278 bytes
 .../french_synonym_transformation/test.json   |  62 +++++++++++++
 .../transformation.py                         |  85 ++++++++++++++++++
 6 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 transformations/french_synonym_transformation/README.md
 create mode 100644 transformations/french_synonym_transformation/__init__.py
 create mode 100644 transformations/french_synonym_transformation/requirements.txt
 create mode 100644 transformations/french_synonym_transformation/test.json
 create mode 100644 transformations/french_synonym_transformation/transformation.py

diff --git a/TestRunner.py b/TestRunner.py
index e28b47958..4c70f826f 100644
--- a/TestRunner.py
+++ b/TestRunner.py
@@ -18,7 +18,7 @@ def load(module, cls):
 
 def load_test_cases(test_json):
     try:
-        with open(test_json) as f:
+        with open(test_json,encoding="utf-8") as f:
             d = json.load(f)
             examples = d["test_cases"]
         return examples
diff --git a/transformations/french_synonym_transformation/README.md b/transformations/french_synonym_transformation/README.md
new file mode 100644
index 000000000..5dd50940e
--- /dev/null
+++ b/transformations/french_synonym_transformation/README.md
@@ -0,0 +1,20 @@
+# Noun Synonym Substitution 🦎  + ⌨️ → 🐍
+
+
+This transformation replaces some words with synonyms when their POS tag is NOUN, for simple French sentences. It requires Spacy_lefff (an extension of spaCy for French POS tagging and lemmatization) and the nltk package with the Open Multilingual WordNet dictionary.
+
+Authors : Lisa Barthe and Louanes Hamla from Fablab by Inetum in Paris
+
+## What type of transformation is it?
+This transformation creates paraphrases of a French sentence by replacing a word with a different one. The general meaning of the sentence is preserved, and the sentence can be turned into different paraphrases, each with one noun variation.
+
+## Supported Task
+
+This perturbation can be used for any French task.
+
+## What does it intend to benefit ?
+
+This perturbation would benefit all tasks which have a sentence/paragraph/document as input like text classification, text generation, etc. that requires synthetic data augmentation / diversification.
+
+## What are the limitations of this transformation?
+This tool does not take the general context into account, so the output will sometimes not match the general sense of the sentence.
\ No newline at end of file
diff --git a/transformations/french_synonym_transformation/__init__.py b/transformations/french_synonym_transformation/__init__.py
new file mode 100644
index 000000000..89ecd1199
--- /dev/null
+++ b/transformations/french_synonym_transformation/__init__.py
@@ -0,0 +1,2 @@
+from .transformation import *
+
diff --git a/transformations/french_synonym_transformation/requirements.txt b/transformations/french_synonym_transformation/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fc277cc070b5b1d7d3147193a7adb9419f31e3d7
GIT binary patch
literal 278
zcmb7<OAf*?3`FNBT%ZJr9V>1U1sW()N-6>hxjgWkMfWIj{2PzQ-_J!44LU31Bhj6|
zsu0Z@wb2oq^yDp64|2S#Zse0*I%(TyXR1+eFbSqYKf~mR76lNk^JN$#h(gD<B`lD|
uUKyFGOc_G}>15jcs?D$3eAhN;&-Punf3O3Z8nbq*skEyDzQI(vD(MHS4>Rik

literal 0
HcmV?d00001

diff --git a/transformations/french_synonym_transformation/test.json b/transformations/french_synonym_transformation/test.json
new file mode 100644
index 000000000..e97494ea9
--- /dev/null
+++ b/transformations/french_synonym_transformation/test.json
@@ -0,0 +1,62 @@
+{
+  "type": "french_synonym_transformation",
+  "test_cases": [
+    
+    {
+      "class": "FrenchNounSynonymTransformation",
+      "inputs": {
+        "sentence": "Il a vécu par la force et par la force il nous a quittés. Je n'ai pas été surpris de la cohérence avec laquelle il a conclu son existence."
+      },
+      "outputs": [{
+        "sentence": "Il a vécu par la violence et par la violence il nous a quittés. Je n'ai pas été surpris de la cohérence avec laquelle il a conclu son existence."
+      }]
+    
+    },
+    
+    {
+      "class": "FrenchNounSynonymTransformation",
+      "inputs": {
+        "sentence": "Dans cette vie, vous devez planter un arbre, écrire un livre et avoir un enfant."
+      },
+      "outputs": [{
+        "sentence": "Dans cette vie, vous devez planter un arbre, écrire un ouvrage et avoir un enfant."
+      }]
+    
+    },
+    
+    {
+      "class": "FrenchNounSynonymTransformation",
+      "inputs": {
+        "sentence": "Vous ne pouvez pas voter pour cet homme, avec tout ce qu’il a fait!"
+      },
+      "outputs": [{
+        "sentence": "Vous ne pouvez pas voter pour cet individu, avec tout ce qu’il a fait!"
+      }]
+    
+    },
+    
+    {
+      "class": "FrenchNounSynonymTransformation",
+      "inputs": {
+        "sentence": "La hausse des taux attirera les investissements."
+      },
+      "outputs": [{
+        "sentence": "La hausse des ratio attirera les investissements."
+      }]
+    
+    },
+    
+    {
+      "class": "FrenchNounSynonymTransformation",
+      "inputs": {
+        "sentence": "Il mène son projet rapidement et avec rigueur."
+      },
+      "outputs": [{
+        "sentence": "Il mène son projet rapidement et avec sévérité."
+      }]
+    
+    }
+
+
+  ]
+}
diff --git a/transformations/french_synonym_transformation/transformation.py b/transformations/french_synonym_transformation/transformation.py
new file mode 100644
index 000000000..90738f50d
--- /dev/null
+++ b/transformations/french_synonym_transformation/transformation.py
@@ -0,0 +1,85 @@
+from textblob import TextBlob, Blobber, Word
+import re
+from textblob_fr import PatternTagger, PatternAnalyzer
+import nltk
+nltk.download('wordnet')
+from textblob.wordnet import NOUN, VERB, ADV, ADJ
+import spacy
+from spacy_lefff import LefffLemmatizer, POSTagger
+from spacy.language import Language
+from nltk.corpus import wordnet
+import nltk
+nltk.download('omw') 
+
+from interfaces.SentenceOperation import SentenceOperation
+from tasks.TaskTypes import TaskType
+
+@Language.factory('french_lemmatizer')  # factory name used by nlp.add_pipe('french_lemmatizer', ...) below
+def create_french_lemmatizer(nlp, name):
+    return LefffLemmatizer()  # nlp and name are not used; a new LefffLemmatizer is built on each call
+
+@Language.factory('POSTagger')  # factory name used by nlp.add_pipe('POSTagger', ...) below
+def create_POSTagger(nlp, name):
+    return POSTagger()  # nlp and name are not used; a new spacy_lefff POSTagger is built on each call
+
+
+nlp = spacy.load('fr_core_news_md')  # module-level pipeline, loaded once when this module is imported
+
+nlp.add_pipe('POSTagger', name ='pos')  # adds the 'POSTagger' factory component under the name 'pos'
+nlp.add_pipe('french_lemmatizer', name='lefff', after='pos')  # 'lefff' is inserted after 'pos'
+
+
+def synonym_transformation(text):
+    """Return at most one paraphrase of ``text`` with a noun replaced.
+
+    Nouns are picked with the module-level spaCy pipeline ``nlp``; synonym
+    candidates are the French lemma names WordNet returns for each noun.
+    A synonym is kept when its similarity to the noun is in (0.55, 0.999).
+    Rewritten sentences are tried in reverse-sorted order.
+
+    :param text: sentence to perturb.
+    :return: list holding the first accepted rewritten sentence, or ``[]``.
+    """
+    doc = nlp(text)
+    candidates = []
+    for noun in (tok.text for tok in doc if tok.pos_ == "NOUN"):
+        noun_doc = nlp(noun)  # parse once instead of once per synonym
+        lemma_names = {
+            lemma.name()
+            for synset in wordnet.synsets(noun, lang="fra")
+            for lemma in synset.lemmas("fra")
+        }
+        for synonym in lemma_names:
+            # Reject unrelated words (<= .55) and near-identical ones (>= .999).
+            if 0.55 < noun_doc.similarity(nlp(synonym)) < 0.999:
+                candidates.append(text.replace(noun, synonym))
+    candidates.sort(reverse=True)
+    for sent in candidates:
+        # Compare against the rewritten sentence, not a bare noun, so the
+        # filter actually depends on the candidate being considered.
+        if 0.40 < doc.similarity(nlp(sent)) < 0.999:
+            return [sent]
+    return []
+
+
+
+
+class FrenchNounSynonymTransformation(SentenceOperation):
+    """Paraphrase a French sentence by replacing a noun with a synonym."""
+
+    tasks = [
+        TaskType.TEXT_CLASSIFICATION,
+        TaskType.TEXT_TO_TEXT_GENERATION,
+        TaskType.TEXT_TAGGING,
+    ]
+    languages = ["fr"]
+
+    def __init__(self, seed=0, max_outputs=1):
+        super().__init__(seed, max_outputs=max_outputs)
+
+    def generate(self, sentence: str):
+        """Return the list of perturbed sentences (empty if none qualifies)."""
+        # No debug print here: generate() runs once per input sentence.
+        return synonym_transformation(sentence)
+
+

From 82c23dd699b4d6649a1cb7c19bef4168e330f834 Mon Sep 17 00:00:00 2001
From: Louanes Hamla <louanes.hamla@inetum.com>
Date: Tue, 31 Aug 2021 23:07:16 +0200
Subject: [PATCH 2/4] add librariries to requirements

---
 TestRunner.py                                 |   2 +-
 .../requirements.txt                          | Bin 278 -> 422 bytes
 .../french_synonym_transformation/test.json   |  25 +++++-------------
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/TestRunner.py b/TestRunner.py
index 4c70f826f..e28b47958 100644
--- a/TestRunner.py
+++ b/TestRunner.py
@@ -18,7 +18,7 @@ def load(module, cls):
 
 def load_test_cases(test_json):
     try:
-        with open(test_json,encoding="utf-8") as f:
+        with open(test_json) as f:
             d = json.load(f)
             examples = d["test_cases"]
         return examples
diff --git a/transformations/french_synonym_transformation/requirements.txt b/transformations/french_synonym_transformation/requirements.txt
index fc277cc070b5b1d7d3147193a7adb9419f31e3d7..4821ff3dc04dae9b6c9148adace31b8479adb5ce 100644
GIT binary patch
delta 156
zcmbQnw2ay8|377hG=?Gu1%_mXd?1|)WaR<La)x3cE0-Y!Oy&dWRE8WN+lqmgfs0`y
zlL%uBh*8W?z>o;kTFIabluiX21%kFfXuzPyU;@MjU=1ZexeA67h9sb>d>|PQwiKq$
O2(B&<C||;m%>V!mdK!oT

delta 11
ScmZ3+JdKIz|G$l9B8&hVE(ARQ

diff --git a/transformations/french_synonym_transformation/test.json b/transformations/french_synonym_transformation/test.json
index e97494ea9..612a9aea9 100644
--- a/transformations/french_synonym_transformation/test.json
+++ b/transformations/french_synonym_transformation/test.json
@@ -5,10 +5,10 @@
     {
       "class": "FrenchNounSynonymTransformation",
       "inputs": {
-        "sentence": "Il a vécu par la force et par la force il nous a quittés. Je n'ai pas été surpris de la cohérence avec laquelle il a conclu son existence."
+        "sentence": "Il vit par la force et par la force il est parti."
       },
       "outputs": [{
-        "sentence": "Il a vécu par la violence et par la violence il nous a quittés. Je n'ai pas été surpris de la cohérence avec laquelle il a conclu son existence."
+        "sentence": "Il vit par la violence et par la violence il est parti."
       }]
     
     },
@@ -16,25 +16,14 @@
     {
       "class": "FrenchNounSynonymTransformation",
       "inputs": {
-        "sentence": "Dans cette vie, vous devez planter un arbre, écrire un livre et avoir un enfant."
+        "sentence": "Dans cette vie, vous devez planter un arbre, lire un livre et avoir un enfant."
       },
       "outputs": [{
-        "sentence": "Dans cette vie, vous devez planter un arbre, écrire un ouvrage et avoir un enfant."
+        "sentence": "Dans cette vie, vous devez planter un arbre, lire un ouvrage et avoir un enfant."
       }]
     
     },
-    
-    {
-      "class": "FrenchNounSynonymTransformation",
-      "inputs": {
-        "sentence": "Vous ne pouvez pas voter pour cet homme, avec tout ce qu’il a fait!"
-      },
-      "outputs": [{
-        "sentence": "Vous ne pouvez pas voter pour cet individu, avec tout ce qu’il a fait!"
-      }]
-    
-    },
-    
+       
     {
       "class": "FrenchNounSynonymTransformation",
       "inputs": {
@@ -49,10 +38,10 @@
     {
       "class": "FrenchNounSynonymTransformation",
       "inputs": {
-        "sentence": "Il mène son projet rapidement et avec rigueur."
+        "sentence": "Il entreprend son projet rapidement et avec enthousiasme."
       },
       "outputs": [{
-        "sentence": "Il mène son projet rapidement et avec sévérité."
+        "sentence": "Il entreprend son projet rapidement et avec passion."
       }]
     
     }

From e02a9a13c4002cffd55f8b97288c3b425c7fce9e Mon Sep 17 00:00:00 2001
From: Louanes Hamla <louanes.hamla@inetum.com>
Date: Tue, 7 Sep 2021 09:00:06 +0200
Subject: [PATCH 3/4] use utf-16

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index cdab6188c..22b023532 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@ def recursive_requirements():
             os.path.dirname(__file__), folder + "/requirements.txt"
         )
         if os.path.isfile(r_file):
-            with open(r_file) as f:
+            with open(r_file,encoding='utf-16') as f:
                 requirements += f.read() + "\n"
     return requirements
 

From cbce7c45f212c5705c6e3df23565c35d3b28195e Mon Sep 17 00:00:00 2001
From: Louanes Hamla <louanes.hamla@inetum.com>
Date: Tue, 7 Sep 2021 09:07:18 +0200
Subject: [PATCH 4/4] specify utf-8 in the read function of setup.py

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 22b023532..2d3b67f4b 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@ def all_folders():
 
 
 def read(fname):
-    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
+    with open(os.path.join(os.path.dirname(__file__), fname),encoding='utf-8') as f:
         data = f.read()
     return data
 
@@ -34,7 +34,7 @@ def recursive_requirements():
             os.path.dirname(__file__), folder + "/requirements.txt"
         )
         if os.path.isfile(r_file):
-            with open(r_file,encoding='utf-16') as f:
+            with open(r_file) as f:
                 requirements += f.read() + "\n"
     return requirements