From 4289a8ea01f58eaaec5abc5397832729c8fa3082 Mon Sep 17 00:00:00 2001
From: cristinazuhe <zuhe18@gmail.com>
Date: Fri, 26 Jan 2024 11:32:38 +0100
Subject: [PATCH] Solved bug on clip_avg

---
 flexnlp/pool/aggregators.py           | 24 ++++++++++++------------
 flexnlp/utils/adapters/ss_adapters.py |  2 +-
 setup.py                              | 15 +++------------
 3 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/flexnlp/pool/aggregators.py b/flexnlp/pool/aggregators.py
index f3850d0..8263fc3 100644
--- a/flexnlp/pool/aggregators.py
+++ b/flexnlp/pool/aggregators.py
@@ -7,7 +7,7 @@
 import numpy as np
 import tensorly as tl
 from flex.pool.decorators import aggregate_weights
-from flex.pool.decorators import set_tensorly_backend
+from flex.pool.aggregators import set_tensorly_backend
 
 
 def clip_avg_f(aggregate_weights_as_list: list, clip_threshold: float = 0.9):
@@ -19,20 +19,20 @@ def clip_avg_f(aggregate_weights_as_list: list, clip_threshold: float = 0.9):
             w = tl.tensor(client_weights[layer_index])
             weights_per_layer.append(w)
         weights_per_layer = tl.stack(weights_per_layer)
-        clip_threshold = np.quantile(weights_per_layer, clip_threshold)
-        sum_clipped_layer = tl.sum(tl.clip(weights_per_layer, -clip_threshold, clip_threshold), axis=0)
+        clip_thresh = np.percentile(weights_per_layer, clip_threshold*100, axis=0)
+        sum_clipped_layer = tl.mean(tl.clip(weights_per_layer, -clip_thresh, clip_thresh), axis=0)
         agg_weights.append(sum_clipped_layer)
     return agg_weights
 
 @aggregate_weights
-def clip_avg(aggregate_weights_as_list: list, clip_threshold: float = 0.9):
-    """Aggregate the weights using the clip average method.
-    This function calculates the quantile of the weights of each layer and
+def clip_avg(aggregated_weights_as_list: list, clip_threshold: float = 0.9):
+    """Aggregate the weights using the clip average aggregation method.
+    This function calculates the `clip_threshold` quantile of the weights of each layer and
     then clips the weights to the interval [-quantile, quantile].
 
     Args:
-        aggregate_weights_as_list (list): List of weights to aggregate.
-        clip_threshold (float, optional): Quantile threshold to apply to each
+        aggregated_weights_as_list (list): List of weights to aggregate.
+        clip_threshold (float, optional): Quantile in [0, 1] (0.9 = 90th percentile) to apply to each
         layer. Defaults to 0.9.
 
     Returns:
@@ -43,15 +43,15 @@ def clip_avg(aggregate_weights_as_list: list, clip_threshold: float = 0.9):
 
         aggregator = flex.pool.aggregators
         server = flex.pool.servers
-        clip_threshold = 0.98 # quantile to clip the weights
+        clip_threshold = 0.9 # fraction in [0, 1]: clip the weights at the 90th percentile
         aggregator.map(server, clip_avg, clip_threshold)
 
     Example of use using the FlePool without separating server
     and aggregator, and following a client-server architecture:
 
         from flex.pool.primitives import clip_avg
-        clip_threshold = 0.98 # quantile to clip the weights
+        clip_threshold = 0.9 # fraction in [0, 1]: clip the weights at the 90th percentile
         flex_pool.aggregators.map(flex_pool.servers, clip_avg, clip_threshold=clip_threshold)    
     """
-    set_tensorly_backend()
-    return clip_avg_f(aggregate_weights_as_list, clip_threshold)
+    set_tensorly_backend(aggregated_weights_as_list)
+    return clip_avg_f(aggregated_weights_as_list, clip_threshold)
diff --git a/flexnlp/utils/adapters/ss_adapters.py b/flexnlp/utils/adapters/ss_adapters.py
index 13ff794..7cd51c1 100644
--- a/flexnlp/utils/adapters/ss_adapters.py
+++ b/flexnlp/utils/adapters/ss_adapters.py
@@ -30,4 +30,4 @@ def ss_triplet_input_adapter(X_train_as_list: list = None, X_test_as_list: list
     if test and len(X_test_as_list) > 1:
         dev_examples = [InputExample(texts=[example['query'], example['pos'][0], example['neg'][0]]) for example in X_test_as_list]
 
-    return train_examples, dev_examples        
\ No newline at end of file
+    return train_examples, dev_examples
diff --git a/setup.py b/setup.py
index f2a0e6a..0f6d427 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,6 @@
 from setuptools import find_packages, setup
 
 
-TF_requires = ["tensorflow<2.11", # https://github.com/tensorflow/tensorflow/issues/58973
-                "tensorflow_datasets", 
-                "tensorflow_hub"
-        ]
-
 PT_requires = ["torch", 
                 "torchvision", 
                 "torchtext", 
@@ -13,8 +8,6 @@
                 "portalocker",
         ]
 
-HF_requires = ["datasets"]
-
 setup(
         name="flexnlp",
         version="0.0.1",
@@ -38,19 +31,17 @@
                         "portalocker",
                         "torchdata",
                         "datasets",
-                        "transformers"
+                        "transformers",
+                        "sentence_transformers",
+                        "sentencepiece",
                         ],
         extras_require={
-                "tensorflow": TF_requires,
                 "pytorch": PT_requires,
-                "hugginface": HF_requires,
                 "develop": ["pytest",
                         "pytest-cov",
                         "pytest-xdist",
                         "coverage",
                         "jinja2",
-                        *TF_requires,
-                        *HF_requires
                         ],
         },
         python_requires=">=3.8.10",