From 922b2b7924085d91410ecfdc86e950373477a4d6 Mon Sep 17 00:00:00 2001
From: gbrunin <guillaume.brunin@uclouvain.be>
Date: Mon, 27 Jun 2022 12:04:23 +0200
Subject: [PATCH 1/4] Upgraded pymatgen and matminer requirements

---
 README.md                         |  6 ------
 modnet/featurizers/featurizers.py |  8 ++++----
 modnet/preprocessing.py           | 10 +++++-----
 setup.py                          |  8 ++++----
 4 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index d4ffd405..72a4b761 100644
--- a/README.md
+++ b/README.md
@@ -45,12 +45,6 @@ activate the environment:
 conda activate modnet
 ```
 
-Then, install pymatgen v2020.8.13 with conda, which will bundle several pre-built dependencies (e.g., numpy, scipy):
-
-```shell
-conda install -c conda-forge pymatgen=2020.8.13
-```
-
 Finally, install MODNet from PyPI with pip:
 
 ```bash
diff --git a/modnet/featurizers/featurizers.py b/modnet/featurizers/featurizers.py
index 0835668c..0fd3ec77 100644
--- a/modnet/featurizers/featurizers.py
+++ b/modnet/featurizers/featurizers.py
@@ -70,7 +70,7 @@ def featurize(self, df: pd.DataFrame) -> pd.DataFrame:
 
         Arguments:
             df: the input dataframe with a `"structure"` column
-                containing `pymatgen.Structure` objects.
+                containing `pymatgen.core.structure.Structure` objects.
 
         Returns:
             The featurized DataFrame.
@@ -137,7 +137,7 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
 
         Arguments:
             df: the input dataframe with a `"structure"` column
-                containing `pymatgen.Structure` objects.
+                containing `pymatgen.core.structure.Structure` objects.
 
         Returns:
             pandas.DataFrame: the decorated DataFrame, or an empty
@@ -184,7 +184,7 @@ def featurize_structure(self, df: pd.DataFrame) -> pd.DataFrame:
 
         Arguments:
             df: the input dataframe with a `"structure"` column
-                containing `pymatgen.Structure` objects.
+                containing `pymatgen.core.structure.Structure` objects.
 
         Returns:
             pandas.DataFrame: the decorated DataFrame.
@@ -206,7 +206,7 @@ def featurize_site(
 
         Arguments:
             df: the input dataframe with a `"structure"` column
-                containing `pymatgen.Structure` objects.
+                containing `pymatgen.core.structure.Structure` objects.
             aliases: optional dictionary to map matminer output column
                 names to new aliases, mostly used for
                 backwards-compatibility.
diff --git a/modnet/preprocessing.py b/modnet/preprocessing.py
index 8cf3bed5..7b888eee 100644
--- a/modnet/preprocessing.py
+++ b/modnet/preprocessing.py
@@ -13,7 +13,7 @@
 from typing import Dict, List, Union, Optional, Callable, Hashable, Iterable, Tuple
 from functools import partial
 
-from pymatgen import Structure, Composition
+from pymatgen.core import Structure, Composition
 
 from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
 from sklearn.utils import resample
@@ -539,14 +539,14 @@ def merge_ranked(lists: List[List[Hashable]]) -> List[Hashable]:
 
 
 class MODData:
-    """The MODData class takes takes a list of `pymatgen.Structure`
+    """The MODData class takes a list of `pymatgen.core.structure.Structure`
     objects and creates a `pandas.DataFrame` that contains many matminer
     features per structure. It then uses mutual information between
     features and targets, and between the features themselves, to
     perform feature selection using relevance-redundancy indices.
 
     Attributes:
-        df_structure (pd.DataFrame): dataframe storing the `pymatgen.Structure`
+        df_structure (pd.DataFrame): dataframe storing the `pymatgen.core.structure.Structure`
             representations for each structured, indexed by ID.
         df_targets (pd.Dataframe): dataframe storing the prediction targets
             per structure, indexed by ID.
@@ -906,12 +906,12 @@ def rebalance(self):
 
     @property
     def structures(self) -> List[Union[Structure, CompositionContainer]]:
-        """Returns the list of `pymatgen.Structure` objects."""
+        """Returns the list of `pymatgen.core.structure.Structure` objects."""
         return list(self.df_structure["structure"])
 
     @property
     def compositions(self) -> List[Union[Structure, CompositionContainer]]:
-        """Returns the list of materials as`pymatgen.Composition` objects."""
+        """Returns the list of materials as `pymatgen.core.composition.Composition` objects."""
         return [s.composition for s in self.df_structure["structure"]]
 
     @property
diff --git a/setup.py b/setup.py
index 45d311d3..bb05700a 100644
--- a/setup.py
+++ b/setup.py
@@ -37,10 +37,10 @@
         "pandas>=0.25.3",
         "tensorflow>=2.4",
         "tensorflow-probability>=0.12",
-        "pymatgen>=2020,<2020.9",
-        "matminer>=0.6.2",
-        "numpy>=1.18.3",
-        "scikit-learn>=0.23,<0.24",
+        "pymatgen>=2022.5.17",
+        "matminer>=0.7.6",
+        "numpy>=1.22.3",
+        "scikit-learn>=1.1.0",
     ],
     tests_require=tests_require,
     test_suite="modnet.tests",

From 62c482571b9a1ba0d6f3a825f095204475f20153 Mon Sep 17 00:00:00 2001
From: ppdebreuck <pierre-paul.debreuck@student.uclouvain.be>
Date: Tue, 11 Jul 2023 16:57:26 +0200
Subject: [PATCH 2/4] backward compatibility warning

---
 modnet/models/vanilla.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py
index 77119444..bb145796 100644
--- a/modnet/models/vanilla.py
+++ b/modnet/models/vanilla.py
@@ -846,6 +846,11 @@ def _restore_model(self):
                     fill_value=-1,
                 ).fit(np.zeros((1, self.n_feat))),
             )
+        if not hasattr(self, "targets_groups"):
+            self.targets_groups = [x for subl in self.targets for x in subl]
+            LOG.warning(
+                "Installed modnet version (v>=0.4.0) does not match loaded model (v<0.4.0) and may result in errors. Please retrain or change your modnet version !"
+            )
 
     def save(self, filename: str) -> None:
         """Save the `MODNetModel` to filename:

From a408f0ac02120fc63b9f1987d618deb2583958f6 Mon Sep 17 00:00:00 2001
From: gbrunin <guillaume.brunin@uclouvain.be>
Date: Fri, 14 Jul 2023 09:04:03 +0200
Subject: [PATCH 3/4] Add option to keep or drop all-NaN features after
 featurization.

---
 modnet/featurizers/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/modnet/featurizers/utils.py b/modnet/featurizers/utils.py
index 2297b6ef..3d54863e 100644
--- a/modnet/featurizers/utils.py
+++ b/modnet/featurizers/utils.py
@@ -3,12 +3,13 @@
 __all__ = ("clean_df",)
 
 
-def clean_df(df):
+def clean_df(df, drop_allnan: bool = True):
     """Cleans dataframe by dropping missing values, replacing NaN's and infinities
     and selecting only columns containing numerical data.
 
     Args:
         df (pd.DataFrame): the dataframe to clean.
+        drop_allnan (bool): if True, drop feature columns that contain only NaN values.
 
     Returns:
         pandas.DataFrame: the cleaned dataframe.
@@ -16,7 +17,8 @@ def clean_df(df):
     """
 
     df = df.select_dtypes(include="number")
-    df = df.dropna(axis=1, how="all")
+    if drop_allnan:
+        df = df.dropna(axis=1, how="all")
     df = df.replace([np.inf, -np.inf, np.nan], np.nan)
 
     return df

From 6b46db4368530b2e3744ea780d8dd2b063d46f68 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 14 Jul 2023 07:05:02 +0000
Subject: [PATCH 4/4] Bump scikit-learn from 1.2.0 to 1.3.0

Bumps [scikit-learn](https://github.com/scikit-learn/scikit-learn) from 1.2.0 to 1.3.0.
- [Release notes](https://github.com/scikit-learn/scikit-learn/releases)
- [Commits](https://github.com/scikit-learn/scikit-learn/compare/1.2.0...1.3.0)

---
updated-dependencies:
- dependency-name: scikit-learn
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 16a28429..bb647b2b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,5 @@ pandas==1.5.2
 pymatgen==2023.1.30
 matminer==0.8.0
 numpy>=1.20
-scikit-learn==1.2.0
+scikit-learn==1.3.0
 emmet-core<0.57  # Can remove after https://github.com/materialsproject/api/issues/819