From 922b2b7924085d91410ecfdc86e950373477a4d6 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Mon, 27 Jun 2022 12:04:23 +0200 Subject: [PATCH 1/4] Upgraded pymatgen and matminer requirements --- README.md | 6 ------ modnet/featurizers/featurizers.py | 8 ++++---- modnet/preprocessing.py | 10 +++++----- setup.py | 8 ++++---- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index d4ffd405..72a4b761 100644 --- a/README.md +++ b/README.md @@ -45,12 +45,6 @@ activate the environment: conda activate modnet ``` -Then, install pymatgen v2020.8.13 with conda, which will bundle several pre-built dependencies (e.g., numpy, scipy): - -```shell -conda install -c conda-forge pymatgen=2020.8.13 -``` - Finally, install MODNet from PyPI with pip: ```bash diff --git a/modnet/featurizers/featurizers.py b/modnet/featurizers/featurizers.py index 0835668c..0fd3ec77 100644 --- a/modnet/featurizers/featurizers.py +++ b/modnet/featurizers/featurizers.py @@ -70,7 +70,7 @@ def featurize(self, df: pd.DataFrame) -> pd.DataFrame: Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. Returns: The featurized DataFrame. @@ -137,7 +137,7 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame: Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. Returns: pandas.DataFrame: the decorated DataFrame, or an empty @@ -184,7 +184,7 @@ def featurize_structure(self, df: pd.DataFrame) -> pd.DataFrame: Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. Returns: pandas.DataFrame: the decorated DataFrame. 
@@ -206,7 +206,7 @@ def featurize_site( Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. aliases: optional dictionary to map matminer output column names to new aliases, mostly used for backwards-compatibility. diff --git a/modnet/preprocessing.py b/modnet/preprocessing.py index 8cf3bed5..7b888eee 100644 --- a/modnet/preprocessing.py +++ b/modnet/preprocessing.py @@ -13,7 +13,7 @@ from typing import Dict, List, Union, Optional, Callable, Hashable, Iterable, Tuple from functools import partial -from pymatgen import Structure, Composition +from pymatgen.core import Structure, Composition from sklearn.feature_selection import mutual_info_regression, mutual_info_classif from sklearn.utils import resample @@ -539,14 +539,14 @@ def merge_ranked(lists: List[List[Hashable]]) -> List[Hashable]: class MODData: - """The MODData class takes takes a list of `pymatgen.Structure` + """The MODData class takes takes a list of `pymatgen.core.structure.Structure` objects and creates a `pandas.DataFrame` that contains many matminer features per structure. It then uses mutual information between features and targets, and between the features themselves, to perform feature selection using relevance-redundancy indices. Attributes: - df_structure (pd.DataFrame): dataframe storing the `pymatgen.Structure` + df_structure (pd.DataFrame): dataframe storing the `pymatgen.core.structure.Structure` representations for each structured, indexed by ID. df_targets (pd.Dataframe): dataframe storing the prediction targets per structure, indexed by ID. 
@@ -906,12 +906,12 @@ def rebalance(self): @property def structures(self) -> List[Union[Structure, CompositionContainer]]: - """Returns the list of `pymatgen.Structure` objects.""" + """Returns the list of `pymatgen.core.structure.Structure` objects.""" return list(self.df_structure["structure"]) @property def compositions(self) -> List[Union[Structure, CompositionContainer]]: - """Returns the list of materials as`pymatgen.Composition` objects.""" + """Returns the list of materials as `pymatgen.core.composition.Composition` objects.""" return [s.composition for s in self.df_structure["structure"]] @property diff --git a/setup.py b/setup.py index 45d311d3..bb05700a 100644 --- a/setup.py +++ b/setup.py @@ -37,10 +37,10 @@ "pandas>=0.25.3", "tensorflow>=2.4", "tensorflow-probability>=0.12", - "pymatgen>=2020,<2020.9", - "matminer>=0.6.2", - "numpy>=1.18.3", - "scikit-learn>=0.23,<0.24", + "pymatgen>=2022.5.17", + "matminer>=0.7.6", + "numpy>=1.22.3", + "scikit-learn>=1.1.0", ], tests_require=tests_require, test_suite="modnet.tests", From 62c482571b9a1ba0d6f3a825f095204475f20153 Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Tue, 11 Jul 2023 16:57:26 +0200 Subject: [PATCH 2/4] backward compatibility warning --- modnet/models/vanilla.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py index 77119444..bb145796 100644 --- a/modnet/models/vanilla.py +++ b/modnet/models/vanilla.py @@ -846,6 +846,11 @@ def _restore_model(self): fill_value=-1, ).fit(np.zeros((1, self.n_feat))), ) + if not hasattr(self, "targets_groups"): + self.targets_groups = [x for subl in self.targets for x in subl] + LOG.warning( + "Installed modnet version (v>=0.4.0) does not match loaded model (v<0.4.0) and may result in errors. Please retrain or change your modnet version !" 
+ ) def save(self, filename: str) -> None: """Save the `MODNetModel` to filename: From a408f0ac02120fc63b9f1987d618deb2583958f6 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Fri, 14 Jul 2023 09:04:03 +0200 Subject: [PATCH 3/4] Possibility to remove all NaNs features or not after featurization. --- modnet/featurizers/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modnet/featurizers/utils.py b/modnet/featurizers/utils.py index 2297b6ef..3d54863e 100644 --- a/modnet/featurizers/utils.py +++ b/modnet/featurizers/utils.py @@ -3,12 +3,13 @@ __all__ = ("clean_df",) -def clean_df(df): +def clean_df(df, drop_allnan: bool = True): """Cleans dataframe by dropping missing values, replacing NaN's and infinities and selecting only columns containing numerical data. Args: df (pd.DataFrame): the dataframe to clean. + drop_allnan (bool): if True, drop features (columns) whose values are all NaN. Returns: pandas.DataFrame: the cleaned dataframe. @@ -16,7 +17,8 @@ def clean_df(df): """ df = df.select_dtypes(include="number") - df = df.dropna(axis=1, how="all") + if drop_allnan: + df = df.dropna(axis=1, how="all") df = df.replace([np.inf, -np.inf, np.nan], np.nan) return df From 6b46db4368530b2e3744ea780d8dd2b063d46f68 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Jul 2023 07:05:02 +0000 Subject: [PATCH 4/4] Bump scikit-learn from 1.2.0 to 1.3.0 Bumps [scikit-learn](https://github.com/scikit-learn/scikit-learn) from 1.2.0 to 1.3.0. - [Release notes](https://github.com/scikit-learn/scikit-learn/releases) - [Commits](https://github.com/scikit-learn/scikit-learn/compare/1.2.0...1.3.0) --- updated-dependencies: - dependency-name: scikit-learn dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 16a28429..bb647b2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ pandas==1.5.2 pymatgen==2023.1.30 matminer==0.8.0 numpy>=1.20 -scikit-learn==1.2.0 +scikit-learn==1.3.0 emmet-core<0.57 # Can remove after https://github.com/materialsproject/api/issues/819