fix mypy and flake

automl · Jul 26, 2022 · c138173 · c138173
1 parent e69ff3b
commit c138173
Show file tree

Hide file tree

Showing 11 changed files with 68 additions and 46 deletions.
diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
@@ -1908,7 +1908,7 @@ def _init_ensemble_builder(
         # builder in the provided dask client
         required_dataset_properties = {'task_type': self.task_type,
                                        'output_type': self.dataset.output_type}
-        
+
         proc_ensemble = EnsembleBuilderManager(
             start_time=time.time(),
             time_left_for_ensembles=time_left_for_ensembles,

diff --git a/autoPyTorch/api/time_series_forecasting.py b/autoPyTorch/api/time_series_forecasting.py
@@ -526,6 +526,9 @@ def predict(
                 predicted value, it needs to be with shape (B, H, N),
                 B is the number of series, H is forecasting horizon (n_prediction_steps), N is the number of targets
         """
+        if self.dataset is None:
+            raise AttributeError(f"Expected dataset to be initialised when predicting in {self.__class__.__name__}")
+
         if X_test is None or not isinstance(X_test[0], TimeSeriesSequence):
             assert past_targets is not None
             # Validate and construct TimeSeriesSequence
@@ -566,6 +569,9 @@ def update_sliding_window_size(self, n_prediction_steps: int) -> None:
                 forecast horizon. Sometimes we could also make our base sliding window size based on the
                 forecast horizon
         """
+        if self.dataset is None:
+            raise AttributeError(f"Expected dataset to be initialised when updating sliding window"
+                                 f" in {self.__class__.__name__}")
         base_window_size = int(np.ceil(self.dataset.base_window_size))
         # we don't want the base window size to be too large, which might cause too long a computation time,
         # in which case we will use n_prediction_steps instead (which is normally smaller than base_window_size)

diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py
@@ -49,7 +49,7 @@ def __init__(
         self.categories: List[List[int]] = []
         self.categorical_columns: List[int] = []
         self.numerical_columns: List[int] = []
-        self.encode_columns: List[int] = []
+        self.encode_columns: List[str] = []
 
         self.all_nan_columns: Optional[Set[Union[int, str]]] = None
 

diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py
@@ -283,7 +283,6 @@ def transform(
             X = self.numpy_to_pandas(X)
 
         if ispandas(X) and not issparse(X):
-            X = cast(pd.DataFrame, X)
 
             if self.all_nan_columns is None:
                 raise ValueError('_fit must be called before calling transform')
@@ -491,7 +490,7 @@ def _get_columns_to_encode(
         # Also, register the feature types for the estimator
         feat_types = []
 
-        # Make sure each column is a valid type            
+        # Make sure each column is a valid type
         for i, column in enumerate(X.columns):
             if self.all_nan_columns is not None and column in self.all_nan_columns:
                 continue

diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
@@ -276,7 +276,8 @@ def __init__(self,
         initial_configurations = []
 
         if STRING_TO_TASK_TYPES.get(self.task_type, -1) == TIMESERIES_FORECASTING:
-            # TODO: update search space (to remove reg cocktails) for forecasting tasks so that we can use the portfolio (or build the portfolio again)
+            # TODO: update search space (to remove reg cocktails) for forecasting tasks so
+            # that we can use the portfolio (or build the portfolio again)
             # initial_configurations = self.get_init_configs_for_forecasting(config_space, kwargs)
             # proxy-validation sets
             self.min_num_test_instances: Optional[int] = kwargs.get('min_num_test_instances',  # type:ignore[assignment]

diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py
@@ -1,7 +1,7 @@
-from copy import copy
 import warnings
 from abc import ABCMeta
 from collections import Counter
+from copy import copy
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from ConfigSpace import Configuration
@@ -297,7 +297,7 @@ def _get_hyperparameter_search_space(self,
         """
         raise NotImplementedError()
 
-    def _add_forbidden_conditions(self, cs):
+    def _add_forbidden_conditions(self, cs: ConfigurationSpace) -> ConfigurationSpace:
         """
         Add forbidden conditions to ensure valid configurations.
         Currently, Learned Entity Embedding is only valid when encoder is one hot encoder
@@ -308,6 +308,10 @@ def _add_forbidden_conditions(self, cs):
             cs (ConfigurationSpace):
                 Configuration space to which forbidden conditions are added.
 
+        Returns:
+            ConfigurationSpace:
+                with forbidden conditions added to the search space
+
         """
 
         # Learned Entity Embedding is only valid when encoder is one hot encoder

diff --git a/autoPyTorch/pipeline/components/setup/network/forecasting_architecture.py b/autoPyTorch/pipeline/components/setup/network/forecasting_architecture.py
@@ -576,7 +576,14 @@ def forward(self,
                 ) -> ALL_NET_OUTPUT:
 
         if isinstance(past_targets, dict):
-            past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
+            (
+                past_targets,
+                past_features,
+                future_features,
+                past_observed_targets,
+                future_targets,
+                decoder_observed_values
+            ) = self._unwrap_past_targets(past_targets)
 
         x_past, x_future, x_static, loc, scale, static_context_initial_hidden, _ = self.pre_processing(
             past_targets=past_targets,
@@ -610,13 +617,12 @@ def forward(self,
     def _unwrap_past_targets(
         self,
         past_targets: dict
-    ) -> Tuple[
-        torch.Tensor,
-        Optional[torch.Tensor],
-        Optional[torch.Tensor],
-        Optional[torch.Tensor],
-        Optional[torch.BoolTensor],
-        Optional[torch.Tensor]]:
+    ) -> Tuple[torch.Tensor,
+               Optional[torch.Tensor],
+               Optional[torch.Tensor],
+               Optional[torch.Tensor],
+               Optional[torch.BoolTensor],
+               Optional[torch.Tensor]]:
         """
         Time series forecasting network requires multiple inputs for the forward pass which is different to how pytorch
         networks usually work. SWA's update_bn in line #452 of trainer choice, does not unwrap the dictionary of the
@@ -637,7 +643,14 @@ def _unwrap_past_targets(
         future_features = past_targets_copy.pop('future_features', None)
         past_observed_targets = past_targets_copy.pop('past_observed_targets', None)
         decoder_observed_values = past_targets_copy.pop('decoder_observed_values', None)
-        return past_targets,past_features,future_features,past_observed_targets
+        return (
+            past_targets,
+            past_features,
+            future_features,
+            past_observed_targets,
+            future_targets,
+            decoder_observed_values
+        )
 
     def pred_from_net_output(self, net_output: ALL_NET_OUTPUT) -> torch.Tensor:
         if self.output_type == 'regression':
@@ -730,9 +743,16 @@ def forward(self,
                 future_features: Optional[torch.Tensor] = None,
                 past_observed_targets: Optional[torch.BoolTensor] = None,
                 decoder_observed_values: Optional[torch.Tensor] = None, ) -> ALL_NET_OUTPUT:
-        
+
         if isinstance(past_targets, dict):
-            past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
+            (
+                past_targets,
+                past_features,
+                future_features,
+                past_observed_targets,
+                future_targets,
+                decoder_observed_values
+            ) = self._unwrap_past_targets(past_targets)
 
         x_past, _, x_static, loc, scale, static_context_initial_hidden, past_targets = self.pre_processing(
             past_targets=past_targets,
@@ -1025,7 +1045,14 @@ def forward(self,
                 decoder_observed_values: Optional[torch.Tensor] = None, ) -> ALL_NET_OUTPUT:
 
         if isinstance(past_targets, dict):
-            past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
+            (
+                past_targets,
+                past_features,
+                future_features,
+                past_observed_targets,
+                future_targets,
+                decoder_observed_values
+            ) = self._unwrap_past_targets(past_targets)
 
         encode_length = min(self.window_size, past_targets.shape[1])
 
@@ -1295,7 +1322,14 @@ def forward(self,  # type: ignore[override]
                                                                                    Tuple[torch.Tensor, torch.Tensor]]:
 
         if isinstance(past_targets, dict):
-            past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
+            (
+                past_targets,
+                past_features,
+                future_features,
+                past_observed_targets,
+                future_targets,
+                decoder_observed_values
+            ) = self._unwrap_past_targets(past_targets)
 
         # Unlike other networks, NBEATS network is required to predict both past and future targets.
         # Thereby, we return two tensors for backcast and forecast

diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py
@@ -1,4 +1,3 @@
-import copy
 import warnings
 from typing import Any, Dict, List, Optional, Tuple, Union
 

diff --git a/autoPyTorch/pipeline/tabular_regression.py b/autoPyTorch/pipeline/tabular_regression.py
@@ -1,4 +1,3 @@
-import copy
 import warnings
 from typing import Any, Dict, List, Optional, Tuple, Union
 

diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py
@@ -645,7 +645,6 @@ def test_feature_validator_get_columns_to_encode_error_feat_type(input_data_feat
     with pytest.raises(ValueError, match=r"Expected type of features to be in .*"):
         validator._validate_feat_types(X)
 
-
     # Null columns in the train split but not necessarily in the test split
     train_features = {
         'A': [np.NaN, np.NaN, np.NaN],
@@ -706,25 +705,3 @@ def test_feature_validator_get_columns_to_encode_error_feat_type(input_data_feat
             null_columns.append(column)
 
     assert null_columns == [1]
-
-def test_comparator():
-    numerical = 'numerical'
-    categorical = 'categorical'
-
-    validator = TabularFeatureValidator
-
-    feat_type = [numerical, categorical] * 10
-    ans = [categorical] * 10 + [numerical] * 10
-    feat_type = sorted(
-        feat_type,
-        key=functools.cmp_to_key(validator._comparator)
-    )
-    assert ans == feat_type
-
-    feat_type = [numerical] * 10 + [categorical] * 10
-    ans = [categorical] * 10 + [numerical] * 10
-    feat_type = sorted(
-        feat_type,
-        key=functools.cmp_to_key(validator._comparator)
-    )
-    assert ans == feat_type
diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py
@@ -35,7 +35,10 @@
 
 @pytest.fixture
 def exclude():
-    return {'feature_preprocessor': ['SelectRatesClassification', 'SelectPercentileClassification'], 'network_embedding': ['LearnedEntityEmbedding']}
+    return {
+        'feature_preprocessor': ['SelectRatesClassification', 'SelectPercentileClassification'],
+        'network_embedding': ['LearnedEntityEmbedding']
+    }
 
 
 @pytest.mark.parametrize("fit_dictionary_tabular", ['classification_categorical_only',