From 1ffcccb21f6b5a52334479e2e311af2532490211 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 21 Nov 2025 00:04:06 +0100
Subject: [PATCH 01/42] Added function to convert shift to dict that specifies
 the shift for each input individually

---
 physXAI/preprocessing/preprocessing.py | 75 ++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 4754ecc..53faf39 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -12,6 +12,81 @@
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
 
 
+def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str]) -> dict:
+    """
+    Convert a given shift variable into a dictionary in which a shift is defined for every input
+
+    Args:
+        s (Union[int, str, dict]): Shift value. Either a single string or int which then will be applied to all the inputs or
+            a dictionary in which a different shift can be defined for each input. If the dictionary does not specify the
+            shift for all inputs, the shift for inputs not specified is set to 'previous' as default
+        inputs (list(str)): List of Input variables
+    """
+
+    def return_valid_shift(val: Union[int, str]):
+        """ check the validity of the given shift and return a string if val is int """
+        if val in ['current', 0]:
+            val = 'current'
+        elif val in ['previous', 1]:
+            val = 'previous'
+        elif val == 'mean_over_interval':
+            val = 'mean_over_interval'
+        else:
+            raise ValueError(
+                f"Value of shift not supported, value is: {val}. Shift must be 'current' (or 0 if s is int), "
+                f"'previous' (or 1 if s is int) or 'mean_over_interval'.")
+        return val
+
+    if isinstance(s, Union[int, str]):
+        d = {}
+        s = return_valid_shift(s)
+
+        # add shift for each input
+        for inp in inputs:
+            d.update({inp: s})
+        return d
+
+    elif isinstance(s, dict):
+        def get_lag(inputs: list[str], current_input: str) -> int:
+            """ get lag of current input """
+            count = 0
+            for inp in inputs:
+                spl = inp.split(current_input) # make sure it is the current input
+                if spl[0] == '' and spl[1] != '' and spl[1].split('_lag')[0] == '':
+                    count += 1
+            return count
+
+        # check if lags exist
+        d = {}
+        inputs_without_lags = {}
+        for inp in inputs:
+            # skip if current input is just the lag of another inp
+            if not inp.__contains__('_lag'):
+                inputs_without_lags.update({inp: get_lag(inputs, inp)})
+
+        for inp in inputs_without_lags.keys():
+            # if an input has a shift assigned already, the validity is checked
+            # otherwise 'previous' is assigned (default value)
+            if inp in s.keys():
+                d.update({inp: return_valid_shift(s[inp])})
+            else:
+                d.update({inp: 'previous'})
+
+            # all inputs with lags should have the same shift
+            if inputs_without_lags[inp] > 0: # if current input has lags
+                for i in range(inputs_without_lags[inp]):
+                    name = inp + '_lag' + str(i+1)
+
+                    # if a shift was already defined for this lag, check if it matches the shift of the original inp
+                    if name in s.keys():
+                        assert return_valid_shift(s[name]) == d[inp], \
+                            'Make sure that all lags of an input have the same shift'
+                    d.update({name: d[inp]})
+        return d
+    else:
+        raise TypeError(f'shift must be of type int, str or dict, is type {type(s)}')
+
+
 class PreprocessingData(ABC):
     """
     Abstract Preprocessing Class

From b8706cc9794a2b21fa7efdfa319255e92b4cbbb6 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 21 Nov 2025 00:05:02 +0100
Subject: [PATCH 02/42] Added unit tests for function
 preprocessing.convert_shift_to_dict

---
 unittests/test_coverage.py | 69 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index e17b448..54bea18 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -1,13 +1,14 @@
 import json
 import os
 import pathlib
+import unittest
 from unittest.mock import patch
 import keras
 import pytest
 ######################################################################################################################
 from physXAI.utils.logging import Logger, get_parent_working_directory
 from physXAI.preprocessing.preprocessing import PreprocessingSingleStep, PreprocessingMultiStep, \
-    PreprocessingData
+    PreprocessingData, convert_shift_to_dict
 from physXAI.preprocessing.constructed import Feature, FeatureConstruction, FeatureConstant
 from physXAI.feature_selection.recursive_feature_elimination import recursive_feature_elimination_pipeline
 from physXAI.models.models import LinearRegressionModel, AbstractModel
@@ -93,6 +94,72 @@ def test_preprocessing_multistep(file_path, inputs_tair, output_tair):
                                   overlapping_sequences=False, batch_size=1)
     prep.pipeline(file_path)
 
+class TestPreprocessingShiftConversion(unittest.TestCase):
+
+    inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
+              'weaSta_reaWeaHDirNor_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
+
+    # test case: int given for shift
+    def test_int(self):
+        shift = 0
+        res = convert_shift_to_dict(shift, self.inputs)
+        res_expected = {'reaTZon_y': 'current', 'reaTZon_y_lag1': 'current', 'reaTZon_y_lag2': 'current',
+                        'weaSta_reaWeaTDryBul_y': 'current', 'weaSta_reaWeaTDryBul_y_lag1': 'current',
+                        'weaSta_reaWeaHDirNor_y': 'current', 'oveHeaPumY_u': 'current', 'oveHeaPumY_u_lag1': 'current',
+                        'oveHeaPumY_u_lag2': 'current'}
+        assert res == res_expected
+
+    # test case: unsupported int given for shift
+    def test_unsupported_int(self):
+        shift = 2
+        with self.assertRaises(ValueError):
+            convert_shift_to_dict(shift, self.inputs)
+
+    # test case: str given for shift
+    def test_str(self):
+        shift = 'mean_over_interval'
+        res = convert_shift_to_dict(shift, self.inputs)
+        res_expected = {'reaTZon_y': 'mean_over_interval', 'reaTZon_y_lag1': 'mean_over_interval',
+                        'reaTZon_y_lag2': 'mean_over_interval', 'weaSta_reaWeaTDryBul_y': 'mean_over_interval',
+                        'weaSta_reaWeaTDryBul_y_lag1': 'mean_over_interval',
+                        'weaSta_reaWeaHDirNor_y': 'mean_over_interval', 'oveHeaPumY_u': 'mean_over_interval',
+                        'oveHeaPumY_u_lag1': 'mean_over_interval', 'oveHeaPumY_u_lag2': 'mean_over_interval'}
+        assert res == res_expected
+
+    # test case: unsupported str given for shift
+    def test_unsupported_str(self):
+        shift = 'test'
+        with self.assertRaises(ValueError):
+            convert_shift_to_dict(shift, self.inputs)
+
+    # test case: unsupported type given for shift
+    def test_unsupported_type(self):
+        shift = ['previous']
+        with self.assertRaises(TypeError):
+            convert_shift_to_dict(shift, self.inputs)
+
+    # test case: autocomplete incomplete dictionary given for shift
+    def test_autocomplete_incomplete_dict(self):
+        shift = {'reaTZon_y': 0, 'reaTZon_y_lag1': 0, 'weaSta_reaWeaTDryBul_y': 'mean_over_interval'}
+
+        # previous is default for all inputs that are not specified
+        res = convert_shift_to_dict(shift, self.inputs)
+        res_expected = {'reaTZon_y': 'current', 'reaTZon_y_lag1': 'current', 'reaTZon_y_lag2': 'current',
+                        'weaSta_reaWeaTDryBul_y': 'mean_over_interval',
+                        'weaSta_reaWeaTDryBul_y_lag1': 'mean_over_interval',
+                        'weaSta_reaWeaHDirNor_y': 'previous', 'oveHeaPumY_u': 'previous',
+                        'oveHeaPumY_u_lag1': 'previous',
+                        'oveHeaPumY_u_lag2': 'previous'}
+        assert len(res) == len(self.inputs)
+        assert res == res_expected
+
+    # test case: lags of the same input have mismatching shifts
+    def test_lag_with_mismatching_shifts(self):
+        shift = {'reaTZon_y': 0, 'reaTZon_y_lag1': 1, 'weaSta_reaWeaTDryBul_y': 'mean_over_interval'}
+        with self.assertRaises(AssertionError):
+            convert_shift_to_dict(shift, self.inputs)
+
+
 @pytest.fixture(scope='module')
 def p_hp_data(file_path, inputs_php, output_php):
     # Setup up logger for saving

From 499b4c7b3ccef246a41322a4e01ee6245ba24f2a Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 21 Nov 2025 00:08:54 +0100
Subject: [PATCH 03/42] Small import improvement

---
 unittests/test_coverage.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 54bea18..f083a35 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -1,8 +1,8 @@
 import json
 import os
 import pathlib
-import unittest
 from unittest.mock import patch
+from unittest import TestCase
 import keras
 import pytest
 ######################################################################################################################
@@ -94,7 +94,7 @@ def test_preprocessing_multistep(file_path, inputs_tair, output_tair):
                                   overlapping_sequences=False, batch_size=1)
     prep.pipeline(file_path)
 
-class TestPreprocessingShiftConversion(unittest.TestCase):
+class TestPreprocessingShiftConversion(TestCase):
 
     inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
               'weaSta_reaWeaHDirNor_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']

From f40439631f5e5637b07efd89c7c4cc0a85229e54 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 21 Nov 2025 09:18:37 +0100
Subject: [PATCH 04/42] Bug fix for backwards compatibility with Python 3.9

---
 physXAI/preprocessing/preprocessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 53faf39..c1e14b7 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -23,7 +23,7 @@ def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str]) -> dict:
         inputs (list(str)): List of Input variables
     """
 
-    def return_valid_shift(val: Union[int, str]):
+    def return_valid_shift(val: (int, str)):
         """ check the validity of the given shift and return a string if val is int """
         if val in ['current', 0]:
             val = 'current'

From 15c2ed7b82055f953e35ab0dbf49e65b66cd5be7 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 21 Nov 2025 09:25:37 +0100
Subject: [PATCH 05/42] Corrected bug fix for backwards compatibility with
 Python 3.9

---
 physXAI/preprocessing/preprocessing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index c1e14b7..37c659f 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -23,7 +23,7 @@ def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str]) -> dict:
         inputs (list(str)): List of Input variables
     """
 
-    def return_valid_shift(val: (int, str)):
+    def return_valid_shift(val: Union[int, str]):
         """ check the validity of the given shift and return a string if val is int """
         if val in ['current', 0]:
             val = 'current'
@@ -37,7 +37,7 @@ def return_valid_shift(val: (int, str)):
                 f"'previous' (or 1 if s is int) or 'mean_over_interval'.")
         return val
 
-    if isinstance(s, Union[int, str]):
+    if isinstance(s, (int, str)):
         d = {}
         s = return_valid_shift(s)
 

From 828c64d0050e9f259afa7a69e9366103164cc975 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 21 Nov 2025 17:08:45 +0100
Subject: [PATCH 06/42] Partly integrated new structure for shifting inputs and
 outputs

---
 physXAI/preprocessing/preprocessing.py | 65 ++++++++++++++++++++------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index fe6f1d9..f2fb300 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -1,8 +1,9 @@
 import os
 from abc import ABC, abstractmethod
-from typing import Optional, Union
+from typing import Optional, Union, Iterable
 import numpy as np
 import pandas as pd
+import itertools
 from sklearn.model_selection import train_test_split
 from physXAI.preprocessing.constructed import FeatureConstruction
 from physXAI.preprocessing.training_data import TrainingData, TrainingDataMultiStep, TrainingDataGeneric
@@ -14,12 +15,13 @@
 
 def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str]) -> dict:
     """
-    Convert a given shift variable into a dictionary in which a shift is defined for every input
+    Convert a given shift variable (int, str) into a dictionary in which a shift is defined for every input.
+    If a dictionary is given as shift, check entries and autocomplete dict if necessary.
 
     Args:
         s (Union[int, str, dict]): Shift value. Either a single string or int which then will be applied to all the inputs or
             a dictionary in which a different shift can be defined for each input. If the dictionary does not specify the
-            shift for all inputs, the shift for inputs not specified is set to 'previous' as default
+            shift for all inputs, the shift for inputs not specified is set to 'previous' as default (autocomplete)
         inputs (list(str)): List of Input variables
     """
 
@@ -92,7 +94,7 @@ class PreprocessingData(ABC):
     Abstract Preprocessing Class
     """
 
-    def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: int = 1,
+    def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
                  time_step: Optional[Union[int, float]] = None,
                  test_size: float = 0.1, val_size: float = 0.1, random_state: int = 42,
                  time_index_col: Union[str, float] = 0, csv_delimiter: str = ';', csv_encoding: str = 'latin1',
@@ -103,7 +105,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: int
         Args:
             inputs (List[str]): List of column names to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
-            shift (int): The number of time steps to shift the target variable for forecasting.
+            shift (int): The number of time steps to shift the target variable for forecasting.  # TODO: update docstring
                          A shift of one means predicting the next time step.
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
@@ -126,7 +128,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: int
         if isinstance(output, str):
             output = [output]
         self.output: list[str] = output
-        self.shift: int = shift
+        self.shift: dict = convert_shift_to_dict(shift, inputs)
         self.time_step = time_step
 
         # Training, validation and test size should be equal to 1
@@ -202,7 +204,7 @@ class PreprocessingSingleStep(PreprocessingData):
     validation, and test sets.
     """
 
-    def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: int = 1,
+    def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
                  time_step: Optional[Union[int, float]] = None,
                  test_size: float = 0.1, val_size: float = 0.1, random_state: int = 42,
                  time_index_col: Union[str, float] = 0, csv_delimiter: str = ';', csv_encoding: str = 'latin1',
@@ -213,7 +215,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: int
         Args:
             inputs (List[str]): List of column names to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
-            shift (int): The number of time steps to shift the target variable for forecasting.
+            shift (int): The number of time steps to shift the target variable for forecasting. # TODO: update doc dring
                          A shift of one means predicting the next time step.
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
@@ -257,16 +259,51 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         last_valid_index = non_nan_rows.iloc[::-1].idxmax() if non_nan_rows.any() else None
         df = df.loc[first_valid_index:last_valid_index]
         if df.isnull().values.any():
-            if self.ignore_nan:
+            if self.ignore_nan:  # TODO: restructure this
                 df.dropna(inplace=True)
             else:
-                raise ValueError("Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set ignore_nan=True in PreprocessingSingleStep.")
+                pass  # raise ValueError("Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set ignore_nan=True in PreprocessingSingleStep.")
 
         X = df[self.inputs]
-        y = df[self.output].shift(-self.shift)
-        if self.shift > 0:  # pragma: no cover
-            y = y.iloc[:-self.shift]
-            X = X.iloc[:-self.shift]
+        y = df[self.output]
+
+        assert len(self.inputs) == len(self.shift.keys()), (f"Something went wrong, number of inputs ({len(self.inputs)})"
+                                                            f" doesn't match number of inputs defined in shift ({len(self.shift.keys())})")
+
+        if all('current' == self.shift[k] for k in self.shift.keys()):
+            pass  # nothing to do here
+        elif all('previous' == self.shift[k] for k in self.shift.keys()):
+            X = X.shift(1)
+            y = y.iloc[1:]
+            X = X.iloc[1:]
+        elif all('mean_over_interval' == self.shift[k] for k in self.shift.keys()):
+
+            # output interval is target grid
+            y.dropna(inplace=True)
+
+            def pairwise(iterable: Iterable):
+                "s -> (s0,s1), (s1,s2), (s2, s3), ..."
+                a, b = itertools.tee(iterable)
+                next(b, None)
+                return zip(a, b)
+
+            original_grid = np.array(X.index)
+            results = []
+            for i, j in pairwise(y.index):
+                slicer = np.logical_and(original_grid >= i, original_grid < j)
+                d = {'Index': j}
+                for inp in self.inputs:
+                    d[inp] = X[inp][slicer].mean()
+                results.append(d)
+
+            # length of X and Y have to be synchronized
+            y = y.iloc[1:]
+            X = pd.DataFrame(results).set_index('Index')
+
+        else:  # different inputs have different shift
+            pass
+
+        # y = df[self.output].shift(-self.shift)
 
         return X, y
 

From e2986de05f24b2da9213fa114097e1258c77eb51 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 26 Nov 2025 10:02:13 +0100
Subject: [PATCH 07/42] Fixed error occurring with
 recursive_feature_elimination

---
 physXAI/preprocessing/preprocessing.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index f2fb300..cc6281c 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -265,7 +265,12 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
                 pass  # raise ValueError("Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set ignore_nan=True in PreprocessingSingleStep.")
 
         X = df[self.inputs]
-        y = df[self.output]
+        y = df[self.output].copy()
+
+        # check if current inputs match inputs (keys) in shift dictionary and update shift if necessary
+        # required for recursive feature selection since inputs change after initialization of Preprocessing object
+        if (len(self.inputs) != len(self.shift.keys())) or not all(inp in self.shift.keys() for inp in self.inputs):
+            self.shift = convert_shift_to_dict(self.shift, self.inputs)
 
         assert len(self.inputs) == len(self.shift.keys()), (f"Something went wrong, number of inputs ({len(self.inputs)})"
                                                             f" doesn't match number of inputs defined in shift ({len(self.shift.keys())})")

From 9973f0cde3b2757386ec94afa8ad26aa09989c1e Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 26 Nov 2025 23:25:06 +0100
Subject: [PATCH 08/42] Implemented new structure and methods for shifting
 input data

---
 physXAI/preprocessing/constructed.py   |  12 +-
 physXAI/preprocessing/preprocessing.py | 154 ++++++++++++++++++-------
 2 files changed, 121 insertions(+), 45 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index e90b7d3..c481277 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -555,7 +555,7 @@ def get_feature(name: str) -> Union[FeatureBase, None]:
         return None
 
     @staticmethod
-    def process(df: DataFrame):
+    def process(df: DataFrame, feature_names: list[str] = None):
         """
         Processes the input DataFrame by applying all registered feature transformations in order.
         Each feature's `process` method is called, which typically adds a new column to `df`
@@ -563,10 +563,16 @@ def process(df: DataFrame):
 
         Args:
             df (DataFrame): The DataFrame to process and add features to.
+            feature_names (list[str]): optional parameter to only process those features given in feature_names
         """
 
-        for f in FeatureConstruction.features:
-            f.process(df)
+        if not feature_names:
+            for f in FeatureConstruction.features:
+                f.process(df)
+        else:
+            for f in FeatureConstruction.features:
+                if f.feature in feature_names:
+                    f.process(df)
 
     @staticmethod
     def get_config() -> list:
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index cc6281c..6e39a7e 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import itertools
 from sklearn.model_selection import train_test_split
-from physXAI.preprocessing.constructed import FeatureConstruction
+from physXAI.preprocessing.constructed import FeatureConstruction, FeatureLag
 from physXAI.preprocessing.training_data import TrainingData, TrainingDataMultiStep, TrainingDataGeneric
 from physXAI.utils.logging import get_full_path
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -168,11 +168,14 @@ def load_data(self, file_path: str) -> pd.DataFrame:
         else:
             assert self.time_step % time_step == 0, (f"Value Error: Given time step {self.time_step} is not a multiple "
                                                      f"of data time step: {time_step}.")
-            filtering = (df.index - df.index[0]) % self.time_step == 0
-            df = df[filtering]
 
         return df
 
+    def filter_df_according_to_timestep(self, df: pd.DataFrame):
+        filtering = (df.index - df.index[0]) % self.time_step == 0
+        df = df[filtering]
+        return df
+
     @abstractmethod
     def pipeline(self, file_path: str) -> TrainingDataGeneric:
         """
@@ -248,67 +251,131 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
                                                        and target (y) DataFrames.
         """
 
-        # Applies feature constructions defined in `FeatureConstruction`.
-        FeatureConstruction.process(df)
-
-        df = df[self.inputs + [out for out in self.output if out not in self.inputs]]
-
-        # Nan handling
-        non_nan_rows = df.notna().all(axis=1)
-        first_valid_index = non_nan_rows.idxmax() if non_nan_rows.any() else None
-        last_valid_index = non_nan_rows.iloc[::-1].idxmax() if non_nan_rows.any() else None
-        df = df.loc[first_valid_index:last_valid_index]
-        if df.isnull().values.any():
-            if self.ignore_nan:  # TODO: restructure this
-                df.dropna(inplace=True)
-            else:
-                pass  # raise ValueError("Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set ignore_nan=True in PreprocessingSingleStep.")
-
-        X = df[self.inputs]
-        y = df[self.output].copy()
-
         # check if current inputs match inputs (keys) in shift dictionary and update shift if necessary
         # required for recursive feature selection since inputs change after initialization of Preprocessing object
         if (len(self.inputs) != len(self.shift.keys())) or not all(inp in self.shift.keys() for inp in self.inputs):
             self.shift = convert_shift_to_dict(self.shift, self.inputs)
 
-        assert len(self.inputs) == len(self.shift.keys()), (f"Something went wrong, number of inputs ({len(self.inputs)})"
-                                                            f" doesn't match number of inputs defined in shift ({len(self.shift.keys())})")
+        assert len(self.inputs) == len(self.shift.keys()), (
+            f"Something went wrong, number of inputs ({len(self.inputs)})"
+            f" doesn't match number of inputs defined in shift ({len(self.shift.keys())})")
 
-        if all('current' == self.shift[k] for k in self.shift.keys()):
-            pass  # nothing to do here
-        elif all('previous' == self.shift[k] for k in self.shift.keys()):
-            X = X.shift(1)
-            y = y.iloc[1:]
-            X = X.iloc[1:]
-        elif all('mean_over_interval' == self.shift[k] for k in self.shift.keys()):
+        # extract the names of lagged inputs
+        lagged_inputs = []
+        for f in FeatureConstruction.features:
+            if isinstance(f, FeatureLag):
+                lagged_inputs.append(f.feature) # name of the feature
 
-            # output interval is target grid
-            y.dropna(inplace=True)
+        inputs_without_lags = [inp for inp in self.inputs if inp not in lagged_inputs]
+
+        # Applies feature constructions defined in `FeatureConstruction`.
+        # Only apply for those features that are not lags since lags must be constructed after sampling the data
+        # according to the given time step
+        FeatureConstruction.process(df, feature_names=inputs_without_lags)
 
+        df = df[inputs_without_lags + [out for out in self.output if out not in inputs_without_lags]]
+
+        # Nan handling in first and last rows
+        non_nan_rows = df.notna().all(axis=1)
+        first_valid_index = non_nan_rows.idxmax() if non_nan_rows.any() else None
+        last_valid_index = non_nan_rows.iloc[::-1].idxmax() if non_nan_rows.any() else None
+        df = df.loc[first_valid_index:last_valid_index]
+
+        def get_mean_over_interval(y: pd.DataFrame, x: pd.DataFrame, inputs: list[str]):
             def pairwise(iterable: Iterable):
                 "s -> (s0,s1), (s1,s2), (s2, s3), ..."
                 a, b = itertools.tee(iterable)
                 next(b, None)
                 return zip(a, b)
 
-            original_grid = np.array(X.index)
+            original_grid = np.array(x.index)
             results = []
-            for i, j in pairwise(y.index):
+            for i, j in pairwise(y.index): # output interval is target grid
                 slicer = np.logical_and(original_grid >= i, original_grid < j)
                 d = {'Index': j}
-                for inp in self.inputs:
-                    d[inp] = X[inp][slicer].mean()
+                for inp in inputs:
+                    d[inp] = x[inp][slicer].mean()
                 results.append(d)
 
-            # length of X and Y have to be synchronized
+            x = pd.DataFrame(results).set_index('Index')
+
+            return x
+
+        # output is independent of shift -> filter / sample according to time step already
+        y = df[self.output].copy()
+        y = self.filter_df_according_to_timestep(y)
+
+        X = df[inputs_without_lags].copy()
+
+        if all('current' == self.shift[k] for k in inputs_without_lags):
+            # filter / sample data
+            X = self.filter_df_according_to_timestep(X)
+            # nothing more to do here
+        elif all('previous' == self.shift[k] for k in inputs_without_lags):
+            # filter / sample data
+            X = self.filter_df_according_to_timestep(X)
+
+            # shift data by 1 and shorten DataFrames accordingly
+            X = X.shift(1)
+            y = y.iloc[1:]
+            X = X.iloc[1:]
+        elif all('mean_over_interval' == self.shift[k] for k in inputs_without_lags):
+            X = get_mean_over_interval(y, X, inputs_without_lags)
+            # synchronize length between X and y
             y = y.iloc[1:]
-            X = pd.DataFrame(results).set_index('Index')
 
-        else:  # different inputs have different shift
-            pass
+        else:  # different inputs have different shifts
+            res = []
+            for inp in inputs_without_lags:
+                # only process inputs with shift method mean_over_interval first since X cannot be filtered / sampled
+                # to the actual required time steps until the intermediate values were taken into the mean
+                if self.shift[inp] == 'mean_over_interval':
+                    res.append(get_mean_over_interval(y, X[[inp]], [inp]))
+
+            # filter / sample X according to required time step
+            X = self.filter_df_according_to_timestep(X)
+            # process inputs with shift methods 'current' and 'previous'
+            for inp in inputs_without_lags:
+                _x = X[[inp]]
+                if self.shift[inp] == 'current':
+                    # no transformation needed
+                    res.append(_x)
+                elif self.shift[inp] == 'previous':
+                    # shift by 1
+                    _x = _x.shift(1)
+                    _x = _x.iloc[1:]
+                    res.append(_x)
+                elif self.shift[inp] == 'mean_over_interval':
+                    continue
+                else:
+                    raise NotImplementedError(f"Shift method '{self.shift[inp]}' not implemented.")
+
+            X = pd.concat(res, axis=1)
+
+            # Shift methods 'previous' and 'mean_over_interval' reduce available data points by 1.
+            # Therefore, length of X and y have to be synchronized
+            if 'previous' in self.shift.values() or 'mean_over_interval' in self.shift.values():
+                y = y.iloc[1:]
+                X = X.sort_index(ascending=True)
+                X = X.iloc[1:]
+
+        res_df = pd.concat([X, y], axis=1)
+
+        if res_df.isnull().values.any():
+            if self.ignore_nan:
+                res_df.dropna(inplace=True)
+            else:
+                raise ValueError(
+                    "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set ignore_nan=True in PreprocessingSingleStep.")
+
+        # Applies feature constructions defined in `FeatureConstruction` to the lagged inputs
+        FeatureConstruction.process(res_df, feature_names=lagged_inputs)
 
-        # y = df[self.output].shift(-self.shift)
+        # drop NaNs occurring due to creation of lags
+        res_df.dropna(inplace=True)
+
+        X = res_df[self.inputs]
+        y = res_df[self.output]
 
         return X, y
 
@@ -469,6 +536,9 @@ def process_data(self, df: pd.DataFrame) -> TrainingDataMultiStep:
             TrainingDataMultiStep: Container with tf.data.Dataset objects.
         """
 
+        # filter data
+        df = self.filter_df_according_to_timestep(df)
+
         # Applies feature constructions defined in `FeatureConstruction`.
         FeatureConstruction.process(df)
 

From 0d3783bf9c2aff7a2d6a332d211e808fc768deb1 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 28 Nov 2025 12:02:23 +0100
Subject: [PATCH 09/42] Fixed small error with feature selection test script

---
 physXAI/preprocessing/constructed.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index c481277..590b694 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -566,7 +566,7 @@ def process(df: DataFrame, feature_names: list[str] = None):
             feature_names (list[str]): optional parameter to only process those features given in feature_names
         """
 
-        if not feature_names:
+        if feature_names is None:
             for f in FeatureConstruction.features:
                 f.process(df)
         else:

From 2d23228a1b4aa7274bd8f753e02e6f17f82c1954 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Sat, 29 Nov 2025 00:56:41 +0100
Subject: [PATCH 10/42] Fixed error in feature construction output wasn't
 considered, error occurred when using pinn

---
 physXAI/preprocessing/preprocessing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 6e39a7e..a173b6a 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -263,7 +263,7 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         # extract the names of lagged inputs
         lagged_inputs = []
         for f in FeatureConstruction.features:
-            if isinstance(f, FeatureLag):
+            if isinstance(f, FeatureLag) and (f.feature in (self.inputs + self.output)):
                 lagged_inputs.append(f.feature) # name of the feature
 
         inputs_without_lags = [inp for inp in self.inputs if inp not in lagged_inputs]
@@ -271,7 +271,7 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         # Applies feature constructions defined in `FeatureConstruction`.
         # Only apply for those features that are not lags since lags must be constructed after sampling the data
         # according to the given time step
-        FeatureConstruction.process(df, feature_names=inputs_without_lags)
+        FeatureConstruction.process(df, feature_names=inputs_without_lags + [out for out in self.output if out not in inputs_without_lags])
 
         df = df[inputs_without_lags + [out for out in self.output if out not in inputs_without_lags]]
 

From 5ba2f224cd0218ba5ecb21c2b9f71e29a15b8b53 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Sat, 29 Nov 2025 00:00:04 +0000
Subject: [PATCH 11/42] Update coverage badge [skip ci]

---
 build/reports/coverage.svg | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/build/reports/coverage.svg b/build/reports/coverage.svg
index c149003..1c7007c 100644
--- a/build/reports/coverage.svg
+++ b/build/reports/coverage.svg
@@ -9,13 +9,13 @@
     </mask>
     <g mask="url(#a)">
         <path fill="#555" d="M0 0h63v20H0z"/>
-        <path fill="#97CA00" d="M63 0h36v20H63z"/>
+        <path fill="#a4a61d" d="M63 0h36v20H63z"/>
         <path fill="url(#b)" d="M0 0h99v20H0z"/>
     </g>
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">90%</text>
-        <text x="80" y="14">90%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">89%</text>
+        <text x="80" y="14">89%</text>
     </g>
 </svg>

From 3dec440b94b2db71ead112488c0646ea3eecdf48 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Wed, 3 Dec 2025 08:07:48 +0000
Subject: [PATCH 12/42] Update coverage badge [skip ci]

---
 build/reports/coverage.svg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/reports/coverage.svg b/build/reports/coverage.svg
index 1c7007c..b3e8ba0 100644
--- a/build/reports/coverage.svg
+++ b/build/reports/coverage.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">89%</text>
-        <text x="80" y="14">89%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">88%</text>
+        <text x="80" y="14">88%</text>
     </g>
 </svg>

From 88b1ccc9fac20f99793f7b9cee65da0038f98ae0 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 3 Dec 2025 10:29:36 +0100
Subject: [PATCH 13/42] implemented custom default for shift

---
 physXAI/preprocessing/preprocessing.py | 23 +++++++++++++++++------
 unittests/test_coverage.py             | 15 +++++++++++++++
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index a173b6a..f961074 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -13,7 +13,7 @@
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
 
 
-def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str]) -> dict:
+def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str], custom_default: Union[int, str] = None) -> dict:
     """
     Convert a given shift variable (int, str) into a dictionary in which a shift is defined for every input.
     If a dictionary is given as shift, check entries and autocomplete dict if necessary.
@@ -21,8 +21,9 @@ def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str]) -> dict:
     Args:
         s (Union[int, str, dict]): Shift value. Either a single string or int which then will be applied to all the inputs or
             a dictionary in which a different shift can be defined for each input. If the dictionary does not specify the
-            shift for all inputs, the shift for inputs not specified is set to 'previous' as default (autocomplete)
+            shift for all inputs, the shift for inputs not specified is set to the default value (autocomplete)
         inputs (list(str)): List of Input variables
+        custom_default (Union[int, str]): shift assigned to inputs not specified in the dict; if None, 'previous' is used
     """
 
     def return_valid_shift(val: Union[int, str]):
@@ -39,6 +40,9 @@ def return_valid_shift(val: Union[int, str]):
                 f"'previous' (or 1 if s is int) or 'mean_over_interval'.")
         return val
 
+    # set custom default or - if no custom default is specified - use 'previous' as default
+    default = 'previous' if custom_default is None else return_valid_shift(custom_default)
+
     if isinstance(s, (int, str)):
         d = {}
         s = return_valid_shift(s)
@@ -68,11 +72,11 @@ def get_lag(inputs: list[str], current_input: str) -> int:
 
         for inp in inputs_without_lags.keys():
             # if an input has a shift assigned already, the validity is checked
-            # otherwise 'previous' is assigned (default value)
+            # otherwise default value is assigned
             if inp in s.keys():
                 d.update({inp: return_valid_shift(s[inp])})
             else:
-                d.update({inp: 'previous'})
+                d.update({inp: default})
 
             # all inputs with lags should have the same shift
             if inputs_without_lags[inp] > 0: # if current input has lags
@@ -128,7 +132,14 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
         if isinstance(output, str):
             output = [output]
         self.output: list[str] = output
-        self.shift: dict = convert_shift_to_dict(shift, inputs)
+
+        if isinstance(shift, dict) and '_default' in shift.keys():
+            self.shift_default = shift['_default']
+            shift.__delitem__('_default')
+        else:
+            self.shift_default = None
+        self.shift: dict = convert_shift_to_dict(shift, inputs, custom_default=self.shift_default)
+
         self.time_step = time_step
 
         # Training, validation and test size should be equal to 1
@@ -254,7 +265,7 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         # check if current inputs match inputs (keys) in shift dictionary and update shift if necessary
         # required for recursive feature selection since inputs change after initialization of Preprocessing object
         if (len(self.inputs) != len(self.shift.keys())) or not all(inp in self.shift.keys() for inp in self.inputs):
-            self.shift = convert_shift_to_dict(self.shift, self.inputs)
+            self.shift = convert_shift_to_dict(self.shift, self.inputs, custom_default=self.shift_default)
 
         assert len(self.inputs) == len(self.shift.keys()), (
             f"Something went wrong, number of inputs ({len(self.inputs)})"
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index f083a35..e968034 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -153,6 +153,21 @@ def test_autocomplete_incomplete_dict(self):
         assert len(res) == len(self.inputs)
         assert res == res_expected
 
+    # test case: autocomplete incomplete dictionary given for shift with custom default
+    def test_autocomplete_incomplete_dict_with_custom_default(self):
+        shift = {'reaTZon_y': 1, 'reaTZon_y_lag1': 1, 'weaSta_reaWeaTDryBul_y': 'mean_over_interval'}
+
+        # custom default 0 ('current') is applied to all inputs that are not specified
+        res = convert_shift_to_dict(shift, self.inputs, custom_default=0)
+        res_expected = {'reaTZon_y': 'previous', 'reaTZon_y_lag1': 'previous', 'reaTZon_y_lag2': 'previous',
+                        'weaSta_reaWeaTDryBul_y': 'mean_over_interval',
+                        'weaSta_reaWeaTDryBul_y_lag1': 'mean_over_interval',
+                        'weaSta_reaWeaHDirNor_y': 'current', 'oveHeaPumY_u': 'current',
+                        'oveHeaPumY_u_lag1': 'current',
+                        'oveHeaPumY_u_lag2': 'current'}
+        assert len(res) == len(self.inputs)
+        assert res == res_expected
+
     # test case: lags of the same input have mismatching shifts
     def test_lag_with_mismatching_shifts(self):
         shift = {'reaTZon_y': 0, 'reaTZon_y_lag1': 1, 'weaSta_reaWeaTDryBul_y': 'mean_over_interval'}

From ef9a8d4a0b2cc00a0de934307972d733ead3403f Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 3 Dec 2025 11:15:54 +0100
Subject: [PATCH 14/42] Updated docstrings

---
 physXAI/preprocessing/preprocessing.py | 30 ++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index f961074..210ab0a 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -109,8 +109,19 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
         Args:
             inputs (List[str]): List of column names to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
-            shift (int): The number of time steps to shift the target variable for forecasting.  # TODO: update docstring
-                         A shift of one means predicting the next time step.
+            shift (Union[int, str, dict]): Time step of the input data used to predict the output.
+                - If a single int or str is given, it applies to all inputs.
+                - If a dict is provided, it can specify different shifts for individual inputs.
+                - If not all inputs are specified in the dict, unspecified inputs will use a default value (autocomplete).
+                Examples:
+                    - shift = 0 or shift = 'current': Current time step will be used for prediction.
+                    - shift = 1 or shift = 'previous': Previous values will be used for prediction.
+                    - shift = 'mean_over_interval': Mean between current and previous time step will be used.
+                    - shift = {
+                        'inp_1': 1,
+                        'inp_2': 'mean_over_interval',
+                        '_default': 0,  # current time step will be used for all inputs not specified in the dict
+                    }
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
             val_size (float): Proportion of the dataset to allocate to the validation set.
@@ -229,8 +240,19 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
         Args:
             inputs (List[str]): List of column names to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
-            shift (int): The number of time steps to shift the target variable for forecasting. # TODO: update doc dring
-                         A shift of one means predicting the next time step.
+            shift (Union[int, str, dict]): Time step of the input data used to predict the output.
+                - If a single int or str is given, it applies to all inputs.
+                - If a dict is provided, it can specify different shifts for individual inputs.
+                - If not all inputs are specified in the dict, unspecified inputs will use a default value (autocomplete).
+                Examples:
+                    - shift = 0 or shift = 'current': Current time step will be used for prediction.
+                    - shift = 1 or shift = 'previous': Previous values will be used for prediction.
+                    - shift = 'mean_over_interval': Mean between current and previous time step will be used.
+                    - shift = {
+                        'inp_1': 1,
+                        'inp_2': 'mean_over_interval',
+                        '_default': 0,  # current time step will be used for all inputs not specified in the dict
+                    }
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
             val_size (float): Proportion of the dataset to allocate to the validation set.

From b4588036227b5d2fa0a60c95663c16750111bee7 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 3 Dec 2025 15:49:19 +0100
Subject: [PATCH 15/42] Implemented test and example for different shifts
 updated docstrings

---
 .../Dummy_shifting.py                         | 73 +++++++++++++++++++
 physXAI/preprocessing/preprocessing.py        | 15 ++--
 unittests/test_coverage.py                    | 31 ++++++++
 3 files changed, 113 insertions(+), 6 deletions(-)
 create mode 100644 executables/bestest_hydronic_heat_pump/Dummy_shifting.py

diff --git a/executables/bestest_hydronic_heat_pump/Dummy_shifting.py b/executables/bestest_hydronic_heat_pump/Dummy_shifting.py
new file mode 100644
index 0000000..340dd75
--- /dev/null
+++ b/executables/bestest_hydronic_heat_pump/Dummy_shifting.py
@@ -0,0 +1,73 @@
+from physXAI.models.ann.ann_design import ClassicalANNModel
+from physXAI.preprocessing.preprocessing import PreprocessingSingleStep
+from physXAI.preprocessing.constructed import Feature
+from physXAI.utils.logging import Logger
+
+
+"""
+This script demonstrates the usage of different shifts. It is not physically meaningful.
+"""
+# Set up logger for saving
+Logger.setup_logger(folder_name='Dummy_shifting_ann', override=True)
+
+# File path to data
+file_path = r"data/bestest_hydronic_heat_pump/pid_data.csv"
+
+# List of input features. Can include constructed features and lagged inputs
+inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
+          'weaSta_reaWeaHDirNor_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
+# Output feature
+output = 'Change(T_zone)'
+
+""" 
+The constructed features are automatically added to the data via 'physXAI.preprocessing.constructed.py' 
+Lagged inputs can be added directly based on the feature
+"""
+x1 = Feature('reaTZon_y')
+x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
+x2 = Feature('weaSta_reaWeaTDryBul_y')
+x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
+x3 = Feature('oveHeaPumY_u')
+x3.lag(2)  # oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
+
+
+"""
+shift (Union[int, str, dict]): Time step of the input data used to predict the output.
+    - If a single int or str is given, it applies to all inputs.
+    - If a dict is provided, it can specify different shifts for individual inputs.
+    - If not all inputs are specified in the dict, unspecified inputs will use a default value (autocomplete).
+    Examples:
+        - shift = 0 or shift = 'current': Current time step will be used for prediction.
+        - shift = 1 or shift = 'previous': Previous values will be used for prediction.
+        - shift = 'mean_over_interval': Mean between current and previous time step will be used.
+        - shift = {
+            'inp_1': 1,
+            'inp_2': 'mean_over_interval',
+            '_default': 0,  # current time step will be used for all inputs not specified in the dict
+            # If no custom default value is given in dict, 'previous' will be used as default
+        }
+"""
+shift = {
+    'reaTZon_y': 'previous',  # for all lags of reaTZon_y, the shift will be set automatically
+    'weaSta_reaWeaHDirNor_y': 'mean_over_interval',
+    '_default': 0,
+}
+
+# Create Training data
+# Time step defines target sampling: if original sampling of data is in 15min intervals, it is resampled to 1h intervals for time_step=4
+# Hence, if the shift method of an input is defined as 'mean_over_interval', the mean over the last hour is taken as input
+prep = PreprocessingSingleStep(inputs, output, shift=shift, time_step=4)
+
+# Process Training data
+td = prep.pipeline(file_path)
+
+# Classical ANN
+m = ClassicalANNModel(epochs=500)
+
+# Training pipeline
+model = m.pipeline(td)
+
+# Log setup of preprocessing and model as json
+Logger.log_setup(prep, m)
+# Log training data as pickle
+Logger.save_training_data(td)
\ No newline at end of file
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 210ab0a..1ad0f6e 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -121,6 +121,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
                         'inp_1': 1,
                         'inp_2': 'mean_over_interval',
                         '_default': 0,  # current time step will be used for all inputs not specified in the dict
+                        # If no custom default value is given in dict, 'previous' will be used as default
                     }
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
@@ -252,6 +253,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
                         'inp_1': 1,
                         'inp_2': 'mean_over_interval',
                         '_default': 0,  # current time step will be used for all inputs not specified in the dict
+                        # If no custom default value is given in dict, 'previous' will be used as default
                     }
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
@@ -274,7 +276,7 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
                 1. Applies feature constructions defined in `FeatureConstruction`.
                 2. Selects relevant input and output columns.
                 3. Handles missing values by dropping rows.
-                4. Shifts the target variable(s) `y` for forecasting.
+                4. Applies the shift on each input variable.
 
                 Args:
                     df (pd.DataFrame): The input DataFrame.
@@ -314,7 +316,8 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         last_valid_index = non_nan_rows.iloc[::-1].idxmax() if non_nan_rows.any() else None
         df = df.loc[first_valid_index:last_valid_index]
 
-        def get_mean_over_interval(y: pd.DataFrame, x: pd.DataFrame, inputs: list[str]):
+        def get_mean_over_interval(y: pd.DataFrame, x: pd.DataFrame):
+            """return mean values of x on target sampling (index of y)"""
             def pairwise(iterable: Iterable):
                 "s -> (s0,s1), (s1,s2), (s2, s3), ..."
                 a, b = itertools.tee(iterable)
@@ -326,7 +329,7 @@ def pairwise(iterable: Iterable):
             for i, j in pairwise(y.index): # output interval is target grid
                 slicer = np.logical_and(original_grid >= i, original_grid < j)
                 d = {'Index': j}
-                for inp in inputs:
+                for inp in x.columns:
                     d[inp] = x[inp][slicer].mean()
                 results.append(d)
 
@@ -353,7 +356,7 @@ def pairwise(iterable: Iterable):
             y = y.iloc[1:]
             X = X.iloc[1:]
         elif all('mean_over_interval' == self.shift[k] for k in inputs_without_lags):
-            X = get_mean_over_interval(y, X, inputs_without_lags)
+            X = get_mean_over_interval(y, X)
             # synchronize length between X and y
             y = y.iloc[1:]
 
@@ -363,7 +366,7 @@ def pairwise(iterable: Iterable):
                 # only process inputs with shift method mean_over_interval first since X cannot be filtered / sampled
                 # to the actual required time steps until the intermediate values were taken into the mean
                 if self.shift[inp] == 'mean_over_interval':
-                    res.append(get_mean_over_interval(y, X[[inp]], [inp]))
+                    res.append(get_mean_over_interval(y, X[[inp]]))
 
             # filter / sample X according to required time step
             X = self.filter_df_according_to_timestep(X)
@@ -386,7 +389,7 @@ def pairwise(iterable: Iterable):
             X = pd.concat(res, axis=1)
 
             # Shift methods 'previous' and 'mean_over_interval' reduce available data points by 1.
-            # Therefore, length of X and y have to be synchronized
+            # Therefore, lengths of X and y have to be synchronized
             if 'previous' in self.shift.values() or 'mean_over_interval' in self.shift.values():
                 y = y.iloc[1:]
                 X = X.sort_index(ascending=True)
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index e968034..49c6099 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -37,6 +37,11 @@ def inputs_php():
 def inputs_tair():
     return ['reaTZon_y', 'weaSta_reaWeaTDryBul_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1']
 
+@pytest.fixture(scope='module')
+def inputs_tair_extended():
+    return ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
+              'weaSta_reaWeaHDirNor_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
+
 @pytest.fixture(scope='module')
 def output_php():
     return 'reaPHeaPum_y'
@@ -227,6 +232,32 @@ def tair_data_total(file_path, inputs_tair, output_tair):
     td = prep.pipeline(file_path)
     return prep, td
 
+def test_shifting(file_path, inputs_tair_extended, output_tair):
+    # Set up logger for saving
+    Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
+
+    # Create lags
+    x1 = Feature('reaTZon_y')
+    x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
+    x2 = Feature('weaSta_reaWeaTDryBul_y')
+    x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
+    x3 = Feature('oveHeaPumY_u')
+    x3.lag(2)  # oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
+
+    shift = {
+        'reaTZon_y': 'previous',  # for all lags of reaTZon_y, the shift will be set automatically
+        'weaSta_reaWeaHDirNor_y': 'mean_over_interval',
+        '_default': 0,
+    }
+
+    # Create & process Training data
+    prep = PreprocessingSingleStep(inputs_tair_extended, output_tair, shift=shift, time_step=4)
+    td = prep.pipeline(file_path)
+
+    # Build & train Classical ANN
+    m = ClassicalANNModel(epochs=100)
+    model = m.pipeline(td)
+
 def test_model_linReg(inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)

From 23ac15bb17078960058b8560210aa0cdd8ef861e Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Wed, 3 Dec 2025 14:53:41 +0000
Subject: [PATCH 16/42] Update coverage badge [skip ci]

---
 build/reports/coverage.svg | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/build/reports/coverage.svg b/build/reports/coverage.svg
index b3e8ba0..c149003 100644
--- a/build/reports/coverage.svg
+++ b/build/reports/coverage.svg
@@ -9,13 +9,13 @@
     </mask>
     <g mask="url(#a)">
         <path fill="#555" d="M0 0h63v20H0z"/>
-        <path fill="#a4a61d" d="M63 0h36v20H63z"/>
+        <path fill="#97CA00" d="M63 0h36v20H63z"/>
         <path fill="url(#b)" d="M0 0h99v20H0z"/>
     </g>
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">88%</text>
-        <text x="80" y="14">88%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">90%</text>
+        <text x="80" y="14">90%</text>
     </g>
 </svg>

From 80815ee97acc79bcdc2564a7cce533824814d1df Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 3 Dec 2025 15:59:52 +0100
Subject: [PATCH 17/42] reduce number of epochs for more efficient testing

---
 executables/bestest_hydronic_heat_pump/Dummy_shifting.py | 2 +-
 unittests/test_coverage.py                               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/executables/bestest_hydronic_heat_pump/Dummy_shifting.py b/executables/bestest_hydronic_heat_pump/Dummy_shifting.py
index 340dd75..48859bb 100644
--- a/executables/bestest_hydronic_heat_pump/Dummy_shifting.py
+++ b/executables/bestest_hydronic_heat_pump/Dummy_shifting.py
@@ -62,7 +62,7 @@
 td = prep.pipeline(file_path)
 
 # Classical ANN
-m = ClassicalANNModel(epochs=500)
+m = ClassicalANNModel(epochs=50)
 
 # Training pipeline
 model = m.pipeline(td)
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 49c6099..54ba351 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -255,7 +255,7 @@ def test_shifting(file_path, inputs_tair_extended, output_tair):
     td = prep.pipeline(file_path)
 
     # Build & train Classical ANN
-    m = ClassicalANNModel(epochs=100)
+    m = ClassicalANNModel(epochs=1)
     model = m.pipeline(td)
 
 def test_model_linReg(inputs_php, output_php, file_path):

From 367624fb13ce4dabb6cddfd6663e89e69769defa Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 5 Dec 2025 11:11:38 +0100
Subject: [PATCH 18/42] implemented handling of constructed features including
 lagged features fixing review issue
 https://github.com/RWTH-EBC/physXAI/pull/51#discussion_r2589444241

---
 physXAI/preprocessing/constructed.py   | 41 ++++++++++++++++++++++++++
 physXAI/preprocessing/preprocessing.py | 11 +++----
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index 975b3e9..df88405 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -559,6 +559,47 @@ def get_feature(name: str) -> Union[FeatureBase, None]:
                 return f
         return None
 
+    @staticmethod
+    def get_features_including_lagged_features(l: list[str] = None) -> list[str]:
+        """
+        Returns the names of all FeatureLag features and of all FeatureTwo features that contain a FeatureLag (directly or nested)
+        - within the given list or
+        - of all constructed features if list is None
+
+        Args:
+            l (list[str]): list of feature names to search in
+
+        Returns:
+            list[str]: the list of lag-based features
+        """
+
+        # if no list is given, search in all features. NOTE(review): l then holds feature objects, but names are tested against it below - verify
+        if not l:
+            l = FeatureConstruction.features
+
+        def recursive_search(feature):
+            """Recursively checks for lagged features"""
+            if isinstance(feature, FeatureLag):
+                return True
+
+            elif isinstance(feature, FeatureTwo):
+                # Check both sub-features recursively
+                return recursive_search(feature.feature1) or recursive_search(feature.feature2)
+
+            return False
+
+        res = list()
+        for f in FeatureConstruction.features:
+            if isinstance(f, FeatureLag) and (f.feature in l):
+                res.append(f.feature)  # name of the feature
+
+            elif isinstance(f, FeatureTwo) and (f.feature in l):
+                # Use recursive search to check for nested lagged features
+                if recursive_search(f.feature1) or recursive_search(f.feature2):
+                    res.append(f.feature)
+
+        return res
+
     @staticmethod
     def process(df: DataFrame, feature_names: list[str] = None):
         """
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 1ad0f6e..cdf26db 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -295,13 +295,10 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
             f"Something went wrong, number of inputs ({len(self.inputs)})"
             f" doesn't match number of inputs defined in shift ({len(self.shift.keys())})")
 
-        # extract the names of lagged inputs
-        lagged_inputs = []
-        for f in FeatureConstruction.features:
-            if isinstance(f, FeatureLag) and (f.feature in (self.inputs + self.output)):
-                lagged_inputs.append(f.feature) # name of the feature
+        # extract the names of all features in inputs and outputs that are based on lagged features
+        lag_based_features = FeatureConstruction.get_features_including_lagged_features(self.inputs + self.output)
 
-        inputs_without_lags = [inp for inp in self.inputs if inp not in lagged_inputs]
+        inputs_without_lags = [inp for inp in self.inputs if inp not in lag_based_features]
 
         # Applies feature constructions defined in `FeatureConstruction`.
         # Only apply for those features that are not lags since lags must be constructed after sampling the data
@@ -405,7 +402,7 @@ def pairwise(iterable: Iterable):
                     "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set ignore_nan=True in PreprocessingSingleStep.")
 
         # Applies feature constructions defined in `FeatureConstruction` to the lagged inputs
-        FeatureConstruction.process(res_df, feature_names=lagged_inputs)
+        FeatureConstruction.process(res_df, feature_names=lag_based_features)
 
         # drop NaNs occurring due to creation of lags
         res_df.dropna(inplace=True)

From 567efcdb7ba1ce42bd738f7552b0dbcbd5f56029 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 5 Dec 2025 17:31:27 +0100
Subject: [PATCH 19/42] Partly integrated shift as attribute sampling_method in
 Feature

---
 physXAI/preprocessing/constructed.py   | 98 ++++++++++++++++++++++----
 physXAI/preprocessing/preprocessing.py | 72 +++++++++----------
 2 files changed, 120 insertions(+), 50 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index df88405..a9e0b7a 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -4,6 +4,20 @@
 from pandas import DataFrame, Series
 
 
+def _return_valid_sampling_method(v: Union[int, str]):
+    """ check the validity of the given sampling method and return a string if val is int """
+    if v in ['current', 0]:
+        return 'current'
+    elif v in ['previous', 1]:
+        return 'previous'
+    elif v == 'mean_over_interval':
+        return 'mean_over_interval'
+    else:
+        raise ValueError(
+            f"Value of sampling method not supported, value is: {v}. Sampling method must be 'current' "
+            f"(or 0 if s is int), 'previous' (or 1 if s is int) or 'mean_over_interval'.")
+
+
 class FeatureBase(ABC):
     """
     Abstract Base Class for all feature engineering components.
@@ -11,20 +25,45 @@ class FeatureBase(ABC):
     in a Pandas DataFrame. It supports arithmetic operations to combine features.
     """
 
-    def __init__(self, name: str, **kwargs):
+    def __init__(self, name: str, sampling_method: Union[str, int] = None, **kwargs):
         """
         Initializes a FeatureBase instance.
 
         Args:
             name (str): The name of the feature. This will be the column name in the DataFrame.
+            sampling_method (Union[str, int]): Time step of the input data used to predict the output.
+                - if None: FeatureConstruction._default_sampling_method is used
+                - if 'current' or 0: Current time step will be used for prediction.
+                - if 'previous' or 1: Previous time step will be used for prediction.
+                - if 'mean_over_interval': Mean between current and previous time step will be used.
             **kwargs: Catches any additional keyword arguments.
         """
 
         self.feature: str = name
+        self.sampling_method = sampling_method
 
         # Automatically registers the newly created feature instance with the FeatureConstruction manager
         FeatureConstruction.append(self)
 
+    @property
+    def sampling_method(self):
+        return self._sampling_method
+
+    @sampling_method.setter
+    def sampling_method(self, val: Union[str, int] = None):
+        """
+        Sets the feature's sampling method. If None is given, FeatureConstruction._default_sampling_method is used
+        Available methods:
+        - 'current' or 0: Current time step will be used for prediction.
+        - 'previous' or 1: Previous time step will be used for prediction.
+        - 'mean_over_interval': Mean between current and previous time step will be used.
+        """
+
+        if val is None:
+            self._sampling_method = FeatureConstruction.get_default_sampling_method()
+        else:
+            self._sampling_method = _return_valid_sampling_method(val)
+
     def rename(self, name: str):
         """
         Renames the feature.
@@ -103,7 +142,8 @@ def lag(self, lag: int, previous: bool = True):
                                 FeatureLag object for the specified lag_value.
 
            Returns:
-               FeatureLag or List[FeatureLag]: A single lagged feature or a list of lagged features.
+               FeatureLag or List[FeatureLag]: A single lagged feature or a list of lagged features, each with the same
+                                                sampling method as their corresponding base feature.
         """
 
         if previous and lag > 1:
@@ -115,8 +155,11 @@ def lag(self, lag: int, previous: bool = True):
             return FeatureLag(self, lag)
 
     def get_config(self) -> dict:
-        return {'class_name': self.__class__.__name__,
-                'name': self.feature}
+        return {
+            'class_name': self.__class__.__name__,
+            'name': self.feature,
+            'sampling_method': self.sampling_method,
+        }
 
     @classmethod
     def from_config(cls, config: dict) -> 'FeatureBase':
@@ -189,11 +232,27 @@ def __init__(self, f: Union[FeatureBase, str], lag: int, name: str = None, **kwa
         """
         if isinstance(f, FeatureBase):
             self.origf: str = f.feature
+            if name is None:
+                name = f.feature + f'_lag{lag}'
+
+            # lags must have the same sampling_method as their base feature
+            sampling_method = f.sampling_method
         else:
             self.origf: str = f
-        if name is None:
-            name = f.feature + f'_lag{lag}'
-        super().__init__(name)
+            if name is None:
+                name = f + f'_lag{lag}'
+
+            # lags must have the same sampling_method as their base feature
+            sampling_method = FeatureConstruction.get_feature(f).sampling_method
+
+        if 'sampling_method' in kwargs.keys():
+            assert kwargs['sampling_method'] == sampling_method, (f'lags must have the same sampling method as their '
+                                                                  f'base feature. Sampling method of base feature is '
+                                                                  f'{sampling_method} but for lag '
+                                                                  f'{kwargs['sampling_method']} was given')
+            kwargs.__delitem__('sampling_method')  # constructor must not get more than one arg with the same key
+
+        super().__init__(name, sampling_method=sampling_method, **kwargs)
         self.lag: int = lag
 
     def process(self, df: DataFrame) -> Series:
@@ -236,7 +295,7 @@ def __init__(self, feature1: Union[FeatureBase, int, float], feature2: Union[Fea
             f2n = str(feature2)
         if name is None:
             name = self.name(f1n, f2n)
-        super().__init__(name)
+        super().__init__(name, **kwargs)
         self.feature1 = feature1
         self.feature2 = feature2
 
@@ -414,7 +473,7 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         self.f1: FeatureBase = f1
         if name is None:
             name = 'exp(' + f1.feature + ')'
-        super().__init__(name)
+        super().__init__(name, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
         if self.feature not in df.columns:
@@ -444,7 +503,7 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         self.f1: FeatureBase = f1
         if name is None:
             name = 'sin(' + f1.feature + ')'
-        super().__init__(name)
+        super().__init__(name, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
         if self.feature not in df.columns:
@@ -474,7 +533,7 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         self.f1: FeatureBase = f1
         if name is None:
             name = 'cos(' + f1.feature + ')'
-        super().__init__(name)
+        super().__init__(name, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
         if self.feature not in df.columns:
@@ -504,7 +563,7 @@ class FeatureConstant(FeatureBase):
 
     def __init__(self, c: float, name: str, **kwargs):
         self.c = c
-        super().__init__(name)
+        super().__init__(name, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
         if self.feature not in df.columns:
@@ -525,6 +584,21 @@ class FeatureConstruction:
     """
 
     features = list[FeatureBase]()
+    _default_sampling_method = 'previous'
+
+    @staticmethod
+    def get_default_sampling_method():
+        return FeatureConstruction._default_sampling_method
+
+    @staticmethod
+    def set_default_sampling_method(val: Union[str, int]):
+        """
+        Sets the default sampling method for all features that do not have a custom sampling method. Available methods:
+        - 'current' or 0: Current time step will be used for prediction.
+        - 'previous' or 1: Previous time step will be used for prediction.
+        - 'mean_over_interval': Mean between current and previous time step will be used.
+        """
+        FeatureConstruction._default_sampling_method = _return_valid_sampling_method(val)
 
     @staticmethod
     def reset():
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index cdf26db..b044165 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import itertools
 from sklearn.model_selection import train_test_split
-from physXAI.preprocessing.constructed import FeatureConstruction, FeatureLag
+from physXAI.preprocessing.constructed import FeatureConstruction, FeatureBase
 from physXAI.preprocessing.training_data import TrainingData, TrainingDataMultiStep, TrainingDataGeneric
 from physXAI.utils.logging import get_full_path
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -194,7 +194,7 @@ def load_data(self, file_path: str) -> pd.DataFrame:
 
         return df
 
-    def filter_df_according_to_timestep(self, df: pd.DataFrame):
+    def sample_df_according_to_timestep(self, df: pd.DataFrame):
         filtering = (df.index - df.index[0]) % self.time_step == 0
         df = df[filtering]
         return df
@@ -276,7 +276,7 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
                 1. Applies feature constructions defined in `FeatureConstruction`.
                 2. Selects relevant input and output columns.
                 3. Handles missing values by dropping rows.
-                4. Applies the shift on each input variable.
+                4. Applies the defined sampling method on each input variable.
 
                 Args:
                     df (pd.DataFrame): The input DataFrame.
@@ -286,15 +286,6 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
                                                        and target (y) DataFrames.
         """
 
-        # check if current inputs match inputs (keys) in shift dictionary and update shift if necessary
-        # required for recursive feature selection since inputs change after initialization of Preprocessing object
-        if (len(self.inputs) != len(self.shift.keys())) or not all(inp in self.shift.keys() for inp in self.inputs):
-            self.shift = convert_shift_to_dict(self.shift, self.inputs, custom_default=self.shift_default)
-
-        assert len(self.inputs) == len(self.shift.keys()), (
-            f"Something went wrong, number of inputs ({len(self.inputs)})"
-            f" doesn't match number of inputs defined in shift ({len(self.shift.keys())})")
-
         # extract the names of all features in inputs and outputs that are based on lagged features
         lag_based_features = FeatureConstruction.get_features_including_lagged_features(self.inputs + self.output)
 
@@ -304,6 +295,7 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         # Only apply for those features that are not lags since lags must be constructed after sampling the data
         # according to the given time step
         FeatureConstruction.process(df, feature_names=inputs_without_lags + [out for out in self.output if out not in inputs_without_lags])
+        features_without_lags: list[FeatureBase] = [FeatureConstruction.get_feature(inp) for inp in inputs_without_lags]
 
         df = df[inputs_without_lags + [out for out in self.output if out not in inputs_without_lags]]
 
@@ -334,60 +326,63 @@ def pairwise(iterable: Iterable):
 
             return x
 
-        # output is independent of shift -> filter / sample according to time step already
+        # output is independent of sampling of inputs -> sample according to time step already
         y = df[self.output].copy()
-        y = self.filter_df_according_to_timestep(y)
+        y = self.sample_df_according_to_timestep(y)
 
         X = df[inputs_without_lags].copy()
 
-        if all('current' == self.shift[k] for k in inputs_without_lags):
+        if all('current' == f.sampling_method for f in features_without_lags):
             # filter / sample data
-            X = self.filter_df_according_to_timestep(X)
+            X = self.sample_df_according_to_timestep(X)
             # nothing more to do here
-        elif all('previous' == self.shift[k] for k in inputs_without_lags):
+        elif all('previous' == f.sampling_method for f in features_without_lags):
             # filter / sample data
-            X = self.filter_df_according_to_timestep(X)
+            X = self.sample_df_according_to_timestep(X)
 
             # shift data by 1 and shorten DataFrames accordingly
             X = X.shift(1)
             y = y.iloc[1:]
             X = X.iloc[1:]
-        elif all('mean_over_interval' == self.shift[k] for k in inputs_without_lags):
+        elif all('mean_over_interval' == f.sampling_method for f in features_without_lags):
             X = get_mean_over_interval(y, X)
             # synchronize length between X and y
             y = y.iloc[1:]
 
-        else:  # different inputs have different shifts
+        else:  # different inputs have different sampling methods
             res = []
-            for inp in inputs_without_lags:
-                # only process inputs with shift method mean_over_interval first since X cannot be filtered / sampled
+            previous_or_mean_in_sampling_methods = False
+            for f in features_without_lags:
+                # only process inputs with sampling method mean_over_interval first since X cannot be sampled
                 # to the actual required time steps until the intermediate values were taken into the mean
-                if self.shift[inp] == 'mean_over_interval':
-                    res.append(get_mean_over_interval(y, X[[inp]]))
-
-            # filter / sample X according to required time step
-            X = self.filter_df_according_to_timestep(X)
-            # process inputs with shift methods 'current' and 'previous'
-            for inp in inputs_without_lags:
-                _x = X[[inp]]
-                if self.shift[inp] == 'current':
+                if f.sampling_method == 'mean_over_interval':
+                    res.append(get_mean_over_interval(y, X[[f.feature]]))
+                    previous_or_mean_in_sampling_methods = True
+
+            # sample X according to required time step
+            X = self.sample_df_according_to_timestep(X)
+            # process inputs with sampling methods 'current' and 'previous'
+            for f in features_without_lags:
+                _x = X[[f.feature]]
+                if f.sampling_method == 'current':
                     # no transformation needed
                     res.append(_x)
-                elif self.shift[inp] == 'previous':
+                elif f.sampling_method == 'previous':
                     # shift by 1
                     _x = _x.shift(1)
                     _x = _x.iloc[1:]
                     res.append(_x)
-                elif self.shift[inp] == 'mean_over_interval':
+                    previous_or_mean_in_sampling_methods = True
+                elif f.sampling_method == 'mean_over_interval':
                     continue
                 else:
-                    raise NotImplementedError(f"Shift method '{self.shift[inp]}' not implemented.")
+                    raise NotImplementedError(f"Sampling method '{f.sampling_method}' not implemented.")
 
             X = pd.concat(res, axis=1)
 
-            # Shift methods 'previous' and 'mean_over_interval' reduce available data points by 1.
+            # Sampling methods 'previous' and 'mean_over_interval' reduce available data points by 1.
             # Therefore, lengths of X and y have to be synchronized
-            if 'previous' in self.shift.values() or 'mean_over_interval' in self.shift.values():
+            if previous_or_mean_in_sampling_methods:
                 y = y.iloc[1:]
                 X = X.sort_index(ascending=True)
                 X = X.iloc[1:]
@@ -399,7 +394,8 @@ def pairwise(iterable: Iterable):
                 res_df.dropna(inplace=True)
             else:
                 raise ValueError(
-                    "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set ignore_nan=True in PreprocessingSingleStep.")
+                    "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set "
+                    "ignore_nan=True in PreprocessingSingleStep.")
 
         # Applies feature constructions defined in `FeatureConstruction` to the lagged inputs
         FeatureConstruction.process(res_df, feature_names=lag_based_features)
@@ -570,7 +566,7 @@ def process_data(self, df: pd.DataFrame) -> TrainingDataMultiStep:
         """
 
         # filter data
-        df = self.filter_df_according_to_timestep(df)
+        df = self.sample_df_according_to_timestep(df)
 
         # Applies feature constructions defined in `FeatureConstruction`.
         FeatureConstruction.process(df)

From e0fc769d29568a559d1957daa81638567121161e Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Sun, 7 Dec 2025 11:46:59 +0100
Subject: [PATCH 20/42] Implemented input list as list of Features and str
 (before: only str allowed)

---
 physXAI/preprocessing/constructed.py   | 27 ++++++++++++++++++++++++++
 physXAI/preprocessing/preprocessing.py | 12 ++++++------
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index a9e0b7a..19af47b 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -674,6 +674,33 @@ def recursive_search(feature):
 
         return res
 
+    @staticmethod
+    def process_inputs(inputs: list[Union[str, FeatureBase]]) -> list[str]:
+        """
+        Creates a Feature for all inputs that are not yet created as features
+
+        Args:
+             inputs (list(Union[str, FeatureBase])): List of column names or Features to be used as input features.
+
+        Returns:
+            list[str]: list of column names of all input features
+        """
+
+        input_str = list()
+
+        for inp in inputs:
+            if isinstance(inp, FeatureBase):
+                input_str.append(inp.feature)  # get name of feature (which is used as column name)
+            elif isinstance(inp, str):
+                input_str.append(inp)
+                # check if a Feature with the given name (inp) was already created, otherwise create it
+                if not any(inp == f.feature for f in FeatureConstruction.features):
+                    Feature(name=inp)
+            else:
+                raise TypeError(f"Only inputs with types 'str' or 'FeatureBase' allowed, got type {type(inp)} instead")
+
+        return input_str
+
     @staticmethod
     def process(df: DataFrame, feature_names: list[str] = None):
         """
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index b044165..97b5310 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -98,7 +98,7 @@ class PreprocessingData(ABC):
     Abstract Preprocessing Class
     """
 
-    def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
+    def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
                  time_step: Optional[Union[int, float]] = None,
                  test_size: float = 0.1, val_size: float = 0.1, random_state: int = 42,
                  time_index_col: Union[str, float] = 0, csv_delimiter: str = ';', csv_encoding: str = 'latin1',
@@ -107,7 +107,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
         Initializes the Preprocessing instance.
 
         Args:
-            inputs (List[str]): List of column names to be used as input features.
+            inputs (List[Union[str, FeatureBase]]): List of column names or Features to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
             shift (Union[int, str, dict]): Time step of the input data used to predict the output.
                 - If a single int or str is given, it applies to all inputs.
@@ -140,7 +140,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
         self.csv_header = csv_header
         self.csv_skiprows = csv_skiprows
 
-        self.inputs: list[str] = inputs
+        self.inputs: list[str] = FeatureConstruction.process_inputs(inputs)
         if isinstance(output, str):
             output = [output]
         self.output: list[str] = output
@@ -230,7 +230,7 @@ class PreprocessingSingleStep(PreprocessingData):
     validation, and test sets.
     """
 
-    def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
+    def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
                  time_step: Optional[Union[int, float]] = None,
                  test_size: float = 0.1, val_size: float = 0.1, random_state: int = 42,
                  time_index_col: Union[str, float] = 0, csv_delimiter: str = ';', csv_encoding: str = 'latin1',
@@ -239,7 +239,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], shift: Unio
         Initializes the PreprocessingSingleStep instance.
 
         Args:
-            inputs (List[str]): List of column names to be used as input features.
+            inputs (List[Union[str, FeatureBase]]): List of column names or Features to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
             shift (Union[int, str, dict]): Time step of the input data used to predict the output.
                 - If a single int or str is given, it applies to all inputs.
@@ -492,7 +492,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], label_width
         Initializes the PreprocessingMultiStep instance.
 
         Args:
-            inputs (List[str]): Column names for input features to the main RNN.
+           inputs (List[Union[str, FeatureBase]]): List of column names or Features that are input features to the main RNN.
             output (Union[str, List[str]]): Column name(s) for target variable(s).
             label_width (int): Number of time steps in the output (label) sequence.
             warmup_width (int): Number of time steps in the warmup sequence (for RNN state initialization).

From be1365b350f200ab53351ca10ec3b88c57662530 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Sun, 7 Dec 2025 10:50:09 +0000
Subject: [PATCH 21/42] Update coverage badge [skip ci]

---
 build/reports/coverage.svg | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/build/reports/coverage.svg b/build/reports/coverage.svg
index c149003..6963b3e 100644
--- a/build/reports/coverage.svg
+++ b/build/reports/coverage.svg
@@ -9,13 +9,13 @@
     </mask>
     <g mask="url(#a)">
         <path fill="#555" d="M0 0h63v20H0z"/>
-        <path fill="#97CA00" d="M63 0h36v20H63z"/>
+        <path fill="#a4a61d" d="M63 0h36v20H63z"/>
         <path fill="url(#b)" d="M0 0h99v20H0z"/>
     </g>
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">90%</text>
-        <text x="80" y="14">90%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">87%</text>
+        <text x="80" y="14">87%</text>
     </g>
 </svg>

From 7d439f883f3e66c6824bd267831bb00919cb2f03 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Sun, 7 Dec 2025 11:58:16 +0100
Subject: [PATCH 22/42] Fix SyntaxError in Python versions earlier than 3.12

---
 physXAI/preprocessing/constructed.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index 19af47b..a8c5da1 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -246,10 +246,10 @@ def __init__(self, f: Union[FeatureBase, str], lag: int, name: str = None, **kwa
             sampling_method = FeatureConstruction.get_feature(f).sampling_method
 
         if 'sampling_method' in kwargs.keys():
-            assert kwargs['sampling_method'] == sampling_method, (f'lags must have the same sampling method as their '
-                                                                  f'base feature. Sampling method of base feature is '
-                                                                  f'{sampling_method} but for lag '
-                                                                  f'{kwargs['sampling_method']} was given')
+            assert kwargs['sampling_method'] == sampling_method, (
+                f"lags must have the same sampling method as their base feature. Sampling method of base feature is"
+                f" {sampling_method} but for lag {kwargs['sampling_method']} was given as sampling method."
+            )
             kwargs.__delitem__('sampling_method')  # constructor must not get more than one arg with the same key
 
         super().__init__(name, sampling_method=sampling_method, **kwargs)

From f160948bf084802fab25c71d681aca6043411ec2 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Sun, 7 Dec 2025 14:09:28 +0100
Subject: [PATCH 23/42] Added DeprecationWarning for shift parameter, updated
 testing and example

---
 .../Dummy_shifting.py                         | 73 -----------------
 .../different_sampling_methods.py             | 78 +++++++++++++++++++
 physXAI/preprocessing/preprocessing.py        | 67 ++++++----------
 unittests/test_coverage.py                    | 37 ++++++---
 4 files changed, 126 insertions(+), 129 deletions(-)
 delete mode 100644 executables/bestest_hydronic_heat_pump/Dummy_shifting.py
 create mode 100644 executables/bestest_hydronic_heat_pump/different_sampling_methods.py

diff --git a/executables/bestest_hydronic_heat_pump/Dummy_shifting.py b/executables/bestest_hydronic_heat_pump/Dummy_shifting.py
deleted file mode 100644
index 48859bb..0000000
--- a/executables/bestest_hydronic_heat_pump/Dummy_shifting.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from physXAI.models.ann.ann_design import ClassicalANNModel
-from physXAI.preprocessing.preprocessing import PreprocessingSingleStep
-from physXAI.preprocessing.constructed import Feature
-from physXAI.utils.logging import Logger
-
-
-"""
-This script demonstrates the usage of different shifts. It is not physically meaningful.
-"""
-# Setup up logger for saving
-Logger.setup_logger(folder_name='Dummy_shifting_ann', override=True)
-
-# File path to data
-file_path = r"data/bestest_hydronic_heat_pump/pid_data.csv"
-
-# List of input features. Can include constructed features and lagged inputs
-inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
-          'weaSta_reaWeaHDirNor_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
-# Output feature
-output = 'Change(T_zone)'
-
-""" 
-The constructed features are automatically added to the data via 'physXAI.preprocessing.constructed.py' 
-Lagged inputs can be added directly based on the feature
-"""
-x1 = Feature('reaTZon_y')
-x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
-x2 = Feature('weaSta_reaWeaTDryBul_y')
-x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
-x3 = Feature('oveHeaPumY_u')
-x3.lag(2)  # oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
-
-
-"""
-shift (Union[int, str, dict]): Time step of the input data used to predict the output.
-    - If a single int or str is given, it applies to all inputs.
-    - If a dict is provided, it can specify different shifts for individual inputs.
-    - If not all inputs are specified in the dict, unspecified inputs will use a default value (autocomplete).
-    Examples:
-        - shift = 0 or shift = 'current': Current time step will be used for prediction.
-        - shift = 1 or shift = 'previous': Previous values will be used for prediction.
-        - shift = 'mean_over_interval': Mean between current and previous time step will be used.
-        - shift = {
-            'inp_1': 1,
-            'inp_2': 'mean_over_interval',
-            '_default': 0,  # current time step will be used for all inputs not specified in the dict
-            # If no custom default value is given in dict, 'previous' will be used as default
-        }
-"""
-shift = {
-    'reaTZon_y': 'previous',  # for all lags of reaTZon_y, the shift will be set automatically
-    'weaSta_reaWeaHDirNor_y': 'mean_over_interval',
-    '_default': 0,
-}
-
-# Create Training data
-# Time step defines target sampling: if original sampling of data is in 15min intervals, it is resampled to 1h intervals for time_step=4
-# Hence, if the shift method of an input is defined as 'mean_over_interval', the mean over the last hour is taken as input
-prep = PreprocessingSingleStep(inputs, output, shift=shift, time_step=4)
-
-# Process Training data
-td = prep.pipeline(file_path)
-
-# Classical ANN
-m = ClassicalANNModel(epochs=50)
-
-# Training pipeline
-model = m.pipeline(td)
-
-# Log setup of preprocessing and model as json
-Logger.log_setup(prep, m)
-# Log training data as pickle
-Logger.save_training_data(td)
\ No newline at end of file
diff --git a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
new file mode 100644
index 0000000..e292a6f
--- /dev/null
+++ b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
@@ -0,0 +1,78 @@
+from physXAI.models.ann.ann_design import ClassicalANNModel
+from physXAI.preprocessing.preprocessing import PreprocessingSingleStep
+from physXAI.preprocessing.constructed import Feature, FeatureConstruction, FeatureExp
+from physXAI.utils.logging import Logger
+
+"""
+This script demonstrates the usage of different sampling methods. It is not physically meaningful.
+
+When creating a Feature (or any subclass of FeatureBase like FeatureLag, FeatureAdd etc.), a sampling method can be
+specified.
+
+sampling_method (Union[str, int]): Time step of the input data used to predict the output.
+    - if None: FeatureConstruction.get_default_sampling_method() is used
+    - if 'current' or 0: Current time step will be used for prediction.
+    - if 'previous' or 1: Previous time step will be used for prediction.
+    - if 'mean_over_interval': Mean between current and previous time step will be used.
+    
+    Specify default sampling method using FeatureConstruction.set_default_sampling_method(<your default sampling>).
+    If no default sampling method is specified by the user, 'previous' is used as default.
+"""
+FeatureConstruction.set_default_sampling_method(0)
+
+# Set up logger for saving
+Logger.setup_logger(folder_name='different_sampling_methods_ann', override=True)
+
+# File path to data
+file_path = r"data/bestest_hydronic_heat_pump/pid_data.csv"
+
+# List of input features. Can include names of constructed features and lagged inputs
+inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
+          Feature('weaSta_reaWeaHDirNor_y', sampling_method='mean_over_interval'), 'oveHeaPumY_u',
+          'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
+# Output feature
+output = 'Change(T_zone)'
+
+""" 
+The constructed features are automatically added to the data via 'physXAI.preprocessing.constructed.py' 
+Lagged inputs can be added directly based on the feature
+"""
+# create lags of reaTZon_y: reaTZon_y_lag1, reaTZon_y_lag2
+x1 = Feature('reaTZon_y', sampling_method='previous')
+lx1 = x1.lag(2)  # for all lags of reaTZon_y, the sampling method will be set automatically to 'previous'
+
+# create lag of weaSta_reaWeaTDryBul_y: weaSta_reaWeaTDryBul_y_lag1
+x2 = Feature('weaSta_reaWeaTDryBul_y')
+lx2 = x2.lag(1)
+
+# create lag of oveHeaPumY_u: oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
+x3 = Feature('oveHeaPumY_u')
+x3.lag(2)
+
+# dummy Features
+y = x1 + lx1[0]
+z = y + x1
+z.rename('example_feature_two')
+z.sampling_method = 'mean_over_interval'
+e = FeatureExp(x1-273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
+
+# add dummy features to inputs
+inputs.extend([z, e])
+
+# Create Training data
+# Time step defines target sampling: if original sampling of data is in 15min intervals, it is resampled to 1h intervals
+# for time_step=4. Hence, if the sampling method of an input is defined as 'mean_over_interval', the mean over the last
+# hour is taken as input
+prep = PreprocessingSingleStep(inputs, output, time_step=4)
+
+# Process Training data
+td = prep.pipeline(file_path)
+
+# Build & train Classical ANN
+m = ClassicalANNModel(epochs=50)
+model = m.pipeline(td)
+
+# Log setup of preprocessing and model as json
+Logger.log_setup(prep, m)
+# Log training data as pickle
+Logger.save_training_data(td)
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 97b5310..fa6f82c 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -98,31 +98,17 @@ class PreprocessingData(ABC):
     Abstract Preprocessing Class
     """
 
-    def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
+    def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, list[str]],
                  time_step: Optional[Union[int, float]] = None,
                  test_size: float = 0.1, val_size: float = 0.1, random_state: int = 42,
                  time_index_col: Union[str, float] = 0, csv_delimiter: str = ';', csv_encoding: str = 'latin1',
-                 csv_header: int = 0, csv_skiprows: Union[int, list[int]] = [], ignore_nan: bool = False):
+                 csv_header: int = 0, csv_skiprows: Union[int, list[int]] = [], ignore_nan: bool = False, **kwargs):
         """
         Initializes the Preprocessing instance.
 
         Args:
             inputs (List[Union[str, FeatureBase]]): List of column names or Features to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
-            shift (Union[int, str, dict]): Time step of the input data used to predict the output.
-                - If a single int or str is given, it applies to all inputs.
-                - If a dict is provided, it can specify different shifts for individual inputs.
-                - If not all inputs are specified in the dict, unspecified inputs will use a default value (autocomplete).
-                Examples:
-                    - shift = 0 or shift = 'current': Current time step will be used for prediction.
-                    - shift = 1 or shift = 'previous': Previous values will be used for prediction.
-                    - shift = 'mean_over_interval': Mean between current and previous time step will be used.
-                    - shift = {
-                        'inp_1': 1,
-                        'inp_2': 'mean_over_interval',
-                        '_default': 0,  # current time step will be used for all inputs not specified in the dict
-                        # If no custom default value is given in dict, 'previous' will be used as default
-                    }
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
             val_size (float): Proportion of the dataset to allocate to the validation set.
@@ -145,13 +131,6 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
             output = [output]
         self.output: list[str] = output
 
-        if isinstance(shift, dict) and '_default' in shift.keys():
-            self.shift_default = shift['_default']
-            shift.__delitem__('_default')
-        else:
-            self.shift_default = None
-        self.shift: dict = convert_shift_to_dict(shift, inputs, custom_default=self.shift_default)
-
         self.time_step = time_step
 
         # Training, validation and test size should be equal to 1
@@ -230,7 +209,7 @@ class PreprocessingSingleStep(PreprocessingData):
     validation, and test sets.
     """
 
-    def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, list[str]], shift: Union[int, str, dict] = 'previous',
+    def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, list[str]],
                  time_step: Optional[Union[int, float]] = None,
                  test_size: float = 0.1, val_size: float = 0.1, random_state: int = 42,
                  time_index_col: Union[str, float] = 0, csv_delimiter: str = ';', csv_encoding: str = 'latin1',
@@ -241,20 +220,6 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
         Args:
             inputs (List[Union[str, FeatureBase]]): List of column names or Features to be used as input features.
             output (Union[str, List[str]]): Column name(s) for the target variable(s).
-            shift (Union[int, str, dict]): Time step of the input data used to predict the output.
-                - If a single int or str is given, it applies to all inputs.
-                - If a dict is provided, it can specify different shifts for individual inputs.
-                - If not all inputs are specified in the dict, unspecified inputs will use a default value (autocomplete).
-                Examples:
-                    - shift = 0 or shift = 'current': Current time step will be used for prediction.
-                    - shift = 1 or shift = 'previous': Previous values will be used for prediction.
-                    - shift = 'mean_over_interval': Mean between current and previous time step will be used.
-                    - shift = {
-                        'inp_1': 1,
-                        'inp_2': 'mean_over_interval',
-                        '_default': 0,  # current time step will be used for all inputs not specified in the dict
-                        # If no custom default value is given in dict, 'previous' will be used as default
-                    }
             time_step (Optional[Union[int, float]]): Optional time step sampling. If None, sampling of data is used.
             test_size (float): Proportion of the dataset to allocate to the test set.
             val_size (float): Proportion of the dataset to allocate to the validation set.
@@ -267,8 +232,23 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
             ignore_nan (bool): If True, rows with NaN values will be dropped. If False, an error is raised if NaNs are present. Default is False.
         """
 
-        super().__init__(inputs, output, shift, time_step, test_size, val_size, random_state, time_index_col,
-                         csv_delimiter, csv_encoding, csv_header, csv_skiprows, ignore_nan)
+        if 'shift' in kwargs.keys():
+            DeprecationWarning(
+                "shift parameter is deprecated for SingleStep models and replaced by sampling_method, an attribute of "
+                "each Feature. This allows specifying individual 'shifts' for each Feature / input. A default sampling"
+                "method can be specified via FeatureConstruction.set_default_sampling_method(<your default sampling>)."
+            )
+            DeprecationWarning(
+                f"shift parameter was given as shift={kwargs['shift']}. Setting FeatureConstruction.set_default_"
+                f"sampling_method(shift) and override possible individual sampling methods of all Features. If this is"
+                f"not intended, remove shift parameter when initializing PreprocessingSingleStep object!"
+            )
+            FeatureConstruction.set_default_sampling_method(kwargs['shift'])
+            for f in FeatureConstruction.features:
+                f.sampling_method = kwargs['shift']
+
+        super().__init__(inputs, output, time_step, test_size, val_size, random_state, time_index_col,
+                         csv_delimiter, csv_encoding, csv_header, csv_skiprows, ignore_nan, **kwargs)
 
     def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         """
@@ -462,7 +442,6 @@ def get_config(self) -> dict:
             '__class_name__': self.__class__.__name__,
             'inputs': self.inputs,
             'output': self.output,
-            'shift': self.shift,
             'test_size': self.test_size,
             'val_size': self.val_size,
             'random_state': self.random_state,
@@ -475,7 +454,7 @@ def from_config(cls, config: dict) -> 'PreprocessingSingleStep':
         return cls(**config)
 
 
-class PreprocessingMultiStep (PreprocessingData):
+class PreprocessingMultiStep(PreprocessingData):
     """
     Handles preprocessing for multi-step forecasting models, typically RNNs.
     This involves creating windowed datasets suitable for sequence models,
@@ -487,7 +466,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], label_width
                  test_size: float = 0.1, val_size: float = 0.1, random_state: int = 42,
                  time_index_col: Union[str, float] = 0, csv_delimiter: str = ';', csv_encoding: str = 'latin1',
                  csv_header: int = 0, csv_skiprows: Union[int, list[int]] = [],
-                 overlapping_sequences: bool = True, batch_size=32, init_features: list[str] = None,**kwargs):
+                 overlapping_sequences: bool = True, batch_size=32, init_features: list[str] = None, **kwargs):
         """
         Initializes the PreprocessingMultiStep instance.
 
@@ -514,7 +493,7 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], label_width
                                                  If None and warmup_width > 0, defaults to `inputs`.
                                                  If None and warmup_width <= 0, defaults to empty list.
         """
-        super().__init__(inputs, output, shift, time_step, test_size, val_size, random_state, time_index_col,
+        super().__init__(inputs, output, time_step, test_size, val_size, random_state, time_index_col,
                          csv_delimiter, csv_encoding, csv_header, csv_skiprows)
 
         self.overlapping_sequences = overlapping_sequences
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 54ba351..fcac12f 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -9,7 +9,7 @@
 from physXAI.utils.logging import Logger, get_parent_working_directory
 from physXAI.preprocessing.preprocessing import PreprocessingSingleStep, PreprocessingMultiStep, \
     PreprocessingData, convert_shift_to_dict
-from physXAI.preprocessing.constructed import Feature, FeatureConstruction, FeatureConstant
+from physXAI.preprocessing.constructed import Feature, FeatureConstruction, FeatureConstant, FeatureExp
 from physXAI.feature_selection.recursive_feature_elimination import recursive_feature_elimination_pipeline
 from physXAI.models.models import LinearRegressionModel, AbstractModel
 from physXAI.models.ann.ann_design import ClassicalANNModel, CMNNModel, LinANNModel, PINNModel, RNNModel, \
@@ -40,7 +40,8 @@ def inputs_tair():
 @pytest.fixture(scope='module')
 def inputs_tair_extended():
     return ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
-              'weaSta_reaWeaHDirNor_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
+            Feature('weaSta_reaWeaHDirNor_y', sampling_method='mean_over_interval'), 'oveHeaPumY_u',
+            'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
 
 @pytest.fixture(scope='module')
 def output_php():
@@ -232,32 +233,44 @@ def tair_data_total(file_path, inputs_tair, output_tair):
     td = prep.pipeline(file_path)
     return prep, td
 
-def test_shifting(file_path, inputs_tair_extended, output_tair):
+def test_sampling_methods(file_path, inputs_tair_extended, output_tair):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
 
+    FeatureConstruction.set_default_sampling_method(0)
+
     # Create lags
-    x1 = Feature('reaTZon_y')
-    x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
+    x1 = Feature('reaTZon_y', sampling_method='previous')
+    lx1 = x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
     x2 = Feature('weaSta_reaWeaTDryBul_y')
-    x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
+    lx2 = x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
     x3 = Feature('oveHeaPumY_u')
     x3.lag(2)  # oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
 
-    shift = {
-        'reaTZon_y': 'previous',  # for all lags of reaTZon_y, the shift will be set automatically
-        'weaSta_reaWeaHDirNor_y': 'mean_over_interval',
-        '_default': 0,
-    }
+    # dummy Features
+    y = x1 + lx1[0]
+    z = y + x1
+    z.rename('test_feature_two')
+    z.sampling_method = 'mean_over_interval'
+    e = FeatureExp(x1-273.15, 'exp', sampling_method=1) # reduce x1 by 273.15, otherwise values are too high
+
+    inputs_tair_extended.extend([z, e])
 
     # Create & process Training data
-    prep = PreprocessingSingleStep(inputs_tair_extended, output_tair, shift=shift, time_step=4)
+    prep = PreprocessingSingleStep(inputs_tair_extended, output_tair, time_step=4)
     td = prep.pipeline(file_path)
 
     # Build & train Classical ANN
     m = ClassicalANNModel(epochs=1)
     model = m.pipeline(td)
 
+    # check correct sampling_method specification
+    assert x1.sampling_method == 'previous' and lx1[1].sampling_method == 'previous'
+    assert x2.sampling_method == 'current' and lx2.sampling_method == 'current'
+    assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').sampling_method == 'mean_over_interval'
+    assert FeatureConstruction.get_feature('test_feature_two').sampling_method == 'mean_over_interval'
+    assert e.sampling_method == 'previous'
+
 def test_model_linReg(inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)

From 11d1719f759b31b02f4e1b8d5b54f683f89eac9e Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Sun, 7 Dec 2025 17:26:04 +0100
Subject: [PATCH 24/42] Fixed small mistake regarding DataFrame length

---
 physXAI/preprocessing/preprocessing.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index fa6f82c..923cb50 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -331,13 +331,13 @@ def pairwise(iterable: Iterable):
 
         else:  # different inputs have different sampling methods
             res = []
-            previous_or_mean_in_sampling_methods = False
+            previous_or_mean_in_sampling_methods = []
             for f in features_without_lags:
                 # only process inputs with sampling method mean_over_interval first since X cannot be sampled
                 # to the actual required time steps until the intermediate values were taken into the mean
                 if f.sampling_method == 'mean_over_interval':
                     res.append(get_mean_over_interval(y, X[[f.feature]]))
-                    previous_or_mean_in_sampling_methods = True
+                    previous_or_mean_in_sampling_methods.append(True)
 
             # sample X according to required time step
             X = self.sample_df_according_to_timestep(X)
@@ -347,25 +347,28 @@ def pairwise(iterable: Iterable):
                 if f.sampling_method == 'current':
                     # no transformation needed
                     res.append(_x)
+                    previous_or_mean_in_sampling_methods.append(False)
                 elif f.sampling_method == 'previous':
                     # shift by 1
                     _x = _x.shift(1)
                     _x = _x.iloc[1:]
                     res.append(_x)
-                    previous_or_mean_in_sampling_methods = True
+                    previous_or_mean_in_sampling_methods.append(True)
                 elif f.sampling_method == 'mean_over_interval':
                     continue
                 else:
                     raise NotImplementedError(f"Sampling method '{f.sampling_method}' not implemented.")
 
             X = pd.concat(res, axis=1)
+            X = X.sort_index(ascending=True)
 
             # Sampling methods 'previous' and 'mean_over_interval' reduce available data points by 1.
             # Therefore, lengths of X and y have to be synchronized
-            if previous_or_mean_in_sampling_methods:
+            if any(previous_or_mean_in_sampling_methods):
                 y = y.iloc[1:]
-                X = X.sort_index(ascending=True)
-                X = X.iloc[1:]
+                # if at least one of the features uses 'current' as sampling method, shorten X
+                if not all(previous_or_mean_in_sampling_methods):
+                    X = X.iloc[1:]
 
         res_df = pd.concat([X, y], axis=1)
 

From 8d9af45a4a5bc5023f011849228c77220c9c7a46 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Sun, 7 Dec 2025 17:26:41 +0100
Subject: [PATCH 25/42] Fixed testing bug

---
 unittests/test_coverage.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index fcac12f..524ed73 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -85,6 +85,8 @@ def test_preprocessing(monkeypatch, file_path, inputs_php, output_php):
     prep = PreprocessingSingleStep(inputs_php, output_php)
     prep.pipeline(file_path)
 
+    FeatureConstruction.reset()
+
 def test_preprocessing_multistep(file_path, inputs_tair, output_tair):
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
 
@@ -100,6 +102,8 @@ def test_preprocessing_multistep(file_path, inputs_tair, output_tair):
                                   overlapping_sequences=False, batch_size=1)
     prep.pipeline(file_path)
 
+    FeatureConstruction.reset()
+
 class TestPreprocessingShiftConversion(TestCase):
 
     inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
@@ -271,6 +275,8 @@ def test_sampling_methods(file_path, inputs_tair_extended, output_tair):
     assert FeatureConstruction.get_feature('test_feature_two').sampling_method == 'mean_over_interval'
     assert e.sampling_method == 'previous'
 
+    FeatureConstruction.reset()
+
 def test_model_linReg(inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)

From 8f8c377950cacba752d79663c5198c7bc485c5e5 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Sun, 7 Dec 2025 16:30:08 +0000
Subject: [PATCH 26/42] Update coverage badge [skip ci]

---
 build/reports/coverage.svg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/reports/coverage.svg b/build/reports/coverage.svg
index 6963b3e..1c7007c 100644
--- a/build/reports/coverage.svg
+++ b/build/reports/coverage.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">87%</text>
-        <text x="80" y="14">87%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">89%</text>
+        <text x="80" y="14">89%</text>
     </g>
 </svg>

From c42f42404b7ceb4d929b75563df66dad0ab293ea Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 10 Dec 2025 13:20:28 +0100
Subject: [PATCH 27/42] Updated testing for sampling method as attribute of
 Feature; deleted deprecated code and test for shift conversion

---
 physXAI/preprocessing/constructed.py   |   5 +
 physXAI/preprocessing/preprocessing.py |  80 ---------
 unittests/test_coverage.py             | 231 +++++++++++++------------
 3 files changed, 123 insertions(+), 193 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index a8c5da1..341f319 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -6,6 +6,10 @@
 
 def _return_valid_sampling_method(v: Union[int, str]):
     """ check the validity of the given sampling method and return a string if val is int """
+
+    if not isinstance(v, (int, str)):
+        raise TypeError(f'Type of sampling method not supported. Type is {type(v)}, must be int or str.')
+
     if v in ['current', 0]:
         return 'current'
     elif v in ['previous', 1]:
@@ -604,6 +608,7 @@ def set_default_sampling_method(val: Union[str, int]):
     def reset():
         """Clears all registered features and input names."""
         FeatureConstruction.features = list[FeatureBase]()
+        FeatureConstruction.set_default_sampling_method('previous')
 
     @staticmethod
     def append(f: FeatureBase):
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 923cb50..1b8a5ff 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -13,86 +13,6 @@
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
 
 
-def convert_shift_to_dict(s: Union[int, str, dict], inputs: list[str], custom_default: Union[int, str] = None) -> dict:
-    """
-    Convert a given shift variable (int, str) into a dictionary in which a shift is defined for every input.
-    If a dictionary is given as shift, check entries and autocomplete dict if necessary.
-
-    Args:
-        s (Union[int, str, dict]): Shift value. Either a single string or int which then will be applied to all the inputs or
-            a dictionary in which a different shift can be defined for each input. If the dictionary does not specify the
-            shift for all inputs, the shift for inputs not specified is set to the default value (autocomplete)
-        inputs (list(str)): List of Input variables
-        custom_default (Union[int, str]): if no custom default is specified, 'previous' is used as default shift
-    """
-
-    def return_valid_shift(val: Union[int, str]):
-        """ check the validity of the given shift and return a string if val is int """
-        if val in ['current', 0]:
-            val = 'current'
-        elif val in ['previous', 1]:
-            val = 'previous'
-        elif val == 'mean_over_interval':
-            val = 'mean_over_interval'
-        else:
-            raise ValueError(
-                f"Value of shift not supported, value is: {val}. Shift must be 'current' (or 0 if s is int), "
-                f"'previous' (or 1 if s is int) or 'mean_over_interval'.")
-        return val
-
-    # set custom default or - if no custom default is specified - use 'previous' as default
-    default = 'previous' if custom_default is None else return_valid_shift(custom_default)
-
-    if isinstance(s, (int, str)):
-        d = {}
-        s = return_valid_shift(s)
-
-        # add shift for each input
-        for inp in inputs:
-            d.update({inp: s})
-        return d
-
-    elif isinstance(s, dict):
-        def get_lag(inputs: list[str], current_input: str) -> int:
-            """ get lag of current input """
-            count = 0
-            for inp in inputs:
-                spl = inp.split(current_input) # make sure it is the current input
-                if spl[0] == '' and spl[1] != '' and spl[1].split('_lag')[0] == '':
-                    count += 1
-            return count
-
-        # check if lags exist
-        d = {}
-        inputs_without_lags = {}
-        for inp in inputs:
-            # skip if current input is just the lag of another inp
-            if not inp.__contains__('_lag'):
-                inputs_without_lags.update({inp: get_lag(inputs, inp)})
-
-        for inp in inputs_without_lags.keys():
-            # if an input has a shift assigned already, the validity is checked
-            # otherwise default value is assigned
-            if inp in s.keys():
-                d.update({inp: return_valid_shift(s[inp])})
-            else:
-                d.update({inp: default})
-
-            # all inputs with lags should have the same shift
-            if inputs_without_lags[inp] > 0: # if current input has lags
-                for i in range(inputs_without_lags[inp]):
-                    name = inp + '_lag' + str(i+1)
-
-                    # if a shift was already defined for this lag, check if it matches the shift of the original inp
-                    if name in s.keys():
-                        assert return_valid_shift(s[name]) == d[inp], \
-                            'Make sure that all lags of an input have the same shift'
-                    d.update({name: d[inp]})
-        return d
-    else:
-        raise TypeError(f'shift must be of type int, str or dict, is type {type(s)}')
-
-
 class PreprocessingData(ABC):
     """
     Abstract Preprocessing Class
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 524ed73..cd7a910 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -8,14 +8,13 @@
 ######################################################################################################################
 from physXAI.utils.logging import Logger, get_parent_working_directory
 from physXAI.preprocessing.preprocessing import PreprocessingSingleStep, PreprocessingMultiStep, \
-    PreprocessingData, convert_shift_to_dict
-from physXAI.preprocessing.constructed import Feature, FeatureConstruction, FeatureConstant, FeatureExp
+    PreprocessingData
+from physXAI.preprocessing.constructed import Feature, FeatureConstruction, FeatureConstant, FeatureExp, FeatureLag
 from physXAI.feature_selection.recursive_feature_elimination import recursive_feature_elimination_pipeline
 from physXAI.models.models import LinearRegressionModel, AbstractModel
 from physXAI.models.ann.ann_design import ClassicalANNModel, CMNNModel, LinANNModel, PINNModel, RNNModel, \
     RBFModel
 
-
 base_path = os.path.join(pathlib.Path(__file__).resolve().parent.parent, 'stored_data')
 
 
@@ -25,18 +24,22 @@ def disable_plotly_show():
     with patch('plotly.graph_objects.Figure.show'):
         yield
 
+
 @pytest.fixture(scope='module')
 def file_path():
     return os.path.join(pathlib.Path(__file__).resolve().parent.parent, "data/bestest_hydronic_heat_pump/pid_data.csv")
 
+
 @pytest.fixture(scope='module')
 def inputs_php():
     return ['oveHeaPumY_u', 'Func(logistic)', 'weaSta_reaWeaTDryBul_y', 'reaTZon_y']
 
+
 @pytest.fixture(scope='module')
 def inputs_tair():
     return ['reaTZon_y', 'weaSta_reaWeaTDryBul_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1']
 
+
 @pytest.fixture(scope='module')
 def inputs_tair_extended():
     return ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
@@ -47,13 +50,16 @@ def inputs_tair_extended():
 def output_php():
     return 'reaPHeaPum_y'
 
+
 @pytest.fixture(scope='module')
 def output_tair():
     return 'Change(T_zone)'
 
+
 def test_path_setup():
     get_parent_working_directory()
 
+
 def test_preprocessing(monkeypatch, file_path, inputs_php, output_php):
     monkeypatch.setattr('builtins.input', lambda _: "Y")
 
@@ -87,6 +93,7 @@ def test_preprocessing(monkeypatch, file_path, inputs_php, output_php):
 
     FeatureConstruction.reset()
 
+
 def test_preprocessing_multistep(file_path, inputs_tair, output_tair):
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
 
@@ -104,85 +111,112 @@ def test_preprocessing_multistep(file_path, inputs_tair, output_tair):
 
     FeatureConstruction.reset()
 
-class TestPreprocessingShiftConversion(TestCase):
-
-    inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
-              'weaSta_reaWeaHDirNor_y', 'oveHeaPumY_u', 'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
 
-    # test case: int given for shift
-    def test_int(self):
-        shift = 0
-        res = convert_shift_to_dict(shift, self.inputs)
-        res_expected = {'reaTZon_y': 'current', 'reaTZon_y_lag1': 'current', 'reaTZon_y_lag2': 'current',
-                        'weaSta_reaWeaTDryBul_y': 'current', 'weaSta_reaWeaTDryBul_y_lag1': 'current',
-                        'weaSta_reaWeaHDirNor_y': 'current', 'oveHeaPumY_u': 'current', 'oveHeaPumY_u_lag1': 'current',
-                        'oveHeaPumY_u_lag2': 'current'}
-        assert res == res_expected
+class TestSamplingMethodsFaults(TestCase):
 
-    # test case: unsupported int given for shift
-    def test_unsupported_int(self):
-        shift = 2
-        with self.assertRaises(ValueError):
-            convert_shift_to_dict(shift, self.inputs)
-
-    # test case: str given for shift
-    def test_str(self):
-        shift = 'mean_over_interval'
-        res = convert_shift_to_dict(shift, self.inputs)
-        res_expected = {'reaTZon_y': 'mean_over_interval', 'reaTZon_y_lag1': 'mean_over_interval',
-                        'reaTZon_y_lag2': 'mean_over_interval', 'weaSta_reaWeaTDryBul_y': 'mean_over_interval',
-                        'weaSta_reaWeaTDryBul_y_lag1': 'mean_over_interval',
-                        'weaSta_reaWeaHDirNor_y': 'mean_over_interval', 'oveHeaPumY_u': 'mean_over_interval',
-                        'oveHeaPumY_u_lag1': 'mean_over_interval', 'oveHeaPumY_u_lag2': 'mean_over_interval'}
-        assert res == res_expected
-
-    # test case: unsupported str given for shift
+    # test case: unsupported str given as sampling method
     def test_unsupported_str(self):
-        shift = 'test'
         with self.assertRaises(ValueError):
-            convert_shift_to_dict(shift, self.inputs)
+            FeatureConstruction.set_default_sampling_method('test')
 
-    # test case: unsupported type given for shift
+    # test case: unsupported type given for sampling method
     def test_unsupported_type(self):
-        shift = ['previous']
         with self.assertRaises(TypeError):
-            convert_shift_to_dict(shift, self.inputs)
-
-    # test case: autocomplete incomplete dictionary given for shift
-    def test_autocomplete_incomplete_dict(self):
-        shift = {'reaTZon_y': 0, 'reaTZon_y_lag1': 0, 'weaSta_reaWeaTDryBul_y': 'mean_over_interval'}
-
-        # previous is default for all inputs that are not specified
-        res = convert_shift_to_dict(shift, self.inputs)
-        res_expected = {'reaTZon_y': 'current', 'reaTZon_y_lag1': 'current', 'reaTZon_y_lag2': 'current',
-                        'weaSta_reaWeaTDryBul_y': 'mean_over_interval',
-                        'weaSta_reaWeaTDryBul_y_lag1': 'mean_over_interval',
-                        'weaSta_reaWeaHDirNor_y': 'previous', 'oveHeaPumY_u': 'previous',
-                        'oveHeaPumY_u_lag1': 'previous',
-                        'oveHeaPumY_u_lag2': 'previous'}
-        assert len(res) == len(self.inputs)
-        assert res == res_expected
-
-    # test case: autocomplete incomplete dictionary given for shift with custom default
-    def test_autocomplete_incomplete_dict_with_custom_default(self):
-        shift = {'reaTZon_y': 1, 'reaTZon_y_lag1': 1, 'weaSta_reaWeaTDryBul_y': 'mean_over_interval'}
-
-        # previous is default for all inputs that are not specified
-        res = convert_shift_to_dict(shift, self.inputs, custom_default=0)
-        res_expected = {'reaTZon_y': 'previous', 'reaTZon_y_lag1': 'previous', 'reaTZon_y_lag2': 'previous',
-                        'weaSta_reaWeaTDryBul_y': 'mean_over_interval',
-                        'weaSta_reaWeaTDryBul_y_lag1': 'mean_over_interval',
-                        'weaSta_reaWeaHDirNor_y': 'current', 'oveHeaPumY_u': 'current',
-                        'oveHeaPumY_u_lag1': 'current',
-                        'oveHeaPumY_u_lag2': 'current'}
-        assert len(res) == len(self.inputs)
-        assert res == res_expected
-
-    # test case: lags of the same input have mismatching shifts
+            FeatureConstruction.set_default_sampling_method(['current'])
+
+    # test case: lags of the same input have mismatching sampling methods
     def test_lag_with_mismatching_shifts(self):
-        shift = {'reaTZon_y': 0, 'reaTZon_y_lag1': 1, 'weaSta_reaWeaTDryBul_y': 'mean_over_interval'}
+
+        x = Feature('test', sampling_method='current')
         with self.assertRaises(AssertionError):
-            convert_shift_to_dict(shift, self.inputs)
+            FeatureLag(x, lag=1, sampling_method='previous')
+        FeatureConstruction.reset()
+
+
+def test_sampling_method_use_default(file_path, inputs_tair, output_tair):
+    """test case: use default sampling when no default is specified by user"""
+
+    # when not overriding default sampling method, 'previous' is used
+
+    x = Feature('oveHeaPumY_u')
+    x.lag(1)
+
+    # Create & process Training data
+    prep = PreprocessingSingleStep(inputs_tair, output_tair)
+    td = prep.pipeline(file_path)
+
+    assert len(inputs_tair) == len(FeatureConstruction.features)
+
+    for inp in inputs_tair:
+        f = FeatureConstruction.get_feature(inp)
+        assert f.sampling_method == 'previous'
+
+    FeatureConstruction.reset()
+
+
+def test_sampling_method_str(file_path, inputs_tair, output_tair):
+    """test case: set default using str (setting default with int is done in test_different_sampling_methods)"""
+
+    FeatureConstruction.set_default_sampling_method('mean_over_interval')
+
+    x = Feature('oveHeaPumY_u')
+    x.lag(1)
+
+    # Create & process Training data
+    prep = PreprocessingSingleStep(inputs_tair, output_tair, time_step=4)
+    td = prep.pipeline(file_path)
+
+    assert len(inputs_tair) == len(FeatureConstruction.features)
+
+    for inp in inputs_tair:
+        f = FeatureConstruction.get_feature(inp)
+        assert f.sampling_method == 'mean_over_interval'
+
+    FeatureConstruction.reset()
+
+
+def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair):
+    """test case: different sampling methods given"""
+
+    # Setup up logger for saving
+    Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
+
+    # set default
+    FeatureConstruction.set_default_sampling_method(0)
+
+    # Create lags
+    x1 = Feature('reaTZon_y', sampling_method='previous')
+    lx1 = x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
+    x2 = Feature('weaSta_reaWeaTDryBul_y')
+    lx2 = x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
+    x3 = Feature('oveHeaPumY_u')
+    x3.lag(2)  # oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
+
+    # dummy Features
+    y = x1 + lx1[0]
+    z = y + x1
+    z.rename('test_feature_two')
+    z.sampling_method = 'mean_over_interval'
+    e = FeatureExp(x1 - 273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
+
+    inputs_tair_extended.extend([z, e])
+
+    # Create & process Training data
+    prep = PreprocessingSingleStep(inputs_tair_extended, output_tair, time_step=4)
+    td = prep.pipeline(file_path)
+
+    # Build & train Classical ANN
+    m = ClassicalANNModel(epochs=1)
+    model = m.pipeline(td)
+
+    # check correct sampling_method specification
+    assert x1.sampling_method == 'previous' and lx1[1].sampling_method == 'previous'
+    assert x2.sampling_method == 'current' and lx2.sampling_method == 'current'
+    assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').sampling_method == 'mean_over_interval'
+    assert FeatureConstruction.get_feature('test_feature_two').sampling_method == 'mean_over_interval'
+    assert e.sampling_method == 'previous'
+
+    FeatureConstruction.reset()
 
 
 @pytest.fixture(scope='module')
@@ -194,6 +228,7 @@ def p_hp_data(file_path, inputs_php, output_php):
     td = prep.pipeline(file_path)
     return prep, td
 
+
 @pytest.fixture(scope='module')
 def tair_data_delta(file_path, inputs_tair, output_tair):
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -208,6 +243,7 @@ def tair_data_delta(file_path, inputs_tair, output_tair):
     td = prep.pipeline(file_path)
     return prep, td
 
+
 @pytest.fixture(scope='module')
 def tair_data_noval(file_path, inputs_tair, output_tair):
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -237,45 +273,6 @@ def tair_data_total(file_path, inputs_tair, output_tair):
     td = prep.pipeline(file_path)
     return prep, td
 
-def test_sampling_methods(file_path, inputs_tair_extended, output_tair):
-    # Setup up logger for saving
-    Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
-
-    FeatureConstruction.set_default_sampling_method(0)
-
-    # Create lags
-    x1 = Feature('reaTZon_y', sampling_method='previous')
-    lx1 = x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
-    x2 = Feature('weaSta_reaWeaTDryBul_y')
-    lx2 = x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
-    x3 = Feature('oveHeaPumY_u')
-    x3.lag(2)  # oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
-
-    # dummy Features
-    y = x1 + lx1[0]
-    z = y + x1
-    z.rename('test_feature_two')
-    z.sampling_method = 'mean_over_interval'
-    e = FeatureExp(x1-273.15, 'exp', sampling_method=1) # reduce x1 by 273.15, otherwise values are too high
-
-    inputs_tair_extended.extend([z, e])
-
-    # Create & process Training data
-    prep = PreprocessingSingleStep(inputs_tair_extended, output_tair, time_step=4)
-    td = prep.pipeline(file_path)
-
-    # Build & train Classical ANN
-    m = ClassicalANNModel(epochs=1)
-    model = m.pipeline(td)
-
-    # check correct sampling_method specification
-    assert x1.sampling_method == 'previous' and lx1[1].sampling_method == 'previous'
-    assert x2.sampling_method == 'current' and lx2.sampling_method == 'current'
-    assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').sampling_method == 'mean_over_interval'
-    assert FeatureConstruction.get_feature('test_feature_two').sampling_method == 'mean_over_interval'
-    assert e.sampling_method == 'previous'
-
-    FeatureConstruction.reset()
 
 def test_model_linReg(inputs_php, output_php, file_path):
     # Setup up logger for saving
@@ -293,6 +290,7 @@ def test_model_linReg(inputs_php, output_php, file_path):
     Logger.log_setup(prep, m, save_name_model='model_linReg.json')
     Logger.save_training_data(td, path=os.path.join(Logger._logger, 'training_data2'))
 
+
 def test_model_ann(p_hp_data, inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -311,6 +309,7 @@ def test_model_ann(p_hp_data, inputs_php, output_php, file_path):
     Logger.log_setup(None, m)
     Logger.save_training_data(td)
 
+
 def test_model_cmnn(p_hp_data, inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -349,6 +348,7 @@ def test_model_cmnn(p_hp_data, inputs_php, output_php, file_path):
     Logger.log_setup(prep, m)
     Logger.save_training_data(td)
 
+
 def test_model_linANN(p_hp_data, inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -374,6 +374,7 @@ def test_model_linANN(p_hp_data, inputs_php, output_php, file_path):
     Logger.log_setup(prep, m)
     Logger.save_training_data(td)
 
+
 def test_model_pinn(inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -406,6 +407,7 @@ def test_model_pinn(inputs_php, output_php, file_path):
     Logger.log_setup(prep, m)
     Logger.save_training_data(td)
 
+
 def test_models_rnn(file_path):
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
 
@@ -441,6 +443,7 @@ def test_models_rnn(file_path):
     m = RNNModel(epochs=1, rnn_layer='RNN', early_stopping_epochs=None)
     m.pipeline(td, save_model=False, plot=False)
 
+
 def test_read_setup():
 
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -476,6 +479,7 @@ def test_read_setup():
         config_model = json.load(f)
     AbstractModel.model_from_config(config_model)
 
+
 def test_feature_selection(monkeypatch, p_hp_data, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -491,6 +495,7 @@ def test_feature_selection(monkeypatch, p_hp_data, file_path):
     recursive_feature_elimination_pipeline(file_path, prep, m, ascending_lag_order=True,
                                            fixed_inputs=['weaSta_reaWeaTDryBul_y', 'oveHeaPumY_u'])
 
+
 def test_feature_selection_multi(monkeypatch, tair_data_delta, tair_data_noval ,tair_data_total, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
@@ -522,4 +527,4 @@ def test_feature_selection_multi(monkeypatch, tair_data_delta, tair_data_noval ,
     recursive_feature_elimination_pipeline(file_path, prep2, m, use_multi_step_error=False)
     m.pipeline(td2, save_model=False, plot=False)
     Logger.log_setup(prep, None)
-    Logger.save_training_data(td, path=os.path.join(Logger._logger, 'training_data2.json'))
\ No newline at end of file
+    Logger.save_training_data(td, path=os.path.join(Logger._logger, 'training_data2.json'))

From 8323ee50ede6e6faf522cab692253944cb8bfa07 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 10 Dec 2025 17:15:39 +0100
Subject: [PATCH 28/42] Fixed error: ':' char not allowed in folder name

---
 physXAI/utils/logging.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/physXAI/utils/logging.py b/physXAI/utils/logging.py
index 7f873d7..61334b2 100644
--- a/physXAI/utils/logging.py
+++ b/physXAI/utils/logging.py
@@ -142,7 +142,7 @@ def setup_logger(folder_name: str = None, override: bool = False, base_path: str
         if base_path is None:
             base_path = Logger.base_path
         if folder_name is None:
-            folder_name = datetime.now().strftime("%d.%m.%y %H:%M:%S")
+            folder_name = datetime.now().strftime("%y-%m-%d %H.%M.%S")
             folder_name = os.path.join(base_path, folder_name)
         else:
             folder_name = os.path.join(base_path, folder_name)

From 8fd0093e4da701f8f1524726b12a414b0847b522 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 10 Dec 2025 17:38:08 +0100
Subject: [PATCH 29/42] fixed bug with property & added testing for deprecated
 shift

---
 .../different_sampling_methods.py             |  2 +-
 physXAI/preprocessing/constructed.py          | 16 ++---
 physXAI/preprocessing/preprocessing.py        | 18 ++---
 unittests/test_coverage.py                    | 66 +++++++++++++++----
 4 files changed, 72 insertions(+), 30 deletions(-)

diff --git a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
index e292a6f..26c3f07 100644
--- a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
+++ b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
@@ -53,7 +53,7 @@
 y = x1 + lx1[0]
 z = y + x1
 z.rename('example_feature_two')
-z.sampling_method = 'mean_over_interval'
+z.set_sampling_method('mean_over_interval')
 e = FeatureExp(x1-273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
 
 # add dummy features to inputs
diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index 341f319..4ae2e9b 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -44,17 +44,17 @@ def __init__(self, name: str, sampling_method: Union[str, int] = None, **kwargs)
         """
 
         self.feature: str = name
-        self.sampling_method = sampling_method
+        self._sampling_method = None
+        self.set_sampling_method(sampling_method)
 
         # Automatically registers the newly created feature instance with the FeatureConstruction manager
         FeatureConstruction.append(self)
 
-    @property
-    def sampling_method(self):
+    def get_sampling_method(self) -> str:
+        """returns the Features sampling method"""
         return self._sampling_method
 
-    @sampling_method.setter
-    def sampling_method(self, val: Union[str, int] = None):
+    def set_sampling_method(self, val: Union[str, int] = None):
         """
         Sets the feature's sampling method. If None is given, FeatureConstruction._default_sampling_method is used
         Available methods:
@@ -162,7 +162,7 @@ def get_config(self) -> dict:
         return {
             'class_name': self.__class__.__name__,
             'name': self.feature,
-            'sampling_method': self.sampling_method,
+            'sampling_method': self.get_sampling_method(),
         }
 
     @classmethod
@@ -240,14 +240,14 @@ def __init__(self, f: Union[FeatureBase, str], lag: int, name: str = None, **kwa
                 name = f.feature + f'_lag{lag}'
 
             # lags must have the same sampling_method as their base feature
-            sampling_method = f.sampling_method
+            sampling_method = f.get_sampling_method()
         else:
             self.origf: str = f
             if name is None:
                 name = f + f'_lag{lag}'
 
             # lags must have the same sampling_method as their base feature
-            sampling_method = FeatureConstruction.get_feature(f).sampling_method
+            sampling_method = FeatureConstruction.get_feature(f).get_sampling_method()
 
         if 'sampling_method' in kwargs.keys():
             assert kwargs['sampling_method'] == sampling_method, (
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 1b8a5ff..d45bee8 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -165,7 +165,7 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
             )
             FeatureConstruction.set_default_sampling_method(kwargs['shift'])
             for f in FeatureConstruction.features:
-                f.sampling_method = kwargs['shift']
+                f.set_sampling_method(kwargs['shift'])
 
         super().__init__(inputs, output, time_step, test_size, val_size, random_state, time_index_col,
                          csv_delimiter, csv_encoding, csv_header, csv_skiprows, ignore_nan, **kwargs)
@@ -232,11 +232,11 @@ def pairwise(iterable: Iterable):
 
         X = df[inputs_without_lags].copy()
 
-        if all('current' == f.sampling_method for f in features_without_lags):
+        if all('current' == f.get_sampling_method() for f in features_without_lags):
             # filter / sample data
             X = self.sample_df_according_to_timestep(X)
             # nothing more to do here
-        elif all('previous' == f.sampling_method for f in features_without_lags):
+        elif all('previous' == f.get_sampling_method() for f in features_without_lags):
             # filter / sample data
             X = self.sample_df_according_to_timestep(X)
 
@@ -244,7 +244,7 @@ def pairwise(iterable: Iterable):
             X = X.shift(1)
             y = y.iloc[1:]
             X = X.iloc[1:]
-        elif all('mean_over_interval' == f.sampling_method for f in features_without_lags):
+        elif all('mean_over_interval' == f.get_sampling_method() for f in features_without_lags):
             X = get_mean_over_interval(y, X)
             # synchronize length between X and y
             y = y.iloc[1:]
@@ -255,7 +255,7 @@ def pairwise(iterable: Iterable):
             for f in features_without_lags:
                 # only process inputs with sampling method mean_over_interval first since X cannot be sampled
                 # to the actual required time steps until the intermediate values were taken into the mean
-                if f.sampling_method == 'mean_over_interval':
+                if f.get_sampling_method() == 'mean_over_interval':
                     res.append(get_mean_over_interval(y, X[[f.feature]]))
                     previous_or_mean_in_sampling_methods.append(True)
 
@@ -264,20 +264,20 @@ def pairwise(iterable: Iterable):
             # process inputs with sampling methods 'current' and 'previous'
             for f in features_without_lags:
                 _x = X[[f.feature]]
-                if f.sampling_method == 'current':
+                if f.get_sampling_method() == 'current':
                     # no transformation needed
                     res.append(_x)
                     previous_or_mean_in_sampling_methods.append(False)
-                elif f.sampling_method == 'previous':
+                elif f.get_sampling_method() == 'previous':
                     # shift by 1
                     _x = _x.shift(1)
                     _x = _x.iloc[1:]
                     res.append(_x)
                     previous_or_mean_in_sampling_methods.append(True)
-                elif f.sampling_method == 'mean_over_interval':
+                elif f.get_sampling_method() == 'mean_over_interval':
                     continue
                 else:
-                    raise NotImplementedError(f"Sampling method '{f.sampling_method}' not implemented.")
+                    raise NotImplementedError(f"Sampling method '{f.get_sampling_method()}' not implemented.")
 
             X = pd.concat(res, axis=1)
             X = X.sort_index(ascending=True)
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index cd7a910..48e6133 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -125,7 +125,7 @@ def test_unsupported_type(self):
             FeatureConstruction.set_default_sampling_method(['current'])
 
     # test case: lags of the same input have mismatching sampling methods
-    def test_lag_with_mismatching_shifts(self):
+    def test_lag_with_mismatching_sampling_methods(self):
 
         x = Feature('test', sampling_method='current')
         with self.assertRaises(AssertionError):
@@ -149,7 +149,7 @@ def test_sampling_method_use_default(file_path, inputs_tair, output_tair):
 
     for inp in inputs_tair:
         f = FeatureConstruction.get_feature(inp)
-        assert f.sampling_method == 'previous'
+        assert f.get_sampling_method() == 'previous'
 
     FeatureConstruction.reset()
 
@@ -170,7 +170,7 @@ def test_sampling_method_str(file_path, inputs_tair, output_tair):
 
     for inp in inputs_tair:
         f = FeatureConstruction.get_feature(inp)
-        assert f.sampling_method == 'mean_over_interval'
+        assert f.get_sampling_method() == 'mean_over_interval'
 
     FeatureConstruction.reset()
 
@@ -178,9 +178,6 @@ def test_sampling_method_str(file_path, inputs_tair, output_tair):
 def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair):
     """test case: different sampling methods given"""
 
-    # Setup up logger for saving
-    Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
-
     # set default
     FeatureConstruction.set_default_sampling_method(0)
 
@@ -196,7 +193,7 @@ def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair
     y = x1 + lx1[0]
     z = y + x1
     z.rename('test_feature_two')
-    z.sampling_method = 'mean_over_interval'
+    z.set_sampling_method('mean_over_interval')
     e = FeatureExp(x1 - 273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
 
     inputs_tair_extended.extend([z, e])
@@ -210,11 +207,11 @@ def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair
     model = m.pipeline(td)
 
     # check correct sampling_method specification
-    assert x1.sampling_method == 'previous' and lx1[1].sampling_method == 'previous'
-    assert x2.sampling_method == 'current' and lx2.sampling_method == 'current'
-    assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').sampling_method == 'mean_over_interval'
-    assert FeatureConstruction.get_feature('test_feature_two').sampling_method == 'mean_over_interval'
-    assert e.sampling_method == 'previous'
+    assert x1.get_sampling_method() == 'previous' and lx1[1].get_sampling_method() == 'previous'
+    assert x2.get_sampling_method() == 'current' and lx2.get_sampling_method() == 'current'
+    assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').get_sampling_method() == 'mean_over_interval'
+    assert FeatureConstruction.get_feature('test_feature_two').get_sampling_method() == 'mean_over_interval'
+    assert e.get_sampling_method() == 'previous'
 
     FeatureConstruction.reset()
 
@@ -310,6 +307,51 @@ def test_model_ann(p_hp_data, inputs_php, output_php, file_path):
     Logger.save_training_data(td)
 
 
+def test_deprecated_shift(p_hp_data, inputs_php, output_php, file_path):
+
+    # Setup up logger for saving
+    Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
+
+    # Create & process Training data
+    prep = PreprocessingSingleStep(inputs_php, output_php, shift=0)  # deprecated shift given in preprocessing
+    td = prep.pipeline(file_path)
+
+    m = ClassicalANNModel(epochs=1, n_neurons=[4, 4], n_layers=2, activation_function=['softplus', 'softplus'],
+                          early_stopping_epochs=None, rescale_output=False)
+    m.pipeline(td)
+
+    m.epochs = 1
+    m.online_pipeline(td, os.path.join(Logger._logger, 'model.keras'))
+
+    assert FeatureConstruction.get_default_sampling_method() == 'current'
+    FeatureConstruction.set_default_sampling_method('previous')  # reset default sampling
+
+    # from config
+    config_prep = {
+        "__class_name__": "PreprocessingSingleStep",
+        "inputs": [
+            "oveHeaPumY_u",
+            "Func(logistic)",
+            "weaSta_reaWeaTDryBul_y",
+            "reaTZon_y"
+        ],
+        "output": [
+            "reaPHeaPum_y"
+        ],
+        "shift": 0,  # deprecated shift
+        "test_size": 0.1,
+        "val_size": 0.1,
+        "random_state": 42,
+        "time_step": 1.0,
+    }
+
+    a = PreprocessingData.from_config(config_prep)
+    assert isinstance(a, PreprocessingSingleStep)
+    assert FeatureConstruction.get_default_sampling_method() == 'current'
+
+    FeatureConstruction.reset()
+
+
 def test_model_cmnn(p_hp_data, inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)

From 049676192254fe1ff792ad6f8c9d0951e2ea06eb Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 10 Dec 2025 17:39:23 +0100
Subject: [PATCH 30/42] Implemented PreprocessingData.from_config dispatch &
 fixed small error in testing script

---
 physXAI/preprocessing/preprocessing.py | 13 ++++++++++++-
 unittests/test_coverage.py             |  8 +++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index d45bee8..4a484d7 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -118,7 +118,18 @@ def get_config(self) -> dict:
     @classmethod
     @abstractmethod
     def from_config(cls, config: dict) -> 'PreprocessingData':
-        pass
+
+        if "__class_name__" in config.keys():
+            if config["__class_name__"] == 'PreprocessingSingleStep':
+                return PreprocessingSingleStep.from_config(config)
+            elif config["__class_name__"] == 'PreprocessingMultiStep':
+                return PreprocessingMultiStep.from_config(config)
+            else:
+                raise ValueError(
+                    f"config does not contain a valid '__class_name__'. config['__class_name__'] is "
+                    f"{config["__class_name__"]} but only 'PreprocessingSingleStep' or 'PreprocessingMultiStep' allowed.")
+        else:
+            raise ValueError("No valid config given. config does not contain key '__class_name__'")
 
 
 class PreprocessingSingleStep(PreprocessingData):
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 48e6133..7a5b99c 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -462,7 +462,7 @@ def test_models_rnn(file_path):
 
     m = RNNModel(epochs=1, rnn_layer='LSTM', init_layer='dense')
     m.pipeline(td, os.path.join(Logger._logger, 'model2.keras'))
-    Logger.log_setup(td, m, 'preprocessing_config2.json',
+    Logger.log_setup(prep, m, 'preprocessing_config2.json',
                      save_name_constructed='constructed_config2.json')
     Logger.save_training_data(td)
 
@@ -495,13 +495,15 @@ def test_read_setup():
     path = os.path.join(Logger._logger, save_name_preprocessing)
     with open(path, "r") as f:
         config_prep = json.load(f)
-    PreprocessingData.from_config(config_prep)
+    a = PreprocessingData.from_config(config_prep)
+    assert isinstance(a, PreprocessingSingleStep)
 
     save_name_preprocessing = 'preprocessing_config2.json'
     path = os.path.join(Logger._logger, save_name_preprocessing)
     with open(path, "r") as f:
         config_prep = json.load(f)
-    PreprocessingData.from_config(config_prep)
+    b = PreprocessingData.from_config(config_prep)
+    assert isinstance(b, PreprocessingMultiStep)
 
     save_name_constructed = Logger.save_name_constructed
     path = os.path.join(Logger._logger, save_name_constructed)

From 03335f6ca94976ae9fed1f44bcb3e423f80e96ea Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 10 Dec 2025 20:19:23 +0100
Subject: [PATCH 31/42] Fixed nested f-string quotes (syntax error before
 Python 3.12)

---
 physXAI/preprocessing/preprocessing.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 4a484d7..9a39e38 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -120,14 +120,14 @@ def get_config(self) -> dict:
     def from_config(cls, config: dict) -> 'PreprocessingData':
 
         if "__class_name__" in config.keys():
-            if config["__class_name__"] == 'PreprocessingSingleStep':
+            if config['__class_name__'] == 'PreprocessingSingleStep':
                 return PreprocessingSingleStep.from_config(config)
-            elif config["__class_name__"] == 'PreprocessingMultiStep':
+            elif config['__class_name__'] == 'PreprocessingMultiStep':
                 return PreprocessingMultiStep.from_config(config)
             else:
                 raise ValueError(
                     f"config does not contain a valid '__class_name__'. config['__class_name__'] is "
-                    f"{config["__class_name__"]} but only 'PreprocessingSingleStep' or 'PreprocessingMultiStep' allowed.")
+                    f"{config['__class_name__']} but only 'PreprocessingSingleStep' or 'PreprocessingMultiStep' allowed.")
         else:
             raise ValueError("No valid config given. config does not contain key '__class_name__'")
 

From 9450d0fe57882c6eac463f8a28b0681c5b26f0f4 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 10 Dec 2025 20:49:24 +0100
Subject: [PATCH 32/42] Moved default_sampling_method from FeatureConstruction
 to Feature

---
 .../different_sampling_methods.py             |  8 ++---
 physXAI/preprocessing/constructed.py          | 31 +++++++++++++------
 physXAI/preprocessing/preprocessing.py        | 10 +++---
 unittests/test_coverage.py                    | 14 ++++-----
 4 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
index 26c3f07..84c5afa 100644
--- a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
+++ b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
@@ -1,6 +1,6 @@
 from physXAI.models.ann.ann_design import ClassicalANNModel
 from physXAI.preprocessing.preprocessing import PreprocessingSingleStep
-from physXAI.preprocessing.constructed import Feature, FeatureConstruction, FeatureExp
+from physXAI.preprocessing.constructed import Feature, FeatureExp
 from physXAI.utils.logging import Logger
 
 """
@@ -10,15 +10,15 @@
 specified.
 
 sampling_method (Union[str, int]): Time step of the input data used to predict the output.
-    - if None: FeatureConstruction.get_default_sampling_method() is used
+    - if None: Feature.get_default_sampling_method() is used
     - if 'current' or 0: Current time step will be used for prediction.
     - if 'previous' or 1: Previous time step will be used for prediction.
     - if 'mean_over_interval': Mean between current and previous time step will be used.
     
-    Specify default sampling method using FeatureConstruction.set_default_sampling_method(<your default sampling>).
+    Specify default sampling method using Feature.set_default_sampling_method(<your default sampling>).
     If no default sampling method is specified by the user, 'previous' is used as default.
 """
-FeatureConstruction.set_default_sampling_method(0)
+Feature.set_default_sampling_method(0)
 
 # Setup up logger for saving
 Logger.setup_logger(folder_name='different_sampling_methods_ann', override=True)
diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index 4ae2e9b..2683acf 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -36,7 +36,7 @@ def __init__(self, name: str, sampling_method: Union[str, int] = None, **kwargs)
         Args:
             name (str): The name of the feature. This will be the column name in the DataFrame.
             sampling_method (Union[str, int]): Time step of the input data used to predict the output.
-                - if None: FeatureConstruction._default_sampling_method is used
+                - if None: Feature._default_sampling_method is used
                 - if 'current' or 0: Current time step will be used for prediction.
                 - if 'previous' or 1: Previous time step will be used for prediction.
                 - if 'mean_over_interval': Mean between current and previous time step will be used.
@@ -56,7 +56,7 @@ def get_sampling_method(self) -> str:
 
     def set_sampling_method(self, val: Union[str, int] = None):
         """
-        Sets the feature's sampling method. If None is given, FeatureConstruction._default_sampling_method is used
+        Sets the feature's sampling method. If None is given, Feature._default_sampling_method is used
         Available methods:
         - 'current' or 0: Current time step will be used for prediction.
         - 'previous' or 1: Previous time step will be used for prediction.
@@ -64,7 +64,7 @@ def set_sampling_method(self, val: Union[str, int] = None):
         """
 
         if val is None:
-            self._sampling_method = FeatureConstruction.get_default_sampling_method()
+            self._sampling_method = Feature.get_default_sampling_method()
         else:
             self._sampling_method = _return_valid_sampling_method(val)
 
@@ -213,7 +213,22 @@ class Feature(FeatureBase):
     Represents a basic feature that is assumed to exist directly in the input DataFrame.
     Its `process` method simply retrieves the column by its name.
     """
-    pass
+
+    _default_sampling_method = 'previous'
+
+    @classmethod
+    def get_default_sampling_method(cls):
+        return Feature._default_sampling_method
+
+    @classmethod
+    def set_default_sampling_method(cls, val: Union[str, int]):
+        """
+        Sets the default sampling method for all features that do not have a custom sampling method. Available methods:
+        - 'current' or 0: Current time step will be used for prediction.
+        - 'previous' or 1: Previous time step will be used for prediction.
+        - 'mean_over_interval': Mean between current and previous time step will be used.
+        """
+        Feature._default_sampling_method = _return_valid_sampling_method(val)
 
 
 @register_feature
@@ -590,10 +605,6 @@ class FeatureConstruction:
     features = list[FeatureBase]()
     _default_sampling_method = 'previous'
 
-    @staticmethod
-    def get_default_sampling_method():
-        return FeatureConstruction._default_sampling_method
-
     @staticmethod
     def set_default_sampling_method(val: Union[str, int]):
         """
@@ -606,9 +617,9 @@ def set_default_sampling_method(val: Union[str, int]):
 
     @staticmethod
     def reset():
-        """Clears all registered features and input names."""
+        """Clears all registered features and input names. Furthermore, resets the default sampling method"""
         FeatureConstruction.features = list[FeatureBase]()
-        FeatureConstruction.set_default_sampling_method('previous')
+        Feature.set_default_sampling_method('previous')
 
     @staticmethod
     def append(f: FeatureBase):
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 9a39e38..49f8dbe 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import itertools
 from sklearn.model_selection import train_test_split
-from physXAI.preprocessing.constructed import FeatureConstruction, FeatureBase
+from physXAI.preprocessing.constructed import FeatureConstruction, FeatureBase, Feature
 from physXAI.preprocessing.training_data import TrainingData, TrainingDataMultiStep, TrainingDataGeneric
 from physXAI.utils.logging import get_full_path
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -167,14 +167,14 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
             DeprecationWarning(
                 "shift parameter is deprecated for SingleStep models and replaced by sampling_method, an attribute of "
                 "each Feature. This allows specifying individual 'shifts' for each Feature / input. A default sampling"
-                "method can be specified via FeatureConstruction.set_default_sampling_method(<your default sampling>)."
+                "method can be specified via Feature.set_default_sampling_method(<your default sampling>)."
             )
             DeprecationWarning(
-                f"shift parameter was given as shift={kwargs['shift']}. Setting FeatureConstruction.set_default_"
-                f"sampling_method(shift) and override possible individual sampling methods of all Features. If this is"
+                f"shift parameter was given as shift={kwargs['shift']}. Setting Feature.set_default_sampling_method"
+                f"(shift) and overriding possible individual sampling methods of all Features. If this is"
                 f"not intended, remove shift parameter when initializing PreprocessingSingleStep object!"
             )
-            FeatureConstruction.set_default_sampling_method(kwargs['shift'])
+            Feature.set_default_sampling_method(kwargs['shift'])
             for f in FeatureConstruction.features:
                 f.set_sampling_method(kwargs['shift'])
 
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 7a5b99c..3c0f074 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -117,12 +117,12 @@ class TestSamplingMethodsFaults(TestCase):
     # test case: unsupported str given as sampling method
     def test_unsupported_str(self):
         with self.assertRaises(ValueError):
-            FeatureConstruction.set_default_sampling_method('test')
+            Feature.set_default_sampling_method('test')
 
     # test case: unsupported type given for sampling method
     def test_unsupported_type(self):
         with self.assertRaises(TypeError):
-            FeatureConstruction.set_default_sampling_method(['current'])
+            Feature.set_default_sampling_method(['current'])
 
     # test case: lags of the same input have mismatching sampling methods
     def test_lag_with_mismatching_sampling_methods(self):
@@ -157,7 +157,7 @@ def test_sampling_method_use_default(file_path, inputs_tair, output_tair):
 def test_sampling_method_str(file_path, inputs_tair, output_tair):
     """test case: set default using str (setting default with int is done in test_different_sampling_methods)"""
 
-    FeatureConstruction.set_default_sampling_method('mean_over_interval')
+    Feature.set_default_sampling_method('mean_over_interval')
 
     x = Feature('oveHeaPumY_u')
     x.lag(1)
@@ -179,7 +179,7 @@ def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair
     """test case: different sampling methods given"""
 
     # set default
-    FeatureConstruction.set_default_sampling_method(0)
+    Feature.set_default_sampling_method(0)
 
     # Create lags
     x1 = Feature('reaTZon_y', sampling_method='previous')
@@ -323,8 +323,8 @@ def test_deprecated_shift(p_hp_data, inputs_php, output_php, file_path):
     m.epochs = 1
     m.online_pipeline(td, os.path.join(Logger._logger, 'model.keras'))
 
-    assert FeatureConstruction.get_default_sampling_method() == 'current'
-    FeatureConstruction.set_default_sampling_method('previous')  # reset default sampling
+    assert Feature.get_default_sampling_method() == 'current'
+    Feature.set_default_sampling_method('previous')  # reset default sampling
 
     # from config
     config_prep = {
@@ -347,7 +347,7 @@ def test_deprecated_shift(p_hp_data, inputs_php, output_php, file_path):
 
     a = PreprocessingData.from_config(config_prep)
     assert isinstance(a, PreprocessingSingleStep)
-    assert FeatureConstruction.get_default_sampling_method() == 'current'
+    assert Feature.get_default_sampling_method() == 'current'
 
     FeatureConstruction.reset()
 

From 93068a00beac8b9fb5225e2399c3f9f94da63a61 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Thu, 11 Dec 2025 16:42:31 +0100
Subject: [PATCH 33/42] Deleted deprecated code

---
 physXAI/preprocessing/constructed.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index 2683acf..a3045ed 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -603,17 +603,6 @@ class FeatureConstruction:
     """
 
     features = list[FeatureBase]()
-    _default_sampling_method = 'previous'
-
-    @staticmethod
-    def set_default_sampling_method(val: Union[str, int]):
-        """
-        Sets the default sampling method for all features that do not have a custom sampling method. Available methods:
-        - 'current' or 0: Current time step will be used for prediction.
-        - 'previous' or 1: Previous time step will be used for prediction.
-        - 'mean_over_interval': Mean between current and previous time step will be used.
-        """
-        FeatureConstruction._default_sampling_method = _return_valid_sampling_method(val)
 
     @staticmethod
     def reset():

From 096a863276ee350acc6ee71ceab3da2679f31ebb Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 17 Dec 2025 15:58:59 +0100
Subject: [PATCH 34/42] Implemented handling of constructed outputs

---
 .../different_sampling_methods.py             |   6 +-
 physXAI/preprocessing/constructed.py          |  40 +++-
 physXAI/preprocessing/preprocessing.py        | 217 ++++++++++--------
 unittests/test_coverage.py                    |  16 +-
 4 files changed, 163 insertions(+), 116 deletions(-)

diff --git a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
index 84c5afa..5ff4676 100644
--- a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
+++ b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
@@ -31,7 +31,7 @@
           Feature('weaSta_reaWeaHDirNor_y', sampling_method='mean_over_interval'), 'oveHeaPumY_u',
           'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
 # Output feature
-output = 'Change(T_zone)'
+output = ['Change(t_air)']
 
 """ 
 The constructed features are automatically added to the data via 'physXAI.preprocessing.constructed.py' 
@@ -53,9 +53,11 @@
 y = x1 + lx1[0]
 z = y + x1
 z.rename('example_feature_two')
-z.set_sampling_method('mean_over_interval')
 e = FeatureExp(x1-273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
 
+change_tair = x1 - lx1[0]
+change_tair.rename('Change(t_air)')
+
 # add dummy features to inputs
 inputs.extend([z, e])
 
diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index a3045ed..eb2a45e 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -14,12 +14,13 @@ def _return_valid_sampling_method(v: Union[int, str]):
         return 'current'
     elif v in ['previous', 1]:
         return 'previous'
-    elif v == 'mean_over_interval':
-        return 'mean_over_interval'
+    elif v in ['mean_over_interval', '_']:
+        return v
     else:
         raise ValueError(
             f"Value of sampling method not supported, value is: {v}. Sampling method must be 'current' "
-            f"(or 0 if s is int), 'previous' (or 1 if s is int) or 'mean_over_interval'.")
+            f"(or 0 if sampling_method is int), 'previous' (or 1 if sampling_method is int) or 'mean_over_interval'. "
+            f"In case of deactivated sampling (for outputs), sampling_method must be '_'.")
 
 
 class FeatureBase(ABC):
@@ -680,12 +681,39 @@ def recursive_search(feature):
         return res
 
     @staticmethod
-    def process_inputs(inputs: list[Union[str, FeatureBase]]) -> list[str]:
+    def get_constructed_features(l: list[str] = None) -> list[str]:
+        """
+        returns a list of the names of all constructed features (features that have a type other than 'Feature')
+        - within the given list or
+        - of all constructed features if list is None
+
+        Args:
+            l (list[str]): list of feature names to search in
+
+        Returns:
+            list[str]: the list of the names of the constructed features
+        """
+
+        # if no list is given, search in all features
+        if not l:
+            l = FeatureConstruction.features
+
+        res = list()
+        for f in FeatureConstruction.features:
+            if not isinstance(f, Feature) and (f.feature in l):
+                res.append(f.feature)  # name of the feature
+
+        return res
+
+    @staticmethod
+    def create_features(inputs: list[Union[str, FeatureBase]], no_sampling_method: bool = False) -> list[str]:
         """
         Creates a Feature for all inputs that are not yet created as features
 
         Args:
              inputs (list(Union[str, FeatureBase])): List of column names or Features to be used as input features.
+             no_sampling_method (bool): deactivate sampling_method for outputs, default = False.
+                                        If deactivated, sampling_method will be set to '_'
 
         Returns:
             list[str]: list of column names of all input features
@@ -696,11 +724,15 @@ def process_inputs(inputs: list[Union[str, FeatureBase]]) -> list[str]:
         for inp in inputs:
             if isinstance(inp, FeatureBase):
                 input_str.append(inp.feature)  # get name of feature (which is used as column name)
+                if no_sampling_method:
+                    inp.set_sampling_method('_')
             elif isinstance(inp, str):
                 input_str.append(inp)
                 # check if a Feature with the given name (inp) was already created, otherwise create it
                 if not any(inp == f.feature for f in FeatureConstruction.features):
                     Feature(name=inp)
+                if no_sampling_method:
+                    FeatureConstruction.get_feature(inp).set_sampling_method('_')
             else:
                 raise TypeError(f"Only inputs with types 'str' or 'FeatureBase' allowed, got type {type(inp)} instead")
 
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 49f8dbe..592bb0f 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import itertools
 from sklearn.model_selection import train_test_split
-from physXAI.preprocessing.constructed import FeatureConstruction, FeatureBase, Feature
+from physXAI.preprocessing.constructed import FeatureConstruction, FeatureBase, Feature, FeatureTwo
 from physXAI.preprocessing.training_data import TrainingData, TrainingDataMultiStep, TrainingDataGeneric
 from physXAI.utils.logging import get_full_path
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -46,10 +46,11 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
         self.csv_header = csv_header
         self.csv_skiprows = csv_skiprows
 
-        self.inputs: list[str] = FeatureConstruction.process_inputs(inputs)
+        self.inputs: list[str] = FeatureConstruction.create_features(inputs)
         if isinstance(output, str):
             output = [output]
-        self.output: list[str] = output
+        # outputs shouldn't have any sampling method
+        self.output: list[str] = FeatureConstruction.create_features(output, no_sampling_method=True)
 
         self.time_step = time_step
 
@@ -197,18 +198,18 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
                                                        and target (y) DataFrames.
         """
 
-        # extract the names of all features in inputs and outputs that are based on lagged features
-        lag_based_features = FeatureConstruction.get_features_including_lagged_features(self.inputs + self.output)
+        # extract the names of all constructed features
+        constructed_input_features = FeatureConstruction.get_constructed_features(self.inputs)
+        constructed_output_features = FeatureConstruction.get_constructed_features(self.output)
 
-        inputs_without_lags = [inp for inp in self.inputs if inp not in lag_based_features]
+        # Only apply sampling method to those features which are not constructed features
+        # but which data is taken directly from the data frame
+        inputs_without_constructed = [inp for inp in self.inputs if inp not in constructed_input_features]
+        output_without_constructed = [out for out in self.output if out not in constructed_output_features]
 
-        # Applies feature constructions defined in `FeatureConstruction`.
-        # Only apply for those features that are not lags since lags must be constructed after sampling the data
-        # according to the given time step
-        FeatureConstruction.process(df, feature_names=inputs_without_lags + [out for out in self.output if out not in inputs_without_lags])
-        features_without_lags: list[FeatureBase] = [FeatureConstruction.get_feature(inp) for inp in inputs_without_lags]
+        features_without_constructed: list[FeatureBase] = [FeatureConstruction.get_feature(inp) for inp in inputs_without_constructed]
 
-        df = df[inputs_without_lags + [out for out in self.output if out not in inputs_without_lags]]
+        df = df[inputs_without_constructed + output_without_constructed]
 
         # Nan handling in first and last rows
         non_nan_rows = df.notna().all(axis=1)
@@ -216,103 +217,93 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         last_valid_index = non_nan_rows.iloc[::-1].idxmax() if non_nan_rows.any() else None
         df = df.loc[first_valid_index:last_valid_index]
 
-        def get_mean_over_interval(y: pd.DataFrame, x: pd.DataFrame):
-            """return mean values of x on target sampling (index of y)"""
-            def pairwise(iterable: Iterable):
-                "s -> (s0,s1), (s1,s2), (s2, s3), ..."
-                a, b = itertools.tee(iterable)
-                next(b, None)
-                return zip(a, b)
-
-            original_grid = np.array(x.index)
-            results = []
-            for i, j in pairwise(y.index): # output interval is target grid
-                slicer = np.logical_and(original_grid >= i, original_grid < j)
-                d = {'Index': j}
-                for inp in x.columns:
-                    d[inp] = x[inp][slicer].mean()
-                results.append(d)
-
-            x = pd.DataFrame(results).set_index('Index')
-
-            return x
-
-        # output is independent of sampling of inputs -> sample according to time step already
-        y = df[self.output].copy()
-        y = self.sample_df_according_to_timestep(y)
-
-        X = df[inputs_without_lags].copy()
-
-        if all('current' == f.get_sampling_method() for f in features_without_lags):
-            # filter / sample data
-            X = self.sample_df_according_to_timestep(X)
-            # nothing more to do here
-        elif all('previous' == f.get_sampling_method() for f in features_without_lags):
-            # filter / sample data
-            X = self.sample_df_according_to_timestep(X)
-
-            # shift data by 1 and shorten DataFrames accordingly
-            X = X.shift(1)
-            y = y.iloc[1:]
-            X = X.iloc[1:]
-        elif all('mean_over_interval' == f.get_sampling_method() for f in features_without_lags):
-            X = get_mean_over_interval(y, X)
-            # synchronize length between X and y
-            y = y.iloc[1:]
-
-        else:  # different inputs have different sampling methods
-            res = []
-            previous_or_mean_in_sampling_methods = []
-            for f in features_without_lags:
-                # only process inputs with sampling method mean_over_interval first since X cannot be sampled
-                # to the actual required time steps until the intermediate values were taken into the mean
-                if f.get_sampling_method() == 'mean_over_interval':
-                    res.append(get_mean_over_interval(y, X[[f.feature]]))
-                    previous_or_mean_in_sampling_methods.append(True)
-
-            # sample X according to required time step
-            X = self.sample_df_according_to_timestep(X)
-            # process inputs with sampling methods 'current' and 'previous'
-            for f in features_without_lags:
-                _x = X[[f.feature]]
-                if f.get_sampling_method() == 'current':
-                    # no transformation needed
-                    res.append(_x)
-                    previous_or_mean_in_sampling_methods.append(False)
-                elif f.get_sampling_method() == 'previous':
-                    # shift by 1
-                    _x = _x.shift(1)
-                    _x = _x.iloc[1:]
-                    res.append(_x)
-                    previous_or_mean_in_sampling_methods.append(True)
-                elif f.get_sampling_method() == 'mean_over_interval':
-                    continue
-                else:
-                    raise NotImplementedError(f"Sampling method '{f.get_sampling_method()}' not implemented.")
-
-            X = pd.concat(res, axis=1)
-            X = X.sort_index(ascending=True)
+        # sample input data; different inputs can have different sampling methods
+        res = []
+        previous_or_mean_in_sampling_methods = []
+        X = df[inputs_without_constructed].copy()
+        target_grid = self.sample_df_according_to_timestep(df).index
+        for f in features_without_constructed:
+            # only process inputs with sampling method mean_over_interval first since X cannot be sampled
+            # to the actual required time steps until the intermediate values were taken into the mean
+            if f.get_sampling_method() == 'mean_over_interval':
+                res.append(get_mean_over_interval(X[[f.feature]], target_grid))
+                previous_or_mean_in_sampling_methods.append(True)
+
+        # sample X according to required time step
+        X = self.sample_df_according_to_timestep(X)
+        # process inputs with sampling methods 'current' and 'previous'
+        for f in features_without_constructed:
+            _x = X[[f.feature]]
+            if f.get_sampling_method() == 'current':
+                # no transformation needed
+                res.append(_x)
+                previous_or_mean_in_sampling_methods.append(False)
+            elif f.get_sampling_method() == 'previous':
+                # shift by 1
+                _x = _x.shift(1)
+                _x = _x.iloc[1:]
+                res.append(_x)
+                previous_or_mean_in_sampling_methods.append(True)
+            elif f.get_sampling_method() == 'mean_over_interval':
+                continue
+            else:
+                raise NotImplementedError(f"Sampling method '{f.get_sampling_method()}' not implemented.")
+        # concatenate sampled input data
+        X = pd.concat(res, axis=1)
+        X = X.sort_index(ascending=True)
+
+        # Sampling methods 'previous' and 'mean_over_interval' reduce available data points by 1.
+        if any(previous_or_mean_in_sampling_methods):
+            # if at least one of the features uses 'current' as sampling method, shorten X
+            if not all(previous_or_mean_in_sampling_methods):
+                X = X.iloc[1:]
+
+        if X.isnull().values.any():
+            if self.ignore_nan:
+                X.dropna(inplace=True)
+            else:
+                raise ValueError(
+                    "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set "
+                    "ignore_nan=True in PreprocessingSingleStep.")
 
+        # sample output data
+        if len(output_without_constructed) != 0:  # at least one non-constructed output feature
+            y = df[output_without_constructed].copy()
+            y = self.sample_df_according_to_timestep(y)
             # Sampling methods 'previous' and 'mean_over_interval' reduce available data points by 1.
-            # Therefore, lengths of X and y have to be synchronized
+            # synchronize length of X and y
             if any(previous_or_mean_in_sampling_methods):
                 y = y.iloc[1:]
-                # if at least one of the features uses 'current' as sampling method, shorten X
-                if not all(previous_or_mean_in_sampling_methods):
-                    X = X.iloc[1:]
+            if y.isnull().values.any():
+                if self.ignore_nan:
+                    y.dropna(inplace=True)
+                else:
+                    raise ValueError(
+                        "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended,"
+                        "set ignore_nan=True in PreprocessingSingleStep.")
 
-        res_df = pd.concat([X, y], axis=1)
+            res_df = pd.concat([X, y], axis=1)
 
-        if res_df.isnull().values.any():
-            if self.ignore_nan:
-                res_df.dropna(inplace=True)
-            else:
-                raise ValueError(
-                    "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set "
-                    "ignore_nan=True in PreprocessingSingleStep.")
+        else:  # only constructed outputs
+            res_df = X
+
+        # Applies feature constructions defined in `FeatureConstruction`
+        FeatureConstruction.process(res_df, feature_names=constructed_input_features + constructed_output_features)
+
+        # assume constructed outputs solely base on features with sampling current or sampling previous / mean_over_interval
 
-        # Applies feature constructions defined in `FeatureConstruction` to the lagged inputs
-        FeatureConstruction.process(res_df, feature_names=lag_based_features)
+        if any(previous_or_mean_in_sampling_methods):
+            methods = ['previous', 'mean_over_interval']
+            # if constructed output features are based on input features with sampling previous or mean_over_interval,
+            # the constructed output has to be shifted to invert the shift of the input features
+            for out in constructed_output_features:
+                out_feature = FeatureConstruction.get_feature(out)
+                if isinstance(out_feature, FeatureTwo):
+                    if out_feature.feature1.get_sampling_method() in methods or out_feature.feature2.get_sampling_method() in methods:
+                        res_df[out_feature.feature] = res_df[out_feature.feature].shift(-1)  # shift
+                else: # constructed feature that doesn't consist of two features
+                    if out_feature.f1.get_sampling_method() in methods:
+                        res_df[out_feature.feature] = res_df[out_feature.feature].shift(-1)  # shift
 
         # drop NaNs occurring due to creation of lags
         res_df.dropna(inplace=True)
@@ -388,6 +379,28 @@ def from_config(cls, config: dict) -> 'PreprocessingSingleStep':
         return cls(**config)
 
 
+def get_mean_over_interval(x: pd.DataFrame, target_grid: pd.DataFrame.index) -> pd.DataFrame:
+    """samples and returns x on target grid taking the mean over the interval (between the grid indices)"""
+    def pairwise(iterable: Iterable):
+        "s -> (s0,s1), (s1,s2), (s2, s3), ..."
+        a, b = itertools.tee(iterable)
+        next(b, None)
+        return zip(a, b)
+
+    original_grid = np.array(x.index)
+    results = []
+    for i, j in pairwise(target_grid):
+        slicer = np.logical_and(original_grid >= i, original_grid < j)
+        d = {'Index': j}
+        for inp in x.columns:
+            d[inp] = x[inp][slicer].mean()
+        results.append(d)
+
+    x = pd.DataFrame(results).set_index('Index')
+
+    return x
+
+
 class PreprocessingMultiStep(PreprocessingData):
     """
     Handles preprocessing for multi-step forecasting models, typically RNNs.
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 3c0f074..444c1a2 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -145,8 +145,6 @@ def test_sampling_method_use_default(file_path, inputs_tair, output_tair):
     prep = PreprocessingSingleStep(inputs_tair, output_tair)
     td = prep.pipeline(file_path)
 
-    assert len(inputs_tair) == len(FeatureConstruction.features)
-
     for inp in inputs_tair:
         f = FeatureConstruction.get_feature(inp)
         assert f.get_sampling_method() == 'previous'
@@ -166,8 +164,6 @@ def test_sampling_method_str(file_path, inputs_tair, output_tair):
     prep = PreprocessingSingleStep(inputs_tair, output_tair, time_step=4)
     td = prep.pipeline(file_path)
 
-    assert len(inputs_tair) == len(FeatureConstruction.features)
-
     for inp in inputs_tair:
         f = FeatureConstruction.get_feature(inp)
         assert f.get_sampling_method() == 'mean_over_interval'
@@ -175,7 +171,7 @@ def test_sampling_method_str(file_path, inputs_tair, output_tair):
     FeatureConstruction.reset()
 
 
-def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair):
+def test_different_sampling_methods(file_path, inputs_tair_extended):
     """test case: different sampling methods given"""
 
     # set default
@@ -193,13 +189,16 @@ def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair
     y = x1 + lx1[0]
     z = y + x1
     z.rename('test_feature_two')
-    z.set_sampling_method('mean_over_interval')
     e = FeatureExp(x1 - 273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
 
     inputs_tair_extended.extend([z, e])
 
+    # output
+    change_tair = x1 - lx1[0]
+    change_tair.rename('Change(t_air)')
+
     # Create & process Training data
-    prep = PreprocessingSingleStep(inputs_tair_extended, output_tair, time_step=4)
+    prep = PreprocessingSingleStep(inputs_tair_extended, [change_tair], time_step=4)
     td = prep.pipeline(file_path)
 
     # Build & train Classical ANN
@@ -210,8 +209,9 @@ def test_different_sampling_methods(file_path, inputs_tair_extended, output_tair
     assert x1.get_sampling_method() == 'previous' and lx1[1].get_sampling_method() == 'previous'
     assert x2.get_sampling_method() == 'current' and lx2.get_sampling_method() == 'current'
     assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').get_sampling_method() == 'mean_over_interval'
-    assert FeatureConstruction.get_feature('test_feature_two').get_sampling_method() == 'mean_over_interval'
+    assert FeatureConstruction.get_feature('test_feature_two').get_sampling_method() == 'current'
     assert e.get_sampling_method() == 'previous'
+    assert change_tair.get_sampling_method() == '_'
 
     FeatureConstruction.reset()
 

From 203c601e69ddab5e2a98a476b07ea143040775ae Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Wed, 17 Dec 2025 15:05:27 +0000
Subject: [PATCH 35/42] Update coverage badge [skip ci]

---
 build/reports/coverage.svg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/reports/coverage.svg b/build/reports/coverage.svg
index 1c7007c..b3e8ba0 100644
--- a/build/reports/coverage.svg
+++ b/build/reports/coverage.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">89%</text>
-        <text x="80" y="14">89%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">88%</text>
+        <text x="80" y="14">88%</text>
     </g>
 </svg>

From bafeb3cb9812dd9bb87b9f4b0eb7fe4a399d0f8e Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Thu, 18 Dec 2025 00:22:29 +0100
Subject: [PATCH 36/42] restructured sampling_method: sampling_method of
 constructed features is determined by their base feature(s)

---
 .../different_sampling_methods.py             | 17 ++--
 physXAI/preprocessing/constructed.py          | 85 +++++++++++++++----
 unittests/test_coverage.py                    | 12 ++-
 3 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
index 5ff4676..4048da2 100644
--- a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
+++ b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
@@ -6,8 +6,8 @@
 """
 This script demonstrates the usage of different sampling methods. It is not physically meaningful.
 
-When creating a Feature (or any subclass of FeatureBase like FeatureLag, FeatureAdd etc.), a sampling method can be
-specified.
+When creating a Feature, a sampling method can be specified.
+For constructed features, no sampling method is necessary. It is assigned based on their corresponding base feature(s)
 
 sampling_method (Union[str, int]): Time step of the input data used to predict the output.
     - if None: Feature.get_default_sampling_method() is used
@@ -30,8 +30,8 @@
 inputs = ['reaTZon_y', 'reaTZon_y_lag1', 'reaTZon_y_lag2', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTDryBul_y_lag1',
           Feature('weaSta_reaWeaHDirNor_y', sampling_method='mean_over_interval'), 'oveHeaPumY_u',
           'oveHeaPumY_u_lag1', 'oveHeaPumY_u_lag2']
-# Output feature
-output = ['Change(t_air)']
+# Output feature. Can include names of constructed features as well
+output = ['Change(T_air)']
 
 """ 
 The constructed features are automatically added to the data via 'physXAI.preprocessing.constructed.py' 
@@ -52,14 +52,13 @@
 # dummy Features
 y = x1 + lx1[0]
 z = y + x1
-z.rename('example_feature_two')
+z.rename('example_feature_two')  # since z is a constructed feature based on x1, its sampling_method will be previous
 e = FeatureExp(x1-273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
+inputs.extend([z, e])  # add dummy features to inputs
 
+# construct output
 change_tair = x1 - lx1[0]
-change_tair.rename('Change(t_air)')
-
-# add dummy features to inputs
-inputs.extend([z, e])
+change_tair.rename('Change(T_air)')
 
 # Create Training data
 # Time step defines target sampling: if original sampling of data is in 15min intervals, it is resampled to 1h intervals
diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index eb2a45e..69c1ed5 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -204,6 +204,8 @@ def feature_from_config(item_conf: dict) -> 'FeatureBase':
     """
     class_name = item_conf['class_name']
     feature_class = CONSTRUCTED_CLASS_REGISTRY[class_name]
+    if 'sampling_method' in item_conf.keys() and item_conf['sampling_method'] == '_':
+        item_conf['ignore_sampling_for_output'] = True
     f1f = feature_class.from_config(item_conf)
     return f1f
 
@@ -232,6 +234,52 @@ def set_default_sampling_method(cls, val: Union[str, int]):
         Feature._default_sampling_method = _return_valid_sampling_method(val)
 
 
+def get_sampling_from_base(base_features: Union[FeatureBase, list[FeatureBase]], **kwargs) -> [str, list]:
+    """
+    Returns the appropriate sampling_method for a constructed feature based on its base feature(s)
+
+    Args:
+         base_features (Union[FeatureBase, list[FeatureBase]]): single base feature or list of max. two base features
+         **kwargs: additional keyword arguments. If sampling_method is given in kwargs as well, its validity is checked
+
+    Returns:
+        sampling_method (str): sampling method
+        kwargs: kwargs which does not contain the key 'sampling_method' (anymore)
+    """
+
+    if not isinstance(base_features, list):
+        base_features = [base_features]
+
+    assert len(base_features) <= 2, f'Expected a maximum of two features, got {len(base_features)} instead'
+
+    sampling = []
+    for f in base_features:
+        if isinstance(f, FeatureBase):
+            sampling.append(f.get_sampling_method())
+        elif isinstance(f, (int, float)):  # FeatureTwo can be built with int or float values
+            continue
+        else:
+            raise ValueError(f"Expected type [FeatureBase, int, float], got type {type(f)} instead")
+
+    if len(sampling) > 1:
+        assert len(set(sampling)) == 1, f'Sampling methods of base feature are not equal, got {sampling}'
+
+    sampling_method = sampling[0]
+
+    if 'sampling_method' in kwargs.keys():
+        if 'ignore_sampling_for_output' in kwargs.keys() and kwargs['ignore_sampling_for_output']:
+            # necessary for feature construction from config
+            sampling_method = '_'
+        else:
+            assert _return_valid_sampling_method(kwargs['sampling_method']) == sampling_method, (
+                f"Constructed features must have the same sampling method as their base feature(s). Sampling method of "
+                f"base feature(s) is {sampling_method} but {kwargs['sampling_method']} was given as sampling method."
+            )
+        kwargs.__delitem__('sampling_method')  # constructor must not get more than one arg with the same key
+
+    return sampling_method, kwargs
+
+
 @register_feature
 class FeatureLag(FeatureBase):
     """
@@ -254,23 +302,13 @@ def __init__(self, f: Union[FeatureBase, str], lag: int, name: str = None, **kwa
             self.origf: str = f.feature
             if name is None:
                 name = f.feature + f'_lag{lag}'
-
-            # lags must have the same sampling_method as their base feature
-            sampling_method = f.get_sampling_method()
         else:
             self.origf: str = f
             if name is None:
                 name = f + f'_lag{lag}'
 
-            # lags must have the same sampling_method as their base feature
-            sampling_method = FeatureConstruction.get_feature(f).get_sampling_method()
-
-        if 'sampling_method' in kwargs.keys():
-            assert kwargs['sampling_method'] == sampling_method, (
-                f"lags must have the same sampling method as their base feature. Sampling method of base feature is"
-                f" {sampling_method} but for lag {kwargs['sampling_method']} was given as sampling method."
-            )
-            kwargs.__delitem__('sampling_method')  # constructor must not get more than one arg with the same key
+        # lags must have the same sampling_method as their base feature
+        sampling_method, kwargs = get_sampling_from_base(FeatureConstruction.get_feature(self.origf), **kwargs)
 
         super().__init__(name, sampling_method=sampling_method, **kwargs)
         self.lag: int = lag
@@ -292,8 +330,8 @@ class FeatureTwo(FeatureBase, ABC):
     Examples: FeatureAdd (f1 + f2), FeatureSub (f1 - f2).
     """
 
-    def __init__(self, feature1: Union[FeatureBase, int, float], feature2: Union[FeatureBase, int, float], name: str = None,
-                 **kwargs):
+    def __init__(self, feature1: Union[FeatureBase, int, float], feature2: Union[FeatureBase, int, float],
+                 name: str = None, **kwargs):
         """
         Initializes a FeatureTwo instance.
 
@@ -315,7 +353,10 @@ def __init__(self, feature1: Union[FeatureBase, int, float], feature2: Union[Fea
             f2n = str(feature2)
         if name is None:
             name = self.name(f1n, f2n)
-        super().__init__(name, **kwargs)
+
+        # constructed features must have the same sampling_method as their base features
+        sampling_method, kwargs = get_sampling_from_base([feature1, feature2], **kwargs)
+        super().__init__(name, sampling_method=sampling_method, **kwargs)
         self.feature1 = feature1
         self.feature2 = feature2
 
@@ -493,7 +534,9 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         self.f1: FeatureBase = f1
         if name is None:
             name = 'exp(' + f1.feature + ')'
-        super().__init__(name, **kwargs)
+        # constructed features must have the same sampling_method as their base features
+        sampling_method, kwargs = get_sampling_from_base(f1, **kwargs)
+        super().__init__(name, sampling_method=sampling_method, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
         if self.feature not in df.columns:
@@ -523,7 +566,9 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         self.f1: FeatureBase = f1
         if name is None:
             name = 'sin(' + f1.feature + ')'
-        super().__init__(name, **kwargs)
+        # constructed features must have the same sampling_method as their base features
+        sampling_method, kwargs = get_sampling_from_base(f1, **kwargs)
+        super().__init__(name, sampling_method=sampling_method, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
         if self.feature not in df.columns:
@@ -553,7 +598,9 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         self.f1: FeatureBase = f1
         if name is None:
             name = 'cos(' + f1.feature + ')'
-        super().__init__(name, **kwargs)
+        # constructed features must have the same sampling_method as their base features
+        sampling_method, kwargs = get_sampling_from_base(f1, **kwargs)
+        super().__init__(name, sampling_method=sampling_method, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
         if self.feature not in df.columns:
@@ -583,6 +630,8 @@ class FeatureConstant(FeatureBase):
 
     def __init__(self, c: float, name: str, **kwargs):
         self.c = c
+        if 'sampling_method' in kwargs.keys():
+            UserWarning(f"Using 'sampling_method' for {self.__class__} does not have any effect.")
         super().__init__(name, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 444c1a2..eab2736 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -195,7 +195,7 @@ def test_different_sampling_methods(file_path, inputs_tair_extended):
 
     # output
     change_tair = x1 - lx1[0]
-    change_tair.rename('Change(t_air)')
+    change_tair.rename('Change(T_air)')
 
     # Create & process Training data
     prep = PreprocessingSingleStep(inputs_tair_extended, [change_tair], time_step=4)
@@ -209,7 +209,7 @@ def test_different_sampling_methods(file_path, inputs_tair_extended):
     assert x1.get_sampling_method() == 'previous' and lx1[1].get_sampling_method() == 'previous'
     assert x2.get_sampling_method() == 'current' and lx2.get_sampling_method() == 'current'
     assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').get_sampling_method() == 'mean_over_interval'
-    assert FeatureConstruction.get_feature('test_feature_two').get_sampling_method() == 'current'
+    assert FeatureConstruction.get_feature('test_feature_two').get_sampling_method() == 'previous'
     assert e.get_sampling_method() == 'previous'
     assert change_tair.get_sampling_method() == '_'
 
@@ -485,6 +485,8 @@ def test_models_rnn(file_path):
     m = RNNModel(epochs=1, rnn_layer='RNN', early_stopping_epochs=None)
     m.pipeline(td, save_model=False, plot=False)
 
+    FeatureConstruction.reset()
+
 
 def test_read_setup():
 
@@ -497,6 +499,7 @@ def test_read_setup():
         config_prep = json.load(f)
     a = PreprocessingData.from_config(config_prep)
     assert isinstance(a, PreprocessingSingleStep)
+    FeatureConstruction.reset()
 
     save_name_preprocessing = 'preprocessing_config2.json'
     path = os.path.join(Logger._logger, save_name_preprocessing)
@@ -504,12 +507,14 @@ def test_read_setup():
         config_prep = json.load(f)
     b = PreprocessingData.from_config(config_prep)
     assert isinstance(b, PreprocessingMultiStep)
+    FeatureConstruction.reset()
 
     save_name_constructed = Logger.save_name_constructed
     path = os.path.join(Logger._logger, save_name_constructed)
     with open(path, "r") as f:
         config_constructed = json.load(f)
     FeatureConstruction.from_config(config_constructed)
+    FeatureConstruction.reset()
 
     save_name_model = Logger.save_name_model_config
     path = os.path.join(Logger._logger, save_name_model)
@@ -570,5 +575,6 @@ def test_feature_selection_multi(monkeypatch, tair_data_delta, tair_data_noval ,
     m = ClassicalANNModel(epochs=1, n_neurons=4)
     recursive_feature_elimination_pipeline(file_path, prep2, m, use_multi_step_error=False)
     m.pipeline(td2, save_model=False, plot=False)
-    Logger.log_setup(prep, None)
+    Logger.log_setup(prep, None, save_name_preprocessing='preprocessing_feature-selection-multi.json',
+                     save_name_constructed='constructed_config_feature-selection-multi.json')
     Logger.save_training_data(td, path=os.path.join(Logger._logger, 'training_data2.json'))

From 79db1dc81ab9af4cb47aa31e49d5aaa279df0a74 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Thu, 18 Dec 2025 00:58:02 +0100
Subject: [PATCH 37/42] fixed testing bug: resetting
 FeatureConstruction.features also affected p_hp_data

---
 unittests/test_coverage.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index eab2736..5980116 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -529,12 +529,13 @@ def test_read_setup():
     AbstractModel.model_from_config(config_model)
 
 
-def test_feature_selection(monkeypatch, p_hp_data, file_path):
+def test_feature_selection(monkeypatch, inputs_php, output_php, file_path):
     # Setup up logger for saving
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
     monkeypatch.setattr('builtins.input', lambda _: "2")
 
-    prep = p_hp_data[0]
+    # Create Training data
+    prep = PreprocessingSingleStep(inputs_php, output_php)
 
     m = LinearRegressionModel()
 

From c2f002a9af94e61f256c37b65ae572fbdf52f588 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Thu, 18 Dec 2025 17:33:43 +0100
Subject: [PATCH 38/42] Refactoring of sampling, corrected use of UserWarnings

---
 physXAI/preprocessing/constructed.py   |  36 ++--
 physXAI/preprocessing/preprocessing.py | 166 ++++---------------
 physXAI/preprocessing/sampling.py      | 218 +++++++++++++++++++++++++
 unittests/test_coverage.py             |   9 +-
 4 files changed, 268 insertions(+), 161 deletions(-)
 create mode 100644 physXAI/preprocessing/sampling.py

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index 69c1ed5..08e9616 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -2,25 +2,8 @@
 from typing import Type, Union
 import numpy as np
 from pandas import DataFrame, Series
-
-
-def _return_valid_sampling_method(v: Union[int, str]):
-    """ check the validity of the given sampling method and return a string if val is int """
-
-    if not isinstance(v, (int, str)):
-        raise TypeError(f'Type of sampling method not supported. Type is {type(v)}, must be int or str.')
-
-    if v in ['current', 0]:
-        return 'current'
-    elif v in ['previous', 1]:
-        return 'previous'
-    elif v in ['mean_over_interval', '_']:
-        return v
-    else:
-        raise ValueError(
-            f"Value of sampling method not supported, value is: {v}. Sampling method must be 'current' "
-            f"(or 0 if sampling_method is int), 'previous' (or 1 if sampling_method is int) or 'mean_over_interval'. "
-            f"In case of deactivated sampling (for outputs), sampling_method must be '_'.")
+import warnings
+from physXAI.preprocessing.sampling import _return_valid_sampling_method
 
 
 class FeatureBase(ABC):
@@ -234,7 +217,7 @@ def set_default_sampling_method(cls, val: Union[str, int]):
         Feature._default_sampling_method = _return_valid_sampling_method(val)
 
 
-def get_sampling_from_base(base_features: Union[FeatureBase, list[FeatureBase]], **kwargs) -> [str, list]:
+def get_sampling_from_base_feature(base_features: Union[FeatureBase, list[FeatureBase]], **kwargs) -> [str, list]:
     """
     Returns the appropriate sampling_method for a constructed feature based on its base feature(s)
 
@@ -308,7 +291,7 @@ def __init__(self, f: Union[FeatureBase, str], lag: int, name: str = None, **kwa
                 name = f + f'_lag{lag}'
 
         # lags must have the same sampling_method as their base feature
-        sampling_method, kwargs = get_sampling_from_base(FeatureConstruction.get_feature(self.origf), **kwargs)
+        sampling_method, kwargs = get_sampling_from_base_feature(FeatureConstruction.get_feature(self.origf), **kwargs)
 
         super().__init__(name, sampling_method=sampling_method, **kwargs)
         self.lag: int = lag
@@ -355,7 +338,7 @@ def __init__(self, feature1: Union[FeatureBase, int, float], feature2: Union[Fea
             name = self.name(f1n, f2n)
 
         # constructed features must have the same sampling_method as their base features
-        sampling_method, kwargs = get_sampling_from_base([feature1, feature2], **kwargs)
+        sampling_method, kwargs = get_sampling_from_base_feature([feature1, feature2], **kwargs)
         super().__init__(name, sampling_method=sampling_method, **kwargs)
         self.feature1 = feature1
         self.feature2 = feature2
@@ -535,7 +518,7 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         if name is None:
             name = 'exp(' + f1.feature + ')'
         # constructed features must have the same sampling_method as their base features
-        sampling_method, kwargs = get_sampling_from_base(f1, **kwargs)
+        sampling_method, kwargs = get_sampling_from_base_feature(f1, **kwargs)
         super().__init__(name, sampling_method=sampling_method, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
@@ -567,7 +550,7 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         if name is None:
             name = 'sin(' + f1.feature + ')'
         # constructed features must have the same sampling_method as their base features
-        sampling_method, kwargs = get_sampling_from_base(f1, **kwargs)
+        sampling_method, kwargs = get_sampling_from_base_feature(f1, **kwargs)
         super().__init__(name, sampling_method=sampling_method, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
@@ -599,7 +582,7 @@ def __init__(self, f1: FeatureBase, name: str = None, **kwargs):
         if name is None:
             name = 'cos(' + f1.feature + ')'
         # constructed features must have the same sampling_method as their base features
-        sampling_method, kwargs = get_sampling_from_base(f1, **kwargs)
+        sampling_method, kwargs = get_sampling_from_base_feature(f1, **kwargs)
         super().__init__(name, sampling_method=sampling_method, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
@@ -631,7 +614,8 @@ class FeatureConstant(FeatureBase):
     def __init__(self, c: float, name: str, **kwargs):
         self.c = c
         if 'sampling_method' in kwargs.keys():
-            UserWarning(f"Using 'sampling_method' for {self.__class__} does not have any effect.")
+            warnings.warn(f"Using 'sampling_method' for {self.__class__.__name__} does not have any effect.",
+                          UserWarning)
         super().__init__(name, **kwargs)
 
     def process(self, df: DataFrame) -> Series:
diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 592bb0f..958ea0c 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -1,12 +1,13 @@
 import os
 from abc import ABC, abstractmethod
-from typing import Optional, Union, Iterable
+from typing import Optional, Union
 import numpy as np
 import pandas as pd
-import itertools
+import warnings
 from sklearn.model_selection import train_test_split
 from physXAI.preprocessing.constructed import FeatureConstruction, FeatureBase, Feature, FeatureTwo
 from physXAI.preprocessing.training_data import TrainingData, TrainingDataMultiStep, TrainingDataGeneric
+from physXAI.preprocessing.sampling import Sampling
 from physXAI.utils.logging import get_full_path
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import keras
@@ -94,11 +95,6 @@ def load_data(self, file_path: str) -> pd.DataFrame:
 
         return df
 
-    def sample_df_according_to_timestep(self, df: pd.DataFrame):
-        filtering = (df.index - df.index[0]) % self.time_step == 0
-        df = df[filtering]
-        return df
-
     @abstractmethod
     def pipeline(self, file_path: str) -> TrainingDataGeneric:
         """
@@ -165,16 +161,14 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
         """
 
         if 'shift' in kwargs.keys():
-            DeprecationWarning(
-                "shift parameter is deprecated for SingleStep models and replaced by sampling_method, an attribute of "
-                "each Feature. This allows specifying individual 'shifts' for each Feature / input. A default sampling"
-                "method can be specified via Feature.set_default_sampling_method(<your default sampling>)."
-            )
-            DeprecationWarning(
-                f"shift parameter was given as shift={kwargs['shift']}. Setting Feature.set_default_sampling_method"
-                f"(shift) and overriding possible individual sampling methods of all Features. If this is"
-                f"not intended, remove shift parameter when initializing PreprocessingSingleStep object!"
-            )
+            warnings.warn("shift parameter is deprecated for SingleStep models and replaced by sampling_method, "
+                          "an attribute of each Feature. This allows specifying individual 'shifts' for each Feature / "
+                          "input. A default sampling method can be specified via "
+                          "Feature.set_default_sampling_method(<your default sampling>).", DeprecationWarning)
+            warnings.warn(f"shift parameter was given as shift={kwargs['shift']}. Setting "
+                          f"Feature.set_default_sampling_method(shift) and overriding possible individual sampling "
+                          f"methods of all Features. If this is not intended, remove shift parameter when initializing"
+                          f" PreprocessingSingleStep object!", DeprecationWarning)
             Feature.set_default_sampling_method(kwargs['shift'])
             for f in FeatureConstruction.features:
                 f.set_sampling_method(kwargs['shift'])
@@ -185,10 +179,11 @@ def __init__(self, inputs: list[Union[str, FeatureBase]], output: Union[str, lis
     def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         """
                 Processes the loaded DataFrame:
-                1. Applies feature constructions defined in `FeatureConstruction`.
-                2. Selects relevant input and output columns.
-                3. Handles missing values by dropping rows.
-                4. Applies the defined sampling method on each input variable.
+
+                1. Selects relevant input and output columns.
+                2. Handles missing values by dropping rows.
+                3. Applies the defined sampling method on each (unconstructed) input variable.
+                4. Applies feature constructions defined in `FeatureConstruction`.
 
                 Args:
                     df (pd.DataFrame): The input DataFrame.
@@ -199,15 +194,13 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         """
 
         # extract the names of all constructed features
-        constructed_input_features = FeatureConstruction.get_constructed_features(self.inputs)
-        constructed_output_features = FeatureConstruction.get_constructed_features(self.output)
+        constructed_inputs = FeatureConstruction.get_constructed_features(self.inputs)
+        constructed_outputs = FeatureConstruction.get_constructed_features(self.output)
 
         # Only apply sampling method to those features which are not constructed features
-        # but which data is taken directly from the data frame
-        inputs_without_constructed = [inp for inp in self.inputs if inp not in constructed_input_features]
-        output_without_constructed = [out for out in self.output if out not in constructed_output_features]
-
-        features_without_constructed: list[FeatureBase] = [FeatureConstruction.get_feature(inp) for inp in inputs_without_constructed]
+        # but whose data is taken directly from the data frame
+        inputs_without_constructed = [inp for inp in self.inputs if inp not in constructed_inputs]
+        output_without_constructed = [out for out in self.output if out not in constructed_outputs]
 
         df = df[inputs_without_constructed + output_without_constructed]
 
@@ -217,95 +210,25 @@ def process_data(self, df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         last_valid_index = non_nan_rows.iloc[::-1].idxmax() if non_nan_rows.any() else None
         df = df.loc[first_valid_index:last_valid_index]
 
-        # sample input data; different inputs can have different sampling methods
-        res = []
-        previous_or_mean_in_sampling_methods = []
-        X = df[inputs_without_constructed].copy()
-        target_grid = self.sample_df_according_to_timestep(df).index
-        for f in features_without_constructed:
-            # only process inputs with sampling method mean_over_interval first since X cannot be sampled
-            # to the actual required time steps until the intermediate values were taken into the mean
-            if f.get_sampling_method() == 'mean_over_interval':
-                res.append(get_mean_over_interval(X[[f.feature]], target_grid))
-                previous_or_mean_in_sampling_methods.append(True)
-
-        # sample X according to required time step
-        X = self.sample_df_according_to_timestep(X)
-        # process inputs with sampling methods 'current' and 'previous'
-        for f in features_without_constructed:
-            _x = X[[f.feature]]
-            if f.get_sampling_method() == 'current':
-                # no transformation needed
-                res.append(_x)
-                previous_or_mean_in_sampling_methods.append(False)
-            elif f.get_sampling_method() == 'previous':
-                # shift by 1
-                _x = _x.shift(1)
-                _x = _x.iloc[1:]
-                res.append(_x)
-                previous_or_mean_in_sampling_methods.append(True)
-            elif f.get_sampling_method() == 'mean_over_interval':
-                continue
-            else:
-                raise NotImplementedError(f"Sampling method '{f.get_sampling_method()}' not implemented.")
-        # concatenate sampled input data
-        X = pd.concat(res, axis=1)
-        X = X.sort_index(ascending=True)
-
-        # Sampling methods 'previous' and 'mean_over_interval' reduce available data points by 1.
-        if any(previous_or_mean_in_sampling_methods):
-            # if at least one of the features uses 'current' as sampling method, shorten X
-            if not all(previous_or_mean_in_sampling_methods):
-                X = X.iloc[1:]
-
-        if X.isnull().values.any():
-            if self.ignore_nan:
-                X.dropna(inplace=True)
-            else:
-                raise ValueError(
-                    "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended, set "
-                    "ignore_nan=True in PreprocessingSingleStep.")
+        sampler = Sampling(inputs_without_constructed, output_without_constructed, self.time_step, self.ignore_nan)
+        # sample input data
+        X = sampler.sample_unconstructed_inputs(df)
 
         # sample output data
         if len(output_without_constructed) != 0:  # at least one non-constructed output feature
-            y = df[output_without_constructed].copy()
-            y = self.sample_df_according_to_timestep(y)
-            # Sampling methods 'previous' and 'mean_over_interval' reduce available data points by 1.
-            # synchronize length of X and y
-            if any(previous_or_mean_in_sampling_methods):
-                y = y.iloc[1:]
-            if y.isnull().values.any():
-                if self.ignore_nan:
-                    y.dropna(inplace=True)
-                else:
-                    raise ValueError(
-                        "Data Error: The TrainingData contains NaN values in intermediate rows. If this is intended,"
-                        "set ignore_nan=True in PreprocessingSingleStep.")
-
+            y = sampler.sample_unconstructed_outputs(df)
             res_df = pd.concat([X, y], axis=1)
-
         else:  # only constructed outputs
             res_df = X
 
         # Applies feature constructions defined in `FeatureConstruction`
-        FeatureConstruction.process(res_df, feature_names=constructed_input_features + constructed_output_features)
-
-        # assume constructed outputs solely base on features with sampling current or sampling previous / mean_over_interval
-
-        if any(previous_or_mean_in_sampling_methods):
-            methods = ['previous', 'mean_over_interval']
-            # if constructed output features are based on input features with sampling previous or mean_over_interval,
-            # the constructed output has to be shifted to invert the shift of the input features
-            for out in constructed_output_features:
-                out_feature = FeatureConstruction.get_feature(out)
-                if isinstance(out_feature, FeatureTwo):
-                    if out_feature.feature1.get_sampling_method() in methods or out_feature.feature2.get_sampling_method() in methods:
-                        res_df[out_feature.feature] = res_df[out_feature.feature].shift(-1)  # shift
-                else: # constructed feature that doesn't consist of two features
-                    if out_feature.f1.get_sampling_method() in methods:
-                        res_df[out_feature.feature] = res_df[out_feature.feature].shift(-1)  # shift
-
-        # drop NaNs occurring due to creation of lags
+        FeatureConstruction.process(res_df, feature_names=constructed_inputs + constructed_outputs)
+
+        if len(constructed_outputs) != 0:
+            # correct shifting of constructed outputs if any
+            res_df = sampler.sample_constructed_outputs(res_df, constructed_outputs)
+
+        # drop NaNs occurring due to creation of lags (constructed feature)
         res_df.dropna(inplace=True)
 
         X = res_df[self.inputs]
@@ -379,28 +302,6 @@ def from_config(cls, config: dict) -> 'PreprocessingSingleStep':
         return cls(**config)
 
 
-def get_mean_over_interval(x: pd.DataFrame, target_grid: pd.DataFrame.index) -> pd.DataFrame:
-    """samples and returns x on target grid taking the mean over the interval (between the grid indices)"""
-    def pairwise(iterable: Iterable):
-        "s -> (s0,s1), (s1,s2), (s2, s3), ..."
-        a, b = itertools.tee(iterable)
-        next(b, None)
-        return zip(a, b)
-
-    original_grid = np.array(x.index)
-    results = []
-    for i, j in pairwise(target_grid):
-        slicer = np.logical_and(original_grid >= i, original_grid < j)
-        d = {'Index': j}
-        for inp in x.columns:
-            d[inp] = x[inp][slicer].mean()
-        results.append(d)
-
-    x = pd.DataFrame(results).set_index('Index')
-
-    return x
-
-
 class PreprocessingMultiStep(PreprocessingData):
     """
     Handles preprocessing for multi-step forecasting models, typically RNNs.
@@ -492,7 +393,8 @@ def process_data(self, df: pd.DataFrame) -> TrainingDataMultiStep:
         """
 
         # filter data
-        df = self.sample_df_according_to_timestep(df)
+        sampler = Sampling(unconstructed_inputs=[], unconstructed_outputs=[], time_step=self.time_step)
+        df = sampler.sample_df_according_to_timestep(df)
 
         # Applies feature constructions defined in `FeatureConstruction`.
         FeatureConstruction.process(df)
diff --git a/physXAI/preprocessing/sampling.py b/physXAI/preprocessing/sampling.py
new file mode 100644
index 0000000..9d34196
--- /dev/null
+++ b/physXAI/preprocessing/sampling.py
@@ -0,0 +1,218 @@
+from typing import Union, Iterable
+import pandas as pd
+import numpy as np
+import itertools
+
+
+def _return_valid_sampling_method(v: Union[int, str]):
+    """ check the validity of the given sampling method and return a string if value is int """
+
+    if not isinstance(v, (int, str)):
+        raise TypeError(f'Type of sampling method not supported. Type is {type(v)}, must be int or str.')
+
+    if v in ['current', 0]:
+        return 'current'
+    elif v in ['previous', 1]:
+        return 'previous'
+    elif v in ['mean_over_interval', '_']:
+        return v
+    else:
+        raise ValueError(
+            f"Value of sampling method not supported, value is: {v}. Sampling method must be 'current' "
+            f"(or 0 if sampling_method is int), 'previous' (or 1 if sampling_method is int) or 'mean_over_interval'. "
+            f"In case of deactivated sampling (for outputs), sampling_method must be '_'.")
+
+
+class Sampling:
+    def __init__(self, unconstructed_inputs: list[str], unconstructed_outputs: list[str], time_step: Union[int, float],
+                 ignore_nan: bool = False):
+        """
+        Store the configuration used by the sampling methods of this class.
+
+        Args:
+            unconstructed_inputs (list[str]): names of unconstructed (!) input features
+            unconstructed_outputs (list[str]): names of unconstructed (!) output features
+            time_step (Union[int, float]): spacing of the target grid; rows are kept if their index offset from the
+                        first row is a multiple of it (see sample_df_according_to_timestep)
+            ignore_nan: If True, rows with NaN values are dropped by the sampling methods. If False, they raise an
+                        error when NaNs are present after sampling. Default is False.
+        """
+        self.inputs = unconstructed_inputs
+        self.outputs = unconstructed_outputs
+        self.time_step = time_step
+        self.ignore_nan = ignore_nan
+
+    def sample_df_according_to_timestep(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Sample the given DataFrame to the grid defined by time_step, anchored at the first index value.
+
+        Args:
+            df: pandas DataFrame; its index must support subtraction and modulo (df.index[0] fails on an empty frame)
+        Returns:
+            pd.DataFrame: rows of df whose index offset from df.index[0] is an exact multiple of time_step
+        """
+        filtering = (df.index - df.index[0]) % self.time_step == 0
+        df = df[filtering]
+        return df
+
+    def previous_or_mean_in_sampling_methods(self) -> list[bool]:
+        """
+        Flag, for every unconstructed input, whether it uses the sampling method 'previous' or 'mean_over_interval'.
+
+        Returns:
+             list[bool]: one entry per input, True if its sampling method is 'previous' or 'mean_over_interval',
+                         False otherwise (same order as self.inputs; empty list if there are no inputs)
+        """
+        # no import on module level possible due to circular import
+        from physXAI.preprocessing.preprocessing import FeatureConstruction
+
+        arr = []
+        for fn in self.inputs:
+            sm = FeatureConstruction.get_feature(fn).get_sampling_method()
+            arr.append(sm in ['previous', 'mean_over_interval'])
+        return arr
+
+    def sample_unconstructed_inputs(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Extract the unconstructed inputs from df, apply each input's sampling method and sample them to the target grid.
+
+        Args:
+            df (pd.DataFrame): data
+        Returns:
+            pd.DataFrame: DataFrame (X) that solely contains all unconstructed inputs (with the correct sampling)
+        Raises:
+            NotImplementedError: if an input's sampling method is not 'current', 'previous' or 'mean_over_interval'
+            ValueError: if X contains NaN values and ignore_nan is False
+        """
+        # no import on module level possible due to circular import
+        from physXAI.preprocessing.preprocessing import FeatureConstruction
+
+        # extract inputs from DataFrame and get target sampling grid
+        X = df[self.inputs].copy()
+        target_grid = self.sample_df_according_to_timestep(df).index
+
+        # different inputs can have different sampling methods
+        res = []
+        features_without_constructed = [FeatureConstruction.get_feature(inp) for inp in self.inputs]
+        for f in features_without_constructed:
+            # handle 'mean_over_interval' first: the means need the intermediate rows, so X must not be sampled yet
+            if f.get_sampling_method() == 'mean_over_interval':
+                res.append(get_mean_over_interval(X[[f.feature]], target_grid))
+
+        # sample X to target grid
+        X = self.sample_df_according_to_timestep(X)
+        # process inputs with sampling methods 'current' and 'previous'
+        for f in features_without_constructed:
+            _x = X[[f.feature]]
+            if f.get_sampling_method() == 'current':
+                # no transformation needed
+                res.append(_x)
+            elif f.get_sampling_method() == 'previous':
+                # shift by one grid step and drop the first row, which has no predecessor
+                _x = _x.shift(1)
+                _x = _x.iloc[1:]
+                res.append(_x)
+            elif f.get_sampling_method() == 'mean_over_interval':
+                continue
+            else:
+                raise NotImplementedError(f"Sampling method '{f.get_sampling_method()}' not implemented.")
+        # concatenate sampled input data
+        X = pd.concat(res, axis=1)
+        X = X.sort_index(ascending=True)
+
+        # Sampling methods 'previous' and 'mean_over_interval' reduce available data points by 1.
+        previous_or_mean = self.previous_or_mean_in_sampling_methods()
+        if any(previous_or_mean):
+            # 'current' inputs still contain the first grid point; drop that row to match the shortened inputs
+            if not all(previous_or_mean):
+                X = X.iloc[1:]
+
+        # check for NaNs
+        if X.isnull().values.any():
+            if self.ignore_nan:
+                X.dropna(inplace=True)
+            else:
+                raise ValueError(
+                    "Data Error: The input data contains NaN values in intermediate rows. If this is intended, set "
+                    "ignore_nan=True in PreprocessingSingleStep.")
+        return X
+
+    def sample_unconstructed_outputs(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Extract the unconstructed outputs from the given DataFrame and sample them to the target grid.
+
+        Args:
+            df (pd.DataFrame): data
+        Returns:
+            pd.DataFrame: DataFrame (y) that solely contains all unconstructed outputs
+        Raises:
+            ValueError: if y contains NaN values and ignore_nan is False
+        """
+        y = self.sample_df_according_to_timestep(df[self.outputs].copy())
+
+        # 'previous' / 'mean_over_interval' inputs lose the first grid point; drop it from y to keep X and y in sync
+        if any(self.previous_or_mean_in_sampling_methods()):
+            y = y.iloc[1:]
+
+        # check for NaNs
+        if y.isnull().values.any():
+            if self.ignore_nan:
+                y.dropna(inplace=True)
+            else:
+                raise ValueError(
+                    "Data Error: The output data contains NaN values in intermediate rows. If this is intended, "
+                    "set ignore_nan=True in PreprocessingSingleStep.")
+        return y
+
+    def sample_constructed_outputs(self, df: pd.DataFrame, constructed_outputs: list[str]) -> pd.DataFrame:
+        """
+        Correct the shift of constructed outputs that are based on features sampled with 'previous' or 'mean_over_interval'.
+        Since the inputs are shifted before the constructed features are created, the constructed output has to be
+        shifted back by one row (shift(-1)) to neutralize the shift that was applied to its base features.
+
+        Args:
+            df (pd.DataFrame): data including constructed features
+            constructed_outputs (list[str]): names of constructed output features
+        Returns:
+            pd.DataFrame: the given DataFrame (df); affected columns are overwritten in place
+        """
+        # no import on module level possible due to circular import
+        from physXAI.preprocessing.preprocessing import FeatureConstruction, FeatureTwo
+
+        if any(self.previous_or_mean_in_sampling_methods()):
+            methods = ['previous', 'mean_over_interval']
+            for out in constructed_outputs:
+                out_feature = FeatureConstruction.get_feature(out)
+                if isinstance(out_feature, FeatureTwo):
+                    # shift back only if at least one base feature uses one of the sampling methods listed above
+                    if (out_feature.feature1.get_sampling_method() in methods or
+                            out_feature.feature2.get_sampling_method() in methods):
+                        df[out_feature.feature] = df[out_feature.feature].shift(-1)
+                else:  # constructed feature that doesn't consist of two features (FeatureExp, ...)
+                    # shift back only if the base feature (f1) uses one of the sampling methods listed above
+                    if out_feature.f1.get_sampling_method() in methods:
+                        df[out_feature.feature] = df[out_feature.feature].shift(-1)
+        return df
+
+
+def get_mean_over_interval(x: pd.DataFrame, target_grid: pd.Index) -> pd.DataFrame:
+    """Sample x on target_grid: the row at grid point j holds the column means of x over [previous grid point, j).
+
+    The first grid point gets no row; with fewer than two grid points an empty DataFrame is returned."""
+    def pairwise(iterable: Iterable):
+        "s -> (s0,s1), (s1,s2), (s2, s3), ..."
+        a, b = itertools.tee(iterable)
+        next(b, None)
+        return zip(a, b)
+
+    original_grid = np.array(x.index)
+    results = []
+    for i, j in pairwise(target_grid):
+        slicer = np.logical_and(original_grid >= i, original_grid < j)  # half-open interval [i, j)
+        d = {'Index': j}  # the interval mean is stored at the end point of the interval
+        for inp in x.columns:
+            d[inp] = x[inp][slicer].mean()
+        results.append(d)
+
+    # explicit columns keep set_index working when there are no intervals (results == [])
+    return pd.DataFrame(results, columns=['Index', *x.columns]).set_index('Index')
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 5980116..e3bd9b5 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -213,6 +213,8 @@ def test_different_sampling_methods(file_path, inputs_tair_extended):
     assert e.get_sampling_method() == 'previous'
     assert change_tair.get_sampling_method() == '_'
 
+    with pytest.warns(UserWarning):
+        c = FeatureConstant(c=100, name='test_const', sampling_method=1)
     FeatureConstruction.reset()
 
 
@@ -313,7 +315,8 @@ def test_deprecated_shift(p_hp_data, inputs_php, output_php, file_path):
     Logger.setup_logger(base_path=base_path, folder_name='unittests\\test_coverage', override=True)
 
     # Create & process Training data
-    prep = PreprocessingSingleStep(inputs_php, output_php, shift=0)  # deprecated shift given in preprocessing
+    with pytest.warns(DeprecationWarning):
+        prep = PreprocessingSingleStep(inputs_php, output_php, shift=0)  # deprecated shift given in preprocessing
     td = prep.pipeline(file_path)
 
     m = ClassicalANNModel(epochs=1, n_neurons=[4, 4], n_layers=2, activation_function=['softplus', 'softplus'],
@@ -344,8 +347,8 @@ def test_deprecated_shift(p_hp_data, inputs_php, output_php, file_path):
         "random_state": 42,
         "time_step": 1.0,
     }
-
-    a = PreprocessingData.from_config(config_prep)
+    with pytest.warns(DeprecationWarning):
+        a = PreprocessingData.from_config(config_prep)
     assert isinstance(a, PreprocessingSingleStep)
     assert Feature.get_default_sampling_method() == 'current'
 

From 3cbcdba2ecc3251219c191aff9ba6903388c215e Mon Sep 17 00:00:00 2001
From: Patrick Henkel <patrick.henkel@eonerc.rwth-aachen.de>
Date: Mon, 22 Dec 2025 17:25:26 +0100
Subject: [PATCH 39/42] Updated

---
 physXAI/preprocessing/constructed.py | 14 +++++++-------
 physXAI/preprocessing/sampling.py    |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index 08e9616..d883275 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -1,9 +1,9 @@
 from abc import ABC, abstractmethod
-from typing import Type, Union
+from typing import Optional, Type, Union
 import numpy as np
 from pandas import DataFrame, Series
 import warnings
-from physXAI.preprocessing.sampling import _return_valid_sampling_method
+from physXAI.preprocessing.sampling import return_valid_sampling_method
 
 
 class FeatureBase(ABC):
@@ -13,13 +13,13 @@ class FeatureBase(ABC):
     in a Pandas DataFrame. It supports arithmetic operations to combine features.
     """
 
-    def __init__(self, name: str, sampling_method: Union[str, int] = None, **kwargs):
+    def __init__(self, name: str, sampling_method: Optional[Union[str, int]] = None, **kwargs):
         """
         Initializes a FeatureBase instance.
 
         Args:
             name (str): The name of the feature. This will be the column name in the DataFrame.
-            sampling_method (Union[str, int]): Time step of the input data used to predict the output.
+            sampling_method (Optional[Union[str, int]]): Time step of the input data used to predict the output.
                 - if None: Feature._default_sampling_method is used
                 - if 'current' or 0: Current time step will be used for prediction.
                 - if 'previous' or 1: Previous time step will be used for prediction.
@@ -50,7 +50,7 @@ def set_sampling_method(self, val: Union[str, int] = None):
         if val is None:
             self._sampling_method = Feature.get_default_sampling_method()
         else:
-            self._sampling_method = _return_valid_sampling_method(val)
+            self._sampling_method = return_valid_sampling_method(val)
 
     def rename(self, name: str):
         """
@@ -214,7 +214,7 @@ def set_default_sampling_method(cls, val: Union[str, int]):
         - 'previous' or 1: Previous time step will be used for prediction.
         - 'mean_over_interval': Mean between current and previous time step will be used.
         """
-        Feature._default_sampling_method = _return_valid_sampling_method(val)
+        Feature._default_sampling_method = return_valid_sampling_method(val)
 
 
 def get_sampling_from_base_feature(base_features: Union[FeatureBase, list[FeatureBase]], **kwargs) -> [str, list]:
@@ -254,7 +254,7 @@ def get_sampling_from_base_feature(base_features: Union[FeatureBase, list[Featur
             # necessary for feature construction from config
             sampling_method = '_'
         else:
-            assert _return_valid_sampling_method(kwargs['sampling_method']) == sampling_method, (
+            assert return_valid_sampling_method(kwargs['sampling_method']) == sampling_method, (
                 f"Constructed features must have the same sampling method as their base feature(s). Sampling method of "
                 f"base feature(s) is {sampling_method} but {kwargs['sampling_method']} was given as sampling method."
             )
diff --git a/physXAI/preprocessing/sampling.py b/physXAI/preprocessing/sampling.py
index 9d34196..4426c42 100644
--- a/physXAI/preprocessing/sampling.py
+++ b/physXAI/preprocessing/sampling.py
@@ -4,7 +4,7 @@
 import itertools
 
 
-def _return_valid_sampling_method(v: Union[int, str]):
+def return_valid_sampling_method(v: Union[int, str]):
     """ check the validity of the given sampling method and return a string if value is int """
 
     if not isinstance(v, (int, str)):

From 1af0fb4aa1867c401e33ba19861f077901e490f8 Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 24 Dec 2025 13:08:33 +0100
Subject: [PATCH 40/42] Updated

---
 .../different_sampling_methods.py             |  2 +-
 physXAI/preprocessing/constructed.py          | 30 ++++++++++++++-----
 unittests/test_coverage.py                    | 10 +++++--
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
index 4048da2..37205eb 100644
--- a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
+++ b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
@@ -53,7 +53,7 @@
 y = x1 + lx1[0]
 z = y + x1
 z.rename('example_feature_two')  # since z is a constructed feature based on x1, its sampling_method will be previous
-e = FeatureExp(x1-273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
+e = FeatureExp(x1-273.15, 'exp')  # reduce x1 by 273.15, otherwise values are too high
 inputs.extend([z, e])  # add dummy features to inputs
 
 # construct output
diff --git a/physXAI/preprocessing/constructed.py b/physXAI/preprocessing/constructed.py
index d883275..945eea9 100644
--- a/physXAI/preprocessing/constructed.py
+++ b/physXAI/preprocessing/constructed.py
@@ -219,7 +219,10 @@ def set_default_sampling_method(cls, val: Union[str, int]):
 
 def get_sampling_from_base_feature(base_features: Union[FeatureBase, list[FeatureBase]], **kwargs) -> [str, list]:
     """
-    Returns the appropriate sampling_method for a constructed feature based on its base feature(s)
+    Returns the appropriate sampling_method for a constructed feature based on its base feature(s). A constructed
+    feature must be built from features whose sampling methods apply the same time shift. Therefore, a constructed
+    feature can be based either on features that all use the sampling method 'current' (no time shift applied) or on
+    features that use one of the sampling methods ['previous', 'mean_over_interval'] (time shift of one unit applied).
 
     Args:
          base_features (Union[FeatureBase, list[FeatureBase]]): single base feature or list of max. two base features
@@ -244,20 +247,31 @@ def get_sampling_from_base_feature(base_features: Union[FeatureBase, list[Featur
         else:
             raise ValueError(f"Expected type [FeatureBase, int, float], got type {type(f)} instead")
 
-    if len(sampling) > 1:
-        assert len(set(sampling)) == 1, f'Sampling methods of base feature are not equal, got {sampling}'
+    sampling = list(set(sampling))
 
-    sampling_method = sampling[0]
+    if len(sampling) == 1:
+        sampling_method = sampling[0]
+    else:
+        if 'current' in sampling:  # 'current' together with other sampling methods
+            raise ValueError(f"Sampling method(s) of base feature(s) are not equal 'current', got sampling method(s): {sampling}")
+        else:  # 'previous' together with 'mean_over_interval'
+            sampling_method = 'mean_over_interval'
 
     if 'sampling_method' in kwargs.keys():
         if 'ignore_sampling_for_output' in kwargs.keys() and kwargs['ignore_sampling_for_output']:
             # necessary for feature construction from config
             sampling_method = '_'
         else:
-            assert return_valid_sampling_method(kwargs['sampling_method']) == sampling_method, (
-                f"Constructed features must have the same sampling method as their base feature(s). Sampling method of "
-                f"base feature(s) is {sampling_method} but {kwargs['sampling_method']} was given as sampling method."
-            )
+            message = (f"Constructed features must be built from features with sampling methods that apply the same "
+                       f"time shift. Therefore, constructed features can either base on features which have solely the "
+                       f"sampling method 'current' (no time shift applied) or on features which have one of the sampling"
+                       f" methods ['previous','mean_over_interval'] (time shift of one unit applied).\n"
+                       f"Sampling method of base feature(s) is '{sampling_method}' but in kwargs "
+                       f"'{return_valid_sampling_method(kwargs['sampling_method'])}' was given as sampling method.")
+            if sampling_method == 'current':
+                assert return_valid_sampling_method(kwargs['sampling_method']) == sampling_method, message
+            else:
+                assert return_valid_sampling_method(kwargs['sampling_method']) in ['previous', 'mean_over_interval'], message
         kwargs.__delitem__('sampling_method')  # constructor must not get more than one arg with the same key
 
     return sampling_method, kwargs
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index e3bd9b5..7d409fd 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -127,9 +127,15 @@ def test_unsupported_type(self):
     # test case: lags of the same input have mismatching sampling methods
     def test_lag_with_mismatching_sampling_methods(self):
 
-        x = Feature('test', sampling_method='current')
+        # allowed
+        x = Feature('test_correct', sampling_method='mean_over_interval')
+        x2 = FeatureLag(x, lag=2, sampling_method='mean_over_interval')
+        e = FeatureExp(x, sampling_method='previous')
+
+        # not allowed
+        y = Feature('test_fault', sampling_method='current')
         with self.assertRaises(AssertionError):
-            FeatureLag(x, lag=1, sampling_method='previous')
+            FeatureLag(y, lag=1, sampling_method='previous')
         FeatureConstruction.reset()
 
 

From bcef26ed33490804c502934b664e4b51d721578a Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Wed, 24 Dec 2025 13:30:50 +0100
Subject: [PATCH 41/42] Updated

---
 .../different_sampling_methods.py                | 16 +++++++++++++---
 unittests/test_coverage.py                       | 15 +++++++++++----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
index 37205eb..77816d2 100644
--- a/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
+++ b/executables/bestest_hydronic_heat_pump/different_sampling_methods.py
@@ -7,7 +7,7 @@
 This script demonstrates the usage of different sampling methods. It is not physically meaningful.
 
 When creating a Feature, a sampling method can be specified.
-For constructed features, no sampling method is necessary. It is assigned based on their corresponding base feature(s)
+For constructed features, no sampling method is necessary. It is assigned based on their corresponding base feature(s).
 
 sampling_method (Union[str, int]): Time step of the input data used to predict the output.
     - if None: Feature.get_default_sampling_method() is used
@@ -17,7 +17,12 @@
     
     Specify default sampling method using Feature.set_default_sampling_method(<your default sampling>).
     If no default sampling method is specified by the user, 'previous' is used as default.
+
+Constructed features must be built from features whose sampling methods apply the same time shift. Therefore,
+a constructed feature can be based either on features that all use the sampling method 'current' (no time shift applied)
+or on features that use one of the sampling methods ['previous', 'mean_over_interval'] (time shift of one unit applied).
 """
+
 Feature.set_default_sampling_method(0)
 
 # Setup up logger for saving
@@ -46,7 +51,7 @@
 lx2 = x2.lag(1)
 
 # create lag of oveHeaPumY_u: oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
-x3 = Feature('oveHeaPumY_u')
+x3 = Feature('oveHeaPumY_u', sampling_method='mean_over_interval')
 x3.lag(2)
 
 # dummy Features
@@ -54,7 +59,12 @@
 z = y + x1
 z.rename('example_feature_two')  # since z is a constructed feature based on x1, its sampling_method will be previous
 e = FeatureExp(x1-273.15, 'exp')  # reduce x1 by 273.15, otherwise values are too high
-inputs.extend([z, e])  # add dummy features to inputs
+
+# x1 and x3 have sampling methods 'previous' and 'mean_over_interval'.
+# Since both of them apply a time shift of one, they can be combined in constructed features
+a = x1 + x3
+
+inputs.extend([z, e, a])  # add dummy features to inputs
 
 # construct output
 change_tair = x1 - lx1[0]
diff --git a/unittests/test_coverage.py b/unittests/test_coverage.py
index 7d409fd..de712b0 100644
--- a/unittests/test_coverage.py
+++ b/unittests/test_coverage.py
@@ -132,6 +132,9 @@ def test_lag_with_mismatching_sampling_methods(self):
         x2 = FeatureLag(x, lag=2, sampling_method='mean_over_interval')
         e = FeatureExp(x, sampling_method='previous')
 
+        with pytest.warns(UserWarning):
+            c = FeatureConstant(c=100, name='test_const', sampling_method=1)
+
         # not allowed
         y = Feature('test_fault', sampling_method='current')
         with self.assertRaises(AssertionError):
@@ -188,7 +191,7 @@ def test_different_sampling_methods(file_path, inputs_tair_extended):
     lx1 = x1.lag(2)  # reaTZon_y_lag1, reaTZon_y_lag2
     x2 = Feature('weaSta_reaWeaTDryBul_y')
     lx2 = x2.lag(1)  # weaSta_reaWeaTDryBul_y_lag1
-    x3 = Feature('oveHeaPumY_u')
+    x3 = Feature('oveHeaPumY_u', sampling_method='mean_over_interval')
     x3.lag(2)  # oveHeaPumY_u_lag1, oveHeaPumY_u_lag2
 
     # dummy Features
@@ -197,7 +200,12 @@ def test_different_sampling_methods(file_path, inputs_tair_extended):
     z.rename('test_feature_two')
     e = FeatureExp(x1 - 273.15, 'exp', sampling_method=1)  # reduce x1 by 273.15, otherwise values are too high
 
-    inputs_tair_extended.extend([z, e])
+    # x1 and x3 have sampling methods 'previous' and 'mean_over_interval'.
+    # Since both of them apply a time shift of one, they can be combined in constructed features
+    a = x1 + x3
+    a.rename('test_add')
+
+    inputs_tair_extended.extend([z, e, a])
 
     # output
     change_tair = x1 - lx1[0]
@@ -216,11 +224,10 @@ def test_different_sampling_methods(file_path, inputs_tair_extended):
     assert x2.get_sampling_method() == 'current' and lx2.get_sampling_method() == 'current'
     assert FeatureConstruction.get_feature('weaSta_reaWeaHDirNor_y').get_sampling_method() == 'mean_over_interval'
     assert FeatureConstruction.get_feature('test_feature_two').get_sampling_method() == 'previous'
+    assert FeatureConstruction.get_feature('test_add').get_sampling_method() == 'mean_over_interval'
     assert e.get_sampling_method() == 'previous'
     assert change_tair.get_sampling_method() == '_'
 
-    with pytest.warns(UserWarning):
-        c = FeatureConstant(c=100, name='test_const', sampling_method=1)
     FeatureConstruction.reset()
 
 

From 095516eff9d580d0d2d13a9fd4ca0527362e286a Mon Sep 17 00:00:00 2001
From: "ross.simon" <simon.ross@eonerc.rwth-aachen.de>
Date: Fri, 2 Jan 2026 17:45:40 +0100
Subject: [PATCH 42/42] corrected usage of input list

---
 physXAI/preprocessing/preprocessing.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/physXAI/preprocessing/preprocessing.py b/physXAI/preprocessing/preprocessing.py
index 958ea0c..7e5d613 100644
--- a/physXAI/preprocessing/preprocessing.py
+++ b/physXAI/preprocessing/preprocessing.py
@@ -355,11 +355,11 @@ def __init__(self, inputs: list[str], output: Union[str, list[str]], label_width
         keras.utils.set_random_seed(random_state)
 
         # Determine necessary parameters for window creation
-        self.features: list[str] = (inputs + self.output +
-                                    [f for f in self.init_features if f not in inputs and f not in self.output])
+        self.features: list[str] = (self.inputs + self.output +
+                                    [f for f in self.init_features if f not in self.inputs and f not in self.output])
         self.column_indices: dict[str, int] = {name: i for i, name in enumerate(self.features)}
-        self.warmup_columns_input: list[str] = list(set(self.init_features) & set(inputs))
-        self.warmup_columns_labels: list[str] = list(set(self.init_features) - set(inputs))
+        self.warmup_columns_input: list[str] = list(set(self.init_features) & set(self.inputs))
+        self.warmup_columns_labels: list[str] = list(set(self.init_features) - set(self.inputs))
 
         self.label_width: int = label_width
         self.warmup_width: int = warmup_width