diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py
index 5a77dfa8..f1387c8e 100644
--- a/alphastats/DataSet.py
+++ b/alphastats/DataSet.py
@@ -52,6 +52,11 @@ def __init__(
             metadata_path (str, optional): path to metadata file. Defaults to None.
             sample_column (str, optional): column in metadata file indicating the sample IDs. Defaults to None.
 
+        Attributes of a DataSet instance:
+            DataSet().rawinput: Raw Protein data.
+            DataSet().mat:      Processed data matrix with ProteinIDs/ProteinGroups as columns and samples as rows. All computations are performed on this matrix.
+            DataSet().metadata: Metadata for the samples in the matrix. Metadata will be matched with DataSet().mat when needed (for instance Volcano Plot).
+
         """
         self._check_loader(loader=loader)
 
@@ -64,14 +69,14 @@ def __init__(
         self.gene_names: str = loader.gene_names
 
         # include filtering before
-        self.create_matrix()
+        self._create_matrix()
         self._check_matrix_values()
 
         self.metadata: pd.DataFrame
         self.sample: str
         if metadata_path is not None:
             self.sample = sample_column
-            self.metadata = self.load_metadata(file_path=metadata_path)
+            self.metadata = self._load_metadata(file_path=metadata_path)
             self._remove_misc_samples_in_metadata()
         else:
             self.sample = "sample"
@@ -83,12 +88,18 @@ def __init__(
             )
             self.intensity_column = intensity_column
 
-        # save preprocessing settings
-        self.preprocessing_info: Dict = self._save_dataset_info()
+        # init preprocessing settings
+        self.preprocessing_info: Dict = Preprocess.init_preprocessing_info(
+            num_samples=self.mat.shape[0],
+            num_protein_groups=self.mat.shape[1],
+            intensity_column=self.intensity_column,
+            filter_columns=self.filter_columns,
+        )
+
+        self.preprocessed = False
         self.preprocessed: bool = False
 
         print("DataSet has been created.")
-        self.overview()
 
     def preprocess(
         self,
@@ -126,8 +137,16 @@ def preprocess(
 
     def reset_preprocessing(self):
         """Reset all preprocessing steps"""
-        self.create_matrix()
-        # TODO fix bug: metadata is not reset here
+        self._create_matrix()
+        self.preprocessing_info = Preprocess.init_preprocessing_info(
+            num_samples=self.mat.shape[0],
+            num_protein_groups=self.mat.shape[1],
+            intensity_column=self.intensity_column,
+            filter_columns=self.filter_columns,
+        )
+
+        self.preprocessed = False
+        # TODO fix bug: metadata is not reset/reloaded here
         print("All preprocessing steps are reset.")
 
     def batch_correction(self, batch: str) -> None:
@@ -192,7 +211,7 @@ def _subset(self):
         )
         return self.mat[self.mat.index.isin(self.metadata[self.sample].tolist())]
 
-    def create_matrix(self):
+    def _create_matrix(self):
         """
         Creates a matrix of the Outputfile, with columns displaying features (Proteins) and
         rows the samples.
@@ -216,17 +235,15 @@ def create_matrix(self):
         # transpose dataframe
         mat = df.transpose()
         mat.replace([np.inf, -np.inf], np.nan, inplace=True)
+        self.rawmat = mat
 
         # remove proteins with only zero  # TODO this is re-done in preprocessing
-        self.mat = mat.loc[:, (mat != 0).any(axis=0)]
-        self.mat = self.mat.astype(float)
-
-        # reset preproccessing info
-        self.preprocessing_info = self._save_dataset_info()
-        self.preprocessed = False
-        self.rawmat = mat
+        mat_no_zeros = mat.loc[:, (mat != 0).any(axis=0)]
+        self.mat = mat_no_zeros.astype(float)
 
-    def load_metadata(self, file_path: Union[pd.DataFrame, str]) -> pd.DataFrame:
+    def _load_metadata(
+        self, file_path: Union[pd.DataFrame, str]
+    ) -> Optional[pd.DataFrame]:
         """Load metadata either xlsx, txt, csv or txt file
 
         Args:
@@ -249,11 +266,11 @@ def load_metadata(self, file_path: Union[pd.DataFrame, str]) -> pd.DataFrame:
         elif file_path.endswith(".csv"):
             df = pd.read_csv(file_path)
         else:
-            df = None
             logging.warning(
                 "WARNING: Metadata could not be read. \nMetadata has to be a .xslx, .tsv, .csv or .txt file"
             )
-            return
+            return None
+
         if df is not None and self.sample not in df.columns:
             logging.error(f"sample_column: {self.sample} not found in {file_path}")
 
@@ -261,32 +278,3 @@ def load_metadata(self, file_path: Union[pd.DataFrame, str]) -> pd.DataFrame:
         #  warnings.warn("WARNING: Sample names do not match sample labelling in protein data")
         df.columns = df.columns.astype(str)
         return df
-
-    def _save_dataset_info(self):
-        n_proteingroups = self.mat.shape[1]
-        preprocessing_dict = {
-            PreprocessingStateKeys.RAW_DATA_NUM_PG: n_proteingroups,
-            PreprocessingStateKeys.NUM_PG: self.mat.shape[1],
-            PreprocessingStateKeys.NUM_SAMPLES: self.mat.shape[0],
-            PreprocessingStateKeys.INTENSITY_COLUMN: self.intensity_column,
-            PreprocessingStateKeys.LOG2_TRANSFORMED: False,
-            PreprocessingStateKeys.NORMALIZATION: None,
-            PreprocessingStateKeys.IMPUTATION: None,
-            PreprocessingStateKeys.CONTAMINATIONS_REMOVED: False,
-            PreprocessingStateKeys.CONTAMINATION_COLUMNS: self.filter_columns,
-            PreprocessingStateKeys.NUM_REMOVED_PG_DUE_TO_CONTAMINATION: 0,
-            PreprocessingStateKeys.DATA_COMPLETENESS_CUTOFF: 0,
-            PreprocessingStateKeys.NUM_PG_REMOVED_DUE_TO_DATA_COMPLETENESS_CUTOFF: 0,
-            PreprocessingStateKeys.MISSING_VALUES_REMOVED: False,
-        }
-        return preprocessing_dict
-
-    def overview(self):
-        """Print overview of the DataSet"""
-        dataset_overview = (
-            "Attributes of the DataSet can be accessed using: \n"
-            + "DataSet.rawinput:\t Raw Protein data.\n"
-            + "DataSet.mat:\t\tProcessed data matrix with ProteinIDs/ProteinGroups as columns and samples as rows. All computations are performed on this matrix.\n"
-            + "DataSet.metadata:\tMetadata for the samples in the matrix. Metadata will be matched with DataSet.mat when needed (for instance Volcano Plot)."
-        )
-        print(dataset_overview)
diff --git a/alphastats/DataSet_Preprocess.py b/alphastats/DataSet_Preprocess.py
index 107b1745..f1e21ddd 100644
--- a/alphastats/DataSet_Preprocess.py
+++ b/alphastats/DataSet_Preprocess.py
@@ -59,6 +59,30 @@ def __init__(
         self.preprocessing_info = preprocessing_info  # changed
         self.mat = mat  # changed
 
+    @staticmethod
+    def init_preprocessing_info(
+        num_samples: int,
+        num_protein_groups: int,
+        intensity_column: str,
+        filter_columns: List[str],
+    ) -> Dict:
+        """Initialize preprocessing info."""
+        return {
+            PreprocessingStateKeys.RAW_DATA_NUM_PG: num_protein_groups,
+            PreprocessingStateKeys.NUM_PG: num_protein_groups,
+            PreprocessingStateKeys.NUM_SAMPLES: num_samples,
+            PreprocessingStateKeys.INTENSITY_COLUMN: intensity_column,
+            PreprocessingStateKeys.LOG2_TRANSFORMED: False,
+            PreprocessingStateKeys.NORMALIZATION: None,
+            PreprocessingStateKeys.IMPUTATION: None,
+            PreprocessingStateKeys.CONTAMINATIONS_REMOVED: False,
+            PreprocessingStateKeys.CONTAMINATION_COLUMNS: filter_columns,
+            PreprocessingStateKeys.NUM_REMOVED_PG_DUE_TO_CONTAMINATION: 0,
+            PreprocessingStateKeys.DATA_COMPLETENESS_CUTOFF: 0,
+            PreprocessingStateKeys.NUM_PG_REMOVED_DUE_TO_DATA_COMPLETENESS_CUTOFF: 0,
+            PreprocessingStateKeys.MISSING_VALUES_REMOVED: False,
+        }
+
     def _remove_samples(self, sample_list: list):
         # exclude samples for analysis
         self.mat = self.mat.drop(sample_list)
diff --git a/alphastats/gui/utils/preprocessing_helper.py b/alphastats/gui/utils/preprocessing_helper.py
index da9e53d7..69e621ad 100644
--- a/alphastats/gui/utils/preprocessing_helper.py
+++ b/alphastats/gui/utils/preprocessing_helper.py
@@ -285,7 +285,6 @@ def display_preprocessing_info(preprocessing_info):
     )
 
 
-# TODO: cache this
 def reset_preprocessing(dataset: DataSet) -> None:
     """Reset the preprocessing of the dataset.
 
@@ -296,5 +295,5 @@ def reset_preprocessing(dataset: DataSet) -> None:
         None
     """
 
-    dataset.create_matrix()
+    dataset.reset_preprocessing()
     st.info("Preprocessing has been reset.")
diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py
index 127ae94d..5a75a07d 100644
--- a/tests/test_DataSet.py
+++ b/tests/test_DataSet.py
@@ -80,14 +80,14 @@ def test_load_metadata_missing_sample_column(self, mock):
             # is error raised when name of sample column is missing
             path = self.metadata_path
             self.obj.sample = "wrong_sample_column"
-            self.obj.load_metadata(file_path=path)
+            self.obj._load_metadata(file_path=path)
             mock.assert_called_once()
 
         @patch("logging.Logger.warning")
         def test_load_metadata_warning(self, mock):
             # is dataframe None and is warning produced
             file_path = "wrong/file.xxx"
-            self.obj.load_metadata(file_path=file_path)
+            self.obj._load_metadata(file_path=file_path)
             mock.assert_called_once()
 
         def test_create_matrix(self):
@@ -221,15 +221,15 @@ def test_dataset_without_metadata(self):
     def test_load_metadata_fileformats(self):
         # test if different fileformats get loaded correctly
         metadata_path = "testfiles/alphapept/metadata.txt"
-        self.obj.load_metadata(file_path=metadata_path)
+        self.obj._load_metadata(file_path=metadata_path)
         self.assertEqual(self.obj.metadata.shape, (2, 2))
 
         metadata_path = "testfiles/alphapept/metadata.tsv"
-        self.obj.load_metadata(file_path=metadata_path)
+        self.obj._load_metadata(file_path=metadata_path)
         self.assertEqual(self.obj.metadata.shape, (2, 2))
 
         metadata_path = "testfiles/alphapept/metadata.csv"
-        self.obj.load_metadata(file_path=metadata_path)
+        self.obj._load_metadata(file_path=metadata_path)
         self.assertEqual(self.obj.metadata.shape, (2, 2))
 
     @patch("logging.Logger.warning")