Merge pull request #154 from molgenis/fix/predict_output_non_archive

Fixed a bug that forced the predict output file to always be gzip-compressed, even when the requested output filename had no `.gz` extension
molgenis · Dec 15, 2022 · a91aebe
2 parents 541df37 + 2590fe5
commit a91aebe
Show file tree

Hide file tree

Showing 3 changed files with 5 additions and 7 deletions.
diff --git a/src/molgenis/capice/core/capice_exporter.py b/src/molgenis/capice/core/capice_exporter.py
@@ -40,7 +40,7 @@ def export_capice_prediction(self, datafile: pd.DataFrame):
         export_path = os.path.join(self.file_path, self.capice_filename)
         datafile = self._post_process_split_cols(datafile)
         datafile = self._post_process_set_correct_dtypes(datafile)
-        datafile[self.export_cols].to_csv(export_path, sep='\t', compression='gzip', index=False)
+        datafile[self.export_cols].to_csv(export_path, sep='\t', index=False)
         if not self.output_given:
             print('Successfully exported CAPICE datafile to: %s', export_path)
 

diff --git a/tests/capice/core/test_capice_exporter.py b/tests/capice/core/test_capice_exporter.py
@@ -73,7 +73,7 @@ def test_prediction_output(self):
         self.exporter.capice_filename = filename
         self.exporter.export_capice_prediction(datafile=self.prediction_output_dataframe)
         self.assertTrue(os.path.isfile(filename_path))
-        exported_data = pd.read_csv(filename_path, compression='gzip', sep='\t')
+        exported_data = pd.read_csv(filename_path, sep='\t')
         exported_data[Column.chr.value] = exported_data[Column.chr.value].astype(str)
         pd.testing.assert_frame_equal(exported_data, self.expected_prediction_output_dataframe)
 
@@ -90,7 +90,7 @@ def test_exporter_force(self):
             present_file_conn.write('This file is already present')
         self.exporter.capice_filename = present_file
         self.exporter.export_capice_prediction(datafile=self.prediction_output_dataframe)
-        forced_file = pd.read_csv(present_file_path, compression='gzip', sep='\t')
+        forced_file = pd.read_csv(present_file_path, sep='\t')
         forced_file[Column.chr.value] = forced_file[Column.chr.value].astype(str)
         pd.testing.assert_frame_equal(forced_file, self.expected_prediction_output_dataframe)
 

diff --git a/tests/capice/test_main_predict.py b/tests/capice/test_main_predict.py
@@ -13,7 +13,7 @@ class TestMainNonTrain(unittest.TestCase):
     def setUpClass(cls):
         print('Setting up.')
         manager, cls.output_dir = set_up_manager_and_out()
-        manager.output_filename = os.path.join(cls.output_dir, 'test_output.txt')
+        manager.output_filename = os.path.join(cls.output_dir, 'test_output.tsv')
 
         cls.model = load_model(ResourceFile.XGB_BOOSTER_POC_UBJ.value)
 
@@ -31,9 +31,7 @@ def test_integration_main_nontrain(self):
         predict = CapicePredict(input_path=infile, model=self.model, output_path=self.output_dir,
                                 output_given=True)
         predict.run()
-        prediction_output = pd.read_csv(os.path.join(self.output_dir, 'test_output.txt'),
-                                        compression='gzip',
-                                        sep='\t')
+        prediction_output = pd.read_csv(os.path.join(self.output_dir, 'test_output.tsv'), sep='\t')
         self.assertEqual(prediction_output.shape, (4, 11))