Skip to content

Commit

Permalink
Merge pull request #154 from molgenis/fix/predict_output_non_archive
Browse files: browse the repository at this point in the history
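Fixed a bug that forced the predict output file to always be gzip-compressed, even when the output filename had a non-archive extension (e.g. .txt or .tsv); the explicit compression='gzip' argument is no longer passed when writing the file.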
  • Loading branch information
SietsmaRJ authored Dec 15, 2022
2 parents 541df37 + 2590fe5 commit a91aebe
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/molgenis/capice/core/capice_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def export_capice_prediction(self, datafile: pd.DataFrame):
export_path = os.path.join(self.file_path, self.capice_filename)
datafile = self._post_process_split_cols(datafile)
datafile = self._post_process_set_correct_dtypes(datafile)
datafile[self.export_cols].to_csv(export_path, sep='\t', compression='gzip', index=False)
datafile[self.export_cols].to_csv(export_path, sep='\t', index=False)
if not self.output_given:
print('Successfully exported CAPICE datafile to: %s', export_path)

Expand Down
4 changes: 2 additions & 2 deletions tests/capice/core/test_capice_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_prediction_output(self):
self.exporter.capice_filename = filename
self.exporter.export_capice_prediction(datafile=self.prediction_output_dataframe)
self.assertTrue(os.path.isfile(filename_path))
exported_data = pd.read_csv(filename_path, compression='gzip', sep='\t')
exported_data = pd.read_csv(filename_path, sep='\t')
exported_data[Column.chr.value] = exported_data[Column.chr.value].astype(str)
pd.testing.assert_frame_equal(exported_data, self.expected_prediction_output_dataframe)

Expand All @@ -90,7 +90,7 @@ def test_exporter_force(self):
present_file_conn.write('This file is already present')
self.exporter.capice_filename = present_file
self.exporter.export_capice_prediction(datafile=self.prediction_output_dataframe)
forced_file = pd.read_csv(present_file_path, compression='gzip', sep='\t')
forced_file = pd.read_csv(present_file_path, sep='\t')
forced_file[Column.chr.value] = forced_file[Column.chr.value].astype(str)
pd.testing.assert_frame_equal(forced_file, self.expected_prediction_output_dataframe)

Expand Down
6 changes: 2 additions & 4 deletions tests/capice/test_main_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class TestMainNonTrain(unittest.TestCase):
def setUpClass(cls):
print('Setting up.')
manager, cls.output_dir = set_up_manager_and_out()
manager.output_filename = os.path.join(cls.output_dir, 'test_output.txt')
manager.output_filename = os.path.join(cls.output_dir, 'test_output.tsv')

cls.model = load_model(ResourceFile.XGB_BOOSTER_POC_UBJ.value)

Expand All @@ -31,9 +31,7 @@ def test_integration_main_nontrain(self):
predict = CapicePredict(input_path=infile, model=self.model, output_path=self.output_dir,
output_given=True)
predict.run()
prediction_output = pd.read_csv(os.path.join(self.output_dir, 'test_output.txt'),
compression='gzip',
sep='\t')
prediction_output = pd.read_csv(os.path.join(self.output_dir, 'test_output.tsv'), sep='\t')
self.assertEqual(prediction_output.shape, (4, 11))


Expand Down

0 comments on commit a91aebe

Please sign in to comment.