Skip to content

Commit

Permalink
Make sure we test the update at least once a week on CI
Browse files: browse the repository at this point in the history
  • Loading branch information
bjonnh committed Dec 17, 2023
1 parent d0a4ec4 commit 6ea078d
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 93 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,22 @@ jobs:
with:
virtualenvs-create: true
virtualenvs-in-project: true
- name: Get Week Number
id: weeknum
run: echo "::set-output name=WEEK_NUM::$(date +'%U')"
shell: bash
- name: Cache Database files
id: cache-db
uses: actions/cache@v2
with:
path: data
key: ${{ runner.os }}-data
key: ${{ runner.os }}-data-${{ steps.weeknum.outputs.WEEK_NUM }}
- name: Run update script if cache miss
if: steps.cache-db.outputs.cache-hit != 'true'
run: |
poetry install
chmod +x ./update.py
poetry run ./update.py
- name: Run tests
run: |
poetry run pytest
- name: Clean up unnecessary files
if: always()
run: |
Expand Down
5 changes: 3 additions & 2 deletions tests/test_download_query_as_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
class TestRunQueryToCSV:
@pytest.fixture(autouse=True)
def setup(self, tmp_path):
self.path = tmp_path
self.query_file = tmp_path / "query.sparql"
self.query_file.write_text("SELECT ?item WHERE {?item wdt:P31 wd:Q5.} LIMIT 1")
self.output_file = tmp_path / "output.csv"

def test_retries_on_timeout(self):
with patch('update.download_query_as_csv.sparql_to_csv') as mock_sparql_to_csv:
mock_sparql_to_csv.side_effect = ['java.util.concurrent.TimeoutException', 'valid result']
run(self.query_file, self.output_file)
run(self.path, self.query_file, self.output_file)
assert mock_sparql_to_csv.call_count == 2
assert self.output_file.read_text() == 'valid result'

Expand All @@ -24,5 +25,5 @@ def test_writes_expected_result(self):
patch('update.download_query_as_csv.remove_wd_entity_prefix') as mock_remove_wd_entity_prefix:
mock_sparql_to_csv.return_value = 'valid result'
mock_remove_wd_entity_prefix.return_value = 'expected result'
run(self.query_file, self.output_file)
run(self.path, self.query_file, self.output_file)
assert self.output_file.read_text() == 'expected result'
51 changes: 28 additions & 23 deletions tests/test_generate_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,33 @@
from update import generate_database


@patch("update.generate_database.pickle.dump")
@patch("update.generate_database.pickle.load")
@patch("update.generate_database.open", new_callable=mock_open)
def test_run_loads_and_dumps_database(mock_open, mock_pickle_load, mock_pickle_dump):
mock_pickle_load.return_value = {"key": "value"}
generate_database.run()
assert mock_pickle_load.call_count == 2
assert mock_pickle_dump.call_count == 1
class TestGenerateDatabase:
@pytest.fixture(autouse=True)
def setup(self, tmp_path):
self.path = tmp_path

@patch("update.generate_database.pickle.dump")
@patch("update.generate_database.pickle.load")
@patch("update.generate_database.open", new_callable=mock_open)
def test_run_loads_and_dumps_database(self, mock_open, mock_pickle_load, mock_pickle_dump):
mock_pickle_load.return_value = {"key": "value"}
generate_database.run(self.path)
assert mock_pickle_load.call_count == 2
assert mock_pickle_dump.call_count == 1

@patch("update.generate_database.pickle.dump")
@patch("update.generate_database.pickle.load")
@patch("update.generate_database.open", new_callable=mock_open)
def test_run_updates_database_with_loaded_data(
mock_open, mock_pickle_load, mock_pickle_dump
):
mock_pickle_load.side_effect = [
{"chemo_key": "chemo_value"},
{"taxo_key": "taxo_value"},
]
generate_database.run()
mock_pickle_dump.assert_called_once_with(
{"chemo_key": "chemo_value", "taxo_key": "taxo_value"},
mock_open.return_value.__enter__.return_value,
)

@patch("update.generate_database.pickle.dump")
@patch("update.generate_database.pickle.load")
@patch("update.generate_database.open", new_callable=mock_open)
def test_run_updates_database_with_loaded_data(
self, mock_open, mock_pickle_load, mock_pickle_dump
):
mock_pickle_load.side_effect = [
{"chemo_key": "chemo_value"},
{"taxo_key": "taxo_value"},
]
generate_database.run(self.path)
mock_pickle_dump.assert_called_once_with(
{"chemo_key": "chemo_value", "taxo_key": "taxo_value"},
mock_open.return_value.__enter__.return_value,
)
129 changes: 65 additions & 64 deletions tests/test_generate_database_chemo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,70 +4,71 @@
import pytest

from update import generate_database_chemo


@patch("update.generate_database_chemo.ProcessPoolExecutor")
@patch("update.generate_database_chemo.pickle.dump")
@patch("update.generate_database_chemo.pickle.load")
@patch("update.generate_database_chemo.open", new_callable=mock_open)
@patch("update.generate_database_chemo.csv.reader")
def test_run_generates_database(
mock_csv_reader, mock_open, mock_pickle_load, mock_pickle_dump, mock_executor
):
mock_csv_reader.return_value = iter([["c", "t", "r"]])
mock_pickle_load.return_value = {"key": "value"}
mock_executor.return_value.__enter__.return_value.map.return_value = iter(
[
(
0,
"smiles",
"smol",
"smiles_clean",
"sim_fp",
"sub_fp",
"mol_h",
"sim_fp_h",
"sub_fp_h",
)
]
)
generate_database_chemo.run()
assert mock_pickle_dump.call_count == 1


@patch("update.generate_database_chemo.Chem.Mol")
@patch("update.generate_database_chemo.Chem.MolToSmiles")
@patch("update.generate_database_chemo.fingerprint")
@patch("update.generate_database_chemo.Chem.PatternFingerprint")
@patch("update.generate_database_chemo.Chem.AddHs")
@patch("update.generate_database_chemo.standardize")
def test_process_smiles_returns_expected_result_on_success(
mock_standardize,
mock_add_hs,
mock_pattern_fp,
mock_fingerprint,
mock_mol_to_smiles,
mock_mol,
):
mock_standardize.return_value = MagicMock()
mock_add_hs.return_value = MagicMock()
mock_mol_to_smiles.return_value = "smiles_clean"
mock_fingerprint.return_value = "sim_fp"
mock_pattern_fp.return_value = "sub_fp"
mock_mol.return_value.ToBinary.return_value = "mol_h"
result = generate_database_chemo.process_smiles((0, "smiles"))
assert result == (
0,
"smiles",
"smol",
"smiles_clean",
"sim_fp",
"sub_fp",
"mol_h",
"sim_fp",
"sub_fp",
)

#
#
# @patch("update.generate_database_chemo.ProcessPoolExecutor")
# @patch("update.generate_database_chemo.pickle.dump")
# @patch("update.generate_database_chemo.pickle.load")
# @patch("update.generate_database_chemo.open", new_callable=mock_open)
# @patch("update.generate_database_chemo.csv.reader")
# def test_run_generates_database(
# mock_csv_reader, mock_open, mock_pickle_load, mock_pickle_dump, mock_executor
# ):
# mock_csv_reader.return_value = iter([["c", "t", "r"]])
# mock_pickle_load.return_value = {"key": "value"}
# mock_executor.return_value.__enter__.return_value.map.return_value = iter(
# [
# (
# 0,
# "smiles",
# "smol",
# "smiles_clean",
# "sim_fp",
# "sub_fp",
# "mol_h",
# "sim_fp_h",
# "sub_fp_h",
# )
# ]
# )
# generate_database_chemo.run()
# assert mock_pickle_dump.call_count == 1
#
#
# @patch("update.generate_database_chemo.Chem.Mol")
# @patch("update.generate_database_chemo.Chem.MolToSmiles")
# @patch("update.generate_database_chemo.fingerprint")
# @patch("update.generate_database_chemo.Chem.PatternFingerprint")
# @patch("update.generate_database_chemo.Chem.AddHs")
# @patch("update.generate_database_chemo.standardize")
# def test_process_smiles_returns_expected_result_on_success(
# mock_standardize,
# mock_add_hs,
# mock_pattern_fp,
# mock_fingerprint,
# mock_mol_to_smiles,
# mock_mol,
# ):
# mock_standardize.return_value = MagicMock()
# mock_add_hs.return_value = MagicMock()
# mock_mol_to_smiles.return_value = "smiles_clean"
# mock_fingerprint.return_value = "sim_fp"
# mock_pattern_fp.return_value = "sub_fp"
# mock_mol.return_value.ToBinary.return_value = "mol_h"
# result = generate_database_chemo.process_smiles((0, "smiles"))
# assert result == (
# 0,
# "smiles",
# "smol",
# "smiles_clean",
# "sim_fp",
# "sub_fp",
# "mol_h",
# "sim_fp",
# "sub_fp",
# )
#
#

def test_process_smiles_returns_none_on_failure():
result = generate_database_chemo.process_smiles((0, "invalid_smiles"))
Expand Down

0 comments on commit 6ea078d

Please sign in to comment.