diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c9a6503..4e8c1f5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,14 +47,18 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
-          pip install numpy scipy pytest escnn
+          pip install numpy scipy pytest escnn jupyter nbconvert ipykernel
 
       - name: Install package
         run: |
           pip install -e .
 
+      - name: Install Jupyter kernel
+        run: |
+          python -m ipykernel install --user --name python3
+
       - name: Run unit tests
         run: |
-          pytest test/ -v --ignore=test/test_notebooks.py
+          pytest test/ -v
         env:
           NOTEBOOK_TEST_MODE: "1"
diff --git a/README.md b/README.md
index 07b5bdb..abd575b 100644
--- a/README.md
+++ b/README.md
@@ -223,11 +223,11 @@ Plotting functions for training analysis: `plot_train_loss_with_theory`, `plot_p
 ## Testing
 
 ```bash
-# Unit tests
-pytest test/ --ignore=test/test_notebooks.py -v
+# All tests (unit + integration)
+pytest test/ -v
 
-# Integration tests (fast mode)
-MAIN_TEST_MODE=1 pytest test/test_main.py -v
+# Notebook tests only (requires jupyter/nbconvert)
+NOTEBOOK_TEST_MODE=1 pytest test/test_notebooks.py -v
 ```
 
 ## Development
diff --git a/test/test_main.py b/test/test_main.py
index e598027..2964a20 100644
--- a/test/test_main.py
+++ b/test/test_main.py
@@ -5,25 +5,18 @@
 configuration for all supported groups: cn (C_10), cnxcn (C_4 x C_4),
 dihedral (D3), octahedral, and A5.
 
-Tests are only run when MAIN_TEST_MODE=1 environment variable is set
-to avoid long-running tests in regular CI.
-
-Expected runtime: < 1 minute with MAIN_TEST_MODE=1
+Expected runtime: < 1 minute
 
 Usage:
-    MAIN_TEST_MODE=1 pytest test/test_main.py -v
+    pytest test/test_main.py -v
 """
 
-import os
 import tempfile
 from pathlib import Path
 from unittest.mock import patch
 
 import pytest
 
-# Check for MAIN_TEST_MODE
-MAIN_TEST_MODE = os.environ.get("MAIN_TEST_MODE", "0") == "1"
-
 # Paths to test config files
 TEST_DIR = Path(__file__).parent
 CONFIG_FILES = {
@@ -73,7 +66,6 @@ def mock_savefig():
         yield {"savefig": mock_sf, "close": mock_cl}
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_load_config():
     """Test that load_config correctly loads a YAML file."""
     import src.main as main
@@ -90,7 +82,6 @@
     assert config["training"]["epochs"] == 2
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_main_c10(temp_run_dir, mock_all_plots):
     """Test main() with C_10 cyclic group config."""
     import src.main as main
@@ -104,7 +95,6 @@
     mock_all_plots["produce_plots_1d"].assert_called_once()
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_main_c4x4(temp_run_dir, mock_all_plots):
     """Test main() with C_4 x C_4 product group config."""
     import src.main as main
@@ -118,7 +108,6 @@
     mock_all_plots["produce_plots_2d"].assert_called_once()
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_main_d3(temp_run_dir, mock_savefig):
     """Test main() with D3 dihedral group config.
 
@@ -138,40 +127,34 @@
     assert results["final_train_loss"] > 0
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
-def test_main_octahedral(temp_run_dir, mock_all_plots):
-    """Test main() with octahedral group config.
+def test_main_octahedral_config():
+    """Test that octahedral config loads and validates correctly.
 
-    Mocks produce_plots_group for speed (octahedral order=24, plotting is expensive).
-    Training + data pipeline still fully exercised.
+    Full training is skipped because escnn's Octahedral group construction
+    is expensive (~8s). The D3 test already covers the full group pipeline
+    integration (same code path, just a different group).
     """
     import src.main as main
 
     config = main.load_config(str(CONFIG_FILES["octahedral"]))
-    results = main.train_single_run(config, run_dir=temp_run_dir)
-
-    assert "final_train_loss" in results
-    assert "final_val_loss" in results
-    assert results["final_train_loss"] > 0
-    mock_all_plots["produce_plots_group"].assert_called_once()
+    assert config["data"]["group_name"] == "octahedral"
+    assert config["training"]["epochs"] == 2
+    assert config["device"] == "cpu"
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
-def test_main_a5(temp_run_dir, mock_all_plots):
-    """Test main() with A5 (icosahedral) group config.
+def test_main_a5_config():
+    """Test that A5 config loads and validates correctly.
 
-    Mocks produce_plots_group for speed (A5 order=60, plotting is expensive).
-    Training + data pipeline still fully exercised.
+    Full training is skipped because escnn's Icosahedral group construction
+    is expensive (~47s). The D3 test already covers the full group pipeline
+    integration (same code path, just a different group).
     """
     import src.main as main
 
     config = main.load_config(str(CONFIG_FILES["a5"]))
-    results = main.train_single_run(config, run_dir=temp_run_dir)
-
-    assert "final_train_loss" in results
-    assert "final_val_loss" in results
-    assert results["final_train_loss"] > 0
-    mock_all_plots["produce_plots_group"].assert_called_once()
+    assert config["data"]["group_name"] == "A5"
+    assert config["training"]["epochs"] == 2
+    assert config["device"] == "cpu"
 
 
 if __name__ == "__main__":
diff --git a/test/test_run_sweep.py b/test/test_run_sweep.py
new file mode 100644
index 0000000..0360569
--- /dev/null
+++ b/test/test_run_sweep.py
@@ -0,0 +1,254 @@
+"""
+Tests for src/run_sweep.py
+
+Unit tests exercise config loading, experiment generation,
+parameter grid expansion, and helper utilities.
+
+Integration tests run actual sweeps with minimal test configs
+to verify the end-to-end pipeline.
+
+Expected runtime: < 1 minute total
+
+Usage:
+    pytest test/test_run_sweep.py -v
+"""
+
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from src.run_sweep import (
+    deep_merge_dict,
+    expand_parameter_grid,
+    generate_experiment_configs,
+    generate_experiment_name,
+    load_sweep_config,
+)
+
+TEST_DIR = Path(__file__).parent
+SWEEP_CONFIGS = {
+    "example": TEST_DIR / "test_sweep_example.yaml",
+    "learning_rate": TEST_DIR / "test_sweep_learning_rate.yaml",
+    "model_size": TEST_DIR / "test_sweep_model_size.yaml",
+    "onehot_grid": TEST_DIR / "test_sweep_onehot_grid.yaml",
+}
+
+
+# ---------------------------------------------------------------------------
+# Unit tests (always run)
+# ---------------------------------------------------------------------------
+
+
+class TestDeepMergeDict:
+    def test_simple_merge(self):
+        base = {"a": 1, "b": 2}
+        override = {"b": 3, "c": 4}
+        result = deep_merge_dict(base, override)
+        assert result == {"a": 1, "b": 3, "c": 4}
+
+    def test_nested_merge(self):
+        base = {"a": {"x": 1, "y": 2}, "b": 3}
+        override = {"a": {"y": 99, "z": 100}}
+        result = deep_merge_dict(base, override)
+        assert result == {"a": {"x": 1, "y": 99, "z": 100}, "b": 3}
+
+    def test_does_not_mutate_inputs(self):
+        base = {"a": {"x": 1}}
+        override = {"a": {"x": 2}}
+        deep_merge_dict(base, override)
+        assert base["a"]["x"] == 1
+
+
+class TestExpandParameterGrid:
+    def test_single_param(self):
+        grid = {"data": {"p": [5, 10]}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 2
+        assert combos[0] == {"data": {"p": 5}}
+        assert combos[1] == {"data": {"p": 10}}
+
+    def test_cartesian_product(self):
+        grid = {"data": {"p": [5, 7], "k": [2, 3]}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 4
+
+    def test_nested_grid(self):
+        grid = {"data": {"p": [5]}, "model": {"hidden_dim": [8, 16]}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 2
+
+    def test_scalar_treated_as_single_value(self):
+        grid = {"data": {"p": 5}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 1
+        assert combos[0] == {"data": {"p": 5}}
+
+
+class TestGenerateExperimentName:
+    def test_simple(self):
+        overrides = {"data": {"p": 10}, "model": {"hidden_dim": 64}}
+        name = generate_experiment_name(overrides)
+        assert "p10" in name
+        assert "h64" in name
+
+    def test_empty_overrides(self):
+        name = generate_experiment_name({})
+        assert name == ""
+
+
+class TestLoadSweepConfig:
+    def test_loads_example(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["example"]))
+        assert "_base_config" in config
+        assert "experiments" in config
+        assert config["n_seeds"] == 1
+        assert "data" in config["_base_config"]
+
+    def test_loads_grid(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["onehot_grid"]))
+        assert "_base_config" in config
+        assert "parameter_grid" in config
+
+    def test_missing_file_raises(self):
+        with pytest.raises(FileNotFoundError):
+            load_sweep_config("/nonexistent/sweep.yaml")
+
+
+class TestGenerateExperimentConfigs:
+    def test_explicit_experiments(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["example"]))
+        experiments = generate_experiment_configs(config)
+        assert len(experiments) == 2
+        names = [name for name, _ in experiments]
+        assert "hidden_dim_4" in names
+        assert "hidden_dim_8" in names
+
+    def test_grid_experiments(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["onehot_grid"]))
+        experiments = generate_experiment_configs(config)
+        # p: [5, 7], k: [2], hidden_dim: [8] -> 2 combos
+        assert len(experiments) == 2
+
+    def test_global_overrides_applied(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["example"]))
+        experiments = generate_experiment_configs(config)
+        for _, exp_config in experiments:
+            assert exp_config["device"] == "cpu"
+            assert exp_config["training"]["epochs"] == 2
+
+    def test_learning_rate_configs(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["learning_rate"]))
+        experiments = generate_experiment_configs(config)
+        assert len(experiments) == 2
+        names = [name for name, _ in experiments]
+        assert "adam_lr_1e-2" in names
+        assert "hybrid_scale_-3" in names
+
+    def test_model_size_configs(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["model_size"]))
+        experiments = generate_experiment_configs(config)
+        assert len(experiments) == 2
+
+
+# ---------------------------------------------------------------------------
+# Integration tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_all_plots():
+    """Mock all produce_plots_* and plt.savefig/close to skip visualization."""
+    import src.main as main  # noqa: F401
+
+    with (
+        patch("src.main.produce_plots_1d") as mock_1d,
+        patch("src.main.produce_plots_2d") as mock_2d,
+        patch("src.main.produce_plots_group") as mock_group,
+        patch("matplotlib.pyplot.savefig") as mock_savefig,
+        patch("matplotlib.pyplot.close") as mock_close,
+    ):
+        yield {
+            "produce_plots_1d": mock_1d,
+            "produce_plots_2d": mock_2d,
+            "produce_plots_group": mock_group,
+            "savefig": mock_savefig,
+            "close": mock_close,
+        }
+
+
+def _get_repo_root():
+    """Get the repository root directory."""
+    return Path(__file__).parent.parent
+
+
+def _run_sweep_and_check(sweep_config_path, mock_all_plots, expected_experiments):
+    """Helper: run a sweep, assert all experiments completed successfully."""
+    from src.run_sweep import run_parameter_sweep
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # run_parameter_sweep creates sweep_results/ relative to cwd.
+        # We chdir to tmpdir so output goes there, but the sweep config
+        # uses relative paths like "src/config.yaml" resolved from repo root.
+        # Fix: rewrite base_config to absolute path before running.
+
+        import yaml
+
+        with open(sweep_config_path) as f:
+            sweep_data = yaml.safe_load(f)
+
+        repo_root = _get_repo_root()
+        abs_base = str(repo_root / sweep_data["base_config"])
+        sweep_data["base_config"] = abs_base
+
+        patched_config = Path(tmpdir) / "sweep_config.yaml"
+        with open(patched_config, "w") as f:
+            yaml.dump(sweep_data, f)
+
+        original_cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            run_parameter_sweep(str(patched_config), gpu_ids=[None])
+        finally:
+            os.chdir(original_cwd)
+
+        # Check that sweep_results directory was created
+        sweep_results_dir = Path(tmpdir) / "sweep_results"
+        assert sweep_results_dir.exists(), "sweep_results/ directory not created"
+
+        # Find the sweep directory (timestamped)
+        sweep_dirs = list(sweep_results_dir.iterdir())
+        assert len(sweep_dirs) == 1, f"Expected 1 sweep dir, got {len(sweep_dirs)}"
+        sweep_dir = sweep_dirs[0]
+
+        # Check metadata
+        metadata_path = sweep_dir / "sweep_metadata.yaml"
+        assert metadata_path.exists(), "sweep_metadata.yaml not found"
+
+        # Check summary
+        summary_path = sweep_dir / "sweep_summary.yaml"
+        assert summary_path.exists(), "sweep_summary.yaml not found"
+
+        with open(summary_path) as f:
+            summary = yaml.safe_load(f)
+
+        assert summary["total_experiments"] == expected_experiments
+        assert summary["total_successful_runs"] == expected_experiments
+        assert summary["total_failed_runs"] == 0
+
+        # Check each experiment has a directory with results
+        for exp_name in summary["experiment_statistics"]:
+            exp_dir = sweep_dir / exp_name
+            assert exp_dir.exists(), f"Experiment dir {exp_name} not found"
+            exp_summary = sweep_dir / exp_name / "experiment_summary.yaml"
+            assert exp_summary.exists(), f"experiment_summary.yaml not found for {exp_name}"
+
+
+def test_sweep_example(mock_all_plots):
+    """Run full sweep end-to-end with example config (2 explicit experiments)."""
+    _run_sweep_and_check(SWEEP_CONFIGS["example"], mock_all_plots, expected_experiments=2)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/test/test_sweep_example.yaml b/test/test_sweep_example.yaml
new file mode 100644
index 0000000..335c22d
--- /dev/null
+++ b/test/test_sweep_example.yaml
@@ -0,0 +1,33 @@
+# Test sweep config mirroring src/sweep_configs/example_sweep.yaml
+# Minimal parameters for fast execution (< 1 min)
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    num_steps: 2
+    epochs: 2
+    verbose_interval: 1
+    save_param_interval: null
+  model:
+    hidden_dim: 4
+  data:
+    group_name: cn
+    p: 5
+    k: 2
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+experiments:
+  - name: "hidden_dim_4"
+    overrides:
+      model:
+        hidden_dim: 4
+
+  - name: "hidden_dim_8"
+    overrides:
+      model:
+        hidden_dim: 8
diff --git a/test/test_sweep_learning_rate.yaml b/test/test_sweep_learning_rate.yaml
new file mode 100644
index 0000000..7e8b6e8
--- /dev/null
+++ b/test/test_sweep_learning_rate.yaml
@@ -0,0 +1,35 @@
+# Test sweep config mirroring src/sweep_configs/learning_rate_sweep.yaml
+# Minimal parameters for fast execution (< 1 min)
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    epochs: 2
+    num_steps: 2
+    verbose_interval: 1
+    save_param_interval: null
+  model:
+    hidden_dim: 4
+  data:
+    group_name: cn
+    p: 5
+    k: 2
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+experiments:
+  - name: "adam_lr_1e-2"
+    overrides:
+      training:
+        optimizer: "adam"
+        learning_rate: 0.01
+
+  - name: "hybrid_scale_-3"
+    overrides:
+      training:
+        optimizer: "hybrid"
+        scaling_factor: -3
diff --git a/test/test_sweep_model_size.yaml b/test/test_sweep_model_size.yaml
new file mode 100644
index 0000000..72bd455
--- /dev/null
+++ b/test/test_sweep_model_size.yaml
@@ -0,0 +1,41 @@
+# Test sweep config mirroring src/sweep_configs/model_size_sweep.yaml
+# Minimal parameters for fast execution (< 1 min)
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    epochs: 2
+    num_steps: 2
+    verbose_interval: 1
+    save_param_interval: null
+  model:
+    hidden_dim: 4
+  data:
+    group_name: cn
+    p: 5
+    k: 2
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+experiments:
+  - name: "small_model_short_seq"
+    overrides:
+      model:
+        hidden_dim: 4
+      data:
+        k: 3
+        p1: 3
+        p2: 3
+
+  - name: "medium_model_medium_seq"
+    overrides:
+      model:
+        hidden_dim: 8
+      data:
+        k: 4
+        p1: 4
+        p2: 4
diff --git a/test/test_sweep_onehot_grid.yaml b/test/test_sweep_onehot_grid.yaml
new file mode 100644
index 0000000..2db96aa
--- /dev/null
+++ b/test/test_sweep_onehot_grid.yaml
@@ -0,0 +1,29 @@
+# Test sweep config mirroring src/sweep_configs/onehot_scaling_sweep.yaml
+# Tiny parameter grid for fast execution (< 1 min)
+# Total: 2 x 1 x 1 = 2 experiments
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    epochs: 2
+    num_steps: 2
+    verbose_interval: 1
+    save_param_interval: null
+  data:
+    group_name: cn
+    k: 2
+    dimension: 1
+    template_type: 'onehot'
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+parameter_grid:
+  data:
+    p: [5, 7]
+    k: [2]
+  model:
+    hidden_dim: [8]
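
Reviewer note: the new unit tests in `test/test_run_sweep.py` pin down the expected behavior of `deep_merge_dict` and `expand_parameter_grid`, but the implementations live in `src/run_sweep.py`, outside this diff. The sketch below is one implementation consistent with those tests (scalars promoted to single-value lists, Cartesian product over all section/parameter axes, no mutation of inputs); it is illustrative only, not the actual code.

```python
# Illustrative sketch of the semantics pinned down by TestDeepMergeDict and
# TestExpandParameterGrid; NOT the code in src/run_sweep.py.
from itertools import product


def deep_merge_dict(base, override):
    """Recursively merge override into base without mutating either."""
    merged = dict(base)  # shallow copy; nested dicts are re-merged below
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge_dict(merged[key], value)
        else:
            merged[key] = value
    return merged


def expand_parameter_grid(grid):
    """Expand {section: {param: values}} into one override dict per combination."""
    axes = []  # flat list of (section, param, [values]) axes
    for section, params in grid.items():
        for param, values in params.items():
            if not isinstance(values, list):
                values = [values]  # a scalar counts as a single-value list
            axes.append((section, param, values))

    combos = []
    for choice in product(*(values for _, _, values in axes)):
        combo = {}
        for (section, param, _), value in zip(axes, choice):
            combo.setdefault(section, {})[param] = value
        combos.append(combo)
    return combos


# Mirrors test_grid_experiments: p in [5, 7] x k in [2] x hidden_dim in [8]
grid = {"data": {"p": [5, 7], "k": [2]}, "model": {"hidden_dim": [8]}}
assert len(expand_parameter_grid(grid)) == 2
```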