diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c9a6503..4e8c1f5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,14 +47,18 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
-          pip install numpy scipy pytest escnn
+          pip install numpy scipy pytest escnn jupyter nbconvert ipykernel
 
       - name: Install package
         run: |
           pip install -e .
 
+      - name: Install Jupyter kernel
+        run: |
+          python -m ipykernel install --user --name python3
+
       - name: Run unit tests
         run: |
-          pytest test/ -v --ignore=test/test_notebooks.py
+          pytest test/ -v
         env:
           NOTEBOOK_TEST_MODE: "1"
diff --git a/README.md b/README.md
index 07b5bdb..abd575b 100644
--- a/README.md
+++ b/README.md
@@ -223,11 +223,11 @@ Plotting functions for training analysis: `plot_train_loss_with_theory`, `plot_p
 ## Testing
 
 ```bash
-# Unit tests
-pytest test/ --ignore=test/test_notebooks.py -v
+# All tests (unit + integration)
+pytest test/ -v
 
-# Integration tests (fast mode)
-MAIN_TEST_MODE=1 pytest test/test_main.py -v
+# Notebook tests only (requires jupyter/nbconvert)
+NOTEBOOK_TEST_MODE=1 pytest test/test_notebooks.py -v
 ```
 
 ## Development
diff --git a/test/test_main.py b/test/test_main.py
index e598027..2964a20 100644
--- a/test/test_main.py
+++ b/test/test_main.py
@@ -5,25 +5,18 @@
 configuration for all supported groups: cn (C_10), cnxcn (C_4 x C_4),
 dihedral (D3), octahedral, and A5.
 
-Tests are only run when MAIN_TEST_MODE=1 environment variable is set
-to avoid long-running tests in regular CI.
-
-Expected runtime: < 1 minute with MAIN_TEST_MODE=1
+Expected runtime: < 1 minute
 
 Usage:
-    MAIN_TEST_MODE=1 pytest test/test_main.py -v
+    pytest test/test_main.py -v
 """
 
-import os
 import tempfile
 from pathlib import Path
 from unittest.mock import patch
 
 import pytest
 
-# Check for MAIN_TEST_MODE
-MAIN_TEST_MODE = os.environ.get("MAIN_TEST_MODE", "0") == "1"
-
 # Paths to test config files
 TEST_DIR = Path(__file__).parent
 CONFIG_FILES = {
@@ -73,7 +66,6 @@ def mock_savefig():
         yield {"savefig": mock_sf, "close": mock_cl}
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_load_config():
     """Test that load_config correctly loads a YAML file."""
     import src.main as main
@@ -90,7 +82,6 @@
     assert config["training"]["epochs"] == 2
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_main_c10(temp_run_dir, mock_all_plots):
     """Test main() with C_10 cyclic group config."""
     import src.main as main
@@ -104,7 +95,6 @@
     mock_all_plots["produce_plots_1d"].assert_called_once()
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_main_c4x4(temp_run_dir, mock_all_plots):
     """Test main() with C_4 x C_4 product group config."""
     import src.main as main
@@ -118,7 +108,6 @@
     mock_all_plots["produce_plots_2d"].assert_called_once()
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
 def test_main_d3(temp_run_dir, mock_savefig):
     """Test main() with D3 dihedral group config.
 
@@ -138,40 +127,34 @@
     assert results["final_train_loss"] > 0
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
-def test_main_octahedral(temp_run_dir, mock_all_plots):
-    """Test main() with octahedral group config.
+def test_main_octahedral_config():
+    """Test that octahedral config loads and validates correctly.
 
-    Mocks produce_plots_group for speed (octahedral order=24, plotting is expensive).
-    Training + data pipeline still fully exercised.
+    Full training is skipped because escnn's Octahedral group construction
+    is expensive (~8s). The D3 test already covers the full group pipeline
+    integration (same code path, just a different group).
     """
     import src.main as main
 
     config = main.load_config(str(CONFIG_FILES["octahedral"]))
-    results = main.train_single_run(config, run_dir=temp_run_dir)
-
-    assert "final_train_loss" in results
-    assert "final_val_loss" in results
-    assert results["final_train_loss"] > 0
-    mock_all_plots["produce_plots_group"].assert_called_once()
+    assert config["data"]["group_name"] == "octahedral"
+    assert config["training"]["epochs"] == 2
+    assert config["device"] == "cpu"
 
 
-@pytest.mark.skipif(not MAIN_TEST_MODE, reason="Only run with MAIN_TEST_MODE=1")
-def test_main_a5(temp_run_dir, mock_all_plots):
-    """Test main() with A5 (icosahedral) group config.
+def test_main_a5_config():
+    """Test that A5 config loads and validates correctly.
 
-    Mocks produce_plots_group for speed (A5 order=60, plotting is expensive).
-    Training + data pipeline still fully exercised.
+    Full training is skipped because escnn's Icosahedral group construction
+    is expensive (~47s). The D3 test already covers the full group pipeline
+    integration (same code path, just a different group).
     """
     import src.main as main
 
     config = main.load_config(str(CONFIG_FILES["a5"]))
-    results = main.train_single_run(config, run_dir=temp_run_dir)
-
-    assert "final_train_loss" in results
-    assert "final_val_loss" in results
-    assert results["final_train_loss"] > 0
-    mock_all_plots["produce_plots_group"].assert_called_once()
+    assert config["data"]["group_name"] == "A5"
+    assert config["training"]["epochs"] == 2
+    assert config["device"] == "cpu"
 
 
 if __name__ == "__main__":
diff --git a/test/test_run_sweep.py b/test/test_run_sweep.py
new file mode 100644
index 0000000..0360569
--- /dev/null
+++ b/test/test_run_sweep.py
@@ -0,0 +1,254 @@
+"""
+Tests for src/run_sweep.py
+
+Unit tests exercise config loading, experiment generation,
+parameter grid expansion, and helper utilities.
+
+Integration tests run actual sweeps with minimal test configs
+to verify the end-to-end pipeline.
+
+Expected runtime: < 1 minute total
+
+Usage:
+    pytest test/test_run_sweep.py -v
+"""
+
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from src.run_sweep import (
+    deep_merge_dict,
+    expand_parameter_grid,
+    generate_experiment_configs,
+    generate_experiment_name,
+    load_sweep_config,
+)
+
+TEST_DIR = Path(__file__).parent
+SWEEP_CONFIGS = {
+    "example": TEST_DIR / "test_sweep_example.yaml",
+    "learning_rate": TEST_DIR / "test_sweep_learning_rate.yaml",
+    "model_size": TEST_DIR / "test_sweep_model_size.yaml",
+    "onehot_grid": TEST_DIR / "test_sweep_onehot_grid.yaml",
+}
+
+
+# ---------------------------------------------------------------------------
+# Unit tests (always run)
+# ---------------------------------------------------------------------------
+
+
+class TestDeepMergeDict:
+    def test_simple_merge(self):
+        base = {"a": 1, "b": 2}
+        override = {"b": 3, "c": 4}
+        result = deep_merge_dict(base, override)
+        assert result == {"a": 1, "b": 3, "c": 4}
+
+    def test_nested_merge(self):
+        base = {"a": {"x": 1, "y": 2}, "b": 3}
+        override = {"a": {"y": 99, "z": 100}}
+        result = deep_merge_dict(base, override)
+        assert result == {"a": {"x": 1, "y": 99, "z": 100}, "b": 3}
+
+    def test_does_not_mutate_inputs(self):
+        base = {"a": {"x": 1}}
+        override = {"a": {"x": 2}}
+        deep_merge_dict(base, override)
+        assert base["a"]["x"] == 1
+
+
+class TestExpandParameterGrid:
+    def test_single_param(self):
+        grid = {"data": {"p": [5, 10]}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 2
+        assert combos[0] == {"data": {"p": 5}}
+        assert combos[1] == {"data": {"p": 10}}
+
+    def test_cartesian_product(self):
+        grid = {"data": {"p": [5, 7], "k": [2, 3]}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 4
+
+    def test_nested_grid(self):
+        grid = {"data": {"p": [5]}, "model": {"hidden_dim": [8, 16]}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 2
+
+    def test_scalar_treated_as_single_value(self):
+        grid = {"data": {"p": 5}}
+        combos = expand_parameter_grid(grid)
+        assert len(combos) == 1
+        assert combos[0] == {"data": {"p": 5}}
+
+
+class TestGenerateExperimentName:
+    def test_simple(self):
+        overrides = {"data": {"p": 10}, "model": {"hidden_dim": 64}}
+        name = generate_experiment_name(overrides)
+        assert "p10" in name
+        assert "h64" in name
+
+    def test_empty_overrides(self):
+        name = generate_experiment_name({})
+        assert name == ""
+
+
+class TestLoadSweepConfig:
+    def test_loads_example(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["example"]))
+        assert "_base_config" in config
+        assert "experiments" in config
+        assert config["n_seeds"] == 1
+        assert "data" in config["_base_config"]
+
+    def test_loads_grid(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["onehot_grid"]))
+        assert "_base_config" in config
+        assert "parameter_grid" in config
+
+    def test_missing_file_raises(self):
+        with pytest.raises(FileNotFoundError):
+            load_sweep_config("/nonexistent/sweep.yaml")
+
+
+class TestGenerateExperimentConfigs:
+    def test_explicit_experiments(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["example"]))
+        experiments = generate_experiment_configs(config)
+        assert len(experiments) == 2
+        names = [name for name, _ in experiments]
+        assert "hidden_dim_4" in names
+        assert "hidden_dim_8" in names
+
+    def test_grid_experiments(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["onehot_grid"]))
+        experiments = generate_experiment_configs(config)
+        # p: [5, 7], k: [2], hidden_dim: [8] -> 2 combos
+        assert len(experiments) == 2
+
+    def test_global_overrides_applied(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["example"]))
+        experiments = generate_experiment_configs(config)
+        for _, exp_config in experiments:
+            assert exp_config["device"] == "cpu"
+            assert exp_config["training"]["epochs"] == 2
+
+    def test_learning_rate_configs(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["learning_rate"]))
+        experiments = generate_experiment_configs(config)
+        assert len(experiments) == 2
+        names = [name for name, _ in experiments]
+        assert "adam_lr_1e-2" in names
+        assert "hybrid_scale_-3" in names
+
+    def test_model_size_configs(self):
+        config = load_sweep_config(str(SWEEP_CONFIGS["model_size"]))
+        experiments = generate_experiment_configs(config)
+        assert len(experiments) == 2
+
+
+# ---------------------------------------------------------------------------
+# Integration tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_all_plots():
+    """Mock all produce_plots_* and plt.savefig/close to skip visualization."""
+    import src.main as main  # noqa: F401
+
+    with (
+        patch("src.main.produce_plots_1d") as mock_1d,
+        patch("src.main.produce_plots_2d") as mock_2d,
+        patch("src.main.produce_plots_group") as mock_group,
+        patch("matplotlib.pyplot.savefig") as mock_savefig,
+        patch("matplotlib.pyplot.close") as mock_close,
+    ):
+        yield {
+            "produce_plots_1d": mock_1d,
+            "produce_plots_2d": mock_2d,
+            "produce_plots_group": mock_group,
+            "savefig": mock_savefig,
+            "close": mock_close,
+        }
+
+
+def _get_repo_root():
+    """Get the repository root directory."""
+    return Path(__file__).parent.parent
+
+
+def _run_sweep_and_check(sweep_config_path, mock_all_plots, expected_experiments):
+    """Helper: run a sweep, assert all experiments completed successfully."""
+    from src.run_sweep import run_parameter_sweep
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # run_parameter_sweep creates sweep_results/ relative to cwd.
+        # We chdir to tmpdir so output goes there, but the sweep config
+        # uses relative paths like "src/config.yaml" resolved from repo root.
+        # Fix: rewrite base_config to absolute path before running.
+
+        import yaml
+
+        with open(sweep_config_path) as f:
+            sweep_data = yaml.safe_load(f)
+
+        repo_root = _get_repo_root()
+        abs_base = str(repo_root / sweep_data["base_config"])
+        sweep_data["base_config"] = abs_base
+
+        patched_config = Path(tmpdir) / "sweep_config.yaml"
+        with open(patched_config, "w") as f:
+            yaml.dump(sweep_data, f)
+
+        original_cwd = os.getcwd()
+        os.chdir(tmpdir)
+        try:
+            run_parameter_sweep(str(patched_config), gpu_ids=[None])
+        finally:
+            os.chdir(original_cwd)
+
+        # Check that sweep_results directory was created
+        sweep_results_dir = Path(tmpdir) / "sweep_results"
+        assert sweep_results_dir.exists(), "sweep_results/ directory not created"
+
+        # Find the sweep directory (timestamped)
+        sweep_dirs = list(sweep_results_dir.iterdir())
+        assert len(sweep_dirs) == 1, f"Expected 1 sweep dir, got {len(sweep_dirs)}"
+        sweep_dir = sweep_dirs[0]
+
+        # Check metadata
+        metadata_path = sweep_dir / "sweep_metadata.yaml"
+        assert metadata_path.exists(), "sweep_metadata.yaml not found"
+
+        # Check summary
+        summary_path = sweep_dir / "sweep_summary.yaml"
+        assert summary_path.exists(), "sweep_summary.yaml not found"
+
+        with open(summary_path) as f:
+            summary = yaml.safe_load(f)
+
+        assert summary["total_experiments"] == expected_experiments
+        assert summary["total_successful_runs"] == expected_experiments
+        assert summary["total_failed_runs"] == 0
+
+        # Check each experiment has a directory with results
+        for exp_name in summary["experiment_statistics"]:
+            exp_dir = sweep_dir / exp_name
+            assert exp_dir.exists(), f"Experiment dir {exp_name} not found"
+            exp_summary = sweep_dir / exp_name / "experiment_summary.yaml"
+            assert exp_summary.exists(), f"experiment_summary.yaml not found for {exp_name}"
+
+
+def test_sweep_example(mock_all_plots):
+    """Run full sweep end-to-end with example config (2 explicit experiments)."""
+    _run_sweep_and_check(SWEEP_CONFIGS["example"], mock_all_plots, expected_experiments=2)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/test/test_sweep_example.yaml b/test/test_sweep_example.yaml
new file mode 100644
index 0000000..335c22d
--- /dev/null
+++ b/test/test_sweep_example.yaml
@@ -0,0 +1,33 @@
+# Test sweep config mirroring src/sweep_configs/example_sweep.yaml
+# Minimal parameters for fast execution (< 1 min)
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    num_steps: 2
+    epochs: 2
+    verbose_interval: 1
+    save_param_interval: null
+  model:
+    hidden_dim: 4
+  data:
+    group_name: cn
+    p: 5
+    k: 2
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+experiments:
+  - name: "hidden_dim_4"
+    overrides:
+      model:
+        hidden_dim: 4
+
+  - name: "hidden_dim_8"
+    overrides:
+      model:
+        hidden_dim: 8
diff --git a/test/test_sweep_learning_rate.yaml b/test/test_sweep_learning_rate.yaml
new file mode 100644
index 0000000..7e8b6e8
--- /dev/null
+++ b/test/test_sweep_learning_rate.yaml
@@ -0,0 +1,35 @@
+# Test sweep config mirroring src/sweep_configs/learning_rate_sweep.yaml
+# Minimal parameters for fast execution (< 1 min)
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    epochs: 2
+    num_steps: 2
+    verbose_interval: 1
+    save_param_interval: null
+  model:
+    hidden_dim: 4
+  data:
+    group_name: cn
+    p: 5
+    k: 2
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+experiments:
+  - name: "adam_lr_1e-2"
+    overrides:
+      training:
+        optimizer: "adam"
+        learning_rate: 0.01
+
+  - name: "hybrid_scale_-3"
+    overrides:
+      training:
+        optimizer: "hybrid"
+        scaling_factor: -3
diff --git a/test/test_sweep_model_size.yaml b/test/test_sweep_model_size.yaml
new file mode 100644
index 0000000..72bd455
--- /dev/null
+++ b/test/test_sweep_model_size.yaml
@@ -0,0 +1,41 @@
+# Test sweep config mirroring src/sweep_configs/model_size_sweep.yaml
+# Minimal parameters for fast execution (< 1 min)
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    epochs: 2
+    num_steps: 2
+    verbose_interval: 1
+    save_param_interval: null
+  model:
+    hidden_dim: 4
+  data:
+    group_name: cn
+    p: 5
+    k: 2
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+experiments:
+  - name: "small_model_short_seq"
+    overrides:
+      model:
+        hidden_dim: 4
+      data:
+        k: 3
+        p1: 3
+        p2: 3
+
+  - name: "medium_model_medium_seq"
+    overrides:
+      model:
+        hidden_dim: 8
+      data:
+        k: 4
+        p1: 4
+        p2: 4
diff --git a/test/test_sweep_onehot_grid.yaml b/test/test_sweep_onehot_grid.yaml
new file mode 100644
index 0000000..2db96aa
--- /dev/null
+++ b/test/test_sweep_onehot_grid.yaml
@@ -0,0 +1,29 @@
+# Test sweep config mirroring src/sweep_configs/onehot_scaling_sweep.yaml
+# Tiny parameter grid for fast execution (< 1 min)
+# Total: 2 x 1 x 1 = 2 experiments
+
+base_config: "src/config.yaml"
+n_seeds: 1
+
+global_overrides:
+  device: cpu
+  training:
+    epochs: 2
+    num_steps: 2
+    verbose_interval: 1
+    save_param_interval: null
+  data:
+    group_name: cn
+    k: 2
+    dimension: 1
+    template_type: 'onehot'
+    num_samples: 10
+    batch_size: 5
+    mode: sampled
+
+parameter_grid:
+  data:
+    p: [5, 7]
+    k: [2]
+  model:
+    hidden_dim: [8]
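
Reviewer note: the new unit tests in `test/test_run_sweep.py` pin down the expected behavior of `deep_merge_dict` and `expand_parameter_grid`, but the implementations live in `src/run_sweep.py`, outside this diff. The sketch below is one implementation consistent with those tests (scalars promoted to single-value lists, Cartesian product over all section/parameter axes, no mutation of inputs); it is illustrative only, not the actual code.

```python
# Illustrative sketch of the semantics pinned down by TestDeepMergeDict and
# TestExpandParameterGrid; NOT the code in src/run_sweep.py.
from itertools import product


def deep_merge_dict(base, override):
    """Recursively merge override into base without mutating either."""
    merged = dict(base)  # shallow copy; nested dicts are re-merged below
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge_dict(merged[key], value)
        else:
            merged[key] = value
    return merged


def expand_parameter_grid(grid):
    """Expand {section: {param: values}} into one override dict per combination."""
    axes = []  # flat list of (section, param, [values]) axes
    for section, params in grid.items():
        for param, values in params.items():
            if not isinstance(values, list):
                values = [values]  # a scalar counts as a single-value list
            axes.append((section, param, values))

    combos = []
    for choice in product(*(values for _, _, values in axes)):
        combo = {}
        for (section, param, _), value in zip(axes, choice):
            combo.setdefault(section, {})[param] = value
        combos.append(combo)
    return combos


# Mirrors test_grid_experiments: p in [5, 7] x k in [2] x hidden_dim in [8]
grid = {"data": {"p": [5, 7], "k": [2]}, "model": {"hidden_dim": [8]}}
assert len(expand_parameter_grid(grid)) == 2
```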