Merged

30 commits
f1395dd
add install of gfortran in main readme + try out running Fran's code:…
ninamiolane Jan 13, 2026
ca421c0
Add comments to guide code generation
ninamiolane Jan 13, 2026
0f4d521
D3 running with one-hot
ninamiolane Jan 13, 2026
31d12d6
We have a staircase with D3 and specifying powers
ninamiolane Jan 13, 2026
eb5864e
try onehot with D3
ninamiolane Jan 13, 2026
66276ae
Ran onehot experiments
ninamiolane Jan 13, 2026
59bef75
Starting double checking
ninamiolane Jan 13, 2026
68f0499
bringing back main
ninamiolane Jan 13, 2026
71a6179
changing the order of composition
ninamiolane Jan 13, 2026
85de59e
Still a bias for higher dim
ninamiolane Jan 13, 2026
14f4c99
try lowering the lr
ninamiolane Jan 13, 2026
b3208b9
bring back docs in config.yaml
ninamiolane Jan 13, 2026
b09b293
rm leftover todos
ninamiolane Jan 13, 2026
534d958
Rm leftover comments
ninamiolane Jan 13, 2026
3039d8e
Config with adam to have a cleaner/slower jump
ninamiolane Jan 14, 2026
885f2da
combined plots scaling expts with d3
ninamiolane Feb 5, 2026
b3f9b04
Added 85 unit-tests to protect codebase
ninamiolane Feb 5, 2026
c5c2202
Remove leftover notebooks and svg
ninamiolane Feb 5, 2026
ff00c15
Refactor tests
ninamiolane Feb 5, 2026
a081eb4
Refactor notebooks
ninamiolane Feb 5, 2026
201cb1c
Add GI suite + lint the repo + precommit hook
ninamiolane Feb 6, 2026
fcdb9c1
Test main files
ninamiolane Feb 6, 2026
1f6bf2d
Both mains run with 2 epochs
ninamiolane Feb 6, 2026
f07a0af
lint repo
ninamiolane Feb 6, 2026
3ed21ca
accept notebooks changes
ninamiolane Feb 6, 2026
96f707c
ruff fix
ninamiolane Feb 6, 2026
08dee84
fxi imports
ninamiolane Feb 6, 2026
8d61311
ruff format
ninamiolane Feb 6, 2026
0a4b30a
add ci badge
ninamiolane Feb 6, 2026
ef7d9f8
fix install in ci.yml
ninamiolane Feb 6, 2026
60 changes: 60 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,60 @@
# Continuous Integration workflow for group-agf
# Runs linting and unit tests on every pull request

name: CI

on:
  pull_request:
    branches: [main, master]
  push:
    branches: [main, master]

jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install ruff
        run: pip install ruff

      - name: Run ruff linter
        run: ruff check . --exclude "*.ipynb"

      - name: Run ruff formatter check
        run: ruff format --check . --exclude "*.ipynb"

  test:
    name: Unit Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
          pip install numpy scipy pytest escnn

      - name: Install package
        run: |
          pip install -e .

      - name: Run unit tests
        run: |
          pytest test/ -v --ignore=test/test_notebooks.py
        env:
          NOTEBOOK_TEST_MODE: "1"
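The two jobs above can be reproduced locally before pushing. The sketch below is a hypothetical convenience script (not part of the repo) that runs the same ruff and pytest commands with the same `NOTEBOOK_TEST_MODE` environment variable:

```python
# Hypothetical helper (not in the repo): run the CI checks locally.
import os
import subprocess

checks = [
    ["ruff", "check", ".", "--exclude", "*.ipynb"],
    ["ruff", "format", "--check", ".", "--exclude", "*.ipynb"],
    ["pytest", "test/", "-v", "--ignore=test/test_notebooks.py"],
]
env = {**os.environ, "NOTEBOOK_TEST_MODE": "1"}
for cmd in checks:
    subprocess.run(cmd, check=True, env=env)  # stops at the first failing check
```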
4 changes: 2 additions & 2 deletions .gitignore
@@ -199,9 +199,9 @@ cython_debug/
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

36 changes: 36 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,36 @@
# Pre-commit hooks for group-agf repository
# Install: pre-commit install
# Run manually: pre-commit run --all-files

repos:
  # General file hygiene
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: trailing-whitespace
        exclude: '\.ipynb$'
      - id: end-of-file-fixer
        exclude: '\.ipynb$'
      - id: check-yaml
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: check-merge-conflict
      - id: debug-statements

  # Ruff - Python linting and formatting
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.0
    hooks:
      # Linter
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
        exclude: '\.ipynb$'
      # Formatter
      - id: ruff-format
        exclude: '\.ipynb$'

  # Notebook cell output clearing (optional - uncomment if desired)
  # - repo: https://github.com/kynan/nbstripout
  #   rev: 0.7.1
  #   hooks:
  #     - id: nbstripout
4 changes: 4 additions & 0 deletions README.md
@@ -1,9 +1,13 @@
# group-agf

[![CI](https://github.com/geometric-intelligence/group-agf/actions/workflows/ci.yml/badge.svg)](https://github.com/geometric-intelligence/group-agf/actions/workflows/ci.yml)

Group Alternating Gradient Flows

# Installing Dependencies

```
sudo apt install -y gfortran
conda env create -f conda.yaml
conda activate gagf
poetry install
13 changes: 7 additions & 6 deletions gagf/rnns/README.md
@@ -31,7 +31,7 @@ SGD with per-neuron learning rate scaling that exploits model homogeneity.
- Learning rate: typically 1.0 for SequentialMLP
- Exploits the property: scaling all parameters of neuron i by α scales output by α^degree

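To make the homogeneity property concrete, here is a standalone sketch (illustrative only, not code from this repo): a degree-2 homogeneous "neuron" whose output is bilinear in its two weight vectors scales by $\alpha^2$ when both are scaled by $\alpha$, which is the property the per-neuron learning-rate scaling exploits.

```python
import torch

# Illustrative only (not repo code): y = (w_in . x) * (w_out . x) is
# degree-2 homogeneous in the neuron's parameters (w_in, w_out):
# scaling both by alpha scales the output by alpha**2.
torch.manual_seed(0)
x = torch.randn(8)
w_in, w_out = torch.randn(8), torch.randn(8)

def neuron(w_in, w_out, x):
    return torch.dot(w_in, x) * torch.dot(w_out, x)

alpha, degree = 3.0, 2
assert torch.allclose(
    neuron(alpha * w_in, alpha * w_out, x),
    alpha**degree * neuron(w_in, w_out, x),
)
```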
### Hybrid (`'hybrid'`)
Combines per-neuron scaled SGD (for W_in, W_drive, W_out) with Adam (for W_mix).
- **Only for QuadraticRNN**
- Best for exploiting both MLP-like and recurrent structure
@@ -48,13 +48,14 @@ Automatically selects the recommended optimizer:
To train a model on modular addition tasks:
- **1D**: $(C_p)^k$ - Cyclic group of order $p$
- **2D**: $(C_{p_1} \times C_{p_2})^k$ - Product of two cyclic groups
- **Dihedral D3**: $D_3$ - Dihedral group of order 6 (the symmetries of an equilateral triangle)

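For the 1D task, each training example is a length-$k$ sequence of residues whose target is their sum mod $p$. The snippet below (a hypothetical helper, not from this repo; values chosen for illustration) generates such a batch:

```python
import torch

# Hypothetical illustration (not repo code): data for modular addition
# on C_p with sequence length k; the target is the sum of the inputs mod p.
def modular_addition_batch(p=10, k=2, batch_size=1000, seed=5):
    g = torch.Generator().manual_seed(seed)
    x = torch.randint(0, p, (batch_size, k), generator=g)
    y = x.sum(dim=1) % p
    return x, y

x, y = modular_addition_batch()
print(x[0].tolist(), "->", y[0].item())  # e.g. [a, b] -> (a + b) % 10
```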
**Steps:**

1. Edit `gagf/rnns/config.yaml` to specify your experiment.

**Key configuration parameters:**

| Parameter | Options | Description |
|-----------|---------|-------------|
| `data.dimension` | `1` or `2` | Use 1D cyclic group or 2D product group |
@@ -69,7 +70,7 @@ To train a model on modular addition tasks:
| `training.learning_rate` | float | Base learning rate |

**Example configurations:**

```yaml
# 1D task with QuadraticRNN
data:
@@ -81,7 +82,7 @@ To train a model on modular addition tasks:
model_type: 'QuadraticRNN'
hidden_dim: 200
```

```yaml
# 2D task with SequentialMLP
data:
@@ -122,7 +123,7 @@ experiments:
overrides:
model:
hidden_dim: 32

- name: "hidden_dim_64"
overrides:
model:
@@ -180,4 +181,4 @@ sweeps/
└── seed_2/
```

See `gagf/rnns/sweeps/` for more example sweep configurations.
91 changes: 53 additions & 38 deletions gagf/rnns/config.yaml
@@ -15,90 +15,103 @@
# Data Configuration
# ------------------
data:
-  # Dimension: 1 for C_p (cyclic group), 2 for C_p1 x C_p2 (product group)
-  dimension: 1 # 1 | 2
+  # Dimension: 1 for C_p (cyclic group), 2 for C_p1 x C_p2 (product group), 'D3' for Dihedral D3
+  dimension: D3 # 1 | 2 | 'D3'

  # Group Parameters
  # For dimension=1: only 'p' is used
  # For dimension=2: 'p1' and 'p2' are used
+  # For dimension='D3': none of p, p1, p2 are used
  p: 10 # Cyclic group dimension (1D only)

  p1: 4 #10 # Height/rows dimension (2D only)
  p2: 4 # Width/cols dimension (2D only)
-  k: 2 # Sequence length
+  k: 5 # Sequence length
  batch_size: 1000
  seed: 5

  # Template Generation
-  template_type: 'fourier' # 'mnist' | 'fourier' | 'gaussian'
+  # For dimension=1,2: 'mnist' | 'fourier' | 'gaussian' | 'onehot'
+  # For dimension='D3': 'onehot' | 'custom_fourier'
+  template_type: onehot
  mnist_label: 4 # MNIST digit (0-9), only if template_type='mnist'
  n_freqs: 1 # Number of Fourier modes, only if template_type='fourier'

+  # D3 custom_fourier template: powers for each irrep's Fourier coefficient
+  # D3 has 3 irreps with dimensions [1, 1, 2], so powers should have 3 values
+  # Large ratio between powers = clearer staircase steps
+  powers:
+    - 0.0
+    - 2000.0
+    - 400.0

  # Dataset Mode (offline training only)
-  mode: sampled # 'sampled' | 'exhaustive'
-  num_samples: 50000
+  mode: exhaustive # 'sampled' | 'exhaustive'
+  num_samples: 1000

# Model Configuration
# -------------------
model:
-  model_type: 'SequentialMLP' # 'QuadraticRNN' | 'SequentialMLP'
-  hidden_dim: 6 # Hidden layer size
-  init_scale: 1.0e-2 # Weight initialization scale
+  model_type: SequentialMLP # 'QuadraticRNN' | 'SequentialMLP'
+
+  hidden_dim: 600 # Hidden layer size
+  # Note: SequentialMLP may need larger values (e.g., 600)
+
+  init_scale: 4.5e-3 # Weight initialization scale
+  # Larger k may need larger init_scale

  return_all_outputs: false # true = seq-to-seq guidance (for QuadraticRNN only)
                            # false = seq-to-one (final output only)
-  transform_type: 'quadratic' # 'quadratic' | 'multiplicative'
+  transform_type: quadratic # 'quadratic' | 'multiplicative'
+  # Only used for QuadraticRNN

# Training Configuration
# ----------------------
training:
-  mode: 'online' # 'online' | 'offline'
+  mode: offline # 'online' | 'offline'

  # Steps/Epochs
-  epochs: 200 # Used when mode='offline'
-  num_steps: 1000000 #500000 # Used when mode='online'
+  epochs: 2 #10000 # Used when mode='offline'
+  num_steps: 100 # Used when mode='online'

  # Optimizer
-  optimizer: 'adam' #per_neuron' # 'auto' | 'adam' | 'hybrid' | 'per_neuron'
+  optimizer: adam # 'auto' | 'adam' | 'hybrid' | 'per_neuron'
  # 'auto' selects optimizer based on model:
  # - SequentialMLP → 'per_neuron' (recommended)
  # - QuadraticRNN → 'adam'
  # 'hybrid' is QuadraticRNN-specific only

-  learning_rate: 1.0e-3 # Base learning rate
+  learning_rate: 0.00008 # Base learning rate
  # Recommended settings:
  # - adam: 1e-3 to 1e-4
-  # - per_neuron (SequentialMLP): 1.0
+  # - per_neuron (SequentialMLP): 1.0 (or 0.01 for D3)
  # - hybrid: see scaling_factor

-  betas: [0.9, 0.999] # Adam/hybrid beta parameters
+  betas:
+    - 0.9
+    - 0.999 # Adam/hybrid beta parameters
  weight_decay: 0.0

  # Homogeneity-based scaling parameters
  scaling_factor: -3 # For 'hybrid' optimizer only (QuadraticRNN)
  degree: null # For 'per_neuron' optimizer: degree of homogeneity
  # If null (default), auto-inferred from model:
  # - SequentialMLP: uses k+1 (k = sequence length)
  # - Other models: defaults to 2

  # Training Dynamics
  grad_clip: 0.1
-  verbose_interval: 10
-  save_param_interval: null # Save params every N steps/epochs
+  verbose_interval: 1000
+  save_param_interval: 10 # Save params every N steps/epochs
  # Set to null to only save initial & final (memory efficient for sweeps)

  # Early Stopping (optional)
  # -------------------------
  # Stop training early when loss reduction reaches a threshold.
  # Set to null to disable (train for full num_steps/epochs).
-  reduction_threshold: 0.99 # e.g., 0.99 = stop when 99% loss reduction achieved
+  reduction_threshold: null # e.g., 0.99 = stop when 99% loss reduction achieved
+  # null = disabled (train for full steps/epochs)

  # Device
@@ -108,4 +121,6 @@ device: cuda:1 # 'cuda' | 'cpu'

# Analysis & Visualization
# ------------------------
analysis:
-  checkpoints: [0.0, 0.25, 0.5, 0.75, 1.0] # Fraction of training for analysis
+  checkpoints:
+    - 0.0
+    - 1.0 # Fraction of training for analysis
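For intuition on the `powers` setting above: $D_3$, the symmetry group of an equilateral triangle, has 6 elements and three real irreps of dimensions 1, 1, and 2, so `powers` supplies one weight per irrep. The following self-contained check of these group facts is illustrative only and unrelated to the repo's implementation:

```python
# Illustrative only: build D3 as permutations of the triangle's vertices
# and verify |D3| = 6 = 1^2 + 1^2 + 2^2, matching irrep dims [1, 1, 2].
from itertools import product

r = (1, 2, 0)  # rotation by 120 degrees: vertex i -> r[i]
s = (0, 2, 1)  # reflection fixing vertex 0

def compose(a, b):
    """Permutation composition: apply b first, then a."""
    return tuple(a[b[i]] for i in range(3))

elements = {(0, 1, 2), r, s}
while True:  # close {identity, r, s} under composition
    new = {compose(a, b) for a, b in product(elements, repeat=2)} - elements
    if not new:
        break
    elements |= new

irrep_dims = [1, 1, 2]  # trivial, sign, and 2D standard irrep
assert len(elements) == 6
assert sum(d * d for d in irrep_dims) == len(elements)
```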