Merged

30 commits
f1395dd
add install of gfortran in main readme + try out running Fran's code:…
ninamiolane Jan 13, 2026
ca421c0
Add comments to guide code generation
ninamiolane Jan 13, 2026
0f4d521
D3 running with one-hot
ninamiolane Jan 13, 2026
31d12d6
We have a staircase with D3 and specifying powers
ninamiolane Jan 13, 2026
eb5864e
try onehot with D3
ninamiolane Jan 13, 2026
66276ae
Ran onehot experiments
ninamiolane Jan 13, 2026
59bef75
Starting double checking
ninamiolane Jan 13, 2026
68f0499
bringing back main
ninamiolane Jan 13, 2026
71a6179
changing the order of composition
ninamiolane Jan 13, 2026
85de59e
Still a bias for higher dim
ninamiolane Jan 13, 2026
14f4c99
try lowering the lr
ninamiolane Jan 13, 2026
b3208b9
bring back docs in config.yaml
ninamiolane Jan 13, 2026
b09b293
rm leftover todos
ninamiolane Jan 13, 2026
534d958
Rm leftover comments
ninamiolane Jan 13, 2026
3039d8e
Config with adam to have a cleaner/slower jump
ninamiolane Jan 14, 2026
885f2da
combined plots scaling expts with d3
ninamiolane Feb 5, 2026
b3f9b04
Added 85 unit-tests to protect codebase
ninamiolane Feb 5, 2026
c5c2202
Remove leftover notebooks and svg
ninamiolane Feb 5, 2026
ff00c15
Refactor tests
ninamiolane Feb 5, 2026
a081eb4
Refactor notebooks
ninamiolane Feb 5, 2026
201cb1c
Add GI suite + lint the repo + precommit hook
ninamiolane Feb 6, 2026
fcdb9c1
Test main files
ninamiolane Feb 6, 2026
1f6bf2d
Both mains run with 2 epochs
ninamiolane Feb 6, 2026
f07a0af
lint repo
ninamiolane Feb 6, 2026
3ed21ca
accept notebooks changes
ninamiolane Feb 6, 2026
96f707c
ruff fix
ninamiolane Feb 6, 2026
08dee84
fxi imports
ninamiolane Feb 6, 2026
8d61311
ruff format
ninamiolane Feb 6, 2026
0a4b30a
add ci badge
ninamiolane Feb 6, 2026
ef7d9f8
fix install in ci.yml
ninamiolane Feb 6, 2026
60 changes: 60 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,60 @@
# Continuous Integration workflow for group-agf
# Runs linting and unit tests on every pull request

name: CI

on:
  pull_request:
    branches: [main, master]
  push:
    branches: [main, master]

jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install ruff
        run: pip install ruff

      - name: Run ruff linter
        run: ruff check . --exclude "*.ipynb"

      - name: Run ruff formatter check
        run: ruff format --check . --exclude "*.ipynb"

  test:
    name: Unit Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
          pip install numpy scipy pytest escnn

      - name: Install package
        run: |
          pip install -e .

      - name: Run unit tests
        run: |
          pytest test/ -v --ignore=test/test_notebooks.py
        env:
          NOTEBOOK_TEST_MODE: "1"
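The two jobs above can be reproduced locally before pushing. The sketch below is a hypothetical convenience script (not part of the repo) that runs the same ruff and pytest commands with the same `NOTEBOOK_TEST_MODE` environment variable:

```python
# Hypothetical helper (not in the repo): run the CI checks locally.
import os
import subprocess

checks = [
    ["ruff", "check", ".", "--exclude", "*.ipynb"],
    ["ruff", "format", "--check", ".", "--exclude", "*.ipynb"],
    ["pytest", "test/", "-v", "--ignore=test/test_notebooks.py"],
]
env = {**os.environ, "NOTEBOOK_TEST_MODE": "1"}
for cmd in checks:
    subprocess.run(cmd, check=True, env=env)  # stops at the first failing check
```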
4 changes: 2 additions & 2 deletions .gitignore
@@ -199,9 +199,9 @@ cython_debug/
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

36 changes: 36 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,36 @@
# Pre-commit hooks for group-agf repository
# Install: pre-commit install
# Run manually: pre-commit run --all-files

repos:
  # General file hygiene
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: trailing-whitespace
        exclude: '\.ipynb$'
      - id: end-of-file-fixer
        exclude: '\.ipynb$'
      - id: check-yaml
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: check-merge-conflict
      - id: debug-statements

  # Ruff - Python linting and formatting
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.0
    hooks:
      # Linter
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
        exclude: '\.ipynb$'
      # Formatter
      - id: ruff-format
        exclude: '\.ipynb$'

  # Notebook cell output clearing (optional - uncomment if desired)
  # - repo: https://github.com/kynan/nbstripout
  #   rev: 0.7.1
  #   hooks:
  #     - id: nbstripout
4 changes: 4 additions & 0 deletions README.md
@@ -1,9 +1,13 @@
# group-agf

[![CI](https://github.com/geometric-intelligence/group-agf/actions/workflows/ci.yml/badge.svg)](https://github.com/geometric-intelligence/group-agf/actions/workflows/ci.yml)

Group Alternating Gradient Flows

# Installing Dependencies

```
sudo apt install -y gfortran
conda env create -f conda.yaml
conda activate gagf
poetry install
13 changes: 7 additions & 6 deletions gagf/rnns/README.md
@@ -31,7 +31,7 @@ SGD with per-neuron learning rate scaling that exploits model homogeneity.
- Learning rate: typically 1.0 for SequentialMLP
- Exploits the property: scaling all parameters of neuron i by α scales output by α^degree

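To make the homogeneity property concrete, here is a standalone sketch (illustrative only, not code from this repo): a degree-2 homogeneous "neuron" whose output is bilinear in its two weight vectors scales by $\alpha^2$ when both are scaled by $\alpha$, which is the property the per-neuron learning-rate scaling exploits.

```python
import torch

# Illustrative only (not repo code): y = (w_in . x) * (w_out . x) is
# degree-2 homogeneous in the neuron's parameters (w_in, w_out):
# scaling both by alpha scales the output by alpha**2.
torch.manual_seed(0)
x = torch.randn(8)
w_in, w_out = torch.randn(8), torch.randn(8)

def neuron(w_in, w_out, x):
    return torch.dot(w_in, x) * torch.dot(w_out, x)

alpha, degree = 3.0, 2
assert torch.allclose(
    neuron(alpha * w_in, alpha * w_out, x),
    alpha**degree * neuron(w_in, w_out, x),
)
```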
### Hybrid (`'hybrid'`)
Combines per-neuron scaled SGD (for W_in, W_drive, W_out) with Adam (for W_mix).
- **Only for QuadraticRNN**
- Best for exploiting both MLP-like and recurrent structure
@@ -48,13 +48,14 @@ Automatically selects the recommended optimizer:
To train a model on modular addition tasks:
- **1D**: $(C_p)^k$ - Cyclic group of order $p$
- **2D**: $(C_{p_1} \times C_{p_2})^k$ - Product of two cyclic groups
- **Dihedral D3**: $D_3$ - Dihedral group of order 6 (the symmetries of an equilateral triangle)

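For the 1D task, each training example is a length-$k$ sequence of residues whose target is their sum mod $p$. The snippet below (a hypothetical helper, not from this repo; values chosen for illustration) generates such a batch:

```python
import torch

# Hypothetical illustration (not repo code): data for modular addition
# on C_p with sequence length k; the target is the sum of the inputs mod p.
def modular_addition_batch(p=10, k=2, batch_size=1000, seed=5):
    g = torch.Generator().manual_seed(seed)
    x = torch.randint(0, p, (batch_size, k), generator=g)
    y = x.sum(dim=1) % p
    return x, y

x, y = modular_addition_batch()
print(x[0].tolist(), "->", y[0].item())  # e.g. [a, b] -> (a + b) % 10
```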
**Steps:**

1. Edit `gagf/rnns/config.yaml` to specify your experiment.

**Key configuration parameters:**

| Parameter | Options | Description |
|-----------|---------|-------------|
| `data.dimension` | `1` or `2` | Use 1D cyclic group or 2D product group |
@@ -69,7 +70,7 @@ To train a model on modular addition tasks:
| `training.learning_rate` | float | Base learning rate |

**Example configurations:**

```yaml
# 1D task with QuadraticRNN
data:
@@ -81,7 +82,7 @@ To train a model on modular addition tasks:
model_type: 'QuadraticRNN'
hidden_dim: 200
```

```yaml
# 2D task with SequentialMLP
data:
@@ -122,7 +123,7 @@ experiments:
overrides:
model:
hidden_dim: 32

- name: "hidden_dim_64"
overrides:
model:
@@ -180,4 +181,4 @@ sweeps/
└── seed_2/
```

See `gagf/rnns/sweeps/` for more example sweep configurations.
91 changes: 53 additions & 38 deletions gagf/rnns/config.yaml
@@ -15,90 +15,103 @@
# Data Configuration
# ------------------
data:
-  # Dimension: 1 for C_p (cyclic group), 2 for C_p1 x C_p2 (product group)
-  dimension: 1 # 1 | 2
+  # Dimension: 1 for C_p (cyclic group), 2 for C_p1 x C_p2 (product group), 'D3' for Dihedral D3
+  dimension: D3 # 1 | 2 | 'D3'

  # Group Parameters
  # For dimension=1: only 'p' is used
  # For dimension=2: 'p1' and 'p2' are used
+  # For dimension='D3': none of p, p1, p2 are used
  p: 10 # Cyclic group dimension (1D only)

  p1: 4 #10 # Height/rows dimension (2D only)
  p2: 4 # Width/cols dimension (2D only)
-  k: 2 # Sequence length
+  k: 5 # Sequence length
  batch_size: 1000
  seed: 5

  # Template Generation
-  template_type: 'fourier' # 'mnist' | 'fourier' | 'gaussian'
+  # For dimension=1,2: 'mnist' | 'fourier' | 'gaussian' | 'onehot'
+  # For dimension='D3': 'onehot' | 'custom_fourier'
+  template_type: onehot
  mnist_label: 4 # MNIST digit (0-9), only if template_type='mnist'
  n_freqs: 1 # Number of Fourier modes, only if template_type='fourier'

+  # D3 custom_fourier template: powers for each irrep's Fourier coefficient
+  # D3 has 3 irreps with dimensions [1, 1, 2], so powers should have 3 values
+  # Large ratio between powers = clearer staircase steps
+  powers:
+    - 0.0
+    - 2000.0
+    - 400.0

  # Dataset Mode (offline training only)
-  mode: sampled # 'sampled' | 'exhaustive'
-  num_samples: 50000
+  mode: exhaustive # 'sampled' | 'exhaustive'
+  num_samples: 1000

# Model Configuration
# -------------------
model:
-  model_type: 'SequentialMLP' # 'QuadraticRNN' | 'SequentialMLP'
-  hidden_dim: 6 # Hidden layer size
-  init_scale: 1.0e-2 # Weight initialization scale
+  model_type: SequentialMLP # 'QuadraticRNN' | 'SequentialMLP'
+
+  hidden_dim: 600 # Hidden layer size
+  # Note: SequentialMLP may need larger values (e.g., 600)
+
+  init_scale: 4.5e-3 # Weight initialization scale
+  # Larger k may need larger init_scale

  return_all_outputs: false # true = seq-to-seq guidance (for QuadraticRNN only)
                            # false = seq-to-one (final output only)
-  transform_type: 'quadratic' # 'quadratic' | 'multiplicative'
+  transform_type: quadratic # 'quadratic' | 'multiplicative'
+  # Only used for QuadraticRNN

# Training Configuration
# ----------------------
training:
-  mode: 'online' # 'online' | 'offline'
+  mode: offline # 'online' | 'offline'

  # Steps/Epochs
-  epochs: 200 # Used when mode='offline'
-  num_steps: 1000000 #500000 # Used when mode='online'
+  epochs: 2 #10000 # Used when mode='offline'
+  num_steps: 100 # Used when mode='online'

  # Optimizer
-  optimizer: 'adam' #per_neuron' # 'auto' | 'adam' | 'hybrid' | 'per_neuron'
+  optimizer: adam # 'auto' | 'adam' | 'hybrid' | 'per_neuron'
  # 'auto' selects optimizer based on model:
  # - SequentialMLP → 'per_neuron' (recommended)
  # - QuadraticRNN → 'adam'
  # 'hybrid' is QuadraticRNN-specific only

-  learning_rate: 1.0e-3 # Base learning rate
+  learning_rate: 0.00008 # Base learning rate
  # Recommended settings:
  # - adam: 1e-3 to 1e-4
-  # - per_neuron (SequentialMLP): 1.0
+  # - per_neuron (SequentialMLP): 1.0 (or 0.01 for D3)
  # - hybrid: see scaling_factor

-  betas: [0.9, 0.999] # Adam/hybrid beta parameters
+  betas:
+    - 0.9
+    - 0.999 # Adam/hybrid beta parameters
  weight_decay: 0.0

  # Homogeneity-based scaling parameters
  scaling_factor: -3 # For 'hybrid' optimizer only (QuadraticRNN)
  degree: null # For 'per_neuron' optimizer: degree of homogeneity
  # If null (default), auto-inferred from model:
  # - SequentialMLP: uses k+1 (k = sequence length)
  # - Other models: defaults to 2

  # Training Dynamics
  grad_clip: 0.1
-  verbose_interval: 10
-  save_param_interval: null # Save params every N steps/epochs
+  verbose_interval: 1000
+  save_param_interval: 10 # Save params every N steps/epochs
  # Set to null to only save initial & final (memory efficient for sweeps)

  # Early Stopping (optional)
  # -------------------------
  # Stop training early when loss reduction reaches a threshold.
  # Set to null to disable (train for full num_steps/epochs).
-  reduction_threshold: 0.99 # e.g., 0.99 = stop when 99% loss reduction achieved
+  reduction_threshold: null # e.g., 0.99 = stop when 99% loss reduction achieved
+  # null = disabled (train for full steps/epochs)

  # Device
@@ -108,4 +121,6 @@ device: cuda:1 # 'cuda' | 'cpu'

# Analysis & Visualization
# ------------------------
analysis:
-  checkpoints: [0.0, 0.25, 0.5, 0.75, 1.0] # Fraction of training for analysis
+  checkpoints:
+    - 0.0
+    - 1.0 # Fraction of training for analysis
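For intuition on the `powers` setting above: $D_3$, the symmetry group of an equilateral triangle, has 6 elements and three real irreps of dimensions 1, 1, and 2, so `powers` supplies one weight per irrep. The following self-contained check of these group facts is illustrative only and unrelated to the repo's implementation:

```python
# Illustrative only: build D3 as permutations of the triangle's vertices
# and verify |D3| = 6 = 1^2 + 1^2 + 2^2, matching irrep dims [1, 1, 2].
from itertools import product

r = (1, 2, 0)  # rotation by 120 degrees: vertex i -> r[i]
s = (0, 2, 1)  # reflection fixing vertex 0

def compose(a, b):
    """Permutation composition: apply b first, then a."""
    return tuple(a[b[i]] for i in range(3))

elements = {(0, 1, 2), r, s}
while True:  # close {identity, r, s} under composition
    new = {compose(a, b) for a, b in product(elements, repeat=2)} - elements
    if not new:
        break
    elements |= new

irrep_dims = [1, 1, 2]  # trivial, sign, and 2D standard irrep
assert len(elements) == 6
assert sum(d * d for d in irrep_dims) == len(elements)
```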