From 260db9ec75c2b1c03b5ae9ec263ff541d20b90e0 Mon Sep 17 00:00:00 2001
From: Franz Srambical <79149449+emergenz@users.noreply.github.com>
Date: Mon, 28 Oct 2024 17:16:55 +0100
Subject: [PATCH 1/3] fix: use extension-module for pyo3 (#1)

works!
---
 environment/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment/Cargo.toml b/environment/Cargo.toml
index a09719f..b161e8d 100644
--- a/environment/Cargo.toml
+++ b/environment/Cargo.toml
@@ -25,4 +25,4 @@ smallvec = "1.10.0"
 
 [dependencies.pyo3]
 version = "0.20.2"
-features = ["abi3-py38"]
+features = ["extension-module"]

From abbb4bdcdb1176bcc929565f380c4b6915023cb6 Mon Sep 17 00:00:00 2001
From: Franz Srambical <franz.srambical@gmail.com>
Date: Wed, 30 Oct 2024 12:55:26 +0100
Subject: [PATCH 2/3] feat: add launchers + docs for distributed setup

---
 README.md                           | 20 ++++++++++++++---
 launch/run_bootstrap_distributed.sh |  9 ++++++++
 launch/start_redis.sh               | 33 +++++++++++++++++++++++++++++
 launch/start_worker.sh              |  7 ++++++
 redis.def                           |  2 ++
 5 files changed, 68 insertions(+), 3 deletions(-)
 create mode 100644 launch/run_bootstrap_distributed.sh
 create mode 100644 launch/start_redis.sh
 create mode 100644 launch/start_worker.sh
 create mode 100644 redis.def

diff --git a/README.md b/README.md
index eff340f..219d00d 100644
--- a/README.md
+++ b/README.md
@@ -83,10 +83,24 @@ The entry point for the conjecture-prove loop is in [learning/bootstrap.py](boot
 [learning] $ python bootstrap.py theory=groups
 ```
 
-We use hydra for configuration -- the relevant file here is [config/bootstrap.yaml](config/bootstrap.yaml). This will run the loop in "sequential" mode, in a single process. There is a distributed mode, backed by a [https://docs.celeryq.dev/en/stable/](Celery queue), that you can use to leverage multiple CPUs/GPUs, either in the same or different machines (it doesn't matter, as long as they can connect to the queue). The setup is a bit manual: you must first spin up a Redis server, then run Celery worker processes backed by the Redis server, and finally run bootstrap.py with a DISTRIBUTED=1 environment variable:
+We use hydra for configuration -- the relevant file here is [config/bootstrap.yaml](config/bootstrap.yaml). This will run the loop in "sequential" mode, in a single process. There is also a distributed mode, backed by a [Celery queue](https://docs.celeryq.dev/en/stable/), that you can use to leverage multiple CPUs/GPUs, on the same machine or on different machines (it doesn't matter, as long as they can connect to the queue).
 
-```sh
-[learning] $ DISTRIBUTED=1 python bootstrap.py theory=groups
+The setup is a bit manual:
+1. Build the Redis container
+```
+apptainer build redis.sif redis.def
+```
+2. Start the Redis container
+```
+bash launch/start_redis.sh
+```
+3. Run a Celery worker process
+```
+bash launch/start_worker.sh
+```
+4. Run bootstrap.py in distributed mode
+```
+bash launch/run_bootstrap_distributed.sh
 ```
 
 Feel free to open an issue if you're interested in setting this up, and I can expand on the documentation. The details might get a little bit cluster-specific, though the general setup is just that you need (a) a Redis server, (b) a number of worker processes that connect to it, and (c) a teacher process that runs the bootstrapping loop, also connecting to the same Redis server.
diff --git a/launch/run_bootstrap_distributed.sh b/launch/run_bootstrap_distributed.sh
new file mode 100644
index 0000000..c2c582c
--- /dev/null
+++ b/launch/run_bootstrap_distributed.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Read the Redis hostname:port written by launch/start_redis.sh; abort early if the file cannot be read
+REDIS=$(cat redis_hostname_port.txt) || { echo "run_bootstrap_distributed.sh: cannot read redis_hostname_port.txt" >&2; exit 1; }
+export REDIS DISTRIBUTED=1
+
+# Run the bootstrap script (stop if learning/ is missing rather than running python from the wrong directory)
+# TODO(f.srambical): Make theory a command line argument
+cd learning/ || exit 1
+python bootstrap.py theory=groups
\ No newline at end of file
diff --git a/launch/start_redis.sh b/launch/start_redis.sh
new file mode 100644
index 0000000..9e8f76c
--- /dev/null
+++ b/launch/start_redis.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Start a Redis server on a free port. Pass -v/--verbose for verbose Redis logging; other arguments are reported and skipped.
+LOGLEVEL=""
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        -v|--verbose) LOGLEVEL="--loglevel verbose" ;;
+        *) echo "start_redis.sh: ignoring unknown argument: $1" >&2 ;;
+    esac
+    shift  # always consume the argument, so an unrecognized one cannot make the loop spin forever
+done
+
+# Get a random available port (bind to port 0) or use a specific one assigned by your cluster; abort if none was obtained
+REDIS_PORT=$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()') || { echo "start_redis.sh: could not pick a free port" >&2; exit 1; }
+export REDIS_PORT
+echo "Starting Redis on port $REDIS_PORT"
+
+# Save hostname:port to a file for other processes to read
+echo "$(hostname):$REDIS_PORT" > redis_hostname_port.txt
+
+# Cleanup function: remove the hostname:port file, kill the Redis server started on this port, then exit
+cleanup() {
+    rm -f redis_hostname_port.txt
+    pkill -f "redis-server --port $REDIS_PORT"
+    exit
+}
+
+# Run cleanup when the script receives SIGINT (Ctrl-C) or SIGTERM; no other exit path triggers it
+trap cleanup SIGINT SIGTERM
+
+# Start Redis container in the foreground
+# FIXME(f.srambical): `--save ""` is a quickfix and leads to redis not trying to persist data
+# We should instead fix data persistence in the redis container
+apptainer run --env REDIS_PORT=$REDIS_PORT redis.sif redis-server --port $REDIS_PORT --protected-mode no --bind 0.0.0.0 $LOGLEVEL
diff --git a/launch/start_worker.sh b/launch/start_worker.sh
new file mode 100644
index 0000000..296ffc8
--- /dev/null
+++ b/launch/start_worker.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Read the Redis hostname:port written by launch/start_redis.sh; abort early if the file cannot be read
+REDIS=$(cat redis_hostname_port.txt) || { echo "start_worker.sh: cannot read redis_hostname_port.txt" >&2; exit 1; }
+export REDIS
+# Start a Celery worker with a single worker process, using the Redis address as its node name
+cd learning/ || exit 1
+celery -A worker worker --concurrency=1 -n "$REDIS"
\ No newline at end of file
diff --git a/redis.def b/redis.def
new file mode 100644
index 0000000..0319c56
--- /dev/null
+++ b/redis.def
@@ -0,0 +1,2 @@
+Bootstrap: docker
+From: redis:7.2
\ No newline at end of file

From cee7561393ac5e85626dbded607570f5c6cf5b44 Mon Sep 17 00:00:00 2001
From: Franz Srambical <franz.srambical@gmail.com>
Date: Wed, 30 Oct 2024 12:59:01 +0100
Subject: [PATCH 3/3] fix: remove comment

---
 launch/start_redis.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/launch/start_redis.sh b/launch/start_redis.sh
index 9e8f76c..ab5f997 100644
--- a/launch/start_redis.sh
+++ b/launch/start_redis.sh
@@ -28,6 +28,4 @@ cleanup() {
 trap cleanup SIGINT SIGTERM
 
 # Start Redis container in the foreground
-# FIXME(f.srambical): `--save ""` is a quickfix and leads to redis not trying to persist data
-# We should instead fix data persistence in the redis container
 apptainer run --env REDIS_PORT=$REDIS_PORT redis.sif redis-server --port $REDIS_PORT --protected-mode no --bind 0.0.0.0 $LOGLEVEL