From 4b18ed21ec1fff9686424b29cca879847b9f6009 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 19:50:39 -0700 Subject: [PATCH 01/18] feat: Experiment docker --- experiments/Dockerfile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 experiments/Dockerfile diff --git a/experiments/Dockerfile b/experiments/Dockerfile new file mode 100644 index 0000000..8e71f1c --- /dev/null +++ b/experiments/Dockerfile @@ -0,0 +1,18 @@ +FROM ubuntu:24.10 +MAINTAINER Leni Aniva + +# Install Python and Poetry +RUN apt update &&\ + apt install -y python3 curl &&\ + curl -sSL https://install.python-poetry.org | python3 - + +# Install Lean +RUN curl -sSfL https://github.com/leanprover/elan/releases/download/v3.1.1/elan-x86_64-unknown-linux-gnu.tar.gz | tar xz &&\ + ./elan-init -y --default-toolchain none +ENV PATH="$PATH:~/.local/bin:~/.elan/bin" + +COPY .. ~/pantograph +WORKDIR ~/pantograph + +RUN poetry build &&\ + poetry install From c5aca8c67f7dab2519d4d427bfebab855f963757 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 21:31:06 -0700 Subject: [PATCH 02/18] fix: Dockerfile build directory --- experiments/Dockerfile => Dockerfile | 11 ++++++----- experiments/README.md | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) rename experiments/Dockerfile => Dockerfile (68%) create mode 100644 experiments/README.md diff --git a/experiments/Dockerfile b/Dockerfile similarity index 68% rename from experiments/Dockerfile rename to Dockerfile index 8e71f1c..0f32b65 100644 --- a/experiments/Dockerfile +++ b/Dockerfile @@ -1,18 +1,19 @@ FROM ubuntu:24.10 -MAINTAINER Leni Aniva # Install Python and Poetry RUN apt update &&\ - apt install -y python3 curl &&\ + apt install -y python3 curl git &&\ curl -sSL https://install.python-poetry.org | python3 - # Install Lean RUN curl -sSfL https://github.com/leanprover/elan/releases/download/v3.1.1/elan-x86_64-unknown-linux-gnu.tar.gz | tar xz &&\ ./elan-init -y --default-toolchain none -ENV PATH="$PATH:~/.local/bin:~/.elan/bin" +ENV PATH="$PATH:/root/.local/bin:/root/.elan/bin" -COPY .. ~/pantograph -WORKDIR ~/pantograph +COPY . /root/pantograph +WORKDIR /root/pantograph RUN poetry build &&\ poetry install + +WORKDIR /data diff --git a/experiments/README.md b/experiments/README.md new file mode 100644 index 0000000..e7eebb5 --- /dev/null +++ b/experiments/README.md @@ -0,0 +1,17 @@ +# Experiments + +## Environment + +The experiments require +- Internet access +- At least 15G of disk space on the drive hosting this repository +- An [OpenAI API](https://openai.com/index/openai-api/) key + +## Building the container + +If you already have the docker image, you can skip this step. + +Execute, in the root directory of this repository, +``` sh +docker build . --tag pantograph +``` From 128e59fd32ffcb5c922686d90b7ce0909c43dfec Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 21:47:22 -0700 Subject: [PATCH 03/18] feat: Early eval script --- Dockerfile | 2 +- experiments/README.md | 9 +++++++++ experiments/dsp/early.sh | 8 ++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100755 experiments/dsp/early.sh diff --git a/Dockerfile b/Dockerfile index 0f32b65..257eea1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,6 @@ COPY . /root/pantograph WORKDIR /root/pantograph RUN poetry build &&\ - poetry install + poetry install --with dev WORKDIR /data diff --git a/experiments/README.md b/experiments/README.md index e7eebb5..9b4678f 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -15,3 +15,12 @@ Execute, in the root directory of this repository, ``` sh docker build . --tag pantograph ``` + +## Experiments + +Execute in the project root directory +``` sh +experiments/dsp/early.sh +``` +the results will be written to `result/` + diff --git a/experiments/dsp/early.sh b/experiments/dsp/early.sh new file mode 100755 index 0000000..8fdd74b --- /dev/null +++ b/experiments/dsp/early.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + pantograph run \ + python experiments/dsp/main.py eval --output /data/result/debug From 7eb1e0aed06d9b59a85c50514d758207f533b777 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 21:52:08 -0700 Subject: [PATCH 04/18] fix: Add dockerignore file --- .dockerignore | 5 +++++ .gitignore | 1 + 2 files changed, 6 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..8674fac --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.github +dist +docs +examples +experiments diff --git a/.gitignore b/.gitignore index c4e9286..b231113 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .* !.gitignore !.github +!.dockerignore # Python *.py[cod] From 717620fbf01034c43d7f47e0c0c9efaff1e90a2d Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 21:58:27 -0700 Subject: [PATCH 05/18] fix: Set docker pwd to /root/pantograph --- Dockerfile | 4 +--- experiments/dsp/early.sh | 2 +- experiments/dsp/main.py | 11 ++++------- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 257eea1..1c8a651 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,4 @@ COPY . /root/pantograph WORKDIR /root/pantograph RUN poetry build &&\ - poetry install --with dev - -WORKDIR /data + poetry install diff --git a/experiments/dsp/early.sh b/experiments/dsp/early.sh index 8fdd74b..b73c824 100755 --- a/experiments/dsp/early.sh +++ b/experiments/dsp/early.sh @@ -5,4 +5,4 @@ docker run --rm -it \ --entrypoint poetry \ --env OPENAI_API_KEY=$OPENAI_API_KEY \ pantograph run \ - python experiments/dsp/main.py eval --output /data/result/debug + python /data/experiments/dsp/main.py eval --output /data/result/debug diff --git a/experiments/dsp/main.py b/experiments/dsp/main.py index 8437b85..57b3a90 100644 --- a/experiments/dsp/main.py +++ b/experiments/dsp/main.py @@ -329,11 +329,6 @@ def full_proof_search_dsp_lean( experiment_dir = Path(__file__).resolve().parent -def get_project_and_lean_path(): - cwd = experiment_dir / 'lean_src_proj' - p = subprocess.check_output(['lake', 'env', 'printenv', 'LEAN_PATH'], cwd=cwd) - return cwd, p - def load_data(args) -> list[Datum]: p = Path(args.dataset).expanduser() data = None @@ -363,13 +358,15 @@ def main(args): path_output = Path(args.output) path_output.mkdir(exist_ok=True, parents=True) + project_path = experiment_dir / 'lean_src_proj' + p = subprocess.check_output(['lake', 'build'], cwd=project_path) + print(p) + # Start server - project_path, lean_path = get_project_and_lean_path() def server_func(): return Server( imports=["Mathlib", "Aesop"], project_path=project_path, - lean_path=lean_path, core_options=DEFAULT_CORE_OPTIONS, ) From 5ff0f5a3597c6dd618b835bd9845295239425186 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 22:01:49 -0700 Subject: [PATCH 06/18] fix: Server failed to create output --- experiments/dsp/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/dsp/main.py b/experiments/dsp/main.py index 57b3a90..c17ad64 100644 --- a/experiments/dsp/main.py +++ b/experiments/dsp/main.py @@ -273,7 +273,7 @@ def single_proof_search_dsp_lean( try: server = server_func() except Exception as e: - print(colored("Failed to create server: {e}", "red")) + print(colored(f"Failed to create server: {e}", "red")) return DatumResult( name=str(datum), error=str(e), From ade05a775fb31c5ac08083ef07d0db7e25ae5958 Mon Sep 17 00:00:00 2001 From: Chuyue Sun Date: Mon, 21 Oct 2024 01:07:40 -0500 Subject: [PATCH 07/18] wip docker fix --- Dockerfile | 2 +- poetry.lock | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1c8a651..a8c2b67 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,4 +14,4 @@ COPY . /root/pantograph WORKDIR /root/pantograph RUN poetry build &&\ - poetry install + poetry install --with dev diff --git a/poetry.lock b/poetry.lock index 2a7ad49..a80fb8c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4760,4 +4760,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "12935bf20e5ac4351734a47b1a71486daccbc0eed1239abe67858b3b17df8b37" +content-hash = "d3a0126fc7cba9e3d52c2fa538507198b186682917727591a9cfb23e55156fde" diff --git a/pyproject.toml b/pyproject.toml index ef55b3c..0a2b92f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ python = "^3.10" generate-setup-file = false script = "build.py" -[tool.poetry.group.dev.dependencies] +[tool.poetry.dev-dependencies] # Experiment related dependencies here to not clutter the main project dependencies. notebook = "^7.2.1" numpy = "^1.26.4" From 33ab3f6feb610544ac6a3c54c377b143471aca3e Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 23:17:59 -0700 Subject: [PATCH 08/18] Revert "wip docker fix" This reverts commit ade05a775fb31c5ac08083ef07d0db7e25ae5958. --- Dockerfile | 2 +- poetry.lock | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index a8c2b67..1c8a651 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,4 +14,4 @@ COPY . /root/pantograph WORKDIR /root/pantograph RUN poetry build &&\ - poetry install --with dev + poetry install diff --git a/poetry.lock b/poetry.lock index a80fb8c..2a7ad49 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4760,4 +4760,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d3a0126fc7cba9e3d52c2fa538507198b186682917727591a9cfb23e55156fde" +content-hash = "12935bf20e5ac4351734a47b1a71486daccbc0eed1239abe67858b3b17df8b37" diff --git a/pyproject.toml b/pyproject.toml index 0a2b92f..ef55b3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ python = "^3.10" generate-setup-file = false script = "build.py" -[tool.poetry.dev-dependencies] +[tool.poetry.group.dev.dependencies] # Experiment related dependencies here to not clutter the main project dependencies. notebook = "^7.2.1" numpy = "^1.26.4" From f6bf8990c62c23244a7e529a9a6e2e494ed38a55 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Sun, 20 Oct 2024 23:56:52 -0700 Subject: [PATCH 09/18] fix: Import dependencies in `LeanSrcProj` --- experiments/dsp/lean_src_proj/LeanSrcProj.lean | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/experiments/dsp/lean_src_proj/LeanSrcProj.lean b/experiments/dsp/lean_src_proj/LeanSrcProj.lean index e99d3a6..399825c 100644 --- a/experiments/dsp/lean_src_proj/LeanSrcProj.lean +++ b/experiments/dsp/lean_src_proj/LeanSrcProj.lean @@ -1 +1,2 @@ -def hello := "world" \ No newline at end of file +import Mathlib +import Aesop From cecb4faf17b20df7b3dbcb1808970bbbf2a7ba37 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Mon, 21 Oct 2024 19:58:13 -0700 Subject: [PATCH 10/18] experiment: Early eval script --- experiments/README.md | 6 ++++++ experiments/dsp/main.py | 20 ++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/experiments/README.md b/experiments/README.md index 9b4678f..f53490b 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -18,6 +18,12 @@ docker build . --tag pantograph ## Experiments +### Early Evaluation + +Due to the nature of this project and how Lean's build system works, the first +pass needs to build the Mathlib library. This building of the library will take +about 30 minutes to run. + Execute in the project root directory ``` sh experiments/dsp/early.sh diff --git a/experiments/dsp/main.py b/experiments/dsp/main.py index c17ad64..4710da5 100644 --- a/experiments/dsp/main.py +++ b/experiments/dsp/main.py @@ -1,4 +1,5 @@ import sys, os, json, subprocess, time, datetime +from abc import abstractmethod from pathlib import Path from dataclasses import asdict from typing import Union, Any, Tuple, Optional @@ -39,6 +40,13 @@ def __init__(self): def __call__(self, *args, **kwards): pass + @abstractmethod + def sample_draft(self, prompt: str): + pass + @abstractmethod + def sample_sketch(self, prompt: str): + pass + class OpenAI_DSP_Engine(Engine): def __init__( self, @@ -88,6 +96,7 @@ def role_prompt(self) -> str: def sample_draft(self, prompt: str): extra = {} if self.model.startswith("o1") else dict( + seed=0, temperature=self.draft_sampling_params.temperature, top_p=self.draft_sampling_params.top_p, stop=self.draft_sampling_params.stop[:3], @@ -103,6 +112,7 @@ def sample_draft(self, prompt: str): ) def sample_sketch(self, prompt: str): extra = {} if self.model.startswith("o1") else dict( + seed=0, temperature=self.sketch_sampling_params.temperature, top_p=self.sketch_sampling_params.top_p, ) @@ -359,8 +369,14 @@ def main(args): path_output.mkdir(exist_ok=True, parents=True) project_path = experiment_dir / 'lean_src_proj' - p = subprocess.check_output(['lake', 'build'], cwd=project_path) - print(p) + + print("Building src project ...") + while True: + # Lean sometimes fails to build. Try until it succeeds. + completion = subprocess.run(['lake', 'build'], cwd=project_path, check=False) + if completion.returncode == 0: + break; + print("Built src project") # Start server def server_func(): From 4006156ac6c469b1b24740596928e892fa215671 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Mon, 21 Oct 2024 21:26:32 -0700 Subject: [PATCH 11/18] experiment: Add docker running scripts --- experiments/README.md | 39 ++++++++++++++++++++++++++++---- experiments/dsp/all-4o.sh | 24 ++++++++++++++++++++ experiments/dsp/all-o1.sh | 22 ++++++++++++++++++ experiments/dsp/pack-artefact.sh | 11 +++++++++ experiments/dsp/ref-plots.sh | 15 ++++++++++++ 5 files changed, 106 insertions(+), 5 deletions(-) create mode 100755 experiments/dsp/all-4o.sh create mode 100755 experiments/dsp/all-o1.sh create mode 100644 experiments/dsp/pack-artefact.sh create mode 100644 experiments/dsp/ref-plots.sh diff --git a/experiments/README.md b/experiments/README.md index f53490b..2d05e97 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -1,17 +1,22 @@ # Experiments +Unpack the `artefact.zip` file. Then cd into the unpacked directory and execute +the commands below. + ## Environment The experiments require - Internet access - At least 15G of disk space on the drive hosting this repository -- An [OpenAI API](https://openai.com/index/openai-api/) key - -## Building the container +- An [OpenAI API](https://openai.com/index/openai-api/) key, for invoking the models. -If you already have the docker image, you can skip this step. +## Building/Loading the container -Execute, in the root directory of this repository, +If you **have** the docker image: +``` sh +docker load --input pantograph +``` +(Optional) You can also build the docker image from scratch ``` sh docker build . --tag pantograph ``` @@ -30,3 +35,27 @@ experiments/dsp/early.sh ``` the results will be written to `result/` +### Plots for the paper + +To generate the plots for the paper, execute + +``` sh +bash experiments/dsp/ref-plots.sh +``` +which will output the plots in `result/ref-{o1,4o}/plot` based on data in `result/ref-*` + +### GPT-4o experiment + +``` sh +bash experiments/dsp/all-4o.sh +``` + +which will output the plots in `result/4o/plot` + +### o1-preview experiment + +``` sh +bash experiments/dsp/all-o1.sh +``` + +which will output the plots in `result/o1/plot` diff --git a/experiments/dsp/all-4o.sh b/experiments/dsp/all-4o.sh new file mode 100755 index 0000000..8d5b15b --- /dev/null +++ b/experiments/dsp/all-4o.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +main() { + docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + pantograph run \ + python /data/experiments/dsp/main.py eval $@ +} +plot() { + docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + pantograph run \ + python /data/experiments/dsp/plot.py $@ +} +BASE_DIR=/data/result/4o +main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/valid.jsonl --format minif2f --output $BASE_DIR/valid1 +main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/valid.jsonl --n-samples 3 --format minif2f --output $BASE_DIR/valid3 +main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/test.jsonl --format minif2f --output $BASE_DIR/test1 +main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/test.jsonl --n-samples 3 --format minif2f --output $BASE_DIR/test3 +plot --result $BASE_DIR/{valid1,valid3,test1,test3} --names valid1 valid3 test1 test3 --plot-output $BASE_DIR/plot diff --git a/experiments/dsp/all-o1.sh b/experiments/dsp/all-o1.sh new file mode 100755 index 0000000..7764359 --- /dev/null +++ b/experiments/dsp/all-o1.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +main() { + docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + pantograph run \ + python /data/experiments/dsp/main.py eval $@ +} +plot() { + docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + pantograph run \ + python /data/experiments/dsp/plot.py $@ +} +BASE_DIR=/data/result/o1 +main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/valid.jsonl --format minif2f --model o1-preview --output $BASE_DIR/valid1 +main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/test.jsonl --format minif2f --model o1-preview --output $BASE_DIR/test1 +plot --result $BASE_DIR/{valid1,test1} --names valid1 test1 --plot-output $BASE_DIR/plot diff --git a/experiments/dsp/pack-artefact.sh b/experiments/dsp/pack-artefact.sh new file mode 100644 index 0000000..3805d1f --- /dev/null +++ b/experiments/dsp/pack-artefact.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Packs the artefact +# Execute this at the project root directory to produce `artefact.zip` + +echo "Packing Git archive" +git archive --format zip --output artefact.zip HEAD +echo "Packing docker image" +docker image save pantograph > pantograph.tar +echo "Adding experimental results" +zip -ur artefact.zip result/ref-4o result/ref-o1 pantograph.tar diff --git a/experiments/dsp/ref-plots.sh b/experiments/dsp/ref-plots.sh new file mode 100644 index 0000000..8374650 --- /dev/null +++ b/experiments/dsp/ref-plots.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env sh + +plot() { + docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + pantograph run \ + python /data/experiments/dsp/plot.py $@ +} + +BASE_DIR=/data/result/ref-4o +plot --result $BASE_DIR/{valid1,test1,valid3,test3} --names valid1 test1 valid3 test3 --plot-output $BASE_DIR/plot +BASE_DIR=/data/result/ref-o1 +plot --result $BASE_DIR/{valid1,test1} --names valid1 test1 --plot-output $BASE_DIR/plot From f82418c97e1eb3b8728cf4596d1d37aae7d8d70c Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Mon, 21 Oct 2024 21:50:32 -0700 Subject: [PATCH 12/18] doc: Artefact usage instructions --- .dockerignore | 2 ++ experiments/README.md | 22 ++++++++++++++++++---- experiments/dsp/all-4o.sh | 5 +++-- experiments/dsp/all-o1.sh | 6 ++++-- experiments/dsp/pack-artefact.sh | 6 +++--- experiments/dsp/ref-plots.sh | 3 ++- 6 files changed, 32 insertions(+), 12 deletions(-) diff --git a/.dockerignore b/.dockerignore index 8674fac..7f40213 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,5 @@ dist docs examples experiments +result +*.tar diff --git a/experiments/README.md b/experiments/README.md index 2d05e97..3ad5336 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -10,19 +10,31 @@ The experiments require - At least 15G of disk space on the drive hosting this repository - An [OpenAI API](https://openai.com/index/openai-api/) key, for invoking the models. -## Building/Loading the container +Set the `OPENAI_API_KEY` environment variable to the API key. -If you **have** the docker image: +### Building/Loading the container + +There are 3 ways to load the container. Choose one. +- (Artefact reviewer) If you **have** the docker image: ``` sh docker load --input pantograph ``` -(Optional) You can also build the docker image from scratch +- Use the docker repository +``` sh +docker pull chrysoberyl/pantograph +``` +- Build the docker image from scratch ``` sh docker build . --tag pantograph ``` ## Experiments +The experiments are bound by limitations of OpenAI. Since OpenAI as a commercial +company cannot indefinitely store all snapshots of their models, the experiments +rely on OpenAI's provided version of `o1-preview` and `gpt-4o` models. This may +impact the reproducibility of the experiments in the future. + ### Early Evaluation Due to the nature of this project and how Lean's build system works, the first @@ -42,7 +54,9 @@ To generate the plots for the paper, execute ``` sh bash experiments/dsp/ref-plots.sh ``` -which will output the plots in `result/ref-{o1,4o}/plot` based on data in `result/ref-*` + +which will output the plots in `result/ref-{o1,4o}/plot` based on the provided +experiment result data in `result/ref-*` ### GPT-4o experiment diff --git a/experiments/dsp/all-4o.sh b/experiments/dsp/all-4o.sh index 8d5b15b..6b7c2c6 100755 --- a/experiments/dsp/all-4o.sh +++ b/experiments/dsp/all-4o.sh @@ -1,11 +1,12 @@ #!/bin/bash +IMAGE=chrysoberyl/pantograph main() { docker run --rm -it \ --volume $PWD:/data \ --entrypoint poetry \ --env OPENAI_API_KEY=$OPENAI_API_KEY \ - pantograph run \ + $IMAGE run \ python /data/experiments/dsp/main.py eval $@ } plot() { @@ -13,7 +14,7 @@ plot() { --volume $PWD:/data \ --entrypoint poetry \ --env OPENAI_API_KEY=$OPENAI_API_KEY \ - pantograph run \ + $IMAGE run \ python /data/experiments/dsp/plot.py $@ } BASE_DIR=/data/result/4o diff --git a/experiments/dsp/all-o1.sh b/experiments/dsp/all-o1.sh index 7764359..6165be7 100755 --- a/experiments/dsp/all-o1.sh +++ b/experiments/dsp/all-o1.sh @@ -1,11 +1,13 @@ #!/bin/bash +IMAGE=chrysoberyl/pantograph + main() { docker run --rm -it \ --volume $PWD:/data \ --entrypoint poetry \ --env OPENAI_API_KEY=$OPENAI_API_KEY \ - pantograph run \ + $IMAGE run \ python /data/experiments/dsp/main.py eval $@ } plot() { @@ -13,7 +15,7 @@ plot() { --volume $PWD:/data \ --entrypoint poetry \ --env OPENAI_API_KEY=$OPENAI_API_KEY \ - pantograph run \ + $IMAGE run \ python /data/experiments/dsp/plot.py $@ } BASE_DIR=/data/result/o1 diff --git a/experiments/dsp/pack-artefact.sh b/experiments/dsp/pack-artefact.sh index 3805d1f..afdeda4 100644 --- a/experiments/dsp/pack-artefact.sh +++ b/experiments/dsp/pack-artefact.sh @@ -5,7 +5,7 @@ echo "Packing Git archive" git archive --format zip --output artefact.zip HEAD -echo "Packing docker image" -docker image save pantograph > pantograph.tar +#echo "Packing docker image" +#docker image save pantograph > pantograph.tar echo "Adding experimental results" -zip -ur artefact.zip result/ref-4o result/ref-o1 pantograph.tar +zip -ur artefact.zip result/ref-4o/{valid1,valid3,test1,test3} result/ref-o1/{valid1,valid3} #pantograph.tar diff --git a/experiments/dsp/ref-plots.sh b/experiments/dsp/ref-plots.sh index 8374650..d17a1da 100644 --- a/experiments/dsp/ref-plots.sh +++ b/experiments/dsp/ref-plots.sh @@ -1,11 +1,12 @@ #!/usr/bin/env sh +IMAGE=chrysoberyl/pantograph plot() { docker run --rm -it \ --volume $PWD:/data \ --entrypoint poetry \ --env OPENAI_API_KEY=$OPENAI_API_KEY \ - pantograph run \ + $IMAGE run \ python /data/experiments/dsp/plot.py $@ } From 8566d30d1206d97ef5e3769d1ee8509aace537ff Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Mon, 21 Oct 2024 21:52:36 -0700 Subject: [PATCH 13/18] feat: Code harden --- experiments/dsp/main.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/experiments/dsp/main.py b/experiments/dsp/main.py index 4710da5..db02bbb 100644 --- a/experiments/dsp/main.py +++ b/experiments/dsp/main.py @@ -288,12 +288,20 @@ def single_proof_search_dsp_lean( name=str(datum), error=str(e), ) - # -- Prove: y_fl = prove(eng, x_fl_prob, z_fl_pred_sketches) - prove_result = step_prove(eng, server, x_fl_prob, sketch) - results.append(prove_result) - if isinstance(prove_result, SearchResult) and prove_result.success: - success = True - break + try: + # -- Prove: y_fl = prove(eng, x_fl_prob, z_fl_pred_sketches) + prove_result = step_prove(eng, server, x_fl_prob, sketch) + results.append(prove_result) + if isinstance(prove_result, SearchResult) and prove_result.success: + success = True + break + except Exception as e: + print(colored(f"Search failed: {e}", "red")) + return DatumResult( + name=str(datum), + error=str(e), + ) + duration = time.time() - start_time return DatumResult( From bc5a361a08a8eb270910819fae663fb06b085dc6 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Mon, 21 Oct 2024 21:56:45 -0700 Subject: [PATCH 14/18] doc: Artefact packing instructions --- experiments/ARTEFACT.md | 28 ++++++++++++++++++++++++++++ experiments/dsp/pack-artefact.sh | 6 +++--- 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 experiments/ARTEFACT.md diff --git a/experiments/ARTEFACT.md b/experiments/ARTEFACT.md new file mode 100644 index 0000000..2622d7d --- /dev/null +++ b/experiments/ARTEFACT.md @@ -0,0 +1,28 @@ +# Artefact Packing + +This is about how to pack the artefacts described in `README.md`. + +## Execute the DSP experiments + +Run the DSP experiments from the project root, +```sh +python experiments/dsp/main.py eval \ + --dataset experiments/minif2f/valid.jsonl --format minif2f --output result/ref-4o/valid1 +python experiments/dsp/main.py eval \ + --dataset experiments/minif2f/valid.jsonl --format minif2f --n-samples 3 --output result/ref-4o/valid3 +python experiments/dsp/main.py eval \ + --dataset experiments/minif2f/test.jsonl --format minif2f --output result/ref-4o/test1 +python experiments/dsp/main.py eval \ + --dataset experiments/minif2f/test.jsonl --format minif2f --n-samples 3 --output result/ref-4o/test3 + +python experiments/dsp/main.py eval \ + --dataset experiments/minif2f/valid.jsonl --format minif2f --model o1-preview --output result/ref-o1/valid1 +python experiments/dsp/main.py eval \ + --dataset experiments/minif2f/test.jsonl --format minif2f --model o1-preview --output result/ref-o1/test1 +``` +Then, pack it with the script + +```sh +bash experiments/dsp/pack-artefact.sh +``` + diff --git a/experiments/dsp/pack-artefact.sh b/experiments/dsp/pack-artefact.sh index afdeda4..4ab53d3 100644 --- a/experiments/dsp/pack-artefact.sh +++ b/experiments/dsp/pack-artefact.sh @@ -5,7 +5,7 @@ echo "Packing Git archive" git archive --format zip --output artefact.zip HEAD -#echo "Packing docker image" -#docker image save pantograph > pantograph.tar +echo "Packing docker image" +docker image save chrysoberyl/pantograph > pantograph.tar echo "Adding experimental results" -zip -ur artefact.zip result/ref-4o/{valid1,valid3,test1,test3} result/ref-o1/{valid1,valid3} #pantograph.tar +zip -ur artefact.zip result/ref-4o/{valid1,valid3,test1,test3} result/ref-o1/{valid1,valid3} pantograph.tar From ecbf5386ea3ffcb46a50a4122a2cf8671aad1ae9 Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Tue, 22 Oct 2024 10:05:09 -0700 Subject: [PATCH 15/18] refactor: Separate the all and plot scripts --- experiments/README.md | 2 ++ experiments/dsp/all-4o.sh | 11 +---------- experiments/dsp/all-o1.sh | 11 +---------- experiments/dsp/plot-4o.sh | 14 ++++++++++++++ experiments/dsp/plot-o1.sh | 14 ++++++++++++++ 5 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 experiments/dsp/plot-4o.sh create mode 100644 experiments/dsp/plot-o1.sh diff --git a/experiments/README.md b/experiments/README.md index 3ad5336..8f68353 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -62,6 +62,7 @@ experiment result data in `result/ref-*` ``` sh bash experiments/dsp/all-4o.sh +bash experiments/dsp/plot-4o.sh ``` which will output the plots in `result/4o/plot` @@ -70,6 +71,7 @@ which will output the plots in `result/4o/plot` ``` sh bash experiments/dsp/all-o1.sh +bash experiments/dsp/plot-o1.sh ``` which will output the plots in `result/o1/plot` diff --git a/experiments/dsp/all-4o.sh b/experiments/dsp/all-4o.sh index 6b7c2c6..93689ea 100755 --- a/experiments/dsp/all-4o.sh +++ b/experiments/dsp/all-4o.sh @@ -1,6 +1,7 @@ #!/bin/bash IMAGE=chrysoberyl/pantograph +BASE_DIR=/data/result/4o main() { docker run --rm -it \ --volume $PWD:/data \ @@ -9,17 +10,7 @@ main() { $IMAGE run \ python /data/experiments/dsp/main.py eval $@ } -plot() { - docker run --rm -it \ - --volume $PWD:/data \ - --entrypoint poetry \ - --env OPENAI_API_KEY=$OPENAI_API_KEY \ - $IMAGE run \ - python /data/experiments/dsp/plot.py $@ -} -BASE_DIR=/data/result/4o main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/valid.jsonl --format minif2f --output $BASE_DIR/valid1 main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/valid.jsonl --n-samples 3 --format minif2f --output $BASE_DIR/valid3 main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/test.jsonl --format minif2f --output $BASE_DIR/test1 main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/test.jsonl --n-samples 3 --format minif2f --output $BASE_DIR/test3 -plot --result $BASE_DIR/{valid1,valid3,test1,test3} --names valid1 valid3 test1 test3 --plot-output $BASE_DIR/plot diff --git a/experiments/dsp/all-o1.sh b/experiments/dsp/all-o1.sh index 6165be7..2c3c9af 100755 --- a/experiments/dsp/all-o1.sh +++ b/experiments/dsp/all-o1.sh @@ -1,6 +1,7 @@ #!/bin/bash IMAGE=chrysoberyl/pantograph +BASE_DIR=/data/result/o1 main() { docker run --rm -it \ @@ -10,15 +11,5 @@ main() { $IMAGE run \ python /data/experiments/dsp/main.py eval $@ } -plot() { - docker run --rm -it \ - --volume $PWD:/data \ - --entrypoint poetry \ - --env OPENAI_API_KEY=$OPENAI_API_KEY \ - $IMAGE run \ - python /data/experiments/dsp/plot.py $@ -} -BASE_DIR=/data/result/o1 main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/valid.jsonl --format minif2f --model o1-preview --output $BASE_DIR/valid1 main --dataset /data/experiments/dsp/ --dataset /data/experiments/minif2f/test.jsonl --format minif2f --model o1-preview --output $BASE_DIR/test1 -plot --result $BASE_DIR/{valid1,test1} --names valid1 test1 --plot-output $BASE_DIR/plot diff --git a/experiments/dsp/plot-4o.sh b/experiments/dsp/plot-4o.sh new file mode 100644 index 0000000..0ea3869 --- /dev/null +++ b/experiments/dsp/plot-4o.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env sh + +IMAGE=chrysoberyl/pantograph +BASE_DIR=/data/result/4o + +plot() { + docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + $IMAGE run \ + python /data/experiments/dsp/plot.py $@ +} +plot --result $BASE_DIR/{valid1,valid3,test1,test3} --names valid1 valid3 test1 test3 --plot-output $BASE_DIR/plot diff --git a/experiments/dsp/plot-o1.sh b/experiments/dsp/plot-o1.sh new file mode 100644 index 0000000..212a562 --- /dev/null +++ b/experiments/dsp/plot-o1.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env sh + +IMAGE=chrysoberyl/pantograph +BASE_DIR=/data/result/o1 + +plot() { + docker run --rm -it \ + --volume $PWD:/data \ + --entrypoint poetry \ + --env OPENAI_API_KEY=$OPENAI_API_KEY \ + $IMAGE run \ + python /data/experiments/dsp/plot.py $@ +} +plot --result $BASE_DIR/{valid1,test1} --names valid1 test1 --plot-output $BASE_DIR/plot From 2691482eafae1693f02913548157a60ced8df24a Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Wed, 23 Oct 2024 00:47:20 -0700 Subject: [PATCH 16/18] fix: Image name --- experiments/README.md | 4 ++-- experiments/dsp/early.sh | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/experiments/README.md b/experiments/README.md index 8f68353..bc703fb 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -17,7 +17,7 @@ Set the `OPENAI_API_KEY` environment variable to the API key. There are 3 ways to load the container. Choose one. - (Artefact reviewer) If you **have** the docker image: ``` sh -docker load --input pantograph +docker load --input pantograph.tar ``` - Use the docker repository ``` sh @@ -25,7 +25,7 @@ docker pull chrysoberyl/pantograph ``` - Build the docker image from scratch ``` sh -docker build . --tag pantograph +docker build . --tag chrysoberyl/pantograph ``` ## Experiments diff --git a/experiments/dsp/early.sh b/experiments/dsp/early.sh index b73c824..0c87185 100755 --- a/experiments/dsp/early.sh +++ b/experiments/dsp/early.sh @@ -1,8 +1,10 @@ #!/bin/bash +IMAGE=chrysoberyl/pantograph + docker run --rm -it \ --volume $PWD:/data \ --entrypoint poetry \ --env OPENAI_API_KEY=$OPENAI_API_KEY \ - pantograph run \ + $IMAGE run \ python /data/experiments/dsp/main.py eval --output /data/result/debug From 8cc342560efd48c4959a171e6d04d79d2a9fd13a Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Wed, 23 Oct 2024 08:17:15 -0700 Subject: [PATCH 17/18] feat: Error handling in draft/sketch --- experiments/dsp/main.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/experiments/dsp/main.py b/experiments/dsp/main.py index db02bbb..57407b6 100644 --- a/experiments/dsp/main.py +++ b/experiments/dsp/main.py @@ -265,14 +265,20 @@ def single_proof_search_dsp_lean( ) -> DatumResult: start_time = time.time() - # -- Draft: [y_nl_pred_draft]_n ~ draft(eng, x_nl_prob, P_draft) - y_nl_pred_drafts = step_draft(eng, datum) - - # -- Sketch: z_fl_pred_sketch ~ sketch(eng, x_nl_prob, [y_nl_pred_draft]_n, x_fl_prob, P_sketch) - z_fl_pred_sketches, x_fl_prob = step_sketch(eng, datum, y_nl_pred_drafts) + try: + # -- Draft: [y_nl_pred_draft]_n ~ draft(eng, x_nl_prob, P_draft) + y_nl_pred_drafts = step_draft(eng, datum) - assert len(z_fl_pred_sketches) == eng.sketch_sampling_params.n + # -- Sketch: z_fl_pred_sketch ~ sketch(eng, x_nl_prob, [y_nl_pred_draft]_n, x_fl_prob, P_sketch) + z_fl_pred_sketches, x_fl_prob = step_sketch(eng, datum, y_nl_pred_drafts) + assert len(z_fl_pred_sketches) == eng.sketch_sampling_params.n + except Exception as e: + print(colored(f"Failed to create sketch/draft: {e}", "red")) + return DatumResult( + name=str(datum), + error=str(e), + ) results = [] success = False From 62ce02622b5ebff1e223e6ea05981d1e77de577f Mon Sep 17 00:00:00 2001 From: Leni Aniva Date: Wed, 23 Oct 2024 23:20:12 -0700 Subject: [PATCH 18/18] fix: Artefact packing script --- .gitignore | 3 +++ experiments/README.md | 8 ++++++++ experiments/dsp/pack-artefact.sh | 2 +- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 5 files changed, 25 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index b231113..28c1f7f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,8 @@ *.egg-info # Output +*.zip +*.tar /dist /venv +/result diff --git a/experiments/README.md b/experiments/README.md index bc703fb..434a7f8 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -28,6 +28,9 @@ docker pull chrysoberyl/pantograph docker build . --tag chrysoberyl/pantograph ``` +The docker image's name **must be** `chrysoberyl/pantograph` for the shell +scripts below to work. + ## Experiments The experiments are bound by limitations of OpenAI. Since OpenAI as a commercial @@ -47,6 +50,11 @@ experiments/dsp/early.sh ``` the results will be written to `result/` +All experiments are cached, so if they crash halfway (which really should not +happen) you can continue to run them and they'll pick up from where they left +off. Hence you can delete the relevant folders in `result/` to force an +experiment to restart from the beginning. + ### Plots for the paper To generate the plots for the paper, execute diff --git a/experiments/dsp/pack-artefact.sh b/experiments/dsp/pack-artefact.sh index 4ab53d3..ac7db5c 100644 --- a/experiments/dsp/pack-artefact.sh +++ b/experiments/dsp/pack-artefact.sh @@ -4,7 +4,7 @@ # Execute this at the project root directory to produce `artefact.zip` echo "Packing Git archive" -git archive --format zip --output artefact.zip HEAD +git-archive-all artefact.zip echo "Packing docker image" docker image save chrysoberyl/pantograph > pantograph.tar echo "Adding experimental results" diff --git a/poetry.lock b/poetry.lock index 2a7ad49..20e0217 100644 --- a/poetry.lock +++ b/poetry.lock @@ -851,6 +851,17 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] +[[package]] +name = "git-archive-all" +version = "1.23.1" +description = "Archive git repository with its submodules." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.6" +files = [ + {file = "git-archive-all-1.23.1.tar.gz", hash = "sha256:a42fe0cedd2e0361250a4f3130f3d3543f640d4f1fe28530771df5972108729f"}, + {file = "git_archive_all-1.23.1-py2.py3-none-any.whl", hash = "sha256:d9f9611f2df629de3df1d6f134955ced4c704cbc0e423d769a7c0e55a8484242"}, +] + [[package]] name = "gitdb" version = "4.0.11" @@ -4760,4 +4771,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "12935bf20e5ac4351734a47b1a71486daccbc0eed1239abe67858b3b17df8b37" +content-hash = "1ede574e6f55d5ed07f6064fd08c745bc6f3b0d8a22d87a9f7ad7ef4418ccb47" diff --git a/pyproject.toml b/pyproject.toml index ef55b3c..84e3f50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ termcolor = "^2.4.0" matplotlib = "^3.9.2" seaborn = "^0.13.2" pandas = "^2.2.3" +git-archive-all = "^1.23.1" [tool.poetry.group.doc] optional = true