38 changes: 38 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,38 @@
+name: CI
+
+run-name: "CI #${{ github.run_number }}"
+
+on:
+  push:
+    branches:
+      - main
+      - develop
+  pull_request:
+
+jobs:
+  quality:
+    name: Lint and Test
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Ruff lint
+        run: ruff check src tests
+
+      - name: Ruff format check
+        run: ruff format --check src tests
+
+      - name: Run tests
+        run: pytest -q
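Note that the three quality gates mirror what a contributor can run locally: `ruff check src tests`, `ruff format --check src tests`, and `pytest -q` are the exact commands the workflow executes, so a branch that passes them locally (with the dependencies from requirements.txt installed) should also pass CI.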
13 changes: 10 additions & 3 deletions Dockerfile
@@ -14,8 +14,15 @@ WORKDIR /workspace
 
 # Copy dependency lists first to leverage Docker layer caching
 COPY requirements.txt /workspace/requirements.txt
-# Install Python dependencies
-RUN uv pip install -r /workspace/requirements.txt
+
+# Create a virtual environment and install Python dependencies
+RUN uv venv /opt/venv \
+    && . /opt/venv/bin/activate \
+    && uv pip install -r /workspace/requirements.txt
+
+# Ensure the virtual environment is used by default
+ENV VIRTUAL_ENV=/opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
 
 # Copy the rest of the repository into the container
 COPY . /workspace
@@ -24,4 +31,4 @@ COPY . /workspace
 ENV PYTHONPATH=/workspace/src
 
 # Set entrypoint
-ENTRYPOINT ["bash"]
\ No newline at end of file
+ENTRYPOINT ["bash"]
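Because `ENV PATH="/opt/venv/bin:$PATH"` puts the virtual environment's bin directory first, every later `RUN` instruction and the interactive `bash` entrypoint resolve `python` and `pip` to the venv by default; `docker compose run --rm research which python` should print `/opt/venv/bin/python` if the build worked. Setting `VIRTUAL_ENV` alongside it follows the convention many tools (including uv) use to detect the active environment.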
2 changes: 1 addition & 1 deletion Makefile
@@ -34,4 +34,4 @@ run:
 
 # Download the MolMole dataset into the data/ directory
 download:
-	docker compose run --rm research python -m molmole_research.downloader --dataset doxa-friend/MolMole_Patent300 --out data/images
\ No newline at end of file
+	docker compose run --rm research python -m molmole_research.downloader --dataset doxa-friend/MolMole_Patent300 --out data/images
6 changes: 2 additions & 4 deletions docker-compose.yml
@@ -1,5 +1,3 @@
-version: '3.9'
-
 services:
   research:
     build: .
@@ -12,8 +10,8 @@ services:
       # Provide your OpenAI API key via environment variable. When left unset
       # the extractor will attempt to read from ~/.config/openai or prompt the user.
       OPENAI_API_KEY: ${OPENAI_API_KEY:-}
-      # Optionally override the API base if using a selfhosted endpoint
+      # Optionally override the API base if using a self-hosted endpoint
       OPENAI_API_BASE: ${OPENAI_API_BASE:-}
     tty: true
     stdin_open: true
-    command: ["bash"]
\ No newline at end of file
+    command: ["bash"]
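Dropping the top-level `version` key is correct for Compose v2, which ignores it and warns that it is obsolete. The `${OPENAI_API_KEY:-}` syntax substitutes an empty string when the variable is unset, so the service still starts without credentials; to supply them, export the variable in the host shell first (`export OPENAI_API_KEY=<your key>`) before running `docker compose run --rm research`.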
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -5,4 +5,5 @@ exclude = [".venv", "build", "dist"]
 
 [tool.pytest.ini_options]
 addopts = "-ra --strict-markers"
-testpaths = ["tests"]
\ No newline at end of file
+testpaths = ["tests"]
+pythonpath = ["src"]
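pytest's `pythonpath` ini option (built into pytest since 7.0, so covered by the `pytest>=7.4.0` pin below) prepends `src` to `sys.path` at startup, letting the tests import the package from the src layout without an editable install. A minimal sketch of what this enables — the test file name is hypothetical; the version string comes from `src/molmole_research/__init__.py` in this diff:

```python
# tests/test_import.py — hypothetical example; relies on pythonpath = ["src"]
import molmole_research


def test_package_is_importable_from_src_layout():
    # __version__ is set at the bottom of src/molmole_research/__init__.py
    assert molmole_research.__version__ == "0.1.0"
```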
4 changes: 2 additions & 2 deletions requirements.txt
@@ -5,12 +5,12 @@ huggingface_hub>=0.21.0
 pillow>=10.0.0
 typer[all]>=0.12.0
 tqdm>=4.66.0
-rdkit-pypi>=2023.9.5
+rdkit>=2023.9.5
 
 # YAML parsing for the runner
 pyyaml>=6.0
 
 # Development dependencies
 pytest>=7.4.0
 pytest-mock>=3.10.0
-ruff>=0.1.5
\ No newline at end of file
+ruff>=0.1.5
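The swap from `rdkit-pypi` to `rdkit` is more than cosmetic: `rdkit-pypi` was the unofficial community wheel and has been deprecated since the RDKit team began publishing official wheels under the `rdkit` name. Both distributions expose the same `rdkit` import package, so no code changes are needed. A quick smoke test, mirroring the canonicalization evaluator.py depends on:

```python
# Verify the renamed dependency still provides canonical SMILES support.
from rdkit import Chem

# Two spellings of ethanol canonicalize to the same string.
assert Chem.CanonSmiles("OCC") == Chem.CanonSmiles("CCO")
```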
2 changes: 1 addition & 1 deletion src/molmole_research/__init__.py
@@ -20,4 +20,4 @@
     "runner",
 ]
 
-__version__ = "0.1.0"
\ No newline at end of file
+__version__ = "0.1.0"
8 changes: 3 additions & 5 deletions src/molmole_research/downloader.py
@@ -45,9 +45,7 @@ def download_dataset(
         "doxa-friend/MolMole_Patent300", help="HuggingFace dataset identifier"
     ),
     split: str = typer.Option("train", help="Which split to download (e.g., train/validation)"),
-    out: Path = typer.Option(
-        Path("data/images"), help="Output directory for images and labels"
-    ),
+    out: Path = typer.Option(Path("data/images"), help="Output directory for images and labels"),
 ) -> None:
     """Download the specified dataset and save it locally.
 
@@ -66,7 +64,7 @@ def download_dataset(
     except Exception as exc:  # pragma: no cover - network errors
         typer.echo(
             f"Failed to download dataset {dataset}. Please ensure you have access and"
-            " have run `huggingface-cli login` if required. Error: {exc}"
+            f" have run `huggingface-cli login` if required. Error: {exc}"
        )
         raise typer.Exit(1)
 
@@ -112,4 +110,4 @@ def download_dataset(
 
 
 if __name__ == "__main__":
-    app()
\ No newline at end of file
+    app()
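The one-character fix above is worth noting: Python only interpolates `{exc}` in string literals that carry the `f` prefix, and since adjacent literals are concatenated before formatting, the unprefixed second fragment printed the literal text `{exc}` instead of the error. A minimal reproduction:

```python
exc = ValueError("401 Unauthorized")
print("Error: {exc}")   # old behavior: prints the literal "Error: {exc}"
print(f"Error: {exc}")  # fixed: prints "Error: 401 Unauthorized"
```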
9 changes: 4 additions & 5 deletions src/molmole_research/evaluator.py
@@ -17,9 +17,10 @@
 
 import json
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Dict, List
 
 import typer
+
 try:
     # RDKit is used for canonical SMILES and InChI generation. It may not be
     # available in all environments, so we make its import optional and
@@ -79,9 +80,7 @@ def evaluate(
     dataset_dir: Path = typer.Option(
         Path("data/images"), exists=True, help="Directory containing labels.json"
     ),
-    out: Path = typer.Option(
-        Path("results"), help="Directory where metrics will be saved"
-    ),
+    out: Path = typer.Option(Path("results"), help="Directory where metrics will be saved"),
 ) -> None:
     """Evaluate predictions against ground truth SMILES.
 
@@ -158,4 +157,4 @@ def evaluate(
 
 
 if __name__ == "__main__":
-    app()
\ No newline at end of file
+    app()
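evaluator.py's comparison logic is not part of this diff, but the import block above indicates how it works: predictions and ground truth are both canonicalized through RDKit so that equivalent SMILES spellings compare equal. A minimal sketch of that idea, under the assumption that unparseable predictions count as incorrect:

```python
from rdkit import Chem


def smiles_match(pred: str, truth: str) -> bool:
    """Compare two SMILES strings by canonical form rather than raw text."""
    mol_pred = Chem.MolFromSmiles(pred)
    mol_truth = Chem.MolFromSmiles(truth)
    if mol_pred is None or mol_truth is None:
        return False  # assumption: a SMILES RDKit cannot parse is a miss
    return Chem.MolToSmiles(mol_pred) == Chem.MolToSmiles(mol_truth)
```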
19 changes: 10 additions & 9 deletions src/molmole_research/extractor.py
@@ -18,12 +18,11 @@
 import base64
 import datetime as dt
 import json
-import os
 from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional
+from typing import List, Optional
 
 import typer
-from PIL import Image
+
 try:
     # Attempt to import the OpenAI client library. This dependency is optional
     # because it may not be available in environments without network access or
@@ -129,12 +128,13 @@ def default_prompt() -> str:
 
 @app.command("run")
 def run_extraction(
-    model: str = typer.Option(
-        ..., "--model", help="Name of the vision model to use (e.g. gpt-4o)"
-    ),
+    model: str = typer.Option(..., "--model", help="Name of the vision model to use (e.g. gpt-4o)"),
     dataset_dir: Path = typer.Option(
-        Path("data/images/images"), exists=True, file_okay=False, dir_okay=True,
-        help="Directory containing images downloaded by the downloader"
+        Path("data/images/images"),
+        exists=True,
+        file_okay=False,
+        dir_okay=True,
+        help="Directory containing images downloaded by the downloader",
     ),
     out: Path = typer.Option(
         Path("results"), help="Directory where the JSONL results will be saved"
@@ -180,6 +180,7 @@ def run_extraction(
                 max_tokens=max_tokens,
             )
         except Exception as exc:  # pragma: no cover - network errors
+            typer.echo(f"Failed to process {img_path.name}: {exc}")
             text = ""
         record = {"file_name": img_path.name, "text": text}
         fh.write(json.dumps(record) + "\n")
@@ -188,4 +189,4 @@
 
 
 if __name__ == "__main__":
-    app()
\ No newline at end of file
+    app()
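Two things are worth calling out here. First, the added `typer.echo` turns what used to be a silent failure (an empty `text` written to the JSONL with no trace) into a visible per-image diagnostic. Second, `call_openai_model`'s body is outside this diff; for orientation, a hypothetical sketch of how such a helper is commonly written against the OpenAI chat completions API — the prompt handling and return shape here are assumptions, not the project's actual code:

```python
import base64
from pathlib import Path

from openai import OpenAI


def call_openai_model(
    image_path: Path,
    model: str,
    prompt: str,
    temperature: float = 0.0,
    max_tokens: int = 512,
) -> str:
    """Send one image plus an instruction prompt to a vision-capable model."""
    client = OpenAI()  # reads OPENAI_API_KEY (and OPENAI_BASE_URL) from the environment
    b64 = base64.b64encode(image_path.read_bytes()).decode("ascii")
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{b64}"},
                    },
                ],
            }
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content or ""
```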
6 changes: 2 additions & 4 deletions src/molmole_research/runner.py
@@ -57,9 +57,7 @@ def _find_latest_prediction(out_dir: Path, model_prefix: str) -> Optional[Path]:
 
 @app.command("run")
 def run_experiments(
-    config: Path = typer.Option(
-        None, exists=False, help="YAML file with experiment definitions"
-    ),
+    config: Path = typer.Option(None, exists=False, help="YAML file with experiment definitions"),
     dataset_dir: Path = typer.Option(
         Path("data/images/images"), exists=True, help="Directory of dataset images"
     ),
@@ -166,4 +164,4 @@ def run_experiments(
 
 
 if __name__ == "__main__":
-    app()
\ No newline at end of file
+    app()
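The experiment file format itself is not shown in this PR; `pyyaml` in requirements.txt and the `--config` option only tell us it is YAML. Purely as a hypothetical illustration of the shape such a file might take and how the runner could load it (every field name here is invented):

```python
import yaml

# Hypothetical experiments.yaml content — field names are invented for illustration.
CONFIG_TEXT = """
experiments:
  - model: gpt-4o
    temperature: 0.0
    max_tokens: 512
  - model: gpt-4o-mini
    temperature: 0.0
    max_tokens: 512
"""

for exp in yaml.safe_load(CONFIG_TEXT)["experiments"]:
    print(exp["model"], exp["temperature"])
```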
2 changes: 1 addition & 1 deletion tests/__init__.py
@@ -3,4 +3,4 @@
 This file exists to make ``tests`` a Python package. Having a package
 allows relative imports within the test suite and is required when using
 pytest with certain plugins.
-"""
\ No newline at end of file
+"""
20 changes: 9 additions & 11 deletions tests/test_downloader.py
@@ -9,9 +9,7 @@
 from __future__ import annotations
 
 import json
-from pathlib import Path
 
-import pytest
 from PIL import Image
 import datasets
 
@@ -28,11 +26,13 @@ def _make_dummy_dataset(num_items: int = 2):
         images.append(img)
         smiles.append("C")  # simplest molecule
         file_names.append(f"item_{i}.png")
-    return datasets.Dataset.from_dict({
-        "image": images,
-        "smiles": smiles,
-        "file_name": file_names,
-    })
+    return datasets.Dataset.from_dict(
+        {
+            "image": images,
+            "smiles": smiles,
+            "file_name": file_names,
+        }
+    )
 
 
 def test_downloader_saves_images_and_labels(monkeypatch, tmp_path):
@@ -46,9 +46,7 @@ def fake_load_dataset(*args, **kwargs): # noqa: D401
     monkeypatch.setattr(datasets, "load_dataset", fake_load_dataset)
 
     out_dir = tmp_path / "download"
-    download_dataset.callback(  # type: ignore[attr-defined]
-        dataset="dummy", split="train", out=out_dir
-    )
+    download_dataset(dataset="dummy", split="train", out=out_dir)
     # Check that images were saved
     images_dir = out_dir / "images"
     assert images_dir.exists() and images_dir.is_dir()
@@ -61,4 +59,4 @@ def fake_load_dataset(*args, **kwargs): # noqa: D401
     labels = json.loads(labels_path.read_text())
     assert len(labels) == 3
     for entry in labels:
-        assert entry["smiles"] == "C"
\ No newline at end of file
+        assert entry["smiles"] == "C"
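The switch from `download_dataset.callback(...)` to a plain call (repeated below in test_evaluator.py and test_extractor.py) reflects a difference between Click and Typer: Click's `@click.command()` wraps the function in a `Command` object whose underlying function lives at `.callback`, while Typer's `@app.command()` registers the command and returns the original function unchanged, so there is no `.callback` attribute and the function can be invoked directly once every option is passed explicitly. A small demonstration:

```python
import click
import typer


@click.command()
def hello_click():
    """A Click command: the decorator returns a Command wrapper."""


app = typer.Typer()


@app.command()
def hello_typer():
    """A Typer command: the decorator returns the plain function."""


assert hello_click.callback is not None      # Click keeps the function at .callback
assert not hasattr(hello_typer, "callback")  # Typer has nothing to unwrap
```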
6 changes: 2 additions & 4 deletions tests/test_evaluator.py
@@ -9,8 +9,6 @@
 import json
 from pathlib import Path
 
-import pytest
-
 from molmole_research import evaluator
 
 
@@ -47,7 +45,7 @@ def test_evaluator_accuracy(tmp_path):
     _write_predictions(pred_path, preds)
 
     results_dir = tmp_path / "results"
-    evaluator.evaluate.callback(pred=pred_path, dataset_dir=dataset_dir, out=results_dir)
+    evaluator.evaluate(pred=pred_path, dataset_dir=dataset_dir, out=results_dir)
 
     # Load metrics
     metrics_files = list(results_dir.iterdir())
@@ -58,4 +56,4 @@ def test_evaluator_accuracy(tmp_path):
     # Only the first prediction is correct
     assert metrics["correct_smiles"] == 1
     assert metrics["correct_inchi"] == 1
-    assert abs(metrics["accuracy_smiles"] - 0.5) < 1e-6
\ No newline at end of file
+    assert abs(metrics["accuracy_smiles"] - 0.5) < 1e-6
13 changes: 9 additions & 4 deletions tests/test_extractor.py
@@ -10,7 +10,6 @@
 import json
 from pathlib import Path
 
-import pytest
 from PIL import Image
 
 from molmole_research import extractor
@@ -35,8 +34,14 @@ def fake_call_openai_model(*args, **kwargs): # noqa: D401
     monkeypatch.setattr(extractor, "call_openai_model", fake_call_openai_model)
 
     out_dir = tmp_path / "results"
-    extractor.run_extraction.callback(
-        model="gpt-4o-test", dataset_dir=dataset_dir, out=out_dir, api_base=None, api_key=None, temperature=0.0, max_tokens=32
+    extractor.run_extraction(
+        model="gpt-4o-test",
+        dataset_dir=dataset_dir,
+        out=out_dir,
+        api_base=None,
+        api_key=None,
+        temperature=0.0,
+        max_tokens=32,
     )
     # There should be exactly one JSONL file in out_dir
     files = list(out_dir.iterdir())
@@ -46,4 +51,4 @@ def fake_call_openai_model(*args, **kwargs): # noqa: D401
     assert len(contents) == 2
     for line in contents:
         record = json.loads(line)
-        assert record["text"] == "C"
\ No newline at end of file
+        assert record["text"] == "C"