Merge pull request #23 from BirkhoffG/mnist

BirkhoffG · web-flow · commit fb1a92fc66fd · 2024-02-03T21:31:39.000-05:00
Create a benchmarking script
diff --git a/benchmarks/mnist.py b/benchmarks/mnist.py
@@ -0,0 +1,249 @@
+# Adapted from https://github.com/google/flax/blob/main/examples/mnist/train.py
+
+from torchvision.datasets import FashionMNIST
+from jax_dataloader.core import (
+    get_backend_compatibilities,
+    SUPPORTED_DATASETS,
+    JAXDataset,
+)
+from jax_dataloader.imports import *
+import jax_dataloader as jdl
+import optax
+import ml_collections
+from flax import linen as nn
+from flax.metrics import tensorboard
+from flax.training import train_state
+import time
+import rich
+import einops
+import os
+import json
+
+
+# Benchmark timings are stored next to this script, in a "results" sub-directory,
+# under the script's own file name with ".py" swapped for ".json".
+# Layout borrowed from
+# https://github.com/huggingface/datasets/blob/main/benchmarks/benchmark_iterating.py
+RESULTS_BASEPATH, RESULTS_FILENAME = os.path.split(__file__)
+RESULTS_FILE_PATH = os.path.join(RESULTS_BASEPATH, "results", RESULTS_FILENAME.replace(".py", ".json"))
+
+
+class CNN(nn.Module):
+    """Small convolutional classifier producing 10 logits per image.
+
+    Two conv -> ReLU -> average-pool stages are followed by a 256-unit dense
+    layer and a 10-way output layer.
+    """
+
+    @nn.compact
+    def __call__(self, x: jnp.ndarray):
+        """Return the logits for a batch of images.
+
+        Args:
+          x: Batch of images. A rank-3 input is treated as channel-less
+            (batch, height, width) and gets a trailing channel axis of size 1;
+            any other rank is passed through unchanged. Values are divided by
+            255, so raw 0-255 pixel intensities are presumably expected.
+
+        Returns:
+          Array of shape (batch, 10) holding the unnormalized class scores.
+        """
+        if x.ndim == 3:
+            # The leading axis is the batch axis (it is preserved by the
+            # flatten below), so label the axes accordingly; the only effect
+            # of the rearrange is to append a channel axis.
+            x = einops.rearrange(x, "b h w -> b h w 1")
+        x = x / 255.0
+        x = nn.Conv(features=32, kernel_size=(3, 3))(x)
+        x = nn.relu(x)
+        x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))
+        x = nn.Conv(features=64, kernel_size=(3, 3))(x)
+        x = nn.relu(x)
+        x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))
+        x = x.reshape((x.shape[0], -1))  # flatten everything but the batch axis
+        x = nn.Dense(features=256)(x)
+        x = nn.relu(x)
+        x = nn.Dense(features=10)(x)
+        return x
+
+
+@jax.jit
+def apply_model(state, images, labels):
+    """Evaluate one batch and return `(grads, loss, accuracy)`.
+
+    The loss is the mean softmax cross-entropy against 10-way one-hot labels;
+    the accuracy is the fraction of examples whose arg-max logit equals the
+    label. Gradients are taken with respect to `state.params`.
+    """
+
+    def _loss_and_logits(params):
+        predictions = state.apply_fn({"params": params}, images)
+        targets = jax.nn.one_hot(labels, 10)
+        per_example = optax.softmax_cross_entropy(logits=predictions, labels=targets)
+        return jnp.mean(per_example), predictions
+
+    # has_aux=True lets the logits ride along with the differentiated loss.
+    differentiate = jax.value_and_grad(_loss_and_logits, has_aux=True)
+    (loss, logits), grads = differentiate(state.params)
+    is_correct = jnp.argmax(logits, -1) == labels
+    return grads, loss, jnp.mean(is_correct)
+
+
+@jax.jit
+def update_model(state, grads):
+    """Return the train state obtained by applying `grads` to `state`."""
+    next_state = state.apply_gradients(grads=grads)
+    return next_state
+
+
+def get_img_labels(batch):
+    """Extract `(images, labels)` from a batch in any of the supported layouts.
+
+    Accepted layouts:
+      * a dict with "image" and "label" keys;
+      * a tuple/list whose first element is such a dict;
+      * a tuple/list that unpacks directly into `(images, labels)`.
+
+    Raises:
+      ValueError: If `batch` is neither a dict, a tuple nor a list.
+    """
+    if isinstance(batch, dict):
+        return batch["image"], batch["label"]
+
+    if not isinstance(batch, (tuple, list)):
+        raise ValueError(
+            f"Unknown batch type: {type(batch)}",
+        )
+
+    head = batch[0]
+    if isinstance(head, dict):
+        return head["image"], head["label"]
+
+    imgs, labels = batch
+    return imgs, labels
+
+
+def train_epoch(state, dataloader):
+    """Run one pass over `dataloader`, updating the model after every batch.
+
+    Returns:
+      A `(state, mean_loss, mean_accuracy)` tuple, where the means are taken
+      over the per-batch values of the epoch.
+    """
+    losses, accuracies = [], []
+
+    for batch in dataloader:
+        grads, batch_loss, batch_accuracy = apply_model(state, *get_img_labels(batch))
+        state = update_model(state, grads)
+        losses.append(batch_loss)
+        accuracies.append(batch_accuracy)
+
+    return state, np.mean(losses), np.mean(accuracies)
+
+
+def create_train_state(rng, config):
+    """Build the initial `TrainState` for a freshly initialised `CNN`.
+
+    Parameters are initialised from `rng` using a dummy all-ones batch of shape
+    (32, 28, 28); the optimizer is SGD configured with `config.learning_rate`
+    and `config.momentum`.
+    """
+    model = CNN()
+    dummy_batch = jnp.ones([32, 28, 28])
+    variables = model.init(rng, dummy_batch)
+    optimizer = optax.sgd(config.learning_rate, config.momentum)
+    return train_state.TrainState.create(
+        apply_fn=model.apply, params=variables["params"], tx=optimizer
+    )
+
+
+def train_and_evaluate(
+    config: ml_collections.ConfigDict, workdir: str
+) -> tuple[train_state.TrainState, list]:
+    """Train the model and time every training epoch.
+
+    After each training epoch the model is also evaluated on the test split;
+    only the training pass is included in the measured time.
+
+    Args:
+      config: Hyperparameter configuration. Reads `dataset_type`, `backend`,
+        `batch_size` and `num_epochs` here, plus whatever
+        `create_train_state` needs.
+      workdir: Currently unused; kept so existing callers keep working.
+
+    Returns:
+      A `(state, runtime_per_epoch)` tuple: the final train state (which
+      includes the `.params`) and the list of per-epoch training durations in
+      seconds.
+    """
+    train_ds, test_ds = get_datasets(config.dataset_type)
+    train_dl, test_dl = map(
+        lambda ds: jdl.DataLoader(
+            ds, backend=config.backend, batch_size=config.batch_size, shuffle=True
+        ),
+        (train_ds, test_ds),
+    )
+    # Parameter initialisation is the only consumer of randomness in this
+    # function, so no per-epoch key needs to be derived.
+    _, init_rng = jax.random.split(jax.random.key(0))
+    state = create_train_state(init_rng, config)
+
+    runtime_per_epoch = []
+    # Defined up front so the report below still works when num_epochs < 1.
+    test_accuracy = float("nan")
+    for _ in range(config.num_epochs):
+        # perf_counter is monotonic, so the measurement cannot be distorted by
+        # system clock adjustments.
+        start = time.perf_counter()
+        state, _train_loss, _train_accuracy = train_epoch(state, train_dl)
+        runtime_per_epoch.append(time.perf_counter() - start)
+
+        batch_accuracies = []
+        for batch in test_dl:
+            images, labels = get_img_labels(batch)
+            logits = state.apply_fn({"params": state.params}, images)
+            batch_accuracies.append(jnp.mean(jnp.argmax(logits, -1) == labels))
+
+        test_accuracy = np.mean(batch_accuracies)
+
+    # Reports the accuracy measured after the last epoch.
+    rich.print(f"Test accuracy: {test_accuracy:.3f}")
+    rich.print(f"Training batches: {len(train_dl)}")
+    return state, runtime_per_epoch
+
+
+def get_datasets(ds_type: Literal["jax", "torch", "tf", "hf"]):
+    """Return the Fashion-MNIST `(train, test)` datasets in the requested flavour.
+
+    Args:
+      ds_type: Which dataset implementation to return:
+        "jax" -> `jdl.ArrayDataset` built from the torchvision arrays,
+        "torch" -> the torchvision `FashionMNIST` datasets themselves,
+        "hf" -> the "fashion_mnist" Hugging Face dataset splits,
+        "tf" -> the "fashion_mnist" TensorFlow Datasets splits.
+
+    Raises:
+      ValueError: If `ds_type` is not one of the values above.
+    """
+
+    def _torch_split(train: bool):
+        # Single place for the torchvision constructor arguments; downloads to
+        # /tmp/mnist/ on first use and yields images as float NumPy arrays.
+        return FashionMNIST(
+            "/tmp/mnist/",
+            download=True,
+            transform=lambda x: np.array(x, dtype=float),
+            train=train,
+        )
+
+    # The torchvision datasets are only built for the flavours that use them,
+    # so "hf"/"tf" requests do not load an unused copy of the data.
+    if ds_type == "jax":
+        train_torch, test_torch = _torch_split(True), _torch_split(False)
+        train_ds = jdl.ArrayDataset(
+            train_torch.data.numpy(), train_torch.targets.numpy()
+        )
+        test_ds = jdl.ArrayDataset(
+            test_torch.data.numpy(), test_torch.targets.numpy()
+        )
+    elif ds_type == "torch":
+        train_ds, test_ds = _torch_split(True), _torch_split(False)
+    elif ds_type == "hf":
+        ds = hf_datasets.load_dataset("fashion_mnist")
+        train_ds, test_ds = ds["train"], ds["test"]
+    elif ds_type == "tf":
+        train_ds = tfds.load("fashion_mnist", split="train")
+        test_ds = tfds.load("fashion_mnist", split="test")
+    else:
+        raise ValueError(f"Unknown dataset type: {ds_type}")
+    return train_ds, test_ds
+
+
+def get_config():
+    """Return the default benchmark hyperparameters as a `ConfigDict`."""
+    defaults = {
+        "dataset_type": "jax",
+        "backend": "jax",
+        "batch_size": 128,
+        "num_epochs": 10,
+        "learning_rate": 0.1,
+        "momentum": 0.9,
+    }
+    return ml_collections.ConfigDict(defaults)
+
+
+def main():
+    """Benchmark the training time for compatible backends and datasets.
+
+    Trains on every (backend, dataset type) pair reported as supported by
+    `get_backend_compatibilities`, prints a timing summary per pair, and writes
+    all per-epoch runtimes to `RESULTS_FILE_PATH` as JSON.
+
+    Returns:
+      Nested dict of the form
+      `{"backend=<name>": {"dataset=<name>": [seconds per epoch, ...]}}`.
+      Unsupported pairs map to an empty list; backends that support no
+      dataset at all get no entry.
+    """
+
+    compat = get_backend_compatibilities()
+    type2ds_name = {
+        JAXDataset: "jax",
+        TorchDataset: "torch",
+        TFDataset: "tf",
+        HFDataset: "hf",
+    }
+    print('Downloading datasets...')
+    # Load every dataset flavour once before any run is timed.
+    for ds_name in type2ds_name.values():
+        get_datasets(ds_name)
+
+    runtime = {}
+    config = get_config()
+    for backend, ds in compat.items():
+        if len(ds) == 0:
+            continue
+        backend_runtime = runtime["backend=" + backend] = {}
+        config.backend = backend
+
+        for ds_type in SUPPORTED_DATASETS:
+            ds_name = type2ds_name[ds_type]
+            if ds_type not in ds:
+                backend_runtime["dataset=" + ds_name] = []
+                print(f"backend={backend}, dataset={ds_name}: Not supported. Skipping.")
+                continue
+
+            config.dataset_type = ds_name
+            print(f"backend={backend}, dataset={ds_name}:")
+            # Bound to `state` so the imported `train_state` module is not shadowed.
+            state, runtime_per_epoch = train_and_evaluate(config, "/tmp/mnist")
+            backend_runtime["dataset=" + ds_name] = runtime_per_epoch
+
+            # The first epoch is left out of the summary, presumably because
+            # it includes one-off JIT compilation/warm-up cost.
+            rich.print(f"Runtime per epoch: {np.mean(runtime_per_epoch[1:]): .3f} (std={np.std(runtime_per_epoch[1:]): .3f}).")
+            del state
+
+    # Create the results directory if needed so a finished benchmark run is
+    # not lost to a FileNotFoundError at the very end.
+    os.makedirs(os.path.dirname(RESULTS_FILE_PATH), exist_ok=True)
+    with open(RESULTS_FILE_PATH, "w", encoding="utf-8") as f:
+        json.dump(runtime, f)
+    return runtime
+
+
+if __name__ == "__main__":
+    # Run the full benchmark and pretty-print the collected runtimes as JSON.
+    rich.print_json(data=main())
diff --git a/benchmarks/results/mnist.json b/benchmarks/results/mnist.json
@@ -0,0 +1 @@
+{"backend=jax": {"dataset=jax": [6.00615930557251, 0.7483389377593994, 0.743844747543335, 0.7461087703704834, 0.7428982257843018, 0.7401800155639648, 0.7440762519836426, 0.7452375888824463, 0.7448098659515381, 0.7438862323760986], "dataset=torch": [], "dataset=tf": [], "dataset=hf": [5.5362865924835205, 4.725424528121948, 4.779559373855591, 4.82304835319519, 4.8099517822265625, 4.83212685585022, 4.809313774108887, 4.818130016326904, 4.873654127120972, 4.943045377731323]}, "backend=pytorch": {"dataset=jax": [1.839327096939087, 0.966571569442749, 0.9141397476196289, 0.9267113208770752, 0.9212968349456787, 0.9556140899658203, 0.912912130355835, 0.9298040866851807, 1.024169921875, 0.9741010665893555], "dataset=torch": [3.6769654750823975, 2.9240589141845703, 2.9682703018188477, 2.9608328342437744, 2.9808106422424316, 2.9670567512512207, 3.0383379459381104, 3.0271716117858887, 2.952286720275879, 2.946077823638916], "dataset=tf": [], "dataset=hf": [6.0433268547058105, 5.160061359405518, 5.155879259109497, 5.418692350387573, 6.080808639526367, 5.252376079559326, 5.186870098114014, 5.237852096557617, 5.198176622390747, 5.24579381942749]}, "backend=tensorflow": {"dataset=jax": [1.793637752532959, 0.9664194583892822, 0.8171765804290771, 0.8145453929901123, 0.8179428577423096, 0.8119533061981201, 0.8084969520568848, 0.8056025505065918, 0.8245463371276855, 0.8152358531951904], "dataset=torch": [], "dataset=tf": [3.459198474884033, 1.1708533763885498, 1.228088617324829, 1.1557502746582031, 1.124732494354248, 1.1766808032989502, 1.119699239730835, 1.1329691410064697, 1.1301155090332031, 1.1265795230865479], "dataset=hf": [34.50724649429321, 34.40732932090759, 34.51625299453735, 33.26384210586548, 32.72089385986328, 32.59306216239929, 32.585314989089966, 32.482611656188965, 32.63717746734619, 32.016772747039795]}}
diff --git a/jax_dataloader/imports.py b/jax_dataloader/imports.py
@@ -26,10 +26,7 @@
     import torch.utils.data as torch_data
     import torch
 
-    TorchDataset = Annotated[
-        torch_data.Dataset,
-        Is[lambda _: torch_data is not None],
-    ]
+    TorchDataset = torch_data.Dataset
 except ModuleNotFoundError:
     torch_data = None
     torch = None
@@ -55,10 +52,11 @@
     import tensorflow as tf
     import tensorflow_datasets as tfds
 
-    TFDataset = Annotated[
-        tf.data.Dataset,
-        Is[lambda _: tf is not None],
-    ]
+    # TFDataset = Annotated[
+    #     tf.data.Dataset,
+    #     Is[lambda _: tf is not None],
+    # ]
+    TFDataset = tf.data.Dataset
 except ModuleNotFoundError:
     tf = None
     tfds = None
diff --git a/jax_dataloader/loaders/jax.py b/jax_dataloader/loaders/jax.py
@@ -82,7 +82,7 @@ def to_jax_dataset(dataset: JAXDataset):
 
 @dispatch
 def to_jax_dataset(dataset: HFDataset):
-    return dataset.with_format('jax')
+    return dataset.with_format('numpy')
 
 # %% ../../nbs/loader.jax.ipynb 8
 class DataLoaderJAX(BaseDataLoader):
diff --git a/jax_dataloader/loaders/torch.py b/jax_dataloader/loaders/torch.py
@@ -8,6 +8,7 @@
 from ..utils import check_pytorch_installed
 from ..tests import *
 from jax.tree_util import tree_map
+import warnings
 
 
 # %% auto 0
diff --git a/nbs/loader.jax.ipynb b/nbs/loader.jax.ipynb
@@ -152,7 +152,7 @@
     "\n",
     "@dispatch\n",
     "def to_jax_dataset(dataset: HFDataset):\n",
-    "    return dataset.with_format('jax')"
+    "    return dataset.with_format('numpy')"
    ]
   },
   {
diff --git a/nbs/loader.torch.ipynb b/nbs/loader.torch.ipynb
@@ -47,7 +47,8 @@
     "from jax_dataloader.datasets import Dataset, ArrayDataset, JAXDataset\n",
     "from jax_dataloader.utils import check_pytorch_installed\n",
     "from jax_dataloader.tests import *\n",
-    "from jax.tree_util import tree_map\n"
+    "from jax.tree_util import tree_map\n",
+    "import warnings\n"
    ]
   },
   {

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+{"backend=jax": {"dataset=jax": [6.00615930557251, 0.7483389377593994, 0.743844747543335, 0.7461087703704834, 0.7428982257843018, 0.7401800155639648, 0.7440762519836426, 0.7452375888824463, 0.7448098659515381, 0.7438862323760986], "dataset=torch": [], "dataset=tf": [], "dataset=hf": [5.5362865924835205, 4.725424528121948, 4.779559373855591, 4.82304835319519, 4.8099517822265625, 4.83212685585022, 4.809313774108887, 4.818130016326904, 4.873654127120972, 4.943045377731323]}, "backend=pytorch": {"dataset=jax": [1.839327096939087, 0.966571569442749, 0.9141397476196289, 0.9267113208770752, 0.9212968349456787, 0.9556140899658203, 0.912912130355835, 0.9298040866851807, 1.024169921875, 0.9741010665893555], "dataset=torch": [3.6769654750823975, 2.9240589141845703, 2.9682703018188477, 2.9608328342437744, 2.9808106422424316, 2.9670567512512207, 3.0383379459381104, 3.0271716117858887, 2.952286720275879, 2.946077823638916], "dataset=tf": [], "dataset=hf": [6.0433268547058105, 5.160061359405518, 5.155879259109497, 5.418692350387573, 6.080808639526367, 5.252376079559326, 5.186870098114014, 5.237852096557617, 5.198176622390747, 5.24579381942749]}, "backend=tensorflow": {"dataset=jax": [1.793637752532959, 0.9664194583892822, 0.8171765804290771, 0.8145453929901123, 0.8179428577423096, 0.8119533061981201, 0.8084969520568848, 0.8056025505065918, 0.8245463371276855, 0.8152358531951904], "dataset=torch": [], "dataset=tf": [3.459198474884033, 1.1708533763885498, 1.228088617324829, 1.1557502746582031, 1.124732494354248, 1.1766808032989502, 1.119699239730835, 1.1329691410064697, 1.1301155090332031, 1.1265795230865479], "dataset=hf": [34.50724649429321, 34.40732932090759, 34.51625299453735, 33.26384210586548, 32.72089385986328, 32.59306216239929, 32.585314989089966, 32.482611656188965, 32.63717746734619, 32.016772747039795]}}
Original file line number	Diff line number	Diff line change
`@@ -152,7 +152,7 @@`
`152`	`152`	`"\n",`
`153`	`153`	`"@dispatch\n",`
`154`	`154`	`"def to_jax_dataset(dataset: HFDataset):\n",`
`155`		`- " return dataset.with_format('jax')"`
	`155`	`+ " return dataset.with_format('numpy')"`
`156`	`156`	`]`
`157`	`157`	`},`
`158`	`158`	`{`
Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,8 @@`
`47`	`47`	`"from jax_dataloader.datasets import Dataset, ArrayDataset, JAXDataset\n",`
`48`	`48`	`"from jax_dataloader.utils import check_pytorch_installed\n",`
`49`	`49`	`"from jax_dataloader.tests import *\n",`
`50`		`- "from jax.tree_util import tree_map\n"`
	`50`	`+ "from jax.tree_util import tree_map\n",`
	`51`	`+ "import warnings\n"`
`51`	`52`	`]`
`52`	`53`	`},`
`53`	`54`	`{`