TileDB-Inc
diff --git a/‎examples/models/pytorch_tiledb_models_example.ipynb
Lines changed: 74 additions & 122 deletions b/‎examples/models/pytorch_tiledb_models_example.ipynb
Lines changed: 74 additions & 122 deletions
@@ -53,9 +53,7 @@
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "<torch._C.Generator at 0x15dae6650>"
-      ]
+      "text/plain": "<torch._C.Generator at 0x1218c0b70>"
      },
      "execution_count": 2,
      "metadata": {},
@@ -105,18 +103,11 @@
      ]
     },
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1834a028d80340888143ba2c4d99a1b0",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/9912422 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100.0%\n"
+     ]
     },
     {
      "name": "stdout",
@@ -129,18 +120,11 @@
      ]
     },
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "55ca1ec83c0a4526ab2f201e1615c490",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/28881 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100.0%\n"
+     ]
     },
     {
      "name": "stdout",
@@ -153,18 +137,11 @@
      ]
     },
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2284fcddf2b243ad8989c7326457b9ed",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/1648877 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100.0%\n"
+     ]
     },
     {
      "name": "stdout",
@@ -177,18 +154,11 @@
      ]
     },
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "86116abe57bb42c09ed7de5fbd982b5e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/4542 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100.0%"
+     ]
     },
     {
      "name": "stdout",
@@ -197,6 +167,13 @@
       "Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw\n",
       "\n"
      ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
     }
    ],
    "source": [
@@ -283,7 +260,7 @@
     }
    },
    "source": [
-    "We continue with the training loop and we iterate over all training data once per epoch. Loading the individual batches\n",
+    "We continue with the training loop, and we iterate over all training data once per epoch. Loading the individual batches\n",
     "is handled by the DataLoader. We need to set the gradients to zero using optimizer.zero_grad() since PyTorch by default\n",
     "accumulates gradients. We then produce the output of the network (forward pass) and compute a negative log-likelihodd\n",
     "loss between the output and the ground truth label. The backward() call we now collect a new set of gradients which we\n",
@@ -299,6 +276,14 @@
     }
    },
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2022-12-07 17:00:23.979857: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -388,8 +373,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can now save the trained model as a TileDB array. In case we want to train  the model further in a later time, we can also save\n",
-    "the optimizer in our TileDB array. In case we will use our model only for inference, we don't have to save the optimizer and we\n",
+    "We can now save the trained model as a TileDB array. In case we want to train the model further in a later time, we can also save\n",
+    "the optimizer in our TileDB array. In case we will use our model only for inference, we don't have to save the optimizer, and we\n",
     "only keep the model. We first declare a PytTorchTileDB object and initialize it with the corresponding TileDB uri, model and optimizer,\n",
     "and then save the model as a TileDB array. Finally, we can save any kind of metadata (in any structure, i.e., list, tuple or dictionary)\n",
     "by passing a dictionary to the meta attribute."
@@ -443,17 +428,20 @@
       " '../data/pytorch-mnist-1/__schema',\n",
       " '../data/pytorch-mnist-1/__fragments']\n",
       "Key: TILEDB_ML_MODEL_ML_FRAMEWORK, Value: PYTORCH\n",
-      "Key: TILEDB_ML_MODEL_ML_FRAMEWORK_VERSION, Value: 1.10.2\n",
+      "Key: TILEDB_ML_MODEL_ML_FRAMEWORK_VERSION, Value: 1.12.0\n",
       "Key: TILEDB_ML_MODEL_PREVIEW, Value: Net(\n",
       "  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))\n",
       "  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))\n",
       "  (conv2_drop): Dropout2d(p=0.5, inplace=False)\n",
       "  (fc1): Linear(in_features=320, out_features=50, bias=True)\n",
       "  (fc2): Linear(in_features=50, out_features=10, bias=True)\n",
       ")\n",
-      "Key: TILEDB_ML_MODEL_PYTHON_VERSION, Value: 3.9.13\n",
+      "Key: TILEDB_ML_MODEL_PYTHON_VERSION, Value: 3.9.9\n",
       "Key: TILEDB_ML_MODEL_STAGE, Value: STAGING\n",
       "Key: epochs, Value: 1\n",
+      "Key: model_state_dict_size, Value: 90053\n",
+      "Key: optimizer_state_dict_size, Value: 90064\n",
+      "Key: tensorboard_size, Value: 22674\n",
       "Key: train_loss, Value: (2.358812093734741, 2.285137891769409, 2.3066349029541016, 2.2708795070648193, 2.2367401123046875, 2.24334716796875, 2.1832549571990967, 2.1485116481781006, 2.1049115657806396, 2.0044069290161133, 1.8622523546218872, 1.8843708038330078, 1.7973158359527588, 1.6879109144210815, 1.508046269416809, 1.764279842376709, 1.4700727462768555, 1.3514467477798462, 1.2905819416046143, 1.0177571773529053, 1.042162299156189, 1.0987662076950073, 1.2285516262054443, 1.1495932340621948, 0.8452475070953369, 0.9741130471229553, 0.8569056987762451, 0.9234588146209717, 1.0218565464019775, 0.8069543242454529, 0.8789511919021606, 0.8185049891471863, 0.8055434226989746, 0.8231522440910339, 0.8543609976768494, 0.7746452689170837, 0.718348503112793, 0.5433375239372253, 0.7593768239021301, 0.65492182970047, 0.6999298930168152, 0.8053513765335083, 0.790733814239502, 0.7599329948425293, 0.540409505367279, 0.6412327885627747, 0.6593738198280334)\n"
      ]
     }
@@ -494,18 +482,21 @@
      "output_type": "stream",
      "text": [
       "Key: TILEDB_ML_MODEL_ML_FRAMEWORK, Value: PYTORCH\n",
-      "Key: TILEDB_ML_MODEL_ML_FRAMEWORK_VERSION, Value: 1.10.2\n",
+      "Key: TILEDB_ML_MODEL_ML_FRAMEWORK_VERSION, Value: 1.12.0\n",
       "Key: TILEDB_ML_MODEL_PREVIEW, Value: Net(\n",
       "  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))\n",
       "  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))\n",
       "  (conv2_drop): Dropout2d(p=0.5, inplace=False)\n",
       "  (fc1): Linear(in_features=320, out_features=50, bias=True)\n",
       "  (fc2): Linear(in_features=50, out_features=10, bias=True)\n",
       ")\n",
-      "Key: TILEDB_ML_MODEL_PYTHON_VERSION, Value: 3.9.13\n",
+      "Key: TILEDB_ML_MODEL_PYTHON_VERSION, Value: 3.9.9\n",
       "Key: TILEDB_ML_MODEL_STAGE, Value: STAGING\n",
       "Key: epochs, Value: 1\n",
+      "Key: model_state_dict_size, Value: 90053\n",
       "Key: new_meta, Value: [\"Any kind of info\"]\n",
+      "Key: optimizer_state_dict_size, Value: 90064\n",
+      "Key: tensorboard_size, Value: 22674\n",
       "Key: train_loss, Value: (2.358812093734741, 2.285137891769409, 2.3066349029541016, 2.2708795070648193, 2.2367401123046875, 2.24334716796875, 2.1832549571990967, 2.1485116481781006, 2.1049115657806396, 2.0044069290161133, 1.8622523546218872, 1.8843708038330078, 1.7973158359527588, 1.6879109144210815, 1.508046269416809, 1.764279842376709, 1.4700727462768555, 1.3514467477798462, 1.2905819416046143, 1.0177571773529053, 1.042162299156189, 1.0987662076950073, 1.2285516262054443, 1.1495932340621948, 0.8452475070953369, 0.9741130471229553, 0.8569056987762451, 0.9234588146209717, 1.0218565464019775, 0.8069543242454529, 0.8789511919021606, 0.8185049891471863, 0.8055434226989746, 0.8231522440910339, 0.8543609976768494, 0.7746452689170837, 0.718348503112793, 0.5433375239372253, 0.7593768239021301, 0.65492182970047, 0.6999298930168152, 0.8053513765335083, 0.790733814239502, 0.7599329948425293, 0.540409505367279, 0.6412327885627747, 0.6593738198280334)\n"
      ]
     }
@@ -524,49 +515,6 @@
     "    print(\"Key: {}, Value: {}\".format(key, value))"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For the case of PyTorch models, internally, we save model's state_dict and optimizer's state_dict,\n",
-    "as [variable sized attributes)](https://docs.tiledb.com/main/how-to/arrays/writing-arrays/var-length-attributes)\n",
-    "(pickled), i.e., we can open the TileDB and get only the state_dict of the model or optimizer,\n",
-    "without bringing the whole model in memory. For example, we can load model's and optimizer's state_dict\n",
-    "for model `pytorch-mnist-1` as follows."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Type: <class 'collections.OrderedDict'> , Keys: odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias'])\n",
-      "Type: <class 'dict'>, Keys: dict_keys(['state', 'param_groups'])\n"
-     ]
-    }
-   ],
-   "source": [
-    "# First open arrays\n",
-    "model_array_1 = tiledb.open(uri)[:]\n",
-    "\n",
-    "# Load model state_dict\n",
-    "model_1_state_dict = pickle.loads(model_array_1['model_state_dict'].item(0))\n",
-    "\n",
-    "# Load optimizer state_dict\n",
-    "optimizer_1_state_dict = pickle.loads(model_array_1['optimizer_state_dict'].item(0))\n",
-    "\n",
-    "print(f'Type: {type(model_1_state_dict)} , Keys: {model_1_state_dict.keys()}')\n",
-    "print(f'Type: {type(optimizer_1_state_dict)}, Keys: {optimizer_1_state_dict.keys()}')"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -581,7 +529,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -592,10 +540,7 @@
     "# Place holder for the loaded model\n",
     "network = Net()\n",
     "optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)\n",
-    "\n",
-    "# Load returns possible extra attributes, other than model's and optimizer's state dicts. In case there were\n",
-    "# no extra attributes it will return an empty dict\n",
-    "_ = tiledb_model_1.load(model=network, optimizer=optimizer)"
+    "tiledb_model_1.load(model=network, optimizer=optimizer)"
    ]
   },
   {
@@ -607,7 +552,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -724,16 +669,16 @@
       "number of fragments: 2\n",
       "\n",
       "===== FRAGMENT NUMBER 0 =====\n",
-      "fragment uri: file:///Users/konstantinostsitsimpikos/tileroot/TileDB-ML/examples/data/pytorch-mnist-1/__fragments/__1660811273615_1660811273615_23699d36dbc744809486d88176c2920f_13\n",
-      "timestamp range: (1660811273615, 1660811273615)\n",
+      "fragment uri: file:///Users/george/PycharmProjects/TileDB-ML/examples/data/pytorch-mnist-1/__fragments/__1670425246498_1670425246498_5ca20757611a43009e22606647ee9b22_16\n",
+      "timestamp range: (1670425246498, 1670425246498)\n",
       "number of unconsolidated metadata: 2\n",
-      "version: 13\n",
+      "version: 16\n",
       "\n",
       "===== FRAGMENT NUMBER 1 =====\n",
-      "fragment uri: file:///Users/konstantinostsitsimpikos/tileroot/TileDB-ML/examples/data/pytorch-mnist-1/__fragments/__1660811314379_1660811314379_0309938da153404e88a7a64ff044fc20_13\n",
-      "timestamp range: (1660811314379, 1660811314379)\n",
+      "fragment uri: file:///Users/george/PycharmProjects/TileDB-ML/examples/data/pytorch-mnist-1/__fragments/__1670425278236_1670425278236_8e60255a3abe4173b21458369995c20c_16\n",
+      "timestamp range: (1670425278236, 1670425278236)\n",
       "number of unconsolidated metadata: 2\n",
-      "version: 13\n"
+      "version: 16\n"
      ]
     }
    ],
@@ -789,7 +734,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -826,7 +771,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -915,7 +860,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 14,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -924,11 +869,9 @@
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "'../data/tiledb-pytorch-mnist/pytorch-mnist-2'"
-      ]
+      "text/plain": "'../data/tiledb-pytorch-mnist/pytorch-mnist-2'"
      },
-     "execution_count": 15,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -949,13 +892,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
     }
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "file:///Users/george/PycharmProjects/TileDB-ML/examples/data/tiledb-pytorch-mnist/pytorch-mnist-1 array\n",
+      "file:///Users/george/PycharmProjects/TileDB-ML/examples/data/tiledb-pytorch-mnist/pytorch-mnist-2 array\n"
+     ]
+    }
+   ],
    "source": [
     "tiledb.ls(group, lambda obj_path, obj_type: print(obj_path, obj_type))"
    ]