From de9a54120c528a2cb468a4811783455b02e4b065 Mon Sep 17 00:00:00 2001 From: Adam Tupper Date: Fri, 8 Mar 2024 13:38:31 -0500 Subject: [PATCH 1/3] Fix matrix definitions. --- GPU_Workshop.ipynb | 93 +++++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/GPU_Workshop.ipynb b/GPU_Workshop.ipynb index afc2204..9fe004b 100644 --- a/GPU_Workshop.ipynb +++ b/GPU_Workshop.ipynb @@ -51,7 +51,7 @@ "source": [ "# Example 1 - Solving Linear Systems\n", "\n", - "In this example, you will simultaneously solve N systems of equations of the type $Ax=b$ where $A$ is a $MxM$ matrix and $B$ is $Mx1$\n", + "In this example, you will simultaneously solve N systems of equations of the type $Ax=b$ where $A$ is a $M \\times M$ matrix and $B$ is $M \\times 1$\n", "\n", "You will be given two functions: one to generate random matrices and one to solve the systems of equations. You will use the magic function %timeit to compare the running times for these functions with different combinations of the following parameters:\n", "\n", @@ -140,7 +140,7 @@ "metadata": {}, "outputs": [], "source": [ - "a,b = generate_data(1,10000,10000,\"gpu\")" + "a, b = generate_data(1, 10000, 10000, \"gpu\")" ] }, { @@ -372,15 +372,20 @@ "metadata": {}, "outputs": [], "source": [ - "def generate_regression_data(function,n_samples):\n", + "def generate_regression_data(function, n_samples):\n", "\n", - " samples = [(x,function(x) + np.random.normal(0,1) * 0.1) for x in np.sort(np.random.uniform(0,1,n_samples))]\n", + " samples = [\n", + " (x, function(x) + np.random.normal(0, 1) * 0.1)\n", + " for x in np.sort(np.random.uniform(0, 1, n_samples))\n", + " ]\n", " X, y = zip(*samples)\n", "\n", - " X = np.array(X,dtype=jnp.float32) #Change the numpy array's data type into a JAX data type\n", - " X = np.c_[X,np.ones(X.shape[0])]\n", - " y = np.array(y,dtype=jnp.float32)\n", - " return X,y" + " X = np.array(\n", + " X, dtype=jnp.float32\n", + " ) # Change the numpy array's data type into a JAX data type\n", + " X = np.c_[X, np.ones(X.shape[0])]\n", + " y = np.array(y, dtype=jnp.float32)\n", + " return X, y" ] }, { @@ -397,9 +402,9 @@ "outputs": [], "source": [ "f = lambda x: 2 * x + 5\n", - "N_SAMPLES=50\n", + "N_SAMPLES = 50\n", "\n", - "X,y = generate_regression_data(f,N_SAMPLES)" + "X, y = generate_regression_data(f, N_SAMPLES)" ] }, { @@ -415,12 +420,12 @@ "metadata": {}, "outputs": [], "source": [ - "X_target = np.linspace(0,1,50).reshape(-1,1)\n", + "X_target = np.linspace(0, 1, 50).reshape(-1, 1)\n", "\n", - "plt.scatter(X[:,0],y,label='Samples')\n", - "plt.plot(X_target,f(X_target),color='r',label='Target')\n", + "plt.scatter(X[:, 0], y, label=\"Samples\")\n", + "plt.plot(X_target, f(X_target), color=\"r\", label=\"Target\")\n", "\n", - "plt.legend(loc='lower right')" + "plt.legend(loc=\"lower right\")" ] }, { @@ -449,15 +454,18 @@ "def model(beta, X):\n", " return (X @ beta).T\n", "\n", + "\n", "def loss(beta, X, y):\n", - " return jnp.mean((model(beta, X) - y)**2)\n", + " return jnp.mean((model(beta, X) - y) ** 2)\n", + "\n", "\n", "@jax.jit\n", "def update(beta, X, y, learning_rate):\n", " return beta - learning_rate * jax.grad(loss)(beta, X, y)\n", "\n", + "\n", "def fit_model(X, y, n_iterations, learning_rate=0.1):\n", - " beta = jnp.zeros([X.shape[1],1])\n", + " beta = jnp.zeros([X.shape[1], 1])\n", " if y.ndim > 1:\n", " y = y.reshape(-1)\n", " for i in range(n_iterations):\n", @@ -496,8 +504,13 @@ "metadata": {}, "outputs": [], "source": [ - 
"plt.scatter(X[:,0],y,label='Samples')\n", - "plt.plot(X_target, np.dot(np.c_[X_target, np.ones(X.shape[0])],beta_fit), color='g',label=\"Linear Fit\")\n", + "plt.scatter(X[:, 0], y, label=\"Samples\")\n", + "plt.plot(\n", + " X_target,\n", + " np.dot(np.c_[X_target, np.ones(X.shape[0])], beta_fit),\n", + " color=\"g\",\n", + " label=\"Linear Fit\",\n", + ")\n", "plt.legend(loc=\"lower right\")" ] }, @@ -585,21 +598,21 @@ "metadata": {}, "outputs": [], "source": [ - "def generate_data_multi_regression(n_samples,n_variables):\n", + "def generate_data_multi_regression(n_samples, n_variables):\n", "\n", - " beta = np.random.randint(1,10,(n_variables + 1,1))\n", + " beta = np.random.randint(1, 10, (n_variables + 1, 1))\n", "\n", - " f = lambda x: np.dot(x,beta)\n", + " f = lambda x: np.dot(x, beta)\n", "\n", - " X = np.array(np.random.rand(n_samples,n_variables))\n", - " X = np.c_[X,np.ones(X.shape[0])]\n", + " X = np.array(np.random.rand(n_samples, n_variables))\n", + " X = np.c_[X, np.ones(X.shape[0])]\n", "\n", - " y = f(X) + np.random.normal(0,1,(n_samples,1))\n", + " y = f(X) + np.random.normal(0, 1, (n_samples, 1))\n", "\n", - " X = np.array(X,dtype=jnp.float32)\n", - " y = np.array(y,dtype=jnp.float32)\n", + " X = np.array(X, dtype=jnp.float32)\n", + " y = np.array(y, dtype=jnp.float32)\n", "\n", - " return X,y" + " return X, y" ] }, { @@ -716,12 +729,15 @@ " x = self.fc3(x)\n", " return x\n", "\n", - "net = Net().cuda() # Load model on the GPU\n", "\n", - "criterion = nn.CrossEntropyLoss().cuda() # Load the loss function on the GPU\n", + "net = Net().cuda() # Load model on the GPU\n", + "\n", + "criterion = nn.CrossEntropyLoss().cuda() # Load the loss function on the GPU\n", "optimizer = optim.SGD(net.parameters(), lr=0.001)\n", "\n", - "transform_train = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])" + "transform_train = transforms.Compose(\n", + " [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]\n", + ")" ] }, { @@ -749,10 +765,17 @@ "NUM_WORKERS = 10\n", "PRE_FETCH = 2\n", "\n", - "datadir = os.environ[\"CIFAR10_PATH\"] #f\"{os.getenv('SLURM_TMPDIR')}/data\"\n", - "dataset_train = CIFAR10(root=datadir, train=True, download=False, transform=transform_train)\n", - "\n", - "train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, prefetch_factor=PRE_FETCH)" + "datadir = os.environ[\"CIFAR10_PATH\"] # f\"{os.getenv('SLURM_TMPDIR')}/data\"\n", + "dataset_train = CIFAR10(\n", + " root=datadir, train=True, download=False, transform=transform_train\n", + ")\n", + "\n", + "train_loader = DataLoader(\n", + " dataset_train,\n", + " batch_size=BATCH_SIZE,\n", + " num_workers=NUM_WORKERS,\n", + " prefetch_factor=PRE_FETCH,\n", + ")" ] }, { @@ -833,7 +856,7 @@ "torch.cuda.empty_cache()\n", "\n", "resnet = resnet152().cuda()\n", - "criterion = nn.CrossEntropyLoss().cuda() # Load the loss function on the GPU\n", + "criterion = nn.CrossEntropyLoss().cuda() # Load the loss function on the GPU\n", "optimizer = optim.SGD(net.parameters(), lr=0.001)" ] }, From a9781a1c921b7c281e65531a3535960c4e51fba7 Mon Sep 17 00:00:00 2001 From: Adam Tupper Date: Fri, 8 Mar 2024 14:12:13 -0500 Subject: [PATCH 2/3] Fix typos and matrix definitions. 
---
 GPU_Workshop.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPU_Workshop.ipynb b/GPU_Workshop.ipynb
index 9fe004b..c515083 100644
--- a/GPU_Workshop.ipynb
+++ b/GPU_Workshop.ipynb
@@ -564,7 +564,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "Finally, let's compare the execution time we obtained for the CPU with the execution time of the same function on the GPU. What can you say about the difference in performance as the number of samples gorw?"
+ "Finally, let's compare the execution time we obtained for the CPU with the execution time of the same function on the GPU. What can you say about the difference in performance as the number of samples grows?"
 ]
 },
 {
@@ -587,7 +587,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "So far you have solved a single-variable linear regression problem, that is, one where the matrix $X$ has dimensions $Nx2$ where $N$ is the number of samples and the second column of $X$ is filled with 1's. As it turns out, the one operation where you use $X$ in fit_model is a matrix multiplication, an operation that GPUs are able to massively parallelize. What do you think will happen if you add more columns to $X$? In other words, what will be the difference in performance between GPU an CPU in a **multivariate linear regression problem**?\n",
+ "So far you have solved a single-variable linear regression problem, that is, one where the matrix $X$ has dimensions $N \times 2$ where $N$ is the number of samples and the second column of $X$ is filled with 1's. As it turns out, the one operation where you use $X$ in fit_model is a matrix multiplication, an operation that GPUs are able to massively parallelize. What do you think will happen if you add more columns to $X$? In other words, what will be the difference in performance between GPU an CPU in a **multivariate linear regression problem**?\n",
 "\n",
 "First you will use a slightly altered function to generate multivariate data (i.e, $X$ with more than 2 columns). This time, the coefficients beta of the linear function used to generate the data will be chosen randomly:"
 ]
 },