diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..3a7e5af
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,13 @@
+version: "2"
+
+build:
+ os: "ubuntu-22.04"
+ tools:
+ python: "3.10"
+
+python:
+ install:
+ - requirements: docs/requirements.txt
+
+sphinx:
+ configuration: docs/source/conf.py
\ No newline at end of file
diff --git a/README.md b/README.md
index 65e2b22..3b10270 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,6 @@
-# intro-to-pytorch
+# Introduction to PyTorch
This repo is designed as a comprehensive starting point for those new to PyTorch and deep learning. It provides hands-on tutorials and examples to help you get acquainted with the core concepts and features of PyTorch, one of the most popular open-source machine learning libraries.
+
+**Documentation**: https://intro-to-pytorch.readthedocs.io/en/latest/index.html
+
+**Australian Research Environment (ARE)**: https://handson-with-gadi.readthedocs.io/en/latest/tutorial/login.html
diff --git a/data/pima-indians-diabetes.data.csv b/data/pima-indians-diabetes.data.csv
new file mode 100644
index 0000000..9c3e9b8
--- /dev/null
+++ b/data/pima-indians-diabetes.data.csv
@@ -0,0 +1,768 @@
+6,148,72,35,0,33.6,0.627,50,1
+1,85,66,29,0,26.6,0.351,31,0
+8,183,64,0,0,23.3,0.672,32,1
+1,89,66,23,94,28.1,0.167,21,0
+0,137,40,35,168,43.1,2.288,33,1
+5,116,74,0,0,25.6,0.201,30,0
+3,78,50,32,88,31.0,0.248,26,1
+10,115,0,0,0,35.3,0.134,29,0
+2,197,70,45,543,30.5,0.158,53,1
+8,125,96,0,0,0.0,0.232,54,1
+4,110,92,0,0,37.6,0.191,30,0
+10,168,74,0,0,38.0,0.537,34,1
+10,139,80,0,0,27.1,1.441,57,0
+1,189,60,23,846,30.1,0.398,59,1
+5,166,72,19,175,25.8,0.587,51,1
+7,100,0,0,0,30.0,0.484,32,1
+0,118,84,47,230,45.8,0.551,31,1
+7,107,74,0,0,29.6,0.254,31,1
+1,103,30,38,83,43.3,0.183,33,0
+1,115,70,30,96,34.6,0.529,32,1
+3,126,88,41,235,39.3,0.704,27,0
+8,99,84,0,0,35.4,0.388,50,0
+7,196,90,0,0,39.8,0.451,41,1
+9,119,80,35,0,29.0,0.263,29,1
+11,143,94,33,146,36.6,0.254,51,1
+10,125,70,26,115,31.1,0.205,41,1
+7,147,76,0,0,39.4,0.257,43,1
+1,97,66,15,140,23.2,0.487,22,0
+13,145,82,19,110,22.2,0.245,57,0
+5,117,92,0,0,34.1,0.337,38,0
+5,109,75,26,0,36.0,0.546,60,0
+3,158,76,36,245,31.6,0.851,28,1
+3,88,58,11,54,24.8,0.267,22,0
+6,92,92,0,0,19.9,0.188,28,0
+10,122,78,31,0,27.6,0.512,45,0
+4,103,60,33,192,24.0,0.966,33,0
+11,138,76,0,0,33.2,0.420,35,0
+9,102,76,37,0,32.9,0.665,46,1
+2,90,68,42,0,38.2,0.503,27,1
+4,111,72,47,207,37.1,1.390,56,1
+3,180,64,25,70,34.0,0.271,26,0
+7,133,84,0,0,40.2,0.696,37,0
+7,106,92,18,0,22.7,0.235,48,0
+9,171,110,24,240,45.4,0.721,54,1
+7,159,64,0,0,27.4,0.294,40,0
+0,180,66,39,0,42.0,1.893,25,1
+1,146,56,0,0,29.7,0.564,29,0
+2,71,70,27,0,28.0,0.586,22,0
+7,103,66,32,0,39.1,0.344,31,1
+7,105,0,0,0,0.0,0.305,24,0
+1,103,80,11,82,19.4,0.491,22,0
+1,101,50,15,36,24.2,0.526,26,0
+5,88,66,21,23,24.4,0.342,30,0
+8,176,90,34,300,33.7,0.467,58,1
+7,150,66,42,342,34.7,0.718,42,0
+1,73,50,10,0,23.0,0.248,21,0
+7,187,68,39,304,37.7,0.254,41,1
+0,100,88,60,110,46.8,0.962,31,0
+0,146,82,0,0,40.5,1.781,44,0
+0,105,64,41,142,41.5,0.173,22,0
+2,84,0,0,0,0.0,0.304,21,0
+8,133,72,0,0,32.9,0.270,39,1
+5,44,62,0,0,25.0,0.587,36,0
+2,141,58,34,128,25.4,0.699,24,0
+7,114,66,0,0,32.8,0.258,42,1
+5,99,74,27,0,29.0,0.203,32,0
+0,109,88,30,0,32.5,0.855,38,1
+2,109,92,0,0,42.7,0.845,54,0
+1,95,66,13,38,19.6,0.334,25,0
+4,146,85,27,100,28.9,0.189,27,0
+2,100,66,20,90,32.9,0.867,28,1
+5,139,64,35,140,28.6,0.411,26,0
+13,126,90,0,0,43.4,0.583,42,1
+4,129,86,20,270,35.1,0.231,23,0
+1,79,75,30,0,32.0,0.396,22,0
+1,0,48,20,0,24.7,0.140,22,0
+7,62,78,0,0,32.6,0.391,41,0
+5,95,72,33,0,37.7,0.370,27,0
+0,131,0,0,0,43.2,0.270,26,1
+2,112,66,22,0,25.0,0.307,24,0
+3,113,44,13,0,22.4,0.140,22,0
+2,74,0,0,0,0.0,0.102,22,0
+7,83,78,26,71,29.3,0.767,36,0
+0,101,65,28,0,24.6,0.237,22,0
+5,137,108,0,0,48.8,0.227,37,1
+2,110,74,29,125,32.4,0.698,27,0
+13,106,72,54,0,36.6,0.178,45,0
+2,100,68,25,71,38.5,0.324,26,0
+15,136,70,32,110,37.1,0.153,43,1
+1,107,68,19,0,26.5,0.165,24,0
+1,80,55,0,0,19.1,0.258,21,0
+4,123,80,15,176,32.0,0.443,34,0
+7,81,78,40,48,46.7,0.261,42,0
+4,134,72,0,0,23.8,0.277,60,1
+2,142,82,18,64,24.7,0.761,21,0
+6,144,72,27,228,33.9,0.255,40,0
+2,92,62,28,0,31.6,0.130,24,0
+1,71,48,18,76,20.4,0.323,22,0
+6,93,50,30,64,28.7,0.356,23,0
+1,122,90,51,220,49.7,0.325,31,1
+1,163,72,0,0,39.0,1.222,33,1
+1,151,60,0,0,26.1,0.179,22,0
+0,125,96,0,0,22.5,0.262,21,0
+1,81,72,18,40,26.6,0.283,24,0
+2,85,65,0,0,39.6,0.930,27,0
+1,126,56,29,152,28.7,0.801,21,0
+1,96,122,0,0,22.4,0.207,27,0
+4,144,58,28,140,29.5,0.287,37,0
+3,83,58,31,18,34.3,0.336,25,0
+0,95,85,25,36,37.4,0.247,24,1
+3,171,72,33,135,33.3,0.199,24,1
+8,155,62,26,495,34.0,0.543,46,1
+1,89,76,34,37,31.2,0.192,23,0
+4,76,62,0,0,34.0,0.391,25,0
+7,160,54,32,175,30.5,0.588,39,1
+4,146,92,0,0,31.2,0.539,61,1
+5,124,74,0,0,34.0,0.220,38,1
+5,78,48,0,0,33.7,0.654,25,0
+4,97,60,23,0,28.2,0.443,22,0
+4,99,76,15,51,23.2,0.223,21,0
+0,162,76,56,100,53.2,0.759,25,1
+6,111,64,39,0,34.2,0.260,24,0
+2,107,74,30,100,33.6,0.404,23,0
+5,132,80,0,0,26.8,0.186,69,0
+0,113,76,0,0,33.3,0.278,23,1
+1,88,30,42,99,55.0,0.496,26,1
+3,120,70,30,135,42.9,0.452,30,0
+1,118,58,36,94,33.3,0.261,23,0
+1,117,88,24,145,34.5,0.403,40,1
+0,105,84,0,0,27.9,0.741,62,1
+4,173,70,14,168,29.7,0.361,33,1
+9,122,56,0,0,33.3,1.114,33,1
+3,170,64,37,225,34.5,0.356,30,1
+8,84,74,31,0,38.3,0.457,39,0
+2,96,68,13,49,21.1,0.647,26,0
+2,125,60,20,140,33.8,0.088,31,0
+0,100,70,26,50,30.8,0.597,21,0
+0,93,60,25,92,28.7,0.532,22,0
+0,129,80,0,0,31.2,0.703,29,0
+5,105,72,29,325,36.9,0.159,28,0
+3,128,78,0,0,21.1,0.268,55,0
+5,106,82,30,0,39.5,0.286,38,0
+2,108,52,26,63,32.5,0.318,22,0
+10,108,66,0,0,32.4,0.272,42,1
+4,154,62,31,284,32.8,0.237,23,0
+0,102,75,23,0,0.0,0.572,21,0
+9,57,80,37,0,32.8,0.096,41,0
+2,106,64,35,119,30.5,1.400,34,0
+5,147,78,0,0,33.7,0.218,65,0
+2,90,70,17,0,27.3,0.085,22,0
+1,136,74,50,204,37.4,0.399,24,0
+4,114,65,0,0,21.9,0.432,37,0
+9,156,86,28,155,34.3,1.189,42,1
+1,153,82,42,485,40.6,0.687,23,0
+8,188,78,0,0,47.9,0.137,43,1
+7,152,88,44,0,50.0,0.337,36,1
+2,99,52,15,94,24.6,0.637,21,0
+1,109,56,21,135,25.2,0.833,23,0
+2,88,74,19,53,29.0,0.229,22,0
+17,163,72,41,114,40.9,0.817,47,1
+4,151,90,38,0,29.7,0.294,36,0
+7,102,74,40,105,37.2,0.204,45,0
+0,114,80,34,285,44.2,0.167,27,0
+2,100,64,23,0,29.7,0.368,21,0
+0,131,88,0,0,31.6,0.743,32,1
+6,104,74,18,156,29.9,0.722,41,1
+3,148,66,25,0,32.5,0.256,22,0
+4,120,68,0,0,29.6,0.709,34,0
+4,110,66,0,0,31.9,0.471,29,0
+3,111,90,12,78,28.4,0.495,29,0
+6,102,82,0,0,30.8,0.180,36,1
+6,134,70,23,130,35.4,0.542,29,1
+2,87,0,23,0,28.9,0.773,25,0
+1,79,60,42,48,43.5,0.678,23,0
+2,75,64,24,55,29.7,0.370,33,0
+8,179,72,42,130,32.7,0.719,36,1
+6,85,78,0,0,31.2,0.382,42,0
+0,129,110,46,130,67.1,0.319,26,1
+5,143,78,0,0,45.0,0.190,47,0
+5,130,82,0,0,39.1,0.956,37,1
+6,87,80,0,0,23.2,0.084,32,0
+0,119,64,18,92,34.9,0.725,23,0
+1,0,74,20,23,27.7,0.299,21,0
+5,73,60,0,0,26.8,0.268,27,0
+4,141,74,0,0,27.6,0.244,40,0
+7,194,68,28,0,35.9,0.745,41,1
+8,181,68,36,495,30.1,0.615,60,1
+1,128,98,41,58,32.0,1.321,33,1
+8,109,76,39,114,27.9,0.640,31,1
+5,139,80,35,160,31.6,0.361,25,1
+3,111,62,0,0,22.6,0.142,21,0
+9,123,70,44,94,33.1,0.374,40,0
+7,159,66,0,0,30.4,0.383,36,1
+11,135,0,0,0,52.3,0.578,40,1
+8,85,55,20,0,24.4,0.136,42,0
+5,158,84,41,210,39.4,0.395,29,1
+1,105,58,0,0,24.3,0.187,21,0
+3,107,62,13,48,22.9,0.678,23,1
+4,109,64,44,99,34.8,0.905,26,1
+4,148,60,27,318,30.9,0.150,29,1
+0,113,80,16,0,31.0,0.874,21,0
+1,138,82,0,0,40.1,0.236,28,0
+0,108,68,20,0,27.3,0.787,32,0
+2,99,70,16,44,20.4,0.235,27,0
+6,103,72,32,190,37.7,0.324,55,0
+5,111,72,28,0,23.9,0.407,27,0
+8,196,76,29,280,37.5,0.605,57,1
+5,162,104,0,0,37.7,0.151,52,1
+1,96,64,27,87,33.2,0.289,21,0
+7,184,84,33,0,35.5,0.355,41,1
+2,81,60,22,0,27.7,0.290,25,0
+0,147,85,54,0,42.8,0.375,24,0
+7,179,95,31,0,34.2,0.164,60,0
+0,140,65,26,130,42.6,0.431,24,1
+9,112,82,32,175,34.2,0.260,36,1
+12,151,70,40,271,41.8,0.742,38,1
+5,109,62,41,129,35.8,0.514,25,1
+6,125,68,30,120,30.0,0.464,32,0
+5,85,74,22,0,29.0,1.224,32,1
+5,112,66,0,0,37.8,0.261,41,1
+0,177,60,29,478,34.6,1.072,21,1
+2,158,90,0,0,31.6,0.805,66,1
+7,119,0,0,0,25.2,0.209,37,0
+7,142,60,33,190,28.8,0.687,61,0
+1,100,66,15,56,23.6,0.666,26,0
+1,87,78,27,32,34.6,0.101,22,0
+0,101,76,0,0,35.7,0.198,26,0
+3,162,52,38,0,37.2,0.652,24,1
+4,197,70,39,744,36.7,2.329,31,0
+0,117,80,31,53,45.2,0.089,24,0
+4,142,86,0,0,44.0,0.645,22,1
+6,134,80,37,370,46.2,0.238,46,1
+1,79,80,25,37,25.4,0.583,22,0
+4,122,68,0,0,35.0,0.394,29,0
+3,74,68,28,45,29.7,0.293,23,0
+4,171,72,0,0,43.6,0.479,26,1
+7,181,84,21,192,35.9,0.586,51,1
+0,179,90,27,0,44.1,0.686,23,1
+9,164,84,21,0,30.8,0.831,32,1
+0,104,76,0,0,18.4,0.582,27,0
+1,91,64,24,0,29.2,0.192,21,0
+4,91,70,32,88,33.1,0.446,22,0
+3,139,54,0,0,25.6,0.402,22,1
+6,119,50,22,176,27.1,1.318,33,1
+2,146,76,35,194,38.2,0.329,29,0
+9,184,85,15,0,30.0,1.213,49,1
+10,122,68,0,0,31.2,0.258,41,0
+0,165,90,33,680,52.3,0.427,23,0
+9,124,70,33,402,35.4,0.282,34,0
+1,111,86,19,0,30.1,0.143,23,0
+9,106,52,0,0,31.2,0.380,42,0
+2,129,84,0,0,28.0,0.284,27,0
+2,90,80,14,55,24.4,0.249,24,0
+0,86,68,32,0,35.8,0.238,25,0
+12,92,62,7,258,27.6,0.926,44,1
+1,113,64,35,0,33.6,0.543,21,1
+3,111,56,39,0,30.1,0.557,30,0
+2,114,68,22,0,28.7,0.092,25,0
+1,193,50,16,375,25.9,0.655,24,0
+11,155,76,28,150,33.3,1.353,51,1
+3,191,68,15,130,30.9,0.299,34,0
+3,141,0,0,0,30.0,0.761,27,1
+4,95,70,32,0,32.1,0.612,24,0
+3,142,80,15,0,32.4,0.200,63,0
+4,123,62,0,0,32.0,0.226,35,1
+5,96,74,18,67,33.6,0.997,43,0
+0,138,0,0,0,36.3,0.933,25,1
+2,128,64,42,0,40.0,1.101,24,0
+0,102,52,0,0,25.1,0.078,21,0
+2,146,0,0,0,27.5,0.240,28,1
+10,101,86,37,0,45.6,1.136,38,1
+2,108,62,32,56,25.2,0.128,21,0
+3,122,78,0,0,23.0,0.254,40,0
+1,71,78,50,45,33.2,0.422,21,0
+13,106,70,0,0,34.2,0.251,52,0
+2,100,70,52,57,40.5,0.677,25,0
+7,106,60,24,0,26.5,0.296,29,1
+0,104,64,23,116,27.8,0.454,23,0
+5,114,74,0,0,24.9,0.744,57,0
+2,108,62,10,278,25.3,0.881,22,0
+0,146,70,0,0,37.9,0.334,28,1
+10,129,76,28,122,35.9,0.280,39,0
+7,133,88,15,155,32.4,0.262,37,0
+7,161,86,0,0,30.4,0.165,47,1
+2,108,80,0,0,27.0,0.259,52,1
+7,136,74,26,135,26.0,0.647,51,0
+5,155,84,44,545,38.7,0.619,34,0
+1,119,86,39,220,45.6,0.808,29,1
+4,96,56,17,49,20.8,0.340,26,0
+5,108,72,43,75,36.1,0.263,33,0
+0,78,88,29,40,36.9,0.434,21,0
+0,107,62,30,74,36.6,0.757,25,1
+2,128,78,37,182,43.3,1.224,31,1
+1,128,48,45,194,40.5,0.613,24,1
+0,161,50,0,0,21.9,0.254,65,0
+6,151,62,31,120,35.5,0.692,28,0
+2,146,70,38,360,28.0,0.337,29,1
+0,126,84,29,215,30.7,0.520,24,0
+14,100,78,25,184,36.6,0.412,46,1
+8,112,72,0,0,23.6,0.840,58,0
+0,167,0,0,0,32.3,0.839,30,1
+2,144,58,33,135,31.6,0.422,25,1
+5,77,82,41,42,35.8,0.156,35,0
+5,115,98,0,0,52.9,0.209,28,1
+3,150,76,0,0,21.0,0.207,37,0
+2,120,76,37,105,39.7,0.215,29,0
+10,161,68,23,132,25.5,0.326,47,1
+0,137,68,14,148,24.8,0.143,21,0
+0,128,68,19,180,30.5,1.391,25,1
+2,124,68,28,205,32.9,0.875,30,1
+6,80,66,30,0,26.2,0.313,41,0
+0,106,70,37,148,39.4,0.605,22,0
+2,155,74,17,96,26.6,0.433,27,1
+3,113,50,10,85,29.5,0.626,25,0
+7,109,80,31,0,35.9,1.127,43,1
+2,112,68,22,94,34.1,0.315,26,0
+3,99,80,11,64,19.3,0.284,30,0
+3,182,74,0,0,30.5,0.345,29,1
+3,115,66,39,140,38.1,0.150,28,0
+6,194,78,0,0,23.5,0.129,59,1
+4,129,60,12,231,27.5,0.527,31,0
+3,112,74,30,0,31.6,0.197,25,1
+0,124,70,20,0,27.4,0.254,36,1
+13,152,90,33,29,26.8,0.731,43,1
+2,112,75,32,0,35.7,0.148,21,0
+1,157,72,21,168,25.6,0.123,24,0
+1,122,64,32,156,35.1,0.692,30,1
+10,179,70,0,0,35.1,0.200,37,0
+2,102,86,36,120,45.5,0.127,23,1
+6,105,70,32,68,30.8,0.122,37,0
+8,118,72,19,0,23.1,1.476,46,0
+2,87,58,16,52,32.7,0.166,25,0
+1,180,0,0,0,43.3,0.282,41,1
+12,106,80,0,0,23.6,0.137,44,0
+1,95,60,18,58,23.9,0.260,22,0
+0,165,76,43,255,47.9,0.259,26,0
+0,117,0,0,0,33.8,0.932,44,0
+5,115,76,0,0,31.2,0.343,44,1
+9,152,78,34,171,34.2,0.893,33,1
+7,178,84,0,0,39.9,0.331,41,1
+1,130,70,13,105,25.9,0.472,22,0
+1,95,74,21,73,25.9,0.673,36,0
+1,0,68,35,0,32.0,0.389,22,0
+5,122,86,0,0,34.7,0.290,33,0
+8,95,72,0,0,36.8,0.485,57,0
+8,126,88,36,108,38.5,0.349,49,0
+1,139,46,19,83,28.7,0.654,22,0
+3,116,0,0,0,23.5,0.187,23,0
+3,99,62,19,74,21.8,0.279,26,0
+5,0,80,32,0,41.0,0.346,37,1
+4,92,80,0,0,42.2,0.237,29,0
+4,137,84,0,0,31.2,0.252,30,0
+3,61,82,28,0,34.4,0.243,46,0
+1,90,62,12,43,27.2,0.580,24,0
+3,90,78,0,0,42.7,0.559,21,0
+9,165,88,0,0,30.4,0.302,49,1
+1,125,50,40,167,33.3,0.962,28,1
+13,129,0,30,0,39.9,0.569,44,1
+12,88,74,40,54,35.3,0.378,48,0
+1,196,76,36,249,36.5,0.875,29,1
+5,189,64,33,325,31.2,0.583,29,1
+5,158,70,0,0,29.8,0.207,63,0
+5,103,108,37,0,39.2,0.305,65,0
+4,146,78,0,0,38.5,0.520,67,1
+4,147,74,25,293,34.9,0.385,30,0
+5,99,54,28,83,34.0,0.499,30,0
+6,124,72,0,0,27.6,0.368,29,1
+0,101,64,17,0,21.0,0.252,21,0
+3,81,86,16,66,27.5,0.306,22,0
+1,133,102,28,140,32.8,0.234,45,1
+3,173,82,48,465,38.4,2.137,25,1
+0,118,64,23,89,0.0,1.731,21,0
+0,84,64,22,66,35.8,0.545,21,0
+2,105,58,40,94,34.9,0.225,25,0
+2,122,52,43,158,36.2,0.816,28,0
+12,140,82,43,325,39.2,0.528,58,1
+0,98,82,15,84,25.2,0.299,22,0
+1,87,60,37,75,37.2,0.509,22,0
+4,156,75,0,0,48.3,0.238,32,1
+0,93,100,39,72,43.4,1.021,35,0
+1,107,72,30,82,30.8,0.821,24,0
+0,105,68,22,0,20.0,0.236,22,0
+1,109,60,8,182,25.4,0.947,21,0
+1,90,62,18,59,25.1,1.268,25,0
+1,125,70,24,110,24.3,0.221,25,0
+1,119,54,13,50,22.3,0.205,24,0
+5,116,74,29,0,32.3,0.660,35,1
+8,105,100,36,0,43.3,0.239,45,1
+5,144,82,26,285,32.0,0.452,58,1
+3,100,68,23,81,31.6,0.949,28,0
+1,100,66,29,196,32.0,0.444,42,0
+5,166,76,0,0,45.7,0.340,27,1
+1,131,64,14,415,23.7,0.389,21,0
+4,116,72,12,87,22.1,0.463,37,0
+4,158,78,0,0,32.9,0.803,31,1
+2,127,58,24,275,27.7,1.600,25,0
+3,96,56,34,115,24.7,0.944,39,0
+0,131,66,40,0,34.3,0.196,22,1
+3,82,70,0,0,21.1,0.389,25,0
+3,193,70,31,0,34.9,0.241,25,1
+4,95,64,0,0,32.0,0.161,31,1
+6,137,61,0,0,24.2,0.151,55,0
+5,136,84,41,88,35.0,0.286,35,1
+9,72,78,25,0,31.6,0.280,38,0
+5,168,64,0,0,32.9,0.135,41,1
+2,123,48,32,165,42.1,0.520,26,0
+4,115,72,0,0,28.9,0.376,46,1
+0,101,62,0,0,21.9,0.336,25,0
+8,197,74,0,0,25.9,1.191,39,1
+1,172,68,49,579,42.4,0.702,28,1
+6,102,90,39,0,35.7,0.674,28,0
+1,112,72,30,176,34.4,0.528,25,0
+1,143,84,23,310,42.4,1.076,22,0
+1,143,74,22,61,26.2,0.256,21,0
+0,138,60,35,167,34.6,0.534,21,1
+3,173,84,33,474,35.7,0.258,22,1
+1,97,68,21,0,27.2,1.095,22,0
+4,144,82,32,0,38.5,0.554,37,1
+1,83,68,0,0,18.2,0.624,27,0
+3,129,64,29,115,26.4,0.219,28,1
+1,119,88,41,170,45.3,0.507,26,0
+2,94,68,18,76,26.0,0.561,21,0
+0,102,64,46,78,40.6,0.496,21,0
+2,115,64,22,0,30.8,0.421,21,0
+8,151,78,32,210,42.9,0.516,36,1
+4,184,78,39,277,37.0,0.264,31,1
+0,94,0,0,0,0.0,0.256,25,0
+1,181,64,30,180,34.1,0.328,38,1
+0,135,94,46,145,40.6,0.284,26,0
+1,95,82,25,180,35.0,0.233,43,1
+2,99,0,0,0,22.2,0.108,23,0
+3,89,74,16,85,30.4,0.551,38,0
+1,80,74,11,60,30.0,0.527,22,0
+2,139,75,0,0,25.6,0.167,29,0
+1,90,68,8,0,24.5,1.138,36,0
+0,141,0,0,0,42.4,0.205,29,1
+12,140,85,33,0,37.4,0.244,41,0
+5,147,75,0,0,29.9,0.434,28,0
+1,97,70,15,0,18.2,0.147,21,0
+6,107,88,0,0,36.8,0.727,31,0
+0,189,104,25,0,34.3,0.435,41,1
+2,83,66,23,50,32.2,0.497,22,0
+4,117,64,27,120,33.2,0.230,24,0
+8,108,70,0,0,30.5,0.955,33,1
+4,117,62,12,0,29.7,0.380,30,1
+0,180,78,63,14,59.4,2.420,25,1
+1,100,72,12,70,25.3,0.658,28,0
+0,95,80,45,92,36.5,0.330,26,0
+0,104,64,37,64,33.6,0.510,22,1
+0,120,74,18,63,30.5,0.285,26,0
+1,82,64,13,95,21.2,0.415,23,0
+2,134,70,0,0,28.9,0.542,23,1
+0,91,68,32,210,39.9,0.381,25,0
+2,119,0,0,0,19.6,0.832,72,0
+2,100,54,28,105,37.8,0.498,24,0
+14,175,62,30,0,33.6,0.212,38,1
+1,135,54,0,0,26.7,0.687,62,0
+5,86,68,28,71,30.2,0.364,24,0
+10,148,84,48,237,37.6,1.001,51,1
+9,134,74,33,60,25.9,0.460,81,0
+9,120,72,22,56,20.8,0.733,48,0
+1,71,62,0,0,21.8,0.416,26,0
+8,74,70,40,49,35.3,0.705,39,0
+5,88,78,30,0,27.6,0.258,37,0
+10,115,98,0,0,24.0,1.022,34,0
+0,124,56,13,105,21.8,0.452,21,0
+0,74,52,10,36,27.8,0.269,22,0
+0,97,64,36,100,36.8,0.600,25,0
+8,120,0,0,0,30.0,0.183,38,1
+6,154,78,41,140,46.1,0.571,27,0
+1,144,82,40,0,41.3,0.607,28,0
+0,137,70,38,0,33.2,0.170,22,0
+0,119,66,27,0,38.8,0.259,22,0
+7,136,90,0,0,29.9,0.210,50,0
+4,114,64,0,0,28.9,0.126,24,0
+0,137,84,27,0,27.3,0.231,59,0
+2,105,80,45,191,33.7,0.711,29,1
+7,114,76,17,110,23.8,0.466,31,0
+8,126,74,38,75,25.9,0.162,39,0
+4,132,86,31,0,28.0,0.419,63,0
+3,158,70,30,328,35.5,0.344,35,1
+0,123,88,37,0,35.2,0.197,29,0
+4,85,58,22,49,27.8,0.306,28,0
+0,84,82,31,125,38.2,0.233,23,0
+0,145,0,0,0,44.2,0.630,31,1
+0,135,68,42,250,42.3,0.365,24,1
+1,139,62,41,480,40.7,0.536,21,0
+0,173,78,32,265,46.5,1.159,58,0
+4,99,72,17,0,25.6,0.294,28,0
+8,194,80,0,0,26.1,0.551,67,0
+2,83,65,28,66,36.8,0.629,24,0
+2,89,90,30,0,33.5,0.292,42,0
+4,99,68,38,0,32.8,0.145,33,0
+4,125,70,18,122,28.9,1.144,45,1
+3,80,0,0,0,0.0,0.174,22,0
+6,166,74,0,0,26.6,0.304,66,0
+5,110,68,0,0,26.0,0.292,30,0
+2,81,72,15,76,30.1,0.547,25,0
+7,195,70,33,145,25.1,0.163,55,1
+6,154,74,32,193,29.3,0.839,39,0
+2,117,90,19,71,25.2,0.313,21,0
+3,84,72,32,0,37.2,0.267,28,0
+6,0,68,41,0,39.0,0.727,41,1
+7,94,64,25,79,33.3,0.738,41,0
+3,96,78,39,0,37.3,0.238,40,0
+10,75,82,0,0,33.3,0.263,38,0
+0,180,90,26,90,36.5,0.314,35,1
+1,130,60,23,170,28.6,0.692,21,0
+2,84,50,23,76,30.4,0.968,21,0
+8,120,78,0,0,25.0,0.409,64,0
+12,84,72,31,0,29.7,0.297,46,1
+0,139,62,17,210,22.1,0.207,21,0
+9,91,68,0,0,24.2,0.200,58,0
+2,91,62,0,0,27.3,0.525,22,0
+3,99,54,19,86,25.6,0.154,24,0
+3,163,70,18,105,31.6,0.268,28,1
+9,145,88,34,165,30.3,0.771,53,1
+7,125,86,0,0,37.6,0.304,51,0
+13,76,60,0,0,32.8,0.180,41,0
+6,129,90,7,326,19.6,0.582,60,0
+2,68,70,32,66,25.0,0.187,25,0
+3,124,80,33,130,33.2,0.305,26,0
+6,114,0,0,0,0.0,0.189,26,0
+9,130,70,0,0,34.2,0.652,45,1
+3,125,58,0,0,31.6,0.151,24,0
+3,87,60,18,0,21.8,0.444,21,0
+1,97,64,19,82,18.2,0.299,21,0
+3,116,74,15,105,26.3,0.107,24,0
+0,117,66,31,188,30.8,0.493,22,0
+0,111,65,0,0,24.6,0.660,31,0
+2,122,60,18,106,29.8,0.717,22,0
+0,107,76,0,0,45.3,0.686,24,0
+1,86,66,52,65,41.3,0.917,29,0
+6,91,0,0,0,29.8,0.501,31,0
+1,77,56,30,56,33.3,1.251,24,0
+4,132,0,0,0,32.9,0.302,23,1
+0,105,90,0,0,29.6,0.197,46,0
+0,57,60,0,0,21.7,0.735,67,0
+0,127,80,37,210,36.3,0.804,23,0
+3,129,92,49,155,36.4,0.968,32,1
+8,100,74,40,215,39.4,0.661,43,1
+3,128,72,25,190,32.4,0.549,27,1
+10,90,85,32,0,34.9,0.825,56,1
+4,84,90,23,56,39.5,0.159,25,0
+1,88,78,29,76,32.0,0.365,29,0
+8,186,90,35,225,34.5,0.423,37,1
+5,187,76,27,207,43.6,1.034,53,1
+4,131,68,21,166,33.1,0.160,28,0
+1,164,82,43,67,32.8,0.341,50,0
+4,189,110,31,0,28.5,0.680,37,0
+1,116,70,28,0,27.4,0.204,21,0
+3,84,68,30,106,31.9,0.591,25,0
+6,114,88,0,0,27.8,0.247,66,0
+1,88,62,24,44,29.9,0.422,23,0
+1,84,64,23,115,36.9,0.471,28,0
+7,124,70,33,215,25.5,0.161,37,0
+1,97,70,40,0,38.1,0.218,30,0
+8,110,76,0,0,27.8,0.237,58,0
+11,103,68,40,0,46.2,0.126,42,0
+11,85,74,0,0,30.1,0.300,35,0
+6,125,76,0,0,33.8,0.121,54,1
+0,198,66,32,274,41.3,0.502,28,1
+1,87,68,34,77,37.6,0.401,24,0
+6,99,60,19,54,26.9,0.497,32,0
+0,91,80,0,0,32.4,0.601,27,0
+2,95,54,14,88,26.1,0.748,22,0
+1,99,72,30,18,38.6,0.412,21,0
+6,92,62,32,126,32.0,0.085,46,0
+4,154,72,29,126,31.3,0.338,37,0
+0,121,66,30,165,34.3,0.203,33,1
+3,78,70,0,0,32.5,0.270,39,0
+2,130,96,0,0,22.6,0.268,21,0
+3,111,58,31,44,29.5,0.430,22,0
+2,98,60,17,120,34.7,0.198,22,0
+1,143,86,30,330,30.1,0.892,23,0
+1,119,44,47,63,35.5,0.280,25,0
+6,108,44,20,130,24.0,0.813,35,0
+2,118,80,0,0,42.9,0.693,21,1
+10,133,68,0,0,27.0,0.245,36,0
+2,197,70,99,0,34.7,0.575,62,1
+0,151,90,46,0,42.1,0.371,21,1
+6,109,60,27,0,25.0,0.206,27,0
+12,121,78,17,0,26.5,0.259,62,0
+8,100,76,0,0,38.7,0.190,42,0
+8,124,76,24,600,28.7,0.687,52,1
+1,93,56,11,0,22.5,0.417,22,0
+8,143,66,0,0,34.9,0.129,41,1
+6,103,66,0,0,24.3,0.249,29,0
+3,176,86,27,156,33.3,1.154,52,1
+0,73,0,0,0,21.1,0.342,25,0
+11,111,84,40,0,46.8,0.925,45,1
+2,112,78,50,140,39.4,0.175,24,0
+3,132,80,0,0,34.4,0.402,44,1
+2,82,52,22,115,28.5,1.699,25,0
+6,123,72,45,230,33.6,0.733,34,0
+0,188,82,14,185,32.0,0.682,22,1
+0,67,76,0,0,45.3,0.194,46,0
+1,89,24,19,25,27.8,0.559,21,0
+1,173,74,0,0,36.8,0.088,38,1
+1,109,38,18,120,23.1,0.407,26,0
+1,108,88,19,0,27.1,0.400,24,0
+6,96,0,0,0,23.7,0.190,28,0
+1,124,74,36,0,27.8,0.100,30,0
+7,150,78,29,126,35.2,0.692,54,1
+4,183,0,0,0,28.4,0.212,36,1
+1,124,60,32,0,35.8,0.514,21,0
+1,181,78,42,293,40.0,1.258,22,1
+1,92,62,25,41,19.5,0.482,25,0
+0,152,82,39,272,41.5,0.270,27,0
+1,111,62,13,182,24.0,0.138,23,0
+3,106,54,21,158,30.9,0.292,24,0
+3,174,58,22,194,32.9,0.593,36,1
+7,168,88,42,321,38.2,0.787,40,1
+6,105,80,28,0,32.5,0.878,26,0
+11,138,74,26,144,36.1,0.557,50,1
+3,106,72,0,0,25.8,0.207,27,0
+6,117,96,0,0,28.7,0.157,30,0
+2,68,62,13,15,20.1,0.257,23,0
+9,112,82,24,0,28.2,1.282,50,1
+0,119,0,0,0,32.4,0.141,24,1
+2,112,86,42,160,38.4,0.246,28,0
+2,92,76,20,0,24.2,1.698,28,0
+6,183,94,0,0,40.8,1.461,45,0
+0,94,70,27,115,43.5,0.347,21,0
+2,108,64,0,0,30.8,0.158,21,0
+4,90,88,47,54,37.7,0.362,29,0
+0,125,68,0,0,24.7,0.206,21,0
+0,132,78,0,0,32.4,0.393,21,0
+5,128,80,0,0,34.6,0.144,45,0
+4,94,65,22,0,24.7,0.148,21,0
+7,114,64,0,0,27.4,0.732,34,1
+0,102,78,40,90,34.5,0.238,24,0
+2,111,60,0,0,26.2,0.343,23,0
+1,128,82,17,183,27.5,0.115,22,0
+10,92,62,0,0,25.9,0.167,31,0
+13,104,72,0,0,31.2,0.465,38,1
+5,104,74,0,0,28.8,0.153,48,0
+2,94,76,18,66,31.6,0.649,23,0
+7,97,76,32,91,40.9,0.871,32,1
+1,100,74,12,46,19.5,0.149,28,0
+0,102,86,17,105,29.3,0.695,27,0
+4,128,70,0,0,34.3,0.303,24,0
+6,147,80,0,0,29.5,0.178,50,1
+4,90,0,0,0,28.0,0.610,31,0
+3,103,72,30,152,27.6,0.730,27,0
+2,157,74,35,440,39.4,0.134,30,0
+1,167,74,17,144,23.4,0.447,33,1
+0,179,50,36,159,37.8,0.455,22,1
+11,136,84,35,130,28.3,0.260,42,1
+0,107,60,25,0,26.4,0.133,23,0
+1,91,54,25,100,25.2,0.234,23,0
+1,117,60,23,106,33.8,0.466,27,0
+5,123,74,40,77,34.1,0.269,28,0
+2,120,54,0,0,26.8,0.455,27,0
+1,106,70,28,135,34.2,0.142,22,0
+2,155,52,27,540,38.7,0.240,25,1
+2,101,58,35,90,21.8,0.155,22,0
+1,120,80,48,200,38.9,1.162,41,0
+11,127,106,0,0,39.0,0.190,51,0
+3,80,82,31,70,34.2,1.292,27,1
+10,162,84,0,0,27.7,0.182,54,0
+1,199,76,43,0,42.9,1.394,22,1
+8,167,106,46,231,37.6,0.165,43,1
+9,145,80,46,130,37.9,0.637,40,1
+6,115,60,39,0,33.7,0.245,40,1
+1,112,80,45,132,34.8,0.217,24,0
+4,145,82,18,0,32.5,0.235,70,1
+10,111,70,27,0,27.5,0.141,40,1
+6,98,58,33,190,34.0,0.430,43,0
+9,154,78,30,100,30.9,0.164,45,0
+6,165,68,26,168,33.6,0.631,49,0
+1,99,58,10,0,25.4,0.551,21,0
+10,68,106,23,49,35.5,0.285,47,0
+3,123,100,35,240,57.3,0.880,22,0
+8,91,82,0,0,35.6,0.587,68,0
+6,195,70,0,0,30.9,0.328,31,1
+9,156,86,0,0,24.8,0.230,53,1
+0,93,60,0,0,35.3,0.263,25,0
+3,121,52,0,0,36.0,0.127,25,1
+2,101,58,17,265,24.2,0.614,23,0
+2,56,56,28,45,24.2,0.332,22,0
+0,162,76,36,0,49.6,0.364,26,1
+0,95,64,39,105,44.6,0.366,22,0
+4,125,80,0,0,32.3,0.536,27,1
+5,136,82,0,0,0.0,0.640,69,0
+2,129,74,26,205,33.2,0.591,25,0
+3,130,64,0,0,23.1,0.314,22,0
+1,107,50,19,0,28.3,0.181,29,0
+1,140,74,26,180,24.1,0.828,23,0
+1,144,82,46,180,46.1,0.335,46,1
+8,107,80,0,0,24.6,0.856,34,0
+13,158,114,0,0,42.3,0.257,44,1
+2,121,70,32,95,39.1,0.886,23,0
+7,129,68,49,125,38.5,0.439,43,1
+2,90,60,0,0,23.5,0.191,25,0
+7,142,90,24,480,30.4,0.128,43,1
+3,169,74,19,125,29.9,0.268,31,1
+0,99,0,0,0,25.0,0.253,22,0
+4,127,88,11,155,34.5,0.598,28,0
+4,118,70,0,0,44.5,0.904,26,0
+2,122,76,27,200,35.9,0.483,26,0
+6,125,78,31,0,27.6,0.565,49,1
+1,168,88,29,0,35.0,0.905,52,1
+2,129,0,0,0,38.5,0.304,41,0
+4,110,76,20,100,28.4,0.118,27,0
+6,80,80,36,0,39.8,0.177,28,0
+10,115,0,0,0,0.0,0.261,30,1
+2,127,46,21,335,34.4,0.176,22,0
+9,164,78,0,0,32.8,0.148,45,1
+2,93,64,32,160,38.0,0.674,23,1
+3,158,64,13,387,31.2,0.295,24,0
+5,126,78,27,22,29.6,0.439,40,0
+10,129,62,36,0,41.2,0.441,38,1
+0,134,58,20,291,26.4,0.352,21,0
+3,102,74,0,0,29.5,0.121,32,0
+7,187,50,33,392,33.9,0.826,34,1
+3,173,78,39,185,33.8,0.970,31,1
+10,94,72,18,0,23.1,0.595,56,0
+1,108,60,46,178,35.5,0.415,24,0
+5,97,76,27,0,35.6,0.378,52,1
+4,83,86,19,0,29.3,0.317,34,0
+1,114,66,36,200,38.1,0.289,21,0
+1,149,68,29,127,29.3,0.349,42,1
+5,117,86,30,105,39.1,0.251,42,0
+1,111,94,0,0,32.8,0.265,45,0
+4,112,78,40,0,39.4,0.236,38,0
+1,116,78,29,180,36.1,0.496,25,0
+0,141,84,26,0,32.4,0.433,22,0
+2,175,88,0,0,22.9,0.326,22,0
+2,92,52,0,0,30.1,0.141,22,0
+3,130,78,23,79,28.4,0.323,34,1
+8,120,86,0,0,28.4,0.259,22,1
+2,174,88,37,120,44.5,0.646,24,1
+2,106,56,27,165,29.0,0.426,22,0
+2,105,75,0,0,23.3,0.560,53,0
+4,95,60,32,0,35.4,0.284,28,0
+0,126,86,27,120,27.4,0.515,21,0
+8,65,72,23,0,32.0,0.600,42,0
+2,99,60,17,160,36.6,0.453,21,0
+1,102,74,0,0,39.5,0.293,42,1
+11,120,80,37,150,42.3,0.785,48,1
+3,102,44,20,94,30.8,0.400,26,0
+1,109,58,18,116,28.5,0.219,22,0
+9,140,94,0,0,32.7,0.734,45,1
+13,153,88,37,140,40.6,1.174,39,0
+12,100,84,33,105,30.0,0.488,46,0
+1,147,94,41,0,49.3,0.358,27,1
+1,81,74,41,57,46.3,1.096,32,0
+3,187,70,22,200,36.4,0.408,36,1
+6,162,62,0,0,24.3,0.178,50,1
+4,136,70,0,0,31.2,1.182,22,1
+1,121,78,39,74,39.0,0.261,28,0
+3,108,62,24,0,26.0,0.223,25,0
+0,181,88,44,510,43.3,0.222,26,1
+8,154,78,32,0,32.4,0.443,45,1
+1,128,88,39,110,36.5,1.057,37,1
+7,137,90,41,0,32.0,0.391,39,0
+0,123,72,0,0,36.3,0.258,52,1
+1,106,76,0,0,37.5,0.197,26,0
+6,190,92,0,0,35.5,0.278,66,1
+2,88,58,26,16,28.4,0.766,22,0
+9,170,74,31,0,44.0,0.403,43,1
+9,89,62,0,0,22.5,0.142,33,0
+10,101,76,48,180,32.9,0.171,63,0
+2,122,70,27,0,36.8,0.340,27,0
+5,121,72,23,112,26.2,0.245,30,0
+1,126,60,0,0,30.1,0.349,47,1
+1,93,70,31,0,30.4,0.315,23,0
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..c26c3da
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,5 @@
+sphinx==7.1.2
+sphinx-rtd-theme==1.3.0rc1
+#sphinxcontrib-pseudocode==0.7.0
+#sphinxcontrib-jsmath==1.0.1
+#sphinxcontrib-plantuml==0.30
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..0c05c0b
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,40 @@
+# Configuration file for the Sphinx documentation builder.
+
+# -- Project information
+
+project = 'Introduction to Neural Networks and PyTorch'
+copyright = '2024, National Computational Infrastructure'
+author = 'Joseph John'
+
+release = '0.1'  # NOTE(review): Sphinx documents `release` as the full version string; this looks swapped with `version` - confirm
+version = '0.1.0'  # NOTE(review): Sphinx documents `version` as the short X.Y form; this looks swapped with `release` - confirm
+
+# -- General configuration
+
+extensions = [
+ 'sphinx.ext.duration',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.mathjax',
+ #'sphinx.ext.graphviz',
+ #'sphinxcontrib.pseudocode',
+ #'sphinxcontrib.plantuml',
+]
+
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3/', None),
+ 'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
+}
+intersphinx_disabled_domains = ['std']  # NOTE(review): released Sphinx documents `intersphinx_disabled_reftypes`; confirm this option name is actually honoured
+
+templates_path = ['_templates']
+
+# -- Options for HTML output
+
+html_theme = 'sphinx_rtd_theme'
+
+# -- Options for EPUB output
+epub_show_urls = 'footnote'  # how link URLs are shown in EPUB output
+
diff --git a/docs/source/figs/.DS_Store b/docs/source/figs/.DS_Store
new file mode 100644
index 0000000..c4ef5c5
Binary files /dev/null and b/docs/source/figs/.DS_Store differ
diff --git a/docs/source/figs/2layer_NN b/docs/source/figs/2layer_NN
new file mode 100644
index 0000000..859c853
--- /dev/null
+++ b/docs/source/figs/2layer_NN
@@ -0,0 +1,121 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/source/figs/2layer_NN.drawio b/docs/source/figs/2layer_NN.drawio
new file mode 100644
index 0000000..fa8cd25
--- /dev/null
+++ b/docs/source/figs/2layer_NN.drawio
@@ -0,0 +1,121 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/source/figs/2layer_NN.drawio.png b/docs/source/figs/2layer_NN.drawio.png
new file mode 100644
index 0000000..de8a0dc
Binary files /dev/null and b/docs/source/figs/2layer_NN.drawio.png differ
diff --git a/docs/source/figs/activation.drawio b/docs/source/figs/activation.drawio
new file mode 100644
index 0000000..878868b
--- /dev/null
+++ b/docs/source/figs/activation.drawio
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/source/figs/activation.drawio.png b/docs/source/figs/activation.drawio.png
new file mode 100644
index 0000000..72c24e3
Binary files /dev/null and b/docs/source/figs/activation.drawio.png differ
diff --git a/docs/source/figs/comp_graph.drawio b/docs/source/figs/comp_graph.drawio
new file mode 100644
index 0000000..f8d7180
--- /dev/null
+++ b/docs/source/figs/comp_graph.drawio
@@ -0,0 +1,58 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/source/figs/comp_graph.drawio.png b/docs/source/figs/comp_graph.drawio.png
new file mode 100644
index 0000000..65a03c2
Binary files /dev/null and b/docs/source/figs/comp_graph.drawio.png differ
diff --git a/docs/source/figs/global_local.png b/docs/source/figs/global_local.png
new file mode 100644
index 0000000..8fd952f
Binary files /dev/null and b/docs/source/figs/global_local.png differ
diff --git a/docs/source/figs/gradient-descent.png b/docs/source/figs/gradient-descent.png
new file mode 100644
index 0000000..1aad3b3
Binary files /dev/null and b/docs/source/figs/gradient-descent.png differ
diff --git a/docs/source/figs/gradient.png b/docs/source/figs/gradient.png
new file mode 100644
index 0000000..f075ac7
Binary files /dev/null and b/docs/source/figs/gradient.png differ
diff --git a/docs/source/figs/layers.png b/docs/source/figs/layers.png
new file mode 100644
index 0000000..b758f03
Binary files /dev/null and b/docs/source/figs/layers.png differ
diff --git a/docs/source/figs/loss.png b/docs/source/figs/loss.png
new file mode 100644
index 0000000..39eb68b
Binary files /dev/null and b/docs/source/figs/loss.png differ
diff --git a/docs/source/figs/neuron.drawio b/docs/source/figs/neuron.drawio
new file mode 100644
index 0000000..35f172f
--- /dev/null
+++ b/docs/source/figs/neuron.drawio
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/source/figs/neuron.drawio.png b/docs/source/figs/neuron.drawio.png
new file mode 100644
index 0000000..ef19563
Binary files /dev/null and b/docs/source/figs/neuron.drawio.png differ
diff --git a/docs/source/figs/pinning.png b/docs/source/figs/pinning.png
new file mode 100644
index 0000000..db84c2e
Binary files /dev/null and b/docs/source/figs/pinning.png differ
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..1b879ae
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,24 @@
+Introduction to Neural Networks and PyTorch
+===========================================
+
+This workshop provides an introduction to neural networks using PyTorch.
+
+.. note::
+
+ This project is under active development.
+
+Contents
+--------
+
+.. toctree::
+
+ prerequisite
+ outcomes
+ modules
+ packages
+ tutorial
+ references
+
+
+
+
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
new file mode 100644
index 0000000..d2b84c7
--- /dev/null
+++ b/docs/source/modules.rst
@@ -0,0 +1,34 @@
+Modules
+=======
+
+.. note::
+ 1. python3/3.11.0
+ 2. cuda/12.3.2
+
+Modules are how we manage software on most HPC machines. We can see all the available modules using the command
+
+.. code-block:: console
+ :linenos:
+
+ module avail
+
+If we want to load the module *python3/3.11.0*, we can use the command
+
+.. code-block:: console
+ :linenos:
+
+ module load python3/3.11.0
+
+If we want to unload the same module, we can use the command
+
+.. code-block:: console
+ :linenos:
+
+ module unload python3/3.11.0
+
+We can unload all the modules using the command
+
+.. code-block:: console
+ :linenos:
+
+ module purge
\ No newline at end of file
diff --git a/docs/source/outcomes.rst b/docs/source/outcomes.rst
new file mode 100644
index 0000000..f3ae1fe
--- /dev/null
+++ b/docs/source/outcomes.rst
@@ -0,0 +1,12 @@
+Learning Outcomes
+=================
+
+.. note::
+ #. Learn the theoretical aspects of Neural Networks.
+ #. Understand how tensors work in PyTorch.
+ #. Learn how to build a Neural Network in PyTorch.
+
+In this workshop, you will learn the basics of PyTorch, including tensor operations, dynamic computation graphs, and neural network modules.
+You will also gain practical experience in building and training neural networks using PyTorch.
+
+
diff --git a/docs/source/packages.rst b/docs/source/packages.rst
new file mode 100644
index 0000000..b09f447
--- /dev/null
+++ b/docs/source/packages.rst
@@ -0,0 +1,60 @@
+Python Virtual Environment
+==========================
+
+.. note::
+
+ #. torch
+ #. torchvision
+ #. torchaudio
+ #. NumPy
+ #. Matplotlib
+ #. jupyterlab
+ #. pandas
+
+
+In this workshop, we will use a Python virtual environment to manage all the required Python packages. A Python virtual environment is an isolated
+workspace that allows you to manage project-specific dependencies without affecting the global Python installation or other projects. By creating a
+virtual environment, you can install and manage libraries and packages independently, ensuring that each project has its own set of dependencies and
+avoiding version conflicts. This isolation helps maintain consistent and reproducible development environments.
+
+We've already set up the Python virtual environment for this workshop, so you *don't need* to install one separately. However, the following
+commands will guide you on how to create one if necessary.
+
+To get started with a Python virtual environment, load the Python module you want to use. In this workshop, we will be using *python3/3.11.0*.
+
+.. code-block:: console
+ :linenos:
+
+ module load python3/3.11.0 cuda/12.3.2
+
+Create the Python virtual environment.
+
+.. code-block:: console
+ :linenos:
+
+ python3 -m venv my_env
+
+Activate the Python virtual environment.
+
+.. code-block:: console
+ :linenos:
+
+ source my_env/bin/activate
+
+Install all the required Python packages.
+
+.. code-block:: console
+ :linenos:
+
+ python3 -m pip install python-papi numpy codetiming numba mpi4py
+ python3 -m pip install torch torchvision torchaudio
+ python3 -m pip install jupyterlab
+ python3 -m pip install matplotlib
+
+You can deactivate the virtual environment once you are done with it.
+
+.. code-block:: console
+ :linenos:
+
+ deactivate
+
\ No newline at end of file
diff --git a/docs/source/prerequisite.rst b/docs/source/prerequisite.rst
new file mode 100644
index 0000000..34b2c25
--- /dev/null
+++ b/docs/source/prerequisite.rst
@@ -0,0 +1,35 @@
+Prerequisite
+============
+
+.. note::
+ #. Experience with Python.
+ #. Experience with Jupyter notebooks.
+ #. Experience with bash or similar Unix shells.
+
+ It's also beneficial to have a solid understanding of *matrix operations* and *differential calculus*.
+ While this is not required for programming, it is essential for the theoretical aspects of neural
+ networks.
+
+ This workshop assumes that you have experience coding in Python and are familiar with using a Unix shell.
+ If you are using a Windows machine, please make sure you have a shell that supports SSH. Windows users can either use
+ `PowerShell <https://learn.microsoft.com/en-us/powershell/>`_ or the `Windows Subsystem for Linux <https://learn.microsoft.com/en-us/windows/wsl/>`_.
+
+.. important::
+ For a smooth setup on the supercomputer system, please register for an NCI account if you don't have one:
+
+ #. Go to https://my.nci.org.au/mancini
+ #. Click on the "Sign up" button to start your registration form.
+ #. Complete all steps in the registration form. (Note: you must provide a current email address from your institution or place of work, not a personal email.)
+ #. Select the option to join project vp91 at Step 3 of the form.
+ #. Click "Finish" on the final page of the form to complete your registration request.
+
+ or
+
+ If you already have an account
+ #. Log in to your NCI account at https://my.nci.org.au/mancini
+ #. Select Project and Groups on the left-side menu
+ #. Select Find project or group on the top tab and search vp91 to apply for membership
+
+
+.. warning::
+ Project vp91 is temporary and for training purposes only. Data in vp91 will be cleaned up one week after the training.
\ No newline at end of file
diff --git a/docs/source/references.rst b/docs/source/references.rst
new file mode 100644
index 0000000..8f8a88a
--- /dev/null
+++ b/docs/source/references.rst
@@ -0,0 +1,14 @@
+Reference
+=========
+1. https://pytorch.org/tutorials/intermediate/ddp_series_multinode.html
+2. https://www.run.ai/guides/multi-gpu/pytorch-multi-gpu-4-techniques-explained
+3. https://medium.com/codex/a-comprehensive-tutorial-to-pytorch-distributeddataparallel-1f4b42bb1b51
+4. https://www.coursera.org/learn/neural-networks-deep-learning
+
+
+Contributors
+************
+
+1. `Joseph John, Staff Scientist, NCI `_
+
+*ChatGPT has been utilized to enhance and generate texts in this document*.
\ No newline at end of file
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
new file mode 100644
index 0000000..f7e8eb8
--- /dev/null
+++ b/docs/source/tutorial.rst
@@ -0,0 +1,65 @@
+Tutorial
+========
+
+In this tutorial, we'll be using the Gadi HPC machine at NCI. A Python virtual environment
+will be provided for you during the session.
+
+.. list-table::
+ :widths: 20 20 20 20
+ :header-rows: 1
+
+ * - Topics
+ - Material
+ - Exercises
+ - Duration
+ * - What is a Neural Network?
+ - 60 minutes
+ -
+ - 60 minutes
+ * - Tensors
+ - 20 minutes
+ - 10 minutes
+ - 30 minutes
+ * - Loading a Dataset
+ - 30 minutes
+ - 10 minutes
+ - 40 minutes
+ * - Building a Neural Network
+ - 30 minutes
+ - 30 minutes
+ - 60 minutes
+ * - Training on the GPU
+ - 20 minutes
+ - 15 minutes
+ - 35 minutes
+ * - Training on Multiple GPUs (DataParallel)
+ - 20 minutes
+ - 10 minutes
+ - 30 minutes
+ * - Training on Multiple GPUs (DistributedDataParallel)
+ - 20 minutes
+ - 10 minutes
+ - 30 minutes
+ * - Training on Multiple Nodes
+ - 30 minutes
+ - 10 minutes
+ - 40 minutes
+
+
+.. toctree::
+
+ tutorial/getting_started
+ tutorial/what_is_NN.rst
+ tutorial/tensor.rst
+ tutorial/dataloader.rst
+ tutorial/building_NN.rst
+ tutorial/gpu_NN.rst
+ tutorial/dataparallel.rst
+ tutorial/distributed_dataparallel.rst
+ tutorial/multi_node.rst
+
+
+
+
+
+
diff --git a/docs/source/tutorial/basics.rst b/docs/source/tutorial/basics.rst
new file mode 100644
index 0000000..dbe5623
--- /dev/null
+++ b/docs/source/tutorial/basics.rst
@@ -0,0 +1,118 @@
+Basics of Parallelism
+---------------------
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 15 min
+ * **Exercises:** 15 min
+
+ **Objectives:**
+ #. Learn about the difference between threads and processes.
+ #. Learn how to synchronize between threads.
+ #. Learn how to synchronize between processes.
+
+
+Process
+********
+
+A process is an instance of a program in execution. A process is responsible for executing a program's
+instructions and providing the environment in which the program operates (such as memory and I/O devices).
+
+#. **Program Code**: The instructions of the program that the process is executing.
+#. **Process Stack**: Contains temporary data such as method/function parameters, return addresses, and local variables.
+#. **Heap**: A region of memory used for dynamic memory allocation during the process's execution.
+#. **Data Section**: Contains global and static variables used by the process.
+#. **Process Control Block (PCB)**: A data structure maintained by the operating system that holds information about the process, including its state, program counter, CPU registers, and memory management information.
+
+We can launch multiple processes at the same time, and the processes are isolated from each other.
+Each process manages its own resources, including memory and CPU time, and one application can
+have more than one process. Processes communicate with each other using Inter-Process Communication (IPC)
+mechanisms.
+
+The OS can manage multiple processes at the same time, and it can switch which process is executing on a CPU.
+This involves saving the current state of the process that is being paused (the "old" process) and restoring
+the state of the process that is being resumed (the "new" process). The state of a process is
+captured in its *Process Control Block (PCB)*. This idea of switching between processes is called
+*Context Switching*.
+
+Threads
+*******
+
+A thread is the smallest unit of execution within a process. A process can contain multiple threads that
+share the same resources but execute independently. While each process is isolated from other processes, threads
+within the same process share the same memory space and resources but they execute independently.
+
+*Concurrency* refers to the ability to have multiple threads in progress over the same period of time. Threads can be managed by
+the operating system to run on different CPU cores, doing different computations, thereby
+improving performance. If two concurrent threads (or processes) actually run at the same instant, we say
+they are *parallel*.
+
+Challenges with Threads
+***********************
+
+#. **Synchronization**: As threads share resources, they need mechanisms to synchronize access to prevent conflicts and ensure data consistency. Common synchronization tools include mutexes, semaphores, and locks.
+#. **Deadlock**: A situation where two or more threads are waiting indefinitely for resources held by each other, leading to a standstill.
+#. **Race Conditions**: Occur when the outcome depends on the unpredictable timing of thread execution, potentially causing inconsistent results.
+
+
+Synchronization in programming is the coordination of concurrent threads or processes to ensure they operate
+correctly when accessing shared resources. It prevents issues such as race conditions and data corruption by
+managing access to shared resources, ensuring that only one thread or process can modify the resource at a time.
+Synchronization mechanisms, like locks, semaphores, and mutexes, help maintain consistency and order in a
+multithreaded or multiprocess environment.
+
+The Room and the Key: An analogy
+*********************************
+
+**The Room**: Think of a room that represents a shared resource or a critical section of code in a program.
+This room can only be used by one person at a time to ensure that things don't get messed up.
+
+**The Lock**: The lock is like a physical lock that controls access to the room. Only one person can hold the
+key of the lock at any given time.
+
+**Entering the Room**: When a person (a thread) wants to use the room (access the shared resource),
+they need to get the key (acquire the lock). If no one else is using the room, the person can take the key,
+enter the room, and use it as needed.
+
+**Occupied Room**: If someone is already inside the room and using it, other people who want to use the room
+must wait outside. They cannot enter until the current occupant leaves and returns the key.
+
+**Exiting the Room**: Once the person is done using the room, they leave and return the key (release the lock).
+This allows another person to take the key and use the room.
+
+**Preventing Conflicts**: The lock ensures that only one person is in the room at any time. This prevents
+conflicts or issues that might arise if multiple people were trying to use the room simultaneously.
+
+Exercise
+*********
+
+1. What occurs when locks aren't used?
+
+.. code-block:: console
+ :linenos:
+
+ module load python3/3.11.0
+ python3 threads.py
+
+2. How do threads differ from processes?
+
+.. code-block:: console
+ :linenos:
+
+ module load python3/3.11.0
+ python3 process.py
+
+
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. Processes are isolated with separate memory spaces, while threads share the same memory space within a process.
+ #. Processes have higher creation and management overhead due to separate resources and memory, whereas threads are lighter and cheaper to manage.
+ #. Threads can communicate easily and efficiently since they share memory, while processes require more complex and resource-intensive Inter-Process Communication (IPC) mechanisms.
+ #. Locks can be used for synchronization.
+
+
+
+
diff --git a/docs/source/tutorial/building_NN.rst b/docs/source/tutorial/building_NN.rst
new file mode 100644
index 0000000..9362113
--- /dev/null
+++ b/docs/source/tutorial/building_NN.rst
@@ -0,0 +1,157 @@
+Building a Neural Network
+=========================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 30 min
+ * **Exercises:** 30 min
+
+ **Objectives:**
+ #. Learn how to implement a neural network in PyTorch.
+ #. Learn the different modules that go into building a neural network in PyTorch.
+
+Dataset
+*******
+We will use the Pima Indians Diabetes dataset for the demonstration. The Pima Indians Diabetes dataset is a popular dataset in the field of machine learning
+and statistics, particularly for those working on classification problems.
+
+#. **Source**: The dataset was created by the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK) and is available in the UCI Machine Learning Repository.
+#. **Purpose**: The dataset is used to predict the onset of diabetes within five years based on diagnostic measures.
+#. **Features**: The dataset contains 768 samples, each with 8 features.
+
+The features are:
+
+#. Pregnancies: Number of times pregnant.
+#. Glucose: Plasma glucose concentration (mg/dL) at 2 hours in an oral glucose tolerance test.
+#. Blood Pressure: Diastolic blood pressure (mm Hg) at the time of screening.
+#. Skin Thickness: Triceps skinfold thickness (mm) measured at the back of the upper arm.
+#. Insulin: 2-Hour serum insulin (mu U/ml).
+#. BMI: Body mass index.
+#. Diabetes Pedigree Function: A function that scores likelihood of diabetes based on family history.
+#. Age: Age of the individual (years).
+
+**Outcome**: Whether or not the individual has diabetes (1 for positive, 0 for negative).
+
+Defining the Model
+*******************
+
+When designing the model, we have to keep the following points in mind:
+
+#. The input features in the input layer must match the input features in the dataset.
+#. A high number of layers can increase computation time, while too few layers may result in poor predictions.
+#. Each layer should be followed by an activation function.
+
+In this example, we will use a 3-layer neural network:
+
+#. The input layer expects 8 features.
+#. The first hidden layer has 12 neurons, followed by a ReLU activation function.
+#. The second hidden layer has 8 neurons, followed by another ReLU activation function.
+#. The output layer has one neuron, followed by a sigmoid activation function.
+
+The **sigmoid function** outputs values between 0 and 1, which is exactly what we need to represent the probability of a binary outcome.
+
+Sequential vs. Class-Based Models
+***********************************
+
+In PyTorch, neural networks can be defined using different approaches, and two common ones are the `Sequential` model and the `class-based model`.
+
+The `Sequential` model is a simple, linear stack of layers where each layer has a single input and output. It is useful for straightforward feedforward
+networks where layers are applied in a sequential order.
+
+**Characteristics:**
+
+#. **Ease of Use:** It is easier to use for simple architectures where layers are applied in a linear fashion.
+#. **Defined Using:** `torch.nn.Sequential`.
+
+**Limitations:**
+
+#. **Flexibility:** Limited flexibility for more complex architectures (e.g., networks with multiple inputs/outputs, shared layers, or non-sequential data flow).
+#. **Custom Behavior:** Difficult to implement custom forward passes or dynamic architectures.
+
+
+The `class-based` model allows you to define a network by subclassing `torch.nn.Module`. This approach provides greater flexibility and control, making it
+suitable for complex models and custom behaviors.
+
+**Characteristics:**
+
+#. **Flexibility:** Offers full control over the network architecture, including complex data flows, multiple inputs/outputs, and custom forward methods.
+#. **Defined Using:** Subclass of `torch.nn.Module`.
+
+
+**Advantages:**
+
+#. **Custom Forward Pass:** You can define complex forward passes and control data flow through the network
+#. **Dynamic Behavior:** Allows for dynamic computations, such as conditional layers or operations.
+
+
+Choosing between the two depends on the complexity of the network you need to build and your specific requirements for flexibility and control.
+
+Loss function
+*************
+
+Each model needs a loss function. In this case we will use the Binary Cross-Entropy (BCE) Loss. It measures the performance of a classification model whose
+output is a probability value between 0 and 1. It calculates the difference between the predicted probabilities and the actual binary labels (0 or 1) and
+penalizes the model more when the predictions are further from the true labels.
+
+.. math::
+
+ \mathrm{BCELoss}(y', y) = -\left[ y \log(y') + (1 - y) \log(1 - y') \right]
+
+Here, y' is the predicted output and y is the actual output.
+
+Optimizer
+*********
+
+An optimizer's main role is to update the model's parameters based on the gradients computed during backpropagation.
+
+1. **Parameter Updates**: Optimizers adjust the weights and biases of the neural network to reduce the loss. This involves applying algorithms that modify
+the parameters to minimize the difference between the predicted outputs and the actual targets.
+
+2. **Learning Rate Management**: Most optimizers include mechanisms to adjust the learning rate, either statically or dynamically, to control how large
+the parameter updates are.
+
+In this example we use an optimizer called Adaptive Moment Estimation (Adam). It computes adaptive learning rates for each parameter by considering
+both the mean and the variance of the gradients.
+
+Training the Model
+*******************
+
+Training a neural network involves epochs and batches, which define how data is fed to the model:
+
+#. **Epoch:** A full pass through the entire training dataset.
+#. **Batch:** A subset of samples processed at a time, with gradient descent performed after each batch.
+
+In practice, the dataset is divided into batches, and each batch is processed sequentially in a training loop. Completing all batches constitutes one epoch.
+The process is repeated for multiple epochs to refine the model.
+
+Batch size is constrained by system memory (GPU memory), and computational demands scale with batch size. Training for more epochs generally leads to better model
+performance but increases training time, and too many epochs can lead to overfitting. The optimal number of epochs and batch size is often determined through experimentation.
+
+1. **optimizer.zero_grad()**: During training, gradients accumulate by default in PyTorch. This means that if you don't clear them, gradients from multiple
+backward passes (from different batches) will be added together, which can lead to incorrect updates to the model parameters. By calling optimizer.zero_grad(),
+you ensure that gradients from previous steps are reset to zero, preventing them from affecting the current update.
+
+2. **loss.backward()**: Calculates the gradients of the loss with respect to each parameter of the model. This is done using backpropagation, a key algorithm
+for training neural networks.
+
+3. **optimizer.step()**: Used to update the model's parameters based on the gradients computed during the backward pass (loss.backward()).
+
+Model Evaluation
+******************
+
+Ideally, we should split the data into separate training and testing datasets, or use a distinct dataset for evaluation. For simplicity, we are testing the
+model on the same data used for training.
+
+
+.. admonition:: Exercise
+ :class: todo
+
+ Try the notebook *building_NN.ipynb*.
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. PyTorch offers *Sequential* models for simple linear stacks and *Class-based* models for complex, customizable architectures.
+ #. Training involves epochs and batches with functions like `optimizer.zero_grad()`, `loss.backward()`, and `optimizer.step()`
+ #. Ideally, data should be split into training and testing sets.
\ No newline at end of file
diff --git a/docs/source/tutorial/cnn.rst b/docs/source/tutorial/cnn.rst
new file mode 100644
index 0000000..4669009
--- /dev/null
+++ b/docs/source/tutorial/cnn.rst
@@ -0,0 +1,11 @@
+What is a Convolutional Neural Network (CNN)?
+==================================================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 45 min
+ * **Exercises:** 0 min
+
+ **Objectives:**
+ #. Learn the different parts of a convolutional neural network.
\ No newline at end of file
diff --git a/docs/source/tutorial/dataloader.rst b/docs/source/tutorial/dataloader.rst
new file mode 100644
index 0000000..09f4ef3
--- /dev/null
+++ b/docs/source/tutorial/dataloader.rst
@@ -0,0 +1,251 @@
+Loading a Dataset in PyTorch
+=============================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 15 min
+ * **Exercises:** 15 min
+
+ **Objectives:**
+ #. Learn how to use pre-loaded data in PyTorch.
+ #. Learn how to use custom data in PyTorch.
+ #. Learn how to use a custom dataloader in PyTorch.
+
+PyTorch offers two data primitives—`torch.utils.data.DataLoader` and `torch.utils.data.Dataset`— which facilitate the use of both pre-loaded datasets and custom data.
+Dataset is an abstract class that represents a dataset. It defines how the data should be accessed and loaded, allowing users to specify how to retrieve
+individual data points. DataLoader wraps around a Dataset and provides iterable functionality, handling batching, shuffling, and loading data in
+parallel using multiprocessing.
+
+.. list-table:: Differences Between Dataset and DataLoader
+ :header-rows: 1
+
+ * - Feature
+ - Dataset
+ - DataLoader
+ * - Purpose
+ - Defines how individual data samples are loaded.
+ - Provides batch loading and efficient data iteration.
+ * - Customizable
+ - Users implement custom loading logic (e.g., loading images, preprocessing).
+ - Handles batching, shuffling, and parallel data loading.
+ * - Methods
+ - Requires ``__len__()`` and ``__getitem__()`` methods.
+ - Takes a Dataset as input and provides data batches.
+ * - Functionality
+ - Accesses individual data points (samples).
+ - Loads data in batches and supports multiprocessing.
+ * - Parallelization
+ - Not parallelized (loads one item at a time).
+ - Supports parallel data loading (``num_workers``).
+
+
+Pre-loaded Datasets
+********************
+
+The `Fashion-MNIST` dataset is an example of a pre-loaded curated dataset. It can be loaded using the following parameters:
+
+- `root` specifies the path where the training or test data is stored.
+- `train` indicates whether to load the training or test dataset.
+- `download=True` will download the data from the internet if it's not available at the specified `root`.
+- `transform` and `target_transform` define the transformations applied to the features and labels, respectively.
+
+Load the training data:
+
+.. code-block:: python
+ :linenos:
+
+ training_data = datasets.FashionMNIST(
+ root="data", # root directory of data
+ train=True, # load training dataset
+ download=True, # download the data if unavailable at the `root`
+ transform=ToTensor() # transformation applied to the features (images)
+ )
+
+Load the testing data:
+
+.. code-block:: python
+ :linenos:
+
+ test_data = datasets.FashionMNIST(
+ root="data", # root directory of data
+ train=False, # load testing dataset
+ download=True, # download the data if unavailable at the `root`
+ transform=ToTensor() # transformation applied to the features (images)
+ )
+
+Custom Dataset
+***************
+
+What if we are working with a custom dataset? To illustrate this, we will download a dataset and set it up for
+use in PyTorch training.
+
+.. admonition:: Explanation
+ :class: attention
+
+ The data used for this demonstration is relatively *clean*. In a practical use case, significant
+ time will likely be spent on cleaning and preparing the data.
+
+The data:
+
+ #. There are **3 classes**: pizza, steak, and sushi.
+ #. The data is split into *train* and *test* datasets.
+ #. Both *train* and *test* datasets are further organized into 3 directories, each corresponding to one of the classes.
+
+.. admonition:: Explanation
+ :class: attention
+
+ In practice, it is our responsibility to divide the data into training and testing sets and
+ further categorize it into different classes.
+
+Transformation on the data
+**************************************
+
+Transform functions in the PyTorch library simplify the application of various data enhancement/manipulation techniques
+to your input data. These functions enable you to apply multiple changes simultaneously.
+
+
+.. code-block:: python
+ :linenos:
+
+ data_transform = transforms.Compose([
+ transforms.Resize(size=(64, 64)), # Resize the images to 64x64
+ transforms.RandomHorizontalFlip(p=0.5), # Horizontally flip image with a 0.5 probability
+ transforms.ToTensor() # convert to tensor of shape (C x H x W) in the range [0.0, 1.0]
+ ])
+
+.. admonition:: Explanation
+ :class: attention
+
+ A Tensor Image is a tensor with a shape of (C, H, W), where C represents the number of channels,
+ and H and W denote the image's height and width. Typically, an image consists of three color
+ channels: red, green, and blue (RGB).
+
+ **Note**: PyTorch uses the [C, H, W] format by default, while Matplotlib uses [H, W, C].
+
+Loading Image Data Using ImageFolder
+************************************
+
+`ImageFolder` is a generic data loader where images are expected to be organized into separate directories,
+each corresponding to a different class.
+
+.. code-block:: python
+ :linenos:
+
+ train_data = datasets.ImageFolder(root=train_dir, # root of the train images
+ transform=data_transform, # transforms to perform on each image
+ target_transform=None # transforms to perform on labels (eg: 1-hot encoding)
+ )
+
+ test_data = datasets.ImageFolder(root=test_dir, # root of the test images
+ transform=data_transform # transforms to perform on each image
+ )
+
+
+DataLoader
+**********
+
+In PyTorch, `DataLoader` is a built-in class that offers an efficient and flexible method for loading
+data into a model for training or inference. It is especially beneficial for managing large datasets that
+may not fit into memory and for carrying out data augmentation and preprocessing.
+Data loader combines a dataset and a sampler, and provides an iterable over the given dataset.
+
+
+.. code-block:: python
+ :linenos:
+
+ from torch.utils.data import DataLoader
+
+ train_dataloader = DataLoader(dataset=train_data, # dataset from which to load the data
+ batch_size=8, # samples per batch to load
+ num_workers=1, # subprocesses to use for data loading
+ shuffle=True) # reshuffled the data at every epoch
+
+ test_dataloader = DataLoader(dataset=test_data, # dataset from which to load the data
+ batch_size=8, # samples per batch to load
+ num_workers=1, # subprocesses to use for data loading
+ shuffle=False) # don't shuffle testing data
+
+.. admonition:: Explanation
+ :class: attention
+
+ Each tensor will be of size [8, 3, 64, 64] -> [batch_size, channels, height, width].
+
+
+Writing a custom DataLoader
+****************************
+
+The DataLoader works in conjunction with a Dataset class that defines how to access and preprocess data.
+
+1. Initialization (``__init__``): Loads the dataset from a file (e.g., CSV) or another source. Performs any necessary preprocessing, such as normalization or
+feature extraction.
+
+2. Length (``__len__``): Returns the number of samples in the dataset, which helps the DataLoader know how many batches to create.
+
+3. Item Retrieval (``__getitem__``): Retrieves a sample from the dataset given an index. This method is called by the DataLoader to get individual data points
+for batching.
+
+We will use the Pima Indians Diabetes dataset for the demonstration. The Pima Indians Diabetes dataset is a popular dataset in the field of machine learning
+and statistics, particularly for those working on classification problems.
+
+#. **Source**: The dataset was created by the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK) and is available in the UCI Machine Learning Repository.
+#. **Purpose**: The dataset is used to predict the onset of diabetes within five years based on diagnostic measures.
+#. **Features**: The dataset contains 768 samples, each with 8 features.
+
+The features are:
+
+#. Pregnancies: Number of times pregnant.
+#. Glucose: Plasma glucose concentration (mg/dL) at 2 hours in an oral glucose tolerance test.
+#. Blood Pressure: Diastolic blood pressure (mm Hg) at the time of screening.
+#. Skin Thickness: Triceps skinfold thickness (mm) measured at the back of the upper arm.
+#. Insulin: 2-Hour serum insulin (mu U/ml).
+#. BMI: Body mass index.
+#. Diabetes Pedigree Function: A function that scores likelihood of diabetes based on family history.
+#. Age: Age of the individual (years).
+
+**Outcome**: Whether or not the individual has diabetes (1 for positive, 0 for negative).
+
+.. code-block:: python
+ :linenos:
+
+ column_names = [ 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness','Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
+
+ class PimaDataset(Dataset):
+
+ def __init__(self, csv_file):
+ # Load the CSV file without header and assign column names
+ self.data = pd.read_csv(csv_file, header=None, names=column_names)
+ self.features = self.data.drop('Outcome', axis=1).values
+ self.labels = self.data['Outcome'].values
+
+ # Convert to PyTorch tensors
+ self.features_tensor = torch.tensor(self.features, dtype=torch.float32)
+ self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)
+
+ # Calculate mean and std
+ self.mean = self.features_tensor.mean(dim=0)
+ self.std = self.features_tensor.std(dim=0)
+
+ # Normalize the features
+ self.features_tensor = (self.features_tensor - self.mean) / self.std
+
+ def __len__(self):
+ return len(self.data)
+
+ def __getitem__(self, idx):
+ feature = self.features_tensor[idx]
+ label = self.labels_tensor[idx]
+ return feature, label
+
+
+.. admonition:: Exercise
+ :class: todo
+
+ Try the notebook *dataloader.ipynb*.
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. PyTorch provides pre-loaded datasets that can be used directly.
+ #. Custom datasets can also be utilized in PyTorch.
+ #. We can create custom dataloaders in PyTorch.
\ No newline at end of file
diff --git a/docs/source/tutorial/dataparallel.rst b/docs/source/tutorial/dataparallel.rst
new file mode 100644
index 0000000..9c6a848
--- /dev/null
+++ b/docs/source/tutorial/dataparallel.rst
@@ -0,0 +1,86 @@
+Multi-GPU Training using Data Parallelism
+=========================================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 15 min
+ * **Exercises:** 10 min
+
+ **Objectives:**
+ #. Learn how to use multiple GPUs in training using data parallelism.
+
+By default, PyTorch will use only one GPU. However, you can easily leverage multiple GPUs by running your model in parallel using `DataParallel`.
+
+DataParallel
+*************
+
+Whenever you have multiple GPUs, you can wrap your model with `nn.DataParallel`. Then, you can move your model to the GPUs using `model.to(device)`.
+
+.. code-block:: python
+ :linenos:
+
+ if torch.cuda.device_count() > 1:
+ class_model = nn.DataParallel(class_model)
+ class_model.to(device)
+
+Then we can use the model as usual and PyTorch will distribute the data across multiple GPUs.
+
+
+Detailed Working
+*****************
+
+`nn.DataParallel` splits the input data across the available GPUs, performing computations in parallel, and then aggregating the results.
+
+1. **Splitting the Input Data**
+
+- **Batch Splitting**: `nn.DataParallel` splits each mini-batch of data into smaller chunks, with each chunk sent to a different GPU.
+
+- **Replication**: The model is replicated on each GPU, ensuring that each GPU has a copy of the model.
+
+2. **Parallel Computation**
+
+- **Forward Pass**: Each GPU performs a forward pass on its respective chunk of the data. Since the model is replicated on each GPU, the computations are done independently for each chunk.
+
+- **Backward Pass**: During backpropagation, gradients are computed separately on each GPU.
+
+3. **Aggregation of Results**
+
+- **Concatenation of Outputs**: After the forward pass, `nn.DataParallel` gathers the outputs from all GPUs and concatenates them along the batch dimension. This is necessary to maintain the correct order of the outputs.
+
+- **Gradient Aggregation**: During backpropagation, `nn.DataParallel` aggregates the gradients from each GPU. It does this by summing the gradients computed by each GPU, which are then used to update the model parameters.
+
+4. **Synchronizing Parameters**
+
+- **Parameter Updates**: After gradients are aggregated, the model parameters are updated on the primary GPU. The updated parameters are then synchronized and broadcasted to all other GPUs.
+
+
+Limitations
+***********
+
+`nn.DataParallel` in PyTorch allows you to distribute data across multiple GPUs for parallel processing, but it has some limitations:
+
+#. **Single-process bottleneck**: nn.DataParallel uses a single process that sends data to each GPU, collects the results, and aggregates them. This can become a bottleneck, especially with a large number of GPUs.
+#. **Limited scalability**: As the number of GPUs increases, the performance gains from nn.DataParallel diminish due to the overhead of distributing data and collecting results.
+#. **Less efficient memory usage**: nn.DataParallel replicates the entire model on each GPU, which can lead to inefficient memory usage, especially with large models.
+#. **Inflexible device placement**: nn.DataParallel requires all GPUs to be on the same machine. It doesn't support distributed training across multiple nodes.
+
+
+When using nn.DataParallel in PyTorch, `next(class_model.parameters()).device` often returns `cuda:0`, even if multiple GPUs are used. This happens because
+nn.DataParallel replicates the model across multiple GPUs but keeps the original model's parameters on the primary GPU (cuda:0). The `nn.DataParallel` wrapper
+itself does not move parameters to different GPUs; it only distributes the input data to the GPUs and then aggregates the results. The underlying parameters of the model are still located on the primary device.
+It's always a good idea to use `nvidia-smi` to check that the GPU utilization is as expected.
+
+
+.. admonition:: Exercise
+ :class: todo
+
+ Try the notebook *multi_GPU.ipynb*.
+
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. We can use `nn.DataParallel` to utilize multiple GPUs for training.
+ #. The training is limited to a single node and cannot span across multiple nodes.
+
diff --git a/docs/source/tutorial/distributed_dataparallel.rst b/docs/source/tutorial/distributed_dataparallel.rst
new file mode 100644
index 0000000..0fb7188
--- /dev/null
+++ b/docs/source/tutorial/distributed_dataparallel.rst
@@ -0,0 +1,125 @@
+Distributed Data Parallelism
+=============================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 20 min
+ * **Exercises:** 10 min
+
+ **Objectives:**
+ #. Learn how to use multiple GPUs in training using distributed data parallelism.
+ #. Train the model using a PBS job script.
+
+
+Components of a distributed data parallel model:
+
+- **Master Node:** The primary GPU responsible for synchronization, model replication, loading models, and logging.
+- **Process Group:** When training or testing a model across K GPUs, these K processes form a group.
+- **Rank:** Each process within the process group is identified by a rank, ranging from 0 to K-1 (similar to MPI).
+- **World Size:** The total number of processes in the group, which equals the number of GPUs (similar to MPI).
+
+Advantage over DataParallel
+****************************
+
+- **Scalability:** DataParallel operates as a single-process, multi-threaded approach and only works on a single machine, whereas, DistributedDataParallel (DDP) uses a multi-process approach and supports both single- and multi-machine training. DataParallel is often slower than DDP, even on a single machine, due to *GIL* contention across threads, the overhead of replicating the model per iteration, and the extra steps involved in scattering inputs and gathering outputs.
+
+- **Model Parallelism:** If your model is too large to fit on a single GPU, you need to use model parallelism to distribute it across multiple GPUs. DistributedDataParallel supports model parallelism, while DataParallel does not. When combining DDP with model parallelism, each DDP process utilizes model parallelism, and all processes together perform data parallelism.
+
+
+Process Group
+*************
+
+In DistributedDataParallel (DDP), a *Process Group* is a collection of processes that can communicate with each other during distributed training.
+
+.. code-block:: python
+ :linenos:
+
+ import torch.distributed as dist
+
+ def setup(rank, world_size):
+ os.environ['MASTER_ADDR'] = 'localhost'
+ os.environ['MASTER_PORT'] = '12355'
+ dist.init_process_group("nccl", rank=rank, world_size=world_size)
+
+.. admonition:: Explanation
+ :class: attention
+
+ Here, `nccl` is the backend that determines how communication between processes is handled. Other common backends are `gloo`, a CPU-based backend, and `mpi`,
+ the MPI (Message Passing Interface) based backend.
+
+Splitting the Dataloader
+************************
+
+To split the data across multiple GPUs we use `DistributedSampler`.
+
+.. code-block:: python
+ :linenos:
+
+ def prepare(rank, world_size, batch_size=32, pin_memory=False, num_workers=0):
+ dataset = PimaDataset(datapath)
+ sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=False, drop_last=False)
+
+ dataloader = DataLoader(dataset, batch_size=batch_size, pin_memory=pin_memory, num_workers=num_workers, drop_last=False, shuffle=False, sampler=sampler)
+
+ return dataloader
+
+
+.. admonition:: Explanation
+ :class: attention
+
+ - `num_replicas` - Is typically the number of processes in the distributed training job.
+ - `rank` - Each process is assigned a rank which ensures that each process only accesses the data corresponding to its rank.
+ - `drop_last` - When working with datasets in distributed training, it is common for the total number of samples in the dataset to not be perfectly divisible by the product of the batch size and the number of replicas. When `drop_last` is set to *True*, the last batch that is not full will be dropped.
+
+The sampler is then passed to a `DataLoader`, which accepts the following additional options.
+
+.. admonition:: Explanation
+ :class: attention
+
+ - `num_workers` - Number of subprocesses to use for data loading.
+ - `pin_memory` - Pinned (or Page-locked) memory is a region of host memory that is "locked" in physical RAM and cannot be paged out to disk by the operating system. This ensures that the memory remains in RAM and is directly accessible for operations like data transfer between the CPU and GPU. Page-locking excessive amounts of memory with cudaMallocHost() may degrade system performance, since it reduces the amount of memory available to the system for paging. As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device.
+
+ .. image:: ../figs/pinning.png
+
+
+
+Wrapping a Model in DDP
+***********************
+
+DistributedDataParallel (DDP) is a PyTorch wrapper that parallelizes training across multiple GPUs, minimizes communication overhead, and
+synchronizes gradients automatically.
+
+
+.. code-block:: python
+ :linenos:
+
+ model_ddp = DDP(model, device_ids=[rank], output_device=rank, find_unused_parameters=True)
+
+.. admonition:: Explanation
+ :class: attention
+
+ - `model`: The neural network (`torch.nn.Module`) that you want to train. Before wrapping it with DDP, it should be placed on the appropriate device (GPU) using model.to(device).
+ - `device_ids`: Specifies the GPU device(s) to which this process's model should be mapped. The rank typically corresponds to the index of the current process within the distributed setup, and in a single-node setup with multiple GPUs, rank is often the GPU ID. For example, if rank=0, it means this process will use GPU 0.
+ - `output_device` : Specifies the device where the output of the model should be stored.
+ - `find_unused_parameters` : DDP assumes all model parameters are used in every forward pass, and it synchronizes their gradients accordingly. Setting `find_unused_parameters=True` ensures that DDP will only synchronize the gradients of parameters that are actually used, preventing errors and unnecessary communication overhead.
+
+
+.. admonition:: Exercise
+ :class: todo
+
+ 1. Examine the program *src/distributed_data_parallel.py*. What are the changes from data_parallel.ipynb?
+ 2. Examine the job script *job_scripts/distributed_data_parallel.pbs*.
+ 3. Run the program using the job script *job_scripts/distributed_data_parallel.pbs*.
+
+ .. code-block:: console
+ :linenos:
+
+ cd job_scripts
+ qsub distributed_data_parallel.pbs
+
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. We can use distributed data parallelism to use multiple GPUs on the same node.
\ No newline at end of file
diff --git a/docs/source/tutorial/getting_started.rst b/docs/source/tutorial/getting_started.rst
new file mode 100644
index 0000000..f90ffc0
--- /dev/null
+++ b/docs/source/tutorial/getting_started.rst
@@ -0,0 +1,47 @@
+Getting Started
+===============
+
+To access the Gadi system, follow these steps:
+
+1. **SSH into the Gadi system**:
+
+ .. code-block:: console
+ :linenos:
+
+ ssh -XY <username>@gadi.nci.org.au
+
+
+ Alternatively, you can use the Gadi terminal option at `ARE <https://are.nci.org.au/>`_.
+
+2. **Change to the project directory**:
+
+ .. code-block:: console
+ :linenos:
+
+ cd /scratch/vp91
+
+
+3. **Create and navigate to a directory with your username**:
+
+ .. code-block:: console
+ :linenos:
+
+ mkdir -p $USER
+ cd $USER
+
+
+4. **Clone the repository**:
+
+ .. code-block:: console
+ :linenos:
+
+ git clone https://github.com/NCI900-Training-Organisation/intro-to-pytorch.git
+ cd intro-to-pytorch
+
+
+
+In the repository:
+
+- The `python/src` directory contains all the Python code.
+- The `python/job_scripts` directory includes all the PBS job scripts.
+- The `python/job_scripts/sample_outputs` directory holds the sample outputs.
diff --git a/docs/source/tutorial/gpu_NN.rst b/docs/source/tutorial/gpu_NN.rst
new file mode 100644
index 0000000..5f55346
--- /dev/null
+++ b/docs/source/tutorial/gpu_NN.rst
@@ -0,0 +1,81 @@
+Training on a GPU
+=================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 15 min
+ * **Exercises:** 15 min
+
+ **Objectives:**
+ #. Learn how to train the model on a GPU.
+ #. Learn how to save a model.
+ #. Learn how to load a saved model.
+
+Set the default device
+**********************
+
+We can set a default device when building a model, ensuring that all operations occur on this device. If available, we can set the GPU as the default device.
+
+.. code-block:: python
+ :linenos:
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+Saving and Loading a Model
+******************************
+
+We can save the model to a specific path in the system.
+
+.. code-block:: python
+ :linenos:
+
+ modelpath = os.path.expandvars('/home/$USER/class_model')
+ torch.save(class_model.state_dict(), modelpath)
+
+This saved model can be loaded when needed. During loading, you can directly specify the device using the `map_location` parameter or move the model to the
+desired device afterward using the `.to()` function.
+
+.. code-block:: python
+ :linenos:
+
+ class_model.load_state_dict(torch.load(modelpath, map_location=device, weights_only=True))
+ class_model.to(device)
+
+Training on the GPU
+*******************
+
+When training, both the model and all the data it operates on should be on the same device.
+
+.. code-block:: python
+ :linenos:
+
+ n_epochs = 100
+ batch_size = 10
+
+ for epoch in range(n_epochs):
+ for i in range(0, len(X_tensor), batch_size):
+ Xbatch = X_tensor[i:i+batch_size].to(device) # move the tensor to GPU
+
+ y_pred = class_model(Xbatch)
+
+ ybatch = y_tensor[i:i+batch_size].to(device) # move the tensor to GPU
+
+ loss = loss_fn(y_pred, ybatch)
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+.. admonition:: Exercise
+ :class: todo
+
+ Try the notebook *GPU_NN.ipynb*.
+
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. We can set a default device in PyTorch.
+ #. During training, ensure that both the model and the data it operates on are on the same device.
+
+
diff --git a/docs/source/tutorial/multi_node.rst b/docs/source/tutorial/multi_node.rst
new file mode 100644
index 0000000..c7dbc9f
--- /dev/null
+++ b/docs/source/tutorial/multi_node.rst
@@ -0,0 +1,178 @@
+Multi-Node Parallelism
+=======================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 20 min
+ * **Exercises:** 10 min
+
+ **Objectives:**
+ #. Learn how to use multiple GPUs, in multiple nodes using Torchrun.
+
+
+To run the provided code on multiple nodes using torchrun (previously torch.distributed.launch), we need to make a few modifications to the
+single node code:
+
+- **Environment Variables for Multi-Node Training**: Set environment variables like MASTER_ADDR, MASTER_PORT, WORLD_SIZE, and RANK using command-line arguments when launching the script with torchrun.
+- **Modifications to the setup function**: The setup function should be updated to handle the environment variables for multi-node training.
+- **main function**: Remove the use of mp.spawn and instead rely on torchrun to handle the spawning of processes across nodes.
+
+In PyTorch distributed parallelism, **global rank** and **local rank** are key concepts for managing processes across multiple nodes and
+GPUs. The global rank uniquely identifies each process in the entire distributed setup, ranging from `0` to `world_size - 1`, where
+`world_size` is the total number of processes across all nodes. The local rank, on the other hand, identifies each process within a
+specific node, typically corresponding to a particular GPU on that node. The global rank is crucial for tasks that require a unique
+process identity across the system, while the local rank is used for GPU assignment within a node. These ranks are essential for
+ensuring that each process operates correctly within the distributed environment.
+
+.. image:: ../figs/global_local.png
+
+
+PBS Script
+**********
+
+As Gadi uses the PBS job scheduler we can use it to run the training on multiple nodes. Here we are requesting 2 nodes, each with 4 GPUs.
+
+.. code-block:: console
+ :linenos:
+
+ #!/bin/bash
+
+ #PBS -P vp91
+ #PBS -q gpuvolta
+
+ #PBS -l ncpus=96
+ #PBS -l ngpus=8
+ #PBS -l mem=10GB
+ #PBS -l walltime=00:20:00
+
+ #PBS -N multinode
+
+ module load python3/3.11.0
+ module load cuda/12.3.2
+
+ . /scratch/vp91/Training-Venv/pytorch/bin/activate
+
+ # Set variables
+ if [[ $PBS_NCPUS -ge $PBS_NCI_NCPUS_PER_NODE ]]
+ then
+ NNODES=$((PBS_NCPUS / PBS_NCI_NCPUS_PER_NODE))
+ else
+ NNODES=1
+ fi
+
+ PROC_PER_NODE=$((PBS_NGPUS / NNODES))
+
+ MASTER_ADDR=$(cat $PBS_NODEFILE | head -n 1)
+
+ # Launch script
+ LAUNCH_SCRIPT=/scratch/vp91/jxj900/intro-to-pytorch/job_scripts/multinode_torchrun.sh
+
+ # Set execute permission
+ chmod u+x ${LAUNCH_SCRIPT}
+
+ # Run PyTorch application
+ for inode in $(seq 1 $PBS_NCI_NCPUS_PER_NODE $PBS_NCPUS); do
+ echo $inode
+ pbsdsh -n $inode ${LAUNCH_SCRIPT} ${NNODES} ${PROC_PER_NODE} ${MASTER_ADDR} &
+ done
+
+ wait
+
+.. admonition:: Explanation
+ :class: attention
+
+ - `MASTER_ADDR`: The IP address or hostname of the master node, which is typically the first node allocated by PBS.
+ - `PROC_PER_NODE`: The number of GPUs per node.
+ - `NNODES`: The total number of nodes.
+
+Here, `pbsdsh` launches the `multinode_torchrun.sh` script simultaneously on all nodes. The `multinode_torchrun.sh` script contains the following:
+
+.. code-block:: console
+ :linenos:
+
+ #!/bin/bash
+
+ # Load shell environment variables
+ source ~/.bashrc
+
+ module load python3/3.11.0
+ module load cuda/12.3.2
+
+ . /scratch/vp91/Training-Venv/pytorch/bin/activate
+
+ # Application script
+ APPLICATION_SCRIPT=/scratch/vp91/jxj900/intro-to-pytorch/src/multinode_torchrun.py
+
+ # Set execute permission
+ chmod u+x ${APPLICATION_SCRIPT}
+
+ # Run PyTorch application
+ torchrun --nnodes=${1} --nproc_per_node=${2} --rdzv_id=100 --rdzv_backend=c10d --rdzv_endpoint=${3}:29400 ${APPLICATION_SCRIPT}
+
+
+Where `torchrun` will launch the training program `multinode_torchrun.py` on each node and
+use all the 4 GPUs on each node.
+
+
+
+.. admonition:: Explanation
+ :class: attention
+
+ The rendezvous backend in PyTorch is a key component of the distributed training setup. It is
+ responsible for coordinating the initialization of multiple processes that may be running across different
+ nodes in a distributed system. This process is crucial for ensuring that all distributed processes are aware
+ of each other and can start training in a synchronized manner.
+
+ - `rdzv_backend`: The backend used for the rendezvous process (`c10d` is the backend recommended by PyTorch).
+ - `rdzv_endpoint`: The network address of the rendezvous server, combining `MASTER_ADDR` and `MASTER_PORT`.
+
+Alternative Options
+********************
+
+Alternatively, if you can SSH into the individual nodes, you can proceed with the following steps.
+
+On the first node (rank 0):
+
+.. code-block:: console
+ :linenos:
+
+ torchrun --nnodes=2 --nproc_per_node=4 --node_rank=0 --master_addr="<master_node_address>" --master_port=12355 /scratch/vp91/$USER/intro-to-pytorch/src/multinode_torchrun.py
+
+On the second node (rank 1):
+
+
+.. code-block:: console
+ :linenos:
+
+ torchrun --nnodes=2 --nproc_per_node=4 --node_rank=1 --master_addr="<master_node_address>" --master_port=12355 /scratch/vp91/$USER/intro-to-pytorch/src/multinode_torchrun.py
+
+Of course, this becomes a very difficult task if you have a large number of nodes.
+
+.. admonition:: Explanation
+ :class: attention
+
+ If you have a `SLURM scheduler <https://slurm.schedmd.com/>`_, things are a bit easier since the *srun* command can launch the Torchrun directly
+ from the job script, on all nodes, eliminating the need for an additional shell script.
+
+
+.. admonition:: Exercise
+ :class: todo
+
+ 1. Examine the program *src/multinode_torchrun.py*. What are the changes from *src/distributed_data_parallel.py*?
+ 2. Examine the job script *job_scripts/multinode_torchrun.pbs*. Can you simplify it?
+ 3. Examine the job script *job_scripts/multinode_torchrun.sh*. Can you simplify it?
+ 4. Run the program using the job script *job_scripts/multinode_torchrun.pbs*.
+
+ .. code-block:: console
+ :linenos:
+
+ cd job_scripts
+ qsub multinode_torchrun.pbs
+
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. We can use Torchrun to use multiple GPUs in multiple nodes.
+ #. We can use a PBS script to launch multi-node training.
\ No newline at end of file
diff --git a/docs/source/tutorial/tensor.rst b/docs/source/tutorial/tensor.rst
new file mode 100644
index 0000000..e2aa910
--- /dev/null
+++ b/docs/source/tutorial/tensor.rst
@@ -0,0 +1,247 @@
+Tensors in PyTorch
+===================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 20 min
+ * **Exercises:** 15 min
+
+ **Objectives:**
+ #. Learn about tensors.
+ #. Learn the differences between a tensor and NumPy array.
+ #. Learn how to move tensors to GPUs.
+
+
+Tensors are specialized data structures used in PyTorch to represent model inputs, outputs, and parameters. While they are conceptually similar to
+arrays and matrices, they offer additional features such as support for hardware accelerators like GPUs and
+automatic differentiation.
+
+Creating a Tensor
+*****************
+
+A tensor can be created in multiple ways:
+
+1. Directly from data
+
+.. code-block:: python
+ :linenos:
+
+ data = [[1, 2],[3, 4]]
+ x_tensor= torch.tensor(data)
+
+2. From NumPy
+
+.. code-block:: python
+ :linenos:
+
+ x_np = np.array(data)
+ x_tensor = torch.from_numpy(x_np)
+
+3. From another Tensor
+
+.. code-block:: python
+ :linenos:
+
+ x_tensor = torch.ones_like(x_tensor)
+ y_tensor = torch.rand_like(x_tensor, dtype=torch.float)
+
+
+.. admonition:: Explanation
+ :class: attention
+
+ **torch.rand_like()** returns a tensor with the same size as the input, but filled with random numbers
+ from the interval [0,1).
+
+
+Operations on Tensors
+*********************
+
+Tensors can perform almost all the operations a NumPy array can perform:
+
+1. indexing and slicing
+
+.. code-block:: python
+ :linenos:
+
+ x_tensor = torch.ones(4, 4)
+ print(f"First row: {x_tensor[0]}")
+ print(f"First column: {x_tensor[:, 0]}")
+ print(f"Last column: {x_tensor[..., -1]}")
+ x_tensor[:,1] = 0
+ print(x_tensor)
+
+2. Concatenate multiple tensors
+
+.. code-block:: python
+ :linenos:
+
+ y_tensor = torch.cat([x_tensor, x_tensor, x_tensor], dim=1)
+ print(y_tensor)
+
+
+3. Arithmetic Operations
+
+.. code-block:: python
+ :linenos:
+
+ x_tensor = torch.ones(4, 4)
+
+ # Transpose
+ x_T_tensor = x_tensor.T
+
+ # Matrix Multiplication
+ y1_tensor = x_tensor @ x_tensor.T
+ y2_tensor = x_tensor.matmul(x_tensor.T)
+
+ y3_tensor = torch.rand_like(y1_tensor)
+ torch.matmul(x_tensor, x_tensor.T, out=y3_tensor)
+
+
+ # Element-wise multiplication
+ z1_tensor = x_tensor * x_tensor
+ z2_tensor = x_tensor.mul(x_tensor)
+
+ z3_tensor = torch.rand_like(x_tensor)
+ torch.mul(x_tensor, x_tensor, out=z3_tensor)
+
+4. In-place Operations
+
+.. code-block:: python
+ :linenos:
+
+ x_tensor = torch.ones(4, 4)
+
+ # Transpose
+ x_tensor.t_()
+
+ # Copy
+ y_tensor = torch.rand_like(x_tensor)
+ x_tensor.copy_(y_tensor)
+
+NumPy and Tensor
+****************
+
+Tensors on the **CPU** and NumPy arrays can share memory locations, so modifying one will also affect
+the other.
+
+.. code-block:: python
+ :linenos:
+
+ x_tensor = torch.ones(5)
+ x_np = x_tensor.numpy() # tensor to numpy
+ print(f"t: {x_tensor}")
+ print(f"n: {x_np}")
+
+ x_tensor.add_(1)
+
+ print(f"t: {x_tensor}")
+ print(f"n: {x_np}")
+
+ y_np = np.ones(5)
+ z_np = np.zeros(5)
+ y_tensor = torch.from_numpy(y_np) # numpy to tensor
+
+ np.add(y_np, 1, out=z_np)
+
+ np.add(y_np, 1, out=y_np) # in-place update is also visible through y_tensor
+
+ print(f"t: {y_tensor}")
+ print(f"n: {y_np}")
+
+
+Moving Tensor to GPU
+*********************
+
+It's always wise to check for GPU availability before performing any GPU operations. If a GPU is available,
+we can move our tensor to it.
+
+.. code-block:: python
+ :linenos:
+
+ x_tensor_gpu = x_tensor.to("cuda")
+
+A better approach is to set the default device before starting any computations.
+
+.. code-block:: python
+ :linenos:
+
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+ y_tensor_gpu = y_tensor.to(device)
+
+This way, your code will work regardless of whether a GPU is available or not.
+
+Tensor Attributes
+*****************
+
+.. code-block:: python
+ :linenos:
+
+ print(f"Shape of tensor: {y_tensor.shape}")
+ print(f"Datatype of tensor: {y_tensor.dtype}")
+ print(f"Device tensor is stored on: {y_tensor.device}")
+
+
+*Automatic differentiation* is a key feature that distinguishes tensors from NumPy arrays. This capability
+is particularly useful in neural networks, where model weights are adjusted during backpropagation based
+on the gradient of the loss function with respect to each parameter. Tensors support automatic gradient
+computation for any computational graph. For example, consider the computational graph of a one-layer
+neural network:
+
+
+.. image:: ../figs/loss.png
+
+In this context, **w** and **b** are the parameters that need to be optimized. Therefore, we compute
+the gradients of the loss function with respect to these variables.
+
+.. math::
+
+ z = x * w + b
+
+ g1 = \frac{\partial loss}{\partial w}
+
+ g2 = \frac{\partial loss}{\partial b}
+
+Tensors make this process quite straightforward:
+
+.. code-block:: python
+ :linenos:
+
+ x_tensor = torch.ones(5) # input tensor
+ y_tensor = torch.zeros(3) # expected output
+
+ w_tensor = torch.randn(5, 3, requires_grad=True)
+ b_tensor = torch.randn(3, requires_grad=True)
+
+ z_tensor = torch.matmul(x_tensor, w_tensor) + b_tensor
+
+ loss_tensor = torch.nn.functional.binary_cross_entropy_with_logits(z_tensor, y_tensor)
+ loss_tensor.backward()
+
+ print(w_tensor.grad)
+ print(b_tensor.grad)
+
+
+When you perform operations in PyTorch involving tensors that have **requires_grad=True**, PyTorch builds a computational graph in the background.
+This graph records the operations performed on the tensors, allowing for automatic differentiation during backpropagation.
+When you calculate *z_tensor = torch.matmul(x_tensor, w_tensor) + b_tensor*, PyTorch tracks the entire sequence of operations.
+Because w_tensor and b_tensor have requires_grad=True, PyTorch knows that these tensors are part of the computational graph.
+Every operation (such as torch.matmul and addition) creates nodes in this graph, linking the output z_tensor back to the inputs w_tensor and b_tensor.
+
+
+
+.. admonition:: Exercise
+ :class: todo
+
+ Try the notebook *tensors.ipynb*.
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. Tensors in PyTorch can be created using various methods.
+ #. Moving tensors to GPUs can be done in a device-agnostic manner.
+ #. Automatic differentiation is straightforward with tensors in PyTorch.
+
+
+
+
diff --git a/docs/source/tutorial/what_is_NN.rst b/docs/source/tutorial/what_is_NN.rst
new file mode 100644
index 0000000..7603077
--- /dev/null
+++ b/docs/source/tutorial/what_is_NN.rst
@@ -0,0 +1,554 @@
+What is a Neural Network?
+=========================
+
+.. admonition:: Overview
+ :class: Overview
+
+ * **Tutorial:** 45 min
+ * **Exercises:** 0 min
+
+ **Objectives:**
+ #. Learn the different parts of a neural network
+
+Neural Networks (NN) are computational models inspired by the human brain, designed to recognize patterns and make data-based decisions.
+They consist of interconnected layers of nodes, or "neurons," which process and transform input information. Through training, neural networks
+learn to improve their accuracy in tasks like image recognition, language processing, and more.
+
+Neuron
+******
+
+In the context of a neural network, a neuron is a fundamental unit that processes inputs to produce an
+output. Let's break down its role and functionality step by step:
+
+1. **Input features**: These are the individual measurable properties or characteristics of the data that are fed into the network. Features can be any numerical data -
+for example, if we use an image as the input, the input features will be the pixel values of the image.
+
+2. **Weights**: Input features are each associated with a weight, which is a numerical value that adjusts
+the importance of the corresponding input feature.
+
+3. **Calculating the Weighted Sum**: For each neuron, you first multiply each input feature by its corresponding weight. Then, you sum up all these weighted inputs.
+This sum represents the combined influence of all the input features on the neuron.
+
+4. **Adding Bias**: To this weighted sum, you add a bias term. The bias is another adjustable parameter that helps the neuron model more complex patterns
+by shifting the activation function's input.
+
+5. **Activation Function**: Finally, you apply an activation function to the resulting value (the weighted sum plus bias). The activation function introduces
+non-linearity into the neuron's output, which allows the network to learn and represent more complex patterns and relationships.
+
+
+.. image:: ../figs/neuron.drawio.png
+
+In summary, a neuron in a neural network processes its inputs through a series of steps—multiplication, summation, bias addition, and activation
+function application—to produce an output value. This output is then used in further computations within the network or as the final prediction, depending
+on the network's structure.
+
+
+
+Activation Function
+********************
+
+Activation functions are crucial components of neural networks, performing several key roles that
+influence the network's ability to learn and make predictions.
+
+Role of Activation Functions:
+
+1. **Produce Outputs of Neurons**: After computing the weighted sum of inputs and adding the bias, an activation function is applied to
+ this value.
+
+2. **Update Weights and Biases During Training**: Activation functions play a role in updating weights and biases during the *training process*. When the
+ network is trained using methods like *backpropagation*, the *gradient* of the *loss function* with respect to the weights and biases is calculated.
+ The gradient depends on the *derivative* of the activation function, which helps adjust weights and biases to minimize the *error*. Therefore, the choice of
+ activation function affects how effectively the network learns.
+
+Characteristics of Activation Functions:
+
+1. **Scale and Normalize Outputs**:
+ Activation functions often scale and normalize the neuron's output.
+2. **Introduce Non-Linearity**:
+ Activation functions introduce non-linearity into the network. Without non-linearity, even a multi-layer network would behave like a single-layer network,
+ as linear combinations of linear functions are still linear. Non-linearity allows the network to learn and model complex patterns
+ and relationships in the data.
+
+3. **Define Range of Outputs**: It defines the minimum and maximum value of the network output. For instance, the sigmoid activation function outputs values
+ between 0 and 1. It is defined as :math:`\sigma(x) = \frac{1}{1 + e^{-x}}`.
+
+4. **Simple Derivatives**:
+ Most activation functions have simple derivatives, which makes them computationally efficient during the training process.
+ For example, the derivative of the **ReLU** (Rectified Linear Unit) function, :math:`\max(0, x)`, is simply 0 for :math:`x < 0` and 1 for :math:`x > 0`.
+
+Neural Network - A Network of Neurons
+**************************************
+
+A neural network is a complex system of interconnected neurons organized into layers. Each layer's output serves as the input for the next layer,
+creating a stack of neurons that processes data in stages. Mathematically this can be boiled down to a sophisticated function that maps inputs to outputs
+through numerous parameters. Training involves adjusting these parameters to improve the network's performance and accuracy.
+
+.. image:: ../figs/layers.png
+
+A neural network consists of three types of layers: input, hidden, and output. The input layer receives and holds raw data, with each neuron representing a
+feature of the data. Hidden layers process this data by applying weights, biases, and activation functions to extract and learn complex patterns. These layers
+transform the data and pass it to the next layer in the network. The output layer produces the final prediction or classification result based on the
+processed information from the hidden layers. Each layer plays a crucial role in enabling the network to learn from and make accurate predictions on the data.
+
+.. admonition:: Explanation
+ :class: attention
+
+ Matrix X represents the input matrix, where each column vector corresponds to an input sample. So if the matrix has the dimensions :math:`n \times m`,
+ *n* will be the number of features in each input sample and *m* will be the total number of samples (also called training data).
+
+ .. math::
+
+ X = \begin{bmatrix}
+ x_{1}^{(1)} & x_{1}^{(2)} & x_{1}^{(3)} & .... & x_{1}^{(m)} \\
+ x_{2}^{(1)} & x_{2}^{(2)} & x_{2}^{(3)} & .... & x_{2}^{(m)} \\
+ x_{3}^{(1)} & x_{3}^{(2)} & x_{3}^{(3)} & .... & x_{3}^{(m)} \\
+ . & . & . & .... & . \\
+ . & . & . & .... & . \\
+ x_{n}^{(1)} & x_{n}^{(2)} & x_{n}^{(3)} & .... & x_{n}^{(m)}
+ \end{bmatrix}
+
+ :math:`X^{(1)}` will represent the entire :math:`n \times 1` vector representing the first data sample, while
+ :math:`x_{3}^{(1)}` will represent the third feature in the first data sample.
+
+ The figure below illustrates a 2-layer neural network where a single data sample (with 3 features) is provided as input.
+ The input layer is not counted as one of the layers.
+
+ .. image:: ../figs/2layer_NN.drawio.png
+
+ Each hidden layer produces activations: in this example, layer 1 has 3 activations, while layer 2 has only one activation.
+
+ .. math::
+ a^{[1]} = \begin{bmatrix}
+ a_{1}^{[1]} \\
+ a_{2}^{[1]} \\
+ a_{3}^{[1]}
+ \end{bmatrix}
+
+ .. math::
+ a^{[2]} = a_{1}^{[2]}
+
+ The number in square brackets represents the layer number, while the subscript denotes the neuron's index within that layer.
+
+ Each neuron in every layer computes the *Z* value for each input sample and then calculates the activation value for that sample.
+
+ The figure illustrates this process with an example of the first neuron in layer 1 processing the first input sample.
+
+ .. image:: ../figs/activation.drawio.png
+
+ In this situation if :math:`W_{1}^{[1]}` is
+
+ .. math::
+
+ W_{1}^{[1]} = \begin{bmatrix}
+ 10.0 \\
+ 20.0 \\
+ 30.0
+ \end{bmatrix}
+
+ then :math:`W_{1}^{[1]T}` becomes
+
+ .. math::
+ W_{1}^{[1]T} = \begin{bmatrix}
+ 10.0 & 20.0 & 30.0
+ \end{bmatrix}
+
+ and if :math:`X^{(1)}` is
+
+ .. math::
+
+ X^{(1)} = \begin{bmatrix}
+ 0.3 \\
+ 0.2 \\
+ 0.5
+ \end{bmatrix}
+
+ and if bias b = 10
+
+ we can calculate :math:`Z_{1}^{[1](1)}` as
+
+ .. math::
+
+ Z_{1}^{[1](1)} = \begin{bmatrix}
+ 10.0 & 20.0 & 30.0
+ \end{bmatrix} \times \begin{bmatrix}
+ 0.3 \\
+ 0.2 \\
+ 0.5
+ \end{bmatrix} + 10 \\
+
+ = (10.0* 0.3) + (20.0 * 0.2) + (30.0 * 0.5) + 10 = 32
+
+ Now if we apply the ReLU activation function :math:`max(0, x)` we get the activation :math:`a_{1}^{[1](1)}` as
+
+ .. math::
+ max(0, 32) = 32
+
+
+ Similarly, we can calculate the activations for all the neurons in layer 1 for the input sample :math:`X^{(1)}`
+
+
+ .. math::
+
+ a_{1}^{[1](1)} = f(Z_{1}^{[1](1)}) = f(W_{1}^{[1]T} \times X^{(1)} + b_{1}^{[1]})
+
+ .. math::
+
+ a_{2}^{[1](1)} = f(Z_{2}^{[1](1)}) = f(W_{2}^{[1]T} \times X^{(1)} + b_{2}^{[1]})
+
+ .. math::
+
+ a_{3}^{[1](1)} = f(Z_{3}^{[1](1)}) = f(W_{3}^{[1]T} \times X^{(1)} + b_{3}^{[1]})
+
+ Where :math:`W_{1}^{[1]T}, W_{2}^{[1]T}, W_{3}^{[1]T}` are transpose of vectors of size :math:`(3 \times 1)`.
+
+ The above example demonstrates how this process works for a single neuron within a layer. In practice we can stack the weights of all the neurons in a layer
+ into a matrix.
+
+ .. math::
+
+ W = \begin{bmatrix}
+ ------ W_{1}^{[1]T} ------- \\
+ ------ W_{2}^{[1]T} ------- \\
+ ------ W_{3}^{[1]T} -------
+ \end{bmatrix}
+
+ Similarly, we can stack the biases of the different neurons in a layer
+
+ .. math::
+
+ B = \begin{bmatrix}
+ b_{1}^{[1]} \\
+ b_{2}^{[1]} \\
+ b_{3}^{[1]}
+ \end{bmatrix}
+
+ and the operation
+
+ .. math::
+
+ Z = W^{T} \times X + B
+
+ corresponds to the calculations
+
+ .. math::
+
+ Z^{[1](1)} = \begin{bmatrix}
+ Z_{1}^{[1](1)} \\
+ Z_{2}^{[1](1)} \\
+ Z_{3}^{[1](1)}
+ \end{bmatrix}
+
+ .. math::
+
+ = \begin{bmatrix}
+ W_{1}^{[1]T} \times X^{(1)} + b_{1}^{[1]} \\
+ W_{2}^{[1]T} \times X^{(1)} + b_{2}^{[1]} \\
+ W_{3}^{[1]T} \times X^{(1)} + b_{3}^{[1]}
+ \end{bmatrix}
+
+
+
+ and finally we apply the activation function to the above matrix
+
+ .. math::
+
+ a^{[1](1)} = \begin{bmatrix}
+ f(Z_{1}^{[1](1)}) \\
+ f(Z_{2}^{[1](1)}) \\
+ f(Z_{3}^{[1](1)})
+ \end{bmatrix}
+
+ .. math::
+
+ = \begin{bmatrix}
+ a_{1}^{[1](1)} \\
+ a_{2}^{[1](1)} \\
+ a_{3}^{[1](1)}
+ \end{bmatrix}
+
+
+
+
+ The above example illustrates how a single input sample is processed by a layer with 3 neurons. For *m* input samples and *a* neurons, we can compute the
+ complete activation of the first layer for all samples as follows:
+
+ .. math::
+
+ a^{[1]} = \begin{bmatrix}
+ a_{1}^{(1)} & a_{1}^{(2)} & .... & a_{1}^{(m)} \\
+ a_{2}^{(1)} & a_{2}^{(2)} & .... & a_{2}^{(m)} \\
+ a_{3}^{(1)} & a_{3}^{(2)} & .... & a_{3}^{(m)} \\
+ . & . & .... & . \\
+ . & . & .... & . \\
+ a_{a}^{(1)} & a_{a}^{(2)} & .... & a_{a}^{(m)} \\
+ \end{bmatrix}
+
+This will involve a GEneral Matrix Multiplication (GEMM) operation :math:`W^{[1]T} \times X` where :math:`X` will be the entire set of input samples
+represented as a matrix of dimensions :math:`n \times m` (where *n* is the number of features in an input sample and *m* is the number of input samples;
+in the above example *n* is 3). :math:`W^{[1]T}` will be a matrix of dimensions :math:`a \times n` (where *a* is the number of neurons in that layer
+and *n* is the number of features in an input sample; in the above example *a* is 3). This will result in the output of the first layer represented as the matrix :math:`a^{[1]}`
+and it will have the dimensions :math:`a \times m`. In matrix :math:`a^{[1]}`, the horizontal axis represents the training samples, while the vertical axis
+represents the neurons in a layer.
+
+The matrix :math:`a^{[1]}` holds the value of :math:`a` neurons applied to :math:`m` input samples. This matrix then forms the input to the next layer in the neural network.
+
+We typically initialize the weights of each neuron randomly, although methods like **Xavier Initialization**, **He Initialization**, or
+**Orthogonal Initialization** are commonly used to improve training efficiency.
+
+
+
+Loss Function and Cost Functions
+********************************
+
+During training, for each batch of input samples, calculations are
+propagated through the network in a process called the **forward pass**. After each forward pass, the weights of the network are updated using
+the **backpropagation** algorithm, which adjusts the weights based on the gradients to minimize error. However, in practice weight updates do not happen after
+every individual sample; instead, they occur after each batch of data, depending on the **batch size** used. An **epoch** refers to a full pass
+through the entire training dataset, where the network processes all data samples, performing forward passes and backpropagation for each batch.
+
+1. The **loss function** (also known as the error function or objective function) measures the error or difference between
+the predicted output of the neural network and the actual target values for a single training example.
+In this tutorial loss function will be denoted as :math:`L(y', y)` where :math:`y'` is the predicted output while :math:`y` is the actual output.
+
+2. The **cost function** is the average or aggregate of the loss function computed over the entire training dataset. It provides a measure of the
+overall performance of the model across all examples. In this tutorial the cost function will be denoted as :math:`J(W, b)` where :math:`W` is the weights
+and :math:`b` is the biases in the NN.
+
+
+The network performs the following steps to calculate the cost:
+
+1. Inputs the data.
+2. Executes a forward pass to generate the network's output.
+3. Computes the error in the output using the loss function.
+
+In the example of the 2-layer neural network we discussed earlier, the loss calculation would look like this:
+
+.. math::
+
+ Z^{[1]} = W^{[1]T} \times X + b^{[1]} \rightarrow a^{[1]} = f(Z^{[1]}) \rightarrow Z^{[2]} = W^{[2]T} \times a^{[1]} + b^{[2]} \rightarrow a^{[2]} = f(Z^{[2]}) \rightarrow L(a^{[2]}, y)
+
+Where the loss :math:`L(y', y)` is
+
+.. math::
+
+ L(y', y) = y' - y = a^{[2]} - y
+
+Since errors can be both positive and negative, we want to ensure they don't cancel each other out.
+Therefore, in the cost function :math:`J(W, b)` we typically use the square of the error or the absolute value to avoid this issue.
+
+
+.. admonition:: Explanation
+ :class: attention
+
+ Mean Squared Error (MSE) is a common cost function.
+
+ .. math::
+
+ J(W, b) = \frac{1}{2m} \times \sum_{i=1}^{m} (y_{train}^{(i)} - y_{network}^{(i)})^{2}
+
+
+Gradient Descent
+****************
+
+After computing the cost, we can adjust the weights and biases to minimize the cost in the next epoch. This is done using an optimization algorithm like
+gradient descent. The goal is to iteratively update the values of W (weights) and b (biases) in the direction that reduces the cost function :math:`J(W, b)`.
+
+In gradient descent, we compute the gradient of the cost function with respect to the weights and biases, which tells us the direction of the steepest
+increase in the cost. We then adjust the weights and biases by moving in the opposite direction of this gradient to minimize the cost. The update rule
+is as follows:
+
+.. math::
+
+ w := w - \alpha \times \frac{\partial J(W, b)}{\partial w} \\
+ b := b - \alpha \times \frac{\partial J(W, b)}{\partial b}
+
+until we find the optimal values for *w* and *b* that yield the minimum value for :math:`J(W, b)`. Here :math:`\alpha` is the learning rate.
+
+.. image:: ../figs/gradient-descent.png
+.. image:: ../figs/gradient.png
+
+When selecting a cost function for a neural network, we typically choose a **convex function** because it ensures that there is only a single global
+optimal value, rather than multiple local minima. A **convex function** has the property that any line segment between two points on the function
+lies above or on the graph, meaning it has a **single valley shape**. This guarantees that when we minimize the cost, we are moving toward the global
+minimum, rather than getting stuck in a local minimum. To find this optimal value, we continuously update the model parameters, such as the weights and
+biases, using optimization techniques like gradient descent. This process moves us steadily toward the minimum point of the cost function.
+
+
+
+.. admonition:: Explanation
+ :class: attention
+
+ The derivatives give you the slope (the direction in which we need to move the parameter values) of the loss function and eventually it moves to the local optimum.
+
+ Suppose we have a function
+
+ .. math::
+
+ J = 3 \times v
+
+ Then the derivative of J with respect to v is
+
+ .. math::
+
+ \frac{\partial J}{\partial v} = 3
+
+ What this means is that if *v* changes by a small value :math:`\delta`, J changes by :math:`3 \times \delta`. For example
+
+ .. math::
+ v = 2 \rightarrow J = 6 \\
+ v = 2.001 \rightarrow J = 6.003
+
+ In this example, when v changes by 0.001, J changes by 0.003 (:math:`6.003 - 6`).
+
+But how does this approach help when the cost function :math:`J` involves weights and biases across multiple layers in the neural network, rather than just
+a single layer? So we are not dealing with :math:`J(W, b)` but instead :math:`J(W^{[1]}, W^{[2]}, ...., W^{[L]}, b^{[1]}, b^{[2]},...., b^{[L]})`.
+That is where we use the concept of **Computational graphs**.
+
+
+How does this approach help when the cost function :math:`J` involves weights and biases across multiple layers in the neural network, rather than just
+a single layer? In this case, we are dealing with a more complex function: not :math:`J(W, b)` but instead :math:`J(W^{[1]}, W^{[2]}, ...., W^{[L]}, b^{[1]}, b^{[2]},...., b^{[L]})`,
+where :math:`L` represents the number of layers in the network.
+
+This complexity is addressed using the concept of **computational graphs**. A **computational graph** is a directed acyclic graph where each node
+represents an operation (like addition or multiplication) or a variable (such as weights, biases, or activations), and the edges represent the flow of
+data between operations.
+
+
+.. admonition:: Explanation
+ :class: attention
+
+
+ Suppose we have a set of computations as follows:
+
+ .. math::
+
+ J(a, b, c) = 3 \times (a + b \times c) \\
+
+ We can rewrite this as:
+
+ .. math::
+
+ u = b \times c
+
+ .. math::
+
+ v = a + u
+
+ .. math::
+
+ J = 3 \times v
+
+ We can represent this computation as a directed graph where the nodes represent operations and edges represent the flow of data between these operations.
+
+ .. image:: ../figs/comp_graph.drawio.png
+
+ Then, by traversing the computational graph from right to left, we can determine how changes in parameters in one node affect the cost
+ function :math:`J(a, b, c)`.
+
+ If we change the value of *v* how much would the value of *J* change?
+
+ .. math::
+
+ \frac{\partial J}{\partial v} = 3 \; \rightarrow eq(1)
+
+ How does the change in *a* change the value of *J* (chain rule)?
+
+ .. math::
+
+ \frac{\partial J}{\partial a} = \frac{\partial J}{\partial v} \times \frac{\partial v}{\partial a} \\
+ \frac{\partial J}{\partial v} = 3 \; (from \: eq(1)) \\
+ \frac{\partial v}{\partial a} = 1 \\
+
+ \frac{\partial J}{\partial a} = 3 \times 1 = 3 \; \rightarrow eq(2)
+
+ How does the change in *u* change the value of *J* (chain rule)?
+
+ .. math::
+
+ \frac{\partial J}{\partial u} = \frac{\partial J}{\partial v} \times \frac{\partial v}{\partial u} \\
+ \frac{\partial J}{\partial v} = 3 \; (from \: eq(1)) \\
+ \frac{\partial v}{\partial u} = 1 \\
+
+ \frac{\partial J}{\partial u} = 3 \times 1 = 3 \; \rightarrow eq(3)
+
+ How does the change in *b* change the value of *J* (chain rule)?
+
+ .. math::
+
+ \frac{\partial J}{\partial b} = \frac{\partial J}{\partial u} \times \frac{\partial u}{\partial b} \\
+ \frac{\partial J}{\partial u} = 3 \; (from \: eq(3)) \\
+ \frac{\partial u}{\partial b} = c \\
+
+ \frac{\partial J}{\partial b} = 3 \times c = 3c
+
+
+ How does the change in *c* change the value of *J* (chain rule)?
+
+ .. math::
+
+ \frac{\partial J}{\partial c} = \frac{\partial J}{\partial u} \times \frac{\partial u}{\partial c} \\
+ \frac{\partial J}{\partial u} = 3 \; (from \: eq(3)) \\
+ \frac{\partial u}{\partial c} = b \\
+
+ \frac{\partial J}{\partial c} = 3 \times b = 3b
+
+ As seen from above when computing a derivative it is easier to move from the right to left following the computation graph.
+
+
+
+Backpropagation
+***************
+
+Based on the cost function, we may need to either excite (increase the influence) or inhibit (decrease the influence) certain neurons. To achieve this,
+each layer indirectly affects the weights and biases of the preceding layer using the same computational graph concept we discussed earlier. This process
+is known as backpropagation.
+
+So, how does backpropagation connect with computational graphs? Let's examine a brief (and incomplete) Python code snippet that demonstrates how to
+update the final hidden layer using the cost function from the output layer.
+
+.. code-block:: python
+ :linenos:
+
+ # forward pass from the last hidden layer to the output layer
+ for i in range (1, m):
+ Zi = gemm(W, X[:i]) + b # matrix multiplication followed by addition
+ ai = f(Zi) # f() is the activation function
+
+ l = L(ai, yi) # L() is the loss function
+ J+ = l # accumulate the loss for each input sample
+
+ # average over m input samples
+ J = J / m
+
+ # Backpropagation from the output layer to the last hidden layer
+ # assuming we have just two neurons in the layer
+ dW1 += slope_W1(J, W1) # find the slope (derivative) of the cost function wrt W1
+ dW2 += slope_W2(J, W2) # find the slope (derivative) of the cost function wrt W2
+ db1 += slope_b1(J, b1) # find the slope (derivative) of the cost function wrt db1
+ db2 += slope_b2(J, b2) # find the slope (derivative) of the cost function wrt db2
+
+ # update the weights and biases
+ W1 = W1 - alpha * dW1 # alpha is the learning rate
+ W2 = W2 - alpha * dW2
+ b1 = b1 - alpha * db1
+ b2 = b2 - alpha * db2
+
+
+Where :math:`dW1 = \frac{\partial J}{\partial W_{1}}`, :math:`dW2 = \frac{\partial J}{\partial W_{2}}`, :math:`db1 = \frac{\partial J}{\partial b_{1}}` and :math:`db2 = \frac{\partial J}{\partial b_{2}}`. In practice, we will replace the for loop with a vectorized implementation to improve efficiency.
+
+Convergence
+***************
+
+Finally, we stop the training when the network converges. In the context of neural networks, convergence refers to the point where the training process
+stabilizes, and the performance metrics (such as the cost function) cease to improve significantly or become consistent.
+
+
+
+
+
+.. admonition:: Key Points
+ :class: hint
+
+ #. At its core, a neural network performs general matrix-matrix operations (GEMM).
+ #. After each epoch, weights are adjusted to recalibrate the network.
+ #. The more data you have, the more effective this recalibration becomes (brute force approach).
\ No newline at end of file
diff --git a/job_scripts/bkp_multinode_torchrun.pbs b/job_scripts/bkp_multinode_torchrun.pbs
new file mode 100644
index 0000000..cad8293
--- /dev/null
+++ b/job_scripts/bkp_multinode_torchrun.pbs
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+#PBS -P vp91
+#PBS -q gpuvolta
+
+#PBS -l ncpus=96
+#PBS -l ngpus=8
+#PBS -l mem=10GB
+#PBS -l walltime=00:05:00
+
+#PBS -N multinode
+# PBS job script: runs distributed_data_parallel.py, then starts multinode_torchrun.py through torchrun.
+module load python3/3.11.0
+module load cuda/12.3.2
+module load nccl/2.19.4
+
+. /scratch/vp91/Training-Venv/pytorch/bin/activate
+# NOTE(review): this script runs to completion before the torchrun launch below - confirm it is intended here.
+python3 /scratch/vp91/$USER/intro-to-pytorch/src/distributed_data_parallel.py
+
+# Get the list of allocated nodes
+NODES=$(cat $PBS_NODEFILE | uniq)
+NODE_ARR=($NODES)
+
+
+
+# Define the master node (usually the first node in the list)
+MASTER_ADDR=${NODE_ARR[0]}
+MASTER_PORT=12355 # Set an appropriate port for communication
+# NOTE(review): node and process counts are hard-coded, not derived from the PBS request - keep them in step with ncpus/ngpus above.
+NNODES=2
+NPROC_PER_NODE=4
+WORLD_SIZE=$(($NNODES * $NPROC_PER_NODE)) # not referenced again in this script
+
+# Rendezvous backend and endpoint
+RDZV_BACKEND="c10d"
+RDZV_ENDPOINT="${MASTER_ADDR}:${MASTER_PORT}"
+RDZV_ID="100"
+# NOTE(review): torchrun is invoked once, from wherever this script runs; nothing here starts it on the other nodes - verify.
+torchrun --nnodes=$NNODES --nproc_per_node=$NPROC_PER_NODE \
+ --rdzv_backend=$RDZV_BACKEND --rdzv_endpoint=$RDZV_ENDPOINT --rdzv_id=$RDZV_ID \
+ /scratch/vp91/$USER/intro-to-pytorch/src/multinode_torchrun.py
\ No newline at end of file
diff --git a/job_scripts/distributed_data_parallel.pbs b/job_scripts/distributed_data_parallel.pbs
new file mode 100644
index 0000000..2bdf2d1
--- /dev/null
+++ b/job_scripts/distributed_data_parallel.pbs
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+#PBS -P vp91
+#PBS -q gpuvolta
+
+#PBS -l ncpus=24
+#PBS -l ngpus=2
+#PBS -l mem=10GB
+#PBS -l walltime=00:05:00
+
+#PBS -N distributed_data_parallel
+
+module load python3/3.11.0
+module load cuda/12.3.2
+
+. /scratch/vp91/Training-Venv/pytorch/bin/activate
+
+python3 /scratch/vp91/$USER/intro-to-pytorch/src/distributed_data_parallel.py
\ No newline at end of file
diff --git a/job_scripts/multinode_torchrun.pbs b/job_scripts/multinode_torchrun.pbs
new file mode 100644
index 0000000..fee78ab
--- /dev/null
+++ b/job_scripts/multinode_torchrun.pbs
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+#PBS -P vp91
+#PBS -q gpuvolta
+
+#PBS -l ncpus=96
+#PBS -l ngpus=8
+#PBS -l mem=10GB
+#PBS -l walltime=00:20:00
+
+#PBS -N multinode
+
+module load python3/3.11.0
+module load cuda/12.3.2
+
+. /scratch/vp91/Training-Venv/pytorch/bin/activate
+
+# Set variables
+if [[ $PBS_NCPUS -ge $PBS_NCI_NCPUS_PER_NODE ]]
+then
+ NNODES=$((PBS_NCPUS / PBS_NCI_NCPUS_PER_NODE))
+else
+ NNODES=1
+fi
+
+PROC_PER_NODE=$((PBS_NGPUS / NNODES))
+
+MASTER_ADDR=$(cat $PBS_NODEFILE | head -n 1)
+
+# Launch script
+LAUNCH_SCRIPT=/scratch/vp91/jxj900/intro-to-pytorch/job_scripts/multinode_torchrun.sh
+
+# Set execute permission
+chmod u+x ${LAUNCH_SCRIPT}
+
+# Run PyTorch application
+for inode in $(seq 1 $PBS_NCI_NCPUS_PER_NODE $PBS_NCPUS); do
+ echo $inode
+ pbsdsh -n $inode ${LAUNCH_SCRIPT} ${NNODES} ${PROC_PER_NODE} ${MASTER_ADDR} &
+done
+
+wait
\ No newline at end of file
diff --git a/job_scripts/multinode_torchrun.sh b/job_scripts/multinode_torchrun.sh
new file mode 100755
index 0000000..dee1fec
--- /dev/null
+++ b/job_scripts/multinode_torchrun.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Load shell environment variables
+source ~/.bashrc
+
+module load python3/3.11.0
+module load cuda/12.3.2
+
+. /scratch/vp91/Training-Venv/pytorch/bin/activate
+
+# Application script
+APPLICATION_SCRIPT=/scratch/vp91/jxj900/intro-to-pytorch/src/multinode_torchrun.py
+
+# Set execute permission
+chmod u+x ${APPLICATION_SCRIPT}
+
+# Run PyTorch application
+torchrun --nnodes=${1} --nproc_per_node=${2} --rdzv_id=100 --rdzv_backend=c10d --rdzv_endpoint=${3}:29400 ${APPLICATION_SCRIPT}
diff --git a/job_scripts/test.pbs b/job_scripts/test.pbs
new file mode 100644
index 0000000..4706964
--- /dev/null
+++ b/job_scripts/test.pbs
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+#PBS -P vp91
+#PBS -q normal
+
+#PBS -l ncpus=96
+#PBS -l mem=10GB
+#PBS -l walltime=00:05:00
+
+#PBS -N multinode
+# Diagnostic job: prints the Python path, the node list and the values derived from it; launches nothing.
+module load python3/3.11.0
+module load cuda/12.3.2
+module load nccl/2.19.4
+
+. /scratch/vp91/Training-Venv/pytorch/bin/activate
+# Show which python is found first on PATH after activation.
+which python
+
+# Get the list of allocated nodes
+NODES=$(cat $PBS_NODEFILE | uniq)
+echo $NODES
+
+NODE_ARR=($NODES)
+echo $NODE_ARR # an unindexed bash array expands to element 0 only
+
+# Define the master node (usually the first node in the list)
+MASTER_ADDR=${NODE_ARR[0]}
+MASTER_PORT=12355 # Set an appropriate port for communication
+echo $MASTER_ADDR
+# NOTE(review): node and process counts are hard-coded here, not derived from the PBS request.
+NNODES=2
+NPROC_PER_NODE=4
+WORLD_SIZE=$(($NNODES * $NPROC_PER_NODE))
+
+echo $WORLD_SIZE
+
+echo $PBS_NODEID
+
+# Rendezvous backend and endpoint
+RDZV_BACKEND="c10d" # assigned but not used further in this script
+RDZV_ENDPOINT="${MASTER_ADDR}:${MASTER_PORT}" # assigned but not used further in this script
\ No newline at end of file
diff --git a/notebooks/.ipynb_checkpoints/GPU_NN-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/GPU_NN-checkpoint.ipynb
new file mode 100644
index 0000000..3a0b249
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/GPU_NN-checkpoint.ipynb
@@ -0,0 +1,315 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Neural Networks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "96127ef4-bb03-492f-81b9-672f74c20b5c",
+ "metadata": {},
+ "source": [
+ "Neural networks are computational models inspired by the human brain, designed to recognize patterns and\n",
+ "make decisions based on data. They consist of interconnected layers of nodes, or \"neurons,\" which process\n",
+ "and transform input information. Through training, neural networks learn to improve their accuracy in tasks like image recognition, language processing, and more. Neural networks are composed of layers that perform operations on data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4098c4ec-368b-4802-9800-fc4c4b7479ba",
+ "metadata": {},
+ "source": [
+ "#### Set Device\n",
+ "Set the default device to the GPU if it exists"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b51be64-542f-401c-ae73-00da2bbd6471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "print(device)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b32c13d2-bee6-436c-b838-2c8e04a24ec6",
+ "metadata": {},
+ "source": [
+ "### Curate the dataset\n",
+ "Load the dataset, split into features (X) and output (y) variables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "222a7a99-2723-486d-9a1e-58d2792c84e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "dataset = np.loadtxt(datapath, delimiter=',')\n",
+ "X = dataset[:,0:8] \n",
+ "y = dataset[:,8]\n",
+ "\n",
+ "X_tensor = torch.tensor(X, dtype=torch.float32)\n",
+ "y_tensor = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289c55b3-f54b-4a79-ba58-5788237aabb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model = PimaClassifier()\n",
+ "print(class_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09ec5647-aded-4179-89c0-0c5d44b0c6db",
+ "metadata": {},
+ "source": [
+ "#### Save the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5a58574-4ce7-495c-be0b-d22694a6ed7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "modelpath = os.path.expandvars('/scratch/vp91/$USER/class_model')\n",
+ "print(modelpath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b2f9c1c9-4d45-447d-ac23-40593759da3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.save(class_model.state_dict(), modelpath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "65839c60-6d5f-4540-aabe-fbd1914692d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls /scratch/vp91/$USER/class_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a7fcdf90-1db4-4c6e-9f28-1bc8e7a4bfd0",
+ "metadata": {},
+ "source": [
+ "#### Load the model on the GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d70bfdf8-9619-4448-ad45-cb2277d937ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model.load_state_dict(torch.load(modelpath, map_location=device, weights_only=True))\n",
+ "class_model.to(device)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss_fn = nn.BCELoss()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "518bf03c-0594-494e-bdb3-a41421ac53a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimizer = optim.Adam(class_model.parameters(), lr=0.001)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8608326d-27a3-4a5b-b485-e9af74b0f2e8",
+ "metadata": {},
+ "source": [
+ "#### Training the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad6b09b5-9e9b-4376-ad89-d1ff8b4791eb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "n_epochs = 100\n",
+ "batch_size = 10\n",
+ " \n",
+ "for epoch in range(n_epochs):\n",
+ " for i in range(0, len(X_tensor), batch_size):\n",
+ " Xbatch = X_tensor[i:i+batch_size].to(device) # move the tensor to GPU\n",
+ "\n",
+ " y_pred = class_model(Xbatch)\n",
+ " \n",
+ " ybatch = y_tensor[i:i+batch_size].to(device) # move the tensor to GPU\n",
+ " \n",
+ " loss = loss_fn(y_pred, ybatch)\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " \n",
+ " print(f'Finished epoch {epoch}, latest loss {loss}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04b239a9-7e1c-42ba-8b54-58c79d26986b",
+ "metadata": {},
+ "source": [
+ "#### Evaluate the Model\n",
+ "\n",
+ "Currently, we are testing the model on the training dataset. Ideally, we should split the data into separate training and testing datasets, or use a distinct dataset for evaluation. For simplicity, we are testing the model on the same data used for training.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "89e28fe2-90c5-4cd4-bd37-30ebe9183772",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with torch.no_grad():\n",
+ " y_pred = class_model(X_tensor.to(device))\n",
+ " \n",
+ "accuracy = (y_pred.round().to(device) == y_tensor.to(device)).float().mean()\n",
+ "print(f\"Accuracy {accuracy}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **What is the time difference in training**? Compare it with the previous training."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84664807-8163-46dd-b037-1b3f73d8cbd9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/.ipynb_checkpoints/building_NN-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/building_NN-checkpoint.ipynb
new file mode 100644
index 0000000..20e626c
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/building_NN-checkpoint.ipynb
@@ -0,0 +1,426 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Neural Networks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "96127ef4-bb03-492f-81b9-672f74c20b5c",
+ "metadata": {},
+ "source": [
+ "Neural networks are computational models inspired by the human brain, designed to recognize patterns and\n",
+ "make decisions based on data. They consist of interconnected layers of nodes, or \"neurons,\" which process\n",
+ "and transform input information. Through training, neural networks learn to improve their accuracy in tasks like image recognition, language processing, and more. Neural networks consist of layers that perform operations on data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f1baab1-a7b6-429e-afa3-822e61da46ad",
+ "metadata": {},
+ "source": [
+ "### Dataset\n",
+ "The Pima Indians Diabetes dataset is a popular dataset in the field of machine learning and statistics, particularly for those working on classification problems. \n",
+ "\n",
+ "Dataset Overview:\n",
+ "**Source**: The dataset was created by the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK) and is available in the UCI Machine Learning Repository.\n",
+ "**Purpose**: The dataset is used to predict the onset of diabetes within five years based on diagnostic measures.\n",
+ "**Features**: The dataset contains 768 samples, each with 8 features. \n",
+ "\n",
+ "The features are:\n",
+ "\n",
+ "1. Pregnancies: Number of times pregnant.\n",
+ "2. Glucose: Plasma glucose concentration (mg/dL) at 2 hours in an oral glucose tolerance test.\n",
+ "3. Blood Pressure: Diastolic blood pressure (mm Hg) at the time of screening.\n",
+ "4. Skin Thickness: Triceps skinfold thickness (mm) measured at the back of the upper arm.\n",
+ "5. Insulin: 2-Hour serum insulin (mu U/ml).\n",
+ "6. BMI: Body mass index (weight in kg/(height in m)^2).\n",
+ "7. Diabetes Pedigree Function: A function that scores likelihood of diabetes based on family history.\n",
+ "8. Age: Age of the individual (years).\n",
+ "\n",
+ "**Outcome**: Whether or not the individual has diabetes (1 for positive, 0 for negative)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6d4b1b9b-bf50-4867-8345-43a7106a25da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!head /scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee303492-97bf-4274-9a14-04c1c116f6c8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "print(datapath)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b32c13d2-bee6-436c-b838-2c8e04a24ec6",
+ "metadata": {},
+ "source": [
+ "### Curate the dataset\n",
+ "Load the dataset, split into features (X) and output (y) variables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "222a7a99-2723-486d-9a1e-58d2792c84e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = np.loadtxt(datapath, delimiter=',')\n",
+ "X = dataset[:,0:8] \n",
+ "y = dataset[:,8]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c2a20b9-0c73-4995-b772-0e773cc03c8b",
+ "metadata": {},
+ "source": [
+ "### Convert the data to tensors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f3c45e8f-894e-46d3-84c1-25fbca333f81",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_tensor = torch.tensor(X, dtype=torch.float32)\n",
+ "y_tensor = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model\n",
+ "\n",
+ "When designing the model, keep the following points in mind:\n",
+ "\n",
+ "1. The input features in the input layer must match the input features in the dataset (`X_tensor`).\n",
+ "2. A high number of layers can increase computation time, while too few layers may result in poor predictions.\n",
+ "3. Each layer should be followed by an activation function.\n",
+ "\n",
+ "In this example, we will use a 3-layer neural network:\n",
+ "\n",
+ "1. The input layer expects 8 features.\n",
+ "2. The first hidden layer has 12 neurons, followed by a ReLU activation function.\n",
+ "3. The second hidden layer has 8 neurons, followed by another ReLU activation function.\n",
+ "4. The output layer has one neuron, followed by a sigmoid activation function.\n",
+ "\n",
+ "The sigmoid function outputs values between 0 and 1, which is exactly what we need."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20d8051c-32ee-45c1-b797-58c0e68bbcfb",
+ "metadata": {},
+ "source": [
+ "\n",
+ "In PyTorch, neural networks can be defined using different approaches, and two common ones are the Sequential model and the class-based model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a81a5b5b-d3bc-434f-9b92-a8571d7599f5",
+ "metadata": {},
+ "source": [
+ "#### Sequential model\n",
+ "\n",
+ "* The Sequential model is a simple, linear stack of layers where each layer has a single input and output. It is useful for straightforward feedforward networks where layers are applied in a sequential order.\n",
+ "* It is easier to use for simple architectures where layers are applied in a linear fashion.\n",
+ "* Defined Using: *torch.nn.Sequential*."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6cb10442-640a-4ded-a81f-6c2607a86bed",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "seq_model = nn.Sequential(\n",
+ " nn.Linear(8, 12),\n",
+ " nn.ReLU(),\n",
+ " nn.Linear(12, 8),\n",
+ " nn.ReLU(),\n",
+ " nn.Linear(8, 1),\n",
+ " nn.Sigmoid()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8647eb7-799b-42e5-b42d-be699b5e5a3e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(seq_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36a8225d-4528-4a2f-a23e-9285d4ab5c8e",
+ "metadata": {},
+ "source": [
+ "### Class-Based Model\n",
+ "\n",
+ "The class-based model allows you to define a network by subclassing torch.nn.Module. This approach provides greater flexibility and control, making it suitable for complex models and custom behaviors.\n",
+ "\n",
+ "* Offers full control over the network architecture, including complex data flows, multiple inputs/outputs, and custom forward methods.\n",
+ "* Custom Forward Pass: You can define complex forward passes and control data flow through the network.\n",
+ "* Dynamic Behavior: Allows for dynamic computations, such as conditional layers or operations.\n",
+ "* Defined Using: Subclass of torch.nn.Module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289c55b3-f54b-4a79-ba58-5788237aabb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model = PimaClassifier()\n",
+ "print(class_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e889fd5-49a7-4509-a2fc-ef7d5f8c722f",
+ "metadata": {},
+ "source": [
+ "### Define the loss function\n",
+ "Binary Cross-Entropy (BCE) Loss: Measures the performance of a classification model whose output is a probability value between 0 and 1. It calculates the difference between the predicted probabilities and the actual binary labels (0 or 1) and penalizes the model more when the predictions are further from the true labels.\n",
+ "\n",
+ "BCELoss(y', y)=−[ylog(y')+(1−y)log(1−y')]\n",
+ "\n",
+ "Where y' is the predicted output and y is the actual output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss_fn = nn.BCELoss()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76aa2eab-7897-439e-9c20-08eb523ec7d6",
+ "metadata": {},
+ "source": [
+ "### Optimizer\n",
+ "\n",
+ "An optimizer's main role is to update the model's parameters based on the gradients computed during backpropagation.\n",
+ "\n",
+ "1. **Parameter Updates**: Optimizers adjust the weights and biases of the neural network to reduce the loss. This involves applying algorithms that modify the parameters to minimize the difference between the predicted outputs and the actual targets.\n",
+ "2. **Learning Rate Management**: Most optimizers include mechanisms to adjust the learning rate, either statically or dynamically, to control how large the parameter updates are.\n",
+ "\n",
+ "In this example we use an optimizer called Adaptive Moment Estimation (Adam). It computes adaptive learning rates for each parameter by considering both the mean and the variance of the gradients."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "518bf03c-0594-494e-bdb3-a41421ac53a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimizer = optim.Adam(class_model.parameters(), lr=0.001)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8608326d-27a3-4a5b-b485-e9af74b0f2e8",
+ "metadata": {},
+ "source": [
+ "#### Training the Model\n",
+ "\n",
+ "Training a neural network involves epochs and batches, which define how data is fed to the model:\n",
+ "\n",
+ "- **Epoch:** A full pass through the entire training dataset.\n",
+ "- **Batch:** A subset of samples processed at a time, with gradient descent performed after each batch.\n",
+ "\n",
+ "In practice, the dataset is divided into batches, and each batch is processed sequentially in a training loop. Completing all batches constitutes one epoch. The process is repeated for multiple epochs to refine the model.\n",
+ "\n",
+ "Batch size is constrained by system memory (GPU memory), and computational demands scale with batch size. More epochs and batches lead to better model performance but increase training time. The optimal number of epochs and batch size is often determined through experimentation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ea1f2ef-0b63-435c-a93f-329aa9ae6228",
+ "metadata": {},
+ "source": [
+ "#### Purpose of optimizer.zero_grad(), loss.backward(), optimizer.step()\n",
+ "\n",
+ "**optimizer.zero_grad()**: During training, gradients accumulate by default in PyTorch. This means that if you don’t clear them, gradients from multiple backward passes (from different batches) will be added together, which can lead to incorrect updates to the model parameters.\n",
+ "By calling optimizer.zero_grad(), you ensure that gradients from previous steps are reset to zero, preventing them from affecting the current update.\n",
+ "\n",
+ "**loss.backward()**: Calculates the gradients of the loss with respect to each parameter of the model. This is done using backpropagation, a key algorithm for training neural networks.\n",
+ "\n",
+ "**optimizer.step()**: Used to update the model's parameters based on the gradients computed during the backward pass (**loss.backward()**)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad6b09b5-9e9b-4376-ad89-d1ff8b4791eb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "n_epochs = 100\n",
+ "batch_size = 10\n",
+ " \n",
+ "for epoch in range(n_epochs):\n",
+ " for i in range(0, len(X_tensor), batch_size):\n",
+ " Xbatch = X_tensor[i:i+batch_size]\n",
+ " y_pred = class_model(Xbatch)\n",
+ " ybatch = y_tensor[i:i+batch_size]\n",
+ " loss = loss_fn(y_pred, ybatch)\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " print(f'Finished epoch {epoch}, latest loss {loss}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04b239a9-7e1c-42ba-8b54-58c79d26986b",
+ "metadata": {},
+ "source": [
+ "#### Evaluate the Model\n",
+ "\n",
+ "Currently, we are testing the model on the training dataset. Ideally, we should split the data into separate training and testing datasets, or use a distinct dataset for evaluation. For simplicity, we are testing the model on the same data used for training.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "89e28fe2-90c5-4cd4-bd37-30ebe9183772",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with torch.no_grad():\n",
+ " y_pred = class_model(X_tensor)\n",
+ " \n",
+ "accuracy = (y_pred.round() == y_tensor).float().mean()\n",
+ "print(f\"Accuracy {accuracy}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **Increase the number of layers in the neural network.** Observe any changes in accuracy.\n",
+ "2. **Change the optimizer from Adam to [Stochastic Gradient Descent (SGD)](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html).** Evaluate how this affects the loss calculation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84664807-8163-46dd-b037-1b3f73d8cbd9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/.ipynb_checkpoints/data_parallel-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/data_parallel-checkpoint.ipynb
new file mode 100644
index 0000000..e90d4e5
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/data_parallel-checkpoint.ipynb
@@ -0,0 +1,348 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Using Multiple GPUs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "import pandas as pd\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4098c4ec-368b-4802-9800-fc4c4b7479ba",
+ "metadata": {},
+ "source": [
+ "#### Set Device\n",
+ "Set the default device to the GPU if one is available."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b51be64-542f-401c-ae73-00da2bbd6471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "print(device)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d29976e-56b5-46ce-8743-5480524bbca1",
+ "metadata": {},
+ "source": [
+ "### Dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea28a5d7-0d69-47c9-ba24-995f02168856",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "# Column names for the CSV file, which has no header row\n",
+ "column_names = [\n",
+ " 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
+ " 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'\n",
+ "]\n",
+ "\n",
+ "# Define the custom Dataset class\n",
+ "class PimaDataset(Dataset):\n",
+ " def __init__(self, csv_file):\n",
+ " # Load the CSV file without header and assign column names\n",
+ " self.data = pd.read_csv(csv_file, header=None, names=column_names)\n",
+ " self.features = self.data.drop('Outcome', axis=1).values\n",
+ " self.labels = self.data['Outcome'].values\n",
+ " \n",
+ " # Convert to PyTorch tensors\n",
+ " self.features_tensor = torch.tensor(self.features, dtype=torch.float32)\n",
+ " self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)\n",
+ " \n",
+ " # Calculate mean and std\n",
+ " self.mean = self.features_tensor.mean(dim=0)\n",
+ " self.std = self.features_tensor.std(dim=0)\n",
+ " \n",
+ " # Normalize the features\n",
+ " self.features_tensor = (self.features_tensor - self.mean) / self.std\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.data)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " feature = self.features_tensor[idx]\n",
+ " label = self.labels_tensor[idx]\n",
+ " return feature, label"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "110beb4c-c07b-4a1b-b2d4-ce7795af31a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = PimaDataset(datapath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26d7f973-cf59-4709-a0c2-a158100449e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "batch_size = 32\n",
+ "data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289c55b3-f54b-4a79-ba58-5788237aabb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model = PimaClassifier()\n",
+ "print(class_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09ec5647-aded-4179-89c0-0c5d44b0c6db",
+ "metadata": {},
+ "source": [
+ "#### Data Parallelism\n",
+ "PyTorch will only use one GPU by default. You can easily run your operations on multiple GPUs by making your model run in parallel using `nn.DataParallel`. \n",
+ "\n",
+ "Check for multiple GPUs and if multiple GPUs are available, wrap the model with `nn.DataParallel`. Finally, move the model to the GPUs using `model.to(device)`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0dc9a702-4b3a-423f-80d3-79d1e3d9e11f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(torch.cuda.device_count())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5a58574-4ce7-495c-be0b-d22694a6ed7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if torch.cuda.device_count() > 1:\n",
+ " class_model = nn.DataParallel(class_model)\n",
+ " print(f\"Using {torch.cuda.device_count()} GPUs: {', '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d70bfdf8-9619-4448-ad45-cb2277d937ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model.to(device)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss_fn = nn.BCELoss()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "518bf03c-0594-494e-bdb3-a41421ac53a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimizer = optim.Adam(class_model.parameters(), lr=0.001)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8608326d-27a3-4a5b-b485-e9af74b0f2e8",
+ "metadata": {},
+ "source": [
+ "#### Training the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67278570-0838-40a9-9410-851c51a46b95",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04b239a9-7e1c-42ba-8b54-58c79d26986b",
+ "metadata": {},
+ "source": [
+ "DataParallel splits your data automatically and sends job orders to multiple models on several GPUs. After each model finishes its job, DataParallel collects and merges the results before returning them to you."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84664807-8163-46dd-b037-1b3f73d8cbd9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "n_epochs = 100\n",
+ "batch_size = 10\n",
+ " \n",
+ "for epoch in range(n_epochs):\n",
+ " running_loss = 0.0\n",
+ " for batch_features, batch_labels in data_loader:\n",
+ " batch_features = batch_features.to(device)\n",
+ " batch_labels = batch_labels.to(device)\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " \n",
+ " outputs = class_model(batch_features)\n",
+ " \n",
+ " batch_labels = batch_labels.unsqueeze(1).float()\n",
+ " loss = loss_fn(outputs, batch_labels)\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " running_loss += loss.item() * batch_features.size(0)\n",
+ " \n",
+ " epoch_loss = running_loss / len(dataset)\n",
+ " print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss:.4f}')\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "32e7a7d0-666f-482a-b250-34a3f1199240",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aedca9ef-fa2d-41e8-8847-c7e9fbcf498c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(f\"Model is on device: {next(class_model.parameters()).device}\")\n",
+ "if isinstance(class_model, nn.DataParallel):\n",
+ " print(f\"DataParallel devices: {class_model.device_ids}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **What is the time difference in training**? Compare it with the previous training (change epoch to 100)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/.ipynb_checkpoints/dataloader-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/dataloader-checkpoint.ipynb
new file mode 100644
index 0000000..e21222e
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/dataloader-checkpoint.ipynb
@@ -0,0 +1,577 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b670ae8e-1350-4be1-8575-df9267fdfae7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from torch.utils.data import Dataset\n",
+ "from torchvision import datasets\n",
+ "from torchvision.transforms import ToTensor\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "\n",
+ "import requests\n",
+ "import zipfile\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f82a4673-e5e9-4f5f-b7e6-112a8fa1e47d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "775f4111-d1e2-4cd1-bd2d-a7a1bbb3d21d",
+ "metadata": {},
+ "source": [
+ "PyTorch offers two data primitives—`torch.utils.data.DataLoader` and `torch.utils.data.Dataset`— which \n",
+ "facilitate the use of both pre-loaded datasets and custom data.\n",
+ "\n",
+ "The `Fashion-MNIST` dataset is an example of a pre-loaded curated dataset. It can be loaded using the following parameters:\n",
+ "\n",
+ "- `root` specifies the path where the training or test data is stored.\n",
+ "- `train` indicates whether to load the training or test dataset.\n",
+ "- `download=True` will download the data from the internet if it's not available at the specified `root`.\n",
+ "- `transform` and `target_transform` define the transformations applied to the features and labels, respectively.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "749f1295-2191-40e3-9f7f-8c34589d1a7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "training_data = datasets.FashionMNIST(\n",
+ " root=\"data\",\n",
+ " train=True,\n",
+ " download=True,\n",
+ " transform=ToTensor()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b818ed5f-c845-4fca-9015-99196f3b937d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_data = datasets.FashionMNIST(\n",
+ " root=\"data\",\n",
+ " train=False,\n",
+ " download=True,\n",
+ " transform=ToTensor()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aef48041-ad83-4813-924d-22404d691286",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls data/FashionMNIST/raw/"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "80984294-a299-4da7-802d-9184706a5f2a",
+ "metadata": {},
+ "source": [
+ "#### Visualizing a sample of the dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9f52a941-cd0e-4665-9e6c-182ffc33c5b5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "labels_map = {\n",
+ " 0: \"T-Shirt\",\n",
+ " 1: \"Trouser\",\n",
+ " 2: \"Pullover\",\n",
+ " 3: \"Dress\",\n",
+ " 4: \"Coat\",\n",
+ " 5: \"Sandal\",\n",
+ " 6: \"Shirt\",\n",
+ " 7: \"Sneaker\",\n",
+ " 8: \"Bag\",\n",
+ " 9: \"Ankle Boot\",\n",
+ "}\n",
+ "figure = plt.figure(figsize=(8, 8))\n",
+ "cols, rows = 3, 3\n",
+ "for i in range(1, cols * rows + 1):\n",
+ " sample_idx = torch.randint(len(training_data), size=(1,)).item()\n",
+ " img, label = training_data[sample_idx]\n",
+ " figure.add_subplot(rows, cols, i)\n",
+ " plt.title(labels_map[label])\n",
+ " plt.axis(\"off\")\n",
+ " plt.imshow(img.squeeze(), cmap=\"gray\")\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d1d3c91-ba90-469d-9d8b-ab18a7768b84",
+ "metadata": {},
+ "source": [
+ "### Custom dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "57f0bdd5-5521-421f-a56c-532764d123af",
+ "metadata": {},
+ "source": [
+ "What if you are working with a custom dataset? To illustrate this, we will download a dataset and set it up for\n",
+ "use in PyTorch training. The data used for this demonstration is relatively *clean*. In a practical use case, significant time will likely be spent on cleaning and preparing the data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "727a8d18-c149-4ae7-8791-d301fa83e579",
+ "metadata": {},
+ "source": [
+ "The data:\n",
+ "\n",
+ "1. There are **3 classes**: pizza, steak, and sushi.\n",
+ "2. The data is split into *train* and *test* datasets.\n",
+ "3. Both *train* and *test* datasets are further organized into 3 directories, each corresponding to one of the classes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "88c9b050-a64a-45a6-ac46-06b9a0632431",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import zipfile\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# Setup path to data folder\n",
+ "data_root = Path(\"custom_data/\")\n",
+ "image_path = data_root / \"pizza_steak_sushi\"\n",
+ "\n",
+ "# If the image data doesn't exist, download it and curate it. \n",
+ "if not image_path.is_dir():\n",
+ " image_path.mkdir(parents=True, exist_ok=True)\n",
+ " \n",
+ " # Download pizza, steak, sushi data\n",
+ " url = \"https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip\"\n",
+ " with open(data_root / \"pizza_steak_sushi.zip\", \"wb\") as f:\n",
+ " request = requests.get(url)\n",
+ " f.write(request.content)\n",
+ "\n",
+ " with zipfile.ZipFile(data_root / \"pizza_steak_sushi.zip\", \"r\") as zip_ref:\n",
+ " zip_ref.extractall(image_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9675e0c5-cffc-4420-a419-eaf3e2198fb3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls custom_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b2d51954-2b02-42bb-90e3-121c105e3c7c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a8043a6-7ea8-44e7-908d-e0921c9930db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi/train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1425fa36-3c2e-4597-9b78-65516835ac83",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi/test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b27f580f-988a-425f-9b3b-e65a23742500",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi/train/pizza"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "696d0b71-2825-48ed-b84a-cf9519e296f4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from PIL import Image\n",
+ "img = Image.open(\"custom_data/pizza_steak_sushi/train/pizza/928670.jpg\")\n",
+ "img"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4577dd6b-c581-4dc5-996a-92b7c3a07436",
+ "metadata": {},
+ "source": [
+ "#### Setup train and testing paths"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49b85b56-2ace-4a01-b361-b00ef2b28f9e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "train_dir = image_path / \"train\"\n",
+ "test_dir = image_path / \"test\"\n",
+ "\n",
+ "train_dir, test_dir"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ba0bbe2b-edcd-4f07-b715-e42c6d33dc1c",
+ "metadata": {},
+ "source": [
+ "#### Transformation on the data\n",
+ "\n",
+ "\n",
+ "Transform functions in the PyTorch library simplify the application of various data enhancement/manipulation techniques \n",
+ "to your input data. These functions enable you to apply multiple changes simultaneously."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6ce41688-a909-477c-94ef-010ea6724445",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from torch.utils.data import DataLoader\n",
+ "from torchvision import datasets, transforms"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c42cbfdb-a5f3-4e07-8beb-e151835902fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write transform for image\n",
+ "data_transform = transforms.Compose([\n",
+ " # Resize the images to 64x64\n",
+ " transforms.Resize(size=(64, 64)),\n",
+ " # Flip the images randomly on the horizontal\n",
+ " transforms.RandomHorizontalFlip(p=0.5), # p = probability of flip, 0.5 = 50% chance\n",
+ " # Turn the image into a torch.Tensor\n",
+ " transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0 \n",
+ "])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e53af9e2-b0b9-40ff-9526-a4239600dc3f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with Image.open(\"custom_data/pizza_steak_sushi/train/pizza/928670.jpg\") as f:\n",
+ " fig, ax = plt.subplots(1, 2)\n",
+ " ax[0].imshow(f) \n",
+ " ax[0].set_title(f\"Original \\nSize: {f.size}\")\n",
+ " ax[0].axis(\"off\")\n",
+ "\n",
+ " transformed_image = data_transform(f).permute(1, 2, 0) \n",
+ " ax[1].imshow(transformed_image) \n",
+ " ax[1].set_title(f\"Transformed \\nSize: {transformed_image.shape}\")\n",
+ " ax[1].axis(\"off\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "af651bd5-8afa-4c8c-9661-03683f91be8d",
+ "metadata": {},
+ "source": [
+ "#### Loading Image Data Using ImageFolder\n",
+ "\n",
+ "`ImageFolder` is a generic data loader where images are expected to be organized into separate directories,\n",
+ "each corresponding to a different class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "245ada1a-e053-4905-aa44-39ef9814fde8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Use ImageFolder to create dataset(s)\n",
+ "from torchvision import datasets\n",
+ "train_data = datasets.ImageFolder(root=train_dir, # target folder of images\n",
+ " transform=data_transform, # transforms to perform on data (images)\n",
+ " target_transform=None) # transforms to perform on labels (if necessary)\n",
+ "\n",
+ "test_data = datasets.ImageFolder(root=test_dir, \n",
+ " transform=data_transform)\n",
+ "\n",
+ "print(f\"Train data:\\n{train_data}\\nTest data:\\n{test_data}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c7ad54a-a8f2-4963-86c3-3150cb68cae3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get class names as a list\n",
+ "class_names = train_data.classes\n",
+ "class_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59f14ace-5c56-4a18-b24e-67c34321a2ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Can also get class names as a dict\n",
+ "class_dict = train_data.class_to_idx\n",
+ "class_dict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ede23b28-601c-45ba-a544-ca4d828045ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check the lengths\n",
+ "len(train_data), len(test_data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8c8d91e6-c12d-4fb1-ab75-c95f1309c995",
+ "metadata": {},
+ "source": [
+ "#### DataLoader\n",
+ "\n",
+ "\n",
+ "In PyTorch, `DataLoader` is a built-in class that offers an efficient and flexible method for loading \n",
+ "data into a model for training or inference. It is especially beneficial for managing large datasets that \n",
+ "may not fit into memory and for carrying out data augmentation and preprocessing. \n",
+ "A `DataLoader` combines a dataset and a sampler, and provides an iterable over the given dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e4e7b6e1-7ec9-411e-9d3d-422d4d6f8bc9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "69ed025b-c14c-4130-97f6-5d00a3757880",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Turn train and test Datasets into DataLoaders\n",
+ "from torch.utils.data import DataLoader\n",
+ "train_dataloader = DataLoader(dataset=train_data, \n",
+ " batch_size=8, # how many samples per batch?\n",
+ " num_workers=1, # how many subprocesses to use for data loading? (higher = more)\n",
+ " shuffle=True) # shuffle the data?\n",
+ "\n",
+ "test_dataloader = DataLoader(dataset=test_data, \n",
+ " batch_size=8, \n",
+ " num_workers=1, \n",
+ " shuffle=False) # don't usually need to shuffle testing data\n",
+ "\n",
+ "train_dataloader, test_dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59008e99-69c0-4a5b-ae9a-419273c07841",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "img, label = next(iter(train_dataloader))\n",
+ "\n",
+ "print(f\"Image shape: {img.shape} -> [batch_size, color_channels, height, width]\")\n",
+ "print(f\"Label shape: {label.shape}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e3c2b18-2162-4d8e-beb9-991093854c57",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "type(img)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "496fb7e6-2727-4ba8-bb2c-6b8460b9565f",
+ "metadata": {},
+ "source": [
+ "#### Custom Dataset and DataLoader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0887d989-49c3-4012-910f-e011340b0059",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "# Column names for the CSV file, which has no header row\n",
+ "column_names = [\n",
+ " 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
+ " 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'\n",
+ "]\n",
+ "\n",
+ "# Define the custom Dataset class\n",
+ "class PimaDataset(Dataset):\n",
+ " def __init__(self, csv_file):\n",
+ " # Load the CSV file without header and assign column names\n",
+ " self.data = pd.read_csv(csv_file, header=None, names=column_names)\n",
+ " self.features = self.data.drop('Outcome', axis=1).values\n",
+ " self.labels = self.data['Outcome'].values\n",
+ " \n",
+ " # Convert to PyTorch tensors\n",
+ " self.features_tensor = torch.tensor(self.features, dtype=torch.float32)\n",
+ " self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)\n",
+ " \n",
+ " # Calculate mean and std\n",
+ " self.mean = self.features_tensor.mean(dim=0)\n",
+ " self.std = self.features_tensor.std(dim=0)\n",
+ " \n",
+ " # Normalize the features\n",
+ " self.features_tensor = (self.features_tensor - self.mean) / self.std\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.data)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " feature = self.features_tensor[idx]\n",
+ " label = self.labels_tensor[idx]\n",
+ " return feature, label"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c088ccc3-85a1-4a5d-ac9b-7699ff7e91fa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = PimaDataset(datapath)\n",
+ "batch_size = 32\n",
+ "data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "87e8a8d6-f0bc-4618-ad3e-dd3b1a5ca8c8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "features, outcomes = next(iter(data_loader))\n",
+ "\n",
+ "print(f\"Feature shape: {features.shape} -> [batch_size, input_features]\")\n",
+ "print(f\"Label shape: {outcomes.shape}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c0f401ab-c3cd-400f-8c97-b3c94146ac56",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/.ipynb_checkpoints/distributed_data_parallel-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/distributed_data_parallel-checkpoint.ipynb
new file mode 100644
index 0000000..23a6c4e
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/distributed_data_parallel-checkpoint.ipynb
@@ -0,0 +1,305 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Using Multiple GPUs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import os\n",
+ "import pandas as pd\n",
+ "\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim\n",
+ "import torch.distributed as dist\n",
+ "import torch.multiprocessing as mp\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "from torch.utils.data.distributed import DistributedSampler\n",
+ "from torch.nn.parallel import DistributedDataParallel as DDP"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4098c4ec-368b-4802-9800-fc4c4b7479ba",
+ "metadata": {},
+ "source": [
+ "#### Set Device\n",
+ "Set the default device to the GPU if one is available"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b51be64-542f-401c-ae73-00da2bbd6471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nb_gpus = 2\n",
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "# Column names for the CSV file, which has no header row\n",
+ "column_names = [\n",
+ " 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
+ " 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f944a147-3f5e-4c42-b142-850d04458270",
+ "metadata": {},
+ "source": [
+ "### Process Groups"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "50696138-0c0d-4a0b-aa80-ed73cff87fd2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def setup(rank, world_size):\n",
+ " os.environ['MASTER_ADDR'] = 'localhost'\n",
+ " os.environ['MASTER_PORT'] = '12355'\n",
+ " dist.init_process_group(\"nccl\", rank=rank, world_size=world_size)\n",
+ " \n",
+ "def cleanup():\n",
+ " dist.destroy_process_group()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d29976e-56b5-46ce-8743-5480524bbca1",
+ "metadata": {},
+ "source": [
+ "### Dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea28a5d7-0d69-47c9-ba24-995f02168856",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define the custom Dataset class\n",
+ "class PimaDataset(Dataset):\n",
+ " def __init__(self, csv_file):\n",
+ " # Load the CSV file without header and assign column names\n",
+ " self.data = pd.read_csv(csv_file, header=None, names=column_names)\n",
+ " self.features = self.data.drop('Outcome', axis=1).values\n",
+ " self.labels = self.data['Outcome'].values\n",
+ " \n",
+ " # Convert to PyTorch tensors\n",
+ " self.features_tensor = torch.tensor(self.features, dtype=torch.float32)\n",
+ " self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)\n",
+ " \n",
+ " # Calculate mean and std\n",
+ " self.mean = self.features_tensor.mean(dim=0)\n",
+ " self.std = self.features_tensor.std(dim=0)\n",
+ " \n",
+ " # Normalize the features\n",
+ " self.features_tensor = (self.features_tensor - self.mean) / self.std\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.data)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " feature = self.features_tensor[idx]\n",
+ " label = self.labels_tensor[idx]\n",
+ " return feature, label"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ec4ebf96-12bc-4520-bbcb-690e5edebac9",
+ "metadata": {},
+ "source": [
+ "### Split the dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49ca23e3-f8d9-4818-93e4-fa9304792335",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prepare(rank, world_size, batch_size=32, pin_memory=False, num_workers=0):\n",
+ " dataset = PimaDataset(datapath)\n",
+ " sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=False, drop_last=False)\n",
+ " \n",
+ " dataloader = DataLoader(dataset, batch_size=batch_size, pin_memory=pin_memory, num_workers=num_workers, drop_last=False, shuffle=False, sampler=sampler)\n",
+ " \n",
+ " return dataloader"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09ec5647-aded-4179-89c0-0c5d44b0c6db",
+ "metadata": {},
+ "source": [
+ "#### Wrap model in DDP\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0dc9a702-4b3a-423f-80d3-79d1e3d9e11f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main(rank, world_size):\n",
+ "\n",
+ " # setup the process groups\n",
+ " setup(rank, world_size)\n",
+ " # prepare the dataloader\n",
+ " dataloader = prepare(rank, world_size)\n",
+ " \n",
+ " # instantiate the model(it's your own model) and move it to the right device\n",
+ " model = PimaClassifier().to(rank)\n",
+ " \n",
+ " # wrap the model with DDP\n",
+ " # device_ids tell DDP where is your model\n",
+ " # output_device tells DDP where to output, in our case, it is rank\n",
+ " # find_unused_parameters=True makes DDP detect parameters that did not take part in the forward pass, so it does not wait for their gradients\n",
+ " model = DDP(model, device_ids=[rank], output_device=rank, find_unused_parameters=True)\n",
+ "\n",
+ " loss_fn = nn.BCELoss()\n",
+ " optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
+ "\n",
+ " n_epochs = 100\n",
+ " for epoch in range(n_epochs):\n",
+ "\n",
+ " # if we are using DistributedSampler, we have to tell it which epoch this is\n",
+ " dataloader.sampler.set_epoch(epoch)\n",
+ "\n",
+ " for batch_features, batch_labels in dataloader:\n",
+ " batch_features = batch_features.to(rank)\n",
+ " batch_labels = batch_labels.to(rank)\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " \n",
+ " outputs = model(batch_features)\n",
+ " \n",
+ " batch_labels = batch_labels.unsqueeze(1).float()\n",
+ " loss = loss_fn(outputs, batch_labels)\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " cleanup()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d70bfdf8-9619-4448-ad45-cb2277d937ea",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if __name__ == '__main__':\n",
+ "\n",
+ " world_size = nb_gpus \n",
+ " mp.spawn(main, args=(world_size,), nprocs=world_size)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **What is the difference in training time?** Compare it with the previous training run (change the number of epochs to 100)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/.ipynb_checkpoints/multi_GPU-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/multi_GPU-checkpoint.ipynb
new file mode 100644
index 0000000..e90d4e5
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/multi_GPU-checkpoint.ipynb
@@ -0,0 +1,348 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Using Multiple GPUs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "import pandas as pd\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4098c4ec-368b-4802-9800-fc4c4b7479ba",
+ "metadata": {},
+ "source": [
+ "#### Set Device\n",
+ "Set the default device to the GPU if one is available"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b51be64-542f-401c-ae73-00da2bbd6471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "print(device)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d29976e-56b5-46ce-8743-5480524bbca1",
+ "metadata": {},
+ "source": [
+ "### Dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea28a5d7-0d69-47c9-ba24-995f02168856",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "# Column names for the CSV file, which has no header row\n",
+ "column_names = [\n",
+ " 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
+ " 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'\n",
+ "]\n",
+ "\n",
+ "# Define the custom Dataset class\n",
+ "class PimaDataset(Dataset):\n",
+ " def __init__(self, csv_file):\n",
+ " # Load the CSV file without header and assign column names\n",
+ " self.data = pd.read_csv(csv_file, header=None, names=column_names)\n",
+ " self.features = self.data.drop('Outcome', axis=1).values\n",
+ " self.labels = self.data['Outcome'].values\n",
+ " \n",
+ " # Convert to PyTorch tensors\n",
+ " self.features_tensor = torch.tensor(self.features, dtype=torch.float32)\n",
+ " self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)\n",
+ " \n",
+ " # Calculate mean and std\n",
+ " self.mean = self.features_tensor.mean(dim=0)\n",
+ " self.std = self.features_tensor.std(dim=0)\n",
+ " \n",
+ " # Normalize the features\n",
+ " self.features_tensor = (self.features_tensor - self.mean) / self.std\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.data)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " feature = self.features_tensor[idx]\n",
+ " label = self.labels_tensor[idx]\n",
+ " return feature, label"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "110beb4c-c07b-4a1b-b2d4-ce7795af31a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = PimaDataset(datapath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26d7f973-cf59-4709-a0c2-a158100449e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "batch_size = 32\n",
+ "data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289c55b3-f54b-4a79-ba58-5788237aabb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model = PimaClassifier()\n",
+ "print(class_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09ec5647-aded-4179-89c0-0c5d44b0c6db",
+ "metadata": {},
+ "source": [
+ "#### Data Parallelism\n",
+ "PyTorch will only use one GPU by default. You can easily run your operations on multiple GPUs by making your model run in parallel using `nn.DataParallel`. \n",
+ "\n",
+ "Check for multiple GPUs and if multiple GPUs are available, wrap the model with `nn.DataParallel`. Finally, move the model to the GPUs using `model.to(device)`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0dc9a702-4b3a-423f-80d3-79d1e3d9e11f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(torch.cuda.device_count())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5a58574-4ce7-495c-be0b-d22694a6ed7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if torch.cuda.device_count() > 1:\n",
+ " class_model = nn.DataParallel(class_model)\n",
+ " print(f\"Using {torch.cuda.device_count()} GPUs: {', '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d70bfdf8-9619-4448-ad45-cb2277d937ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model.to(device)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss_fn = nn.BCELoss()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "518bf03c-0594-494e-bdb3-a41421ac53a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimizer = optim.Adam(class_model.parameters(), lr=0.001)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8608326d-27a3-4a5b-b485-e9af74b0f2e8",
+ "metadata": {},
+ "source": [
+ "#### Training the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67278570-0838-40a9-9410-851c51a46b95",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04b239a9-7e1c-42ba-8b54-58c79d26986b",
+ "metadata": {},
+ "source": [
+ "DataParallel splits your data automatically and sends job orders to multiple model replicas on several GPUs. After each replica finishes its job, DataParallel collects and merges the results before returning them to you."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84664807-8163-46dd-b037-1b3f73d8cbd9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "n_epochs = 100\n",
+ "batch_size = 10\n",
+ " \n",
+ "for epoch in range(n_epochs):\n",
+ " running_loss = 0.0\n",
+ " for batch_features, batch_labels in data_loader:\n",
+ " batch_features = batch_features.to(device)\n",
+ " batch_labels = batch_labels.to(device)\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " \n",
+ " outputs = class_model(batch_features)\n",
+ " \n",
+ " batch_labels = batch_labels.unsqueeze(1).float()\n",
+ " loss = loss_fn(outputs, batch_labels)\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " running_loss += loss.item() * batch_features.size(0)\n",
+ " \n",
+ " epoch_loss = running_loss / len(dataset)\n",
+ " print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss:.4f}')\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "32e7a7d0-666f-482a-b250-34a3f1199240",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aedca9ef-fa2d-41e8-8847-c7e9fbcf498c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(f\"Model is on device: {next(class_model.parameters()).device}\")\n",
+ "if isinstance(class_model, nn.DataParallel):\n",
+ " print(f\"DataParallel devices: {class_model.device_ids}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **What is the difference in training time?** Compare it with the previous training run (change the number of epochs to 100)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/.ipynb_checkpoints/tensors-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/tensors-checkpoint.ipynb
new file mode 100644
index 0000000..cbd5bf4
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/tensors-checkpoint.ipynb
@@ -0,0 +1,375 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "205ea8ba-4865-4511-8a54-14fcd4b22ed0",
+ "metadata": {},
+ "source": [
+ "### Tensors in PyTorch\n",
+ "\n",
+ "Tensors are specialized data structures used in PyTorch to represent model inputs, outputs, and parameters. While they are conceptually similar to arrays and matrices, they offer additional features such as support for hardware accelerators like GPUs and automatic differentiation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "244c9ced-e83c-4c24-a992-f216dfa34456",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c20aaadc-e2a0-4a89-9703-b091578b4dc0",
+ "metadata": {},
+ "source": [
+ "### Creating a Tensor"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "816961f2-0932-47d1-923f-d9743ec8c062",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "383b7ba2-6d5d-4380-9f36-fb62f6ce1d8f",
+ "metadata": {},
+ "source": [
+ "##### 1. Directly from data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2137307c-2fa0-4953-92e3-fcb24408ff77",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = [[1, 2],[3, 4]]\n",
+ "x_tensor= torch.tensor(data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cba5f6e9-f0fc-4cee-b48d-660e6267541d",
+ "metadata": {},
+ "source": [
+ "##### 2. From NumPy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "48cd31ef-a810-4aaa-a314-d5f5705e7be4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_np = np.array(data)\n",
+ "x_tensor = torch.from_numpy(x_np)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0d365755-871b-4380-8e36-972799542b5e",
+ "metadata": {},
+ "source": [
+ "##### 3. From another Tensor\n",
+ "\n",
+ "**torch.rand_like()** returns a tensor with the same size as the input, but filled with random numbers drawn uniformly from the interval [0, 1)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "62f40f87-637b-4e58-be66-8fe8f8d4b84b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones_like(x_tensor)\n",
+ "y_tensor = torch.rand_like(x_tensor, dtype=torch.float) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84e9c4b6-6b85-430d-a793-93521879f671",
+ "metadata": {},
+ "source": [
+ "### Operations on Tensors"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c59bbc8d-00d1-48cc-bc2c-4443c8ccec31",
+ "metadata": {},
+ "source": [
+ "#### 1. Indexing and slicing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d3e7c194-ecea-4ef9-af39-3f563daddc3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(4, 4)\n",
+ "print(f\"First row: {x_tensor[0]}\")\n",
+ "print(f\"First column: {x_tensor[:, 0]}\")\n",
+ "print(f\"Last column: {x_tensor[..., -1]}\")\n",
+ "x_tensor[:,1] = 0\n",
+ "print(x_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bbe77b93-12ab-475f-81fc-c5a00db24621",
+ "metadata": {},
+ "source": [
+ "#### 2. Concatenate multiple tensors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2b00e821-b480-4f7e-8788-1e983ed1693b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_tensor = torch.cat([x_tensor, x_tensor, x_tensor], dim=1)\n",
+ "print(y_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "77389fec-07d3-4473-a036-af0b9cd39986",
+ "metadata": {},
+ "source": [
+ "#### 3. Arithmetic Operations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d518086-f064-486e-9e28-29ea15ce7779",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(4, 4)\n",
+ "\n",
+ "# Transpose\n",
+ "x_T_tensor = x_tensor.T\n",
+ "\n",
+ "# Matrix Multiplication\n",
+ "y1_tensor = x_tensor @ x_tensor.T\n",
+ "y2_tensor = x_tensor.matmul(x_tensor.T)\n",
+ "\n",
+ "y3_tensor = torch.rand_like(y1_tensor)\n",
+ "torch.matmul(x_tensor, x_tensor.T, out=y3_tensor)\n",
+ "\n",
+ "\n",
+ "# Element-wise multiplication\n",
+ "z1_tensor = x_tensor * x_tensor\n",
+ "z2_tensor = x_tensor.mul(x_tensor)\n",
+ "\n",
+ "z3_tensor = torch.rand_like(x_tensor)\n",
+ "torch.mul(x_tensor, x_tensor, out=z3_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ce833cc-6ff6-4016-96bb-60b70812d584",
+ "metadata": {},
+ "source": [
+ "#### 4. In-place Operations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21c046eb-43ad-4259-a2b6-55da191d22db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(4, 4)\n",
+ "\n",
+ "# Transpose\n",
+ "x_tensor.t_()\n",
+ "\n",
+ "# Copy\n",
+ "y_tensor = torch.rand_like(x_tensor)\n",
+ "x_tensor.copy_(y_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d73522a1-db18-4fe0-9c07-f27d39f4a992",
+ "metadata": {},
+ "source": [
+ "### NumPy and Tensor\n",
+ "Tensors on the **CPU** and NumPy arrays can share memory locations, so modifying one will also affect \n",
+ "the other."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1eee82b4-8f71-4e19-b27e-c47add1714e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(5) \n",
+ "x_np = x_tensor.numpy() # tensor to numpy\n",
+ "print(f\"t: {x_tensor}\")\n",
+ "print(f\"n: {x_np}\")\n",
+ "\n",
+ "x_tensor.add_(1)\n",
+ "\n",
+ "print(f\"t: {x_tensor}\")\n",
+ "print(f\"n: {x_np}\")\n",
+ "\n",
+ "y_np = np.ones(5)\n",
+ "z_np = np.zeros(5)\n",
+ "y_tensor = torch.from_numpy(y_np) # numpy to tensor\n",
+ "\n",
+ "np.add(y_np, 1, out=z_np)\n",
+ "\n",
+ "print(f\"t: {x_tensor}\")\n",
+ "print(f\"n: {x_np}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "43bca47c-de5b-423d-a3d3-153fdaa76bd9",
+ "metadata": {},
+ "source": [
+ "### Moving Tensor to GPU\n",
+ "It's always wise to check for GPU availability before performing any GPU operations. If a GPU is available, we can move our tensor to it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2199f283-5bbd-4534-b10b-1ed259d56f31",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor_gpu = x_tensor.to(\"cuda\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "663e4316-0e47-4528-bfc0-873b695d3e23",
+ "metadata": {},
+ "source": [
+ "A better approach is to set the default device before starting any computations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9aa52b7-b24d-4686-8798-6ec9582b19f9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
+ "y_tensor_gpu = y_tensor.to(device)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "55d565e8-074b-4e10-872f-c6051c38c20b",
+ "metadata": {},
+ "source": [
+ "### Tensor attributes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "552841ee-fa9d-450e-8ab5-16d7d1d41008",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(f\"Shape of tensor: {y_tensor.shape}\")\n",
+ "print(f\"Datatype of tensor: {y_tensor.dtype}\")\n",
+ "print(f\"Device tensor is stored on: {y_tensor.device}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6e90c428-525b-47f2-9dbd-8e017f56e813",
+ "metadata": {},
+ "source": [
+ "*Automatic differentiation* is a key feature that distinguishes tensors from NumPy arrays. This capability\n",
+ "is particularly useful in neural networks, where model weights are adjusted during backpropagation based \n",
+ "on the gradient of the loss function with respect to each parameter. Tensors support automatic gradient \n",
+ "computation for any computational graph. For example, consider the computational graph of a one-layer \n",
+ "neural network:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8c93bab5-f9bd-439b-a334-cbe482c379ad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(5) # input tensor\n",
+ "y_tensor = torch.zeros(3) # expected output\n",
+ "\n",
+ "w_tensor = torch.randn(5, 3, requires_grad=True)\n",
+ "b_tensor = torch.randn(3, requires_grad=True)\n",
+ "\n",
+ "z_tensor = torch.matmul(x_tensor, w_tensor) + b_tensor\n",
+ "\n",
+ "loss_tensor = torch.nn.functional.binary_cross_entropy_with_logits(z_tensor, y_tensor)\n",
+ "loss_tensor.backward()\n",
+ "\n",
+ "print(w_tensor.grad)\n",
+ "print(b_tensor.grad)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2cd58b4d-d46d-4095-a21e-38179685590b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/GPU_NN.ipynb b/notebooks/GPU_NN.ipynb
new file mode 100644
index 0000000..a912a11
--- /dev/null
+++ b/notebooks/GPU_NN.ipynb
@@ -0,0 +1,305 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Training on GPUs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4098c4ec-368b-4802-9800-fc4c4b7479ba",
+ "metadata": {},
+ "source": [
+ "#### Set Device\n",
+ "Set the default device to the GPU if one is available"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b51be64-542f-401c-ae73-00da2bbd6471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "print(device)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b32c13d2-bee6-436c-b838-2c8e04a24ec6",
+ "metadata": {},
+ "source": [
+ "### Curate the dataset\n",
+ "Load the dataset, split into features (X) and output (y) variables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "222a7a99-2723-486d-9a1e-58d2792c84e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "dataset = np.loadtxt(datapath, delimiter=',')\n",
+ "X = dataset[:,0:8] \n",
+ "y = dataset[:,8]\n",
+ "\n",
+ "X_tensor = torch.tensor(X, dtype=torch.float32)\n",
+ "y_tensor = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289c55b3-f54b-4a79-ba58-5788237aabb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model = PimaClassifier()\n",
+ "print(class_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09ec5647-aded-4179-89c0-0c5d44b0c6db",
+ "metadata": {},
+ "source": [
+ "#### Save the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5a58574-4ce7-495c-be0b-d22694a6ed7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "modelpath = os.path.expandvars('/scratch/vp91/$USER/class_model')\n",
+ "print(modelpath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b2f9c1c9-4d45-447d-ac23-40593759da3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.save(class_model.state_dict(), modelpath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "65839c60-6d5f-4540-aabe-fbd1914692d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls /scratch/vp91/$USER/class_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a7fcdf90-1db4-4c6e-9f28-1bc8e7a4bfd0",
+ "metadata": {},
+ "source": [
+ "#### Load the model on the GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d70bfdf8-9619-4448-ad45-cb2277d937ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model.load_state_dict(torch.load(modelpath, map_location=device, weights_only=True))\n",
+ "class_model.to(device)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss_fn = nn.BCELoss()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "518bf03c-0594-494e-bdb3-a41421ac53a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimizer = optim.Adam(class_model.parameters(), lr=0.001)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8608326d-27a3-4a5b-b485-e9af74b0f2e8",
+ "metadata": {},
+ "source": [
+ "#### Training the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad6b09b5-9e9b-4376-ad89-d1ff8b4791eb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "n_epochs = 100\n",
+ "batch_size = 10\n",
+ " \n",
+ "for epoch in range(n_epochs):\n",
+ " for i in range(0, len(X_tensor), batch_size):\n",
+ " Xbatch = X_tensor[i:i+batch_size].to(device) # move the tensor to GPU\n",
+ "\n",
+ " y_pred = class_model(Xbatch)\n",
+ " \n",
+ " ybatch = y_tensor[i:i+batch_size].to(device) # move the tensor to GPU\n",
+ " \n",
+ " loss = loss_fn(y_pred, ybatch)\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " \n",
+ " print(f'Finished epoch {epoch}, latest loss {loss}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04b239a9-7e1c-42ba-8b54-58c79d26986b",
+ "metadata": {},
+ "source": [
+ "#### Evaluate the Model\n",
+ "\n",
+ "Currently, we are testing the model on the training dataset. Ideally, we should split the data into separate training and testing datasets, or use a distinct dataset for evaluation. For simplicity, we are testing the model on the same data used for training.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "89e28fe2-90c5-4cd4-bd37-30ebe9183772",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with torch.no_grad():\n",
+ " y_pred = class_model(X_tensor.to(device))\n",
+ " \n",
+ "accuracy = (y_pred.round().to(device) == y_tensor.to(device)).float().mean()\n",
+ "print(f\"Accuracy {accuracy}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **What is the time difference in training**? Compare it with the previous training."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84664807-8163-46dd-b037-1b3f73d8cbd9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/building_NN.ipynb b/notebooks/building_NN.ipynb
new file mode 100644
index 0000000..20e626c
--- /dev/null
+++ b/notebooks/building_NN.ipynb
@@ -0,0 +1,426 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Neural Networks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "96127ef4-bb03-492f-81b9-672f74c20b5c",
+ "metadata": {},
+ "source": [
+ "Neural networks are computational models inspired by the human brain, designed to recognize patterns and\n",
+ "make decisions based on data. They consist of interconnected layers of nodes, or \"neurons,\" which process\n",
+ "and transform input information. Through training, neural networks learn to improve their accuracy in tasks like image recognition, language processing, and more. Neural networks are composed of layers that perform operations on data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f1baab1-a7b6-429e-afa3-822e61da46ad",
+ "metadata": {},
+ "source": [
+ "### Dataset\n",
+ "The Pima Indians Diabetes dataset is a popular dataset in the field of machine learning and statistics, particularly for those working on classification problems. \n",
+ "\n",
+ "Dataset Overview:\n",
+ "- **Source**: The dataset was created by the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK) and is available in the UCI Machine Learning Repository.\n",
+ "- **Purpose**: The dataset is used to predict the onset of diabetes within five years based on diagnostic measures.\n",
+ "- **Features**: The dataset contains 768 samples, each with 8 features. \n",
+ "\n",
+ "The features are:\n",
+ "\n",
+ "1. Pregnancies: Number of times pregnant.\n",
+ "2. Glucose: Plasma glucose concentration (mg/dL) at 2 hours in an oral glucose tolerance test.\n",
+ "3. Blood Pressure: Diastolic blood pressure (mm Hg) at the time of screening.\n",
+ "4. Skin Thickness: Triceps skinfold thickness (mm) measured at the back of the upper arm.\n",
+ "5. Insulin: 2-Hour serum insulin (mu U/ml).\n",
+ "6. BMI: Body mass index (weight in kg/(height in m)^2).\n",
+ "7. Diabetes Pedigree Function: A function that scores likelihood of diabetes based on family history.\n",
+ "8. Age: Age of the individual (years).\n",
+ "\n",
+ "**Outcome**: Whether or not the individual has diabetes (1 for positive, 0 for negative)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6d4b1b9b-bf50-4867-8345-43a7106a25da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!head /scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee303492-97bf-4274-9a14-04c1c116f6c8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "print(datapath)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b32c13d2-bee6-436c-b838-2c8e04a24ec6",
+ "metadata": {},
+ "source": [
+ "### Curate the dataset\n",
+ "Load the dataset, split into features (X) and output (y) variables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "222a7a99-2723-486d-9a1e-58d2792c84e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = np.loadtxt(datapath, delimiter=',')\n",
+ "X = dataset[:,0:8] \n",
+ "y = dataset[:,8]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c2a20b9-0c73-4995-b772-0e773cc03c8b",
+ "metadata": {},
+ "source": [
+ "### Convert the data to tensors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f3c45e8f-894e-46d3-84c1-25fbca333f81",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_tensor = torch.tensor(X, dtype=torch.float32)\n",
+ "y_tensor = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model\n",
+ "\n",
+ "When designing the model, keep the following points in mind:\n",
+ "\n",
+ "1. The input features in the input layer must match the input features in the dataset (`X_tensor`).\n",
+ "2. A high number of layers can increase computation time, while too few layers may result in poor predictions.\n",
+ "3. Each layer should be followed by an activation function.\n",
+ "\n",
+ "In this example, we will use a 3-layer neural network:\n",
+ "\n",
+ "1. The input layer expects 8 features.\n",
+ "2. The first hidden layer has 12 neurons, followed by a ReLU activation function.\n",
+ "3. The second hidden layer has 8 neurons, followed by another ReLU activation function.\n",
+ "4. The output layer has one neuron, followed by a sigmoid activation function.\n",
+ "\n",
+ "The sigmoid function outputs values between 0 and 1, which is exactly what we need."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20d8051c-32ee-45c1-b797-58c0e68bbcfb",
+ "metadata": {},
+ "source": [
+ "\n",
+ "In PyTorch, neural networks can be defined using different approaches, and two common ones are the Sequential model and the class-based model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a81a5b5b-d3bc-434f-9b92-a8571d7599f5",
+ "metadata": {},
+ "source": [
+ "#### Sequential model\n",
+ "\n",
+ "* The Sequential model is a simple, linear stack of layers where each layer has a single input and output. It is useful for straightforward feedforward networks where layers are applied in a sequential order.\n",
+ "* It is easier to use for simple architectures where layers are applied in a linear fashion.\n",
+ "* Defined Using: *torch.nn.Sequential*."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6cb10442-640a-4ded-a81f-6c2607a86bed",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "seq_model = nn.Sequential(\n",
+ " nn.Linear(8, 12),\n",
+ " nn.ReLU(),\n",
+ " nn.Linear(12, 8),\n",
+ " nn.ReLU(),\n",
+ " nn.Linear(8, 1),\n",
+ " nn.Sigmoid()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8647eb7-799b-42e5-b42d-be699b5e5a3e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(seq_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36a8225d-4528-4a2f-a23e-9285d4ab5c8e",
+ "metadata": {},
+ "source": [
+ "### Class-Based Model\n",
+ "\n",
+ "The class-based model allows you to define a network by subclassing torch.nn.Module. This approach provides greater flexibility and control, making it suitable for complex models and custom behaviors.\n",
+ "\n",
+ "* Offers full control over the network architecture, including complex data flows, multiple inputs/outputs, and custom forward methods.\n",
+ "* Custom Forward Pass: You can define complex forward passes and control data flow through the network.\n",
+ "* Dynamic Behavior: Allows for dynamic computations, such as conditional layers or operations.\n",
+ "* Defined Using: Subclass of torch.nn.Module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289c55b3-f54b-4a79-ba58-5788237aabb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model = PimaClassifier()\n",
+ "print(class_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e889fd5-49a7-4509-a2fc-ef7d5f8c722f",
+ "metadata": {},
+ "source": [
+ "### Define the loss function\n",
+ "Binary Cross-Entropy (BCE) Loss: Measures the performance of a classification model whose output is a probability value between 0 and 1. It calculates the difference between the predicted probabilities and the actual binary labels (0 or 1) and penalizes the model more when the predictions are further from the true labels.\n",
+ "\n",
+ "BCELoss(y', y) = −[y · log(y') + (1 − y) · log(1 − y')]\n",
+ "\n",
+ "where y' is the predicted output and y is the actual output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss_fn = nn.BCELoss()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76aa2eab-7897-439e-9c20-08eb523ec7d6",
+ "metadata": {},
+ "source": [
+ "### Optimizer\n",
+ "\n",
+ "The optimizer's main role is to update the model's parameters based on the gradients computed during backpropagation.\n",
+ "\n",
+ "1. **Parameter Updates**: Optimizers adjust the weights and biases of the neural network to reduce the loss. This involves applying algorithms that modify the parameters to minimize the difference between the predicted outputs and the actual targets.\n",
+ "2. **Learning Rate Management**: Most optimizers include mechanisms to adjust the learning rate, either statically or dynamically, to control how large the parameter updates are.\n",
+ "\n",
+ "In this example we use an optimizer called Adaptive Moment Estimation (Adam). It computes an adaptive learning rate for each parameter by considering both the mean and the variance of the gradients."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "518bf03c-0594-494e-bdb3-a41421ac53a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimizer = optim.Adam(class_model.parameters(), lr=0.001)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8608326d-27a3-4a5b-b485-e9af74b0f2e8",
+ "metadata": {},
+ "source": [
+ "#### Training the Model\n",
+ "\n",
+ "Training a neural network involves epochs and batches, which define how data is fed to the model:\n",
+ "\n",
+ "- **Epoch:** A full pass through the entire training dataset.\n",
+ "- **Batch:** A subset of samples processed at a time, with gradient descent performed after each batch.\n",
+ "\n",
+ "In practice, the dataset is divided into batches, and each batch is processed sequentially in a training loop. Completing all batches constitutes one epoch. The process is repeated for multiple epochs to refine the model.\n",
+ "\n",
+ "Batch size is constrained by system memory (GPU memory), and computational demands scale with batch size. More epochs and batches lead to better model performance but increase training time. The optimal number of epochs and batch size is often determined through experimentation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ea1f2ef-0b63-435c-a93f-329aa9ae6228",
+ "metadata": {},
+ "source": [
+ "#### Purpose of optimizer.zero_grad(), loss.backward(), optimizer.step()\n",
+ "\n",
+ "**optimizer.zero_grad()**: During training, gradients accumulate by default in PyTorch. This means that if you don’t clear them, gradients from multiple backward passes (from different batches) will be added together, which can lead to incorrect updates to the model parameters.\n",
+ "By calling optimizer.zero_grad(), you ensure that gradients from previous steps are reset to zero, preventing them from affecting the current update.\n",
+ "\n",
+ "**loss.backward()**: Calculates the gradients of the loss with respect to each parameter of the model. This is done using backpropagation, a key algorithm for training neural networks.\n",
+ "\n",
+ "**optimizer.step()**: Used to update the model's parameters based on the gradients computed during the backward pass (**loss.backward()**)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad6b09b5-9e9b-4376-ad89-d1ff8b4791eb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "n_epochs = 100\n",
+ "batch_size = 10\n",
+ " \n",
+ "for epoch in range(n_epochs):\n",
+ " for i in range(0, len(X_tensor), batch_size):\n",
+ " Xbatch = X_tensor[i:i+batch_size]\n",
+ " y_pred = class_model(Xbatch)\n",
+ " ybatch = y_tensor[i:i+batch_size]\n",
+ " loss = loss_fn(y_pred, ybatch)\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " print(f'Finished epoch {epoch}, latest loss {loss}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04b239a9-7e1c-42ba-8b54-58c79d26986b",
+ "metadata": {},
+ "source": [
+ "#### Evaluate the Model\n",
+ "\n",
+ "Currently, we are testing the model on the training dataset. Ideally, we should split the data into separate training and testing datasets, or use a distinct dataset for evaluation. For simplicity, we are testing the model on the same data used for training.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "89e28fe2-90c5-4cd4-bd37-30ebe9183772",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with torch.no_grad():\n",
+ " y_pred = class_model(X_tensor)\n",
+ " \n",
+ "accuracy = (y_pred.round() == y_tensor).float().mean()\n",
+ "print(f\"Accuracy {accuracy}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **Increase the number of layers in the neural network.** Observe any changes in accuracy.\n",
+ "2. **Change the optimizer from Adam to [Stochastic Gradient Descent (SGD)](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html).** Evaluate how this affects the loss calculation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84664807-8163-46dd-b037-1b3f73d8cbd9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/data_parallel.ipynb b/notebooks/data_parallel.ipynb
new file mode 100644
index 0000000..e90d4e5
--- /dev/null
+++ b/notebooks/data_parallel.ipynb
@@ -0,0 +1,348 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Using Multiple GPUs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "import pandas as pd\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4098c4ec-368b-4802-9800-fc4c4b7479ba",
+ "metadata": {},
+ "source": [
+ "#### Set Device\n",
+ "Set the default device to the GPU if one is available"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b51be64-542f-401c-ae73-00da2bbd6471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "print(device)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d29976e-56b5-46ce-8743-5480524bbca1",
+ "metadata": {},
+ "source": [
+ "### Dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea28a5d7-0d69-47c9-ba24-995f02168856",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "# Column names to assign, since the CSV file is read without a header row\n",
+ "column_names = [\n",
+ " 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
+ " 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'\n",
+ "]\n",
+ "\n",
+ "# Define the custom Dataset class\n",
+ "class PimaDataset(Dataset):\n",
+ " def __init__(self, csv_file):\n",
+ " # Load the CSV file without header and assign column names\n",
+ " self.data = pd.read_csv(csv_file, header=None, names=column_names)\n",
+ " self.features = self.data.drop('Outcome', axis=1).values\n",
+ " self.labels = self.data['Outcome'].values\n",
+ " \n",
+ " # Convert to PyTorch tensors\n",
+ " self.features_tensor = torch.tensor(self.features, dtype=torch.float32)\n",
+ " self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)\n",
+ " \n",
+ " # Calculate mean and std\n",
+ " self.mean = self.features_tensor.mean(dim=0)\n",
+ " self.std = self.features_tensor.std(dim=0)\n",
+ " \n",
+ " # Normalize the features\n",
+ " self.features_tensor = (self.features_tensor - self.mean) / self.std\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.data)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " feature = self.features_tensor[idx]\n",
+ " label = self.labels_tensor[idx]\n",
+ " return feature, label"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "110beb4c-c07b-4a1b-b2d4-ce7795af31a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = PimaDataset(datapath)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26d7f973-cf59-4709-a0c2-a158100449e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "batch_size = 32\n",
+ "data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289c55b3-f54b-4a79-ba58-5788237aabb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model = PimaClassifier()\n",
+ "print(class_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09ec5647-aded-4179-89c0-0c5d44b0c6db",
+ "metadata": {},
+ "source": [
+ "#### Data Parallelism\n",
+ "PyTorch will only use one GPU by default. You can easily run your operations on multiple GPUs by making your model run in parallel using `nn.DataParallel`. \n",
+ "\n",
+ "Check for multiple GPUs and if multiple GPUs are available, wrap the model with `nn.DataParallel`. Finally, move the model to the GPUs using `model.to(device)`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0dc9a702-4b3a-423f-80d3-79d1e3d9e11f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(torch.cuda.device_count())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5a58574-4ce7-495c-be0b-d22694a6ed7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if torch.cuda.device_count() > 1:\n",
+ " class_model = nn.DataParallel(class_model)\n",
+ " print(f\"Using {torch.cuda.device_count()} GPUs: {', '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d70bfdf8-9619-4448-ad45-cb2277d937ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class_model.to(device)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loss_fn = nn.BCELoss()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "518bf03c-0594-494e-bdb3-a41421ac53a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimizer = optim.Adam(class_model.parameters(), lr=0.001)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8608326d-27a3-4a5b-b485-e9af74b0f2e8",
+ "metadata": {},
+ "source": [
+ "#### Training the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67278570-0838-40a9-9410-851c51a46b95",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04b239a9-7e1c-42ba-8b54-58c79d26986b",
+ "metadata": {},
+ "source": [
+ "DataParallel splits your data automatically and sends job orders to multiple models on several GPUs. After each model finishes its job, DataParallel collects and merges the results before returning them to you."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84664807-8163-46dd-b037-1b3f73d8cbd9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "n_epochs = 100\n",
+ "batch_size = 10\n",
+ " \n",
+ "for epoch in range(n_epochs):\n",
+ " running_loss = 0.0\n",
+ " for batch_features, batch_labels in data_loader:\n",
+ " batch_features = batch_features.to(device)\n",
+ " batch_labels = batch_labels.to(device)\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " \n",
+ " outputs = class_model(batch_features)\n",
+ " \n",
+ " batch_labels = batch_labels.unsqueeze(1).float()\n",
+ " loss = loss_fn(outputs, batch_labels)\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " running_loss += loss.item() * batch_features.size(0)\n",
+ " \n",
+ " epoch_loss = running_loss / len(dataset)\n",
+ " print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss:.4f}')\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "32e7a7d0-666f-482a-b250-34a3f1199240",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aedca9ef-fa2d-41e8-8847-c7e9fbcf498c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(f\"Model is on device: {next(class_model.parameters()).device}\")\n",
+ "if isinstance(class_model, nn.DataParallel):\n",
+ " print(f\"DataParallel devices: {class_model.device_ids}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **What is the time difference in training**? Compare it with the previous training (change epoch to 100)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/dataloader.ipynb b/notebooks/dataloader.ipynb
new file mode 100644
index 0000000..c889a63
--- /dev/null
+++ b/notebooks/dataloader.ipynb
@@ -0,0 +1,779 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b670ae8e-1350-4be1-8575-df9267fdfae7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from torch.utils.data import Dataset\n",
+ "from torchvision import datasets\n",
+ "from torchvision.transforms import ToTensor\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "\n",
+ "import requests\n",
+ "import zipfile\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f82a4673-e5e9-4f5f-b7e6-112a8fa1e47d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "775f4111-d1e2-4cd1-bd2d-a7a1bbb3d21d",
+ "metadata": {},
+ "source": [
+ "PyTorch offers two data primitives—`torch.utils.data.DataLoader` and `torch.utils.data.Dataset`— which \n",
+ "facilitate the use of both pre-loaded datasets and custom data.\n",
+ "\n",
+ "The `Fashion-MNIST` dataset is an example of a pre-loaded curated dataset. It can be loaded using the following parameters:\n",
+ "\n",
+ "- `root` specifies the path where the training or test data is stored.\n",
+ "- `train` indicates whether to load the training or test dataset.\n",
+ "- `download=True` will download the data from the internet if it's not available at the specified `root`.\n",
+ "- `transform` and `target_transform` define the transformations applied to the features and labels, respectively.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "749f1295-2191-40e3-9f7f-8c34589d1a7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "training_data = datasets.FashionMNIST(\n",
+ " root=\"data\",\n",
+ " train=True,\n",
+ " download=True,\n",
+ " transform=ToTensor()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b818ed5f-c845-4fca-9015-99196f3b937d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_data = datasets.FashionMNIST(\n",
+ " root=\"data\",\n",
+ " train=False,\n",
+ " download=True,\n",
+ " transform=ToTensor()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "aef48041-ad83-4813-924d-22404d691286",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "t10k-images-idx3-ubyte\t train-images-idx3-ubyte\n",
+ "t10k-images-idx3-ubyte.gz train-images-idx3-ubyte.gz\n",
+ "t10k-labels-idx1-ubyte\t train-labels-idx1-ubyte\n",
+ "t10k-labels-idx1-ubyte.gz train-labels-idx1-ubyte.gz\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls data/FashionMNIST/raw/"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "80984294-a299-4da7-802d-9184706a5f2a",
+ "metadata": {},
+ "source": [
+ "#### Visualizing a sample of the dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9f52a941-cd0e-4665-9e6c-182ffc33c5b5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "labels_map = {\n",
+ " 0: \"T-Shirt\",\n",
+ " 1: \"Trouser\",\n",
+ " 2: \"Pullover\",\n",
+ " 3: \"Dress\",\n",
+ " 4: \"Coat\",\n",
+ " 5: \"Sandal\",\n",
+ " 6: \"Shirt\",\n",
+ " 7: \"Sneaker\",\n",
+ " 8: \"Bag\",\n",
+ " 9: \"Ankle Boot\",\n",
+ "}\n",
+ "figure = plt.figure(figsize=(8, 8))\n",
+ "cols, rows = 3, 3\n",
+ "for i in range(1, cols * rows + 1):\n",
+ " sample_idx = torch.randint(len(training_data), size=(1,)).item()\n",
+ " img, label = training_data[sample_idx]\n",
+ " figure.add_subplot(rows, cols, i)\n",
+ " plt.title(labels_map[label])\n",
+ " plt.axis(\"off\")\n",
+ " plt.imshow(img.squeeze(), cmap=\"gray\")\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d1d3c91-ba90-469d-9d8b-ab18a7768b84",
+ "metadata": {},
+ "source": [
+ "### Custom dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "57f0bdd5-5521-421f-a56c-532764d123af",
+ "metadata": {},
+ "source": [
+ "What if you are working with a custom dataset? To illustrate this, we will download a dataset and set it up for\n",
+ "use in PyTorch training. The data used for this demonstration is relatively *clean*. In a practical use case, significant time will likely be spent on cleaning and preparing the data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "727a8d18-c149-4ae7-8791-d301fa83e579",
+ "metadata": {},
+ "source": [
+ "The data:\n",
+ "\n",
+ "1. There are **3 classes**: pizza, steak, and sushi.\n",
+ "2. The data is split into *train* and *test* datasets.\n",
+ "3. Both *train* and *test* datasets are further organized into 3 directories, each corresponding to one of the classes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "88c9b050-a64a-45a6-ac46-06b9a0632431",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import zipfile\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# Setup path to data folder\n",
+ "data_root = Path(\"custom_data/\")\n",
+ "image_path = data_root / \"pizza_steak_sushi\"\n",
+ "\n",
+ "# If the image data doesn't exist, download it and curate it. \n",
+ "if not image_path.is_dir():\n",
+ " image_path.mkdir(parents=True, exist_ok=True)\n",
+ " \n",
+ " # Download pizza, steak, sushi data\n",
+ " url = \"https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip\"\n",
+ " with open(data_root / \"pizza_steak_sushi.zip\", \"wb\") as f:\n",
+ " request = requests.get(url)\n",
+ " f.write(request.content)\n",
+ "\n",
+ " with zipfile.ZipFile(data_root / \"pizza_steak_sushi.zip\", \"r\") as zip_ref:\n",
+ " zip_ref.extractall(image_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "9675e0c5-cffc-4420-a419-eaf3e2198fb3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pizza_steak_sushi pizza_steak_sushi.zip\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls custom_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "b2d51954-2b02-42bb-90e3-121c105e3c7c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "test train\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "0a8043a6-7ea8-44e7-908d-e0921c9930db",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pizza steak sushi\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi/train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "1425fa36-3c2e-4597-9b78-65516835ac83",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "pizza steak sushi\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi/test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "b27f580f-988a-425f-9b3b-e65a23742500",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1008844.jpg 1654444.jpg 2291093.jpg 2785084.jpg 320570.jpg\t 5764.jpg\n",
+ "1033251.jpg 1660415.jpg 2330965.jpg 2800325.jpg 3269634.jpg 618348.jpg\n",
+ "1044789.jpg 1899785.jpg 2382016.jpg 2811032.jpg 3281494.jpg 667309.jpg\n",
+ "1089334.jpg 1947572.jpg 2426686.jpg 2821048.jpg 3338774.jpg 68684.jpg\n",
+ "1105700.jpg 1968947.jpg 2428085.jpg 2885050.jpg 3441394.jpg 702165.jpg\n",
+ "12301.jpg 2026009.jpg 244505.jpg 2885796.jpg 3505182.jpg 715169.jpg\n",
+ "1285298.jpg 2121603.jpg 2451169.jpg 2924941.jpg 3530210.jpg 739735.jpg\n",
+ "138855.jpg 2154394.jpg 2493954.jpg 29417.jpg 3589437.jpg 741883.jpg\n",
+ "1412034.jpg 218711.jpg 2569760.jpg 2992084.jpg 3699992.jpg 764429.jpg\n",
+ "1524655.jpg 2190018.jpg 2576168.jpg 300869.jpg 3821701.jpg 765799.jpg\n",
+ "1572608.jpg 220190.jpg 2687575.jpg 3018077.jpg 38349.jpg\t 786995.jpg\n",
+ "1633289.jpg 2228322.jpg 2702825.jpg 3109486.jpg 3860002.jpg 853441.jpg\n",
+ "1649276.jpg 2285942.jpg 2760984.jpg 3196721.jpg 393658.jpg\t 928670.jpg\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls custom_data/pizza_steak_sushi/train/pizza"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "696d0b71-2825-48ed-b84a-cf9519e296f4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/jpeg": "",
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from PIL import Image\n",
+ "img = Image.open(\"custom_data/pizza_steak_sushi/train/pizza/928670.jpg\")\n",
+ "img"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4577dd6b-c581-4dc5-996a-92b7c3a07436",
+ "metadata": {},
+ "source": [
+ "#### Setup train and testing paths"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "49b85b56-2ace-4a01-b361-b00ef2b28f9e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(PosixPath('custom_data/pizza_steak_sushi/train'),\n",
+ " PosixPath('custom_data/pizza_steak_sushi/test'))"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "train_dir = image_path / \"train\"\n",
+ "test_dir = image_path / \"test\"\n",
+ "\n",
+ "train_dir, test_dir"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ba0bbe2b-edcd-4f07-b715-e42c6d33dc1c",
+ "metadata": {},
+ "source": [
+ "#### Transformation on the data\n",
+ "\n",
+ "\n",
+ "Transform functions in `torchvision.transforms` simplify the application of various data augmentation/manipulation techniques \n",
+ "to your input data. Several transforms can be chained with `transforms.Compose` so that they are applied one after another."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "6ce41688-a909-477c-94ef-010ea6724445",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from torch.utils.data import DataLoader\n",
+ "from torchvision import datasets, transforms"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "c42cbfdb-a5f3-4e07-8beb-e151835902fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write transform for image\n",
+ "data_transform = transforms.Compose([\n",
+ " # Resize the images to 64x64\n",
+ " transforms.Resize(size=(64, 64)),\n",
+ " # Flip the images randomly on the horizontal\n",
+ " transforms.RandomHorizontalFlip(p=0.5), # p = probability of flip, 0.5 = 50% chance\n",
+ " # Turn the image into a torch.Tensor\n",
+ " transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0 \n",
+ "])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "e53af9e2-b0b9-40ff-9526-a4239600dc3f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "with Image.open(\"custom_data/pizza_steak_sushi/train/pizza/928670.jpg\") as f:\n",
+ " fig, ax = plt.subplots(1, 2)\n",
+ " ax[0].imshow(f) \n",
+ " ax[0].set_title(f\"Original \\nSize: {f.size}\")\n",
+ " ax[0].axis(\"off\")\n",
+ "\n",
+ " transformed_image = data_transform(f).permute(1, 2, 0) \n",
+ " ax[1].imshow(transformed_image) \n",
+ " ax[1].set_title(f\"Transformed \\nSize: {transformed_image.shape}\")\n",
+ " ax[1].axis(\"off\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "af651bd5-8afa-4c8c-9661-03683f91be8d",
+ "metadata": {},
+ "source": [
+ "#### Loading Image Data Using ImageFolder\n",
+ "\n",
+ "`ImageFolder` is a generic data loader where images are expected to be organized into separate directories,\n",
+ "each corresponding to a different class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "245ada1a-e053-4905-aa44-39ef9814fde8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Train data:\n",
+ "Dataset ImageFolder\n",
+ " Number of datapoints: 225\n",
+ " Root location: custom_data/pizza_steak_sushi/train\n",
+ " StandardTransform\n",
+ "Transform: Compose(\n",
+ " Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=True)\n",
+ " RandomHorizontalFlip(p=0.5)\n",
+ " ToTensor()\n",
+ " )\n",
+ "Test data:\n",
+ "Dataset ImageFolder\n",
+ " Number of datapoints: 75\n",
+ " Root location: custom_data/pizza_steak_sushi/test\n",
+ " StandardTransform\n",
+ "Transform: Compose(\n",
+ " Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=True)\n",
+ " RandomHorizontalFlip(p=0.5)\n",
+ " ToTensor()\n",
+ " )\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Use ImageFolder to create dataset(s)\n",
+ "from torchvision import datasets\n",
+ "train_data = datasets.ImageFolder(root=train_dir, # target folder of images\n",
+ " transform=data_transform, # transforms to perform on data (images)\n",
+ " target_transform=None) # transforms to perform on labels (if necessary)\n",
+ "\n",
+ "test_data = datasets.ImageFolder(root=test_dir, \n",
+ " transform=data_transform)\n",
+ "\n",
+ "print(f\"Train data:\\n{train_data}\\nTest data:\\n{test_data}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "2c7ad54a-a8f2-4963-86c3-3150cb68cae3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['pizza', 'steak', 'sushi']"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Get class names as a list\n",
+ "class_names = train_data.classes\n",
+ "class_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "59f14ace-5c56-4a18-b24e-67c34321a2ec",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'pizza': 0, 'steak': 1, 'sushi': 2}"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Can also get class names as a dict\n",
+ "class_dict = train_data.class_to_idx\n",
+ "class_dict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "ede23b28-601c-45ba-a544-ca4d828045ba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(225, 75)"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Check the lengths\n",
+ "len(train_data), len(test_data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8c8d91e6-c12d-4fb1-ab75-c95f1309c995",
+ "metadata": {},
+ "source": [
+ "#### DataLoader\n",
+ "\n",
+ "\n",
+ "In PyTorch, `DataLoader` is a built-in class that offers an efficient and flexible method for loading \n",
+ "data into a model for training or inference. It is especially beneficial for managing large datasets that \n",
+ "may not fit into memory and for carrying out data augmentation and preprocessing. \n",
+ "A `DataLoader` combines a dataset and a sampler, and provides an iterable over the given dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e4e7b6e1-7ec9-411e-9d3d-422d4d6f8bc9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "69ed025b-c14c-4130-97f6-5d00a3757880",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Turn train and test Datasets into DataLoaders\n",
+ "from torch.utils.data import DataLoader\n",
+ "train_dataloader = DataLoader(dataset=train_data, \n",
+ " batch_size=8, # how many samples per batch?\n",
+ " num_workers=1, # how many subprocesses to use for data loading? (higher = more)\n",
+ " shuffle=True) # shuffle the data?\n",
+ "\n",
+ "test_dataloader = DataLoader(dataset=test_data, \n",
+ " batch_size=8, \n",
+ " num_workers=1, \n",
+ " shuffle=False) # don't usually need to shuffle testing data\n",
+ "\n",
+ "train_dataloader, test_dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "59008e99-69c0-4a5b-ae9a-419273c07841",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Image shape: torch.Size([8, 3, 64, 64]) -> [batch_size, color_channels, height, width]\n",
+ "Label shape: torch.Size([8])\n"
+ ]
+ }
+ ],
+ "source": [
+ "img, label = next(iter(train_dataloader))\n",
+ "\n",
+ "print(f\"Image shape: {img.shape} -> [batch_size, color_channels, height, width]\")\n",
+ "print(f\"Label shape: {label.shape}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "1e3c2b18-2162-4d8e-beb9-991093854c57",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "torch.Tensor"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(img)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "496fb7e6-2727-4ba8-bb2c-6b8460b9565f",
+ "metadata": {},
+ "source": [
+ "#### Custom DataLoader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "0887d989-49c3-4012-910f-e011340b0059",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "# Column names for the CSV file (the file has no header row)\n",
+ "column_names = [\n",
+ " 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
+ " 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'\n",
+ "]\n",
+ "\n",
+ "# Define the custom Dataset class\n",
+ "class PimaDataset(Dataset):\n",
+ " def __init__(self, csv_file):\n",
+ " # Load the CSV file without header and assign column names\n",
+ " self.data = pd.read_csv(csv_file, header=None, names=column_names)\n",
+ " self.features = self.data.drop('Outcome', axis=1).values\n",
+ " self.labels = self.data['Outcome'].values\n",
+ " \n",
+ " # Convert to PyTorch tensors\n",
+ " self.features_tensor = torch.tensor(self.features, dtype=torch.float32)\n",
+ " self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)\n",
+ " \n",
+ " # Calculate mean and std\n",
+ " self.mean = self.features_tensor.mean(dim=0)\n",
+ " self.std = self.features_tensor.std(dim=0)\n",
+ " \n",
+ " # Normalize the features\n",
+ " self.features_tensor = (self.features_tensor - self.mean) / self.std\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.data)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " feature = self.features_tensor[idx]\n",
+ " label = self.labels_tensor[idx]\n",
+ " return feature, label"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c088ccc3-85a1-4a5d-ac9b-7699ff7e91fa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = PimaDataset(datapath)\n",
+ "batch_size = 32\n",
+ "data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "87e8a8d6-f0bc-4618-ad3e-dd3b1a5ca8c8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "features, outcomes = next(iter(data_loader))\n",
+ "\n",
+ "print(f\"Image shape: {features.shape} -> [batch_size, inputs_features]\")\n",
+ "print(f\"Label shape: {outcomes.shape}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c0f401ab-c3cd-400f-8c97-b3c94146ac56",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/distributed_data_parallel.ipynb b/notebooks/distributed_data_parallel.ipynb
new file mode 100644
index 0000000..23a6c4e
--- /dev/null
+++ b/notebooks/distributed_data_parallel.ipynb
@@ -0,0 +1,305 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1cdb801e-e281-476f-b3e9-e470785d3ad9",
+ "metadata": {},
+ "source": [
+ "### Using Multiple GPUs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "604d5312-0b33-4162-b5f4-551c21732550",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import os\n",
+ "import pandas as pd\n",
+ "\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim\n",
+ "import torch.distributed as dist\n",
+ "import torch.multiprocessing as mp\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "from torch.utils.data.distributed import DistributedSampler\n",
+ "from torch.nn.parallel import DistributedDataParallel as DDP"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49a40db4-da7b-4d24-b707-a39b79d2440e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4098c4ec-368b-4802-9800-fc4c4b7479ba",
+ "metadata": {},
+ "source": [
+ "#### Set Device\n",
+ "Set the default device to the GPU if one is available"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b51be64-542f-401c-ae73-00da2bbd6471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nb_gpus = 2\n",
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')\n",
+ "\n",
+ "# Column names for the CSV file (the file has no header row)\n",
+ "column_names = [\n",
+ " 'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
+ " 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f944a147-3f5e-4c42-b142-850d04458270",
+ "metadata": {},
+ "source": [
+ "### Process Groups"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "50696138-0c0d-4a0b-aa80-ed73cff87fd2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def setup(rank, world_size):\n",
+ " os.environ['MASTER_ADDR'] = 'localhost'\n",
+ " os.environ['MASTER_PORT'] = '12355'\n",
+ " dist.init_process_group(\"nccl\", rank=rank, world_size=world_size)\n",
+ " \n",
+ "def cleanup():\n",
+ " dist.destroy_process_group()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d29976e-56b5-46ce-8743-5480524bbca1",
+ "metadata": {},
+ "source": [
+ "### Dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea28a5d7-0d69-47c9-ba24-995f02168856",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define the custom Dataset class\n",
+ "class PimaDataset(Dataset):\n",
+ " def __init__(self, csv_file):\n",
+ " # Load the CSV file without header and assign column names\n",
+ " self.data = pd.read_csv(csv_file, header=None, names=column_names)\n",
+ " self.features = self.data.drop('Outcome', axis=1).values\n",
+ " self.labels = self.data['Outcome'].values\n",
+ " \n",
+ " # Convert to PyTorch tensors\n",
+ " self.features_tensor = torch.tensor(self.features, dtype=torch.float32)\n",
+ " self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)\n",
+ " \n",
+ " # Calculate mean and std\n",
+ " self.mean = self.features_tensor.mean(dim=0)\n",
+ " self.std = self.features_tensor.std(dim=0)\n",
+ " \n",
+ " # Normalize the features\n",
+ " self.features_tensor = (self.features_tensor - self.mean) / self.std\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.data)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " feature = self.features_tensor[idx]\n",
+ " label = self.labels_tensor[idx]\n",
+ " return feature, label"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ec4ebf96-12bc-4520-bbcb-690e5edebac9",
+ "metadata": {},
+ "source": [
+ "### Split the dataloader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49ca23e3-f8d9-4818-93e4-fa9304792335",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def prepare(rank, world_size, batch_size=32, pin_memory=False, num_workers=0):\n",
+ " dataset = PimaDataset(datapath)\n",
+ " sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=False, drop_last=False)\n",
+ " \n",
+ " dataloader = DataLoader(dataset, batch_size=batch_size, pin_memory=pin_memory, num_workers=num_workers, drop_last=False, shuffle=False, sampler=sampler)\n",
+ " \n",
+ " return dataloader"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "abc70a8b-2c37-4c09-bd7c-717d556cb39c",
+ "metadata": {},
+ "source": [
+ "### Defining the Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "263d5838-320d-4dad-ac59-e2d95ada7873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class PimaClassifier(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.hidden1 = nn.Linear(8, 12)\n",
+ " self.act1 = nn.ReLU()\n",
+ " self.hidden2 = nn.Linear(12, 8)\n",
+ " self.act2 = nn.ReLU()\n",
+ " self.output = nn.Linear(8, 1)\n",
+ " self.act_output = nn.Sigmoid()\n",
+ " \n",
+ " def forward(self, x):\n",
+ " x = self.act1(self.hidden1(x))\n",
+ " x = self.act2(self.hidden2(x))\n",
+ " x = self.act_output(self.output(x))\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09ec5647-aded-4179-89c0-0c5d44b0c6db",
+ "metadata": {},
+ "source": [
+ "#### Wrap model in DDP\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0dc9a702-4b3a-423f-80d3-79d1e3d9e11f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main(rank, world_size):\n",
+ "\n",
+ " # setup the process groups\n",
+ " setup(rank, world_size)\n",
+ " # prepare the dataloader\n",
+ " dataloader = prepare(rank, world_size)\n",
+ " \n",
+ " # instantiate the model (it's your own model) and move it to the right device\n",
+ " model = PimaClassifier().to(rank)\n",
+ " \n",
+ " # wrap the model with DDP\n",
+ " # device_ids tells DDP which device your model is on\n",
+ " # output_device tells DDP where to output, in our case, it is rank\n",
+ " # find_unused_parameters=True instructs DDP to find unused output of the forward() function of any module in the model\n",
+ " model = DDP(model, device_ids=[rank], output_device=rank, find_unused_parameters=True)\n",
+ "\n",
+ " loss_fn = nn.BCELoss()\n",
+ " optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
+ "\n",
+ " n_epochs = 100\n",
+ " for epoch in range(n_epochs):\n",
+ "\n",
+ " # if we are using DistributedSampler, we have to tell it which epoch this is\n",
+ " dataloader.sampler.set_epoch(epoch)\n",
+ "\n",
+ " for batch_features, batch_labels in dataloader:\n",
+ " batch_features = batch_features.to(rank)\n",
+ " batch_labels = batch_labels.to(rank)\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " \n",
+ " outputs = model(batch_features)\n",
+ " \n",
+ " batch_labels = batch_labels.unsqueeze(1).float()\n",
+ " loss = loss_fn(outputs, batch_labels)\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ "\n",
+ " cleanup()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d70bfdf8-9619-4448-ad45-cb2277d937ea",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "593672e5-4e14-473d-80f9-2ed00c127729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if __name__ == '__main__':\n",
+ "\n",
+ " world_size = nb_gpus \n",
+ " mp.spawn(main, args=(world_size,), nprocs=world_size)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07a44bb-25c6-4f67-8a34-514d7eadbbaf",
+ "metadata": {},
+ "source": [
+ "### Exercise\n",
+ "\n",
+ "1. **What is the time difference in training**? Compare it with the previous training (change epoch to 100)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/tensors.ipynb b/notebooks/tensors.ipynb
new file mode 100644
index 0000000..cbd5bf4
--- /dev/null
+++ b/notebooks/tensors.ipynb
@@ -0,0 +1,375 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "205ea8ba-4865-4511-8a54-14fcd4b22ed0",
+ "metadata": {},
+ "source": [
+ "### Tensors in PyTorch\n",
+ "\n",
+ "Tensors are specialized data structures used in PyTorch to represent model inputs, outputs, and parameters. While they are conceptually similar to arrays and matrices, they offer additional features such as support for hardware accelerators like GPUs and automatic differentiation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "244c9ced-e83c-4c24-a992-f216dfa34456",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "# The jupyter notebook is launched from your $HOME directory.\n",
+ "# Change the working directory to the workshop directory\n",
+ "# which was created in your username directory under /scratch/vp91\n",
+ "os.chdir(os.path.expandvars(\"/scratch/vp91/$USER/\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c20aaadc-e2a0-4a89-9703-b091578b4dc0",
+ "metadata": {},
+ "source": [
+ "### Creating a Tensor"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "816961f2-0932-47d1-923f-d9743ec8c062",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "383b7ba2-6d5d-4380-9f36-fb62f6ce1d8f",
+ "metadata": {},
+ "source": [
+ "##### 1. Directly from data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2137307c-2fa0-4953-92e3-fcb24408ff77",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = [[1, 2],[3, 4]]\n",
+ "x_tensor= torch.tensor(data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cba5f6e9-f0fc-4cee-b48d-660e6267541d",
+ "metadata": {},
+ "source": [
+ "##### 2. From NumPy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "48cd31ef-a810-4aaa-a314-d5f5705e7be4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_np = np.array(data)\n",
+ "x_tensor = torch.from_numpy(x_np)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0d365755-871b-4380-8e36-972799542b5e",
+ "metadata": {},
+ "source": [
+ "##### 3. From another Tensor\n",
+ "\n",
+ "**torch.rand_like()** returns a tensor with the same size as the input, but filled with random numbers drawn uniformly from the interval [0, 1)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "62f40f87-637b-4e58-be66-8fe8f8d4b84b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones_like(x_tensor)\n",
+ "y_tensor = torch.rand_like(x_tensor, dtype=torch.float) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84e9c4b6-6b85-430d-a793-93521879f671",
+ "metadata": {},
+ "source": [
+ "### Operations on Tensors"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c59bbc8d-00d1-48cc-bc2c-4443c8ccec31",
+ "metadata": {},
+ "source": [
+ "#### 1. Indexing and slicing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d3e7c194-ecea-4ef9-af39-3f563daddc3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(4, 4)\n",
+ "print(f\"First row: {x_tensor[0]}\")\n",
+ "print(f\"First column: {x_tensor[:, 0]}\")\n",
+ "print(f\"Last column: {x_tensor[..., -1]}\")\n",
+ "x_tensor[:,1] = 0\n",
+ "print(x_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bbe77b93-12ab-475f-81fc-c5a00db24621",
+ "metadata": {},
+ "source": [
+ "#### 2. Concatenate multiple tensors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2b00e821-b480-4f7e-8788-1e983ed1693b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_tensor = torch.cat([x_tensor, x_tensor, x_tensor], dim=1)\n",
+ "print(y_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "77389fec-07d3-4473-a036-af0b9cd39986",
+ "metadata": {},
+ "source": [
+ "#### 3. Arithmetic Operations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d518086-f064-486e-9e28-29ea15ce7779",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(4, 4)\n",
+ "\n",
+ "# Transpose\n",
+ "x_T_tensor = x_tensor.T\n",
+ "\n",
+ "# Matrix Multiplication\n",
+ "y1_tensor = x_tensor @ x_tensor.T\n",
+ "y2_tensor = x_tensor.matmul(x_tensor.T)\n",
+ "\n",
+ "y3_tensor = torch.rand_like(y1_tensor)\n",
+ "torch.matmul(x_tensor, x_tensor.T, out=y3_tensor)\n",
+ "\n",
+ "\n",
+ "# Element-wise multiplication\n",
+ "z1_tensor = x_tensor * x_tensor\n",
+ "z2_tensor = x_tensor.mul(x_tensor)\n",
+ "\n",
+ "z3_tensor = torch.rand_like(x_tensor)\n",
+ "torch.mul(x_tensor, x_tensor, out=z3_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ce833cc-6ff6-4016-96bb-60b70812d584",
+ "metadata": {},
+ "source": [
+ "#### 4. In-place Operations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21c046eb-43ad-4259-a2b6-55da191d22db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(4, 4)\n",
+ "\n",
+ "# Transpose\n",
+ "x_tensor.t_()\n",
+ "\n",
+ "# Copy\n",
+ "y_tensor = torch.rand_like(x_tensor)\n",
+ "x_tensor.copy_(y_tensor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d73522a1-db18-4fe0-9c07-f27d39f4a992",
+ "metadata": {},
+ "source": [
+ "### NumPy and Tensor\n",
+ "Tensors on the **CPU** and NumPy arrays can share memory locations, so modifying one will also affect \n",
+ "the other."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1eee82b4-8f71-4e19-b27e-c47add1714e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(5) \n",
+ "x_np = x_tensor.numpy() # tensor to numpy\n",
+ "print(f\"t: {x_tensor}\")\n",
+ "print(f\"n: {x_np}\")\n",
+ "\n",
+ "x_tensor.add_(1)\n",
+ "\n",
+ "print(f\"t: {x_tensor}\")\n",
+ "print(f\"n: {x_np}\")\n",
+ "\n",
+ "y_np = np.ones(5)\n",
+ "z_np = np.zeros(5)\n",
+ "y_tensor = torch.from_numpy(y_np) # numpy to tensor\n",
+ "\n",
+ "np.add(y_np, 1, out=z_np)\n",
+ "\n",
+ "print(f\"t: {x_tensor}\")\n",
+ "print(f\"n: {x_np}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "43bca47c-de5b-423d-a3d3-153fdaa76bd9",
+ "metadata": {},
+ "source": [
+ "### Moving Tensor to GPU\n",
+ "It's always wise to check for GPU availability before performing any GPU operations. If a GPU is available, we can move our tensor to it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2199f283-5bbd-4534-b10b-1ed259d56f31",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor_gpu = x_tensor.to(\"cuda\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "663e4316-0e47-4528-bfc0-873b695d3e23",
+ "metadata": {},
+ "source": [
+ "A better approach is to set the default device before starting any computations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9aa52b7-b24d-4686-8798-6ec9582b19f9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
+ "y_tensor_gpu = y_tensor.to(device)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "55d565e8-074b-4e10-872f-c6051c38c20b",
+ "metadata": {},
+ "source": [
+ "### Tensor attributes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "552841ee-fa9d-450e-8ab5-16d7d1d41008",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(f\"Shape of tensor: {y_tensor.shape}\")\n",
+ "print(f\"Datatype of tensor: {y_tensor.dtype}\")\n",
+ "print(f\"Device tensor is stored on: {y_tensor.device}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6e90c428-525b-47f2-9dbd-8e017f56e813",
+ "metadata": {},
+ "source": [
+ "*Automatic differentiation* is a key feature that distinguishes tensors from NumPy arrays. This capability\n",
+ "is particularly useful in neural networks, where model weights are adjusted during backpropagation based \n",
+ "on the gradient of the loss function with respect to each parameter. Tensors support automatic gradient \n",
+ "computation for any computational graph. For example, consider the computational graph of a one-layer \n",
+ "neural network:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8c93bab5-f9bd-439b-a334-cbe482c379ad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_tensor = torch.ones(5) # input tensor\n",
+ "y_tensor = torch.zeros(3) # expected output\n",
+ "\n",
+ "w_tensor = torch.randn(5, 3, requires_grad=True)\n",
+ "b_tensor = torch.randn(3, requires_grad=True)\n",
+ "\n",
+ "z_tensor = torch.matmul(x_tensor, w_tensor) + b_tensor\n",
+ "\n",
+ "loss_tensor = torch.nn.functional.binary_cross_entropy_with_logits(z_tensor, y_tensor)\n",
+ "loss_tensor.backward()\n",
+ "\n",
+ "print(w_tensor.grad)\n",
+ "print(b_tensor.grad)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2cd58b4d-d46d-4095-a21e-38179685590b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/distributed_data_parallel.py b/src/distributed_data_parallel.py
new file mode 100755
index 0000000..1261438
--- /dev/null
+++ b/src/distributed_data_parallel.py
@@ -0,0 +1,136 @@
+import numpy as np
+import os
+import pandas as pd
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.distributed as dist
+import torch.multiprocessing as mp
+from torch.utils.data import Dataset, DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+
+
+
+# Number of GPUs, and therefore of training processes spawned by this script
+nb_gpus = 2
+# Prefer CUDA when it is available, otherwise fall back to the CPU
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+# Dataset location in the user's workshop directory; $USER is expanded from the environment
+datapath = os.path.expandvars(
+    '/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv'
+)
+
+
+# Column names for the CSV file, which is read without a header row
+column_names = [
+    'Pregnancies',
+    'Glucose',
+    'BloodPressure',
+    'SkinThickness',
+    'Insulin',
+    'BMI',
+    'DiabetesPedigreeFunction',
+    'Age',
+    'Outcome',
+]
+
+# Define the custom Dataset class
+class PimaDataset(Dataset):
+    """Pima Indians diabetes records exposed as a map-style PyTorch dataset.
+
+    The CSV file is read once at construction time, the feature columns are
+    standardized column-wise, and each item is a ``(features, label)`` pair.
+    """
+
+    def __init__(self, csv_file):
+        """Load ``csv_file`` and build the normalized feature and label tensors.
+
+        Args:
+            csv_file: Path of a header-less CSV file whose columns follow the
+                module-level ``column_names``.
+        """
+        # The file has no header row, so the column names are supplied here
+        self.data = pd.read_csv(csv_file, header=None, names=column_names)
+        self.features = self.data.drop('Outcome', axis=1).values
+        self.labels = self.data['Outcome'].values
+
+        # Convert to PyTorch tensors
+        self.features_tensor = torch.tensor(self.features, dtype=torch.float32)
+        self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)
+
+        # Per-column statistics used for standardization
+        self.mean = self.features_tensor.mean(dim=0)
+        self.std = self.features_tensor.std(dim=0)
+
+        # A constant column has std 0 and a single-row file has std NaN; dividing
+        # by either would fill that column with NaN/inf, so such columns are
+        # divided by 1 instead (they end up centred but unscaled).
+        safe_std = self.std.clone()
+        safe_std[~(safe_std > 0)] = 1.0
+
+        # Normalize the features
+        self.features_tensor = (self.features_tensor - self.mean) / safe_std
+
+    def __len__(self):
+        """Return the number of rows loaded from the CSV file."""
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        """Return the ``(features, label)`` tensors stored at position ``idx``."""
+        return self.features_tensor[idx], self.labels_tensor[idx]
+
+def setup(rank, world_size):
+    """Join the NCCL process group as process ``rank`` out of ``world_size``.
+
+    Args:
+        rank: Index of this process; it is also used as the index of the GPU
+            this process works on.
+        world_size: Total number of processes taking part in training.
+    """
+    # Every process is spawned on this machine, so the rendezvous point is local
+    os.environ['MASTER_ADDR'] = 'localhost'
+    os.environ['MASTER_PORT'] = '12355'
+    # Bind this process to its own GPU before any NCCL communication so that
+    # collectives and implicit "cuda" allocations do not all land on GPU 0
+    torch.cuda.set_device(rank)
+    dist.init_process_group("nccl", rank=rank, world_size=world_size)
+
+def cleanup():
+    """Tear down the default process group, if one was created.
+
+    Doing nothing when no group exists makes this safe to call from a
+    ``finally`` clause or after a failed ``setup`` without raising a second
+    error that would hide the original one.
+    """
+    if dist.is_available() and dist.is_initialized():
+        dist.destroy_process_group()
+
+def prepare(rank, world_size, batch_size=32, pin_memory=False, num_workers=0, shuffle=False):
+    """Build the DataLoader that serves this process's shard of the Pima dataset.
+
+    Args:
+        rank: Index of the calling process among all replicas.
+        world_size: Total number of replicas sharing the dataset.
+        batch_size: Number of samples per batch on this process.
+        pin_memory: Forwarded to ``DataLoader``.
+        num_workers: Forwarded to ``DataLoader``.
+        shuffle: Whether the sampler reshuffles the data every epoch. With the
+            default ``False`` each epoch visits the samples in the same order
+            and calling ``sampler.set_epoch`` does not change the ordering.
+
+    Returns:
+        A ``DataLoader`` over ``PimaDataset(datapath)`` driven by a
+        ``DistributedSampler``.
+    """
+    dataset = PimaDataset(datapath)
+    sampler = DistributedSampler(
+        dataset,
+        num_replicas=world_size,
+        rank=rank,
+        shuffle=shuffle,
+        drop_last=False,
+    )
+
+    # Ordering is owned by the sampler; DataLoader itself must keep shuffle=False
+    # because it rejects shuffle=True in combination with a sampler
+    dataloader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        pin_memory=pin_memory,
+        num_workers=num_workers,
+        drop_last=False,
+        shuffle=False,
+        sampler=sampler,
+    )
+
+    return dataloader
+
+class PimaClassifier(nn.Module):
+    """Small fully connected binary classifier: 8 inputs -> 12 -> 8 -> 1 sigmoid output."""
+
+    def __init__(self):
+        super().__init__()
+        # Two ReLU hidden layers followed by a single sigmoid unit
+        self.hidden1, self.act1 = nn.Linear(8, 12), nn.ReLU()
+        self.hidden2, self.act2 = nn.Linear(12, 8), nn.ReLU()
+        self.output, self.act_output = nn.Linear(8, 1), nn.Sigmoid()
+
+    def forward(self, x):
+        """Run the 8-feature input ``x`` through both hidden layers and the sigmoid output."""
+        first_hidden = self.act1(self.hidden1(x))
+        second_hidden = self.act2(self.hidden2(first_hidden))
+        logits = self.output(second_hidden)
+        return self.act_output(logits)
+
+
+def main(rank, world_size):
+    """Train ``PimaClassifier`` with DistributedDataParallel on GPU ``rank``.
+
+    This is the per-process entry point handed to ``mp.spawn``.
+
+    Args:
+        rank: Index of this process, also used as its CUDA device index.
+        world_size: Total number of processes taking part in training.
+    """
+    # setup the process group
+    setup(rank, world_size)
+    try:
+        # prepare the dataloader for this process's share of the data
+        dataloader = prepare(rank, world_size)
+
+        # instantiate the model and move it to this process's GPU
+        model = PimaClassifier().to(rank)
+
+        # wrap the model with DDP
+        # device_ids tells DDP which GPU holds this replica
+        # output_device tells DDP where to place the output, here the same GPU
+        # find_unused_parameters stays at its default (False): every parameter of
+        # PimaClassifier takes part in forward(), so the extra graph traversal it
+        # enables would only add overhead
+        model = DDP(model, device_ids=[rank], output_device=rank)
+
+        loss_fn = nn.BCELoss()
+        optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+        n_epochs = 100
+        for epoch in range(n_epochs):
+
+            # if we are using DistributedSampler, we have to tell it which epoch this is
+            dataloader.sampler.set_epoch(epoch)
+
+            for batch_features, batch_labels in dataloader:
+                batch_features = batch_features.to(rank)
+                # BCELoss needs float targets with the same (batch, 1) shape as the output
+                batch_labels = batch_labels.to(rank).unsqueeze(1).float()
+
+                optimizer.zero_grad()
+
+                outputs = model(batch_features)
+                loss = loss_fn(outputs, batch_labels)
+                loss.backward()
+                optimizer.step()
+    finally:
+        # release the process group even when training raised
+        cleanup()
+
+
+
+
+
+if __name__ == '__main__':
+    # One training process per GPU; each spawned process runs main(<process index>, world_size)
+    world_size = nb_gpus
+    mp.spawn(main, nprocs=world_size, args=(world_size,))
+
+
diff --git a/src/multinode_torchrun.py b/src/multinode_torchrun.py
new file mode 100755
index 0000000..4bd9738
--- /dev/null
+++ b/src/multinode_torchrun.py
@@ -0,0 +1,143 @@
+import numpy as np
+import os
+import pandas as pd
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.distributed as dist
+import torch.multiprocessing as mp
+from torch.utils.data import Dataset, DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+
+
+
+# NOTE: nb_gpus and device are not read anywhere else in this script; main() takes
+# the process layout from the RANK / LOCAL_RANK / WORLD_SIZE environment variables
+nb_gpus = 2
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Dataset location in the user's workshop directory. $USER is expanded from the
+# environment so the script is not tied to one particular account.
+datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')
+
+
+# Column names for the CSV file, which is read without a header row
+column_names = [
+    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
+    'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'
+]
+
+# Define the custom Dataset class
+class PimaDataset(Dataset):
+    """Pima Indians diabetes records exposed as a map-style PyTorch dataset.
+
+    The CSV file is read once at construction time, the feature columns are
+    standardized column-wise, and each item is a ``(features, label)`` pair.
+    """
+
+    def __init__(self, csv_file):
+        """Load ``csv_file`` and build the normalized feature and label tensors.
+
+        Args:
+            csv_file: Path of a header-less CSV file whose columns follow the
+                module-level ``column_names``.
+        """
+        # The file has no header row, so the column names are supplied here
+        self.data = pd.read_csv(csv_file, header=None, names=column_names)
+        self.features = self.data.drop('Outcome', axis=1).values
+        self.labels = self.data['Outcome'].values
+
+        # Convert to PyTorch tensors
+        self.features_tensor = torch.tensor(self.features, dtype=torch.float32)
+        self.labels_tensor = torch.tensor(self.labels, dtype=torch.long)
+
+        # Per-column statistics used for standardization
+        self.mean = self.features_tensor.mean(dim=0)
+        self.std = self.features_tensor.std(dim=0)
+
+        # A constant column has std 0 and a single-row file has std NaN; dividing
+        # by either would fill that column with NaN/inf, so such columns are
+        # divided by 1 instead (they end up centred but unscaled).
+        safe_std = self.std.clone()
+        safe_std[~(safe_std > 0)] = 1.0
+
+        # Normalize the features
+        self.features_tensor = (self.features_tensor - self.mean) / safe_std
+
+    def __len__(self):
+        """Return the number of rows loaded from the CSV file."""
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        """Return the ``(features, label)`` tensors stored at position ``idx``."""
+        return self.features_tensor[idx], self.labels_tensor[idx]
+
+def setup():
+    """Initialize the NCCL process group and select this process's GPU.
+
+    No rank or world size is passed to ``init_process_group``; the GPU index is
+    read from the ``LOCAL_RANK`` environment variable, so a ``KeyError`` is
+    raised if that variable is not set.
+    """
+    dist.init_process_group("nccl")
+    local_gpu = int(os.environ["LOCAL_RANK"])
+    torch.cuda.set_device(local_gpu)
+
+
+def cleanup():
+    """Tear down the default process group, if one was created.
+
+    Doing nothing when no group exists makes this safe to call from a
+    ``finally`` clause or after a failed ``setup`` without raising a second
+    error that would hide the original one.
+    """
+    if dist.is_available() and dist.is_initialized():
+        dist.destroy_process_group()
+
+def prepare(rank, world_size, batch_size=32, pin_memory=False, num_workers=0, shuffle=False):
+    """Build the DataLoader that serves this process's shard of the Pima dataset.
+
+    Args:
+        rank: Index of the calling process among all replicas.
+        world_size: Total number of replicas sharing the dataset.
+        batch_size: Number of samples per batch on this process.
+        pin_memory: Forwarded to ``DataLoader``.
+        num_workers: Forwarded to ``DataLoader``.
+        shuffle: Whether the sampler reshuffles the data every epoch. With the
+            default ``False`` each epoch visits the samples in the same order
+            and calling ``sampler.set_epoch`` does not change the ordering.
+
+    Returns:
+        A ``DataLoader`` over ``PimaDataset(datapath)`` driven by a
+        ``DistributedSampler``.
+    """
+    dataset = PimaDataset(datapath)
+    sampler = DistributedSampler(
+        dataset,
+        num_replicas=world_size,
+        rank=rank,
+        shuffle=shuffle,
+        drop_last=False,
+    )
+
+    # Ordering is owned by the sampler; DataLoader itself must keep shuffle=False
+    # because it rejects shuffle=True in combination with a sampler
+    dataloader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        pin_memory=pin_memory,
+        num_workers=num_workers,
+        drop_last=False,
+        shuffle=False,
+        sampler=sampler,
+    )
+
+    return dataloader
+
+class PimaClassifier(nn.Module):
+    """Small fully connected binary classifier: 8 inputs -> 12 -> 8 -> 1 sigmoid output."""
+
+    def __init__(self):
+        super().__init__()
+        # Two ReLU hidden layers followed by a single sigmoid unit
+        self.hidden1, self.act1 = nn.Linear(8, 12), nn.ReLU()
+        self.hidden2, self.act2 = nn.Linear(12, 8), nn.ReLU()
+        self.output, self.act_output = nn.Linear(8, 1), nn.Sigmoid()
+
+    def forward(self, x):
+        """Run the 8-feature input ``x`` through both hidden layers and the sigmoid output."""
+        first_hidden = self.act1(self.hidden1(x))
+        second_hidden = self.act2(self.hidden2(first_hidden))
+        logits = self.output(second_hidden)
+        return self.act_output(logits)
+
+
+def main():
+    """Train ``PimaClassifier`` with DistributedDataParallel in one launched process.
+
+    The process's GPU index, global rank and world size are read from the
+    ``LOCAL_RANK``, ``RANK`` and ``WORLD_SIZE`` environment variables.
+    """
+    # setup the process group
+    setup()
+    try:
+        gpu_id = int(os.environ['LOCAL_RANK'])
+        rank = int(os.environ['RANK'])
+        world_size = int(os.environ['WORLD_SIZE'])
+
+        # prepare the dataloader; the data is split by global rank
+        dataloader = prepare(rank, world_size)
+
+        # instantiate the model and move it to this process's GPU
+        model = PimaClassifier().to(gpu_id)
+
+        # wrap the model with DDP
+        # device_ids tells DDP which GPU holds this replica
+        # output_device must be a device index on this node, i.e. the local GPU id:
+        # the global rank exceeds the local GPU count as soon as a second node is used
+        # find_unused_parameters stays at its default (False): every parameter of
+        # PimaClassifier takes part in forward(), so the extra graph traversal it
+        # enables would only add overhead
+        model = DDP(model, device_ids=[gpu_id], output_device=gpu_id)
+
+        loss_fn = nn.BCELoss()
+        optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+        n_epochs = 100
+        for epoch in range(n_epochs):
+
+            # if we are using DistributedSampler, we have to tell it which epoch this is
+            dataloader.sampler.set_epoch(epoch)
+
+            for batch_features, batch_labels in dataloader:
+                batch_features = batch_features.to(gpu_id)
+                # BCELoss needs float targets with the same (batch, 1) shape as the output
+                batch_labels = batch_labels.to(gpu_id).unsqueeze(1).float()
+
+                optimizer.zero_grad()
+
+                outputs = model(batch_features)
+                loss = loss_fn(outputs, batch_labels)
+                loss.backward()
+                optimizer.step()
+    finally:
+        # release the process group even when training raised
+        cleanup()
+
+
+
+
+
+if __name__ == "__main__":
+    # Run training only when this file is executed as a script, not when imported
+    main()
+
+