diff --git a/README.md b/README.md
index d2fa33d..1860bd7 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,25 @@ Project 0 Getting Started
**University of Pennsylvania, CIS 5650: GPU Programming and Architecture, Project 0**
-* (TODO) YOUR NAME HERE
- * (TODO) [LinkedIn](), [personal website](), [twitter](), etc.
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Pavel Zdravkov Peev
+ * LinkedIn: https://www.linkedin.com/in/pavel-peev-5568561b9/
+ * Personal Website: https://www.Cartaphil.com
+* Tested on: Windows 11, i7-12700, NVIDIA T1000 (SEAS VLAB 007)
-### (TODO: Your README)
+### Analysis and Screenshots
-Include screenshots, analysis, etc. (Remember, this is public, so don't put
-anything here that you don't want to share with the world.)
+I had problems with sections 2.1.4 and 2.1.5 because I do not have administrative access on the virtual lab computers.
+I was expecting my PC to arrive on 8/28/2025, but it seems to have been delayed; I will redo these sections on the new PC when it arrives.
+
+### 2.1.2
+
+![2.1.2 screenshot](images/2-1-2-Screenshot.png)
+
+### 2.1.3
+![2.1.3 screenshot](images/2-1-3-Screenshot.png)
+
+### 2.2
+![2.2 screenshot](images/2-2-Screenshot.png)
+
+### 2.3
+![2.3 screenshot](images/2-3Screenshot.png)
diff --git a/cuda-gl-check/src/main.cpp b/cuda-gl-check/src/main.cpp
index 886fd4c..8775072 100644
--- a/cuda-gl-check/src/main.cpp
+++ b/cuda-gl-check/src/main.cpp
@@ -10,8 +10,7 @@
* C main function.
*/
int main(int argc, char* argv[]) {
- // TODO: Change this line to use your name!
- m_yourName = "TODO: YOUR NAME HERE";
+ m_yourName = "Pavel Peev";
if (init(argc, argv)) {
mainLoop();
diff --git a/cuda-introduction/source/common.cu b/cuda-introduction/source/common.cu
index dce8793..beacbe5 100644
--- a/cuda-introduction/source/common.cu
+++ b/cuda-introduction/source/common.cu
@@ -9,7 +9,7 @@ unsigned divup(unsigned size, unsigned div)
{
// TODO: implement a 1 line function to return the divup operation.
// Note: You only need to use addition, subtraction, and division operations.
- return 0;
+ return size % div == 0 ? (size / div) : (size / div) + 1;
}
void clearHostAndDeviceArray(float *res, float *dev_res, unsigned size, const int value)
diff --git a/cuda-introduction/source/saxpy.cu b/cuda-introduction/source/saxpy.cu
index 5ed591f..ef4eab4 100644
--- a/cuda-introduction/source/saxpy.cu
+++ b/cuda-introduction/source/saxpy.cu
@@ -9,20 +9,21 @@
__global__ void saxpy(float* const z, const float* const x, const float* const y, const float a, const unsigned size)
{
// TODO 9: Compute the global index for each thread.
- unsigned idx = 0;
+ unsigned idx = blockIdx.x * blockDim.x + threadIdx.x;
// TODO 10: Check if idx is out of bounds. If yes, return.
- if (idx >= 0)
+ if (idx >= size)
return;
// TODO 11: Perform the SAXPY operation: z = a * x + y.
+ z[idx] = a * x[idx] + y[idx];
}
int main(int argc, char *argv[])
{
// TODO 1: Set the size. Start with something simple like 64.
// TODO Optional: Try out these sizes: 256, 1024, 2048, 14, 103, 1025, 3127
- const unsigned size = 0;
+ const unsigned size = 2024;
// Host arrays.
float* x = new float[size];
@@ -53,9 +54,15 @@ int main(int argc, char *argv[])
// TODO 2: Allocate memory on the device. Fill in the blanks for d_x, then do the same commands for d_y and d_z.
// CUDA(cudaMalloc((void **)& pointer, size in bytes)));
-
+ CUDA(cudaMalloc((void**)&d_x, size * sizeof(float)));
+ CUDA(cudaMalloc((void**)&d_y, size * sizeof(float)));
+ CUDA(cudaMalloc((void**)&d_z, size * sizeof(float)));
// TODO 3: Copy array contents of X and Y from the host (CPU) to the device (GPU). Follow what you did for 2,
// CUDA(cudaMemcpy(dest ptr, source ptr, size in bytes, direction enum));
+ CUDA(cudaMemcpy(d_x, x, size * sizeof(float), cudaMemcpyHostToDevice));
+ CUDA(cudaMemcpy(d_y, y, size * sizeof(float), cudaMemcpyHostToDevice));
+ CUDA(cudaMemcpy(d_z, z, size * sizeof(float), cudaMemcpyHostToDevice));
+
CUDA(cudaDeviceSynchronize());
@@ -69,17 +76,17 @@ int main(int argc, char *argv[])
// TODO 4: Setup threads and blocks.
// Start threadPerBlock as 128, then try out differnt configurations: 32, 64, 256, 512, 1024
// Use divup to get the number of blocks to launch.
- const unsigned threadsPerBlock = 0;
+ const unsigned threadsPerBlock = 32;
// TODO 5: Implement the divup function in common.cpp
const unsigned blocks = divup(size, threadsPerBlock);
// TODO 6: Launch the GPU kernel with blocks and threadPerBlock as launch configuration
// saxpy<<< >>> (....);
-
+ saxpy <<<blocks, threadsPerBlock>>> (d_z, d_x, d_y, a, size);
// TODO 7: Copy the answer back to the host (CPU) from the device (GPU).
// Copy what you did in 3, except for d_z -> z.
-
+ CUDA(cudaMemcpy(z, d_z, size * sizeof(float), cudaMemcpyDeviceToHost));
// LOOK: Use postprocess to check the result
compareReferenceAndResult(z_gold, z, size, 1e-6);
std::cout << "****************************************************" << std::endl << std::endl;
@@ -87,7 +94,9 @@ int main(int argc, char *argv[])
// TODO 8: free device memory using cudaFree
// CUDA(cudaFree(device pointer));
-
+ CUDA(cudaFree(d_x));
+ CUDA(cudaFree(d_y));
+ CUDA(cudaFree(d_z));
// free host memory
delete[] x;
delete[] y;
diff --git a/images/2-1-2-Screenshot.png b/images/2-1-2-Screenshot.png
new file mode 100644
index 0000000..73b6382
Binary files /dev/null and b/images/2-1-2-Screenshot.png differ
diff --git a/images/2-1-3-Screenshot.png b/images/2-1-3-Screenshot.png
new file mode 100644
index 0000000..4aebd6e
Binary files /dev/null and b/images/2-1-3-Screenshot.png differ
diff --git a/images/2-2-Screenshot.png b/images/2-2-Screenshot.png
new file mode 100644
index 0000000..9f513a5
Binary files /dev/null and b/images/2-2-Screenshot.png differ
diff --git a/images/2-3Screenshot.png b/images/2-3Screenshot.png
new file mode 100644
index 0000000..12f5e13
Binary files /dev/null and b/images/2-3Screenshot.png differ