diff --git a/README.md b/README.md index d2fa33d..1860bd7 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,25 @@ Project 0 Getting Started **University of Pennsylvania, CIS 5650: GPU Programming and Architecture, Project 0** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Pavel Zdravkov Peev + * LinkedIn: https://www.linkedin.com/in/pavel-peev-5568561b9/ + * Personal Website: https://www.Cartaphil.com +* Tested on: Windows 11, i7-12700, NVIDIA T1000, (SEAS VLAB 007) -### (TODO: Your README) +### Analysis and Screenshots. -Include screenshots, analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +Had problems with 2.1.4 and 2.1.5 due to not having administrative access on the virtual lab computers. +Was expecting PC to arrive 8/28/2025, but it seems to have been delayed, will redo with new pc when it arrives. + +### 2.1.2 + +![2-1-2-Screenshot](images/2-1-2-Screenshot.png) + +### 2.1.3 +![2-1-3-Screenshot](images/2-1-3-Screenshot.png) + +### 2.2 +![2-2-Screenshot](images/2-2-Screenshot.png) + +### 2.3 +![2-3Screenshot](images/2-3Screenshot.png) diff --git a/cuda-gl-check/src/main.cpp b/cuda-gl-check/src/main.cpp index 886fd4c..8775072 100644 --- a/cuda-gl-check/src/main.cpp +++ b/cuda-gl-check/src/main.cpp @@ -10,8 +10,7 @@ * C main function. */ int main(int argc, char* argv[]) { - // TODO: Change this line to use your name! - m_yourName = "TODO: YOUR NAME HERE"; + m_yourName = "Pavel Peev"; if (init(argc, argv)) { mainLoop(); diff --git a/cuda-introduction/source/common.cu b/cuda-introduction/source/common.cu index dce8793..beacbe5 100644 --- a/cuda-introduction/source/common.cu +++ b/cuda-introduction/source/common.cu @@ -9,7 +9,7 @@ unsigned divup(unsigned size, unsigned div) { // TODO: implement a 1 line function to return the divup operation. // Note: You only need to use addition, subtraction, and division operations. - return 0; + return size % div == 0 ? 
(size / div) : (size / div) + 1; } void clearHostAndDeviceArray(float *res, float *dev_res, unsigned size, const int value) diff --git a/cuda-introduction/source/saxpy.cu b/cuda-introduction/source/saxpy.cu index 5ed591f..ef4eab4 100644 --- a/cuda-introduction/source/saxpy.cu +++ b/cuda-introduction/source/saxpy.cu @@ -9,20 +9,21 @@ __global__ void saxpy(float* const z, const float* const x, const float* const y, const float a, const unsigned size) { // TODO 9: Compute the global index for each thread. - unsigned idx = 0; + unsigned idx = blockIdx.x * blockDim.x + threadIdx.x; // TODO 10: Check if idx is out of bounds. If yes, return. - if (idx >= 0) + if (idx >= size) return; // TODO 11: Perform the SAXPY operation: z = a * x + y. + z[idx] = a * x[idx] + y[idx]; } int main(int argc, char *argv[]) { // TODO 1: Set the size. Start with something simple like 64. // TODO Optional: Try out these sizes: 256, 1024, 2048, 14, 103, 1025, 3127 - const unsigned size = 0; + const unsigned size = 2024; // Host arrays. float* x = new float[size]; @@ -53,9 +54,15 @@ int main(int argc, char *argv[]) // TODO 2: Allocate memory on the device. Fill in the blanks for d_x, then do the same commands for d_y and d_z. // CUDA(cudaMalloc((void **)& pointer, size in bytes))); - + CUDA(cudaMalloc((void**)&d_x, size * sizeof(float))); + CUDA(cudaMalloc((void**)&d_y, size * sizeof(float))); + CUDA(cudaMalloc((void**)&d_z, size * sizeof(float))); // TODO 3: Copy array contents of X and Y from the host (CPU) to the device (GPU). Follow what you did for 2, // CUDA(cudaMemcpy(dest ptr, source ptr, size in bytes, direction enum)); + CUDA(cudaMemcpy(d_x, x, size * sizeof(float), cudaMemcpyHostToDevice)); + CUDA(cudaMemcpy(d_y, y, size * sizeof(float), cudaMemcpyHostToDevice)); + CUDA(cudaMemcpy(d_z, z, size * sizeof(float), cudaMemcpyHostToDevice)); + CUDA(cudaDeviceSynchronize()); @@ -69,17 +76,17 @@ int main(int argc, char *argv[]) // TODO 4: Setup threads and blocks. 
// Start threadPerBlock as 128, then try out differnt configurations: 32, 64, 256, 512, 1024 // Use divup to get the number of blocks to launch. - const unsigned threadsPerBlock = 0; + const unsigned threadsPerBlock = 32; // TODO 5: Implement the divup function in common.cpp const unsigned blocks = divup(size, threadsPerBlock); // TODO 6: Launch the GPU kernel with blocks and threadPerBlock as launch configuration // saxpy<<< >>> (....); - + saxpy <<<blocks, threadsPerBlock>>> (d_z, d_x, d_y, a, size); // TODO 7: Copy the answer back to the host (CPU) from the device (GPU). // Copy what you did in 3, except for d_z -> z. - + CUDA(cudaMemcpy(z, d_z, size * sizeof(float), cudaMemcpyDeviceToHost)); // LOOK: Use postprocess to check the result compareReferenceAndResult(z_gold, z, size, 1e-6); std::cout << "****************************************************" << std::endl << std::endl; @@ -87,7 +94,9 @@ int main(int argc, char *argv[]) // TODO 8: free device memory using cudaFree // CUDA(cudaFree(device pointer)); - + CUDA(cudaFree(d_x)); + CUDA(cudaFree(d_y)); + CUDA(cudaFree(d_z)); // free host memory delete[] x; delete[] y; diff --git a/images/2-1-2-Screenshot.png b/images/2-1-2-Screenshot.png new file mode 100644 index 0000000..73b6382 Binary files /dev/null and b/images/2-1-2-Screenshot.png differ diff --git a/images/2-1-3-Screenshot.png b/images/2-1-3-Screenshot.png new file mode 100644 index 0000000..4aebd6e Binary files /dev/null and b/images/2-1-3-Screenshot.png differ diff --git a/images/2-2-Screenshot.png b/images/2-2-Screenshot.png new file mode 100644 index 0000000..9f513a5 Binary files /dev/null and b/images/2-2-Screenshot.png differ diff --git a/images/2-3Screenshot.png b/images/2-3Screenshot.png new file mode 100644 index 0000000..12f5e13 Binary files /dev/null and b/images/2-3Screenshot.png differ