Skip to content
Open
26 changes: 20 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,25 @@ Project 0 Getting Started

**University of Pennsylvania, CIS 5650: GPU Programming and Architecture, Project 0**

* (TODO) YOUR NAME HERE
* (TODO) [LinkedIn](), [personal website](), [twitter](), etc.
* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
* Pavel Zdravkov Peev
* LinkedIn: https://www.linkedin.com/in/pavel-peev-5568561b9/
* Personal Website: https://www.Cartaphil.com
* Tested on: Windows 11, i7-12700, NVIDIA T1000 (SEAS VLAB 007)

### (TODO: Your README)
### Analysis and Screenshots.

Include screenshots, analysis, etc. (Remember, this is public, so don't put
anything here that you don't want to share with the world.)
I had problems with sections 2.1.4 and 2.1.5 because I do not have administrative access on the virtual lab computers.
I was expecting my own PC to arrive on 8/28/2025, but it seems to have been delayed; I will redo these sections on the new PC when it arrives.

### 2.1.2

<img width="397" height="418" alt="2-1-2-Screenshot" src="https://github.com/user-attachments/assets/945490a4-9f9c-4ad0-88f1-34a8e8fda4d6" />

### 2.1.3
<img width="959" height="481" alt="2-1-3-Screenshot" src="https://github.com/user-attachments/assets/27aa63bc-45ee-422f-a135-a8c68b7b852e" />

### 2.2
<img width="535" height="310" alt="2-2-Screenshot" src="https://github.com/user-attachments/assets/06e95e31-8a96-42c0-adc8-ba341eb53710" />

### 2.3
<img width="791" height="440" alt="2-3Screenshot" src="https://github.com/user-attachments/assets/527c0c9b-7783-45df-a9d7-8b084a0ba2af" />
3 changes: 1 addition & 2 deletions cuda-gl-check/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
* C main function.
*/
int main(int argc, char* argv[]) {
// TODO: Change this line to use your name!
m_yourName = "TODO: YOUR NAME HERE";
m_yourName = "Pavel Peev";

if (init(argc, argv)) {
mainLoop();
Expand Down
2 changes: 1 addition & 1 deletion cuda-introduction/source/common.cu
Original file line number Diff line number Diff line change
/**
 * Ceiling division: the smallest number of chunks of `div` elements needed
 * to cover `size` elements (e.g. the number of thread blocks for a launch).
 *
 * @param size  Number of elements to cover. May be 0, in which case 0 is returned.
 * @param div   Chunk size. Must be non-zero; a zero divisor is undefined behavior
 *              for any non-zero `size`.
 * @return      size / div, rounded up.
 */
unsigned divup(unsigned size, unsigned div)
{
    // Uses only addition, subtraction and division. Subtracting before dividing
    // (instead of the usual `size + div - 1`) cannot wrap around for large
    // `size`; the explicit zero check keeps `size - 1` from underflowing.
    return size == 0 ? 0 : (size - 1) / div + 1;
}

void clearHostAndDeviceArray(float *res, float *dev_res, unsigned size, const int value)
Expand Down
25 changes: 17 additions & 8 deletions cuda-introduction/source/saxpy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,21 @@
/**
 * SAXPY kernel: computes z[i] = a * x[i] + y[i] for every i in [0, size).
 *
 * Launch layout: indexes with the x dimension of the grid and block only,
 * one element per thread. The grid must supply at least `size` threads in
 * total; surplus threads in the last block exit without touching memory.
 *
 * @param z     Output array (device memory), at least `size` elements.
 * @param x     First input array (device memory), at least `size` elements.
 * @param y     Second input array (device memory), at least `size` elements.
 * @param a     Scalar multiplier applied to x.
 * @param size  Number of elements to process.
 */
__global__ void saxpy(float* const z, const float* const x, const float* const y, const float a, const unsigned size)
{
    // Global index of this thread across the whole launch.
    const unsigned idx = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid rarely divides `size` evenly: threads past the end do nothing.
    if (idx >= size)
        return;

    // The SAXPY operation itself: z = a * x + y.
    z[idx] = a * x[idx] + y[idx];
}

int main(int argc, char *argv[])
{
// TODO 1: Set the size. Start with something simple like 64.
// TODO Optional: Try out these sizes: 256, 1024, 2048, 14, 103, 1025, 3127
const unsigned size = 0;
const unsigned size = 2024;

// Host arrays.
float* x = new float[size];
Expand Down Expand Up @@ -53,9 +54,15 @@ int main(int argc, char *argv[])

// TODO 2: Allocate memory on the device. Fill in the blanks for d_x, then do the same commands for d_y and d_z.
// CUDA(cudaMalloc((void **)& pointer, size in bytes)));

CUDA(cudaMalloc((void**)&d_x, size * sizeof(float)));
CUDA(cudaMalloc((void**)&d_y, size * sizeof(float)));
CUDA(cudaMalloc((void**)&d_z, size * sizeof(float)));
// TODO 3: Copy array contents of X and Y from the host (CPU) to the device (GPU). Follow what you did for 2,
// CUDA(cudaMemcpy(dest ptr, source ptr, size in bytes, direction enum));
CUDA(cudaMemcpy(d_x, x, size * sizeof(float), cudaMemcpyHostToDevice));
CUDA(cudaMemcpy(d_y, y, size * sizeof(float), cudaMemcpyHostToDevice));
CUDA(cudaMemcpy(d_z, z, size * sizeof(float), cudaMemcpyHostToDevice));


CUDA(cudaDeviceSynchronize());

Expand All @@ -69,25 +76,27 @@ int main(int argc, char *argv[])
// TODO 4: Setup threads and blocks.
// Start threadsPerBlock as 128, then try out different configurations: 32, 64, 256, 512, 1024
// Use divup to get the number of blocks to launch.
const unsigned threadsPerBlock = 0;
const unsigned threadsPerBlock = 32;

// TODO 5: Implement the divup function in common.cu
const unsigned blocks = divup(size, threadsPerBlock);

// TODO 6: Launch the GPU kernel with blocks and threadPerBlock as launch configuration
// saxpy<<< >>> (....);

saxpy <<<blocks, threadsPerBlock>>> (d_z, d_x, d_y, a, size);
// TODO 7: Copy the answer back to the host (CPU) from the device (GPU).
// Copy what you did in 3, except for d_z -> z.

CUDA(cudaMemcpy(z, d_z, size * sizeof(float), cudaMemcpyDeviceToHost));
// LOOK: Use postprocess to check the result
compareReferenceAndResult(z_gold, z, size, 1e-6);
std::cout << "****************************************************" << std::endl << std::endl;
////////////////////////////////////////////////////////////

// TODO 8: free device memory using cudaFree
// CUDA(cudaFree(device pointer));

CUDA(cudaFree(d_x));
CUDA(cudaFree(d_y));
CUDA(cudaFree(d_z));
// free host memory
delete[] x;
delete[] y;
Expand Down
Binary file added images/2-1-2-Screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/2-1-3-Screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/2-2-Screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/2-3Screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.