Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1.suo
Binary file not shown.
4 changes: 2 additions & 2 deletions Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.0.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
Expand Down Expand Up @@ -114,6 +114,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.0.targets" />
</ImportGroup>
</Project>
Binary file modified Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb
Binary file not shown.
Binary file added Part1/PROJ_WIN/Release.rar
Binary file not shown.
1,137 changes: 569 additions & 568 deletions Part1/PROJ_WIN/src/kernel.cu.deps

Large diffs are not rendered by default.

57 changes: 55 additions & 2 deletions Part1/src/kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
dim3 threadsPerBlock(blockSize);

int numObjects;
const float planetMass = 3e8;
const __device__ float planetMass = 3e8;
const __device__ float starMass = 5e10;

const float scene_scale = 2e2; //size of the height map in simulation space
Expand Down Expand Up @@ -83,25 +83,72 @@ __global__ void generateCircularVelArray(int time, int N, glm::vec3 * arr, glm::
}
}

// Pairwise gravity helper: returns G * r / |r|^3, where r is the x/y offset
// from my_pos to their_pos (the z component of the offset is forced to zero).
// Returns a zero vector when the calling thread's global index is >= N, or
// when the two positions are closer than ZERO_ABSORPTION_EPSILON.
// The result is not scaled by any mass; the caller applies that factor.
__device__ glm::vec3 accelerateEachOther(const int& N,const glm::vec4& my_pos, const glm::vec4& their_pos){
    const int tid = threadIdx.x + (blockDim.x * blockIdx.x);
    if (tid >= N) {
        return glm::vec3(0.0f);
    }

    // Offset from this body towards the other one, restricted to the x/y plane.
    glm::vec3 pull(their_pos.x - my_pos.x, their_pos.y - my_pos.y, 0.0f);
    const float dist = glm::length(pull);

    // Bodies that (nearly) coincide contribute nothing instead of a huge value.
    if (dist < ZERO_ABSORPTION_EPSILON) {
        return glm::vec3(0.0f);
    }

    pull *= G / glm::pow(dist, 3.0f);
    return pull;
}

// TODO: Core force calc kernel global memory
// HINT : You may want to write a helper function that will help you
// calculate the acceleration contribution of a single body.
// REMEMBER : F = (G * m_a * m_b) / (r_ab ^ 2)
// Device helper intended to return the acceleration of the calling thread's
// body given the array of all body positions.
// NOTE(review): as written, the first statement returns a zero vector, so
// every statement after it is unreachable. Confirm whether that stub return
// was meant to be deleted when the body below was added.
// NOTE(review): the updateF kernel visible in this file does not call this
// function; it calls accelerateEachOther directly.
__device__ glm::vec3 accelerate(int N, glm::vec4 my_pos, glm::vec4 * their_pos)
{
return glm::vec3(0.0f);
// Global thread index; used below to select this thread's entry in their_pos.
int index=(blockIdx.x*blockDim.x)+threadIdx.x;
if(index<N){
//force of the center star

// x/y offset between my_pos and this thread's own entry in their_pos.
// NOTE(review): presumably my_pos is that same entry, which would make this
// offset zero - verify against the caller.
glm::vec3 r_ab=glm::vec3(my_pos.x-their_pos[index].x,my_pos.y-their_pos[index].y,0.0f);
glm::vec3 acc=r_ab;
// The trailing *0.0f cancels the star term (and yields NaN if the quotient
// is not finite, e.g. when r_ab has zero length).
acc*=G*starMass/glm::pow(glm::length(r_ab),3.0f)*0.0f;
// Add the pairwise term for every entry, including this thread's own.
for(int i=0;i<N;i++)
acc+=accelerateEachOther(N, their_pos[index], their_pos[i]);
return acc;
}

// Threads whose index is outside [0, N) contribute nothing.
return glm::vec3(0.0f);
}


// TODO : update the acceleration of each body
// Recomputes acc[index] for every body: the pull of the central star at the
// origin (mass starMass) plus the summed pull of all bodies in pos (each
// weighted by planetMass). Only x/y components are used.
//
// Launch layout: 1D grid of 1D blocks, one thread per body; threads whose
// global index is >= N do nothing.
//
// N   - number of bodies in pos / acc
// dt  - unused here (kept for a signature parallel to updateS)
// pos - body positions (read only)
// vel - unused here
// acc - output accelerations, overwritten for indices < N
__global__ void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
{
    const int index = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (index >= N) {
        return;
    }

    // Read this body's position once instead of on every loop iteration.
    const glm::vec4 my_pos = pos[index];

    // Pull of the central star, which sits at the origin.
    glm::vec3 star_acc(-my_pos.x, -my_pos.y, 0.0f);
    const float star_dist = glm::length(star_acc);
    if (star_dist < ZERO_ABSORPTION_EPSILON) {
        // A body sitting on the star would otherwise divide by ~0 and poison
        // its acceleration with inf/NaN; treat it like coincident bodies in
        // accelerateEachOther and apply no star force.
        star_acc = glm::vec3(0.0f);
    } else {
        star_acc *= G * starMass / glm::pow(star_dist, 3.0f);
    }

    // Accumulate the body-body terms in a register and store to global memory
    // once at the end. Start from an explicit zero rather than "value * 0.0f",
    // which is NaN whenever the value is not finite.
    glm::vec3 planet_acc(0.0f);
    for (int i = 0; i < N; i++) {
        planet_acc += accelerateEachOther(N, my_pos, pos[i]);
    }
    planet_acc *= planetMass;

    acc[index] = planet_acc + star_acc;
}

// TODO : update velocity and position using a simple Euler integration scheme
// Integration step, one thread per body: first advances vel by acc * dt, then
// advances the x/y/z of pos by the *updated* velocity times dt. The fourth
// component of pos is left unchanged (0 is added to it).
//
// Launch layout: 1D grid of 1D blocks; threads with global index >= N exit.
__global__ void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
{
    const int body = threadIdx.x + (blockDim.x * blockIdx.x);
    if (body >= N) {
        return;
    }

    // Update the velocity in place, then move using the new value.
    vel[body] += acc[body] * dt;
    const glm::vec3 step = vel[body] * dt;
    pos[body] += glm::vec4(step, 0.0f);
}

// Update the vertex buffer object
Expand Down Expand Up @@ -156,6 +203,7 @@ __global__ void sendToPBO(int N, glm::vec4 * pos, float4 * pbo, int width, int h
void initCuda(int N)
{
numObjects = N;
//myBlockNum=dim3((int)ceil(float(numObjects)/float(blockSize)));
dim3 fullBlocksPerGrid((int)ceil(float(N)/float(blockSize)));

cudaMalloc((void**)&dev_pos, N*sizeof(glm::vec4));
Expand All @@ -180,6 +228,11 @@ void initCuda(int N)
// Advances the simulation by one time step of length dt: recomputes all
// accelerations (updateF), then integrates velocities and positions
// (updateS), and blocks until the device has finished.
void cudaNBodyUpdateWrapper(float dt)
{
    // Nothing to simulate; a launch with a zero-sized grid is an invalid
    // configuration, so bail out before building one.
    if (numObjects <= 0) {
        return;
    }

    // Integer ceil-division: exact for any count, unlike rounding through float.
    const int blockCount = (numObjects + (blockSize) - 1) / (blockSize);
    dim3 fullBlocksPerGrid(blockCount);

    // Both kernels are issued to the same (default) stream, so updateS cannot
    // start before updateF has finished; no host-side sync is needed between.
    updateF<<<fullBlocksPerGrid, blockSize>>>(numObjects,dt,dev_pos,dev_vel,dev_acc);
    updateS<<<fullBlocksPerGrid, blockSize>>>(numObjects,dt,dev_pos,dev_vel,dev_acc);

    // cudaThreadSynchronize is deprecated; cudaDeviceSynchronize is its
    // direct replacement.
    cudaDeviceSynchronize();
}

void cudaUpdateVBO(float * vbodptr, int width, int height)
Expand Down
8 changes: 5 additions & 3 deletions Part1/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include "main.h"

#define N_FOR_VIS 5000
#define N_FOR_VIS 4800
#define DT 0.2
#define VISUALIZE 1
//-------------------------------
Expand Down Expand Up @@ -72,22 +72,24 @@ void runCuda()

int timebase = 0;
int frame = 0;

int allframe=0;
void display()
{
static float fps = 0;
frame++;
allframe++;
int time=glutGet(GLUT_ELAPSED_TIME);

if (time - timebase > 1000) {
fps = frame*1000.0f/(time-timebase);
timebase = time;
frame = 0;
}
float avrfps=allframe*1000.0f/time;
runCuda();

char title[100];
sprintf( title, "565 NBody sim [%0.2f fps]", fps );
sprintf( title, "565 NBody sim [%0.2f fps] [%0.2f avrfps]", fps,avrfps);
glutSetWindowTitle(title);

glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
Expand Down
26 changes: 26 additions & 0 deletions Part2/matrix_math/matrix_math.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix_math", "matrix_math\matrix_math.vcxproj", "{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|Win32.ActiveCfg = Debug|Win32
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|Win32.Build.0 = Debug|Win32
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|x64.ActiveCfg = Debug|x64
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|x64.Build.0 = Debug|x64
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|Win32.ActiveCfg = Release|Win32
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|Win32.Build.0 = Release|Win32
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|x64.ActiveCfg = Release|x64
{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
96 changes: 96 additions & 0 deletions Part2/matrix_math/matrix_math/kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@

//#include "cuda_runtime.h"
//#include "device_launch_parameters.h"
//
//#include <stdio.h>
//
//#include "matrix_math.cu"
//
//cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);
//
//__global__ void addKernel(int *c, const int *a, const int *b)
//{
// int i = threadIdx.x;
// c[i] = a[i] + b[i];
//}

//
//// Helper function for using CUDA to add vectors in parallel.
//cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
//{
// int *dev_a = 0;
// int *dev_b = 0;
// int *dev_c = 0;
// cudaError_t cudaStatus;
//
// // Choose which GPU to run on, change this on a multi-GPU system.
// cudaStatus = cudaSetDevice(0);
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
// goto Error;
// }
//
// // Allocate GPU buffers for three vectors (two input, one output) .
// cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaMalloc failed!");
// goto Error;
// }
//
// cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaMalloc failed!");
// goto Error;
// }
//
// cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaMalloc failed!");
// goto Error;
// }
//
// // Copy input vectors from host memory to GPU buffers.
// cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaMemcpy failed!");
// goto Error;
// }
//
// cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaMemcpy failed!");
// goto Error;
// }
//
// // Launch a kernel on the GPU with one thread for each element.
// addKernel<<<1, size>>>(dev_c, dev_a, dev_b);
//
// // Check for any errors launching the kernel
// cudaStatus = cudaGetLastError();
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
// goto Error;
// }
//
// // cudaDeviceSynchronize waits for the kernel to finish, and returns
// // any errors encountered during the launch.
// cudaStatus = cudaDeviceSynchronize();
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
// goto Error;
// }
//
// // Copy output vector from GPU buffer to host memory.
// cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
// if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaMemcpy failed!");
// goto Error;
// }
//
//Error:
// cudaFree(dev_c);
// cudaFree(dev_a);
// cudaFree(dev_b);
//
// return cudaStatus;
//}
Loading