CIS565-Fall-2014 · chiwsy · Sep 21, 2014 · Sep 22, 2014 · Sep 22, 2014 · Sep 22, 2014
diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1.suo b/Part1/PROJ_WIN/CIS565_PROJ_1.suo
diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj b/Part1/PROJ_WIN/CIS565_PROJ_1/CIS565_PROJ_1.vcxproj
@@ -30,7 +30,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.0.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.0.targets" />
   </ImportGroup>
 </Project>
diff --git a/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb b/Part1/PROJ_WIN/CIS565_PROJ_1/vc100.pdb
diff --git a/Part1/PROJ_WIN/Release.rar b/Part1/PROJ_WIN/Release.rar
diff --git a/Part1/PROJ_WIN/src/kernel.cu.deps b/Part1/PROJ_WIN/src/kernel.cu.deps
diff --git a/Part1/src/kernel.cu b/Part1/src/kernel.cu
@@ -9,7 +9,7 @@
 dim3 threadsPerBlock(blockSize);
 
 int numObjects;
-const float planetMass = 3e8;
+const __device__ float planetMass = 3e8; // mass used for every body other than the star; __device__ so kernels such as updateF can read it
 const __device__ float starMass = 5e10;
 
 const float scene_scale = 2e2; //size of the height map in simulation space
@@ -83,25 +83,72 @@ __global__ void generateCircularVelArray(int time, int N, glm::vec3 * arr, glm::
     }
 }
 
+__device__ glm::vec3 accelerateEachOther(const int& N,const glm::vec4& my_pos, const glm::vec4& their_pos){
+	// Pull on the body at my_pos from the body at their_pos, per unit mass of the latter, in the XY plane (z is ignored).
+	int tid=(blockIdx.x*blockDim.x)+threadIdx.x;
+	if(tid>=N) return glm::vec3(0.0f); // threads past the last body get no contribution
+	glm::vec3 delta=glm::vec3(their_pos.x-my_pos.x,their_pos.y-my_pos.y,0.0f);
+	float dist=glm::length(delta);
+	// bodies closer than ZERO_ABSORPTION_EPSILON exert no pull, which also avoids dividing by a zero distance
+	if(dist<ZERO_ABSORPTION_EPSILON) return glm::vec3(0.0f);
+	delta*=G/glm::pow(dist,3.0f);
+	return delta;
+}
+
 // TODO: Core force calc kernel global memory
 //		 HINT : You may want to write a helper function that will help you 
 //              calculate the acceleration contribution of a single body.
 //		 REMEMBER : F = (G * m_a * m_b) / (r_ab ^ 2)
 __device__  glm::vec3 accelerate(int N, glm::vec4 my_pos, glm::vec4 * their_pos)
 {
-    return glm::vec3(0.0f);
+	// Total XY-plane acceleration of the body at my_pos: the center star at the origin plus the N bodies in their_pos.
+	int index=(blockIdx.x*blockDim.x)+threadIdx.x;
+	if(index>=N) return glm::vec3(0.0f); //threads past the last body contribute nothing, as before
+	glm::vec3 starAcc=glm::vec3(-my_pos.x,-my_pos.y,0.0f);
+	float starDist=glm::length(starAcc);
+	//a body on top of the star would divide by zero and return NaN, so it gets no pull from the star
+	if(starDist<ZERO_ABSORPTION_EPSILON) starAcc=glm::vec3(0.0f);
+	else starAcc*=G*starMass/glm::pow(starDist,3.0f);
+	glm::vec3 planetAcc=glm::vec3(0.0f);
+	for(int i=0;i<N;i++)
+		planetAcc+=accelerateEachOther(N, my_pos, their_pos[i]);
+	planetAcc*=planetMass; //accelerateEachOther returns the pull per unit mass of the other body
+	return planetAcc+starAcc;
 }
 
+
 // TODO : update the acceleration of each body
 __global__ void updateF(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
 {
 	// FILL IN HERE
+	// One thread per body: writes that body's total acceleration to acc[index] (dt and vel are not read).
+	int index=(blockIdx.x*blockDim.x)+threadIdx.x;
+	if(index<N){
+		glm::vec4 my_pos=pos[index];
+		//pull of the center star, which sits at the origin of the XY plane
+		glm::vec3 starAcc=glm::vec3(-my_pos.x,-my_pos.y,0.0f);
+		float starDist=glm::length(starAcc);
+		//a body on top of the star would divide by zero and poison acc with NaN, so it gets no pull
+		if(starDist<ZERO_ABSORPTION_EPSILON) starAcc=glm::vec3(0.0f);
+		else starAcc*=G*starMass/glm::pow(starDist,3.0f);
+		//sum the pairwise pulls in a local variable instead of updating acc[index] through the pointer N times
+		glm::vec3 planetAcc=glm::vec3(0.0f);
+		for(int i=0;i<N;i++)
+			planetAcc+=accelerateEachOther(N, my_pos, pos[i]);
+		planetAcc*=planetMass; //accelerateEachOther returns the pull per unit mass of the other body
+		acc[index]=planetAcc+starAcc;
+	}
 }
 
 // TODO : update velocity and position using a simple Euler integration scheme
 __global__ void updateS(int N, float dt, glm::vec4 * pos, glm::vec3 * vel, glm::vec3 * acc)
 {
 	// FILL IN HERE
+	// One thread per body: advance the velocity by acc*dt, then move the body using the updated velocity.
+	int tid=(blockIdx.x*blockDim.x)+threadIdx.x;
+	if(tid>=N) return;
+	vel[tid]+=acc[tid]*dt;
+	pos[tid]+=glm::vec4(vel[tid]*dt,0.0f);
 }
 
 // Update the vertex buffer object
@@ -156,6 +203,7 @@ __global__ void sendToPBO(int N, glm::vec4 * pos, float4 * pbo, int width, int h
 void initCuda(int N)
 {
     numObjects = N;
+	//myBlockNum=dim3((int)ceil(float(numObjects)/float(blockSize)));
     dim3 fullBlocksPerGrid((int)ceil(float(N)/float(blockSize)));
 
     cudaMalloc((void**)&dev_pos, N*sizeof(glm::vec4));
@@ -180,6 +228,11 @@ void initCuda(int N)
 void cudaNBodyUpdateWrapper(float dt)
 {
 	// FILL IN HERE
+	// Advance the simulation one step of dt: recompute every body's acceleration, then integrate.
+	dim3 fullBlocksPerGrid((int)ceil(float(numObjects)/float(blockSize)));
+	updateF<<<fullBlocksPerGrid, blockSize>>>(numObjects,dt,dev_pos,dev_vel,dev_acc);
+	updateS<<<fullBlocksPerGrid, blockSize>>>(numObjects,dt,dev_pos,dev_vel,dev_acc); // same stream, so it starts only after updateF is done
+	cudaDeviceSynchronize(); // was the deprecated cudaThreadSynchronize(); wait for the step to finish before returning
 }
 
 void cudaUpdateVBO(float * vbodptr, int width, int height)

diff --git a/Part1/src/main.cpp b/Part1/src/main.cpp
@@ -4,7 +4,7 @@
 
 #include "main.h"
 
-#define N_FOR_VIS 5000
+#define N_FOR_VIS 4800
 #define DT 0.2
 #define VISUALIZE 1
 //-------------------------------
@@ -72,22 +72,24 @@ void runCuda()
 
 int timebase = 0;
 int frame = 0;
-
+int allframe=0;
 void display()
 {
     static float fps = 0;
     frame++;
+	allframe++;
     int time=glutGet(GLUT_ELAPSED_TIME);
 
     if (time - timebase > 1000) {
         fps = frame*1000.0f/(time-timebase);
         timebase = time;
         frame = 0;
     }
+	float avrfps=allframe*1000.0f/time;
     runCuda();
 
     char title[100];
-    sprintf( title, "565 NBody sim [%0.2f fps]", fps );
+    sprintf( title, "565 NBody sim [%0.2f fps] [%0.2f avrfps]", fps,avrfps);
     glutSetWindowTitle(title);
 
     glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);

diff --git a/Part2/matrix_math/matrix_math.sln b/Part2/matrix_math/matrix_math.sln
@@ -0,0 +1,26 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix_math", "matrix_math\matrix_math.vcxproj", "{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|Win32.ActiveCfg = Debug|Win32
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|Win32.Build.0 = Debug|Win32
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|x64.ActiveCfg = Debug|x64
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Debug|x64.Build.0 = Debug|x64
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|Win32.ActiveCfg = Release|Win32
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|Win32.Build.0 = Release|Win32
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|x64.ActiveCfg = Release|x64
+		{8539B65E-A3B7-45BA-8449-5A5A8C3DEA69}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/Part2/matrix_math/matrix_math/kernel.cu b/Part2/matrix_math/matrix_math/kernel.cu
@@ -0,0 +1,96 @@
+
+//#include "cuda_runtime.h"
+//#include "device_launch_parameters.h"
+//
+//#include <stdio.h>
+//
+//#include "matrix_math.cu"
+//
+//cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);
+//
+//__global__ void addKernel(int *c, const int *a, const int *b)
+//{
+//    int i = threadIdx.x;
+//    c[i] = a[i] + b[i];
+//}
+
+//
+//// Helper function for using CUDA to add vectors in parallel.
+//cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
+//{
+//    int *dev_a = 0;
+//    int *dev_b = 0;
+//    int *dev_c = 0;
+//    cudaError_t cudaStatus;
+//
+//    // Choose which GPU to run on, change this on a multi-GPU system.
+//    cudaStatus = cudaSetDevice(0);
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
+//        goto Error;
+//    }
+//
+//    // Allocate GPU buffers for three vectors (two input, one output).
+//    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaMalloc failed!");
+//        goto Error;
+//    }
+//
+//    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaMalloc failed!");
+//        goto Error;
+//    }
+//
+//    cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaMalloc failed!");
+//        goto Error;
+//    }
+//
+//    // Copy input vectors from host memory to GPU buffers.
+//    cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaMemcpy failed!");
+//        goto Error;
+//    }
+//
+//    cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaMemcpy failed!");
+//        goto Error;
+//    }
+//
+//    // Launch a kernel on the GPU with one thread for each element.
+//    addKernel<<<1, size>>>(dev_c, dev_a, dev_b);
+//
+//    // Check for any errors launching the kernel
+//    cudaStatus = cudaGetLastError();
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
+//        goto Error;
+//    }
+//    
+//    // cudaDeviceSynchronize waits for the kernel to finish, and returns
+//    // any errors encountered during the launch.
+//    cudaStatus = cudaDeviceSynchronize();
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
+//        goto Error;
+//    }
+//
+//    // Copy output vector from GPU buffer to host memory.
+//    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
+//    if (cudaStatus != cudaSuccess) {
+//        fprintf(stderr, "cudaMemcpy failed!");
+//        goto Error;
+//    }
+//
+//Error:
+//    cudaFree(dev_c);
+//    cudaFree(dev_a);
+//    cudaFree(dev_b);
+//    
+//    return cudaStatus;
+//}