From 12be161fc4fec4a79e7ea0ff8158274db0b7f98f Mon Sep 17 00:00:00 2001 From: Daniel Krupka Date: Tue, 12 Dec 2017 21:20:07 -0500 Subject: [PATCH] changes --- .gitmodules | 6 +- CMakeLists.txt | 11 +- external/glfw | 1 + src/CMakeLists.txt | 3 +- src/gui.cpp | 95 ++++------- src/gui.hpp | 10 +- src/main.cpp | 150 ++---------------- src/phys.cu | 384 +++++++++++++++------------------------------ src/ppm.cpp | 105 +++++++++---- src/ppm.hpp | 26 +-- src/ppm_cuda.cu | 72 +++++---- 11 files changed, 322 insertions(+), 541 deletions(-) create mode 160000 external/glfw diff --git a/.gitmodules b/.gitmodules index 899bdc6..8728388 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,10 @@ [submodule "external/nanogui"] path = external/nanogui - url = https://github.com/wjakob/nanogui + url = https://github.com/wjakob/nanogui + branch = master [submodule "external/lapack"] path = external/lapack url = https://github.com/Reference-LAPACK/lapack +[submodule "external/glfw"] + path = external/glfw + url = https://github.com/glfw/glfw diff --git a/CMakeLists.txt b/CMakeLists.txt index c9e8621..0a5af69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,13 +48,21 @@ find_package(CUDA REQUIRED) set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON) set(CUDA_SEPARABLE_COMPILATION ON) +### GLFW options +set(BUILD_SHARED_LIBS OFF CACHE BOOL " " FORCE) +set(GLFW_BUILD_EXAMPLES OFF CACHE BOOL " " FORCE) +set(GLFW_BUILD_TESTS OFF CACHE BOOL " " FORCE) +set(GLFW_BUILD_DOCS OFF CACHE BOOL " " FORCE) +add_subdirectory(external/glfw) +include_directories(external/glfw) + ### NanoGui options set(NANOGUI_BUILD_EXAMPLE OFF CACHE BOOL " " FORCE) set(NANOGUI_BUILD_PYTHON OFF CACHE BOOL " " FORCE) set(NANOGUI_INSTALL OFF CACHE BOOL " " FORCE) set(NANOGUI_BUILD_SHARED OFF CACHE BOOL " " FORCE) add_subdirectory(external/nanogui) -set_property(TARGET glfw glfw_objects nanogui PROPERTY FOLDER "dependencies") +#set_property(TARGET nanogui PROPERTY FOLDER "dependencies") add_definitions(${NANOGUI_EXTRA_DEFS}) include_directories(external/nanogui/include ${NANOGUI_EXTRA_INCS}) @@ -80,6 +88,7 @@ cuda_add_executable(${CMAKE_PROJECT_NAME} target_link_libraries(${CMAKE_PROJECT_NAME} ppm ${CUDA_CUBLAS_LIBRARIES} + glfw nanogui ${NANOGUI_EXTRA_LIBS} ) diff --git a/external/glfw b/external/glfw new file mode 160000 index 0000000..c5694b3 --- /dev/null +++ b/external/glfw @@ -0,0 +1 @@ +Subproject commit c5694b3013ff6c5038f58eba5ef9f62ec43112be diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eff7c68..60f9b0a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -cuda_add_library(ppm STATIC +cuda_add_library(ppm "ppm_cuda.cu" "phys.cu" "ppm.cpp" @@ -7,6 +7,7 @@ cuda_add_library(ppm STATIC "util/math.hpp" "util/error.hpp" "bezier.hpp" + STATIC OPTIONS -lineinfo -std=c++11 -arch=sm_50 ) diff --git a/src/gui.cpp b/src/gui.cpp index 04892d9..af820fa 100644 --- a/src/gui.cpp +++ b/src/gui.cpp @@ -27,22 +27,17 @@ class PpmCanvas : public nanogui::GLCanvas { {} virtual void drawGL() override { + int oldBlendSrc, oldBlendDst; glGetIntegerv(GL_BLEND_SRC_RGB, &oldBlendSrc); glGetIntegerv(GL_BLEND_DST_RGB, &oldBlendDst); glBlendFunc(GL_ONE, GL_ZERO); glEnable(GL_DEPTH_TEST); ppm->draw(shader); + glDisable(GL_DEPTH_TEST); glBlendFunc(oldBlendSrc, oldBlendDst); } - - virtual bool mouseButtonEvent(const nanogui::Vector2i &pos, int button, bool down, int mods) override { - if (down) { - gui->click(pos[0] - position()[0], pos[1]- position()[1]); - } - return true; - } private: Shader *shader; @@ -58,16 +53,14 @@ PpmGui::PpmGui(int w, int h) : fovy(M_PI / 4), zNear(0.1), zFar(100.0), yAngle(0), xAngle(0), zoom(5.0), ppmTime(0.0), fpsTime(0.0), nbFrames(0), - fName(""), nBasis(4), nSamp(4), nSub(2), - fClick(5.0f), - clickIdx(-1) + fName(""), nBasis(4), nSamp(4), nSub(2) { using namespace nanogui; printf("PpmGui: creating window\n"); Widget *window = new Window(this, ""); window->setPosition(Vector2i(0,0)); - window->setLayout(new BoxLayout(Orientation::Horizontal, Alignment::Middle, 0, 5)); + window->setLayout(new BoxLayout(Orientation::Horizontal, Alignment::Middle, 0, 5)); printf("PpmGui: initialize glew\n"); glewExperimental = GL_TRUE; @@ -99,16 +92,16 @@ PpmGui::PpmGui(int w, int h) : CheckBox *chkFill = new CheckBox(tools, "Show Surface"); chkFill->setChecked(ppm->visFill); chkFill->setCallback([this](bool value) { ppm->visFill = value; }); - + new Label(tools, "nBasis"); IntBox *c3 = new IntBox(tools, nBasis); new Label(tools, "nSamp"); IntBox *c4 = new IntBox(tools, nSamp); new Label(tools, "nSub"); IntBox *c5 = new IntBox(tools, nSub); - + c3->setMinValue(3); - c3->setCallback([this,c4](int value) { + c3->setCallback([this,c4](int value) { nBasis = value; nSamp = std::max(nBasis, nSamp); c4->setValue(nSamp); @@ -117,66 +110,59 @@ PpmGui::PpmGui(int w, int h) : }); c3->setEditable(true); c3->setSpinnable(true); - + c4->setMinValue(nBasis); - c4->setCallback([this](int value) { + c4->setCallback([this](int value) { nSamp = value; rebuild(); }); c4->setEditable(true); c4->setSpinnable(true); - + c5->setMinValue(1); - c5->setCallback([this](int value) { + c5->setCallback([this](int value) { nSub = value; rebuild(); }); c5->setEditable(true); c5->setSpinnable(true); - + new Label(tools, "kSelf"); FloatBox *f1 = new FloatBox(tools, ppm->kSelf); new Label(tools, "kDamp"); FloatBox *f2 = new FloatBox(tools, ppm->kDamp); new Label(tools, "kNbr"); FloatBox *f3 = new FloatBox(tools, ppm->kNbr); - new Label(tools, "Click Force"); - FloatBox *f4 = new FloatBox(tools, fClick); - + f1->setMinValue(0.0f); - f1->setCallback([this](float value) { + f1->setCallback([this](float value) { ppm->kSelf = value; }); f1->setEditable(true); - + f2->setMinValue(0.0f); - f2->setCallback([this](float value) { + f2->setCallback([this](float value) { ppm->kDamp = value; }); f2->setEditable(true); - + f3->setMinValue(0.0f); - f3->setCallback([this](float value) { + f3->setCallback([this](float value) { ppm->kNbr = value; }); f3->setEditable(true); - - f4->setCallback([this](float value) { - fClick = value; - }); - f4->setEditable(true); - + new Label(tools, "PPM time (ms)"); ppmTimeBox = new FloatBox(tools); - + new Label(tools, "Base faces"); ppmBaseFaceBox = new IntBox(tools); new Label(tools, "Tess rate (faces/s)"); ppmTessRateBox = new FloatBox(tools); - + Button *fileButton = new Button(tools, "Load OBJ"); - fileButton->setCallback([this,fileButton] { + fileButton->setCallback([this,fileButton] { fName = nanogui::file_dialog({{"obj","Wavefront OBJ"}}, true); rebuild(); fileButton->setTooltip(fName); @@ -190,7 +176,7 @@ PpmGui::PpmGui(int w, int h) : canvas->setBackgroundColor({ 0, 0, 0, 255 }); canvas->setDrawBorder(false); canvas->setSize({ w - tools->width(), h }); - + printf("PpmGui: performing layout\n"); performLayout(); @@ -212,22 +198,6 @@ void PpmGui::mainLoop() { } } -void PpmGui::click(int x, int y) { - float ratio = float(canvas->width())/canvas->height(); - float fovx = atan(tan(fovy) * ratio); - glm::vec3 dz = glm::normalize(-camPos); - glm::vec3 dx = glm::cross(dz, up); - glm::vec3 dy = glm::cross(dx, dz); - - float vx = float(x) / canvas->width(); - float vy = float(y) / canvas->height(); - - clickDir = glm::normalize(dz + tanf(fovx/2)*(2.0f*vx-1)*dx + tanf(fovy/2)*(1.0f - 2.0f*vy)*dy); - clickIdx = ppm->intersect(camPos, clickDir); - - printf("click %f %f %f %d\n", clickDir.x, clickDir.y, clickDir.z, clickIdx); -} - void PpmGui::rebuild() { if (fName.empty()) return; @@ -241,15 +211,15 @@ void PpmGui::updateCamera() { camPos.x = zoom * cos(yAngle) * sin(xAngle); camPos.y = zoom * sin(yAngle); camPos.z = zoom * cos(yAngle) * cos(xAngle); - + right = -glm::vec3(sin(xAngle - M_PI/2), 0, cos(xAngle - M_PI/2)); up = glm::normalize(glm::cross(right, -camPos)); - + projection = glm::perspective(fovy, float(canvas->width()) / float(canvas->height()), zNear, zFar); view = glm::lookAt(camPos, glm::vec3(0,0,0), up); mvp = projection*view*ppm->model; - + shader->setUniform("model", mvp); shader->setUniform("invTrModel", glm::inverse(glm::transpose(mvp))); shader->setUniform("CamDir", glm::normalize(-camPos)); @@ -258,14 +228,14 @@ void PpmGui::updateCamera() { bool PpmGui::resizeEvent(const nanogui::Vector2i &size) { if (Screen::resizeEvent(size)) return true; - + performLayout(); // to recalculate toolbar width canvas->setSize({ size[0] - tools->width(), size[1] }); xSize = size[0]; ySize = size[1]; performLayout(); updateCamera(); - + return true; } @@ -274,14 +244,13 @@ void PpmGui::draw(NVGcontext *ctx) { float currentTime = glfwGetTime(); float dt = currentTime - prevTime; prevTime = currentTime; - ppmTime += ppm->update(clickIdx, fClick*clickDir, dt); - clickIdx = -1; - + ppmTime += ppm->update(dt); + mvp = projection*view*ppm->model; shader->setUniform("model", mvp); shader->setUniform("invTrModel", glm::inverse(glm::transpose(mvp))); Screen::draw(ctx); - + // perform timing nbFrames++; if (currentTime - fpsTime >= 1.0) { @@ -324,7 +293,7 @@ bool PpmGui::keyboardEvent(int key, int scancode, int action, int modifiers) { updateCamera(); return true; } - + if (key == GLFW_KEY_LEFT_BRACKET && action == GLFW_PRESS) { zoom *= 1.1; updateCamera(); diff --git a/src/gui.hpp b/src/gui.hpp index 17c4644..b4e38c0 100644 --- a/src/gui.hpp +++ b/src/gui.hpp @@ -12,13 +12,13 @@ class PpmGui : public nanogui::Screen { PpmGui(int w, int h); virtual ~PpmGui(); void mainLoop(); - void click(int x, int y); void rebuild(); void updateCamera(); virtual bool resizeEvent(const nanogui::Vector2i &size); virtual void draw(NVGcontext *ctx); virtual bool keyboardEvent(int key, int scancode, int action, int modifiers); - + glm::vec3 clickDir; + private: PPM *ppm; std::string fName; @@ -28,7 +28,7 @@ class PpmGui : public nanogui::Screen { nanogui::GLCanvas *canvas; nanogui::Widget *tools; - + int xSize, ySize; float lastX, lastY, xpos, ypos; bool leftMousePressed, rightMousePressed; @@ -41,10 +41,6 @@ class PpmGui : public nanogui::Screen { int nbFrames; nanogui::FloatBox *ppmTimeBox; - int clickIdx; - float fClick; - glm::vec3 clickDir; - Shader *shader; }; diff --git a/src/main.cpp b/src/main.cpp index 2368694..af207ee 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,152 +1,20 @@ #include +#include +#include #include "ppm.hpp" #include "gui.hpp" int main(int argc, char *argv[]) { - printf("argc = %d\n", argc); - if (argc > 2) { - - PPM *ppm = new PPM(false); - const char *fName = argv[1]; - const char *opt = argv[2]; - - int nBasis, nSamp, nSub; - if (!strcmp(opt, "--samp")) { - if (argc != 7) { - printf("usage: %s --samp start end deg sub\n", argv[0]); - delete ppm; - return 0; - } - - int r2 = atoi(argv[3]), r3 = atoi(argv[4]); - nBasis = atoi(argv[5]); - nSub = atoi(argv[6]); - for (int i = r2; i <= r3; i++) { - nSamp = nSub * (1 << i); - ppm->rebuild(fName, nBasis, nSamp, nSub); - printf("%d -> ", nSamp); - try { - ppm->useTessSM = false; - float dt = ppm->update(-1, glm::vec3(0.0f),.01f); - printf("%f ", dt); - } catch (const std::exception &e) { - printf("%s ", e.what()); - } - try { - ppm->useTessSM = true; - float dt = ppm->update(-1, glm::vec3(0.0f),.01f); - printf("%f\n", dt); - } catch (const std::exception &e) { - printf("%s\n", e.what()); - } - } - } - - else if (!strcmp(opt, "--deg")) { - if (argc != 6) { - printf("usage: %s --deg start end sub\n", argv[0]); - delete ppm; - return 0; - } - - int r2 = atoi(argv[3]), r3 = atoi(argv[4]); - nSub = atoi(argv[5]); - for (int i = r2; i <= r3; i++) { - nSamp = nBasis = 2*(2 + i); - ppm->rebuild(fName, nBasis, nSamp, nSub); - - printf("%d -> ", nBasis); - try { - ppm->useTessSM = false; - float dt = ppm->update(-1, glm::vec3(0.0f),.01f); - printf("%f ", dt); - } catch (const std::exception &e) { - printf("%s ", e.what()); - } - try { - ppm->useTessSM = true; - float dt = ppm->update(-1, glm::vec3(0.0f),.01f); - printf("%f\n", dt); - } catch (const std::exception &e) { - printf("%s\n", e.what()); - } - - } - } - - else if (!strcmp(opt, "--sub")) { - if (argc != 7) { - printf("usage: %s --sub start end deg samp\n", argv[0]); - delete ppm; - return 0; - } - - int r2 = atoi(argv[3]), r3 = atoi(argv[4]); - nBasis = atoi(argv[5]); - nSamp = atoi(argv[6]); - for (int i = r2; i <= r3; i++) { - nSub = (1 << i); - ppm->rebuild(fName, nBasis, nSamp, nSub); - - printf("%d -> ", nSub); - try { - ppm->useTessSM = false; - float dt = ppm->update(-1, glm::vec3(0.0f),.01f); - printf("%f ", dt); - } catch (const std::exception &e) { - printf("%s ", e.what()); - } - try { - ppm->useTessSM = true; - float dt = ppm->update(-1, glm::vec3(0.0f),.01f); - printf("%f\n", dt); - } catch (const std::exception &e) { - printf("%s\n", e.what()); - } - } - } - - else if (!strcmp(opt, "--ix")) { - if (argc != 5) { - printf("usage: %s --ix sub count\n", argv[0]); - delete ppm; - return 0; - } - - nBasis = 4; - nSamp = 4; - nSub = (1 << atoi(argv[3])); - ppm->rebuild(fName, nBasis, nSamp, nSub); - ppm->update(-1, glm::vec3(0.0f),.1f); - - float dt = 0, t; - float2 uv; - srand(time(NULL)); - for (int i = 0; i < atoi(argv[4]); i++) { - float phi = 2.0f*float(rand())*M_PI/RAND_MAX, theta = float(rand())*M_PI/RAND_MAX; - glm::vec3 p0(cos(phi)*cos(theta), sin(phi), cos(phi)*sin(theta)); - float t0 = clock(); - ppm->intersect(p0, -p0); - float t1 = clock(); - dt += (t1-t0)/CLOCKS_PER_SEC; - } - printf("ix mean = %.3f us\n", 1.0e6f*dt/atoi(argv[4])); - } - - else { - printf("usage: %s [--samp, --deg, --sub, --ix]\n", argv[0]); - } - - delete ppm; - return 0; - } - // initialize graphics - nanogui::init(); - PpmGui gui(1200, 800); - gui.mainLoop(); + try { + nanogui::init(); + PpmGui gui(1200, 800); + gui.mainLoop(); + } catch (const std::exception &e) { + printf("%s\n", e.what()); + } return 0; } diff --git a/src/phys.cu b/src/phys.cu index e17accb..198ba37 100644 --- a/src/phys.cu +++ b/src/phys.cu @@ -3,166 +3,82 @@ #include "util/error.hpp" #include "util/uvidx.hpp" -#include -#include -#include -#include - #include #include -__global__ void kCalcInertia(int nFace, const HeData *heFaces, const float *vtxData, glm::mat3 *moiOut, float *massOut, glm::vec3 *cmOut) { +__global__ void kCalcInertia(int nFace, const HeData *heFaces, const float + *vtxData, float *moiOut, float *volOut, float *cmOut) { int fIdx = threadIdx.x + blockIdx.x * blockDim.x; if (fIdx >= nFace) return; - - extern __shared__ glm::mat3 matSM[]; - glm::mat3 &C = matSM[0]; - glm::mat3 &A = matSM[1+threadIdx.x]; - float *pA = glm::value_ptr(A); - float *pC = glm::value_ptr(C); - - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - pA[3*i + j] = vtxData[PPM_NVARS*heFaces[3*fIdx + i].src + j]; - if (threadIdx.x == 0) pC[3*i + j] = (i == j) ? (1.0f/60) : (1.0f/120); - } - } - - float detA = glm::determinant(A); - float *cmPtr = glm::value_ptr(*cmOut); - atomicAdd(&cmPtr[fIdx+0], detA*(pA[0]+pA[3]+pA[6])/18.0f); - atomicAdd(&cmPtr[fIdx+1], detA*(pA[1]+pA[4]+pA[7])/18.0f); - atomicAdd(&cmPtr[fIdx+2], detA*(pA[2]+pA[5]+pA[8])/18.0f); - atomicAdd(massOut, detA/6); - - __syncthreads(); - float *moiPtr = glm::value_ptr(*moiOut); - A = detA * A * C * glm::transpose(A); - for (int i = 0; i < 9; i++) - atomicAdd(&moiPtr[i], pA[i]); -} -__global__ void kCalcTessInertia(int nFace, const int *tessIdx, const float *tessVtx, glm::mat3 *moiOut, float *massOut, glm::vec3 *cmOut) { - int fIdx = threadIdx.x + blockIdx.x * blockDim.x; - if (fIdx >= nFace) - return; - - extern __shared__ glm::mat3 matSM[]; - glm::mat3 &C = matSM[0]; - glm::mat3 &A = matSM[1+threadIdx.x]; - float *pA = glm::value_ptr(A); - float *pC = glm::value_ptr(C); - + extern __shared__ float matSM[]; + float *pA = &matSM[12*threadIdx.x]; + float *r = &matSM[12*threadIdx.x + 9]; + for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - pA[3*i + j] = tessVtx[PPM_NVARS*tessIdx[3*fIdx + i] + j]; - if (threadIdx.x == 0) pC[3*i + j] = (i == j) ? (1.0f/60) : (1.0f/120); - } - } - - float detA = glm::determinant(A); - float *cmPtr = glm::value_ptr(*cmOut); - atomicAdd(&cmPtr[fIdx+0], detA*(pA[0]+pA[3]+pA[6])/18.0f); - atomicAdd(&cmPtr[fIdx+1], detA*(pA[1]+pA[4]+pA[7])/18.0f); - atomicAdd(&cmPtr[fIdx+2], detA*(pA[2]+pA[5]+pA[8])/18.0f); - atomicAdd(massOut, detA/6); - - __syncthreads(); - float *moiPtr = glm::value_ptr(*moiOut); - A = detA * A * C * glm::transpose(A); - for (int i = 0; i < 9; i++) - atomicAdd(&moiPtr[i], pA[i]); + for (int j = 0; j < 3; j++) { + pA[3*i + j] = vtxData[PPM_NVARS*heFaces[3*fIdx + i].src + j]; + }} + r[0] = pA[0] + pA[3] + pA[6]; + r[1] = pA[1] + pA[4] + pA[7]; + r[2] = pA[2] + pA[5] + pA[8]; + + float detA = pA[0]*(pA[4]*pA[8]-pA[5]*pA[7]) + - pA[3]*(pA[1]*pA[8]-pA[7]*pA[2]) + + pA[6]*(pA[1]*pA[5]-pA[2]*pA[4]); + atomicAdd(&cmOut[0], detA*r[0]/18.0f); + atomicAdd(&cmOut[1], detA*r[1]/18.0f); + atomicAdd(&cmOut[2], detA*r[2]/18.0f); + atomicAdd(volOut, detA/6.0f); + + detA *= 60.0f*nFace; + atomicAdd(&moiOut[0], (r[0]*r[0] + pA[0]*pA[0] + pA[3]*pA[3] + pA[6]*pA[6])/detA); + atomicAdd(&moiOut[1], (r[0]*r[1] + pA[0]*pA[1] + pA[3]*pA[4] + pA[6]*pA[7])/detA); + atomicAdd(&moiOut[2], (r[0]*r[2] + pA[2]*pA[0] + pA[5]*pA[3] + pA[8]*pA[6])/detA); + atomicAdd(&moiOut[3], (r[1]*r[0] + pA[0]*pA[1] + pA[3]*pA[4] + pA[6]*pA[7])/detA); + atomicAdd(&moiOut[4], (r[1]*r[1] + pA[1]*pA[1] + pA[4]*pA[4] + pA[7]*pA[7])/detA); + atomicAdd(&moiOut[5], (r[1]*r[2] + pA[1]*pA[2] + pA[4]*pA[5] + pA[7]*pA[8])/detA); + atomicAdd(&moiOut[6], (r[2]*r[0] + pA[2]*pA[0] + pA[5]*pA[3] + pA[8]*pA[6])/detA); + atomicAdd(&moiOut[7], (r[2]*r[1] + pA[1]*pA[2] + pA[4]*pA[5] + pA[7]*pA[8])/detA); + atomicAdd(&moiOut[8], (r[2]*r[2] + pA[2]*pA[2] + pA[5]*pA[5] + pA[8]*pA[8])/detA); } -__global__ void kCenter(int nVtx, glm::vec3 cm, float *vData) { - int vIdx = threadIdx.x + blockDim.x * blockIdx.x; - if (vIdx >= nVtx) - return; - - vData[PPM_NVARS*vIdx + 0] -= cm[0]; - vData[PPM_NVARS*vIdx + 1] -= cm[1]; - vData[PPM_NVARS*vIdx + 2] -= cm[2]; -} +void PPM::physCalc() { + cudaMemset(dev_moi, 0, 9*sizeof(float)); + cudaMemset(dev_cm, 0, 3*sizeof(float)); + cudaMemset(dev_vol, 0, sizeof(float)); -void PPM::physInit() { - fprintf(stderr, "phys alloc\n"); - glm::mat3 *dev_moi; - cudaMalloc(&dev_moi, sizeof(glm::mat3)); - cudaMemset(dev_moi, 0, sizeof(glm::mat3)); - float *dev_mass; - cudaMalloc(&dev_mass, sizeof(float)); - cudaMemset(dev_mass, 0, sizeof(float)); - glm::vec3 *dev_cm; - cudaMalloc(&dev_cm, sizeof(glm::vec3)); - cudaMemset(dev_cm, 0, sizeof(glm::vec3)); - fprintf(stderr, "phys compute\n"); dim3 blkDim(256), blkCnt((nFace + 255)/256); - int nSM = (1+blkDim.x) * sizeof(glm::mat3); - kCalcInertia<<>>(nFace, dev_heFaces, dev_vList, dev_moi, dev_mass, dev_cm); + int nSM = (blkDim.x) * 12 * sizeof(float); + kCalcInertia<<>>(nFace, dev_heFaces, dev_vList, dev_moi, dev_vol, dev_cm); checkCUDAError("kCalcInertia", __LINE__); fprintf(stderr, "phys copy\n"); - cudaMemcpy(&moi, dev_moi, sizeof(glm::mat3), cudaMemcpyDeviceToHost); - cudaMemcpy(&cm, dev_cm, sizeof(glm::vec3), cudaMemcpyDeviceToHost); - cudaMemcpy(&mass, dev_mass, sizeof(float), cudaMemcpyDeviceToHost); - - fprintf(stderr, "phys recenter\n"); - kCenter<<>>(nVtx, cm, dev_vList); - checkCUDAError("kCenter", __LINE__); - cudaMemcpy(&vList[0], dev_vList, PPM_NVARS*nVtx*sizeof(float), cudaMemcpyDeviceToHost); - - fprintf(stderr, "phys reduce\n"); - float tr = moi[0][0] + moi[1][1] + moi[2][2]; + cudaMemcpy(moi, dev_moi, 9*sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(cm, dev_cm, 3*sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(&vol, dev_vol, sizeof(float), cudaMemcpyDeviceToHost); + + fprintf(stderr, "phys moi finalize\n"); + float tr = moi[0] + moi[4] + moi[8]; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { - moi[i][j] = ((i == j) ? tr : 0.0f) - moi[i][j]; + moi[3*i+j] = ((i == j) ? tr : 0.0f) - moi[3*i+j]; }} - mass *= 1000.0f; - - fprintf(stderr, "phys result: %f (%f %f %f)\n", mass, cm.x, cm.y, cm.z); - fprintf(stderr, "%f %f %f\n", moi[0][0], moi[1][0], moi[2][0]); - fprintf(stderr, "%f %f %f\n", moi[0][1], moi[1][1], moi[2][1]); - fprintf(stderr, "%f %f %f\n\n", moi[0][2], moi[1][2], moi[2][2]); - - cudaFree(dev_moi); - cudaFree(dev_mass); - cudaFree(dev_cm); + + fprintf(stderr, "phys result: %f (%f %f %f)\n", vol, cm[0], cm[1], cm[2]); + fprintf(stderr, "%f %f %f\n", moi[0], moi[3], moi[6]); + fprintf(stderr, "%f %f %f\n", moi[1], moi[4], moi[7]); + fprintf(stderr, "%f %f %f\n\n", moi[2], moi[5], moi[8]); } -void PPM::physTess() { +void PPM::physInit() { fprintf(stderr, "phys alloc\n"); - glm::mat3 *dev_moi; - cudaMalloc(&dev_moi, sizeof(glm::mat3)); - cudaMemset(dev_moi, 0, sizeof(glm::mat3)); - float *dev_mass; - cudaMalloc(&dev_mass, sizeof(float)); - cudaMemset(dev_mass, 0, sizeof(float)); - glm::vec3 *dev_cm; - cudaMalloc(&dev_cm, sizeof(glm::vec3)); - cudaMemset(dev_cm, 0, sizeof(glm::vec3)); - - fprintf(stderr, "phys compute\n"); - dim3 blkDim(256), blkCnt((nFace + 255)/256); - int nSM = (1+blkDim.x) * sizeof(glm::mat3); - kCalcTessInertia<<>>(nFace, dev_tessIdx, dev_tessVtx, dev_moi, dev_mass, dev_cm); - - cudaMemcpy(&moi, dev_moi, sizeof(glm::mat3), cudaMemcpyDeviceToHost); - cudaMemcpy(&cm, dev_cm, sizeof(glm::vec3), cudaMemcpyDeviceToHost); - cudaMemcpy(&mass, dev_mass, sizeof(float), cudaMemcpyDeviceToHost); - - fprintf(stderr, "phys reduce\n"); - moi -= mass*glm::outerProduct(cm,cm); - moi = glm::mat3(moi[0][0] + moi[1][1] + moi[2][2]) - moi; - fprintf(stderr, "phys result: %f (%f %f %f)\n", mass, cm.x, cm.y, cm.z); - fprintf(stderr, "%f %f %f\n", moi[0][0], moi[1][0], moi[2][0]); - fprintf(stderr, "%f %f %f\n", moi[0][1], moi[1][1], moi[2][1]); - fprintf(stderr, "%f %f %f\n\n", moi[0][2], moi[1][2], moi[2][2]); - - cudaFree(dev_moi); - cudaFree(dev_mass); - cudaFree(dev_cm); + devAlloc(&dev_moi, 9*sizeof(float)); + devAlloc(&dev_cm, 3*sizeof(float)); + devAlloc(&dev_vol, sizeof(float)); + physCalc(); } __global__ void kMeshIntersect(bool exec, bool biDir, @@ -172,12 +88,12 @@ __global__ void kMeshIntersect(bool exec, bool biDir, int fSubIdx = threadIdx.x + blockIdx.x * blockDim.x; if (fSubIdx >= nSubFace) return; - + vTessIdx = &vTessIdx[3*fSubIdx]; const float *v0 = &vTessData[PPM_NVARS*vTessIdx[0]]; const float *v1 = &vTessData[PPM_NVARS*vTessIdx[1]]; const float *v2 = &vTessData[PPM_NVARS*vTessIdx[2]]; - + float e1[3], e2[3]; for (int i = 0; i < 3; i++) { e1[i] = v1[i] - v0[i]; @@ -188,27 +104,27 @@ __global__ void kMeshIntersect(bool exec, bool biDir, p[0] = dir[1]*e2[2] - dir[2]*e2[1]; p[1] = dir[2]*e2[0] - dir[0]*e2[2]; p[2] = dir[0]*e2[1] - dir[1]*e2[0]; - + float idet = e1[0]*p[0] + e1[1]*p[1] + e1[2]*p[2]; if (idet > -1e-5 && idet < 1e-5) return; idet = 1.0f/idet; - + float T[3]; for (int i = 0; i < 3; i++) T[i] = p0[i] - v0[i]; float u = idet*(p[0]*T[0] + p[1]*T[1] + p[2]*T[2]); if (u < 0 || u > 1) return; - - + + p[0] = T[1]*e1[2] - T[2]*e1[1]; p[1] = T[2]*e1[0] - T[0]*e1[2]; p[2] = T[0]*e1[1] - T[1]*e1[0]; float v = idet*(dir[0]*p[0] + dir[1]*p[1] + dir[2]*p[2]); if (v < 0 || u+v > 1) return; - + float t = idet*(e2[0]*p[0] + e2[1]*p[1] + e2[2]*p[2]); if (biDir || (t > 1e-5)) { if (exec) { @@ -283,35 +199,21 @@ __global__ void kPhysNeighbor(int nVtx, const HeData *heLoops, const int2 *vBndL dv[9*vIdx + 8] = vSM[2] - kDamp*dv[9*vIdx + 5]; } -__global__ void kPhysVerlet2(int nVtx, float *dv, float dt) { +__global__ void kPhysVerlet2(int nVtx, float *dv, float mass, const int2 *vBndList, float dt) { int vIdx = threadIdx.x + blockIdx.x * blockDim.x; if (vIdx >= nVtx) return; + const int2 &bnd = vBndList[vIdx]; + mass *= (bnd.y - bnd.x); + dv = &dv[9*vIdx]; - dv[3] += 0.5f*dv[6]*dt; - dv[4] += 0.5f*dv[7]*dt; - dv[5] += 0.5f*dv[8]*dt; + dv[3] += 0.5f*dv[6]*dt/mass; + dv[4] += 0.5f*dv[7]*dt/mass; + dv[5] += 0.5f*dv[8]*dt/mass; } - -/* - - if (uv.x > 0 && uv.y > 0) { - fSubIdx = fIdx*nSubFace + UV_IDX(uv.x - 1, uv.y - 1) + nSubVtx - nSub - 1; - idxOut[3*fSubIdx + 0] = tessGetIdx(uv.x, uv.y, heLoops, heFaces, heTessOrder, fIdx, nVtx, nHe, nFace, nSub); - idxOut[3*fSubIdx + 1] = tessGetIdx(uv.x-1, uv.y, heLoops, heFaces, heTessOrder, fIdx, nVtx, nHe, nFace, nSub); - idxOut[3*fSubIdx + 2] = tessGetIdx(uv.x, uv.y-1, heLoops, heFaces, heTessOrder, fIdx, nVtx, nHe, nFace, nSub); - } - - if (uv.x+uv.y < nSub) { - fSubIdx = fIdx*nSubFace + UV_IDX(uv.x, uv.y); - idxOut[3*fSubIdx + 0] = tessGetIdx(uv.x, uv.y, heLoops, heFaces, heTessOrder, fIdx, nVtx, nHe, nFace, nSub); - idxOut[3*fSubIdx + 1] = tessGetIdx(uv.x+1, uv.y, heLoops, heFaces, heTessOrder, fIdx, nVtx, nHe, nFace, nSub); - idxOut[3*fSubIdx + 2] = tessGetIdx(uv.x, uv.y + 1, heLoops, heFaces, heTessOrder, fIdx, nVtx, nHe, nFace, nSub); - } -*/ -__global__ void kPhysClick(int nSub, int nSubVtx, int fSubIdx, +__global__ void kPhysClick(int nSub, int nSubVtx, int fSubIdx, const HeData *heFaces, const float *vData, const float *force, float *dv) { int fIdx = fSubIdx / (nSub*nSub); @@ -331,19 +233,41 @@ __global__ void kPhysClick(int nSub, int nSubVtx, int fSubIdx, const float *v0 = &vData[PPM_NVARS*he0.src], *v1 = &vData[PPM_NVARS*he1.src], *v2 = &vData[PPM_NVARS*he2.src]; float *dv0 = &dv[9*he0.src], *dv1 = &dv[9*he1.src], *dv2 = &dv[9*he2.src]; - dv0[6] += w * force[0]; - dv0[7] += w * force[1]; - dv0[8] += w * force[2]; - dv1[6] += uv.x * force[0]; - dv1[7] += uv.x * force[1]; - dv1[8] += uv.x * force[2]; - dv2[6] += uv.y * force[0]; - dv2[7] += uv.y * force[1]; - dv2[8] += uv.y * force[2]; + dv0[6] += w * force[0] ; + dv0[7] += w * force[1] ; + dv0[8] += w * force[2] ; + dv1[6] += uv.x * force[0] ; + dv1[7] += uv.x * force[1] ; + dv1[8] += uv.x * force[2] ; + dv2[6] += uv.y * force[0] ; + dv2[7] += uv.y * force[1] ; + dv2[8] += uv.y * force[2] ; +} + +__global__ void kPhysNonInertial(int nVtx, glm::vec3 angVel, const float *cm, + const float *vTessData, float *dv) { + int vIdx = threadIdx.x + blockIdx.x * blockDim.x; + if (vIdx >= nVtx) + return; + + vTessData = &vTessData[PPM_NVARS*vIdx]; + dv = &dv[9*vIdx]; + + glm::vec3 r(vTessData[0] - cm[0], vTessData[1] - cm[1], vTessData[2] - cm[2]); + float dot = r[0]*angVel[0] + r[1]*angVel[1] + r[2]*angVel[2]; + float ang = angVel[0]*angVel[0] + angVel[1]*angVel[1] + angVel[2]*angVel[2]; + + glm::vec3 cf; + cf[0] = dot*angVel[0] - ang*r[0]; + cf[1] = dot*angVel[1] - ang*r[1]; + cf[2] = dot*angVel[2] - ang*r[2]; + dv[6] += cf[0]; + dv[7] += cf[1]; + dv[8] += cf[2]; } -__global__ void kPhysClick_RigidBody(int nSub, int nSubVtx, int fSubIdx, - const HeData *heFaces, const float *vData, +__global__ void kPhysClick_RigidBody(int nSub, int nSubVtx, int fSubIdx, + const HeData *heFaces, const float *cm, const float *vData, const float *force, float *rbTorque) { int fIdx = fSubIdx / (nSub*nSub); int uvOff = fSubIdx - fIdx*nSub*nSub; @@ -362,8 +286,8 @@ __global__ void kPhysClick_RigidBody(int nSub, int nSubVtx, int fSubIdx, const float *v0 = &vData[PPM_NVARS*he0.src], *v1 = &vData[PPM_NVARS*he1.src], *v2 = &vData[PPM_NVARS*he2.src]; glm::vec3 arm; - for (int i = 0; i < 3; i++) - arm[i] = w*v0[i] + uv.x*v1[i] + uv.y*v2[i]; + for (int i = 0; i < 3; i++) + arm[i] = w*(v0[i]-cm[i]) + uv.x*(v1[i]-cm[i]) + uv.y*(v2[i]-cm[i]); rbTorque[0] = arm[1]*force[2] - arm[2]*force[1]; rbTorque[1] = arm[2]*force[0] - arm[0]*force[2]; rbTorque[2] = arm[0]*force[1] - arm[1]*force[0]; @@ -385,14 +309,14 @@ int PPM::intersect(const glm::vec3 &p0, const glm::vec3 &dir) { } glm::vec3 lp0(lp0_4.x/lp0_4.w, lp0_4.y/lp0_4.w, lp0_4.z/lp0_4.w); glm::vec3 ldir(ldir_4.x, ldir_4.y, ldir_4.z); - + unsigned int *dev_count; cudaMalloc(&dev_count, sizeof(unsigned int)); cudaMemset(dev_count, 0, sizeof(unsigned int)); dim3 blkCnt((nFace*nSubFace + 255) / 256), blkDim(256); - kMeshIntersect<<>>(false, false, nFace*nSubFace, dev_tessIdx, dev_tessVtx, lp0, ldir, dev_count, nullptr, nullptr); + kMeshIntersect<<>>(false, false, nFace*nSubFace, dev_tessIdx, dev_tessVtx, p0, dir, dev_count, nullptr, nullptr); checkCUDAError("kMeshIntersect", __LINE__); - + unsigned int count; cudaMemcpy(&count, dev_count, sizeof(unsigned int), cudaMemcpyDeviceToHost); if (!count) { @@ -400,19 +324,19 @@ int PPM::intersect(const glm::vec3 &p0, const glm::vec3 &dir) { cudaFree(dev_count); return -1; } - + float *dev_tOut; int *dev_idxOut; cudaMalloc(&dev_tOut, count*sizeof(float)); cudaMalloc(&dev_idxOut, count*sizeof(int)); kMeshIntersect<<>>(true, false, nFace*nSubFace, dev_tessIdx, dev_tessVtx, lp0, ldir, dev_count, dev_idxOut, dev_tOut); checkCUDAError("kMeshIntersect", __LINE__); - + std::vector tOut(count); std::vector idxOut(count); cudaMemcpy(&tOut[0], dev_tOut, count*sizeof(float), cudaMemcpyDeviceToHost); cudaMemcpy(&idxOut[0], dev_idxOut, count*sizeof(int), cudaMemcpyDeviceToHost); - + int minPos = std::min_element(tOut.begin(), tOut.end()) - tOut.begin(); int idx = idxOut[minPos]; @@ -422,95 +346,39 @@ int PPM::intersect(const glm::vec3 &p0, const glm::vec3 &dir) { return idx; } -void PPM::updateRigidBody(int clickIdx, const glm::vec3 &clickForce, float dt) { - - static bool done = false; - - // rotation VV1, half-torque appliucation - rbAngMom[0] += 0.5f * dt * rbTorque[0]; - rbAngMom[1] += 0.5f * dt * rbTorque[1]; - rbAngMom[2] += 0.5f * dt * rbTorque[2]; - - // rotation VV1, velocity application - glm::mat3 rot = glm::toMat3(rbRot); - glm::vec3 w = rot*glm::inverse(moi)*glm::transpose(rot)*rbAngMom / 1.0e3f; - float th = glm::length(w); - if (th > 0.001f) { - glm::quat dq = glm::angleAxis(th*dt, w/th); - rbRot = dq * rbRot; - } - model = glm::toMat4(rbRot); - model[3][0] = rbPos[0]; - model[3][1] = rbPos[1]; - model[3][2] = rbPos[2]; - model[3][3] = 1.0f; - - if (clickIdx > -1) { - // rotation VV1, get acceleration - float *dev_rbTorque, *dev_rbForce; - cudaMalloc(&dev_rbTorque, 3*sizeof(float)); - cudaMalloc(&dev_rbForce, 3*sizeof(float)); - - glm::mat4 im = glm::inverse(model) ; - glm::vec4 lF_4(0.0f); - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 3; j++) { - lF_4[i] += im[j][i]*clickForce[j]; - } - } - glm::vec3 lF(1000.0f*lF_4.x, 1000.0f*lF_4.y, 1000.0f*lF_4.z); - cudaMemcpy(dev_rbForce, glm::value_ptr(lF), 3*sizeof(float), cudaMemcpyHostToDevice); - - kPhysClick_RigidBody<<<1,1>>>(nSub, nSubVtx, clickIdx, dev_heFaces, dev_vList, dev_rbForce, dev_rbTorque); - cudaMemcpy(glm::value_ptr(rbTorque), dev_rbTorque, 3*sizeof(float), cudaMemcpyDeviceToHost); - cudaFree(dev_rbTorque); - cudaFree(dev_rbForce); - } else { - rbTorque[0] = rbTorque[1] = rbTorque[2] = 0.0f; - } - - if (!done) { - done = true; - rbTorque[0] = 25.0f; - rbTorque[1] = 0.0f; - rbTorque[2] = 0.1f; - }; - // rotation VV1, half-torque - rbAngMom[0] += 0.5f * dt * rbTorque[0]; - rbAngMom[1] += 0.5f * dt * rbTorque[1]; - rbAngMom[2] += 0.5f * dt * rbTorque[2]; -} +void PPM::updateSb(float dt) { + dim3 blkDim, blkCnt; -void PPM::updateCoeff(int clickIdx, const glm::vec3 &clickForce, float dt) { - dim3 blkDim(128), blkCnt; + // softbody VV - velocity half-update + blkDim.x = 128; blkCnt.x = (nVtx + blkDim.x - 1) / blkDim.x; kPhysVerlet1<<>>(nVtx, dev_dv, mass/nHe, dev_vBndList, dt); - checkCUDAError("kPhysVerlet1", __LINE__); - + checkCUDAError("kPhysVerlet1", __LINE__); + + // softbody VV - position update blkDim.x = 16; blkDim.y = 64; blkCnt.x = (nBasis2 + blkDim.x - 1) / blkDim.x; blkCnt.y = (nVtx + blkDim.y - 1) / blkDim.y; kUpdateCoeff<<>>(nBasis2, nVtx, bezier->dev_V, 1.0, dev_dv, dev_coeff, dt); checkCUDAError("kUpdateCoeff", __LINE__); + physCalc(); + // softbody VV - force update blkDim.x = 256; blkDim.y = 1; blkCnt.x = (nVtx + blkDim.x - 1) / blkDim.x; blkCnt.y = 1; int nSM = 3*blkDim.x*sizeof(float); kPhysNeighbor<<>>(nVtx, dev_heLoops, dev_vBndList, kSelf, kDamp, kNbr, dev_dv); - if (clickIdx > -1) { - float *dev_clickForce; - cudaMalloc(&dev_clickForce, 3*sizeof(float)); - cudaMemcpy(dev_clickForce, glm::value_ptr(clickForce), 3*sizeof(float), cudaMemcpyHostToDevice); - kPhysClick<<<1,1>>>(nSub, nSubVtx, clickIdx, dev_heFaces, dev_vList, dev_clickForce, dev_dv); - cudaFree(dev_clickForce); - } checkCUDAError("kPhysNeighbor", __LINE__); - + kPhysNonInertial<<>>(nVtx, rbAngVel, dev_cm, dev_vList, dev_dv); + checkCUDAError("kPhysNonInertial", __LINE__); + + // softbody VV - velocity half-update + blkDim.x = 128; blkCnt.x = (nVtx + blkDim.x - 1) / blkDim.x; - kPhysVerlet2<<>>(nVtx, dev_dv, dt); - checkCUDAError("kPhysVerlet2", __LINE__); + kPhysVerlet2<<>>(nVtx, dev_dv, mass/nHe, dev_vBndList, dt); + checkCUDAError("kPhysVerlet2", __LINE__); } diff --git a/src/ppm.cpp b/src/ppm.cpp index ab8d898..a987c8a 100644 --- a/src/ppm.cpp +++ b/src/ppm.cpp @@ -14,6 +14,12 @@ #include #include +#include +#include +#include +#include +#include + PPM::PPM(bool glVis) : useTessSM(false), @@ -22,7 +28,7 @@ PPM::PPM(bool glVis) : useVisualize(glVis), visFill(true), visSkel(true), - visDbgNormals(true), + visDbgNormals(false), inFile(""), isBuilt(false), kSelf(10.0f), @@ -30,7 +36,8 @@ PPM::PPM(bool glVis) : kNbr(10.0f), rbRot(1.0f, 0.0f, 0.0f, 0.0f), rbAngMom(0.0f), - rbPos(0.0f) + rbPos(0.0f), + mass(1000.0f) {} PPM::~PPM() { @@ -51,8 +58,14 @@ void PPM::rebuild(const char *fName, int nBasis, int nGrid, int nSub) { visFree(); devFree(); isBuilt = false; + + rbRot = glm::quat(1.0f, 0.0f, 0.0f, 0.0f); + rbAngMom = glm::vec3(0.0f, 0.0f, 0.0f); + rbPos = glm::vec3(0.0f); + rbAngVel = glm::vec3(0.0f); + } - + if (!objRead(fName)) { fprintf(stderr, "couldn't open file %s\n", fName); return; @@ -73,14 +86,15 @@ void PPM::rebuild(const char *fName, int nBasis, int nGrid, int nSub) { // calculate mesh physics properties physInit(); + rbAngMom.y = 10.0f*mass; // initialize visualization data if (useVisualize) { visInit(); fprintf(stderr, "vis done\n"); } - - // calculate PPM data + + // calculate PPM data this->nBasis = nBasis; this->nGrid = nGrid; this->nBasis2 = nBasis*nBasis; @@ -115,7 +129,7 @@ void PPM::getHeLoops() { if (!hasPred) break; } - if (first != loop.size()) + if (first != loop.size()) std::swap(loop[first],loop[0]); // finish the chain @@ -280,6 +294,8 @@ void PPM::visInit() { fprintf(stderr, "loading vidx vbo\n"); glBindBuffer(GL_ARRAY_BUFFER, vboVtx); glBufferData(GL_ARRAY_BUFFER, PPM_NVARS * nVtx*sizeof(float), &vList[0], GL_STATIC_DRAW); + cudaGraphicsGLRegisterBuffer(&dev_vboVtx, vboVtx, cudaGraphicsMapFlagsNone); + checkCUDAError("cudaGraphicsGLRegisterBuffer", __LINE__); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, PPM_NVARS * sizeof(float), (const void*)0); glEnableVertexAttribArray(1); @@ -290,6 +306,7 @@ void PPM::visInit() { fprintf(stderr, "loading fidx vbo\n"); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vboIdx); glBufferData(GL_ELEMENT_ARRAY_BUFFER, 3 * nFace*sizeof(int), &fList[0], GL_STATIC_DRAW); + //glEnableVertexAttribArray(3); //glVertexAttribIPointer(3, GL_INT, 3, 0, (const void*)0); @@ -324,7 +341,7 @@ void PPM::visInit() { void PPM::draw(Shader *vShader) { if (!isBuilt) return; - + if (visSkel) { glPointSize(1.0f); vShader->setUniform("uColor", 0.8f, 0.2f, 0.1f); @@ -333,7 +350,7 @@ void PPM::draw(Shader *vShader) { vShader->bindIndexData(vboIdx); glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glDrawElements(GL_TRIANGLES, 3 * nFace, GL_UNSIGNED_INT, 0); - + glPointSize(3.0f); vShader->setUniform("uColor", 0.2f, 0.1f, 0.8f); vShader->setUniform("nShade", false); @@ -364,39 +381,71 @@ void PPM::visFree() { if (!useVisualize) return; + cudaGraphicsUnregisterResource(dev_vboVtx); + checkCUDAError("GraphicsUnregisterResource", __LINE__); glDeleteBuffers(1,&vboVtx); // vList.size() vertices (3 floats) glDeleteBuffers(1,&vboIdx); // vList.size() vertices (3 floats) + cudaGraphicsUnregisterResource(dev_vboTessIdx); checkCUDAError("GraphicsUnregisterResource", __LINE__); cudaGraphicsUnregisterResource(dev_vboTessVtx); checkCUDAError("GraphicsUnregisterResource", __LINE__); glDeleteBuffers(1, &vboTessVtx); // vList.size() vertices (3 floats) glDeleteBuffers(1, &vboTessIdx); // vList.size() vertices (3 floats) + glDeleteVertexArrays(1, &vaoBase); // vList.size() vertices (3 floats) glDeleteVertexArrays(1, &vaoTess); // vList.size() vertices (3 floats) } void PPM::cudaProbe() { - int nDevices; - cudaGetDeviceCount(&nDevices); - if (nDevices <= 0) - throw std::runtime_error("No CUDA device detected"); - - for (int i = 0; i < nDevices; i++) { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, i); - checkCUDAError("cudaGetDeviceProperties", __LINE__); - - fprintf(stderr, "Device Number: %d\n", i); - fprintf(stderr, " Device name: %s\n", prop.name); - fprintf(stderr, " Compute capability: %d.%d", prop.major, prop.minor); - if (prop.major < 3) { - fprintf(stderr, " (< 3.0, disabling texSamp)"); - canUseTexObjs = false; - } else { - canUseTexObjs = true; - } - fprintf(stderr, "\n"); + int nDevices; + cudaGetDeviceCount(&nDevices); + if (nDevices <= 0) + throw std::runtime_error("No CUDA device detected"); + + for (int i = 0; i < nDevices; i++) { + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, i); + checkCUDAError("cudaGetDeviceProperties", __LINE__); + + fprintf(stderr, "Device Number: %d\n", i); + fprintf(stderr, " Device name: %s\n", prop.name); + fprintf(stderr, " Compute capability: %d.%d", prop.major, prop.minor); + if (prop.major < 3) { + fprintf(stderr, " (< 3.0, disabling texSamp)"); + canUseTexObjs = false; + } else { + canUseTexObjs = true; } + fprintf(stderr, "\n"); } +} + +void PPM::updateRbRot(float dt) { + + // rotation VV1, half-torque appliucation + rbAngMom[0] += 0.5f * dt * rbTorque[0]; + rbAngMom[1] += 0.5f * dt * rbTorque[1]; + rbAngMom[2] += 0.5f * dt * rbTorque[2]; + glm::mat3 rot = glm::toMat3(rbRot); + glm::mat3 mmoi; + memcpy(glm::value_ptr(mmoi), moi, 9*sizeof(float)); + rbAngVel = rot*glm::inverse(mmoi)*glm::transpose(rot)*rbAngMom / mass; + + // rotation VV1, velocity application + float th = glm::length(rbAngVel); + if (th > 0.001f) { + glm::quat dq = glm::angleAxis(th*dt, rbAngVel/th); + rbRot = dq * rbRot; + } + model = glm::toMat4(rbRot); + + // TODO: proper torque calculation + rbTorque[0] = rbTorque[1] = rbTorque[2] = 0.0f; + + // rotation VV1, half-torque + rbAngMom[0] += 0.5f * dt * rbTorque[0]; + rbAngMom[1] += 0.5f * dt * rbTorque[1]; + rbAngMom[2] += 0.5f * dt * rbTorque[2]; +} diff --git a/src/ppm.hpp b/src/ppm.hpp index 997b218..575223e 100644 --- a/src/ppm.hpp +++ b/src/ppm.hpp @@ -50,9 +50,9 @@ class PPM { void rebuild(const char *fName, int nBasis, int nGrid, int nSub); - float update(int clickIdx, const glm::vec3 &clickForce, float dt); + float update(float dt); void draw(Shader *vShader); - + int intersect(const glm::vec3 &p0, const glm::vec3 &dir); // visualization options @@ -75,6 +75,7 @@ class PPM { // physics attributes float kSelf, kNbr, kDamp; glm::mat4 model; + private: // PPM properties int nBasis, nGrid, nBasis2, nGrid2; @@ -111,17 +112,24 @@ class PPM { unsigned int vboIdx, vboVtx; unsigned int vboTessIdx, vboTessVtx; cudaGraphicsResource *dev_vboTessIdx, *dev_vboTessVtx; - + cudaGraphicsResource *dev_vboVtx; + // physics data - glm::mat3 moi; - glm::vec3 cm; + float *dev_moi, moi[9]; + float *dev_cm, cm[3]; + float *dev_vol, vol; float mass; + float *dev_moi_work, *dev_vol_work, *dev_cm_work; + // rigid body attributes glm::quat rbRot; glm::vec3 rbPos; glm::vec3 rbAngMom; - glm::vec3 rbForce, rbTorque; + float *dev_rbForce, rbForce[3]; + float *dev_rbTorque, rbTorque[3]; + glm::vec3 rbAngVel; + float *dev_rbAngVel; bool objRead(const char *fName); bool objReadVtx(std::istream &fStream); @@ -135,13 +143,13 @@ class PPM { void devCoeffInit(); void devTessInit(); void physInit(); - void physTess(); + void physCalc(); void cudaProbe(); void genSampTex(); void genCoeff(); - void updateCoeff(int clickIdx, const glm::vec3 &clickForce, float dt); - void updateRigidBody(int clickIdx, const glm::vec3 &clickForce, float dt); + void updateSb(float dt); + void updateRbRot(float dt); std::vector allocList; template void devAlloc(T **ptr, size_t size) { diff --git a/src/ppm_cuda.cu b/src/ppm_cuda.cu index 34bb2ec..ee2d89d 100644 --- a/src/ppm_cuda.cu +++ b/src/ppm_cuda.cu @@ -184,7 +184,7 @@ __global__ void kTessVtx_Face(int nVtx, int nFace, int nSub, int nBasis2, int nSubVtx = (nSub+1)*(nSub+2)/2; if (fIdx >= nFace || uvIdx >= (nSub-1)*(nSub-2)/2 || dataIdx >= PPM_NVARS) return; - + int2 uv; fIuvInternalIdxMap(uvIdx,uv); const HeData &he0 = heFaces[3 * fIdx + 0]; @@ -221,9 +221,9 @@ __global__ void kTessVtx_Edge(int nVtx, int nHe, int nSub, int nBasis2, __global__ void kTessVtx_Vtx(int nVtx, int nSub, int nBasis2, - const HeData *heLoops, const int2 *vBndList, + const HeData *heLoops, const int2 *vBndList, const float *bezData, const float *wgtData, const float *coeff, - float *vDataOut) { + float *vDataOut, float *vList) { int vIdx = blockIdx.x * blockDim.x + threadIdx.x; int dataIdx = blockIdx.y * blockDim.y + threadIdx.y; if (vIdx >= nVtx || dataIdx >= PPM_NVARS) @@ -231,13 +231,14 @@ __global__ void kTessVtx_Vtx(int nVtx, int nSub, int nBasis2, const HeData &he = heLoops[vBndList[vIdx].x]; int dIdx = he.bezOff*(nSub+1)*(nSub+2)/2; - + const float *bez = &bezData[dIdx*nBasis2]; float wgt = wgtData[dIdx], res; for (int i = 0; i < nBasis2; i++) res += wgt * bez[i] * coeff[i + (he.src + dataIdx * nVtx) * nBasis2]; vDataOut[PPM_NVARS*vIdx + dataIdx] = res / wgt; + vList[PPM_NVARS*vIdx + dataIdx] = res / wgt; } __global__ void kGetNormals(int nHe, const HeData *heLoops, float *vData) { @@ -267,14 +268,14 @@ __device__ inline int tessGetIdx(int u, int v, const HeData *heFaces, const int int heIdx2 = heTessOrder[3*fIdx+1]; int heIdx3 = heTessOrder[3*fIdx+2]; int w = nSub-u-v; - + if (u == 0 && v == 0) return nFace*(nSub-1)*(nSub-2)/2 + nHe*(nSub-1)/2 + he0.src; if (w == 0 && v == 0) return nFace*(nSub-1)*(nSub-2)/2 + nHe*(nSub-1)/2 + he1.src; if (u == 0 && w == 0) return nFace*(nSub-1)*(nSub-2)/2 + nHe*(nSub-1)/2 + he2.src; - + if (v == 0) { if (heIdx1 < nHe/2) return nFace*(nSub-1)*(nSub-2)/2 + heIdx1*(nSub-1) + u-1; @@ -293,9 +294,9 @@ __device__ inline int tessGetIdx(int u, int v, const HeData *heFaces, const int else return nFace*(nSub-1)*(nSub-2)/2 + (nHe-heIdx3-1)*(nSub-1) + v-1; } - + return fIdx*(nSub-1)*(nSub-2)/2 + UV_IDX(u-1,v-1); - + } __global__ void kTessEdges(int nVtx, int nHe, int nFace, int nSub, @@ -436,7 +437,7 @@ void PPM::devMeshInit() { // populate the loops fprintf(stderr, "sorting loops\n"); getHeLoops(); - + fprintf(stderr, "uploading mesh data\n"); devAlloc(&dev_heLoops, nHe*sizeof(HeData)); cudaMemcpy(dev_heLoops, &heLoops[0], nHe*sizeof(HeData), cudaMemcpyHostToDevice); @@ -446,7 +447,7 @@ void PPM::devMeshInit() { cudaMemcpy(dev_vBndList, &vBndList[0], nVtx*sizeof(int2), cudaMemcpyHostToDevice); devAlloc(&dev_vList, PPM_NVARS*nVtx*sizeof(float)); cudaMemcpy(dev_vList, &vList[0], PPM_NVARS*nVtx*sizeof(float), cudaMemcpyHostToDevice); - + // recalculate normals dim3 blkDim(256); dim3 blkCnt((nHe + blkDim.x - 1)/blkDim.x); @@ -474,7 +475,7 @@ void PPM::devPatchInit() { kBezEval<<>>(d, nBasis, nSub, nSubVtx, &dev_bezPatch[dOff*nSubVtx*nBasis2], &dev_wgtPatch[dOff*nSubVtx]); checkCUDAError("kBezEval", __LINE__); } - + fprintf(stderr, "creating sample data\n"); if (canUseTexObjs) { genSampTex(); @@ -491,7 +492,7 @@ void PPM::devTessInit() { } else { devAlloc(&dev_tessIdx, 3*nFace*nSubFace*sizeof(int)); } - + dim3 blkSize(1024), blkCnt((nHe +1023) / 1024); devAlloc(&dev_heTessIdx, nHe*sizeof(int)); kGetHeTessIdx<<>>(nHe, dev_heFaces, dev_heTessIdx); @@ -503,21 +504,21 @@ void PPM::devTessInit() { thrust::sort_by_key(heTessIdx_ptr, heTessIdx_ptr+nHe, order_vec.begin()); thrust::copy(order_itr, order_itr+nHe, heTessIdx_ptr); thrust::sort_by_key(order_vec.begin(), order_vec.end(), heTessIdx_ptr); - + blkSize.x = 32; blkSize.y = 16; blkCnt.x = (nFace + blkSize.x - 1) / blkSize.x; blkCnt.y = (nSubVtx + blkSize.y - 1) / blkSize.y; kTessEdges<<>>(nVtx, nHe, nFace, nSub, dev_heFaces, dev_heTessIdx, dev_tessIdx); checkCUDAError("kTessEdges", __LINE__); - + thrust::copy(order_itr, order_itr+nHe, order_vec.begin()); thrust::sort_by_key(heTessIdx_ptr, heTessIdx_ptr+nHe, order_vec.begin()); thrust::copy(order_vec.begin(), order_vec.end(), heTessIdx_ptr); if (useVisualize) cudaGraphicsUnmapResources(1, &dev_vboTessIdx, 0); - + if (!useVisualize) devAlloc(&dev_tessVtx, PPM_NVARS*(nFace*(nSub-1)*(nSub-2)/2 + nHe*(nSub-1)/2 + nVtx)*sizeof(float)); devAlloc(&dev_tessWgt, nFace*nSubVtx*sizeof(float)); @@ -528,7 +529,7 @@ void PPM::devTessInit() { void PPM::devInit() { } -float PPM::update(int clickIdx, const glm::vec3 &clickForce, float dt) { +float PPM::update(float dt) { if (!isBuilt) return 0.0f; @@ -540,21 +541,22 @@ float PPM::update(int clickIdx, const glm::vec3 &clickForce, float dt) { dim3 blkDim; dim3 blkCnt; - // generate/update bezier coefficients - updateCoeff(clickIdx, clickForce, dt); - updateRigidBody(clickIdx, clickForce, dt); - // calculate new vertex positions if (useVisualize) { size_t nBytes; cudaGraphicsMapResources(1, &dev_vboTessVtx, 0); cudaGraphicsResourceGetMappedPointer((void**)&dev_tessVtx, &nBytes, dev_vboTessVtx); + cudaGraphicsMapResources(1, &dev_vboVtx, 0); + cudaGraphicsResourceGetMappedPointer((void**)&dev_vList, &nBytes, dev_vboVtx); } + updateSb(dt); + updateRbRot(dt); + if (nSub > 2) { - blkDim.x = 16; - blkDim.y = 4; - blkDim.z = 2; + blkDim.x = 8; + blkDim.y = 16; + blkDim.z = 1; blkCnt.x = (nFace + blkDim.x - 1) / blkDim.x; blkCnt.y = ((nSub-1)*(nSub-2)/2 + blkDim.y - 1) / blkDim.y; blkCnt.z = (PPM_NVARS + blkDim.z - 1) / blkDim.z; @@ -562,11 +564,11 @@ float PPM::update(int clickIdx, const glm::vec3 &clickForce, float dt) { dev_heFaces, dev_bezPatch, dev_wgtPatch, dev_coeff, dev_tessVtx); checkCUDAError("kTessVtx_Face", __LINE__); } - + if (nSub > 1) { - blkDim.x = 16; + blkDim.x = 32; blkDim.y = 4; - blkDim.z = 2; + blkDim.z = 1; blkCnt.x = (nHe/2 + blkDim.x - 1) / blkDim.x; blkCnt.y = (nSub-1 + blkDim.y - 1) / blkDim.y; blkCnt.z = (PPM_NVARS + blkDim.z - 1) / blkDim.z; @@ -574,19 +576,23 @@ float PPM::update(int clickIdx, const glm::vec3 &clickForce, float dt) { dev_heFaces, dev_heTessIdx, dev_bezPatch, dev_wgtPatch, dev_coeff, dev_tessVtx + PPM_NVARS*nFace*(nSub-1)*(nSub-2)/2); checkCUDAError("kTessVtx_Edge", __LINE__); } - - blkDim.x = 32; - blkDim.y = 4; + + blkDim.x = 64; + blkDim.y = 1; blkDim.z = 1; blkCnt.x = (nVtx + blkDim.x - 1) / blkDim.x; blkCnt.y = (PPM_NVARS + blkDim.y - 1) / blkDim.y; blkCnt.z = 1; kTessVtx_Vtx<<>>(nVtx, nSub, nBasis2, - dev_heLoops, dev_vBndList, dev_bezPatch, dev_wgtPatch, dev_coeff, dev_tessVtx + PPM_NVARS*(nFace*(nSub-1)*(nSub-2)/2 + nHe*(nSub-1)/2)); + dev_heLoops, dev_vBndList, dev_bezPatch, dev_wgtPatch, dev_coeff, dev_tessVtx + PPM_NVARS*(nFace*(nSub-1)*(nSub-2)/2 + nHe*(nSub-1)/2), dev_vList); checkCUDAError("kTessVtx_Vtx", __LINE__); - - if (useVisualize) + + // generate/update bezier coefficients + + if (useVisualize) { cudaGraphicsUnmapResources(1, &dev_vboTessVtx, 0); + cudaGraphicsUnmapResources(1, &dev_vboVtx, 0); + } float meas_dt; cudaEventRecord(stop, 0); @@ -595,6 +601,8 @@ float PPM::update(int clickIdx, const glm::vec3 &clickForce, float dt) { cudaEventDestroy(start); cudaEventDestroy(stop); + // recalculate physics + return meas_dt; }