From ed63d9829dc55429e68918d0d7e9425bdb81ffd2 Mon Sep 17 00:00:00 2001 From: Anthony Lombardi Date: Wed, 25 Oct 2023 14:14:52 -0400 Subject: [PATCH] WIP: PERF: Parallelize PSO --- libautoscoper/src/PSO.cpp | 142 ++++++++++++++++++++++++---------- libautoscoper/src/PSO.hpp | 27 ++++++- libautoscoper/src/Tracker.cpp | 83 +++++++++++--------- 3 files changed, 175 insertions(+), 77 deletions(-) diff --git a/libautoscoper/src/PSO.cpp b/libautoscoper/src/PSO.cpp index 6491687e..4e67fbe1 100644 --- a/libautoscoper/src/PSO.cpp +++ b/libautoscoper/src/PSO.cpp @@ -1,10 +1,15 @@ #include "PSO.hpp" #include #include +#include +#include +#include +// Mutex ensures that only one thread can access the critical section at a time +std::mutex MTX; // New Particle Swarm Optimization -float host_fitness_function(float x[]) +float host_fitness_function(std::vector x) { double xyzypr_manip[6] = { 0 }; for (int i = 0; i <= NUM_OF_DIMENSIONS - 1; i++) @@ -46,18 +51,60 @@ float getRandomClamped() return (float)rand() / (float)RAND_MAX; } -void pso(float *positions, float *velocities, float *pBests, float *gBest, unsigned int MAX_EPOCHS, unsigned int MAX_STALL) +void thread_handler(Particle* p, Particle* pBest, Particle* gBest) { + // Get the NCC of the current particle + p->ncc_val = host_fitness_function(p->position); + + // Check that the pBest has been initialized + if (pBest->ncc_val == -1.0f) + pBest->ncc_val = host_fitness_function(pBest->position); + + // Update the pBest if the current particle is better + if (p->ncc_val < pBest->ncc_val) { + *pBest = *p; + } + + // Critial Section + MTX.lock(); + // Update the gBest if the current particle is better + if (p->ncc_val < gBest->ncc_val) { + *gBest = *p; + } + MTX.unlock(); +} + +void pso(std::vector* particles, Particle* gBest, unsigned int MAX_EPOCHS, unsigned int MAX_STALL) { int stall_iter = 0; - float tempParticle1[NUM_OF_DIMENSIONS]; - float tempParticle2[NUM_OF_DIMENSIONS]; - bool do_this = true; unsigned int counter = 0; - //for (int iter = 0; iter < (signed int)MAX_EPOCHS; iter++) - float OMEGA = 0.8f; + // Make a copy of the particles, this will be the initial pBest + std::vector pBest; + Particle* pBestTemp; + for (Particle* p : *particles) { + pBestTemp = new Particle(); + for (int i = 0; i < NUM_OF_DIMENSIONS; i++) { + pBestTemp->position.push_back(p->position[i]); + } + pBest.push_back(pBestTemp); + } + pBestTemp = nullptr; + delete pBestTemp; + + // Initialize the velocities to 0 for each DOF + std::vector velocities; + for (int i = 0; i < NUM_OF_PARTICLES; i++) { + for (int j = 0; j < NUM_OF_DIMENSIONS; j++) { + velocities.push_back(0.0f); + } + } + + // Calc NCC for gBest + gBest->ncc_val = host_fitness_function(gBest->position); + + while (do_this) { //std::cout << "OMEGA: " << OMEGA << std::endl; @@ -66,55 +113,68 @@ void pso(float *positions, float *velocities, float *pBests, float *gBest, unsig do_this = false; } - float currentBest = host_fitness_function(gBest); + Particle* currentBest = gBest; - for (int i = 0; i < NUM_OF_PARTICLES*NUM_OF_DIMENSIONS; i++) + // Update the velocities and positions + for (int i = 0; i < NUM_OF_PARTICLES; i++) { - float rp = getRandomClamped(); - float rg = getRandomClamped(); + for (int j = 0; j < NUM_OF_DIMENSIONS; j++) { + float rp = getRandomClamped(); + float rg = getRandomClamped(); - velocities[i] = OMEGA * velocities[i] + c1 * rp*(pBests[i] - positions[i]) + c2 * rg*(gBest[i%NUM_OF_DIMENSIONS] - positions[i]); + velocities.at(i*NUM_OF_DIMENSIONS + j) = OMEGA * velocities.at(i*NUM_OF_DIMENSIONS + j) + c1 * rp*(pBest.at(i)->position.at(j) - particles->at(i)->position.at(j)) + c2 * rg * (gBest->position.at(j) - particles->at(i)->position.at(j)); + particles->at(i)->position.at(j) += velocities.at(i*NUM_OF_DIMENSIONS + j); - positions[i] += velocities[i]; + //velocities[i = OMEGA * velocities[i] + c1 * rp * (pBest.at(i) - positions[i]) + c2 * rg * (gBest[i % NUM_OF_DIMENSIONS] - positions[i]); + //positions[i] += velocities[i]; + } } OMEGA = OMEGA * 0.9f; - for (int i = 0; i < NUM_OF_PARTICLES*NUM_OF_DIMENSIONS; i += NUM_OF_DIMENSIONS) - { - for (int j = 0; j < NUM_OF_DIMENSIONS; j++) - { - tempParticle1[j] = positions[i + j]; - tempParticle2[j] = pBests[i + j]; - } - - if (host_fitness_function(tempParticle1) < host_fitness_function(tempParticle2)) - { - for (int j = 0; j < NUM_OF_DIMENSIONS; j++) - { - pBests[i + j] = positions[i + j]; - } - - if (host_fitness_function(tempParticle1) < host_fitness_function(gBest)) - { - //cout << "Current Best is: " ; - for (int j = 0; j < NUM_OF_DIMENSIONS; j++) - { - gBest[j] = pBests[i + j]; - } - } - } + //for (int i = 0; i < NUM_OF_PARTICLES*NUM_OF_DIMENSIONS; i += NUM_OF_DIMENSIONS) + //{ + // for (int j = 0; j < NUM_OF_DIMENSIONS; j++) + // { + // tempParticle1[j] = positions[i + j]; + // tempParticle2[j] = pBests[i + j]; + // } + + // if (host_fitness_function(tempParticle1) < host_fitness_function(tempParticle2)) + // {gBest->ncc_val + // for (int j = 0; j < NUM_OF_DIMENSIONS; j++) + // { + // pBests[i + j] = positions[i + j]; + // } + + // if (host_fitness_function(tempParticle1) < host_fitness_function(gBest)) + // { + // //cout << "Current Best is: " ; + // for (int j = 0; j < NUM_OF_DIMENSIONS; j++) + // { + // gBest[j] = pBests[i + j]; + // } + // } + // } + //} + + std::vector threads; + + // Start the threads + for (int i = 0; i < NUM_OF_PARTICLES; i++) { + threads.push_back(std::thread(thread_handler, particles->at(i), pBest[i],gBest)); } - float epochBest = host_fitness_function(gBest); + // Join the threads + for (auto &t : threads) t.join(); - std::cout << "Current Best NCC: " << epochBest << std::endl; + std::cout << "Current Best NCC: " << gBest->ncc_val << std::endl; //std::cout << "Stall: " << stall_iter << std::endl; - if (abs(epochBest - currentBest) < 1e-4f) + if (abs(gBest->ncc_val - currentBest->ncc_val) < 1e-4f) { //std::cout << "Increased Stall Iter" << std::endl; stall_iter++; - } else if (abs(epochBest - currentBest) > 0.001f) + } else if (abs(gBest->ncc_val - currentBest->ncc_val) > 0.001f) { //std::cout << "Zeroed Stall Iter" << std::endl; stall_iter = 0; diff --git a/libautoscoper/src/PSO.hpp b/libautoscoper/src/PSO.hpp index f7dc0b37..63ca1298 100644 --- a/libautoscoper/src/PSO.hpp +++ b/libautoscoper/src/PSO.hpp @@ -12,12 +12,37 @@ #include #include #include +#include const int NUM_OF_PARTICLES = 100; const int NUM_OF_DIMENSIONS = 6; const float c1 = 1.5f; const float c2 = 1.5f; // END: NEW PSO +struct Particle { + float ncc_val; + std::vector position; + // Copy constructor + Particle(const Particle& p) { + ncc_val = p.ncc_val; + position = p.position; + } + // Default constructor + Particle() { + ncc_val = -1.0f; + } + Particle(std::vector pos) { + ncc_val = -1.0f; + position = pos; + } + // Assignment operator + Particle& operator=(const Particle& p) { + ncc_val = p.ncc_val; + position = p.position; + return *this; + } +}; + double PSO_FUNC(double *P); void intializeRandom(); @@ -26,4 +51,4 @@ float getRandom(float low, float high); float getRandomClamped(); float host_fitness_function(float x[]); -void pso(float *positions, float *velocities, float *pBests, float *gBest, unsigned int MAX_EPOCHS, unsigned int MAX_STALL); +void pso(std::vector* particles, Particle *gBest, unsigned int MAX_EPOCHS, unsigned int MAX_STALL); diff --git a/libautoscoper/src/Tracker.cpp b/libautoscoper/src/Tracker.cpp index a1fe8f13..2f2e12e6 100644 --- a/libautoscoper/src/Tracker.cpp +++ b/libautoscoper/src/Tracker.cpp @@ -419,41 +419,59 @@ void Tracker::optimize(int frame, int dFrame, int repeats, int opt_method, unsig unsigned int MAX_EPOCHS = max_iter; unsigned int MAX_STALL = max_stall_iter; - float positions[NUM_OF_PARTICLES*NUM_OF_DIMENSIONS]; + /*float positions[NUM_OF_PARTICLES*NUM_OF_DIMENSIONS]; float velocities[NUM_OF_PARTICLES*NUM_OF_DIMENSIONS]; float pBests[NUM_OF_PARTICLES*NUM_OF_DIMENSIONS]; - float gBest[NUM_OF_DIMENSIONS]; + float gBest[NUM_OF_DIMENSIONS];*/ + + std::vector particles; + Particle* gBest = new Particle(); srand((unsigned)time(NULL)); - for (int i = 0; i < NUM_OF_PARTICLES*NUM_OF_DIMENSIONS; i++) - { - if (i >= NUM_OF_DIMENSIONS) { - positions[i] = getRandom(START_RANGE_MIN, START_RANGE_MAX); - } - // First point will be the initial position - else { - //if (j > 1) - //{ - positions[i] = (float)0.0; - //} - //else - //{ - // positions[i] = (float)0.2; // Noise for refinements - //} - } - pBests[i] = positions[i]; - velocities[i] = 0; - } + Particle* p = new Particle({ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }); + particles.push_back(p); // First particle is the initial position + *gBest = *p; // gBest is initially the initial position - for (int i = 0; i < NUM_OF_DIMENSIONS; i++) + for (int i = 0; i < NUM_OF_PARTICLES-1; i++) { - gBest[i] = pBests[i]; + p = new Particle(); + for (int j = 0; j < NUM_OF_DIMENSIONS; j++) { + p->position.push_back(getRandom(START_RANGE_MIN, START_RANGE_MAX)); + } + particles.push_back(p); } + p = nullptr; + delete p; + + //for (int i = 0; i < NUM_OF_PARTICLES*NUM_OF_DIMENSIONS; i++) + //{ + // if (i >= NUM_OF_DIMENSIONS) { + // positions[i] = getRandom(START_RANGE_MIN, START_RANGE_MAX); + // } + // // First point will be the initial position + // else { + // //if (j > 1) + // //{ + // positions[i] = (float)0.0; + // //} + // //else + // //{ + // // positions[i] = (float)0.2; // Noise for refinements + // //} + // } + // pBests[i] = positions[i]; + // velocities[i] = 0; + //} + + //for (int i = 0; i < NUM_OF_DIMENSIONS; i++) + //{ + // gBest[i] = pBests[i]; + //} clock_t cpu_begin = clock(); - pso(positions, velocities, pBests, gBest, MAX_EPOCHS, MAX_STALL); + pso(&particles, gBest, MAX_EPOCHS, MAX_STALL); //cuda_pso(positions, velocities, pBests, gBest, MAX_EPOCHS); clock_t cpu_end = clock(); @@ -461,19 +479,14 @@ void Tracker::optimize(int frame, int dFrame, int repeats, int opt_method, unsig printf("Time elapsed:%10.3lf s\n", (double)(cpu_end - cpu_begin) / CLOCKS_PER_SEC); std::cout << "Pose change from initial position: "; - for (int i = 0; i < NUM_OF_DIMENSIONS; i++) - { - if (i == NUM_OF_DIMENSIONS - 1) - { - std::cout << gBest[i] << std::endl; - } - else { - std::cout << gBest[i] << ","; - } - xyzypr_manip[i] = gBest[i]; + for (int i = 0; i < NUM_OF_DIMENSIONS-1; i++) { + std::cout << gBest->position[i] << ", "; + xyzypr_manip[i] = gBest->position[i]; } + std::cout << gBest->position[NUM_OF_DIMENSIONS-1] << std::endl; + xyzypr_manip[NUM_OF_DIMENSIONS-1] = gBest->position[NUM_OF_DIMENSIONS-1]; - printf("Minimum NCC from PSO = %f\n", host_fitness_function(gBest)); + printf("Minimum NCC from PSO = %f\n", gBest->ncc_val); manip = CoordFrame::from_xyzAxis_angle(xyzypr_manip); // PSO End