Skip to content

Commit

Permalink
super final commit
Browse files Browse the repository at this point in the history
  • Loading branch information
themisvr committed Oct 29, 2020
1 parent ab5bb15 commit 255c68e
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 46 deletions.
9 changes: 8 additions & 1 deletion include/io_utils/io_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,14 @@ void user_interface(Cube_args **);

void user_interface(Lsh_args **);

void write_output(const std::string &, const uint16_t &, const size_t &, const size_t &, \
/**
 * Print search-quality and timing statistics to standard output.
 *
 * Parameters, in order (names as used by the definition):
 *   1. nns              number of nearest neighbors examined per query
 *   2. size             number of queries to examine
 *   3. ann_res          per query, the approximate results as
 *                       (distance, index) pairs; a distance equal to
 *                       std::numeric_limits<uint32_t>::max() marks "not found"
 *   4. ann_query_times  per query, duration of the approximate search
 *   5. enn_dists        per query, the exact nearest-neighbor distances
 *   6. enn_query_times  per query, duration of the exact search
 *
 * Reported values: count of approximate distances smaller than the exact
 * one, count of queries with no first neighbor, max / mean approximation
 * factor, and mean approximate / exact search time.
 */
void print_statistics(const uint16_t , const size_t ,
const std::vector<std::vector<std::pair<uint32_t, size_t>>> &, \
const std::vector<std::chrono::microseconds> &, \
const std::vector<std::vector<uint32_t>> &,
const std::vector<std::chrono::microseconds> &);


void write_output(const std::string &, const uint16_t , const size_t , \
const std::vector<std::vector<std::pair<uint32_t, size_t>>> &, \
const std::vector<std::chrono::microseconds> &, const std::vector<std::vector<uint32_t>> &, \
const std::vector<std::chrono::microseconds> &, const std::vector<std::vector<size_t>> &, \
Expand Down
2 changes: 1 addition & 1 deletion include/modules/hypercube/hypercube.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class Hypercube {
Hypercube (uint32_t projdim, uint16_t cands, uint16_t probes, uint16_t nns, float r, \
size_t trn, uint32_t d, double meandist, const std::vector<std::vector<T>> &samples) \
: projection_dimension(projdim), max_candidates(cands), max_probes(probes), \
N(nns), R(r), train_samples(trn), D(d), win(MULTIPLE1 * meandist)
N(nns), R(r), train_samples(trn), D(d), win(MULTIPLE2 * meandist)
{
std::cout << "Window is: ";
std::cout << win << std::endl;
Expand Down
5 changes: 4 additions & 1 deletion include/modules/lsh/lsh.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


#define HT_SIZE(N) ((N / 16))
#define MULTIPLE 4
#define MULTIPLE 6


template <typename T>
Expand Down Expand Up @@ -102,6 +102,7 @@ class LSH {
std::vector<std::pair<uint32_t, size_t>> res;
uint64_t af_value{};
uint32_t dist{};
uint32_t checked{};

initialize_k_best_vectors(res);

Expand All @@ -114,11 +115,13 @@ class LSH {
std::vector<size_t> bucket = ith_table[af_value % ht_size];

for (auto const &index : bucket) {
++checked;
dist = manhattan_distance_rd<T> (dataset[index], query);
if (dist < min_dist) {
min_dist = dist;
best_vectors.emplace_back(std::make_pair(dist, index));
}
if (checked > 10 * L) break;
}
}
std::sort(best_vectors.begin(), best_vectors.end(), [](const std::pair<uint32_t, size_t> &left, \
Expand Down
85 changes: 62 additions & 23 deletions src/common/io_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,67 @@ uint32_t bigend_to_littlend(uint32_t big_endian) {
}


void write_output(const std::string &out, const uint16_t &nns, const size_t &size, const size_t &begin, \

/**
 * Print search-quality and timing statistics to standard output.
 *
 * For every query i in [0, size) and every neighbor rank j in [0, nns) the
 * approximate distance ann_res[i][j].first is compared with the exact
 * distance enn_dists[i][j]:
 *   - a distance equal to std::numeric_limits<uint32_t>::max() is the
 *     "not found" sentinel; it is counted once per query (only for j == 0);
 *   - an approximate distance smaller than the exact one is counted as a
 *     wrong distance;
 *   - otherwise the approximation factor approx / exact contributes to the
 *     reported max and mean.
 *
 * @param nns              number of neighbors examined per query
 * @param size             number of queries examined
 * @param ann_res          approximate results, (distance, index) per neighbor
 * @param ann_query_times  duration of each approximate query
 * @param enn_dists        exact distances per query
 * @param enn_query_times  duration of each exact query
 *
 * Preconditions: ann_res and enn_dists hold at least `size` entries, each
 * with at least `nns` elements.
 *
 * Mean times are printed as the integer quotient of the summed tick counts
 * by `size`; every mean is reported as 0 when it has no samples.
 */
void print_statistics(const uint16_t nns, const size_t size,
                      const std::vector<std::vector<std::pair<uint32_t, size_t>>> &ann_res,
                      const std::vector<std::chrono::microseconds> &ann_query_times,
                      const std::vector<std::vector<uint32_t>> &enn_dists,
                      const std::vector<std::chrono::microseconds> &enn_query_times) {

    size_t wrong_dists{};
    size_t not_found{};

    // Running aggregates of the approximation factor; no need to store
    // every individual factor just to compute max and mean afterwards.
    size_t af_samples{};
    double af_sum{};
    double max_af{};    // factors are never negative, so 0 is a safe seed

    for (size_t i = 0; i != size; ++i) {
        // Bind by reference: copying both result vectors per query is wasted work.
        const auto &approx_nearest = ann_res[i];
        const auto &exact_nearest = enn_dists[i];

        for (size_t j = 0; j != nns; ++j) {
            const uint32_t dist = approx_nearest[j].first;
            const uint32_t true_dist = exact_nearest[j];

            if (dist == std::numeric_limits<uint32_t>::max()) {
                // that means that we didnt find any neighbor (nearest neighbor-1)
                if (j == 0) ++not_found;
                continue;
            }
            if (dist < true_dist) {
                ++wrong_dists;
                continue;
            }

            // From here on dist >= true_dist.
            // A zero exact distance makes the ratio undefined unless the
            // approximate search also hit the exact match (factor 1).
            if (true_dist == 0 && dist != 0) continue;

            // Divide in floating point: integer division would truncate
            // the factor (e.g. 3 / 2 would be reported as 1).
            const double af = (true_dist == 0)
                                  ? 1.0
                                  : static_cast<double>(dist) / static_cast<double>(true_dist);

            if (af > max_af) max_af = af;
            af_sum += af;
            ++af_samples;
        }
    }

    std::cout << "\t\tPRINTING STATISTICS" << '\n';
    std::cout << "\nWrong Distances (distanceLSH < distanceTrue): " << wrong_dists << '\n';
    std::cout << "Not Found: " << not_found << '\n';

    const double mean_af = (af_samples != 0) ? af_sum / static_cast<double>(af_samples) : 0.0;
    std::cout << "\nMax-Approximation-Factor: " << max_af << '\n';
    std::cout << "Mean-Approximation-Factor: " << mean_af << '\n';

    size_t lsh_total_time{};
    for (auto const &time : ann_query_times) {
        lsh_total_time += static_cast<size_t>(time.count());
    }
    // Guard the divisor: an empty query set must not trigger a division by zero.
    std::cout << "Mean-Time-Search-LSH: " << (size != 0 ? lsh_total_time / size : 0) << '\n';

    size_t exact_total_time{};
    for (auto const &time : enn_query_times) {
        exact_total_time += static_cast<size_t>(time.count());
    }
    std::cout << "Mean-Time-Search-Exact: " << (size != 0 ? exact_total_time / size : 0) << std::endl;
}



void write_output(const std::string &out, const uint16_t nns, const size_t size, \
const std::vector<std::vector<std::pair<uint32_t, size_t>>> &ann_res, \
const std::vector<std::chrono::microseconds> &ann_query_times, \
const std::vector<std::vector<uint32_t>> &enn_dists, const std::vector<std::chrono::microseconds> &enn_query_times, \
Expand All @@ -296,26 +356,21 @@ void write_output(const std::string &out, const uint16_t &nns, const size_t &siz
std::ofstream ofile;
ofile.open(out, std::ios::out | std::ios::trunc);

size_t wrong_dists{};
size_t not_found{};

for (size_t i = 0; i != size; ++i) {
approx_nearest = ann_res[i];
exact_nearest = enn_dists[i];
ofile << "Query: " << begin - 1 + i << std::endl;
ofile << "Query: " << i << std::endl;
for (size_t j = 0; j != nns; ++j) {
uint32_t dist = approx_nearest[j].first;
size_t ith_vec = approx_nearest[j].second;
if (dist == std::numeric_limits<uint32_t>::max()) {
// that means that we didnt find any neighbor (nearest neighbor-1)
if (j == 0) ++not_found;
ofile << "Nearest neighbor-" << j + 1 << ": " << "Not Found" << std::endl;
ofile << "distance" << structure << ": " << "None" << std::endl;
}
else {
ofile << "Nearest neighbor-" << j + 1 << ": " << ith_vec << std::endl;
ofile << "distance" << structure << ": " << dist << std::endl;
if (dist < exact_nearest[j]) wrong_dists++;
}
ofile << "distanceTrue: " << exact_nearest[j] << std::endl;
}
Expand All @@ -332,21 +387,5 @@ void write_output(const std::string &out, const uint16_t &nns, const size_t &siz
ofile << c << std::endl;
}
}
// error printing for debugging purposes
std::cout << "Not Found: " << not_found << std::endl;
std::cout << "Wrong Distances (distanceLSH < distanceTrue): " << wrong_dists << std::endl;

size_t lsh_mean_time{};
for (auto const &time: ann_query_times) {
lsh_mean_time += time.count();
}
std::cout << "meanTimeSearchLSH: " << lsh_mean_time / size << std::endl;

size_t exact_mean_time{};
for (auto const &time: enn_query_times) {
exact_mean_time += time.count();
}
std::cout << "meanTimeSearchBF: " << exact_mean_time / size << std::endl;

ofile.close();
}
34 changes: 15 additions & 19 deletions src/cube/cube_app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,48 +40,44 @@ static void start_hypercube_simulation(Cube_args *args)
std::cout << "Done!" << std::endl;

std::cout << "\nQuery file contains " << test_samples.size() << " queries" << std::endl;
size_t begin = user_prompt_query_index("Enter query begin index: ", 1, test_samples.size());
size_t end = user_prompt_query_index("Enter query end index: ", 1, test_samples.size());
if (begin > end) {
std::cerr << "\nInvalid begin or end query index!" << std::endl;
delete args;
exit(EXIT_FAILURE);
}
size_t size = end - begin;

std::cout << "\nStart Executing ANN / ENN / Range-Search" << std::endl;
std::cout << "..." << std::endl;
/********** Start ANN / ENN / Range search **********/
std::vector<std::vector<std::pair<uint32_t, size_t>>> ann_results(size, \
std::vector<std::vector<std::pair<uint32_t, size_t>>> ann_results(test_samples.size(), \
std::vector<std::pair<uint32_t, size_t>> (args->get_nearest_neighbors_num()));

std::vector<std::vector<uint32_t>> enn_distances(size, \
std::vector<std::vector<uint32_t>> enn_distances(test_samples.size(), \
std::vector<uint32_t> (args->get_nearest_neighbors_num()));

std::vector<std::vector<size_t>> range_results(size);
std::vector<std::chrono::microseconds> ann_query_times(size);
std::vector<std::chrono::microseconds> enn_query_times(size);
std::vector<std::vector<size_t>> range_results(test_samples.size());
std::vector<std::chrono::microseconds> ann_query_times(test_samples.size());
std::vector<std::chrono::microseconds> enn_query_times(test_samples.size());

for (size_t i = 0; i != size; ++i) {
for (size_t i = 0; i != test_samples.size(); ++i) {

/* Approximate K-NN calculation */
start = std::chrono::high_resolution_clock::now();
ann_results[i] = cube.approximate_nn(test_samples[begin + i - 1], training_samples);
ann_results[i] = cube.approximate_nn(test_samples[i], training_samples);
stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
ann_query_times[i] = duration;

/* Exact NN calculation */
start = std::chrono::high_resolution_clock::now();
enn_distances[i] = exact_nn<uint8_t> (training_samples, test_samples[begin + i - 1], args->get_nearest_neighbors_num());
enn_distances[i] = exact_nn<uint8_t> (training_samples, test_samples[i], args->get_nearest_neighbors_num());
stop = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
enn_query_times[i] = duration;

/* Range Search */
range_results[i] = cube.range_search(test_samples[begin + i - 1], training_samples);
range_results[i] = cube.range_search(test_samples[i], training_samples);
}

print_statistics(args->get_nearest_neighbors_num(), test_samples.size(), ann_results, ann_query_times, \
enn_distances, enn_query_times);

std::cout << "\nWriting formatted output to \"" << args->get_output_file_path() << "\"..."<< std::endl;
write_output(args->get_output_file_path(), args->get_nearest_neighbors_num(), size, begin, \
write_output(args->get_output_file_path(), args->get_nearest_neighbors_num(), test_samples.size(),
ann_results, ann_query_times, enn_distances, enn_query_times, range_results, "Hypercube");
std::cout << "Done!" << std::endl;

Expand Down
7 changes: 6 additions & 1 deletion src/lsh/lsh_app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ static void start_lsh_simulation(Lsh_args *args) {
std::cout << "Done!" << std::endl;

std::cout << "\nQuery file contains " << queries.size() << " queries" << std::endl;
std::cout << "\nStart Executing ANN / ENN / Range-Search" << std::endl;
std::cout << "..." << std::endl;

/********** Start ANN / ENN / Range search **********/
std::vector<std::vector<std::pair<uint32_t, size_t>>> ann_results(queries.size(), \
Expand Down Expand Up @@ -78,8 +80,11 @@ static void start_lsh_simulation(Lsh_args *args) {
range_results[i] = lsh.approximate_range_search(C, R, queries[i]);
}

print_statistics(args->get_nearest_neighbors_num(), queries.size(), ann_results, ann_query_times, \
enn_distances, enn_query_times);

std::cout << "\nWriting formatted output to \"" << args->get_output_file_path() << "\"..."<< std::endl;
write_output(args->get_output_file_path(), args->get_nearest_neighbors_num(), queries.size(), 1, \
write_output(args->get_output_file_path(), args->get_nearest_neighbors_num(), queries.size(), \
ann_results, ann_query_times, enn_distances, enn_query_times, range_results, "LSH");
std::cout << "Done!" << std::endl;

Expand Down

0 comments on commit 255c68e

Please sign in to comment.