diff --git a/src/algebra/Join.cpp b/src/algebra/Join.cpp
index 43de72c..a895ed6 100644
--- a/src/algebra/Join.cpp
+++ b/src/algebra/Join.cpp
@@ -41,16 +41,30 @@ void materializedTupleToHashTable(
    assert(ht_state.hash_table);
    assert(!mat.handles.empty());
    assert(mat.handles.size() == mat.materializers.size());
+   const size_t batch_size = 256;
+   std::vector<uint64_t> hashes(batch_size);
    for (auto& read_handle : mat.handles) {
       // Pick morsels from the read handle.
       while (const TupleMaterializer::MatChunk* chunk = read_handle->pullChunk()) {
         // Materialize all tuples from the chunk.
+         // We traverse the materialized tuples in batches of 256, similar to what a vectorized
+         // engine would do. For large hash tables this increases throughput significantly.
         const char* curr_tuple = reinterpret_cast<const char*>(chunk->data.get());
         while (curr_tuple < chunk->end_ptr) {
-            // Copy over the whole tuple into the hash table.
-            ht_state.hash_table->insert(curr_tuple);
+            size_t curr_batch_size = std::min(batch_size, (chunk->end_ptr - curr_tuple) / slot_size);
+            const char* curr_tuple_hash_it = curr_tuple;
+            for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
+               hashes[batch_idx] = ht_state.hash_table->compute_hash(curr_tuple_hash_it);
+               curr_tuple_hash_it += slot_size;
+            }
+            for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
+               ht_state.hash_table->slot_prefetch(hashes[batch_idx]);
+            }
+            for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
+               ht_state.hash_table->insert(curr_tuple, hashes[batch_idx]);
+               curr_tuple += slot_size;
+            }
            // Move to the next tuple.
-            curr_tuple += slot_size;
         }
      }
   }
diff --git a/src/runtime/NewHashTables.cpp b/src/runtime/NewHashTables.cpp
index 43882f7..995e4ef 100644
--- a/src/runtime/NewHashTables.cpp
+++ b/src/runtime/NewHashTables.cpp
@@ -174,9 +174,8 @@ char* AtomicHashTable<Comparator>::lookupDisable(const char* key) {
 
 template <class Comparator>
 template <bool copy_only_key>
-char* AtomicHashTable<Comparator>::insert(const char* key) {
+char* AtomicHashTable<Comparator>::insert(const char* key, uint64_t hash) {
    // Look up the initial slot in the linear probing chain .
-   const uint64_t hash = comp.hash(key);
    const auto idx = hash & mod_mask;
    IteratorState it{
       .idx = idx,
@@ -203,6 +202,13 @@
    }
 }
 
+template <class Comparator>
+template <bool copy_only_key>
+char* AtomicHashTable<Comparator>::insert(const char* key) {
+   const uint64_t hash = comp.hash(key);
+   return insert<copy_only_key>(key, hash);
+}
+
 template <class Comparator>
 typename AtomicHashTable<Comparator>::IteratorState AtomicHashTable<Comparator>::itStart() const {
    IteratorState it;
@@ -257,10 +263,16 @@
 template class AtomicHashTable<SimpleKeyComparator>;
 template char* AtomicHashTable<SimpleKeyComparator>::insert<true>(const char* key);
 template char* AtomicHashTable<SimpleKeyComparator>::insert<false>(const char* key);
 
+template char* AtomicHashTable<SimpleKeyComparator>::insert<true>(const char* key, uint64_t hash);
+template char* AtomicHashTable<SimpleKeyComparator>::insert<false>(const char* key, uint64_t hash);
+
 template class AtomicHashTable<ComplexKeyComparator>;
 template char* AtomicHashTable<ComplexKeyComparator>::insert<true>(const char* key);
 template char* AtomicHashTable<ComplexKeyComparator>::insert<false>(const char* key);
 
+template char* AtomicHashTable<ComplexKeyComparator>::insert<true>(const char* key, uint64_t hash);
+template char* AtomicHashTable<ComplexKeyComparator>::insert<false>(const char* key, uint64_t hash);
+
 template class ExclusiveHashTable<SimpleKeyComparator>;
 template class ExclusiveHashTable<ComplexKeyComparator>;
diff --git a/src/runtime/NewHashTables.h b/src/runtime/NewHashTables.h
index 6fb2e70..446e6c1 100644
--- a/src/runtime/NewHashTables.h
+++ b/src/runtime/NewHashTables.h
@@ -65,6 +65,9 @@ struct AtomicHashTable {
    /// the payload as well.
    template <bool copy_only_key>
    char* insert(const char* key);
+   /// Insert variant for cases where the hash was already computed.
+   template <bool copy_only_key>
+   char* insert(const char* key, uint64_t hash);
 
    private:
    /// An iterator within the atomic hash table.
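
The core of the Join.cpp change, restated outside the engine: hashing, prefetching, and inserting run as three separate passes over each batch of 256 tuples, so the prefetches for a whole batch are issued before the first dependent probe and the cache misses of the inserts overlap instead of serializing. Below is a minimal stand-alone sketch of that pattern, not the engine's actual hash table. ToyHashTable, Slot, and buildBatched are invented for the demo; only compute_hash, slot_prefetch, insert(key, hash), batch_size = 256, and slot_size echo names from the patch, and __builtin_prefetch assumes GCC or Clang:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical stand-in for the real hash table; only the interface used by the
// batched build loop (compute_hash / slot_prefetch / insert) is modeled.
struct ToyHashTable {
   struct Slot {
      bool used = false;
      uint64_t key = 0;
   };
   std::vector<Slot> slots;
   uint64_t mod_mask;

   explicit ToyHashTable(size_t log2_size)
      : slots(size_t{1} << log2_size), mod_mask((size_t{1} << log2_size) - 1) {}

   // Hash the first 8 bytes of the tuple (assumes slot_size >= 8); a placeholder
   // for the real key hash.
   static uint64_t compute_hash(const char* tuple) {
      uint64_t k;
      std::memcpy(&k, tuple, sizeof(k));
      return k * 0x9E3779B97F4A7C15ull;
   }

   // Pull the target slot's cache line into cache ahead of the insert.
   void slot_prefetch(uint64_t hash) const {
      __builtin_prefetch(&slots[hash & mod_mask]);
   }

   // Linear-probing insert that reuses the precomputed hash; assumes the table
   // never fills up.
   void insert(const char* tuple, uint64_t hash) {
      uint64_t idx = hash & mod_mask;
      while (slots[idx].used) {
         idx = (idx + 1) & mod_mask;
      }
      slots[idx].used = true;
      std::memcpy(&slots[idx].key, tuple, sizeof(uint64_t));
   }
};

// Build the table from a dense buffer of fixed-size tuples in batches of 256,
// mirroring the patched materializedTupleToHashTable: phase 1 hashes the batch,
// phase 2 prefetches every target slot, phase 3 performs the dependent inserts.
void buildBatched(ToyHashTable& ht, const char* tuples, size_t num_tuples, size_t slot_size) {
   constexpr size_t batch_size = 256;
   std::vector<uint64_t> hashes(batch_size);
   const char* curr_tuple = tuples;
   const char* end_ptr = tuples + num_tuples * slot_size;
   while (curr_tuple < end_ptr) {
      const size_t curr_batch = std::min(batch_size, static_cast<size_t>(end_ptr - curr_tuple) / slot_size);
      const char* hash_it = curr_tuple;
      for (size_t idx = 0; idx < curr_batch; ++idx) {
         hashes[idx] = ToyHashTable::compute_hash(hash_it);
         hash_it += slot_size;
      }
      for (size_t idx = 0; idx < curr_batch; ++idx) {
         ht.slot_prefetch(hashes[idx]);
      }
      for (size_t idx = 0; idx < curr_batch; ++idx) {
         ht.insert(curr_tuple, hashes[idx]);
         curr_tuple += slot_size;
      }
   }
}

Collapsing the three passes back into a single loop that hashes and inserts one tuple at a time reproduces the old behavior: once the table no longer fits in cache, each insert stalls on its own miss, which is exactly the serialization the batched variant avoids.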