diff --git a/src/algebra/Join.cpp b/src/algebra/Join.cpp
index 43de72c..a895ed6 100644
--- a/src/algebra/Join.cpp
+++ b/src/algebra/Join.cpp
@@ -41,16 +41,30 @@ void materializedTupleToHashTable(
    assert(ht_state.hash_table);
    assert(!mat.handles.empty());
    assert(mat.handles.size() == mat.materializers.size());
+   const size_t batch_size = 256;
+   std::vector<uint64_t> hashes(batch_size);
    for (auto& read_handle : mat.handles) {
       // Pick morsels from the read handle.
       while (const TupleMaterializer::MatChunk* chunk = read_handle->pullChunk()) {
         // Materialize all tuples from the chunk.
+         // We traverse the materialized tuples in batches of 256, similar to what a vectorized
+         // engine would do. For large hash tables this increases throughput significantly.
         const char* curr_tuple = reinterpret_cast<const char*>(chunk->data.get());
         while (curr_tuple < chunk->end_ptr) {
-            // Copy over the whole tuple into the hash table.
-            ht_state.hash_table->insert(curr_tuple);
+            size_t curr_batch_size = std::min(batch_size, (chunk->end_ptr - curr_tuple) / slot_size);
+            const char* curr_tuple_hash_it = curr_tuple;
+            for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
+               hashes[batch_idx] = ht_state.hash_table->compute_hash(curr_tuple_hash_it);
+               curr_tuple_hash_it += slot_size;
+            }
+            for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
+               ht_state.hash_table->slot_prefetch(hashes[batch_idx]);
+            }
+            for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
+               ht_state.hash_table->insert(curr_tuple, hashes[batch_idx]);
+               curr_tuple += slot_size;
+            }
            // Move to the next tuple.
-            curr_tuple += slot_size;
         }
      }
   }
diff --git a/src/runtime/NewHashTables.cpp b/src/runtime/NewHashTables.cpp
index 43882f7..995e4ef 100644
--- a/src/runtime/NewHashTables.cpp
+++ b/src/runtime/NewHashTables.cpp
@@ -174,9 +174,8 @@ char* AtomicHashTable<Comparator>::lookupDisable(const char* key) {
 
 template <class Comparator>
 template <bool copy_only_key>
-char* AtomicHashTable<Comparator>::insert(const char* key) {
+char* AtomicHashTable<Comparator>::insert(const char* key, uint64_t hash) {
    // Look up the initial slot in the linear probing chain .
-   const uint64_t hash = comp.hash(key);
    const auto idx = hash & mod_mask;
    IteratorState it{
       .idx = idx,
@@ -203,6 +202,13 @@
    }
 }
 
+template <class Comparator>
+template <bool copy_only_key>
+char* AtomicHashTable<Comparator>::insert(const char* key) {
+   const uint64_t hash = comp.hash(key);
+   return insert<copy_only_key>(key, hash);
+}
+
 template <class Comparator>
 typename AtomicHashTable<Comparator>::IteratorState AtomicHashTable<Comparator>::itStart() const {
    IteratorState it;
@@ -257,10 +263,16 @@
 template class AtomicHashTable<SimpleKeyComparator>;
 template char* AtomicHashTable<SimpleKeyComparator>::insert<true>(const char* key);
 template char* AtomicHashTable<SimpleKeyComparator>::insert<false>(const char* key);
 
+template char* AtomicHashTable<SimpleKeyComparator>::insert<true>(const char* key, uint64_t hash);
+template char* AtomicHashTable<SimpleKeyComparator>::insert<false>(const char* key, uint64_t hash);
+
 template class AtomicHashTable<ComplexKeyComparator>;
 template char* AtomicHashTable<ComplexKeyComparator>::insert<true>(const char* key);
 template char* AtomicHashTable<ComplexKeyComparator>::insert<false>(const char* key);
 
+template char* AtomicHashTable<ComplexKeyComparator>::insert<true>(const char* key, uint64_t hash);
+template char* AtomicHashTable<ComplexKeyComparator>::insert<false>(const char* key, uint64_t hash);
+
 template class ExclusiveHashTable<SimpleKeyComparator>;
 template class ExclusiveHashTable<ComplexKeyComparator>;
diff --git a/src/runtime/NewHashTables.h b/src/runtime/NewHashTables.h
index 6fb2e70..446e6c1 100644
--- a/src/runtime/NewHashTables.h
+++ b/src/runtime/NewHashTables.h
@@ -65,6 +65,9 @@ struct AtomicHashTable {
    /// the payload as well.
    template <bool copy_only_key>
    char* insert(const char* key);
+   /// Insert variant for cases where the hash was already computed.
+   template <bool copy_only_key>
+   char* insert(const char* key, uint64_t hash);
 
    private:
    /// An iterator within the atomic hash table.
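
The core of the Join.cpp change, restated outside the engine: hashing, prefetching, and inserting run as three separate passes over each batch of 256 tuples, so the prefetches for a whole batch are issued before the first dependent probe and the cache misses of the inserts overlap instead of serializing. Below is a minimal stand-alone sketch of that pattern, not the engine's actual hash table. ToyHashTable, Slot, and buildBatched are invented for the demo; only compute_hash, slot_prefetch, insert(key, hash), batch_size = 256, and slot_size echo names from the patch, and __builtin_prefetch assumes GCC or Clang:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical stand-in for the real hash table; only the interface used by the
// batched build loop (compute_hash / slot_prefetch / insert) is modeled.
struct ToyHashTable {
   struct Slot {
      bool used = false;
      uint64_t key = 0;
   };
   std::vector<Slot> slots;
   uint64_t mod_mask;

   explicit ToyHashTable(size_t log2_size)
      : slots(size_t{1} << log2_size), mod_mask((size_t{1} << log2_size) - 1) {}

   // Hash the first 8 bytes of the tuple (assumes slot_size >= 8); a placeholder
   // for the real key hash.
   static uint64_t compute_hash(const char* tuple) {
      uint64_t k;
      std::memcpy(&k, tuple, sizeof(k));
      return k * 0x9E3779B97F4A7C15ull;
   }

   // Pull the target slot's cache line into cache ahead of the insert.
   void slot_prefetch(uint64_t hash) const {
      __builtin_prefetch(&slots[hash & mod_mask]);
   }

   // Linear-probing insert that reuses the precomputed hash; assumes the table
   // never fills up.
   void insert(const char* tuple, uint64_t hash) {
      uint64_t idx = hash & mod_mask;
      while (slots[idx].used) {
         idx = (idx + 1) & mod_mask;
      }
      slots[idx].used = true;
      std::memcpy(&slots[idx].key, tuple, sizeof(uint64_t));
   }
};

// Build the table from a dense buffer of fixed-size tuples in batches of 256,
// mirroring the patched materializedTupleToHashTable: phase 1 hashes the batch,
// phase 2 prefetches every target slot, phase 3 performs the dependent inserts.
void buildBatched(ToyHashTable& ht, const char* tuples, size_t num_tuples, size_t slot_size) {
   constexpr size_t batch_size = 256;
   std::vector<uint64_t> hashes(batch_size);
   const char* curr_tuple = tuples;
   const char* end_ptr = tuples + num_tuples * slot_size;
   while (curr_tuple < end_ptr) {
      const size_t curr_batch = std::min(batch_size, static_cast<size_t>(end_ptr - curr_tuple) / slot_size);
      const char* hash_it = curr_tuple;
      for (size_t idx = 0; idx < curr_batch; ++idx) {
         hashes[idx] = ToyHashTable::compute_hash(hash_it);
         hash_it += slot_size;
      }
      for (size_t idx = 0; idx < curr_batch; ++idx) {
         ht.slot_prefetch(hashes[idx]);
      }
      for (size_t idx = 0; idx < curr_batch; ++idx) {
         ht.insert(curr_tuple, hashes[idx]);
         curr_tuple += slot_size;
      }
   }
}

Collapsing the three passes back into a single loop that hashes and inserts one tuple at a time reproduces the old behavior: once the table no longer fits in cache, each insert stalls on its own miss, which is exactly the serialization the batched variant avoids.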