Improve Prefetching Primitives
At the moment our ROF backend doesn't perform better than the regular
JIT backend. We need to get to a point where the vectorized hash table
lookups actually help our overall performance. This commit makes a first
step in this direction:

Improve hash/prefetch primitives:
   Until now we had one primitive that would hash, and then a separate
   primitive that would prefetch based on the hash. The ROF paper's
   prefetching strategy is different: it fuses hashing and prefetching
   into a single primitive. This overlaps memory loads with computation,
   leading to better CPU utilization. We now do the same and have a
   fused hash/prefetch primitive.
wagjamin committed Nov 3, 2023
1 parent 111f77a commit cb9a38e
Showing 21 changed files with 124 additions and 109 deletions.
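
To make the change concrete, here is a minimal sketch of the old split primitives versus the new fused one. It is illustrative only: the Table struct, hash_key, and probe_batch names are assumptions for this example, not the actual inkfuse API.

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative stand-in for a hash table; not inkfuse's AtomicHashTable API.
struct Table {
   std::vector<uint64_t> slots; // capacity is a power of two
   uint64_t mod_mask = 0;       // capacity - 1, so (hash & mod_mask) selects a slot

   static uint64_t hash_key(uint64_t key) { return key * 0x9E3779B97F4A7C15ull; }

   // Old scheme: two primitives, run as two separate passes over a batch.
   uint64_t compute_hash(uint64_t key) const { return hash_key(key); }
   void slot_prefetch(uint64_t hash) const { __builtin_prefetch(&slots[hash & mod_mask]); }

   // New scheme: one fused primitive. The prefetch for key i is issued right
   // after its hash, so the memory load overlaps with hashing the next keys.
   uint64_t compute_hash_and_prefetch(uint64_t key) const {
      const uint64_t hash = hash_key(key);
      __builtin_prefetch(&slots[hash & mod_mask]);
      return hash;
   }
};

// ROF-style vectorized probe: one pass that hashes and prefetches, then a
// second pass that performs the lookups on (hopefully) cache-resident slots.
inline void probe_batch(const Table& ht, const uint64_t* keys, uint64_t* hashes, size_t n) {
   for (size_t i = 0; i < n; ++i) {
      hashes[i] = ht.compute_hash_and_prefetch(keys[i]);
   }
   for (size_t i = 0; i < n; ++i) {
      // ht.lookup(keys[i], hashes[i]) would run here; by the time a slot is
      // touched, its cache line has usually already arrived.
   }
}

The diffs below apply this restructuring to the real code: AtomicHashTable::compute_hash_and_prefetch in src/runtime/NewHashTables.cpp additionally prefetches the tag array, and the separate htPrefetch suboperator and its fragmentized primitive are removed.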
5 changes: 2 additions & 3 deletions CMakeLists.txt
@@ -12,10 +12,9 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -stdlib=libc++")
# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -rdynamic -g -O0 -fsanitize=address")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -stdlib=libc++ -gdwarf-4")
# Generate DWARF 4 in debug to work on older GDB versions
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -rdynamic -g -gdwarf-4 -O0 -fsanitize=address")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O0 -fsanitize=address")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")

# ---------------------------------------------------------------------------
2 changes: 1 addition & 1 deletion bench/benchmarks.cpp
@@ -1,3 +1,3 @@
#include <benchmark/benchmark.h>

BENCHMARK_MAIN();
BENCHMARK_MAIN();
3 changes: 2 additions & 1 deletion bench/compiler_invoke.cpp
@@ -1,5 +1,6 @@
#include "benchmark/benchmark.h"
#include "exec/InterruptableJob.h"
#include <array>
#include <fstream>

/// The benchmarks in this file test the overhead of invoking the
@@ -84,4 +85,4 @@ BENCHMARK(invoke_gcc_direct)->Arg(0)->Arg(1);

}

}
}
54 changes: 49 additions & 5 deletions bench/vectorized_ht.cpp
@@ -107,6 +107,13 @@ struct BenchmarkHashTable {
__builtin_prefetch(&entries[slot_idx]);
};

const uint64_t vec_slot_and_load(const KeyType& key) const {
const auto hash = XXH3_64bits(&key, key_size);
const auto slot = hash % capacity;
__builtin_prefetch(&entries[slot]);
return slot;
};

const Entry* vec_lookup(const KeyType& key, uint64_t slot_idx) const {
const Entry* entry = &entries[slot_idx];
while (entry->key != 0) {
@@ -190,6 +197,42 @@ void BM_ht_perf_vectorized(benchmark::State& state) {
state.SetItemsProcessed(state.iterations() * num_elems);
}

/**
* Vectorized hash table as in the ROF paper. Fused prefetching & hash
* computation to overlap loads and computation nicely.
*/
void BM_ht_perf_vectorized_rof(benchmark::State& state) {
const uint64_t num_elems = state.range(0);
const uint64_t batch_size = state.range(1);
BenchmarkHashTable<uint64_t, uint64_t> ht{static_cast<size_t>(num_elems) * 2, 8};
for (uint64_t k = 1; k <= num_elems; ++k) {
ht.tat_insert(7 * k, k);
}
std::vector<uint64_t> keys(batch_size);
std::vector<uint64_t> slots(batch_size);
for (auto _ : state) {
// Lookup every key again.
for (uint64_t k = 1; k <= num_elems; k += batch_size) {
const auto curr_batch = std::min(batch_size, num_elems - k + 1);
for (uint64_t tid = 0; tid < curr_batch; ++tid) {
keys[tid] = 7 * (k + tid);
}
for (uint64_t tid = 0; tid < curr_batch; ++tid) {
slots[tid] = ht.vec_slot_and_load(keys[tid]);
}
for (uint64_t tid = 0; tid < curr_batch; ++tid) {
const auto* res = ht.vec_lookup(keys[tid], slots[tid]);
// We have to do something with the result, otherwise the compiler is smart enough
// to optimize the memory accesses away.
if (res->value > num_elems) {
throw std::runtime_error("bad ht lookup for " + std::to_string(k));
}
}
}
}
state.SetItemsProcessed(state.iterations() * num_elems);
}

void BM_ht_perf_tat_inkfuse(benchmark::State& state) {
const uint64_t num_elems = state.range(0);
inkfuse::SimpleKeyComparator comp{8};
@@ -231,10 +274,7 @@ void BM_ht_perf_vectorized_inkfuse(benchmark::State& state) {
keys[tid] = 7 * (k + tid);
}
for (uint64_t tid = 0; tid < curr_batch; ++tid) {
hashes[tid] = ht.compute_hash(reinterpret_cast<const char*>(&keys[tid]));
}
for (uint64_t tid = 0; tid < curr_batch; ++tid) {
ht.slot_prefetch(hashes[tid]);
hashes[tid] = ht.compute_hash_and_prefetch(reinterpret_cast<const char*>(&keys[tid]));
}
for (uint64_t tid = 0; tid < curr_batch; ++tid) {
const auto* res = ht.lookup(reinterpret_cast<const char*>(&keys[tid]), hashes[tid]);
@@ -254,8 +294,12 @@ BENCHMARK(BM_ht_perf_vectorized)->ArgPair(1 << 9, 256)->ArgPair(1 << 13, 256)->A
// Different internal batch sizes. 256 is a good value.
BENCHMARK(BM_ht_perf_vectorized)->ArgPair(1 << 25, 64)->ArgPair(1 << 25, 128)->ArgPair(1 << 25, 256)->ArgPair(1 << 25, 512)->ArgPair(1 << 25, 1024)->ArgPair(1 << 25, 2024)->ArgPair(1 << 25, 4048)->ArgPair(1 << 25, 8096)->ArgPair(1 << 25, 16192);

BENCHMARK(BM_ht_perf_vectorized_rof)->ArgPair(1 << 9, 256)->ArgPair(1 << 13, 256)->ArgPair(1 << 15, 256)->ArgPair(1 << 19, 256)->ArgPair(1 << 25, 256)->ArgPair(1 << 30, 256);
// Different internal batch sizes. 256 is a good value.
BENCHMARK(BM_ht_perf_vectorized_rof)->ArgPair(1 << 25, 64)->ArgPair(1 << 25, 128)->ArgPair(1 << 25, 256)->ArgPair(1 << 25, 512)->ArgPair(1 << 25, 1024)->ArgPair(1 << 25, 2024)->ArgPair(1 << 25, 4048)->ArgPair(1 << 25, 8096)->ArgPair(1 << 25, 16192);

BENCHMARK(BM_ht_perf_vectorized_inkfuse)->ArgPair(1 << 9, 256)->ArgPair(1 << 13, 256)->ArgPair(1 << 15, 256)->ArgPair(1 << 19, 256)->ArgPair(1 << 25, 256)->ArgPair(1 << 30, 256);
// Different internal batch sizes. 256 is a good value.
BENCHMARK(BM_ht_perf_vectorized_inkfuse)->ArgPair(1 << 25, 64)->ArgPair(1 << 25, 128)->ArgPair(1 << 25, 256)->ArgPair(1 << 25, 512)->ArgPair(1 << 25, 1024)->ArgPair(1 << 25, 2024)->ArgPair(1 << 25, 4048)->ArgPair(1 << 25, 8096)->ArgPair(1 << 25, 16192);

} // namespacf
} // namespace
18 changes: 6 additions & 12 deletions src/algebra/Join.cpp
@@ -54,12 +54,9 @@ void materializedTupleToHashTable(
size_t curr_batch_size = std::min(batch_size, (chunk->end_ptr - curr_tuple) / slot_size);
const char* curr_tuple_hash_it = curr_tuple;
for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
hashes[batch_idx] = ht_state.hash_table->compute_hash(curr_tuple_hash_it);
hashes[batch_idx] = ht_state.hash_table->compute_hash_and_prefetch(curr_tuple_hash_it);
curr_tuple_hash_it += slot_size;
}
for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
ht_state.hash_table->slot_prefetch(hashes[batch_idx]);
}
for (size_t batch_idx = 0; batch_idx < curr_batch_size; ++batch_idx) {
ht_state.hash_table->insert<false>(curr_tuple, hashes[batch_idx]);
curr_tuple += slot_size;
@@ -269,19 +266,16 @@ void Join::decayPkJoin(inkfuse::PipelineDAG& dag) const {
pseudo.push_back(&pseudo_iu);
}

// 2.2.1 Compute the hash.
probe_pipe.attachSuboperator(RuntimeFunctionSubop::htHash<AtomicHashTable<SimpleKeyComparator>>(this, *hash_right, *scratch_pad_right, std::move(pseudo), &ht_state));

// 2.2.2 Prefetch the slot.
probe_pipe.attachSuboperator(RuntimeFunctionSubop::htPrefetch<AtomicHashTable<SimpleKeyComparator>>(this, &*prefetch_pseudo, *hash_right, &ht_state));
// 2.2.1 Compute the hash and prefetch the slot.
probe_pipe.attachSuboperator(RuntimeFunctionSubop::htHashAndPrefetch<AtomicHashTable<SimpleKeyComparator>>(this, *hash_right, *scratch_pad_right, std::move(pseudo), &ht_state));

// 2.2.3 Perform the lookup.
// 2.2.2 Perform the lookup.
if (type == JoinType::LeftSemi) {
// Lookup on a slot disables the slot, giving semi-join behaviour.
probe_pipe.attachSuboperator(RuntimeFunctionSubop::htLookupWithHash<AtomicHashTable<SimpleKeyComparator>, true>(this, *lookup_right, *scratch_pad_right, *hash_right, &*prefetch_pseudo, &ht_state));
probe_pipe.attachSuboperator(RuntimeFunctionSubop::htLookupWithHash<AtomicHashTable<SimpleKeyComparator>, true>(this, *lookup_right, *scratch_pad_right, *hash_right, /* prefetch_pseudo = */ nullptr, &ht_state));
} else {
// Regular lookup that does not disable slots.
probe_pipe.attachSuboperator(RuntimeFunctionSubop::htLookupWithHash<AtomicHashTable<SimpleKeyComparator>, false>(this, *lookup_right, *scratch_pad_right, *hash_right, &*prefetch_pseudo, &ht_state));
probe_pipe.attachSuboperator(RuntimeFunctionSubop::htLookupWithHash<AtomicHashTable<SimpleKeyComparator>, false>(this, *lookup_right, *scratch_pad_right, *hash_right, /* prefetch_pseudo = */ nullptr, &ht_state));
}
}

34 changes: 3 additions & 31 deletions src/algebra/suboperators/RuntimeFunctionSubop.h
@@ -30,10 +30,10 @@ struct RuntimeFunctionSubop : public TemplatedSuboperator<RuntimeFunctionSubopSt
/// Build an insert function for a hash table.
static std::unique_ptr<RuntimeFunctionSubop> htInsert(const RelAlgOp* source, const IU* pointers_, const IU& key_, std::vector<const IU*> pseudo_ius_, DefferredStateInitializer* state_init_ = nullptr);

/// Hash a key with the hash table's hash function.
/// Hash a key with the hash table's hash function and prefetch the corresponding slot.
template <class HashTable>
static std::unique_ptr<RuntimeFunctionSubop> htHash(const RelAlgOp* source, const IU& hash_, const IU& key_, std::vector<const IU*> pseudo_ius_, DefferredStateInitializer* state_init_ = nullptr) {
std::string fct_name = "ht_" + HashTable::ID + "_compute_hash";
static std::unique_ptr<RuntimeFunctionSubop> htHashAndPrefetch(const RelAlgOp* source, const IU& hash_, const IU& key_, std::vector<const IU*> pseudo_ius_, DefferredStateInitializer* state_init_ = nullptr) {
std::string fct_name = "ht_" + HashTable::ID + "_compute_hash_and_prefetch";
std::vector<const IU*> in_ius{&key_};
for (auto pseudo : pseudo_ius_) {
// Pseudo IUs are used as input IUs in the backing graph, but do not influence arguments.
@@ -55,34 +55,6 @@ struct RuntimeFunctionSubop : public TemplatedSuboperator<RuntimeFunctionSubopSt
out));
}

/// Hash a key with the hash table's hash function.
template <class HashTable>
static std::unique_ptr<RuntimeFunctionSubop> htPrefetch(const RelAlgOp* source, const IU* prefetch_pseudo, const IU& hash_, DefferredStateInitializer* state_init_ = nullptr) {
std::string fct_name = "ht_" + HashTable::ID + "_slot_prefetch";
std::vector<const IU*> in_ius{&hash_};
std::vector<bool> ref{false};
std::vector<const IU*> out_ius_{};
if (prefetch_pseudo) {
out_ius_.push_back(prefetch_pseudo);
}
std::vector<const IU*> args{&hash_};
std::unique_ptr<RuntimeFunctionSubop> result_subop{new RuntimeFunctionSubop(
source,
state_init_,
std::move(fct_name),
std::move(in_ius),
std::move(out_ius_),
std::move(args),
std::move(ref),
/* out = */ nullptr)};
// Prefetch instructions should never be generated in the operator-fusing code.
// When performing operator-fusing code generation, we are going through
// the code tuple-at-a-time. As a result, the follow-up suboperator (e.g. HT lookup)
// will directly cause the cache miss anyways.
result_subop->optimization_properties.ct_only_vectorized = true;
return result_subop;
}

/// Build a hash table lookup function.
template <class HashTable, bool disable_slot>
static std::unique_ptr<RuntimeFunctionSubop> htLookupWithHash(const RelAlgOp* source, const IU& pointers_, const IU& key_, const IU& hash_, const IU* prefetch_pseudo_, DefferredStateInitializer* state_init_ = nullptr) {
12 changes: 2 additions & 10 deletions src/interpreter/RuntimeFunctionSubopFragmentizer.cpp
@@ -40,11 +40,11 @@ RuntimeFunctionSubopFragmentizer::RuntimeFunctionSubopFragmentizer() {

// Fragmentize Vectorized Hash Table Primitives
{
// Hash:
// Hash and prefetch:
auto& [name, pipe] = pipes.emplace_back();
const auto& key = generated_ius.emplace_back(in_type);
const auto& hash = generated_ius.emplace_back(IR::UnsignedInt::build(8));
const auto& op = pipe.attachSuboperator(RuntimeFunctionSubop::htHash<AtomicHashTable<SimpleKeyComparator>>(nullptr, hash, key, {}));
const auto& op = pipe.attachSuboperator(RuntimeFunctionSubop::htHashAndPrefetch<AtomicHashTable<SimpleKeyComparator>>(nullptr, hash, key, {}));
name = op.id();
}
{
@@ -106,14 +106,6 @@ RuntimeFunctionSubopFragmentizer::RuntimeFunctionSubopFragmentizer() {
}
}

// Fragmentize Prefetch.
{
auto& [name, pipe] = pipes.emplace_back();
const auto& hash = generated_ius.emplace_back(IR::UnsignedInt::build(8));
const auto& op = pipe.attachSuboperator(RuntimeFunctionSubop::htPrefetch<AtomicHashTable<SimpleKeyComparator>>(nullptr, nullptr, hash));
name = op.id();
}

// Fragmentize tuple materialization.
{
auto& [name, pipe] = pipes.emplace_back();
12 changes: 6 additions & 6 deletions src/runtime/HashTableRuntime.cpp
@@ -58,8 +58,8 @@ extern "C" void HashTableRuntime::ht_dl_it_advance(void* table, char** it_data,
}

// Atomic hash table.
extern "C" uint64_t HashTableRuntime::ht_at_sk_compute_hash(void* table, char* key) {
return reinterpret_cast<AtomicHashTable<SimpleKeyComparator>*>(table)->compute_hash(key);
extern "C" uint64_t HashTableRuntime::ht_at_sk_compute_hash_and_prefetch(void* table, char* key) {
return reinterpret_cast<AtomicHashTable<SimpleKeyComparator>*>(table)->compute_hash_and_prefetch(key);
}

extern "C" void HashTableRuntime::ht_at_sk_slot_prefetch(void* table, uint64_t hash) {
@@ -74,8 +74,8 @@ extern "C" char* HashTableRuntime::ht_at_sk_lookup_with_hash_disable(void* table
return reinterpret_cast<AtomicHashTable<SimpleKeyComparator>*>(table)->lookupDisable(key, hash);
}

extern "C" uint64_t HashTableRuntime::ht_at_ck_compute_hash(void* table, char* key) {
return reinterpret_cast<AtomicHashTable<ComplexKeyComparator>*>(table)->compute_hash(key);
extern "C" uint64_t HashTableRuntime::ht_at_ck_compute_hash_and_prefetch(void* table, char* key) {
return reinterpret_cast<AtomicHashTable<ComplexKeyComparator>*>(table)->compute_hash_and_prefetch(key);
}

extern "C" void HashTableRuntime::ht_at_ck_slot_prefetch(void* table, uint64_t hash) {
@@ -172,7 +172,7 @@ void HashTableRuntime::registerRuntime() {
.addArg("table", IR::Pointer::build(IR::Void::build()))
.addArg("key", IR::Pointer::build(IR::Char::build()), true);

RuntimeFunctionBuilder("ht_at_sk_compute_hash", IR::UnsignedInt::build(8))
RuntimeFunctionBuilder("ht_at_sk_compute_hash_and_prefetch", IR::UnsignedInt::build(8))
.addArg("table", IR::Pointer::build(IR::Void::build()))
.addArg("key", IR::Pointer::build(IR::Char::build()), true);

@@ -190,7 +190,7 @@ void HashTableRuntime::registerRuntime() {
.addArg("key", IR::Pointer::build(IR::Char::build()))
.addArg("hash", IR::UnsignedInt::build(8), true);

RuntimeFunctionBuilder("ht_at_ck_compute_hash", IR::UnsignedInt::build(8))
RuntimeFunctionBuilder("ht_at_ck_compute_hash_and_prefetch", IR::UnsignedInt::build(8))
.addArg("table", IR::Pointer::build(IR::Void::build()))
.addArg("key", IR::Pointer::build(IR::Char::build()), true);

4 changes: 2 additions & 2 deletions src/runtime/HashTableRuntime.h
@@ -29,12 +29,12 @@ extern "C" char* ht_at_sk_lookup(void* table, char* key);
extern "C" char* ht_at_sk_lookup_disable(void* table, char* key);
extern "C" char* ht_at_ck_lookup(void* table, char* key);

extern "C" uint64_t ht_at_sk_compute_hash(void* table, char* key);
extern "C" uint64_t ht_at_sk_compute_hash_and_prefetch(void* table, char* key);
extern "C" void ht_at_sk_slot_prefetch(void* table, uint64_t hash);
extern "C" char* ht_at_sk_lookup_with_hash(void* table, char* key, uint64_t hash);
extern "C" char* ht_at_sk_lookup_with_hash_disable(void* table, char* key, uint64_t hash);

extern "C" uint64_t ht_at_ck_compute_hash(void* table, char* key);
extern "C" uint64_t ht_at_ck_compute_hash_and_prefetch(void* table, char* key);
extern "C" void ht_at_ck_slot_prefetch(void* table, uint64_t hash);
extern "C" char* ht_at_ck_lookup_with_hash(void* table, char* key, uint64_t hash);
extern "C" char* ht_at_ck_lookup_with_hash_disable(void* table, char* key, uint64_t hash);
10 changes: 8 additions & 2 deletions src/runtime/NewHashTables.cpp
@@ -83,8 +83,14 @@ AtomicHashTable<Comparator>::AtomicHashTable(Comparator comp_, uint16_t total_sl
}

template <class Comparator>
uint64_t AtomicHashTable<Comparator>::compute_hash(const char* key) const {
return comp.hash(key);
uint64_t AtomicHashTable<Comparator>::compute_hash_and_prefetch(const char* key) const {
uint64_t hash = comp.hash(key);
const uint64_t slot_id = hash & mod_mask;
// Prefetch the actual data array.
__builtin_prefetch(&data[slot_id * total_slot_size]);
// Prefetch the bitmask slot.
__builtin_prefetch(&tags[slot_id]);
return hash;
}

template <class Comparator>
4 changes: 2 additions & 2 deletions src/runtime/NewHashTables.h
@@ -43,8 +43,8 @@ struct AtomicHashTable {

AtomicHashTable(Comparator comp_, uint16_t total_slot_size_, size_t num_slots_);

/// Compute the hash for a given key.
uint64_t compute_hash(const char* key) const;
/// Compute the hash for a given key and prefetch the corresponding hash table slot.
uint64_t compute_hash_and_prefetch(const char* key) const;
/// Prefetch the tag and data slots for a specific hash.
void slot_prefetch(uint64_t hash) const;
/// Get the pointer to a given key, or nullptr if the group does not exist.
7 changes: 4 additions & 3 deletions test/operators/test_expression.cpp
@@ -4,6 +4,7 @@
#include "algebra/RelAlgOp.h"
#include "codegen/Value.h"
#include "codegen/backend_c/BackendC.h"
#include "exec/FuseChunk.h"
#include "exec/PipelineExecutor.h"
#include <gtest/gtest.h>

@@ -167,8 +168,8 @@ TEST_P(ExpressionTParametrized, hash) {
auto& ctx = exec.getExecutionContext();
auto& c_in1 = ctx.getColumn(source, 0);

c_in1.size = 1000;
for (uint16_t k = 0; k < 1000; ++k) {
c_in1.size = DEFAULT_CHUNK_SIZE;
for (uint16_t k = 0; k < DEFAULT_CHUNK_SIZE; ++k) {
reinterpret_cast<uint64_t*>(c_in1.raw_data)[k] = k;
}

@@ -180,7 +181,7 @@
std::unordered_set<uint64_t> seen;
// This set should have no hash collisions.
auto& hash_col = ctx.getColumn(hash_iu, 0);
for (uint16_t k = 0; k < 1000; ++k) {
for (uint16_t k = 0; k < DEFAULT_CHUNK_SIZE; ++k) {
auto elem = reinterpret_cast<uint64_t*>(hash_col.raw_data)[k];
EXPECT_EQ(seen.count(elem), 0);
seen.insert(elem);
12 changes: 6 additions & 6 deletions test/operators/test_table_scan.cpp
@@ -4,6 +4,7 @@
#include "algebra/TableScan.h"
#include "algebra/suboperators/sinks/FuseChunkSink.h"
#include "codegen/backend_c/BackendC.h"
#include "exec/FuseChunk.h"
#include "exec/PipelineExecutor.h"
#include <gtest/gtest.h>

@@ -15,9 +16,9 @@ TEST(test_table_scan, scan_1) {
StoredRelation rel;
auto& col_1 = rel.attachPODColumn("col_1", IR::UnsignedInt::build(8));
auto& storage = col_1.getStorage();
storage.resize(8 * 1000);
for (uint64_t k = 0; k < 1000; ++k)
{
// two full fuse chunks in the source table
storage.resize(8 * 2 * DEFAULT_CHUNK_SIZE);
for (uint64_t k = 0; k < 2 * DEFAULT_CHUNK_SIZE; ++k) {
reinterpret_cast<uint64_t*>(storage.data())[k] = k;
}

@@ -40,9 +41,8 @@ TEST(test_table_scan, scan_1) {
EXPECT_NO_THROW(exec.runPipeline());
auto& col = exec.getExecutionContext().getColumn(tscan_iu, 0);

for (uint64_t k = 0; k < 1000; ++k)
{
EXPECT_EQ(reinterpret_cast<uint64_t*>(col.raw_data)[k], k);
for (uint64_t k = 0; k < DEFAULT_CHUNK_SIZE; ++k) {
EXPECT_EQ(reinterpret_cast<uint64_t*>(col.raw_data)[k], DEFAULT_CHUNK_SIZE + k);
}
}
