@@ -16,16 +16,18 @@ namespace queryosity {
16
16
17
17
namespace dataset {
18
18
19
+ using entry_t = unsigned long long ;
20
+
19
21
using partition_t = std::vector<part_t >;
20
22
23
+ using slot_t = unsigned int ;
24
+
21
25
namespace partition {
22
26
23
27
partition_t align (std::vector<partition_t > const &partitions);
24
28
25
29
partition_t truncate (partition_t const &parts, long long nentries_max);
26
30
27
- partition_t merge (partition_t const &parts, unsigned int nslots_max);
28
-
29
31
} // namespace partition
30
32
31
33
} // namespace dataset
@@ -34,12 +36,12 @@ partition_t merge(partition_t const &parts, unsigned int nslots_max);
34
36
35
37
inline queryosity::dataset::partition_t queryosity::dataset::partition::align (
36
38
std::vector<partition_t > const &partitions) {
37
- std::map<unsigned long long , unsigned int > edge_counts;
39
+ std::map<entry_t , unsigned int > edge_counts;
38
40
const unsigned int num_vectors = partitions.size ();
39
41
40
42
// Count appearances of each edge
41
43
for (const auto &vec : partitions) {
42
- std::map<unsigned long long , bool >
44
+ std::map<entry_t , bool >
43
45
seen_edges; // Ensure each edge is only counted once per vector
44
46
for (const auto &p : vec) {
45
47
if (seen_edges.find (p.first ) == seen_edges.end ()) {
@@ -54,74 +56,22 @@ inline queryosity::dataset::partition_t queryosity::dataset::partition::align(
54
56
}
55
57
56
58
// Filter edges that appear in all vectors
57
- std::vector<unsigned long long > aligned_edges;
59
+ std::vector<entry_t > aligned_edges;
58
60
for (const auto &pair : edge_counts) {
59
61
if (pair.second == num_vectors) {
60
62
aligned_edges.push_back (pair.first );
61
63
}
62
64
}
63
65
64
66
// Create aligned vector of pairs
65
- std::vector<std::pair<unsigned long long , unsigned long long >> aligned_ranges;
67
+ std::vector<std::pair<entry_t , entry_t >> aligned_ranges;
66
68
for (size_t i = 0 ; i < aligned_edges.size () - 1 ; ++i) {
67
69
aligned_ranges.emplace_back (aligned_edges[i], aligned_edges[i + 1 ]);
68
70
}
69
71
70
72
return aligned_ranges;
71
73
}
72
74
73
- inline queryosity::dataset::partition_t queryosity::dataset::partition::merge (
74
- queryosity::dataset::partition_t const &parts, unsigned int nslots_max) {
75
-
76
- // no merging needed
77
- if (nslots_max >= static_cast <unsigned int >(parts.size ()))
78
- return parts;
79
-
80
- assert (!parts.empty () && nslots_max > 0 );
81
-
82
- partition_t parts_merged;
83
-
84
- const unsigned int total_size = parts.back ().second - parts.front ().first ;
85
- const unsigned int size_per_slot = total_size / nslots_max;
86
- const unsigned int extra_size = total_size % nslots_max;
87
-
88
- unsigned int current_start = parts[0 ].first ;
89
- unsigned int current_end = current_start;
90
- unsigned int accumulated_size = 0 ;
91
- unsigned int nslots_created = 0 ;
92
-
93
- for (const auto &part : parts) {
94
- unsigned int part_size = part.second - part.first ;
95
- // check if another part can be added
96
- if (accumulated_size + part_size >
97
- size_per_slot + (nslots_created < extra_size ? 1 : 0 ) &&
98
- nslots_created < nslots_max - 1 ) {
99
- // add the current range if adding next part will exceed the average size
100
- parts_merged.emplace_back (current_start, current_end);
101
- current_start = current_end;
102
- accumulated_size = 0 ;
103
- ++nslots_created;
104
- }
105
-
106
- // add part size to the current slot
107
- accumulated_size += part_size;
108
- current_end += part_size;
109
-
110
- // handle the last slot differently to include all remaining parts
111
- if (nslots_created == nslots_max - 1 ) {
112
- parts_merged.emplace_back (current_start, parts.back ().second );
113
- break ; // All parts have been processed
114
- }
115
- }
116
-
117
- // ensure we have exactly nslots_max slots
118
- if (static_cast <unsigned int >(parts_merged.size ()) < nslots_max) {
119
- parts_merged.emplace_back (current_start, parts.back ().second );
120
- }
121
-
122
- return parts_merged;
123
- }
124
-
125
75
inline queryosity::dataset::partition_t
126
76
queryosity::dataset::partition::truncate (
127
77
queryosity::dataset::partition_t const &parts, long long nentries_max) {
0 commit comments