Skip to content

Commit 996bf1b

Browse files
committed
feat(C++): filter property and return VerticesCollection
1 parent b86304a commit 996bf1b

File tree

3 files changed

+174
-3
lines changed

3 files changed

+174
-3
lines changed

cpp/examples/label_filtering_example.cc renamed to cpp/examples/filtering_example.cc

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,63 @@ void vertices_collection(
8080
std::cout << property << " ";
8181
std::cout << std::endl;
8282
}
83-
}
83+
std::cout << std::endl;
84+
85+
std::cout << "Test vertices with property in a filtered vertices set"
86+
<< std::endl;
87+
std::cout << "--------------------------------------" << std::endl;
88+
auto filter = graphar::_Equal(graphar::_Property("name"),
89+
graphar::_Literal("Safi_Airways"));
90+
auto maybe_filter_vertices_collection_4 =
91+
graphar::VerticesCollection::verticesWithProperty(
92+
std::string("name"), filter, graph_info, type);
93+
ASSERT(!maybe_filter_vertices_collection_4.has_error());
94+
auto filter_vertices_4 = maybe_filter_vertices_collection_4.value();
95+
std::cout << "valid vertices num: " << filter_vertices_4->size() << std::endl;
8496

97+
for (auto it = filter_vertices_4->begin(); it != filter_vertices_4->end();
98+
++it) {
99+
// get a node's all labels
100+
auto label_result = it.label();
101+
std::cout << "id: " << it.id() << " ";
102+
if (!label_result.has_error()) {
103+
for (auto label : label_result.value()) {
104+
std::cout << label << " ";
105+
}
106+
}
107+
std::cout << "name: ";
108+
auto property = it.property<std::string>("name").value();
109+
std::cout << property << " ";
110+
std::cout << std::endl;
111+
}
112+
113+
std::cout << "Test vertices with property" << std::endl;
114+
std::cout << "--------------------------------------" << std::endl;
115+
auto filter_2 =
116+
graphar::_Equal(graphar::_Property("name"), graphar::_Literal("Kam_Air"));
117+
auto maybe_filter_vertices_collection_5 =
118+
graphar::VerticesCollection::verticesWithProperty(
119+
std::string("name"), filter_2, filter_vertices_3);
120+
ASSERT(!maybe_filter_vertices_collection_5.has_error());
121+
auto filter_vertices_5 = maybe_filter_vertices_collection_5.value();
122+
std::cout << "valid vertices num: " << filter_vertices_5->size() << std::endl;
123+
124+
for (auto it = filter_vertices_5->begin(); it != filter_vertices_5->end();
125+
++it) {
126+
// get a node's all labels
127+
auto label_result = it.label();
128+
std::cout << "id: " << it.id() << " ";
129+
if (!label_result.has_error()) {
130+
for (auto label : label_result.value()) {
131+
std::cout << label << " ";
132+
}
133+
}
134+
std::cout << "name: ";
135+
auto property = it.property<std::string>("name").value();
136+
std::cout << property << " ";
137+
std::cout << std::endl;
138+
}
139+
}
85140
int main(int argc, char* argv[]) {
86141
// read file and construct graph info
87142
std::string path = GetTestingResourceRoot() + "/ldbc/parquet/ldbc.graph.yml";

cpp/src/graphar/high-level/graph_reader.cc

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@
1717
* under the License.
1818
*/
1919

20+
#include "graphar/high-level/graph_reader.h"
2021
#include <algorithm>
2122
#include <unordered_set>
22-
2323
#include "arrow/array.h"
2424
#include "graphar/api/arrow_reader.h"
2525
#include "graphar/convert_to_arrow_type.h"
26-
#include "graphar/high-level/graph_reader.h"
2726
#include "graphar/label.h"
2827
#include "graphar/types.h"
2928

@@ -264,6 +263,69 @@ Result<std::vector<IdType>> VerticesCollection::filter_by_acero(
264263
return indices64;
265264
}
266265

266+
/**
 * @brief Collect the vertex indices whose property satisfies a filter.
 *
 * A chunk reader is created for the property group that contains
 * `property_name`, the filter expression is installed on it, and the chunks
 * are read one by one. For every chunk that still has rows after filtering,
 * the values of the vertex index column are appended to the result.
 *
 * Which chunks are read depends on the state of this collection:
 *  - already filtered: only the chunks listed in `valid_chunk_`;
 *  - not filtered: every chunk from 0 up to the one covering `vertex_num_`.
 *
 * @param property_name Name of the property; used to look up its property
 *        group.
 * @param filter_expression Expression handed to the chunk reader.
 * @param new_valid_chunk Optional output. When non-null, the index of every
 *        chunk with at least one match is appended to it. When null, an
 *        unfiltered collection records those chunks in its own
 *        `valid_chunk_`; a filtered collection records nothing.
 * @return The matching vertex indices in scan order, or the first error
 *         reported while creating the reader, seeking, or reading a chunk.
 */
Result<std::vector<IdType>> VerticesCollection::filter(
    std::string property_name, std::shared_ptr<Expression> filter_expression,
    std::vector<IdType>* new_valid_chunk) {
  // Keep row counts and offsets in IdType: narrowing them to int overflows
  // on large vertex sets.
  const IdType total_rows = vertex_num_;
  const IdType chunk_size = vertex_info_->GetChunkSize();
  if (chunk_size <= 0) {
    // A non-positive chunk size would make the scan loop below never end.
    return Status::Invalid("The vertex chunk size must be positive.");
  }

  auto property_group = vertex_info_->GetPropertyGroup(property_name);
  if (!property_group) {
    return Status::KeyError("The property ", property_name,
                            " doesn't exist.");
  }
  GAR_ASSIGN_OR_RAISE(auto filter_reader,
                      graphar::VertexPropertyArrowChunkReader::Make(
                          vertex_info_, property_group, prefix_, {}));
  filter_reader->Filter(filter_expression);

  std::vector<IdType> matched_ids;

  // Reads one chunk through the filtering reader, appends its surviving
  // vertex indices to matched_ids and, when chunk_sink is non-null, records
  // the chunk as one that contains matches.
  auto scan_chunk = [&](IdType chunk_idx,
                        std::vector<IdType>* chunk_sink) -> Status {
    GAR_RETURN_NOT_OK(filter_reader->seek(chunk_idx * chunk_size));
    GAR_ASSIGN_OR_RAISE(auto filter_table, filter_reader->GetChunk());
    if (filter_table->num_rows() == 0) {
      return Status::OK();
    }
    if (chunk_sink != nullptr) {
      chunk_sink->emplace_back(chunk_idx);
    }
    const int index_col = filter_table->schema()->GetFieldIndex(
        GeneralParams::kVertexIndexCol);
    if (index_col < 0) {
      return Status::Invalid(
          "The filtered chunk has no vertex index column.");
    }
    // The column is a chunked array; walk every piece rather than only the
    // first one so no matching row is lost.
    for (const auto& piece : filter_table->column(index_col)->chunks()) {
      const auto ids = std::static_pointer_cast<arrow::Int64Array>(piece);
      for (int64_t row = 0; row < ids->length(); ++row) {
        if (!ids->IsNull(row)) {
          matched_ids.push_back(static_cast<IdType>(ids->Value(row)));
        }
      }
    }
    return Status::OK();
  };

  if (is_filtered_) {
    // Iterate over a snapshot so a caller passing &valid_chunk_ as the
    // output cannot invalidate the range being walked.
    const std::vector<IdType> chunks_to_scan = valid_chunk_;
    for (const IdType chunk_idx : chunks_to_scan) {
      GAR_RETURN_NOT_OK(scan_chunk(chunk_idx, new_valid_chunk));
    }
  } else {
    // Honour the caller's output vector when one is given; otherwise keep
    // recording into this collection, as callers that pass nullptr expect.
    std::vector<IdType>* chunk_sink =
        new_valid_chunk != nullptr ? new_valid_chunk : &valid_chunk_;
    for (IdType chunk_idx = 0; chunk_idx * chunk_size < total_rows;
         ++chunk_idx) {
      GAR_RETURN_NOT_OK(scan_chunk(chunk_idx, chunk_sink));
    }
  }
  return matched_ids;
}
328+
267329
Result<std::shared_ptr<VerticesCollection>>
268330
VerticesCollection::verticesWithLabel(
269331
const std::string& filter_label,
@@ -384,6 +446,48 @@ VerticesCollection::verticesWithMultipleLabels(
384446
return new_vertices_collection;
385447
}
386448

449+
/**
 * @brief Query the vertices of a type whose property satisfies a filter.
 *
 * Builds a fresh collection for the vertex type, runs the property filter
 * over it, stores the matching vertex indices in the collection and marks it
 * as filtered.
 *
 * @param property_name Name of the property the filter refers to.
 * @param filter Filter expression applied while reading the property chunks.
 * @param graph_info Graph metadata supplying the path prefix and the vertex
 *        info.
 * @param type Vertex type to look up in `graph_info`.
 * @return The filtered collection, or an error when `graph_info` is null,
 *         the vertex type is unknown, or filtering fails.
 */
Result<std::shared_ptr<VerticesCollection>>
VerticesCollection::verticesWithProperty(
    const std::string property_name, const graphar::util::Filter filter,
    const std::shared_ptr<GraphInfo>& graph_info, const std::string& type) {
  if (!graph_info) {
    return Status::Invalid("The graph info is null.");
  }
  auto vertex_info = graph_info->GetVertexInfo(type);
  if (!vertex_info) {
    // Fail here instead of dereferencing a null vertex info further down.
    return Status::KeyError("The vertex ", type, " doesn't exist.");
  }
  auto vertices_collection = std::make_shared<VerticesCollection>(
      vertex_info, graph_info->GetPrefix());
  // Propagate filter errors to the caller rather than unwrapping blindly.
  GAR_ASSIGN_OR_RAISE(auto filtered_ids,
                      vertices_collection->filter(property_name, filter));
  vertices_collection->filtered_ids_ = std::move(filtered_ids);
  vertices_collection->is_filtered_ = true;
  return vertices_collection;
}
462+
463+
/**
 * @brief Query the vertices of an existing collection whose property
 *        satisfies a filter.
 *
 * The filter is evaluated through `vertices_collection`. When that
 * collection is itself filtered, only ids that it already contains are kept.
 * The result is always marked as filtered, holds its ids in ascending order
 * without duplicates, and lists in `valid_chunk_` exactly the chunks that
 * contain one of those ids, so it can be filtered again.
 *
 * @param property_name Name of the property the filter refers to.
 * @param filter Filter expression applied while reading the property chunks.
 * @param vertices_collection Collection to narrow down.
 * @return A new filtered collection, or an error when
 *         `vertices_collection` is null or filtering fails.
 */
Result<std::shared_ptr<VerticesCollection>>
VerticesCollection::verticesWithProperty(
    const std::string property_name, const graphar::util::Filter filter,
    const std::shared_ptr<VerticesCollection>& vertices_collection) {
  if (!vertices_collection) {
    return Status::Invalid("The vertices collection to filter is null.");
  }
  auto new_vertices_collection = std::make_shared<VerticesCollection>(
      vertices_collection->vertex_info_, vertices_collection->prefix_);
  GAR_ASSIGN_OR_RAISE(
      auto filtered_ids,
      vertices_collection->filter(property_name, filter,
                                  &new_vertices_collection->valid_chunk_));

  if (vertices_collection->is_filtered_) {
    // Keep only ids the source collection already holds. Ids stay IdType
    // throughout; routing them through int would truncate large indices.
    const std::unordered_set<IdType> origin_set(
        vertices_collection->filtered_ids_.begin(),
        vertices_collection->filtered_ids_.end());
    filtered_ids.erase(
        std::remove_if(filtered_ids.begin(), filtered_ids.end(),
                       [&origin_set](IdType id) {
                         return origin_set.count(id) == 0;
                       }),
        filtered_ids.end());
  }

  // Give the result a deterministic, duplicate-free ascending order.
  std::sort(filtered_ids.begin(), filtered_ids.end());
  filtered_ids.erase(std::unique(filtered_ids.begin(), filtered_ids.end()),
                     filtered_ids.end());

  // Rebuild the chunk list from the final ids so that it is populated even
  // when the source collection was unfiltered. The ids are sorted, so equal
  // chunk indices are adjacent and comparing with back() removes repeats.
  const IdType chunk_size =
      new_vertices_collection->vertex_info_->GetChunkSize();
  if (chunk_size > 0) {
    auto& chunks = new_vertices_collection->valid_chunk_;
    chunks.clear();
    for (const IdType id : filtered_ids) {
      const IdType chunk_idx = id / chunk_size;
      if (chunks.empty() || chunks.back() != chunk_idx) {
        chunks.push_back(chunk_idx);
      }
    }
  }

  // The result of a property query is a filtered view whether or not the
  // source was one, matching the graph_info overload.
  new_vertices_collection->is_filtered_ = true;
  new_vertices_collection->filtered_ids_ = std::move(filtered_ids);
  return new_vertices_collection;
}
490+
387491
template <typename T>
388492
Result<T> Vertex::property(const std::string& property) const {
389493
if constexpr (std::is_final<T>::value) {

cpp/src/graphar/high-level/graph_reader.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,10 @@ class VerticesCollection {
382382
Result<std::vector<IdType>> filter_by_acero(
383383
std::vector<std::string> filter_labels) const;
384384

385+
/**
 * @brief Apply a property filter expression to the vertex chunks and collect
 *        the vertex indices of the rows that pass it.
 *
 * @param property_name Name of the property; used to look up the property
 *        group that is read.
 * @param filter_expression The expression installed on the chunk reader.
 * @param new_valid_chunk Optional output vector (defaults to nullptr) for
 *        the indices of chunks that contain matching rows.
 * @return The matching vertex indices wrapped in a Result.
 */
Result<std::vector<IdType>> filter(
386+
std::string property_name, std::shared_ptr<Expression> filter_expression,
387+
std::vector<IdType>* new_valid_chunk = nullptr);
388+
385389
/**
386390
* @brief Query vertices with a specific label
387391
*
@@ -431,6 +435,14 @@ class VerticesCollection {
431435
const std::vector<std::string>& filter_labels,
432436
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type);
433437

438+
/**
 * @brief Query vertices whose property satisfies a filter
 *
 * @param property_name The name of the property the filter refers to.
 * @param filter The filter expression to apply.
 * @param graph_info The graph info providing the prefix and vertex info.
 * @param type The vertex type to query.
 * @return A new VerticesCollection holding the ids returned by the filter,
 *         marked as filtered.
 */
static Result<std::shared_ptr<VerticesCollection>> verticesWithProperty(
439+
const std::string property_name, const graphar::util::Filter filter,
440+
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type);
441+
442+
/**
 * @brief Query vertices whose property satisfies a filter within a given
 * collection
 *
 * @param property_name The name of the property the filter refers to.
 * @param filter The filter expression to apply.
 * @param vertices_collection The collection to filter; when it is already
 *        filtered, only ids it already contains are kept.
 * @return A new VerticesCollection built from the vertex info and prefix of
 *         the given collection.
 */
static Result<std::shared_ptr<VerticesCollection>> verticesWithProperty(
443+
const std::string property_name, const graphar::util::Filter filter,
444+
const std::shared_ptr<VerticesCollection>& vertices_collection);
445+
434446
/**
435447
* @brief Query vertices with multiple labels within a given collection
436448
*

0 commit comments

Comments
 (0)