From dcf4bd649e9f104671bace3c87322388829777a7 Mon Sep 17 00:00:00 2001 From: Weibin Zeng Date: Fri, 1 Dec 2023 13:30:21 +0800 Subject: [PATCH] [BugFix][C++] Finalize S3 in FileSystem destructor (#289) --- .github/workflows/ci.yml | 1 + cpp/CMakeLists.txt | 1 + cpp/include/gar/util/filesystem.h | 2 +- cpp/src/filesystem.cc | 8 ++++++++ cpp/test/test_info.cc | 3 --- 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1e3868edb..8df8e1e2a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -107,6 +107,7 @@ jobs: - name: Test run: | cd build + export ASAN_OPTIONS=detect_leaks=0 export GAR_TEST_DATA=$PWD/../testing/ make test diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0157e785f..fe2bbe1f8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -169,6 +169,7 @@ if (NOT Arrow_FOUND OR NOT ArrowDataset_FOUND OR NOT ArrowAcero_FOUND OR NOT Par message(STATUS "Arrow is not installed, will build from source.") set(BUILD_ARROW_FROM_SOURCE ON) else() + message(STATUS "-- Found Arrow: ${Arrow_VERSION}") set(BUILD_ARROW_FROM_SOURCE OFF) endif() diff --git a/cpp/include/gar/util/filesystem.h b/cpp/include/gar/util/filesystem.h index 6893745de..7d61ed435 100644 --- a/cpp/include/gar/util/filesystem.h +++ b/cpp/include/gar/util/filesystem.h @@ -59,7 +59,7 @@ class FileSystem { explicit FileSystem(std::shared_ptr arrow_fs) : arrow_fs_(arrow_fs) {} - ~FileSystem() = default; + ~FileSystem(); /** * @brief Read and filter a file as an arrow::Table. diff --git a/cpp/src/filesystem.cc b/cpp/src/filesystem.cc index 15d29415d..be314f6ff 100644 --- a/cpp/src/filesystem.cc +++ b/cpp/src/filesystem.cc @@ -18,6 +18,7 @@ limitations under the License. #include "arrow/csv/api.h" #include "arrow/dataset/api.h" #include "arrow/filesystem/api.h" +#include "arrow/filesystem/s3fs.h" #include "arrow/ipc/writer.h" #include "parquet/arrow/writer.h" @@ -267,6 +268,13 @@ Result FileSystem::GetFileNumOfDir(const std::string& dir_path, return static_cast(file_infos.size()); } +FileSystem::~FileSystem() { +#if defined(ARROW_VERSION) && ARROW_VERSION >= 12000000 + // Finalize the S3 client if it is initialized, otherwise it would mutex error + arrow::fs::FinalizeS3(); +#endif +} + Result> FileSystemFromUriOrPath( const std::string& uri_string, std::string* out_path) { if (uri_string.length() >= 1 && uri_string[0] == '/') { diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc index 12546a21f..1b8b1f91b 100644 --- a/cpp/test/test_info.cc +++ b/cpp/test/test_info.cc @@ -367,8 +367,6 @@ TEST_CASE("test_graph_info_load_from_file") { REQUIRE(edge_infos.size() == 1); } -// ISSUE-187 -#if defined(ARROW_VERSION) && ARROW_VERSION < 12000000 TEST_CASE("test_graph_info_load_from_s3") { std::string path = "s3://graphar/ldbc/ldbc.graph.yml" @@ -382,5 +380,4 @@ TEST_CASE("test_graph_info_load_from_s3") { REQUIRE(vertex_infos.size() == 8); REQUIRE(edge_infos.size() == 23); } -#endif } // namespace GAR_NAMESPACE