diff --git a/scripts/config_hdfs.sh b/scripts/config_hdfs.sh index 0ad1260c61..de8da9fbf2 100755 --- a/scripts/config_hdfs.sh +++ b/scripts/config_hdfs.sh @@ -58,6 +58,11 @@ if [ ! -d "$HADOOP_HOME/etc/hadoop" ] || [ ! -d "$HADOOP_HOME/share/hadoop" ]; t echo "HADOOP_HOME must be set to the location of your Hadoop jars and core-site.xml." return 1 fi + +# Reduces the use of operating system signals by the JVM. See details: +# https://docs.oracle.com/javase/8/docs/technotes/tools/unix/java.html +export LIBHDFS_OPTS="-Xrs" + export CLASSPATH=$CLASSPATH:$HADOOP_HOME/etc/hadoop/ for f in $HADOOP_HOME/share/hadoop/common/lib/*.jar; do export CLASSPATH=$CLASSPATH:$f diff --git a/src/block_service/hdfs/hdfs_service.cpp b/src/block_service/hdfs/hdfs_service.cpp index b8e9dee4ee..e303710496 100644 --- a/src/block_service/hdfs/hdfs_service.cpp +++ b/src/block_service/hdfs/hdfs_service.cpp @@ -18,10 +18,13 @@ #include #include #include +#include #include #include +#include #include +#include "fmt/core.h" #include "hdfs/hdfs.h" #include "hdfs_service.h" #include "rocksdb/slice.h" @@ -69,28 +72,57 @@ namespace block_service { DEFINE_TASK_CODE(LPC_HDFS_SERVICE_CALL, TASK_PRIORITY_COMMON, THREAD_POOL_BLOCK_SERVICE) +namespace { +static const char *LIBHDFS_OPTS = "LIBHDFS_OPTS"; +static const char *REDUCE_SIGNAL_OPT = "-Xrs"; + +bool check_LIBHDFS_OPTS() +{ + char *str = ::getenv(LIBHDFS_OPTS); + if (str == nullptr) { + return false; + } + + std::unordered_set opts; + utils::split_args(str, opts, ' '); + return opts.count(REDUCE_SIGNAL_OPT) > 0; +} + +void adjust_LIBHDFS_OPTS() +{ + if (!check_LIBHDFS_OPTS()) { + // Reduces the use of operating system signals by the JVM. See details: + // https://docs.oracle.com/javase/8/docs/technotes/tools/unix/java.html + std::string new_env; + char *old_env = ::getenv(LIBHDFS_OPTS); + if (old_env == nullptr) { + new_env = REDUCE_SIGNAL_OPT; + } else { + new_env = fmt::format("{} {}", old_env, REDUCE_SIGNAL_OPT); + } + ::setenv(LIBHDFS_OPTS, new_env.c_str(), 1); + LOG_WARNING("update '{}' to '{}'", LIBHDFS_OPTS, new_env); + } + CHECK_TRUE(check_LIBHDFS_OPTS()); +} +} // anonymous namespace + hdfs_service::hdfs_service() { _read_token_bucket.reset(new folly::DynamicTokenBucket()); _write_token_bucket.reset(new folly::DynamicTokenBucket()); } -hdfs_service::~hdfs_service() -{ - // We should not call hdfsDisconnect() here if jvm has exited. - // And there is no simple, safe way to call hdfsDisconnect() - // when process terminates (the proper solution is likely to create a - // signal handler to detect when the process is killed, but we would still - // leak when pegasus crashes). - // - // close(); -} +hdfs_service::~hdfs_service() { close(); } error_code hdfs_service::initialize(const std::vector &args) { if (args.size() < 1) { return ERR_INVALID_PARAMETERS; } + + adjust_LIBHDFS_OPTS(); + // Name_node and root_path should be set in args of block_service configuration. // If no path was configured, just use "/" as default root path. _hdfs_name_node = args[0];