Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

basic prototype for prometheus logging #146

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ OFF)
option(USE_JSON_GENERATED_PERF_EVENTS "Add performance events generated using
Intel json spec, see hbt/src/perf_event/json_events/intel"
OFF)
# Opt-in switch for the Prometheus logging backend; disabled by default.
option(USE_PROMETHEUS "Enable logging to prometheus, this requires
prometheus-cpp to be installed on the system"
OFF)

# When the backend is requested, locate prometheus-cpp through its CMake
# package config. REQUIRED makes configuration stop with an error if the
# package cannot be found.
if(USE_PROMETHEUS)
find_package(prometheus-cpp CONFIG REQUIRED)
endif()

file(READ "version.txt" DYNOLOG_VERSION)
string(STRIP ${DYNOLOG_VERSION} DYNOLOG_VERSION)
Expand Down
9 changes: 9 additions & 0 deletions dynolog/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
cmake_minimum_required(VERSION 3.16)
add_definitions(-DDYNOLOG_VERSION=${DYNOLOG_VERSION} -DDYNOLOG_GIT_REV=${DYNOLOG_GIT_REV})

message("Use Prometheus = ${USE_PROMETHEUS}")
message("Use ODS Graph API = ${USE_ODS_GRAPH_API}")

# our build script will first create a src/ dir where all source code will exist
file (GLOB dynolog_src "*.h" "*.cpp")

Expand All @@ -14,6 +17,12 @@ if(USE_ODS_GRAPH_API)
target_compile_options(dynolog_lib PUBLIC "-DUSE_GRAPH_ENDPOINT")
endif()

# Optional Prometheus backend: compile the USE_PROMETHEUS-guarded sources and
# link against the prometheus-cpp pull (HTTP exposer) library.
if(USE_PROMETHEUS)
  find_package(prometheus-cpp CONFIG REQUIRED)
  add_definitions(-DUSE_PROMETHEUS)
  # PUBLIC rather than PRIVATE: PrometheusLogger.h itself includes
  # <prometheus/...> headers, so every target that includes that header
  # through dynolog_lib needs prometheus-cpp's include directories and link
  # dependency propagated to it.
  target_link_libraries(dynolog_lib PUBLIC prometheus-cpp::pull)
endif()

target_link_libraries(dynolog_lib PUBLIC Monitor)
target_link_libraries(dynolog_lib PUBLIC BuiltinMetrics)

Expand Down
22 changes: 17 additions & 5 deletions dynolog/src/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,22 @@
#include "dynolog/src/tracing/IPCMonitor.h"
#include "hbt/src/perf_event/BuiltinMetrics.h"

#ifdef USE_PROMETHEUS
#include "dynolog/src/PrometheusLogger.h"
#endif

using namespace dynolog;
using json = nlohmann::json;
namespace hbt = facebook::hbt;

DEFINE_int32(port, 1778, "Port for listening RPC requests : FUTURE");
DEFINE_int32(port, 1778, "Port for listening RPC requests.");
DEFINE_bool(use_JSON, false, "Emit metrics to JSON file through JSON logger");
#ifdef USE_PROMETHEUS
DEFINE_bool(use_prometheus, false, "Emit metrics to Prometheus");
#endif
DEFINE_bool(use_fbrelay, false, "Emit metrics to FB Relay on Lab machines");
DEFINE_bool(use_ODS, false, "Emit metrics to ODS through ODS logger");
DEFINE_bool(use_scuba, false, "Emit metrics to Scuba through Scuba logger");
DEFINE_int32(
kernel_monitor_reporting_interval_s,
60,
Expand All @@ -41,14 +52,10 @@ DEFINE_int32(
dcgm_reporting_interval_s,
10,
"Duration in seconds to read and report metrics for DCGM");
DEFINE_bool(use_fbrelay, false, "Emit metrics to FB Relay on Lab machines");
DEFINE_bool(
enable_ipc_monitor,
false,
"Enabled IPC monitor for on system tracing requests.");
DEFINE_bool(use_ODS, false, "Emit metrics to ODS through ODS logger");
DEFINE_bool(use_JSON, false, "Emit metrics to JSON file through JSON logger");
DEFINE_bool(use_scuba, false, "Emit metrics to Scuba through Scuba logger");
DEFINE_bool(
enable_gpu_monitor,
false,
Expand All @@ -57,6 +64,11 @@ DEFINE_bool(enable_perf_monitor, false, "Enable heartbeat perf monitoring.");

std::unique_ptr<Logger> getLogger(const std::string& scribe_category = "") {
std::vector<std::unique_ptr<Logger>> loggers;
#ifdef USE_PROMETHEUS
if (FLAGS_use_prometheus) {
loggers.push_back(std::make_unique<PrometheusLogger>());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Have the Prometheus server port/ip binding parameterized

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes added that :)

}
#endif
if (FLAGS_use_fbrelay) {
loggers.push_back(std::make_unique<FBRelayLogger>());
}
Expand Down
22 changes: 22 additions & 0 deletions dynolog/src/Metrics.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

#include "dynolog/src/Metrics.h"

namespace dynolog {

const std::vector<MetricDesc>& getAllMetrics() {
static std::vector<MetricDesc> metrics_ = {
{.name = "cpu_util",
.type = MetricType::Ratio,
.desc = "Fraction of total CPU time spend on user or system mode."},
{.name = "uptime",
.type = MetricType::Instant,
.desc = "How long the system has been running in seconds."},
};
return metrics_;
}

} // namespace dynolog
28 changes: 28 additions & 0 deletions dynolog/src/Metrics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are Metrics specific to Prometheus? If so, should the naming reflect that?

//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <string>
#include <vector>

namespace dynolog {

// Classification of a metric's value.
// NOTE(review): the precise meaning of each enumerator is not spelled out
// in this header. In Metrics.cpp, "uptime" is tagged Instant and "cpu_util"
// (a fraction) is tagged Ratio; Delta and Rate are not used there yet.
enum class MetricType {
Delta,
Instant,
Ratio,
Rate,
};

// Static description of a single metric.
struct MetricDesc {
// Key under which the metric is logged (e.g. "cpu_util").
std::string name;
// Classification of the metric's value, see MetricType.
MetricType type;
// Human-readable description of the metric.
std::string desc;
};

// Returns the list of all metric descriptions (defined in Metrics.cpp).
const std::vector<MetricDesc>& getAllMetrics();

} // namespace dynolog
82 changes: 82 additions & 0 deletions dynolog/src/PrometheusLogger.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

#include "dynolog/src/PrometheusLogger.h"
#include "dynolog/src/Metrics.h"

#include <fmt/format.h>
#include <glog/logging.h>

#ifdef USE_PROMETHEUS
// Brings the prometheus-cpp names used below (Registry, BuildGauge, ...)
// into scope for this translation unit only.
using namespace prometheus;

// Port used by the PrometheusManager constructor when it builds the
// exposer's bind address. Declared in PrometheusLogger.h so that other
// translation units (e.g. the unit test) can override it.
DEFINE_int32(
prometheus_port,
8080,
"Port to setup HTTP server for Prometheus to scrape.");

namespace dynolog {

// Registers a gauge family in `registry` for the metric `m`, using the
// metric's name as the family name and its description as the help text.
// Returns a reference to the registered family.
inline auto& buildGaugeFromMetric(const MetricDesc& m, Registry& registry) {
  auto gauge_builder = BuildGauge();
  gauge_builder.Name(m.name);
  gauge_builder.Help(m.desc);
  return gauge_builder.Register(registry);
}

// Brings up the exposer on 127.0.0.1:<FLAGS_prometheus_port>, creates the
// registry, registers one gauge per entry of getAllMetrics() and finally
// attaches the registry to the exposer.
PrometheusManager::PrometheusManager()
    : exposer_(fmt::format("127.0.0.1:{}", FLAGS_prometheus_port)) {
  // The exposer has already been constructed by the initializer list above.
  LOG(INFO) << "Initialized prometheus HTTP server on port = "
            << FLAGS_prometheus_port;

  // Registry that will hold every metric family created below.
  registry_ = std::make_shared<Registry>();

  // Every metric type is represented as a Gauge for now. Keep a pointer to
  // each gauge, keyed by metric name, so log() can find it later.
  for (const MetricDesc& metric : getAllMetrics()) {
    auto& family = buildGaugeFromMetric(metric, *registry_);
    // NOTE(review): the host_name label value is a fixed placeholder.
    auto& gauge = family.Add({{"host_name", "test"}});
    gauges_.insert_or_assign(metric.name, &gauge);
  }

  // Make the populated registry visible to the exposer.
  exposer_.RegisterCollectable(registry_);
}

// Sets the gauge registered under `key` to `val`. Keys with no registered
// gauge (or with a null gauge pointer) are silently ignored.
void PrometheusManager::log(const std::string& key, double val) {
  const auto entry = gauges_.find(key);
  const bool is_known = entry != gauges_.end();
  if (is_known && entry->second != nullptr) {
    entry->second->Set(val);
  }
}

static std::shared_ptr<PrometheusManager> singleton_() {
static std::shared_ptr<PrometheusManager> manager_ =
std::make_shared<PrometheusManager>();
return manager_;
}

// static
// Returns the shared manager bundled with a lock on its mutex; the lock is
// held until the returned LoggingGuard is destroyed.
PrometheusManager::LoggingGuard PrometheusManager::singleton() {
  std::shared_ptr<PrometheusManager> instance = singleton_();
  // Members are initialized positionally in declaration order:
  // manager first, then lock_guard.
  return LoggingGuard{instance, instance->lock()};
}

// Buffers `val` under `key`, overwriting any value previously buffered for
// the same key. Nothing is sent to Prometheus until finalize().
void PrometheusLogger::logImpl(const std::string& key, double val) {
  kvs_.insert_or_assign(key, val);
}

// Pushes every buffered key/value pair to the shared PrometheusManager while
// holding its logging guard.
void PrometheusLogger::finalize() {
  // The guard keeps the manager's mutex locked until this function returns.
  auto guard = PrometheusManager::singleton();
  auto manager = guard.manager;
  for (const auto& entry : kvs_) {
    manager->log(entry.first, entry.second);
  }
}

} // namespace dynolog
#endif // USE_PROMETHEUS
92 changes: 92 additions & 0 deletions dynolog/src/PrometheusLogger.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include "dynolog/src/Logger.h"

#ifdef USE_PROMETHEUS
#include <prometheus/counter.h>
#include <prometheus/exposer.h>
#include <prometheus/gauge.h>
#include <prometheus/registry.h>
#endif

#include <memory>
#include <mutex>
#include <unordered_map>

#ifdef USE_PROMETHEUS

DECLARE_int32(prometheus_port);

namespace dynolog {

// Owns the Prometheus exposer, the metric registry and the gauges created
// for the metrics returned by getAllMetrics(). Callers obtain the shared
// instance through singleton(), which also hands back a held lock.
class PrometheusManager {
public:
// Bundles the shared manager with a lock on its mutex. The mutex stays
// locked for as long as the LoggingGuard object is alive.
struct LoggingGuard {
std::shared_ptr<PrometheusManager> manager;
std::lock_guard<std::mutex> lock_guard;
};

// Starts the exposer on FLAGS_prometheus_port and registers one gauge per
// known metric (see PrometheusLogger.cpp).
PrometheusManager();

// Note that this method is not thread-safe and so
// should only be used with the LoggingGuard in scope
// Sets the gauge registered under `key` to `val`; unknown keys are ignored.
void log(const std::string& key, double val);

// Returns the shared instance together with a lock on its mutex.
static LoggingGuard singleton();

private:
// Locks mutex_ and returns the guard by value. std::lock_guard is neither
// copyable nor movable, so this relies on C++17 guaranteed copy elision.
std::lock_guard<std::mutex> lock() {
return std::lock_guard{mutex_};
}

std::mutex mutex_;
prometheus::Exposer exposer_;
std::shared_ptr<prometheus::Registry> registry_;

// only store a reference to Gauge because copying is not allowed
// (stored as raw, non-owning pointers to gauges obtained from the families
// registered in registry_, keyed by metric name)
std::unordered_map<std::string, prometheus::Gauge*> gauges_;

// Should match googletest/include/gtest/gtest_prod.h
// friend class test_case_name##_##test_name##_Test
// Lets the ExporterTest test case read gauges_ directly.
friend class PrometheusLoggerTest_ExporterTest_Test;
};

// Logger backend that buffers numeric key/value pairs and, on finalize(),
// forwards them to the shared PrometheusManager.
class PrometheusLogger : public Logger {
 public:
  // Intentionally a no-op: the timestamp passed in is not recorded.
  void setTimestamp(Timestamp /*ts*/) override {}

  // The numeric overloads below all widen the value to double and buffer it.
  void logInt(const std::string& key, int64_t val) override {
    const auto as_double = static_cast<double>(val);
    logImpl(key, as_double);
  }

  void logFloat(const std::string& key, float val) override {
    const auto as_double = static_cast<double>(val);
    logImpl(key, as_double);
  }

  void logUint(const std::string& key, uint64_t val) override {
    const auto as_double = static_cast<double>(val);
    logImpl(key, as_double);
  }

  // String values are not supported; the call is accepted and ignored.
  void logStr(const std::string& /*key*/, const std::string& /*val*/) override {
  }

  // Sends all buffered values to the PrometheusManager.
  void finalize() override;

 private:
  // Stores `val` under `key` in kvs_.
  void logImpl(const std::string& key, double val);

  // Values buffered since construction, keyed by metric name.
  std::unordered_map<std::string, double> kvs_;

  // gtest-generated test classes that need access to kvs_.
  friend class PrometheusLoggerTest_BasicTest_Test;
  friend class PrometheusLoggerTest_ExporterTest_Test;
};

} // namespace dynolog
#endif // USE_PROMETHEUS
4 changes: 4 additions & 0 deletions dynolog/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
include(${PROJECT_SOURCE_DIR}/testing/BuildTests.cmake)

dynolog_add_test(KernelCollecterTest KernelCollecterTest.cpp)
# Build the Prometheus logger test only when the Prometheus backend is
# enabled. The define is required because the contents of PrometheusLogger.h
# are guarded by USE_PROMETHEUS.
if(USE_PROMETHEUS)
add_definitions(-DUSE_PROMETHEUS)
dynolog_add_test(PrometheusLoggerTest PrometheusLoggerTest.cpp)
endif()

add_subdirectory(rpc)
add_subdirectory(tracing)
Expand Down
56 changes: 56 additions & 0 deletions dynolog/tests/PrometheusLoggerTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

#include "dynolog/src/PrometheusLogger.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "dynolog/src/Metrics.h"

namespace dynolog {

TEST(PrometheusLoggerTest, BasicTest) {
  // Verifies that each numeric log function buffers its value in the
  // logger's key/value map.
  PrometheusLogger logger;
  logger.logInt("uptime", 10000);
  logger.logFloat("pi", 3.1457);
  logger.logUint("cpu_util", 25);

  auto& buffered = logger.kvs_;
  EXPECT_FLOAT_EQ(buffered["uptime"], 10000);
  EXPECT_FLOAT_EQ(buffered["pi"], 3.1457);
  EXPECT_FLOAT_EQ(buffered["cpu_util"], 25);

  // finalize() is deliberately NOT called here so that nothing is sent to
  // the prometheus exporter.
}

TEST(PrometheusLoggerTest, ExporterTest) {
  // End-to-end check: values logged through PrometheusLogger must show up in
  // the gauges held by the shared PrometheusManager after finalize().

  /* Allow Prometheus exporter to use any available port*/
  FLAGS_prometheus_port = 0;

  // i counts the values logged, j the values verified; both double as the
  // value written to / expected from each metric.
  float i = 0, j = 0;
  {
    PrometheusLogger logger;
    for (const auto& m : getAllMetrics()) {
      logger.logFloat(m.name, i++);
    }
    logger.finalize();

    auto logging_guard = PrometheusManager::singleton();
    auto prom = logging_guard.manager;
    for (const auto& m : getAllMetrics()) {
      // Look the gauge up with find() rather than operator[]: operator[]
      // would default-insert a null pointer for a missing key and the
      // dereference would crash the test binary instead of failing the test.
      const auto it = prom->gauges_.find(m.name);
      ASSERT_TRUE(it != prom->gauges_.end())
          << "Metric " << m.name << " has no registered gauge";
      ASSERT_TRUE(it->second != nullptr)
          << "Metric " << m.name << " has a null gauge";
      EXPECT_FLOAT_EQ(it->second->Value(), j)
          << "Metric " << m.name << " did not match expected value";
      j++;
    }
  }
  ASSERT_GT(i, 0) << "No metrics were logged!";
  ASSERT_EQ(i, j);
}

} // namespace dynolog
Loading