Skip to content

Commit d05707b

Browse files
committed
fix(native): Fix OS metrics to report cumulative values for AVG type
All 6 OS-related metrics were defined as **AVG** type but reported as **delta values**, causing incorrect averaging and potential data loss in Prometheus monitoring. Changed metrics to report **cumulative values** since process start: - presto_cpp.os_user_cpu_time_micros - presto_cpp.os_system_cpu_time_micros - presto_cpp.os_num_soft_page_faults - presto_cpp.os_num_hard_page_faults - presto_cpp.os_num_voluntary_context_switches - presto_cpp.os_num_forced_context_switches This ensures: 1. Alignment with other AVG metrics in the system (task counts, cache sizes, etc.) 2. Proper rate calculations in monitoring systems and no data loss regardless of scraping intervals
1 parent 12cd04c commit d05707b

File tree

2 files changed

+6
-26
lines changed

2 files changed

+6
-26
lines changed

presto-native-execution/presto_cpp/main/PeriodicTaskManager.cpp

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -441,38 +441,26 @@ void PeriodicTaskManager::updateOperatingSystemStats() {
441441
const int64_t userCpuTimeUs{
442442
static_cast<int64_t>(usage.ru_utime.tv_sec) * 1'000'000 +
443443
static_cast<int64_t>(usage.ru_utime.tv_usec)};
444-
RECORD_METRIC_VALUE(
445-
kCounterOsUserCpuTimeMicros, userCpuTimeUs - lastUserCpuTimeUs_);
446-
lastUserCpuTimeUs_ = userCpuTimeUs;
444+
RECORD_METRIC_VALUE(kCounterOsUserCpuTimeMicros, userCpuTimeUs);
447445

448446
const int64_t systemCpuTimeUs{
449447
static_cast<int64_t>(usage.ru_stime.tv_sec) * 1'000'000 +
450448
static_cast<int64_t>(usage.ru_stime.tv_usec)};
451-
RECORD_METRIC_VALUE(
452-
kCounterOsSystemCpuTimeMicros, systemCpuTimeUs - lastSystemCpuTimeUs_);
453-
lastSystemCpuTimeUs_ = systemCpuTimeUs;
449+
RECORD_METRIC_VALUE(kCounterOsSystemCpuTimeMicros, systemCpuTimeUs);
454450

455451
const int64_t softPageFaults{usage.ru_minflt};
456-
RECORD_METRIC_VALUE(
457-
kCounterOsNumSoftPageFaults, softPageFaults - lastSoftPageFaults_);
458-
lastSoftPageFaults_ = softPageFaults;
452+
RECORD_METRIC_VALUE(kCounterOsNumSoftPageFaults, softPageFaults);
459453

460454
const int64_t hardPageFaults{usage.ru_majflt};
461-
RECORD_METRIC_VALUE(
462-
kCounterOsNumHardPageFaults, hardPageFaults - lastHardPageFaults_);
463-
lastHardPageFaults_ = hardPageFaults;
455+
RECORD_METRIC_VALUE(kCounterOsNumHardPageFaults, hardPageFaults);
464456

465457
const int64_t voluntaryContextSwitches{usage.ru_nvcsw};
466458
RECORD_METRIC_VALUE(
467-
kCounterOsNumVoluntaryContextSwitches,
468-
voluntaryContextSwitches - lastVoluntaryContextSwitches_);
469-
lastVoluntaryContextSwitches_ = voluntaryContextSwitches;
459+
kCounterOsNumVoluntaryContextSwitches, voluntaryContextSwitches);
470460

471461
const int64_t forcedContextSwitches{usage.ru_nivcsw};
472462
RECORD_METRIC_VALUE(
473-
kCounterOsNumForcedContextSwitches,
474-
forcedContextSwitches - lastForcedContextSwitches_);
475-
lastForcedContextSwitches_ = forcedContextSwitches;
463+
kCounterOsNumForcedContextSwitches, forcedContextSwitches);
476464
}
477465

478466
void PeriodicTaskManager::addOperatingSystemStatsUpdateTask() {

presto-native-execution/presto_cpp/main/PeriodicTaskManager.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,6 @@ class PeriodicTaskManager {
142142
std::shared_ptr<velox::connector::Connector>>& connectors_;
143143
PrestoServer* server_;
144144

145-
// Operating system related stats.
146-
int64_t lastUserCpuTimeUs_{0};
147-
int64_t lastSystemCpuTimeUs_{0};
148-
int64_t lastSoftPageFaults_{0};
149-
int64_t lastHardPageFaults_{0};
150-
int64_t lastVoluntaryContextSwitches_{0};
151-
int64_t lastForcedContextSwitches_{0};
152-
153145
int64_t lastHttpClientNumConnectionsCreated_{0};
154146

155147
// NOTE: declare last since the threads access other members of `this`.

0 commit comments

Comments
 (0)