26
26
* should export all the necessary building blocks like `LastValueAccumulation`
27
27
* class and `AggregatorKind` enum
28
28
* - we might look for an alternate solution in this case (no proposal for now)
29
+ *
30
+ * Most of this code will be removed when https://github.com/open-telemetry/opentelemetry-js/issues/4616
31
+ * is solved.
29
32
*/
30
33
/**
31
34
* @typedef {import('@opentelemetry/api').HrTime } HrTime
@@ -52,10 +55,12 @@ const {Aggregation, DataPointType, View} = metrics;
52
55
const { HostMetrics} = require ( '@opentelemetry/host-metrics' ) ;
53
56
54
57
/**
55
- * Copied from `@opentelemetry/sdk-metrics` since it's not exported
58
+ * TODO: Copied from `@opentelemetry/sdk-metrics` since it's not exported
56
59
* https://github.com/open-telemetry/opentelemetry-js/blob/f86251d40fbf615be87319c8a1f5643afb820076/packages/sdk-metrics/src/aggregator/LastValue.ts#L34
57
60
*
58
- * @todo remoce this class and require it when exported
61
+ * Remove when https://github.com/open-telemetry/opentelemetry-js/issues/4616 is fixed
62
+ *
63
+ * @todo remove this class when `@opentelemetry/sdk-metrics` exports it
59
64
* @class
60
65
* @implements {Accumulation}
61
66
*/
@@ -100,8 +105,9 @@ class LastValueAccumulation {
100
105
* @implements {Aggregator<LastValueAccumulation>}
101
106
*/
102
107
class SystemCpuUtilizationAggregator {
103
- // TODO: hardcoded the value of `AggregatorKind` enum for GAUGE
104
- // remove when exported
108
+ // TODO: Hardcoded the value of `AggregatorKind` enum for GAUGE. Remove
109
+ // when issue below is fixed
110
+ // Issue: https://github.com/open-telemetry/opentelemetry-js/issues/4616
105
111
// https://github.com/open-telemetry/opentelemetry-js/blob/f86251d40fbf615be87319c8a1f5643afb820076/packages/sdk-metrics/src/aggregator/types.ts#L23
106
112
kind = 2 ;
107
113
@@ -158,8 +164,19 @@ class SystemCpuUtilizationAggregator {
158
164
}
159
165
160
166
/**
161
- * Does the sum of data points grouping by `system.cpu.logical_number` so we have the total
162
- * utilization per CPU. Basically the value would be 1 - idle_value
167
+ * Groups data points by `system.cpu.logical_number` so we have the total
168
+ * utilization per CPU.
169
+ *
170
+ * We cannot sum up the utilization of all the states since `os.cpus()` is
171
+ * not returning all of the possible states but limited to: user, nice, sys, idle, irq
172
+ * https://nodejs.org/api/all.html#all_os_oscpus
173
+ *
174
+ * whereas on Linux there are more: user, nice, system, idle, iowait, irq, softirq, steal, guest, guest_nice
175
+ * https://man7.org/linux/man-pages/man5/proc.5.html
176
+ *
177
+ * So in order to have the most accurate metric of utilization we use
178
+ * the formula 1 - (idle utilization)
179
+ *
163
180
* As an example given the data points:
164
181
* - { value: 0.1, attributes: { 'system.cpu.logical_number': 0, 'system.cpu.state': 'idle' } }
165
182
* - { value: 0.5, attributes: { 'system.cpu.logical_number': 0, 'system.cpu.state': 'system' } }
@@ -188,15 +205,6 @@ class SystemCpuUtilizationAggregator {
188
205
accumulationByAttributes ,
189
206
endTime
190
207
) {
191
- // We cannot sum up the utilization of all the states since `os.cpus()` is
192
- // not returning all of the possible states but limited to: user, nice, sys, idle, irq
193
- // https://nodejs.org/api/all.html#all_os_oscpus
194
- //
195
- // where in linux we have more: user, nice, system, idle, iowait, irq, softirq, steal, guest, guest_nice
196
- // https://man7.org/linux/man-pages/man5/proc.5.html
197
- //
198
- // So in order to have the most accurate metric of utilization we use
199
- // the formula 1 - (idle utilization)
200
208
return {
201
209
descriptor,
202
210
aggregationTemporality,
@@ -225,27 +233,35 @@ class SystemCpuUtilizationAggregation extends Aggregation {
225
233
/** @type {HostMetrics } */
226
234
let hostMetricsInstance ;
227
235
function enableHostMetrics ( ) {
228
- // TODO: make this configurable, user might collect host metrics with a separate utility
229
- hostMetricsInstance = new HostMetrics ( {
230
- name : '' ,
231
- } ) ;
236
+ // @ts-ignore - config interface expects a `name` property but there is a default value
237
+ hostMetricsInstance = new HostMetrics ( { } ) ;
232
238
hostMetricsInstance . start ( ) ;
233
239
}
234
240
241
+ // It is known that host metrics sends a lot of data so for now we drop some
242
+ // instruments that are not handled by Kibana and do aggregations
243
+ // for others that we want to include shortly (CPU metrics)
244
+ // Ref (data amount issue): https://github.com/elastic/elastic-otel-node/issues/51
245
+ // Ref (metrics in Kibana): https://github.com/elastic/kibana/pull/174700
235
246
/** @type {metrics.View[] } */
236
247
const HOST_METRICS_VIEWS = [
237
- // drop `system.network.*` metrics for now
248
+ // drop `system.network.*` (not in Kibana)
238
249
new View ( {
239
250
instrumentName : 'system.network.*' ,
240
251
aggregation : Aggregation . Drop ( ) ,
241
252
} ) ,
242
- // drop `system.cpu.time` also
243
- // TODO: check if we can do an aggregation here
253
+ // drop `system.cpu.time` (not in Kibana)
244
254
new View ( {
245
255
instrumentName : 'system.cpu.time' ,
246
256
aggregation : Aggregation . Drop ( ) ,
247
257
} ) ,
248
- // use the aggregation we craeted above
258
+ // drop `process.*` (not in Kibana)
259
+ new View ( {
260
+ instrumentName : 'process.*' ,
261
+ aggregation : Aggregation . Drop ( ) ,
262
+ } ) ,
263
+ // Do an aggregation to avoid cardinality problems because of the possible
264
+ // permutations of state & logical_number attributes
249
265
new View ( {
250
266
instrumentName : 'system.cpu.utilization' ,
251
267
aggregation : new SystemCpuUtilizationAggregation ( ) ,
0 commit comments