
Move memory breakdown metrics to new endpoint
Collecting them on a large system (tens of thousands of processes
or more) can be time-consuming, as we iterate over all processes.
By putting them on a separate endpoint, we make collecting them opt-in.
mkuratczyk committed Jul 23, 2024
1 parent e094a9b commit 618f695
Showing 4 changed files with 52 additions and 35 deletions.
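For context on the diff below: the memory breakdown metrics are removed from the aggregated /metrics endpoint and served instead from a dedicated /metrics/memory-breakdown endpoint. A minimal sketch of fetching them with Erlang's built-in HTTP client follows; the fetch_memory_breakdown/0 helper name and the localhost:15692 address (the usual default rabbitmq_prometheus port) are assumptions for illustration, not part of this commit:

    %% Hypothetical helper (not part of the commit): fetch the opt-in memory
    %% breakdown metrics over HTTP from a local node. Assumes the plugin
    %% listens on localhost:15692.
    fetch_memory_breakdown() ->
        {ok, _} = application:ensure_all_started(inets),
        {ok, {{_Version, 200, _Reason}, _Headers, Body}} =
            httpc:request(get,
                          {"http://localhost:15692/metrics/memory-breakdown", []},
                          [], []),
        %% Body is Prometheus text format, e.g. rabbitmq_memory_binary_heap_bytes
        Body.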
@@ -86,35 +86,6 @@
{2, ?MILLISECOND, erlang_uptime_seconds, gauge, "Node uptime", uptime}
]},

{node_memory, [
{2, undefined, memory_code_module_bytes, gauge, "Code module memory footprint", code},
{2, undefined, memory_client_connection_reader_bytes, gauge, "Client connection reader processes footprint in bytes", connection_readers},
{2, undefined, memory_client_connection_writer_bytes, gauge, "Client connection writer processes footprint in bytes", connection_writers},
{2, undefined, memory_client_connection_channel_bytes, gauge, "Client connection channel processes footprint in bytes", connection_channels},
{2, undefined, memory_client_connection_other_bytes, gauge, "Client connection other processes footprint in bytes", connection_other},
{2, undefined, memory_classic_queue_erlang_process_bytes, gauge, "Classic queue processes footprint in bytes", queue_procs},
{2, undefined, memory_quorum_queue_erlang_process_bytes, gauge, "Quorum queue processes footprint in bytes", quorum_queue_procs},
{2, undefined, memory_quorum_queue_dlx_erlang_process_bytes, gauge, "Quorum queue DLX worker processes footprint in bytes", quorum_queue_dlx_procs},
{2, undefined, memory_stream_erlang_process_bytes, gauge, "Stream processes footprint in bytes", stream_queue_procs},
{2, undefined, memory_stream_replica_reader_erlang_process_bytes, gauge, "Stream replica reader processes footprint in bytes", stream_queue_replica_reader_procs},
{2, undefined, memory_stream_coordinator_erlang_process_bytes, gauge, "Stream coordinator processes footprint in bytes", stream_queue_coordinator_procs},
{2, undefined, memory_plugin_bytes, gauge, "Total plugin footprint in bytes", plugins},
{2, undefined, memory_modern_metadata_store_bytes, gauge, "Modern metadata store footprint in bytes", metadata_store},
{2, undefined, memory_other_erlang_process_bytes, gauge, "Other processes footprint in bytes", other_proc},
{2, undefined, memory_metrics_bytes, gauge, "Metric table footprint in bytes", metrics},
{2, undefined, memory_management_stats_db_bytes, gauge, "Management stats database footprint in bytes", mgmt_db},
{2, undefined, memory_classic_metadata_store_bytes, gauge, "Classic metadata store footprint in bytes", mnesia},
{2, undefined, memory_quorum_queue_ets_table_bytes, gauge, "Quorum queue ETS tables footprint in bytes", quorum_ets},
{2, undefined, memory_modern_metadata_store_ets_table_bytes, gauge, "Modern metadata store ETS tables footprint in bytes", metadata_store_ets},
{2, undefined, memory_other_ets_table_bytes, gauge, "Other ETS tables footprint in bytes", other_ets},
{2, undefined, memory_binary_heap_bytes, gauge, "Binary heap size in bytes", binary},
{2, undefined, memory_message_index_bytes, gauge, "Message index footprint in bytes", msg_index},
{2, undefined, memory_atom_table_bytes, gauge, "Atom table size in bytes", atom},
{2, undefined, memory_other_system_bytes, gauge, "Other runtime footprint in bytes", other_system},
{2, undefined, memory_runtime_allocated_unused_bytes, gauge, "Runtime allocated but unused blocks size in bytes", allocated_unused},
{2, undefined, memory_runtime_reserved_unallocated_bytes, gauge, "Runtime reserved but unallocated blocks size in bytes", reserved_unallocated}
]},

{node_persister_metrics, [
{2, undefined, io_read_ops_total, counter, "Total number of I/O read operations", io_read_count},
{2, undefined, io_read_bytes_total, counter, "Total number of I/O bytes read", io_read_bytes},
@@ -277,6 +248,36 @@
]}
]).

-define(METRICS_MEMORY_BREAKDOWN, [
{node_memory, [
{2, undefined, memory_code_module_bytes, gauge, "Code module memory footprint", code},
{2, undefined, memory_client_connection_reader_bytes, gauge, "Client connection reader processes footprint in bytes", connection_readers},
{2, undefined, memory_client_connection_writer_bytes, gauge, "Client connection writer processes footprint in bytes", connection_writers},
{2, undefined, memory_client_connection_channel_bytes, gauge, "Client connection channel processes footprint in bytes", connection_channels},
{2, undefined, memory_client_connection_other_bytes, gauge, "Client connection other processes footprint in bytes", connection_other},
{2, undefined, memory_classic_queue_erlang_process_bytes, gauge, "Classic queue processes footprint in bytes", queue_procs},
{2, undefined, memory_quorum_queue_erlang_process_bytes, gauge, "Quorum queue processes footprint in bytes", quorum_queue_procs},
{2, undefined, memory_quorum_queue_dlx_erlang_process_bytes, gauge, "Quorum queue DLX worker processes footprint in bytes", quorum_queue_dlx_procs},
{2, undefined, memory_stream_erlang_process_bytes, gauge, "Stream processes footprint in bytes", stream_queue_procs},
{2, undefined, memory_stream_replica_reader_erlang_process_bytes, gauge, "Stream replica reader processes footprint in bytes", stream_queue_replica_reader_procs},
{2, undefined, memory_stream_coordinator_erlang_process_bytes, gauge, "Stream coordinator processes footprint in bytes", stream_queue_coordinator_procs},
{2, undefined, memory_plugin_bytes, gauge, "Total plugin footprint in bytes", plugins},
{2, undefined, memory_modern_metadata_store_bytes, gauge, "Modern metadata store footprint in bytes", metadata_store},
{2, undefined, memory_other_erlang_process_bytes, gauge, "Other processes footprint in bytes", other_proc},
{2, undefined, memory_metrics_bytes, gauge, "Metric table footprint in bytes", metrics},
{2, undefined, memory_management_stats_db_bytes, gauge, "Management stats database footprint in bytes", mgmt_db},
{2, undefined, memory_classic_metadata_store_bytes, gauge, "Classic metadata store footprint in bytes", mnesia},
{2, undefined, memory_quorum_queue_ets_table_bytes, gauge, "Quorum queue ETS tables footprint in bytes", quorum_ets},
{2, undefined, memory_modern_metadata_store_ets_table_bytes, gauge, "Modern metadata store ETS tables footprint in bytes", metadata_store_ets},
{2, undefined, memory_other_ets_table_bytes, gauge, "Other ETS tables footprint in bytes", other_ets},
{2, undefined, memory_binary_heap_bytes, gauge, "Binary heap size in bytes", binary},
{2, undefined, memory_message_index_bytes, gauge, "Message index footprint in bytes", msg_index},
{2, undefined, memory_atom_table_bytes, gauge, "Atom table size in bytes", atom},
{2, undefined, memory_other_system_bytes, gauge, "Other runtime footprint in bytes", other_system},
{2, undefined, memory_runtime_allocated_unused_bytes, gauge, "Runtime allocated but unused blocks size in bytes", allocated_unused},
{2, undefined, memory_runtime_reserved_unallocated_bytes, gauge, "Runtime reserved but unallocated blocks size in bytes", reserved_unallocated}
]}]).

-define(TOTALS, [
%% ordering differs from metrics above, refer to list comprehension
{connection_created, connections, gauge, "Connections currently open"},
@@ -305,6 +306,10 @@ collect_mf('per-object', Callback) ->
totals(Callback),
emit_identity_info(Callback),
ok;
collect_mf('memory-breakdown', Callback) ->
collect(false, ?METRIC_NAME_PREFIX, false, ?METRICS_MEMORY_BREAKDOWN, Callback),
emit_identity_info(Callback),
ok;
collect_mf(_Registry, Callback) ->
PerObjectMetrics = application:get_env(rabbitmq_prometheus, return_per_object_metrics, false),
collect(PerObjectMetrics, ?METRIC_NAME_PREFIX, false, ?METRICS_RAW, Callback),
3 changes: 3 additions & 0 deletions deps/rabbitmq_prometheus/src/rabbit_prometheus_dispatcher.erl
@@ -33,6 +33,9 @@ build_dispatcher() ->
prometheus_registry:register_collectors('detailed', [
prometheus_rabbitmq_core_metrics_collector
]),
prometheus_registry:register_collectors('memory-breakdown', [
prometheus_rabbitmq_core_metrics_collector
]),
rabbit_prometheus_handler:setup(),
cowboy_router:compile([{'_', dispatcher()}]).

1 change: 1 addition & 0 deletions deps/rabbitmq_prometheus/src/rabbit_prometheus_handler.erl
@@ -46,6 +46,7 @@ is_authorized(ReqData, Context) ->
setup() ->
setup_metrics(telemetry_registry()),
setup_metrics('per-object'),
setup_metrics('memory-breakdown'),
setup_metrics('detailed').

setup_metrics(Registry) ->
20 changes: 14 additions & 6 deletions deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl
@@ -25,7 +25,8 @@ all() ->
{group, commercial},
{group, detailed_metrics},
{group, special_chars},
{group, authentication}
{group, authentication},
{group, memory_breakdown_endpoint_metrics}
].

groups() ->
@@ -49,6 +50,9 @@ groups() ->
endpoint_per_object_metrics,
specific_erlang_metrics_present_test
]},
{memory_breakdown_endpoint_metrics, [], [
memory_breakdown_metrics_test
]},
{commercial, [], [
build_info_product_test
]},
@@ -247,7 +251,9 @@ init_per_group(special_chars, Config0) ->
init_per_group(authentication, Config) ->
Config1 = rabbit_ct_helpers:merge_app_env(
Config, {rabbitmq_prometheus, [{authentication, [{enabled, true}]}]}),
init_per_group(authentication, Config1, []).
init_per_group(authentication, Config1, []);
init_per_group(memory_breakdown_endpoint_metrics, Config) ->
init_per_group(memory_breakdown_endpoint_metrics, Config, []).



@@ -387,10 +393,6 @@ aggregated_metrics_test(Config) ->
?assertEqual(match, re:run(Body, "^rabbitmq_queue_consumers ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "TYPE rabbitmq_auth_attempts_total", [{capture, none}, multiline])),
?assertEqual(nomatch, re:run(Body, "TYPE rabbitmq_auth_attempts_detailed_total", [{capture, none}, multiline])),
%% Memory breakdown
?assertEqual(match, re:run(Body, "^rabbitmq_memory_quorum_queue_erlang_process_bytes ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_memory_classic_queue_erlang_process_bytes ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_memory_binary_heap_bytes ", [{capture, none}, multiline])),
%% Check the first metric value in each ETS table that requires converting
?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),
@@ -437,6 +439,12 @@ per_object_metrics_test(Config, Path) ->
%% Check the first TOTALS metric value
?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])).

memory_breakdown_metrics_test(Config) ->
{_Headers, Body} = http_get_with_pal(Config, "/metrics/memory-breakdown", [], 200),
?assertEqual(match, re:run(Body, "^rabbitmq_memory_quorum_queue_erlang_process_bytes ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_memory_classic_queue_erlang_process_bytes ", [{capture, none}, multiline])),
?assertEqual(match, re:run(Body, "^rabbitmq_memory_binary_heap_bytes ", [{capture, none}, multiline])).

build_info_test(Config) ->
{_Headers, Body} = http_get_with_pal(Config, [], 200),
?assertEqual(match, re:run(Body, "^rabbitmq_build_info{", [{capture, none}, multiline])),