From 618f695645636fb6d978afe4b4165bbe9a2d37de Mon Sep 17 00:00:00 2001 From: Michal Kuratczyk Date: Mon, 22 Jul 2024 18:17:32 +0200 Subject: [PATCH] Move memory breakdown metrics to new endpoint Collecting them on a large system (tens of thousands of processes or more) can be time consuming as we iterate over all processes. By putting them on a separate endpoint, we make that opt-in --- ...etheus_rabbitmq_core_metrics_collector.erl | 63 ++++++++++--------- .../src/rabbit_prometheus_dispatcher.erl | 3 + .../src/rabbit_prometheus_handler.erl | 1 + .../test/rabbit_prometheus_http_SUITE.erl | 20 ++++-- 4 files changed, 52 insertions(+), 35 deletions(-) diff --git a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl index 3af1df4dfa1a..848e6c764fde 100644 --- a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl +++ b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl @@ -86,35 +86,6 @@ {2, ?MILLISECOND, erlang_uptime_seconds, gauge, "Node uptime", uptime} ]}, - {node_memory, [ - {2, undefined, memory_code_module_bytes, gauge, "Code module memory footprint", code}, - {2, undefined, memory_client_connection_reader_bytes, gauge, "Client connection reader processes footprint in bytes", connection_readers}, - {2, undefined, memory_client_connection_writer_bytes, gauge, "Client connection writer processes footprint in bytes", connection_writers}, - {2, undefined, memory_client_connection_channel_bytes, gauge, "Client connection channel processes footprint in bytes", connection_channels}, - {2, undefined, memory_client_connection_other_bytes, gauge, "Client connection other processes footprint in bytes", connection_other}, - {2, undefined, memory_classic_queue_erlang_process_bytes, gauge, "Classic queue processes footprint in bytes", queue_procs}, - {2, undefined, memory_quorum_queue_erlang_process_bytes, gauge, "Quorum queue processes footprint in bytes", quorum_queue_procs}, - {2, undefined, memory_quorum_queue_dlx_erlang_process_bytes, gauge, "Quorum queue DLX worker processes footprint in bytes", quorum_queue_dlx_procs}, - {2, undefined, memory_stream_erlang_process_bytes, gauge, "Stream processes footprint in bytes", stream_queue_procs}, - {2, undefined, memory_stream_replica_reader_erlang_process_bytes, gauge, "Stream replica reader processes footprint in bytes", stream_queue_replica_reader_procs}, - {2, undefined, memory_stream_coordinator_erlang_process_bytes, gauge, "Stream coordinator processes footprint in bytes", stream_queue_coordinator_procs}, - {2, undefined, memory_plugin_bytes, gauge, "Total plugin footprint in bytes", plugins}, - {2, undefined, memory_modern_metadata_store_bytes, gauge, "Modern metadata store footprint in bytes", metadata_store}, - {2, undefined, memory_other_erlang_process_bytes, gauge, "Other processes footprint in bytes", other_proc}, - {2, undefined, memory_metrics_bytes, gauge, "Metric table footprint in bytes", metrics}, - {2, undefined, memory_management_stats_db_bytes, gauge, "Management stats database footprint in bytes", mgmt_db}, - {2, undefined, memory_classic_metadata_store_bytes, gauge, "Classic metadata store footprint in bytes", mnesia}, - {2, undefined, memory_quorum_queue_ets_table_bytes, gauge, "Quorum queue ETS tables footprint in bytes", quorum_ets}, - {2, undefined, memory_modern_metadata_store_ets_table_bytes, gauge, "Modern metadata store ETS tables footprint in bytes", metadata_store_ets}, - {2, undefined, memory_other_ets_table_bytes, gauge, "Other ETS tables footprint in bytes", other_ets}, - {2, undefined, memory_binary_heap_bytes, gauge, "Binary heap size in bytes", binary}, - {2, undefined, memory_message_index_bytes, gauge, "Message index footprint in bytes", msg_index}, - {2, undefined, memory_atom_table_bytes, gauge, "Atom table size in bytes", atom}, - {2, undefined, memory_other_system_bytes, gauge, "Other runtime footprint in bytes", other_system}, - {2, undefined, memory_runtime_allocated_unused_bytes, gauge, "Runtime allocated but unused blocks size in bytes", allocated_unused}, - {2, undefined, memory_runtime_reserved_unallocated_bytes, gauge, "Runtime reserved but unallocated blocks size in bytes", reserved_unallocated} - ]}, - {node_persister_metrics, [ {2, undefined, io_read_ops_total, counter, "Total number of I/O read operations", io_read_count}, {2, undefined, io_read_bytes_total, counter, "Total number of I/O bytes read", io_read_bytes}, @@ -277,6 +248,36 @@ ]} ]). +-define(METRICS_MEMORY_BREAKDOWN, [ + {node_memory, [ + {2, undefined, memory_code_module_bytes, gauge, "Code module memory footprint", code}, + {2, undefined, memory_client_connection_reader_bytes, gauge, "Client connection reader processes footprint in bytes", connection_readers}, + {2, undefined, memory_client_connection_writer_bytes, gauge, "Client connection writer processes footprint in bytes", connection_writers}, + {2, undefined, memory_client_connection_channel_bytes, gauge, "Client connection channel processes footprint in bytes", connection_channels}, + {2, undefined, memory_client_connection_other_bytes, gauge, "Client connection other processes footprint in bytes", connection_other}, + {2, undefined, memory_classic_queue_erlang_process_bytes, gauge, "Classic queue processes footprint in bytes", queue_procs}, + {2, undefined, memory_quorum_queue_erlang_process_bytes, gauge, "Quorum queue processes footprint in bytes", quorum_queue_procs}, + {2, undefined, memory_quorum_queue_dlx_erlang_process_bytes, gauge, "Quorum queue DLX worker processes footprint in bytes", quorum_queue_dlx_procs}, + {2, undefined, memory_stream_erlang_process_bytes, gauge, "Stream processes footprint in bytes", stream_queue_procs}, + {2, undefined, memory_stream_replica_reader_erlang_process_bytes, gauge, "Stream replica reader processes footprint in bytes", stream_queue_replica_reader_procs}, + {2, undefined, memory_stream_coordinator_erlang_process_bytes, gauge, "Stream coordinator processes footprint in bytes", stream_queue_coordinator_procs}, + {2, undefined, memory_plugin_bytes, gauge, "Total plugin footprint in bytes", plugins}, + {2, undefined, memory_modern_metadata_store_bytes, gauge, "Modern metadata store footprint in bytes", metadata_store}, + {2, undefined, memory_other_erlang_process_bytes, gauge, "Other processes footprint in bytes", other_proc}, + {2, undefined, memory_metrics_bytes, gauge, "Metric table footprint in bytes", metrics}, + {2, undefined, memory_management_stats_db_bytes, gauge, "Management stats database footprint in bytes", mgmt_db}, + {2, undefined, memory_classic_metadata_store_bytes, gauge, "Classic metadata store footprint in bytes", mnesia}, + {2, undefined, memory_quorum_queue_ets_table_bytes, gauge, "Quorum queue ETS tables footprint in bytes", quorum_ets}, + {2, undefined, memory_modern_metadata_store_ets_table_bytes, gauge, "Modern metadata store ETS tables footprint in bytes", metadata_store_ets}, + {2, undefined, memory_other_ets_table_bytes, gauge, "Other ETS tables footprint in bytes", other_ets}, + {2, undefined, memory_binary_heap_bytes, gauge, "Binary heap size in bytes", binary}, + {2, undefined, memory_message_index_bytes, gauge, "Message index footprint in bytes", msg_index}, + {2, undefined, memory_atom_table_bytes, gauge, "Atom table size in bytes", atom}, + {2, undefined, memory_other_system_bytes, gauge, "Other runtime footprint in bytes", other_system}, + {2, undefined, memory_runtime_allocated_unused_bytes, gauge, "Runtime allocated but unused blocks size in bytes", allocated_unused}, + {2, undefined, memory_runtime_reserved_unallocated_bytes, gauge, "Runtime reserved but unallocated blocks size in bytes", reserved_unallocated} + ]}]). + -define(TOTALS, [ %% ordering differs from metrics above, refer to list comprehension {connection_created, connections, gauge, "Connections currently open"}, @@ -305,6 +306,10 @@ collect_mf('per-object', Callback) -> totals(Callback), emit_identity_info(Callback), ok; +collect_mf('memory-breakdown', Callback) -> + collect(false, ?METRIC_NAME_PREFIX, false, ?METRICS_MEMORY_BREAKDOWN, Callback), + emit_identity_info(Callback), + ok; collect_mf(_Registry, Callback) -> PerObjectMetrics = application:get_env(rabbitmq_prometheus, return_per_object_metrics, false), collect(PerObjectMetrics, ?METRIC_NAME_PREFIX, false, ?METRICS_RAW, Callback), diff --git a/deps/rabbitmq_prometheus/src/rabbit_prometheus_dispatcher.erl b/deps/rabbitmq_prometheus/src/rabbit_prometheus_dispatcher.erl index 5ede00f50f7a..e8b5a1d0de3f 100644 --- a/deps/rabbitmq_prometheus/src/rabbit_prometheus_dispatcher.erl +++ b/deps/rabbitmq_prometheus/src/rabbit_prometheus_dispatcher.erl @@ -33,6 +33,9 @@ build_dispatcher() -> prometheus_registry:register_collectors('detailed', [ prometheus_rabbitmq_core_metrics_collector ]), + prometheus_registry:register_collectors('memory-breakdown', [ + prometheus_rabbitmq_core_metrics_collector + ]), rabbit_prometheus_handler:setup(), cowboy_router:compile([{'_', dispatcher()}]). diff --git a/deps/rabbitmq_prometheus/src/rabbit_prometheus_handler.erl b/deps/rabbitmq_prometheus/src/rabbit_prometheus_handler.erl index b5f4076ccab7..ff780d273042 100644 --- a/deps/rabbitmq_prometheus/src/rabbit_prometheus_handler.erl +++ b/deps/rabbitmq_prometheus/src/rabbit_prometheus_handler.erl @@ -46,6 +46,7 @@ is_authorized(ReqData, Context) -> setup() -> setup_metrics(telemetry_registry()), setup_metrics('per-object'), + setup_metrics('memory-breakdown'), setup_metrics('detailed'). setup_metrics(Registry) -> diff --git a/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl b/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl index 8b41466a04eb..1a9c514391be 100644 --- a/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl +++ b/deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl @@ -25,7 +25,8 @@ all() -> {group, commercial}, {group, detailed_metrics}, {group, special_chars}, - {group, authentication} + {group, authentication}, + {group, memory_breakdown_endpoint_metrics} ]. groups() -> @@ -49,6 +50,9 @@ groups() -> endpoint_per_object_metrics, specific_erlang_metrics_present_test ]}, + {memory_breakdown_endpoint_metrics, [], [ + memory_breakdown_metrics_test + ]}, {commercial, [], [ build_info_product_test ]}, @@ -247,7 +251,9 @@ init_per_group(special_chars, Config0) -> init_per_group(authentication, Config) -> Config1 = rabbit_ct_helpers:merge_app_env( Config, {rabbitmq_prometheus, [{authentication, [{enabled, true}]}]}), - init_per_group(authentication, Config1, []). + init_per_group(authentication, Config1, []); +init_per_group(memory_breakdown_endpoint_metrics, Config) -> + init_per_group(memory_breakdown_endpoint_metrics, Config, []). @@ -387,10 +393,6 @@ aggregated_metrics_test(Config) -> ?assertEqual(match, re:run(Body, "^rabbitmq_queue_consumers ", [{capture, none}, multiline])), ?assertEqual(match, re:run(Body, "TYPE rabbitmq_auth_attempts_total", [{capture, none}, multiline])), ?assertEqual(nomatch, re:run(Body, "TYPE rabbitmq_auth_attempts_detailed_total", [{capture, none}, multiline])), - %% Memory breakdown - ?assertEqual(match, re:run(Body, "^rabbitmq_memory_quorum_queue_erlang_process_bytes ", [{capture, none}, multiline])), - ?assertEqual(match, re:run(Body, "^rabbitmq_memory_classic_queue_erlang_process_bytes ", [{capture, none}, multiline])), - ?assertEqual(match, re:run(Body, "^rabbitmq_memory_binary_heap_bytes ", [{capture, none}, multiline])), %% Check the first metric value in each ETS table that requires converting ?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])), ?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])), @@ -437,6 +439,12 @@ per_object_metrics_test(Config, Path) -> %% Check the first TOTALS metric value ?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])). +memory_breakdown_metrics_test(Config) -> + {_Headers, Body} = http_get_with_pal(Config, "/metrics/memory-breakdown", [], 200), + ?assertEqual(match, re:run(Body, "^rabbitmq_memory_quorum_queue_erlang_process_bytes ", [{capture, none}, multiline])), + ?assertEqual(match, re:run(Body, "^rabbitmq_memory_classic_queue_erlang_process_bytes ", [{capture, none}, multiline])), + ?assertEqual(match, re:run(Body, "^rabbitmq_memory_binary_heap_bytes ", [{capture, none}, multiline])). + build_info_test(Config) -> {_Headers, Body} = http_get_with_pal(Config, [], 200), ?assertEqual(match, re:run(Body, "^rabbitmq_build_info{", [{capture, none}, multiline])),