From ab766d976eebf2c4685157be037e2a1dba784516 Mon Sep 17 00:00:00 2001 From: macpie Date: Mon, 31 Jul 2023 14:27:47 -0700 Subject: [PATCH] Remove useless metrics (#983) * Remove useless metrics * Remove unused functions --- include/metrics.hrl | 42 +-- src/cli/router_cli_organization.erl | 1 - src/decoders/router_decoder.erl | 11 +- src/device/router_device_channels_worker.erl | 7 - src/device/router_device_routing.erl | 50 +--- src/device/router_device_worker.erl | 5 +- src/grpc/helium_packet_service.erl | 6 +- src/grpc/helium_router_service.erl | 5 +- src/metrics/router_metrics.erl | 253 ++----------------- src/router_sc_worker.erl | 2 - src/router_xor_filter_worker.erl | 3 - test/router_lorawan_handler_test.erl | 3 +- test/router_metrics_SUITE.erl | 41 +-- test/router_test_ics_route_service.erl | 3 +- 14 files changed, 35 insertions(+), 397 deletions(-) diff --git a/include/metrics.hrl b/include/metrics.hrl index 266c80514..8dd311452 100644 --- a/include/metrics.hrl +++ b/include/metrics.hrl @@ -1,61 +1,23 @@ -define(METRICS_TICK_INTERVAL, timer:seconds(10)). -define(METRICS_TICK, '__router_metrics_tick'). --define(METRICS_DC, "router_dc_balance"). --define(METRICS_SC_OPENED_COUNT, "router_state_channel_opened_count"). --define(METRICS_SC_OVERSPENT_COUNT, "router_state_channel_overspent_count"). --define(METRICS_SC_ACTIVE_COUNT, "router_state_channel_active_count"). --define(METRICS_SC_ACTIVE_BALANCE, "router_state_channel_active_balance"). --define(METRICS_SC_ACTIVE_ACTORS, "router_state_channel_active_actors"). --define(METRICS_SC_CLOSE_CONFLICT, "router_state_channel_close_conflicts"). --define(METRICS_ROUTING_OFFER, "router_device_routing_offer_duration"). -define(METRICS_ROUTING_PACKET, "router_device_routing_packet_duration"). --define(METRICS_PACKET_TRIP, "router_device_packet_trip_duration"). --define(METRICS_PACKET_HOLD_TIME, "router_device_packet_hold_time_duration"). --define(METRICS_PACKET_ERROR, "router_device_packet_error_count"). --define(METRICS_DECODED_TIME, "router_decoder_decoded_duration"). --define(METRICS_FUN_DURATION, "router_function_duration"). --define(METRICS_CONSOLE_API_TIME, "router_console_api_duration"). --define(METRICS_DOWNLINK, "router_device_downlink_packet"). +-define(METRICS_CONSOLE_API, "router_console_api_duration"). -define(METRICS_WS, "router_ws_state"). --define(METRICS_CHAIN_BLOCKS, "router_blockchain_blocks"). -define(METRICS_VM_CPU, "router_vm_cpu"). -define(METRICS_VM_PROC_Q, "router_vm_process_queue"). -define(METRICS_VM_ETS_MEMORY, "router_vm_ets_memory"). --define(METRICS_XOR_FILTER, "router_xor_filter"). --define(METRICS_GRPC_CONNECTION_COUNT, "router_grpc_connection_count"). --define(METRICS_SC_CLOSE_SUBMIT, "router_sc_close_submit_count"). -define(METRICS_DEVICE_TOTAL, "router_device_total_gauge"). -define(METRICS_DEVICE_RUNNING, "router_device_running_gauge"). -define(METRICS, [ - {?METRICS_DC, prometheus_gauge, [], "Active State Channel balance"}, - {?METRICS_SC_OPENED_COUNT, prometheus_gauge, [], "Opened State Channels count"}, - {?METRICS_SC_OVERSPENT_COUNT, prometheus_gauge, [], "Overspent State Channels count"}, - {?METRICS_SC_ACTIVE_COUNT, prometheus_gauge, [], "Active State Channels count"}, - {?METRICS_SC_ACTIVE_BALANCE, prometheus_gauge, [], "Active State Channels balance"}, - {?METRICS_SC_ACTIVE_ACTORS, prometheus_gauge, [], "Active State Channels actors"}, - {?METRICS_SC_CLOSE_CONFLICT, prometheus_gauge, [], "State Channels close with conflicts"}, - {?METRICS_ROUTING_OFFER, prometheus_histogram, [type, status, reason], - "Routing Offer duration"}, {?METRICS_ROUTING_PACKET, prometheus_histogram, [type, status, reason, downlink], "Routing Packet duration"}, - {?METRICS_PACKET_TRIP, prometheus_histogram, [type, downlink], "Packet round trip duration"}, - {?METRICS_PACKET_HOLD_TIME, prometheus_histogram, [type], "Packet hold time duration"}, - {?METRICS_PACKET_ERROR, prometheus_counter, [type, error], "Packet errors in routing"}, - {?METRICS_DECODED_TIME, prometheus_histogram, [type, status], "Decoder decoded duration"}, - {?METRICS_FUN_DURATION, prometheus_histogram, [function], "Function duration"}, - {?METRICS_CONSOLE_API_TIME, prometheus_histogram, [type, status], "Console API duration"}, - {?METRICS_DOWNLINK, prometheus_counter, [type, status], "Downlink count"}, + {?METRICS_CONSOLE_API, prometheus_histogram, [type, status], "Console API duration"}, {?METRICS_WS, prometheus_boolean, [], "Websocket State"}, - {?METRICS_CHAIN_BLOCKS, prometheus_gauge, [], "Router's blockchain blocks"}, {?METRICS_VM_CPU, prometheus_gauge, [cpu], "Router CPU usage"}, {?METRICS_VM_PROC_Q, prometheus_gauge, [name], "Router process queue"}, {?METRICS_VM_ETS_MEMORY, prometheus_gauge, [name], "Router ets memory"}, - {?METRICS_XOR_FILTER, prometheus_counter, [], "Router XOR Filter udpates"}, - {?METRICS_GRPC_CONNECTION_COUNT, prometheus_gauge, [], "Number of active GRPC Connections"}, - {?METRICS_SC_CLOSE_SUBMIT, prometheus_counter, [status], - "Router state channels close txn status"}, {?METRICS_DEVICE_TOTAL, prometheus_gauge, [], "Device total gauge"}, {?METRICS_DEVICE_RUNNING, prometheus_gauge, [], "Device running gauge"} ]). diff --git a/src/cli/router_cli_organization.erl b/src/cli/router_cli_organization.erl index 6502030c7..206de7241 100644 --- a/src/cli/router_cli_organization.erl +++ b/src/cli/router_cli_organization.erl @@ -280,7 +280,6 @@ org_unfunded_cmd() -> ] ]. - reset_unfunded(_, _, _) -> Before = router_console_dc_tracker:list_unfunded(), ok = router_console_dc_tracker:reset_unfunded_from_api(), diff --git a/src/decoders/router_decoder.erl b/src/decoders/router_decoder.erl index c54ac8566..ba5ed73c5 100644 --- a/src/decoders/router_decoder.erl +++ b/src/decoders/router_decoder.erl @@ -81,20 +81,13 @@ delete(ID) -> UplinkDetails :: map() ) -> {ok, any()} | {error, any()}. decode(DecoderID, Payload, Port, UplinkDetails) -> - Start = erlang:system_time(millisecond), try decode_(DecoderID, Payload, Port, UplinkDetails) of - {Type, {ok, _} = OK} -> - End = erlang:system_time(millisecond), - ok = router_metrics:decoder_observe(Type, ok, End - Start), + {_Type, {ok, _} = OK} -> OK; - {Type, {error, _} = Err} -> - End = erlang:system_time(millisecond), - ok = router_metrics:decoder_observe(Type, error, End - Start), + {_Type, {error, _} = Err} -> Err catch _Class:_Reason:_Stacktrace -> - End = erlang:system_time(millisecond), - ok = router_metrics:decoder_observe(decoder_crashed, error, End - Start), lager:error("decoder ~p crashed: ~p (~p) stacktrace ~p", [ DecoderID, _Reason, diff --git a/src/device/router_device_channels_worker.erl b/src/device/router_device_channels_worker.erl index 27ef7797f..8aee0c8b3 100644 --- a/src/device/router_device_channels_worker.erl +++ b/src/device/router_device_channels_worker.erl @@ -105,10 +105,8 @@ frame_timeout(Pid, UUID, BalanceNonce) -> -spec handle_console_downlink(binary(), map(), router_channel:channel(), first | last) -> ok. handle_console_downlink(DeviceID, MapPayload, Channel, Position) -> - {ChannelHandler, _} = router_channel:handler(Channel), case router_devices_sup:maybe_start_worker(DeviceID, #{}) of {error, _Reason} -> - ok = router_metrics:downlink_inc(ChannelHandler, error), Desc = io_lib:format("Failed to queue downlink (worker failed): ~p", [_Reason]), ok = maybe_report_downlink_dropped(DeviceID, Desc, Channel), lager:info("failed to start/find device ~p: ~p", [DeviceID, _Reason]); @@ -118,7 +116,6 @@ handle_console_downlink(DeviceID, MapPayload, Channel, Position) -> lager:info("clearing device queue because downlink payload from console"), router_device_worker:clear_queue(Pid); {ok, {Confirmed, Port, Region, Payload}} -> - ok = router_metrics:downlink_inc(ChannelHandler, ok), router_device_worker:queue_downlink( Pid, #downlink{ @@ -135,7 +132,6 @@ handle_console_downlink(DeviceID, MapPayload, Channel, Position) -> _Reason ]), ok = maybe_report_downlink_dropped(DeviceID, Desc, Channel), - ok = router_metrics:downlink_inc(ChannelHandler, error), lager:debug("could not parse json downlink message ~p for ~p", [ _Reason, DeviceID @@ -264,13 +260,11 @@ handle_cast( {handle_downlink, BinaryPayload, Channel}, #state{device_worker = DeviceWorker} = State ) -> - {ChannelHandler, _} = router_channel:handler(Channel), case downlink_decode(BinaryPayload) of {ok, clear_queue} -> lager:info("clearing device queue because downlink payload"), router_device_worker:clear_queue(DeviceWorker); {ok, {Confirmed, Port, Region, Payload}} -> - ok = router_metrics:downlink_inc(ChannelHandler, ok), ok = router_device_worker:queue_downlink(DeviceWorker, #downlink{ confirmed = Confirmed, port = Port, @@ -279,7 +273,6 @@ handle_cast( region = Region }); {error, _Reason} -> - ok = router_metrics:downlink_inc(ChannelHandler, error), lager:debug("could not parse json downlink message ~p", [_Reason]) end, {noreply, State}; diff --git a/src/device/router_device_routing.erl b/src/device/router_device_routing.erl index 9bf84e4bb..d2a91d59a 100644 --- a/src/device/router_device_routing.erl +++ b/src/device/router_device_routing.erl @@ -125,11 +125,9 @@ handle_offer(Offer, HandlerPid) -> false -> {error, deprecated}; true -> - Start = erlang:system_time(millisecond), Routing = blockchain_state_channel_offer_v1:routing(Offer), - {OfferCheckTime, OfferCheck} = timer:tc(fun offer_check/1, [Offer]), Resp = - case OfferCheck of + case offer_check(Offer) of {error, _} = Error0 -> Error0; ok -> @@ -140,18 +138,8 @@ handle_offer(Offer, HandlerPid) -> packet_offer(Offer) end end, - End = erlang:system_time(millisecond), erlang:spawn(fun() -> - ok = router_metrics:function_observe( - 'router_device_routing:offer_check', OfferCheckTime - ), - ok = router_metrics:packet_trip_observe_start( - blockchain_state_channel_offer_v1:packet_hash(Offer), - blockchain_state_channel_offer_v1:hotspot(Offer), - Start - ), - ok = print_handle_offer_resp(Offer, HandlerPid, Resp), - ok = handle_offer_metrics(Routing, Resp, End - Start) + ok = print_handle_offer_resp(Offer, HandlerPid, Resp) end), case Resp of {ok, Device} -> @@ -906,7 +894,6 @@ packet( end end; {error, api_not_found} -> - router_metrics:packet_routing_error(join, api_not_found), lager:debug( [{app_eui, AppEUI}, {dev_eui, DevEUI}], "no key for ~p ~p received by ~s", @@ -918,7 +905,6 @@ packet( ), {error, undefined_app_key}; {error, _Reason} -> - router_metrics:packet_routing_error(join, bad_mic), lager:debug( [{app_eui, AppEUI}, {dev_eui, DevEUI}], "Device ~s with AppEUI ~s tried to join through ~s " ++ @@ -1050,7 +1036,6 @@ send_to_device_worker( undefined -> case find_device(PubKeyBin, DevAddr, MIC, Payload) of {error, unknown_device} -> - router_metrics:packet_routing_error(packet, device_not_found), lager:warning( "unable to find device for packet [devaddr: ~p / ~p] [gateway: ~p]", [ @@ -1219,10 +1204,8 @@ get_device_for_offer(Offer, DevAddr, PubKeyBin) -> PubKeyBin :: libp2p_crypto:pubkey_bin() ) -> [router_device:device()]. get_and_sort_devices(DevAddr, PubKeyBin) -> - {Time1, Devices0} = timer:tc(router_device_cache, get_by_devaddr, [DevAddr]), - router_metrics:function_observe('router_device_cache:get_by_devaddr', Time1), - Devices1 = router_device_devaddr:sort_devices(Devices0, PubKeyBin), - Devices1. + Devices0 = router_device_cache:get_by_devaddr(DevAddr), + router_device_devaddr:sort_devices(Devices0, PubKeyBin). -spec get_device_by_mic(binary(), binary(), [router_device:device()]) -> {router_device:device(), binary(), non_neg_integer()} | undefined. @@ -1384,26 +1367,6 @@ maybe_start_worker(DeviceID) -> WorkerID = router_devices_sup:id(DeviceID), router_devices_sup:maybe_start_worker(WorkerID, #{}). --spec handle_offer_metrics( - #routing_information_pb{}, - {ok, router_device:device()} | {error, any()}, - non_neg_integer() -) -> ok. -handle_offer_metrics(#routing_information_pb{data = {eui, _}}, {ok, _}, Time) -> - ok = router_metrics:routing_offer_observe(join, accepted, accepted, Time); -handle_offer_metrics(#routing_information_pb{data = {eui, _}}, {error, Reason}, Time) -> - ok = router_metrics:routing_offer_observe(join, rejected, Reason, Time); -handle_offer_metrics(#routing_information_pb{data = {devaddr, _}}, {ok, _}, Time) -> - ok = router_metrics:routing_offer_observe(packet, accepted, accepted, Time); -handle_offer_metrics( - #routing_information_pb{data = {devaddr, _}}, - {error, ?DEVADDR_NOT_IN_SUBNET}, - Time -) -> - ok = router_metrics:routing_offer_observe(packet, rejected, ?DEVADDR_NOT_IN_SUBNET, Time); -handle_offer_metrics(#routing_information_pb{data = {devaddr, _}}, {error, Reason}, Time) -> - ok = router_metrics:routing_offer_observe(packet, rejected, Reason, Time). - -spec reason_to_single_atom(any()) -> any(). reason_to_single_atom(Reason) -> case Reason of @@ -1502,9 +1465,6 @@ handle_join_offer_test() -> meck:expect(blockchain_worker, blockchain, fun() -> chain end), meck:new(router_console_dc_tracker, [passthrough]), meck:expect(router_console_dc_tracker, has_enough_dc, fun(_, _) -> {ok, orgid, 0, 1} end), - meck:new(router_metrics, [passthrough]), - meck:expect(router_metrics, routing_offer_observe, fun(_, _, _, _) -> ok end), - meck:expect(router_metrics, function_observe, fun(_, _) -> ok end), meck:new(router_devices_sup, [passthrough]), meck:expect(router_devices_sup, maybe_start_worker, fun(_, _) -> {ok, self()} end), @@ -1527,8 +1487,6 @@ handle_join_offer_test() -> meck:unload(blockchain_worker), ?assert(meck:validate(router_console_dc_tracker)), meck:unload(router_console_dc_tracker), - ?assert(meck:validate(router_metrics)), - meck:unload(router_metrics), ?assert(meck:validate(router_devices_sup)), meck:unload(router_devices_sup), ets:delete(?BF_ETS), diff --git a/src/device/router_device_worker.erl b/src/device/router_device_worker.erl index 9a5539f34..62891361f 100644 --- a/src/device/router_device_worker.erl +++ b/src/device/router_device_worker.erl @@ -566,7 +566,6 @@ handle_cast( ) -> PHash = blockchain_helium_packet_v1:packet_hash(Packet0), lager:debug("got join packet (~p) ~p", [PHash, lager:pr(Packet0, blockchain_helium_packet_v1)]), - ok = router_metrics:packet_hold_time_observe(join, HoldTime), %% TODO we should really just call this once per join nonce %% and have a seperate function for getting the join nonce so we can check %% the cache @@ -698,7 +697,7 @@ handle_cast( end end; handle_cast( - {frame, _NwkSKey, PacketFCnt, Packet, PacketTime, HoldTime, PubKeyBin, _Region, _Pid}, + {frame, _NwkSKey, PacketFCnt, Packet, PacketTime, _HoldTime, PubKeyBin, _Region, _Pid}, #state{ device = Device, db = DB, @@ -706,7 +705,6 @@ handle_cast( is_active = false } = State ) -> - ok = router_metrics:packet_hold_time_observe(packet, HoldTime), PHash = blockchain_helium_packet_v1:packet_hash(Packet), ok = router_device_multibuy:max(PHash, 0), ok = router_utils:event_uplink_dropped_device_inactive( @@ -735,7 +733,6 @@ handle_cast( cf = CF } = State ) -> - ok = router_metrics:packet_hold_time_observe(packet, HoldTime), MetricPacketType = case Disco of true -> discovery_packet; diff --git a/src/grpc/helium_packet_service.erl b/src/grpc/helium_packet_service.erl index deba1743c..9f46b4f3c 100644 --- a/src/grpc/helium_packet_service.erl +++ b/src/grpc/helium_packet_service.erl @@ -31,12 +31,10 @@ route(#envelope_up_v1_pb{data = {packet, PacketUp}}, StreamState) -> Self = self(), erlang:spawn(fun() -> SCPacket = to_sc_packet(PacketUp), - {Time, _} = timer:tc(router_device_routing, handle_free_packet, [ + router_device_routing:handle_free_packet( SCPacket, erlang:system_time(millisecond), Self - ]), - router_metrics:function_observe('router_device_routing:handle_free_packet', Time) + ) end), - {ok, StreamState} end; route(_EnvUp, StreamState) -> diff --git a/src/grpc/helium_router_service.erl b/src/grpc/helium_router_service.erl index 03cbf7bf4..1e1d0548e 100644 --- a/src/grpc/helium_router_service.erl +++ b/src/grpc/helium_router_service.erl @@ -28,10 +28,9 @@ route(Ctx, #blockchain_state_channel_message_v1_pb{msg = {packet, SCPacket}} = _ true -> %% handle the packet and then await a response %% if no response within given time, then give up and return error - {Time, _} = timer:tc(router_device_routing, handle_free_packet, [ + router_device_routing:handle_free_packet( SCPacket, erlang:system_time(millisecond), self() - ]), - router_metrics:function_observe('router_device_routing:handle_free_packet', Time), + ), wait_for_response(Ctx) end. diff --git a/src/metrics/router_metrics.erl b/src/metrics/router_metrics.erl index c45e17a24..50a939089 100644 --- a/src/metrics/router_metrics.erl +++ b/src/metrics/router_metrics.erl @@ -9,20 +9,11 @@ %% ------------------------------------------------------------------ -export([ start_link/1, - routing_offer_observe/4, routing_packet_observe/4, routing_packet_observe_start/3, - packet_trip_observe_start/3, packet_trip_observe_end/5, packet_trip_observe_end/6, - packet_hold_time_observe/2, - packet_routing_error/2, - decoder_observe/3, - function_observe/2, console_api_observe/3, - downlink_inc/2, - ws_state/1, - xor_filter_update/1, - sc_close_submit_inc/1 + ws_state/1 ]). %% ------------------------------------------------------------------ @@ -40,10 +31,7 @@ -define(SERVER, ?MODULE). -record(state, { - chain = undefined :: undefined | blockchain:blockchain(), - pubkey_bin :: libp2p_crypto:pubkey_bin(), - routing_packet_duration :: map(), - packet_duration :: map() + packets :: map() }). %% ------------------------------------------------------------------ @@ -53,30 +41,20 @@ start_link(Args) -> gen_server:start_link({local, ?SERVER}, ?SERVER, Args, []). --spec routing_offer_observe(join | packet, accepted | rejected, any(), non_neg_integer()) -> ok. -routing_offer_observe(Type, Status, Reason, Time) when - (Type == join orelse Type == packet) andalso - (Status == accepted orelse Status == rejected) --> - _ = prometheus_histogram:observe(?METRICS_ROUTING_OFFER, [Type, Status, Reason], Time), - ok. - -spec routing_packet_observe(join | packet, any(), rejected, non_neg_integer()) -> ok. routing_packet_observe(Type, Status, Reason, Time) when (Type == join orelse Type == packet) andalso Status == rejected -> - _ = prometheus_histogram:observe(?METRICS_ROUTING_PACKET, [Type, Status, Reason, false], Time), + _ = prometheus_histogram:observe( + ?METRICS_ROUTING_PACKET, [Type, Status, Reason, false], Time + ), ok. -spec routing_packet_observe_start(binary(), binary(), non_neg_integer()) -> ok. routing_packet_observe_start(PacketHash, PubKeyBin, Time) -> gen_server:cast(?MODULE, {routing_packet_observe_start, PacketHash, PubKeyBin, Time}). --spec packet_trip_observe_start(binary(), binary(), non_neg_integer()) -> ok. -packet_trip_observe_start(PacketHash, PubKeyBin, Time) -> - gen_server:cast(?MODULE, {packet_trip_observe_start, PacketHash, PubKeyBin, Time}). - -spec packet_trip_observe_end(binary(), binary(), non_neg_integer(), atom(), boolean()) -> ok. packet_trip_observe_end(PacketHash, PubKeyBin, Time, Type, Downlink) -> packet_trip_observe_end(PacketHash, PubKeyBin, Time, Type, Downlink, false). @@ -97,37 +75,9 @@ packet_trip_observe_end(PacketHash, PubKeyBin, Time, Type, Downlink, false) -> {packet_trip_observe_end, PacketHash, PubKeyBin, Time, Type, Downlink} ). --spec packet_hold_time_observe(Type :: join | packet, HoldTime :: non_neg_integer()) -> ok. -packet_hold_time_observe(Type, HoldTime) when Type == join orelse Type == packet -> - _ = prometheus_histogram:observe(?METRICS_PACKET_HOLD_TIME, [Type], HoldTime), - ok. - --spec packet_routing_error( - Type :: join | packet, - Error :: device_not_found | api_not_found | bad_mic -) -> ok. -packet_routing_error(Type, Error) -> - _ = prometheus_counter:inc(?METRICS_PACKET_ERROR, [Type, Error]), - ok. - --spec decoder_observe(atom(), ok | error, non_neg_integer()) -> ok. -decoder_observe(Type, Status, Time) when Status == ok orelse Status == error -> - _ = prometheus_histogram:observe(?METRICS_DECODED_TIME, [Type, Status], Time), - ok. - --spec function_observe(atom(), non_neg_integer()) -> ok. -function_observe(Fun, Time) -> - _ = prometheus_histogram:observe(?METRICS_FUN_DURATION, [Fun], Time), - ok. - -spec console_api_observe(atom(), atom(), non_neg_integer()) -> ok. console_api_observe(Type, Status, Time) -> - _ = prometheus_histogram:observe(?METRICS_CONSOLE_API_TIME, [Type, Status], Time), - ok. - --spec downlink_inc(atom(), ok | error) -> ok. -downlink_inc(Type, Status) -> - _ = prometheus_counter:inc(?METRICS_DOWNLINK, [Type, Status]), + _ = prometheus_histogram:observe(?METRICS_CONSOLE_API, [Type, Status], Time), ok. -spec ws_state(boolean()) -> ok. @@ -135,16 +85,6 @@ ws_state(State) -> _ = prometheus_boolean:set(?METRICS_WS, State), ok. --spec xor_filter_update(DC :: non_neg_integer()) -> ok. -xor_filter_update(DC) -> - _ = prometheus_counter:inc(?METRICS_XOR_FILTER, DC), - ok. - --spec sc_close_submit_inc(ok | error) -> ok. -sc_close_submit_inc(Status) -> - _ = prometheus_counter:inc(?METRICS_SC_CLOSE_SUBMIT, [Status]), - ok. - %% ------------------------------------------------------------------ %% gen_server Function Definitions %% ------------------------------------------------------------------ @@ -158,18 +98,9 @@ init(Args) -> {port, router_utils:get_env_int(metrics_port, 3000)} ], {ok, _Pid} = elli:start_link(ElliOpts), - {PubKey, _, _} = router_blockchain:get_key(), - PubkeyBin = libp2p_crypto:pubkey_to_bin(PubKey), - case router_blockchain:is_chain_dead() of - false -> - _ = erlang:send_after(500, self(), post_init); - true -> - schedule_next_tick() - end, + _ = schedule_next_tick(), {ok, #state{ - pubkey_bin = PubkeyBin, - routing_packet_duration = #{}, - packet_duration = #{} + packets = #{} }}. handle_call(_Msg, _From, State) -> @@ -178,34 +109,17 @@ handle_call(_Msg, _From, State) -> handle_cast( {routing_packet_observe_start, PacketHash, PubKeyBin, Start}, - #state{routing_packet_duration = RPD} = State -) -> - {noreply, State#state{routing_packet_duration = maps:put({PacketHash, PubKeyBin}, Start, RPD)}}; -handle_cast( - {packet_trip_observe_start, PacketHash, PubKeyBin, Start}, - #state{packet_duration = PD} = State + #state{packets = RPD} = State ) -> - {noreply, State#state{packet_duration = maps:put({PacketHash, PubKeyBin}, Start, PD)}}; + {noreply, State#state{packets = maps:put({PacketHash, PubKeyBin}, Start, RPD)}}; handle_cast( {packet_trip_observe_end, PacketHash, PubKeyBin, End, Type, Downlink}, - #state{routing_packet_duration = RPD, packet_duration = PD} = State0 + #state{packets = RPD} = State0 ) -> State1 = - case maps:get({PacketHash, PubKeyBin}, PD, undefined) of - undefined -> - State0; - Start0 -> - _ = prometheus_histogram:observe( - ?METRICS_PACKET_TRIP, - [Type, Downlink], - End - Start0 - ), - State0#state{packet_duration = maps:remove({PacketHash, PubKeyBin}, PD)} - end, - State2 = case maps:get({PacketHash, PubKeyBin}, RPD, undefined) of undefined -> - State1; + State0; Start1 -> _ = prometheus_histogram:observe( ?METRICS_ROUTING_PACKET, @@ -217,61 +131,25 @@ handle_cast( ], End - Start1 ), - State1#state{routing_packet_duration = maps:remove({PacketHash, PubKeyBin}, RPD)} + State0#state{packets = maps:remove({PacketHash, PubKeyBin}, RPD)} end, - {noreply, State2}; + {noreply, State1}; handle_cast(_Msg, State) -> lager:warning("rcvd unknown cast msg: ~p", [_Msg]), {noreply, State}. -handle_info(post_init, #state{chain = undefined} = State) -> - case router_blockchain:privileged_maybe_get_blockchain() of - undefined -> - erlang:send_after(500, self(), post_init), - {noreply, State}; - Chain -> - _ = schedule_next_tick(), - ok = blockchain_event:add_handler(self()), - {noreply, State#state{chain = Chain}} - end; -handle_info({blockchain_event, {new_chain, Chain}}, State) -> - {noreply, State#state{chain = Chain}}; -handle_info( - {blockchain_event, {add_block, _BlockHash, _Syncing, _Ledger}}, - #state{chain = undefined} = State -) -> - lager:info("got block ~p with not chain", [_BlockHash]), - erlang:send_after(500, self(), post_init), - {noreply, State}; -handle_info( - {blockchain_event, {add_block, BlockHash, _Syncing, _Ledger}}, - #state{chain = Chain, pubkey_bin = PubkeyBin} = State -) -> - _ = erlang:spawn(fun() -> ok = record_sc_close_conflict(Chain, BlockHash, PubkeyBin) end), - {noreply, State}; handle_info( ?METRICS_TICK, #state{ - pubkey_bin = PubkeyBin, - routing_packet_duration = RPD, - packet_duration = PD + packets = RPD } = State ) -> lager:info("running metrics"), erlang:spawn_opt( fun() -> - case router_blockchain:is_chain_dead() of - false -> - ok = record_dc_balance(PubkeyBin), - ok = record_state_channels(), - ok = record_chain_blocks(); - true -> - ok - end, ok = record_vm_stats(), ok = record_ets(), ok = record_queues(), - ok = record_grpc_connections(), ok = record_devices() end, [ @@ -281,8 +159,7 @@ handle_info( ), _ = schedule_next_tick(), {noreply, State#state{ - routing_packet_duration = cleanup_pd(RPD), - packet_duration = cleanup_pd(PD) + packets = cleanup_pd(RPD) }}; handle_info(_Msg, State) -> lager:warning("rcvd unknown info msg: ~p, ~p", [_Msg, State]), @@ -334,87 +211,6 @@ cleanup_pd(PD) -> End = erlang:system_time(millisecond), maps:filter(fun(_K, Start) -> End - Start < timer:seconds(10) end, PD). --spec record_sc_close_conflict( - Chain :: blockchain:blockchain(), - BlockHash :: binary(), - PubkeyBin :: libp2p_crypto:pubkey_bin() -) -> ok. -record_sc_close_conflict(Chain, BlockHash, PubkeyBin) -> - case blockchain:get_block(BlockHash, Chain) of - {error, _Reason} -> - lager:error("failed to get block:~p ~p", [BlockHash, _Reason]); - {ok, Block} -> - Txns = lists:filter( - fun(Txn) -> - case blockchain_txn:type(Txn) of - blockchain_txn_state_channel_close_v1 -> - SC = blockchain_txn_state_channel_close_v1:state_channel(Txn), - blockchain_state_channel_v1:owner(SC) == PubkeyBin andalso - blockchain_txn_state_channel_close_v1:conflicts_with(Txn) =/= - undefined; - _ -> - false - end - end, - blockchain_block:transactions(Block) - ), - _ = prometheus_gauge:set(?METRICS_SC_CLOSE_CONFLICT, erlang:length(Txns)), - ok - end. - --spec record_dc_balance(PubkeyBin :: libp2p_crypto:pubkey_bin()) -> ok. -record_dc_balance(PubkeyBin) -> - case router_blockchain:find_dc_entry(PubkeyBin) of - {error, _} -> - ok; - {ok, Entry} -> - Balance = blockchain_ledger_data_credits_entry_v1:balance(Entry), - _ = prometheus_gauge:set(?METRICS_DC, Balance), - ok - end. - --spec record_state_channels() -> ok. -record_state_channels() -> - {OpenedCount, OverspentCount, _GettingCloseCount} = router_sc_worker:counts(), - _ = prometheus_gauge:set(?METRICS_SC_OPENED_COUNT, OpenedCount), - _ = prometheus_gauge:set(?METRICS_SC_OVERSPENT_COUNT, OverspentCount), - - ActiveSCs = maps:values(blockchain_state_channels_server:get_actives()), - ActiveCount = erlang:length(ActiveSCs), - _ = prometheus_gauge:set(?METRICS_SC_ACTIVE_COUNT, ActiveCount), - - {TotalDCLeft, TotalActors} = lists:foldl( - fun({ActiveSC, _, _}, {DCs, Actors}) -> - Summaries = blockchain_state_channel_v1:summaries(ActiveSC), - TotalDC = blockchain_state_channel_v1:total_dcs(ActiveSC), - DCLeft = blockchain_state_channel_v1:amount(ActiveSC) - TotalDC, - %% If SC ran out of DC we should not be counted towards active metrics - case DCLeft of - 0 -> - {DCs, Actors}; - _ -> - {DCs + DCLeft, Actors + erlang:length(Summaries)} - end - end, - {0, 0}, - ActiveSCs - ), - _ = prometheus_gauge:set(?METRICS_SC_ACTIVE_BALANCE, TotalDCLeft), - _ = prometheus_gauge:set(?METRICS_SC_ACTIVE_ACTORS, TotalActors), - ok. - --spec record_chain_blocks() -> ok. -record_chain_blocks() -> - case router_blockchain:head_block() of - {error, _} -> - ok; - {ok, Block} -> - Now = erlang:system_time(seconds), - Time = blockchain_block:time(Block), - _ = prometheus_gauge:set(?METRICS_CHAIN_BLOCKS, Now - Time), - ok - end. - -spec record_vm_stats() -> ok. record_vm_stats() -> [{_Mem, CPU}] = recon:node_stats_list(1, 1), @@ -446,23 +242,6 @@ record_ets() -> ), ok. --spec record_grpc_connections() -> ok. -record_grpc_connections() -> - Opts = application:get_env(grpcbox, listen_opts, #{}), - PoolName = grpcbox_services_sup:pool_name(Opts), - try - Counts = acceptor_pool:count_children(PoolName), - proplists:get_value(active, Counts) - of - Count -> - _ = prometheus_gauge:set(?METRICS_GRPC_CONNECTION_COUNT, Count) - catch - _:_ -> - lager:warning("no grpcbox acceptor named ~p", [PoolName]), - _ = prometheus_gauge:set(?METRICS_GRPC_CONNECTION_COUNT, 0) - end, - ok. - -spec record_queues() -> ok. record_queues() -> CurrentQs = lists:foldl( diff --git a/src/router_sc_worker.erl b/src/router_sc_worker.erl index 6fa077a5a..c6a29ed22 100644 --- a/src/router_sc_worker.erl +++ b/src/router_sc_worker.erl @@ -129,10 +129,8 @@ counts() -> blockchain_txn_state_channel_close_v1:txn_state_channel_close() ) -> ok. sc_hook_close_submit(ok, _SignedTxn) -> - ok = router_metrics:sc_close_submit_inc(ok), lager:info("txn accepted"); sc_hook_close_submit(Error, SignedTxn) -> - ok = router_metrics:sc_close_submit_inc(error), lager:error("failed to submit txn ~p", [Error]), lager:error("~p", [SignedTxn]). diff --git a/src/router_xor_filter_worker.erl b/src/router_xor_filter_worker.erl index 93440d8c5..2732e602d 100644 --- a/src/router_xor_filter_worker.erl +++ b/src/router_xor_filter_worker.erl @@ -1113,9 +1113,6 @@ craft_update_filter_txn(PubKey, SignFun, OUI, Filter, Nonce, Index) -> -spec submit_txn(Txn :: blockchain_txn_routing_v1:txn_routing()) -> blockchain_txn:hash(). submit_txn(Txn) -> - Cost = router_blockchain:calculate_routing_txn_fee(Txn), - ok = router_metrics:xor_filter_update(Cost), - Hash = blockchain_txn_routing_v1:hash(Txn), Self = self(), Callback = fun(Return) -> Self ! {?SUBMIT_RESULT, Hash, Return} end, diff --git a/test/router_lorawan_handler_test.erl b/test/router_lorawan_handler_test.erl index f5f07a0bc..86aabe08f 100644 --- a/test/router_lorawan_handler_test.erl +++ b/test/router_lorawan_handler_test.erl @@ -124,7 +124,8 @@ handle_info( lager:info("got packet ~p", [JSON]), State#state.pid ! rx, Prefix = router_utils:get_env_int(devaddr_prefix, $H), - <> = <<33554431:25/integer-unsigned-little, Prefix:7/integer>>, + <> = + <<33554431:25/integer-unsigned-little, Prefix:7/integer>>, HeliumPacket = #packet_pb{ type = lorawan, payload = base64:decode(maps:get(<<"data">>, JSON)), diff --git a/test/router_metrics_SUITE.erl b/test/router_metrics_SUITE.erl index 88b86334e..a6d38ffab 100644 --- a/test/router_metrics_SUITE.erl +++ b/test/router_metrics_SUITE.erl @@ -69,7 +69,6 @@ end_per_testcase(TestCase, Config) -> %%-------------------------------------------------------------------- metrics_test(Config) -> - StartTime = erlang:system_time(seconds), #{ pubkey_bin := _PubKeyBin, stream := _Stream, @@ -79,28 +78,6 @@ metrics_test(Config) -> router_metrics ! ?METRICS_TICK, ok = timer:sleep(timer:seconds(1)), - case router_blockchain:is_chain_dead() of - false -> - ?assertEqual(5000, prometheus_gauge:value(?METRICS_DC)), - BlockAge = prometheus_gauge:value(?METRICS_CHAIN_BLOCKS), - ct:pal("[~p:~p:~p] MARKER ~p~n", [ - ?MODULE, - ?FUNCTION_NAME, - ?LINE, - {StartTime, BlockAge, erlang:system_time(seconds)} - ]), - ?assert(BlockAge > StartTime andalso BlockAge < erlang:system_time(seconds)); - true -> - ?assertEqual(0, prometheus_gauge:value(?METRICS_DC)) - end, - - ?assertEqual(0, prometheus_gauge:value(?METRICS_SC_OPENED_COUNT)), - ?assertEqual(0, prometheus_gauge:value(?METRICS_SC_OVERSPENT_COUNT)), - ?assertEqual(0, prometheus_gauge:value(?METRICS_SC_ACTIVE_COUNT)), - ?assertEqual(0, prometheus_gauge:value(?METRICS_SC_ACTIVE_BALANCE)), - ?assertEqual(0, prometheus_gauge:value(?METRICS_SC_ACTIVE_ACTORS)), - ?assertEqual(0, prometheus_gauge:value(?METRICS_SC_CLOSE_CONFLICT)), - {_, RoutingPacketTime} = prometheus_histogram:value(?METRICS_ROUTING_PACKET, [ join, accepted, @@ -111,25 +88,13 @@ metrics_test(Config) -> ct:pal("[~p:~p:~p] MARKER ~p~n", [?MODULE, ?FUNCTION_NAME, ?LINE, RoutingPacketTime]), ?assert(RoutingPacketTime > 1999 andalso RoutingPacketTime < 2025), - {_, HoldTime} = prometheus_histogram:value(?METRICS_PACKET_HOLD_TIME, [join]), - %% Hold Time is hard coded to 100ms in tests - ?assertEqual(100, HoldTime), - - {_, ConsoleAPITime} = prometheus_histogram:value(?METRICS_CONSOLE_API_TIME, [report_status, ok]), + {_, ConsoleAPITime} = prometheus_histogram:value(?METRICS_CONSOLE_API, [ + report_status, ok + ]), ?assert(ConsoleAPITime < 100), ?assertEqual(true, prometheus_boolean:value(?METRICS_WS)), ?assert(prometheus_gauge:value(?METRICS_VM_CPU, [1]) > 0), - %% When run with the grpc suite this value will be 1. When running tests - %% without that suite, it will be 0. - GRPCCount = prometheus_gauge:value(?METRICS_GRPC_CONNECTION_COUNT), - ?assert(GRPCCount == 0 orelse GRPCCount == 1), - - ok = router_sc_worker:sc_hook_close_submit(ok, txn), - ?assert(prometheus_counter:value(?METRICS_SC_CLOSE_SUBMIT, [ok]) > 0), - - ok = router_sc_worker:sc_hook_close_submit(error, txn), - ?assert(prometheus_counter:value(?METRICS_SC_CLOSE_SUBMIT, [error]) > 0), ok. diff --git a/test/router_test_ics_route_service.erl b/test/router_test_ics_route_service.erl index ba0f24f16..ff4681d21 100644 --- a/test/router_test_ics_route_service.erl +++ b/test/router_test_ics_route_service.erl @@ -71,8 +71,7 @@ remove_skf(SKF) -> %% end), Match = [ {{iot_config_skf_v1_pb, '$1', '$2', '$3', '_'}, [], [ - {'andalso', {'==', R0, '$1'}, - {'andalso', {'==', D0, '$2'}, {'==', S0, '$3'}}} + {'andalso', {'==', R0, '$1'}, {'andalso', {'==', D0, '$2'}, {'==', S0, '$3'}}} ]} ], %% If more than 1 is deleted, we have entered a DB state that is not allwoed.