From b4093524bd990c0afae6fc712972d239554499ec Mon Sep 17 00:00:00 2001 From: Bryan Naegele Date: Wed, 29 Sep 2021 10:10:39 -0600 Subject: [PATCH 1/3] Emit telemetry failure event on handler failure --- src/telemetry.erl | 14 ++++++++++++-- test/telemetry_SUITE.erl | 29 ++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/telemetry.erl b/src/telemetry.erl index 07ad93f..7d3c12b 100644 --- a/src/telemetry.erl +++ b/src/telemetry.erl @@ -63,7 +63,7 @@ %% or `&handle_event/4' ) as event handlers. %% %% All the handlers are executed by the process dispatching event. If the function fails (raises, -%% exits or throws) then the handler is removed. +%% exits or throws) then the handler is removed and a failure event is emitted. %% Note that you should not rely on the order in which handlers are invoked. -spec attach(HandlerId, EventName, Function, Config) -> ok | {error, already_exists} when HandlerId :: handler_id(), @@ -86,7 +86,10 @@ attach(HandlerId, EventName, Function, Config) -> %% or `&handle_event/4' ) as event handlers. %% %% All the handlers are executed by the process dispatching event. If the function fails (raises, -%% exits or throws) then the handler is removed. +%% exits or throws) a handler failure event is emitted and then the handler is removed. +%% Handler failure events `[telemetry, handler, failure]` should only be used for monitoring +%% and diagnostic purposes. Re-attaching a failed handler will likely result in the handler +%% failing again. %% Note that you should not rely on the order in which handlers are invoked. -spec attach_many(HandlerId, [EventName], Function, Config) -> ok | {error, already_exists} when HandlerId :: handler_id(), @@ -151,6 +154,13 @@ execute(EventName, Measurements, Metadata) when is_map(Measurements) and is_map( catch ?WITH_STACKTRACE(Class, Reason, Stacktrace) detach(HandlerId), + FailureMetadata = #{event_name => EventName, + handler_id => HandlerId, + handler_config => Config, + class => Class, + reason => Reason, + stacktrace => Stacktrace}, + execute([telemetry, handler, failure], #{count => 1}, FailureMetadata), ?LOG_ERROR("Handler ~p has failed and has been detached. " "Class=~p~nReason=~p~nStacktrace=~p~n", [HandlerId, Class, Reason, Stacktrace]) diff --git a/test/telemetry_SUITE.erl b/test/telemetry_SUITE.erl index a1d8b49..f65fe45 100644 --- a/test/telemetry_SUITE.erl +++ b/test/telemetry_SUITE.erl @@ -97,21 +97,44 @@ list_for_prefix(Config) -> ?assertEqual([], telemetry:list_handlers(Event ++ [something])). -%% handler function is detached when it fails +%% handler function is detached when it fails and failure event is emitted detach_on_exception(Config) -> HandlerId = ?config(id, Config), Event = [a, test, event], HandlerFun = fun ?MODULE:raise_on_event/4, - telemetry:attach(HandlerId, Event, HandlerFun, []), + HandlerConfig = [], + + FailureHandlerId = failure_handler_id, + FailureEvent = [telemetry, handler, failure], + FailureHandlerConfig = #{send_to => self()}, + FailureHandlerFun = fun ?MODULE:echo_event/4, + + telemetry:attach(HandlerId, Event, HandlerFun, HandlerConfig), + telemetry:attach(FailureHandlerId, FailureEvent, FailureHandlerFun, FailureHandlerConfig), ?assertMatch([#{id := HandlerId, event_name := Event, function := HandlerFun, - config := []}], + config := HandlerConfig}], telemetry:list_handlers(Event)), telemetry:execute(Event, #{some => 1}, #{some => metadata}), + receive + {event, FailureEvent, _FailureMeasurements, FailureMetadata, FailureHandlerConfig} -> + ?assertMatch(#{event_name := Event, + handler_id := HandlerId, + handler_config := HandlerConfig, + class := throw, + reason := got_event, + stacktrace := [_ | _]}, + FailureMetadata), + ok + after + 300 -> + ct:fail(failure_event_not_emitted) + end, + ?assertEqual([], telemetry:list_handlers(Event)), %% detaching returns error if handler with given ID doesn't exist From 342abf0825fbe511fd873eb5126253f10de0ccd9 Mon Sep 17 00:00:00 2001 From: Bryan Naegele Date: Thu, 30 Sep 2021 17:04:23 -0600 Subject: [PATCH 2/3] Feedback --- src/telemetry.erl | 19 +++++++++++++------ test/telemetry_SUITE.erl | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/telemetry.erl b/src/telemetry.erl index 7d3c12b..a6c1254 100644 --- a/src/telemetry.erl +++ b/src/telemetry.erl @@ -64,6 +64,11 @@ %% %% All the handlers are executed by the process dispatching event. If the function fails (raises, %% exits or throws) then the handler is removed and a failure event is emitted. +%% +%% Handler failure events `[telemetry, handler, failure]` should only be used for monitoring +%% and diagnostic purposes. Re-attaching a failed handler will likely result in the handler +%% failing again. +%% %% Note that you should not rely on the order in which handlers are invoked. -spec attach(HandlerId, EventName, Function, Config) -> ok | {error, already_exists} when HandlerId :: handler_id(), @@ -87,9 +92,11 @@ attach(HandlerId, EventName, Function, Config) -> %% %% All the handlers are executed by the process dispatching event. If the function fails (raises, %% exits or throws) a handler failure event is emitted and then the handler is removed. +%% %% Handler failure events `[telemetry, handler, failure]` should only be used for monitoring %% and diagnostic purposes. Re-attaching a failed handler will likely result in the handler %% failing again. +%% %% Note that you should not rely on the order in which handlers are invoked. -spec attach_many(HandlerId, [EventName], Function, Config) -> ok | {error, already_exists} when HandlerId :: handler_id(), @@ -155,12 +162,12 @@ execute(EventName, Measurements, Metadata) when is_map(Measurements) and is_map( ?WITH_STACKTRACE(Class, Reason, Stacktrace) detach(HandlerId), FailureMetadata = #{event_name => EventName, - handler_id => HandlerId, - handler_config => Config, - class => Class, - reason => Reason, - stacktrace => Stacktrace}, - execute([telemetry, handler, failure], #{count => 1}, FailureMetadata), + handler_id => HandlerId, + handler_config => Config, + kind => Class, + reason => Reason, + stacktrace => Stacktrace}, + execute([telemetry, handler, failure], #{}, FailureMetadata), ?LOG_ERROR("Handler ~p has failed and has been detached. " "Class=~p~nReason=~p~nStacktrace=~p~n", [HandlerId, Class, Reason, Stacktrace]) diff --git a/test/telemetry_SUITE.erl b/test/telemetry_SUITE.erl index f65fe45..79d4f72 100644 --- a/test/telemetry_SUITE.erl +++ b/test/telemetry_SUITE.erl @@ -125,7 +125,7 @@ detach_on_exception(Config) -> ?assertMatch(#{event_name := Event, handler_id := HandlerId, handler_config := HandlerConfig, - class := throw, + kind := throw, reason := got_event, stacktrace := [_ | _]}, FailureMetadata), From 730b27a4cbc178f5ae1022e244154eb08895f6ed Mon Sep 17 00:00:00 2001 From: Bryan Naegele Date: Fri, 1 Oct 2021 09:38:17 -0600 Subject: [PATCH 3/3] Use same measurements we use for span start --- src/telemetry.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/telemetry.erl b/src/telemetry.erl index a6c1254..41dde99 100644 --- a/src/telemetry.erl +++ b/src/telemetry.erl @@ -167,7 +167,8 @@ execute(EventName, Measurements, Metadata) when is_map(Measurements) and is_map( kind => Class, reason => Reason, stacktrace => Stacktrace}, - execute([telemetry, handler, failure], #{}, FailureMetadata), + FailureMeasurements = #{monotonic_time => erlang:monotonic_time(), system_time => erlang:system_time()}, + execute([telemetry, handler, failure], FailureMeasurements, FailureMetadata), ?LOG_ERROR("Handler ~p has failed and has been detached. " "Class=~p~nReason=~p~nStacktrace=~p~n", [HandlerId, Class, Reason, Stacktrace])