Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit telemetry failure event on handler failure #98

Merged
merged 3 commits into from Oct 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 20 additions & 2 deletions src/telemetry.erl
Expand Up @@ -63,7 +63,12 @@
%% or `&handle_event/4' ) as event handlers.
%%
%% All the handlers are executed by the process dispatching the event. If the function fails (raises,
%% exits or throws) then the handler is removed.
%% exits or throws) then the handler is removed and a failure event is emitted.
%%
%% Handler failure events `[telemetry, handler, failure]' should only be used for monitoring
%% and diagnostic purposes. Re-attaching a failed handler will likely result in the handler
%% failing again.
%%
%% Note that you should not rely on the order in which handlers are invoked.
-spec attach(HandlerId, EventName, Function, Config) -> ok | {error, already_exists} when
HandlerId :: handler_id(),
Expand All @@ -86,7 +91,12 @@ attach(HandlerId, EventName, Function, Config) ->
%% or `&handle_event/4' ) as event handlers.
%%
%% All the handlers are executed by the process dispatching the event. If the function fails (raises,
%% exits or throws) then the handler is removed.
%% exits or throws) then the handler is removed and a failure event is emitted.
%%
%% Handler failure events `[telemetry, handler, failure]' should only be used for monitoring
bryannaegele marked this conversation as resolved.
Show resolved Hide resolved
%% and diagnostic purposes. Re-attaching a failed handler will likely result in the handler
%% failing again.
%%
%% Note that you should not rely on the order in which handlers are invoked.
-spec attach_many(HandlerId, [EventName], Function, Config) -> ok | {error, already_exists} when
HandlerId :: handler_id(),
Expand Down Expand Up @@ -151,6 +161,14 @@ execute(EventName, Measurements, Metadata) when is_map(Measurements) and is_map(
catch
?WITH_STACKTRACE(Class, Reason, Stacktrace)
detach(HandlerId),
FailureMetadata = #{event_name => EventName,
handler_id => HandlerId,
handler_config => Config,
kind => Class,
reason => Reason,
stacktrace => Stacktrace},
FailureMeasurements = #{monotonic_time => erlang:monotonic_time(), system_time => erlang:system_time()},
execute([telemetry, handler, failure], FailureMeasurements, FailureMetadata),
?LOG_ERROR("Handler ~p has failed and has been detached. "
"Class=~p~nReason=~p~nStacktrace=~p~n",
[HandlerId, Class, Reason, Stacktrace])
Expand Down
29 changes: 26 additions & 3 deletions test/telemetry_SUITE.erl
Expand Up @@ -97,21 +97,44 @@ list_for_prefix(Config) ->

?assertEqual([], telemetry:list_handlers(Event ++ [something])).

%% handler function is detached when it fails
%% handler function is detached when it fails and failure event is emitted
detach_on_exception(Config) ->
HandlerId = ?config(id, Config),
Event = [a, test, event],
HandlerFun = fun ?MODULE:raise_on_event/4,
telemetry:attach(HandlerId, Event, HandlerFun, []),
HandlerConfig = [],

FailureHandlerId = failure_handler_id,
FailureEvent = [telemetry, handler, failure],
FailureHandlerConfig = #{send_to => self()},
FailureHandlerFun = fun ?MODULE:echo_event/4,

telemetry:attach(HandlerId, Event, HandlerFun, HandlerConfig),
telemetry:attach(FailureHandlerId, FailureEvent, FailureHandlerFun, FailureHandlerConfig),

?assertMatch([#{id := HandlerId,
event_name := Event,
function := HandlerFun,
config := []}],
config := HandlerConfig}],
telemetry:list_handlers(Event)),

telemetry:execute(Event, #{some => 1}, #{some => metadata}),

receive
{event, FailureEvent, _FailureMeasurements, FailureMetadata, FailureHandlerConfig} ->
?assertMatch(#{event_name := Event,
handler_id := HandlerId,
handler_config := HandlerConfig,
kind := throw,
reason := got_event,
stacktrace := [_ | _]},
FailureMetadata),
ok
after
300 ->
ct:fail(failure_event_not_emitted)
end,

?assertEqual([], telemetry:list_handlers(Event)),

%% detaching returns error if handler with given ID doesn't exist
Expand Down