diff --git a/audit/broker.go b/audit/broker.go index 4fd039ee618a6..15d096cfb0b91 100644 --- a/audit/broker.go +++ b/audit/broker.go @@ -21,6 +21,11 @@ import ( "github.com/hashicorp/vault/sdk/logical" ) +const ( + // timeout is the duration which should be used for context related timeouts. + timeout = 5 * time.Second +) + var ( _ Registrar = (*Broker)(nil) _ Auditor = (*Broker)(nil) @@ -276,9 +281,32 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (ret erro e.Data = in + // Evaluate whether we need a new context for auditing. + var auditContext context.Context + if isContextViable(ctx) { + auditContext = ctx + } else { + // In cases where we are trying to audit the request, and the existing + // context is not viable due to a short deadline, we detach ourselves from + // the original context (keeping only the namespace). + // This is so that we get a fair run at writing audit entries if Vault + // has taken up a lot of time handling the request before audit (request) + // is triggered. Pipeline nodes and the eventlogger.Broker may check for a + // cancelled context and refuse to process the nodes further. + ns, err := namespace.FromContext(ctx) + if err != nil { + retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err)) + return retErr.ErrorOrNil() + } + + tempContext, auditCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer auditCancel() + auditContext = namespace.ContextWithNamespace(tempContext, ns) + } + var status eventlogger.Status if hasAuditPipelines(b.broker) { - status, err = b.broker.Send(ctx, event.AuditType.AsEventType(), e) + status, err = b.broker.Send(auditContext, event.AuditType.AsEventType(), e) if err != nil { retErr = multierror.Append(retErr, multierror.Append(err, status.Warnings...)) return retErr.ErrorOrNil() @@ -297,7 +325,7 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (ret erro } // Handle any additional audit that is required (Enterprise/CE dependant). - err = b.handleAdditionalAudit(ctx, e) + err = b.handleAdditionalAudit(auditContext, e) if err != nil { retErr = multierror.Append(retErr, err) } @@ -335,21 +363,28 @@ func (b *Broker) LogResponse(ctx context.Context, in *logical.LogInput) (ret err e.Data = in - // In cases where we are trying to audit the response, we detach - // ourselves from the original context (keeping only the namespace). - // This is so that we get a fair run at writing audit entries if Vault - // has taken up a lot of time handling the request before audit (response) - // is triggered. Pipeline nodes and the eventlogger.Broker may check for a - // cancelled context and refuse to process the nodes further. - ns, err := namespace.FromContext(ctx) - if err != nil { - retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err)) - return retErr.ErrorOrNil() - } + // Evaluate whether we need a new context for auditing. + var auditContext context.Context + if isContextViable(ctx) { + auditContext = ctx + } else { + // In cases where we are trying to audit the response, and the existing + // context is not viable due to a short deadline, we detach ourselves from + // the original context (keeping only the namespace). + // This is so that we get a fair run at writing audit entries if Vault + // has taken up a lot of time handling the request before audit (response) + // is triggered. Pipeline nodes and the eventlogger.Broker may check for a + // cancelled context and refuse to process the nodes further. + ns, err := namespace.FromContext(ctx) + if err != nil { + retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err)) + return retErr.ErrorOrNil() + } - auditContext, auditCancel := context.WithTimeout(context.Background(), 5*time.Second) - defer auditCancel() - auditContext = namespace.ContextWithNamespace(auditContext, ns) + tempContext, auditCancel := context.WithTimeout(context.Background(), timeout) + defer auditCancel() + auditContext = namespace.ContextWithNamespace(tempContext, ns) + } var status eventlogger.Status if hasAuditPipelines(b.broker) { @@ -424,3 +459,26 @@ func (b *Broker) IsRegistered(name string) bool { return b.isRegisteredByName(name) } + +// isContextViable examines the supplied context to see if its own deadline would +// occur later than a newly created context with a specific timeout. +// If the existing context is viable it can be used 'as-is', if not, the caller +// should consider creating a new context with the relevant deadline and associated +// context values (e.g. namespace) in order to reduce the likelihood that the +// audit system believes there is a failure in audit (and updating its metrics) +// when the root cause is elsewhere. +func isContextViable(ctx context.Context) bool { + if ctx == nil { + return false + } + + deadline, hasDeadline := ctx.Deadline() + + // If there's no deadline on the context then we don't need to worry about + // it being cancelled due to a timeout. + if !hasDeadline { + return true + } + + return deadline.After(time.Now().Add(timeout)) +} diff --git a/audit/broker_test.go b/audit/broker_test.go index dd7df8b91a461..c2d80b92fccfd 100644 --- a/audit/broker_test.go +++ b/audit/broker_test.go @@ -158,3 +158,52 @@ func BenchmarkAuditBroker_File_Request_DevNull(b *testing.B) { } }) } + +// TestBroker_isContextViable_basics checks the expected result of isContextViable +// for basic inputs such as nil and a never-ending context. +func TestBroker_isContextViable_basics(t *testing.T) { + t.Parallel() + + require.False(t, isContextViable(nil)) + require.True(t, isContextViable(context.Background())) +} + +// TestBroker_isContextViable_timeouts checks the expected result of isContextViable +// for various timeout durations. +func TestBroker_isContextViable_timeouts(t *testing.T) { + t.Parallel() + + tests := map[string]struct { + timeout time.Duration + expected bool + }{ + "2s-smaller-deadline": { + timeout: timeout - 2*time.Second, + expected: false, + }, + "same-deadline": { + timeout: timeout, + expected: false, // Expected as a near miss + }, + "same-deadline-plus": { + timeout: timeout + 5*time.Millisecond, + expected: true, + }, + "2x-longer-deadline": { + timeout: timeout * 2, + expected: true, + }, + } + + for name, tc := range tests { + name := name + tc := tc + t.Run(name, func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(context.Background(), tc.timeout) + t.Cleanup(func() { cancel() }) + require.Equal(t, tc.expected, isContextViable(ctx)) + }) + } +} diff --git a/changelog/26616.txt b/changelog/26616.txt new file mode 100644 index 0000000000000..af8e85e919406 --- /dev/null +++ b/changelog/26616.txt @@ -0,0 +1,5 @@ +```release-note:bug +core/audit: Audit logging a Vault request/response will now use a minimum 5 second context timeout. +If the existing context deadline occurs later than 5s in the future, it will be used, otherwise a +new context, separate from the original will be used. +``` \ No newline at end of file