Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VAULT-24945: audit - add context timeout to audit request #26616

Merged
merged 7 commits into from Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
90 changes: 74 additions & 16 deletions audit/broker.go
Expand Up @@ -21,6 +21,11 @@ import (
"github.com/hashicorp/vault/sdk/logical"
)

const (
// timeout is the duration which should be used for context related timeouts.
timeout = 5 * time.Second
)

var (
_ Registrar = (*Broker)(nil)
_ Auditor = (*Broker)(nil)
Expand Down Expand Up @@ -276,9 +281,32 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (ret erro

e.Data = in

// Evaluate whether we need a new context for auditing.
var auditContext context.Context
if isContextViable(ctx) {
auditContext = ctx
} else {
// In cases where we are trying to audit the request, and the existing
// context is not viable due to a short deadline, we detach ourselves from
// the original context (keeping only the namespace).
// This is so that we get a fair run at writing audit entries if Vault
// has taken up a lot of time handling the request before audit (request)
// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
// cancelled context and refuse to process the nodes further.
ns, err := namespace.FromContext(ctx)
if err != nil {
retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err))
return retErr.ErrorOrNil()
}

tempContext, auditCancel := context.WithTimeout(context.Background(), 5*time.Second)
defer auditCancel()
auditContext = namespace.ContextWithNamespace(tempContext, ns)
}

var status eventlogger.Status
if hasAuditPipelines(b.broker) {
status, err = b.broker.Send(ctx, event.AuditType.AsEventType(), e)
status, err = b.broker.Send(auditContext, event.AuditType.AsEventType(), e)
if err != nil {
retErr = multierror.Append(retErr, multierror.Append(err, status.Warnings...))
return retErr.ErrorOrNil()
Expand All @@ -297,7 +325,7 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (ret erro
}

// Handle any additional audit that is required (Enterprise/CE dependant).
err = b.handleAdditionalAudit(ctx, e)
err = b.handleAdditionalAudit(auditContext, e)
if err != nil {
retErr = multierror.Append(retErr, err)
}
Expand Down Expand Up @@ -335,21 +363,28 @@ func (b *Broker) LogResponse(ctx context.Context, in *logical.LogInput) (ret err

e.Data = in

// In cases where we are trying to audit the response, we detach
// ourselves from the original context (keeping only the namespace).
// This is so that we get a fair run at writing audit entries if Vault
// has taken up a lot of time handling the request before audit (response)
// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
// cancelled context and refuse to process the nodes further.
ns, err := namespace.FromContext(ctx)
if err != nil {
retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err))
return retErr.ErrorOrNil()
}
// Evaluate whether we need a new context for auditing.
var auditContext context.Context
if isContextViable(ctx) {
auditContext = ctx
} else {
// In cases where we are trying to audit the response, and the existing
// context is not viable due to a short deadline, we detach ourselves from
// the original context (keeping only the namespace).
// This is so that we get a fair run at writing audit entries if Vault
// has taken up a lot of time handling the request before audit (response)
// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
// cancelled context and refuse to process the nodes further.
ns, err := namespace.FromContext(ctx)
if err != nil {
retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err))
return retErr.ErrorOrNil()
}

auditContext, auditCancel := context.WithTimeout(context.Background(), 5*time.Second)
defer auditCancel()
auditContext = namespace.ContextWithNamespace(auditContext, ns)
tempContext, auditCancel := context.WithTimeout(context.Background(), timeout)
defer auditCancel()
auditContext = namespace.ContextWithNamespace(tempContext, ns)
}

var status eventlogger.Status
if hasAuditPipelines(b.broker) {
Expand Down Expand Up @@ -424,3 +459,26 @@ func (b *Broker) IsRegistered(name string) bool {

return b.isRegisteredByName(name)
}

// isContextViable examines the supplied context to see if its own deadline would
// occur later than a newly created context with a specific timeout.
// If the existing context is viable it can be used 'as-is', if not, the caller
// should consider creating a new context with the relevant deadline and associated
// context values (e.g. namespace) in order to reduce the likelihood that the
// audit system believes there is a failure in audit (and updating its metrics)
// when the root cause is elsewhere.
Comment on lines +463 to +469
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

func isContextViable(ctx context.Context) bool {
if ctx == nil {
return false
}

deadline, hasDeadline := ctx.Deadline()

// If there's no deadline on the context then we don't need to worry about
// it being cancelled due to a timeout.
if !hasDeadline {
return true
}

return deadline.After(time.Now().Add(timeout))
}
49 changes: 49 additions & 0 deletions audit/broker_test.go
Expand Up @@ -158,3 +158,52 @@ func BenchmarkAuditBroker_File_Request_DevNull(b *testing.B) {
}
})
}

// TestBroker_isContextViable_basics checks the expected result of isContextViable
// for basic inputs such as nil and a never-ending context.
func TestBroker_isContextViable_basics(t *testing.T) {
t.Parallel()

require.False(t, isContextViable(nil))
require.True(t, isContextViable(context.Background()))
}

// TestBroker_isContextViable_timeouts checks the expected result of isContextViable
// for various timeout durations.
func TestBroker_isContextViable_timeouts(t *testing.T) {
t.Parallel()

tests := map[string]struct {
timeout time.Duration
expected bool
}{
"2s-smaller-deadline": {
timeout: timeout - 2*time.Second,
expected: false,
},
"same-deadline": {
timeout: timeout,
expected: false, // Expected as a near miss
},
"same-deadline-plus": {
timeout: timeout + 5*time.Millisecond,
expected: true,
},
"2x-longer-deadline": {
timeout: timeout * 2,
expected: true,
},
}

for name, tc := range tests {
name := name
tc := tc
t.Run(name, func(t *testing.T) {
t.Parallel()

ctx, cancel := context.WithTimeout(context.Background(), tc.timeout)
t.Cleanup(func() { cancel() })
require.Equal(t, tc.expected, isContextViable(ctx))
})
}
}
5 changes: 5 additions & 0 deletions changelog/26616.txt
@@ -0,0 +1,5 @@
```release-note:bug
core/audit: Audit logging a Vault request/response will now use a minimum 5 second context timeout.
If the existing context deadline occurs later than 5s in the future, it will be used, otherwise a
new context, separate from the original will be used.
```