hashicorp · peteski22 · Apr 30, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 30, 2024
diff --git a/audit/broker.go b/audit/broker.go
@@ -21,6 +21,11 @@ import (
 	"github.com/hashicorp/vault/sdk/logical"
 )
 
+const (
+	// timeout is the duration which should be used for context related timeouts.
+	timeout = 5 * time.Second
+)
+
 var (
 	_ Registrar = (*Broker)(nil)
 	_ Auditor   = (*Broker)(nil)
@@ -276,9 +281,32 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (ret erro
 
 	e.Data = in
 
+	// Evaluate whether we need a new context for auditing.
+	var auditContext context.Context
+	if isContextViable(ctx) {
+		auditContext = ctx
+	} else {
+		// In cases where we are trying to audit the request, and the existing
+		// context is not viable due to a short deadline, we detach ourselves from
+		// the original context (keeping only the namespace).
+		// This is so that we get a fair run at writing audit entries if Vault
+		// has taken up a lot of time handling the request before audit (request)
+		// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
+		// cancelled context and refuse to process the nodes further.
+		ns, err := namespace.FromContext(ctx)
+		if err != nil {
+			retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err))
+			return retErr.ErrorOrNil()
+		}
+
+		tempContext, auditCancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer auditCancel()
+		auditContext = namespace.ContextWithNamespace(tempContext, ns)
+	}
+
 	var status eventlogger.Status
 	if hasAuditPipelines(b.broker) {
-		status, err = b.broker.Send(ctx, event.AuditType.AsEventType(), e)
+		status, err = b.broker.Send(auditContext, event.AuditType.AsEventType(), e)
 		if err != nil {
 			retErr = multierror.Append(retErr, multierror.Append(err, status.Warnings...))
 			return retErr.ErrorOrNil()
@@ -297,7 +325,7 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (ret erro
 	}
 
 	// Handle any additional audit that is required (Enterprise/CE dependant).
-	err = b.handleAdditionalAudit(ctx, e)
+	err = b.handleAdditionalAudit(auditContext, e)
 	if err != nil {
 		retErr = multierror.Append(retErr, err)
 	}
@@ -335,21 +363,28 @@ func (b *Broker) LogResponse(ctx context.Context, in *logical.LogInput) (ret err
 
 	e.Data = in
 
-	// In cases where we are trying to audit the response, we detach
-	// ourselves from the original context (keeping only the namespace).
-	// This is so that we get a fair run at writing audit entries if Vault
-	// has taken up a lot of time handling the request before audit (response)
-	// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
-	// cancelled context and refuse to process the nodes further.
-	ns, err := namespace.FromContext(ctx)
-	if err != nil {
-		retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err))
-		return retErr.ErrorOrNil()
-	}
+	// Evaluate whether we need a new context for auditing.
+	var auditContext context.Context
+	if isContextViable(ctx) {
+		auditContext = ctx
+	} else {
+		// In cases where we are trying to audit the response, and the existing
+		// context is not viable due to a short deadline, we detach ourselves from
+		// the original context (keeping only the namespace).
+		// This is so that we get a fair run at writing audit entries if Vault
+		// has taken up a lot of time handling the request before audit (response)
+		// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
+		// cancelled context and refuse to process the nodes further.
+		ns, err := namespace.FromContext(ctx)
+		if err != nil {
+			retErr = multierror.Append(retErr, fmt.Errorf("namespace missing from context: %w", err))
+			return retErr.ErrorOrNil()
+		}
 
-	auditContext, auditCancel := context.WithTimeout(context.Background(), 5*time.Second)
-	defer auditCancel()
-	auditContext = namespace.ContextWithNamespace(auditContext, ns)
+		tempContext, auditCancel := context.WithTimeout(context.Background(), timeout)
+		defer auditCancel()
+		auditContext = namespace.ContextWithNamespace(tempContext, ns)
+	}
 
 	var status eventlogger.Status
 	if hasAuditPipelines(b.broker) {
@@ -424,3 +459,26 @@ func (b *Broker) IsRegistered(name string) bool {
 
 	return b.isRegisteredByName(name)
 }
+
+// isContextViable examines the supplied context to see if its own deadline would
+// occur later than a newly created context with a specific timeout.
+// If the existing context is viable it can be used 'as-is', if not, the caller
+// should consider creating a new context with the relevant deadline and associated
+// context values (e.g. namespace) in order to reduce the likelihood that the
+// audit system believes there is a failure in audit (and updating its metrics)
+// when the root cause is elsewhere.
+func isContextViable(ctx context.Context) bool {
+	if ctx == nil {
+		return false
+	}
+
+	deadline, hasDeadline := ctx.Deadline()
+
+	// If there's no deadline on the context then we don't need to worry about
+	// it being cancelled due to a timeout.
+	if !hasDeadline {
+		return true
+	}
+
+	return deadline.After(time.Now().Add(timeout))
+}
diff --git a/audit/broker_test.go b/audit/broker_test.go
@@ -158,3 +158,52 @@ func BenchmarkAuditBroker_File_Request_DevNull(b *testing.B) {
 		}
 	})
 }
+
+// TestBroker_isContextViable_basics checks the expected result of isContextViable
+// for basic inputs such as nil and a never-ending context.
+func TestBroker_isContextViable_basics(t *testing.T) {
+	t.Parallel()
+
+	require.False(t, isContextViable(nil))
+	require.True(t, isContextViable(context.Background()))
+}
+
+// TestBroker_isContextViable_timeouts checks the expected result of isContextViable
+// for various timeout durations.
+func TestBroker_isContextViable_timeouts(t *testing.T) {
+	t.Parallel()
+
+	tests := map[string]struct {
+		timeout  time.Duration
+		expected bool
+	}{
+		"2s-smaller-deadline": {
+			timeout:  timeout - 2*time.Second,
+			expected: false,
+		},
+		"same-deadline": {
+			timeout:  timeout,
+			expected: false, // Expected as a near miss
+		},
+		"same-deadline-plus": {
+			timeout:  timeout + 5*time.Millisecond,
+			expected: true,
+		},
+		"2x-longer-deadline": {
+			timeout:  timeout * 2,
+			expected: true,
+		},
+	}
+
+	for name, tc := range tests {
+		name := name
+		tc := tc
+		t.Run(name, func(t *testing.T) {
+			t.Parallel()
+
+			ctx, cancel := context.WithTimeout(context.Background(), tc.timeout)
+			t.Cleanup(func() { cancel() })
+			require.Equal(t, tc.expected, isContextViable(ctx))
+		})
+	}
+}
diff --git a/changelog/26616.txt b/changelog/26616.txt
@@ -0,0 +1,5 @@
+```release-note:bug
+core/audit: Audit logging a Vault request/response will now use a minimum 5 second context timeout. 
+If the existing context deadline occurs later than 5s in the future, it will be used, otherwise a 
+new context, separate from the original will be used.
+```