Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client: fix detection of whether IO was performed in NewStream #4611

Merged
merged 3 commits into from
Jul 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 24 additions & 11 deletions internal/transport/http2_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,26 +616,39 @@ func (t *http2Client) getCallAuthData(ctx context.Context, audience string, call
return callAuthData, nil
}

// PerformedIOError wraps an error to indicate IO may have been performed
// before the error occurred.
type PerformedIOError struct {
// NewStreamError wraps an error and reports additional information.
type NewStreamError struct {
Err error

DoNotRetry bool
PerformedIO bool
}

// Error implements error.
func (p PerformedIOError) Error() string {
return p.Err.Error()
func (e NewStreamError) Error() string {
return e.Err.Error()
}

// NewStream creates a stream and registers it into the transport as "active"
// streams.
// streams. All non-nil errors returned will be *NewStreamError.
func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (_ *Stream, err error) {
defer func() {
if err != nil {
nse, ok := err.(*NewStreamError)
if !ok {
nse = &NewStreamError{Err: err}
}
if len(t.perRPCCreds) > 0 || callHdr.Creds != nil {
// We may have performed I/O in the per-RPC creds callback, so do not
// allow transparent retry.
nse.PerformedIO = true
}
err = nse
}
}()
ctx = peer.NewContext(ctx, t.getPeer())
headerFields, err := t.createHeaderFields(ctx, callHdr)
if err != nil {
// We may have performed I/O in the per-RPC creds callback, so do not
// allow transparent retry.
return nil, PerformedIOError{err}
return nil, err
}
s := t.newStream(ctx, callHdr)
cleanup := func(err error) {
Expand Down Expand Up @@ -741,7 +754,7 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (_ *Strea
break
}
if hdrListSizeErr != nil {
return nil, hdrListSizeErr
return nil, &NewStreamError{Err: hdrListSizeErr, DoNotRetry: true}
}
firstTry = false
select {
Expand Down
2 changes: 1 addition & 1 deletion internal/transport/transport_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ func (s) TestGracefulClose(t *testing.T) {
go func() {
defer wg.Done()
str, err := ct.NewStream(ctx, &CallHdr{})
if err == ErrConnClosing {
if err != nil && err.(*NewStreamError).Err == ErrConnClosing {
return
} else if err != nil {
t.Errorf("_.NewStream(_, _) = _, %v, want _, %v", err, ErrConnClosing)
Expand Down
28 changes: 15 additions & 13 deletions rpc_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -829,26 +829,28 @@ func Errorf(c codes.Code, format string, a ...interface{}) error {

// toRPCErr converts an error into an error from the status package.
func toRPCErr(err error) error {
if err == nil || err == io.EOF {
switch err {
case nil, io.EOF:
return err
}
if err == io.ErrUnexpectedEOF {
case context.DeadlineExceeded:
return status.Error(codes.DeadlineExceeded, err.Error())
case context.Canceled:
return status.Error(codes.Canceled, err.Error())
case io.ErrUnexpectedEOF:
return status.Error(codes.Internal, err.Error())
}
if _, ok := status.FromError(err); ok {
return err
}

switch e := err.(type) {
case transport.ConnectionError:
return status.Error(codes.Unavailable, e.Desc)
default:
switch err {
case context.DeadlineExceeded:
return status.Error(codes.DeadlineExceeded, err.Error())
case context.Canceled:
return status.Error(codes.Canceled, err.Error())
}
case *transport.NewStreamError:
return toRPCErr(e.Err)
}

if _, ok := status.FromError(err); ok {
return err
}

return status.Error(codes.Unknown, err.Error())
}

Expand Down
49 changes: 30 additions & 19 deletions stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,12 +421,9 @@ func (a *csAttempt) newStream() error {
cs.callHdr.PreviousAttempts = cs.numRetries
s, err := a.t.NewStream(cs.ctx, cs.callHdr)
if err != nil {
if _, ok := err.(transport.PerformedIOError); ok {
// Return without converting to an RPC error so retry code can
// inspect.
return err
}
return toRPCErr(err)
// Return without converting to an RPC error so retry code can
// inspect.
return err
}
cs.attempt.s = s
cs.attempt.p = &parser{r: s}
Expand Down Expand Up @@ -525,19 +522,28 @@ func (cs *clientStream) commitAttempt() {
// shouldRetry returns nil if the RPC should be retried; otherwise it returns
// the error that should be returned by the operation.
func (cs *clientStream) shouldRetry(err error) error {
unprocessed := false
if cs.attempt.s == nil {
pioErr, ok := err.(transport.PerformedIOError)
if ok {
// Unwrap error.
err = toRPCErr(pioErr.Err)
} else {
unprocessed = true
// Error from NewClientStream.
nse, ok := err.(*transport.NewStreamError)
if !ok {
// Unexpected, but assume no I/O was performed and the RPC is not
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this unexpected? I would think this is the normal race for retry, and the transport is closed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's unexpected to not get a *NewStreamError from NewClientStream. I added the defer to make it always return that type.

// fatal, so retry indefinitely.
return nil
}
if !ok && !cs.callInfo.failFast {
// In the event of a non-IO operation error from NewStream, we
// never attempted to write anything to the wire, so we can retry
// indefinitely for non-fail-fast RPCs.

// Unwrap and convert error.
err = toRPCErr(nse.Err)

// Never retry DoNotRetry errors, which indicate the RPC should not be
// retried due to max header list size violation, etc.
if nse.DoNotRetry {
return err
}

// In the event of a non-IO operation error from NewStream, we never
// attempted to write anything to the wire, so we can retry
// indefinitely.
if !nse.PerformedIO {
return nil
}
}
Expand All @@ -546,6 +552,7 @@ func (cs *clientStream) shouldRetry(err error) error {
return err
}
// Wait for the trailers.
unprocessed := false
if cs.attempt.s != nil {
<-cs.attempt.s.Done()
unprocessed = cs.attempt.s.Unprocessed()
Expand Down Expand Up @@ -634,7 +641,7 @@ func (cs *clientStream) shouldRetry(err error) error {
// Returns nil if a retry was performed and succeeded; error otherwise.
func (cs *clientStream) retryLocked(lastErr error) error {
for {
cs.attempt.finish(lastErr)
cs.attempt.finish(toRPCErr(lastErr))
if err := cs.shouldRetry(lastErr); err != nil {
cs.commitAttemptLocked()
return err
Expand All @@ -661,7 +668,11 @@ func (cs *clientStream) withRetry(op func(a *csAttempt) error, onSuccess func())
for {
if cs.committed {
cs.mu.Unlock()
return op(cs.attempt)
// toRPCErr is used in case the error from the attempt comes from
// NewClientStream, which intentionally doesn't return a status
// error to allow for further inspection; all other errors should
// already be status errors.
return toRPCErr(op(cs.attempt))
}
a := cs.attempt
cs.mu.Unlock()
Expand Down