forked from googleapis/google-cloud-go
/
retry.go
122 lines (114 loc) · 3.72 KB
/
retry.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package managedwriter
import (
"context"
"errors"
"io"
"time"
"github.com/googleapis/gax-go/v2"
"github.com/googleapis/gax-go/v2/apierror"
storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// defaultAppendRetries is the attempt count above which RetryAppend stops
// retrying an append.
var defaultAppendRetries = 3

// knownReconnectErrors lists the errors that shouldReconnect matches against
// (via errors.Is) when deciding whether the stream needs to be reopened.
var knownReconnectErrors = []error{
	io.EOF,
	status.Error(codes.Unavailable, "the connection is draining"), // errStreamDrain in gRPC transport
}
// defaultRetryer is the retry policy implemented in this file. Its Retry
// method classifies errors seen while enqueuing, and its RetryAppend method
// classifies errors returned for an individual append.
type defaultRetryer struct {
// bo supplies the pause duration returned alongside each retry decision.
bo gax.Backoff
}
// Retry is the predicate used when enqueuing. It reports whether err should be
// retried, along with the next pause drawn from the retryer's backoff.
//
// Classification:
//   - gRPC status errors are retried only when the code is Unavailable.
//   - context.Canceled and context.DeadlineExceeded are never retried.
//   - every other non-status error is retried.
//
// The backoff is consulted exactly once per call, whether or not the error is
// deemed retryable.
func (r *defaultRetryer) Retry(err error) (pause time.Duration, shouldRetry bool) {
	if st, isStatus := status.FromError(err); isStatus {
		// Status-based error: only Unavailable is considered transient.
		shouldRetry = st.Code() == codes.Unavailable
	} else {
		// Non-status error: retry unless the context was cancelled or expired.
		shouldRetry = !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded)
	}
	return r.bo.Pause(), shouldRetry
}
// RetryAppend inspects the error received for an append and decides whether
// the append should be re-enqueued. attemptCount is compared against
// defaultAppendRetries to bound the number of retries.
//
// It returns the backoff pause and true when the append should be retried, and
// (0, false) otherwise. The backoff is only advanced on retryable outcomes.
func (r *defaultRetryer) RetryAppend(err error, attemptCount int) (pause time.Duration, shouldRetry bool) {
	// A nil error is handled defensively; beyond that, stop once the retry
	// budget has been exceeded.
	if err == nil || attemptCount > defaultAppendRetries {
		return 0, false
	}

	apiErr, isAPIErr := apierror.FromError(err)
	if !isAPIErr {
		// Non status-based errors. Context errors are never retried; the only
		// retryable ones are those that also trigger a fast reconnect, since
		// they reflect transient problems with the network stream.
		ctxDone := errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)
		if !ctxDone && shouldReconnect(err) {
			return r.bo.Pause(), true
		}
		return 0, false
	}

	details := apiErr.Details()

	// Service-specific error details take precedence: when a StorageError is
	// attached, only OFFSET_OUT_OF_RANGE is retried.
	storageErr := &storagepb.StorageError{}
	if details.ExtractProtoMessage(storageErr) == nil {
		if storageErr.GetCode() != storagepb.StorageError_OFFSET_OUT_OF_RANGE {
			return 0, false
		}
		return r.bo.Pause(), true
	}

	if details.QuotaFailure != nil {
		// TODO: followup with yiru on this, there's some deeper checks on resource exhaustion.
		return r.bo.Pause(), true
	}

	// Fall back to the generic gRPC status code.
	// TODO: is there a special case for codes.ResourceExhausted that's not quota?
	switch apiErr.GRPCStatus().Code() {
	case codes.Aborted, codes.DeadlineExceeded, codes.Internal, codes.Unavailable:
		return r.bo.Pause(), true
	}

	// Everything else is non-retriable.
	return 0, false
}
// shouldReconnect behaves like a retry predicate: given a response's error, it
// reports whether the bidi stream should be forced to close and reopen. It
// returns true when err matches (per errors.Is) any entry of
// knownReconnectErrors; such errors signal that no further appends will
// succeed on the current stream.
func shouldReconnect(err error) bool {
	for i := range knownReconnectErrors {
		if errors.Is(err, knownReconnectErrors[i]) {
			return true
		}
	}
	return false
}