Skip to content

Commit

Permalink
etcd(ticdc): retry on goaway to be more robust to pd restart (#6798)
Browse files Browse the repository at this point in the history
close #6720
  • Loading branch information
3AceShowHand committed Aug 19, 2022
1 parent 543fd30 commit d500452
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
6 changes: 5 additions & 1 deletion pkg/errorutil/ignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,16 @@ func IsRetryableEtcdError(err error) bool {
return true
default:
}
// when the PD instance was deleted from the PD cluster, it may meet error with `raft:stopped`,
// when the PD instance was deleted from the PD cluster, it may encounter different errors.
// retry on such error make cdc robust to PD / ETCD cluster member removal.
// we should tolerate such cases to make cdc robust to PD / ETCD cluster member change.
// see: https://github.com/etcd-io/etcd/blob/ae36a577d7be/raft/node.go#L35
if strings.Contains(etcdErr.Error(), "raft: stopped") {
return true
}
// see: https://github.com/pingcap/tiflow/issues/6720
if strings.Contains(etcdErr.Error(), "received prior goaway: code: NO_ERROR") {
return true
}
return false
}
3 changes: 3 additions & 0 deletions pkg/errorutil/ignore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ func TestIsRetryableEtcdError(t *testing.T) {
{v3rpc.ErrTimeoutDueToLeaderFail, true},
{v3rpc.ErrNoSpace, true},
{raft.ErrStopped, true},
{errors.New("rpc error: code = Unavailable desc = closing transport due to: " +
"connection error: desc = \\\"error reading from server: EOF\\\", " +
"received prior goaway: code: NO_ERROR\""), true},
}

for _, item := range cases {
Expand Down

0 comments on commit d500452

Please sign in to comment.