Skip to content

Commit

Permalink
client: include details about GOAWAYs in status messages (#4316)
Browse files Browse the repository at this point in the history
  • Loading branch information
apolcyn committed Apr 24, 2021
1 parent e158e3e commit 9572fd6
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 10 deletions.
16 changes: 13 additions & 3 deletions internal/transport/http2_client.go
Expand Up @@ -115,6 +115,9 @@ type http2Client struct {
// goAwayReason records the http2.ErrCode and debug data received with the
// GoAway frame.
goAwayReason GoAwayReason
// goAwayDebugMessage contains a detailed human readable string about a
// GoAway frame, useful for error messages.
goAwayDebugMessage string
// A condition variable used to signal when the keepalive goroutine should
// go dormant. The condition for dormancy is based on the number of active
// streams and the `PermitWithoutStream` keepalive client parameter. And
Expand Down Expand Up @@ -872,6 +875,12 @@ func (t *http2Client) Close(err error) {
if channelz.IsOn() {
channelz.RemoveEntry(t.channelzID)
}
// Append info about previous goaways if there were any, since this may be important
// for understanding the root cause for this connection to be closed.
_, goAwayDebugMessage := t.GetGoAwayReason()
if len(goAwayDebugMessage) > 0 {
err = fmt.Errorf("closing transport due to: %v, received prior goaway: %v", err, goAwayDebugMessage)
}
// Notify all active streams.
for _, s := range streams {
t.closeStream(s, err, false, http2.ErrCodeNo, status.New(codes.Unavailable, err.Error()), nil, false)
Expand Down Expand Up @@ -1146,7 +1155,7 @@ func (t *http2Client) handleGoAway(f *http2.GoAwayFrame) {
}
}
id := f.LastStreamID
if id > 0 && id%2 != 1 {
if id > 0 && id%2 == 0 {
t.mu.Unlock()
t.Close(connectionErrorf(true, nil, "received goaway with non-zero even-numbered numbered stream id: %v", id))
return
Expand Down Expand Up @@ -1212,12 +1221,13 @@ func (t *http2Client) setGoAwayReason(f *http2.GoAwayFrame) {
t.goAwayReason = GoAwayTooManyPings
}
}
t.goAwayDebugMessage = fmt.Sprintf("code: %s, debug data: %v", f.ErrCode, string(f.DebugData()))
}

func (t *http2Client) GetGoAwayReason() GoAwayReason {
func (t *http2Client) GetGoAwayReason() (GoAwayReason, string) {
t.mu.Lock()
defer t.mu.Unlock()
return t.goAwayReason
return t.goAwayReason, t.goAwayDebugMessage
}

func (t *http2Client) handleWindowUpdate(f *http2.WindowUpdateFrame) {
Expand Down
10 changes: 5 additions & 5 deletions internal/transport/keepalive_test.go
Expand Up @@ -69,7 +69,7 @@ func (s) TestMaxConnectionIdle(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayNoReason {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason)
}
case <-timeout.C:
Expand Down Expand Up @@ -143,7 +143,7 @@ func (s) TestMaxConnectionAge(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayNoReason {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason)
}
case <-timeout.C:
Expand Down Expand Up @@ -403,7 +403,7 @@ func (s) TestKeepaliveClientFrequency(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings)
}
case <-timeout.C:
Expand Down Expand Up @@ -448,7 +448,7 @@ func (s) TestKeepaliveServerEnforcementWithAbusiveClientNoRPC(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings)
}
case <-timeout.C:
Expand Down Expand Up @@ -498,7 +498,7 @@ func (s) TestKeepaliveServerEnforcementWithAbusiveClientWithRPC(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings)
}
case <-timeout.C:
Expand Down
5 changes: 3 additions & 2 deletions internal/transport/transport.go
Expand Up @@ -656,8 +656,9 @@ type ClientTransport interface {
// HTTP/2).
GoAway() <-chan struct{}

// GetGoAwayReason returns the reason why GoAway frame was received.
GetGoAwayReason() GoAwayReason
// GetGoAwayReason returns the reason why GoAway frame was received, along
// with a human readable string with debug info.
GetGoAwayReason() (GoAwayReason, string)

// RemoteAddr returns the remote network address.
RemoteAddr() net.Addr
Expand Down
80 changes: 80 additions & 0 deletions test/end2end_test.go
Expand Up @@ -55,12 +55,14 @@ import (
"google.golang.org/grpc/health"
healthgrpc "google.golang.org/grpc/health/grpc_health_v1"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/channelz"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/internal/grpctest"
"google.golang.org/grpc/internal/stubserver"
"google.golang.org/grpc/internal/testutils"
"google.golang.org/grpc/internal/transport"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/resolver"
Expand Down Expand Up @@ -1380,6 +1382,84 @@ func (s) TestDetailedConnectionCloseErrorPropagatesToRpcError(t *testing.T) {
close(rpcDoneOnClient)
}

func (s) TestDetailedGoawayErrorOnGracefulClosePropagatesToRPCError(t *testing.T) {
rpcDoneOnClient := make(chan struct{})
ss := &stubserver.StubServer{
FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error {
<-rpcDoneOnClient
return status.Error(codes.Internal, "arbitrary status")
},
}
sopts := []grpc.ServerOption{
grpc.KeepaliveParams(keepalive.ServerParameters{
MaxConnectionAge: time.Millisecond * 100,
MaxConnectionAgeGrace: time.Millisecond,
}),
}
if err := ss.Start(sopts); err != nil {
t.Fatalf("Error starting endpoint server: %v", err)
}
defer ss.Stop()

ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
stream, err := ss.Client.FullDuplexCall(ctx)
if err != nil {
t.Fatalf("%v.FullDuplexCall = _, %v, want _, <nil>", ss.Client, err)
}
const expectedErrorMessageSubstring = "received prior goaway: code: NO_ERROR"
_, err = stream.Recv()
close(rpcDoneOnClient)
if err == nil || !strings.Contains(err.Error(), expectedErrorMessageSubstring) {
t.Fatalf("%v.Recv() = _, %v, want _, rpc error containing substring: %q", stream, err, expectedErrorMessageSubstring)
}
}

func (s) TestDetailedGoawayErrorOnAbruptClosePropagatesToRPCError(t *testing.T) {
// set the min keepalive time very low so that this test can take
// a reasonable amount of time
prev := internal.KeepaliveMinPingTime
internal.KeepaliveMinPingTime = time.Millisecond
defer func() { internal.KeepaliveMinPingTime = prev }()

rpcDoneOnClient := make(chan struct{})
ss := &stubserver.StubServer{
FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error {
<-rpcDoneOnClient
return status.Error(codes.Internal, "arbitrary status")
},
}
sopts := []grpc.ServerOption{
grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
MinTime: time.Second * 1000, /* arbitrary, large value */
}),
}
dopts := []grpc.DialOption{
grpc.WithKeepaliveParams(keepalive.ClientParameters{
Time: time.Millisecond, /* should trigger "too many pings" error quickly */
Timeout: time.Second * 1000, /* arbitrary, large value */
PermitWithoutStream: false,
}),
}
if err := ss.Start(sopts, dopts...); err != nil {
t.Fatalf("Error starting endpoint server: %v", err)
}
defer ss.Stop()

ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
stream, err := ss.Client.FullDuplexCall(ctx)
if err != nil {
t.Fatalf("%v.FullDuplexCall = _, %v, want _, <nil>", ss.Client, err)
}
const expectedErrorMessageSubstring = "received prior goaway: code: ENHANCE_YOUR_CALM, debug data: too_many_pings"
_, err = stream.Recv()
close(rpcDoneOnClient)
if err == nil || !strings.Contains(err.Error(), expectedErrorMessageSubstring) {
t.Fatalf("%v.Recv() = _, %v, want _, rpc error containing substring: |%v|", stream, err, expectedErrorMessageSubstring)
}
}

func (s) TestClientConnCloseAfterGoAwayWithActiveStream(t *testing.T) {
for _, e := range listTestEnv() {
if e.name == "handler-tls" {
Expand Down

0 comments on commit 9572fd6

Please sign in to comment.