diff --git a/internal/transport/http2_client.go b/internal/transport/http2_client.go index 892317cc6fc..48c5e52edae 100644 --- a/internal/transport/http2_client.go +++ b/internal/transport/http2_client.go @@ -115,6 +115,9 @@ type http2Client struct { // goAwayReason records the http2.ErrCode and debug data received with the // GoAway frame. goAwayReason GoAwayReason + // goAwayDebugMessage contains a detailed human readable string about a + // GoAway frame, useful for error messages. + goAwayDebugMessage string // A condition variable used to signal when the keepalive goroutine should // go dormant. The condition for dormancy is based on the number of active // streams and the `PermitWithoutStream` keepalive client parameter. And @@ -872,6 +875,12 @@ func (t *http2Client) Close(err error) { if channelz.IsOn() { channelz.RemoveEntry(t.channelzID) } + // Append info about previous goaways if there were any, since this may be important + // for understanding the root cause for this connection to be closed. + _, goAwayDebugMessage := t.GetGoAwayReason() + if len(goAwayDebugMessage) > 0 { + err = fmt.Errorf("closing transport due to: %v, received prior goaway: %v", err, goAwayDebugMessage) + } // Notify all active streams. for _, s := range streams { t.closeStream(s, err, false, http2.ErrCodeNo, status.New(codes.Unavailable, err.Error()), nil, false) @@ -1146,7 +1155,7 @@ func (t *http2Client) handleGoAway(f *http2.GoAwayFrame) { } } id := f.LastStreamID - if id > 0 && id%2 != 1 { + if id > 0 && id%2 == 0 { t.mu.Unlock() t.Close(connectionErrorf(true, nil, "received goaway with non-zero even-numbered numbered stream id: %v", id)) return @@ -1212,12 +1221,13 @@ func (t *http2Client) setGoAwayReason(f *http2.GoAwayFrame) { t.goAwayReason = GoAwayTooManyPings } } + t.goAwayDebugMessage = fmt.Sprintf("code: %s, debug data: %v", f.ErrCode, string(f.DebugData())) } -func (t *http2Client) GetGoAwayReason() GoAwayReason { +func (t *http2Client) GetGoAwayReason() (GoAwayReason, string) { t.mu.Lock() defer t.mu.Unlock() - return t.goAwayReason + return t.goAwayReason, t.goAwayDebugMessage } func (t *http2Client) handleWindowUpdate(f *http2.WindowUpdateFrame) { diff --git a/internal/transport/keepalive_test.go b/internal/transport/keepalive_test.go index d684f582710..571cacca7e9 100644 --- a/internal/transport/keepalive_test.go +++ b/internal/transport/keepalive_test.go @@ -69,7 +69,7 @@ func (s) TestMaxConnectionIdle(t *testing.T) { if !timeout.Stop() { <-timeout.C } - if reason := client.GetGoAwayReason(); reason != GoAwayNoReason { + if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason { t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason) } case <-timeout.C: @@ -143,7 +143,7 @@ func (s) TestMaxConnectionAge(t *testing.T) { if !timeout.Stop() { <-timeout.C } - if reason := client.GetGoAwayReason(); reason != GoAwayNoReason { + if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason { t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason) } case <-timeout.C: @@ -403,7 +403,7 @@ func (s) TestKeepaliveClientFrequency(t *testing.T) { if !timeout.Stop() { <-timeout.C } - if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings { + if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings { t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings) } case <-timeout.C: @@ -448,7 +448,7 @@ func (s) TestKeepaliveServerEnforcementWithAbusiveClientNoRPC(t *testing.T) { if !timeout.Stop() { <-timeout.C } - if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings { + if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings { t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings) } case <-timeout.C: @@ -498,7 +498,7 @@ func (s) TestKeepaliveServerEnforcementWithAbusiveClientWithRPC(t *testing.T) { if !timeout.Stop() { <-timeout.C } - if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings { + if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings { t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings) } case <-timeout.C: diff --git a/internal/transport/transport.go b/internal/transport/transport.go index 068f4d0e502..6cc1031fd92 100644 --- a/internal/transport/transport.go +++ b/internal/transport/transport.go @@ -656,8 +656,9 @@ type ClientTransport interface { // HTTP/2). GoAway() <-chan struct{} - // GetGoAwayReason returns the reason why GoAway frame was received. - GetGoAwayReason() GoAwayReason + // GetGoAwayReason returns the reason why GoAway frame was received, along + // with a human readable string with debug info. + GetGoAwayReason() (GoAwayReason, string) // RemoteAddr returns the remote network address. RemoteAddr() net.Addr diff --git a/test/end2end_test.go b/test/end2end_test.go index 832ac8bd718..1baf2e347d1 100644 --- a/test/end2end_test.go +++ b/test/end2end_test.go @@ -55,12 +55,14 @@ import ( "google.golang.org/grpc/health" healthgrpc "google.golang.org/grpc/health/grpc_health_v1" healthpb "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/transport" + "google.golang.org/grpc/keepalive" "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" "google.golang.org/grpc/resolver" @@ -1380,6 +1382,84 @@ func (s) TestDetailedConnectionCloseErrorPropagatesToRpcError(t *testing.T) { close(rpcDoneOnClient) } +func (s) TestDetailedGoawayErrorOnGracefulClosePropagatesToRPCError(t *testing.T) { + rpcDoneOnClient := make(chan struct{}) + ss := &stubserver.StubServer{ + FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error { + <-rpcDoneOnClient + return status.Error(codes.Internal, "arbitrary status") + }, + } + sopts := []grpc.ServerOption{ + grpc.KeepaliveParams(keepalive.ServerParameters{ + MaxConnectionAge: time.Millisecond * 100, + MaxConnectionAgeGrace: time.Millisecond, + }), + } + if err := ss.Start(sopts); err != nil { + t.Fatalf("Error starting endpoint server: %v", err) + } + defer ss.Stop() + + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + stream, err := ss.Client.FullDuplexCall(ctx) + if err != nil { + t.Fatalf("%v.FullDuplexCall = _, %v, want _, ", ss.Client, err) + } + const expectedErrorMessageSubstring = "received prior goaway: code: NO_ERROR" + _, err = stream.Recv() + close(rpcDoneOnClient) + if err == nil || !strings.Contains(err.Error(), expectedErrorMessageSubstring) { + t.Fatalf("%v.Recv() = _, %v, want _, rpc error containing substring: %q", stream, err, expectedErrorMessageSubstring) + } +} + +func (s) TestDetailedGoawayErrorOnAbruptClosePropagatesToRPCError(t *testing.T) { + // set the min keepalive time very low so that this test can take + // a reasonable amount of time + prev := internal.KeepaliveMinPingTime + internal.KeepaliveMinPingTime = time.Millisecond + defer func() { internal.KeepaliveMinPingTime = prev }() + + rpcDoneOnClient := make(chan struct{}) + ss := &stubserver.StubServer{ + FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error { + <-rpcDoneOnClient + return status.Error(codes.Internal, "arbitrary status") + }, + } + sopts := []grpc.ServerOption{ + grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ + MinTime: time.Second * 1000, /* arbitrary, large value */ + }), + } + dopts := []grpc.DialOption{ + grpc.WithKeepaliveParams(keepalive.ClientParameters{ + Time: time.Millisecond, /* should trigger "too many pings" error quickly */ + Timeout: time.Second * 1000, /* arbitrary, large value */ + PermitWithoutStream: false, + }), + } + if err := ss.Start(sopts, dopts...); err != nil { + t.Fatalf("Error starting endpoint server: %v", err) + } + defer ss.Stop() + + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + stream, err := ss.Client.FullDuplexCall(ctx) + if err != nil { + t.Fatalf("%v.FullDuplexCall = _, %v, want _, ", ss.Client, err) + } + const expectedErrorMessageSubstring = "received prior goaway: code: ENHANCE_YOUR_CALM, debug data: too_many_pings" + _, err = stream.Recv() + close(rpcDoneOnClient) + if err == nil || !strings.Contains(err.Error(), expectedErrorMessageSubstring) { + t.Fatalf("%v.Recv() = _, %v, want _, rpc error containing substring: |%v|", stream, err, expectedErrorMessageSubstring) + } +} + func (s) TestClientConnCloseAfterGoAwayWithActiveStream(t *testing.T) { for _, e := range listTestEnv() { if e.name == "handler-tls" {