Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client: include details about GOAWAYs in status messages #4316

Merged
merged 11 commits into from Apr 24, 2021
16 changes: 13 additions & 3 deletions internal/transport/http2_client.go
Expand Up @@ -115,6 +115,9 @@ type http2Client struct {
// goAwayReason records the http2.ErrCode and debug data received with the
// GoAway frame.
goAwayReason GoAwayReason
// goAwayDebugMessage contains a detailed human readable string about a
// GoAway frame, useful for error messages.
goAwayDebugMessage string
// A condition variable used to signal when the keepalive goroutine should
// go dormant. The condition for dormancy is based on the number of active
// streams and the `PermitWithoutStream` keepalive client parameter. And
Expand Down Expand Up @@ -872,6 +875,12 @@ func (t *http2Client) Close(err error) {
if channelz.IsOn() {
channelz.RemoveEntry(t.channelzID)
}
// Append info about previous goaways if there were any, since this may be important
// for understanding the root cause for this connection to be closed.
_, goAwayDebugMessage := t.GetGoAwayReason()
if len(goAwayDebugMessage) > 0 {
err = fmt.Errorf("closing transport due to: %v, received prior goaway: %v", err, goAwayDebugMessage)
}
// Notify all active streams.
for _, s := range streams {
t.closeStream(s, err, false, http2.ErrCodeNo, status.New(codes.Unavailable, err.Error()), nil, false)
Expand Down Expand Up @@ -1146,7 +1155,7 @@ func (t *http2Client) handleGoAway(f *http2.GoAwayFrame) {
}
}
id := f.LastStreamID
if id > 0 && id%2 != 1 {
if id > 0 && id%2 == 0 {
t.mu.Unlock()
t.Close(connectionErrorf(true, nil, "received goaway with non-zero even-numbered numbered stream id: %v", id))
return
Expand Down Expand Up @@ -1212,12 +1221,13 @@ func (t *http2Client) setGoAwayReason(f *http2.GoAwayFrame) {
t.goAwayReason = GoAwayTooManyPings
}
}
t.goAwayDebugMessage = fmt.Sprintf("code: %s, debug data: %v", f.ErrCode, string(f.DebugData()))
}

func (t *http2Client) GetGoAwayReason() GoAwayReason {
func (t *http2Client) GetGoAwayReason() (GoAwayReason, string) {
t.mu.Lock()
defer t.mu.Unlock()
return t.goAwayReason
return t.goAwayReason, t.goAwayDebugMessage
}

func (t *http2Client) handleWindowUpdate(f *http2.WindowUpdateFrame) {
Expand Down
10 changes: 5 additions & 5 deletions internal/transport/keepalive_test.go
Expand Up @@ -69,7 +69,7 @@ func (s) TestMaxConnectionIdle(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayNoReason {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason)
}
case <-timeout.C:
Expand Down Expand Up @@ -143,7 +143,7 @@ func (s) TestMaxConnectionAge(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayNoReason {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayNoReason {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayNoReason)
}
case <-timeout.C:
Expand Down Expand Up @@ -403,7 +403,7 @@ func (s) TestKeepaliveClientFrequency(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings)
}
case <-timeout.C:
Expand Down Expand Up @@ -448,7 +448,7 @@ func (s) TestKeepaliveServerEnforcementWithAbusiveClientNoRPC(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings)
}
case <-timeout.C:
Expand Down Expand Up @@ -498,7 +498,7 @@ func (s) TestKeepaliveServerEnforcementWithAbusiveClientWithRPC(t *testing.T) {
if !timeout.Stop() {
<-timeout.C
}
if reason := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
if reason, _ := client.GetGoAwayReason(); reason != GoAwayTooManyPings {
t.Fatalf("GoAwayReason is %v, want %v", reason, GoAwayTooManyPings)
}
case <-timeout.C:
Expand Down
5 changes: 3 additions & 2 deletions internal/transport/transport.go
Expand Up @@ -656,8 +656,9 @@ type ClientTransport interface {
// HTTP/2).
GoAway() <-chan struct{}

// GetGoAwayReason returns the reason why GoAway frame was received.
GetGoAwayReason() GoAwayReason
// GetGoAwayReason returns the reason why GoAway frame was received, along
// with a human readable string with debug info.
GetGoAwayReason() (GoAwayReason, string)

// RemoteAddr returns the remote network address.
RemoteAddr() net.Addr
Expand Down
80 changes: 80 additions & 0 deletions test/end2end_test.go
Expand Up @@ -55,12 +55,14 @@ import (
"google.golang.org/grpc/health"
healthgrpc "google.golang.org/grpc/health/grpc_health_v1"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/channelz"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/internal/grpctest"
"google.golang.org/grpc/internal/stubserver"
"google.golang.org/grpc/internal/testutils"
"google.golang.org/grpc/internal/transport"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/resolver"
Expand Down Expand Up @@ -1380,6 +1382,84 @@ func (s) TestDetailedConnectionCloseErrorPropagatesToRpcError(t *testing.T) {
close(rpcDoneOnClient)
}

func (s) TestDetailedGoawayErrorOnGracefulClosePropagatesToRPCError(t *testing.T) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as in the other PR about using StubServer and not enumerating the test environments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

rpcDoneOnClient := make(chan struct{})
ss := &stubserver.StubServer{
FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error {
<-rpcDoneOnClient
return status.Error(codes.Internal, "arbitrary status")
},
}
sopts := []grpc.ServerOption{
grpc.KeepaliveParams(keepalive.ServerParameters{
MaxConnectionAge: time.Millisecond * 100,
MaxConnectionAgeGrace: time.Millisecond,
}),
}
if err := ss.Start(sopts); err != nil {
t.Fatalf("Error starting endpoint server: %v", err)
}
defer ss.Stop()

ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
stream, err := ss.Client.FullDuplexCall(ctx)
if err != nil {
t.Fatalf("%v.FullDuplexCall = _, %v, want _, <nil>", ss.Client, err)
}
const expectedErrorMessageSubstring = "received prior goaway: code: NO_ERROR"
_, err = stream.Recv()
close(rpcDoneOnClient)
if err == nil || !strings.Contains(err.Error(), expectedErrorMessageSubstring) {
t.Fatalf("%v.Recv() = _, %v, want _, rpc error containing substring: %q", stream, err, expectedErrorMessageSubstring)
}
}

func (s) TestDetailedGoawayErrorOnAbruptClosePropagatesToRPCError(t *testing.T) {
// set the min keepalive time very low so that this test can take
// a reasonable amount of time
prev := internal.KeepaliveMinPingTime
internal.KeepaliveMinPingTime = time.Millisecond
defer func() { internal.KeepaliveMinPingTime = prev }()

rpcDoneOnClient := make(chan struct{})
ss := &stubserver.StubServer{
FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error {
<-rpcDoneOnClient
return status.Error(codes.Internal, "arbitrary status")
},
}
sopts := []grpc.ServerOption{
grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
MinTime: time.Second * 1000, /* arbitrary, large value */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fun fact: you can tell a google home "go back / skip ahead a {year,century,millenium(!)}" while playing a song/video to go to the beginning/end.

}),
}
dopts := []grpc.DialOption{
grpc.WithKeepaliveParams(keepalive.ClientParameters{
Time: time.Millisecond, /* should trigger "too many pings" error quickly */
Timeout: time.Second * 1000, /* arbitrary, large value */
PermitWithoutStream: false,
}),
}
if err := ss.Start(sopts, dopts...); err != nil {
t.Fatalf("Error starting endpoint server: %v", err)
}
defer ss.Stop()

ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
stream, err := ss.Client.FullDuplexCall(ctx)
if err != nil {
t.Fatalf("%v.FullDuplexCall = _, %v, want _, <nil>", ss.Client, err)
}
const expectedErrorMessageSubstring = "received prior goaway: code: ENHANCE_YOUR_CALM, debug data: too_many_pings"
_, err = stream.Recv()
close(rpcDoneOnClient)
if err == nil || !strings.Contains(err.Error(), expectedErrorMessageSubstring) {
t.Fatalf("%v.Recv() = _, %v, want _, rpc error containing substring: |%v|", stream, err, expectedErrorMessageSubstring)
}
}

func (s) TestClientConnCloseAfterGoAwayWithActiveStream(t *testing.T) {
for _, e := range listTestEnv() {
if e.name == "handler-tls" {
Expand Down