Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move exponential backoff to DNS resolver from resolver.ClientConn #4270

Merged
merged 58 commits into from Apr 20, 2021
Merged
Changes from 2 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
dcab165
Save progress
zasweq Mar 15, 2021
8754401
Save progress
zasweq Mar 16, 2021
6bb242d
Clean up
zasweq Mar 16, 2021
8299dad
Clean up
zasweq Mar 16, 2021
da102eb
vet
zasweq Mar 16, 2021
fbda038
vet
zasweq Mar 16, 2021
0ebd685
Added logic to tests
zasweq Mar 16, 2021
7cb488e
Added unit test for exponential backoff
zasweq Mar 16, 2021
ba01815
Vet
zasweq Mar 16, 2021
15ec449
Added another unit test
zasweq Mar 16, 2021
f0a6aed
Changed test
zasweq Mar 16, 2021
72f5538
Fixed failing tests
zasweq Mar 16, 2021
3bc67ff
Vet
zasweq Mar 16, 2021
b31f41f
Deleted old tests
zasweq Mar 16, 2021
8bc20c1
Vet
zasweq Mar 16, 2021
d20f77a
Fixed data race
zasweq Mar 16, 2021
9fe80c1
Vet
zasweq Mar 16, 2021
6b1401c
Fixed timeout
zasweq Mar 16, 2021
797bf3e
Fixed race
zasweq Mar 16, 2021
e5fa7fc
Fixed race
zasweq Mar 16, 2021
a0a24b5
Rewrote failing unit test
zasweq Mar 16, 2021
f39b7b2
Vet
zasweq Mar 16, 2021
fb44fe1
Vet
zasweq Mar 16, 2021
0721699
Fixed race conditions in tests
zasweq Mar 16, 2021
5574d9a
Changed time
zasweq Mar 16, 2021
8572acb
Responded to Doug's comments
zasweq Mar 18, 2021
54d1fd2
Responded to Doug's comments
zasweq Mar 18, 2021
33695df
vet
zasweq Mar 18, 2021
100ba1b
vet
zasweq Mar 18, 2021
8646061
vet
zasweq Mar 18, 2021
e75a12b
Vet
zasweq Mar 18, 2021
f54ecb4
Vet
zasweq Mar 18, 2021
2ecb953
Vet
zasweq Mar 18, 2021
513a51d
Deleted callback in tests
zasweq Mar 19, 2021
c61426f
Added polling to wait group
zasweq Mar 19, 2021
c3d8c01
Responded to Doug's comments
zasweq Mar 26, 2021
32801ec
Vet
zasweq Mar 26, 2021
090069d
Cleaned up comments
zasweq Mar 26, 2021
ac3e18e
Responded to Doug's comments
zasweq Mar 26, 2021
653a564
Responded to Doug's comments
zasweq Mar 26, 2021
f51aa71
Vet
zasweq Mar 26, 2021
625a9e2
Save progress before cleanup
zasweq Apr 1, 2021
e384bab
Clean up
zasweq Apr 1, 2021
d8ffa44
Vet
zasweq Apr 1, 2021
e923d61
Vet
zasweq Apr 1, 2021
0960018
More cleanup
zasweq Apr 1, 2021
98ed270
Save Progress
zasweq Apr 5, 2021
e8f3fff
Changed logic
zasweq Apr 5, 2021
96f2bc0
Incorporated Doug's idea
zasweq Apr 7, 2021
e325361
Responded to Doug's comments
zasweq Apr 12, 2021
e90b3a4
Responded to Doug's comments
zasweq Apr 12, 2021
d9e7620
Responded to Doug's comments
zasweq Apr 13, 2021
fc31662
Vet
zasweq Apr 13, 2021
7c26bb3
Responded to Doug's comments
zasweq Apr 14, 2021
e40bbaa
Vet
zasweq Apr 14, 2021
40b52ef
Responded to Doug's comments
zasweq Apr 20, 2021
6d02b03
Vet
zasweq Apr 20, 2021
e9eed86
Switched send to blocking
zasweq Apr 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
75 changes: 48 additions & 27 deletions internal/resolver/dns/dns_resolver_test.go
Expand Up @@ -34,6 +34,7 @@ import (
grpclbstate "google.golang.org/grpc/balancer/grpclb/state"
"google.golang.org/grpc/internal/envconfig"
"google.golang.org/grpc/internal/leakcheck"
"google.golang.org/grpc/internal/testutils"
"google.golang.org/grpc/resolver"
"google.golang.org/grpc/serviceconfig"
)
Expand Down Expand Up @@ -756,11 +757,11 @@ func TestDNSResolverExponentialBackoff(t *testing.T) {
defer func(nt func(d time.Duration) *time.Timer) {
newTimer = nt
}(newTimer)
timerChan := make(chan *time.Timer, 1)
timerChan := testutils.NewChannel()
newTimer = func(d time.Duration) *time.Timer {
// Will never fire on its own, allows this test to call timer immediately.
t := time.NewTimer(time.Hour)
timerChan <- t
timerChan.SendOrFail(t)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably not what you want. SendOrFail will just not push to the channel if it is already full. You might have meant SendContext and a t.Error? But, it's probably fine to just block indefinitely here (Send). The whole point is that your test shouldn't hang for the default timeout of 10 minutes when something goes wrong.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you saying that what I have now is wrong? I.e. right now I have a 10 minute wait using a context that I pass to every receive. Are you saying to use something like defaultTestShortTimeout for both the sends and receives?

return t
}
tests := []struct {
Expand All @@ -770,19 +771,19 @@ func TestDNSResolverExponentialBackoff(t *testing.T) {
scWant string
}{
{
"happy-case-default-port",
"happy case default port",
"foo.bar.com",
[]resolver.Address{{Addr: "1.2.3.4" + colonDefaultPort}, {Addr: "5.6.7.8" + colonDefaultPort}},
generateSC("foo.bar.com"),
},
{
"happy-case-specified-port",
"happy case specified port",
"foo.bar.com:1234",
[]resolver.Address{{Addr: "1.2.3.4:1234"}, {Addr: "5.6.7.8:1234"}},
generateSC("foo.bar.com"),
},
{
"happy-case-another-default-port",
"happy case another default port",
"srv.ipv4.single.fake",
[]resolver.Address{{Addr: "2.4.6.8" + colonDefaultPort}},
generateSC("srv.ipv4.single.fake"),
Expand All @@ -796,7 +797,7 @@ func TestDNSResolverExponentialBackoff(t *testing.T) {
cc.updateStateErr = balancer.ErrBadResolverState
r, err := b.Build(resolver.Target{Endpoint: test.target}, cc, resolver.BuildOptions{})
if err != nil {
t.Fatalf("%v\n", err)
t.Fatalf("Error building resolver for target %v: %v", test.target, err)
}
var state resolver.State
var cnt int
Expand All @@ -817,22 +818,27 @@ func TestDNSResolverExponentialBackoff(t *testing.T) {
if test.scWant != sc {
t.Errorf("Resolved service config of target: %q = %+v, want %+v", test.target, sc, test.scWant)
}
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
// Cause timer to go off 10 times, and see if it calls updateState() correctly.
for i := 0; i < 10; i++ {
timer := <-timerChan
timer.Reset(0)
timer, err := timerChan.Receive(ctx)
if err != nil {
t.Fatalf("Error receiving timer from mock NewTimer call: %v", err)
}
timerPointer := timer.(*time.Timer)
timerPointer.Reset(0)
}
// Poll to see if DNS Resolver updated state the correct number of times, which allows time for the DNS Resolver to call
// ClientConn update state.
deadline := time.Now().Add(defaultTestTimeout)
for {
cc.m1.Lock()
got := cc.updateStateCalls
cc.m1.Unlock()
if got == 11 {
dfawley marked this conversation as resolved.
Show resolved Hide resolved
cc.m1.Unlock()
break
}
cc.m1.Unlock()

if time.Now().After(deadline) {
t.Fatalf("Exponential backoff is not working as expected - should update state 11 times instead of %d", got)
Expand All @@ -843,28 +849,30 @@ func TestDNSResolverExponentialBackoff(t *testing.T) {

// Update resolver.ClientConn to not return an error anymore - this should stop it from backing off.
cc.updateStateErr = nil
timer := <-timerChan
timer.Reset(0)
timer, err := timerChan.Receive(ctx)
if err != nil {
t.Fatalf("Error receiving timer from mock NewTimer call: %v", err)
}
timerPointer := timer.(*time.Timer)
timerPointer.Reset(0)
// Poll to see if DNS Resolver updated state the correct number of times, which allows time for the DNS Resolver to call
// ClientConn update state the final time. The DNS Resolver should then stop polling.
deadline = time.Now().Add(defaultTestTimeout)
for {
cc.m1.Lock()
got := cc.updateStateCalls
cc.m1.Unlock()
if got == 12 {
dfawley marked this conversation as resolved.
Show resolved Hide resolved
cc.m1.Unlock()
break
}
cc.m1.Unlock()

if time.Now().After(deadline) {
t.Fatalf("Exponential backoff is not working as expected - should stop backing off at 12 total UpdateState calls instead of %d", got)
}

select {
case <-timerChan:
t.Fatalf("Should not poll again after no more error")
default:
_, err := timerChan.ReceiveOrFail()
if err {
t.Fatalf("Should not poll again after Client Conn stops returning error.")
}

time.Sleep(time.Millisecond)
Expand Down Expand Up @@ -1555,28 +1563,34 @@ func TestReportError(t *testing.T) {
defer func(nt func(d time.Duration) *time.Timer) {
newTimer = nt
}(newTimer)
timerChan := make(chan *time.Timer, 1)
timerChan := testutils.NewChannel()
newTimer = func(d time.Duration) *time.Timer {
// Will never fire on its own, allowing us to control it.
// Will never fire on its own, allows this test to call timer immediately.
t := time.NewTimer(time.Hour)
timerChan <- t
timerChan.SendOrFail(t)
return t
}
cc := &testClientConn{target: target, errChan: make(chan error)}
totalTimesCalledError := 0
b := NewBuilder()
r, err := b.Build(resolver.Target{Endpoint: target}, cc, resolver.BuildOptions{})
if err != nil {
t.Fatalf("%v\n", err)
t.Fatalf("Error building resolver for target %v: %v", target, err)
}
// Should receive first error.
err = <-cc.errChan
if !strings.Contains(err.Error(), "hostLookup error") {
t.Fatalf(`ReportError(err=%v) called; want err contains "hostLookupError"`, err)
}
totalTimesCalledError++
timer := <-timerChan
timer.Reset(0)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
timer, err := timerChan.Receive(ctx)
if err != nil {
t.Fatalf("Error receiving timer from mock NewTimer call: %v", err)
}
timerPointer := timer.(*time.Timer)
timerPointer.Reset(0)
defer r.Close()

// Cause timer to go off 10 times, and see if it matches DNS Resolver updating Error.
Expand All @@ -1587,14 +1601,21 @@ func TestReportError(t *testing.T) {
t.Fatalf(`ReportError(err=%v) called; want err contains "hostLookupError"`, err)
}
totalTimesCalledError++
timer = <-timerChan
timer.Reset(0)
timer, err := timerChan.Receive(ctx)
if err != nil {
t.Fatalf("Error receiving timer from mock NewTimer call: %v", err)
}
timerPointer := timer.(*time.Timer)
timerPointer.Reset(0)
}

if totalTimesCalledError != 11 {
t.Errorf("ReportError() not called 11 times, instead called %d times.", totalTimesCalledError)
}
// Clean up final watcher iteration.
<-cc.errChan
<-timerChan
_, err = timerChan.Receive(ctx)
if err != nil {
t.Fatalf("Error receiving timer from mock NewTimer call: %v", err)
}
}