Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check if server is in configuration when receiving a voteRequest #526

Merged
merged 5 commits into from Oct 3, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions configuration.go
Expand Up @@ -177,6 +177,17 @@ func hasVote(configuration Configuration, id ServerID) bool {
return false
}

// inConfiguration returns true if the server identified by 'id' is in in the
// provided Configuration.
func inConfiguration(configuration Configuration, id ServerID) bool {
for _, server := range configuration.Servers {
if server.ID == id {
return true
}
}
return false
}

// checkConfiguration tests a cluster membership configuration for common
// errors.
func checkConfiguration(configuration Configuration) error {
Expand Down
4 changes: 2 additions & 2 deletions raft.go
Expand Up @@ -1566,8 +1566,8 @@ func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) {
candidateID := ServerID(req.ID)
// if the Servers list is empty that mean the cluster is very likely trying to bootstrap,
// Grant the vote
if len(r.configurations.latest.Servers) > 0 && !hasVote(r.configurations.latest, candidateID) {
r.logger.Warn("rejecting vote request since node is not a voter",
if len(r.configurations.latest.Servers) > 0 && !inConfiguration(r.configurations.latest, candidateID) {
r.logger.Warn("rejecting vote request since node is not in configuration",
"from", candidate)
return
}
Expand Down
79 changes: 79 additions & 0 deletions raft_test.go
Expand Up @@ -2644,6 +2644,85 @@ func TestRaft_VoteNotGranted_WhenNodeNotInCluster(t *testing.T) {
}
}

func TestRaft_ClusterCanRegainStability_WhenNonVoterWithHigherTermJoin(t *testing.T) {
// Make a cluster
c := MakeCluster(3, t, nil)

defer c.Close()

// Get the leader
leader := c.Leader()

// Wait until we have 2 followers
limit := time.Now().Add(c.longstopTimeout)
var followers []*Raft
for time.Now().Before(limit) && len(followers) != 2 {
c.WaitEvent(nil, c.conf.CommitTimeout)
followers = c.GetInState(Follower)
}
if len(followers) != 2 {
t.Fatalf("expected two followers: %v", followers)
}
dhiaayachi marked this conversation as resolved.
Show resolved Hide resolved

// Remove a follower
followerRemoved := followers[0]
c.Disconnect(followerRemoved.localAddr)
time.Sleep(c.propagateTimeout)

future := leader.RemoveServer(followerRemoved.localID, 0, 0)
if err := future.Error(); err != nil {
t.Fatalf("err: %v", err)
}

//set that follower term to higher term to simulate a partitioning
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to simulate a partitioning? Isn't that what c.Disconnect is doing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes disconnect would prohibit communication from the specified node to any node and from any node to that node.

followerRemoved.setCurrentTerm(leader.getCurrentTerm() + 2)
dhiaayachi marked this conversation as resolved.
Show resolved Hide resolved
//Add the node back as NonVoter
future = leader.AddNonvoter(followerRemoved.localID, followerRemoved.localAddr, 0, 0)
if err := future.Error(); err != nil {
t.Fatalf("err: %v", err)
}

c.FullyConnect()

// Wait a while
time.Sleep(c.propagateTimeout)
// Check leader stable, could be a new leader here
leader = c.Leader()
//Write some logs to ensure they replicate
for i := 0; i < 100; i++ {
future := leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
if err := future.Error(); err != nil {
t.Fatalf("[ERR] apply err: %v", err)
}
}
c.WaitForReplication(100)

//Remove the server and add it back as Voter
future = leader.RemoveServer(followerRemoved.localID, 0, 0)
if err := future.Error(); err != nil {
t.Fatalf("err: %v", err)
}
leader.AddVoter(followerRemoved.localID, followerRemoved.localAddr, 0, 0)

// Wait a while
time.Sleep(c.propagateTimeout * 10)

//Write some logs to ensure they replicate
for i := 100; i < 200; i++ {
future := leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0)
if err := future.Error(); err != nil {
t.Fatalf("[ERR] apply err: %v", err)
}
}
c.WaitForReplication(200)

// Check leader stable
newLeader := c.Leader()
if newLeader.leaderID != leader.leaderID {
t.Fatalf("leader changed")
}
}

// TestRaft_FollowerRemovalNoElection ensures that a leader election is not
// started when a standby is shut down and restarted.
func TestRaft_FollowerRemovalNoElection(t *testing.T) {
Expand Down