New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixed a data loss issue on lite member promotion #17644
Changes from 3 commits
3362c71
336f250
a050910
7ac4040
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -305,11 +305,15 @@ void updateMembers(MembersView membersView) { | |
|
||
MemberImpl[] members = new MemberImpl[membersView.size()]; | ||
int memberIndex = 0; | ||
boolean updatedLiteMember = false; | ||
for (MemberInfo memberInfo : membersView.getMembers()) { | ||
Address address = memberInfo.getAddress(); | ||
MemberImpl member = currentMemberMap.getMember(address); | ||
|
||
if (member != null && member.getUuid().equals(memberInfo.getUuid())) { | ||
if (member.isLiteMember()) { | ||
updatedLiteMember = true; | ||
} | ||
member = createNewMemberImplIfChanged(memberInfo, member); | ||
members[memberIndex++] = member; | ||
continue; | ||
|
@@ -342,6 +346,10 @@ void updateMembers(MembersView membersView) { | |
|
||
setMembers(MemberMap.createNew(membersView.getVersion(), members)); | ||
|
||
if (updatedLiteMember) { | ||
updateMembersGroupSize(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is hard to to follow similar named methods maybe we can make |
||
} | ||
|
||
for (MemberImpl member : removedMembers) { | ||
closeConnection(member.getAddress(), "Member left event received from master"); | ||
handleMemberRemove(memberMapRef.get(), member); | ||
|
@@ -410,6 +418,10 @@ private MemberImpl createMember(MemberInfo memberInfo, Map<String, String> attri | |
.build(); | ||
} | ||
|
||
private void updateMembersGroupSize() { | ||
node.partitionService.updateMemberGroupSize(); | ||
} | ||
|
||
private void repairPartitionTableIfReturningMember(MemberImpl member) { | ||
if (!clusterService.isMaster()) { | ||
return; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
import com.hazelcast.internal.partition.InternalPartition; | ||
import com.hazelcast.internal.util.RootCauseMatcher; | ||
import com.hazelcast.internal.util.UuidUtil; | ||
import com.hazelcast.map.IMap; | ||
import com.hazelcast.spi.impl.InternalCompletableFuture; | ||
import com.hazelcast.spi.impl.operationservice.impl.Invocation; | ||
import com.hazelcast.spi.impl.operationservice.impl.InvocationRegistry; | ||
|
@@ -64,9 +65,11 @@ | |
import static java.util.Arrays.asList; | ||
import static java.util.Collections.singletonList; | ||
import static org.hamcrest.Matchers.greaterThan; | ||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertFalse; | ||
import static org.junit.Assert.assertNotEquals; | ||
import static org.junit.Assert.assertThat; | ||
import static org.junit.Assert.assertTrue; | ||
import static org.junit.Assert.fail; | ||
|
||
@RunWith(HazelcastParallelClassRunner.class) | ||
|
@@ -323,6 +326,86 @@ public void promotion_shouldFail_whenMasterIsSuspected_duringPromotion() throws | |
} | ||
} | ||
|
||
@Test | ||
public void lite_member_promotion_data_loss() throws InterruptedException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we give a better name to test? Seeing the state tested against and expected behavior in test method name would be helpful for the future readers. |
||
int entryCount = 1000; | ||
|
||
TestHazelcastInstanceFactory factory = createHazelcastInstanceFactory(); | ||
Config config = new Config().setLiteMember(true); | ||
|
||
// start first hazelcast instance as a lite member | ||
HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); | ||
|
||
// start second and third hazelcast instances as a lite member | ||
HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); | ||
HazelcastInstance thirdHazelcastInstance = factory.newHazelcastInstance(config); | ||
|
||
// promote all instances to data members | ||
firstHazelcastInstance.getCluster().promoteLocalLiteMember(); | ||
secondHazelcastInstance.getCluster().promoteLocalLiteMember(); | ||
thirdHazelcastInstance.getCluster().promoteLocalLiteMember(); | ||
|
||
// check if cluster is in a good shape | ||
assertTrueEventually(() -> assertTrue(firstHazelcastInstance.getPartitionService().isClusterSafe())); | ||
|
||
// insert some dummy data into the testing map | ||
String mapName = randomMapName(); | ||
IMap<String, String> testMap = firstHazelcastInstance.getMap(mapName); | ||
for (int i = 0; i < entryCount; ++i) { | ||
testMap.put("key" + i, "value" + i); | ||
} | ||
|
||
// check all data is correctly inserted | ||
assertEquals(entryCount, testMap.size()); | ||
|
||
// kill second instance | ||
secondHazelcastInstance.getLifecycleService().terminate(); | ||
|
||
// backup count for the map is set to 1 | ||
// even with 1 node down, no data loss is expected | ||
assertTrueEventually(() -> assertEquals(entryCount, firstHazelcastInstance.getMap(mapName).size())); | ||
assertTrueEventually(() -> assertEquals(entryCount, thirdHazelcastInstance.getMap(mapName).size())); | ||
} | ||
|
||
@Test | ||
public void lite_member_promotion_data_loss2() throws InterruptedException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here. |
||
int entryCount = 1000; | ||
|
||
TestHazelcastInstanceFactory factory = createHazelcastInstanceFactory(); | ||
Config config = new Config().setLiteMember(true); | ||
|
||
// start first hazelcast instance as a lite member | ||
HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); | ||
|
||
// start second hazelcast instance as a lite member | ||
HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); | ||
|
||
// promote all instances to data members | ||
firstHazelcastInstance.getCluster().promoteLocalLiteMember(); | ||
|
||
secondHazelcastInstance.getCluster().promoteLocalLiteMember(); | ||
|
||
// check if cluster is in a good shape | ||
assertTrueEventually(() -> assertTrue(firstHazelcastInstance.getPartitionService().isClusterSafe())); | ||
|
||
// insert some dummy data into the testing map | ||
String mapName = randomMapName(); | ||
IMap<String, String> testMap = firstHazelcastInstance.getMap(mapName); | ||
for (int i = 0; i < entryCount; ++i) { | ||
testMap.put("key" + i, "value" + i); | ||
} | ||
|
||
// check all data is correctly inserted | ||
assertEquals(entryCount, testMap.size()); | ||
|
||
// kill second instance | ||
secondHazelcastInstance.getLifecycleService().terminate(); | ||
|
||
// backup count for the map is set to 1 | ||
// even with 1 node down, no data loss is expected | ||
assertTrueEventually(() -> assertEquals(entryCount, firstHazelcastInstance.getMap(mapName).size())); | ||
} | ||
|
||
private void assertPromotionInvocationStarted(HazelcastInstance instance) { | ||
OperationServiceImpl operationService = getNode(instance).getNodeEngine().getOperationService(); | ||
InvocationRegistry invocationRegistry = operationService.getInvocationRegistry(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you please add a short comment to the code to explain the meaning of
updatedLiteMember
?