From e8abc4d3330b151535fa7420066aa43929645aa2 Mon Sep 17 00:00:00 2001 From: petrpleshachkov <50169481+petrpleshachkov@users.noreply.github.com> Date: Fri, 9 Oct 2020 09:35:22 +0200 Subject: [PATCH 1/2] Fixed a data loss issue on lite member promotion (#17644) Notify cluster members on lite member promotion to update memberGroupSize. Otherwise, a data member might not be aware of other promoted data members, which may cause backup operations not to be issued to those data members. Closes https://github.com/hazelcast/hazelcast/issues/17621 --- .../cluster/impl/MembershipManager.java | 11 ++ .../impl/InternalPartitionServiceImpl.java | 4 + .../cluster/impl/PromoteLiteMemberTest.java | 107 ++++++++++++++++++ 3 files changed, 122 insertions(+) diff --git a/hazelcast/src/main/java/com/hazelcast/internal/cluster/impl/MembershipManager.java b/hazelcast/src/main/java/com/hazelcast/internal/cluster/impl/MembershipManager.java index 10bd9b0cfd72..d27d309f2da5 100644 --- a/hazelcast/src/main/java/com/hazelcast/internal/cluster/impl/MembershipManager.java +++ b/hazelcast/src/main/java/com/hazelcast/internal/cluster/impl/MembershipManager.java @@ -285,6 +285,7 @@ String memberListString() { } // handles both new and left members + @SuppressWarnings("checkstyle:npathcomplexity") void updateMembers(MembersView membersView) { MemberMap currentMemberMap = memberMapRef.get(); @@ -294,11 +295,17 @@ void updateMembers(MembersView membersView) { MemberImpl[] members = new MemberImpl[membersView.size()]; int memberIndex = 0; + // Indicates whether we received a notification on lite member membership change + // (e.g. 
its promotion to a data member) + boolean updatedLiteMember = false; for (MemberInfo memberInfo : membersView.getMembers()) { Address address = memberInfo.getAddress(); MemberImpl member = currentMemberMap.getMember(address); if (member != null && member.getUuid().equals(memberInfo.getUuid())) { + if (member.isLiteMember()) { + updatedLiteMember = true; + } member = createNewMemberImplIfChanged(memberInfo, member); members[memberIndex++] = member; continue; @@ -331,6 +338,10 @@ void updateMembers(MembersView membersView) { setMembers(MemberMap.createNew(membersView.getVersion(), members)); + if (updatedLiteMember) { + node.partitionService.updateMemberGroupSize(); + } + for (MemberImpl member : removedMembers) { closeConnection(member.getAddress(), "Member left event received from master"); handleMemberRemove(memberMapRef.get(), member); diff --git a/hazelcast/src/main/java/com/hazelcast/internal/partition/impl/InternalPartitionServiceImpl.java b/hazelcast/src/main/java/com/hazelcast/internal/partition/impl/InternalPartitionServiceImpl.java index 279e69465772..1e0a91bbc112 100644 --- a/hazelcast/src/main/java/com/hazelcast/internal/partition/impl/InternalPartitionServiceImpl.java +++ b/hazelcast/src/main/java/com/hazelcast/internal/partition/impl/InternalPartitionServiceImpl.java @@ -358,6 +358,10 @@ public int getMaxAllowedBackupCount() { return max(min(getMemberGroupsSize() - 1, InternalPartition.MAX_BACKUP_COUNT), 0); } + public void updateMemberGroupSize() { + partitionStateManager.updateMemberGroupsSize(); + } + @Override public void memberAdded(Member member) { logger.fine("Adding " + member); diff --git a/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java b/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java index e8e8b707f9a4..cbe7efb97a71 100644 --- a/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java +++ 
b/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java @@ -20,6 +20,7 @@ import com.hazelcast.config.Config; import com.hazelcast.core.Cluster; import com.hazelcast.core.HazelcastInstance; +import com.hazelcast.core.IMap; import com.hazelcast.core.Member; import com.hazelcast.core.MemberLeftException; import com.hazelcast.internal.cluster.impl.operations.PromoteLiteMemberOp; @@ -64,6 +65,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @RunWith(HazelcastParallelClassRunner.class) @@ -374,6 +376,111 @@ private void memberAttributes_arePreserved_afterPromotion(boolean isMaster) thro } } + @Test + public void test_lite_member_promotion_causes_no_data_loss_on_three_members() throws InterruptedException { + int entryCount = 1000; + + TestHazelcastInstanceFactory factory = createHazelcastInstanceFactory(); + Config config = new Config().setLiteMember(true); + + // start first hazelcast instance as a lite member + HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); + + // start second and third hazelcast instances as a lite member + HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); + HazelcastInstance thirdHazelcastInstance = factory.newHazelcastInstance(config); + + // promote all instances to data members + firstHazelcastInstance.getCluster().promoteLocalLiteMember(); + secondHazelcastInstance.getCluster().promoteLocalLiteMember(); + thirdHazelcastInstance.getCluster().promoteLocalLiteMember(); + + // check if cluster is in a good shape + assertTrueEventually(new AssertTask() { + @Override + public void run() throws Exception { + assertTrue(firstHazelcastInstance.getPartitionService().isClusterSafe()); + } + }); + + // insert some dummy data into the testing map + String mapName = randomMapName(); + IMap 
testMap = firstHazelcastInstance.getMap(mapName); + for (int i = 0; i < entryCount; ++i) { + testMap.put("key" + i, "value" + i); + } + + // check all data is correctly inserted + assertEquals(entryCount, testMap.size()); + + // kill second instance + secondHazelcastInstance.getLifecycleService().terminate(); + + // backup count for the map is set to 1 + // even with 1 node down, no data loss is expected + assertTrueEventually(new AssertTask() { + @Override + public void run() throws Exception { + assertEquals(entryCount, firstHazelcastInstance.getMap(mapName).size()); + } + }); + assertTrueEventually(new AssertTask() { + @Override + public void run() throws Exception { + assertEquals(entryCount, thirdHazelcastInstance.getMap(mapName).size()); + } + }); + } + + @Test + public void test_lite_member_promotion_causes_no_data_loss_on_two_members() throws InterruptedException { + int entryCount = 1000; + + TestHazelcastInstanceFactory factory = createHazelcastInstanceFactory(); + Config config = new Config().setLiteMember(true); + + // start first hazelcast instance as a lite member + HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); + + // start second hazelcast instance as a lite member + HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); + + // promote all instances to data members + firstHazelcastInstance.getCluster().promoteLocalLiteMember(); + + secondHazelcastInstance.getCluster().promoteLocalLiteMember(); + + // check if cluster is in a good shape + assertTrueEventually(new AssertTask() { + @Override + public void run() throws Exception { + assertTrue(firstHazelcastInstance.getPartitionService().isClusterSafe()); + } + }); + + // insert some dummy data into the testing map + String mapName = randomMapName(); + IMap testMap = firstHazelcastInstance.getMap(mapName); + for (int i = 0; i < entryCount; ++i) { + testMap.put("key" + i, "value" + i); + } + + // check all data is correctly inserted + 
assertEquals(entryCount, testMap.size()); + + // kill second instance + secondHazelcastInstance.getLifecycleService().terminate(); + + // backup count for the map is set to 1 + // even with 1 node down, no data loss is expected + assertTrueEventually(new AssertTask() { + @Override + public void run() throws Exception { + assertEquals(entryCount, firstHazelcastInstance.getMap(mapName).size()); + } + }); + } + private void assertPromotionInvocationStarted(HazelcastInstance instance) { final OperationServiceImpl operationService = (OperationServiceImpl) getNode(instance).getNodeEngine().getOperationService(); From 2db56f6c455b18efa92b8392c63c41effb6ff62f Mon Sep 17 00:00:00 2001 From: petrpleshachkov <50169481+petrpleshachkov@users.noreply.github.com> Date: Fri, 23 Oct 2020 15:54:34 +0200 Subject: [PATCH 2/2] Fixed a compilation issue --- .../cluster/impl/PromoteLiteMemberTest.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java b/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java index cbe7efb97a71..d5cae115f1d0 100644 --- a/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java +++ b/hazelcast/src/test/java/com/hazelcast/internal/cluster/impl/PromoteLiteMemberTest.java @@ -378,17 +378,17 @@ private void memberAttributes_arePreserved_afterPromotion(boolean isMaster) thro @Test public void test_lite_member_promotion_causes_no_data_loss_on_three_members() throws InterruptedException { - int entryCount = 1000; + final int entryCount = 1000; TestHazelcastInstanceFactory factory = createHazelcastInstanceFactory(); Config config = new Config().setLiteMember(true); // start first hazelcast instance as a lite member - HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); + final HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); 
// start second and third hazelcast instances as a lite member - HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); - HazelcastInstance thirdHazelcastInstance = factory.newHazelcastInstance(config); + final HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); + final HazelcastInstance thirdHazelcastInstance = factory.newHazelcastInstance(config); // promote all instances to data members firstHazelcastInstance.getCluster().promoteLocalLiteMember(); @@ -404,7 +404,7 @@ public void run() throws Exception { }); // insert some dummy data into the testing map - String mapName = randomMapName(); + final String mapName = randomMapName(); IMap testMap = firstHazelcastInstance.getMap(mapName); for (int i = 0; i < entryCount; ++i) { testMap.put("key" + i, "value" + i); @@ -434,16 +434,15 @@ public void run() throws Exception { @Test public void test_lite_member_promotion_causes_no_data_loss_on_two_members() throws InterruptedException { - int entryCount = 1000; + final int entryCount = 1000; TestHazelcastInstanceFactory factory = createHazelcastInstanceFactory(); Config config = new Config().setLiteMember(true); // start first hazelcast instance as a lite member - HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); - + final HazelcastInstance firstHazelcastInstance = factory.newHazelcastInstance(config); // start second hazelcast instance as a lite member - HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); + final HazelcastInstance secondHazelcastInstance = factory.newHazelcastInstance(config); // promote all instances to data members firstHazelcastInstance.getCluster().promoteLocalLiteMember(); @@ -459,7 +458,7 @@ public void run() throws Exception { }); // insert some dummy data into the testing map - String mapName = randomMapName(); + final String mapName = randomMapName(); IMap testMap = firstHazelcastInstance.getMap(mapName); for (int i = 0; i < 
entryCount; ++i) { testMap.put("key" + i, "value" + i);