-
Notifications
You must be signed in to change notification settings - Fork 1.8k
/
ClearExpiredRecordsTask.java
354 lines (286 loc) · 13.2 KB
/
ClearExpiredRecordsTask.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
/*
* Copyright (c) 2008-2021, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.internal.eviction;
import com.hazelcast.cluster.Address;
import com.hazelcast.internal.partition.IPartition;
import com.hazelcast.internal.partition.IPartitionService;
import com.hazelcast.internal.util.Clock;
import com.hazelcast.partition.PartitionLostEvent;
import com.hazelcast.spi.impl.NodeEngine;
import com.hazelcast.spi.impl.operationservice.Operation;
import com.hazelcast.spi.impl.operationservice.impl.OperationServiceImpl;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.spi.properties.HazelcastProperty;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import static com.hazelcast.internal.eviction.ToBackupSender.newToBackupSender;
import static com.hazelcast.internal.util.CollectionUtil.isEmpty;
import static com.hazelcast.internal.util.Preconditions.checkPositive;
import static com.hazelcast.internal.util.Preconditions.checkTrue;
import static java.lang.Integer.parseInt;
import static java.lang.Math.min;
/**
 * Background task that periodically scans per-partition containers and
 * dispatches cleanup operations to evict expired records, then incrementally
 * forwards queued expired keys to backup replicas.
 *
 * <p>This class is a template: concrete services supply the container type
 * {@code T} (one per partition) and the store type {@code S} via the abstract
 * hook methods at the bottom. The {@link ProcessablePartitionType} enum notes
 * that map traverses primary and backup partitions while cache only needs
 * primaries.
 *
 * <p>Thread-safety notes (from the visible code): {@link #run()} is guarded by
 * a CAS on {@code singleRunPermit} so at most one scan executes at a time;
 * {@code lostPartitionCounter} is bumped from partition-lost events and
 * compared against the volatile {@code lastKnownLostPartitionCount} snapshot;
 * {@code nextExpiryQueueToScanIndex} carries scan progress across runs.
 */
@SuppressWarnings({"checkstyle:magicnumber", "checkstyle:methodcount"})
@SuppressFBWarnings({"URF_UNREAD_FIELD"})
public abstract class ClearExpiredRecordsTask<T, S> implements Runnable {

    /** Minimum gap enforced between two cleanups of the same partition container. */
    private static final int DIFFERENCE_BETWEEN_TWO_SUBSEQUENT_PARTITION_CLEANUP_MILLIS = 1000;

    // One container per partition; indexed by partitionId.
    protected final T[] containers;
    protected final NodeEngine nodeEngine;
    protected final ToBackupSender<S> toBackupSender;
    protected final IPartitionService partitionService;

    private final boolean cleanupEnabled;
    private final int partitionCount;
    private final int taskPeriodSeconds;
    // Percentage of a container scanned per primary expiry op; validated to be in (0,100].
    private final int cleanupPercentage;
    // Upper bound on cleanup operations in flight per run; also paces the backup-queue scan.
    private final int cleanupOperationCount;
    private final Address thisAddress;
    private final OperationServiceImpl operationService;
    // CAS permit ensuring only a single run() executes at any moment.
    private final AtomicBoolean singleRunPermit = new AtomicBoolean(false);
    // Incremented on every partition-lost event; see partitionLost(...).
    private final AtomicInteger lostPartitionCounter = new AtomicInteger();
    // Cursor into `containers` so backup-expiry queue scanning resumes where the last run stopped.
    private final AtomicInteger nextExpiryQueueToScanIndex = new AtomicInteger();
    // Snapshot of lostPartitionCounter taken by the last run; volatile for cross-run visibility.
    private volatile int lastKnownLostPartitionCount;
    // Count of containers that still have a running cleanup; reset at the start of each run.
    private int runningCleanupOperationsCount;

    /**
     * Creates the task, reading tuning knobs from Hazelcast properties.
     *
     * @param serviceName              service this task cleans for (passed to the backup sender)
     * @param containers               per-partition containers (stored by reference, hence the
     *                                 EI_EXPOSE_REP2 suppression)
     * @param cleanupEnabled           property: whether cleanup runs at all
     * @param cleanupOpProperty        property: explicit cleanup-operation count (may be unset;
     *                                 then derived from partition/thread counts)
     * @param cleanupPercentageProperty property: scan percentage, must be in (0,100]
     * @param taskPeriodProperty       property: period of this task in seconds
     * @param nodeEngine               node services accessor
     */
    @SuppressFBWarnings({"EI_EXPOSE_REP2"})
    protected ClearExpiredRecordsTask(String serviceName,
                                      T[] containers,
                                      HazelcastProperty cleanupEnabled,
                                      HazelcastProperty cleanupOpProperty,
                                      HazelcastProperty cleanupPercentageProperty,
                                      HazelcastProperty taskPeriodProperty,
                                      NodeEngine nodeEngine) {
        this.nodeEngine = nodeEngine;
        this.containers = containers;
        this.operationService = (OperationServiceImpl) nodeEngine.getOperationService();
        this.partitionService = nodeEngine.getPartitionService();
        this.partitionCount = nodeEngine.getPartitionService().getPartitionCount();
        this.thisAddress = nodeEngine.getThisAddress();
        HazelcastProperties properties = nodeEngine.getProperties();
        this.cleanupOperationCount = calculateCleanupOperationCount(properties, cleanupOpProperty, partitionCount,
                operationService.getPartitionThreadCount());
        checkPositive(cleanupOperationCount, "cleanupOperationCount should be a positive number");
        this.cleanupPercentage = properties.getInteger(cleanupPercentageProperty);
        checkTrue(cleanupPercentage > 0 && cleanupPercentage <= 100,
                "cleanupPercentage should be in range (0,100]");
        this.taskPeriodSeconds = properties.getSeconds(taskPeriodProperty);
        this.cleanupEnabled = properties.getBoolean(cleanupEnabled);
        this.toBackupSender = newToBackupSender(serviceName, newBackupExpiryOpSupplier(),
                newBackupExpiryOpFilter(), nodeEngine);
    }

    /**
     * Filter deciding whether a backup expiry op may be sent for a
     * (partitionId, replicaIndex) pair: only when that replica currently has
     * an assigned address.
     */
    protected BiFunction<Integer, Integer, Boolean> newBackupExpiryOpFilter() {
        return (partitionId, replicaIndex) -> {
            IPartition partition = partitionService.getPartition(partitionId);
            return partition.getReplicaAddress(replicaIndex) != null;
        };
    }

    @Override
    public void run() {
        try {
            // CAS permit: if another run is in progress, bail out silently.
            if (!singleRunPermit.compareAndSet(false, true)) {
                return;
            }
            runInternal();
        } finally {
            singleRunPermit.set(false);
        }
    }

    /**
     * One full scan: walk every partition, skip migrating ones, equalize
     * backup sizes on local partitions when a lost-partition event was
     * detected, collect processable containers, then sort them and send
     * cleanup operations. Finally advances the incremental backup-queue scan.
     */
    private void runInternal() {
        runningCleanupOperationsCount = 0;
        long nowInMillis = nowInMillis();
        boolean lostPartitionDetected = lostPartitionDetected();
        List<T> containersToProcess = null;
        for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
            T container = this.containers[partitionId];
            IPartition partition = partitionService.getPartition(partitionId, false);
            if (partition.isMigrating()) {
                // Don't touch partitions that are in flight.
                continue;
            }
            if (partition.isLocal()) {
                if (lostPartitionDetected) {
                    // Lost invalidations may have left stale backup entries; trim them.
                    equalizeBackupSizeWithPrimary(container);
                }
            }
            if (canProcessContainer(container, partition, nowInMillis)) {
                containersToProcess = addContainerTo(containersToProcess, container);
            }
        }
        if (!isEmpty(containersToProcess)) {
            sortPartitionContainers(containersToProcess);
            sendCleanupOperations(containersToProcess);
        }
        sendExpiryQueuesToBackupIncrementally();
    }

    /**
     * Sends queued expired keys to backups for at most {@code cleanupOperationCount}
     * containers per run, resuming from {@code nextExpiryQueueToScanIndex} and
     * wrapping the cursor back to 0 once all partitions have been visited.
     */
    private void sendExpiryQueuesToBackupIncrementally() {
        int scanned = 0;
        for (int partitionId = nextExpiryQueueToScanIndex.get(); partitionId < partitionCount; partitionId++) {
            sendQueuedExpiredKeys(containers[partitionId]);
            nextExpiryQueueToScanIndex.incrementAndGet();
            // scanned never exceeds cleanupOperationCount here, so the modulo
            // test is equivalent to "== cleanupOperationCount": cap work per run.
            if (++scanned % cleanupOperationCount == 0) {
                break;
            }
        }
        if (nextExpiryQueueToScanIndex.get() == partitionCount) {
            nextExpiryQueueToScanIndex.set(0);
        }
    }

    /**
     * Gate for scheduling a primary cleanup on {@code container}:
     * the partition must be processable for this node, the container must have
     * something to do, no cleanup may already be running on it, the global
     * in-flight cap must not be exceeded, the per-container 1s window must
     * have elapsed, and it must hold at least one expirable record.
     */
    private boolean canProcessContainer(T container, IPartition partition, long nowInMillis) {
        if (!getProcessablePartitionType().isProcessable(partition, thisAddress)) {
            return false;
        }
        if (isContainerEmpty(container) && !hasExpiredKeyToSendBackup(container)) {
            return false;
        }
        if (hasRunningCleanup(container)) {
            // Count it toward the in-flight cap, but don't schedule another.
            runningCleanupOperationsCount++;
            return false;
        }
        return runningCleanupOperationsCount <= cleanupOperationCount
                && !notInProcessableTimeWindow(container, nowInMillis)
                && !notHaveAnyExpirableRecord(container);
    }

    /**
     * This method increments a counter to count partition lost events.
     *
     * After an ungraceful shutdown, backups can have expired entries.
     * And these entries can remain forever on them. Reason for this is,
     * the lost invalidations on a primary partition. During ungraceful
     * shutdown, these invalidations can be lost before sending them to
     * backups.
     *
     * Here, the counter in this method, is used to detect the lost
     * invalidations case. If it is detected, we send expiry operations to
     * remove leftover backup entries. Otherwise leftover entries can remain on
     * backups forever.
     */
    public final void partitionLost(PartitionLostEvent ignored) {
        lostPartitionCounter.incrementAndGet();
    }

    private static long nowInMillis() {
        return Clock.currentTimeMillis();
    }

    /**
     * Returns {@code true} once per batch of lost-partition events: compares
     * the live counter against the snapshot taken by the previous run and
     * updates the snapshot. See {@link #partitionLost}.
     */
    private boolean lostPartitionDetected() {
        int currentLostPartitionCount = lostPartitionCounter.get();
        if (currentLostPartitionCount == lastKnownLostPartitionCount) {
            return false;
        }
        lastKnownLostPartitionCount = currentLostPartitionCount;
        return true;
    }

    /**
     * Resolves the cleanup-operation cap: an explicit property value wins;
     * otherwise the smaller of 10% of the partition count and 3x the partition
     * thread count (falling back to the thread-based value when 10% rounds to 0).
     */
    private static int calculateCleanupOperationCount(HazelcastProperties properties,
                                                      final HazelcastProperty cleanupOpCountProperty,
                                                      int partitionCount, int partitionThreadCount) {
        String stringValue = properties.getString(cleanupOpCountProperty);
        if (stringValue != null) {
            return parseInt(stringValue);
        }
        // calculate operation count to be sent by using partition-count.
        final double scanPercentage = 0.1D;
        final int opCountFromPartitionCount = (int) (partitionCount * scanPercentage);
        // calculate operation count to be sent by using partition-thread-count.
        final int inflationFactor = 3;
        int opCountFromThreadCount = partitionThreadCount * inflationFactor;
        if (opCountFromPartitionCount == 0) {
            return opCountFromThreadCount;
        }
        return min(opCountFromPartitionCount, opCountFromThreadCount);
    }

    // True while the container's last cleanup was less than 1s ago.
    private boolean notInProcessableTimeWindow(T container, long now) {
        return now - getLastCleanupTime(container) < DIFFERENCE_BETWEEN_TWO_SUBSEQUENT_PARTITION_CLEANUP_MILLIS;
    }

    // Lazily creates the list on first use so an idle scan allocates nothing.
    private List<T> addContainerTo(List<T> containersToProcess, T container) {
        if (containersToProcess == null) {
            containersToProcess = new LinkedList<>();
        }
        containersToProcess.add(container);
        return containersToProcess;
    }

    /**
     * Executes primary expiry operations for at most the first
     * {@code cleanupOperationCount} containers of the (already sorted) list.
     */
    private void sendCleanupOperations(List<T> partitionContainers) {
        final int start = 0;
        int end = cleanupOperationCount;
        if (end > partitionContainers.size()) {
            end = partitionContainers.size();
        }
        List<T> partitionIds = partitionContainers.subList(start, end);
        for (T container : partitionIds) {
            // mark partition container as has on going expiration operation.
            setHasRunningCleanup(container);
            Operation operation = newPrimaryExpiryOp(cleanupPercentage, container);
            operationService.execute(operation);
        }
    }

    // Adapts the subclass hook into the BiFunction shape ToBackupSender expects.
    private BiFunction<S, Collection<ExpiredKey>, Operation> newBackupExpiryOpSupplier() {
        return (recordStore, expiredKeys) -> newBackupExpiryOp(recordStore, expiredKeys);
    }

    /**
     * Flushes queued expired keys of every store in {@code container} to
     * backups; {@code sendIfAtBatchSize=false} forces sending regardless of
     * batch size (presumably — the flag's semantics live in the subclass;
     * verify against tryToSendBackupExpiryOp implementations).
     */
    public final void sendQueuedExpiredKeys(T container) {
        Iterator<S> storeIterator = storeIterator(container);
        while (storeIterator.hasNext()) {
            tryToSendBackupExpiryOp(storeIterator.next(), false);
        }
    }

    // only used for testing purposes
    int getCleanupPercentage() {
        return cleanupPercentage;
    }

    // only used for testing purposes
    int getTaskPeriodSeconds() {
        return taskPeriodSeconds;
    }

    // only used for testing purposes
    int getCleanupOperationCount() {
        return cleanupOperationCount;
    }

    // --- Subclass hooks: each service defines what its containers/stores look like. ---

    /** @return true when the container holds no entries at all */
    protected abstract boolean isContainerEmpty(T container);

    /** @return true while a cleanup operation is already running on this container */
    protected abstract boolean hasRunningCleanup(T container);

    /** @return timestamp (millis) of the container's last cleanup */
    protected abstract long getLastCleanupTime(T container);

    /** Trims backup stores so their size matches the primary (lost-partition recovery). */
    protected abstract void equalizeBackupSizeWithPrimary(T container);

    /** @return true when the container has expired keys queued for backups */
    protected abstract boolean hasExpiredKeyToSendBackup(T container);

    /** @return true when the container provably holds no expirable record */
    protected abstract boolean notHaveAnyExpirableRecord(T container);

    /** Orders containers so the most cleanup-worthy come first (subclass-defined criteria). */
    protected abstract void sortPartitionContainers(List<T> containers);

    /** Marks the container as having an in-flight cleanup operation. */
    protected abstract void setHasRunningCleanup(T container);

    /** @return which partitions (primary-only vs primary-or-backup) this service scans */
    protected abstract ProcessablePartitionType getProcessablePartitionType();

    /** Creates the operation that expires records on the primary replica. */
    protected abstract Operation newPrimaryExpiryOp(int cleanupPercentage, T container);

    /** Creates the operation that removes the given expired keys on backups. */
    protected abstract Operation newBackupExpiryOp(S store, Collection<ExpiredKey> expiredKeys);

    /** Sends (or batches) a backup expiry op for the store's queued expired keys. */
    public abstract void tryToSendBackupExpiryOp(S store, boolean sendIfAtBatchSize);

    /** Iterates the stores held by a partition container. */
    public abstract Iterator<S> storeIterator(T container);

    public boolean isCleanupEnabled() {
        return cleanupEnabled;
    }

    /**
     * Used when traversing partitions. Map needs to traverse both
     * backup and primary partitions due to catch ttl expired entries
     * but Cache only needs primary ones.
     */
    protected enum ProcessablePartitionType {
        PRIMARY_PARTITION {
            @Override
            boolean isProcessable(IPartition partition, Address address) {
                return partition.isLocal();
            }
        },
        PRIMARY_OR_BACKUP_PARTITION {
            @Override
            boolean isProcessable(IPartition partition, Address address) {
                return partition.isOwnerOrBackup(address);
            }
        };

        abstract boolean isProcessable(IPartition partition, Address address);
    }
}