From 4a555aaef155a4913340065937bb98649acd7a65 Mon Sep 17 00:00:00 2001 From: Brian Clozel Date: Mon, 5 Dec 2022 11:45:07 +0100 Subject: [PATCH] Reduce heap memory usage in ConcurrentLruCache Prior to this commit, the `ConcurrentLruCache` implementation would use arrays of `AtomicReference` as operation buffers, and the buffer count would be calculated as the next power of two at or above the CPU count. This can result in significant heap memory usage as each `AtomicReference` buffer entry adds to the memory pressure. As seen in FasterXML/jackson-databind#3665, this can add a significant overhead for no real added benefit for the current use case. This commit changes the current implementation to use `AtomicReferenceArray` as buffers and to cap the number of buffers at four. JMH benchmark results are within the error margin, so we can assume that this does not change the performance characteristics for the typical use case in Spring Framework. Fixes gh-29520 --- .../util/ConcurrentLruCache.java | 47 +++++++++---------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java b/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java index 2e5f19e17613..8e93e2449851 100644 --- a/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java +++ b/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java @@ -21,8 +21,9 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicLongArray; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Function; @@ -31,7 +32,7 @@ /** * Simple LRU (Least Recently Used) 
cache, bounded by a specified cache capacity. - *

This is a simplified, opinionated implementation of a LRU cache for internal + *

This is a simplified, opinionated implementation of an LRU cache for internal * use in Spring Framework. It is inspired from * ConcurrentLinkedHashMap. *

Read and write operations are internally recorded in dedicated buffers, @@ -358,7 +359,8 @@ private static final class ReadOperations { private static int detectNumberOfBuffers() { int availableProcessors = Runtime.getRuntime().availableProcessors(); - return 1 << (Integer.SIZE - Integer.numberOfLeadingZeros(availableProcessors - 1)); + int nextPowerOfTwo = 1 << (Integer.SIZE - Integer.numberOfLeadingZeros(availableProcessors - 1)); + return Math.min(4, nextPowerOfTwo); } private static final int BUFFERS_MASK = BUFFER_COUNT - 1; @@ -374,7 +376,7 @@ private static int detectNumberOfBuffers() { /* * Number of operations recorded, for each buffer */ - private final AtomicLong[] recordedCount = new AtomicLong[BUFFER_COUNT]; + private final AtomicLongArray recordedCount = new AtomicLongArray(BUFFER_COUNT); /* * Number of operations read, for each buffer @@ -384,10 +386,10 @@ private static int detectNumberOfBuffers() { /* * Number of operations processed, for each buffer */ - private final AtomicLong[] processedCount = new AtomicLong[BUFFER_COUNT]; + private final AtomicLongArray processedCount = new AtomicLongArray(BUFFER_COUNT); @SuppressWarnings("rawtypes") - private final AtomicReference>[][] buffers = new AtomicReference[BUFFER_COUNT][BUFFER_SIZE]; + private final AtomicReferenceArray>[] buffers = new AtomicReferenceArray[BUFFER_COUNT]; private final EvictionQueue evictionQueue; @@ -395,12 +397,7 @@ private static int detectNumberOfBuffers() { ReadOperations(EvictionQueue evictionQueue) { this.evictionQueue = evictionQueue; for (int i = 0; i < BUFFER_COUNT; i++) { - this.recordedCount[i] = new AtomicLong(); - this.processedCount[i] = new AtomicLong(); - this.buffers[i] = new AtomicReference[BUFFER_SIZE]; - for (int j = 0; j < BUFFER_SIZE; j++) { - this.buffers[i][j] = new AtomicReference<>(); - } + this.buffers[i] = new AtomicReferenceArray<>(BUFFER_SIZE); } } @@ -410,12 +407,11 @@ private static int getBufferIndex() { boolean recordRead(Node node) { int 
bufferIndex = getBufferIndex(); - final AtomicLong counter = this.recordedCount[bufferIndex]; - final long writeCount = counter.get(); - counter.lazySet(writeCount + 1); + final long writeCount = this.recordedCount.get(bufferIndex); + this.recordedCount.lazySet(bufferIndex, writeCount + 1); final int index = (int) (writeCount & BUFFER_INDEX_MASK); - this.buffers[bufferIndex][index].lazySet(node); - final long pending = (writeCount - this.processedCount[bufferIndex].get()); + this.buffers[bufferIndex].lazySet(index, node); + final long pending = (writeCount - this.processedCount.get(bufferIndex)); return (pending < MAX_PENDING_OPERATIONS); } @@ -428,27 +424,28 @@ void drain() { } void clear() { - for (AtomicReference>[] buffer : this.buffers) { - for (AtomicReference> slot : buffer) { - slot.lazySet(null); + for (int i = 0; i < BUFFER_COUNT; i++) { + AtomicReferenceArray> buffer = this.buffers[i]; + for (int j = 0; j < BUFFER_SIZE; j++) { + buffer.lazySet(j, null); } } } private void drainReadBuffer(int bufferIndex) { - final long writeCount = this.recordedCount[bufferIndex].get(); + final long writeCount = this.recordedCount.get(bufferIndex); for (int i = 0; i < MAX_DRAIN_COUNT; i++) { final int index = (int) (this.readCount[bufferIndex] & BUFFER_INDEX_MASK); - final AtomicReference> slot = this.buffers[bufferIndex][index]; - final Node node = slot.get(); + final AtomicReferenceArray> buffer = this.buffers[bufferIndex]; + final Node node = buffer.get(index); if (node == null) { break; } - slot.lazySet(null); + buffer.lazySet(index, null); this.evictionQueue.moveToBack(node); this.readCount[bufferIndex]++; } - this.processedCount[bufferIndex].lazySet(writeCount); + this.processedCount.lazySet(bufferIndex, writeCount); } }