From 4a555aaef155a4913340065937bb98649acd7a65 Mon Sep 17 00:00:00 2001
From: Brian Clozel <bclozel@vmware.com>
Date: Mon, 5 Dec 2022 11:45:07 +0100
Subject: [PATCH] Reduce heap memory usage in ConcurrentLruCache

Prior to this commit, the `ConcurrentLruCache` implementation would use
arrays of `AtomicReference` as operation buffers, and the buffer count
would be the smallest power of two not less than the CPU count.

This can result in significant heap memory usage as each
`AtomicReference` buffer entry adds to the memory pressure. As seen in
FasterXML/jackson-databind#3665, this can add a significant overhead for
no real added benefit for the current use case.

This commit changes the current implementation to use
`AtomicReferenceArray` (and `AtomicLongArray` for the counters) as
buffers and to cap the number of buffers at four.
JMH benchmark results are within the error margin, so we can assume that
this does not change the performance characteristics for the typical use
case in Spring Framework.

Fixes gh-29520
---
 .../util/ConcurrentLruCache.java              | 47 +++++++++----------
 1 file changed, 22 insertions(+), 25 deletions(-)
diff --git a/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java b/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java
index 2e5f19e17613..8e93e2449851 100644
--- a/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java
+++ b/spring-core/src/main/java/org/springframework/util/ConcurrentLruCache.java
@@ -21,8 +21,9 @@
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicLongArray;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.atomic.AtomicReferenceArray;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.Function;
@@ -31,7 +32,7 @@
 
 /**
  * Simple LRU (Least Recently Used) cache, bounded by a specified cache capacity.
- * <p>This is a simplified, opinionated implementation of a LRU cache for internal
+ * <p>This is a simplified, opinionated implementation of an LRU cache for internal
  * use in Spring Framework. It is inspired from
  * <a href="https://github.com/ben-manes/concurrentlinkedhashmap">ConcurrentLinkedHashMap</a>.
  * <p>Read and write operations are internally recorded in dedicated buffers,
@@ -358,7 +359,8 @@ private static final class ReadOperations<K, V> {
 
 		private static int detectNumberOfBuffers() {
 			int availableProcessors = Runtime.getRuntime().availableProcessors();
-			return 1 << (Integer.SIZE - Integer.numberOfLeadingZeros(availableProcessors - 1));
+			int nextPowerOfTwo = 1 << (Integer.SIZE - Integer.numberOfLeadingZeros(availableProcessors - 1));
+			return Math.min(4, nextPowerOfTwo);
 		}
 
 		private static final int BUFFERS_MASK = BUFFER_COUNT - 1;
@@ -374,7 +376,7 @@ private static int detectNumberOfBuffers() {
 		/*
 		 * Number of operations recorded, for each buffer
 		 */
-		private final AtomicLong[] recordedCount = new AtomicLong[BUFFER_COUNT];
+		private final AtomicLongArray recordedCount = new AtomicLongArray(BUFFER_COUNT);
 
 		/*
 		 * Number of operations read, for each buffer
@@ -384,10 +386,10 @@ private static int detectNumberOfBuffers() {
 		/*
 		 * Number of operations processed, for each buffer
 		 */
-		private final AtomicLong[] processedCount = new AtomicLong[BUFFER_COUNT];
+		private final AtomicLongArray processedCount = new AtomicLongArray(BUFFER_COUNT);
 
 		@SuppressWarnings("rawtypes")
-		private final AtomicReference<Node<K, V>>[][] buffers = new AtomicReference[BUFFER_COUNT][BUFFER_SIZE];
+		private final AtomicReferenceArray<Node<K, V>>[] buffers = new AtomicReferenceArray[BUFFER_COUNT];
 
 		private final EvictionQueue<K, V> evictionQueue;
 
@@ -395,12 +397,7 @@ private static int detectNumberOfBuffers() {
 		ReadOperations(EvictionQueue<K, V> evictionQueue) {
 			this.evictionQueue = evictionQueue;
 			for (int i = 0; i < BUFFER_COUNT; i++) {
-				this.recordedCount[i] = new AtomicLong();
-				this.processedCount[i] = new AtomicLong();
-				this.buffers[i] = new AtomicReference[BUFFER_SIZE];
-				for (int j = 0; j < BUFFER_SIZE; j++) {
-					this.buffers[i][j] = new AtomicReference<>();
-				}
+				this.buffers[i] = new AtomicReferenceArray<>(BUFFER_SIZE);
 			}
 		}
 
@@ -410,12 +407,11 @@ private static int getBufferIndex() {
 
 		boolean recordRead(Node<K, V> node) {
 			int bufferIndex = getBufferIndex();
-			final AtomicLong counter = this.recordedCount[bufferIndex];
-			final long writeCount = counter.get();
-			counter.lazySet(writeCount + 1);
+			final long writeCount = this.recordedCount.get(bufferIndex);
+			this.recordedCount.lazySet(bufferIndex, writeCount + 1);
 			final int index = (int) (writeCount & BUFFER_INDEX_MASK);
-			this.buffers[bufferIndex][index].lazySet(node);
-			final long pending = (writeCount - this.processedCount[bufferIndex].get());
+			this.buffers[bufferIndex].lazySet(index, node);
+			final long pending = (writeCount - this.processedCount.get(bufferIndex));
 			return (pending < MAX_PENDING_OPERATIONS);
 		}
 
@@ -428,27 +424,28 @@ void drain() {
 		}
 
 		void clear() {
-			for (AtomicReference<Node<K, V>>[] buffer : this.buffers) {
-				for (AtomicReference<Node<K, V>> slot : buffer) {
-					slot.lazySet(null);
+			for (int i = 0; i < BUFFER_COUNT; i++) {
+				AtomicReferenceArray<Node<K, V>> buffer = this.buffers[i];
+				for (int j = 0; j < BUFFER_SIZE; j++) {
+					buffer.lazySet(j, null);
 				}
 			}
 		}
 
 		private void drainReadBuffer(int bufferIndex) {
-			final long writeCount = this.recordedCount[bufferIndex].get();
+			final long writeCount = this.recordedCount.get(bufferIndex);
 			for (int i = 0; i < MAX_DRAIN_COUNT; i++) {
 				final int index = (int) (this.readCount[bufferIndex] & BUFFER_INDEX_MASK);
-				final AtomicReference<Node<K, V>> slot = this.buffers[bufferIndex][index];
-				final Node<K, V> node = slot.get();
+				final AtomicReferenceArray<Node<K, V>> buffer = this.buffers[bufferIndex];
+				final Node<K, V> node = buffer.get(index);
 				if (node == null) {
 					break;
 				}
-				slot.lazySet(null);
+				buffer.lazySet(index, null);
 				this.evictionQueue.moveToBack(node);
 				this.readCount[bufferIndex]++;
 			}
-			this.processedCount[bufferIndex].lazySet(writeCount);
+			this.processedCount.lazySet(bufferIndex, writeCount);
 		}
 	}