S3: Download files in 1GiB chunks to reduce memory pressure.
Signed-off-by: Pascal Spörri <psp@zurich.ibm.com>
pspoerri committed May 17, 2024
1 parent 9ebdce4 commit 7103943
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions src/s3/S3Endpoint.cpp
@@ -296,8 +296,23 @@ absl::Status Endpoint::putObject(const std::string &bucket, const std::string &k
 absl::StatusOr<size_t> Endpoint::readBytes(const std::string &bucket, const std::string &key,
                                            uint8_t *bytes, size_t position, size_t length) const {
 
-  auto stream = utility::ByteIOStream(bytes, length);
-  return read(bucket, key, stream, position, length);
+  size_t count = 0;
+  while (count < length) {
+    // Only request 1GiB chunks at a time.
+    auto request = std::min((size_t)1024 * 1024 * 1024, length - count);
+    auto stream = utility::ByteIOStream(&bytes[count], request);
+    auto status = read(bucket, key, stream, position + count, request);
+    if (!status.ok()) {
+      return status;
+    }
+    if (*status == 0) {
+      LOG_WARNING("Unexpected length for ", bucket, "/", key, ": Requested ", length, " (at pos ",
+                  position, ") but got ", count, "!");
+      break;
+    }
+    count += *status;
+  }
+  return count;
 }
 
 absl::StatusOr<size_t> Endpoint::read(const std::string &bucket, const std::string &key,
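Below is a minimal standalone sketch (not part of this commit) illustrating the chunking arithmetic used in the new loop: a large read is split into requests of at most 1 GiB, and the running offset advances by the bytes obtained from each request. The 1 GiB constant mirrors the diff; the simulated object size, the main() driver, and the assumption that every request is fully satisfied are hypothetical and for illustration only.

// Standalone illustration only: not part of the repository.
// It mimics how Endpoint::readBytes now walks a large object in 1 GiB chunks.
#include <algorithm>
#include <cstddef>
#include <iostream>

int main() {
  constexpr std::size_t kChunk = std::size_t{1024} * 1024 * 1024;         // 1 GiB per request, as in the diff
  const std::size_t length = std::size_t{5} * 1024 * 1024 * 1024 + 123;   // hypothetical ~5 GiB object
  std::size_t count = 0;                                                  // bytes "read" so far
  while (count < length) {
    // Never ask for more than 1 GiB in a single request.
    const std::size_t request = std::min(kChunk, length - count);
    std::cout << "request at offset " << count << " for " << request << " bytes\n";
    // In S3Endpoint.cpp, count advances by the byte count actually returned by read();
    // here we assume every request is fully satisfied.
    count += request;
  }
  std::cout << "total bytes: " << count << "\n";
  return 0;
}

Capping each request keeps the amount of data buffered in flight bounded, which is the memory-pressure reduction the commit title describes.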
