From ccbd65f3bb10922b89f381c3141c11a10c68ee5c Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Tue, 17 May 2022 20:30:06 +1000 Subject: [PATCH] In readToByteBuffer, read to max, not the internal buffer size Fixes #1774. Regressed by #1671. --- CHANGES | 6 ++++++ .../internal/ConstrainableInputStream.java | 9 +++------ .../org/jsoup/integration/ConnectTest.java | 20 +++++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index 2d1e175149..e49ef25ddb 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,11 @@ jsoup changelog +*** Release 1.15.2 [PENDING] + * Bugfix: when using the readToByteBuffer method, such as in Connection.Response.body(), if the document has not + already been parsed and must be read fully, and there is any maximum buffer size being applied, only the default + internal buffer size was read, rather than up to the maximum size. + + *** Release 1.15.1 [2022-May-15] * Change: removed previously deprecated methods and classes (including org.jsoup.safety.Whitelist; use org.jsoup.safety.Safelist instead). 
diff --git a/src/main/java/org/jsoup/internal/ConstrainableInputStream.java b/src/main/java/org/jsoup/internal/ConstrainableInputStream.java index 0f7e3c3b88..5b6491363e 100644 --- a/src/main/java/org/jsoup/internal/ConstrainableInputStream.java +++ b/src/main/java/org/jsoup/internal/ConstrainableInputStream.java @@ -81,17 +81,14 @@ public ByteBuffer readToByteBuffer(int max) throws IOException { final ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize); int read; - int remaining = bufferSize; - while (true) { - read = read(readBuffer, 0, remaining); + read = read(readBuffer, 0, bufferSize); if (read == -1) break; if (localCapped) { // this local byteBuffer cap may be smaller than the overall maxSize (like when reading first bytes) - if (read >= remaining) { - outStream.write(readBuffer, 0, remaining); + if (read >= max) { + outStream.write(readBuffer, 0, max); break; } - remaining -= read; } outStream.write(readBuffer, 0, read); } diff --git a/src/test/java/org/jsoup/integration/ConnectTest.java b/src/test/java/org/jsoup/integration/ConnectTest.java index 28e79dac60..42b4f950e1 100644 --- a/src/test/java/org/jsoup/integration/ConnectTest.java +++ b/src/test/java/org/jsoup/integration/ConnectTest.java @@ -671,4 +671,24 @@ public void maxBodySize() throws IOException { assertEquals("Large HTML", doc1.title()); assertEquals("Large HTML", doc2.title()); } + + @Test + public void maxBodySizeInReadToByteBuffer() throws IOException { + // https://github.com/jhy/jsoup/issues/1774 + // when calling readToByteBuffer, contents were not buffered up + String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K + + Connection.Response defaultRes = Jsoup.connect(url).execute(); + Connection.Response smallRes = Jsoup.connect(url).maxBodySize(50 * 1024).execute(); // crops + Connection.Response mediumRes = Jsoup.connect(url).maxBodySize(200 * 1024).execute(); // crops + Connection.Response largeRes = Jsoup.connect(url).maxBodySize(300 * 1024).execute(); 
// does not crop + Connection.Response unlimitedRes = Jsoup.connect(url).maxBodySize(0).execute(); + + int actualDocText = 280735; + assertEquals(actualDocText, defaultRes.body().length()); + assertEquals(50 * 1024, smallRes.body().length()); + assertEquals(200 * 1024, mediumRes.body().length()); + assertEquals(actualDocText, largeRes.body().length()); + assertEquals(actualDocText, unlimitedRes.body().length()); + } }