From 22b17c197b5c007b78f2f1babd44c9cdb9c3b98e Mon Sep 17 00:00:00 2001 From: johannesherr Date: Sat, 30 Apr 2022 04:45:54 +0200 Subject: [PATCH] Fix #142 (multiple text events for long segments if requested) (#146) --- .../java/com/ctc/wstx/api/ReaderConfig.java | 12 ++++ .../com/ctc/wstx/sr/BasicStreamReader.java | 4 +- .../java/wstxtest/evt/TestEventReader.java | 60 ++++++++++--------- 3 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/ctc/wstx/api/ReaderConfig.java b/src/main/java/com/ctc/wstx/api/ReaderConfig.java index df7588d5..56ec3e78 100644 --- a/src/main/java/com/ctc/wstx/api/ReaderConfig.java +++ b/src/main/java/com/ctc/wstx/api/ReaderConfig.java @@ -818,6 +818,18 @@ public boolean hasInternNsURIsBeenEnabled() { return _hasExplicitConfigFlag(CFG_INTERN_NS_URIS); } + /** + * Checks if the user explicitly set coalescing to false. (That is if + * coalescing is disabled only because that is the default value, this method + * will return false.) + * + * @return true, if the user explicitly disabled coalescing, else false + */ + public boolean isCoalescingExplicitlyDisabled() { + // coalescing is disabled and was explicitly set by user + return !_hasConfigFlag(CFG_COALESCE_TEXT) && (mConfigFlagMods & CFG_COALESCE_TEXT) != 0; + } + /* /////////////////////////////////////////////////////////////////////// // Simple mutators diff --git a/src/main/java/com/ctc/wstx/sr/BasicStreamReader.java b/src/main/java/com/ctc/wstx/sr/BasicStreamReader.java index f9dfe268..4378eefe 100644 --- a/src/main/java/com/ctc/wstx/sr/BasicStreamReader.java +++ b/src/main/java/com/ctc/wstx/sr/BasicStreamReader.java @@ -434,10 +434,10 @@ protected BasicStreamReader(InputBootstrapper bs, mShortestTextSegment = Integer.MAX_VALUE; } else { mStTextThreshold = TOKEN_PARTIAL_SINGLE; - if (forER) { + if (forER && !cfg.isCoalescingExplicitlyDisabled()) { /* 30-Sep-2005, TSa: No point in returning runt segments for event readers * (due to event object overhead, less 
convenient); let's just force - * returning of full length segments. + * returning of full length segments. (Unless explicitly requested.) */ mShortestTextSegment = Integer.MAX_VALUE; } else { diff --git a/src/test/java/wstxtest/evt/TestEventReader.java b/src/test/java/wstxtest/evt/TestEventReader.java index 32d2519a..f05b95fa 100644 --- a/src/test/java/wstxtest/evt/TestEventReader.java +++ b/src/test/java/wstxtest/evt/TestEventReader.java @@ -24,11 +24,11 @@ * but it creates class of non-checked exceptions used to wrap real * stream exceptions) * - *
  • Event readers always read the full text segment, instead of returning - * fragments (ie. min. segment length will be replace with MAX_INT). This - * is done for more convenient access, as well as since the overhead of - * multiple Event objects may outweigh potential benefits from returning - * shorter segments. + *
  • Unless coalesce is explicitly set to false, event readers always read + * the full text segment, instead of returning fragments (i.e. min. segment + * length will be replaced with MAX_INT). This is done for more convenient + * access, as well as since the overhead of multiple Event objects may + * outweigh potential benefits from returning shorter segments. *
  • * */ @@ -87,25 +87,20 @@ public void testEventReaderLongSegments() +" not sure If we\r\nreally need anything much more but" +" let's still make this longer" +""; - ; - - // Need to disable coalescing though for test to work: - XMLEventReader er = getReader(XML, false); - XMLEvent evt = er.nextEvent(); // start document - assertTrue(evt.isStartDocument()); - assertTrue(er.nextEvent().isStartElement()); - assertTrue(er.nextEvent().isCharacters()); - - evt = er.nextEvent(); - if (evt.isEndElement()) { - ; // good - } else { - if (evt.isCharacters()) { - fail("Even in the absence of coalescing, event reader should not split CHARACTERS segments (Woodstox guarantee): did get 2 adjacent separate Characters events."); - } else { // hmmh. strange - fail("Unexpected event object type after CHARACTERS: "+evt.getClass()); - } - } + + // Single text event expected (default value, explicit coalescing=true): + + String message = "Even in the absence of coalescing, event reader should not split CHARACTERS segments (Woodstox guarantee): did get 2 separate Characters events."; + // the default behaviour for event readers is to not break text segments into multiple events + assertEquals(message, 1, numTextEvents(getReader(XML, null))); + // if coalescing is set to true event readers do not break text segments into multiple events + assertEquals(message, 1, numTextEvents(getReader(XML, true))); + + // Multiple text events expected (explicit coalescing=false): + + // if coalescing is explicitly set to false, multiple text events may be returned for a text segment + String messageMultiple = "If coalescing is set to false, multiple text events are expected for this input xml."; + assertTrue(messageMultiple, numTextEvents(getReader(XML, false)) > 1); } /** @@ -150,17 +145,28 @@ public void testDtdNotations() // Internal methods ////////////////////////////////////////////////////// */ - - private XMLEventReader2 getReader(String contents, boolean coalescing) + private XMLEventReader2 
getReader(String contents, Boolean coalescing) throws XMLStreamException { XMLInputFactory f = getInputFactory(); setNamespaceAware(f, true); - setCoalescing(f, coalescing); + if (coalescing != null) { + setCoalescing(f, coalescing); + } setLazyParsing(f, true); // shouldn't have effect for event readers! setMinTextSegment(f, 8); // likewise return constructEventReader(f, contents); } + + private int numTextEvents(XMLEventReader er) throws XMLStreamException { + int numTextEvents = 0; + while (er.hasNext()) { + if (er.nextEvent().isCharacters()) { + numTextEvents++; + } + } + return numTextEvents; + } }