Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #142 (multiple text events for long segments if requested) #146

Merged
merged 1 commit into from Apr 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/main/java/com/ctc/wstx/api/ReaderConfig.java
Expand Up @@ -818,6 +818,18 @@ public boolean hasInternNsURIsBeenEnabled() {
return _hasExplicitConfigFlag(CFG_INTERN_NS_URIS);
}

/**
 * Tells whether text coalescing has been turned off by an explicit user
 * setting, as opposed to merely being off because that is the default.
 * That is, if coalescing is disabled only by default, the answer is
 * {@code false}.
 *
 * @return {@code true} if coalescing is disabled and that setting was
 *   explicitly made by the user; {@code false} otherwise
 */
public boolean isCoalescingExplicitlyDisabled() {
    // Coalescing currently enabled: cannot have been "explicitly disabled"
    if (_hasConfigFlag(CFG_COALESCE_TEXT)) {
        return false;
    }
    // Disabled: report true only when the flag was explicitly modified
    return (mConfigFlagMods & CFG_COALESCE_TEXT) != 0;
}

/*
///////////////////////////////////////////////////////////////////////
// Simple mutators
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/ctc/wstx/sr/BasicStreamReader.java
Expand Up @@ -434,10 +434,10 @@ protected BasicStreamReader(InputBootstrapper bs,
mShortestTextSegment = Integer.MAX_VALUE;
} else {
mStTextThreshold = TOKEN_PARTIAL_SINGLE;
if (forER) {
if (forER && !cfg.isCoalescingExplicitlyDisabled()) {
/* 30-Sep-2005, TSa: No point in returning runt segments for event readers
* (due to event object overhead, less convenient); let's just force
* returning of full length segments.
* returning of full length segments. (Unless explicitly requested.)
*/
mShortestTextSegment = Integer.MAX_VALUE;
} else {
Expand Down
60 changes: 33 additions & 27 deletions src/test/java/wstxtest/evt/TestEventReader.java
Expand Up @@ -24,11 +24,11 @@
* but it creates class of non-checked exceptions used to wrap real
* stream exceptions)
* </li>
* <li>Event readers always read the full text segment instead of returning
* fragments (i.e. the minimum segment length is replaced with
* Integer.MAX_VALUE). This is done for more convenient access, and because
* the overhead of multiple Event objects may outweigh the potential benefits
* of returning shorter segments.
* <li>Unless coalescing is explicitly set to false, event readers always read
* the full text segment instead of returning fragments (i.e. the minimum
* segment length is replaced with Integer.MAX_VALUE). This is done for more
* convenient access, and because the overhead of multiple Event objects may
* outweigh the potential benefits of returning shorter segments.
* </li>
*</ul>
*/
Expand Down Expand Up @@ -87,25 +87,20 @@ public void testEventReaderLongSegments()
+" not sure If we\r\nreally need anything much more but"
+" let's still make this longer"
+"</root>";
;

// Need to disable coalescing though for test to work:
XMLEventReader er = getReader(XML, false);
XMLEvent evt = er.nextEvent(); // start document
assertTrue(evt.isStartDocument());
assertTrue(er.nextEvent().isStartElement());
assertTrue(er.nextEvent().isCharacters());

evt = er.nextEvent();
if (evt.isEndElement()) {
; // good
} else {
if (evt.isCharacters()) {
fail("Even in the absence of coalescing, event reader should not split CHARACTERS segments (Woodstox guarantee): did get 2 adjacent separate Characters events.");
} else { // hmmh. strange
fail("Unexpected event object type after CHARACTERS: "+evt.getClass());
}
}

// Single text event expected (default value, explicit coalescing=true):

String message = "Even in the absence of coalescing, event reader should not split CHARACTERS segments (Woodstox guarantee): did get 2 separate Characters events.";
// the default behaviour for event readers is to not break text segments into multiple events
assertEquals(message, 1, numTextEvents(getReader(XML, null)));
// if coalescing is set to true event readers do not break text segments into multiple events
assertEquals(message, 1, numTextEvents(getReader(XML, true)));

// Multiple text events expected (explicit coalescing=false):

// if coalescing is explicitly set to false, multiple text events may be returned for a text segment
String messageMultiple = "If coalescing is set to false, multiple text events are expected for this input xml.";
assertTrue(messageMultiple, numTextEvents(getReader(XML, false)) > 1);
}

/**
Expand Down Expand Up @@ -150,17 +145,28 @@ public void testDtdNotations()
// Internal methods
//////////////////////////////////////////////////////
*/

/**
 * Builds an event reader over the given XML text. The input factory comes
 * from getInputFactory() and is configured via setNamespaceAware(f, true),
 * setLazyParsing(f, true) and setMinTextSegment(f, 8) before being handed
 * to constructEventReader.
 *
 * NOTE(review): this span shows two signatures and two setCoalescing calls,
 * which looks like the removed and the added lines of a diff side by side;
 * confirm which lines apply before treating it as compilable code.
 *
 * @param contents XML document text passed to constructEventReader
 * @param coalescing value passed to setCoalescing; in the {@code Boolean}
 *   variant the guarded call is skipped when this is {@code null}
 *   (presumably leaving the factory's own default in place; verify)
 * @return the reader produced by constructEventReader
 * @throws XMLStreamException declared by this method; presumably raised by
 *   reader construction (TODO confirm)
 */
private XMLEventReader2 getReader(String contents, boolean coalescing)
private XMLEventReader2 getReader(String contents, Boolean coalescing)
throws XMLStreamException
{
XMLInputFactory f = getInputFactory();
setNamespaceAware(f, true);
setCoalescing(f, coalescing);
// only touch the coalescing setting when a value was actually supplied
if (coalescing != null) {
setCoalescing(f, coalescing);
}
setLazyParsing(f, true); // shouldn't have effect for event readers!
setMinTextSegment(f, 8); // likewise
return constructEventReader(f, contents);
}

/**
 * Drains the given event reader and counts how many of the events it
 * returns report themselves as character (text) events.
 *
 * @param er event reader to consume; it is read until hasNext() is false
 * @return number of events for which isCharacters() returned true
 * @throws XMLStreamException if fetching the next event fails
 */
private int numTextEvents(XMLEventReader er) throws XMLStreamException {
    int textEventCount = 0;
    for (; er.hasNext(); ) {
        boolean isText = er.nextEvent().isCharacters();
        textEventCount += isText ? 1 : 0;
    }
    return textEventCount;
}
}