-
-
Notifications
You must be signed in to change notification settings - Fork 81
/
TestEventReader.java
172 lines (151 loc) · 6.76 KB
/
TestEventReader.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
package wstxtest.evt;
import java.net.URL;
import java.util.*;
import javax.xml.stream.*;
import javax.xml.stream.events.DTD;
import javax.xml.stream.events.XMLEvent;
import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.evt.NotationDeclaration2;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.exc.*;
/**
* Set of unit tests that verify that Woodstox implementation of
* {@link XMLEventReader} does obey additional constraints Woodstox
* guarantees. Specifically:
*<ul>
* <li>Event readers never read things in lazy manner: even if lazy parsing
* is enabled. (this restriction is added since lazy parsing does not
* significantly benefit Event API since there's no way to skip events,
* but it creates class of non-checked exceptions used to wrap real
* stream exceptions)
* </li>
* <li>Unless coalesce is explicitly set to false, event readers always read
* the full text segment, instead of returning fragments (ie. min. segment
* length will be replaced with MAX_INT). This is done for more convenient
* access, as well as since the overhead of multiple Event objects may
* outweigh potential benefits from returning shorter segments.
* </li>
*</ul>
*/
public class TestEventReader
extends wstxtest.BaseWstxTest
{
public void testEventReaderNonLaziness()
throws XMLStreamException
{
/* We can test this by forcing coalescing to happen, and injecting
* an intentional error after first two segments. In lazy mode,
* coalescing is done not when event type is fetched, but only
* when getText() is called. In non-lazy mode, it's thrown right
* from next() method. Although the exact mechanism is hidden by
* the Event API, what we do see is the type of exception we get --
* that should be XMLStreamException, NOT a runtime wrapper instead
* of it.
*/
final String XML =
"<root>Some text and & <![CDATA[also cdata]]> &error;</root>"
;
XMLEventReader er = getReader(XML, true);
XMLEvent evt = er.nextEvent(); // start document
assertTrue(evt.isStartDocument());
assertTrue(er.nextEvent().isStartElement());
// Ok, and now...
try {
evt = er.nextEvent();
// should NOT get this far...
fail("Expected an exception for invalid content: coalescing not workig?");
} catch (WstxParsingException wex) {
// This is correct... parsing exc for entity, hopefully
//System.err.println("GOOD: got "+wex.getClass()+": "+wex);
} catch (WstxException wex2) {
// Unexpected... not a catastrophe, but not right
fail("Should have gotten a non-lazy parsing exception; got non-lazy other wstx exception (why?): "+wex2);
} catch (WstxLazyException lex) {
// Not good...
fail("Should not get a lazy exception via (default) event reader; received: "+lex);
} catch (Throwable t) {
fail("Unexpected excpetion caught: "+t);
}
}
public void testEventReaderLongSegments()
throws XMLStreamException
{
/* Ok. And here we should just check that we do not get 2 adjacent
* separate Characters event. We can try to trigger this by long
* segment and a set of char entities...
*/
final String XML =
"<root>Some text and & also "quoted" stuff..."
+" not sure If we\r\nreally need anything much more but"
+" let's still make this longer"
+"</root>";
// Single text event expected (default value, explicit coalescing=true):
String message = "Even in the absence of coalescing, event reader should not split CHARACTERS segments (Woodstox guarantee): did get 2 separate Characters events.";
// the default behaviour for event readers is to not break text segments into multiple events
assertEquals(message, 1, numTextEvents(getReader(XML, null)));
// if coalescing is set to true event readers do not break text segments into multiple events
assertEquals(message, 1, numTextEvents(getReader(XML, true)));
// Multiple text events expected (explicit coalescing=false):
// if coalescing is explicitly set to false, multiple text events may be returned for a text segment
String messageMultiple = "If coalescing is set to false, multiple text events are expected for this input xml.";
assertTrue(messageMultiple, numTextEvents(getReader(XML, false)) > 1);
}
/**
* As of Stax 3.0 (Woodstox 4.0+), there is additional info for
* NotationDeclarations (base URI). Let's verify it gets properly
* populated.
*/
public void testDtdNotations()
throws Exception
{
final String URI = "http://test";
/* Ok. And here we should just check that we do not get 2 adjacent
* separate Characters event. We can try to trigger this by long
* segment and a set of char entities...
*/
final String XML = "<?xml version='1.0'?>"
+"<!DOCTYPE root [\n"
+"<!ELEMENT root EMPTY>\n"
+"<!NOTATION not PUBLIC 'some-public-id'>\n"
+"]>"
+"<root/>";
// Need to disable coalescing though for test to work:
XMLEventReader2 er = getReader(XML, false);
// Need to set Base URI; can do it for factory or instance
er.setProperty(WstxInputProperties.P_BASE_URL, new URL(URI));
assertTrue(er.nextEvent().isStartDocument());
XMLEvent evt = er.nextEvent(); // DTD
assertTokenType(DTD, evt.getEventType());
DTD dtd = (DTD) evt;
List<?> nots = dtd.getNotations();
assertEquals(1, nots.size());
NotationDeclaration2 notDecl = (NotationDeclaration2) nots.get(0);
assertEquals(URI, notDecl.getBaseURI());
}
/*
//////////////////////////////////////////////////////
// Internal methods
//////////////////////////////////////////////////////
*/
private XMLEventReader2 getReader(String contents, Boolean coalescing)
throws XMLStreamException
{
XMLInputFactory f = getInputFactory();
setNamespaceAware(f, true);
if (coalescing != null) {
setCoalescing(f, coalescing);
}
setLazyParsing(f, true); // shouldn't have effect for event readers!
setMinTextSegment(f, 8); // likewise
return constructEventReader(f, contents);
}
private int numTextEvents(XMLEventReader er) throws XMLStreamException {
int numTextEvents = 0;
while (er.hasNext()) {
if (er.nextEvent().isCharacters()) {
numTextEvents++;
}
}
return numTextEvents;
}
}