-
-
Notifications
You must be signed in to change notification settings - Fork 81
/
BasicStreamReader.java
5641 lines (5173 loc) · 211 KB
/
BasicStreamReader.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sr;
import java.io.*;
import java.text.MessageFormat;
import java.util.Map;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.AttributeInfo;
import org.codehaus.stax2.DTDInfo;
import org.codehaus.stax2.LocationInfo;
import org.codehaus.stax2.XMLStreamLocation2;
import org.codehaus.stax2.XMLStreamReader2;
import org.codehaus.stax2.typed.TypedXMLStreamException;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.dtd.MinimalDTDReader;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.exc.WstxException;
import com.ctc.wstx.io.*;
import com.ctc.wstx.util.DefaultXmlSymbolTable;
import com.ctc.wstx.util.ExceptionUtil;
import com.ctc.wstx.util.TextBuffer;
import com.ctc.wstx.util.TextBuilder;
/**
* Partial implementation of {@link XMLStreamReader2} consisting of
* all functionality other than DTD-validation-specific parts, and
* Typed Access API (Stax2 v3.0), which are implemented at
* sub-classes.
*
* @author Tatu Saloranta
*/
public abstract class BasicStreamReader
extends StreamScanner
implements StreamReaderImpl, DTDInfo, LocationInfo
{
/*
///////////////////////////////////////////////////////////////////////
// Constants
///////////////////////////////////////////////////////////////////////
*/
// // // Standalone values (stored in mDocStandalone; derived by the
// // // constructor from the bootstrapper's standalone value):
final static int DOC_STANDALONE_UNKNOWN = 0;
final static int DOC_STANDALONE_YES = 1;
final static int DOC_STANDALONE_NO = 2;
// // // Main state consts (stored in mParseState):
final static int STATE_PROLOG = 0; // Before root element
final static int STATE_TREE = 1; // Parsing actual XML tree
final static int STATE_EPILOG = 2; // After root element has been closed
final static int STATE_MULTIDOC_HACK = 3; // State "between" multiple documents (in multi-doc mode)
final static int STATE_CLOSED = 4; // After reader has been closed
// // // Tokenization state consts (stored in mTokenState; values are
// // // ordered, and code compares them with '<' / '<='):
// no idea as to what comes next (unknown type):
final static int TOKEN_NOT_STARTED = 0;
// token type figured out, but not long enough:
final static int TOKEN_STARTED = 1;
/* minimum token length returnable achieved; only used for
* CHARACTERS event which allow fragments to be returned (and for
* CDATA in some limited cases)
*/
final static int TOKEN_PARTIAL_SINGLE = 2;
/* a single physical event has been successfully tokenized; as with
* partial, only used with CDATA and CHARACTERS (meaningless for others,
* which should only use TOKEN_FULL_COALESCED, TOKEN_NOT_STARTED or
* TOKEN_STARTED).
*/
final static int TOKEN_FULL_SINGLE = 3;
/* all adjacent (text) events have been tokenized and coalesced (for
* CDATA and CHARACTERS), or that the full event has been parsed (for
* others)
*/
final static int TOKEN_FULL_COALESCED = 4;
// // // Bit masks used for quick type comparisons: bit N is set when
// // // event type constant N is accepted; tested as ((1 << type) & MASK)
/**
* This mask covers all types for which basic {@link #getText} method
* can be called.
*/
final protected static int MASK_GET_TEXT =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)
| (1 << COMMENT) | (1 << DTD) | (1 << ENTITY_REFERENCE);
/**
* This mask covers all types for which extended <code>getTextXxx</code>
* methods can be called; which is less than those for which
* {@link #getText} can be called. Specifically, <code>DTD</code> and
* <code>ENTITY_REFERENCE</code> types do not support these extended
* accessors.
*/
final protected static int MASK_GET_TEXT_XXX =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE) | (1 << COMMENT);
/**
* This mask is used with Stax2 getText() method (one that takes
* Writer as an argument): accepts even wider range of event types
* (everything in {@link #MASK_GET_TEXT} plus
* <code>PROCESSING_INSTRUCTION</code>).
*/
final protected static int MASK_GET_TEXT_WITH_WRITER =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)
| (1 << COMMENT) | (1 << DTD) | (1 << ENTITY_REFERENCE)
| (1 << PROCESSING_INSTRUCTION);
/**
* This mask covers the event types that {@link #getElementText}
* treats as textual content of the element; any other type (except
* comments and processing instructions, which it skips) makes that
* method fail.
*/
final protected static int MASK_GET_ELEMENT_TEXT =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)
| (1 << ENTITY_REFERENCE);
// // // Indicator of type of text in text event (WRT white space)
final static int ALL_WS_UNKNOWN = 0x0000;
final static int ALL_WS_YES = 0x0001;
final static int ALL_WS_NO = 0x0002;
/* 2 magic constants used for enabling/disabling indentation checks:
* (to minimize negative impact for both small docs, and large
* docs with non-regular white space)
*/
private final static int INDENT_CHECK_START = 16;
private final static int INDENT_CHECK_MAX = 40;
// // // Shared namespace symbols
final protected static String sPrefixXml = DefaultXmlSymbolTable.getXmlSymbol();
final protected static String sPrefixXmlns = DefaultXmlSymbolTable.getXmlnsSymbol();
/*
///////////////////////////////////////////////////////////////////////
// Configuration
///////////////////////////////////////////////////////////////////////
*/
// note: mConfig defined in base class
/**
* Set of locally stored configuration flags; copied from
* <code>ReaderConfig.getConfigFlags()</code> by the constructor.
*/
protected final int mConfigFlags;
// // // Various extracted settings:
/**
* True when the <code>CFG_COALESCE_TEXT</code> bit is set in
* {@link #mConfigFlags}.
*/
protected final boolean mCfgCoalesceText;
/**
* True when the <code>CFG_REPORT_CDATA</code> bit is NOT set in
* {@link #mConfigFlags}; in that case {@link #getEventType} reports
* CDATA events as CHARACTERS.
*/
protected final boolean mCfgReportTextAsChars;
/**
* True when the <code>CFG_LAZY_PARSING</code> bit is set in
* {@link #mConfigFlags} and the reader was not constructed for use
* by an event reader.
*/
protected final boolean mCfgLazyParsing;
/**
* Minimum number of characters parser can return as partial text
* segment, IF it's not required to coalesce adjacent text
* segments. The constructor sets this to
* <code>Integer.MAX_VALUE</code> when coalescing is enabled, and
* also when constructed for an event reader unless coalescing was
* explicitly disabled.
*/
protected final int mShortestTextSegment;
/*
///////////////////////////////////////////////////////////////////////
// Symbol handling
///////////////////////////////////////////////////////////////////////
*/
/**
* Object to notify about shared stuff, such as symbol tables, as well
* as to query for additional config settings if necessary.
*/
final protected ReaderCreator mOwner;
/*
///////////////////////////////////////////////////////////////////////
// Additional XML document information, in addition to what StreamScanner has
///////////////////////////////////////////////////////////////////////
*/
/**
* Status about "stand-aloneness" of document; set to 'yes'/'no'/'unknown'
* (one of the <code>DOC_STANDALONE_xxx</code> constants)
* based on whether there was xml declaration, and if so, whether
* it had standalone attribute.
*/
protected int mDocStandalone = DOC_STANDALONE_UNKNOWN;
/*
///////////////////////////////////////////////////////////////////////
// DOCTYPE information from document type declaration (if any found)
///////////////////////////////////////////////////////////////////////
*/
/**
* Prefix of root element, as dictated by DOCTYPE declaration; null
* if no DOCTYPE declaration, or no root prefix
*/
protected String mRootPrefix;
/**
* Local name of root element, as dictated by DOCTYPE declaration; null
* if no DOCTYPE declaration.
*/
protected String mRootLName;
/**
* Public id of the DTD, if one exists and has been parsed.
*/
protected String mDtdPublicId;
/**
* System id of the DTD, if one exists and has been parsed.
*/
protected String mDtdSystemId;
/*
///////////////////////////////////////////////////////////////////////
// Information about currently open subtree, content
///////////////////////////////////////////////////////////////////////
*/
/**
* TextBuffer mostly used to collect non-element textual content
* (text, CDATA, comment content, pi data). Created by the constructor
* via <code>TextBuffer.createRecyclableBuffer</code>.
*/
final protected TextBuffer mTextBuffer;
/**
* Currently open element tree. Also handed out as the
* {@link NamespaceContext} by {@link #getNamespaceContext}.
*/
final protected InputElementStack mElementStack;
/**
* Object that stores information about currently accessible attributes.
* Obtained from the element stack by the constructor.
*/
final protected AttributeCollector mAttrCollector;
/*
///////////////////////////////////////////////////////////////////////
// Tokenization state
///////////////////////////////////////////////////////////////////////
*/
/// Flag set when DOCTYPE declaration has been parsed
protected boolean mStDoctypeFound = false;
/**
* State of the current token; one of the <code>TOKEN_xxx</code>
* constants from above.
*<p>
* Initially set to fully tokenized, since it's the virtual
* START_DOCUMENT event that we fully know by now (parsed by
* bootstrapper)
*/
protected int mTokenState = TOKEN_FULL_COALESCED;
/**
* Threshold value that defines tokenization state that needs to be
* achieved to "finish" current <b>logical</b> text segment (which
* may consist of adjacent CDATA and text segments; or be a complete
* physical segment; or just even a fragment of such a segment).
* Set by the constructor to <code>TOKEN_FULL_COALESCED</code> when
* coalescing, otherwise to <code>TOKEN_PARTIAL_SINGLE</code>.
*/
protected final int mStTextThreshold;
/**
* Size of the current text for CDATA, CHARACTERS, WHITESPACE.
* When segmenting, this records the combined size of all the segments
* so we can track if the text length has exceeded limits.
*/
protected int mCurrTextLength;
/// Flag that indicates current start element is an empty element
protected boolean mStEmptyElem = false;
/**
* Main parsing/tokenization state (STATE_xxx)
*/
protected int mParseState;
/**
* Current state of the stream, ie token value returned by
* {@link #getEventType}. Needs to be initialized to START_DOCUMENT,
* since that's the state it starts in.
*/
protected int mCurrToken = START_DOCUMENT;
/**
* Additional information sometimes stored (when generating dummy
* events in multi-doc mode, for example) temporarily when
* {@link #mCurrToken} is already populated.
*/
protected int mSecondaryToken = START_DOCUMENT;
/**
* Status of current (text) token's "whitespaceness", that is,
* whether it is or is not all white space
* (presumably one of the <code>ALL_WS_xxx</code> constants; the
* assignments are outside this part of the file).
*/
protected int mWsStatus;
/**
* Flag that indicates that textual content (CDATA, CHARACTERS) is to
* be validated within current element's scope. Enabled if one of
* validators returns {@link XMLValidator#CONTENT_ALLOW_VALIDATABLE_TEXT},
* and will prevent lazy parsing of text.
*/
protected boolean mValidateText = false;
/**
* Counter used for determining whether we are to try to heuristically
* "intern" white space that seems to be used for indentation purposes.
* The constructor leaves it at 0 unless linefeeds are being normalized.
*/
protected int mCheckIndentation;
/**
* Due to the way Stax API does not allow throwing stream exceptions
* from many methods for which Woodstox would need to throw one
* (especially <code>getText</code> and its variations), we may need
* to delay throwing an exception until {@link #next} is called next
* time. If so, this variable holds the pending stream exception.
*/
protected XMLStreamException mPendingException = null;
/*
///////////////////////////////////////////////////////////////////////
// DTD information (entities, content spec stub)
///////////////////////////////////////////////////////////////////////
*/
/**
* Entities parsed from internal/external DTD subsets. Although it
* will remain null for this class, extended classes make use of it,
* plus, to be able to share some of entity resolution code, instance
* is left here even though it semantically belongs to the sub-class.
*/
protected Map<String, EntityDecl> mGeneralEntities = null;
/**
* Mode information needed at this level; mostly to check what kind
* of textual content (if any) is allowed in current element
* context. Constants come from
* {@link XMLValidator},
* (like {@link XMLValidator#CONTENT_ALLOW_VALIDATABLE_TEXT}).
* Only used inside tree; ignored for prolog/epilog (which
* have straight-forward static rules).
*/
protected int mVldContent = XMLValidator.CONTENT_ALLOW_ANY_TEXT;
/**
* Configuration from {@code WstxInputProperties#RETURN_NULL_FOR_DEFAULT_NAMESPACE};
* when true, {@link #getNamespacePrefix} returns null (instead of an
* empty prefix) for a declaration that has no prefix.
*
* @since 4.1.2
*/
protected boolean mReturnNullForDefaultNamespace;
/*
///////////////////////////////////////////////////////////////////////
// Instance construction, initialization
///////////////////////////////////////////////////////////////////////
*/
/**
 * Initializes reader state from what the bootstrapper found in the
 * (optional) xml declaration and from the given configuration, and
 * connects the element stack and attribute collector.
 *
 * @param bs Bootstrapper queried for declared xml version, input
 *   encoding, declared encoding and standalone value
 * @param input Input source; its input location is initialized against
 *   this reader
 * @param owner Creator object retained in {@link #mOwner}
 * @param cfg Configuration from which flags and text segment settings
 *   are extracted
 * @param elemStack Input element stack to use; if null, an instance is
 *   created locally using {@link #createElementStack}.
 * @param forER Override indicator; if true, this stream reader will be
 *   used by an event reader, and should modify some of the base config
 *   settings appropriately. If false, configuration settings are to
 *   be used as is.
 *
 * @throws XMLStreamException if any of the initialization steps invoked
 *   here reports a problem
 */
protected BasicStreamReader(InputBootstrapper bs,
        BranchingReaderSource input, ReaderCreator owner,
        ReaderConfig cfg, InputElementStack elemStack,
        boolean forER)
    throws XMLStreamException
{
    super(input, cfg, cfg.getEntityResolver());

    mOwner = owner;
    mTextBuffer = TextBuffer.createRecyclableBuffer(cfg);

    // // // First, configuration settings:

    mConfigFlags = cfg.getConfigFlags();
    mCfgCoalesceText = (mConfigFlags & CFG_COALESCE_TEXT) != 0;
    mCfgReportTextAsChars = (mConfigFlags & CFG_REPORT_CDATA) == 0;
    mXml11 = cfg.isXml11();

    // Can only use canonical white space if we are normalizing lfs
    mCheckIndentation = mNormalizeLFs ? INDENT_CHECK_START : 0;

    /* 30-Sep-2005, TSa: Let's not do lazy parsing when access is via
     *   Event API. Reason is that there will be no performance benefit
     *   (event objects always access full info right after traversal),
     *   but the wrapping of stream exceptions within runtime exception
     *   wrappers would happen, which is inconvenient (loss of stack trace,
     *   not catching all exceptions as expected)
     */
    mCfgLazyParsing = !forER && ((mConfigFlags & CFG_LAZY_PARSING) != 0);

    /* There are a few derived settings used during tokenization that
     * need to be initialized now...
     */
    if (mCfgCoalesceText) {
        mStTextThreshold = TOKEN_FULL_COALESCED;
        mShortestTextSegment = Integer.MAX_VALUE;
    } else {
        mStTextThreshold = TOKEN_PARTIAL_SINGLE;
        if (forER && !cfg.isCoalescingExplicitlyDisabled()) {
            /* 30-Sep-2005, TSa: No point in returning runt segments for event readers
             *   (due to event object overhead, less convenient); let's just force
             *   returning of full length segments. (Unless explicitly requested.)
             */
            mShortestTextSegment = Integer.MAX_VALUE;
        } else {
            mShortestTextSegment = cfg.getShortestReportedTextSegment();
        }
    }

    // // // Then handling of xml declaration data:

    mDocXmlVersion = bs.getDeclaredVersion();
    mDocInputEncoding = bs.getInputEncoding();
    mDocXmlEncoding = bs.getDeclaredEncoding();

    final String sa = bs.getStandalone();
    if (sa == null) {
        mDocStandalone = DOC_STANDALONE_UNKNOWN;
    } else if (XmlConsts.XML_SA_YES.equals(sa)) {
        mDocStandalone = DOC_STANDALONE_YES;
    } else {
        mDocStandalone = DOC_STANDALONE_NO;
    }

    /* Ok; either we got declaration or not, but in either case we can
     * now initialize prolog parsing settings, without having to really
     * parse anything more.
     */
    /* 07-Oct-2005, TSa: Except, if we are in fragment mode, in which
     *   case we are kind of "in tree" mode...
     */
    mParseState = mConfig.inputParsingModeFragment() ?
        STATE_TREE : STATE_PROLOG;

    // // // And then connecting element stack and attribute collector

    // Honor the documented contract: a null stack means "create one here"
    // (previously a null argument failed with a NullPointerException)
    if (elemStack == null) {
        elemStack = createElementStack(cfg);
    }
    mElementStack = elemStack;
    mAttrCollector = elemStack.getAttrCollector();

    // And finally, location information may have offsets:
    input.initInputLocation(this, mCurrDepth, 0);

    elemStack.connectReporter(this);
    mReturnNullForDefaultNamespace = mConfig.returnNullForDefaultNamespace();
}
/**
 * Builds a new element stack for the given configuration; namespace
 * support is taken from {@code cfg.willSupportNamespaces()}.
 */
protected static InputElementStack createElementStack(ReaderConfig cfg)
{
    final boolean nsAware = cfg.willSupportNamespaces();
    return new InputElementStack(cfg, nsAware);
}
/*
///////////////////////////////////////////////////////////////////////
// XMLStreamReader, document info
///////////////////////////////////////////////////////////////////////
*/
/**
 * Returns the encoding the xml declaration claimed (as captured from
 * the bootstrapper at construction); per Stax (1.0) specs this is null
 * if no xml declaration was found.
 *<p>
 * Note: method name is rather confusing (compare to {@link #getEncoding}).
 */
@Override
public String getCharacterEncodingScheme()
{
    final String declaredEncoding = mDocXmlEncoding;
    return declaredEncoding;
}
/**
 * Returns the encoding the parser determined for the input, if it was
 * able to figure it out, as required by Stax (1.0) specs. There are
 * cases where this can not be found (specifically when being passed a
 * {@link Reader}); null should be returned then.
 */
@Override
public String getEncoding()
{
    final String detectedEncoding = mDocInputEncoding;
    return detectedEncoding;
}
/**
 * Maps the declared xml version marker to its String form.
 *
 * @return Version String for xml 1.0 or 1.1; null if the version is
 *   unknown
 */
@Override
public String getVersion()
{
    return (mDocXmlVersion == XmlConsts.XML_V_10) ? XmlConsts.XML_V_10_STR
        : (mDocXmlVersion == XmlConsts.XML_V_11) ? XmlConsts.XML_V_11_STR
        : null; // unknown
}
/**
 * @return True only if the standalone status was recorded as 'yes'
 */
@Override
public boolean isStandalone()
{
    final boolean declaredYes = (DOC_STANDALONE_YES == mDocStandalone);
    return declaredYes;
}
/**
 * @return True if the standalone status is known (either 'yes' or 'no');
 *   false if it is 'unknown'
 */
@Override
public boolean standaloneSet()
{
    return !(mDocStandalone == DOC_STANDALONE_UNKNOWN);
}
/*
///////////////////////////////////////////////////////////////////////
// Public API, configuration
///////////////////////////////////////////////////////////////////////
*/
/**
 * Looks up a property value by name.
 *
 * @param name Name of the property to access
 *
 * @return For the base URL property, the source reported by the current
 *   input; for anything else, whatever the configuration object's
 *   "safe" accessor returns
 *
 * @throws IllegalStateException if accessing the input source fails
 *   with an {@link IOException} (which is retained as the cause)
 */
@Override
public Object getProperty(String name)
{
    if (!WstxInputProperties.P_BASE_URL.equals(name)) {
        /* 23-Apr-2008, TSa: Let's NOT throw IllegalArgumentException
         *   for unknown property; JavaDocs do not suggest it needs
         *   to be done (different from that of XMLInputFactory
         *   and XMLStreamWriter specification)
         */
        return mConfig.safeGetProperty(name);
    }
    /* 18-Nov-2008, TSa: As per [WSTX-50], should report the
     *   actual Base URL. It can be overridden by matching
     *   setProperty, but if not, is set to actual source
     *   of content being parsed.
     */
    try {
        return mInput.getSource();
    } catch (IOException e) { // not optimal but...
        throw new IllegalStateException(e);
    }
}
/*
///////////////////////////////////////////////////////////////////////
// XMLStreamReader, current state
///////////////////////////////////////////////////////////////////////
*/
// // // Attribute access:
/**
 * @return Attribute count reported by the attribute collector
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public int getAttributeCount()
{
    if (mCurrToken == START_ELEMENT) {
        return mAttrCollector.getCount();
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
}
/**
 * @param index Index of the attribute to access
 *
 * @return Local name the attribute collector has for the attribute
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public String getAttributeLocalName(int index)
{
    if (mCurrToken == START_ELEMENT) {
        return mAttrCollector.getLocalName(index);
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
}
/**
 * @param index Index of the attribute to access
 *
 * @return Qualified name the attribute collector has for the attribute
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public QName getAttributeName(int index)
{
    if (mCurrToken == START_ELEMENT) {
        return mAttrCollector.getQName(index);
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
}
/**
 * @param index Index of the attribute to access
 *
 * @return Namespace URI of the attribute; the "no namespace" marker
 *   constant if the collector has none (null) for it
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public String getAttributeNamespace(int index)
{
    if (mCurrToken != START_ELEMENT) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
    }
    final String nsUri = mAttrCollector.getURI(index);
    if (nsUri != null) {
        return nsUri;
    }
    // Internally it's marked as null, externally need to see ""
    return XmlConsts.ATTR_NO_NS_URI;
}
/**
 * @param index Index of the attribute to access
 *
 * @return Prefix of the attribute; the "no prefix" marker constant if
 *   the collector has none (null) for it
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public String getAttributePrefix(int index)
{
    if (mCurrToken != START_ELEMENT) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
    }
    final String prefix = mAttrCollector.getPrefix(index);
    if (prefix != null) {
        return prefix;
    }
    // Internally it's marked as null, externally need to see ""
    return XmlConsts.ATTR_NO_PREFIX;
}
/**
 * @param index Index of the attribute to access
 *
 * @return Attribute type as reported by the element stack
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public String getAttributeType(int index)
{
    if (mCurrToken == START_ELEMENT) {
        // Attr. collector doesn't know it, elem stack does:
        return mElementStack.getAttributeType(index);
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
}
/**
 * @param index Index of the attribute to access
 *
 * @return Value the attribute collector has for the attribute
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public String getAttributeValue(int index)
{
    if (mCurrToken == START_ELEMENT) {
        return mAttrCollector.getValue(index);
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
}
/**
 * Looks up an attribute value by namespace URI and local name.
 *
 * @param nsURI Namespace URI of the attribute; if null, lookup is done
 *   by local name only
 * @param localName Local name of the attribute
 *
 * @return Whatever the attribute collector returns for the lookup
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public String getAttributeValue(String nsURI, String localName)
{
    if (mCurrToken != START_ELEMENT) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
    }
    // 22-Aug-2018, tatu: As per [woodstox-core#53], need different logic
    //    for `null` namespace URI argument
    return (nsURI == null)
        ? mAttrCollector.getValueByLocalName(localName)
        : mAttrCollector.getValue(nsURI, localName);
}
/**
* From StAX specs:
*<blockquote>
* Reads the content of a text-only element, an exception is thrown if
* this is not a text-only element.
* Regardless of value of javax.xml.stream.isCoalescing this method always
* returns coalesced content.
*<br>Precondition: the current event is START_ELEMENT.
*<br>Postcondition: the current event is the corresponding END_ELEMENT.
*</blockquote>
*<p>
* Implementation outline: an empty element yields "" immediately;
* otherwise events are advanced (skipping comments and processing
* instructions) until the first textual event, which is read fully
* coalesced. If the end tag follows directly in the input buffer, the
* text buffer contents are returned as is; otherwise remaining textual
* events are appended to a builder until END_ELEMENT is reached.
*
* @return Textual content of the element; empty String if there is none
*
* @throws XMLStreamException if the current event is not START_ELEMENT
*   (reported via <code>throwParseError</code>), if an event that is
*   neither textual, comment nor processing instruction is encountered
*   (the exception built by <code>_constructUnexpectedInTyped</code>),
*   or if advancing the stream fails
*/
@Override
public String getElementText()
throws XMLStreamException
{
if (mCurrToken != START_ELEMENT) {
throwParseError(ErrorConsts.ERR_STATE_NOT_STELEM, null, null);
}
/* Ok, now: with START_ELEMENT we know that it's not partially
* processed; that we are in-tree (not prolog or epilog).
* The only possible complication would be:
*/
if (mStEmptyElem) {
/* And if so, we'll then get 'virtual' close tag; things
* are simple as location info was set when dealing with
* empty start element; and likewise, validation (if any)
* has been taken care of
*/
mStEmptyElem = false;
mCurrToken = END_ELEMENT;
return "";
}
// First need to find a textual event
while (true) {
int type = next();
// End tag before any text: element had no textual content
if (type == END_ELEMENT) {
return "";
}
// Comments and PIs are skipped, not included in the result
if (type == COMMENT || type == PROCESSING_INSTRUCTION) {
continue;
}
// Anything that is not textual at this point is an error
if (((1 << type) & MASK_GET_ELEMENT_TEXT) == 0) {
throw _constructUnexpectedInTyped(type);
}
break;
}
// Make sure the first textual event is fully read and coalesced
if (mTokenState < TOKEN_FULL_COALESCED) {
readCoalescedText(mCurrToken, false);
}
/* Ok: then a quick check; if it looks like we are directly
* followed by the end tag, we need not construct String
* quite yet.
*/
if ((mInputPtr + 1) < mInputEnd &&
mInputBuffer[mInputPtr] == '<' && mInputBuffer[mInputPtr+1] == '/') {
// Note: next() has validated text, no need for more validation
// Skip over the "</" that was just peeked at
mInputPtr += 2;
mCurrToken = END_ELEMENT;
// must first get text, as call to readEndElem may break it:
String result = mTextBuffer.contentsAsString();
// Can by-pass next(), nextFromTree(), in this case:
readEndElem();
// and then return results
return result;
}
// Otherwise, we'll need to do slower processing
int extra = 1 + (mTextBuffer.size() >> 1); // let's add 50% space
StringBuilder sb = mTextBuffer.contentsAsStringBuilder(extra);
int type;
while ((type = next()) != END_ELEMENT) {
if (((1 << type) & MASK_GET_ELEMENT_TEXT) != 0) {
// Textual event: finish reading it if needed, then append
if (mTokenState < mStTextThreshold) {
finishToken(false);
}
// Limit is checked against what has been accumulated so far,
// that is, before contents of this event get appended
verifyLimit("Text size", mConfig.getMaxTextLength(), sb.length());
mTextBuffer.contentsToStringBuilder(sb);
continue;
}
// Only comments and PIs may be interleaved with the text
if (type != COMMENT && type != PROCESSING_INSTRUCTION) {
throw _constructUnexpectedInTyped(type);
}
}
// Note: calls next() have validated text, no need for more validation
return sb.toString();
}
/**
 * Returns type of the last event returned; or START_DOCUMENT before
 * any events have been explicitly returned.
 *<p>
 * A CDATA event is reported as CHARACTERS when text coalescing is
 * enabled, or when CDATA is configured to be reported as characters.
 */
@Override
public int getEventType()
{
    /* Only complication -- multi-part coalesced text is to be reported
     * as CHARACTERS always, never as CDATA (StAX specs).
     */
    final boolean cdataAsChars = mCfgCoalesceText || mCfgReportTextAsChars;
    if (cdataAsChars && mCurrToken == CDATA) {
        return CHARACTERS;
    }
    return mCurrToken;
}
/**
 * @return For START_ELEMENT and END_ELEMENT, local name from the element
 *   stack; for ENTITY_REFERENCE, name of the current entity, or, if
 *   there is no entity object, the current name
 *
 * @throws IllegalStateException if current event is none of
 *   START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE
 */
@Override
public String getLocalName()
{
    final int tok = mCurrToken;
    // Note: for this we need not (yet) finish reading element
    if (tok == START_ELEMENT || tok == END_ELEMENT) {
        return mElementStack.getLocalName();
    }
    if (tok != ENTITY_REFERENCE) {
        throw new IllegalStateException("Current state not START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE");
    }
    /* 30-Sep-2005, TSa: Entity will be null in non-expanding mode
     *   if no definition was found:
     */
    if (mCurrEntity != null) {
        return mCurrEntity.getName();
    }
    return mCurrName;
}
// // // getLocation() defined in StreamScanner
/**
 * @return Name of the current element, as reported by the element stack
 *
 * @throws IllegalStateException if current event is neither
 *   START_ELEMENT nor END_ELEMENT
 */
@Override
public QName getName()
{
    final boolean atElement = (mCurrToken == START_ELEMENT) || (mCurrToken == END_ELEMENT);
    if (!atElement) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);
    }
    return mElementStack.getCurrentElementName();
}
// // // Namespace access
/**
 * Returns the element stack itself as the namespace context.
 *<p>
 * Unlike other getNamespaceXxx methods, this is available for all
 * events. Note that the context is "live", ie. remains active (but not
 * static) even through calls to next(). StAX compliant apps should not
 * count on this behaviour, however.
 */
@Override
public NamespaceContext getNamespaceContext()
{
    final NamespaceContext liveContext = mElementStack;
    return liveContext;
}
/**
 * @return Namespace count the element stack reports for the current
 *   element
 *
 * @throws IllegalStateException if current event is neither
 *   START_ELEMENT nor END_ELEMENT
 */
@Override
public int getNamespaceCount()
{
    if (mCurrToken == START_ELEMENT || mCurrToken == END_ELEMENT) {
        return mElementStack.getCurrentNsCount();
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);
}
/**
 * @param index Index of the namespace declaration to access
 *
 * @return Prefix of the declaration; if the element stack has none
 *   (null), either null or the "no prefix" marker constant, depending
 *   on the "return null for default namespace" setting
 *
 * @throws IllegalStateException if current event is neither
 *   START_ELEMENT nor END_ELEMENT
 */
@Override
public String getNamespacePrefix(int index)
{
    final boolean atElement = (mCurrToken == START_ELEMENT) || (mCurrToken == END_ELEMENT);
    if (!atElement) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);
    }
    final String prefix = mElementStack.getLocalNsPrefix(index);
    if (prefix != null) {
        return prefix;
    }
    // Internally it's marked as null, externally need to see "" or null, depending
    return mReturnNullForDefaultNamespace ? null : XmlConsts.ATTR_NO_PREFIX;
}
/**
 * @return Namespace URI of the current element; the "no namespace"
 *   marker constant if the element stack has none (null)
 *
 * @throws IllegalStateException if current event is neither
 *   START_ELEMENT nor END_ELEMENT
 */
@Override
public String getNamespaceURI()
{
    final boolean atElement = (mCurrToken == START_ELEMENT) || (mCurrToken == END_ELEMENT);
    if (!atElement) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);
    }
    final String nsUri = mElementStack.getNsURI();
    if (nsUri != null) {
        return nsUri;
    }
    // Internally it's marked as null, externally need to see ""
    return XmlConsts.ELEM_NO_NS_URI;
}
/**
 * @param index Index of the namespace declaration to access
 *
 * @return Namespace URI of the declaration; the "no namespace" marker
 *   constant if the element stack has none (null)
 *
 * @throws IllegalStateException if current event is neither
 *   START_ELEMENT nor END_ELEMENT
 */
@Override
public String getNamespaceURI(int index)
{
    final boolean atElement = (mCurrToken == START_ELEMENT) || (mCurrToken == END_ELEMENT);
    if (!atElement) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);
    }
    final String nsUri = mElementStack.getLocalNsURI(index);
    if (nsUri != null) {
        return nsUri;
    }
    // Internally it's marked as null, externally need to see ""
    return XmlConsts.ATTR_NO_NS_URI;
}
/**
 * @param prefix Prefix to resolve
 *
 * @return Result of resolving the prefix via the element stack,
 *   returned as is (not masked)
 *
 * @throws IllegalStateException if current event is neither
 *   START_ELEMENT nor END_ELEMENT
 */
@Override
public String getNamespaceURI(String prefix)
{
    if (mCurrToken == START_ELEMENT || mCurrToken == END_ELEMENT) {
        /* Note: this will need to return null if no URI found for
         * the prefix, so we can't mask it.
         */
        return mElementStack.getNamespaceURI(prefix);
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);
}
/**
 * @return Contents of the text buffer, after making sure the token has
 *   been read past the "started" state
 *
 * @throws IllegalStateException if current event is not
 *   PROCESSING_INSTRUCTION
 */
@Override
public String getPIData()
{
    if (mCurrToken != PROCESSING_INSTRUCTION) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_PI);
    }
    final boolean needsFinishing = (mTokenState <= TOKEN_STARTED);
    if (needsFinishing) {
        safeFinishToken();
    }
    return mTextBuffer.contentsAsString();
}
/**
 * @return Current name, which holds the target of the processing
 *   instruction
 *
 * @throws IllegalStateException if current event is not
 *   PROCESSING_INSTRUCTION
 */
@Override
public String getPITarget()
{
    if (mCurrToken == PROCESSING_INSTRUCTION) {
        // Target is always parsed automatically, not lazily...
        return mCurrName;
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_PI);
}
/**
 * @return Prefix of the current element; the "no prefix" marker
 *   constant if the element stack has none (null)
 *
 * @throws IllegalStateException if current event is neither
 *   START_ELEMENT nor END_ELEMENT
 */
@Override
public String getPrefix()
{
    final boolean atElement = (mCurrToken == START_ELEMENT) || (mCurrToken == END_ELEMENT);
    if (!atElement) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);
    }
    final String prefix = mElementStack.getPrefix();
    if (prefix != null) {
        return prefix;
    }
    // Internally it's marked as null, externally need to see ""
    return XmlConsts.ELEM_NO_PREFIX;
}
/**
 * Returns the textual content of the current event, for event types
 * included in {@link #MASK_GET_TEXT}.
 *
 * @return For ENTITY_REFERENCE, replacement text of the current entity
 *   (null if there is no entity object); for DTD, the internal subset;
 *   otherwise contents of the text buffer
 */
@Override
public String getText()
{
    final int tok = mCurrToken;
    final boolean textual = ((1 << tok) & MASK_GET_TEXT) != 0;
    if (!textual) {
        throwNotTextual(tok);
    }
    // Token may not have been read far enough yet
    if (mTokenState < mStTextThreshold) {
        safeFinishToken();
    }
    if (tok == ENTITY_REFERENCE) {
        if (mCurrEntity != null) {
            return mCurrEntity.getReplacementText();
        }
        return null;
    }
    if (tok != DTD) {
        return mTextBuffer.contentsAsString();
    }
    // 16-Aug-2004, TSa: Hmmh. Specs are bit ambiguous on whether this
    //   should return just the internal subset, or the whole thing...
    return getDTDInternalSubset();
}
/**
 * Returns the character array that contains the text of the current
 * event, for event types included in {@link #MASK_GET_TEXT_XXX}.
 *<p>
 * NOTE(review): {@link #MASK_GET_TEXT_XXX} includes neither
 * ENTITY_REFERENCE nor DTD, so the two branches below for those types
 * look unreachable if <code>throwNotTextXxx</code> always throws --
 * confirm before removing them.
 *
 * @return Underlying buffer of the text buffer (or entity replacement
 *   characters / DTD internal subset array for the branches noted above)
 */
@Override
public char[] getTextCharacters()
{
    final int tok = mCurrToken;
    final boolean supported = ((1 << tok) & MASK_GET_TEXT_XXX) != 0;
    if (!supported) {
        throwNotTextXxx(tok);
    }
    // Token may not have been read far enough yet
    if (mTokenState < mStTextThreshold) {
        safeFinishToken();
    }
    if (tok == ENTITY_REFERENCE) {
        return mCurrEntity.getReplacementChars();
    }
    if (tok != DTD) {
        return mTextBuffer.getTextBuffer();
    }
    return getDTDInternalSubsetArray();
}
/**
 * Copies text of the current event into the given array, for event
 * types included in {@link #MASK_GET_TEXT_XXX}; the copying itself is
 * delegated to the text buffer.
 *
 * @param sourceStart Offset within the text to start copying from
 * @param target Array to copy characters into
 * @param targetStart Offset within the target array to start writing at
 * @param len Maximum number of characters to copy
 *
 * @return Value returned by the text buffer's copy operation
 */
@Override
public int getTextCharacters(int sourceStart, char[] target, int targetStart, int len)
{
    final int tok = mCurrToken;
    final boolean supported = ((1 << tok) & MASK_GET_TEXT_XXX) != 0;
    if (!supported) {
        throwNotTextXxx(tok);
    }
    // Token may not have been read far enough yet
    if (mTokenState < mStTextThreshold) {
        safeFinishToken();
    }
    return mTextBuffer.contentsToArray(sourceStart, target, targetStart, len);
}
/**
 * @return Size reported by the text buffer for the current event, for
 *   event types included in {@link #MASK_GET_TEXT_XXX}
 */
@Override
public int getTextLength()
{
    final int tok = mCurrToken;
    final boolean supported = ((1 << tok) & MASK_GET_TEXT_XXX) != 0;
    if (!supported) {
        throwNotTextXxx(tok);
    }
    // Token may not have been read far enough yet
    if (mTokenState < mStTextThreshold) {
        safeFinishToken();
    }
    return mTextBuffer.size();
}
/**
 * @return Text start offset reported by the text buffer for the current
 *   event, for event types included in {@link #MASK_GET_TEXT_XXX}
 */
@Override
public int getTextStart()
{
    final int tok = mCurrToken;
    final boolean supported = ((1 << tok) & MASK_GET_TEXT_XXX) != 0;
    if (!supported) {
        throwNotTextXxx(tok);
    }
    // Token may not have been read far enough yet
    if (mTokenState < mStTextThreshold) {
        safeFinishToken();
    }
    return mTextBuffer.getTextStart();
}
/**
 * @return True if current event is START_ELEMENT or END_ELEMENT
 */
@Override
public boolean hasName()
{
    final int tok = mCurrToken;
    if (tok == START_ELEMENT) {
        return true;
    }
    return tok == END_ELEMENT;
}
/**
 * @return True unless current event is END_DOCUMENT; and even then, if
 *   the reader is in the "between documents" state of multi-doc mode
 */
@Override
public boolean hasNext()
{
    if (mCurrToken != END_DOCUMENT) {
        return true;
    }
    // 08-Oct-2005, TSa: In multi-doc mode, we have different criteria...
    return mParseState == STATE_MULTIDOC_HACK;
}
/**
 * @return True if current event type is one of those included in
 *   {@link #MASK_GET_TEXT}
 */
@Override
public boolean hasText()
{
    final int tokenBit = (1 << mCurrToken);
    return (tokenBit & MASK_GET_TEXT) != 0;
}
/**
 * @param index Index of the attribute to check
 *
 * @return Whatever the attribute collector reports as the "specified"
 *   status of the attribute
 *
 * @throws IllegalStateException if current event is not START_ELEMENT
 */
@Override
public boolean isAttributeSpecified(int index)
{
    // No need to check for ATTRIBUTE since we never return that...
    if (mCurrToken == START_ELEMENT) {
        return mAttrCollector.isSpecified(index);
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
}
@Override
public boolean isCharacters()
{
/* 21-Dec-2005, TSa: Changed for 3.0 to work the same way as stax
* ref impl.
*/
//return (mCurrToken == CHARACTERS || mCurrToken == CDATA || mCurrToken == SPACE);
/* 21-Apr-2009, TSa: As per [WSTX-201], should be consistent with