From 55678b05e04a97384902914968e8c9fb5fab92cf Mon Sep 17 00:00:00 2001 From: Gabriel Belingueres Date: Sat, 2 Apr 2022 12:31:39 -0300 Subject: [PATCH] Fixed regressions: * #163 - new case: Don't assume UTF8 as default, to allow parsing from String. * #194 - Incorrect getText() after parsing the DOCDECL section. * Added tests exercising other regressions exposed while fixing this issues. --- .../plexus/util/xml/pull/MXParser.java | 157 ++++-- .../plexus/util/xml/pull/MXParserTest.java | 495 ++++++++++++++++++ src/test/resources/xml/test-entities-dos.xml | 6 + .../xml/test-entities-in-attr-dos.xml | 9 + .../resources/xml/test-entities-in-attr.xml | 9 + src/test/resources/xml/test-entities.xml | 6 + 6 files changed, 637 insertions(+), 45 deletions(-) create mode 100644 src/test/resources/xml/test-entities-dos.xml create mode 100644 src/test/resources/xml/test-entities-in-attr-dos.xml create mode 100644 src/test/resources/xml/test-entities-in-attr.xml create mode 100644 src/test/resources/xml/test-entities.xml diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java index 3874f572..e21b66cb 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java @@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len ) // private String elValue[]; private int elNamespaceCount[]; - private String fileEncoding = "UTF8"; + private String fileEncoding = null; /** * Make sure that we have enough space to keep element stack if passed size. 
It will always create one additional @@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) ) } } - /** - * Unknown properties are always returned as false + /** + * Unknown properties are always returned as false */ @Override public boolean getFeature( String name ) @@ -1596,11 +1596,11 @@ else if ( ch == '&' ) } final int oldStart = posStart + bufAbsoluteStart; final int oldEnd = posEnd + bufAbsoluteStart; - final char[] resolvedEntity = parseEntityRef(); + parseEntityRef(); if ( tokenize ) return eventType = ENTITY_REF; // check if replacement text can be resolved !!! - if ( resolvedEntity == null ) + if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED ) { if ( entityRefName == null ) { @@ -1628,7 +1628,7 @@ else if ( ch == '&' ) } // assert usePC == true; // write into PC replacement text - do merge for replacement text!!!! - for ( char aResolvedEntity : resolvedEntity ) + for ( char aResolvedEntity : resolvedEntityRefCharBuf ) { if ( pcEnd >= pc.length ) { @@ -2675,9 +2675,28 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' ) return ch; } - private char[] charRefOneCharBuf = new char[1]; + // state representing that no entity ref have been resolved + private static final char[] BUF_NOT_RESOLVED = new char[0]; + + // predefined entity refs + private static final char[] BUF_LT = new char[] { '<' }; + private static final char[] BUF_AMP = new char[] { '&' }; + private static final char[] BUF_GT = new char[] { '>' }; + private static final char[] BUF_APO = new char[] { '\'' }; + private static final char[] BUF_QUOT = new char[] { '"' }; - private char[] parseEntityRef() + private char[] resolvedEntityRefCharBuf = BUF_NOT_RESOLVED; + + /** + * parse Entity Ref, either a character entity or one of the predefined name entities. + * + * @return the length of the valid found character reference, which may be one of the predefined character reference + * names (resolvedEntityRefCharBuf contains the replaced chars). 
Returns the length of the not found entity + * name, otherwise. + * @throws XmlPullParserException if invalid XML is detected. + * @throws IOException if an I/O error is found. + */ + private int parseCharOrPredefinedEntityRef() throws XmlPullParserException, IOException { // entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference @@ -2686,6 +2705,8 @@ private char[] parseEntityRef() // ASSUMPTION just after & entityRefName = null; posStart = pos; + int len = 0; + resolvedEntityRefCharBuf = BUF_NOT_RESOLVED; char ch = more(); if ( ch == '#' ) { @@ -2750,7 +2771,6 @@ else if ( ch >= 'A' && ch <= 'F' ) ch = more(); } } - posEnd = pos - 1; boolean isValidCodePoint = true; try @@ -2759,7 +2779,7 @@ else if ( ch >= 'A' && ch <= 'F' ) isValidCodePoint = isValidCodePoint( codePoint ); if ( isValidCodePoint ) { - charRefOneCharBuf = Character.toChars( codePoint ); + resolvedEntityRefCharBuf = Character.toChars( codePoint ); } } catch ( IllegalArgumentException e ) @@ -2775,14 +2795,14 @@ else if ( ch >= 'A' && ch <= 'F' ) if ( tokenize ) { - text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length ); + text = newString( resolvedEntityRefCharBuf, 0, resolvedEntityRefCharBuf.length ); } - return charRefOneCharBuf; + len = resolvedEntityRefCharBuf.length; } else { // [68] EntityRef ::= '&' Name ';' - // scan anem until ; + // scan name until ; if ( !isNameStartChar( ch ) ) { throw new XmlPullParserException( "entity reference names can not start with character '" @@ -2801,17 +2821,15 @@ else if ( ch >= 'A' && ch <= 'F' ) + printable( ch ) + "'", this, null ); } } - posEnd = pos - 1; // determine what name maps to - final int len = posEnd - posStart; + len = ( pos - 1 ) - posStart; if ( len == 2 && buf[posStart] == 'l' && buf[posStart + 1] == 't' ) { if ( tokenize ) { text = "<"; } - charRefOneCharBuf[0] = '<'; - return charRefOneCharBuf; + resolvedEntityRefCharBuf = BUF_LT; // if(paramPC || isParserTokenizing) { // if(pcEnd >= pc.length) ensurePC(); 
// pc[pcEnd++] = '<'; @@ -2823,8 +2841,7 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po { text = "&"; } - charRefOneCharBuf[0] = '&'; - return charRefOneCharBuf; + resolvedEntityRefCharBuf = BUF_AMP; } else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' ) { @@ -2832,8 +2849,7 @@ else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' ) { text = ">"; } - charRefOneCharBuf[0] = '>'; - return charRefOneCharBuf; + resolvedEntityRefCharBuf = BUF_GT; } else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[posStart + 2] == 'o' && buf[posStart + 3] == 's' ) @@ -2842,8 +2858,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po { text = "'"; } - charRefOneCharBuf[0] = '\''; - return charRefOneCharBuf; + resolvedEntityRefCharBuf = BUF_APO; } else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[posStart + 2] == 'o' && buf[posStart + 3] == 't' ) @@ -2852,25 +2867,65 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po { text = "\""; } - charRefOneCharBuf[0] = '"'; - return charRefOneCharBuf; - } - else - { - final char[] result = lookuEntityReplacement( len ); - if ( result != null ) - { - return result; - } + resolvedEntityRefCharBuf = BUF_QUOT; } - if ( tokenize ) - text = null; - return null; } + + posEnd = pos; + + return len; + } + + /** + * Parse an entity reference inside the DOCDECL section. + * + * @throws XmlPullParserException if invalid XML is detected. + * @throws IOException if an I/O error is found. 
+ */ + private void parseEntityRefInDocDecl() + throws XmlPullParserException, IOException + { + parseCharOrPredefinedEntityRef(); + if (usePC) { + posStart--; // include in PC the starting '&' of the entity + joinPC(); + } + + if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED ) + return; + if ( tokenize ) + text = null; + } + + /** + * Parse an entity reference inside a tag or attribute. + * + * @throws XmlPullParserException if invalid XML is detected. + * @throws IOException if an I/O error is found. + */ + private void parseEntityRef() + throws XmlPullParserException, IOException + { + final int len = parseCharOrPredefinedEntityRef(); + + posEnd--; // don't involve the final ';' from the entity in the search + + if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED ) { + return; + } + + resolvedEntityRefCharBuf = lookuEntityReplacement( len ); + if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED ) + { + return; + } + if ( tokenize ) + text = null; } /** - * Check if the provided parameter is a valid Char, according to: {@link https://www.w3.org/TR/REC-xml/#NT-Char} + * Check if the provided parameter is a valid Char. According to + * https://www.w3.org/TR/REC-xml/#NT-Char * * @param codePoint the numeric value to check * @return true if it is a valid numeric character reference. False otherwise. 
@@ -2883,8 +2938,6 @@ private static boolean isValidCodePoint( int codePoint ) } private char[] lookuEntityReplacement( int entityNameLen ) - throws XmlPullParserException, IOException - { if ( !allStringsInterned ) { @@ -2919,7 +2972,7 @@ private char[] lookuEntityReplacement( int entityNameLen ) } } } - return null; + return BUF_NOT_RESOLVED; } private void parseComment() @@ -2977,7 +3030,7 @@ else if (isValidCodePoint( ch )) } else { - throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(((int) ch)) + " found in comment", this, null ); + throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(ch) + " found in comment", this, null ); } if ( normalizeIgnorableWS ) { @@ -3484,7 +3537,8 @@ else if ( ch == '>' && bracketLevel == 0 ) break; else if ( ch == '&' ) { - extractEntityRef(); + extractEntityRefInDocDecl(); + continue; } if ( normalizeIgnorableWS ) { @@ -3536,6 +3590,19 @@ else if ( ch == '\n' ) } posEnd = pos - 1; + text = null; + } + + private void extractEntityRefInDocDecl() + throws XmlPullParserException, IOException + { + // extractEntityRef + posEnd = pos - 1; + + int prevPosStart = posStart; + parseEntityRefInDocDecl(); + + posStart = prevPosStart; } private void extractEntityRef() @@ -3559,9 +3626,9 @@ private void extractEntityRef() } // assert usePC == true; - final char[] resolvedEntity = parseEntityRef(); + parseEntityRef(); // check if replacement text can be resolved !!! - if ( resolvedEntity == null ) + if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED ) { if ( entityRefName == null ) { @@ -3571,7 +3638,7 @@ private void extractEntityRef() + "'", this, null ); } // write into PC replacement text - do merge for replacement text!!!! 
- for ( char aResolvedEntity : resolvedEntity ) + for ( char aResolvedEntity : resolvedEntityRefCharBuf ) { if ( pcEnd >= pc.length ) { diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java index e0be666a..6fc6e9c6 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java @@ -17,6 +17,7 @@ */ import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -29,6 +30,7 @@ import java.nio.file.Files; import java.nio.file.Paths; +import org.codehaus.plexus.util.IOUtil; import org.codehaus.plexus.util.ReaderFactory; import org.junit.Test; @@ -898,4 +900,497 @@ public void testEncodingISO_8859_1_setInputStream() } } + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * Another case of bug #163: File encoding information is lost after the input file is copied to a String. + * + * @throws IOException if IO error. + * + * @since 3.4.2 + */ + @Test + public void testEncodingISO_8859_1setStringReader() + throws IOException + { + try ( Reader reader = + ReaderFactory.newXmlReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + { + MXParser parser = new MXParser(); + String xmlFileContents = IOUtil.toString( reader ); + parser.setInput( new StringReader( xmlFileContents ) ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + *

+ * Test custom Entity not found. + *

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws java.lang.Exception if any. + * + * @since 3.4.2 + */ + @Test + public void testCustomEntityNotFoundInText() + throws Exception + { + MXParser parser = new MXParser(); + + String input = "&otherentity;"; + parser.setInput( new StringReader( input ) ); + parser.defineEntityReplacementText( "myentity", "replacement" ); + + try + { + assertEquals( XmlPullParser.START_TAG, parser.next() ); + assertEquals( XmlPullParser.TEXT, parser.next() ); + fail( "should raise exception" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "could not resolve entity named 'otherentity' (position: START_TAG seen &otherentity;... @1:19)" ) ); + assertEquals( XmlPullParser.START_TAG, parser.getEventType() ); // not an ENTITY_REF + assertEquals( "otherentity", parser.getText() ); + } + } + + /** + *

+ * Test custom Entity not found, with tokenize. + *

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws java.lang.Exception if any. + * + * @since 3.4.2 + */ + @Test + public void testCustomEntityNotFoundInTextTokenize() + throws Exception + { + MXParser parser = new MXParser(); + + String input = "&otherentity;"; + parser.setInput( new StringReader( input ) ); + parser.defineEntityReplacementText( "myentity", "replacement" ); + + try + { + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertNull( parser.getText() ); + } + catch ( XmlPullParserException e ) + { + fail( "should not throw exception if tokenize" ); + } + } + + /** + *

+ * Test custom Entity not found in attribute. + *

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws java.lang.Exception if any. + * + * @since 3.4.2 + */ + @Test + public void testCustomEntityNotFoundInAttr() + throws Exception + { + MXParser parser = new MXParser(); + + String input = "sometext"; + parser.setInput( new StringReader( input ) ); + parser.defineEntityReplacementText( "myentity", "replacement" ); + + try + { + assertEquals( XmlPullParser.START_TAG, parser.next() ); + fail( "should raise exception" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "could not resolve entity named 'otherentity' (position: START_DOCUMENT seen + * Test custom Entity not found in attribute, with tokenize. + *

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * @throws XmlPullParserException + * + * @throws Exception if any. + * + * @since 3.4.2 + */ + @Test + public void testCustomEntityNotFoundInAttrTokenize() throws Exception + { + MXParser parser = new MXParser(); + + String input = "sometext"; + + try + { + parser.setInput( new StringReader( input ) ); + parser.defineEntityReplacementText( "myentity", "replacement" ); + + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + fail( "should raise exception" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "could not resolve entity named 'otherentity' (position: START_DOCUMENT seen Issue #194: Incorrect getText() after parsing the DOCDECL section + * + *

Test DOCDECL text with myCustomEntity that cannot be resolved, Unix line separator.

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testDocdeclTextWithEntitiesUnix() + throws IOException + { + testDocdeclTextWithEntities( "test-entities.xml" ); + } + + /** + *

Issue #194: Incorrect getText() after parsing the DOCDECL section + * + *

Test DOCDECL text with myCustomEntity that cannot be resolved, DOS line separator.

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testDocdeclTextWithEntitiesDOS() + throws IOException + { + testDocdeclTextWithEntities( "test-entities-dos.xml" ); + } + + private void testDocdeclTextWithEntities( String filename ) + throws IOException + { + try ( Reader reader = ReaderFactory.newXmlReader( new File( "src/test/resources/xml", filename ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + assertEquals( XmlPullParser.PROCESSING_INSTRUCTION, parser.nextToken() ); + assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() ); + assertEquals( XmlPullParser.DOCDECL, parser.nextToken() ); + assertEquals( " document [\n" + + "\n" + + "\n" + + "]", parser.getText() ); + assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() ); + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( "document", parser.getName() ); + assertEquals( XmlPullParser.TEXT, parser.next() ); + + fail( "should fail to resolve 'myCustomEntity' entity"); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "could not resolve entity named 'myCustomEntity'" )); + } + } + + /** + *

Issue #194: Incorrect getText() after parsing the DOCDECL section + * + *

Test DOCDECL text with entities appearing in attributes, Unix line separator.

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testDocdeclTextWithEntitiesInAttributesUnix() + throws IOException + { + testDocdeclTextWithEntitiesInAttributes( "test-entities-in-attr.xml" ); + } + + /** + *

Issue #194: Incorrect getText() after parsing the DOCDECL section + * + *

Test DOCDECL text with entities appearing in attributes, DOS line separator.

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testDocdeclTextWithEntitiesInAttributesDOS() + throws IOException + { + testDocdeclTextWithEntitiesInAttributes( "test-entities-in-attr-dos.xml" ); + } + + private void testDocdeclTextWithEntitiesInAttributes( String filename ) + throws IOException + { + try ( Reader reader = ReaderFactory.newXmlReader( new File( "src/test/resources/xml", filename ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + parser.defineEntityReplacementText( "nbsp", " " ); + parser.defineEntityReplacementText( "Alpha", "Α" ); + parser.defineEntityReplacementText( "tritPos", "𝟭" ); + parser.defineEntityReplacementText( "flo", "ř" ); + parser.defineEntityReplacementText( "myCustomEntity", "&flo;" ); + assertEquals( XmlPullParser.PROCESSING_INSTRUCTION, parser.nextToken() ); + assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() ); + assertEquals( XmlPullParser.DOCDECL, parser.nextToken() ); + assertEquals( " document [\n" + + " \n" + + " \n" + + " \n" + + "\n" + + "\n" + + "]", parser.getText() ); + assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() ); + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( "document", parser.getName() ); + assertEquals( 1, parser.getAttributeCount() ); + assertEquals( "name", parser.getAttributeName( 0 ) ); + assertEquals( "section name with entities: '&' 'Α' '<' ' ' '>' '𝟭' ''' 'ř' '\"'", + parser.getAttributeValue( 0 ) ); + + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "myCustomEntity", parser.getName() ); + assertEquals( "ř", parser.getText() ); + + assertEquals( XmlPullParser.END_TAG, parser.nextToken() ); + assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** 
+ *

Test entity references appearing in element content, Unix line separator.

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testEntityRefTextUnix() + throws IOException + { + testEntityRefText( "\n" ); + } + + /** + *

Test entity references appearing in element content, DOS line separator.

+ * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testEntityRefTextDOS() + throws IOException + { + testEntityRefText( "\r\n" ); + } + + private void testEntityRefText( String newLine ) + throws IOException + { + StringBuilder sb = new StringBuilder(); + sb.append( "" ).append( newLine ); + sb.append( "" ).append( newLine ); + sb.append( "" ).append( newLine ); + sb.append( "" ).append( newLine ); + sb.append( "]>" ).append( newLine ); + sb.append( "&foo;&foo1;&foo2;&tritPos;" ); + + try + { + MXParser parser = new MXParser(); + parser.setInput( new StringReader( sb.toString() ) ); + parser.defineEntityReplacementText( "foo", "ř" ); + parser.defineEntityReplacementText( "nbsp", " " ); + parser.defineEntityReplacementText( "foo1", " " ); + parser.defineEntityReplacementText( "foo2", "š" ); + parser.defineEntityReplacementText( "tritPos", "𝟭" ); + + assertEquals( XmlPullParser.DOCDECL, parser.nextToken() ); + assertEquals( " test [\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "]", parser.getText() ); + assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() ); + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( "b", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "ř", parser.getText() ); + assertEquals( "foo", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( " ", parser.getText() ); + assertEquals( "foo1", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "š", parser.getText() ); + assertEquals( "foo2", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "𝟭", parser.getText() ); + assertEquals( "tritPos", parser.getName() ); + assertEquals( XmlPullParser.END_TAG, parser.nextToken() ); + assertEquals( "b", 
parser.getName() ); + assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Ensures that entity ref getText() and getName() return the correct value. + * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testEntityReplacement() throws IOException { + String input = "

  

"; + + try + { + MXParser parser = new MXParser(); + parser.setInput( new StringReader( input ) ); + parser.defineEntityReplacementText( "nbsp", " " ); + + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( "p", parser.getName() ); + assertEquals( XmlPullParser.COMMENT, parser.nextToken() ); + assertEquals( " a pagebreak: ", parser.getText() ); + assertEquals( XmlPullParser.COMMENT, parser.nextToken() ); + assertEquals( " PB ", parser.getText() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "\u00A0", parser.getText() ); + assertEquals( "#160", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( " ", parser.getText() ); + assertEquals( "nbsp", parser.getName() ); + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( "unknown", parser.getName() ); + assertEquals( XmlPullParser.END_TAG, parser.nextToken() ); + assertEquals( "unknown", parser.getName() ); + assertEquals( XmlPullParser.END_TAG, parser.nextToken() ); + assertEquals( "p", parser.getName() ); + assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Ensures correct replacements inside the internal PC array when the new copied array size is shorter than + * previous ones. + * + * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0. + * + * @throws IOException if any. + * + * @since 3.4.2 + */ + @Test + public void testReplacementInPCArrayWithShorterCharArray() + throws IOException + { + String input = "]>" + + "

&&foo;&tritPos;

"; + + try + { + MXParser parser = new MXParser(); + parser.setInput( new StringReader( new String(input.getBytes(), "ISO-8859-1" ) ) ); + parser.defineEntityReplacementText( "foo", "ř" ); + parser.defineEntityReplacementText( "tritPos", "𝟭" ); + + assertEquals( XmlPullParser.DOCDECL, parser.nextToken() ); + assertEquals( " test []", parser.getText() ); + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( "section", parser.getName() ); + assertEquals( 1, parser.getAttributeCount() ); + assertEquals( "name" , parser.getAttributeName( 0 ) ); + assertEquals( "&ř𝟭" , parser.getAttributeValue( 0 ) ); + assertEquals( XmlPullParser.START_TAG, parser.nextToken() ); + assertEquals( "

", parser.getText() ); + assertEquals( "p", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "&", parser.getText() ); + assertEquals( "amp", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "ř", parser.getText() ); + assertEquals( "foo", parser.getName() ); + assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() ); + assertEquals( "𝟭", parser.getText() ); + assertEquals( "tritPos", parser.getName() ); + assertEquals( XmlPullParser.END_TAG, parser.nextToken() ); + assertEquals( "p", parser.getName() ); + assertEquals( XmlPullParser.END_TAG, parser.nextToken() ); + assertEquals( "section", parser.getName() ); + assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } } diff --git a/src/test/resources/xml/test-entities-dos.xml b/src/test/resources/xml/test-entities-dos.xml new file mode 100644 index 00000000..e1d6d17a --- /dev/null +++ b/src/test/resources/xml/test-entities-dos.xml @@ -0,0 +1,6 @@ + + + +]> +&myCustomEntity; \ No newline at end of file diff --git a/src/test/resources/xml/test-entities-in-attr-dos.xml b/src/test/resources/xml/test-entities-in-attr-dos.xml new file mode 100644 index 00000000..a423c995 --- /dev/null +++ b/src/test/resources/xml/test-entities-in-attr-dos.xml @@ -0,0 +1,9 @@ + + + + + + +]> +&myCustomEntity; \ No newline at end of file diff --git a/src/test/resources/xml/test-entities-in-attr.xml b/src/test/resources/xml/test-entities-in-attr.xml new file mode 100644 index 00000000..a423c995 --- /dev/null +++ b/src/test/resources/xml/test-entities-in-attr.xml @@ -0,0 +1,9 @@ + + + + + + +]> +&myCustomEntity; \ No newline at end of file diff --git a/src/test/resources/xml/test-entities.xml b/src/test/resources/xml/test-entities.xml new file mode 100644 index 00000000..e1d6d17a --- /dev/null +++ 
b/src/test/resources/xml/test-entities.xml @@ -0,0 +1,6 @@ + + + +]> +&myCustomEntity; \ No newline at end of file