From b99f7c0e46d8b08d63afa4c2c7d274ebf5ebcc94 Mon Sep 17 00:00:00 2001
From: Gabriel Belingueres
Date: Sat, 2 Apr 2022 12:31:39 -0300
Subject: [PATCH] Fixed regressions:
* #163 - new case: Don't assume UTF8 as default, to allow parsing from String.
* #194 - Incorrect getText() after parsing the DOCDECL section.
* Added tests exercising other regressions exposed while fixing these issues.
---
.../plexus/util/xml/pull/MXParser.java | 157 ++++--
.../plexus/util/xml/pull/MXParserTest.java | 495 ++++++++++++++++++
src/test/resources/xml/test-entities-dos.xml | 6 +
.../xml/test-entities-in-attr-dos.xml | 9 +
.../resources/xml/test-entities-in-attr.xml | 9 +
src/test/resources/xml/test-entities.xml | 6 +
6 files changed, 637 insertions(+), 45 deletions(-)
create mode 100644 src/test/resources/xml/test-entities-dos.xml
create mode 100644 src/test/resources/xml/test-entities-in-attr-dos.xml
create mode 100644 src/test/resources/xml/test-entities-in-attr.xml
create mode 100644 src/test/resources/xml/test-entities.xml
diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
index 3874f572..e21b66cb 100644
--- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
+++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
@@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len )
// private String elValue[];
private int elNamespaceCount[];
- private String fileEncoding = "UTF8";
+ private String fileEncoding = null;
/**
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
@@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
}
}
- /**
- * Unknown properties are always returned as false
+ /**
+ * Unknown properties are always returned as false
*/
@Override
public boolean getFeature( String name )
@@ -1596,11 +1596,11 @@ else if ( ch == '&' )
}
final int oldStart = posStart + bufAbsoluteStart;
final int oldEnd = posEnd + bufAbsoluteStart;
- final char[] resolvedEntity = parseEntityRef();
+ parseEntityRef();
if ( tokenize )
return eventType = ENTITY_REF;
// check if replacement text can be resolved !!!
- if ( resolvedEntity == null )
+ if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
{
if ( entityRefName == null )
{
@@ -1628,7 +1628,7 @@ else if ( ch == '&' )
}
// assert usePC == true;
// write into PC replacement text - do merge for replacement text!!!!
- for ( char aResolvedEntity : resolvedEntity )
+ for ( char aResolvedEntity : resolvedEntityRefCharBuf )
{
if ( pcEnd >= pc.length )
{
@@ -2675,9 +2675,28 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' )
return ch;
}
- private char[] charRefOneCharBuf = new char[1];
+ // sentinel value meaning that no entity ref has been resolved
+ private static final char[] BUF_NOT_RESOLVED = new char[0];
+
+ // predefined entity refs
+ private static final char[] BUF_LT = new char[] { '<' };
+ private static final char[] BUF_AMP = new char[] { '&' };
+ private static final char[] BUF_GT = new char[] { '>' };
+ private static final char[] BUF_APO = new char[] { '\'' };
+ private static final char[] BUF_QUOT = new char[] { '"' };
- private char[] parseEntityRef()
+ private char[] resolvedEntityRefCharBuf = BUF_NOT_RESOLVED;
+
+ /**
+ * Parse an entity reference: either a character reference or one of the predefined named entities.
+ *
+ * @return for a character reference, the length of the resolved replacement text; for a named reference, the
+ * length of the entity name. For the predefined names, resolvedEntityRefCharBuf holds the replacement
+ * chars; for any other name it is left as BUF_NOT_RESOLVED.
+ * @throws XmlPullParserException if invalid XML is detected.
+ * @throws IOException if an I/O error is found.
+ */
+ private int parseCharOrPredefinedEntityRef()
throws XmlPullParserException, IOException
{
// entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
@@ -2686,6 +2705,8 @@ private char[] parseEntityRef()
// ASSUMPTION just after &
entityRefName = null;
posStart = pos;
+ int len = 0;
+ resolvedEntityRefCharBuf = BUF_NOT_RESOLVED;
char ch = more();
if ( ch == '#' )
{
@@ -2750,7 +2771,6 @@ else if ( ch >= 'A' && ch <= 'F' )
ch = more();
}
}
- posEnd = pos - 1;
boolean isValidCodePoint = true;
try
@@ -2759,7 +2779,7 @@ else if ( ch >= 'A' && ch <= 'F' )
isValidCodePoint = isValidCodePoint( codePoint );
if ( isValidCodePoint )
{
- charRefOneCharBuf = Character.toChars( codePoint );
+ resolvedEntityRefCharBuf = Character.toChars( codePoint );
}
}
catch ( IllegalArgumentException e )
@@ -2775,14 +2795,14 @@ else if ( ch >= 'A' && ch <= 'F' )
if ( tokenize )
{
- text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
+ text = newString( resolvedEntityRefCharBuf, 0, resolvedEntityRefCharBuf.length );
}
- return charRefOneCharBuf;
+ len = resolvedEntityRefCharBuf.length;
}
else
{
// [68] EntityRef ::= '&' Name ';'
- // scan anem until ;
+ // scan name until ;
if ( !isNameStartChar( ch ) )
{
throw new XmlPullParserException( "entity reference names can not start with character '"
@@ -2801,17 +2821,15 @@ else if ( ch >= 'A' && ch <= 'F' )
+ printable( ch ) + "'", this, null );
}
}
- posEnd = pos - 1;
// determine what name maps to
- final int len = posEnd - posStart;
+ len = ( pos - 1 ) - posStart;
if ( len == 2 && buf[posStart] == 'l' && buf[posStart + 1] == 't' )
{
if ( tokenize )
{
text = "<";
}
- charRefOneCharBuf[0] = '<';
- return charRefOneCharBuf;
+ resolvedEntityRefCharBuf = BUF_LT;
// if(paramPC || isParserTokenizing) {
// if(pcEnd >= pc.length) ensurePC();
// pc[pcEnd++] = '<';
@@ -2823,8 +2841,7 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po
{
text = "&";
}
- charRefOneCharBuf[0] = '&';
- return charRefOneCharBuf;
+ resolvedEntityRefCharBuf = BUF_AMP;
}
else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
{
@@ -2832,8 +2849,7 @@ else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
{
text = ">";
}
- charRefOneCharBuf[0] = '>';
- return charRefOneCharBuf;
+ resolvedEntityRefCharBuf = BUF_GT;
}
else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 's' )
@@ -2842,8 +2858,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po
{
text = "'";
}
- charRefOneCharBuf[0] = '\'';
- return charRefOneCharBuf;
+ resolvedEntityRefCharBuf = BUF_APO;
}
else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 't' )
@@ -2852,25 +2867,65 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
{
text = "\"";
}
- charRefOneCharBuf[0] = '"';
- return charRefOneCharBuf;
- }
- else
- {
- final char[] result = lookuEntityReplacement( len );
- if ( result != null )
- {
- return result;
- }
+ resolvedEntityRefCharBuf = BUF_QUOT;
}
- if ( tokenize )
- text = null;
- return null;
}
+
+ posEnd = pos;
+
+ return len;
+ }
+
+ /**
+ * Parse an entity reference inside the DOCDECL section.
+ *
+ * @throws XmlPullParserException if invalid XML is detected.
+ * @throws IOException if an I/O error is found.
+ */
+ private void parseEntityRefInDocDecl()
+ throws XmlPullParserException, IOException
+ {
+ parseCharOrPredefinedEntityRef();
+ if (usePC) {
+ posStart--; // include in PC the starting '&' of the entity
+ joinPC();
+ }
+
+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
+ return;
+ if ( tokenize )
+ text = null;
+ }
+
+ /**
+ * Parse an entity reference inside a tag or attribute.
+ *
+ * @throws XmlPullParserException if invalid XML is detected.
+ * @throws IOException if an I/O error is found.
+ */
+ private void parseEntityRef()
+ throws XmlPullParserException, IOException
+ {
+ final int len = parseCharOrPredefinedEntityRef();
+
+ posEnd--; // don't involve the final ';' from the entity in the search
+
+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED ) {
+ return;
+ }
+
+ resolvedEntityRefCharBuf = lookuEntityReplacement( len );
+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
+ {
+ return;
+ }
+ if ( tokenize )
+ text = null;
}
/**
- * Check if the provided parameter is a valid Char, according to: {@link https://www.w3.org/TR/REC-xml/#NT-Char}
+ * Check if the provided parameter is a valid Char, according to
+ * https://www.w3.org/TR/REC-xml/#NT-Char
*
* @param codePoint the numeric value to check
* @return true if it is a valid numeric character reference. False otherwise.
@@ -2883,8 +2938,6 @@ private static boolean isValidCodePoint( int codePoint )
}
private char[] lookuEntityReplacement( int entityNameLen )
- throws XmlPullParserException, IOException
-
{
if ( !allStringsInterned )
{
@@ -2919,7 +2972,7 @@ private char[] lookuEntityReplacement( int entityNameLen )
}
}
}
- return null;
+ return BUF_NOT_RESOLVED;
}
private void parseComment()
@@ -2977,7 +3030,7 @@ else if (isValidCodePoint( ch ))
}
else
{
- throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(((int) ch)) + " found in comment", this, null );
+ throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(ch) + " found in comment", this, null );
}
if ( normalizeIgnorableWS )
{
@@ -3484,7 +3537,8 @@ else if ( ch == '>' && bracketLevel == 0 )
break;
else if ( ch == '&' )
{
- extractEntityRef();
+ extractEntityRefInDocDecl();
+ continue;
}
if ( normalizeIgnorableWS )
{
@@ -3536,6 +3590,19 @@ else if ( ch == '\n' )
}
posEnd = pos - 1;
+ text = null;
+ }
+
+ private void extractEntityRefInDocDecl()
+ throws XmlPullParserException, IOException
+ {
+ // extractEntityRef
+ posEnd = pos - 1;
+
+ int prevPosStart = posStart;
+ parseEntityRefInDocDecl();
+
+ posStart = prevPosStart;
}
private void extractEntityRef()
@@ -3559,9 +3626,9 @@ private void extractEntityRef()
}
// assert usePC == true;
- final char[] resolvedEntity = parseEntityRef();
+ parseEntityRef();
// check if replacement text can be resolved !!!
- if ( resolvedEntity == null )
+ if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
{
if ( entityRefName == null )
{
@@ -3571,7 +3638,7 @@ private void extractEntityRef()
+ "'", this, null );
}
// write into PC replacement text - do merge for replacement text!!!!
- for ( char aResolvedEntity : resolvedEntity )
+ for ( char aResolvedEntity : resolvedEntityRefCharBuf )
{
if ( pcEnd >= pc.length )
{
diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
index e0be666a..6fc6e9c6 100644
--- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
+++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
@@ -17,6 +17,7 @@
*/
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -29,6 +30,7 @@
import java.nio.file.Files;
import java.nio.file.Paths;
+import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.ReaderFactory;
import org.junit.Test;
@@ -898,4 +900,497 @@ public void testEncodingISO_8859_1_setInputStream()
}
}
+ /**
+ * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
+ *
+ * Another case of bug #163: File encoding information is lost after the input file is copied to a String.
+ *
+ * @throws IOException if IO error.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testEncodingISO_8859_1setStringReader()
+ throws IOException
+ {
+ try ( Reader reader =
+ ReaderFactory.newXmlReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) )
+ {
+ MXParser parser = new MXParser();
+ String xmlFileContents = IOUtil.toString( reader );
+ parser.setInput( new StringReader( xmlFileContents ) );
+ while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
+ ;
+ assertTrue( true );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
+
+ /**
+ *
+ * Test custom Entity not found.
+ *
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws java.lang.Exception if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testCustomEntityNotFoundInText()
+ throws Exception
+ {
+ MXParser parser = new MXParser();
+
+ String input = "&otherentity;";
+ parser.setInput( new StringReader( input ) );
+ parser.defineEntityReplacementText( "myentity", "replacement" );
+
+ try
+ {
+ assertEquals( XmlPullParser.START_TAG, parser.next() );
+ assertEquals( XmlPullParser.TEXT, parser.next() );
+ fail( "should raise exception" );
+ }
+ catch ( XmlPullParserException e )
+ {
+ assertTrue( e.getMessage().contains( "could not resolve entity named 'otherentity' (position: START_TAG seen &otherentity;... @1:19)" ) );
+ assertEquals( XmlPullParser.START_TAG, parser.getEventType() ); // not an ENTITY_REF
+ assertEquals( "otherentity", parser.getText() );
+ }
+ }
+
+ /**
+ *
+ * Test custom Entity not found, with tokenize.
+ *
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws java.lang.Exception if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testCustomEntityNotFoundInTextTokenize()
+ throws Exception
+ {
+ MXParser parser = new MXParser();
+
+ String input = "&otherentity;";
+ parser.setInput( new StringReader( input ) );
+ parser.defineEntityReplacementText( "myentity", "replacement" );
+
+ try
+ {
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertNull( parser.getText() );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not throw exception if tokenize" );
+ }
+ }
+
+ /**
+ *
+ * Test custom Entity not found in attribute.
+ *
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws java.lang.Exception if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testCustomEntityNotFoundInAttr()
+ throws Exception
+ {
+ MXParser parser = new MXParser();
+
+ String input = "sometext";
+ parser.setInput( new StringReader( input ) );
+ parser.defineEntityReplacementText( "myentity", "replacement" );
+
+ try
+ {
+ assertEquals( XmlPullParser.START_TAG, parser.next() );
+ fail( "should raise exception" );
+ }
+ catch ( XmlPullParserException e )
+ {
+ assertTrue( e.getMessage().contains( "could not resolve entity named 'otherentity' (position: START_DOCUMENT seen
+ * Test custom Entity not found in attribute, with tokenize.
+ *
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ * @throws XmlPullParserException
+ *
+ * @throws Exception if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testCustomEntityNotFoundInAttrTokenize() throws Exception
+ {
+ MXParser parser = new MXParser();
+
+ String input = "sometext";
+
+ try
+ {
+ parser.setInput( new StringReader( input ) );
+ parser.defineEntityReplacementText( "myentity", "replacement" );
+
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ fail( "should raise exception" );
+ }
+ catch ( XmlPullParserException e )
+ {
+ assertTrue( e.getMessage().contains( "could not resolve entity named 'otherentity' (position: START_DOCUMENT seen Issue #194: Incorrect getText() after parsing the DOCDECL section>
+ *
+ * test DOCDECL text with myCustomEntity that cannot be resolved, Unix line separator.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testDocdeclTextWithEntitiesUnix()
+ throws IOException
+ {
+ testDocdeclTextWithEntities( "test-entities.xml" );
+ }
+
+ /**
+ * Issue #194: Incorrect getText() after parsing the DOCDECL section.
+ *
+ *
+ * test DOCDECL text with myCustomEntity that cannot be resolved, DOS line separator.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testDocdeclTextWithEntitiesDOS()
+ throws IOException
+ {
+ testDocdeclTextWithEntities( "test-entities-dos.xml" );
+ }
+
+ private void testDocdeclTextWithEntities( String filename )
+ throws IOException
+ {
+ try ( Reader reader = ReaderFactory.newXmlReader( new File( "src/test/resources/xml", filename ) ) )
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( reader );
+ assertEquals( XmlPullParser.PROCESSING_INSTRUCTION, parser.nextToken() );
+ assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() );
+ assertEquals( XmlPullParser.DOCDECL, parser.nextToken() );
+ assertEquals( " document [\n"
+ + "\n"
+ + "\n"
+ + "]", parser.getText() );
+ assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() );
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( "document", parser.getName() );
+ assertEquals( XmlPullParser.TEXT, parser.next() );
+
+ fail( "should fail to resolve 'myCustomEntity' entity");
+ }
+ catch ( XmlPullParserException e )
+ {
+ assertTrue( e.getMessage().contains( "could not resolve entity named 'myCustomEntity'" ));
+ }
+ }
+
+ /**
+ * Issue #194: Incorrect getText() after parsing the DOCDECL section.
+ *
+ *
+ * test DOCDECL text with entities appearing in attributes, Unix line separator.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testDocdeclTextWithEntitiesInAttributesUnix()
+ throws IOException
+ {
+ testDocdeclTextWithEntitiesInAttributes( "test-entities-in-attr.xml" );
+ }
+
+ /**
+ * Issue #194: Incorrect getText() after parsing the DOCDECL section.
+ *
+ *
+ * test DOCDECL text with entities appearing in attributes, DOS line separator.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testDocdeclTextWithEntitiesInAttributesDOS()
+ throws IOException
+ {
+ testDocdeclTextWithEntitiesInAttributes( "test-entities-in-attr-dos.xml" );
+ }
+
+ private void testDocdeclTextWithEntitiesInAttributes( String filename )
+ throws IOException
+ {
+ try ( Reader reader = ReaderFactory.newXmlReader( new File( "src/test/resources/xml", filename ) ) )
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( reader );
+ parser.defineEntityReplacementText( "nbsp", " " );
+ parser.defineEntityReplacementText( "Alpha", "Α" );
+ parser.defineEntityReplacementText( "tritPos", "𝟭" );
+ parser.defineEntityReplacementText( "flo", "ř" );
+ parser.defineEntityReplacementText( "myCustomEntity", "&flo;" );
+ assertEquals( XmlPullParser.PROCESSING_INSTRUCTION, parser.nextToken() );
+ assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() );
+ assertEquals( XmlPullParser.DOCDECL, parser.nextToken() );
+ assertEquals( " document [\n"
+ + " \n"
+ + " \n"
+ + " \n"
+ + "\n"
+ + "\n"
+ + "]", parser.getText() );
+ assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() );
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( "document", parser.getName() );
+ assertEquals( 1, parser.getAttributeCount() );
+ assertEquals( "name", parser.getAttributeName( 0 ) );
+ assertEquals( "section name with entities: '&' 'Α' '<' ' ' '>' '𝟭' ''' 'ř' '\"'",
+ parser.getAttributeValue( 0 ) );
+
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "myCustomEntity", parser.getName() );
+ assertEquals( "ř", parser.getText() );
+
+ assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
+ assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
+
+ /**
+ * test entity ref with entities appearing in tags, Unix line separator.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testEntityRefTextUnix()
+ throws IOException
+ {
+ testEntityRefText( "\n" );
+ }
+
+ /**
+ * test entity ref with entities appearing in tags, DOS line separator.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testEntityRefTextDOS()
+ throws IOException
+ {
+ testEntityRefText( "\r\n" );
+ }
+
+ private void testEntityRefText( String newLine )
+ throws IOException
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.append( "" ).append( newLine );
+ sb.append( "" ).append( newLine );
+ sb.append( "" ).append( newLine );
+ sb.append( "" ).append( newLine );
+ sb.append( "]>" ).append( newLine );
+ sb.append( "&foo;&foo1;&foo2;&tritPos;" );
+
+ try
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( new StringReader( sb.toString() ) );
+ parser.defineEntityReplacementText( "foo", "ř" );
+ parser.defineEntityReplacementText( "nbsp", " " );
+ parser.defineEntityReplacementText( "foo1", " " );
+ parser.defineEntityReplacementText( "foo2", "š" );
+ parser.defineEntityReplacementText( "tritPos", "𝟭" );
+
+ assertEquals( XmlPullParser.DOCDECL, parser.nextToken() );
+ assertEquals( " test [\n"
+ + "\n"
+ + "\n"
+ + "\n"
+ + "\n"
+ + "]", parser.getText() );
+ assertEquals( XmlPullParser.IGNORABLE_WHITESPACE, parser.nextToken() );
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( "b", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "ř", parser.getText() );
+ assertEquals( "foo", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( " ", parser.getText() );
+ assertEquals( "foo1", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "š", parser.getText() );
+ assertEquals( "foo2", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "𝟭", parser.getText() );
+ assertEquals( "tritPos", parser.getName() );
+ assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
+ assertEquals( "b", parser.getName() );
+ assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
+
+ /**
+ * Ensures that entity ref getText() and getName() return the correct value.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testEntityReplacement() throws IOException {
+ String input = "
";
+
+ try
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( new StringReader( input ) );
+ parser.defineEntityReplacementText( "nbsp", " " );
+
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( "p", parser.getName() );
+ assertEquals( XmlPullParser.COMMENT, parser.nextToken() );
+ assertEquals( " a pagebreak: ", parser.getText() );
+ assertEquals( XmlPullParser.COMMENT, parser.nextToken() );
+ assertEquals( " PB ", parser.getText() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "\u00A0", parser.getText() );
+ assertEquals( "#160", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( " ", parser.getText() );
+ assertEquals( "nbsp", parser.getName() );
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( "unknown", parser.getName() );
+ assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
+ assertEquals( "unknown", parser.getName() );
+ assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
+ assertEquals( "p", parser.getName() );
+ assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
+
+ /**
+ * Ensures correct replacements inside the internal PC array when the new copied array size is shorter than
+ * previous ones.
+ *
+ * Regression test: assure same behavior of MXParser from plexus-utils 3.3.0.
+ *
+ * @throws IOException if any.
+ *
+ * @since 3.4.2
+ */
+ @Test
+ public void testReplacementInPCArrayWithShorterCharArray()
+ throws IOException
+ {
+ String input = "]>"
+ + "";
+
+ try
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( new StringReader( new String(input.getBytes(), "ISO-8859-1" ) ) );
+ parser.defineEntityReplacementText( "foo", "ř" );
+ parser.defineEntityReplacementText( "tritPos", "𝟭" );
+
+ assertEquals( XmlPullParser.DOCDECL, parser.nextToken() );
+ assertEquals( " test []", parser.getText() );
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( "section", parser.getName() );
+ assertEquals( 1, parser.getAttributeCount() );
+ assertEquals( "name" , parser.getAttributeName( 0 ) );
+ assertEquals( "&ř𝟭" , parser.getAttributeValue( 0 ) );
+ assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+ assertEquals( "", parser.getText() );
+ assertEquals( "p", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "&", parser.getText() );
+ assertEquals( "amp", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "ř", parser.getText() );
+ assertEquals( "foo", parser.getName() );
+ assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+ assertEquals( "𝟭", parser.getText() );
+ assertEquals( "tritPos", parser.getName() );
+ assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
+ assertEquals( "p", parser.getName() );
+ assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
+ assertEquals( "section", parser.getName() );
+ assertEquals( XmlPullParser.END_DOCUMENT, parser.nextToken() );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
}
diff --git a/src/test/resources/xml/test-entities-dos.xml b/src/test/resources/xml/test-entities-dos.xml
new file mode 100644
index 00000000..e1d6d17a
--- /dev/null
+++ b/src/test/resources/xml/test-entities-dos.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&myCustomEntity;
\ No newline at end of file
diff --git a/src/test/resources/xml/test-entities-in-attr-dos.xml b/src/test/resources/xml/test-entities-in-attr-dos.xml
new file mode 100644
index 00000000..a423c995
--- /dev/null
+++ b/src/test/resources/xml/test-entities-in-attr-dos.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+]>
+&myCustomEntity;
\ No newline at end of file
diff --git a/src/test/resources/xml/test-entities-in-attr.xml b/src/test/resources/xml/test-entities-in-attr.xml
new file mode 100644
index 00000000..a423c995
--- /dev/null
+++ b/src/test/resources/xml/test-entities-in-attr.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+]>
+&myCustomEntity;
\ No newline at end of file
diff --git a/src/test/resources/xml/test-entities.xml b/src/test/resources/xml/test-entities.xml
new file mode 100644
index 00000000..e1d6d17a
--- /dev/null
+++ b/src/test/resources/xml/test-entities.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&myCustomEntity;
\ No newline at end of file