Skip to content

Commit

Permalink
Fix MXParser improve error reporting (codehaus-plexus#136)
Browse files Browse the repository at this point in the history
- when parsing large char entities.
- when mixing invalid encoding declarations and file encodings.
  • Loading branch information
belingueres committed Jan 31, 2021
1 parent 84889e1 commit 195d992
Show file tree
Hide file tree
Showing 13 changed files with 399 additions and 8 deletions.
51 changes: 43 additions & 8 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
Expand Up @@ -11,6 +11,7 @@

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;

Expand Down Expand Up @@ -122,6 +123,8 @@ private String newStringIntern( char[] cbuf, int off, int len )
// private String elValue[];
private int elNamespaceCount[];

private String fileEncoding = "UTF8";

/**
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
* slot then current depth
Expand Down Expand Up @@ -659,6 +662,15 @@ public void setInput( Reader in )
{
reset();
reader = in;

if ( reader instanceof InputStreamReader )
{
InputStreamReader isr = (InputStreamReader) reader;
if ( isr.getEncoding() != null )
{
fileEncoding = isr.getEncoding().toUpperCase();
}
}
}

@Override
Expand Down Expand Up @@ -1771,6 +1783,17 @@ private int parseProlog()
// skipping UNICODE int Order Mark (so called BOM)
ch = more();
}
else if ( ch == '\uFFFD' )
{
// UTF-16 BOM in an UTF-8 encoded file?
// This is a hack...not the best way to check for BOM in UTF-16
ch = more();
if ( ch == '\uFFFD' )
{
throw new XmlPullParserException( "UTF-16 BOM in a UTF-8 encoded file is incompatible", this,
null );
}
}
}
seenMarkup = false;
boolean gotS = false;
Expand Down Expand Up @@ -2723,18 +2746,19 @@ else if ( ch >= 'A' && ch <= 'F' )
}
posEnd = pos - 1;

int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
boolean isValidCodePoint = isValidCodePoint( codePoint );
if ( isValidCodePoint )
boolean isValidCodePoint = true;
try
{
try
int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
isValidCodePoint = isValidCodePoint( codePoint );
if ( isValidCodePoint )
{
charRefOneCharBuf = Character.toChars( codePoint );
}
catch ( IllegalArgumentException e )
{
isValidCodePoint = false;
}
}
catch ( IllegalArgumentException e )
{
isValidCodePoint = false;
}

if ( !isValidCodePoint )
Expand Down Expand Up @@ -3328,6 +3352,17 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )

// TODO reconcile with setInput encodingName
inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart );

if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) )
{
throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible",
this, null );
}
else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" ))
{
throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
this, null );
}
}

ch = more();
Expand Down
@@ -0,0 +1,278 @@
package org.codehaus.plexus.util.xml.pull;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import org.junit.Before;
import org.junit.Test;

/**
* Test class that execute a particular set of tests associated to a TESCASES tag from the XML W3C Conformance Tests.
* TESCASES PROFILE: <pre>Bjoern Hoehrmann via HST 2013-09-18</pre>
* XML test files base folder: <pre>xmlconf/eduni/misc/</pre>
*
* @author <a href="mailto:belingueres@gmail.com">Gabriel Belingueres</a>
*/
public class eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test
{

final static File testResourcesDir = new File("src/test/resources/", "xmlconf/eduni/misc/");

MXParser parser;

@Before
public void setUp()
{
parser = new MXParser();
}

/**
* Test ID: <pre>hst-bh-001</pre>
* Test URI: <pre>001.xml</pre>
* Comment: <pre>decimal charref &#38;#62; 10FFFF, indeed &#38;#62; max 32 bit integer, checking for recovery from possible overflow</pre>
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
* Version:
*
* @throws IOException if there is an I/O error
*/
@Test
public void testhst_bh_001()
throws IOException
{
try ( Reader reader = new FileReader( new File( testResourcesDir, "001.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "decimal charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "character reference (with hex value FF000000F6) is invalid" ) );
}
}

/**
* Test ID: <pre>hst-bh-002</pre>
* Test URI: <pre>002.xml</pre>
* Comment: <pre>hex charref &#38;#62; 10FFFF, indeed &#38;#62; max 32 bit integer, checking for recovery from possible overflow</pre>
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
* Version:
*
* @throws IOException if there is an I/O error
*/
@Test
public void testhst_bh_002()
throws IOException
{
try ( Reader reader = new FileReader( new File( testResourcesDir, "002.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "hex charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "character reference (with decimal value 4294967542) is invalid" ) );
}
}

/**
* Test ID: <pre>hst-bh-003</pre>
* Test URI: <pre>003.xml</pre>
* Comment: <pre>decimal charref &#38;#62; 10FFFF, indeed &#38;#62; max 64 bit integer, checking for recovery from possible overflow</pre>
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
* Version:
*
* @throws IOException if there is an I/O error
*/
@Test
public void testhst_bh_003()
throws IOException
{
try ( Reader reader = new FileReader( new File( testResourcesDir, "003.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "decimal charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "character reference (with hex value FFFFFFFF000000F6) is invalid" ) );
}
}

/**
* Test ID: <pre>hst-bh-004</pre>
* Test URI: <pre>004.xml</pre>
* Comment: <pre>hex charref &#38;#62; 10FFFF, indeed &#38;#62; max 64 bit integer, checking for recovery from possible overflow</pre>
* Sections: <pre>2.2 [2], 4.1 [66]</pre>
* Version:
*
* @throws IOException if there is an I/O error
*/
@Test
public void testhst_bh_004()
throws IOException
{
try ( Reader reader = new FileReader( new File( testResourcesDir, "004.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "hex charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "character reference (with decimal value 18446744073709551862) is invalid" ) );
}
}

/**
* Test ID: <pre>hst-bh-005</pre>
* Test URI: <pre>005.xml</pre>
* Comment: <pre>xmlns:xml is an attribute as far as validation is concerned and must be declared</pre>
* Sections: <pre>3.1 [41]</pre>
* Version:
*
* @throws IOException if there is an I/O error
*
* NOTE: This test is SKIPPED as MXParser do not supports DOCDECL parsing.
*/
// @Test
public void testhst_bh_005()
throws IOException
{
try ( Reader reader = new FileReader( new File( testResourcesDir, "005.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "xmlns:xml is an attribute as far as validation is concerned and must be declared" );
}
catch ( XmlPullParserException e )
{
assertTrue( true );
}
}

/**
* Test ID: <pre>hst-bh-006</pre>
* Test URI: <pre>006.xml</pre>
* Comment: <pre>xmlns:foo is an attribute as far as validation is concerned and must be declared</pre>
* Sections: <pre>3.1 [41]</pre>
* Version:
*
* @throws IOException if there is an I/O error
*
* NOTE: This test is SKIPPED as MXParser do not supports DOCDECL parsing.
*/
// @Test
public void testhst_bh_006()
throws IOException
{
try ( Reader reader = new FileReader( new File( testResourcesDir, "006.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "xmlns:foo is an attribute as far as validation is concerned and must be declared" );
}
catch ( XmlPullParserException e )
{
assertTrue( true );
}
}

/**
* Test ID: <pre>hst-lhs-007</pre>
* Test URI: <pre>007.xml</pre>
* Comment: <pre>UTF-8 BOM plus xml decl of iso-8859-1 incompatible</pre>
* Sections: <pre>4.3.3</pre>
* Version:
*
* @throws IOException if there is an I/O error
*/
@Test
public void testhst_lhs_007()
throws IOException
{
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) );
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" ) );
}
}

/**
* Test ID: <pre>hst-lhs-008</pre>
* Test URI: <pre>008.xml</pre>
* Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible</pre>
* Sections: <pre>4.3.3</pre>
* Version:
*
* @throws IOException if there is an I/O error
*/
@Test
public void testhst_lhs_008()
throws IOException
{
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) );
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "UTF-16 BOM plus xml decl of utf-8 is incompatible" ) );
}
}

/**
* Test ID: <pre>hst-lhs-009</pre>
* Test URI: <pre>009.xml</pre>
* Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible</pre>
* Sections: <pre>4.3.3</pre>
* Version:
*
* @throws IOException if there is an I/O error
*/
@Test
public void testhst_lhs_009()
throws IOException
{
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) );
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
}
}

}
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/001.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#xFF000000F6;il</p> <!-- 32 bit integer overflow -->
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/002.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#4294967542;il</p> <!-- 32 bit integer overflow -->
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/003.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#xFFFFFFFF000000F6;il</p> <!-- 64 bit integer overflow -->
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/004.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#18446744073709551862;il</p> <!-- 64 bit integer overflow -->
2 changes: 2 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/005.xml
@@ -0,0 +1,2 @@
<!DOCTYPE x [ <!ELEMENT x EMPTY> ]>
<x xmlns:xml='http://www.w3.org/XML/1998/namespace'/>

0 comments on commit 195d992

Please sign in to comment.