Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix MXParser improve error reporting (#136) #137

Merged
merged 1 commit into from Apr 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
51 changes: 43 additions & 8 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
Expand Up @@ -11,6 +11,7 @@

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;

Expand Down Expand Up @@ -122,6 +123,8 @@ private String newStringIntern( char[] cbuf, int off, int len )
// private String elValue[];
private int elNamespaceCount[];

private String fileEncoding = "UTF8";

/**
* Make sure that we have enough space to keep the element stack for the passed size. It will always create one
* additional slot than the current depth
Expand Down Expand Up @@ -659,6 +662,15 @@ public void setInput( Reader in )
{
reset();
reader = in;

if ( reader instanceof InputStreamReader )
{
InputStreamReader isr = (InputStreamReader) reader;
if ( isr.getEncoding() != null )
{
fileEncoding = isr.getEncoding().toUpperCase();
}
}
}

@Override
Expand Down Expand Up @@ -1771,6 +1783,17 @@ private int parseProlog()
// skipping UNICODE Byte Order Mark (so called BOM)
ch = more();
}
else if ( ch == '\uFFFD' )
{
// UTF-16 BOM in a UTF-8 encoded file?
// This is a hack...not the best way to check for BOM in UTF-16
ch = more();
if ( ch == '\uFFFD' )
{
throw new XmlPullParserException( "UTF-16 BOM in a UTF-8 encoded file is incompatible", this,
null );
}
}
}
seenMarkup = false;
boolean gotS = false;
Expand Down Expand Up @@ -2723,18 +2746,19 @@ else if ( ch >= 'A' && ch <= 'F' )
}
posEnd = pos - 1;

int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
boolean isValidCodePoint = isValidCodePoint( codePoint );
if ( isValidCodePoint )
boolean isValidCodePoint = true;
try
{
try
int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 );
isValidCodePoint = isValidCodePoint( codePoint );
if ( isValidCodePoint )
{
charRefOneCharBuf = Character.toChars( codePoint );
}
catch ( IllegalArgumentException e )
{
isValidCodePoint = false;
}
}
catch ( IllegalArgumentException e )
{
isValidCodePoint = false;
}

if ( !isValidCodePoint )
Expand Down Expand Up @@ -3328,6 +3352,17 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )

// TODO reconcile with setInput encodingName
inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart );

if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) )
{
throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible",
this, null );
}
else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" ))
{
throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
this, null );
}
}

ch = more();
Expand Down
@@ -0,0 +1,278 @@
package org.codehaus.plexus.util.xml.pull;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import org.junit.Before;
import org.junit.Test;

/**
 * Test class that executes a particular set of tests associated to a TESTCASES tag from the XML W3C Conformance Tests.
 * TESTCASES PROFILE: <pre>Bjoern Hoehrmann via HST 2013-09-18</pre>
 * XML test files base folder: <pre>xmlconf/eduni/misc/</pre>
 * <p>
 * Every test feeds one fixture file to a fresh {@link MXParser}, pulls tokens until the end of the document and
 * expects an {@link XmlPullParserException} whose message contains a specific fragment. All fixtures are opened
 * with an explicit charset so that the outcome does not depend on the platform default encoding.
 *
 * @author <a href="mailto:belingueres@gmail.com">Gabriel Belingueres</a>
 */
public class eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test
{

    static final File testResourcesDir = new File( "src/test/resources/", "xmlconf/eduni/misc/" );

    MXParser parser;

    /**
     * Creates a fresh parser before each test so that no state leaks between test cases.
     */
    @Before
    public void setUp()
    {
        parser = new MXParser();
    }

    /**
     * Test ID: <pre>hst-bh-001</pre>
     * Test URI: <pre>001.xml</pre>
     * Comment: <pre>decimal charref &gt; 10FFFF, indeed &gt; max 32 bit integer, checking for recovery from possible overflow</pre>
     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
     * Version:
     * <p>
     * NOTE: the comment above is quoted from the conformance suite and labels the reference as decimal, whereas
     * the assertion below expects the parser to report a hex value.
     *
     * @throws IOException if there is an I/O error
     */
    @Test
    public void testhst_bh_001()
        throws IOException
    {
        try ( Reader reader = openReader( "001.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "decimal charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" );
        }
        catch ( XmlPullParserException e )
        {
            assertMessageContains( e, "character reference (with hex value FF000000F6) is invalid" );
        }
    }

    /**
     * Test ID: <pre>hst-bh-002</pre>
     * Test URI: <pre>002.xml</pre>
     * Comment: <pre>hex charref &gt; 10FFFF, indeed &gt; max 32 bit integer, checking for recovery from possible overflow</pre>
     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
     * Version:
     * <p>
     * NOTE: the comment above is quoted from the conformance suite and labels the reference as hex, whereas the
     * assertion below expects the parser to report a decimal value.
     *
     * @throws IOException if there is an I/O error
     */
    @Test
    public void testhst_bh_002()
        throws IOException
    {
        try ( Reader reader = openReader( "002.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "hex charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" );
        }
        catch ( XmlPullParserException e )
        {
            assertMessageContains( e, "character reference (with decimal value 4294967542) is invalid" );
        }
    }

    /**
     * Test ID: <pre>hst-bh-003</pre>
     * Test URI: <pre>003.xml</pre>
     * Comment: <pre>decimal charref &gt; 10FFFF, indeed &gt; max 64 bit integer, checking for recovery from possible overflow</pre>
     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
     * Version:
     * <p>
     * NOTE: the comment above is quoted from the conformance suite and labels the reference as decimal, whereas
     * the assertion below expects the parser to report a hex value.
     *
     * @throws IOException if there is an I/O error
     */
    @Test
    public void testhst_bh_003()
        throws IOException
    {
        try ( Reader reader = openReader( "003.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "decimal charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" );
        }
        catch ( XmlPullParserException e )
        {
            assertMessageContains( e, "character reference (with hex value FFFFFFFF000000F6) is invalid" );
        }
    }

    /**
     * Test ID: <pre>hst-bh-004</pre>
     * Test URI: <pre>004.xml</pre>
     * Comment: <pre>hex charref &gt; 10FFFF, indeed &gt; max 64 bit integer, checking for recovery from possible overflow</pre>
     * Sections: <pre>2.2 [2], 4.1 [66]</pre>
     * Version:
     * <p>
     * NOTE: the comment above is quoted from the conformance suite and labels the reference as hex, whereas the
     * assertion below expects the parser to report a decimal value.
     *
     * @throws IOException if there is an I/O error
     */
    @Test
    public void testhst_bh_004()
        throws IOException
    {
        try ( Reader reader = openReader( "004.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "hex charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" );
        }
        catch ( XmlPullParserException e )
        {
            assertMessageContains( e, "character reference (with decimal value 18446744073709551862) is invalid" );
        }
    }

    /**
     * Test ID: <pre>hst-bh-005</pre>
     * Test URI: <pre>005.xml</pre>
     * Comment: <pre>xmlns:xml is an attribute as far as validation is concerned and must be declared</pre>
     * Sections: <pre>3.1 [41]</pre>
     * Version:
     * <p>
     * NOTE: This test is SKIPPED (its {@code @Test} annotation is commented out) as MXParser does not support
     * DOCDECL parsing.
     *
     * @throws IOException if there is an I/O error
     */
    // @Test
    public void testhst_bh_005()
        throws IOException
    {
        try ( Reader reader = openReader( "005.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "xmlns:xml is an attribute as far as validation is concerned and must be declared" );
        }
        catch ( XmlPullParserException e )
        {
            // expected: any parser exception counts as a pass, no specific message is checked
        }
    }

    /**
     * Test ID: <pre>hst-bh-006</pre>
     * Test URI: <pre>006.xml</pre>
     * Comment: <pre>xmlns:foo is an attribute as far as validation is concerned and must be declared</pre>
     * Sections: <pre>3.1 [41]</pre>
     * Version:
     * <p>
     * NOTE: This test is SKIPPED (its {@code @Test} annotation is commented out) as MXParser does not support
     * DOCDECL parsing.
     *
     * @throws IOException if there is an I/O error
     */
    // @Test
    public void testhst_bh_006()
        throws IOException
    {
        try ( Reader reader = openReader( "006.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "xmlns:foo is an attribute as far as validation is concerned and must be declared" );
        }
        catch ( XmlPullParserException e )
        {
            // expected: any parser exception counts as a pass, no specific message is checked
        }
    }

    /**
     * Test ID: <pre>hst-lhs-007</pre>
     * Test URI: <pre>007.xml</pre>
     * Comment: <pre>UTF-8 BOM plus xml decl of iso-8859-1 incompatible</pre>
     * Sections: <pre>4.3.3</pre>
     * Version:
     *
     * @throws IOException if there is an I/O error
     */
    @Test
    public void testhst_lhs_007()
        throws IOException
    {
        try ( Reader reader = openReader( "007.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" );
        }
        catch ( XmlPullParserException e )
        {
            assertMessageContains( e, "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" );
        }
    }

    /**
     * Test ID: <pre>hst-lhs-008</pre>
     * Test URI: <pre>008.xml</pre>
     * Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible</pre>
     * Sections: <pre>4.3.3</pre>
     * Version:
     *
     * @throws IOException if there is an I/O error
     */
    @Test
    public void testhst_lhs_008()
        throws IOException
    {
        try ( Reader reader = openReader( "008.xml", StandardCharsets.UTF_16 ) )
        {
            parseToEnd( reader );
            fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" );
        }
        catch ( XmlPullParserException e )
        {
            assertMessageContains( e, "UTF-16 BOM plus xml decl of utf-8 is incompatible" );
        }
    }

    /**
     * Test ID: <pre>hst-lhs-009</pre>
     * Test URI: <pre>009.xml</pre>
     * Comment: <pre>UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible</pre>
     * Sections: <pre>4.3.3</pre>
     * Version:
     *
     * @throws IOException if there is an I/O error
     */
    @Test
    public void testhst_lhs_009()
        throws IOException
    {
        try ( Reader reader = openReader( "009.xml", StandardCharsets.UTF_8 ) )
        {
            parseToEnd( reader );
            fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" );
        }
        catch ( XmlPullParserException e )
        {
            assertMessageContains( e, "UTF-16 BOM in a UTF-8 encoded file is incompatible" );
        }
    }

    /**
     * Opens a fixture from {@link #testResourcesDir} with an explicit charset.
     * <p>
     * An {@link InputStreamReader} is returned on purpose: it keeps decoding independent of the platform default
     * charset and lets the caller pick the encoding the individual test case requires.
     *
     * @param fileName name of the fixture file, relative to {@link #testResourcesDir}
     * @param charset charset used to decode the file
     * @return a reader over the fixture; closing it also closes the underlying file stream
     * @throws IOException if the file cannot be opened
     */
    private static Reader openReader( String fileName, java.nio.charset.Charset charset )
        throws IOException
    {
        return new InputStreamReader( new FileInputStream( new File( testResourcesDir, fileName ) ), charset );
    }

    /**
     * Sets the reader as parser input and pulls tokens until {@link XmlPullParser#END_DOCUMENT} is returned.
     *
     * @param reader the document to parse
     * @throws XmlPullParserException if the parser rejects the document
     * @throws IOException if there is an I/O error
     */
    private void parseToEnd( Reader reader )
        throws XmlPullParserException, IOException
    {
        parser.setInput( reader );
        while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
        {
            // tokens are discarded; only the well-formedness outcome matters
        }
    }

    /**
     * Asserts that the exception message contains the expected fragment, reporting the actual message on failure.
     *
     * @param e the exception thrown by the parser
     * @param expected the fragment that must be part of the exception message
     */
    private static void assertMessageContains( XmlPullParserException e, String expected )
    {
        assertTrue( "Unexpected exception message: " + e.getMessage(), e.getMessage().contains( expected ) );
    }

}
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/001.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#xFF000000F6;il</p> <!-- 32 bit integer overflow -->
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/002.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#4294967542;il</p> <!-- 32 bit integer overflow -->
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/003.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#xFFFFFFFF000000F6;il</p> <!-- 64 bit integer overflow -->
4 changes: 4 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/004.xml
@@ -0,0 +1,4 @@
<!DOCTYPE p [
<!ELEMENT p (#PCDATA)>
]>
<p>Fa&#18446744073709551862;il</p> <!-- 64 bit integer overflow -->
2 changes: 2 additions & 0 deletions src/test/resources/xmlconf/eduni/misc/005.xml
@@ -0,0 +1,2 @@
<!DOCTYPE x [ <!ELEMENT x EMPTY> ]>
<x xmlns:xml='http://www.w3.org/XML/1998/namespace'/>