Skip to content

Commit

Permalink
Fix MXParser failing to parse the XML declaration properly (codehaus-plexus…
Browse files Browse the repository at this point in the history
…#138)

- Fixed bugs.
- Added tests.
- Improved error messages.
  • Loading branch information
belingueres committed Apr 4, 2021
1 parent b52d0e5 commit 7d23382
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 15 deletions.
25 changes: 18 additions & 7 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
Expand Up @@ -3296,6 +3296,8 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
}
xmlDeclVersion = newString( buf, versionStart, versionEnd - versionStart );

String lastParsedAttr = "version";

// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
char ch = more();
char prevCh = ch;
Expand All @@ -3310,8 +3312,8 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
{
if ( !isS( prevCh ) )
{
throw new XmlPullParserException( "expected a space after version and not " + printable( ch ), this,
null );
throw new XmlPullParserException( "expected a space after " + lastParsedAttr + " and not "
+ printable( ch ), this, null );
}
ch = more();
ch = requireInput( ch, NCODING );
Expand Down Expand Up @@ -3363,13 +3365,23 @@ else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF
throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
this, null );
}

lastParsedAttr = "encoding";

ch = more();
prevCh = ch;
ch = skipS( ch );
}

ch = more();
ch = skipS( ch );
// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
if ( ch == 's' )
{
if ( !isS( prevCh ) )
{
throw new XmlPullParserException( "expected a space after " + lastParsedAttr + " and not "
+ printable( ch ), this, null );
}

ch = more();
ch = requireInput( ch, TANDALONE );
ch = skipS( ch );
Expand All @@ -3382,11 +3394,10 @@ else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF
ch = skipS( ch );
if ( ch != '\'' && ch != '"' )
{
throw new XmlPullParserException( "expected apostrophe (') or quotation mark (\") after encoding and not "
throw new XmlPullParserException( "expected apostrophe (') or quotation mark (\") after standalone and not "
+ printable( ch ), this, null );
}
char quotChar = ch;
int standaloneStart = pos;
ch = more();
if ( ch == 'y' )
{
Expand All @@ -3411,9 +3422,9 @@ else if ( ch == 'n' )
+ printable( ch ), this, null );
}
ch = more();
ch = skipS( ch );
}

ch = skipS( ch );
if ( ch != '?' )
{
throw new XmlPullParserException( "expected ?> as last part of <?xml not " + printable( ch ), this, null );
Expand Down
Expand Up @@ -53,7 +53,7 @@ public void testibm_not_wf_P32_ibm32n01xml()
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
assertTrue( e.getMessage().contains( "expected a space after version and not s" ) );
}
}

Expand All @@ -79,7 +79,7 @@ public void testibm_not_wf_P32_ibm32n02xml()
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
assertTrue( e.getMessage().contains( "expected equals sign (=) after standalone and not \"" ) );
}
}

Expand Down Expand Up @@ -131,7 +131,7 @@ public void testibm_not_wf_P32_ibm32n04xml()
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not Y" ) );
}
}

Expand All @@ -158,7 +158,7 @@ public void testibm_not_wf_P32_ibm32n05xml()
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not Y" ) );
}
}

Expand All @@ -184,7 +184,7 @@ public void testibm_not_wf_P32_ibm32n06xml()
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not N" ) );
}
}

Expand All @@ -210,7 +210,7 @@ public void testibm_not_wf_P32_ibm32n07xml()
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
assertTrue( e.getMessage().contains( "expected 'yes' or 'no' after standalone and not N" ) );
}
}

Expand All @@ -236,7 +236,7 @@ public void testibm_not_wf_P32_ibm32n08xml()
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "expected ?> as last part of <?xml not t" ) );
assertTrue( e.getMessage().contains( "expected equals sign (=) after standalone and not \"" ) );
}
}

Expand All @@ -248,8 +248,10 @@ public void testibm_not_wf_P32_ibm32n08xml()
* Version:
*
* @throws IOException if there is an I/O error
*
* NOTE: This test is SKIPPED as MXParser does not support parsing inside DOCTYPEDECL.
*/
@Test
// @Test
public void testibm_not_wf_P32_ibm32n09xml()
throws IOException
{
Expand Down
40 changes: 40 additions & 0 deletions src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
Expand Up @@ -661,4 +661,44 @@ public void testMalformedXMLRootElement5()
}
}

/**
 * Verifies that an XML declaration carrying only the <code>version</code> pseudo-attribute is
 * accepted, and that the declaration, the start tag and the end tag are reported in that order.
 *
 * @throws Exception if the parser rejects the input; the exception is deliberately left to
 *             propagate so the test report shows its type, message and stack trace
 */
@Test
public void testXMLDeclVersionOnly()
    throws Exception
{
    String input = "<?xml version='1.0'?><hello/>";

    MXParser parser = new MXParser();
    parser.setInput( new StringReader( input ) );

    // No try/catch here: wrapping the calls and replacing any exception with a generic
    // failure message would hide the real cause of a regression.
    assertEquals( XmlPullParser.PROCESSING_INSTRUCTION, parser.nextToken() );
    assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
    assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
}

/**
 * Verifies that an XML declaration with no whitespace between the <code>encoding</code> value
 * and the <code>standalone</code> pseudo-attribute is rejected with a message naming the
 * attribute that should have been followed by a space.
 *
 * @throws Exception if an unexpected error (anything other than the expected
 *             {@link XmlPullParserException}) occurs
 */
@Test
public void testXMLDeclVersionEncodingStandaloneNoSpace()
    throws Exception
{
    String input = "<?xml version='1.0' encoding='ASCII'standalone='yes'?><hello/>";

    MXParser parser = new MXParser();
    parser.setInput( new StringReader( input ) );

    try
    {
        parser.nextToken();

        // Reaching this point means the malformed declaration was accepted. Without an explicit
        // failure the test would pass silently and never detect a regression.
        throw new AssertionError( "Should throw XmlPullParserException: missing space before standalone" );
    }
    catch ( XmlPullParserException e )
    {
        assertTrue( e.getMessage().contains( "expected a space after encoding and not s" ) );
    }
}

}

0 comments on commit 7d23382

Please sign in to comment.