Skip to content

Commit

Permalink
Fixed regressions:
Browse files Browse the repository at this point in the history
* codehaus-plexus#163 - new case: don't assume UTF-8 as the default encoding, to allow parsing from a String.
* codehaus-plexus#194 - Incorrect getText() after parsing the DOCDECL section.
  • Loading branch information
belingueres committed Apr 5, 2022
1 parent 3896620 commit 5ff5d58
Show file tree
Hide file tree
Showing 4 changed files with 352 additions and 24 deletions.
98 changes: 74 additions & 24 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
Expand Up @@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len )
// private String elValue[];
private int elNamespaceCount[];

private String fileEncoding = "UTF8";
private String fileEncoding = null;

/**
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
Expand Down Expand Up @@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
}
}

/**
* Unknown properties are <strong>always</strong> returned as false
/**
* Unknown properties are <strong>always</strong> returned as false
*/
@Override
public boolean getFeature( String name )
Expand Down Expand Up @@ -2677,7 +2677,15 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' )

private char[] charRefOneCharBuf = new char[1];

private char[] parseEntityRef()
/**
* Parses an entity reference: either a character reference or one of the predefined named entities.
*
* @return -1 if found a valid character reference, or one of the predefined character reference names
* (charRefOneCharBuf contains the replaced char). Returns the length of the found entity name, otherwise.
* @throws XmlPullParserException if invalid XML is detected.
* @throws IOException if an I/O error is found.
*/
private int parseCharOrPredefinedEntityRef()
throws XmlPullParserException, IOException
{
// entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
Expand Down Expand Up @@ -2777,12 +2785,12 @@ else if ( ch >= 'A' && ch <= 'F' )
{
text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
}
return charRefOneCharBuf;
return -1;
}
else
{
// [68] EntityRef ::= '&' Name ';'
// scan anem until ;
// scan name until ;
if ( !isNameStartChar( ch ) )
{
throw new XmlPullParserException( "entity reference names can not start with character '"
Expand Down Expand Up @@ -2811,7 +2819,7 @@ else if ( ch >= 'A' && ch <= 'F' )
text = "<";
}
charRefOneCharBuf[0] = '<';
return charRefOneCharBuf;
return -1;
// if(paramPC || isParserTokenizing) {
// if(pcEnd >= pc.length) ensurePC();
// pc[pcEnd++] = '<';
Expand All @@ -2824,7 +2832,7 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po
text = "&";
}
charRefOneCharBuf[0] = '&';
return charRefOneCharBuf;
return -1;
}
else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
{
Expand All @@ -2833,7 +2841,7 @@ else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
text = ">";
}
charRefOneCharBuf[0] = '>';
return charRefOneCharBuf;
return -1;
}
else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 's' )
Expand All @@ -2843,7 +2851,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po
text = "'";
}
charRefOneCharBuf[0] = '\'';
return charRefOneCharBuf;
return -1;
}
else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 't' )
Expand All @@ -2853,20 +2861,51 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
text = "\"";
}
charRefOneCharBuf[0] = '"';
return charRefOneCharBuf;
}
else
{
final char[] result = lookuEntityReplacement( len );
if ( result != null )
{
return result;
}
return -1;
}
if ( tokenize )
text = null;
return null;
return len; // name not found
}
}

/**
 * Handles an entity reference encountered while reading the DOCDECL section.
 * <p>
 * Character references and the predefined named entities are resolved by
 * {@link #parseCharOrPredefinedEntityRef()}. Any other entity name is left unresolved here (no replacement lookup
 * is attempted); in that case, when tokenizing, <code>text</code> is reset to <code>null</code>.
 *
 * @throws XmlPullParserException if invalid XML is detected.
 * @throws IOException if an I/O error is found.
 */
private void parseEntityRefInDocDecl()
    throws XmlPullParserException, IOException
{
    // a negative result means the reference was already replaced by the delegate
    final boolean unresolvedName = parseCharOrPredefinedEntityRef() >= 0;
    if ( unresolvedName && tokenize )
    {
        text = null;
    }
}

/**
 * Parses an entity reference and resolves it to its replacement characters.
 * <p>
 * Character references and the predefined named entities are handled by
 * {@link #parseCharOrPredefinedEntityRef()}; their single replacement character is returned through
 * <code>charRefOneCharBuf</code>. Any other entity name is looked up with <code>lookuEntityReplacement</code>.
 *
 * @return <code>charRefOneCharBuf</code> for a character reference or predefined entity, the replacement found for
 *         a custom entity, or <code>null</code> if no replacement could be found (when tokenizing,
 *         <code>text</code> is also reset to <code>null</code> in that case).
 * @throws XmlPullParserException if invalid XML is detected.
 * @throws IOException if an I/O error is found.
 */
private char[] parseEntityRef()
    throws XmlPullParserException, IOException
{
    final int nameLength = parseCharOrPredefinedEntityRef();
    if ( nameLength < 0 )
    {
        // already replaced: the shared one-char buffer holds the result
        return charRefOneCharBuf;
    }

    final char[] replacement = lookuEntityReplacement( nameLength );
    if ( replacement == null && tokenize )
    {
        // unknown entity name: there is no text to report for this token
        text = null;
    }
    return replacement;
}

/**
Expand Down Expand Up @@ -2977,7 +3016,7 @@ else if (isValidCodePoint( ch ))
}
else
{
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(((int) ch)) + " found in comment", this, null );
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString((ch)) + " found in comment", this, null );
}
if ( normalizeIgnorableWS )
{
Expand Down Expand Up @@ -3484,7 +3523,7 @@ else if ( ch == '>' && bracketLevel == 0 )
break;
else if ( ch == '&' )
{
extractEntityRef();
extractEntityRefInDocDecl();
}
if ( normalizeIgnorableWS )
{
Expand Down Expand Up @@ -3538,6 +3577,17 @@ else if ( ch == '\n' )
posEnd = pos - 1;
}

/**
 * Processes an entity reference found inside the DOCDECL section while keeping <code>posStart</code> unchanged.
 * <p>
 * <code>posEnd</code> is set to <code>pos - 1</code> before parsing, and <code>posStart</code> is saved and then
 * restored afterwards (presumably because parsing the reference moves it; confirm against
 * <code>parseCharOrPredefinedEntityRef</code>).
 *
 * @throws XmlPullParserException if invalid XML is detected.
 * @throws IOException if an I/O error is found.
 */
private void extractEntityRefInDocDecl()
    throws XmlPullParserException, IOException
{
    // remember where the current section started so it can be restored below
    final int savedPosStart = posStart;

    // extractEntityRef
    posEnd = pos - 1;

    parseEntityRefInDocDecl();
    posStart = savedPosStart;
}

private void extractEntityRef()
throws XmlPullParserException, IOException
{
Expand Down

0 comments on commit 5ff5d58

Please sign in to comment.