Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed regressions: #163 and #194 #195

Merged
merged 1 commit into from Apr 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
157 changes: 112 additions & 45 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
Expand Up @@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len )
// private String elValue[];
private int elNamespaceCount[];

private String fileEncoding = "UTF8";
private String fileEncoding = null;

/**
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
Expand Down Expand Up @@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
}
}

/**
* Unknown properties are <strong>always</strong> returned as false
/**
* Unknown properties are <strong>always</strong> returned as false
*/
@Override
public boolean getFeature( String name )
Expand Down Expand Up @@ -1596,11 +1596,11 @@ else if ( ch == '&' )
}
final int oldStart = posStart + bufAbsoluteStart;
final int oldEnd = posEnd + bufAbsoluteStart;
final char[] resolvedEntity = parseEntityRef();
parseEntityRef();
if ( tokenize )
return eventType = ENTITY_REF;
// check if replacement text can be resolved !!!
if ( resolvedEntity == null )
if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
{
if ( entityRefName == null )
{
Expand Down Expand Up @@ -1628,7 +1628,7 @@ else if ( ch == '&' )
}
// assert usePC == true;
// write into PC replacement text - do merge for replacement text!!!!
for ( char aResolvedEntity : resolvedEntity )
for ( char aResolvedEntity : resolvedEntityRefCharBuf )
{
if ( pcEnd >= pc.length )
{
Expand Down Expand Up @@ -2675,9 +2675,28 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' )
return ch;
}

private char[] charRefOneCharBuf = new char[1];
// sentinel meaning that no entity reference has been resolved
private static final char[] BUF_NOT_RESOLVED = new char[0];

// predefined entity refs
private static final char[] BUF_LT = new char[] { '<' };
private static final char[] BUF_AMP = new char[] { '&' };
private static final char[] BUF_GT = new char[] { '>' };
private static final char[] BUF_APO = new char[] { '\'' };
private static final char[] BUF_QUOT = new char[] { '"' };

private char[] parseEntityRef()
private char[] resolvedEntityRefCharBuf = BUF_NOT_RESOLVED;

/**
* parse Entity Ref, either a character entity or one of the predefined name entities.
*
* @return for a numeric character reference, the number of chars in the resolved replacement; for a named
* reference, the length of the entity name. When the reference is a character reference or one of the
* predefined names, resolvedEntityRefCharBuf holds the replacement chars; otherwise it is left as
* BUF_NOT_RESOLVED.
* @throws XmlPullParserException if invalid XML is detected.
* @throws IOException if an I/O error is found.
*/
private int parseCharOrPredefinedEntityRef()
throws XmlPullParserException, IOException
{
// entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
Expand All @@ -2686,6 +2705,8 @@ private char[] parseEntityRef()
// ASSUMPTION just after &
entityRefName = null;
posStart = pos;
int len = 0;
resolvedEntityRefCharBuf = BUF_NOT_RESOLVED;
char ch = more();
if ( ch == '#' )
{
Expand Down Expand Up @@ -2750,7 +2771,6 @@ else if ( ch >= 'A' && ch <= 'F' )
ch = more();
}
}
posEnd = pos - 1;

boolean isValidCodePoint = true;
try
Expand All @@ -2759,7 +2779,7 @@ else if ( ch >= 'A' && ch <= 'F' )
isValidCodePoint = isValidCodePoint( codePoint );
if ( isValidCodePoint )
{
charRefOneCharBuf = Character.toChars( codePoint );
resolvedEntityRefCharBuf = Character.toChars( codePoint );
}
}
catch ( IllegalArgumentException e )
Expand All @@ -2775,14 +2795,14 @@ else if ( ch >= 'A' && ch <= 'F' )

if ( tokenize )
{
text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
text = newString( resolvedEntityRefCharBuf, 0, resolvedEntityRefCharBuf.length );
}
return charRefOneCharBuf;
len = resolvedEntityRefCharBuf.length;
}
else
{
// [68] EntityRef ::= '&' Name ';'
// scan anem until ;
// scan name until ;
if ( !isNameStartChar( ch ) )
{
throw new XmlPullParserException( "entity reference names can not start with character '"
Expand All @@ -2801,17 +2821,15 @@ else if ( ch >= 'A' && ch <= 'F' )
+ printable( ch ) + "'", this, null );
}
}
posEnd = pos - 1;
// determine what name maps to
final int len = posEnd - posStart;
len = ( pos - 1 ) - posStart;
if ( len == 2 && buf[posStart] == 'l' && buf[posStart + 1] == 't' )
{
if ( tokenize )
{
text = "<";
}
charRefOneCharBuf[0] = '<';
return charRefOneCharBuf;
resolvedEntityRefCharBuf = BUF_LT;
// if(paramPC || isParserTokenizing) {
// if(pcEnd >= pc.length) ensurePC();
// pc[pcEnd++] = '<';
Expand All @@ -2823,17 +2841,15 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po
{
text = "&";
}
charRefOneCharBuf[0] = '&';
return charRefOneCharBuf;
resolvedEntityRefCharBuf = BUF_AMP;
}
else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
{
if ( tokenize )
{
text = ">";
}
charRefOneCharBuf[0] = '>';
return charRefOneCharBuf;
resolvedEntityRefCharBuf = BUF_GT;
}
else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 's' )
Expand All @@ -2842,8 +2858,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po
{
text = "'";
}
charRefOneCharBuf[0] = '\'';
return charRefOneCharBuf;
resolvedEntityRefCharBuf = BUF_APO;
}
else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 't' )
Expand All @@ -2852,25 +2867,65 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
{
text = "\"";
}
charRefOneCharBuf[0] = '"';
return charRefOneCharBuf;
}
else
{
final char[] result = lookuEntityReplacement( len );
if ( result != null )
{
return result;
}
resolvedEntityRefCharBuf = BUF_QUOT;
}
if ( tokenize )
text = null;
return null;
}

posEnd = pos;

return len;
}

/**
 * Parses an entity reference found inside the DOCDECL section.
 * <p>
 * Delegates to {@code parseCharOrPredefinedEntityRef()}, so only character references and the predefined
 * named entities are resolved here; {@code lookuEntityReplacement} is not consulted. When {@code usePC} is
 * set, {@code posStart} is moved back by one so that the leading '&amp;' is included before {@code joinPC()}
 * is called. If nothing was resolved and the parser is tokenizing, {@code text} is reset to {@code null}.
 *
 * @throws XmlPullParserException if invalid XML is detected.
 * @throws IOException if an I/O error is found.
 */
private void parseEntityRefInDocDecl()
    throws XmlPullParserException, IOException
{
    parseCharOrPredefinedEntityRef();

    if ( usePC )
    {
        // step back over the '&' that introduced the entity so it ends up in PC as well
        posStart--;
        joinPC();
    }

    // identity comparison on purpose: BUF_NOT_RESOLVED is a sentinel instance
    final boolean unresolved = resolvedEntityRefCharBuf == BUF_NOT_RESOLVED;
    if ( unresolved && tokenize )
    {
        text = null;
    }
}

/**
 * Parses an entity reference inside a tag or attribute and stores its replacement in
 * {@code resolvedEntityRefCharBuf}.
 * <p>
 * Resolution is attempted first through {@code parseCharOrPredefinedEntityRef()} and, if that leaves the
 * buffer as {@code BUF_NOT_RESOLVED}, through {@code lookuEntityReplacement}. If the reference is still
 * unresolved and the parser is tokenizing, {@code text} is reset to {@code null}.
 *
 * @throws XmlPullParserException if invalid XML is detected.
 * @throws IOException if an I/O error is found.
 */
private void parseEntityRef()
    throws XmlPullParserException, IOException
{
    final int entityNameLength = parseCharOrPredefinedEntityRef();

    // exclude the terminating ';' of the entity from the range used by the lookup
    posEnd--;

    if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
    {
        // neither a character reference nor a predefined name: try the lookup
        resolvedEntityRefCharBuf = lookuEntityReplacement( entityNameLength );

        if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED && tokenize )
        {
            text = null;
        }
    }
}

/**
* Check if the provided parameter is a valid Char, according to: {@link https://www.w3.org/TR/REC-xml/#NT-Char}
* Check if the provided parameter is a valid Char. According to
* <a href="https://www.w3.org/TR/REC-xml/#NT-Char">https://www.w3.org/TR/REC-xml/#NT-Char</a>
*
* @param codePoint the numeric value to check
* @return true if it is a valid numeric character reference. False otherwise.
Expand All @@ -2883,8 +2938,6 @@ private static boolean isValidCodePoint( int codePoint )
}

private char[] lookuEntityReplacement( int entityNameLen )
throws XmlPullParserException, IOException

{
if ( !allStringsInterned )
{
Expand Down Expand Up @@ -2919,7 +2972,7 @@ private char[] lookuEntityReplacement( int entityNameLen )
}
}
}
return null;
return BUF_NOT_RESOLVED;
}

private void parseComment()
Expand Down Expand Up @@ -2977,7 +3030,7 @@ else if (isValidCodePoint( ch ))
}
else
{
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(((int) ch)) + " found in comment", this, null );
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(ch) + " found in comment", this, null );
}
if ( normalizeIgnorableWS )
{
Expand Down Expand Up @@ -3484,7 +3537,8 @@ else if ( ch == '>' && bracketLevel == 0 )
break;
else if ( ch == '&' )
{
extractEntityRef();
extractEntityRefInDocDecl();
continue;
}
if ( normalizeIgnorableWS )
{
Expand Down Expand Up @@ -3536,6 +3590,19 @@ else if ( ch == '\n' )

}
posEnd = pos - 1;
text = null;
}

/**
 * Handles an entity reference met while scanning the DOCDECL section: parses it through
 * {@code parseEntityRefInDocDecl()} and then puts {@code posStart} back to the value it had on entry.
 *
 * @throws XmlPullParserException if invalid XML is detected.
 * @throws IOException if an I/O error is found.
 */
private void extractEntityRefInDocDecl()
    throws XmlPullParserException, IOException
{
    posEnd = pos - 1;

    // parsing the reference moves posStart; remember it so it can be restored afterwards
    final int savedPosStart = posStart;

    parseEntityRefInDocDecl();

    posStart = savedPosStart;
}

private void extractEntityRef()
Expand All @@ -3559,9 +3626,9 @@ private void extractEntityRef()
}
// assert usePC == true;

final char[] resolvedEntity = parseEntityRef();
parseEntityRef();
// check if replacement text can be resolved !!!
if ( resolvedEntity == null )
if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
{
if ( entityRefName == null )
{
Expand All @@ -3571,7 +3638,7 @@ private void extractEntityRef()
+ "'", this, null );
}
// write into PC replacement text - do merge for replacement text!!!!
for ( char aResolvedEntity : resolvedEntity )
for ( char aResolvedEntity : resolvedEntityRefCharBuf )
{
if ( pcEnd >= pc.length )
{
Expand Down