Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix use of a mismatching unicode path extra field in zip unarchiving #167

Merged
merged 2 commits into from Apr 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -102,20 +102,7 @@ private static class ZipEntryFileInfo

public String getName()
{
try
{
final UnicodePathExtraField unicodePath =
(UnicodePathExtraField) zipEntry.getExtraField( UnicodePathExtraField.UPATH_ID );

return unicodePath != null
? new String( unicodePath.getUnicodeName(), "UTF-8" )
: zipEntry.getName();

}
catch ( final UnsupportedEncodingException e )
{
throw new AssertionError( e );
}
return zipEntry.getName();
}

@Override
Expand Down
Expand Up @@ -5,6 +5,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
Expand Down Expand Up @@ -122,4 +123,22 @@ public void testSymlinkEntries()
assertTrue( symLinks.isEmpty() );
}

public void testUnarchiveUnicodePathExtra()
throws Exception
{
PlexusIoZipFileResourceCollection prc = new PlexusIoZipFileResourceCollection();
prc.setFile( getTestFile( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
Set<String> names = new HashSet<>();
final Iterator<PlexusIoResource> entries = prc.getEntries();
while ( entries.hasNext() )
{
final PlexusIoResource next = entries.next();
names.add(next.getName());
}
// a Unicode Path extra field should only be used when its CRC matches the header file name
assertEquals( "should use good extra fields but not bad ones",
plamentotev marked this conversation as resolved.
Show resolved Hide resolved
new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
names );
}

}
@@ -1,10 +1,16 @@
package org.codehaus.plexus.archiver.zip;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.codehaus.plexus.PlexusTestCase;
import org.codehaus.plexus.archiver.Archiver;
import org.codehaus.plexus.archiver.UnArchiver;
import org.codehaus.plexus.components.io.fileselectors.FileInfo;
import org.codehaus.plexus.components.io.fileselectors.FileSelector;
import org.codehaus.plexus.components.io.fileselectors.IncludeExcludeFileSelector;
import org.codehaus.plexus.util.FileUtils;
Expand Down Expand Up @@ -88,6 +94,61 @@ public void testUnarchiveUtf8()
assertTrue( new File( dest, "\u20acuro.txt" ).exists() );
}

public void testUnarchiveUnicodePathExtra()
throws Exception
{
File dest = new File( "target/output/unzip/unicodePathExtra" );
dest.mkdirs();
for ( String name : dest.list() )
{
new File( dest, name ).delete();
}
assertEquals( 0, dest.list().length );

final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
unarchiver.setDestFile( dest );
unarchiver.extract();
// a Unicode Path extra field should only be used when its CRC matches the header file name
assertEquals( "should use good extra fields but not bad ones",
new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
new HashSet<>( Arrays.asList( dest.list() ) ) );
}

public void testUnarchiveUnicodePathExtraSelector()
throws Exception
{
File dest = new File( "target/output/unzip/unicodePathExtraSelector" );
dest.mkdirs();
for ( String name : dest.list() )
{
new File( dest, name ).delete();
}
assertEquals( 0, dest.list().length );

class CollectingSelector implements FileSelector
{
public Set<String> collection = new HashSet<>();
@Override
public boolean isSelected( FileInfo fileInfo ) throws IOException
{
collection.add( fileInfo.getName() );
return false;
}
}
CollectingSelector selector = new CollectingSelector();

final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
unarchiver.setDestFile( dest );
unarchiver.setFileSelectors( new FileSelector[] { selector } );
unarchiver.extract();

assertEquals( "should not extract anything", 0, dest.list().length );
// a Unicode Path extra field should only be used when its CRC matches the header file name
assertEquals( "should use good extra fields but not bad ones",
new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
selector.collection );
}

private void runUnarchiver( String path, FileSelector[] selectors, boolean[] results )
throws Exception
{
Expand Down
47 changes: 47 additions & 0 deletions src/test/resources/unicodePathExtra/GenerateZips.java
@@ -0,0 +1,47 @@
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;

import org.apache.commons.compress.archivers.zip.UnicodePathExtraField;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;

public class GenerateZips {

private static void generate(String outfilename, boolean languageEncodingFlag) {
try {
ZipArchiveOutputStream zs = new ZipArchiveOutputStream(new File(outfilename));
zs.setUseLanguageEncodingFlag(languageEncodingFlag);
zs.setMethod(ZipArchiveOutputStream.STORED);
ZipArchiveEntry ze = new ZipArchiveEntry("nameonly-name");
ze.setTime(0);
zs.putArchiveEntry(ze);
//zs.write(nothing);
zs.closeArchiveEntry();
ze = new ZipArchiveEntry("goodextra-name");
ze.setTime(0);
ze.addExtraField(new UnicodePathExtraField("goodextra-extra", "goodextra-name".getBytes(Charset.forName("UTF-8"))));
zs.putArchiveEntry(ze);
//zs.write(nothing);
zs.closeArchiveEntry();
ze = new ZipArchiveEntry("badextra-name");
ze.setTime(0);
ze.addExtraField(new UnicodePathExtraField("badextra-extra", "bogus".getBytes(Charset.forName("UTF-8"))));
zs.putArchiveEntry(ze);
//zs.write(nothing);
zs.closeArchiveEntry();
zs.finish();
zs.close();
} catch (IOException e) {
e.printStackTrace();
}
}

public static void main(String[] args) {
generate("efsclear.zip", false);
// with the flag set, decoders tend to not look at the extra field
generate("efsset.zip", true);
System.out.println("done");
}

}
15 changes: 15 additions & 0 deletions src/test/resources/unicodePathExtra/README.md
@@ -0,0 +1,15 @@
# Test ZIP Files for Unicode Path Extra Field

These files are used to test for proper use of Unicode Path extra fields ([zip specification §4.6.9](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT)) when unarchiving zip archives.

Both contain three empty files, one without a Unicode Path extra field, one with a good extra field (CRC matches the header file name), one with a stale extra field (mismatched CRC). By using different values in the header file name and the Unicode path, it can be distinguished which one was used. A compliant unarchiver will use the names marked in bold.

File name in header | CRC | Unicode Path
--------------------|-----------------------|--------------------
**nameonly-name** |
goodextra-name | CRC("goodextra-name") | **goodextra-extra**
**badextra-name** | CRC("bogus") | badextra-extra

The difference between the two archives is whether the Language Encoding Flag (EFS) is set, which indicates that the header file names are already in UTF-8. The specification is not explicit about which one wins when both the flag is set and a Unicode Path extra field is present (it only says archivers shouldn’t do that). In practice, all unarchivers I have seen (including Apache Commons Compress used by Plexus-Archiver) ignore the extra field when the flag is set, which is why only _efsclear.zip_ is useful for testing.

The archives were created by the included _GenerateZips.java_ using Commons Compress.
Binary file added src/test/resources/unicodePathExtra/efsclear.zip
Binary file not shown.
Binary file added src/test/resources/unicodePathExtra/efsset.zip
Binary file not shown.