diff --git a/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java b/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java index 103505a12..292d67a05 100644 --- a/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java +++ b/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java @@ -102,20 +102,7 @@ private static class ZipEntryFileInfo public String getName() { - try - { - final UnicodePathExtraField unicodePath = - (UnicodePathExtraField) zipEntry.getExtraField( UnicodePathExtraField.UPATH_ID ); - - return unicodePath != null - ? new String( unicodePath.getUnicodeName(), "UTF-8" ) - : zipEntry.getName(); - - } - catch ( final UnsupportedEncodingException e ) - { - throw new AssertionError( e ); - } + return zipEntry.getName(); } @Override diff --git a/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java b/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java index a6ce23a7b..62227e7bc 100644 --- a/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java +++ b/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -122,4 +123,22 @@ public void testSymlinkEntries() assertTrue( symLinks.isEmpty() ); } + public void testUnarchiveUnicodePathExtra() + throws Exception + { + PlexusIoZipFileResourceCollection prc = new PlexusIoZipFileResourceCollection(); + prc.setFile( getTestFile( "src/test/resources/unicodePathExtra/efsclear.zip" ) ); + Set names = new HashSet<>(); + final Iterator entries = prc.getEntries(); + while ( entries.hasNext() ) + { + final PlexusIoResource next = entries.next(); + names.add(next.getName()); + } + // a Unicode Path extra field should only be used when its CRC matches the header file name + assertEquals( "should use good extra fields but not bad ones", + new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ), + names ); + } + } diff --git a/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java b/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java index 7d914db39..f12c12011 100644 --- a/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java +++ b/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java @@ -1,10 +1,16 @@ package org.codehaus.plexus.archiver.zip; import java.io.File; +import java.io.IOException; import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + import org.codehaus.plexus.PlexusTestCase; import org.codehaus.plexus.archiver.Archiver; import org.codehaus.plexus.archiver.UnArchiver; +import org.codehaus.plexus.components.io.fileselectors.FileInfo; import org.codehaus.plexus.components.io.fileselectors.FileSelector; import org.codehaus.plexus.components.io.fileselectors.IncludeExcludeFileSelector; import org.codehaus.plexus.util.FileUtils; @@ -88,6 +94,61 @@ public void testUnarchiveUtf8() assertTrue( new File( dest, "\u20acuro.txt" ).exists() ); } + public void testUnarchiveUnicodePathExtra() + throws Exception + { + File dest = new File( "target/output/unzip/unicodePathExtra" ); + dest.mkdirs(); + for ( String name : dest.list() ) + { + new File( dest, name ).delete(); + } + assertEquals( 0, dest.list().length ); + + final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) ); + unarchiver.setDestFile( dest ); + unarchiver.extract(); + // a Unicode Path extra field should only be used when its CRC matches the header file name + assertEquals( "should use good extra fields but not bad ones", + new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ), + new HashSet<>( Arrays.asList( dest.list() ) ) ); + } + + public void testUnarchiveUnicodePathExtraSelector() + throws Exception + { + File dest = new File( "target/output/unzip/unicodePathExtraSelector" ); + dest.mkdirs(); + for ( String name : dest.list() ) + { + new File( dest, name ).delete(); + } + assertEquals( 0, dest.list().length ); + + class CollectingSelector implements FileSelector + { + public Set collection = new HashSet<>(); + @Override + public boolean isSelected( FileInfo fileInfo ) throws IOException + { + collection.add( fileInfo.getName() ); + return false; + } + } + CollectingSelector selector = new CollectingSelector(); + + final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) ); + unarchiver.setDestFile( dest ); + unarchiver.setFileSelectors( new FileSelector[] { selector } ); + unarchiver.extract(); + + assertEquals( "should not extract anything", 0, dest.list().length ); + // a Unicode Path extra field should only be used when its CRC matches the header file name + assertEquals( "should use good extra fields but not bad ones", + new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ), + selector.collection ); + } + private void runUnarchiver( String path, FileSelector[] selectors, boolean[] results ) throws Exception { diff --git a/src/test/resources/unicodePathExtra/GenerateZips.java b/src/test/resources/unicodePathExtra/GenerateZips.java new file mode 100644 index 000000000..c0c626f69 --- /dev/null +++ b/src/test/resources/unicodePathExtra/GenerateZips.java @@ -0,0 +1,47 @@ +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; + +import org.apache.commons.compress.archivers.zip.UnicodePathExtraField; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; + +public class GenerateZips { + + private static void generate(String outfilename, boolean languageEncodingFlag) { + try { + ZipArchiveOutputStream zs = new ZipArchiveOutputStream(new File(outfilename)); + zs.setUseLanguageEncodingFlag(languageEncodingFlag); + zs.setMethod(ZipArchiveOutputStream.STORED); + ZipArchiveEntry ze = new ZipArchiveEntry("nameonly-name"); + ze.setTime(0); + zs.putArchiveEntry(ze); + //zs.write(nothing); + zs.closeArchiveEntry(); + ze = new ZipArchiveEntry("goodextra-name"); + ze.setTime(0); + ze.addExtraField(new UnicodePathExtraField("goodextra-extra", "goodextra-name".getBytes(Charset.forName("UTF-8")))); + zs.putArchiveEntry(ze); + //zs.write(nothing); + zs.closeArchiveEntry(); + ze = new ZipArchiveEntry("badextra-name"); + ze.setTime(0); + ze.addExtraField(new UnicodePathExtraField("badextra-extra", "bogus".getBytes(Charset.forName("UTF-8")))); + zs.putArchiveEntry(ze); + //zs.write(nothing); + zs.closeArchiveEntry(); + zs.finish(); + zs.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public static void main(String[] args) { + generate("efsclear.zip", false); + // with the flag set, decoders tend to not look at the extra field + generate("efsset.zip", true); + System.out.println("done"); + } + +} diff --git a/src/test/resources/unicodePathExtra/README.md b/src/test/resources/unicodePathExtra/README.md new file mode 100644 index 000000000..6b8f41e79 --- /dev/null +++ b/src/test/resources/unicodePathExtra/README.md @@ -0,0 +1,15 @@ +# Test ZIP Files for Unicode Path Extra Field + +These files are used to test for proper use of Unicode Path extra fields ([zip specification §4.6.9](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT)) when unarchiving zip archives. + +Both contain three empty files, one without a Unicode Path extra field, one with a good extra field (CRC matches the header file name), one with a stale extra field (mismatched CRC). By using different values in the header file name and the Unicode path, it can be distinguished which one was used. A compliant unarchiver will use the names marked in bold. + +File name in header | CRC | Unicode Path +--------------------|-----------------------|-------------------- +**nameonly-name** | +goodextra-name | CRC("goodextra-name") | **goodextra-extra** +**badextra-name** | CRC("bogus") | badextra-extra + +The difference between the two archives is whether the Language Encoding Flag (EFS) is set, which indicates that the header file names are already in UTF-8. The specification is not explicit about which one wins when both the flag is set and a Unicode Path extra field is present (it only says archivers shouldn’t do that). In practice, all unarchivers I have seen (including Apache Commons Compress used by Plexus-Archiver) ignore the extra field when the flag is set, which is why only _efsclear.zip_ is useful for testing. + +The archives were created by the included _GenerateZips.java_ using Commons Compress. diff --git a/src/test/resources/unicodePathExtra/efsclear.zip b/src/test/resources/unicodePathExtra/efsclear.zip new file mode 100644 index 000000000..c0aaefb6d Binary files /dev/null and b/src/test/resources/unicodePathExtra/efsclear.zip differ diff --git a/src/test/resources/unicodePathExtra/efsset.zip b/src/test/resources/unicodePathExtra/efsset.zip new file mode 100644 index 000000000..bd586de0e Binary files /dev/null and b/src/test/resources/unicodePathExtra/efsset.zip differ