From cf44395550ef68f260f1bb5c06d2ac36636db997 Mon Sep 17 00:00:00 2001
From: Christian Walther <walther@indel.ch>
Date: Sun, 28 Mar 2021 12:27:06 +0200
Subject: [PATCH 1/2] Tests for proper use of unicode path extra fields in zip
 unarchiving.

One of them currently failing due to a bug, to be fixed in the next commit.

Two test files included, one with the EFS bit (language encoding flag) set,
the other without. Only efsclear.zip is used in tests because the zip spec
does not specify which prevails when both the bit is set and an extra field
is present - plexus-archiver and all other unarchivers I've tried ignore
the extra field in that case.
---
 ...PlexusIoZipFileResourceCollectionTest.java |  19 ++++++
 .../archiver/zip/ZipUnArchiverTest.java       |  61 ++++++++++++++++++
 .../unicodePathExtra/GenerateZips.java        |  47 ++++++++++++++
 src/test/resources/unicodePathExtra/README.md |  15 +++++
 .../resources/unicodePathExtra/efsclear.zip   | Bin 0 -> 484 bytes
 .../resources/unicodePathExtra/efsset.zip     | Bin 0 -> 484 bytes
 6 files changed, 142 insertions(+)
 create mode 100644 src/test/resources/unicodePathExtra/GenerateZips.java
 create mode 100644 src/test/resources/unicodePathExtra/README.md
 create mode 100644 src/test/resources/unicodePathExtra/efsclear.zip
 create mode 100644 src/test/resources/unicodePathExtra/efsset.zip
diff --git a/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java b/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java
index a6ce23a7b..62227e7bc 100644
--- a/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java
+++ b/src/test/java/org/codehaus/plexus/archiver/zip/PlexusIoZipFileResourceCollectionTest.java
@@ -5,6 +5,7 @@
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.URL;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -122,4 +123,22 @@ public void testSymlinkEntries()
         assertTrue( symLinks.isEmpty() );
     }
 
+    public void testUnarchiveUnicodePathExtra()
+        throws Exception
+    {
+        PlexusIoZipFileResourceCollection prc = new PlexusIoZipFileResourceCollection();
+        prc.setFile( getTestFile( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
+        Set<String> names = new HashSet<>();
+        final Iterator<PlexusIoResource> entries = prc.getEntries();
+        while ( entries.hasNext() )
+        {
+            final PlexusIoResource next = entries.next();
+            names.add(next.getName());
+        }
+        // a Unicode Path extra field should only be used when its CRC matches the header file name
+        assertEquals( "should use good extra fields but not bad ones",
+                new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
+                names );
+    }
+
 }
diff --git a/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java b/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java
index 7d914db39..f12c12011 100644
--- a/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java
+++ b/src/test/java/org/codehaus/plexus/archiver/zip/ZipUnArchiverTest.java
@@ -1,10 +1,16 @@
 package org.codehaus.plexus.archiver.zip;
 
 import java.io.File;
+import java.io.IOException;
 import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
 import org.codehaus.plexus.PlexusTestCase;
 import org.codehaus.plexus.archiver.Archiver;
 import org.codehaus.plexus.archiver.UnArchiver;
+import org.codehaus.plexus.components.io.fileselectors.FileInfo;
 import org.codehaus.plexus.components.io.fileselectors.FileSelector;
 import org.codehaus.plexus.components.io.fileselectors.IncludeExcludeFileSelector;
 import org.codehaus.plexus.util.FileUtils;
@@ -88,6 +94,61 @@ public void testUnarchiveUtf8()
         assertTrue( new File( dest, "\u20acuro.txt" ).exists() );
     }
 
+    public void testUnarchiveUnicodePathExtra()
+        throws Exception
+    {
+        File dest = new File( "target/output/unzip/unicodePathExtra" );
+        dest.mkdirs();
+        for ( String name : dest.list() )
+        {
+            new File( dest, name ).delete();
+        }
+        assertEquals( 0, dest.list().length );
+
+        final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
+        unarchiver.setDestFile( dest );
+        unarchiver.extract();
+        // a Unicode Path extra field should only be used when its CRC matches the header file name
+        assertEquals( "should use good extra fields but not bad ones",
+                new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
+                new HashSet<>( Arrays.asList( dest.list() ) ) );
+    }
+
+    public void testUnarchiveUnicodePathExtraSelector()
+        throws Exception
+    {
+        File dest = new File( "target/output/unzip/unicodePathExtraSelector" );
+        dest.mkdirs();
+        for ( String name : dest.list() )
+        {
+            new File( dest, name ).delete();
+        }
+        assertEquals( 0, dest.list().length );
+
+        class CollectingSelector implements FileSelector
+        {
+            public Set<String> collection = new HashSet<>();
+            @Override
+            public boolean isSelected( FileInfo fileInfo ) throws IOException
+            {
+                collection.add( fileInfo.getName() );
+                return false;
+            }
+        }
+        CollectingSelector selector = new CollectingSelector();
+
+        final ZipUnArchiver unarchiver = getZipUnArchiver( new File( "src/test/resources/unicodePathExtra/efsclear.zip" ) );
+        unarchiver.setDestFile( dest );
+        unarchiver.setFileSelectors( new FileSelector[] { selector } );
+        unarchiver.extract();
+
+        assertEquals( "should not extract anything", 0, dest.list().length );
+        // a Unicode Path extra field should only be used when its CRC matches the header file name
+        assertEquals( "should use good extra fields but not bad ones",
+                new HashSet<>( Arrays.asList( "nameonly-name", "goodextra-extra", "badextra-name" ) ),
+                selector.collection );
+    }
+
     private void runUnarchiver( String path, FileSelector[] selectors, boolean[] results )
         throws Exception
     {
diff --git a/src/test/resources/unicodePathExtra/GenerateZips.java b/src/test/resources/unicodePathExtra/GenerateZips.java
new file mode 100644
index 000000000..c0c626f69
--- /dev/null
+++ b/src/test/resources/unicodePathExtra/GenerateZips.java
@@ -0,0 +1,47 @@
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.commons.compress.archivers.zip.UnicodePathExtraField;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+
+public class GenerateZips {
+	
+	private static void generate(String outfilename, boolean languageEncodingFlag) {
+		try {
+			ZipArchiveOutputStream zs = new ZipArchiveOutputStream(new File(outfilename));
+			zs.setUseLanguageEncodingFlag(languageEncodingFlag);
+			zs.setMethod(ZipArchiveOutputStream.STORED);
+			ZipArchiveEntry ze = new ZipArchiveEntry("nameonly-name");
+			ze.setTime(0);
+			zs.putArchiveEntry(ze);
+			//zs.write(nothing);
+			zs.closeArchiveEntry();
+			ze = new ZipArchiveEntry("goodextra-name");
+			ze.setTime(0);
+			ze.addExtraField(new UnicodePathExtraField("goodextra-extra", "goodextra-name".getBytes(Charset.forName("UTF-8"))));
+			zs.putArchiveEntry(ze);
+			//zs.write(nothing);
+			zs.closeArchiveEntry();
+			ze = new ZipArchiveEntry("badextra-name");
+			ze.setTime(0);
+			ze.addExtraField(new UnicodePathExtraField("badextra-extra", "bogus".getBytes(Charset.forName("UTF-8"))));
+			zs.putArchiveEntry(ze);
+			//zs.write(nothing);
+			zs.closeArchiveEntry();
+			zs.finish();
+			zs.close();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+
+	public static void main(String[] args) {
+		generate("efsclear.zip", false);
+		// with the flag set, decoders tend to not look at the extra field
+		generate("efsset.zip", true);
+		System.out.println("done");
+	}
+
+}
diff --git a/src/test/resources/unicodePathExtra/README.md b/src/test/resources/unicodePathExtra/README.md
new file mode 100644
index 000000000..6b8f41e79
--- /dev/null
+++ b/src/test/resources/unicodePathExtra/README.md
@@ -0,0 +1,15 @@
+# Test ZIP Files for Unicode Path Extra Field
+
+These files are used to test for proper use of Unicode Path extra fields ([zip specification §4.6.9](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT)) when unarchiving zip archives.
+
+Both contain three empty files, one without a Unicode Path extra field, one with a good extra field (CRC matches the header file name), one with a stale extra field (mismatched CRC). By using different values in the header file name and the Unicode path, it can be distinguished which one was used. A compliant unarchiver will use the names marked in bold.
+
+File name in header | CRC                   | Unicode Path
+--------------------|-----------------------|--------------------
+**nameonly-name**   |
+goodextra-name      | CRC("goodextra-name") | **goodextra-extra**
+**badextra-name**   | CRC("bogus")          | badextra-extra
+
+The difference between the two archives is whether the Language Encoding Flag (EFS) is set, which indicates that the header file names are already in UTF-8. The specification is not explicit about which one wins when both the flag is set and a Unicode Path extra field is present (it only says archivers shouldn’t do that). In practice, all unarchivers I have seen (including Apache Commons Compress used by Plexus-Archiver) ignore the extra field when the flag is set, which is why only _efsclear.zip_ is useful for testing.
+
+The archives were created by the included _GenerateZips.java_ using Commons Compress.
diff --git a/src/test/resources/unicodePathExtra/efsclear.zip b/src/test/resources/unicodePathExtra/efsclear.zip
new file mode 100644
index 0000000000000000000000000000000000000000..c0aaefb6d2d19cc92ad817f0e5ea35670f4c8580
GIT binary patch
literal 484
zcmWIWW@h1H00Tt`1tWPGL>TfCb5ry4aw>H}3`Pb4m>irAz^R>&L5CqdKR+e4qNFGh
zr~c9c5e7zgWmQImQm_Wx=JGOVGbAMvG**~_@vDpH1h`75sf<h_47j`t_Y{<Y;`sn?
z+*<h<B#`vl1K9}QfDFOyN!(`gGKeFYIUUG`dj(=74iB=jfwVCL;X@$(7Q|ry08tl7
A)Bpeg

literal 0
HcmV?d00001

diff --git a/src/test/resources/unicodePathExtra/efsset.zip b/src/test/resources/unicodePathExtra/efsset.zip
new file mode 100644
index 0000000000000000000000000000000000000000..bd586de0e6c182d632cb8c63a964456e4af73b8f
GIT binary patch
literal 484
zcmWIWW@h1HVBlb2U{C~NB*4oc!jPAko0^}OQ>hDLFfs@rNkf<cIJNUJ=rE+`=clAr
zloTc6)L&X4!ocXRtjdT`3f6$zTwVrkhNMJ-#tJhqes%Gj09Oe$m61t=0hf2tJ%{4?
z0B_t{`4}XS^x6a22;YDV!R<-hX7VzKBbhlJ$cB3bVk8a^va*4+F$3X4ApI7^VE_P!
CA4&uO

literal 0
HcmV?d00001


From 1f51bb2fe909a61dd56c39e62d0a5d1e81768749 Mon Sep 17 00:00:00 2001
From: Christian Walther <walther@indel.ch>
Date: Sun, 28 Mar 2021 12:29:15 +0200
Subject: [PATCH 2/2] Fix use of a mismatching unicode path extra field in zip
 unarchiving.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The zip spec (§4.6.9) says that a unicode path extra field is only to be
used when its CRC matches the current file name. Using it unconditionally,
as introduced in a080e0a, is wrong. Commons.compress already does it right
internally, no need to do anything here.

The bug affected extracting until 2aec2ba as a side effect replaced use of
fileInfo.getName() by ze.getName(); recently only direct use of FileInfo,
e.g. in file selectors, was affected.
---
 .../archiver/zip/AbstractZipUnArchiver.java       | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java b/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java
index 103505a12..292d67a05 100644
--- a/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java
+++ b/src/main/java/org/codehaus/plexus/archiver/zip/AbstractZipUnArchiver.java
@@ -102,20 +102,7 @@ private static class ZipEntryFileInfo
 
         public String getName()
         {
-            try
-            {
-                final UnicodePathExtraField unicodePath =
-                    (UnicodePathExtraField) zipEntry.getExtraField( UnicodePathExtraField.UPATH_ID );
-
-                return unicodePath != null
-                           ? new String( unicodePath.getUnicodeName(), "UTF-8" )
-                           : zipEntry.getName();
-
-            }
-            catch ( final UnsupportedEncodingException e )
-            {
-                throw new AssertionError( e );
-            }
+            return zipEntry.getName();
         }
 
         @Override