From 6a416dd97c93efc39ee7861e9b75b2b27d9bf3d0 Mon Sep 17 00:00:00 2001 From: Arran Walker Date: Thu, 21 Oct 2021 10:39:05 +0100 Subject: [PATCH] archive/zip: don't read data descriptor early Go 1.17 introduced an unnecessary change to when a zip's data descriptor is read for file entries, how it is parsed and how the crc32 field is used. Before Go 1.17, the data descriptor was read immediately after a file entry's content. This continuous read is a pattern existing applications have come to rely upon (for example, where reads at specific offsets might be translated to HTTP range requests). In Go 1.17, all data descriptors are immediately read upon opening the file. This results in scattered and non-continuous reads of the archive, and depending on the underlying reader, might have severe performance implications. In addition, an additional object is now initialized for each entry, but is mostly redundant. Previously, the crc32 field in the data descriptor would return an error if it did not match the central directory's entry. This check has seemingly been unintentionally removed. If the central directory crc32 is invalid and a data descriptor is present, no error is returned. This change reverts to the previous handling of data descriptors, before CL 312310. Fixes #48374 Fixes #49089 Change-Id: I5df2878c4fcc9e500064e7175f3ab9727c82f100 Reviewed-on: https://go-review.googlesource.com/c/go/+/357489 Run-TryBot: Ian Lance Taylor Reviewed-by: Ian Lance Taylor Trust: Dmitri Shuralyov --- zip/reader.go | 94 ++++++++++------------------------ zip/reader_test.go | 122 --------------------------------------------- zip/struct.go | 8 --- 3 files changed, 27 insertions(+), 197 deletions(-) diff --git a/zip/reader.go b/zip/reader.go index 2181db2a19..5b5e569c67 100644 --- a/zip/reader.go +++ b/zip/reader.go @@ -128,7 +128,6 @@ func (z *Reader) init(r io.ReaderAt, size int64) error { if err != nil { return err } - f.readDataDescriptor() z.File = append(z.File, f) } if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here @@ -189,10 +188,15 @@ func (f *File) Open() (io.ReadCloser, error) { return nil, ErrAlgorithm } var rc io.ReadCloser = dcomp(r) + var desr io.Reader + if f.hasDataDescriptor() { + desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) + } rc = &checksumReader{ rc: rc, hash: crc32.NewIEEE(), f: f, + desr: desr, } return rc, nil } @@ -208,49 +212,13 @@ func (f *File) OpenRaw() (io.Reader, error) { return r, nil } -func (f *File) readDataDescriptor() { - if !f.hasDataDescriptor() { - return - } - - bodyOffset, err := f.findBodyOffset() - if err != nil { - f.descErr = err - return - } - - // In section 4.3.9.2 of the spec: "However ZIP64 format MAY be used - // regardless of the size of a file. When extracting, if the zip64 - // extended information extra field is present for the file the - // compressed and uncompressed sizes will be 8 byte values." - // - // Historically, this package has used the compressed and uncompressed - // sizes from the central directory to determine if the package is - // zip64. - // - // For this case we allow either the extra field or sizes to determine - // the data descriptor length. - zip64 := f.zip64 || f.isZip64() - n := int64(dataDescriptorLen) - if zip64 { - n = dataDescriptor64Len - } - size := int64(f.CompressedSize64) - r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, n) - dd, err := readDataDescriptor(r, zip64) - if err != nil { - f.descErr = err - return - } - f.CRC32 = dd.crc32 -} - type checksumReader struct { rc io.ReadCloser hash hash.Hash32 nread uint64 // number of bytes read so far f *File - err error // sticky error + desr io.Reader // if non-nil, where to read the data descriptor + err error // sticky error } func (r *checksumReader) Stat() (fs.FileInfo, error) { @@ -271,12 +239,12 @@ func (r *checksumReader) Read(b []byte) (n int, err error) { if r.nread != r.f.UncompressedSize64 { return 0, io.ErrUnexpectedEOF } - if r.f.hasDataDescriptor() { - if r.f.descErr != nil { - if r.f.descErr == io.EOF { + if r.desr != nil { + if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { + if err1 == io.EOF { err = io.ErrUnexpectedEOF } else { - err = r.f.descErr + err = err1 } } else if r.hash.Sum32() != r.f.CRC32 { err = ErrChecksum @@ -488,10 +456,8 @@ parseExtras: return nil } -func readDataDescriptor(r io.Reader, zip64 bool) (*dataDescriptor, error) { - // Create enough space for the largest possible size - var buf [dataDescriptor64Len]byte - +func readDataDescriptor(r io.Reader, f *File) error { + var buf [dataDescriptorLen]byte // The spec says: "Although not originally assigned a // signature, the value 0x08074b50 has commonly been adopted // as a signature value for the data descriptor record. @@ -500,9 +466,10 @@ func readDataDescriptor(r io.Reader, zip64 bool) (*dataDescriptor, error) { // descriptors and should account for either case when reading // ZIP files to ensure compatibility." // - // First read just those 4 bytes to see if the signature exists. + // dataDescriptorLen includes the size of the signature but + // first read just those 4 bytes to see if it exists. if _, err := io.ReadFull(r, buf[:4]); err != nil { - return nil, err + return err } off := 0 maybeSig := readBuf(buf[:4]) @@ -511,28 +478,21 @@ func readDataDescriptor(r io.Reader, zip64 bool) (*dataDescriptor, error) { // bytes. off += 4 } - - end := dataDescriptorLen - 4 - if zip64 { - end = dataDescriptor64Len - 4 + if _, err := io.ReadFull(r, buf[off:12]); err != nil { + return err } - if _, err := io.ReadFull(r, buf[off:end]); err != nil { - return nil, err + b := readBuf(buf[:12]) + if b.uint32() != f.CRC32 { + return ErrChecksum } - b := readBuf(buf[:end]) - out := &dataDescriptor{ - crc32: b.uint32(), - } + // The two sizes that follow here can be either 32 bits or 64 bits + // but the spec is not very clear on this and different + // interpretations has been made causing incompatibilities. We + // already have the sizes from the central directory so we can + // just ignore these. - if zip64 { - out.compressedSize = b.uint64() - out.uncompressedSize = b.uint64() - } else { - out.compressedSize = uint64(b.uint32()) - out.uncompressedSize = uint64(b.uint32()) - } - return out, nil + return nil } func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { diff --git a/zip/reader_test.go b/zip/reader_test.go index 75ee6e7f7d..8c3654ede1 100644 --- a/zip/reader_test.go +++ b/zip/reader_test.go @@ -1208,128 +1208,6 @@ func TestCVE202127919(t *testing.T) { } } -func TestReadDataDescriptor(t *testing.T) { - tests := []struct { - desc string - in []byte - zip64 bool - want *dataDescriptor - wantErr error - }{{ - desc: "valid 32 bit with signature", - in: []byte{ - 0x50, 0x4b, 0x07, 0x08, // signature - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, 0x06, 0x07, // compressed size - 0x08, 0x09, 0x0a, 0x0b, // uncompressed size - }, - want: &dataDescriptor{ - crc32: 0x03020100, - compressedSize: 0x07060504, - uncompressedSize: 0x0b0a0908, - }, - }, { - desc: "valid 32 bit without signature", - in: []byte{ - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, 0x06, 0x07, // compressed size - 0x08, 0x09, 0x0a, 0x0b, // uncompressed size - }, - want: &dataDescriptor{ - crc32: 0x03020100, - compressedSize: 0x07060504, - uncompressedSize: 0x0b0a0908, - }, - }, { - desc: "valid 64 bit with signature", - in: []byte{ - 0x50, 0x4b, 0x07, 0x08, // signature - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size - 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, // uncompressed size - }, - zip64: true, - want: &dataDescriptor{ - crc32: 0x03020100, - compressedSize: 0x0b0a090807060504, - uncompressedSize: 0x131211100f0e0d0c, - }, - }, { - desc: "valid 64 bit without signature", - in: []byte{ - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size - 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, // uncompressed size - }, - zip64: true, - want: &dataDescriptor{ - crc32: 0x03020100, - compressedSize: 0x0b0a090807060504, - uncompressedSize: 0x131211100f0e0d0c, - }, - }, { - desc: "invalid 32 bit with signature", - in: []byte{ - 0x50, 0x4b, 0x07, 0x08, // signature - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, // unexpected end - }, - wantErr: io.ErrUnexpectedEOF, - }, { - desc: "invalid 32 bit without signature", - in: []byte{ - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, // unexpected end - }, - wantErr: io.ErrUnexpectedEOF, - }, { - desc: "invalid 64 bit with signature", - in: []byte{ - 0x50, 0x4b, 0x07, 0x08, // signature - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size - 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, // unexpected end - }, - zip64: true, - wantErr: io.ErrUnexpectedEOF, - }, { - desc: "invalid 64 bit without signature", - in: []byte{ - 0x00, 0x01, 0x02, 0x03, // crc32 - 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size - 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, // unexpected end - }, - zip64: true, - wantErr: io.ErrUnexpectedEOF, - }} - - for _, test := range tests { - t.Run(test.desc, func(t *testing.T) { - r := bytes.NewReader(test.in) - - desc, err := readDataDescriptor(r, test.zip64) - if err != test.wantErr { - t.Fatalf("got err %v; want nil", err) - } - if test.want == nil { - return - } - if desc == nil { - t.Fatalf("got nil DataDescriptor; want non-nil") - } - if desc.crc32 != test.want.crc32 { - t.Errorf("got CRC32 %#x; want %#x", desc.crc32, test.want.crc32) - } - if desc.compressedSize != test.want.compressedSize { - t.Errorf("got CompressedSize %#x; want %#x", desc.compressedSize, test.want.compressedSize) - } - if desc.uncompressedSize != test.want.uncompressedSize { - t.Errorf("got UncompressedSize %#x; want %#x", desc.uncompressedSize, test.want.uncompressedSize) - } - }) - } -} - func TestCVE202133196(t *testing.T) { // Archive that indicates it has 1 << 128 -1 files, // this would previously cause a panic due to attempting diff --git a/zip/struct.go b/zip/struct.go index 66829a3f4a..69e2be810a 100644 --- a/zip/struct.go +++ b/zip/struct.go @@ -393,11 +393,3 @@ func unixModeToFileMode(m uint32) fs.FileMode { } return mode } - -// dataDescriptor holds the data descriptor that optionally follows the file -// contents in the zip file. -type dataDescriptor struct { - crc32 uint32 - compressedSize uint64 - uncompressedSize uint64 -}