Skip to content

Commit

Permalink
zip: Merge upstream (#631)
Browse files Browse the repository at this point in the history
* zip: Merge upstream

Add Go 1.19 improvements.
  • Loading branch information
klauspost committed Jun 27, 2022
1 parent 9bbb415 commit 51e1025
Show file tree
Hide file tree
Showing 10 changed files with 494 additions and 64 deletions.
84 changes: 84 additions & 0 deletions zip/fuzz_test.go
@@ -0,0 +1,84 @@
//go:build go1.18
// +build go1.18

// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package zip

import (
"bytes"
"io"
"os"
"path/filepath"
"testing"
)

func FuzzReader(f *testing.F) {
testdata, err := os.ReadDir("testdata")
if err != nil {
f.Fatalf("failed to read testdata directory: %s", err)
}
for _, de := range testdata {
if de.IsDir() {
continue
}
b, err := os.ReadFile(filepath.Join("testdata", de.Name()))
if err != nil {
f.Fatalf("failed to read testdata: %s", err)
}
f.Add(b)
}

f.Fuzz(func(t *testing.T, b []byte) {
r, err := NewReader(bytes.NewReader(b), int64(len(b)))
if err != nil {
return
}

type file struct {
header *FileHeader
content []byte
}
files := []file{}

for _, f := range r.File {
fr, err := f.Open()
if err != nil {
continue
}
content, err := io.ReadAll(fr)
if err != nil {
continue
}
files = append(files, file{header: &f.FileHeader, content: content})
if _, err := r.Open(f.Name); err != nil {
continue
}
}

// If we were unable to read anything out of the archive don't
// bother trying to roundtrip it.
if len(files) == 0 {
return
}

w := NewWriter(io.Discard)
for _, f := range files {
ww, err := w.CreateHeader(f.header)
if err != nil {
t.Fatalf("unable to write previously parsed header: %s", err)
}
if _, err := ww.Write(f.content); err != nil {
t.Fatalf("unable to write previously parsed content: %s", err)
}
}

if err := w.Close(); err != nil {
t.Fatalf("Unable to write archive: %s", err)
}

// TODO: We may want to check if the archive roundtrips.
})
}
122 changes: 92 additions & 30 deletions zip/reader.go
Expand Up @@ -33,6 +33,10 @@ type Reader struct {
Comment string
decompressors map[uint16]Decompressor

// Some JAR files are zip files with a prefix that is a bash script.
// The baseOffset field is the start of the zip file proper.
baseOffset int64

// fileList is a list of files sorted by ename,
// for use by the Open method.
fileListOnce sync.Once
Expand All @@ -52,9 +56,8 @@ type File struct {
FileHeader
zip *Reader
zipr io.ReaderAt
headerOffset int64
headerOffset int64 // includes overall ZIP archive baseOffset
zip64 bool // zip64 extended information extra field presence
descErr error // error reading the data descriptor during init
}

// OpenReader will open the Zip file specified by name and return a ReadCloser.
Expand Down Expand Up @@ -91,23 +94,24 @@ func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
}

func (z *Reader) init(r io.ReaderAt, size int64) error {
end, err := readDirectoryEnd(r, size)
end, baseOffset, err := readDirectoryEnd(r, size)
if err != nil {
return err
}
z.r = r
z.baseOffset = baseOffset
// Since the number of directory records is not validated, it is not
// safe to preallocate z.File without first checking that the specified
// number of files is reasonable, since a malformed archive may
// indicate it contains up to 1 << 128 - 1 files. Since each file has a
// header which will be _at least_ 30 bytes we can safely preallocate
// if (data size / 30) >= end.directoryRecords.
if (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
z.File = make([]*File, 0, end.directoryRecords)
}
z.Comment = end.comment
rs := io.NewSectionReader(r, 0, size)
if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
if _, err = rs.Seek(z.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil {
return err
}
buf := bufio.NewReader(rs)
Expand All @@ -119,12 +123,27 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
for {
f := &File{zip: z, zipr: r}
err = readDirectoryHeader(f, buf)

// For compatibility with other zip programs,
// if we have a non-zero base offset and can't read
// the first directory header, try again with a zero
// base offset.
if err == ErrFormat && z.baseOffset != 0 && len(z.File) == 0 {
z.baseOffset = 0
if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
return err
}
buf.Reset(rs)
continue
}

if err == ErrFormat || err == io.ErrUnexpectedEOF {
break
}
if err != nil {
return err
}
f.headerOffset += z.baseOffset
z.File = append(z.File, f)
}
if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
Expand Down Expand Up @@ -229,6 +248,9 @@ func (r *checksumReader) Read(b []byte) (n int, err error) {
n, err = r.rc.Read(b)
r.hash.Write(b[:n])
r.nread += uint64(n)
if r.nread > r.f.UncompressedSize64 {
return 0, ErrFormat
}
if err == nil {
return
}
Expand Down Expand Up @@ -492,7 +514,7 @@ func readDataDescriptor(r io.Reader, f *File) error {
return nil
}

func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) {
// look for directoryEndSignature in the last 1k, then in the last 65k
var buf []byte
var directoryEndOffset int64
Expand All @@ -502,15 +524,15 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error)
}
buf = make([]byte, int(bLen))
if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
return nil, err
return nil, 0, err
}
if p := findSignatureInBlock(buf); p >= 0 {
buf = buf[p:]
directoryEndOffset = size - bLen + int64(p)
break
}
if i == 1 || bLen == size {
return nil, ErrFormat
return nil, 0, ErrFormat
}
}

Expand All @@ -527,25 +549,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error)
}
l := int(d.commentLen)
if l > len(b) {
return nil, errors.New("zip: invalid comment length")
return nil, 0, errors.New("zip: invalid comment length")
}
d.comment = string(b[:l])

// These values mean that the file can be a zip64 file
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
p, err := findDirectory64End(r, directoryEndOffset)
if err == nil && p >= 0 {
directoryEndOffset = p
err = readDirectory64End(r, p, d)
}
if err != nil {
return nil, err
return nil, 0, err
}
}

baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset)

// Make sure directoryOffset points to somewhere in our file.
if o := int64(d.directoryOffset); o < 0 || o >= size {
return nil, ErrFormat
if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size {
return nil, 0, ErrFormat
}
return d, nil
return d, baseOffset, nil
}

// findDirectory64End tries to read the zip64 locator just before the
Expand Down Expand Up @@ -650,18 +676,22 @@ type fileListEntry struct {
name string
file *File
isDir bool
isDup bool
}

type fileInfoDirEntry interface {
fs.FileInfo
fs.DirEntry
}

func (e *fileListEntry) stat() fileInfoDirEntry {
func (e *fileListEntry) stat() (fileInfoDirEntry, error) {
if e.isDup {
return nil, errors.New(e.name + ": duplicate entries in zip file")
}
if !e.isDir {
return headerFileInfo{&e.file.FileHeader}
return headerFileInfo{&e.file.FileHeader}, nil
}
return e
return e, nil
}

// Only used for directories.
Expand Down Expand Up @@ -696,32 +726,61 @@ func toValidName(name string) string {

func (r *Reader) initFileList() {
r.fileListOnce.Do(func() {
// files and knownDirs map from a file/directory name
// to an index into the r.fileList entry that we are
// building. They are used to mark duplicate entries.
files := make(map[string]int)
knownDirs := make(map[string]int)

// dirs[name] is true if name is known to be a directory,
// because it appears as a prefix in a path.
dirs := make(map[string]bool)
knownDirs := make(map[string]bool)

for _, file := range r.File {
isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
name := toValidName(file.Name)
if name == "" {
continue
}

if idx, ok := files[name]; ok {
r.fileList[idx].isDup = true
continue
}
if idx, ok := knownDirs[name]; ok {
r.fileList[idx].isDup = true
continue
}

for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
dirs[dir] = true
}

idx := len(r.fileList)
entry := fileListEntry{
name: name,
file: file,
isDir: isDir,
}
r.fileList = append(r.fileList, entry)
if isDir {
knownDirs[name] = true
knownDirs[name] = idx
} else {
files[name] = idx
}
}
for dir := range dirs {
if !knownDirs[dir] {
entry := fileListEntry{
name: dir,
file: nil,
isDir: true,
if _, ok := knownDirs[dir]; !ok {
if idx, ok := files[dir]; ok {
r.fileList[idx].isDup = true
} else {
entry := fileListEntry{
name: dir,
file: nil,
isDir: true,
}
r.fileList = append(r.fileList, entry)
}
r.fileList = append(r.fileList, entry)
}
}

Expand All @@ -740,12 +799,11 @@ func fileEntryLess(x, y string) bool {
// paths are always slash separated, with no
// leading / or ../ elements.
func (r *Reader) Open(name string) (fs.File, error) {
r.initFileList()

if !fs.ValidPath(name) {
return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
}

r.initFileList()

e := r.openLookup(name)
if e == nil {
return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
Expand All @@ -761,7 +819,7 @@ func (r *Reader) Open(name string) (fs.File, error) {
}

func split(name string) (dir, elem string, isDir bool) {
if name[len(name)-1] == '/' {
if len(name) > 0 && name[len(name)-1] == '/' {
isDir = true
name = name[:len(name)-1]
}
Expand Down Expand Up @@ -817,7 +875,7 @@ type openDir struct {
}

func (d *openDir) Close() error { return nil }
func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil }
func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() }

func (d *openDir) Read([]byte) (int, error) {
return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
Expand All @@ -836,7 +894,11 @@ func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
}
list := make([]fs.DirEntry, n)
for i := range list {
list[i] = d.files[d.offset+i].stat()
s, err := d.files[d.offset+i].stat()
if err != nil {
return nil, err
}
list[i] = s
}
d.offset += n
return list, nil
Expand Down

0 comments on commit 51e1025

Please sign in to comment.