From da810275bf682d29a530ed819aff175f47bd7634 Mon Sep 17 00:00:00 2001 From: zeripath Date: Wed, 2 Jun 2021 10:03:28 +0100 Subject: [PATCH] Revert "plumbing: format/packfile, prevent large objects from being read into memory completely (#303)" (#329) This reverts commit 720c192831a890d0a36b4c6720b60411fa4a0159. --- plumbing/format/packfile/fsobject.go | 15 -- plumbing/format/packfile/packfile.go | 73 -------- plumbing/format/packfile/patch_delta.go | 210 ------------------------ plumbing/format/packfile/scanner.go | 15 -- storage/filesystem/dotgit/reader.go | 79 --------- storage/filesystem/object.go | 9 +- utils/ioutil/common.go | 22 --- 7 files changed, 1 insertion(+), 422 deletions(-) delete mode 100644 storage/filesystem/dotgit/reader.go diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go index 4aa3c8e01..c5edaf52e 100644 --- a/plumbing/format/packfile/fsobject.go +++ b/plumbing/format/packfile/fsobject.go @@ -7,7 +7,6 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" "github.com/go-git/go-git/v5/plumbing/format/idxfile" - "github.com/go-git/go-git/v5/utils/ioutil" ) // FSObject is an object from the packfile on the filesystem. @@ -64,20 +63,6 @@ func (o *FSObject) Reader() (io.ReadCloser, error) { } p := NewPackfileWithCache(o.index, nil, f, o.cache) - if o.size > LargeObjectThreshold { - // We have a big object - h, err := p.objectHeaderAtOffset(o.offset) - if err != nil { - return nil, err - } - - r, err := p.getReaderDirect(h) - if err != nil { - _ = f.Close() - return nil, err - } - return ioutil.NewReadCloserWithCloser(r, f.Close), nil - } r, err := p.getObjectContent(o.offset) if err != nil { _ = f.Close() diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 77861d257..ddd7f62fc 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -32,12 +32,6 @@ var ( // wrapped in FSObject. const smallObjectThreshold = 16 * 1024 -// Conversely there are large objects that should not be cached and kept -// in memory as they're too large to be reasonably cached. Objects larger -// than this threshold are now always never read into memory to be stored -// in the cache -const LargeObjectThreshold = 1024 * 1024 - // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index @@ -288,37 +282,6 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return obj.Reader() } -func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) { - switch h.Type { - case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - return p.s.ReadObject() - case plumbing.REFDeltaObject: - deltaRC, err := p.s.ReadObject() - if err != nil { - return nil, err - } - r, err := p.readREFDeltaObjectContent(h, deltaRC) - if err != nil { - _ = deltaRC.Close() - return nil, err - } - return r, nil - case plumbing.OFSDeltaObject: - deltaRC, err := p.s.ReadObject() - if err != nil { - return nil, err - } - r, err := p.readOFSDeltaObjectContent(h, deltaRC) - if err != nil { - _ = deltaRC.Close() - return nil, err - } - return r, nil - default: - return nil, ErrInvalidObject.AddDetails("type %q", h.Type) - } -} - func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) @@ -371,20 +334,6 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf) } -func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) { - var err error - - base, ok := p.cacheGet(h.Reference) - if !ok { - base, err = p.Get(h.Reference) - if err != nil { - return nil, err - } - } - - return ReaderFromDelta(h, base, deltaRC) -} - func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error { var err error @@ -415,28 +364,6 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf) } -func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) { - hash, err := p.FindHash(h.OffsetReference) - if err != nil { - return nil, err - } - - base, err := p.objectAtOffset(h.OffsetReference, hash) - if err != nil { - return nil, err - } - - base, ok := p.cacheGet(h.Reference) - if !ok { - base, err = p.Get(h.Reference) - if err != nil { - return nil, err - } - } - - return ReaderFromDelta(h, base, deltaRC) -} - func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error { hash, err := p.FindHash(offset) if err != nil { diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index 76fef4a13..9e90f30a7 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -1,11 +1,9 @@ package packfile import ( - "bufio" "bytes" "errors" "io" - "math" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/utils/ioutil" @@ -75,131 +73,6 @@ func PatchDelta(src, delta []byte) ([]byte, error) { return b.Bytes(), nil } -func ReaderFromDelta(h *ObjectHeader, base plumbing.EncodedObject, deltaRC io.ReadCloser) (io.ReadCloser, error) { - deltaBuf := bufio.NewReaderSize(deltaRC, 1024) - srcSz, err := decodeLEB128ByteReader(deltaBuf) - if err != nil { - if err == io.EOF { - return nil, ErrInvalidDelta - } - return nil, err - } - if srcSz != uint(base.Size()) { - return nil, ErrInvalidDelta - } - - targetSz, err := decodeLEB128ByteReader(deltaBuf) - if err != nil { - if err == io.EOF { - return nil, ErrInvalidDelta - } - return nil, err - } - remainingTargetSz := targetSz - - dstRd, dstWr := io.Pipe() - - go func() { - baseRd, err := base.Reader() - if err != nil { - _ = dstWr.CloseWithError(ErrInvalidDelta) - return - } - defer baseRd.Close() - - baseBuf := bufio.NewReader(baseRd) - basePos := uint(0) - - for { - cmd, err := deltaBuf.ReadByte() - if err == io.EOF { - _ = dstWr.CloseWithError(ErrInvalidDelta) - return - } - if err != nil { - _ = dstWr.CloseWithError(err) - return - } - - if isCopyFromSrc(cmd) { - offset, err := decodeOffsetByteReader(cmd, deltaBuf) - if err != nil { - _ = dstWr.CloseWithError(err) - return - } - sz, err := decodeSizeByteReader(cmd, deltaBuf) - if err != nil { - _ = dstWr.CloseWithError(err) - return - } - - if invalidSize(sz, targetSz) || - invalidOffsetSize(offset, sz, srcSz) { - _ = dstWr.Close() - return - } - - discard := offset - basePos - if discard < 0 { - _ = baseRd.Close() - baseRd, err = base.Reader() - if err != nil { - _ = dstWr.CloseWithError(ErrInvalidDelta) - return - } - baseBuf.Reset(baseRd) - discard = offset - } - for discard > math.MaxInt32 { - n, err := baseBuf.Discard(math.MaxInt32) - if err != nil { - _ = dstWr.CloseWithError(err) - return - } - basePos += uint(n) - discard -= uint(n) - } - for discard > 0 { - n, err := baseBuf.Discard(int(discard)) - if err != nil { - _ = dstWr.CloseWithError(err) - return - } - basePos += uint(n) - discard -= uint(n) - } - if _, err := io.Copy(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil { - _ = dstWr.CloseWithError(err) - return - } - remainingTargetSz -= sz - basePos += sz - } else if isCopyFromDelta(cmd) { - sz := uint(cmd) // cmd is the size itself - if invalidSize(sz, targetSz) { - _ = dstWr.CloseWithError(ErrInvalidDelta) - return - } - if _, err := io.Copy(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil { - _ = dstWr.CloseWithError(err) - return - } - - remainingTargetSz -= sz - } else { - _ = dstWr.CloseWithError(ErrDeltaCmd) - return - } - if remainingTargetSz <= 0 { - _ = dstWr.Close() - return - } - } - }() - - return dstRd, nil -} - func patchDelta(dst *bytes.Buffer, src, delta []byte) error { if len(delta) < deltaSizeMin { return ErrInvalidDelta @@ -288,25 +161,6 @@ func decodeLEB128(input []byte) (uint, []byte) { return num, input[sz:] } -func decodeLEB128ByteReader(input io.ByteReader) (uint, error) { - var num, sz uint - for { - b, err := input.ReadByte() - if err != nil { - return 0, err - } - - num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks - sz++ - - if uint(b)&continuation == 0 { - break - } - } - - return num, nil -} - const ( payload = 0x7f // 0111 1111 continuation = 0x80 // 1000 0000 @@ -320,40 +174,6 @@ func isCopyFromDelta(cmd byte) bool { return (cmd&0x80) == 0 && cmd != 0 } -func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) { - var offset uint - if (cmd & 0x01) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - offset = uint(next) - } - if (cmd & 0x02) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - offset |= uint(next) << 8 - } - if (cmd & 0x04) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - offset |= uint(next) << 16 - } - if (cmd & 0x08) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - offset |= uint(next) << 24 - } - - return offset, nil -} - func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) { var offset uint if (cmd & 0x01) != 0 { @@ -388,36 +208,6 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) { return offset, delta, nil } -func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) { - var sz uint - if (cmd & 0x10) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - sz = uint(next) - } - if (cmd & 0x20) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - sz |= uint(next) << 8 - } - if (cmd & 0x40) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - sz |= uint(next) << 16 - } - if sz == 0 { - sz = 0x10000 - } - - return sz, nil -} - func decodeSize(cmd byte, delta []byte) (uint, []byte, error) { var sz uint if (cmd & 0x10) != 0 { diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go index 5d9e8fb65..6e6a68788 100644 --- a/plumbing/format/packfile/scanner.go +++ b/plumbing/format/packfile/scanner.go @@ -320,21 +320,6 @@ func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err erro return } -// ReadObject returns a reader for the object content and an error -func (s *Scanner) ReadObject() (io.ReadCloser, error) { - s.pendingObject = nil - zr := zlibReaderPool.Get().(io.ReadCloser) - - if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil { - return nil, fmt.Errorf("zlib reset error: %s", err) - } - - return ioutil.NewReadCloserWithCloser(zr, func() error { - zlibReaderPool.Put(zr) - return nil - }), nil -} - // ReadRegularObject reads and write a non-deltified object // from it zlib stream in an object entry in the packfile. func (s *Scanner) copyObject(w io.Writer) (n int64, err error) { diff --git a/storage/filesystem/dotgit/reader.go b/storage/filesystem/dotgit/reader.go deleted file mode 100644 index a82ac94eb..000000000 --- a/storage/filesystem/dotgit/reader.go +++ /dev/null @@ -1,79 +0,0 @@ -package dotgit - -import ( - "fmt" - "io" - "os" - - "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/format/objfile" - "github.com/go-git/go-git/v5/utils/ioutil" -) - -var _ (plumbing.EncodedObject) = &EncodedObject{} - -type EncodedObject struct { - dir *DotGit - h plumbing.Hash - t plumbing.ObjectType - sz int64 -} - -func (e *EncodedObject) Hash() plumbing.Hash { - return e.h -} - -func (e *EncodedObject) Reader() (io.ReadCloser, error) { - f, err := e.dir.Object(e.h) - if err != nil { - if os.IsNotExist(err) { - return nil, plumbing.ErrObjectNotFound - } - - return nil, err - } - r, err := objfile.NewReader(f) - if err != nil { - return nil, err - } - - t, size, err := r.Header() - if err != nil { - _ = r.Close() - return nil, err - } - if t != e.t { - _ = r.Close() - return nil, objfile.ErrHeader - } - if size != e.sz { - _ = r.Close() - return nil, objfile.ErrHeader - } - return ioutil.NewReadCloserWithCloser(r, f.Close), nil -} - -func (e *EncodedObject) SetType(plumbing.ObjectType) {} - -func (e *EncodedObject) Type() plumbing.ObjectType { - return e.t -} - -func (e *EncodedObject) Size() int64 { - return e.sz -} - -func (e *EncodedObject) SetSize(int64) {} - -func (e *EncodedObject) Writer() (io.WriteCloser, error) { - return nil, fmt.Errorf("Not supported") -} - -func NewEncodedObject(dir *DotGit, h plumbing.Hash, t plumbing.ObjectType, size int64) *EncodedObject { - return &EncodedObject{ - dir: dir, - h: h, - t: t, - sz: size, - } -} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 4862362fc..0c25dad61 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -389,6 +389,7 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb return cacheObj, nil } + obj = s.NewEncodedObject() r, err := objfile.NewReader(f) if err != nil { return nil, err @@ -401,14 +402,6 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb return nil, err } - if size > packfile.LargeObjectThreshold { - obj = dotgit.NewEncodedObject(s.dir, h, t, size) - s.objectCache.Put(obj) - return obj, nil - } - - obj = s.NewEncodedObject() - obj.SetType(t) obj.SetSize(size) w, err := obj.Writer() diff --git a/utils/ioutil/common.go b/utils/ioutil/common.go index a0e79a2e2..b52e85a38 100644 --- a/utils/ioutil/common.go +++ b/utils/ioutil/common.go @@ -55,28 +55,6 @@ func NewReadCloser(r io.Reader, c io.Closer) io.ReadCloser { return &readCloser{Reader: r, closer: c} } -type readCloserCloser struct { - io.ReadCloser - closer func() error -} - -func (r *readCloserCloser) Close() (err error) { - defer func() { - if err == nil { - err = r.closer() - return - } - _ = r.closer() - }() - return r.ReadCloser.Close() -} - -// NewReadCloserWithCloser creates an `io.ReadCloser` with the given `io.ReaderCloser` and -// `io.Closer` that ensures that the closer is closed on close -func NewReadCloserWithCloser(r io.ReadCloser, c func() error) io.ReadCloser { - return &readCloserCloser{ReadCloser: r, closer: c} -} - type writeCloser struct { io.Writer closer io.Closer