diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go index c5edaf52e..4aa3c8e01 100644 --- a/plumbing/format/packfile/fsobject.go +++ b/plumbing/format/packfile/fsobject.go @@ -7,6 +7,7 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" "github.com/go-git/go-git/v5/plumbing/format/idxfile" + "github.com/go-git/go-git/v5/utils/ioutil" ) // FSObject is an object from the packfile on the filesystem. @@ -63,6 +64,20 @@ func (o *FSObject) Reader() (io.ReadCloser, error) { } p := NewPackfileWithCache(o.index, nil, f, o.cache) + if o.size > LargeObjectThreshold { + // We have a big object + h, err := p.objectHeaderAtOffset(o.offset) + if err != nil { + return nil, err + } + + r, err := p.getReaderDirect(h) + if err != nil { + _ = f.Close() + return nil, err + } + return ioutil.NewReadCloserWithCloser(r, f.Close), nil + } r, err := p.getObjectContent(o.offset) if err != nil { _ = f.Close() diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index ddd7f62fc..77861d257 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -32,6 +32,12 @@ var ( // wrapped in FSObject. const smallObjectThreshold = 16 * 1024 +// Conversely there are large objects that should not be cached and kept +// in memory as they're too large to be reasonably cached. Objects larger +// than this threshold are now always never read into memory to be stored +// in the cache +const LargeObjectThreshold = 1024 * 1024 + // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index @@ -282,6 +288,37 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return obj.Reader() } +func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + return p.s.ReadObject() + case plumbing.REFDeltaObject: + deltaRC, err := p.s.ReadObject() + if err != nil { + return nil, err + } + r, err := p.readREFDeltaObjectContent(h, deltaRC) + if err != nil { + _ = deltaRC.Close() + return nil, err + } + return r, nil + case plumbing.OFSDeltaObject: + deltaRC, err := p.s.ReadObject() + if err != nil { + return nil, err + } + r, err := p.readOFSDeltaObjectContent(h, deltaRC) + if err != nil { + _ = deltaRC.Close() + return nil, err + } + return r, nil + default: + return nil, ErrInvalidObject.AddDetails("type %q", h.Type) + } +} + func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) @@ -334,6 +371,20 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf) } +func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) { + var err error + + base, ok := p.cacheGet(h.Reference) + if !ok { + base, err = p.Get(h.Reference) + if err != nil { + return nil, err + } + } + + return ReaderFromDelta(h, base, deltaRC) +} + func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error { var err error @@ -364,6 +415,28 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf) } +func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) { + hash, err := p.FindHash(h.OffsetReference) + if err != nil { + return nil, err + } + + base, err := p.objectAtOffset(h.OffsetReference, hash) + if err != nil { + return nil, err + } + + base, ok := p.cacheGet(h.Reference) + if !ok { + base, err = p.Get(h.Reference) + if err != nil { + return nil, err + } + } + + return ReaderFromDelta(h, base, deltaRC) +} + func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error { hash, err := p.FindHash(offset) if err != nil { diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index 9e90f30a7..76fef4a13 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -1,9 +1,11 @@ package packfile import ( + "bufio" "bytes" "errors" "io" + "math" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/utils/ioutil" @@ -73,6 +75,131 @@ func PatchDelta(src, delta []byte) ([]byte, error) { return b.Bytes(), nil } +func ReaderFromDelta(h *ObjectHeader, base plumbing.EncodedObject, deltaRC io.ReadCloser) (io.ReadCloser, error) { + deltaBuf := bufio.NewReaderSize(deltaRC, 1024) + srcSz, err := decodeLEB128ByteReader(deltaBuf) + if err != nil { + if err == io.EOF { + return nil, ErrInvalidDelta + } + return nil, err + } + if srcSz != uint(base.Size()) { + return nil, ErrInvalidDelta + } + + targetSz, err := decodeLEB128ByteReader(deltaBuf) + if err != nil { + if err == io.EOF { + return nil, ErrInvalidDelta + } + return nil, err + } + remainingTargetSz := targetSz + + dstRd, dstWr := io.Pipe() + + go func() { + baseRd, err := base.Reader() + if err != nil { + _ = dstWr.CloseWithError(ErrInvalidDelta) + return + } + defer baseRd.Close() + + baseBuf := bufio.NewReader(baseRd) + basePos := uint(0) + + for { + cmd, err := deltaBuf.ReadByte() + if err == io.EOF { + _ = dstWr.CloseWithError(ErrInvalidDelta) + return + } + if err != nil { + _ = dstWr.CloseWithError(err) + return + } + + if isCopyFromSrc(cmd) { + offset, err := decodeOffsetByteReader(cmd, deltaBuf) + if err != nil { + _ = dstWr.CloseWithError(err) + return + } + sz, err := decodeSizeByteReader(cmd, deltaBuf) + if err != nil { + _ = dstWr.CloseWithError(err) + return + } + + if invalidSize(sz, targetSz) || + invalidOffsetSize(offset, sz, srcSz) { + _ = dstWr.Close() + return + } + + discard := offset - basePos + if discard < 0 { + _ = baseRd.Close() + baseRd, err = base.Reader() + if err != nil { + _ = dstWr.CloseWithError(ErrInvalidDelta) + return + } + baseBuf.Reset(baseRd) + discard = offset + } + for discard > math.MaxInt32 { + n, err := baseBuf.Discard(math.MaxInt32) + if err != nil { + _ = dstWr.CloseWithError(err) + return + } + basePos += uint(n) + discard -= uint(n) + } + for discard > 0 { + n, err := baseBuf.Discard(int(discard)) + if err != nil { + _ = dstWr.CloseWithError(err) + return + } + basePos += uint(n) + discard -= uint(n) + } + if _, err := io.Copy(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil { + _ = dstWr.CloseWithError(err) + return + } + remainingTargetSz -= sz + basePos += sz + } else if isCopyFromDelta(cmd) { + sz := uint(cmd) // cmd is the size itself + if invalidSize(sz, targetSz) { + _ = dstWr.CloseWithError(ErrInvalidDelta) + return + } + if _, err := io.Copy(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil { + _ = dstWr.CloseWithError(err) + return + } + + remainingTargetSz -= sz + } else { + _ = dstWr.CloseWithError(ErrDeltaCmd) + return + } + if remainingTargetSz <= 0 { + _ = dstWr.Close() + return + } + } + }() + + return dstRd, nil +} + func patchDelta(dst *bytes.Buffer, src, delta []byte) error { if len(delta) < deltaSizeMin { return ErrInvalidDelta @@ -161,6 +288,25 @@ func decodeLEB128(input []byte) (uint, []byte) { return num, input[sz:] } +func decodeLEB128ByteReader(input io.ByteReader) (uint, error) { + var num, sz uint + for { + b, err := input.ReadByte() + if err != nil { + return 0, err + } + + num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks + sz++ + + if uint(b)&continuation == 0 { + break + } + } + + return num, nil +} + const ( payload = 0x7f // 0111 1111 continuation = 0x80 // 1000 0000 @@ -174,6 +320,40 @@ func isCopyFromDelta(cmd byte) bool { return (cmd&0x80) == 0 && cmd != 0 } +func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) { + var offset uint + if (cmd & 0x01) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + offset = uint(next) + } + if (cmd & 0x02) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + offset |= uint(next) << 8 + } + if (cmd & 0x04) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + offset |= uint(next) << 16 + } + if (cmd & 0x08) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + offset |= uint(next) << 24 + } + + return offset, nil +} + func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) { var offset uint if (cmd & 0x01) != 0 { @@ -208,6 +388,36 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) { return offset, delta, nil } +func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) { + var sz uint + if (cmd & 0x10) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + sz = uint(next) + } + if (cmd & 0x20) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + sz |= uint(next) << 8 + } + if (cmd & 0x40) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + sz |= uint(next) << 16 + } + if sz == 0 { + sz = 0x10000 + } + + return sz, nil +} + func decodeSize(cmd byte, delta []byte) (uint, []byte, error) { var sz uint if (cmd & 0x10) != 0 { diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go index 6e6a68788..5d9e8fb65 100644 --- a/plumbing/format/packfile/scanner.go +++ b/plumbing/format/packfile/scanner.go @@ -320,6 +320,21 @@ func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err erro return } +// ReadObject returns a reader for the object content and an error +func (s *Scanner) ReadObject() (io.ReadCloser, error) { + s.pendingObject = nil + zr := zlibReaderPool.Get().(io.ReadCloser) + + if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil { + return nil, fmt.Errorf("zlib reset error: %s", err) + } + + return ioutil.NewReadCloserWithCloser(zr, func() error { + zlibReaderPool.Put(zr) + return nil + }), nil +} + // ReadRegularObject reads and write a non-deltified object // from it zlib stream in an object entry in the packfile. func (s *Scanner) copyObject(w io.Writer) (n int64, err error) { diff --git a/storage/filesystem/dotgit/reader.go b/storage/filesystem/dotgit/reader.go new file mode 100644 index 000000000..a82ac94eb --- /dev/null +++ b/storage/filesystem/dotgit/reader.go @@ -0,0 +1,79 @@ +package dotgit + +import ( + "fmt" + "io" + "os" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/format/objfile" + "github.com/go-git/go-git/v5/utils/ioutil" +) + +var _ (plumbing.EncodedObject) = &EncodedObject{} + +type EncodedObject struct { + dir *DotGit + h plumbing.Hash + t plumbing.ObjectType + sz int64 +} + +func (e *EncodedObject) Hash() plumbing.Hash { + return e.h +} + +func (e *EncodedObject) Reader() (io.ReadCloser, error) { + f, err := e.dir.Object(e.h) + if err != nil { + if os.IsNotExist(err) { + return nil, plumbing.ErrObjectNotFound + } + + return nil, err + } + r, err := objfile.NewReader(f) + if err != nil { + return nil, err + } + + t, size, err := r.Header() + if err != nil { + _ = r.Close() + return nil, err + } + if t != e.t { + _ = r.Close() + return nil, objfile.ErrHeader + } + if size != e.sz { + _ = r.Close() + return nil, objfile.ErrHeader + } + return ioutil.NewReadCloserWithCloser(r, f.Close), nil +} + +func (e *EncodedObject) SetType(plumbing.ObjectType) {} + +func (e *EncodedObject) Type() plumbing.ObjectType { + return e.t +} + +func (e *EncodedObject) Size() int64 { + return e.sz +} + +func (e *EncodedObject) SetSize(int64) {} + +func (e *EncodedObject) Writer() (io.WriteCloser, error) { + return nil, fmt.Errorf("Not supported") +} + +func NewEncodedObject(dir *DotGit, h plumbing.Hash, t plumbing.ObjectType, size int64) *EncodedObject { + return &EncodedObject{ + dir: dir, + h: h, + t: t, + sz: size, + } +} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 0c25dad61..4862362fc 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -389,7 +389,6 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb return cacheObj, nil } - obj = s.NewEncodedObject() r, err := objfile.NewReader(f) if err != nil { return nil, err @@ -402,6 +401,14 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb return nil, err } + if size > packfile.LargeObjectThreshold { + obj = dotgit.NewEncodedObject(s.dir, h, t, size) + s.objectCache.Put(obj) + return obj, nil + } + + obj = s.NewEncodedObject() + obj.SetType(t) obj.SetSize(size) w, err := obj.Writer() diff --git a/utils/ioutil/common.go b/utils/ioutil/common.go index b52e85a38..a0e79a2e2 100644 --- a/utils/ioutil/common.go +++ b/utils/ioutil/common.go @@ -55,6 +55,28 @@ func NewReadCloser(r io.Reader, c io.Closer) io.ReadCloser { return &readCloser{Reader: r, closer: c} } +type readCloserCloser struct { + io.ReadCloser + closer func() error +} + +func (r *readCloserCloser) Close() (err error) { + defer func() { + if err == nil { + err = r.closer() + return + } + _ = r.closer() + }() + return r.ReadCloser.Close() +} + +// NewReadCloserWithCloser creates an `io.ReadCloser` with the given `io.ReaderCloser` and +// `io.Closer` that ensures that the closer is closed on close +func NewReadCloserWithCloser(r io.ReadCloser, c func() error) io.ReadCloser { + return &readCloserCloser{ReadCloser: r, closer: c} +} + type writeCloser struct { io.Writer closer io.Closer