plumbing: format/packfile, prevent large objects from being read into memory completely #330

Merged: 6 commits, Jun 30, 2021
15 changes: 15 additions & 0 deletions plumbing/format/packfile/fsobject.go
@@ -7,6 +7,7 @@ import (
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/plumbing/format/idxfile"
"github.com/go-git/go-git/v5/utils/ioutil"
)

// FSObject is an object from the packfile on the filesystem.
@@ -63,6 +64,20 @@ func (o *FSObject) Reader() (io.ReadCloser, error) {
	}

	p := NewPackfileWithCache(o.index, nil, f, o.cache)
	if o.size > LargeObjectThreshold {
		// We have a big object: hand back a streaming reader instead of
		// loading the object into memory.
		h, err := p.objectHeaderAtOffset(o.offset)
		if err != nil {
			return nil, err
		}

		r, err := p.getReaderDirect(h)
		if err != nil {
			_ = f.Close()
			return nil, err
		}
		return ioutil.NewReadCloserWithCloser(r, f.Close), nil
	}
	r, err := p.getObjectContent(o.offset)
	if err != nil {
		_ = f.Close()
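For context, a hedged sketch of what this buys a caller: with the branch above, reading a blob larger than LargeObjectThreshold streams from the packfile rather than materialising the whole object (obj and dst are assumed placeholders, not names from the diff):

```go
// obj is an EncodedObject backed by the packfile (e.g. an *FSObject).
r, err := obj.Reader()
if err != nil {
	return err
}
defer r.Close()

// For objects above LargeObjectThreshold this now copies straight from
// the decompressed packfile stream; the object is never fully buffered.
if _, err := io.Copy(dst, r); err != nil {
	return err
}
```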
80 changes: 80 additions & 0 deletions plumbing/format/packfile/packfile.go
@@ -2,6 +2,8 @@ package packfile

import (
"bytes"
"compress/zlib"
"fmt"
"io"
"os"

@@ -32,6 +34,12 @@ var (
// wrapped in FSObject.
const smallObjectThreshold = 16 * 1024

// Conversely, large objects should not be cached and kept in memory, as
// they are too large to be reasonably cached. Objects larger than this
// threshold are never read fully into memory to be stored in the cache.
const LargeObjectThreshold = 1024 * 1024

// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
	idxfile.Index
@@ -282,6 +290,50 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
	return obj.Reader()
}

func asyncReader(p *Packfile) (io.ReadCloser, error) {
	// Wrap the packfile in a ReaderAt-based reader so that decompressing
	// the object does not move the scanner's position.
	reader := ioutil.NewReaderUsingReaderAt(p.file, p.s.r.offset)
	zr := zlibReaderPool.Get().(io.ReadCloser)

	if err := zr.(zlib.Resetter).Reset(reader, nil); err != nil {
		return nil, fmt.Errorf("zlib reset error: %s", err)
	}

	// Return the zlib reader to the pool when the caller closes it.
	return ioutil.NewReadCloserWithCloser(zr, func() error {
		zlibReaderPool.Put(zr)
		return nil
	}), nil
}

func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) {
	switch h.Type {
	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
		return asyncReader(p)
	case plumbing.REFDeltaObject:
		deltaRc, err := asyncReader(p)
		if err != nil {
			return nil, err
		}
		r, err := p.readREFDeltaObjectContent(h, deltaRc)
		if err != nil {
			return nil, err
		}
		return r, nil
	case plumbing.OFSDeltaObject:
		deltaRc, err := asyncReader(p)
@zeripath (Contributor, Author) commented on Jun 2, 2021:

This PR differs from the previous #303 here by wrapping the reader in a ReaderAt, so as to prevent problems from the scanner moving on.
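A standalone sketch of the idea, using the stdlib's io.NewSectionReader (analogous to go-git's ioutil.NewReaderUsingReaderAt, whose behaviour is assumed from its use in asyncReader above):

```go
package main

import (
	"fmt"
	"io"
	"strings"
)

func main() {
	// Stands in for the packfile: any io.ReaderAt works.
	f := strings.NewReader("0123456789")

	// A ReaderAt-based reader keeps its own offset, so reading the
	// object's bytes never moves a shared file position that the
	// packfile scanner depends on.
	r := io.NewSectionReader(f, 4, 4)
	buf := make([]byte, 4)
	if _, err := io.ReadFull(r, buf); err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", buf) // prints "4567"
}
```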

@zeripath (Contributor, Author) commented on Jun 2, 2021:

There was also a bug here in the original #303 that this fixes (see the comment on the discard check in patch_delta.go below).

		if err != nil {
			return nil, err
		}
		r, err := p.readOFSDeltaObjectContent(h, deltaRc)
		if err != nil {
			return nil, err
		}
		return r, nil
	default:
		return nil, ErrInvalidObject.AddDetails("type %q", h.Type)
	}
}

func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
	var obj = new(plumbing.MemoryObject)
	obj.SetSize(h.Length)
@@ -334,6 +386,20 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
	return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
}

func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.Reader) (io.ReadCloser, error) {
	var err error

	// Resolve the delta's base object, preferring the cache.
	base, ok := p.cacheGet(h.Reference)
	if !ok {
		base, err = p.Get(h.Reference)
		if err != nil {
			return nil, err
		}
	}

	return ReaderFromDelta(h, base, deltaRC)
}

func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
	var err error

@@ -364,6 +430,20 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset
	return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
}

func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.Reader) (io.ReadCloser, error) {
	// Resolve the base object from its offset within the packfile.
	hash, err := p.FindHash(h.OffsetReference)
	if err != nil {
		return nil, err
	}

	base, err := p.objectAtOffset(h.OffsetReference, hash)
	if err != nil {
		return nil, err
	}

	return ReaderFromDelta(h, base, deltaRC)
}

func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
	hash, err := p.FindHash(offset)
	if err != nil {
210 changes: 210 additions & 0 deletions plumbing/format/packfile/patch_delta.go
@@ -1,9 +1,11 @@
package packfile

import (
"bufio"
"bytes"
"errors"
"io"
"math"

"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/utils/ioutil"
@@ -73,6 +75,131 @@ func PatchDelta(src, delta []byte) ([]byte, error) {
	return b.Bytes(), nil
}

func ReaderFromDelta(h *ObjectHeader, base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadCloser, error) {
	deltaBuf := bufio.NewReaderSize(deltaRC, 1024)

	// A delta stream starts with the expected size of the source (base)
	// object, followed by the expected size of the target object.
	srcSz, err := decodeLEB128ByteReader(deltaBuf)
	if err != nil {
		if err == io.EOF {
			return nil, ErrInvalidDelta
		}
		return nil, err
	}
	if srcSz != uint(base.Size()) {
		return nil, ErrInvalidDelta
	}

	targetSz, err := decodeLEB128ByteReader(deltaBuf)
	if err != nil {
		if err == io.EOF {
			return nil, ErrInvalidDelta
		}
		return nil, err
	}
	remainingTargetSz := targetSz

	dstRd, dstWr := io.Pipe()

	go func() {
		baseRd, err := base.Reader()
		if err != nil {
			_ = dstWr.CloseWithError(ErrInvalidDelta)
			return
		}
		defer baseRd.Close()

		baseBuf := bufio.NewReader(baseRd)
		basePos := uint(0)

		for {
			cmd, err := deltaBuf.ReadByte()
			if err == io.EOF {
				_ = dstWr.CloseWithError(ErrInvalidDelta)
				return
			}
			if err != nil {
				_ = dstWr.CloseWithError(err)
				return
			}

			if isCopyFromSrc(cmd) {
				offset, err := decodeOffsetByteReader(cmd, deltaBuf)
				if err != nil {
					_ = dstWr.CloseWithError(err)
					return
				}
				sz, err := decodeSizeByteReader(cmd, deltaBuf)
				if err != nil {
					_ = dstWr.CloseWithError(err)
					return
				}

				if invalidSize(sz, targetSz) ||
					invalidOffsetSize(offset, sz, srcSz) {
					_ = dstWr.Close()
					return
				}

				discard := offset - basePos
				if basePos > offset {
@zeripath (Contributor, Author) commented:

This line was changed from the original PR, where it was `if discard < 0 {`. discard here is a uint, not an int64, so checking whether it is less than 0 will always be false. This was the cause of the truncated checkouts.
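A minimal standalone illustration of the wraparound (hypothetical values, not from the PR):

```go
package main

import "fmt"

func main() {
	var basePos, offset uint = 10, 4

	discard := offset - basePos // unsigned subtraction wraps around
	fmt.Println(discard)        // 18446744073709551610 on 64-bit platforms
	fmt.Println(discard < 0)    // always false: a uint is never negative

	// The corrected code compares before subtracting:
	fmt.Println(basePos > offset) // true: the base reader must be rewound
}
```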

					// The requested offset is behind the current position:
					// reopen the base object and skip forward from the start.
					_ = baseRd.Close()
					baseRd, err = base.Reader()
					if err != nil {
						_ = dstWr.CloseWithError(ErrInvalidDelta)
						return
					}
					baseBuf.Reset(baseRd)
					discard = offset
				}
				for discard > math.MaxInt32 {
					n, err := baseBuf.Discard(math.MaxInt32)
					if err != nil {
						_ = dstWr.CloseWithError(err)
						return
					}
					basePos += uint(n)
					discard -= uint(n)
				}
				for discard > 0 {
					n, err := baseBuf.Discard(int(discard))
					if err != nil {
						_ = dstWr.CloseWithError(err)
						return
					}
					basePos += uint(n)
					discard -= uint(n)
				}
				if _, err := io.Copy(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil {
					_ = dstWr.CloseWithError(err)
					return
				}
				remainingTargetSz -= sz
				basePos += sz
			} else if isCopyFromDelta(cmd) {
				sz := uint(cmd) // cmd is the size itself
				if invalidSize(sz, targetSz) {
					_ = dstWr.CloseWithError(ErrInvalidDelta)
					return
				}
				if _, err := io.Copy(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil {
					_ = dstWr.CloseWithError(err)
					return
				}

				remainingTargetSz -= sz
			} else {
				_ = dstWr.CloseWithError(ErrDeltaCmd)
				return
			}
			if remainingTargetSz <= 0 {
				_ = dstWr.Close()
				return
			}
		}
	}()

	return dstRd, nil
}
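For context, a hedged usage sketch of the streaming reader (dst is an assumed io.Writer, not a name from the diff): the io.Pipe decouples the goroutine applying delta instructions from the consumer, so the target object is produced incrementally as it is read.

```go
rc, err := ReaderFromDelta(h, base, deltaRC)
if err != nil {
	return err
}
defer rc.Close()

// Reading from rc drives the goroutine above; the reconstructed
// object is streamed and never fully buffered in memory.
if _, err := io.Copy(dst, rc); err != nil {
	return err
}
```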

func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
	if len(delta) < deltaSizeMin {
		return ErrInvalidDelta
@@ -161,6 +288,25 @@ func decodeLEB128(input []byte) (uint, []byte) {
	return num, input[sz:]
}

func decodeLEB128ByteReader(input io.ByteReader) (uint, error) {
	var num, sz uint
	for {
		b, err := input.ReadByte()
		if err != nil {
			return 0, err
		}

		num |= (uint(b) & payload) << (sz * 7) // concatenate 7-bit chunks, least significant first
		sz++

		if uint(b)&continuation == 0 {
			break
		}
	}

	return num, nil
}
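A short worked example (a sketch; bytes.NewReader satisfies io.ByteReader):

```go
// 0xE5 -> payload 0x65, continuation bit set
// 0x8E -> payload 0x0E, continuation bit set
// 0x26 -> payload 0x26, continuation bit clear
// num = 0x65 | 0x0E<<7 | 0x26<<14 = 624485
n, err := decodeLEB128ByteReader(bytes.NewReader([]byte{0xE5, 0x8E, 0x26}))
// n == 624485, err == nil
```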

const (
	payload      = 0x7f // 0111 1111
	continuation = 0x80 // 1000 0000
@@ -174,6 +320,40 @@ func isCopyFromDelta(cmd byte) bool {
return (cmd&0x80) == 0 && cmd != 0
}

// decodeOffsetByteReader reads the base offset of a copy command: bits
// 0x01-0x08 of cmd signal which of the four little-endian offset bytes
// follow in the delta stream.
func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) {
	var offset uint
	if (cmd & 0x01) != 0 {
		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		offset = uint(next)
	}
	if (cmd & 0x02) != 0 {
		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		offset |= uint(next) << 8
	}
	if (cmd & 0x04) != 0 {
		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		offset |= uint(next) << 16
	}
	if (cmd & 0x08) != 0 {
		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		offset |= uint(next) << 24
	}

	return offset, nil
}

func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
	var offset uint
	if (cmd & 0x01) != 0 {
@@ -208,6 +388,36 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
	return offset, delta, nil
}

// decodeSizeByteReader reads the length of a copy command: bits
// 0x10-0x40 of cmd signal which little-endian size bytes follow, and a
// decoded size of zero means 0x10000 bytes.
func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) {
	var sz uint
	if (cmd & 0x10) != 0 {
		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		sz = uint(next)
	}
	if (cmd & 0x20) != 0 {
		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		sz |= uint(next) << 8
	}
	if (cmd & 0x40) != 0 {
		next, err := delta.ReadByte()
		if err != nil {
			return 0, err
		}
		sz |= uint(next) << 16
	}
	if sz == 0 {
		sz = 0x10000
	}

	return sz, nil
}
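A hedged worked example tying the two decoders together (hypothetical stream bytes, not from the PR):

```go
// cmd = 0x91 = 0x80 | 0x10 | 0x01: a copy-from-base command with one
// offset byte and one size byte following in the delta stream.
cmd := byte(0x91)
rest := bytes.NewReader([]byte{0x2A, 0x05})

offset, _ := decodeOffsetByteReader(cmd, rest) // 0x2A = 42
sz, _ := decodeSizeByteReader(cmd, rest)       // 0x05 = 5
// => copy 5 bytes starting at offset 42 of the base object.
```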

func decodeSize(cmd byte, delta []byte) (uint, []byte, error) {
	var sz uint
	if (cmd & 0x10) != 0 {