diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 4bc153bfc..d6a377202 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -82,3 +82,13 @@ jobs: ./app/crane pull --platform=linux/arm64 --format=oci $remote $distroless ./app/crane push $distroless $local diff <(./app/crane manifest --platform linux/arm64 $remote) <(./app/crane manifest $local) + + - name: crane pull image, and export it from stdin to filesystem tar to stdout + shell: bash + run: | + set -euxo pipefail + + ./app/crane pull ubuntu ubuntu.tar + ./app/crane export - - < ubuntu.tar > filesystem.tar + ls -la *.tar + diff --git a/cmd/crane/cmd/export.go b/cmd/crane/cmd/export.go index a6b8af100..4a43210d6 100644 --- a/cmd/crane/cmd/export.go +++ b/cmd/crane/cmd/export.go @@ -17,8 +17,6 @@ package cmd import ( "fmt" "io" - "io/ioutil" - "log" "os" "github.com/google/go-containerregistry/pkg/crane" @@ -55,18 +53,10 @@ func NewCmdExport(options *[]crane.Option) *cobra.Command { var img v1.Image if src == "-" { - tmpfile, err := ioutil.TempFile("", "crane") - if err != nil { - log.Fatal(err) - } - defer os.Remove(tmpfile.Name()) - - if _, err := io.Copy(tmpfile, os.Stdin); err != nil { - log.Fatal(err) + stdoutreader := func() (io.ReadCloser, error) { + return io.ReadCloser(os.Stdin), nil } - tmpfile.Close() - - img, err = tarball.ImageFromPath(tmpfile.Name(), nil) + img, err = tarball.Image(stdoutreader, nil) if err != nil { return fmt.Errorf("reading tarball from stdin: %w", err) } diff --git a/pkg/v1/mutate/mutate.go b/pkg/v1/mutate/mutate.go index 6bae22493..e0f0355bd 100644 --- a/pkg/v1/mutate/mutate.go +++ b/pkg/v1/mutate/mutate.go @@ -27,6 +27,7 @@ import ( "time" "github.com/google/go-containerregistry/internal/gzip" + "github.com/google/go-containerregistry/pkg/logs" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/empty" "github.com/google/go-containerregistry/pkg/v1/match" @@ -220,6 +221,7 @@ func CreatedAt(base v1.Image, created v1.Time) (v1.Image, error) { // If a caller doesn't read the full contents, they should Close it to free up // resources used during extraction. func Extract(img v1.Image) io.ReadCloser { + logs.Debug.Printf("mutate: extract %T", img) pr, pw := io.Pipe() go func() { diff --git a/pkg/v1/tarball/image.go b/pkg/v1/tarball/image.go index b2e44df75..544d9f9a8 100644 --- a/pkg/v1/tarball/image.go +++ b/pkg/v1/tarball/image.go @@ -28,6 +28,7 @@ import ( "sync" "github.com/google/go-containerregistry/internal/gzip" + "github.com/google/go-containerregistry/pkg/logs" "github.com/google/go-containerregistry/pkg/name" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/partial" @@ -36,6 +37,7 @@ import ( type image struct { opener Opener + tarbuf *TarBuffered // replaces Opener manifest *Manifest config []byte imgDescriptor *Descriptor @@ -88,8 +90,21 @@ func LoadManifest(opener Opener) (Manifest, error) { // Image exposes an image from the tarball at the provided path. func Image(opener Opener, tag *name.Tag) (v1.Image, error) { + f, err := opener() + if err != nil { + return nil, err + } + close := true + defer func() { + if close { + f.Close() + } + }() + tarbuf := NewTarBuffered(f) + img := &image{ opener: opener, + tarbuf: tarbuf, tag: tag, } if err := img.loadTarDescriptorAndConfig(); err != nil { @@ -161,20 +176,18 @@ func (i *image) areLayersCompressed() (bool, error) { return false, errors.New("0 layers found in image") } layer := i.imgDescriptor.Layers[0] - blob, err := extractFileFromTar(i.opener, layer) + blob, err := i.tarbuf.scanFile(layer) if err != nil { return false, err } - defer blob.Close() return gzip.Is(blob) } func (i *image) loadTarDescriptorAndConfig() error { - m, err := extractFileFromTar(i.opener, "manifest.json") + m, err := i.tarbuf.scanFile("manifest.json") if err != nil { return err } - defer m.Close() if err := json.NewDecoder(m).Decode(&i.manifest); err != nil { return err @@ -189,11 +202,10 @@ func (i *image) loadTarDescriptorAndConfig() error { return err } - cfg, err := extractFileFromTar(i.opener, i.imgDescriptor.Config) + cfg, err := i.tarbuf.scanFile(i.imgDescriptor.Config) if err != nil { return err } - defer cfg.Close() i.config, err = ioutil.ReadAll(cfg) if err != nil { @@ -206,6 +218,68 @@ func (i *image) RawConfigFile() ([]byte, error) { return i.config, nil } +// TarBuffered stores parsed entries of tar file for subsequent access +type TarBuffered struct { + tf *tar.Reader + pos int + headers map[int]*tar.Header + content map[int][]byte + EOF bool +} + +// NewTarBuffered prepares and returns struct for parsing tar stream +func NewTarBuffered(f io.Reader) *TarBuffered { + // [ ] what happens if we don't return a pointer here? + // will reference to tf be duplicated in gc? + logs.Debug.Printf("tarbuf: start with %+v", f) + return &TarBuffered{ + tf: tar.NewReader(f), + headers: make(map[int]*tar.Header), + content: make(map[int][]byte)} +} + +//func (tb *TarBuffered) append(header *tar.Header, bytes io.Reader) (, error) { +//} + +// TODO: track size of structure and remove content entries once they've been read +func (tb *TarBuffered) scanFile(filePath string) (io.Reader, error) { + // scan records that are already read + for i := 0; i < tb.pos; i++ { + if tb.headers[i].Name == filePath { + logs.Debug.Printf("tarbuf: found in buffer %v %v", i, filePath) + return bytes.NewReader(tb.content[i]), nil + } + } + // if file is not found, continue parsing + for !tb.EOF { + logs.Debug.Printf("tarbuf: scan entry %v", tb.pos) + hdr, err := tb.tf.Next() + if errors.Is(err, io.EOF) { + logs.Debug.Printf("tarbuf: EOF") + tb.EOF = true + break + } + if err != nil { + return nil, err + } + + tb.headers[tb.pos] = hdr + logs.Debug.Printf("tarbuf: %s", hdr.Name) + tb.content[tb.pos], err = io.ReadAll(tb.tf) + if err != nil { + return nil, err + } + contentidx := tb.pos + tb.pos++ + logs.Debug.Printf("tarbuf: buffered %v", len(tb.content[contentidx])) + if hdr.Name == filePath { + logs.Debug.Printf("tarbuf: found %v", filePath) + return bytes.NewReader(tb.content[contentidx]), nil + } + } + return nil, fmt.Errorf("file %s not found in tar", filePath) +} + // tarFile represents a single file inside a tar. Closing it closes the tar itself. type tarFile struct { io.Reader