Skip to content

Commit

Permalink
chunked: implement OSTree deduplication
Browse files Browse the repository at this point in the history
if the option ostree_repos is set:

    [storage.options]
    pull_options = {enable_partial_images = "true", ostree_repos = "/foo:/bar"}

then attempt to deduplicate from the specified list of OSTree repositories.

To be usable, an OSTree repository must be configured to track
the checksum of its files' payload (payload link), which is disabled by
default:

ostree config --repo=/path/to/repo set core.payload-link-threshold N

where N is the minimum size a file must have to be tracked by its payload;
N must be a nonzero value.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
  • Loading branch information
giuseppe committed Sep 24, 2021
1 parent dd74cc2 commit 1aee04b
Showing 1 changed file with 76 additions and 10 deletions.
86 changes: 76 additions & 10 deletions pkg/chunked/storage_linux.go
Expand Up @@ -345,6 +345,56 @@ func getFileDigest(f *os.File) (digest.Digest, error) {
return digester.Digest(), nil
}

// findFileInOSTreeRepos checks whether the requested file already exists in one of the OSTree repos and copies the file content from there if possible.
// file is the file to look for.
// ostreeRepos is a list of OSTree repos; empty entries are ignored.
// dirfd is an open fd to the destination checkout.
// useHardLinks defines whether the deduplication can be performed using hard links.
//
// It returns whether the file was found, the destination file and the size value reported by
// copyFileContent. Every lookup failure is reported as "not found" with a nil error, so the
// caller can fall back to another source for the file.
func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
	parsed, err := digest.Parse(file.Digest)
	if err != nil {
		return false, nil, 0, nil
	}
	// Validate the encoded digest itself: once the suffix is appended the
	// length check could never fail.
	encoded := parsed.Encoded()
	if len(encoded) < 2 {
		return false, nil, 0, nil
	}
	payloadLink := encoded + ".payload-link"

	for _, repo := range ostreeRepos {
		// Splitting an unset option yields a single empty entry; joining it
		// would produce a path relative to the current working directory.
		if repo == "" {
			continue
		}
		sourceFile := filepath.Join(repo, "objects", payloadLink[:2], payloadLink[2:])
		st, err := os.Stat(sourceFile)
		if err != nil || !st.Mode().IsRegular() {
			continue
		}
		if st.Size() != file.Size {
			continue
		}
		fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK, 0)
		if err != nil {
			return false, nil, 0, nil
		}
		f := os.NewFile(uintptr(fd), "fd")

		// check if the open file can be deduplicated with hard links
		if useHardLinks && !canDedupFileWithHardLink(file, fd, st) {
			// Close now instead of deferring: a defer inside the loop would keep
			// every rejected candidate open until the function returns.
			f.Close()
			continue
		}

		dstFile, written, err := copyFileContent(fd, file.Name, dirfd, 0, useHardLinks)
		// The source is no longer needed once the copy attempt has finished.
		f.Close()
		if err != nil {
			return false, nil, 0, nil
		}
		return true, dstFile, written, nil
	}
	// If hard links deduplication was used and it has failed, try again without hard links.
	if useHardLinks {
		return findFileInOSTreeRepos(file, ostreeRepos, dirfd, false)
	}

	return false, nil, 0, nil
}

// findFileOnTheHost checks whether the requested file already exists on the host and copies the file content from there if possible.
// It is currently implemented to look only at the file with the same path. Ideally it can detect the same content also at different
// paths.
Expand Down Expand Up @@ -873,6 +923,9 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
// modifies the source file as well.
useHardLinks := parseBooleanPullOption(&storeOpts, "use_hard_links", false)

// List of OSTree repositories to use for deduplication
ostreeRepos := strings.Split(storeOpts.PullOptions["ostree_repos"], ":")

// Generate the manifest
var toc internal.TOC
if err := json.Unmarshal(c.manifest, &toc); err != nil {
Expand Down Expand Up @@ -1009,18 +1062,35 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra

totalChunksSize += r.Size

// finalizeFile applies the file attributes to dstFile through setFileAttrs and closes it.
// A nil dstFile is a no-op. It returns the error from setFileAttrs, if any.
finalizeFile := func(dstFile *os.File) error {
if dstFile != nil {
// The deferred Close runs on every return path, including a setFileAttrs failure.
defer dstFile.Close()
if err := setFileAttrs(dstFile, mode, &r, options); err != nil {
return err
}
}
return nil
}

found, dstFile, _, err := findFileInOtherLayers(&r, dirfd, otherLayersCache, c.layersTarget, useHardLinks)
if err != nil {
return output, err
}
if dstFile != nil {
if err := setFileAttrs(dstFile, mode, &r, options); err != nil {
dstFile.Close()
if found {
if err := finalizeFile(dstFile); err != nil {
return output, err
}
dstFile.Close()
continue
}

found, dstFile, _, err = findFileInOSTreeRepos(&r, ostreeRepos, dirfd, useHardLinks)
if err != nil {
return output, err
}
if found {
if err := finalizeFile(dstFile); err != nil {
return output, err
}
continue
}

Expand All @@ -1029,14 +1099,10 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
if err != nil {
return output, err
}
if dstFile != nil {
if err := setFileAttrs(dstFile, mode, &r, options); err != nil {
dstFile.Close()
if found {
if err := finalizeFile(dstFile); err != nil {
return output, err
}
dstFile.Close()
}
if found {
continue
}
}
Expand Down

0 comments on commit 1aee04b

Please sign in to comment.