Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IncludePatterns and ExcludePatterns options for Copy #2082

Merged
merged 15 commits into from
Jun 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
225 changes: 167 additions & 58 deletions cache/contenthash/checksum.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ import (
"os"
"path"
"path/filepath"
"strings"
"sync"

"github.com/docker/docker/pkg/fileutils"
"github.com/docker/docker/pkg/idtools"
iradix "github.com/hashicorp/go-immutable-radix"
"github.com/hashicorp/golang-lru/simplelru"
Expand Down Expand Up @@ -45,12 +47,15 @@ func getDefaultManager() *cacheManager {
// header, "/dir" is for contents. For the root node "" (empty string) is the
// key for root, "/" for the root header

func Checksum(ctx context.Context, ref cache.ImmutableRef, path string, followLinks bool, s session.Group) (digest.Digest, error) {
return getDefaultManager().Checksum(ctx, ref, path, followLinks, s)
// ChecksumOpts selects how Checksum resolves and filters the requested path.
// When Wildcard is false and both pattern lists are empty, the path is
// checksummed directly via checksumFollow.
type ChecksumOpts struct {
// FollowLinks is forwarded to checksumFollow. On the wildcard/pattern route
// it also causes matched symlink records to be re-checksummed through
// checksumFollow.
FollowLinks bool
// Wildcard makes the lookup walk the tree from its first key and test the
// requested path as a path.Match pattern against the entries it visits.
Wildcard bool
// IncludePatterns, when non-empty, are rooted with keyPath and compiled into
// a fileutils.PatternMatcher; a candidate must match it to be included.
IncludePatterns []string
// ExcludePatterns, when non-empty, are rooted with keyPath and compiled into
// a fileutils.PatternMatcher; a candidate matching it is not included.
ExcludePatterns []string
}

func ChecksumWildcard(ctx context.Context, ref cache.ImmutableRef, path string, followLinks bool, s session.Group) (digest.Digest, error) {
return getDefaultManager().ChecksumWildcard(ctx, ref, path, followLinks, s)
// Checksum returns the digest for path within ref, as computed by the
// package's default cache manager. opts and s are passed through unchanged.
func Checksum(ctx context.Context, ref cache.ImmutableRef, path string, opts ChecksumOpts, s session.Group) (digest.Digest, error) {
	manager := getDefaultManager()
	return manager.Checksum(ctx, ref, path, opts, s)
}

func GetCacheContext(ctx context.Context, md *metadata.StorageItem, idmap *idtools.IdentityMapping) (CacheContext, error) {
Expand All @@ -66,16 +71,15 @@ func ClearCacheContext(md *metadata.StorageItem) {
}

// CacheContext exposes checksum computation for paths of a mountable ref and
// accepts change notifications through HandleChange.
type CacheContext interface {
Checksum(ctx context.Context, ref cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error)
ChecksumWildcard(ctx context.Context, ref cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error)
// Checksum returns the digest for p. opts carries the symlink-following,
// wildcard and include/exclude pattern settings.
Checksum(ctx context.Context, ref cache.Mountable, p string, opts ChecksumOpts, s session.Group) (digest.Digest, error)
// HandleChange notifies the source about a modification operation.
HandleChange(kind fsutil.ChangeKind, p string, fi os.FileInfo, err error) error
}

// Hashed is implemented by values that can report a digest.
type Hashed interface {
// Digest returns the digest associated with the value.
Digest() digest.Digest
}

type Wildcard struct {
// IncludedPath pairs a path selected by includedPaths with its cache record.
type IncludedPath struct {
// Path is the slash-separated path converted from the tree key.
Path string
// Record is the cache record obtained for Path.
Record *CacheRecord
}
Expand All @@ -86,20 +90,12 @@ type cacheManager struct {
lruMu sync.Mutex
}

// Checksum obtains the CacheContext for ref's metadata and identity mapping
// and delegates the checksum of p to it.
func (cm *cacheManager) Checksum(ctx context.Context, ref cache.ImmutableRef, p string, followLinks bool, s session.Group) (digest.Digest, error) {
func (cm *cacheManager) Checksum(ctx context.Context, ref cache.ImmutableRef, p string, opts ChecksumOpts, s session.Group) (digest.Digest, error) {
cc, err := cm.GetCacheContext(ctx, ensureOriginMetadata(ref.Metadata()), ref.IdentityMapping())
if err != nil {
// NOTE(review): err is dropped here, so a GetCacheContext failure reaches the
// caller as an empty digest with a nil error. This looks unintended;
// `return "", err` would propagate the failure.
return "", nil
}
return cc.Checksum(ctx, ref, p, followLinks, s)
}

func (cm *cacheManager) ChecksumWildcard(ctx context.Context, ref cache.ImmutableRef, p string, followLinks bool, s session.Group) (digest.Digest, error) {
cc, err := cm.GetCacheContext(ctx, ensureOriginMetadata(ref.Metadata()), ref.IdentityMapping())
if err != nil {
return "", nil
}
return cc.ChecksumWildcard(ctx, ref, p, followLinks, s)
return cc.Checksum(ctx, ref, p, opts, s)
}

func (cm *cacheManager) GetCacheContext(ctx context.Context, md *metadata.StorageItem, idmap *idtools.IdentityMapping) (CacheContext, error) {
Expand Down Expand Up @@ -264,12 +260,17 @@ func (cc *cacheContext) save() error {
return cc.md.SetExternal(keyContentHash, dt)
}

// HandleChange notifies the source about a modification operation
func (cc *cacheContext) HandleChange(kind fsutil.ChangeKind, p string, fi os.FileInfo, err error) (retErr error) {
// keyPath normalizes p into the rooted, slash-separated form used before a
// path is converted to a tree key. OS separators are turned into slashes and
// the result is cleaned and anchored at "/". The root itself is represented
// by the empty string rather than "/".
func keyPath(p string) string {
	if rooted := path.Join("/", filepath.ToSlash(p)); rooted != "/" {
		return rooted
	}
	// The root collapses to "", matching the key used for the root node.
	return ""
}

// HandleChange notifies the source about a modification operation
func (cc *cacheContext) HandleChange(kind fsutil.ChangeKind, p string, fi os.FileInfo, err error) (retErr error) {
p = keyPath(p)
k := convertPathToKey([]byte(p))

deleteDir := func(cr *CacheRecord) {
Expand Down Expand Up @@ -382,36 +383,40 @@ func (cc *cacheContext) HandleChange(kind fsutil.ChangeKind, p string, fi os.Fil
return nil
}

func (cc *cacheContext) ChecksumWildcard(ctx context.Context, mountable cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error) {
func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable, p string, opts ChecksumOpts, s session.Group) (digest.Digest, error) {
m := &mount{mountable: mountable, session: s}
defer m.clean()

wildcards, err := cc.wildcards(ctx, m, p)
if !opts.Wildcard && len(opts.IncludePatterns) == 0 && len(opts.ExcludePatterns) == 0 {
return cc.checksumFollow(ctx, m, p, opts.FollowLinks)
}

includedPaths, err := cc.includedPaths(ctx, m, p, opts)
if err != nil {
return "", err
}

if followLinks {
for i, w := range wildcards {
if opts.FollowLinks {
for i, w := range includedPaths {
if w.Record.Type == CacheRecordTypeSymlink {
dgst, err := cc.checksumFollow(ctx, m, w.Path, followLinks)
dgst, err := cc.checksumFollow(ctx, m, w.Path, opts.FollowLinks)
if err != nil {
return "", err
}
wildcards[i].Record = &CacheRecord{Digest: dgst}
includedPaths[i].Record = &CacheRecord{Digest: dgst}
}
}
}
if len(wildcards) == 0 {
if len(includedPaths) == 0 {
return digest.FromBytes([]byte{}), nil
}

if len(wildcards) == 1 && path.Base(p) == path.Base(wildcards[0].Path) {
return wildcards[0].Record.Digest, nil
if len(includedPaths) == 1 && path.Base(p) == path.Base(includedPaths[0].Path) {
return includedPaths[0].Record.Digest, nil
}

digester := digest.Canonical.Digester()
for i, w := range wildcards {
for i, w := range includedPaths {
if i != 0 {
digester.Hash().Write([]byte{0})
}
Expand All @@ -421,13 +426,6 @@ func (cc *cacheContext) ChecksumWildcard(ctx context.Context, mountable cache.Mo
return digester.Digest(), nil
}

func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable, p string, followLinks bool, s session.Group) (digest.Digest, error) {
m := &mount{mountable: mountable, session: s}
defer m.clean()

return cc.checksumFollow(ctx, m, p, followLinks)
}

func (cc *cacheContext) checksumFollow(ctx context.Context, m *mount, p string, follow bool) (digest.Digest, error) {
const maxSymlinkLimit = 255
i := 0
Expand All @@ -452,7 +450,7 @@ func (cc *cacheContext) checksumFollow(ctx context.Context, m *mount, p string,
}
}

func (cc *cacheContext) wildcards(ctx context.Context, m *mount, p string) ([]*Wildcard, error) {
func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, opts ChecksumOpts) ([]*IncludedPath, error) {
cc.mu.Lock()
defer cc.mu.Unlock()

Expand All @@ -478,32 +476,103 @@ func (cc *cacheContext) wildcards(ctx context.Context, m *mount, p string) ([]*W
}
}()

p = path.Join("/", filepath.ToSlash(p))
if p == "/" {
p = ""
endsInSep := len(p) != 0 && p[len(p)-1] == filepath.Separator
p = keyPath(p)

var includePatternMatcher *fileutils.PatternMatcher
if len(opts.IncludePatterns) != 0 {
rootedIncludePatterns := make([]string, len(opts.IncludePatterns))
for i, includePattern := range opts.IncludePatterns {
rootedIncludePatterns[i] = keyPath(includePattern)
}
includePatternMatcher, err = fileutils.NewPatternMatcher(rootedIncludePatterns)
if err != nil {
return nil, errors.Wrapf(err, "invalid includepatterns: %s", opts.IncludePatterns)
}
}

wildcards := make([]*Wildcard, 0, 2)
var excludePatternMatcher *fileutils.PatternMatcher
if len(opts.ExcludePatterns) != 0 {
rootedExcludePatterns := make([]string, len(opts.ExcludePatterns))
for i, excludePattern := range opts.ExcludePatterns {
rootedExcludePatterns[i] = keyPath(excludePattern)
}
excludePatternMatcher, err = fileutils.NewPatternMatcher(rootedExcludePatterns)
if err != nil {
return nil, errors.Wrapf(err, "invalid excludepatterns: %s", opts.ExcludePatterns)
}
}

includedPaths := make([]*IncludedPath, 0, 2)

txn := cc.tree.Txn()
root = txn.Root()
var updated bool
var (
updated bool
iter *iradix.Seeker
k []byte
kOk bool
)

if opts.Wildcard {
iter = root.Seek([]byte{})
k, _, kOk = iter.Next()
} else {
k = convertPathToKey([]byte(p))
if _, kOk = root.Get(k); kOk {
iter = root.Seek(k)
}
}

iter := root.Seek([]byte{})
for {
k, _, ok := iter.Next()
if !ok {
break
var (
parentDirHeaders []*IncludedPath
lastMatchedDir string
)

for kOk {
fn := string(convertKeyToPath(k))

for len(parentDirHeaders) != 0 {
lastParentDir := parentDirHeaders[len(parentDirHeaders)-1]
if strings.HasPrefix(fn, lastParentDir.Path+"/") {
break
}
parentDirHeaders = parentDirHeaders[:len(parentDirHeaders)-1]
}

dirHeader := false
if len(k) > 0 && k[len(k)-1] == byte(0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand where the non-null-byte dir is excluded from checksum for the partial case.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `continue` a few lines below does this. It causes us to skip the call to `checksum` when `partialMatch` is true but the path does not end in a null byte.

dirHeader = true
fn = fn[:len(fn)-1]
if fn == p && endsInSep {
// We don't include the metadata header for a source dir which ends with a separator
k, _, kOk = iter.Next()
continue
}
}
if opts.Wildcard {
if lastMatchedDir == "" || !strings.HasPrefix(fn, lastMatchedDir+"/") {
include, err := path.Match(p, fn)
if err != nil {
return nil, err
}
if !include {
k, _, kOk = iter.Next()
continue
}
lastMatchedDir = fn
}
} else if !strings.HasPrefix(fn+"/", p+"/") {
k, _, kOk = iter.Next()
continue
}
fn := convertKeyToPath(k)
b, err := path.Match(p, string(fn))

shouldInclude, err := shouldIncludePath(p, fn, includePatternMatcher, excludePatternMatcher)
if err != nil {
return nil, err
}
if !b {
if !shouldInclude && !dirHeader {
k, _, kOk = iter.Next()
continue
}

Expand All @@ -515,25 +584,65 @@ func (cc *cacheContext) wildcards(ctx context.Context, m *mount, p string) ([]*W
updated = true
}

wildcards = append(wildcards, &Wildcard{Path: string(fn), Record: cr})

if cr.Type == CacheRecordTypeDir {
iter = root.Seek(append(k, 0, 0xff))
// We only hash dir headers and files, not dir contents. Hashing
// dir contents could be wrong if there are exclusions within the
// dir.
shouldInclude = false
}

if !shouldInclude {
if cr.Type == CacheRecordTypeDirHeader {
// We keep track of non-included parent dir headers in case an
// include pattern matches a file inside one of these dirs.
parentDirHeaders = append(parentDirHeaders, &IncludedPath{Path: fn, Record: cr})
}
} else {
includedPaths = append(includedPaths, parentDirHeaders...)
parentDirHeaders = nil
includedPaths = append(includedPaths, &IncludedPath{Path: fn, Record: cr})
}
k, _, kOk = iter.Next()
}

cc.tree = txn.Commit()
cc.dirty = updated

return wildcards, nil
return includedPaths, nil
}

func (cc *cacheContext) checksumNoFollow(ctx context.Context, m *mount, p string) (*CacheRecord, error) {
p = path.Join("/", filepath.ToSlash(p))
if p == "/" {
p = ""
// shouldIncludePath reports whether candidate passes both pattern filters.
//
// candidate is a slash-separated path; it is converted with filepath.FromSlash
// before being handed to the matchers. A nil matcher means that filter is not
// applied. The candidate is accepted only if it matches the include matcher
// (when present) and does not match the exclude matcher (when present).
// p is accepted for the caller's convenience but is not consulted here.
//
// A matcher failure is returned wrapped, together with a false result.
func shouldIncludePath(
	p string,
	candidate string,
	includePatternMatcher *fileutils.PatternMatcher,
	excludePatternMatcher *fileutils.PatternMatcher,
) (bool, error) {
	osPath := filepath.FromSlash(candidate)

	if includePatternMatcher != nil {
		included, err := includePatternMatcher.Matches(osPath)
		switch {
		case err != nil:
			return false, errors.Wrap(err, "failed to match includepatterns")
		case !included:
			return false, nil
		}
	}

	if excludePatternMatcher == nil {
		return true, nil
	}

	excluded, err := excludePatternMatcher.Matches(osPath)
	if err != nil {
		return false, errors.Wrap(err, "failed to match excludepatterns")
	}
	return !excluded, nil
}

func (cc *cacheContext) checksumNoFollow(ctx context.Context, m *mount, p string) (*CacheRecord, error) {
p = keyPath(p)

cc.mu.RLock()
if cc.txn == nil {
root := cc.tree.Root()
Expand Down