Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
base/file/addfs: per-node transformations
Summary: Complementing D65999's whole-directory ones. Test Plan: New unit tests. Reviewers: jcharumilind Reviewed By: jcharumilind Subscribers: smahadevan Differential Revision: https://phabricator.grailbio.com/D66026 fbshipit-source-id: 1235d5f
- Loading branch information
1 parent
53b3d93
commit 2a979ca
Showing
3 changed files
with
305 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
package addfs | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/grailbio/base/file/fsnode" | ||
"github.com/grailbio/base/log" | ||
) | ||
|
||
type ( | ||
// PerNodeFunc computes nodes to add to a directory tree, for example to present alternate views | ||
// of raw data, expand archive files, etc. It operates on a single node at a time. If it returns | ||
// any "addition" nodes, ApplyPerNodeFuncs will place them under a sibling directory called | ||
// "...". For example, suppose we have an input directory: | ||
// parent/ | ||
// └─dir1/ | ||
// ├─fileA | ||
// ├─fileB | ||
// └─dir2/ | ||
// and we call ApplyPerNodeFuncs(parent/, ourFns). The resulting directory tree will be | ||
// parent/ | ||
// ├─.../ | ||
// │ └─dir1/ | ||
// │ └─[ nodes returned by PerNodeFunc.Apply(_, dir1/) for all ourFns ] | ||
// └─dir1/ | ||
// ├─.../ | ||
// │ ├─fileA/ | ||
// │ │ └─[ nodes returned by PerNodeFunc.Apply(_, fileA) for all ourFns ] | ||
// │ ├─fileB/ | ||
// │ │ └─[ nodes returned by PerNodeFunc.Apply(_, fileB) for all ourFns ] | ||
// │ └─dir2/ | ||
// │ └─[ nodes returned by PerNodeFunc.Apply(_, dir2/) for all ourFns ] | ||
// ├─fileA | ||
// ├─fileB | ||
// └─dir2/ | ||
// └─.../ | ||
// Users browsing this resulting tree can work with just the original files and ourFns won't | ||
// be invoked. However, they can also navigate into any of the .../s if interested and then | ||
// use the additional views generated by ourFns. If they're interested in our_view for | ||
// /path/to/a/file, they just need to prepend .../, like /path/to/a/.../file/our_view. | ||
// (Perhaps it'd be more intuitive to "append", like /path/to/a/file/our_view, but then the | ||
// file name would conflict with the view-containing directory.) | ||
// | ||
// Funcs that need to list the children of a fsnode.Parent should be careful: they may want to | ||
// set an upper limit on number of entries to read, and otherwise default to empty, to avoid | ||
// performance problems (resulting in bad UX) for very large directories. | ||
// | ||
// Funcs that simply look at filenames and declare derived outputs may want to place their | ||
// children directly under /.../file/ for convenient access. However, Funcs that are expensive, | ||
// for example reading some file contents, etc., may want to separate themselves under their own | ||
// subdirectory, like .../file/func_name/. This lets users browsing the tree "opt-in" to seeing | ||
// the results of the expensive computation by navigating to .../file/func_name/. | ||
// | ||
// If the input tree has any "..." that conflict with the added ones, the added ones override. | ||
// The originals will simply not be accessible. | ||
PerNodeFunc interface { | ||
Apply(context.Context, fsnode.T) (adds []fsnode.T, _ error) | ||
} | ||
perNodeFunc func(context.Context, fsnode.T) (adds []fsnode.T, _ error) | ||
) | ||
|
||
func NewPerNodeFunc(fn func(context.Context, fsnode.T) ([]fsnode.T, error)) PerNodeFunc { | ||
return perNodeFunc(fn) | ||
} | ||
func (f perNodeFunc) Apply(ctx context.Context, n fsnode.T) ([]fsnode.T, error) { return f(ctx, n) } | ||
|
||
// addsDirName is the name of the synthetic directory that holds the nodes added by
// PerNodeFuncs.
const addsDirName = "..."
|
||
// perNodeImpl extends the original Parent with the .../ child.
type perNodeImpl struct {
	// Parent is the original directory being wrapped.
	fsnode.Parent
	// fns are the funcs passed along when wrapping child directories.
	fns []PerNodeFunc
	// adds is the .../ directory, returned by Child for the name addsDirName.
	adds fsnode.Parent
}
|
||
var ( | ||
_ fsnode.Parent = (*perNodeImpl)(nil) | ||
_ fsnode.Cacheable = (*perNodeImpl)(nil) | ||
) | ||
|
||
// ApplyPerNodeFuncs returns a new Parent that contains original's nodes plus any added by fns. | ||
// See PerNodeFunc's for more documentation on how this works. | ||
// Later fns's added nodes will overwrite earlier ones, if any names conflict. | ||
func ApplyPerNodeFuncs(original fsnode.Parent, fns ...PerNodeFunc) fsnode.Parent { | ||
fns = append([]PerNodeFunc{}, fns...) | ||
adds := perNodeAdds{ | ||
fsnode.CopyFileInfo(original).WithName(addsDirName), | ||
original, fns} | ||
return &perNodeImpl{original, fns, &adds} | ||
} | ||
|
||
func (n *perNodeImpl) CacheableFor() time.Duration { return fsnode.CacheableFor(n.Parent) } | ||
func (n *perNodeImpl) Child(ctx context.Context, name string) (fsnode.T, error) { | ||
if name == addsDirName { | ||
return n.adds, nil | ||
} | ||
child, err := n.Parent.Child(ctx, name) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return perNodeRecurse(child, n.fns), nil | ||
} | ||
func (n *perNodeImpl) Children() fsnode.Iterator { | ||
return fsnode.NewConcatIterator( | ||
// TODO: Consider omitting .../ if the directory has no other children. | ||
fsnode.NewIterator(n.adds), | ||
// TODO: Filter out any conflicting ... to be consistent with Child. | ||
fsnode.MapIterator(n.Parent.Children(), func(_ context.Context, child fsnode.T) (fsnode.T, error) { | ||
return perNodeRecurse(child, n.fns), nil | ||
}), | ||
) | ||
} | ||
|
||
// perNodeAdds is the .../ Parent. It has a child (directory) for each original child (both
// directories and files). The children contain the PerNodeFunc.Apply outputs.
type perNodeAdds struct {
	// FileInfo describes the .../ directory itself; ApplyPerNodeFuncs copies it from the
	// original Parent and renames it to addsDirName.
	fsnode.FileInfo
	// original is the directory whose children this .../ directory mirrors.
	original fsnode.Parent
	// fns are applied to each original child to produce that child's added nodes.
	fns []PerNodeFunc
}
|
||
var ( | ||
_ fsnode.Parent = (*perNodeAdds)(nil) | ||
_ fsnode.Cacheable = (*perNodeAdds)(nil) | ||
) | ||
|
||
func (n *perNodeAdds) Child(ctx context.Context, name string) (fsnode.T, error) { | ||
child, err := n.original.Child(ctx, name) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return n.newAddsForChild(child), nil | ||
} | ||
func (n *perNodeAdds) Children() fsnode.Iterator { | ||
// TODO: Filter out any conflicting ... to be consistent with Child. | ||
return fsnode.MapIterator(n.original.Children(), func(_ context.Context, child fsnode.T) (fsnode.T, error) { | ||
return n.newAddsForChild(child), nil | ||
}) | ||
} | ||
func (n *perNodeAdds) FSNodeT() {} | ||
|
||
func (n *perNodeAdds) newAddsForChild(original fsnode.T) fsnode.Parent { | ||
return fsnode.NewParent( | ||
fsnode.NewDirInfo(original.Name()). | ||
WithModTime(original.ModTime()). | ||
// Derived directory must be executable to be usable, even if original file wasn't. | ||
WithModePerm(original.Mode().Perm()|0111). | ||
WithCacheableFor(fsnode.CacheableFor(original)), | ||
fsnode.FuncChildren(func(ctx context.Context) ([]fsnode.T, error) { | ||
adds := make(map[string]fsnode.T) | ||
for _, fn := range n.fns { | ||
fnAdds, err := fn.Apply(ctx, original) | ||
if err != nil { | ||
return nil, fmt.Errorf("addfs: error running func %v: %w", fn, err) | ||
} | ||
for _, add := range fnAdds { | ||
log.Debug.Printf("addfs %s: conflict for added name: %s", n.Name(), add.Name()) | ||
// TODO: Consider returning an error here. Or merging the added trees? | ||
adds[add.Name()] = add | ||
} | ||
} | ||
wrapped := make([]fsnode.T, 0, len(adds)) | ||
for _, add := range adds { | ||
wrapped = append(wrapped, perNodeRecurse(add, n.fns)) | ||
} | ||
return wrapped, nil | ||
}), | ||
) | ||
} | ||
|
||
func perNodeRecurse(node fsnode.T, fns []PerNodeFunc) fsnode.T { | ||
parent, ok := node.(fsnode.Parent) | ||
if !ok { | ||
return node | ||
} | ||
return ApplyPerNodeFuncs(parent, fns...) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package addfs | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"sort" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/grailbio/base/file/fsnode" | ||
. "github.com/grailbio/base/file/fsnode/fsnodetesting" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// TestPerNodeFuncs exercises ApplyPerNodeFuncs on a small in-memory tree. Parent, MakeT, Walker
// and LeafReadAll are unqualified names, presumably from the dot-imported fsnodetesting package.
func TestPerNodeFuncs(t *testing.T) {
	ctx := context.Background()
	// root builds a fresh copy of the input tree for each subtest.
	root := func() Parent {
		return Parent{
			"dir0": Parent{},
			"dir1": Parent{
				"dir10": Parent{
					"a": []byte("content dir10/a"),
					"b": []byte("content dir10/b"),
				},
				"a": []byte("content dir1/a"),
				"b": []byte("content dir1/b"),
			},
		}
	}
	// basic: applies two funcs and checks the complete resulting tree, including every .../.
	t.Run("basic", func(t *testing.T) {
		root := root()
		n := MakeT(t, "", root).(fsnode.Parent)
		n = ApplyPerNodeFuncs(n,
			// First func: for directories, adds a "children names" leaf listing the sorted child
			// names; for leaves, adds an empty "copy" that the second func replaces.
			NewPerNodeFunc(
				func(ctx context.Context, node fsnode.T) ([]fsnode.T, error) {
					switch n := node.(type) {
					case fsnode.Parent:
						iter := n.Children()
						defer func() { assert.NoError(t, iter.Close(ctx)) }()
						children, err := fsnode.IterateAll(ctx, iter)
						assert.NoError(t, err)
						var names []string
						for _, child := range children {
							names = append(names, child.Name())
						}
						sort.Strings(names)
						return []fsnode.T{
							fsnode.ConstLeaf(fsnode.NewRegInfo("children names"), []byte(strings.Join(names, ","))),
						}, nil
					case fsnode.Leaf:
						return []fsnode.T{
							fsnode.ConstLeaf(fsnode.NewRegInfo("copy"), nil), // Will be overwritten.
						}, nil
					}
					require.Failf(t, "invalid node type", "node: %T", node)
					panic("unreachable")
				},
			),
			// Second func: adds nothing for directories; for leaves, adds a "copy" with the
			// leaf's contents, which takes precedence over the first func's "copy".
			NewPerNodeFunc(
				func(ctx context.Context, node fsnode.T) ([]fsnode.T, error) {
					switch n := node.(type) {
					case fsnode.Parent:
						return nil, nil
					case fsnode.Leaf:
						return []fsnode.T{
							fsnode.ConstLeaf(fsnode.NewRegInfo("copy"), LeafReadAll(ctx, t, n)),
						}, nil
					}
					require.Failf(t, "invalid node type", "node: %T", node)
					panic("unreachable")
				},
			),
		)
		got := Walker{}.WalkContents(ctx, t, n)
		want := Parent{
			"...": Parent{
				"dir0": Parent{"children names": []byte("")},
				"dir1": Parent{"children names": []byte("a,b,dir10")},
			},
			"dir0": Parent{
				"...": Parent{},
			},
			"dir1": Parent{
				"...": Parent{
					"dir10": Parent{"children names": []byte("a,b")},
					"a":     Parent{"copy": []byte("content dir1/a")},
					"b":     Parent{"copy": []byte("content dir1/b")},
				},
				"dir10": Parent{
					"...": Parent{
						"a": Parent{"copy": []byte("content dir10/a")},
						"b": Parent{"copy": []byte("content dir10/b")},
					},
					"a": []byte("content dir10/a"),
					"b": []byte("content dir10/b"),
				},
				"a": []byte("content dir1/a"),
				"b": []byte("content dir1/b"),
			},
		}
		assert.Equal(t, want, got)
	})
	// lazy: the func always returns an error, and the walk is configured to ignore the .../
	// directories. The walk must still reproduce the input tree exactly, i.e. without ever
	// running the func.
	t.Run("lazy", func(t *testing.T) {
		root := root()
		n := MakeT(t, "", root).(fsnode.Parent)
		n = ApplyPerNodeFuncs(n, NewPerNodeFunc(
			func(_ context.Context, node fsnode.T) ([]fsnode.T, error) {
				return nil, fmt.Errorf("func was called: %q", node.Name())
			},
		))
		got := Walker{
			IgnoredNames: map[string]struct{}{
				addsDirName: struct{}{},
			},
		}.WalkContents(ctx, t, n)
		assert.Equal(t, root, got)
	})
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package addfs | ||
|
||
// TODO: Implement PerSubtreeFunc. | ||
// A PerNodeFunc is applied independently to each node in an entire directory tree. It may be | ||
// useful to define funcs that are contextual. For example if an fsnode.Parent called base/ has a | ||
// child called .git, we may want to define git-repository-aware views for each descendent node, | ||
// like base/file/addfs/.../per_subtree.go/git/log.txt containing history. |