Skip to content

Commit

Permalink
base/file/addfs: per-node transformations
Browse files Browse the repository at this point in the history
Summary: Complementing D65999's whole-directory ones.

Test Plan: New unit tests.

Reviewers: jcharumilind

Reviewed By: jcharumilind

Subscribers: smahadevan

Differential Revision: https://phabricator.grailbio.com/D66026

fbshipit-source-id: 1235d5f
  • Loading branch information
josh-newman authored and jcharum committed Jun 2, 2022
1 parent 53b3d93 commit 2a979ca
Show file tree
Hide file tree
Showing 3 changed files with 305 additions and 0 deletions.
179 changes: 179 additions & 0 deletions file/addfs/per_node.go
@@ -0,0 +1,179 @@
package addfs

import (
"context"
"fmt"
"time"

"github.com/grailbio/base/file/fsnode"
"github.com/grailbio/base/log"
)

// PerNodeFunc produces extra nodes to attach to a directory tree, for instance to expose
// alternate views of raw data or to expand archive files. It is given one node at a time.
// Whatever "addition" nodes it returns are placed by ApplyPerNodeFuncs under a sibling
// directory named "...". As an example, take this input directory:
//
//	parent/
//	└─dir1/
//	  ├─fileA
//	  ├─fileB
//	  └─dir2/
//
// Calling ApplyPerNodeFuncs(parent/, ourFns) yields the directory tree
//
//	parent/
//	├─.../
//	│ └─dir1/
//	│   └─[ nodes returned by PerNodeFunc.Apply(_, dir1/) for all ourFns ]
//	└─dir1/
//	  ├─.../
//	  │ ├─fileA/
//	  │ │ └─[ nodes returned by PerNodeFunc.Apply(_, fileA) for all ourFns ]
//	  │ ├─fileB/
//	  │ │ └─[ nodes returned by PerNodeFunc.Apply(_, fileB) for all ourFns ]
//	  │ └─dir2/
//	  │   └─[ nodes returned by PerNodeFunc.Apply(_, dir2/) for all ourFns ]
//	  ├─fileA
//	  ├─fileB
//	  └─dir2/
//	    └─.../
//
// Someone browsing the result can stick to the original files, in which case ourFns are never
// invoked. Alternatively they can step into any .../ directory to reach the extra views that
// ourFns generate. To get our_view for /path/to/a/file, insert .../ before the last path
// element: /path/to/a/.../file/our_view. (Appending instead, as in /path/to/a/file/our_view,
// might look more natural, but the file name would then collide with the directory holding
// the views.)
//
// Funcs that list the children of an fsnode.Parent should take care: consider capping the
// number of entries read, and returning nothing beyond that cap, so that very large
// directories do not cause performance problems (and thus bad UX).
//
// Funcs that only inspect file names and declare derived outputs may place their children
// directly under /.../file/ for easy access. Expensive Funcs (e.g. those that read file
// contents) may prefer their own subdirectory such as .../file/func_name/, so that users
// "opt in" to the expensive computation by navigating to .../file/func_name/.
//
// If the input tree already contains a "..." that clashes with an added one, the added one
// takes precedence and the original simply becomes inaccessible.
type PerNodeFunc interface {
	Apply(context.Context, fsnode.T) (adds []fsnode.T, _ error)
}

// perNodeFunc lets a plain function value satisfy PerNodeFunc.
type perNodeFunc func(context.Context, fsnode.T) (adds []fsnode.T, _ error)

// NewPerNodeFunc wraps fn so that it can be passed wherever a PerNodeFunc is expected.
// The returned value's Apply method calls fn with the same arguments.
func NewPerNodeFunc(fn func(context.Context, fsnode.T) ([]fsnode.T, error)) PerNodeFunc {
	var wrapped perNodeFunc = fn
	return wrapped
}
// Apply implements PerNodeFunc by invoking the underlying function f on n.
func (f perNodeFunc) Apply(ctx context.Context, n fsnode.T) ([]fsnode.T, error) {
	return f(ctx, n)
}

// addsDirName is the name of the directory, present in every wrapped Parent, that holds the
// nodes added by PerNodeFuncs.
const addsDirName = "..."

// perNodeImpl extends the original Parent with the .../ child.
type perNodeImpl struct {
// Parent is the wrapped original directory. Its methods are promoted, except for the ones
// perNodeImpl overrides (CacheableFor, Child, Children).
fsnode.Parent
// fns are the funcs applied to each node; they are also passed down when recursing into
// child directories.
fns []PerNodeFunc
// adds is the .../ directory, returned by Child when asked for addsDirName.
adds fsnode.Parent
}

// Compile-time checks that *perNodeImpl satisfies the fsnode interfaces it is used as.
var _ fsnode.Parent = (*perNodeImpl)(nil)
var _ fsnode.Cacheable = (*perNodeImpl)(nil)

// ApplyPerNodeFuncs wraps original in a new Parent that exposes original's nodes together
// with any nodes produced by fns. See PerNodeFunc for a description of the resulting layout.
// When several fns add nodes with the same name, the node from the later fn wins.
func ApplyPerNodeFuncs(original fsnode.Parent, fns ...PerNodeFunc) fsnode.Parent {
	// Work on a private copy so the caller's slice is not shared with the returned node.
	ownFns := make([]PerNodeFunc, len(fns))
	copy(ownFns, fns)

	// The .../ directory reuses original's file info under the name addsDirName.
	addsDir := &perNodeAdds{
		FileInfo: fsnode.CopyFileInfo(original).WithName(addsDirName),
		original: original,
		fns:      ownFns,
	}
	return &perNodeImpl{
		Parent: original,
		fns:    ownFns,
		adds:   addsDir,
	}
}

// CacheableFor reports the value fsnode.CacheableFor gives for the wrapped original Parent.
func (n *perNodeImpl) CacheableFor() time.Duration {
	return fsnode.CacheableFor(n.Parent)
}
// Child looks up name in this directory. The reserved name addsDirName resolves to the .../
// directory (shadowing any original child of that name). Any other name is looked up in the
// original Parent, and the result is wrapped via perNodeRecurse so that child directories
// get their own .../ as well.
func (n *perNodeImpl) Child(ctx context.Context, name string) (fsnode.T, error) {
	switch name {
	case addsDirName:
		return n.adds, nil
	default:
		orig, err := n.Parent.Child(ctx, name)
		if err != nil {
			return nil, err
		}
		return perNodeRecurse(orig, n.fns), nil
	}
}
// Children iterates over the .../ directory first, followed by the original Parent's
// children, each wrapped via perNodeRecurse.
func (n *perNodeImpl) Children() fsnode.Iterator {
	// TODO: Consider omitting .../ if the directory has no other children.
	addsIter := fsnode.NewIterator(n.adds)

	// TODO: Filter out any conflicting ... to be consistent with Child.
	wrapChild := func(_ context.Context, child fsnode.T) (fsnode.T, error) {
		return perNodeRecurse(child, n.fns), nil
	}
	originalsIter := fsnode.MapIterator(n.Parent.Children(), wrapChild)

	return fsnode.NewConcatIterator(addsIter, originalsIter)
}

// perNodeAdds is the .../ Parent. It has a child (directory) for each original child (both
// directories and files). The children contain the PerNodeFunc.Apply outputs.
type perNodeAdds struct {
// FileInfo describes the .../ directory itself; ApplyPerNodeFuncs derives it from the
// original Parent's info, renamed to addsDirName.
fsnode.FileInfo
// original is the directory whose children are mirrored (as directories) under .../.
original fsnode.Parent
// fns are the funcs run against each original child to compute its added nodes.
fns []PerNodeFunc
}

// Compile-time checks that *perNodeAdds satisfies the fsnode interfaces it is used as.
var _ fsnode.Parent = (*perNodeAdds)(nil)
var _ fsnode.Cacheable = (*perNodeAdds)(nil)

// Child returns the adds directory for the original child called name. Any error from
// looking up that original child is returned unchanged.
func (n *perNodeAdds) Child(ctx context.Context, name string) (fsnode.T, error) {
	orig, err := n.original.Child(ctx, name)
	if err == nil {
		return n.newAddsForChild(orig), nil
	}
	return nil, err
}
// Children iterates over the original Parent's children, mapping each one to its adds
// directory.
func (n *perNodeAdds) Children() fsnode.Iterator {
	// TODO: Filter out any conflicting ... to be consistent with Child.
	toAddsDir := func(_ context.Context, child fsnode.T) (fsnode.T, error) {
		return n.newAddsForChild(child), nil
	}
	return fsnode.MapIterator(n.original.Children(), toAddsDir)
}
// FSNodeT is an intentionally empty method.
// NOTE(review): presumably the marker method required by the fsnode.T interface — confirm
// against the fsnode package.
func (n *perNodeAdds) FSNodeT() {}

// newAddsForChild builds the .../<name>/ directory for one child of the original Parent.
//
// The directory takes its name and mod time from original, and its permissions from original
// with the execute bits forced on. Its children are produced by the callback handed to
// fsnode.FuncChildren: every fn in n.fns is applied to original, in order, and the returned
// nodes are collected by name. When two added nodes share a name the later one replaces the
// earlier one, and the conflict is reported at debug log level. Each resulting node is passed
// through perNodeRecurse, so added directories get their own .../ child too.
//
// If any fn fails, the callback returns that error (wrapped) and no children.
func (n *perNodeAdds) newAddsForChild(original fsnode.T) fsnode.Parent {
	return fsnode.NewParent(
		fsnode.NewDirInfo(original.Name()).
			WithModTime(original.ModTime()).
			// Derived directory must be executable to be usable, even if original file wasn't.
			WithModePerm(original.Mode().Perm()|0111).
			WithCacheableFor(fsnode.CacheableFor(original)),
		fsnode.FuncChildren(func(ctx context.Context) ([]fsnode.T, error) {
			adds := make(map[string]fsnode.T)
			for _, fn := range n.fns {
				fnAdds, err := fn.Apply(ctx, original)
				if err != nil {
					return nil, fmt.Errorf("addfs: error running func %v: %w", fn, err)
				}
				for _, add := range fnAdds {
					// Only report a conflict when the name was really added before. Name the
					// original node, since n.Name() is "..." for every perNodeAdds.
					if _, conflict := adds[add.Name()]; conflict {
						// TODO: Consider returning an error here. Or merging the added trees?
						log.Debug.Printf("addfs %s: conflict for added name: %s", original.Name(), add.Name())
					}
					adds[add.Name()] = add
				}
			}
			wrapped := make([]fsnode.T, 0, len(adds))
			for _, add := range adds {
				wrapped = append(wrapped, perNodeRecurse(add, n.fns))
			}
			return wrapped, nil
		}),
	)
}

// perNodeRecurse applies fns to node when node is a directory (fsnode.Parent), so that the
// whole subtree gets .../ children. Any other node is returned as is.
func perNodeRecurse(node fsnode.T, fns []PerNodeFunc) fsnode.T {
	if dir, isDir := node.(fsnode.Parent); isDir {
		return ApplyPerNodeFuncs(dir, fns...)
	}
	return node
}
119 changes: 119 additions & 0 deletions file/addfs/per_node_test.go
@@ -0,0 +1,119 @@
package addfs

import (
"context"
"fmt"
"sort"
"strings"
"testing"

"github.com/grailbio/base/file/fsnode"
. "github.com/grailbio/base/file/fsnode/fsnodetesting"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestPerNodeFuncs exercises ApplyPerNodeFuncs on a small in-memory tree.
// The "basic" subtest checks the complete resulting tree, including every .../ directory.
// The "lazy" subtest checks that a walk which skips .../ reproduces the input tree.
func TestPerNodeFuncs(t *testing.T) {
ctx := context.Background()
// root builds a fresh description of the input tree for each subtest.
root := func() Parent {
return Parent{
"dir0": Parent{},
"dir1": Parent{
"dir10": Parent{
"a": []byte("content dir10/a"),
"b": []byte("content dir10/b"),
},
"a": []byte("content dir1/a"),
"b": []byte("content dir1/b"),
},
}
}
t.Run("basic", func(t *testing.T) {
root := root()
n := MakeT(t, "", root).(fsnode.Parent)
n = ApplyPerNodeFuncs(n,
// First func: for a directory, adds one file whose content is the sorted,
// comma-joined names of the directory's children; for a file, adds an empty "copy".
NewPerNodeFunc(
func(ctx context.Context, node fsnode.T) ([]fsnode.T, error) {
switch n := node.(type) {
case fsnode.Parent:
iter := n.Children()
defer func() { assert.NoError(t, iter.Close(ctx)) }()
children, err := fsnode.IterateAll(ctx, iter)
assert.NoError(t, err)
var names []string
for _, child := range children {
names = append(names, child.Name())
}
// Sort so the expected content does not depend on iteration order.
sort.Strings(names)
return []fsnode.T{
fsnode.ConstLeaf(fsnode.NewRegInfo("children names"), []byte(strings.Join(names, ","))),
}, nil
case fsnode.Leaf:
return []fsnode.T{
fsnode.ConstLeaf(fsnode.NewRegInfo("copy"), nil), // Will be overwritten.
}, nil
}
require.Failf(t, "invalid node type", "node: %T", node)
panic("unreachable")
},
),
// Second func: adds nothing for a directory; for a file, adds a "copy" holding the file's
// contents. It has the same name as the first func's "copy" and, coming later, replaces it.
NewPerNodeFunc(
func(ctx context.Context, node fsnode.T) ([]fsnode.T, error) {
switch n := node.(type) {
case fsnode.Parent:
return nil, nil
case fsnode.Leaf:
return []fsnode.T{
fsnode.ConstLeaf(fsnode.NewRegInfo("copy"), LeafReadAll(ctx, t, n)),
}, nil
}
require.Failf(t, "invalid node type", "node: %T", node)
panic("unreachable")
},
),
)
got := Walker{}.WalkContents(ctx, t, n)
// Every directory gains a "..." child that holds one directory per original child; those
// directories contain the nodes added by the funcs above.
want := Parent{
"...": Parent{
"dir0": Parent{"children names": []byte("")},
"dir1": Parent{"children names": []byte("a,b,dir10")},
},
"dir0": Parent{
"...": Parent{},
},
"dir1": Parent{
"...": Parent{
"dir10": Parent{"children names": []byte("a,b")},
"a": Parent{"copy": []byte("content dir1/a")},
"b": Parent{"copy": []byte("content dir1/b")},
},
"dir10": Parent{
"...": Parent{
"a": Parent{"copy": []byte("content dir10/a")},
"b": Parent{"copy": []byte("content dir10/b")},
},
"a": []byte("content dir10/a"),
"b": []byte("content dir10/b"),
},
"a": []byte("content dir1/a"),
"b": []byte("content dir1/b"),
},
}
assert.Equal(t, want, got)
})
t.Run("lazy", func(t *testing.T) {
root := root()
n := MakeT(t, "", root).(fsnode.Parent)
// The func always returns an error. The walk below skips every .../ directory, so the
// result is expected to equal the plain input tree.
// NOTE(review): presumably WalkContents would fail the test if the func's error surfaced,
// which is what makes this a laziness check — confirm against fsnodetesting.
n = ApplyPerNodeFuncs(n, NewPerNodeFunc(
func(_ context.Context, node fsnode.T) ([]fsnode.T, error) {
return nil, fmt.Errorf("func was called: %q", node.Name())
},
))
got := Walker{
IgnoredNames: map[string]struct{}{
addsDirName: struct{}{},
},
}.WalkContents(ctx, t, n)
assert.Equal(t, root, got)
})
}
7 changes: 7 additions & 0 deletions file/addfs/per_subtree.go
@@ -0,0 +1,7 @@
package addfs

// TODO: Implement PerSubtreeFunc.
// A PerNodeFunc is applied independently to each node in an entire directory tree. It may be
// useful to define funcs that are contextual. For example if an fsnode.Parent called base/ has a
// child called .git, we may want to define git-repository-aware views for each descendent node,
// like base/file/addfs/.../per_subtree.go/git/log.txt containing history.

0 comments on commit 2a979ca

Please sign in to comment.