Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

overlay: use idmapped lower layers where supported #1180

Merged
merged 1 commit into from Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
54 changes: 54 additions & 0 deletions drivers/overlay/check.go
@@ -1,3 +1,4 @@
//go:build linux
// +build linux

package overlay
Expand All @@ -11,6 +12,7 @@ import (
"syscall"

"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/ioutils"
"github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/system"
Expand Down Expand Up @@ -218,3 +220,55 @@ func doesVolatile(d string) (bool, error) {
}()
return true, nil
}

// supportsIdmappedLowerLayers checks if the kernel supports mounting overlay on top of
// a idmapped lower layer.
func supportsIdmappedLowerLayers(home string) (bool, error) {
layerDir, err := ioutil.TempDir(home, "compat")
if err != nil {
return false, err
}
defer func() {
_ = os.RemoveAll(layerDir)
}()

mergedDir := filepath.Join(layerDir, "merged")
lowerDir := filepath.Join(layerDir, "lower")
lowerMappedDir := filepath.Join(layerDir, "lower-mapped")
upperDir := filepath.Join(layerDir, "upper")
workDir := filepath.Join(layerDir, "work")

_ = idtools.MkdirAs(mergedDir, 0700, 0, 0)
_ = idtools.MkdirAs(lowerDir, 0700, 0, 0)
_ = idtools.MkdirAs(lowerMappedDir, 0700, 0, 0)
_ = idtools.MkdirAs(upperDir, 0700, 0, 0)
_ = idtools.MkdirAs(workDir, 0700, 0, 0)

idmap := []idtools.IDMap{
{
ContainerID: 0,
HostID: 0,
Size: 1,
},
}
pid, cleanupFunc, err := createUsernsProcess(idmap, idmap)
if err != nil {
return false, err
}
defer cleanupFunc()

if err := createIDMappedMount(lowerDir, lowerMappedDir, int(pid)); err != nil {
return false, errors.Wrapf(err, "create mapped mount")
}
defer unix.Unmount(lowerMappedDir, unix.MNT_DETACH)

opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerMappedDir, upperDir, workDir)
flags := uintptr(0)
if err := unix.Mount("overlay", mergedDir, "overlay", flags, opts); err != nil {
return false, err
}
defer func() {
_ = unix.Unmount(mergedDir, unix.MNT_DETACH)
}()
return true, nil
}
160 changes: 160 additions & 0 deletions drivers/overlay/idmapped_utils.go
@@ -0,0 +1,160 @@
//go:build linux
// +build linux

package overlay

import (
"fmt"
"io/ioutil"
"os"
"syscall"
"unsafe"

"github.com/containers/storage/pkg/idtools"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)

type attr struct {
attrSet uint64
attrClr uint64
propagation uint64
userNs uint64
}

const (
// _MOUNT_ATTR_IDMAP - Idmap mount to @userns_fd in struct mount_attr
_MOUNT_ATTR_IDMAP = 0x00100000 //nolint:golint

// _OPEN_TREE_CLONE - Clone the source path mount
_OPEN_TREE_CLONE = 0x00000001 //nolint:golint

// _MOVE_MOUNT_F_EMPTY_PATH - Move the path referenced by the fd
_MOVE_MOUNT_F_EMPTY_PATH = 0x00000004 //nolint:golint
)

// openTree is a wrapper for the open_tree syscall
func openTree(path string, flags int) (fd int, err error) {
var _p0 *byte

if _p0, err = syscall.BytePtrFromString(path); err != nil {
return 0, err
}

r, _, e1 := syscall.Syscall6(uintptr(unix.SYS_OPEN_TREE), uintptr(0), uintptr(unsafe.Pointer(_p0)),
uintptr(flags), 0, 0, 0)
if e1 != 0 {
err = e1
}
return int(r), nil
}

// moveMount is a wrapper for the the move_mount syscall.
func moveMount(fdTree int, target string) (err error) {
var _p0, _p1 *byte

empty := ""

if _p0, err = syscall.BytePtrFromString(target); err != nil {
return err
}
if _p1, err = syscall.BytePtrFromString(empty); err != nil {
return err
}

flags := _MOVE_MOUNT_F_EMPTY_PATH

_, _, e1 := syscall.Syscall6(uintptr(unix.SYS_MOVE_MOUNT),
uintptr(fdTree), uintptr(unsafe.Pointer(_p1)),
0, uintptr(unsafe.Pointer(_p0)), uintptr(flags), 0)
if e1 != 0 {
err = e1
}
return
}

// mountSetAttr is a wrapper for the mount_setattr syscall
func mountSetAttr(dfd int, path string, flags uint, attr *attr, size uint) (err error) {
var _p0 *byte

if _p0, err = syscall.BytePtrFromString(path); err != nil {
return err
}

_, _, e1 := syscall.Syscall6(uintptr(unix.SYS_MOUNT_SETATTR), uintptr(dfd), uintptr(unsafe.Pointer(_p0)),
uintptr(flags), uintptr(unsafe.Pointer(attr)), uintptr(size), 0)
if e1 != 0 {
err = e1
}
return
}

// createIDMappedMount creates a IDMapped bind mount from SOURCE to TARGET using the user namespace
// for the PID process.
func createIDMappedMount(source, target string, pid int) error {
path := fmt.Sprintf("/proc/%d/ns/user", pid)
userNsFile, err := os.Open(path)
if err != nil {
return errors.Wrapf(err, "unable to get user ns file descriptor for %q", path)
}

var attr attr
attr.attrSet = _MOUNT_ATTR_IDMAP
attr.attrClr = 0
attr.propagation = 0
attr.userNs = uint64(userNsFile.Fd())

defer userNsFile.Close()

targetDirFd, err := openTree(source, _OPEN_TREE_CLONE|unix.AT_RECURSIVE)
if err != nil {
return err
}
defer unix.Close(targetDirFd)

if err := mountSetAttr(targetDirFd, "", unix.AT_EMPTY_PATH|unix.AT_RECURSIVE,
&attr, uint(unsafe.Sizeof(attr))); err != nil {
return err
}
if err := os.Mkdir(target, 0700); err != nil && !os.IsExist(err) {
return err
}
return moveMount(targetDirFd, target)
}

// createUsernsProcess forks the current process and creates a user namespace using the specified
// mappings. It returns the pid of the new process.
func createUsernsProcess(uidMaps []idtools.IDMap, gidMaps []idtools.IDMap) (int, func(), error) {
pid, _, err := syscall.Syscall6(uintptr(unix.SYS_CLONE), unix.CLONE_NEWUSER|uintptr(unix.SIGCHLD), 0, 0, 0, 0, 0)
if err != 0 {
return -1, nil, err
}
if pid == 0 {
_ = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
// just wait for the SIGKILL
for {
syscall.Syscall6(uintptr(unix.SYS_PAUSE), 0, 0, 0, 0, 0, 0)
}
}
cleanupFunc := func() {
unix.Kill(int(pid), unix.SIGKILL)
_, _ = unix.Wait4(int(pid), nil, 0, nil)
}
writeMappings := func(fname string, idmap []idtools.IDMap) error {
mappings := ""
for _, m := range idmap {
mappings = mappings + fmt.Sprintf("%d %d %d\n", m.ContainerID, m.HostID, m.Size)
}
return ioutil.WriteFile(fmt.Sprintf("/proc/%d/%s", pid, fname), []byte(mappings), 0600)
}
if err := writeMappings("uid_map", uidMaps); err != nil {
cleanupFunc()
return -1, nil, err
}
if err := writeMappings("gid_map", gidMaps); err != nil {
cleanupFunc()
return -1, nil, err
}

return int(pid), cleanupFunc, nil
}
112 changes: 111 additions & 1 deletion drivers/overlay/overlay.go
Expand Up @@ -121,6 +121,8 @@ type Driver struct {
supportsVolatile *bool
usingMetacopy bool
locker *locker.Locker

supportsIDMappedMounts *bool
}

type additionalLayerStore struct {
Expand Down Expand Up @@ -205,6 +207,26 @@ func checkSupportVolatile(home, runhome string) (bool, error) {
return usingVolatile, nil
}

// checkAndRecordIDMappedSupport checks and stores if the kernel supports mounting overlay on top of a
// idmapped lower layer.
func checkAndRecordIDMappedSupport(home, runhome string) (bool, error) {
feature := "idmapped-lower-dir"
overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
if err == nil {
if overlayCacheResult {
logrus.Debugf("Cached value indicated that overlay is supported")
return true, nil
}
logrus.Debugf("Cached value indicated that overlay is not supported")
return false, errors.New(overlayCacheText)
}
supportsIDMappedMounts, err := supportsIdmappedLowerLayers(home)
if err2 := cachedFeatureRecord(runhome, feature, supportsIDMappedMounts, ""); err2 != nil {
return false, errors.Wrap(err2, "recording overlay idmapped mounts support status")
}
return supportsIDMappedMounts, err
}

func checkAndRecordOverlaySupport(fsMagic graphdriver.FsMagic, home, runhome string) (bool, error) {
var supportsDType bool

Expand Down Expand Up @@ -1485,6 +1507,51 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
}

if d.supportsIDmappedMounts() && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 {
var newAbsDir []string
mappedRoot := filepath.Join(d.home, id, "mapped")
if err := os.MkdirAll(mappedRoot, 0700); err != nil {
return "", err
}

pid, cleanupFunc, err := createUsernsProcess(options.UidMaps, options.GidMaps)
if err != nil {
return "", err
}
defer cleanupFunc()

idMappedMounts := make(map[string]string)

// rewrite the lower dirs to their idmapped mount.
c := 0
for _, absLower := range absLowers {
mappedMountSrc := getMappedMountRoot(absLower)

root, found := idMappedMounts[mappedMountSrc]
if !found {
root = filepath.Join(mappedRoot, fmt.Sprintf("%d", c))
c++
if err := createIDMappedMount(mappedMountSrc, root, int(pid)); err != nil {
return "", errors.Wrapf(err, "create mapped mount for %q on %q", mappedMountSrc, root)
}
idMappedMounts[mappedMountSrc] = root

// overlay takes a reference on the mount, so it is safe to unmount
// the mapped idmounts as soon as the final overlay file system is mounted.
defer unix.Unmount(root, unix.MNT_DETACH)
}

// relative path to the layer through the id mapped mount
rel, err := filepath.Rel(mappedMountSrc, absLower)
if err != nil {
return "", err
}

newAbsDir = append(newAbsDir, filepath.Join(root, rel))
}
absLowers = newAbsDir
}

var opts string
if readWrite {
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), diffDir, workdir)
Expand Down Expand Up @@ -1587,6 +1654,18 @@ func (d *Driver) Put(id string) error {

unmounted := false

mappedRoot := filepath.Join(d.home, id, "mapped")
// It should not happen, but cleanup any mapped mount if it was leaked.
if _, err := os.Stat(mappedRoot); err == nil {
mounts, err := ioutil.ReadDir(mappedRoot)
if err == nil {
// Go through all of the mapped mounts.
for _, m := range mounts {
_ = unix.Unmount(filepath.Join(mappedRoot, m.Name()), unix.MNT_DETACH)
}
}
}

if d.options.mountProgram != "" {
// Attempt to unmount the FUSE mount using either fusermount or fusermount3.
// If they fail, fallback to unix.Unmount
Expand Down Expand Up @@ -1958,12 +2037,31 @@ func (d *Driver) UpdateLayerIDMap(id string, toContainer, toHost *idtools.IDMapp
return nil
}

// supportsIDmappedMounts returns whether the kernel supports using idmapped mounts with
// overlay lower layers.
func (d *Driver) supportsIDmappedMounts() bool {
if d.supportsIDMappedMounts != nil {
return *d.supportsIDMappedMounts
}

supportsIDMappedMounts, err := checkAndRecordIDMappedSupport(d.home, d.runhome)
d.supportsIDMappedMounts = &supportsIDMappedMounts
if err == nil {
return supportsIDMappedMounts
}
logrus.Debugf("Check for idmapped mounts support %v", err)
return false
}

// SupportsShifting tells whether the driver support shifting of the UIDs/GIDs in an userNS
func (d *Driver) SupportsShifting() bool {
if os.Getenv("_TEST_FORCE_SUPPORT_SHIFTING") == "yes-please" {
return true
}
return d.options.mountProgram != ""
if d.options.mountProgram != "" {
return true
}
return d.supportsIDmappedMounts()
}

// dumbJoin is more or less a dumber version of filepath.Join, but one which
Expand Down Expand Up @@ -2132,3 +2230,15 @@ func redirectDiffIfAdditionalLayer(diffPath string) (string, error) {
}
return diffPath, nil
}

// getMappedMountRoot is a heuristic that calculates the parent directory where
// the idmapped mount should be applied.
// It is useful to minimize the number of idmapped mounts and at the same time use
// a common path as long as possible to reduce the length of the mount data argument.
func getMappedMountRoot(path string) string {
dirName := filepath.Dir(path)
if filepath.Base(dirName) == linkDir {
return filepath.Dir(dirName)
}
return dirName
}