Skip to content

Commit

Permalink
Merge pull request #1180 from giuseppe/idmapped-overlay-lower-layers
Browse files Browse the repository at this point in the history
overlay: use idmapped lower layers where supported
  • Loading branch information
rhatdan committed Apr 12, 2022
2 parents ea4008e + b9b8a59 commit 4203c21
Show file tree
Hide file tree
Showing 3 changed files with 325 additions and 1 deletion.
54 changes: 54 additions & 0 deletions drivers/overlay/check.go
@@ -1,3 +1,4 @@
//go:build linux
// +build linux

package overlay
Expand All @@ -11,6 +12,7 @@ import (
"syscall"

"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/ioutils"
"github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/system"
Expand Down Expand Up @@ -218,3 +220,55 @@ func doesVolatile(d string) (bool, error) {
}()
return true, nil
}

// supportsIdmappedLowerLayers probes whether the running kernel accepts an
// idmapped mount as the lower layer of an overlay mount.
//
// The probe builds a scratch directory tree under home, creates an idmapped
// bind mount of an empty "lower" directory through a throw-away user
// namespace (mapping ID 0 to ID 0), and then tries to mount overlay on top of
// it. Everything that was created is torn down again before returning.
//
// It returns true with a nil error when the overlay mount succeeded, and
// false together with the error of the first failing step otherwise.
func supportsIdmappedLowerLayers(home string) (bool, error) {
	layerDir, err := ioutil.TempDir(home, "compat")
	if err != nil {
		return false, err
	}
	defer func() {
		_ = os.RemoveAll(layerDir)
	}()

	var (
		mergedDir      = filepath.Join(layerDir, "merged")
		lowerDir       = filepath.Join(layerDir, "lower")
		lowerMappedDir = filepath.Join(layerDir, "lower-mapped")
		upperDir       = filepath.Join(layerDir, "upper")
		workDir        = filepath.Join(layerDir, "work")
	)

	// Directory creation errors are ignored here on purpose: a missing
	// directory makes one of the mount steps below fail instead.
	for _, dir := range []string{mergedDir, lowerDir, lowerMappedDir, upperDir, workDir} {
		_ = idtools.MkdirAs(dir, 0700, 0, 0)
	}

	// A single-entry mapping (0 -> 0) is enough to exercise the feature.
	identityMap := []idtools.IDMap{{ContainerID: 0, HostID: 0, Size: 1}}
	pid, cleanupFunc, err := createUsernsProcess(identityMap, identityMap)
	if err != nil {
		return false, err
	}
	defer cleanupFunc()

	if mountErr := createIDMappedMount(lowerDir, lowerMappedDir, int(pid)); mountErr != nil {
		return false, errors.Wrapf(mountErr, "create mapped mount")
	}
	defer unix.Unmount(lowerMappedDir, unix.MNT_DETACH)

	mountData := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerMappedDir, upperDir, workDir)
	if mountErr := unix.Mount("overlay", mergedDir, "overlay", uintptr(0), mountData); mountErr != nil {
		return false, mountErr
	}
	defer func() {
		_ = unix.Unmount(mergedDir, unix.MNT_DETACH)
	}()

	return true, nil
}
160 changes: 160 additions & 0 deletions drivers/overlay/idmapped_utils.go
@@ -0,0 +1,160 @@
//go:build linux
// +build linux

package overlay

import (
"fmt"
"io/ioutil"
"os"
"syscall"
"unsafe"

"github.com/containers/storage/pkg/idtools"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)

// attr is the argument structure handed to the mount_setattr syscall by
// mountSetAttr. Its address and size are passed straight to the kernel, so the
// field order and widths must not be changed.
// NOTE(review): presumably mirrors the kernel's struct mount_attr — confirm
// against the kernel headers before touching the layout.
type attr struct {
// attrSet holds the mount attributes to set (e.g. _MOUNT_ATTR_IDMAP).
attrSet uint64
// attrClr holds the mount attributes to clear; left at 0 by createIDMappedMount.
attrClr uint64
// propagation is left at 0 by createIDMappedMount.
propagation uint64
// userNs is the file descriptor of the user namespace used for the idmapping.
userNs uint64
}

// Flag values for the open_tree, move_mount and mount_setattr syscalls that
// are used by the wrappers in this file.
const (
// _MOUNT_ATTR_IDMAP - Idmap mount to @userns_fd in struct mount_attr.
// Stored in attr.attrSet by createIDMappedMount.
_MOUNT_ATTR_IDMAP = 0x00100000 //nolint:golint

// _OPEN_TREE_CLONE - Clone the source path mount.
// Passed to openTree by createIDMappedMount.
_OPEN_TREE_CLONE = 0x00000001 //nolint:golint

// _MOVE_MOUNT_F_EMPTY_PATH - Move the path referenced by the fd.
// Passed by moveMount together with an empty source path.
_MOVE_MOUNT_F_EMPTY_PATH = 0x00000004 //nolint:golint
)

// openTree is a wrapper for the open_tree syscall.
//
// path is the mount to open and flags are the raw open_tree flags (for
// example _OPEN_TREE_CLONE). On success it returns the new file descriptor.
// On failure it returns -1 and the error: either the path conversion error
// (path contains a NUL byte) or the errno reported by the kernel.
//
// NOTE(review): the directory fd argument is hard-coded to 0, so callers are
// expected to pass absolute paths — confirm before using it with relative ones.
func openTree(path string, flags int) (fd int, err error) {
	pathPtr, err := syscall.BytePtrFromString(path)
	if err != nil {
		return -1, err
	}

	r, _, errno := syscall.Syscall6(uintptr(unix.SYS_OPEN_TREE), uintptr(0), uintptr(unsafe.Pointer(pathPtr)),
		uintptr(flags), 0, 0, 0)
	if errno != 0 {
		// Propagate the failure; previously the errno was dropped and the
		// caller received a bogus descriptor with a nil error.
		return -1, errno
	}
	return int(r), nil
}

// moveMount is a wrapper for the the move_mount syscall.
func moveMount(fdTree int, target string) (err error) {
var _p0, _p1 *byte

empty := ""

if _p0, err = syscall.BytePtrFromString(target); err != nil {
return err
}
if _p1, err = syscall.BytePtrFromString(empty); err != nil {
return err
}

flags := _MOVE_MOUNT_F_EMPTY_PATH

_, _, e1 := syscall.Syscall6(uintptr(unix.SYS_MOVE_MOUNT),
uintptr(fdTree), uintptr(unsafe.Pointer(_p1)),
0, uintptr(unsafe.Pointer(_p0)), uintptr(flags), 0)
if e1 != 0 {
err = e1
}
return
}

// mountSetAttr is a wrapper for the mount_setattr syscall.
//
// dfd and path identify the mount, flags are the raw AT_* flags, and attr
// points to the attribute structure whose byte size is given by size. It
// returns the path conversion error or the errno reported by the syscall, and
// nil on success.
func mountSetAttr(dfd int, path string, flags uint, attr *attr, size uint) (err error) {
	pathPtr, err := syscall.BytePtrFromString(path)
	if err != nil {
		return err
	}

	if _, _, errno := syscall.Syscall6(uintptr(unix.SYS_MOUNT_SETATTR), uintptr(dfd), uintptr(unsafe.Pointer(pathPtr)),
		uintptr(flags), uintptr(unsafe.Pointer(attr)), uintptr(size), 0); errno != 0 {
		return errno
	}
	return nil
}

// createIDMappedMount creates an idmapped bind mount from source to target,
// using the user namespace of the process identified by pid.
//
// The steps are: open /proc/<pid>/ns/user, clone the mount at source with
// open_tree, apply _MOUNT_ATTR_IDMAP to the clone with mount_setattr, make
// sure target exists (an already existing target is accepted), and finally
// attach the clone at target with move_mount. The first failing step's error
// is returned.
func createIDMappedMount(source, target string, pid int) error {
	nsPath := fmt.Sprintf("/proc/%d/ns/user", pid)
	userNsFile, err := os.Open(nsPath)
	if err != nil {
		return errors.Wrapf(err, "unable to get user ns file descriptor for %q", nsPath)
	}
	// The namespace fd only has to stay open until mount_setattr has run.
	defer userNsFile.Close()

	mountAttr := attr{
		attrSet:     _MOUNT_ATTR_IDMAP,
		attrClr:     0,
		propagation: 0,
		userNs:      uint64(userNsFile.Fd()),
	}

	treeFd, err := openTree(source, _OPEN_TREE_CLONE|unix.AT_RECURSIVE)
	if err != nil {
		return err
	}
	defer unix.Close(treeFd)

	err = mountSetAttr(treeFd, "", unix.AT_EMPTY_PATH|unix.AT_RECURSIVE,
		&mountAttr, uint(unsafe.Sizeof(mountAttr)))
	if err != nil {
		return err
	}

	if mkdirErr := os.Mkdir(target, 0700); mkdirErr != nil && !os.IsExist(mkdirErr) {
		return mkdirErr
	}
	return moveMount(treeFd, target)
}

// createUsernsProcess forks the current process and creates a user namespace using the specified
// mappings. It returns the pid of the new process and a cleanup function that kills the
// process and waits for it; the caller must invoke the cleanup function once the namespace
// is no longer needed. On failure it returns -1, a nil cleanup function and the error.
func createUsernsProcess(uidMaps []idtools.IDMap, gidMaps []idtools.IDMap) (int, func(), error) {
// Raw clone syscall: the child is placed in a new user namespace and SIGCHLD is
// delivered to the parent when it exits.
// NOTE(review): this forks without going through the Go runtime; the child branch
// below only issues prctl and pause — confirm that nothing else may run there.
pid, _, err := syscall.Syscall6(uintptr(unix.SYS_CLONE), unix.CLONE_NEWUSER|uintptr(unix.SIGCHLD), 0, 0, 0, 0, 0)
if err != 0 {
return -1, nil, err
}
if pid == 0 {
// Child: ask to be killed with SIGKILL when the parent dies, so it cannot be leaked.
_ = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
// just wait for the SIGKILL
for {
syscall.Syscall6(uintptr(unix.SYS_PAUSE), 0, 0, 0, 0, 0, 0)
}
}
// Parent: cleanupFunc kills the child and waits for it; errors are ignored (best effort).
cleanupFunc := func() {
unix.Kill(int(pid), unix.SIGKILL)
_, _ = unix.Wait4(int(pid), nil, 0, nil)
}
// writeMappings writes one "<container id> <host id> <size>" line per mapping
// to /proc/<pid>/<fname>.
writeMappings := func(fname string, idmap []idtools.IDMap) error {
mappings := ""
for _, m := range idmap {
mappings = mappings + fmt.Sprintf("%d %d %d\n", m.ContainerID, m.HostID, m.Size)
}
return ioutil.WriteFile(fmt.Sprintf("/proc/%d/%s", pid, fname), []byte(mappings), 0600)
}
// If either map cannot be written the child is torn down before returning.
if err := writeMappings("uid_map", uidMaps); err != nil {
cleanupFunc()
return -1, nil, err
}
if err := writeMappings("gid_map", gidMaps); err != nil {
cleanupFunc()
return -1, nil, err
}

return int(pid), cleanupFunc, nil
}
112 changes: 111 additions & 1 deletion drivers/overlay/overlay.go
Expand Up @@ -121,6 +121,8 @@ type Driver struct {
supportsVolatile *bool
usingMetacopy bool
locker *locker.Locker

supportsIDMappedMounts *bool
}

type additionalLayerStore struct {
Expand Down Expand Up @@ -205,6 +207,26 @@ func checkSupportVolatile(home, runhome string) (bool, error) {
return usingVolatile, nil
}

// checkAndRecordIDMappedSupport checks and stores whether the kernel supports
// mounting overlay on top of an idmapped lower layer.
//
// A previously recorded result under runhome is used when available: a cached
// positive result returns (true, nil), a cached negative result returns false
// together with an error built from the recorded text. Otherwise the kernel is
// probed with supportsIdmappedLowerLayers(home) and the outcome, including the
// reason for a failure, is recorded under runhome for later calls.
//
// It returns an error if the feature is unsupported or if the result could
// not be recorded.
func checkAndRecordIDMappedSupport(home, runhome string) (bool, error) {
	feature := "idmapped-lower-dir"
	overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
	if err == nil {
		if overlayCacheResult {
			logrus.Debugf("Cached value indicated that idmapped mounts for overlay are supported")
			return true, nil
		}
		logrus.Debugf("Cached value indicated that idmapped mounts for overlay are not supported")
		if overlayCacheText == "" {
			// Older records may carry no reason; avoid returning an empty error.
			overlayCacheText = "idmapped mounts for overlay are not supported"
		}
		return false, errors.New(overlayCacheText)
	}

	supportsIDMappedMounts, err := supportsIdmappedLowerLayers(home)
	// Record why the probe failed so that a later cache hit can report it.
	recordText := ""
	if err != nil {
		recordText = err.Error()
	}
	if err2 := cachedFeatureRecord(runhome, feature, supportsIDMappedMounts, recordText); err2 != nil {
		return false, errors.Wrap(err2, "recording overlay idmapped mounts support status")
	}
	return supportsIDMappedMounts, err
}

func checkAndRecordOverlaySupport(fsMagic graphdriver.FsMagic, home, runhome string) (bool, error) {
var supportsDType bool

Expand Down Expand Up @@ -1485,6 +1507,51 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
}

if d.supportsIDmappedMounts() && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 {
var newAbsDir []string
mappedRoot := filepath.Join(d.home, id, "mapped")
if err := os.MkdirAll(mappedRoot, 0700); err != nil {
return "", err
}

pid, cleanupFunc, err := createUsernsProcess(options.UidMaps, options.GidMaps)
if err != nil {
return "", err
}
defer cleanupFunc()

idMappedMounts := make(map[string]string)

// rewrite the lower dirs to their idmapped mount.
c := 0
for _, absLower := range absLowers {
mappedMountSrc := getMappedMountRoot(absLower)

root, found := idMappedMounts[mappedMountSrc]
if !found {
root = filepath.Join(mappedRoot, fmt.Sprintf("%d", c))
c++
if err := createIDMappedMount(mappedMountSrc, root, int(pid)); err != nil {
return "", errors.Wrapf(err, "create mapped mount for %q on %q", mappedMountSrc, root)
}
idMappedMounts[mappedMountSrc] = root

// overlay takes a reference on the mount, so it is safe to unmount
// the mapped idmounts as soon as the final overlay file system is mounted.
defer unix.Unmount(root, unix.MNT_DETACH)
}

// relative path to the layer through the id mapped mount
rel, err := filepath.Rel(mappedMountSrc, absLower)
if err != nil {
return "", err
}

newAbsDir = append(newAbsDir, filepath.Join(root, rel))
}
absLowers = newAbsDir
}

var opts string
if readWrite {
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), diffDir, workdir)
Expand Down Expand Up @@ -1587,6 +1654,18 @@ func (d *Driver) Put(id string) error {

unmounted := false

mappedRoot := filepath.Join(d.home, id, "mapped")
// This should not happen, but clean up any mapped mounts that were leaked.
if _, err := os.Stat(mappedRoot); err == nil {
mounts, err := ioutil.ReadDir(mappedRoot)
if err == nil {
// Go through all of the mapped mounts.
for _, m := range mounts {
_ = unix.Unmount(filepath.Join(mappedRoot, m.Name()), unix.MNT_DETACH)
}
}
}

if d.options.mountProgram != "" {
// Attempt to unmount the FUSE mount using either fusermount or fusermount3.
// If they fail, fallback to unix.Unmount
Expand Down Expand Up @@ -1958,12 +2037,31 @@ func (d *Driver) UpdateLayerIDMap(id string, toContainer, toHost *idtools.IDMapp
return nil
}

// supportsIDmappedMounts reports whether the kernel supports using idmapped
// mounts for overlay lower layers.
//
// The answer is computed once through checkAndRecordIDMappedSupport and then
// memoized on the driver. A failing check is logged at debug level and
// reported as "not supported".
func (d *Driver) supportsIDmappedMounts() bool {
	if cached := d.supportsIDMappedMounts; cached != nil {
		return *cached
	}

	supported, err := checkAndRecordIDMappedSupport(d.home, d.runhome)
	d.supportsIDMappedMounts = &supported
	if err != nil {
		logrus.Debugf("Check for idmapped mounts support %v", err)
		return false
	}
	return supported
}

// SupportsShifting tells whether the driver supports shifting of the UIDs/GIDs
// in a user namespace.
//
// Shifting is available when it is forced through the
// _TEST_FORCE_SUPPORT_SHIFTING environment variable, when a mount program is
// configured, or when the kernel supports idmapped mounts for overlay lower
// layers.
func (d *Driver) SupportsShifting() bool {
	if os.Getenv("_TEST_FORCE_SUPPORT_SHIFTING") == "yes-please" {
		return true
	}
	// The former unconditional `return d.options.mountProgram != ""` was
	// dropped: it made the idmapped mounts fallback below unreachable.
	if d.options.mountProgram != "" {
		return true
	}
	return d.supportsIDmappedMounts()
}

// dumbJoin is more or less a dumber version of filepath.Join, but one which
Expand Down Expand Up @@ -2132,3 +2230,15 @@ func redirectDiffIfAdditionalLayer(diffPath string) (string, error) {
}
return diffPath, nil
}

// getMappedMountRoot is a heuristic that picks the directory on which the
// idmapped mount for path should be created.
//
// It returns the parent directory of path, unless that parent is named
// linkDir, in which case the grandparent is returned instead. Sharing a
// common root keeps the number of idmapped mounts low and the paths in the
// mount data argument short.
func getMappedMountRoot(path string) string {
	parent := filepath.Dir(path)
	if filepath.Base(parent) != linkDir {
		return parent
	}
	return filepath.Dir(parent)
}

0 comments on commit 4203c21

Please sign in to comment.