diff --git a/drivers/overlay/check.go b/drivers/overlay/check.go
index 44b3515a85..ad9238ae98 100644
--- a/drivers/overlay/check.go
+++ b/drivers/overlay/check.go
@@ -1,3 +1,4 @@
+//go:build linux
 // +build linux
 
 package overlay
@@ -11,6 +12,7 @@ import (
 	"syscall"
 
 	"github.com/containers/storage/pkg/archive"
+	"github.com/containers/storage/pkg/idtools"
 	"github.com/containers/storage/pkg/ioutils"
 	"github.com/containers/storage/pkg/mount"
 	"github.com/containers/storage/pkg/system"
@@ -218,3 +220,78 @@ func doesVolatile(d string) (bool, error) {
 	}()
 	return true, nil
 }
+
+// supportsIdmappedLowerLayers checks if the kernel supports mounting overlay on top of
+// an idmapped lower layer.  It probes by creating the mounts in a temporary directory
+// under home and removes them again before returning.
+func supportsIdmappedLowerLayers(home string) (bool, error) {
+	layerDir, err := ioutil.TempDir(home, "compat")
+	if err != nil {
+		return false, err
+	}
+
+	mergedDir := filepath.Join(layerDir, "merged")
+	lowerDir := filepath.Join(layerDir, "lower")
+	lowerMappedDir := filepath.Join(layerDir, "lower-mapped")
+	upperDir := filepath.Join(layerDir, "upper")
+	workDir := filepath.Join(layerDir, "work")
+
+	// Best-effort teardown.  The idmapped mount must be detached as well, otherwise
+	// RemoveAll fails on the busy mount point and the temporary directory is leaked.
+	defer func() {
+		_ = unix.Unmount(mergedDir, unix.MNT_DETACH)
+		_ = unix.Unmount(lowerMappedDir, unix.MNT_DETACH)
+		_ = os.RemoveAll(layerDir)
+	}()
+
+	for _, dir := range []string{mergedDir, lowerDir, lowerMappedDir, upperDir, workDir} {
+		if err := idtools.MkdirAs(dir, 0700, 0, 0); err != nil {
+			return false, err
+		}
+	}
+
+	// Map only root to root: the probe just needs some user namespace to idmap with.
+	idmap := []idtools.IDMap{{ContainerID: 0, HostID: 0, Size: 1}}
+	pid, cleanupFunc, err := createUsernsProcess(idmap, idmap)
+	if err != nil {
+		return false, err
+	}
+	// The helper process only provides the user namespace; kill and reap it on return.
+	defer cleanupFunc()
+
+	if err := getIDMappedMount(lowerDir, lowerMappedDir, int(pid)); err != nil {
+		return false, errors.Wrapf(err, "create mapped mount")
+	}
+
+	opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerMappedDir, upperDir, workDir)
+	flags := uintptr(0)
+	if err := unix.Mount("overlay", mergedDir, "overlay", flags, opts); err != nil {
+		return false, err
+	}
+	return true, nil
+}
+
+// checkAndRecordIdMappedSupport checks and stores if the kernel supports mounting overlay on top of an
+// idmapped lower layer.
+func checkAndRecordIdMappedSupport(home, runhome string) (bool, error) {
+	feature := "idmapped-lower-dir"
+	overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
+	if err == nil {
+		if overlayCacheResult {
+			logrus.Debugf("Cached value indicated that idmapped mounts for overlay are supported")
+			return true, nil
+		}
+		logrus.Debugf("Cached value indicated that idmapped mounts for overlay are not supported")
+		return false, errors.New(overlayCacheText)
+	}
+	supportsIdMappedMounts, err := supportsIdmappedLowerLayers(home)
+	// Store the failure reason so that a later cache hit does not return an empty error.
+	cacheText := ""
+	if err != nil {
+		cacheText = err.Error()
+	}
+	if err2 := cachedFeatureRecord(runhome, feature, supportsIdMappedMounts, cacheText); err2 != nil {
+		return false, errors.Wrap(err2, "recording overlay idmapped mounts support status")
+	}
+	return supportsIdMappedMounts, err
+}
diff --git a/drivers/overlay/idmapped_utils.go b/drivers/overlay/idmapped_utils.go
new file mode 100644
index 0000000000..96e497d3d1
--- /dev/null
+++ b/drivers/overlay/idmapped_utils.go
@@ -0,0 +1,182 @@
+//go:build linux
+// +build linux
+
+package overlay
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"syscall"
+	"unsafe"
+
+	"github.com/containers/storage/pkg/idtools"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+type attr struct {
+	attrSet     uint64 // attributes to set, e.g. MOUNT_ATTR_IDMAP
+	attrClr     uint64 // attributes to clear
+	propagation uint64 // mount propagation; left at 0 by getIDMappedMount
+	userNs      uint64 // user namespace file descriptor used with MOUNT_ATTR_IDMAP
+}
+
+const (
+	// MOUNT_ATTR_RDONLY - Mount read-only
+	MOUNT_ATTR_RDONLY = 0x00000001 //nolint:golint
+	// MOUNT_ATTR_NOSUID - Ignore suid and sgid bits
+	MOUNT_ATTR_NOSUID = 0x00000002 //nolint:golint
+	// MOUNT_ATTR_NODEV - Disallow access to device special files
+	MOUNT_ATTR_NODEV = 0x00000004 //nolint:golint
+	// MOUNT_ATTR_NOEXEC - Disallow program execution
+	MOUNT_ATTR_NOEXEC = 0x00000008 //nolint:golint
+	// MOUNT_ATTR__ATIME - Setting on how atime should be updated
+	MOUNT_ATTR__ATIME = 0x00000070 //nolint:golint
+	// MOUNT_ATTR_RELATIME - Update atime relative to mtime/ctime
+	MOUNT_ATTR_RELATIME = 0x00000000 //nolint:golint
+	// MOUNT_ATTR_NOATIME - Do not update access times
+	MOUNT_ATTR_NOATIME = 0x00000010 //nolint:golint
+	// MOUNT_ATTR_STRICTATIME - Always perform atime updates
+	MOUNT_ATTR_STRICTATIME = 0x00000020 //nolint:golint
+	// MOUNT_ATTR_NODIRATIME - Do not update directory access times
+	MOUNT_ATTR_NODIRATIME = 0x00000080 //nolint:golint
+	// MOUNT_ATTR_IDMAP - Idmap mount to @userns_fd in struct mount_attr
+	MOUNT_ATTR_IDMAP = 0x00100000 //nolint:golint
+
+	// OPEN_TREE_CLONE - Clone the source path mount
+	OPEN_TREE_CLONE = 0x00000001 //nolint:golint
+
+	// MOVE_MOUNT_F_EMPTY_PATH - Move the path referenced by the fd
+	MOVE_MOUNT_F_EMPTY_PATH = 0x00000004 //nolint:golint
+
+	// AT_RECURSIVE applies the operation to the entire subtree.
+	AT_RECURSIVE = 0x8000 //nolint:golint
+)
+
+// openTree is a wrapper for the open_tree syscall; it returns -1 and the errno on failure.
+func openTree(path string, flags int) (fd int, err error) {
+	pathPtr, err := syscall.BytePtrFromString(path)
+	if err != nil {
+		return -1, err
+	}
+	// AT_FDCWD makes a relative path resolve against the working directory, not fd 0.
+	dirfd := unix.AT_FDCWD
+	r, _, e1 := syscall.Syscall6(uintptr(unix.SYS_OPEN_TREE), uintptr(dirfd), uintptr(unsafe.Pointer(pathPtr)),
+		uintptr(flags), 0, 0, 0)
+	if e1 != 0 {
+		return -1, e1
+	}
+	return int(r), nil
+}
+
+// moveMount is a wrapper for the move_mount syscall.
+func moveMount(fdTree int, target string) (err error) {
+	var _p0, _p1 *byte
+
+	if _p0, err = syscall.BytePtrFromString(target); err != nil {
+		return err
+	}
+	// With MOVE_MOUNT_F_EMPTY_PATH the source path is empty and fdTree itself is moved.
+	if _p1, err = syscall.BytePtrFromString(""); err != nil {
+		return err
+	}
+
+	flags := MOVE_MOUNT_F_EMPTY_PATH
+	// Resolve a relative target against the working directory rather than fd 0.
+	toDirFd := unix.AT_FDCWD
+	_, _, e1 := syscall.Syscall6(uintptr(unix.SYS_MOVE_MOUNT),
+		uintptr(fdTree), uintptr(unsafe.Pointer(_p1)),
+		uintptr(toDirFd), uintptr(unsafe.Pointer(_p0)), uintptr(flags), 0)
+	if e1 != 0 {
+		err = e1
+	}
+	return
+}
+
+// mountSetAttr is a wrapper for the mount_setattr syscall; size is the byte size of *attr.
+func mountSetAttr(dfd int, path string, flags uint, attr *attr, size uint) (err error) {
+	var _p0 *byte
+
+	if _p0, err = syscall.BytePtrFromString(path); err != nil {
+		return err
+	}
+
+	_, _, e1 := syscall.Syscall6(uintptr(unix.SYS_MOUNT_SETATTR), uintptr(dfd), uintptr(unsafe.Pointer(_p0)),
+		uintptr(flags), uintptr(unsafe.Pointer(attr)), uintptr(size), 0)
+	if e1 != 0 {
+		err = e1
+	}
+	return
+}
+
+// getIDMappedMount creates an idmapped bind mount from SOURCE to TARGET using the user namespace
+// of the process PID.
+func getIDMappedMount(source, target string, pid int) error {
+	path := fmt.Sprintf("/proc/%d/ns/user", pid)
+	userNsFile, err := os.Open(path)
+	if err != nil {
+		return errors.Wrapf(err, "unable to get user ns file descriptor for %q", path)
+	}
+	defer userNsFile.Close()
+
+	// Only the idmap attribute is requested; the remaining fields keep their zero value.
+	mountAttr := attr{
+		attrSet: MOUNT_ATTR_IDMAP,
+		userNs:  uint64(userNsFile.Fd()),
+	}
+
+	targetDirFd, err := openTree(source, OPEN_TREE_CLONE|AT_RECURSIVE)
+	if err != nil {
+		return errors.Wrapf(err, "open tree %q", source)
+	}
+	defer unix.Close(targetDirFd)
+
+	if err := mountSetAttr(targetDirFd, "", unix.AT_EMPTY_PATH|AT_RECURSIVE,
+		&mountAttr, uint(unsafe.Sizeof(mountAttr))); err != nil {
+		return errors.Wrapf(err, "set idmapped attribute on clone of %q", source)
+	}
+
+	// Attach the idmapped clone at target.
+	return moveMount(targetDirFd, target)
+}
+
+// createUsernsProcess forks the current process and creates a user namespace using the specified
+// mappings. It returns the pid of the new process and a function that kills and reaps it.
+func createUsernsProcess(uidMaps []idtools.IDMap, gidMaps []idtools.IDMap) (int, func(), error) {
+	pid, _, err := syscall.Syscall6(uintptr(unix.SYS_CLONE), unix.CLONE_NEWUSER|uintptr(unix.SIGCHLD), 0, 0, 0, 0, 0)
+	if err != 0 {
+		return -1, nil, err
+	}
+	if pid == 0 {
+		// Child: ask the kernel to deliver SIGKILL when the parent dies.
+		_ = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
+		// just wait for the SIGKILL
+		for {
+			syscall.Syscall6(uintptr(unix.SYS_PAUSE), 0, 0, 0, 0, 0, 0)
+		}
+	}
+	// cleanupFunc kills the child process and reaps it.
+	cleanupFunc := func() {
+		_ = unix.Kill(int(pid), unix.SIGKILL)
+		_, _ = unix.Wait4(int(pid), nil, 0, nil)
+	}
+	// writeMappings writes one "container host size" line per entry to /proc/<pid>/<fname>.
+	writeMappings := func(fname string, idmap []idtools.IDMap) error {
+		mappings := ""
+		for _, m := range idmap {
+			mappings += fmt.Sprintf("%d %d %d\n", m.ContainerID, m.HostID, m.Size)
+		}
+		return ioutil.WriteFile(fmt.Sprintf("/proc/%d/%s", pid, fname), []byte(mappings), 0600)
+	}
+	if err := writeMappings("uid_map", uidMaps); err != nil {
+		cleanupFunc()
+		return -1, nil, err
+	}
+	if err := writeMappings("gid_map", gidMaps); err != nil {
+		cleanupFunc()
+		return -1, nil, err
+	}
+
+	return int(pid), cleanupFunc, nil
+}
diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go
index a780ef5da3..b22f28ed31 100644
--- a/drivers/overlay/overlay.go
+++ b/drivers/overlay/overlay.go
@@ -1485,6 +1485,40 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
 		}
 	}
 
+	if len(options.UidMaps) > 0 && len(options.GidMaps) > 0 {
+		newAbsDir := make([]string, 0, len(absLowers))
+		mappedRoot := filepath.Join(d.home, id, "mapped")
+		if err := os.MkdirAll(mappedRoot, 0700); err != nil {
+			return "", err
+		}
+
+		pid, cleanupFunc, err := createUsernsProcess(options.UidMaps, options.GidMaps)
+		if err != nil {
+			return "", err
+		}
+		// The helper process only provides the user namespace for the idmapped
+		// mounts created below; cleanupFunc kills and reaps it once this
+		// function returns.
+		defer cleanupFunc()
+
+		// rewrite the lower dirs to their idmapped mount.
+		for c, absLower := range absLowers {
+			to := filepath.Join(mappedRoot, fmt.Sprintf("%d", c))
+			if err := os.Mkdir(to, 0700); err != nil && !os.IsExist(err) {
+				return "", err
+			}
+			if err := getIDMappedMount(absLower, to, int(pid)); err != nil {
+				return "", errors.Wrapf(err, "create mapped mount")
+			}
+			// overlay takes a reference on the mount, so it is safe to unmount
+			// the mapped idmounts as soon as the final overlay file system is mounted.
+			defer unix.Unmount(to, unix.MNT_DETACH)
+
+			newAbsDir = append(newAbsDir, to)
+		}
+		absLowers = newAbsDir
+	}
+
 	var opts string
 	if readWrite {
 		opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), diffDir, workdir)
@@ -1963,7 +1997,15 @@ func (d *Driver) SupportsShifting() bool {
 	if os.Getenv("_TEST_FORCE_SUPPORT_SHIFTING") == "yes-please" {
 		return true
 	}
-	return d.options.mountProgram != ""
+	if d.options.mountProgram != "" {
+		return true
+	}
+	supportsIdMappedMounts, err := checkAndRecordIdMappedSupport(d.home, d.runhome)
+	if err != nil {
+		logrus.Debugf("Check for idmapped mounts support %v", err)
+		return false
+	}
+	return supportsIdMappedMounts
 }
 
 // dumbJoin is more or less a dumber version of filepath.Join, but one which