Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Bacto committed Feb 5, 2024
2 parents c263e73 + 0212048 commit aecae3e
Show file tree
Hide file tree
Showing 14 changed files with 330 additions and 118 deletions.
31 changes: 16 additions & 15 deletions libcontainer/cgroups/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,16 @@ var (
// TestMode is set to true by unit tests that need "fake" cgroupfs.
TestMode bool

cgroupFd int = -1
prepOnce sync.Once
prepErr error
resolveFlags uint64
cgroupRootHandle *os.File
prepOnce sync.Once
prepErr error
resolveFlags uint64
)

func prepareOpenat2() error {
prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH,
Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
Expand All @@ -86,15 +86,16 @@ func prepareOpenat2() error {
}
return
}
file := os.NewFile(uintptr(fd), cgroupfsDir)

var st unix.Statfs_t
if err = unix.Fstatfs(fd, &st); err != nil {
if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
logrus.Warnf("falling back to securejoin: %s", prepErr)
return
}

cgroupFd = fd

cgroupRootHandle = file
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
if st.Type == unix.CGROUP2_SUPER_MAGIC {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
Expand All @@ -121,29 +122,29 @@ func openFile(dir, file string, flags int) (*os.File, error) {
return openFallback(path, flags, mode)
}

fd, err := unix.Openat2(cgroupFd, relPath,
fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
&unix.OpenHow{
Resolve: resolveFlags,
Flags: uint64(flags) | unix.O_CLOEXEC,
Mode: uint64(mode),
})
if err != nil {
err = &os.PathError{Op: "openat2", Path: path, Err: err}
// Check if cgroupFd is still opened to cgroupfsDir
// Check if cgroupRootHandle is still opened to cgroupfsDir
// (happens when this package is incorrectly used
// across the chroot/pivot_root/mntns boundary, or
// when /sys/fs/cgroup is remounted).
//
// TODO: if such usage will ever be common, amend this
// to reopen cgroupFd and retry openat2.
fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(cgroupFd))
// to reopen cgroupRootHandle and retry openat2.
fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
defer closer()
fdDest, _ := os.Readlink(fdPath)
if fdDest != cgroupfsDir {
// Wrap the error so it is clear that cgroupFd
// Wrap the error so it is clear that cgroupRootHandle
// is opened to an unexpected/wrong directory.
err = fmt.Errorf("cgroupFd %d unexpectedly opened to %s != %s: %w",
cgroupFd, fdDest, cgroupfsDir, err)
err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
}
return nil, err
}
Expand Down
9 changes: 9 additions & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,15 @@ func (c *Container) start(process *Process) (retErr error) {
}()
}

// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
// to make sure we don't leak any files into "runc init". Any files to be
// passed to "runc init" through ExtraFiles will get dup2'd by the Go
// runtime and thus their O_CLOEXEC flag will be cleared. This is some
// additional protection against attacks like CVE-2024-21626, by making
// sure we never leak files to "runc init" we didn't intend to.
if err := utils.CloseExecFrom(3); err != nil {
return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
}
if err := parent.start(); err != nil {
return fmt.Errorf("unable to start container process: %w", err)
}
Expand Down
14 changes: 13 additions & 1 deletion libcontainer/dmz/_dmz.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#ifdef RUNC_USE_STDLIB
# include <linux/limits.h>
# include <stdio.h>
# include <string.h>
# include <unistd.h>
#else
# include "xstat.h"
Expand All @@ -11,5 +14,14 @@ int main(int argc, char **argv)
{
if (argc < 1)
return 127;
return execve(argv[0], argv, environ);
int ret = execve(argv[0], argv, environ);
if (ret) {
/* NOTE: This error message format MUST match Go's format. */
char err_msg[5 + PATH_MAX + 1] = "exec "; // "exec " + argv[0] + '\0'
strncat(err_msg, argv[0], PATH_MAX);
err_msg[sizeof(err_msg) - 1] = '\0';

perror(err_msg);
}
return ret;
}
61 changes: 49 additions & 12 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net"
"os"
"path/filepath"
"runtime"
"runtime/debug"
"strconv"
Expand Down Expand Up @@ -89,7 +90,7 @@ func Init() {
}
// Normally, StartInitialization() never returns, meaning
// if we are here, it had failed.
os.Exit(1)
os.Exit(255)
}

// Normally, this function does not return. If it returns, with or without an
Expand All @@ -107,7 +108,11 @@ func startInitialization() (retErr error) {

defer func() {
// If this defer is ever called, this means initialization has failed.
// Send the error back to the parent process in the form of an initError.
// Send the error back to the parent process in the form of an initError
// if the sync socket has not been closed.
if syncPipe.isClosed() {
return
}
ierr := initError{Message: retErr.Error()}
if err := writeSyncArg(syncPipe, procError, ierr); err != nil {
fmt.Fprintln(os.Stderr, err)
Expand Down Expand Up @@ -137,24 +142,26 @@ func startInitialization() (retErr error) {
logrus.SetLevel(logrus.Level(logLevel))
}

logFD, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE"))
logFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE"))
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
}
logPipe := os.NewFile(uintptr(logFd), "logpipe")

logrus.SetOutput(os.NewFile(uintptr(logFD), "logpipe"))
logrus.SetOutput(logPipe)
logrus.SetFormatter(new(logrus.JSONFormatter))
logrus.Debug("child process in init()")

// Only init processes have FIFOFD.
fifofd := -1
var fifoFile *os.File
envInitType := os.Getenv("_LIBCONTAINER_INITTYPE")
it := initType(envInitType)
if it == initStandard {
envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD")
if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
fifoFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_FIFOFD"))
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err)
}
fifoFile = os.NewFile(uintptr(fifoFd), "initfifo")
}

var consoleSocket *os.File
Expand Down Expand Up @@ -208,10 +215,10 @@ func startInitialization() (retErr error) {
}

// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifofd, logFD, dmzExe)
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe)
}

func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket *os.File, fifoFd, logFd int, dmzExe *os.File) error {
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe *os.File) error {
if err := populateProcessEnvironment(config.Env); err != nil {
return err
}
Expand All @@ -223,7 +230,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
consoleSocket: consoleSocket,
pidfdSocket: pidfdSocket,
config: config,
logFd: logFd,
logPipe: logPipe,
dmzExe: dmzExe,
}
return i.Init()
Expand All @@ -234,8 +241,8 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
pidfdSocket: pidfdSocket,
parentPid: unix.Getppid(),
config: config,
fifoFd: fifoFd,
logFd: logFd,
fifoFile: fifoFile,
logPipe: logPipe,
dmzExe: dmzExe,
}
return i.Init()
Expand Down Expand Up @@ -268,6 +275,32 @@ func populateProcessEnvironment(env []string) error {
return nil
}

// verifyCwd ensures that the current directory is actually inside the mount
// namespace root of the current process.
func verifyCwd() error {
// getcwd(2) on Linux detects if cwd is outside of the rootfs of the
// current mount namespace root, and in that case prefixes "(unreachable)"
// to the returned string. glibc's getcwd(3) and Go's Getwd() both detect
// when this happens and return ENOENT rather than returning a non-absolute
// path. In both cases we can therefore easily detect if we have an invalid
// cwd by checking the return value of getcwd(3). See getcwd(3) for more
// details, and CVE-2024-21626 for the security issue that motivated this
// check.
//
// We have to use unix.Getwd() here because os.Getwd() has a workaround for
// $PWD which involves doing stat(.), which can fail if the current
// directory is inaccessible to the container process.
if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) {
return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
} else if err != nil {
return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
} else if !filepath.IsAbs(wd) {
// We shouldn't ever hit this, but check just in case.
return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
}
return nil
}

// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
Expand Down Expand Up @@ -326,6 +359,10 @@ func finalizeNamespace(config *initConfig) error {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
}
}
// Make sure our final working directory is inside the container.
if err := verifyCwd(); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil {
return fmt.Errorf("unable to clear keep caps: %w", err)
}
Expand Down
20 changes: 10 additions & 10 deletions libcontainer/integration/seccomp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
libseccomp "github.com/seccomp/libseccomp-golang"
)

func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
func TestSeccompDenySyslogWithErrno(t *testing.T) {
if testing.Short() {
return
}
Expand All @@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Name: "syslog",
Action: configs.Errno,
ErrnoRet: &errnoRet,
},
Expand All @@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Cwd: "/",
Args: []string{"pwd"},
Args: []string{"dmesg"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Expand All @@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
}

if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
}

expected := "pwd: getcwd: No such process"
expected := "dmesg: klogctl: No such process"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}

func TestSeccompDenyGetcwd(t *testing.T) {
func TestSeccompDenySyslog(t *testing.T) {
if testing.Short() {
return
}
Expand All @@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Name: "syslog",
Action: configs.Errno,
},
},
Expand All @@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Cwd: "/",
Args: []string{"pwd"},
Args: []string{"dmesg"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Expand All @@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) {
}

if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
}

expected := "pwd: getcwd: Operation not permitted"
expected := "dmesg: klogctl: Operation not permitted"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
Expand Down
6 changes: 3 additions & 3 deletions libcontainer/mount_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ func mountViaFds(source string, srcFile *mountSource, target, dstFd, fstype stri
// mount(2), we need to get a safe handle to /proc/thread-self. This
// isn't needed for move_mount(2) because in that case the path is just
// a dummy string used for error info.
fdStr := strconv.Itoa(int(srcFile.file.Fd()))
srcFileFd := srcFile.file.Fd()
if isMoveMount {
src = "/proc/self/fd/" + fdStr
src = "/proc/self/fd/" + strconv.Itoa(int(srcFileFd))
} else {
var closer utils.ProcThreadSelfCloser
src, closer = utils.ProcThreadSelf("fd/" + fdStr)
src, closer = utils.ProcThreadSelfFd(srcFileFd)
defer closer()
}
}
Expand Down

0 comments on commit aecae3e

Please sign in to comment.