Skip to content

Commit

Permalink
Merge pull request #2951 from cyphar/cgroup2-devices-cleanup
Browse files Browse the repository at this point in the history
cgroup2: devices filtering cleanup
  • Loading branch information
AkihiroSuda committed May 24, 2021
2 parents 211acee + 00119c8 commit e051128
Show file tree
Hide file tree
Showing 12 changed files with 283 additions and 142 deletions.
9 changes: 9 additions & 0 deletions libcontainer/cgroups/devices/devices_emulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,3 +371,12 @@ func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
}
return transitionRules, nil
}

// Rules returns the minimum set of rules necessary to convert a *deny-all*
// cgroup to the emulated filter state (note that this is not the same as a
// default cgroupv1 cgroup -- which is allow-all). This is effectively just a
// wrapper around Transition() with the source emulator being an empty cgroup.
func (e *Emulator) Rules() ([]*devices.Rule, error) {
defaultCgroup := &Emulator{defaultAllow: false}
return defaultCgroup.Transition(e)
}
106 changes: 67 additions & 39 deletions libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strconv"

"github.com/cilium/ebpf/asm"
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
Expand All @@ -22,11 +23,44 @@ const (
)

// DeviceFilter returns eBPF device filter program and its license string
func DeviceFilter(devices []*devices.Rule) (asm.Instructions, string, error) {
p := &program{}
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
// Generate the minimum ruleset for the device rules we are given. While we
// don't care about minimum transitions in cgroupv2, using the emulator
// gives us a guarantee that the behaviour of devices filtering is the same
// as cgroupv1, including security hardenings to avoid misconfiguration
// (such as punching holes in wildcard rules).
emu := new(devicesemulator.Emulator)
for _, rule := range rules {
if err := emu.Apply(*rule); err != nil {
return nil, "", err
}
}
cleanRules, err := emu.Rules()
if err != nil {
return nil, "", err
}

p := &program{
defaultAllow: emu.IsBlacklist(),
}
p.init()
for i := len(devices) - 1; i >= 0; i-- {
if err := p.appendDevice(devices[i]); err != nil {

for idx, rule := range cleanRules {
if rule.Type == devices.WildcardDevice {
// We can safely skip over wildcard entries because there should
// only be one (at most) at the very start to instruct cgroupv1 to
// go into allow-list mode. However we do double-check this here.
if idx != 0 || rule.Allow != emu.IsBlacklist() {
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
}
continue
}
if rule.Allow == p.defaultAllow {
// There should be no rules which have an action equal to the
// default action, the emulator removes those.
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
}
if err := p.appendRule(rule); err != nil {
return nil, "", err
}
}
Expand All @@ -35,9 +69,9 @@ func DeviceFilter(devices []*devices.Rule) (asm.Instructions, string, error) {
}

type program struct {
insts asm.Instructions
hasWildCard bool
blockID int
insts asm.Instructions
defaultAllow bool
blockID int
}

func (p *program) init() {
Expand Down Expand Up @@ -67,39 +101,36 @@ func (p *program) init() {
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
}

// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
func (p *program) appendDevice(dev *devices.Rule) error {
// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
// to the in-progress filter program. In order to operate properly, it must be
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
// with any "a" rules removed).
func (p *program) appendRule(rule *devices.Rule) error {
if p.blockID < 0 {
return errors.New("the program is finalized")
}
if p.hasWildCard {
// All entries after wildcard entry are ignored
return nil
}

bpfType := int32(-1)
hasType := true
switch dev.Type {
case 'c':
switch rule.Type {
case devices.CharDevice:
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
case 'b':
case devices.BlockDevice:
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
case 'a':
hasType = false
default:
// if not specified in OCI json, typ is set to DeviceTypeAll
return errors.Errorf("invalid Type %q", string(dev.Type))
// We do not permit 'a', nor any other types we don't know about.
return errors.Errorf("invalid Type %q", string(rule.Type))
}
if dev.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", dev.Major)
if rule.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", rule.Major)
}
if dev.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", dev.Major)
if rule.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", rule.Major)
}
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := dev.Minor >= 0
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := rule.Minor >= 0
bpfAccess := int32(0)
for _, r := range dev.Permissions {
for _, r := range rule.Permissions {
switch r {
case 'r':
bpfAccess |= unix.BPF_DEVCG_ACC_READ
Expand Down Expand Up @@ -136,42 +167,39 @@ func (p *program) appendDevice(dev *devices.Rule) error {
if hasMajor {
p.insts = append(p.insts,
// if (R4 != major) goto next
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
)
}
if hasMinor {
p.insts = append(p.insts,
// if (R5 != minor) goto next
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
)
}
if !hasType && !hasAccess && !hasMajor && !hasMinor {
p.hasWildCard = true
}
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
// set blockSym to the first instruction we added in this iteration
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
p.blockID++
return nil
}

func (p *program) finalize() (asm.Instructions, error) {
if p.hasWildCard {
// acceptBlock with asm.Return() is already inserted
return p.insts, nil
var v int32
if p.defaultAllow {
v = 1
}
blockSym := "block-" + strconv.Itoa(p.blockID)
p.insts = append(p.insts,
// R0 <- 0
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
// R0 <- v
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
asm.Return(),
)
p.blockID = -1
return p.insts, nil
}

func acceptBlock(accept bool) asm.Instructions {
v := int32(0)
var v int32
if accept {
v = 1
}
Expand Down
128 changes: 64 additions & 64 deletions libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,81 +55,81 @@ block-0:
func TestDeviceFilter_BuiltInAllowList(t *testing.T) {
expected := `
// load parameters into registers
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
1: And32Imm dst: r2 imm: 65535
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
3: RSh32Imm dst: r3 imm: 16
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
1: And32Imm dst: r2 imm: 65535
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
3: RSh32Imm dst: r3 imm: 16
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
block-0:
// tuntap (c, 10, 200, rwm, allow)
6: JNEImm dst: r2 off: -1 imm: 2 <block-1>
7: JNEImm dst: r4 off: -1 imm: 10 <block-1>
8: JNEImm dst: r5 off: -1 imm: 200 <block-1>
9: Mov32Imm dst: r0 imm: 1
10: Exit
// (b, wildcard, wildcard, m, true)
6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
7: Mov32Reg dst: r1 src: r3
8: And32Imm dst: r1 imm: 1
9: JNEReg dst: r1 off: -1 src: r3 <block-1>
10: Mov32Imm dst: r0 imm: 1
11: Exit
block-1:
11: JNEImm dst: r2 off: -1 imm: 2 <block-2>
12: JNEImm dst: r4 off: -1 imm: 5 <block-2>
13: JNEImm dst: r5 off: -1 imm: 2 <block-2>
14: Mov32Imm dst: r0 imm: 1
15: Exit
// (c, wildcard, wildcard, m, true)
12: JNEImm dst: r2 off: -1 imm: 2 <block-2>
13: Mov32Reg dst: r1 src: r3
14: And32Imm dst: r1 imm: 1
15: JNEReg dst: r1 off: -1 src: r3 <block-2>
16: Mov32Imm dst: r0 imm: 1
17: Exit
block-2:
// /dev/pts (c, 136, wildcard, rwm, true)
16: JNEImm dst: r2 off: -1 imm: 2 <block-3>
17: JNEImm dst: r4 off: -1 imm: 136 <block-3>
18: Mov32Imm dst: r0 imm: 1
19: Exit
18: JNEImm dst: r2 off: -1 imm: 2 <block-3>
19: JNEImm dst: r4 off: -1 imm: 1 <block-3>
20: JNEImm dst: r5 off: -1 imm: 3 <block-3>
21: Mov32Imm dst: r0 imm: 1
22: Exit
block-3:
20: JNEImm dst: r2 off: -1 imm: 2 <block-4>
21: JNEImm dst: r4 off: -1 imm: 1 <block-4>
22: JNEImm dst: r5 off: -1 imm: 9 <block-4>
23: Mov32Imm dst: r0 imm: 1
24: Exit
23: JNEImm dst: r2 off: -1 imm: 2 <block-4>
24: JNEImm dst: r4 off: -1 imm: 1 <block-4>
25: JNEImm dst: r5 off: -1 imm: 5 <block-4>
26: Mov32Imm dst: r0 imm: 1
27: Exit
block-4:
25: JNEImm dst: r2 off: -1 imm: 2 <block-5>
26: JNEImm dst: r4 off: -1 imm: 1 <block-5>
27: JNEImm dst: r5 off: -1 imm: 5 <block-5>
28: Mov32Imm dst: r0 imm: 1
29: Exit
28: JNEImm dst: r2 off: -1 imm: 2 <block-5>
29: JNEImm dst: r4 off: -1 imm: 1 <block-5>
30: JNEImm dst: r5 off: -1 imm: 7 <block-5>
31: Mov32Imm dst: r0 imm: 1
32: Exit
block-5:
30: JNEImm dst: r2 off: -1 imm: 2 <block-6>
31: JNEImm dst: r4 off: -1 imm: 5 <block-6>
32: JNEImm dst: r5 off: -1 imm: 0 <block-6>
33: Mov32Imm dst: r0 imm: 1
34: Exit
33: JNEImm dst: r2 off: -1 imm: 2 <block-6>
34: JNEImm dst: r4 off: -1 imm: 1 <block-6>
35: JNEImm dst: r5 off: -1 imm: 8 <block-6>
36: Mov32Imm dst: r0 imm: 1
37: Exit
block-6:
35: JNEImm dst: r2 off: -1 imm: 2 <block-7>
36: JNEImm dst: r4 off: -1 imm: 1 <block-7>
37: JNEImm dst: r5 off: -1 imm: 7 <block-7>
38: Mov32Imm dst: r0 imm: 1
39: Exit
38: JNEImm dst: r2 off: -1 imm: 2 <block-7>
39: JNEImm dst: r4 off: -1 imm: 1 <block-7>
40: JNEImm dst: r5 off: -1 imm: 9 <block-7>
41: Mov32Imm dst: r0 imm: 1
42: Exit
block-7:
40: JNEImm dst: r2 off: -1 imm: 2 <block-8>
41: JNEImm dst: r4 off: -1 imm: 1 <block-8>
42: JNEImm dst: r5 off: -1 imm: 8 <block-8>
43: Mov32Imm dst: r0 imm: 1
44: Exit
43: JNEImm dst: r2 off: -1 imm: 2 <block-8>
44: JNEImm dst: r4 off: -1 imm: 5 <block-8>
45: JNEImm dst: r5 off: -1 imm: 0 <block-8>
46: Mov32Imm dst: r0 imm: 1
47: Exit
block-8:
45: JNEImm dst: r2 off: -1 imm: 2 <block-9>
46: JNEImm dst: r4 off: -1 imm: 1 <block-9>
47: JNEImm dst: r5 off: -1 imm: 3 <block-9>
48: Mov32Imm dst: r0 imm: 1
49: Exit
48: JNEImm dst: r2 off: -1 imm: 2 <block-9>
49: JNEImm dst: r4 off: -1 imm: 5 <block-9>
50: JNEImm dst: r5 off: -1 imm: 2 <block-9>
51: Mov32Imm dst: r0 imm: 1
52: Exit
block-9:
// (b, wildcard, wildcard, m, true)
50: JNEImm dst: r2 off: -1 imm: 1 <block-10>
51: Mov32Reg dst: r1 src: r3
52: And32Imm dst: r1 imm: 1
53: JNEReg dst: r1 off: -1 src: r3 <block-10>
54: Mov32Imm dst: r0 imm: 1
55: Exit
// tuntap (c, 10, 200, rwm, allow)
53: JNEImm dst: r2 off: -1 imm: 2 <block-10>
54: JNEImm dst: r4 off: -1 imm: 10 <block-10>
55: JNEImm dst: r5 off: -1 imm: 200 <block-10>
56: Mov32Imm dst: r0 imm: 1
57: Exit
block-10:
// (c, wildcard, wildcard, m, true)
56: JNEImm dst: r2 off: -1 imm: 2 <block-11>
57: Mov32Reg dst: r1 src: r3
58: And32Imm dst: r1 imm: 1
59: JNEReg dst: r1 off: -1 src: r3 <block-11>
// /dev/pts (c, 136, wildcard, rwm, true)
58: JNEImm dst: r2 off: -1 imm: 2 <block-11>
59: JNEImm dst: r4 off: -1 imm: 136 <block-11>
60: Mov32Imm dst: r0 imm: 1
61: Exit
block-11:
Expand Down

0 comments on commit e051128

Please sign in to comment.