Skip to content

Commit

Permalink
link/perfEvent: split perf event attachment modes
Browse files Browse the repository at this point in the history
Signed-off-by: Mattia Meleleo <mattia.meleleo@elastic.co>
  • Loading branch information
mmat11 authored and lmb committed Mar 11, 2022
1 parent 7cd6868 commit bf256fd
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 111 deletions.
18 changes: 9 additions & 9 deletions link/kprobe.go
Expand Up @@ -99,13 +99,13 @@ func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error
return nil, err
}

err = k.attach(prog)
lnk, err := attachPerfEvent(k, prog)
if err != nil {
k.Close()
return nil, err
}

return k, nil
return lnk, nil
}

// Kretprobe attaches the given eBPF program to a perf event that fires right
Expand All @@ -123,13 +123,13 @@ func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, er
return nil, err
}

err = k.attach(prog)
lnk, err := attachPerfEvent(k, prog)
if err != nil {
k.Close()
return nil, err
}

return k, nil
return lnk, nil
}

// kprobe opens a perf event on the given symbol and attaches prog to it.
Expand Down Expand Up @@ -282,11 +282,11 @@ func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {

// Kernel has perf_[k,u]probe PMU available, initialize perf event.
return &perfEvent{
fd: fd,
pmuID: et,
name: args.symbol,
typ: typ.PerfEventType(args.ret),
name: args.symbol,
pmuID: et,
cookie: args.cookie,
fd: fd,
}, nil
}

Expand Down Expand Up @@ -341,12 +341,12 @@ func tracefsProbe(typ probeType, args probeArgs) (*perfEvent, error) {
}

return &perfEvent{
fd: fd,
typ: typ.PerfEventType(args.ret),
group: group,
name: args.symbol,
tracefsID: tid,
typ: typ.PerfEventType(args.ret),
cookie: args.cookie,
fd: fd,
}, nil
}

Expand Down
4 changes: 3 additions & 1 deletion link/link.go
Expand Up @@ -325,11 +325,13 @@ func (l *RawLink) Info() (*Info, error) {
extra = &TracingInfo{}
case XDPType:
extra = &XDPInfo{}
case PerfEventType:
// no extra
default:
return nil, fmt.Errorf("unknown link info type: %d", info.Type)
}

if info.Type != RawTracepointType && info.Type != IterType {
if info.Type != RawTracepointType && info.Type != IterType && info.Type != PerfEventType {
buf := bytes.NewReader(info.Extra[:])
err := binary.Read(buf, internal.NativeEndian, extra)
if err != nil {
Expand Down
220 changes: 131 additions & 89 deletions link/perf_event.go
Expand Up @@ -71,6 +71,8 @@ const (
// can be attached to it. It is created based on a tracefs trace event or a
// Performance Monitoring Unit (PMU).
type perfEvent struct {
// The event type determines the types of programs that can be attached.
typ perfEventType

// Group and name of the tracepoint/kprobe/uprobe.
group string
Expand All @@ -81,68 +83,18 @@ type perfEvent struct {
// ID of the trace event read from tracefs. Valid IDs are non-zero.
tracefsID uint64

// The event type determines the types of programs that can be attached.
typ perfEventType

// User provided arbitrary value.
cookie uint64

// This is the proper perf event FD.
// This is the perf event FD.
fd *sys.FD
// If bpf perf link is available, this is the bpf link FD.
bpfLinkFD *sys.FD
}

func (pe *perfEvent) isLink() {}

func (pe *perfEvent) Pin(string) error {
return fmt.Errorf("pin perf event: %w", ErrNotSupported)
}

func (pe *perfEvent) Unpin() error {
return fmt.Errorf("unpin perf event: %w", ErrNotSupported)
}

// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
// owned by the perf event, which means multiple programs can be attached
// simultaneously.
//
// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
// returns EEXIST.
//
// Detaching a program from a perf event is currently not possible, so a
// program replacement mechanism cannot be implemented for perf events.
func (pe *perfEvent) Update(prog *ebpf.Program) error {
return fmt.Errorf("can't replace eBPF program in perf event: %w", ErrNotSupported)
}

func (pe *perfEvent) Info() (*Info, error) {
return nil, fmt.Errorf("can't get perf event info: %w", ErrNotSupported)
}

func (pe *perfEvent) Close() error {
if pe.fd == nil {
return nil
}

err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_DISABLE, 0)
if err != nil {
return fmt.Errorf("disabling perf event: %w", err)
}

err = pe.fd.Close()
if err != nil {
if err := pe.fd.Close(); err != nil {
return fmt.Errorf("closing perf event fd: %w", err)
}

if pe.bpfLinkFD != nil {
err = pe.bpfLinkFD.Close()
if err != nil {
return fmt.Errorf("closing bpf perf link fd: %w", err)
}
}

switch pe.typ {
case kprobeEvent, kretprobeEvent:
// Clean up kprobe tracefs entry.
Expand All @@ -162,70 +114,160 @@ func (pe *perfEvent) Close() error {
return nil
}

// perfEventLink represents a bpf perf link: a bpf link FD (held by the
// embedded RawLink) together with the perf event it was created against.
type perfEventLink struct {
// Embedded link; its fd is the bpf link FD closed by Close.
RawLink
// Perf event the link is attached to; closed together with the link.
pe *perfEvent
}

// isLink is an empty marker method. Presumably it satisfies an unexported
// method of the Link interface (the interface is not visible here) — confirm.
func (pl *perfEventLink) isLink() {}

// Pin always fails with an error wrapping ErrNotSupported.
//
// A pinned perf event link only works while the perf event FD it was created
// from stays open:
//
//	| PerfEvent FD | BpfLink FD | Works                |
//	|--------------|------------|----------------------|
//	| Open         | Open       | Yes                  |
//	| Closed       | Open       | No                   |
//	| Open         | Closed     | No (Pin() -> EINVAL) |
//	| Closed       | Closed     | No (Pin() -> EINVAL) |
//
// There is currently no clean way to recover the perf event FD when loading
// a pinned link, so pinning is left unsupported for now.
func (pl *perfEventLink) Pin(string) error {
	err := fmt.Errorf("perf event link pin: %w", ErrNotSupported)
	return err
}

// Unpin always fails with an error wrapping ErrNotSupported.
func (pl *perfEventLink) Unpin() error {
	err := fmt.Errorf("perf event link unpin: %w", ErrNotSupported)
	return err
}

// Close releases the underlying perf event and the bpf link FD.
//
// Both resources are always released: the link FD is closed even when closing
// the perf event fails, so an error from the perf event cannot leak the link
// FD. If closing the perf event fails, that error is returned (wrapped);
// otherwise the result of closing the link FD is returned.
func (pl *perfEventLink) Close() error {
	peErr := pl.pe.Close()
	// Close the link FD unconditionally; returning early on peErr would
	// leave it open.
	fdErr := pl.fd.Close()

	if peErr != nil {
		return fmt.Errorf("perf event link close: %w", peErr)
	}
	return fdErr
}

// Update always fails with an error wrapping ErrNotSupported; prog is ignored.
func (pl *perfEventLink) Update(prog *ebpf.Program) error {
	err := fmt.Errorf("perf event link update: %w", ErrNotSupported)
	return err
}

// perfEventIoctl implements Link and handles the perf event lifecycle
// via ioctl().
type perfEventIoctl struct {
// Embedded perf event; its methods (such as Close) are promoted to
// perfEventIoctl.
*perfEvent
}

// isLink is an empty marker method. Presumably it satisfies an unexported
// method of the Link interface (the interface is not visible here) — confirm.
func (pi *perfEventIoctl) isLink() {}

// Update always fails with an error wrapping ErrNotSupported; prog is ignored.
//
// Background: since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments
// for a single perf event"), PERF_EVENT_IOC_SET_BPF appends the given program
// to a prog_array owned by the perf event, so several programs can be
// attached at the same time. Before 4.15, issuing PERF_EVENT_IOC_SET_BPF more
// than once on a perf event returns EEXIST.
//
// Detaching a program from a perf event is currently not possible, so a
// program replacement mechanism cannot be implemented for perf events.
func (pi *perfEventIoctl) Update(prog *ebpf.Program) error {
	err := fmt.Errorf("perf event ioctl update: %w", ErrNotSupported)
	return err
}

// Pin always fails with an error wrapping ErrNotSupported; the path argument
// is ignored.
func (pi *perfEventIoctl) Pin(string) error {
	err := fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported)
	return err
}

// Unpin always fails with an error wrapping ErrNotSupported.
func (pi *perfEventIoctl) Unpin() error {
	err := fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported)
	return err
}

// Info always returns a nil *Info and an error wrapping ErrNotSupported.
func (pi *perfEventIoctl) Info() (*Info, error) {
	err := fmt.Errorf("perf event ioctl info: %w", ErrNotSupported)
	return nil, err
}

// attach the given eBPF prog to the perf event stored in pe.
// pe must contain a valid perf event fd.
// prog's type must match the program type stored in pe.
func (pe *perfEvent) attach(prog *ebpf.Program) error {
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
if prog == nil {
return errors.New("cannot attach a nil program")
}
if pe.fd == nil {
return errors.New("cannot attach to nil perf event")
return nil, errors.New("cannot attach a nil program")
}
if prog.FD() < 0 {
return fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
}

switch pe.typ {
case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
if t := prog.Type(); t != ebpf.Kprobe {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
}
case tracepointEvent:
if t := prog.Type(); t != ebpf.TracePoint {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
}
default:
return fmt.Errorf("unknown perf event type: %d", pe.typ)
return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
}

pfd := pe.fd

if err := haveBPFLinkPerfEvent(); err == nil {
// Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
//
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
attr := sys.LinkCreatePerfEventAttr{
ProgFd: uint32(prog.FD()),
TargetFd: pfd.Uint(),
AttachType: sys.BPF_PERF_EVENT,
BpfCookie: pe.cookie,
}

fd, err := sys.LinkCreatePerfEvent(&attr)
lnk, err := attachPerfEventLink(pe, prog)
if err != nil {
return fmt.Errorf("cannot create bpf perf link: %v", err)
}
pe.bpfLinkFD = fd
} else {
if pe.cookie != 0 {
return fmt.Errorf("bpf cookies are not available: %w", ErrNotSupported)
return nil, err
}
return lnk, nil
}

// Assign the eBPF program to the perf event.
err := unix.IoctlSetInt(pfd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
if err != nil {
return fmt.Errorf("setting perf event bpf program: %w", err)
}
lnk, err := attachPerfEventIoctl(pe, prog)
if err != nil {
return nil, err
}

return lnk, nil
}

func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
if pe.cookie != 0 {
return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
}

// Assign the eBPF program to the perf event.
err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
if err != nil {
return nil, fmt.Errorf("setting perf event bpf program: %w", err)
}

// PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
if err := unix.IoctlSetInt(pfd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
return fmt.Errorf("enable perf event: %s", err)
if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
return nil, fmt.Errorf("enable perf event: %s", err)
}

pi := &perfEventIoctl{pe}

// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pe, (*perfEvent).Close)
return nil
runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
return pi, nil
}

// attachPerfEventLink attaches prog to the perf event pe through the bpf link
// API (BPF_LINK_TYPE_PERF_EVENT, 5.15+), passing pe.cookie as the BPF cookie.
//
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
//
// On success the returned link holds both the new bpf link FD and pe, and a
// finalizer is registered that closes the link once it becomes unreachable.
// On failure pe is not closed here; the underlying error is wrapped so that
// callers can inspect it with errors.Is / errors.As.
func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
	attr := sys.LinkCreatePerfEventAttr{
		ProgFd:     uint32(prog.FD()),
		TargetFd:   pe.fd.Uint(),
		AttachType: sys.BPF_PERF_EVENT,
		BpfCookie:  pe.cookie,
	}

	fd, err := sys.LinkCreatePerfEvent(&attr)
	if err != nil {
		return nil, fmt.Errorf("cannot create bpf perf link: %w", err)
	}

	pl := &perfEventLink{
		RawLink: RawLink{fd: fd},
		pe:      pe,
	}

	// Close the perf event when its reference is lost to avoid leaking system resources.
	runtime.SetFinalizer(pl, (*perfEventLink).Close)
	return pl, nil
}

// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
Expand Down
1 change: 1 addition & 0 deletions link/syscalls.go
Expand Up @@ -22,6 +22,7 @@ const (
IterType = sys.BPF_LINK_TYPE_ITER
NetNsType = sys.BPF_LINK_TYPE_NETNS
XDPType = sys.BPF_LINK_TYPE_XDP
PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT
)

var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
Expand Down
20 changes: 12 additions & 8 deletions link/tracepoint.go
Expand Up @@ -53,21 +53,25 @@ func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions)
return nil, err
}

var cookie uint64
if opts != nil {
cookie = opts.Cookie
}

pe := &perfEvent{
fd: fd,
tracefsID: tid,
typ: tracepointEvent,
group: group,
name: name,
typ: tracepointEvent,
}
if opts != nil {
pe.cookie = opts.Cookie
tracefsID: tid,
cookie: cookie,
fd: fd,
}

if err := pe.attach(prog); err != nil {
lnk, err := attachPerfEvent(pe, prog)
if err != nil {
pe.Close()
return nil, err
}

return pe, nil
return lnk, nil
}

0 comments on commit bf256fd

Please sign in to comment.