From fd1168bcf0aa22aecb6d8fdae007d29dfc0b4677 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Sat, 3 Sep 2022 12:10:39 +0000 Subject: [PATCH 1/5] btf: drop local btf.Spec argument to CORERelocate CORERelocate currently takes ProgramSpec.BTF as an argument, since a CO-RE relocation may ask for the local type ID. This is problematic, since we want to get rid of ProgramSpec.BTF. Instead, cache the type ID when constructing the CORERelocation itself. --- btf/core.go | 49 +++++++++++++++++++++++------------------------- btf/core_test.go | 2 +- btf/ext_info.go | 4 ++++ linker.go | 10 ++++++++-- prog.go | 8 ++++---- 5 files changed, 40 insertions(+), 33 deletions(-) diff --git a/btf/core.go b/btf/core.go index 223de819c..01506261f 100644 --- a/btf/core.go +++ b/btf/core.go @@ -156,16 +156,17 @@ func (k coreKind) String() string { } } -// CORERelocate calculates the difference in types between local and target. +// CORERelocate calculates changes needed to adjust eBPF instructions for differences +// in types. // // Returns a list of fixups which can be applied to instructions to make them // match the target type(s). // // Fixups are returned in the order of relos, e.g. fixup[i] is the solution // for relos[i]. -func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, error) { - if local.byteOrder != target.byteOrder { - return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder) +func CORERelocate(relos []*CORERelocation, target *Spec, bo binary.ByteOrder) ([]COREFixup, error) { + if bo != target.byteOrder { + return nil, fmt.Errorf("can't relocate %s against %s", bo, target.byteOrder) } type reloGroup struct { @@ -185,15 +186,14 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) } - id, err := local.TypeID(relo.typ) - if err != nil { - return nil, fmt.Errorf("%s: %w", relo.kind, err) - } - result[i] = COREFixup{ - kind: relo.kind, - local: uint32(id), - target: uint32(id), + kind: relo.kind, + local: uint32(relo.id), + // NB: Using relo.id as the target here is incorrect, since + // it doesn't match the BTF we generate on the fly. This isn't + // too bad for now since there are no uses of the local type ID + // in the kernel, yet. + target: uint32(relo.id), } continue } @@ -214,7 +214,7 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er } targets := target.namedTypes[newEssentialName(localTypeName)] - fixups, err := coreCalculateFixups(local, target, localType, targets, group.relos) + fixups, err := coreCalculateFixups(group.relos, target, targets, bo) if err != nil { return nil, fmt.Errorf("relocate %s: %w", localType, err) } @@ -230,18 +230,13 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er var errAmbiguousRelocation = errors.New("ambiguous relocation") var errImpossibleRelocation = errors.New("impossible relocation") -// coreCalculateFixups calculates the fixups for the given relocations using -// the "best" target. +// coreCalculateFixups finds the target type that best matches all relocations. +// +// All relos must target the same type. // // The best target is determined by scoring: the less poisoning we have to do // the better the target is. 
-func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type, relos []*CORERelocation) ([]COREFixup, error) { - localID, err := localSpec.TypeID(local) - if err != nil { - return nil, fmt.Errorf("local type ID: %w", err) - } - local = Copy(local, UnderlyingType) - +func coreCalculateFixups(relos []*CORERelocation, targetSpec *Spec, targets []Type, bo binary.ByteOrder) ([]COREFixup, error) { bestScore := len(relos) var bestFixups []COREFixup for i := range targets { @@ -254,7 +249,7 @@ func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type score := 0 // lower is better fixups := make([]COREFixup, 0, len(relos)) for _, relo := range relos { - fixup, err := coreCalculateFixup(localSpec.byteOrder, local, localID, target, targetID, relo) + fixup, err := coreCalculateFixup(relo, target, targetID, bo) if err != nil { return nil, fmt.Errorf("target %s: %w", target, err) } @@ -305,7 +300,7 @@ func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type // coreCalculateFixup calculates the fixup for a single local type, target type // and relocation. -func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, target Type, targetID TypeID, relo *CORERelocation) (COREFixup, error) { +func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo binary.ByteOrder) (COREFixup, error) { fixup := func(local, target uint32) (COREFixup, error) { return COREFixup{kind: relo.kind, local: local, target: target}, nil } @@ -320,6 +315,8 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, } zero := COREFixup{} + local := Copy(relo.typ, UnderlyingType) + switch relo.kind { case reloTypeIDTarget, reloTypeSize, reloTypeExists: if len(relo.accessor) > 1 || relo.accessor[0] != 0 { @@ -339,7 +336,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return fixup(1, 1) case reloTypeIDTarget: - return fixup(uint32(localID), uint32(targetID)) + return fixup(uint32(relo.id), uint32(targetID)) case reloTypeSize: localSize, err := Sizeof(local) @@ -427,7 +424,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, case reloFieldLShiftU64: var target uint32 - if byteOrder == binary.LittleEndian { + if bo == binary.LittleEndian { targetSize, err := targetField.sizeBits() if err != nil { return zero, err diff --git a/btf/core_test.go b/btf/core_test.go index 97675787b..f98eb4fa3 100644 --- a/btf/core_test.go +++ b/btf/core_test.go @@ -546,7 +546,7 @@ func TestCORERelocation(t *testing.T) { relos = append(relos, reloInfo.relo) } - fixups, err := CORERelocate(spec, spec, relos) + fixups, err := CORERelocate(relos, spec, spec.byteOrder) if want := errs[name]; want != nil { if !errors.Is(err, want) { t.Fatal("Expected", want, "got", err) diff --git a/btf/ext_info.go b/btf/ext_info.go index 2c0e1afe2..36e38abae 100644 --- a/btf/ext_info.go +++ b/btf/ext_info.go @@ -605,9 +605,12 @@ type bpfCORERelo struct { } type CORERelocation struct { + // The local type of the relocation, stripped of typedefs and qualifiers. typ Type accessor coreAccessor kind coreKind + // The ID of the local type in the source BTF. 
+ id TypeID } func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation { @@ -641,6 +644,7 @@ func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreR typ, accessor, relo.Kind, + relo.TypeID, }, asm.RawInstructionOffset(relo.InsnOff), }, nil diff --git a/linker.go b/linker.go index e6276b182..2a2c1b639 100644 --- a/linker.go +++ b/linker.go @@ -1,12 +1,14 @@ package ebpf import ( + "encoding/binary" "errors" "fmt" "sync" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal" ) // splitSymbols splits insns into subsections delimited by Symbol Instructions. @@ -67,7 +69,7 @@ func hasFunctionReferences(insns asm.Instructions) bool { // // Passing a nil target will relocate against the running kernel. insns are // modified in place. -func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error { +func applyRelocations(insns asm.Instructions, target *btf.Spec, bo binary.ByteOrder) error { var relos []*btf.CORERelocation var reloInsns []*asm.Instruction iter := insns.Iterate() @@ -82,12 +84,16 @@ func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error { return nil } + if bo == nil { + bo = internal.NativeEndian + } + target, err := maybeLoadKernelBTF(target) if err != nil { return err } - fixups, err := btf.CORERelocate(local, target, relos) + fixups, err := btf.CORERelocate(relos, target, bo) if err != nil { return err } diff --git a/prog.go b/prog.go index bec7d2347..3bec8e094 100644 --- a/prog.go +++ b/prog.go @@ -243,10 +243,6 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand var btfDisabled bool if spec.BTF != nil { - if err := applyRelocations(insns, spec.BTF, opts.KernelTypes); err != nil { - return nil, fmt.Errorf("apply CO-RE relocations: %w", err) - } - handle, err := handles.btfHandle(spec.BTF) btfDisabled = errors.Is(err, btf.ErrNotSupported) if err != nil && !btfDisabled { @@ -271,6 +267,10 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand } } + if err := applyRelocations(insns, opts.KernelTypes, spec.ByteOrder); err != nil { + return nil, fmt.Errorf("apply CO-RE relocations: %w", err) + } + if err := fixupAndValidate(insns); err != nil { return nil, err } From bffaeb754f800b62e2452fbdef033828b3ca8aca Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Tue, 6 Sep 2022 13:45:07 +0000 Subject: [PATCH 2/5] btf: add benchmark for Type.Walk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark the cost of adding a type and its children to a typeDeque. 
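The numbers below are benchstat output from a run along the lines of
"go test -bench Walk -benchmem": the -4 suffix on each benchmark name
is the GOMAXPROCS value, and the ± column is the spread across samples.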
name                              time/op
Walk/Void-4                       22.8ns ± 3%
Walk/Int[unsigned_size=0]-4       22.3ns ± 1%
Walk/Pointer[target=]-4           61.3ns ± 0%
Walk/Array[index=_type=_n=0]-4    62.4ns ± 0%
Walk/Struct[fields=2]-4           64.2ns ± 0%
Walk/Union[fields=2]-4            64.6ns ± 0%
Walk/Enum[size=0_values=0]-4      22.1ns ± 0%
Walk/Fwd[struct]-4                22.1ns ± 0%
Walk/Typedef[]-4                  61.2ns ± 0%
Walk/Volatile[]-4                 61.2ns ± 0%
Walk/Const[]-4                    61.2ns ± 0%
Walk/Restrict[]-4                 61.2ns ± 0%
Walk/Func[static_proto=]-4        61.6ns ± 0%
Walk/FuncProto[args=2_return=]-4  66.1ns ± 0%
Walk/Var[static]-4                61.5ns ± 1%
Walk/Datasec-4                    64.4ns ± 0%

name                              alloc/op
Walk/Void-4                       48.0B ± 0%
Walk/Int[unsigned_size=0]-4       48.0B ± 0%
Walk/Pointer[target=]-4            112B ± 0%
Walk/Array[index=_type=_n=0]-4     112B ± 0%
Walk/Struct[fields=2]-4            112B ± 0%
Walk/Union[fields=2]-4             112B ± 0%
Walk/Enum[size=0_values=0]-4      48.0B ± 0%
Walk/Fwd[struct]-4                48.0B ± 0%
Walk/Typedef[]-4                   112B ± 0%
Walk/Volatile[]-4                  112B ± 0%
Walk/Const[]-4                     112B ± 0%
Walk/Restrict[]-4                  112B ± 0%
Walk/Func[static_proto=]-4         112B ± 0%
Walk/FuncProto[args=2_return=]-4   112B ± 0%
Walk/Var[static]-4                 112B ± 0%
Walk/Datasec-4                     112B ± 0%

name                              allocs/op
Walk/Void-4                        1.00 ± 0%
Walk/Int[unsigned_size=0]-4        1.00 ± 0%
Walk/Pointer[target=]-4            2.00 ± 0%
Walk/Array[index=_type=_n=0]-4     2.00 ± 0%
Walk/Struct[fields=2]-4            2.00 ± 0%
Walk/Union[fields=2]-4             2.00 ± 0%
Walk/Enum[size=0_values=0]-4       1.00 ± 0%
Walk/Fwd[struct]-4                 1.00 ± 0%
Walk/Typedef[]-4                   2.00 ± 0%
Walk/Volatile[]-4                  2.00 ± 0%
Walk/Const[]-4                     2.00 ± 0%
Walk/Restrict[]-4                  2.00 ± 0%
Walk/Func[static_proto=]-4         2.00 ± 0%
Walk/FuncProto[args=2_return=]-4   2.00 ± 0%
Walk/Var[static]-4                 2.00 ± 0%
Walk/Datasec-4                     2.00 ± 0%
---
 btf/types_test.go | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/btf/types_test.go b/btf/types_test.go
index c1437533a..4519f7129 100644
--- a/btf/types_test.go
+++ b/btf/types_test.go
@@ -415,6 +415,37 @@ func TestInflateLegacyBitfield(t *testing.T) {
 	}
 }
 
+func BenchmarkWalk(b *testing.B) {
+	types := []Type{
+		&Void{},
+		&Int{},
+		&Pointer{},
+		&Array{},
+		&Struct{Members: make([]Member, 2)},
+		&Union{Members: make([]Member, 2)},
+		&Enum{},
+		&Fwd{},
+		&Typedef{},
+		&Volatile{},
+		&Const{},
+		&Restrict{},
+		&Func{},
+		&FuncProto{Params: make([]FuncParam, 2)},
+		&Var{},
+		&Datasec{Vars: make([]VarSecinfo, 2)},
+	}
+
+	for _, typ := range types {
+		b.Run(fmt.Sprint(typ), func(b *testing.B) {
+			b.ReportAllocs()
+
+			for i := 0; i < b.N; i++ {
+				typ.walk(&typeDeque{})
+			}
+		})
+	}
+}
+
 func BenchmarkUnderlyingType(b *testing.B) {
 	b.Run("no unwrapping", func(b *testing.B) {
 		v := &Int{}

From 888a6302151eada27312f0bfd0550a071a096f57 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer
Date: Fri, 16 Sep 2022 15:49:36 +0000
Subject: [PATCH 3/5] btf: avoid heap allocations when walking types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Type.walk() forces the typeDeque argument to always escape, since the
escape analyzer has to be conservative when dealing with interfaces.
Replace this with an optimizer-friendly walkType that lets callers
avoid allocations.

The downside is that it's now possible to implement a Type without
updating walkType, which will obviously blow up. Fortunately we don't
do this too often, so hopefully the burden isn't too high.
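At the call sites the change looks like this (a minimal before/after
sketch; the real changes are in the diff below):

    // Before: walk is an interface method, so the compiler must assume
    // that the deque escapes, and heap-allocates it.
    typ.walk(&typeDeque{})

    // After: walkType is a concrete function taking a callback. Once it
    // is inlined, escape analysis can keep the deque on the stack.
    var dq typeDeque
    walkType(typ, dq.push)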
name                              old time/op    new time/op    delta
Walk/Void-4                         22.4ns ± 1%     2.9ns ± 1%   -87.13%  (p=0.029 n=4+4)
Walk/Int[unsigned_size=0]-4         22.4ns ± 0%     2.8ns ± 2%   -87.31%  (p=0.029 n=4+4)
Walk/Pointer[target=]-4             61.4ns ± 1%    37.1ns ± 1%   -39.59%  (p=0.029 n=4+4)
Walk/Array[index=_type=_n=0]-4      63.0ns ± 0%    39.3ns ± 2%   -37.50%  (p=0.029 n=4+4)
Walk/Struct[fields=2]-4             64.8ns ± 1%    40.9ns ± 1%   -36.96%  (p=0.029 n=4+4)
Walk/Union[fields=2]-4              64.9ns ± 1%    40.9ns ± 2%   -36.92%  (p=0.029 n=4+4)
Walk/Enum[size=0_values=0]-4        22.4ns ± 1%     2.8ns ± 2%   -87.45%  (p=0.029 n=4+4)
Walk/Fwd[struct]-4                  22.3ns ± 1%     2.8ns ± 2%   -87.43%  (p=0.029 n=4+4)
Walk/Typedef[]-4                    61.7ns ± 1%    36.5ns ± 2%   -40.83%  (p=0.029 n=4+4)
Walk/Volatile[]-4                   61.5ns ± 1%    36.4ns ± 0%   -40.82%  (p=0.029 n=4+4)
Walk/Const[]-4                      61.6ns ± 0%    36.8ns ± 1%   -40.26%  (p=0.029 n=4+4)
Walk/Restrict[]-4                   61.2ns ± 0%    37.1ns ± 2%   -39.35%  (p=0.029 n=4+4)
Walk/Func[static_proto=]-4          61.7ns ± 1%    36.8ns ± 3%   -40.32%  (p=0.029 n=4+4)
Walk/FuncProto[args=2_return=]-4    66.8ns ± 1%    43.1ns ± 1%   -35.48%  (p=0.029 n=4+4)
Walk/Var[static]-4                  61.9ns ± 1%    36.5ns ± 0%   -40.96%  (p=0.029 n=4+4)
Walk/Datasec-4                      65.5ns ± 5%    40.3ns ± 0%   -38.57%  (p=0.029 n=4+4)

name                              old alloc/op   new alloc/op   delta
Walk/Void-4                          48.0B ± 0%      0.0B       -100.00%  (p=0.029 n=4+4)
Walk/Int[unsigned_size=0]-4          48.0B ± 0%      0.0B       -100.00%  (p=0.029 n=4+4)
Walk/Pointer[target=]-4               112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Array[index=_type=_n=0]-4        112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Struct[fields=2]-4               112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Union[fields=2]-4                112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Enum[size=0_values=0]-4         48.0B ± 0%      0.0B       -100.00%  (p=0.029 n=4+4)
Walk/Fwd[struct]-4                   48.0B ± 0%      0.0B       -100.00%  (p=0.029 n=4+4)
Walk/Typedef[]-4                      112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Volatile[]-4                     112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Const[]-4                        112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Restrict[]-4                     112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Func[static_proto=]-4            112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/FuncProto[args=2_return=]-4      112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Var[static]-4                    112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)
Walk/Datasec-4                        112B ± 0%       64B ± 0%   -42.86%  (p=0.029 n=4+4)

name                              old allocs/op  new allocs/op  delta
Walk/Void-4                           1.00 ± 0%      0.00       -100.00%  (p=0.029 n=4+4)
Walk/Int[unsigned_size=0]-4           1.00 ± 0%      0.00       -100.00%  (p=0.029 n=4+4)
Walk/Pointer[target=]-4               2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Array[index=_type=_n=0]-4        2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Struct[fields=2]-4               2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Union[fields=2]-4                2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Enum[size=0_values=0]-4          1.00 ± 0%      0.00       -100.00%  (p=0.029 n=4+4)
Walk/Fwd[struct]-4                    1.00 ± 0%      0.00       -100.00%  (p=0.029 n=4+4)
Walk/Typedef[]-4                      2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Volatile[]-4                     2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Const[]-4                        2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Restrict[]-4                     2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Func[static_proto=]-4            2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/FuncProto[args=2_return=]-4      2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Var[static]-4                    2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
Walk/Datasec-4                        2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.029 n=4+4)
---
 btf/core.go       |  8 +++---
 btf/traversal.go  | 56 ++++++++++++++++++++++++++++++++++++
 btf/types.go      | 72 +++++++++--------------------------------
 btf/types_test.go | 40 ++++++++++++++++++++++----
 4 files changed, 109 insertions(+), 67 deletions(-)
create mode 100644 btf/traversal.go diff --git a/btf/core.go b/btf/core.go index 01506261f..54c911ab7 100644 --- a/btf/core.go +++ b/btf/core.go @@ -873,8 +873,8 @@ func coreAreTypesCompatible(localType Type, targetType Type) error { case *Pointer, *Array: depth++ - localType.walk(&localTs) - targetType.walk(&targetTs) + walkType(localType, localTs.push) + walkType(targetType, targetTs.push) case *FuncProto: tv := targetType.(*FuncProto) @@ -883,8 +883,8 @@ func coreAreTypesCompatible(localType Type, targetType Type) error { } depth++ - localType.walk(&localTs) - targetType.walk(&targetTs) + walkType(localType, localTs.push) + walkType(targetType, targetTs.push) default: return fmt.Errorf("unsupported type %T", localType) diff --git a/btf/traversal.go b/btf/traversal.go new file mode 100644 index 000000000..a9ff1f703 --- /dev/null +++ b/btf/traversal.go @@ -0,0 +1,56 @@ +package btf + +import "fmt" + +// walkType calls fn on each child of typ. +func walkType(typ Type, fn func(*Type)) { + // Explicitly type switch on the most common types to allow the inliner to + // do its work. This avoids allocating intermediate slices from walk() on + // the heap. + switch v := typ.(type) { + case *Void, *Int, *Enum, *Fwd, *Float: + // No children to traverse. + case *Pointer: + fn(&v.Target) + case *Array: + fn(&v.Index) + fn(&v.Type) + case *Struct: + for i := range v.Members { + fn(&v.Members[i].Type) + } + case *Union: + for i := range v.Members { + fn(&v.Members[i].Type) + } + case *Typedef: + fn(&v.Type) + case *Volatile: + fn(&v.Type) + case *Const: + fn(&v.Type) + case *Restrict: + fn(&v.Type) + case *Func: + fn(&v.Type) + case *FuncProto: + fn(&v.Return) + for i := range v.Params { + fn(&v.Params[i].Type) + } + case *Var: + fn(&v.Type) + case *Datasec: + for i := range v.Vars { + fn(&v.Vars[i].Type) + } + case *declTag: + fn(&v.Type) + case *typeTag: + fn(&v.Type) + case *cycle: + // cycle has children, but we ignore them deliberately. + default: + panic(fmt.Sprintf("don't know how to walk Type %T", v)) + } +} diff --git a/btf/types.go b/btf/types.go index b34aaeefc..33ee43790 100644 --- a/btf/types.go +++ b/btf/types.go @@ -35,9 +35,7 @@ type Type interface { // Make a copy of the type, without copying Type members. copy() Type - // Enumerate all nested Types. Repeated calls must visit nested - // types in the same order. - walk(*typeDeque) + // New implementations must update walkType. } var ( @@ -51,6 +49,9 @@ var ( _ Type = (*Var)(nil) _ Type = (*Datasec)(nil) _ Type = (*Float)(nil) + _ Type = (*declTag)(nil) + _ Type = (*typeTag)(nil) + _ Type = (*cycle)(nil) ) // types is a list of Type. 
@@ -72,7 +73,6 @@ func (v *Void) Format(fs fmt.State, verb rune) { formatType(fs, verb, v) } func (v *Void) TypeName() string { return "" } func (v *Void) size() uint32 { return 0 } func (v *Void) copy() Type { return (*Void)(nil) } -func (v *Void) walk(*typeDeque) {} type IntEncoding byte @@ -126,7 +126,6 @@ func (i *Int) Format(fs fmt.State, verb rune) { func (i *Int) TypeName() string { return i.Name } func (i *Int) size() uint32 { return i.Size } -func (i *Int) walk(*typeDeque) {} func (i *Int) copy() Type { cpy := *i return &cpy @@ -141,9 +140,8 @@ func (p *Pointer) Format(fs fmt.State, verb rune) { formatType(fs, verb, p, "target=", p.Target) } -func (p *Pointer) TypeName() string { return "" } -func (p *Pointer) size() uint32 { return 8 } -func (p *Pointer) walk(tdq *typeDeque) { tdq.push(&p.Target) } +func (p *Pointer) TypeName() string { return "" } +func (p *Pointer) size() uint32 { return 8 } func (p *Pointer) copy() Type { cpy := *p return &cpy @@ -162,11 +160,6 @@ func (arr *Array) Format(fs fmt.State, verb rune) { func (arr *Array) TypeName() string { return "" } -func (arr *Array) walk(tdq *typeDeque) { - tdq.push(&arr.Index) - tdq.push(&arr.Type) -} - func (arr *Array) copy() Type { cpy := *arr return &cpy @@ -188,12 +181,6 @@ func (s *Struct) TypeName() string { return s.Name } func (s *Struct) size() uint32 { return s.Size } -func (s *Struct) walk(tdq *typeDeque) { - for i := range s.Members { - tdq.push(&s.Members[i].Type) - } -} - func (s *Struct) copy() Type { cpy := *s cpy.Members = copyMembers(s.Members) @@ -220,12 +207,6 @@ func (u *Union) TypeName() string { return u.Name } func (u *Union) size() uint32 { return u.Size } -func (u *Union) walk(tdq *typeDeque) { - for i := range u.Members { - tdq.push(&u.Members[i].Type) - } -} - func (u *Union) copy() Type { cpy := *u cpy.Members = copyMembers(u.Members) @@ -293,8 +274,7 @@ type EnumValue struct { Value uint64 } -func (e *Enum) size() uint32 { return e.Size } -func (e *Enum) walk(*typeDeque) {} +func (e *Enum) size() uint32 { return e.Size } func (e *Enum) copy() Type { cpy := *e cpy.Values = make([]EnumValue, len(e.Values)) @@ -334,7 +314,6 @@ func (f *Fwd) Format(fs fmt.State, verb rune) { func (f *Fwd) TypeName() string { return f.Name } -func (f *Fwd) walk(*typeDeque) {} func (f *Fwd) copy() Type { cpy := *f return &cpy @@ -352,7 +331,6 @@ func (td *Typedef) Format(fs fmt.State, verb rune) { func (td *Typedef) TypeName() string { return td.Name } -func (td *Typedef) walk(tdq *typeDeque) { tdq.push(&td.Type) } func (td *Typedef) copy() Type { cpy := *td return &cpy @@ -369,8 +347,7 @@ func (v *Volatile) Format(fs fmt.State, verb rune) { func (v *Volatile) TypeName() string { return "" } -func (v *Volatile) qualify() Type { return v.Type } -func (v *Volatile) walk(tdq *typeDeque) { tdq.push(&v.Type) } +func (v *Volatile) qualify() Type { return v.Type } func (v *Volatile) copy() Type { cpy := *v return &cpy @@ -387,8 +364,7 @@ func (c *Const) Format(fs fmt.State, verb rune) { func (c *Const) TypeName() string { return "" } -func (c *Const) qualify() Type { return c.Type } -func (c *Const) walk(tdq *typeDeque) { tdq.push(&c.Type) } +func (c *Const) qualify() Type { return c.Type } func (c *Const) copy() Type { cpy := *c return &cpy @@ -405,8 +381,7 @@ func (r *Restrict) Format(fs fmt.State, verb rune) { func (r *Restrict) TypeName() string { return "" } -func (r *Restrict) qualify() Type { return r.Type } -func (r *Restrict) walk(tdq *typeDeque) { tdq.push(&r.Type) } +func (r *Restrict) qualify() Type { return 
r.Type } func (r *Restrict) copy() Type { cpy := *r return &cpy @@ -430,7 +405,6 @@ func (f *Func) Format(fs fmt.State, verb rune) { func (f *Func) TypeName() string { return f.Name } -func (f *Func) walk(tdq *typeDeque) { tdq.push(&f.Type) } func (f *Func) copy() Type { cpy := *f return &cpy @@ -448,13 +422,6 @@ func (fp *FuncProto) Format(fs fmt.State, verb rune) { func (fp *FuncProto) TypeName() string { return "" } -func (fp *FuncProto) walk(tdq *typeDeque) { - tdq.push(&fp.Return) - for i := range fp.Params { - tdq.push(&fp.Params[i].Type) - } -} - func (fp *FuncProto) copy() Type { cpy := *fp cpy.Params = make([]FuncParam, len(fp.Params)) @@ -480,7 +447,6 @@ func (v *Var) Format(fs fmt.State, verb rune) { func (v *Var) TypeName() string { return v.Name } -func (v *Var) walk(tdq *typeDeque) { tdq.push(&v.Type) } func (v *Var) copy() Type { cpy := *v return &cpy @@ -501,12 +467,6 @@ func (ds *Datasec) TypeName() string { return ds.Name } func (ds *Datasec) size() uint32 { return ds.Size } -func (ds *Datasec) walk(tdq *typeDeque) { - for i := range ds.Vars { - tdq.push(&ds.Vars[i].Type) - } -} - func (ds *Datasec) copy() Type { cpy := *ds cpy.Vars = make([]VarSecinfo, len(ds.Vars)) @@ -537,7 +497,6 @@ func (f *Float) Format(fs fmt.State, verb rune) { func (f *Float) TypeName() string { return f.Name } func (f *Float) size() uint32 { return f.Size } -func (f *Float) walk(*typeDeque) {} func (f *Float) copy() Type { cpy := *f return &cpy @@ -557,8 +516,7 @@ func (dt *declTag) Format(fs fmt.State, verb rune) { formatType(fs, verb, dt, "type=", dt.Type, "value=", dt.Value, "index=", dt.Index) } -func (dt *declTag) TypeName() string { return "" } -func (dt *declTag) walk(td *typeDeque) { td.push(&dt.Type) } +func (dt *declTag) TypeName() string { return "" } func (dt *declTag) copy() Type { cpy := *dt return &cpy @@ -574,9 +532,8 @@ func (tt *typeTag) Format(fs fmt.State, verb rune) { formatType(fs, verb, tt, "type=", tt.Type, "value=", tt.Value) } -func (tt *typeTag) TypeName() string { return "" } -func (tt *typeTag) qualify() Type { return tt.Type } -func (tt *typeTag) walk(td *typeDeque) { td.push(&tt.Type) } +func (tt *typeTag) TypeName() string { return "" } +func (tt *typeTag) qualify() Type { return tt.Type } func (tt *typeTag) copy() Type { cpy := *tt return &cpy @@ -590,7 +547,6 @@ type cycle struct { func (c *cycle) ID() TypeID { return math.MaxUint32 } func (c *cycle) Format(fs fmt.State, verb rune) { formatType(fs, verb, c, "root=", c.root) } func (c *cycle) TypeName() string { return "" } -func (c *cycle) walk(*typeDeque) {} func (c *cycle) copy() Type { cpy := *c return &cpy @@ -739,7 +695,7 @@ func (c copier) copy(typ *Type, transform Transformer) { *t = cpy // Mark any nested types for copying. 
- cpy.walk(&work) + walkType(cpy, work.push) } } diff --git a/btf/types_test.go b/btf/types_test.go index 4519f7129..faa84ee52 100644 --- a/btf/types_test.go +++ b/btf/types_test.go @@ -97,6 +97,7 @@ func ExampleType_validTypes() { var _ Type = &FuncProto{} var _ Type = &Var{} var _ Type = &Datasec{} + var _ Type = &Float{} } func TestType(t *testing.T) { @@ -153,17 +154,45 @@ func TestType(t *testing.T) { t.Error("Copy doesn't copy") } - var first, second typeDeque - typ.walk(&first) - typ.walk(&second) + var a []*Type + walkType(typ, func(t *Type) { a = append(a, t) }) - if diff := cmp.Diff(first.all(), second.all(), compareTypes); diff != "" { + if _, ok := typ.(*cycle); !ok { + if n := countChildren(t, reflect.TypeOf(typ)); len(a) < n { + t.Errorf("walkType visited %d children, expected at least %d", len(a), n) + } + } + + var b []*Type + walkType(typ, func(t *Type) { b = append(b, t) }) + + if diff := cmp.Diff(a, b, compareTypes); diff != "" { t.Errorf("Walk mismatch (-want +got):\n%s", diff) } }) } } +func countChildren(t *testing.T, typ reflect.Type) int { + if typ.Kind() != reflect.Pointer { + t.Fatal("Expected pointer, got", typ.Kind()) + } + + typ = typ.Elem() + if typ.Kind() != reflect.Struct { + t.Fatal("Expected struct, got", typ.Kind()) + } + + var n int + for i := 0; i < typ.NumField(); i++ { + if typ.Field(i).Type == reflect.TypeOf((*Type)(nil)).Elem() { + n++ + } + } + + return n +} + func TestTypeDeque(t *testing.T) { a, b := new(Type), new(Type) @@ -440,7 +469,8 @@ func BenchmarkWalk(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { - typ.walk(&typeDeque{}) + var dq typeDeque + walkType(typ, dq.push) } }) } From aa3b56bbad4acf0d9b83066839e77ebc599f6b5d Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 7 Sep 2022 18:29:32 +0000 Subject: [PATCH 4/5] btf: make typeDeque generic Make typeDeque a generic container type so that we can re-use it for postorderTraversal. --- btf/types.go | 91 +++++++++++++++++++++++++++-------------------- btf/types_test.go | 77 +++++++++++++++++++-------------------- 2 files changed, 92 insertions(+), 76 deletions(-) diff --git a/btf/types.go b/btf/types.go index 33ee43790..eba4a9632 100644 --- a/btf/types.go +++ b/btf/types.go @@ -4,6 +4,7 @@ import ( "fmt" "io" "math" + "math/bits" "reflect" "strings" @@ -699,77 +700,91 @@ func (c copier) copy(typ *Type, transform Transformer) { } } -// typeDeque keeps track of pointers to types which still -// need to be visited. -type typeDeque struct { - types []*Type +type typeDeque = deque[*Type] + +// deque implements a double ended queue. +type deque[T any] struct { + elems []T read, write uint64 mask uint64 } -func (dq *typeDeque) empty() bool { +func (dq *deque[T]) empty() bool { return dq.read == dq.write } -// push adds a type to the stack. -func (dq *typeDeque) push(t *Type) { - if dq.write-dq.read < uint64(len(dq.types)) { - dq.types[dq.write&dq.mask] = t +func (dq *deque[T]) remainingCap() int { + return len(dq.elems) - int(dq.write-dq.read) +} + +// push adds an element to the end. 
+func (dq *deque[T]) push(e T) { + if dq.remainingCap() >= 1 { + dq.elems[dq.write&dq.mask] = e dq.write++ return } - new := len(dq.types) * 2 - if new == 0 { - new = 8 - } + elems := dq.linearise(1) + elems = append(elems, e) - types := make([]*Type, new) - pivot := dq.read & dq.mask - n := copy(types, dq.types[pivot:]) - n += copy(types[n:], dq.types[:pivot]) - types[n] = t - - dq.types = types - dq.mask = uint64(new) - 1 - dq.read, dq.write = 0, uint64(n+1) + dq.elems = elems[:cap(elems)] + dq.mask = uint64(cap(elems)) - 1 + dq.read, dq.write = 0, uint64(len(elems)) } -// shift returns the first element or null. -func (dq *typeDeque) shift() *Type { +// shift returns the first element or the zero value. +func (dq *deque[T]) shift() T { + var zero T + if dq.empty() { - return nil + return zero } index := dq.read & dq.mask - t := dq.types[index] - dq.types[index] = nil + t := dq.elems[index] + dq.elems[index] = zero dq.read++ return t } -// pop returns the last element or null. -func (dq *typeDeque) pop() *Type { +// pop returns the last element or the zero value. +func (dq *deque[T]) pop() T { + var zero T + if dq.empty() { - return nil + return zero } dq.write-- index := dq.write & dq.mask - t := dq.types[index] - dq.types[index] = nil + t := dq.elems[index] + dq.elems[index] = zero return t } -// all returns all elements. +// linearise the contents of the deque. // -// The deque is empty after calling this method. -func (dq *typeDeque) all() []*Type { +// The returned slice has space for at least n more elements and has power +// of two capacity. +func (dq *deque[T]) linearise(n int) []T { length := dq.write - dq.read - types := make([]*Type, 0, length) - for t := dq.shift(); t != nil; t = dq.shift() { - types = append(types, t) + need := length + uint64(n) + if need < length { + panic("overflow") } + + // Round up to the new power of two which is at least 8. 
+ // See https://jameshfisher.com/2018/03/30/round-up-power-2/ + capacity := 1 << (64 - bits.LeadingZeros64(need-1)) + if capacity < 8 { + capacity = 8 + } + + types := make([]T, length, capacity) + pivot := dq.read & dq.mask + copied := copy(types, dq.elems[pivot:]) + copy(types[copied:], dq.elems[:pivot]) return types } diff --git a/btf/types_test.go b/btf/types_test.go index faa84ee52..b46d8d2f1 100644 --- a/btf/types_test.go +++ b/btf/types_test.go @@ -193,77 +193,78 @@ func countChildren(t *testing.T, typ reflect.Type) int { return n } -func TestTypeDeque(t *testing.T) { - a, b := new(Type), new(Type) - +func TestDeque(t *testing.T) { t.Run("pop", func(t *testing.T) { - var td typeDeque - td.push(a) - td.push(b) + var dq deque[int] + dq.push(1) + dq.push(2) - if td.pop() != b { - t.Error("Didn't pop b first") + if dq.pop() != 2 { + t.Error("Didn't pop 2 first") } - if td.pop() != a { - t.Error("Didn't pop a second") + if dq.pop() != 1 { + t.Error("Didn't pop 1 second") } - if td.pop() != nil { - t.Error("Didn't pop nil") + if dq.pop() != 0 { + t.Error("Didn't pop zero") } }) t.Run("shift", func(t *testing.T) { - var td typeDeque - td.push(a) - td.push(b) + var td deque[int] + td.push(1) + td.push(2) - if td.shift() != a { - t.Error("Didn't shift a second") + if td.shift() != 1 { + t.Error("Didn't shift 1 first") } - if td.shift() != b { - t.Error("Didn't shift b first") + if td.shift() != 2 { + t.Error("Didn't shift b second") } - if td.shift() != nil { - t.Error("Didn't shift nil") + if td.shift() != 0 { + t.Error("Didn't shift zero") } }) t.Run("push", func(t *testing.T) { - var td typeDeque - td.push(a) - td.push(b) + var td deque[int] + td.push(1) + td.push(2) td.shift() - ts := make([]Type, 12) - for i := range ts { - td.push(&ts[i]) + for i := 1; i <= 12; i++ { + td.push(i) } - if td.shift() != b { - t.Error("Didn't shift b first") + if td.shift() != 2 { + t.Error("Didn't shift 2 first") } - for i := range ts { - if td.shift() != &ts[i] { - t.Fatal("Shifted wrong Type at pos", i) + for i := 1; i <= 12; i++ { + if v := td.shift(); v != i { + t.Fatalf("Shifted %d at pos %d", v, i) } } }) - t.Run("all", func(t *testing.T) { - var td typeDeque - td.push(a) - td.push(b) + t.Run("linearise", func(t *testing.T) { + var td deque[int] + td.push(1) + td.push(2) - all := td.all() + all := td.linearise(0) if len(all) != 2 { t.Fatal("Expected 2 elements, got", len(all)) } - if all[0] != a || all[1] != b { + if cap(all)&(cap(all)-1) != 0 { + t.Fatalf("Capacity %d is not a power of two", cap(all)) + } + + if all[0] != 1 || all[1] != 2 { t.Fatal("Elements don't match") } }) From 391a00a75f13b8175b6434520baa2978f9abb994 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 7 Sep 2022 18:33:15 +0000 Subject: [PATCH 5/5] internal: move Deque Move Deque from btf into internal and export methods. No other code changes. 
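For reference, the exported surface after the move, as exercised by the
tests below (a minimal usage sketch; the package is internal, so it is
only importable from within github.com/cilium/ebpf):

    var dq internal.Deque[int]
    dq.Push(1)
    dq.Push(2)
    fmt.Println(dq.Shift()) // 1: Shift consumes from the front (FIFO)
    fmt.Println(dq.Pop())   // 2: Pop consumes from the back (LIFO)
    fmt.Println(dq.Empty()) // true: both elements are gone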
--- btf/core.go | 10 ++--- btf/types.go | 94 ++---------------------------------------- btf/types_test.go | 79 +---------------------------------- internal/deque.go | 89 +++++++++++++++++++++++++++++++++++++++ internal/deque_test.go | 80 +++++++++++++++++++++++++++++++++++ 5 files changed, 179 insertions(+), 173 deletions(-) create mode 100644 internal/deque.go create mode 100644 internal/deque_test.go diff --git a/btf/core.go b/btf/core.go index 54c911ab7..f952b654e 100644 --- a/btf/core.go +++ b/btf/core.go @@ -855,7 +855,7 @@ func coreAreTypesCompatible(localType Type, targetType Type) error { depth = 0 ) - for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() { + for ; l != nil && t != nil; l, t = localTs.Shift(), targetTs.Shift() { if depth >= maxTypeDepth { return errors.New("types are nested too deep") } @@ -873,8 +873,8 @@ func coreAreTypesCompatible(localType Type, targetType Type) error { case *Pointer, *Array: depth++ - walkType(localType, localTs.push) - walkType(targetType, targetTs.push) + walkType(localType, localTs.Push) + walkType(targetType, targetTs.Push) case *FuncProto: tv := targetType.(*FuncProto) @@ -883,8 +883,8 @@ func coreAreTypesCompatible(localType Type, targetType Type) error { } depth++ - walkType(localType, localTs.push) - walkType(targetType, targetTs.push) + walkType(localType, localTs.Push) + walkType(targetType, targetTs.Push) default: return fmt.Errorf("unsupported type %T", localType) diff --git a/btf/types.go b/btf/types.go index eba4a9632..81980e8d8 100644 --- a/btf/types.go +++ b/btf/types.go @@ -4,11 +4,11 @@ import ( "fmt" "io" "math" - "math/bits" "reflect" "strings" "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" ) const maxTypeDepth = 32 @@ -678,7 +678,7 @@ type copier map[Type]Type func (c copier) copy(typ *Type, transform Transformer) { var work typeDeque - for t := typ; t != nil; t = work.pop() { + for t := typ; t != nil; t = work.Pop() { // *t is the identity of the type. if cpy := c[*t]; cpy != nil { *t = cpy @@ -696,97 +696,11 @@ func (c copier) copy(typ *Type, transform Transformer) { *t = cpy // Mark any nested types for copying. - walkType(cpy, work.push) + walkType(cpy, work.Push) } } -type typeDeque = deque[*Type] - -// deque implements a double ended queue. -type deque[T any] struct { - elems []T - read, write uint64 - mask uint64 -} - -func (dq *deque[T]) empty() bool { - return dq.read == dq.write -} - -func (dq *deque[T]) remainingCap() int { - return len(dq.elems) - int(dq.write-dq.read) -} - -// push adds an element to the end. -func (dq *deque[T]) push(e T) { - if dq.remainingCap() >= 1 { - dq.elems[dq.write&dq.mask] = e - dq.write++ - return - } - - elems := dq.linearise(1) - elems = append(elems, e) - - dq.elems = elems[:cap(elems)] - dq.mask = uint64(cap(elems)) - 1 - dq.read, dq.write = 0, uint64(len(elems)) -} - -// shift returns the first element or the zero value. -func (dq *deque[T]) shift() T { - var zero T - - if dq.empty() { - return zero - } - - index := dq.read & dq.mask - t := dq.elems[index] - dq.elems[index] = zero - dq.read++ - return t -} - -// pop returns the last element or the zero value. -func (dq *deque[T]) pop() T { - var zero T - - if dq.empty() { - return zero - } - - dq.write-- - index := dq.write & dq.mask - t := dq.elems[index] - dq.elems[index] = zero - return t -} - -// linearise the contents of the deque. -// -// The returned slice has space for at least n more elements and has power -// of two capacity. 
-func (dq *deque[T]) linearise(n int) []T { - length := dq.write - dq.read - need := length + uint64(n) - if need < length { - panic("overflow") - } - - // Round up to the new power of two which is at least 8. - // See https://jameshfisher.com/2018/03/30/round-up-power-2/ - capacity := 1 << (64 - bits.LeadingZeros64(need-1)) - if capacity < 8 { - capacity = 8 - } - - types := make([]T, length, capacity) - pivot := dq.read & dq.mask - copied := copy(types, dq.elems[pivot:]) - copy(types[copied:], dq.elems[:pivot]) - return types -} +type typeDeque = internal.Deque[*Type] // inflateRawTypes takes a list of raw btf types linked via type IDs, and turns // it into a graph of Types connected via pointers. diff --git a/btf/types_test.go b/btf/types_test.go index b46d8d2f1..569d32bd5 100644 --- a/btf/types_test.go +++ b/btf/types_test.go @@ -193,83 +193,6 @@ func countChildren(t *testing.T, typ reflect.Type) int { return n } -func TestDeque(t *testing.T) { - t.Run("pop", func(t *testing.T) { - var dq deque[int] - dq.push(1) - dq.push(2) - - if dq.pop() != 2 { - t.Error("Didn't pop 2 first") - } - - if dq.pop() != 1 { - t.Error("Didn't pop 1 second") - } - - if dq.pop() != 0 { - t.Error("Didn't pop zero") - } - }) - - t.Run("shift", func(t *testing.T) { - var td deque[int] - td.push(1) - td.push(2) - - if td.shift() != 1 { - t.Error("Didn't shift 1 first") - } - - if td.shift() != 2 { - t.Error("Didn't shift b second") - } - - if td.shift() != 0 { - t.Error("Didn't shift zero") - } - }) - - t.Run("push", func(t *testing.T) { - var td deque[int] - td.push(1) - td.push(2) - td.shift() - - for i := 1; i <= 12; i++ { - td.push(i) - } - - if td.shift() != 2 { - t.Error("Didn't shift 2 first") - } - for i := 1; i <= 12; i++ { - if v := td.shift(); v != i { - t.Fatalf("Shifted %d at pos %d", v, i) - } - } - }) - - t.Run("linearise", func(t *testing.T) { - var td deque[int] - td.push(1) - td.push(2) - - all := td.linearise(0) - if len(all) != 2 { - t.Fatal("Expected 2 elements, got", len(all)) - } - - if cap(all)&(cap(all)-1) != 0 { - t.Fatalf("Capacity %d is not a power of two", cap(all)) - } - - if all[0] != 1 || all[1] != 2 { - t.Fatal("Elements don't match") - } - }) -} - type testFormattableType struct { name string extra []interface{} @@ -471,7 +394,7 @@ func BenchmarkWalk(b *testing.B) { for i := 0; i < b.N; i++ { var dq typeDeque - walkType(typ, dq.push) + walkType(typ, dq.Push) } }) } diff --git a/internal/deque.go b/internal/deque.go new file mode 100644 index 000000000..1abc9a9ba --- /dev/null +++ b/internal/deque.go @@ -0,0 +1,89 @@ +package internal + +import "math/bits" + +// Deque implements a double ended queue. +type Deque[T any] struct { + elems []T + read, write uint64 + mask uint64 +} + +func (dq *Deque[T]) Empty() bool { + return dq.read == dq.write +} + +func (dq *Deque[T]) remainingCap() int { + return len(dq.elems) - int(dq.write-dq.read) +} + +// Push adds an element to the end. +func (dq *Deque[T]) Push(e T) { + if dq.remainingCap() >= 1 { + dq.elems[dq.write&dq.mask] = e + dq.write++ + return + } + + elems := dq.linearise(1) + elems = append(elems, e) + + dq.elems = elems[:cap(elems)] + dq.mask = uint64(cap(elems)) - 1 + dq.read, dq.write = 0, uint64(len(elems)) +} + +// Shift returns the first element or the zero value. +func (dq *Deque[T]) Shift() T { + var zero T + + if dq.Empty() { + return zero + } + + index := dq.read & dq.mask + t := dq.elems[index] + dq.elems[index] = zero + dq.read++ + return t +} + +// Pop returns the last element or the zero value. 
+func (dq *Deque[T]) Pop() T { + var zero T + + if dq.Empty() { + return zero + } + + dq.write-- + index := dq.write & dq.mask + t := dq.elems[index] + dq.elems[index] = zero + return t +} + +// linearise the contents of the deque. +// +// The returned slice has space for at least n more elements and has power +// of two capacity. +func (dq *Deque[T]) linearise(n int) []T { + length := dq.write - dq.read + need := length + uint64(n) + if need < length { + panic("overflow") + } + + // Round up to the new power of two which is at least 8. + // See https://jameshfisher.com/2018/03/30/round-up-power-2/ + capacity := 1 << (64 - bits.LeadingZeros64(need-1)) + if capacity < 8 { + capacity = 8 + } + + types := make([]T, length, capacity) + pivot := dq.read & dq.mask + copied := copy(types, dq.elems[pivot:]) + copy(types[copied:], dq.elems[:pivot]) + return types +} diff --git a/internal/deque_test.go b/internal/deque_test.go new file mode 100644 index 000000000..d611c0719 --- /dev/null +++ b/internal/deque_test.go @@ -0,0 +1,80 @@ +package internal + +import "testing" + +func TestDeque(t *testing.T) { + t.Run("pop", func(t *testing.T) { + var dq Deque[int] + dq.Push(1) + dq.Push(2) + + if dq.Pop() != 2 { + t.Error("Didn't pop 2 first") + } + + if dq.Pop() != 1 { + t.Error("Didn't pop 1 second") + } + + if dq.Pop() != 0 { + t.Error("Didn't pop zero") + } + }) + + t.Run("shift", func(t *testing.T) { + var td Deque[int] + td.Push(1) + td.Push(2) + + if td.Shift() != 1 { + t.Error("Didn't shift 1 first") + } + + if td.Shift() != 2 { + t.Error("Didn't shift b second") + } + + if td.Shift() != 0 { + t.Error("Didn't shift zero") + } + }) + + t.Run("push", func(t *testing.T) { + var td Deque[int] + td.Push(1) + td.Push(2) + td.Shift() + + for i := 1; i <= 12; i++ { + td.Push(i) + } + + if td.Shift() != 2 { + t.Error("Didn't shift 2 first") + } + for i := 1; i <= 12; i++ { + if v := td.Shift(); v != i { + t.Fatalf("Shifted %d at pos %d", v, i) + } + } + }) + + t.Run("linearise", func(t *testing.T) { + var td Deque[int] + td.Push(1) + td.Push(2) + + all := td.linearise(0) + if len(all) != 2 { + t.Fatal("Expected 2 elements, got", len(all)) + } + + if cap(all)&(cap(all)-1) != 0 { + t.Fatalf("Capacity %d is not a power of two", cap(all)) + } + + if all[0] != 1 || all[1] != 2 { + t.Fatal("Elements don't match") + } + }) +}
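As a footnote on the growth strategy: linearise rounds the required
capacity up to the next power of two (minimum 8) so that the read/write
index masking (mask = capacity - 1) keeps working. A standalone sketch
of just that computation; the driver program around it is illustrative:

    package main

    import (
    	"fmt"
    	"math/bits"
    )

    // roundUpCapacity mirrors the rounding step in Deque.linearise: the
    // next power of two that holds need, but never less than 8.
    func roundUpCapacity(need uint64) int {
    	capacity := 1 << (64 - bits.LeadingZeros64(need-1))
    	if capacity < 8 {
    		capacity = 8
    	}
    	return capacity
    }

    func main() {
    	for _, need := range []uint64{1, 3, 8, 9, 12} {
    		// Prints: 1->8, 3->8, 8->8, 9->16, 12->16.
    		fmt.Printf("%d->%d\n", need, roundUpCapacity(need))
    	}
    }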