rlp, trie: faster trie node encoding (ethereum#24126)

This change speeds up trie hashing and all other activities that require RLP encoding of trie nodes by approximately 20%. The speedup is achieved by avoiding reflection overhead during node encoding. The interface type trie.node now contains a method 'encode' that works with rlp.EncoderBuffer. Management of EncoderBuffers is left to calling code. trie.hasher, which is pooled to avoid allocations, now maintains an EncoderBuffer. This means memory resources related to trie node encoding are tied to the hasher pool.

Co-authored-by: Felix Lange <fjl@twurst.com>
syscoin · Mar 9, 2022 · 1dd3380 · 1dd3380
1 parent 7cd522c
commit 1dd3380
Show file tree

Hide file tree

Showing 12 changed files with 286 additions and 181 deletions.
diff --git a/rlp/encbuffer.go b/rlp/encbuffer.go
@@ -36,27 +36,31 @@ func (buf *encBuffer) size() int {
 	return len(buf.str) + buf.lhsize
 }
 
-// toBytes creates the encoder output.
-func (w *encBuffer) toBytes() []byte {
+// makeBytes creates the encoder output.
+func (w *encBuffer) makeBytes() []byte {
 	out := make([]byte, w.size())
+	w.copyTo(out)
+	return out
+}
+
+func (w *encBuffer) copyTo(dst []byte) {
 	strpos := 0
 	pos := 0
 	for _, head := range w.lheads {
 		// write string data before header
-		n := copy(out[pos:], w.str[strpos:head.offset])
+		n := copy(dst[pos:], w.str[strpos:head.offset])
 		pos += n
 		strpos += n
 		// write the header
-		enc := head.encode(out[pos:])
+		enc := head.encode(dst[pos:])
 		pos += len(enc)
 	}
 	// copy string data after the last list header
-	copy(out[pos:], w.str[strpos:])
-	return out
+	copy(dst[pos:], w.str[strpos:])
 }
 
-// toWriter writes the encoder output to w.
-func (buf *encBuffer) toWriter(w io.Writer) (err error) {
+// writeTo writes the encoder output to w.
+func (buf *encBuffer) writeTo(w io.Writer) (err error) {
 	strpos := 0
 	for _, head := range buf.lheads {
 		// write string data before header
@@ -252,6 +256,19 @@ func (r *encReader) next() []byte {
 	}
 }
 
+func encBufferFromWriter(w io.Writer) *encBuffer {
+	switch w := w.(type) {
+	case EncoderBuffer:
+		return w.buf
+	case *EncoderBuffer:
+		return w.buf
+	case *encBuffer:
+		return w
+	default:
+		return nil
+	}
+}
+
 // EncoderBuffer is a buffer for incremental encoding.
 //
 // The zero value is NOT ready for use. To get a usable buffer,
@@ -279,14 +296,10 @@ func (w *EncoderBuffer) Reset(dst io.Writer) {
 	// If the destination writer has an *encBuffer, use it.
 	// Note that w.ownBuffer is left false here.
 	if dst != nil {
-		if outer, ok := dst.(*encBuffer); ok {
+		if outer := encBufferFromWriter(dst); outer != nil {
 			*w = EncoderBuffer{outer, nil, false}
 			return
 		}
-		if outer, ok := dst.(EncoderBuffer); ok {
-			*w = EncoderBuffer{outer.buf, nil, false}
-			return
-		}
 	}
 
 	// Get a fresh buffer.
@@ -303,7 +316,7 @@ func (w *EncoderBuffer) Reset(dst io.Writer) {
 func (w *EncoderBuffer) Flush() error {
 	var err error
 	if w.dst != nil {
-		err = w.buf.toWriter(w.dst)
+		err = w.buf.writeTo(w.dst)
 	}
 	// Release the internal buffer.
 	if w.ownBuffer {
@@ -315,7 +328,15 @@ func (w *EncoderBuffer) Flush() error {
 
 // ToBytes returns the encoded bytes.
 func (w *EncoderBuffer) ToBytes() []byte {
-	return w.buf.toBytes()
+	return w.buf.makeBytes()
+}
+
+// AppendToBytes appends the encoded bytes to dst.
+func (w *EncoderBuffer) AppendToBytes(dst []byte) []byte {
+	size := w.buf.size()
+	out := append(dst, make([]byte, size)...)
+	w.buf.copyTo(out[len(dst):])
+	return out
 }
 
 // Write appends b directly to the encoder output.

diff --git a/rlp/encode.go b/rlp/encode.go
@@ -56,20 +56,16 @@ type Encoder interface {
 // Please see package-level documentation of encoding rules.
 func Encode(w io.Writer, val interface{}) error {
 	// Optimization: reuse *encBuffer when called by EncodeRLP.
-	if buf, ok := w.(*encBuffer); ok {
+	if buf := encBufferFromWriter(w); buf != nil {
 		return buf.encode(val)
 	}
-	if ebuf, ok := w.(EncoderBuffer); ok {
-		return ebuf.buf.encode(val)
-	}
 
 	buf := getEncBuffer()
 	defer encBufferPool.Put(buf)
-
 	if err := buf.encode(val); err != nil {
 		return err
 	}
-	return buf.toWriter(w)
+	return buf.writeTo(w)
 }
 
 // EncodeToBytes returns the RLP encoding of val.
@@ -81,7 +77,7 @@ func EncodeToBytes(val interface{}) ([]byte, error) {
 	if err := buf.encode(val); err != nil {
 		return nil, err
 	}
-	return buf.toBytes(), nil
+	return buf.makeBytes(), nil
 }
 
 // EncodeToReader returns a reader from which the RLP encoding of val

diff --git a/rlp/encode_test.go b/rlp/encode_test.go
@@ -399,6 +399,21 @@ func TestEncodeToBytes(t *testing.T) {
 	runEncTests(t, EncodeToBytes)
 }
 
+func TestEncodeAppendToBytes(t *testing.T) {
+	buffer := make([]byte, 20)
+	runEncTests(t, func(val interface{}) ([]byte, error) {
+		w := NewEncoderBuffer(nil)
+		defer w.Flush()
+
+		err := Encode(w, val)
+		if err != nil {
+			return nil, err
+		}
+		output := w.AppendToBytes(buffer[:0])
+		return output, nil
+	})
+}
+
 func TestEncodeToReader(t *testing.T) {
 	runEncTests(t, func(val interface{}) ([]byte, error) {
 		_, r, err := EncodeToReader(val)

diff --git a/trie/committer.go b/trie/committer.go
@@ -44,7 +44,6 @@ type leaf struct {
 // By 'some level' of parallelism, it's still the case that all leaves will be
 // processed sequentially - onleaf will never be called in parallel or out of order.
 type committer struct {
-	tmp sliceBuffer
 	sha crypto.KeccakState
 
 	onleaf LeafCallback
@@ -55,7 +54,6 @@ type committer struct {
 var committerPool = sync.Pool{
 	New: func() interface{} {
 		return &committer{
-			tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
 			sha: sha3.NewLegacyKeccak256().(crypto.KeccakState),
 		}
 	},

diff --git a/trie/database.go b/trie/database.go
@@ -113,16 +113,9 @@ func (n rawFullNode) cache() (hashNode, bool)   { panic("this should never end u
 func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") }
 
 func (n rawFullNode) EncodeRLP(w io.Writer) error {
-	var nodes [17]node
-
-	for i, child := range n {
-		if child != nil {
-			nodes[i] = child
-		} else {
-			nodes[i] = nilValueNode
-		}
-	}
-	return rlp.Encode(w, nodes)
+	eb := rlp.NewEncoderBuffer(w)
+	n.encode(eb)
+	return eb.Flush()
 }
 
 // rawShortNode represents only the useful data content of a short node, with the
@@ -164,11 +157,7 @@ func (n *cachedNode) rlp() []byte {
 	if node, ok := n.node.(rawNode); ok {
 		return node
 	}
-	blob, err := rlp.EncodeToBytes(n.node)
-	if err != nil {
-		panic(err)
-	}
-	return blob
+	return nodeToBytes(n.node)
 }
 
 // obj returns the decoded and expanded trie node, either directly from the cache,

diff --git a/trie/hasher.go b/trie/hasher.go
@@ -24,31 +24,22 @@ import (
 	"golang.org/x/crypto/sha3"
 )
 
-type sliceBuffer []byte
-
-func (b *sliceBuffer) Write(data []byte) (n int, err error) {
-	*b = append(*b, data...)
-	return len(data), nil
-}
-
-func (b *sliceBuffer) Reset() {
-	*b = (*b)[:0]
-}
-
 // hasher is a type used for the trie Hash operation. A hasher has some
 // internal preallocated temp space
 type hasher struct {
 	sha      crypto.KeccakState
-	tmp      sliceBuffer
+	tmp      []byte
+	encbuf   rlp.EncoderBuffer
 	parallel bool // Whether to use parallel threads when hashing
 }
 
 // hasherPool holds pureHashers
 var hasherPool = sync.Pool{
 	New: func() interface{} {
 		return &hasher{
-			tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
-			sha: sha3.NewLegacyKeccak256().(crypto.KeccakState),
+			tmp:    make([]byte, 0, 550), // cap is as large as a full fullNode.
+			sha:    sha3.NewLegacyKeccak256().(crypto.KeccakState),
+			encbuf: rlp.NewEncoderBuffer(nil),
 		}
 	},
 }
@@ -153,30 +144,41 @@ func (h *hasher) hashFullNodeChildren(n *fullNode) (collapsed *fullNode, cached
 // into compact form for RLP encoding.
 // If the rlp data is smaller than 32 bytes, `nil` is returned.
 func (h *hasher) shortnodeToHash(n *shortNode, force bool) node {
-	h.tmp.Reset()
-	if err := rlp.Encode(&h.tmp, n); err != nil {
-		panic("encode error: " + err.Error())
-	}
+	n.encode(h.encbuf)
+	enc := h.encodedBytes()
 
-	if len(h.tmp) < 32 && !force {
+	if len(enc) < 32 && !force {
 		return n // Nodes smaller than 32 bytes are stored inside their parent
 	}
-	return h.hashData(h.tmp)
+	return h.hashData(enc)
 }
 
 // fullnodeToHash is used to create a hashNode from a set of hashNodes, (which
 // may contain nil values)
 func (h *hasher) fullnodeToHash(n *fullNode, force bool) node {
-	h.tmp.Reset()
-	// Generate the RLP encoding of the node
-	if err := n.EncodeRLP(&h.tmp); err != nil {
-		panic("encode error: " + err.Error())
-	}
+	n.encode(h.encbuf)
+	enc := h.encodedBytes()
 
-	if len(h.tmp) < 32 && !force {
+	if len(enc) < 32 && !force {
 		return n // Nodes smaller than 32 bytes are stored inside their parent
 	}
-	return h.hashData(h.tmp)
+	return h.hashData(enc)
+}
+
+// encodedBytes returns the result of the last encoding operation on h.encbuf.
+// This also resets the encoder buffer.
+//
+// All node encoding must be done like this:
+//
+//     node.encode(h.encbuf)
+//     enc := h.encodedBytes()
+//
+// This convention exists because node.encode can only be inlined/escape-analyzed when
+// called on a concrete receiver type.
+func (h *hasher) encodedBytes() []byte {
+	h.tmp = h.encbuf.AppendToBytes(h.tmp[:0])
+	h.encbuf.Reset(nil)
+	return h.tmp
 }
 
 // hashData hashes the provided data

diff --git a/trie/iterator.go b/trie/iterator.go
@@ -23,7 +23,6 @@ import (
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/ethdb"
-	"github.com/ethereum/go-ethereum/rlp"
 )
 
 // Iterator is a key-value trie iterator that traverses a Trie.
@@ -214,8 +213,7 @@ func (it *nodeIterator) LeafProof() [][]byte {
 				// Gather nodes that end up as hash nodes (or the root)
 				node, hashed := hasher.proofHash(item.node)
 				if _, ok := hashed.(hashNode); ok || i == 0 {
-					enc, _ := rlp.EncodeToBytes(node)
-					proofs = append(proofs, enc)
+					proofs = append(proofs, nodeToBytes(node))
 				}
 			}
 			return proofs

diff --git a/trie/node.go b/trie/node.go
@@ -28,8 +28,9 @@ import (
 var indices = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "[17]"}
 
 type node interface {
-	fstring(string) string
 	cache() (hashNode, bool)
+	encode(w rlp.EncoderBuffer)
+	fstring(string) string
 }
 
 type (
@@ -52,16 +53,9 @@ var nilValueNode = valueNode(nil)
 
 // EncodeRLP encodes a full node into the consensus RLP format.
 func (n *fullNode) EncodeRLP(w io.Writer) error {
-	var nodes [17]node
-
-	for i, child := range &n.Children {
-		if child != nil {
-			nodes[i] = child
-		} else {
-			nodes[i] = nilValueNode
-		}
-	}
-	return rlp.Encode(w, nodes)
+	eb := rlp.NewEncoderBuffer(w)
+	n.encode(eb)
+	return eb.Flush()
 }
 
 func (n *fullNode) copy() *fullNode   { copy := *n; return &copy }