trie: polishes to trie committer (#21351)

* trie: update tests to check commit integrity * trie: polish committer * trie: fix typo * trie: remove hasvalue notion According to the benchmarks, type assertion between the pointer and interface is extremely fast. BenchmarkIntmethod-12 1000000000 1.91 ns/op BenchmarkInterface-12 1000000000 2.13 ns/op BenchmarkTypeSwitch-12 1000000000 1.81 ns/op BenchmarkTypeAssertion-12 2000000000 1.78 ns/op So the overhead for asserting whether the shortnode has "valuenode" child is super tiny. No necessary to have another field. * trie: linter nitpicks Co-authored-by: Martin Holst Swende <martin@swende.se>
ethereum · Sep 30, 2020 · 053ed9c · 053ed9c
1 parent dad2658
commit 053ed9c
Show file tree

Hide file tree

Showing 4 changed files with 196 additions and 65 deletions.
diff --git a/trie/committer.go b/trie/committer.go
@@ -23,7 +23,6 @@ import (
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/crypto"
-	"github.com/ethereum/go-ethereum/rlp"
 	"golang.org/x/crypto/sha3"
 )
 
@@ -33,10 +32,9 @@ const leafChanSize = 200
 
 // leaf represents a trie leaf value
 type leaf struct {
-	size   int         // size of the rlp data (estimate)
-	hash   common.Hash // hash of rlp data
-	node   node        // the node to commit
-	vnodes bool        // set to true if the node (possibly) contains a valueNode
+	size int         // size of the rlp data (estimate)
+	hash common.Hash // hash of rlp data
+	node node        // the node to commit
 }
 
 // committer is a type used for the trie Commit operation. A committer has some
@@ -74,26 +72,20 @@ func returnCommitterToPool(h *committer) {
 	committerPool.Put(h)
 }
 
-// commitNeeded returns 'false' if the given node is already in sync with db
-func (c *committer) commitNeeded(n node) bool {
-	hash, dirty := n.cache()
-	return hash == nil || dirty
-}
-
 // commit collapses a node down into a hash node and inserts it into the database
 func (c *committer) Commit(n node, db *Database) (hashNode, error) {
 	if db == nil {
 		return nil, errors.New("no db provided")
 	}
-	h, err := c.commit(n, db, true)
+	h, err := c.commit(n, db)
 	if err != nil {
 		return nil, err
 	}
 	return h.(hashNode), nil
 }
 
 // commit collapses a node down into a hash node and inserts it into the database
-func (c *committer) commit(n node, db *Database, force bool) (node, error) {
+func (c *committer) commit(n node, db *Database) (node, error) {
 	// if this path is clean, use available cached data
 	hash, dirty := n.cache()
 	if hash != nil && !dirty {
@@ -104,87 +96,90 @@ func (c *committer) commit(n node, db *Database, force bool) (node, error) {
 	case *shortNode:
 		// Commit child
 		collapsed := cn.copy()
-		if _, ok := cn.Val.(valueNode); !ok {
-			childV, err := c.commit(cn.Val, db, false)
+
+		// If the child is fullnode, recursively commit.
+		// Otherwise it can only be hashNode or valueNode.
+		if _, ok := cn.Val.(*fullNode); ok {
+			childV, err := c.commit(cn.Val, db)
 			if err != nil {
 				return nil, err
 			}
 			collapsed.Val = childV
 		}
 		// The key needs to be copied, since we're delivering it to database
 		collapsed.Key = hexToCompact(cn.Key)
-		hashedNode := c.store(collapsed, db, force, true)
+		hashedNode := c.store(collapsed, db)
 		if hn, ok := hashedNode.(hashNode); ok {
 			return hn, nil
 		}
 		return collapsed, nil
 	case *fullNode:
-		hashedKids, hasVnodes, err := c.commitChildren(cn, db, force)
+		hashedKids, err := c.commitChildren(cn, db)
 		if err != nil {
 			return nil, err
 		}
 		collapsed := cn.copy()
 		collapsed.Children = hashedKids
 
-		hashedNode := c.store(collapsed, db, force, hasVnodes)
+		hashedNode := c.store(collapsed, db)
 		if hn, ok := hashedNode.(hashNode); ok {
 			return hn, nil
 		}
 		return collapsed, nil
-	case valueNode:
-		return c.store(cn, db, force, false), nil
-	// hashnodes aren't stored
 	case hashNode:
 		return cn, nil
+	default:
+		// nil, valuenode shouldn't be committed
+		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
 	}
-	return hash, nil
 }
 
 // commitChildren commits the children of the given fullnode
-func (c *committer) commitChildren(n *fullNode, db *Database, force bool) ([17]node, bool, error) {
+func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, error) {
 	var children [17]node
-	var hasValueNodeChildren = false
-	for i, child := range n.Children {
+	for i := 0; i < 16; i++ {
+		child := n.Children[i]
 		if child == nil {
 			continue
 		}
-		hnode, err := c.commit(child, db, false)
-		if err != nil {
-			return children, false, err
+		// If it's the hashed child, save the hash value directly.
+		// Note: it's impossible that the child in range [0, 15]
+		// is a valuenode.
+		if hn, ok := child.(hashNode); ok {
+			children[i] = hn
+			continue
 		}
-		children[i] = hnode
-		if _, ok := hnode.(valueNode); ok {
-			hasValueNodeChildren = true
+		// Commit the child recursively and store the "hashed" value.
+		// Note the returned node can be some embedded nodes, so it's
+		// possible the type is not hashnode.
+		hashed, err := c.commit(child, db)
+		if err != nil {
+			return children, err
 		}
+		children[i] = hashed
 	}
-	return children, hasValueNodeChildren, nil
+	// For the 17th child, it's possible the type is valuenode.
+	if n.Children[16] != nil {
+		children[16] = n.Children[16]
+	}
+	return children, nil
 }
 
 // store hashes the node n and if we have a storage layer specified, it writes
 // the key/value pair to it and tracks any node->child references as well as any
 // node->external trie references.
-func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren bool) node {
+func (c *committer) store(n node, db *Database) node {
 	// Larger nodes are replaced by their hash and stored in the database.
 	var (
 		hash, _ = n.cache()
 		size    int
 	)
 	if hash == nil {
-		if vn, ok := n.(valueNode); ok {
-			c.tmp.Reset()
-			if err := rlp.Encode(&c.tmp, vn); err != nil {
-				panic("encode error: " + err.Error())
-			}
-			size = len(c.tmp)
-			if size < 32 && !force {
-				return n // Nodes smaller than 32 bytes are stored inside their parent
-			}
-			hash = c.makeHashNode(c.tmp)
-		} else {
-			// This was not generated - must be a small node stored in the parent
-			// No need to do anything here
-			return n
-		}
+		// This was not generated - must be a small node stored in the parent.
+		// In theory we should apply the leafCall here if it's not nil(embedded
+		// node usually contains value). But small value(less than 32bytes) is
+		// not our target.
+		return n
 	} else {
 		// We have the hash already, estimate the RLP encoding-size of the node.
 		// The size is used for mem tracking, does not need to be exact
@@ -194,10 +189,9 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
 	// The leaf channel will be active only when there an active leaf-callback
 	if c.leafCh != nil {
 		c.leafCh <- &leaf{
-			size:   size,
-			hash:   common.BytesToHash(hash),
-			node:   n,
-			vnodes: hasVnodeChildren,
+			size: size,
+			hash: common.BytesToHash(hash),
+			node: n,
 		}
 	} else if db != nil {
 		// No leaf-callback used, but there's still a database. Do serial
@@ -209,30 +203,30 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
 	return hash
 }
 
-// commitLoop does the actual insert + leaf callback for nodes
+// commitLoop does the actual insert + leaf callback for nodes.
 func (c *committer) commitLoop(db *Database) {
 	for item := range c.leafCh {
 		var (
-			hash      = item.hash
-			size      = item.size
-			n         = item.node
-			hasVnodes = item.vnodes
+			hash = item.hash
+			size = item.size
+			n    = item.node
 		)
 		// We are pooling the trie nodes into an intermediate memory cache
 		db.lock.Lock()
 		db.insert(hash, size, n)
 		db.lock.Unlock()
-		if c.onleaf != nil && hasVnodes {
+
+		if c.onleaf != nil {
 			switch n := n.(type) {
 			case *shortNode:
 				if child, ok := n.Val.(valueNode); ok {
 					c.onleaf(nil, child, hash)
 				}
 			case *fullNode:
-				for i := 0; i < 16; i++ {
-					if child, ok := n.Children[i].(valueNode); ok {
-						c.onleaf(nil, child, hash)
-					}
+				// For children in range [0, 15], it's impossible
+				// to contain valuenode. Only check the 17th child.
+				if n.Children[16] != nil {
+					c.onleaf(nil, n.Children[16].(valueNode), hash)
 				}
 			}
 		}

diff --git a/trie/hasher.go b/trie/hasher.go
@@ -66,11 +66,11 @@ func returnHasherToPool(h *hasher) {
 // hash collapses a node down into a hash node, also returning a copy of the
 // original node initialized with the computed hash to replace the original one.
 func (h *hasher) hash(n node, force bool) (hashed node, cached node) {
-	// We're not storing the node, just hashing, use available cached data
+	// Return the cached hash if it's available
 	if hash, _ := n.cache(); hash != nil {
 		return hash, n
 	}
-	// Trie not processed yet or needs storage, walk the children
+	// Trie not processed yet, walk the children
 	switch n := n.(type) {
 	case *shortNode:
 		collapsed, cached := h.hashShortNodeChildren(n)

diff --git a/trie/trie.go b/trie/trie.go
@@ -505,13 +505,16 @@ func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) {
 	if t.root == nil {
 		return emptyRoot, nil
 	}
+	// Derive the hash for all dirty nodes first. We hold the assumption
+	// in the following procedure that all nodes are hashed.
 	rootHash := t.Hash()
 	h := newCommitter()
 	defer returnCommitterToPool(h)
+
 	// Do a quick check if we really need to commit, before we spin
 	// up goroutines. This can happen e.g. if we load a trie for reading storage
 	// values, but don't write to it.
-	if !h.commitNeeded(t.root) {
+	if _, dirty := t.root.cache(); !dirty {
 		return rootHash, nil
 	}
 	var wg sync.WaitGroup