Skip to content

Commit

Permalink
trie: polishes to trie committer (#21351)
Browse files Browse the repository at this point in the history
* trie: update tests to check commit integrity

* trie: polish committer

* trie: fix typo

* trie: remove hasvalue notion

According to the benchmarks, type assertion between the pointer and
interface is extremely fast.

BenchmarkIntmethod-12           1000000000               1.91 ns/op
BenchmarkInterface-12           1000000000               2.13 ns/op
BenchmarkTypeSwitch-12          1000000000               1.81 ns/op
BenchmarkTypeAssertion-12       2000000000               1.78 ns/op

So the overhead for asserting whether the shortnode has "valuenode"
child is super tiny. No necessary to have another field.

* trie: linter nitpicks

Co-authored-by: Martin Holst Swende <martin@swende.se>
  • Loading branch information
rjl493456442 and holiman committed Sep 30, 2020
1 parent dad2658 commit 053ed9c
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 65 deletions.
118 changes: 56 additions & 62 deletions trie/committer.go
Expand Up @@ -23,7 +23,6 @@ import (

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/rlp"
"golang.org/x/crypto/sha3"
)

Expand All @@ -33,10 +32,9 @@ const leafChanSize = 200

// leaf represents a trie leaf value
type leaf struct {
size int // size of the rlp data (estimate)
hash common.Hash // hash of rlp data
node node // the node to commit
vnodes bool // set to true if the node (possibly) contains a valueNode
size int // size of the rlp data (estimate)
hash common.Hash // hash of rlp data
node node // the node to commit
}

// committer is a type used for the trie Commit operation. A committer has some
Expand Down Expand Up @@ -74,26 +72,20 @@ func returnCommitterToPool(h *committer) {
committerPool.Put(h)
}

// commitNeeded returns 'false' if the given node is already in sync with db
func (c *committer) commitNeeded(n node) bool {
hash, dirty := n.cache()
return hash == nil || dirty
}

// commit collapses a node down into a hash node and inserts it into the database
func (c *committer) Commit(n node, db *Database) (hashNode, error) {
if db == nil {
return nil, errors.New("no db provided")
}
h, err := c.commit(n, db, true)
h, err := c.commit(n, db)
if err != nil {
return nil, err
}
return h.(hashNode), nil
}

// commit collapses a node down into a hash node and inserts it into the database
func (c *committer) commit(n node, db *Database, force bool) (node, error) {
func (c *committer) commit(n node, db *Database) (node, error) {
// if this path is clean, use available cached data
hash, dirty := n.cache()
if hash != nil && !dirty {
Expand All @@ -104,87 +96,90 @@ func (c *committer) commit(n node, db *Database, force bool) (node, error) {
case *shortNode:
// Commit child
collapsed := cn.copy()
if _, ok := cn.Val.(valueNode); !ok {
childV, err := c.commit(cn.Val, db, false)

// If the child is fullnode, recursively commit.
// Otherwise it can only be hashNode or valueNode.
if _, ok := cn.Val.(*fullNode); ok {
childV, err := c.commit(cn.Val, db)
if err != nil {
return nil, err
}
collapsed.Val = childV
}
// The key needs to be copied, since we're delivering it to database
collapsed.Key = hexToCompact(cn.Key)
hashedNode := c.store(collapsed, db, force, true)
hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
return collapsed, nil
case *fullNode:
hashedKids, hasVnodes, err := c.commitChildren(cn, db, force)
hashedKids, err := c.commitChildren(cn, db)
if err != nil {
return nil, err
}
collapsed := cn.copy()
collapsed.Children = hashedKids

hashedNode := c.store(collapsed, db, force, hasVnodes)
hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
return collapsed, nil
case valueNode:
return c.store(cn, db, force, false), nil
// hashnodes aren't stored
case hashNode:
return cn, nil
default:
// nil, valuenode shouldn't be committed
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
}
return hash, nil
}

// commitChildren commits the children of the given fullnode
func (c *committer) commitChildren(n *fullNode, db *Database, force bool) ([17]node, bool, error) {
func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, error) {
var children [17]node
var hasValueNodeChildren = false
for i, child := range n.Children {
for i := 0; i < 16; i++ {
child := n.Children[i]
if child == nil {
continue
}
hnode, err := c.commit(child, db, false)
if err != nil {
return children, false, err
// If it's the hashed child, save the hash value directly.
// Note: it's impossible that the child in range [0, 15]
// is a valuenode.
if hn, ok := child.(hashNode); ok {
children[i] = hn
continue
}
children[i] = hnode
if _, ok := hnode.(valueNode); ok {
hasValueNodeChildren = true
// Commit the child recursively and store the "hashed" value.
// Note the returned node can be some embedded nodes, so it's
// possible the type is not hashnode.
hashed, err := c.commit(child, db)
if err != nil {
return children, err
}
children[i] = hashed
}
return children, hasValueNodeChildren, nil
// For the 17th child, it's possible the type is valuenode.
if n.Children[16] != nil {
children[16] = n.Children[16]
}
return children, nil
}

// store hashes the node n and if we have a storage layer specified, it writes
// the key/value pair to it and tracks any node->child references as well as any
// node->external trie references.
func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren bool) node {
func (c *committer) store(n node, db *Database) node {
// Larger nodes are replaced by their hash and stored in the database.
var (
hash, _ = n.cache()
size int
)
if hash == nil {
if vn, ok := n.(valueNode); ok {
c.tmp.Reset()
if err := rlp.Encode(&c.tmp, vn); err != nil {
panic("encode error: " + err.Error())
}
size = len(c.tmp)
if size < 32 && !force {
return n // Nodes smaller than 32 bytes are stored inside their parent
}
hash = c.makeHashNode(c.tmp)
} else {
// This was not generated - must be a small node stored in the parent
// No need to do anything here
return n
}
// This was not generated - must be a small node stored in the parent.
// In theory we should apply the leafCall here if it's not nil(embedded
// node usually contains value). But small value(less than 32bytes) is
// not our target.
return n
} else {
// We have the hash already, estimate the RLP encoding-size of the node.
// The size is used for mem tracking, does not need to be exact
Expand All @@ -194,10 +189,9 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
// The leaf channel will be active only when there an active leaf-callback
if c.leafCh != nil {
c.leafCh <- &leaf{
size: size,
hash: common.BytesToHash(hash),
node: n,
vnodes: hasVnodeChildren,
size: size,
hash: common.BytesToHash(hash),
node: n,
}
} else if db != nil {
// No leaf-callback used, but there's still a database. Do serial
Expand All @@ -209,30 +203,30 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
return hash
}

// commitLoop does the actual insert + leaf callback for nodes
// commitLoop does the actual insert + leaf callback for nodes.
func (c *committer) commitLoop(db *Database) {
for item := range c.leafCh {
var (
hash = item.hash
size = item.size
n = item.node
hasVnodes = item.vnodes
hash = item.hash
size = item.size
n = item.node
)
// We are pooling the trie nodes into an intermediate memory cache
db.lock.Lock()
db.insert(hash, size, n)
db.lock.Unlock()
if c.onleaf != nil && hasVnodes {

if c.onleaf != nil {
switch n := n.(type) {
case *shortNode:
if child, ok := n.Val.(valueNode); ok {
c.onleaf(nil, child, hash)
}
case *fullNode:
for i := 0; i < 16; i++ {
if child, ok := n.Children[i].(valueNode); ok {
c.onleaf(nil, child, hash)
}
// For children in range [0, 15], it's impossible
// to contain valuenode. Only check the 17th child.
if n.Children[16] != nil {
c.onleaf(nil, n.Children[16].(valueNode), hash)
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions trie/hasher.go
Expand Up @@ -66,11 +66,11 @@ func returnHasherToPool(h *hasher) {
// hash collapses a node down into a hash node, also returning a copy of the
// original node initialized with the computed hash to replace the original one.
func (h *hasher) hash(n node, force bool) (hashed node, cached node) {
// We're not storing the node, just hashing, use available cached data
// Return the cached hash if it's available
if hash, _ := n.cache(); hash != nil {
return hash, n
}
// Trie not processed yet or needs storage, walk the children
// Trie not processed yet, walk the children
switch n := n.(type) {
case *shortNode:
collapsed, cached := h.hashShortNodeChildren(n)
Expand Down
5 changes: 4 additions & 1 deletion trie/trie.go
Expand Up @@ -505,13 +505,16 @@ func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) {
if t.root == nil {
return emptyRoot, nil
}
// Derive the hash for all dirty nodes first. We hold the assumption
// in the following procedure that all nodes are hashed.
rootHash := t.Hash()
h := newCommitter()
defer returnCommitterToPool(h)

// Do a quick check if we really need to commit, before we spin
// up goroutines. This can happen e.g. if we load a trie for reading storage
// values, but don't write to it.
if !h.commitNeeded(t.root) {
if _, dirty := t.root.cache(); !dirty {
return rootHash, nil
}
var wg sync.WaitGroup
Expand Down

0 comments on commit 053ed9c

Please sign in to comment.