Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster trie node encoding #1679

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2bdfd5f
add committer.go
hqjang-pepper Oct 28, 2022
7e0fad6
fix hasher.go
hqjang-pepper Oct 28, 2022
9a2126b
seperate hashes and committer
hqjang-pepper Nov 2, 2022
794ccf9
fix gofumpt
hqjang-pepper Nov 2, 2022
ba271f5
rollback GetHashAndHexKey function and KeccakState interface
hqjang-pepper Nov 2, 2022
d6a73d5
trie parallel hashing
hqjang-pepper Nov 2, 2022
12a31fc
add node encoder
hqjang-pepper Nov 7, 2022
8c3d8aa
change decodeNode function in order to make modifing the byte slice a…
hqjang-pepper Nov 9, 2022
411f968
add node encoding/decoding benchmark testcode
hqjang-pepper Nov 9, 2022
053d774
fix gofumpt
hqjang-pepper Nov 9, 2022
ac07269
refactor node encoding by using encode() method of node
hqjang-pepper Nov 9, 2022
0962432
use new encoder in hasher
hqjang-pepper Nov 9, 2022
f2c871e
use new encoder in StackTrie
hqjang-pepper Nov 9, 2022
4a80e97
remove unused variable
hqjang-pepper Nov 9, 2022
1bea769
use newEncoderBuffer at fullNode RLP encoding
hqjang-pepper Nov 10, 2022
434cbcc
Merge branch 'dev' of https://github.com/hqjang-pepper/klaytn into se…
hqjang-pepper Nov 11, 2022
99b6d1b
Merge branch 'dev' of https://github.com/hqjang-pepper/klaytn into tr…
hqjang-pepper Nov 14, 2022
a84d46b
Merge branch 'dev' of https://github.com/hqjang-pepper/klaytn into fa…
hqjang-pepper Nov 14, 2022
0e2e3e7
fix copyright
hqjang-pepper Nov 15, 2022
006d483
Merge branch 'seperate-trie-commit-and-hash' of https://github.com/hq…
hqjang-pepper Nov 15, 2022
1f8ef40
Merge branch 'trie-parallel-hashing' of https://github.com/hqjang-pep…
hqjang-pepper Nov 15, 2022
a1afbe1
add copyrights
hqjang-pepper Nov 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
268 changes: 268 additions & 0 deletions storage/statedb/committer.go
@@ -0,0 +1,268 @@
// Modifications Copyright 2022 The klaytn Authors
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
//
// This file is derived from trie/committer.go (2022/11/14).
// Modified and improved for the klaytn development.

package statedb

import (
"errors"
"fmt"
"sync"

"github.com/klaytn/klaytn/common"
"golang.org/x/crypto/sha3"
)

// leafChanSize is the size of the leafCh. It's a pretty arbitrary number, to allow
// some parallelism but not incur too much memory overhead.
const leafChanSize = 200

// leaf represents a trie leaf value. Instances are produced by store and
// handed to commitLoop over the committer's leafCh channel.
type leaf struct {
	size   int         // size of the rlp data (estimate)
	hash   common.Hash // hash of rlp data
	node   node        // the node to commit
	vnodes bool        // set to true if the node (possibly) contains a valueNode
}

// committer is a type used for the trie Commit operation. A committer has some
// internal preallocated temp space, and also a callback that is invoked when
// leaves are committed. The leaves are passed through the `leafCh`, to allow
// some level of parallelism.
// By 'some level' of parallelism, it's still the case that all leaves will be
// processed sequentially - onleaf will never be called in parallel or out of order.
type committer struct {
	sha KeccakState // preallocated keccak256 hasher state, reused across commits

	onleaf LeafCallback // invoked by commitLoop for each committed leaf (may be nil)
	leafCh chan *leaf   // channel feeding leaves to commitLoop (nil when channel reporting is off)
}

// committers live in a global sync.Pool
var committerPool = sync.Pool{
	// New allocates a committer with its own keccak256 hasher state; the
	// onleaf/leafCh fields are populated per commit by the caller.
	New: func() interface{} {
		return &committer{
			sha: sha3.NewLegacyKeccak256().(KeccakState),
		}
	},
}

// newCommitter returns a committer taken from the shared pool, allocating a
// fresh one when the pool is empty.
func newCommitter() *committer {
	c := committerPool.Get().(*committer)
	return c
}

// returnCommitterToPool clears the per-commit state of c (callback and leaf
// channel) and returns it to the shared pool for reuse. The keccak hasher
// state is kept, which is the whole point of pooling.
func returnCommitterToPool(c *committer) {
	// Parameter renamed from `h` (a hasher leftover) to `c` for consistency
	// with every other committer method in this file.
	c.onleaf = nil
	c.leafCh = nil
	committerPool.Put(c)
}

// commitNeeded reports whether the given node still has to be committed;
// it returns false when the node is already in sync with the db (it has a
// cached hash and is not dirty).
func (c *committer) commitNeeded(n node) bool {
	cached, dirty := n.cache()
	if cached != nil && !dirty {
		return false
	}
	return true
}

// Commit collapses a node down into a hash node and inserts it into the
// database. It is the exported entry point; db must be non-nil, and the
// result of the recursive commit is asserted to be a hashNode because the
// root is always committed with force=true.
func (c *committer) Commit(n node, db *Database) (hashNode, error) {
	if db == nil {
		return nil, errors.New("no db provided")
	}
	h, err := c.commit(n, db, true)
	if err != nil {
		return nil, err
	}
	return h.(hashNode), nil
}

// commit collapses a node down into a hash node and inserts it into the
// database. Clean nodes (cached hash present, dirty flag unset) are returned
// as-is without recursing. force propagates to store and controls whether
// small nodes are stored rather than embedded in their parent.
func (c *committer) commit(n node, db *Database, force bool) (node, error) {
	// If this path is clean, use the available cached data.
	hash, dirty := n.cache()
	if hash != nil && !dirty {
		return hash, nil
	}
	// Commit children, then parent, and remove the dirty flag.
	switch cn := n.(type) {
	case *shortNode:
		// Commit child
		collapsed := cn.copy()
		if _, ok := cn.Val.(valueNode); !ok {
			childV, err := c.commit(cn.Val, db, false)
			if err != nil {
				return nil, err
			}
			collapsed.Val = childV
		}
		// The key needs to be copied, since we're delivering it to database
		collapsed.Key = hexToCompact(cn.Key)
		hashedNode := c.store(collapsed, db, force, true)
		if hn, ok := hashedNode.(hashNode); ok {
			return hn, nil
		}
		// Too small to be hashed: return the collapsed node for embedding.
		return collapsed, nil
	case *fullNode:
		hashedKids, hasVnodes, err := c.commitChildren(cn, db, force)
		if err != nil {
			return nil, err
		}
		collapsed := cn.copy()
		collapsed.Children = hashedKids

		hashedNode := c.store(collapsed, db, force, hasVnodes)
		if hn, ok := hashedNode.(hashNode); ok {
			return hn, nil
		}
		return collapsed, nil
	case valueNode:
		return c.store(cn, db, force, false), nil
	// hashnodes aren't stored
	case hashNode:
		return cn, nil
	}
	return hash, nil
}

// commitChildren commits each non-nil child of the given fullnode and returns
// the committed children alongside a flag reporting whether any child
// collapsed to a valueNode.
func (c *committer) commitChildren(n *fullNode, db *Database, force bool) ([17]node, bool, error) {
	var (
		committed [17]node
		hasValue  bool
	)
	for idx, child := range n.Children {
		if child == nil {
			continue
		}
		cnode, err := c.commit(child, db, false)
		if err != nil {
			return committed, false, err
		}
		if _, ok := cnode.(valueNode); ok {
			hasValue = true
		}
		committed[idx] = cnode
	}
	return committed, hasValue, nil
}

// store takes an already-hashed node n and, depending on configuration,
// either ships it to the leaf channel for commitLoop to insert, or inserts it
// serially into db. Nodes without a cached hash are too small to be stored
// and are returned unchanged for embedding in their parent; otherwise the
// cached hash is returned.
func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren bool) node {
	cachedHash, _ := n.cache()
	if cachedHash == nil {
		// No hash was generated - this is a small node kept inside the
		// parent, so there is nothing to persist here.
		return n
	}
	// We have the hash already; estimate the RLP encoding-size of the node.
	// The size is used for mem tracking and does not need to be exact.
	estimated := estimateSize(n)

	switch {
	case c.leafCh != nil:
		// Channel-based leaf reporting is active (i.e. there is an active
		// leaf-callback): hand the node off to commitLoop.
		c.leafCh <- &leaf{
			size:   estimated,
			hash:   common.BytesToHash(cachedHash),
			node:   n,
			vnodes: hasVnodeChildren,
		}
	case db != nil:
		// No leaf-callback used, but there's still a database: do a serial
		// insertion under the database lock.
		db.lock.Lock()
		db.insert(common.BytesToHash(cachedHash), uint16(estimated), n)
		db.lock.Unlock()
	}
	return cachedHash
}

// commitLoop drains c.leafCh, inserting every received node into db and
// firing the onleaf callback for nodes that (possibly) carry a valueNode.
// It returns when the channel is closed.
func (c *committer) commitLoop(db *Database) {
	for item := range c.leafCh {
		// Pool the trie node into the intermediate memory cache.
		db.lock.Lock()
		db.insert(item.hash, uint16(item.size), item.node)
		db.lock.Unlock()

		if c.onleaf == nil || !item.vnodes {
			continue
		}
		switch n := item.node.(type) {
		case *shortNode:
			if child, ok := n.Val.(valueNode); ok {
				c.onleaf(nil, nil, child, item.hash, 0)
			}
		case *fullNode:
			// For children in range [0, 15], it's impossible
			// to contain valueNode. Only check the 17th child.
			if n.Children[16] != nil {
				c.onleaf(nil, nil, n.Children[16].(valueNode), item.hash, 0)
			}
		}
	}
}

// makeHashNode hashes data with the committer's pooled keccak256 state and
// returns the digest as a freshly allocated hashNode.
func (c *committer) makeHashNode(data []byte) hashNode {
	c.sha.Reset()
	c.sha.Write(data)
	out := make(hashNode, c.sha.Size())
	c.sha.Read(out)
	return out
}

// estimateSize estimates the size of an rlp-encoded node, without actually
// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
// with 1000 leafs, the only errors above 1% are on small shortnodes, where this
// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
func estimateSize(n node) int {
	switch n := n.(type) {
	case *shortNode:
		// A short node contains a compacted key, and a value.
		return 3 + len(n.Key) + estimateSize(n.Val)
	case *fullNode:
		// A full node contains up to 16 hashes (some nils), and a key
		s := 3
		for i := 0; i < 16; i++ {
			if child := n.Children[i]; child != nil {
				s += estimateSize(child)
			} else {
				// A nil child still occupies one byte (empty RLP string).
				s++
			}
		}
		return s
	case valueNode:
		return 1 + len(n)
	case hashNode:
		return 1 + len(n)
	default:
		// Unknown node types indicate a programmer bug, not a runtime
		// condition - fail loudly.
		panic(fmt.Sprintf("node type %T", n))
	}
}
19 changes: 4 additions & 15 deletions storage/statedb/database.go
Expand Up @@ -146,16 +146,9 @@ func (n rawFullNode) fstring(ind string) string { panic("this should never e
func (n rawFullNode) lenEncoded() uint16 { panic("this should never end up in a live trie") }

func (n rawFullNode) EncodeRLP(w io.Writer) error {
var nodes [17]node

for i, child := range n {
if child != nil {
nodes[i] = child
} else {
nodes[i] = nilValueNode
}
}
return rlp.Encode(w, nodes)
encodeByte := rlp.NewEncoderBuffer(w)
n.encode(encodeByte)
return encodeByte.Flush()
}

// rawShortNode represents only the useful data content of a short node, with the
Expand Down Expand Up @@ -193,11 +186,7 @@ func (n *cachedNode) rlp() []byte {
if node, ok := n.node.(rawNode); ok {
return node
}
blob, err := rlp.EncodeToBytes(n.node)
if err != nil {
panic(err)
}
return blob
return nodeToBytes(n.node)
}

// obj returns the decoded and expanded trie node, either directly from the cache,
Expand Down