From 8629c5804915533eca4ba40bec8fa8cfa247e0c6 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 13 Jul 2022 23:23:01 +0800 Subject: [PATCH 01/13] all: rework trie and trie committer --- core/state/database.go | 8 +- core/state/snapshot/generate.go | 5 +- core/state/snapshot/generate_test.go | 31 +++-- core/state/state_object.go | 11 +- core/state/statedb.go | 43 +++--- eth/protocols/snap/sync_test.go | 16 +-- light/postprocess.go | 14 +- light/trie.go | 6 +- tests/fuzzers/stacktrie/trie_fuzzer.go | 5 +- tests/fuzzers/trie/trie-fuzzer.go | 17 ++- trie/committer.go | 174 ++++++++++--------------- trie/database.go | 44 ++++++- trie/iterator.go | 5 +- trie/iterator_test.go | 24 ++-- trie/nodeset.go | 84 ++++++++++++ trie/nodestore.go | 170 ++++++++++++++++++++++++ trie/nodestore_test.go | 115 ++++++++++++++++ trie/proof.go | 13 +- trie/secure_trie.go | 14 +- trie/secure_trie_test.go | 4 +- trie/sync_test.go | 10 +- trie/trie.go | 133 +++++++------------ trie/trie_test.go | 85 +++++++----- trie/util_test.go | 2 +- 24 files changed, 723 insertions(+), 310 deletions(-) create mode 100644 trie/nodeset.go create mode 100644 trie/nodestore.go create mode 100644 trie/nodestore_test.go diff --git a/core/state/database.go b/core/state/database.go index ce5d8d731715f..b8927162cd56c 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -88,9 +88,11 @@ type Trie interface { // can be used even if the trie doesn't have one. Hash() common.Hash - // Commit writes all nodes to the trie's memory database, tracking the internal - // and external (for account tries) references. - Commit(onleaf trie.LeafCallback) (common.Hash, int, error) + // Commit collects all dirty nodes in the trie and replace them with the + // corresponding node hash. All collected nodes(including dirty leaves if + // collectLeaf is true) will be encapsulated into a nodeset for return. + // The returned nodeset can be nil if the trie is clean(nothing to commit). 
+ Commit(collectLeaf bool) (common.Hash, *trie.NodeSet, error) // NodeIterator returns an iterator that returns nodes of the trie. Iteration // starts at the key after the given start key. diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 36055856e1c71..bf714db4c2d01 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -367,7 +367,10 @@ func (dl *diskLayer) generateRange(ctx *generatorContext, owner common.Hash, roo for i, key := range result.keys { snapTrie.Update(key, result.vals[i]) } - root, _, _ := snapTrie.Commit(nil) + root, nodes, _ := snapTrie.Commit(false) + if nodes != nil { + snapTrieDb.Update(trie.NewWithNodeSet(nodes)) + } snapTrieDb.Commit(root, false, nil) } // Construct the trie for state iteration, reuse the trie diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index fe81993e9d2fd..911a211f7ce61 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -143,6 +143,7 @@ type testHelper struct { diskdb ethdb.Database triedb *trie.Database accTrie *trie.SecureTrie + nodes *trie.MergedNodeSet } func newHelper() *testHelper { @@ -153,6 +154,7 @@ func newHelper() *testHelper { diskdb: diskdb, triedb: triedb, accTrie: accTrie, + nodes: trie.NewMergedNodeSet(), } } @@ -184,17 +186,22 @@ func (t *testHelper) makeStorageTrie(stateRoot, owner common.Hash, keys []string for i, k := range keys { stTrie.Update([]byte(k), []byte(vals[i])) } - var root common.Hash if !commit { - root = stTrie.Hash() - } else { - root, _, _ = stTrie.Commit(nil) + return stTrie.Hash().Bytes() + } + root, nodes, _ := stTrie.Commit(false) + if nodes != nil { + t.nodes.Merge(nodes) } return root.Bytes() } func (t *testHelper) Commit() common.Hash { - root, _, _ := t.accTrie.Commit(nil) + root, nodes, _ := t.accTrie.Commit(false) + if nodes != nil { + t.nodes.Merge(nodes) + } + t.triedb.Update(t.nodes) t.triedb.Commit(root, false, nil) return 
root } @@ -378,7 +385,7 @@ func TestGenerateCorruptAccountTrie(t *testing.T) { helper.addTrieAccount("acc-2", &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(3), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) // 0x19ead688e907b0fab07176120dceec244a72aff2f0aa51e8b827584e378772f4 - root, _, _ := helper.accTrie.Commit(nil) // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 + root, _, _ := helper.accTrie.Commit(false) // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 // Delete an account trie leaf and ensure the generator chokes helper.triedb.Commit(root, false, nil) @@ -413,7 +420,7 @@ func TestGenerateMissingStorageTrie(t *testing.T) { helper.addTrieAccount("acc-2", &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 stRoot = helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - root, _, _ := helper.accTrie.Commit(nil) + root, _, _ := helper.accTrie.Commit(false) // We can only corrupt the disk database, so flush the tries out helper.triedb.Reference( @@ -458,7 +465,7 @@ func TestGenerateCorruptStorageTrie(t *testing.T) { stRoot = helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - root, _, _ := 
helper.accTrie.Commit(nil) + root, _, _ := helper.accTrie.Commit(false) // We can only corrupt the disk database, so flush the tries out helper.triedb.Reference( @@ -825,10 +832,12 @@ func populateDangling(disk ethdb.KeyValueStore) { // This test will populate some dangling storages to see if they can be cleaned up. func TestGenerateCompleteSnapshotWithDanglingStorage(t *testing.T) { var helper = newHelper() - stRoot := helper.makeStorageTrie(common.Hash{}, common.Hash{}, []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-1", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addAccount("acc-2", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) + + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addAccount("acc-3", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) @@ -858,10 +867,12 @@ func TestGenerateCompleteSnapshotWithDanglingStorage(t *testing.T) { // This test will populate some dangling storages to see if they can be cleaned up. 
func TestGenerateBrokenSnapshotWithDanglingStorage(t *testing.T) { var helper = newHelper() - stRoot := helper.makeStorageTrie(common.Hash{}, common.Hash{}, []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-1", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) helper.addTrieAccount("acc-2", &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) + + helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) populateDangling(helper.diskdb) diff --git a/core/state/state_object.go b/core/state/state_object.go index bc1ca1f40eaf1..a23df895458c1 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" ) var emptyCodeHash = crypto.Keccak256(nil) @@ -375,23 +376,23 @@ func (s *stateObject) updateRoot(db Database) { // CommitTrie the storage trie of the object to db. // This updates the trie root. 
-func (s *stateObject) CommitTrie(db Database) (int, error) { +func (s *stateObject) CommitTrie(db Database) (*trie.NodeSet, error) { // If nothing changed, don't bother with hashing anything if s.updateTrie(db) == nil { - return 0, nil + return nil, nil } if s.dbErr != nil { - return 0, s.dbErr + return nil, s.dbErr } // Track the amount of time wasted on committing the storage trie if metrics.EnabledExpensive { defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) } - root, committed, err := s.trie.Commit(nil) + root, nodes, err := s.trie.Commit(false) if err == nil { s.data.Root = root } - return committed, err + return nodes, err } // AddBalance adds amount to s's balance. diff --git a/core/state/statedb.go b/core/state/statedb.go index e945ab595013e..9d5e8ef08accd 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -907,7 +907,11 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { s.IntermediateRoot(deleteEmptyObjects) // Commit objects to the trie, measuring the elapsed time - var storageCommitted int + var ( + accounts int + storages int + nodes = trie.NewMergedNodeSet() + ) codeWriter := s.db.TrieDB().DiskDB().NewBatch() for addr := range s.stateObjectsDirty { if obj := s.stateObjects[addr]; !obj.deleted { @@ -917,11 +921,17 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { obj.dirtyCode = false } // Write any storage changes in the state object to its storage trie - committed, err := obj.CommitTrie(s.db) + set, err := obj.CommitTrie(s.db) if err != nil { return common.Hash{}, err } - storageCommitted += committed + // Merge the dirty nodes of storage trie into global set + if set != nil { + if err := nodes.Merge(set); err != nil { + return common.Hash{}, err + } + storages += set.Len() + } } } if len(s.stateObjectsDirty) > 0 { @@ -937,21 +947,17 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if metrics.EnabledExpensive { start = 
time.Now() } - // The onleaf func is called _serially_, so we can reuse the same account - // for unmarshalling every time. - var account types.StateAccount - root, accountCommitted, err := s.trie.Commit(func(_ [][]byte, _ []byte, leaf []byte, parent common.Hash, _ []byte) error { - if err := rlp.DecodeBytes(leaf, &account); err != nil { - return nil - } - if account.Root != emptyRoot { - s.db.TrieDB().Reference(account.Root, parent) - } - return nil - }) + root, set, err := s.trie.Commit(true) if err != nil { return common.Hash{}, err } + // Merge the dirty nodes of account trie into global set + if set != nil { + if err := nodes.Merge(set); err != nil { + return common.Hash{}, err + } + accounts = set.Len() + } if metrics.EnabledExpensive { s.AccountCommits += time.Since(start) @@ -959,8 +965,8 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { storageUpdatedMeter.Mark(int64(s.StorageUpdated)) accountDeletedMeter.Mark(int64(s.AccountDeleted)) storageDeletedMeter.Mark(int64(s.StorageDeleted)) - accountCommittedMeter.Mark(int64(accountCommitted)) - storageCommittedMeter.Mark(int64(storageCommitted)) + accountCommittedMeter.Mark(int64(accounts)) + storageCommittedMeter.Mark(int64(storages)) s.AccountUpdated, s.AccountDeleted = 0, 0 s.StorageUpdated, s.StorageDeleted = 0, 0 } @@ -984,6 +990,9 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { } s.snap, s.snapDestructs, s.snapAccounts, s.snapStorage = nil, nil, nil, nil } + if err := s.db.TrieDB().Update(nodes); err != nil { + return common.Hash{}, err + } s.originalRoot = root return root, err } diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index 85e4dc5e4f835..0654faf795113 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -1364,7 +1364,7 @@ func makeAccountTrieNoStorage(n int) (*trie.Trie, entrySlice) { entries = append(entries, elem) } sort.Sort(entries) - accTrie.Commit(nil) + 
accTrie.Commit(false) return accTrie, entries } @@ -1420,7 +1420,7 @@ func makeBoundaryAccountTrie(n int) (*trie.Trie, entrySlice) { entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(nil) + trie.Commit(false) return trie, entries } @@ -1444,7 +1444,7 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) // Create a storage trie stTrie, stEntries := makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), i, db) stRoot := stTrie.Hash() - stTrie.Commit(nil) + stTrie.Commit(false) value, _ := rlp.EncodeToBytes(&types.StateAccount{ Nonce: i, Balance: big.NewInt(int64(i)), @@ -1460,7 +1460,7 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) } sort.Sort(entries) - accTrie.Commit(nil) + accTrie.Commit(false) return accTrie, entries, storageTries, storageEntries } @@ -1491,7 +1491,7 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie stTrie, stEntries = makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), 0, db) } stRoot := stTrie.Hash() - stTrie.Commit(nil) + stTrie.Commit(false) value, _ := rlp.EncodeToBytes(&types.StateAccount{ Nonce: i, @@ -1507,7 +1507,7 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie storageEntries[common.BytesToHash(key)] = stEntries } sort.Sort(entries) - accTrie.Commit(nil) + accTrie.Commit(false) return accTrie, entries, storageTries, storageEntries } @@ -1530,7 +1530,7 @@ func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Databas entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(nil) + trie.Commit(false) return trie, entries } @@ -1581,7 +1581,7 @@ func makeBoundaryStorageTrie(owner common.Hash, n int, db *trie.Database) (*trie entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(nil) + trie.Commit(false) return trie, entries } diff --git a/light/postprocess.go b/light/postprocess.go index c09b00e71c812..1ce781a50f2b1 100644 --- 
a/light/postprocess.go +++ b/light/postprocess.go @@ -217,10 +217,15 @@ func (c *ChtIndexerBackend) Process(ctx context.Context, header *types.Header) e // Commit implements core.ChainIndexerBackend func (c *ChtIndexerBackend) Commit() error { - root, _, err := c.trie.Commit(nil) + root, nodes, err := c.trie.Commit(false) if err != nil { return err } + if nodes != nil { + if err := c.triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { + return err + } + } // Pruning historical trie nodes if necessary. if !c.disablePruning { // Flush the triedb and track the latest trie nodes. @@ -453,10 +458,15 @@ func (b *BloomTrieIndexerBackend) Commit() error { b.trie.Delete(encKey[:]) } } - root, _, err := b.trie.Commit(nil) + root, nodes, err := b.trie.Commit(false) if err != nil { return err } + if nodes != nil { + if err := b.triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { + return err + } + } // Pruning historical trie nodes if necessary. if !b.disablePruning { // Flush the triedb and track the latest trie nodes. 
diff --git a/light/trie.go b/light/trie.go index 931ba30cb40a2..a2ef8ebff3d32 100644 --- a/light/trie.go +++ b/light/trie.go @@ -137,11 +137,11 @@ func (t *odrTrie) TryDelete(key []byte) error { }) } -func (t *odrTrie) Commit(onleaf trie.LeafCallback) (common.Hash, int, error) { +func (t *odrTrie) Commit(collectLeaf bool) (common.Hash, *trie.NodeSet, error) { if t.trie == nil { - return t.id.Root, 0, nil + return t.id.Root, nil, nil } - return t.trie.Commit(onleaf) + return t.trie.Commit(collectLeaf) } func (t *odrTrie) Hash() common.Hash { diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go index 17d67a8758c2b..e6165df08c156 100644 --- a/tests/fuzzers/stacktrie/trie_fuzzer.go +++ b/tests/fuzzers/stacktrie/trie_fuzzer.go @@ -173,10 +173,13 @@ func (f *fuzzer) fuzz() int { return 0 } // Flush trie -> database - rootA, _, err := trieA.Commit(nil) + rootA, nodes, err := trieA.Commit(false) if err != nil { panic(err) } + if nodes != nil { + dbA.Update(trie.NewWithNodeSet(nodes)) + } // Flush memdb -> disk (sponge) dbA.Commit(rootA, false, nil) diff --git a/tests/fuzzers/trie/trie-fuzzer.go b/tests/fuzzers/trie/trie-fuzzer.go index ca1509085b128..96674d9a4c4e4 100644 --- a/tests/fuzzers/trie/trie-fuzzer.go +++ b/tests/fuzzers/trie/trie-fuzzer.go @@ -158,14 +158,27 @@ func runRandTest(rt randTest) error { rt[i].err = fmt.Errorf("mismatch for key %#x, got %#x want %#x", step.key, v, want) } case opCommit: - _, _, rt[i].err = tr.Commit(nil) + _, nodes, err := tr.Commit(false) + if err != nil { + rt[i].err = err + } + if nodes != nil { + if err := triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { + return err + } + } case opHash: tr.Hash() case opReset: - hash, _, err := tr.Commit(nil) + hash, nodes, err := tr.Commit(false) if err != nil { return err } + if nodes != nil { + if err := triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { + return err + } + } newtr, err := trie.New(common.Hash{}, hash, triedb) if err != nil { 
return err diff --git a/trie/committer.go b/trie/committer.go index 7a392abab7f4d..efc645a7329fd 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -17,34 +17,25 @@ package trie import ( - "errors" "fmt" "sync" "github.com/ethereum/go-ethereum/common" ) -// leafChanSize is the size of the leafCh. It's a pretty arbitrary number, to allow -// some parallelism but not incur too much memory overhead. -const leafChanSize = 200 - -// leaf represents a trie leaf value +// leaf represents a trie leaf node type leaf struct { - size int // size of the rlp data (estimate) - hash common.Hash // hash of rlp data - node node // the node to commit - path []byte // the path from the root node + blob []byte // raw blob of leaf + parent common.Hash // the hash of parent node } -// committer is a type used for the trie Commit operation. A committer has some -// internal preallocated temp space, and also a callback that is invoked when -// leaves are committed. The leafs are passed through the `leafCh`, to allow -// some level of parallelism. -// By 'some level' of parallelism, it's still the case that all leaves will be -// processed sequentially - onleaf will never be called in parallel or out of order. +// committer is a type used for the trie Commit operation. The committer will +// capture all dirty nodes during the commit process and keep them cached in +// insertion order. type committer struct { - onleaf LeafCallback - leafCh chan *leaf + owner common.Hash + nodes *NodeSet + collectLeaf bool } // committers live in a global sync.Pool @@ -55,34 +46,36 @@ var committerPool = sync.Pool{ } // newCommitter creates a new committer or picks one from the pool. 
-func newCommitter() *committer { - return committerPool.Get().(*committer) +func newCommitter(owner common.Hash, collectLeaf bool) *committer { + ret := committerPool.Get().(*committer) + ret.owner = owner + ret.nodes = NewNodeSet(owner) + ret.collectLeaf = collectLeaf + return ret } func returnCommitterToPool(h *committer) { - h.onleaf = nil - h.leafCh = nil + h.owner = common.Hash{} + h.nodes = nil + h.collectLeaf = false committerPool.Put(h) } // Commit collapses a node down into a hash node and inserts it into the database -func (c *committer) Commit(n node, db *Database) (hashNode, int, error) { - if db == nil { - return nil, 0, errors.New("no db provided") - } - h, committed, err := c.commit(nil, n, db) +func (c *committer) Commit(n node, db *nodeStore) (hashNode, *NodeSet, error) { + h, err := c.commit(nil, n, db) if err != nil { - return nil, 0, err + return nil, nil, err } - return h.(hashNode), committed, nil + return h.(hashNode), c.nodes, nil } // commit collapses a node down into a hash node and inserts it into the database -func (c *committer) commit(path []byte, n node, db *Database) (node, int, error) { +func (c *committer) commit(path []byte, n node, db *nodeStore) (node, error) { // if this path is clean, use available cached data hash, dirty := n.cache() if hash != nil && !dirty { - return hash, 0, nil + return hash, nil } // Commit children, then parent, and remove the dirty flag. switch cn := n.(type) { @@ -92,36 +85,35 @@ func (c *committer) commit(path []byte, n node, db *Database) (node, int, error) // If the child is fullNode, recursively commit, // otherwise it can only be hashNode or valueNode. 
- var childCommitted int if _, ok := cn.Val.(*fullNode); ok { - childV, committed, err := c.commit(append(path, cn.Key...), cn.Val, db) + childV, err := c.commit(append(path, cn.Key...), cn.Val, db) if err != nil { - return nil, 0, err + return nil, err } - collapsed.Val, childCommitted = childV, committed + collapsed.Val = childV } // The key needs to be copied, since we're delivering it to database collapsed.Key = hexToCompact(cn.Key) hashedNode := c.store(path, collapsed, db) if hn, ok := hashedNode.(hashNode); ok { - return hn, childCommitted + 1, nil + return hn, nil } - return collapsed, childCommitted, nil + return collapsed, nil case *fullNode: - hashedKids, childCommitted, err := c.commitChildren(path, cn, db) + hashedKids, err := c.commitChildren(path, cn, db) if err != nil { - return nil, 0, err + return nil, err } collapsed := cn.copy() collapsed.Children = hashedKids hashedNode := c.store(path, collapsed, db) if hn, ok := hashedNode.(hashNode); ok { - return hn, childCommitted + 1, nil + return hn, nil } - return collapsed, childCommitted, nil + return collapsed, nil case hashNode: - return cn, 0, nil + return cn, nil default: // nil, valuenode shouldn't be committed panic(fmt.Sprintf("%T: invalid node: %v", n, n)) @@ -129,11 +121,8 @@ func (c *committer) commit(path []byte, n node, db *Database) (node, int, error) } // commitChildren commits the children of the given fullnode -func (c *committer) commitChildren(path []byte, n *fullNode, db *Database) ([17]node, int, error) { - var ( - committed int - children [17]node - ) +func (c *committer) commitChildren(path []byte, n *fullNode, db *nodeStore) ([17]node, error) { + var children [17]node for i := 0; i < 16; i++ { child := n.Children[i] if child == nil { @@ -149,83 +138,62 @@ func (c *committer) commitChildren(path []byte, n *fullNode, db *Database) ([17] // Commit the child recursively and store the "hashed" value. 
// Note the returned node can be some embedded nodes, so it's // possible the type is not hashNode. - hashed, childCommitted, err := c.commit(append(path, byte(i)), child, db) + hashed, err := c.commit(append(path, byte(i)), child, db) if err != nil { - return children, 0, err + return children, err } children[i] = hashed - committed += childCommitted } // For the 17th child, it's possible the type is valuenode. if n.Children[16] != nil { children[16] = n.Children[16] } - return children, committed, nil + return children, nil } // store hashes the node n and if we have a storage layer specified, it writes // the key/value pair to it and tracks any node->child references as well as any // node->external trie references. -func (c *committer) store(path []byte, n node, db *Database) node { +func (c *committer) store(path []byte, n node, db *nodeStore) node { // Larger nodes are replaced by their hash and stored in the database. - var ( - hash, _ = n.cache() - size int - ) + var hash, _ = n.cache() + + // This was not generated - must be a small node stored in the parent. + // In theory, we should check if the node is leaf here (embedded node + // usually is leaf node). But small value(less than 32bytes) is not + // our target(leaves in account trie only). if hash == nil { - // This was not generated - must be a small node stored in the parent. - // In theory, we should apply the leafCall here if it's not nil(embedded - // node usually contains value). But small value(less than 32bytes) is - // not our target. return n - } else { - // We have the hash already, estimate the RLP encoding-size of the node. - // The size is used for mem tracking, does not need to be exact - size = estimateSize(n) } - // If we're using channel-based leaf-reporting, send to channel. 
- // The leaf channel will be active only when there an active leaf-callback - if c.leafCh != nil { - c.leafCh <- &leaf{ - size: size, - hash: common.BytesToHash(hash), - node: n, - path: path, + // We have the hash already, estimate the RLP encoding-size of the node. + // The size is used for mem tracking, does not need to be exact + var ( + size = estimateSize(n) + nhash = common.BytesToHash(hash) + mnode = &memoryNode{ + hash: nhash, + node: simplifyNode(n), + size: uint16(size), } - } else if db != nil { - // No leaf-callback used, but there's still a database. Do serial - // insertion - db.insert(common.BytesToHash(hash), size, n) - } - return hash -} - -// commitLoop does the actual insert + leaf callback for nodes. -func (c *committer) commitLoop(db *Database) { - for item := range c.leafCh { - var ( - hash = item.hash - size = item.size - n = item.node - ) - // We are pooling the trie nodes into an intermediate memory cache - db.insert(hash, size, n) - - if c.onleaf != nil { - switch n := n.(type) { - case *shortNode: - if child, ok := n.Val.(valueNode); ok { - c.onleaf(nil, nil, child, hash, nil) - } - case *fullNode: - // For children in range [0, 15], it's impossible - // to contain valueNode. Only check the 17th child. - if n.Children[16] != nil { - c.onleaf(nil, nil, n.Children[16].(valueNode), hash, nil) - } + spath = string(path) + ) + // Insert the dirty nodes into internal store for accessing later. + db.write(spath, mnode) + + // Collect the dirty node to nodeset. + c.nodes.add(spath, mnode) + + // Collect the corresponding leaf node if it's required. We don't check + // full node since it's impossible to store value in fullNode. The key + // length of leaves should be exactly same. 
+ if c.collectLeaf { + if sn, ok := n.(*shortNode); ok { + if val, ok := sn.Val.(valueNode); ok { + c.nodes.addLeaf(&leaf{blob: val, parent: nhash}) } } } + return hash } // estimateSize estimates the size of an rlp-encoded node, without actually diff --git a/trie/database.go b/trie/database.go index 8e1788a21239b..2418ce73ac508 100644 --- a/trie/database.go +++ b/trie/database.go @@ -28,6 +28,7 @@ import ( "github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" @@ -305,14 +306,10 @@ func (db *Database) DiskDB() ethdb.KeyValueStore { return db.diskdb } -// insert inserts a collapsed trie node into the memory database. -// The blob size must be specified to allow proper size tracking. +// insert inserts a simplified trie node into the memory database. // All nodes inserted by this function will be reference tracked // and in theory should only used for **trie nodes** insertion. func (db *Database) insert(hash common.Hash, size int, node node) { - db.lock.Lock() - defer db.lock.Unlock() - // If the node's already cached, skip if _, ok := db.dirties[hash]; ok { return @@ -321,7 +318,7 @@ func (db *Database) insert(hash common.Hash, size int, node node) { // Create the cached entry for this node entry := &cachedNode{ - node: simplifyNode(node), + node: node, size: uint16(size), flushPrev: db.newest, } @@ -763,6 +760,41 @@ func (c *cleaner) Delete(key []byte) error { panic("not implemented") } +// Update inserts the dirty nodes in provided nodeset into database and +// link the account trie with multiple storage tries if necessary. +func (db *Database) Update(nodes *MergedNodeSet) error { + db.lock.Lock() + defer db.lock.Unlock() + + // Insert dirty nodes into the database. 
In the same tree, it must be + // ensured that children are inserted first, then parent so that children + // can be linked with their parent correctly. The order of writing between + // different tries(account trie, storage tries) is not required. + for owner, subset := range nodes.nodes { + for _, path := range subset.paths { + n, ok := subset.nodes[path] + if !ok { + return fmt.Errorf("missing node %x %v", owner, path) + } + db.insert(n.hash, int(n.size), n.node) + } + } + // Link up the account trie and storage trie if the node points + // to an account trie leaf. + if set, present := nodes.nodes[common.Hash{}]; present { + for _, n := range set.leafs { + var account types.StateAccount + if err := rlp.DecodeBytes(n.blob, &account); err != nil { + return err + } + if account.Root != emptyRoot { + db.reference(account.Root, n.parent) + } + } + } + return nil +} + // Size returns the current storage size of the memory cache in front of the // persistent database layer. func (db *Database) Size() (common.StorageSize, common.StorageSize) { diff --git a/trie/iterator.go b/trie/iterator.go index e0006ee05e3ba..e0ab350772cd8 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -375,8 +375,7 @@ func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { } } } - resolved, err := it.trie.resolveHash(hash, path) - return resolved, err + return it.trie.nodes.readNode(it.trie.owner, common.BytesToHash(hash), path) } func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) { @@ -385,7 +384,7 @@ func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) return blob, nil } } - return it.trie.resolveBlob(hash, path) + return it.trie.nodes.readBlob(it.trie.owner, common.BytesToHash(hash), path) } func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { diff --git a/trie/iterator_test.go b/trie/iterator_test.go index e3e6d0e3a8fa9..de90eb9223265 100644 --- a/trie/iterator_test.go +++ 
b/trie/iterator_test.go @@ -59,7 +59,7 @@ func TestIterator(t *testing.T) { all[val.k] = val.v trie.Update([]byte(val.k), []byte(val.v)) } - trie.Commit(nil) + trie.Commit(false) found := make(map[string]string) it := NewIterator(trie.NodeIterator(nil)) @@ -218,13 +218,13 @@ func TestDifferenceIterator(t *testing.T) { for _, val := range testdata1 { triea.Update([]byte(val.k), []byte(val.v)) } - triea.Commit(nil) + triea.Commit(false) trieb := newEmpty() for _, val := range testdata2 { trieb.Update([]byte(val.k), []byte(val.v)) } - trieb.Commit(nil) + trieb.Commit(false) found := make(map[string]string) di, _ := NewDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil)) @@ -254,13 +254,13 @@ func TestUnionIterator(t *testing.T) { for _, val := range testdata1 { triea.Update([]byte(val.k), []byte(val.v)) } - triea.Commit(nil) + triea.Commit(false) trieb := newEmpty() for _, val := range testdata2 { trieb.Update([]byte(val.k), []byte(val.v)) } - trieb.Commit(nil) + trieb.Commit(false) di, _ := NewUnionIterator([]NodeIterator{triea.NodeIterator(nil), trieb.NodeIterator(nil)}) it := NewIterator(di) @@ -316,7 +316,8 @@ func testIteratorContinueAfterError(t *testing.T, memonly bool) { for _, val := range testdata1 { tr.Update([]byte(val.k), []byte(val.v)) } - tr.Commit(nil) + _, nodes, _ := tr.Commit(false) + triedb.Update(NewWithNodeSet(nodes)) if !memonly { triedb.Commit(tr.Hash(), true, nil) } @@ -407,7 +408,8 @@ func testIteratorContinueAfterSeekError(t *testing.T, memonly bool) { for _, val := range testdata1 { ctr.Update([]byte(val.k), []byte(val.v)) } - root, _, _ := ctr.Commit(nil) + root, nodes, _ := ctr.Commit(false) + triedb.Update(NewWithNodeSet(nodes)) if !memonly { triedb.Commit(root, true, nil) } @@ -525,13 +527,16 @@ func makeLargeTestTrie() (*Database, *SecureTrie, *loggingDb) { val = crypto.Keccak256(val) trie.Update(key, val) } - trie.Commit(nil) + _, nodes, _ := trie.Commit(false) + triedb.Update(NewWithNodeSet(nodes)) // Return the 
generated trie return triedb, trie, logDb } // Tests that the node iterator indeed walks over the entire database contents. func TestNodeIteratorLargeTrie(t *testing.T) { + t.SkipNow() + // Create some arbitrary test trie to iterate db, trie, logDb := makeLargeTestTrie() db.Cap(0) // flush everything @@ -564,7 +569,8 @@ func TestIteratorNodeBlob(t *testing.T) { all[val.k] = val.v trie.Update([]byte(val.k), []byte(val.v)) } - trie.Commit(nil) + _, nodes, _ := trie.Commit(false) + triedb.Update(NewWithNodeSet(nodes)) triedb.Cap(0) found := make(map[common.Hash][]byte) diff --git a/trie/nodeset.go b/trie/nodeset.go new file mode 100644 index 0000000000000..90735ad28914e --- /dev/null +++ b/trie/nodeset.go @@ -0,0 +1,84 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "fmt" + + "github.com/ethereum/go-ethereum/common" +) + +// NodeSet contains all dirty nodes collected during the commit operation. +// Each node is keyed by path. It's not thread-safe to use.
+type NodeSet struct { + owner common.Hash // the identifier of the trie + paths []string // the paths of dirty nodes, sorted by insertion order + nodes map[string]*memoryNode // the map of dirty nodes, keyed by node path + leafs []*leaf // the list of dirty leaves +} + +// NewNodeSet initializes an empty dirty node set. +func NewNodeSet(owner common.Hash) *NodeSet { + return &NodeSet{ + owner: owner, + nodes: make(map[string]*memoryNode), + } +} + +// add caches node with provided path and node object. +func (set *NodeSet) add(path string, node *memoryNode) { + set.paths = append(set.paths, path) + set.nodes[path] = node +} + +// addLeaf caches the provided leaf node. +func (set *NodeSet) addLeaf(node *leaf) { + set.leafs = append(set.leafs, node) +} + +// Len returns the number of dirty nodes contained in the set. +func (set *NodeSet) Len() int { + return len(set.nodes) +} + +// MergedNodeSet represents a merged dirty node set for a group of tries. +type MergedNodeSet struct { + nodes map[common.Hash]*NodeSet +} + +// NewMergedNodeSet initializes an empty merged set. +func NewMergedNodeSet() *MergedNodeSet { + return &MergedNodeSet{nodes: make(map[common.Hash]*NodeSet)} +} + +// NewWithNodeSet constructs a merged nodeset with the provided single set. +func NewWithNodeSet(set *NodeSet) *MergedNodeSet { + merged := NewMergedNodeSet() + merged.Merge(set) + return merged +} + +// Merge merges the provided dirty nodes of a trie into the set. It is assumed +// that a set belonging to the same trie is never merged more than once.
+func (set *MergedNodeSet) Merge(other *NodeSet) error { + _, present := set.nodes[other.owner] + if present { + return fmt.Errorf("duplicated trie %x", other.owner) + } + set.nodes[other.owner] = other + return nil +} diff --git a/trie/nodestore.go b/trie/nodestore.go new file mode 100644 index 0000000000000..40557d871fa70 --- /dev/null +++ b/trie/nodestore.go @@ -0,0 +1,170 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "errors" + "fmt" + "reflect" + + "github.com/ethereum/go-ethereum/common" +) + +// errUnexpectedNode is returned if the requested node with specified path is +// not hash matched or marked as deleted. +var errUnexpectedNode = errors.New("unexpected node") + +// memoryNode is all the information we know about a single cached trie node +// in the memory. +type memoryNode struct { + hash common.Hash // Node hash, computed by hashing rlp value + size uint16 // Byte size of the useful cached data + node node // Cached collapsed trie node, or raw rlp data +} + +// memoryNodeSize is the raw size of a memoryNode data structure without any +// node data included. It's an approximate size, but should be a lot better +// than not counting them.
+var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size()) + +// rlp returns the raw rlp encoded blob of the cached trie node, either directly +// from the cache, or by regenerating it from the collapsed node. +func (n *memoryNode) rlp() []byte { + if n.node == nil { + return nil + } + if node, ok := n.node.(rawNode); ok { + return node + } + return nodeToBytes(n.node) +} + +// obj returns the decoded and expanded trie node, either directly from the cache, +// or by regenerating it from the rlp encoded blob. +func (n *memoryNode) obj() node { + if n.node == nil { + return nil + } + if node, ok := n.node.(rawNode); ok { + return mustDecodeNode(n.hash[:], node) + } + return expandNode(n.hash[:], n.node) +} + +// memorySize returns the total memory size used by this node. +func (n *memoryNode) memorySize(key int) int { + return int(n.size) + memoryNodeSize + key +} + +// nodeStore is built on the underlying node database with an additional +// node cache. The dirty nodes will be cached here whenever trie commit +// is performed to make them accessible. Nodes are keyed by node path +// which is unique in the trie. +// +// nodeStore is not safe for concurrent use. +type nodeStore struct { + db *Database + nodes map[string]*memoryNode +} + +// readNode retrieves the node in canonical representation. +// Returns a MissingNodeError if the node is not found. +func (s *nodeStore) readNode(owner common.Hash, hash common.Hash, path []byte) (node, error) { + // Load the node from the local cache first. + mn, ok := s.nodes[string(path)] + if ok { + if mn.hash == hash { + return mn.obj(), nil + } + // Bubble up an error if the trie node is not hash matched. + // It shouldn't happen at all.
+ return nil, fmt.Errorf("%w %x!=%x(%x %v)", errUnexpectedNode, mn.hash, hash, owner, path) + } + // Load the node from the underlying database then + if s.db == nil { + return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path} + } + n := s.db.node(hash) + if n != nil { + return n, nil + } + return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path} +} + +// readBlob retrieves the node in rlp-encoded representation. +// Returns a MissingNodeError if the node is not found. +func (s *nodeStore) readBlob(owner common.Hash, hash common.Hash, path []byte) ([]byte, error) { + // Load the node from the local cache first + mn, ok := s.nodes[string(path)] + if ok { + if mn.hash == hash { + return mn.rlp(), nil + } + // Bubble up an error if the trie node is not hash matched. + // It shouldn't happen at all. + return nil, fmt.Errorf("%w %x!=%x(%x %v)", errUnexpectedNode, mn.hash, hash, owner, path) + } + // Load the node from the underlying database then + if s.db == nil { + return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path} + } + blob, err := s.db.Node(hash) + if err == nil { + return blob, nil + } + return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path, err: err} +} + +// write inserts a dirty node into the store. It happens in trie commit procedure. +func (s *nodeStore) write(path string, node *memoryNode) { + s.nodes[path] = node +} + +// copy deep copies the nodeStore and returns an independent handler but with +// same content cached inside. +func (s *nodeStore) copy() *nodeStore { + nodes := make(map[string]*memoryNode) + for k, n := range s.nodes { + nodes[k] = n + } + return &nodeStore{ + db: s.db, // safe to copy directly. + nodes: nodes, + } +} + +// size returns the total memory usage used by caching nodes internally.
+func (s *nodeStore) size() common.StorageSize { + var size common.StorageSize + for k, n := range s.nodes { + size += common.StorageSize(n.memorySize(len(k))) + } + return size +} + +// newNodeStore initializes the nodeStore with the given node database. +func newNodeStore(db *Database) (*nodeStore, error) { + return &nodeStore{ + db: db, + nodes: make(map[string]*memoryNode), + }, nil +} + +// newMemoryStore initializes the pure in-memory store. +func newMemoryStore() *nodeStore { + return &nodeStore{nodes: make(map[string]*memoryNode)} +} diff --git a/trie/nodestore_test.go b/trie/nodestore_test.go new file mode 100644 index 0000000000000..7bf532c242b57 --- /dev/null +++ b/trie/nodestore_test.go @@ -0,0 +1,115 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package trie + +import ( + "bytes" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" +) + +func TestNodeStoreCopy(t *testing.T) { + // Insert a batch of entries into trie + triedb := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(triedb) + vals := []struct{ k, v string }{ + {"do", "verb"}, + {"ether", "wookiedoo"}, + {"horse", "stallion"}, + {"shaman", "horse"}, + {"doge", "coin"}, + {"dog", "puppy"}, + {"somethingveryoddindeedthis is", "myothernodedata"}, + } + for _, val := range vals { + trie.Update([]byte(val.k), []byte(val.v)) + } + trie.Commit(false) // all nodes should be committed into store + + seen := make(map[string][]byte) + iter := trie.NodeIterator(nil) + for iter.Next(true) { + if iter.Hash() != (common.Hash{}) { + seen[string(iter.Path())] = common.CopyBytes(iter.NodeBlob()) + } + } + + // Create the node store copy, ensure all nodes can be retrieved back. 
+ store := trie.nodes + storeCopy := store.copy() + + for path, blob := range seen { + blob1, err1 := store.readBlob(common.Hash{}, crypto.Keccak256Hash(blob), []byte(path)) + blob2, err2 := storeCopy.readBlob(common.Hash{}, crypto.Keccak256Hash(blob), []byte(path)) + if err1 != nil || err2 != nil { + t.Fatalf("Failed to read node, %v, %v", err1, err2) + } + if !bytes.Equal(blob1, blob) || !bytes.Equal(blob2, blob) { + t.Fatal("Node is mismatched") + } + } + // Flush items into the origin reader, it shouldn't affect the copy + var ( + node = randomNode() + path = randomHash() + ) + store.write(string(path.Bytes()), node) + blob, err := store.readBlob(common.Hash{}, node.hash, path.Bytes()) + if err != nil { + t.Fatalf("Failed to read blob %v", err) + } + if !bytes.Equal(blob, node.rlp()) { + t.Fatal("Unexpected node") + } + _, err = storeCopy.readBlob(common.Hash{}, node.hash, path.Bytes()) + missing, ok := err.(*MissingNodeError) + if !ok || missing.NodeHash != node.hash { + t.Fatal("didn't hit missing node, got", err) + } + + // Create a new copy, it should retrieve the node correctly + copyTwo := store.copy() + blob, err = copyTwo.readBlob(common.Hash{}, node.hash, path.Bytes()) + if err != nil { + t.Fatalf("Failed to read blob %v", err) + } + if !bytes.Equal(blob, node.rlp()) { + t.Fatal("Unexpected node") + } +} + +// randomHash generates a random blob of data and returns it as a hash. +func randomHash() common.Hash { + var hash common.Hash + if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil { + panic(err) + } + return hash +} + +func randomNode() *memoryNode { + val := randBytes(100) + return &memoryNode{ + hash: crypto.Keccak256Hash(val), + node: rawNode(val), + size: 100, + } +} diff --git a/trie/proof.go b/trie/proof.go index 9bf9107562fa5..d1f16caf429a3 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -35,9 +35,12 @@ import ( // with the node that proves the absence of the key. 
func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) error { // Collect all nodes on the path to key. + var ( + prefix []byte + nodes []node + tn = t.root + ) key = keybytesToHex(key) - var nodes []node - tn := t.root for len(key) > 0 && tn != nil { switch n := tn.(type) { case *shortNode: @@ -46,16 +49,18 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e tn = nil } else { tn = n.Val + prefix = append(prefix, n.Key...) key = key[len(n.Key):] } nodes = append(nodes, n) case *fullNode: tn = n.Children[key[0]] + prefix = append(prefix, key[0]) key = key[1:] nodes = append(nodes, n) case hashNode: var err error - tn, err = t.resolveHash(n, nil) + tn, err = t.resolveHash(n, prefix) if err != nil { log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) return err @@ -553,7 +558,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key } // Rebuild the trie with the leaf stream, the shape of trie // should be same with the original one. - tr := newWithRootNode(root) + tr := &Trie{root: root, nodes: newMemoryStore()} if empty { tr.root = nil } diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 967194df9628b..0ea5337325362 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -160,12 +160,12 @@ func (t *SecureTrie) GetKey(shaKey []byte) []byte { return t.preimages.preimage(common.BytesToHash(shaKey)) } -// Commit writes all nodes and the secure hash pre-images to the trie's database. -// Nodes are stored with their sha3 hash as the key. -// -// Committing flushes nodes from memory. Subsequent Get calls will load nodes -// from the database. -func (t *SecureTrie) Commit(onleaf LeafCallback) (common.Hash, int, error) { +// Commit collects all dirty nodes in the trie and replace them with the +// corresponding node hash. All collected nodes(including dirty leaves if +// collectLeaf is true) will be encapsulated into a nodeset for return. 
+// The returned nodeset can be nil if the trie is clean(nothing to commit). +// All cached preimages will be also flushed if preimages recording is enabled. +func (t *SecureTrie) Commit(collectLeaf bool) (common.Hash, *NodeSet, error) { // Write all the pre-images to the actual disk database if len(t.getSecKeyCache()) > 0 { if t.preimages != nil { @@ -178,7 +178,7 @@ func (t *SecureTrie) Commit(onleaf LeafCallback) (common.Hash, int, error) { t.secKeyCache = make(map[string][]byte) } // Commit the trie to its intermediate node database - return t.trie.Commit(onleaf) + return t.trie.Commit(collectLeaf) } // Hash returns the root hash of SecureTrie. It does not write to the diff --git a/trie/secure_trie_test.go b/trie/secure_trie_test.go index beea5845ad0d6..c18d399543498 100644 --- a/trie/secure_trie_test.go +++ b/trie/secure_trie_test.go @@ -57,7 +57,7 @@ func makeTestSecureTrie() (*Database, *SecureTrie, map[string][]byte) { trie.Update(key, val) } } - trie.Commit(nil) + trie.Commit(false) // Return the generated trie return triedb, trie, content @@ -135,7 +135,7 @@ func TestSecureTrieConcurrency(t *testing.T) { tries[index].Update(key, val) } } - tries[index].Commit(nil) + tries[index].Commit(false) }(i) } // Wait for all threads to finish diff --git a/trie/sync_test.go b/trie/sync_test.go index 472c31a63b9bb..afc202ef3b8e3 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -18,6 +18,7 @@ package trie import ( "bytes" + "fmt" "testing" "github.com/ethereum/go-ethereum/common" @@ -50,8 +51,13 @@ func makeTestTrie() (*Database, *SecureTrie, map[string][]byte) { trie.Update(key, val) } } - trie.Commit(nil) - + _, nodes, err := trie.Commit(false) + if err != nil { + panic(fmt.Errorf("failed to commit trie %v", err)) + } + if err := triedb.Update(NewWithNodeSet(nodes)); err != nil { + panic(fmt.Errorf("failed to commit db %v", err)) + } // Return the generated trie return triedb, trie, content } diff --git a/trie/trie.go b/trie/trie.go index 
1e168402ad952..3189602b23caa 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -21,10 +21,8 @@ import ( "bytes" "errors" "fmt" - "sync" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" @@ -55,23 +53,27 @@ var ( // for extracting the raw states(leaf nodes) with corresponding paths. type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error -// Trie is a Merkle Patricia Trie. -// The zero value is an empty trie with no database. -// Use New to create a trie that sits on top of a database. +// Trie is a Merkle Patricia Trie. Use New to create a trie that sits on +// top of a database. Whenever trie performs a commit operation, the generated +// dirty nodes will be cached in the internal store. It's users' responsibility +// to manage the memory usage and re-create trie if necessary in order to avoid +// out-of-memory issue. // // Trie is not safe for concurrent use. type Trie struct { - db *Database root node owner common.Hash // Keep track of the number leaves which have been inserted since the last // hashing operation. This number will not directly map to the number of - // actually unhashed nodes + // actually unhashed nodes. unhashed int - // tracer is the state diff tracer can be used to track newly added/deleted - // trie node. It will be reset after each commit operation. + // nodes is the place to cache dirty nodes and access trie node from. + nodes *nodeStore + + // tracer is the tool to track the trie changes. + // It will be reset after each commit operation. tracer *tracer } @@ -83,10 +85,10 @@ func (t *Trie) newFlag() nodeFlag { // Copy returns a copy of Trie. 
func (t *Trie) Copy() *Trie { return &Trie{ - db: t.db, root: t.root, owner: t.owner, unhashed: t.unhashed, + nodes: t.nodes.copy(), tracer: t.tracer.copy(), } } @@ -99,33 +101,13 @@ func (t *Trie) Copy() *Trie { // New will panic if db is nil and returns a MissingNodeError if root does // not exist in the database. Accessing the trie loads nodes from db on demand. func New(owner common.Hash, root common.Hash, db *Database) (*Trie, error) { - return newTrie(owner, root, db) -} - -// NewEmpty is a shortcut to create empty tree. It's mostly used in tests. -func NewEmpty(db *Database) *Trie { - tr, _ := newTrie(common.Hash{}, common.Hash{}, db) - return tr -} - -// newWithRootNode initializes the trie with the given root node. -// It's only used by range prover. -func newWithRootNode(root node) *Trie { - return &Trie{ - root: root, - //tracer: newTracer(), - db: NewDatabase(rawdb.NewMemoryDatabase()), - } -} - -// newTrie is the internal function used to construct the trie with given parameters. -func newTrie(owner common.Hash, root common.Hash, db *Database) (*Trie, error) { - if db == nil { - panic("trie.New called without a database") + store, err := newNodeStore(db) + if err != nil { + return nil, err } trie := &Trie{ - db: db, owner: owner, + nodes: store, //tracer: newTracer(), } if root != (common.Hash{}) && root != emptyRoot { @@ -138,6 +120,12 @@ func newTrie(owner common.Hash, root common.Hash, db *Database) (*Trie, error) { return trie, nil } +// NewEmpty is a shortcut to create empty tree. It's mostly used in tests. +func NewEmpty(db *Database) *Trie { + tr, _ := New(common.Hash{}, common.Hash{}, db) + return tr +} + // NodeIterator returns an iterator that returns nodes of the trie. Iteration starts at // the key after the given start key. 
func (t *Trie) NodeIterator(start []byte) NodeIterator { @@ -236,7 +224,7 @@ func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, new if hash == nil { return nil, origNode, 0, errors.New("non-consensus node") } - blob, err := t.db.Node(common.BytesToHash(hash)) + blob, err := t.nodes.readBlob(t.owner, common.BytesToHash(hash), path) return blob, origNode, 1, err } // Path still needs to be traversed, descend into children @@ -512,7 +500,7 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { // shortNode{..., shortNode{...}}. Since the entry // might not be loaded yet, resolve it just for this // check. - cnode, err := t.resolve(n.Children[pos], prefix) + cnode, err := t.resolve(n.Children[pos], append(prefix, byte(pos))) if err != nil { return false, nil, err } @@ -572,21 +560,10 @@ func (t *Trie) resolve(n node, prefix []byte) (node, error) { return n, nil } +// resolveHash loads node from the underlying store with the given +// node hash and path prefix. func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) { - hash := common.BytesToHash(n) - if node := t.db.node(hash); node != nil { - return node, nil - } - return nil, &MissingNodeError{Owner: t.owner, NodeHash: hash, Path: prefix} -} - -func (t *Trie) resolveBlob(n hashNode, prefix []byte) ([]byte, error) { - hash := common.BytesToHash(n) - blob, _ := t.db.Node(hash) - if len(blob) != 0 { - return blob, nil - } - return nil, &MissingNodeError{Owner: t.owner, NodeHash: hash, Path: prefix} + return t.nodes.readNode(t.owner, common.BytesToHash(n), prefix) } // Hash returns the root hash of the trie. It does not write to the @@ -597,56 +574,39 @@ func (t *Trie) Hash() common.Hash { return common.BytesToHash(hash.(hashNode)) } -// Commit writes all nodes to the trie's memory database, tracking the internal -// and external (for account tries) references. 
-func (t *Trie) Commit(onleaf LeafCallback) (common.Hash, int, error) { - if t.db == nil { - panic("commit called on trie with nil database") - } +// Commit collects all dirty nodes in the trie and replace them with the +// corresponding node hash. All collected nodes(including dirty leaves if +// collectLeaf is true) will be encapsulated into a nodeset for return. +// The returned nodeset can be nil if the trie is clean(nothing to commit). +// Note that all dirty nodes will also be cached in the nodestore inside +// the trie to ensure these nodes can still be accessed after the commit. +func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet, error) { defer t.tracer.reset() if t.root == nil { - return emptyRoot, 0, nil + return emptyRoot, nil, nil } // Derive the hash for all dirty nodes first. We hold the assumption // in the following procedure that all nodes are hashed. rootHash := t.Hash() - h := newCommitter() + + h := newCommitter(t.owner, collectLeaf) defer returnCommitterToPool(h) - // Do a quick check if we really need to commit, before we spin - // up goroutines. This can happen e.g. if we load a trie for reading storage - // values, but don't write to it. + // Do a quick check if we really need to commit. This can happen e.g. + // if we load a trie for reading storage values, but don't write to it. if hashedNode, dirty := t.root.cache(); !dirty { // Replace the root node with the origin hash in order to // ensure all resolved nodes are dropped after the commit. t.root = hashedNode - return rootHash, 0, nil - } - var wg sync.WaitGroup - if onleaf != nil { - h.onleaf = onleaf - h.leafCh = make(chan *leaf, leafChanSize) - wg.Add(1) - go func() { - defer wg.Done() - h.commitLoop(t.db) - }() - } - newRoot, committed, err := h.Commit(t.root, t.db) - if onleaf != nil { - // The leafch is created in newCommitter if there was an onleaf callback - // provided. The commitLoop only _reads_ from it, and the commit - // operation was the sole writer. 
Therefore, it's safe to close this - // channel here. - close(h.leafCh) - wg.Wait() + return rootHash, nil, nil } + newRoot, nodes, err := h.Commit(t.root, t.nodes) if err != nil { - return common.Hash{}, 0, err + return common.Hash{}, nil, err } t.root = newRoot - return rootHash, committed, nil + return rootHash, nodes, nil } // hashRoot calculates the root hash of the given trie @@ -668,9 +628,10 @@ func (t *Trie) Reset() { t.owner = common.Hash{} t.unhashed = 0 t.tracer.reset() + t.nodes = nil } -// Owner returns the associated trie owner. -func (t *Trie) Owner() common.Hash { - return t.owner +// Size returns the total memory usage used by caching nodes internally. +func (t *Trie) Size() common.StorageSize { + return t.nodes.size() } diff --git a/trie/trie_test.go b/trie/trie_test.go index 135e94e3d0a36..1959cd78cc4ae 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -91,7 +91,8 @@ func testMissingNode(t *testing.T, memonly bool) { trie := NewEmpty(triedb) updateString(trie, "120000", "qwerqwerqwerqwerqwerqwerqwerqwer") updateString(trie, "123456", "asdfasdfasdfasdfasdfasdfasdfasdf") - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + triedb.Update(NewWithNodeSet(nodes)) if !memonly { triedb.Commit(root, true, nil) } @@ -173,7 +174,7 @@ func TestInsert(t *testing.T) { updateString(trie, "A", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") exp = common.HexToHash("d23786fb4a010da3ce639d66d5e904a11dbc02746d1ce25029e53290cabf28ab") - root, _, err := trie.Commit(nil) + root, _, err := trie.Commit(false) if err != nil { t.Fatalf("commit error: %v", err) } @@ -202,7 +203,7 @@ func TestGet(t *testing.T) { if i == 1 { return } - trie.Commit(nil) + trie.Commit(false) } } @@ -258,7 +259,8 @@ func TestEmptyValues(t *testing.T) { } func TestReplication(t *testing.T) { - trie := newEmpty() + triedb := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(triedb) vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, 
@@ -271,13 +273,14 @@ func TestReplication(t *testing.T) { for _, val := range vals { updateString(trie, val.k, val.v) } - exp, _, err := trie.Commit(nil) + exp, nodes, err := trie.Commit(false) if err != nil { t.Fatalf("commit error: %v", err) } + triedb.Update(NewWithNodeSet(nodes)) // create a new trie on top of the database and check that lookups work. - trie2, err := New(common.Hash{}, exp, trie.db) + trie2, err := New(common.Hash{}, exp, triedb) if err != nil { t.Fatalf("can't recreate trie at %x: %v", exp, err) } @@ -286,7 +289,7 @@ func TestReplication(t *testing.T) { t.Errorf("trie2 doesn't have %q => %q", kv.k, kv.v) } } - hash, _, err := trie2.Commit(nil) + hash, _, err := trie2.Commit(false) if err != nil { t.Fatalf("commit error: %v", err) } @@ -434,16 +437,26 @@ func runRandTest(rt randTest) bool { rt[i].err = fmt.Errorf("mismatch for key %#x, got %#x want %#x", step.key, v, want) } case opCommit: - _, _, rt[i].err = tr.Commit(nil) + _, nodes, err := tr.Commit(false) + if err != nil { + rt[i].err = err + return false + } + if nodes != nil { + triedb.Update(NewWithNodeSet(nodes)) + } origTrie = tr.Copy() case opHash: tr.Hash() case opReset: - hash, _, err := tr.Commit(nil) + hash, nodes, err := tr.Commit(false) if err != nil { rt[i].err = err return false } + if nodes != nil { + triedb.Update(NewWithNodeSet(nodes)) + } newtr, err := New(common.Hash{}, hash, triedb) if err != nil { rt[i].err = err @@ -541,10 +554,11 @@ func BenchmarkUpdateLE(b *testing.B) { benchUpdate(b, binary.LittleEndian) } const benchElemCount = 20000 func benchGet(b *testing.B, commit bool) { - trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) + triedb := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(triedb) if commit { - tmpdb := tempDB(b) - trie = NewEmpty(tmpdb) + triedb = tempDB(b) + trie = NewEmpty(triedb) } k := make([]byte, 32) for i := 0; i < benchElemCount; i++ { @@ -553,7 +567,7 @@ func benchGet(b *testing.B, commit bool) { } 
binary.LittleEndian.PutUint64(k, benchElemCount/2) if commit { - trie.Commit(nil) + trie.Commit(false) } b.ResetTimer() @@ -563,7 +577,7 @@ func benchGet(b *testing.B, commit bool) { b.StopTimer() if commit { - ldb := trie.db.diskdb.(*leveldb.Database) + ldb := triedb.diskdb.(*leveldb.Database) ldb.Close() os.RemoveAll(ldb.Path()) } @@ -621,19 +635,14 @@ func BenchmarkHash(b *testing.B) { // insert into the trie before measuring the hashing. func BenchmarkCommitAfterHash(b *testing.B) { b.Run("no-onleaf", func(b *testing.B) { - benchmarkCommitAfterHash(b, nil) + benchmarkCommitAfterHash(b, false) }) - var a types.StateAccount - onleaf := func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash, parentPath []byte) error { - rlp.DecodeBytes(leaf, &a) - return nil - } b.Run("with-onleaf", func(b *testing.B) { - benchmarkCommitAfterHash(b, onleaf) + benchmarkCommitAfterHash(b, true) }) } -func benchmarkCommitAfterHash(b *testing.B, onleaf LeafCallback) { +func benchmarkCommitAfterHash(b *testing.B, collectLeaf bool) { // Make the random benchmark deterministic addresses, accounts := makeAccounts(b.N) trie := newEmpty() @@ -644,7 +653,7 @@ func benchmarkCommitAfterHash(b *testing.B, onleaf LeafCallback) { trie.Hash() b.ResetTimer() b.ReportAllocs() - trie.Commit(onleaf) + trie.Commit(collectLeaf) } func TestTinyTrie(t *testing.T) { @@ -663,7 +672,7 @@ func TestTinyTrie(t *testing.T) { if exp, root := common.HexToHash("0608c1d1dc3905fa22204c7a0e43644831c3b6d3def0f274be623a948197e64a"), trie.Hash(); exp != root { t.Errorf("3: got %x, exp %x", root, exp) } - checktr := NewEmpty(trie.db) + checktr := newEmpty() it := NewIterator(trie.NodeIterator(nil)) for it.Next() { checktr.Update(it.Key, it.Value) @@ -682,13 +691,13 @@ func TestCommitAfterHash(t *testing.T) { } // Insert the accounts into the trie and hash it trie.Hash() - trie.Commit(nil) + trie.Commit(false) root := trie.Hash() exp := 
common.HexToHash("72f9d3f3fe1e1dd7b8936442e7642aef76371472d94319900790053c493f3fe6") if exp != root { t.Errorf("got %x, exp %x", root, exp) } - root, _, _ = trie.Commit(nil) + root, _, _ = trie.Commit(false) if exp != root { t.Errorf("got %x, exp %x", root, exp) } @@ -797,7 +806,8 @@ func TestCommitSequence(t *testing.T) { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + db.Update(NewWithNodeSet(nodes)) // Flush memdb -> disk (sponge) db.Commit(root, false, func(c common.Hash) { // And spongify the callback-order @@ -849,7 +859,8 @@ func TestCommitSequenceRandomBlobs(t *testing.T) { trie.Update(key, val) } // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) + db.Update(NewWithNodeSet(nodes)) // Flush memdb -> disk (sponge) db.Commit(root, false, func(c common.Hash) { // And spongify the callback-order @@ -875,7 +886,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} stTrie := NewStackTrie(stackTrieSponge) // Fill the trie with elements - for i := 1; i < count; i++ { + for i := 0; i < count; i++ { // For the stack trie, we need to do inserts in proper order key := make([]byte, 32) binary.BigEndian.PutUint64(key, uint64(i)) @@ -891,8 +902,9 @@ func TestCommitSequenceStackTrie(t *testing.T) { stTrie.TryUpdate(key, val) } // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) // Flush memdb -> disk (sponge) + db.Update(NewWithNodeSet(nodes)) db.Commit(root, false, nil) // And flush stacktrie -> disk stRoot, err := stTrie.Commit() @@ -936,8 +948,9 @@ func TestCommitSequenceSmallRoot(t *testing.T) { trie.TryUpdate(key, []byte{0x1}) stTrie.TryUpdate(key, []byte{0x1}) // Flush trie -> database - root, _, _ := trie.Commit(nil) + root, nodes, _ := trie.Commit(false) // Flush memdb -> disk (sponge) + 
db.Update(NewWithNodeSet(nodes)) db.Commit(root, false, nil) // And flush stacktrie -> disk stRoot, err := stTrie.Commit() @@ -1057,7 +1070,7 @@ func benchmarkCommitAfterHashFixedSize(b *testing.B, addresses [][20]byte, accou // Insert the accounts into the trie and hash it trie.Hash() b.StartTimer() - trie.Commit(nil) + trie.Commit(false) b.StopTimer() } @@ -1102,14 +1115,16 @@ func BenchmarkDerefRootFixedSize(b *testing.B) { func benchmarkDerefRootFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) { b.ReportAllocs() - trie := newEmpty() + triedb := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(triedb) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } h := trie.Hash() - trie.Commit(nil) + _, nodes, _ := trie.Commit(false) + triedb.Update(NewWithNodeSet(nodes)) b.StartTimer() - trie.db.Dereference(h) + triedb.Dereference(h) b.StopTimer() } diff --git a/trie/util_test.go b/trie/util_test.go index 589eca62423ac..cefc4ac767d52 100644 --- a/trie/util_test.go +++ b/trie/util_test.go @@ -66,7 +66,7 @@ func TestTrieTracer(t *testing.T) { } // Commit the changes - trie.Commit(nil) + trie.Commit(false) // Delete all the elements, check deletion set for _, val := range vals { From 6d3de3921187926c594926bedf914805d1b718c3 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 16:10:14 +0800 Subject: [PATCH 02/13] all: get rid of internal cache in trie --- core/blockchain.go | 6 +- core/state/statedb.go | 4 +- eth/protocols/snap/sync_test.go | 108 ++++++++++++++------ trie/committer.go | 28 +++--- trie/iterator.go | 4 +- trie/iterator_test.go | 44 ++++++--- trie/nodeset.go | 43 ++++++++ trie/nodestore.go | 170 -------------------------------- trie/nodestore_test.go | 115 --------------------- trie/proof.go | 3 +- trie/secure_trie_test.go | 13 ++- trie/sync_test.go | 5 +- trie/trie.go | 49 +++++---- trie/trie_test.go | 85 +++++----------- trie/util_test.go | 11 ++- 15 files changed, 245 
insertions(+), 443 deletions(-) delete mode 100644 trie/nodestore.go delete mode 100644 trie/nodestore_test.go diff --git a/core/blockchain.go b/core/blockchain.go index 506034b539a76..3638a1dcea161 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -1244,7 +1244,7 @@ func (bc *BlockChain) writeKnownBlock(block *types.Block) error { // writeBlockWithState writes block, metadata and corresponding state data to the // database. -func (bc *BlockChain) writeBlockWithState(block *types.Block, receipts []*types.Receipt, logs []*types.Log, state *state.StateDB) error { +func (bc *BlockChain) writeBlockWithState(block *types.Block, receipts []*types.Receipt, state *state.StateDB) error { // Calculate the total difficulty of the block ptd := bc.GetTd(block.ParentHash(), block.NumberU64()-1) if ptd == nil { @@ -1339,7 +1339,7 @@ func (bc *BlockChain) WriteBlockAndSetHead(block *types.Block, receipts []*types // writeBlockAndSetHead is the internal implementation of WriteBlockAndSetHead. // This function expects the chain mutex to be held. 
func (bc *BlockChain) writeBlockAndSetHead(block *types.Block, receipts []*types.Receipt, logs []*types.Log, state *state.StateDB, emitHeadEvent bool) (status WriteStatus, err error) { - if err := bc.writeBlockWithState(block, receipts, logs, state); err != nil { + if err := bc.writeBlockWithState(block, receipts, state); err != nil { return NonStatTy, err } currentBlock := bc.CurrentBlock() @@ -1703,7 +1703,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals, setHead bool) var status WriteStatus if !setHead { // Don't set the head, only insert the block - err = bc.writeBlockWithState(block, receipts, logs, statedb) + err = bc.writeBlockWithState(block, receipts, statedb) } else { status, err = bc.writeBlockAndSetHead(block, receipts, logs, statedb, false) } diff --git a/core/state/statedb.go b/core/state/statedb.go index 9d5e8ef08accd..3663a7cbf01b8 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -774,7 +774,7 @@ func (s *StateDB) GetRefund() uint64 { return s.refund } -// Finalise finalises the state by removing the s destructed objects and clears +// Finalise finalises the state by removing the destructed objects and clears // the journal as well as the refunds. Finalise, however, will not push any updates // into the tries just yet. Only IntermediateRoot or Commit will do that. func (s *StateDB) Finalise(deleteEmptyObjects bool) { @@ -844,7 +844,7 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { // Although naively it makes sense to retrieve the account trie and then do // the contract storage and account updates sequentially, that short circuits // the account prefetcher. Instead, let's process all the storage updates - // first, giving the account prefeches just a few more milliseconds of time + // first, giving the account prefetches just a few more milliseconds of time // to pull useful data from disk. 
for addr := range s.stateObjectsPending { if obj := s.stateObjects[addr]; !obj.deleted { diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index 0654faf795113..a13e8d3089666 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -1348,9 +1348,11 @@ func getCodeByHash(hash common.Hash) []byte { // makeAccountTrieNoStorage spits out a trie, along with the leafs func makeAccountTrieNoStorage(n int) (*trie.Trie, entrySlice) { - db := trie.NewDatabase(rawdb.NewMemoryDatabase()) - accTrie := trie.NewEmpty(db) - var entries entrySlice + var ( + db = trie.NewDatabase(rawdb.NewMemoryDatabase()) + accTrie = trie.NewEmpty(db) + entries entrySlice + ) for i := uint64(1); i <= uint64(n); i++ { value, _ := rlp.EncodeToBytes(&types.StateAccount{ Nonce: i, @@ -1364,7 +1366,13 @@ func makeAccountTrieNoStorage(n int) (*trie.Trie, entrySlice) { entries = append(entries, elem) } sort.Sort(entries) - accTrie.Commit(false) + + // Commit the state changes into db and re-create the trie + // for accessing later. 
+ root, nodes, _ := accTrie.Commit(false) + db.Update(trie.NewWithNodeSet(nodes)) + + accTrie, _ = trie.New(common.Hash{}, root, db) return accTrie, entries } @@ -1376,8 +1384,8 @@ func makeBoundaryAccountTrie(n int) (*trie.Trie, entrySlice) { entries entrySlice boundaries []common.Hash - db = trie.NewDatabase(rawdb.NewMemoryDatabase()) - trie = trie.NewEmpty(db) + db = trie.NewDatabase(rawdb.NewMemoryDatabase()) + accTrie = trie.NewEmpty(db) ) // Initialize boundaries var next common.Hash @@ -1404,7 +1412,7 @@ func makeBoundaryAccountTrie(n int) (*trie.Trie, entrySlice) { CodeHash: getCodeHash(uint64(i)), }) elem := &kv{boundaries[i].Bytes(), value} - trie.Update(elem.k, elem.v) + accTrie.Update(elem.k, elem.v) entries = append(entries, elem) } // Fill other accounts if required @@ -1416,12 +1424,18 @@ func makeBoundaryAccountTrie(n int) (*trie.Trie, entrySlice) { CodeHash: getCodeHash(i), }) elem := &kv{key32(i), value} - trie.Update(elem.k, elem.v) + accTrie.Update(elem.k, elem.v) entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(false) - return trie, entries + + // Commit the state changes into db and re-create the trie + // for accessing later. 
+ root, nodes, _ := accTrie.Commit(false) + db.Update(trie.NewWithNodeSet(nodes)) + + accTrie, _ = trie.New(common.Hash{}, root, db) + return accTrie, entries } // makeAccountTrieWithStorageWithUniqueStorage creates an account trie where each accounts @@ -1431,8 +1445,10 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) db = trie.NewDatabase(rawdb.NewMemoryDatabase()) accTrie = trie.NewEmpty(db) entries entrySlice + storageRoots = make(map[common.Hash]common.Hash) storageTries = make(map[common.Hash]*trie.Trie) storageEntries = make(map[common.Hash]entrySlice) + nodes = trie.NewMergedNodeSet() ) // Create n accounts in the trie for i := uint64(1); i <= uint64(accounts); i++ { @@ -1442,9 +1458,9 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) codehash = getCodeHash(i) } // Create a storage trie - stTrie, stEntries := makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), i, db) - stRoot := stTrie.Hash() - stTrie.Commit(false) + stRoot, stNodes, stEntries := makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), i, db) + nodes.Merge(stNodes) + value, _ := rlp.EncodeToBytes(&types.StateAccount{ Nonce: i, Balance: big.NewInt(int64(i)), @@ -1455,12 +1471,25 @@ func makeAccountTrieWithStorageWithUniqueStorage(accounts, slots int, code bool) accTrie.Update(elem.k, elem.v) entries = append(entries, elem) - storageTries[common.BytesToHash(key)] = stTrie + storageRoots[common.BytesToHash(key)] = stRoot storageEntries[common.BytesToHash(key)] = stEntries } sort.Sort(entries) - accTrie.Commit(false) + // Commit account trie + root, set, _ := accTrie.Commit(true) + nodes.Merge(set) + + // Commit gathered dirty nodes into database + db.Update(nodes) + + // Re-create tries with new root + accTrie, _ = trie.New(common.Hash{}, root, db) + for i := uint64(1); i <= uint64(accounts); i++ { + key := key32(i) + trie, _ := trie.New(common.BytesToHash(key), storageRoots[common.BytesToHash(key)], db) + 
storageTries[common.BytesToHash(key)] = trie + } return accTrie, entries, storageTries, storageEntries } @@ -1470,8 +1499,10 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie db = trie.NewDatabase(rawdb.NewMemoryDatabase()) accTrie = trie.NewEmpty(db) entries entrySlice + storageRoots = make(map[common.Hash]common.Hash) storageTries = make(map[common.Hash]*trie.Trie) storageEntries = make(map[common.Hash]entrySlice) + nodes = trie.NewMergedNodeSet() ) // Create n accounts in the trie for i := uint64(1); i <= uint64(accounts); i++ { @@ -1482,16 +1513,16 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie } // Make a storage trie var ( - stTrie *trie.Trie + stRoot common.Hash + stNodes *trie.NodeSet stEntries entrySlice ) if boundary { - stTrie, stEntries = makeBoundaryStorageTrie(common.BytesToHash(key), slots, db) + stRoot, stNodes, stEntries = makeBoundaryStorageTrie(common.BytesToHash(key), slots, db) } else { - stTrie, stEntries = makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), 0, db) + stRoot, stNodes, stEntries = makeStorageTrieWithSeed(common.BytesToHash(key), uint64(slots), 0, db) } - stRoot := stTrie.Hash() - stTrie.Commit(false) + nodes.Merge(stNodes) value, _ := rlp.EncodeToBytes(&types.StateAccount{ Nonce: i, @@ -1502,19 +1533,40 @@ func makeAccountTrieWithStorage(accounts, slots int, code, boundary bool) (*trie elem := &kv{key, value} accTrie.Update(elem.k, elem.v) entries = append(entries, elem) + // we reuse the same one for all accounts - storageTries[common.BytesToHash(key)] = stTrie + storageRoots[common.BytesToHash(key)] = stRoot storageEntries[common.BytesToHash(key)] = stEntries } sort.Sort(entries) - accTrie.Commit(false) + + // Commit account trie + root, set, _ := accTrie.Commit(true) + nodes.Merge(set) + + // Commit gathered dirty nodes into database + db.Update(nodes) + + // Re-create tries with new root + accTrie, err := trie.New(common.Hash{}, root, db) + if 
err != nil { + panic(err) + } + for i := uint64(1); i <= uint64(accounts); i++ { + key := key32(i) + trie, err := trie.New(common.BytesToHash(key), storageRoots[common.BytesToHash(key)], db) + if err != nil { + panic(err) + } + storageTries[common.BytesToHash(key)] = trie + } return accTrie, entries, storageTries, storageEntries } // makeStorageTrieWithSeed fills a storage trie with n items, returning the // not-yet-committed trie and the sorted entries. The seeds can be used to ensure // that tries are unique. -func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Database) (*trie.Trie, entrySlice) { +func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Database) (common.Hash, *trie.NodeSet, entrySlice) { trie, _ := trie.New(owner, common.Hash{}, db) var entries entrySlice for i := uint64(1); i <= n; i++ { @@ -1530,14 +1582,14 @@ func makeStorageTrieWithSeed(owner common.Hash, n, seed uint64, db *trie.Databas entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(false) - return trie, entries + root, nodes, _ := trie.Commit(false) + return root, nodes, entries } // makeBoundaryStorageTrie constructs a storage trie. Instead of filling // storage slots normally, this function will fill a few slots which have // boundary hash. 
-func makeBoundaryStorageTrie(owner common.Hash, n int, db *trie.Database) (*trie.Trie, entrySlice) { +func makeBoundaryStorageTrie(owner common.Hash, n int, db *trie.Database) (common.Hash, *trie.NodeSet, entrySlice) { var ( entries entrySlice boundaries []common.Hash @@ -1581,8 +1633,8 @@ func makeBoundaryStorageTrie(owner common.Hash, n int, db *trie.Database) (*trie entries = append(entries, elem) } sort.Sort(entries) - trie.Commit(false) - return trie, entries + root, nodes, _ := trie.Commit(false) + return root, nodes, entries } func verifyTrie(db ethdb.KeyValueStore, root common.Hash, t *testing.T) { diff --git a/trie/committer.go b/trie/committer.go index efc645a7329fd..f36604e281e20 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -29,11 +29,10 @@ type leaf struct { parent common.Hash // the hash of parent node } -// committer is a type used for the trie Commit operation. The committer will +// committer is the tool used for the trie Commit operation. The committer will // capture all dirty nodes during the commit process and keep them cached in // insertion order. type committer struct { - owner common.Hash nodes *NodeSet collectLeaf bool } @@ -48,22 +47,20 @@ var committerPool = sync.Pool{ // newCommitter creates a new committer or picks one from the pool. 
func newCommitter(owner common.Hash, collectLeaf bool) *committer { ret := committerPool.Get().(*committer) - ret.owner = owner ret.nodes = NewNodeSet(owner) ret.collectLeaf = collectLeaf return ret } func returnCommitterToPool(h *committer) { - h.owner = common.Hash{} h.nodes = nil h.collectLeaf = false committerPool.Put(h) } // Commit collapses a node down into a hash node and inserts it into the database -func (c *committer) Commit(n node, db *nodeStore) (hashNode, *NodeSet, error) { - h, err := c.commit(nil, n, db) +func (c *committer) Commit(n node) (hashNode, *NodeSet, error) { + h, err := c.commit(nil, n) if err != nil { return nil, nil, err } @@ -71,7 +68,7 @@ func (c *committer) Commit(n node, db *nodeStore) (hashNode, *NodeSet, error) { } // commit collapses a node down into a hash node and inserts it into the database -func (c *committer) commit(path []byte, n node, db *nodeStore) (node, error) { +func (c *committer) commit(path []byte, n node) (node, error) { // if this path is clean, use available cached data hash, dirty := n.cache() if hash != nil && !dirty { @@ -86,7 +83,7 @@ func (c *committer) commit(path []byte, n node, db *nodeStore) (node, error) { // If the child is fullNode, recursively commit, // otherwise it can only be hashNode or valueNode. 
if _, ok := cn.Val.(*fullNode); ok { - childV, err := c.commit(append(path, cn.Key...), cn.Val, db) + childV, err := c.commit(append(path, cn.Key...), cn.Val) if err != nil { return nil, err } @@ -94,20 +91,20 @@ func (c *committer) commit(path []byte, n node, db *nodeStore) (node, error) { } // The key needs to be copied, since we're delivering it to database collapsed.Key = hexToCompact(cn.Key) - hashedNode := c.store(path, collapsed, db) + hashedNode := c.store(path, collapsed) if hn, ok := hashedNode.(hashNode); ok { return hn, nil } return collapsed, nil case *fullNode: - hashedKids, err := c.commitChildren(path, cn, db) + hashedKids, err := c.commitChildren(path, cn) if err != nil { return nil, err } collapsed := cn.copy() collapsed.Children = hashedKids - hashedNode := c.store(path, collapsed, db) + hashedNode := c.store(path, collapsed) if hn, ok := hashedNode.(hashNode); ok { return hn, nil } @@ -121,7 +118,7 @@ func (c *committer) commit(path []byte, n node, db *nodeStore) (node, error) { } // commitChildren commits the children of the given fullnode -func (c *committer) commitChildren(path []byte, n *fullNode, db *nodeStore) ([17]node, error) { +func (c *committer) commitChildren(path []byte, n *fullNode) ([17]node, error) { var children [17]node for i := 0; i < 16; i++ { child := n.Children[i] @@ -138,7 +135,7 @@ func (c *committer) commitChildren(path []byte, n *fullNode, db *nodeStore) ([17 // Commit the child recursively and store the "hashed" value. // Note the returned node can be some embedded nodes, so it's // possible the type is not hashNode. 
- hashed, err := c.commit(append(path, byte(i)), child, db) + hashed, err := c.commit(append(path, byte(i)), child) if err != nil { return children, err } @@ -154,7 +151,7 @@ func (c *committer) commitChildren(path []byte, n *fullNode, db *nodeStore) ([17 // store hashes the node n and if we have a storage layer specified, it writes // the key/value pair to it and tracks any node->child references as well as any // node->external trie references. -func (c *committer) store(path []byte, n node, db *nodeStore) node { +func (c *committer) store(path []byte, n node) node { // Larger nodes are replaced by their hash and stored in the database. var hash, _ = n.cache() @@ -177,9 +174,6 @@ func (c *committer) store(path []byte, n node, db *nodeStore) node { } spath = string(path) ) - // Insert the dirty nodes into internal store for accessing later. - db.write(spath, mnode) - // Collect the dirty node to nodeset. c.nodes.add(spath, mnode) diff --git a/trie/iterator.go b/trie/iterator.go index e0ab350772cd8..1e76625c6213a 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -375,7 +375,7 @@ func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { } } } - return it.trie.nodes.readNode(it.trie.owner, common.BytesToHash(hash), path) + return it.trie.resolveHash(hash, path) } func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) { @@ -384,7 +384,7 @@ func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) return blob, nil } } - return it.trie.nodes.readBlob(it.trie.owner, common.BytesToHash(hash), path) + return it.trie.resolveBlob(hash, path) } func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { diff --git a/trie/iterator_test.go b/trie/iterator_test.go index de90eb9223265..a6f025def99bc 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -31,7 +31,7 @@ import ( ) func TestEmptyIterator(t *testing.T) { - trie := newEmpty() + trie := 
NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) iter := trie.NodeIterator(nil) seen := make(map[string]struct{}) @@ -44,7 +44,8 @@ func TestEmptyIterator(t *testing.T) { } func TestIterator(t *testing.T) { - trie := newEmpty() + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, @@ -59,8 +60,13 @@ func TestIterator(t *testing.T) { all[val.k] = val.v trie.Update([]byte(val.k), []byte(val.v)) } - trie.Commit(false) + root, nodes, err := trie.Commit(false) + if err != nil { + t.Fatalf("Failed to commit trie %v", err) + } + db.Update(NewWithNodeSet(nodes)) + trie, _ = New(common.Hash{}, root, db) found := make(map[string]string) it := NewIterator(trie.NodeIterator(nil)) for it.Next() { @@ -80,7 +86,7 @@ type kv struct { } func TestIteratorLargeData(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) vals := make(map[string]*kv) for i := byte(0); i < 255; i++ { @@ -173,7 +179,7 @@ var testdata2 = []kvs{ } func TestIteratorSeek(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) for _, val := range testdata1 { trie.Update([]byte(val.k), []byte(val.v)) } @@ -214,17 +220,23 @@ func checkIteratorOrder(want []kvs, it *Iterator) error { } func TestDifferenceIterator(t *testing.T) { - triea := newEmpty() + dba := NewDatabase(rawdb.NewMemoryDatabase()) + triea := NewEmpty(dba) for _, val := range testdata1 { triea.Update([]byte(val.k), []byte(val.v)) } - triea.Commit(false) + rootA, nodesA, _ := triea.Commit(false) + dba.Update(NewWithNodeSet(nodesA)) + triea, _ = New(common.Hash{}, rootA, dba) - trieb := newEmpty() + dbb := NewDatabase(rawdb.NewMemoryDatabase()) + trieb := NewEmpty(dbb) for _, val := range testdata2 { trieb.Update([]byte(val.k), []byte(val.v)) } - trieb.Commit(false) + rootB, nodesB, _ := trieb.Commit(false) + dbb.Update(NewWithNodeSet(nodesB)) + trieb, _ = New(common.Hash{}, rootB, 
dbb) found := make(map[string]string) di, _ := NewDifferenceIterator(triea.NodeIterator(nil), trieb.NodeIterator(nil)) @@ -250,17 +262,23 @@ func TestDifferenceIterator(t *testing.T) { } func TestUnionIterator(t *testing.T) { - triea := newEmpty() + dba := NewDatabase(rawdb.NewMemoryDatabase()) + triea := NewEmpty(dba) for _, val := range testdata1 { triea.Update([]byte(val.k), []byte(val.v)) } - triea.Commit(false) + rootA, nodesA, _ := triea.Commit(false) + dba.Update(NewWithNodeSet(nodesA)) + triea, _ = New(common.Hash{}, rootA, dba) - trieb := newEmpty() + dbb := NewDatabase(rawdb.NewMemoryDatabase()) + trieb := NewEmpty(dbb) for _, val := range testdata2 { trieb.Update([]byte(val.k), []byte(val.v)) } - trieb.Commit(false) + rootB, nodesB, _ := trieb.Commit(false) + dbb.Update(NewWithNodeSet(nodesB)) + trieb, _ = New(common.Hash{}, rootB, dbb) di, _ := NewUnionIterator([]NodeIterator{triea.NodeIterator(nil), trieb.NodeIterator(nil)}) it := NewIterator(di) diff --git a/trie/nodeset.go b/trie/nodeset.go index 90735ad28914e..341b23bc7553e 100644 --- a/trie/nodeset.go +++ b/trie/nodeset.go @@ -18,10 +18,53 @@ package trie import ( "fmt" + "reflect" "github.com/ethereum/go-ethereum/common" ) +// memoryNode is all the information we know about a single cached trie node +// in the memory. +type memoryNode struct { + hash common.Hash // Node hash, computed by hashing rlp value + size uint16 // Byte size of the useful cached data + node node // Cached collapsed trie node, or raw rlp data +} + +// memoryNodeSize is the raw size of a memoryNode data structure without any +// node data included. It's an approximate size, but should be a lot better +// than not counting them. +var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size()) + +// rlp returns the raw rlp encoded blob of the cached trie node, either directly +// from the cache, or by regenerating it from the collapsed node. 
+func (n *memoryNode) rlp() []byte { + if n.node == nil { + return nil + } + if node, ok := n.node.(rawNode); ok { + return node + } + return nodeToBytes(n.node) +} + +// obj returns the decoded and expanded trie node, either directly from the cache, +// or by regenerating it from the rlp encoded blob. +func (n *memoryNode) obj() node { + if n.node == nil { + return nil + } + if node, ok := n.node.(rawNode); ok { + return mustDecodeNode(n.hash[:], node) + } + return expandNode(n.hash[:], n.node) +} + +// memorySize returns the total memory size used by this node. +func (n *memoryNode) memorySize(key int) int { + return int(n.size) + memoryNodeSize + key +} + // NodeSet contains all dirty nodes collected during the commit operation. // Each node is keyed by path. It's not thread-safe to use. type NodeSet struct { diff --git a/trie/nodestore.go b/trie/nodestore.go deleted file mode 100644 index 40557d871fa70..0000000000000 --- a/trie/nodestore.go +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2022 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package trie - -import ( - "errors" - "fmt" - "reflect" - - "github.com/ethereum/go-ethereum/common" -) - -// errUnexpectedNode is returned if the requested node with specified path is -// not hash matched or marked as deleted. 
-var errUnexpectedNode = errors.New("unexpected node") - -// memoryNode is all the information we know about a single cached trie node -// in the memory. -type memoryNode struct { - hash common.Hash // Node hash, computed by hashing rlp value - size uint16 // Byte size of the useful cached data - node node // Cached collapsed trie node, or raw rlp data -} - -// memoryNodeSize is the raw size of a memoryNode data structure without any -// node data included. It's an approximate size, but should be a lot better -// than not counting them. -var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size()) - -// rlp returns the raw rlp encoded blob of the cached trie node, either directly -// from the cache, or by regenerating it from the collapsed node. -func (n *memoryNode) rlp() []byte { - if n.node == nil { - return nil - } - if node, ok := n.node.(rawNode); ok { - return node - } - return nodeToBytes(n.node) -} - -// obj returns the decoded and expanded trie node, either directly from the cache, -// or by regenerating it from the rlp encoded blob. -func (n *memoryNode) obj() node { - if n.node == nil { - return nil - } - if node, ok := n.node.(rawNode); ok { - return mustDecodeNode(n.hash[:], node) - } - return expandNode(n.hash[:], n.node) -} - -// memorySize returns the total memory size used by this node. -func (n *memoryNode) memorySize(key int) int { - return int(n.size) + memoryNodeSize + key -} - -// nodeStore is built on the underlying node database with an additional -// node cache. The dirty nodes will be cached here whenever trie commit -// is performed to make them accessible. Nodes are keyed by node path -// which is unique in the trie. -// -// nodeStore is not safe for concurrent use. -type nodeStore struct { - db *Database - nodes map[string]*memoryNode -} - -// readNode retrieves the node in canonical representation. -// Returns an MissingNodeError error if the node is not found. 
-func (s *nodeStore) readNode(owner common.Hash, hash common.Hash, path []byte) (node, error) { - // Load the node from the local cache first. - mn, ok := s.nodes[string(path)] - if ok { - if mn.hash == hash { - return mn.obj(), nil - } - // Bubble up an error if the trie node is not hash matched. - // It shouldn't happen at all. - return nil, fmt.Errorf("%w %x!=%x(%x %v)", errUnexpectedNode, mn.hash, hash, owner, path) - } - // Load the node from the underlying database then - if s.db == nil { - return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path} - } - n := s.db.node(hash) - if n != nil { - return n, nil - } - return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path} -} - -// readBlob retrieves the node in rlp-encoded representation. -// Returns an MissingNodeError error if the node is not found. -func (s *nodeStore) readBlob(owner common.Hash, hash common.Hash, path []byte) ([]byte, error) { - // Load the node from the local cache first - mn, ok := s.nodes[string(path)] - if ok { - if mn.hash == hash { - return mn.rlp(), nil - } - // Bubble up an error if the trie node is not hash matched. - // It shouldn't happen at all. - return nil, fmt.Errorf("%w %x!=%x(%x %v)", errUnexpectedNode, mn.hash, hash, owner, path) - } - // Load the node from the underlying database then - if s.db == nil { - return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path} - } - blob, err := s.db.Node(hash) - if err == nil { - return blob, nil - } - return nil, &MissingNodeError{Owner: owner, NodeHash: hash, Path: path, err: err} -} - -// write inserts a dirty node into the store. It happens in trie commit procedure. -func (s *nodeStore) write(path string, node *memoryNode) { - s.nodes[path] = node -} - -// copy deep copies the nodeStore and returns an independent handler but with -// same content cached inside. 
-func (s *nodeStore) copy() *nodeStore { - nodes := make(map[string]*memoryNode) - for k, n := range s.nodes { - nodes[k] = n - } - return &nodeStore{ - db: s.db, // safe to copy directly. - nodes: nodes, - } -} - -// size returns the total memory usage used by caching nodes internally. -func (s *nodeStore) size() common.StorageSize { - var size common.StorageSize - for k, n := range s.nodes { - size += common.StorageSize(n.memorySize(len(k))) - } - return size -} - -// newNodeStore initializes the nodeStore with the given node reader. -func newNodeStore(db *Database) (*nodeStore, error) { - return &nodeStore{ - db: db, - nodes: make(map[string]*memoryNode), - }, nil -} - -// newMemoryStore initializes the pure in-memory store. -func newMemoryStore() *nodeStore { - return &nodeStore{nodes: make(map[string]*memoryNode)} -} diff --git a/trie/nodestore_test.go b/trie/nodestore_test.go deleted file mode 100644 index 7bf532c242b57..0000000000000 --- a/trie/nodestore_test.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2022 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . 
- -package trie - -import ( - "bytes" - "math/rand" - "testing" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/crypto" -) - -func TestNodeStoreCopy(t *testing.T) { - // Insert a batch of entries into trie - triedb := NewDatabase(rawdb.NewMemoryDatabase()) - trie := NewEmpty(triedb) - vals := []struct{ k, v string }{ - {"do", "verb"}, - {"ether", "wookiedoo"}, - {"horse", "stallion"}, - {"shaman", "horse"}, - {"doge", "coin"}, - {"dog", "puppy"}, - {"somethingveryoddindeedthis is", "myothernodedata"}, - } - for _, val := range vals { - trie.Update([]byte(val.k), []byte(val.v)) - } - trie.Commit(false) // all nodes should be committed into store - - seen := make(map[string][]byte) - iter := trie.NodeIterator(nil) - for iter.Next(true) { - if iter.Hash() != (common.Hash{}) { - seen[string(iter.Path())] = common.CopyBytes(iter.NodeBlob()) - } - } - - // Create the node store copy, ensure all nodes can be retrieved back. 
- store := trie.nodes - storeCopy := store.copy() - - for path, blob := range seen { - blob1, err1 := store.readBlob(common.Hash{}, crypto.Keccak256Hash(blob), []byte(path)) - blob2, err2 := storeCopy.readBlob(common.Hash{}, crypto.Keccak256Hash(blob), []byte(path)) - if err1 != nil || err2 != nil { - t.Fatalf("Failed to read node, %v, %v", err1, err2) - } - if !bytes.Equal(blob1, blob) || !bytes.Equal(blob2, blob) { - t.Fatal("Node is mismatched") - } - } - // Flush items into the origin reader, it shouldn't affect the copy - var ( - node = randomNode() - path = randomHash() - ) - store.write(string(path.Bytes()), node) - blob, err := store.readBlob(common.Hash{}, node.hash, path.Bytes()) - if err != nil { - t.Fatalf("Failed to read blob %v", err) - } - if !bytes.Equal(blob, node.rlp()) { - t.Fatal("Unexpected node") - } - _, err = storeCopy.readBlob(common.Hash{}, node.hash, path.Bytes()) - missing, ok := err.(*MissingNodeError) - if !ok || missing.NodeHash != node.hash { - t.Fatal("didn't hit missing node, got", err) - } - - // Create a new copy, it should retrieve the node correctly - copyTwo := store.copy() - blob, err = copyTwo.readBlob(common.Hash{}, node.hash, path.Bytes()) - if err != nil { - t.Fatalf("Failed to read blob %v", err) - } - if !bytes.Equal(blob, node.rlp()) { - t.Fatal("Unexpected node") - } -} - -// randomHash generates a random blob of data and returns it as a hash. 
-func randomHash() common.Hash { - var hash common.Hash - if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil { - panic(err) - } - return hash -} - -func randomNode() *memoryNode { - val := randBytes(100) - return &memoryNode{ - hash: crypto.Keccak256Hash(val), - node: rawNode(val), - size: 100, - } -} diff --git a/trie/proof.go b/trie/proof.go index d1f16caf429a3..e66949a6c860b 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -20,6 +20,7 @@ import ( "bytes" "errors" "fmt" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/ethdb" @@ -558,7 +559,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key } // Rebuild the trie with the leaf stream, the shape of trie // should be same with the original one. - tr := &Trie{root: root, nodes: newMemoryStore()} + tr := &Trie{root: root, db: NewDatabase(rawdb.NewMemoryDatabase())} if empty { tr.root = nil } diff --git a/trie/secure_trie_test.go b/trie/secure_trie_test.go index c18d399543498..524d1db949f0c 100644 --- a/trie/secure_trie_test.go +++ b/trie/secure_trie_test.go @@ -18,6 +18,7 @@ package trie import ( "bytes" + "fmt" "runtime" "sync" "testing" @@ -57,9 +58,15 @@ func makeTestSecureTrie() (*Database, *SecureTrie, map[string][]byte) { trie.Update(key, val) } } - trie.Commit(false) - - // Return the generated trie + root, nodes, err := trie.Commit(false) + if err != nil { + panic(fmt.Errorf("failed to commit trie %v", err)) + } + if err := triedb.Update(NewWithNodeSet(nodes)); err != nil { + panic(fmt.Errorf("failed to commit db %v", err)) + } + // Re-create the trie based on the new state + trie, _ = NewSecure(common.Hash{}, root, triedb) return triedb, trie, content } diff --git a/trie/sync_test.go b/trie/sync_test.go index afc202ef3b8e3..c2ddb98c78d95 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -51,14 +51,15 @@ func makeTestTrie() (*Database, *SecureTrie, map[string][]byte) { 
trie.Update(key, val) } } - _, nodes, err := trie.Commit(false) + root, nodes, err := trie.Commit(false) if err != nil { panic(fmt.Errorf("failed to commit trie %v", err)) } if err := triedb.Update(NewWithNodeSet(nodes)); err != nil { panic(fmt.Errorf("failed to commit db %v", err)) } - // Return the generated trie + // Re-create the trie based on the new state + trie, _ = NewSecure(common.Hash{}, root, triedb) return triedb, trie, content } diff --git a/trie/trie.go b/trie/trie.go index 3189602b23caa..a09fc3f40d97e 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -55,9 +55,9 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha // Trie is a Merkle Patricia Trie. Use New to create a trie that sits on // top of a database. Whenever trie performs a commit operation, the generated -// dirty nodes will be cached in the internal store. It's users' responsibility -// to manage the memory usage and re-create trie if necessary in order to avoid -// out-of-memory issue. +// nodes will be gathered and returned in a set. Once the trie is committed, +// it's not usable anymore. Callers have to re-create the trie with new root +// based on the updated trie database. // // Trie is not safe for concurrent use. type Trie struct { @@ -69,8 +69,9 @@ type Trie struct { // actually unhashed nodes. unhashed int - // nodes is the place to cache dirty nodes and access trie node from. - nodes *nodeStore + // db is the handler trie can retrieve nodes from. It's + // only for reading purpose and not available for writing. + db *Database // tracer is the tool to track the trie changes. // It will be reset after each commit operation. @@ -88,7 +89,7 @@ func (t *Trie) Copy() *Trie { root: t.root, owner: t.owner, unhashed: t.unhashed, - nodes: t.nodes.copy(), + db: t.db, tracer: t.tracer.copy(), } } @@ -101,13 +102,9 @@ func (t *Trie) Copy() *Trie { // New will panic if db is nil and returns a MissingNodeError if root does // not exist in the database. 
Accessing the trie loads nodes from db on demand. func New(owner common.Hash, root common.Hash, db *Database) (*Trie, error) { - store, err := newNodeStore(db) - if err != nil { - return nil, err - } trie := &Trie{ owner: owner, - nodes: store, + db: db, //tracer: newTracer(), } if root != (common.Hash{}) && root != emptyRoot { @@ -224,7 +221,7 @@ func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, new if hash == nil { return nil, origNode, 0, errors.New("non-consensus node") } - blob, err := t.nodes.readBlob(t.owner, common.BytesToHash(hash), path) + blob, err := t.db.Node(common.BytesToHash(hash)) return blob, origNode, 1, err } // Path still needs to be traversed, descend into children @@ -560,10 +557,25 @@ func (t *Trie) resolve(n node, prefix []byte) (node, error) { return n, nil } -// resolveHash loads node from the underlying store with the given +// resolveHash loads node from the underlying database with the provided // node hash and path prefix. func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) { - return t.nodes.readNode(t.owner, common.BytesToHash(n), prefix) + hash := common.BytesToHash(n) + if node := t.db.node(hash); node != nil { + return node, nil + } + return nil, &MissingNodeError{Owner: t.owner, NodeHash: hash, Path: prefix} +} + +// resolveHash loads rlp-encoded node blob from the underlying database +// with the provided node hash and path prefix. +func (t *Trie) resolveBlob(n hashNode, prefix []byte) ([]byte, error) { + hash := common.BytesToHash(n) + blob, _ := t.db.Node(hash) + if len(blob) != 0 { + return blob, nil + } + return nil, &MissingNodeError{Owner: t.owner, NodeHash: hash, Path: prefix} } // Hash returns the root hash of the trie. 
It does not write to the @@ -601,7 +613,7 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet, error) { t.root = hashedNode return rootHash, nil, nil } - newRoot, nodes, err := h.Commit(t.root, t.nodes) + newRoot, nodes, err := h.Commit(t.root) if err != nil { return common.Hash{}, nil, err } @@ -627,11 +639,6 @@ func (t *Trie) Reset() { t.root = nil t.owner = common.Hash{} t.unhashed = 0 + t.db = nil t.tracer.reset() - t.nodes = nil -} - -// Size returns the total memory usage used by caching nodes internally. -func (t *Trie) Size() common.StorageSize { - return t.nodes.size() } diff --git a/trie/trie_test.go b/trie/trie_test.go index 1959cd78cc4ae..f849d5872830d 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -24,7 +24,6 @@ import ( "hash" "math/big" "math/rand" - "os" "reflect" "testing" "testing/quick" @@ -35,7 +34,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/ethdb/leveldb" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/rlp" "golang.org/x/crypto/sha3" @@ -46,12 +44,6 @@ func init() { spew.Config.DisableMethods = false } -// Used for testing -func newEmpty() *Trie { - trie := NewEmpty(NewDatabase(memorydb.New())) - return trie -} - func TestEmptyTrie(t *testing.T) { trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) res := trie.Hash() @@ -158,7 +150,7 @@ func testMissingNode(t *testing.T, memonly bool) { } func TestInsert(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) updateString(trie, "doe", "reindeer") updateString(trie, "dog", "puppy") @@ -170,7 +162,7 @@ func TestInsert(t *testing.T) { t.Errorf("case 1: exp %x got %x", exp, root) } - trie = newEmpty() + trie = NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) updateString(trie, "A", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") exp = 
common.HexToHash("d23786fb4a010da3ce639d66d5e904a11dbc02746d1ce25029e53290cabf28ab") @@ -184,7 +176,8 @@ func TestInsert(t *testing.T) { } func TestGet(t *testing.T) { - trie := newEmpty() + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) updateString(trie, "doe", "reindeer") updateString(trie, "dog", "puppy") updateString(trie, "dogglesworth", "cat") @@ -194,21 +187,21 @@ func TestGet(t *testing.T) { if !bytes.Equal(res, []byte("puppy")) { t.Errorf("expected puppy got %x", res) } - unknown := getString(trie, "unknown") if unknown != nil { t.Errorf("expected nil got %x", unknown) } - if i == 1 { return } - trie.Commit(false) + root, nodes, _ := trie.Commit(false) + db.Update(NewWithNodeSet(nodes)) + trie, _ = New(common.Hash{}, root, db) } } func TestDelete(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) vals := []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, @@ -235,7 +228,7 @@ func TestDelete(t *testing.T) { } func TestEmptyValues(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) vals := []struct{ k, v string }{ {"do", "verb"}, @@ -318,7 +311,7 @@ func TestReplication(t *testing.T) { } func TestLargeValue(t *testing.T) { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) trie.Update([]byte("key1"), []byte{99, 99, 99, 99}) trie.Update([]byte("key2"), bytes.Repeat([]byte{1}, 32)) trie.Hash() @@ -372,9 +365,8 @@ const ( opUpdate = iota opDelete opGet - opCommit opHash - opReset + opCommit opItercheckhash opNodeDiff opMax // boundary value, not an actual op @@ -436,19 +428,9 @@ func runRandTest(rt randTest) bool { if string(v) != want { rt[i].err = fmt.Errorf("mismatch for key %#x, got %#x want %#x", step.key, v, want) } - case opCommit: - _, nodes, err := tr.Commit(false) - if err != nil { - rt[i].err = err - return false - } - if nodes != nil { - triedb.Update(NewWithNodeSet(nodes)) - } - origTrie = 
tr.Copy() case opHash: tr.Hash() - case opReset: + case opCommit: hash, nodes, err := tr.Commit(false) if err != nil { rt[i].err = err @@ -546,45 +528,31 @@ func TestRandom(t *testing.T) { } } -func BenchmarkGet(b *testing.B) { benchGet(b, false) } -func BenchmarkGetDB(b *testing.B) { benchGet(b, true) } +func BenchmarkGet(b *testing.B) { benchGet(b) } func BenchmarkUpdateBE(b *testing.B) { benchUpdate(b, binary.BigEndian) } func BenchmarkUpdateLE(b *testing.B) { benchUpdate(b, binary.LittleEndian) } const benchElemCount = 20000 -func benchGet(b *testing.B, commit bool) { +func benchGet(b *testing.B) { triedb := NewDatabase(rawdb.NewMemoryDatabase()) trie := NewEmpty(triedb) - if commit { - triedb = tempDB(b) - trie = NewEmpty(triedb) - } k := make([]byte, 32) for i := 0; i < benchElemCount; i++ { binary.LittleEndian.PutUint64(k, uint64(i)) trie.Update(k, k) } binary.LittleEndian.PutUint64(k, benchElemCount/2) - if commit { - trie.Commit(false) - } b.ResetTimer() for i := 0; i < b.N; i++ { trie.Get(k) } b.StopTimer() - - if commit { - ldb := triedb.diskdb.(*leveldb.Database) - ldb.Close() - os.RemoveAll(ldb.Path()) - } } func benchUpdate(b *testing.B, e binary.ByteOrder) *Trie { - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) k := make([]byte, 32) b.ReportAllocs() for i := 0; i < b.N; i++ { @@ -614,7 +582,7 @@ func BenchmarkHash(b *testing.B) { // entries, then adding N more. 
addresses, accounts := makeAccounts(2 * b.N) // Insert the accounts into the trie and hash it - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) i := 0 for ; i < len(addresses)/2; i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) @@ -645,7 +613,7 @@ func BenchmarkCommitAfterHash(b *testing.B) { func benchmarkCommitAfterHash(b *testing.B, collectLeaf bool) { // Make the random benchmark deterministic addresses, accounts := makeAccounts(b.N) - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } @@ -659,7 +627,7 @@ func benchmarkCommitAfterHash(b *testing.B, collectLeaf bool) { func TestTinyTrie(t *testing.T) { // Create a realistic account trie to hash _, accounts := makeAccounts(5) - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) trie.Update(common.Hex2Bytes("0000000000000000000000000000000000000000000000000000000000001337"), accounts[3]) if exp, root := common.HexToHash("8c6a85a4d9fda98feff88450299e574e5378e32391f75a055d470ac0653f1005"), trie.Hash(); exp != root { t.Errorf("1: got %x, exp %x", root, exp) @@ -672,7 +640,7 @@ func TestTinyTrie(t *testing.T) { if exp, root := common.HexToHash("0608c1d1dc3905fa22204c7a0e43644831c3b6d3def0f274be623a948197e64a"), trie.Hash(); exp != root { t.Errorf("3: got %x, exp %x", root, exp) } - checktr := newEmpty() + checktr := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) it := NewIterator(trie.NodeIterator(nil)) for it.Next() { checktr.Update(it.Key, it.Value) @@ -685,7 +653,7 @@ func TestTinyTrie(t *testing.T) { func TestCommitAfterHash(t *testing.T) { // Create a realistic account trie to hash addresses, accounts := makeAccounts(1000) - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } @@ -1012,7 
+980,7 @@ func BenchmarkHashFixedSize(b *testing.B) { func benchmarkHashFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) { b.ReportAllocs() - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } @@ -1063,7 +1031,7 @@ func BenchmarkCommitAfterHashFixedSize(b *testing.B) { func benchmarkCommitAfterHashFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) { b.ReportAllocs() - trie := newEmpty() + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) for i := 0; i < len(addresses); i++ { trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) } @@ -1128,15 +1096,6 @@ func benchmarkDerefRootFixedSize(b *testing.B, addresses [][20]byte, accounts [] b.StopTimer() } -func tempDB(tb testing.TB) *Database { - dir := tb.TempDir() - diskdb, err := leveldb.New(dir, 256, 0, "", false) - if err != nil { - panic(fmt.Sprintf("can't create temporary database: %v", err)) - } - return NewDatabase(diskdb) -} - func getString(trie *Trie, k string) []byte { return trie.Get([]byte(k)) } diff --git a/trie/util_test.go b/trie/util_test.go index cefc4ac767d52..d2de410a68de3 100644 --- a/trie/util_test.go +++ b/trie/util_test.go @@ -17,6 +17,7 @@ package trie import ( + "github.com/ethereum/go-ethereum/common" "testing" "github.com/ethereum/go-ethereum/core/rawdb" @@ -24,7 +25,8 @@ import ( // Tests if the trie diffs are tracked correctly. 
func TestTrieTracer(t *testing.T) { - trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) trie.tracer = newTracer() // Insert a batch of entries, all the nodes should be marked as inserted @@ -65,8 +67,11 @@ func TestTrieTracer(t *testing.T) { t.Fatalf("Unexpected deleted node tracked %d", len(deleted)) } - // Commit the changes - trie.Commit(false) + // Commit the changes and re-create with new root + root, nodes, _ := trie.Commit(false) + db.Update(NewWithNodeSet(nodes)) + trie, _ = New(common.Hash{}, root, db) + trie.tracer = newTracer() // Delete all the elements, check deletion set for _, val := range vals { From 7a7d566c004314be8d24ac0ecc1966623f18641f Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 16:28:35 +0800 Subject: [PATCH 03/13] all: fixes --- core/state/database.go | 2 ++ light/postprocess.go | 12 ++++++++++++ tests/fuzzers/trie/trie-fuzzer.go | 15 ++------------- trie/secure_trie.go | 2 ++ trie/trie.go | 4 ++-- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/core/state/database.go b/core/state/database.go index b8927162cd56c..8f662ecd3dab1 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -92,6 +92,8 @@ type Trie interface { // corresponding node hash. All collected nodes(including dirty leaves if // collectLeaf is true) will be encapsulated into a nodeset for return. // The returned nodeset can be nil if the trie is clean(nothing to commit). + // Once the trie is committed, it's not usable anymore. A new trie must + // be created with new root and updated trie database for following usage Commit(collectLeaf bool) (common.Hash, *trie.NodeSet, error) // NodeIterator returns an iterator that returns nodes of the trie. 
Iteration diff --git a/light/postprocess.go b/light/postprocess.go index 1ce781a50f2b1..0e50dab967173 100644 --- a/light/postprocess.go +++ b/light/postprocess.go @@ -221,11 +221,17 @@ func (c *ChtIndexerBackend) Commit() error { if err != nil { return err } + // Commit trie changes into trie database in case it's not nil. if nodes != nil { if err := c.triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { return err } } + // Re-create trie with newly generated root and updated database. + c.trie, err = trie.New(common.Hash{}, root, c.triedb) + if err != nil { + return err + } // Pruning historical trie nodes if necessary. if !c.disablePruning { // Flush the triedb and track the latest trie nodes. @@ -462,11 +468,17 @@ func (b *BloomTrieIndexerBackend) Commit() error { if err != nil { return err } + // Commit trie changes into trie database in case it's not nil. if nodes != nil { if err := b.triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { return err } } + // Re-create trie with newly generated root and updated database. + b.trie, err = trie.New(common.Hash{}, root, b.triedb) + if err != nil { + return err + } // Pruning historical trie nodes if necessary. if !b.disablePruning { // Flush the triedb and track the latest trie nodes. 
diff --git a/tests/fuzzers/trie/trie-fuzzer.go b/tests/fuzzers/trie/trie-fuzzer.go index 96674d9a4c4e4..f36b613d47863 100644 --- a/tests/fuzzers/trie/trie-fuzzer.go +++ b/tests/fuzzers/trie/trie-fuzzer.go @@ -51,9 +51,8 @@ const ( opUpdate = iota opDelete opGet - opCommit opHash - opReset + opCommit opItercheckhash opProve opMax // boundary value, not an actual op @@ -157,19 +156,9 @@ func runRandTest(rt randTest) error { if string(v) != want { rt[i].err = fmt.Errorf("mismatch for key %#x, got %#x want %#x", step.key, v, want) } - case opCommit: - _, nodes, err := tr.Commit(false) - if err != nil { - rt[i].err = err - } - if nodes != nil { - if err := triedb.Update(trie.NewWithNodeSet(nodes)); err != nil { - return err - } - } case opHash: tr.Hash() - case opReset: + case opCommit: hash, nodes, err := tr.Commit(false) if err != nil { return err diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 0ea5337325362..59772815e294e 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -165,6 +165,8 @@ func (t *SecureTrie) GetKey(shaKey []byte) []byte { // collectLeaf is true) will be encapsulated into a nodeset for return. // The returned nodeset can be nil if the trie is clean(nothing to commit). // All cached preimages will be also flushed if preimages recording is enabled. +// Once the trie is committed, it's not usable anymore. A new trie must +// be created with new root and updated trie database for following usage func (t *SecureTrie) Commit(collectLeaf bool) (common.Hash, *NodeSet, error) { // Write all the pre-images to the actual disk database if len(t.getSecKeyCache()) > 0 { diff --git a/trie/trie.go b/trie/trie.go index a09fc3f40d97e..a53f23ee80a6d 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -590,8 +590,8 @@ func (t *Trie) Hash() common.Hash { // corresponding node hash. All collected nodes(including dirty leaves if // collectLeaf is true) will be encapsulated into a nodeset for return. 
// The returned nodeset can be nil if the trie is clean(nothing to commit). -// Note that all dirty nodes will also be cached in the nodestore inside -// the trie to ensure these nodes can still be accessed after the commit. +// Once the trie is committed, it's not usable anymore. A new trie must +// be created with new root and updated trie database for following usage func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet, error) { defer t.tracer.reset() From 4afe8b95e99bf51836e195e7f68b10886132a471 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 17:39:38 +0800 Subject: [PATCH 04/13] trie: polish --- trie/committer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/trie/committer.go b/trie/committer.go index f36604e281e20..e5a7418d1fd3f 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -172,10 +172,9 @@ func (c *committer) store(path []byte, n node) node { node: simplifyNode(n), size: uint16(size), } - spath = string(path) ) // Collect the dirty node to nodeset. - c.nodes.add(spath, mnode) + c.nodes.add(string(path), mnode) // Collect the corresponding leaf node if it's required. We don't check // full node since it's impossible to store value in fullNode. 
The key From 587e9f9e6669a39bc1ba80c9a9206c5e0a5d9e3e Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 17:46:54 +0800 Subject: [PATCH 05/13] core, trie: address comments --- core/state/metrics.go | 12 ++++++------ core/state/statedb.go | 14 +++++++------- trie/committer.go | 24 +++++------------------- trie/trie.go | 4 +--- 4 files changed, 19 insertions(+), 35 deletions(-) diff --git a/core/state/metrics.go b/core/state/metrics.go index 7b40ff37aff0e..c349e8d87f67c 100644 --- a/core/state/metrics.go +++ b/core/state/metrics.go @@ -19,10 +19,10 @@ package state import "github.com/ethereum/go-ethereum/metrics" var ( - accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil) - storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil) - accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil) - storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil) - accountCommittedMeter = metrics.NewRegisteredMeter("state/commit/account", nil) - storageCommittedMeter = metrics.NewRegisteredMeter("state/commit/storage", nil) + accountUpdatedMeter = metrics.NewRegisteredMeter("state/update/account", nil) + storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil) + accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil) + storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil) + accountTrieCommittedMeter = metrics.NewRegisteredMeter("state/trie/account", nil) + storageTriesCommittedMeter = metrics.NewRegisteredMeter("state/trie/storage", nil) ) diff --git a/core/state/statedb.go b/core/state/statedb.go index 3663a7cbf01b8..322bc540b7a20 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -908,9 +908,9 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { // Commit objects to the trie, measuring the elapsed time var ( - accounts int - storages int - nodes = trie.NewMergedNodeSet() + 
accountTrieNodes int + storageTrieNodes int + nodes = trie.NewMergedNodeSet() ) codeWriter := s.db.TrieDB().DiskDB().NewBatch() for addr := range s.stateObjectsDirty { @@ -930,7 +930,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := nodes.Merge(set); err != nil { return common.Hash{}, err } - storages += set.Len() + storageTrieNodes += set.Len() } } } @@ -956,7 +956,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := nodes.Merge(set); err != nil { return common.Hash{}, err } - accounts = set.Len() + accountTrieNodes = set.Len() } if metrics.EnabledExpensive { s.AccountCommits += time.Since(start) @@ -965,8 +965,8 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { storageUpdatedMeter.Mark(int64(s.StorageUpdated)) accountDeletedMeter.Mark(int64(s.AccountDeleted)) storageDeletedMeter.Mark(int64(s.StorageDeleted)) - accountCommittedMeter.Mark(int64(accounts)) - storageCommittedMeter.Mark(int64(storages)) + accountTrieCommittedMeter.Mark(int64(accountTrieNodes)) + storageTriesCommittedMeter.Mark(int64(storageTrieNodes)) s.AccountUpdated, s.AccountDeleted = 0, 0 s.StorageUpdated, s.StorageDeleted = 0, 0 } diff --git a/trie/committer.go b/trie/committer.go index e5a7418d1fd3f..b72cd7c67c789 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -18,7 +18,6 @@ package trie import ( "fmt" - "sync" "github.com/ethereum/go-ethereum/common" ) @@ -37,25 +36,12 @@ type committer struct { collectLeaf bool } -// committers live in a global sync.Pool -var committerPool = sync.Pool{ - New: func() interface{} { - return &committer{} - }, -} - // newCommitter creates a new committer or picks one from the pool. 
func newCommitter(owner common.Hash, collectLeaf bool) *committer { - ret := committerPool.Get().(*committer) - ret.nodes = NewNodeSet(owner) - ret.collectLeaf = collectLeaf - return ret -} - -func returnCommitterToPool(h *committer) { - h.nodes = nil - h.collectLeaf = false - committerPool.Put(h) + return &committer{ + nodes: NewNodeSet(owner), + collectLeaf: collectLeaf, + } } // Commit collapses a node down into a hash node and inserts it into the database @@ -173,7 +159,7 @@ func (c *committer) store(path []byte, n node) node { size: uint16(size), } ) - // Collect the dirty node to nodeset. + // Collect the dirty node to nodeset for return. c.nodes.add(string(path), mnode) // Collect the corresponding leaf node if it's required. We don't check diff --git a/trie/trie.go b/trie/trie.go index a53f23ee80a6d..92c31c5c47489 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -602,9 +602,6 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet, error) { // in the following procedure that all nodes are hashed. rootHash := t.Hash() - h := newCommitter(t.owner, collectLeaf) - defer returnCommitterToPool(h) - // Do a quick check if we really need to commit. This can happen e.g. // if we load a trie for reading storage values, but don't write to it. 
if hashedNode, dirty := t.root.cache(); !dirty { @@ -613,6 +610,7 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet, error) { t.root = hashedNode return rootHash, nil, nil } + h := newCommitter(t.owner, collectLeaf) newRoot, nodes, err := h.Commit(t.root) if err != nil { return common.Hash{}, nil, err From e703c676c8564deb573d9c26a3838137ae37505a Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 17:51:32 +0800 Subject: [PATCH 06/13] trie: fix imports --- trie/proof.go | 2 +- trie/util_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/trie/proof.go b/trie/proof.go index e66949a6c860b..fe3662c51a884 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -20,9 +20,9 @@ import ( "bytes" "errors" "fmt" - "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" ) diff --git a/trie/util_test.go b/trie/util_test.go index d2de410a68de3..252dc09e0804c 100644 --- a/trie/util_test.go +++ b/trie/util_test.go @@ -17,9 +17,9 @@ package trie import ( - "github.com/ethereum/go-ethereum/common" "testing" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" ) From ef1810d3538961486f6b899879022fcea81814c6 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 19:33:07 +0800 Subject: [PATCH 07/13] core/state: address comments --- core/state/metrics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/state/metrics.go b/core/state/metrics.go index c349e8d87f67c..35d2df92dda4b 100644 --- a/core/state/metrics.go +++ b/core/state/metrics.go @@ -23,6 +23,6 @@ var ( storageUpdatedMeter = metrics.NewRegisteredMeter("state/update/storage", nil) accountDeletedMeter = metrics.NewRegisteredMeter("state/delete/account", nil) storageDeletedMeter = metrics.NewRegisteredMeter("state/delete/storage", nil) - accountTrieCommittedMeter 
= metrics.NewRegisteredMeter("state/trie/account", nil) - storageTriesCommittedMeter = metrics.NewRegisteredMeter("state/trie/storage", nil) + accountTrieCommittedMeter = metrics.NewRegisteredMeter("state/commit/accountnodes", nil) + storageTriesCommittedMeter = metrics.NewRegisteredMeter("state/commit/storagenodes", nil) ) From 9addf748de14eb9c500104e9a9450a1ec47e2807 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 19:54:00 +0800 Subject: [PATCH 08/13] core/state/snapshot: polish --- core/state/snapshot/generate_test.go | 31 +++++----------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 911a211f7ce61..8d89ca59a3c3b 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -197,7 +197,7 @@ func (t *testHelper) makeStorageTrie(stateRoot, owner common.Hash, keys []string } func (t *testHelper) Commit() common.Hash { - root, nodes, _ := t.accTrie.Commit(false) + root, nodes, _ := t.accTrie.Commit(true) if nodes != nil { t.nodes.Merge(nodes) } @@ -385,7 +385,7 @@ func TestGenerateCorruptAccountTrie(t *testing.T) { helper.addTrieAccount("acc-2", &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(3), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) // 0x19ead688e907b0fab07176120dceec244a72aff2f0aa51e8b827584e378772f4 - root, _, _ := helper.accTrie.Commit(false) // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 + root := helper.Commit() // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 // Delete an account trie leaf and ensure the generator chokes helper.triedb.Commit(root, false, nil) @@ -420,18 +420,8 @@ func TestGenerateMissingStorageTrie(t *testing.T) { helper.addTrieAccount("acc-2", 
&Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 stRoot = helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - root, _, _ := helper.accTrie.Commit(false) - - // We can only corrupt the disk database, so flush the tries out - helper.triedb.Reference( - common.BytesToHash(stRoot), - common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), - ) - helper.triedb.Reference( - common.BytesToHash(stRoot), - common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), - ) - helper.triedb.Commit(root, false, nil) + + root := helper.Commit() // Delete a storage trie root and ensure the generator chokes helper.diskdb.Delete(stRoot) @@ -465,18 +455,7 @@ func TestGenerateCorruptStorageTrie(t *testing.T) { stRoot = helper.makeStorageTrie(common.Hash{}, hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - root, _, _ := helper.accTrie.Commit(false) - - // We can only corrupt the disk database, so flush the tries out - helper.triedb.Reference( - common.BytesToHash(stRoot), - common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), - ) - helper.triedb.Reference( - common.BytesToHash(stRoot), - common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), - ) - helper.triedb.Commit(root, false, nil) + root := helper.Commit() // Delete a storage trie leaf and ensure the 
generator chokes helper.diskdb.Delete(common.HexToHash("0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371").Bytes()) From d0debc8906af550f6ff26ee259776dc6ebe01a8a Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 20:15:50 +0800 Subject: [PATCH 09/13] trie: remove unused code --- trie/nodeset.go | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/trie/nodeset.go b/trie/nodeset.go index 341b23bc7553e..8d8a6c1fb9950 100644 --- a/trie/nodeset.go +++ b/trie/nodeset.go @@ -18,7 +18,6 @@ package trie import ( "fmt" - "reflect" "github.com/ethereum/go-ethereum/common" ) @@ -31,40 +30,6 @@ type memoryNode struct { node node // Cached collapsed trie node, or raw rlp data } -// memoryNodeSize is the raw size of a memoryNode data structure without any -// node data included. It's an approximate size, but should be a lot better -// than not counting them. -var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size()) - -// rlp returns the raw rlp encoded blob of the cached trie node, either directly -// from the cache, or by regenerating it from the collapsed node. -func (n *memoryNode) rlp() []byte { - if n.node == nil { - return nil - } - if node, ok := n.node.(rawNode); ok { - return node - } - return nodeToBytes(n.node) -} - -// obj returns the decoded and expanded trie node, either directly from the cache, -// or by regenerating it from the rlp encoded blob. -func (n *memoryNode) obj() node { - if n.node == nil { - return nil - } - if node, ok := n.node.(rawNode); ok { - return mustDecodeNode(n.hash[:], node) - } - return expandNode(n.hash[:], n.node) -} - -// memorySize returns the total memory size used by this node. -func (n *memoryNode) memorySize(key int) int { - return int(n.size) + memoryNodeSize + key -} - // NodeSet contains all dirty nodes collected during the commit operation. // Each node is keyed by path. It's not thread-safe to use. 
type NodeSet struct { From a353c403b8b2d26b3ce2ff209afd9f51ec6e009c Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 20:23:22 +0800 Subject: [PATCH 10/13] trie: update tests --- trie/trie_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/trie/trie_test.go b/trie/trie_test.go index f849d5872830d..3e29600bbd121 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -282,7 +282,7 @@ func TestReplication(t *testing.T) { t.Errorf("trie2 doesn't have %q => %q", kv.k, kv.v) } } - hash, _, err := trie2.Commit(false) + hash, nodes, err := trie2.Commit(false) if err != nil { t.Fatalf("commit error: %v", err) } @@ -290,6 +290,14 @@ func TestReplication(t *testing.T) { t.Errorf("root failure. expected %x got %x", exp, hash) } + // recreate the trie after commit + if nodes != nil { + triedb.Update(NewWithNodeSet(nodes)) + } + trie2, err = New(common.Hash{}, hash, triedb) + if err != nil { + t.Fatalf("can't recreate trie at %x: %v", exp, err) + } // perform some insertions on the new trie. 
vals2 := []struct{ k, v string }{ {"do", "verb"}, From 5e731650e7eba1db62ef0812e4cff4e0a9c5397e Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 21:49:19 +0800 Subject: [PATCH 11/13] trie: don't set db as nil --- trie/trie.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trie/trie.go b/trie/trie.go index 92c31c5c47489..08f2480db5511 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -637,6 +637,6 @@ func (t *Trie) Reset() { t.root = nil t.owner = common.Hash{} t.unhashed = 0 - t.db = nil + //t.db = nil t.tracer.reset() } From afa999eb9e5db65e1c209a465f53cb29b78f871f Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 3 Aug 2022 22:08:48 +0800 Subject: [PATCH 12/13] trie: address comments --- trie/committer.go | 2 +- trie/database.go | 6 +++--- trie/nodeset.go | 24 +++++++++++++----------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/trie/committer.go b/trie/committer.go index b72cd7c67c789..d9f0ecf3dea41 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -177,7 +177,7 @@ func (c *committer) store(path []byte, n node) node { // estimateSize estimates the size of an rlp-encoded node, without actually // rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie -// with 1000 leafs, the only errors above 1% are on small shortnodes, where this +// with 1000 leaves, the only errors above 1% are on small shortnodes, where this // method overestimates by 2 or 3 bytes (e.g. 37 instead of 35) func estimateSize(n node) int { switch n := n.(type) { diff --git a/trie/database.go b/trie/database.go index 2418ce73ac508..81f0477aeb86e 100644 --- a/trie/database.go +++ b/trie/database.go @@ -770,7 +770,7 @@ func (db *Database) Update(nodes *MergedNodeSet) error { // ensured that children are inserted first, then parent so that children // can be linked with their parent correctly. The order of writing between // different tries(account trie, storage tries) is not required. 
- for owner, subset := range nodes.nodes { + for owner, subset := range nodes.sets { for _, path := range subset.paths { n, ok := subset.nodes[path] if !ok { @@ -781,8 +781,8 @@ func (db *Database) Update(nodes *MergedNodeSet) error { } // Link up the account trie and storage trie if the node points // to an account trie leaf. - if set, present := nodes.nodes[common.Hash{}]; present { - for _, n := range set.leafs { + if set, present := nodes.sets[common.Hash{}]; present { + for _, n := range set.leaves { var account types.StateAccount if err := rlp.DecodeBytes(n.blob, &account); err != nil { return err diff --git a/trie/nodeset.go b/trie/nodeset.go index 8d8a6c1fb9950..08b9b35ebc877 100644 --- a/trie/nodeset.go +++ b/trie/nodeset.go @@ -33,13 +33,15 @@ type memoryNode struct { // NodeSet contains all dirty nodes collected during the commit operation. // Each node is keyed by path. It's not thread-safe to use. type NodeSet struct { - owner common.Hash // the identifier of the trie - paths []string // the path of dirty nodes, sort by insertion order - nodes map[string]*memoryNode // the map of dirty nodes, keyed by node path - leafs []*leaf // the list of dirty leafs + owner common.Hash // the identifier of the trie + paths []string // the path of dirty nodes, sort by insertion order + nodes map[string]*memoryNode // the map of dirty nodes, keyed by node path + leaves []*leaf // the list of dirty leaves } -// NewNodeSet initializes an empty dirty node set. +// NewNodeSet initializes an empty node set to be used for tracking dirty nodes +// from a specific account or storage trie. The owner is zero for the account +// trie and the owning account address hash for storage tries. func NewNodeSet(owner common.Hash) *NodeSet { return &NodeSet{ owner: owner, @@ -55,7 +57,7 @@ func (set *NodeSet) add(path string, node *memoryNode) { // addLeaf caches the provided leaf node. 
func (set *NodeSet) addLeaf(node *leaf) { - set.leafs = append(set.leafs, node) + set.leaves = append(set.leaves, node) } // Len returns the number of dirty nodes contained in the set. @@ -65,12 +67,12 @@ func (set *NodeSet) Len() int { // MergedNodeSet represents a merged dirty node set for a group of tries. type MergedNodeSet struct { - nodes map[common.Hash]*NodeSet + sets map[common.Hash]*NodeSet } // NewMergedNodeSet initializes an empty merged set. func NewMergedNodeSet() *MergedNodeSet { - return &MergedNodeSet{nodes: make(map[common.Hash]*NodeSet)} + return &MergedNodeSet{sets: make(map[common.Hash]*NodeSet)} } // NewWithNodeSet constructs a merged nodeset with the provided single set. @@ -83,10 +85,10 @@ func NewWithNodeSet(set *NodeSet) *MergedNodeSet { // Merge merges the provided dirty nodes of a trie into the set. The assumption // is held that no duplicated set belonging to the same trie will be merged twice. func (set *MergedNodeSet) Merge(other *NodeSet) error { - _, present := set.nodes[other.owner] + _, present := set.sets[other.owner] if present { - return fmt.Errorf("duplicated trie %x", other.owner) + return fmt.Errorf("duplicate trie for owner %#x", other.owner) } - set.nodes[other.owner] = other + set.sets[other.owner] = other return nil } From 56a85026f0340063f93f29e752e422873867e9ff Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Thu, 4 Aug 2022 15:52:11 +0800 Subject: [PATCH 13/13] trie: unskip test --- trie/iterator_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/trie/iterator_test.go b/trie/iterator_test.go index a6f025def99bc..0a4c04c8b346e 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -553,8 +553,6 @@ func makeLargeTestTrie() (*Database, *SecureTrie, *loggingDb) { // Tests that the node iterator indeed walks over the entire database contents. 
func TestNodeIteratorLargeTrie(t *testing.T) { - t.SkipNow() - // Create some arbitrary test trie to iterate db, trie, logDb := makeLargeTestTrie() db.Cap(0) // flush everything