Skip to content

Commit

Permalink
cmd/geth, core/state/snapshot: rework journal loading, implement acco…
Browse files Browse the repository at this point in the history
…unt-check
  • Loading branch information
holiman committed May 23, 2022
1 parent 59ac229 commit 69d183b
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 163 deletions.
49 changes: 49 additions & 0 deletions cmd/geth/snapshot.go
Expand Up @@ -101,6 +101,25 @@ In other words, this command does the snapshot to trie conversion.
Description: `
geth snapshot check-dangling-storage <state-root> traverses the snap storage
data, and verifies that all snapshot storage data has a corresponding account.
`,
},
{
Name: "inspect-account",
Usage: "Check all snapshot layers for the a specific account",
ArgsUsage: "<address | hash>",
Action: utils.MigrateFlags(checkAccount),
Category: "MISCELLANEOUS COMMANDS",
Flags: []cli.Flag{
utils.DataDirFlag,
utils.AncientFlag,
utils.RopstenFlag,
utils.SepoliaFlag,
utils.RinkebyFlag,
utils.GoerliFlag,
},
Description: `
geth snapshot inspect-account <address> checks all snapshot layers and prints out
information about the specified address.
`,
},
{
Expand Down Expand Up @@ -517,3 +536,33 @@ func dumpState(ctx *cli.Context) error {
"elapsed", common.PrettyDuration(time.Since(start)))
return nil
}

// checkAccount iterates the snap data layers, and looks up the given account
// across all layers.
func checkAccount(ctx *cli.Context) error {
if ctx.NArg() != 1 {
return errors.New("need <address|hash> arg")
}
var addr common.Address
var hash common.Hash
switch len(ctx.Args()[0]) {
case 40, 42:
addr = common.HexToAddress(ctx.Args()[0])
hash = crypto.Keccak256Hash(addr.Bytes())
case 64, 66:
hash = common.HexToHash(ctx.Args()[0])
default:
return errors.New("malformed address or hash")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
start := time.Now()
log.Info("Checking difflayer journal", "address", addr, "hash", hash)
if err := snapshot.CheckJournalAccount(chaindb, hash); err != nil {
return err
}
log.Info("Checked the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start)))
return nil
}
86 changes: 9 additions & 77 deletions core/state/snapshot/dangling.go
Expand Up @@ -18,16 +18,13 @@ package snapshot

import (
"bytes"
"errors"
"fmt"
"io"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
)

// CheckDanglingStorage iterates the snap storage data, and verifies that all
Expand Down Expand Up @@ -75,81 +72,16 @@ func checkDanglingDiskStorage(chaindb ethdb.KeyValueStore) error {
// checkDanglingMemStorage checks if there is any 'dangling' storage in the journalled
// snapshot difflayers.
func checkDanglingMemStorage(db ethdb.KeyValueStore) error {
var (
start = time.Now()
journal = rawdb.ReadSnapshotJournal(db)
)
if len(journal) == 0 {
log.Warn("Loaded snapshot journal", "diffs", "missing")
return nil
}
r := rlp.NewStream(bytes.NewReader(journal), 0)
// Firstly, resolve the first element as the journal version
version, err := r.Uint()
if err != nil {
log.Warn("Failed to resolve the journal version", "error", err)
return nil
}
if version != journalVersion {
log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version)
log.Info("Checking dangling journalled storage")
start := time.Now()
iterateJournal(db, func(pRoot, root common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
for accHash := range storage {
if _, ok := accounts[accHash]; !ok {
log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", accHash), "root", root)
}
}
return nil
}
// Secondly, resolve the disk layer root, ensure it's continuous
// with disk layer. Note now we can ensure it's the snapshot journal
// correct version, so we expect everything can be resolved properly.
var root common.Hash
if err := r.Decode(&root); err != nil {
return errors.New("missing disk layer root")
}
// The diff journal is not matched with disk, discard them.
// It can happen that Geth crashes without persisting the latest
// diff journal.
// Load all the snapshot diffs from the journal
if err := checkDanglingJournalStorage(r); err != nil {
return err
}
})
log.Info("Verified the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start)))
return nil
}

// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new
// diff and verifying that it can be linked to the requested parent.
func checkDanglingJournalStorage(r *rlp.Stream) error {
for {
// Read the next diff journal entry
var root common.Hash
if err := r.Decode(&root); err != nil {
// The first read may fail with EOF, marking the end of the journal
if err == io.EOF {
return nil
}
return fmt.Errorf("load diff root: %v", err)
}
var destructs []journalDestruct
if err := r.Decode(&destructs); err != nil {
return fmt.Errorf("load diff destructs: %v", err)
}
var accounts []journalAccount
if err := r.Decode(&accounts); err != nil {
return fmt.Errorf("load diff accounts: %v", err)
}
accountData := make(map[common.Hash][]byte)
for _, entry := range accounts {
if len(entry.Blob) > 0 { // RLP loses nil-ness, but `[]byte{}` is not a valid item, so reinterpret that
accountData[entry.Hash] = entry.Blob
} else {
accountData[entry.Hash] = nil
}
}
var storage []journalStorage
if err := r.Decode(&storage); err != nil {
return fmt.Errorf("load diff storage: %v", err)
}
for _, entry := range storage {
if _, ok := accountData[entry.Hash]; !ok {
log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", entry.Hash), "root", root)
return fmt.Errorf("dangling journal snapshot storage account %#x", entry.Hash)
}
}
}
}

0 comments on commit 69d183b

Please sign in to comment.