Skip to content

Commit

Permalink
Fix data race on core blockchain wg
Browse files Browse the repository at this point in the history
When using a sync.WaitGroup (wg), Add is intended to be called before
Wait; calling Add and Wait concurrently leads to data races. The
blockchain calls Add all over the place, in many public methods that can
be called from other threads.

The solution is to lock over calls to Add and Wait.

Unfortunately we can't see both sides of this data race; it seems the
stack was corrupted somehow, since the first stack trace cannot be
correct (how can fmt.Fscanf call TestStartStopValidators?). I've
shortened the stack with an ellipsis because it was also very long.

But it seems clear that this violation was occurring.
golang/go#23842

==================
WARNING: DATA RACE
Write at 0x00c007036360 by goroutine 354:
  internal/race.Write()
      /usr/local/go/src/internal/race/race.go:41 +0x125
  sync.(*WaitGroup).Wait()
      /usr/local/go/src/sync/waitgroup.go:128 +0x126
  github.com/celo-org/celo-blockchain/core.(*BlockChain).Stop()
      /home/pierspowlesland/projects/celo-blockchain/core/blockchain.go:987 +0xfd
  github.com/celo-org/celo-blockchain/eth.(*Ethereum).Stop()
      /home/pierspowlesland/projects/celo-blockchain/eth/backend.go:584 +0x216
  github.com/celo-org/celo-blockchain/node.(*Node).stopServices()
      /home/pierspowlesland/projects/celo-blockchain/node/node.go:309 +0x150
  github.com/celo-org/celo-blockchain/node.(*Node).Close()
      /home/pierspowlesland/projects/celo-blockchain/node/node.go:221 +0x1bb
  github.com/celo-org/celo-blockchain/test.(*Node).Close()
      /home/pierspowlesland/projects/celo-blockchain/test/node.go:312 +0x3b8
  github.com/celo-org/celo-blockchain/test.Network.Shutdown()
      /home/pierspowlesland/projects/celo-blockchain/test/node.go:498 +0x9a
  runtime.call32()
      /usr/local/go/src/runtime/asm_amd64.s:551 +0x3d
  testing.(*T).FailNow()
      <autogenerated>:1 +0x44
  github.com/stretchr/testify/require.NoError()
      /home/pierspowlesland/go/pkg/mod/github.com/stretchr/testify@v1.4.0/require/require.go:974 +0x104
  github.com/celo-org/celo-blockchain/e2e_test_test.TestStartStopValidators()
      /home/pierspowlesland/projects/celo-blockchain/e2e_test/e2e_test.go:168 +0x15ce
  fmt.Fscanf()
      /usr/local/go/src/fmt/scan.go:143 +0xee
  fmt.Sscanf()
      /usr/local/go/src/fmt/scan.go:114 +0x191
  github.com/syndtr/goleveldb/leveldb/storage.fsParseName()
      /home/pierspowlesland/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20190923125748-758128399b1d/leveldb/storage/file_storage.go:643 +0xa6
  github.com/syndtr/goleveldb/leveldb/storage.(*fileStorage).List()
      /home/pierspowlesland/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20190923125748-758128399b1d/leveldb/storage/file_storage.go:458 +0x309
  github.com/syndtr/goleveldb/leveldb.(*DB).checkAndCleanFiles()
      /home/pierspowlesland/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20190923125748-758128399b1d/leveldb/db_util.go:52 +0x301
  github.com/syndtr/goleveldb/leveldb.openDB()
      /home/pierspowlesland/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20190923125748-758128399b1d/leveldb/db.go:136 +0x9bb
  fmt.(*ss).doScanf()
      /usr/local/go/src/fmt/scan.go:1230 +0x411
  fmt.Fscanf()
      /usr/local/go/src/fmt/scan.go:143 +0xee
  fmt.Sscanf()
  ...

Previous read at 0x00c001448520 by goroutine 203:
  [failed to restore the stack]
  • Loading branch information
piersy committed Oct 1, 2021
1 parent b34305a commit bbd17b6
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ type BlockChain struct {

quit chan struct{} // blockchain quit channel
wg sync.WaitGroup // chain processing wait group for shutting down
wgMu sync.Mutex // guards calls to wg.Add and wg.Wait so they cannot run concurrently
running int32 // 0 if chain is running, 1 when stopped
procInterrupt int32 // interrupt signaler for block processing

Expand Down Expand Up @@ -384,7 +385,9 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
bc.cacheConfig.TrieCleanRejournal = time.Minute
}
triedb := bc.stateCache.TrieDB()
bc.wgMu.Lock()
bc.wg.Add(1)
bc.wgMu.Unlock()
go func() {
defer bc.wg.Done()
triedb.SaveCachePeriodically(bc.cacheConfig.TrieCleanJournal, bc.cacheConfig.TrieCleanRejournal, bc.quit)
Expand Down Expand Up @@ -984,7 +987,9 @@ func (bc *BlockChain) Stop() {
bc.scope.Close()
close(bc.quit)
bc.StopInsert()
bc.wgMu.Lock()
bc.wg.Wait()
bc.wgMu.Unlock()

// Ensure that the entirety of the state snapshot is journalled to disk.
var snapBase common.Hash
Expand Down Expand Up @@ -1111,7 +1116,10 @@ type numberHash struct {
func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain []types.Receipts, ancientLimit uint64) (int, error) {
// We don't require the chainMu here since we want to maximize the
// concurrency of header insertion and receipt insertion.

bc.wgMu.Lock()
bc.wg.Add(1)
bc.wgMu.Unlock()
defer bc.wg.Done()

var (
Expand Down Expand Up @@ -1430,7 +1438,9 @@ var lastWrite uint64
// but does not write any state. This is used to construct competing side forks
// up to the point where they exceed the canonical total difficulty.
func (bc *BlockChain) writeBlockWithoutState(block *types.Block, td *big.Int) (err error) {
bc.wgMu.Lock()
bc.wg.Add(1)
bc.wgMu.Unlock()
defer bc.wg.Done()

batch := bc.db.NewBatch()
Expand All @@ -1445,7 +1455,9 @@ func (bc *BlockChain) writeBlockWithoutState(block *types.Block, td *big.Int) (e
// writeKnownBlock updates the head block flag with a known block
// and introduces chain reorg if necessary.
func (bc *BlockChain) writeKnownBlock(block *types.Block) error {
bc.wgMu.Lock()
bc.wg.Add(1)
bc.wgMu.Unlock()
defer bc.wg.Done()

current := bc.CurrentBlock()
Expand Down Expand Up @@ -1476,7 +1488,9 @@ func (bc *BlockChain) InsertPreprocessedBlock(block *types.Block, receipts []*ty
// insertPreprocessedBlock writes the block and all associated state to the database,
// but it expects the chain mutex to be held.
func (bc *BlockChain) insertPreprocessedBlock(block *types.Block, receipts []*types.Receipt, logs []*types.Log, state *state.StateDB, emitHeadEvent bool) (status WriteStatus, err error) {
bc.wgMu.Lock()
bc.wg.Add(1)
bc.wgMu.Unlock()
defer bc.wg.Done()

randomCommitment := common.Hash{}
Expand Down Expand Up @@ -1695,7 +1709,9 @@ func (bc *BlockChain) InsertChain(chain types.Blocks) (int, error) {
}
}
// Pre-checks passed, start the full block imports
bc.wgMu.Lock()
bc.wg.Add(1)
bc.wgMu.Unlock()
bc.chainmu.Lock()
n, err := bc.insertChain(chain, true)
bc.chainmu.Unlock()
Expand Down Expand Up @@ -2444,7 +2460,9 @@ func (bc *BlockChain) InsertHeaderChain(chain []*types.Header, checkFreq int, co
bc.chainmu.Lock()
defer bc.chainmu.Unlock()

bc.wgMu.Lock()
bc.wg.Add(1)
bc.wgMu.Unlock()
defer bc.wg.Done()

whFunc := func(header *types.Header) error {
Expand Down

0 comments on commit bbd17b6

Please sign in to comment.