From 303f0b791b16f37b88be0c3f06c70b22a0c40519 Mon Sep 17 00:00:00 2001 From: Leo Zhang Date: Wed, 1 Jun 2022 15:40:48 -0700 Subject: [PATCH] Merge master to v0.26 (#2536) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remove payload key comparison in Payload.Equals() Payload key is auxiliary data and shouldn't be included in payload equality test. Although payload key was used in this comparison for about a year, this is an edge case that isn't triggered by our current use of MTrie. So this change isn't expected to cause compatibility issues. * Add comments to say payload key shouldn't change In Mtrie, it is expected that for a specific path, the payload's key never changes. * Add Payload.ValueEquals() to compare payload values Restore Payload.Equals() to compare payload keys and values. Add Payload.ValueEquals() to compare payload values. Add and update tests. * fix race condition * skip long running test * outdated blocks are logged as errors, despite being expected during normal operations * add jobqueue README * addressed reviewer feedback * Add module that requests execution data for newly sealed blocks * Add timeout to blob fetch * Add edr to staked access node, and improve backfilling * Updates to access node config * Refactor requester to use queues and notifications * removed old requester files * fix imports in builders * fixes from testing * refactor from review * always cache new execution datas * throw irrecoverable on invalid blobs * cleanup comments and logging * merge notifications and cache and refactor into heap * add config, add restart handling, code cleanup * improve error handling, cleanup refactoring * cleanup error handling, refactor status * fix unittest import cycle, and some cleanup * make more options configurable * cleanup comments and naming * add metrics * remove metrics comment * add tests and fix bugs for status module * fix bug in status duplicate height processing * fix scoping in edr setup * use a single implementation of blob_service * add testing for RestartableComponent * add more comments and unittests * update check function prototype * convert to use jobqueue * fix rebase conflict * Update module/state_synchronization/requester/execution_data_requester.go Co-authored-by: Leo Zhang * add unit tests for notification_heap * Move ExecutionDataRequester interface to state_synchronization package * improve requester comments * cleanup requester bootstrapping * add tests for ExecutionDataService * remove unused consumer member, rename wrapped structs to readydoneaware * add unittest for starting from halted state * Update module/state_synchronization/requester/execution_data_requester.go Co-authored-by: Leo Zhang * Update module/state_synchronization/requester/execution_data_requester.go Co-authored-by: Leo Zhang * switch to storage.headers * save halted reason, improve logging * move pause/resume to jobqueue * Remove min heap from status * add unittest for ReadyDoneAwareConsumer * remove halts, switch to stateless notification status * refactor datastore check into separate struct, add unit tests * Apply suggestions from code review Co-authored-by: Leo Zhang * remove halted metric * refactor start block to be more explicit * move finalized block reader and sealed header reader to jobqueue namespace * reorder consumer args, add pre/post notifiers * remove execution data cache, move ed get into reader * fix sealed reader test, improve comments * fix datastore check tests and components lifecycle * rename 
ReadyDoneAwareConsumer to ComponentConsumer
* fix linting issues
* increase timeouts in check datastore tests
* comment improvements from review
* remove datastore checker
* remove now unused execution data service check method
* add more details to execution data service get comments
* fix tests after rebase
* fix lint errors
* cleanup unused arguments
* apply comment update from review Co-authored-by: Leo Zhang
* add requester integration test
* add missing lock in module/jobqueue/consumer.go Co-authored-by: Leo Zhang
* add lock to jobqueue consumer size()
* remove changes to blob service, and create new test blobservice for local db only
* fix potential hang in consumer startup, update cli arg description
* update storage namespaces in access_node_builder
* Apply suggestions from code review Co-authored-by: Simon Zhu
* Apply suggestions from code review Co-authored-by: Peter Argue <89119817+peterargue@users.noreply.github.com>
* Apply suggestions from code review Co-authored-by: Peter Argue <89119817+peterargue@users.noreply.github.com>
* Apply suggestions from code review Co-authored-by: Peter Argue <89119817+peterargue@users.noreply.github.com>
* update pull vs push
* Update module/component/component.go Co-authored-by: Jordan Schalm
* Remove unused codeowners
* Add atree reporter to execution state extraction
  Report:
  - number of FVM/storage/slab payloads
  - number of slab array meta/data payloads
  - number of slab map meta/data payloads
  - number of first level hash collision groups in slab maps
  - number of first level hash collisions in each collision group
  - etc.
* Fix lint error
* validate guarantors
* handle error from FindGuarantors
* add comment
* add go workspace files to .dockerignore and .gitignore
* Add Ledger.GetSingleValue for speedups and memory
  - Added Ledger.GetSingleValue() to improve speed, alloc/op, and allocs/op
  - Modified delta.View.readFunc to use Ledger.GetSingleValue()
  - Optimized existing mtrie batch read for single path

  Bench comparison between current Ledger.Get() vs Ledger.GetSingleValue(). Benchstat results:

  name                            old time/op    new time/op    delta
  LedgerGetOneValue/batch_get-4   6.54µs ± 0%    5.24µs ± 0%    -19.92%  (p=0.000 n=9+10)

  name                            old alloc/op   new alloc/op   delta
  LedgerGetOneValue/batch_get-4   1.74kB ± 0%    1.62kB ± 0%    -6.85%   (p=0.000 n=10+10)

  name                            old allocs/op  new allocs/op  delta
  LedgerGetOneValue/batch_get-4   21.0 ± 0%      15.0 ± 0%      -28.57%  (p=0.000 n=10+10)

  Bench comparison between optimized new Ledger.Get() vs Ledger.GetSingleValue(). Benchstat results:

  name                            old time/op    new time/op    delta
  LedgerGetOneValue/batch_get-4   5.70µs ± 0%    5.23µs ± 0%    -8.24%   (p=0.000 n=9+10)

  name                            old alloc/op   new alloc/op   delta
  LedgerGetOneValue/batch_get-4   1.69kB ± 0%    1.62kB ± 0%    -3.79%   (p=0.000 n=10+10)

  name                            old allocs/op  new allocs/op  delta
  LedgerGetOneValue/batch_get-4   18.0 ± 0%      15.0 ± 0%      -16.67%  (p=0.000 n=10+10)

* Update ledger mock
* Check Mtrie root integrity after read in tests
* add comment
* Apply suggestions from code review Co-authored-by: Jordan Schalm
* Reduce allocs/op by 79% in ledger read
  Change Forest.Read to return []ledger.Value without deep copying payload keys. This avoids 4 heap allocations per key. This change doesn't affect the caller (Ledger.Get) because it discards the payload keys.
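  To make the "no deep copy of payload keys" point above concrete, here is a minimal, self-contained Go sketch. The Key/Value/Payload types and both read functions are simplified stand-ins invented for illustration only; they are not the actual flow-go ledger/mtrie APIs, and the real Forest.Read operates on tries and paths rather than a plain slice.

    package main

    import "fmt"

    // Simplified stand-ins for ledger types (illustrative only).
    type Key struct{ Parts []string }
    type Value []byte
    type Payload struct {
        Key   Key
        Value Value
    }

    // Old shape (sketch): returning full payloads means deep-copying every key,
    // even though callers such as Ledger.Get only look at the values.
    func readPayloads(stored []Payload) []Payload {
        out := make([]Payload, len(stored))
        for i, p := range stored {
            parts := make([]string, len(p.Key.Parts)) // per-key heap allocations the caller discards
            copy(parts, p.Key.Parts)
            out[i] = Payload{Key: Key{Parts: parts}, Value: p.Value}
        }
        return out
    }

    // New shape (sketch): return only the values, so no key copies are made.
    func readValues(stored []Payload) []Value {
        out := make([]Value, len(stored))
        for i, p := range stored {
            out[i] = p.Value
        }
        return out
    }

    func main() {
        stored := []Payload{{Key: Key{Parts: []string{"owner", "key"}}, Value: Value("42")}}
        fmt.Println(len(readPayloads(stored)), string(readValues(stored)[0]))
    }

  Callers that only need values (as Ledger.Get does, per the note above) observe the same results either way, which is why the return-type change is transparent to them. Benchstat results for the change follow.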
  name         old time/op    new time/op    delta
  TrieRead-4   524µs ± 1%     420µs ± 1%     -19.77%  (p=0.000 n=10+10)

  name         old alloc/op   new alloc/op   delta
  TrieRead-4   190kB ± 0%     95kB ± 0%      -50.04%  (p=0.000 n=10+10)

  name         old allocs/op  new allocs/op  delta
  TrieRead-4   1.52k ± 0%     0.32k ± 0%     -79.17%  (p=0.000 n=10+10)

* Fix tests
* Update Forest.Read() callers to use new API
* Speedup and reduce allocs/op in ledger single read
  Changed Forest.ReadSingleValue to return ledger.Value without deep copying payload keys. This avoids 4 heap allocations per key. This change doesn't affect the caller (Ledger.GetSingleValue) because it discards the payload key.
* Move a line outside for-loop in readSinglePayload
  This optimization improves speed by 0.49% on Haswell CPU. Thanks for the suggestion @tarakby.
  Co-authored-by: Tarak Ben Youssef
* Read memory limit from state
* Fixed memory metering crasher and tests lint
* fixed tests
* add tests for script mutations
* update cadence dep
* mod tidy
* go mod tidy integration
* do not split always fatal error
* add tests to manager for discarding computations in scripts
* fix test
* assume service account exists
* update test for discarding storage mutations
* test fail on error
* if memory limit not set in service acct then default to max
* lint
* better fix: return was missing
* accidentally added files
* removed finished TODO
* handling unexpected error
* cleanup unused fields from loader
* add test cases
* update tests
* fix lint
* fix error wrapping
* upload log level for memory weights
* add test
* [Fix] Adding sync request message as a cluster type message (#2495)
* adds sync request as an authorized cluster channel type
* use isClusterChannel and add test
* fix test vars and comments
* Update topic_validator_test.go
* Update topic_validator_test.go
* explicitly check cluster msg codes, handle cluster sync request Co-authored-by: Khalil Claybon
* upgrade cadence to secure-cadence-m8
* update error handling
* fix test case
* flaky tests fix test async uploader - Test_AsyncUploader/stopping_component_stops_retrying (#2453)
* fixed flaky test - WIP - Test_AsyncUploader/stopping_component_stops_retrying
* fix flaky test - Test_AsyncUploader/stopping_component_stops_retrying - added wait group to ensure upload started before shutting down component
* updated comments, renamed wait group to be more clear
* minor refactoring - var renaming
* added more explicit retry mechanism to better test that retry is not working when component is shutting down
* uses unit.RequireCloseBefore() to check closed channel
* added clarification comments on why test is trying to increment callCount more than once
* Review updates
* Test fix
* change test fix
* chore(tests): fix init-light
* chore(tests): mark localnet and benchmarks owned by the performance team
* fix test case
* add blockheight to transaction result response
* fix test case
* add test case
* handle guarantee.ReferenceBlockID not found
* fix assigner tests
* fix linter
* fix reader tests
* upgrade cadence to secure-cadence-m9
* tidy go mods
* fix naming in limit script
* Upgrades libp2p pubsub version (#2515)
* generate mocks, make tidy
* error fix
* go mod update
* update mocks
* Suggestion for PR #2462 (#2517)
* consolidated errors from signer indices decoding into a single error type
* fix error type and tests
* fix typo Co-authored-by: Leo Zhang (zhangchiqing)
* Apply suggestions from code review Co-authored-by: Alexander Hentschel
* fix linter
* exit validation early for empty collections
* lint
* Update state/cluster/badger/mutator.go
* fix
tests * update test * extract decodeSignerIndices * small refactor * update to Cadence 8b113c539a2c * block nil check * workaround for handling hotstuff error * check that reference block is known otherwise the cluster committee may return an error as it is unable to determine the identities for the block (determined by the reference block) * height is already set here * add blockheight * blockheight for historical transaction request * test blockheight equivalence * upgrade flow to v0.3.1 * fix linter Co-authored-by: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Co-authored-by: Simon Zhu Co-authored-by: Alexander Hentschel Co-authored-by: Peter Argue <89119817+peterargue@users.noreply.github.com> Co-authored-by: Jordan Schalm Co-authored-by: Gregor G <75445744+sideninja@users.noreply.github.com> Co-authored-by: Tarak Ben Youssef <50252200+tarakby@users.noreply.github.com> Co-authored-by: Alexey Ivanov Co-authored-by: Tarak Ben Youssef Co-authored-by: Janez Podhostnik Co-authored-by: robert Co-authored-by: Daniel Sainati Co-authored-by: Robert E. Davidson III <45945043+robert-e-davidson3@users.noreply.github.com> Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Co-authored-by: Yahya Hassanzadeh Co-authored-by: Khalil Claybon Co-authored-by: Misha Co-authored-by: lolpuddle Co-authored-by: Jan Bernatik Co-authored-by: Bastian Müller --- .dockerignore | 3 + .gitignore | 4 + CODEOWNERS | 12 +- Makefile | 2 +- access/api.go | 3 + .../node_builder/access_node_builder.go | 178 +++- .../staked_access_node_builder.go | 24 +- .../unstaked_access_node_builder.go | 3 + cmd/node_builder.go | 9 + cmd/scaffold.go | 95 ++- cmd/scaffold_test.go | 532 +++++++++--- cmd/testUtil.go | 4 + .../execution_state_extract.go | 4 + .../read-execution-state/list-accounts/cmd.go | 4 +- cmd/util/ledger/reporters/atree_decode.go | 318 +++++++ .../ledger/reporters/atree_decode_test.go | 646 ++++++++++++++ cmd/util/ledger/reporters/atree_reporter.go | 243 ++++++ consensus/hotstuff/signature/packer.go | 7 +- consensus/hotstuff/validator/validator.go | 2 +- engine/access/ingestion/engine.go | 1 + engine/access/rpc/backend/backend_test.go | 1 + .../rpc/backend/backend_transactions.go | 6 + engine/collection/compliance/engine.go | 27 +- engine/common/follower/engine.go | 39 +- engine/consensus/compliance/engine.go | 27 +- engine/consensus/ingestion/core.go | 6 +- engine/consensus/ingestion/core_test.go | 3 +- engine/consensus/sealing/engine.go | 4 +- .../computer/uploader/uploader_test.go | 87 +- engine/execution/computation/manager.go | 2 +- engine/execution/computation/manager_test.go | 68 +- engine/execution/ingestion/engine.go | 9 + engine/execution/state/state.go | 22 +- .../assigner/blockconsumer/blockjob.go | 40 - .../assigner/blockconsumer/consumer.go | 4 +- .../assigner/blockconsumer/consumer_test.go | 7 +- .../assigner/blockconsumer/worker.go | 5 +- .../fetcher/chunkconsumer/consumer.go | 2 +- engine/verification/utils/unittest/fixture.go | 39 +- engine/verification/utils/unittest/helper.go | 11 +- fvm/blueprints/fees.go | 44 +- fvm/bootstrap.go | 34 +- fvm/executionParameters.go | 137 +++ fvm/fvm.go | 134 --- fvm/fvm_test.go | 324 ++++++- fvm/handler/programs.go | 17 +- fvm/meter/weighted/meter.go | 5 + fvm/scriptEnv.go | 72 +- fvm/transactionEnv.go | 87 +- go.mod | 6 +- go.sum | 18 +- integration/go.mod | 14 +- integration/go.sum | 11 +- integration/localnet/Makefile | 2 +- integration/testnet/container.go | 15 + .../tests/access/execution_state_sync_test.go | 208 +++++ 
integration/utils/blob_service.go | 94 +++ integration/utils/contLoadGenerator.go | 4 - ledger/common/encoding/encoding_test.go | 24 +- ledger/complete/ledger.go | 29 +- ledger/complete/ledger_benchmark_test.go | 73 ++ ledger/complete/ledger_test.go | 90 ++ ledger/complete/mtrie/forest.go | 28 +- ledger/complete/mtrie/forest_test.go | 163 ++-- ledger/complete/mtrie/trie/trie.go | 46 +- ledger/complete/mtrie/trie/trie_test.go | 93 ++ ledger/complete/wal/checkpointer_test.go | 28 +- ledger/complete/wal/compactor_test.go | 8 +- ledger/ledger.go | 26 +- ledger/ledger_test.go | 118 +-- ledger/mock/ledger.go | 23 + ledger/partial/ledger.go | 16 + ledger/partial/ledger_test.go | 37 +- ledger/partial/ptrie/partialTrie.go | 9 + ledger/partial/ptrie/partialTrie_test.go | 114 +++ ledger/trie.go | 26 + ledger/trie_test.go | 299 +++++++ module/chunks/chunkVerifier.go | 6 +- module/component/component.go | 8 +- module/component/component_manager_test.go | 30 +- module/dkg/client_test.go | 1 + .../irrecoverable_example_test.go | 1 - module/jobqueue.go | 6 + module/jobqueue/README.md | 85 ++ module/jobqueue/component_consumer.go | 152 ++++ module/jobqueue/component_consumer_test.go | 300 +++++++ module/jobqueue/consumer.go | 33 +- module/jobqueue/consumer_behavior_test.go | 63 +- module/jobqueue/consumer_test.go | 130 +-- .../jobqueue/finalized_block_reader.go | 2 +- .../jobqueue/finalized_block_reader_test.go | 16 +- module/jobqueue/jobs.go | 68 ++ module/jobqueue/jobs_test.go | 77 ++ module/jobqueue/sealed_header_reader.go | 66 ++ module/jobqueue/sealed_header_reader_test.go | 83 ++ module/jobqueue/workerpool.go | 92 ++ module/metrics.go | 14 + module/metrics/execution_data_requester.go | 110 +++ module/metrics/namespaces.go | 3 +- module/metrics/noop.go | 4 + module/mock/backend_scripts_metrics.go | 14 +- .../mock/execution_data_requester_metrics.go | 46 + module/mock/job_consumer.go | 14 + module/mock/transaction_metrics.go | 10 +- module/signature/errors.go | 47 +- module/signature/signer_indices.go | 85 +- module/signature/signer_indices_test.go | 85 +- .../execution_data_requester.go | 21 + .../execution_data_service.go | 7 +- .../mock/execution_data_requester.go | 76 ++ .../mock/execution_data_service.go | 32 + .../requester/execution_data_requester.go | 462 ++++++++++ .../execution_data_requester_test.go | 793 ++++++++++++++++++ .../requester/jobs/execution_data_reader.go | 112 +++ .../jobs/execution_data_reader_test.go | 200 +++++ .../requester/jobs/jobs.go | 35 + .../requester/jobs/jobs_test.go | 47 ++ .../requester/unittest/unittest.go | 223 +++++ network/p2p/blob_service.go | 24 +- state/cluster/badger/mutator.go | 41 +- state/cluster/badger/mutator_test.go | 4 +- state/protocol/badger/mutator.go | 15 + state/protocol/badger/mutator_test.go | 106 +++ state/protocol/epoch.go | 3 + state/protocol/state.go | 3 + state/protocol/util.go | 11 +- utils/unittest/fixtures.go | 43 +- 127 files changed, 8002 insertions(+), 886 deletions(-) create mode 100644 cmd/util/ledger/reporters/atree_decode.go create mode 100644 cmd/util/ledger/reporters/atree_decode_test.go create mode 100644 cmd/util/ledger/reporters/atree_reporter.go delete mode 100644 engine/verification/assigner/blockconsumer/blockjob.go create mode 100644 fvm/executionParameters.go create mode 100644 integration/tests/access/execution_state_sync_test.go create mode 100644 integration/utils/blob_service.go create mode 100644 ledger/trie_test.go create mode 100644 module/jobqueue/README.md create mode 100644 
module/jobqueue/component_consumer.go create mode 100644 module/jobqueue/component_consumer_test.go rename engine/verification/assigner/blockconsumer/reader.go => module/jobqueue/finalized_block_reader.go (98%) rename engine/verification/assigner/blockconsumer/reader_test.go => module/jobqueue/finalized_block_reader_test.go (81%) create mode 100644 module/jobqueue/jobs.go create mode 100644 module/jobqueue/jobs_test.go create mode 100644 module/jobqueue/sealed_header_reader.go create mode 100644 module/jobqueue/sealed_header_reader_test.go create mode 100644 module/jobqueue/workerpool.go create mode 100644 module/metrics/execution_data_requester.go create mode 100644 module/mock/execution_data_requester_metrics.go create mode 100644 module/state_synchronization/execution_data_requester.go create mode 100644 module/state_synchronization/mock/execution_data_requester.go create mode 100644 module/state_synchronization/requester/execution_data_requester.go create mode 100644 module/state_synchronization/requester/execution_data_requester_test.go create mode 100644 module/state_synchronization/requester/jobs/execution_data_reader.go create mode 100644 module/state_synchronization/requester/jobs/execution_data_reader_test.go create mode 100644 module/state_synchronization/requester/jobs/jobs.go create mode 100644 module/state_synchronization/requester/jobs/jobs_test.go create mode 100644 module/state_synchronization/requester/unittest/unittest.go diff --git a/.dockerignore b/.dockerignore index 44d4840f8d6..6b1dfb9d54b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,3 +4,6 @@ integration/localnet/data/ integration/localnet/trie/ integration/localnet/docker-compose.nodes.yml integration/localnet/targets.nodes.json + +go.work +go.work.sum diff --git a/.gitignore b/.gitignore index 6f036e56299..b77da1e697d 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,7 @@ language/tools/vscode-extension/out/* read-badger read-protocol-state remove-execution-fork + +# go workspace +go.work +go.work.sum diff --git a/CODEOWNERS b/CODEOWNERS index 3369fbf62ac..826e81ef542 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -4,8 +4,8 @@ /protobuf/services/collection/** @jordanschalm # Consensus Stream -/cmd/consensus/** @AlexHentschel @awishformore @zhangchiqing -/engine/consensus/** @AlexHentschel @awishformore @zhangchiqing +/cmd/consensus/** @AlexHentschel @zhangchiqing +/engine/consensus/** @AlexHentschel @zhangchiqing # Execution Stream /cmd/execution/** @m4ksio @ramtinms @@ -34,4 +34,10 @@ /crypto/** @tarakby # Bootstrap and transit scripts -/cmd/bootstrap/** @vishalchangrani @ljk662 +/cmd/bootstrap/** @vishalchangrani + +# Performance Stream +/integration/localnet/** @SaveTheRbtz @simonhf @Kay-Zee @zhangchiqing +/integration/loader/** @SaveTheRbtz @simonhf @Kay-Zee +/integration/benchmark/** @SaveTheRbtz @simonhf @Kay-Zee +/integration/utils/** @SaveTheRbtz @simonhf @Kay-Zee diff --git a/Makefile b/Makefile index 20b6f28cbca..eaa115d1803 100644 --- a/Makefile +++ b/Makefile @@ -127,7 +127,7 @@ generate-mocks: GO111MODULE=on mockgen -destination=module/mocks/network.go -package=mocks github.com/onflow/flow-go/module Local,Requester GO111MODULE=on mockgen -destination=network/mocknetwork/engine.go -package=mocknetwork github.com/onflow/flow-go/network Engine GO111MODULE=on mockgen -destination=network/mocknetwork/mock_network.go -package=mocknetwork github.com/onflow/flow-go/network Network - GO111MODULE=on mockery --name '(ExecutionDataService|ExecutionDataCIDCache)' --dir=module/state_synchronization 
--case=underscore --output="./module/state_synchronization/mock" --outpkg="state_synchronization" + GO111MODULE=on mockery --name '(ExecutionDataService|ExecutionDataCIDCache|ExecutionDataRequester)' --dir=module/state_synchronization --case=underscore --output="./module/state_synchronization/mock" --outpkg="state_synchronization" GO111MODULE=on mockery --name 'ExecutionState' --dir=engine/execution/state --case=underscore --output="engine/execution/state/mock" --outpkg="mock" GO111MODULE=on mockery --name 'BlockComputer' --dir=engine/execution/computation/computer --case=underscore --output="engine/execution/computation/computer/mock" --outpkg="mock" GO111MODULE=on mockery --name 'ComputationManager' --dir=engine/execution/computation --case=underscore --output="engine/execution/computation/mock" --outpkg="mock" diff --git a/access/api.go b/access/api.go index 2101afb1a44..8d66f15b70e 100644 --- a/access/api.go +++ b/access/api.go @@ -58,6 +58,7 @@ type TransactionResult struct { BlockID flow.Identifier TransactionID flow.Identifier CollectionID flow.Identifier + BlockHeight uint64 } func TransactionResultToMessage(result *TransactionResult) *access.TransactionResultResponse { @@ -69,6 +70,7 @@ func TransactionResultToMessage(result *TransactionResult) *access.TransactionRe BlockId: result.BlockID[:], TransactionId: result.TransactionID[:], CollectionId: result.CollectionID[:], + BlockHeight: uint64(result.BlockHeight), } } @@ -93,6 +95,7 @@ func MessageToTransactionResult(message *access.TransactionResultResponse) *Tran BlockID: flow.HashToID(message.BlockId), TransactionID: flow.HashToID(message.TransactionId), CollectionID: flow.HashToID(message.CollectionId), + BlockHeight: message.BlockHeight, } } diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index 753ce078ae8..c1d7340c4b9 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -4,16 +4,16 @@ import ( "encoding/json" "errors" "fmt" + "os" + "path/filepath" "strings" "time" + badger "github.com/ipfs/go-ds-badger2" "github.com/onflow/flow/protobuf/go/flow/access" "github.com/rs/zerolog" "github.com/spf13/pflag" - "github.com/onflow/flow-go/crypto" - "github.com/onflow/flow-go/module/compliance" - "github.com/onflow/flow-go/cmd" "github.com/onflow/flow-go/consensus" "github.com/onflow/flow-go/consensus/hotstuff" @@ -22,6 +22,8 @@ import ( hotsignature "github.com/onflow/flow-go/consensus/hotstuff/signature" "github.com/onflow/flow-go/consensus/hotstuff/verification" recovery "github.com/onflow/flow-go/consensus/recovery/protocol" + "github.com/onflow/flow-go/crypto" + "github.com/onflow/flow-go/engine" "github.com/onflow/flow-go/engine/access/ingestion" "github.com/onflow/flow-go/engine/access/rpc" "github.com/onflow/flow-go/engine/access/rpc/backend" @@ -30,24 +32,30 @@ import ( "github.com/onflow/flow-go/engine/common/requester" synceng "github.com/onflow/flow-go/engine/common/synchronization" "github.com/onflow/flow-go/model/encodable" + "github.com/onflow/flow-go/model/encoding/cbor" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/model/flow/filter" "github.com/onflow/flow-go/module" "github.com/onflow/flow-go/module/buffer" + "github.com/onflow/flow-go/module/compliance" finalizer "github.com/onflow/flow-go/module/finalizer/consensus" "github.com/onflow/flow-go/module/id" "github.com/onflow/flow-go/module/mempool/stdmap" "github.com/onflow/flow-go/module/metrics" + 
"github.com/onflow/flow-go/module/state_synchronization" + edrequester "github.com/onflow/flow-go/module/state_synchronization/requester" "github.com/onflow/flow-go/module/synchronization" "github.com/onflow/flow-go/network" netcache "github.com/onflow/flow-go/network/cache" cborcodec "github.com/onflow/flow-go/network/codec/cbor" + "github.com/onflow/flow-go/network/compressor" "github.com/onflow/flow-go/network/p2p" "github.com/onflow/flow-go/network/validator" "github.com/onflow/flow-go/state/protocol" badgerState "github.com/onflow/flow-go/state/protocol/badger" "github.com/onflow/flow-go/state/protocol/blocktimer" - storage "github.com/onflow/flow-go/storage/badger" + "github.com/onflow/flow-go/storage" + bstorage "github.com/onflow/flow-go/storage/badger" ) // AccessNodeBuilder extends cmd.NodeBuilder and declares additional functions needed to bootstrap an Access node @@ -98,6 +106,10 @@ type AccessNodeConfig struct { logTxTimeToFinalizedExecuted bool retryEnabled bool rpcMetricsEnabled bool + executionDataSyncEnabled bool + executionDataDir string + executionDataStartHeight uint64 + executionDataConfig edrequester.ExecutionDataConfig baseOptions []cmd.Option PublicNetworkConfig PublicNetworkConfig @@ -112,6 +124,7 @@ type PublicNetworkConfig struct { // DefaultAccessNodeConfig defines all the default values for the AccessNodeConfig func DefaultAccessNodeConfig() *AccessNodeConfig { + homedir, _ := os.UserHomeDir() return &AccessNodeConfig{ collectionGRPCPort: 9000, executionGRPCPort: 9000, @@ -147,6 +160,16 @@ func DefaultAccessNodeConfig() *AccessNodeConfig { Metrics: metrics.NewNoopCollector(), }, observerNetworkingKeyPath: cmd.NotSet, + executionDataSyncEnabled: false, + executionDataDir: filepath.Join(homedir, ".flow", "execution_data"), + executionDataStartHeight: 0, + executionDataConfig: edrequester.ExecutionDataConfig{ + InitialBlockHeight: 0, + MaxSearchAhead: edrequester.DefaultMaxSearchAhead, + FetchTimeout: edrequester.DefaultFetchTimeout, + RetryDelay: edrequester.DefaultRetryDelay, + MaxRetryDelay: edrequester.DefaultMaxRetryDelay, + }, } } @@ -175,6 +198,8 @@ type FlowAccessNodeBuilder struct { Finalized *flow.Header Pending []*flow.Header FollowerCore module.HotStuffFollower + ExecutionDataService state_synchronization.ExecutionDataService + ExecutionDataRequester state_synchronization.ExecutionDataRequester // for the unstaked access node, the sync engine participants provider is the libp2p peer store which is not // available until after the network has started. 
Hence, a factory function that needs to be called just before // creating the sync engine @@ -277,8 +302,18 @@ func (builder *FlowAccessNodeBuilder) buildFollowerCore() *FlowAccessNodeBuilder // initialize the verifier for the protocol consensus verifier := verification.NewCombinedVerifier(builder.Committee, packer) - followerCore, err := consensus.NewFollower(node.Logger, builder.Committee, node.Storage.Headers, final, verifier, - builder.FinalizationDistributor, node.RootBlock.Header, node.RootQC, builder.Finalized, builder.Pending) + followerCore, err := consensus.NewFollower( + node.Logger, + builder.Committee, + node.Storage.Headers, + final, + verifier, + builder.FinalizationDistributor, + node.RootBlock.Header, + node.RootQC, + builder.Finalized, + builder.Pending, + ) if err != nil { return nil, fmt.Errorf("could not initialize follower core: %w", err) } @@ -293,7 +328,7 @@ func (builder *FlowAccessNodeBuilder) buildFollowerCore() *FlowAccessNodeBuilder func (builder *FlowAccessNodeBuilder) buildFollowerEngine() *FlowAccessNodeBuilder { builder.Component("follower engine", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { // initialize cleaner for DB - cleaner := storage.NewCleaner(node.Logger, node.DB, builder.Metrics.CleanCollector, flow.DefaultValueLogGCFrequency) + cleaner := bstorage.NewCleaner(node.Logger, node.DB, builder.Metrics.CleanCollector, flow.DefaultValueLogGCFrequency) conCache := buffer.NewPendingBlocks() followerEng, err := follower.New( @@ -375,6 +410,111 @@ func (builder *FlowAccessNodeBuilder) BuildConsensusFollower() AccessNodeBuilder return builder } +func (builder *FlowAccessNodeBuilder) BuildExecutionDataRequester() *FlowAccessNodeBuilder { + var ds *badger.Datastore + var bs network.BlobService + var processedBlockHeight storage.ConsumerProgress + var processedNotifications storage.ConsumerProgress + + builder. + Module("execution data datastore and blobstore", func(node *cmd.NodeConfig) error { + err := os.MkdirAll(builder.executionDataDir, 0700) + if err != nil { + return err + } + + ds, err = badger.NewDatastore(builder.executionDataDir, &badger.DefaultOptions) + if err != nil { + return err + } + + builder.ShutdownFunc(func() error { + if err := ds.Close(); err != nil { + return fmt.Errorf("could not close execution data datastore: %w", err) + } + return nil + }) + + return nil + }). + Module("processed block height consumer progress", func(node *cmd.NodeConfig) error { + // uses the datastore's DB + processedBlockHeight = bstorage.NewConsumerProgress(ds.DB, module.ConsumeProgressExecutionDataRequesterBlockHeight) + return nil + }). + Module("processed notifications consumer progress", func(node *cmd.NodeConfig) error { + // uses the datastore's DB + processedNotifications = bstorage.NewConsumerProgress(ds.DB, module.ConsumeProgressExecutionDataRequesterNotification) + return nil + }). + Component("execution data service", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { + var err error + bs, err = node.Network.RegisterBlobService(engine.ExecutionDataService, ds) + if err != nil { + return nil, fmt.Errorf("could not register blob service: %w", err) + } + + builder.ExecutionDataService = state_synchronization.NewExecutionDataService( + new(cbor.Codec), + compressor.NewLz4Compressor(), + bs, + metrics.NewExecutionDataServiceCollector(), + builder.Logger, + ) + + return builder.ExecutionDataService, nil + }). 
+ Component("execution data requester", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { + // Validation of the start block height needs to be done after loading state + if builder.executionDataStartHeight > 0 { + if builder.executionDataStartHeight <= builder.RootBlock.Header.Height { + return nil, fmt.Errorf( + "execution data start block height (%d) must be greater than the root block height (%d)", + builder.executionDataStartHeight, builder.RootBlock.Header.Height) + } + + latestSeal, err := builder.State.Sealed().Head() + if err != nil { + return nil, fmt.Errorf("failed to get latest sealed height") + } + + // Note: since the root block of a spork is also sealed in the root protocol state, the + // latest sealed height is always equal to the root block height. That means that at the + // very beginning of a spork, this check will always fail. Operators should not specify + // an InitialBlockHeight when starting from the beginning of a spork. + if builder.executionDataStartHeight > latestSeal.Height { + return nil, fmt.Errorf( + "execution data start block height (%d) must be less than or equal to the latest sealed block height (%d)", + builder.executionDataStartHeight, latestSeal.Height) + } + + // executionDataStartHeight is provided as the first block to sync, but the + // requester expects the initial last processed height, which is the first height - 1 + builder.executionDataConfig.InitialBlockHeight = builder.executionDataStartHeight - 1 + } else { + builder.executionDataConfig.InitialBlockHeight = builder.RootBlock.Header.Height + } + + builder.ExecutionDataRequester = edrequester.New( + builder.Logger, + metrics.NewExecutionDataRequesterCollector(), + builder.ExecutionDataService, + processedBlockHeight, + processedNotifications, + builder.State, + builder.Storage.Headers, + builder.Storage.Results, + builder.executionDataConfig, + ) + + builder.FinalizationDistributor.AddOnBlockFinalizedConsumer(builder.ExecutionDataRequester.OnBlockFinalized) + + return builder.ExecutionDataRequester, nil + }) + + return builder +} + type Option func(*AccessNodeConfig) func WithBootStrapPeers(bootstrapNodes ...*flow.Identity) Option { @@ -459,11 +599,35 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { flags.StringSliceVar(&builder.bootstrapNodePublicKeys, "bootstrap-node-public-keys", defaultConfig.bootstrapNodePublicKeys, "the networking public key of the bootstrap access node if this is an unstaked access node (in the same order as the bootstrap node addresses) e.g. 
\"d57a5e9c5.....\",\"44ded42d....\"") flags.BoolVar(&builder.supportsUnstakedFollower, "supports-unstaked-node", defaultConfig.supportsUnstakedFollower, "true if this staked access node supports unstaked node") flags.StringVar(&builder.PublicNetworkConfig.BindAddress, "public-network-address", defaultConfig.PublicNetworkConfig.BindAddress, "staked access node's public network bind address") + + // ExecutionDataRequester config + flags.BoolVar(&builder.executionDataSyncEnabled, "execution-data-sync-enabled", defaultConfig.executionDataSyncEnabled, "whether to enable the execution data sync protocol") + flags.StringVar(&builder.executionDataDir, "execution-data-dir", defaultConfig.executionDataDir, "directory to use for Execution Data database") + flags.Uint64Var(&builder.executionDataStartHeight, "execution-data-start-height", defaultConfig.executionDataStartHeight, "height of first block to sync execution data from when starting with an empty Execution Data database") + flags.Uint64Var(&builder.executionDataConfig.MaxSearchAhead, "execution-data-max-search-ahead", defaultConfig.executionDataConfig.MaxSearchAhead, "max number of heights to search ahead of the lowest outstanding execution data height") + flags.DurationVar(&builder.executionDataConfig.FetchTimeout, "execution-data-fetch-timeout", defaultConfig.executionDataConfig.FetchTimeout, "timeout to use when fetching execution data from the network e.g. 300s") + flags.DurationVar(&builder.executionDataConfig.RetryDelay, "execution-data-retry-delay", defaultConfig.executionDataConfig.RetryDelay, "initial delay for exponential backoff when fetching execution data fails e.g. 10s") + flags.DurationVar(&builder.executionDataConfig.MaxRetryDelay, "execution-data-max-retry-delay", defaultConfig.executionDataConfig.MaxRetryDelay, "maximum delay for exponential backoff when fetching execution data fails e.g. 
5m") }).ValidateFlags(func() error { if builder.supportsUnstakedFollower && (builder.PublicNetworkConfig.BindAddress == cmd.NotSet || builder.PublicNetworkConfig.BindAddress == "") { return errors.New("public-network-address must be set if supports-unstaked-node is true") } + if builder.executionDataSyncEnabled { + if builder.executionDataConfig.FetchTimeout <= 0 { + return errors.New("execution-data-fetch-timeout must be greater than 0") + } + if builder.executionDataConfig.RetryDelay <= 0 { + return errors.New("execution-data-retry-delay must be greater than 0") + } + if builder.executionDataConfig.MaxRetryDelay < builder.executionDataConfig.RetryDelay { + return errors.New("execution-data-max-retry-delay must be greater than or equal to execution-data-retry-delay") + } + if builder.executionDataConfig.MaxSearchAhead == 0 { + return errors.New("execution-data-max-search-ahead must be greater than 0") + } + } + return nil }) } diff --git a/cmd/access/node_builder/staked_access_node_builder.go b/cmd/access/node_builder/staked_access_node_builder.go index 99574a25127..bbce9bc4ab3 100644 --- a/cmd/access/node_builder/staked_access_node_builder.go +++ b/cmd/access/node_builder/staked_access_node_builder.go @@ -241,8 +241,24 @@ func (builder *StakedAccessNodeBuilder) Build() (cmd.Node, error) { return nil, fmt.Errorf("could not create requester engine: %w", err) } - builder.IngestEng, err = ingestion.New(node.Logger, node.Network, node.State, node.Me, builder.RequestEng, node.Storage.Blocks, node.Storage.Headers, node.Storage.Collections, node.Storage.Transactions, node.Storage.Results, node.Storage.Receipts, builder.TransactionMetrics, - builder.CollectionsToMarkFinalized, builder.CollectionsToMarkExecuted, builder.BlocksToMarkExecuted, builder.RpcEng) + builder.IngestEng, err = ingestion.New( + node.Logger, + node.Network, + node.State, + node.Me, + builder.RequestEng, + node.Storage.Blocks, + node.Storage.Headers, + node.Storage.Collections, + node.Storage.Transactions, + node.Storage.Results, + node.Storage.Receipts, + builder.TransactionMetrics, + builder.CollectionsToMarkFinalized, + builder.CollectionsToMarkExecuted, + builder.BlocksToMarkExecuted, + builder.RpcEng, + ) if err != nil { return nil, err } @@ -278,6 +294,10 @@ func (builder *StakedAccessNodeBuilder) Build() (cmd.Node, error) { }) } + if builder.executionDataSyncEnabled { + builder.BuildExecutionDataRequester() + } + builder.Component("ping engine", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { ping, err := pingeng.New( node.Logger, diff --git a/cmd/access/node_builder/unstaked_access_node_builder.go b/cmd/access/node_builder/unstaked_access_node_builder.go index 4343198c9ab..305e5ab2535 100644 --- a/cmd/access/node_builder/unstaked_access_node_builder.go +++ b/cmd/access/node_builder/unstaked_access_node_builder.go @@ -266,6 +266,9 @@ func (builder *UnstakedAccessNodeBuilder) enqueueMiddleware() { // Currently, the unstaked AN only runs the follower engine. 
func (builder *UnstakedAccessNodeBuilder) Build() (cmd.Node, error) { builder.BuildConsensusFollower() + if builder.executionDataSyncEnabled { + builder.BuildExecutionDataRequester() + } return builder.FlowAccessNodeBuilder.Build() } diff --git a/cmd/node_builder.go b/cmd/node_builder.go index cf82b90cf13..e0af2aecc59 100644 --- a/cmd/node_builder.go +++ b/cmd/node_builder.go @@ -21,6 +21,7 @@ import ( "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/component" "github.com/onflow/flow-go/module/id" "github.com/onflow/flow-go/module/synchronization" "github.com/onflow/flow-go/network" @@ -77,6 +78,14 @@ type NodeBuilder interface { // and the node will wait for the component to exit gracefully. Component(name string, f ReadyDoneFactory) NodeBuilder + // RestartableComponent adds a new component to the node that conforms to the ReadyDoneAware + // interface, and calls the provided error handler when an irrecoverable error is encountered. + // Use RestartableComponent if the component is not critical to the node's safe operation and + // can/should be independently restarted when an irrecoverable error is encountered. + // + // Any irrecoverable errors thrown by the component will be passed to the provided error handler. + RestartableComponent(name string, f ReadyDoneFactory, errorHandler component.OnError) NodeBuilder + // ShutdownFunc adds a callback function that is called after all components have exited. // All shutdown functions are called regardless of errors returned by previous callbacks. Any // errors returned are captured and passed to the caller. diff --git a/cmd/scaffold.go b/cmd/scaffold.go index 44cf48155be..3a67680d9be 100644 --- a/cmd/scaffold.go +++ b/cmd/scaffold.go @@ -85,6 +85,8 @@ type namedModuleFunc struct { type namedComponentFunc struct { fn ReadyDoneFactory name string + + errorHandler component.OnError } // FlowNodeBuilder is the default builder struct used for all flow nodes @@ -885,13 +887,21 @@ func (fnb *FlowNodeBuilder) handleComponents() error { parent := make(chan struct{}) close(parent) + var err error // Run all components for _, f := range fnb.components { started := make(chan struct{}) - err := fnb.handleComponent(f, parent, func() { close(started) }) + + if f.errorHandler != nil { + err = fnb.handleRestartableComponent(f, parent, func() { close(started) }) + } else { + err = fnb.handleComponent(f, parent, func() { close(started) }) + } + if err != nil { return err } + parent = started } return nil @@ -906,9 +916,9 @@ func (fnb *FlowNodeBuilder) handleComponents() error { // to close before starting, and then call the started callback after they are ready(). The started // callback closes the parentReady channel of the next component, and so on. // -// TODO: Instead of this serial startup, components should wait for their depenedencies to be ready +// TODO: Instead of this serial startup, components should wait for their dependencies to be ready // using their ReadyDoneAware interface. After components are updated to use the idempotent -// ReadyDoneAware interface and explicilty wait for their dependencies to be ready, we can remove +// ReadyDoneAware interface and explicitly wait for their dependencies to be ready, we can remove // this channel chaining. 
func (fnb *FlowNodeBuilder) handleComponent(v namedComponentFunc, parentReady <-chan struct{}, started func()) error { // Add a closure that starts the component when the node is started, and then waits for it to exit @@ -967,6 +977,63 @@ func (fnb *FlowNodeBuilder) handleComponent(v namedComponentFunc, parentReady <- return nil } +// handleRestartableComponent constructs a component using the provided ReadyDoneFactory, and +// registers a worker with the ComponentManager to be run when the node is started. +// +// Restartable Components are components that can be restarted after successfully handling +// an irrecoverable error. +// +// Any irrecoverable errors thrown by the component will be passed to the provided error handler. +func (fnb *FlowNodeBuilder) handleRestartableComponent(v namedComponentFunc, parentReady <-chan struct{}, started func()) error { + fnb.componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + // wait for the previous component to be ready before starting + if err := util.WaitClosed(ctx, parentReady); err != nil { + return + } + + // Note: we're marking the worker routine ready before we even attempt to start the + // component. the idea behind a restartable component is that the node should not depend + // on it for safe operation, so the node does not need to wait for it to be ready. + ready() + + // do not block serial startup. started can only be called once, so it cannot be called + // from within the componentFactory + started() + + log := fnb.Logger.With().Str("component", v.name).Logger() + + // This may be called multiple times if the component is restarted + componentFactory := func() (component.Component, error) { + c, err := v.fn(fnb.NodeConfig) + if err != nil { + return nil, err + } + log.Info().Msg("component initialization complete") + + go func() { + if err := util.WaitClosed(ctx, c.Ready()); err != nil { + log.Info().Msg("component startup aborted") + } else { + log.Info().Msg("component startup complete") + } + + <-ctx.Done() + log.Info().Msg("component shutdown started") + }() + return c.(component.Component), nil + } + + err := component.RunComponent(ctx, componentFactory, v.errorHandler) + if err != nil && !errors.Is(err, ctx.Err()) { + ctx.Throw(fmt.Errorf("component %s encountered an unhandled irrecoverable error: %w", v.name, err)) + } + + log.Info().Msg("component shutdown complete") + }) + + return nil +} + // ExtraFlags enables binding additional flags beyond those defined in BaseConfig. func (fnb *FlowNodeBuilder) ExtraFlags(f func(*pflag.FlagSet)) NodeBuilder { f(fnb.flags) @@ -1037,6 +1104,28 @@ func (fnb *FlowNodeBuilder) OverrideComponent(name string, f ReadyDoneFactory) N return fnb.Component(name, f) } +// RestartableComponent adds a new component to the node that conforms to the ReadyDoneAware +// interface, and calls the provided error handler when an irrecoverable error is encountered. +// Use RestartableComponent if the component is not critical to the node's safe operation and +// can/should be independently restarted when an irrecoverable error is encountered. +// +// IMPORTANT: Since a RestartableComponent can be restarted independently of the node, the node and +// other components must not rely on it for safe operation, and failures must be handled gracefully. +// As such, RestartableComponents do not block the node from becoming ready, and do not block +// subsequent components from starting serially. They do start in serial order. 
+// +// Note: The ReadyDoneFactory method may be called multiple times if the component is restarted. +// +// Any irrecoverable errors thrown by the component will be passed to the provided error handler. +func (fnb *FlowNodeBuilder) RestartableComponent(name string, f ReadyDoneFactory, errorHandler component.OnError) NodeBuilder { + fnb.components = append(fnb.components, namedComponentFunc{ + fn: f, + name: name, + errorHandler: errorHandler, + }) + return fnb +} + func (fnb *FlowNodeBuilder) PreInit(f BuilderFunc) NodeBuilder { fnb.preInitFns = append(fnb.preInitFns, f) return fnb diff --git a/cmd/scaffold_test.go b/cmd/scaffold_test.go index a6be4a6b63b..be968749225 100644 --- a/cmd/scaffold_test.go +++ b/cmd/scaffold_test.go @@ -7,6 +7,8 @@ import ( "io/ioutil" "os" "path/filepath" + "strings" + "sync" "testing" "time" @@ -52,36 +54,6 @@ func TestLoadSecretsEncryptionKey(t *testing.T) { }) } -type testReadyDone struct { - name string - readyFn func(string) <-chan struct{} - doneFn func(string) <-chan struct{} -} - -func (n *testReadyDone) Ready() <-chan struct{} { - return n.readyFn(n.name) -} - -func (n *testReadyDone) Done() <-chan struct{} { - return n.doneFn(n.name) -} - -type testComponent struct { - *testReadyDone - startFn func(irrecoverable.SignalerContext, string) - started chan struct{} -} - -func (n *testComponent) Start(ctx irrecoverable.SignalerContext) { - defer close(n.started) - n.startFn(ctx, n.name) -} - -func (n *testComponent) Ready() <-chan struct{} { - <-n.started - return n.readyFn(n.name) -} - // Test the components are started in the correct order, and are run serially func TestComponentsRunSerially(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) @@ -90,58 +62,29 @@ func TestComponentsRunSerially(t *testing.T) { nb := FlowNode("scaffold test") nb.componentBuilder = component.NewComponentManagerBuilder() - logger := testLog{} - - readyFn := func(name string) <-chan struct{} { - ready := make(chan struct{}) - defer close(ready) - logger.Logf("%s ready", name) - return ready - } - doneFn := func(name string) <-chan struct{} { - done := make(chan struct{}) - defer close(done) - logger.Logf("%s done", name) - return done - } - startFn := func(ctx irrecoverable.SignalerContext, name string) { - // add delay to test components are run serially - time.Sleep(5 * time.Millisecond) - logger.Logf("%s started", name) - } + logger := &testLog{} name1 := "component 1" nb.Component(name1, func(node *NodeConfig) (module.ReadyDoneAware, error) { logger.Logf("%s initialized", name1) - return &testReadyDone{ - name: name1, - readyFn: readyFn, - doneFn: doneFn, - }, nil + return newTestReadyDone(logger, name1), nil }) name2 := "component 2" nb.Component(name2, func(node *NodeConfig) (module.ReadyDoneAware, error) { logger.Logf("%s initialized", name2) - return &testComponent{ - testReadyDone: &testReadyDone{ - name: name2, - readyFn: readyFn, - doneFn: doneFn, - }, - startFn: startFn, - started: make(chan struct{}), - }, nil + c := newTestComponent(logger, name2) + c.startFn = func(ctx irrecoverable.SignalerContext, name string) { + // add delay to test components are run serially + time.Sleep(5 * time.Millisecond) + } + return c, nil }) name3 := "component 3" nb.Component(name3, func(node *NodeConfig) (module.ReadyDoneAware, error) { logger.Logf("%s initialized", name3) - return &testReadyDone{ - name: name3, - readyFn: readyFn, - doneFn: doneFn, - }, nil + return newTestReadyDone(logger, name3), nil }) err := nb.handleComponents() @@ -220,59 +163,30 @@ 
func TestOverrideComponent(t *testing.T) { nb := FlowNode("scaffold test") nb.componentBuilder = component.NewComponentManagerBuilder() - logger := testLog{} - - readyFn := func(name string) <-chan struct{} { - ready := make(chan struct{}) - defer close(ready) - logger.Logf("%s ready", name) - return ready - } - doneFn := func(name string) <-chan struct{} { - done := make(chan struct{}) - defer close(done) - logger.Logf("%s done", name) - return done - } + logger := &testLog{} name1 := "component 1" nb.Component(name1, func(node *NodeConfig) (module.ReadyDoneAware, error) { logger.Logf("%s initialized", name1) - return &testReadyDone{ - name: name1, - readyFn: readyFn, - doneFn: doneFn, - }, nil + return newTestReadyDone(logger, name1), nil }) name2 := "component 2" nb.Component(name2, func(node *NodeConfig) (module.ReadyDoneAware, error) { logger.Logf("%s initialized", name2) - return &testReadyDone{ - name: name2, - readyFn: readyFn, - doneFn: doneFn, - }, nil + return newTestReadyDone(logger, name2), nil }) name3 := "component 3" nb.Component(name3, func(node *NodeConfig) (module.ReadyDoneAware, error) { logger.Logf("%s initialized", name3) - return &testReadyDone{ - name: name3, - readyFn: readyFn, - doneFn: doneFn, - }, nil + return newTestReadyDone(logger, name3), nil }) // Overrides second component nb.OverrideComponent(name2, func(node *NodeConfig) (module.ReadyDoneAware, error) { logger.Logf("%s overridden", name2) - return &testReadyDone{ - name: name2, - readyFn: readyFn, - doneFn: doneFn, - }, nil + return newTestReadyDone(logger, name2), nil }) err := nb.handleComponents() @@ -281,6 +195,7 @@ func TestOverrideComponent(t *testing.T) { cm := nb.componentBuilder.Build() cm.Start(signalerCtx) + <-cm.Ready() logs := logger.logs @@ -300,3 +215,416 @@ func TestOverrideComponent(t *testing.T) { cancel() <-cm.Done() } + +type testComponentDefinition struct { + name string + factory ReadyDoneFactory + errorHandler component.OnError +} + +func runRestartableTest(t *testing.T, components []testComponentDefinition, expectedErr error, shutdown <-chan struct{}) { + ctx, cancel := context.WithCancel(context.Background()) + signalerCtx, errChan := irrecoverable.WithSignaler(ctx) + + go func() { + select { + case <-ctx.Done(): + return + case err := <-errChan: + if expectedErr == nil { + assert.NoError(t, err, "unexpected unhandled irrecoverable error") + } else { + assert.ErrorIs(t, err, expectedErr) + } + } + }() + + nb := FlowNode("scaffold test") + nb.componentBuilder = component.NewComponentManagerBuilder() + + for _, c := range components { + if c.errorHandler == nil { + nb.Component(c.name, c.factory) + } else { + nb.RestartableComponent(c.name, c.factory, c.errorHandler) + } + } + + err := nb.handleComponents() + assert.NoError(t, err) + + cm := nb.componentBuilder.Build() + + cm.Start(signalerCtx) + + <-shutdown + cancel() + + <-cm.Done() +} + +func TestRestartableRestartsSuccessfully(t *testing.T) { + + logger := &testLog{} + shutdown := make(chan struct{}) + + name := "component 1" + err := fmt.Errorf("%s error", name) + + starts := 0 + factory := func(node *NodeConfig) (module.ReadyDoneAware, error) { + logger.Logf("%s initialized", name) + c := newTestComponent(logger, name) + c.startFn = func(signalCtx irrecoverable.SignalerContext, name string) { + go func() { + <-c.Ready() + starts++ + if starts == 1 { + signalCtx.Throw(err) + } + close(shutdown) + }() + } + return c, nil + } + + runRestartableTest(t, []testComponentDefinition{ + { + name: name, + factory: factory, + 
errorHandler: testErrorHandler(logger, err), + }, + }, nil, shutdown) + + assert.Equal(t, []string{ + "component 1 initialized", + "component 1 started", + "component 1 ready", + "component 1 done", + "handled error: component 1 error", + "component 1 initialized", + "component 1 started", + "component 1 ready", + "component 1 done", + }, logger.logs) +} + +func TestRestartableStopsSuccessfully(t *testing.T) { + logger := &testLog{} + shutdown := make(chan struct{}) + + name := "component 1" + err := fmt.Errorf("%s error", name) + unexpectedErr := fmt.Errorf("%s unexpected error", name) + + starts := 0 + factory := func(node *NodeConfig) (module.ReadyDoneAware, error) { + logger.Logf("%s initialized", name) + c := newTestComponent(logger, name) + c.startFn = func(signalCtx irrecoverable.SignalerContext, name string) { + go func() { + <-c.Ready() + starts++ + if starts < 2 { + signalCtx.Throw(err) + } + if starts == 2 { + defer close(shutdown) + signalCtx.Throw(unexpectedErr) + } + }() + } + return c, nil + } + + runRestartableTest(t, []testComponentDefinition{ + { + name: name, + factory: factory, + errorHandler: testErrorHandler(logger, err), + }, + }, unexpectedErr, shutdown) + + assert.Equal(t, []string{ + "component 1 initialized", + "component 1 started", + "component 1 ready", + "component 1 done", + "handled error: component 1 error", + "component 1 initialized", + "component 1 started", + "component 1 ready", + "component 1 done", + "handled unexpected error: component 1 unexpected error", + }, logger.logs) +} + +func TestRestartableWithMultipleComponents(t *testing.T) { + logger := &testLog{} + shutdown := make(chan struct{}) + + // synchronization is needed since RestartableComponents are non-blocking + readyComponents := sync.WaitGroup{} + readyComponents.Add(3) + + c1 := func() testComponentDefinition { + name := "component 1" + factory := func(node *NodeConfig) (module.ReadyDoneAware, error) { + logger.Logf("%s initialized", name) + c := newTestReadyDone(logger, name) + c.readyFn = func(name string) { + // delay to demonstrate that components are started serially + time.Sleep(5 * time.Millisecond) + readyComponents.Done() + } + return c, nil + } + + return testComponentDefinition{ + name: name, + factory: factory, + } + } + + c2Initialized := make(chan struct{}) + c2 := func() testComponentDefinition { + name := "component 2" + err := fmt.Errorf("%s error", name) + factory := func(node *NodeConfig) (module.ReadyDoneAware, error) { + defer close(c2Initialized) + logger.Logf("%s initialized", name) + c := newTestComponent(logger, name) + c.startFn = func(ctx irrecoverable.SignalerContext, name string) { + // delay to demonstrate the RestartableComponent startup is non-blocking + time.Sleep(5 * time.Millisecond) + } + c.readyFn = func(name string) { + readyComponents.Done() + } + return c, nil + } + + return testComponentDefinition{ + name: name, + factory: factory, + errorHandler: testErrorHandler(logger, err), + } + } + + c3 := func() testComponentDefinition { + name := "component 3" + err := fmt.Errorf("%s error", name) + starts := 0 + factory := func(node *NodeConfig) (module.ReadyDoneAware, error) { + logger.Logf("%s initialized", name) + c := newTestComponent(logger, name) + c.startFn = func(signalCtx irrecoverable.SignalerContext, name string) { + go func() { + <-c.Ready() + starts++ + if starts == 1 { + signalCtx.Throw(err) + } + <-c2Initialized // can't use ready since it may not be initialized yet + readyComponents.Done() + }() + } + return c, nil + } + + return 
testComponentDefinition{ + name: name, + factory: factory, + errorHandler: testErrorHandler(logger, err), + } + } + + go func() { + readyComponents.Wait() + close(shutdown) + }() + + runRestartableTest(t, []testComponentDefinition{c1(), c2(), c3()}, nil, shutdown) + + logs := logger.logs + + // make sure component 1 is started and ready before any other components start + assert.Equal(t, []string{ + "component 1 initialized", + "component 1 ready", + }, logs[:2]) + + // now split logs by component, and verify we got the right messages/order + component1 := []string{} + component2 := []string{} + component3 := []string{} + unexpected := []string{} + for _, l := range logs { + switch { + case strings.Contains(l, "component 1"): + component1 = append(component1, l) + case strings.Contains(l, "component 2"): + component2 = append(component2, l) + case strings.Contains(l, "component 3"): + component3 = append(component3, l) + default: + unexpected = append(unexpected, l) + } + } + + // no unexpected logs + assert.Len(t, unexpected, 0) + + assert.Equal(t, []string{ + "component 1 initialized", + "component 1 ready", + "component 1 done", + }, component1) + + assert.Equal(t, []string{ + "component 2 initialized", + "component 2 started", + "component 2 ready", + "component 2 done", + }, component2) + + assert.Equal(t, []string{ + "component 3 initialized", + "component 3 started", + "component 3 ready", + "component 3 done", + "handled error: component 3 error", + "component 3 initialized", + "component 3 started", + "component 3 ready", + "component 3 done", + }, component3) + + // components are stopped via context cancellation, so the specific order is random + doneLogs := logs[len(logs)-3:] + assert.ElementsMatch(t, []string{ + "component 1 done", + "component 2 done", + "component 3 done", + }, doneLogs) +} + +func testErrorHandler(logger *testLog, expected error) component.OnError { + return func(err error) component.ErrorHandlingResult { + if errors.Is(err, expected) { + logger.Logf("handled error: %s", err) + return component.ErrorHandlingRestart + } + logger.Logf("handled unexpected error: %s", err) + return component.ErrorHandlingStop + } +} + +func newTestReadyDone(logger *testLog, name string) *testReadyDone { + return &testReadyDone{ + name: name, + logger: logger, + readyFn: func(string) {}, + doneFn: func(string) {}, + ready: make(chan struct{}), + done: make(chan struct{}), + } +} + +type testReadyDone struct { + name string + logger *testLog + + readyFn func(string) + doneFn func(string) + + ready chan struct{} + done chan struct{} + + started bool + stopped bool + mu sync.Mutex +} + +func (c *testReadyDone) Ready() <-chan struct{} { + c.mu.Lock() + defer c.mu.Unlock() + if !c.started { + c.started = true + go func() { + c.readyFn(c.name) + + c.logger.Logf("%s ready", c.name) + close(c.ready) + }() + } + + return c.ready +} + +func (c *testReadyDone) Done() <-chan struct{} { + c.mu.Lock() + defer c.mu.Unlock() + if !c.stopped { + c.stopped = true + go func() { + c.doneFn(c.name) + + c.logger.Logf("%s done", c.name) + close(c.done) + }() + } + + return c.done +} + +func newTestComponent(logger *testLog, name string) *testComponent { + return &testComponent{ + name: name, + logger: logger, + readyFn: func(string) {}, + doneFn: func(string) {}, + startFn: func(irrecoverable.SignalerContext, string) {}, + ready: make(chan struct{}), + done: make(chan struct{}), + } +} + +type testComponent struct { + name string + logger *testLog + + readyFn func(string) + doneFn func(string) + 
startFn func(irrecoverable.SignalerContext, string) + + ready chan struct{} + done chan struct{} +} + +func (c *testComponent) Start(ctx irrecoverable.SignalerContext) { + c.startFn(ctx, c.name) + c.logger.Logf("%s started", c.name) + + go func() { + c.readyFn(c.name) + c.logger.Logf("%s ready", c.name) + close(c.ready) + }() + + go func() { + <-ctx.Done() + + c.doneFn(c.name) + c.logger.Logf("%s done", c.name) + close(c.done) + }() +} + +func (c *testComponent) Ready() <-chan struct{} { + return c.ready +} + +func (c *testComponent) Done() <-chan struct{} { + return c.done +} diff --git a/cmd/testUtil.go b/cmd/testUtil.go index abf8d2c32f6..265a5807a09 100644 --- a/cmd/testUtil.go +++ b/cmd/testUtil.go @@ -18,9 +18,13 @@ func (l *testLog) Logf(msg string, args ...interface{}) { func (l *testLog) Log(msg string) { l.mux.Lock() defer l.mux.Unlock() + l.logs = append(l.logs, msg) } func (l *testLog) Reset() { + l.mux.Lock() + defer l.mux.Unlock() + l.logs = []string{} } diff --git a/cmd/util/cmd/execution-state-extract/execution_state_extract.go b/cmd/util/cmd/execution-state-extract/execution_state_extract.go index 67a70866f2c..a1d98d69c57 100644 --- a/cmd/util/cmd/execution-state-extract/execution_state_extract.go +++ b/cmd/util/cmd/execution-state-extract/execution_state_extract.go @@ -100,6 +100,10 @@ func extractExecutionState( RWF: reportFileWriterFactory, }, "newFungibleTokenTracker": reporters.NewFungibleTokenTracker(log, reportFileWriterFactory, chain, []string{reporters.FlowTokenTypeID(chain)}), + "atree": &reporters.AtreeReporter{ + Log: log, + RWF: reportFileWriterFactory, + }, } } diff --git a/cmd/util/cmd/read-execution-state/list-accounts/cmd.go b/cmd/util/cmd/read-execution-state/list-accounts/cmd.go index 18555b3bbe1..cef6e1adf65 100644 --- a/cmd/util/cmd/read-execution-state/list-accounts/cmd.go +++ b/cmd/util/cmd/read-execution-state/list-accounts/cmd.go @@ -89,12 +89,12 @@ func run(*cobra.Command, []string) { }, } - payload, err := forest.Read(read) + values, err := forest.Read(read) if err != nil { return nil, err } - return payload[0].Value, nil + return values[0], nil }) sth := state.NewStateHolder(state.NewState(ldg)) diff --git a/cmd/util/ledger/reporters/atree_decode.go b/cmd/util/ledger/reporters/atree_decode.go new file mode 100644 index 00000000000..96720afd5c5 --- /dev/null +++ b/cmd/util/ledger/reporters/atree_decode.go @@ -0,0 +1,318 @@ +package reporters + +import ( + "bytes" + "errors" + "fmt" + "math" + + "github.com/fxamacker/cbor/v2" +) + +const ( + versionAndFlagSize = 2 + mapExtraDataLength = 3 + storageIDSize = 16 + digestSize = 8 + flagIndex = 1 +) + +const ( + CBORTagInlineCollisionGroup = 253 + CBORTagExternalCollisionGroup = 254 +) + +var ( + encodedInlineCollisionGroupPrefix = []byte{0xd8, CBORTagInlineCollisionGroup} + encodedExternalCollisionGroupPrefix = []byte{0xd8, CBORTagExternalCollisionGroup} +) + +var decMode = func() cbor.DecMode { + decMode, err := cbor.DecOptions{ + IntDec: cbor.IntDecConvertNone, + MaxArrayElements: math.MaxInt64, + MaxMapPairs: math.MaxInt64, + MaxNestedLevels: math.MaxInt16, + }.DecMode() + if err != nil { + panic(err) + } + return decMode +}() + +// These functions are simplified version of decoding +// functions in onflow/atree. Full functionality requires +// Cadence package which isn't needed here. + +// parseSlabMapData returns raw elements bytes. 
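+// A root map data slab begins with a 2-byte (version, flag) header and CBOR-encoded
+// extra data, followed by a second 2-byte header and the CBOR-encoded elements.
+// A non-root map data slab begins with a 2-byte header and a 16-byte next storage ID,
+// followed by the CBOR-encoded elements.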
+func parseSlabMapData(data []byte) ([]byte, error) { + // Check minimum data length + if len(data) < versionAndFlagSize { + return nil, errors.New("data is too short for map data slab") + } + + isRootSlab := isRoot(data[flagIndex]) + + // Check flag for extra data + if isRootSlab { + // Decode extra data + var err error + data, err = skipMapExtraData(data, decMode) + if err != nil { + return nil, err + } + } + + if len(data) < versionAndFlagSize { + return nil, errors.New("data is too short for map data slab") + } + + // Check flag + flag := data[flagIndex] + mapType := getSlabMapType(flag) + if mapType != slabMapData && mapType != slabMapCollisionGroup { + return nil, fmt.Errorf( + "data has invalid flag 0x%x, want 0x%x or 0x%x", + flag, + maskMapData, + maskCollisionGroup, + ) + } + + contentOffset := versionAndFlagSize + if !isRootSlab { + // Skip next storage ID for non-root slab + contentOffset += storageIDSize + } + + return data[contentOffset:], nil +} + +// getCollisionGroupCountFromSlabMapData returns collision level, +// number of collision groups (inline and external collision groups), +// and error. +func getCollisionGroupCountFromSlabMapData(data []byte) (collisionLevel uint, collisionGroupCount uint, err error) { + elements, err := parseSlabMapData(data) + if err != nil { + return 0, 0, err + } + + collisionLevel, rawElements, err := parseRawElements(elements, decMode) + if err != nil { + return 0, 0, err + } + + for _, rawElem := range rawElements { + if bytes.HasPrefix(rawElem, encodedInlineCollisionGroupPrefix) || + bytes.HasPrefix(rawElem, encodedExternalCollisionGroupPrefix) { + collisionGroupCount++ + } + } + + return collisionLevel, collisionGroupCount, nil +} + +// getInlineCollisionCountsFromSlabMapData returns collision level, inline collision counts, and error. 
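+// Each returned count is the number of elements inside one inline collision group,
+// i.e. an element wrapped in CBOR tag CBORTagInlineCollisionGroup. External collision
+// groups are not inspected by this function.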
+func getInlineCollisionCountsFromSlabMapData(data []byte) (collisionLevel uint, inlineCollisionCount []uint, err error) { + elements, err := parseSlabMapData(data) + if err != nil { + return 0, nil, err + } + + collisionLevel, rawElements, err := parseRawElements(elements, decMode) + if err != nil { + return 0, nil, err + } + + for _, rawElem := range rawElements { + if bytes.HasPrefix(rawElem, encodedInlineCollisionGroupPrefix) { + _, collisionElems, err := parseRawElements(rawElem, decMode) + if err != nil { + return 0, nil, err + } + inlineCollisionCount = append(inlineCollisionCount, uint(len(collisionElems))) + } + } + + return collisionLevel, inlineCollisionCount, nil +} + +func skipMapExtraData(data []byte, decMode cbor.DecMode) ([]byte, error) { + // Check data length + if len(data) < versionAndFlagSize { + return data, errors.New("data is too short for map extra data") + } + + // Check flag + flag := data[1] + if !isRoot(flag) { + return data, fmt.Errorf("data has invalid flag 0x%x, want root flag", flag) + } + + // Decode extra data + r := bytes.NewReader(data[versionAndFlagSize:]) + dec := decMode.NewDecoder(r) + + var v []interface{} + err := dec.Decode(&v) + if err != nil { + return data, errors.New("failed to decode map extra data") + } + + if len(v) != mapExtraDataLength { + return data, fmt.Errorf("map extra data has %d number of elements, want %d", len(v), mapExtraDataLength) + } + + // Reslice for remaining data + n := dec.NumBytesRead() + data = data[versionAndFlagSize+n:] + + return data, nil +} + +type elements struct { + _ struct{} `cbor:",toarray"` + Level uint + DigestBytes []byte + RawElements []cbor.RawMessage +} + +func parseRawElements(data []byte, decMode cbor.DecMode) (uint, []cbor.RawMessage, error) { + var elems elements + err := decMode.Unmarshal(data, &elems) + if err != nil { + return 0, nil, err + } + + if len(elems.DigestBytes)%digestSize != 0 { + return 0, nil, fmt.Errorf("number of digest bytes is not multiple of %d", digestSize) + } + + digestCount := len(elems.DigestBytes) / digestSize + + if digestCount != len(elems.RawElements) { + return 0, nil, fmt.Errorf("found %d digests and %d elements", digestCount, len(elems.RawElements)) + } + + return elems.Level, elems.RawElements, nil +} + +// The remaining code is a subset of onflow/atree/flag.go. +// These functions are not exported by onflow/atree because +// they are implementation details. They are copied here +// to parse atree slabs. 
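+//
+// The flag byte layout: the 3 high bits carry slab-level flags (root, has pointers,
+// any size), the 4th and 5th bits select the slab type (array, map, or storable),
+// and the 3 low bits select the subtype (data, meta, collision group, etc.).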
+ +type slabType int + +const ( + slabTypeUndefined slabType = iota + slabArray + slabMap + slabStorable +) + +type slabArrayType int + +const ( + slabArrayUndefined slabArrayType = iota + slabArrayData + slabArrayMeta + slabLargeImmutableArray + slabBasicArray +) + +type slabMapType int + +const ( + slabMapUndefined slabMapType = iota + slabMapData + slabMapMeta + slabMapLargeEntry + slabMapCollisionGroup +) + +const ( + // Slab flags: 3 high bits + maskSlabRoot byte = 0b100_00000 + // maskSlabHasPointers byte = 0b010_00000 + // maskSlabAnySize byte = 0b001_00000 + + // Array flags: 3 low bits (4th and 5th bits are 0) + // maskArrayData byte = 0b000_00000 + // maskArrayMeta byte = 0b000_00001 + // maskLargeImmutableArray byte = 0b000_00010 // not used for now + // maskBasicArray byte = 0b000_00011 // used for benchmarking + + // Map flags: 3 low bits (4th bit is 0, 5th bit is 1) + maskMapData byte = 0b000_01000 + // maskMapMeta byte = 0b000_01001 + // maskLargeMapEntry byte = 0b000_01010 // not used for now + maskCollisionGroup byte = 0b000_01011 + + // Storable flags: 3 low bits (4th bit is 1, 5th bit is 1) + // maskStorable byte = 0b000_11111 +) + +func isRoot(f byte) bool { + return f&maskSlabRoot > 0 +} + +func getSlabType(f byte) slabType { + // Extract 4th and 5th bits for slab type. + dataType := (f & byte(0b000_11000)) >> 3 + switch dataType { + case 0: + // 4th and 5th bits are 0. + return slabArray + case 1: + // 4th bit is 0 and 5th bit is 1. + return slabMap + case 3: + // 4th and 5th bit are 1. + return slabStorable + default: + return slabTypeUndefined + } +} + +func getSlabArrayType(f byte) slabArrayType { + if getSlabType(f) != slabArray { + return slabArrayUndefined + } + + // Extract 3 low bits for slab array type. + dataType := (f & byte(0b000_00111)) + switch dataType { + case 0: + return slabArrayData + case 1: + return slabArrayMeta + case 2: + return slabLargeImmutableArray + case 3: + return slabBasicArray + default: + return slabArrayUndefined + } +} + +func getSlabMapType(f byte) slabMapType { + if getSlabType(f) != slabMap { + return slabMapUndefined + } + + // Extract 3 low bits for slab map type. 
+ dataType := (f & byte(0b000_00111)) + switch dataType { + case 0: + return slabMapData + case 1: + return slabMapMeta + case 2: + return slabMapLargeEntry + case 3: + return slabMapCollisionGroup + default: + return slabMapUndefined + } +} diff --git a/cmd/util/ledger/reporters/atree_decode_test.go b/cmd/util/ledger/reporters/atree_decode_test.go new file mode 100644 index 00000000000..73b69a7e54b --- /dev/null +++ b/cmd/util/ledger/reporters/atree_decode_test.go @@ -0,0 +1,646 @@ +package reporters + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestMapDataSlabCollisionCount(t *testing.T) { + + testCases := []struct { + name string + data []byte + expectedLevel0ElementCount uint + expectedLevel0CollisionGroupCount uint + expectedLevel0InlineCollsionCount []uint + }{ + { + name: "empty", + data: []byte{ + // extra data + // version + 0x00, + // flag: root + map data + 0x88, + // extra data (CBOR encoded array of 3 elements) + 0x83, + // type info + 0x18, 0x2a, + // count: 0 + 0x00, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + map data + 0x88, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // elements (array of 0 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + expectedLevel0ElementCount: 0, + expectedLevel0CollisionGroupCount: 0, + expectedLevel0InlineCollsionCount: nil, + }, + { + name: "dataslab as root", + data: []byte{ + // extra data + // version + 0x00, + // flag: root + map data + 0x88, + // extra data (CBOR encoded array of 3 elements) + 0x83, + // type info + 0x18, 0x2a, + // count: 1 + 0x01, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + map data + 0x88, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // elements (array of 1 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + // element: [uint64(0):uint64(0)] + 0x82, 0xd8, 0xa4, 0x00, 0xd8, 0xa4, 0x00, + }, + expectedLevel0ElementCount: 1, + expectedLevel0CollisionGroupCount: 0, + expectedLevel0InlineCollsionCount: nil, + }, + { + name: "has pointer no collision", + data: []byte{ + // version + 0x00, + // flag: has pointer + map data + 0x48, + // next storage id + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 4) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + // hkey: 4 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + // hkey: 5 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, + // hkey: 6 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, + // hkey: 7 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + + // elements (array of 4 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + // element: 
[eeeeeeeeeeeeeeeeeeeeee:eeeeeeeeeeeeeeeeeeeeee] + 0x82, + 0x76, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, + 0x76, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, + // element: [ffffffffffffffffffffff:ffffffffffffffffffffff] + 0x82, + 0x76, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x76, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + // element: [gggggggggggggggggggggg:gggggggggggggggggggggg] + 0x82, + 0x76, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, + 0x76, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, 0x67, + // element: [hhhhhhhhhhhhhhhhhhhhhh:StorageID(1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,4)] + 0x82, + 0x76, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, + 0xd8, 0xff, 0x50, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + }, + expectedLevel0ElementCount: 4, + expectedLevel0CollisionGroupCount: 0, + expectedLevel0InlineCollsionCount: nil, + }, + { + name: "inline collision 1 level", + data: []byte{ + // extra data + // version + 0x00, + // flag: root + map data + 0x88, + // extra data (CBOR encoded array of 3 elements) + 0x83, + // type info: "map" + // 0x63, 0x6d, 0x61, 0x70, + 0x18, 0x2A, + // count: 8 + 0x08, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + map data + 0x88, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 4) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // hkey: 1 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + // hkey: 2 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // hkey: 3 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + + // elements (array of 2 elements) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + + // inline collision group corresponding to hkey 0 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 2) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // hkey: 4 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + + // elements (array of 2 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(0), uint64(0)] + 0x82, 0xd8, 0xa4, 0x00, 0xd8, 0xa4, 0x00, + // element: [uint64(4), uint64(8)] + 0x82, 0xd8, 0xa4, 0x04, 0xd8, 0xa4, 0x08, + + // inline collision group corresponding to hkey 1 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 2) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + // hkey: 1 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 
// hkey: 5 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, + + // elements (array of 2 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(1), uint64(2)] + 0x82, 0xd8, 0xa4, 0x01, 0xd8, 0xa4, 0x02, + // element: [uint64(5), uint64(10)] + 0x82, 0xd8, 0xa4, 0x05, 0xd8, 0xa4, 0x0a, + + // inline collision group corresponding to hkey 2 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 2) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + // hkey: 2 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // hkey: 6 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, + + // elements (array of 2 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(2), uint64(4)] + 0x82, 0xd8, 0xa4, 0x02, 0xd8, 0xa4, 0x04, + // element: [uint64(6), uint64(12)] + 0x82, 0xd8, 0xa4, 0x06, 0xd8, 0xa4, 0x0c, + + // inline collision group corresponding to hkey 3 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 2) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + // hkey: 3 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + // hkey: 7 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + + // elements (array of 2 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(3), uint64(6)] + 0x82, 0xd8, 0xa4, 0x03, 0xd8, 0xa4, 0x06, + // element: [uint64(7), uint64(14)] + 0x82, 0xd8, 0xa4, 0x07, 0xd8, 0xa4, 0x0e, + }, + expectedLevel0ElementCount: 4, + expectedLevel0CollisionGroupCount: 4, + expectedLevel0InlineCollsionCount: []uint{2, 2, 2, 2}, + }, + { + name: "inline collision 2 levels", + data: []byte{ + // extra data + // version + 0x00, + // flag: root + map data + 0x88, + // extra data (CBOR encoded array of 3 elements) + 0x83, + // type info: "map" + // 0x63, 0x6d, 0x61, 0x70, + 0x18, 0x2A, + // count: 8 + 0x08, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + map data + 0x88, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 4) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // hkey: 1 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + // hkey: 2 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // hkey: 3 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + + // elements (array of 4 elements) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + + // inline collision group corresponding to hkey 0 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level 1 + 0x01, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // elements (array of 1 elements) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + + // inline collision group corresponding to hkey [0, 0] + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 
elements) + 0x83, + + // level: 2 + 0x02, + + // hkeys (empty byte string) + 0x40, + + // elements (array of 2 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(0), uint64(0)] + 0x82, 0xd8, 0xa4, 0x00, 0xd8, 0xa4, 0x00, + // element: [uint64(4), uint64(8)] + 0x82, 0xd8, 0xa4, 0x04, 0xd8, 0xa4, 0x08, + + // inline collision group corresponding to hkey 1 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 1 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + + // elements (array of 1 elements) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + + // inline collision group corresponding to hkey [1, 1] + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 2 + 0x02, + + // hkeys (empty byte string) + 0x40, + + // elements (array of 2 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(1), uint64(2)] + 0x82, 0xd8, 0xa4, 0x01, 0xd8, 0xa4, 0x02, + // element: [uint64(5), uint64(10)] + 0x82, 0xd8, 0xa4, 0x05, 0xd8, 0xa4, 0x0a, + + // inline collision group corresponding to hkey 2 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // elements (array of 1 element) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + + // inline collision group corresponding to hkey [2, 0] + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 2 + 0x02, + + // hkeys (empty byte string) + 0x40, + + // elements (array of 2 element) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(2), uint64(4)] + 0x82, 0xd8, 0xa4, 0x02, 0xd8, 0xa4, 0x04, + // element: [uint64(6), uint64(12)] + 0x82, 0xd8, 0xa4, 0x06, 0xd8, 0xa4, 0x0c, + + // inline collision group corresponding to hkey 3 + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 1) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 1 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + + // elements (array of 1 element) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + + // inline collision group corresponding to hkey [3, 1] + // (tag number CBORTagInlineCollisionGroup) + 0xd8, 0xfd, + // (tag content: array of 3 elements) + 0x83, + + // level: 2 + 0x02, + + // hkeys (empty byte string) + 0x40, + + // elements (array of 2 element) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // element: [uint64(3), uint64(6)] + 0x82, 0xd8, 0xa4, 0x03, 0xd8, 0xa4, 0x06, + // element: [uint64(7), uint64(14)] + 0x82, 0xd8, 0xa4, 0x07, 0xd8, 0xa4, 0x0e, + }, + expectedLevel0ElementCount: 4, + expectedLevel0CollisionGroupCount: 4, + expectedLevel0InlineCollsionCount: []uint{1, 1, 1, 1}, + }, + { + name: "external collision", + data: []byte{ + // extra data + // version + 0x00, + // flag: root + has pointer + map data + 0xc8, + // extra 
data (CBOR encoded array of 3 elements) + 0x83, + // type info: "map" + // 0x63, 0x6d, 0x61, 0x70, + 0x18, 0x2A, + // count: 10 + 0x14, + // seed + 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, + + // version + 0x00, + // flag: root + has pointer + map data + 0xc8, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 0 + 0x00, + + // hkeys (byte string of length 8 * 2) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + + // elements (array of 2 elements) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + + // external collision group corresponding to hkey 0 + // (tag number CBORTagExternalCollisionGroup) + 0xd8, 0xfe, + // (tag content: storage id) + 0xd8, 0xff, 0x50, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + + // external collision group corresponding to hkey 1 + // (tag number CBORTagExternalCollisionGroup) + 0xd8, 0xfe, + // (tag content: storage id) + 0xd8, 0xff, 0x50, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + }, + expectedLevel0ElementCount: 2, + expectedLevel0CollisionGroupCount: 2, + expectedLevel0InlineCollsionCount: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + collisionLevel, collisionGroupCount, err := getCollisionGroupCountFromSlabMapData(tc.data) + require.NoError(t, err) + require.Equal(t, uint(0), collisionLevel) + require.Equal(t, tc.expectedLevel0CollisionGroupCount, collisionGroupCount) + + collisionLevel, inlineCollisionCount, err := getInlineCollisionCountsFromSlabMapData(tc.data) + require.NoError(t, err) + require.Equal(t, uint(0), collisionLevel) + require.Equal(t, tc.expectedLevel0InlineCollsionCount, inlineCollisionCount) + }) + } +} + +func TestMapDataSlabExternalCollisionCount(t *testing.T) { + data := []byte{ + // version + 0x00, + // flag: any size + collision group + 0x2b, + // next storage id + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // the following encoded data is valid CBOR + + // elements (array of 3 elements) + 0x83, + + // level: 1 + 0x01, + + // hkeys (byte string of length 8 * 10) + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, + // hkey: 0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // hkey: 2 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + // hkey: 4 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + // hkey: 6 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, + // hkey: 8 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + // hkey: 10 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, + // hkey: 12 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, + // hkey: 14 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, + // hkey: 16 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + // hkey: 18 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, + + // elements (array of 10 elements) + // each element is encoded as CBOR array of 2 elements (key, value) + 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, + // element: [uint64(0), uint64(0)] + 0x82, 0xd8, 0xa4, 0x00, 0xd8, 0xa4, 0x00, + // element: [uint64(2), uint64(4)] + 0x82, 0xd8, 0xa4, 0x02, 0xd8, 0xa4, 0x04, + // element: [uint64(4), uint64(8)] + 0x82, 0xd8, 0xa4, 0x04, 0xd8, 0xa4, 0x08, + // element: [uint64(6), uint64(12)] + 0x82, 0xd8, 0xa4, 0x06, 0xd8, 0xa4, 0x0c, + // 
element: [uint64(8), uint64(16)] + 0x82, 0xd8, 0xa4, 0x08, 0xd8, 0xa4, 0x10, + // element: [uint64(10), uint64(20)] + 0x82, 0xd8, 0xa4, 0x0a, 0xd8, 0xa4, 0x14, + // element: [uint64(12), uint64(24)] + 0x82, 0xd8, 0xa4, 0x0c, 0xd8, 0xa4, 0x18, 0x18, + // element: [uint64(14), uint64(28)] + 0x82, 0xd8, 0xa4, 0x0e, 0xd8, 0xa4, 0x18, 0x1c, + // element: [uint64(16), uint64(32)] + 0x82, 0xd8, 0xa4, 0x10, 0xd8, 0xa4, 0x18, 0x20, + // element: [uint64(18), uint64(36)] + 0x82, 0xd8, 0xa4, 0x12, 0xd8, 0xa4, 0x18, 0x24, + } + + const expectedElementCount = 10 + + elements, err := parseSlabMapData(data) + require.NoError(t, err) + + level, rawElements, err := parseRawElements(elements, decMode) + require.NoError(t, err) + require.Equal(t, uint(1), level) + require.Equal(t, expectedElementCount, len(rawElements)) +} diff --git a/cmd/util/ledger/reporters/atree_reporter.go b/cmd/util/ledger/reporters/atree_reporter.go new file mode 100644 index 00000000000..a3f3c0f7e65 --- /dev/null +++ b/cmd/util/ledger/reporters/atree_reporter.go @@ -0,0 +1,243 @@ +package reporters + +import ( + "bytes" + "errors" + "fmt" + goRuntime "runtime" + "sync" + + "github.com/onflow/atree" + + fvmState "github.com/onflow/flow-go/fvm/state" + "github.com/onflow/flow-go/ledger" + + "github.com/rs/zerolog" + "github.com/schollz/progressbar/v3" +) + +// AtreeReporter iterates payloads and generates payload and atree stats. +type AtreeReporter struct { + Log zerolog.Logger + RWF ReportWriterFactory +} + +var _ ledger.Reporter = &AtreeReporter{} + +func (r *AtreeReporter) Name() string { + return "Atree Reporter" +} + +func (r *AtreeReporter) Report(payloads []ledger.Payload) error { + rwa := r.RWF.ReportWriter("atree_report") + defer rwa.Close() + + progress := progressbar.Default(int64(len(payloads)), "Processing:") + + workerCount := goRuntime.NumCPU() / 2 + if workerCount == 0 { + workerCount = 1 + } + + jobs := make(chan *ledger.Payload, workerCount) + + results := make(chan payloadStats, workerCount) + defer close(results) + + // create multiple workers to process payloads concurrently + wg := &sync.WaitGroup{} + wg.Add(workerCount) + + for i := 0; i < workerCount; i++ { + go func() { + defer wg.Done() + + var stats payloadStats + for p := range jobs { + err := stats.process(p) + if err != nil { + r.Log.Err(err).Msgf("failed to process payload %s", p.Key.String()) + } + } + results <- stats + }() + } + + // produce jobs for workers to process + for i := 0; i < len(payloads); i++ { + jobs <- &payloads[i] + + err := progress.Add(1) + if err != nil { + panic(fmt.Errorf("progress.Add(1): %w", err)) + } + } + close(jobs) + + // wait until all jobs are done + wg.Wait() + + err := progress.Finish() + if err != nil { + panic(fmt.Errorf("progress.Finish(): %w", err)) + } + + // aggregate all payload stats + var stats payloadStats + for i := 0; i < workerCount; i++ { + r := <-results + stats.add(&r) + } + + rwa.Write(stats) + + return nil +} + +type payloadType uint + +const ( + unknownPayloadType payloadType = iota + fvmPayloadType + storagePayloadType + slabPayloadType +) + +func getPayloadType(p *ledger.Payload) payloadType { + if len(p.Key.KeyParts) < 3 { + return unknownPayloadType + } + if fvmState.IsFVMStateKey( + string(p.Key.KeyParts[0].Value), + string(p.Key.KeyParts[1].Value), + string(p.Key.KeyParts[2].Value), + ) { + return fvmPayloadType + } + if bytes.HasPrefix(p.Key.KeyParts[2].Value, []byte(atree.LedgerBaseStorageSlabPrefix)) { + return slabPayloadType + } + return storagePayloadType +} + +type slabPayloadStats 
struct { + SlabArrayMetaCount uint + SlabArrayDataCount uint + SlabMapMetaCount uint + SlabMapDataCount uint + SlabMapExternalCollisionCount uint + SlabStorableCount uint + SlabMapCollisionGroupCount uint + + SlabMapCollisionCounts []uint +} + +type payloadStats struct { + FVMPayloadCount uint + StoragePayloadCount uint + SlabPayloadCount uint + slabPayloadStats +} + +func (s *payloadStats) add(s1 *payloadStats) { + s.FVMPayloadCount += s1.FVMPayloadCount + s.StoragePayloadCount += s1.StoragePayloadCount + s.SlabPayloadCount += s1.SlabPayloadCount + s.SlabArrayMetaCount += s1.SlabArrayMetaCount + s.SlabArrayDataCount += s1.SlabArrayDataCount + s.SlabMapMetaCount += s1.SlabMapMetaCount + s.SlabMapDataCount += s1.SlabMapDataCount + s.SlabMapExternalCollisionCount += s1.SlabMapExternalCollisionCount + s.SlabStorableCount += s1.SlabStorableCount + s.SlabMapCollisionGroupCount += s1.SlabMapCollisionGroupCount + + if len(s1.SlabMapCollisionCounts) > 0 { + s.SlabMapCollisionCounts = append(s.SlabMapCollisionCounts, s1.SlabMapCollisionCounts...) + } +} + +func (s *payloadStats) process(p *ledger.Payload) error { + pt := getPayloadType(p) + switch pt { + case unknownPayloadType: + return fmt.Errorf("unknown payload: key %v", p.Key.KeyParts) + case fvmPayloadType: + s.FVMPayloadCount++ + case storagePayloadType: + s.StoragePayloadCount++ + case slabPayloadType: + s.SlabPayloadCount++ + } + + if pt != slabPayloadType { + return nil + } + + if len(p.Value) < versionAndFlagSize { + return errors.New("data is too short") + } + + flag := p.Value[flagIndex] + + switch dataType := getSlabType(flag); dataType { + case slabArray: + switch arrayDataType := getSlabArrayType(flag); arrayDataType { + case slabArrayData: + s.SlabArrayDataCount++ + + case slabArrayMeta: + s.SlabArrayMetaCount++ + + default: + return fmt.Errorf("slab array has invalid flag 0x%x", flag) + } + + case slabMap: + switch mapDataType := getSlabMapType(flag); mapDataType { + case slabMapData: + _, collisionGroupCount, err := getCollisionGroupCountFromSlabMapData(p.Value) + if err != nil { + return err + } + if collisionGroupCount > 0 { + _, inlineCollisionCount, err := getInlineCollisionCountsFromSlabMapData(p.Value) + if err != nil { + return err + } + if len(inlineCollisionCount) > 0 { + s.SlabMapCollisionCounts = append(s.SlabMapCollisionCounts, inlineCollisionCount...) 
+ } + } + s.SlabMapCollisionGroupCount += collisionGroupCount + s.SlabMapDataCount++ + + case slabMapCollisionGroup: + elements, err := parseSlabMapData(p.Value) + if err != nil { + return err + } + _, rawElements, err := parseRawElements(elements, decMode) + if err != nil { + return err + } + if len(rawElements) > 0 { + s.SlabMapCollisionCounts = append(s.SlabMapCollisionCounts, uint(len(rawElements))) + } + s.SlabMapExternalCollisionCount++ + + case slabMapMeta: + s.SlabMapMetaCount++ + + default: + return fmt.Errorf("slab map has invalid flag 0x%x", flag) + } + + case slabStorable: + s.SlabStorableCount++ + + default: + return fmt.Errorf("slab data has invalid flag 0x%x", flag) + } + + return nil +} diff --git a/consensus/hotstuff/signature/packer.go b/consensus/hotstuff/signature/packer.go index 66fe719d134..30953397253 100644 --- a/consensus/hotstuff/signature/packer.go +++ b/consensus/hotstuff/signature/packer.go @@ -1,7 +1,6 @@ package signature import ( - "errors" "fmt" "github.com/onflow/flow-go/consensus/hotstuff" @@ -77,10 +76,10 @@ func (p *ConsensusSigDataPacker) Unpack(signerIdentities flow.IdentityList, sigD stakingSigners, randomBeaconSigners, err := signature.DecodeSigTypeToStakingAndBeaconSigners(signerIdentities, data.SigType) if err != nil { - if errors.Is(err, signature.ErrIllegallyPaddedBitVector) || errors.Is(err, signature.ErrIncompatibleBitVectorLength) { - return nil, model.NewInvalidFormatErrorf("invalid SigType vector: %w", err) + if signature.IsInvalidSigTypesError(err) { + return nil, model.NewInvalidFormatErrorf("invalid signer type data.SigType %v: %w", data.SigType, err) } - return nil, fmt.Errorf("could not decode signer indices and sig type: %w", err) + return nil, fmt.Errorf("unexpected exception unpacking signer data data.SigType %v: %w", data.SigType, err) } return &hotstuff.BlockSignatureData{ diff --git a/consensus/hotstuff/validator/validator.go b/consensus/hotstuff/validator/validator.go index 5b00dc48f05..6eee31e9ff1 100644 --- a/consensus/hotstuff/validator/validator.go +++ b/consensus/hotstuff/validator/validator.go @@ -55,7 +55,7 @@ func (v *Validator) ValidateQC(qc *flow.QuorumCertificate, block *model.Block) e signers, err := signature.DecodeSignerIndicesToIdentities(allParticipants, qc.SignerIndices) if err != nil { - if signature.IsDecodeSignerIndicesError(err) { + if signature.IsInvalidSignerIndicesError(err) { return newInvalidBlockError(block, fmt.Errorf("invalid signer indices: %w", err)) } // unexpected error diff --git a/engine/access/ingestion/engine.go b/engine/access/ingestion/engine.go index ebe6afe6b60..281ab8f773d 100644 --- a/engine/access/ingestion/engine.go +++ b/engine/access/ingestion/engine.go @@ -629,6 +629,7 @@ func (e *Engine) requestCollectionsInFinalizedBlock(missingColls []*flow.Collect // TODO: move this query out of for loop? 
guarantors, err := protocol.FindGuarantors(e.state, cg) if err != nil { + // failed to find guarantors for guarantees contained in a finalized block is fatal error e.log.Fatal().Err(err).Msgf("could not find guarantors for guarantee %v", cg.ID()) } e.request.EntityByID(cg.ID(), filter.HasNodeID(guarantors...)) diff --git a/engine/access/rpc/backend/backend_test.go b/engine/access/rpc/backend/backend_test.go index 72065903d6e..797aa2f33e4 100644 --- a/engine/access/rpc/backend/backend_test.go +++ b/engine/access/rpc/backend/backend_test.go @@ -658,6 +658,7 @@ func (suite *Suite) TestGetTransactionResultByIndex() { result, err := backend.GetTransactionResultByIndex(ctx, blockId, index) suite.checkResponse(result, err) + suite.Assert().Equal(result.BlockHeight, block.Header.Height) suite.assertAllExpectations() } diff --git a/engine/access/rpc/backend/backend_transactions.go b/engine/access/rpc/backend/backend_transactions.go index d952ebd1f59..6ee3122dbff 100644 --- a/engine/access/rpc/backend/backend_transactions.go +++ b/engine/access/rpc/backend/backend_transactions.go @@ -264,10 +264,12 @@ func (b *backendTransactions) GetTransactionResult( var events []flow.Event var txError string var statusCode uint32 + var blockHeight uint64 // access node may not have the block if it hasn't yet been finalized, hence block can be nil at this point if block != nil { blockID = block.ID() transactionWasExecuted, events, statusCode, txError, err = b.lookupTransactionResult(ctx, txID, blockID) + blockHeight = block.Header.Height if err != nil { return nil, convertStorageError(err) } @@ -288,6 +290,7 @@ func (b *backendTransactions) GetTransactionResult( ErrorMessage: txError, BlockID: blockID, TransactionID: txID, + BlockHeight: blockHeight, }, nil } @@ -353,6 +356,7 @@ func (b *backendTransactions) GetTransactionResultsByBlockID( BlockID: blockID, TransactionID: txID, CollectionID: guarantee.CollectionID, + BlockHeight: block.Header.Height, }) i++ @@ -383,6 +387,7 @@ func (b *backendTransactions) GetTransactionResultsByBlockID( ErrorMessage: systemTxResult.GetErrorMessage(), BlockID: blockID, TransactionID: systemTx.ID(), + BlockHeight: block.Header.Height, }) return results, nil @@ -436,6 +441,7 @@ func (b *backendTransactions) GetTransactionResultByIndex( Events: convert.MessagesToEvents(resp.GetEvents()), ErrorMessage: resp.GetErrorMessage(), BlockID: blockID, + BlockHeight: block.Header.Height, }, nil } diff --git a/engine/collection/compliance/engine.go b/engine/collection/compliance/engine.go index dd8f05d1e06..796790e729f 100644 --- a/engine/collection/compliance/engine.go +++ b/engine/collection/compliance/engine.go @@ -206,8 +206,15 @@ func (e *Engine) Ready() <-chan struct{} { e.unit.Launch(e.finalizationProcessingLoop) ctx, cancel := context.WithCancel(context.Background()) - signalerCtx, _ := irrecoverable.WithSignaler(ctx) + signalerCtx, hotstuffErrChan := irrecoverable.WithSignaler(ctx) e.stopHotstuff = cancel + + // TODO: this workaround for handling fatal HotStuff errors is required only + // because this engine and epochmgr do not use the Component pattern yet + e.unit.Launch(func() { + e.handleHotStuffError(hotstuffErrChan) + }) + e.core.hotstuff.Start(signalerCtx) // wait for request handler to startup <-e.core.hotstuff.Ready() @@ -448,3 +455,21 @@ func (e *Engine) finalizationProcessingLoop() { } } } + +// handleHotStuffError accepts the error channel from the HotStuff component and +// crashes the node if any error is detected. 
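+// The loop exits when the engine unit quits; otherwise any non-nil error received on
+// the channel is treated as irrecoverable and crashes the node via a fatal log.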
+// TODO: this function should be removed in favour of refactoring this engine and +// the epochmgr engine to use the Component pattern, so that irrecoverable errors +// can be bubbled all the way to the node scaffold +func (e *Engine) handleHotStuffError(hotstuffErrs <-chan error) { + for { + select { + case <-e.unit.Quit(): + return + case err := <-hotstuffErrs: + if err != nil { + e.log.Fatal().Err(err).Msg("encountered fatal error in HotStuff") + } + } + } +} diff --git a/engine/common/follower/engine.go b/engine/common/follower/engine.go index 4f9ebe12a93..8366aaf0f1a 100644 --- a/engine/common/follower/engine.go +++ b/engine/common/follower/engine.go @@ -294,7 +294,7 @@ func (e *Engine) onBlockProposal(originID flow.Identifier, proposal *messages.Bl // at this point, we should be able to connect the proposal to the finalized // state and should process it to see whether to forward to hotstuff or not - err = e.processBlockProposal(ctx, proposal) + err = e.processBlockAndDescendants(ctx, proposal) if err != nil { return fmt.Errorf("could not process block proposal: %w", err) } @@ -308,11 +308,11 @@ func (e *Engine) onBlockProposal(originID flow.Identifier, proposal *messages.Bl return nil } -// processBlockProposal processes blocks that are already known to connect to -// the finalized state; if a parent of children is validly processed, it means -// the children are also still on a valid chain and all missing links are there; -// no need to do all the processing again. -func (e *Engine) processBlockProposal(ctx context.Context, proposal *messages.BlockProposal) error { +// processBlockAndDescendants processes `proposal` and its pending descendants recursively. +// The function assumes that `proposal` is connected to the finalized state. By induction, +// any children are therefore also connected to the finalized state and can be processed as well. +// No errors are expected during normal operations. +func (e *Engine) processBlockAndDescendants(ctx context.Context, proposal *messages.BlockProposal) error { span, ctx := e.tracer.StartSpanFromContext(ctx, trace.FollowerProcessBlockProposal) defer span.Finish() @@ -342,19 +342,20 @@ func (e *Engine) processBlockProposal(ctx context.Context, proposal *messages.Bl // it only checks the block header, since checking block body is expensive. // The full block check is done by the consensus participants. err := e.state.Extend(ctx, block) - // if the error is a known invalid extension of the protocol state, then - // the input is invalid - if state.IsInvalidExtensionError(err) { - return engine.NewInvalidInputErrorf("invalid extension of protocol state: %w", err) - } - - // if the error is a known outdated extension of the protocol state, then - // the input is outdated - if state.IsOutdatedExtensionError(err) { - return engine.NewOutdatedInputErrorf("outdated extension of protocol state: %w", err) - } - if err != nil { + // block is outdated by the time we started processing it + // => some other node generating the proposal is probably behind is catching up. 
+ if state.IsOutdatedExtensionError(err) { + log.Info().Err(err).Msg("dropped processing of abandoned fork; this might be an indicator that some consensus node is behind") + return nil + } + // the block is invalid; log as error as we desire honest participation + // ToDo: potential slashing + if state.IsInvalidExtensionError(err) { + log.Warn().Err(err).Msg("received invalid block from other node (potential slashing evidence?)") + return nil + } + return fmt.Errorf("could not extend protocol state: %w", err) } @@ -401,7 +402,7 @@ func (e *Engine) processPendingChildren(ctx context.Context, header *flow.Header Header: child.Header, Payload: child.Payload, } - err := e.processBlockProposal(ctx, proposal) + err := e.processBlockAndDescendants(ctx, proposal) if err != nil { result = multierror.Append(result, err) } diff --git a/engine/consensus/compliance/engine.go b/engine/consensus/compliance/engine.go index 817f98c047b..ac62edd590f 100644 --- a/engine/consensus/compliance/engine.go +++ b/engine/consensus/compliance/engine.go @@ -179,8 +179,15 @@ func (e *Engine) Ready() <-chan struct{} { e.unit.Launch(e.finalizationProcessingLoop) ctx, cancel := context.WithCancel(context.Background()) - signalerCtx, _ := irrecoverable.WithSignaler(ctx) + signalerCtx, hotstuffErrChan := irrecoverable.WithSignaler(ctx) e.stopHotstuff = cancel + + // TODO: this workaround for handling fatal HotStuff errors is required only + // because this engine and epochmgr do not use the Component pattern yet + e.unit.Launch(func() { + e.handleHotStuffError(hotstuffErrChan) + }) + e.core.hotstuff.Start(signalerCtx) // wait for request handler to startup @@ -427,3 +434,21 @@ func (e *Engine) finalizationProcessingLoop() { } } } + +// handleHotStuffError accepts the error channel from the HotStuff component and +// crashes the node if any error is detected. 
+// TODO: this function should be removed in favour of refactoring this engine and +// the epochmgr engine to use the Component pattern, so that irrecoverable errors +// can be bubbled all the way to the node scaffold +func (e *Engine) handleHotStuffError(hotstuffErrs <-chan error) { + for { + select { + case <-e.unit.Quit(): + return + case err := <-hotstuffErrs: + if err != nil { + e.log.Fatal().Err(err).Msg("encountered fatal error in HotStuff") + } + } + } +} diff --git a/engine/consensus/ingestion/core.go b/engine/consensus/ingestion/core.go index bfd5d8dba93..970373c6479 100644 --- a/engine/consensus/ingestion/core.go +++ b/engine/consensus/ingestion/core.go @@ -159,7 +159,7 @@ func (e *Core) validateGuarantors(guarantee *flow.CollectionGuarantee) error { } // cluster not found by the chain ID if errors.Is(err, protocol.ErrClusterNotFound) { - return engine.NewInvalidInputErrorf("cluster not found by chain ID %v, %w", guarantee.ChainID, err) + return engine.NewInvalidInputErrorf("cluster not found by chain ID %v: %w", guarantee.ChainID, err) } if err != nil { return fmt.Errorf("internal error retrieving collector clusters for guarantee (ReferenceBlockID: %v, ChainID: %v): %w", @@ -172,8 +172,8 @@ func (e *Core) validateGuarantors(guarantee *flow.CollectionGuarantee) error { // find guarantors by signer indices guarantors, err := signature.DecodeSignerIndicesToIdentities(clusterMembers, guarantee.SignerIndices) if err != nil { - if signature.IsDecodeSignerIndicesError(err) { - return engine.NewInvalidInputErrorf("could not decode guarantor indices: %v", err) + if signature.IsInvalidSignerIndicesError(err) { + return engine.NewInvalidInputErrorf("could not decode guarantor indices: %w", err) } // unexpected error return fmt.Errorf("unexpected internal error decoding signer indices: %w", err) diff --git a/engine/consensus/ingestion/core_test.go b/engine/consensus/ingestion/core_test.go index dd5ba129add..85b5bbbb536 100644 --- a/engine/consensus/ingestion/core_test.go +++ b/engine/consensus/ingestion/core_test.go @@ -252,7 +252,8 @@ func (suite *IngestionCoreSuite) TestOnGuaranteeInvalidGuarantor() { // submit the guarantee as if it was sent by a collection node err := suite.core.OnGuarantee(suite.collID, guarantee) suite.Assert().Error(err, "should error with invalid guarantor") - suite.Assert().True(engine.IsInvalidInputError(err)) + suite.Assert().True(engine.IsInvalidInputError(err), err) + suite.Assert().True(signature.IsInvalidSignerIndicesError(err), err) // check that the guarantee has _not_ been added to the mempool suite.pool.AssertNotCalled(suite.T(), "Add", guarantee) diff --git a/engine/consensus/sealing/engine.go b/engine/consensus/sealing/engine.go index cc7f1263783..98549df855b 100644 --- a/engine/consensus/sealing/engine.go +++ b/engine/consensus/sealing/engine.go @@ -161,12 +161,12 @@ func (e *Engine) setupTrustedInboundQueues() error { var err error e.pendingIncorporatedResults, err = fifoqueue.NewFifoQueue() if err != nil { - return fmt.Errorf("failed to create queue for incorproated results: %w", err) + return fmt.Errorf("failed to create queue for incorporated results: %w", err) } e.pendingIncorporatedBlocks, err = fifoqueue.NewFifoQueue( fifoqueue.WithCapacity(defaultIncorporatedBlockQueueCapacity)) if err != nil { - return fmt.Errorf("failed to create queue for incorproated blocks: %w", err) + return fmt.Errorf("failed to create queue for incorporated blocks: %w", err) } return nil } diff --git a/engine/execution/computation/computer/uploader/uploader_test.go 
b/engine/execution/computation/computer/uploader/uploader_test.go index 9e626bec676..d634b429eba 100644 --- a/engine/execution/computation/computer/uploader/uploader_test.go +++ b/engine/execution/computation/computer/uploader/uploader_test.go @@ -6,6 +6,7 @@ import ( "fmt" "io/ioutil" "os" + "runtime/debug" "sync" "testing" "time" @@ -19,6 +20,7 @@ import ( "github.com/onflow/flow-go/engine/execution/state/unittest" "github.com/onflow/flow-go/module/metrics" testutils "github.com/onflow/flow-go/utils/unittest" + unittest2 "github.com/onflow/flow-go/utils/unittest" ) func Test_AsyncUploader(t *testing.T) { @@ -26,18 +28,18 @@ func Test_AsyncUploader(t *testing.T) { computationResult := unittest.ComputationResultFixture(nil) t.Run("uploads are run in parallel and emit metrics", func(t *testing.T) { - wgCalled := sync.WaitGroup{} - wgCalled.Add(3) + wgUploadStarted := sync.WaitGroup{} + wgUploadStarted.Add(3) - wgAllDone := sync.WaitGroup{} - wgAllDone.Add(1) + wgContinueUpload := sync.WaitGroup{} + wgContinueUpload.Add(1) uploader := &DummyUploader{ f: func() error { // this should be called 3 times - wgCalled.Done() + wgUploadStarted.Done() - wgAllDone.Wait() + wgContinueUpload.Wait() return nil }, @@ -55,12 +57,13 @@ func Test_AsyncUploader(t *testing.T) { err = async.Upload(computationResult) require.NoError(t, err) - wgCalled.Wait() // all three are in progress, check metrics + wgUploadStarted.Wait() // all three are in progress, check metrics require.Equal(t, int64(3), metrics.Counter.Load()) - wgAllDone.Done() //release all + wgContinueUpload.Done() //release all + // shut down component <-async.Done() require.Equal(t, int64(0), metrics.Counter.Load()) @@ -76,6 +79,7 @@ func Test_AsyncUploader(t *testing.T) { uploader := &DummyUploader{ f: func() error { + // force an upload error to test that upload is retried 3 times if callCount < 3 { callCount++ return fmt.Errorf("artificial upload error") @@ -95,35 +99,78 @@ func Test_AsyncUploader(t *testing.T) { require.Equal(t, 3, callCount) }) - time.Sleep(1 * time.Second) - + // This test shuts down the async uploader right after the upload has started. The upload has an error to force + // the retry mechanism to kick in (under normal circumstances). Since the component is shutting down, the retry + // should not kick in. + // + // sequence of events: + // 1. create async uploader and initiate upload with an error - to force retrying + // 2. shut down async uploader right after upload initiated (not completed) + // 3. 
assert that upload called only once even when trying to use retry mechanism t.Run("stopping component stops retrying", func(t *testing.T) { testutils.SkipUnless(t, testutils.TEST_FLAKY, "flaky") callCount := 0 + t.Log("test started grID:", string(bytes.Fields(debug.Stack())[1])) - wg := sync.WaitGroup{} - wg.Add(1) + // this wait group ensures that async uploader has a chance to start the upload before component is shut down + // otherwise, there's a race condition that can happen where the component can shut down before the async uploader + // has a chance to start the upload + wgUploadStarted := sync.WaitGroup{} + wgUploadStarted.Add(1) + + // this wait group ensures that async uploader won't send an error (to test if retry will kick in) until + // the component has initiated shutting down (which should stop retry from working) + wgShutdownStarted := sync.WaitGroup{} + wgShutdownStarted.Add(1) + t.Log("added 1 to wait group grID:", string(bytes.Fields(debug.Stack())[1])) uploader := &DummyUploader{ f: func() error { - defer func() { + t.Log("DummyUploader func() - about to call wgUploadStarted.Done() grID:", string(bytes.Fields(debug.Stack())[1])) + // signal to main goroutine that upload started, so it can initiate shutting down component + wgUploadStarted.Done() + + t.Log("DummyUpload func() waiting for component shutdown to start grID:", string(bytes.Fields(debug.Stack())[1])) + wgShutdownStarted.Wait() + t.Log("DummyUploader func() component shutdown started, about to return error grID:", string(bytes.Fields(debug.Stack())[1])) + + // force an upload error to test that upload is never retried (because component is shut down) + // normally, we would see retry mechanism kick in and the callCount would be > 1 + // but since component has started shutting down, we expect callCount to be 1 + // In summary, callCount SHOULD be called only once - but we want the test to TRY and call it more than once to prove that it + // was only called it once. If we changed it to 'callCount < 1' that wouldn't prove that the test tried to call it more than once + // and wouldn't prove that stopping the component stopped the retry mechanism. 
+ if callCount < 5 { + t.Logf("DummyUploader func() incrementing callCount=%d grID: %s", callCount, string(bytes.Fields(debug.Stack())[1])) callCount++ - }() - wg.Wait() - return fmt.Errorf("this should return only once") + t.Logf("DummyUploader func() about to return error callCount=%d grID: %s", callCount, string(bytes.Fields(debug.Stack())[1])) + return fmt.Errorf("this should return only once") + } + return nil }, } - + t.Log("about to create NewAsyncUploader grID:", string(bytes.Fields(debug.Stack())[1])) async := NewAsyncUploader(uploader, 1*time.Nanosecond, 5, zerolog.Nop(), &metrics.NoopCollector{}) - + t.Log("about to call async.Upload() grID:", string(bytes.Fields(debug.Stack())[1])) err := async.Upload(computationResult) // doesn't matter what we upload require.NoError(t, err) + // stop component and check that it's fully stopped + t.Log("about to close async uploader grID:", string(bytes.Fields(debug.Stack())[1])) + + // wait until upload has started before shutting down the component + wgUploadStarted.Wait() + + // stop component and check that it's fully stopped + t.Log("about to initiate shutdown grID: ", string(bytes.Fields(debug.Stack())[1])) c := async.Done() - wg.Done() - <-c + t.Log("about to notify upload() that shutdown started and can continue uploading grID:", string(bytes.Fields(debug.Stack())[1])) + wgShutdownStarted.Done() + t.Log("about to check async done channel is closed grID:", string(bytes.Fields(debug.Stack())[1])) + unittest2.RequireCloseBefore(t, c, 1*time.Second, "async uploader not closed in time") + t.Log("about to check if callCount is 1 grID:", string(bytes.Fields(debug.Stack())[1])) require.Equal(t, 1, callCount) }) diff --git a/engine/execution/computation/manager.go b/engine/execution/computation/manager.go index c24c99d418a..2ad1c147aec 100644 --- a/engine/execution/computation/manager.go +++ b/engine/execution/computation/manager.go @@ -322,7 +322,7 @@ func (e *Manager) ComputeBlock( e.edCache.Insert(block.Block.Header, blobTree) e.log.Info().Hex("block_id", logging.Entity(block.Block)).Hex("execution_data_id", rootID[:]).Msg("execution data ID computed") - // result.ExecutionDataID = rootID + result.ExecutionDataID = rootID return result, nil } diff --git a/engine/execution/computation/manager_test.go b/engine/execution/computation/manager_test.go index bcc0aef693f..99d6b42f8fe 100644 --- a/engine/execution/computation/manager_test.go +++ b/engine/execution/computation/manager_test.go @@ -4,11 +4,15 @@ import ( "bytes" "context" "fmt" + "math" "sync" "testing" "time" "github.com/onflow/cadence" + jsoncdc "github.com/onflow/cadence/encoding/json" + "github.com/onflow/cadence/runtime" + "github.com/onflow/cadence/runtime/common" "github.com/rs/zerolog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -205,7 +209,7 @@ func TestExecuteScript(t *testing.T) { vm := fvm.NewVirtualMachine(rt) - ledger := testutil.RootBootstrappedLedger(vm, execCtx) + ledger := testutil.RootBootstrappedLedger(vm, execCtx, fvm.WithExecutionMemoryLimit(math.MaxUint64)) view := delta.NewView(ledger.Get) @@ -495,3 +499,65 @@ func TestExecuteScriptCancelled(t *testing.T) { require.Nil(t, value) require.Contains(t, err.Error(), fvmErrors.ErrCodeScriptExecutionCancelledError.String()) } + +func TestScriptStorageMutationsDiscarded(t *testing.T) { + + timeout := 1 * time.Millisecond + vm := fvm.NewVirtualMachine(fvm.NewInterpreterRuntime()) + chain := flow.Mainnet.Chain() + ctx := fvm.NewContext(zerolog.Nop(), fvm.WithChain(chain)) + manager, _ := New( 
+ zerolog.Nop(), + metrics.NewNoopCollector(), + nil, + nil, + nil, + vm, + ctx, + DefaultProgramsCacheSize, + committer.NewNoopViewCommitter(), + DefaultScriptLogThreshold, + timeout, + nil, + nil, + nil) + view := testutil.RootBootstrappedLedger(vm, ctx) + programs := programs.NewEmptyPrograms() + st := state.NewState(view) + sth := state.NewStateHolder(st) + env := fvm.NewScriptEnvironment(context.Background(), ctx, vm, sth, programs) + + // Create an account private key. + privateKeys, err := testutil.GenerateAccountPrivateKeys(1) + require.NoError(t, err) + + // Bootstrap a ledger, creating accounts with the provided private keys and the root account. + accounts, err := testutil.CreateAccounts(vm, view, programs, privateKeys, chain) + require.NoError(t, err) + account := accounts[0] + address := cadence.NewAddress(account) + commonAddress, _ := common.HexToAddress(address.Hex()) + + script := []byte(` + pub fun main(account: Address) { + let acc = getAuthAccount(account) + acc.save(3, to: /storage/x) + } + `) + + header := unittest.BlockHeaderFixture() + scriptView := view.NewChild() + _, err = manager.ExecuteScript(context.Background(), script, [][]byte{jsoncdc.MustEncode(address)}, &header, scriptView) + + require.NoError(t, err) + + v, err := vm.Runtime.ReadStored( + commonAddress, + cadence.NewPath("storage", "x"), + runtime.Context{Interface: env}, + ) + + // the save should not update account storage by writing the delta from the child view back to the parent + require.NoError(t, err) + require.Equal(t, nil, v) +} diff --git a/engine/execution/ingestion/engine.go b/engine/execution/ingestion/engine.go index c7f062f17d0..707c51e4fc8 100644 --- a/engine/execution/ingestion/engine.go +++ b/engine/execution/ingestion/engine.go @@ -998,6 +998,15 @@ func (e *Engine) matchOrRequestCollections( guarantors, err := protocol.FindGuarantors(e.state, guarantee) if err != nil { + // execution node executes certified blocks, which means there is a quorum of consensus nodes who + // have validated the block payload. And that validation includes checking the guarantors are correct. 
+ // Based on that assumption, failing to find guarantors for guarantees contained in an incorporated block + // should be treated as fatal error + e.log.Fatal().Err(err).Msgf("failed to find guarantors for guarantee %v at block %v, height %v", + guarantee.ID(), + executableBlock.ID(), + executableBlock.Height(), + ) return fmt.Errorf("could not find guarantors: %w", err) } // queue the collection to be requested from one of the guarantors diff --git a/engine/execution/state/state.go b/engine/execution/state/state.go index c922ea0014a..44e22f1cf28 100644 --- a/engine/execution/state/state.go +++ b/engine/execution/state/state.go @@ -133,11 +133,10 @@ func NewExecutionState( } -func makeSingleValueQuery(commitment flow.StateCommitment, owner, controller, key string) (*ledger.Query, error) { - return ledger.NewQuery(ledger.State(commitment), - []ledger.Key{ - RegisterIDToKey(flow.NewRegisterID(owner, controller, key)), - }) +func makeSingleValueQuery(commitment flow.StateCommitment, owner, controller, key string) (*ledger.QuerySingleValue, error) { + return ledger.NewQuerySingleValue(ledger.State(commitment), + RegisterIDToKey(flow.NewRegisterID(owner, controller, key)), + ) } func makeQuery(commitment flow.StateCommitment, ids []flow.RegisterID) (*ledger.Query, error) { @@ -187,26 +186,21 @@ func LedgerGetRegister(ldg ledger.Ledger, commitment flow.StateCommitment) delta return nil, fmt.Errorf("cannot create ledger query: %w", err) } - values, err := ldg.Get(query) + value, err := ldg.GetSingleValue(query) if err != nil { return nil, fmt.Errorf("error getting register (%s) value at %x: %w", key, commitment, err) } - // We expect 1 element in the returned slice of values because query is from makeSingleValueQuery() - if len(values) != 1 { - return nil, fmt.Errorf("error getting register (%s) value at %x: number of returned values (%d) != number of queried keys (%d)", key, commitment, len(values), len(query.Keys())) - } - // Prevent caching of value with len zero - if len(values[0]) == 0 { + if len(value) == 0 { return nil, nil } // don't cache value with len zero - readCache[regID] = flow.RegisterEntry{Key: regID, Value: values[0]} + readCache[regID] = flow.RegisterEntry{Key: regID, Value: value} - return values[0], nil + return value, nil } } diff --git a/engine/verification/assigner/blockconsumer/blockjob.go b/engine/verification/assigner/blockconsumer/blockjob.go deleted file mode 100644 index ca024d557f7..00000000000 --- a/engine/verification/assigner/blockconsumer/blockjob.go +++ /dev/null @@ -1,40 +0,0 @@ -package blockconsumer - -import ( - "fmt" - - "github.com/onflow/flow-go/model/flow" - "github.com/onflow/flow-go/module" -) - -// BlockJob implements the Job interface. It converts a Block into a Job to be used by job queue. -// -// In current architecture, BlockJob represents a finalized block enqueued to be processed by the BlockConsumer that implements the -// JobQueue interface. -type BlockJob struct { - Block *flow.Block -} - -// ID converts block id into job id, which guarantees uniqueness. -func (j BlockJob) ID() module.JobID { - return JobID(j.Block.ID()) -} - -// JobID returns the corresponding unique job id of the BlockJob for this job. -func JobID(blockID flow.Identifier) module.JobID { - return module.JobID(fmt.Sprintf("%v", blockID)) -} - -// JobToBlock converts a block job into its corresponding block. 
-func JobToBlock(job module.Job) (*flow.Block, error) { - blockJob, ok := job.(*BlockJob) - if !ok { - return nil, fmt.Errorf("could not assert job to block, job id: %x", job.ID()) - } - return blockJob.Block, nil -} - -// BlockToJob converts the block to a BlockJob. -func BlockToJob(block *flow.Block) *BlockJob { - return &BlockJob{Block: block} -} diff --git a/engine/verification/assigner/blockconsumer/consumer.go b/engine/verification/assigner/blockconsumer/consumer.go index ac7eb66f44e..e0913a45fa6 100644 --- a/engine/verification/assigner/blockconsumer/consumer.go +++ b/engine/verification/assigner/blockconsumer/consumer.go @@ -57,9 +57,9 @@ func NewBlockConsumer(log zerolog.Logger, blockProcessor.WithBlockConsumerNotifier(worker) // the block reader is where the consumer reads new finalized blocks from (i.e., jobs). - jobs := NewFinalizedBlockReader(state, blocks) + jobs := jobqueue.NewFinalizedBlockReader(state, blocks) - consumer := jobqueue.NewConsumer(lg, jobs, processedHeight, worker, maxProcessing) + consumer := jobqueue.NewConsumer(lg, jobs, processedHeight, worker, maxProcessing, 0) defaultIndex, err := defaultProcessedIndex(state) if err != nil { return nil, 0, fmt.Errorf("could not read default processed index: %w", err) diff --git a/engine/verification/assigner/blockconsumer/consumer_test.go b/engine/verification/assigner/blockconsumer/consumer_test.go index 1ad75c4b169..759fb6c849a 100644 --- a/engine/verification/assigner/blockconsumer/consumer_test.go +++ b/engine/verification/assigner/blockconsumer/consumer_test.go @@ -13,7 +13,9 @@ import ( "github.com/onflow/flow-go/engine/verification/assigner/blockconsumer" vertestutils "github.com/onflow/flow-go/engine/verification/utils/unittest" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/model/flow/filter" "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/jobqueue" "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/module/trace" bstorage "github.com/onflow/flow-go/storage/badger" @@ -24,7 +26,7 @@ import ( // and its corresponding job can be converted back to the same block. func TestBlockToJob(t *testing.T) { block := unittest.BlockFixture() - actual, err := blockconsumer.JobToBlock(blockconsumer.BlockToJob(&block)) + actual, err := jobqueue.JobToBlock(jobqueue.BlockToJob(&block)) require.NoError(t, err) require.Equal(t, &block, actual) } @@ -145,7 +147,8 @@ func withConsumer( // hold any guarantees. root, err := s.State.Params().Root() require.NoError(t, err) - results := vertestutils.CompleteExecutionReceiptChainFixture(t, root, blockCount/2) + clusterCommittee := participants.Filter(filter.HasRole(flow.RoleCollection)) + results := vertestutils.CompleteExecutionReceiptChainFixture(t, root, blockCount/2, vertestutils.WithClusterCommittee(clusterCommittee)) blocks := vertestutils.ExtendStateWithFinalizedBlocks(t, results, s.State) // makes sure that we generated a block chain of requested length. require.Len(t, blocks, blockCount) diff --git a/engine/verification/assigner/blockconsumer/worker.go b/engine/verification/assigner/blockconsumer/worker.go index 654763b54d2..cdce2f5706f 100644 --- a/engine/verification/assigner/blockconsumer/worker.go +++ b/engine/verification/assigner/blockconsumer/worker.go @@ -4,6 +4,7 @@ import ( "github.com/onflow/flow-go/engine/verification/assigner" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/jobqueue" ) // worker is an internal type of this package. 
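// NOTE (sketch): the worker below now resolves jobs via jobqueue.JobToBlock and
// jobqueue.JobID. Those helpers were moved out of the blockconsumer package; the
// relocated file is not part of this hunk, so the following is only a minimal
// sketch of what module/jobqueue presumably provides, mirroring the blockjob.go
// deleted above.
package jobqueue

import (
	"fmt"

	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/module"
)

// BlockJob wraps a finalized block so it can be processed by the generic job queue consumer.
type BlockJob struct {
	Block *flow.Block
}

// ID converts the block ID into a job ID, which guarantees uniqueness.
func (j BlockJob) ID() module.JobID {
	return JobID(j.Block.ID())
}

// JobID returns the unique job ID for the given block ID.
func JobID(blockID flow.Identifier) module.JobID {
	return module.JobID(fmt.Sprintf("%v", blockID))
}

// JobToBlock converts a block job back into its block.
func JobToBlock(job module.Job) (*flow.Block, error) {
	blockJob, ok := job.(*BlockJob)
	if !ok {
		return nil, fmt.Errorf("could not assert job to block, job id: %x", job.ID())
	}
	return blockJob.Block, nil
}

// BlockToJob wraps a block in a BlockJob.
func BlockToJob(block *flow.Block) *BlockJob {
	return &BlockJob{Block: block}
}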
@@ -31,7 +32,7 @@ func (w *worker) withBlockConsumer(consumer *BlockConsumer) { // It then converts the job to a block and passes it to the underlying engine // for processing. func (w *worker) Run(job module.Job) error { - block, err := JobToBlock(job) + block, err := jobqueue.JobToBlock(job) if err != nil { return err } @@ -45,6 +46,6 @@ func (w *worker) Run(job module.Job) error { // The worker translates the block ID into job ID and notifies the consumer // that the job is done. func (w *worker) Notify(blockID flow.Identifier) { - jobID := JobID(blockID) + jobID := jobqueue.JobID(blockID) w.consumer.NotifyJobIsDone(jobID) } diff --git a/engine/verification/fetcher/chunkconsumer/consumer.go b/engine/verification/fetcher/chunkconsumer/consumer.go index 2a38115bc7f..97cccdb4ab2 100644 --- a/engine/verification/fetcher/chunkconsumer/consumer.go +++ b/engine/verification/fetcher/chunkconsumer/consumer.go @@ -40,7 +40,7 @@ func NewChunkConsumer( jobs := &ChunkJobs{locators: chunksQueue} lg := log.With().Str("module", "chunk_consumer").Logger() - consumer := jobqueue.NewConsumer(lg, jobs, processedIndex, worker, maxProcessing) + consumer := jobqueue.NewConsumer(lg, jobs, processedIndex, worker, maxProcessing, 0) chunkConsumer := &ChunkConsumer{ consumer: consumer, diff --git a/engine/verification/utils/unittest/fixture.go b/engine/verification/utils/unittest/fixture.go index 3ccd27bd55e..6656e538bbe 100644 --- a/engine/verification/utils/unittest/fixture.go +++ b/engine/verification/utils/unittest/fixture.go @@ -22,6 +22,8 @@ import ( "github.com/onflow/flow-go/model/convert" "github.com/onflow/flow-go/model/messages" "github.com/onflow/flow-go/module/epochs" + "github.com/onflow/flow-go/module/signature" + "github.com/onflow/flow-go/state/cluster" fvmMock "github.com/onflow/flow-go/fvm/mock" "github.com/onflow/flow-go/model/flow" @@ -127,11 +129,12 @@ func (c CompleteExecutionReceiptList) resultOf(t *testing.T, chunkID flow.Identi // CompleteExecutionReceiptBuilder is a test helper struct that specifies the parameters to build a CompleteExecutionReceipt. type CompleteExecutionReceiptBuilder struct { - resultsCount int // number of execution results in the container block. - executorCount int // number of times each execution result is copied in a block (by different receipts). - chunksCount int // number of chunks in each execution result. - chain flow.Chain - executorIDs flow.IdentifierList // identifier of execution nodes in the test. + resultsCount int // number of execution results in the container block. + executorCount int // number of times each execution result is copied in a block (by different receipts). + chunksCount int // number of chunks in each execution result. + chain flow.Chain + executorIDs flow.IdentifierList // identifier of execution nodes in the test. + clusterCommittee flow.IdentityList } type CompleteExecutionReceiptBuilderOpt func(builder *CompleteExecutionReceiptBuilder) @@ -166,20 +169,15 @@ func WithExecutorIDs(executorIDs flow.IdentifierList) CompleteExecutionReceiptBu } } -// CompleteExecutionReceiptFixture returns complete execution receipt with an -// execution receipt referencing the block collections. -// -// chunks determines the number of chunks inside each receipt. -// The output is an execution result with chunks+1 chunks, where the last chunk accounts -// for the system chunk. -// TODO: remove this function once new verification architecture is in place. 
-func CompleteExecutionReceiptFixture(t *testing.T, chunks int, chain flow.Chain, root *flow.Header) *CompleteExecutionReceipt { - return CompleteExecutionReceiptChainFixture(t, root, 1, WithChunksCount(chunks), WithChain(chain))[0] +func WithClusterCommittee(clusterCommittee flow.IdentityList) CompleteExecutionReceiptBuilderOpt { + return func(builder *CompleteExecutionReceiptBuilder) { + builder.clusterCommittee = clusterCommittee + } } // ExecutionResultFixture is a test helper that returns an execution result for the reference block header as well as the execution receipt data // for that result. -func ExecutionResultFixture(t *testing.T, chunkCount int, chain flow.Chain, refBlkHeader *flow.Header) (*flow.ExecutionResult, +func ExecutionResultFixture(t *testing.T, chunkCount int, chain flow.Chain, refBlkHeader *flow.Header, clusterCommittee flow.IdentityList) (*flow.ExecutionResult, *ExecutionReceiptData) { // setups up the first collection of block consists of three transactions @@ -196,7 +194,13 @@ func ExecutionResultFixture(t *testing.T, chunkCount int, chain flow.Chain, refB transactions := []*flow.TransactionBody{tx1, tx2, tx3} collection := flow.Collection{Transactions: transactions} collections := []*flow.Collection{&collection} + clusterChainID := cluster.CanonicalClusterID(1, clusterCommittee) + guarantee := unittest.CollectionGuaranteeFixture(unittest.WithCollection(&collection), unittest.WithCollRef(refBlkHeader.ParentID)) + guarantee.ChainID = clusterChainID + indices, err := signature.EncodeSignersToIndices(clusterCommittee.NodeIDs(), clusterCommittee.NodeIDs()) + require.NoError(t, err) + guarantee.SignerIndices = indices guarantees := []*flow.CollectionGuarantee{guarantee} metricsCollector := &metrics.NoopCollector{} @@ -265,6 +269,9 @@ func ExecutionResultFixture(t *testing.T, chunkCount int, chain flow.Chain, refB collection := flow.Collection{Transactions: []*flow.TransactionBody{tx}} guarantee := unittest.CollectionGuaranteeFixture(unittest.WithCollection(&collection), unittest.WithCollRef(refBlkHeader.ParentID)) + guarantee.SignerIndices = indices + guarantee.ChainID = clusterChainID + collections = append(collections, &collection) guarantees = append(guarantees, guarantee) @@ -432,7 +439,7 @@ func ExecutionReceiptsFromParentBlockFixture(t *testing.T, parent *flow.Header, func ExecutionResultFromParentBlockFixture(t *testing.T, parent *flow.Header, builder *CompleteExecutionReceiptBuilder) (*flow.ExecutionResult, *ExecutionReceiptData) { refBlkHeader := unittest.BlockHeaderWithParentFixture(parent) - return ExecutionResultFixture(t, builder.chunksCount, builder.chain, &refBlkHeader) + return ExecutionResultFixture(t, builder.chunksCount, builder.chain, &refBlkHeader, builder.clusterCommittee) } // ContainerBlockFixture builds and returns a block that contains input execution receipts. 
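The fixture changes above thread a collection-cluster committee into CompleteExecutionReceiptBuilder so that generated guarantees carry the canonical cluster chain ID and signer indices, which the guarantor lookup added to the ingestion engine expects. Below is a minimal standalone sketch of that stamping step, reusing only the calls that appear in ExecutionResultFixture above; the helper name buildGuarantee, the package name, and the epoch counter 1 are illustrative assumptions, not part of the patch.

package fixturesketch // hypothetical package, for illustration only

import (
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/module/signature"
	"github.com/onflow/flow-go/state/cluster"
	"github.com/onflow/flow-go/utils/unittest"
)

// buildGuarantee stamps a collection guarantee with the cluster chain ID and
// signer indices of the given collection committee, mirroring what
// ExecutionResultFixture now does for every generated collection.
func buildGuarantee(
	t *testing.T,
	collection *flow.Collection,
	refBlockID flow.Identifier,
	clusterCommittee flow.IdentityList,
) *flow.CollectionGuarantee {
	guarantee := unittest.CollectionGuaranteeFixture(
		unittest.WithCollection(collection),
		unittest.WithCollRef(refBlockID),
	)

	// reference the canonical cluster chain for this committee (epoch 1 assumed)
	guarantee.ChainID = cluster.CanonicalClusterID(1, clusterCommittee)

	// declare the whole committee as signers of the guarantee
	indices, err := signature.EncodeSignersToIndices(
		clusterCommittee.NodeIDs(), clusterCommittee.NodeIDs())
	require.NoError(t, err)
	guarantee.SignerIndices = indices

	return guarantee
}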
diff --git a/engine/verification/utils/unittest/helper.go b/engine/verification/utils/unittest/helper.go index 4a69519fd1f..45b2a70b2bd 100644 --- a/engine/verification/utils/unittest/helper.go +++ b/engine/verification/utils/unittest/helper.go @@ -380,7 +380,7 @@ func ExtendStateWithFinalizedBlocks(t *testing.T, completeExecutionReceipts Comp } err := state.Extend(context.Background(), receipt.ReferenceBlock) - require.NoError(t, err) + require.NoError(t, err, fmt.Errorf("can not extend block %v: %w", receipt.ReferenceBlock.ID(), err)) err = state.Finalize(context.Background(), refBlockID) require.NoError(t, err) blocks = append(blocks, receipt.ReferenceBlock) @@ -464,9 +464,6 @@ func withConsumers(t *testing.T, s, verID, participants := bootstrapSystem(t, tracer, authorized) exeID := participants.Filter(filter.HasRole(flow.RoleExecution))[0] conID := participants.Filter(filter.HasRole(flow.RoleConsensus))[0] - ops = append(ops, WithExecutorIDs( - participants.Filter(filter.HasRole(flow.RoleExecution)).NodeIDs())) - // generates a chain of blocks in the form of root <- R1 <- C1 <- R2 <- C2 <- ... where Rs are distinct reference // blocks (i.e., containing guarantees), and Cs are container blocks for their preceding reference block, // Container blocks only contain receipts of their preceding reference blocks. But they do not @@ -474,6 +471,12 @@ func withConsumers(t *testing.T, root, err := s.State.Final().Head() require.NoError(t, err) chainID := root.ChainID + ops = append(ops, WithExecutorIDs( + participants.Filter(filter.HasRole(flow.RoleExecution)).NodeIDs()), func(builder *CompleteExecutionReceiptBuilder) { + // needed for the guarantees to have the correct chainID and signer indices + builder.clusterCommittee = participants.Filter(filter.HasRole(flow.RoleCollection)) + }) + completeERs := CompleteExecutionReceiptChainFixture(t, root, blockCount, ops...) 
blocks := ExtendStateWithFinalizedBlocks(t, completeERs, s.State) diff --git a/fvm/blueprints/fees.go b/fvm/blueprints/fees.go index 17d35dc5562..3ecabe8b945 100644 --- a/fvm/blueprints/fees.go +++ b/fvm/blueprints/fees.go @@ -11,11 +11,10 @@ import ( "github.com/onflow/flow-go/model/flow" ) -const TransactionFeesExecutionEffortWeightsPathDomain = "storage" +const TransactionExecutionParametersPathDomain = "storage" const TransactionFeesExecutionEffortWeightsPathIdentifier = "executionEffortWeights" - -const TransactionFeesExecutionMemoryWeightsPathDomain = "storage" const TransactionFeesExecutionMemoryWeightsPathIdentifier = "executionMemoryWeights" +const TransactionFeesExecutionMemoryLimitPathIdentifier = "executionMemoryLimit" const deployTxFeesTransactionTemplate = ` transaction { @@ -196,7 +195,7 @@ func SetExecutionEffortWeightsTransaction( return setExecutionWeightsTransaction( service, weights, - TransactionFeesExecutionEffortWeightsPathDomain, + TransactionExecutionParametersPathDomain, TransactionFeesExecutionEffortWeightsPathIdentifier, ) } @@ -209,7 +208,7 @@ func SetExecutionMemoryWeightsTransaction( return setExecutionWeightsTransaction( service, weights, - TransactionFeesExecutionMemoryWeightsPathDomain, + TransactionExecutionParametersPathDomain, TransactionFeesExecutionMemoryWeightsPathIdentifier, ) } @@ -259,3 +258,38 @@ const setExecutionWeightsScript = ` } } ` + +func SetExecutionMemoryLimitTransaction( + service flow.Address, + limit uint64, +) (*flow.TransactionBody, error) { + newLimit, err := jsoncdc.Encode(cadence.UInt64(limit)) + if err != nil { + return nil, err + } + + storagePath, err := jsoncdc.Encode(cadence.Path{ + Domain: TransactionExecutionParametersPathDomain, + Identifier: TransactionFeesExecutionMemoryLimitPathIdentifier, + }) + if err != nil { + return nil, err + } + + tx := flow.NewTransactionBody(). + SetScript([]byte(setExecutionMemoryLimit)). + AddArgument(newLimit). + AddArgument(storagePath). + AddAuthorizer(service) + + return tx, nil +} + +const setExecutionMemoryLimit = ` + transaction(newLimit: UInt64, path: StoragePath) { + prepare(signer: AuthAccount) { + signer.load(from: path) + signer.save(newLimit, to: path) + } + } +` diff --git a/fvm/bootstrap.go b/fvm/bootstrap.go index c98d5521bd8..ac8accdd686 100644 --- a/fvm/bootstrap.go +++ b/fvm/bootstrap.go @@ -44,6 +44,8 @@ type BootstrapProcedure struct { transactionFees BootstrapProcedureFeeParameters executionEffortWeights weightedMeter.ExecutionEffortWeights executionMemoryWeights weightedMeter.ExecutionMemoryWeights + // executionMemoryLimit of 0 means that it won't be set in the state. The FVM will use the default value from the context. 
+ executionMemoryLimit uint64 // config values for epoch smart-contracts epochConfig epochs.EpochConfig @@ -144,6 +146,13 @@ func WithExecutionMemoryWeights(weights weightedMeter.ExecutionMemoryWeights) Bo } } +func WithExecutionMemoryLimit(limit uint64) BootstrapProcedureOption { + return func(bp *BootstrapProcedure) *BootstrapProcedure { + bp.executionMemoryLimit = limit + return bp + } +} + func WithMinimumStorageReservation(reservation cadence.UFix64) BootstrapProcedureOption { return func(bp *BootstrapProcedure) *BootstrapProcedure { bp.minimumStorageReservation = reservation @@ -599,6 +608,9 @@ func (b *BootstrapProcedure) setupExecutionWeights(service flow.Address) { if b.executionMemoryWeights != nil { b.setupExecutionMemoryWeights(service) } + if b.executionMemoryLimit != 0 { + b.setExecutionMemoryLimitTransaction(service) + } } func (b *BootstrapProcedure) setupExecutionEffortWeights(service flow.Address) { @@ -635,7 +647,7 @@ func (b *BootstrapProcedure) setupExecutionMemoryWeights(service flow.Address) { tb, err := blueprints.SetExecutionMemoryWeightsTransaction(service, uintWeights) if err != nil { - panic(fmt.Sprintf("failed to setup execution effort weights %s", err.Error())) + panic(fmt.Sprintf("failed to setup execution memory weights %s", err.Error())) } txError, err := b.vm.invokeMetaTransaction( @@ -646,7 +658,25 @@ func (b *BootstrapProcedure) setupExecutionMemoryWeights(service flow.Address) { b.sth, b.programs, ) - panicOnMetaInvokeErrf("failed to setup execution effort weights: %s", txError, err) + panicOnMetaInvokeErrf("failed to setup execution memory weights: %s", txError, err) +} + +func (b *BootstrapProcedure) setExecutionMemoryLimitTransaction(service flow.Address) { + + tb, err := blueprints.SetExecutionMemoryLimitTransaction(service, b.executionMemoryLimit) + if err != nil { + panic(fmt.Sprintf("failed to setup execution memory limit %s", err.Error())) + } + + txError, err := b.vm.invokeMetaTransaction( + b.ctx, + Transaction( + tb, + 0), + b.sth, + b.programs, + ) + panicOnMetaInvokeErrf("failed to setup execution memory limit: %s", txError, err) } func (b *BootstrapProcedure) setupStorageForServiceAccounts( diff --git a/fvm/executionParameters.go b/fvm/executionParameters.go new file mode 100644 index 00000000000..054745f7d59 --- /dev/null +++ b/fvm/executionParameters.go @@ -0,0 +1,137 @@ +package fvm + +import ( + "github.com/onflow/cadence" + "github.com/onflow/cadence/runtime" + "github.com/onflow/cadence/runtime/common" + + "github.com/onflow/flow-go/fvm/blueprints" + "github.com/onflow/flow-go/fvm/errors" + "github.com/onflow/flow-go/fvm/meter/weighted" + "github.com/onflow/flow-go/fvm/utils" +) + +// getExecutionWeights reads stored execution effort weights from the service account +func getExecutionEffortWeights( + env Environment, + service runtime.Address, +) ( + computationWeights weighted.ExecutionEffortWeights, + err error, +) { + value, err := env.VM().Runtime.ReadStored( + service, + cadence.Path{ + Domain: blueprints.TransactionExecutionParametersPathDomain, + Identifier: blueprints.TransactionFeesExecutionEffortWeightsPathIdentifier, + }, + runtime.Context{Interface: env}, + ) + if err != nil { + // this might be fatal, return as is + return nil, err + } + + computationWeightsRaw, ok := utils.CadenceValueToWeights(value) + if !ok { + // this is a non-fatal error. It is expected if the weights are not set up on the network yet. 
+ return nil, errors.NewCouldNotGetExecutionParameterFromStateError( + service.Hex(), + blueprints.TransactionExecutionParametersPathDomain, + blueprints.TransactionFeesExecutionEffortWeightsPathIdentifier) + } + + // Merge the default weights with the weights from the state. + // This allows for weights that are not set in the state, to be set by default. + // In case the network is stuck because of a transaction using an FVM feature that has 0 weight + // (or is not metered at all), the defaults can be changed and the network restarted + // instead of trying to change the weights with a transaction. + computationWeights = make(weighted.ExecutionEffortWeights, len(weighted.DefaultComputationWeights)) + for k, v := range weighted.DefaultComputationWeights { + computationWeights[k] = v + } + for k, v := range computationWeightsRaw { + computationWeights[common.ComputationKind(k)] = v + } + + return computationWeights, nil +} + +// getExecutionMemoryWeights reads stored execution memory weights from the service account +func getExecutionMemoryWeights( + env Environment, + service runtime.Address, +) ( + memoryWeights weighted.ExecutionMemoryWeights, + err error, +) { + value, err := env.VM().Runtime.ReadStored( + service, + cadence.Path{ + Domain: blueprints.TransactionExecutionParametersPathDomain, + Identifier: blueprints.TransactionFeesExecutionMemoryWeightsPathIdentifier, + }, + runtime.Context{Interface: env}, + ) + if err != nil { + // this might be fatal, return as is + return nil, err + } + + memoryWeightsRaw, ok := utils.CadenceValueToWeights(value) + if !ok { + // this is a non-fatal error. It is expected if the weights are not set up on the network yet. + return nil, errors.NewCouldNotGetExecutionParameterFromStateError( + service.Hex(), + blueprints.TransactionExecutionParametersPathDomain, + blueprints.TransactionFeesExecutionMemoryWeightsPathIdentifier) + } + + // Merge the default weights with the weights from the state. + // This allows for weights that are not set in the state, to be set by default. + // In case the network is stuck because of a transaction using an FVM feature that has 0 weight + // (or is not metered at all), the defaults can be changed and the network restarted + // instead of trying to change the weights with a transaction. + memoryWeights = make(weighted.ExecutionMemoryWeights, len(weighted.DefaultMemoryWeights)) + for k, v := range weighted.DefaultMemoryWeights { + memoryWeights[k] = v + } + for k, v := range memoryWeightsRaw { + memoryWeights[common.MemoryKind(k)] = v + } + + return memoryWeights, nil +} + +// getExecutionMemoryLimit reads the stored execution memory limit from the service account +func getExecutionMemoryLimit( + env Environment, + service runtime.Address, +) ( + memoryLimit uint64, + err error, +) { + value, err := env.VM().Runtime.ReadStored( + service, + cadence.Path{ + Domain: blueprints.TransactionExecutionParametersPathDomain, + Identifier: blueprints.TransactionFeesExecutionMemoryLimitPathIdentifier, + }, + runtime.Context{Interface: env}, + ) + if err != nil { + // this might be fatal, return as is + return 0, err + } + + memoryLimitRaw, ok := value.(cadence.UInt64) + if value == nil || !ok { + // this is a non-fatal error. It is expected if the weights are not set up on the network yet. 
+ return 0, errors.NewCouldNotGetExecutionParameterFromStateError( + service.Hex(), + blueprints.TransactionExecutionParametersPathDomain, + blueprints.TransactionFeesExecutionMemoryLimitPathIdentifier) + } + + return memoryLimitRaw.ToGoValue().(uint64), nil +} diff --git a/fvm/fvm.go b/fvm/fvm.go index 50529199220..3012a302373 100644 --- a/fvm/fvm.go +++ b/fvm/fvm.go @@ -3,17 +3,13 @@ package fvm import ( "fmt" - "github.com/onflow/cadence" "github.com/onflow/cadence/runtime" - "github.com/onflow/cadence/runtime/common" "github.com/rs/zerolog" - "github.com/onflow/flow-go/fvm/blueprints" errors "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/fvm/meter/weighted" "github.com/onflow/flow-go/fvm/programs" "github.com/onflow/flow-go/fvm/state" - "github.com/onflow/flow-go/fvm/utils" "github.com/onflow/flow-go/model/flow" ) @@ -101,133 +97,3 @@ func (vm *VirtualMachine) invokeMetaTransaction(parentCtx Context, tx *Transacti txErr, fatalErr := errors.SplitErrorTypes(err) return txErr, fatalErr } - -// getExecutionWeights reads stored execution effort weights from the service account -func getExecutionEffortWeights( - env Environment, - accounts state.Accounts, -) ( - computationWeights weighted.ExecutionEffortWeights, - err error, -) { - // the weights are stored in the service account - serviceAddress := env.Context().Chain.ServiceAddress() - - service := runtime.Address(serviceAddress) - // Check that the service account exists - ok, err := accounts.Exists(serviceAddress) - - if err != nil { - // this might be fatal, return as is - return nil, err - } - if !ok { - // if the service account does not exist, return an FVM error - return nil, errors.NewCouldNotGetExecutionParameterFromStateError( - service.Hex(), - blueprints.TransactionFeesExecutionEffortWeightsPathDomain, - blueprints.TransactionFeesExecutionEffortWeightsPathIdentifier) - } - - value, err := env.VM().Runtime.ReadStored( - service, - cadence.Path{ - Domain: blueprints.TransactionFeesExecutionEffortWeightsPathDomain, - Identifier: blueprints.TransactionFeesExecutionEffortWeightsPathIdentifier, - }, - runtime.Context{Interface: env}, - ) - if err != nil { - // this might be fatal, return as is - return nil, err - } - - computationWeightsRaw, ok := utils.CadenceValueToWeights(value) - if !ok { - // this is a non-fatal error. It is expected if the weights are not set up on the network yet. - return nil, errors.NewCouldNotGetExecutionParameterFromStateError( - service.Hex(), - blueprints.TransactionFeesExecutionEffortWeightsPathDomain, - blueprints.TransactionFeesExecutionEffortWeightsPathIdentifier) - } - - // Merge the default weights with the weights from the state. - // This allows for weights that are not set in the state, to be set by default. - // In case the network is stuck because of a transaction using an FVM feature that has 0 weight - // (or is not metered at all), the defaults can be changed and the network restarted - // instead of trying to change the weights with a transaction. 
- computationWeights = make(weighted.ExecutionEffortWeights) - for k, v := range weighted.DefaultComputationWeights { - computationWeights[k] = v - } - for k, v := range computationWeightsRaw { - computationWeights[common.ComputationKind(k)] = v - } - - return computationWeights, nil -} - -// getExecutionMemoryWeights reads stored execution memory weights from the service account -func getExecutionMemoryWeights( - env Environment, - accounts state.Accounts, -) ( - memoryWeights weighted.ExecutionMemoryWeights, - err error, -) { - // the weights are stored in the service account - serviceAddress := env.Context().Chain.ServiceAddress() - - service := runtime.Address(serviceAddress) - // Check that the service account exists - ok, err := accounts.Exists(serviceAddress) - - if err != nil { - // this might be fatal, return as is - return nil, err - } - if !ok { - // if the service account does not exist, return an FVM error - return nil, errors.NewCouldNotGetExecutionParameterFromStateError( - service.Hex(), - blueprints.TransactionFeesExecutionMemoryWeightsPathDomain, - blueprints.TransactionFeesExecutionMemoryWeightsPathIdentifier) - } - - value, err := env.VM().Runtime.ReadStored( - service, - cadence.Path{ - Domain: blueprints.TransactionFeesExecutionMemoryWeightsPathDomain, - Identifier: blueprints.TransactionFeesExecutionMemoryWeightsPathIdentifier, - }, - runtime.Context{Interface: env}, - ) - if err != nil { - // this might be fatal, return as is - return nil, err - } - - memoryWeightsRaw, ok := utils.CadenceValueToWeights(value) - if !ok { - // this is a non-fatal error. It is expected if the weights are not set up on the network yet. - return nil, errors.NewCouldNotGetExecutionParameterFromStateError( - service.Hex(), - blueprints.TransactionFeesExecutionMemoryWeightsPathDomain, - blueprints.TransactionFeesExecutionMemoryWeightsPathIdentifier) - } - - // Merge the default weights with the weights from the state. - // This allows for weights that are not set in the state, to be set by default. - // In case the network is stuck because of a transaction using an FVM feature that has 0 weight - // (or is not metered at all), the defaults can be changed and the network restarted - // instead of trying to change the weights with a transaction. 
- memoryWeights = make(weighted.ExecutionMemoryWeights) - for k, v := range weighted.DefaultMemoryWeights { - memoryWeights[k] = v - } - for k, v := range memoryWeightsRaw { - memoryWeights[common.MemoryKind(k)] = v - } - - return memoryWeights, nil -} diff --git a/fvm/fvm_test.go b/fvm/fvm_test.go index 960aacec33a..1e650356847 100644 --- a/fvm/fvm_test.go +++ b/fvm/fvm_test.go @@ -15,6 +15,7 @@ import ( jsoncdc "github.com/onflow/cadence/encoding/json" "github.com/onflow/cadence/runtime" "github.com/onflow/cadence/runtime/common" + "github.com/onflow/cadence/runtime/interpreter" "github.com/rs/zerolog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -241,7 +242,7 @@ func TestPrograms(t *testing.T) { ) t.Run("script execution programs are not committed", - newVMTest().run( + newVMTest().withBootstrapProcedureOptions().run( func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { scriptCtx := fvm.NewContextFromParent(ctx) @@ -524,7 +525,9 @@ func TestBlockContext_DeployContract(t *testing.T) { err = vm.Run(ctx, tx, ledger, programs.NewEmptyPrograms()) require.NoError(t, err) - assert.NoError(t, tx.Err) + var parsingCheckingError *runtime.ParsingCheckingError + assert.ErrorAs(t, tx.Err, &parsingCheckingError) + assert.ErrorContains(t, tx.Err, "program too ambiguous, local replay limit of 64 tokens exceeded") }) t.Run("account update with set code fails if not signed by service account", func(t *testing.T) { @@ -2949,6 +2952,7 @@ func TestBlockContext_ExecuteTransaction_FailingTransactions(t *testing.T) { err := vm.Run(ctx, script, view, programs.NewEmptyPrograms()) require.NoError(t, err) + require.NoError(t, script.Err) return script.Value.ToGoValue().(uint64) } @@ -2956,6 +2960,7 @@ func TestBlockContext_ExecuteTransaction_FailingTransactions(t *testing.T) { fvm.WithMinimumStorageReservation(fvm.DefaultMinimumStorageReservation), fvm.WithAccountCreationFee(fvm.DefaultAccountCreationFee), fvm.WithStorageMBPerFLOW(fvm.DefaultStorageMBPerFLOW), + fvm.WithExecutionMemoryLimit(math.MaxUint64), ).run( func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { ctx.LimitAccountStorage = true // this test requires storage limits to be enforced @@ -2998,6 +3003,7 @@ func TestBlockContext_ExecuteTransaction_FailingTransactions(t *testing.T) { fvm.WithMinimumStorageReservation(fvm.DefaultMinimumStorageReservation), fvm.WithAccountCreationFee(fvm.DefaultAccountCreationFee), fvm.WithStorageMBPerFLOW(fvm.DefaultStorageMBPerFLOW), + fvm.WithExecutionMemoryLimit(math.MaxUint64), ).run( func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { ctx.LimitAccountStorage = true // this test requires storage limits to be enforced @@ -3183,6 +3189,7 @@ func TestTransactionFeeDeduction(t *testing.T) { err := vm.Run(ctx, script, view, programs.NewEmptyPrograms()) require.NoError(t, err) + require.NoError(t, script.Err) return script.Value.ToGoValue().(uint64) } @@ -3548,6 +3555,7 @@ func TestTransactionFeeDeduction(t *testing.T) { for i, tc := range testCases { t.Run(fmt.Sprintf("Transaction Fees %d: %s", i, tc.name), newVMTest().withBootstrapProcedureOptions( fvm.WithTransactionFee(fvm.DefaultTransactionFees), + fvm.WithExecutionMemoryLimit(math.MaxUint64), ).withContextOptions( fvm.WithTransactionFeesEnabled(true), ).run( @@ -3561,6 +3569,7 @@ func TestTransactionFeeDeduction(t 
*testing.T) { fvm.WithStorageMBPerFLOW(fvm.DefaultStorageMBPerFLOW), fvm.WithMinimumStorageReservation(fvm.DefaultMinimumStorageReservation), fvm.WithAccountCreationFee(fvm.DefaultAccountCreationFee), + fvm.WithExecutionMemoryLimit(math.MaxUint64), ).withContextOptions( fvm.WithTransactionFeesEnabled(true), fvm.WithAccountStorageLimit(true), @@ -3696,6 +3705,74 @@ func TestSettingExecutionWeights(t *testing.T) { }, )) + memoryWeights = make(map[common.MemoryKind]uint64) + for k, v := range weightedMeter.DefaultMemoryWeights { + memoryWeights[k] = v + } + memoryWeights[common.MemoryKindBreakStatement] = 1_000_000 + t.Run("transaction should fail with low memory limit (set in the state)", newVMTest().withBootstrapProcedureOptions( + fvm.WithMinimumStorageReservation(fvm.DefaultMinimumStorageReservation), + fvm.WithAccountCreationFee(fvm.DefaultAccountCreationFee), + fvm.WithStorageMBPerFLOW(fvm.DefaultStorageMBPerFLOW), + fvm.WithExecutionMemoryLimit( + 100_000_000, + ), + fvm.WithExecutionMemoryWeights( + memoryWeights, + ), + ).run( + func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { + privateKeys, err := testutil.GenerateAccountPrivateKeys(1) + require.NoError(t, err) + + accounts, err := testutil.CreateAccounts(vm, view, programs, privateKeys, chain) + require.NoError(t, err) + + // This transaction is specially designed to use a lot of breaks + // as the weight for breaks is much higher than usual. + // putting a `while true {break}` in a loop does not use the same amount of memory. + txBody := flow.NewTransactionBody(). + SetScript([]byte(` + transaction { + prepare(signer: AuthAccount) { + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + while true {break};while true {break};while true {break};while true {break};while true 
{break}; + while true {break};while true {break};while true {break};while true {break};while true {break}; + } + } + `)) + + err = testutil.SignTransaction(txBody, accounts[0], privateKeys[0], 0) + require.NoError(t, err) + + tx := fvm.Transaction(txBody, 0) + err = vm.Run(ctx, tx, view, programs) + require.NoError(t, err) + // There are 100 breaks and each break uses 1_000_000 memory + require.Greater(t, tx.MemoryUsed, uint64(100_000_000)) + + var memoryLimitExceededError *errors.MemoryLimitExceededError + assert.ErrorAs(t, tx.Err, &memoryLimitExceededError) + }, + )) + t.Run("transaction should fail if create account weight is high", newVMTest().withBootstrapProcedureOptions( fvm.WithMinimumStorageReservation(fvm.DefaultMinimumStorageReservation), fvm.WithAccountCreationFee(fvm.DefaultAccountCreationFee), @@ -4051,3 +4128,246 @@ func TestEnforcingComputationLimit(t *testing.T) { }) } } + +func TestScriptContractMutationsFailure(t *testing.T) { + t.Parallel() + + t.Run("contract additions are not committed", + newVMTest().run( + func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { + + // Create an account private key. + privateKeys, err := testutil.GenerateAccountPrivateKeys(1) + require.NoError(t, err) + + // Bootstrap a ledger, creating accounts with the provided private keys and the root account. + accounts, err := testutil.CreateAccounts(vm, view, programs, privateKeys, chain) + require.NoError(t, err) + account := accounts[0] + address := cadence.NewAddress(account) + + scriptCtx := fvm.NewContextFromParent(ctx) + + contract := "pub contract Foo {}" + + script := fvm.Script([]byte(fmt.Sprintf(` + pub fun main(account: Address) { + let acc = getAuthAccount(account) + acc.contracts.add(name: "Foo", code: "%s".decodeHex()) + }`, hex.EncodeToString([]byte(contract))), + )).WithArguments( + jsoncdc.MustEncode(address), + ) + + err = vm.Run(scriptCtx, script, view, programs) + require.NoError(t, err) + require.Error(t, script.Err) + require.IsType(t, &errors.CadenceRuntimeError{}, script.Err) + // modifications to contracts are not supported in scripts + require.IsType(t, &errors.OperationNotSupportedError{}, + script.Err.(*errors.CadenceRuntimeError).Unwrap().(*runtime.Error).Err.(interpreter.Error).Err.(interpreter.PositionedError).Err) + }, + ), + ) + + t.Run("contract removals are not committed", + newVMTest().run( + func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { + + // Create an account private key. + privateKeys, err := testutil.GenerateAccountPrivateKeys(1) + privateKey := privateKeys[0] + require.NoError(t, err) + + // Bootstrap a ledger, creating accounts with the provided private keys and the root account. + accounts, err := testutil.CreateAccounts(vm, view, programs, privateKeys, chain) + require.NoError(t, err) + account := accounts[0] + address := cadence.NewAddress(account) + + subCtx := fvm.NewContextFromParent(ctx) + + contract := "pub contract Foo {}" + + txBody := flow.NewTransactionBody().SetScript([]byte(fmt.Sprintf(` + transaction { + prepare(signer: AuthAccount, service: AuthAccount) { + signer.contracts.add(name: "Foo", code: "%s".decodeHex()) + } + } + `, hex.EncodeToString([]byte(contract))))). + AddAuthorizer(account). + AddAuthorizer(chain.ServiceAddress()). + SetPayer(chain.ServiceAddress()). 
+ SetProposalKey(chain.ServiceAddress(), 0, 0) + + _ = testutil.SignPayload(txBody, account, privateKey) + _ = testutil.SignEnvelope(txBody, chain.ServiceAddress(), unittest.ServiceAccountPrivateKey) + tx := fvm.Transaction(txBody, 0) + err = vm.Run(subCtx, tx, view, programs) + require.NoError(t, err) + require.NoError(t, tx.Err) + + script := fvm.Script([]byte(` + pub fun main(account: Address) { + let acc = getAuthAccount(account) + let n = acc.contracts.names[0] + acc.contracts.remove(name: n) + }`, + )).WithArguments( + jsoncdc.MustEncode(address), + ) + + err = vm.Run(subCtx, script, view, programs) + require.NoError(t, err) + require.Error(t, script.Err) + require.IsType(t, &errors.CadenceRuntimeError{}, script.Err) + // modifications to contracts are not supported in scripts + require.IsType(t, &errors.OperationNotSupportedError{}, + script.Err.(*errors.CadenceRuntimeError).Unwrap().(*runtime.Error).Err.(interpreter.Error).Err.(interpreter.PositionedError).Err) + }, + ), + ) + + t.Run("contract updates are not committed", + newVMTest().run( + func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { + + // Create an account private key. + privateKeys, err := testutil.GenerateAccountPrivateKeys(1) + privateKey := privateKeys[0] + require.NoError(t, err) + + // Bootstrap a ledger, creating accounts with the provided private keys and the root account. + accounts, err := testutil.CreateAccounts(vm, view, programs, privateKeys, chain) + require.NoError(t, err) + account := accounts[0] + address := cadence.NewAddress(account) + + subCtx := fvm.NewContextFromParent(ctx) + + contract := "pub contract Foo {}" + + txBody := flow.NewTransactionBody().SetScript([]byte(fmt.Sprintf(` + transaction { + prepare(signer: AuthAccount, service: AuthAccount) { + signer.contracts.add(name: "Foo", code: "%s".decodeHex()) + } + } + `, hex.EncodeToString([]byte(contract))))). + AddAuthorizer(account). + AddAuthorizer(chain.ServiceAddress()). + SetPayer(chain.ServiceAddress()). + SetProposalKey(chain.ServiceAddress(), 0, 0) + + _ = testutil.SignPayload(txBody, account, privateKey) + _ = testutil.SignEnvelope(txBody, chain.ServiceAddress(), unittest.ServiceAccountPrivateKey) + tx := fvm.Transaction(txBody, 0) + err = vm.Run(subCtx, tx, view, programs) + require.NoError(t, err) + require.NoError(t, tx.Err) + + script := fvm.Script([]byte(fmt.Sprintf(` + pub fun main(account: Address) { + let acc = getAuthAccount(account) + let n = acc.contracts.names[0] + acc.contracts.update__experimental(name: n, code: "%s".decodeHex()) + }`, hex.EncodeToString([]byte(contract))))).WithArguments( + jsoncdc.MustEncode(address), + ) + + err = vm.Run(subCtx, script, view, programs) + require.NoError(t, err) + require.Error(t, script.Err) + require.IsType(t, &errors.CadenceRuntimeError{}, script.Err) + // modifications to contracts are not supported in scripts + require.IsType(t, &errors.OperationNotSupportedError{}, + script.Err.(*errors.CadenceRuntimeError).Unwrap().(*runtime.Error).Err.(interpreter.Error).Err.(interpreter.PositionedError).Err) + }, + ), + ) +} + +func TestScriptAccountKeyMutationsFailure(t *testing.T) { + t.Parallel() + + t.Run("Account key additions are not committed", + newVMTest().run( + func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { + + // Create an account private key. 
+ privateKeys, err := testutil.GenerateAccountPrivateKeys(1) + require.NoError(t, err) + + // Bootstrap a ledger, creating accounts with the provided private keys and the root account. + accounts, err := testutil.CreateAccounts(vm, view, programs, privateKeys, chain) + require.NoError(t, err) + account := accounts[0] + address := cadence.NewAddress(account) + + scriptCtx := fvm.NewContextFromParent(ctx) + + seed := make([]byte, crypto.KeyGenSeedMinLenECDSAP256) + _, _ = rand.Read(seed) + + privateKey, _ := crypto.GeneratePrivateKey(crypto.ECDSAP256, seed) + + script := fvm.Script([]byte(` + pub fun main(account: Address, k: [UInt8]) { + let acc = getAuthAccount(account) + acc.addPublicKey(k) + }`, + )).WithArguments( + jsoncdc.MustEncode(address), + jsoncdc.MustEncode(testutil.BytesToCadenceArray( + privateKey.PublicKey().Encode(), + )), + ) + + err = vm.Run(scriptCtx, script, view, programs) + require.NoError(t, err) + require.Error(t, script.Err) + require.IsType(t, &errors.CadenceRuntimeError{}, script.Err) + // modifications to public keys are not supported in scripts + require.IsType(t, &errors.OperationNotSupportedError{}, + script.Err.(*errors.CadenceRuntimeError).Unwrap().(*runtime.Error).Err.(interpreter.Error).Err.(interpreter.PositionedError).Err) + }, + ), + ) + + t.Run("Account key removals are not committed", + newVMTest().run( + func(t *testing.T, vm *fvm.VirtualMachine, chain flow.Chain, ctx fvm.Context, view state.View, programs *programs.Programs) { + + // Create an account private key. + privateKeys, err := testutil.GenerateAccountPrivateKeys(1) + require.NoError(t, err) + + // Bootstrap a ledger, creating accounts with the provided private keys and the root account. + accounts, err := testutil.CreateAccounts(vm, view, programs, privateKeys, chain) + require.NoError(t, err) + account := accounts[0] + address := cadence.NewAddress(account) + + scriptCtx := fvm.NewContextFromParent(ctx) + + script := fvm.Script([]byte(` + pub fun main(account: Address) { + let acc = getAuthAccount(account) + acc.removePublicKey(0) + }`, + )).WithArguments( + jsoncdc.MustEncode(address), + ) + + err = vm.Run(scriptCtx, script, view, programs) + require.NoError(t, err) + require.Error(t, script.Err) + require.IsType(t, &errors.CadenceRuntimeError{}, script.Err) + // modifications to public keys are not supported in scripts + require.IsType(t, &errors.OperationNotSupportedError{}, + script.Err.(*errors.CadenceRuntimeError).Unwrap().(*runtime.Error).Err.(interpreter.Error).Err.(interpreter.PositionedError).Err) + }, + ), + ) +} diff --git a/fvm/handler/programs.go b/fvm/handler/programs.go index 93b75fcac72..c1c775025fc 100644 --- a/fvm/handler/programs.go +++ b/fvm/handler/programs.go @@ -6,7 +6,6 @@ import ( "github.com/onflow/cadence/runtime/common" "github.com/onflow/cadence/runtime/interpreter" - "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/fvm/programs" "github.com/onflow/flow-go/fvm/state" ) @@ -65,12 +64,12 @@ func (h *ProgramsHandler) Set(location common.Location, program *interpreter.Pro h.Programs.Set(location, program, last.state) - err := h.mergeState(last.state) + err := h.mergeState(last.state, h.masterState.EnforceInteractionLimits()) return err } -func (h *ProgramsHandler) mergeState(state *state.State) error { +func (h *ProgramsHandler) mergeState(state *state.State, enforceLimits bool) error { if len(h.viewsStack) == 0 { // if this was last item, merge to the master state h.masterState.SetActiveState(h.initialState) @@ -78,7 +77,7 @@ func (h 
*ProgramsHandler) mergeState(state *state.State) error { h.masterState.SetActiveState(h.viewsStack[len(h.viewsStack)-1].state) } - return h.masterState.State().MergeState(state, h.masterState.EnforceInteractionLimits()) + return h.masterState.State().MergeState(state, enforceLimits) } func (h *ProgramsHandler) Get(location common.Location) (*interpreter.Program, bool) { @@ -90,13 +89,11 @@ func (h *ProgramsHandler) Get(location common.Location) (*interpreter.Program, b program, view, has := h.Programs.Get(location) if has { if view != nil { // handle view not set (ie. for non-address locations - err := h.mergeState(view) + // don't enforce limits while merging a cached view + enforceLimits := false + err := h.mergeState(view, enforceLimits) if err != nil { - // ignore LedgerIntractionLimitExceededError errors - var interactionLimiExceededErr *errors.LedgerIntractionLimitExceededError - if !errors.As(err, &interactionLimiExceededErr) { - panic(fmt.Sprintf("merge error while getting program, panic: %s", err)) - } + panic(fmt.Sprintf("merge error while getting program, panic: %s", err)) } } return program, true diff --git a/fvm/meter/weighted/meter.go b/fvm/meter/weighted/meter.go index 7fd0d155232..32828613e8c 100644 --- a/fvm/meter/weighted/meter.go +++ b/fvm/meter/weighted/meter.go @@ -414,3 +414,8 @@ func (m *Meter) TotalMemoryUsed() uint { func (m *Meter) TotalMemoryLimit() uint { return uint(m.memoryLimit) } + +// SetTotalMemoryLimit sets the total memory limit +func (m *Meter) SetTotalMemoryLimit(limit uint64) { + m.memoryLimit = limit +} diff --git a/fvm/scriptEnv.go b/fvm/scriptEnv.go index 733259f9306..105f89437d5 100644 --- a/fvm/scriptEnv.go +++ b/fvm/scriptEnv.go @@ -98,37 +98,75 @@ func NewScriptEnvironment( env.seedRNG(fvmContext.BlockHeader) } - env.setMeteringWeights() + env.setExecutionParameters() return env } -func (e *ScriptEnv) setMeteringWeights() { - var m *weighted.Meter +func (e *ScriptEnv) setExecutionParameters() { + // Check that the service account exists because all the settings are stored in it + serviceAddress := e.Context().Chain.ServiceAddress() + service := runtime.Address(serviceAddress) + + // set the property if no error, but if the error is a fatal error then return it + setIfOk := func(prop string, err error, setter func()) (fatal error) { + err, fatal = errors.SplitErrorTypes(err) + if fatal != nil { + // this is a fatal error. return it + e.ctx.Logger. + Error(). + Err(fatal). + Msgf("error getting %s", prop) + return fatal + } + if err != nil { + // this is a general error. + // could be that no setting was present in the state, + // or that the setting was not parseable, + // or some other deterministic thing. + e.ctx.Logger. + Debug(). + Err(err). + Msgf("could not set %s. Using defaults", prop) + return + } + // everything is ok. do the setting + setter() + return nil + } + var ok bool + var m *weighted.Meter // only set the weights if the meter is a weighted.Meter if m, ok = e.sth.State().Meter().(*weighted.Meter); !ok { return } - computationWeights, err := getExecutionEffortWeights(e, e.accounts) + computationWeights, err := getExecutionEffortWeights(e, service) + err = setIfOk( + "execution effort weights", + err, + func() { m.SetComputationWeights(computationWeights) }) if err != nil { - e.ctx.Logger. - Info(). - Err(err). - Msg("could not set execution effort weights. 
Using defaults") - } else { - m.SetComputationWeights(computationWeights) + return } - memoryWeights, err := getExecutionMemoryWeights(e, e.accounts) + memoryWeights, err := getExecutionMemoryWeights(e, service) + err = setIfOk( + "execution memory weights", + err, + func() { m.SetMemoryWeights(memoryWeights) }) if err != nil { - e.ctx.Logger. - Info(). - Err(err). - Msg("could not set execution memory weights. Using defaults") - } else { - m.SetMemoryWeights(memoryWeights) + return + } + + memoryLimit, err := getExecutionMemoryLimit(e, service) + err = setIfOk( + "execution memory limit", + err, + func() { m.SetTotalMemoryLimit(memoryLimit) }) + if err != nil { + return } } diff --git a/fvm/transactionEnv.go b/fvm/transactionEnv.go index 963c36225ad..e2515686143 100644 --- a/fvm/transactionEnv.go +++ b/fvm/transactionEnv.go @@ -115,53 +115,76 @@ func NewTransactionEnvironment( env.seedRNG(ctx.BlockHeader) } - err := env.setMeteringWeights() + // set the execution parameters from the state + err := env.setExecutionParameters() return env, err } -func (e *TransactionEnv) setMeteringWeights() error { - var m *weighted.Meter +func (e *TransactionEnv) setExecutionParameters() error { + // Check that the service account exists because all the settings are stored in it + serviceAddress := e.Context().Chain.ServiceAddress() + service := runtime.Address(serviceAddress) + + // set the property if no error, but if the error is a fatal error then return it + setIfOk := func(prop string, err error, setter func()) (fatal error) { + err, fatal = errors.SplitErrorTypes(err) + if fatal != nil { + // this is a fatal error. return it + e.ctx.Logger. + Error(). + Err(fatal). + Msgf("error getting %s", prop) + return fatal + } + if err != nil { + // this is a general error. + // could be that no setting was present in the state, + // or that the setting was not parseable, + // or some other deterministic thing. + e.ctx.Logger. + Debug(). + Err(err). + Msgf("could not set %s. Using defaults", prop) + return nil + } + // everything is ok. do the setting + setter() + return nil + } + var ok bool + var m *weighted.Meter // only set the weights if the meter is a weighted.Meter if m, ok = e.sth.State().Meter().(*weighted.Meter); !ok { return nil } - computationWeights, err := getExecutionEffortWeights(e, e.accounts) - err, fatal := errors.SplitErrorTypes(err) - if fatal != nil { - e.ctx.Logger. - Error(). - Err(fatal). - Msg("error getting execution effort weights") - return fatal - } + computationWeights, err := getExecutionEffortWeights(e, service) + err = setIfOk( + "execution effort weights", + err, + func() { m.SetComputationWeights(computationWeights) }) if err != nil { - e.ctx.Logger. - Info(). - Err(err). - Msg("could not set execution effort weights. Using defaults") - } else { - m.SetComputationWeights(computationWeights) + return err } - memoryWeights, err := getExecutionMemoryWeights(e, e.accounts) - err, fatal = errors.SplitErrorTypes(err) - if fatal != nil { - e.ctx.Logger. - Error(). - Err(fatal). - Msg("error getting execution memory weights") - return fatal + memoryWeights, err := getExecutionMemoryWeights(e, service) + err = setIfOk( + "execution memory weights", + err, + func() { m.SetMemoryWeights(memoryWeights) }) + if err != nil { + return err } + + memoryLimit, err := getExecutionMemoryLimit(e, service) + err = setIfOk( + "execution memory limit", + err, + func() { m.SetTotalMemoryLimit(memoryLimit) }) if err != nil { - e.ctx.Logger. - Info(). - Err(err). 
- Msg("could not set execution memory weights. Using defaults") - } else { - m.SetMemoryWeights(memoryWeights) + return err } return nil diff --git a/go.mod b/go.mod index fce49e69d78..dac34512746 100644 --- a/go.mod +++ b/go.mod @@ -45,7 +45,7 @@ require ( github.com/libp2p/go-libp2p-discovery v0.6.0 github.com/libp2p/go-libp2p-kad-dht v0.15.0 github.com/libp2p/go-libp2p-kbucket v0.4.7 - github.com/libp2p/go-libp2p-pubsub v0.6.0 + github.com/libp2p/go-libp2p-pubsub v0.6.2-0.20220530164708-06b5ba47636c github.com/libp2p/go-libp2p-swarm v0.10.2 github.com/libp2p/go-libp2p-tls v0.4.1 github.com/libp2p/go-tcp-transport v0.5.1 @@ -55,13 +55,13 @@ require ( github.com/multiformats/go-multihash v0.1.0 github.com/onflow/atree v0.3.1-0.20220531231935-525fbc26f40a github.com/onflow/cadence v0.21.3-0.20220601002855-8b113c539a2c - github.com/onflow/flow v0.3.0 + github.com/onflow/flow v0.3.1 github.com/onflow/flow-core-contracts/lib/go/contracts v0.11.2-0.20220513155751-c4c1f8d59f83 github.com/onflow/flow-core-contracts/lib/go/templates v0.11.2-0.20220513155751-c4c1f8d59f83 github.com/onflow/flow-emulator v0.31.2-0.20220513151845-ef7513cb1cd0 github.com/onflow/flow-go-sdk v0.24.1-0.20220513205729-d1f58d47c4e3 github.com/onflow/flow-go/crypto v0.24.3 - github.com/onflow/flow/protobuf/go/flow v0.3.0 + github.com/onflow/flow/protobuf/go/flow v0.3.1 github.com/opentracing/opentracing-go v1.2.0 github.com/pierrec/lz4 v2.6.1+incompatible github.com/pkg/errors v0.9.1 diff --git a/go.sum b/go.sum index cd84949d314..0b7dea21cec 100644 --- a/go.sum +++ b/go.sum @@ -391,8 +391,11 @@ github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWp github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= github.com/fxamacker/cbor/v2 v2.2.1-0.20210510192846-c3f3c69e7bc8/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo= github.com/fxamacker/cbor/v2 v2.4.1-0.20220314011055-12f5cb4b5eb0/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo= +github.com/fxamacker/cbor/v2 v2.4.1-0.20220314011055-12f5cb4b5eb0/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo= +github.com/fxamacker/cbor/v2 v2.4.1-0.20220515183430-ad2eae63303f h1:dxTR4AaxCwuQv9LAVTAC2r1szlS+epeuPT5ClLKT6ZY= github.com/fxamacker/cbor/v2 v2.4.1-0.20220515183430-ad2eae63303f h1:dxTR4AaxCwuQv9LAVTAC2r1szlS+epeuPT5ClLKT6ZY= github.com/fxamacker/cbor/v2 v2.4.1-0.20220515183430-ad2eae63303f/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo= +github.com/fxamacker/cbor/v2 v2.4.1-0.20220515183430-ad2eae63303f/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo= github.com/fxamacker/circlehash v0.3.0 h1:XKdvTtIJV9t7DDUtsf0RIpC1OcxZtPbmgIH7ekx28WA= github.com/fxamacker/circlehash v0.3.0/go.mod h1:3aq3OfVvsWtkWMb6A1owjOQFA+TLsD5FgJflnaQwtMM= github.com/gammazero/deque v0.1.0 h1:f9LnNmq66VDeuAlSAapemq/U7hJ2jpIWa4c09q8Dlik= @@ -1010,8 +1013,9 @@ github.com/libp2p/go-libp2p-peerstore v0.6.0/go.mod h1:DGEmKdXrcYpK9Jha3sS7MhqYd github.com/libp2p/go-libp2p-pnet v0.2.0 h1:J6htxttBipJujEjz1y0a5+eYoiPcFHhSYHH6na5f0/k= github.com/libp2p/go-libp2p-pnet v0.2.0/go.mod h1:Qqvq6JH/oMZGwqs3N1Fqhv8NVhrdYcO0BW4wssv21LA= github.com/libp2p/go-libp2p-pubsub v0.4.1/go.mod h1:izkeMLvz6Ht8yAISXjx60XUQZMq9ZMe5h2ih4dLIBIQ= -github.com/libp2p/go-libp2p-pubsub v0.6.0 h1:98+RXuEWW17U6cAijK1yaTf6mw/B+n5yPA421z+dlo0= github.com/libp2p/go-libp2p-pubsub v0.6.0/go.mod h1:nJv87QM2cU0w45KPR1rZicq+FmFIOD16zmT+ep1nOmg= +github.com/libp2p/go-libp2p-pubsub v0.6.2-0.20220530164708-06b5ba47636c h1:vvjRhS6tFFVQw0XzvbX5I6ch1O8JELRHyCTDdLNa/bE= 
+github.com/libp2p/go-libp2p-pubsub v0.6.2-0.20220530164708-06b5ba47636c/go.mod h1:EuyBJFtF8qF67IEA98biwK8Xnw5MNJpJ/Z+8iWCMFwc= github.com/libp2p/go-libp2p-quic-transport v0.10.0/go.mod h1:RfJbZ8IqXIhxBRm5hqUEJqjiiY8xmEuq3HUDS993MkA= github.com/libp2p/go-libp2p-quic-transport v0.11.2/go.mod h1:wlanzKtIh6pHrq+0U3p3DY9PJfGqxMgPaGKaK5LifwQ= github.com/libp2p/go-libp2p-quic-transport v0.13.0/go.mod h1:39/ZWJ1TW/jx1iFkKzzUg00W6tDJh73FC0xYudjr7Hc= @@ -1040,6 +1044,7 @@ github.com/libp2p/go-libp2p-swarm v0.4.0/go.mod h1:XVFcO52VoLoo0eitSxNQWYq4D6syd github.com/libp2p/go-libp2p-swarm v0.5.0/go.mod h1:sU9i6BoHE0Ve5SKz3y9WfKrh8dUat6JknzUehFx8xW4= github.com/libp2p/go-libp2p-swarm v0.5.3/go.mod h1:NBn7eNW2lu568L7Ns9wdFrOhgRlkRnIDg0FLKbuu3i8= github.com/libp2p/go-libp2p-swarm v0.8.0/go.mod h1:sOMp6dPuqco0r0GHTzfVheVBh6UEL0L1lXUZ5ot2Fvc= +github.com/libp2p/go-libp2p-swarm v0.10.0/go.mod h1:71ceMcV6Rg/0rIQ97rsZWMzto1l9LnNquef+efcRbmA= github.com/libp2p/go-libp2p-swarm v0.10.2 h1:UaXf+CTq6Ns1N2V1EgqJ9Q3xaRsiN7ImVlDMpirMAWw= github.com/libp2p/go-libp2p-swarm v0.10.2/go.mod h1:Pdkq0QU5a+qu+oyqIV3bknMsnzk9lnNyKvB9acJ5aZs= github.com/libp2p/go-libp2p-testing v0.0.2/go.mod h1:gvchhf3FQOtBdr+eFUABet5a4MBLK8jM3V4Zghvmi+E= @@ -1053,6 +1058,7 @@ github.com/libp2p/go-libp2p-testing v0.4.0/go.mod h1:Q+PFXYoiYFN5CAEG2w3gLPEzotl github.com/libp2p/go-libp2p-testing v0.4.2/go.mod h1:Q+PFXYoiYFN5CAEG2w3gLPEzotlKsNSbKQ/lImlOWF0= github.com/libp2p/go-libp2p-testing v0.5.0/go.mod h1:QBk8fqIL1XNcno/l3/hhaIEn4aLRijpYOR+zVjjlh+A= github.com/libp2p/go-libp2p-testing v0.7.0/go.mod h1:OLbdn9DbgdMwv00v+tlp1l3oe2Cl+FAjoWIA2pa0X6E= +github.com/libp2p/go-libp2p-testing v0.9.0/go.mod h1:Td7kbdkWqYTJYQGTwzlgXwaqldraIanyjuRiAbK/XQU= github.com/libp2p/go-libp2p-testing v0.9.2 h1:dCpODRtRaDZKF8HXT9qqqgON+OMEB423Knrgeod8j84= github.com/libp2p/go-libp2p-tls v0.1.3/go.mod h1:wZfuewxOndz5RTnCAxFliGjvYSDA40sKitV4c50uI1M= github.com/libp2p/go-libp2p-tls v0.3.0/go.mod h1:fwF5X6PWGxm6IDRwF3V8AVCCj/hOd5oFlg+wo2FxJDY= @@ -1082,6 +1088,7 @@ github.com/libp2p/go-libp2p-yamux v0.5.0/go.mod h1:AyR8k5EzyM2QN9Bbdg6X1SkVVuqLw github.com/libp2p/go-libp2p-yamux v0.5.1/go.mod h1:dowuvDu8CRWmr0iqySMiSxK+W0iL5cMVO9S94Y6gkv4= github.com/libp2p/go-libp2p-yamux v0.5.4/go.mod h1:tfrXbyaTqqSU654GTvK3ocnSZL3BuHoeTSqhcel1wsE= github.com/libp2p/go-libp2p-yamux v0.6.0/go.mod h1:MRhd6mAYnFRnSISp4M8i0ClV/j+mWHo2mYLifWGw33k= +github.com/libp2p/go-libp2p-yamux v0.8.0/go.mod h1:yTkPgN2ib8FHyU1ZcVD7aelzyAqXXwEPbyx+aSKm9h8= github.com/libp2p/go-libp2p-yamux v0.8.1/go.mod h1:rUozF8Jah2dL9LLGyBaBeTQeARdwhefMCTQVQt6QobE= github.com/libp2p/go-libp2p-yamux v0.9.1 h1:oplewiRix8s45SOrI30rCPZG5mM087YZp+VYhXAh4+c= github.com/libp2p/go-libp2p-yamux v0.9.1/go.mod h1:wRc6wvyxQINFcKe7daL4BeQ02Iyp+wxyC8WCNfngBrA= @@ -1166,6 +1173,7 @@ github.com/libp2p/go-yamux v1.4.1/go.mod h1:fr7aVgmdNGJK+N1g+b6DW6VxzbRCjCOejR/h github.com/libp2p/go-yamux/v2 v2.0.0/go.mod h1:NVWira5+sVUIU6tu1JWvaRn1dRnG+cawOJiflsAM+7U= github.com/libp2p/go-yamux/v2 v2.2.0/go.mod h1:3So6P6TV6r75R9jiBpiIKgU/66lOarCZjqROGxzPpPQ= github.com/libp2p/go-yamux/v2 v2.3.0/go.mod h1:iTU+lOIn/2h0AgKcL49clNTwfEw+WSfDYrXe05EyKIs= +github.com/libp2p/go-yamux/v3 v3.0.1/go.mod h1:s2LsDhHbh+RfCsQoICSYt58U2f8ijtPANFD8BmE74Bo= github.com/libp2p/go-yamux/v3 v3.0.2/go.mod h1:s2LsDhHbh+RfCsQoICSYt58U2f8ijtPANFD8BmE74Bo= github.com/libp2p/go-yamux/v3 v3.1.1 h1:X0qSVodCZciOu/f4KTp9V+O0LAqcqP2tdaUGB0+0lng= github.com/libp2p/go-yamux/v3 v3.1.1/go.mod h1:jeLEQgLXqE2YqX1ilAClIfCMDY+0uXQUKmmb/qp0gT4= @@ -1391,8 +1399,8 @@ 
github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXW github.com/olekukonko/tablewriter v0.0.2-0.20190409134802-7e037d187b0c/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onflow/flow v0.2.4/go.mod h1:lzyAYmbu1HfkZ9cfnL5/sjrrsnJiUU8fRL26CqLP7+c= github.com/onflow/flow v0.2.5/go.mod h1:lzyAYmbu1HfkZ9cfnL5/sjrrsnJiUU8fRL26CqLP7+c= -github.com/onflow/flow v0.3.0 h1:qDKHFXh5HVZRxw+MpHdENXrqA2gVhmCq0CRW7X3ObLA= -github.com/onflow/flow v0.3.0/go.mod h1:lzyAYmbu1HfkZ9cfnL5/sjrrsnJiUU8fRL26CqLP7+c= +github.com/onflow/flow v0.3.1 h1:kL/tNvCXeBw4yCVPys/m9rxvKxrO7Ck/mVNqHFtkTrI= +github.com/onflow/flow v0.3.1/go.mod h1:lzyAYmbu1HfkZ9cfnL5/sjrrsnJiUU8fRL26CqLP7+c= github.com/onflow/flow-core-contracts/lib/go/contracts v0.7.3-0.20210527134022-58c25247091a/go.mod h1:IZ2e7UyLCYmpQ8Kd7k0A32uXqdqfiV1r2sKs5/riblo= github.com/onflow/flow-core-contracts/lib/go/contracts v0.11.2-0.20220413172500-d89ca96e6db3/go.mod h1:T6yhM+kWrFxiP6F3hh8lh9DcocHfmv48P4ITnjLhKSk= github.com/onflow/flow-core-contracts/lib/go/contracts v0.11.2-0.20220422202806-92ad02a996cc/go.mod h1:T6yhM+kWrFxiP6F3hh8lh9DcocHfmv48P4ITnjLhKSk= @@ -1430,8 +1438,8 @@ github.com/onflow/flow/protobuf/go/flow v0.2.0/go.mod h1:kRugbzZjwQqvevJhrnnCFMJ github.com/onflow/flow/protobuf/go/flow v0.2.2/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= github.com/onflow/flow/protobuf/go/flow v0.2.4/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= github.com/onflow/flow/protobuf/go/flow v0.2.5/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= -github.com/onflow/flow/protobuf/go/flow v0.3.0 h1:DSnThQkS7Hbl1X99vUaXYdVKYfNWd5ZU5R8kl+SAPFM= -github.com/onflow/flow/protobuf/go/flow v0.3.0/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= +github.com/onflow/flow/protobuf/go/flow v0.3.1 h1:4I8ykG6naR3n8Or6eXrZDaGVaoztb3gP2KJ6XKyDufg= +github.com/onflow/flow/protobuf/go/flow v0.3.1/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= github.com/onflow/fusd/lib/go/contracts v0.0.0-20211021081023-ae9de8fb2c7e/go.mod h1:CRX9eXtc9zHaRVTW1Xh4Cf5pZgKkQuu1NuSEVyHXr/0= github.com/onflow/sdks v0.4.2 h1:UdnXOdcIPIdD02n2SxQVGTJBAxGqJBgOkThxI3/IDnk= github.com/onflow/sdks v0.4.2/go.mod h1:F0dj0EyHC55kknLkeD10js4mo14yTdMotnWMslPirrU= diff --git a/integration/go.mod b/integration/go.mod index 7a2e74ccb1f..bbf9a869267 100644 --- a/integration/go.mod +++ b/integration/go.mod @@ -8,6 +8,11 @@ require ( github.com/docker/docker v1.4.2-0.20190513124817-8c8457b0f2f8 github.com/docker/go-connections v0.4.0 github.com/go-yaml/yaml v2.1.0+incompatible + github.com/ipfs/go-blockservice v0.2.0 + github.com/ipfs/go-cid v0.1.0 + github.com/ipfs/go-datastore v0.5.1 + github.com/ipfs/go-ds-badger2 v0.1.3 + github.com/ipfs/go-ipfs-blockstore v0.2.0 github.com/jedib0t/go-pretty v4.3.0+incompatible github.com/onflow/cadence v0.21.3-0.20220601002855-8b113c539a2c github.com/onflow/flow-core-contracts/lib/go/contracts v0.11.2-0.20220513155751-c4c1f8d59f83 @@ -17,7 +22,7 @@ require ( github.com/onflow/flow-go v0.25.13-0.20220513151142-7858f76e703b // replaced by version on-disk github.com/onflow/flow-go-sdk v0.24.1-0.20220513205729-d1f58d47c4e3 github.com/onflow/flow-go/crypto v0.24.3 - github.com/onflow/flow/protobuf/go/flow v0.3.0 + github.com/onflow/flow/protobuf/go/flow v0.3.1 github.com/plus3it/gorecurcopy v0.0.1 github.com/rs/zerolog v1.26.1 github.com/stretchr/testify v1.7.1 @@ -115,13 +120,8 @@ require ( github.com/ipfs/bbloom v0.0.4 // indirect github.com/ipfs/go-bitswap v0.5.0 // indirect github.com/ipfs/go-block-format 
v0.0.3 // indirect - github.com/ipfs/go-blockservice v0.2.0 // indirect - github.com/ipfs/go-cid v0.1.0 // indirect github.com/ipfs/go-cidutil v0.0.2 // indirect - github.com/ipfs/go-datastore v0.5.1 // indirect - github.com/ipfs/go-ds-badger2 v0.1.3 // indirect github.com/ipfs/go-fetcher v1.5.0 // indirect - github.com/ipfs/go-ipfs-blockstore v0.2.0 // indirect github.com/ipfs/go-ipfs-delay v0.0.1 // indirect github.com/ipfs/go-ipfs-ds-help v1.1.0 // indirect github.com/ipfs/go-ipfs-exchange-interface v0.1.0 // indirect @@ -160,7 +160,7 @@ require ( github.com/libp2p/go-libp2p-noise v0.4.0 // indirect github.com/libp2p/go-libp2p-peerstore v0.6.0 // indirect github.com/libp2p/go-libp2p-pnet v0.2.0 // indirect - github.com/libp2p/go-libp2p-pubsub v0.6.0 // indirect + github.com/libp2p/go-libp2p-pubsub v0.6.2-0.20220530164708-06b5ba47636c // indirect github.com/libp2p/go-libp2p-quic-transport v0.17.0 // indirect github.com/libp2p/go-libp2p-record v0.1.3 // indirect github.com/libp2p/go-libp2p-resource-manager v0.2.1 // indirect diff --git a/integration/go.sum b/integration/go.sum index c3b3c208719..b8dfe5bc875 100644 --- a/integration/go.sum +++ b/integration/go.sum @@ -177,7 +177,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.7.0/go.mod h1:0qcSMCyASQPN2sk/1KQLQ2 github.com/aws/smithy-go v1.8.0 h1:AEwwwXQZtUwP5Mz506FeXXrKBe0jA8gVM+1gEcSRooc= github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/benbjohnson/clock v1.0.2/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM= -github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= @@ -1101,8 +1100,8 @@ github.com/libp2p/go-libp2p-peerstore v0.6.0 h1:HJminhQSGISBIRb93N6WK3t6Fa8OOTnH github.com/libp2p/go-libp2p-peerstore v0.6.0/go.mod h1:DGEmKdXrcYpK9Jha3sS7MhqYdInxJy84bIPtSu65bKc= github.com/libp2p/go-libp2p-pnet v0.2.0 h1:J6htxttBipJujEjz1y0a5+eYoiPcFHhSYHH6na5f0/k= github.com/libp2p/go-libp2p-pnet v0.2.0/go.mod h1:Qqvq6JH/oMZGwqs3N1Fqhv8NVhrdYcO0BW4wssv21LA= -github.com/libp2p/go-libp2p-pubsub v0.6.0 h1:98+RXuEWW17U6cAijK1yaTf6mw/B+n5yPA421z+dlo0= -github.com/libp2p/go-libp2p-pubsub v0.6.0/go.mod h1:nJv87QM2cU0w45KPR1rZicq+FmFIOD16zmT+ep1nOmg= +github.com/libp2p/go-libp2p-pubsub v0.6.2-0.20220530164708-06b5ba47636c h1:vvjRhS6tFFVQw0XzvbX5I6ch1O8JELRHyCTDdLNa/bE= +github.com/libp2p/go-libp2p-pubsub v0.6.2-0.20220530164708-06b5ba47636c/go.mod h1:EuyBJFtF8qF67IEA98biwK8Xnw5MNJpJ/Z+8iWCMFwc= github.com/libp2p/go-libp2p-quic-transport v0.10.0/go.mod h1:RfJbZ8IqXIhxBRm5hqUEJqjiiY8xmEuq3HUDS993MkA= github.com/libp2p/go-libp2p-quic-transport v0.11.2/go.mod h1:wlanzKtIh6pHrq+0U3p3DY9PJfGqxMgPaGKaK5LifwQ= github.com/libp2p/go-libp2p-quic-transport v0.13.0/go.mod h1:39/ZWJ1TW/jx1iFkKzzUg00W6tDJh73FC0xYudjr7Hc= @@ -1483,7 +1482,7 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.2-0.20190409134802-7e037d187b0c/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onflow/flow v0.3.0/go.mod 
h1:lzyAYmbu1HfkZ9cfnL5/sjrrsnJiUU8fRL26CqLP7+c= +github.com/onflow/flow v0.3.1/go.mod h1:lzyAYmbu1HfkZ9cfnL5/sjrrsnJiUU8fRL26CqLP7+c= github.com/onflow/flow-core-contracts/lib/go/contracts v0.11.2-0.20220513155751-c4c1f8d59f83 h1:mpJirFu/JWMLV0IhKDZleVrVdN5B8QERV4gSXDef5bA= github.com/onflow/flow-core-contracts/lib/go/contracts v0.11.2-0.20220513155751-c4c1f8d59f83/go.mod h1:T6yhM+kWrFxiP6F3hh8lh9DcocHfmv48P4ITnjLhKSk= github.com/onflow/flow-core-contracts/lib/go/templates v0.11.2-0.20220513155751-c4c1f8d59f83 h1:w4uXFTvjQmLtA/X50H4YXVlzbdsoL3vDI3Y86jtJOMM= @@ -1507,8 +1506,8 @@ github.com/onflow/flow/protobuf/go/flow v0.1.8/go.mod h1:kRugbzZjwQqvevJhrnnCFMJ github.com/onflow/flow/protobuf/go/flow v0.1.9/go.mod h1:kRugbzZjwQqvevJhrnnCFMJZNmoSJmxlKt6hTGXZojM= github.com/onflow/flow/protobuf/go/flow v0.2.2/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= github.com/onflow/flow/protobuf/go/flow v0.2.5/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= -github.com/onflow/flow/protobuf/go/flow v0.3.0 h1:DSnThQkS7Hbl1X99vUaXYdVKYfNWd5ZU5R8kl+SAPFM= -github.com/onflow/flow/protobuf/go/flow v0.3.0/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= +github.com/onflow/flow/protobuf/go/flow v0.3.1 h1:4I8ykG6naR3n8Or6eXrZDaGVaoztb3gP2KJ6XKyDufg= +github.com/onflow/flow/protobuf/go/flow v0.3.1/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= github.com/onflow/fusd/lib/go/contracts v0.0.0-20211021081023-ae9de8fb2c7e/go.mod h1:CRX9eXtc9zHaRVTW1Xh4Cf5pZgKkQuu1NuSEVyHXr/0= github.com/onflow/sdks v0.4.2 h1:UdnXOdcIPIdD02n2SxQVGTJBAxGqJBgOkThxI3/IDnk= github.com/onflow/sdks v0.4.2/go.mod h1:F0dj0EyHC55kknLkeD10js4mo14yTdMotnWMslPirrU= diff --git a/integration/localnet/Makefile b/integration/localnet/Makefile index 815cd40d24c..2b745dbd68e 100644 --- a/integration/localnet/Makefile +++ b/integration/localnet/Makefile @@ -38,7 +38,7 @@ init: # Creates a light version of the localnet with just 1 instance for each node type .PHONY: init-light init-light: - $(MAKE) -e COLLECTION=1 CONSENSUS=1 EXECUTION=1 VERIFICATION=1 ACCESS=1 NCLUSTERS=1 init + $(MAKE) -e COLLECTION=1 CONSENSUS=2 EXECUTION=1 VERIFICATION=1 ACCESS=1 NCLUSTERS=1 init # Creates a version of localnet configured with short epochs .PHONY: init-short-epochs diff --git a/integration/testnet/container.go b/integration/testnet/container.go index 2f6dcc0d5a4..0f0d18a7741 100644 --- a/integration/testnet/container.go +++ b/integration/testnet/container.go @@ -212,10 +212,25 @@ func (c *Container) DB() (*badger.DB, error) { return db, err } +// DB returns the node's execution data database. +func (c *Container) ExecutionDataDB() (*badger.DB, error) { + opts := badger. + DefaultOptions(c.ExecutionDataDBPath()). + WithKeepL0InMemory(true). 
+ WithLogger(nil) + + db, err := badger.Open(opts) + return db, err +} + func (c *Container) DBPath() string { return filepath.Join(c.datadir, DefaultFlowDBDir) } +func (c *Container) ExecutionDataDBPath() string { + return filepath.Join(c.datadir, DefaultExecutionDataServiceDir) +} + func (c *Container) BootstrapPath() string { return filepath.Join(c.datadir, DefaultBootstrapDir) } diff --git a/integration/tests/access/execution_state_sync_test.go b/integration/tests/access/execution_state_sync_test.go new file mode 100644 index 00000000000..6b1425539e4 --- /dev/null +++ b/integration/tests/access/execution_state_sync_test.go @@ -0,0 +1,208 @@ +package access + +import ( + "context" + "fmt" + "os" + "testing" + + badgerds "github.com/ipfs/go-ds-badger2" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/onflow/flow-go/engine/ghost/client" + "github.com/onflow/flow-go/integration/testnet" + "github.com/onflow/flow-go/integration/tests/lib" + "github.com/onflow/flow-go/integration/utils" + "github.com/onflow/flow-go/model/encoding/cbor" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/module/state_synchronization" + "github.com/onflow/flow-go/network/compressor" + storage "github.com/onflow/flow-go/storage/badger" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestExecutionStateSync(t *testing.T) { + suite.Run(t, new(ExecutionStateSyncSuite)) +} + +type ExecutionStateSyncSuite struct { + suite.Suite + lib.TestnetStateTracker + + log zerolog.Logger + + bridgeID flow.Identifier + ghostID flow.Identifier + + // root context for the current test + ctx context.Context + cancel context.CancelFunc + + net *testnet.FlowNetwork +} + +func (s *ExecutionStateSyncSuite) SetupTest() { + logger := unittest.LoggerWithLevel(zerolog.InfoLevel).With(). + Str("testfile", "execution_state_sync_test.go"). + Str("testcase", s.T().Name()). 
+ Logger() + + s.log = logger + s.log.Info().Msg("================> SetupTest") + s.ctx, s.cancel = context.WithCancel(context.Background()) + + s.buildNetworkConfig() + + // start the network + s.net.Start(s.ctx) + + s.Track(s.T(), s.ctx, s.Ghost()) +} + +func (s *ExecutionStateSyncSuite) TearDownTest() { + s.log.Info().Msg("================> Start TearDownTest") + s.net.Remove() + s.cancel() + s.log.Info().Msgf("================> Finish TearDownTest") +} + +func (s *ExecutionStateSyncSuite) Ghost() *client.GhostClient { + ghost := s.net.ContainerByID(s.ghostID) + client, err := lib.GetGhostClient(ghost) + require.NoError(s.T(), err, "could not get ghost client") + return client +} + +func (s *ExecutionStateSyncSuite) buildNetworkConfig() { + // access node + s.bridgeID = unittest.IdentifierFixture() + bridgeANConfig := testnet.NewNodeConfig( + flow.RoleAccess, + testnet.WithID(s.bridgeID), + testnet.SupportsUnstakedNodes(), + testnet.WithLogLevel(zerolog.DebugLevel), + testnet.WithAdditionalFlag("--execution-data-sync-enabled=true"), + testnet.WithAdditionalFlag(fmt.Sprintf("--execution-data-dir=%s", testnet.DefaultExecutionDataServiceDir)), + testnet.WithAdditionalFlag("--execution-data-retry-delay=1s"), + ) + + // add the ghost (access) node config + s.ghostID = unittest.IdentifierFixture() + ghostNode := testnet.NewNodeConfig( + flow.RoleAccess, + testnet.WithID(s.ghostID), + testnet.WithLogLevel(zerolog.FatalLevel), + testnet.AsGhost()) + + consensusConfigs := []func(config *testnet.NodeConfig){ + testnet.WithAdditionalFlag("--hotstuff-timeout=12s"), + testnet.WithAdditionalFlag("--block-rate-delay=100ms"), + testnet.WithAdditionalFlag(fmt.Sprintf("--required-verification-seal-approvals=%d", 1)), + testnet.WithAdditionalFlag(fmt.Sprintf("--required-construction-seal-approvals=%d", 1)), + testnet.WithLogLevel(zerolog.FatalLevel), + } + + net := []testnet.NodeConfig{ + testnet.NewNodeConfig(flow.RoleCollection, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleCollection, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleExecution, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleExecution, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), + testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), + testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), + testnet.NewNodeConfig(flow.RoleVerification, testnet.WithLogLevel(zerolog.FatalLevel)), + bridgeANConfig, + ghostNode, + // TODO: add observer + } + + conf := testnet.NewNetworkConfig("execution state sync test", net) + s.net = testnet.PrepareFlowNetwork(s.T(), conf) +} + +// TestHappyPath tests that Execution Nodes generate execution data, and Access Nodes are able to +// successfully sync the data +func (s *ExecutionStateSyncSuite) TestHappyPath() { + // Let the network run for this many blocks + runBlocks := uint64(20) + + // We will check that execution data was downloaded for this many blocks + // It has to be less than runBlocks since it's not possible to see which height the AN stopped + // downloading execution data for + checkBlocks := runBlocks / 2 + + // get the first block height + blockA := s.BlockState.WaitForHighestFinalizedProgress(s.T()) + s.T().Logf("got block height %v ID %v", blockA.Header.Height, blockA.Header.ID()) + + // wait for the requested number of sealed blocks, then pause the network so we can inspect the dbs + s.BlockState.WaitForSealed(s.T(), 
blockA.Header.Height+runBlocks) + s.net.StopContainers() + + // start an execution data service using the Access Node's execution data db + an := s.net.ContainerByID(s.bridgeID) + eds, ctx := s.nodeExecutionDataService(an) + + // setup storage objects needed to get the execution data id + db, err := an.DB() + require.NoError(s.T(), err, "could not open db") + + metrics := metrics.NewNoopCollector() + headers := storage.NewHeaders(metrics, db) + results := storage.NewExecutionResults(metrics, db) + + // Loop through checkBlocks and verify the execution data was downloaded correctly + for i := blockA.Header.Height; i <= blockA.Header.Height+checkBlocks; i++ { + header, err := headers.ByHeight(i) + require.NoError(s.T(), err, "could not get header") + + result, err := results.ByBlockID(header.ID()) + require.NoError(s.T(), err, "could not get sealed result") + + s.T().Logf("getting execution data for height %d, block %s, execution_data %s", header.Height, header.ID(), result.ExecutionDataID) + + ed, err := eds.Get(ctx, result.ExecutionDataID) + assert.NoError(s.T(), err, "could not get execution data for height %v", i) + + s.T().Logf("got execution data for height %d", i) + assert.Equal(s.T(), header.ID(), ed.BlockID) + } +} + +func (s *ExecutionStateSyncSuite) nodeExecutionDataService(node *testnet.Container) (state_synchronization.ExecutionDataService, irrecoverable.SignalerContext) { + ctx, errChan := irrecoverable.WithSignaler(s.ctx) + go func() { + select { + case <-s.ctx.Done(): + return + case err := <-errChan: + s.T().Errorf("irrecoverable error: %v", err) + } + }() + + ds, err := badgerds.NewDatastore(node.ExecutionDataDBPath(), &badgerds.DefaultOptions) + require.NoError(s.T(), err, "could not get execution datastore") + + go func() { + <-s.ctx.Done() + if err := ds.Close(); err != nil { + s.T().Logf("could not close execution data datastore: %v", err) + } + }() + + blobService := utils.NewLocalBlobService(ds, utils.WithHashOnRead(true)) + blobService.Start(ctx) + + return state_synchronization.NewExecutionDataService( + new(cbor.Codec), + compressor.NewLz4Compressor(), + blobService, + metrics.NewNoopCollector(), + zerolog.New(os.Stdout).With().Str("test", "execution-state-sync").Logger(), + ), ctx +} diff --git a/integration/utils/blob_service.go b/integration/utils/blob_service.go new file mode 100644 index 00000000000..e375113e122 --- /dev/null +++ b/integration/utils/blob_service.go @@ -0,0 +1,94 @@ +package utils + +import ( + "context" + "fmt" + + "github.com/ipfs/go-blockservice" + "github.com/ipfs/go-cid" + "github.com/ipfs/go-datastore" + blockstore "github.com/ipfs/go-ipfs-blockstore" + + "github.com/onflow/flow-go/module/blobs" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/network" +) + +type testBlobService struct { + component.Component + blockService blockservice.BlockService + blockStore blockstore.Blockstore +} + +var _ network.BlobService = (*testBlobService)(nil) +var _ component.Component = (*testBlobService)(nil) + +// WithHashOnRead sets whether or not the blobstore will rehash the blob data on read +// When set, calls to GetBlob will fail with an error if the hash of the data in storage does not +// match its CID +func WithHashOnRead(enabled bool) network.BlobServiceOption { + return func(bs network.BlobService) { + bs.(*testBlobService).blockStore.HashOnRead(enabled) + } +} + +// NewLocalBlobService creates a new BlobService that only interacts with its local datastore +func 
NewLocalBlobService( + ds datastore.Batching, + opts ...network.BlobServiceOption, +) *testBlobService { + bs := &testBlobService{ + blockStore: blockstore.NewBlockstore(ds), + } + + for _, opt := range opts { + opt(bs) + } + + cm := component.NewComponentManagerBuilder(). + AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + bs.blockService = blockservice.New(bs.blockStore, nil) + + ready() + + <-ctx.Done() + + if err := bs.blockService.Close(); err != nil { + ctx.Throw(err) + } + }). + Build() + + bs.Component = cm + + return bs +} + +func (bs *testBlobService) GetBlob(ctx context.Context, c cid.Cid) (blobs.Blob, error) { + return bs.blockService.GetBlock(ctx, c) +} + +func (bs *testBlobService) GetBlobs(ctx context.Context, ks []cid.Cid) <-chan blobs.Blob { + return bs.blockService.GetBlocks(ctx, ks) +} + +func (bs *testBlobService) AddBlob(ctx context.Context, b blobs.Blob) error { + return bs.blockService.AddBlock(ctx, b) +} + +func (bs *testBlobService) AddBlobs(ctx context.Context, blobs []blobs.Blob) error { + return bs.blockService.AddBlocks(ctx, blobs) +} + +func (bs *testBlobService) DeleteBlob(ctx context.Context, c cid.Cid) error { + return bs.blockService.DeleteBlock(ctx, c) +} + +func (bs *testBlobService) GetSession(ctx context.Context) network.BlobGetter { + return nil +} + +func (bs *testBlobService) TriggerReprovide(ctx context.Context) error { + return fmt.Errorf("not implemented") +} diff --git a/integration/utils/contLoadGenerator.go b/integration/utils/contLoadGenerator.go index e438ab5d3f0..a0224e625e4 100644 --- a/integration/utils/contLoadGenerator.go +++ b/integration/utils/contLoadGenerator.go @@ -42,7 +42,6 @@ type ContLoadGenerator struct { numberOfAccounts int trackTxs bool flowClient *client.Client - supervisorClient *client.Client serviceAccount *flowAccount flowTokenAddress *flowsdk.Address fungibleTokenAddress *flowsdk.Address @@ -51,7 +50,6 @@ type ContLoadGenerator struct { availableAccounts chan *flowAccount // queue with accounts available for workers happeningAccounts chan func() (*flowAccount, string, time.Time) // queue with accounts happening after worker processing txTracker *TxTracker - txStatsTracker *TxStatsTracker workerStatsTracker *WorkerStatsTracker workers []*Worker blockRef BlockRef @@ -104,7 +102,6 @@ func NewContLoadGenerator( numberOfAccounts: numberOfAccounts, trackTxs: false, flowClient: flowClient, - supervisorClient: supervisorClient, serviceAccount: servAcc, fungibleTokenAddress: fungibleTokenAddress, flowTokenAddress: flowTokenAddress, @@ -112,7 +109,6 @@ func NewContLoadGenerator( availableAccounts: make(chan *flowAccount, numberOfAccounts), happeningAccounts: make(chan func() (*flowAccount, string, time.Time), numberOfAccounts), txTracker: txTracker, - txStatsTracker: txStatsTracker, workerStatsTracker: NewWorkerStatsTracker(), blockRef: NewBlockRef(supervisorClient), loadType: loadType, diff --git a/ledger/common/encoding/encoding_test.go b/ledger/common/encoding/encoding_test.go index e27a191fc3f..26ee02603f1 100644 --- a/ledger/common/encoding/encoding_test.go +++ b/ledger/common/encoding/encoding_test.go @@ -40,19 +40,19 @@ func TestKeyPartSerialization(t *testing.T) { // decode key part encoded in version 0 decodedkp, err := encoding.DecodeKeyPart(encodedV0) require.NoError(t, err) - require.True(t, kp.Equals(decodedkp)) + require.Equal(t, kp, *decodedkp) // decode key part encoded in version 1 decodedkp, err = encoding.DecodeKeyPart(encodedV1) require.NoError(t, err) - require.True(t, 
kp.Equals(decodedkp)) + require.Equal(t, kp, *decodedkp) }) t.Run("roundtrip", func(t *testing.T) { encoded := encoding.EncodeKeyPart(&kp) newkp, err := encoding.DecodeKeyPart(encoded) require.NoError(t, err) - require.True(t, kp.Equals(newkp)) + require.Equal(t, kp, *newkp) // wrong type decoding _, err = encoding.DecodeKey(encoded) @@ -104,19 +104,19 @@ func TestKeySerialization(t *testing.T) { // decode key encoded in version 0 decodedk, err := encoding.DecodeKey(encodedV0) require.NoError(t, err) - require.True(t, decodedk.Equals(&k)) + require.Equal(t, k, *decodedk) // decode key encoded in version 1 decodedk, err = encoding.DecodeKey(encodedV1) require.NoError(t, err) - require.True(t, decodedk.Equals(&k)) + require.Equal(t, k, *decodedk) }) t.Run("roundtrip", func(t *testing.T) { encoded := encoding.EncodeKey(&k) newk, err := encoding.DecodeKey(encoded) require.NoError(t, err) - require.True(t, newk.Equals(&k)) + require.Equal(t, k, *newk) }) } @@ -208,19 +208,19 @@ func TestPayloadSerialization(t *testing.T) { // decode payload encoded in version 0 decodedp, err := encoding.DecodePayload(encodedV0) require.NoError(t, err) - require.True(t, decodedp.Equals(p)) + require.Equal(t, p, decodedp) // decode payload encoded in version 1 decodedp, err = encoding.DecodePayload(encodedV1) require.NoError(t, err) - require.True(t, decodedp.Equals(p)) + require.Equal(t, p, decodedp) }) t.Run("roundtrip", func(t *testing.T) { encoded := encoding.EncodePayload(p) newp, err := encoding.DecodePayload(encoded) require.NoError(t, err) - require.True(t, newp.Equals(p)) + require.Equal(t, p, newp) }) } @@ -366,7 +366,7 @@ func TestPayloadWithoutPrefixSerialization(t *testing.T) { // Decode payload (excluding prefix) decodedp, err := encoding.DecodePayloadWithoutPrefix(encoded[bufPrefixLen:], tc.zeroCopy, encoding.PayloadVersion) require.NoError(t, err) - require.True(t, decodedp.Equals(p)) + require.Equal(t, p, decodedp) // Reset encoded payload for i := 0; i < len(encoded); i++ { @@ -376,10 +376,10 @@ func TestPayloadWithoutPrefixSerialization(t *testing.T) { if tc.zeroCopy { // Test if decoded payload is changed after source data is modified // because data is shared. - require.False(t, decodedp.Equals(p)) + require.NotEqual(t, p, decodedp) } else { // Test if decoded payload is unchanged after source data is modified. - require.True(t, decodedp.Equals(p)) + require.Equal(t, p, decodedp) } }) } diff --git a/ledger/complete/ledger.go b/ledger/complete/ledger.go index f6a01037d15..0eaadcb014b 100644 --- a/ledger/complete/ledger.go +++ b/ledger/complete/ledger.go @@ -133,6 +133,29 @@ func (l *Ledger) ValueSizes(query *ledger.Query) (valueSizes []int, err error) { return valueSizes, err } +// GetSingleValue reads value of a single given key at the given state. 
+func (l *Ledger) GetSingleValue(query *ledger.QuerySingleValue) (value ledger.Value, err error) { + start := time.Now() + path, err := pathfinder.KeyToPath(query.Key(), l.pathFinderVersion) + if err != nil { + return nil, err + } + trieRead := &ledger.TrieReadSingleValue{RootHash: ledger.RootHash(query.State()), Path: path} + value, err = l.forest.ReadSingleValue(trieRead) + if err != nil { + return nil, err + } + + l.metrics.ReadValuesNumber(1) + readDuration := time.Since(start) + l.metrics.ReadDuration(readDuration) + + durationPerValue := time.Duration(readDuration.Nanoseconds()) * time.Nanosecond + l.metrics.ReadDurationPerItem(durationPerValue) + + return value, nil +} + // Get read the values of the given keys at the given state // it returns the values in the same order as given registerIDs and errors (if any) func (l *Ledger) Get(query *ledger.Query) (values []ledger.Value, err error) { @@ -142,11 +165,7 @@ func (l *Ledger) Get(query *ledger.Query) (values []ledger.Value, err error) { return nil, err } trieRead := &ledger.TrieRead{RootHash: ledger.RootHash(query.State()), Paths: paths} - payloads, err := l.forest.Read(trieRead) - if err != nil { - return nil, err - } - values, err = pathfinder.PayloadsToValues(payloads) + values, err = l.forest.Read(trieRead) if err != nil { return nil, err } diff --git a/ledger/complete/ledger_benchmark_test.go b/ledger/complete/ledger_benchmark_test.go index 98b5a2f5d40..519aee54e7c 100644 --- a/ledger/complete/ledger_benchmark_test.go +++ b/ledger/complete/ledger_benchmark_test.go @@ -239,6 +239,79 @@ func BenchmarkTrieRead(b *testing.B) { b.StopTimer() } +func BenchmarkLedgerGetOneValue(b *testing.B) { + // key updates per iteration + numInsPerStep := 10000 + keyNumberOfParts := 10 + keyPartMinByteSize := 1 + keyPartMaxByteSize := 100 + valueMaxByteSize := 32 + rand.Seed(1) + + dir, err := os.MkdirTemp("", "test-mtrie-") + defer os.RemoveAll(dir) + if err != nil { + b.Fatal(err) + } + + diskWal, err := wal.NewDiskWAL(zerolog.Nop(), nil, metrics.NewNoopCollector(), dir, 101, pathfinder.PathByteSize, wal.SegmentSize) + require.NoError(b, err) + defer func() { + <-diskWal.Done() + }() + + led, err := complete.NewLedger(diskWal, 101, &metrics.NoopCollector{}, zerolog.Logger{}, complete.DefaultPathFinderVersion) + defer led.Done() + if err != nil { + b.Fatal("can't create a new complete ledger") + } + + state := led.InitialState() + + keys := utils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, keyPartMinByteSize, keyPartMaxByteSize) + values := utils.RandomValues(numInsPerStep, 1, valueMaxByteSize) + + update, err := ledger.NewUpdate(state, keys, values) + if err != nil { + b.Fatal(err) + } + + newState, _, err := led.Set(update) + if err != nil { + b.Fatal(err) + } + + b.Run("batch get", func(b *testing.B) { + query, err := ledger.NewQuery(newState, []ledger.Key{keys[0]}) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err = led.Get(query) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("single get", func(b *testing.B) { + query, err := ledger.NewQuerySingleValue(newState, keys[0]) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err = led.GetSingleValue(query) + if err != nil { + b.Fatal(err) + } + } + }) +} + // BenchmarkTrieUpdate benchmarks the performance of a trie prove func BenchmarkTrieProve(b *testing.B) { // key updates per iteration diff --git a/ledger/complete/ledger_test.go b/ledger/complete/ledger_test.go index 27cb57c978e..efe6020651a 
100644 --- a/ledger/complete/ledger_test.go +++ b/ledger/complete/ledger_test.go @@ -122,6 +122,96 @@ func TestLedger_Get(t *testing.T) { }) } +// TestLedger_GetSingleValue tests reading value from a single path. +func TestLedger_GetSingleValue(t *testing.T) { + + wal := &fixtures.NoopWAL{} + led, err := complete.NewLedger( + wal, + 100, + &metrics.NoopCollector{}, + zerolog.Logger{}, + complete.DefaultPathFinderVersion, + ) + require.NoError(t, err) + + state := led.InitialState() + + t.Run("non-existent key", func(t *testing.T) { + + keys := utils.RandomUniqueKeys(10, 2, 1, 10) + + for _, k := range keys { + qs, err := ledger.NewQuerySingleValue(state, k) + require.NoError(t, err) + + retValue, err := led.GetSingleValue(qs) + require.NoError(t, err) + assert.Equal(t, 0, len(retValue)) + } + }) + + t.Run("existent key", func(t *testing.T) { + + u := utils.UpdateFixture() + u.SetState(state) + + newState, _, err := led.Set(u) + require.NoError(t, err) + assert.NotEqual(t, state, newState) + + for i, k := range u.Keys() { + q, err := ledger.NewQuerySingleValue(newState, k) + require.NoError(t, err) + + retValue, err := led.GetSingleValue(q) + require.NoError(t, err) + assert.Equal(t, u.Values()[i], retValue) + } + }) + + t.Run("mix of existent and non-existent keys", func(t *testing.T) { + + u := utils.UpdateFixture() + u.SetState(state) + + newState, _, err := led.Set(u) + require.NoError(t, err) + assert.NotEqual(t, state, newState) + + // Save expected values for existent keys + expectedValues := make(map[string]ledger.Value) + for i, key := range u.Keys() { + encKey := encoding.EncodeKey(&key) + expectedValues[string(encKey)] = u.Values()[i] + } + + // Create a randomly ordered mix of existent and non-existent keys + var queryKeys []ledger.Key + queryKeys = append(queryKeys, u.Keys()...) + queryKeys = append(queryKeys, utils.RandomUniqueKeys(10, 2, 1, 10)...) 
+ + rand.Shuffle(len(queryKeys), func(i, j int) { + queryKeys[i], queryKeys[j] = queryKeys[j], queryKeys[i] + }) + + for _, k := range queryKeys { + qs, err := ledger.NewQuerySingleValue(newState, k) + require.NoError(t, err) + + retValue, err := led.GetSingleValue(qs) + require.NoError(t, err) + + encKey := encoding.EncodeKey(&k) + if value, ok := expectedValues[string(encKey)]; ok { + require.Equal(t, value, retValue) + } else { + require.Equal(t, 0, len(retValue)) + } + } + }) +} + func TestLedgerValueSizes(t *testing.T) { t.Run("empty query", func(t *testing.T) { diff --git a/ledger/complete/mtrie/forest.go b/ledger/complete/mtrie/forest.go index 50947ec6bf9..c03a619bf5c 100644 --- a/ledger/complete/mtrie/forest.go +++ b/ledger/complete/mtrie/forest.go @@ -123,12 +123,24 @@ func (f *Forest) ValueSizes(r *ledger.TrieRead) ([]int, error) { return orderedValueSizes, nil } +// ReadSingleValue reads value for a single path and returns value and error (if any) +func (f *Forest) ReadSingleValue(r *ledger.TrieReadSingleValue) (ledger.Value, error) { + // lookup the trie by rootHash + trie, err := f.GetTrie(r.RootHash) + if err != nil { + return nil, err + } + + payload := trie.ReadSinglePayload(r.Path) + return payload.Value.DeepCopy(), nil +} + // Read reads values for an slice of paths and returns values and error (if any) // TODO: can be optimized further if we don't care about changing the order of the input r.Paths -func (f *Forest) Read(r *ledger.TrieRead) ([]*ledger.Payload, error) { +func (f *Forest) Read(r *ledger.TrieRead) ([]ledger.Value, error) { if len(r.Paths) == 0 { - return []*ledger.Payload{}, nil + return []ledger.Value{}, nil } // lookup the trie by rootHash @@ -137,6 +149,12 @@ func (f *Forest) Read(r *ledger.TrieRead) ([]*ledger.Payload, error) { return nil, err } + // call ReadSinglePayload if there is only one path + if len(r.Paths) == 1 { + payload := trie.ReadSinglePayload(r.Paths[0]) + return []ledger.Value{payload.Value.DeepCopy()}, nil + } + // deduplicate keys: // Generally, we expect the VM to deduplicate reads and writes. Hence, the following is a pre-caution. // TODO: We could take out the following de-duplication logic @@ -156,20 +174,20 @@ func (f *Forest) Read(r *ledger.TrieRead) ([]*ledger.Payload, error) { payloads := trie.UnsafeRead(deduplicatedPaths) // this sorts deduplicatedPaths IN-PLACE // reconstruct the payloads in the same key order that called the method - orderedPayloads := make([]*ledger.Payload, len(r.Paths)) + orderedValues := make([]ledger.Value, len(r.Paths)) totalPayloadSize := 0 for i, p := range deduplicatedPaths { payload := payloads[i] indices := pathOrgIndex[p] for _, j := range indices { - orderedPayloads[j] = payload.DeepCopy() + orderedValues[j] = payload.Value.DeepCopy() } totalPayloadSize += len(indices) * payload.Size() } // TODO rename the metrics f.metrics.ReadValuesSize(uint64(totalPayloadSize)) - return orderedPayloads, nil + return orderedValues, nil } // Update updates the Values for the registers and returns rootHash and error (if any). 
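A minimal sketch, assuming a forest, rootHash, and path are already in scope, of how the two Forest read paths compare after this change (illustrative only, not part of the patch):

	// Batch read: Forest.Read now returns ledger.Value directly, so callers no
	// longer convert payloads into values themselves.
	values, err := forest.Read(&ledger.TrieRead{RootHash: rootHash, Paths: []ledger.Path{path}})
	if err != nil {
		return err
	}

	// Single-value read: ReadSingleValue skips the path deduplication and slice
	// bookkeeping entirely for the common one-key case.
	value, err := forest.ReadSingleValue(&ledger.TrieReadSingleValue{RootHash: rootHash, Path: path})
	if err != nil {
		return err
	}
	fmt.Printf("batch: %x, single: %x\n", values[0], value)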
diff --git a/ledger/complete/mtrie/forest_test.go b/ledger/complete/mtrie/forest_test.go index 694b0c97f9e..bf7c9b6eda4 100644 --- a/ledger/complete/mtrie/forest_test.go +++ b/ledger/complete/mtrie/forest_test.go @@ -11,7 +11,6 @@ import ( "github.com/stretchr/testify/require" "github.com/onflow/flow-go/ledger" - "github.com/onflow/flow-go/ledger/common/encoding" prf "github.com/onflow/flow-go/ledger/common/proof" "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" @@ -67,9 +66,9 @@ func TestTrieUpdate(t *testing.T) { require.NoError(t, err) read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[0]), encoding.EncodePayload(payloads[0]))) + require.Equal(t, retValues[0], payloads[0].Value) } // TestLeftEmptyInsert tests inserting a new value into an empty sub-trie: @@ -122,10 +121,10 @@ func TestLeftEmptyInsert(t *testing.T) { paths = []ledger.Path{p1, p2, p3} payloads = []*ledger.Payload{v1, v2, v3} read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValues[i], payloads[i].Value) } } @@ -180,10 +179,10 @@ func TestRightEmptyInsert(t *testing.T) { paths = []ledger.Path{p1, p2, p3} payloads = []*ledger.Payload{v1, v2, v3} read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValues[i], payloads[i].Value) } } @@ -236,10 +235,10 @@ func TestExpansionInsert(t *testing.T) { paths = []ledger.Path{p1, p2} payloads = []*ledger.Payload{v1, v2} read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValues[i], payloads[i].Value) } } @@ -304,10 +303,10 @@ func TestFullHouseInsert(t *testing.T) { paths = []ledger.Path{p1, p2, p3} payloads = []*ledger.Payload{v1, v2, v3} read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValues[i], payloads[i].Value) } } @@ -346,10 +345,10 @@ func TestLeafInsert(t *testing.T) { require.Equal(t, uint64(v1.Size()+v2.Size()), updatedTrie.AllocatedRegSize()) read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValues[i], payloads[i].Value) } } @@ -384,9 +383,9 @@ func TestOverrideValue(t *testing.T) { require.NoError(t, err) read := &ledger.TrieRead{RootHash: 
updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[0]), encoding.EncodePayload(payloads[0]))) + require.Equal(t, retValues[0], payloads[0].Value) } @@ -417,9 +416,9 @@ func TestDuplicateOverride(t *testing.T) { paths = []ledger.Path{p0} read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[0]), encoding.EncodePayload(v2))) + require.Equal(t, retValues[0], v2.Value) } @@ -443,16 +442,16 @@ func TestReadSafety(t *testing.T) { require.NoError(t, err) require.Len(t, data, 1) - require.Equal(t, v0, data[0]) + require.Equal(t, v0.Value, data[0]) // modify returned slice - data[0].Value = []byte("new value") + data[0] = []byte("new value") // read again data2, err := forest.Read(read) require.NoError(t, err) require.Len(t, data2, 1) - require.Equal(t, v0, data2[0]) + require.Equal(t, v0.Value, data2[0]) } // TestReadOrder tests that payloads from reading a trie are delivered in the order as specified by the paths @@ -474,18 +473,18 @@ func TestReadOrder(t *testing.T) { require.NoError(t, err) read := &ledger.TrieRead{RootHash: testRoot, Paths: []ledger.Path{p1, p2}} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) - require.Equal(t, len(read.Paths), len(retPayloads)) - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[0]), encoding.EncodePayload(payloads[0]))) - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[1]), encoding.EncodePayload(payloads[1]))) + require.Equal(t, len(read.Paths), len(retValues)) + require.Equal(t, retValues[0], payloads[0].Value) + require.Equal(t, retValues[1], payloads[1].Value) read = &ledger.TrieRead{RootHash: testRoot, Paths: []ledger.Path{p2, p1}} - retPayloads, err = forest.Read(read) + retValues, err = forest.Read(read) require.NoError(t, err) - require.Equal(t, len(read.Paths), len(retPayloads)) - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[1]), encoding.EncodePayload(payloads[0]))) - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[0]), encoding.EncodePayload(payloads[1]))) + require.Equal(t, len(read.Paths), len(retValues)) + require.Equal(t, retValues[1], payloads[0].Value) + require.Equal(t, retValues[0], payloads[1].Value) } // TestMixRead tests reading a mixture of set and unset registers. 
@@ -521,10 +520,10 @@ func TestMixRead(t *testing.T) { expectedPayloads := []*ledger.Payload{v1, v2, v3, v4} read := &ledger.TrieRead{RootHash: baseRoot, Paths: readPaths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(expectedPayloads[i]))) + require.Equal(t, retValues[i], expectedPayloads[i].Value) } } @@ -549,11 +548,11 @@ func TestReadWithDuplicatedKeys(t *testing.T) { paths = []ledger.Path{p1, p2, p3} expectedPayloads := []*ledger.Payload{v1, v2, v1} read := &ledger.TrieRead{RootHash: updatedRoot, Paths: paths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) - require.Equal(t, len(read.Paths), len(retPayloads)) + require.Equal(t, len(read.Paths), len(retValues)) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(expectedPayloads[i]))) + require.Equal(t, retValues[i], expectedPayloads[i].Value) } } @@ -573,9 +572,69 @@ func TestReadNonExistingPath(t *testing.T) { p2 := pathByUint8s([]uint8{uint8(116), uint8(129)}) read := &ledger.TrieRead{RootHash: updatedRoot, Paths: []ledger.Path{p2}} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err) - require.True(t, retPayloads[0].IsEmpty()) + require.Equal(t, 0, len(retValues[0])) +} + +// TestReadSinglePayload tests reading a single payload of set/unset register. +func TestReadSinglePayload(t *testing.T) { + forest, err := NewForest(5, &metrics.NoopCollector{}, nil) + require.NoError(t, err) + + // path: 01111101... + path1 := pathByUint8s([]uint8{uint8(125), uint8(23)}) + payload1 := payloadBySlices([]byte{'A'}, []byte{'A'}) + + // path: 10110010... + path2 := pathByUint8s([]uint8{uint8(178), uint8(152)}) + payload2 := payloadBySlices([]byte{'B'}, []byte{'B'}) + + paths := []ledger.Path{path1, path2} + payloads := []*ledger.Payload{payload1, payload2} + + update := &ledger.TrieUpdate{RootHash: forest.GetEmptyRootHash(), Paths: paths, Payloads: payloads} + baseRoot, err := forest.Update(update) + require.NoError(t, err) + + // path: 01101110... + path3 := pathByUint8s([]uint8{uint8(110), uint8(48)}) + payload3 := ledger.EmptyPayload() + + // path: 00010111... + path4 := pathByUint8s([]uint8{uint8(23), uint8(82)}) + payload4 := ledger.EmptyPayload() + + expectedPayloads := make(map[ledger.Path]*ledger.Payload) + expectedPayloads[path1] = payload1 + expectedPayloads[path2] = payload2 + expectedPayloads[path3] = payload3 + expectedPayloads[path4] = payload4 + + // Batch read one payload at a time (less efficient) + for path, payload := range expectedPayloads { + read := &ledger.TrieRead{RootHash: baseRoot, Paths: []ledger.Path{path}} + retValues, err := forest.Read(read) + require.NoError(t, err) + require.Equal(t, 1, len(retValues)) + if payload.IsEmpty() { + require.Equal(t, 0, len(retValues[0])) + } else { + require.Equal(t, payload.Value, retValues[0]) + } + } + + // Read single value + for path, payload := range expectedPayloads { + read := &ledger.TrieReadSingleValue{RootHash: baseRoot, Path: path} + retValue, err := forest.ReadSingleValue(read) + require.NoError(t, err) + if payload.IsEmpty() { + require.Equal(t, 0, len(retValue)) + } else { + require.Equal(t, payload.Value, retValue) + } + } } // TestForkingUpdates updates a base trie in two different ways. 
We expect @@ -617,24 +676,24 @@ func TestForkingUpdates(t *testing.T) { // Verify payloads are preserved read := &ledger.TrieRead{RootHash: baseRoot, Paths: paths} - retPayloads, err := forest.Read(read) // reading from original Trie + retValues, err := forest.Read(read) // reading from original Trie require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValues[i], payloads[i].Value) } readA := &ledger.TrieRead{RootHash: updatedRootA, Paths: pathsA} - retPayloads, err = forest.Read(readA) // reading from updatedTrieA + retValues, err = forest.Read(readA) // reading from updatedTrieA require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloadsA[i]))) + require.Equal(t, retValues[i], payloadsA[i].Value) } readB := &ledger.TrieRead{RootHash: updatedRootB, Paths: pathsB} - retPayloads, err = forest.Read(readB) // reading from updatedTrieB + retValues, err = forest.Read(readB) // reading from updatedTrieB require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloadsB[i]))) + require.Equal(t, retValues[i], payloadsB[i].Value) } } @@ -668,17 +727,17 @@ func TestIdenticalUpdateAppliedTwice(t *testing.T) { paths = []ledger.Path{p1, p2, p3} payloads = []*ledger.Payload{v1, v2, v3} read := &ledger.TrieRead{RootHash: updatedRootA, Paths: paths} - retPayloadsA, err := forest.Read(read) + retValuesA, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloadsA[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValuesA[i], payloads[i].Value) } read = &ledger.TrieRead{RootHash: updatedRootB, Paths: paths} - retPayloadsB, err := forest.Read(read) + retValuesB, err := forest.Read(read) require.NoError(t, err) for i := range paths { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloadsB[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValuesB[i], payloads[i].Value) } } @@ -720,10 +779,10 @@ func TestRandomUpdateReadProofValueSizes(t *testing.T) { } } read := &ledger.TrieRead{RootHash: activeRoot, Paths: nonExistingPaths} - retPayloads, err := forest.Read(read) + retValues, err := forest.Read(read) require.NoError(t, err, "error reading - non existing paths") - for _, p := range retPayloads { - require.True(t, p.IsEmpty()) + for _, p := range retValues { + require.Equal(t, 0, len(p)) } // test value sizes for non-existent keys @@ -741,10 +800,10 @@ func TestRandomUpdateReadProofValueSizes(t *testing.T) { // test read read = &ledger.TrieRead{RootHash: activeRoot, Paths: paths} - retPayloads, err = forest.Read(read) + retValues, err = forest.Read(read) require.NoError(t, err, "error reading") for i := range payloads { - require.True(t, bytes.Equal(encoding.EncodePayload(retPayloads[i]), encoding.EncodePayload(payloads[i]))) + require.Equal(t, retValues[i], payloads[i].Value) } // test value sizes for existing keys @@ -792,10 +851,10 @@ func TestRandomUpdateReadProofValueSizes(t *testing.T) { } read = &ledger.TrieRead{RootHash: activeRoot, Paths: allPaths} - retPayloads, err = forest.Read(read) + retValues, err = forest.Read(read) require.NoError(t, err) for i, v := range allPayloads { - assert.True(t, v.Equals(retPayloads[i])) + assert.Equal(t, retValues[i], v.Value) } // check value sizes for all 
existing paths diff --git a/ledger/complete/mtrie/trie/trie.go b/ledger/complete/mtrie/trie/trie.go index d1db346a496..89a548ea642 100644 --- a/ledger/complete/mtrie/trie/trie.go +++ b/ledger/complete/mtrie/trie/trie.go @@ -26,6 +26,8 @@ import ( // that where not affected by the write operation are shared between the original MTrie // (before the register updates) and the updated MTrie (after the register writes). // +// MTrie expects that for a specific path, the register's key never changes. +// // DEFINITIONS and CONVENTIONS: // * HEIGHT of a node v in a tree is the number of edges on the longest downward path // between v and a tree leaf. The height of a tree is the height of its root. @@ -195,6 +197,39 @@ func valueSizes(sizes []int, paths []ledger.Path, head *node.Node) { } } +// ReadSinglePayload reads and returns a payload for a single path. +func (mt *MTrie) ReadSinglePayload(path ledger.Path) *ledger.Payload { + return readSinglePayload(path, mt.root) +} + +// readSinglePayload reads and returns a payload for a single path in subtree with `head` as root node. +func readSinglePayload(path ledger.Path, head *node.Node) *ledger.Payload { + pathBytes := path[:] + + if head == nil { + return ledger.EmptyPayload() + } + + depth := ledger.NodeMaxHeight - head.Height() // distance to the tree root + + // Traverse nodes following the path until a leaf node or nil node is reached. + for !head.IsLeaf() { + bit := bitutils.ReadBit(pathBytes, depth) + if bit == 0 { + head = head.LeftChild() + } else { + head = head.RightChild() + } + depth++ + } + + if head != nil && *head.Path() == path { + return head.Payload() + } + + return ledger.EmptyPayload() +} + // UnsafeRead reads payloads for the given paths. // UNSAFE: requires _all_ paths to have a length of mt.Height bits. // CAUTION: while reading the payloads, `paths` is permuted IN-PLACE for optimized processing. @@ -228,6 +263,7 @@ func read(payloads []*ledger.Payload, paths []ledger.Path, head *node.Node) { } return } + // reached a leaf node if head.IsLeaf() { for i, p := range paths { @@ -240,6 +276,13 @@ func read(payloads []*ledger.Payload, paths []ledger.Path, head *node.Node) { return } + // reached an interim node + if len(paths) == 1 { + // call readSinglePayload to skip partition and recursive calls when there is only one path + payloads[0] = readSinglePayload(paths[0], head) + return + } + // partition step to quick sort the paths: // lpaths contains all paths that have `0` at the partitionIndex // rpaths contains all paths that have `1` at the partitionIndex @@ -279,6 +322,7 @@ func read(payloads []*ledger.Payload, paths []ledger.Path, head *node.Node) { // * keys are NOT duplicated // * requires _all_ paths to have a length of mt.Height bits. // CAUTION: `updatedPaths` and `updatedPayloads` are permuted IN-PLACE for optimized processing. +// CAUTION: MTrie expects that for a specific path, the payload's key never changes. 
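+// (With keys fixed per path, the update logic below can compare payloads with ValueEquals
+// instead of Equals: a leaf only needs to be replaced when its value actually changes.)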
// TODO: move consistency checks from MForest to here, to make API safe and self-contained func NewTrieWithUpdatedRegisters( parentTrie *MTrie, @@ -362,7 +406,7 @@ func update( if p == parentPath { // the case where the recursion stops: only one path to update if len(paths) == 1 { - if !parentNode.Payload().Equals(&payloads[i]) { + if !parentNode.Payload().ValueEquals(&payloads[i]) { n = node.NewLeaf(paths[i], payloads[i].DeepCopy(), nodeHeight) allocatedRegCountDelta, allocatedRegSizeDelta = diff --git a/ledger/complete/mtrie/trie/trie_test.go b/ledger/complete/mtrie/trie/trie_test.go index 4a4e7cab6b0..ecf02cbb94c 100644 --- a/ledger/complete/mtrie/trie/trie_test.go +++ b/ledger/complete/mtrie/trie/trie_test.go @@ -1094,3 +1094,96 @@ func TestTrieAllocatedRegCountRegSizeWithMixedPruneFlag(t *testing.T) { require.Equal(t, expectedAllocatedRegCount, updatedTrie.AllocatedRegCount()) require.Equal(t, expectedAllocatedRegSize, updatedTrie.AllocatedRegSize()) } + +// TestReadSinglePayload tests reading a single payload of existent/non-existent path for trie of different layouts. +func TestReadSinglePayload(t *testing.T) { + + emptyTrie := trie.NewEmptyMTrie() + + // Test reading payload in empty trie + t.Run("empty trie", func(t *testing.T) { + savedRootHash := emptyTrie.RootHash() + + path := utils.PathByUint16LeftPadded(0) + payload := emptyTrie.ReadSinglePayload(path) + require.True(t, payload.IsEmpty()) + require.Equal(t, savedRootHash, emptyTrie.RootHash()) + }) + + // Test reading payload for existent/non-existent path + // in trie with compact leaf as root node. + t.Run("compact leaf as root", func(t *testing.T) { + path1 := utils.PathByUint16LeftPadded(0) + payload1 := utils.RandomPayload(1, 100) + + paths := []ledger.Path{path1} + payloads := []ledger.Payload{*payload1} + + newTrie, maxDepthTouched, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) + require.NoError(t, err) + require.Equal(t, uint16(0), maxDepthTouched) + + savedRootHash := newTrie.RootHash() + + // Get payload for existent path path + retPayload := newTrie.ReadSinglePayload(path1) + require.Equal(t, payload1, retPayload) + require.Equal(t, savedRootHash, newTrie.RootHash()) + + // Get payload for non-existent path + path2 := utils.PathByUint16LeftPadded(1) + retPayload = newTrie.ReadSinglePayload(path2) + require.True(t, retPayload.IsEmpty()) + require.Equal(t, savedRootHash, newTrie.RootHash()) + }) + + // Test reading payload for existent/non-existent path in an unpruned trie. + t.Run("trie", func(t *testing.T) { + path1 := utils.PathByUint16(1 << 12) // 000100... + path2 := utils.PathByUint16(1 << 13) // 001000... + path3 := utils.PathByUint16(1 << 14) // 010000... + + payload1 := utils.RandomPayload(1, 100) + payload2 := utils.RandomPayload(1, 100) + payload3 := ledger.EmptyPayload() + + paths := []ledger.Path{path1, path2, path3} + payloads := []ledger.Payload{*payload1, *payload2, *payload3} + + // Create an unpruned trie with 3 leaf nodes (n1, n2, n3). + newTrie, maxDepthTouched, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, false) + require.NoError(t, err) + require.Equal(t, uint16(3), maxDepthTouched) + + savedRootHash := newTrie.RootHash() + + // n5 + // / + // / + // n4 + // / \ + // / \ + // n3 n3 (path3/ + // / \ payload3) + // / \ + // n1 (path1/ n2 (path2/ + // payload1) payload2) + // + + // Test reading payload for all possible paths for the first 4 bits. 
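+ // (path1, path2, and path3 are zero beyond their first 4 bits, so these 16 prefixes cover every registered path plus its non-existent neighbours; only path1 and path2 hold non-empty payloads.)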
+ for i := 0; i < 16; i++ { + path := utils.PathByUint16(uint16(i << 12)) + + retPayload := newTrie.ReadSinglePayload(path) + require.Equal(t, savedRootHash, newTrie.RootHash()) + switch path { + case path1: + require.Equal(t, payload1, retPayload) + case path2: + require.Equal(t, payload2, retPayload) + default: + require.True(t, retPayload.IsEmpty()) + } + } + }) +} diff --git a/ledger/complete/wal/checkpointer_test.go b/ledger/complete/wal/checkpointer_test.go index 54e4078251a..53db783bbdf 100644 --- a/ledger/complete/wal/checkpointer_test.go +++ b/ledger/complete/wal/checkpointer_test.go @@ -242,19 +242,19 @@ func Test_Checkpointing(t *testing.T) { paths = append(paths, path) } - payloads1, err := f.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) + values1, err := f.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) require.NoError(t, err) - payloads2, err := f2.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) + values2, err := f2.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) require.NoError(t, err) - payloads3, err := f3.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) + values3, err := f3.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) require.NoError(t, err) for i, path := range paths { - require.True(t, data[path].Equals(payloads1[i])) - require.True(t, data[path].Equals(payloads2[i])) - require.True(t, data[path].Equals(payloads3[i])) + require.Equal(t, data[path].Value, values1[i]) + require.Equal(t, data[path].Value, values2[i]) + require.Equal(t, data[path].Value, values3[i]) } } }) @@ -325,15 +325,15 @@ func Test_Checkpointing(t *testing.T) { trieRead, err := pathfinder.QueryToTrieRead(query, pathFinderVersion) require.NoError(t, err) - payloads, err := f.Read(trieRead) + values, err := f.Read(trieRead) require.NoError(t, err) - payloads5, err := f5.Read(trieRead) + values5, err := f5.Read(trieRead) require.NoError(t, err) for i := range keys2 { - require.Equal(t, values2[i], payloads[i].Value) - require.Equal(t, values2[i], payloads5[i].Value) + require.Equal(t, values2[i], values[i]) + require.Equal(t, values2[i], values5[i]) } }) @@ -415,15 +415,15 @@ func Test_Checkpointing(t *testing.T) { trieRead, err := pathfinder.QueryToTrieRead(query, pathFinderVersion) require.NoError(t, err) - payloads, err := f.Read(trieRead) + values, err := f.Read(trieRead) require.NoError(t, err) - payloads6, err := f6.Read(trieRead) + values6, err := f6.Read(trieRead) require.NoError(t, err) for i := range keys2 { - require.Equal(t, values2[i], payloads[i].Value) - require.Equal(t, values2[i], payloads6[i].Value) + require.Equal(t, values2[i], values[i]) + require.Equal(t, values2[i], values6[i]) } }) diff --git a/ledger/complete/wal/compactor_test.go b/ledger/complete/wal/compactor_test.go index ffec556fc15..4a29aad8ff5 100644 --- a/ledger/complete/wal/compactor_test.go +++ b/ledger/complete/wal/compactor_test.go @@ -194,15 +194,15 @@ func Test_Compactor(t *testing.T) { } read := &ledger.TrieRead{RootHash: rootHash, Paths: paths} - payloads, err := f.Read(read) + values, err := f.Read(read) require.NoError(t, err) - payloads2, err := f2.Read(read) + values2, err := f2.Read(read) require.NoError(t, err) for i, path := range paths { - require.True(t, data[path].Equals(payloads[i])) - require.True(t, data[path].Equals(payloads2[i])) + require.Equal(t, data[path].Value, values[i]) + require.Equal(t, data[path].Value, values2[i]) } } diff --git a/ledger/ledger.go b/ledger/ledger.go index 6b5578f1945..54c8e36fb4b 100644 --- a/ledger/ledger.go +++ 
b/ledger/ledger.go @@ -24,6 +24,9 @@ type Ledger interface { // InitialState returns the initial state of the ledger InitialState() State + // GetSingleValue returns value for a given key at specific state + GetSingleValue(query *QuerySingleValue) (value Value, err error) + // Get returns values for the given slice of keys at specific state Get(query *Query) (values []Value, err error) @@ -45,7 +48,7 @@ func NewEmptyQuery(sc State) (*Query, error) { return &Query{state: sc}, nil } -// NewQuery constructs a new ledger query +// NewQuery constructs a new ledger query func NewQuery(sc State, keys []Key) (*Query, error) { return &Query{state: sc, keys: keys}, nil } @@ -70,6 +73,27 @@ func (q *Query) SetState(s State) { q.state = s } +// QuerySingleValue contains ledger query for a single value +type QuerySingleValue struct { + state State + key Key +} + +// NewQuerySingleValue constructs a new ledger query for a single value +func NewQuerySingleValue(sc State, key Key) (*QuerySingleValue, error) { + return &QuerySingleValue{state: sc, key: key}, nil +} + +// Key returns key of the query +func (q *QuerySingleValue) Key() Key { + return q.key +} + +// State returns the state part of the query +func (q *QuerySingleValue) State() State { + return q.state +} + // Update holds all data needed for a ledger update type Update struct { state State diff --git a/ledger/ledger_test.go b/ledger/ledger_test.go index 6f1de9adf19..fcf9a9f13a3 100644 --- a/ledger/ledger_test.go +++ b/ledger/ledger_test.go @@ -57,10 +57,10 @@ func BenchmarkOriginalCanonicalForm(b *testing.B) { } } -// TestPayloadKeyEquals tests whether keys are equal. +// TestKeyEquals tests whether keys are equal. // It tests equality of empty, nil, and not-empty keys. // Empty key and nil key should be equal. -func TestPayloadKeyEquals(t *testing.T) { +func TestKeyEquals(t *testing.T) { nilKey := (*Key)(nil) emptyKey := &Key{} @@ -186,10 +186,10 @@ func TestPayloadKeyEquals(t *testing.T) { }) } -// TestPayloadValueEquals tests whether values are equal. +// TestValueEquals tests whether values are equal. // It tests equality of empty, nil, and not-empty values. // Empty value and nil value should be equal. -func TestPayloadValueEquals(t *testing.T) { +func TestValueEquals(t *testing.T) { nilValue := (Value)(nil) emptyValue := Value{} @@ -240,113 +240,3 @@ func TestPayloadValueEquals(t *testing.T) { require.True(t, v2.Equals(v1)) }) } - -// TestPayloadEquals tests whether payloads are equal. -// It tests equality of empty, nil, and not-empty payloads. -// Empty payload and nil payload should be equal. 
-func TestPayloadEquals(t *testing.T) { - nilPayload := (*Payload)(nil) - emptyPayload := EmptyPayload() - - t.Run("nil vs empty", func(t *testing.T) { - require.True(t, nilPayload.Equals(emptyPayload)) - require.True(t, emptyPayload.Equals(nilPayload)) - }) - - t.Run("nil vs nil", func(t *testing.T) { - require.True(t, nilPayload.Equals(nilPayload)) - }) - - t.Run("empty vs empty", func(t *testing.T) { - require.True(t, emptyPayload.Equals(emptyPayload)) - }) - - t.Run("empty vs non-empty", func(t *testing.T) { - p := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x04}, - } - require.False(t, emptyPayload.Equals(p)) - require.False(t, p.Equals(emptyPayload)) - }) - - t.Run("nil vs non-empty", func(t *testing.T) { - p := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x04}, - } - require.False(t, nilPayload.Equals(p)) - require.False(t, p.Equals(nilPayload)) - }) - - t.Run("different key", func(t *testing.T) { - p := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x04}, - } - // p1.Key.KeyParts[0].Type is different - p1 := &Payload{ - Key: Key{KeyParts: []KeyPart{{2, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x04}, - } - // p2.Key.KeyParts[0].Value is different - p2 := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02, 0x03}}}}, - Value: []byte{0x03, 0x04}, - } - // len(p3.Key.KeyParts) is different - p3 := &Payload{ - Key: Key{KeyParts: []KeyPart{ - {1, []byte{0x01, 0x02}}, - {2, []byte{0x03, 0x04}}}, - }, - Value: []byte{0x03, 0x04}, - } - require.False(t, p.Equals(p1)) - require.False(t, p.Equals(p2)) - require.False(t, p.Equals(p3)) - }) - - t.Run("different value", func(t *testing.T) { - p := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x04}, - } - // p1.Value is nil - p1 := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - } - // p2.Value is empty - p2 := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{}, - } - // p3.Value length is different - p3 := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03}, - } - // p4.Value data is different - p4 := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x05}, - } - require.False(t, p.Equals(p1)) - require.False(t, p.Equals(p2)) - require.False(t, p.Equals(p3)) - require.False(t, p.Equals(p4)) - }) - - t.Run("same", func(t *testing.T) { - p1 := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x04}, - } - p2 := &Payload{ - Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, - Value: []byte{0x03, 0x04}, - } - require.True(t, p1.Equals(p2)) - require.True(t, p2.Equals(p1)) - }) -} diff --git a/ledger/mock/ledger.go b/ledger/mock/ledger.go index 258789d2171..aa9f9291dde 100644 --- a/ledger/mock/ledger.go +++ b/ledger/mock/ledger.go @@ -53,6 +53,29 @@ func (_m *Ledger) Get(query *ledger.Query) ([]ledger.Value, error) { return r0, r1 } +// GetSingleValue provides a mock function with given fields: query +func (_m *Ledger) GetSingleValue(query *ledger.QuerySingleValue) (ledger.Value, error) { + ret := _m.Called(query) + + var r0 ledger.Value + if rf, ok := ret.Get(0).(func(*ledger.QuerySingleValue) ledger.Value); ok { + r0 = rf(query) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(ledger.Value) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(*ledger.QuerySingleValue) 
error); ok { + r1 = rf(query) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // InitialState provides a mock function with given fields: func (_m *Ledger) InitialState() ledger.State { ret := _m.Called() diff --git a/ledger/partial/ledger.go b/ledger/partial/ledger.go index 89ab41ce1ef..cae3bc5fe37 100644 --- a/ledger/partial/ledger.go +++ b/ledger/partial/ledger.go @@ -63,6 +63,22 @@ func (l *Ledger) InitialState() ledger.State { return l.state } +// GetSingleValue reads value of a given key at the given state +func (l *Ledger) GetSingleValue(query *ledger.QuerySingleValue) (value ledger.Value, err error) { + path, err := pathfinder.KeyToPath(query.Key(), l.pathFinderVersion) + if err != nil { + return nil, err + } + payload, err := l.ptrie.GetSinglePayload(path) + if err != nil { + if _, ok := err.(*ptrie.ErrMissingPath); ok { + return nil, &ledger.ErrMissingKeys{Keys: []ledger.Key{query.Key()}} + } + return nil, err + } + return payload.Value, err +} + // Get read the values of the given keys at the given state // it returns the values in the same order as given registerIDs and errors (if any) func (l *Ledger) Get(query *ledger.Query) (values []ledger.Value, err error) { diff --git a/ledger/partial/ledger_test.go b/ledger/partial/ledger_test.go index b9515de2783..0f1485f0bd0 100644 --- a/ledger/partial/ledger_test.go +++ b/ledger/partial/ledger_test.go @@ -42,18 +42,51 @@ func TestFunctionalityWithCompleteTrie(t *testing.T) { assert.NoError(t, err) assert.Equal(t, pled.InitialState(), newState) - // test missing keys (get) + // test batch querying existent keys + query, err = ledger.NewQuery(newState, keys[0:2]) + require.NoError(t, err) + + retValues, err := pled.Get(query) + require.NoError(t, err) + require.Equal(t, 2, len(retValues)) + for i := 0; i < len(retValues); i++ { + require.Equal(t, values[i], retValues[i]) + } + + // test querying single existent key + querySingleValue, err := ledger.NewQuerySingleValue(newState, keys[0]) + require.NoError(t, err) + + retValue, err := pled.GetSingleValue(querySingleValue) + require.NoError(t, err) + require.Equal(t, values[0], retValue) + + // test batch getting missing keys query, err = ledger.NewQuery(newState, keys[1:3]) require.NoError(t, err) - _, err = pled.Get(query) + retValues, err = pled.Get(query) require.Error(t, err) + require.Nil(t, retValues) e, ok := err.(*ledger.ErrMissingKeys) require.True(t, ok) assert.Equal(t, len(e.Keys), 1) require.True(t, e.Keys[0].Equals(&keys[2])) + // test querying single non-existent key + querySingleValue, err = ledger.NewQuerySingleValue(newState, keys[2]) + require.NoError(t, err) + + retValue, err = pled.GetSingleValue(querySingleValue) + require.Error(t, err) + require.Nil(t, retValue) + + e, ok = err.(*ledger.ErrMissingKeys) + require.True(t, ok) + assert.Equal(t, len(e.Keys), 1) + require.True(t, e.Keys[0].Equals(&keys[2])) + // test missing keys (set) update, err = ledger.NewUpdate(state, keys[1:3], values[1:3]) require.NoError(t, err) diff --git a/ledger/partial/ptrie/partialTrie.go b/ledger/partial/ptrie/partialTrie.go index 3c8bf2a4f9a..5f4a3b95cfb 100644 --- a/ledger/partial/ptrie/partialTrie.go +++ b/ledger/partial/ptrie/partialTrie.go @@ -27,6 +27,15 @@ func (p *PSMT) RootHash() ledger.RootHash { return ledger.RootHash(p.root.Hash()) } +// GetSinglePayload returns payload of a given path +func (p *PSMT) GetSinglePayload(path ledger.Path) (*ledger.Payload, error) { + node, found := p.pathLookUp[path] + if !found { + return nil, &ErrMissingPath{Paths: []ledger.Path{path}} + 
} + return node.payload, nil +} + // Get returns an slice of payloads (same order), an slice of failed paths and errors (if any) // TODO return list of indecies instead of paths func (p *PSMT) Get(paths []ledger.Path) ([]*ledger.Payload, error) { diff --git a/ledger/partial/ptrie/partialTrie_test.go b/ledger/partial/ptrie/partialTrie_test.go index 3e9006240fd..3c9cd896f8a 100644 --- a/ledger/partial/ptrie/partialTrie_test.go +++ b/ledger/partial/ptrie/partialTrie_test.go @@ -68,6 +68,120 @@ func TestPartialTrieEmptyTrie(t *testing.T) { }) } +// TestPartialTrieGet gets payloads from existent and non-existent paths. +func TestPartialTrieGet(t *testing.T) { + + pathByteSize := 32 + withForest(t, pathByteSize, 10, func(t *testing.T, f *mtrie.Forest) { + + path1 := utils.PathByUint16(0) + payload1 := utils.LightPayload('A', 'a') + + path2 := utils.PathByUint16(1) + payload2 := utils.LightPayload('B', 'b') + + paths := []ledger.Path{path1, path2} + payloads := []*ledger.Payload{payload1, payload2} + + u := &ledger.TrieUpdate{RootHash: f.GetEmptyRootHash(), Paths: paths, Payloads: payloads} + rootHash, err := f.Update(u) + require.NoError(t, err, "error updating trie") + + r := &ledger.TrieRead{RootHash: rootHash, Paths: paths} + bp, err := f.Proofs(r) + require.NoError(t, err, "error getting batch proof") + + psmt, err := NewPSMT(rootHash, bp) + require.NoError(t, err, "error building partial trie") + ensureRootHash(t, rootHash, psmt) + + t.Run("non-existent key", func(t *testing.T) { + path3 := utils.PathByUint16(2) + path4 := utils.PathByUint16(4) + + nonExistentPaths := []ledger.Path{path3, path4} + retPayloads, err := psmt.Get(nonExistentPaths) + require.Nil(t, retPayloads) + + e, ok := err.(*ErrMissingPath) + require.True(t, ok) + assert.Equal(t, 2, len(e.Paths)) + require.Equal(t, path3, e.Paths[0]) + require.Equal(t, path4, e.Paths[1]) + }) + + t.Run("existent key", func(t *testing.T) { + retPayloads, err := psmt.Get(paths) + require.NoError(t, err) + require.Equal(t, len(paths), len(retPayloads)) + require.Equal(t, payload1, retPayloads[0]) + require.Equal(t, payload2, retPayloads[1]) + }) + + t.Run("mix of existent and non-existent keys", func(t *testing.T) { + path3 := utils.PathByUint16(2) + path4 := utils.PathByUint16(4) + + retPayloads, err := psmt.Get([]ledger.Path{path1, path2, path3, path4}) + require.Nil(t, retPayloads) + + e, ok := err.(*ErrMissingPath) + require.True(t, ok) + assert.Equal(t, 2, len(e.Paths)) + require.Equal(t, path3, e.Paths[0]) + require.Equal(t, path4, e.Paths[1]) + }) + }) +} + +// TestPartialTrieGetSinglePayload gets single payload from existent/non-existent path. 
+func TestPartialTrieGetSinglePayload(t *testing.T) { + + pathByteSize := 32 + withForest(t, pathByteSize, 10, func(t *testing.T, f *mtrie.Forest) { + + path1 := utils.PathByUint16(0) + payload1 := utils.LightPayload('A', 'a') + + path2 := utils.PathByUint16(1) + payload2 := utils.LightPayload('B', 'b') + + paths := []ledger.Path{path1, path2} + payloads := []*ledger.Payload{payload1, payload2} + + u := &ledger.TrieUpdate{RootHash: f.GetEmptyRootHash(), Paths: paths, Payloads: payloads} + rootHash, err := f.Update(u) + require.NoError(t, err, "error updating trie") + + r := &ledger.TrieRead{RootHash: rootHash, Paths: paths} + bp, err := f.Proofs(r) + require.NoError(t, err, "error getting batch proof") + + psmt, err := NewPSMT(rootHash, bp) + require.NoError(t, err, "error building partial trie") + ensureRootHash(t, rootHash, psmt) + + retPayload, err := psmt.GetSinglePayload(path1) + require.NoError(t, err) + require.Equal(t, payload1, retPayload) + + retPayload, err = psmt.GetSinglePayload(path2) + require.NoError(t, err) + require.Equal(t, payload2, retPayload) + + path3 := utils.PathByUint16(2) + + retPayload, err = psmt.GetSinglePayload(path3) + require.Nil(t, retPayload) + + var errMissingPath *ErrMissingPath + require.ErrorAs(t, err, &errMissingPath) + missingPath := err.(*ErrMissingPath) + require.Equal(t, 1, len(missingPath.Paths)) + require.Equal(t, path3, missingPath.Paths[0]) + }) +} + func TestPartialTrieLeafUpdates(t *testing.T) { pathByteSize := 32 diff --git a/ledger/trie.go b/ledger/trie.go index 78431eb8f11..a34e6584752 100644 --- a/ledger/trie.go +++ b/ledger/trie.go @@ -89,6 +89,12 @@ type TrieRead struct { Paths []Path } +// TrieReadSinglePayload contains trie read query for a single payload +type TrieReadSingleValue struct { + RootHash RootHash + Path Path +} + // TrieUpdate holds all data for a trie update type TrieUpdate struct { RootHash RootHash @@ -241,6 +247,26 @@ func (p *Payload) Equals(other *Payload) bool { return false } +// ValueEquals compares this payload value to another payload value. +// A nil payload is equivalent to an empty payload. +// NOTE: prefer using this function over payload.Value.Equals() +// when comparing payload values. payload.ValueEquals() handles +// nil payload, while payload.Value.Equals() panics on nil payload. +func (p *Payload) ValueEquals(other *Payload) bool { + pEmpty := p.IsEmpty() + otherEmpty := other.IsEmpty() + if pEmpty != otherEmpty { + // Only one payload is empty + return false + } + if pEmpty { + // Both payloads are empty + return true + } + // Compare values since both payloads are not empty. + return p.Value.Equals(other.Value) +} + // DeepCopy returns a deep copy of the payload func (p *Payload) DeepCopy() *Payload { if p == nil { diff --git a/ledger/trie_test.go b/ledger/trie_test.go new file mode 100644 index 00000000000..b5fe6a05497 --- /dev/null +++ b/ledger/trie_test.go @@ -0,0 +1,299 @@ +package ledger + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// TestPayloadEquals tests equality of payloads. It tests: +// - equality of empty, nil, and not-empty payloads +// - equality of payloads with different keys and same value +// - equality of payloads with same key and different values +// - and etc. 
+func TestPayloadEquals(t *testing.T) { + nilPayload := (*Payload)(nil) + emptyPayload := EmptyPayload() + + t.Run("nil vs empty", func(t *testing.T) { + require.True(t, nilPayload.Equals(emptyPayload)) + require.True(t, emptyPayload.Equals(nilPayload)) + }) + + t.Run("nil vs nil", func(t *testing.T) { + require.True(t, nilPayload.Equals(nilPayload)) + }) + + t.Run("empty vs empty", func(t *testing.T) { + require.True(t, emptyPayload.Equals(emptyPayload)) + }) + + t.Run("empty vs non-empty", func(t *testing.T) { + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + require.False(t, emptyPayload.Equals(p)) + require.False(t, p.Equals(emptyPayload)) + }) + + t.Run("nil vs non-empty", func(t *testing.T) { + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + require.False(t, nilPayload.Equals(p)) + require.False(t, p.Equals(nilPayload)) + }) + + t.Run("different key same value", func(t *testing.T) { + value := []byte{0x03, 0x04} + + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: value, + } + // p1.Key.KeyParts[0].Type is different + p1 := &Payload{ + Key: Key{KeyParts: []KeyPart{{2, []byte{0x01, 0x02}}}}, + Value: value, + } + // p2.Key.KeyParts[0].Value is different + p2 := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02, 0x03}}}}, + Value: value, + } + // len(p3.Key.KeyParts) is different + p3 := &Payload{ + Key: Key{KeyParts: []KeyPart{ + {1, []byte{0x01, 0x02}}, + {2, []byte{0x03, 0x04}}}, + }, + Value: value, + } + require.False(t, p.Equals(p1)) + require.False(t, p.Equals(p2)) + require.False(t, p.Equals(p3)) + }) + + t.Run("different key empty value", func(t *testing.T) { + value := []byte{} + + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: value, + } + // p1.Key.KeyParts[0].Type is different + p1 := &Payload{ + Key: Key{KeyParts: []KeyPart{{2, []byte{0x01, 0x02}}}}, + Value: value, + } + // p2.Key.KeyParts[0].Value is different + p2 := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02, 0x03}}}}, + Value: value, + } + // len(p3.Key.KeyParts) is different + p3 := &Payload{ + Key: Key{KeyParts: []KeyPart{ + {1, []byte{0x01, 0x02}}, + {2, []byte{0x03, 0x04}}}, + }, + Value: value, + } + require.False(t, p.Equals(p1)) + require.False(t, p.Equals(p2)) + require.False(t, p.Equals(p3)) + }) + + t.Run("same key different value", func(t *testing.T) { + key := Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}} + + p := &Payload{ + Key: key, + Value: []byte{0x03, 0x04}, + } + // p1.Value is nil + p1 := &Payload{ + Key: key, + } + // p2.Value is empty + p2 := &Payload{ + Key: key, + Value: []byte{}, + } + // p3.Value length is different + p3 := &Payload{ + Key: key, + Value: []byte{0x03}, + } + // p4.Value data is different + p4 := &Payload{ + Key: key, + Value: []byte{0x03, 0x05}, + } + require.False(t, p.Equals(p1)) + require.False(t, p.Equals(p2)) + require.False(t, p.Equals(p3)) + require.False(t, p.Equals(p4)) + }) + + t.Run("same key same value", func(t *testing.T) { + p1 := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + p2 := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + require.True(t, p1.Equals(p2)) + require.True(t, p2.Equals(p1)) + }) +} + +// TestPayloadValueEquals tests equality of payload values. 
It tests: +// - equality of empty, nil, and not-empty payloads +// - equality of payloads with different keys and same value +// - equality of payloads with same key and different values +// - and etc. +func TestPayloadValuEquals(t *testing.T) { + nilPayload := (*Payload)(nil) + emptyPayload := EmptyPayload() + + t.Run("nil vs empty", func(t *testing.T) { + require.True(t, nilPayload.ValueEquals(emptyPayload)) + require.True(t, emptyPayload.ValueEquals(nilPayload)) + }) + + t.Run("nil vs nil", func(t *testing.T) { + require.True(t, nilPayload.ValueEquals(nilPayload)) + }) + + t.Run("empty vs empty", func(t *testing.T) { + require.True(t, emptyPayload.ValueEquals(emptyPayload)) + }) + + t.Run("empty vs non-empty", func(t *testing.T) { + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + require.False(t, emptyPayload.ValueEquals(p)) + require.False(t, p.ValueEquals(emptyPayload)) + }) + + t.Run("nil vs non-empty", func(t *testing.T) { + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + require.False(t, nilPayload.ValueEquals(p)) + require.False(t, p.ValueEquals(nilPayload)) + }) + + t.Run("different key same value", func(t *testing.T) { + value := []byte{0x03, 0x04} + + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: value, + } + // p1.Key.KeyParts[0].Type is different + p1 := &Payload{ + Key: Key{KeyParts: []KeyPart{{2, []byte{0x01, 0x02}}}}, + Value: value, + } + // p2.Key.KeyParts[0].Value is different + p2 := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02, 0x03}}}}, + Value: value, + } + // len(p3.Key.KeyParts) is different + p3 := &Payload{ + Key: Key{KeyParts: []KeyPart{ + {1, []byte{0x01, 0x02}}, + {2, []byte{0x03, 0x04}}}, + }, + Value: value, + } + require.True(t, p.ValueEquals(p1)) + require.True(t, p.ValueEquals(p2)) + require.True(t, p.ValueEquals(p3)) + }) + + t.Run("different key empty value", func(t *testing.T) { + value := []byte{} + + p := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: value, + } + // p1.Key.KeyParts[0].Type is different + p1 := &Payload{ + Key: Key{KeyParts: []KeyPart{{2, []byte{0x01, 0x02}}}}, + Value: value, + } + // p2.Key.KeyParts[0].Value is different + p2 := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02, 0x03}}}}, + Value: value, + } + // len(p3.Key.KeyParts) is different + p3 := &Payload{ + Key: Key{KeyParts: []KeyPart{ + {1, []byte{0x01, 0x02}}, + {2, []byte{0x03, 0x04}}}, + }, + Value: value, + } + require.True(t, p.ValueEquals(p1)) + require.True(t, p.ValueEquals(p2)) + require.True(t, p.ValueEquals(p3)) + }) + + t.Run("same key different value", func(t *testing.T) { + key := Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}} + + p := &Payload{ + Key: key, + Value: []byte{0x03, 0x04}, + } + // p1.Value is nil + p1 := &Payload{ + Key: key, + } + // p2.Value is empty + p2 := &Payload{ + Key: key, + Value: []byte{}, + } + // p3.Value length is different + p3 := &Payload{ + Key: key, + Value: []byte{0x03}, + } + // p4.Value data is different + p4 := &Payload{ + Key: key, + Value: []byte{0x03, 0x05}, + } + require.False(t, p.ValueEquals(p1)) + require.False(t, p.ValueEquals(p2)) + require.False(t, p.ValueEquals(p3)) + require.False(t, p.ValueEquals(p4)) + }) + + t.Run("same key same value", func(t *testing.T) { + p1 := &Payload{ + Key: Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + p2 := &Payload{ + Key: 
Key{KeyParts: []KeyPart{{1, []byte{0x01, 0x02}}}}, + Value: []byte{0x03, 0x04}, + } + require.True(t, p1.ValueEquals(p2)) + require.True(t, p2.ValueEquals(p1)) + }) +} diff --git a/module/chunks/chunkVerifier.go b/module/chunks/chunkVerifier.go index 63a7ccdc83a..2f249eb7155 100644 --- a/module/chunks/chunkVerifier.go +++ b/module/chunks/chunkVerifier.go @@ -133,13 +133,13 @@ func (fcv *ChunkVerifier) verifyTransactionsInContext(context fvm.Context, chunk registerKey := executionState.RegisterIDToKey(registerID) - query, err := ledger.NewQuery(ledger.State(chunkDataPack.StartState), []ledger.Key{registerKey}) + query, err := ledger.NewQuerySingleValue(ledger.State(chunkDataPack.StartState), registerKey) if err != nil { return nil, fmt.Errorf("cannot create query: %w", err) } - values, err := psmt.Get(query) + value, err := psmt.GetSingleValue(query) if err != nil { if errors.Is(err, ledger.ErrMissingKeys{}) { @@ -158,7 +158,7 @@ func (fcv *ChunkVerifier) verifyTransactionsInContext(context fvm.Context, chunk return nil, fmt.Errorf("cannot query register: %w", err) } - return values[0], nil + return value, nil } chunkView := delta.NewView(getRegister) diff --git a/module/component/component.go b/module/component/component.go index 07502a58626..f296a7a25ec 100644 --- a/module/component/component.go +++ b/module/component/component.go @@ -32,7 +32,7 @@ type ComponentFactory func() (Component, error) // Before returning, it could also: // - panic (in canary / benchmark) // - log in various Error channels and / or send telemetry ... -type OnError = func(err error) ErrorHandlingResult +type OnError = func(error) ErrorHandlingResult type ErrorHandlingResult int @@ -218,14 +218,16 @@ func (c *ComponentManager) Start(parent irrecoverable.SignalerContext) { // goroutine and the parent's are scheduled. If the parent is scheduled first, any errors // thrown within workers would not have propagated, and it would only receive the done signal defer func() { + cancel() // shutdown all workers + // wait for shutdown signal before signalling the component is done + // this guarantees that ShutdownSignal is closed before Done + <-c.shutdownSignal <-c.workersDone close(c.done) }() // wait until the workersDone channel is closed or an irrecoverable error is encountered if err := util.WaitError(errChan, c.workersDone); err != nil { - cancel() // shutdown all workers - // propagate the error directly to the parent because a failure in a worker routine // is considered irrecoverable parent.Throw(err) diff --git a/module/component/component_manager_test.go b/module/component/component_manager_test.go index 7871e47e232..fc99ca92af3 100644 --- a/module/component/component_manager_test.go +++ b/module/component/component_manager_test.go @@ -17,7 +17,7 @@ import ( "github.com/onflow/flow-go/utils/unittest" ) -const CHANNEL_CLOSE_LATENCY_ALLOWANCE = 20 * time.Millisecond +const CHANNEL_CLOSE_LATENCY_ALLOWANCE = 25 * time.Millisecond type WorkerState int @@ -627,3 +627,31 @@ func TestComponentManager(t *testing.T) { rapid.Check(t, rapid.Run(&ComponentManagerMachine{})) } + +func TestComponentManagerShutdown(t *testing.T) { + mgr := component.NewComponentManagerBuilder(). 
+ AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + ready() + <-ctx.Done() + }).Build() + + parent, cancel := context.WithCancel(context.Background()) + ctx, _ := irrecoverable.WithSignaler(parent) + + mgr.Start(ctx) + unittest.AssertClosesBefore(t, mgr.Ready(), 10*time.Millisecond) + cancel() + + // ShutdownSignal indicates we have started shutdown, Done indicates we have completed + // shutdown. If we have completed shutdown, we must have started shutdown. + unittest.AssertClosesBefore(t, mgr.Done(), 10*time.Millisecond) + closed := util.CheckClosed(mgr.ShutdownSignal()) + assert.True(t, closed) +} + +// run the test many times to reproduce consistently +func TestComponentManagerShutdown_100(t *testing.T) { + for i := 0; i < 100; i++ { + TestComponentManagerShutdown(t) + } +} diff --git a/module/dkg/client_test.go b/module/dkg/client_test.go index 3f52feb4e15..8ad5f7048c5 100644 --- a/module/dkg/client_test.go +++ b/module/dkg/client_test.go @@ -334,6 +334,7 @@ func (s *ClientSuite) executeScript(script []byte, arguments [][]byte) cadence.V // execute script result, err := s.blockchain.ExecuteScript(script, arguments) require.NoError(s.T(), err) + require.NoError(s.T(), result.Error) require.True(s.T(), result.Succeeded()) return result.Value diff --git a/module/irrecoverable/irrecoverable_example_test.go b/module/irrecoverable/irrecoverable_example_test.go index 5ab164b3431..28fd6fed7cc 100644 --- a/module/irrecoverable/irrecoverable_example_test.go +++ b/module/irrecoverable/irrecoverable_example_test.go @@ -42,7 +42,6 @@ func Example() { // depending on the circumstances return component.ErrorHandlingStop } - } // run the component. this is a blocking call, and will return with an error if the diff --git a/module/jobqueue.go b/module/jobqueue.go index 2e5c62226f1..61bb8a9a3b3 100644 --- a/module/jobqueue.go +++ b/module/jobqueue.go @@ -7,6 +7,9 @@ import ( const ( ConsumeProgressVerificationBlockHeight = "ConsumeProgressVerificationBlockHeight" ConsumeProgressVerificationChunkIndex = "ConsumeProgressVerificationChunkIndex" + + ConsumeProgressExecutionDataRequesterBlockHeight = "ConsumeProgressExecutionDataRequesterBlockHeight" + ConsumeProgressExecutionDataRequesterNotification = "ConsumeProgressExecutionDataRequesterNotification" ) // JobID is a unique ID of the job. @@ -32,6 +35,9 @@ type JobConsumer interface { // It blocks until the existing worker finish processing the job Stop() + // LastProcessedIndex returns the last processed job index + LastProcessedIndex() uint64 + // NotifyJobIsDone let the consumer know a job has been finished, so that consumer will take // the next job from the job queue if there are workers available. It returns the last processed job index. NotifyJobIsDone(JobID) uint64 diff --git a/module/jobqueue/README.md b/module/jobqueue/README.md new file mode 100644 index 00000000000..15562a89703 --- /dev/null +++ b/module/jobqueue/README.md @@ -0,0 +1,85 @@ +## JobQueue Design Goal + +The jobqueue package implemented a reusable job queue system for async message processing. + +The most common use case is to work on each finalized block async. + +For instance, verification nodes must verify each finalized block. This needs to happen async, otherwise a verification node might get overwhelmed during periods when a large amount of blocks are finalized quickly (e.g. when a node comes back online and is catching up from other peers). + +So the goal for the jobqueue system are: +1. guarantee each job (i.e. 
finalized block) will be processed eventually +2. in the event of a crash failure, the jobqueue state is persisted and workers can be rescheduled so that no job is skipped. +3. allow concurrent processing of multiple jobs +4. the number of concurrent workers is configurable so that the node won't get overwhelmed when too many jobs are created (i.e. too many blocks are finalized in a short period of time) + +## JobQueue components +To achieve the above goals, the jobqueue system contains the following components/interfaces: +1. A `Jobs` module to find jobs by job index +2. A `storage.ConsumerProgress` to store job processing progress +3. A `Worker` module to process jobs and report job completion. +4. A `Consumer` that orchestrates the job processing by finding new jobs, creating workers for each job using the above modules, and managing job processing status internally. + +### Using module.Jobs to find jobs +There is no JobProducer in the jobqueue design. The job queue assumes each job can be indexed by a uint64 value, just like each finalized block (or sealed block) can be indexed by block height. + +Let's just call this uint64 value "Job Index" or index. + +So if we iterate through each index from low to high, and find each job by index, then we are able to iterate through each job. + +Therefore the module.Jobs interface abstracts this into a method: `AtIndex`. + +The `AtIndex` method returns the job at any given index. + +The job consumer relies on module.Jobs to find jobs. However, module.Jobs doesn't provide a way to notify it as soon as a new job is available. So it's the consumer's job to keep track of the values returned by module.Jobs's `Head` method and find jobs that are new. + +### Using the Check method to notify the job consumer of new jobs +The job consumer provides the `Check` method for users to notify it that new jobs are available. + +Once called, the job consumer will iterate through each index with the `AtIndex` method. It stops when one of the following conditions is true: +1. no job was found at an index +2. there are no more workers to work on them, which is limited by the config item `maxProcessing` + +The `Check` method is concurrency safe, meaning even if the job consumer is notified concurrently that new jobs are available, it will check at most once to find new jobs. + +Whenever a worker finishes a job, the job consumer will also call `Check` internally. + +### Storing job consuming progress in storage.ConsumerProgress +The job consumer stores the last processed job index in `storage.ConsumerProgress`, so that on startup it can read the last processed job index from storage and compare it with the last available job index from `module.Jobs`'s `Head` method to resume job processing. + +This ensures each job will be processed at least once. Note: given the at-least-once execution, the `Worker` should gracefully handle duplicate runs of the same job. + +### Using Workers to work on each job + +When the job consumer finds a new job, it uses an implementation of the `Worker` interface to process it. The `Worker`'s `Run` method accepts a `module.Job` interface, so it's the user's responsibility to handle the conversion between `module.Job` and the underlying data type. + +In the scenario of processing finalized blocks, implementing symmetric functions like BlockToJob and JobToBlock is recommended for this conversion. + +In order to report job completion, the worker needs to call the job consumer's `NotifyJobIsDone` method. A minimal sketch of this pattern is shown below.
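The conversion and completion-reporting described above can be illustrated with a short, self-contained sketch. Note that `Job`, `JobID`, `Block`, and `blockWorker` below are simplified stand-ins, not the actual flow-go interfaces; `BlockToJob`/`JobToBlock` are just the symmetric helpers recommended in the text.

```go
package main

import "fmt"

// Simplified stand-ins for module.Job / module.JobID (the real flow-go types differ in detail).
type JobID string

type Job interface {
	ID() JobID
}

// Block is a hypothetical payload type carried by a job.
type Block struct {
	Height uint64
}

// BlockJob wraps a Block so it can be handed to the job queue as a Job.
type BlockJob struct {
	block Block
}

func (j BlockJob) ID() JobID { return JobID(fmt.Sprintf("%d", j.block.Height)) }

// Symmetric conversion helpers, as recommended above.
func BlockToJob(b Block) Job { return BlockJob{block: b} }

func JobToBlock(j Job) (Block, error) {
	bj, ok := j.(BlockJob)
	if !ok {
		return Block{}, fmt.Errorf("job %v is not a BlockJob", j.ID())
	}
	return bj.block, nil
}

// blockWorker processes one job and reports completion back to the consumer
// via the notify callback (e.g. consumer.NotifyJobIsDone).
type blockWorker struct {
	notifyJobIsDone func(JobID) uint64
}

func (w *blockWorker) Run(job Job) {
	block, err := JobToBlock(job)
	if err != nil {
		// the job queue does not tolerate skipped jobs: a real worker should
		// retry or crash here rather than silently dropping the job
		panic(err)
	}
	fmt.Printf("processing block at height %d\n", block.Height)
	w.notifyJobIsDone(job.ID()) // report completion so the consumer can advance
}

func main() {
	done := func(id JobID) uint64 {
		fmt.Printf("job %s done\n", id)
		return 0
	}
	w := &blockWorker{notifyJobIsDone: done}
	w.Run(BlockToJob(Block{Height: 42}))
}
```

A real worker is wired into the consumer at construction time and must tolerate duplicate runs of the same job, since delivery is at-least-once.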
+ +### Error handling +The job queue doesn't allow a job to fail, because it has to guarantee that every job below the last processed job index has finished successfully. Leaving a gap is not accepted. + +Therefore, if a worker fails to process a job, it should retry by itself, or just crash. + +Note: the Worker should not log the error and report the job as completed, because that would advance the last processed job index and lose the chance to retry that job. + + +## Pipeline Pattern +Multiple jobqueues can be combined to form a pipeline. This is useful in the scenario where the first job queue processes each finalized block and creates jobs to process data depending on the block, and the second job queue processes each job created by the worker of the first job queue. + +For instance, the verification node uses a 2-jobqueue pipeline to find chunks from each block and create jobs if the block has chunks that it needs to verify, and the second job queue allows the verification node to verify each chunk with a max number of workers. + +## Considerations + +### Push vs Pull +The jobqueue architecture is optimized for "pull" style processes, where the job producer simply notifies the job consumer about new jobs without creating any job, and the job consumer pulls jobs from a source when workers are available. All current implementations use this pull style since it lends itself well to asynchronously processing jobs based on block heights. + +Some use cases might require "push" style jobs where there is a job producer that creates new jobs, and a consumer that processes work from the producer. This is possible with the jobqueue, but requires the producer to persist the jobs to a database, then implement the `Head` and `AtIndex` methods that allow accessing jobs by sequential `uint64` indexes. + +### TODOs +1. Jobs at different indexes are processed in parallel. It's possible that one job takes a long time to work on, causing too many completed jobs to be cached in memory before they can be used to update the last processed job index. + `maxSearchAhead` allows the job consumer to stop consuming more blocks if too many jobs are completed while the job at index lastProcessed + 1 has not been processed yet. + The difference between `maxSearchAhead` and `maxProcessing` is that `maxProcessing` allows at most `maxProcessing` workers to process jobs. However, even if a worker is available, it might not be assigned to a job: because the job at index lastProcessed + 1 has not been done, the consumer won't work on a job with an index higher than `lastProcessed + maxSearchAhead`. +2. accept a callback to get notified when the consecutive job index is finished. +3. 
implement ReadyDoneAware interface + diff --git a/module/jobqueue/component_consumer.go b/module/jobqueue/component_consumer.go new file mode 100644 index 00000000000..b66e7802b8d --- /dev/null +++ b/module/jobqueue/component_consumer.go @@ -0,0 +1,152 @@ +package jobqueue + +import ( + "fmt" + + "github.com/rs/zerolog" + + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/util" + "github.com/onflow/flow-go/storage" +) + +type ComponentConsumer struct { + component.Component + + cm *component.ComponentManager + consumer module.JobConsumer + jobs module.Jobs + workSignal <-chan struct{} + preNotifier NotifyDone + postNotifier NotifyDone + log zerolog.Logger +} + +// NewComponentConsumer creates a new ComponentConsumer consumer +func NewComponentConsumer( + log zerolog.Logger, + workSignal <-chan struct{}, + progress storage.ConsumerProgress, + jobs module.Jobs, + defaultIndex uint64, + processor JobProcessor, // method used to process jobs + maxProcessing uint64, + maxSearchAhead uint64, +) *ComponentConsumer { + + c := &ComponentConsumer{ + workSignal: workSignal, + jobs: jobs, + log: log, + } + + // create a worker pool with maxProcessing workers to process jobs + worker := NewWorkerPool( + processor, + func(id module.JobID) { c.NotifyJobIsDone(id) }, + maxProcessing, + ) + c.consumer = NewConsumer(c.log, c.jobs, progress, worker, maxProcessing, maxSearchAhead) + + builder := component.NewComponentManagerBuilder(). + AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + worker.Start(ctx) + if err := util.WaitClosed(ctx, worker.Ready()); err != nil { + c.log.Info().Msg("job consumer startup aborted") + <-worker.Done() + c.log.Info().Msg("job consumer shutdown complete") + return + } + + c.log.Info().Msg("job consumer starting") + err := c.consumer.Start(defaultIndex) + if err != nil { + ctx.Throw(fmt.Errorf("could not start consumer: %w", err)) + } + + ready() + + <-ctx.Done() + c.log.Info().Msg("job consumer shutdown started") + + // blocks until all running jobs have stopped + c.consumer.Stop() + + <-worker.Done() + c.log.Info().Msg("job consumer shutdown complete") + }). + AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + ready() + c.processingLoop(ctx) + }) + + cm := builder.Build() + c.cm = cm + c.Component = cm + + return c +} + +// SetPreNotifier sets a notification function that is invoked before marking a job as done in the +// consumer. +// +// Note: This guarantees that the function is called at least once for each job, but may be executed +// before consumer updates the last processed index. +func (c *ComponentConsumer) SetPreNotifier(fn NotifyDone) { + c.preNotifier = fn +} + +// SetPostNotifier sets a notification function that is invoked after marking a job as done in the +// consumer. +// +// Note: This guarantees that the function is executed after consumer updates the last processed index, +// but notifications may be missed in the event of a crash. +func (c *ComponentConsumer) SetPostNotifier(fn NotifyDone) { + c.postNotifier = fn +} + +// NotifyJobIsDone is invoked by the worker to let the consumer know that it is done +// processing a (block) job. 
+func (c *ComponentConsumer) NotifyJobIsDone(jobID module.JobID) uint64 { + if c.preNotifier != nil { + c.preNotifier(jobID) + } + + // notify wrapped consumer that job is complete + processedIndex := c.consumer.NotifyJobIsDone(jobID) + + if c.postNotifier != nil { + c.postNotifier(jobID) + } + + return processedIndex +} + +// Size returns number of in-memory block jobs that block consumer is processing. +func (c *ComponentConsumer) Size() uint { + return c.consumer.Size() +} + +// Head returns the highest job index available +func (c *ComponentConsumer) Head() (uint64, error) { + return c.jobs.Head() +} + +// LastProcessedIndex returns the last processed job index +func (c *ComponentConsumer) LastProcessedIndex() uint64 { + return c.consumer.LastProcessedIndex() +} + +func (c *ComponentConsumer) processingLoop(ctx irrecoverable.SignalerContext) { + c.log.Debug().Msg("listening for new jobs") + for { + select { + case <-ctx.Done(): + return + case <-c.workSignal: + c.consumer.Check() + } + } +} diff --git a/module/jobqueue/component_consumer_test.go b/module/jobqueue/component_consumer_test.go new file mode 100644 index 00000000000..4fdbb2468b3 --- /dev/null +++ b/module/jobqueue/component_consumer_test.go @@ -0,0 +1,300 @@ +package jobqueue + +import ( + "context" + "fmt" + "os" + "sync" + "testing" + "time" + + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/irrecoverable" + modulemock "github.com/onflow/flow-go/module/mock" + "github.com/onflow/flow-go/storage" + storagemock "github.com/onflow/flow-go/storage/mock" + "github.com/onflow/flow-go/utils/unittest" +) + +type ComponentConsumerSuite struct { + suite.Suite + + defaultIndex uint64 + maxProcessing uint64 + maxSearchAhead uint64 + + progress *storagemock.ConsumerProgress +} + +func TestComponentConsumerSuite(t *testing.T) { + t.Parallel() + suite.Run(t, new(ComponentConsumerSuite)) +} + +func (suite *ComponentConsumerSuite) SetupTest() { + suite.defaultIndex = uint64(0) + suite.maxProcessing = uint64(2) + suite.maxSearchAhead = uint64(5) + + suite.progress = new(storagemock.ConsumerProgress) +} + +func mockJobs(data map[uint64]TestJob) *modulemock.Jobs { + jobs := new(modulemock.Jobs) + + jobs.On("AtIndex", mock.AnythingOfType("uint64")).Return( + func(index uint64) module.Job { + job, ok := data[index] + if !ok { + return nil + } + return job + }, + func(index uint64) error { + _, ok := data[index] + if !ok { + return storage.ErrNotFound + } + return nil + }, + ) + + return jobs +} + +func mockProgress() *storagemock.ConsumerProgress { + progress := new(storagemock.ConsumerProgress) + progress.On("ProcessedIndex").Return(uint64(0), nil) + progress.On("SetProcessedIndex", mock.AnythingOfType("uint64")).Return(nil) + + return progress +} + +func generateTestData(jobCount uint64) map[uint64]TestJob { + jobData := make(map[uint64]TestJob, jobCount) + + for i := uint64(1); i <= jobCount; i++ { + jobData[i] = TestJob{i} + } + + return jobData +} + +func (suite *ComponentConsumerSuite) prepareTest( + processor JobProcessor, + preNotifier NotifyDone, + postNotifier NotifyDone, + jobData map[uint64]TestJob, +) (*ComponentConsumer, chan struct{}) { + + jobs := mockJobs(jobData) + workSignal := make(chan struct{}) + progress := mockProgress() + + consumer := NewComponentConsumer( + zerolog.New(os.Stdout).With().Timestamp().Logger(), + 
workSignal, + progress, + jobs, + suite.defaultIndex, + processor, + suite.maxProcessing, + suite.maxSearchAhead, + ) + consumer.SetPreNotifier(preNotifier) + consumer.SetPostNotifier(postNotifier) + + return consumer, workSignal +} + +// TestHappyPath: +// - processes jobs until cancelled +// - notify called for all jobs +func (suite *ComponentConsumerSuite) TestHappyPath() { + testCtx, testCancel := context.WithCancel(context.Background()) + defer testCancel() + + testJobsCount := uint64(20) + jobData := generateTestData(testJobsCount) + finishedJobs := make(map[uint64]bool, testJobsCount) + + wg := sync.WaitGroup{} + + processor := func(_ irrecoverable.SignalerContext, _ module.Job, complete func()) { complete() } + notifier := func(jobID module.JobID) { + defer wg.Done() + + index, err := JobIDToIndex(jobID) + assert.NoError(suite.T(), err) + + finishedJobs[index] = true + + suite.T().Logf("job %d finished", index) + } + + suite.Run("runs and notifies using pre-notifier", func() { + wg.Add(int(testJobsCount)) + consumer, workSignal := suite.prepareTest(processor, nil, notifier, jobData) + + suite.runTest(testCtx, consumer, workSignal, func() { + workSignal <- struct{}{} + wg.Wait() + }) + + // verify all jobs were run + assert.Len(suite.T(), finishedJobs, len(jobData)) + for index := range jobData { + assert.True(suite.T(), finishedJobs[index], "job %d did not finished", index) + } + }) + + suite.Run("runs and notifies using post-notifier", func() { + wg.Add(int(testJobsCount)) + consumer, workSignal := suite.prepareTest(processor, notifier, nil, jobData) + + suite.runTest(testCtx, consumer, workSignal, func() { + workSignal <- struct{}{} + wg.Wait() + }) + + // verify all jobs were run + assert.Len(suite.T(), finishedJobs, len(jobData)) + for index := range jobData { + assert.True(suite.T(), finishedJobs[index], "job %d did not finished", index) + } + }) +} + +// TestProgressesOnComplete: +// - only processes next job after complete is called +func (suite *ComponentConsumerSuite) TestProgressesOnComplete() { + testCtx, testCancel := context.WithCancel(context.Background()) + defer testCancel() + + stopIndex := uint64(10) + testJobsCount := uint64(11) + jobData := generateTestData(testJobsCount) + finishedJobs := make(map[uint64]bool, testJobsCount) + + done := make(chan struct{}) + + processor := func(_ irrecoverable.SignalerContext, job module.Job, complete func()) { + index, err := JobIDToIndex(job.ID()) + assert.NoError(suite.T(), err) + + if index <= stopIndex { + complete() + } + } + notifier := func(jobID module.JobID) { + index, err := JobIDToIndex(jobID) + assert.NoError(suite.T(), err) + + finishedJobs[index] = true + + suite.T().Logf("job %d finished", index) + if index == stopIndex+1 { + close(done) + } + } + + suite.maxProcessing = 1 + consumer, workSignal := suite.prepareTest(processor, nil, notifier, jobData) + + suite.runTest(testCtx, consumer, workSignal, func() { + workSignal <- struct{}{} + unittest.RequireNeverClosedWithin(suite.T(), done, 50*time.Millisecond, fmt.Sprintf("job %d wasn't supposed to finish", stopIndex+1)) + }) + + // verify all jobs were run + assert.Len(suite.T(), finishedJobs, int(stopIndex)) + for index := range finishedJobs { + assert.LessOrEqual(suite.T(), index, stopIndex) + } +} + +// TestPassesIrrecoverableErrors: +// - throws an irrecoverable error +// - verifies no jobs were processed +func (suite *ComponentConsumerSuite) TestPassesIrrecoverableErrors() { + testCtx, testCancel := context.WithCancel(context.Background()) + defer testCancel() 
+ + testJobsCount := uint64(10) + jobData := generateTestData(testJobsCount) + done := make(chan struct{}) + + expectedErr := fmt.Errorf("test failure") + + // always throws an error + processor := func(ctx irrecoverable.SignalerContext, job module.Job, _ func()) { + ctx.Throw(expectedErr) + } + + // never expects a job + notifier := func(jobID module.JobID) { + suite.T().Logf("job %s finished unexpectedly", jobID) + close(done) + } + + consumer, _ := suite.prepareTest(processor, nil, notifier, jobData) + + ctx, cancel := context.WithCancel(testCtx) + signalCtx, errChan := irrecoverable.WithSignaler(ctx) + + consumer.Start(signalCtx) + unittest.RequireCloseBefore(suite.T(), consumer.Ready(), 10*time.Millisecond, "timeout waiting for consumer to be ready") + + // send job signal, then wait for the irrecoverable error + // don't need to sent signal since the worker is kicked off by Start() + select { + case <-ctx.Done(): + suite.T().Errorf("expected irrecoverable error, but got none") + case err := <-errChan: + assert.ErrorIs(suite.T(), err, expectedErr) + } + + // shutdown + cancel() + unittest.RequireCloseBefore(suite.T(), consumer.Done(), 10*time.Millisecond, "timeout waiting for consumer to be done") + + // no notification should have been sent + unittest.RequireNotClosed(suite.T(), done, "job wasn't supposed to finish") +} + +func (suite *ComponentConsumerSuite) runTest( + testCtx context.Context, + consumer *ComponentConsumer, + workSignal chan<- struct{}, + sendJobs func(), +) { + ctx, cancel := context.WithCancel(testCtx) + signalCtx, errChan := irrecoverable.WithSignaler(ctx) + + // use global context so we listen for errors until the test is finished + go irrecoverableNotExpected(suite.T(), testCtx, errChan) + + consumer.Start(signalCtx) + unittest.RequireCloseBefore(suite.T(), consumer.Ready(), 10*time.Millisecond, "timeout waiting for consumer to be ready") + + sendJobs() + + // shutdown + cancel() + unittest.RequireCloseBefore(suite.T(), consumer.Done(), 10*time.Millisecond, "timeout waiting for consumer to be done") +} + +func irrecoverableNotExpected(t *testing.T, ctx context.Context, errChan <-chan error) { + select { + case <-ctx.Done(): + return + case err := <-errChan: + require.NoError(t, err, "unexpected irrecoverable error") + } +} diff --git a/module/jobqueue/consumer.go b/module/jobqueue/consumer.go index bdbf2324d13..9c23375e62d 100644 --- a/module/jobqueue/consumer.go +++ b/module/jobqueue/consumer.go @@ -29,7 +29,8 @@ type Consumer struct { worker Worker // to process job and notify consumer when finish processing a job // Config - maxProcessing uint64 // max number of jobs to be processed concurrently + maxProcessing uint64 // max number of jobs to be processed concurrently + maxSearchAhead uint64 // max number of jobs beyond processedIndex to process. 0 means no limit // State Variables running bool // a signal to control whether to start processing more jobs. Useful for waiting @@ -50,6 +51,7 @@ func NewConsumer( progress storage.ConsumerProgress, worker Worker, maxProcessing uint64, + maxSearchAhead uint64, ) *Consumer { return &Consumer{ log: log.With().Str("sub_module", "job_queue").Logger(), @@ -60,7 +62,8 @@ func NewConsumer( worker: worker, // update config - maxProcessing: maxProcessing, + maxProcessing: maxProcessing, + maxSearchAhead: maxSearchAhead, // init state variables running: false, @@ -130,9 +133,20 @@ func (c *Consumer) Stop() { // Size returns number of in-memory jobs that consumer is processing. 
func (c *Consumer) Size() uint { + c.mu.Lock() + defer c.mu.Unlock() + return uint(len(c.processings)) } +// LastProcessedIndex returns the last processed job index +func (c *Consumer) LastProcessedIndex() uint64 { + c.mu.Lock() + defer c.mu.Unlock() + + return c.processedIndex +} + // NotifyJobIsDone let the consumer know a job has been finished, so that consumer will take // the next job from the job queue if there are workers available. It returns the last processed job index. func (c *Consumer) NotifyJobIsDone(jobID module.JobID) uint64 { @@ -247,6 +261,7 @@ func (c *Consumer) processableJobs() ([]*jobAtIndex, uint64, error) { c.jobs, c.processings, c.maxProcessing, + c.maxSearchAhead, c.processedIndex, ) @@ -266,7 +281,7 @@ func (c *Consumer) processableJobs() ([]*jobAtIndex, uint64, error) { // processableJobs check the worker's capacity and if sufficient, read // jobs from the storage, return the processable jobs, and the processed // index -func processableJobs(jobs module.Jobs, processings map[uint64]*jobStatus, maxProcessing uint64, processedIndex uint64) ([]*jobAtIndex, uint64, +func processableJobs(jobs module.Jobs, processings map[uint64]*jobStatus, maxProcessing uint64, maxSearchAhead uint64, processedIndex uint64) ([]*jobAtIndex, uint64, error) { processables := make([]*jobAtIndex, 0) @@ -274,8 +289,18 @@ func processableJobs(jobs module.Jobs, processings map[uint64]*jobStatus, maxPro // in order to decide whether to process a new job processing := uint64(0) + // determine if the consumer should pause processing new jobs because it's too far ahead of + // the lowest in progress index + shouldPause := func(index uint64) bool { + if maxSearchAhead == 0 { + return false + } + + return index-processedIndex > maxSearchAhead + } + // if still have processing capacity, find the next processable job - for i := processedIndex + 1; processing < maxProcessing; i++ { + for i := processedIndex + 1; processing < maxProcessing && !shouldPause(i); i++ { status, ok := processings[i] // if no worker is processing the next job, try to read it and process diff --git a/module/jobqueue/consumer_behavior_test.go b/module/jobqueue/consumer_behavior_test.go index dd526525220..1fac55faa96 100644 --- a/module/jobqueue/consumer_behavior_test.go +++ b/module/jobqueue/consumer_behavior_test.go @@ -51,6 +51,10 @@ func TestConsumer(t *testing.T) { // when more jobs are arrived than the max number of workers, only the first 3 jobs will be processed t.Run("testMaxWorker", testMaxWorker) + // [+1, +2, +3, +4, +5, +6] => [0#, !1, *2, *3, *4, *5, 6, +7] => [0#, *1, *2, *3, *4, *5, !6, !7] + // when processing lags behind, the consumer is paused until processing catches up + t.Run("testPauseResume", testPauseResume) + // [+1, +2, +3, +4, 3*] => [0#, 1!, 2!, 3*, 4!] 
// when job 3 is finished, which is not the next processing job 1, the processed index won't change t.Run("testNonNextFinished", testNonNextFinished) @@ -176,6 +180,52 @@ func testMaxWorker(t *testing.T) { }) } +// [+1, +2, +3, +4, +5, +6] => [0#, !1, *2, *3, *4, *5, 6, +7] => [0#, *1, *2, *3, *4, *5, !6, !7] +// when processing lags behind, the consumer is paused until processing catches up +func testPauseResume(t *testing.T) { + runWithSeatchAhead(t, 5, func(c module.JobConsumer, cp storage.ConsumerProgress, w *mockWorker, j *jobqueue.MockJobs, db *badgerdb.DB) { + require.NoError(t, c.Start(DefaultIndex)) + require.NoError(t, j.PushOne()) // +1 + c.Check() + + require.NoError(t, j.PushOne()) // +2 + c.Check() + + require.NoError(t, j.PushOne()) // +3 + c.Check() + + require.NoError(t, j.PushOne()) // +4 + c.Check() + + require.NoError(t, j.PushOne()) // +5 + c.Check() + time.Sleep(1 * time.Millisecond) + c.NotifyJobIsDone(jobqueue.JobIDAtIndex(2)) // 2* + c.NotifyJobIsDone(jobqueue.JobIDAtIndex(3)) // 3* + c.NotifyJobIsDone(jobqueue.JobIDAtIndex(4)) // 4* + c.NotifyJobIsDone(jobqueue.JobIDAtIndex(5)) // 5* + + require.NoError(t, j.PushOne()) // +6 + c.Check() + + time.Sleep(1 * time.Millisecond) + + // all jobs so far are processed, except 1 and 6 + w.AssertCalled(t, []int64{1, 2, 3, 4, 5}) + assertProcessed(t, cp, 0) + + require.NoError(t, j.PushOne()) // +7 + c.NotifyJobIsDone(jobqueue.JobIDAtIndex(1)) // 1* + c.Check() + + time.Sleep(1 * time.Millisecond) + + // processing resumed after job 1 finished + w.AssertCalled(t, []int64{1, 2, 3, 4, 5, 6, 7}) + assertProcessed(t, cp, 5) + }) +} + // [+1, +2, +3, +4, 3*] => [0#, 1!, 2!, 3*, 4!] // when job 3 is finished, which is not the next processing job 1, the processed index won't change func testNonNextFinished(t *testing.T) { @@ -420,7 +470,7 @@ func testWorkOnNextAfterFastforward(t *testing.T) { // jobs need to be reused, since it stores all the jobs reWorker := newMockWorker() reProgress := badger.NewConsumerProgress(db, ConsumerTag) - reConsumer := newTestConsumer(reProgress, j, reWorker) + reConsumer := newTestConsumer(reProgress, j, reWorker, 0) err := reConsumer.Start(DefaultIndex) require.NoError(t, err) @@ -500,11 +550,15 @@ type JobID = module.JobID type Job = module.Job func runWith(t testing.TB, runTestWith func(module.JobConsumer, storage.ConsumerProgress, *mockWorker, *jobqueue.MockJobs, *badgerdb.DB)) { + runWithSeatchAhead(t, 0, runTestWith) +} + +func runWithSeatchAhead(t testing.TB, maxSearchAhead uint64, runTestWith func(module.JobConsumer, storage.ConsumerProgress, *mockWorker, *jobqueue.MockJobs, *badgerdb.DB)) { unittest.RunWithBadgerDB(t, func(db *badgerdb.DB) { jobs := jobqueue.NewMockJobs() worker := newMockWorker() progress := badger.NewConsumerProgress(db, ConsumerTag) - consumer := newTestConsumer(progress, jobs, worker) + consumer := newTestConsumer(progress, jobs, worker, maxSearchAhead) runTestWith(consumer, progress, worker, jobs, db) }) } @@ -515,11 +569,10 @@ func assertProcessed(t testing.TB, cp storage.ConsumerProgress, expectProcessed require.Equal(t, expectProcessed, processed) } -func newTestConsumer(cp storage.ConsumerProgress, jobs module.Jobs, worker jobqueue.Worker) module.JobConsumer { +func newTestConsumer(cp storage.ConsumerProgress, jobs module.Jobs, worker jobqueue.Worker, maxSearchAhead uint64) module.JobConsumer { log := unittest.Logger().With().Str("module", "consumer").Logger() maxProcessing := uint64(3) - c := jobqueue.NewConsumer(log, jobs, cp, worker, maxProcessing) - return c + 
return jobqueue.NewConsumer(log, jobs, cp, worker, maxProcessing, maxSearchAhead) } // a Mock worker that stores all the jobs that it was asked to work on diff --git a/module/jobqueue/consumer_test.go b/module/jobqueue/consumer_test.go index d8ed20a2c75..1b3e6aab927 100644 --- a/module/jobqueue/consumer_test.go +++ b/module/jobqueue/consumer_test.go @@ -2,6 +2,7 @@ package jobqueue import ( "fmt" + "strconv" "sync" "testing" "time" @@ -19,14 +20,28 @@ import ( func TestProcessableJobs(t *testing.T) { t.Parallel() + processedIndex := uint64(2) maxProcessing := uint64(3) + maxSearchAhead := uint64(5) + + populate := func(start, end uint64, incomplete []uint64) map[uint64]*jobStatus { + processings := map[uint64]*jobStatus{} + for i := start; i <= end; i++ { + processings[i] = &jobStatus{jobID: JobIDAtIndex(i), done: true} + } + for _, i := range incomplete { + processings[i].done = false + } + + return processings + } t.Run("no job, nothing to process", func(t *testing.T) { jobs := NewMockJobs() // no job in the queue processings := map[uint64]*jobStatus{} processedIndex := uint64(0) - jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, processedIndex) + jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, 0, processedIndex) require.NoError(t, err) require.Equal(t, uint64(0), processedTo) @@ -36,19 +51,12 @@ func TestProcessableJobs(t *testing.T) { t.Run("max processing was not reached", func(t *testing.T) { jobs := NewMockJobs() require.NoError(t, jobs.PushN(20)) // enough jobs in the queue - processings := map[uint64]*jobStatus{} - for i := uint64(3); i <= 11; i++ { - // job 3 are 5 are not done, 2 processing in total - // 4, 6, 7, 8, 9, 10, 11 are finished, 7 finished in total - done := true - if i == 3 || i == 5 { - done = false - } - processings[i] = &jobStatus{jobID: JobIDAtIndex(i), done: done} - } - processedIndex := uint64(2) - jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, processedIndex) + // job 3 are 5 are not done, 2 processing in total + // 4, 6, 7, 8, 9, 10, 11 are finished, 7 finished in total + processings := populate(3, 11, []uint64{3, 5}) + + jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, 0, processedIndex) require.NoError(t, err) require.Equal(t, uint64(2), processedTo) @@ -61,18 +69,12 @@ func TestProcessableJobs(t *testing.T) { t.Run("reached max processing", func(t *testing.T) { jobs := NewMockJobs() require.NoError(t, jobs.PushN(20)) // enough jobs in the queue - processings := map[uint64]*jobStatus{} - for i := uint64(3); i <= 12; i++ { - // job 3, 5, 6 are not done, which have reached max processing(3) - // 4, 7, 8, 9, 10, 11, 12 are finished, 7 finished in total - done := true - if i == 3 || i == 5 || i == 6 { - done = false - } - processings[i] = &jobStatus{jobID: JobIDAtIndex(i), done: done} - } - processedIndex := uint64(2) - jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, processedIndex) + + // job 3, 5, 6 are not done, which have reached max processing(3) + // 4, 7, 8, 9, 10, 11, 12 are finished, 7 finished in total + processings := populate(3, 12, []uint64{3, 5, 6}) + + jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, 0, processedIndex) require.NoError(t, err) require.Equal(t, uint64(2), processedTo) @@ -80,22 +82,53 @@ func TestProcessableJobs(t *testing.T) { assertJobs(t, []uint64{}, jobsToRun) }) + t.Run("processing pauses and resumes", func(t *testing.T) { + jobs := 
NewMockJobs() + require.NoError(t, jobs.PushN(20)) // enough jobs in the queue + + maxProcessing := uint64(4) + + // job 3, 5 are not done + // 4, 6, 7 are finished, 3 finished in total + processings := populate(3, processedIndex+maxSearchAhead, []uint64{3, 5}) + + // it will not process any job, because the consumer is paused + jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, maxSearchAhead, processedIndex) + + require.NoError(t, err) + require.Equal(t, processedIndex, processedTo) + assertJobs(t, []uint64{}, jobsToRun) + + // lowest job is processed, which should cause consumer to resume + processings[uint64(3)].done = true + + // Job 3 is done, so it should return 2 more jobs 8-9 and pause again with one available worker + jobsToRun, processedTo, err = processableJobs(jobs, processings, maxProcessing, maxSearchAhead, processedIndex) + + require.NoError(t, err) + require.Equal(t, uint64(4), processedTo) + assertJobs(t, []uint64{8, 9}, jobsToRun) + + // lowest job is processed, which should cause consumer to resume + processings[uint64(5)].done = true + + // job 5 is processed, it should return jobs 8-11 (one job for each worker) + jobsToRun, processedTo, err = processableJobs(jobs, processings, maxProcessing, maxSearchAhead, processedIndex) + + require.NoError(t, err) + require.Equal(t, uint64(7), processedTo) + assertJobs(t, []uint64{8, 9, 10, 11}, jobsToRun) + }) + t.Run("no more job", func(t *testing.T) { jobs := NewMockJobs() require.NoError(t, jobs.PushN(11)) // 11 jobs, no more job to process - processings := map[uint64]*jobStatus{} - for i := uint64(3); i <= 11; i++ { - // job 3, 11 are not done, which have not reached max processing (3) - // 4, 5, 6, 7, 8, 9, 10 are finished, 7 finished in total - done := true - if i == 3 || i == 11 { - done = false - } - processings[i] = &jobStatus{jobID: JobIDAtIndex(i), done: done} - } - processedIndex := uint64(2) - jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, processedIndex) + // job 3, 11 are not done, which have not reached max processing (3) + // 4, 5, 6, 7, 8, 9, 10 are finished, 7 finished in total + processings := populate(3, 11, []uint64{3, 11}) + + jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, 0, processedIndex) require.NoError(t, err) require.Equal(t, uint64(2), processedTo) @@ -105,19 +138,12 @@ func TestProcessableJobs(t *testing.T) { t.Run("next jobs were done", func(t *testing.T) { jobs := NewMockJobs() require.NoError(t, jobs.PushN(20)) // enough jobs in the queue - processings := map[uint64]*jobStatus{} - for i := uint64(3); i <= 6; i++ { - // job 3, 5 are done - // job 4, 6 are not done, which have not reached max processing - done := true - if i == 4 || i == 6 { - done = false - } - processings[i] = &jobStatus{jobID: JobIDAtIndex(i), done: done} - } - processedIndex := uint64(2) - jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, processedIndex) + // job 3, 5 are done + // job 4, 6 are not done, which have not reached max processing + processings := populate(3, 6, []uint64{4, 6}) + + jobsToRun, processedTo, err := processableJobs(jobs, processings, maxProcessing, 0, processedIndex) require.NoError(t, err) require.Equal(t, uint64(3), processedTo) @@ -137,7 +163,7 @@ func TestProcessedIndexDeletion(t *testing.T) { progress := badger.NewConsumerProgress(db, "consumer") worker := newMockWorker() maxProcessing := uint64(3) - c := NewConsumer(log, jobs, progress, worker, maxProcessing) + c := 
NewConsumer(log, jobs, progress, worker, maxProcessing, 0) worker.WithConsumer(c) f(c, jobs) @@ -256,6 +282,10 @@ func JobIDAtIndex(index uint64) module.JobID { return module.JobID(fmt.Sprintf("%v", index)) } +func JobIDToIndex(id module.JobID) (uint64, error) { + return strconv.ParseUint(string(id), 10, 64) +} + // JobMaker is a test helper. // it creates new job with unique job id type JobMaker struct { diff --git a/engine/verification/assigner/blockconsumer/reader.go b/module/jobqueue/finalized_block_reader.go similarity index 98% rename from engine/verification/assigner/blockconsumer/reader.go rename to module/jobqueue/finalized_block_reader.go index e6c0a071270..ac01838f4ef 100644 --- a/engine/verification/assigner/blockconsumer/reader.go +++ b/module/jobqueue/finalized_block_reader.go @@ -1,4 +1,4 @@ -package blockconsumer +package jobqueue import ( "fmt" diff --git a/engine/verification/assigner/blockconsumer/reader_test.go b/module/jobqueue/finalized_block_reader_test.go similarity index 81% rename from engine/verification/assigner/blockconsumer/reader_test.go rename to module/jobqueue/finalized_block_reader_test.go index d1e29e2ff4f..0fc5cf7b254 100644 --- a/engine/verification/assigner/blockconsumer/reader_test.go +++ b/module/jobqueue/finalized_block_reader_test.go @@ -1,4 +1,4 @@ -package blockconsumer_test +package jobqueue_test import ( "testing" @@ -7,9 +7,10 @@ import ( "github.com/stretchr/testify/require" "github.com/onflow/flow-go/engine/testutil" - "github.com/onflow/flow-go/engine/verification/assigner/blockconsumer" vertestutils "github.com/onflow/flow-go/engine/verification/utils/unittest" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/model/flow/filter" + "github.com/onflow/flow-go/module/jobqueue" "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/module/trace" "github.com/onflow/flow-go/utils/unittest" @@ -18,7 +19,7 @@ import ( // TestBlockReader evaluates that block reader correctly reads stored finalized blocks from the blocks storage and // protocol state. func TestBlockReader(t *testing.T) { - withReader(t, 10, func(reader *blockconsumer.FinalizedBlockReader, blocks []*flow.Block) { + withReader(t, 10, func(reader *jobqueue.FinalizedBlockReader, blocks []*flow.Block) { // head of block reader should be the same height as the last block on the chain. head, err := reader.Head() require.NoError(t, err) @@ -30,7 +31,7 @@ func TestBlockReader(t *testing.T) { job, err := reader.AtIndex(index) require.NoError(t, err) - retrieved, err := blockconsumer.JobToBlock(job) + retrieved, err := jobqueue.JobToBlock(job) require.NoError(t, err) require.Equal(t, actual.ID(), retrieved.ID()) } @@ -43,7 +44,7 @@ func TestBlockReader(t *testing.T) { func withReader( t *testing.T, blockCount int, - withBlockReader func(*blockconsumer.FinalizedBlockReader, []*flow.Block), + withBlockReader func(*jobqueue.FinalizedBlockReader, []*flow.Block), ) { require.Equal(t, blockCount%2, 0, "block count for this test should be even") unittest.RunWithBadgerDB(t, func(db *badger.DB) { @@ -54,7 +55,7 @@ func withReader( rootSnapshot := unittest.RootSnapshotFixture(participants) s := testutil.CompleteStateFixture(t, collector, tracer, rootSnapshot) - reader := blockconsumer.NewFinalizedBlockReader(s.State, s.Storage.Blocks) + reader := jobqueue.NewFinalizedBlockReader(s.State, s.Storage.Blocks) // generates a chain of blocks in the form of root <- R1 <- C1 <- R2 <- C2 <- ... 
where Rs are distinct reference // blocks (i.e., containing guarantees), and Cs are container blocks for their preceding reference block, @@ -62,7 +63,8 @@ func withReader( // hold any guarantees. root, err := s.State.Params().Root() require.NoError(t, err) - results := vertestutils.CompleteExecutionReceiptChainFixture(t, root, blockCount/2) + clusterCommittee := participants.Filter(filter.HasRole(flow.RoleCollection)) + results := vertestutils.CompleteExecutionReceiptChainFixture(t, root, blockCount/2, vertestutils.WithClusterCommittee(clusterCommittee)) blocks := vertestutils.ExtendStateWithFinalizedBlocks(t, results, s.State) withBlockReader(reader, blocks) diff --git a/module/jobqueue/jobs.go b/module/jobqueue/jobs.go new file mode 100644 index 00000000000..a9b2a5a9e90 --- /dev/null +++ b/module/jobqueue/jobs.go @@ -0,0 +1,68 @@ +package jobqueue + +import ( + "fmt" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" +) + +// JobID returns the corresponding unique job id of the BlockJob for this job. +func JobID(blockID flow.Identifier) module.JobID { + return module.JobID(fmt.Sprintf("%v", blockID)) +} + +// BlockJob implements the Job interface. It converts a Block into a Job to be used by job queue. +// +// In current architecture, BlockJob represents a finalized block enqueued to be processed by the +// BlockConsumer that implements the JobQueue interface. +type BlockJob struct { + Block *flow.Block +} + +// ID converts block id into job id, which guarantees uniqueness. +func (j BlockJob) ID() module.JobID { + return JobID(j.Block.ID()) +} + +// JobToBlock converts a block job into its corresponding block. +func JobToBlock(job module.Job) (*flow.Block, error) { + blockJob, ok := job.(*BlockJob) + if !ok { + return nil, fmt.Errorf("could not assert job to block, job id: %x", job.ID()) + } + return blockJob.Block, nil +} + +// BlockToJob converts the block to a BlockJob. +func BlockToJob(block *flow.Block) *BlockJob { + return &BlockJob{Block: block} +} + +// BlockHeaderJob implements the Job interface. It converts a Block Header into a Job to be used by +// job queue. +// +// In current architecture, BlockHeaderJob represents a finalized block enqueued to be processed by +// a consumer that implements the JobQueue interface. +type BlockHeaderJob struct { + Header *flow.Header +} + +// ID converts block id into job id, which guarantees uniqueness. +func (j BlockHeaderJob) ID() module.JobID { + return JobID(j.Header.ID()) +} + +// JobToBlockHeader converts a block job into its corresponding block header. +func JobToBlockHeader(job module.Job) (*flow.Header, error) { + headerJob, ok := job.(*BlockHeaderJob) + if !ok { + return nil, fmt.Errorf("could not assert job to block header, job id: %x", job.ID()) + } + return headerJob.Header, nil +} + +// BlockHeaderToJob converts the block to a BlockHeaderJob. 
+func BlockHeaderToJob(header *flow.Header) *BlockHeaderJob { + return &BlockHeaderJob{Header: header} +} diff --git a/module/jobqueue/jobs_test.go b/module/jobqueue/jobs_test.go new file mode 100644 index 00000000000..b78489a756f --- /dev/null +++ b/module/jobqueue/jobs_test.go @@ -0,0 +1,77 @@ +package jobqueue_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/jobqueue" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestJobID(t *testing.T) { + fid := unittest.IdentifierFixture() + jobID := jobqueue.JobID(fid) + + assert.IsType(t, module.JobID(""), jobID) + assert.Equal(t, fid.String(), string(jobID)) +} + +func TestBlockJob(t *testing.T) { + block := unittest.BlockFixture() + job := jobqueue.BlockToJob(&block) + + t.Run("job is correct type", func(t *testing.T) { + assert.IsType(t, &jobqueue.BlockJob{}, job, "job is not a block job") + }) + + t.Run("job ID matches block ID", func(t *testing.T) { + jobID := jobqueue.JobID(block.ID()) + assert.Equal(t, job.ID(), jobID, "job ID is not the block ID") + }) + + t.Run("job converts to block", func(t *testing.T) { + b, err := jobqueue.JobToBlock(job) + assert.NoError(t, err, "unexpected error converting notify job to block") + assert.Equal(t, block, *b, "converted block is not the same as the original block") + }) + + t.Run("incorrect job type fails to convert to block", func(t *testing.T) { + e, err := jobqueue.JobToBlock(invalidJob{}) + assert.Error(t, err, "expected error converting invalidJob to block") + assert.Nil(t, e, "expected nil block") + }) +} + +func TestBlockHeaderJob(t *testing.T) { + block := unittest.BlockHeaderFixture() + job := jobqueue.BlockHeaderToJob(&block) + + t.Run("job is correct type", func(t *testing.T) { + assert.IsType(t, &jobqueue.BlockHeaderJob{}, job, "job is not a block job") + }) + + t.Run("job ID matches block ID", func(t *testing.T) { + jobID := jobqueue.JobID(block.ID()) + assert.Equal(t, job.ID(), jobID, "job ID is not the block ID") + }) + + t.Run("job converts to header", func(t *testing.T) { + b, err := jobqueue.JobToBlockHeader(job) + assert.NoError(t, err, "unexpected error converting notify job to header") + assert.Equal(t, block, *b, "converted header is not the same as the original header") + }) + + t.Run("incorrect job type fails to convert to header", func(t *testing.T) { + e, err := jobqueue.JobToBlockHeader(invalidJob{}) + assert.Error(t, err, "expected error converting invalidJob to header") + assert.Nil(t, e, "expected nil header") + }) +} + +type invalidJob struct{} + +func (j invalidJob) ID() module.JobID { + return "invalid" +} diff --git a/module/jobqueue/sealed_header_reader.go b/module/jobqueue/sealed_header_reader.go new file mode 100644 index 00000000000..d81508dbdc6 --- /dev/null +++ b/module/jobqueue/sealed_header_reader.go @@ -0,0 +1,66 @@ +package jobqueue + +import ( + "fmt" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/state/protocol" + "github.com/onflow/flow-go/storage" +) + +// SealedBlockHeaderReader provides an abstraction for consumers to read blocks as job. +type SealedBlockHeaderReader struct { + state protocol.State + headers storage.Headers +} + +// NewSealedBlockHeaderReader creates and returns a SealedBlockHeaderReader. 
+func NewSealedBlockHeaderReader(state protocol.State, headers storage.Headers) *SealedBlockHeaderReader { + return &SealedBlockHeaderReader{ + state: state, + headers: headers, + } +} + +// AtIndex returns the block header job at the given index. +// The block header job at an index is just the finalized block header at that index (i.e., height). +func (r SealedBlockHeaderReader) AtIndex(index uint64) (module.Job, error) { + header, err := r.blockByHeight(index) + if err != nil { + return nil, fmt.Errorf("could not get block by index %v: %w", index, err) + } + + sealed, err := r.Head() + if err != nil { + return nil, fmt.Errorf("could not get last sealed block height: %w", err) + } + + if index > sealed { + // return not found error to indicate there is no job available at this height + return nil, fmt.Errorf("block at index %v is not sealed: %w", index, storage.ErrNotFound) + } + + // the block at height index is sealed + return BlockHeaderToJob(header), nil +} + +// blockByHeight returns the block at the given height. +func (r SealedBlockHeaderReader) blockByHeight(height uint64) (*flow.Header, error) { + block, err := r.headers.ByHeight(height) + if err != nil { + return nil, fmt.Errorf("could not get block by height %d: %w", height, err) + } + + return block, nil +} + +// Head returns the last sealed height as job index. +func (r SealedBlockHeaderReader) Head() (uint64, error) { + header, err := r.state.Sealed().Head() + if err != nil { + return 0, fmt.Errorf("could not get header of last sealed block: %w", err) + } + + return header.Height, nil +} diff --git a/module/jobqueue/sealed_header_reader_test.go b/module/jobqueue/sealed_header_reader_test.go new file mode 100644 index 00000000000..4f3c97a5385 --- /dev/null +++ b/module/jobqueue/sealed_header_reader_test.go @@ -0,0 +1,83 @@ +package jobqueue_test + +import ( + "testing" + + "github.com/dgraph-io/badger/v2" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/jobqueue" + synctest "github.com/onflow/flow-go/module/state_synchronization/requester/unittest" + "github.com/onflow/flow-go/storage" + "github.com/onflow/flow-go/utils/unittest" +) + +// TestSealedBlockHeaderReader evaluates that block reader correctly reads stored finalized blocks from the blocks storage and +// protocol state. +func TestSealedBlockHeaderReader(t *testing.T) { + RunWithReader(t, 10, func(reader *jobqueue.SealedBlockHeaderReader, blocks []*flow.Block) { + // the last block seals its parent + lastSealedBlock := blocks[len(blocks)-2] + + // head of the reader is the last sealed block + head, err := reader.Head() + assert.NoError(t, err) + assert.Equal(t, lastSealedBlock.Header.Height, head, "head does not match last sealed block") + + // retrieved blocks from block reader should be the same as the original blocks stored in it. + // all except the last block should be sealed + lastIndex := len(blocks) + for _, expected := range blocks[:lastIndex-1] { + index := expected.Header.Height + job, err := reader.AtIndex(index) + assert.NoError(t, err) + + retrieved, err := jobqueue.JobToBlockHeader(job) + assert.NoError(t, err) + assert.Equal(t, expected.ID(), retrieved.ID()) + } + + // ensure the last block returns a NotFound error + job, err := reader.AtIndex(uint64(lastIndex)) + assert.Nil(t, job) + assert.ErrorIs(t, err, storage.ErrNotFound) + }) +} + +// RunWithReader is a test helper that sets up a block reader. 
+// It also provides a chain of specified number of finalized blocks ready to read by block reader, i.e., the protocol state is extended with the +// chain of blocks and the blocks are stored in blocks storage. +func RunWithReader( + t *testing.T, + blockCount int, + withBlockReader func(*jobqueue.SealedBlockHeaderReader, []*flow.Block), +) { + require.Equal(t, blockCount%2, 0, "block count for this test should be even") + unittest.RunWithBadgerDB(t, func(db *badger.DB) { + + blocks := make([]*flow.Block, blockCount) + blocksByHeight := make(map[uint64]*flow.Block, blockCount) + + var seals []*flow.Header + parent := unittest.GenesisFixture().Header + for i := 0; i < blockCount; i++ { + seals = []*flow.Header{parent} + height := uint64(i) + 1 + + blocks[i] = unittest.BlockWithParentAndSeals(parent, seals) + blocksByHeight[height] = blocks[i] + + parent = blocks[i].Header + } + + snapshot := synctest.MockProtocolStateSnapshot(synctest.WithHead(seals[0])) + state := synctest.MockProtocolState(synctest.WithSnapshot(snapshot)) + headerStorage := synctest.MockBlockHeaderStorage(synctest.WithByHeight(blocksByHeight)) + + reader := jobqueue.NewSealedBlockHeaderReader(state, headerStorage) + + withBlockReader(reader, blocks) + }) +} diff --git a/module/jobqueue/workerpool.go b/module/jobqueue/workerpool.go new file mode 100644 index 00000000000..15338e1ffc1 --- /dev/null +++ b/module/jobqueue/workerpool.go @@ -0,0 +1,92 @@ +package jobqueue + +import ( + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/util" +) + +// WorkerPool implements the jobqueue.Worker interface, and wraps the processing to make it +// compatible with the Component interface. +type WorkerPool struct { + component.Component + + cm *component.ComponentManager + processor JobProcessor + notify NotifyDone + ch chan module.Job +} + +// JobProcessor is called by the worker to execute each job. It should only return when the job has +// completed, either successfully or after performing any failure handling. +// It takes 3 arguments: +// - irrecoverable.SignalerContext: this is used to signal shutdown to the worker and throw any +// irrecoverable errors back to the parent. The signaller context is passed in from consumer's +// Start method +// - module.Job: the job to be processed. The processor is responsible for decoding into the +// expected format. +// - func(): Call this closure after the job is considered complete. This is a convenience method +// that avoid needing to a separate ProcessingNotifier for simple usecases. If a different method +// is used to signal jobs are done to the consumer, this function can be ignored. +type JobProcessor func(irrecoverable.SignalerContext, module.Job, func()) + +// NotifyDone should be the consumer's NotifyJobIsDone method, or a wrapper for that method. It is +// wrapped in a closure and added as an argument to the JobProcessor to notify the consumer that +// the job is done. 
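// Example (illustrative sketch only, not part of this patch): one possible way a caller could wire a
// Consumer and a WorkerPool together. The pool needs the consumer's NotifyJobIsDone callback, while
// the consumer needs the pool as its Worker, so the notify closure closes over a variable that is
// assigned afterwards. The names log, reader, progress, maxProcessing, maxSearchAhead, initialIndex
// and handleHeader are assumed caller-side values, not identifiers defined by this package.
//
//	var consumer module.JobConsumer
//
//	processor := func(ctx irrecoverable.SignalerContext, job module.Job, done func()) {
//		header, err := jobqueue.JobToBlockHeader(job)
//		if err != nil {
//			ctx.Throw(fmt.Errorf("unexpected job type: %w", err))
//			return
//		}
//		handleHeader(header) // placeholder application logic
//		done()               // tell the consumer this job is complete
//	}
//
//	pool := jobqueue.NewWorkerPool(processor, func(id module.JobID) {
//		consumer.NotifyJobIsDone(id)
//	}, 4)
//
//	consumer = jobqueue.NewConsumer(log, reader, progress, pool, maxProcessing, maxSearchAhead)
//	if err := consumer.Start(initialIndex); err != nil {
//		// handle startup failure
//	}
//
// (end of example)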
+type NotifyDone func(module.JobID) + +// NewWorkerPool returns a new WorkerPool +func NewWorkerPool(processor JobProcessor, notify NotifyDone, workers uint64) *WorkerPool { + w := &WorkerPool{ + processor: processor, + notify: notify, + ch: make(chan module.Job), + } + + builder := component.NewComponentManagerBuilder() + + for i := uint64(0); i < workers; i++ { + builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + ready() + w.workerLoop(ctx) + }) + } + + w.cm = builder.Build() + w.Component = w.cm + + return w +} + +// Run executes the worker's JobProcessor for the provided job. +// Run is non-blocking. +func (w *WorkerPool) Run(job module.Job) error { + // don't accept new jobs after shutdown is signalled + if util.CheckClosed(w.cm.ShutdownSignal()) { + return nil + } + + select { + case <-w.cm.ShutdownSignal(): + return nil + case w.ch <- job: + } + + return nil +} + +// workerLoop processes incoming jobs passed via the Run method. The job execution is wrapped in a +// goroutine to support passing the worker's irrecoverable.SignalerContext into the processor. +func (w *WorkerPool) workerLoop(ctx irrecoverable.SignalerContext) { + for { + select { + case <-ctx.Done(): + return + case job := <-w.ch: + w.processor(ctx, job, func() { + w.notify(job.ID()) + }) + } + } +} diff --git a/module/metrics.go b/module/metrics.go index 5ca79f1d61e..b6db6b26395 100644 --- a/module/metrics.go +++ b/module/metrics.go @@ -331,6 +331,20 @@ type ExecutionDataServiceMetrics interface { ExecutionDataGetFinished(duration time.Duration, success bool, blobTreeSize uint64) } +type ExecutionDataRequesterMetrics interface { + // ExecutionDataFetchStarted records an in-progress download + ExecutionDataFetchStarted() + + // ExecutionDataFetchFinished records a completed download + ExecutionDataFetchFinished(duration time.Duration, success bool, height uint64) + + // NotificationSent reports that ExecutionData received notifications were sent for a block height + NotificationSent(height uint64) + + // FetchRetried reports that a download retry was processed + FetchRetried() +} + type RuntimeMetrics interface { // TransactionParsed reports the time spent parsing a single transaction RuntimeTransactionParsed(dur time.Duration) diff --git a/module/metrics/execution_data_requester.go b/module/metrics/execution_data_requester.go new file mode 100644 index 00000000000..e8ccc5e3266 --- /dev/null +++ b/module/metrics/execution_data_requester.go @@ -0,0 +1,110 @@ +package metrics + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/onflow/flow-go/module" +) + +type ExecutionDataRequesterCollector struct { + fetchDuration prometheus.Histogram + + downloadsInProgress prometheus.Gauge + outstandingNotifications prometheus.Gauge + + highestNotificationHeight prometheus.Gauge + highestDownloadHeight prometheus.Gauge + + downloadRetries prometheus.Counter + failedDownloads prometheus.Counter +} + +func NewExecutionDataRequesterCollector() module.ExecutionDataRequesterMetrics { + + fetchDuration := promauto.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespaceStateSync, + Subsystem: subsystemExecutionDataRequester, + Name: "execution_requester_download_duration_ms", + Help: "the duration of execution data download operation", + Buckets: []float64{1, 100, 500, 1000, 2000, 5000}, + }) + + downloadsInProgress := promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: namespaceStateSync, + 
Subsystem: subsystemExecutionDataRequester, + Name: "execution_requester_in_progress_downloads", + Help: "number of concurrently running execution data download operations", + }) + + outstandingNotifications := promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: namespaceStateSync, + Subsystem: subsystemExecutionDataRequester, + Name: "execution_requester_outstanding_notifications", + Help: "number of execution data received notifications waiting to be processed", + }) + + highestDownloadHeight := promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: namespaceStateSync, + Subsystem: subsystemExecutionDataRequester, + Name: "execution_requester_highest_download_height", + Help: "highest block height for which execution data has been received", + }) + + highestNotificationHeight := promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: namespaceStateSync, + Subsystem: subsystemExecutionDataRequester, + Name: "execution_requester_highest_notification_height", + Help: "highest block height for which execution data notifications have been sent", + }) + + downloadRetries := promauto.NewCounter(prometheus.CounterOpts{ + Namespace: namespaceStateSync, + Subsystem: subsystemExecutionDataRequester, + Name: "execution_requester_download_retries_total", + Help: "number of execution data download retries", + }) + + failedDownloads := promauto.NewCounter(prometheus.CounterOpts{ + Namespace: namespaceStateSync, + Subsystem: subsystemExecutionDataRequester, + Name: "execution_data_failed_downloads_total", + Help: "number of failed execution data downloads", + }) + + return &ExecutionDataRequesterCollector{ + fetchDuration: fetchDuration, + downloadsInProgress: downloadsInProgress, + outstandingNotifications: outstandingNotifications, + highestDownloadHeight: highestDownloadHeight, + highestNotificationHeight: highestNotificationHeight, + downloadRetries: downloadRetries, + failedDownloads: failedDownloads, + } +} + +func (ec *ExecutionDataRequesterCollector) ExecutionDataFetchStarted() { + ec.downloadsInProgress.Inc() +} + +func (ec *ExecutionDataRequesterCollector) ExecutionDataFetchFinished(duration time.Duration, success bool, height uint64) { + ec.downloadsInProgress.Dec() + ec.fetchDuration.Observe(float64(duration.Milliseconds())) + if success { + ec.highestDownloadHeight.Set(float64(height)) + ec.outstandingNotifications.Inc() + } else { + ec.failedDownloads.Inc() + } +} + +func (ec *ExecutionDataRequesterCollector) NotificationSent(height uint64) { + ec.outstandingNotifications.Dec() + ec.highestNotificationHeight.Set(float64(height)) +} + +func (ec *ExecutionDataRequesterCollector) FetchRetried() { + ec.downloadRetries.Inc() +} diff --git a/module/metrics/namespaces.go b/module/metrics/namespaces.go index 31e6a49dc47..12e243a6811 100644 --- a/module/metrics/namespaces.go +++ b/module/metrics/namespaces.go @@ -68,7 +68,8 @@ const ( // State Synchronization Subsystems const ( - subsystemExecutionDataService = "execution_data_service" + subsystemExecutionDataService = "execution_data_service" + subsystemExecutionDataRequester = "execution_data_requester" ) // METRIC NAMING GUIDELINES diff --git a/module/metrics/noop.go b/module/metrics/noop.go index b1054a40d72..beb26dd69b8 100644 --- a/module/metrics/noop.go +++ b/module/metrics/noop.go @@ -166,3 +166,7 @@ func (nc *NoopCollector) OnEntityEjectionDueToEmergency() func (nc *NoopCollector) OnKeyPutFailure() {} func (nc *NoopCollector) OnKeyGetSuccess() {} func (nc *NoopCollector) OnKeyGetFailure() {} +func (nc *NoopCollector) 
ExecutionDataFetchStarted() {} +func (nc *NoopCollector) ExecutionDataFetchFinished(_ time.Duration, _ bool, _ uint64) {} +func (nc *NoopCollector) NotificationSent(height uint64) {} +func (nc *NoopCollector) FetchRetried() {} diff --git a/module/mock/backend_scripts_metrics.go b/module/mock/backend_scripts_metrics.go index fae23c8a38f..0bbd184b4a8 100644 --- a/module/mock/backend_scripts_metrics.go +++ b/module/mock/backend_scripts_metrics.go @@ -1,10 +1,12 @@ -// Code generated by mockery v1.0.0. DO NOT EDIT. +// Code generated by mockery v2.12.1. DO NOT EDIT. package mock import ( mock "github.com/stretchr/testify/mock" + testing "testing" + time "time" ) @@ -17,3 +19,13 @@ type BackendScriptsMetrics struct { func (_m *BackendScriptsMetrics) ScriptExecuted(dur time.Duration, size int) { _m.Called(dur, size) } + +// NewBackendScriptsMetrics creates a new instance of BackendScriptsMetrics. It also registers the testing.TB interface on the mock and a cleanup function to assert the mocks expectations. +func NewBackendScriptsMetrics(t testing.TB) *BackendScriptsMetrics { + mock := &BackendScriptsMetrics{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/module/mock/execution_data_requester_metrics.go b/module/mock/execution_data_requester_metrics.go new file mode 100644 index 00000000000..b3b39eb96ba --- /dev/null +++ b/module/mock/execution_data_requester_metrics.go @@ -0,0 +1,46 @@ +// Code generated by mockery v2.12.1. DO NOT EDIT. + +package mock + +import ( + mock "github.com/stretchr/testify/mock" + + testing "testing" + + time "time" +) + +// ExecutionDataRequesterMetrics is an autogenerated mock type for the ExecutionDataRequesterMetrics type +type ExecutionDataRequesterMetrics struct { + mock.Mock +} + +// ExecutionDataFetchFinished provides a mock function with given fields: duration, success, height +func (_m *ExecutionDataRequesterMetrics) ExecutionDataFetchFinished(duration time.Duration, success bool, height uint64) { + _m.Called(duration, success, height) +} + +// ExecutionDataFetchStarted provides a mock function with given fields: +func (_m *ExecutionDataRequesterMetrics) ExecutionDataFetchStarted() { + _m.Called() +} + +// FetchRetried provides a mock function with given fields: +func (_m *ExecutionDataRequesterMetrics) FetchRetried() { + _m.Called() +} + +// NotificationSent provides a mock function with given fields: height +func (_m *ExecutionDataRequesterMetrics) NotificationSent(height uint64) { + _m.Called(height) +} + +// NewExecutionDataRequesterMetrics creates a new instance of ExecutionDataRequesterMetrics. It also registers the testing.TB interface on the mock and a cleanup function to assert the mocks expectations. 
+func NewExecutionDataRequesterMetrics(t testing.TB) *ExecutionDataRequesterMetrics { + mock := &ExecutionDataRequesterMetrics{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/module/mock/job_consumer.go b/module/mock/job_consumer.go index d26a49115cc..aadb8591017 100644 --- a/module/mock/job_consumer.go +++ b/module/mock/job_consumer.go @@ -19,6 +19,20 @@ func (_m *JobConsumer) Check() { _m.Called() } +// LastProcessedIndex provides a mock function with given fields: +func (_m *JobConsumer) LastProcessedIndex() uint64 { + ret := _m.Called() + + var r0 uint64 + if rf, ok := ret.Get(0).(func() uint64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(uint64) + } + + return r0 +} + // NotifyJobIsDone provides a mock function with given fields: _a0 func (_m *JobConsumer) NotifyJobIsDone(_a0 module.JobID) uint64 { ret := _m.Called(_a0) diff --git a/module/mock/transaction_metrics.go b/module/mock/transaction_metrics.go index 2ec5793abeb..1a0f7ba5cc4 100644 --- a/module/mock/transaction_metrics.go +++ b/module/mock/transaction_metrics.go @@ -21,11 +21,6 @@ func (_m *TransactionMetrics) ScriptExecuted(dur time.Duration, size int) { _m.Called(dur, size) } -// TransactionResultFetched provides a mock function with given fields: dur, size -func (_m *TransactionMetrics) TransactionResultFetched(dur time.Duration, size int) { - _m.Called(dur, size) -} - // TransactionExecuted provides a mock function with given fields: txID, when func (_m *TransactionMetrics) TransactionExecuted(txID flow.Identifier, when time.Time) { _m.Called(txID, when) @@ -46,6 +41,11 @@ func (_m *TransactionMetrics) TransactionReceived(txID flow.Identifier, when tim _m.Called(txID, when) } +// TransactionResultFetched provides a mock function with given fields: dur, size +func (_m *TransactionMetrics) TransactionResultFetched(dur time.Duration, size int) { + _m.Called(dur, size) +} + // TransactionSubmissionFailed provides a mock function with given fields: func (_m *TransactionMetrics) TransactionSubmissionFailed() { _m.Called() diff --git a/module/signature/errors.go b/module/signature/errors.go index 912404a89a2..18809551149 100644 --- a/module/signature/errors.go +++ b/module/signature/errors.go @@ -109,9 +109,46 @@ func IsInsufficientSignaturesError(err error) bool { return errors.As(err, &e) } -// IsDecodeSignerIndicesError returns whether err is about decoding signer indices -func IsDecodeSignerIndicesError(err error) bool { - return errors.Is(err, ErrIllegallyPaddedBitVector) || - errors.Is(err, ErrIncompatibleBitVectorLength) || - errors.Is(err, ErrInvalidChecksum) +/* ********************** InvalidSignerIndicesError ********************** */ + +// InvalidSignerIndicesError indicates that a bit vector does not encode a valid set of signers +type InvalidSignerIndicesError struct { + err error +} + +func NewInvalidSignerIndicesErrorf(msg string, args ...interface{}) error { + return InvalidSignerIndicesError{ + err: fmt.Errorf(msg, args...), + } +} + +func (e InvalidSignerIndicesError) Error() string { return e.err.Error() } +func (e InvalidSignerIndicesError) Unwrap() error { return e.err } + +// IsInvalidSignerIndicesError returns whether err is an InvalidSignerIndicesError +func IsInvalidSignerIndicesError(err error) bool { + var e InvalidSignerIndicesError + return errors.As(err, &e) +} + +/* ********************** InvalidSignerIndicesError ********************** */ + +// InvalidSigTypesError indicates that the given data not encode valid signature types +type 
InvalidSigTypesError struct { + err error +} + +func NewInvalidSigTypesErrorf(msg string, args ...interface{}) error { + return InvalidSigTypesError{ + err: fmt.Errorf(msg, args...), + } +} + +func (e InvalidSigTypesError) Error() string { return e.err.Error() } +func (e InvalidSigTypesError) Unwrap() error { return e.err } + +// IsInvalidSigTypesError returns whether err is an InvalidSigTypesError +func IsInvalidSigTypesError(err error) bool { + var e InvalidSigTypesError + return errors.As(err, &e) } diff --git a/module/signature/signer_indices.go b/module/signature/signer_indices.go index bdab92082e1..a418c126e16 100644 --- a/module/signature/signer_indices.go +++ b/module/signature/signer_indices.go @@ -1,6 +1,7 @@ package signature import ( + "errors" "fmt" "github.com/onflow/flow-go/ledger/common/bitutils" @@ -118,20 +119,22 @@ func EncodeSignerToIndicesAndSigType( // * The input `signers` must be the set of signers in their canonical order. // // Expected Error returns during normal operations: -// * ErrIncompatibleBitVectorLength indicates that `signerIndices` has the wrong length -// * ErrIllegallyPaddedBitVector is the vector is padded with bits other than 0 +// * signature.IsInvalidSigTypesError if the given `sigType` does not encode a valid sequence of signature types func DecodeSigTypeToStakingAndBeaconSigners( signers flow.IdentityList, sigType []byte, -) (stakingSigners flow.IdentityList, beaconSigners flow.IdentityList, err error) { +) (flow.IdentityList, flow.IdentityList, error) { numberSigners := len(signers) - if e := validPadding(sigType, numberSigners); e != nil { - return nil, nil, fmt.Errorf("sigType is invalid: %w", e) + if err := validPadding(sigType, numberSigners); err != nil { + if errors.Is(err, ErrIncompatibleBitVectorLength) || errors.Is(err, ErrIllegallyPaddedBitVector) { + return nil, nil, NewInvalidSigTypesErrorf("invalid padding of sigTypes: %w", err) + } + return nil, nil, fmt.Errorf("unexpected exception while checking padding of sigTypes: %w", err) } // decode bits to Identities - stakingSigners = make(flow.IdentityList, 0, numberSigners) - beaconSigners = make(flow.IdentityList, 0, numberSigners) + stakingSigners := make(flow.IdentityList, 0, numberSigners) + beaconSigners := make(flow.IdentityList, 0, numberSigners) for i, signer := range signers { if bitutils.ReadBit(sigType, i) == 0 { stakingSigners = append(stakingSigners, signer) @@ -211,36 +214,56 @@ func EncodeSignersToIndices( // * The input `canonicalIdentifiers` must exhaustively list the set of authorized signers in their canonical order. 
// // Expected Error returns during normal operations: -// * ErrIncompatibleBitVectorLength indicates that `signerIndices` has the wrong length -// * ErrIllegallyPaddedBitVector is the vector is padded with bits other than 0 -// * ErrInvalidChecksum if the input is shorter than the expected checksum contained therein +// * signature.InvalidSignerIndicesError if the given index vector `prefixed` does not encode a valid set of signers func DecodeSignerIndicesToIdentifiers( canonicalIdentifiers flow.IdentifierList, prefixed []byte, ) (flow.IdentifierList, error) { + indices, err := decodeSignerIndices(canonicalIdentifiers, prefixed) + if err != nil { + return nil, err + } + + signerIDs := make(flow.IdentifierList, 0, len(indices)) + for _, index := range indices { + signerIDs = append(signerIDs, canonicalIdentifiers[index]) + } + return signerIDs, nil +} + +func decodeSignerIndices( + canonicalIdentifiers flow.IdentifierList, + prefixed []byte, +) ([]int, error) { // the prefixed contains the checksum of the canonicalIdentifiers that the signerIndices // creator saw. // extract the checksum and compare with the canonicalIdentifiers to see if both // the signerIndices creator and validator see the same list. signerIndices, err := CompareAndExtract(canonicalIdentifiers, prefixed) if err != nil { - return nil, fmt.Errorf("could not extract signer indices from prefixed data: %w", err) + if errors.Is(err, ErrInvalidChecksum) { + return nil, NewInvalidSignerIndicesErrorf("signer indices' checkum is invalid: %w", err) + } + return nil, fmt.Errorf("unexpected exception while checking signer indices: %w", err) } numberCanonicalNodes := len(canonicalIdentifiers) err = validPadding(signerIndices, numberCanonicalNodes) if err != nil { - return nil, fmt.Errorf("signerIndices are invalid: %w", err) + if errors.Is(err, ErrIncompatibleBitVectorLength) || errors.Is(err, ErrIllegallyPaddedBitVector) { + return nil, NewInvalidSignerIndicesErrorf("invalid padding of signerIndices: %w", err) + } + return nil, fmt.Errorf("unexpected exception while checking padding of signer indices: %w", err) } // decode bits to Identifiers - signerIDs := make(flow.IdentifierList, 0, numberCanonicalNodes) + indices := make([]int, 0, numberCanonicalNodes) for i := 0; i < numberCanonicalNodes; i++ { if bitutils.ReadBit(signerIndices, i) == 1 { - signerIDs = append(signerIDs, canonicalIdentifiers[i]) + indices = append(indices, i) } } - return signerIDs, nil + return indices, nil } // DecodeSignerIndicesToIdentities decodes the given compacted bit vector into node Identities. @@ -248,35 +271,21 @@ func DecodeSignerIndicesToIdentifiers( // * The input `canonicalIdentifiers` must exhaustively list the set of authorized signers in their canonical order. // // Expected Error returns during normal operations: -// * ErrIncompatibleBitVectorLength indicates that `signerIndices` has the wrong length -// * ErrIllegallyPaddedBitVector is the vector is padded with bits other than 0 -// * ErrInvalidChecksum if the input is shorter than the expected checksum contained therein +// * signature.InvalidSignerIndicesError if the given index vector `prefixed` does not encode a valid set of signers func DecodeSignerIndicesToIdentities( canonicalIdentities flow.IdentityList, prefixed []byte, ) (flow.IdentityList, error) { - // the prefixed contains the checksum of the canonicalIdentifiers that the signerIndices - // creator saw. 
- // extract the checksum and compare with the canonicalIdentifiers to see if both - // the signerIndices creator and validator see the same list. - signerIndices, err := CompareAndExtract(canonicalIdentities.NodeIDs(), prefixed) + indices, err := decodeSignerIndices(canonicalIdentities.NodeIDs(), prefixed) if err != nil { - return nil, fmt.Errorf("could not extract signer indices from prefixed data: %w", err) - } - - numberCanonicalNodes := len(canonicalIdentities) - if e := validPadding(signerIndices, numberCanonicalNodes); e != nil { - return nil, fmt.Errorf("signerIndices padding are invalid: %w", e) + return nil, err } - // decode bits to Identities - signerIdentities := make(flow.IdentityList, 0, numberCanonicalNodes) - for i := 0; i < numberCanonicalNodes; i++ { - if bitutils.ReadBit(signerIndices, i) == 1 { - signerIdentities = append(signerIdentities, canonicalIdentities[i]) - } + signers := make(flow.IdentityList, 0, len(indices)) + for _, index := range indices { + signers = append(signers, canonicalIdentities[index]) } - return signerIdentities, nil + return signers, nil } // validPadding verifies that `bitVector` satisfies the following criteria @@ -284,7 +293,9 @@ func DecodeSignerIndicesToIdentities( // `numUsedBits` number of bits. Otherwise, we return an `ErrIncompatibleBitVectorLength`. // 2. If `numUsedBits` is _not_ an integer-multiple of 8, `bitVector` is padded with tailing bits. Per // convention, these bits must be zero. Otherwise, we return an `ErrIllegallyPaddedBitVector`. -// All errors represent expected failure cases for byzantine inputs. There are _no unexpected_ error returns. +// Expected Error returns during normal operations: +// * ErrIncompatibleBitVectorLength if the vector has the wrong length +// * ErrIllegallyPaddedBitVector if the vector is padded with bits other than 0 func validPadding(bitVector []byte, numUsedBits int) error { // Verify condition 1: l := len(bitVector) diff --git a/module/signature/signer_indices_test.go b/module/signature/signer_indices_test.go index 260ca2c712c..540b90053d7 100644 --- a/module/signature/signer_indices_test.go +++ b/module/signature/signer_indices_test.go @@ -5,16 +5,14 @@ import ( "sort" "testing" + "github.com/stretchr/testify/require" "pgregory.net/rapid" "github.com/onflow/flow-go/ledger/common/bitutils" + "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/model/flow/filter" "github.com/onflow/flow-go/model/flow/filter/id" "github.com/onflow/flow-go/model/flow/order" - - "github.com/stretchr/testify/require" - - "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/signature" "github.com/onflow/flow-go/utils/unittest" ) @@ -47,6 +45,41 @@ func TestEncodeDecodeIdentities(t *testing.T) { } } +func TestEncodeDecodeIdentitiesFail(t *testing.T) { + canonicalIdentities := unittest.IdentityListFixture(20) + canonicalIdentifiers := canonicalIdentities.NodeIDs() + signers := canonicalIdentities[3:19] + validIndices, err := signature.EncodeSignersToIndices(canonicalIdentities.NodeIDs(), signers.NodeIDs()) + require.NoError(t, err) + + _, err = signature.DecodeSignerIndicesToIdentifiers(canonicalIdentifiers, validIndices) + require.NoError(t, err) + + invalidSum := make([]byte, len(validIndices)) + copy(invalidSum, validIndices) + if invalidSum[0] == byte(0) { + invalidSum[0] = byte(1) + } else { + invalidSum[0] = byte(0) + } + _, err = signature.DecodeSignerIndicesToIdentifiers(canonicalIdentifiers, invalidSum) + require.True(t, signature.IsInvalidSignerIndicesError(err), err) + 
require.ErrorIs(t, err, signature.ErrInvalidChecksum, err) + + incompatibleLength := append(validIndices, byte(0)) + _, err = signature.DecodeSignerIndicesToIdentifiers(canonicalIdentifiers, incompatibleLength) + require.True(t, signature.IsInvalidSignerIndicesError(err), err) + require.False(t, signature.IsInvalidSignerIndicesError(signature.NewInvalidSigTypesErrorf("sdf"))) + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, err) + + illegallyPadded := make([]byte, len(validIndices)) + copy(illegallyPadded, validIndices) + illegallyPadded[len(illegallyPadded)-1]++ + _, err = signature.DecodeSignerIndicesToIdentifiers(canonicalIdentifiers, illegallyPadded) + require.True(t, signature.IsInvalidSignerIndicesError(err), err) + require.ErrorIs(t, err, signature.ErrIllegallyPaddedBitVector, err) +} + func TestEncodeIdentity(t *testing.T) { only := unittest.IdentifierListFixture(1) indices, err := signature.EncodeSignersToIndices(only, only) @@ -160,13 +193,13 @@ func Test_ValidPaddingErrIncompatibleBitVectorLength(t *testing.T) { // 1 byte less _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(255)}) - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIncompatibleBitVectorLength) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, "low-level error representing the failure should be ErrIncompatibleBitVectorLength") // 1 byte more _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{}) - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIncompatibleBitVectorLength) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, "low-level error representing the failure should be ErrIncompatibleBitVectorLength") // if bits is not multiply of 8, then padding is needed signers = unittest.IdentityListFixture(15) @@ -175,32 +208,32 @@ func Test_ValidPaddingErrIncompatibleBitVectorLength(t *testing.T) { // 1 byte more _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(255), byte(255), byte(254)}) - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIncompatibleBitVectorLength) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, "low-level error representing the failure should be ErrIncompatibleBitVectorLength") // 1 byte less _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(254)}) - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIncompatibleBitVectorLength) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, "low-level error representing the failure should be ErrIncompatibleBitVectorLength") // if bits is not multiply of 8, // 1 byte more signers = unittest.IdentityListFixture(0) _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(255)}) - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIncompatibleBitVectorLength) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, "low-level error 
representing the failure should be ErrIncompatibleBitVectorLength") // 1 byte more signers = unittest.IdentityListFixture(1) _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(0), byte(0)}) - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIncompatibleBitVectorLength) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, "low-level error representing the failure should be ErrIncompatibleBitVectorLength") // 1 byte less signers = unittest.IdentityListFixture(7) _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{}) - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIncompatibleBitVectorLength) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIncompatibleBitVectorLength, "low-level error representing the failure should be ErrIncompatibleBitVectorLength") } func TestValidPaddingErrIllegallyPaddedBitVector(t *testing.T) { @@ -210,23 +243,23 @@ func TestValidPaddingErrIllegallyPaddedBitVector(t *testing.T) { for count := 1; count < 8; count++ { signers = unittest.IdentityListFixture(count) _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(255)}) // last bit should be 0, but 1 - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIllegallyPaddedBitVector) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIllegallyPaddedBitVector, "low-level error representing the failure should be ErrIllegallyPaddedBitVector") _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(1)}) // last bit should be 0, but 1 - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIllegallyPaddedBitVector) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIllegallyPaddedBitVector, "low-level error representing the failure should be ErrIllegallyPaddedBitVector") } for count := 9; count < 16; count++ { signers = unittest.IdentityListFixture(count) _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(255), byte(255)}) // last bit should be 0, but 1 - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIllegallyPaddedBitVector) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIllegallyPaddedBitVector, "low-level error representing the failure should be ErrIllegallyPaddedBitVector") _, _, err = signature.DecodeSigTypeToStakingAndBeaconSigners(signers, []byte{byte(1), byte(1)}) // last bit should be 0, but 1 - require.Error(t, err) - require.ErrorAs(t, err, &signature.ErrIllegallyPaddedBitVector) + require.True(t, signature.IsInvalidSigTypesError(err), "API-level error should be InvalidSigTypesError") + require.ErrorIs(t, err, signature.ErrIllegallyPaddedBitVector, "low-level error representing the failure should be ErrIllegallyPaddedBitVector") } } diff --git a/module/state_synchronization/execution_data_requester.go b/module/state_synchronization/execution_data_requester.go new file mode 100644 index 00000000000..8eda8e5c20b --- /dev/null +++ b/module/state_synchronization/execution_data_requester.go @@ -0,0 +1,21 @@ +package 
state_synchronization + +import ( + "github.com/onflow/flow-go/consensus/hotstuff/model" + "github.com/onflow/flow-go/module/component" +) + +// ExecutionDataReceivedCallback is a callback that is called ExecutionData is received for a new block +type ExecutionDataReceivedCallback func(*ExecutionData) + +// ExecutionDataRequester is a component that syncs ExecutionData from the network, and exposes +// a callback that is called when a new ExecutionData is received +type ExecutionDataRequester interface { + component.Component + + // OnBlockFinalized accepts block finalization notifications from the FinalizationDistributor + OnBlockFinalized(*model.Block) + + // AddOnExecutionDataFetchedConsumer adds a callback to be called when a new ExecutionData is received + AddOnExecutionDataFetchedConsumer(fn ExecutionDataReceivedCallback) +} diff --git a/module/state_synchronization/execution_data_service.go b/module/state_synchronization/execution_data_service.go index f8293d77c32..6cfd33adb1f 100644 --- a/module/state_synchronization/execution_data_service.go +++ b/module/state_synchronization/execution_data_service.go @@ -28,6 +28,7 @@ type BlobTree [][]cid.Cid // ExecutionDataService handles adding/getting execution data to/from a blobservice type ExecutionDataService interface { + module.ReadyDoneAware // Add constructs a blob tree for the given ExecutionData and // adds it to the blobservice, and then returns the root CID // and list of all CIDs. @@ -348,7 +349,7 @@ func (s *executionDataServiceImpl) getBlobs(ctx context.Context, cids []cid.Cid, return nil, 0, &MalformedDataError{deserializeErr} } - // TODO: deserialization succeeds even if the blob channel reader has still has unconsumed data, meaning that a malicious actor + // TODO: deserialization succeeds even if the blob channel reader still has unconsumed data, meaning that a malicious actor // could fill the blob tree with lots of unnecessary data by appending it at the end of the serialized data for each level. // It's possible that we could detect this and fail deserialization using something like the following: // https://github.com/onflow/flow-go/blob/bd5320719266b045ae2cac954f6a56e1e79560eb/engine/access/rest/handlers.go#L189-L193 @@ -358,9 +359,13 @@ func (s *executionDataServiceImpl) getBlobs(ctx context.Context, cids []cid.Cid, } // Get gets the ExecutionData for the given root CID from the blobservice. +// It blocks until all blobs are retrieved and the ExecutionData is reconstructed, or an error occurs. +// If a bitswap enabled blobservice is used and the requested ExecutionData is not already in the +// local blobstore, the ExecutionData will be retrieved from the network and stored in the blobstore. 
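// As an illustrative sketch, a caller would typically bound the retrieval with a context deadline,
// since Get may need to fetch blobs from the network (parentCtx, eds and rootID below are assumed
// caller-side values: a parent context, an ExecutionDataService, and the root flow.Identifier of
// the blob tree):
//
//	ctx, cancel := context.WithTimeout(parentCtx, 5*time.Minute)
//	defer cancel()
//
//	executionData, err := eds.Get(ctx, rootID)
//	if err != nil {
//		// inspect err against the error types listed below if different handling is needed
//		return err
//	}
//	_ = executionData
//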
// The returned error will be: // - MalformedDataError if some level of the blob tree cannot be properly deserialized // - BlobSizeLimitExceededError if any blob in the blob tree exceeds the maximum blob size +// - ErrBlobTreeDepthExceeded if the blob tree exceeds the maximum depth // - BlobNotFoundError if some CID in the blob tree could not be found from the blobservice func (s *executionDataServiceImpl) Get(ctx context.Context, rootID flow.Identifier) (*ExecutionData, error) { rootCid := flow.IdToCid(rootID) diff --git a/module/state_synchronization/mock/execution_data_requester.go b/module/state_synchronization/mock/execution_data_requester.go new file mode 100644 index 00000000000..d06cafd268f --- /dev/null +++ b/module/state_synchronization/mock/execution_data_requester.go @@ -0,0 +1,76 @@ +// Code generated by mockery v2.12.1. DO NOT EDIT. + +package state_synchronization + +import ( + irrecoverable "github.com/onflow/flow-go/module/irrecoverable" + mock "github.com/stretchr/testify/mock" + + model "github.com/onflow/flow-go/consensus/hotstuff/model" + + state_synchronization "github.com/onflow/flow-go/module/state_synchronization" + + testing "testing" +) + +// ExecutionDataRequester is an autogenerated mock type for the ExecutionDataRequester type +type ExecutionDataRequester struct { + mock.Mock +} + +// AddOnExecutionDataFetchedConsumer provides a mock function with given fields: fn +func (_m *ExecutionDataRequester) AddOnExecutionDataFetchedConsumer(fn state_synchronization.ExecutionDataReceivedCallback) { + _m.Called(fn) +} + +// Done provides a mock function with given fields: +func (_m *ExecutionDataRequester) Done() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + +// OnBlockFinalized provides a mock function with given fields: _a0 +func (_m *ExecutionDataRequester) OnBlockFinalized(_a0 *model.Block) { + _m.Called(_a0) +} + +// Ready provides a mock function with given fields: +func (_m *ExecutionDataRequester) Ready() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + +// Start provides a mock function with given fields: _a0 +func (_m *ExecutionDataRequester) Start(_a0 irrecoverable.SignalerContext) { + _m.Called(_a0) +} + +// NewExecutionDataRequester creates a new instance of ExecutionDataRequester. It also registers the testing.TB interface on the mock and a cleanup function to assert the mocks expectations. 
+func NewExecutionDataRequester(t testing.TB) *ExecutionDataRequester { + mock := &ExecutionDataRequester{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/module/state_synchronization/mock/execution_data_service.go b/module/state_synchronization/mock/execution_data_service.go index ed0bc6e5409..fa6fcaf5fcd 100644 --- a/module/state_synchronization/mock/execution_data_service.go +++ b/module/state_synchronization/mock/execution_data_service.go @@ -50,6 +50,22 @@ func (_m *ExecutionDataService) Add(ctx context.Context, sd *state_synchronizati return r0, r1, r2 } +// Done provides a mock function with given fields: +func (_m *ExecutionDataService) Done() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + // Get provides a mock function with given fields: ctx, rootID func (_m *ExecutionDataService) Get(ctx context.Context, rootID flow.Identifier) (*state_synchronization.ExecutionData, error) { ret := _m.Called(ctx, rootID) @@ -73,6 +89,22 @@ func (_m *ExecutionDataService) Get(ctx context.Context, rootID flow.Identifier) return r0, r1 } +// Ready provides a mock function with given fields: +func (_m *ExecutionDataService) Ready() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + // NewExecutionDataService creates a new instance of ExecutionDataService. It also registers the testing.TB interface on the mock and a cleanup function to assert the mocks expectations. func NewExecutionDataService(t testing.TB) *ExecutionDataService { mock := &ExecutionDataService{} diff --git a/module/state_synchronization/requester/execution_data_requester.go b/module/state_synchronization/requester/execution_data_requester.go new file mode 100644 index 00000000000..94527f3e308 --- /dev/null +++ b/module/state_synchronization/requester/execution_data_requester.go @@ -0,0 +1,462 @@ +package requester + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/rs/zerolog" + "github.com/sethvargo/go-retry" + + "github.com/onflow/flow-go/consensus/hotstuff/model" + "github.com/onflow/flow-go/engine" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/jobqueue" + "github.com/onflow/flow-go/module/state_synchronization" + "github.com/onflow/flow-go/module/state_synchronization/requester/jobs" + "github.com/onflow/flow-go/module/util" + "github.com/onflow/flow-go/state/protocol" + "github.com/onflow/flow-go/storage" +) + +// The ExecutionDataRequester downloads ExecutionData for sealed blocks from other participants in +// the flow network. The ExecutionData for a sealed block should always be downloadable, since a +// sealed block must have been executed. +// +// Once the ExecutionData for a block is downloaded, the node becomes a seeder for other participants +// on the network using the bitswap protocol. The downloading and seeding work is handled by the +// ExecutionDataService. +// +// The ExecutionDataRequester internally uses a job queue to request and download each sealed block +// with multiple workers.
It downloads ExecutionData block by block towards the latest sealed block. +// To ensure it does not miss any sealed block, it persists the last downloaded height, and only +// increments it when the next height has been downloaded. In the event of a crash, it will read +// the last downloaded height and resume processing from the next un-downloaded height. +// The requester listens to block finalization events and checks whether the sealed height has changed; +// if it has, it creates a job for each sealed height that has not yet been downloaded. +// +// The requester is made up of 3 subcomponents: +// +// * OnBlockFinalized: receives block finalized events from the finalization distributor and +// forwards them to the blockConsumer. +// +// * blockConsumer: is a jobqueue that receives block finalization events. On each event, +// it checks for the latest sealed block, then uses a pool of workers to +// download ExecutionData for each block from the network. After each +// successful download, the blockConsumer sends a notification to the +// notificationConsumer that a new ExecutionData is available. +// +// * notificationConsumer: is a jobqueue that receives ExecutionData fetched events. On each event, +// it checks if ExecutionData for the next consecutive block height is +// available, then uses a single worker to send notifications to registered +// consumers. +// The registered consumers are guaranteed to receive each sealed block in +// consecutive height order at least once. +// +// +------------------+ +---------------+ +----------------------+ +// -->| OnBlockFinalized |----->| blockConsumer | +-->| notificationConsumer | +// +------------------+ +-------+-------+ | +-----------+----------+ +// | | | +// +------+------+ | +------+------+ +// xN | Worker Pool |----+ x1 | Worker Pool |----> Registered consumers +// +-------------+ +-------------+ + +const ( + // DefaultFetchTimeout is the default timeout for fetching ExecutionData from the db/network + DefaultFetchTimeout = 5 * time.Minute + + // DefaultRetryDelay is the default initial delay used in the exponential backoff for failed + // ExecutionData download retries + DefaultRetryDelay = 10 * time.Second + + // DefaultMaxRetryDelay is the default maximum delay used in the exponential backoff for failed + // ExecutionData download retries + DefaultMaxRetryDelay = 5 * time.Minute + + // DefaultMaxSearchAhead is the default max number of unsent notifications to allow before + // pausing new fetches. + DefaultMaxSearchAhead = 5000 + + // Number of goroutines to use for downloading new ExecutionData from the network. + fetchWorkers = 4 +) + +// ExecutionDataConfig contains configuration options for the ExecutionDataRequester +type ExecutionDataConfig struct { + // The initial value to use as the last processed block height. This should be the + // first block height to sync - 1 + InitialBlockHeight uint64 + + // Max number of unsent notifications to allow before pausing new fetches. After exceeding this + // limit, the requester will stop processing new finalized block notifications. This prevents + // unbounded memory use by the requester if it gets stuck fetching a specific height.
+ MaxSearchAhead uint64 + + // The timeout for fetching ExecutionData from the db/network + FetchTimeout time.Duration + + // Exponential backoff settings for download retries + RetryDelay time.Duration + MaxRetryDelay time.Duration +} + +type executionDataRequester struct { + component.Component + cm *component.ComponentManager + eds state_synchronization.ExecutionDataService + metrics module.ExecutionDataRequesterMetrics + config ExecutionDataConfig + log zerolog.Logger + + // Local db objects + headers storage.Headers + results storage.ExecutionResults + + executionDataReader *jobs.ExecutionDataReader + + // Notifiers for queue consumers + finalizationNotifier engine.Notifier + + // Job queues + blockConsumer *jobqueue.ComponentConsumer + notificationConsumer *jobqueue.ComponentConsumer + + // List of callbacks to call when ExecutionData is successfully fetched for a block + consumers []state_synchronization.ExecutionDataReceivedCallback + + consumerMu sync.RWMutex +} + +var _ state_synchronization.ExecutionDataRequester = (*executionDataRequester)(nil) + +// New creates a new execution data requester component +func New( + log zerolog.Logger, + edrMetrics module.ExecutionDataRequesterMetrics, + eds state_synchronization.ExecutionDataService, + processedHeight storage.ConsumerProgress, + processedNotifications storage.ConsumerProgress, + state protocol.State, + headers storage.Headers, + results storage.ExecutionResults, + cfg ExecutionDataConfig, +) state_synchronization.ExecutionDataRequester { + e := &executionDataRequester{ + log: log.With().Str("component", "execution_data_requester").Logger(), + eds: eds, + metrics: edrMetrics, + headers: headers, + results: results, + config: cfg, + finalizationNotifier: engine.NewNotifier(), + } + + executionDataNotifier := engine.NewNotifier() + + // jobqueue Jobs object that tracks sealed blocks by height. This is used by the blockConsumer + // to get a sequential list of sealed blocks. + sealedBlockReader := jobqueue.NewSealedBlockHeaderReader(state, headers) + + // blockConsumer ensures every sealed block's execution data is downloaded. + // It listens to block finalization events from `finalizationNotifier`, then checks if there + // are new sealed blocks with `sealedBlockReader`. If there are, it starts workers to process + // them with `processBlockJob`, which fetches execution data. At most `fetchWorkers` workers + // will be created for concurrent processing. When a sealed block's execution data has been + // downloaded, it updates and persists the highest consecutive downloaded height with + // `processedHeight`. That way, if the node crashes, it reads the `processedHeight` and resumes + // from `processedHeight + 1`. If the database is empty, rootHeight will be used to initialize + // the last processed height. Once the execution data is fetched and stored, it notifies + // `executionDataNotifier`.
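As an aside on the crash-recovery behavior described in the comment above: it amounts to persisting a single counter and resuming from the height immediately after it. The following standalone sketch is purely illustrative and uses hypothetical names; the real logic lives inside the jobqueue consumer, with storage.ConsumerProgress playing the role of the persisted counter:

package main

import "fmt"

// progress mimics a persisted "highest consecutive downloaded height" counter,
// standing in for the role storage.ConsumerProgress plays for the blockConsumer.
type progress struct{ height uint64 }

func (p *progress) processedIndex() uint64     { return p.height }
func (p *progress) setProcessedIndex(h uint64) { p.height = h }

func main() {
	p := &progress{height: 41} // e.g. restored from the database after a restart

	// resume from the first height that has not been downloaded yet
	for h := p.processedIndex() + 1; h <= 44; h++ {
		fmt.Println("downloading execution data for height", h)
		p.setProcessedIndex(h) // only advanced once the download fully succeeds
	}
}

Because the counter is only advanced after a height is fully downloaded, no sealed height can be skipped across a restart; at worst a height is downloaded again.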
+ e.blockConsumer = jobqueue.NewComponentConsumer( + e.log.With().Str("module", "block_consumer").Logger(), + e.finalizationNotifier.Channel(), // to listen to finalization events to find newly sealed blocks + processedHeight, // read and persist the downloaded height + sealedBlockReader, // read sealed blocks by height + e.config.InitialBlockHeight, // initial "last processed" height for empty db + e.processBlockJob, // process the sealed block job to download its execution data + fetchWorkers, // the number of concurrent workers + e.config.MaxSearchAhead, // max number of unsent notifications to allow before pausing new fetches + ) + // notifies notificationConsumer when new ExecutionData blobs are available + // SetPostNotifier will notify executionDataNotifier AFTER e.blockConsumer.LastProcessedIndex is updated. + // Even though it does not guarantee a notification for every height, the notificationConsumer is still + // guaranteed to process every height at least once, because it discovers new jobs using the + // executionDataReader, which in turn derives new heights from e.blockConsumer.LastProcessedIndex. + e.blockConsumer.SetPostNotifier(func(module.JobID) { executionDataNotifier.Notify() }) + + // jobqueue Jobs object tracks downloaded execution data by height. This is used by the + // notificationConsumer to get downloaded execution data from storage. + e.executionDataReader = jobs.NewExecutionDataReader( + e.eds, + e.headers, + e.results, + e.config.FetchTimeout, + // method to get highest consecutive height that has downloaded execution data. it is used + // here by the notification job consumer to discover new jobs. + // Note: we don't want to notify notificationConsumer for a block if it has not downloaded + // execution data yet. + e.blockConsumer.LastProcessedIndex, + ) + + // notificationConsumer consumes `OnExecutionDataFetched` events, and ensures its consumers + // receive this event in consecutive block height order. + // It listens to events from `executionDataNotifier`, which fire when a block's execution data + // is downloaded and stored, and checks the `executionDataCache` to see whether the next + // un-processed consecutive height is available. + // To determine the next un-processed consecutive height, it reads the latest consecutive height + // from `processedNotifications`, which is persisted in storage so that it is crash-resistant. + // When a new consecutive height is available, it calls `processNotificationJob` to notify all the + // `e.consumers`. + // Note: the `e.consumers` will be guaranteed to receive at least one `OnExecutionDataFetched` event + // for each sealed block in consecutive block height order. + e.notificationConsumer = jobqueue.NewComponentConsumer( + e.log.With().Str("module", "notification_consumer").Logger(), + executionDataNotifier.Channel(), // listen for notifications from the block consumer + processedNotifications, // read and persist the notified height + e.executionDataReader, // read execution data by height + e.config.InitialBlockHeight, // initial "last processed" height for empty db + e.processNotificationJob, // process the job to send notifications for an execution data + 1, // use a single worker to ensure notification is delivered in consecutive order + 0, // search ahead limit controlled by worker count + ) + + builder := component.NewComponentManagerBuilder(). + AddWorker(e.runBlockConsumer).
+ AddWorker(e.runNotificationConsumer) + + e.cm = builder.Build() + e.Component = e.cm + + return e +} + +// OnBlockFinalized accepts block finalization notifications from the FinalizationDistributor +func (e *executionDataRequester) OnBlockFinalized(*model.Block) { + e.finalizationNotifier.Notify() +} + +// AddOnExecutionDataFetchedConsumer adds a callback to be called when a new ExecutionData is received +// Callback Implementations must: +// * be concurrency safe +// * be non-blocking +// * handle repetition of the same events (with some processing overhead). +func (e *executionDataRequester) AddOnExecutionDataFetchedConsumer(fn state_synchronization.ExecutionDataReceivedCallback) { + e.consumerMu.Lock() + defer e.consumerMu.Unlock() + + e.consumers = append(e.consumers, fn) +} + +// runBlockConsumer runs the blockConsumer component +func (e *executionDataRequester) runBlockConsumer(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + err := util.WaitClosed(ctx, e.eds.Ready()) + if err != nil { + return // context cancelled + } + + err = util.WaitClosed(ctx, e.notificationConsumer.Ready()) + if err != nil { + return // context cancelled + } + + e.blockConsumer.Start(ctx) + + err = util.WaitClosed(ctx, e.blockConsumer.Ready()) + if err == nil { + ready() + } + + <-e.blockConsumer.Done() +} + +// runNotificationConsumer runs the notificationConsumer component +func (e *executionDataRequester) runNotificationConsumer(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + e.executionDataReader.AddContext(ctx) + e.notificationConsumer.Start(ctx) + + err := util.WaitClosed(ctx, e.notificationConsumer.Ready()) + if err == nil { + ready() + } + + <-e.notificationConsumer.Done() +} + +// Fetch Worker Methods + +// processBlockJob consumes jobs from the blockConsumer and attempts to download an ExecutionData +// for the given block height. +func (e *executionDataRequester) processBlockJob(ctx irrecoverable.SignalerContext, job module.Job, jobComplete func()) { + // convert job into a block entry + header, err := jobqueue.JobToBlockHeader(job) + if err != nil { + ctx.Throw(fmt.Errorf("failed to convert job to block: %w", err)) + } + + err = e.processSealedHeight(ctx, header.ID(), header.Height) + if err == nil { + jobComplete() + return + } + + // errors are thrown as irrecoverable errors except context cancellation, and invalid blobs + // invalid blobs are logged, and never completed, which will halt downloads after maxSearchAhead + // is reached. + e.log.Error().Err(err).Str("job_id", string(job.ID())).Msg("error encountered while processing block job") +} + +// processSealedHeight downloads ExecutionData for the given block height. +// If the download fails, it will retry forever, using exponential backoff. +func (e *executionDataRequester) processSealedHeight(ctx irrecoverable.SignalerContext, blockID flow.Identifier, height uint64) error { + backoff := retry.NewExponential(e.config.RetryDelay) + backoff = retry.WithCappedDuration(e.config.MaxRetryDelay, backoff) + backoff = retry.WithJitterPercent(15, backoff) + + attempt := 0 + return retry.Do(ctx, backoff, func(context.Context) error { + if attempt > 0 { + e.log.Debug(). + Str("block_id", blockID.String()). + Uint64("height", height). + Uint64("attempt", uint64(attempt)). 
+ Msgf("retrying download") + + e.metrics.FetchRetried() + } + attempt++ + + // download execution data for the block + err := e.processFetchRequest(ctx, blockID, height) + + // don't retry if the blob was invalid + if isInvalidBlobError(err) { + return err + } + + return retry.RetryableError(err) + }) +} + +func (e *executionDataRequester) processFetchRequest(ctx irrecoverable.SignalerContext, blockID flow.Identifier, height uint64) error { + logger := e.log.With(). + Str("block_id", blockID.String()). + Uint64("height", height). + Logger() + + logger.Debug().Msg("processing fetch request") + + result, err := e.results.ByBlockID(blockID) + + // The ExecutionResult may not have been downloaded yet. This error should be retried + if errors.Is(err, storage.ErrNotFound) { + logger.Debug().Msg("execution result not found") + return err + } + + if err != nil { + ctx.Throw(fmt.Errorf("failed to lookup execution result for block %s: %w", blockID, err)) + } + + logger = logger.With().Str("execution_data_id", result.ExecutionDataID.String()).Logger() + + start := time.Now() + e.metrics.ExecutionDataFetchStarted() + + logger.Debug().Msg("downloading execution data") + + _, err = e.fetchExecutionData(ctx, result.ExecutionDataID) + + e.metrics.ExecutionDataFetchFinished(time.Since(start), err == nil, height) + + if isInvalidBlobError(err) { + // This means an execution result was sealed with an invalid execution data id (invalid data). + // Eventually, verification nodes will verify that the execution data is valid, and not sign the receipt + logger.Error().Err(err).Msg("HALTING REQUESTER: invalid execution data found") + + return err + } + + // Some or all of the blob was missing or corrupt. retry + if isBlobNotFoundError(err) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { + logger.Error().Err(err).Msg("failed to get execution data for block") + + return err + } + + // Any other error is unexpected + if err != nil { + logger.Error().Err(err).Msg("unexpected error fetching execution data") + + ctx.Throw(err) + } + + logger.Debug().Msg("Fetched execution data") + + return nil +} + +// fetchExecutionData fetches the ExecutionData by its ID, and times out if fetchTimeout is exceeded +func (e *executionDataRequester) fetchExecutionData(signalerCtx irrecoverable.SignalerContext, executionDataID flow.Identifier) (*state_synchronization.ExecutionData, error) { + ctx, cancel := context.WithTimeout(signalerCtx, e.config.FetchTimeout) + defer cancel() + + // Get the data from the network + // this is a blocking call, won't be unblocked until either hitting error (including timeout) or + // the data is received + executionData, err := e.eds.Get(ctx, executionDataID) + + if err != nil { + return nil, err + } + + return executionData, nil +} + +// Notification Worker Methods + +func (e *executionDataRequester) processNotificationJob(ctx irrecoverable.SignalerContext, job module.Job, jobComplete func()) { + // convert job into a block entry + entry, err := jobs.JobToBlockEntry(job) + if err != nil { + ctx.Throw(fmt.Errorf("failed to convert job to entry: %w", err)) + } + + e.processNotification(ctx, entry.Height, entry.ExecutionData) + jobComplete() +} + +func (e *executionDataRequester) processNotification(ctx irrecoverable.SignalerContext, height uint64, executionData *state_synchronization.ExecutionData) { + e.log.Debug().Msgf("notifying for block %d", height) + + // send notifications + e.notifyConsumers(executionData) + + e.metrics.NotificationSent(height) +} + +func (e 
*executionDataRequester) notifyConsumers(executionData *state_synchronization.ExecutionData) { + e.consumerMu.RLock() + defer e.consumerMu.RUnlock() + + for _, fn := range e.consumers { + fn(executionData) + } +} + +func isInvalidBlobError(err error) bool { + var malformedDataError *state_synchronization.MalformedDataError + var blobSizeLimitExceededError *state_synchronization.BlobSizeLimitExceededError + return errors.As(err, &malformedDataError) || + errors.As(err, &blobSizeLimitExceededError) || + errors.Is(err, state_synchronization.ErrBlobTreeDepthExceeded) +} + +func isBlobNotFoundError(err error) bool { + var blobNotFoundError *state_synchronization.BlobNotFoundError + return errors.As(err, &blobNotFoundError) +} diff --git a/module/state_synchronization/requester/execution_data_requester_test.go b/module/state_synchronization/requester/execution_data_requester_test.go new file mode 100644 index 00000000000..f12e3397597 --- /dev/null +++ b/module/state_synchronization/requester/execution_data_requester_test.go @@ -0,0 +1,793 @@ +package requester_test + +import ( + "context" + "fmt" + "math/rand" + "os" + "sync" + "testing" + "time" + + "github.com/dgraph-io/badger/v2" + "github.com/ipfs/go-datastore" + dssync "github.com/ipfs/go-datastore/sync" + blockstore "github.com/ipfs/go-ipfs-blockstore" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/onflow/flow-go/consensus/hotstuff/model" + "github.com/onflow/flow-go/consensus/hotstuff/notifications/pubsub" + "github.com/onflow/flow-go/model/encoding/cbor" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/module/state_synchronization" + syncmock "github.com/onflow/flow-go/module/state_synchronization/mock" + "github.com/onflow/flow-go/module/state_synchronization/requester" + synctest "github.com/onflow/flow-go/module/state_synchronization/requester/unittest" + "github.com/onflow/flow-go/network/compressor" + "github.com/onflow/flow-go/network/mocknetwork" + "github.com/onflow/flow-go/state/protocol" + statemock "github.com/onflow/flow-go/state/protocol/mock" + storage "github.com/onflow/flow-go/storage/badger" + storagemock "github.com/onflow/flow-go/storage/mock" + "github.com/onflow/flow-go/utils/unittest" +) + +type ExecutionDataRequesterSuite struct { + suite.Suite + + blobservice *mocknetwork.BlobService + datastore datastore.Batching + db *badger.DB + headers *storagemock.Headers + + eds *syncmock.ExecutionDataService + + run edTestRun + + mockSnapshot *mockSnapshot +} + +func TestExecutionDataRequesterSuite(t *testing.T) { + t.Parallel() + rand.Seed(time.Now().UnixMilli()) + suite.Run(t, new(ExecutionDataRequesterSuite)) +} + +func (suite *ExecutionDataRequesterSuite) SetupTest() { + suite.datastore = dssync.MutexWrap(datastore.NewMapDatastore()) + suite.blobservice = synctest.MockBlobService(blockstore.NewBlockstore(suite.datastore)) + + suite.run = edTestRun{ + "", + 100, + func(_ int) map[uint64]testExecutionDataCallback { + return map[uint64]testExecutionDataCallback{} + }, + } +} + +type testExecutionDataServiceEntry struct { + // When set, the response from this call back will be returned for any calls to Get + // Note: this callback is called twice by mockery, once for the execution data and once for the error + fn 
testExecutionDataCallback + // When set (and fn is unset), this error will be returned for any calls to Get for this ED + Err error + // Otherwise, the execution data will be returned directly with no error + ExecutionData *state_synchronization.ExecutionData +} + +type specialBlockGenerator func(int) map[uint64]testExecutionDataCallback +type edTestRun struct { + name string + blockCount int + specialBlocks specialBlockGenerator +} + +type testExecutionDataCallback func(*state_synchronization.ExecutionData) (*state_synchronization.ExecutionData, error) + +func mockExecutionDataService(edStore map[flow.Identifier]*testExecutionDataServiceEntry) *syncmock.ExecutionDataService { + eds := new(syncmock.ExecutionDataService) + + get := func(id flow.Identifier) (*state_synchronization.ExecutionData, error) { + ed, has := edStore[id] + + // return not found + if !has { + return nil, &state_synchronization.BlobNotFoundError{} + } + + // use a callback. this is useful for injecting a pause or custom error behavior + if ed.fn != nil { + return ed.fn(ed.ExecutionData) + } + + // return a custom error + if ed.Err != nil { + return nil, ed.Err + } + + // return the specific execution data + return ed.ExecutionData, nil + } + + eds.On("Get", mock.Anything, mock.AnythingOfType("flow.Identifier")). + Return( + func(ctx context.Context, id flow.Identifier) *state_synchronization.ExecutionData { + ed, _ := get(id) + return ed + }, + func(ctx context.Context, id flow.Identifier) error { + _, err := get(id) + return err + }, + ). + Maybe() // Maybe() needed to get call count + + eds.On("Add", mock.Anything, mock.AnythingOfType("*state_synchronization.ExecutionData")). + Return(flow.ZeroID, nil, nil). + Maybe() // Maybe() needed to get call count + + noop := module.NoopReadyDoneAware{} + eds.On("Ready"). + Return(func() <-chan struct{} { return noop.Ready() }). + Maybe() // Maybe() needed to get call count + + return eds +} + +func (suite *ExecutionDataRequesterSuite) mockProtocolState(blocksByHeight map[uint64]*flow.Block) *statemock.State { + state := new(statemock.State) + + suite.mockSnapshot = new(mockSnapshot) + suite.mockSnapshot.set(blocksByHeight[0].Header, nil) // genesis block + + state.On("Sealed").Return(suite.mockSnapshot).Maybe() + return state +} + +// TestRequesterProcessesBlocks tests that the requester processes all blocks and sends notifications +// in order. 
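The tests below register notification consumers through AddOnExecutionDataFetchedConsumer. As a reminder of the contract documented on that method (concurrency safe, non-blocking, tolerant of repeated deliveries), the following is a minimal sketch of a conforming callback. The type and its names are hypothetical; it only assumes the flow-go packages already used in this patch:

package example

import (
	"sync"

	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/module/state_synchronization"
)

// dedupingConsumer is a hypothetical ExecutionDataReceivedCallback implementation that follows
// the documented contract: safe for concurrent use, non-blocking, and tolerant of the same
// block being delivered more than once.
type dedupingConsumer struct {
	mu    sync.Mutex
	seen  map[flow.Identifier]struct{}
	queue chan *state_synchronization.ExecutionData // drained by a separate worker goroutine
}

func newDedupingConsumer(buffer int) *dedupingConsumer {
	return &dedupingConsumer{
		seen:  make(map[flow.Identifier]struct{}),
		queue: make(chan *state_synchronization.ExecutionData, buffer),
	}
}

func (c *dedupingConsumer) onExecutionDataReceived(ed *state_synchronization.ExecutionData) {
	c.mu.Lock()
	if _, ok := c.seen[ed.BlockID]; ok {
		c.mu.Unlock()
		return // repeated delivery of an already-seen block; ignore it
	}
	c.seen[ed.BlockID] = struct{}{}
	c.mu.Unlock()

	select {
	case c.queue <- ed: // hand off without blocking the requester's notification worker
	default:
		// queue is full; dropping here is only to keep the callback non-blocking,
		// a real consumer would need its own policy for this case
	}
}

A separate goroutine would range over queue and do the actual work, keeping the callback itself fast.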
+func (suite *ExecutionDataRequesterSuite) TestRequesterProcessesBlocks() { + + tests := []edTestRun{ + // Test that blocks are processed in order + { + "happy path", + 100, + func(_ int) map[uint64]testExecutionDataCallback { + return map[uint64]testExecutionDataCallback{} + }, + }, + // Tests that blocks that are missed are properly retried and notifications are received in order + { + "requests blocks with some missed", + 100, + generateBlocksWithSomeMissed, + }, + // Tests that blocks that are missed are properly retried and backfilled + { + "requests blocks with some delayed", + 100, + generateBlocksWithRandomDelays, + }, + } + + for _, run := range tests { + suite.Run(run.name, func() { + unittest.RunWithBadgerDB(suite.T(), func(db *badger.DB) { + suite.db = db + + suite.datastore = dssync.MutexWrap(datastore.NewMapDatastore()) + suite.blobservice = synctest.MockBlobService(blockstore.NewBlockstore(suite.datastore)) + + testData := suite.generateTestData(run.blockCount, run.specialBlocks(run.blockCount)) + edr, fd := suite.prepareRequesterTest(testData) + fetchedExecutionData := suite.runRequesterTest(edr, fd, testData) + + verifyFetchedExecutionData(suite.T(), fetchedExecutionData, testData) + + suite.T().Log("Shutting down test") + }) + }) + } +} + +// TestRequesterResumesAfterRestart tests that the requester will pick up where it left off after a +// restart, without skipping any blocks +func (suite *ExecutionDataRequesterSuite) TestRequesterResumesAfterRestart() { + suite.datastore = dssync.MutexWrap(datastore.NewMapDatastore()) + suite.blobservice = synctest.MockBlobService(blockstore.NewBlockstore(suite.datastore)) + + testData := suite.generateTestData(suite.run.blockCount, suite.run.specialBlocks(suite.run.blockCount)) + + test := func(stopHeight, resumeHeight uint64) { + testData.fetchedExecutionData = nil + + unittest.RunWithBadgerDB(suite.T(), func(db *badger.DB) { + suite.db = db + + // Process half of the blocks + edr, fd := suite.prepareRequesterTest(testData) + testData.stopHeight = stopHeight + testData.resumeHeight = 0 + testData.fetchedExecutionData = suite.runRequesterTest(edr, fd, testData) + + // Stand up a new component using the same datastore, and make sure all remaining + // blocks are processed + edr, fd = suite.prepareRequesterTest(testData) + testData.stopHeight = 0 + testData.resumeHeight = resumeHeight + fetchedExecutionData := suite.runRequesterTest(edr, fd, testData) + + verifyFetchedExecutionData(suite.T(), fetchedExecutionData, testData) + + suite.T().Log("Shutting down test") + }) + } + + suite.Run("requester resumes processing with no gap", func() { + stopHeight := testData.startHeight + uint64(suite.run.blockCount)/2 + resumeHeight := stopHeight + 1 + test(stopHeight, resumeHeight) + }) + + suite.Run("requester resumes processing with gap", func() { + stopHeight := testData.startHeight + uint64(suite.run.blockCount)/2 + resumeHeight := testData.endHeight + test(stopHeight, resumeHeight) + }) +} + +// TestRequesterCatchesUp tests that the requester processes all heights when it starts with a +// backlog of sealed blocks. 
+func (suite *ExecutionDataRequesterSuite) TestRequesterCatchesUp() { + unittest.RunWithBadgerDB(suite.T(), func(db *badger.DB) { + suite.db = db + + suite.datastore = dssync.MutexWrap(datastore.NewMapDatastore()) + suite.blobservice = synctest.MockBlobService(blockstore.NewBlockstore(suite.datastore)) + + testData := suite.generateTestData(suite.run.blockCount, suite.run.specialBlocks(suite.run.blockCount)) + + // start processing with all seals available + edr, fd := suite.prepareRequesterTest(testData) + testData.resumeHeight = testData.endHeight + fetchedExecutionData := suite.runRequesterTest(edr, fd, testData) + + verifyFetchedExecutionData(suite.T(), fetchedExecutionData, testData) + + suite.T().Log("Shutting down test") + }) +} + +// TestRequesterPausesAndResumes tests that the requester pauses when it downloads maxSearchAhead +// blocks beyond the last processed block, and resumes when it catches up. +func (suite *ExecutionDataRequesterSuite) TestRequesterPausesAndResumes() { + unittest.RunWithBadgerDB(suite.T(), func(db *badger.DB) { + suite.db = db + + pauseHeight := uint64(10) + maxSearchAhead := uint64(5) + + // Downloads will succeed immediately for all blocks except pauseHeight, which will hang + // until resume() is called. + generate, resume := generatePauseResume(pauseHeight) + + testData := suite.generateTestData(suite.run.blockCount, generate(suite.run.blockCount)) + testData.maxSearchAhead = maxSearchAhead + testData.waitTimeout = time.Second * 10 + + // calculate the expected number of blocks that should be downloaded before resuming + expectedDownloads := maxSearchAhead + (pauseHeight-1)*2 + + edr, fd := suite.prepareRequesterTest(testData) + fetchedExecutionData := suite.runRequesterTestPauseResume(edr, fd, testData, int(expectedDownloads), resume) + + verifyFetchedExecutionData(suite.T(), fetchedExecutionData, testData) + + suite.T().Log("Shutting down test") + }) +} + +// TestRequesterHalts tests that the requester handles halting correctly when it encounters an +// invalid block +func (suite *ExecutionDataRequesterSuite) TestRequesterHalts() { + unittest.RunWithBadgerDB(suite.T(), func(db *badger.DB) { + suite.db = db + + suite.run.blockCount = 10 + suite.datastore = dssync.MutexWrap(datastore.NewMapDatastore()) + suite.blobservice = synctest.MockBlobService(blockstore.NewBlockstore(suite.datastore)) + + // generate a block that will return a malformed blob error,
causing the requester to halt + generate, expectedErr := generateBlocksWithHaltingError(suite.run.blockCount) + testData := suite.generateTestData(suite.run.blockCount, generate(suite.run.blockCount)) + + // start processing with all seals available + edr, finalizationDistributor := suite.prepareRequesterTest(testData) + testData.resumeHeight = testData.endHeight + testData.expectedIrrecoverable = expectedErr + fetchedExecutionData := suite.runRequesterTestHalts(edr, finalizationDistributor, testData) + assert.Less(suite.T(), len(fetchedExecutionData), testData.sealedCount) + + suite.T().Log("Shutting down test") + }) +} + +func generateBlocksWithSomeMissed(blockCount int) map[uint64]testExecutionDataCallback { + missing := map[uint64]testExecutionDataCallback{} + + // every 5th block fails to download n times before succeeding + for i := uint64(0); i < uint64(blockCount); i++ { + if i%5 > 0 { + continue + } + + failures := rand.Intn(3) + 1 + attempts := 0 + missing[i] = func(ed *state_synchronization.ExecutionData) (*state_synchronization.ExecutionData, error) { + if attempts < failures*2 { // this func is run twice for every attempt by the mock (once for ExecutionData one for errors) + attempts++ + // This should fail the first n fetch attempts + time.Sleep(time.Duration(rand.Intn(25)) * time.Millisecond) + return nil, &state_synchronization.BlobNotFoundError{} + } + + return ed, nil + } + } + + return missing +} + +func generateBlocksWithRandomDelays(blockCount int) map[uint64]testExecutionDataCallback { + // delay every third block by a random amount + delays := map[uint64]testExecutionDataCallback{} + for i := uint64(0); i < uint64(blockCount); i++ { + if i%5 > 0 { + continue + } + + delays[i] = func(ed *state_synchronization.ExecutionData) (*state_synchronization.ExecutionData, error) { + time.Sleep(time.Duration(rand.Intn(25)) * time.Millisecond) + return ed, nil + } + } + + return delays +} + +func generateBlocksWithHaltingError(blockCount int) (specialBlockGenerator, error) { + // return a MalformedDataError on the second to last block + height := uint64(blockCount - 5) + err := fmt.Errorf("halting error: %w", &state_synchronization.MalformedDataError{}) + + generate := func(int) map[uint64]testExecutionDataCallback { + return map[uint64]testExecutionDataCallback{ + height: func(ed *state_synchronization.ExecutionData) (*state_synchronization.ExecutionData, error) { + return nil, err + }, + } + } + return generate, err +} + +func generatePauseResume(pauseHeight uint64) (specialBlockGenerator, func()) { + pause := make(chan struct{}) + + blocks := map[uint64]testExecutionDataCallback{} + blocks[pauseHeight] = func(ed *state_synchronization.ExecutionData) (*state_synchronization.ExecutionData, error) { + <-pause + return ed, nil + } + + generate := func(int) map[uint64]testExecutionDataCallback { return blocks } + resume := func() { close(pause) } + + return generate, resume +} + +func (suite *ExecutionDataRequesterSuite) prepareRequesterTest(cfg *fetchTestRun) (state_synchronization.ExecutionDataRequester, *pubsub.FinalizationDistributor) { + suite.headers = synctest.MockBlockHeaderStorage(synctest.WithByID(cfg.blocksByID), synctest.WithByHeight(cfg.blocksByHeight)) + results := synctest.MockResultsStorage(synctest.WithByBlockID(cfg.resultsByID)) + state := suite.mockProtocolState(cfg.blocksByHeight) + + suite.eds = mockExecutionDataService(cfg.executionDataEntries) + + finalizationDistributor := pubsub.NewFinalizationDistributor() + processedHeight := 
storage.NewConsumerProgress(suite.db, module.ConsumeProgressExecutionDataRequesterBlockHeight) + processedNotification := storage.NewConsumerProgress(suite.db, module.ConsumeProgressExecutionDataRequesterNotification) + + edr := requester.New( + zerolog.New(os.Stdout).With().Timestamp().Logger(), + metrics.NewNoopCollector(), + suite.eds, + processedHeight, + processedNotification, + state, + suite.headers, + results, + requester.ExecutionDataConfig{ + InitialBlockHeight: cfg.startHeight - 1, + MaxSearchAhead: cfg.maxSearchAhead, + FetchTimeout: cfg.fetchTimeout, + RetryDelay: cfg.retryDelay, + MaxRetryDelay: cfg.maxRetryDelay, + }, + ) + + finalizationDistributor.AddOnBlockFinalizedConsumer(edr.OnBlockFinalized) + + return edr, finalizationDistributor +} + +func (suite *ExecutionDataRequesterSuite) runRequesterTestHalts(edr state_synchronization.ExecutionDataRequester, finalizationDistributor *pubsub.FinalizationDistributor, cfg *fetchTestRun) receivedExecutionData { + // make sure test helper goroutines are cleaned up + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + signalerCtx, errChan := irrecoverable.WithSignaler(ctx) + go irrecoverableNotExpected(suite.T(), ctx, errChan) + + testDone := make(chan struct{}) + fetchedExecutionData := cfg.FetchedExecutionData() + + // collect all execution data notifications + edr.AddOnExecutionDataFetchedConsumer(suite.consumeExecutionDataNotifications(cfg, func() { close(testDone) }, fetchedExecutionData)) + + edr.Start(signalerCtx) + unittest.RequireCloseBefore(suite.T(), edr.Ready(), cfg.waitTimeout, "timed out waiting for requester to be ready") + + // Send blocks through finalizationDistributor + suite.finalizeBlocks(cfg, finalizationDistributor) + + // testDone should never close because the requester paused + unittest.RequireNeverClosedWithin(suite.T(), testDone, 100*time.Millisecond, "finished sending notifications unexpectedly") + suite.T().Log("All notifications received") + + cancel() + unittest.RequireCloseBefore(suite.T(), edr.Done(), cfg.waitTimeout, "timed out waiting for requester to shutdown") + + return fetchedExecutionData +} + +func (suite *ExecutionDataRequesterSuite) runRequesterTestPauseResume(edr state_synchronization.ExecutionDataRequester, finalizationDistributor *pubsub.FinalizationDistributor, cfg *fetchTestRun, expectedDownloads int, resume func()) receivedExecutionData { + // make sure test helper goroutines are cleaned up + ctx, cancel := context.WithCancel(context.Background()) + + signalerCtx, errChan := irrecoverable.WithSignaler(ctx) + go irrecoverableNotExpected(suite.T(), ctx, errChan) + + testDone := make(chan struct{}) + fetchedExecutionData := cfg.FetchedExecutionData() + + // collect all execution data notifications + edr.AddOnExecutionDataFetchedConsumer(suite.consumeExecutionDataNotifications(cfg, func() { close(testDone) }, fetchedExecutionData)) + + edr.Start(signalerCtx) + unittest.RequireCloseBefore(suite.T(), edr.Ready(), cfg.waitTimeout, "timed out waiting for requester to be ready") + + // Send all blocks through finalizationDistributor + suite.finalizeBlocks(cfg, finalizationDistributor) + + // requester should pause downloads until resume is called, so testDone should not be closed + unittest.RequireNeverClosedWithin(suite.T(), testDone, 500*time.Millisecond, "finished unexpectedly") + + // confirm the expected number of downloads were attempted + suite.eds.AssertNumberOfCalls(suite.T(), "Get", expectedDownloads) + + suite.T().Log("Resuming") + resume() + + 
// Pause until we've received all of the expected notifications + unittest.RequireCloseBefore(suite.T(), testDone, cfg.waitTimeout, "timed out waiting for notifications") + suite.T().Log("All notifications received") + + cancel() + unittest.RequireCloseBefore(suite.T(), edr.Done(), cfg.waitTimeout, "timed out waiting for requester to shutdown") + + return fetchedExecutionData +} + +func (suite *ExecutionDataRequesterSuite) runRequesterTest(edr state_synchronization.ExecutionDataRequester, finalizationDistributor *pubsub.FinalizationDistributor, cfg *fetchTestRun) receivedExecutionData { + // make sure test helper goroutines are cleaned up + ctx, cancel := context.WithCancel(context.Background()) + + signalerCtx, errChan := irrecoverable.WithSignaler(ctx) + go irrecoverableNotExpected(suite.T(), ctx, errChan) + + // wait for all notifications + testDone := make(chan struct{}) + + fetchedExecutionData := cfg.FetchedExecutionData() + + // collect all execution data notifications + edr.AddOnExecutionDataFetchedConsumer(suite.consumeExecutionDataNotifications(cfg, func() { close(testDone) }, fetchedExecutionData)) + + edr.Start(signalerCtx) + unittest.RequireCloseBefore(suite.T(), edr.Ready(), cfg.waitTimeout, "timed out waiting for requester to be ready") + + // Send blocks through finalizationDistributor + suite.finalizeBlocks(cfg, finalizationDistributor) + + // Pause until we've received all of the expected notifications + unittest.RequireCloseBefore(suite.T(), testDone, cfg.waitTimeout, "timed out waiting for notifications") + suite.T().Log("All notifications received") + + cancel() + unittest.RequireCloseBefore(suite.T(), edr.Done(), cfg.waitTimeout, "timed out waiting for requester to shutdown") + + return fetchedExecutionData +} + +func (suite *ExecutionDataRequesterSuite) consumeExecutionDataNotifications(cfg *fetchTestRun, done func(), fetchedExecutionData map[flow.Identifier]*state_synchronization.ExecutionData) func(ed *state_synchronization.ExecutionData) { + return func(ed *state_synchronization.ExecutionData) { + if _, has := fetchedExecutionData[ed.BlockID]; has { + suite.T().Errorf("duplicate execution data for block %s", ed.BlockID) + return + } + + fetchedExecutionData[ed.BlockID] = ed + suite.T().Logf("notified of execution data for block %v height %d (%d/%d)", ed.BlockID, cfg.blocksByID[ed.BlockID].Header.Height, len(fetchedExecutionData), cfg.sealedCount) + + if cfg.IsLastSeal(ed.BlockID) { + done() + } + } +} + +func (suite *ExecutionDataRequesterSuite) finalizeBlocks(cfg *fetchTestRun, finalizationDistributor *pubsub.FinalizationDistributor) { + for i := cfg.StartHeight(); i <= cfg.endHeight; i++ { + b := cfg.blocksByHeight[i] + + suite.T().Log(">>>> Finalizing block", b.ID(), b.Header.Height) + + if len(b.Payload.Seals) > 0 { + seal := b.Payload.Seals[0] + sealedHeader := cfg.blocksByID[seal.BlockID].Header + + suite.mockSnapshot.set(sealedHeader, nil) + suite.T().Log(">>>> Sealing block", sealedHeader.ID(), sealedHeader.Height) + } + + finalizationDistributor.OnFinalizedBlock(&model.Block{}) // actual block is unused + + if cfg.stopHeight == i { + break + } + } +} + +type receivedExecutionData map[flow.Identifier]*state_synchronization.ExecutionData +type fetchTestRun struct { + sealedCount int + startHeight uint64 + endHeight uint64 + blocksByHeight map[uint64]*flow.Block + blocksByID map[flow.Identifier]*flow.Block + resultsByID map[flow.Identifier]*flow.ExecutionResult + executionDataByID map[flow.Identifier]*state_synchronization.ExecutionData + 
executionDataEntries map[flow.Identifier]*testExecutionDataServiceEntry + executionDataIDByBlockID map[flow.Identifier]flow.Identifier + expectedIrrecoverable error + + stopHeight uint64 + resumeHeight uint64 + fetchedExecutionData map[flow.Identifier]*state_synchronization.ExecutionData + waitTimeout time.Duration + + maxSearchAhead uint64 + fetchTimeout time.Duration + retryDelay time.Duration + maxRetryDelay time.Duration +} + +func (r *fetchTestRun) StartHeight() uint64 { + if r.resumeHeight > 0 { + return r.resumeHeight + } + return r.startHeight +} + +func (r *fetchTestRun) StopHeight() uint64 { + if r.stopHeight > 0 { + return r.stopHeight + } + return r.endHeight +} + +func (r *fetchTestRun) FetchedExecutionData() receivedExecutionData { + if r.fetchedExecutionData == nil { + return make(receivedExecutionData, r.sealedCount) + } + return r.fetchedExecutionData +} + +// IsLastSeal returns true if the provided blockID is the last expected sealed block for the test +func (r *fetchTestRun) IsLastSeal(blockID flow.Identifier) bool { + stopHeight := r.StopHeight() + lastSeal := r.blocksByHeight[stopHeight].Payload.Seals[0].BlockID + return lastSeal == r.blocksByID[blockID].ID() +} + +func (suite *ExecutionDataRequesterSuite) generateTestData(blockCount int, specialHeightFuncs map[uint64]testExecutionDataCallback) *fetchTestRun { + edsEntries := map[flow.Identifier]*testExecutionDataServiceEntry{} + blocksByHeight := map[uint64]*flow.Block{} + blocksByID := map[flow.Identifier]*flow.Block{} + resultsByID := map[flow.Identifier]*flow.ExecutionResult{} + executionDataByID := map[flow.Identifier]*state_synchronization.ExecutionData{} + executionDataIDByBlockID := map[flow.Identifier]flow.Identifier{} + + sealedCount := blockCount - 4 // seals for blocks 1-96 + firstSeal := blockCount - sealedCount + + // genesis is block 0, we start syncing from block 1 + startHeight := uint64(1) + endHeight := uint64(blockCount) - 1 + + // instantiate ExecutionDataService to generate correct CIDs + eds := state_synchronization.NewExecutionDataService( + new(cbor.Codec), + compressor.NewLz4Compressor(), + suite.blobservice, + metrics.NewNoopCollector(), + zerolog.New(os.Stdout).With().Timestamp().Logger(), + ) + + var previousBlock *flow.Block + var previousResult *flow.ExecutionResult + for i := 0; i < blockCount; i++ { + var seals []*flow.Header + + if i >= firstSeal { + seals = []*flow.Header{ + blocksByHeight[uint64(i-firstSeal+1)].Header, // block 0 doesn't get sealed (it's pre-sealed in the genesis state) + } + suite.T().Logf("block %d has seals for %d", i, seals[0].Height) + } + + height := uint64(i) + block := buildBlock(height, previousBlock, seals) + + ed := synctest.ExecutionDataFixture(block.ID()) + + cid, _, err := eds.Add(context.Background(), ed) + require.NoError(suite.T(), err) + + result := buildResult(block, cid, previousResult) + + blocksByHeight[height] = block + blocksByID[block.ID()] = block + resultsByID[block.ID()] = result + + // ignore all the data we don't need to verify the test + if i > 0 && i <= sealedCount { + executionDataByID[block.ID()] = ed + edsEntries[cid] = &testExecutionDataServiceEntry{ExecutionData: ed} + if fn, has := specialHeightFuncs[height]; has { + edsEntries[cid].fn = fn + } + + executionDataIDByBlockID[block.ID()] = cid + } + + previousBlock = block + previousResult = result + } + + return &fetchTestRun{ + sealedCount: sealedCount, + startHeight: startHeight, + endHeight: endHeight, + blocksByHeight: blocksByHeight, + blocksByID: blocksByID, + resultsByID: 
resultsByID, + executionDataByID: executionDataByID, + executionDataEntries: edsEntries, + executionDataIDByBlockID: executionDataIDByBlockID, + waitTimeout: time.Second * 5, + + maxSearchAhead: requester.DefaultMaxSearchAhead, + fetchTimeout: requester.DefaultFetchTimeout, + retryDelay: 1 * time.Millisecond, + maxRetryDelay: 15 * time.Millisecond, + } +} + +func buildBlock(height uint64, parent *flow.Block, seals []*flow.Header) *flow.Block { + if parent == nil { + return unittest.GenesisFixture() + } + + if len(seals) == 0 { + return unittest.BlockWithParentFixture(parent.Header) + } + + return unittest.BlockWithParentAndSeals(parent.Header, seals) +} + +func buildResult(block *flow.Block, cid flow.Identifier, previousResult *flow.ExecutionResult) *flow.ExecutionResult { + opts := []func(result *flow.ExecutionResult){ + unittest.WithBlock(block), + unittest.WithExecutionDataID(cid), + } + + if previousResult != nil { + opts = append(opts, unittest.WithPreviousResult(*previousResult)) + } + + return unittest.ExecutionResultFixture(opts...) +} + +func irrecoverableNotExpected(t *testing.T, ctx context.Context, errChan <-chan error) { + select { + case <-ctx.Done(): + return + case err := <-errChan: + assert.NoError(t, err, "unexpected irrecoverable error") + } +} + +func verifyFetchedExecutionData(t *testing.T, actual receivedExecutionData, cfg *fetchTestRun) { + expected := cfg.executionDataByID + assert.Len(t, actual, len(expected)) + + for i := 0; i < cfg.sealedCount; i++ { + height := cfg.startHeight + uint64(i) + block := cfg.blocksByHeight[height] + blockID := block.ID() + + expectedED := expected[blockID] + actualED, has := actual[blockID] + assert.True(t, has, "missing execution data for block %v height %d", blockID, height) + if has { + assert.Equal(t, expectedED, actualED, "execution data for block %v doesn't match", blockID) + } + } +} + +type mockSnapshot struct { + header *flow.Header + err error + mu sync.Mutex +} + +func (m *mockSnapshot) set(header *flow.Header, err error) { + m.mu.Lock() + defer m.mu.Unlock() + + m.header = header + m.err = err +} + +func (m *mockSnapshot) Head() (*flow.Header, error) { + m.mu.Lock() + defer m.mu.Unlock() + + return m.header, m.err +} + +// none of these are used in this test +func (m *mockSnapshot) QuorumCertificate() (*flow.QuorumCertificate, error) { return nil, nil } +func (m *mockSnapshot) Identities(selector flow.IdentityFilter) (flow.IdentityList, error) { + return nil, nil +} +func (m *mockSnapshot) Identity(nodeID flow.Identifier) (*flow.Identity, error) { return nil, nil } +func (m *mockSnapshot) SealedResult() (*flow.ExecutionResult, *flow.Seal, error) { + return nil, nil, nil +} +func (m *mockSnapshot) Commit() (flow.StateCommitment, error) { return flow.DummyStateCommitment, nil } +func (m *mockSnapshot) SealingSegment() (*flow.SealingSegment, error) { return nil, nil } +func (m *mockSnapshot) Descendants() ([]flow.Identifier, error) { return nil, nil } +func (m *mockSnapshot) ValidDescendants() ([]flow.Identifier, error) { return nil, nil } +func (m *mockSnapshot) RandomSource() ([]byte, error) { return nil, nil } +func (m *mockSnapshot) Phase() (flow.EpochPhase, error) { return flow.EpochPhaseUndefined, nil } +func (m *mockSnapshot) Epochs() protocol.EpochQuery { return nil } +func (m *mockSnapshot) Params() protocol.GlobalParams { return nil } diff --git a/module/state_synchronization/requester/jobs/execution_data_reader.go b/module/state_synchronization/requester/jobs/execution_data_reader.go new file mode 100644 index 
00000000000..92b7636ab25 --- /dev/null +++ b/module/state_synchronization/requester/jobs/execution_data_reader.go @@ -0,0 +1,112 @@ +package jobs + +import ( + "context" + "fmt" + "time" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/state_synchronization" + "github.com/onflow/flow-go/storage" +) + +// BlockEntry represents a block that's tracked by the ExecutionDataRequester +type BlockEntry struct { + BlockID flow.Identifier + Height uint64 + ExecutionData *state_synchronization.ExecutionData +} + +// ExecutionDataReader provides an abstraction for consumers to read blocks as job. +type ExecutionDataReader struct { + eds state_synchronization.ExecutionDataService + headers storage.Headers + results storage.ExecutionResults + + fetchTimeout time.Duration + highestAvailableHeight func() uint64 + + // TODO: refactor this to accept a context in AtIndex instead of storing it on the struct. + // This requires also refactoring jobqueue.Consumer + ctx irrecoverable.SignalerContext +} + +// NewExecutionDataReader creates and returns a ExecutionDataReader. +func NewExecutionDataReader( + eds state_synchronization.ExecutionDataService, + headers storage.Headers, + results storage.ExecutionResults, + fetchTimeout time.Duration, + highestAvailableHeight func() uint64, +) *ExecutionDataReader { + return &ExecutionDataReader{ + eds: eds, + headers: headers, + results: results, + fetchTimeout: fetchTimeout, + highestAvailableHeight: highestAvailableHeight, + } +} + +// AddContext adds a context to the execution data reader +// TODO: this is an anti-pattern, refactor this to accept a context in AtIndex instead of storing +// it on the struct. +func (r *ExecutionDataReader) AddContext(ctx irrecoverable.SignalerContext) { + r.ctx = ctx +} + +// AtIndex returns the block entry job at the given height, or storage.ErrNotFound. +// Any other error is unexpected +func (r *ExecutionDataReader) AtIndex(height uint64) (module.Job, error) { + if r.ctx == nil { + return nil, fmt.Errorf("execution data reader is not initialized") + } + + // height has not been downloaded, so height is not available yet + if height > r.highestAvailableHeight() { + return nil, storage.ErrNotFound + } + + executionData, err := r.getExecutionData(r.ctx, height) + if err != nil { + return nil, err + } + + return BlockEntryToJob(&BlockEntry{ + BlockID: executionData.BlockID, + Height: height, + ExecutionData: executionData, + }), nil +} + +// Head returns the highest consecutive block height with downloaded execution data +func (r *ExecutionDataReader) Head() (uint64, error) { + return r.highestAvailableHeight(), nil +} + +// getExecutionData returns the ExecutionData for the given block height. +// This is used by the execution data reader to get the ExecutionData for a block. 
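Before the fetch helper itself, a note on how this reader is meant to be consumed: the job consumer polls Head and AtIndex together, treating storage.ErrNotFound from AtIndex as "not downloaded yet" rather than as a failure. The loop below is a simplified, hypothetical sketch of that pattern, not the actual jobqueue.Consumer, which handles this generically:

package example

import (
	"errors"

	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/storage"
)

// jobReader is the subset of ExecutionDataReader used by this sketch.
type jobReader interface {
	Head() (uint64, error)
	AtIndex(height uint64) (module.Job, error)
}

// drainAvailable is a hypothetical helper: it processes every job from `next` up to the
// reader's Head, and treats storage.ErrNotFound as "not available yet" rather than a failure.
// It returns the next height that still needs processing.
func drainAvailable(r jobReader, next uint64, process func(module.Job)) (uint64, error) {
	head, err := r.Head()
	if err != nil {
		return next, err
	}
	for ; next <= head; next++ {
		job, err := r.AtIndex(next)
		if errors.Is(err, storage.ErrNotFound) {
			break // height not downloaded yet; try again after the next notification
		}
		if err != nil {
			return next, err // unexpected error, surface to the caller
		}
		process(job)
	}
	return next, nil
}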
+func (r *ExecutionDataReader) getExecutionData(signalCtx irrecoverable.SignalerContext, height uint64) (*state_synchronization.ExecutionData, error) { + header, err := r.headers.ByHeight(height) + if err != nil { + return nil, fmt.Errorf("failed to lookup header for height %d: %w", height, err) + } + + result, err := r.results.ByBlockID(header.ID()) + if err != nil { + return nil, fmt.Errorf("failed to lookup execution result for block %s: %w", header.ID(), err) + } + + ctx, cancel := context.WithTimeout(signalCtx, r.fetchTimeout) + defer cancel() + + executionData, err := r.eds.Get(ctx, result.ExecutionDataID) + + if err != nil { + return nil, fmt.Errorf("failed to get execution data for block %s: %w", header.ID(), err) + } + + return executionData, nil +} diff --git a/module/state_synchronization/requester/jobs/execution_data_reader_test.go b/module/state_synchronization/requester/jobs/execution_data_reader_test.go new file mode 100644 index 00000000000..a59c04f1fd3 --- /dev/null +++ b/module/state_synchronization/requester/jobs/execution_data_reader_test.go @@ -0,0 +1,200 @@ +package jobs + +import ( + "context" + "errors" + "math/rand" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/state_synchronization" + syncmock "github.com/onflow/flow-go/module/state_synchronization/mock" + synctest "github.com/onflow/flow-go/module/state_synchronization/requester/unittest" + "github.com/onflow/flow-go/storage" + storagemock "github.com/onflow/flow-go/storage/mock" + "github.com/onflow/flow-go/utils/unittest" +) + +type ExecutionDataReaderSuite struct { + suite.Suite + + reader *ExecutionDataReader + eds *syncmock.ExecutionDataService + headers *storagemock.Headers + results *storagemock.ExecutionResults + fetchTimeout time.Duration + + executionDataID flow.Identifier + executionData *state_synchronization.ExecutionData + block *flow.Block + blocksByHeight map[uint64]*flow.Block + + highestAvailableHeight func() uint64 +} + +func TestExecutionDataReaderSuite(t *testing.T) { + t.Parallel() + rand.Seed(time.Now().UnixMilli()) + suite.Run(t, new(ExecutionDataReaderSuite)) +} + +func (suite *ExecutionDataReaderSuite) SetupTest() { + suite.fetchTimeout = time.Second + suite.executionDataID = unittest.IdentifierFixture() + + parent := unittest.BlockHeaderFixture(unittest.WithHeaderHeight(1)) + suite.block = unittest.BlockWithParentFixture(&parent) + + suite.executionData = synctest.ExecutionDataFixture(suite.block.ID()) + + suite.highestAvailableHeight = func() uint64 { return suite.block.Header.Height + 1 } + + suite.reset() +} + +func (suite *ExecutionDataReaderSuite) reset() { + result := unittest.ExecutionResultFixture( + unittest.WithBlock(suite.block), + unittest.WithExecutionDataID(suite.executionDataID), + ) + + suite.blocksByHeight = map[uint64]*flow.Block{ + suite.block.Header.Height: suite.block, + } + suite.headers = synctest.MockBlockHeaderStorage(synctest.WithByHeight(suite.blocksByHeight)) + suite.results = synctest.MockResultsStorage(synctest.WithByBlockID(map[flow.Identifier]*flow.ExecutionResult{ + suite.block.ID(): result, + })) + + suite.eds = new(syncmock.ExecutionDataService) + suite.reader = NewExecutionDataReader( + suite.eds, + suite.headers, + suite.results, + suite.fetchTimeout, + func() uint64 { + return 
suite.highestAvailableHeight() + }, + ) +} + +func (suite *ExecutionDataReaderSuite) TestAtIndex() { + setExecutionDataGet := func(executionData *state_synchronization.ExecutionData, err error) { + suite.eds.On("Get", mock.Anything, suite.executionDataID).Return( + func(ctx context.Context, id flow.Identifier) *state_synchronization.ExecutionData { + return executionData + }, + func(ctx context.Context, id flow.Identifier) error { + return err + }, + ) + } + + suite.Run("returns not found when not initialized", func() { + // runTest not called, so context is never added + job, err := suite.reader.AtIndex(1) + assert.Nil(suite.T(), job, "job should be nil") + assert.Error(suite.T(), err, "error should be returned") + }) + + suite.Run("returns not found when index out of range", func() { + suite.reset() + suite.runTest(func() { + job, err := suite.reader.AtIndex(suite.highestAvailableHeight() + 1) + assert.Nil(suite.T(), job, "job should be nil") + assert.Equal(suite.T(), storage.ErrNotFound, err, "expected not found error") + }) + }) + + suite.Run("returns successfully", func() { + suite.reset() + suite.runTest(func() { + ed := synctest.ExecutionDataFixture(unittest.IdentifierFixture()) + setExecutionDataGet(ed, nil) + + job, err := suite.reader.AtIndex(suite.block.Header.Height) + require.NoError(suite.T(), err) + + entry, err := JobToBlockEntry(job) + assert.NoError(suite.T(), err) + + assert.Equal(suite.T(), entry.ExecutionData, ed) + }) + }) + + suite.Run("returns error from ExecutionDataService Get", func() { + suite.reset() + suite.runTest(func() { + // return an error while getting the execution data + expecteErr := errors.New("expected error: get failed") + setExecutionDataGet(nil, expecteErr) + + job, err := suite.reader.AtIndex(suite.block.Header.Height) + assert.Nil(suite.T(), job, "job should be nil") + assert.ErrorIs(suite.T(), err, expecteErr) + }) + }) + + suite.Run("returns error getting header", func() { + suite.reset() + suite.runTest(func() { + // search for an index that doesn't have a header in storage + job, err := suite.reader.AtIndex(suite.block.Header.Height + 1) + assert.Nil(suite.T(), job, "job should be nil") + assert.ErrorIs(suite.T(), err, storage.ErrNotFound) + }) + }) + + suite.Run("returns error getting execution result", func() { + suite.reset() + suite.runTest(func() { + // add a new block without an execution result + newBlock := unittest.BlockWithParentFixture(suite.block.Header) + suite.blocksByHeight[newBlock.Header.Height] = newBlock + + job, err := suite.reader.AtIndex(newBlock.Header.Height) + assert.Nil(suite.T(), job, "job should be nil") + assert.ErrorIs(suite.T(), err, storage.ErrNotFound) + }) + }) +} + +func (suite *ExecutionDataReaderSuite) TestHead() { + suite.runTest(func() { + expectedIndex := uint64(15) + suite.highestAvailableHeight = func() uint64 { + return expectedIndex + } + index, err := suite.reader.Head() + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), expectedIndex, index) + }) +} + +func (suite *ExecutionDataReaderSuite) runTest(fn func()) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + signalCtx, errChan := irrecoverable.WithSignaler(ctx) + go irrecoverableNotExpected(suite.T(), ctx, errChan) + + suite.reader.AddContext(signalCtx) + + fn() +} + +func irrecoverableNotExpected(t *testing.T, ctx context.Context, errChan <-chan error) { + select { + case <-ctx.Done(): + return + case err := <-errChan: + require.NoError(t, err, "unexpected irrecoverable error") + } +} diff --git 
a/module/state_synchronization/requester/jobs/jobs.go b/module/state_synchronization/requester/jobs/jobs.go new file mode 100644 index 00000000000..98d1c821105 --- /dev/null +++ b/module/state_synchronization/requester/jobs/jobs.go @@ -0,0 +1,35 @@ +package jobs + +import ( + "fmt" + + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/jobqueue" +) + +// BlockEntryJob implements the Job interface. It converts a BlockEntry into a Job to be used by the job queue. +// +// In the current architecture, BlockEntryJob represents an ExecutionData notification enqueued to be +// processed by the NotificationConsumer, which implements the JobQueue interface. +type BlockEntryJob struct { + Entry *BlockEntry +} + +// ID converts the block ID into a job ID, which guarantees uniqueness. +func (j BlockEntryJob) ID() module.JobID { + return jobqueue.JobID(j.Entry.BlockID) +} + +// JobToBlockEntry converts a block entry job into its corresponding BlockEntry. +func JobToBlockEntry(job module.Job) (*BlockEntry, error) { + blockJob, ok := job.(*BlockEntryJob) + if !ok { + return nil, fmt.Errorf("could not convert job to block entry, job id: %x", job.ID()) + } + return blockJob.Entry, nil +} + +// BlockEntryToJob converts the BlockEntry to a BlockEntryJob. +func BlockEntryToJob(entry *BlockEntry) *BlockEntryJob { + return &BlockEntryJob{Entry: entry} +} diff --git a/module/state_synchronization/requester/jobs/jobs_test.go b/module/state_synchronization/requester/jobs/jobs_test.go new file mode 100644 index 00000000000..bf6826e189c --- /dev/null +++ b/module/state_synchronization/requester/jobs/jobs_test.go @@ -0,0 +1,47 @@ +package jobs_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/jobqueue" + "github.com/onflow/flow-go/module/state_synchronization/requester/jobs" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestBlockEntryJob(t *testing.T) { + blockEntry := &jobs.BlockEntry{ + Height: 42, + BlockID: unittest.IdentifierFixture(), + } + + job := jobs.BlockEntryToJob(blockEntry) + t.Run("job is correct type", func(t *testing.T) { + assert.IsType(t, &jobs.BlockEntryJob{}, job, "job is not a block entry job") + }) + + t.Run("job ID matches block ID", func(t *testing.T) { + jobID := jobqueue.JobID(blockEntry.BlockID) + assert.Equal(t, job.ID(), jobID, "job ID is not the block ID") + }) + + t.Run("job converts to block entry", func(t *testing.T) { + e, err := jobs.JobToBlockEntry(job) + assert.NoError(t, err, "unexpected error converting block entry job to block entry") + assert.Equal(t, blockEntry, e, "converted block entry is not the same as the original block entry") + }) + + t.Run("incorrect job type fails to convert to block entry", func(t *testing.T) { + e, err := jobs.JobToBlockEntry(invalidJob{}) + assert.Error(t, err, "expected error converting invalidJob to block entry") + assert.Nil(t, e, "expected nil block entry") + }) +} + +type invalidJob struct{} + +func (j invalidJob) ID() module.JobID { + return "invalid" +} diff --git a/module/state_synchronization/requester/unittest/unittest.go b/module/state_synchronization/requester/unittest/unittest.go new file mode 100644 index 00000000000..e1a2a3490eb --- /dev/null +++ b/module/state_synchronization/requester/unittest/unittest.go @@ -0,0 +1,223 @@ +package unittest + +import ( + "context" + "fmt" + "sync" + + "github.com/ipfs/go-cid" + blockstore "github.com/ipfs/go-ipfs-blockstore" + "github.com/stretchr/testify/mock" +
"github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/blobs" + "github.com/onflow/flow-go/module/state_synchronization" + "github.com/onflow/flow-go/network/mocknetwork" + statemock "github.com/onflow/flow-go/state/protocol/mock" + "github.com/onflow/flow-go/storage" + storagemock "github.com/onflow/flow-go/storage/mock" +) + +func WithCollections(collections []*flow.Collection) func(*state_synchronization.ExecutionData) { + return func(executionData *state_synchronization.ExecutionData) { + executionData.Collections = collections + } +} + +func WithEvents(events []flow.EventsList) func(*state_synchronization.ExecutionData) { + return func(executionData *state_synchronization.ExecutionData) { + executionData.Events = events + } +} + +func WithTrieUpdates(updates []*ledger.TrieUpdate) func(*state_synchronization.ExecutionData) { + return func(executionData *state_synchronization.ExecutionData) { + executionData.TrieUpdates = updates + } +} + +func ExecutionDataFixture(blockID flow.Identifier) *state_synchronization.ExecutionData { + return &state_synchronization.ExecutionData{ + BlockID: blockID, + Collections: []*flow.Collection{}, + Events: []flow.EventsList{}, + TrieUpdates: []*ledger.TrieUpdate{}, + } +} + +func MockBlobService(bs blockstore.Blockstore) *mocknetwork.BlobService { + bex := new(mocknetwork.BlobService) + + bex.On("GetBlobs", mock.Anything, mock.AnythingOfType("[]cid.Cid")). + Return(func(ctx context.Context, cids []cid.Cid) <-chan blobs.Blob { + ch := make(chan blobs.Blob) + + var wg sync.WaitGroup + wg.Add(len(cids)) + + for _, c := range cids { + c := c + go func() { + defer wg.Done() + + blob, err := bs.Get(ctx, c) + + if err != nil { + // In the real implementation, Bitswap would keep trying to get the blob from + // the network indefinitely, sending requests to more and more peers until it + // eventually finds the blob, or the context is canceled. Here, we know that + // if the blob is not already in the blobstore, then we will never appear, so + // we just wait for the context to be canceled. 
+ <-ctx.Done() + + return + } + + ch <- blob + }() + } + + go func() { + wg.Wait() + close(ch) + }() + + return ch + }).Maybe() + + bex.On("AddBlobs", mock.Anything, mock.AnythingOfType("[]blocks.Block")).Return(bs.PutMany).Maybe() + bex.On("DeleteBlob", mock.Anything, mock.AnythingOfType("cid.Cid")).Return(bs.DeleteBlock).Maybe() + + noop := module.NoopReadyDoneAware{} + bex.On("Ready").Return(func() <-chan struct{} { return noop.Ready() }).Maybe() + + return bex +} + +type SnapshotMockOptions func(*statemock.Snapshot) + +func WithHead(head *flow.Header) SnapshotMockOptions { + return func(snapshot *statemock.Snapshot) { + snapshot.On("Head").Return(head, nil) + } +} + +func MockProtocolStateSnapshot(opts ...SnapshotMockOptions) *statemock.Snapshot { + snapshot := new(statemock.Snapshot) + + for _, opt := range opts { + opt(snapshot) + } + + return snapshot +} + +type StateMockOptions func(*statemock.State) + +func WithSnapshot(snapshot *statemock.Snapshot) StateMockOptions { + return func(state *statemock.State) { + state.On("Sealed").Return(snapshot) + } +} + +func MockProtocolState(opts ...StateMockOptions) *statemock.State { + state := new(statemock.State) + + for _, opt := range opts { + opt(state) + } + + return state +} + +type BlockHeaderMockOptions func(*storagemock.Headers) + +func WithByHeight(blocksByHeight map[uint64]*flow.Block) BlockHeaderMockOptions { + return func(blocks *storagemock.Headers) { + blocks.On("ByHeight", mock.AnythingOfType("uint64")).Return( + func(height uint64) *flow.Header { + if _, has := blocksByHeight[height]; !has { + return nil + } + return blocksByHeight[height].Header + }, + func(height uint64) error { + if _, has := blocksByHeight[height]; !has { + return fmt.Errorf("block %d not found: %w", height, storage.ErrNotFound) + } + return nil + }, + ) + } +} + +func WithByID(blocksByID map[flow.Identifier]*flow.Block) BlockHeaderMockOptions { + return func(blocks *storagemock.Headers) { + blocks.On("ByBlockID", mock.AnythingOfType("flow.Identifier")).Return( + func(blockID flow.Identifier) *flow.Header { + if _, has := blocksByID[blockID]; !has { + return nil + } + return blocksByID[blockID].Header + }, + func(blockID flow.Identifier) error { + if _, has := blocksByID[blockID]; !has { + return fmt.Errorf("block %s not found: %w", blockID, storage.ErrNotFound) + } + return nil + }, + ) + } +} + +func MockBlockHeaderStorage(opts ...BlockHeaderMockOptions) *storagemock.Headers { + headers := new(storagemock.Headers) + + for _, opt := range opts { + opt(headers) + } + + return headers +} + +type ResultsMockOptions func(*storagemock.ExecutionResults) + +func WithByBlockID(resultsByID map[flow.Identifier]*flow.ExecutionResult) ResultsMockOptions { + return func(results *storagemock.ExecutionResults) { + results.On("ByBlockID", mock.AnythingOfType("flow.Identifier")).Return( + func(blockID flow.Identifier) *flow.ExecutionResult { + if _, has := resultsByID[blockID]; !has { + return nil + } + return resultsByID[blockID] + }, + func(blockID flow.Identifier) error { + if _, has := resultsByID[blockID]; !has { + return fmt.Errorf("result %s not found: %w", blockID, storage.ErrNotFound) + } + return nil + }, + ) + } +} + +func MockResultsStorage(opts ...ResultsMockOptions) *storagemock.ExecutionResults { + results := new(storagemock.ExecutionResults) + + for _, opt := range opts { + opt(results) + } + + return results +} + +func RemoveExpectedCall(method string, expectedCalls []*mock.Call) []*mock.Call { + for i, call := range expectedCalls { + if call.Method == 
method { + expectedCalls = append(expectedCalls[:i], expectedCalls[i+1:]...) + } + } + return expectedCalls +} diff --git a/network/p2p/blob_service.go b/network/p2p/blob_service.go index 6cb5b51595c..d58ba881d3b 100644 --- a/network/p2p/blob_service.go +++ b/network/p2p/blob_service.go @@ -26,6 +26,7 @@ import ( type blobService struct { component.Component blockService blockservice.BlockService + blockStore blockstore.Blockstore reprovider provider.Reprovider config *BlobServiceConfig } @@ -45,12 +46,22 @@ func WithReprovideInterval(d time.Duration) network.BlobServiceOption { } } +// WithBitswapOptions sets additional options for Bitswap exchange func WithBitswapOptions(opts ...bitswap.Option) network.BlobServiceOption { return func(bs network.BlobService) { bs.(*blobService).config.BitswapOptions = opts } } +// WithHashOnRead sets whether or not the blobstore will rehash the blob data on read +// When set, calls to GetBlob will fail with an error if the hash of the data in storage does not +// match its CID +func WithHashOnRead(enabled bool) network.BlobServiceOption { + return func(bs network.BlobService) { + bs.(*blobService).blockStore.HashOnRead(enabled) + } +} + // NewBlobService creates a new BlobService. func NewBlobService( host host.Host, @@ -59,12 +70,13 @@ func NewBlobService( ds datastore.Batching, opts ...network.BlobServiceOption, ) *blobService { - bstore := blockstore.NewBlockstore(ds) bsNetwork := bsnet.NewFromIpfsHost(host, r, bsnet.Prefix(protocol.ID(prefix))) - config := &BlobServiceConfig{ - ReprovideInterval: 12 * time.Hour, + bs := &blobService{ + config: &BlobServiceConfig{ + ReprovideInterval: 12 * time.Hour, + }, + blockStore: blockstore.NewBlockstore(ds), } - bs := &blobService{config: config} for _, opt := range opts { opt(bs) @@ -72,12 +84,12 @@ func NewBlobService( cm := component.NewComponentManagerBuilder(). AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { - bs.blockService = blockservice.New(bstore, bitswap.New(ctx, bsNetwork, bstore, config.BitswapOptions...)) + bs.blockService = blockservice.New(bs.blockStore, bitswap.New(ctx, bsNetwork, bs.blockStore, bs.config.BitswapOptions...)) ready() }). AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { - bs.reprovider = simple.NewReprovider(ctx, config.ReprovideInterval, r, simple.NewBlockstoreProvider(bstore)) + bs.reprovider = simple.NewReprovider(ctx, bs.config.ReprovideInterval, r, simple.NewBlockstoreProvider(bs.blockStore)) ready() diff --git a/state/cluster/badger/mutator.go b/state/cluster/badger/mutator.go index 8d8909305b8..b2c3ab1105f 100644 --- a/state/cluster/badger/mutator.go +++ b/state/cluster/badger/mutator.go @@ -120,7 +120,25 @@ func (m *MutableState) Extend(block *cluster.Block) error { checkTxsSpan, _ := m.tracer.StartSpanFromContext(ctx, trace.COLClusterStateMutatorExtendCheckTransactionsValid) defer checkTxsSpan.Finish() + // a valid collection must reference a valid reference block + // NOTE: it is valid for a collection to be expired at this point, + // otherwise we would compromise liveness of the cluster. 
+ refBlock, err := m.headers.ByBlockID(payload.ReferenceBlockID) + if errors.Is(err, storage.ErrNotFound) { + return state.NewInvalidExtensionErrorf("unknown reference block (id=%x)", payload.ReferenceBlockID) + } + if err != nil { + return fmt.Errorf("could not check reference block: %w", err) + } + + // no validation of transactions is necessary for empty collections + if payload.Collection.Len() == 0 { + return nil + } + // check that all transactions within the collection are valid + // keep track of the min/max reference blocks - the collection must be non-empty + // at this point so these are guaranteed to be set correctly minRefID := flow.ZeroID minRefHeight := uint64(math.MaxUint64) maxRefHeight := uint64(0) @@ -145,30 +163,17 @@ func (m *MutableState) Extend(block *cluster.Block) error { // a valid collection must reference the oldest reference block among // its constituent transactions - if payload.Collection.Len() > 0 && minRefID != payload.ReferenceBlockID { + if minRefID != payload.ReferenceBlockID { return state.NewInvalidExtensionErrorf( "reference block (id=%x) must match oldest transaction's reference block (id=%x)", payload.ReferenceBlockID, minRefID, ) } // a valid collection must contain only transactions within its expiry window - if payload.Collection.Len() > 0 { - if maxRefHeight-minRefHeight >= flow.DefaultTransactionExpiry { - return state.NewInvalidExtensionErrorf( - "collection contains reference height range [%d,%d] exceeding expiry window size: %d", - minRefHeight, maxRefHeight, flow.DefaultTransactionExpiry) - } - } - - // a valid collection must reference a valid reference block - // NOTE: it is valid for a collection to be expired at this point, - // otherwise we would compromise liveness of the cluster. - refBlock, err := m.headers.ByBlockID(payload.ReferenceBlockID) - if errors.Is(err, storage.ErrNotFound) { - return state.NewInvalidExtensionErrorf("unknown reference block (id=%x)", payload.ReferenceBlockID) - } - if err != nil { - return fmt.Errorf("could not check reference block: %w", err) + if maxRefHeight-minRefHeight >= flow.DefaultTransactionExpiry { + return state.NewInvalidExtensionErrorf( + "collection contains reference height range [%d,%d] exceeding expiry window size: %d", + minRefHeight, maxRefHeight, flow.DefaultTransactionExpiry) } // TODO ensure the reference block is part of the main chain diff --git a/state/cluster/badger/mutator_test.go b/state/cluster/badger/mutator_test.go index 467c8b1cc1c..91fbf8d4534 100644 --- a/state/cluster/badger/mutator_test.go +++ b/state/cluster/badger/mutator_test.go @@ -315,8 +315,10 @@ func (suite *MutatorSuite) TestExtend_WithEmptyCollection() { // an unknown reference block is invalid func (suite *MutatorSuite) TestExtend_WithNonExistentReferenceBlock() { block := suite.Block() + tx := suite.Tx() + payload := suite.Payload(&tx) // set a random reference block ID - payload := model.EmptyPayload(unittest.IdentifierFixture()) + payload.ReferenceBlockID = unittest.IdentifierFixture() block.SetPayload(payload) err := suite.state.Extend(&block) suite.Assert().Error(err) diff --git a/state/protocol/badger/mutator.go b/state/protocol/badger/mutator.go index ee109cf036b..1041e48bcc0 100644 --- a/state/protocol/badger/mutator.go +++ b/state/protocol/badger/mutator.go @@ -12,6 +12,7 @@ import ( "github.com/onflow/flow-go/engine" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/signature" "github.com/onflow/flow-go/module/trace" 
"github.com/onflow/flow-go/state" "github.com/onflow/flow-go/state/protocol" @@ -314,6 +315,9 @@ func (m *MutableState) guaranteeExtend(ctx context.Context, candidate *flow.Bloc // get the reference block to check expiry ref, err := m.headers.ByBlockID(guarantee.ReferenceBlockID) if err != nil { + if errors.Is(err, storage.ErrNotFound) { + return state.NewInvalidExtensionErrorf("could not get reference block %x: %w", guarantee.ReferenceBlockID, err) + } return fmt.Errorf("could not get reference block (%x): %w", guarantee.ReferenceBlockID, err) } @@ -322,6 +326,17 @@ func (m *MutableState) guaranteeExtend(ctx context.Context, candidate *flow.Bloc return state.NewInvalidExtensionErrorf("payload includes expired guarantee (height: %d, limit: %d)", ref.Height, limit) } + + // check the guarantors are correct + _, err = protocol.FindGuarantors(m, guarantee) + if err != nil { + if signature.IsInvalidSignerIndicesError(err) || + errors.Is(err, protocol.ErrEpochNotCommitted) || + errors.Is(err, protocol.ErrClusterNotFound) { + return state.NewInvalidExtensionErrorf("guarantee %v contains invalid guarantors: %w", guarantee.ID(), err) + } + return fmt.Errorf("could not find guarantor for guarantee %v: %w", guarantee.ID(), err) + } } return nil diff --git a/state/protocol/badger/mutator_test.go b/state/protocol/badger/mutator_test.go index a651ed8c51b..4cdab48dd9f 100644 --- a/state/protocol/badger/mutator_test.go +++ b/state/protocol/badger/mutator_test.go @@ -22,6 +22,7 @@ import ( "github.com/onflow/flow-go/model/flow/order" "github.com/onflow/flow-go/module/metrics" mockmodule "github.com/onflow/flow-go/module/mock" + "github.com/onflow/flow-go/module/signature" "github.com/onflow/flow-go/module/trace" st "github.com/onflow/flow-go/state" realprotocol "github.com/onflow/flow-go/state/protocol" @@ -1653,6 +1654,111 @@ func TestHeaderExtendHighestSeal(t *testing.T) { }) } +// TestExtendInvalidGuarantee checks if Extend method will reject invalid blocks that contain +// guarantees with invalid guarantors +func TestExtendInvalidGuarantee(t *testing.T) { + rootSnapshot := unittest.RootSnapshotFixture(participants) + util.RunWithFullProtocolState(t, rootSnapshot, func(db *badger.DB, state *protocol.MutableState) { + // create a valid block + head, err := rootSnapshot.Head() + require.NoError(t, err) + + cluster, err := unittest.SnapshotClusterByIndex(rootSnapshot, 0) + require.NoError(t, err) + + // prepare for a valid guarantor signer indices to be used in the valid block + all := cluster.Members().NodeIDs() + validSignerIndices, err := signature.EncodeSignersToIndices(all, all) + require.NoError(t, err) + + block := unittest.BlockWithParentFixture(head) + payload := flow.EmptyPayload() + payload.Guarantees = []*flow.CollectionGuarantee{ + &flow.CollectionGuarantee{ + ChainID: cluster.ChainID(), + ReferenceBlockID: head.ID(), + SignerIndices: validSignerIndices, + }, + } + + // now the valid block has a guarantee in the payload with valid signer indices. 
+ block.SetPayload(payload) + + // check that Extend accepts this valid block + err = state.Extend(context.Background(), block) + require.NoError(t, err) + + // now the guarantee has invalid signer indices: the checksum should have 4 bytes, but it only has 1 + payload.Guarantees[0].SignerIndices = []byte{byte(1)} + err = state.Extend(context.Background(), block) + require.Error(t, err) + require.True(t, signature.IsInvalidSignerIndicesError(err), err) + require.True(t, errors.As(err, &signature.ErrInvalidChecksum), err) + require.True(t, st.IsInvalidExtensionError(err), err) + + // now the guarantee has invalid signer indices: the bit vector has the right length, but its checksum does not match + checksumMismatch := make([]byte, len(validSignerIndices)) + copy(checksumMismatch, validSignerIndices) + checksumMismatch[0] = byte(1) + if checksumMismatch[0] == validSignerIndices[0] { + checksumMismatch[0] = byte(2) + } + payload.Guarantees[0].SignerIndices = checksumMismatch + err = state.Extend(context.Background(), block) + require.Error(t, err) + require.True(t, signature.IsInvalidSignerIndicesError(err), err) + require.True(t, errors.As(err, &signature.ErrInvalidChecksum), err) + require.True(t, st.IsInvalidExtensionError(err), err) + + // even if the checksum is correct, the signer indices are still wrong if the trailing bits are not 0, + // so the block should still be rejected. + wrongTrailing := make([]byte, len(validSignerIndices)) + copy(wrongTrailing, validSignerIndices) + wrongTrailing[len(wrongTrailing)-1] = byte(255) + + payload.Guarantees[0].SignerIndices = wrongTrailing + err = state.Extend(context.Background(), block) + require.Error(t, err) + require.True(t, signature.IsInvalidSignerIndicesError(err), err) + require.True(t, errors.As(err, &signature.ErrIllegallyPaddedBitVector), err) + require.True(t, st.IsInvalidExtensionError(err), err) + + // test incompatible bit vector length + wrongBitVectorLength := validSignerIndices[0 : len(validSignerIndices)-1] + payload.Guarantees[0].SignerIndices = wrongBitVectorLength + err = state.Extend(context.Background(), block) + require.Error(t, err) + require.True(t, signature.IsInvalidSignerIndicesError(err), err) + require.True(t, errors.As(err, &signature.ErrIncompatibleBitVectorLength), err) + require.True(t, st.IsInvalidExtensionError(err), err) + + // revert back to good value + payload.Guarantees[0].SignerIndices = validSignerIndices + + // test the ReferenceBlockID is not found + payload.Guarantees[0].ReferenceBlockID = flow.ZeroID + err = state.Extend(context.Background(), block) + require.Error(t, err) + require.True(t, errors.As(err, &storage.ErrNotFound), err) + require.True(t, st.IsInvalidExtensionError(err), err) + + // revert back to good value + payload.Guarantees[0].ReferenceBlockID = head.ID() + + // TODO: test the guarantee has bad reference block ID that would return ErrEpochNotCommitted + // this case is not easy to create, since the test case has no such block yet.
+ // we need to refactor the MutableState to add a guaranteeValidator, so that we can mock it and + // return the ErrEpochNotCommitted for testing + + // test the guarantee has wrong chain ID, and should return ErrClusterNotFound + payload.Guarantees[0].ChainID = flow.ChainID("some_bad_chain_ID") + err = state.Extend(context.Background(), block) + require.Error(t, err) + require.True(t, errors.As(err, &realprotocol.ErrClusterNotFound), err) + require.True(t, st.IsInvalidExtensionError(err), err) + }) +} + func TestMakeValid(t *testing.T) { t.Run("should trigger BlockProcessable with parent block", func(t *testing.T) { consumer := &mockprotocol.Consumer{} diff --git a/state/protocol/epoch.go b/state/protocol/epoch.go index 55bfeb5fd00..d1755db4233 100644 --- a/state/protocol/epoch.go +++ b/state/protocol/epoch.go @@ -79,6 +79,9 @@ type Epoch interface { // ClusterByChainID returns the detailed cluster information for the cluster with // the given chain ID, in this epoch + // Expected Error returns during normal operations: + // * protocol.ErrEpochNotCommitted if epoch has not been committed yet + // * protocol.ErrClusterNotFound if cluster is not found by the given chainID ClusterByChainID(chainID flow.ChainID) (Cluster, error) // DKG returns the result of the distributed key generation procedure. diff --git a/state/protocol/state.go b/state/protocol/state.go index db219096a27..2d63d3f3787 100644 --- a/state/protocol/state.go +++ b/state/protocol/state.go @@ -45,6 +45,9 @@ type MutableState interface { // still checking that the given block is a valid extension of the protocol // state. Depending on implementation it might be a lighter version that checks only // block header. + // Expected errors during normal operations: + // * state.OutdatedExtensionError if the candidate block is outdated (e.g. orphaned) + // * state.InvalidExtensionError if the candidate block is invalid Extend(ctx context.Context, candidate *flow.Block) error // Finalize finalizes the block with the given hash. 
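To illustrate the expected-error contract documented above for MutableState.Extend, here is a minimal, hedged sketch of how a caller might classify the result; it assumes the classification helpers state.IsInvalidExtensionError and state.IsOutdatedExtensionError from the state package, and the names mutableState, candidate, ctx, and log are hypothetical placeholders rather than identifiers taken from this patch:

    // classify the outcome of extending the protocol state with a candidate block
    err := mutableState.Extend(ctx, candidate)
    switch {
    case err == nil:
        // the candidate is a valid extension; continue processing it
    case state.IsInvalidExtensionError(err):
        // the candidate block is invalid; drop it (and possibly flag the sender)
        log.Warn().Err(err).Msg("rejecting invalid block")
    case state.IsOutdatedExtensionError(err):
        // the candidate is outdated (e.g. orphaned); expected during normal operations
        log.Debug().Err(err).Msg("dropping outdated block")
    default:
        // any other error is unexpected and should be escalated
        log.Fatal().Err(err).Msg("failed to extend protocol state")
    }

The value of the sentinel-style documentation is that callers can distinguish benign, expected failures (invalid or outdated candidates) from genuine exceptions without string matching on error messages.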
diff --git a/state/protocol/util.go b/state/protocol/util.go index 8b5ec92094c..0c6392ebaf2 100644 --- a/state/protocol/util.go +++ b/state/protocol/util.go @@ -73,9 +73,10 @@ func IsSporkRootSnapshot(snapshot Snapshot) (bool, error) { // FindGuarantors decodes the signer indices from the guarantee, and finds the guarantor identifiers from protocol state // Expected Error returns during normal operations: -// * signature.ErrIncompatibleBitVectorLength indicates that `signerIndices` has the wrong length -// * signature.ErrIllegallyPaddedBitVector is the vector is padded with bits other than 0 -// * signature.ErrInvalidChecksum if the input is shorter than the expected checksum contained in the guarantee.SignerIndices +// * signature.InvalidSignerIndicesError if `signerIndices` does not encode a valid set of collection guarantors +// * storage.ErrNotFound if the guarantee's ReferenceBlockID is not found +// * protocol.ErrEpochNotCommitted if epoch has not been committed yet +// * protocol.ErrClusterNotFound if cluster is not found by the given chainID func FindGuarantors(state State, guarantee *flow.CollectionGuarantee) ([]flow.Identifier, error) { snapshot := state.AtBlockID(guarantee.ReferenceBlockID) epochs := snapshot.Epochs() @@ -83,10 +84,8 @@ func FindGuarantors(state State, guarantee *flow.CollectionGuarantee) ([]flow.Id cluster, err := epoch.ClusterByChainID(guarantee.ChainID) if err != nil { - // protocol state must have validated the block that contains the guarantee, so the cluster - // must be found, otherwise, it's an internal error return nil, fmt.Errorf( - "internal error retrieving collector clusters for guarantee (ReferenceBlockID: %v, ChainID: %v): %w", + "fail to retrieve collector clusters for guarantee (ReferenceBlockID: %v, ChainID: %v): %w", guarantee.ReferenceBlockID, guarantee.ChainID, err) } diff --git a/utils/unittest/fixtures.go b/utils/unittest/fixtures.go index 0b7f934addf..1749e68f324 100644 --- a/utils/unittest/fixtures.go +++ b/utils/unittest/fixtures.go @@ -28,6 +28,7 @@ import ( "github.com/onflow/flow-go/model/messages" "github.com/onflow/flow-go/model/verification" "github.com/onflow/flow-go/module/mempool/entity" + "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/state/protocol/inmem" "github.com/onflow/flow-go/utils/dsl" ) @@ -331,18 +332,18 @@ func BlockWithParentAndProposerFixture(parent *flow.Header, proposer flow.Identi return *block } -func BlockWithParentAndSeal( - parent *flow.Header, sealed *flow.Header) *flow.Block { +func BlockWithParentAndSeals(parent *flow.Header, seals []*flow.Header) *flow.Block { block := BlockWithParentFixture(parent) payload := flow.Payload{ Guarantees: nil, } - if sealed != nil { - payload.Seals = []*flow.Seal{ - Seal.Fixture( - Seal.WithBlockID(sealed.ID()), - ), + if len(seals) > 0 { + payload.Seals = make([]*flow.Seal, len(seals)) + for i, seal := range seals { + payload.Seals[i] = Seal.Fixture( + Seal.WithBlockID(seal.ID()), + ) } } @@ -539,17 +540,17 @@ func BlockSealsFixture(n int) []*flow.Seal { return seals } -func CollectionListFixture(n int) []*flow.Collection { +func CollectionListFixture(n int, options ...func(*flow.Collection)) []*flow.Collection { collections := make([]*flow.Collection, n) for i := 0; i < n; i++ { - collection := CollectionFixture(1) + collection := CollectionFixture(1, options...) 
collections[i] = &collection } return collections } -func CollectionFixture(n int) flow.Collection { +func CollectionFixture(n int, options ...func(*flow.Collection)) flow.Collection { transactions := make([]*flow.TransactionBody, 0, n) for i := 0; i < n; i++ { @@ -557,7 +558,11 @@ func CollectionFixture(n int) flow.Collection { transactions = append(transactions, &tx.TransactionBody) } - return flow.Collection{Transactions: transactions} + col := flow.Collection{Transactions: transactions} + for _, opt := range options { + opt(&col) + } + return col } func FixedReferenceBlockID() flow.Identifier { @@ -746,6 +751,12 @@ func WithServiceEvents(n int) func(result *flow.ExecutionResult) { } } +func WithExecutionDataID(id flow.Identifier) func(result *flow.ExecutionResult) { + return func(result *flow.ExecutionResult) { + result.ExecutionDataID = id + } +} + func ServiceEventsFixture(n int) flow.ServiceEventList { sel := make(flow.ServiceEventList, n) @@ -1875,6 +1886,16 @@ func RootSnapshotFixture(participants flow.IdentityList, opts ...func(*flow.Bloc return root } +func SnapshotClusterByIndex(snapshot *inmem.Snapshot, clusterIndex uint) (protocol.Cluster, error) { + epochs := snapshot.Epochs() + epoch := epochs.Current() + cluster, err := epoch.Cluster(clusterIndex) + if err != nil { + return nil, err + } + return cluster, nil +} + // ChainFixture creates a list of blocks that forms a chain func ChainFixture(nonGenesisCount int) ([]*flow.Block, *flow.ExecutionResult, *flow.Seal) { chain := make([]*flow.Block, 0, nonGenesisCount+1)