From 55606eb2070326e8a70d3da52a985f310872058d Mon Sep 17 00:00:00 2001
From: rjl493456442 <garyrong0905@gmail.com>
Date: Tue, 15 Feb 2022 21:15:13 +0800
Subject: [PATCH] core, ethdb, tests, trie: implement NewBatchWithSize API for
 batcher (#24392)

This PR adds an additional API called `NewBatchWithSize` for db
batcher. It turns out that leveldb batch memory allocation is
super inefficient. The main reason is that the allocation step of
leveldb Batch is too small when the batch size is large. It can
take a few seconds to build a leveldb batch with 100MB size.

Luckily, leveldb also offers another API called MakeBatch which can
pre-allocate the memory area. So if the approximate size of batch is
known in advance, this API can be used in this case.

It's needed in the new state scheme PR, which needs to commit a batch of
trie nodes in a single batch. Implement the feature in a separate PR.
---
 core/rawdb/table.go                    | 5 +++++
 ethdb/batch.go                         | 3 +++
 ethdb/leveldb/leveldb.go               | 8 ++++++++
 ethdb/memorydb/memorydb.go             | 7 +++++++
 tests/fuzzers/bn256/bn256_fuzz.go      | 1 +
 tests/fuzzers/stacktrie/trie_fuzzer.go | 1 +
 trie/iterator_test.go                  | 4 ++++
 trie/trie_test.go                      | 1 +
 8 files changed, 30 insertions(+)

diff --git a/core/rawdb/table.go b/core/rawdb/table.go
index 91fc31b660d67..bd47161c5f8cf 100644
--- a/core/rawdb/table.go
+++ b/core/rawdb/table.go
@@ -172,6 +172,11 @@ func (t *table) NewBatch() ethdb.Batch {
 	return &tableBatch{t.db.NewBatch(), t.prefix}
 }
 
+// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
+func (t *table) NewBatchWithSize(size int) ethdb.Batch {
+	return &tableBatch{t.db.NewBatchWithSize(size), t.prefix}
+}
+
 // tableBatch is a wrapper around a database batch that prefixes each key access
 // with a pre-configured string.
 type tableBatch struct {
diff --git a/ethdb/batch.go b/ethdb/batch.go
index 1353693318a65..541f40c838d28 100644
--- a/ethdb/batch.go
+++ b/ethdb/batch.go
@@ -43,6 +43,9 @@ type Batcher interface {
 	// NewBatch creates a write-only database that buffers changes to its host db
 	// until a final write is called.
 	NewBatch() Batch
+
+	// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
+	NewBatchWithSize(size int) Batch
 }
 
 // HookedBatch wraps an arbitrary batch where each operation may be hooked into
diff --git a/ethdb/leveldb/leveldb.go b/ethdb/leveldb/leveldb.go
index 9a782dedbe147..cb348ea28cbc8 100644
--- a/ethdb/leveldb/leveldb.go
+++ b/ethdb/leveldb/leveldb.go
@@ -213,6 +213,14 @@ func (db *Database) NewBatch() ethdb.Batch {
 	}
 }
 
+// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
+func (db *Database) NewBatchWithSize(size int) ethdb.Batch {
+	return &batch{
+		db: db.db,
+		b:  leveldb.MakeBatch(size),
+	}
+}
+
 // NewIterator creates a binary-alphabetical iterator over a subset
 // of database content with a particular key prefix, starting at a particular
 // initial key (or after, if it does not exist).
diff --git a/ethdb/memorydb/memorydb.go b/ethdb/memorydb/memorydb.go
index 78181e860c151..7c8d655f4a45a 100644
--- a/ethdb/memorydb/memorydb.go
+++ b/ethdb/memorydb/memorydb.go
@@ -129,6 +129,13 @@ func (db *Database) NewBatch() ethdb.Batch {
 	}
 }
 
+// NewBatchWithSize creates a write-only database batch; the size hint is ignored by this implementation.
+func (db *Database) NewBatchWithSize(size int) ethdb.Batch {
+	return &batch{
+		db: db,
+	}
+}
+
 // NewIterator creates a binary-alphabetical iterator over a subset
 // of database content with a particular key prefix, starting at a particular
 // initial key (or after, if it does not exist).
diff --git a/tests/fuzzers/bn256/bn256_fuzz.go b/tests/fuzzers/bn256/bn256_fuzz.go
index 030ac19b3f527..11fd9e18df00d 100644
--- a/tests/fuzzers/bn256/bn256_fuzz.go
+++ b/tests/fuzzers/bn256/bn256_fuzz.go
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be found
 // in the LICENSE file.
 
+//go:build gofuzz
 // +build gofuzz
 
 package bn256
diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go
index e73ef4851a9c2..6beec7c802780 100644
--- a/tests/fuzzers/stacktrie/trie_fuzzer.go
+++ b/tests/fuzzers/stacktrie/trie_fuzzer.go
@@ -66,6 +66,7 @@ func (s *spongeDb) Has(key []byte) (bool, error)             { panic("implement
 func (s *spongeDb) Get(key []byte) ([]byte, error)           { return nil, errors.New("no such elem") }
 func (s *spongeDb) Delete(key []byte) error                  { panic("implement me") }
 func (s *spongeDb) NewBatch() ethdb.Batch                    { return &spongeBatch{s} }
+func (s *spongeDb) NewBatchWithSize(size int) ethdb.Batch    { return &spongeBatch{s} }
 func (s *spongeDb) Stat(property string) (string, error)     { panic("implement me") }
 func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
 func (s *spongeDb) Close() error                             { return nil }
diff --git a/trie/iterator_test.go b/trie/iterator_test.go
index 95cafdd3bdceb..8540fc8f2dc20 100644
--- a/trie/iterator_test.go
+++ b/trie/iterator_test.go
@@ -470,6 +470,10 @@ func (l *loggingDb) NewBatch() ethdb.Batch {
 	return l.backend.NewBatch()
 }
 
+func (l *loggingDb) NewBatchWithSize(size int) ethdb.Batch {
+	return l.backend.NewBatchWithSize(size)
+}
+
 func (l *loggingDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator {
 	fmt.Printf("NewIterator\n")
 	return l.backend.NewIterator(prefix, start)
diff --git a/trie/trie_test.go b/trie/trie_test.go
index be0df8a544263..3097c67f0d915 100644
--- a/trie/trie_test.go
+++ b/trie/trie_test.go
@@ -675,6 +675,7 @@ func (s *spongeDb) Has(key []byte) (bool, error)             { panic("implement
 func (s *spongeDb) Get(key []byte) ([]byte, error)           { return nil, errors.New("no such elem") }
 func (s *spongeDb) Delete(key []byte) error                  { panic("implement me") }
 func (s *spongeDb) NewBatch() ethdb.Batch                    { return &spongeBatch{s} }
+func (s *spongeDb) NewBatchWithSize(size int) ethdb.Batch    { return &spongeBatch{s} }
 func (s *spongeDb) Stat(property string) (string, error)     { panic("implement me") }
 func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
 func (s *spongeDb) Close() error                             { return nil }