From f1d588dc6211f319d373e47c3a4cb4879912387f Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 24 Aug 2021 09:00:10 +0200 Subject: [PATCH] core/vm: faster code analysis (#23381) * core/vm: more detailed benchmark for jumpdest analysis * core/vm: make jd analysis benchmark alloc free * core/vm: improve jumpdest analysis * core/vm: improve worst-case * core/vm: further improvements in analysis * core/vm: improve jumpdest analysis >PUSH15 * core/vm: make jd analysis ref by value * core/vm: fix misspell * core/vm: improve set8 and set16 a bit * core/vm: reduce amount of code * core/vm: optimize byte copying --- core/vm/analysis.go | 94 +++++++++++++++++++++++++++++++++------- core/vm/analysis_test.go | 24 +++++++++- core/vm/instructions.go | 4 ++ core/vm/interpreter.go | 2 +- 4 files changed, 105 insertions(+), 19 deletions(-) diff --git a/core/vm/analysis.go b/core/vm/analysis.go index 0ccf47b97903a..449cded2a896c 100644 --- a/core/vm/analysis.go +++ b/core/vm/analysis.go @@ -16,17 +16,49 @@ package vm +const ( + set2BitsMask = uint16(0b1100_0000_0000_0000) + set3BitsMask = uint16(0b1110_0000_0000_0000) + set4BitsMask = uint16(0b1111_0000_0000_0000) + set5BitsMask = uint16(0b1111_1000_0000_0000) + set6BitsMask = uint16(0b1111_1100_0000_0000) + set7BitsMask = uint16(0b1111_1110_0000_0000) +) + // bitvec is a bit vector which maps bytes in a program. // An unset bit means the byte is an opcode, a set bit means // it's data (i.e. argument of PUSHxx). type bitvec []byte -func (bits *bitvec) set(pos uint64) { - (*bits)[pos/8] |= 0x80 >> (pos % 8) +var lookup = [8]byte{ + 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, +} + +func (bits bitvec) set1(pos uint64) { + bits[pos/8] |= lookup[pos%8] +} + +func (bits bitvec) setN(flag uint16, pos uint64) { + a := flag >> (pos % 8) + bits[pos/8] |= byte(a >> 8) + if b := byte(a); b != 0 { + // If the bit-setting affects the neighbouring byte, we can assign - no need to OR it, + // since it's the first write to that byte + bits[pos/8+1] = b + } +} + +func (bits bitvec) set8(pos uint64) { + a := byte(0xFF >> (pos % 8)) + bits[pos/8] |= a + bits[pos/8+1] = ^a } -func (bits *bitvec) set8(pos uint64) { - (*bits)[pos/8] |= 0xFF >> (pos % 8) - (*bits)[pos/8+1] |= ^(0xFF >> (pos % 8)) + +func (bits bitvec) set16(pos uint64) { + a := byte(0xFF >> (pos % 8)) + bits[pos/8] |= a + bits[pos/8+1] = 0xFF + bits[pos/8+2] = ^a } // codeSegment checks if the position is in a code segment. @@ -40,22 +72,52 @@ func codeBitmap(code []byte) bitvec { // ends with a PUSH32, the algorithm will push zeroes onto the // bitvector outside the bounds of the actual code. bits := make(bitvec, len(code)/8+1+4) + return codeBitmapInternal(code, bits) +} + +// codeBitmapInternal is the internal implementation of codeBitmap. +// It exists for the purpose of being able to run benchmark tests +// without dynamic allocations affecting the results. +func codeBitmapInternal(code, bits bitvec) bitvec { for pc := uint64(0); pc < uint64(len(code)); { op := OpCode(code[pc]) - - if op >= PUSH1 && op <= PUSH32 { - numbits := op - PUSH1 + 1 - pc++ + pc++ + if op < PUSH1 || op > PUSH32 { + continue + } + numbits := op - PUSH1 + 1 + if numbits >= 8 { + for ; numbits >= 16; numbits -= 16 { + bits.set16(pc) + pc += 16 + } for ; numbits >= 8; numbits -= 8 { - bits.set8(pc) // 8 + bits.set8(pc) pc += 8 } - for ; numbits > 0; numbits-- { - bits.set(pc) - pc++ - } - } else { - pc++ + } + switch numbits { + case 1: + bits.set1(pc) + pc += 1 + case 2: + bits.setN(set2BitsMask, pc) + pc += 2 + case 3: + bits.setN(set3BitsMask, pc) + pc += 3 + case 4: + bits.setN(set4BitsMask, pc) + pc += 4 + case 5: + bits.setN(set5BitsMask, pc) + pc += 5 + case 6: + bits.setN(set6BitsMask, pc) + pc += 6 + case 7: + bits.setN(set7BitsMask, pc) + pc += 7 } } return bits diff --git a/core/vm/analysis_test.go b/core/vm/analysis_test.go index fd2d744d87f49..585bb3097f44a 100644 --- a/core/vm/analysis_test.go +++ b/core/vm/analysis_test.go @@ -47,10 +47,10 @@ func TestJumpDestAnalysis(t *testing.T) { {[]byte{byte(PUSH32)}, 0xFF, 1}, {[]byte{byte(PUSH32)}, 0xFF, 2}, } - for _, test := range tests { + for i, test := range tests { ret := codeBitmap(test.code) if ret[test.which] != test.exp { - t.Fatalf("expected %x, got %02x", test.exp, ret[test.which]) + t.Fatalf("test %d: expected %x, got %02x", i, test.exp, ret[test.which]) } } } @@ -73,3 +73,23 @@ func BenchmarkJumpdestHashing_1200k(bench *testing.B) { } bench.StopTimer() } + +func BenchmarkJumpdestOpAnalysis(bench *testing.B) { + var op OpCode + bencher := func(b *testing.B) { + code := make([]byte, 32*b.N) + for i := range code { + code[i] = byte(op) + } + bits := make(bitvec, len(code)/8+1+4) + b.ResetTimer() + codeBitmapInternal(code, bits) + } + for op = PUSH1; op <= PUSH32; op++ { + bench.Run(op.String(), bencher) + } + op = JUMPDEST + bench.Run(op.String(), bencher) + op = STOP + bench.Run(op.String(), bencher) +} diff --git a/core/vm/instructions.go b/core/vm/instructions.go index f7ef2f900e202..6c8c6e6e6fa1b 100644 --- a/core/vm/instructions.go +++ b/core/vm/instructions.go @@ -669,6 +669,7 @@ func opCall(pc *uint64, interpreter *EVMInterpreter, scope *ScopeContext) ([]byt } stack.push(&temp) if err == nil || err == ErrExecutionReverted { + ret = common.CopyBytes(ret) scope.Memory.Set(retOffset.Uint64(), retSize.Uint64(), ret) } scope.Contract.Gas += returnGas @@ -703,6 +704,7 @@ func opCallCode(pc *uint64, interpreter *EVMInterpreter, scope *ScopeContext) ([ } stack.push(&temp) if err == nil || err == ErrExecutionReverted { + ret = common.CopyBytes(ret) scope.Memory.Set(retOffset.Uint64(), retSize.Uint64(), ret) } scope.Contract.Gas += returnGas @@ -730,6 +732,7 @@ func opDelegateCall(pc *uint64, interpreter *EVMInterpreter, scope *ScopeContext } stack.push(&temp) if err == nil || err == ErrExecutionReverted { + ret = common.CopyBytes(ret) scope.Memory.Set(retOffset.Uint64(), retSize.Uint64(), ret) } scope.Contract.Gas += returnGas @@ -757,6 +760,7 @@ func opStaticCall(pc *uint64, interpreter *EVMInterpreter, scope *ScopeContext) } stack.push(&temp) if err == nil || err == ErrExecutionReverted { + ret = common.CopyBytes(ret) scope.Memory.Set(retOffset.Uint64(), retSize.Uint64(), ret) } scope.Contract.Gas += returnGas diff --git a/core/vm/interpreter.go b/core/vm/interpreter.go index 9cf0c4e2c1e07..9fb83799c98c8 100644 --- a/core/vm/interpreter.go +++ b/core/vm/interpreter.go @@ -262,7 +262,7 @@ func (in *EVMInterpreter) Run(contract *Contract, input []byte, readOnly bool) ( // if the operation clears the return data (e.g. it has returning data) // set the last return to the result of the operation. if operation.returns { - in.returnData = common.CopyBytes(res) + in.returnData = res } switch {