Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

s2: Slightly faster non-assembly decompression #646

Merged
merged 2 commits into from Jul 25, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 23 additions & 11 deletions s2/decode_other.go
Expand Up @@ -28,6 +28,9 @@ func s2Decode(dst, src []byte) int {

// As long as we can read at least 5 bytes...
for s < len(src)-5 {
// Removing bounds checks is SLOWER when doing
// in := src[s:s+5]
// Checked on Go 1.18
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
Expand All @@ -38,14 +41,19 @@ func s2Decode(dst, src []byte) int {
s += 2
x = uint32(src[s-1])
case x == 61:
in := src[s : s+3]
x = uint32(in[1]) | uint32(in[2])<<8
s += 3
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
in := src[s : s+4]
// Load as 32 bit and shift down.
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x >>= 8
s += 4
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
in := src[s : s+5]
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
Expand All @@ -62,38 +70,42 @@ func s2Decode(dst, src []byte) int {

case tagCopy1:
s += 2
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
length = int(src[s-2]) >> 2 & 0x7
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
length = int(src[s]) + 4
s += 1
length = int(uint32(src[s-1])) + 4
case 6:
in := src[s : s+2]
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
in := src[s : s+3]
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
in := src[s : s+3]
offset = int(uint32(in[1]) | uint32(in[2])<<8)
length = 1 + int(in[0])>>2
s += 3
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)

case tagCopy4:
in := src[s : s+5]
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
length = 1 + int(in[0])>>2
s += 5
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}

if offset <= 0 || d < offset || length > len(dst)-d {
Expand Down