flate: Inline literal emission (#509)

~2% faster. Emit long matches as length 255, since it saves bits.
klauspost · Mar 1, 2022 · 84e0164 · 84e0164
1 parent bf9102c
commit 84e0164
Show file tree

Hide file tree

Showing 9 changed files with 79 additions and 15 deletions.
diff --git a/flate/deflate_test.go b/flate/deflate_test.go
@@ -157,7 +157,8 @@ func TestVeryLongSparseChunk(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping sparse chunk during short test")
 	}
-	w, err := NewWriter(ioutil.Discard, 1)
+	var buf bytes.Buffer
+	w, err := NewWriter(&buf, 1)
 	if err != nil {
 		t.Errorf("NewWriter: %v", err)
 		return
@@ -166,6 +167,7 @@ func TestVeryLongSparseChunk(t *testing.T) {
 		t.Errorf("Compress failed: %v", err)
 		return
 	}
+	t.Log("Length:", buf.Len())
 }
 
 type syncBuffer struct {

diff --git a/flate/level1.go b/flate/level1.go
@@ -154,7 +154,15 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 				l++
 			}
 			if nextEmit < s {
-				emitLiteral(dst, src[nextEmit:s])
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
 			}
 
 			// Save the match found
@@ -169,8 +177,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 				for xlength > 0 {
 					xl := xlength
 					if xl > 258 {
-						// We need to have at least baseMatchLength left over for next loop.
-						xl = 258 - baseMatchLength
+						if xl > 258+baseMatchLength {
+							xl = 258
+						} else {
+							xl = 258 - baseMatchLength
+						}
 					}
 					xlength -= xl
 					xl -= baseMatchLength

diff --git a/flate/level2.go b/flate/level2.go
@@ -134,7 +134,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 				l++
 			}
 			if nextEmit < s {
-				emitLiteral(dst, src[nextEmit:s])
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
 			}
 
 			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))

diff --git a/flate/level3.go b/flate/level3.go
@@ -143,7 +143,15 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 				l++
 			}
 			if nextEmit < s {
-				emitLiteral(dst, src[nextEmit:s])
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
 			}
 
 			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))

diff --git a/flate/level4.go b/flate/level4.go
@@ -135,7 +135,15 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			l++
 		}
 		if nextEmit < s {
-			emitLiteral(dst, src[nextEmit:s])
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
 		}
 		if debugDeflate {
 			if t >= s {

diff --git a/flate/level5.go b/flate/level5.go
@@ -210,7 +210,15 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			l++
 		}
 		if nextEmit < s {
-			emitLiteral(dst, src[nextEmit:s])
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
 		}
 		if debugDeflate {
 			if t >= s {

diff --git a/flate/level6.go b/flate/level6.go
@@ -243,7 +243,15 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 			l++
 		}
 		if nextEmit < s {
-			emitLiteral(dst, src[nextEmit:s])
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
 		}
 		if false {
 			if t >= s {

diff --git a/flate/stateless.go b/flate/stateless.go
@@ -249,7 +249,15 @@ func statelessEnc(dst *tokens, src []byte, startAt int16) {
 				l++
 			}
 			if nextEmit < s {
-				emitLiteral(dst, src[nextEmit:s])
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
 			}
 
 			// Save the match found

diff --git a/flate/token.go b/flate/token.go
@@ -195,12 +195,11 @@ func (t *tokens) indexTokens(in []token) {
 
 // emitLiteral writes a literal chunk and returns the number of bytes written.
 func emitLiteral(dst *tokens, lit []byte) {
-	ol := int(dst.n)
-	for i, v := range lit {
-		dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
+	for _, v := range lit {
+		dst.tokens[dst.n] = token(v)
 		dst.litHist[v]++
+		dst.n++
 	}
-	dst.n += uint16(len(lit))
 }
 
 func (t *tokens) AddLiteral(lit byte) {
@@ -294,7 +293,11 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
 		xl := xlength
 		if xl > 258 {
 			// We need to have at least baseMatchLength left over for next loop.
-			xl = 258 - baseMatchLength
+			if xl > 258+baseMatchLength {
+				xl = 258
+			} else {
+				xl = 258 - baseMatchLength
+			}
 		}
 		xlength -= xl
 		xl -= baseMatchLength