From 0d8470dbf20ac0223d338036746e8425a89a96fe Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 7 Jul 2022 19:02:13 +0200 Subject: [PATCH 1/3] s2: Add Index header trim/restore Add `RemoveIndexHeaders` that will remove 20 header+trailer bytes for cases when storage can be relied upon. `RestoreIndexHeaders` will restore the index header+trailer so it can be loaded. --- s2/index.go | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ s2/index_test.go | 12 ++++++++++ 2 files changed, 71 insertions(+) diff --git a/s2/index.go b/s2/index.go index 7b24a0060b..16028554f5 100644 --- a/s2/index.go +++ b/s2/index.go @@ -533,3 +533,62 @@ func (i *Index) JSON() []byte { b, _ := json.MarshalIndent(x, "", " ") return b } + +// RemoveIndexHeaders will trim all headers and trailers from a given index. +// This is expected to save 20 bytes. +// These can be restored using RestoreIndexHeaders. +// This removes a layer of security, but is the most compact representation. +// Returns nil if headers contains errors. +// The returned slice references the provided slice. +func RemoveIndexHeaders(b []byte) []byte { + const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4 + if len(b) <= save { + return nil + } + if b[0] != ChunkTypeIndex { + return nil + } + chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16 + b = b[4:] + + // Validate we have enough... + if len(b) < chunkLen { + return nil + } + b = b[:chunkLen] + + if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) { + return nil + } + b = b[len(S2IndexHeader):] + if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) { + return nil + } + b = bytes.TrimSuffix(b, []byte(S2IndexTrailer)) + + if len(b) < 4 { + return nil + } + return b[:len(b)-4] +} + +// RestoreIndexHeaders will index restore headers removed by RemoveIndexHeaders. +// No error checking is performed on the input. +func RestoreIndexHeaders(in []byte) []byte { + b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4) + b = append(b, ChunkTypeIndex, 0, 0, 0) + b = append(b, []byte(S2IndexHeader)...) + b = append(b, in...) + + var tmp [4]byte + binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)+4+len(S2IndexTrailer))) + b = append(b, tmp[:4]...) + // Trailer + b = append(b, []byte(S2IndexTrailer)...) + + chunkLen := len(b) - skippableFrameHeader + b[1] = uint8(chunkLen >> 0) + b[2] = uint8(chunkLen >> 8) + b[3] = uint8(chunkLen >> 16) + return b +} diff --git a/s2/index_test.go b/s2/index_test.go index 9a43c2be74..faa6704ea5 100644 --- a/s2/index_test.go +++ b/s2/index_test.go @@ -2,6 +2,7 @@ package s2_test import ( "bytes" + "encoding/hex" "fmt" "io" "io/ioutil" @@ -123,6 +124,17 @@ func TestSeeking(t *testing.T) { t.Fatal(err) } + // Test trimming + slim := s2.RemoveIndexHeaders(index) + if slim == nil { + t.Error("Removing headers failed") + } + restored := s2.RestoreIndexHeaders(slim) + if !bytes.Equal(restored, index) { + t.Errorf("want %s, got %s", hex.EncodeToString(index), hex.EncodeToString(restored)) + } + t.Logf("Saved %d bytes", len(index)-len(slim)) + for _, skip := range testSizes { t.Run(fmt.Sprintf("noSeekSkip=%d", skip), func(t *testing.T) { dec := s2.NewReader(io.NopCloser(bytes.NewReader(compressed.Bytes()))) From b94784f8360954b5ce29f16d70aa6e44ccdf9f8e Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 8 Jul 2022 14:29:07 +0200 Subject: [PATCH 2/3] Add test. Return 0 bytes slice. --- s2/index.go | 4 ++++ s2/index_test.go | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/s2/index.go b/s2/index.go index 16028554f5..dd9ecfe718 100644 --- a/s2/index.go +++ b/s2/index.go @@ -574,7 +574,11 @@ func RemoveIndexHeaders(b []byte) []byte { // RestoreIndexHeaders will index restore headers removed by RemoveIndexHeaders. // No error checking is performed on the input. +// If a 0 length slice is sent, it is returned without modification. func RestoreIndexHeaders(in []byte) []byte { + if len(in) == 0 { + return in + } b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4) b = append(b, ChunkTypeIndex, 0, 0, 0) b = append(b, []byte(S2IndexHeader)...) diff --git a/s2/index_test.go b/s2/index_test.go index faa6704ea5..f9d662efa5 100644 --- a/s2/index_test.go +++ b/s2/index_test.go @@ -135,6 +135,17 @@ func TestSeeking(t *testing.T) { } t.Logf("Saved %d bytes", len(index)-len(slim)) + // Test trimming + slim := s2.RemoveIndexHeaders(index) + if slim == nil { + t.Error("Removing headers failed") + } + restored := s2.RestoreIndexHeaders(slim) + if !bytes.Equal(restored, index) { + t.Errorf("want %s, got %s", hex.EncodeToString(index), hex.EncodeToString(restored)) + } + t.Logf("Saved %d bytes, %d remain", len(index)-len(slim), len(slim)) + for _, skip := range testSizes { t.Run(fmt.Sprintf("noSeekSkip=%d", skip), func(t *testing.T) { dec := s2.NewReader(io.NopCloser(bytes.NewReader(compressed.Bytes()))) From fd08d81d802b4dcaa244c39e6dd6ac0df77025b5 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 8 Jul 2022 14:30:54 +0200 Subject: [PATCH 3/3] Remove duplicate merge. --- s2/index_test.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/s2/index_test.go b/s2/index_test.go index f9d662efa5..faa6704ea5 100644 --- a/s2/index_test.go +++ b/s2/index_test.go @@ -135,17 +135,6 @@ func TestSeeking(t *testing.T) { } t.Logf("Saved %d bytes", len(index)-len(slim)) - // Test trimming - slim := s2.RemoveIndexHeaders(index) - if slim == nil { - t.Error("Removing headers failed") - } - restored := s2.RestoreIndexHeaders(slim) - if !bytes.Equal(restored, index) { - t.Errorf("want %s, got %s", hex.EncodeToString(index), hex.EncodeToString(restored)) - } - t.Logf("Saved %d bytes, %d remain", len(index)-len(slim), len(slim)) - for _, skip := range testSizes { t.Run(fmt.Sprintf("noSeekSkip=%d", skip), func(t *testing.T) { dec := s2.NewReader(io.NopCloser(bytes.NewReader(compressed.Bytes())))