Skip to content

Commit

Permalink
zlib: More precise matching (#386)
Browse files Browse the repository at this point in the history
* demo bug

* check 2 bytes of zlib header

* add .zz test
  • Loading branch information
dpgarrick committed Sep 12, 2023
1 parent 9f827e1 commit 24fa33e
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 5 deletions.
35 changes: 35 additions & 0 deletions formats_test.go
Expand Up @@ -3,6 +3,7 @@ package archiver
import (
"bytes"
"context"
"errors"
"io"
"io/fs"
"math/rand"
Expand Down Expand Up @@ -370,6 +371,13 @@ func TestIdentifyFindFormatByStreamContent(t *testing.T) {
compressorName: "",
wantFormatName: ".rar",
},
{
name: "should recognize zz",
openCompressionWriter: Zlib{}.OpenWriter,
content: []byte("this is text"),
compressorName: ".zz",
wantFormatName: ".zz",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -410,3 +418,30 @@ func TestIdentifyAndOpenZip(t *testing.T) {
})
checkErr(t, err, "extracting zip")
}

// TestIdentifyASCIIFileStartingWithX checks that a plain-text file whose
// first byte is 'x' (0x78) is not identified as any known format: Identify
// must report ErrNoMatch for it.
func TestIdentifyASCIIFileStartingWithX(t *testing.T) {
	// Create the file inside t.TempDir() so the testing framework removes it
	// when the test finishes, instead of leaking it into the system temp dir.
	tmpFile, err := os.CreateTemp(t.TempDir(), "TestIdentifyASCIIFileStartingWithX-tmp-*.txt")
	if err != nil {
		t.Fatalf("fail to create tmp test file for archive tests: err=%v", err)
	}

	// The content deliberately starts with the letter 'x'.
	if _, err = tmpFile.Write([]byte("xThis is a test file")); err != nil {
		tmpFile.Close() // best effort; the write error is the one worth reporting
		t.Fatalf("Failed to write to temp file: %v", err)
	}
	// A failed close can mean the data never reached the file, so check it.
	if err = tmpFile.Close(); err != nil {
		t.Fatalf("Failed to close temp file: %v", err)
	}

	// Open the file and use the Identify function
	file, err := os.Open(tmpFile.Name())
	if err != nil {
		t.Fatalf("Failed to open temp file: %v", err)
	}
	defer file.Close()

	// Success here means "no format matched"; any other outcome, including
	// a nil error, is a failure.
	_, _, err = Identify(tmpFile.Name(), file)
	if !errors.Is(err, ErrNoMatch) {
		t.Fatalf("Identify: got err=%v, want ErrNoMatch", err)
	}
}
30 changes: 25 additions & 5 deletions zlib.go
@@ -1,7 +1,6 @@
package archiver

import (
"bytes"
"io"
"strings"

Expand All @@ -28,11 +27,13 @@ func (zz Zlib) Match(filename string, stream io.Reader) (MatchResult, error) {
}

// match file header
buf, err := readAtMost(stream, len(ZlibHeader))
if err != nil {
buf, err := readAtMost(stream, 2)
// If an error occurred or buf is not 2 bytes we can't check the header
if err != nil || len(buf) < 2 {
return mr, err
}
mr.ByStream = bytes.Equal(buf, ZlibHeader)

mr.ByStream = isValidZlibHeader(buf[0], buf[1])

return mr, nil
}
Expand All @@ -49,4 +50,23 @@ func (Zlib) OpenReader(r io.Reader) (io.ReadCloser, error) {
return zlib.NewReader(r)
}

var ZlibHeader = []byte{0x78}
// isValidZlibHeader reports whether first and second form one of the 32
// two-byte zlib stream headers that do not request a preset dictionary.
//
// Per RFC 1950, first is the CMF byte and second is the FLG byte. Rather than
// building a lookup table on every call, the header is validated directly
// against the rules that generate those 32 values (see also
// https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like/54915442#54915442):
//
//   - CM (low nibble of CMF) is 8, i.e. deflate;
//   - CINFO (high nibble of CMF) is at most 7, i.e. a window of up to 32 KiB;
//   - FDICT (bit 5 of FLG) is clear;
//   - CMF and FLG, read as a big-endian 16-bit value, are a multiple of 31.
func isValidZlibHeader(first, second byte) bool {
	const (
		deflateMethod = 8    // the only compression method the accepted headers use
		maxWindowInfo = 7    // largest CINFO value among the accepted headers
		presetDictBit = 0x20 // FDICT flag inside FLG
	)

	// Reject anything that is not deflate with a supported window size.
	if first&0x0F != deflateMethod || first>>4 > maxWindowInfo {
		return false
	}

	// Headers announcing a preset dictionary are not among the accepted set.
	if second&presetDictBit != 0 {
		return false
	}

	// FCHECK (low five bits of FLG) makes the combined big-endian value
	// divisible by 31.
	return (uint16(first)<<8|uint16(second))%31 == 0
}

0 comments on commit 24fa33e

Please sign in to comment.