diff --git a/backuptar/tar.go b/backuptar/tar.go index cb461ca3..689e4da6 100644 --- a/backuptar/tar.go +++ b/backuptar/tar.go @@ -5,7 +5,6 @@ package backuptar import ( "archive/tar" "encoding/base64" - "errors" "fmt" "io" "io/ioutil" @@ -42,19 +41,14 @@ const ( hdrCreationTime = "LIBARCHIVE.creationtime" ) -func writeZeroes(w io.Writer, count int64) error { - buf := make([]byte, 8192) - c := len(buf) - for i := int64(0); i < count; i += int64(c) { - if int64(c) > count-i { - c = int(count - i) - } - _, err := w.Write(buf[:c]) - if err != nil { - return err - } +// zeroReader is an io.Reader that always returns 0s. +type zeroReader struct{} + +func (zr zeroReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = 0 } - return nil + return len(b), nil } func copySparse(t *tar.Writer, br *winio.BackupStreamReader) error { @@ -71,16 +65,26 @@ func copySparse(t *tar.Writer, br *winio.BackupStreamReader) error { return fmt.Errorf("unexpected stream %d", bhdr.Id) } + // We can't seek backwards, since we have already written that data to the tar.Writer. + if bhdr.Offset < curOffset { + return fmt.Errorf("cannot seek back from %d to %d", curOffset, bhdr.Offset) + } // archive/tar does not support writing sparse files // so just write zeroes to catch up to the current offset. - err = writeZeroes(t, bhdr.Offset-curOffset) + if _, err := io.CopyN(t, zeroReader{}, bhdr.Offset-curOffset); err != nil { + return fmt.Errorf("seek to offset %d: %s", bhdr.Offset, err) + } if bhdr.Size == 0 { + // A sparse block with size = 0 is used to mark the end of the sparse blocks. break } n, err := io.Copy(t, br) if err != nil { return err } + if n != bhdr.Size { + return fmt.Errorf("copied %d bytes instead of %d at offset %d", n, bhdr.Size, bhdr.Offset) + } curOffset = bhdr.Offset + n } return nil @@ -221,20 +225,44 @@ func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size } } + // The logic for copying file contents is fairly complicated due to the need for handling sparse files, + // and the weird ways they are represented by BackupRead. A normal file will always either have a data stream + // with size and content, or no data stream at all (if empty). However, for a sparse file, the content can also + // be represented using a series of sparse block streams following the data stream. Additionally, the way sparse + // files are handled by BackupRead has changed in the OS recently. The specifics of the representation are described + // in the list at the bottom of this block comment. + // + // Sparse files can be represented in four different ways, based on the specifics of the file. + // - Size = 0: + // Previously: BackupRead yields no data stream and no sparse block streams. + // Recently: BackupRead yields a data stream with size = 0. There are no following sparse block streams. + // - Size > 0, no allocated ranges: + // BackupRead yields a data stream with size = 0. Following is a single sparse block stream with + // size = 0 and offset = . + // - Size > 0, one allocated range: + // BackupRead yields a data stream with size = containing the file contents. There are no + // sparse block streams. This is the case if you take a normal file with contents and simply set the + // sparse flag on it. + // - Size > 0, multiple allocated ranges: + // BackupRead yields a data stream with size = 0. Following are sparse block streams for each allocated + // range of the file containing the range contents. Finally there is a sparse block stream with + // size = 0 and offset = . + if dataHdr != nil { // A data stream was found. Copy the data. - if (dataHdr.Attributes & winio.StreamSparseAttributes) == 0 { + // We assume that we will either have a data stream size > 0 XOR have sparse block streams. + if dataHdr.Size > 0 || (dataHdr.Attributes&winio.StreamSparseAttributes) == 0 { if size != dataHdr.Size { return fmt.Errorf("%s: mismatch between file size %d and header size %d", name, size, dataHdr.Size) } - _, err = io.Copy(t, br) - if err != nil { - return err + if _, err = io.Copy(t, br); err != nil { + return fmt.Errorf("%s: copying contents from data stream: %s", name, err) } - } else { - err = copySparse(t, br) - if err != nil { - return err + } else if size > 0 { + // As of a recent OS change, BackupRead now returns a data stream for empty sparse files. + // These files have no sparse block streams, so skip the copySparse call if file size = 0. + if err = copySparse(t, br); err != nil { + return fmt.Errorf("%s: copying contents from sparse block stream: %s", name, err) } } } @@ -279,7 +307,7 @@ func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size } else { // Unsupported for now, since the size of the alternate stream is not present // in the backup stream until after the data has been read. - return errors.New("tar of sparse alternate data streams is unsupported") + return fmt.Errorf("%s: tar of sparse alternate data streams is unsupported", name) } case winio.BackupEaData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData: // ignore these streams diff --git a/backuptar/tar_test.go b/backuptar/tar_test.go index 8be0bf34..cc2cbf09 100644 --- a/backuptar/tar_test.go +++ b/backuptar/tar_test.go @@ -5,6 +5,7 @@ package backuptar import ( "archive/tar" "bytes" + "io" "io/ioutil" "os" "path/filepath" @@ -12,6 +13,7 @@ import ( "testing" "github.com/Microsoft/go-winio" + "golang.org/x/sys/windows" ) func ensurePresent(t *testing.T, m map[string]string, keys ...string) { @@ -22,65 +24,197 @@ func ensurePresent(t *testing.T, m map[string]string, keys ...string) { } } -func TestRoundTrip(t *testing.T) { - f, err := ioutil.TempFile("", "tst") - if err != nil { - t.Fatal(err) - } - defer f.Close() - defer os.Remove(f.Name()) - - if _, err = f.Write([]byte("testing 1 2 3\n")); err != nil { - t.Fatal(err) - } - - if _, err = f.Seek(0, 0); err != nil { +func setSparse(t *testing.T, f *os.File) { + const FSCTL_SET_SPARSE uint32 = 0x900c4 + if err := windows.DeviceIoControl(windows.Handle(f.Fd()), FSCTL_SET_SPARSE, nil, 0, nil, 0, nil, nil); err != nil { t.Fatal(err) } +} - fi, err := f.Stat() - if err != nil { - t.Fatal(err) +// compareReaders validates that two readers contain the exact same data. +func compareReaders(t *testing.T, rActual io.Reader, rExpected io.Reader) { + const size = 8 * 1024 + var bufExpected, bufActual [size]byte + var readCount int64 + // Loop, first reading from rExpected, then reading the same amount from rActual. + // For each set of reads, compare the bytes to make sure they are identical. + // When we run out of data in rExpected, exit the loop. + for { + // Do a read from rExpected and see how many bytes we get. + nExpected, err := rExpected.Read(bufExpected[:]) + if err == io.EOF && nExpected == 0 { + break + } else if err != nil && err != io.EOF { + t.Fatalf("Failed reading from rExpected at %d: %s", readCount, err) + } + // Do a ReadFull from rActual for the same number of bytes we got from rExpected. + if nActual, err := io.ReadFull(rActual, bufActual[:nExpected]); err != nil { + t.Fatalf("Only read %d bytes out of %d from rActual at %d: %s", nActual, nExpected, readCount, err) + } + readCount += int64(nExpected) + for i, bExpected := range bufExpected[:nExpected] { + if bExpected != bufActual[i] { + t.Fatalf("Mismatched bytes at %d. got 0x%x, expected 0x%x", i, bufActual[i], bExpected) + } + } } - - bi, err := winio.GetFileBasicInfo(f) - if err != nil { - t.Fatal(err) - } - - br := winio.NewBackupFileReader(f, true) - defer br.Close() - - var buf bytes.Buffer - tw := tar.NewWriter(&buf) - - err = WriteTarFileFromBackupStream(tw, br, f.Name(), fi.Size(), bi) - if err != nil { - t.Fatal(err) + // Now we just need to make sure there isn't any further data in rActual. + var b [1]byte + if n, err := rActual.Read(b[:]); n != 0 || err != io.EOF { + t.Fatalf("rActual didn't return EOF at expected end. Read %d bytes with error %s", n, err) } +} - tr := tar.NewReader(&buf) - hdr, err := tr.Next() - if err != nil { - t.Fatal(err) +func TestRoundTrip(t *testing.T) { + // Each test case is a name mapped to a function which must create a file and return its path. + // The test then round-trips that file through backuptar, and validates the output matches the input. + for name, setup := range map[string]func(*testing.T) string{ + "normalFile": func(t *testing.T) string { + path := filepath.Join(t.TempDir(), "foo.txt") + if err := ioutil.WriteFile(path, []byte("testing 1 2 3\n"), 0644); err != nil { + t.Fatal(err) + } + return path + }, + "normalFileEmpty": func(t *testing.T) string { + path := filepath.Join(t.TempDir(), "foo.txt") + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + t.Fatal(err) + } + defer f.Close() + return path + }, + "sparseFileEmpty": func(t *testing.T) string { + path := filepath.Join(t.TempDir(), "foo.txt") + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + t.Fatal(err) + } + defer f.Close() + setSparse(t, f) + return path + }, + "sparseFileWithNoAllocatedRanges": func(t *testing.T) string { + path := filepath.Join(t.TempDir(), "foo.txt") + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + t.Fatal(err) + } + defer f.Close() + setSparse(t, f) + // Set file size without writing data to produce a file with size > 0 + // but no allocated ranges. + if err := f.Truncate(1000000); err != nil { + t.Fatal(err) + } + return path + }, + "sparseFileWithOneAllocatedRange": func(t *testing.T) string { + path := filepath.Join(t.TempDir(), "foo.txt") + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + t.Fatal(err) + } + defer f.Close() + setSparse(t, f) + if _, err := f.WriteString("test sparse data"); err != nil { + t.Fatal(err) + } + return path + }, + "sparseFileWithMultipleAllocatedRanges": func(t *testing.T) string { + path := filepath.Join(t.TempDir(), "foo.txt") + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + t.Fatal(err) + } + defer f.Close() + setSparse(t, f) + if _, err = f.Write([]byte("testing 1 2 3\n")); err != nil { + t.Fatal(err) + } + // The documentation talks about FSCTL_SET_ZERO_DATA, but seeking also + // seems to create a hole. + if _, err = f.Seek(1000000, 0); err != nil { + t.Fatal(err) + } + if _, err = f.Write([]byte("more data later\n")); err != nil { + t.Fatal(err) + } + return path + }, + } { + t.Run(name, func(t *testing.T) { + path := setup(t) + f, err := os.Open(path) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + t.Fatal(err) + } + bi, err := winio.GetFileBasicInfo(f) + if err != nil { + t.Fatal(err) + } + + br := winio.NewBackupFileReader(f, true) + defer br.Close() + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + err = WriteTarFileFromBackupStream(tw, br, f.Name(), fi.Size(), bi) + if err != nil { + t.Fatal(err) + } + tr := tar.NewReader(&buf) + hdr, err := tr.Next() + if err != nil { + t.Fatal(err) + } + + name, size, bi2, err := FileInfoFromHeader(hdr) + if err != nil { + t.Fatal(err) + } + if name != filepath.ToSlash(f.Name()) { + t.Errorf("got name %s, expected %s", name, filepath.ToSlash(f.Name())) + } + if size != fi.Size() { + t.Errorf("got size %d, expected %d", size, fi.Size()) + } + if !reflect.DeepEqual(*bi2, *bi) { + t.Errorf("got %#v, expected %#v", *bi2, *bi) + } + ensurePresent(t, hdr.PAXRecords, "MSWINDOWS.fileattr", "MSWINDOWS.rawsd") + // Reset file position so we can compare file contents. + // The file contents of the actual file should match what we get from the tar. + if _, err := f.Seek(0, 0); err != nil { + t.Fatal(err) + } + compareReaders(t, tr, f) + }) } +} - name, size, bi2, err := FileInfoFromHeader(hdr) +func TestZeroReader(t *testing.T) { + const size = 512 + var b [size]byte + var bExpected [size]byte + var r zeroReader + n, err := r.Read(b[:]) if err != nil { - t.Fatal(err) - } - - if name != filepath.ToSlash(f.Name()) { - t.Errorf("got name %s, expected %s", name, filepath.ToSlash(f.Name())) + t.Fatalf("Unexpected read error: %s", err) } - - if size != fi.Size() { - t.Errorf("got size %d, expected %d", size, fi.Size()) + if n != size { + t.Errorf("Wrong read size. got %d, expected %d", n, size) } - - if !reflect.DeepEqual(*bi2, *bi) { - t.Errorf("got %#v, expected %#v", *bi2, *bi) + for i := range b { + if b[i] != bExpected[i] { + t.Errorf("Wrong content at index %d. got %d, expected %d", i, b[i], bExpected[i]) + } } - - ensurePresent(t, hdr.PAXRecords, "MSWINDOWS.fileattr", "MSWINDOWS.rawsd") }