Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multistream support to BZip2 #417

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/ICSharpCode.SharpZipLib/BZip2/BZip2.cs
Expand Up @@ -15,7 +15,8 @@ public static class BZip2
/// <param name="inStream">The readable stream containing data to decompress.</param>
/// <param name="outStream">The output stream to receive the decompressed data.</param>
/// <param name="isStreamOwner">Both streams are closed on completion if true.</param>
public static void Decompress(Stream inStream, Stream outStream, bool isStreamOwner)
/// <param name="multiStream">Whether to attempt to continue reading after stream end.</param>
public static void Decompress(Stream inStream, Stream outStream, bool isStreamOwner, bool multiStream)
{
if (inStream == null || outStream == null)
{
Expand All @@ -24,7 +25,7 @@ public static void Decompress(Stream inStream, Stream outStream, bool isStreamOw

try
{
using (BZip2InputStream bzipInput = new BZip2InputStream(inStream))
using (BZip2InputStream bzipInput = new BZip2InputStream(inStream, multiStream))
{
bzipInput.IsStreamOwner = isStreamOwner;
Core.StreamUtils.Copy(bzipInput, outStream, new byte[4096]);
Expand All @@ -40,6 +41,16 @@ public static void Decompress(Stream inStream, Stream outStream, bool isStreamOw
}
}

/// <summary>
/// Decompress the <paramref name="inStream">input</paramref> writing
/// uncompressed data to the <paramref name="outStream">output stream</paramref>
/// </summary>
/// <param name="inStream">The readable stream containing data to decompress.</param>
/// <param name="outStream">The output stream to receive the decompressed data.</param>
/// <param name="isStreamOwner">Both streams are closed on completion if true.</param>
public static void Decompress(Stream inStream, Stream outStream, bool isStreamOwner)
=> Decompress(inStream, outStream, isStreamOwner, true);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
=> Decompress(inStream, outStream, isStreamOwner, true);
=> Decompress(inStream, outStream, isStreamOwner, inStream.CanSeek);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not certain if there aren't any streams that support seeking but don't allow access to their length, but maybe that's too much of an unusual situation to worry about, and as long as the functionality can be disabled that should cover the situation if needed.

Copy link
Member Author

@piksel piksel Mar 29, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if seeking can even be achieved if you don't allow Length (since the SeekOrigin parameter of Seek() call can be set to End.
The Stream documentation basically thinks of them as mutually inclusive:
https://docs.microsoft.com/en-us/dotnet/standard/io/?view=netframework-4.8#streams

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's why I said too unusual to worry about (it might not make sense, but that doesn't guarantee it won't happen!).
Not a reason to avoid making the change though


/// <summary>
/// Compress the <paramref name="inStream">input stream</paramref> sending
/// result data to <paramref name="outStream">output stream</paramref>
Expand Down
16 changes: 12 additions & 4 deletions src/ICSharpCode.SharpZipLib/BZip2/BZip2Constants.cs
Expand Up @@ -3,7 +3,7 @@ namespace ICSharpCode.SharpZipLib.BZip2
/// <summary>
/// Defines internal values for both compression and decompression
/// </summary>
internal sealed class BZip2Constants
internal static class BZip2Constants
{
/// <summary>
/// Random numbers used to randomise repetitive blocks
Expand Down Expand Up @@ -114,8 +114,16 @@ internal sealed class BZip2Constants
/// </summary>
public const int OvershootBytes = 20;

private BZip2Constants()
{
}
/// <summary>
/// 48 bit magic number used to identify stream footer
/// </summary>
/// <remarks>Square root of Pi in BCD</remarks>
public const ulong MagicFooter = 0x177245385090UL;

/// <summary>
/// 48 bit magic number used to identify stream header
/// </summary>
/// <remarks>Pi in BCD</remarks>
public const ulong MagicHeader = 0x314159265359UL;
}
}
73 changes: 50 additions & 23 deletions src/ICSharpCode.SharpZipLib/BZip2/BZip2InputStream.cs
Expand Up @@ -87,16 +87,22 @@ during decompression.
private int i2, j2;
private byte z;

private bool multiStream;
private long processedLastStream;

#endregion Instance Fields

/// <summary>
/// Construct instance for reading from stream
/// </summary>
/// <param name="stream">Data source</param>
public BZip2InputStream(Stream stream)
/// <param name="multiStream">Whether to continue reading streams after the first one</param>
public BZip2InputStream(Stream stream, bool multiStream)
{
if (stream == null)
throw new ArgumentNullException(nameof(stream));
baseStream = stream ?? throw new ArgumentNullException(nameof(stream));
this.multiStream = multiStream;


// init arrays
for (int i = 0; i < BZip2Constants.GroupCount; ++i)
{
Expand All @@ -105,14 +111,16 @@ public BZip2InputStream(Stream stream)
perm[i] = new int[BZip2Constants.MaximumAlphaSize];
}

baseStream = stream;
bsLive = 0;
bsBuff = 0;

Initialize();
InitBlock();
SetupBlock();
}

/// <summary>
/// Construct instance for reading from stream
/// </summary>
/// <param name="stream">Data source</param>
public BZip2InputStream(Stream stream) : this(stream, true) { }
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public BZip2InputStream(Stream stream) : this(stream, true) { }
public BZip2InputStream(Stream stream) : this(stream, stream.CanSeek) { }


/// <summary>
/// Get/set flag indicating ownership of underlying stream.
/// When the flag is true <see cref="Stream.Dispose()" /> will close the underlying stream also.
Expand Down Expand Up @@ -286,7 +294,17 @@ public override int ReadByte()
{
if (streamEnd)
{
return -1; // ok
long bytesProcessed = baseStream.Position - processedLastStream;
long bytesLeft = baseStream.Length - baseStream.Position;
if (multiStream && bytesProcessed > 0 && bytesLeft > 0)
{
processedLastStream = bytesProcessed;
Initialize();
}
else
{
return -1; // ok
}
Comment on lines +297 to +307
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
long bytesProcessed = baseStream.Position - processedLastStream;
long bytesLeft = baseStream.Length - baseStream.Position;
if (multiStream && bytesProcessed > 0 && bytesLeft > 0)
{
processedLastStream = bytesProcessed;
Initialize();
}
else
{
return -1; // ok
}
if(!multiStream) {
return -1; // no multistream support, so just end it here
}
long bytesProcessed = baseStream.Position - processedLastStream;
long bytesLeft = baseStream.Length - baseStream.Position;
if (bytesProcessed > 0 && bytesLeft > 0)
{
processedLastStream = bytesProcessed;
Initialize();
}
else
{
return -1; // ok
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems reasonable

}

int retChar = currentChar;
Expand Down Expand Up @@ -334,38 +352,42 @@ private void MakeMaps()

private void Initialize()
{
bsLive = 0;
bsBuff = 0;

char magic1 = BsGetUChar();
char magic2 = BsGetUChar();

char magic3 = BsGetUChar();
char magic4 = BsGetUChar();
char version = BsGetUChar();
char blockSize = BsGetUChar();

if (magic1 != 'B' || magic2 != 'Z' || magic3 != 'h' || magic4 < '1' || magic4 > '9')
if (magic1 != 'B' || magic2 != 'Z' || version != 'h' || blockSize < '1' || blockSize > '9')
{
streamEnd = true;
return;
}
else
{
streamEnd = false;
SetDecompressStructureSizes(blockSize - '0');
computedCombinedCRC = 0;
}

SetDecompressStructureSizes(magic4 - '0');
computedCombinedCRC = 0;
InitBlock();
SetupBlock();
}

private void InitBlock()
{
char magic1 = BsGetUChar();
char magic2 = BsGetUChar();
char magic3 = BsGetUChar();
char magic4 = BsGetUChar();
char magic5 = BsGetUChar();
char magic6 = BsGetUChar();

if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90)
var streamMagic = BsGetStreamMagic();

if (streamMagic == BZip2Constants.MagicFooter)
{
Complete();
return;
}

if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59)
if (streamMagic != BZip2Constants.MagicHeader)
{
BadBlockHeader();
streamEnd = true;
Expand Down Expand Up @@ -452,6 +474,11 @@ private int BsGetIntVS(int numBits)
return BsR(numBits);
}

private ulong BsGetStreamMagic()
{
return ((ulong)BsR(24) << 24) | (uint)BsR(24);
}

private int BsGetInt32()
{
int result = BsR(8);
Expand Down
50 changes: 50 additions & 0 deletions test/ICSharpCode.SharpZipLib.Tests/BZip2/Bzip2Tests.cs
Expand Up @@ -3,6 +3,7 @@
using NUnit.Framework;
using System;
using System.IO;
using System.Text;

namespace ICSharpCode.SharpZipLib.Tests.BZip2
{
Expand Down Expand Up @@ -52,6 +53,55 @@ public void BasicRoundTrip()
}
}

/// <summary>
/// MultiStream support
/// </summary>
[Test]
[Category("BZip2")]
public void MultiStream()
{
var firstData = "first";
var secondData = "second";
using (var ms = new MemoryStream())
{
using (var outStreamA = new BZip2OutputStream(ms))
{
outStreamA.IsStreamOwner = false;
var data = Encoding.ASCII.GetBytes(firstData);
outStreamA.Write(data, 0, data.Length);
}

using (var outStreamB = new BZip2OutputStream(ms))
{
outStreamB.IsStreamOwner = false;
var data = Encoding.ASCII.GetBytes(secondData);
outStreamB.Write(data, 0, data.Length);
}

ms.Seek(0, SeekOrigin.Begin);

using (var inStream = new BZip2InputStream(ms, false))
using (var sr = new StreamReader(inStream, Encoding.ASCII, false, 1024, true))
{
inStream.IsStreamOwner = false;
var data = sr.ReadToEnd();
Assert.AreEqual(firstData, data, "When decompressing the data without multistream support, the output data did not match the contents of the first data stream");
}

ms.Seek(0, SeekOrigin.Begin);

using (var inStream = new BZip2InputStream(ms, true))
using (var sr = new StreamReader(inStream, Encoding.ASCII, false, 1024, true))
{
inStream.IsStreamOwner = false;
var data = sr.ReadToEnd();
Assert.AreEqual(firstData + secondData, data, "When decompressing the data with multistream support, the output data did not match the combined contents of input data streams");
}

}

}

/// <summary>
/// Check that creating an empty archive is handled ok
/// </summary>
Expand Down