Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support decompressor plugins #427

Merged
merged 21 commits into from Feb 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/zip.rb
Expand Up @@ -4,6 +4,7 @@
require 'fileutils'
require 'stringio'
require 'zlib'
require 'zip/constants'
require 'zip/dos_time'
require 'zip/ioextras'
require 'rbconfig'
Expand All @@ -21,14 +22,14 @@
require 'zip/null_input_stream'
require 'zip/pass_thru_compressor'
require 'zip/pass_thru_decompressor'
require 'zip/crypto/decrypted_io'
require 'zip/crypto/encryption'
require 'zip/crypto/null_encryption'
require 'zip/crypto/traditional_encryption'
require 'zip/inflater'
require 'zip/deflater'
require 'zip/streamable_stream'
require 'zip/streamable_directory'
require 'zip/constants'
require 'zip/errors'

module Zip
Expand Down
52 changes: 52 additions & 0 deletions lib/zip/constants.rb
Expand Up @@ -60,4 +60,56 @@ module Zip
FSTYPE_MAC_OSX => 'Mac OS/X (Darwin)'.freeze,
FSTYPE_ATHEOS => 'AtheOS'.freeze
}.freeze

# Numeric identifiers for ZIP compression methods. Values listed as
# RESERVED are intentionally left without a constant.
COMPRESSION_METHOD_STORE = 0
COMPRESSION_METHOD_SHRINK = 1
COMPRESSION_METHOD_REDUCE_1 = 2
COMPRESSION_METHOD_REDUCE_2 = 3
COMPRESSION_METHOD_REDUCE_3 = 4
COMPRESSION_METHOD_REDUCE_4 = 5
COMPRESSION_METHOD_IMPLODE = 6
# RESERVED = 7
COMPRESSION_METHOD_DEFLATE = 8
COMPRESSION_METHOD_DEFLATE_64 = 9
COMPRESSION_METHOD_PKWARE_DCLI = 10
# RESERVED = 11
COMPRESSION_METHOD_BZIP2 = 12
# RESERVED = 13
COMPRESSION_METHOD_LZMA = 14
# RESERVED = 15
COMPRESSION_METHOD_IBM_CMPSC = 16
# RESERVED = 17
COMPRESSION_METHOD_IBM_TERSE = 18
COMPRESSION_METHOD_IBM_LZ77 = 19
COMPRESSION_METHOD_JPEG = 96
COMPRESSION_METHOD_WAVPACK = 97
COMPRESSION_METHOD_PPMD = 98
COMPRESSION_METHOD_AES = 99

# Human-readable description for each compression method identifier.
# Both the table and its strings are frozen so the shared descriptions
# cannot be mutated by accident.
COMPRESSION_METHODS = {
  COMPRESSION_METHOD_STORE => 'Store (no compression)'.freeze,
  COMPRESSION_METHOD_SHRINK => 'Shrink'.freeze,
  COMPRESSION_METHOD_REDUCE_1 => 'Reduce with compression factor 1'.freeze,
  COMPRESSION_METHOD_REDUCE_2 => 'Reduce with compression factor 2'.freeze,
  COMPRESSION_METHOD_REDUCE_3 => 'Reduce with compression factor 3'.freeze,
  COMPRESSION_METHOD_REDUCE_4 => 'Reduce with compression factor 4'.freeze,
  COMPRESSION_METHOD_IMPLODE => 'Implode'.freeze,
  # RESERVED = 7
  COMPRESSION_METHOD_DEFLATE => 'Deflate'.freeze,
  COMPRESSION_METHOD_DEFLATE_64 => 'Deflate64(tm)'.freeze,
  COMPRESSION_METHOD_PKWARE_DCLI => 'PKWARE Data Compression Library Imploding (old IBM TERSE)'.freeze,
  # RESERVED = 11
  COMPRESSION_METHOD_BZIP2 => 'BZIP2'.freeze,
  # RESERVED = 13
  COMPRESSION_METHOD_LZMA => 'LZMA'.freeze,
  # RESERVED = 15
  COMPRESSION_METHOD_IBM_CMPSC => 'IBM z/OS CMPSC Compression'.freeze,
  # RESERVED = 17
  COMPRESSION_METHOD_IBM_TERSE => 'IBM TERSE (new)'.freeze,
  COMPRESSION_METHOD_IBM_LZ77 => 'IBM LZ77 z Architecture (PFS)'.freeze,
  COMPRESSION_METHOD_JPEG => 'JPEG variant'.freeze,
  COMPRESSION_METHOD_WAVPACK => 'WavPack compressed data'.freeze,
  COMPRESSION_METHOD_PPMD => 'PPMd version I, Rev 1'.freeze,
  COMPRESSION_METHOD_AES => 'AES encryption'.freeze
}.freeze
end
39 changes: 39 additions & 0 deletions lib/zip/crypto/decrypted_io.rb
@@ -0,0 +1,39 @@
module Zip
  # Read-only IO-like wrapper that decrypts data pulled from another stream.
  #
  # Data is read from the wrapped stream in CHUNK_SIZE pieces, passed through
  # the decrypter, and kept in an internal buffer until a caller asks for it.
  class DecryptedIo #:nodoc:all
    CHUNK_SIZE = 32_768

    # io::        source stream; must respond to +read(length)+ and +eof+.
    # decrypter:: object responding to +decrypt(data)+.
    def initialize(io, decrypter)
      @io = io
      @decrypter = decrypter
    end

    # Reads decrypted data.
    #
    # length:: maximum number of bytes to return; nil means "everything
    #          that is left".
    # outbuf:: string that receives the data (its content is replaced).
    #
    # Returns +outbuf+ filled with the data. Once all data has been
    # consumed it returns an empty string when +length+ is nil or zero,
    # and nil otherwise.
    def read(length = nil, outbuf = ''.dup)
      return (length.nil? || length.zero? ? '' : nil) if eof

      while length.nil? || (buffer.bytesize < length)
        break if input_finished?

        buffer << produce_input
      end

      # The upper bound used to reference an undefined +output_buffer+,
      # which made every read without a length raise NameError.
      outbuf.replace(buffer.slice!(0...(length || buffer.bytesize)))
    end

    private

    # True once the buffer is drained and the source has nothing more.
    def eof
      buffer.empty? && input_finished?
    end

    # Decrypted bytes that have been produced but not yet handed out.
    def buffer
      @buffer ||= ''.dup
    end

    def input_finished?
      @io.eof
    end

    # Pulls the next chunk from the source and decrypts it.
    def produce_input
      @decrypter.decrypt(@io.read(CHUNK_SIZE))
    end
  end
end
20 changes: 19 additions & 1 deletion lib/zip/decompressor.rb
@@ -1,9 +1,27 @@
module Zip
# Base class for decompressors. It also holds the registry that maps
# compression method identifiers to the decompressor class handling them.
class Decompressor #:nodoc:all
# Number of bytes Inflater requests from its input stream per read.
CHUNK_SIZE = 32_768
# NOTE(review): this looks like the pre-change one-argument signature that
# the diff view shows next to its replacement further down — confirm.
def initialize(input_stream)

# Registry of decompressor classes keyed by compression method; created
# empty on first access.
def self.decompressor_classes
@decompressor_classes ||= {}
end

# Stores +decompressor_class+ as the handler for +compression_method+,
# overwriting whatever was stored under that key before.
def self.register(compression_method, decompressor_class)
decompressor_classes[compression_method] = decompressor_class
end

# Returns the class registered for +compression_method+, or nil if there
# is none.
def self.find_by_compression_method(compression_method)
decompressor_classes[compression_method]
end

attr_reader :input_stream
attr_reader :decompressed_size

# input_stream::      stream the data to decompress is read from.
# decompressed_size:: size of the decompressed data when the caller knows
#                     it; defaults to nil.
def initialize(input_stream, decompressed_size = nil)
super()

@input_stream = input_stream
@decompressed_size = decompressed_size
end
end
end
Expand Down
8 changes: 8 additions & 0 deletions lib/zip/entry.rb
Expand Up @@ -72,6 +72,14 @@ def initialize(*args)
@extra = ::Zip::ExtraField.new(@extra.to_s) unless @extra.is_a?(::Zip::ExtraField)
end

# True when bit 0 of the general purpose flags is set, i.e. the entry's
# data is encrypted.
def encrypted?
  encryption_bit = 0b0001
  (gp_flags & encryption_bit) == encryption_bit
end

# True when bit 3 of the general purpose flags is set; the local header
# of such an entry may not carry proper crc/size information.
def incomplete?
  bit_three = 0b1000
  (gp_flags & bit_three) == bit_three
end

def time
if @extra['UniversalTime']
@extra['UniversalTime'].mtime
Expand Down
1 change: 1 addition & 0 deletions lib/zip/errors.rb
Expand Up @@ -7,6 +7,7 @@ class EntryNameError < Error; end
class EntrySizeError < Error; end
class InternalError < Error; end
class GPFBit3Error < Error; end
# Raised by Zip::Inflater when zlib reports an error while inflating.
class DecompressionError < Error; end

# Backwards compatibility with v1 (delete in v2)
ZipError = Error
Expand Down
2 changes: 1 addition & 1 deletion lib/zip/file.rb
Expand Up @@ -49,7 +49,7 @@ class File < CentralDirectory
MAX_SEGMENT_SIZE = 3_221_225_472
MIN_SEGMENT_SIZE = 65_536
DATA_BUFFER_SIZE = 8192
IO_METHODS = [:tell, :seek, :read, :close]
IO_METHODS = [:tell, :seek, :read, :eof, :close]

DEFAULT_OPTIONS = {
restore_ownership: false,
Expand Down
58 changes: 22 additions & 36 deletions lib/zip/inflater.rb
@@ -1,64 +1,50 @@
module Zip
class Inflater < Decompressor #:nodoc:all
def initialize(input_stream, decrypter = NullDecrypter.new)
super(input_stream)
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
@output_buffer = ''.dup
@has_returned_empty_string = false
@decrypter = decrypter
end
def initialize(*args)
super

def sysread(number_of_bytes = nil, buf = '')
readEverything = number_of_bytes.nil?
while readEverything || @output_buffer.bytesize < number_of_bytes
break if internal_input_finished?
@output_buffer << internal_produce_input(buf)
end
return value_when_finished if @output_buffer.bytesize == 0 && input_finished?
end_index = number_of_bytes.nil? ? @output_buffer.bytesize : number_of_bytes
@output_buffer.slice!(0...end_index)
@buffer = ''.dup
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
end

def produce_input
if @output_buffer.empty?
internal_produce_input
else
@output_buffer.slice!(0...(@output_buffer.length))
def read(length = nil, outbuf = '')
return ((length.nil? || length.zero?) ? "" : nil) if eof

while length.nil? || (@buffer.bytesize < length)
break if input_finished?
@buffer << produce_input
end

outbuf.replace(@buffer.slice!(0...(length || @buffer.bytesize)))
end

# to be used with produce_input, not read (as read may still have more data cached)
# is data cached anywhere other than @outputBuffer? the comment above may be wrong
def input_finished?
@output_buffer.empty? && internal_input_finished?
def eof
@buffer.empty? && input_finished?
end

alias :eof input_finished?
alias :eof? input_finished?
alias_method :eof?, :eof

private

def internal_produce_input(buf = '')
def produce_input
retried = 0
begin
@zlib_inflater.inflate(@decrypter.decrypt(@input_stream.read(Decompressor::CHUNK_SIZE, buf)))
@zlib_inflater.inflate(input_stream.read(Decompressor::CHUNK_SIZE))
rescue Zlib::BufError
raise if retried >= 5 # how many times should we retry?
retried += 1
retry
end
rescue Zlib::Error => e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
rescue Zlib::Error => e
rescue Zlib::Error

raise(::Zip::DecompressionError, 'zlib error while inflating')
end

def internal_input_finished?
def input_finished?
@zlib_inflater.finished?
end

def value_when_finished # mimic behaviour of ruby File object.
return if @has_returned_empty_string
@has_returned_empty_string = true
''
end
end

::Zip::Decompressor.register(::Zip::COMPRESSION_METHOD_DEFLATE, ::Zip::Inflater)
end

# Copyright (C) 2002, 2003 Thomas Sondergaard
Expand Down
50 changes: 27 additions & 23 deletions lib/zip/input_stream.rb
Expand Up @@ -39,6 +39,8 @@ module Zip
# class.

class InputStream
CHUNK_SIZE = 32_768

include ::Zip::IOExtras::AbstractInputStream

# Opens the indicated zip file. An exception is thrown
Expand Down Expand Up @@ -78,16 +80,10 @@ def rewind
end

# Modeled after IO.sysread
def sysread(number_of_bytes = nil, buf = nil)
@decompressor.sysread(number_of_bytes, buf)
end

def eof
@output_buffer.empty? && @decompressor.eof
def sysread(length = nil, outbuf = '')
@decompressor.read(length, outbuf)
end

alias :eof? eof

class << self
# Same as #initialize but if a block is passed the opened
# stream is passed to the block and closed when the block
Expand Down Expand Up @@ -124,46 +120,54 @@ def get_io(io_or_file, offset = 0)

def open_entry
@current_entry = ::Zip::Entry.read_local_entry(@archive_io)
if @current_entry && @current_entry.gp_flags & 1 == 1 && @decrypter.is_a?(NullEncrypter)
if @current_entry && @current_entry.encrypted? && @decrypter.is_a?(NullEncrypter)
raise Error, 'password required to decode zip file'
end
if @current_entry && @current_entry.gp_flags & 8 == 8 && @current_entry.crc == 0 \
if @current_entry && @current_entry.incomplete? && @current_entry.crc == 0 \
&& @current_entry.compressed_size == 0 \
&& @current_entry.size == 0 && !@complete_entry
raise GPFBit3Error,
'General purpose flag Bit 3 is set so not possible to get proper info from local header.' \
'Please use ::Zip::File instead of ::Zip::InputStream'
end
@decrypted_io = get_decrypted_io
@decompressor = get_decompressor
flush
@current_entry
end

# Reads the decrypter's header bytes from the archive, resets the
# decrypter with them, and returns a DecryptedIo wrapping the archive
# stream from that position on.
def get_decrypted_io
  @decrypter.reset!(@archive_io.read(@decrypter.header_bytesize))
  ::Zip::DecryptedIo.new(@archive_io, @decrypter)
end

def get_decompressor
if @current_entry.nil?
::Zip::NullDecompressor
elsif @current_entry.compression_method == ::Zip::Entry::STORED
if @current_entry.gp_flags & 8 == 8 && @current_entry.crc == 0 && @current_entry.size == 0 && @complete_entry
::Zip::PassThruDecompressor.new(@archive_io, @complete_entry.size)
return ::Zip::NullDecompressor if @current_entry.nil?

decompressed_size =
if @current_entry.incomplete? && @current_entry.crc == 0 && @current_entry.size == 0 && @complete_entry
@complete_entry.size
else
::Zip::PassThruDecompressor.new(@archive_io, @current_entry.size)
@current_entry.size
end
elsif @current_entry.compression_method == ::Zip::Entry::DEFLATED
header = @archive_io.read(@decrypter.header_bytesize)
@decrypter.reset!(header)
::Zip::Inflater.new(@archive_io, @decrypter)
else

decompressor_class = ::Zip::Decompressor.find_by_compression_method(@current_entry.compression_method)
if decompressor_class.nil?
raise ::Zip::CompressionMethodError,
"Unsupported compression method #{@current_entry.compression_method}"
end

decompressor_class.new(@decrypted_io, decompressed_size)
end

def produce_input
@decompressor.produce_input
@decompressor.read(CHUNK_SIZE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are quite a few CHUNK_SIZEs with the same value already. I wonder whether using Decompressor::CHUNK_SIZE would be appropriate here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the optimal CHUNK_SIZE for decompressors is dependent on the compression method. Thus Decompressor::CHUNK_SIZE should not exist and should be replaced by an implementation-dependent CHUNK_SIZE in Inflater.

Therefore, in InputStream, I think it is better to not depend on this Decompressor::CHUNK_SIZE.

end

def input_finished?
@decompressor.input_finished?
@decompressor.eof
end
end
end
Expand Down
6 changes: 6 additions & 0 deletions lib/zip/ioextras/abstract_input_stream.rb
Expand Up @@ -106,6 +106,12 @@ def each_line(a_sep_string = $/)
end

alias_method :each, :each_line

# End of stream is reached only when nothing is left in the output
# buffer and the underlying input reports that it is finished.
def eof
  return false unless @output_buffer.empty?

  input_finished?
end

alias_method :eof?, :eof
end
end
end
10 changes: 1 addition & 9 deletions lib/zip/null_decompressor.rb
Expand Up @@ -2,18 +2,10 @@ module Zip
module NullDecompressor #:nodoc:all
module_function

def sysread(_numberOfBytes = nil, _buf = nil)
def read(_length = nil, _outbuf = nil)
nil
end

def produce_input
nil
end

def input_finished?
true
end

def eof
true
end
Expand Down