Skip to content

Commit

Permalink
Merge pull request #427 from jspanjers/refactor-decompressor
Browse files Browse the repository at this point in the history
Support decompressor plugins
  • Loading branch information
jdleesmiller committed Feb 1, 2020
2 parents 0b79104 + 0b9433c commit 666fb8c
Show file tree
Hide file tree
Showing 23 changed files with 324 additions and 111 deletions.
3 changes: 2 additions & 1 deletion lib/zip.rb
Expand Up @@ -4,6 +4,7 @@
require 'fileutils'
require 'stringio'
require 'zlib'
require 'zip/constants'
require 'zip/dos_time'
require 'zip/ioextras'
require 'rbconfig'
Expand All @@ -21,14 +22,14 @@
require 'zip/null_input_stream'
require 'zip/pass_thru_compressor'
require 'zip/pass_thru_decompressor'
require 'zip/crypto/decrypted_io'
require 'zip/crypto/encryption'
require 'zip/crypto/null_encryption'
require 'zip/crypto/traditional_encryption'
require 'zip/inflater'
require 'zip/deflater'
require 'zip/streamable_stream'
require 'zip/streamable_directory'
require 'zip/constants'
require 'zip/errors'

module Zip
Expand Down
52 changes: 52 additions & 0 deletions lib/zip/constants.rb
Expand Up @@ -60,4 +60,56 @@ module Zip
FSTYPE_MAC_OSX => 'Mac OS/X (Darwin)'.freeze,
FSTYPE_ATHEOS => 'AtheOS'.freeze
}.freeze

# Numeric compression method ids as stored in a ZIP entry header.
# Ids marked RESERVED below have no constant on purpose.
COMPRESSION_METHOD_STORE = 0
COMPRESSION_METHOD_SHRINK = 1
COMPRESSION_METHOD_REDUCE_1 = 2
COMPRESSION_METHOD_REDUCE_2 = 3
COMPRESSION_METHOD_REDUCE_3 = 4
COMPRESSION_METHOD_REDUCE_4 = 5
COMPRESSION_METHOD_IMPLODE = 6
# RESERVED = 7
COMPRESSION_METHOD_DEFLATE = 8
COMPRESSION_METHOD_DEFLATE_64 = 9
COMPRESSION_METHOD_PKWARE_DCLI = 10
# RESERVED = 11
COMPRESSION_METHOD_BZIP2 = 12
# RESERVED = 13
COMPRESSION_METHOD_LZMA = 14
# RESERVED = 15
COMPRESSION_METHOD_IBM_CMPSC = 16
# RESERVED = 17
COMPRESSION_METHOD_IBM_TERSE = 18
COMPRESSION_METHOD_IBM_LZ77 = 19
COMPRESSION_METHOD_JPEG = 96
COMPRESSION_METHOD_WAVPACK = 97
COMPRESSION_METHOD_PPMD = 98
COMPRESSION_METHOD_AES = 99

# Human-readable name for each compression method id defined above.
# Both the hash and every name string are frozen, so the table cannot be
# altered through a value handed out to a caller.
COMPRESSION_METHODS = {
  COMPRESSION_METHOD_STORE => 'Store (no compression)'.freeze,
  COMPRESSION_METHOD_SHRINK => 'Shrink'.freeze,
  COMPRESSION_METHOD_REDUCE_1 => 'Reduce with compression factor 1'.freeze,
  COMPRESSION_METHOD_REDUCE_2 => 'Reduce with compression factor 2'.freeze,
  COMPRESSION_METHOD_REDUCE_3 => 'Reduce with compression factor 3'.freeze,
  COMPRESSION_METHOD_REDUCE_4 => 'Reduce with compression factor 4'.freeze,
  COMPRESSION_METHOD_IMPLODE => 'Implode'.freeze,
  # RESERVED = 7
  COMPRESSION_METHOD_DEFLATE => 'Deflate'.freeze,
  COMPRESSION_METHOD_DEFLATE_64 => 'Deflate64(tm)'.freeze,
  COMPRESSION_METHOD_PKWARE_DCLI => 'PKWARE Data Compression Library Imploding (old IBM TERSE)'.freeze,
  # RESERVED = 11
  COMPRESSION_METHOD_BZIP2 => 'BZIP2'.freeze,
  # RESERVED = 13
  COMPRESSION_METHOD_LZMA => 'LZMA'.freeze,
  # RESERVED = 15
  COMPRESSION_METHOD_IBM_CMPSC => 'IBM z/OS CMPSC Compression'.freeze,
  # RESERVED = 17
  COMPRESSION_METHOD_IBM_TERSE => 'IBM TERSE (new)'.freeze,
  COMPRESSION_METHOD_IBM_LZ77 => 'IBM LZ77 z Architecture (PFS)'.freeze,
  COMPRESSION_METHOD_JPEG => 'JPEG variant'.freeze,
  COMPRESSION_METHOD_WAVPACK => 'WavPack compressed data'.freeze,
  COMPRESSION_METHOD_PPMD => 'PPMd version I, Rev 1'.freeze,
  COMPRESSION_METHOD_AES => 'AES encryption'.freeze
}.freeze
end
39 changes: 39 additions & 0 deletions lib/zip/crypto/decrypted_io.rb
@@ -0,0 +1,39 @@
module Zip
  # Read-only wrapper around an IO-like object that runs everything it reads
  # through a decrypter before handing it to the caller.
  class DecryptedIo #:nodoc:all
    # Number of bytes requested from the underlying IO per refill.
    CHUNK_SIZE = 32_768

    # io        - source object; must respond to #read(length) and #eof.
    # decrypter - object responding to #decrypt(data) and returning a String.
    def initialize(io, decrypter)
      @io = io
      @decrypter = decrypter
    end

    # Reads up to +length+ decrypted bytes, or everything that is left when
    # +length+ is nil. The data replaces the contents of +outbuf+, which is
    # also the return value.
    #
    # Once all input is consumed it returns '' when +length+ is nil or zero,
    # and nil otherwise.
    #
    # The default +outbuf+ is an explicit copy so that it stays mutable even
    # if string literals are frozen in this file.
    def read(length = nil, outbuf = ''.dup)
      return (length.nil? || length.zero? ? '' : nil) if eof

      while length.nil? || (buffer.bytesize < length)
        break if input_finished?

        buffer << produce_input
      end

      # Without a length, hand out the whole internal buffer.
      outbuf.replace(buffer.slice!(0...(length || buffer.bytesize)))
    end

    private

    # True when nothing is buffered and the underlying IO is exhausted.
    def eof
      buffer.empty? && input_finished?
    end

    # Decrypted bytes that have not been handed out yet (created lazily).
    def buffer
      @buffer ||= ''.dup
    end

    def input_finished?
      @io.eof
    end

    # Pulls the next chunk from the underlying IO and decrypts it.
    def produce_input
      @decrypter.decrypt(@io.read(CHUNK_SIZE))
    end
  end
end
20 changes: 19 additions & 1 deletion lib/zip/decompressor.rb
@@ -1,9 +1,27 @@
module Zip
class Decompressor #:nodoc:all
CHUNK_SIZE = 32_768
def initialize(input_stream)

class << self
  # Table of registered decompressor classes, keyed by compression method.
  # It is created empty on first access.
  def decompressor_classes
    @decompressor_classes ||= {}
  end

  # Associates +decompressor_class+ with +compression_method+, replacing any
  # class registered for that method before. Returns the registered class.
  def register(compression_method, decompressor_class)
    decompressor_classes.store(compression_method, decompressor_class)
  end

  # Returns the class registered for +compression_method+, or nil if none.
  def find_by_compression_method(compression_method)
    registry = decompressor_classes
    registry[compression_method]
  end
end

attr_reader :input_stream
attr_reader :decompressed_size

# Stores the source stream and the expected output size for subclasses.
#
# input_stream      - object the decompressor reads compressed data from;
#                     exposed through the input_stream reader.
# decompressed_size - size of the decompressed data, or nil when it was not
#                     given; exposed through the decompressed_size reader.
def initialize(input_stream, decompressed_size = nil)
# Explicit empty parentheses: call the parent initializer without arguments.
super()

@input_stream = input_stream
@decompressed_size = decompressed_size
end
end
end
Expand Down
8 changes: 8 additions & 0 deletions lib/zip/entry.rb
Expand Up @@ -72,6 +72,14 @@ def initialize(*args)
@extra = ::Zip::ExtraField.new(@extra.to_s) unless @extra.is_a?(::Zip::ExtraField)
end

# True when bit 0 (value 1) of the general purpose flags is set.
def encrypted?
  encryption_bit = 0b0001
  (gp_flags & encryption_bit) == encryption_bit
end

# True when bit 3 (value 8) of the general purpose flags is set.
def incomplete?
  bit_three = 0b1000
  (gp_flags & bit_three) == bit_three
end

def time
if @extra['UniversalTime']
@extra['UniversalTime'].mtime
Expand Down
1 change: 1 addition & 0 deletions lib/zip/errors.rb
Expand Up @@ -7,6 +7,7 @@ class EntryNameError < Error; end
class EntrySizeError < Error; end
class InternalError < Error; end
class GPFBit3Error < Error; end
class DecompressionError < Error; end

# Backwards compatibility with v1 (delete in v2)
ZipError = Error
Expand Down
2 changes: 1 addition & 1 deletion lib/zip/file.rb
Expand Up @@ -49,7 +49,7 @@ class File < CentralDirectory
MAX_SEGMENT_SIZE = 3_221_225_472
MIN_SEGMENT_SIZE = 65_536
DATA_BUFFER_SIZE = 8192
IO_METHODS = [:tell, :seek, :read, :close]
IO_METHODS = [:tell, :seek, :read, :eof, :close]

DEFAULT_OPTIONS = {
restore_ownership: false,
Expand Down
58 changes: 22 additions & 36 deletions lib/zip/inflater.rb
@@ -1,64 +1,50 @@
module Zip
class Inflater < Decompressor #:nodoc:all
def initialize(input_stream, decrypter = NullDecrypter.new)
super(input_stream)
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
@output_buffer = ''.dup
@has_returned_empty_string = false
@decrypter = decrypter
end
def initialize(*args)
super

def sysread(number_of_bytes = nil, buf = '')
readEverything = number_of_bytes.nil?
while readEverything || @output_buffer.bytesize < number_of_bytes
break if internal_input_finished?
@output_buffer << internal_produce_input(buf)
end
return value_when_finished if @output_buffer.bytesize == 0 && input_finished?
end_index = number_of_bytes.nil? ? @output_buffer.bytesize : number_of_bytes
@output_buffer.slice!(0...end_index)
@buffer = ''.dup
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
end

def produce_input
if @output_buffer.empty?
internal_produce_input
else
@output_buffer.slice!(0...(@output_buffer.length))
# Returns up to +length+ inflated bytes, or all remaining bytes when
# +length+ is nil. The result is written into +outbuf+, which is returned.
# At end of data the result is "" for a nil or zero +length+, nil otherwise.
def read(length = nil, outbuf = '')
  if eof
    return (length.nil? || length.zero?) ? "" : nil
  end

  # Refill until enough bytes are buffered or no more input can be produced.
  loop do
    satisfied = !length.nil? && @buffer.bytesize >= length
    break if satisfied || input_finished?

    @buffer << produce_input
  end

  byte_count = length || @buffer.bytesize
  outbuf.replace(@buffer.slice!(0...byte_count))
end

# to be used with produce_input, not read (as read may still have more data cached)
# is data cached anywhere other than @outputBuffer? the comment above may be wrong
def input_finished?
@output_buffer.empty? && internal_input_finished?
# True once the buffer is drained and input_finished? reports no more input.
def eof
  return false unless @buffer.empty?

  input_finished?
end

alias :eof input_finished?
alias :eof? input_finished?
alias_method :eof?, :eof

private

def internal_produce_input(buf = '')
def produce_input
retried = 0
begin
@zlib_inflater.inflate(@decrypter.decrypt(@input_stream.read(Decompressor::CHUNK_SIZE, buf)))
@zlib_inflater.inflate(input_stream.read(Decompressor::CHUNK_SIZE))
rescue Zlib::BufError
raise if retried >= 5 # how many times should we retry?
retried += 1
retry
end
rescue Zlib::Error => e
raise(::Zip::DecompressionError, 'zlib error while inflating')
end

def internal_input_finished?
def input_finished?
@zlib_inflater.finished?
end

def value_when_finished # mimic behaviour of ruby File object.
return if @has_returned_empty_string
@has_returned_empty_string = true
''
end
end

::Zip::Decompressor.register(::Zip::COMPRESSION_METHOD_DEFLATE, ::Zip::Inflater)
end

# Copyright (C) 2002, 2003 Thomas Sondergaard
Expand Down
50 changes: 27 additions & 23 deletions lib/zip/input_stream.rb
Expand Up @@ -39,6 +39,8 @@ module Zip
# class.

class InputStream
CHUNK_SIZE = 32_768

include ::Zip::IOExtras::AbstractInputStream

# Opens the indicated zip file. An exception is thrown
Expand Down Expand Up @@ -78,16 +80,10 @@ def rewind
end

# Modeled after IO.sysread
def sysread(number_of_bytes = nil, buf = nil)
@decompressor.sysread(number_of_bytes, buf)
end

def eof
@output_buffer.empty? && @decompressor.eof
def sysread(length = nil, outbuf = '')
@decompressor.read(length, outbuf)
end

alias :eof? eof

class << self
# Same as #initialize but if a block is passed the opened
# stream is passed to the block and closed when the block
Expand Down Expand Up @@ -124,46 +120,54 @@ def get_io(io_or_file, offset = 0)

def open_entry
@current_entry = ::Zip::Entry.read_local_entry(@archive_io)
if @current_entry && @current_entry.gp_flags & 1 == 1 && @decrypter.is_a?(NullEncrypter)
if @current_entry && @current_entry.encrypted? && @decrypter.is_a?(NullEncrypter)
raise Error, 'password required to decode zip file'
end
if @current_entry && @current_entry.gp_flags & 8 == 8 && @current_entry.crc == 0 \
if @current_entry && @current_entry.incomplete? && @current_entry.crc == 0 \
&& @current_entry.compressed_size == 0 \
&& @current_entry.size == 0 && !@complete_entry
raise GPFBit3Error,
'General purpose flag Bit 3 is set so not possible to get proper info from local header.' \
'Please use ::Zip::File instead of ::Zip::InputStream'
end
@decrypted_io = get_decrypted_io
@decompressor = get_decompressor
flush
@current_entry
end

# Reads the decrypter's header bytes from the archive, resets the decrypter
# with them, and returns a DecryptedIo wrapping the archive IO.
def get_decrypted_io
  header_size = @decrypter.header_bytesize
  @decrypter.reset!(@archive_io.read(header_size))

  ::Zip::DecryptedIo.new(@archive_io, @decrypter)
end

def get_decompressor
if @current_entry.nil?
::Zip::NullDecompressor
elsif @current_entry.compression_method == ::Zip::Entry::STORED
if @current_entry.gp_flags & 8 == 8 && @current_entry.crc == 0 && @current_entry.size == 0 && @complete_entry
::Zip::PassThruDecompressor.new(@archive_io, @complete_entry.size)
return ::Zip::NullDecompressor if @current_entry.nil?

decompressed_size =
if @current_entry.incomplete? && @current_entry.crc == 0 && @current_entry.size == 0 && @complete_entry
@complete_entry.size
else
::Zip::PassThruDecompressor.new(@archive_io, @current_entry.size)
@current_entry.size
end
elsif @current_entry.compression_method == ::Zip::Entry::DEFLATED
header = @archive_io.read(@decrypter.header_bytesize)
@decrypter.reset!(header)
::Zip::Inflater.new(@archive_io, @decrypter)
else

decompressor_class = ::Zip::Decompressor.find_by_compression_method(@current_entry.compression_method)
if decompressor_class.nil?
raise ::Zip::CompressionMethodError,
"Unsupported compression method #{@current_entry.compression_method}"
end

decompressor_class.new(@decrypted_io, decompressed_size)
end

def produce_input
@decompressor.produce_input
@decompressor.read(CHUNK_SIZE)
end

def input_finished?
@decompressor.input_finished?
@decompressor.eof
end
end
end
Expand Down
6 changes: 6 additions & 0 deletions lib/zip/ioextras/abstract_input_stream.rb
Expand Up @@ -106,6 +106,12 @@ def each_line(a_sep_string = $/)
end

alias_method :each, :each_line

# True when the output buffer is empty and input_finished? reports that no
# further input is available.
def eof
  @output_buffer.empty? ? input_finished? : false
end

alias_method :eof?, :eof
end
end
end
10 changes: 1 addition & 9 deletions lib/zip/null_decompressor.rb
Expand Up @@ -2,18 +2,10 @@ module Zip
module NullDecompressor #:nodoc:all
module_function

def sysread(_numberOfBytes = nil, _buf = nil)
def read(_length = nil, _outbuf = nil)
nil
end

def produce_input
nil
end

def input_finished?
true
end

def eof
true
end
Expand Down

0 comments on commit 666fb8c

Please sign in to comment.