Skip to content

Commit

Permalink
Add ability to customize downloaders. Closes #1636
Browse files Browse the repository at this point in the history
  • Loading branch information
mshibuya committed Jun 11, 2019
1 parent c7bebd6 commit 68d1eb8
Show file tree
Hide file tree
Showing 7 changed files with 288 additions and 281 deletions.
50 changes: 50 additions & 0 deletions lib/carrierwave/downloader/base.rb
@@ -0,0 +1,50 @@
require 'open-uri'
require 'carrierwave/downloader/remote_file'

module CarrierWave
module Downloader
class Base
attr_reader :uploader

def initialize(uploader)
@uploader = uploader
end

##
# Downloads a file from given URL and returns a RemoteFile.
#
# === Parameters
#
# [url (String)] The URL where the remote file is stored
# [remote_headers (Hash)] Request headers
#
def download(url, remote_headers = {})
headers = remote_headers.
reverse_merge('User-Agent' => "CarrierWave/#{CarrierWave::VERSION}")
begin
file = OpenURI.open_uri(process_uri(url.to_s), headers)
rescue StandardError => e
raise CarrierWave::DownloadError, "could not download file: #{e.message}"
end
CarrierWave::Downloader::RemoteFile.new(file)
end

##
# Processes the given URL by parsing and escaping it. Public to allow overriding.
#
# === Parameters
#
# [url (String)] The URL where the remote file is stored
#
def process_uri(uri)
URI.parse(uri)
rescue URI::InvalidURIError
uri_parts = uri.split('?')
# regexp from Ruby's URI::Parser#regexp[:UNSAFE], with [] specifically removed
encoded_uri = URI.encode(uri_parts.shift, /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,]/)
encoded_uri << '?' << URI.encode(uri_parts.join('?')) if uri_parts.any?
URI.parse(encoded_uri) rescue raise CarrierWave::DownloadError, "couldn't parse URL"
end
end
end
end
42 changes: 42 additions & 0 deletions lib/carrierwave/downloader/remote_file.rb
@@ -0,0 +1,42 @@
module CarrierWave
module Downloader
class RemoteFile
attr_reader :file

def initialize(file)
@file = file.is_a?(String) ? StringIO.new(file) : file
end

def original_filename
filename = filename_from_header || filename_from_uri
mime_type = MiniMime.lookup_by_content_type(file.content_type)
unless File.extname(filename).present? || mime_type.blank?
filename = "#{filename}.#{mime_type.extension}"
end
filename
end

def respond_to?(*args)
super || file.respond_to?(*args)
end

private

def filename_from_header
if file.meta.include? 'content-disposition'
match = file.meta['content-disposition'].match(/filename=(?:"([^"]+)"|([^";]+))/)
match[1].presence || match[2].presence
end
end

def filename_from_uri
CGI.unescape(File.basename(file.base_uri.path))
end

def method_missing(*args, &block)
file.send(*args, &block)
end
end
end
end

4 changes: 4 additions & 0 deletions lib/carrierwave/uploader/configuration.rb
@@ -1,3 +1,5 @@
require 'carrierwave/downloader/base'

module CarrierWave

module Uploader
Expand All @@ -21,6 +23,7 @@ module Configuration
add_config :move_to_cache
add_config :move_to_store
add_config :remove_previously_stored_files_after_update
add_config :downloader

# fog
add_config :fog_provider
Expand Down Expand Up @@ -189,6 +192,7 @@ def reset_config
config.move_to_cache = false
config.move_to_store = false
config.remove_previously_stored_files_after_update = true
config.downloader = CarrierWave::Downloader::Base
config.ignore_integrity_errors = true
config.ignore_processing_errors = true
config.ignore_download_errors = true
Expand Down
82 changes: 2 additions & 80 deletions lib/carrierwave/uploader/download.rb
@@ -1,5 +1,3 @@
require 'open-uri'

module CarrierWave
module Uploader
module Download
Expand All @@ -9,94 +7,18 @@ module Download
include CarrierWave::Uploader::Configuration
include CarrierWave::Uploader::Cache

class RemoteFile
def initialize(uri, remote_headers = {})
@uri = uri
@remote_headers = remote_headers
@file = nil
end

def original_filename
filename = filename_from_header || filename_from_uri
mime_type = MiniMime.lookup_by_content_type(file.content_type)
unless File.extname(filename).present? || mime_type.blank?
filename = "#{filename}.#{mime_type.extension}"
end
filename
end

def respond_to?(*args)
super || file.respond_to?(*args)
end

def http?
@uri.scheme =~ /^https?$/
end

private

def file
if @file.blank?
headers = @remote_headers.
reverse_merge('User-Agent' => "CarrierWave/#{CarrierWave::VERSION}")

@file = Kernel.open(@uri.to_s, headers)
@file = @file.is_a?(String) ? StringIO.new(@file) : @file
end
@file

rescue StandardError => e
raise CarrierWave::DownloadError, "could not download file: #{e.message}"
end

def filename_from_header
if file.meta.include? 'content-disposition'
match = file.meta['content-disposition'].match(/filename=(?:"([^"]+)"|([^";]+))/)
return match[1].presence || match[2].presence
end
end

def filename_from_uri
URI.decode(File.basename(file.base_uri.path))
end

def method_missing(*args, &block)
file.send(*args, &block)
end
end

##
# Caches the file by downloading it from the given URL.
# Caches the file by downloading it from the given URL, using downloader.
#
# === Parameters
#
# [url (String)] The URL where the remote file is stored
# [remote_headers (Hash)] Request headers
#
def download!(uri, remote_headers = {})
processed_uri = process_uri(uri)
file = RemoteFile.new(processed_uri, remote_headers)
raise CarrierWave::DownloadError, "trying to download a file which is not served over HTTP" unless file.http?
file = downloader.new(self).download(uri, remote_headers)
cache!(file)
end

##
# Processes the given URL by parsing and escaping it. Public to allow overriding.
#
# === Parameters
#
# [url (String)] The URL where the remote file is stored
#
def process_uri(uri)
URI.parse(uri)
rescue URI::InvalidURIError
uri_parts = uri.split('?')
# regexp from Ruby's URI::Parser#regexp[:UNSAFE], with [] specifically removed
encoded_uri = URI.encode(uri_parts.shift, /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,]/)
encoded_uri << '?' << URI.encode(uri_parts.join('?')) if uri_parts.any?
URI.parse(encoded_uri) rescue raise CarrierWave::DownloadError, "couldn't parse URL"
end

end # Download
end # Uploader
end # CarrierWave
129 changes: 129 additions & 0 deletions spec/downloader/base_spec.rb
@@ -0,0 +1,129 @@
require 'spec_helper'

describe CarrierWave::Downloader::Base do
let(:uploader_class) { Class.new(CarrierWave::Uploader::Base) }
let(:uploader) { uploader_class.new }
let(:file) { File.read(file_path("test.jpg")) }
let(:filename) { "test.jpg" }
let(:uri) { URI.encode("http://www.example.com/#{filename}") }

subject { CarrierWave::Downloader::Base.new(uploader) }

context "with unicode sybmols in URL" do
let(:filename) { "юникод.jpg" }
before do
stub_request(:get, uri).to_return(body: file)
end

let(:remote_file) { subject.download(uri) }

it "downloads a file" do
expect(remote_file).to be_an_instance_of(CarrierWave::Downloader::RemoteFile)
end

it "sets the filename to the file's decoded sanitized filename" do
expect(remote_file.original_filename).to eq("#{filename}")
end
end

context 'with request headers' do
let(:authentication_headers) do
{
'Accept'=>'*/*',
'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
'User-Agent'=>"CarrierWave/#{CarrierWave::VERSION}",
'Authorization'=>'Bearer QWE'
}
end
before do
stub_request(:get, uri).
with(:headers => authentication_headers).
to_return(body: file)
end

it 'pass custom headers to request' do
expect(subject.download(uri, { 'Authorization' => 'Bearer QWE' }).file.read).to eq file
end
end

it "raises an error when trying to download a local file" do
expect { subject.download('/etc/passwd') }.to raise_error(CarrierWave::DownloadError)
end

context "with missing file" do
before do
stub_request(:get, uri).to_return(status: 404)
end

it "raises an error when trying to download a missing file" do
expect{ subject.download(uri) }.to raise_error(CarrierWave::DownloadError)
end

it "doesn't obscure original exception message" do
expect { subject.download(uri) }.to raise_error(CarrierWave::DownloadError, /could not download file: 404/)
end
end

context "with a url that contains space" do
let(:filename) { "my test.jpg" }
before do
stub_request(:get, uri).to_return(body: file)
end

it "accepts spaces in the url" do
expect(subject.download(uri).original_filename).to eq filename
end
end

context "with redirects" do
let(:another_uri) { 'http://example.com/redirected.jpg' }
before do
stub_request(:get, uri).
to_return(status: 301, body: "Redirecting", headers: { "Location" => another_uri })
stub_request(:get, another_uri).to_return(body: file)
end

it "retrieves redirected file" do
expect(subject.download(uri).file.read).to eq file
end

it "extracts filename from the url after redirection" do
expect(subject.download(uri).original_filename).to eq 'redirected.jpg'
end
end

describe '#process_uri' do
it "parses but not escape already escaped uris" do
uri = 'http://example.com/%5B.jpg'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq(uri)
end

it "parses but not escape uris with query-string-only characters not needing escaping" do
uri = 'http://example.com/?foo[]=bar'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq(uri)
end

it "escapes and parse unescaped uris" do
uri = 'http://example.com/ %[].jpg'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq('http://example.com/%20%25%5B%5D.jpg')
end

it "escapes and parse brackets in uri paths without harming the query string" do
uri = 'http://example.com/].jpg?test[]'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq('http://example.com/%5D.jpg?test[]')
end

it "throws an exception on bad uris" do
uri = '~http:'
expect { subject.process_uri(uri) }.to raise_error(CarrierWave::DownloadError)
end
end
end

0 comments on commit 68d1eb8

Please sign in to comment.