# Patch summary (explanatory preamble placed before the first `diff --git` header).
#
# lib/twingly/url.rb
#   * internal_parse: obtains the display URI through the new helper
#     addressable_display_uri(addressable_uri) instead of calling
#     addressable_uri.display_uri inline, then passes display_uri.host to
#     PublicSuffix.parse.
#   * addressable_display_uri (new, added to the `private` list): returns
#     addressable_uri.display_uri. If that call raises an ArgumentError whose
#     message includes "invalid byte sequence in UTF-8", the helper raises
#     Twingly::URL::Error::ParseError instead; any other ArgumentError is
#     re-raised unchanged. The in-code comment links the upstream addressable
#     issue this works around.
#
# spec/lib/twingly/url_spec.rb
#   * invalid_urls: adds the fixtures "http://some_site.net%C2" and
#     "http://+%D5d.some_site.net".
#
# NOTE(review): the patch text below appears to have lost its line breaks (the
# whole diff sits on one physical line). Presumably they must be restored from
# the original commit before the patch can be applied -- confirm.
diff --git a/lib/twingly/url.rb b/lib/twingly/url.rb index 97635e7..02d6e37 100644 --- a/lib/twingly/url.rb +++ b/lib/twingly/url.rb @@ -35,7 +35,9 @@ def internal_parse(potential_url) scheme = addressable_uri.scheme raise Twingly::URL::Error::ParseError unless scheme =~ ACCEPTED_SCHEMES - public_suffix_domain = PublicSuffix.parse(addressable_uri.display_uri.host) + display_uri = addressable_display_uri(addressable_uri) + + public_suffix_domain = PublicSuffix.parse(display_uri.host) raise Twingly::URL::Error::ParseError if public_suffix_domain.nil? new(addressable_uri, public_suffix_domain) @@ -56,7 +58,19 @@ def to_addressable_uri(potential_url) end end - private :new, :internal_parse, :to_addressable_uri + # Workaround for the following bug in addressable: + # https://github.com/sporkmonger/addressable/issues/224 + def addressable_display_uri(addressable_uri) + addressable_uri.display_uri + rescue ArgumentError => error + if error.message.include?("invalid byte sequence in UTF-8") + raise Twingly::URL::Error::ParseError + end + + raise + end + + private :new, :internal_parse, :to_addressable_uri, :addressable_display_uri end def initialize(addressable_uri, public_suffix_domain) diff --git a/spec/lib/twingly/url_spec.rb b/spec/lib/twingly/url_spec.rb index 729f847..b80849e 100644 --- a/spec/lib/twingly/url_spec.rb +++ b/spec/lib/twingly/url_spec.rb @@ -27,6 +27,8 @@ def invalid_urls "http://xn--t...-/", "http://xn--...-", "leather beltsbelts for menleather beltmens beltsleather belts for menmens beltbelt bucklesblack l...", + "http://some_site.net%C2", + "http://+%D5d.some_site.net", ] end