Skip to content

Commit

Permalink
FIX: broken onebox images due to url normalization bugs
Browse files Browse the repository at this point in the history
normalized_encode in addressable has a number of issues, including sporkmonger/addressable#472

To temporaily work around those issues for the majority of cases, we try parsing with `::URI`. If that fails (e.g. due to non-ascii characters) then we will fall back to addressable.

Hopefully we can simplify this back to `Addressable::URI.normalized_encode` in the future.

This commit also adds support for unicode domain names and emoji domain names with escape_uri.

This removes an unneeded hack checking for pre-signed urls, which are now handled by the general case due to starting off valid and only being minimally normalized. Previous test case continues to pass.

UrlHelper.s3_presigned_url? which was somewhat wide was removed.
  • Loading branch information
SamSaffron authored and davidtaylorhq committed Aug 9, 2022
1 parent 6f3be0c commit f0e08c6
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 30 deletions.
60 changes: 54 additions & 6 deletions lib/url_helper.rb
Expand Up @@ -60,9 +60,35 @@ def self.secure_proxy_without_cdn(url)
self.absolute(Upload.secure_media_url_from_upload_url(url), nil)
end

# This is a poorly named method. In reality, it **normalizes** the given URL,
# and does not escape it. Therefore it's idempotent, and can be used on user
# input which includes a mix of escaped and unescaped characters
def self.escape_uri(uri)
return uri if s3_presigned_url?(uri)
Addressable::URI.normalized_encode(uri)
validated = nil
url = uri.to_s

# Ideally we will jump straight to `Addressable::URI.normalized_encode`. However,
# that implementation has some edge-case issues like https://github.com/sporkmonger/addressable/issues/472.
# To temporaily work around those issues for the majority of cases, we try parsing with `::URI`.
# If that fails (e.g. due to non-ascii characters) then we will fall back to addressable.
# Hopefully we can simplify this back to `Addressable::URI.normalized_encode` in the future.

# edge case where we expect mailto:test%40test.com to normalize to mailto:test@test.com
if url.match(/\Amailto:/)
return normalize_with_addressable(url)
end

# If it doesn't pass the regexp, it's definitely not gonna parse with URI.parse. Skip
# to addressable
if !url.match?(/\A#{URI::regexp}\z/)
return normalize_with_addressable(url)
end

begin
normalize_with_ruby_uri(url)
rescue URI::Error
normalize_with_addressable(url)
end
end

def self.rails_route_from_url(url)
Expand All @@ -72,10 +98,6 @@ def self.rails_route_from_url(url)
nil
end

def self.s3_presigned_url?(url)
url[/x-amz-(algorithm|credential)/i].present?
end

def self.cook_url(url, secure: false, local: nil)
is_secure = SiteSetting.secure_media && secure
local = is_local(url) if local.nil?
Expand Down Expand Up @@ -120,4 +142,30 @@ def self.local_cdn_url(url)
end
end

private

def self.normalize_with_addressable(url)
u = Addressable::URI.normalized_encode(url, Addressable::URI)

if u.host && !u.host.ascii_only?
u.host = ::Addressable::IDNA.to_ascii(u.host)
end

u.to_s
end

def self.normalize_with_ruby_uri(url)
u = URI.parse(url)

if u.scheme && u.scheme != u.scheme.downcase
u.scheme = u.scheme.downcase
end

if u.host && u.host != u.host.downcase
u.host = u.host.downcase
end

u.to_s
end

end
2 changes: 1 addition & 1 deletion spec/lib/final_destination_spec.rb
Expand Up @@ -26,7 +26,7 @@
when 'any-subdomain.ihaveawildcard.com' then '104.25.152.11'
when 'wikipedia.com' then '1.2.3.4'
else
as_ip = IPAddr.new(host)
_as_ip = IPAddr.new(host)
host
end
end
Expand Down
43 changes: 41 additions & 2 deletions spec/lib/url_helper_spec.rb
Expand Up @@ -86,6 +86,16 @@
end

describe "#escape_uri" do
it "does not double escape %3A (:)" do
url = "http://discourse.org/%3A/test"
expect(UrlHelper.escape_uri(url)).to eq(url)
end

it "does not double escape %2F (/)" do
url = "http://discourse.org/%2F/test"
expect(UrlHelper.escape_uri(url)).to eq(url)
end

it "doesn't escape simple URL" do
url = UrlHelper.escape_uri('http://example.com/foo/bar')
expect(url).to eq('http://example.com/foo/bar')
Expand All @@ -107,8 +117,37 @@
end

it "doesn't escape already escaped chars (hash)" do
url = UrlHelper.escape_uri('https://calendar.google.com/calendar/embed?src=en.uk%23holiday%40group.v.calendar.google.com&ctz=Europe%2FLondon')
expect(url).to eq('https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe/London')
url = 'https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe%2FLondon'
escaped = UrlHelper.escape_uri(url)
expect(escaped).to eq(url)
end

it "leaves reserved chars alone in edge cases" do
skip "see: https://github.com/sporkmonger/addressable/issues/472"
url = "https://example.com/ article/id%3A1.2%2F1/bar"
expected = "https://example.com/%20article/id%3A1.2%2F1/bar"
escaped = UrlHelper.escape_uri(url)
expect(escaped).to eq(expected)
end

it "handles emoji domain names" do
url = "https://💻.example/💻?computer=💻"
expected = "https://xn--3s8h.example/%F0%9F%92%BB?computer=%F0%9F%92%BB"
escaped = UrlHelper.escape_uri(url)
expect(escaped).to eq(expected)
end

it "handles special-character domain names" do
url = "https://éxample.com/test"
expected = "https://xn--xample-9ua.com/test"
escaped = UrlHelper.escape_uri(url)
expect(escaped).to eq(expected)
end

it "performs basic normalization" do
url = "http://EXAMPLE.com/a"
escaped = UrlHelper.escape_uri(url)
expect(escaped).to eq("http://example.com/a")
end

it "doesn't escape S3 presigned URLs" do
Expand Down
21 changes: 0 additions & 21 deletions spec/models/upload_spec.rb
Expand Up @@ -568,27 +568,6 @@ def enable_secure_media
end
end

describe ".signed_url_from_secure_media_url" do
before do
# must be done so signed_url_for_path exists
enable_secure_media
end

it "correctly gives back a signed url from a path only" do
secure_url = "/secure-media-uploads/original/1X/c5a2c4ba0fa390f5aac5c2c1a12416791ebdd9e9.png"
signed_url = Upload.signed_url_from_secure_media_url(secure_url)
expect(signed_url).not_to include("secure-media-uploads")
expect(UrlHelper.s3_presigned_url?(signed_url)).to eq(true)
end

it "correctly gives back a signed url from a full url" do
secure_url = "http://localhost:3000/secure-media-uploads/original/1X/c5a2c4ba0fa390f5aac5c2c1a12416791ebdd9e9.png"
signed_url = Upload.signed_url_from_secure_media_url(secure_url)
expect(signed_url).not_to include(Discourse.base_url)
expect(UrlHelper.s3_presigned_url?(signed_url)).to eq(true)
end
end

describe ".secure_media_url_from_upload_url" do
before do
# must be done so signed_url_for_path exists
Expand Down

0 comments on commit f0e08c6

Please sign in to comment.