Skip to content

Commit

Permalink
Rename IDNA backend implementations and refactor loading mechanism
Browse files Browse the repository at this point in the history
  • Loading branch information
jarthod committed Apr 11, 2023
1 parent 0a6f091 commit a1fb7de
Show file tree
Hide file tree
Showing 11 changed files with 218 additions and 168 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: false
matrix:
ruby: [2.7]
idna_mode: [native2, native, pure]
idna_mode: [libidn2, libidn1, pure]
os: [ubuntu-20.04]
env:
IDNA_MODE: ${{ matrix.idna_mode }}
Expand Down
32 changes: 30 additions & 2 deletions README.md
Expand Up @@ -94,15 +94,43 @@ template.extract(uri)
$ gem install addressable
```

You may optionally turn on native IDN support by installing libidn and the
idn gem:
# IDNA support (unicode hostnames)

Three IDNA implementations are available; the first one that can be loaded is used:
- A `libidn2` wrapper (if `libidn2` and the `ffi` gem are installed), supporting IDNA2008+UTS#46.
- A `libidn1` wrapper (if `libidn` and the `idn` gem are installed), supporting IDNA2003.
- A pure Ruby implementation (slower), [almost](https://github.com/sporkmonger/addressable/issues/491) supporting IDNA2008.

To install `libidn2`:

```console
$ sudo apt-get install libidn2-dev # Debian/Ubuntu
$ brew install libidn2 # OS X
```

To install the legacy `libidn1` and the `idn` gem (also add it to your Gemfile):

```console
$ sudo apt-get install libidn11-dev # Debian/Ubuntu
$ brew install libidn # OS X
$ gem install idn-ruby
```

You can check which implementation is active with:

```ruby
puts Addressable::IDNA.backend.name
```

Finally, if you want to force a different IDNA implementation, you can do so like this (after addressable is required):

```ruby
require "addressable/idna/pure.rb"
Addressable::IDNA.backend = Addressable::IDNA::Pure
require "addressable/idna/libidn1"
Addressable::IDNA.backend = Addressable::IDNA::Libidn1
```

# Semantic Versioning

This project uses [Semantic Versioning](https://semver.org/). You can (and should) specify your
Expand Down
37 changes: 17 additions & 20 deletions benchmark/idna.rb
Expand Up @@ -2,32 +2,30 @@
# frozen_string_literal: true

require "benchmark"
require "addressable/idna/libidn2"
require "addressable/idna/libidn1"
require "addressable/idna/pure"

# Non-ASCII sample hostname and the ASCII form each backend is expected to
# produce for it.
value = "fiᆵリ宠퐱卄.com"
expected = "xn--fi-w1k207vk59a3qk9w9r.com"
# Number of to_ascii/to_unicode round trips performed per report.
N = 100_000

# Sanity checks: abort before measuring anything if a backend's to_ascii
# output differs from `expected`.
fail "pure ruby does not match" unless expected == Addressable::IDNA::Pure.to_ascii(value)
fail "libidn does not match" unless expected == Addressable::IDNA::Libidn1.to_ascii(value)
fail "addressable does not match" unless expected == Addressable::IDNA::Libidn2.to_ascii(value)

# Benchmark.bmbm runs every report twice (a rehearsal pass, then the measured
# pass). Each report converts `value` to ASCII and back N times.
Benchmark.bmbm do |x|
x.report("pure") {
load "lib/addressable/idna/pure.rb"
fail "pure ruby does not match" unless expected == Addressable::IDNA.to_ascii(value)
N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
Addressable.send(:remove_const, :IDNA)
}
x.report("pure") { N.times {
Addressable::IDNA::Pure.to_unicode(Addressable::IDNA::Pure.to_ascii(value))
} }

x.report("libidn") {
load "lib/addressable/idna/native.rb"
fail "libidn does not match" unless expected == Addressable::IDNA.to_ascii(value)
N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
Addressable.send(:remove_const, :IDNA)
}
x.report("libidn") { N.times {
Addressable::IDNA::Libidn1.to_unicode(Addressable::IDNA::Libidn1.to_ascii(value))
} }

x.report("libidn2") {
load "lib/addressable/idna/native2.rb"
fail "addressable does not match" unless expected == Addressable::IDNA.to_ascii(value)
N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
Addressable.send(:remove_const, :IDNA)
}
x.report("libidn2") { N.times {
Addressable::IDNA::Libidn2.to_unicode(Addressable::IDNA::Libidn2.to_ascii(value))
} }
end

# > ruby benchmark/idna.rb
Expand All @@ -43,10 +41,9 @@
# libidn2 0.764782 0.000000 0.764782 ( 0.764863)

puts "\nMemory leak test for libidn2 (memory should stabilize quickly):"
load "lib/addressable/idna/native2.rb"
GC.disable # Only run GC when manually called
10.times do
N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
N.times { Addressable::IDNA::Libidn2.to_unicode(Addressable::IDNA::Libidn2.to_ascii(value)) }
GC.start # Run a major GC
pid, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$$}"`.strip.split.map(&:to_i)
puts " Memory: #{size/1024}MB" # show process memory
Expand Down
32 changes: 30 additions & 2 deletions lib/addressable/idna.rb
Expand Up @@ -16,15 +16,43 @@
# limitations under the License.
#++

module Addressable
  # Facade over the pluggable IDNA implementations. The conversion methods
  # forward to whichever object is currently stored in {IDNA.backend}.
  module IDNA
    class << self
      extend Gem::Deprecate

      # The object that performs the conversions. It must respond to
      # +to_ascii+ and +to_unicode+.
      attr_reader :backend
      attr_writer :backend

      # Converts +value+ using the active backend's +to_ascii+.
      #
      # @param value the hostname to convert
      # @return whatever the backend's +to_ascii+ returns
      def to_ascii(value)
        backend.to_ascii(value)
      end

      # Converts +value+ using the active backend's +to_unicode+.
      #
      # @param value the hostname to convert
      # @return whatever the backend's +to_unicode+ returns
      def to_unicode(value)
        backend.to_unicode(value)
      end

      # Returns the NFKC-normalized string form of +value+.
      #
      # @deprecated Use {String#unicode_normalize(:nfkc)} instead
      def unicode_normalize_kc(value)
        text = value.to_s
        text.unicode_normalize(:nfkc)
      end
      deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4
    end
  end
end

# Backend selection: each candidate implementation is required in turn, and
# the first one that loads without raising LoadError is installed as
# Addressable::IDNA.backend. A LoadError moves on to the next candidate.
begin
require "addressable/idna/native2"
require "addressable/idna/libidn2"
Addressable::IDNA.backend = Addressable::IDNA::Libidn2
rescue LoadError
# libidn2 or the ffi gem was not available, fall back on libidn1
begin
require "addressable/idna/native"
require "addressable/idna/libidn1"
Addressable::IDNA.backend = Addressable::IDNA::Libidn1
rescue LoadError
# libidn or the idn gem was not available, fall back on a pure-Ruby
# implementation...
# This last require is not rescued, so a failure here propagates.
require "addressable/idna/pure"
Addressable::IDNA.backend = Addressable::IDNA::Pure
end
end
60 changes: 60 additions & 0 deletions lib/addressable/idna/libidn1.rb
@@ -0,0 +1,60 @@
# frozen_string_literal: true

#--
# Copyright (C) Bob Aman
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

# libidn1 implementing IDNA2003
require "idn"

module Addressable
  module IDNA
    # IDNA backend that hands each dot-separated label of a hostname to
    # IDN::Idna (loaded by the `require "idn"` at the top of this file).
    module Libidn1
      class << self
        extend Gem::Deprecate

        # Returns the NFKC-normalized string form of +value+.
        #
        # @deprecated Use {String#unicode_normalize(:nfkc)} instead
        def unicode_normalize_kc(value)
          text = value.to_s
          text.unicode_normalize(:nfkc)
        end
        deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4
      end

      # Converts every label of +value+ with IDN::Idna.toASCII.
      #
      # Empty labels stay empty, and labels of 64 or more characters are
      # passed through unchanged instead of being given to libidn.
      #
      # @param value the hostname; converted with +to_s+ first
      # @return [String] the converted labels joined with "."
      def self.to_ascii(value)
        labels = value.to_s.split('.', -1) # -1 keeps trailing empty labels
        encoded = labels.map do |label|
          next '' if label.size.zero?
          next label if label.size >= 64

          IDN::Idna.toASCII(label, IDN::Idna::ALLOW_UNASSIGNED)
        end
        encoded.join('.')
      end

      # Converts every label of +value+ with IDN::Idna.toUnicode, applying
      # the same empty-label and 64-character rules as {to_ascii}.
      #
      # @param value the hostname; converted with +to_s+ first
      # @return [String] the converted labels joined with "."
      def self.to_unicode(value)
        labels = value.to_s.split('.', -1) # -1 keeps trailing empty labels
        decoded = labels.map do |label|
          next '' if label.size.zero?
          next label if label.size >= 64

          IDN::Idna.toUnicode(label, IDN::Idna::ALLOW_UNASSIGNED)
        end
        decoded.join('.')
      end
    end
  end
end
59 changes: 59 additions & 0 deletions lib/addressable/idna/libidn2.rb
@@ -0,0 +1,59 @@
# frozen_string_literal: true

#--
# Copyright (C) Bob Aman
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

# libidn2 implementing IDNA2008+TR46
require "ffi"

module Addressable
  module IDNA
    # IDNA backend that calls the libidn2 C library through FFI.
    module Libidn2
      extend FFI::Library

      ffi_lib ["idn2", "libidn2.0", "libidn2.so.0"]

      attach_function :idn2_to_ascii_8z, %i[string pointer int], :int
      attach_function :idn2_to_unicode_8z8z, %i[string pointer int], :int
      attach_function :idn2_strerror, [:int], :string
      attach_function :idn2_free, [:pointer], :void

      # Flag values passed as the last argument of the idn2_* conversions.
      IDN2_TRANSITIONAL = 4
      IDN2_NONTRANSITIONAL = 8

      # Converts a hostname to its ASCII form.
      #
      # ASCII-only input is returned as is, without calling libidn2.
      # Otherwise the conversion is attempted in non-transitional mode, then
      # retried in transitional mode if the first attempt fails.
      #
      # @param value the hostname; converted with +to_s+ first, like the
      #   Libidn1 backend does
      # @return [String] the ASCII hostname
      # @raise [RuntimeError] when both conversion attempts fail
      def self.to_ascii(value)
        value = value.to_s
        return value if value.ascii_only?

        pointer = FFI::MemoryPointer.new(:pointer)
        res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL)
        # Fallback to Transitional mode in case of disallowed character
        res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res != 0
        raise "libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})" if res != 0

        take_string(pointer)
      end

      # Converts a hostname to its Unicode form.
      #
      # @param value the hostname; converted with +to_s+ first, like the
      #   Libidn1 backend does
      # @return [String] the UTF-8 hostname, or the input string unchanged
      #   when libidn2 reports an error
      def self.to_unicode(value)
        value = value.to_s
        pointer = FFI::MemoryPointer.new(:pointer)
        res = idn2_to_unicode_8z8z(value, pointer, IDN2_NONTRANSITIONAL)
        return value if res != 0

        take_string(pointer).force_encoding('UTF-8')
      end

      # Copies the C string that +pointer+ points to into a Ruby String and
      # releases the libidn2 buffer, even if reading it raises.
      def self.take_string(pointer)
        native = pointer.read_pointer
        native.read_string
      ensure
        idn2_free(native) if native && !native.null?
      end
      private_class_method :take_string
    end
  end
end
62 changes: 4 additions & 58 deletions lib/addressable/idna/native.rb
@@ -1,58 +1,4 @@
# frozen_string_literal: true

#--
# Copyright (C) Bob Aman
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

# libidn1 implementing IDNA2003
require "idn"

module Addressable
  # Legacy libidn1-backed implementation defined directly on Addressable::IDNA.
  # Each dot-separated label is handed to IDN::Idna.
  module IDNA
    class << self
      extend Gem::Deprecate

      # Returns the NFKC-normalized string form of +value+.
      #
      # @deprecated Use {String#unicode_normalize(:nfkc)} instead
      def unicode_normalize_kc(value)
        text = value.to_s
        text.unicode_normalize(:nfkc)
      end
      deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4
    end

    # Converts every label of +value+ with IDN::Idna.toASCII. Empty labels
    # stay empty, and labels of 64 or more characters are returned untouched.
    #
    # @param value the hostname; converted with +to_s+ first
    # @return [String] the converted labels joined with "."
    def self.to_ascii(value)
      parts = value.to_s.split('.', -1) # -1 keeps trailing empty labels
      parts.map! do |part|
        case part.size
        when 0 then ''
        when 1..63 then IDN::Idna.toASCII(part, IDN::Idna::ALLOW_UNASSIGNED)
        else part
        end
      end
      parts.join('.')
    end

    # Converts every label of +value+ with IDN::Idna.toUnicode, using the
    # same empty-label and 64-character rules as {to_ascii}.
    #
    # @param value the hostname; converted with +to_s+ first
    # @return [String] the converted labels joined with "."
    def self.to_unicode(value)
      parts = value.to_s.split('.', -1) # -1 keeps trailing empty labels
      parts.map! do |part|
        case part.size
        when 0 then ''
        when 1..63 then IDN::Idna.toUnicode(part, IDN::Idna::ALLOW_UNASSIGNED)
        else part
        end
      end
      parts.join('.')
    end
  end
end
# Deprecated, for backward compatibility only.
# Requiring this file loads the libidn1 backend, makes it the active
# Addressable::IDNA backend, and prints a deprecation notice via `warn`.
require "addressable/idna/libidn1"
Addressable::IDNA.backend = Addressable::IDNA::Libidn1
warn "NOTE: loading 'addressable/idna/native' is deprecated; use 'addressable/idna/libidn1' instead and set `Addressable::IDNA.backend = Addressable::IDNA::Libidn1` to force libidn1."

0 comments on commit a1fb7de

Please sign in to comment.