Skip to content

Commit

Permalink
libidn2 ffi implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
jarthod committed Mar 22, 2023
1 parent 5c22f25 commit f77bcf4
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: false
matrix:
ruby: [2.7]
idna_mode: [native, pure]
idna_mode: [native2, native, pure]
os: [ubuntu-20.04]
env:
IDNA_MODE: ${{ matrix.idna_mode }}
Expand Down
4 changes: 3 additions & 1 deletion addressable.gemspec
Expand Up @@ -13,7 +13,7 @@ Gem::Specification.new do |s|
s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze
s.email = "bob@sporkmonger.com".freeze
s.extra_rdoc_files = ["README.md".freeze]
s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "data/unicode.data".freeze, "lib/addressable".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze]
s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "data/unicode.data".freeze, "lib/addressable".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/native2.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze]
s.homepage = "https://github.com/sporkmonger/addressable".freeze
s.licenses = ["Apache-2.0".freeze]
s.rdoc_options = ["--main".freeze, "README.md".freeze]
Expand All @@ -27,9 +27,11 @@ Gem::Specification.new do |s|

if s.respond_to? :add_runtime_dependency then
s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"])
s.add_runtime_dependency(%q<ffi>.freeze)
s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
else
s.add_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"])
s.add_dependency(%q<ffi>.freeze)
s.add_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
end
end
65 changes: 65 additions & 0 deletions benchmark/idna.rb
@@ -0,0 +1,65 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Benchmarks a to_ascii -> to_unicode round trip for each IDNA backend.
#
# Backend files are loaded by relative path, so run this from the repository
# root. Each report block loads its backend, sanity-checks one conversion,
# runs the round trip N times and finally removes the Addressable::IDNA
# constant so that the next backend starts from a clean slate. Note that the
# load and the sanity check happen inside the timed block.

require "benchmark"

value = "fiᆵリ宠퐱卄.com"
expected = "xn--fi-w1k207vk59a3qk9w9r.com"
N = 100_000

Benchmark.bmbm do |x|
  x.report("pure") do
    load "lib/addressable/idna/pure.rb"
    fail "pure ruby does not match" unless expected == Addressable::IDNA.to_ascii(value)
    N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
    Addressable.send(:remove_const, :IDNA)
  end

  x.report("libidn") do
    load "lib/addressable/idna/native.rb"
    fail "libidn does not match" unless expected == Addressable::IDNA.to_ascii(value)
    N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
    Addressable.send(:remove_const, :IDNA)
  end

  x.report("libidn2") do
    load "lib/addressable/idna/native2.rb"
    # Name the backend that actually failed, like the other two reports do.
    fail "libidn2 does not match" unless expected == Addressable::IDNA.to_ascii(value)
    N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
    Addressable.send(:remove_const, :IDNA)
  end
end

# > ruby benchmark/idna.rb
# Rehearsal -------------------------------------------
# pure 5.914630 0.000000 5.914630 ( 5.915326)
# libidn 0.518971 0.003672 0.522643 ( 0.522676)
# libidn2 0.763936 0.000000 0.763936 ( 0.763983)
# ---------------------------------- total: 7.201209sec

# user system total real
# pure 6.042877 0.000000 6.042877 ( 6.043252)
# libidn 0.521668 0.000000 0.521668 ( 0.521704)
# libidn2 0.764782 0.000000 0.764782 ( 0.764863)

# Memory leak check for the libidn2 backend: repeat the round trip in batches
# and print the process's resident set size after a forced GC. A leak in the
# native buffers would show up as steadily growing numbers.
puts "\nMemory leak test for libidn2 (memory should stabilize quickly):"
load "lib/addressable/idna/native2.rb"
GC.disable # Only run GC when manually called
10.times do
  N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
  GC.start # Run a major GC
  # Ask ps for this process only. Filtering a full process listing by a PID
  # prefix would also match unrelated PIDs that start with the same digits.
  # `rss=` prints the value without a header; ps reports it in kilobytes.
  rss_kb = `ps -o rss= -p #{Process.pid}`.to_i
  puts " Memory: #{rss_kb / 1024}MB" # show process memory
end

# Memory leak test for libidn2 (memory should stabilize quickly):
# Memory: 117MB
# Memory: 121MB
# Memory: 121MB
# Memory: 121MB
# Memory: 121MB
# Memory: 121MB
# Memory: 121MB
# Memory: 121MB
# Memory: 121MB
# Memory: 121MB
16 changes: 10 additions & 6 deletions lib/addressable/idna.rb
Expand Up @@ -16,11 +16,15 @@
# limitations under the License.
#++


begin
require "addressable/idna/native"
require "addressable/idna/native2"
rescue LoadError
# libidn or the idn gem was not available, fall back on a pure-Ruby
# implementation...
require "addressable/idna/pure"
end
# libidn2 or the ffi gem was not available, fall back on libidn1
begin
require "addressable/idna/native"
rescue LoadError
# libidn or the idn gem was not available, fall back on a pure-Ruby
# implementation...
require "addressable/idna/pure"
end
end
10 changes: 1 addition & 9 deletions lib/addressable/idna/native.rb
Expand Up @@ -16,19 +16,11 @@
# limitations under the License.
#++


# libidn1 implementing IDNA2003
require "idn"

module Addressable
module IDNA
def self.punycode_encode(value)
IDN::Punycode.encode(value.to_s)
end

def self.punycode_decode(value)
IDN::Punycode.decode(value.to_s)
end

def self.to_ascii(value)
value.to_s.split('.', -1).map do |segment|
if segment.size > 0 && segment.size < 64
Expand Down
57 changes: 57 additions & 0 deletions lib/addressable/idna/native2.rb
@@ -0,0 +1,57 @@
# frozen_string_literal: true

#--
# Copyright (C) Bob Aman
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

# libidn2 implementing IDNA2008+TR46
require "ffi"

module Addressable
  # IDNA conversions backed by libidn2 (IDNA2008 + TR46), bound through FFI.
  #
  # Loading this file raises LoadError when the shared library cannot be
  # found, which lets callers fall back to another implementation.
  module IDNA
    extend FFI::Library

    ffi_lib ["idn2", "libidn2.0", "libidn2.so.0"]

    attach_function :idn2_to_ascii_8z, %i[string pointer int], :int
    attach_function :idn2_to_unicode_8z8z, %i[string pointer int], :int
    attach_function :idn2_strerror, [:int], :string
    attach_function :idn2_free, [:pointer], :void

    # Flag values passed to the libidn2 conversion functions.
    IDN2_TRANSITIONAL = 4
    IDN2_NONTRANSITIONAL = 8

    # Converts a hostname to its ASCII (punycode) form.
    #
    # @param value [#to_s] hostname, possibly containing non-ASCII characters
    # @return [String] the input itself when it is already ASCII-only,
    #   otherwise the string produced by libidn2
    # @raise [RuntimeError] when libidn2 rejects the input in both
    #   non-transitional and transitional modes
    def self.to_ascii(value)
      # Coerce like the libidn1 backend does, so non-String input does not
      # blow up on ascii_only? or inside the FFI call.
      value = value.to_s
      return value if value.ascii_only?

      out = FFI::MemoryPointer.new(:pointer)
      status = idn2_to_ascii_8z(value, out, IDN2_NONTRANSITIONAL)
      # Fallback to Transitional mode in case of disallowed character
      status = idn2_to_ascii_8z(value, out, IDN2_TRANSITIONAL) if status != 0
      if status != 0
        raise "libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(status)})"
      end

      read_and_free(out)
    end

    # Converts a hostname from its ASCII (punycode) form back to Unicode.
    #
    # @param value [#to_s] hostname, possibly punycode-encoded
    # @return [String] the UTF-8 decoded hostname, or the (stringified) input
    #   unchanged when libidn2 reports an error
    def self.to_unicode(value)
      value = value.to_s
      out = FFI::MemoryPointer.new(:pointer)
      status = idn2_to_unicode_8z8z(value, out, IDN2_NONTRANSITIONAL)
      return value if status != 0

      read_and_free(out).force_encoding('UTF-8')
    end

    # Copies the C string that +out+ points to into a Ruby String and hands
    # the libidn2-allocated buffer back to idn2_free, even if the copy raises.
    #
    # @param out [FFI::MemoryPointer] pointer-sized slot filled by libidn2
    # @return [String] copy of the native string
    def self.read_and_free(out)
      buffer = out.read_pointer
      buffer.read_string
    ensure
      idn2_free(buffer) if buffer
    end
    private_class_method :read_and_free
  end
end
41 changes: 36 additions & 5 deletions spec/addressable/idna_spec.rb
Expand Up @@ -258,14 +258,18 @@
end

describe Addressable::IDNA, "when using the pure-Ruby implementation" do
before do
before :all do
Addressable.send(:remove_const, :IDNA)
load "addressable/idna/pure.rb"
end

it_should_behave_like "converting from unicode to ASCII"
it_should_behave_like "converting from ASCII to unicode"

it "should implement IDNA2008 non transitional" do
expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de")
end

begin
require "fiber"

Expand All @@ -285,18 +289,45 @@
begin
require "idn"

describe Addressable::IDNA, "when using the native-code implementation" do
before do
describe Addressable::IDNA, "when using the libidn1 native implementation (idn gem)" do
before :all do
Addressable.send(:remove_const, :IDNA)
load "addressable/idna/native.rb"
end

it_should_behave_like "converting from unicode to ASCII"
it_should_behave_like "converting from ASCII to unicode"

it "should implement IDNA2003" do
expect(Addressable::IDNA.to_ascii("faß.de")).to eq("fass.de")
end
end
rescue LoadError => error
raise error if ENV["CI"] && TestHelper.native_supported?

# Cannot test the native implementation without libidn support.
warn('Could not load native IDN implementation.')
# Cannot test the native implementation without libidn installed.
warn('Could not load native libidn1 implementation.')
end

begin
require "addressable/idna/native2.rb"

describe Addressable::IDNA, "when using the libidn2 native implementation (ffi)" do
before :all do
Addressable.send(:remove_const, :IDNA)
load "addressable/idna/native2.rb"
end

it_should_behave_like "converting from unicode to ASCII"
it_should_behave_like "converting from ASCII to unicode"

it "should implement IDNA2008 non transitional" do
expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de")
end
end
rescue LoadError => error
raise error if ENV["CI"] && TestHelper.native_supported?

# Cannot test the native implementation without libidn2 installed.
warn('Could not load native libidn2 implementation.')
end
5 changes: 4 additions & 1 deletion tasks/profile.rake
Expand Up @@ -41,13 +41,16 @@ namespace :profile do
if ENV["IDNA_MODE"] == "pure"
Addressable.send(:remove_const, :IDNA)
load "addressable/idna/pure.rb"
elsif ENV["IDNA_MODE"] == "native"
Addressable.send(:remove_const, :IDNA)
load "addressable/idna/native.rb"
end

start_at = Time.now.to_f
report = MemoryProfiler.report do
30_000.times do
Addressable::URI.parse(
"http://google.com/stuff/../?with_lots=of&params=asdff#!stuff"
"http://fiᆵリ宠퐱卄.com/stuff/../?with_lots=of&params=asdff#!stuff"
).normalize
end
end
Expand Down

0 comments on commit f77bcf4

Please sign in to comment.