diff --git a/CHANGELOG.md b/CHANGELOG.md index 02f4c831cf..cd99aecac2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Improved +* Reduce the number of object allocations needed when parsing an HTML::DocumentFragment. [[#2087](https://github.com/sparklemotion/nokogiri/issues/2087)] (Thanks, [@ashmaroli](https://github.com/ashmaroli)!) * [JRuby] Update the algorithm used to calculate `Node#line` to be wrong less-often. The underlying parser, Xerces, does not track line numbers, and so we've always used a hacky solution for this method. [[#1223](https://github.com/sparklemotion/nokogiri/issues/1223)] diff --git a/lib/nokogiri/html/document_fragment.rb b/lib/nokogiri/html/document_fragment.rb index f2375a3ce1..ce85cd78e3 100644 --- a/lib/nokogiri/html/document_fragment.rb +++ b/lib/nokogiri/html/document_fragment.rb @@ -4,26 +4,26 @@ module HTML class DocumentFragment < Nokogiri::XML::DocumentFragment #### # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+ - def self.parse tags, encoding = nil + def self.parse(tags, encoding = nil) doc = HTML::Document.new encoding ||= if tags.respond_to?(:encoding) - encoding = tags.encoding - if encoding == ::Encoding::ASCII_8BIT - 'UTF-8' - else - encoding.name - end - else - 'UTF-8' - end + encoding = tags.encoding + if encoding == ::Encoding::ASCII_8BIT + 'UTF-8' + else + encoding.name + end + else + 'UTF-8' + end doc.encoding = encoding new(doc, tags) end - def initialize document, tags = nil, ctx = nil + def initialize(document, tags = nil, ctx = nil) return self unless tags if ctx @@ -33,13 +33,13 @@ def initialize document, tags = nil, ctx = nil self.errors = document.errors - preexisting_errors else # This is a horrible hack, but I don't care - if tags.strip =~ /^#{tags}", nil, document.encoding + temp_doc = HTML::Document.parse("#{tags}", nil, document.encoding) temp_doc.xpath(path).each { |child| child.parent = self } self.errors = temp_doc.errors end