sparklemotion · flavorjones · Jan 13, 2022 · Jan 11, 2022 · Jan 12, 2022 · Jan 12, 2022
diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb
diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y
diff --git a/lib/nokogiri/css/tokenizer.rb b/lib/nokogiri/css/tokenizer.rb
@@ -63,10 +63,10 @@ def _next_token
                   when (text = @ss.scan(/has\([\s]*/))
                      action { [:HAS, text] }
 
-                  when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
+                  when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
                      action { [:FUNCTION, text] }
 
-                  when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
+                  when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
                      action { [:IDENT, text] }
 
                   when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))

diff --git a/lib/nokogiri/css/tokenizer.rex b/lib/nokogiri/css/tokenizer.rex
@@ -13,7 +13,7 @@ macro
   escape    {unicode}|\\[^\n\r\f0-9A-Fa-f]
   nmchar    [_A-Za-z0-9-]|{nonascii}|{escape}
   nmstart   [_A-Za-z]|{nonascii}|{escape}
-  ident     [-@]?({nmstart})({nmchar})*
+  ident     -?({nmstart})({nmchar})*
   name      ({nmchar})+
   string1   "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
   string2   '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'

diff --git a/lib/nokogiri/css/xpath_visitor.rb b/lib/nokogiri/css/xpath_visitor.rb
@@ -128,8 +128,11 @@ def visit_function(node)
           is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
           ".#{"//" unless is_direct}#{node.value[1].accept(self)}"
         else
-          # non-standard. this looks like a function call.
-          args = ["."] + node.value[1..-1]
+          # xpath function call, let's marshal those arguments
+          args = ["."]
+          args += node.value[1..-1].map do |n|
+            n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
+          end
           "#{node.value.first}#{args.join(",")})"
         end
       end
@@ -149,17 +152,8 @@ def visit_id(node)
       end
 
       def visit_attribute_condition(node)
-        attribute = if (node.value.first.type == :FUNCTION) || (node.value.first.value.first =~ /::/)
-          ""
-        else
-          "@"
-        end
-        attribute += node.value.first.accept(self)
-
-        # non-standard. attributes starting with '@'
-        attribute.gsub!(/^@@/, "@")
-
-        return attribute unless node.value.length == 3
+        attribute = node.value.first.accept(self)
+        return attribute if node.value.length == 1
 
         value = node.value.last
         value = "'#{value}'" unless /^['"]/.match?(value)
@@ -249,10 +243,7 @@ def visit_conditional_selector(node)
       end
 
       def visit_element_name(node)
-        if @doctype == DoctypeConfig::HTML5 && node.value.first != "*"
-          # if there is already a namespace, use it as normal
-          return node.value.first if node.value.first.include?(":")
-
+        if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
           # HTML5 has namespaces that should be ignored in CSS queries
           # https://github.com/sparklemotion/nokogiri/issues/2376
           if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
@@ -270,7 +261,7 @@ def visit_element_name(node)
       end
 
       def visit_attrib_name(node)
-        node.value.first
+        "@#{node.value.first}"
       end
 
       def accept(node)
@@ -279,6 +270,13 @@ def accept(node)
 
       private
 
+      def html5_element_name_needs_namespace_handling(node)
+        # if this is the wildcard selector "*", use it as normal
+        node.value.first != "*" &&
+          # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
+          !node.value.first.include?(":")
+      end
+
       def nth(node, options = {})
         raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
 

diff --git a/lib/nokogiri/xml/searchable.rb b/lib/nokogiri/xml/searchable.rb
@@ -6,7 +6,7 @@ module XML
     #
     #  The Searchable module declares the interface used for searching your DOM.
     #
-    #  It implements the public methods `search`, `css`, and `xpath`,
+    #  It implements the public methods #search, #css, and #xpath,
     #  as well as allowing specific implementations to specialize some
     #  of the important behaviors.
     #
@@ -30,25 +30,22 @@ module Searchable
       #   node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
       #   node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
       #
-      # For XPath queries, a hash of variable bindings may also be
-      # appended to the namespace bindings. For example:
+      # For XPath queries, a hash of variable bindings may also be appended to the namespace
+      # bindings. For example:
       #
       #   node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
       #
-      # Custom XPath functions and CSS pseudo-selectors may also be
-      # defined. To define custom functions create a class and
-      # implement the function you want to define.  The first argument
-      # to the method will be the current matching NodeSet.  Any other
-      # arguments are ones that you pass in.  Note that this class may
-      # appear anywhere in the argument list.  For example:
-      #
-      #   node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")'
-      #     Class.new {
-      #       def regex node_set, regex
-      #         node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
-      #       end
-      #     }.new
-      #   )
+      # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
+      # functions create a class and implement the function you want to define. The first argument
+      # to the method will be the current matching NodeSet. Any other arguments are ones that you
+      # pass in. Note that this class may appear anywhere in the argument list. For example:
+      #
+      #   handler = Class.new {
+      #     def regex node_set, regex
+      #       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
+      #     end
+      #   }.new
+      #   node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
       #
       # See Searchable#xpath and Searchable#css for further usage help.
       def search(*args)
@@ -92,25 +89,40 @@ def at(*args)
       #
       #   node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
       #
-      # Custom CSS pseudo classes may also be defined.  To define
-      # custom pseudo classes, create a class and implement the custom
-      # pseudo class you want defined.  The first argument to the
-      # method will be the current matching NodeSet.  Any other
-      # arguments are ones that you pass in.  For example:
+      # 💡 Custom CSS pseudo classes may also be defined; each one is mapped to a custom XPath
+      # function. To define custom pseudo classes, create a class and implement the custom pseudo
+      # class you want defined. The first argument to the method will be the matching context
+      # NodeSet. Any other arguments are ones that you pass in. For example:
       #
-      #   node.css('title:regex("\w+")', Class.new {
-      #     def regex node_set, regex
+      #   handler = Class.new {
+      #     def regex(node_set, regex)
       #       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
       #     end
-      #   }.new)
+      #   }.new
+      #   node.css('title:regex("\w+")', handler)
+      #
+      # 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
       #
-      # Note that the CSS query string is case-sensitive with regards
-      # to your document type. That is, if you're looking for "H1" in
-      # an HTML document, you'll never find anything, since HTML tags
-      # will match only lowercase CSS queries. However, "H1" might be
-      # found in an XML document, where tags names are case-sensitive
-      # (e.g., "H1" is distinct from "h1").
+      #   node.css('img > @href') # returns all +href+ attributes on an +img+ element
+      #   node.css('img / @href') # same
       #
+      #   # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR DESCENDANTS!
+      #   node.css('div @class')
+      #
+      #   node.css('div > @class') # returns +class+ attributes from the +div+ elements only
+      #
+      # 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
+      #
+      # ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
+      # example:
+      #
+      #   # equivalent to 'li:nth-child(2)'
+      #   node.css('li[2]') # retrieve the second li element in a list
+      #
+      # ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
+      # tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
+      # you'll never find anything. However, "H1" might be found in an XML document, where tag
+      # names are case-sensitive (e.g., "H1" is distinct from "h1").
       def css(*args)
         rules, handler, ns, _ = extract_params(args)
 
@@ -147,18 +159,17 @@ def at_css(*args)
       #
       #   node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
       #
-      # Custom XPath functions may also be defined.  To define custom
-      # functions create a class and implement the function you want
-      # to define.  The first argument to the method will be the
-      # current matching NodeSet.  Any other arguments are ones that
-      # you pass in.  Note that this class may appear anywhere in the
-      # argument list.  For example:
+      # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
+      # implement the function you want to define. The first argument to the method will be the
+      # current matching NodeSet. Any other arguments are ones that you pass in. Note that this
+      # class may appear anywhere in the argument list. For example:
       #
-      #   node.xpath('.//title[regex(., "\w+")]', Class.new {
-      #     def regex node_set, regex
+      #   handler = Class.new {
+      #     def regex(node_set, regex)
       #       node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
       #     end
-      #   }.new)
+      #   }.new
+      #   node.xpath('.//title[regex(., "\w+")]', handler)
       #
       def xpath(*args)
         paths, handler, ns, binds = extract_params(args)