From f0ed1e0e8111739efee969a6b053e03ebf30fe6f Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 12 Jan 2022 08:20:52 -0500 Subject: [PATCH] fix: regression with XPath attributes in CSS selectors This commit removes "@" from the IDENT token so that we can create a new grammar rule in the parser for XPath attributes. Fixes #2419 --- lib/nokogiri/css/parser.rb | 516 +++++++++++++++++-------------- lib/nokogiri/css/parser.y | 12 + lib/nokogiri/css/tokenizer.rb | 4 +- lib/nokogiri/css/tokenizer.rex | 2 +- test/css/test_css_integration.rb | 42 +++ test/css/test_parser.rb | 8 + test/css/test_tokenizer.rb | 24 +- test/css/test_xpath_visitor.rb | 16 + 8 files changed, 377 insertions(+), 247 deletions(-) diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb index 5d67e4f16a..3e7f6211e4 100644 --- a/lib/nokogiri/css/parser.rb +++ b/lib/nokogiri/css/parser.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true # # DO NOT MODIFY!!!! # This file is automatically generated by Racc 1.6.0 @@ -38,190 +39,204 @@ def unescape_css_string(str) ##### State transition tables begin ### racc_action_table = [ - 24, 93, 56, 57, 33, 55, 94, 23, 24, 22, - 12, 93, 33, 27, 35, 52, 44, 22, -23, 25, - 45, 98, 23, 33, 26, 18, 20, 25, 27, -23, - 23, 24, 26, 18, 20, 33, 27, 11, 39, 24, - 22, 23, 74, 33, 18, 91, 90, 27, 22, 12, - 25, 24, -23, 23, 85, 26, 18, 20, 25, 27, - 66, 23, 24, 26, 18, 20, 33, 27, 86, 88, - 51, 22, 89, 92, 24, 26, 56, 87, 95, 60, - 90, 25, 96, 46, 23, 49, 26, 18, 20, 99, - 27, 33, 33, 51, 103, 104, 56, 58, 26, 60, - 93, 106, 33, 33, 33, 109, 39, 39, 110, 23, - 23, nil, 18, 18, 20, 27, 27, 39, 39, 39, - 23, 23, 23, 18, 18, 18, 27, 27, 27, 33, - 33, 101, 100, nil, 102, 22, 56, 87, nil, 60, - 33, nil, nil, nil, 39, 39, nil, 23, 23, nil, - 18, 18, 20, 27, 27, 39, 82, 83, 23, 56, - 87, 18, 60, nil, 27, 82, 83, 78, 79, 80, - nil, 81, nil, nil, nil, 77, 78, 79, 80, nil, - 81, 4, 5, 10, 77, 4, 5, 43, nil, 56, - 87, 6, 60, 8, 7, 6, nil, 8, 7, 4, - 5, 10, nil, nil, nil, nil, nil, nil, nil, 6, - nil, 8, 7 ] + 27, 11, 38, 99, 36, 12, 40, 26, 48, 25, + 49, 27, 100, 12, 30, 36, 105, 99, -26, 28, + 25, -26, 26, 27, 29, 14, 21, 23, 80, 30, + 28, 36, 72, 26, -26, 29, 14, 21, 23, 27, + 30, 91, 56, 36, 97, 96, 43, 29, 25, 26, + 27, 92, 94, 21, 36, 95, 30, 98, 28, 25, + 101, 26, 102, 29, 14, 21, 23, 96, 30, 28, + 36, 36, 26, 103, 29, 14, 21, 23, 27, 30, + 108, 107, 36, 109, 106, 43, 43, 25, 26, 26, + 27, 110, 21, 21, 111, 30, 30, 28, 99, 50, + 26, 53, 29, 14, 21, 23, 36, 30, 36, 56, + 61, 64, 113, 66, 29, 14, 116, 36, 118, 36, + nil, 43, nil, 43, 26, nil, 26, 14, 21, 23, + 21, 30, 43, 30, 43, 26, nil, 26, 36, 21, + 36, 21, 30, 25, 30, nil, nil, nil, nil, nil, + nil, 61, 62, 43, 60, 43, 26, nil, 26, nil, + 21, 23, 21, 30, 57, 30, 88, 89, 14, nil, + nil, 88, 89, nil, nil, nil, nil, 84, 85, 86, + nil, 87, 84, 85, 86, 83, 87, nil, 61, 93, + 83, 66, 61, 93, nil, 66, 61, 93, nil, 66, + 61, 93, nil, 66, nil, 14, nil, 61, 93, 14, + 66, nil, nil, 14, nil, nil, nil, 14, 4, 5, + 10, nil, nil, nil, 14, 4, 5, 47, 6, nil, + 8, 7, 4, 5, 10, 6, nil, 8, 7, nil, + nil, nil, 6, nil, 8, 7 ] racc_action_check = [ - 3, 58, 24, 24, 3, 24, 57, 15, 9, 3, - 64, 57, 9, 15, 11, 24, 18, 9, 58, 3, - 21, 64, 3, 14, 3, 3, 3, 9, 3, 22, - 9, 12, 9, 9, 9, 12, 9, 1, 14, 42, - 12, 14, 45, 42, 14, 55, 55, 14, 42, 1, - 12, 27, 46, 12, 49, 12, 12, 12, 42, 12, - 27, 42, 43, 42, 42, 42, 43, 42, 50, 53, - 27, 43, 54, 56, 23, 27, 51, 51, 59, 51, - 60, 43, 61, 23, 43, 23, 43, 43, 43, 75, - 43, 28, 25, 23, 84, 86, 25, 25, 23, 25, - 87, 91, 29, 30, 31, 94, 28, 25, 106, 28, - 25, nil, 28, 25, 25, 28, 25, 29, 30, 31, - 29, 30, 31, 29, 30, 31, 29, 30, 31, 32, - 39, 76, 76, nil, 76, 39, 90, 90, nil, 90, - 62, nil, nil, nil, 32, 39, nil, 32, 39, nil, - 32, 39, 39, 32, 39, 62, 47, 47, 62, 92, - 92, 62, 92, nil, 62, 48, 48, 47, 47, 47, - nil, 47, nil, nil, nil, 47, 48, 48, 48, nil, - 48, 0, 0, 0, 48, 17, 17, 17, nil, 93, - 93, 0, 93, 0, 0, 17, nil, 17, 17, 26, - 26, 26, nil, nil, nil, nil, nil, nil, nil, 26, - nil, 26, 26 ] + 3, 1, 11, 64, 3, 70, 14, 17, 21, 3, + 24, 9, 62, 1, 17, 9, 70, 62, 25, 3, + 9, 64, 3, 30, 3, 3, 3, 3, 49, 3, + 9, 16, 30, 9, 50, 9, 9, 9, 9, 12, + 9, 53, 30, 12, 60, 60, 16, 30, 12, 16, + 46, 54, 58, 16, 46, 59, 16, 61, 12, 46, + 63, 12, 65, 12, 12, 12, 12, 66, 12, 46, + 31, 32, 46, 67, 46, 46, 46, 46, 47, 46, + 82, 82, 47, 82, 81, 31, 32, 47, 31, 32, + 26, 90, 31, 32, 92, 31, 32, 47, 93, 26, + 47, 26, 47, 47, 47, 47, 28, 47, 33, 26, + 28, 28, 97, 28, 26, 26, 100, 34, 113, 35, + nil, 28, nil, 33, 28, nil, 33, 28, 28, 28, + 33, 28, 34, 33, 35, 34, nil, 35, 43, 34, + 68, 35, 34, 43, 35, nil, nil, nil, nil, nil, + nil, 27, 27, 43, 27, 68, 43, nil, 68, nil, + 43, 43, 68, 43, 27, 68, 51, 51, 27, nil, + nil, 52, 52, nil, nil, nil, nil, 51, 51, 51, + nil, 51, 52, 52, 52, 51, 52, nil, 56, 56, + 52, 56, 96, 96, nil, 96, 98, 98, nil, 98, + 99, 99, nil, 99, nil, 56, nil, 101, 101, 96, + 101, nil, nil, 98, nil, nil, nil, 99, 0, 0, + 0, nil, nil, nil, 101, 20, 20, 20, 0, nil, + 0, 0, 29, 29, 29, 20, nil, 20, 20, nil, + nil, nil, 29, nil, 29, 29 ] racc_action_pointer = [ - 174, 37, nil, -2, nil, nil, nil, nil, nil, 6, - nil, 14, 29, nil, 17, -17, nil, 178, 5, nil, - nil, -9, 0, 72, -8, 86, 192, 49, 85, 96, - 97, 98, 123, nil, nil, nil, nil, nil, nil, 124, - nil, nil, 37, 60, nil, 31, 23, 153, 162, 29, - 39, 66, nil, 46, 49, 34, 61, -1, -11, 55, - 68, 59, 134, nil, -2, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 64, 121, nil, nil, nil, - nil, nil, nil, nil, 69, nil, 84, 88, nil, nil, - 126, 94, 149, 179, 92, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 95, nil, nil, nil, - nil ] + 211, 1, nil, -2, nil, nil, nil, nil, nil, 9, + nil, 2, 37, nil, -5, nil, 25, -17, nil, nil, + 218, -3, nil, nil, -20, -12, 88, 141, 100, 225, + 21, 64, 65, 102, 111, 113, nil, nil, nil, nil, + nil, nil, nil, 132, nil, nil, 48, 76, nil, 17, + 4, 163, 168, 16, 21, nil, 178, nil, 29, 32, + 33, 45, 5, 48, -9, 39, 55, 50, 134, nil, + -7, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 59, 70, nil, nil, nil, nil, nil, nil, nil, + 66, nil, 83, 86, nil, nil, 182, 105, 186, 190, + 103, 197, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, 105, nil, nil, nil, nil, nil ] racc_action_default = [ - -75, -76, -2, -24, -4, -5, -6, -7, -8, -24, - -74, -76, -24, -3, -47, -10, -13, -17, -76, -19, - -20, -76, -22, -24, -76, -24, -75, -76, -53, -54, - -55, -56, -57, -58, -14, 111, -1, -9, -46, -24, - -11, -12, -24, -24, -18, -76, -29, -62, -62, -76, - -76, -76, -30, -76, -76, -38, -39, -40, -22, -76, - -38, -76, -71, -73, -76, -44, -45, -48, -49, -50, - -51, -52, -15, -16, -21, -76, -76, -63, -64, -65, - -66, -67, -68, -69, -76, -27, -76, -40, -31, -32, - -76, -43, -76, -76, -76, -33, -70, -72, -34, -25, - -59, -60, -61, -26, -28, -35, -76, -36, -37, -42, - -41 ] + -81, -82, -2, -27, -4, -5, -6, -7, -8, -27, + -80, -82, -27, -3, -82, -10, -53, -12, -15, -16, + -20, -82, -22, -23, -82, -25, -27, -82, -27, -81, + -82, -59, -60, -61, -62, -63, -64, -17, 119, -1, + -9, -11, -52, -27, -13, -14, -27, -27, -21, -82, + -32, -68, -68, -82, -82, -33, -82, -34, -82, -82, + -43, -44, -45, -46, -25, -82, -43, -82, -77, -79, + -82, -50, -51, -54, -55, -56, -57, -58, -18, -19, + -24, -82, -82, -69, -70, -71, -72, -73, -74, -75, + -82, -30, -82, -45, -35, -36, -82, -49, -82, -82, + -82, -82, -37, -76, -78, -38, -28, -65, -66, -67, + -29, -31, -39, -82, -40, -41, -48, -42, -47 ] racc_goto_table = [ - 53, 38, 13, 1, 42, 48, 62, 37, 34, 65, - 40, 36, 63, 75, 84, 67, 68, 69, 70, 71, - 62, 41, 50, 47, 54, nil, 63, nil, nil, 64, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 72, 73, nil, nil, nil, nil, nil, nil, 97, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 105, nil, 107, 108 ] + 58, 42, 13, 1, 46, 52, 15, 68, 37, 71, + 55, 39, 15, 69, 41, 15, 73, 74, 75, 76, + 77, 44, 68, 81, 90, 45, 54, 51, 69, 15, + 59, nil, 70, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 78, 79, nil, nil, 15, + 15, nil, nil, 104, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, 112, + nil, 114, 115, nil, 117 ] racc_goto_check = [ - 18, 12, 2, 1, 5, 9, 7, 8, 2, 9, - 10, 2, 12, 17, 17, 12, 12, 12, 12, 12, - 7, 11, 15, 16, 19, nil, 12, nil, nil, 1, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 2, 2, nil, nil, nil, nil, nil, nil, 12, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 18, nil, 18, 18 ] + 20, 14, 2, 1, 5, 11, 6, 9, 2, 11, + 7, 2, 6, 14, 10, 6, 14, 14, 14, 14, + 14, 12, 9, 19, 19, 13, 17, 18, 14, 6, + 21, nil, 1, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 2, 2, nil, nil, 6, + 6, nil, nil, 14, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, 20, + nil, 20, 20, nil, 20 ] racc_goto_pointer = [ - nil, 3, -1, nil, nil, -13, nil, -19, -7, -18, - -5, 6, -13, nil, nil, -1, 0, -34, -24, 0, - nil, nil, nil, nil ] + nil, 3, -1, nil, nil, -16, 3, -16, nil, -21, + -2, -21, 4, 8, -15, nil, nil, 0, 1, -28, + -27, 3, nil, nil, nil, nil ] racc_goto_default = [ - nil, nil, nil, 2, 3, 9, 17, 14, nil, 15, - 31, 30, 16, 29, 19, 21, nil, nil, 59, nil, - 28, 32, 76, 61 ] + nil, nil, nil, 2, 3, 9, 63, 19, 20, 16, + nil, 17, 34, 33, 18, 32, 22, 24, nil, nil, + 65, nil, 31, 35, 82, 67 ] racc_reduce_table = [ 0, 0, :racc_error, - 3, 32, :_reduce_1, - 1, 32, :_reduce_2, - 2, 32, :_reduce_3, - 1, 36, :_reduce_4, - 1, 36, :_reduce_5, - 1, 36, :_reduce_6, - 1, 36, :_reduce_7, - 1, 36, :_reduce_8, - 2, 37, :_reduce_9, - 1, 37, :_reduce_none, - 2, 37, :_reduce_11, - 2, 37, :_reduce_12, - 1, 37, :_reduce_13, - 2, 34, :_reduce_14, - 3, 33, :_reduce_15, - 3, 33, :_reduce_16, - 1, 33, :_reduce_none, - 2, 44, :_reduce_18, - 1, 38, :_reduce_none, - 1, 38, :_reduce_20, - 3, 45, :_reduce_21, - 1, 45, :_reduce_22, - 1, 46, :_reduce_23, - 0, 46, :_reduce_none, - 4, 42, :_reduce_25, - 4, 42, :_reduce_26, - 3, 42, :_reduce_27, - 3, 47, :_reduce_28, - 1, 47, :_reduce_29, - 2, 40, :_reduce_30, - 3, 40, :_reduce_31, - 3, 40, :_reduce_32, - 3, 40, :_reduce_33, - 3, 40, :_reduce_34, - 3, 49, :_reduce_35, - 3, 49, :_reduce_36, - 3, 49, :_reduce_37, - 1, 49, :_reduce_none, - 1, 49, :_reduce_none, - 1, 49, :_reduce_40, - 4, 50, :_reduce_41, - 3, 50, :_reduce_42, - 2, 50, :_reduce_43, - 2, 41, :_reduce_44, - 2, 41, :_reduce_45, - 1, 39, :_reduce_none, - 0, 39, :_reduce_none, - 2, 43, :_reduce_48, - 2, 43, :_reduce_49, - 2, 43, :_reduce_50, - 2, 43, :_reduce_51, - 2, 43, :_reduce_52, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 51, :_reduce_58, - 2, 48, :_reduce_59, - 2, 48, :_reduce_60, - 2, 48, :_reduce_61, - 0, 48, :_reduce_none, - 1, 53, :_reduce_63, - 1, 53, :_reduce_64, - 1, 53, :_reduce_65, - 1, 53, :_reduce_66, - 1, 53, :_reduce_67, - 1, 53, :_reduce_68, - 1, 53, :_reduce_69, - 3, 52, :_reduce_70, - 1, 54, :_reduce_none, - 2, 54, :_reduce_none, - 1, 54, :_reduce_none, - 1, 35, :_reduce_none, - 0, 35, :_reduce_none ] - -racc_reduce_n = 76 - -racc_shift_n = 111 + 3, 33, :_reduce_1, + 1, 33, :_reduce_2, + 2, 33, :_reduce_3, + 1, 37, :_reduce_4, + 1, 37, :_reduce_5, + 1, 37, :_reduce_6, + 1, 37, :_reduce_7, + 1, 37, :_reduce_8, + 2, 38, :_reduce_9, + 1, 39, :_reduce_10, + 2, 40, :_reduce_11, + 1, 40, :_reduce_none, + 2, 40, :_reduce_13, + 2, 40, :_reduce_14, + 1, 40, :_reduce_15, + 1, 40, :_reduce_none, + 2, 35, :_reduce_17, + 3, 34, :_reduce_18, + 3, 34, :_reduce_19, + 1, 34, :_reduce_none, + 2, 47, :_reduce_21, + 1, 41, :_reduce_none, + 1, 41, :_reduce_23, + 3, 48, :_reduce_24, + 1, 48, :_reduce_25, + 1, 49, :_reduce_26, + 0, 49, :_reduce_none, + 4, 45, :_reduce_28, + 4, 45, :_reduce_29, + 3, 45, :_reduce_30, + 3, 50, :_reduce_31, + 1, 50, :_reduce_32, + 1, 50, :_reduce_none, + 2, 43, :_reduce_34, + 3, 43, :_reduce_35, + 3, 43, :_reduce_36, + 3, 43, :_reduce_37, + 3, 43, :_reduce_38, + 3, 52, :_reduce_39, + 3, 52, :_reduce_40, + 3, 52, :_reduce_41, + 3, 52, :_reduce_42, + 1, 52, :_reduce_none, + 1, 52, :_reduce_none, + 1, 52, :_reduce_45, + 1, 52, :_reduce_none, + 4, 53, :_reduce_47, + 3, 53, :_reduce_48, + 2, 53, :_reduce_49, + 2, 44, :_reduce_50, + 2, 44, :_reduce_51, + 1, 42, :_reduce_none, + 0, 42, :_reduce_none, + 2, 46, :_reduce_54, + 2, 46, :_reduce_55, + 2, 46, :_reduce_56, + 2, 46, :_reduce_57, + 2, 46, :_reduce_58, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 54, :_reduce_64, + 2, 51, :_reduce_65, + 2, 51, :_reduce_66, + 2, 51, :_reduce_67, + 0, 51, :_reduce_none, + 1, 56, :_reduce_69, + 1, 56, :_reduce_70, + 1, 56, :_reduce_71, + 1, 56, :_reduce_72, + 1, 56, :_reduce_73, + 1, 56, :_reduce_74, + 1, 56, :_reduce_75, + 3, 55, :_reduce_76, + 1, 57, :_reduce_none, + 2, 57, :_reduce_none, + 1, 57, :_reduce_none, + 1, 36, :_reduce_none, + 0, 36, :_reduce_none ] + +racc_reduce_n = 82 + +racc_shift_n = 119 racc_token_table = { false => 0, @@ -251,12 +266,13 @@ def unescape_css_string(str) :LSQUARE => 24, :RSQUARE => 25, :HAS => 26, - "." => 27, - "*" => 28, - "|" => 29, - ":" => 30 } + "@" => 27, + "." => 28, + "*" => 29, + "|" => 30, + ":" => 31 } -racc_nt_base = 31 +racc_nt_base = 32 racc_use_result_var = true @@ -304,6 +320,7 @@ def unescape_css_string(str) "LSQUARE", "RSQUARE", "HAS", + "\"@\"", "\".\"", "\"*\"", "\"|\"", @@ -314,6 +331,8 @@ def unescape_css_string(str) "prefixless_combinator_selector", "optional_S", "combinator", + "xpath_attribute_name", + "xpath_attribute", "simple_selector", "element_name", "hcap_0toN", @@ -381,6 +400,16 @@ def _reduce_8(val, _values, result) end def _reduce_9(val, _values, result) + result = val.join + result +end + +def _reduce_10(val, _values, result) + result = Node.new(:ATTRIB_NAME, [val[0]]) + result +end + +def _reduce_11(val, _values, result) result = if val[1].nil? val[0] else @@ -390,21 +419,21 @@ def _reduce_9(val, _values, result) result end -# reduce 10 omitted +# reduce 12 omitted -def _reduce_11(val, _values, result) +def _reduce_13(val, _values, result) result = Node.new(:CONDITIONAL_SELECTOR, val) result end -def _reduce_12(val, _values, result) +def _reduce_14(val, _values, result) result = Node.new(:CONDITIONAL_SELECTOR, val) result end -def _reduce_13(val, _values, result) +def _reduce_15(val, _values, result) result = Node.new(:CONDITIONAL_SELECTOR, [Node.new(:ELEMENT_NAME, ['*']), val[0]] ) @@ -412,39 +441,41 @@ def _reduce_13(val, _values, result) result end -def _reduce_14(val, _values, result) +# reduce 16 omitted + +def _reduce_17(val, _values, result) result = Node.new(val[0], [nil, val[1]]) result end -def _reduce_15(val, _values, result) +def _reduce_18(val, _values, result) result = Node.new(val[1], [val[0], val[2]]) result end -def _reduce_16(val, _values, result) +def _reduce_19(val, _values, result) result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) result end -# reduce 17 omitted +# reduce 20 omitted -def _reduce_18(val, _values, result) +def _reduce_21(val, _values, result) result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) result end -# reduce 19 omitted +# reduce 22 omitted -def _reduce_20(val, _values, result) +def _reduce_23(val, _values, result) result = Node.new(:ELEMENT_NAME, val) result end -def _reduce_21(val, _values, result) +def _reduce_24(val, _values, result) result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')] ) @@ -452,21 +483,21 @@ def _reduce_21(val, _values, result) result end -def _reduce_22(val, _values, result) +def _reduce_25(val, _values, result) name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0] result = Node.new(:ELEMENT_NAME, [name]) result end -def _reduce_23(val, _values, result) +def _reduce_26(val, _values, result) result = val[0] result end -# reduce 24 omitted +# reduce 27 omitted -def _reduce_25(val, _values, result) +def _reduce_28(val, _values, result) result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []) ) @@ -474,7 +505,7 @@ def _reduce_25(val, _values, result) result end -def _reduce_26(val, _values, result) +def _reduce_29(val, _values, result) result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []) ) @@ -482,7 +513,7 @@ def _reduce_26(val, _values, result) result end -def _reduce_27(val, _values, result) +def _reduce_30(val, _values, result) # non-standard, from hpricot result = Node.new(:PSEUDO_CLASS, [Node.new(:FUNCTION, ['nth-child(', val[1]])] @@ -491,7 +522,7 @@ def _reduce_27(val, _values, result) result end -def _reduce_28(val, _values, result) +def _reduce_31(val, _values, result) result = Node.new(:ATTRIB_NAME, [[val[0], val[2]].compact.join(':')] ) @@ -499,7 +530,7 @@ def _reduce_28(val, _values, result) result end -def _reduce_29(val, _values, result) +def _reduce_32(val, _values, result) # Default namespace is not applied to attributes. # So we don't add prefix "xmlns:" as in namespaced_ident. result = Node.new(:ATTRIB_NAME, [val[0]]) @@ -507,56 +538,63 @@ def _reduce_29(val, _values, result) result end -def _reduce_30(val, _values, result) +# reduce 33 omitted + +def _reduce_34(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip]) result end -def _reduce_31(val, _values, result) +def _reduce_35(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_32(val, _values, result) +def _reduce_36(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_33(val, _values, result) +def _reduce_37(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_34(val, _values, result) +def _reduce_38(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_35(val, _values, result) +def _reduce_39(val, _values, result) result = [val[0], val[2]] result end -def _reduce_36(val, _values, result) +def _reduce_40(val, _values, result) result = [val[0], val[2]] result end -def _reduce_37(val, _values, result) +def _reduce_41(val, _values, result) result = [val[0], val[2]] result end -# reduce 38 omitted +def _reduce_42(val, _values, result) + result = [val[0], val[2]] + result +end -# reduce 39 omitted +# reduce 43 omitted -def _reduce_40(val, _values, result) +# reduce 44 omitted + +def _reduce_45(val, _values, result) case val[0] when 'even' result = Node.new(:NTH, ['2','n','+','0']) @@ -571,7 +609,9 @@ def _reduce_40(val, _values, result) result end -def _reduce_41(val, _values, result) +# reduce 46 omitted + +def _reduce_47(val, _values, result) if val[1] == 'n' result = Node.new(:NTH, val) else @@ -581,7 +621,7 @@ def _reduce_41(val, _values, result) result end -def _reduce_42(val, _values, result) +def _reduce_48(val, _values, result) # n+3, -n+3 if val[0] == 'n' val.unshift("1") @@ -597,7 +637,7 @@ def _reduce_42(val, _values, result) result end -def _reduce_43(val, _values, result) +def _reduce_49(val, _values, result) # 5n, -5n, 10n-1 n = val[1] if n[0, 2] == 'n-' @@ -617,133 +657,133 @@ def _reduce_43(val, _values, result) result end -def _reduce_44(val, _values, result) +def _reduce_50(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end -def _reduce_45(val, _values, result) +def _reduce_51(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end -# reduce 46 omitted +# reduce 52 omitted -# reduce 47 omitted +# reduce 53 omitted -def _reduce_48(val, _values, result) +def _reduce_54(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_49(val, _values, result) +def _reduce_55(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_50(val, _values, result) +def _reduce_56(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_51(val, _values, result) +def _reduce_57(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_52(val, _values, result) +def _reduce_58(val, _values, result) result = Node.new(:COMBINATOR, val) result end -# reduce 53 omitted +# reduce 59 omitted -# reduce 54 omitted +# reduce 60 omitted -# reduce 55 omitted +# reduce 61 omitted -# reduce 56 omitted +# reduce 62 omitted -# reduce 57 omitted +# reduce 63 omitted -def _reduce_58(val, _values, result) +def _reduce_64(val, _values, result) result = Node.new(:ID, [unescape_css_identifier(val[0])]) result end -def _reduce_59(val, _values, result) +def _reduce_65(val, _values, result) result = [val[0], unescape_css_identifier(val[1])] result end -def _reduce_60(val, _values, result) +def _reduce_66(val, _values, result) result = [val[0], unescape_css_string(val[1])] result end -def _reduce_61(val, _values, result) +def _reduce_67(val, _values, result) result = [val[0], val[1]] result end -# reduce 62 omitted +# reduce 68 omitted -def _reduce_63(val, _values, result) +def _reduce_69(val, _values, result) result = :equal result end -def _reduce_64(val, _values, result) +def _reduce_70(val, _values, result) result = :prefix_match result end -def _reduce_65(val, _values, result) +def _reduce_71(val, _values, result) result = :suffix_match result end -def _reduce_66(val, _values, result) +def _reduce_72(val, _values, result) result = :substring_match result end -def _reduce_67(val, _values, result) +def _reduce_73(val, _values, result) result = :not_equal result end -def _reduce_68(val, _values, result) +def _reduce_74(val, _values, result) result = :includes result end -def _reduce_69(val, _values, result) +def _reduce_75(val, _values, result) result = :dash_match result end -def _reduce_70(val, _values, result) +def _reduce_76(val, _values, result) result = Node.new(:NOT, [val[1]]) result end -# reduce 71 omitted +# reduce 77 omitted -# reduce 72 omitted +# reduce 78 omitted -# reduce 73 omitted +# reduce 79 omitted -# reduce 74 omitted +# reduce 80 omitted -# reduce 75 omitted +# reduce 81 omitted def _reduce_none(val, _values, result) val[0] diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y index 6f1733ffe9..69a26e8332 100644 --- a/lib/nokogiri/css/parser.y +++ b/lib/nokogiri/css/parser.y @@ -21,6 +21,14 @@ rule | SLASH { result = :CHILD_SELECTOR } ; + xpath_attribute_name: + '@' IDENT { result = val.join } + ; + + xpath_attribute: + xpath_attribute_name { result = Node.new(:ATTRIB_NAME, [val[0]]) } + ; + simple_selector: element_name hcap_0toN { result = if val[1].nil? @@ -41,6 +49,7 @@ rule [Node.new(:ELEMENT_NAME, ['*']), val[0]] ) } + | xpath_attribute ; prefixless_combinator_selector: @@ -115,6 +124,7 @@ rule # So we don't add prefix "xmlns:" as in namespaced_ident. result = Node.new(:ATTRIB_NAME, [val[0]]) } + | xpath_attribute ; function: @@ -139,6 +149,7 @@ rule NUMBER COMMA expr { result = [val[0], val[2]] } | STRING COMMA expr { result = [val[0], val[2]] } | IDENT COMMA expr { result = [val[0], val[2]] } + | xpath_attribute_name COMMA expr { result = [val[0], val[2]] } | NUMBER | STRING | IDENT { @@ -153,6 +164,7 @@ rule result = val end } + | xpath_attribute_name ; nth: diff --git a/lib/nokogiri/css/tokenizer.rb b/lib/nokogiri/css/tokenizer.rb index d173bc2c38..d2a5b2e7c5 100644 --- a/lib/nokogiri/css/tokenizer.rb +++ b/lib/nokogiri/css/tokenizer.rb @@ -63,10 +63,10 @@ def _next_token when (text = @ss.scan(/has\([\s]*/)) action { [:HAS, text] } - when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/)) + when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/)) action { [:FUNCTION, text] } - when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/)) + when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/)) action { [:IDENT, text] } when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/)) diff --git a/lib/nokogiri/css/tokenizer.rex b/lib/nokogiri/css/tokenizer.rex index 7db1d0e156..10fdf76cca 100644 --- a/lib/nokogiri/css/tokenizer.rex +++ b/lib/nokogiri/css/tokenizer.rex @@ -13,7 +13,7 @@ macro escape {unicode}|\\[^\n\r\f0-9A-Fa-f] nmchar [_A-Za-z0-9-]|{nonascii}|{escape} nmstart [_A-Za-z]|{nonascii}|{escape} - ident [-@]?({nmstart})({nmchar})* + ident -?({nmstart})({nmchar})* name ({nmchar})+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(? +
+ +
+
+
+
+ + HTML + + result = doc.css("div > @class") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div/@class") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div @class") + assert_equal(["first", "child", "second", "third", "fourth"], result.map(&:to_s)) + end + + it "handles xpath functions" do + doc = subject_class.parse(<<~HTML) + +
firstchild
+
second
+
third
+
fourth
+ + HTML + + result = doc.css("div > text()") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div/text()") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div text()") + assert_equal(["first", "child", "second", "third", "fourth"], result.map(&:to_s)) + end end end end diff --git a/test/css/test_parser.rb b/test/css/test_parser.rb index ac2f9ae20c..9c3175002a 100644 --- a/test/css/test_parser.rb +++ b/test/css/test_parser.rb @@ -37,5 +37,13 @@ class TestNokogiri < Nokogiri::TestCase [:FUNCTION, ["nth-child("], ["2"]],],], asts.first.to_a ) end + + it "parses xpath attributes" do + ast = parser.parse("a/@href").first + assert_equal( + [:CHILD_SELECTOR, [:ELEMENT_NAME, ["a"]], [:ATTRIB_NAME, ["@href"]]], + ast.to_a + ) + end end end diff --git a/test/css/test_tokenizer.rb b/test/css/test_tokenizer.rb index fa915ebd83..183a322e59 100644 --- a/test/css/test_tokenizer.rb +++ b/test/css/test_tokenizer.rb @@ -19,6 +19,14 @@ def setup @scanner = Nokogiri::CSS::Tokenizer.new end + def assert_tokens(tokens, scanner) + toks = [] + while (tok = @scanner.next_token) + toks << tok + end + assert_equal(tokens, toks) + end + def test_has @scanner.scan("a:has(b)") assert_tokens( @@ -189,12 +197,16 @@ def test_significant_space [:RSQUARE, "]"],], @scanner) end - def assert_tokens(tokens, scanner) - toks = [] - while (tok = @scanner.next_token) - toks << tok - end - assert_equal(tokens, toks) + def test_xpath_attributes + @scanner.scan("a/@href") + assert_tokens([[:IDENT, "a"], [:SLASH, "/"], ["@", "@"], [:IDENT, "href"]], + @scanner) + end + + def test_xpath_functions + @scanner.scan("a/text()") + assert_tokens([[:IDENT, "a"], [:SLASH, "/"], [:FUNCTION, "text("], [:RPAREN, ")"]], + @scanner) end end end diff --git a/test/css/test_xpath_visitor.rb b/test/css/test_xpath_visitor.rb index 751afb47e8..dd1e5dada1 100644 --- a/test/css/test_xpath_visitor.rb +++ b/test/css/test_xpath_visitor.rb @@ -371,6 +371,10 @@ def assert_xpath(expecteds, asts) it "handles text() (non-standard)" do assert_xpath("//a[child::text()]", parser.parse("a[text()]")) assert_xpath("//child::text()", parser.parse("text()")) + assert_xpath("//a//child::text()", parser.parse("a text()")) + assert_xpath("//a/child::text()", parser.parse("a / text()")) + assert_xpath("//a/child::text()", parser.parse("a > text()")) + assert_xpath("//a//child::text()", parser.parse("a text()")) end it "handles comment() (non-standard)" do @@ -615,6 +619,14 @@ def visit_pseudo_class_aaron(node) it "avoids the wildcard when using namespaces" do assert_xpath("//ns1:foo", parser.parse("ns1|foo")) end + + it "avoids the wildcard when using attribute selectors" do + if Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") + assert_xpath("//*:a/@href", parser.parse("a/@href")) + else + assert_xpath("//*[nokogiri-builtin:local-name-is('a')]/@href", parser.parse("a/@href")) + end + end end describe "builtins:never" do @@ -622,6 +634,10 @@ def visit_pseudo_class_aaron(node) it "matches on the element's local-name, ignoring namespaces" do assert_xpath("//*[local-name()='foo']", parser.parse("foo")) end + + it "avoids the wildcard when using attribute selectors" do + assert_xpath("//*[local-name()='a']/@href", parser.parse("a/@href")) + end end describe "builtins:optimal" do