Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use an xpath query handler #258

Merged
merged 1 commit into from Sep 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 0 additions & 43 deletions lib/puppeteer/dom_world.rb
Expand Up @@ -539,49 +539,6 @@ def add_binding_to_context(context, binding_function)
handle.as_element
end

# Waits, via a Puppeteer::WaitTask, until the first node matching the XPath
# expression satisfies the requested visibility condition.
#
# @param xpath [String] XPath expression passed to document.evaluate in the page.
# @param visible [Boolean] Wait for element visible (neither 'display: none' nor 'visibility: hidden') on true. default to false.
# @param hidden [Boolean] Wait for element invisible ('display: none' or 'visibility: hidden') on true. default to false.
# @param timeout [Integer] Passed to the wait task; @timeout_settings.timeout is used when nil.
# @param root [Object, nil] Passed through unchanged to the wait task as its root.
# @return [Object, nil] the result of handle.as_element, or nil (after disposing the handle) when that is falsy.
def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil, root: nil)
  option_wait_for_visible = visible || false
  option_wait_for_hidden = hidden || false
  option_timeout = timeout || @timeout_settings.timeout

  # Any visibility condition switches the task to 'raf' polling; otherwise 'mutation' is used.
  polling = (option_wait_for_visible || option_wait_for_hidden) ? 'raf' : 'mutation'

  # The leading space keeps the suffix from being glued onto the expression
  # (previously rendered as e.g. "XPath //divto be hidden").
  title = "XPath #{xpath}#{option_wait_for_hidden ? ' to be hidden' : ''}"

  xpath_predicate = make_predicate_string(
    predicate_arg_def: '(root, selector, waitForVisible, waitForHidden)',
    predicate_body: <<~JAVASCRIPT
      const node = document.evaluate(selector, root, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
      return checkWaitForOptions(node, waitForVisible, waitForHidden);
    JAVASCRIPT
  )

  wait_task = Puppeteer::WaitTask.new(
    dom_world: self,
    predicate_body: xpath_predicate,
    title: title,
    polling: polling,
    timeout: option_timeout,
    args: [xpath, option_wait_for_visible, option_wait_for_hidden],
    root: root,
  )
  handle = wait_task.await_promise

  element = handle.as_element
  unless element
    # Not an element: release the handle instead of leaking it.
    handle.dispose
    return nil
  end
  element
end

# @param page_function [String]
# @param args [Array]
# @param polling [Integer|String]
Expand Down
39 changes: 9 additions & 30 deletions lib/puppeteer/element_handle.rb
Expand Up @@ -125,28 +125,14 @@ def wait_for_selector(selector, visible: nil, hidden: nil, timeout: nil)
# Defaults to `30000` (30 seconds). Pass `0` to disable timeout. The default
# value can be changed by using the {@link Page.setDefaultTimeout} method.
def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
# NOTE(review): this listing comes from a diff view without +/- markers; it appears
# to interleave the removed implementation (secondary-world wait + handle adoption)
# with the added one (the final wait_for_selector call) — confirm against the real file.
frame = @context.frame

secondary_world = frame.secondary_world
adopted_root = secondary_world.execution_context.adopt_element_handle(self)
# A leading '//' is rewritten to './/'; any other expression is used unchanged.
param_xpath =
if xpath.start_with?('//')
".#{xpath}"
else
xpath
end
# Expressions that do not start with './/' are rejected after releasing the adopted root.
unless param_xpath.start_with?('.//')
adopted_root.dispose
raise ArgumentError.new("Unsupported xpath expression: #{xpath}")
end
handle = secondary_world.wait_for_xpath(param_xpath, visible: visible, hidden: hidden, timeout: timeout, root: adopted_root)
adopted_root.dispose
return nil unless handle

# Re-adopt the found handle through the main world's execution context, then drop the original.
main_world = frame.main_world
result = main_world.execution_context.adopt_element_handle(handle)
handle.dispose
result
# Delegates to wait_for_selector with the expression behind the "xpath/" selector prefix.
wait_for_selector("xpath/#{param_xpath}", visible: visible, hidden: hidden, timeout: timeout)
end

define_async_method :async_wait_for_xpath
Expand Down Expand Up @@ -623,21 +609,14 @@ def eval_on_selector_all(selector, page_function, *args)
# Evaluates an XPath expression with this element as the context node.
#
# NOTE(review): this listing comes from a diff view without +/- markers; it appears
# to interleave the removed implementation (evaluate_handle with an inline JavaScript
# iterator) with the added one (query_selector_all with an "xpath/" selector) —
# confirm against the real file.
#
# @param expression [String]
# @return [Array<ElementHandle>]
def Sx(expression)
# In-page function: iterates an ORDERED_NODE_ITERATOR_TYPE result and collects every node into an array.
fn = <<~JAVASCRIPT
(element, expression) => {
const document = element.ownerDocument || element;
const iterator = document.evaluate(expression, element, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE);
const array = [];
let item;
while ((item = iterator.iterateNext()))
array.push(item);
return array;
}
JAVASCRIPT
handles = evaluate_handle(fn, expression)
properties = handles.properties
handles.dispose
# Keep only the properties that convert to elements.
properties.values.map(&:as_element).compact
# A leading '//' is rewritten to './/'; any other expression is used unchanged.
param_xpath =
if expression.start_with?('//')
".#{expression}"
else
expression
end

query_selector_all("xpath/#{param_xpath}")
end

define_async_method :async_Sx
Expand Down
25 changes: 16 additions & 9 deletions lib/puppeteer/frame.rb
Expand Up @@ -106,7 +106,14 @@ def query_selector(selector)
# Evaluates an XPath expression through query_selector_all using the "xpath/" selector prefix.
#
# NOTE(review): this listing comes from a diff view without +/- markers; the
# `@main_world.Sx(expression)` line appears to be the removed implementation and the
# rest the added one — confirm against the real file.
#
# @param {string} expression
# @return {!Promise<!Array<!Puppeteer.ElementHandle>>}
def Sx(expression)
@main_world.Sx(expression)
# A leading '//' is rewritten to './/'; any other expression is used unchanged.
param_xpath =
if expression.start_with?('//')
".#{expression}"
else
expression
end

query_selector_all("xpath/#{param_xpath}")
end

define_async_method :async_Sx
Expand Down Expand Up @@ -274,14 +281,14 @@ def wait_for_timeout(milliseconds)
# @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
# @param timeout [Integer]
def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
# NOTE(review): this listing comes from a diff view without +/- markers; it appears
# to interleave the removed implementation (secondary-world wait + adoption through the
# main world's execution context) with the added one (delegation to wait_for_selector) —
# confirm against the real file.
handle = @secondary_world.wait_for_xpath(xpath, visible: visible, hidden: hidden, timeout: timeout)
if !handle
return nil
end
main_execution_context = @main_world.execution_context
result = main_execution_context.adopt_element_handle(handle)
handle.dispose
result
# A leading '//' is rewritten to './/'; any other expression is used unchanged.
param_xpath =
if xpath.start_with?('//')
".#{xpath}"
else
xpath
end

# Delegates to wait_for_selector with the expression behind the "xpath/" selector prefix.
wait_for_selector("xpath/#{param_xpath}", visible: visible, hidden: hidden, timeout: timeout)
end

define_async_method :async_wait_for_xpath
Expand Down
35 changes: 35 additions & 0 deletions lib/puppeteer/query_handler_manager.rb
Expand Up @@ -6,6 +6,7 @@ class Puppeteer::QueryHandlerManager
# Returns the handler table keyed by handler name (:aria, :xpath).
# The table is built on first use and cached in @query_handlers.
#
# @return [Hash{Symbol => Object}]
def query_handlers
  return @query_handlers if @query_handlers

  table = {}
  table[:aria] = Puppeteer::AriaQueryHandler.new
  table[:xpath] = xpath_handler
  @query_handlers = table
end

Expand All @@ -16,6 +17,40 @@ def query_handlers
)
end

# Builds (once) and returns the custom query handler used for XPath selectors.
# The instance is cached in @xpath_handler.
#
# The JavaScript sources below are kept at a single indentation level so the
# strings handed to the handler stay byte-identical.
private def xpath_handler
  return @xpath_handler if @xpath_handler

  # In-page function returning the first node matched by the expression,
  # evaluated with `element` as the context node.
  first_match_js = <<~JAVASCRIPT
    (element, selector) => {
    const doc = element.ownerDocument || document;
    const result = doc.evaluate(
    selector,
    element,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE
    );
    return result.singleNodeValue;
    }
  JAVASCRIPT

  # In-page function collecting every node yielded by the result iterator into an array.
  all_matches_js = <<~JAVASCRIPT
    (element, selector) => {
    const doc = element.ownerDocument || document;
    const iterator = doc.evaluate(
    selector,
    element,
    null,
    XPathResult.ORDERED_NODE_ITERATOR_TYPE
    );
    const array = [];
    let item;
    while ((item = iterator.iterateNext())) {
    array.push(item);
    }
    return array;
    }
  JAVASCRIPT

  @xpath_handler = Puppeteer::CustomQueryHandler.new(
    query_one: first_match_js,
    query_all: all_matches_js,
  )
end

class Result
def initialize(query_handler:, selector:)
@query_handler = query_handler
Expand Down
71 changes: 71 additions & 0 deletions spec/integration/query_handler_spec.rb
@@ -0,0 +1,71 @@
require 'spec_helper'

# Integration examples for selectors written with the "xpath/" prefix, run
# against both the page and an element handle.
RSpec.describe 'Query handler tests' do
  # Reads the text content of the element behind +handle+ by evaluating in the page.
  def text_of(handle)
    handle.evaluate('el => el.textContent')
  end

  describe 'XPath selectors' do
    describe 'in Page' do
      it 'should query existing element' do
        page.content = '<section>test</section>'

        single = page.query_selector('xpath/html/body/section')
        expect(single).to be_a(Puppeteer::ElementHandle)
        expect(text_of(single)).to eq('test')

        matches = page.query_selector_all('xpath/html/body/section')
        expect(matches.size).to eq(1)
        only_match = matches.first
        expect(only_match).to be_a(Puppeteer::ElementHandle)
        expect(text_of(only_match)).to eq('test')
      end

      it 'should return empty array for non-existing element' do
        missing = page.query_selector('xpath/html/body/non-existing-element')
        expect(missing).to be_nil

        missing_all = page.query_selector_all('xpath/html/body/non-existing-element')
        expect(missing_all).to be_empty
      end

      it 'should return first element' do
        page.content = '<div>a</div><div>b</div>'

        first_div = page.query_selector('xpath/html/body/div')
        expect(text_of(first_div)).to eq('a')
      end

      it 'should return multiple elements' do
        page.content = '<div>a</div><div>b</div>'

        divs = page.query_selector_all('xpath/html/body/div')
        expect(divs.size).to eq(2)
        expect(text_of(divs.first)).to eq('a')
        expect(text_of(divs.last)).to eq('b')
      end
    end

    describe 'in ElementHandles' do
      it 'should query existing element' do
        page.content = '<span>outer</span><div class="a">a<span>inner</span></div>'
        container = page.query_selector('div')

        # The expression is relative to the div, so only the nested span matches.
        nested = container.query_selector('xpath/span')
        expect(nested).to be_a(Puppeteer::ElementHandle)
        expect(text_of(nested)).to eq('inner')

        nested_all = container.query_selector_all('xpath/span')
        expect(nested_all.size).to eq(1)
        only_nested = nested_all.first
        expect(only_nested).to be_a(Puppeteer::ElementHandle)
        expect(text_of(only_nested)).to eq('inner')
      end

      it 'should return null for non-existing element' do
        page.content = '<div class="a">a</div>'

        container = page.query_selector('div')
        missing = container.query_selector('xpath/div')
        expect(missing).to be_nil

        missing_all = container.query_selector_all('xpath/html/body/div')
        expect(missing_all).to be_empty
      end
    end
  end
end