From 22ab05060fdf0e4417e6c218245ec2e172cb39b5 Mon Sep 17 00:00:00 2001 From: Zeke Lu Date: Sat, 5 Jun 2021 23:39:23 +0800 Subject: [PATCH 1/5] use dom.ScrollIntoViewIfNeeded() to replace the js implementation --- input.go | 8 +------- js.go | 8 -------- query.go | 12 +----------- 3 files changed, 2 insertions(+), 26 deletions(-) diff --git a/input.go b/input.go index c808b2be..64cdcf31 100644 --- a/input.go +++ b/input.go @@ -60,14 +60,8 @@ func MouseClickNode(n *cdp.Node, opts ...MouseOption) MouseAction { if t == nil { return ErrInvalidTarget } - frameID := t.enclosingFrame(n) - t.frameMu.RLock() - execCtx := t.execContexts[frameID] - t.frameMu.RUnlock() - var pos []float64 - err := evalInCtx(ctx, execCtx, snippet(scrollIntoViewJS, cashX(true), nil, n), &pos) - if err != nil { + if err := dom.ScrollIntoViewIfNeeded().WithNodeID(n.NodeID).Do(ctx); err != nil { return err } diff --git a/js.go b/js.go index f506733d..79179f34 100644 --- a/js.go +++ b/js.go @@ -35,14 +35,6 @@ const ( return true; })(%s)` - // scrollIntoViewJS is a javascript snippet that scrolls the specified node - // into the window's viewport (if needed), returning the actual window x/y - // after execution. - scrollIntoViewJS = `(function(a) { - a.scrollIntoViewIfNeeded(true); - return [window.scrollX, window.scrollY]; - })(%s)` - // submitJS is a javascript snippet that will call the containing form's // submit function, returning true or false if the call was successful. 
submitJS = `(function(a) { diff --git a/query.go b/query.go index 939c1d7e..abc71f31 100644 --- a/query.go +++ b/query.go @@ -1202,16 +1202,6 @@ func ScrollIntoView(sel interface{}, opts ...QueryOption) QueryAction { return fmt.Errorf("selector %q did not return any nodes", sel) } - var pos []float64 - err := evalInCtx(ctx, execCtx, snippet(scrollIntoViewJS, cashX(true), sel, nodes[0]), &pos) - if err != nil { - return err - } - - if pos == nil { - return fmt.Errorf("could not scroll into node %d", nodes[0].NodeID) - } - - return nil + return dom.ScrollIntoViewIfNeeded().WithNodeID(nodes[0].NodeID).Do(ctx) }, opts...) } From b8009bc4eed57fb979b03fe9922d2b58daefd0ac Mon Sep 17 00:00:00 2001 From: Zeke Lu Date: Mon, 7 Jun 2021 18:45:30 +0800 Subject: [PATCH 2/5] extract parseRemoteObject function and improve the doc --- eval.go | 69 ++++++++++++++++++++++++++++++++------------------------- poll.go | 17 +------------- 2 files changed, 40 insertions(+), 46 deletions(-) diff --git a/eval.go b/eval.go index 0a6fbed5..e538017d 100644 --- a/eval.go +++ b/eval.go @@ -15,26 +15,31 @@ type EvaluateAction Action // Evaluate is an action to evaluate the Javascript expression, unmarshaling // the result of the script evaluation to res. // -// When res is a type other than *[]byte, or **runtime.RemoteObject, -// then the result of the script evaluation will be returned "by value" (ie, +// When res is nil, the script result will be ignored. +// +// When res is a *[]byte, the raw JSON-encoded value of the script +// result will be placed in res. +// +// When res is a **runtime.RemoteObject, res will be set to the low-level +// protocol type, and no attempt will be made to convert the result. +// Original objects are maintained in memory until the page is navigated or closed, +// unless they are either explicitly released or are released along with the +// other objects in their object group.
runtime.ReleaseObject or +// runtime.ReleaseObjectGroup can be used to ask the browser to release +// original objects. +// +// For all other cases, the result of the script will be returned "by value" (ie, // JSON-encoded), and subsequently an attempt will be made to json.Unmarshal // the script result to res. It returns an error if the script result is // "undefined" in this case. // -// Otherwise, when res is a *[]byte, the raw JSON-encoded value of the script -// result will be placed in res. Similarly, if res is a **runtime.RemoteObject, -// then res will be set to the low-level protocol type, and no attempt will be -// made to convert the result. "undefined" is okay in this case. -// -// When res is nil, the script result will be ignored (including "undefined"). -// // Note: any exception encountered will be returned as an error. func Evaluate(expression string, res interface{}, opts ...EvaluateOption) EvaluateAction { return ActionFunc(func(ctx context.Context) error { // set up parameters p := runtime.Evaluate(expression) switch res.(type) { - case nil, **runtime.RemoteObject: + case **runtime.RemoteObject: default: p = p.WithReturnByValue(true) } @@ -53,30 +58,34 @@ func Evaluate(expression string, res interface{}, opts ...EvaluateOption) Evalua return exp } - if res == nil { - return nil - } + return parseRemoteObject(v, res) + }) +} - switch x := res.(type) { - case **runtime.RemoteObject: - *x = v - return nil +func parseRemoteObject(v *runtime.RemoteObject, res interface{}) error { + if res == nil { + return nil + } - case *[]byte: - *x = []byte(v.Value) - return nil - } + switch x := res.(type) { + case **runtime.RemoteObject: + *x = v + return nil - if v.Type == "undefined" { - // The unmarshal above would fail with the cryptic - // "unexpected end of JSON input" error, so try to give - // a better one here. 
- return fmt.Errorf("encountered an undefined value") - } + case *[]byte: + *x = v.Value + return nil + } - // unmarshal - return json.Unmarshal(v.Value, res) - }) + if v.Type == "undefined" { + // The unmarshal below would fail with the cryptic + // "unexpected end of JSON input" error, so try to give + // a better one here. + return fmt.Errorf("encountered an undefined value") + } + + // unmarshal + return json.Unmarshal(v.Value, res) } // EvaluateAsDevTools is an action that evaluates a Javascript expression as diff --git a/poll.go b/poll.go index d48bece6..dc689575 100644 --- a/poll.go +++ b/poll.go @@ -88,22 +88,7 @@ func (p *pollTask) Do(ctx context.Context) error { return ErrPollingTimeout } - // it's okay to discard the result. - if p.res == nil { - return nil - } - - switch x := p.res.(type) { - case **runtime.RemoteObject: - *x = v - return nil - - case *[]byte: - *x = v.Value - return nil - default: - return json.Unmarshal(v.Value, p.res) - } + return parseRemoteObject(v, p.res) } // Poll is a poll action that will wait for a general Javascript predicate. From dc8dd876f7db1477c011a883dea5cc0052f33ec7 Mon Sep 17 00:00:00 2001 From: Zeke Lu Date: Mon, 7 Jun 2021 18:47:49 +0800 Subject: [PATCH 3/5] encapsulate runtime.CallFunctionOn The advantages of runtime.CallFunctionOn are: 1. it's safe to pass arguments into js; 2. the js function can be bound to a remote object (accessed by "this"). So that we can use runtime.CallFunctionOn to run js function without constructing js dynamically. --- call.go | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 call.go diff --git a/call.go b/call.go new file mode 100644 index 00000000..4b709393 --- /dev/null +++ b/call.go @@ -0,0 +1,67 @@ +package chromedp + +import ( + "context" + + "github.com/chromedp/cdproto/runtime" +) + +// CallAction are actions that call a Javascript function using +// runtime.CallFunctionOn.
+type CallAction Action + +// CallFunctionOn is an action to call a Javascript function, unmarshaling +// the result of the function to res. +// +// The handling of res is the same as that of Evaluate. +// +// Do not call the following methods on runtime.CallFunctionOnParams: +// - WithReturnByValue: it will be set depending on the type of res; +// - WithArguments: pass the arguments with args instead. +// +// Note: any exception encountered will be returned as an error. +func CallFunctionOn(functionDeclaration string, res interface{}, opt CallOption, args ...interface{}) CallAction { + return ActionFunc(func(ctx context.Context) error { + // set up parameters + p := runtime.CallFunctionOn(functionDeclaration). + WithSilent(true) + + switch res.(type) { + case nil, **runtime.RemoteObject: + default: + p = p.WithReturnByValue(true) + } + + // apply opt + if opt != nil { + p = opt(p) + } + + // arguments + if len(args) > 0 { + ea := &errAppender{args: make([]*runtime.CallArgument, 0, len(args))} + for _, arg := range args { + ea.append(arg) + } + if ea.err != nil { + return ea.err + } + p = p.WithArguments(ea.args) + } + + // call + v, exp, err := p.Do(ctx) + if err != nil { + return err + } + if exp != nil { + return exp + } + + return parseRemoteObject(v, res) + }) +} + +// CallOption is a function to modify the runtime.CallFunctionOnParams +// to provide more information. 
+type CallOption = func(params *runtime.CallFunctionOnParams) *runtime.CallFunctionOnParams From 94cf82e7ca65fb7eac9ea893673649f3c6b328e7 Mon Sep 17 00:00:00 2001 From: Zeke Lu Date: Tue, 8 Jun 2021 13:28:09 +0800 Subject: [PATCH 4/5] refactor the poll action to use the encapsulated CallFunctionOn func --- call.go | 21 +++++++++++++++++++ poll.go | 62 ++++++++++++++++----------------------------------------- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/call.go b/call.go index 4b709393..0f27619a 100644 --- a/call.go +++ b/call.go @@ -2,6 +2,7 @@ package chromedp import ( "context" + "encoding/json" "github.com/chromedp/cdproto/runtime" ) @@ -65,3 +66,23 @@ func CallFunctionOn(functionDeclaration string, res interface{}, opt CallOption, // CallOption is a function to modify the runtime.CallFunctionOnParams // to provide more information. type CallOption = func(params *runtime.CallFunctionOnParams) *runtime.CallFunctionOnParams + +// errAppender is to help accumulating the arguments and simplifying error checks. +// +// see https://blog.golang.org/errors-are-values +type errAppender struct { + args []*runtime.CallArgument + err error +} + +// append method calls the json.Marshal method to marshal the value and appends it to the slice. +// It records the first error for future reference. +// As soon as an error occurs, the append method becomes a no-op but the error value is saved. 
+func (ea *errAppender) append(v interface{}) { + if ea.err != nil { + return + } + var b []byte + b, ea.err = json.Marshal(v) + ea.args = append(ea.args, &runtime.CallArgument{Value: b}) +} diff --git a/poll.go b/poll.go index dc689575..f9307bc1 100644 --- a/poll.go +++ b/poll.go @@ -2,7 +2,6 @@ package chromedp import ( "context" - "encoding/json" "fmt" "time" @@ -55,40 +54,33 @@ func (p *pollTask) Do(ctx context.Context) error { t.frameMu.RUnlock() } - ea := &errAppender{args: make([]*runtime.CallArgument, 0, len(p.args)+3)} - ea.append(p.predicate) + args := make([]interface{}, 0, len(p.args)+3) + args = append(args, p.predicate) if p.interval > 0 { - ea.append(p.interval.Milliseconds()) + args = append(args, p.interval.Milliseconds()) } else { - ea.append(p.polling) + args = append(args, p.polling) } - ea.append(p.timeout.Milliseconds()) + args = append(args, p.timeout.Milliseconds()) for _, arg := range p.args { - ea.append(arg) - } - if ea.err != nil { - return ea.err + args = append(args, arg) } - v, exp, err := runtime.CallFunctionOn(waitForPredicatePageFunction). - WithExecutionContextID(execCtx). - WithReturnByValue(false). - WithAwaitPromise(true). - WithUserGesture(true). - WithArguments(ea.args). - Do(ctx) - if err != nil { - return err - } - if exp != nil { - return exp - } + err := CallFunctionOn(waitForPredicatePageFunction, p.res, + func(p *runtime.CallFunctionOnParams) *runtime.CallFunctionOnParams { + return p.WithExecutionContextID(execCtx). + WithAwaitPromise(true). + WithUserGesture(true) + }, + args..., + ).Do(ctx) - if v.Type == "undefined" { + // FIXME: sentinel error? + if err != nil && err.Error() == "encountered an undefined value" { return ErrPollingTimeout } - return parseRemoteObject(v, p.res) + return err } // Poll is a poll action that will wait for a general Javascript predicate. 
@@ -188,23 +180,3 @@ func WithPollingArgs(args ...interface{}) PollOption { w.args = args } } - -// errAppender is to help accumulating the arguments and simplifying error checks. -// -// see https://blog.golang.org/errors-are-values -type errAppender struct { - args []*runtime.CallArgument - err error -} - -// append method calls the json.Marshal method to marshal the value and appends it to the slice. -// It records the first error for future reference. -// As soon as an error occurs, the append method becomes a no-op but the error value is saved. -func (ea *errAppender) append(v interface{}) { - if ea.err != nil { - return - } - var b []byte - b, ea.err = json.Marshal(v) - ea.args = append(ea.args, &runtime.CallArgument{Value: b}) -} From 0de4f6033f8b8f68ebcfaffdf26797c2a630b5d9 Mon Sep 17 00:00:00 2001 From: Zeke Lu Date: Fri, 4 Jun 2021 19:23:37 +0800 Subject: [PATCH 5/5] replace Evaluate with CallFunctionOn to execute js on node Note: there is a performance cost here. Before the changes, just one CDP command is sent: - Runtime.evaluate After the changes, three CDP commands are sent: - DOM.resolveNode: to get the RemoteObjectId from NodeId - Runtime.callFunctionOn - Runtime.releaseObject --- js.go | 122 ++++++++++++++++++------------------------------------- query.go | 50 ++++++++++++++--------- 2 files changed, 69 insertions(+), 103 deletions(-) diff --git a/js.go b/js.go index 79179f34..39b5b50f 100644 --- a/js.go +++ b/js.go @@ -1,89 +1,76 @@ package chromedp -import ( - "fmt" - - "github.com/chromedp/cdproto/cdp" -) - const ( - // textJS is a javascript snippet that returns the concatenated innerText of all - // visible (ie, offsetWidth || offsetHeight || getClientRects().length ) children.
- textJS = `(function(a) { - var s = ''; - for (var i = 0; i < a.length; i++) { - if (a[i].offsetWidth || a[i].offsetHeight || a[i].getClientRects().length) { - s += a[i].innerText; - } + // textJS is a javascript snippet that returns the innerText of the specified + // visible (ie, offsetWidth || offsetHeight || getClientRects().length ) element. + textJS = `function text() { + if (this.offsetWidth || this.offsetHeight || this.getClientRects().length) { + return this.innerText; } - return s; - })(%s)` + return ''; + }` - // textContentJS is a javascript snippet that returns the concatenated textContent - // of all children. - textContentJS = `(function(a) { - var s = ''; - for (var i = 0; i < a.length; i++) { - s += a[i].textContent; - } - return s; - })(%s)` + // textContentJS is a javascript snippet that returns the textContent of the + // specified element. + textContentJS = `function textContent() { + return this.textContent; + }` // blurJS is a javascript snippet that blurs the specified element. - blurJS = `(function(a) { - a.blur(); + blurJS = `function blur() { + this.blur(); return true; - })(%s)` + }` // submitJS is a javascript snippet that will call the containing form's // submit function, returning true or false if the call was successful. - submitJS = `(function(a) { - if (a.nodeName === 'FORM') { - HTMLFormElement.prototype.submit.call(a); + submitJS = `function submit() { + if (this.nodeName === 'FORM') { + HTMLFormElement.prototype.submit.call(this); return true; - } else if (a.form !== null) { - HTMLFormElement.prototype.submit.call(a.form); + } else if (this.form !== null) { + HTMLFormElement.prototype.submit.call(this.form); return true; } return false; - })(%s)` + }` // resetJS is a javascript snippet that will call the containing form's // reset function, returning true or false if the call was successful. 
- resetJS = `(function(a) { - if (a.nodeName === 'FORM') { - HTMLFormElement.prototype.reset.call(a); + resetJS = `function reset() { + if (this.nodeName === 'FORM') { + HTMLFormElement.prototype.reset.call(this); return true; - } else if (a.form !== null) { - HTMLFormElement.prototype.reset.call(a.form); + } else if (this.form !== null) { + HTMLFormElement.prototype.reset.call(this.form); return true; } return false; - })(%s)` + }` // attributeJS is a javascript snippet that returns the attribute of a specified // node. - attributeJS = `(function(a, n) { - return a[n]; - })(%s, %q)` + attributeJS = `function attribute(n) { + return this[n]; + }` // setAttributeJS is a javascript snippet that sets the value of the specified // node, and returns the value. - setAttributeJS = `(function(a, n, v) { - a[n] = v; + setAttributeJS = `function setAttribute(n, v) { + this[n] = v; if (n === 'value') { - a.dispatchEvent(new Event('input', { bubbles: true })); - a.dispatchEvent(new Event('change', { bubbles: true })); + this.dispatchEvent(new Event('input', { bubbles: true })); + this.dispatchEvent(new Event('change', { bubbles: true })); } - return a[n]; - })(%s, %q, %q)` + return this[n]; + }` // visibleJS is a javascript snippet that returns true or false depending on if // the specified node's offsetWidth, offsetHeight or getClientRects().length is // not null. - visibleJS = `(function(a) { - return Boolean( a.offsetWidth || a.offsetHeight || a.getClientRects().length ); - })(%s)` + visibleJS = `function visible() { + return Boolean( this.offsetWidth || this.offsetHeight || this.getClientRects().length ); + }` // waitForPredicatePageFunction is a javascript snippet that runs the polling in the // browser. It's copied from puppeteer. See @@ -168,34 +155,3 @@ const ( } }` ) - -// snippet builds a Javascript expression snippet. 
-func snippet(js string, f func(n *cdp.Node) string, sel interface{}, n *cdp.Node, v ...interface{}) string { - switch s := sel.(type) { - case *Selector: - if s != nil && s.raw { - return fmt.Sprintf(js, append([]interface{}{s.selAsString()}, v...)...) - } - } - return fmt.Sprintf(js, append([]interface{}{f(n)}, v...)...) -} - -// cashX returns the $x() expression using the node's full xpath value. -func cashX(flatten bool) func(*cdp.Node) string { - return func(n *cdp.Node) string { - if flatten { - return fmt.Sprintf(`$x(%q)[0]`, n.PartialXPath()) - } - return fmt.Sprintf(`$x(%q)`, n.PartialXPath()) - } -} - -// cashXNode returns the $x(/node()) expression using the node's full xpath value. -func cashXNode(flatten bool) func(*cdp.Node) string { - return func(n *cdp.Node) string { - if flatten { - return fmt.Sprintf(`$x(%q)[0]`, n.PartialXPath()+"/node()") - } - return fmt.Sprintf(`$x(%q)`, n.PartialXPath()+"/node()") - } -} diff --git a/query.go b/query.go index abc71f31..7e0900ed 100644 --- a/query.go +++ b/query.go @@ -34,7 +34,6 @@ type Selector struct { by func(context.Context, *cdp.Node) ([]cdp.NodeID, error) wait func(context.Context, *cdp.Frame, runtime.ExecutionContextID, ...cdp.NodeID) ([]*cdp.Node, error) after func(context.Context, runtime.ExecutionContextID, ...*cdp.Node) error - raw bool } // Query is a query action that queries the browser for specific element @@ -358,7 +357,6 @@ func BySearch(s *Selector) { // Note: Do not use with an untrusted selector value, as any defined selector // will be passed to runtime.Evaluate. func ByJSPath(s *Selector) { - s.raw = true ByFunc(func(ctx context.Context, n *cdp.Node) ([]cdp.NodeID, error) { // set up eval command p := runtime.Evaluate(s.selAsString()). 
@@ -425,15 +423,28 @@ func NodeReady(s *Selector) { WaitFunc(s.waitReady(nil))(s) } -func withContextID(id runtime.ExecutionContextID) EvaluateOption { - return func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithContextID(id) +func callFunctionOnNode(ctx context.Context, node *cdp.Node, function string, res interface{}, args ...interface{}) error { + r, err := dom.ResolveNode().WithNodeID(node.NodeID).Do(ctx) + if err != nil { + return err } -} + err = CallFunctionOn(function, &res, + func(p *runtime.CallFunctionOnParams) *runtime.CallFunctionOnParams { + return p.WithObjectID(r.ObjectID) + }, + args..., + ).Do(ctx) + + if err != nil { + return err + } + + // Try to release the remote object. + // It will fail if the page is navigated or closed, + // and it's okay to ignore the error in this case. + _ = runtime.ReleaseObject(r.ObjectID).Do(ctx) -func evalInCtx(ctx context.Context, execCtx runtime.ExecutionContextID, expression string, res interface{}, opts ...EvaluateOption) error { - allOpts := append([]EvaluateOption{withContextID(execCtx)}, opts...) 
- return EvaluateAsDevTools(expression, &res, allOpts...).Do(ctx) + return nil } // NodeVisible is an element query option to wait until all queried element @@ -452,7 +463,7 @@ func NodeVisible(s *Selector) { // check visibility var res bool - err = evalInCtx(ctx, execCtx, snippet(visibleJS, cashX(true), s, n), &res, withContextID(execCtx)) + err = callFunctionOnNode(ctx, n, visibleJS, &res) if err != nil { return err } @@ -479,7 +490,7 @@ func NodeNotVisible(s *Selector) { // check visibility var res bool - err = evalInCtx(ctx, execCtx, snippet(visibleJS, cashX(true), s, n), &res) + err = callFunctionOnNode(ctx, n, visibleJS, &res) if err != nil { return err } @@ -648,7 +659,7 @@ func Blur(sel interface{}, opts ...QueryOption) QueryAction { } var res bool - err := evalInCtx(ctx, execCtx, snippet(blurJS, cashX(true), sel, nodes[0]), &res) + err := callFunctionOnNode(ctx, nodes[0], blurJS, &res) if err != nil { return err } @@ -689,7 +700,7 @@ func Text(sel interface{}, text *string, opts ...QueryOption) QueryAction { return fmt.Errorf("selector %q did not return any nodes", sel) } - return evalInCtx(ctx, execCtx, snippet(textJS, cashX(false), sel, nodes[0]), text) + return callFunctionOnNode(ctx, nodes[0], textJS, text) }, opts...) } @@ -705,7 +716,7 @@ func TextContent(sel interface{}, text *string, opts ...QueryOption) QueryAction return fmt.Errorf("selector %q did not return any nodes", sel) } - return evalInCtx(ctx, execCtx, snippet(textContentJS, cashX(false), sel, nodes[0]), text) + return callFunctionOnNode(ctx, nodes[0], textContentJS, text) }, opts...) } @@ -931,11 +942,10 @@ func JavascriptAttribute(sel interface{}, name string, res interface{}, opts ... 
return fmt.Errorf("selector %q did not return any nodes", sel) } - if err := evalInCtx(ctx, execCtx, - snippet(attributeJS, cashX(true), sel, nodes[0], name), res, - ); err != nil { + if err := callFunctionOnNode(ctx, nodes[0], attributeJS, res, name); err != nil { return fmt.Errorf("could not retrieve attribute %q: %w", name, err) } + return nil }, opts...) } @@ -949,7 +959,7 @@ func SetJavascriptAttribute(sel interface{}, name, value string, opts ...QueryOp } var res string - err := evalInCtx(ctx, execCtx, snippet(setAttributeJS, cashX(true), sel, nodes[0], name, value), &res) + err := callFunctionOnNode(ctx, nodes[0], setAttributeJS, &res, name, value) if err != nil { return err } @@ -1109,7 +1119,7 @@ func Submit(sel interface{}, opts ...QueryOption) QueryAction { } var res bool - err := evalInCtx(ctx, execCtx, snippet(submitJS, cashX(true), sel, nodes[0]), &res) + err := callFunctionOnNode(ctx, nodes[0], submitJS, &res) if err != nil { return err } @@ -1131,7 +1141,7 @@ func Reset(sel interface{}, opts ...QueryOption) QueryAction { } var res bool - err := evalInCtx(ctx, execCtx, snippet(resetJS, cashX(true), sel, nodes[0]), &res) + err := callFunctionOnNode(ctx, nodes[0], resetJS, &res) if err != nil { return err }