Skip to content

Commit

Permalink
Adding Dump, DumpTo, and Populate
Browse files Browse the repository at this point in the history
Adds the Dump and DumpTo query actions, and a Populate query option to provide
a reusable way to dump the node tree.

Additionally simplifies the package dump examples.
  • Loading branch information
kenshaw committed Feb 4, 2024
1 parent 42f356b commit ebf842c
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 26 deletions.
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2016-2023 Kenneth Shaw
Copyright (c) 2016-2024 Kenneth Shaw

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
77 changes: 60 additions & 17 deletions example_test.go
Expand Up @@ -13,13 +13,14 @@ import (
"strings"
"time"

"github.com/chromedp/chromedp"
"github.com/chromedp/chromedp/device"

"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/dom"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/cdproto/runtime"
"github.com/chromedp/cdproto/target"
"github.com/chromedp/chromedp"
"github.com/chromedp/chromedp/device"
)

func writeHTML(content string) http.Handler {
Expand Down Expand Up @@ -517,14 +518,63 @@ func ExampleFromNode() {
// Nested query from the document root: inner content
}

// Example_dump shows how to use chromedp.Dump to write a readable tree of the
// document: it loads a small page from a local test server, inserts a new div
// as the first child of the body via JavaScript, and dumps the resulting tree
// into a buffer that is then printed.
func Example_dump() {
ctx, cancel := chromedp.NewContext(context.Background())
defer cancel()

// Start a local HTTP test server for the page markup below. writeHTML is
// defined elsewhere in this file; presumably it returns a handler that serves
// the given content.
ts := httptest.NewServer(writeHTML(`<!doctype html>
<html>
<body>
<div id="content" style="display:block;">the content</div>
</body>
</html>`))
defer ts.Close()

// expr is a fmt template for a JavaScript snippet that creates a div with the
// given id and inner text, and inserts it before the body's first child node.
// Both arguments are substituted with %q so they arrive as quoted strings.
const expr = `(function(d, id, v) {
var b = d.querySelector('body');
var el = d.createElement('div');
el.id = id;
el.innerText = v;
b.insertBefore(el, b.childNodes[0]);
})(document, %q, %q);`

s := fmt.Sprintf(expr, "thing", "a new thing!")

// Navigate to the test page, wait for the original #content element, run the
// script, wait for the inserted #thing element, and finally dump the tree of
// the node selected by `document` into buf.
var buf bytes.Buffer
if err := chromedp.Run(ctx,
chromedp.Navigate(ts.URL),
chromedp.WaitVisible(`#content`),
// The evaluation result is not needed, so nil is passed as the result target.
chromedp.Evaluate(s, nil),
chromedp.WaitVisible(`#thing`),
chromedp.Dump(`document`, &buf, chromedp.ByJSPath),
); err != nil {
log.Fatal(err)
}

fmt.Println("Document tree:")
fmt.Print(buf.String())

// Output:
// Document tree:
// #document <Document>
// html <DocumentType>
// html
// head
// body
// div#thing
// #text "a new thing!"
// div#content [style="display:block;"]
// #text "the content"
}

func Example_documentDump() {
ctx, cancel := chromedp.NewContext(context.Background())
defer cancel()

ts := httptest.NewServer(writeHTML(`<!doctype html>
<html>
<body>
<div id="content">the content</div>
<div id="content" style="display:block;">the content</div>
</body>
</html>`))
defer ts.Close()
Expand All @@ -537,22 +587,15 @@ func Example_documentDump() {
b.insertBefore(el, b.childNodes[0]);
})(document, %q, %q);`

s := fmt.Sprintf(expr, "thing", "a new thing!")

var nodes []*cdp.Node
if err := chromedp.Run(ctx,
chromedp.Navigate(ts.URL),
chromedp.Nodes(`document`, &nodes, chromedp.ByJSPath),
chromedp.Nodes(`document`, &nodes,
chromedp.ByJSPath, chromedp.Populate(-1, true)),
chromedp.WaitVisible(`#content`),
chromedp.ActionFunc(func(ctx context.Context) error {
s := fmt.Sprintf(expr, "thing", "a new thing!")
_, exp, err := runtime.Evaluate(s).Do(ctx)
if err != nil {
return err
}
if exp != nil {
return exp
}
return nil
}),
chromedp.Evaluate(s, nil),
chromedp.WaitVisible(`#thing`),
); err != nil {
log.Fatal(err)
Expand All @@ -570,7 +613,7 @@ func Example_documentDump() {
// body
// div#thing
// #text "a new thing!"
// div#content
// div#content [style="display:block;"]
// #text "the content"
}

Expand All @@ -586,7 +629,7 @@ func ExampleFullScreenshot() {
log.Fatal(err)
}

if err := os.WriteFile("fullScreenshot.jpeg", buf, 0644); err != nil {
if err := os.WriteFile("fullScreenshot.jpeg", buf, 0o644); err != nil {
log.Fatal(err)
}
fmt.Println("wrote fullScreenshot.jpeg")
Expand Down
4 changes: 3 additions & 1 deletion go.mod
Expand Up @@ -3,9 +3,11 @@ module github.com/chromedp/chromedp
go 1.16

require (
github.com/chromedp/cdproto v0.0.0-20240127002248-bd7a66284627
github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732
github.com/gobwas/ws v1.3.2
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80
github.com/mailru/easyjson v0.7.7
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde
)

require golang.org/x/sys v0.16.0 // indirect
7 changes: 4 additions & 3 deletions go.sum
@@ -1,5 +1,5 @@
github.com/chromedp/cdproto v0.0.0-20240127002248-bd7a66284627 h1:L5rJ/yzLfSU3kcjsjq11xYDqAdianisL21CXQ/08Zag=
github.com/chromedp/cdproto v0.0.0-20240127002248-bd7a66284627/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 h1:XYUCaZrW8ckGWlCRJKCSoh/iFwlpX316a8yY9IFEzv8=
github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic=
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
Expand All @@ -16,5 +16,6 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
76 changes: 72 additions & 4 deletions query.go
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"io"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -33,7 +34,7 @@ type Selector struct {
exp int
by func(context.Context, *cdp.Node) ([]cdp.NodeID, error)
wait func(context.Context, *cdp.Frame, runtime.ExecutionContextID, ...cdp.NodeID) ([]*cdp.Node, error)
after func(context.Context, runtime.ExecutionContextID, ...*cdp.Node) error
after []func(context.Context, runtime.ExecutionContextID, ...*cdp.Node) error
}

// Query is a query action that queries the browser for specific element
Expand Down Expand Up @@ -211,8 +212,8 @@ func (s *Selector) Do(ctx context.Context) error {
if nodes == nil || err != nil {
return false, nil
}
if s.after != nil {
if err := s.after(ctx, execCtx, nodes...); err != nil {
for _, f := range s.after {
if err := f(ctx, execCtx, nodes...); err != nil {
return true, err
}
}
Expand Down Expand Up @@ -588,7 +589,45 @@ func RetryInterval(interval time.Duration) QueryOption {
// condition is true.
func After(f func(context.Context, runtime.ExecutionContextID, ...*cdp.Node) error) QueryOption {
return func(s *Selector) {
s.after = f
s.after = append(s.after, f)
}
}

// Populate is an element query option that causes the queried nodes to be
// retrieved for later use. Use a depth of -1 to retrieve all child nodes. When
// pierce is true, will pierce child containers (e.g. iframes and the like).
//
// The supplied populate options are applied each time the option runs. When
// they set a positive wait (see PopulateWait), Populate pauses for that long
// after requesting the child nodes of every matched node. If ctx is done
// before the wait elapses, the context's error is returned instead of
// blocking for the remainder of the wait.
//
// NOTE: this could be extremely resource intensive. Avoid doing this unless
// necessary.
func Populate(depth int64, pierce bool, opts ...PopulateOption) QueryOption {
	return After(func(ctx context.Context, execCtx runtime.ExecutionContextID, nodes ...*cdp.Node) error {
		// Resolve the wait interval from the options.
		var d time.Duration
		for _, o := range opts {
			o(&d)
		}

		// Request the children of each matched node, stopping at the first
		// failure.
		for _, n := range nodes {
			if err := dom.RequestChildNodes(n.NodeID).
				WithDepth(depth).
				WithPierce(pierce).
				Do(ctx); err != nil {
				return err
			}
		}

		if d <= 0 {
			return nil
		}

		// Wait, but stay responsive to cancellation: a bare receive from
		// time.After would keep a cancelled query blocked for the full wait.
		timer := time.NewTimer(d)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	})
}

// PopulateOption is an option for the Populate query option. It is a function
// that receives a pointer to the wait duration and may modify it.
type PopulateOption = func(*time.Duration)

// PopulateWait is a populate option that sets the interval to wait after
// requesting child nodes.
func PopulateWait(wait time.Duration) PopulateOption {
	setWait := func(dst *time.Duration) {
		*dst = wait
	}
	return setWait
}

Expand Down Expand Up @@ -1188,3 +1227,32 @@ func ScrollIntoView(sel interface{}, opts ...QueryOption) QueryAction {
return dom.ScrollIntoViewIfNeeded().WithNodeID(nodes[0].NodeID).Do(ctx)
}, opts...)
}

// DumpTo is an element query action that writes a readable tree of the first
// element node matching the selector and its children, up to the specified
// depth.
//
// The tree is written to w; prefix, indent, and nodeIDs are handed to the
// node's WriteTo method unchanged. depth and pierce are passed to Populate,
// and wait is passed to it through PopulateWait. The caller's opts are applied
// first, followed by the populate step and then the write step.
//
// See [Dump] for a simpler interface.
func DumpTo(sel interface{}, w io.Writer, prefix, indent string, nodeIDs bool, depth int64, pierce bool, wait time.Duration, opts ...QueryOption) QueryAction {
	// Build a fresh option slice rather than appending to opts: when the
	// caller passes a slice with spare capacity (s[:k]...), appending in place
	// would overwrite elements of the caller's backing array.
	all := make([]QueryOption, 0, len(opts)+2)
	all = append(all, opts...)
	all = append(all,
		Populate(depth, pierce, PopulateWait(wait)),
		After(func(ctx context.Context, execCtx runtime.ExecutionContextID, nodes ...*cdp.Node) error {
			// NOTE(review): when no nodes matched, WriteTo is called on a nil
			// *cdp.Node; presumably it tolerates a nil receiver. Confirm
			// against cdproto.
			var n *cdp.Node
			if len(nodes) > 0 {
				n = nodes[0]
			}
			_, err := n.WriteTo(w, prefix, indent, nodeIDs)
			return err
		}),
	)
	return Query(sel, all...)
}

// Dump is an element query action that writes a readable tree of the first
// element node matching the selector, together with its children, to w.
//
// It is shorthand for [DumpTo] with an empty prefix, the default indent,
// nodeIDs set to false, a depth of -1, pierce enabled, and an 80 millisecond
// wait. Use [DumpTo] directly to configure any of these, including the sleep
// wait timeout.
func Dump(sel interface{}, w io.Writer, opts ...QueryOption) QueryAction {
	const (
		prefix  = ""
		indent  = " "
		nodeIDs = false
		depth   = -1
		pierce  = true
		wait    = 80 * time.Millisecond
	)
	return DumpTo(sel, w, prefix, indent, nodeIDs, depth, pierce, wait, opts...)
}

0 comments on commit ebf842c

Please sign in to comment.