From 2b6041f3ed7a0bbef64d33f53ba1b44043de2d74 Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Mon, 27 Dec 2021 16:38:25 +1100 Subject: [PATCH] When cloning, shallow clone the OwnerDocument This keeps the element's output settings preserved Fixes #763 --- CHANGES | 4 +++ src/main/java/org/jsoup/nodes/Document.java | 9 ++++++ src/main/java/org/jsoup/nodes/Element.java | 2 +- src/main/java/org/jsoup/nodes/Node.java | 9 ++++++ .../java/org/jsoup/nodes/ElementTest.java | 29 ++++++++++++++++++- src/test/java/org/jsoup/nodes/NodeTest.java | 22 ++++++++++++++ 6 files changed, 73 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index f2a6dae689..92c19cfcd5 100644 --- a/CHANGES +++ b/CHANGES @@ -21,6 +21,10 @@ jsoup changelog 30 to limit the indent level for very deeply nested elements, and may be disabled by setting to -1. + * Improvement: when cloning a Node or an Element, the clone gets a cloned OwnerDocument containing only that clone, so + as to preserve applicable settings, such as the Pretty Print settings. + + * Bugfix: boolean attribute names should be case-insensitive, but were not when the parser was configured to preserve case. diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 6e5d8e3202..b9ace3352c 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -338,6 +338,15 @@ public Document clone() { clone.outputSettings = this.outputSettings.clone(); return clone; } + + @Override + public Document shallowClone() { + Document clone = new Document(baseUri()); + if (attributes != null) + clone.attributes = attributes.clone(); + clone.outputSettings = this.outputSettings.clone(); + return clone; + } /** * Ensures a meta charset (html) or xml declaration (xml) with the current diff --git a/src/main/java/org/jsoup/nodes/Element.java b/src/main/java/org/jsoup/nodes/Element.java index 0a20456fba..795fc0100a 100644 --- a/src/main/java/org/jsoup/nodes/Element.java +++ b/src/main/java/org/jsoup/nodes/Element.java @@ -47,7 +47,7 @@ public class Element extends Node { private Tag tag; private @Nullable WeakReference> shadowChildrenRef; // points to child elements shadowed from node children List childNodes; - private @Nullable Attributes attributes; // field is nullable but all methods for attributes are non null + protected @Nullable Attributes attributes; // field is nullable but all methods for attributes are non null /** * Create a new, standalone element. diff --git a/src/main/java/org/jsoup/nodes/Node.java b/src/main/java/org/jsoup/nodes/Node.java index 27886f46b0..e0520ebee5 100644 --- a/src/main/java/org/jsoup/nodes/Node.java +++ b/src/main/java/org/jsoup/nodes/Node.java @@ -781,6 +781,15 @@ protected Node doClone(@Nullable Node parent) { clone.parentNode = parent; // can be null, to create an orphan split clone.siblingIndex = parent == null ? 0 : siblingIndex; + // if not keeping the parent, shallowClone the ownerDocument to preserve its settings + if (parent == null && !(this instanceof Document)) { + Document doc = ownerDocument(); + if (doc != null) { + Document docClone = doc.shallowClone(); + clone.parentNode = docClone; + docClone.ensureChildNodes().add(clone); + } + } return clone; } diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java index 563442bd04..712541dbcc 100644 --- a/src/test/java/org/jsoup/nodes/ElementTest.java +++ b/src/test/java/org/jsoup/nodes/ElementTest.java @@ -866,7 +866,10 @@ public void testClone() { Element p = doc.select("p").get(1); Element clone = p.clone(); - assertNull(clone.parent()); // should be orphaned + assertNotNull(clone.parentNode); // should be a cloned document just containing this clone + assertEquals(1, clone.parentNode.childNodeSize()); + assertSame(clone.ownerDocument(), clone.parentNode); + assertEquals(0, clone.siblingIndex); assertEquals(1, p.siblingIndex); assertNotNull(p.parent()); @@ -2099,4 +2102,28 @@ public void childNodesAccessorDoesNotVivify() { p.removeAttr("foo"); assertEquals(0, p.attributesSize()); } + + @Test void clonedElementsHaveOwnerDocsAndIndependentSettings() { + // https://github.com/jhy/jsoup/issues/763 + Document doc = Jsoup.parse("
Text
Two
"); + doc.outputSettings().prettyPrint(false); + Element div = doc.selectFirst("div"); + assertNotNull(div); + Node text = div.childNode(0); + assertNotNull(text); + + Element divClone = div.clone(); + Document docClone = divClone.ownerDocument(); + assertNotNull(docClone); + assertFalse(docClone.outputSettings().prettyPrint()); + assertNotSame(doc, docClone); + assertSame(docClone, divClone.childNode(0).ownerDocument()); + // the cloned text has same owner doc as the cloned div + + doc.outputSettings().prettyPrint(true); + assertTrue(doc.outputSettings().prettyPrint()); + assertFalse(docClone.outputSettings().prettyPrint()); + assertEquals(1, docClone.children().size()); // check did not get the second div as the owner's children + assertEquals(divClone, docClone.child(0)); // note not the head or the body -- not normalized + } } diff --git a/src/test/java/org/jsoup/nodes/NodeTest.java b/src/test/java/org/jsoup/nodes/NodeTest.java index 9dc1b2272d..fc8aaa7fce 100644 --- a/src/test/java/org/jsoup/nodes/NodeTest.java +++ b/src/test/java/org/jsoup/nodes/NodeTest.java @@ -332,4 +332,26 @@ private Attributes singletonAttributes() { attributes.put("value", "bar"); return attributes; } + + @Test void clonedNodesHaveOwnerDocsAndIndependentSettings() { + // https://github.com/jhy/jsoup/issues/763 + Document doc = Jsoup.parse("
Text
Two
"); + doc.outputSettings().prettyPrint(false); + Element div = doc.selectFirst("div"); + assertNotNull(div); + TextNode text = (TextNode) div.childNode(0); + assertNotNull(text); + + TextNode textClone = text.clone(); + Document docClone = textClone.ownerDocument(); + assertNotNull(docClone); + assertFalse(docClone.outputSettings().prettyPrint()); + assertNotSame(doc, docClone); + + doc.outputSettings().prettyPrint(true); + assertTrue(doc.outputSettings().prettyPrint()); + assertFalse(docClone.outputSettings().prettyPrint()); + assertEquals(1, docClone.childNodes().size()); // check did not get the second div as the owner's children + assertEquals(textClone, docClone.childNode(0)); // note not the head or the body -- not normalized + } }