Skip to content

Commit

Permalink
When cloning, shallow clone the OwnerDocument
Browse files Browse the repository at this point in the history
This preserves the element's output settings on the clone

Fixes #763
  • Loading branch information
jhy committed Dec 27, 2021
1 parent 95a767e commit 2b6041f
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Expand Up @@ -21,6 +21,10 @@ jsoup changelog
30 to limit the indent level for very deeply nested elements, and may be disabled by setting to -1.
<https://github.com/jhy/jsoup/pull/1655>

* Improvement: when cloning a Node or an Element, the clone gets a cloned OwnerDocument containing only that clone, so
as to preserve applicable settings, such as the Pretty Print settings.
<https://github.com/jhy/jsoup/issues/763>

* Bugfix: boolean attribute names should be case-insensitive, but were not when the parser was configured to preserve
case.
<https://github.com/jhy/jsoup/issues/1656>
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/org/jsoup/nodes/Document.java
Expand Up @@ -338,6 +338,15 @@ public Document clone() {
clone.outputSettings = this.outputSettings.clone();
return clone;
}

/**
 * Creates a new Document that shares this document's base URI and carries its own copies of this
 * document's attributes (when present) and output settings. No child nodes are copied by this method.
 * <p>
 * NOTE(review): only the attributes and output settings are carried over here; confirm whether any
 * other Document state should also be copied.
 *
 * @return a new Document with cloned attributes and output settings
 */
@Override
public Document shallowClone() {
    final Document copy = new Document(baseUri());
    if (attributes != null) {
        // attributes are created lazily, so only clone them when they exist
        copy.attributes = attributes.clone();
    }
    // give the copy its own settings object so later changes do not leak between documents
    copy.outputSettings = outputSettings.clone();
    return copy;
}

/**
* Ensures a meta charset (html) or xml declaration (xml) with the current
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/nodes/Element.java
Expand Up @@ -47,7 +47,7 @@ public class Element extends Node {
private Tag tag;
private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
List<Node> childNodes;
private @Nullable Attributes attributes; // field is nullable but all methods for attributes are non null
protected @Nullable Attributes attributes; // field is nullable but all methods for attributes are non null

/**
* Create a new, standalone element.
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/org/jsoup/nodes/Node.java
Expand Up @@ -781,6 +781,15 @@ protected Node doClone(@Nullable Node parent) {

clone.parentNode = parent; // can be null, to create an orphan split
clone.siblingIndex = parent == null ? 0 : siblingIndex;
// if not keeping the parent, shallowClone the ownerDocument to preserve its settings
if (parent == null && !(this instanceof Document)) {
Document doc = ownerDocument();
if (doc != null) {
Document docClone = doc.shallowClone();
clone.parentNode = docClone;
docClone.ensureChildNodes().add(clone);
}
}

return clone;
}
Expand Down
29 changes: 28 additions & 1 deletion src/test/java/org/jsoup/nodes/ElementTest.java
Expand Up @@ -866,7 +866,10 @@ public void testClone() {
Element p = doc.select("p").get(1);
Element clone = p.clone();

assertNull(clone.parent()); // should be orphaned
assertNotNull(clone.parentNode); // should be a cloned document just containing this clone
assertEquals(1, clone.parentNode.childNodeSize());
assertSame(clone.ownerDocument(), clone.parentNode);

assertEquals(0, clone.siblingIndex);
assertEquals(1, p.siblingIndex);
assertNotNull(p.parent());
Expand Down Expand Up @@ -2099,4 +2102,28 @@ public void childNodesAccessorDoesNotVivify() {
p.removeAttr("foo");
assertEquals(0, p.attributesSize());
}

@Test void clonedElementsHaveOwnerDocsAndIndependentSettings() {
    // Regression test for https://github.com/jhy/jsoup/issues/763
    Document sourceDoc = Jsoup.parse("<div>Text</div><div>Two</div>");
    sourceDoc.outputSettings().prettyPrint(false);
    Element firstDiv = sourceDoc.selectFirst("div");
    assertNotNull(firstDiv);
    Node firstDivText = firstDiv.childNode(0);
    assertNotNull(firstDivText);

    // cloning the element should hand it a distinct owner document carrying the same settings
    Element clonedDiv = firstDiv.clone();
    Document clonedOwner = clonedDiv.ownerDocument();
    assertNotNull(clonedOwner);
    assertFalse(clonedOwner.outputSettings().prettyPrint());
    assertNotSame(sourceDoc, clonedOwner);
    // the cloned child text must report the same owner document as the cloned div
    assertSame(clonedOwner, clonedDiv.childNode(0).ownerDocument());

    // changing the source document's settings afterwards must not affect the clone's owner
    sourceDoc.outputSettings().prettyPrint(true);
    assertTrue(sourceDoc.outputSettings().prettyPrint());
    assertFalse(clonedOwner.outputSettings().prettyPrint());

    // the owner holds only the cloned div (not the second div), and is not normalized into head / body
    assertEquals(1, clonedOwner.children().size());
    assertEquals(clonedDiv, clonedOwner.child(0));
}
}
22 changes: 22 additions & 0 deletions src/test/java/org/jsoup/nodes/NodeTest.java
Expand Up @@ -332,4 +332,26 @@ private Attributes singletonAttributes() {
attributes.put("value", "bar");
return attributes;
}

@Test void clonedNodesHaveOwnerDocsAndIndependentSettings() {
    // Regression test for https://github.com/jhy/jsoup/issues/763
    Document sourceDoc = Jsoup.parse("<div>Text</div><div>Two</div>");
    sourceDoc.outputSettings().prettyPrint(false);
    Element firstDiv = sourceDoc.selectFirst("div");
    assertNotNull(firstDiv);
    TextNode originalText = (TextNode) firstDiv.childNode(0);
    assertNotNull(originalText);

    // cloning a non-element node should also hand it a distinct owner document with the same settings
    TextNode clonedText = originalText.clone();
    Document clonedOwner = clonedText.ownerDocument();
    assertNotNull(clonedOwner);
    assertFalse(clonedOwner.outputSettings().prettyPrint());
    assertNotSame(sourceDoc, clonedOwner);

    // changing the source document's settings afterwards must not affect the clone's owner
    sourceDoc.outputSettings().prettyPrint(true);
    assertTrue(sourceDoc.outputSettings().prettyPrint());
    assertFalse(clonedOwner.outputSettings().prettyPrint());

    // the owner holds only the cloned text node (not the second div), and is not normalized into head / body
    assertEquals(1, clonedOwner.childNodes().size());
    assertEquals(clonedText, clonedOwner.childNode(0));
}
}

0 comments on commit 2b6041f

Please sign in to comment.