Skip to content

Commit

Permalink
add a new XmlWhitespaceStrippedSource that honors XML's idea of white…
Browse files Browse the repository at this point in the history
  • Loading branch information
bodewig committed Mar 29, 2023
1 parent 03b1999 commit 6e334a4
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 14 deletions.
6 changes: 6 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
[#38](https://github.com/xmlunit/xmlunit.net/issues/38). And neither
of the methods could deal with `XmlSignificantWhitespace` at all.

* add `XmlWhitespaceStrippedSource` that only trims characters that
are considered whitespace by the [XML
Specification](https://www.w3.org/TR/xml11/#NT-S) from textual
content.
Issue [xmlunit/#260](https://github.com/xmlunit/xmlunit/issues/260).

## XMLUnit.NET 2.9.2 - /Released 2023-03-16/

* added `NodeFilters#SatisfiesAll` and `SatisfiesAny` methods to make
Expand Down
43 changes: 43 additions & 0 deletions src/main/net-core/Input/XmlWhitespaceStrippedSource.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
This file is licensed to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

using Org.XmlUnit.Util;

namespace Org.XmlUnit.Input {

/// <summary>
/// A source that is obtained from a different source by removing
/// all empty text nodes and removing all characters XML considers
/// whitespace at the start and end of the text content of the
/// non-empty ones. <see href="https://www.w3.org/TR/xml11/#NT-S"/>
/// </summary>
/// <remarks>
/// <para>
/// Only the characters XML itself defines as whitespace (space,
/// tab, carriage return and line feed) are trimmed. Other
/// characters, for example the no-break space (U+00A0), are kept
/// as part of the text content.
/// </para>
/// <para>
/// If you only want to remove text nodes consisting solely of
/// whitespace (AKA element content whitespace) but leave all
/// other text nodes alone you should use
/// ElementContentWhitespaceStrippedSource instead.
/// </para>
/// </remarks>
public class XmlWhitespaceStrippedSource : DOMSource {
/// <summary>
/// Creates a new Source with the same content as another source,
/// trimming XML whitespace from its textual content.
/// </summary>
/// <remarks>
/// The content is the result of applying
/// <see cref="Nodes.StripXmlWhitespace"/> to the original source's
/// document. The SystemId of the original source is carried over.
/// </remarks>
/// <param name="originalSource">source with the original content</param>
public XmlWhitespaceStrippedSource(ISource originalSource) :
base(Nodes.StripXmlWhitespace(originalSource.ToDocument())) {
SystemId = originalSource.SystemId;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
<Compile Include="..\Input\StringSource.cs" />
<Compile Include="..\Input\WhitespaceNormalizedSource.cs" />
<Compile Include="..\Input\WhitespaceStrippedSource.cs" />
<Compile Include="..\Input\XmlWhitespaceStrippedSource.cs" />
<Compile Include="..\ISource.cs" />
<Compile Include="..\Transform\Transformation.cs" />
<Compile Include="..\Util\Convert.cs" />
Expand Down
56 changes: 46 additions & 10 deletions src/main/net-core/Util/Nodes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,22 @@ public static class Nodes {
public static XmlNode StripWhitespace(XmlNode original) {
XmlNode cloned = original.CloneNode(true);
cloned.Normalize();
HandleWsRec(cloned, false);
HandleWsRec(cloned, TrimValue);
return cloned;
}

/// <summary>
/// Creates a new Node (of the same type as the original node)
/// that is similar to the original but doesn't contain any
/// empty text or CDATA nodes and where all textual content
/// including attribute values or comments are trimmed of
/// characters XML considers whitespace according to
/// <see href="https://www.w3.org/TR/xml11/#NT-S"/>.
/// </summary>
/// <remarks>
/// The work is done on a deep clone, the original node is not
/// modified.
/// </remarks>
/// <param name="original">the node to create a stripped copy of</param>
/// <returns>the stripped deep clone of the original node</returns>
public static XmlNode StripXmlWhitespace(XmlNode original) {
// deep clone so the caller's node stays untouched
XmlNode cloned = original.CloneNode(true);
// merge adjacent text nodes before trimming them
cloned.Normalize();
HandleWsRec(cloned, XmlTrimValue);
return cloned;
}

Expand All @@ -104,7 +119,7 @@ public static class Nodes {
public static XmlNode NormalizeWhitespace(XmlNode original) {
XmlNode cloned = original.CloneNode(true);
cloned.Normalize();
HandleWsRec(cloned, true);
HandleWsRec(cloned, TrimAndNormalizeValue);
return cloned;
}

Expand All @@ -129,23 +144,44 @@ public static class Nodes {
return cloned;
}

/// <summary>
/// Returns the node's value with all leading and trailing
/// whitespace removed, using the notion of whitespace of
/// <c>String.Trim()</c>.
/// </summary>
/// <param name="n">the node whose value is read; the node itself
/// is not modified</param>
/// <returns>the trimmed value</returns>
private static String TrimValue(XmlNode n) {
return n.Value.Trim();
}

/// <summary>
/// Returns the node's value trimmed of all whitespace and
/// afterwards passed through <c>Normalize</c>.
/// </summary>
/// <param name="n">the node whose value is read; the node itself
/// is not modified</param>
/// <returns>the trimmed and normalized value</returns>
private static String TrimAndNormalizeValue(XmlNode n) {
return Normalize(TrimValue(n));
}

// The characters XML considers whitespace (production S of the
// XML specification): space, carriage return, line feed and tab.
private static readonly char[] XML_WHITESPACE_CHARS = {
' ', '\r', '\n', '\t'
};

/// <summary>
/// Returns the node's value with all leading and trailing
/// characters XML considers whitespace removed. Any other
/// character is left in place.
/// </summary>
/// <param name="n">the node whose value is read; the node itself
/// is not modified</param>
/// <returns>the trimmed value</returns>
private static String XmlTrimValue(XmlNode n) {
return n.Value.Trim(XML_WHITESPACE_CHARS);
}

/// <summary>
/// Trims textual content of this node, removes empty text and
/// CDATA children, recurses into its child nodes.
/// </summary>
/// <param name="n">the node to process</param>
/// <param name="handleWs">function that computes the new value
/// for a node with textual content</param>
private static void HandleWsRec(XmlNode n, bool normalize) {
private static void HandleWsRec(XmlNode n, Func<XmlNode, String> handleWs) {
if (n is XmlCharacterData || n is XmlProcessingInstruction) {
string s = n.Value.Trim();
if (normalize) {
s = Normalize(s);
}
n.Value = s;
n.Value = handleWs(n);
}
LinkedList<XmlNode> toRemove = new LinkedList<XmlNode>();
foreach (XmlNode child in n.ChildNodes) {
HandleWsRec(child, normalize);
HandleWsRec(child, handleWs);
if (!(n is XmlAttribute)
&& IsTextualContentNode(child)
&& child.Value.Length == 0) {
Expand All @@ -158,7 +194,7 @@ public static class Nodes {
XmlNamedNodeMap attrs = n.Attributes;
if (attrs != null) {
foreach (XmlAttribute a in attrs) {
HandleWsRec(a, normalize);
HandleWsRec(a, handleWs);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/tests/net-core/Input/WhitespaceStrippedSourceTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class WhitespaceStrippedSourceTest {
}

private void WhitespaceIsStrippedProperly(XmlDocument testDoc) {
string testXml = "<a>\n <b>\n Test Value\n </b>\n</a>";
string testXml = "<a>\n <b>\n Test Value\u00a0\n </b>\n</a>";
testDoc.LoadXml(testXml);
WhitespaceStrippedSource s = new WhitespaceStrippedSource(new DOMSource(testDoc));
XmlNode root = s.Node;
Expand Down
49 changes: 49 additions & 0 deletions src/tests/net-core/Input/XmlWhitespaceStrippedSourceTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
This file is licensed to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

using System.Xml;
using Org.XmlUnit.Util;
using NUnit.Framework;

namespace Org.XmlUnit.Input {

[TestFixture]
public class XmlWhitespaceStrippedSourceTest {

    // Stripping must work for a document loaded with default settings.
    [Test]
    public void XmlWhitespaceIsStrippedProperly() {
        XmlDocument plainDoc = new XmlDocument();
        XmlWhitespaceIsStrippedProperly(plainDoc);
    }

    // Stripping must also work when the document kept its whitespace nodes
    // while loading.
    [Test]
    public void XmlWhitespaceIsStrippedProperlyEvenWithPreserveWhitespaceDoc() {
        XmlWhitespaceIsStrippedProperly(new XmlDocument { PreserveWhitespace = true });
    }

    // Loads a small nested document into testDoc, strips it and verifies
    // that document, <a> and <b> each have exactly one child left and that
    // the remaining text still ends with the no-break space (U+00A0),
    // which is not XML whitespace.
    private void XmlWhitespaceIsStrippedProperly(XmlDocument testDoc) {
        testDoc.LoadXml("<a>\n <b>\n Test Value\u00a0\n </b>\n</a>");
        XmlNode current =
            new XmlWhitespaceStrippedSource(new DOMSource(testDoc)).Node;

        // walk down: document -> a -> b -> text, one single child per level
        for (int level = 0; level < 3; level++) {
            Assert.AreEqual(1, current.ChildNodes.Count);
            current = current.FirstChild;
        }

        Assert.AreEqual("Test Value\u00a0", current.Value);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
<Compile Include="..\Input\CommentLessSourceTest.cs" />
<Compile Include="..\Input\NormalizedSourceTest.cs" />
<Compile Include="..\Input\WhitespaceStrippedSourceTest.cs" />
<Compile Include="..\Input\XmlWhitespaceStrippedSourceTest.cs" />
<Compile Include="..\TestResources.cs" />
<Compile Include="..\Transform\TransformationTest.cs" />
<Compile Include="..\Util\ConvertTest.cs" />
Expand Down
22 changes: 19 additions & 3 deletions src/tests/net-core/Util/NodesTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ public class NodesTest {
private XmlDocument HandleWsSetup() {
return Convert.ToDocument(InputBuilder.FromString(
"<root>\n"
+ "<!-- trim\tme -->\n"
+ "<!--\u00a0 trim\tme\u00a0 -->\n"
+ "<child attr=' trim me ' attr2='not me'>\n"
+ " trim me \n"
+ "</child><![CDATA[ trim me ]]>\n"
Expand All @@ -156,11 +156,22 @@ public class NodesTest {
XmlNode>(toTest, Nodes.NormalizeWhitespace(toTest));
}

// Builds the shared whitespace fixture and pairs the document (Key)
// with the result of Nodes.StripXmlWhitespace applied to it (Value).
private KeyValuePair<XmlDocument, XmlNode> StripXmlWsSetup() {
XmlDocument toTest = HandleWsSetup();
return new KeyValuePair<XmlDocument,
XmlNode>(toTest, Nodes.StripXmlWhitespace(toTest));
}

[Test]
public void StripWhitespaceWorks() {
HandleWsWorks(StripWsSetup(), "trim\tme");
}

// Runs the shared checks against the XML-whitespace-stripped fixture.
// The no-break spaces (U+00A0) are not XML whitespace and therefore
// stay in the string passed here - presumably the expected comment
// text; confirm against HandleWsWorks.
[Test]
public void StripXmlWhitespaceWorks() {
HandleWsWorks(StripXmlWsSetup(), "\u00a0 trim\tme\u00a0");
}

[Test]
public void NormalizeWhitespaceWorks() {
HandleWsWorks(NormalizeWsSetup(), "trim me");
Expand Down Expand Up @@ -209,6 +220,11 @@ public class NodesTest {
HandleWsDoesntAlterOriginal(StripWsSetup());
}

// Runs the shared "original is untouched" checks for
// Nodes.StripXmlWhitespace.
[Test]
public void StripXmlWhitespaceDoesntAlterOriginal() {
HandleWsDoesntAlterOriginal(StripXmlWsSetup());
}

[Test]
public void NormalizeWhitespaceDoesntAlterOriginal() {
HandleWsDoesntAlterOriginal(NormalizeWsSetup());
Expand All @@ -224,7 +240,7 @@ public class NodesTest {
Assert.AreEqual(5, rootsChildren.Count);
Assert.IsTrue(rootsChildren[0] is XmlComment,
"should be comment, is " + rootsChildren[0].GetType());
Assert.AreEqual(" trim\tme ",
Assert.AreEqual("\u00a0 trim\tme\u00a0 ",
((XmlComment) rootsChildren[0]).Data);
Assert.IsTrue(rootsChildren[1] is XmlElement,
"should be element, is " + rootsChildren[1].GetType());
Expand Down Expand Up @@ -277,7 +293,7 @@ public class NodesTest {
Assert.AreEqual(4, rootsChildren.Count);
Assert.IsTrue(rootsChildren[0] is XmlComment,
"should be comment, is " + rootsChildren[0].GetType());
Assert.AreEqual(" trim\tme ",
Assert.AreEqual("\u00a0 trim\tme\u00a0 ",
((XmlComment) rootsChildren[0]).Data);
Assert.IsTrue(rootsChildren[1] is XmlElement,
"should be element, is " + rootsChildren[1].GetType());
Expand Down

0 comments on commit 6e334a4

Please sign in to comment.