diff --git a/src/HtmlAgilityPack.Shared/HtmlNode.cs b/src/HtmlAgilityPack.Shared/HtmlNode.cs index 9df390b..9927f1c 100644 --- a/src/HtmlAgilityPack.Shared/HtmlNode.cs +++ b/src/HtmlAgilityPack.Shared/HtmlNode.cs @@ -3,7 +3,7 @@ // Forum & Issues: https://github.com/zzzprojects/html-agility-pack // License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE // More projects: http://www.zzzprojects.com/ -// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. using System; using System.Collections; @@ -377,7 +377,7 @@ public virtual string InnerText { get { - string result; + var sb = new StringBuilder(); string name = this.Name; if (name != null) @@ -385,51 +385,56 @@ public virtual string InnerText name = name.ToLowerInvariant(); bool isDisplayScriptingText = (name == "head" || name == "script" || name == "style"); - - result = InternalInnerText(isDisplayScriptingText); + + InternalInnerText(sb, isDisplayScriptingText); } else - { - result = InternalInnerText(false); + { + InternalInnerText(sb, false); } - return result; + return sb.ToString(); } } - internal virtual string InternalInnerText(bool isDisplayScriptingText) - { - if (!_ownerdocument.BackwardCompatibility) - { - if (HasChildNodes) - { - StringBuilder sb = new StringBuilder(); - AppendInnerText(sb, isDisplayScriptingText); - return sb.ToString(); - } + internal virtual void InternalInnerText(StringBuilder sb, bool isDisplayScriptingText) + { + if (!_ownerdocument.BackwardCompatibility) + { + if (HasChildNodes) + { + AppendInnerText(sb, isDisplayScriptingText); + return; + } - return GetCurrentNodeText(); - } + sb.Append(GetCurrentNodeText()); + return; + } - if (_nodetype == HtmlNodeType.Text) - return ((HtmlTextNode) this).Text; + if (_nodetype == HtmlNodeType.Text) + { + sb.Append(((HtmlTextNode) this).Text); + return; + } - // Don't display comment or comment child nodes - if (_nodetype == HtmlNodeType.Comment) - return ""; + // Don't display comment or comment child nodes + if (_nodetype == HtmlNodeType.Comment) + { + return; + } - // note: right now, this method is *slow*, because we recompute everything. - // it could be optimized like innerhtml - if (!HasChildNodes || ( _isHideInnerText && !isDisplayScriptingText)) - return string.Empty; + // note: right now, this method is *slow*, because we recompute everything. + // it could be optimized like innerhtml + if (!HasChildNodes || (_isHideInnerText && !isDisplayScriptingText)) + { + return; + } - var s = new StringBuilder; - foreach (HtmlNode node in ChildNodes) - s.Append(node.InternalInnerText(isDisplayScriptingText)); - return s.ToString(); + foreach (HtmlNode node in ChildNodes) + node.InternalInnerText(sb, isDisplayScriptingText); } - /// Gets direct inner text. + /// Gets direct inner text. /// The direct inner text. public virtual string GetDirectInnerText() { @@ -453,18 +458,18 @@ public virtual string GetDirectInnerText() return ""; if (!HasChildNodes) - return string.Empty; + return string.Empty; - string s = null; - foreach (HtmlNode node in ChildNodes) + var s = new StringBuilder(); + foreach (HtmlNode node in ChildNodes) { if (node._nodetype == HtmlNodeType.Text) { - s += ((HtmlTextNode)node).Text; + s.Append(((HtmlTextNode)node).Text); } } - return s; + return s.ToString(); } @@ -2487,4 +2492,4 @@ private bool IsEmpty(IEnumerable en) #endregion } -} +} \ No newline at end of file diff --git a/src/HtmlAgilityPack.Shared/HtmlNodeNavigator.cs b/src/HtmlAgilityPack.Shared/HtmlNodeNavigator.cs index 95b75aa..dfb60a2 100644 --- a/src/HtmlAgilityPack.Shared/HtmlNodeNavigator.cs +++ b/src/HtmlAgilityPack.Shared/HtmlNodeNavigator.cs @@ -1,9 +1,9 @@ -// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. // Website & Documentation: http://html-agility-pack.net // Forum & Issues: https://github.com/zzzprojects/html-agility-pack // License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE // More projects: http://www.zzzprojects.com/ -// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. #if !METRO @@ -17,582 +17,588 @@ #pragma warning disable 0649 namespace HtmlAgilityPack { - /// - /// Represents an HTML navigator on an HTML document seen as a data store. - /// - public class HtmlNodeNavigator : XPathNavigator - { - #region Fields - - private int _attindex; - private HtmlNode _currentnode; - private readonly HtmlDocument _doc; - private readonly HtmlNameTable _nametable; - - internal bool Trace; - - #endregion - - #region Constructors - - internal HtmlNodeNavigator() - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - Reset(); - } - - internal HtmlNodeNavigator(HtmlDocument doc, HtmlNode currentNode) - { - if (currentNode == null) - { - throw new ArgumentNullException("currentNode"); - } - - if (currentNode.OwnerDocument != doc) - { - throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild); - } + /// + /// Represents an HTML navigator on an HTML document seen as a data store. + /// + public class HtmlNodeNavigator : XPathNavigator + { + #region Fields + + private int _attindex; + private HtmlNode _currentnode; + private readonly HtmlDocument _doc; + private readonly HtmlNameTable _nametable; + + internal bool Trace; + + #endregion + + #region Constructors + + internal HtmlNodeNavigator() + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + Reset(); + } + + internal HtmlNodeNavigator(HtmlDocument doc, HtmlNode currentNode) + { + if (currentNode == null) + { + throw new ArgumentNullException("currentNode"); + } + + if (currentNode.OwnerDocument != doc) + { + throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild); + } + + if (doc == null) + { + // keep in message, currentNode.OwnerDocument also null. + throw new Exception("Oops! The HtmlDocument cannot be null."); + } #if TRACE_NAVIGATOR InternalTrace(null); #endif - _doc = doc; - _nametable = new HtmlNameTable(); - Reset(); - _currentnode = currentNode; - } + _doc = doc; + _nametable = new HtmlNameTable(); + Reset(); + _currentnode = currentNode; + } - private HtmlNodeNavigator(HtmlNodeNavigator nav) - { - if (nav == null) - { - throw new ArgumentNullException("nav"); - } + private HtmlNodeNavigator(HtmlNodeNavigator nav) + { + if (nav == null) + { + throw new ArgumentNullException("nav"); + } #if TRACE_NAVIGATOR InternalTrace(null); #endif - _doc = nav._doc; - _currentnode = nav._currentnode; - _attindex = nav._attindex; - _nametable = nav._nametable; // REVIEW: should we do this? - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. - /// - /// The input stream. - public HtmlNodeNavigator(Stream stream) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(stream); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. - /// - /// The input stream. - /// Indicates whether to look for byte order marks at the beginning of the stream. - public HtmlNodeNavigator(Stream stream, bool detectEncodingFromByteOrderMarks) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(stream, detectEncodingFromByteOrderMarks); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. - /// - /// The input stream. - /// The character encoding to use. - public HtmlNodeNavigator(Stream stream, Encoding encoding) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(stream, encoding); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. - /// - /// The input stream. - /// The character encoding to use. - /// Indicates whether to look for byte order marks at the beginning of the stream. - public HtmlNodeNavigator(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(stream, encoding, detectEncodingFromByteOrderMarks); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. - /// - /// The input stream. - /// The character encoding to use. - /// Indicates whether to look for byte order marks at the beginning of the stream. - /// The minimum buffer size. - public HtmlNodeNavigator(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(stream, encoding, detectEncodingFromByteOrderMarks, buffersize); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a TextReader. - /// - /// The TextReader used to feed the HTML data into the document. - public HtmlNodeNavigator(TextReader reader) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(reader); - Reset(); - } + _doc = nav._doc; + _currentnode = nav._currentnode; + _attindex = nav._attindex; + _nametable = nav._nametable; // REVIEW: should we do this? + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. + /// + /// The input stream. + public HtmlNodeNavigator(Stream stream) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(stream); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. + /// + /// The input stream. + /// Indicates whether to look for byte order marks at the beginning of the stream. + public HtmlNodeNavigator(Stream stream, bool detectEncodingFromByteOrderMarks) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(stream, detectEncodingFromByteOrderMarks); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. + /// + /// The input stream. + /// The character encoding to use. + public HtmlNodeNavigator(Stream stream, Encoding encoding) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(stream, encoding); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. + /// + /// The input stream. + /// The character encoding to use. + /// Indicates whether to look for byte order marks at the beginning of the stream. + public HtmlNodeNavigator(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(stream, encoding, detectEncodingFromByteOrderMarks); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream. + /// + /// The input stream. + /// The character encoding to use. + /// Indicates whether to look for byte order marks at the beginning of the stream. + /// The minimum buffer size. + public HtmlNodeNavigator(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(stream, encoding, detectEncodingFromByteOrderMarks, buffersize); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a TextReader. + /// + /// The TextReader used to feed the HTML data into the document. + public HtmlNodeNavigator(TextReader reader) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(reader); + Reset(); + } #if !(NETSTANDARD1_3 || NETSTANDARD1_6) - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. - /// - /// The complete file path to be read. - public HtmlNodeNavigator(string path) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(path); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. - /// - /// The complete file path to be read. - /// Indicates whether to look for byte order marks at the beginning of the file. - public HtmlNodeNavigator(string path, bool detectEncodingFromByteOrderMarks) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(path, detectEncodingFromByteOrderMarks); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. - /// - /// The complete file path to be read. - /// The character encoding to use. - public HtmlNodeNavigator(string path, Encoding encoding) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(path, encoding); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. - /// - /// The complete file path to be read. - /// The character encoding to use. - /// Indicates whether to look for byte order marks at the beginning of the file. - public HtmlNodeNavigator(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(path, encoding, detectEncodingFromByteOrderMarks); - Reset(); - } - - /// - /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. - /// - /// The complete file path to be read. - /// The character encoding to use. - /// Indicates whether to look for byte order marks at the beginning of the file. - /// The minimum buffer size. - public HtmlNodeNavigator(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize) - { - _doc = new HtmlDocument(); - _nametable = new HtmlNameTable(); - _doc.Load(path, encoding, detectEncodingFromByteOrderMarks, buffersize); - Reset(); - } -#endif - -#endregion - -#region Properties - - /// - /// Gets the base URI for the current node. - /// Always returns string.Empty in the case of HtmlNavigator implementation. - /// - public override string BaseURI - { - get - { + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. + /// + /// The complete file path to be read. + public HtmlNodeNavigator(string path) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(path); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. + /// + /// The complete file path to be read. + /// Indicates whether to look for byte order marks at the beginning of the file. + public HtmlNodeNavigator(string path, bool detectEncodingFromByteOrderMarks) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(path, detectEncodingFromByteOrderMarks); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. + /// + /// The complete file path to be read. + /// The character encoding to use. + public HtmlNodeNavigator(string path, Encoding encoding) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(path, encoding); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. + /// + /// The complete file path to be read. + /// The character encoding to use. + /// Indicates whether to look for byte order marks at the beginning of the file. + public HtmlNodeNavigator(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(path, encoding, detectEncodingFromByteOrderMarks); + Reset(); + } + + /// + /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file. + /// + /// The complete file path to be read. + /// The character encoding to use. + /// Indicates whether to look for byte order marks at the beginning of the file. + /// The minimum buffer size. + public HtmlNodeNavigator(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize) + { + _doc = new HtmlDocument(); + _nametable = new HtmlNameTable(); + _doc.Load(path, encoding, detectEncodingFromByteOrderMarks, buffersize); + Reset(); + } +#endif + + #endregion + + #region Properties + + /// + /// Gets the base URI for the current node. + /// Always returns string.Empty in the case of HtmlNavigator implementation. + /// + public override string BaseURI + { + get + { #if TRACE_NAVIGATOR InternalTrace(">"); #endif - return _nametable.GetOrAdd(string.Empty); - } - } - - /// - /// Gets the current HTML document. - /// - public HtmlDocument CurrentDocument - { - get { return _doc; } - } - - /// - /// Gets the current HTML node. - /// - public HtmlNode CurrentNode - { - get { return _currentnode; } - } - - /// - /// Gets a value indicating whether the current node has child nodes. - /// - public override bool HasAttributes - { - get - { + return _nametable.GetOrAdd(string.Empty); + } + } + + /// + /// Gets the current HTML document. + /// + public HtmlDocument CurrentDocument + { + get { return _doc; } + } + + /// + /// Gets the current HTML node. + /// + public HtmlNode CurrentNode + { + get { return _currentnode; } + } + + /// + /// Gets a value indicating whether the current node has child nodes. + /// + public override bool HasAttributes + { + get + { #if TRACE_NAVIGATOR InternalTrace(">" + (_currentnode.Attributes.Count > 0)); #endif - return (_currentnode.Attributes.Count > 0); - } - } + return (_currentnode.Attributes.Count > 0); + } + } - /// - /// Gets a value indicating whether the current node has child nodes. - /// - public override bool HasChildren - { - get - { + /// + /// Gets a value indicating whether the current node has child nodes. + /// + public override bool HasChildren + { + get + { #if TRACE_NAVIGATOR InternalTrace(">" + (_currentnode.ChildNodes.Count > 0)); #endif - return (_currentnode.ChildNodes.Count > 0); - } - } + return (_currentnode.ChildNodes.Count > 0); + } + } - /// - /// Gets a value indicating whether the current node is an empty element. - /// - public override bool IsEmptyElement - { - get - { + /// + /// Gets a value indicating whether the current node is an empty element. + /// + public override bool IsEmptyElement + { + get + { #if TRACE_NAVIGATOR InternalTrace(">" + !HasChildren); #endif - // REVIEW: is this ok? - return !HasChildren; - } - } - - /// - /// Gets the name of the current HTML node without the namespace prefix. - /// - public override string LocalName - { - get - { - if (_attindex != -1) - { + // REVIEW: is this ok? + return !HasChildren; + } + } + + /// + /// Gets the name of the current HTML node without the namespace prefix. + /// + public override string LocalName + { + get + { + if (_attindex != -1) + { #if TRACE_NAVIGATOR InternalTrace("att>" + _currentnode.Attributes[_attindex].Name); #endif - return _nametable.GetOrAdd(_currentnode.Attributes[_attindex].Name); - } + return _nametable.GetOrAdd(_currentnode.Attributes[_attindex].Name); + } #if TRACE_NAVIGATOR InternalTrace("node>" + _currentnode.Name); #endif - return _nametable.GetOrAdd(_currentnode.Name); - } - } + return _nametable.GetOrAdd(_currentnode.Name); + } + } - /// - /// Gets the qualified name of the current node. - /// - public override string Name - { - get - { + /// + /// Gets the qualified name of the current node. + /// + public override string Name + { + get + { #if TRACE_NAVIGATOR InternalTrace(">" + _currentnode.Name); #endif - return _nametable.GetOrAdd(_currentnode.Name); - } - } + return _nametable.GetOrAdd(_currentnode.Name); + } + } - /// - /// Gets the namespace URI (as defined in the W3C Namespace Specification) of the current node. - /// Always returns string.Empty in the case of HtmlNavigator implementation. - /// - public override string NamespaceURI - { - get - { + /// + /// Gets the namespace URI (as defined in the W3C Namespace Specification) of the current node. + /// Always returns string.Empty in the case of HtmlNavigator implementation. + /// + public override string NamespaceURI + { + get + { #if TRACE_NAVIGATOR InternalTrace(">"); #endif - return _nametable.GetOrAdd(string.Empty); - } - } + return _nametable.GetOrAdd(string.Empty); + } + } - /// - /// Gets the associated with this implementation. - /// - public override XmlNameTable NameTable - { - get - { + /// + /// Gets the associated with this implementation. + /// + public override XmlNameTable NameTable + { + get + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - return _nametable; - } - } - - /// - /// Gets the type of the current node. - /// - public override XPathNodeType NodeType - { - get - { - switch (_currentnode.NodeType) - { - case HtmlNodeType.Comment: + return _nametable; + } + } + + /// + /// Gets the type of the current node. + /// + public override XPathNodeType NodeType + { + get + { + switch (_currentnode.NodeType) + { + case HtmlNodeType.Comment: #if TRACE_NAVIGATOR InternalTrace(">" + XPathNodeType.Comment); #endif - return XPathNodeType.Comment; + return XPathNodeType.Comment; - case HtmlNodeType.Document: + case HtmlNodeType.Document: #if TRACE_NAVIGATOR InternalTrace(">" + XPathNodeType.Root); #endif - return XPathNodeType.Root; + return XPathNodeType.Root; - case HtmlNodeType.Text: + case HtmlNodeType.Text: #if TRACE_NAVIGATOR InternalTrace(">" + XPathNodeType.Text); #endif - return XPathNodeType.Text; + return XPathNodeType.Text; - case HtmlNodeType.Element: - { - if (_attindex != -1) - { + case HtmlNodeType.Element: + { + if (_attindex != -1) + { #if TRACE_NAVIGATOR InternalTrace(">" + XPathNodeType.Attribute); #endif - return XPathNodeType.Attribute; - } + return XPathNodeType.Attribute; + } #if TRACE_NAVIGATOR InternalTrace(">" + XPathNodeType.Element); #endif - return XPathNodeType.Element; - } - - default: - throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + - _currentnode.NodeType); - } - } - } - - /// - /// Gets the prefix associated with the current node. - /// Always returns string.Empty in the case of HtmlNavigator implementation. - /// - public override string Prefix - { - get - { + return XPathNodeType.Element; + } + + default: + throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + + _currentnode.NodeType); + } + } + } + + /// + /// Gets the prefix associated with the current node. + /// Always returns string.Empty in the case of HtmlNavigator implementation. + /// + public override string Prefix + { + get + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - return _nametable.GetOrAdd(string.Empty); - } - } + return _nametable.GetOrAdd(string.Empty); + } + } - /// - /// Gets the text value of the current node. - /// - public override string Value - { - get - { + /// + /// Gets the text value of the current node. + /// + public override string Value + { + get + { #if TRACE_NAVIGATOR InternalTrace("nt=" + _currentnode.NodeType); #endif - switch (_currentnode.NodeType) - { - case HtmlNodeType.Comment: + switch (_currentnode.NodeType) + { + case HtmlNodeType.Comment: #if TRACE_NAVIGATOR InternalTrace(">" + ((HtmlCommentNode) _currentnode).Comment); #endif - return ((HtmlCommentNode) _currentnode).Comment; + return ((HtmlCommentNode) _currentnode).Comment; - case HtmlNodeType.Document: + case HtmlNodeType.Document: #if TRACE_NAVIGATOR InternalTrace(">"); #endif - return ""; + return ""; - case HtmlNodeType.Text: + case HtmlNodeType.Text: #if TRACE_NAVIGATOR InternalTrace(">" + ((HtmlTextNode) _currentnode).Text); #endif - return ((HtmlTextNode) _currentnode).Text; + return ((HtmlTextNode) _currentnode).Text; - case HtmlNodeType.Element: - { - if (_attindex != -1) - { + case HtmlNodeType.Element: + { + if (_attindex != -1) + { #if TRACE_NAVIGATOR InternalTrace(">" + _currentnode.Attributes[_attindex].Value); #endif - return _currentnode.Attributes[_attindex].Value; - } + return _currentnode.Attributes[_attindex].Value; + } - return _currentnode.InnerText; - } + return _currentnode.InnerText; + } - default: - throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + - _currentnode.NodeType); - } - } - } + default: + throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + + _currentnode.NodeType); + } + } + } - /// - /// Gets the xml:lang scope for the current node. - /// Always returns string.Empty in the case of HtmlNavigator implementation. - /// - public override string XmlLang - { - get - { + /// + /// Gets the xml:lang scope for the current node. + /// Always returns string.Empty in the case of HtmlNavigator implementation. + /// + public override string XmlLang + { + get + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - return _nametable.GetOrAdd(string.Empty); - } - } + return _nametable.GetOrAdd(string.Empty); + } + } -#endregion + #endregion -#region Public Methods + #region Public Methods - /// - /// Creates a new HtmlNavigator positioned at the same node as this HtmlNavigator. - /// - /// A new HtmlNavigator object positioned at the same node as the original HtmlNavigator. - public override XPathNavigator Clone() - { + /// + /// Creates a new HtmlNavigator positioned at the same node as this HtmlNavigator. + /// + /// A new HtmlNavigator object positioned at the same node as the original HtmlNavigator. + public override XPathNavigator Clone() + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - return new HtmlNodeNavigator(this); - } + return new HtmlNodeNavigator(this); + } - /// - /// Gets the value of the HTML attribute with the specified LocalName and NamespaceURI. - /// - /// The local name of the HTML attribute. - /// The namespace URI of the attribute. Unsupported with the HtmlNavigator implementation. - /// The value of the specified HTML attribute. String.Empty or null if a matching attribute is not found or if the navigator is not positioned on an element node. - public override string GetAttribute(string localName, string namespaceURI) - { + /// + /// Gets the value of the HTML attribute with the specified LocalName and NamespaceURI. + /// + /// The local name of the HTML attribute. + /// The namespace URI of the attribute. Unsupported with the HtmlNavigator implementation. + /// The value of the specified HTML attribute. String.Empty or null if a matching attribute is not found or if the navigator is not positioned on an element node. + public override string GetAttribute(string localName, string namespaceURI) + { #if TRACE_NAVIGATOR InternalTrace("localName=" + localName + ", namespaceURI=" + namespaceURI); #endif - HtmlAttribute att = _currentnode.Attributes[localName]; - if (att == null) - { + HtmlAttribute att = _currentnode.Attributes[localName]; + if (att == null) + { #if TRACE_NAVIGATOR InternalTrace(">null"); #endif - return null; - } + return null; + } #if TRACE_NAVIGATOR InternalTrace(">" + att.Value); #endif - return att.Value; - } + return att.Value; + } - /// - /// Returns the value of the namespace node corresponding to the specified local name. - /// Always returns string.Empty for the HtmlNavigator implementation. - /// - /// The local name of the namespace node. - /// Always returns string.Empty for the HtmlNavigator implementation. - public override string GetNamespace(string name) - { + /// + /// Returns the value of the namespace node corresponding to the specified local name. + /// Always returns string.Empty for the HtmlNavigator implementation. + /// + /// The local name of the namespace node. + /// Always returns string.Empty for the HtmlNavigator implementation. + public override string GetNamespace(string name) + { #if TRACE_NAVIGATOR InternalTrace("name=" + name); #endif - return string.Empty; - } + return string.Empty; + } - /// - /// Determines whether the current HtmlNavigator is at the same position as the specified HtmlNavigator. - /// - /// The HtmlNavigator that you want to compare against. - /// true if the two navigators have the same position, otherwise, false. - public override bool IsSamePosition(XPathNavigator other) - { - HtmlNodeNavigator nav = other as HtmlNodeNavigator; - if (nav == null) - { + /// + /// Determines whether the current HtmlNavigator is at the same position as the specified HtmlNavigator. + /// + /// The HtmlNavigator that you want to compare against. + /// true if the two navigators have the same position, otherwise, false. + public override bool IsSamePosition(XPathNavigator other) + { + HtmlNodeNavigator nav = other as HtmlNodeNavigator; + if (nav == null) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } #if TRACE_NAVIGATOR InternalTrace(">" + (nav._currentnode == _currentnode)); #endif - return (nav._currentnode == _currentnode); - } + return (nav._currentnode == _currentnode); + } - /// - /// Moves to the same position as the specified HtmlNavigator. - /// - /// The HtmlNavigator positioned on the node that you want to move to. - /// true if successful, otherwise false. If false, the position of the navigator is unchanged. - public override bool MoveTo(XPathNavigator other) - { - HtmlNodeNavigator nav = other as HtmlNodeNavigator; - if (nav == null) - { + /// + /// Moves to the same position as the specified HtmlNavigator. + /// + /// The HtmlNavigator positioned on the node that you want to move to. + /// true if successful, otherwise false. If false, the position of the navigator is unchanged. + public override bool MoveTo(XPathNavigator other) + { + HtmlNodeNavigator nav = other as HtmlNodeNavigator; + if (nav == null) + { #if TRACE_NAVIGATOR InternalTrace(">false (nav is not an HtmlNodeNavigator)"); #endif - return false; - } + return false; + } #if TRACE_NAVIGATOR InternalTrace("moveto oid=" + nav.GetHashCode() @@ -600,294 +606,294 @@ public override bool MoveTo(XPathNavigator other) + ", a:" + nav._attindex); #endif - if (nav._doc == _doc) - { - _currentnode = nav._currentnode; - _attindex = nav._attindex; + if (nav._doc == _doc) + { + _currentnode = nav._currentnode; + _attindex = nav._attindex; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - // we don't know how to handle that + // we don't know how to handle that #if TRACE_NAVIGATOR InternalTrace(">false (???)"); #endif - return false; - } + return false; + } - /// - /// Moves to the HTML attribute with matching LocalName and NamespaceURI. - /// - /// The local name of the HTML attribute. - /// The namespace URI of the attribute. Unsupported with the HtmlNavigator implementation. - /// true if the HTML attribute is found, otherwise, false. If false, the position of the navigator does not change. - public override bool MoveToAttribute(string localName, string namespaceURI) - { + /// + /// Moves to the HTML attribute with matching LocalName and NamespaceURI. + /// + /// The local name of the HTML attribute. + /// The namespace URI of the attribute. Unsupported with the HtmlNavigator implementation. + /// true if the HTML attribute is found, otherwise, false. If false, the position of the navigator does not change. + public override bool MoveToAttribute(string localName, string namespaceURI) + { #if TRACE_NAVIGATOR InternalTrace("localName=" + localName + ", namespaceURI=" + namespaceURI); #endif - int index = _currentnode.Attributes.GetAttributeIndex(localName); - if (index == -1) - { + int index = _currentnode.Attributes.GetAttributeIndex(localName); + if (index == -1) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _attindex = index; + _attindex = index; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves to the first sibling of the current node. - /// - /// true if the navigator is successful moving to the first sibling node, false if there is no first sibling or if the navigator is currently positioned on an attribute node. - public override bool MoveToFirst() - { - if (_currentnode.ParentNode == null) - { + /// + /// Moves to the first sibling of the current node. + /// + /// true if the navigator is successful moving to the first sibling node, false if there is no first sibling or if the navigator is currently positioned on an attribute node. + public override bool MoveToFirst() + { + if (_currentnode.ParentNode == null) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - if (_currentnode.ParentNode.FirstChild == null) - { + if (_currentnode.ParentNode.FirstChild == null) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _currentnode = _currentnode.ParentNode.FirstChild; + _currentnode = _currentnode.ParentNode.FirstChild; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves to the first HTML attribute. - /// - /// true if the navigator is successful moving to the first HTML attribute, otherwise, false. - public override bool MoveToFirstAttribute() - { - if (!HasAttributes) - { + /// + /// Moves to the first HTML attribute. + /// + /// true if the navigator is successful moving to the first HTML attribute, otherwise, false. + public override bool MoveToFirstAttribute() + { + if (!HasAttributes) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _attindex = 0; + _attindex = 0; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves to the first child of the current node. - /// - /// true if there is a first child node, otherwise false. - public override bool MoveToFirstChild() - { - if (!_currentnode.HasChildNodes) - { + /// + /// Moves to the first child of the current node. + /// + /// true if there is a first child node, otherwise false. + public override bool MoveToFirstChild() + { + if (!_currentnode.HasChildNodes) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _currentnode = _currentnode.ChildNodes[0]; + _currentnode = _currentnode.ChildNodes[0]; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves the XPathNavigator to the first namespace node of the current element. - /// Always returns false for the HtmlNavigator implementation. - /// - /// An XPathNamespaceScope value describing the namespace scope. - /// Always returns false for the HtmlNavigator implementation. - public override bool MoveToFirstNamespace(XPathNamespaceScope scope) - { + /// + /// Moves the XPathNavigator to the first namespace node of the current element. + /// Always returns false for the HtmlNavigator implementation. + /// + /// An XPathNamespaceScope value describing the namespace scope. + /// Always returns false for the HtmlNavigator implementation. + public override bool MoveToFirstNamespace(XPathNamespaceScope scope) + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - return false; - } + return false; + } - /// - /// Moves to the node that has an attribute of type ID whose value matches the specified string. - /// - /// A string representing the ID value of the node to which you want to move. This argument does not need to be atomized. - /// true if the move was successful, otherwise false. If false, the position of the navigator is unchanged. - public override bool MoveToId(string id) - { + /// + /// Moves to the node that has an attribute of type ID whose value matches the specified string. + /// + /// A string representing the ID value of the node to which you want to move. This argument does not need to be atomized. + /// true if the move was successful, otherwise false. If false, the position of the navigator is unchanged. + public override bool MoveToId(string id) + { #if TRACE_NAVIGATOR InternalTrace("id=" + id); #endif - HtmlNode node = _doc.GetElementbyId(id); - if (node == null) - { + HtmlNode node = _doc.GetElementbyId(id); + if (node == null) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _currentnode = node; + _currentnode = node; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves the XPathNavigator to the namespace node with the specified local name. - /// Always returns false for the HtmlNavigator implementation. - /// - /// The local name of the namespace node. - /// Always returns false for the HtmlNavigator implementation. - public override bool MoveToNamespace(string name) - { + /// + /// Moves the XPathNavigator to the namespace node with the specified local name. + /// Always returns false for the HtmlNavigator implementation. + /// + /// The local name of the namespace node. + /// Always returns false for the HtmlNavigator implementation. + public override bool MoveToNamespace(string name) + { #if TRACE_NAVIGATOR InternalTrace("name=" + name); #endif - return false; - } + return false; + } - /// - /// Moves to the next sibling of the current node. - /// - /// true if the navigator is successful moving to the next sibling node, false if there are no more siblings or if the navigator is currently positioned on an attribute node. If false, the position of the navigator is unchanged. - public override bool MoveToNext() - { - if (_currentnode.NextSibling == null) - { + /// + /// Moves to the next sibling of the current node. + /// + /// true if the navigator is successful moving to the next sibling node, false if there are no more siblings or if the navigator is currently positioned on an attribute node. If false, the position of the navigator is unchanged. + public override bool MoveToNext() + { + if (_currentnode.NextSibling == null) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } #if TRACE_NAVIGATOR InternalTrace("_c=" + _currentnode.CloneNode(false).OuterHtml); InternalTrace("_n=" + _currentnode.NextSibling.CloneNode(false).OuterHtml); #endif - _currentnode = _currentnode.NextSibling; + _currentnode = _currentnode.NextSibling; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves to the next HTML attribute. - /// - /// - public override bool MoveToNextAttribute() - { + /// + /// Moves to the next HTML attribute. + /// + /// + public override bool MoveToNextAttribute() + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - if (_attindex >= (_currentnode.Attributes.Count - 1)) - { + if (_attindex >= (_currentnode.Attributes.Count - 1)) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _attindex++; + _attindex++; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves the XPathNavigator to the next namespace node. - /// Always returns falsefor the HtmlNavigator implementation. - /// - /// An XPathNamespaceScope value describing the namespace scope. - /// Always returns false for the HtmlNavigator implementation. - public override bool MoveToNextNamespace(XPathNamespaceScope scope) - { + /// + /// Moves the XPathNavigator to the next namespace node. + /// Always returns falsefor the HtmlNavigator implementation. + /// + /// An XPathNamespaceScope value describing the namespace scope. + /// Always returns false for the HtmlNavigator implementation. + public override bool MoveToNextNamespace(XPathNamespaceScope scope) + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - return false; - } + return false; + } - /// - /// Moves to the parent of the current node. - /// - /// true if there is a parent node, otherwise false. - public override bool MoveToParent() - { - if (_currentnode.ParentNode == null) - { + /// + /// Moves to the parent of the current node. + /// + /// true if there is a parent node, otherwise false. + public override bool MoveToParent() + { + if (_currentnode.ParentNode == null) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _currentnode = _currentnode.ParentNode; + _currentnode = _currentnode.ParentNode; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves to the previous sibling of the current node. - /// - /// true if the navigator is successful moving to the previous sibling node, false if there is no previous sibling or if the navigator is currently positioned on an attribute node. - public override bool MoveToPrevious() - { - if (_currentnode.PreviousSibling == null) - { + /// + /// Moves to the previous sibling of the current node. + /// + /// true if the navigator is successful moving to the previous sibling node, false if there is no previous sibling or if the navigator is currently positioned on an attribute node. + public override bool MoveToPrevious() + { + if (_currentnode.PreviousSibling == null) + { #if TRACE_NAVIGATOR InternalTrace(">false"); #endif - return false; - } + return false; + } - _currentnode = _currentnode.PreviousSibling; + _currentnode = _currentnode.PreviousSibling; #if TRACE_NAVIGATOR InternalTrace(">true"); #endif - return true; - } + return true; + } - /// - /// Moves to the root node to which the current node belongs. - /// - public override void MoveToRoot() - { - _currentnode = _doc.DocumentNode; + /// + /// Moves to the root node to which the current node belongs. + /// + public override void MoveToRoot() + { + _currentnode = _doc.DocumentNode; #if TRACE_NAVIGATOR InternalTrace(null); #endif - } + } - #endregion + #endregion - #region Internal Methods + #region Internal Methods #if TRACE_NAVIGATOR [Conditional("TRACE")] internal void InternalTrace(object traceValue) @@ -934,20 +940,20 @@ internal void InternalTrace(object traceValue) HtmlAgilityPack.Trace.WriteLine(string.Format("oid={0},n={1},a={2},v={3},{4}", GetHashCode(), nodename, _attindex, nodevalue, traceValue), "N!" + name); } #endif - #endregion + #endregion - #region Private Methods + #region Private Methods - private void Reset() - { + private void Reset() + { #if TRACE_NAVIGATOR InternalTrace(null); #endif - _currentnode = _doc.DocumentNode; - _attindex = -1; - } + _currentnode = _doc.DocumentNode; + _attindex = -1; + } -#endregion - } + #endregion + } } #endif \ No newline at end of file diff --git a/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlDocumentTests.cs b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlDocumentTests.cs index 8a61559..6adb6ef 100644 --- a/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlDocumentTests.cs +++ b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlDocumentTests.cs @@ -33,7 +33,21 @@ private HtmlDocument GetMshomeDocument() return doc; } + [Test] + public void SelectEventAttributesTest() + { + String xpath = "//* [@onkeypress or @onkeydown or @onkeyup or @onclick or @ondblclick or @onmousedown or @onmouseup or @onmouseover or @onmousemove or @onmouseout or @onmouseenter or @onmouseleave or @onmousewheel or @oncontextmenu or @onabort or @onbeforeunload or @onerror or @onload or @onmove or @onresize or @onscroll or @onstop or @onunload or @onreset or @onsubmit or @onblur or @onchange or @onfocus or @onfocusin or @onfocusout or @oninput or @onbeforeactivate or @onactivate or @onbefordeactivate or @ondeactivate or @onbounce or @onfinish or @onstart or @onbeforecopy or @onbeforecut or @onbeforeeditfocus or @onbeforepaste or @onbeforeupdate or @oncopy or @oncut or @ondrag or @ondragdrop or @ondragend or @ondragenter or @ondragleave or @ondragover or @ondragstart or @ondrop or @onlosecapture or @onpaste or @onselect or @onselectstart or @oncontrolselect or @onmovestart or @onmoveend or @onafterupdate or @oncellchange or @ondataavailable or @ondatasetchanged or @ondatasetcomplete or @onerrorupdate or @onrowenter or @onrowexit or @onrowsdelete or @onrowsinserted or @onafterprint or @onbeforeprint or @onfilterchange or @onhelp or @onpropertychange or @onreadystatechange]"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(@"
some text
"); + for (int i = 0; i < 100000; i++) + { + doc.DocumentNode.SelectNodes(xpath).ToList(); + } + } + + + [Test] public void HtmlAgilityPack_AttributeCollectionBug() { { @@ -81,6 +95,31 @@ public void HtmlAgilityPack_AttributeCollectionBug() } } +// [Test] +// public void spanTest() +// { +// // fix has been cancelled for now +// const string test = @" +// +// +//

Foo

+//

Bar

+//"; +// HtmlDocument doc = new HtmlDocument(); +// doc.LoadHtml(test); + +// const string expected = @" +// +// +//

Foo

+//

Bar

+//
+//"; + + +// Assert.AreEqual(expected, doc.DocumentNode.OuterHtml); ; +// } + [Test] public void TextInsideScriptTagShouldHaveCorrectStreamPosition() { @@ -90,7 +129,7 @@ public void TextInsideScriptTagShouldHaveCorrectStreamPosition() var scraptText = document.DocumentNode.FirstChild.FirstChild; Assert.AreEqual(8, scraptText.StreamPosition); Assert.AreEqual(1, scraptText.Line); - Assert.AreEqual(9, scraptText.LinePosition); + Assert.AreEqual(8, scraptText.LinePosition); } { var document = new HtmlDocument(); @@ -98,7 +137,7 @@ public void TextInsideScriptTagShouldHaveCorrectStreamPosition() var scriptText = document.DocumentNode.FirstChild.FirstChild; Assert.AreEqual(8, scriptText.StreamPosition); Assert.AreEqual(1, scriptText.Line); - Assert.AreEqual(9, scriptText.LinePosition); + Assert.AreEqual(8, scriptText.LinePosition); } { var document = new HtmlAgilityPack.HtmlDocument(); @@ -108,7 +147,7 @@ public void TextInsideScriptTagShouldHaveCorrectStreamPosition() // var aa = scraptText.FirstChild; Assert.AreEqual(10, scraptText.StreamPosition); Assert.AreEqual(2, scraptText.Line); - Assert.AreEqual(9, scraptText.LinePosition); + Assert.AreEqual(8, scraptText.LinePosition); } @@ -119,7 +158,7 @@ public void TextInsideScriptTagShouldHaveCorrectStreamPosition() var scriptText = document.DocumentNode.LastChild.FirstChild; Assert.AreEqual(10, scriptText.StreamPosition); Assert.AreEqual(2, scriptText.Line); - Assert.AreEqual(9, scriptText.LinePosition); + Assert.AreEqual(8, scriptText.LinePosition); } } @@ -469,24 +508,24 @@ public void TestDetectEncoding() Assert.AreEqual(System.Text.Encoding.UTF8, encoding); } - [Test] - public void TestLoadWithCache() - { - var dir = _contentDirectory + "cache"; - Directory.CreateDirectory(dir); + //[Test] + //public void TestLoadWithCache() + //{ + // var dir = _contentDirectory + "cache"; + // Directory.CreateDirectory(dir); - var web = new HtmlAgilityPack.HtmlWeb() - { - CachePath = dir, - UsingCache = true - }; + // var web = new HtmlAgilityPack.HtmlWeb() + // { + // CachePath = dir, + // UsingCache = true + // }; - var url = "http://html-agility-pack.net/"; - var docCache = web.Load(url); + // var url = "http://html-agility-pack.net/"; + // var docCache = web.Load(url); - var docLoad = new HtmlAgilityPack.HtmlWeb().Load(url); - Assert.AreEqual(docLoad.DocumentNode.OuterHtml, docCache.DocumentNode.OuterHtml); - } + // var docLoad = new HtmlAgilityPack.HtmlWeb().Load(url); + // Assert.AreEqual(docLoad.DocumentNode.OuterHtml, docCache.DocumentNode.OuterHtml); + //} [Test] public void OuterHtmlHasBeenCalled_RemoveCalled_SubsequentOuterHtmlCallsAreBroken() @@ -824,7 +863,7 @@ public void ChildIsRemovedFromParent() public void GetEncapsulatedData() { HtmlWeb stackoverflowSite = new HtmlWeb(); - HtmlDocument htmlDocument = stackoverflowSite.Load("https://stackoverflow.com/"); + HtmlDocument htmlDocument = stackoverflowSite.Load("https://stackoverflow.com/?tab=interesting"); StackOverflowPage stackOverflowPage = htmlDocument.DocumentNode.GetEncapsulatedData(); IEnumerable filtered = stackOverflowPage.Questions.OrderByDescending(new Func(x => x.Statistics.Votes)); @@ -899,6 +938,30 @@ public void SanitizeXmlElementNameWithColon() Assert.AreEqual(expected, xmlDoc); } + [Test] + public void HasClass_WhereClassWithWhitespacePassed_ShouldReturnTrue() + { + var input = @""; + var htmlDoc = new HtmlDocument(); + + htmlDoc.LoadHtml(input); + + var aTag = htmlDoc.DocumentNode.SelectSingleNode("//a"); + Assert.True(aTag.HasClass("disabled")); + } + + [Test] + public void GetClasses_WhereClassWithWhitespacePassed_ShouldNotBeEmpty() + { + var input = @""; + var htmlDoc = new HtmlDocument(); + + htmlDoc.LoadHtml(input); + + var aTag = htmlDoc.DocumentNode.SelectSingleNode("//a"); + Assert.IsNotEmpty(aTag.GetClasses()); + } + [Test] public void DoesNotSanitizeXmlElementNameWithColonWhenConfiguredToPreserveXmlNamespaces() { @@ -922,27 +985,225 @@ public void DoesNotSanitizeXmlElementNameWithColonWhenConfiguredToPreserveXmlNam } [Test] - public void HasClass_WhereClassWithWhitespacePassed_ShouldReturnTrue() + public void ChangesToPHandling() { - var input = @""; - var htmlDoc = new HtmlDocument(); + var input = "

Begin

Inner
End

"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(input); + var docNode = doc.DocumentNode; + + // Check tree is correct + + Assert.AreEqual(3, docNode.ChildNodes.Count); + Assert.AreEqual("p", docNode.ChildNodes[0].Name); + Assert.AreEqual("div", docNode.ChildNodes[1].Name); + Assert.AreEqual("#text", docNode.ChildNodes[2].Name); + Assert.AreEqual("Begin", docNode.ChildNodes[0].InnerText); + Assert.AreEqual("Inner", docNode.ChildNodes[1].InnerText); + Assert.AreEqual("End", docNode.ChildNodes[2].InnerText); + } - htmlDoc.LoadHtml(input); + [Test] + public void ChangesToPHandlingFalse() + { + HtmlDocument.DisableBehaviorTagP = false; - var aTag = htmlDoc.DocumentNode.SelectSingleNode("//a"); - Assert.True(aTag.HasClass("disabled")); + var input = "

Begin

Inner
End

"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(input); + var docNode = doc.DocumentNode; + + try + { + // Check tree is correct + Assert.AreEqual(1, docNode.ChildNodes.Count); + Assert.AreEqual("p", docNode.ChildNodes[0].Name); + + var pRootNode = docNode.ChildNodes[0]; + Assert.AreEqual(3, pRootNode.ChildNodes.Count); + Assert.AreEqual("Begin", pRootNode.ChildNodes[0].InnerText); + Assert.AreEqual("Inner", pRootNode.ChildNodes[1].InnerText); + Assert.AreEqual("End", pRootNode.ChildNodes[2].InnerText); + } + catch (Exception e) + { + throw; + } + finally + { + HtmlDocument.DisableBehaviorTagP = true; + } } [Test] - public void GetClasses_WhereClassWithWhitespacePassed_ShouldNotBeEmpty() + public void AttributeValue() { - var input = @""; - var htmlDoc = new HtmlDocument(); + { + Assert.AreEqual(""'", WebUtility.HtmlEncode("\"'")); + } + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = true; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; - htmlDoc.LoadHtml(input); + divNode.Attributes.Add("name", "value1value2"); + Assert.AreEqual("value1value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = true; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1\"value2"); + Assert.AreEqual("value1\"value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = true; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1"value2"); + Assert.AreEqual("value1"value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = true; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1'value2"); + divNode.Attributes[0].QuoteType = AttributeValueQuote.SingleQuote; + Assert.AreEqual("value1'value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = true; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1'value2"); + divNode.Attributes[0].QuoteType = AttributeValueQuote.SingleQuote; + Assert.AreEqual("value1'value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = false; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1value2"); + Assert.AreEqual("value1value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = false; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1\"value2"); + Assert.AreEqual("value1\"value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = false; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1"value2"); + Assert.AreEqual("value1"value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = false; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1'value2"); + divNode.Attributes[0].QuoteType = AttributeValueQuote.SingleQuote; + Assert.AreEqual("value1'value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + { + var input = "
z
"; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.BackwardCompatibility = false; + doc.LoadHtml(input); + var divNode = doc.DocumentNode.ChildNodes[0]; + + divNode.Attributes.Add("name", "value1'value2"); + divNode.Attributes[0].QuoteType = AttributeValueQuote.SingleQuote; + Assert.AreEqual("value1'value2", divNode.Attributes[0].Value); + Assert.AreEqual("
z
", doc.DocumentNode.InnerHtml); + } + + } + + [Test] + public void AttributeSerialization() + { + HtmlDocument doc1 = new HtmlDocument() + { + OptionEmptyCollection = true, + OptionAutoCloseOnEnd = true, + OptionWriteEmptyNodes = true, + // Disable backward compatibility to enable automatic html decoding + BackwardCompatibility = false + }; + + HtmlDocument doc2 = new HtmlDocument() + { + OptionEmptyCollection = true, + OptionAutoCloseOnEnd = true, + OptionWriteEmptyNodes = true, + // Disable backward compatibility to enable automatic html decoding + BackwardCompatibility = false + }; + + + + string html = @" + + +Hello Button: + +"; + doc1.LoadHtml(html); + int beforeSerializtionSrcElementsCount = doc1.DocumentNode.SelectNodes("//* [@src]").Count; + StringWriter stringWriter = new StringWriter(); + doc1.Save(stringWriter); + String htmlAfterSerialization = stringWriter.ToString(); + doc2.LoadHtml(htmlAfterSerialization); + Assert.AreEqual(beforeSerializtionSrcElementsCount, doc2.DocumentNode.SelectNodes("//* [@src]").Count); - var aTag = htmlDoc.DocumentNode.SelectSingleNode("//a"); - Assert.IsNotEmpty(aTag.GetClasses()); } [HasXPath] diff --git a/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlNode.Tests.cs b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlNode.Tests.cs index f48faea..865403e 100644 --- a/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlNode.Tests.cs +++ b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlNode.Tests.cs @@ -1,7 +1,9 @@ using System; using System.IO; using System.Linq; +using Microsoft.VisualStudio.TestTools.UnitTesting; using NUnit.Framework; +using Assert = NUnit.Framework.Assert; namespace HtmlAgilityPack.Tests { @@ -24,6 +26,63 @@ public class HtmlNode2 // var result = writer.GetStringBuilder().ToString(); // Assert.AreEqual(" AttributeIsThis=\"val\"", result); //} + + [Test] + public void ScriptingText() + { + var html = @" + + + SEE title + + + + + +
222 +
+ +"; + + { + HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument(); + htmlDocument.LoadHtml(html); + + var content1 = htmlDocument.DocumentNode.SelectSingleNode("//head").InnerText; + var content2 = htmlDocument.DocumentNode.SelectSingleNode("//script").InnerText; + var content3 = htmlDocument.DocumentNode.SelectSingleNode("//style").InnerText; + var content4 = htmlDocument.DocumentNode.SelectSingleNode("//body").InnerText; + var content5 = htmlDocument.DocumentNode.SelectSingleNode("//html").InnerText; + var content6 = htmlDocument.DocumentNode.SelectSingleNode("//body/script").InnerText; + + Assert.AreEqual("\r\n SEE title\r\n\tSEE script \r\n\tSEE style\r\n", content1); + Assert.AreEqual("SEE script ", content2); + Assert.AreEqual("SEE style", content3); + Assert.AreEqual("\r\n\r\n222\r\n\r\n", content4); + Assert.AreEqual("\r\n\r\n SEE title\r\n\t\r\n\t\r\n\r\n\r\n\r\n222\r\n\r\n\r\n", content5); + Assert.AreEqual("NOTSEE script", content6); + } + + { + HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument(); + htmlDocument.BackwardCompatibility = false; + htmlDocument.LoadHtml(html); + + var content1 = htmlDocument.DocumentNode.SelectSingleNode("//head").InnerText; + var content2 = htmlDocument.DocumentNode.SelectSingleNode("//script").InnerText; + var content3 = htmlDocument.DocumentNode.SelectSingleNode("//style").InnerText; + var content4 = htmlDocument.DocumentNode.SelectSingleNode("//body").InnerText; + var content5 = htmlDocument.DocumentNode.SelectSingleNode("//html").InnerText; + var content6 = htmlDocument.DocumentNode.SelectSingleNode("//body/script").InnerText; + + Assert.AreEqual(" SEE titleSEE script SEE style", content1); + Assert.AreEqual("SEE script ", content2); + Assert.AreEqual("SEE style", content3); + Assert.AreEqual("222", content4); + Assert.AreEqual(" SEE title222", content5); + Assert.AreEqual("NOTSEE script", content6); + } + } [Test] public void ReadNotCloseTag() @@ -100,5 +159,60 @@ public void Prepend_CheckOrder()
  • Foxtrot
  • "); } + + [Test] + public void OptionMaxNestedChildNodes_NotSet_IsNotEnforced() + { + var html = "
    1
    2
    3
    "; + var doc = new HtmlDocument(); + + doc.LoadHtml(html); + + Assert.IsNotNull(doc); + Assert.AreEqual(html, doc.Text); + } + + [Test] + public void OptionMaxNestedChildNodes_SetToZero_IsNotEnforced() + { + var html = "
    1
    2
    3
    "; + var doc = new HtmlDocument(); + doc.OptionMaxNestedChildNodes = 0; + + doc.LoadHtml(html); + + Assert.IsNotNull(doc); + Assert.AreEqual(html, doc.Text); + } + + [Test] + public void OptionMaxNestedChildNodes_WithinMax_NoException() + { + var html = "
    "; + var doc = new HtmlDocument(); + doc.OptionMaxNestedChildNodes = 8; + + doc.LoadHtml(html); + } + + [Test] + [ExpectedException(typeof(ApplicationException))] + public void OptionMaxNestedChildNodes_AbotMax() + { + var html = "
    "; + var doc = new HtmlDocument(); + doc.OptionMaxNestedChildNodes = 7; + string message = ""; + try + { + doc.LoadHtml(html); + } + catch (Exception e) + { + message = e.Message; + } + + Assert.AreEqual("Document has more than 7 nested tags. This is likely due to the page not closing tags properly.", message); + } } } \ No newline at end of file