Skip to content

Commit

Permalink
Update shared
Browse files Browse the repository at this point in the history
Update shared
  • Loading branch information
JonathanMagnan committed Apr 18, 2019
1 parent 1d2c3a8 commit b3c6865
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 41 deletions.
66 changes: 34 additions & 32 deletions src/HtmlAgilityPack.Shared/HtmlDocument.cs
Expand Up @@ -194,11 +194,18 @@ public static bool DisableBehaviorTagP
/// </summary>
public bool OptionWriteEmptyNodes;

#endregion
/// <summary>
/// The maximum number of nested child nodes allowed in the document.
/// Added to prevent a stack overflow when a page has tens of thousands of opening HTML tags with no closing tags.
/// The limit is enforced only when this value is greater than 0; the default of 0 leaves nesting depth unchecked.
/// </summary>
public int OptionMaxNestedChildNodes = 0;


#endregion

#region Static Members
#region Static Members

internal static readonly string HtmlExceptionRefNotChild = "Reference node must be a child of this node";
internal static readonly string HtmlExceptionRefNotChild = "Reference node must be a child of this node";

internal static readonly string HtmlExceptionUseIdAttributeFalse = "You need to set UseIdAttribute property to true to enable this feature";

Expand Down Expand Up @@ -1093,7 +1100,7 @@ private string CurrentNodeName()
private void DecrementPosition()
{
_index--;
if (_lineposition == 1)
if (_lineposition == 0)
{
_lineposition = _maxlineposition;
_line--;
Expand Down Expand Up @@ -1190,7 +1197,7 @@ private void IncrementPosition()
_maxlineposition = _lineposition;
if (_c == 10)
{
_lineposition = 1;
_lineposition = 0;
_line++;
}
else
Expand Down Expand Up @@ -1228,7 +1235,7 @@ private bool NewCheck()
break;

case ParseState.BetweenAttributes:
PushAttributeNameStart(_index - 1);
PushAttributeNameStart(_index - 1, _lineposition -1);
break;

case ParseState.WhichTag:
Expand All @@ -1255,7 +1262,7 @@ private bool NewCheck()
{
if (Text[_index] == '!')
{
PushNodeStart(HtmlNodeType.Comment, _index - 1);
PushNodeStart(HtmlNodeType.Comment, _index - 1, _lineposition -1);
PushNodeNameStart(true, _index);
PushNodeNameEnd(_index + 1);
_state = ParseState.Comment;
Expand All @@ -1276,7 +1283,7 @@ private bool NewCheck()
}
}

PushNodeStart(HtmlNodeType.Element, _index - 1);
PushNodeStart(HtmlNodeType.Element, _index - 1, _lineposition - 1);
return true;
}

Expand All @@ -1298,8 +1305,8 @@ private void Parse()
_fullcomment = false;
_parseerrors = new List<HtmlParseError>();
_line = 1;
_lineposition = 1;
_maxlineposition = 1;
_lineposition = 0;
_maxlineposition = 0;

_state = ParseState.Text;
_oldstate = _state;
Expand All @@ -1312,7 +1319,7 @@ private void Parse()
_currentattribute = null;

_index = 0;
PushNodeStart(HtmlNodeType.Text, 0);
PushNodeStart(HtmlNodeType.Text, 0, _lineposition);
while (_index < Text.Length)
{
_c = Text[_index];
Expand Down Expand Up @@ -1391,7 +1398,7 @@ private void Parse()
if (_state != ParseState.Tag)
continue;
_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
}

break;
Expand Down Expand Up @@ -1421,11 +1428,11 @@ private void Parse()
if (_state != ParseState.BetweenAttributes)
continue;
_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
continue;
}

PushAttributeNameStart(_index - 1);
PushAttributeNameStart(_index - 1, _lineposition -1);
_state = ParseState.AttributeName;
break;

Expand All @@ -1445,7 +1452,7 @@ private void Parse()
if (_state != ParseState.EmptyTag)
continue;
_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
continue;
}

Expand Down Expand Up @@ -1496,7 +1503,7 @@ private void Parse()
if (_state != ParseState.AttributeName)
continue;
_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
continue;
}

Expand All @@ -1520,7 +1527,7 @@ private void Parse()
if (_state != ParseState.AttributeBeforeEquals)
continue;
_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
continue;
}

Expand Down Expand Up @@ -1562,7 +1569,7 @@ private void Parse()
if (_state != ParseState.AttributeAfterEquals)
continue;
_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
continue;
}

Expand Down Expand Up @@ -1594,7 +1601,7 @@ private void Parse()
if (_state != ParseState.AttributeValue)
continue;
_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
continue;
}

Expand Down Expand Up @@ -1645,7 +1652,7 @@ private void Parse()
}

_state = ParseState.Text;
PushNodeStart(HtmlNodeType.Text, _index);
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
continue;
}

Expand Down Expand Up @@ -1706,11 +1713,11 @@ private void Parse()
script._outerlength = _index - 1 - script._outerstartindex;
script._streamposition = script._outerstartindex;
script._line = _currentnode.Line;
script._lineposition = _currentnode.LinePosition + _currentnode._namelength + 2;
_currentnode.AppendChild(script);
script._lineposition = _currentnode.LinePosition + _currentnode._namelength + 2;
_currentnode.AppendChild(script);


PushNodeStart(HtmlNodeType.Element, _index - 1);
PushNodeStart(HtmlNodeType.Element, _index - 1, _lineposition -1);
PushNodeNameStart(false, _index - 1 + 2);
_state = ParseState.Tag;
IncrementPosition();
Expand Down Expand Up @@ -1743,12 +1750,12 @@ private void PushAttributeNameEnd(int index)
_currentnode.Attributes.Append(_currentattribute);
}

/// <summary>
/// Begins a new attribute: creates it through CreateAttribute, makes it the current attribute,
/// and records where its name starts, the current line, a line position and the stream position.
/// </summary>
// NOTE(review): this span is a rendered diff, so old and new lines appear together. It appears that
// the one-argument signature and the assignment from the _lineposition field are the removed lines,
// and the two-argument signature and the assignment from the lineposition argument are the added ones.
private void PushAttributeNameStart(int index)
private void PushAttributeNameStart(int index, int lineposition)
{
_currentattribute = CreateAttribute();
_currentattribute._namestartindex = index;
_currentattribute.Line = _line;
_currentattribute._lineposition = _lineposition;
_currentattribute._lineposition = lineposition;
_currentattribute._streamposition = index;
}

Expand Down Expand Up @@ -2056,16 +2063,11 @@ private void PushNodeNameStart(bool starttag, int index)
_currentnode._namestartindex = index;
}

/// <summary>
/// Creates a node of the given type at the given index through CreateNode, makes it the current node,
/// and records the current line, a line position and the stream position on it.
/// </summary>
// NOTE(review): this span is a rendered diff, so old and new lines appear together. It appears that
// the removed lines took the position from the _lineposition field and decremented it for Element
// nodes, while the added lines store the caller-supplied lineposition argument unchanged.
private void PushNodeStart(HtmlNodeType type, int index)
private void PushNodeStart(HtmlNodeType type, int index, int lineposition)
{
_currentnode = CreateNode(type, index);
_currentnode._line = _line;
_currentnode._lineposition = _lineposition;
if (type == HtmlNodeType.Element)
{
_currentnode._lineposition--;
}

_currentnode._lineposition = lineposition;
_currentnode._streamposition = index;
}

Expand Down
23 changes: 23 additions & 0 deletions src/HtmlAgilityPack.Shared/HtmlNode.cs
Expand Up @@ -618,6 +618,12 @@ public string XPath
}
}


/// <summary>
/// The depth of the node relative to the opening root html element. This value is used to determine if a document has too many nested html nodes, which can cause stack overflows.
/// SetParent assigns it as the parent's depth plus one, and only while the owner document's OptionMaxNestedChildNodes is greater than 0.
/// </summary>
public int Depth { get; set; }

#endregion

#region Public Methods
Expand Down Expand Up @@ -1859,6 +1865,23 @@ public string WriteTo()
}
}

/// <summary>
/// Assigns the given node as this node's parent and, when the owner document caps nesting
/// (OptionMaxNestedChildNodes greater than 0), derives this node's depth from the parent's depth.
/// </summary>
/// <param name="parent">The node to attach to. When null, the call does nothing.</param>
/// <exception cref="Exception">The resulting depth exceeds the owner document's OptionMaxNestedChildNodes.</exception>
public void SetParent(HtmlNode parent)
{
    if (parent != null)
    {
        ParentNode = parent;

        int maxDepth = OwnerDocument.OptionMaxNestedChildNodes;
        if (maxDepth > 0)
        {
            // Depth is stored before the limit is checked, so it stays updated even when the call throws.
            Depth = parent.Depth + 1;
            if (Depth > maxDepth)
            {
                throw new Exception(string.Format("Document has more than {0} nested tags. This is likely due to the page not closing tags properly.", maxDepth));
            }
        }
    }
}

#endregion

#region Internal Methods
Expand Down
18 changes: 9 additions & 9 deletions src/HtmlAgilityPack.Shared/HtmlNodeCollection.cs
Expand Up @@ -220,9 +220,9 @@ public void Insert(int index, HtmlNode node)
if (next == node)
throw new InvalidProgramException("Unexpected error.");

node._nextnode = next;
node._parentnode = _parentnode;
}
node._nextnode = next;
node.SetParent(_parentnode);
}

/// <summary>
/// Remove node
Expand Down Expand Up @@ -315,8 +315,8 @@ public void Append(HtmlNode node)
_items.Add(node);
node._prevnode = last;
node._nextnode = null;
node._parentnode = _parentnode;
if (last == null) return;
node.SetParent(_parentnode);
if (last == null) return;
if (last == node)
throw new InvalidProgramException("Unexpected error.");

Expand Down Expand Up @@ -363,9 +363,9 @@ public void Prepend(HtmlNode node)
throw new InvalidProgramException("Unexpected error.");
node._nextnode = first;
node._prevnode = null;
node._parentnode = _parentnode;
node.SetParent(_parentnode);

if (first != null)
if (first != null)
first._prevnode = node;
}

Expand Down Expand Up @@ -415,9 +415,9 @@ public void Replace(int index, HtmlNode node)
throw new InvalidProgramException("Unexpected error.");

node._nextnode = next;
node._parentnode = _parentnode;
node.SetParent(_parentnode);

oldnode._prevnode = null;
oldnode._prevnode = null;
oldnode._nextnode = null;
oldnode._parentnode = null;
}
Expand Down

0 comments on commit b3c6865

Please sign in to comment.