diff --git a/src/HtmlAgilityPack.Shared/HtmlAttribute.cs b/src/HtmlAgilityPack.Shared/HtmlAttribute.cs
index 6701b7c..0daa3be 100644
--- a/src/HtmlAgilityPack.Shared/HtmlAttribute.cs
+++ b/src/HtmlAgilityPack.Shared/HtmlAttribute.cs
@@ -83,10 +83,16 @@ public int ValueLength
get { return _valuelength; }
}
- /// <summary>
- /// Gets the qualified name of the attribute.
- /// </summary>
- public string Name
+ /// <summary>
+ /// Gets or sets a value indicating whether <see cref="Name"/> returns the attribute name
+ /// as stored instead of converting it to lower case. Defaults to false.
+ /// </summary>
+ public bool UseOriginalName { get; set; } = false;
+
+ /// <summary>
+ /// Gets the qualified name of the attribute.
+ /// </summary>
+ public string Name
{
get
{
@@ -95,8 +97,8 @@ public string Name
_name = _ownerdocument.Text.Substring(_namestartindex, _namelength);
}
- return _name.ToLowerInvariant();
- }
+ return UseOriginalName ? _name : _name.ToLowerInvariant();
+ }
set
{
if (value == null)
diff --git a/src/HtmlAgilityPack.Shared/HtmlDocument.cs b/src/HtmlAgilityPack.Shared/HtmlDocument.cs
index cd56d09..4c51fea 100644
--- a/src/HtmlAgilityPack.Shared/HtmlDocument.cs
+++ b/src/HtmlAgilityPack.Shared/HtmlDocument.cs
@@ -60,6 +60,7 @@ public partial class HtmlDocument
private int _remainderOffset;
private ParseState _state;
private Encoding _streamencoding;
+ private bool _useHtmlEncodingForStream;
/// The HtmlDocument Text. Careful if you modify it.
public string Text;
@@ -313,7 +314,33 @@ public static string GetXmlName(string name)
return GetXmlName(name, false, false);
}
- public static string GetXmlName(string name, bool isAttribute, bool preserveXmlNamespaces)
+#if !METRO
+ /// <summary>
+ /// Sets <c>UseOriginalName</c> on every attribute of the elements matched by "//" + <paramref name="tagName"/>.
+ /// Only attributes present on those elements at the time of the call are affected.
+ /// </summary>
+ /// <param name="tagName">Element name appended to "//" to build the XPath query.</param>
+ public void UseAttributeOriginalName(string tagName)
+ {
+     // SelectNodes returns null rather than an empty collection when nothing matches,
+     // so enumerating its result directly would throw for an unknown tag name.
+     var matches = this.DocumentNode.SelectNodes("//" + tagName);
+     if (matches == null)
+     {
+         return;
+     }
+
+     foreach (var nod in matches)
+     {
+         foreach (var attribut in nod.Attributes)
+         {
+             attribut.UseOriginalName = true;
+         }
+     }
+ }
+#endif
+
+ public static string GetXmlName(string name, bool isAttribute, bool preserveXmlNamespaces)
{
string xmlname = string.Empty;
bool nameisok = true;
@@ -498,6 +512,19 @@ public HtmlTextNode CreateTextNode(string text)
/// The detected encoding.
public Encoding DetectEncoding(Stream stream)
{
+ return DetectEncoding(stream, false);
+ }
+
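+ /// <summary>
+ /// Detects the encoding of an HTML stream.
+ /// </summary>
+ /// <param name="stream">The input stream. May not be null.</param>
+ /// <param name="checkHtml">If true, the reader-based detection does not simply take the StreamReader's current encoding; the HTML is checked instead.</param>
+ /// <returns>The detected encoding.</returns>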
+ public Encoding DetectEncoding(Stream stream, bool checkHtml)
+ {
+ _useHtmlEncodingForStream = checkHtml;
+
if (stream == null)
{
throw new ArgumentNullException("stream");
@@ -539,7 +566,7 @@ public Encoding DetectEncoding(TextReader reader)
}
StreamReader sr = reader as StreamReader;
- if (sr != null)
+ if (sr != null && !_useHtmlEncodingForStream)
{
Text = sr.ReadToEnd();
_streamencoding = sr.CurrentEncoding;
@@ -565,7 +592,7 @@ public Encoding DetectEncoding(TextReader reader)
return _streamencoding;
}
-
+
///
/// Detects the encoding of an HTML text.
///
@@ -1719,15 +1746,24 @@ private void CloseParentImplicitExplicitNode()
{
hasNodeToClose = false;
+ bool forceExplicitEnd = false;
+
// CHECK if parent must be implicitely closed
if (IsParentImplicitEnd())
{
- CloseParentImplicitEnd();
- hasNodeToClose = true;
+ if (OptionOutputAsXml)
+ {
+ forceExplicitEnd = true;
+ }
+ else
+ {
+ CloseParentImplicitEnd();
+ hasNodeToClose = true;
+ }
}
// CHECK if parent must be explicitely closed
- if (IsParentExplicitEnd())
+ if (forceExplicitEnd || IsParentExplicitEnd())
{
CloseParentExplicitEnd();
hasNodeToClose = true;
diff --git a/src/HtmlAgilityPack.Shared/HtmlNode.cs b/src/HtmlAgilityPack.Shared/HtmlNode.cs
index 94b3022..bdbd77d 100644
--- a/src/HtmlAgilityPack.Shared/HtmlNode.cs
+++ b/src/HtmlAgilityPack.Shared/HtmlNode.cs
@@ -1744,7 +1744,7 @@ public virtual void WriteTo(TextWriter outText, int level = 0)
else
WriteContentTo(outText, level);
- if (!_isImplicitEnd)
+ if (_ownerdocument.OptionOutputAsXml || !_isImplicitEnd)
{
outText.Write("" + name);
if (!_ownerdocument.OptionOutputAsXml)
diff --git a/src/HtmlAgilityPack.Shared/HtmlNodeCollection.cs b/src/HtmlAgilityPack.Shared/HtmlNodeCollection.cs
index c3df3c2..ca194ed 100644
--- a/src/HtmlAgilityPack.Shared/HtmlNodeCollection.cs
+++ b/src/HtmlAgilityPack.Shared/HtmlNodeCollection.cs
@@ -291,7 +291,7 @@ public static HtmlNode FindFirst(HtmlNodeCollection items, string name)
{
foreach (HtmlNode node in items)
{
- if (node.Name.IndexOf(name, StringComparison.OrdinalIgnoreCase) != -1)
+ if (node.Name.Equals(name, StringComparison.OrdinalIgnoreCase))
return node;
if (!node.HasChildNodes) continue;
HtmlNode returnNode = FindFirst(node.ChildNodes, name);
diff --git a/src/HtmlAgilityPack.Shared/HtmlWeb.cs b/src/HtmlAgilityPack.Shared/HtmlWeb.cs
index 846003e..61f643b 100644
--- a/src/HtmlAgilityPack.Shared/HtmlWeb.cs
+++ b/src/HtmlAgilityPack.Shared/HtmlWeb.cs
@@ -1167,20 +1167,38 @@ public string GetCachePath(Uri uri)
throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
}
- string cachePath;
+ string cachePath;
if (uri.AbsolutePath == "/")
{
cachePath = Path.Combine(_cachePath, ".htm");
}
else
{
- if (uri.AbsolutePath[uri.AbsolutePath.Length - 1] == Path.AltDirectorySeparatorChar)
+
+ // Drop characters that are not allowed in file names, but keep '/': it separates
+ // the URL's path segments and is only converted to '\\' further down.
+ // Path.GetInvalidFileNameChars() includes '/', so removing it here would flatten
+ // the whole path and glue it straight onto the host name.
+ string absolutePathWithoutBadChar = uri.AbsolutePath;
+ string invalid = new string(Path.GetInvalidFileNameChars()) + new string(Path.GetInvalidPathChars());
+
+ foreach (char c in invalid)
+ {
+     if (c == '/')
+     {
+         continue;
+     }
+
+     absolutePathWithoutBadChar = absolutePathWithoutBadChar.Replace(c.ToString(), "");
+ }
+
+ if (uri.AbsolutePath[uri.AbsolutePath.Length - 1] == Path.AltDirectorySeparatorChar)
{
- cachePath = Path.Combine(_cachePath, (uri.Host + uri.AbsolutePath.TrimEnd(Path.AltDirectorySeparatorChar)).Replace('/', '\\') + ".htm");
+ cachePath = Path.Combine(_cachePath, (uri.Host + absolutePathWithoutBadChar.TrimEnd(Path.AltDirectorySeparatorChar)).Replace('/', '\\') + ".htm");
}
else
{
- cachePath = Path.Combine(_cachePath, (uri.Host + uri.AbsolutePath.Replace('/', '\\')));
+ cachePath = Path.Combine(_cachePath, (uri.Host + absolutePathWithoutBadChar.Replace('/', '\\')));
}
}