diff --git a/src/HtmlAgilityPack.Shared/HtmlEntity.cs b/src/HtmlAgilityPack.Shared/HtmlEntity.cs
index 5e56f8b..00f5dfd 100644
--- a/src/HtmlAgilityPack.Shared/HtmlEntity.cs
+++ b/src/HtmlAgilityPack.Shared/HtmlEntity.cs
@@ -8,6 +8,7 @@
 using System;
 using System.Collections;
 using System.Collections.Generic;
+using System.Globalization;
 using System.Text;
 
 namespace HtmlAgilityPack
@@ -766,14 +767,6 @@ public static string Entitize(string text, bool useNames)
         /// <param name="entitizeQuotAmpAndLtGt">If set to true, the [quote], [ampersand], [lower than] and [greather than] characters will be entitized.</param>
         /// <returns>The result text</returns>
         public static string Entitize(string text, bool useNames, bool entitizeQuotAmpAndLtGt)
-// _entityValue.Add("quot", 34); // quotation mark = APL quote, U+0022 ISOnum
-// _entityName.Add(34, "quot");
-// _entityValue.Add("amp", 38); // ampersand, U+0026 ISOnum
-// _entityName.Add(38, "amp");
-// _entityValue.Add("lt", 60); // less-than sign, U+003C ISOnum
-// _entityName.Add(60, "lt");
-// _entityValue.Add("gt", 62); // greater-than sign, U+003E ISOnum
-// _entityName.Add(62, "gt");
         {
             if (text == null)
                 return null;
@@ -782,28 +775,36 @@ public static string Entitize(string text, bool useNames, bool entitizeQuotAmpAn
                 return text;
 
             StringBuilder sb = new StringBuilder(text.Length);
             for (int i = 0; i < text.Length; i++)
             {
                 int code = text[i];
+                if (char.IsSurrogatePair(text, i))
+                {
+                    // A surrogate pair is one code point above U+FFFF (e.g. an emoji): emit a
+                    // single numeric reference for it and skip over the low surrogate.
+                    code = char.ConvertToUtf32(text, i);
+                    i++;
+                }
+
                 if ((code > 127) ||
                     (entitizeQuotAmpAndLtGt && ((code == 34) || (code == 38) || (code == 60) || (code == 62))))
                 {
                     string entity;
                     EntityName.TryGetValue(code, out entity);
 
                     if ((entity == null) || (!useNames))
                     {
                         sb.Append("&#" + code + ";");
                     }
                     else
                     {
                         sb.Append("&" + entity + ";");
                     }
                 }
                 else
                 {
                     sb.Append(text[i]);
                 }
             }
 
             return sb.ToString();
diff --git a/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlAgilityPack.Tests.Net45.csproj b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlAgilityPack.Tests.Net45.csproj
index d08a886..15d099b 100644
--- a/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlAgilityPack.Tests.Net45.csproj
+++ b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlAgilityPack.Tests.Net45.csproj
@@ -56,6 +56,7 @@
+
diff --git a/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlEntityTests.cs b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlEntityTests.cs
new file mode 100644
index 0000000..7a35f45
--- /dev/null
+++ b/src/Tests/HtmlAgilityPack.Tests.Net45/HtmlEntityTests.cs
@@ -0,0 +1,39 @@
+using NUnit.Framework;
+
+namespace HtmlAgilityPack.Tests.fx._4._5
+{
+    public class HtmlEntityTests
+    {
+        [Test]
+        public void Entitize_PassEmojiUnicode_ShouldCorrectlyEntitize()
+        {
+            string result = HtmlEntity.Entitize("😂");
+
+            Assert.AreEqual("&#128514;", result);
+        }
+
+        [Test]
+        public void Entitize_PassSimpleText_ShouldCorrectlyEntitize()
+        {
+            string result = HtmlEntity.Entitize("qwerty");
+
+            Assert.AreEqual("qwerty", result);
+        }
+
+        [Test]
+        public void Entitize_MarkupEntitizingDisabled_ShouldKeepMarkupCharacters()
+        {
+            string result = HtmlEntity.Entitize("a<b", false, false);
+
+            Assert.AreEqual("a<b", result);
+        }
+
+        [Test]
+        public void Entitize_MarkupEntitizingEnabledWithoutNames_ShouldUseNumericEntities()
+        {
+            string result = HtmlEntity.Entitize("a<b", false, true);
+
+            Assert.AreEqual("a&#60;b", result);
+        }
+    }
+}
\ No newline at end of file