Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle not supported content encoding #327

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/HtmlAgilityPack.Net35/HtmlAgilityPack.Net35.csproj
Expand Up @@ -18,7 +18,7 @@
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DefineConstants>DEBUG;TRACE;FX35</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
Expand Down
Expand Up @@ -44,7 +44,7 @@
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DefineConstants>DEBUG;TRACE;FX40</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<DocumentationFile>bin\Debug\HtmlAgilityPack.XML</DocumentationFile>
Expand Down
1 change: 1 addition & 0 deletions src/HtmlAgilityPack.Net40/Properties/AssemblyInfo.cs
Expand Up @@ -18,6 +18,7 @@
#endif
#endif
[assembly: InternalsVisibleTo("HtmlAgilityPack.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010027dc71d8e0b968c7324238e18a4cee4a367f1bf50c9d7a52d91ed46c6a1a584b9142c1d4234c4011d25437c909924079660c434eebe6d2c46412f30520a276e7ca8d8fa7075bb8b9e1c7502ef0e50423b32d469ba750012823fde16989ab42d8428ca5fdd0b06b801788a17239b78e0f75900012a50c5038ab93abbe2ac0d6ee")]
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2, PublicKey=0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("ZZZ Projects Inc.")]
[assembly: AssemblyProduct("Html Agility Pack")]
Expand Down
1 change: 1 addition & 0 deletions src/HtmlAgilityPack.Net45/Properties/AssemblyInfo.cs
Expand Up @@ -18,6 +18,7 @@
#endif
#endif
[assembly: InternalsVisibleTo("HtmlAgilityPack.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010027dc71d8e0b968c7324238e18a4cee4a367f1bf50c9d7a52d91ed46c6a1a584b9142c1d4234c4011d25437c909924079660c434eebe6d2c46412f30520a276e7ca8d8fa7075bb8b9e1c7502ef0e50423b32d469ba750012823fde16989ab42d8428ca5fdd0b06b801788a17239b78e0f75900012a50c5038ab93abbe2ac0d6ee")]
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2, PublicKey=0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("ZZZ Projects Inc.")]
[assembly: AssemblyProduct("Html Agility Pack")]
Expand Down
6 changes: 5 additions & 1 deletion src/HtmlAgilityPack.NetStandard2_0/AssemblyInfo.cs
@@ -1,3 +1,7 @@
using System;
using System.Runtime.CompilerServices;

[assembly: CLSCompliant(true)]
[assembly: CLSCompliant(true)]

[assembly: InternalsVisibleTo("HtmlAgilityPack.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010027dc71d8e0b968c7324238e18a4cee4a367f1bf50c9d7a52d91ed46c6a1a584b9142c1d4234c4011d25437c909924079660c434eebe6d2c46412f30520a276e7ca8d8fa7075bb8b9e1c7502ef0e50423b32d469ba750012823fde16989ab42d8428ca5fdd0b06b801788a17239b78e0f75900012a50c5038ab93abbe2ac0d6ee")]
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2, PublicKey=0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7")]
39 changes: 39 additions & 0 deletions src/HtmlAgilityPack.Shared/EncodingNotSupportedException.cs
@@ -0,0 +1,39 @@
// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators.
// Website & Documentation: http://html-agility-pack.net
// Forum & Issues: https://github.com/zzzprojects/html-agility-pack
// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE
// More projects: http://www.zzzprojects.com/
// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved.

using System;
using System.Text;

namespace HtmlAgilityPack
{
    /// <summary>
    /// Exception carrying the name of an encoding that could not be resolved.
    /// </summary>
    public class EncodingNotSupportedException : Exception
    {
        #region Fields

        // Set once in the constructor and never changed afterwards.
        private readonly string _encoding;

        #endregion

        #region Constructors

        /// <summary>
        /// Initializes the exception for the given encoding name.
        /// </summary>
        /// <param name="encoding">Name of the encoding that is not supported.</param>
        internal EncodingNotSupportedException(string encoding)
            // Provide a descriptive message so logs and unhandled-exception output name the encoding.
            : base("Encoding '" + encoding + "' is not supported.")
        {
            _encoding = encoding;
        }

        #endregion

        #region Properties

        /// <summary>
        /// Gets the name of the encoding that is not supported.
        /// </summary>
        public string Encoding
        {
            get { return _encoding; }
        }

        #endregion
    }
}
7 changes: 7 additions & 0 deletions src/HtmlAgilityPack.Shared/HtmlAgilityPack.Shared.projitems
Expand Up @@ -17,6 +17,7 @@
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildThisFileDirectory)crc32.cs" />
<Compile Include="$(MSBuildThisFileDirectory)EncodingNotSupportedException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)EncodingFoundException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)HtmlAttribute.cs" />
<Compile Include="$(MSBuildThisFileDirectory)HtmlAttributeCollection.cs" />
Expand Down Expand Up @@ -57,5 +58,11 @@
<Compile Include="$(MSBuildThisFileDirectory)Trace.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Trace.FullFramework.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Utilities.cs" />
<Compile Include="$(MSBuildThisFileDirectory)IHttpWebRequest.cs" />
<Compile Include="$(MSBuildThisFileDirectory)HttpWebRequestWrapper.cs" />
<Compile Include="$(MSBuildThisFileDirectory)IHttpWebResponse.cs" />
<Compile Include="$(MSBuildThisFileDirectory)HttpWebResponseWrapper.cs" />
<Compile Include="$(MSBuildThisFileDirectory)IHttpWebRequestFactory.cs" />
<Compile Include="$(MSBuildThisFileDirectory)HttpWebRequestFactory.cs" />
</ItemGroup>
</Project>
56 changes: 43 additions & 13 deletions src/HtmlAgilityPack.Shared/HtmlWeb.cs
Expand Up @@ -83,6 +83,10 @@ public partial class HtmlWeb

#region Fields

#if !(NETSTANDARD1_3 || NETSTANDARD1_6)
private IHttpWebRequestFactory _requestFactory;
#endif

private bool _autoDetectEncoding = true;
private bool _cacheOnly;

Expand Down Expand Up @@ -928,6 +932,21 @@ public bool UsingCache

#endregion

#region Constructors

#if !(NETSTANDARD1_3 || NETSTANDARD1_6)
/// <summary>
/// Creates an HtmlWeb that uses a new <see cref="HttpWebRequestFactory"/> as its request factory.
/// </summary>
public HtmlWeb()
    : this(new HttpWebRequestFactory())
{
}

/// <summary>
/// Creates an HtmlWeb that keeps the supplied request factory.
/// </summary>
/// <param name="requestFactory">Factory stored in the <c>_requestFactory</c> field.</param>
internal HtmlWeb(IHttpWebRequestFactory requestFactory)
{
    this._requestFactory = requestFactory;
}
#endif

#endregion

#region Public Methods

#if !(NETSTANDARD1_3 || NETSTANDARD1_6)
Expand Down Expand Up @@ -1547,10 +1566,10 @@ private static long SaveStream(Stream stream, string path, DateTime touchDate, i
ICredentials creds)
{
string cachePath = null;
HttpWebRequest req;
IHttpWebRequest req;
bool oldFile = false;

req = WebRequest.Create(uri) as HttpWebRequest;
req = _requestFactory.Create(uri);
req.Method = method;
req.UserAgent = UserAgent;
if (CaptureRedirect)
Expand Down Expand Up @@ -1617,7 +1636,7 @@ private static long SaveStream(Stream stream, string path, DateTime touchDate, i
if (PreRequest != null)
{
// allow our user to change the request at will
if (!PreRequest(req))
if (!PreRequest(req.Request))
{
return HttpStatusCode.ResetContent;
}
Expand All @@ -1632,16 +1651,16 @@ private static long SaveStream(Stream stream, string path, DateTime touchDate, i
// }
}

HttpWebResponse resp;
IHttpWebResponse resp;

try
{
resp = req.GetResponse() as HttpWebResponse;
resp = req.GetResponse();
}
catch (WebException we)
{
_requestDuration = Environment.TickCount - tc;
resp = (HttpWebResponse) we.Response;
resp = we.Response == null ? null : new HttpWebResponseWrapper((HttpWebResponse) we.Response);
if (resp == null)
{
if (oldFile)
Expand All @@ -1668,7 +1687,7 @@ private static long SaveStream(Stream stream, string path, DateTime touchDate, i
// allow our user to get some info from the response
if (PostResponse != null)
{
PostResponse(req, resp);
PostResponse(req.Request, resp.Response);
}

_requestDuration = Environment.TickCount - tc;
Expand All @@ -1677,11 +1696,22 @@ private static long SaveStream(Stream stream, string path, DateTime touchDate, i
bool html = IsHtmlContent(resp.ContentType);
bool isUnknown = string.IsNullOrEmpty(resp.ContentType);

Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding)
? Encoding.GetEncoding(resp.ContentEncoding)
: null;
if (OverrideEncoding != null)
respenc = OverrideEncoding;
// An explicit OverrideEncoding takes precedence; otherwise resolve the encoding named by the response.
Encoding respenc = OverrideEncoding;
if (respenc == null && !string.IsNullOrEmpty(resp.ContentEncoding))
{
    try
    {
        // Assign the result so the resolved encoding is actually used further down.
        respenc = Encoding.GetEncoding(resp.ContentEncoding);
    }
    catch (ArgumentException ex)
    {
        // A failure attributed to the "name" argument means the encoding name itself was rejected.
        if (ex.ParamName == "name")
        {
            throw new EncodingNotSupportedException(resp.ContentEncoding);
        }

        // Rethrow anything else without resetting the original stack trace.
        throw;
    }
}

if (CaptureRedirect)
{
Expand Down Expand Up @@ -2092,7 +2122,7 @@ private HtmlDocument LoadUrl(Uri uri, string method, IWebProxy proxy, ICredentia
}
#endif
#if !(NETSTANDARD1_3 || NETSTANDARD1_6)
private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp)
private void SaveCacheHeaders(Uri requestUri, IHttpWebResponse resp)
{
// we cache the original headers aside the cached document.
string file = GetCacheHeadersPath(requestUri);
Expand Down
31 changes: 31 additions & 0 deletions src/HtmlAgilityPack.Shared/HttpWebRequestFactory.cs
@@ -0,0 +1,31 @@
// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators.
// Website & Documentation: http://html-agility-pack.net
// Forum & Issues: https://github.com/zzzprojects/html-agility-pack
// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE
// More projects: http://www.zzzprojects.com/
// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved.

#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO)

using System;
using System.Net;

namespace HtmlAgilityPack
{
    /// <summary>
    /// <see cref="IHttpWebRequestFactory"/> implementation that creates wrapped <see cref="HttpWebRequest"/> instances.
    /// </summary>
    internal class HttpWebRequestFactory : IHttpWebRequestFactory
    {
        /// <summary>
        /// Creates the factory; it holds no state.
        /// </summary>
        public HttpWebRequestFactory()
        {
        }

        /// <summary>
        /// Creates a request for <paramref name="uri"/> and wraps it.
        /// </summary>
        /// <param name="uri">Target of the request.</param>
        /// <returns>An <see cref="HttpWebRequestWrapper"/> around the created request.</returns>
        public IHttpWebRequest Create(Uri uri)
        {
            // The static Create method is declared on WebRequest; HttpWebRequest only inherits it.
            WebRequest created = WebRequest.Create(uri);
            HttpWebRequest httpRequest = (HttpWebRequest)created;
            return new HttpWebRequestWrapper(httpRequest);
        }
    }
}

#endif
44 changes: 44 additions & 0 deletions src/HtmlAgilityPack.Shared/HttpWebRequestWrapper.cs
@@ -0,0 +1,44 @@
// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators.
// Website & Documentation: http://html-agility-pack.net
// Forum & Issues: https://github.com/zzzprojects/html-agility-pack
// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE
// More projects: http://www.zzzprojects.com/
// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved.

#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO)

using System;
using System.Net;

namespace HtmlAgilityPack
{
    /// <summary>
    /// Adapter exposing an <see cref="HttpWebRequest"/> through <see cref="IHttpWebRequest"/>.
    /// Every member forwards directly to the wrapped request.
    /// </summary>
    internal class HttpWebRequestWrapper : IHttpWebRequest
    {
        private readonly HttpWebRequest _inner;

        /// <summary>
        /// Wraps the given request.
        /// </summary>
        /// <param name="request">The request every member forwards to.</param>
        public HttpWebRequestWrapper(HttpWebRequest request)
        {
            _inner = request;
        }

        /// <summary>Gets the wrapped request.</summary>
        public HttpWebRequest Request
        {
            get { return _inner; }
        }

        /// <summary>Gets or sets the wrapped request's <c>Method</c>.</summary>
        public string Method
        {
            get { return _inner.Method; }
            set { _inner.Method = value; }
        }

        /// <summary>Gets or sets the wrapped request's <c>UserAgent</c>.</summary>
        public string UserAgent
        {
            get { return _inner.UserAgent; }
            set { _inner.UserAgent = value; }
        }

        /// <summary>Gets or sets the wrapped request's <c>AllowAutoRedirect</c>.</summary>
        public bool AllowAutoRedirect
        {
            get { return _inner.AllowAutoRedirect; }
            set { _inner.AllowAutoRedirect = value; }
        }

        /// <summary>Gets or sets the wrapped request's <c>Credentials</c>.</summary>
        public ICredentials Credentials
        {
            get { return _inner.Credentials; }
            set { _inner.Credentials = value; }
        }

        /// <summary>Gets or sets the wrapped request's <c>Proxy</c>.</summary>
        public IWebProxy Proxy
        {
            get { return _inner.Proxy; }
            set { _inner.Proxy = value; }
        }

        /// <summary>Gets the wrapped request's <c>RequestUri</c>.</summary>
        public Uri RequestUri
        {
            get { return _inner.RequestUri; }
        }

        /// <summary>Gets or sets the wrapped request's <c>IfModifiedSince</c>.</summary>
        public DateTime IfModifiedSince
        {
            get { return _inner.IfModifiedSince; }
            set { _inner.IfModifiedSince = value; }
        }

        /// <summary>Gets or sets the wrapped request's <c>CookieContainer</c>.</summary>
        public CookieContainer CookieContainer
        {
            get { return _inner.CookieContainer; }
            set { _inner.CookieContainer = value; }
        }

        /// <summary>
        /// Calls <c>GetResponse</c> on the wrapped request and wraps the result.
        /// </summary>
        /// <returns>An <see cref="HttpWebResponseWrapper"/> around the response.</returns>
        public IHttpWebResponse GetResponse()
        {
            HttpWebResponse raw = (HttpWebResponse)_inner.GetResponse();
            return new HttpWebResponseWrapper(raw);
        }
    }
}

#endif
55 changes: 55 additions & 0 deletions src/HtmlAgilityPack.Shared/HttpWebResponseWrapper.cs
@@ -0,0 +1,55 @@
// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators.
// Website & Documentation: http://html-agility-pack.net
// Forum & Issues: https://github.com/zzzprojects/html-agility-pack
// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE
// More projects: http://www.zzzprojects.com/
// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved.

#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO)

using System;
using System.IO;
using System.Net;

namespace HtmlAgilityPack
{
    /// <summary>
    /// Adapter exposing an <see cref="HttpWebResponse"/> through <see cref="IHttpWebResponse"/>.
    /// Every member forwards directly to the wrapped response.
    /// </summary>
    internal class HttpWebResponseWrapper : IHttpWebResponse
    {
        private readonly HttpWebResponse _response;

        /// <summary>Gets the wrapped response.</summary>
        public HttpWebResponse Response { get { return _response; } }

        /// <summary>
        /// Wraps the given response.
        /// </summary>
        /// <param name="response">The response every member forwards to.</param>
        public HttpWebResponseWrapper(HttpWebResponse response)
        {
            _response = response;
        }

        /// <summary>Gets the wrapped response's <c>ResponseUri</c>.</summary>
        public Uri ResponseUri { get { return _response.ResponseUri; } }

        /// <summary>Gets the wrapped response's <c>StatusCode</c>.</summary>
        public HttpStatusCode StatusCode { get { return _response.StatusCode; } }

        /// <summary>Gets the wrapped response's <c>ContentType</c>.</summary>
        public string ContentType { get { return _response.ContentType; } }

        /// <summary>Gets the wrapped response's <c>ContentEncoding</c>.</summary>
        public string ContentEncoding { get { return _response.ContentEncoding; } }

        /// <summary>Gets the wrapped response's <c>Headers</c>.</summary>
        public WebHeaderCollection Headers { get { return _response.Headers; } }

        /// <summary>Gets the wrapped response's <c>LastModified</c>.</summary>
        public DateTime LastModified { get { return _response.LastModified; } }

        /// <summary>
        /// Closes the wrapped response.
        /// </summary>
        public void Close()
        {
            _response.Close();
        }

        /// <summary>
        /// Disposes the wrapped response.
        /// </summary>
        public void Dispose()
        {
            // Dispose through the IDisposable interface: older frameworks implement it
            // explicitly (no public Dispose method), so this form compiles on every target
            // and never degrades into a silent no-op.
            ((IDisposable)_response).Dispose();
        }

        /// <summary>
        /// Returns the stream obtained from the wrapped response's <c>GetResponseStream</c>.
        /// </summary>
        public Stream GetResponseStream()
        {
            return _response.GetResponseStream();
        }
    }
}

#endif
34 changes: 34 additions & 0 deletions src/HtmlAgilityPack.Shared/IHttpWebRequest.cs
@@ -0,0 +1,34 @@
// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators.
// Website & Documentation: http://html-agility-pack.net
// Forum & Issues: https://github.com/zzzprojects/html-agility-pack
// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE
// More projects: http://www.zzzprojects.com/
// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved.

#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO)

using System;
using System.Net;

namespace HtmlAgilityPack
{
    /// <summary>
    /// Abstracts HttpWebRequest.
    /// </summary>
    internal interface IHttpWebRequest
    {
        /// <summary>Gets the underlying <see cref="HttpWebRequest"/>.</summary>
        HttpWebRequest Request { get; }

        /// <summary>Gets or sets the request method.</summary>
        string Method { get; set; }

        /// <summary>Gets or sets the user agent value.</summary>
        string UserAgent { get; set; }

        /// <summary>Gets or sets whether automatic redirection is allowed.</summary>
        bool AllowAutoRedirect { get; set; }

        /// <summary>Gets or sets the credentials for the request.</summary>
        ICredentials Credentials { get; set; }

        /// <summary>Gets or sets the proxy for the request.</summary>
        IWebProxy Proxy { get; set; }

        /// <summary>Gets the URI of the request.</summary>
        Uri RequestUri { get; }

        /// <summary>Gets or sets the If-Modified-Since date.</summary>
        DateTime IfModifiedSince { get; set; }

        /// <summary>Gets or sets the cookie container for the request.</summary>
        CookieContainer CookieContainer { get; set; }

        /// <summary>Obtains the response for this request.</summary>
        /// <returns>The response, exposed as an <see cref="IHttpWebResponse"/>.</returns>
        IHttpWebResponse GetResponse();
    }
}

#endif