Add Content-Encoding header in response flag #5943

Open
wants to merge 5 commits into base: master
Changes from 3 commits
83 changes: 55 additions & 28 deletions scrapy/downloadermiddlewares/httpcompression.py
@@ -29,24 +29,53 @@ class HttpCompressionMiddleware:
"""This middleware allows compressed (gzip, deflate) traffic to be
sent/received from web sites"""

def __init__(self, stats=None):
def __init__(self, stats=None, settings=None):
self.stats = stats
if not stats:
warnings.warn(
"The default value of COMPRESSION_KEEP_ENCODING_HEADER, "
"False, is deprecated, and will stop working and stop "
"being its default value in a future version of Scrapy. "
"Set COMPRESSION_KEEP_ENCODING_HEADER=True in your "
"settings to remove this warning.",
ScrapyDeprecationWarning,
stacklevel=2,
)
if settings:
self.keep_encoding_header = settings.getbool('COMPRESSION_KEEP_ENCODING_HEADER')
if not self.keep_encoding_header:
warnings.warn(
"Setting COMPRESSION_KEEP_ENCODING_HEADER=False is deprecated",
ScrapyDeprecationWarning,
)
else:
self.keep_encoding_header = False
warnings.warn(
"The default value of COMPRESSION_KEEP_ENCODING_HEADER, "
"False, is deprecated, and will stop working and stop "
"being its default value in a future version of Scrapy. "
"Set COMPRESSION_KEEP_ENCODING_HEADER=True in your "
"settings to remove this warning.",
ScrapyDeprecationWarning,
stacklevel=2,
)

@classmethod
def from_crawler(cls, crawler):
if not crawler.settings.getbool("COMPRESSION_ENABLED"):
raise NotConfigured
try:
return cls(stats=crawler.stats)
return cls(stats=crawler.stats, settings=crawler.settings)
except TypeError:
warnings.warn(
"HttpCompressionMiddleware subclasses must either modify "
"their '__init__' method to support a 'stats' parameter or "
"their '__init__' method to support 'stats' and 'settings' parameters or "
"reimplement the 'from_crawler' method.",
ScrapyDeprecationWarning,
)
result = cls()
result.stats = crawler.stats
result.keep_encoding_header = False
return result

def process_request(self, request, spider):
@@ -55,32 +84,30 @@ def process_request(self, request, spider):
def process_response(self, request, response, spider):
if request.method == "HEAD":
return response
if isinstance(response, Response):
content_encoding = response.headers.getlist("Content-Encoding")
if content_encoding:
encoding = content_encoding.pop()
decoded_body = self._decode(response.body, encoding.lower())
if self.stats:
self.stats.inc_value(
"httpcompression/response_bytes",
len(decoded_body),
spider=spider,
)
self.stats.inc_value(
"httpcompression/response_count", spider=spider
)
respcls = responsetypes.from_args(
headers=response.headers, url=response.url, body=decoded_body
)
kwargs = dict(cls=respcls, body=decoded_body)
if issubclass(respcls, TextResponse):
# force recalculating the encoding until we make sure the
# responsetypes guessing is reliable
kwargs["encoding"] = None
response = response.replace(**kwargs)
if not content_encoding:
del response.headers["Content-Encoding"]
if b'decoded' in response.flags:
return response
content_encoding = response.headers.getlist('Content-Encoding')
if not content_encoding:
return response

encoding = content_encoding[0]
decoded_body = self._decode(response.body, encoding.lower())
if self.stats:
self.stats.inc_value('httpcompression/response_bytes', len(decoded_body), spider=spider)
self.stats.inc_value('httpcompression/response_count', spider=spider)
respcls = responsetypes.from_args(
headers=response.headers, url=response.url, body=decoded_body
)
kwargs = dict(cls=respcls, body=decoded_body)
if issubclass(respcls, TextResponse):
# force recalculating the encoding until we make sure the
# responsetypes guessing is reliable
kwargs['encoding'] = None

kwargs['flags'] = response.flags + [b'decoded']
response = response.replace(**kwargs)
if not self.keep_encoding_header:
del response.headers['Content-Encoding']
return response

def _decode(self, body, encoding):
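For illustration, a minimal spider-side sketch of how the behavior added in this diff could be used: the middleware decompresses the body, appends b"decoded" to response.flags, and, when COMPRESSION_KEEP_ENCODING_HEADER is True, leaves the original Content-Encoding header in place. The spider name, URL, and yielded item below are placeholders, not part of this pull request.

import scrapy


class EncodingAwareSpider(scrapy.Spider):
    # Hypothetical spider, only to illustrate the flag/setting from this patch.
    name = "encoding_aware"
    start_urls = ["https://example.com"]

    custom_settings = {
        # Opt in to the new behavior so the original header is preserved
        # after decompression.
        "COMPRESSION_KEEP_ENCODING_HEADER": True,
    }

    def parse(self, response):
        # Per this patch, the middleware marks decompressed responses
        # by appending b"decoded" to response.flags.
        was_compressed = b"decoded" in response.flags
        if was_compressed:
            # With COMPRESSION_KEEP_ENCODING_HEADER=True the original
            # Content-Encoding (e.g. b"gzip") is still visible here.
            original_encoding = response.headers.get("Content-Encoding")
            self.logger.info("Server compressed this response with %s", original_encoding)
        yield {"url": response.url, "decoded": was_compressed}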
2 changes: 1 addition & 1 deletion scrapy/settings/default_settings.py
@@ -37,7 +37,7 @@
COMMANDS_MODULE = ""

COMPRESSION_ENABLED = True

You should keep the empty line before CONCURRENT_ITEMS

COMPRESSION_KEEP_ENCODING_HEADER = False
CONCURRENT_ITEMS = 100

CONCURRENT_REQUESTS = 16
1 change: 1 addition & 0 deletions scrapy/templates/project/module/settings.py.tmpl
@@ -86,6 +86,7 @@ ROBOTSTXT_OBEY = True
#HTTPCACHE_DIR = "httpcache"
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
COMPRESSION_KEEP_ENCODING_HEADER = True

# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"