From a3edc4818568616b7f0a4a42a7a3c528ddfd68f4 Mon Sep 17 00:00:00 2001 From: Sebastian Wiesner Date: Sun, 7 Feb 2021 12:02:21 +0100 Subject: [PATCH] Limit resource reads to 100MiB --- CHANGELOG.md | 4 ++ mdcat.1.adoc | 2 + src/resources.rs | 95 +++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58d93c12..4a894892 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html To publish a new release run `scripts/release` from the project directory. ## [Unreleased] +### Added +- Refuse to read more than 100MiB from external resources, e.g. images; mdcat cannot display images of that size reasonably anyway (see [GH-176]). + +[GH-176]: https://github.com/lunaryorn/mdcat/pull/176 ## [0.22.2] – 2021-01-01 diff --git a/mdcat.1.adoc b/mdcat.1.adoc index ad638231..daee1d80 100644 --- a/mdcat.1.adoc +++ b/mdcat.1.adoc @@ -36,6 +36,8 @@ The environment variables `$MDCAT_PAGER` and `$PAGER` control the pager used. In iTerm2, Kitty and Terminology mdcat prints inline images. mdcat supports most standard pixel formats by default. +mdcat silently ignores images larger than 100 MiB. + === SVG support In Terminology mdcat also renders SVG images, using the built-in support of Terminology. diff --git a/src/resources.rs b/src/resources.rs index 309b473b..a0e364a4 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -36,11 +36,14 @@ impl ResourceAccess { } } -/// Whether `url` is readable as local file:. +/// Whether `url` is readable as local file. fn is_local(url: &Url) -> bool { url.scheme() == "file" && url.to_file_path().is_ok() } +/// Read size limit for resources. 
+static RESOURCE_READ_LIMIT: u64 = 104_857_600; + #[throws] fn fetch_http(url: &Url) -> Vec<u8> { let proxy = match env_proxy::for_url(url).to_string() { @@ -53,8 +56,7 @@ fn fetch_http(url: &Url) -> Vec<u8> { } }; - let mut buffer = Vec::new(); - proxy + let response = proxy .map_or(AgentBuilder::new(), |proxy| { AgentBuilder::new().proxy(proxy) }) @@ -62,22 +64,67 @@ fn fetch_http(url: &Url) -> Vec<u8> { .request_url("GET", url) .set("User-Agent", concat!("mdcat/", env!("CARGO_PKG_VERSION"))) .call() - .with_context(|| format!("Failed to GET {}", url))? - .into_reader() - .read_to_end(&mut buffer) - .with_context(|| format!("Failed to read {}", url))?; + .with_context(|| format!("Failed to GET {}", url))?; + + match response.header("Content-Length") { + // The server gave us no content size so read until the end of the stream, but not more than our read limit. + None => { + // An educated guess for a good capacity. + let mut buffer = Vec::with_capacity(1_048_576); + // We read one byte more than our limit, so that we can differentiate between a regular EOF and one from hitting the limit. 
+ response .into_reader() .take(RESOURCE_READ_LIMIT + 1) .read_to_end(&mut buffer) .with_context(|| format!("Failed to read from {}", url))?; + + if RESOURCE_READ_LIMIT < buffer.len() as u64 { + throw!(anyhow!( + "Contents of {} exceeded {}, rejected", + url, + RESOURCE_READ_LIMIT + )) + } else { + buffer + } + } + // If we've got a content-size use it to read exactly as many bytes as the server told us to do (within limits) + Some(value) => { + let size = value + .parse::<usize>() + .with_context(|| format!("{} reports invalid content size {}", url, value))?; + if RESOURCE_READ_LIMIT < size as u64 { + throw!(anyhow!( + "{} reports size {} which exceeds limit {}, refusing to read", + url, + size, + RESOURCE_READ_LIMIT + )) + } + + let mut buffer = vec![0; size]; + response + .into_reader() + // Just to be on the safe side limit the read operation explicitly, just in case we got the above check wrong + .take(RESOURCE_READ_LIMIT) + .read_exact(buffer.as_mut_slice()) + .with_context(|| format!("Failed to read from {}", url))?; - buffer + buffer + } + } } /// Read the contents of the given `url` if supported. /// -/// Fail if we don’t know how to read from `url`, or if we fail to read from -/// URL. +/// Fail if +/// +/// - we don’t know how to read from `url`, i.e. the scheme's not supported, +/// - if we fail to read from URL, or +/// - if contents of the URL exceed an internal hard-coded size limit (currently 100 MiB). /// /// We currently support `file:` URLs which the underlying operation system can -/// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs if enabled at -/// build system. +/// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs. pub fn read_url(url: &Url, access: ResourceAccess) -> Result<Vec<u8>> { if !access.permits(url) { throw!(anyhow!( @@ -92,9 +139,20 @@ pub fn read_url(url: &Url, access: ResourceAccess) -> Result<Vec<u8>> { let mut buffer = Vec::new(); File::open(path) .with_context(|| format!("Failed to open file at {}", url))? 
+ // Read a byte more than the limit to differentiate an expected EOF from hitting the limit + .take(RESOURCE_READ_LIMIT + 1) .read_to_end(&mut buffer) .with_context(|| format!("Failed to read from file at {}", url))?; - Ok(buffer) + + if RESOURCE_READ_LIMIT < buffer.len() as u64 { + Err(anyhow!( + "Contents of {} exceeded {}, rejected", + url, + RESOURCE_READ_LIMIT + )) + } else { + Ok(buffer) + } } Err(_) => Err(anyhow!("Cannot convert URL {} to file path", url)), }, @@ -165,4 +223,15 @@ mod tests { assert!(result.is_ok(), "Unexpected error: {:?}", result); assert_eq!(result.unwrap().len(), 100); } + + #[test] + fn read_url_with_http_url_fails_when_size_limit_is_exceeded() { + let url = "https://eu.httpbin.org/response-headers?content-length=115343400" + .parse::<Url>() + .unwrap(); + let result = read_url(&url, ResourceAccess::RemoteAllowed); + assert!(result.is_err(), "Unexpected success: {:?}", result); + let error = format!("{:#}", result.unwrap_err()); + assert_eq!(error, "https://eu.httpbin.org/response-headers?content-length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read") + } }