From a3edc4818568616b7f0a4a42a7a3c528ddfd68f4 Mon Sep 17 00:00:00 2001 From: Sebastian Wiesner Date: Sun, 7 Feb 2021 12:02:21 +0100 Subject: [PATCH] Limit resource reads to 100MiB --- CHANGELOG.md | 4 ++ mdcat.1.adoc | 2 + src/resources.rs | 95 +++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58d93c12..4a894892 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html To publish a new release run `scripts/release` from the project directory. ## [Unreleased] +### Added +- Refuse to read more than 100MiB from external resources, e.g. images; mdcat cannot display images of that size reasonably anyway (see [GH-176]). + +[GH-176]: https://github.com/lunaryorn/mdcat/pull/176 ## [0.22.2] – 2021-01-01 diff --git a/mdcat.1.adoc b/mdcat.1.adoc index ad638231..daee1d80 100644 --- a/mdcat.1.adoc +++ b/mdcat.1.adoc @@ -36,6 +36,8 @@ The environment variables `$MDCAT_PAGER` and `$PAGER` control the pager used. In iTerm2, Kitty and Terminology mdcat prints inline images. mdcat supports most standard pixel formats by default. +mdcat silently ignores images larger than 100 MiB. + === SVG support In Terminology mdcat also renders SVG images, using the built-in support of Terminology. diff --git a/src/resources.rs b/src/resources.rs index 309b473b..a0e364a4 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -36,11 +36,14 @@ impl ResourceAccess { } } -/// Whether `url` is readable as local file:. +/// Whether `url` is readable as local file. fn is_local(url: &Url) -> bool { url.scheme() == "file" && url.to_file_path().is_ok() } +/// Read size limit for resources. 
+static RESOURCE_READ_LIMIT: u64 = 104_857_600; + #[throws] fn fetch_http(url: &Url) -> Vec<u8> { let proxy = match env_proxy::for_url(url).to_string() { @@ -53,8 +56,7 @@ fn fetch_http(url: &Url) -> Vec<u8> { } }; - let mut buffer = Vec::new(); - proxy + let response = proxy .map_or(AgentBuilder::new(), |proxy| { AgentBuilder::new().proxy(proxy) }) @@ -62,22 +64,67 @@ fn fetch_http(url: &Url) -> Vec<u8> { .request_url("GET", url) .set("User-Agent", concat!("mdcat/", env!("CARGO_PKG_VERSION"))) .call() - .with_context(|| format!("Failed to GET {}", url))? - .into_reader() - .read_to_end(&mut buffer) - .with_context(|| format!("Failed to read {}", url))?; + .with_context(|| format!("Failed to GET {}", url))?; + + match response.header("Content-Length") { + // The server gave us no content size so read until the end of the stream, but not more than our read limit. + None => { + // An educated guess for a good capacity. + let mut buffer = Vec::with_capacity(1_048_576); + // We read one byte more than our limit, so that we can differentiate between a regular EOF and one from hitting the limit. 
+ response .into_reader() .take(RESOURCE_READ_LIMIT + 1) .read_to_end(&mut buffer) .with_context(|| format!("Failed to read from {}", url))?; + + if RESOURCE_READ_LIMIT < buffer.len() as u64 { + throw!(anyhow!( + "Contents of {} exceeded {}, rejected", + url, + RESOURCE_READ_LIMIT + )) + } else { + buffer + } + } + // If we've got a content-size use it to read exactly as many bytes as the server told us to do (within limits) + Some(value) => { + let size = value + .parse::<usize>() + .with_context(|| format!("{} reports invalid content size {}", url, value))?; + if RESOURCE_READ_LIMIT < size as u64 { + throw!(anyhow!( + "{} reports size {} which exceeds limit {}, refusing to read", + url, + size, + RESOURCE_READ_LIMIT + )) + } + + let mut buffer = vec![0; size]; + response + .into_reader() + // Just to be on the safe side limit the read operation explicitly, just in case we got the above check wrong + .take(RESOURCE_READ_LIMIT) + .read_exact(buffer.as_mut_slice()) + .with_context(|| format!("Failed to read from {}", url))?; - buffer + buffer + } + } } /// Read the contents of the given `url` if supported. /// -/// Fail if we don’t know how to read from `url`, or if we fail to read from -/// URL. +/// Fail if +/// +/// - we don’t know how to read from `url`, i.e. the scheme's not supported, +/// - if we fail to read from URL, or +/// - if contents of the URL exceed an internal hard-coded size limit (currently 100 MiB). /// /// We currently support `file:` URLs which the underlying operation system can -/// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs if enabled at -/// build system. +/// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs. pub fn read_url(url: &Url, access: ResourceAccess) -> Result<Vec<u8>> { if !access.permits(url) { throw!(anyhow!( @@ -92,9 +139,20 @@ pub fn read_url(url: &Url, access: ResourceAccess) -> Result<Vec<u8>> { let mut buffer = Vec::new(); File::open(path) .with_context(|| format!("Failed to open file at {}", url))? 
+ // Read a byte more than the limit to differentiate an expected EOF from hitting the limit + .take(RESOURCE_READ_LIMIT + 1) .read_to_end(&mut buffer) .with_context(|| format!("Failed to read from file at {}", url))?; - Ok(buffer) + + if RESOURCE_READ_LIMIT < buffer.len() as u64 { + Err(anyhow!( + "Contents of {} exceeded {}, rejected", + url, + RESOURCE_READ_LIMIT + )) + } else { + Ok(buffer) + } } Err(_) => Err(anyhow!("Cannot convert URL {} to file path", url)), }, @@ -165,4 +223,15 @@ mod tests { assert!(result.is_ok(), "Unexpected error: {:?}", result); assert_eq!(result.unwrap().len(), 100); } + + #[test] + fn read_url_with_http_url_fails_when_size_limit_is_exceeded() { + let url = "https://eu.httpbin.org/response-headers?content-length=115343400" + .parse::<Url>() + .unwrap(); + let result = read_url(&url, ResourceAccess::RemoteAllowed); + assert!(result.is_err(), "Unexpected success: {:?}", result); + let error = format!("{:#}", result.unwrap_err()); + assert_eq!(error, "https://eu.httpbin.org/response-headers?content-length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read") + } }