Skip to content
This repository has been archived by the owner on Mar 31, 2024. It is now read-only.

Commit

Permalink
Limit resource reads to 100MiB
Browse files Browse the repository at this point in the history
  • Loading branch information
swsnr committed Feb 7, 2021
1 parent d94a0e1 commit a3edc48
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 13 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Expand Up @@ -7,6 +7,10 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html
To publish a new release run `scripts/release` from the project directory.

## [Unreleased]
### Added
- Refuse to read more than 100MiB from external resources, e.g. images; mdcat cannot display images of that size reasonably anyway (see [GH-176]).

[GH-176]: https://github.com/lunaryorn/mdcat/pull/176

## [0.22.2] – 2021-01-01

Expand Down
2 changes: 2 additions & 0 deletions mdcat.1.adoc
Expand Up @@ -36,6 +36,8 @@ The environment variables `$MDCAT_PAGER` and `$PAGER` control the pager used.
In iTerm2, Kitty and Terminology mdcat prints inline images.
mdcat supports most standard pixel formats by default.

mdcat silently ignores images larger than 100 MiB.

=== SVG support

In Terminology mdcat also renders SVG images, using the built-in support of Terminology.
Expand Down
95 changes: 82 additions & 13 deletions src/resources.rs
Expand Up @@ -36,11 +36,14 @@ impl ResourceAccess {
}
}

/// Whether `url` is readable as local file:.
/// Whether `url` uses the `file` scheme and converts to a local file path.
fn is_local(url: &Url) -> bool {
    match url.scheme() {
        // Only `file:` URLs can be local; the conversion must succeed as well.
        "file" => url.to_file_path().is_ok(),
        _ => false,
    }
}

/// Upper bound, in bytes, on how much is read from a single resource (100 MiB).
static RESOURCE_READ_LIMIT: u64 = 100 * 1024 * 1024;

#[throws]
fn fetch_http(url: &Url) -> Vec<u8> {
let proxy = match env_proxy::for_url(url).to_string() {
Expand All @@ -53,31 +56,75 @@ fn fetch_http(url: &Url) -> Vec<u8> {
}
};

let mut buffer = Vec::new();
proxy
let response = proxy
.map_or(AgentBuilder::new(), |proxy| {
AgentBuilder::new().proxy(proxy)
})
.build()
.request_url("GET", url)
.set("User-Agent", concat!("mdcat/", env!("CARGO_PKG_VERSION")))
.call()
.with_context(|| format!("Failed to GET {}", url))?
.into_reader()
.read_to_end(&mut buffer)
.with_context(|| format!("Failed to read {}", url))?;
.with_context(|| format!("Failed to GET {}", url))?;

match response.header("Content-Length") {
// The server gave us no content size so read until the end of the stream, but not more than our read limit.
None => {
// An educated guess for a good capacity,
let mut buffer = Vec::with_capacity(1_048_576);
// We read one byte more than our limit, so that we can differentiate between a regular EOF and one from hitting the limit.
response
.into_reader()
.take(RESOURCE_READ_LIMIT + 1)
.read_to_end(&mut buffer)
.with_context(|| format!("Failed to read from {}", url))?;

if RESOURCE_READ_LIMIT < buffer.len() as u64 {
throw!(anyhow!(
"Contents of {} exceeded {}, rejected",
url,
RESOURCE_READ_LIMIT
))
} else {
buffer
}
}
// If we've got a content-size use it to read exactly as many bytes as the server told us to do (within limits)
Some(value) => {
let size = value
.parse::<usize>()
.with_context(|| format!("{} reports invalid content size {}", url, value))?;
if RESOURCE_READ_LIMIT < size as u64 {
throw!(anyhow!(
"{} reports size {} which exceeds limit {}, refusing to read",
url,
size,
RESOURCE_READ_LIMIT
))
}

let mut buffer = vec![0; size];
response
.into_reader()
// Just to be on the safe side limit the read operation explicitly, just in case we got the above check wrong
.take(RESOURCE_READ_LIMIT)
.read_exact(buffer.as_mut_slice())
.with_context(|| format!("Failed to read from {}", url))?;

buffer
buffer
}
}
}

/// Read the contents of the given `url` if supported.
///
/// Fail if we don’t know how to read from `url`, or if we fail to read from
/// URL.
/// Fail if
///
/// - we don’t know how to read from `url`, i.e. the scheme's not supported,
/// - we fail to read from URL, or
/// - the contents of the URL exceed an internal hard-coded size limit (currently 100 MiB).
///
/// We currently support `file:` URLs which the underlying operating system can
/// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs if enabled at
/// build system.
/// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs.
pub fn read_url(url: &Url, access: ResourceAccess) -> Result<Vec<u8>> {
if !access.permits(url) {
throw!(anyhow!(
Expand All @@ -92,9 +139,20 @@ pub fn read_url(url: &Url, access: ResourceAccess) -> Result<Vec<u8>> {
let mut buffer = Vec::new();
File::open(path)
.with_context(|| format!("Failed to open file at {}", url))?
// Read a byte more than the limit to differentiate an expected EOF from hitting the limit
.take(RESOURCE_READ_LIMIT + 1)
.read_to_end(&mut buffer)
.with_context(|| format!("Failed to read from file at {}", url))?;
Ok(buffer)

if RESOURCE_READ_LIMIT < buffer.len() as u64 {
Err(anyhow!(
"Contents of {} exceeded {}, rejected",
url,
RESOURCE_READ_LIMIT
))
} else {
Ok(buffer)
}
}
Err(_) => Err(anyhow!("Cannot convert URL {} to file path", url)),
},
Expand Down Expand Up @@ -165,4 +223,15 @@ mod tests {
assert!(result.is_ok(), "Unexpected error: {:?}", result);
assert_eq!(result.unwrap().len(), 100);
}

#[test]
fn read_url_with_http_url_fails_when_size_limit_is_exceeded() {
    // The expected error below pins a reported size of 115343400 bytes,
    // which lies above the 104857600 byte read limit.
    let url: url::Url = "https://eu.httpbin.org/response-headers?content-length=115343400"
        .parse()
        .unwrap();
    let result = read_url(&url, ResourceAccess::RemoteAllowed);
    assert!(result.is_err(), "Unexpected success: {:?}", result);
    let message = format!("{:#}", result.unwrap_err());
    assert_eq!(message, "https://eu.httpbin.org/response-headers?content-length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read");
}
}

0 comments on commit a3edc48

Please sign in to comment.