From 661bd9c0fa46dbcb6a6e204d04113103af10bbd3 Mon Sep 17 00:00:00 2001 From: Luke Hsiao Date: Mon, 17 Feb 2020 11:09:33 -0800 Subject: [PATCH] Set default user agent for external requests Many servers will return errors (e.g. 400/403) to requests that do not set a User-Agent header. This results in issues in both the link_checker and load_data components. With the link_checker these are false positive dead links. In load_data, remote data fails to be fetched. To mitigate this issue, this sets a default User-Agent of $CARGO_PKG_NAME/$CARGO_PKG_VERSION Note that the root cause of this regression from zola v0.9.0 is that reqwest 0.10 changed their default behavior and no longer sets a User-Agent by default: https://github.com/seanmonstar/reqwest/pull/751 Fixes #950. --- components/link_checker/src/lib.rs | 21 +++++++++++- .../templates/src/global_fns/load_data.rs | 32 ++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/components/link_checker/src/lib.rs b/components/link_checker/src/lib.rs index 6e50a9562..e9b038bc9 100644 --- a/components/link_checker/src/lib.rs +++ b/components/link_checker/src/lib.rs @@ -58,7 +58,10 @@ pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult { headers.insert(ACCEPT, "text/html".parse().unwrap()); headers.append(ACCEPT, "*/*".parse().unwrap()); - let client = Client::new(); + let client = Client::builder() + .user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"))) + .build() + .expect("reqwest client build"); let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix)); @@ -185,6 +188,22 @@ mod tests { assert!(res.error.is_none()); } + #[test] + fn set_default_user_agent() { + let user_agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); + let _m1 = mock("GET", "/C4Szbfnvj6M0LoPk") + .match_header("User-Agent", user_agent) + .with_status(200) + .with_body("Test") + .create(); + + let url = format!("{}{}", mockito::server_url(), "/C4Szbfnvj6M0LoPk"); + let res = check_url(&url, &LinkChecker::default()); + assert!(res.is_valid()); + assert!(res.code.is_some()); + assert!(res.error.is_none()); + } + #[test] fn can_fail_301_to_404_links() { let _m1 = mock("GET", "/cav9vibhsc") diff --git a/components/templates/src/global_fns/load_data.rs b/components/templates/src/global_fns/load_data.rs index 586a5020a..8aa542da7 100644 --- a/components/templates/src/global_fns/load_data.rs +++ b/components/templates/src/global_fns/load_data.rs @@ -178,7 +178,12 @@ pub struct LoadData { } impl LoadData { pub fn new(base_path: PathBuf) -> Self { - let client = Arc::new(Mutex::new(Client::builder().build().expect("reqwest client build"))); + let client = Arc::new(Mutex::new( + Client::builder() + .user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"))) + .build() + .expect("reqwest client build"), + )); let result_cache = Arc::new(Mutex::new(HashMap::new())); Self { base_path, client, result_cache } } @@ -443,6 +448,31 @@ mod tests { ); } + #[test] + fn set_default_user_agent() { + let user_agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); + let _m = mock("GET", "/chu8aizahBiy") + .match_header("User-Agent", user_agent) + .with_header("content-type", "application/json") + .with_body( + r#"{ + "test": { + "foo": "bar" + } +} +"#, + ) + .create(); + + let url = format!("{}{}", mockito::server_url(), "/chu8aizahBiy"); + let static_fn = LoadData::new(PathBuf::new()); + let mut args = HashMap::new(); + args.insert("url".to_string(), to_value(&url).unwrap()); + args.insert("format".to_string(), to_value("json").unwrap()); + let result = static_fn.call(&args).unwrap(); + assert_eq!(result.get("test").unwrap().get("foo").unwrap(), &to_value("bar").unwrap()); + } + #[test] fn can_load_toml() { let static_fn = LoadData::new(PathBuf::from("../utils/test-files"));