Auto merge of #10343 - willcrichton:example-analyzer, r=weihanglo

Change rustdoc-scrape-examples to be a target-level configuration This PR addresses issues raised in #9525. Specifically: 1. It enables examples to be scraped from `#[test]` functions, by passing additional flags to Rustdoc to ensure that these functions aren't ignored by rustc. 2. It moves the `arg` from `-Z rustdoc-scrape-examples={arg}` into a target-level configuration that can be added to Cargo.toml. The added test `scrape_examples_configure_target` shows a concrete example. In short, examples will be default scraped from Example and Lib targets. Then the user can enable or disable scraping like so: ```toml [lib] doc-scrape-examples = false [[test]] name = "my_test" doc-scrape-examples = true ```
rust-lang · Nov 24, 2022 · 99d3c71 · 99d3c71
2 parents ba607b2 + 183425f
commit 99d3c71
Show file tree

Hide file tree

Showing 18 changed files with 832 additions and 365 deletions.
diff --git a/crates/cargo-test-support/src/compare.rs b/crates/cargo-test-support/src/compare.rs
@@ -168,6 +168,7 @@ fn substitute_macros(input: &str) -> String {
         ("[NOTE]", "note:"),
         ("[HELP]", "help:"),
         ("[DOCUMENTING]", " Documenting"),
+        ("[SCRAPING]", "    Scraping"),
         ("[FRESH]", "       Fresh"),
         ("[UPDATING]", "    Updating"),
         ("[ADDING]", "      Adding"),

diff --git a/src/cargo/core/compiler/context/compilation_files.rs b/src/cargo/core/compiler/context/compilation_files.rs
@@ -451,9 +451,11 @@ impl<'a, 'cfg: 'a> CompilationFiles<'a, 'cfg> {
                 vec![]
             }
             CompileMode::Docscrape => {
-                let path = self
-                    .deps_dir(unit)
-                    .join(format!("{}.examples", unit.buildkey()));
+                // The file name needs to be stable across Cargo sessions.
+                // This originally used unit.buildkey(), but that isn't stable,
+                // so we use metadata instead (prefixed with name for debugging).
+                let file_name = format!("{}-{}.examples", unit.pkg.name(), self.metadata(unit));
+                let path = self.deps_dir(unit).join(file_name);
                 vec![OutputFile {
                     path,
                     hardlink: None,

diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs
@@ -77,6 +77,11 @@ pub struct Context<'a, 'cfg> {
     /// Map of Doc/Docscrape units to metadata for their -Cmetadata flag.
     /// See Context::find_metadata_units for more details.
     pub metadata_for_doc_units: HashMap<Unit, Metadata>,
+
+    /// Set of metadata of Docscrape units that fail before completion, e.g.
+    /// because the target has a type error. This is in an Arc<Mutex<..>>
+    /// because it is continuously updated as the job progresses.
+    pub failed_scrape_units: Arc<Mutex<HashSet<Metadata>>>,
 }
 
 impl<'a, 'cfg> Context<'a, 'cfg> {
@@ -115,6 +120,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> {
             rustc_clients: HashMap::new(),
             lto: HashMap::new(),
             metadata_for_doc_units: HashMap::new(),
+            failed_scrape_units: Arc::new(Mutex::new(HashSet::new())),
         })
     }
 

diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs
@@ -1275,7 +1275,7 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Finger
 
     // Afterwards calculate our own fingerprint information.
     let target_root = target_root(cx);
-    let local = if unit.mode.is_doc() {
+    let local = if unit.mode.is_doc() || unit.mode.is_doc_scrape() {
         // rustdoc does not have dep-info files.
         let fingerprint = pkg_fingerprint(cx.bcx, &unit.pkg).with_context(|| {
             format!(
@@ -1302,7 +1302,7 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Finger
     // Fill out a bunch more information that we'll be tracking typically
     // hashed to take up less space on disk as we just need to know when things
     // change.
-    let extra_flags = if unit.mode.is_doc() {
+    let extra_flags = if unit.mode.is_doc() || unit.mode.is_doc_scrape() {
         cx.bcx.rustdocflags_args(unit)
     } else {
         cx.bcx.rustflags_args(unit)

diff --git a/src/cargo/core/compiler/job_queue.rs b/src/cargo/core/compiler/job_queue.rs
@@ -134,6 +134,7 @@ struct DrainState<'cfg> {
     active: HashMap<JobId, Unit>,
     compiled: HashSet<PackageId>,
     documented: HashSet<PackageId>,
+    scraped: HashSet<PackageId>,
     counts: HashMap<PackageId, usize>,
     progress: Progress<'cfg>,
     next_id: u32,
@@ -347,17 +348,29 @@ enum Message {
     BuildPlanMsg(String, ProcessBuilder, Arc<Vec<OutputFile>>),
     Stdout(String),
     Stderr(String),
+
+    // This is for general stderr output from subprocesses
     Diagnostic {
         id: JobId,
         level: String,
         diag: String,
         fixable: bool,
     },
+    // This handles duplicate output that is suppressed, for showing
+    // only a count of duplicate messages instead
     WarningCount {
         id: JobId,
         emitted: bool,
         fixable: bool,
     },
+    // This is for warnings generated by Cargo's interpretation of the
+    // subprocess output, e.g. scrape-examples prints a warning if a
+    // unit fails to be scraped
+    Warning {
+        id: JobId,
+        warning: String,
+    },
+
     FixDiagnostic(diagnostic_server::Message),
     Token(io::Result<Acquired>),
     Finish(JobId, Artifact, CargoResult<()>),
@@ -405,6 +418,7 @@ impl<'a, 'cfg> JobState<'a, 'cfg> {
         Ok(())
     }
 
+    /// See [`Message::Diagnostic`] and [`Message::WarningCount`].
     pub fn emit_diag(&self, level: String, diag: String, fixable: bool) -> CargoResult<()> {
         if let Some(dedupe) = self.output {
             let emitted = dedupe.emit_diag(&diag)?;
@@ -426,6 +440,15 @@ impl<'a, 'cfg> JobState<'a, 'cfg> {
         Ok(())
     }
 
+    /// See [`Message::Warning`].
+    pub fn warning(&self, warning: String) -> CargoResult<()> {
+        self.messages.push_bounded(Message::Warning {
+            id: self.id,
+            warning,
+        });
+        Ok(())
+    }
+
     /// A method used to signal to the coordinator thread that the rmeta file
     /// for an rlib has been produced. This is only called for some rmeta
     /// builds when required, and can be called at any time before a job ends.
@@ -475,8 +498,10 @@ impl<'cfg> JobQueue<'cfg> {
             .filter(|dep| {
                 // Binaries aren't actually needed to *compile* tests, just to run
                 // them, so we don't include this dependency edge in the job graph.
+                // But we shouldn't filter out dependencies being scraped for Rustdoc.
                 (!dep.unit.target.is_test() && !dep.unit.target.is_bin())
                     || dep.unit.artifact.is_true()
+                    || dep.unit.mode.is_doc_scrape()
             })
             .map(|dep| {
                 // Handle the case here where our `unit -> dep` dependency may
@@ -563,6 +588,7 @@ impl<'cfg> JobQueue<'cfg> {
             active: HashMap::new(),
             compiled: HashSet::new(),
             documented: HashSet::new(),
+            scraped: HashSet::new(),
             counts: self.counts,
             progress,
             next_id: 0,
@@ -739,6 +765,10 @@ impl<'cfg> DrainState<'cfg> {
                     cnts.disallow_fixable();
                 }
             }
+            Message::Warning { id, warning } => {
+                cx.bcx.config.shell().warn(warning)?;
+                self.bump_warning_count(id, true, false);
+            }
             Message::WarningCount {
                 id,
                 emitted,
@@ -782,6 +812,16 @@ impl<'cfg> DrainState<'cfg> {
                 debug!("end ({:?}): {:?}", unit, result);
                 match result {
                     Ok(()) => self.finish(id, &unit, artifact, cx)?,
+                    Err(_)
+                        if unit.mode.is_doc_scrape()
+                            && unit.target.doc_scrape_examples().is_unset() =>
+                    {
+                        cx.failed_scrape_units
+                            .lock()
+                            .unwrap()
+                            .insert(cx.files().metadata(&unit));
+                        self.queue.finish(&unit, &artifact);
+                    }
                     Err(error) => {
                         let msg = "The following warnings were emitted during compilation:";
                         self.emit_warnings(Some(msg), &unit, cx)?;
@@ -1303,8 +1343,11 @@ impl<'cfg> DrainState<'cfg> {
         unit: &Unit,
         fresh: Freshness,
     ) -> CargoResult<()> {
-        if (self.compiled.contains(&unit.pkg.package_id()) && !unit.mode.is_doc())
+        if (self.compiled.contains(&unit.pkg.package_id())
+            && !unit.mode.is_doc()
+            && !unit.mode.is_doc_scrape())
             || (self.documented.contains(&unit.pkg.package_id()) && unit.mode.is_doc())
+            || (self.scraped.contains(&unit.pkg.package_id()) && unit.mode.is_doc_scrape())
         {
             return Ok(());
         }
@@ -1318,6 +1361,9 @@ impl<'cfg> DrainState<'cfg> {
                     config.shell().status("Documenting", &unit.pkg)?;
                 } else if unit.mode.is_doc_test() {
                     // Skip doc test.
+                } else if unit.mode.is_doc_scrape() {
+                    self.scraped.insert(unit.pkg.package_id());
+                    config.shell().status("Scraping", &unit.pkg)?;
                 } else {
                     self.compiled.insert(unit.pkg.package_id());
                     if unit.mode.is_check() {

diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs
@@ -22,7 +22,7 @@ mod unit;
 pub mod unit_dependencies;
 pub mod unit_graph;
 
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::env;
 use std::ffi::{OsStr, OsString};
 use std::fs::{self, File};
@@ -55,7 +55,7 @@ use crate::core::compiler::future_incompat::FutureIncompatReport;
 pub use crate::core::compiler::unit::{Unit, UnitInterner};
 use crate::core::manifest::TargetSourcePath;
 use crate::core::profiles::{PanicStrategy, Profile, Strip};
-use crate::core::{Feature, PackageId, Target};
+use crate::core::{Feature, PackageId, Target, Verbosity};
 use crate::util::errors::{CargoResult, VerboseError};
 use crate::util::interning::InternedString;
 use crate::util::machine_message::{self, Message};
@@ -654,13 +654,16 @@ fn rustdoc(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Work> {
     rustdoc.arg("-C").arg(format!("metadata={}", metadata));
 
     let scrape_output_path = |unit: &Unit| -> CargoResult<PathBuf> {
-        let output_dir = cx.files().deps_dir(unit);
-        Ok(output_dir.join(format!("{}.examples", unit.buildkey())))
+        cx.outputs(unit).map(|outputs| outputs[0].path.clone())
     };
 
     if unit.mode.is_doc_scrape() {
         debug_assert!(cx.bcx.scrape_units.contains(unit));
 
+        if unit.target.is_test() {
+            rustdoc.arg("--scrape-tests");
+        }
+
         rustdoc.arg("-Zunstable-options");
 
         rustdoc
@@ -678,18 +681,23 @@ fn rustdoc(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Work> {
                 rustdoc.arg("--scrape-examples-target-crate").arg(name);
             }
         }
-    } else if cx.bcx.scrape_units.len() > 0 && cx.bcx.ws.unit_needs_doc_scrape(unit) {
-        // We only pass scraped examples to packages in the workspace
-        // since examples are only coming from reverse-dependencies of workspace packages
+    }
 
+    let should_include_scrape_units = unit.mode.is_doc()
+        && cx.bcx.scrape_units.len() > 0
+        && cx.bcx.ws.unit_needs_doc_scrape(unit);
+    let scrape_outputs = if should_include_scrape_units {
         rustdoc.arg("-Zunstable-options");
-
-        for scrape_unit in &cx.bcx.scrape_units {
-            rustdoc
-                .arg("--with-examples")
-                .arg(scrape_output_path(scrape_unit)?);
-        }
-    }
+        Some(
+            cx.bcx
+                .scrape_units
+                .iter()
+                .map(|unit| Ok((cx.files().metadata(unit), scrape_output_path(unit)?)))
+                .collect::<CargoResult<HashMap<_, _>>>()?,
+        )
+    } else {
+        None
+    };
 
     build_deps_args(&mut rustdoc, cx, unit)?;
     rustdoc::add_root_urls(cx, unit, &mut rustdoc)?;
@@ -700,19 +708,45 @@ fn rustdoc(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Work> {
         append_crate_version_flag(unit, &mut rustdoc);
     }
 
+    let target_desc = unit.target.description_named();
     let name = unit.pkg.name().to_string();
     let build_script_outputs = Arc::clone(&cx.build_script_outputs);
     let package_id = unit.pkg.package_id();
     let manifest_path = PathBuf::from(unit.pkg.manifest_path());
+    let relative_manifest_path = manifest_path
+        .strip_prefix(cx.bcx.ws.root())
+        .unwrap_or(&manifest_path)
+        .to_owned();
     let target = Target::clone(&unit.target);
     let mut output_options = OutputOptions::new(cx, unit);
     let script_metadata = cx.find_build_script_metadata(unit);
+    let failed_scrape_units = Arc::clone(&cx.failed_scrape_units);
+    let hide_diagnostics_for_scrape_unit = unit.mode.is_doc_scrape()
+        && unit.target.doc_scrape_examples().is_unset()
+        && !matches!(cx.bcx.config.shell().verbosity(), Verbosity::Verbose);
+    if hide_diagnostics_for_scrape_unit {
+        output_options.show_diagnostics = false;
+    }
     Ok(Work::new(move |state| {
         add_custom_flags(
             &mut rustdoc,
             &build_script_outputs.lock().unwrap(),
             script_metadata,
         )?;
+
+        // Add the output of scraped examples to the rustdoc command.
+        // This action must happen after the unit's dependencies have finished,
+        // because some of those deps may be Docscrape units which have failed.
+        // So we dynamically determine which `--with-examples` flags to pass here.
+        if let Some(scrape_outputs) = scrape_outputs {
+            let failed_scrape_units = failed_scrape_units.lock().unwrap();
+            for (metadata, output_path) in &scrape_outputs {
+                if !failed_scrape_units.contains(metadata) {
+                    rustdoc.arg("--with-examples").arg(output_path);
+                }
+            }
+        }
+
         let crate_dir = doc_dir.join(&crate_name);
         if crate_dir.exists() {
             // Remove output from a previous build. This ensures that stale
@@ -722,7 +756,7 @@ fn rustdoc(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Work> {
         }
         state.running(&rustdoc);
 
-        rustdoc
+        let result = rustdoc
             .exec_with_streaming(
                 &mut |line| on_stdout_line(state, line, package_id, &target),
                 &mut |line| {
@@ -737,7 +771,23 @@ fn rustdoc(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Work> {
                 },
                 false,
             )
-            .with_context(|| format!("could not document `{}`", name))?;
+            .with_context(|| format!("could not document `{}`", name));
+
+        if let Err(e) = result {
+            if hide_diagnostics_for_scrape_unit {
+                let diag = format!(
+                    "\
+failed to scan {target_desc} in package `{name}` for example code usage
+    Try running with `--verbose` to see the error message.
+    If this example should not be scanned, consider adding `doc-scrape-examples = false` to the `[[example]]` definition in {}",
+                    relative_manifest_path.display()
+                );
+                state.warning(diag)?;
+            }
+
+            return Err(e);
+        }
+
         Ok(())
     }))
 }

diff --git a/src/cargo/core/compiler/rustdoc.rs b/src/cargo/core/compiler/rustdoc.rs
@@ -190,3 +190,22 @@ pub fn add_root_urls(
     }
     Ok(())
 }
+
+/// Indicates whether a target should have examples scraped from it
+/// by rustdoc. Configured within Cargo.toml.
+#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug, Copy)]
+pub enum RustdocScrapeExamples {
+    Enabled,
+    Disabled,
+    Unset,
+}
+
+impl RustdocScrapeExamples {
+    pub fn is_enabled(&self) -> bool {
+        matches!(self, RustdocScrapeExamples::Enabled)
+    }
+
+    pub fn is_unset(&self) -> bool {
+        matches!(self, RustdocScrapeExamples::Unset)
+    }
+}
diff --git a/src/cargo/core/compiler/unit_dependencies.rs b/src/cargo/core/compiler/unit_dependencies.rs
@@ -718,13 +718,14 @@ fn compute_deps_doc(
     // Add all units being scraped for examples as a dependency of top-level Doc units.
     if state.ws.unit_needs_doc_scrape(unit) {
         for scrape_unit in state.scrape_units.iter() {
-            deps_of(scrape_unit, state, unit_for)?;
+            let scrape_unit_for = UnitFor::new_normal(scrape_unit.kind);
+            deps_of(scrape_unit, state, scrape_unit_for)?;
             ret.push(new_unit_dep(
                 state,
                 scrape_unit,
                 &scrape_unit.pkg,
                 &scrape_unit.target,
-                unit_for,
+                scrape_unit_for,
                 scrape_unit.kind,
                 scrape_unit.mode,
                 IS_NO_ARTIFACT_DEP,
@@ -1088,7 +1089,6 @@ impl<'a, 'cfg> State<'a, 'cfg> {
                         if !dep.is_transitive()
                             && !unit.target.is_test()
                             && !unit.target.is_example()
-                            && !unit.mode.is_doc_scrape()
                             && !unit.mode.is_any_test()
                         {
                             return false;