Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pulldown-cmark version #65894

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 4 additions & 15 deletions Cargo.lock
Expand Up @@ -480,7 +480,7 @@ dependencies = [
"itertools 0.8.0",
"lazy_static 1.3.0",
"matches",
"pulldown-cmark 0.6.1",
"pulldown-cmark",
"quine-mc_cluskey",
"regex-syntax",
"semver",
Expand Down Expand Up @@ -1972,7 +1972,7 @@ dependencies = [
"log",
"memchr",
"open",
"pulldown-cmark 0.6.1",
"pulldown-cmark",
"regex",
"serde",
"serde_derive",
Expand All @@ -1999,7 +1999,7 @@ dependencies = [
"log",
"mdbook",
"percent-encoding 2.1.0",
"pulldown-cmark 0.6.1",
"pulldown-cmark",
"rayon",
"regex",
"reqwest",
Expand Down Expand Up @@ -2641,17 +2641,6 @@ dependencies = [
"url 2.1.0",
]

[[package]]
name = "pulldown-cmark"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77043da1282374688ee212dc44b3f37ff929431de9c9adc3053bd3cee5630357"
dependencies = [
"bitflags",
"memchr",
"unicase",
]

[[package]]
name = "pulldown-cmark"
version = "0.6.1"
Expand Down Expand Up @@ -3902,7 +3891,7 @@ name = "rustdoc"
version = "0.0.0"
dependencies = [
"minifier",
"pulldown-cmark 0.5.3",
"pulldown-cmark",
"rustc-rayon 0.3.0",
"tempfile",
]
Expand Down
2 changes: 1 addition & 1 deletion src/librustdoc/Cargo.toml
Expand Up @@ -9,7 +9,7 @@ name = "rustdoc"
path = "lib.rs"

[dependencies]
pulldown-cmark = { version = "0.5.3", default-features = false }
pulldown-cmark = { version = "0.6.1", default-features = false }
minifier = "0.0.33"
rayon = { version = "0.3.0", package = "rustc-rayon" }
tempfile = "3"
103 changes: 38 additions & 65 deletions src/librustdoc/html/markdown.rs
Expand Up @@ -368,11 +368,11 @@ impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a,
}

let event = self.inner.next();
if let Some(Event::Start(Tag::Header(level))) = event {
if let Some(Event::Start(Tag::Heading(level))) = event {
let mut id = String::new();
for event in &mut self.inner {
match &event {
Event::End(Tag::Header(..)) => break,
Event::End(Tag::Heading(..)) => break,
Event::Text(text) | Event::Code(text) => {
id.extend(text.chars().filter_map(slugify));
}
Expand All @@ -386,16 +386,16 @@ impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a,
let mut html_header = String::new();
html::push_html(&mut html_header, self.buf.iter().cloned());
let sec = builder.push(level as u32, html_header, id.clone());
self.buf.push_front(Event::InlineHtml(format!("{} ", sec).into()));
self.buf.push_front(Event::Html(format!("{} ", sec).into()));
}

self.buf.push_back(Event::InlineHtml(format!("</a></h{}>", level).into()));
self.buf.push_back(Event::Html(format!("</a></h{}>", level).into()));

let start_tags = format!("<h{level} id=\"{id}\" class=\"section-header\">\
<a href=\"#{id}\">",
id = id,
level = level);
return Some(Event::InlineHtml(start_tags.into()));
return Some(Event::Html(start_tags.into()));
}
event
}
Expand Down Expand Up @@ -553,15 +553,11 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {

pub fn find_testable_code<T: test::Tester>(doc: &str, tests: &mut T, error_codes: ErrorCodes,
enable_per_target_ignores: bool) {
let mut parser = Parser::new(doc);
let mut prev_offset = 0;
let mut nb_lines = 0;
let mut parser = Parser::new(doc).into_offset_iter();
let mut register_header = None;
while let Some(event) = parser.next() {
while let Some((event, offset)) = parser.next() {
match event {
Event::Start(Tag::CodeBlock(s)) => {
let offset = parser.get_offset();

let block_info = if s.is_empty() {
LangString::all_false()
} else {
Expand All @@ -571,8 +567,7 @@ pub fn find_testable_code<T: test::Tester>(doc: &str, tests: &mut T, error_codes
continue;
}
let mut test_s = String::new();

while let Some(Event::Text(s)) = parser.next() {
while let Some((Event::Text(s), _)) = parser.next() {
test_s.push_str(&s);
}

Expand All @@ -581,12 +576,12 @@ pub fn find_testable_code<T: test::Tester>(doc: &str, tests: &mut T, error_codes
.map(|l| map_line(l).for_code())
.collect::<Vec<Cow<'_, str>>>()
.join("\n");
nb_lines += doc[prev_offset..offset].lines().count();
// "+ 1" is to take into account the start of the code block
let nb_lines = doc[..offset.start].lines().count() + 1;
let line = tests.get_line() + nb_lines;
tests.add_test(text, block_info, line);
prev_offset = offset;
}
Event::Start(Tag::Header(level)) => {
Event::Start(Tag::Heading(level)) => {
register_header = Some(level as u32);
}
Event::Text(ref s) if register_header.is_some() => {
Expand Down Expand Up @@ -766,7 +761,7 @@ impl MarkdownHtml<'_> {

// Treat inline HTML as plain text.
let p = p.map(|event| match event {
Event::Html(text) | Event::InlineHtml(text) => Event::Text(text),
Event::Html(text) => Event::Text(text),
_ => event
});

Expand Down Expand Up @@ -823,10 +818,10 @@ pub fn plain_summary_line(md: &str) -> String {
let next_event = next_event.unwrap();
let (ret, is_in) = match next_event {
Event::Start(Tag::Paragraph) => (None, 1),
Event::Start(Tag::Header(_)) => (None, 1),
Event::Start(Tag::Heading(_)) => (None, 1),
Event::Code(code) => (Some(format!("`{}`", code)), 0),
Event::Text(ref s) if self.is_in > 0 => (Some(s.as_ref().to_owned()), 0),
Event::End(Tag::Paragraph) | Event::End(Tag::Header(_)) => (None, -1),
Event::End(Tag::Paragraph) | Event::End(Tag::Heading(_)) => (None, -1),
_ => (None, 0),
};
if is_in > 0 || (is_in < 0 && self.is_in > 0) {
Expand Down Expand Up @@ -925,16 +920,14 @@ crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
return code_blocks;
}

let mut p = Parser::new_ext(md, opts());
let mut p = Parser::new_ext(md, opts()).into_offset_iter();

let mut code_block_start = 0;
let mut code_block = None;
let mut code_start = 0;
let mut is_fenced = false;
let mut previous_offset = 0;
let mut in_rust_code_block = false;
while let Some(event) = p.next() {
let offset = p.get_offset();

let mut previous_offset = Range { start: 0, end: 0 };
while let Some((event, offset_range)) = p.next() {
match event {
Event::Start(Tag::CodeBlock(syntax)) => {
let lang_string = if syntax.is_empty() {
Expand All @@ -945,55 +938,36 @@ crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {

if lang_string.rust {
in_rust_code_block = true;

code_start = offset;
code_block_start = match md[previous_offset..offset].find("```") {
Some(fence_idx) => {
is_fenced = true;
previous_offset + fence_idx
}
None => {
is_fenced = false;
offset
}
code_block = Some(offset_range.clone());

code_start = if !md[previous_offset.end..offset_range.start].ends_with(" ") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it is necessary to scan through the text to find the code block start/end now that it is using into_offset_iter. It has the exact ranges provided by pulldown-cmark. Perhaps something like this will be a little more reliable?

diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
index d10b131af8e..e1fc7075372 100644
--- a/src/librustdoc/html/markdown.rs
+++ b/src/librustdoc/html/markdown.rs
@@ -922,11 +922,11 @@ crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {

     let mut p = Parser::new_ext(md, opts()).into_offset_iter();

-    let mut code_block = None;
+    let mut code_block_range = 0..0;
     let mut code_start = 0;
+    let mut code_end = 0;
     let mut is_fenced = false;
     let mut in_rust_code_block = false;
-    let mut previous_offset = Range { start: 0, end: 0 };
     while let Some((event, offset_range)) = p.next() {
         match event {
             Event::Start(Tag::CodeBlock(syntax)) => {
@@ -938,36 +938,23 @@ crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {

                 if lang_string.rust {
                     in_rust_code_block = true;
-                    code_block = Some(offset_range.clone());
-
-                    code_start = if !md[previous_offset.end..offset_range.start].ends_with("    ") {
-                        is_fenced = true;
-                        offset_range.start + md[offset_range.clone()]
-                            .lines()
-                            .next()
-                            .map_or(0, |x| x.len() + 1)
-                    } else {
-                        is_fenced = false;
-                        offset_range.start
-                    };
+                    code_block_range = offset_range.clone();
+                    is_fenced = md[offset_range].starts_with("```");
+                }
+            }
+            Event::Text(_) if in_rust_code_block => {
+                // Once inside a code block, the Text event gives the contents
+                // of the code block.
+                if code_start == 0 {
+                    code_start = offset_range.start;
                 }
+                code_end = offset_range.end;
             }
             Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
                 in_rust_code_block = false;
-
-                let code_end = if is_fenced {
-                    let last_len = md[offset_range.clone()]
-                        .lines()
-                        .last()
-                        .filter(|l| l.ends_with("```"))
-                        .map_or(0, |l| l.len());
-                    offset_range.end - last_len
-                } else {
-                    offset_range.end
-                };
                 code_blocks.push(RustCodeBlock {
                     is_fenced,
-                    range: code_block.clone().unwrap(),
+                    range: code_block_range.clone(),
                     code: Range {
                         start: code_start,
                         end: code_end,
@@ -978,10 +965,10 @@ crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
                         None
                     },
                 });
+                code_start = 0;
             }
             _ => (),
         }
-        previous_offset = offset_range;
     }

     code_blocks

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately it's not. I had this issue as well. :)

In the case the block code is indented and not started with backticks, then if we have backticks, it's text. We even have a test for this use case. Therefore, we need to check if the code block is preceded by whitespaces.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear, are you referring to this test? If it is important to avoid adding the suggestion to add text, a modification of the patch above should work: Add this to Event::Text:

is_fenced = code_start != code_block_range.start;

and remove is_fenced from Tag::CodeBlock. That is, when the Event::Text has the same offset as Event::Start(Tag::CodeBlock)), then it should be an indented codeblock.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a hack and not a good one unfortunately. Like I said below, the easiest way is to simply know beforehand the kind of code block this is. I sent a PR for it in pulldown-cmark. That'll allow us to just remove all of this.

is_fenced = true;
offset_range.start + md[offset_range.clone()]
.lines()
.next()
.map_or(0, |x| x.len() + 1)
} else {
is_fenced = false;
offset_range.start
};
}
}
Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
in_rust_code_block = false;

let code_block_end = if is_fenced {
let fence_str = &md[previous_offset..offset]
.chars()
.rev()
.collect::<String>();
fence_str
.find("```")
.map(|fence_idx| offset - fence_idx)
.unwrap_or_else(|| offset)
} else if md
.as_bytes()
.get(offset)
.map(|b| *b == b'\n')
.unwrap_or_default()
{
offset - 1
} else {
offset
};

let code_end = if is_fenced {
previous_offset
let last_len = md[offset_range.clone()]
.lines()
.last()
.filter(|l| l.ends_with("```"))
.map_or(0, |l| l.len());
offset_range.end - last_len
} else {
code_block_end
offset_range.end
};

code_blocks.push(RustCodeBlock {
is_fenced,
range: Range {
start: code_block_start,
end: code_block_end,
},
range: code_block.clone().unwrap(),
code: Range {
start: code_start,
end: code_end,
Expand All @@ -1007,8 +981,7 @@ crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
}
_ => (),
}

previous_offset = offset;
previous_offset = offset_range;
}

code_blocks
Expand Down
1 change: 0 additions & 1 deletion src/librustdoc/passes/check_code_block_syntax.rs
Expand Up @@ -33,7 +33,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
FileName::Custom(String::from("doctest")),
dox[code_block.code].to_owned(),
);

let validation_status = {
let mut has_syntax_errors = false;
let mut only_whitespace = true;
Expand Down
1 change: 1 addition & 0 deletions src/librustdoc/passes/mod.rs
Expand Up @@ -322,6 +322,7 @@ pub fn look_for_tests<'tcx>(
}
};

#[derive(Debug)]
struct Tests {
found_tests: usize,
}
Expand Down
15 changes: 14 additions & 1 deletion src/librustdoc/test.rs
Expand Up @@ -26,7 +26,7 @@ use crate::clean::Attributes;
use crate::config::Options;
use crate::html::markdown::{self, ErrorCodes, LangString, Ignore};

#[derive(Clone, Default)]
#[derive(Clone, Default, Debug)]
pub struct TestOptions {
/// Whether to disable the default `extern crate my_crate;` when creating doctests.
pub no_crate_inject: bool,
Expand Down Expand Up @@ -640,6 +640,19 @@ pub struct Collector {
filename: Option<PathBuf>,
}

impl ::std::fmt::Debug for Collector {
fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
f.debug_struct("Collector")
.field("tests", &self.tests)
.field("options", &self.options)
.field("use_headers", &self.use_headers)
.field("enable_per_target_ignores", &self.enable_per_target_ignores)
.field("cratename", &self.cratename)
.field("opts", &self.opts)
.finish()
}
}

impl Collector {
pub fn new(cratename: String, options: Options, use_headers: bool, opts: TestOptions,
source_map: Option<Lrc<SourceMap>>, filename: Option<PathBuf>,
Expand Down
3 changes: 2 additions & 1 deletion src/test/rustdoc-ui/invalid-syntax.stderr
Expand Up @@ -115,7 +115,8 @@ warning: could not parse code block as Rust code
LL | /// code with bad syntax
| _________^
LL | | /// \_
| |__________^
LL | | ///
| |_
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is the ^?


error: unknown start of token: `
--> <doctest>:1:1
Expand Down