From 08df86ada6bf6ac6d09eaff417f1f0933e8ec68b Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Sun, 17 Nov 2019 21:39:04 +0100 Subject: [PATCH 1/5] Add codeblock kind in Tag:CodeBlock --- src/html.rs | 4 ++-- src/parse.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/src/html.rs b/src/html.rs index 8a3cc5c1..1457e3f0 100644 --- a/src/html.rs +++ b/src/html.rs @@ -250,7 +250,7 @@ where self.write("\n
\n") } } - Tag::CodeBlock(info) => { + Tag::CodeBlock(_, info) => { if !self.end_newline { self.write_newline()?; } @@ -375,7 +375,7 @@ where Tag::BlockQuote => { self.write("
\n")?; } - Tag::CodeBlock(_) => { + Tag::CodeBlock(_, _) => { self.write("\n")?; } Tag::List(Some(_)) => { diff --git a/src/parse.rs b/src/parse.rs index e4cbca04..c6854b3d 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -38,6 +38,36 @@ use crate::tree::{Tree, TreeIndex, TreePointer}; // https://spec.commonmark.org/0.29/#link-destination const LINK_MAX_NESTED_PARENS: usize = 5; +/// Codeblock kind. +#[derive(Clone, Debug, PartialEq)] +pub enum CodeBlockKind<'a> { + Indented, + Fenced(CowStr<'a>), +} + +impl<'a> CodeBlockKind<'a> { + pub fn is_indented(&self) -> bool { + match *self { + CodeBlockKind::Indented => true, + _ => false, + } + } + + pub fn is_fenced(&self) -> bool { + match *self { + CodeBlockKind::Fenced(_) => true, + _ => false, + } + } + + pub fn get_fences(&self) -> Option<&CowStr<'a>> { + match *self { + CodeBlockKind::Fenced(ref f) => Some(f), + _ => None, + } + } +} + /// Tags for elements that can contain other elements. #[derive(Clone, Debug, PartialEq)] pub enum Tag<'a> { @@ -48,9 +78,12 @@ pub enum Tag<'a> { Heading(u32), BlockQuote, - /// A code block. The value contained in the tag describes the language of the code, - /// which may be empty. - CodeBlock(CowStr<'a>), + /// A code block. + /// + /// The boolean is `true` is this is an indented code block (not starting with "\`\`\`"). + /// + /// The value contained in the tag describes the language of the code, which may be empty. + CodeBlock(CodeBlockKind<'a>, CowStr<'a>), /// A list. If the list is ordered the field indicates the number of the first item. /// Contains only list items. 
@@ -204,7 +237,7 @@ enum ItemBody { Rule, Heading(u32), // heading level - FencedCodeBlock(CowIndex), + FencedCodeBlock(CowIndex, CowIndex), IndentCodeBlock, Html, BlockQuote, @@ -1012,10 +1045,14 @@ impl<'a> FirstPass<'a> { let mut ix = info_start + scan_nextline(&bytes[info_start..]); let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace); let info_string = unescape(&self.text[info_start..info_end]); + let fences = &self.text[start_ix..info_start]; self.tree.append(Item { start: start_ix, end: 0, // will get set later - body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)), + body: ItemBody::FencedCodeBlock( + self.allocs.allocate_cow(fences.into()), + self.allocs.allocate_cow(info_string), + ), }); self.tree.push(); loop { @@ -2675,8 +2712,13 @@ fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> { Tag::Image(*link_type, url.clone(), title.clone()) } ItemBody::Heading(level) => Tag::Heading(level), - ItemBody::FencedCodeBlock(cow_ix) => Tag::CodeBlock(allocs[cow_ix].clone()), - ItemBody::IndentCodeBlock => Tag::CodeBlock("".into()), + ItemBody::FencedCodeBlock(fences_ix, cow_ix) => { + Tag::CodeBlock( + CodeBlockKind::Fenced(allocs[fences_ix].clone()), + allocs[cow_ix].clone(), + ) + } + ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented, "".into()), ItemBody::BlockQuote => Tag::BlockQuote, ItemBody::List(_, c, listitem_start) => { if c == b'.' 
|| c == b')' { @@ -2722,8 +2764,13 @@ fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Eve Tag::Image(*link_type, url.clone(), title.clone()) } ItemBody::Heading(level) => Tag::Heading(level), - ItemBody::FencedCodeBlock(cow_ix) => Tag::CodeBlock(allocs[cow_ix].clone()), - ItemBody::IndentCodeBlock => Tag::CodeBlock("".into()), + ItemBody::FencedCodeBlock(fences_ix, cow_ix) => { + Tag::CodeBlock( + CodeBlockKind::Fenced(allocs[fences_ix].clone()), + allocs[cow_ix].clone(), + ) + } + ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented, "".into()), ItemBody::BlockQuote => Tag::BlockQuote, ItemBody::List(_, c, listitem_start) => { if c == b'.' || c == b')' { From 7c4986e3c57277d1969ad2698bb09385be5b8d54 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 18 Nov 2019 13:38:57 +0100 Subject: [PATCH 2/5] Add tests for new syntax --- src/parse.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/parse.rs b/src/parse.rs index c6854b3d..5f9d963e 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -3077,4 +3077,37 @@ mod test { } assert!(link_tag_count > 0); } + + #[test] + fn code_block_kind_check_fenced() { + let parser = Parser::new("hello\n```test\ntadam\n```"); + let mut found = 0; + for (ev, _range) in parser.into_offset_iter() { + match ev { + Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(fences), syntax)) => { + assert_eq!(fences.as_ref(), "```"); + assert_eq!(syntax.as_ref(), "test"); + found += 1; + } + _ => {} + } + } + assert_eq!(found, 1); + } + + #[test] + fn code_block_kind_check_indented() { + let parser = Parser::new("hello\n\n ```test\n tadam\nhello"); + let mut found = 0; + for (ev, _range) in parser.into_offset_iter() { + match ev { + Event::Start(Tag::CodeBlock(CodeBlockKind::Indented, syntax)) => { + assert_eq!(syntax.as_ref(), ""); + found += 1; + } + _ => {} + } + } + assert_eq!(found, 1); + } } From 1de6cb8adb84d03b94d5ef0c220ad11b19039ac3 Mon Sep 17 00:00:00 
2001 From: Guillaume Gomez Date: Mon, 18 Nov 2019 13:39:36 +0100 Subject: [PATCH 3/5] Fix size tests --- src/parse.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parse.rs b/src/parse.rs index 5f9d963e..35de523b 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -2886,14 +2886,14 @@ mod test { #[cfg(target_pointer_width = "64")] fn node_size() { let node_size = std::mem::size_of::>(); - assert_eq!(48, node_size); + assert_eq!(56, node_size); } #[test] #[cfg(target_pointer_width = "64")] fn body_size() { let body_size = std::mem::size_of::(); - assert_eq!(16, body_size); + assert_eq!(24, body_size); } #[test] From 2755bb0361be6cea0bfec5e5cffbdd932d290f6f Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 27 Nov 2019 17:20:48 +0100 Subject: [PATCH 4/5] Only keep the syntax --- src/html.rs | 25 +++++++++++++++---------- src/parse.rs | 46 +++++++++++++--------------------------------- 2 files changed, 28 insertions(+), 43 deletions(-) diff --git a/src/html.rs b/src/html.rs index 1457e3f0..6edf7215 100644 --- a/src/html.rs +++ b/src/html.rs @@ -26,7 +26,7 @@ use std::io::{self, ErrorKind, Write}; use crate::escape::{escape_href, escape_html}; use crate::parse::Event::*; -use crate::parse::{Alignment, Event, LinkType, Tag}; +use crate::parse::{Alignment, CodeBlockKind, Event, LinkType, Tag}; use crate::strings::CowStr; enum TableState { @@ -250,17 +250,22 @@ where self.write("\n
\n") } } - Tag::CodeBlock(_, info) => { + Tag::CodeBlock(info) => { if !self.end_newline { self.write_newline()?; } - let lang = info.split(' ').next().unwrap(); - if lang.is_empty() { - self.write("<pre><code>")
-                } else {
-                    self.write("<pre><code class=\"language-")?;
-                    escape_html(&mut self.writer, lang)?;
-                    self.write("\">")
+                match info {
+                    CodeBlockKind::Fenced(info) => {
+                        let lang = info.split(' ').next().unwrap();
+                        if lang.is_empty() {
+                            self.write("<pre><code>")
+                        } else {
+                            self.write("<pre><code class=\"language-")?;
+                            escape_html(&mut self.writer, lang)?;
+                            self.write("\">")
+                        }
+                    }
+                    CodeBlockKind::Indented => self.write("<pre><code>"),
                 }
             }
             Tag::List(Some(1)) => {
@@ -375,7 +380,7 @@ where
             Tag::BlockQuote => {
                 self.write("
\n")?; } - Tag::CodeBlock(_, _) => { + Tag::CodeBlock(_) => { self.write("\n")?; } Tag::List(Some(_)) => { diff --git a/src/parse.rs b/src/parse.rs index 35de523b..9dc2f920 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -42,6 +42,7 @@ const LINK_MAX_NESTED_PARENS: usize = 5; #[derive(Clone, Debug, PartialEq)] pub enum CodeBlockKind<'a> { Indented, + /// The value contained in the tag describes the language of the code, which may be empty. Fenced(CowStr<'a>), } @@ -59,13 +60,6 @@ impl<'a> CodeBlockKind<'a> { _ => false, } } - - pub fn get_fences(&self) -> Option<&CowStr<'a>> { - match *self { - CodeBlockKind::Fenced(ref f) => Some(f), - _ => None, - } - } } /// Tags for elements that can contain other elements. @@ -79,11 +73,7 @@ pub enum Tag<'a> { BlockQuote, /// A code block. - /// - /// The boolean is `true` is this is an indented code block (not starting with "\`\`\`"). - /// - /// The value contained in the tag describes the language of the code, which may be empty. - CodeBlock(CodeBlockKind<'a>, CowStr<'a>), + CodeBlock(CodeBlockKind<'a>), /// A list. If the list is ordered the field indicates the number of the first item. /// Contains only list items. 
@@ -237,7 +227,7 @@ enum ItemBody { Rule, Heading(u32), // heading level - FencedCodeBlock(CowIndex, CowIndex), + FencedCodeBlock(CowIndex), IndentCodeBlock, Html, BlockQuote, @@ -1045,12 +1035,10 @@ impl<'a> FirstPass<'a> { let mut ix = info_start + scan_nextline(&bytes[info_start..]); let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace); let info_string = unescape(&self.text[info_start..info_end]); - let fences = &self.text[start_ix..info_start]; self.tree.append(Item { start: start_ix, end: 0, // will get set later body: ItemBody::FencedCodeBlock( - self.allocs.allocate_cow(fences.into()), self.allocs.allocate_cow(info_string), ), }); @@ -2712,13 +2700,10 @@ fn item_to_tag<'a>(item: &Item, allocs: &Allocations<'a>) -> Tag<'a> { Tag::Image(*link_type, url.clone(), title.clone()) } ItemBody::Heading(level) => Tag::Heading(level), - ItemBody::FencedCodeBlock(fences_ix, cow_ix) => { - Tag::CodeBlock( - CodeBlockKind::Fenced(allocs[fences_ix].clone()), - allocs[cow_ix].clone(), - ) + ItemBody::FencedCodeBlock(cow_ix) => { + Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone())) } - ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented, "".into()), + ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented), ItemBody::BlockQuote => Tag::BlockQuote, ItemBody::List(_, c, listitem_start) => { if c == b'.' 
|| c == b')' { @@ -2764,13 +2749,10 @@ fn item_to_event<'a>(item: Item, text: &'a str, allocs: &Allocations<'a>) -> Eve Tag::Image(*link_type, url.clone(), title.clone()) } ItemBody::Heading(level) => Tag::Heading(level), - ItemBody::FencedCodeBlock(fences_ix, cow_ix) => { - Tag::CodeBlock( - CodeBlockKind::Fenced(allocs[fences_ix].clone()), - allocs[cow_ix].clone(), - ) + ItemBody::FencedCodeBlock(cow_ix) => { + Tag::CodeBlock(CodeBlockKind::Fenced(allocs[cow_ix].clone())) } - ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented, "".into()), + ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented), ItemBody::BlockQuote => Tag::BlockQuote, ItemBody::List(_, c, listitem_start) => { if c == b'.' || c == b')' { @@ -2886,14 +2868,14 @@ mod test { #[cfg(target_pointer_width = "64")] fn node_size() { let node_size = std::mem::size_of::>(); - assert_eq!(56, node_size); + assert_eq!(48, node_size); } #[test] #[cfg(target_pointer_width = "64")] fn body_size() { let body_size = std::mem::size_of::(); - assert_eq!(24, body_size); + assert_eq!(16, body_size); } #[test] @@ -3084,8 +3066,7 @@ mod test { let mut found = 0; for (ev, _range) in parser.into_offset_iter() { match ev { - Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(fences), syntax)) => { - assert_eq!(fences.as_ref(), "```"); + Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => { assert_eq!(syntax.as_ref(), "test"); found += 1; } @@ -3101,8 +3082,7 @@ mod test { let mut found = 0; for (ev, _range) in parser.into_offset_iter() { match ev { - Event::Start(Tag::CodeBlock(CodeBlockKind::Indented, syntax)) => { - assert_eq!(syntax.as_ref(), ""); + Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => { found += 1; } _ => {} From d4bee96d79a0f89d17605bda5eebf60016046e24 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 16 Dec 2019 23:35:05 +0100 Subject: [PATCH 5/5] Re-export CodeBlockKind --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/lib.rs b/src/lib.rs index 18b8d85d..229e1f03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,5 +70,5 @@ mod tree; #[cfg(all(target_arch = "x86_64", feature = "simd"))] mod simd; -pub use crate::parse::{Alignment, Event, LinkType, OffsetIter, Options, Parser, Tag}; +pub use crate::parse::{Alignment, CodeBlockKind, Event, LinkType, OffsetIter, Options, Parser, Tag}; pub use crate::strings::{CowStr, InlineStr};