Convert to str (#48)
* Convert ordered lists

* Change input on sanitize function

* Bold and italics

* Strikethrough

* Code blocks

* Blockquotes

* Images and links

* Detail

* Tasklist item

* Half the footnote

* Working on the other half of the footnote

* other half of the footnote

* Tables

* Update doc strings
darakian committed May 4, 2022
1 parent 1eb2b29 commit 21f04a5
Showing 4 changed files with 86 additions and 85 deletions.
97 changes: 49 additions & 48 deletions src/lexer.rs
@@ -5,18 +5,18 @@ use crate::MiniIter;
pub enum Token<'a> {
/// String: Body of unstructured text
Plaintext(String),
/// u8: Header level (1..=6). String: Header text. Option<String>: html label
/// u8: Header level (1..=6). str: Header text. Option<str>: html label
Header(u8, &'a str, Option<&'a str>),
/// String: Text for list entry
/// str: Text for list entry
UnorderedListEntry(&'a str),
/// String: Text for list entry
OrderedListEntry(String),
/// String: Text to be italicized
Italic(String),
/// String: Text to be bolded
Bold(String),
/// String: Text to be bolded and italicized
BoldItalic(String),
/// str: Text for list entry
OrderedListEntry(&'a str),
/// str: Text to be italicized
Italic(&'a str),
/// str: Text to be bolded
Bold(&'a str),
/// str: Text to be bolded and italicized
BoldItalic(&'a str),
/// Corresponds to a </br> html tag
LineBreak,
/// Corresponds to a newline character
@@ -27,26 +27,26 @@ pub enum Token<'a> {
Tab,
/// Used for control flow. Not directly rendered
DoubleTab,
/// String: Text to be struck through
Strikethrough(String),
/// String: Text to be placed within an inline code tag. eg. <code>String</code>
Code(String),
/// First String: Text to be placed within a multi-line code tag. Second String: Language
CodeBlock(String, String),
/// u8: Block quote level. String: Block quote text
BlockQuote(u8, String),
/// String: Link. Option<String>: Title for link.
Image(String, Option<String>),
/// String: Link. First Option<String>: Title for link. Second Option<String>: Hover text
Link(String, Option<String>, Option<String>),
/// String: Summary. Vec<Token>: Tokens to be rendered in the collapsable section
Detail(String, Vec<Token<'a>>),
/// Tuple of Vec<(Alignment, String)>: Which defines the table header and Vec<Vec<(Alignment, Vec<Token>)>> which defines the rows
Table(Vec<(Alignment, String)>, Vec<Vec<(Alignment, Vec<Token<'a>>)>>),
/// TaskBox: Boolean state of the checked or unchecked box. String: List item text
TaskListItem(TaskBox, String),
/// First String: Reference id. Second String: Reference text
Footnote(String, String),
/// str: Text to be struck through
Strikethrough(&'a str),
/// str: Text to be placed within an inline code tag. eg. <code>str</code>
Code(&'a str),
/// First str: Text to be placed within a multi-line code tag. Second str: Language
CodeBlock(&'a str, &'a str),
/// u8: Block quote level. str: Block quote text
BlockQuote(u8, &'a str),
/// str: Link. Option<str>: Title for link.
Image(&'a str, Option<&'a str>),
/// str: Link. First Option<str>: Title for link. Second Option<str>: Hover text
Link(&'a str, Option<&'a str>, Option<&'a str>),
/// str: Summary. Vec<Token>: Tokens to be rendered in the collapsable section
Detail(&'a str, Vec<Token<'a>>),
/// Tuple of Vec<(Alignment, str)>: Which defines the table header and Vec<Vec<(Alignment, Vec<Token>)>> which defines the rows
Table(Vec<(Alignment, &'a str)>, Vec<Vec<(Alignment, Vec<Token<'a>>)>>),
/// TaskBox: Boolean state of the checked or unchecked box. str: List item text
TaskListItem(TaskBox, &'a str),
/// First str: Reference id. Second str: Reference text
Footnote(&'a str, &'a str),
}

/// Holds the possible states of a taskbox in a task list
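The hunk above carries the core of this commit: most `Token` variants swap their owned `String` payloads for `&'a str` slices borrowed from the source text, so lexing no longer allocates per token. A minimal sketch of the pattern, with simplified names rather than the crate's actual lexer or `MiniIter` API:

```rust
// Minimal sketch only -- a simplified stand-in for the crate's lexer, not its real API.
// It shows why `&'a str` payloads avoid an allocation per token: each variant borrows
// a slice of the input instead of copying it into a `String`.
#[derive(Debug, PartialEq)]
enum Tok<'a> {
    Bold(&'a str),
    Plaintext(&'a str),
}

// Lex a single `**bold**` span by borrowing its inner text from `source`.
fn lex_bold(source: &str) -> Option<Tok<'_>> {
    let inner = source.strip_prefix("**")?.strip_suffix("**")?;
    Some(Tok::Bold(inner))
}

fn main() {
    let src = "**hello**";
    // The token points into `src`; no heap allocation happens for the text.
    assert_eq!(lex_bold(src), Some(Tok::Bold("hello")));
}
```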
@@ -142,7 +142,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul
let s = char_iter.consume_while_case_holds(&|c| c != "*" && c != "_").unwrap_or("");
if char_iter.peek() != Some("*") || char_iter.peek() != Some(&"_"){
char_iter.next();
return Ok(Token::Italic(s.to_string()))
return Ok(Token::Italic(s))
} else {
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")});
}
@@ -151,7 +151,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul
let s = char_iter.consume_while_case_holds(&|c| c != "*" && c != "_").unwrap_or("");
let trailing_astunds = char_iter.consume_while_case_holds(&|c| c == "*" || c == "_").unwrap_or("");
if trailing_astunds.len() == 2 {
return Ok(Token::Bold(s.to_string()))
return Ok(Token::Bold(s))
} else {
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")});
}
@@ -160,7 +160,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul
let s = char_iter.consume_while_case_holds(&|c| c != "*" && c != "_").unwrap_or("");
let trailing_astunds = char_iter.consume_while_case_holds(&|c| c == "*" || c == "_").unwrap_or("");
if trailing_astunds.len() == 3 {
return Ok(Token::BoldItalic(s.to_string()))
return Ok(Token::BoldItalic(s))
} else {
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")});
}
@@ -207,7 +207,7 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a
if leading_ticks.len() != trailing_ticks.len() {
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
} else {
return Ok(Token::Code(s.to_string()))
return Ok(Token::Code(s))
}
}
// leading_ticks.len() == 3. Check for lang
@@ -222,7 +222,7 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a
if leading_ticks.len() != trailing_ticks.len() {
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
} else {
return Ok(Token::CodeBlock(s.to_string(), lang.to_string()))
return Ok(Token::CodeBlock(s, lang))
}
}

@@ -242,7 +242,7 @@ pub(crate) fn lex_blockquotes<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<
}
let s = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
char_iter.next_if_eq(&"\n");
Ok(Token::BlockQuote(right_arrows.len() as u8, s.to_string()))
Ok(Token::BlockQuote(right_arrows.len() as u8, s))
}

pub(crate) fn lex_images<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
@@ -275,6 +275,7 @@ pub(crate) fn lex_links<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
char_iter.next();
let ref_id = title.strip_prefix("^").unwrap_or("");
let mut note_text = String::new();
let note_index = char_iter.get_index();
loop {
note_text.push_str(char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or(""));
char_iter.next();
@@ -299,7 +300,7 @@ pub(crate) fn lex_links<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
if ref_id.contains(char::is_whitespace){
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
}
return Ok(Token::Footnote(ref_id.to_string(), note_text.trim_start().to_string()));
return Ok(Token::Footnote(ref_id, char_iter.get_substring_from(note_index).unwrap_or("").trim()));
}
if char_iter.peek() != Some(&"(") {
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
@@ -311,12 +312,12 @@ pub(crate) fn lex_links<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
}
if char_iter.peek() == Some(&")") {
char_iter.next();
return Ok(Token::Link(link.to_string(), Some(title.to_string()), None));
return Ok(Token::Link(link, Some(title), None));
}
if char_iter.peek() == Some(&" ") {
let hover = char_iter.consume_while_case_holds(&|c| c != ")").unwrap_or("");
char_iter.skip_while(|c| c != &"\n").next();
return Ok(Token::Link(link.to_string(), Some(title.to_string()), Some(hover.to_string())));
return Ok(Token::Link(link, Some(title), Some(hover)));
}
Err(ParseError{content: ""})
}
@@ -329,7 +330,7 @@ pub(crate) fn lex_side_carrot<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<
match char_iter.peek(){
Some(">") if s != "details" => {
char_iter.next();
return Ok(Token::Link(s.to_string(), None, None))
return Ok(Token::Link(s, None, None))
},
Some(">") if s == "details" => {
char_iter.next();
@@ -359,11 +360,11 @@ pub(crate) fn lex_plus_minus<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'
}
let line = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
if line.starts_with(" [ ] ") {
return Ok(Token::TaskListItem(TaskBox::Unchecked,line.strip_prefix(" [ ] ").unwrap_or("").to_string()))
return Ok(Token::TaskListItem(TaskBox::Unchecked,line.strip_prefix(" [ ] ").unwrap_or("")))
} else if line.starts_with(" [x] ") {
return Ok(Token::TaskListItem(TaskBox::Checked,line.strip_prefix(" [x] ").unwrap_or("").to_string()))
return Ok(Token::TaskListItem(TaskBox::Checked,line.strip_prefix(" [x] ").unwrap_or("")))
} else if line.starts_with(" [X] ") {
return Ok(Token::TaskListItem(TaskBox::Checked,line.strip_prefix(" [X] ").unwrap_or("").to_string()))
return Ok(Token::TaskListItem(TaskBox::Checked,line.strip_prefix(" [X] ").unwrap_or("")))
} else if line.starts_with(" "){
return Ok(Token::UnorderedListEntry(line.strip_prefix(" ").unwrap_or("")))
} else {
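In the task-list hunk above, `strip_prefix` already yields a borrowed `&str`, so the `.to_string()` calls could simply be dropped. A small illustration of the same idea (`task_item` is a hypothetical helper, not the crate's `lex_plus_minus`):

```rust
// Hypothetical helper for illustration; mirrors the strip_prefix pattern above.
// Returns the checked state plus the item text borrowed straight from `line`.
fn task_item(line: &str) -> Option<(bool, &str)> {
    if let Some(rest) = line.strip_prefix(" [ ] ") {
        Some((false, rest))
    } else if let Some(rest) = line
        .strip_prefix(" [x] ")
        .or_else(|| line.strip_prefix(" [X] "))
    {
        Some((true, rest))
    } else {
        None
    }
}

fn main() {
    assert_eq!(task_item(" [x] ship it"), Some((true, "ship it")));
    assert_eq!(task_item(" [ ] write tests"), Some((false, "write tests")));
    assert_eq!(task_item("not a task"), None);
}
```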
@@ -382,7 +383,7 @@ pub(crate) fn lex_numbers<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>,
}
char_iter.next();
let s = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
return Ok(Token::OrderedListEntry(s.to_string()))
return Ok(Token::OrderedListEntry(s))
},
_ => return Err(ParseError{content: c})
}
@@ -403,7 +404,7 @@ pub(crate) fn lex_tilde<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
// return Err(ParseError{content: format!("{}{}{}", lead_tildes, line, tail_tildes)})
}
return Ok(Token::Strikethrough(line.to_string()));
return Ok(Token::Strikethrough(line));
}
_ => return Err(ParseError{content: lead_tildes}),
}
@@ -436,7 +437,7 @@ fn parse_details<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseErr
}
}
let inner_tokens = crate::lex(remaining_text.strip_suffix("</details>").unwrap_or(""));
Ok(Token::Detail(summary_line.to_string(), inner_tokens))
Ok(Token::Detail(summary_line, inner_tokens))
}

pub(crate) fn lex_pipes<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
@@ -454,13 +455,13 @@ pub(crate) fn lex_pipes<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
}
let headings: Vec<_> = lines.remove(0).split("|")
.filter(|&x| x != "")
.map(|x| x.trim().to_string())
.map(|x| x.trim())
.collect();
let alignments: Vec<_> = lines.remove(0).split("|")
.filter(|&x| x != "")
.map(|x|
{
match (x.trim().to_string().starts_with(":"), x.trim().to_string().ends_with(":")) {
match (x.trim().starts_with(":"), x.trim().ends_with(":")) {
(true, false) => Alignment::Left,
(true, true) => Alignment::Center,
(false, true) => Alignment::Right,
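The table changes follow the same pattern: header cells stay borrowed `&str` slices after trimming rather than being copied into `String`s. An illustrative snippet (`header_cells` is hypothetical, not the crate's `lex_pipes`):

```rust
// Illustrative only: split a markdown table header row into trimmed, borrowed cells.
// Mirrors the `.map(|x| x.trim())` change above; no allocation per cell.
fn header_cells(row: &str) -> Vec<&str> {
    row.split('|')
        .map(str::trim)
        .filter(|cell| !cell.is_empty())
        .collect()
}

fn main() {
    assert_eq!(
        header_cells("| Left | Center | Right |"),
        vec!["Left", "Center", "Right"]
    );
}
```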
4 changes: 2 additions & 2 deletions src/lib.rs
@@ -239,7 +239,7 @@ pub fn parse(tokens: &[Token]) -> String {
in_ordered_list = true;
html.push_str(format!("<ol>").as_str())
}
html.push_str(format!("<li>{}</li>", sanitize_display_text(t)).as_str())
html.push_str(format!("<li>{}</li>", sanitize_display_text(&t.to_string())).as_str())
},
Token::Newline => {html.push('\n')},
Token::Italic(t) => {html.push_str(format!("<em>{}</em>", sanitize_display_text(t)).as_str())},
@@ -392,7 +392,7 @@ pub fn render(source: &str) -> String {
}

/// Replace potentially unsafe characters with html entities
pub(crate) fn sanitize_display_text(source: &String) -> String {
pub(crate) fn sanitize_display_text(source: &str) -> String {
source.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
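Widening `sanitize_display_text` from `&String` to `&str` also loosens the call sites: both owned `String`s and plain string slices coerce into the parameter. A self-contained illustration, whose body only mirrors the replacements visible in the hunk above:

```rust
// Self-contained illustration; mirrors only the replacements visible in the diff above.
// A `&str` parameter accepts `&String` (via deref coercion) as well as string literals.
fn sanitize_display_text(source: &str) -> String {
    source
        .replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
}

fn main() {
    let owned = String::from("<b>5 & 6</b>");
    // Works with an owned String...
    assert_eq!(sanitize_display_text(&owned), "&lt;b&gt;5 &amp; 6&lt;/b&gt;");
    // ...and with a plain &str literal.
    assert_eq!(sanitize_display_text("a < b"), "a &lt; b");
}
```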
2 changes: 1 addition & 1 deletion tests/e2e.rs
@@ -196,7 +196,7 @@ fn test_references(){
("Here's some text. And a ref [^1]\n [^1]: Reference text",
"<p>Here's some text. And a ref <sup id=\"fnref:1\" role=\"doc-noteref\"><a href=\"#fn:1\" class=\"footnote\" rel=\"footnote\">1</a></sup></p>\n<div class=\"footnotes\" role=\"doc-endnotes\">\n\t<ol>\n\t\t<li id=\"fn:1\" role=\"doc-endnote\">\t\t\t<p>Reference text<a href=\"#fnref:1\" class=\"reversefootnote\" role=\"doc-backlink\">↩</a></p>\t\t</li>\t</ol>\n</div>\n"),
("Here's some text. And a ref [^1]\n [^1]: Reference text\n\twith multiple\n lines\n to ensure those work",
"<p>Here's some text. And a ref <sup id=\"fnref:1\" role=\"doc-noteref\"><a href=\"#fn:1\" class=\"footnote\" rel=\"footnote\">1</a></sup></p>\n<div class=\"footnotes\" role=\"doc-endnotes\">\n\t<ol>\n\t\t<li id=\"fn:1\" role=\"doc-endnote\">\t\t\t<p>Reference text\nwith multiple\nlines\nto ensure those work<a href=\"#fnref:1\" class=\"reversefootnote\" role=\"doc-backlink\">↩</a></p>\t\t</li>\t</ol>\n</div>\n"),
"<p>Here's some text. And a ref <sup id=\"fnref:1\" role=\"doc-noteref\"><a href=\"#fn:1\" class=\"footnote\" rel=\"footnote\">1</a></sup></p>\n<div class=\"footnotes\" role=\"doc-endnotes\">\n\t<ol>\n\t\t<li id=\"fn:1\" role=\"doc-endnote\">\t\t\t<p>Reference text\n\twith multiple\n lines\n to ensure those work<a href=\"#fnref:1\" class=\"reversefootnote\" role=\"doc-backlink\">↩</a></p>\t\t</li>\t</ol>\n</div>\n"),
]);

for test in tests.iter(){
