diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index 23fb7b3158..0b2aea001a 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -16,7 +16,7 @@ const marked = require('marked'); const renderer = new marked.Renderer(); // Override function -renderer.heading = function (text, level) { +renderer.heading = function(text, level) { const escapedText = text.toLowerCase().replace(/[^\w]+/g, '-'); return ` @@ -58,7 +58,7 @@ console.log(marked('# heading+', { renderer })); - tablerow(*string* content) - tablecell(*string* content, *object* flags) -`slugger` has the `slug` method to create an unique id from value: +`slugger` has the `slug` method to create a unique id from value: ```js slugger.slug('foo') // foo @@ -89,9 +89,93 @@ slugger.slug('foo-1') // foo-1-2 - image(*string* href, *string* title, *string* text) - text(*string* text) +

The tokenizer

+ +The tokenizer defines how to turn markdown text into tokens. + +**Example:** Overriding default `codespan` tokenizer to include LaTeX. + +```js +// Create reference instance +const marked = require('marked'); + +// Get reference +const tokenizer = new marked.Tokenizer(); +const originalCodespan = tokenizer.codespan; +// Override function +tokenizer.codespan = function(src) { + const match = src.match(/\$+([^\$\n]+?)\$+/); + if (match) { + return { + type: 'codespan', + raw: match[0], + text: match[1].trim() + }; + } + return originalCodespan.apply(this, arguments); +}; + +// Run marked +console.log(marked('$ latex code $', { tokenizer })); +``` + +**Output:** + +```html +

<p><code>latex code</code></p>

+``` + +### Block level tokenizer methods + +- space(*string* src) +- code(*string* src, *array* tokens) +- fences(*string* src) +- heading(*string* src) +- nptable(*string* src) +- hr(*string* src) +- blockquote(*string* src) +- list(*string* src) +- html(*string* src) +- def(*string* src) +- table(*string* src) +- lheading(*string* src) +- paragraph(*string* src) +- text(*string* src) + +### Inline level tokenizer methods + +- escape(*string* src) +- tag(*string* src, *bool* inLink, *bool* inRawBlock) +- link(*string* src) +- reflink(*string* src, *object* links) +- strong(*string* src) +- em(*string* src) +- codespan(*string* src) +- br(*string* src) +- del(*string* src) +- autolink(*string* src, *function* mangle) +- url(*string* src, *function* mangle) +- inlineText(*string* src, *bool* inRawBlock, *function* smartypants) + +`mangle` is a method that changes text to HTML character references: + +```js +mangle('test@example.com') +// "&#x74;&#101;&#x73;&#116;&#x40;&#101;&#120;&#x61;&#x6d;&#112;&#108;&#101;&#46;&#x63;&#111;&#x6d;" +``` + +`smartypants` is a method that translates plain ASCII punctuation characters into “smart” typographic punctuation characters: + +https://daringfireball.net/projects/smartypants/ + +```js +smartypants('"this ... string"') +// "“this … string”" +``` +

The lexer

-The lexer turns a markdown string into tokens. +The lexer takes a markdown string and calls the tokenizer functions.

The parser

@@ -112,8 +196,10 @@ console.log(marked.parser(tokens, options)); const lexer = new marked.Lexer(options); const tokens = lexer.lex(markdown); console.log(tokens); -console.log(lexer.rules.block); // block level rules -console.log(lexer.rules.inline); // inline level rules +console.log(lexer.tokenizer.rules.block); // block level rules used +console.log(lexer.tokenizer.rules.inline); // inline level rules used +console.log(marked.Lexer.rules.block); // all block level rules +console.log(marked.Lexer.rules.inline); // all inline level rules ``` ``` bash diff --git a/docs/index.html b/docs/index.html index 6aed62796c..9d68e7bb41 100644 --- a/docs/index.html +++ b/docs/index.html @@ -155,6 +155,7 @@

Marked.js Documentation

Extensibility diff --git a/lib/marked.esm.js b/lib/marked.esm.js index 2cb28fe24d..7a89a4629a 100644 --- a/lib/marked.esm.js +++ b/lib/marked.esm.js @@ -31,6 +31,7 @@ function getDefaults() { silent: false, smartLists: false, smartypants: false, + tokenizer: null, xhtml: false }; } @@ -293,6 +294,593 @@ var helpers = { checkSanitizeDeprecation }; +const { defaults: defaults$1 } = defaults; +const { + rtrim: rtrim$1, + splitCells: splitCells$1, + escape: escape$1, + findClosingBracket: findClosingBracket$1 +} = helpers; + +function outputLink(cap, link, raw) { + const href = link.href; + const title = link.title ? escape$1(link.title) : null; + + if (cap[0].charAt(0) !== '!') { + return { + type: 'link', + raw, + href, + title, + text: cap[1] + }; + } else { + return { + type: 'image', + raw, + text: escape$1(cap[1]), + href, + title + }; + } +} + +/** + * Tokenizer + */ +var Tokenizer_1 = class Tokenizer { + constructor(options) { + this.options = options || defaults$1; + } + + space(src) { + const cap = this.rules.block.newline.exec(src); + if (cap) { + if (cap[0].length > 1) { + return { + type: 'space', + raw: cap[0] + }; + } + return { raw: '\n' }; + } + } + + code(src, tokens) { + const cap = this.rules.block.code.exec(src); + if (cap) { + const lastToken = tokens[tokens.length - 1]; + // An indented code block cannot interrupt a paragraph. + if (lastToken && lastToken.type === 'paragraph') { + tokens.pop(); + lastToken.text += '\n' + cap[0].trimRight(); + lastToken.raw += '\n' + cap[0]; + return lastToken; + } else { + const text = cap[0].replace(/^ {4}/gm, ''); + return { + type: 'code', + raw: cap[0], + codeBlockStyle: 'indented', + text: !this.options.pedantic + ? rtrim$1(text, '\n') + : text + }; + } + } + } + + fences(src) { + const cap = this.rules.block.fences.exec(src); + if (cap) { + return { + type: 'code', + raw: cap[0], + lang: cap[2] ? 
cap[2].trim() : cap[2], + text: cap[3] || '' + }; + } + } + + heading(src) { + const cap = this.rules.block.heading.exec(src); + if (cap) { + return { + type: 'heading', + raw: cap[0], + depth: cap[1].length, + text: cap[2] + }; + } + } + + nptable(src) { + const cap = this.rules.block.nptable.exec(src); + if (cap) { + const item = { + type: 'table', + header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), + align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), + cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [], + raw: cap[0] + }; + + if (item.header.length === item.align.length) { + let l = item.align.length; + let i; + for (i = 0; i < l; i++) { + if (/^ *-+: *$/.test(item.align[i])) { + item.align[i] = 'right'; + } else if (/^ *:-+: *$/.test(item.align[i])) { + item.align[i] = 'center'; + } else if (/^ *:-+ *$/.test(item.align[i])) { + item.align[i] = 'left'; + } else { + item.align[i] = null; + } + } + + l = item.cells.length; + for (i = 0; i < l; i++) { + item.cells[i] = splitCells$1(item.cells[i], item.header.length); + } + + return item; + } + } + } + + hr(src) { + const cap = this.rules.block.hr.exec(src); + if (cap) { + return { + type: 'hr', + raw: cap[0] + }; + } + } + + blockquote(src) { + const cap = this.rules.block.blockquote.exec(src); + if (cap) { + const text = cap[0].replace(/^ *> ?/gm, ''); + + return { + type: 'blockquote', + raw: cap[0], + text + }; + } + } + + list(src) { + const cap = this.rules.block.list.exec(src); + if (cap) { + let raw = cap[0]; + const bull = cap[2]; + const isordered = bull.length > 1; + + const list = { + type: 'list', + raw, + ordered: isordered, + start: isordered ? +bull : '', + loose: false, + items: [] + }; + + // Get each top-level item. 
+ const itemMatch = cap[0].match(this.rules.block.item); + + let next = false, + item, + space, + b, + addBack, + loose, + istask, + ischecked; + + const l = itemMatch.length; + for (let i = 0; i < l; i++) { + item = itemMatch[i]; + raw = item; + + // Remove the list item's bullet + // so it is seen as the next token. + space = item.length; + item = item.replace(/^ *([*+-]|\d+\.) */, ''); + + // Outdent whatever the + // list item contains. Hacky. + if (~item.indexOf('\n ')) { + space -= item.length; + item = !this.options.pedantic + ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') + : item.replace(/^ {1,4}/gm, ''); + } + + // Determine whether the next list item belongs here. + // Backpedal if it does not belong in this list. + if (i !== l - 1) { + b = this.rules.block.bullet.exec(itemMatch[i + 1])[0]; + if (bull.length > 1 ? b.length === 1 + : (b.length > 1 || (this.options.smartLists && b !== bull))) { + addBack = itemMatch.slice(i + 1).join('\n'); + list.raw = list.raw.substring(0, list.raw.length - addBack.length); + i = l - 1; + } + } + + // Determine whether item is loose or not. + // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ + // for discount behavior. + loose = next || /\n\n(?!\s*$)/.test(item); + if (i !== l - 1) { + next = item.charAt(item.length - 1) === '\n'; + if (!loose) loose = next; + } + + if (loose) { + list.loose = true; + } + + // Check for task list items + istask = /^\[[ xX]\] /.test(item); + ischecked = undefined; + if (istask) { + ischecked = item[1] !== ' '; + item = item.replace(/^\[[ xX]\] +/, ''); + } + + list.items.push({ + raw, + task: istask, + checked: ischecked, + loose: loose, + text: item + }); + } + + return list; + } + } + + html(src) { + const cap = this.rules.block.html.exec(src); + if (cap) { + return { + type: this.options.sanitize + ? 'paragraph' + : 'html', + raw: cap[0], + pre: !this.options.sanitizer + && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), + text: this.options.sanitize ? 
(this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$1(cap[0])) : cap[0] + }; + } + } + + def(src) { + const cap = this.rules.block.def.exec(src); + if (cap) { + if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); + const tag = cap[1].toLowerCase().replace(/\s+/g, ' '); + return { + tag, + raw: cap[0], + href: cap[2], + title: cap[3] + }; + } + } + + table(src) { + const cap = this.rules.block.table.exec(src); + if (cap) { + const item = { + type: 'table', + header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), + align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), + cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [] + }; + + if (item.header.length === item.align.length) { + item.raw = cap[0]; + + let l = item.align.length; + let i; + for (i = 0; i < l; i++) { + if (/^ *-+: *$/.test(item.align[i])) { + item.align[i] = 'right'; + } else if (/^ *:-+: *$/.test(item.align[i])) { + item.align[i] = 'center'; + } else if (/^ *:-+ *$/.test(item.align[i])) { + item.align[i] = 'left'; + } else { + item.align[i] = null; + } + } + + l = item.cells.length; + for (i = 0; i < l; i++) { + item.cells[i] = splitCells$1( + item.cells[i].replace(/^ *\| *| *\| *$/g, ''), + item.header.length); + } + + return item; + } + } + } + + lheading(src) { + const cap = this.rules.block.lheading.exec(src); + if (cap) { + return { + type: 'heading', + raw: cap[0], + depth: cap[2].charAt(0) === '=' ? 1 : 2, + text: cap[1] + }; + } + } + + paragraph(src) { + const cap = this.rules.block.paragraph.exec(src); + if (cap) { + return { + type: 'paragraph', + raw: cap[0], + text: cap[1].charAt(cap[1].length - 1) === '\n' + ? 
cap[1].slice(0, -1) + : cap[1] + }; + } + } + + text(src) { + const cap = this.rules.block.text.exec(src); + if (cap) { + return { + type: 'text', + raw: cap[0], + text: cap[0] + }; + } + } + + escape(src) { + const cap = this.rules.inline.escape.exec(src); + if (cap) { + return { + type: 'escape', + raw: cap[0], + text: escape$1(cap[1]) + }; + } + } + + tag(src, inLink, inRawBlock) { + const cap = this.rules.inline.tag.exec(src); + if (cap) { + if (!inLink && /^/i.test(cap[0])) { + inLink = false; + } + if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + inRawBlock = true; + } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + inRawBlock = false; + } + + return { + type: this.options.sanitize + ? 'text' + : 'html', + raw: cap[0], + inLink, + inRawBlock, + text: this.options.sanitize + ? (this.options.sanitizer + ? this.options.sanitizer(cap[0]) + : escape$1(cap[0])) + : cap[0] + }; + } + } + + link(src) { + const cap = this.rules.inline.link.exec(src); + if (cap) { + const lastParenIndex = findClosingBracket$1(cap[2], '()'); + if (lastParenIndex > -1) { + const start = cap[0].indexOf('!') === 0 ? 5 : 4; + const linkLen = start + cap[1].length + lastParenIndex; + cap[2] = cap[2].substring(0, lastParenIndex); + cap[0] = cap[0].substring(0, linkLen).trim(); + cap[3] = ''; + } + let href = cap[2]; + let title = ''; + if (this.options.pedantic) { + const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); + + if (link) { + href = link[1]; + title = link[3]; + } else { + title = ''; + } + } else { + title = cap[3] ? cap[3].slice(1, -1) : ''; + } + href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); + const token = outputLink(cap, { + href: href ? href.replace(this.rules.inline._escapes, '$1') : href, + title: title ? 
title.replace(this.rules.inline._escapes, '$1') : title + }, cap[0]); + return token; + } + } + + reflink(src, links) { + let cap; + if ((cap = this.rules.inline.reflink.exec(src)) + || (cap = this.rules.inline.nolink.exec(src))) { + let link = (cap[2] || cap[1]).replace(/\s+/g, ' '); + link = links[link.toLowerCase()]; + if (!link || !link.href) { + const text = cap[0].charAt(0); + return { + type: 'text', + raw: text, + text + }; + } + const token = outputLink(cap, link, cap[0]); + return token; + } + } + + strong(src) { + const cap = this.rules.inline.strong.exec(src); + if (cap) { + return { + type: 'strong', + raw: cap[0], + text: cap[4] || cap[3] || cap[2] || cap[1] + }; + } + } + + em(src) { + const cap = this.rules.inline.em.exec(src); + if (cap) { + return { + type: 'em', + raw: cap[0], + text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1] + }; + } + } + + codespan(src) { + const cap = this.rules.inline.code.exec(src); + if (cap) { + return { + type: 'codespan', + raw: cap[0], + text: escape$1(cap[2].trim(), true) + }; + } + } + + br(src) { + const cap = this.rules.inline.br.exec(src); + if (cap) { + return { + type: 'br', + raw: cap[0] + }; + } + } + + del(src) { + const cap = this.rules.inline.del.exec(src); + if (cap) { + return { + type: 'del', + raw: cap[0], + text: cap[1] + }; + } + } + + autolink(src, mangle) { + const cap = this.rules.inline.autolink.exec(src); + if (cap) { + let text, href; + if (cap[2] === '@') { + text = escape$1(this.options.mangle ? mangle(cap[1]) : cap[1]); + href = 'mailto:' + text; + } else { + text = escape$1(cap[1]); + href = text; + } + + return { + type: 'link', + raw: cap[0], + text, + href, + tokens: [ + { + type: 'text', + raw: text, + text + } + ] + }; + } + } + + url(src, mangle) { + let cap; + if (cap = this.rules.inline.url.exec(src)) { + let text, href; + if (cap[2] === '@') { + text = escape$1(this.options.mangle ? 
mangle(cap[0]) : cap[0]); + href = 'mailto:' + text; + } else { + // do extended autolink path validation + let prevCapZero; + do { + prevCapZero = cap[0]; + cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; + } while (prevCapZero !== cap[0]); + text = escape$1(cap[0]); + if (cap[1] === 'www.') { + href = 'http://' + text; + } else { + href = text; + } + } + return { + type: 'link', + raw: cap[0], + text, + href, + tokens: [ + { + type: 'text', + raw: text, + text + } + ] + }; + } + } + + inlineText(src, inRawBlock, smartypants) { + const cap = this.rules.inline.text.exec(src); + if (cap) { + let text; + if (inRawBlock) { + text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$1(cap[0])) : cap[0]; + } else { + text = escape$1(this.options.smartypants ? smartypants(cap[0]) : cap[0]); + } + return { + type: 'text', + raw: cap[0], + text + }; + } + } +}; + const { noopTest: noopTest$1, edit: edit$1, @@ -560,14 +1148,49 @@ var rules = { inline }; -const { defaults: defaults$1 } = defaults; +const { defaults: defaults$2 } = defaults; const { block: block$1, inline: inline$1 } = rules; -const { - rtrim: rtrim$1, - splitCells: splitCells$1, - escape: escape$1, - findClosingBracket: findClosingBracket$1 -} = helpers; + +/** + * smartypants text replacement + */ +function smartypants(text) { + return text + // em-dashes + .replace(/---/g, '\u2014') + // en-dashes + .replace(/--/g, '\u2013') + // opening singles + .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018') + // closing singles & apostrophes + .replace(/'/g, '\u2019') + // opening doubles + .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c') + // closing doubles + .replace(/"/g, '\u201d') + // ellipses + .replace(/\.{3}/g, '\u2026'); +} + +/** + * mangle email addresses + */ +function mangle(text) { + let out = '', + i, + ch; + + const l = text.length; + for (i = 0; i < l; i++) { + ch = text.charCodeAt(i); + if (Math.random() > 0.5) { + ch = 'x' + ch.toString(16); + } + 
out += '&#' + ch + ';'; + } + + return out; +} /** * Block Lexer @@ -576,27 +1199,32 @@ var Lexer_1 = class Lexer { constructor(options) { this.tokens = []; this.tokens.links = Object.create(null); - this.options = options || defaults$1; - this.rules = { + this.options = options || defaults$2; + this.options.tokenizer = this.options.tokenizer || new Tokenizer_1(); + this.tokenizer = this.options.tokenizer; + this.tokenizer.options = this.options; + + const rules = { block: block$1.normal, inline: inline$1.normal }; if (this.options.pedantic) { - this.rules.block = block$1.pedantic; - this.rules.inline = inline$1.pedantic; + rules.block = block$1.pedantic; + rules.inline = inline$1.pedantic; } else if (this.options.gfm) { - this.rules.block = block$1.gfm; + rules.block = block$1.gfm; if (this.options.breaks) { - this.rules.inline = inline$1.breaks; + rules.inline = inline$1.breaks; } else { - this.rules.inline = inline$1.gfm; + rules.inline = inline$1.gfm; } } + this.tokenizer.rules = rules; } /** - * Expose Block Rules + * Expose Rules */ static get rules() { return { @@ -621,7 +1249,7 @@ var Lexer_1 = class Lexer { .replace(/\r\n|\r/g, '\n') .replace(/\t/g, ' '); - this.blockTokens(src, this.tokens); + this.blockTokens(src, this.tokens, true); this.inline(this.tokens); @@ -631,347 +1259,118 @@ var Lexer_1 = class Lexer { /** * Lexing */ - blockTokens(src, tokens, top = true) { + blockTokens(src, tokens = [], top = true) { src = src.replace(/^ +$/gm, ''); - let next, - loose, - cap, - bull, - b, - item, - list, - space, - i, - tag, - l, - isordered, - istask, - ischecked, - lastToken, - addBack, - raw; + let token, i, l; while (src) { // newline - if (cap = this.rules.block.newline.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - if (cap[0].length > 1) { - tokens.push({ - type: 'space', - raw - }); + if (token = this.tokenizer.space(src)) { + src = src.substring(token.raw.length); + if (token.type) { + tokens.push(token); } + continue; } // code 
- if (cap = this.rules.block.code.exec(src)) { - lastToken = tokens[tokens.length - 1]; - src = src.substring(cap[0].length); - raw = cap[0]; - // An indented code block cannot interrupt a paragraph. - if (lastToken && lastToken.type === 'paragraph') { - lastToken.text += '\n' + cap[0].trimRight(); - lastToken.raw += '\n' + raw; - } else { - cap = cap[0].replace(/^ {4}/gm, ''); - tokens.push({ - type: 'code', - raw, - codeBlockStyle: 'indented', - text: !this.options.pedantic - ? rtrim$1(cap, '\n') - : cap - }); - } + if (token = this.tokenizer.code(src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // fences - if (cap = this.rules.block.fences.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'code', - raw, - lang: cap[2] ? cap[2].trim() : cap[2], - text: cap[3] || '' - }); + if (token = this.tokenizer.fences(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // heading - if (cap = this.rules.block.heading.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'heading', - raw, - depth: cap[1].length, - text: cap[2] - }); + if (token = this.tokenizer.heading(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // table no leading pipe (gfm) - if (cap = this.rules.block.nptable.exec(src)) { - item = { - type: 'table', - header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), - align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), - cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [] - }; - - if (item.header.length === item.align.length) { - src = src.substring(cap[0].length); - raw = cap[0]; - item.raw = raw; - - l = item.align.length; - for (i = 0; i < l; i++) { - if (/^ *-+: *$/.test(item.align[i])) { - item.align[i] = 'right'; - } else if (/^ *:-+: *$/.test(item.align[i])) { - item.align[i] = 'center'; - } else if (/^ *:-+ *$/.test(item.align[i])) { - item.align[i] = 'left'; - } else { - item.align[i] = null; - } - } - - l = item.cells.length; - for (i = 0; i < l; i++) { - item.cells[i] = splitCells$1(item.cells[i], item.header.length); - } - - tokens.push(item); - - continue; - } + if (token = this.tokenizer.nptable(src)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; } // hr - if (cap = this.rules.block.hr.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'hr', - raw - }); + if (token = this.tokenizer.hr(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // blockquote - if (cap = this.rules.block.blockquote.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - - cap = cap[0].replace(/^ *> ?/gm, ''); - - tokens.push({ - type: 'blockquote', - raw, - tokens: this.blockTokens(cap, [], top) - }); - + if (token = this.tokenizer.blockquote(src)) { + src = src.substring(token.raw.length); + token.tokens = this.blockTokens(token.text, [], top); + tokens.push(token); continue; } // list - if (cap = this.rules.block.list.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - bull = cap[2]; - isordered = bull.length > 1; - - list = { - type: 'list', - raw, - ordered: isordered, - start: isordered ? +bull : '', - loose: false, - items: [] - }; - - tokens.push(list); - - // Get each top-level item. 
- cap = cap[0].match(this.rules.block.item); - - next = false; - - l = cap.length; + if (token = this.tokenizer.list(src)) { + src = src.substring(token.raw.length); + l = token.items.length; for (i = 0; i < l; i++) { - item = cap[i]; - raw = item.trim(); - - // Remove the list item's bullet - // so it is seen as the next token. - space = item.length; - item = item.replace(/^ *([*+-]|\d+\.) */, ''); - - // Outdent whatever the - // list item contains. Hacky. - if (~item.indexOf('\n ')) { - space -= item.length; - item = !this.options.pedantic - ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') - : item.replace(/^ {1,4}/gm, ''); - } - - // Determine whether the next list item belongs here. - // Backpedal if it does not belong in this list. - if (i !== l - 1) { - b = block$1.bullet.exec(cap[i + 1])[0]; - if (bull.length > 1 ? b.length === 1 - : (b.length > 1 || (this.options.smartLists && b !== bull))) { - addBack = cap.slice(i + 1).join('\n'); - src = addBack + src; - list.raw = list.raw.substring(list.raw.length - addBack.length); - i = l - 1; - } - } - - // Determine whether item is loose or not. - // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ - // for discount behavior. - loose = next || /\n\n(?!\s*$)/.test(item); - if (i !== l - 1) { - next = item.charAt(item.length - 1) === '\n'; - if (!loose) loose = next; - } - - if (loose) { - list.loose = true; - } - - // Check for task list items - istask = /^\[[ xX]\] /.test(item); - ischecked = undefined; - if (istask) { - ischecked = item[1] !== ' '; - item = item.replace(/^\[[ xX]\] +/, ''); - } - - list.items.push({ - raw, - task: istask, - checked: ischecked, - loose: loose, - tokens: this.blockTokens(item, [], false) - }); + token.items[i].tokens = this.blockTokens(token.items[i].text, [], false); } - + tokens.push(token); continue; } // html - if (cap = this.rules.block.html.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: this.options.sanitize - ? 
'paragraph' - : 'html', - raw, - pre: !this.options.sanitizer - && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), - text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$1(cap[0])) : cap[0] - }); + if (token = this.tokenizer.html(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // def - if (top && (cap = this.rules.block.def.exec(src))) { - src = src.substring(cap[0].length); - if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); - tag = cap[1].toLowerCase().replace(/\s+/g, ' '); - if (!this.tokens.links[tag]) { - this.tokens.links[tag] = { - href: cap[2], - title: cap[3] + if (top && (token = this.tokenizer.def(src))) { + src = src.substring(token.raw.length); + if (!this.tokens.links[token.tag]) { + this.tokens.links[token.tag] = { + href: token.href, + title: token.title }; } continue; } // table (gfm) - if (cap = this.rules.block.table.exec(src)) { - item = { - type: 'table', - header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), - align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), - cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [] - }; - - if (item.header.length === item.align.length) { - src = src.substring(cap[0].length); - item.raw = cap[0]; - - l = item.align.length; - for (i = 0; i < l; i++) { - if (/^ *-+: *$/.test(item.align[i])) { - item.align[i] = 'right'; - } else if (/^ *:-+: *$/.test(item.align[i])) { - item.align[i] = 'center'; - } else if (/^ *:-+ *$/.test(item.align[i])) { - item.align[i] = 'left'; - } else { - item.align[i] = null; - } - } - - l = item.cells.length; - for (i = 0; i < l; i++) { - item.cells[i] = splitCells$1( - item.cells[i].replace(/^ *\| *| *\| *$/g, ''), - item.header.length); - } - - tokens.push(item); - - continue; - } + if (token = this.tokenizer.table(src)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; } // lheading - if (cap = this.rules.block.lheading.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'heading', - raw, - depth: cap[2].charAt(0) === '=' ? 1 : 2, - text: cap[1] - }); + if (token = this.tokenizer.lheading(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // top-level paragraph - if (top && (cap = this.rules.block.paragraph.exec(src))) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'paragraph', - raw, - text: cap[1].charAt(cap[1].length - 1) === '\n' - ? cap[1].slice(0, -1) - : cap[1] - }); + if (top && (token = this.tokenizer.paragraph(src))) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // text - if (cap = this.rules.block.text.exec(src)) { - // Top-level should never reach here. 
- src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'text', - raw, - text: cap[0] - }); + if (token = this.tokenizer.text(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -979,6 +1378,7 @@ var Lexer_1 = class Lexer { const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); if (this.options.silent) { console.error(errMsg); + break; } else { throw new Error(errMsg); } @@ -1053,280 +1453,102 @@ var Lexer_1 = class Lexer { /** * Lexing/Compiling */ - inlineTokens(src, tokens) { - let out = '', - link, - text, - newTokens, - href, - title, - cap, - prevCapZero, - lastParenIndex, - start, - linkLen, - raw; + inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) { + let token; while (src) { // escape - if (cap = this.rules.inline.escape.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - text = escape$1(cap[1]); - out += text; - tokens.push({ - type: 'escape', - raw, - text - }); + if (token = this.tokenizer.escape(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // tag - if (cap = this.rules.inline.tag.exec(src)) { - if (!this.inLink && /^/i.test(cap[0])) { - this.inLink = false; - } - if (!this.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - this.inRawBlock = true; - } else if (this.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - this.inRawBlock = false; - } - - src = src.substring(cap[0].length); - raw = cap[0]; - text = this.options.sanitize - ? (this.options.sanitizer - ? this.options.sanitizer(cap[0]) - : escape$1(cap[0])) - : cap[0]; - tokens.push({ - type: this.options.sanitize - ? 
'text' - : 'html', - raw, - text - }); - out += text; + if (token = this.tokenizer.tag(src, inLink, inRawBlock)) { + src = src.substring(token.raw.length); + inLink = token.inLink; + inRawBlock = token.inRawBlock; + tokens.push(token); continue; } // link - if (cap = this.rules.inline.link.exec(src)) { - lastParenIndex = findClosingBracket$1(cap[2], '()'); - if (lastParenIndex > -1) { - start = cap[0].indexOf('!') === 0 ? 5 : 4; - linkLen = start + cap[1].length + lastParenIndex; - cap[2] = cap[2].substring(0, lastParenIndex); - cap[0] = cap[0].substring(0, linkLen).trim(); - cap[3] = ''; - } - src = src.substring(cap[0].length); - raw = cap[0]; - this.inLink = true; - href = cap[2]; - if (this.options.pedantic) { - link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); - - if (link) { - href = link[1]; - title = link[3]; - } else { - title = ''; - } - } else { - title = cap[3] ? cap[3].slice(1, -1) : ''; + if (token = this.tokenizer.link(src)) { + src = src.substring(token.raw.length); + if (token.type === 'link') { + token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); } - href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); - out += this.outputLink(cap, { - href: this.escapes(href), - title: this.escapes(title) - }, tokens, raw); - this.inLink = false; + tokens.push(token); continue; } // reflink, nolink - if ((cap = this.rules.inline.reflink.exec(src)) - || (cap = this.rules.inline.nolink.exec(src))) { - src = src.substring(cap[0].length); - raw = cap[0]; - link = (cap[2] || cap[1]).replace(/\s+/g, ' '); - link = this.tokens.links[link.toLowerCase()]; - if (!link || !link.href) { - text = cap[0].charAt(0); - out += text; - tokens.push({ - type: 'text', - raw: text, - text - }); - src = cap[0].substring(1) + src; - continue; + if (token = this.tokenizer.reflink(src, this.tokens.links)) { + src = src.substring(token.raw.length); + if (token.type === 'link') { + token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); } - this.inLink = true; - 
out += this.outputLink(cap, link, tokens, raw); - this.inLink = false; + tokens.push(token); continue; } // strong - if (cap = this.rules.inline.strong.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? [] : null; - text = this.inlineTokens(cap[4] || cap[3] || cap[2] || cap[1], newTokens); - - tokens.push({ - type: 'strong', - raw, - text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.strong(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // em - if (cap = this.rules.inline.em.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? [] : null; - text = this.inlineTokens(cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1], newTokens); - tokens.push({ - type: 'em', - raw, - text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.em(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // code - if (cap = this.rules.inline.code.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - text = escape$1(cap[2].trim(), true); - tokens.push({ - type: 'codespan', - raw, - text - }); - out += text; + if (token = this.tokenizer.codespan(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // br - if (cap = this.rules.inline.br.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'br', - raw - }); - out += '\n'; + if (token = this.tokenizer.br(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // del (gfm) - if (cap = this.rules.inline.del.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? 
[] : null; - text = this.inlineTokens(cap[1], newTokens); - tokens.push({ - type: 'del', - raw, - text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.del(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // autolink - if (cap = this.rules.inline.autolink.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - if (cap[2] === '@') { - text = escape$1(this.options.mangle ? this.mangle(cap[1]) : cap[1]); - href = 'mailto:' + text; - } else { - text = escape$1(cap[1]); - href = text; - } - tokens.push({ - type: 'link', - raw, - text, - href, - tokens: [ - { - type: 'text', - raw: text, - text - } - ] - }); - out += text; + if (token = this.tokenizer.autolink(src, mangle)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // url (gfm) - if (!this.inLink && (cap = this.rules.inline.url.exec(src))) { - if (cap[2] === '@') { - text = escape$1(this.options.mangle ? this.mangle(cap[0]) : cap[0]); - href = 'mailto:' + text; - } else { - // do extended autolink path validation - do { - prevCapZero = cap[0]; - cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; - } while (prevCapZero !== cap[0]); - text = escape$1(cap[0]); - if (cap[1] === 'www.') { - href = 'http://' + text; - } else { - href = text; - } - } - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'link', - raw, - text, - href, - tokens: [ - { - type: 'text', - raw: text, - text - } - ] - }); - out += text; + if (!inLink && (token = this.tokenizer.url(src, mangle))) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // text - if (cap = this.rules.inline.text.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - if (this.inRawBlock) { - text = this.options.sanitize ? (this.options.sanitizer ? 
this.options.sanitizer(cap[0]) : escape$1(cap[0])) : cap[0]; - } else { - text = escape$1(this.options.smartypants ? this.smartypants(cap[0]) : cap[0]); - } - tokens.push({ - type: 'text', - raw, - text - }); - out += text; + if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -1334,94 +1556,18 @@ var Lexer_1 = class Lexer { const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); if (this.options.silent) { console.error(errMsg); + break; } else { throw new Error(errMsg); } } } - return out; - } - - escapes(text) { - return text ? text.replace(inline$1._escapes, '$1') : text; - } - - /** - * tokenize Link - */ - outputLink(cap, link, tokens, raw) { - const href = link.href; - const title = link.title ? escape$1(link.title) : null; - const newTokens = tokens ? [] : null; - - if (cap[0].charAt(0) !== '!') { - const text = this.inlineTokens(cap[1], newTokens); - tokens.push({ - type: 'link', - raw, - text, - href, - title, - tokens: newTokens - }); - return text; - } else { - const text = escape$1(cap[1]); - tokens.push({ - type: 'image', - raw, - text, - href, - title - }); - return text; - } - } - - /** - * Smartypants Transformations - */ - smartypants(text) { - return text - // em-dashes - .replace(/---/g, '\u2014') - // en-dashes - .replace(/--/g, '\u2013') - // opening singles - .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018') - // closing singles & apostrophes - .replace(/'/g, '\u2019') - // opening doubles - .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c') - // closing doubles - .replace(/"/g, '\u201d') - // ellipses - .replace(/\.{3}/g, '\u2026'); - } - - /** - * Mangle Links - */ - mangle(text) { - let out = '', - i, - ch; - - const l = text.length; - for (i = 0; i < l; i++) { - ch = text.charCodeAt(i); - if (Math.random() > 0.5) { - ch = 'x' + ch.toString(16); - } - out += '&#' + ch + ';'; - } - - return out; + return tokens; } }; -const { defaults: 
defaults$2 } = defaults; +const { defaults: defaults$3 } = defaults; const { cleanUrl: cleanUrl$1, escape: escape$2 @@ -1432,7 +1578,7 @@ const { */ var Renderer_1 = class Renderer { constructor(options) { - this.options = options || defaults$2; + this.options = options || defaults$3; } code(code, infostring, escaped) { @@ -1663,7 +1809,7 @@ var Slugger_1 = class Slugger { } }; -const { defaults: defaults$3 } = defaults; +const { defaults: defaults$4 } = defaults; const { unescape: unescape$1 } = helpers; @@ -1673,7 +1819,7 @@ const { */ var Parser_1 = class Parser { constructor(options) { - this.options = options || defaults$3; + this.options = options || defaults$4; this.options.renderer = this.options.renderer || new Renderer_1(); this.renderer = this.options.renderer; this.renderer.options = this.options; @@ -1924,7 +2070,7 @@ const { const { getDefaults, changeDefaults, - defaults: defaults$4 + defaults: defaults$5 } = defaults; /** @@ -2037,7 +2183,7 @@ marked.setOptions = function(opt) { marked.getDefaults = getDefaults; -marked.defaults = defaults$4; +marked.defaults = defaults$5; /** * Expose @@ -2052,6 +2198,8 @@ marked.TextRenderer = TextRenderer_1; marked.Lexer = Lexer_1; marked.lexer = Lexer_1.lex; +marked.Tokenizer = Tokenizer_1; + marked.Slugger = Slugger_1; marked.parse = marked; diff --git a/lib/marked.js b/lib/marked.js index 4dfb61da80..b96962ff37 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -53,6 +53,7 @@ silent: false, smartLists: false, smartypants: false, + tokenizer: null, xhtml: false }; } @@ -342,6 +343,617 @@ checkSanitizeDeprecation: checkSanitizeDeprecation }; + var defaults$1 = defaults.defaults; + var rtrim$1 = helpers.rtrim, + splitCells$1 = helpers.splitCells, + _escape = helpers.escape, + findClosingBracket$1 = helpers.findClosingBracket; + + function outputLink(cap, link, raw) { + var href = link.href; + var title = link.title ? 
_escape(link.title) : null; + + if (cap[0].charAt(0) !== '!') { + return { + type: 'link', + raw: raw, + href: href, + title: title, + text: cap[1] + }; + } else { + return { + type: 'image', + raw: raw, + text: _escape(cap[1]), + href: href, + title: title + }; + } + } + /** + * Tokenizer + */ + + + var Tokenizer_1 = /*#__PURE__*/function () { + function Tokenizer(options) { + this.options = options || defaults$1; + } + + var _proto = Tokenizer.prototype; + + _proto.space = function space(src) { + var cap = this.rules.block.newline.exec(src); + + if (cap) { + if (cap[0].length > 1) { + return { + type: 'space', + raw: cap[0] + }; + } + + return { + raw: '\n' + }; + } + }; + + _proto.code = function code(src, tokens) { + var cap = this.rules.block.code.exec(src); + + if (cap) { + var lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph. + + if (lastToken && lastToken.type === 'paragraph') { + tokens.pop(); + lastToken.text += '\n' + cap[0].trimRight(); + lastToken.raw += '\n' + cap[0]; + return lastToken; + } else { + var text = cap[0].replace(/^ {4}/gm, ''); + return { + type: 'code', + raw: cap[0], + codeBlockStyle: 'indented', + text: !this.options.pedantic ? rtrim$1(text, '\n') : text + }; + } + } + }; + + _proto.fences = function fences(src) { + var cap = this.rules.block.fences.exec(src); + + if (cap) { + return { + type: 'code', + raw: cap[0], + lang: cap[2] ? cap[2].trim() : cap[2], + text: cap[3] || '' + }; + } + }; + + _proto.heading = function heading(src) { + var cap = this.rules.block.heading.exec(src); + + if (cap) { + return { + type: 'heading', + raw: cap[0], + depth: cap[1].length, + text: cap[2] + }; + } + }; + + _proto.nptable = function nptable(src) { + var cap = this.rules.block.nptable.exec(src); + + if (cap) { + var item = { + type: 'table', + header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), + align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), + cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [], + raw: cap[0] + }; + + if (item.header.length === item.align.length) { + var l = item.align.length; + var i; + + for (i = 0; i < l; i++) { + if (/^ *-+: *$/.test(item.align[i])) { + item.align[i] = 'right'; + } else if (/^ *:-+: *$/.test(item.align[i])) { + item.align[i] = 'center'; + } else if (/^ *:-+ *$/.test(item.align[i])) { + item.align[i] = 'left'; + } else { + item.align[i] = null; + } + } + + l = item.cells.length; + + for (i = 0; i < l; i++) { + item.cells[i] = splitCells$1(item.cells[i], item.header.length); + } + + return item; + } + } + }; + + _proto.hr = function hr(src) { + var cap = this.rules.block.hr.exec(src); + + if (cap) { + return { + type: 'hr', + raw: cap[0] + }; + } + }; + + _proto.blockquote = function blockquote(src) { + var cap = this.rules.block.blockquote.exec(src); + + if (cap) { + var text = cap[0].replace(/^ *> ?/gm, ''); + return { + type: 'blockquote', + raw: cap[0], + text: text + }; + } + }; + + _proto.list = function list(src) { + var cap = this.rules.block.list.exec(src); + + if (cap) { + var raw = cap[0]; + var bull = cap[2]; + var isordered = bull.length > 1; + var list = { + type: 'list', + raw: raw, + ordered: isordered, + start: isordered ? +bull : '', + loose: false, + items: [] + }; // Get each top-level item. + + var itemMatch = cap[0].match(this.rules.block.item); + var next = false, + item, + space, + b, + addBack, + loose, + istask, + ischecked; + var l = itemMatch.length; + + for (var i = 0; i < l; i++) { + item = itemMatch[i]; + raw = item; // Remove the list item's bullet + // so it is seen as the next token. + + space = item.length; + item = item.replace(/^ *([*+-]|\d+\.) */, ''); // Outdent whatever the + // list item contains. Hacky. + + if (~item.indexOf('\n ')) { + space -= item.length; + item = !this.options.pedantic ? 
item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') : item.replace(/^ {1,4}/gm, ''); + } // Determine whether the next list item belongs here. + // Backpedal if it does not belong in this list. + + + if (i !== l - 1) { + b = this.rules.block.bullet.exec(itemMatch[i + 1])[0]; + + if (bull.length > 1 ? b.length === 1 : b.length > 1 || this.options.smartLists && b !== bull) { + addBack = itemMatch.slice(i + 1).join('\n'); + list.raw = list.raw.substring(0, list.raw.length - addBack.length); + i = l - 1; + } + } // Determine whether item is loose or not. + // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ + // for discount behavior. + + + loose = next || /\n\n(?!\s*$)/.test(item); + + if (i !== l - 1) { + next = item.charAt(item.length - 1) === '\n'; + if (!loose) loose = next; + } + + if (loose) { + list.loose = true; + } // Check for task list items + + + istask = /^\[[ xX]\] /.test(item); + ischecked = undefined; + + if (istask) { + ischecked = item[1] !== ' '; + item = item.replace(/^\[[ xX]\] +/, ''); + } + + list.items.push({ + raw: raw, + task: istask, + checked: ischecked, + loose: loose, + text: item + }); + } + + return list; + } + }; + + _proto.html = function html(src) { + var cap = this.rules.block.html.exec(src); + + if (cap) { + return { + type: this.options.sanitize ? 'paragraph' : 'html', + raw: cap[0], + pre: !this.options.sanitizer && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), + text: this.options.sanitize ? this.options.sanitizer ? 
this.options.sanitizer(cap[0]) : _escape(cap[0]) : cap[0] + }; + } + }; + + _proto.def = function def(src) { + var cap = this.rules.block.def.exec(src); + + if (cap) { + if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); + var tag = cap[1].toLowerCase().replace(/\s+/g, ' '); + return { + tag: tag, + raw: cap[0], + href: cap[2], + title: cap[3] + }; + } + }; + + _proto.table = function table(src) { + var cap = this.rules.block.table.exec(src); + + if (cap) { + var item = { + type: 'table', + header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), + align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), + cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [] + }; + + if (item.header.length === item.align.length) { + item.raw = cap[0]; + var l = item.align.length; + var i; + + for (i = 0; i < l; i++) { + if (/^ *-+: *$/.test(item.align[i])) { + item.align[i] = 'right'; + } else if (/^ *:-+: *$/.test(item.align[i])) { + item.align[i] = 'center'; + } else if (/^ *:-+ *$/.test(item.align[i])) { + item.align[i] = 'left'; + } else { + item.align[i] = null; + } + } + + l = item.cells.length; + + for (i = 0; i < l; i++) { + item.cells[i] = splitCells$1(item.cells[i].replace(/^ *\| *| *\| *$/g, ''), item.header.length); + } + + return item; + } + } + }; + + _proto.lheading = function lheading(src) { + var cap = this.rules.block.lheading.exec(src); + + if (cap) { + return { + type: 'heading', + raw: cap[0], + depth: cap[2].charAt(0) === '=' ? 1 : 2, + text: cap[1] + }; + } + }; + + _proto.paragraph = function paragraph(src) { + var cap = this.rules.block.paragraph.exec(src); + + if (cap) { + return { + type: 'paragraph', + raw: cap[0], + text: cap[1].charAt(cap[1].length - 1) === '\n' ? 
cap[1].slice(0, -1) : cap[1] + }; + } + }; + + _proto.text = function text(src) { + var cap = this.rules.block.text.exec(src); + + if (cap) { + return { + type: 'text', + raw: cap[0], + text: cap[0] + }; + } + }; + + _proto.escape = function escape(src) { + var cap = this.rules.inline.escape.exec(src); + + if (cap) { + return { + type: 'escape', + raw: cap[0], + text: _escape(cap[1]) + }; + } + }; + + _proto.tag = function tag(src, inLink, inRawBlock) { + var cap = this.rules.inline.tag.exec(src); + + if (cap) { + if (!inLink && /^/i.test(cap[0])) { + inLink = false; + } + + if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + inRawBlock = true; + } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + inRawBlock = false; + } + + return { + type: this.options.sanitize ? 'text' : 'html', + raw: cap[0], + inLink: inLink, + inRawBlock: inRawBlock, + text: this.options.sanitize ? this.options.sanitizer ? this.options.sanitizer(cap[0]) : _escape(cap[0]) : cap[0] + }; + } + }; + + _proto.link = function link(src) { + var cap = this.rules.inline.link.exec(src); + + if (cap) { + var lastParenIndex = findClosingBracket$1(cap[2], '()'); + + if (lastParenIndex > -1) { + var start = cap[0].indexOf('!') === 0 ? 5 : 4; + var linkLen = start + cap[1].length + lastParenIndex; + cap[2] = cap[2].substring(0, lastParenIndex); + cap[0] = cap[0].substring(0, linkLen).trim(); + cap[3] = ''; + } + + var href = cap[2]; + var title = ''; + + if (this.options.pedantic) { + var link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); + + if (link) { + href = link[1]; + title = link[3]; + } else { + title = ''; + } + } else { + title = cap[3] ? cap[3].slice(1, -1) : ''; + } + + href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); + var token = outputLink(cap, { + href: href ? href.replace(this.rules.inline._escapes, '$1') : href, + title: title ? 
title.replace(this.rules.inline._escapes, '$1') : title + }, cap[0]); + return token; + } + }; + + _proto.reflink = function reflink(src, links) { + var cap; + + if ((cap = this.rules.inline.reflink.exec(src)) || (cap = this.rules.inline.nolink.exec(src))) { + var link = (cap[2] || cap[1]).replace(/\s+/g, ' '); + link = links[link.toLowerCase()]; + + if (!link || !link.href) { + var text = cap[0].charAt(0); + return { + type: 'text', + raw: text, + text: text + }; + } + + var token = outputLink(cap, link, cap[0]); + return token; + } + }; + + _proto.strong = function strong(src) { + var cap = this.rules.inline.strong.exec(src); + + if (cap) { + return { + type: 'strong', + raw: cap[0], + text: cap[4] || cap[3] || cap[2] || cap[1] + }; + } + }; + + _proto.em = function em(src) { + var cap = this.rules.inline.em.exec(src); + + if (cap) { + return { + type: 'em', + raw: cap[0], + text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1] + }; + } + }; + + _proto.codespan = function codespan(src) { + var cap = this.rules.inline.code.exec(src); + + if (cap) { + return { + type: 'codespan', + raw: cap[0], + text: _escape(cap[2].trim(), true) + }; + } + }; + + _proto.br = function br(src) { + var cap = this.rules.inline.br.exec(src); + + if (cap) { + return { + type: 'br', + raw: cap[0] + }; + } + }; + + _proto.del = function del(src) { + var cap = this.rules.inline.del.exec(src); + + if (cap) { + return { + type: 'del', + raw: cap[0], + text: cap[1] + }; + } + }; + + _proto.autolink = function autolink(src, mangle) { + var cap = this.rules.inline.autolink.exec(src); + + if (cap) { + var text, href; + + if (cap[2] === '@') { + text = _escape(this.options.mangle ? 
mangle(cap[1]) : cap[1]); + href = 'mailto:' + text; + } else { + text = _escape(cap[1]); + href = text; + } + + return { + type: 'link', + raw: cap[0], + text: text, + href: href, + tokens: [{ + type: 'text', + raw: text, + text: text + }] + }; + } + }; + + _proto.url = function url(src, mangle) { + var cap; + + if (cap = this.rules.inline.url.exec(src)) { + var text, href; + + if (cap[2] === '@') { + text = _escape(this.options.mangle ? mangle(cap[0]) : cap[0]); + href = 'mailto:' + text; + } else { + // do extended autolink path validation + var prevCapZero; + + do { + prevCapZero = cap[0]; + cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; + } while (prevCapZero !== cap[0]); + + text = _escape(cap[0]); + + if (cap[1] === 'www.') { + href = 'http://' + text; + } else { + href = text; + } + } + + return { + type: 'link', + raw: cap[0], + text: text, + href: href, + tokens: [{ + type: 'text', + raw: text, + text: text + }] + }; + } + }; + + _proto.inlineText = function inlineText(src, inRawBlock, smartypants) { + var cap = this.rules.inline.text.exec(src); + + if (cap) { + var text; + + if (inRawBlock) { + text = this.options.sanitize ? this.options.sanitizer ? this.options.sanitizer(cap[0]) : _escape(cap[0]) : cap[0]; + } else { + text = _escape(this.options.smartypants ? 
smartypants(cap[0]) : cap[0]); + } + + return { + type: 'text', + raw: cap[0], + text: text + }; + } + }; + + return Tokenizer; + }(); + var noopTest$1 = helpers.noopTest, edit$1 = helpers.edit, merge$1 = helpers.merge; @@ -509,42 +1121,81 @@ inline: inline }; - var defaults$1 = defaults.defaults; + var defaults$2 = defaults.defaults; var block$1 = rules.block, inline$1 = rules.inline; - var rtrim$1 = helpers.rtrim, - splitCells$1 = helpers.splitCells, - escape$1 = helpers.escape, - findClosingBracket$1 = helpers.findClosingBracket; + /** + * smartypants text replacement + */ + + function smartypants(text) { + return text // em-dashes + .replace(/---/g, "\u2014") // en-dashes + .replace(/--/g, "\u2013") // opening singles + .replace(/(^|[-\u2014/(\[{"\s])'/g, "$1\u2018") // closing singles & apostrophes + .replace(/'/g, "\u2019") // opening doubles + .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, "$1\u201C") // closing doubles + .replace(/"/g, "\u201D") // ellipses + .replace(/\.{3}/g, "\u2026"); + } + /** + * mangle email addresses + */ + + + function mangle(text) { + var out = '', + i, + ch; + var l = text.length; + + for (i = 0; i < l; i++) { + ch = text.charCodeAt(i); + + if (Math.random() > 0.5) { + ch = 'x' + ch.toString(16); + } + + out += '&#' + ch + ';'; + } + + return out; + } /** * Block Lexer */ + var Lexer_1 = /*#__PURE__*/function () { function Lexer(options) { this.tokens = []; this.tokens.links = Object.create(null); - this.options = options || defaults$1; - this.rules = { + this.options = options || defaults$2; + this.options.tokenizer = this.options.tokenizer || new Tokenizer_1(); + this.tokenizer = this.options.tokenizer; + this.tokenizer.options = this.options; + var rules = { block: block$1.normal, inline: inline$1.normal }; if (this.options.pedantic) { - this.rules.block = block$1.pedantic; - this.rules.inline = inline$1.pedantic; + rules.block = block$1.pedantic; + rules.inline = inline$1.pedantic; } else if (this.options.gfm) { - this.rules.block 
= block$1.gfm; + rules.block = block$1.gfm; if (this.options.breaks) { - this.rules.inline = inline$1.breaks; + rules.inline = inline$1.breaks; } else { - this.rules.inline = inline$1.gfm; + rules.inline = inline$1.gfm; } } + + this.tokenizer.rules = rules; } /** - * Expose Block Rules + * Expose Rules */ @@ -564,7 +1215,7 @@ _proto.lex = function lex(src) { src = src.replace(/\r\n|\r/g, '\n').replace(/\t/g, ' '); - this.blockTokens(src, this.tokens); + this.blockTokens(src, this.tokens, true); this.inline(this.tokens); return this.tokens; } @@ -574,242 +1225,100 @@ ; _proto.blockTokens = function blockTokens(src, tokens, top) { + if (tokens === void 0) { + tokens = []; + } + if (top === void 0) { top = true; } src = src.replace(/^ +$/gm, ''); - var next, loose, cap, bull, b, item, list, space, i, tag, l, isordered, istask, ischecked, lastToken, addBack, raw; + var token, i, l; while (src) { // newline - if (cap = this.rules.block.newline.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - - if (cap[0].length > 1) { - tokens.push({ - type: 'space', - raw: raw - }); + if (token = this.tokenizer.space(src)) { + src = src.substring(token.raw.length); + + if (token.type) { + tokens.push(token); } - } // code + continue; + } // code - if (cap = this.rules.block.code.exec(src)) { - lastToken = tokens[tokens.length - 1]; - src = src.substring(cap[0].length); - raw = cap[0]; // An indented code block cannot interrupt a paragraph. - - if (lastToken && lastToken.type === 'paragraph') { - lastToken.text += '\n' + cap[0].trimRight(); - lastToken.raw += '\n' + raw; - } else { - cap = cap[0].replace(/^ {4}/gm, ''); - tokens.push({ - type: 'code', - raw: raw, - codeBlockStyle: 'indented', - text: !this.options.pedantic ? 
rtrim$1(cap, '\n') : cap - }); - } + if (token = this.tokenizer.code(src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // fences - if (cap = this.rules.block.fences.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'code', - raw: raw, - lang: cap[2] ? cap[2].trim() : cap[2], - text: cap[3] || '' - }); + if (token = this.tokenizer.fences(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // heading - if (cap = this.rules.block.heading.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'heading', - raw: raw, - depth: cap[1].length, - text: cap[2] - }); + if (token = this.tokenizer.heading(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // table no leading pipe (gfm) - if (cap = this.rules.block.nptable.exec(src)) { - item = { - type: 'table', - header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), - align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), - cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [] - }; - - if (item.header.length === item.align.length) { - src = src.substring(cap[0].length); - raw = cap[0]; - item.raw = raw; - l = item.align.length; - - for (i = 0; i < l; i++) { - if (/^ *-+: *$/.test(item.align[i])) { - item.align[i] = 'right'; - } else if (/^ *:-+: *$/.test(item.align[i])) { - item.align[i] = 'center'; - } else if (/^ *:-+ *$/.test(item.align[i])) { - item.align[i] = 'left'; - } else { - item.align[i] = null; - } - } - - l = item.cells.length; - - for (i = 0; i < l; i++) { - item.cells[i] = splitCells$1(item.cells[i], item.header.length); - } - - tokens.push(item); - continue; - } + if (token = this.tokenizer.nptable(src)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; } // hr - if (cap = this.rules.block.hr.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'hr', - raw: raw - }); + if (token = this.tokenizer.hr(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // blockquote - if (cap = this.rules.block.blockquote.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - cap = cap[0].replace(/^ *> ?/gm, ''); - tokens.push({ - type: 'blockquote', - raw: raw, - tokens: this.blockTokens(cap, [], top) - }); + if (token = this.tokenizer.blockquote(src)) { + src = src.substring(token.raw.length); + token.tokens = this.blockTokens(token.text, [], top); + tokens.push(token); continue; } // list - if (cap = this.rules.block.list.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - bull = cap[2]; - isordered = bull.length > 1; - list = { - type: 'list', - raw: raw, - ordered: isordered, - start: isordered ? +bull : '', - loose: false, - items: [] - }; - tokens.push(list); // Get each top-level item. 
- - cap = cap[0].match(this.rules.block.item); - next = false; - l = cap.length; + if (token = this.tokenizer.list(src)) { + src = src.substring(token.raw.length); + l = token.items.length; for (i = 0; i < l; i++) { - item = cap[i]; - raw = item.trim(); // Remove the list item's bullet - // so it is seen as the next token. - - space = item.length; - item = item.replace(/^ *([*+-]|\d+\.) */, ''); // Outdent whatever the - // list item contains. Hacky. - - if (~item.indexOf('\n ')) { - space -= item.length; - item = !this.options.pedantic ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') : item.replace(/^ {1,4}/gm, ''); - } // Determine whether the next list item belongs here. - // Backpedal if it does not belong in this list. - - - if (i !== l - 1) { - b = block$1.bullet.exec(cap[i + 1])[0]; - - if (bull.length > 1 ? b.length === 1 : b.length > 1 || this.options.smartLists && b !== bull) { - addBack = cap.slice(i + 1).join('\n'); - src = addBack + src; - list.raw = list.raw.substring(list.raw.length - addBack.length); - i = l - 1; - } - } // Determine whether item is loose or not. - // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ - // for discount behavior. - - - loose = next || /\n\n(?!\s*$)/.test(item); - - if (i !== l - 1) { - next = item.charAt(item.length - 1) === '\n'; - if (!loose) loose = next; - } - - if (loose) { - list.loose = true; - } // Check for task list items - - - istask = /^\[[ xX]\] /.test(item); - ischecked = undefined; - - if (istask) { - ischecked = item[1] !== ' '; - item = item.replace(/^\[[ xX]\] +/, ''); - } - - list.items.push({ - raw: raw, - task: istask, - checked: ischecked, - loose: loose, - tokens: this.blockTokens(item, [], false) - }); + token.items[i].tokens = this.blockTokens(token.items[i].text, [], false); } + tokens.push(token); continue; } // html - if (cap = this.rules.block.html.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: this.options.sanitize ? 
'paragraph' : 'html', - raw: raw, - pre: !this.options.sanitizer && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), - text: this.options.sanitize ? this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$1(cap[0]) : cap[0] - }); + if (token = this.tokenizer.html(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // def - if (top && (cap = this.rules.block.def.exec(src))) { - src = src.substring(cap[0].length); - if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); - tag = cap[1].toLowerCase().replace(/\s+/g, ' '); + if (top && (token = this.tokenizer.def(src))) { + src = src.substring(token.raw.length); - if (!this.tokens.links[tag]) { - this.tokens.links[tag] = { - href: cap[2], - title: cap[3] + if (!this.tokens.links[token.tag]) { + this.tokens.links[token.tag] = { + href: token.href, + title: token.title }; } @@ -817,77 +1326,30 @@ } // table (gfm) - if (cap = this.rules.block.table.exec(src)) { - item = { - type: 'table', - header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), - align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), - cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [] - }; - - if (item.header.length === item.align.length) { - src = src.substring(cap[0].length); - item.raw = cap[0]; - l = item.align.length; - - for (i = 0; i < l; i++) { - if (/^ *-+: *$/.test(item.align[i])) { - item.align[i] = 'right'; - } else if (/^ *:-+: *$/.test(item.align[i])) { - item.align[i] = 'center'; - } else if (/^ *:-+ *$/.test(item.align[i])) { - item.align[i] = 'left'; - } else { - item.align[i] = null; - } - } - - l = item.cells.length; - - for (i = 0; i < l; i++) { - item.cells[i] = splitCells$1(item.cells[i].replace(/^ *\| *| *\| *$/g, ''), item.header.length); - } - - tokens.push(item); - continue; - } + if (token = this.tokenizer.table(src)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; } // lheading - if (cap = this.rules.block.lheading.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'heading', - raw: raw, - depth: cap[2].charAt(0) === '=' ? 1 : 2, - text: cap[1] - }); + if (token = this.tokenizer.lheading(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // top-level paragraph - if (top && (cap = this.rules.block.paragraph.exec(src))) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'paragraph', - raw: raw, - text: cap[1].charAt(cap[1].length - 1) === '\n' ? cap[1].slice(0, -1) : cap[1] - }); + if (top && (token = this.tokenizer.paragraph(src))) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // text - if (cap = this.rules.block.text.exec(src)) { - // Top-level should never reach here. 
- src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'text', - raw: raw, - text: cap[0] - }); + if (token = this.tokenizer.text(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -896,6 +1358,7 @@ if (this.options.silent) { console.error(errMsg); + break; } else { throw new Error(errMsg); } @@ -978,282 +1441,118 @@ */ ; - _proto.inlineTokens = function inlineTokens(src, tokens) { - var out = '', - link, - text, - newTokens, - href, - title, - cap, - prevCapZero, - lastParenIndex, - start, - linkLen, - raw; + _proto.inlineTokens = function inlineTokens(src, tokens, inLink, inRawBlock) { + if (tokens === void 0) { + tokens = []; + } + + if (inLink === void 0) { + inLink = false; + } + + if (inRawBlock === void 0) { + inRawBlock = false; + } + + var token; while (src) { // escape - if (cap = this.rules.inline.escape.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - text = escape$1(cap[1]); - out += text; - tokens.push({ - type: 'escape', - raw: raw, - text: text - }); + if (token = this.tokenizer.escape(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // tag - if (cap = this.rules.inline.tag.exec(src)) { - if (!this.inLink && /^/i.test(cap[0])) { - this.inLink = false; - } - - if (!this.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - this.inRawBlock = true; - } else if (this.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - this.inRawBlock = false; - } - - src = src.substring(cap[0].length); - raw = cap[0]; - text = this.options.sanitize ? this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$1(cap[0]) : cap[0]; - tokens.push({ - type: this.options.sanitize ? 
'text' : 'html', - raw: raw, - text: text - }); - out += text; + if (token = this.tokenizer.tag(src, inLink, inRawBlock)) { + src = src.substring(token.raw.length); + inLink = token.inLink; + inRawBlock = token.inRawBlock; + tokens.push(token); continue; } // link - if (cap = this.rules.inline.link.exec(src)) { - lastParenIndex = findClosingBracket$1(cap[2], '()'); + if (token = this.tokenizer.link(src)) { + src = src.substring(token.raw.length); - if (lastParenIndex > -1) { - start = cap[0].indexOf('!') === 0 ? 5 : 4; - linkLen = start + cap[1].length + lastParenIndex; - cap[2] = cap[2].substring(0, lastParenIndex); - cap[0] = cap[0].substring(0, linkLen).trim(); - cap[3] = ''; + if (token.type === 'link') { + token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); } - src = src.substring(cap[0].length); - raw = cap[0]; - this.inLink = true; - href = cap[2]; - - if (this.options.pedantic) { - link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); - - if (link) { - href = link[1]; - title = link[3]; - } else { - title = ''; - } - } else { - title = cap[3] ? 
cap[3].slice(1, -1) : ''; - } - - href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); - out += this.outputLink(cap, { - href: this.escapes(href), - title: this.escapes(title) - }, tokens, raw); - this.inLink = false; + tokens.push(token); continue; } // reflink, nolink - if ((cap = this.rules.inline.reflink.exec(src)) || (cap = this.rules.inline.nolink.exec(src))) { - src = src.substring(cap[0].length); - raw = cap[0]; - link = (cap[2] || cap[1]).replace(/\s+/g, ' '); - link = this.tokens.links[link.toLowerCase()]; + if (token = this.tokenizer.reflink(src, this.tokens.links)) { + src = src.substring(token.raw.length); - if (!link || !link.href) { - text = cap[0].charAt(0); - out += text; - tokens.push({ - type: 'text', - raw: text, - text: text - }); - src = cap[0].substring(1) + src; - continue; + if (token.type === 'link') { + token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); } - this.inLink = true; - out += this.outputLink(cap, link, tokens, raw); - this.inLink = false; + tokens.push(token); continue; } // strong - if (cap = this.rules.inline.strong.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? [] : null; - text = this.inlineTokens(cap[4] || cap[3] || cap[2] || cap[1], newTokens); - tokens.push({ - type: 'strong', - raw: raw, - text: text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.strong(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // em - if (cap = this.rules.inline.em.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? 
[] : null; - text = this.inlineTokens(cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1], newTokens); - tokens.push({ - type: 'em', - raw: raw, - text: text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.em(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // code - if (cap = this.rules.inline.code.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - text = escape$1(cap[2].trim(), true); - tokens.push({ - type: 'codespan', - raw: raw, - text: text - }); - out += text; + if (token = this.tokenizer.codespan(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // br - if (cap = this.rules.inline.br.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'br', - raw: raw - }); - out += '\n'; + if (token = this.tokenizer.br(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // del (gfm) - if (cap = this.rules.inline.del.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? [] : null; - text = this.inlineTokens(cap[1], newTokens); - tokens.push({ - type: 'del', - raw: raw, - text: text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.del(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // autolink - if (cap = this.rules.inline.autolink.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - - if (cap[2] === '@') { - text = escape$1(this.options.mangle ? 
this.mangle(cap[1]) : cap[1]); - href = 'mailto:' + text; - } else { - text = escape$1(cap[1]); - href = text; - } - - tokens.push({ - type: 'link', - raw: raw, - text: text, - href: href, - tokens: [{ - type: 'text', - raw: text, - text: text - }] - }); - out += text; + if (token = this.tokenizer.autolink(src, mangle)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // url (gfm) - if (!this.inLink && (cap = this.rules.inline.url.exec(src))) { - if (cap[2] === '@') { - text = escape$1(this.options.mangle ? this.mangle(cap[0]) : cap[0]); - href = 'mailto:' + text; - } else { - // do extended autolink path validation - do { - prevCapZero = cap[0]; - cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; - } while (prevCapZero !== cap[0]); - - text = escape$1(cap[0]); - - if (cap[1] === 'www.') { - href = 'http://' + text; - } else { - href = text; - } - } - - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'link', - raw: raw, - text: text, - href: href, - tokens: [{ - type: 'text', - raw: text, - text: text - }] - }); - out += text; + if (!inLink && (token = this.tokenizer.url(src, mangle))) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // text - if (cap = this.rules.inline.text.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - - if (this.inRawBlock) { - text = this.options.sanitize ? this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$1(cap[0]) : cap[0]; - } else { - text = escape$1(this.options.smartypants ? 
this.smartypants(cap[0]) : cap[0]); - } - - tokens.push({ - type: 'text', - raw: raw, - text: text - }); - out += text; + if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -1262,89 +1561,14 @@ if (this.options.silent) { console.error(errMsg); + break; } else { throw new Error(errMsg); } } } - return out; - }; - - _proto.escapes = function escapes(text) { - return text ? text.replace(inline$1._escapes, '$1') : text; - } - /** - * tokenize Link - */ - ; - - _proto.outputLink = function outputLink(cap, link, tokens, raw) { - var href = link.href; - var title = link.title ? escape$1(link.title) : null; - var newTokens = tokens ? [] : null; - - if (cap[0].charAt(0) !== '!') { - var text = this.inlineTokens(cap[1], newTokens); - tokens.push({ - type: 'link', - raw: raw, - text: text, - href: href, - title: title, - tokens: newTokens - }); - return text; - } else { - var _text = escape$1(cap[1]); - - tokens.push({ - type: 'image', - raw: raw, - text: _text, - href: href, - title: title - }); - return _text; - } - } - /** - * Smartypants Transformations - */ - ; - - _proto.smartypants = function smartypants(text) { - return text // em-dashes - .replace(/---/g, "\u2014") // en-dashes - .replace(/--/g, "\u2013") // opening singles - .replace(/(^|[-\u2014/(\[{"\s])'/g, "$1\u2018") // closing singles & apostrophes - .replace(/'/g, "\u2019") // opening doubles - .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, "$1\u201C") // closing doubles - .replace(/"/g, "\u201D") // ellipses - .replace(/\.{3}/g, "\u2026"); - } - /** - * Mangle Links - */ - ; - - _proto.mangle = function mangle(text) { - var out = '', - i, - ch; - var l = text.length; - - for (i = 0; i < l; i++) { - ch = text.charCodeAt(i); - - if (Math.random() > 0.5) { - ch = 'x' + ch.toString(16); - } - - out += '&#' + ch + ';'; - } - - return out; + return tokens; }; _createClass(Lexer, null, [{ @@ -1360,16 +1584,16 @@ return 
Lexer; }(); - var defaults$2 = defaults.defaults; + var defaults$3 = defaults.defaults; var cleanUrl$1 = helpers.cleanUrl, - escape$2 = helpers.escape; + escape$1 = helpers.escape; /** * Renderer */ var Renderer_1 = /*#__PURE__*/function () { function Renderer(options) { - this.options = options || defaults$2; + this.options = options || defaults$3; } var _proto = Renderer.prototype; @@ -1387,10 +1611,10 @@ } if (!lang) { - return '
' + (escaped ? _code : escape$2(_code, true)) + '
'; + return '
' + (escaped ? _code : escape$1(_code, true)) + '
'; } - return '
' + (escaped ? _code : escape$2(_code, true)) + '
\n'; + return '
' + (escaped ? _code : escape$1(_code, true)) + '
\n'; }; _proto.blockquote = function blockquote(quote) { @@ -1475,7 +1699,7 @@ return text; } - var out = '
An error occurred:

' + escape$3(e.message + '', true) + '
'; + return '

An error occurred:

' + escape$2(e.message + '', true) + '
'; } throw e; @@ -2029,7 +2253,7 @@ }; marked.getDefaults = getDefaults; - marked.defaults = defaults$4; + marked.defaults = defaults$5; /** * Expose */ @@ -2040,6 +2264,7 @@ marked.TextRenderer = TextRenderer_1; marked.Lexer = Lexer_1; marked.lexer = Lexer_1.lex; + marked.Tokenizer = Tokenizer_1; marked.Slugger = Slugger_1; marked.parse = marked; var marked_1 = marked; diff --git a/marked.min.js b/marked.min.js index ba8cc00aee..8228059297 100644 --- a/marked.min.js +++ b/marked.min.js @@ -3,4 +3,4 @@ * Copyright (c) 2011-2020, Christopher Jeffrey. (MIT Licensed) * https://github.com/markedjs/marked */ -!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e=e||self).marked=t()}(this,function(){"use strict";function i(e,t){for(var n=0;n"']/),s=/[&<>"']/g,l=/[<>"']|&(?!#?\w+;)/,a=/[<>"']|&(?!#?\w+;)/g,o={"&":"&","<":"<",">":">",'"':""","'":"'"};var c=/&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/gi;function h(e){return e.replace(c,function(e,t){return"colon"===(t=t.toLowerCase())?":":"#"===t.charAt(0)?"x"===t.charAt(1)?String.fromCharCode(parseInt(t.substring(2),16)):String.fromCharCode(+t.substring(1)):""})}var u=/(^|[^\[])\^/g;var p=/[^\w:]/g,g=/^$|^[a-z][a-z0-9+.-]*:|^[?#]/i;var f={},d=/^[^:]+:\/*[^/]*$/,b=/^([^:]+:)[\s\S]*$/,k=/^([^:]+:\/*[^/]*)[\s\S]*$/;function m(e,t){f[" "+e]||(d.test(e)?f[" "+e]=e+"/":f[" "+e]=x(e,"/",!0));var n=-1===(e=f[" "+e]).indexOf(":");return"//"===t.substring(0,2)?n?t:e.replace(b,"$1")+t:"/"===t.charAt(0)?n?t:e.replace(k,"$1")+t:e+t}function x(e,t,n){var r=e.length;if(0===r)return"";for(var s=0;st)n.splice(t);else for(;n.length ?(paragraph|[^\n]*)(?:\n|$))+/,list:/^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,html:"^ {0,3}(?:<(script|pre|style)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)|comment[^\\n]*(\\n+|$)|<\\?[\\s\\S]*?\\?>\\n*|\\n*|\\n*|)[\\s\\S]*?(?:\\n{2,}|$)|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? 
*/?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$))",def:/^ {0,3}\[(label)\]: *\n? *]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,nptable:R,table:R,lheading:/^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,_paragraph:/^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html)[^\n]+)*)/,text:/^[^\n]+/,_label:/(?!\s*\])(?:\\[\[\]]|[^\[\]])+/,_title:/(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/};I.def=Z(I.def).replace("label",I._label).replace("title",I._title).getRegex(),I.bullet=/(?:[*+-]|\d{1,9}\.)/,I.item=/^( *)(bull) ?[^\n]*(?:\n(?!\1bull ?)[^\n]*)*/,I.item=Z(I.item,"gm").replace(/bull/g,I.bullet).getRegex(),I.list=Z(I.list).replace(/bull/g,I.bullet).replace("hr","\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))").replace("def","\\n+(?="+I.def.source+")").getRegex(),I._tag="address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul",I._comment=//,I.html=Z(I.html,"i").replace("comment",I._comment).replace("tag",I._tag).replace("attribute",/ +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/).getRegex(),I.paragraph=Z(I._paragraph).replace("hr",I.hr).replace("heading"," {0,3}#{1,6} ").replace("|lheading","").replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|!--)").replace("tag",I._tag).getRegex(),I.blockquote=Z(I.blockquote).replace("paragraph",I.paragraph).getRegex(),I.normal=q({},I),I.gfm=q({},I.normal,{nptable:"^ *([^|\\n ].*\\|.*)\\n *([-:]+ *\\|[-| :]*)(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)",table:"^ *\\|(.+)\\n 
*\\|?( *[-:]+[-| :]*)(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)"}),I.gfm.nptable=Z(I.gfm.nptable).replace("hr",I.hr).replace("heading"," {0,3}#{1,6} ").replace("blockquote"," {0,3}>").replace("code"," {4}[^\\n]").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|!--)").replace("tag",I._tag).getRegex(),I.gfm.table=Z(I.gfm.table).replace("hr",I.hr).replace("heading"," {0,3}#{1,6} ").replace("blockquote"," {0,3}>").replace("code"," {4}[^\\n]").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|!--)").replace("tag",I._tag).getRegex(),I.pedantic=q({},I.normal,{html:Z("^ *(?:comment *(?:\\n|\\s*$)|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))").replace("comment",I._comment).replace(/tag/g,"(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b").getRegex(),def:/^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,heading:/^ *(#{1,6}) *([^\n]+?) 
*(?:#+ *)?(?:\n+|$)/,fences:R,paragraph:Z(I.normal._paragraph).replace("hr",I.hr).replace("heading"," *#{1,6} *[^\n]").replace("lheading",I.lheading).replace("blockquote"," {0,3}>").replace("|fences","").replace("|list","").replace("|html","").getRegex()});var L={escape:/^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,autolink:/^<(scheme:[^\s\x00-\x1f<>]*|email)>/,url:R,tag:"^comment|^|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>|^<\\?[\\s\\S]*?\\?>|^|^",link:/^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,reflink:/^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,nolink:/^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,strong:/^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,em:/^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,code:/^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,br:/^( 
{2,}|\\)\n(?!\s*$)/,del:R,text:/^(`+|[^`])(?:[\s\S]*?(?:(?=[\\?@\\[^_{|}~"};L.em=Z(L.em).replace(/punctuation/g,L._punctuation).getRegex(),L._escapes=/\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g,L._scheme=/[a-zA-Z][a-zA-Z0-9+.-]{1,31}/,L._email=/[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/,L.autolink=Z(L.autolink).replace("scheme",L._scheme).replace("email",L._email).getRegex(),L._attribute=/\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/,L.tag=Z(L.tag).replace("comment",I._comment).replace("attribute",L._attribute).getRegex(),L._label=/(?:\[[^\[\]]*\]|\\.|`[^`]*`|[^\[\]\\`])*?/,L._href=/<(?:\\[<>]?|[^\s<>\\])*>|[^\s\x00-\x1f]*/,L._title=/"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/,L.link=Z(L.link).replace("label",L._label).replace("href",L._href).replace("title",L._title).getRegex(),L.reflink=Z(L.reflink).replace("label",L._label).getRegex(),L.normal=q({},L),L.pedantic=q({},L.normal,{strong:/^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,em:/^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/,link:Z(/^!?\[(label)\]\((.*?)\)/).replace("label",L._label).getRegex(),reflink:Z(/^!?\[(label)\]\s*\[([^\]]*)\]/).replace("label",L._label).getRegex()}),L.gfm=q({},L.normal,{escape:Z(L.escape).replace("])","~|])").getRegex(),_extended_email:/[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,url:/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,_backpedal:/(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,del:/^~+(?=\S)([\s\S]*?\S)~+/,text:/^(`+|[^`])(?:[\s\S]*?(?:(?=[\\ ?/gm,""),t.push({type:"blockquote",raw:x,tokens:this.blockTokens(i,[],n)});else 
if(i=this.rules.block.list.exec(e))for(e=e.substring(i[0].length),c={type:"list",raw:x=i[0],ordered:f=1<(l=i[2]).length,start:f?+l:"",loose:!1,items:[]},t.push(c),r=!1,g=(i=i[0].match(this.rules.block.item)).length,u=0;u/i.test(a[0])&&(this.inLink=!1),!this.inRawBlock&&/^<(pre|code|kbd|script)(\s|>)/i.test(a[0])?this.inRawBlock=!0:this.inRawBlock&&/^<\/(pre|code|kbd|script)(\s|>)/i.test(a[0])&&(this.inRawBlock=!1),e=e.substring(a[0].length),u=a[0],r=this.options.sanitize?this.options.sanitizer?this.options.sanitizer(a[0]):P(a[0]):a[0],t.push({type:this.options.sanitize?"text":"html",raw:u,text:r}),p+=r;else if(a=this.rules.inline.link.exec(e))-1<(c=B(a[2],"()"))&&(h=(0===a[0].indexOf("!")?5:4)+a[1].length+c,a[2]=a[2].substring(0,c),a[0]=a[0].substring(0,h).trim(),a[3]=""),e=e.substring(a[0].length),u=a[0],this.inLink=!0,i=a[2],l=this.options.pedantic?(n=/^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(i))?(i=n[1],n[3]):"":a[3]?a[3].slice(1,-1):"",i=i.trim().replace(/^<([\s\S]*)>$/,"$1"),p+=this.outputLink(a,{href:this.escapes(i),title:this.escapes(l)},t,u),this.inLink=!1;else if((a=this.rules.inline.reflink.exec(e))||(a=this.rules.inline.nolink.exec(e))){if(e=e.substring(a[0].length),u=a[0],n=(a[2]||a[1]).replace(/\s+/g," "),!(n=this.tokens.links[n.toLowerCase()])||!n.href){p+=r=a[0].charAt(0),t.push({type:"text",raw:r,text:r}),e=a[0].substring(1)+e;continue}this.inLink=!0,p+=this.outputLink(a,n,t,u),this.inLink=!1}else if(a=this.rules.inline.strong.exec(e))e=e.substring(a[0].length),u=a[0],s=t?[]:null,r=this.inlineTokens(a[4]||a[3]||a[2]||a[1],s),t.push({type:"strong",raw:u,text:r,tokens:s}),p+=r;else if(a=this.rules.inline.em.exec(e))e=e.substring(a[0].length),u=a[0],s=t?[]:null,r=this.inlineTokens(a[6]||a[5]||a[4]||a[3]||a[2]||a[1],s),t.push({type:"em",raw:u,text:r,tokens:s}),p+=r;else if(a=this.rules.inline.code.exec(e))e=e.substring(a[0].length),u=a[0],r=P(a[2].trim(),!0),t.push({type:"codespan",raw:u,text:r}),p+=r;else 
if(a=this.rules.inline.br.exec(e))e=e.substring(a[0].length),u=a[0],t.push({type:"br",raw:u}),p+="\n";else if(a=this.rules.inline.del.exec(e))e=e.substring(a[0].length),u=a[0],s=t?[]:null,r=this.inlineTokens(a[1],s),t.push({type:"del",raw:u,text:r,tokens:s}),p+=r;else if(a=this.rules.inline.autolink.exec(e))e=e.substring(a[0].length),u=a[0],i="@"===a[2]?"mailto:"+(r=P(this.options.mangle?this.mangle(a[1]):a[1])):r=P(a[1]),t.push({type:"link",raw:u,text:r,href:i,tokens:[{type:"text",raw:r,text:r}]}),p+=r;else if(this.inLink||!(a=this.rules.inline.url.exec(e))){if(a=this.rules.inline.text.exec(e))e=e.substring(a[0].length),u=a[0],r=this.inRawBlock?this.options.sanitize?this.options.sanitizer?this.options.sanitizer(a[0]):P(a[0]):a[0]:P(this.options.smartypants?this.smartypants(a[0]):a[0]),t.push({type:"text",raw:u,text:r}),p+=r;else if(e){var g="Infinite loop on byte: "+e.charCodeAt(0);if(!this.options.silent)throw new Error(g);console.error(g)}}else{if("@"===a[2])i="mailto:"+(r=P(this.options.mangle?this.mangle(a[0]):a[0]));else{for(;o=a[0],a[0]=this.rules.inline._backpedal.exec(a[0])[0],o!==a[0];);r=P(a[0]),i="www."===a[1]?"http://"+r:r}e=e.substring(a[0].length),u=a[0],t.push({type:"link",raw:u,text:r,href:i,tokens:[{type:"text",raw:r,text:r}]}),p+=r}return p},s.escapes=function(e){return e?e.replace(D._escapes,"$1"):e},s.outputLink=function(e,t,n,r){var s=t.href,i=t.title?P(t.title):null,l=n?[]:null;if("!"!==e[0].charAt(0)){var a=this.inlineTokens(e[1],l);return n.push({type:"link",raw:r,text:a,href:s,title:i,tokens:l}),a}var o=P(e[1]);return n.push({type:"image",raw:r,text:o,href:s,title:i}),o},s.smartypants=function(e){return e.replace(/---/g,"—").replace(/--/g,"–").replace(/(^|[-\u2014/(\[{"\s])'/g,"$1‘").replace(/'/g,"’").replace(/(^|[-\u2014/(\[{\u2018\s])"/g,"$1“").replace(/"/g,"”").replace(/\.{3}/g,"…")},s.mangle=function(e){var t,n,r="",s=e.length;for(t=0;t'+(n?e:X(e,!0))+"\n":"
"+(n?e:X(e,!0))+"
"},t.blockquote=function(e){return"
\n"+e+"
\n"},t.html=function(e){return e},t.heading=function(e,t,n,r){return this.options.headerIds?"'+e+"\n":""+e+"\n"},t.hr=function(){return this.options.xhtml?"
\n":"
\n"},t.list=function(e,t,n){var r=t?"ol":"ul";return"<"+r+(t&&1!==n?' start="'+n+'"':"")+">\n"+e+"\n"},t.listitem=function(e){return"
  • "+e+"
  • \n"},t.checkbox=function(e){return" "},t.paragraph=function(e){return"

    "+e+"

    \n"},t.table=function(e,t){return"\n\n"+e+"\n"+(t=t&&""+t+"")+"
    \n"},t.tablerow=function(e){return"\n"+e+"\n"},t.tablecell=function(e,t){var n=t.header?"th":"td";return(t.align?"<"+n+' align="'+t.align+'">':"<"+n+">")+e+"\n"},t.strong=function(e){return""+e+""},t.em=function(e){return""+e+""},t.codespan=function(e){return""+e+""},t.br=function(){return this.options.xhtml?"
    ":"
    "},t.del=function(e){return""+e+""},t.link=function(e,t,n){if(null===(e=N(this.options.sanitize,this.options.baseUrl,e)))return n;var r='
    "},t.image=function(e,t,n){if(null===(e=N(this.options.sanitize,this.options.baseUrl,e)))return n;var r=''+n+'":">"},t.text=function(e){return e},e}(),M=function(){function e(){}var t=e.prototype;return t.strong=function(e){return e},t.em=function(e){return e},t.codespan=function(e){return e},t.del=function(e){return e},t.html=function(e){return e},t.text=function(e){return e},t.link=function(e,t,n){return""+n},t.image=function(e,t,n){return""+n},t.br=function(){return""},e}(),V=function(){function e(){this.seen={}}return e.prototype.slug=function(e){var t=e.toLowerCase().trim().replace(/<[!\/a-z].*?>/gi,"").replace(/[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,./:;<=>?@[\]^`{|}~]/g,"").replace(/\s/g,"-");if(this.seen.hasOwnProperty(t))for(var n=t;this.seen[n]++,t=n+"-"+this.seen[n],this.seen.hasOwnProperty(t););return this.seen[t]=0,t},e}(),H=t.defaults,J=_,K=function(){function n(e){this.options=e||H,this.options.renderer=this.options.renderer||new G,this.renderer=this.options.renderer,this.renderer.options=this.options,this.textRenderer=new M,this.slugger=new V}n.parse=function(e,t){return new n(t).parse(e)};var e=n.prototype;return e.parse=function(e,t){void 0===t&&(t=!0);var n,r,s,i,l,a,o,c,h,u,p,g,f,d,b,k,m,x,w="",_=e.length;for(n=0;n<_;n++)switch((u=e[n]).type){case"space":continue;case"hr":w+=this.renderer.hr();continue;case"heading":w+=this.renderer.heading(this.parseInline(u.tokens),u.depth,J(this.parseInline(u.tokens,this.textRenderer)),this.slugger);continue;case"code":w+=this.renderer.code(u.text,u.lang,u.escaped);continue;case"table":for(o=c="",i=u.header.length,r=0;rAn error occurred:

    "+Y(e.message+"",!0)+"
    ";throw e}}return re.options=re.setOptions=function(e){return Q(re.defaults,e),te(re.defaults),re},re.getDefaults=ee,re.defaults=ne,re.Parser=K,re.parser=K.parse,re.Renderer=G,re.TextRenderer=M,re.Lexer=U,re.lexer=U.lex,re.Slugger=V,re.parse=re}); \ No newline at end of file +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e=e||self).marked=t()}(this,function(){"use strict";function s(e,t){for(var n=0;n"']/),i=/[&<>"']/g,l=/[<>"']|&(?!#?\w+;)/,a=/[<>"']|&(?!#?\w+;)/g,o={"&":"&","<":"<",">":">",'"':""","'":"'"};var c=/&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/gi;function h(e){return e.replace(c,function(e,t){return"colon"===(t=t.toLowerCase())?":":"#"===t.charAt(0)?"x"===t.charAt(1)?String.fromCharCode(parseInt(t.substring(2),16)):String.fromCharCode(+t.substring(1)):""})}var u=/(^|[^\[])\^/g;var p=/[^\w:]/g,g=/^$|^[a-z][a-z0-9+.-]*:|^[?#]/i;var f={},d=/^[^:]+:\/*[^/]*$/,k=/^([^:]+:)[\s\S]*$/,b=/^([^:]+:\/*[^/]*)[\s\S]*$/;function m(e,t){f[" "+e]||(d.test(e)?f[" "+e]=e+"/":f[" "+e]=x(e,"/",!0));var n=-1===(e=f[" "+e]).indexOf(":");return"//"===t.substring(0,2)?n?t:e.replace(k,"$1")+t:"/"===t.charAt(0)?n?t:e.replace(b,"$1")+t:e+t}function x(e,t,n){var r=e.length;if(0===r)return"";for(var i=0;it)n.splice(t);else for(;n.length ?/gm,"");return{type:"blockquote",raw:t[0],text:n}}},t.list=function(e){var t=this.rules.block.list.exec(e);if(t){for(var n,r,i,s,l,a,o,c=t[0],h=t[2],u=1/i.test(r[0])&&(t=!1),!n&&/^<(pre|code|kbd|script)(\s|>)/i.test(r[0])?n=!0:n&&/^<\/(pre|code|kbd|script)(\s|>)/i.test(r[0])&&(n=!1),{type:this.options.sanitize?"text":"html",raw:r[0],inLink:t,inRawBlock:n,text:this.options.sanitize?this.options.sanitizer?this.options.sanitizer(r[0]):C(r[0]):r[0]}},t.link=function(e){var t=this.rules.inline.link.exec(e);if(t){var 
n=O(t[2],"()");if(-1$/,"$1"))?i.replace(this.rules.inline._escapes,"$1"):i,title:s?s.replace(this.rules.inline._escapes,"$1"):s},t[0])}},t.reflink=function(e,t){var n;if((n=this.rules.inline.reflink.exec(e))||(n=this.rules.inline.nolink.exec(e))){var r=(n[2]||n[1]).replace(/\s+/g," ");if((r=t[r.toLowerCase()])&&r.href)return D(n,r,n[0]);var i=n[0].charAt(0);return{type:"text",raw:i,text:i}}},t.strong=function(e){var t=this.rules.inline.strong.exec(e);if(t)return{type:"strong",raw:t[0],text:t[4]||t[3]||t[2]||t[1]}},t.em=function(e){var t=this.rules.inline.em.exec(e);if(t)return{type:"em",raw:t[0],text:t[6]||t[5]||t[4]||t[3]||t[2]||t[1]}},t.codespan=function(e){var t=this.rules.inline.code.exec(e);if(t)return{type:"codespan",raw:t[0],text:C(t[2].trim(),!0)}},t.br=function(e){var t=this.rules.inline.br.exec(e);if(t)return{type:"br",raw:t[0]}},t.del=function(e){var t=this.rules.inline.del.exec(e);if(t)return{type:"del",raw:t[0],text:t[1]}},t.autolink=function(e,t){var n,r,i=this.rules.inline.autolink.exec(e);if(i)return r="@"===i[2]?"mailto:"+(n=C(this.options.mangle?t(i[1]):i[1])):n=C(i[1]),{type:"link",raw:i[0],text:n,href:r,tokens:[{type:"text",raw:n,text:n}]}},t.url=function(e,t){var n;if(n=this.rules.inline.url.exec(e)){var r,i;if("@"===n[2])i="mailto:"+(r=C(this.options.mangle?t(n[0]):n[0]));else{for(var s;s=n[0],n[0]=this.rules.inline._backpedal.exec(n[0])[0],s!==n[0];);r=C(n[0]),i="www."===n[1]?"http://"+r:r}return{type:"link",raw:n[0],text:r,href:i,tokens:[{type:"text",raw:r,text:r}]}}},t.inlineText=function(e,t,n){var r,i=this.rules.inline.text.exec(e);if(i)return r=t?this.options.sanitize?this.options.sanitizer?this.options.sanitizer(i[0]):C(i[0]):i[0]:C(this.options.smartypants?n(i[0]):i[0]),{type:"text",raw:i[0],text:r}},e}(),j=z,L=_,P=$,U={newline:/^\n+/,code:/^( {4}[^\n]+\n*)+/,fences:/^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,hr:/^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,heading:/^ 
{0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/,blockquote:/^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,list:/^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,html:"^ {0,3}(?:<(script|pre|style)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)|comment[^\\n]*(\\n+|$)|<\\?[\\s\\S]*?\\?>\\n*|\\n*|\\n*|)[\\s\\S]*?(?:\\n{2,}|$)|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$))",def:/^ {0,3}\[(label)\]: *\n? *]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,nptable:j,table:j,lheading:/^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,_paragraph:/^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html)[^\n]+)*)/,text:/^[^\n]+/,_label:/(?!\s*\])(?:\\[\[\]]|[^\[\]])+/,_title:/(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/};U.def=L(U.def).replace("label",U._label).replace("title",U._title).getRegex(),U.bullet=/(?:[*+-]|\d{1,9}\.)/,U.item=/^( *)(bull) ?[^\n]*(?:\n(?!\1bull ?)[^\n]*)*/,U.item=L(U.item,"gm").replace(/bull/g,U.bullet).getRegex(),U.list=L(U.list).replace(/bull/g,U.bullet).replace("hr","\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))").replace("def","\\n+(?="+U.def.source+")").getRegex(),U._tag="address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul",U._comment=//,U.html=L(U.html,"i").replace("comment",U._comment).replace("tag",U._tag).replace("attribute",/ +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/).getRegex(),U.paragraph=L(U._paragraph).replace("hr",U.hr).replace("heading"," {0,3}#{1,6} ").replace("|lheading","").replace("blockquote"," {0,3}>").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," 
{0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|!--)").replace("tag",U._tag).getRegex(),U.blockquote=L(U.blockquote).replace("paragraph",U.paragraph).getRegex(),U.normal=P({},U),U.gfm=P({},U.normal,{nptable:"^ *([^|\\n ].*\\|.*)\\n *([-:]+ *\\|[-| :]*)(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)",table:"^ *\\|(.+)\\n *\\|?( *[-:]+[-| :]*)(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)"}),U.gfm.nptable=L(U.gfm.nptable).replace("hr",U.hr).replace("heading"," {0,3}#{1,6} ").replace("blockquote"," {0,3}>").replace("code"," {4}[^\\n]").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|!--)").replace("tag",U._tag).getRegex(),U.gfm.table=L(U.gfm.table).replace("hr",U.hr).replace("heading"," {0,3}#{1,6} ").replace("blockquote"," {0,3}>").replace("code"," {4}[^\\n]").replace("fences"," {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n").replace("list"," {0,3}(?:[*+-]|1[.)]) ").replace("html",")|<(?:script|pre|style|!--)").replace("tag",U._tag).getRegex(),U.pedantic=P({},U.normal,{html:L("^ *(?:comment *(?:\\n|\\s*$)|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))").replace("comment",U._comment).replace(/tag/g,"(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b").getRegex(),def:/^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,heading:/^ *(#{1,6}) *([^\n]+?) 
*(?:#+ *)?(?:\n+|$)/,fences:j,paragraph:L(U.normal._paragraph).replace("hr",U.hr).replace("heading"," *#{1,6} *[^\n]").replace("lheading",U.lheading).replace("blockquote"," {0,3}>").replace("|fences","").replace("|list","").replace("|html","").getRegex()});var B={escape:/^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,autolink:/^<(scheme:[^\s\x00-\x1f<>]*|email)>/,url:j,tag:"^comment|^|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>|^<\\?[\\s\\S]*?\\?>|^|^",link:/^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,reflink:/^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,nolink:/^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,strong:/^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,em:/^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,code:/^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,br:/^( 
{2,}|\\)\n(?!\s*$)/,del:j,text:/^(`+|[^`])(?:[\s\S]*?(?:(?=[\\?@\\[^_{|}~"};B.em=L(B.em).replace(/punctuation/g,B._punctuation).getRegex(),B._escapes=/\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g,B._scheme=/[a-zA-Z][a-zA-Z0-9+.-]{1,31}/,B._email=/[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/,B.autolink=L(B.autolink).replace("scheme",B._scheme).replace("email",B._email).getRegex(),B._attribute=/\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/,B.tag=L(B.tag).replace("comment",U._comment).replace("attribute",B._attribute).getRegex(),B._label=/(?:\[[^\[\]]*\]|\\.|`[^`]*`|[^\[\]\\`])*?/,B._href=/<(?:\\[<>]?|[^\s<>\\])*>|[^\s\x00-\x1f]*/,B._title=/"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/,B.link=L(B.link).replace("label",B._label).replace("href",B._href).replace("title",B._title).getRegex(),B.reflink=L(B.reflink).replace("label",B._label).getRegex(),B.normal=P({},B),B.pedantic=P({},B.normal,{strong:/^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,em:/^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/,link:L(/^!?\[(label)\]\((.*?)\)/).replace("label",B._label).getRegex(),reflink:L(/^!?\[(label)\]\s*\[([^\]]*)\]/).replace("label",B._label).getRegex()}),B.gfm=P({},B.normal,{escape:L(B.escape).replace("])","~|])").getRegex(),_extended_email:/[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,url:/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,_backpedal:/(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,del:/^~+(?=\S)([\s\S]*?\S)~+/,text:/^(`+|[^`])(?:[\s\S]*?(?:(?=[\\'+(n?e:Q(e,!0))+"\n":"
    "+(n?e:Q(e,!0))+"
    "},t.blockquote=function(e){return"
    \n"+e+"
    \n"},t.html=function(e){return e},t.heading=function(e,t,n,r){return this.options.headerIds?"'+e+"\n":""+e+"\n"},t.hr=function(){return this.options.xhtml?"
    \n":"
    \n"},t.list=function(e,t,n){var r=t?"ol":"ul";return"<"+r+(t&&1!==n?' start="'+n+'"':"")+">\n"+e+"\n"},t.listitem=function(e){return"
  • "+e+"
  • \n"},t.checkbox=function(e){return" "},t.paragraph=function(e){return"

    "+e+"

    \n"},t.table=function(e,t){return"\n\n"+e+"\n"+(t=t&&""+t+"")+"
    \n"},t.tablerow=function(e){return"\n"+e+"\n"},t.tablecell=function(e,t){var n=t.header?"th":"td";return(t.align?"<"+n+' align="'+t.align+'">':"<"+n+">")+e+"\n"},t.strong=function(e){return""+e+""},t.em=function(e){return""+e+""},t.codespan=function(e){return""+e+""},t.br=function(){return this.options.xhtml?"
    ":"
    "},t.del=function(e){return""+e+""},t.link=function(e,t,n){if(null===(e=K(this.options.sanitize,this.options.baseUrl,e)))return n;var r='
    "},t.image=function(e,t,n){if(null===(e=K(this.options.sanitize,this.options.baseUrl,e)))return n;var r=''+n+'":">"},t.text=function(e){return e},e}(),Y=function(){function e(){}var t=e.prototype;return t.strong=function(e){return e},t.em=function(e){return e},t.codespan=function(e){return e},t.del=function(e){return e},t.html=function(e){return e},t.text=function(e){return e},t.link=function(e,t,n){return""+n},t.image=function(e,t,n){return""+n},t.br=function(){return""},e}(),ee=function(){function e(){this.seen={}}return e.prototype.slug=function(e){var t=e.toLowerCase().trim().replace(/<[!\/a-z].*?>/gi,"").replace(/[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,./:;<=>?@[\]^`{|}~]/g,"").replace(/\s/g,"-");if(this.seen.hasOwnProperty(t))for(var n=t;this.seen[n]++,t=n+"-"+this.seen[n],this.seen.hasOwnProperty(t););return this.seen[t]=0,t},e}(),te=t.defaults,ne=v,re=function(){function n(e){this.options=e||te,this.options.renderer=this.options.renderer||new W,this.renderer=this.options.renderer,this.renderer.options=this.options,this.textRenderer=new Y,this.slugger=new ee}n.parse=function(e,t){return new n(t).parse(e)};var e=n.prototype;return e.parse=function(e,t){void 0===t&&(t=!0);var n,r,i,s,l,a,o,c,h,u,p,g,f,d,k,b,m,x,w="",v=e.length;for(n=0;nAn error occurred:

    "+le(e.message+"",!0)+"
    ";throw e}}return he.options=he.setOptions=function(e){return ie(he.defaults,e),oe(he.defaults),he},he.getDefaults=ae,he.defaults=ce,he.Parser=re,he.parser=re.parse,he.Renderer=W,he.TextRenderer=Y,he.Lexer=H,he.lexer=H.lex,he.Tokenizer=E,he.Slugger=ee,he.parse=he}); \ No newline at end of file diff --git a/src/Lexer.js b/src/Lexer.js index b119241591..577f40f966 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -1,11 +1,47 @@ +const Tokenizer = require('./Tokenizer.js'); const { defaults } = require('./defaults.js'); const { block, inline } = require('./rules.js'); -const { - rtrim, - splitCells, - escape, - findClosingBracket -} = require('./helpers.js'); + +/** + * smartypants text replacement + */ +function smartypants(text) { + return text + // em-dashes + .replace(/---/g, '\u2014') + // en-dashes + .replace(/--/g, '\u2013') + // opening singles + .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018') + // closing singles & apostrophes + .replace(/'/g, '\u2019') + // opening doubles + .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c') + // closing doubles + .replace(/"/g, '\u201d') + // ellipses + .replace(/\.{3}/g, '\u2026'); +} + +/** + * mangle email addresses + */ +function mangle(text) { + let out = '', + i, + ch; + + const l = text.length; + for (i = 0; i < l; i++) { + ch = text.charCodeAt(i); + if (Math.random() > 0.5) { + ch = 'x' + ch.toString(16); + } + out += '&#' + ch + ';'; + } + + return out; +} /** * Block Lexer @@ -15,26 +51,31 @@ module.exports = class Lexer { this.tokens = []; this.tokens.links = Object.create(null); this.options = options || defaults; - this.rules = { + this.options.tokenizer = this.options.tokenizer || new Tokenizer(); + this.tokenizer = this.options.tokenizer; + this.tokenizer.options = this.options; + + const rules = { block: block.normal, inline: inline.normal }; if (this.options.pedantic) { - this.rules.block = block.pedantic; - this.rules.inline = inline.pedantic; + rules.block = block.pedantic; + rules.inline = 
inline.pedantic; } else if (this.options.gfm) { - this.rules.block = block.gfm; + rules.block = block.gfm; if (this.options.breaks) { - this.rules.inline = inline.breaks; + rules.inline = inline.breaks; } else { - this.rules.inline = inline.gfm; + rules.inline = inline.gfm; } } + this.tokenizer.rules = rules; } /** - * Expose Block Rules + * Expose Rules */ static get rules() { return { @@ -59,7 +100,7 @@ module.exports = class Lexer { .replace(/\r\n|\r/g, '\n') .replace(/\t/g, ' '); - this.blockTokens(src, this.tokens); + this.blockTokens(src, this.tokens, true); this.inline(this.tokens); @@ -69,347 +110,118 @@ module.exports = class Lexer { /** * Lexing */ - blockTokens(src, tokens, top = true) { + blockTokens(src, tokens = [], top = true) { src = src.replace(/^ +$/gm, ''); - let next, - loose, - cap, - bull, - b, - item, - list, - space, - i, - tag, - l, - isordered, - istask, - ischecked, - lastToken, - addBack, - raw; + let token, i, l; while (src) { // newline - if (cap = this.rules.block.newline.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - if (cap[0].length > 1) { - tokens.push({ - type: 'space', - raw - }); + if (token = this.tokenizer.space(src)) { + src = src.substring(token.raw.length); + if (token.type) { + tokens.push(token); } + continue; } // code - if (cap = this.rules.block.code.exec(src)) { - lastToken = tokens[tokens.length - 1]; - src = src.substring(cap[0].length); - raw = cap[0]; - // An indented code block cannot interrupt a paragraph. - if (lastToken && lastToken.type === 'paragraph') { - lastToken.text += '\n' + cap[0].trimRight(); - lastToken.raw += '\n' + raw; - } else { - cap = cap[0].replace(/^ {4}/gm, ''); - tokens.push({ - type: 'code', - raw, - codeBlockStyle: 'indented', - text: !this.options.pedantic - ? 
rtrim(cap, '\n') - : cap - }); - } + if (token = this.tokenizer.code(src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // fences - if (cap = this.rules.block.fences.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'code', - raw, - lang: cap[2] ? cap[2].trim() : cap[2], - text: cap[3] || '' - }); + if (token = this.tokenizer.fences(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // heading - if (cap = this.rules.block.heading.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'heading', - raw, - depth: cap[1].length, - text: cap[2] - }); + if (token = this.tokenizer.heading(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // table no leading pipe (gfm) - if (cap = this.rules.block.nptable.exec(src)) { - item = { - type: 'table', - header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), - align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), - cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [] - }; - - if (item.header.length === item.align.length) { - src = src.substring(cap[0].length); - raw = cap[0]; - item.raw = raw; - - l = item.align.length; - for (i = 0; i < l; i++) { - if (/^ *-+: *$/.test(item.align[i])) { - item.align[i] = 'right'; - } else if (/^ *:-+: *$/.test(item.align[i])) { - item.align[i] = 'center'; - } else if (/^ *:-+ *$/.test(item.align[i])) { - item.align[i] = 'left'; - } else { - item.align[i] = null; - } - } - - l = item.cells.length; - for (i = 0; i < l; i++) { - item.cells[i] = splitCells(item.cells[i], item.header.length); - } - - tokens.push(item); - - continue; - } + if (token = this.tokenizer.nptable(src)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; } // hr - if (cap = this.rules.block.hr.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'hr', - raw - }); + if (token = this.tokenizer.hr(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // blockquote - if (cap = this.rules.block.blockquote.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - - cap = cap[0].replace(/^ *> ?/gm, ''); - - tokens.push({ - type: 'blockquote', - raw, - tokens: this.blockTokens(cap, [], top) - }); - + if (token = this.tokenizer.blockquote(src)) { + src = src.substring(token.raw.length); + token.tokens = this.blockTokens(token.text, [], top); + tokens.push(token); continue; } // list - if (cap = this.rules.block.list.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - bull = cap[2]; - isordered = bull.length > 1; - - list = { - type: 'list', - raw, - ordered: isordered, - start: isordered ? +bull : '', - loose: false, - items: [] - }; - - tokens.push(list); - - // Get each top-level item. 
- cap = cap[0].match(this.rules.block.item); - - next = false; - - l = cap.length; + if (token = this.tokenizer.list(src)) { + src = src.substring(token.raw.length); + l = token.items.length; for (i = 0; i < l; i++) { - item = cap[i]; - raw = item.trim(); - - // Remove the list item's bullet - // so it is seen as the next token. - space = item.length; - item = item.replace(/^ *([*+-]|\d+\.) */, ''); - - // Outdent whatever the - // list item contains. Hacky. - if (~item.indexOf('\n ')) { - space -= item.length; - item = !this.options.pedantic - ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') - : item.replace(/^ {1,4}/gm, ''); - } - - // Determine whether the next list item belongs here. - // Backpedal if it does not belong in this list. - if (i !== l - 1) { - b = block.bullet.exec(cap[i + 1])[0]; - if (bull.length > 1 ? b.length === 1 - : (b.length > 1 || (this.options.smartLists && b !== bull))) { - addBack = cap.slice(i + 1).join('\n'); - src = addBack + src; - list.raw = list.raw.substring(list.raw.length - addBack.length); - i = l - 1; - } - } - - // Determine whether item is loose or not. - // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ - // for discount behavior. - loose = next || /\n\n(?!\s*$)/.test(item); - if (i !== l - 1) { - next = item.charAt(item.length - 1) === '\n'; - if (!loose) loose = next; - } - - if (loose) { - list.loose = true; - } - - // Check for task list items - istask = /^\[[ xX]\] /.test(item); - ischecked = undefined; - if (istask) { - ischecked = item[1] !== ' '; - item = item.replace(/^\[[ xX]\] +/, ''); - } - - list.items.push({ - raw, - task: istask, - checked: ischecked, - loose: loose, - tokens: this.blockTokens(item, [], false) - }); + token.items[i].tokens = this.blockTokens(token.items[i].text, [], false); } - + tokens.push(token); continue; } // html - if (cap = this.rules.block.html.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: this.options.sanitize - ? 
'paragraph' - : 'html', - raw, - pre: !this.options.sanitizer - && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), - text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0] - }); + if (token = this.tokenizer.html(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // def - if (top && (cap = this.rules.block.def.exec(src))) { - src = src.substring(cap[0].length); - if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); - tag = cap[1].toLowerCase().replace(/\s+/g, ' '); - if (!this.tokens.links[tag]) { - this.tokens.links[tag] = { - href: cap[2], - title: cap[3] + if (top && (token = this.tokenizer.def(src))) { + src = src.substring(token.raw.length); + if (!this.tokens.links[token.tag]) { + this.tokens.links[token.tag] = { + href: token.href, + title: token.title }; } continue; } // table (gfm) - if (cap = this.rules.block.table.exec(src)) { - item = { - type: 'table', - header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), - align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), - cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [] - }; - - if (item.header.length === item.align.length) { - src = src.substring(cap[0].length); - item.raw = cap[0]; - - l = item.align.length; - for (i = 0; i < l; i++) { - if (/^ *-+: *$/.test(item.align[i])) { - item.align[i] = 'right'; - } else if (/^ *:-+: *$/.test(item.align[i])) { - item.align[i] = 'center'; - } else if (/^ *:-+ *$/.test(item.align[i])) { - item.align[i] = 'left'; - } else { - item.align[i] = null; - } - } - - l = item.cells.length; - for (i = 0; i < l; i++) { - item.cells[i] = splitCells( - item.cells[i].replace(/^ *\| *| *\| *$/g, ''), - item.header.length); - } - - tokens.push(item); - - continue; - } + if (token = this.tokenizer.table(src)) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; } // lheading - if (cap = this.rules.block.lheading.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'heading', - raw, - depth: cap[2].charAt(0) === '=' ? 1 : 2, - text: cap[1] - }); + if (token = this.tokenizer.lheading(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // top-level paragraph - if (top && (cap = this.rules.block.paragraph.exec(src))) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'paragraph', - raw, - text: cap[1].charAt(cap[1].length - 1) === '\n' - ? cap[1].slice(0, -1) - : cap[1] - }); + if (top && (token = this.tokenizer.paragraph(src))) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // text - if (cap = this.rules.block.text.exec(src)) { - // Top-level should never reach here. 
- src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'text', - raw, - text: cap[0] - }); + if (token = this.tokenizer.text(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -417,6 +229,7 @@ module.exports = class Lexer { const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); if (this.options.silent) { console.error(errMsg); + break; } else { throw new Error(errMsg); } @@ -494,280 +307,102 @@ module.exports = class Lexer { /** * Lexing/Compiling */ - inlineTokens(src, tokens) { - let out = '', - link, - text, - newTokens, - href, - title, - cap, - prevCapZero, - lastParenIndex, - start, - linkLen, - raw; + inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) { + let token; while (src) { // escape - if (cap = this.rules.inline.escape.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - text = escape(cap[1]); - out += text; - tokens.push({ - type: 'escape', - raw, - text - }); + if (token = this.tokenizer.escape(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // tag - if (cap = this.rules.inline.tag.exec(src)) { - if (!this.inLink && /^
    /i.test(cap[0])) { - this.inLink = false; - } - if (!this.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - this.inRawBlock = true; - } else if (this.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - this.inRawBlock = false; - } - - src = src.substring(cap[0].length); - raw = cap[0]; - text = this.options.sanitize - ? (this.options.sanitizer - ? this.options.sanitizer(cap[0]) - : escape(cap[0])) - : cap[0]; - tokens.push({ - type: this.options.sanitize - ? 'text' - : 'html', - raw, - text - }); - out += text; + if (token = this.tokenizer.tag(src, inLink, inRawBlock)) { + src = src.substring(token.raw.length); + inLink = token.inLink; + inRawBlock = token.inRawBlock; + tokens.push(token); continue; } // link - if (cap = this.rules.inline.link.exec(src)) { - lastParenIndex = findClosingBracket(cap[2], '()'); - if (lastParenIndex > -1) { - start = cap[0].indexOf('!') === 0 ? 5 : 4; - linkLen = start + cap[1].length + lastParenIndex; - cap[2] = cap[2].substring(0, lastParenIndex); - cap[0] = cap[0].substring(0, linkLen).trim(); - cap[3] = ''; - } - src = src.substring(cap[0].length); - raw = cap[0]; - this.inLink = true; - href = cap[2]; - if (this.options.pedantic) { - link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); - - if (link) { - href = link[1]; - title = link[3]; - } else { - title = ''; - } - } else { - title = cap[3] ? 
cap[3].slice(1, -1) : ''; + if (token = this.tokenizer.link(src)) { + src = src.substring(token.raw.length); + if (token.type === 'link') { + token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); } - href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); - out += this.outputLink(cap, { - href: this.escapes(href), - title: this.escapes(title) - }, tokens, raw); - this.inLink = false; + tokens.push(token); continue; } // reflink, nolink - if ((cap = this.rules.inline.reflink.exec(src)) - || (cap = this.rules.inline.nolink.exec(src))) { - src = src.substring(cap[0].length); - raw = cap[0]; - link = (cap[2] || cap[1]).replace(/\s+/g, ' '); - link = this.tokens.links[link.toLowerCase()]; - if (!link || !link.href) { - text = cap[0].charAt(0); - out += text; - tokens.push({ - type: 'text', - raw: text, - text - }); - src = cap[0].substring(1) + src; - continue; + if (token = this.tokenizer.reflink(src, this.tokens.links)) { + src = src.substring(token.raw.length); + if (token.type === 'link') { + token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); } - this.inLink = true; - out += this.outputLink(cap, link, tokens, raw); - this.inLink = false; + tokens.push(token); continue; } // strong - if (cap = this.rules.inline.strong.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? [] : null; - text = this.inlineTokens(cap[4] || cap[3] || cap[2] || cap[1], newTokens); - - tokens.push({ - type: 'strong', - raw, - text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.strong(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // em - if (cap = this.rules.inline.em.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? 
[] : null; - text = this.inlineTokens(cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1], newTokens); - tokens.push({ - type: 'em', - raw, - text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.em(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // code - if (cap = this.rules.inline.code.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - text = escape(cap[2].trim(), true); - tokens.push({ - type: 'codespan', - raw, - text - }); - out += text; + if (token = this.tokenizer.codespan(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // br - if (cap = this.rules.inline.br.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'br', - raw - }); - out += '\n'; + if (token = this.tokenizer.br(src)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // del (gfm) - if (cap = this.rules.inline.del.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - newTokens = tokens ? [] : null; - text = this.inlineTokens(cap[1], newTokens); - tokens.push({ - type: 'del', - raw, - text, - tokens: newTokens - }); - out += text; + if (token = this.tokenizer.del(src)) { + src = src.substring(token.raw.length); + token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); + tokens.push(token); continue; } // autolink - if (cap = this.rules.inline.autolink.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - if (cap[2] === '@') { - text = escape(this.options.mangle ? 
this.mangle(cap[1]) : cap[1]); - href = 'mailto:' + text; - } else { - text = escape(cap[1]); - href = text; - } - tokens.push({ - type: 'link', - raw, - text, - href, - tokens: [ - { - type: 'text', - raw: text, - text - } - ] - }); - out += text; + if (token = this.tokenizer.autolink(src, mangle)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // url (gfm) - if (!this.inLink && (cap = this.rules.inline.url.exec(src))) { - if (cap[2] === '@') { - text = escape(this.options.mangle ? this.mangle(cap[0]) : cap[0]); - href = 'mailto:' + text; - } else { - // do extended autolink path validation - do { - prevCapZero = cap[0]; - cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; - } while (prevCapZero !== cap[0]); - text = escape(cap[0]); - if (cap[1] === 'www.') { - href = 'http://' + text; - } else { - href = text; - } - } - src = src.substring(cap[0].length); - raw = cap[0]; - tokens.push({ - type: 'link', - raw, - text, - href, - tokens: [ - { - type: 'text', - raw: text, - text - } - ] - }); - out += text; + if (!inLink && (token = this.tokenizer.url(src, mangle))) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } // text - if (cap = this.rules.inline.text.exec(src)) { - src = src.substring(cap[0].length); - raw = cap[0]; - if (this.inRawBlock) { - text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]; - } else { - text = escape(this.options.smartypants ? 
this.smartypants(cap[0]) : cap[0]); - } - tokens.push({ - type: 'text', - raw, - text - }); - out += text; + if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) { + src = src.substring(token.raw.length); + tokens.push(token); continue; } @@ -775,89 +410,13 @@ module.exports = class Lexer { const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0); if (this.options.silent) { console.error(errMsg); + break; } else { throw new Error(errMsg); } } } - return out; - } - - escapes(text) { - return text ? text.replace(inline._escapes, '$1') : text; - } - - /** - * tokenize Link - */ - outputLink(cap, link, tokens, raw) { - const href = link.href; - const title = link.title ? escape(link.title) : null; - const newTokens = tokens ? [] : null; - - if (cap[0].charAt(0) !== '!') { - const text = this.inlineTokens(cap[1], newTokens); - tokens.push({ - type: 'link', - raw, - text, - href, - title, - tokens: newTokens - }); - return text; - } else { - const text = escape(cap[1]); - tokens.push({ - type: 'image', - raw, - text, - href, - title - }); - return text; - } - } - - /** - * Smartypants Transformations - */ - smartypants(text) { - return text - // em-dashes - .replace(/---/g, '\u2014') - // en-dashes - .replace(/--/g, '\u2013') - // opening singles - .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018') - // closing singles & apostrophes - .replace(/'/g, '\u2019') - // opening doubles - .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c') - // closing doubles - .replace(/"/g, '\u201d') - // ellipses - .replace(/\.{3}/g, '\u2026'); - } - - /** - * Mangle Links - */ - mangle(text) { - let out = '', - i, - ch; - - const l = text.length; - for (i = 0; i < l; i++) { - ch = text.charCodeAt(i); - if (Math.random() > 0.5) { - ch = 'x' + ch.toString(16); - } - out += '&#' + ch + ';'; - } - - return out; + return tokens; } }; diff --git a/src/Tokenizer.js b/src/Tokenizer.js new file mode 100644 index 0000000000..56656f52f9 --- /dev/null +++ b/src/Tokenizer.js @@ -0,0 
+1,586 @@ +const { defaults } = require('./defaults.js'); +const { + rtrim, + splitCells, + escape, + findClosingBracket +} = require('./helpers.js'); + +function outputLink(cap, link, raw) { + const href = link.href; + const title = link.title ? escape(link.title) : null; + + if (cap[0].charAt(0) !== '!') { + return { + type: 'link', + raw, + href, + title, + text: cap[1] + }; + } else { + return { + type: 'image', + raw, + text: escape(cap[1]), + href, + title + }; + } +} + +/** + * Tokenizer + */ +module.exports = class Tokenizer { + constructor(options) { + this.options = options || defaults; + } + + space(src) { + const cap = this.rules.block.newline.exec(src); + if (cap) { + if (cap[0].length > 1) { + return { + type: 'space', + raw: cap[0] + }; + } + return { raw: '\n' }; + } + } + + code(src, tokens) { + const cap = this.rules.block.code.exec(src); + if (cap) { + const lastToken = tokens[tokens.length - 1]; + // An indented code block cannot interrupt a paragraph. + if (lastToken && lastToken.type === 'paragraph') { + tokens.pop(); + lastToken.text += '\n' + cap[0].trimRight(); + lastToken.raw += '\n' + cap[0]; + return lastToken; + } else { + const text = cap[0].replace(/^ {4}/gm, ''); + return { + type: 'code', + raw: cap[0], + codeBlockStyle: 'indented', + text: !this.options.pedantic + ? rtrim(text, '\n') + : text + }; + } + } + } + + fences(src) { + const cap = this.rules.block.fences.exec(src); + if (cap) { + return { + type: 'code', + raw: cap[0], + lang: cap[2] ? cap[2].trim() : cap[2], + text: cap[3] || '' + }; + } + } + + heading(src) { + const cap = this.rules.block.heading.exec(src); + if (cap) { + return { + type: 'heading', + raw: cap[0], + depth: cap[1].length, + text: cap[2] + }; + } + } + + nptable(src) { + const cap = this.rules.block.nptable.exec(src); + if (cap) { + const item = { + type: 'table', + header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), + align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), + cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [], + raw: cap[0] + }; + + if (item.header.length === item.align.length) { + let l = item.align.length; + let i; + for (i = 0; i < l; i++) { + if (/^ *-+: *$/.test(item.align[i])) { + item.align[i] = 'right'; + } else if (/^ *:-+: *$/.test(item.align[i])) { + item.align[i] = 'center'; + } else if (/^ *:-+ *$/.test(item.align[i])) { + item.align[i] = 'left'; + } else { + item.align[i] = null; + } + } + + l = item.cells.length; + for (i = 0; i < l; i++) { + item.cells[i] = splitCells(item.cells[i], item.header.length); + } + + return item; + } + } + } + + hr(src) { + const cap = this.rules.block.hr.exec(src); + if (cap) { + return { + type: 'hr', + raw: cap[0] + }; + } + } + + blockquote(src) { + const cap = this.rules.block.blockquote.exec(src); + if (cap) { + const text = cap[0].replace(/^ *> ?/gm, ''); + + return { + type: 'blockquote', + raw: cap[0], + text + }; + } + } + + list(src) { + const cap = this.rules.block.list.exec(src); + if (cap) { + let raw = cap[0]; + const bull = cap[2]; + const isordered = bull.length > 1; + + const list = { + type: 'list', + raw, + ordered: isordered, + start: isordered ? +bull : '', + loose: false, + items: [] + }; + + // Get each top-level item. + const itemMatch = cap[0].match(this.rules.block.item); + + let next = false, + item, + space, + b, + addBack, + loose, + istask, + ischecked; + + const l = itemMatch.length; + for (let i = 0; i < l; i++) { + item = itemMatch[i]; + raw = item; + + // Remove the list item's bullet + // so it is seen as the next token. + space = item.length; + item = item.replace(/^ *([*+-]|\d+\.) */, ''); + + // Outdent whatever the + // list item contains. Hacky. + if (~item.indexOf('\n ')) { + space -= item.length; + item = !this.options.pedantic + ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') + : item.replace(/^ {1,4}/gm, ''); + } + + // Determine whether the next list item belongs here. + // Backpedal if it does not belong in this list. 
+ if (i !== l - 1) { + b = this.rules.block.bullet.exec(itemMatch[i + 1])[0]; + if (bull.length > 1 ? b.length === 1 + : (b.length > 1 || (this.options.smartLists && b !== bull))) { + addBack = itemMatch.slice(i + 1).join('\n'); + list.raw = list.raw.substring(0, list.raw.length - addBack.length); + i = l - 1; + } + } + + // Determine whether item is loose or not. + // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ + // for discount behavior. + loose = next || /\n\n(?!\s*$)/.test(item); + if (i !== l - 1) { + next = item.charAt(item.length - 1) === '\n'; + if (!loose) loose = next; + } + + if (loose) { + list.loose = true; + } + + // Check for task list items + istask = /^\[[ xX]\] /.test(item); + ischecked = undefined; + if (istask) { + ischecked = item[1] !== ' '; + item = item.replace(/^\[[ xX]\] +/, ''); + } + + list.items.push({ + raw, + task: istask, + checked: ischecked, + loose: loose, + text: item + }); + } + + return list; + } + } + + html(src) { + const cap = this.rules.block.html.exec(src); + if (cap) { + return { + type: this.options.sanitize + ? 'paragraph' + : 'html', + raw: cap[0], + pre: !this.options.sanitizer + && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), + text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0] + }; + } + } + + def(src) { + const cap = this.rules.block.def.exec(src); + if (cap) { + if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); + const tag = cap[1].toLowerCase().replace(/\s+/g, ' '); + return { + tag, + raw: cap[0], + href: cap[2], + title: cap[3] + }; + } + } + + table(src) { + const cap = this.rules.block.table.exec(src); + if (cap) { + const item = { + type: 'table', + header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), + align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), + cells: cap[3] ? 
cap[3].replace(/\n$/, '').split('\n') : [] + }; + + if (item.header.length === item.align.length) { + item.raw = cap[0]; + + let l = item.align.length; + let i; + for (i = 0; i < l; i++) { + if (/^ *-+: *$/.test(item.align[i])) { + item.align[i] = 'right'; + } else if (/^ *:-+: *$/.test(item.align[i])) { + item.align[i] = 'center'; + } else if (/^ *:-+ *$/.test(item.align[i])) { + item.align[i] = 'left'; + } else { + item.align[i] = null; + } + } + + l = item.cells.length; + for (i = 0; i < l; i++) { + item.cells[i] = splitCells( + item.cells[i].replace(/^ *\| *| *\| *$/g, ''), + item.header.length); + } + + return item; + } + } + } + + lheading(src) { + const cap = this.rules.block.lheading.exec(src); + if (cap) { + return { + type: 'heading', + raw: cap[0], + depth: cap[2].charAt(0) === '=' ? 1 : 2, + text: cap[1] + }; + } + } + + paragraph(src) { + const cap = this.rules.block.paragraph.exec(src); + if (cap) { + return { + type: 'paragraph', + raw: cap[0], + text: cap[1].charAt(cap[1].length - 1) === '\n' + ? cap[1].slice(0, -1) + : cap[1] + }; + } + } + + text(src) { + const cap = this.rules.block.text.exec(src); + if (cap) { + return { + type: 'text', + raw: cap[0], + text: cap[0] + }; + } + } + + escape(src) { + const cap = this.rules.inline.escape.exec(src); + if (cap) { + return { + type: 'escape', + raw: cap[0], + text: escape(cap[1]) + }; + } + } + + tag(src, inLink, inRawBlock) { + const cap = this.rules.inline.tag.exec(src); + if (cap) { + if (!inLink && /^/i.test(cap[0])) { + inLink = false; + } + if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + inRawBlock = true; + } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + inRawBlock = false; + } + + return { + type: this.options.sanitize + ? 'text' + : 'html', + raw: cap[0], + inLink, + inRawBlock, + text: this.options.sanitize + ? (this.options.sanitizer + ? 
this.options.sanitizer(cap[0]) + : escape(cap[0])) + : cap[0] + }; + } + } + + link(src) { + const cap = this.rules.inline.link.exec(src); + if (cap) { + const lastParenIndex = findClosingBracket(cap[2], '()'); + if (lastParenIndex > -1) { + const start = cap[0].indexOf('!') === 0 ? 5 : 4; + const linkLen = start + cap[1].length + lastParenIndex; + cap[2] = cap[2].substring(0, lastParenIndex); + cap[0] = cap[0].substring(0, linkLen).trim(); + cap[3] = ''; + } + let href = cap[2]; + let title = ''; + if (this.options.pedantic) { + const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); + + if (link) { + href = link[1]; + title = link[3]; + } else { + title = ''; + } + } else { + title = cap[3] ? cap[3].slice(1, -1) : ''; + } + href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); + const token = outputLink(cap, { + href: href ? href.replace(this.rules.inline._escapes, '$1') : href, + title: title ? title.replace(this.rules.inline._escapes, '$1') : title + }, cap[0]); + return token; + } + } + + reflink(src, links) { + let cap; + if ((cap = this.rules.inline.reflink.exec(src)) + || (cap = this.rules.inline.nolink.exec(src))) { + let link = (cap[2] || cap[1]).replace(/\s+/g, ' '); + link = links[link.toLowerCase()]; + if (!link || !link.href) { + const text = cap[0].charAt(0); + return { + type: 'text', + raw: text, + text + }; + } + const token = outputLink(cap, link, cap[0]); + return token; + } + } + + strong(src) { + const cap = this.rules.inline.strong.exec(src); + if (cap) { + return { + type: 'strong', + raw: cap[0], + text: cap[4] || cap[3] || cap[2] || cap[1] + }; + } + } + + em(src) { + const cap = this.rules.inline.em.exec(src); + if (cap) { + return { + type: 'em', + raw: cap[0], + text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1] + }; + } + } + + codespan(src) { + const cap = this.rules.inline.code.exec(src); + if (cap) { + return { + type: 'codespan', + raw: cap[0], + text: escape(cap[2].trim(), true) + }; + } + } + + br(src) { + const cap = 
this.rules.inline.br.exec(src); + if (cap) { + return { + type: 'br', + raw: cap[0] + }; + } + } + + del(src) { + const cap = this.rules.inline.del.exec(src); + if (cap) { + return { + type: 'del', + raw: cap[0], + text: cap[1] + }; + } + } + + autolink(src, mangle) { + const cap = this.rules.inline.autolink.exec(src); + if (cap) { + let text, href; + if (cap[2] === '@') { + text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]); + href = 'mailto:' + text; + } else { + text = escape(cap[1]); + href = text; + } + + return { + type: 'link', + raw: cap[0], + text, + href, + tokens: [ + { + type: 'text', + raw: text, + text + } + ] + }; + } + } + + url(src, mangle) { + let cap; + if (cap = this.rules.inline.url.exec(src)) { + let text, href; + if (cap[2] === '@') { + text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]); + href = 'mailto:' + text; + } else { + // do extended autolink path validation + let prevCapZero; + do { + prevCapZero = cap[0]; + cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; + } while (prevCapZero !== cap[0]); + text = escape(cap[0]); + if (cap[1] === 'www.') { + href = 'http://' + text; + } else { + href = text; + } + } + return { + type: 'link', + raw: cap[0], + text, + href, + tokens: [ + { + type: 'text', + raw: text, + text + } + ] + }; + } + } + + inlineText(src, inRawBlock, smartypants) { + const cap = this.rules.inline.text.exec(src); + if (cap) { + let text; + if (inRawBlock) { + text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]; + } else { + text = escape(this.options.smartypants ? 
smartypants(cap[0]) : cap[0]); + } + return { + type: 'text', + raw: cap[0], + text + }; + } + } +}; diff --git a/src/defaults.js b/src/defaults.js index 8d0be95580..0153bb4334 100644 --- a/src/defaults.js +++ b/src/defaults.js @@ -15,6 +15,7 @@ function getDefaults() { silent: false, smartLists: false, smartypants: false, + tokenizer: null, xhtml: false }; } diff --git a/src/marked.js b/src/marked.js index b575eec23d..8a77ddcef6 100644 --- a/src/marked.js +++ b/src/marked.js @@ -1,5 +1,6 @@ const Lexer = require('./Lexer.js'); const Parser = require('./Parser.js'); +const Tokenizer = require('./Tokenizer.js'); const Renderer = require('./Renderer.js'); const TextRenderer = require('./TextRenderer.js'); const Slugger = require('./Slugger.js'); @@ -139,6 +140,8 @@ marked.TextRenderer = TextRenderer; marked.Lexer = Lexer; marked.lexer = Lexer.lex; +marked.Tokenizer = Tokenizer; + marked.Slugger = Slugger; marked.parse = marked; diff --git a/test/specs/new/double_link.html b/test/specs/new/double_link.html index ff68395a3a..78858b22bf 100644 --- a/test/specs/new/double_link.html +++ b/test/specs/new/double_link.html @@ -2,4 +2,14 @@

    Already linked: http://example.com/.

    +

    Already linked: http://example.com/.

    +

    Already linked: http://example.com/.

    + +

    Already linked: http://example.com/.

    + +

    Already linked: http://example.com/.

    + +

    Already linked: http://example.com/.

    + +

    Already linked: http://example.com/.

    diff --git a/test/specs/new/double_link.md b/test/specs/new/double_link.md index 50216736c1..895d22f6d9 100644 --- a/test/specs/new/double_link.md +++ b/test/specs/new/double_link.md @@ -2,4 +2,16 @@ Already linked: [http://example.com/](http://example.com/). +Already linked: http://example.com/. + Already linked: **http://example.com/**. + +Already linked: *http://example.com/*. + +Already linked: ~~http://example.com/~~. + +Already linked: [http://example.com/]. + +Already linked: [http://example.com/][]. + +[http://example.com/]: http://example.com/ diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index 0d334358a0..d8a802061b 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -9,15 +9,12 @@ function expectTokens({ md, options, tokens = [], links = {} }) { expect(actual).toEqual(expected); } -function expectInlineTokens({ md, options, output = jasmine.any(String), tokens = jasmine.any(Array), links = {} }) { +function expectInlineTokens({ md, options, tokens = jasmine.any(Array), links = {} }) { const lexer = new Lexer(options); lexer.tokens.links = links; const outTokens = []; - const outOutput = lexer.inlineTokens(md, outTokens); - expect({ - output: outOutput, - tokens: outTokens - }).toEqual({ output, tokens }); + lexer.inlineTokens(md, outTokens); + expect(outTokens).toEqual(tokens); } function expectInline({ token, options, tokens }) { @@ -279,6 +276,7 @@ a | b { type: 'blockquote', raw: '> blockquote', + text: 'blockquote', tokens: [{ type: 'paragraph', raw: 'blockquote', @@ -313,6 +311,7 @@ a | b task: false, checked: undefined, loose: false, + text: 'item 1', tokens: [{ type: 'text', raw: 'item 1', @@ -321,10 +320,11 @@ a | b }] }, { - raw: '- item 2', + raw: '- item 2\n', task: false, checked: undefined, loose: false, + text: 'item 2\n', tokens: [{ type: 'text', raw: 'item 2', @@ -355,7 +355,7 @@ a | b raw: '1. item 1' }), jasmine.objectContaining({ - raw: '2. item 2' + raw: '2. 
item 2\n' }) ] }) @@ -380,7 +380,7 @@ a | b raw: '2. item 1' }), jasmine.objectContaining({ - raw: '3. item 2' + raw: '3. item 2\n' }) ] }) @@ -422,7 +422,7 @@ a | b checked: false }), jasmine.objectContaining({ - raw: '- [x] item 2', + raw: '- [x] item 2\n', task: true, checked: true }) @@ -504,7 +504,7 @@ a | b }); describe('inline', () => { - describe('tokens', () => { + describe('inline', () => { it('paragraph', () => { expectInline({ token: { type: 'paragraph', text: 'text' }, @@ -563,11 +563,10 @@ a | b }); }); - describe('output', () => { + describe('inlineTokens', () => { it('escape', () => { expectInlineTokens({ md: '\\>', - output: '>', tokens: [ { type: 'escape', raw: '\\>', text: '>' } ] @@ -577,11 +576,10 @@ a | b it('html', () => { expectInlineTokens({ md: '
    html
    ', - output: '
    html
    ', tokens: [ - { type: 'html', raw: '
    ', text: '
    ' }, + { type: 'html', raw: '
    ', inLink: false, inRawBlock: false, text: '
    ' }, { type: 'text', raw: 'html', text: 'html' }, - { type: 'html', raw: '
    ', text: '
    ' } + { type: 'html', raw: '
    ', inLink: false, inRawBlock: false, text: '
    ' } ] }); }); @@ -590,11 +588,10 @@ a | b expectInlineTokens({ md: '
    html
    ', options: { sanitize: true }, - output: '<div>html</div>', tokens: [ - { type: 'text', raw: '
    ', text: '<div>' }, + { type: 'text', raw: '
    ', inLink: false, inRawBlock: false, text: '<div>' }, { type: 'text', raw: 'html', text: 'html' }, - { type: 'text', raw: '
    ', text: '</div>' } + { type: 'text', raw: '
    ', inLink: false, inRawBlock: false, text: '</div>' } ] }); }); @@ -602,14 +599,13 @@ a | b it('link', () => { expectInlineTokens({ md: '[link](https://example.com)', - output: 'link', tokens: [ { type: 'link', raw: '[link](https://example.com)', - text: 'link', href: 'https://example.com', title: null, + text: 'link', tokens: [ { type: 'text', raw: 'link', text: 'link' } ] @@ -621,14 +617,13 @@ a | b it('link title', () => { expectInlineTokens({ md: '[link](https://example.com "title")', - output: 'link', tokens: [ { type: 'link', raw: '[link](https://example.com "title")', - text: 'link', href: 'https://example.com', title: 'title', + text: 'link', tokens: [ { type: 'text', raw: 'link', text: 'link' } ] @@ -640,7 +635,6 @@ a | b it('image', () => { expectInlineTokens({ md: '![image](https://example.com/image.png)', - output: 'image', tokens: [ { type: 'image', @@ -656,7 +650,6 @@ a | b it('image title', () => { expectInlineTokens({ md: '![image](https://example.com/image.png "title")', - output: 'image', tokens: [ { type: 'image', @@ -676,14 +669,13 @@ a | b links: { link: { href: 'https://example.com', title: 'title' } }, - output: 'link', tokens: [ { type: 'link', raw: '[link][]', - text: 'link', href: 'https://example.com', title: 'title', + text: 'link', tokens: [{ type: 'text', raw: 'link', @@ -700,14 +692,13 @@ a | b links: { link: { href: 'https://example.com', title: 'title' } }, - output: 'link', tokens: [ { type: 'link', raw: '[link]', - text: 'link', href: 'https://example.com', title: 'title', + text: 'link', tokens: [{ type: 'text', raw: 'link', @@ -721,7 +712,6 @@ a | b it('no def', () => { expectInlineTokens({ md: '[link]', - output: '[link]', tokens: [ { type: 'text', raw: '[', text: '[' }, { type: 'text', raw: 'link]', text: 'link]' } @@ -733,7 +723,6 @@ a | b it('strong', () => { expectInlineTokens({ md: '**strong**', - output: 'strong', tokens: [ { type: 'strong', @@ -750,7 +739,6 @@ a | b it('em', () => { expectInlineTokens({ md: '*em*', - 
output: 'em', tokens: [ { type: 'em', @@ -767,7 +755,6 @@ a | b it('code', () => { expectInlineTokens({ md: '`code`', - output: 'code', tokens: [ { type: 'codespan', raw: '`code`', text: 'code' } ] @@ -778,7 +765,6 @@ a | b expectInlineTokens({ md: 'a\nb', options: { gfm: true, breaks: true }, - output: 'a\nb', tokens: jasmine.arrayContaining([ { type: 'br', raw: '\n' } ]) @@ -788,7 +774,6 @@ a | b it('del', () => { expectInlineTokens({ md: '~~del~~', - output: 'del', tokens: [ { type: 'del', @@ -806,7 +791,6 @@ a | b it('autolink', () => { expectInlineTokens({ md: '', - output: 'https://example.com', tokens: [ { type: 'link', @@ -825,7 +809,6 @@ a | b expectInlineTokens({ md: '', options: { mangle: false }, - output: 'test@example.com', tokens: [ { type: 'link', @@ -844,7 +827,6 @@ a | b expectInlineTokens({ md: '', options: { mangle: true }, - output: jasmine.stringMatching('&#'), tokens: [ { type: 'link', @@ -866,7 +848,6 @@ a | b it('url', () => { expectInlineTokens({ md: 'https://example.com', - output: 'https://example.com', tokens: [ { type: 'link', @@ -885,7 +866,6 @@ a | b expectInlineTokens({ md: 'test@example.com', options: { gfm: true, mangle: false }, - output: 'test@example.com', tokens: [ { type: 'link', @@ -904,7 +884,6 @@ a | b expectInlineTokens({ md: 'test@example.com', options: { gfm: true, mangle: true }, - output: jasmine.stringMatching('&#'), tokens: [ { type: 'link', @@ -927,7 +906,6 @@ a | b it('text', () => { expectInlineTokens({ md: 'text', - output: 'text', tokens: [ { type: 'text', @@ -943,7 +921,6 @@ a | b expectInlineTokens({ md: "'single quotes'", options: { smartypants: true }, - output: '‘single quotes’', tokens: [ { type: 'text', @@ -958,7 +935,6 @@ a | b expectInlineTokens({ md: '"double quotes"', options: { smartypants: true }, - output: '“double quotes”', tokens: [ { type: 'text', @@ -973,7 +949,6 @@ a | b expectInlineTokens({ md: 'ellipses...', options: { smartypants: true }, - output: 'ellipses…', tokens: [ { type: 'text', @@ 
-988,7 +963,6 @@ a | b expectInlineTokens({ md: 'en--dash', options: { smartypants: true }, - output: 'en–dash', tokens: [ { type: 'text', @@ -1003,7 +977,6 @@ a | b expectInlineTokens({ md: 'em---dash', options: { smartypants: true }, - output: 'em—dash', tokens: [ { type: 'text',