From 780c5cf3aa2168deebe8854a1ffa1ed68c3974f9 Mon Sep 17 00:00:00 2001
From: Tony Brix
Date: Tue, 14 Apr 2020 13:23:41 -0500
Subject: [PATCH] only send needed variables to tokenizer

---
 docs/USING_PRO.md               |  52 ++++++++--------
 src/Lexer.js                    |  72 ++++++++++++++---------
 src/Tokenizer.js                | 101 +++++++++++++++-----------------
 test/specs/new/double_link.html |  10 ++++
 test/specs/new/double_link.md   |  12 ++++
 test/unit/Lexer-spec.js         |  18 ++++--
 6 files changed, 154 insertions(+), 111 deletions(-)

diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md
index de9f45f53e..9d92e8ef97 100644
--- a/docs/USING_PRO.md
+++ b/docs/USING_PRO.md
@@ -127,35 +127,35 @@ console.log(marked('$ latex code $', { tokenizer }));
 
 ### Block level tokenizer methods
 
-- space(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- code(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- fences(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- heading(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- nptable(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- hr(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- blockquote(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- list(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- html(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- def(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- table(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- lheading(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- paragraph(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
-- text(*Lexer* lexer, *string* src, *array* tokens, *bool* top)
+- space(*string* src)
+- code(*string* src, *array* tokens)
+- fences(*string* src)
+- heading(*string* src)
+- nptable(*string* src)
+- hr(*string* src)
+- blockquote(*string* src)
+- list(*string* src)
+- html(*string* src)
+- def(*string* src)
+- table(*string* src)
+- lheading(*string* src)
+- paragraph(*string* src)
+- text(*string* src)
 
 ### Inline level tokenizer methods
 
-- escape(*Lexer* lexer, *string* src, *array* tokens)
-- tag(*Lexer* lexer, *string* src, *array* tokens)
-- link(*Lexer* lexer, *string* src, *array* tokens)
-- reflink(*Lexer* lexer, *string* src, *array* tokens)
-- strong(*Lexer* lexer, *string* src, *array* tokens)
-- em(*Lexer* lexer, *string* src, *array* tokens)
-- codespan(*Lexer* lexer, *string* src, *array* tokens)
-- br(*Lexer* lexer, *string* src, *array* tokens)
-- del(*Lexer* lexer, *string* src, *array* tokens)
-- autolink(*Lexer* lexer, *string* src, *array* tokens)
-- url(*Lexer* lexer, *string* src, *array* tokens)
-- inlineText(*Lexer* lexer, *string* src, *array* tokens)
+- escape(*string* src)
+- tag(*string* src, *bool* inLink, *bool* inRawBlock)
+- link(*string* src)
+- reflink(*string* src, *object* links)
+- strong(*string* src)
+- em(*string* src)
+- codespan(*string* src)
+- br(*string* src)
+- del(*string* src)
+- autolink(*string* src)
+- url(*string* src)
+- inlineText(*string* src, *bool* inRawBlock)
 
 ### Other tokenizer methods
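For reference, here is what a custom tokenizer looks like against the new signatures. This is a minimal sketch based on the `codespan` override example already in USING_PRO.md (the `$ latex code $` handler is illustrative, not part of this patch):

```js
const marked = require('marked');

const tokenizer = new marked.Tokenizer();

// New-style signature: only `src` comes in; a token (or nothing) comes out.
tokenizer.codespan = function(src) {
  const match = src.match(/^\$+([^$\n]+?)\$+/);
  if (match) {
    return {
      type: 'codespan',
      raw: match[0],
      text: match[1].trim()
    };
  }
  // Returning nothing lets the Lexer fall through to the next inline rule.
};

console.log(marked('$ latex code $', { tokenizer }));
```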
diff --git a/src/Lexer.js b/src/Lexer.js
index 7ad44a2a3a..6c5e06a705 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -50,11 +50,11 @@ module.exports = class Lexer {
    */
   blockTokens(src, tokens = [], top = true) {
     src = src.replace(/^ +$/gm, '');
-    let token;
+    let token, i, l;
 
     while (src) {
       // newline
-      if (token = this.tokenizer.space(this, src, tokens, top)) {
+      if (token = this.tokenizer.space(src)) {
         src = src.substring(token.raw.length);
         if (token.type) {
           tokens.push(token);
@@ -63,63 +63,68 @@ module.exports = class Lexer {
       }
 
       // code
-      if (token = this.tokenizer.code(this, src, tokens, top)) {
+      if (token = this.tokenizer.code(src, tokens)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // fences
-      if (token = this.tokenizer.fences(this, src, tokens, top)) {
+      if (token = this.tokenizer.fences(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // heading
-      if (token = this.tokenizer.heading(this, src, tokens, top)) {
+      if (token = this.tokenizer.heading(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // table no leading pipe (gfm)
-      if (token = this.tokenizer.nptable(this, src, tokens, top)) {
+      if (token = this.tokenizer.nptable(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // hr
-      if (token = this.tokenizer.hr(this, src, tokens, top)) {
+      if (token = this.tokenizer.hr(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // blockquote
-      if (token = this.tokenizer.blockquote(this, src, tokens, top)) {
+      if (token = this.tokenizer.blockquote(src)) {
         src = src.substring(token.raw.length);
+        token.tokens = this.blockTokens(token.text, [], top);
         tokens.push(token);
         continue;
       }
 
       // list
-      if (token = this.tokenizer.list(this, src, tokens, top)) {
+      if (token = this.tokenizer.list(src)) {
         src = src.substring(token.raw.length);
+        l = token.items.length;
+        for (i = 0; i < l; i++) {
+          token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
+        }
         tokens.push(token);
         continue;
       }
 
       // html
-      if (token = this.tokenizer.html(this, src, tokens, top)) {
+      if (token = this.tokenizer.html(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // def
-      if (top && (token = this.tokenizer.def(this, src, tokens, top))) {
+      if (top && (token = this.tokenizer.def(src))) {
         src = src.substring(token.raw.length);
         if (!this.tokens.links[token.tag]) {
           this.tokens.links[token.tag] = {
@@ -131,28 +136,28 @@ module.exports = class Lexer {
       }
 
       // table (gfm)
-      if (token = this.tokenizer.table(this, src, tokens, top)) {
+      if (token = this.tokenizer.table(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // lheading
-      if (token = this.tokenizer.lheading(this, src, tokens, top)) {
+      if (token = this.tokenizer.lheading(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // top-level paragraph
-      if (top && (token = this.tokenizer.paragraph(this, src, tokens, top))) {
+      if (top && (token = this.tokenizer.paragraph(src))) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // text
-      if (token = this.tokenizer.text(this, src, tokens, top)) {
+      if (token = this.tokenizer.text(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
@@ -240,89 +245,100 @@ module.exports = class Lexer {
   /**
    * Lexing/Compiling
    */
-  inlineTokens(src, tokens = []) {
+  inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
     let token;
 
     while (src) {
       // escape
-      if (token = this.tokenizer.escape(this, src, tokens)) {
+      if (token = this.tokenizer.escape(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // tag
-      if (token = this.tokenizer.tag(this, src, tokens)) {
+      if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
         src = src.substring(token.raw.length);
+        inLink = token.inLink;
+        inRawBlock = token.inRawBlock;
         tokens.push(token);
         continue;
       }
 
       // link
-      if (token = this.tokenizer.link(this, src, tokens)) {
+      if (token = this.tokenizer.link(src)) {
         src = src.substring(token.raw.length);
+        if (token.type === 'link') {
+          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
+        }
         tokens.push(token);
         continue;
       }
 
       // reflink, nolink
-      if (token = this.tokenizer.reflink(this, src, tokens)) {
+      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
         src = src.substring(token.raw.length);
+        if (token.type === 'link') {
+          token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
+        }
         tokens.push(token);
         continue;
       }
 
       // strong
-      if (token = this.tokenizer.strong(this, src, tokens)) {
+      if (token = this.tokenizer.strong(src)) {
         src = src.substring(token.raw.length);
+        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
         tokens.push(token);
         continue;
       }
 
       // em
-      if (token = this.tokenizer.em(this, src, tokens)) {
+      if (token = this.tokenizer.em(src)) {
         src = src.substring(token.raw.length);
+        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
         tokens.push(token);
         continue;
       }
 
       // code
-      if (token = this.tokenizer.codespan(this, src, tokens)) {
+      if (token = this.tokenizer.codespan(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // br
-      if (token = this.tokenizer.br(this, src, tokens)) {
+      if (token = this.tokenizer.br(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // del (gfm)
-      if (token = this.tokenizer.del(this, src, tokens)) {
+      if (token = this.tokenizer.del(src)) {
         src = src.substring(token.raw.length);
+        token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
         tokens.push(token);
         continue;
       }
 
       // autolink
-      if (token = this.tokenizer.autolink(this, src, tokens)) {
+      if (token = this.tokenizer.autolink(src)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // url (gfm)
-      if (token = this.tokenizer.url(this, src, tokens)) {
+      if (!inLink && (token = this.tokenizer.url(src))) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
       }
 
       // text
-      if (token = this.tokenizer.inlineText(this, src, tokens)) {
+      if (token = this.tokenizer.inlineText(src, inRawBlock)) {
         src = src.substring(token.raw.length);
         tokens.push(token);
         continue;
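The shape of the src/Lexer.js change in one place: tokenizers now return flat tokens carrying a `text` field, and only the Lexer recurses to build child tokens. Condensed from the blockquote branch above (a sketch, not additional patch content):

```js
// Inside Lexer#blockTokens: the tokenizer no longer calls back into the
// lexer, so the nested tokens are built here instead.
if (token = this.tokenizer.blockquote(src)) {
  src = src.substring(token.raw.length);
  // token is flat, e.g. { type: 'blockquote', raw: '> quoted', text: 'quoted' }
  token.tokens = this.blockTokens(token.text, [], top);
  tokens.push(token);
  continue;
}
```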
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 66b33c8d76..9432b8f60a 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -7,7 +7,7 @@ const {
 } = require('./helpers.js');
 const { block, inline } = require('./rules.js');
 
-function outputLink(cap, link, tokens, raw, lexer) {
+function outputLink(cap, link, raw) {
   const href = link.href;
   const title = link.title ? escape(link.title) : null;
@@ -17,7 +17,7 @@ function outputLink(cap, link, tokens, raw, lexer) {
       raw,
       href,
       title,
-      tokens: lexer.inlineTokens(cap[1])
+      text: cap[1]
     };
   } else {
     return {
@@ -40,9 +40,6 @@ module.exports = class Tokenizer {
   }
 
   initialize() {
-    this.inLink = false;
-    this.inRawBlock = false;
-
     this.rules = {
       block: block.normal,
       inline: inline.normal
@@ -71,7 +68,7 @@ module.exports = class Tokenizer {
     };
   }
 
-  space(lexer, src, tokens, top) {
+  space(src) {
     const cap = this.rules.block.newline.exec(src);
     if (cap) {
       if (cap[0].length > 1) {
@@ -84,7 +81,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  code(lexer, src, tokens, top) {
+  code(src, tokens) {
     const cap = this.rules.block.code.exec(src);
     if (cap) {
       const lastToken = tokens[tokens.length - 1];
@@ -108,7 +105,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  fences(lexer, src, tokens, top) {
+  fences(src) {
     const cap = this.rules.block.fences.exec(src);
     if (cap) {
       return {
@@ -120,7 +117,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  heading(lexer, src, tokens, top) {
+  heading(src) {
     const cap = this.rules.block.heading.exec(src);
     if (cap) {
       return {
@@ -132,7 +129,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  nptable(lexer, src, tokens, top) {
+  nptable(src) {
     const cap = this.rules.block.nptable.exec(src);
     if (cap) {
       const item = {
@@ -168,7 +165,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  hr(lexer, src, tokens, top) {
+  hr(src) {
     const cap = this.rules.block.hr.exec(src);
     if (cap) {
       return {
@@ -178,7 +175,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  blockquote(lexer, src, tokens, top) {
+  blockquote(src) {
     const cap = this.rules.block.blockquote.exec(src);
     if (cap) {
       const text = cap[0].replace(/^ *> ?/gm, '');
@@ -186,12 +183,12 @@ module.exports = class Tokenizer {
       return {
         type: 'blockquote',
         raw: cap[0],
-        tokens: lexer.blockTokens(text, [], top)
+        text
       };
     }
   }
 
-  list(lexer, src, tokens, top) {
+  list(src) {
     const cap = this.rules.block.list.exec(src);
     if (cap) {
       let raw = cap[0];
@@ -276,7 +273,7 @@ module.exports = class Tokenizer {
         task: istask,
         checked: ischecked,
         loose: loose,
-        tokens: lexer.blockTokens(item, [], false)
+        text: item
       });
     }
 
@@ -284,7 +281,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  html(lexer, src, tokens, top) {
+  html(src) {
     const cap = this.rules.block.html.exec(src);
     if (cap) {
       return {
@@ -299,7 +296,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  def(lexer, src, tokens, top) {
+  def(src) {
     const cap = this.rules.block.def.exec(src);
     if (cap) {
       if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
@@ -313,7 +310,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  table(lexer, src, tokens, top) {
+  table(src) {
     const cap = this.rules.block.table.exec(src);
     if (cap) {
       const item = {
@@ -352,7 +349,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  lheading(lexer, src, tokens, top) {
+  lheading(src) {
     const cap = this.rules.block.lheading.exec(src);
     if (cap) {
       return {
@@ -364,7 +361,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  paragraph(lexer, src, tokens, top) {
+  paragraph(src) {
     const cap = this.rules.block.paragraph.exec(src);
     if (cap) {
       return {
@@ -377,7 +374,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  text(lexer, src, tokens, top) {
+  text(src) {
     const cap = this.rules.block.text.exec(src);
     if (cap) {
       return {
@@ -388,7 +385,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  escape(lexer, src, tokens) {
+  escape(src) {
     const cap = this.rules.inline.escape.exec(src);
     if (cap) {
       return {
@@ -399,18 +396,18 @@ module.exports = class Tokenizer {
     }
   }
 
-  tag(lexer, src, tokens) {
+  tag(src, inLink, inRawBlock) {
     const cap = this.rules.inline.tag.exec(src);
     if (cap) {
-      if (!this.inLink && /^<a /i.test(cap[0])) {
-        this.inLink = true;
-      } else if (this.inLink && /^<\/a>/i.test(cap[0])) {
-        this.inLink = false;
+      if (!inLink && /^<a /i.test(cap[0])) {
+        inLink = true;
+      } else if (inLink && /^<\/a>/i.test(cap[0])) {
+        inLink = false;
       }
-      if (!this.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
-        this.inRawBlock = true;
-      } else if (this.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
-        this.inRawBlock = false;
+      if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
+        inRawBlock = true;
+      } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
+        inRawBlock = false;
       }
 
       return {
@@ -418,6 +415,8 @@ module.exports = class Tokenizer {
           ? 'text'
           : 'html',
         raw: cap[0],
+        inLink,
+        inRawBlock,
         text: this.options.sanitize
           ? (this.options.sanitizer
             ? this.options.sanitizer(cap[0])
@@ -427,7 +426,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  link(lexer, src, tokens) {
+  link(src) {
     const cap = this.rules.inline.link.exec(src);
     if (cap) {
       const lastParenIndex = findClosingBracket(cap[2], '()');
@@ -438,7 +437,6 @@ module.exports = class Tokenizer {
         cap[0] = cap[0].substring(0, linkLen).trim();
         cap[3] = '';
       }
-      this.inLink = true;
       let href = cap[2];
       let title = '';
       if (this.options.pedantic) {
@@ -457,18 +455,17 @@ module.exports = class Tokenizer {
       const token = outputLink(cap, {
         href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
         title: title ? title.replace(this.rules.inline._escapes, '$1') : title
-      }, tokens, cap[0], lexer);
-      this.inLink = false;
+      }, cap[0]);
       return token;
     }
   }
 
-  reflink(lexer, src, tokens) {
+  reflink(src, links) {
     let cap;
     if ((cap = this.rules.inline.reflink.exec(src))
         || (cap = this.rules.inline.nolink.exec(src))) {
       let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
-      link = lexer.tokens.links[link.toLowerCase()];
+      link = links[link.toLowerCase()];
       if (!link || !link.href) {
         const text = cap[0].charAt(0);
         return {
@@ -477,36 +474,34 @@ module.exports = class Tokenizer {
           text
         };
       }
-      this.inLink = true;
-      const token = outputLink(cap, link, tokens, cap[0], lexer);
-      this.inLink = false;
+      const token = outputLink(cap, link, cap[0]);
       return token;
     }
   }
 
-  strong(lexer, src, tokens) {
+  strong(src) {
     const cap = this.rules.inline.strong.exec(src);
     if (cap) {
       return {
         type: 'strong',
         raw: cap[0],
-        tokens: lexer.inlineTokens(cap[4] || cap[3] || cap[2] || cap[1])
+        text: cap[4] || cap[3] || cap[2] || cap[1]
       };
     }
   }
 
-  em(lexer, src, tokens) {
+  em(src) {
     const cap = this.rules.inline.em.exec(src);
     if (cap) {
       return {
         type: 'em',
         raw: cap[0],
-        tokens: lexer.inlineTokens(cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1])
+        text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
       };
     }
   }
 
-  codespan(lexer, src, tokens) {
+  codespan(src) {
     const cap = this.rules.inline.code.exec(src);
     if (cap) {
       return {
@@ -517,7 +512,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  br(lexer, src, tokens) {
+  br(src) {
     const cap = this.rules.inline.br.exec(src);
     if (cap) {
       return {
@@ -527,18 +522,18 @@ module.exports = class Tokenizer {
     }
   }
 
-  del(lexer, src, tokens) {
+  del(src) {
     const cap = this.rules.inline.del.exec(src);
     if (cap) {
       return {
         type: 'del',
         raw: cap[0],
-        tokens: lexer.inlineTokens(cap[1])
+        text: cap[1]
      };
     }
   }
 
-  autolink(lexer, src, tokens) {
+  autolink(src) {
     const cap = this.rules.inline.autolink.exec(src);
     if (cap) {
       let text, href;
@@ -566,9 +561,9 @@ module.exports = class Tokenizer {
     }
   }
 
-  url(lexer, src, tokens) {
+  url(src) {
     let cap;
-    if (!this.inLink && (cap = this.rules.inline.url.exec(src))) {
+    if (cap = this.rules.inline.url.exec(src)) {
       let text, href;
       if (cap[2] === '@') {
         text = escape(this.options.mangle ? this.mangle(cap[0]) : cap[0]);
@@ -603,11 +598,11 @@ module.exports = class Tokenizer {
     }
   }
 
-  inlineText(lexer, src, tokens) {
+  inlineText(src, inRawBlock) {
     const cap = this.rules.inline.text.exec(src);
     if (cap) {
       let text;
-      if (this.inRawBlock) {
+      if (inRawBlock) {
         text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
       } else {
         text = escape(this.options.smartypants ? this.smartypants(cap[0]) : cap[0]);
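In src/Tokenizer.js the state that used to live on the instance (`this.inLink`, `this.inRawBlock`) now travels through arguments and comes back on the returned token, so the tokenizer itself stays stateless between calls. A sketch of how the Lexer threads it (`tokenizer` here is any `marked.Tokenizer` instance):

```js
let inLink = false;
let inRawBlock = false;

// `tag` receives the current state and reports the updated state on the token.
const token = tokenizer.tag('<pre>', inLink, inRawBlock);
if (token) {
  inLink = token.inLink;         // still false
  inRawBlock = token.inRawBlock; // true: <pre> opens a raw block
}
```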

diff --git a/test/specs/new/double_link.html b/test/specs/new/double_link.html
index ff68395a3a..78858b22bf 100644
--- a/test/specs/new/double_link.html
+++ b/test/specs/new/double_link.html
@@ -2,4 +2,14 @@
 
 <p>Already linked: <a href="http://example.com/">http://example.com/</a>.</p>
 
+<p>Already linked: <a href="http://example.com/">http://example.com/</a>.</p>
+
 <p>Already linked: <strong><a href="http://example.com/">http://example.com/</a></strong>.</p>
+
+<p>Already linked: <em><a href="http://example.com/">http://example.com/</a></em>.</p>
+
+<p>Already linked: <del><a href="http://example.com/">http://example.com/</a></del>.</p>
+
+<p>Already linked: <a href="http://example.com/">http://example.com/</a>.</p>
+
+<p>Already linked: <a href="http://example.com/">http://example.com/</a>.</p>

diff --git a/test/specs/new/double_link.md b/test/specs/new/double_link.md
index 50216736c1..895d22f6d9 100644
--- a/test/specs/new/double_link.md
+++ b/test/specs/new/double_link.md
@@ -2,4 +2,16 @@
 
 Already linked: [http://example.com/](http://example.com/).
 
+Already linked: http://example.com/.
+
 Already linked: **http://example.com/**.
+
+Already linked: *http://example.com/*.
+
+Already linked: ~~http://example.com/~~.
+
+Already linked: [http://example.com/].
+
+Already linked: [http://example.com/][].
+
+[http://example.com/]: http://example.com/
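These fixtures pin down the behavior the refactor must preserve: text that is already inside a link is never autolinked a second time, whether it is wrapped in strong/em/del or resolved through a reference link. One case, checked by hand (a sketch):

```js
const marked = require('marked');

console.log(marked('Already linked: [http://example.com/](http://example.com/).'));
// <p>Already linked: <a href="http://example.com/">http://example.com/</a>.</p>
```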
diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js
index dd0bdfef2c..d8a802061b 100644
--- a/test/unit/Lexer-spec.js
+++ b/test/unit/Lexer-spec.js
@@ -276,6 +276,7 @@ a | b
         {
           type: 'blockquote',
           raw: '> blockquote',
+          text: 'blockquote',
           tokens: [{
             type: 'paragraph',
             raw: 'blockquote',
@@ -310,6 +311,7 @@ a | b
             task: false,
             checked: undefined,
             loose: false,
+            text: 'item 1',
             tokens: [{
               type: 'text',
               raw: 'item 1',
@@ -322,6 +324,7 @@ a | b
             task: false,
             checked: undefined,
             loose: false,
+            text: 'item 2\n',
             tokens: [{
               type: 'text',
               raw: 'item 2',
@@ -574,9 +577,9 @@ a | b
       expectInlineTokens({
         md: '<div>html</div>',
         tokens: [
-          { type: 'html', raw: '<div>', text: '<div>' },
+          { type: 'html', raw: '<div>', inLink: false, inRawBlock: false, text: '<div>' },
           { type: 'text', raw: 'html', text: 'html' },
-          { type: 'html', raw: '</div>', text: '</div>' }
+          { type: 'html', raw: '</div>', inLink: false, inRawBlock: false, text: '</div>' }
         ]
       });
     });
@@ -586,9 +589,9 @@ a | b
         md: '<div>html</div>',
         options: { sanitize: true },
         tokens: [
-          { type: 'text', raw: '<div>', text: '&lt;div&gt;' },
+          { type: 'text', raw: '<div>', inLink: false, inRawBlock: false, text: '&lt;div&gt;' },
           { type: 'text', raw: 'html', text: 'html' },
-          { type: 'text', raw: '</div>', text: '&lt;/div&gt;' }
+          { type: 'text', raw: '</div>', inLink: false, inRawBlock: false, text: '&lt;/div&gt;' }
         ]
       });
     });
@@ -602,6 +605,7 @@ a | b
           raw: '[link](https://example.com)',
           href: 'https://example.com',
           title: null,
+          text: 'link',
           tokens: [
             { type: 'text', raw: 'link', text: 'link' }
           ]
@@ -619,6 +623,7 @@ a | b
           raw: '[link](https://example.com "title")',
           href: 'https://example.com',
           title: 'title',
+          text: 'link',
           tokens: [
             { type: 'text', raw: 'link', text: 'link' }
           ]
@@ -670,6 +675,7 @@ a | b
           raw: '[link][]',
           href: 'https://example.com',
           title: 'title',
+          text: 'link',
           tokens: [{
             type: 'text',
             raw: 'link',
@@ -692,6 +698,7 @@ a | b
           raw: '[link]',
           href: 'https://example.com',
           title: 'title',
+          text: 'link',
           tokens: [{
             type: 'text',
             raw: 'link',
@@ -720,6 +727,7 @@ a | b
         {
           type: 'strong',
           raw: '**strong**',
+          text: 'strong',
           tokens: [
             { type: 'text', raw: 'strong', text: 'strong' }
           ]
@@ -735,6 +743,7 @@ a | b
         {
           type: 'em',
           raw: '*em*',
+          text: 'em',
           tokens: [
             { type: 'text', raw: 'em', text: 'em' }
           ]
@@ -769,6 +778,7 @@ a | b
         {
           type: 'del',
           raw: '~~del~~',
+          text: 'del',
           tokens: [
             { type: 'text', raw: 'del', text: 'del' }
           ]
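The updated expectations show the net effect for consumers: the token tree is unchanged, but every parent inline token now also records the `text` its children were lexed from. A quick check against this version (sketch):

```js
const marked = require('marked');

const tokens = marked.lexer('**strong**');
console.log(tokens[0].tokens[0]);
// { type: 'strong', raw: '**strong**', text: 'strong',
//   tokens: [ { type: 'text', raw: 'strong', text: 'strong' } ] }
```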