New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[commonmark] make html comments, html inlines, html blocks and links compliant #1135
Changes from all commits
ffd386a
056f4e2
9450b09
652ba97
b2611c1
f2ebd43
27d4da6
4de3c98
c12c5d7
316db0a
d89012f
8e30cd2
bcf9abb
1b8ca2b
c1ef53c
56972f8
d08039e
7abf702
fb2f317
0904e44
de66018
d2cef5a
ef64418
786334a
bf9c9c5
821e2da
dc92048
3be817b
6750997
e66f7aa
fc17a2c
9f20c46
d8ff951
277d093
3afc360
29d33d9
271d357
47365c1
8877ff7
ca349c8
5125739
eb95a71
13dd38a
ef3516c
5b135c3
55f47f1
7a80cdf
f21a4d6
2094181
aeca6a1
5536922
e56e35a
22b06cc
b9394a0
fc97171
d47dc29
f584aca
eea3932
8594a06
209dff1
ea48e96
dfc5b3e
d29f68a
2e23540
f7d4d21
c398550
e4fd972
680a6c3
163a482
4e2b647
9cb1900
b738cd6
4aa4f02
bc7c9db
d94a68c
4e52c42
5396950
8815ba3
a6c6f0d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,16 +20,25 @@ var block = { | |
nptable: noop, | ||
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, | ||
list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/, | ||
html: /^ *(?:comment *(?:\n|\s*$)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/, | ||
html: '^ {0,3}(?:' // optional indentation | ||
+ '<(script|pre|style)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1) | ||
+ '|comment[^\\n]*(\\n+|$)' // (2) | ||
+ '|<\\?[\\s\\S]*?\\?>\\n*' // (3) | ||
+ '|<![A-Z][\\s\\S]*?>\\n*' // (4) | ||
+ '|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>\\n*' // (5) | ||
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:\\n{2,}|$)' // (6) | ||
+ '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=\\h*\\n)[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag | ||
+ '|</(?!script|pre|style)[a-z][\\w-]*\\s*>(?=\\h*\\n)[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag | ||
+ ')', | ||
def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, | ||
table: noop, | ||
lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/, | ||
paragraph: /^([^\n]+(?:\n?(?!hr|heading|lheading| {0,3}>|tag)[^\n]+)+)/, | ||
paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading| {0,3}>|<\/?(?:tag)(?: +|\\n|\/?>)|<(?:script|pre|style|!--))[^\n]+)+)/, | ||
text: /^[^\n]+/ | ||
}; | ||
|
||
block._label = /(?:\\[\[\]]|[^\[\]])+/; | ||
block._title = /(?:"(?:\\"|[^"]|"[^"\n]*")*"|'\n?(?:[^'\n]+\n?)*'|\([^()]*\))/; | ||
block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/; | ||
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Safe. |
||
block.def = edit(block.def) | ||
.replace('label', block._label) | ||
.replace('title', block._title) | ||
|
@@ -47,23 +56,24 @@ block.list = edit(block.list) | |
.replace('def', '\\n+(?=' + block.def.source + ')') | ||
.getRegex(); | ||
|
||
block._tag = '(?!(?:' | ||
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code' | ||
+ '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo' | ||
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b'; | ||
|
||
block.html = edit(block.html) | ||
.replace('comment', /<!--[\s\S]*?-->/) | ||
.replace('closed', /<(tag)[\s\S]+?<\/\1>/) | ||
.replace('closing', /<tag(?:"[^"]*"|'[^']*'|\s[^'"\/>\s]*)*?\/?>/) | ||
.replace(/tag/g, block._tag) | ||
block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' | ||
+ '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' | ||
+ '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' | ||
+ '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' | ||
+ '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' | ||
+ '|track|ul'; | ||
block._comment = /<!--(?!-?>)[\s\S]*?-->/; | ||
block.html = edit(block.html, 'i') | ||
.replace('comment', block._comment) | ||
.replace('tag', block._tag) | ||
.replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) | ||
.getRegex(); | ||
|
||
block.paragraph = edit(block.paragraph) | ||
.replace('hr', block.hr) | ||
.replace('heading', block.heading) | ||
.replace('lheading', block.lheading) | ||
.replace('tag', '<' + block._tag) | ||
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks | ||
.getRegex(); | ||
|
||
block.blockquote = edit(block.blockquote) | ||
|
@@ -101,6 +111,24 @@ block.tables = merge({}, block.gfm, { | |
table: /^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/ | ||
}); | ||
|
||
/** | ||
* Pedantic grammar | ||
*/ | ||
|
||
block.pedantic = merge({}, block.normal, { | ||
html: edit( | ||
'^ *(?:comment *(?:\\n|\\s*$)' | ||
+ '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag | ||
+ '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
.replace('comment', block._comment) | ||
.replace(/tag/g, '(?!(?:' | ||
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' | ||
+ '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' | ||
+ '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') | ||
.getRegex(), | ||
def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/ | ||
}); | ||
|
||
/** | ||
* Block Lexer | ||
*/ | ||
|
@@ -111,7 +139,9 @@ function Lexer(options) { | |
this.options = options || marked.defaults; | ||
this.rules = block.normal; | ||
|
||
if (this.options.gfm) { | ||
if (this.options.pedantic) { | ||
this.rules = block.pedantic; | ||
} else if (this.options.gfm) { | ||
if (this.options.tables) { | ||
this.rules = block.tables; | ||
} else { | ||
|
@@ -370,7 +400,7 @@ Lexer.prototype.token = function(src, top) { | |
if (top && (cap = this.rules.def.exec(src))) { | ||
src = src.substring(cap[0].length); | ||
if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); | ||
tag = cap[1].toLowerCase(); | ||
tag = cap[1].toLowerCase().replace(/\s+/g, ' '); | ||
if (!this.tokens.links[tag]) { | ||
this.tokens.links[tag] = { | ||
href: cap[2], | ||
|
@@ -461,13 +491,18 @@ Lexer.prototype.token = function(src, top) { | |
*/ | ||
|
||
var inline = { | ||
escape: /^\\([\\`*{}\[\]()#+\-.!_>])/, | ||
escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/, | ||
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, | ||
url: noop, | ||
tag: /^<!--[\s\S]*?-->|^<\/?[a-zA-Z0-9\-]+(?:"[^"]*"|'[^']*'|\s[^<'">\/\s]*)*?\/?>/, | ||
link: /^!?\[(inside)\]\(href\)/, | ||
reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/, | ||
nolink: /^!?\[((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\]/, | ||
tag: '^comment' | ||
+ '|^</[a-zA-Z][\\w:-]*\\s*>' // closing tag | |
+ '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @davisjam same as above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you print out There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nevermind, I did it. This regex is safe. |
||
+ '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?> | ||
+ '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html> | ||
+ '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section | ||
link: /^!?\[(label)\]\(href(?:\s+(title))?\s*\)/, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @davisjam the commonmark spec is covered by this rule (and subrules at lines 530 and following). This is safe because the href part can't contain spaces, so the URI stops at the closing ) or at the first space. |
||
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/, | ||
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/, | ||
strong: /^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)|^__([^\s])__(?!_)|^\*\*([^\s])\*\*(?!\*)/, | ||
em: /^_([^\s][\s\S]*?[^\s_])_(?!_)|^_([^\s_][\s\S]*?[^\s])_(?!_)|^\*([^\s][\s\S]*?[^\s*])\*(?!\*)|^\*([^\s*][\s\S]*?[^\s])\*(?!\*)|^_([^\s_])_(?!_)|^\*([^\s*])\*(?!\*)/, | ||
code: /^(`+)\s*([\s\S]*?[^`]?)\s*\1(?!`)/, | ||
|
@@ -476,24 +511,34 @@ var inline = { | |
text: /^[\s\S]+?(?=[\\<!\[`*]|\b_| {2,}\n|$)/ | ||
}; | ||
|
||
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; | ||
|
||
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; | ||
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; | ||
|
||
inline.autolink = edit(inline.autolink) | ||
.replace('scheme', inline._scheme) | ||
.replace('email', inline._email) | ||
.getRegex() | ||
.getRegex(); | ||
|
||
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Safe. |
||
|
||
inline._inside = /(?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]]|\](?=[^\[]*\]))*/; | ||
inline._href = /\s*<?([\s\S]*?)>?(?:\s+['"]([\s\S]*?)['"])?\s*/; | ||
inline.tag = edit(inline.tag) | ||
.replace('comment', block._comment) | ||
.replace('attribute', inline._attribute) | ||
.getRegex(); | ||
|
||
inline._label = /(?:\[[^\[\]]*\]|\\[\[\]]?|`[^`]*`|[^\[\]\\])*?/; | ||
inline._href = /\s*(<(?:\\[<>]?|[^\s<>\\])*>|(?:\\[()]?|\([^\s\x00-\x1f()\\]*\)|[^\s\x00-\x1f()\\])*?)/; | ||
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @davisjam check these 3 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Safe. |
||
|
||
inline.link = edit(inline.link) | ||
.replace('inside', inline._inside) | ||
.replace('label', inline._label) | ||
.replace('href', inline._href) | ||
.replace('title', inline._title) | ||
.getRegex(); | ||
|
||
inline.reflink = edit(inline.reflink) | ||
.replace('inside', inline._inside) | ||
.replace('label', inline._label) | ||
.getRegex(); | ||
|
||
/** | ||
|
@@ -508,7 +553,13 @@ inline.normal = merge({}, inline); | |
|
||
inline.pedantic = merge({}, inline.normal, { | ||
strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, | ||
em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/ | ||
em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/, | ||
link: edit(/^!?\[(label)\]\(\s*<?([\s\S]*?)>?(?:\s+(['"][\s\S]*?['"]))?\s*\)/) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This regex is vulnerable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A static scan of the tree on @Feder1co5oave's PR says:
(JSON-encoded). Checking the ones requested individually... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty sure this is the "old" link regex, I didn't change it.
The double ?, maybe? I'm kinda lost. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is what
The issue is the use of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok I see where we're getting at... I'm not sure what that output means, can you translate that into some example input that triggers the quadratic behavior?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wrapping the URI in angle brackets is optional. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't sound like this can be done cleanly (and safely) in a single regex. Can you provide a link to the spec for this rule? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the "legacy" link rule of marked, it's been like this for a long time. I didn't change it. It doesn't follow any clear spec, it just tries its best. We should come up with a different rule that doesn't break compatibility. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Since you didn't change it, it shouldn't prevent the PR from being merged. |
||
.replace('label', inline._label) | ||
.getRegex(), | ||
reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) | ||
.replace('label', inline._label) | ||
.getRegex() | ||
}); | ||
|
||
/** | ||
|
@@ -552,14 +603,14 @@ function InlineLexer(links, options) { | |
throw new Error('Tokens array requires a `links` property.'); | ||
} | ||
|
||
if (this.options.gfm) { | ||
if (this.options.pedantic) { | ||
this.rules = inline.pedantic; | ||
} else if (this.options.gfm) { | ||
if (this.options.breaks) { | ||
this.rules = inline.breaks; | ||
} else { | ||
this.rules = inline.gfm; | ||
} | ||
} else if (this.options.pedantic) { | ||
this.rules = inline.pedantic; | ||
} | ||
} | ||
|
||
|
@@ -587,6 +638,7 @@ InlineLexer.prototype.output = function(src) { | |
link, | ||
text, | ||
href, | ||
title, | ||
cap; | ||
|
||
while (src) { | ||
|
@@ -650,9 +702,12 @@ InlineLexer.prototype.output = function(src) { | |
if (cap = this.rules.link.exec(src)) { | ||
src = src.substring(cap[0].length); | ||
this.inLink = true; | ||
href = cap[2]; | ||
href = href[0] === '<' ? href.substring(1, href.length - 1) : href; | ||
title = cap[3] ? cap[3].substring(1, cap[3].length - 1) : cap[3]; | ||
out += this.outputLink(cap, { | ||
href: cap[2], | ||
title: cap[3] | ||
href: InlineLexer.escapes(href), | ||
title: InlineLexer.escapes(title) | ||
}); | ||
this.inLink = false; | ||
continue; | ||
|
@@ -725,12 +780,16 @@ InlineLexer.prototype.output = function(src) { | |
return out; | ||
}; | ||
|
||
InlineLexer.escapes = function(text) { | ||
return text ? text.replace(InlineLexer.rules._escapes, '$1') : text; | ||
} | ||
|
||
/** | ||
* Compile Link | ||
*/ | ||
|
||
InlineLexer.prototype.outputLink = function(cap, link) { | ||
var href = escape(link.href), | ||
var href = link.href, | ||
title = link.title ? escape(link.title) : null; | ||
|
||
return cap[0].charAt(0) !== '!' | ||
|
@@ -917,7 +976,12 @@ Renderer.prototype.link = function(href, title, text) { | |
if (this.options.baseUrl && !originIndependentUrl.test(href)) { | ||
href = resolveUrl(this.options.baseUrl, href); | ||
} | ||
var out = '<a href="' + href + '"'; | ||
try { | ||
href = encodeURI(href).replace(/%25/g, '%'); | ||
} catch (e) { | ||
return text; | ||
} | ||
var out = '<a href="' + escape(href) + '"'; | ||
if (title) { | ||
out += ' title="' + title + '"'; | ||
} | ||
|
@@ -1137,10 +1201,8 @@ Parser.prototype.tok = function() { | |
return this.renderer.listitem(body); | ||
} | ||
case 'html': { | ||
var html = !this.token.pre && !this.options.pedantic | ||
? this.inline.output(this.token.text) | ||
: this.token.text; | ||
return this.renderer.html(html); | ||
// TODO parse inline content if parameter markdown=1 | ||
return this.renderer.html(this.token.text); | ||
} | ||
case 'paragraph': { | ||
return this.renderer.paragraph(this.inline.output(this.token.text)); | ||
|
@@ -1179,7 +1241,7 @@ function unescape(html) { | |
} | ||
|
||
function edit(regex, opt) { | ||
regex = regex.source; | ||
regex = regex.source || regex; | ||
opt = opt || ''; | ||
return { | ||
replace: function(name, val) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@davisjam is this safe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you print out `html.source`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nevermind, I did it. Safe.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Feder1co5oave what is the `\\h*` for in `(?=\\h*\\n)`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is that supposed to be `\\n*`?