Skip to content

Commit

Permalink
fix quadratic complexity in reference parser
Browse files Browse the repository at this point in the history
  • Loading branch information
rlidwka committed Mar 1, 2024
1 parent 806bead commit 5721f02
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 70 deletions.
1 change: 0 additions & 1 deletion lib/helpers/parse_link_destination.mjs
Expand Up @@ -10,7 +10,6 @@ export default function parseLinkDestination (str, start, max) {
const result = {
ok: false,
pos: 0,
lines: 0,
str: ''
}

Expand Down
66 changes: 40 additions & 26 deletions lib/helpers/parse_link_title.mjs
Expand Up @@ -3,50 +3,64 @@

import { unescapeAll } from '../common/utils.mjs'

export default function parseLinkTitle (str, start, max) {
let code, marker
let lines = 0
// Parse link title within `str` in [start, max] range,
// or continue previous parsing if `prev_state` is defined (equal to result of last execution).
//
export default function parseLinkTitle (str, start, max, prev_state) {
let code
let pos = start

const result = {
const state = {
// if `true`, this is a valid link title
ok: false,
// if `true`, this link can be continued on the next line
can_continue: false,
// if `ok`, it's the position of the first character after the closing marker
pos: 0,
lines: 0,
str: ''
// if `ok`, it's the unescaped title
str: '',
// expected closing marker character code
marker: 0
}

if (pos >= max) { return result }
if (prev_state) {
// this is a continuation of a previous parseLinkTitle call on the next line,
// used in reference links only
state.str = prev_state.str
state.marker = prev_state.marker
} else {
if (pos >= max) { return state }

marker = str.charCodeAt(pos)
let marker = str.charCodeAt(pos)
if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return state }

if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return result }
start++
pos++

pos++
// if opening marker is "(", switch it to closing marker ")"
if (marker === 0x28) { marker = 0x29 }

// if opening marker is "(", switch it to closing marker ")"
if (marker === 0x28) { marker = 0x29 }
state.marker = marker
}

while (pos < max) {
code = str.charCodeAt(pos)
if (code === marker) {
result.pos = pos + 1
result.lines = lines
result.str = unescapeAll(str.slice(start + 1, pos))
result.ok = true
return result
} else if (code === 0x28 /* ( */ && marker === 0x29 /* ) */) {
return result
} else if (code === 0x0A) {
lines++
if (code === state.marker) {
state.pos = pos + 1
state.str += unescapeAll(str.slice(start, pos))
state.ok = true
return state
} else if (code === 0x28 /* ( */ && state.marker === 0x29 /* ) */) {
return state
} else if (code === 0x5C /* \ */ && pos + 1 < max) {
pos++
if (str.charCodeAt(pos) === 0x0A) {
lines++
}
}

pos++
}

return result
// no closing marker found, but this link title may continue on the next line (for references)
state.can_continue = true
state.str += unescapeAll(str.slice(start, pos))
return state
}
117 changes: 74 additions & 43 deletions lib/rules_block/reference.mjs
@@ -1,8 +1,6 @@
import { isSpace, normalizeReference } from '../common/utils.mjs'

export default function reference (state, startLine, _endLine, silent) {
let lines = 0

let pos = state.bMarks[startLine] + state.tShift[startLine]
let max = state.eMarks[startLine]
let nextLine = startLine + 1
Expand All @@ -12,45 +10,53 @@ export default function reference (state, startLine, _endLine, silent) {

if (state.src.charCodeAt(pos) !== 0x5B/* [ */) { return false }

// Simple check to quickly interrupt scan on [link](url) at the start of line.
// Can be useful in practice: https://github.com/markdown-it/markdown-it/issues/54
while (++pos < max) {
if (state.src.charCodeAt(pos) === 0x5D /* ] */ &&
state.src.charCodeAt(pos - 1) !== 0x5C/* \ */) {
if (pos + 1 === max) { return false }
if (state.src.charCodeAt(pos + 1) !== 0x3A/* : */) { return false }
break
}
}

const endLine = state.lineMax
function getNextLine (nextLine) {
const endLine = state.lineMax

// jump line-by-line until empty one or EOF
const terminatorRules = state.md.block.ruler.getRules('reference')
if (nextLine >= endLine || state.isEmpty(nextLine)) {
// empty line or end of input
return null
}

const oldParentType = state.parentType
state.parentType = 'reference'
let isContinuation = false

for (; nextLine < endLine && !state.isEmpty(nextLine); nextLine++) {
// this would be a code block normally, but after paragraph
// it's considered a lazy continuation regardless of what's there
if (state.sCount[nextLine] - state.blkIndent > 3) { continue }
if (state.sCount[nextLine] - state.blkIndent > 3) { isContinuation = true }

// quirk for blockquotes, this line should already be checked by that rule
if (state.sCount[nextLine] < 0) { continue }

// Some tags can terminate paragraph without empty line.
let terminate = false
for (let i = 0, l = terminatorRules.length; i < l; i++) {
if (terminatorRules[i](state, nextLine, endLine, true)) {
terminate = true
break
if (state.sCount[nextLine] < 0) { isContinuation = true }

if (!isContinuation) {
const terminatorRules = state.md.block.ruler.getRules('reference')
const oldParentType = state.parentType
state.parentType = 'reference'

// Some tags can terminate paragraph without empty line.
let terminate = false
for (let i = 0, l = terminatorRules.length; i < l; i++) {
if (terminatorRules[i](state, nextLine, endLine, true)) {
terminate = true
break
}
}

state.parentType = oldParentType
if (terminate) {
// terminated by another block
return null
}
}
if (terminate) { break }

const pos = state.bMarks[nextLine] + state.tShift[nextLine]
const max = state.eMarks[nextLine]

// max + 1 explicitly includes the newline
return state.src.slice(pos, max + 1)
}

const str = state.getLines(startLine, nextLine, state.blkIndent, false).trim()
let str = state.src.slice(pos, max + 1)

max = str.length
let labelEnd = -1

Expand All @@ -62,11 +68,21 @@ export default function reference (state, startLine, _endLine, silent) {
labelEnd = pos
break
} else if (ch === 0x0A /* \n */) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
} else if (ch === 0x5C /* \ */) {
pos++
if (pos < max && str.charCodeAt(pos) === 0x0A) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
}
}
}
Expand All @@ -78,7 +94,12 @@ export default function reference (state, startLine, _endLine, silent) {
for (pos = labelEnd + 2; pos < max; pos++) {
const ch = str.charCodeAt(pos)
if (ch === 0x0A) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
} else if (isSpace(ch)) {
/* eslint no-empty:0 */
} else {
Expand All @@ -95,19 +116,23 @@ export default function reference (state, startLine, _endLine, silent) {
if (!state.md.validateLink(href)) { return false }

pos = destRes.pos
lines += destRes.lines

// save cursor state; we may need to roll back later
const destEndPos = pos
const destEndLineNo = lines
const destEndLineNo = nextLine

// [label]: destination 'title'
// ^^^ skipping those spaces
const start = pos
for (; pos < max; pos++) {
const ch = str.charCodeAt(pos)
if (ch === 0x0A) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
} else if (isSpace(ch)) {
/* eslint no-empty:0 */
} else {
Expand All @@ -117,17 +142,25 @@ export default function reference (state, startLine, _endLine, silent) {

// [label]: destination 'title'
// ^^^^^^^ parse this
const titleRes = state.md.helpers.parseLinkTitle(str, pos, max)
let titleRes = state.md.helpers.parseLinkTitle(str, pos, max)
while (titleRes.can_continue) {
const lineContent = getNextLine(nextLine)
if (lineContent === null) break
str += lineContent
pos = max
max = str.length
nextLine++
titleRes = state.md.helpers.parseLinkTitle(str, pos, max, titleRes)
}
let title

if (pos < max && start !== pos && titleRes.ok) {
title = titleRes.str
pos = titleRes.pos
lines += titleRes.lines
} else {
title = ''
pos = destEndPos
lines = destEndLineNo
nextLine = destEndLineNo
}

// skip trailing spaces until the rest of the line
Expand All @@ -143,7 +176,7 @@ export default function reference (state, startLine, _endLine, silent) {
// but it could still be a valid reference if we roll back
title = ''
pos = destEndPos
lines = destEndLineNo
nextLine = destEndLineNo
while (pos < max) {
const ch = str.charCodeAt(pos)
if (!isSpace(ch)) { break }
Expand Down Expand Up @@ -174,8 +207,6 @@ export default function reference (state, startLine, _endLine, silent) {
state.env.references[label] = { title, href }
}

state.parentType = oldParentType

state.line = startLine + lines + 1
state.line = nextLine
return true
}
34 changes: 34 additions & 0 deletions test/fixtures/markdown-it/commonmark_extras.txt
Expand Up @@ -49,6 +49,40 @@ Reference labels: support ligatures (equivalent according to unicode case foldin
<p><a href="/url">fffifl</a></p>
.

Reference can be interrupted by other rules
.
[foo]: /url 'title
- - -
'

[foo]
.
<p>[foo]: /url 'title</p>
<hr>
<p>’</p>
<p>[foo]</p>
.

Escape character in link reference title doesn't escape newlines
.
[foo]: /url "
hello
\
\
\
world
"

[foo]
.
<p><a href="/url" title="
hello
\
\
\
world
">foo</a></p>
.

Issue #35. `<` should work as punctuation
.
Expand Down

0 comments on commit 5721f02

Please sign in to comment.