Skip to content

Commit

Permalink
fix: speed up parsing long lists (#2302)
Browse files: browse the repository at this point in the history
Co-authored-by: Tony Brix <tony@brix.ninja>
  • Loading branch information
calculuschild and UziTech committed Dec 2, 2021
1 parent a06cec4 commit e0005d8
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 166 deletions.
79 changes: 36 additions & 43 deletions lib/marked.cjs
Expand Up @@ -533,7 +533,7 @@ var Tokenizer = /*#__PURE__*/function () {
var cap = this.rules.block.list.exec(src);

if (cap) {
var raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine, line, lines, itemContents;
var raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine, line, nextLine, rawLine, itemContents;
var bull = cap[1].trim();
var isordered = bull.length > 1;
var list = {
Expand All @@ -551,83 +551,77 @@ var Tokenizer = /*#__PURE__*/function () {
} // Get next list item


var itemRegex = new RegExp("^( {0,3}" + bull + ")((?: [^\\n]*| *)(?:\\n[^\\n]*)*(?:\\n|$))"); // Get each top-level item
var itemRegex = new RegExp("^( {0,3}" + bull + ")((?: [^\\n]*)?(?:\\n|$))"); // Check if current bullet point can start a new List Item

while (src) {
if (this.rules.block.hr.test(src)) {
// End list if we encounter an HR (possibly move into itemRegex?)
if (!(cap = itemRegex.exec(src))) {
break;
}

if (!(cap = itemRegex.exec(src))) {
if (this.rules.block.hr.test(src)) {
// End list if bullet was actually HR (possibly move into itemRegex?)
break;
}

lines = cap[2].split('\n');
raw = cap[0];
src = src.substring(raw.length);
line = cap[2].split('\n', 1)[0];
nextLine = src.split('\n', 1)[0];

if (this.options.pedantic) {
indent = 2;
itemContents = lines[0].trimLeft();
itemContents = line.trimLeft();
} else {
indent = cap[2].search(/[^ ]/); // Find first non-space char

indent = cap[1].length + (indent > 4 ? 1 : indent); // indented code blocks after 4 spaces; indent is always 1
indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent

itemContents = lines[0].slice(indent - cap[1].length);
itemContents = line.slice(indent);
indent += cap[1].length;
}

blankLine = false;
raw = cap[0];

if (!lines[0] && /^ *$/.test(lines[1])) {
// items begin with at most one blank line
raw = cap[1] + lines.slice(0, 2).join('\n') + '\n';
if (!line && /^ *$/.test(nextLine)) {
// Items begin with at most one blank line
raw += nextLine + '\n';
src = src.substring(nextLine.length + 1);
list.loose = true;
lines = [];
}

var nextBulletRegex = new RegExp("^ {0," + Math.min(3, indent - 1) + "}(?:[*+-]|\\d{1,9}[.)])");
var nextBulletRegex = new RegExp("^ {0," + Math.min(3, indent - 1) + "}(?:[*+-]|\\d{1,9}[.)])"); // Check if following lines should be included in List Item

for (i = 1; i < lines.length; i++) {
line = lines[i];
while (src && !list.loose) {
rawLine = src.split('\n', 1)[0];
line = rawLine; // Re-align to follow commonmark nesting rules

if (this.options.pedantic) {
// Re-align to follow commonmark nesting rules
line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
} // End list item if found start of new bullet


if (nextBulletRegex.test(line)) {
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
break;
} // Until we encounter a blank line, item contents do not need indentation


if (!blankLine) {
if (!line.trim()) {
// Check if current line is empty
blankLine = true;
} // Dedent if possible


if (line.search(/[^ ]/) >= indent) {
itemContents += '\n' + line.slice(indent);
} else {
itemContents += '\n' + line;
}

continue;
} // Dedent this line

}

if (line.search(/[^ ]/) >= indent || !line.trim()) {
// Dedent if possible
itemContents += '\n' + line.slice(indent);
continue;
} else if (!blankLine) {
// Until blank line, item doesn't need indentation
itemContents += '\n' + line;
} else {
// Line was not properly indented; end of this item
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
// Otherwise, improper indentation ends this item
break;
}

if (!blankLine && !line.trim()) {
// Check if current line is blank
blankLine = true;
}

raw += rawLine + '\n';
src = src.substring(rawLine.length + 1);
}

if (!list.loose) {
Expand Down Expand Up @@ -658,7 +652,6 @@ var Tokenizer = /*#__PURE__*/function () {
text: itemContents
});
list.raw += raw;
src = src.slice(raw.length);
} // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic


Expand All @@ -671,7 +664,7 @@ var Tokenizer = /*#__PURE__*/function () {
this.lexer.state.top = false;
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);

if (list.items[i].tokens.some(function (t) {
if (!list.loose && list.items[i].tokens.some(function (t) {
return t.type === 'space';
})) {
list.loose = true;
Expand Down
74 changes: 35 additions & 39 deletions lib/marked.esm.js
Expand Up @@ -452,7 +452,7 @@ class Tokenizer {
let cap = this.rules.block.list.exec(src);
if (cap) {
let raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine,
line, lines, itemContents;
line, nextLine, rawLine, itemContents;

let bull = cap[1].trim();
const isordered = bull.length > 1;
Expand All @@ -473,76 +473,73 @@ class Tokenizer {
}

// Get next list item
const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*| *)(?:\\n[^\\n]*)*(?:\\n|$))`);
const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*)?(?:\\n|$))`);

// Get each top-level item
// Check if current bullet point can start a new List Item
while (src) {
if (this.rules.block.hr.test(src)) { // End list if we encounter an HR (possibly move into itemRegex?)
if (!(cap = itemRegex.exec(src))) {
break;
}

if (!(cap = itemRegex.exec(src))) {
if (this.rules.block.hr.test(src)) { // End list if bullet was actually HR (possibly move into itemRegex?)
break;
}

lines = cap[2].split('\n');
raw = cap[0];
src = src.substring(raw.length);

line = cap[2].split('\n', 1)[0];
nextLine = src.split('\n', 1)[0];

if (this.options.pedantic) {
indent = 2;
itemContents = lines[0].trimLeft();
itemContents = line.trimLeft();
} else {
indent = cap[2].search(/[^ ]/); // Find first non-space char
indent = cap[1].length + (indent > 4 ? 1 : indent); // indented code blocks after 4 spaces; indent is always 1
itemContents = lines[0].slice(indent - cap[1].length);
indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent
itemContents = line.slice(indent);
indent += cap[1].length;
}

blankLine = false;
raw = cap[0];

if (!lines[0] && /^ *$/.test(lines[1])) { // items begin with at most one blank line
raw = cap[1] + lines.slice(0, 2).join('\n') + '\n';
if (!line && /^ *$/.test(nextLine)) { // Items begin with at most one blank line
raw += nextLine + '\n';
src = src.substring(nextLine.length + 1);
list.loose = true;
lines = [];
}

const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])`);

for (i = 1; i < lines.length; i++) {
line = lines[i];
// Check if following lines should be included in List Item
while (src && !list.loose) {
rawLine = src.split('\n', 1)[0];
line = rawLine;

if (this.options.pedantic) { // Re-align to follow commonmark nesting rules
// Re-align to follow commonmark nesting rules
if (this.options.pedantic) {
line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
}

// End list item if found start of new bullet
if (nextBulletRegex.test(line)) {
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
break;
}

// Until we encounter a blank line, item contents do not need indentation
if (!blankLine) {
if (!line.trim()) { // Check if current line is empty
blankLine = true;
}

// Dedent if possible
if (line.search(/[^ ]/) >= indent) {
itemContents += '\n' + line.slice(indent);
} else {
itemContents += '\n' + line;
}
continue;
}

// Dedent this line
if (line.search(/[^ ]/) >= indent || !line.trim()) {
if (line.search(/[^ ]/) >= indent || !line.trim()) { // Dedent if possible
itemContents += '\n' + line.slice(indent);
continue;
} else { // Line was not properly indented; end of this item
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
} else if (!blankLine) { // Until blank line, item doesn't need indentation
itemContents += '\n' + line;
} else { // Otherwise, improper indentation ends this item
break;
}

if (!blankLine && !line.trim()) { // Check if current line is blank
blankLine = true;
}

raw += rawLine + '\n';
src = src.substring(rawLine.length + 1);
}

if (!list.loose) {
Expand Down Expand Up @@ -573,7 +570,6 @@ class Tokenizer {
});

list.raw += raw;
src = src.slice(raw.length);
}

// Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
Expand All @@ -587,7 +583,7 @@ class Tokenizer {
for (i = 0; i < l; i++) {
this.lexer.state.top = false;
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
if (list.items[i].tokens.some(t => t.type === 'space')) {
if (!list.loose && list.items[i].tokens.some(t => t.type === 'space')) {
list.loose = true;
list.items[i].loose = true;
}
Expand Down

1 comment on commit e0005d8

@vercel
Copy link

@vercel vercel bot commented on e0005d8 Dec 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.