Skip to content

Commit

Permalink
Merge pull request #1686 from calculuschild/EmphasisFixes
Browse files Browse the repository at this point in the history
  • Loading branch information
UziTech committed Jul 13, 2020
2 parents a761316 + 6b729ed commit dddf9ae
Show file tree
Hide file tree
Showing 9 changed files with 210 additions and 129 deletions.
27 changes: 24 additions & 3 deletions src/Lexer.js
Expand Up @@ -319,9 +319,29 @@ module.exports = class Lexer {
/**
* Lexing/Compiling
*/
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false, prevChar = '') {
let token;

// String with links masked to avoid interference with em and strong
let maskedSrc = src;
let match;

// Mask out reflinks
if (this.tokens.links) {
const links = Object.keys(this.tokens.links);
if (links.length > 0) {
while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
}
}
}
}
// Mask out other blocks
while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
}

while (src) {
// escape
if (token = this.tokenizer.escape(src)) {
Expand Down Expand Up @@ -360,15 +380,15 @@ module.exports = class Lexer {
}

// strong
if (token = this.tokenizer.strong(src)) {
if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) {
src = src.substring(token.raw.length);
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
tokens.push(token);
continue;
}

// em
if (token = this.tokenizer.em(src)) {
if (token = this.tokenizer.em(src, maskedSrc, prevChar)) {
src = src.substring(token.raw.length);
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
tokens.push(token);
Expand Down Expand Up @@ -414,6 +434,7 @@ module.exports = class Lexer {
// text
if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
src = src.substring(token.raw.length);
prevChar = token.raw.slice(-1);
tokens.push(token);
continue;
}
Expand Down
56 changes: 40 additions & 16 deletions src/Tokenizer.js
Expand Up @@ -490,25 +490,49 @@ module.exports = class Tokenizer {
}
}

strong(src) {
const cap = this.rules.inline.strong.exec(src);
if (cap) {
return {
type: 'strong',
raw: cap[0],
text: cap[4] || cap[3] || cap[2] || cap[1]
};
strong(src, maskedSrc, prevChar = '') {
let match = this.rules.inline.strong.start.exec(src);

if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
const endReg = match[0] === '**' ? this.rules.inline.strong.endAst : this.rules.inline.strong.endUnd;

endReg.lastIndex = 0;

let cap;
while ((match = endReg.exec(maskedSrc)) != null) {
cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
if (cap) {
return {
type: 'strong',
raw: src.slice(0, cap[0].length),
text: src.slice(2, cap[0].length - 2)
};
}
}
}
}

em(src) {
const cap = this.rules.inline.em.exec(src);
if (cap) {
return {
type: 'em',
raw: cap[0],
text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
};
em(src, maskedSrc, prevChar = '') {
let match = this.rules.inline.em.start.exec(src);

if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
const endReg = match[0] === '*' ? this.rules.inline.em.endAst : this.rules.inline.em.endUnd;

endReg.lastIndex = 0;

let cap;
while ((match = endReg.exec(maskedSrc)) != null) {
cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
if (cap) {
return {
type: 'em',
raw: src.slice(0, cap[0].length),
text: src.slice(1, cap[0].length - 1)
};
}
}
}
}

Expand Down
88 changes: 79 additions & 9 deletions src/rules.js
Expand Up @@ -168,19 +168,74 @@ const inline = {
link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,
em: /^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\s,punctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\s,punctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,
reflinkSearch: 'reflink|nolink(?!\\()',
// Rule set for strong emphasis (`**text**` / `__text__`).
// The word `punctuation` inside these patterns is a placeholder that is
// replaced with inline._punctuation further down in this file.
strong: {
start: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])|__/, // capture group 1 is set when the opening ** is immediately followed by * or punctuation
middle: /^\*\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*\*$|^__(?![\s])((?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?)__$/,
endAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punctuation, or the final ** must also be followed by punctuation or whitespace (or end of input)
endUnd: /[^\s]__(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be whitespace, and the final __ must precede punctuation or whitespace (or end of input)
},
// Rule set for emphasis (`*text*` / `_text_`).
// The words `punctuation` and `overlapSkip` inside these patterns are
// placeholders that are replaced with inline._punctuation and
// inline._overlapSkip further down in this file.
em: {
start: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // capture group 1 is set when the opening * is immediately followed by punctuation
middle: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/,
endAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punctuation, or the final * must also be followed by punctuation or whitespace (or end of input)
endUnd: /[^\s]_(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be whitespace, and the final _ must precede punctuation or whitespace (or end of input)
},
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
br: /^( {2,}|\\)\n(?!\s*$)/,
del: noopTest,
text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/
text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/,
punctuation: /^([\s*punctuation])/
};

// list of punctuation marks from common mark spec
// without ` and ] to workaround Rule 17 (inline code blocks/links)
// without , to work around example 393
inline._punctuation = '!"#$%&\'()*+\\-./:;<=>?@\\[^_{|}~';
inline.em = edit(inline.em).replace(/punctuation/g, inline._punctuation).getRegex();
// without * and _ to workaround cases with double emphasis
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();

// Assemble the final em/strong regexes by substituting the placeholder words
// (`punctuation`, `overlapSkip`) used in the rule templates above.

// sequences em should skip over [title](link), `code`, <html>
inline._blockSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
// NOTE(review): the `**` alternative below escapes `[`, `]` and the inner `*`,
// so it matches a literal "[^*" run rather than a `[^*]` character class like
// the `__[^_]*?__` alternative does. Looks unintended - confirm before changing.
inline._overlapSkip = '__[^_]*?__|\\*\\*\\[^\\*\\]*?\\*\\*';

inline.em.start = edit(inline.em.start)
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.em.middle = edit(inline.em.middle)
  .replace(/punctuation/g, inline._punctuation)
  .replace(/overlapSkip/g, inline._overlapSkip)
  .getRegex();

// The end-delimiter regexes are built with the 'g' option because the
// tokenizer calls exec() on them repeatedly to walk through candidates.
inline.em.endAst = edit(inline.em.endAst, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.em.endUnd = edit(inline.em.endUnd, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.strong.start = edit(inline.strong.start)
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

// strong.middle uses the `overlapSkip` placeholder (it contains no
// `blockSkip`), so substitute that one, exactly as em.middle does. Replacing
// `blockSkip` here left the literal text "overlapSkip" inside the regex.
inline.strong.middle = edit(inline.strong.middle)
  .replace(/punctuation/g, inline._punctuation)
  .replace(/overlapSkip/g, inline._overlapSkip)
  .getRegex();

inline.strong.endAst = edit(inline.strong.endAst, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.strong.endUnd = edit(inline.strong.endUnd, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.blockSkip = edit(inline._blockSkip, 'g')
  .getRegex();

inline.overlapSkip = edit(inline._overlapSkip, 'g')
  .getRegex();

inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

Expand Down Expand Up @@ -212,6 +267,11 @@ inline.reflink = edit(inline.reflink)
.replace('label', inline._label)
.getRegex();

// Build the search regex from the 'reflink|nolink(?!\\()' template by
// substituting the `reflink` and `nolink` placeholders with the corresponding
// inline rules. Built with the 'g' option; the lexer calls exec() on it in a
// loop to find every reference-style link in the source.
inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
.replace('reflink', inline.reflink)
.replace('nolink', inline.nolink)
.getRegex();

/**
* Normal Inline Grammar
*/
Expand All @@ -223,8 +283,18 @@ inline.normal = merge({}, inline);
*/

inline.pedantic = merge({}, inline.normal, {
strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/,
strong: {
start: /^__|\*\*/,
middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
endAst: /\*\*(?!\*)/g,
endUnd: /__(?!_)/g
},
em: {
start: /^_|\*/,
middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
endAst: /\*(?!\*)/g,
endUnd: /_(?!_)/g
},
link: edit(/^!?\[(label)\]\((.*?)\)/)
.replace('label', inline._label)
.getRegex(),
Expand Down

1 comment on commit dddf9ae

@vercel
Copy link

@vercel vercel bot commented on dddf9ae Jul 13, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.