diff --git a/src/Lexer.js b/src/Lexer.js index c4bbf41a83..acb0fc2996 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -350,7 +350,8 @@ export class Lexer { // Mask out escaped em & strong delimiters while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) { - maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex); + maskedSrc = maskedSrc.slice(0, match.index + match[0].length - 2) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex); + this.tokenizer.rules.inline.escapedEmSt.lastIndex--; } while (src) { diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 76f26e67fb..b15e6936c6 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -629,22 +629,24 @@ export class Tokenizer { // Remove extra characters. *a*** -> *a* rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); + const raw = src.slice(0, lLength + match.index + (match[0].length - rDelim.length) + rLength); + // Create `em` if smallest delimiter has odd char count. *a*** if (Math.min(lLength, rLength) % 2) { - const text = src.slice(1, lLength + match.index + rLength); + const text = raw.slice(1, -1); return { type: 'em', - raw: src.slice(0, lLength + match.index + rLength + 1), + raw, text, tokens: this.lexer.inlineTokens(text) }; } // Create 'strong' if smallest delimiter has even char count. **a*** - const text = src.slice(2, lLength + match.index + rLength - 1); + const text = raw.slice(2, -2); return { type: 'strong', - raw: src.slice(0, lLength + match.index + rLength + 1), + raw, text, tokens: this.lexer.inlineTokens(text) }; diff --git a/src/rules.js b/src/rules.js index 25d1415301..11bfbf4ef7 100644 --- a/src/rules.js +++ b/src/rules.js @@ -168,9 +168,9 @@ export const inline = { emStrong: { lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/, // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. 
- // () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a - rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/, - rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ + // () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a + rDelimAst: /^(?:[^_*\\]|\\.)*?\_\_(?:[^_*\\]|\\.)*?\*(?:[^_*\\]|\\.)*?(?=\_\_)|(?:[^*\\]|\\.)+(?=[^*])|[punct_](\*+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|(?:[^punct*_\s\\]|\\.)(\*+)(?=[^punct*_\s])/, + rDelimUnd: /^(?:[^_*\\]|\\.)*?\*\*(?:[^_*\\]|\\.)*?\_(?:[^_*\\]|\\.)*?(?=\*\*)|(?:[^_\\]|\\.)+(?=[^_])|[punct*](\_+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, @@ -186,7 +186,9 @@ inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._pu // sequences em should skip over [title](link), `code`, inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g; -inline.escapedEmSt = /\\\*|\\_/g; +// lookbehind is not available on Safari as of version 16 +// inline.escapedEmSt = /(?<=(?:^|[^\\)(?:\\[^])*)\\[*_]/g; +inline.escapedEmSt = /(?:^|[^\\])(?:\\\\)*\\[*_]/g; inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); diff --git a/test/specs/new/escape_within_emphasis.html b/test/specs/new/escape_within_emphasis.html new file mode 100644 
index 0000000000..c5885c607b --- /dev/null +++ b/test/specs/new/escape_within_emphasis.html @@ -0,0 +1,7 @@ +

<p><strong>strong text[</strong>]</p>

+ +

<p><strong>strong text\[</strong>]</p>

+ +

<p><em>em[pha](sis)</em></p>

+ +

<p><em>\</em></p>

diff --git a/test/specs/new/escape_within_emphasis.md b/test/specs/new/escape_within_emphasis.md new file mode 100644 index 0000000000..03a7295b85 --- /dev/null +++ b/test/specs/new/escape_within_emphasis.md @@ -0,0 +1,7 @@ +**strong text\[**\] + +**strong text\\\[**\] + +_em\[pha\]\(sis\)_ + +_\\_ diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index 23913b6e4a..8174d53139 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -776,6 +776,41 @@ paragraph }); }); + it('escaped punctuation inside emphasis', () => { + expectInlineTokens({ + md: '**strong text\\[**\\]', + tokens: [ + { + type: 'strong', + raw: '**strong text\\[**', + text: 'strong text\\[', + tokens: [ + { type: 'text', raw: 'strong text', text: 'strong text' }, + { type: 'escape', raw: '\\[', text: '[' } + ] + }, + { type: 'escape', raw: '\\]', text: ']' } + ] + }); + expectInlineTokens({ + md: '_em\\<pha\\>sis_', + tokens: [ + { + type: 'em', + raw: '_em\\<pha\\>sis_', + text: 'em\\<pha\\>sis', + tokens: [ + { type: 'text', raw: 'em', text: 'em' }, + { type: 'escape', raw: '\\<', text: '<' }, + { type: 'text', raw: 'pha', text: 'pha' }, + { type: 'escape', raw: '\\>', text: '>' }, + { type: 'text', raw: 'sis', text: 'sis' } + ] + } + ] + }); + }); + it('html', () => { expectInlineTokens({ md: '
html
',