Skip to content

Commit

Permalink
fix: Support escapes within emphasis (#2627)
Browse files Browse the repository at this point in the history
* fix: Support escapes within emphasis

...particularly right at the end.

Fixes #2280

* chore: realign comments with regexp alternatives

* test: Add HTML test for escapes within emphasis

* fix: Correct recognition and masking of escaped emphasis punctuation

* fix: Correct backslash fake-lookbehind
  • Loading branch information
gibson042 committed Nov 2, 2022
1 parent 54410cd commit 377823a
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 9 deletions.
3 changes: 2 additions & 1 deletion src/Lexer.js
Expand Up @@ -350,7 +350,8 @@ export class Lexer {

// Mask out escaped em & strong delimiters
while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
maskedSrc = maskedSrc.slice(0, match.index + match[0].length - 2) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
this.tokenizer.rules.inline.escapedEmSt.lastIndex--;
}

while (src) {
Expand Down
10 changes: 6 additions & 4 deletions src/Tokenizer.js
Expand Up @@ -629,22 +629,24 @@ export class Tokenizer {
// Remove extra characters. *a*** -> *a*
rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);

const raw = src.slice(0, lLength + match.index + (match[0].length - rDelim.length) + rLength);

// Create `em` if smallest delimiter has odd char count. *a***
if (Math.min(lLength, rLength) % 2) {
const text = src.slice(1, lLength + match.index + rLength);
const text = raw.slice(1, -1);
return {
type: 'em',
raw: src.slice(0, lLength + match.index + rLength + 1),
raw,
text,
tokens: this.lexer.inlineTokens(text)
};
}

// Create 'strong' if smallest delimiter has even char count. **a***
const text = src.slice(2, lLength + match.index + rLength - 1);
const text = raw.slice(2, -2);
return {
type: 'strong',
raw: src.slice(0, lLength + match.index + rLength + 1),
raw,
text,
tokens: this.lexer.inlineTokens(text)
};
Expand Down
10 changes: 6 additions & 4 deletions src/rules.js
Expand Up @@ -168,9 +168,9 @@ export const inline = {
emStrong: {
lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
// (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
// () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
// () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
rDelimAst: /^(?:[^_*\\]|\\.)*?\_\_(?:[^_*\\]|\\.)*?\*(?:[^_*\\]|\\.)*?(?=\_\_)|(?:[^*\\]|\\.)+(?=[^*])|[punct_](\*+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|(?:[^punct*_\s\\]|\\.)(\*+)(?=[^punct*_\s])/,
rDelimUnd: /^(?:[^_*\\]|\\.)*?\*\*(?:[^_*\\]|\\.)*?\_(?:[^_*\\]|\\.)*?(?=\*\*)|(?:[^_\\]|\\.)+(?=[^_])|[punct*](\_+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
},
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
br: /^( {2,}|\\)\n(?!\s*$)/,
Expand All @@ -186,7 +186,9 @@ inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._pu

// sequences em should skip over [title](link), `code`, <html>
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;
inline.escapedEmSt = /\\\*|\\_/g;
// lookbehind is not available on Safari as of version 16
// inline.escapedEmSt = /(?<=(?:^|[^\\])(?:\\[^])*)\\[*_]/g;
inline.escapedEmSt = /(?:^|[^\\])(?:\\\\)*\\[*_]/g;

inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();

Expand Down
7 changes: 7 additions & 0 deletions test/specs/new/escape_within_emphasis.html
@@ -0,0 +1,7 @@
<p><strong>strong text[</strong>]</p>

<p><strong>strong text\[</strong>]</p>

<p><em>em[pha](sis)</em></p>

<p><em>\</em></p>
7 changes: 7 additions & 0 deletions test/specs/new/escape_within_emphasis.md
@@ -0,0 +1,7 @@
**strong text\[**\]

**strong text\\\[**\]

_em\[pha\]\(sis\)_

_\\_
35 changes: 35 additions & 0 deletions test/unit/Lexer-spec.js
Expand Up @@ -776,6 +776,41 @@ paragraph
});
});

// Checks how the inline lexer tokenizes backslash escapes that sit inside
// (and immediately after) emphasis spans.
// NOTE(review): expectInlineTokens is defined outside this excerpt; presumably
// it lexes `md` as inline content and compares the result to `tokens` — confirm.
it('escaped punctuation inside emphasis', () => {
// Case 1: an escape directly before the closing `**`. The expected `strong`
// token keeps the escaped bracket in its raw/text, and the trailing `\]`
// is expected as a separate escape token after the strong span.
expectInlineTokens({
md: '**strong text\\[**\\]',
tokens: [
{
type: 'strong',
raw: '**strong text\\[**',
text: 'strong text\\[',
tokens: [
{ type: 'text', raw: 'strong text', text: 'strong text' },
{ type: 'escape', raw: '\\[', text: '[' }
]
},
{ type: 'escape', raw: '\\]', text: ']' }
]
});
// Case 2: escaped angle brackets in the middle of an `_..._` span. The whole
// input is expected as a single `em` token whose children alternate between
// text and escape tokens; the escape tokens' text is the HTML-escaped form.
expectInlineTokens({
md: '_em\\<pha\\>sis_',
tokens: [
{
type: 'em',
raw: '_em\\<pha\\>sis_',
text: 'em\\<pha\\>sis',
tokens: [
{ type: 'text', raw: 'em', text: 'em' },
{ type: 'escape', raw: '\\<', text: '&lt;' },
{ type: 'text', raw: 'pha', text: 'pha' },
{ type: 'escape', raw: '\\>', text: '&gt;' },
{ type: 'text', raw: 'sis', text: 'sis' }
]
}
]
});
});

it('html', () => {
expectInlineTokens({
md: '<div>html</div>',
Expand Down

1 comment on commit 377823a

@vercel
Copy link

@vercel vercel bot commented on 377823a Nov 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.