Skip to content

Commit

Permalink
Fix for em/strong corner cases
Browse files Browse the repository at this point in the history
Fixes #1775
  • Loading branch information
facelessuser committed Nov 7, 2022
1 parent 459a2f3 commit e3ae421
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 23 deletions.
23 changes: 23 additions & 0 deletions pymdownx/betterem.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@
STAR_STRONG = r'(\*{2})(?!\s)%s(?<!\s)\1' % STAR_CONTENT2
# __strong__
UNDER_STRONG = r'(_{2})(?!\s)%s(?<!\s)\1' % UNDER_CONTENT2

STAR_EM2 = r'(?<!\*)(\*)(?![\*\s])(.+?)(?<![\*\s])(\*)(?!\*)'
UNDER_EM2 = r'(?<!_)(_)(?![_\s])(.+?)(?<![_\s])(_)(?!_)'

STAR_STRONG2 = r'(?<!\*)(\*{2})(?![\*\s])(.+?)(?<![\*\s])(\*{2})(?!\*)'
UNDER_STRONG2 = r'(?<!_)(_{2})(?![_\s])(.+?)(?<![_\s])(_{2})(?!_)'
# *emphasis*
STAR_EM = r'(\*)(?!\s)%s(?<!\s)\1' % STAR_CONTENT
# _emphasis_
Expand All @@ -72,6 +78,12 @@
# SMART _em_
SMART_UNDER_EM = r'(?<!\w)(_)(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT

SMART_STAR_EM2 = r'(?<![\w\*])(\*)(?![\*\s])(.+?)(?<![\*\s])(\*)(?![\*\w])'
SMART_UNDER_EM2 = r'(?<![\w_])(_)(?![_\s])(.+?)(?<![_\s])(_)(?![_\w])'

SMART_STAR_STRONG2 = r'(?<![\w\*])(\*{2})(?![\*\s])(.+?)(?<![\*\s])(\*{2})(?![\*\w])'
SMART_UNDER_STRONG2 = r'(?<![\w_])(_{2})(?![_\s])(.+?)(?<![_\s])(_{2})(?![_\w])'

# Smart rules for when "smart asterisk" is enabled
# SMART: ***strong,em***
SMART_STAR_STRONG_EM = r'(?:(?<=_)|(?<![\w\*]))(\*{3})(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
Expand Down Expand Up @@ -99,7 +111,9 @@ class AsteriskProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(STAR_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(STAR_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
util.PatSeqItem(re.compile(STAR_STRONG2, re.DOTALL | re.UNICODE), 'single', 'strong', True),
util.PatSeqItem(re.compile(STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(STAR_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -111,7 +125,9 @@ class SmartAsteriskProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(SMART_STAR_STRONG_EM, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_STAR_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(SMART_STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_STAR_STRONG2, re.DOTALL | re.UNICODE), 'single', 'strong', True),
util.PatSeqItem(re.compile(SMART_STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(SMART_STAR_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(SMART_STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -124,7 +140,9 @@ class UnderscoreProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(UNDER_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(UNDER_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
util.PatSeqItem(re.compile(UNDER_STRONG2, re.DOTALL | re.UNICODE), 'single', 'strong', True),
util.PatSeqItem(re.compile(UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(UNDER_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -136,7 +154,9 @@ class SmartUnderscoreProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(SMART_UNDER_STRONG_EM, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_UNDER_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(SMART_UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_UNDER_STRONG2, re.DOTALL | re.UNICODE), 'single', 'strong', True),
util.PatSeqItem(re.compile(SMART_UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(SMART_UNDER_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(SMART_UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand Down Expand Up @@ -174,6 +194,9 @@ def make_better(self, md):
enable_all = enabled == "all"
enable_under = enabled == "underscore" or enable_all
enable_star = enabled == "asterisk" or enable_all
else:
enable_under = False
enable_star = False

# If we don't have to move an existing extension, use the same priority,
# but if we do have to, move it closely to the relative needed position.
Expand Down
39 changes: 22 additions & 17 deletions pymdownx/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,50 +159,55 @@ def parse_url(url):
return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)


class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags'])):
class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags', 'full_recursion'])):
"""Pattern sequence item."""

def __new__(cls, pattern, builder, tags, full_recursion=False):
"""Create object."""

return super(PatSeqItem, cls).__new__(cls, pattern, builder, tags, full_recursion)


class PatternSequenceProcessor(InlineProcessor):
"""Processor for handling complex nested patterns such as strong and em matches."""

PATTERNS = []

def build_single(self, m, tag, idx):
def build_single(self, m, tag, full_recursion, idx):
"""Return single tag."""
el1 = etree.Element(tag)
text = m.group(2)
self.parse_sub_patterns(text, el1, None, idx)
self.parse_sub_patterns(text, el1, None, full_recursion, idx)
return el1

def build_double(self, m, tags, idx):
def build_double(self, m, tags, full_recursion, idx):
"""Return double tag."""

tag1, tag2 = tags.split(",")
el1 = etree.Element(tag1)
el2 = etree.Element(tag2)
text = m.group(2)
self.parse_sub_patterns(text, el2, None, idx)
self.parse_sub_patterns(text, el2, None, full_recursion, idx)
el1.append(el2)
if len(m.groups()) == 3:
text = m.group(3)
self.parse_sub_patterns(text, el1, el2, idx)
self.parse_sub_patterns(text, el1, el2, full_recursion, idx)
return el1

def build_double2(self, m, tags, idx):
def build_double2(self, m, tags, full_recursion, idx):
"""Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""

tag1, tag2 = tags.split(",")
el1 = etree.Element(tag1)
el2 = etree.Element(tag2)
text = m.group(2)
self.parse_sub_patterns(text, el1, None, idx)
self.parse_sub_patterns(text, el1, None, full_recursion, idx)
text = m.group(3)
el1.append(el2)
self.parse_sub_patterns(text, el2, None, idx)
self.parse_sub_patterns(text, el2, None, full_recursion, idx)
return el1

def parse_sub_patterns(self, data, parent, last, idx):
def parse_sub_patterns(self, data, parent, last, full_recursion, idx):
"""
Parses sub patterns.
Expand Down Expand Up @@ -231,7 +236,7 @@ def parse_sub_patterns(self, data, parent, last, idx):
# See if we can match an emphasis/strong pattern
for index, item in enumerate(self.PATTERNS):
# Only evaluate patterns that are after what was used on the parent
if index <= idx:
if not full_recursion and index <= idx:
continue
m = item.pattern.match(data, pos)
if m:
Expand All @@ -245,7 +250,7 @@ def parse_sub_patterns(self, data, parent, last, idx):
last.tail = text
else:
parent.text = text
el = self.build_element(m, item.builder, item.tags, index)
el = self.build_element(m, item.builder, item.tags, item.full_recursion, index)
parent.append(el)
last = el
# Move our position past the matched hunk
Expand All @@ -266,15 +271,15 @@ def parse_sub_patterns(self, data, parent, last, idx):
else:
parent.text = text

def build_element(self, m, builder, tags, index):
def build_element(self, m, builder, tags, full_recursion, index):
"""Element builder."""

if builder == 'double2':
return self.build_double2(m, tags, index)
return self.build_double2(m, tags, full_recursion, index)
elif builder == 'double':
return self.build_double(m, tags, index)
return self.build_double(m, tags, full_recursion, index)
else:
return self.build_single(m, tags, index)
return self.build_single(m, tags, full_recursion, index)

def handleMatch(self, m, data):
"""Parse patterns."""
Expand All @@ -288,7 +293,7 @@ def handleMatch(self, m, data):
if m1:
start = m1.start(0)
end = m1.end(0)
el = self.build_element(m1, item.builder, item.tags, index)
el = self.build_element(m1, item.builder, item.tags, item.full_recursion, index)
break
return el, start, end

Expand Down
6 changes: 3 additions & 3 deletions tests/extensions/betterem/betterem (normal).html
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
<p>Test: <em>All will * be italic</em></p>
<p>Test: <em>All will *be italic</em></p>
<p>Test: <em>All will not</em> be italic*</p>
<p>Test: <em>All will not *</em> be italic*</p>
<p>Test: <em>All will not ** be italic</em></p>
<p>Test: <strong>All will * be bold</strong></p>
<p>Test: <em>All will *be italic</em>*</p>
<p>Test: <strong>All will not</strong>* be bold**</p>
<p>Test: <strong>All will not *</strong> be bold**</p>
<p>Test: <strong>All will not<em>*</em> be bold</strong></p>
<p>Test: <strong>All will not <em>*</em> be bold</strong></p>
<p>Test: This is text <strong>bold <em>italic bold</em></strong> with more text</p>
<p>Test: <strong>test <em>test</em> <em>test</em> test</strong></p>
<p>Test: <strong><em>test</em> test *test</strong>*</p>
Expand Down
6 changes: 3 additions & 3 deletions tests/extensions/betterem/betterem (reverse).html
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
<p>Test: <em>All will _ be italic</em></p>
<p>Test: <em>All will _be italic</em></p>
<p>Test: <em>All will not</em> be italic_</p>
<p>Test: <em>All will not _</em> be italic_</p>
<p>Test: <em>All will not __ be italic</em></p>
<p>Test: <strong>All will _ be bold</strong></p>
<p>Test: <em>All will _be italic</em>_</p>
<p>Test: <strong>All will not</strong>_ be bold__</p>
<p>Test: <strong>All will not _</strong> be bold__</p>
<p>Test: <strong>All will not<em>_</em> be bold</strong></p>
<p>Test: <strong>All will not <em>_</em> be bold</strong></p>
<p>Test: This is text <strong>bold <em>italic bold</em></strong> with more text</p>
<p>Test: <strong>test <em>test</em> <em>test</em> test</strong></p>
<p>Test: <strong><em>test* test *test</em></strong></p>
Expand Down

0 comments on commit e3ae421

Please sign in to comment.