Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for em/strong corner cases #1853

Merged
merged 1 commit into from
Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/src/markdown/about/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 9.7.1

- **FIX**: BetterEm: Fix case where `**` nested between `*` would be handled in an unexpected way.

## 9.7

- **NEW**: Tabbed: Add new syntax to allow forcing a specific tab to be selected by default.
Expand Down
19 changes: 14 additions & 5 deletions docs/src/markdown/extensions/betterem.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ BetterEm allows for a more natural nested token feel.
***I'm bold and italic!** I am just italic.*
```

BetterEm will try to prioritize the more sane option when nesting bold (`**`) between italic (`*`).

!!! example "Prioritize Best Example"

=== "Output"
*I'm italic. **I'm bold and italic.** I'm also just italic.*

=== "Markdown"

```
*I'm italic. **I'm bold and italic.** I'm also just italic.*
```

BetterEm will ensure smart mode doesn't terminate in scenarios where there are a large number of consecutive tokens
inside.

Expand All @@ -68,12 +81,8 @@ inside.
=== "Output"
___A lot of underscores____________is okay___

___A lot of underscores____________is okay___

=== "Markdown"
```
___A lot of underscores____________is okay___

___A lot of underscores____________is okay___
```

Expand Down Expand Up @@ -113,7 +122,7 @@ BetterEm will allow non-smart emphasis to contain "floating" like tokens.

*All will not* be italic*

*All will not ** be italic*
*All will ** be italic*

**All will * be bold**

Expand Down
2 changes: 1 addition & 1 deletion pymdownx/__meta__.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,5 +185,5 @@ def parse_version(ver, pre=False):
return Version(major, minor, micro, release, pre, post, dev)


__version_info__ = Version(9, 7, 0, "final")
__version_info__ = Version(9, 7, 1, "final")
__version__ = __version_info__._get_canonical()
21 changes: 17 additions & 4 deletions pymdownx/betterem.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@
STAR_STRONG = r'(\*{2})(?!\s)%s(?<!\s)\1' % STAR_CONTENT2
# __strong__
UNDER_STRONG = r'(_{2})(?!\s)%s(?<!\s)\1' % UNDER_CONTENT2

# Prioritize *value* when **value** is nested within
STAR_EM2 = r'(?<!\*)(\*)(?![\*\s])(.+?)(?<![\*\s])(\*)(?!\*)'
# Prioritize _value_ when __value__ is nested within
UNDER_EM2 = r'(?<!_)(_)(?![_\s])(.+?)(?<![_\s])(_)(?!_)'

# *emphasis*
STAR_EM = r'(\*)(?!\s)%s(?<!\s)\1' % STAR_CONTENT
# _emphasis_
Expand All @@ -71,6 +77,8 @@
SMART_UNDER_STRONG = r'(?<!\w)(_{2})(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT
# SMART _em_
SMART_UNDER_EM = r'(?<!\w)(_)(?![\s_])%s(?<!\s)\1(?!\w)' % SMART_UNDER_CONTENT
# Prioritize _value_ when __value__ is nested within
SMART_UNDER_EM2 = r'(?<![\w_])(_)(?![_\s])(.+?)(?<![_\s])(_)(?![_\w])'

# Smart rules for when "smart asterisk" is enabled
# SMART: ***strong,em***
Expand All @@ -89,6 +97,8 @@
SMART_STAR_STRONG = r'(?:(?<=_)|(?<![\w\*]))(\*{2})(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
# SMART *em*
SMART_STAR_EM = r'(?:(?<=_)|(?<![\w\*]))(\*)(?![\s\*])%s(?<!\s)\1(?:(?=_)|(?![\w\*]))' % SMART_STAR_CONTENT
# Prioritize *value* when **value** is nested within
SMART_STAR_EM2 = r'(?<![\w\*])(\*)(?![\*\s])(.+?)(?<![\*\s])(\*)(?![\*\w])'


class AsteriskProcessor(util.PatternSequenceProcessor):
Expand All @@ -100,6 +110,7 @@ class AsteriskProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(STAR_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
util.PatSeqItem(re.compile(STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(STAR_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -112,6 +123,7 @@ class SmartAsteriskProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(SMART_STAR_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(SMART_STAR_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_STAR_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(SMART_STAR_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(SMART_STAR_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -125,6 +137,7 @@ class UnderscoreProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(UNDER_STRONG_EM3, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
util.PatSeqItem(re.compile(UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(UNDER_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand All @@ -137,6 +150,7 @@ class SmartUnderscoreProcessor(util.PatternSequenceProcessor):
util.PatSeqItem(re.compile(SMART_UNDER_EM_STRONG, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
util.PatSeqItem(re.compile(SMART_UNDER_STRONG_EM2, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
util.PatSeqItem(re.compile(SMART_UNDER_STRONG, re.DOTALL | re.UNICODE), 'single', 'strong'),
util.PatSeqItem(re.compile(SMART_UNDER_EM2, re.DOTALL | re.UNICODE), 'single', 'em', True),
util.PatSeqItem(re.compile(SMART_UNDER_EM, re.DOTALL | re.UNICODE), 'single', 'em')
]

Expand Down Expand Up @@ -170,10 +184,9 @@ def make_better(self, md):

config = self.getConfigs()
enabled = config["smart_enable"]
if enabled:
enable_all = enabled == "all"
enable_under = enabled == "underscore" or enable_all
enable_star = enabled == "asterisk" or enable_all
enable_all = enabled == "all"
enable_under = enabled == "underscore" or enable_all
enable_star = enabled == "asterisk" or enable_all

# If we don't have to move an existing extension, use the same priority,
# but if we do have to, move it closely to the relative needed position.
Expand Down
39 changes: 22 additions & 17 deletions pymdownx/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,50 +159,55 @@ def parse_url(url):
return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)


class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags'])):
class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags', 'full_recursion'])):
"""Pattern sequence item."""

def __new__(cls, pattern, builder, tags, full_recursion=False):
"""Create object."""

return super(PatSeqItem, cls).__new__(cls, pattern, builder, tags, full_recursion)


class PatternSequenceProcessor(InlineProcessor):
"""Processor for handling complex nested patterns such as strong and em matches."""

PATTERNS = []

def build_single(self, m, tag, idx):
def build_single(self, m, tag, full_recursion, idx):
"""Return single tag."""
el1 = etree.Element(tag)
text = m.group(2)
self.parse_sub_patterns(text, el1, None, idx)
self.parse_sub_patterns(text, el1, None, full_recursion, idx)
return el1

def build_double(self, m, tags, idx):
def build_double(self, m, tags, full_recursion, idx):
"""Return double tag."""

tag1, tag2 = tags.split(",")
el1 = etree.Element(tag1)
el2 = etree.Element(tag2)
text = m.group(2)
self.parse_sub_patterns(text, el2, None, idx)
self.parse_sub_patterns(text, el2, None, full_recursion, idx)
el1.append(el2)
if len(m.groups()) == 3:
text = m.group(3)
self.parse_sub_patterns(text, el1, el2, idx)
self.parse_sub_patterns(text, el1, el2, full_recursion, idx)
return el1

def build_double2(self, m, tags, idx):
def build_double2(self, m, tags, full_recursion, idx):
"""Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""

tag1, tag2 = tags.split(",")
el1 = etree.Element(tag1)
el2 = etree.Element(tag2)
text = m.group(2)
self.parse_sub_patterns(text, el1, None, idx)
self.parse_sub_patterns(text, el1, None, full_recursion, idx)
text = m.group(3)
el1.append(el2)
self.parse_sub_patterns(text, el2, None, idx)
self.parse_sub_patterns(text, el2, None, full_recursion, idx)
return el1

def parse_sub_patterns(self, data, parent, last, idx):
def parse_sub_patterns(self, data, parent, last, full_recursion, idx):
"""
Parses sub patterns.

Expand Down Expand Up @@ -231,7 +236,7 @@ def parse_sub_patterns(self, data, parent, last, idx):
# See if we can match an emphasis/strong pattern
for index, item in enumerate(self.PATTERNS):
# Only evaluate patterns that are after what was used on the parent
if index <= idx:
if not full_recursion and index <= idx:
continue
m = item.pattern.match(data, pos)
if m:
Expand All @@ -245,7 +250,7 @@ def parse_sub_patterns(self, data, parent, last, idx):
last.tail = text
else:
parent.text = text
el = self.build_element(m, item.builder, item.tags, index)
el = self.build_element(m, item.builder, item.tags, item.full_recursion, index)
parent.append(el)
last = el
# Move our position past the matched hunk
Expand All @@ -266,15 +271,15 @@ def parse_sub_patterns(self, data, parent, last, idx):
else:
parent.text = text

def build_element(self, m, builder, tags, index):
def build_element(self, m, builder, tags, full_recursion, index):
"""Element builder."""

if builder == 'double2':
return self.build_double2(m, tags, index)
return self.build_double2(m, tags, full_recursion, index)
elif builder == 'double':
return self.build_double(m, tags, index)
return self.build_double(m, tags, full_recursion, index)
else:
return self.build_single(m, tags, index)
return self.build_single(m, tags, full_recursion, index)

def handleMatch(self, m, data):
"""Parse patterns."""
Expand All @@ -288,7 +293,7 @@ def handleMatch(self, m, data):
if m1:
start = m1.start(0)
end = m1.end(0)
el = self.build_element(m1, item.builder, item.tags, index)
el = self.build_element(m1, item.builder, item.tags, item.full_recursion, index)
break
return el, start, end

Expand Down
2 changes: 1 addition & 1 deletion tests/extensions/betterem/betterem (normal).html
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
<p>Test: <em>All will * be italic</em></p>
<p>Test: <em>All will *be italic</em></p>
<p>Test: <em>All will not</em> be italic*</p>
<p>Test: <em>All will not *</em> be italic*</p>
<p>Test: <em>All will not ** be italic</em></p>
<p>Test: <strong>All will * be bold</strong></p>
<p>Test: <em>All will *be italic</em>*</p>
<p>Test: <strong>All will not</strong>* be bold**</p>
Expand Down
2 changes: 1 addition & 1 deletion tests/extensions/betterem/betterem (reverse).html
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
<p>Test: <em>All will _ be italic</em></p>
<p>Test: <em>All will _be italic</em></p>
<p>Test: <em>All will not</em> be italic_</p>
<p>Test: <em>All will not _</em> be italic_</p>
<p>Test: <em>All will not __ be italic</em></p>
<p>Test: <strong>All will _ be bold</strong></p>
<p>Test: <em>All will _be italic</em>_</p>
<p>Test: <strong>All will not</strong>_ be bold__</p>
Expand Down