From bb3374d0fa6925d6d828e093de783f51d4de3376 Mon Sep 17 00:00:00 2001 From: fans656 Date: Sat, 30 Mar 2024 03:15:51 +0800 Subject: [PATCH] fix list parsing edge case Fix incorrect list parsing (text merged) in a situation like: ``` foo - bar table ``` which outputs: ```

foo
table

``` instead of: ```

foo

table

``` The "table" is wrongly matched by the `BLOCK_HTML` specification because `_BLOCK_TAGS_PATTERN` is used without parentheses. Any text that begins with a name listed in `BLOCK_TAGS` will have this issue. --- src/mistune/block_parser.py | 2 +- tests/test_misc.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mistune/block_parser.py b/src/mistune/block_parser.py index a70615e..de37246 100644 --- a/src/mistune/block_parser.py +++ b/src/mistune/block_parser.py @@ -27,7 +27,7 @@ _LINE_BLANK_END = re.compile(r'\n[ \t]*\n$') _BLANK_TO_LINE = re.compile(r'[ \t]*\n') -_BLOCK_TAGS_PATTERN = '|'.join(BLOCK_TAGS) + '|' + '|'.join(PRE_TAGS) +_BLOCK_TAGS_PATTERN = '(' + '|'.join(BLOCK_TAGS) + '|' + '|'.join(PRE_TAGS) + ')' _OPEN_TAG_END = re.compile(HTML_ATTRIBUTES + r'[ \t]*>[ \t]*(?:\n|$)') _CLOSE_TAG_END = re.compile(r'[ \t]*>[ \t]*(?:\n|$)') _STRICT_BLOCK_QUOTE = re.compile(r'( {0,3}>[^\n]*(?:\n|$))+') diff --git a/tests/test_misc.py b/tests/test_misc.py index 2d053fa..f57c53b 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -103,3 +103,9 @@ def test_emsp(self): result = md('\u2003\u2003foo\nbar\n\n\u2003\u2003foobar') expected = '

\u2003\u2003foo
\nbar

\n

\u2003\u2003foobar

' self.assertEqual(result.strip(), expected) + + def test_html_tag_text_following_list(self): + md = mistune.create_markdown(escape=False, hard_wrap=True) + result = md('foo\n- bar\n\ntable') + expected = '

foo

\n\n

table

' + self.assertEqual(result.strip(), expected)