diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8f0d01f..dc81fb6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -33,7 +33,12 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest pytest-cov + pip install pytest pytest-cov mypy + + - name: Check types with Python ${{ matrix.python }} on ${{ matrix.os }} + run: | + mypy --strict src + mypy src tests - name: Test with Python ${{ matrix.python }} on ${{ matrix.os }} run: pytest diff --git a/src/mistune/__init__.py b/src/mistune/__init__.py index 5b9237c..59049d9 100644 --- a/src/mistune/__init__.py +++ b/src/mistune/__init__.py @@ -8,16 +8,26 @@ Documentation: https://mistune.lepture.com/ """ -from .markdown import Markdown -from .core import BlockState, InlineState, BaseRenderer +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union + +from typing_extensions import Literal + from .block_parser import BlockParser +from .core import BaseRenderer, BlockState, InlineState from .inline_parser import InlineParser +from .markdown import Markdown +from .plugins import Plugin, PluginRef, import_plugin from .renderers.html import HTMLRenderer from .util import escape, escape_url, safe_entity, unikey -from .plugins import import_plugin +RendererRef = Union[Literal["html", "ast"], BaseRenderer] -def create_markdown(escape: bool=True, hard_wrap: bool=False, renderer='html', plugins=None) -> Markdown: +def create_markdown( + escape: bool = True, + hard_wrap: bool = False, + renderer: Optional[RendererRef] = "html", + plugins: Optional[Iterable[PluginRef]] = None, +) -> Markdown: """Create a Markdown instance based on the given condition. :param escape: Boolean. If using html renderer, escape html. @@ -41,9 +51,10 @@ def create_markdown(escape: bool=True, hard_wrap: bool=False, renderer='html', p renderer = HTMLRenderer(escape=escape) inline = InlineParser(hard_wrap=hard_wrap) + real_plugins: Optional[Iterable[Plugin]] = None if plugins is not None: - plugins = [import_plugin(n) for n in plugins] - return Markdown(renderer=renderer, inline=inline, plugins=plugins) + real_plugins = [import_plugin(n) for n in plugins] + return Markdown(renderer=renderer, inline=inline, plugins=real_plugins) html: Markdown = create_markdown( @@ -52,11 +63,18 @@ def create_markdown(escape: bool=True, hard_wrap: bool=False, renderer='html', p ) -__cached_parsers = {} +__cached_parsers: Dict[ + Tuple[bool, Optional[RendererRef], Optional[Iterable[Any]]], Markdown +] = {} -def markdown(text, escape=True, renderer='html', plugins=None) -> str: - if renderer == 'ast': +def markdown( + text: str, + escape: bool = True, + renderer: Optional[RendererRef] = "html", + plugins: Optional[Iterable[Any]] = None, +) -> Union[str, List[Dict[str, Any]]]: + if renderer == "ast": # explicit and more similar to 2.x's API renderer = None key = (escape, renderer, plugins) @@ -77,5 +95,5 @@ def markdown(text, escape=True, renderer='html', plugins=None) -> str: 'html', 'create_markdown', 'markdown', ] -__version__ = '3.0.1' -__homepage__ = 'https://mistune.lepture.com/' +__version__: str = "3.0.1" +__homepage__: str = "https://mistune.lepture.com/" diff --git a/src/mistune/__main__.py b/src/mistune/__main__.py index 053a379..20aa7cd 100644 --- a/src/mistune/__main__.py +++ b/src/mistune/__main__.py @@ -1,23 +1,27 @@ -import sys import argparse -from .renderers.rst import RSTRenderer +import sys +from typing import TYPE_CHECKING, Optional + +from . import __version__ as version +from . import create_markdown from .renderers.markdown import MarkdownRenderer -from . import ( - create_markdown, - __version__ as version -) +from .renderers.rst import RSTRenderer + +if TYPE_CHECKING: + from .core import BaseRenderer + from .markdown import Markdown -def _md(args): +def _md(args: argparse.Namespace) -> "Markdown": if args.plugin: plugins = args.plugin else: # default plugins plugins = ['strikethrough', 'footnotes', 'table', 'speedup'] - if args.renderer == 'rst': - renderer = RSTRenderer() - elif args.renderer == 'markdown': + if args.renderer == "rst": + renderer: "BaseRenderer" = RSTRenderer() + elif args.renderer == "markdown": renderer = MarkdownRenderer() else: renderer = args.renderer @@ -29,7 +33,7 @@ def _md(args): ) -def _output(text, args): +def _output(text: str, args: argparse.Namespace) -> None: if args.output: with open(args.output, 'w') as f: f.write(text) @@ -52,7 +56,7 @@ def _output(text, args): ''' -def cli(): +def cli() -> None: parser = argparse.ArgumentParser( prog='python -m mistune', description=CMD_HELP, @@ -102,17 +106,19 @@ def cli(): if message: md = _md(args) text = md(message) + assert isinstance(text, str) _output(text, args) elif args.file: md = _md(args) text = md.read(args.file)[0] + assert isinstance(text, str) _output(text, args) else: print('You MUST specify a message or file') - return sys.exit(1) + sys.exit(1) -def read_stdin(): +def read_stdin() -> Optional[str]: is_stdin_pipe = not sys.stdin.isatty() if is_stdin_pipe: return sys.stdin.read() diff --git a/src/mistune/block_parser.py b/src/mistune/block_parser.py index 1442c3f..a70615e 100644 --- a/src/mistune/block_parser.py +++ b/src/mistune/block_parser.py @@ -1,5 +1,5 @@ import re -from typing import Optional, List, Tuple, Match +from typing import Optional, List, Tuple, Match, Type, Pattern from .util import ( unikey, escape_url, @@ -33,7 +33,9 @@ _STRICT_BLOCK_QUOTE = re.compile(r'( {0,3}>[^\n]*(?:\n|$))+') -class BlockParser(Parser): +class BlockParser(Parser[BlockState]): + state_cls = BlockState + BLANK_LINE = re.compile(r'(^[ \t\v\f]*\n)+', re.M) RAW_HTML = ( @@ -109,18 +111,18 @@ def __init__( name: getattr(self, 'parse_' + name) for name in self.SPECIFICATION } - def parse_blank_line(self, m: Match, state: BlockState) -> int: + def parse_blank_line(self, m: Match[str], state: BlockState) -> int: """Parse token for blank lines.""" state.append_token({'type': 'blank_line'}) return m.end() - def parse_thematic_break(self, m: Match, state: BlockState) -> int: + def parse_thematic_break(self, m: Match[str], state: BlockState) -> int: """Parse token for thematic break, e.g. ``
`` tag in HTML.""" state.append_token({'type': 'thematic_break'}) # $ does not count '\n' return m.end() + 1 - def parse_indent_code(self, m: Match, state: BlockState) -> int: + def parse_indent_code(self, m: Match[str], state: BlockState) -> int: """Parse token for code block which is indented by 4 spaces.""" # it is a part of the paragraph end_pos = state.append_paragraph() @@ -134,7 +136,7 @@ def parse_indent_code(self, m: Match, state: BlockState) -> int: state.append_token({'type': 'block_code', 'raw': code, 'style': 'indent'}) return m.end() - def parse_fenced_code(self, m: Match, state: BlockState) -> Optional[int]: + def parse_fenced_code(self, m: Match[str], state: BlockState) -> Optional[int]: """Parse token for fenced code block. A fenced code block is started with 3 or more backtick(`) or tilde(~). @@ -156,7 +158,7 @@ def markdown(text): # CommonMark Example 145 # Info strings for backtick code blocks cannot contain backticks if info.find(c) != -1: - return + return None _end = re.compile( r'^ {0,3}' + c + '{' + str(len(marker)) + r',}[ \t]*(?:\n|$)', re.M) @@ -182,7 +184,7 @@ def markdown(text): state.append_token(token) return end_pos - def parse_atx_heading(self, m: Match, state: BlockState) -> int: + def parse_atx_heading(self, m: Match[str], state: BlockState) -> int: """Parse token for ATX heading. An ATX heading is started with 1 to 6 symbol of ``#``.""" level = len(m.group('atx_1')) @@ -195,7 +197,7 @@ def parse_atx_heading(self, m: Match, state: BlockState) -> int: state.append_token(token) return m.end() + 1 - def parse_setex_heading(self, m: Match, state: BlockState) -> Optional[int]: + def parse_setex_heading(self, m: Match[str], state: BlockState) -> Optional[int]: """Parse token for setex style heading. A setex heading syntax looks like: .. code-block:: markdown @@ -212,11 +214,12 @@ def parse_setex_heading(self, m: Match, state: BlockState) -> Optional[int]: return m.end() + 1 sc = self.compile_sc(['thematic_break', 'list']) - m = sc.match(state.src, state.cursor) - if m: - return self.parse_method(m, state) + m2 = sc.match(state.src, state.cursor) + if m2: + return self.parse_method(m2, state) + return None - def parse_ref_link(self, m: Match, state: BlockState) -> Optional[int]: + def parse_ref_link(self, m: Match[str], state: BlockState) -> Optional[int]: """Parse link references and save the link information into ``state.env``. Here is an example of a link reference: @@ -241,11 +244,13 @@ def parse_ref_link(self, m: Match, state: BlockState) -> Optional[int]: label = m.group('reflink_1') key = unikey(label) if not key: - return + return None href, href_pos = parse_link_href(state.src, m.end(), block=True) if href is None: - return + return None + + assert href_pos is not None _blank = self.BLANK_LINE.search(state.src, href_pos) if _blank: @@ -255,26 +260,27 @@ def parse_ref_link(self, m: Match, state: BlockState) -> Optional[int]: title, title_pos = parse_link_title(state.src, href_pos, max_pos) if title_pos: - m = _BLANK_TO_LINE.match(state.src, title_pos) - if m: - title_pos = m.end() + m2 = _BLANK_TO_LINE.match(state.src, title_pos) + if m2: + title_pos = m2.end() else: title_pos = None title = None if title_pos is None: - m = _BLANK_TO_LINE.match(state.src, href_pos) - if m: - href_pos = m.end() + m3 = _BLANK_TO_LINE.match(state.src, href_pos) + if m3: + href_pos = m3.end() else: href_pos = None href = None end_pos = title_pos or href_pos if not end_pos: - return + return None if key not in state.env['ref_links']: + assert href is not None href = unescape_char(href) data = {'url': escape_url(href), 'label': label} if title: @@ -282,7 +288,9 @@ def parse_ref_link(self, m: Match, state: BlockState) -> Optional[int]: state.env['ref_links'][key] = data return end_pos - def extract_block_quote(self, m: Match, state: BlockState) -> Tuple[str, int]: + def extract_block_quote( + self, m: Match[str], state: BlockState + ) -> Tuple[str, Optional[int]]: """Extract text and cursor end position of a block quote.""" # cleanup at first to detect if it is code block @@ -295,16 +303,16 @@ def extract_block_quote(self, m: Match, state: BlockState) -> Tuple[str, int]: state.cursor = m.end() + 1 - end_pos = None + end_pos: Optional[int] = None if require_marker: - m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor) - if m: - quote = m.group(0) + m2 = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor) + if m2: + quote = m2.group(0) quote = _BLOCK_QUOTE_LEADING.sub('', quote) quote = expand_leading_tab(quote, 3) quote = _BLOCK_QUOTE_TRIM.sub('', quote) text += quote - state.cursor = m.end() + state.cursor = m2.end() else: prev_blank_line = False break_sc = self.compile_sc([ @@ -312,14 +320,14 @@ def extract_block_quote(self, m: Match, state: BlockState) -> Tuple[str, int]: 'list', 'block_html', ]) while state.cursor < state.cursor_max: - m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor) - if m: - quote = m.group(0) + m3 = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor) + if m3: + quote = m3.group(0) quote = _BLOCK_QUOTE_LEADING.sub('', quote) quote = expand_leading_tab(quote, 3) quote = _BLOCK_QUOTE_TRIM.sub('', quote) text += quote - state.cursor = m.end() + state.cursor = m3.end() if not quote.strip(): prev_blank_line = True else: @@ -332,9 +340,9 @@ def extract_block_quote(self, m: Match, state: BlockState) -> Tuple[str, int]: # a block quote and a following paragraph break - m = break_sc.match(state.src, state.cursor) - if m: - end_pos = self.parse_method(m, state) + m4 = break_sc.match(state.src, state.cursor) + if m4: + end_pos = self.parse_method(m4, state) if end_pos: break @@ -349,7 +357,7 @@ def extract_block_quote(self, m: Match, state: BlockState) -> Tuple[str, int]: # treated as 4 spaces return expand_tab(text), end_pos - def parse_block_quote(self, m: Match, state: BlockState) -> int: + def parse_block_quote(self, m: Match[str], state: BlockState) -> int: """Parse token for block quote. Here is an example of the syntax: .. code-block:: markdown @@ -374,14 +382,14 @@ def parse_block_quote(self, m: Match, state: BlockState) -> int: state.append_token(token) return state.cursor - def parse_list(self, m: Match, state: BlockState) -> int: + def parse_list(self, m: Match[str], state: BlockState) -> int: """Parse tokens for ordered and unordered list.""" return parse_list(self, m, state) - def parse_block_html(self, m: Match, state: BlockState) -> Optional[int]: + def parse_block_html(self, m: Match[str], state: BlockState) -> Optional[int]: return self.parse_raw_html(m, state) - def parse_raw_html(self, m: Match, state: BlockState) -> Optional[int]: + def parse_raw_html(self, m: Match[str], state: BlockState) -> Optional[int]: marker = m.group(0).strip() # rule 2 @@ -429,6 +437,8 @@ def parse_raw_html(self, m: Match, state: BlockState) -> Optional[int]: (close_tag and _CLOSE_TAG_END.match(state.src, start_pos, end_pos)): return _parse_html_to_newline(state, self.BLANK_LINE) + return None + def parse(self, state: BlockState, rules: Optional[List[str]]=None) -> None: sc = self.compile_sc(rules) @@ -443,14 +453,14 @@ def parse(self, state: BlockState, rules: Optional[List[str]]=None) -> None: state.add_paragraph(text) state.cursor = end_pos - end_pos = self.parse_method(m, state) - if end_pos: - state.cursor = end_pos + end_pos2 = self.parse_method(m, state) + if end_pos2: + state.cursor = end_pos2 else: - end_pos = state.find_line_end() - text = state.get_text(end_pos) + end_pos3 = state.find_line_end() + text = state.get_text(end_pos3) state.add_paragraph(text) - state.cursor = end_pos + state.cursor = end_pos3 if state.cursor < state.cursor_max: text = state.src[state.cursor:] @@ -458,7 +468,7 @@ def parse(self, state: BlockState, rules: Optional[List[str]]=None) -> None: state.cursor = state.cursor_max -def _parse_html_to_end(state, end_marker, start_pos): +def _parse_html_to_end(state: BlockState, end_marker: str, start_pos: int) -> int: marker_pos = state.src.find(end_marker, start_pos) if marker_pos == -1: text = state.src[state.cursor:] @@ -473,7 +483,7 @@ def _parse_html_to_end(state, end_marker, start_pos): return end_pos -def _parse_html_to_newline(state, newline): +def _parse_html_to_newline(state: BlockState, newline: Pattern[str]) -> int: m = newline.search(state.src, state.cursor) if m: end_pos = m.start() diff --git a/src/mistune/core.py b/src/mistune/core.py index 2bbe92b..75715fa 100644 --- a/src/mistune/core.py +++ b/src/mistune/core.py @@ -1,12 +1,39 @@ import re -from typing import Dict, Any +from collections.abc import Generator +from typing import ( + Any, + Callable, + ClassVar, + Dict, + Generic, + Iterable, + List, + Match, + MutableMapping, + Optional, + Pattern, + Set, + Type, + TypeVar, + Union, + cast, +) +from typing_extensions import Self _LINE_END = re.compile(r'\n|$') - class BlockState: """The state to save block parser's cursor and tokens.""" - def __init__(self, parent=None): + + src: str + tokens: List[Dict[str, Any]] + cursor: int + cursor_max: int + list_tight: bool + parent: Any + env: MutableMapping[str, Any] + + def __init__(self, parent: Optional[Any] = None) -> None: self.src = '' self.tokens = [] @@ -24,49 +51,51 @@ def __init__(self, parent=None): else: self.env = {'ref_links': {}} - def child_state(self, src: str): + def child_state(self, src: str) -> 'BlockState': child = self.__class__(self) child.process(src) return child - def process(self, src: str): + def process(self, src: str) -> None: self.src = src self.cursor_max = len(src) - def find_line_end(self): + def find_line_end(self) -> int: m = _LINE_END.search(self.src, self.cursor) + assert m is not None return m.end() - def get_text(self, end_pos: int): + def get_text(self, end_pos: int) -> str: return self.src[self.cursor:end_pos] - def last_token(self): + def last_token(self) -> Any: if self.tokens: return self.tokens[-1] - def prepend_token(self, token: Dict[str, Any]): + def prepend_token(self, token: Dict[str, Any]) -> None: """Insert token before the last token.""" self.tokens.insert(len(self.tokens) - 1, token) - def append_token(self, token: Dict[str, Any]): + def append_token(self, token: Dict[str, Any]) -> None: """Add token to the end of token list.""" self.tokens.append(token) - def add_paragraph(self, text: str): + def add_paragraph(self, text: str) -> None: last_token = self.last_token() if last_token and last_token['type'] == 'paragraph': last_token['text'] += text else: self.tokens.append({'type': 'paragraph', 'text': text}) - def append_paragraph(self): + def append_paragraph(self) -> Optional[int]: last_token = self.last_token() if last_token and last_token['type'] == 'paragraph': pos = self.find_line_end() last_token['text'] += self.get_text(pos) return pos + return None - def depth(self): + def depth(self) -> int: d = 0 parent = self.parent while parent: @@ -77,24 +106,25 @@ def depth(self): class InlineState: """The state to save inline parser's tokens.""" - def __init__(self, env: Dict[str, Any]): + + def __init__(self, env: MutableMapping[str, Any]): self.env = env self.src = '' - self.tokens = [] + self.tokens: List[Dict[str, Any]] = [] self.in_image = False self.in_link = False self.in_emphasis = False self.in_strong = False - def prepend_token(self, token: Dict[str, Any]): + def prepend_token(self, token: Dict[str, Any]) -> None: """Insert token before the last token.""" self.tokens.insert(len(self.tokens) - 1, token) - def append_token(self, token: Dict[str, Any]): + def append_token(self, token: Dict[str, Any]) -> None: """Add token to the end of token list.""" self.tokens.append(token) - def copy(self): + def copy(self) -> "InlineState": """Create a copy of current state.""" state = self.__class__(self.env) state.in_image = self.in_image @@ -104,21 +134,26 @@ def copy(self): return state -class Parser: - sc_flag = re.M - state_cls = BlockState +ST = TypeVar("ST", InlineState, BlockState) + +class Parser(Generic[ST]): + sc_flag: "re._FlagsType" = re.M + state_cls: Type[ST] - SPECIFICATION = {} - DEFAULT_RULES = [] + SPECIFICATION: ClassVar[Dict[str, str]] = {} + DEFAULT_RULES: ClassVar[Iterable[str]] = [] - def __init__(self): + def __init__(self) -> None: self.specification = self.SPECIFICATION.copy() self.rules = list(self.DEFAULT_RULES) - self._methods = {} + self._methods: Dict[ + str, + Callable[[Match[str], ST], Optional[int]], + ] = {} - self.__sc = {} + self.__sc: Dict[str, Pattern[str]] = {} - def compile_sc(self, rules=None): + def compile_sc(self, rules: Optional[List[str]] = None) -> Pattern[str]: if rules is None: key = '$' rules = self.rules @@ -134,7 +169,13 @@ def compile_sc(self, rules=None): self.__sc[key] = sc return sc - def register(self, name: str, pattern, func, before=None): + def register( + self, + name: str, + pattern: Union[str, None], + func: Callable[[Self, Match[str], ST], Optional[int]], + before: Optional[str] = None, + ) -> None: """Register a new rule to parse the token. This method is usually used to create a new plugin. @@ -149,11 +190,11 @@ def register(self, name: str, pattern, func, before=None): if name not in self.rules: self.insert_rule(self.rules, name, before=before) - def register_rule(self, name, pattern, func): + def register_rule(self, name: str, pattern: str, func: Any) -> None: raise DeprecationWarning('This plugin is not compatible with mistune v3.') @staticmethod - def insert_rule(rules, name, before=None): + def insert_rule(rules: List[str], name: str, before: Optional[str] = None) -> None: if before: try: index = rules.index(before) @@ -163,18 +204,20 @@ def insert_rule(rules, name, before=None): else: rules.append(name) - def parse_method(self, m, state): - func = self._methods[m.lastgroup] + def parse_method(self, m: Match[str], state: ST) -> Optional[int]: + lastgroup = m.lastgroup + assert lastgroup + func = self._methods[lastgroup] return func(m, state) class BaseRenderer(object): - NAME = 'base' + NAME: ClassVar[str] = "base" - def __init__(self): - self.__methods = {} + def __init__(self) -> None: + self.__methods: Dict[str, Callable[..., str]] = {} - def register(self, name: str, method): + def register(self, name: str, method: Callable[..., str]) -> None: """Register a render method for the named token. For example:: def render_wiki(renderer, key, title): @@ -185,25 +228,27 @@ def render_wiki(renderer, key, title): # bind self into renderer method self.__methods[name] = lambda *arg, **kwargs: method(self, *arg, **kwargs) - def _get_method(self, name): + def _get_method(self, name: str) -> Callable[..., str]: try: - return object.__getattribute__(self, name) + return cast(Callable[..., str], object.__getattribute__(self, name)) except AttributeError: method = self.__methods.get(name) if not method: raise AttributeError('No renderer "{!r}"'.format(name)) return method - def render_token(self, token, state): + def render_token(self, token: Dict[str, Any], state: BlockState) -> str: func = self._get_method(token['type']) return func(token, state) - def iter_tokens(self, tokens, state): + def iter_tokens( + self, tokens: Iterable[Dict[str, Any]], state: BlockState + ) -> Iterable[str]: for tok in tokens: yield self.render_token(tok, state) - def render_tokens(self, tokens, state): + def render_tokens(self, tokens: Iterable[Dict[str, Any]], state: BlockState) -> str: return ''.join(self.iter_tokens(tokens, state)) - def __call__(self, tokens, state): + def __call__(self, tokens: Iterable[Dict[str, Any]], state: BlockState) -> str: return self.render_tokens(tokens, state) diff --git a/src/mistune/directives/__init__.py b/src/mistune/directives/__init__.py index 660c4c8..521f174 100644 --- a/src/mistune/directives/__init__.py +++ b/src/mistune/directives/__init__.py @@ -1,14 +1,16 @@ -from ._base import DirectiveParser, BaseDirective, DirectivePlugin -from ._rst import RSTDirective +from typing import List + +from ._base import BaseDirective, DirectiveParser, DirectivePlugin from ._fenced import FencedDirective +from ._rst import RSTDirective from .admonition import Admonition -from .toc import TableOfContents +from .image import Figure, Image from .include import Include -from .image import Image, Figure +from .toc import TableOfContents + - class RstDirective(RSTDirective): # pragma: no cover - def __init__(self, plugins): + def __init__(self, plugins: List[DirectivePlugin]) -> None: super(RstDirective, self).__init__(plugins) import warnings warnings.warn( diff --git a/src/mistune/directives/_base.py b/src/mistune/directives/_base.py index ad326c6..49769de 100644 --- a/src/mistune/directives/_base.py +++ b/src/mistune/directives/_base.py @@ -1,23 +1,47 @@ import re - - -class DirectiveParser: +from abc import ABCMeta, abstractmethod +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Match, + Optional, + Tuple, + Type, + Union, +) + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BlockState + from ..markdown import Markdown + + +class DirectiveParser(ABCMeta): name = 'directive' @staticmethod - def parse_type(m: re.Match): + @abstractmethod + def parse_type(m: Match[str]) -> str: raise NotImplementedError() @staticmethod - def parse_title(m: re.Match): + @abstractmethod + def parse_title(m: Match[str]) -> str: raise NotImplementedError() @staticmethod - def parse_content(m: re.Match): + @abstractmethod + def parse_content(m: Match[str]) -> str: raise NotImplementedError() @classmethod - def parse_tokens(cls, block, text, state): + def parse_tokens( + cls, block: "BlockParser", text: str, state: "BlockState" + ) -> Iterable[Dict[str, Any]]: if state.depth() >= block.max_nested_level - 1 and cls.name in block.rules: rules = list(block.rules) rules.remove(cls.name) @@ -28,7 +52,7 @@ def parse_tokens(cls, block, text, state): return child.tokens @staticmethod - def parse_options(m: re.Match): + def parse_options(m: Match[str]) -> List[Tuple[str, str]]: text = m.group('options') if not text.strip(): return [] @@ -45,18 +69,33 @@ def parse_options(m: re.Match): return options -class BaseDirective: - parser = DirectiveParser - directive_pattern = None +class BaseDirective(metaclass=ABCMeta): + parser: Type[DirectiveParser] + directive_pattern: Optional[str] = None - def __init__(self, plugins): - self._methods = {} + def __init__(self, plugins: List["DirectivePlugin"]): + self._methods: Dict[ + str, + Callable[ + ["BlockParser", Match[str], "BlockState"], + Union[Dict[str, Any], List[Dict[str, Any]]], + ], + ] = {} self.__plugins = plugins - def register(self, name, fn): + def register( + self, + name: str, + fn: Callable[ + ["BlockParser", Match[str], "BlockState"], + Union[Dict[str, Any], List[Dict[str, Any]]], + ], + ) -> None: self._methods[name] = fn - def parse_method(self, block, m, state): + def parse_method( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Union[Dict[str, Any], List[Dict[str, Any]]]: _type = self.parser.parse_type(m) method = self._methods.get(_type) if method: @@ -78,10 +117,15 @@ def parse_method(self, block, m, state): state.append_token(token) return token - def parse_directive(self, block, m, state): + @abstractmethod + def parse_directive( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Optional[int]: raise NotImplementedError() - def register_block_parser(self, md, before=None): + def register_block_parser( + self, md: "Markdown", before: Optional[str] = None + ) -> None: md.block.register( self.parser.name, self.directive_pattern, @@ -89,33 +133,38 @@ def register_block_parser(self, md, before=None): before=before, ) - def __call__(self, md): + def __call__(self, markdown: "Markdown") -> None: for plugin in self.__plugins: plugin.parser = self.parser - plugin(self, md) + plugin(self, markdown) class DirectivePlugin: - def __init__(self): - self.parser = None + parser: Type[DirectiveParser] + + def __init__(self) -> None: ... - def parse_options(self, m: re.Match): + def parse_options(self, m: Match[str]) -> List[Tuple[str, str]]: return self.parser.parse_options(m) - def parse_type(self, m: re.Match): + def parse_type(self, m: Match[str]) -> str: return self.parser.parse_type(m) - def parse_title(self, m: re.Match): + def parse_title(self, m: Match[str]) -> str: return self.parser.parse_title(m) - def parse_content(self, m: re.Match): + def parse_content(self, m: Match[str]) -> str: return self.parser.parse_content(m) - def parse_tokens(self, block, text, state): + def parse_tokens( + self, block: "BlockParser", text: str, state: "BlockState" + ) -> Iterable[Dict[str, Any]]: return self.parser.parse_tokens(block, text, state) - def parse(self, block, m, state): + def parse( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Union[Dict[str, Any], List[Dict[str, Any]]]: raise NotImplementedError() - def __call__(self, md): + def __call__(self, directive: BaseDirective, md: "Markdown") -> None: raise NotImplementedError() diff --git a/src/mistune/directives/_fenced.py b/src/mistune/directives/_fenced.py index 818f130..b52b0aa 100644 --- a/src/mistune/directives/_fenced.py +++ b/src/mistune/directives/_fenced.py @@ -1,5 +1,13 @@ import re -from ._base import DirectiveParser, BaseDirective +from typing import TYPE_CHECKING, List, Match, Optional + +from ._base import BaseDirective, DirectiveParser, DirectivePlugin + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BlockState + from ..markdown import Markdown + __all__ = ['FencedDirective'] @@ -16,15 +24,15 @@ class FencedParser(DirectiveParser): name = 'fenced_directive' @staticmethod - def parse_type(m: re.Match): + def parse_type(m: Match[str]) -> str: return m.group('type') @staticmethod - def parse_title(m: re.Match): + def parse_title(m: Match[str]) -> str: return m.group('title') @staticmethod - def parse_content(m: re.Match): + def parse_content(m: Match[str]) -> str: return m.group('text') @@ -85,7 +93,7 @@ class FencedDirective(BaseDirective): """ parser = FencedParser - def __init__(self, plugins, markers='`~'): + def __init__(self, plugins: List[DirectivePlugin], markers: str = "`~") -> None: super(FencedDirective, self).__init__(plugins) self.markers = markers _marker_pattern = '|'.join(re.escape(c) for c in markers) @@ -94,7 +102,9 @@ def __init__(self, plugins, markers='`~'): r'\{[a-zA-Z0-9_-]+\}' ) - def _process_directive(self, block, marker, start, state): + def _process_directive( + self, block: "BlockParser", marker: str, start: int, state: "BlockState" + ) -> Optional[int]: mlen = len(marker) cursor_start = start + len(marker) @@ -114,17 +124,21 @@ def _process_directive(self, block, marker, start, state): m = _directive_re.match(text) if not m: - return + return None self.parse_method(block, m, state) return end_pos - def parse_directive(self, block, m, state): - marker = m.group('fenced_directive_mark') + def parse_directive( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Optional[int]: + marker = m.group("fenced_directive_mark") return self._process_directive(block, marker, m.start(), state) - def parse_fenced_code(self, block, m, state): - info = m.group('fenced_3') + def parse_fenced_code( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Optional[int]: + info = m.group("fenced_3") if not info or not _type_re.match(info): return block.parse_fenced_code(m, state) @@ -134,7 +148,7 @@ def parse_fenced_code(self, block, m, state): marker = m.group('fenced_2') return self._process_directive(block, marker, m.start(), state) - def __call__(self, md): + def __call__(self, md: "Markdown") -> None: super(FencedDirective, self).__call__(md) if self.markers == '`~': md.block.register('fenced_code', None, self.parse_fenced_code) diff --git a/src/mistune/directives/_rst.py b/src/mistune/directives/_rst.py index 6e054cf..bbd0a8d 100644 --- a/src/mistune/directives/_rst.py +++ b/src/mistune/directives/_rst.py @@ -1,5 +1,12 @@ import re -from ._base import DirectiveParser, BaseDirective +from typing import TYPE_CHECKING, Match, Optional + +from ._base import BaseDirective, DirectiveParser + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BlockState + from ..markdown import Markdown __all__ = ['RSTDirective'] @@ -15,15 +22,15 @@ class RSTParser(DirectiveParser): name = 'rst_directive' @staticmethod - def parse_type(m: re.Match): + def parse_type(m: Match[str]) -> str: return m.group('type') @staticmethod - def parse_title(m: re.Match): + def parse_title(m: Match[str]) -> str: return m.group('title') @staticmethod - def parse_content(m: re.Match): + def parse_content(m: Match[str]) -> str: full_content = m.group(0) text = m.group('text') pretext = full_content[:-len(text)] @@ -60,14 +67,16 @@ class RSTDirective(BaseDirective): parser = RSTParser directive_pattern = r'^\.\. +[a-zA-Z0-9_-]+\:\:' - def parse_directive(self, block, m, state): - m = _directive_re.match(state.src, state.cursor) - if not m: - return + def parse_directive( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Optional[int]: + m2 = _directive_re.match(state.src, state.cursor) + if not m2: + return None - self.parse_method(block, m, state) - return m.end() + self.parse_method(block, m2, state) + return m2.end() - def __call__(self, md): - super(RSTDirective, self).__call__(md) - self.register_block_parser(md) + def __call__(self, markdown: "Markdown") -> None: + super(RSTDirective, self).__call__(markdown) + self.register_block_parser(markdown) diff --git a/src/mistune/directives/admonition.py b/src/mistune/directives/admonition.py index b380611..f0d65e3 100644 --- a/src/mistune/directives/admonition.py +++ b/src/mistune/directives/admonition.py @@ -1,4 +1,11 @@ -from ._base import DirectivePlugin +from typing import TYPE_CHECKING, Any, Dict, List, Match, Optional + +from ._base import BaseDirective, DirectivePlugin + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BlockState + from ..markdown import Markdown class Admonition(DirectivePlugin): @@ -7,7 +14,9 @@ class Admonition(DirectivePlugin): "important", "note", "tip", "warning", } - def parse(self, block, m, state): + def parse( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Dict[str, Any]: name = self.parse_type(m) attrs = {'name': name} options = dict(self.parse_options(m)) @@ -35,17 +44,18 @@ def parse(self, block, m, state): 'attrs': attrs, } - def __call__(self, directive, md): + def __call__(self, directive: "BaseDirective", md: "Markdown") -> None: for name in self.SUPPORTED_NAMES: directive.register(name, self.parse) + assert md.renderer is not None if md.renderer.NAME == 'html': md.renderer.register('admonition', render_admonition) md.renderer.register('admonition_title', render_admonition_title) md.renderer.register('admonition_content', render_admonition_content) -def render_admonition(self, text, name, **attrs): +def render_admonition(self: Any, text: str, name: str, **attrs: Any) -> str: html = '
\n' + text + '
\n' -def render_admonition_title(self, text): +def render_admonition_title(self: Any, text: str) -> str: return '

' + text + '

\n' -def render_admonition_content(self, text): +def render_admonition_content(self: Any, text: str) -> str: return text diff --git a/src/mistune/directives/image.py b/src/mistune/directives/image.py index 5d9d40a..9f676f8 100644 --- a/src/mistune/directives/image.py +++ b/src/mistune/directives/image.py @@ -1,6 +1,15 @@ import re -from ._base import DirectivePlugin -from ..util import escape as escape_text, escape_url +from typing import TYPE_CHECKING, Any, Dict, List, Match, Optional + +from ..util import escape as escape_text +from ..util import escape_url +from ._base import BaseDirective, DirectivePlugin + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BlockState + from ..markdown import Markdown + from ..renderers.html import HTMLRenderer __all__ = ['Image', 'Figure'] @@ -8,7 +17,7 @@ _allowed_aligns = ["top", "middle", "bottom", "left", "center", "right"] -def _parse_attrs(options): +def _parse_attrs(options: Dict[str, Any]) -> Dict[str, Any]: attrs = {} if 'alt' in options: attrs['alt'] = options['alt'] @@ -32,19 +41,29 @@ def _parse_attrs(options): class Image(DirectivePlugin): NAME = 'image' - def parse(self, block, m, state): + def parse( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Dict[str, Any]: options = dict(self.parse_options(m)) attrs = _parse_attrs(options) attrs['src'] = self.parse_title(m) return {'type': 'block_image', 'attrs': attrs} - def __call__(self, directive, md): + def __call__(self, directive: "BaseDirective", md: "Markdown") -> None: directive.register(self.NAME, self.parse) + assert md.renderer is not None if md.renderer.NAME == 'html': md.renderer.register('block_image', render_block_image) -def render_block_image(self, src: str, alt=None, width=None, height=None, **attrs): +def render_block_image( + self: 'HTMLRenderer', + src: str, + alt: Optional[str] = None, + width: Optional[str] = None, + height: Optional[str] = None, + **attrs: Any, +) -> str: img = 'None, Optional[List[Dict[str, Any]]]: content = self.parse_content(m) if not content: - return + return None - tokens = self.parse_tokens(block, content, state) + tokens = list(self.parse_tokens(block, content, state)) caption = tokens[0] if caption['type'] == 'paragraph': caption['type'] = 'figcaption' @@ -97,8 +118,11 @@ def parse_directive_content(self, block, m, state): 'children': tokens[1:] }) return children + return None - def parse(self, block, m, state): + def parse( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Dict[str, Any]: options = dict(self.parse_options(m)) image_attrs = _parse_attrs(options) image_attrs['src'] = self.parse_title(m) @@ -121,9 +145,10 @@ def parse(self, block, m, state): 'children': children, } - def __call__(self, directive, md): + def __call__(self, directive: "BaseDirective", md: "Markdown") -> None: directive.register(self.NAME, self.parse) + assert md.renderer is not None if md.renderer.NAME == 'html': md.renderer.register('figure', render_figure) md.renderer.register('block_image', render_block_image) @@ -131,8 +156,14 @@ def __call__(self, directive, md): md.renderer.register('legend', render_legend) -def render_figure(self, text, align=None, figwidth=None, figclass=None): - _cls = 'figure' +def render_figure( + self: Any, + text: str, + align: Optional[str] = None, + figwidth: Optional[str] = None, + figclass: Optional[str] = None, +) -> str: + _cls = "figure" if align: _cls += ' align-' + align if figclass: @@ -144,9 +175,9 @@ def render_figure(self, text, align=None, figwidth=None, figclass=None): return html + '>\n' + text + '\n' -def render_figcaption(self, text): +def render_figcaption(self: Any, text: str) -> str: return '
' + text + '
\n' -def render_legend(self, text): +def render_legend(self: Any, text: str) -> str: return '
\n' + text + '
\n' diff --git a/src/mistune/directives/include.py b/src/mistune/directives/include.py index d2180ba..deae8e6 100644 --- a/src/mistune/directives/include.py +++ b/src/mistune/directives/include.py @@ -1,10 +1,18 @@ import os -from ._base import DirectivePlugin +from typing import TYPE_CHECKING, Any, Dict, List, Match, Union +from ._base import BaseDirective, DirectivePlugin + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState + from ..markdown import Markdown class Include(DirectivePlugin): - def parse(self, block, m, state): - source_file = state.env.get('__file__') + def parse( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Union[Dict[str, Any], List[Dict[str, Any]]]: + source_file = state.env.get("__file__") if not source_file: return {'type': 'block_error', 'raw': 'Missing source file'} @@ -34,8 +42,7 @@ def parse(self, block, m, state): } with open(dest, 'rb') as f: - content = f.read() - content = content.decode(encoding) + content = f.read().decode(encoding) ext = os.path.splitext(relpath)[1] if ext in {'.md', '.markdown', '.mkd'}: @@ -55,11 +62,11 @@ def parse(self, block, m, state): 'attrs': attrs, } - def __call__(self, directive, md): - directive.register('include', self.parse) - if md.renderer and md.renderer.NAME == 'html': - md.renderer.register('include', render_html_include) + def __call__(self, directive: BaseDirective, md: "Markdown") -> None: + directive.register("include", self.parse) + if md.renderer and md.renderer.NAME == "html": + md.renderer.register("include", render_html_include) -def render_html_include(renderer, text, **attrs): - return '
\n' + text + '
\n' +def render_html_include(renderer: "BaseRenderer", text: str, **attrs: Any) -> str: + return '
\n' + text + "
\n" diff --git a/src/mistune/directives/toc.py b/src/mistune/directives/toc.py index 4084f43..7945dbc 100644 --- a/src/mistune/directives/toc.py +++ b/src/mistune/directives/toc.py @@ -13,19 +13,28 @@ heading levels writers want to include in the table of contents. """ -from ._base import DirectivePlugin +from typing import TYPE_CHECKING, Any, Dict, Match + from ..toc import normalize_toc_item, render_toc_ul +from ._base import BaseDirective, DirectivePlugin + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState + from ..markdown import Markdown class TableOfContents(DirectivePlugin): - def __init__(self, min_level=1, max_level=3): + def __init__(self, min_level: int = 1, max_level: int = 3) -> None: self.min_level = min_level self.max_level = max_level - def generate_heading_id(self, token, index): + def generate_heading_id(self, token: Dict[str, Any], index: int) -> str: return 'toc_' + str(index + 1) - def parse(self, block, m, state): + def parse( + self, block: "BlockParser", m: Match[str], state: "BlockState" + ) -> Dict[str, Any]: title = self.parse_title(m) options = self.parse_options(m) if options: @@ -51,7 +60,7 @@ def parse(self, block, m, state): } return {'type': 'toc', 'text': title or '', 'attrs': attrs} - def toc_hook(self, md, state): + def toc_hook(self, md: "Markdown", state: "BlockState") -> None: sections = [] headings = [] @@ -74,15 +83,17 @@ def toc_hook(self, md, state): toc = [item for item in toc_items if _min <= item[0] <= _max] sec['attrs']['toc'] = toc - def __call__(self, directive, md): - if md.renderer and md.renderer.NAME == 'html': + def __call__(self, directive: BaseDirective, md: "Markdown") -> None: + if md.renderer and md.renderer.NAME == "html": # only works with HTML renderer directive.register('toc', self.parse) md.before_render_hooks.append(self.toc_hook) md.renderer.register('toc', render_html_toc) -def render_html_toc(renderer, title, collapse=False, **attrs): +def render_html_toc( + renderer: "BaseRenderer", title: str, collapse: bool = False, **attrs: Any +) -> str: if not title: title = 'Table of Contents' toc = attrs['toc'] @@ -95,7 +106,7 @@ def render_html_toc(renderer, title, collapse=False, **attrs): return html + content + '\n' -def _normalize_level(options, name, default): +def _normalize_level(options: Dict[str, Any], name: str, default: Any) -> Any: level = options.get(name) if not level: return default diff --git a/src/mistune/helpers.py b/src/mistune/helpers.py index 04c1df1..be73c70 100644 --- a/src/mistune/helpers.py +++ b/src/mistune/helpers.py @@ -1,5 +1,7 @@ import re import string +from typing import Any, Dict, Tuple, Union + from .util import escape_url PREVENT_BACKSLASH = r'(? str: return _ESCAPE_CHAR_RE.sub(r'\1', text) -def parse_link_text(src, pos): +def parse_link_text(src: str, pos: int) -> Union[Tuple[str, int], Tuple[None, None]]: level = 1 found = False start_pos = pos @@ -77,7 +79,9 @@ def parse_link_text(src, pos): return None, None -def parse_link_label(src, start_pos): +def parse_link_label( + src: str, start_pos: int +) -> Union[Tuple[str, int], Tuple[None, None]]: m = _INLINE_LINK_LABEL_RE.match(src, start_pos) if m: label = m.group(0)[:-1] @@ -85,7 +89,9 @@ def parse_link_label(src, start_pos): return None, None -def parse_link_href(src, start_pos, block=False): +def parse_link_href( + src: str, start_pos: int, block: bool = False +) -> Union[Tuple[str, int], Tuple[None, None]]: m = LINK_BRACKET_START.match(src, start_pos) if m: start_pos = m.end() - 1 @@ -110,7 +116,9 @@ def parse_link_href(src, start_pos, block=False): return href, end_pos - 1 -def parse_link_title(src, start_pos, max_pos): +def parse_link_title( + src: str, start_pos: int, max_pos: int +) -> Union[Tuple[str, int], Tuple[None, None]]: m = LINK_TITLE_RE.match(src, start_pos, max_pos) if m: title = m.group(1)[1:-1] @@ -119,11 +127,13 @@ def parse_link_title(src, start_pos, max_pos): return None, None -def parse_link(src, pos): +def parse_link( + src: str, pos: int +) -> Union[Tuple[Dict[str, Any], int], Tuple[None, None]]: href, href_pos = parse_link_href(src, pos) if href is None: return None, None - + assert href_pos is not None title, title_pos = parse_link_title(src, href_pos, len(src)) next_pos = title_pos or href_pos m = PAREN_END_RE.match(src, next_pos) diff --git a/src/mistune/inline_parser.py b/src/mistune/inline_parser.py index 1fd961d..e14fb6d 100644 --- a/src/mistune/inline_parser.py +++ b/src/mistune/inline_parser.py @@ -1,21 +1,34 @@ import re -from typing import Optional, List, Dict, Any, Match -from .core import Parser, InlineState -from .util import ( - escape, - escape_url, - unikey, +from typing import ( + Any, + ClassVar, + Dict, + Generic, + Iterator, + List, + Match, + MutableMapping, + Optional, + Pattern, + Tuple, + TypeVar, + Union, ) + +from typing_extensions import Literal + +from .core import InlineState, Parser from .helpers import ( + HTML_ATTRIBUTES, + HTML_TAGNAME, PREVENT_BACKSLASH, PUNCTUATION, - HTML_TAGNAME, - HTML_ATTRIBUTES, - unescape_char, parse_link, parse_link_label, parse_link_text, + unescape_char, ) +from .util import escape, escape_url, unikey PAREN_END_RE = re.compile(r'\s*\)') @@ -46,7 +59,7 @@ } -class InlineParser(Parser): +class InlineParser(Parser[InlineState]): sc_flag = 0 state_cls = InlineState @@ -93,7 +106,7 @@ class InlineParser(Parser): 'linebreak', ) - def __init__(self, hard_wrap: bool=False): + def __init__(self, hard_wrap: bool = False) -> None: super(InlineParser, self).__init__() self.hard_wrap = hard_wrap @@ -107,7 +120,7 @@ def __init__(self, hard_wrap: bool=False): name: getattr(self, 'parse_' + name) for name in self.rules } - def parse_escape(self, m: Match, state: InlineState) -> int: + def parse_escape(self, m: Match[str], state: InlineState) -> int: text = m.group(0) text = unescape_char(text) state.append_token({ @@ -116,7 +129,7 @@ def parse_escape(self, m: Match, state: InlineState) -> int: }) return m.end() - def parse_link(self, m: Match, state: InlineState) -> Optional[int]: + def parse_link(self, m: Match[str], state: InlineState) -> Optional[int]: pos = m.end() marker = m.group(0) @@ -133,13 +146,17 @@ def parse_link(self, m: Match, state: InlineState) -> Optional[int]: if label is None: text, end_pos = parse_link_text(state.src, pos) if text is None: - return + return None + + assert end_pos is not None if text is None: text = label + assert text is not None + if end_pos >= len(state.src) and label is None: - return + return None rules = ['codespan', 'prec_auto_link', 'prec_inline_html'] prec_pos = self.precedence_scan(m, state, end_pos, rules) @@ -165,11 +182,11 @@ def parse_link(self, m: Match, state: InlineState) -> Optional[int]: label = label2 if label is None: - return + return None ref_links = state.env.get('ref_links') if not ref_links: - return + return None key = unikey(label) env = ref_links.get(key) @@ -180,8 +197,15 @@ def parse_link(self, m: Match, state: InlineState) -> Optional[int]: token['label'] = label state.append_token(token) return end_pos - - def __parse_link_token(self, is_image, text, attrs, state): + return None + + def __parse_link_token( + self, + is_image: bool, + text: str, + attrs: Optional[Dict[str, Any]], + state: InlineState, + ) -> Dict[str, Any]: new_state = state.copy() new_state.src = text if is_image: @@ -200,7 +224,7 @@ def __parse_link_token(self, is_image, text, attrs, state): } return token - def parse_auto_link(self, m: Match, state: InlineState) -> int: + def parse_auto_link(self, m: Match[str], state: InlineState) -> int: text = m.group(0) pos = m.end() if state.in_link: @@ -211,7 +235,7 @@ def parse_auto_link(self, m: Match, state: InlineState) -> int: self._add_auto_link(text, text, state) return pos - def parse_auto_email(self, m: Match, state: InlineState) -> int: + def parse_auto_email(self, m: Match[str], state: InlineState) -> int: text = m.group(0) pos = m.end() if state.in_link: @@ -223,14 +247,14 @@ def parse_auto_email(self, m: Match, state: InlineState) -> int: self._add_auto_link(url, text, state) return pos - def _add_auto_link(self, url, text, state): + def _add_auto_link(self, url: str, text: str, state: InlineState) -> None: state.append_token({ 'type': 'link', 'children': [{'type': 'text', 'raw': text}], 'attrs': {'url': escape_url(url)}, }) - def parse_emphasis(self, m: Match, state: InlineState) -> int: + def parse_emphasis(self, m: Match[str], state: InlineState) -> int: pos = m.end() marker = m.group(0) @@ -279,17 +303,17 @@ def parse_emphasis(self, m: Match, state: InlineState) -> int: }) return end_pos - def parse_codespan(self, m: Match, state: InlineState) -> int: + def parse_codespan(self, m: Match[str], state: InlineState) -> int: marker = m.group(0) # require same marker with same length at end pattern = re.compile(r'(.*?[^`])' + marker + r'(?!`)', re.S) pos = m.end() - m = pattern.match(state.src, pos) - if m: - end_pos = m.end() - code = m.group(1) + m2 = pattern.match(state.src, pos) + if m2: + end_pos = m2.end() + code = m2.group(1) # Line endings are treated like spaces code = code.replace('\n', ' ') if len(code.strip()): @@ -301,15 +325,15 @@ def parse_codespan(self, m: Match, state: InlineState) -> int: state.append_token({'type': 'text', 'raw': marker}) return pos - def parse_linebreak(self, m: Match, state: InlineState) -> int: + def parse_linebreak(self, m: Match[str], state: InlineState) -> int: state.append_token({'type': 'linebreak'}) return m.end() - def parse_softbreak(self, m: Match, state: InlineState) -> int: + def parse_softbreak(self, m: Match[str], state: InlineState) -> int: state.append_token({'type': 'softbreak'}) return m.end() - def parse_inline_html(self, m: Match, state: InlineState) -> int: + def parse_inline_html(self, m: Match[str], state: InlineState) -> int: end_pos = m.end() html = m.group(0) state.append_token({'type': 'inline_html', 'raw': html}) @@ -319,7 +343,7 @@ def parse_inline_html(self, m: Match, state: InlineState) -> int: state.in_link = False return end_pos - def process_text(self, text: str, state: InlineState): + def process_text(self, text: str, state: InlineState) -> None: state.append_token({'type': 'text', 'raw': text}) def parse(self, state: InlineState) -> List[Dict[str, Any]]: @@ -351,7 +375,13 @@ def parse(self, state: InlineState) -> List[Dict[str, Any]]: self.process_text(state.src[pos:], state) return state.tokens - def precedence_scan(self, m: Match, state: InlineState, end_pos: int, rules=None): + def precedence_scan( + self, + m: Match[str], + state: InlineState, + end_pos: int, + rules: Optional[List[str]] = None, + ) -> Optional[int]: if rules is None: rules = ['codespan', 'link', 'prec_auto_link', 'prec_inline_html'] @@ -359,20 +389,23 @@ def precedence_scan(self, m: Match, state: InlineState, end_pos: int, rules=None sc = self.compile_sc(rules) m1 = sc.search(state.src, mark_pos, end_pos) if not m1: - return + return None - rule_name = m1.lastgroup.replace('prec_', '') + lastgroup = m1.lastgroup + if not lastgroup: + return None + rule_name = lastgroup.replace("prec_", "") sc = self.compile_sc([rule_name]) m2 = sc.match(state.src, m1.start()) if not m2: - return + return None func = self._methods[rule_name] new_state = state.copy() new_state.src = state.src m2_pos = func(m2, new_state) if not m2_pos or m2_pos < end_pos: - return + return None raw_text = state.src[m.start():m2.start()] state.append_token({'type': 'text', 'raw': raw_text}) @@ -380,11 +413,11 @@ def precedence_scan(self, m: Match, state: InlineState, end_pos: int, rules=None state.append_token(token) return m2_pos - def render(self, state: InlineState): + def render(self, state: InlineState) -> List[Dict[str, Any]]: self.parse(state) return state.tokens - def __call__(self, s, env): + def __call__(self, s: str, env: MutableMapping[str, Any]) -> List[Dict[str, Any]]: state = self.state_cls(env) state.src = s return self.render(state) diff --git a/src/mistune/list_parser.py b/src/mistune/list_parser.py index a78e1a5..4bb2875 100644 --- a/src/mistune/list_parser.py +++ b/src/mistune/list_parser.py @@ -1,11 +1,15 @@ +"""because list is complex, split list parser in a new file""" + import re -from .core import BlockState -from .util import ( - strip_end, - expand_tab, - expand_leading_tab, -) -# because list is complex, split list parser in a new file +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Match + +from typing_extensions import Literal + +from .util import expand_leading_tab, expand_tab, strip_end + +if TYPE_CHECKING: + from .block_parser import BlockParser + from .core import BlockState LIST_PATTERN = ( r'^(?P {0,3})' @@ -16,7 +20,7 @@ _LINE_HAS_TEXT = re.compile(r'(\s*)\S') -def parse_list(block, m: re.Match, state: BlockState) -> int: +def parse_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int: """Parse tokens for ordered and unordered list.""" text = m.group('list_3') if not text.strip(): @@ -29,7 +33,7 @@ def parse_list(block, m: re.Match, state: BlockState) -> int: marker = m.group('list_2') ordered = len(marker) > 1 depth = state.depth() - token = { + token: Dict[str, Any] = { 'type': 'list', 'children': [], 'tight': True, @@ -50,7 +54,7 @@ def parse_list(block, m: re.Match, state: BlockState) -> int: token['attrs']['start'] = start state.cursor = m.end() + 1 - groups = (m.group('list_1'), marker, text) + groups: Optional[Tuple[str, str, str]] = (m.group("list_1"), marker, text) if depth >= block.max_nested_level - 1: rules = list(block.list_rules) @@ -73,7 +77,7 @@ def parse_list(block, m: re.Match, state: BlockState) -> int: return state.cursor -def _transform_tight_list(token): +def _transform_tight_list(token: Dict[str, Any]) -> None: if token['tight']: # reset tight list item for list_item in token['children']: @@ -84,7 +88,14 @@ def _transform_tight_list(token): _transform_tight_list(tok) -def _parse_list_item(block, bullet, groups, token, state, rules): +def _parse_list_item( + block: "BlockParser", + bullet: str, + groups: Tuple[str, str, str], + token: Dict[str, Any], + state: "BlockState", + rules: List[str], +) -> Optional[Tuple[str, str, str]]: spaces, marker, text = groups leading_width = len(spaces) + len(marker) @@ -179,8 +190,10 @@ def _parse_list_item(block, bullet, groups, token, state, rules): if next_group: return next_group + return None + -def _get_list_bullet(c): +def _get_list_bullet(c: str) -> str: if c == '.': bullet = r'\d{0,9}\.' elif c == ')': @@ -194,7 +207,7 @@ def _get_list_bullet(c): return bullet -def _compile_list_item_pattern(bullet, leading_width): +def _compile_list_item_pattern(bullet: str, leading_width: int) -> str: if leading_width > 3: leading_width = 3 return ( @@ -204,7 +217,7 @@ def _compile_list_item_pattern(bullet, leading_width): ) -def _compile_continue_width(text, leading_width): +def _compile_continue_width(text: str, leading_width: int) -> Tuple[str, int]: text = expand_leading_tab(text, 3) text = expand_tab(text) @@ -225,7 +238,7 @@ def _compile_continue_width(text, leading_width): return text, continue_width -def _clean_list_item_text(src, continue_width): +def _clean_list_item_text(src: str, continue_width: int) -> str: # according to Example 7, tab should be treated as 3 spaces rv = [] trim_space = ' ' * continue_width @@ -243,7 +256,7 @@ def _clean_list_item_text(src, continue_width): return '\n'.join(rv) -def _is_loose_list(tokens): +def _is_loose_list(tokens: Iterable[Dict[str, Any]]) -> bool: paragraph_count = 0 for tok in tokens: if tok['type'] == 'blank_line': @@ -252,3 +265,4 @@ def _is_loose_list(tokens): paragraph_count += 1 if paragraph_count > 1: return True + return False diff --git a/src/mistune/markdown.py b/src/mistune/markdown.py index 3369b71..34403eb 100644 --- a/src/mistune/markdown.py +++ b/src/mistune/markdown.py @@ -1,7 +1,9 @@ -from typing import Optional -from .core import BlockState +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union + from .block_parser import BlockParser +from .core import BaseRenderer, BlockState from .inline_parser import InlineParser +from .plugins import Plugin class Markdown: @@ -18,10 +20,14 @@ class Markdown: :param inline: inline level syntax parser :param plugins: mistune plugins to use """ - def __init__(self, renderer=None, - block: Optional[BlockParser]=None, - inline: Optional[InlineParser]=None, - plugins=None): + + def __init__( + self, + renderer: Optional[BaseRenderer] = None, + block: Optional[BlockParser] = None, + inline: Optional[InlineParser] = None, + plugins: Optional[Iterable[Plugin]] = None, + ): if block is None: block = BlockParser() @@ -31,24 +37,28 @@ def __init__(self, renderer=None, self.renderer = renderer self.block: BlockParser = block self.inline: InlineParser = inline - self.before_parse_hooks = [] - self.before_render_hooks = [] - self.after_render_hooks = [] + self.before_parse_hooks: List[Callable[["Markdown", BlockState], None]] = [] + self.before_render_hooks: List[Callable[["Markdown", BlockState], Any]] = [] + self.after_render_hooks: List[ + Callable[["Markdown", Union[str, List[Dict[str, Any]]], BlockState], str] + ] = [] if plugins: for plugin in plugins: plugin(self) - def use(self, plugin): + def use(self, plugin: Plugin) -> None: plugin(self) - def render_state(self, state: BlockState): + def render_state(self, state: BlockState) -> Union[str, List[Dict[str, Any]]]: data = self._iter_render(state.tokens, state) if self.renderer: return self.renderer(data, state) return list(data) - def _iter_render(self, tokens, state): + def _iter_render( + self, tokens: Iterable[Dict[str, Any]], state: BlockState + ) -> Iterable[Dict[str, Any]]: for tok in tokens: if 'children' in tok: children = self._iter_render(tok['children'], state) @@ -60,7 +70,9 @@ def _iter_render(self, tokens, state): tok['children'] = self.inline(text.strip(' \r\n\t\f'), state.env) yield tok - def parse(self, s: str, state: Optional[BlockState]=None): + def parse( + self, s: str, state: Optional[BlockState] = None + ) -> Tuple[Union[str, List[Dict[str, Any]]], BlockState]: """Parse and convert the given markdown string. If renderer is None, the returned **result** will be parsed markdown tokens. @@ -84,16 +96,18 @@ def parse(self, s: str, state: Optional[BlockState]=None): self.block.parse(state) - for hook in self.before_render_hooks: - hook(self, state) + for hook2 in self.before_render_hooks: + hook2(self, state) result = self.render_state(state) - for hook in self.after_render_hooks: - result = hook(self, result, state) + for hook3 in self.after_render_hooks: + result = hook3(self, result, state) return result, state - def read(self, filepath, encoding='utf-8', state=None): + def read( + self, filepath: str, encoding: str = "utf-8", state: Optional[BlockState] = None + ) -> Tuple[Union[str, List[Dict[str, Any]]], BlockState]: if state is None: state = self.block.state_cls() @@ -101,10 +115,10 @@ def read(self, filepath, encoding='utf-8', state=None): with open(filepath, 'rb') as f: s = f.read() - s = s.decode(encoding) - return self.parse(s, state) + s2 = s.decode(encoding) + return self.parse(s2, state) - def __call__(self, s: str): + def __call__(self, s: str) -> Union[str, List[Dict[str, Any]]]: if s is None: s = '\n' return self.parse(s)[0] diff --git a/src/mistune/plugins/__init__.py b/src/mistune/plugins/__init__.py index a79d727..939bcee 100644 --- a/src/mistune/plugins/__init__.py +++ b/src/mistune/plugins/__init__.py @@ -1,4 +1,8 @@ from importlib import import_module +from typing import TYPE_CHECKING, Dict, Protocol, Union, cast + +if TYPE_CHECKING: + from ..markdown import Markdown _plugins = { 'speedup': 'mistune.plugins.speedup.speedup', @@ -17,22 +21,29 @@ 'task_lists': 'mistune.plugins.task_lists.task_lists', 'spoiler': 'mistune.plugins.spoiler.spoiler', } -_cached_modules = {} -def import_plugin(name): - if name in _cached_modules: - return _cached_modules[name] +class Plugin(Protocol): + def __call__(self, markdown: "Markdown") -> None: ... + +_cached_modules: Dict[str, Plugin] = {} + +PluginRef = Union[str, Plugin] # reference to register a plugin + +def import_plugin(name: PluginRef) -> Plugin: if callable(name): return name + if name in _cached_modules: + return _cached_modules[name] + if name in _plugins: module_path, func_name = _plugins[name].rsplit(".", 1) else: module_path, func_name = name.rsplit(".", 1) module = import_module(module_path) - plugin = getattr(module, func_name) + plugin = cast(Plugin, getattr(module, func_name)) _cached_modules[name] = plugin return plugin diff --git a/src/mistune/plugins/abbr.py b/src/mistune/plugins/abbr.py index 1b45790..46ff2a5 100644 --- a/src/mistune/plugins/abbr.py +++ b/src/mistune/plugins/abbr.py @@ -1,7 +1,15 @@ import re import types -from ..util import escape +from typing import TYPE_CHECKING, Match, Optional + from ..helpers import PREVENT_BACKSLASH +from ..util import escape + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown __all__ = ['abbr'] @@ -12,8 +20,8 @@ ) -def parse_ref_abbr(block, m, state): - ref = state.env.get('ref_abbrs') +def parse_ref_abbr(block: "BlockParser", m: Match[str], state: "BlockState") -> int: + ref = state.env.get("ref_abbrs") if not ref: ref = {} key = m.group('abbr_key') @@ -25,8 +33,8 @@ def parse_ref_abbr(block, m, state): return m.end() + 1 -def process_text(inline, text, state): - ref = state.env.get('ref_abbrs') +def process_text(inline: "InlineParser", text: str, state: "InlineState") -> None: + ref = state.env.get("ref_abbrs") if not ref: return state.append_token({'type': 'text', 'raw': text}) @@ -67,13 +75,13 @@ def process_text(inline, text, state): state.append_token({'type': 'text', 'raw': text[pos:]}) -def render_abbr(renderer, text, title): +def render_abbr(renderer: "BaseRenderer", text: str, title: str) -> str: if not title: return '' + text + '' return '' + text + '' -def abbr(md): +def abbr(md: "Markdown") -> None: """A mistune plugin to support abbreviations, spec defined at https://michelf.ca/projects/php-markdown/extra/#abbr @@ -98,6 +106,6 @@ def abbr(md): """ md.block.register('ref_abbr', REF_ABBR, parse_ref_abbr, before='paragraph') # replace process_text - md.inline.process_text = types.MethodType(process_text, md.inline) + md.inline.process_text = types.MethodType(process_text, md.inline) # type: ignore[method-assign] if md.renderer and md.renderer.NAME == 'html': md.renderer.register('abbr', render_abbr) diff --git a/src/mistune/plugins/def_list.py b/src/mistune/plugins/def_list.py index 3675641..d3103cf 100644 --- a/src/mistune/plugins/def_list.py +++ b/src/mistune/plugins/def_list.py @@ -1,6 +1,14 @@ import re +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional + from ..util import strip_end +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown + __all__ = ['def_list'] # https://michelf.ca/projects/php-markdown/extra/#def-list @@ -20,15 +28,15 @@ HAS_BLANK_LINE_RE = re.compile(r'\n[ \t]*\n$') -def parse_def_list(block, m, state): +def parse_def_list(block: "BlockParser", m: Match[str], state: "BlockState") -> int: pos = m.end() children = list(_parse_def_item(block, m)) - m = DEF_RE.match(state.src, pos) - while m: - children.extend(list(_parse_def_item(block, m))) - pos = m.end() - m = DEF_RE.match(state.src, pos) + m2 = DEF_RE.match(state.src, pos) + while m2: + children.extend(list(_parse_def_item(block, m2))) + pos = m2.end() + m2 = DEF_RE.match(state.src, pos) state.append_token({ 'type': 'def_list', @@ -37,8 +45,8 @@ def parse_def_list(block, m, state): return pos -def _parse_def_item(block, m): - head = m.group('def_list_head') +def _parse_def_item(block: "BlockParser", m: Match[str]) -> Iterable[Dict[str, Any]]: + head = m.group("def_list_head") for line in head.splitlines(): yield { 'type': 'def_list_head', @@ -48,15 +56,16 @@ def _parse_def_item(block, m): src = m.group(0) end = len(head) - m = DD_START_RE.search(src, end) - start = m.start() + m2 = DD_START_RE.search(src, end) + assert m2 is not None + start = m2.start() prev_blank_line = src[end:start] == '\n' - while m: - m = DD_START_RE.search(src, start + 1) - if not m: + while m2: + m2 = DD_START_RE.search(src, start + 1) + if not m2: break - end = m.start() + end = m2.start() text = src[start:end].replace(':', ' ', 1) children = _process_text(block, text, prev_blank_line) prev_blank_line = bool(HAS_BLANK_LINE_RE.search(text)) @@ -74,8 +83,8 @@ def _parse_def_item(block, m): } -def _process_text(block, text, loose): - text = TRIM_RE.sub('', text) +def _process_text(block: "BlockParser", text: str, loose: bool) -> List[Any]: + text = TRIM_RE.sub("", text) state = block.state_cls() state.process(strip_end(text)) # use default list rules @@ -86,19 +95,19 @@ def _process_text(block, text, loose): return tokens -def render_def_list(renderer, text): - return '
\n' + text + '
\n' +def render_def_list(renderer: "BaseRenderer", text: str) -> str: + return "
\n" + text + "
\n" -def render_def_list_head(renderer, text): - return '
' + text + '
\n' +def render_def_list_head(renderer: "BaseRenderer", text: str) -> str: + return "
" + text + "
\n" -def render_def_list_item(renderer, text): - return '
' + text + '
\n' +def render_def_list_item(renderer: "BaseRenderer", text: str) -> str: + return "
" + text + "
\n" -def def_list(md): +def def_list(md: "Markdown") -> None: """A mistune plugin to support def list, spec defined at https://michelf.ca/projects/php-markdown/extra/#def-list diff --git a/src/mistune/plugins/footnotes.py b/src/mistune/plugins/footnotes.py index 2e10704..b7780e3 100644 --- a/src/mistune/plugins/footnotes.py +++ b/src/mistune/plugins/footnotes.py @@ -1,7 +1,15 @@ import re +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional, Union + from ..core import BlockState -from ..util import unikey from ..helpers import LINK_LABEL +from ..util import unikey + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown __all__ = ['footnotes'] @@ -18,9 +26,11 @@ INLINE_FOOTNOTE = r'\[\^(?P' + LINK_LABEL + r')\]' -def parse_inline_footnote(inline, m: re.Match, state): - key = unikey(m.group('footnote_key')) - ref = state.env.get('ref_footnotes') +def parse_inline_footnote( + inline: "InlineParser", m: Match[str], state: "InlineState" +) -> int: + key = unikey(m.group("footnote_key")) + ref = state.env.get("ref_footnotes") if ref and key in ref: notes = state.env.get('footnotes') if not notes: @@ -38,8 +48,10 @@ def parse_inline_footnote(inline, m: re.Match, state): return m.end() -def parse_ref_footnote(block, m: re.Match, state: BlockState): - ref = state.env.get('ref_footnotes') +def parse_ref_footnote( + block: "BlockParser", m: Match[str], state: BlockState +) -> int: + ref = state.env.get("ref_footnotes") if not ref: ref = {} @@ -50,8 +62,12 @@ def parse_ref_footnote(block, m: re.Match, state: BlockState): return m.end() -def parse_footnote_item(block, key: str, index: int, state: BlockState): - ref = state.env.get('ref_footnotes') +def parse_footnote_item( + block: "BlockParser", key: str, index: int, state: BlockState +) -> Dict[str, Any]: + ref = state.env.get("ref_footnotes") + if not ref: + raise ValueError("Missing 'ref_footnotes'.") text = ref[key] lines = text.splitlines() @@ -76,8 +92,11 @@ def parse_footnote_item(block, key: str, index: int, state: BlockState): } -def md_footnotes_hook(md, result: str, state: BlockState): - notes = state.env.get('footnotes') +def md_footnotes_hook( + md: "Markdown", result: Union[str, List[Dict[str, Any]]], state: BlockState +) -> str: + assert isinstance(result, str) + notes = state.env.get("footnotes") if not notes: return result @@ -88,27 +107,30 @@ def md_footnotes_hook(md, result: str, state: BlockState): state = BlockState() state.tokens = [{'type': 'footnotes', 'children': children}] output = md.render_state(state) + assert isinstance(output, str) return result + output -def render_footnote_ref(renderer, key: str, index: int): +def render_footnote_ref(renderer: "BaseRenderer", key: str, index: int) -> str: i = str(index) html = '' return html + '' + i + '' -def render_footnotes(renderer, text: str): - return '
\n
    \n' + text + '
\n
\n' +def render_footnotes(renderer: "BaseRenderer", text: str) -> str: + return '
\n
    \n' + text + "
\n
\n" -def render_footnote_item(renderer, text: str, key: str, index: int): +def render_footnote_item( + renderer: "BaseRenderer", text: str, key: str, index: int +) -> str: i = str(index) back = '' text = text.rstrip()[:-4] + back + '

' return '
  • ' + text + '
  • \n' -def footnotes(md): +def footnotes(md: "Markdown") -> None: """A mistune plugin to support footnotes, spec defined at https://michelf.ca/projects/php-markdown/extra/#footnotes diff --git a/src/mistune/plugins/formatting.py b/src/mistune/plugins/formatting.py index 57e5def..9c23cb6 100644 --- a/src/mistune/plugins/formatting.py +++ b/src/mistune/plugins/formatting.py @@ -1,6 +1,14 @@ import re +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional, Pattern + from ..helpers import PREVENT_BACKSLASH +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown + __all__ = ["strikethrough", "mark", "insert", "superscript", "subscript"] _STRIKE_END = re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\~|[^\s~])~~(?!~)') @@ -11,51 +19,65 @@ SUBSCRIPT_PATTERN = r'~(?:' + PREVENT_BACKSLASH + r'\\~|\S|\\ )+?~' -def parse_strikethrough(inline, m, state): - return _parse_to_end(inline, m, state, 'strikethrough', _STRIKE_END) +def parse_strikethrough( + inline: "InlineParser", m: Match[str], state: "InlineState" +) -> Optional[int]: + return _parse_to_end(inline, m, state, "strikethrough", _STRIKE_END) -def render_strikethrough(renderer, text): - return '' + text + '' +def render_strikethrough(renderer: "BaseRenderer", text: str) -> str: + return "" + text + "" -def parse_mark(inline, m, state): - return _parse_to_end(inline, m, state, 'mark', _MARK_END) +def parse_mark( + inline: "InlineParser", m: Match[str], state: "InlineState" +) -> Optional[int]: + return _parse_to_end(inline, m, state, "mark", _MARK_END) -def render_mark(renderer, text): - return '' + text + '' +def render_mark(renderer: "BaseRenderer", text: str) -> str: + return "" + text + "" -def parse_insert(inline, m, state): - return _parse_to_end(inline, m, state, 'insert', _INSERT_END) +def parse_insert( + inline: "InlineParser", m: Match[str], state: "InlineState" +) -> Optional[int]: + return _parse_to_end(inline, m, state, "insert", _INSERT_END) -def render_insert(renderer, text): - return '' + text + '' +def render_insert(renderer: "BaseRenderer", text: str) -> str: + return "" + text + "" -def parse_superscript(inline, m, state): - return _parse_script(inline, m, state, 'superscript') +def parse_superscript( + inline: "InlineParser", m: Match[str], state: "InlineState" +) -> int: + return _parse_script(inline, m, state, "superscript") -def render_superscript(renderer, text): - return '' + text + '' +def render_superscript(renderer: "BaseRenderer", text: str) -> str: + return "" + text + "" -def parse_subscript(inline, m, state): - return _parse_script(inline, m, state, 'subscript') +def parse_subscript(inline: "InlineParser", m: Match[str], state: "InlineState") -> int: + return _parse_script(inline, m, state, "subscript") -def render_subscript(renderer, text): - return '' + text + '' +def render_subscript(renderer: "BaseRenderer", text: str) -> str: + return "" + text + "" -def _parse_to_end(inline, m, state, tok_type, end_pattern): +def _parse_to_end( + inline: "InlineParser", + m: Match[str], + state: "InlineState", + tok_type: str, + end_pattern: Pattern[str], +) -> Optional[int]: pos = m.end() m1 = end_pattern.search(state.src, pos) if not m1: - return + return None end_pos = m1.end() text = state.src[pos:end_pos-2] new_state = state.copy() @@ -65,7 +87,9 @@ def _parse_to_end(inline, m, state, tok_type, end_pattern): return end_pos -def _parse_script(inline, m, state, tok_type): +def _parse_script( + inline: "InlineParser", m: Match[str], state: "InlineState", tok_type: str +) -> int: text = m.group(0) new_state = state.copy() new_state.src = text[1:-1].replace('\\ ', ' ') @@ -77,7 +101,7 @@ def _parse_script(inline, m, state, tok_type): return m.end() -def strikethrough(md): +def strikethrough(md: "Markdown") -> None: """A mistune plugin to support strikethrough. Spec defined by GitHub flavored Markdown and commonly used by many parsers: @@ -103,7 +127,7 @@ def strikethrough(md): md.renderer.register('strikethrough', render_strikethrough) -def mark(md): +def mark(md: "Markdown") -> None: """A mistune plugin to add ```` tag. Spec defined at https://facelessuser.github.io/pymdown-extensions/extensions/mark/: @@ -123,7 +147,7 @@ def mark(md): md.renderer.register('mark', render_mark) -def insert(md): +def insert(md: "Markdown") -> None: """A mistune plugin to add ```` tag. Spec defined at https://facelessuser.github.io/pymdown-extensions/extensions/caret/#insert: @@ -143,7 +167,7 @@ def insert(md): md.renderer.register('insert', render_insert) -def superscript(md): +def superscript(md: "Markdown") -> None: """A mistune plugin to add ```` tag. Spec defined at https://pandoc.org/MANUAL.html#superscripts-and-subscripts: @@ -158,7 +182,7 @@ def superscript(md): md.renderer.register('superscript', render_superscript) -def subscript(md): +def subscript(md: "Markdown") -> None: """A mistune plugin to add ```` tag. Spec defined at https://pandoc.org/MANUAL.html#superscripts-and-subscripts: diff --git a/src/mistune/plugins/math.py b/src/mistune/plugins/math.py index 805105e..e83bd8e 100644 --- a/src/mistune/plugins/math.py +++ b/src/mistune/plugins/math.py @@ -1,30 +1,39 @@ __all__ = ['math', 'math_in_quote', 'math_in_list'] +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown BLOCK_MATH_PATTERN = r'^ {0,3}\$\$[ \t]*\n(?P[\s\S]+?)\n\$\$[ \t]*$' INLINE_MATH_PATTERN = r'\$(?!\s)(?P.+?)(?!\s)\$' -def parse_block_math(block, m, state): - text = m.group('math_text') - state.append_token({'type': 'block_math', 'raw': text}) +def parse_block_math(block: "BlockParser", m: Match[str], state: "BlockState") -> int: + text = m.group("math_text") + state.append_token({"type": "block_math", "raw": text}) return m.end() + 1 -def parse_inline_math(inline, m, state): - text = m.group('math_text') - state.append_token({'type': 'inline_math', 'raw': text}) +def parse_inline_math( + inline: "InlineParser", m: Match[str], state: "InlineState" +) -> int: + text = m.group("math_text") + state.append_token({"type": "inline_math", "raw": text}) return m.end() -def render_block_math(renderer, text): - return '
    $$\n' + text + '\n$$
    \n' +def render_block_math(renderer: "BaseRenderer", text: str) -> str: + return '
    $$\n' + text + "\n$$
    \n" -def render_inline_math(renderer, text): - return r'\(' + text + r'\)' +def render_inline_math(renderer: "BaseRenderer", text: str) -> str: + return r'\(' + text + r"\)" -def math(md): +def math(md: "Markdown") -> None: """A mistune plugin to support math. The syntax is used by many markdown extensions: @@ -47,11 +56,11 @@ def math(md): md.renderer.register('inline_math', render_inline_math) -def math_in_quote(md): +def math_in_quote(md: "Markdown") -> None: """Enable block math plugin in block quote.""" md.block.insert_rule(md.block.block_quote_rules, 'block_math', before='list') -def math_in_list(md): +def math_in_list(md: "Markdown") -> None: """Enable block math plugin in list.""" md.block.insert_rule(md.block.list_rules, 'block_math', before='list') diff --git a/src/mistune/plugins/ruby.py b/src/mistune/plugins/ruby.py index eabc037..5081346 100644 --- a/src/mistune/plugins/ruby.py +++ b/src/mistune/plugins/ruby.py @@ -1,13 +1,21 @@ import re -from ..util import unikey +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional + from ..helpers import parse_link, parse_link_label +from ..util import unikey + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown RUBY_PATTERN = r'\[(?:\w+\(\w+\))+\]' _ruby_re = re.compile(RUBY_PATTERN) -def parse_ruby(inline, m, state): +def parse_ruby(inline: "InlineParser", m: Match[str], state: "InlineState") -> int: text = m.group(0)[1:-2] items = text.split(')') tokens = [] @@ -38,7 +46,9 @@ def parse_ruby(inline, m, state): return end_pos -def _parse_ruby_link(inline, state, pos, tokens): +def _parse_ruby_link( + inline: "InlineParser", state: "InlineState", pos: int, tokens: List[Dict[str, Any]] +) -> Optional[int]: c = state.src[pos] if c == '(': # standard link [text]( "title") @@ -73,13 +83,14 @@ def _parse_ruby_link(inline, state, pos, tokens): 'raw': '[' + label + ']', }) return link_pos + return None -def render_ruby(renderer, text, rt): - return '' + text + '' + rt + '' +def render_ruby(renderer: "BaseRenderer", text: str, rt: str) -> str: + return "" + text + "" + rt + "" -def ruby(md): +def ruby(md: "Markdown") -> None: """A mistune plugin to support ```` tag. The syntax is defined at https://lepture.com/en/2022/markdown-ruby-markup: diff --git a/src/mistune/plugins/speedup.py b/src/mistune/plugins/speedup.py index 784022c..42f9444 100644 --- a/src/mistune/plugins/speedup.py +++ b/src/mistune/plugins/speedup.py @@ -1,5 +1,12 @@ import re import string +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown # because mismatch is too slow, add parsers for paragraph and text @@ -12,21 +19,20 @@ __all__ = ['speedup'] - -def parse_text(inline, m, state): +def parse_text(inline: "InlineParser", m: Match[str], state: "InlineState") -> int: text = m.group(0) text = HARD_LINEBREAK_RE.sub('\n', text) inline.process_text(text, state) return m.end() -def parse_paragraph(block, m, state): +def parse_paragraph(block: "BlockParser", m: Match[str], state: "BlockState") -> int: text = m.group(0) state.add_paragraph(text) return m.end() -def speedup(md): +def speedup(md: "Markdown") -> None: """Increase the speed of parsing paragraph and inline text.""" md.block.register('paragraph', PARAGRAPH, parse_paragraph) diff --git a/src/mistune/plugins/spoiler.py b/src/mistune/plugins/spoiler.py index 2931d2b..30371bc 100644 --- a/src/mistune/plugins/spoiler.py +++ b/src/mistune/plugins/spoiler.py @@ -1,4 +1,11 @@ import re +from typing import TYPE_CHECKING, Match, Optional + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown __all__ = ['spoiler'] @@ -8,7 +15,9 @@ INLINE_SPOILER_PATTERN = r'>!\s*(?P.+?)\s*!<' -def parse_block_spoiler(block, m, state): +def parse_block_spoiler( + block: "BlockParser", m: Match[str], state: "BlockState" +) -> int: text, end_pos = block.extract_block_quote(m, state) if not text.endswith('\n'): # ensure it endswith \n to make sure @@ -39,8 +48,10 @@ def parse_block_spoiler(block, m, state): return state.cursor -def parse_inline_spoiler(inline, m, state): - text = m.group('spoiler_text') +def parse_inline_spoiler( + inline: "InlineParser", m: Match[str], state: "InlineState" +) -> int: + text = m.group("spoiler_text") new_state = state.copy() new_state.src = text children = inline.render(new_state) @@ -48,15 +59,15 @@ def parse_inline_spoiler(inline, m, state): return m.end() -def render_block_spoiler(renderer, text): - return '
    \n' + text + '
    \n' +def render_block_spoiler(renderer: "BaseRenderer", text: str) -> str: + return '
    \n' + text + "
    \n" -def render_inline_spoiler(renderer, text): - return '' + text + '' +def render_inline_spoiler(renderer: "BaseRenderer", text: str) -> str: + return '' + text + "" -def spoiler(md): +def spoiler(md: "Markdown") -> None: """A mistune plugin to support block and inline spoiler. The syntax is inspired by stackexchange: diff --git a/src/mistune/plugins/table.py b/src/mistune/plugins/table.py index d3bc4c2..d47f0ac 100644 --- a/src/mistune/plugins/table.py +++ b/src/mistune/plugins/table.py @@ -1,6 +1,24 @@ import re +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Match, + Optional, + Tuple, + Union, +) + from ..helpers import PREVENT_BACKSLASH +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown + # https://michelf.ca/projects/php-markdown/extra/#table __all__ = ['table', 'table_in_quote', 'table_in_list'] @@ -24,23 +42,26 @@ ALIGN_RIGHT = re.compile(r'^ *-+: *$') -def parse_table(block, m, state): +def parse_table( + block: "BlockParser", m: Match[str], state: "BlockState" +) -> Optional[int]: pos = m.end() header = m.group('table_head') align = m.group('table_align') thead, aligns = _process_thead(header, align) if not thead: - return + return None + assert aligns is not None rows = [] body = m.group('table_body') for text in body.splitlines(): - m = TABLE_CELL.match(text) - if not m: # pragma: no cover - return - row = _process_row(m.group(1), aligns) + m2 = TABLE_CELL.match(text) + if not m2: # pragma: no cover + return None + row = _process_row(m2.group(1), aligns) if not row: - return + return None rows.append(row) children = [thead, {'type': 'table_body', 'children': rows}] @@ -48,19 +69,22 @@ def parse_table(block, m, state): return pos -def parse_nptable(block, m, state): - header = m.group('nptable_head') - align = m.group('nptable_align') +def parse_nptable( + block: "BlockParser", m: Match[str], state: "BlockState" +) -> Optional[int]: + header = m.group("nptable_head") + align = m.group("nptable_align") thead, aligns = _process_thead(header, align) if not thead: - return + return None + assert aligns is not None rows = [] body = m.group('nptable_body') for text in body.splitlines(): row = _process_row(text, aligns) if not row: - return + return None rows.append(row) children = [thead, {'type': 'table_body', 'children': rows}] @@ -68,7 +92,9 @@ def parse_nptable(block, m, state): return m.end() -def _process_thead(header, align): +def _process_thead( + header: str, align: str +) -> Union[Tuple[None, None], Tuple[Dict[str, Any], List[str]]]: headers = CELL_SPLIT.split(header) aligns = CELL_SPLIT.split(align) if len(headers) != len(aligns): @@ -96,7 +122,7 @@ def _process_thead(header, align): return thead, aligns -def _process_row(text, aligns): +def _process_row(text: str, aligns: List[str]) -> Optional[Dict[str, Any]]: cells = CELL_SPLIT.split(text) if len(cells) != len(aligns): return None @@ -112,23 +138,25 @@ def _process_row(text, aligns): return {'type': 'table_row', 'children': children} -def render_table(renderer, text): - return '\n' + text + '
    \n' +def render_table(renderer: "BaseRenderer", text: str) -> str: + return "\n" + text + "
    \n" -def render_table_head(renderer, text): - return '\n\n' + text + '\n\n' +def render_table_head(renderer: "BaseRenderer", text: str) -> str: + return "\n\n" + text + "\n\n" -def render_table_body(renderer, text): - return '\n' + text + '\n' +def render_table_body(renderer: "BaseRenderer", text: str) -> str: + return "\n" + text + "\n" -def render_table_row(renderer, text): - return '\n' + text + '\n' +def render_table_row(renderer: "BaseRenderer", text: str) -> str: + return "\n" + text + "\n" -def render_table_cell(renderer, text, align=None, head=False): +def render_table_cell( + renderer: "BaseRenderer", text: str, align: Optional[str] = None, head: bool = False +) -> str: if head: tag = 'th' else: @@ -141,7 +169,7 @@ def render_table_cell(renderer, text, align=None, head=False): return html + '>' + text + '\n' -def table(md): +def table(md: "Markdown") -> None: """A mistune plugin to support table, spec defined at https://michelf.ca/projects/php-markdown/extra/#table @@ -167,13 +195,13 @@ def table(md): md.renderer.register('table_cell', render_table_cell) -def table_in_quote(md): +def table_in_quote(md: "Markdown") -> None: """Enable table plugin in block quotes.""" md.block.insert_rule(md.block.block_quote_rules, 'table', before='paragraph') md.block.insert_rule(md.block.block_quote_rules, 'nptable', before='paragraph') -def table_in_list(md): +def table_in_list(md: "Markdown") -> None: """Enable table plugin in list.""" md.block.insert_rule(md.block.list_rules, 'table', before='paragraph') md.block.insert_rule(md.block.list_rules, 'nptable', before='paragraph') diff --git a/src/mistune/plugins/task_lists.py b/src/mistune/plugins/task_lists.py index 8571c32..eb7a258 100644 --- a/src/mistune/plugins/task_lists.py +++ b/src/mistune/plugins/task_lists.py @@ -1,4 +1,11 @@ import re +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional + +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown __all__ = ['task_lists'] @@ -6,15 +13,14 @@ TASK_LIST_ITEM = re.compile(r'^(\[[ xX]\])\s+') -def task_lists_hook(md, state): +def task_lists_hook(md: "Markdown", state: "BlockState") -> Iterable[Dict[str, Any]]: return _rewrite_all_list_items(state.tokens) -def render_task_list_item(renderer, text, checked=False): - checkbox = ( - ' str: + checkbox = '' + text + '\n' -def task_lists(md): +def task_lists(md: "Markdown") -> None: """A mistune plugin to support task lists. Spec defined by GitHub flavored Markdown and commonly used by many parsers: @@ -44,7 +50,9 @@ def task_lists(md): md.renderer.register('task_list_item', render_task_list_item) -def _rewrite_all_list_items(tokens): +def _rewrite_all_list_items( + tokens: Iterable[Dict[str, Any]] +) -> Iterable[Dict[str, Any]]: for tok in tokens: if tok['type'] == 'list_item': _rewrite_list_item(tok) @@ -53,7 +61,7 @@ def _rewrite_all_list_items(tokens): return tokens -def _rewrite_list_item(tok): +def _rewrite_list_item(tok: Dict[str, Any]) -> None: children = tok['children'] if children: first_child = children[0] diff --git a/src/mistune/plugins/url.py b/src/mistune/plugins/url.py index d6f2251..7f68d91 100644 --- a/src/mistune/plugins/url.py +++ b/src/mistune/plugins/url.py @@ -1,11 +1,18 @@ +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Match, Optional + from ..util import escape_url +if TYPE_CHECKING: + from ..block_parser import BlockParser + from ..core import BaseRenderer, BlockState, InlineState, Parser + from ..inline_parser import InlineParser + from ..markdown import Markdown __all__ = ['url'] URL_LINK_PATTERN = r'''https?:\/\/[^\s<]+[^<.,:;"')\]\s]''' -def parse_url_link(inline, m, state): +def parse_url_link(inline: "InlineParser", m: Match[str], state: "InlineState") -> int: text = m.group(0) pos = m.end() if state.in_link: @@ -19,5 +26,5 @@ def parse_url_link(inline, m, state): return pos -def url(md): - md.inline.register('url_link', URL_LINK_PATTERN, parse_url_link) +def url(md: "Markdown") -> None: + md.inline.register("url_link", URL_LINK_PATTERN, parse_url_link) diff --git a/src/mistune/renderers/_list.py b/src/mistune/renderers/_list.py index 90b3ef5..a9f0313 100644 --- a/src/mistune/renderers/_list.py +++ b/src/mistune/renderers/_list.py @@ -1,9 +1,16 @@ +from typing import TYPE_CHECKING, Any, Dict, Iterable, cast + from ..util import strip_end +if TYPE_CHECKING: + from ..core import BaseRenderer, BlockState + -def render_list(renderer, token, state) -> str: - attrs = token['attrs'] - if attrs['ordered']: +def render_list( + renderer: "BaseRenderer", token: Dict[str, Any], state: "BlockState" +) -> str: + attrs = token["attrs"] + if attrs["ordered"]: children = _render_ordered_list(renderer, token, state) else: children = _render_unordered_list(renderer, token, state) @@ -17,13 +24,18 @@ def render_list(renderer, token, state) -> str: return strip_end(text) + '\n' -def _render_list_item(renderer, parent, item, state): - leading = parent['leading'] - text = '' - for tok in item['children']: - if tok['type'] == 'list': - tok['parent'] = parent - elif tok['type'] == 'blank_line': +def _render_list_item( + renderer: "BaseRenderer", + parent: Dict[str, Any], + item: Dict[str, Any], + state: "BlockState", +) -> str: + leading = cast(str, parent["leading"]) + text = "" + for tok in item["children"]: + if tok["type"] == "list": + tok["parent"] = parent + elif tok["type"] == "blank_line": continue text += renderer.render_token(tok, state) @@ -38,11 +50,13 @@ def _render_list_item(renderer, parent, item, state): return leading + text -def _render_ordered_list(renderer, token, state): - attrs = token['attrs'] - start = attrs.get('start', 1) - for item in token['children']: - leading = str(start) + token['bullet'] + ' ' +def _render_ordered_list( + renderer: "BaseRenderer", token: Dict[str, Any], state: "BlockState" +) -> Iterable[str]: + attrs = token["attrs"] + start = attrs.get("start", 1) + for item in token["children"]: + leading = str(start) + token["bullet"] + " " parent = { 'leading': leading, 'tight': token['tight'], @@ -51,7 +65,9 @@ def _render_ordered_list(renderer, token, state): start += 1 -def _render_unordered_list(renderer, token, state): +def _render_unordered_list( + renderer: "BaseRenderer", token: Dict[str, Any], state: "BlockState" +) -> Iterable[str]: parent = { 'leading': token['bullet'] + ' ', 'tight': token['tight'], diff --git a/src/mistune/renderers/html.py b/src/mistune/renderers/html.py index 26f2d35..f8e86d7 100644 --- a/src/mistune/renderers/html.py +++ b/src/mistune/renderers/html.py @@ -1,29 +1,38 @@ -from ..core import BaseRenderer -from ..util import escape as escape_text, striptags, safe_entity +from typing import Any, ClassVar, Dict, Optional, Tuple + +from typing_extensions import Literal + +from ..core import BaseRenderer, BlockState +from ..util import escape as escape_text +from ..util import safe_entity, striptags class HTMLRenderer(BaseRenderer): """A renderer for converting Markdown to HTML.""" - NAME = 'html' - HARMFUL_PROTOCOLS = ( + + _escape: bool + NAME: ClassVar[Literal["html"]] = "html" + HARMFUL_PROTOCOLS: ClassVar[Tuple[str, ...]] = ( 'javascript:', 'vbscript:', 'file:', 'data:', ) - GOOD_DATA_PROTOCOLS = ( + GOOD_DATA_PROTOCOLS: ClassVar[Tuple[str, ...]] = ( 'data:image/gif;', 'data:image/png;', 'data:image/jpeg;', 'data:image/webp;', ) - def __init__(self, escape=True, allow_harmful_protocols=None): + def __init__( + self, escape: bool = True, allow_harmful_protocols: Optional[bool] = None + ) -> None: super(HTMLRenderer, self).__init__() self._allow_harmful_protocols = allow_harmful_protocols self._escape = escape - def render_token(self, token, state): + def render_token(self, token: Dict[str, Any], state: BlockState) -> str: # backward compitable with v2 func = self._get_method(token['type']) attrs = token.get('attrs') @@ -70,13 +79,13 @@ def emphasis(self, text: str) -> str: def strong(self, text: str) -> str: return '' + text + '' - def link(self, text: str, url: str, title=None) -> str: + def link(self, text: str, url: str, title: Optional[str] = None) -> str: s = '' + text + '' - def image(self, text: str, url: str, title=None) -> str: + def image(self, text: str, url: str, title: Optional[str] = None) -> str: src = self.safe_url(url) alt = escape_text(striptags(text)) s = '' + alt + ' str: def paragraph(self, text: str) -> str: return '

    ' + text + '

    \n' - def heading(self, text: str, level: int, **attrs) -> str: + def heading(self, text: str, level: int, **attrs: Any) -> str: tag = 'h' + str(level) html = '<' + tag _id = attrs.get('id') @@ -118,7 +127,7 @@ def thematic_break(self) -> str: def block_text(self, text: str) -> str: return text - def block_code(self, code: str, info=None) -> str: + def block_code(self, code: str, info: Optional[str] = None) -> str: html = '
     str:
         def block_error(self, text: str) -> str:
             return '
    ' + text + '
    \n' - def list(self, text: str, ordered: bool, **attrs) -> str: + def list(self, text: str, ordered: bool, **attrs: Any) -> str: if ordered: html = ' str: out = self.render_tokens(tokens, state) # special handle for line breaks out += '\n\n'.join(self.render_referrences(state)) + '\n' return strip_end(out) - def render_referrences(self, state: BlockState): + def render_referrences(self, state: BlockState) -> Iterable[str]: ref_links = state.env['ref_links'] for key in ref_links: attrs = ref_links[key] @@ -28,12 +29,12 @@ def render_referrences(self, state: BlockState): text += ' "' + title + '"' yield text - def render_children(self, token, state: BlockState): + def render_children(self, token: Dict[str, Any], state: BlockState) -> str: children = token['children'] return self.render_tokens(children, state) def text(self, token: Dict[str, Any], state: BlockState) -> str: - return token['raw'] + return cast(str, token["raw"]) def emphasis(self, token: Dict[str, Any], state: BlockState) -> str: return '*' + self.render_children(token, state) + '*' @@ -42,7 +43,7 @@ def strong(self, token: Dict[str, Any], state: BlockState) -> str: return '**' + self.render_children(token, state) + '**' def link(self, token: Dict[str, Any], state: BlockState) -> str: - label = token.get('label') + label = cast(str, token.get("label")) text = self.render_children(token, state) out = '[' + text + ']' if label: @@ -69,7 +70,7 @@ def image(self, token: Dict[str, Any], state: BlockState) -> str: return '!' + self.link(token, state) def codespan(self, token: Dict[str, Any], state: BlockState) -> str: - return '`' + token['raw'] + '`' + return "`" + cast(str, token["raw"]) + "`" def linebreak(self, token: Dict[str, Any], state: BlockState) -> str: return ' \n' @@ -81,15 +82,15 @@ def blank_line(self, token: Dict[str, Any], state: BlockState) -> str: return '' def inline_html(self, token: Dict[str, Any], state: BlockState) -> str: - return token['raw'] + return cast(str, token["raw"]) def paragraph(self, token: Dict[str, Any], state: BlockState) -> str: text = self.render_children(token, state) return text + '\n\n' def heading(self, token: Dict[str, Any], state: BlockState) -> str: - level = token['attrs']['level'] - marker = '#' * level + level = cast(int, token["attrs"]["level"]) + marker = "#" * level text = self.render_children(token, state) return marker + ' ' + text + '\n\n' @@ -100,23 +101,24 @@ def block_text(self, token: Dict[str, Any], state: BlockState) -> str: return self.render_children(token, state) + '\n' def block_code(self, token: Dict[str, Any], state: BlockState) -> str: - attrs = token.get('attrs', {}) - info = attrs.get('info', '') - code = token['raw'] - if code and code[-1] != '\n': - code += '\n' + attrs = token.get("attrs", {}) + info = cast(str, attrs.get("info", "")) + code = cast(str, token["raw"]) + if code and code[-1] != "\n": + code += "\n" marker = token.get('marker') if not marker: marker = _get_fenced_marker(code) - return marker + info + '\n' + code + marker + '\n\n' + marker2 = cast(str, marker) + return marker2 + info + "\n" + code + marker2 + "\n\n" def block_quote(self, token: Dict[str, Any], state: BlockState) -> str: text = indent(self.render_children(token, state), '> ') return text + '\n\n' def block_html(self, token: Dict[str, Any], state: BlockState) -> str: - return token['raw'] + '\n\n' + return cast(str, token["raw"]) + "\n\n" def block_error(self, token: Dict[str, Any], state: BlockState) -> str: return '' @@ -125,7 +127,7 @@ def list(self, token: Dict[str, Any], state: BlockState) -> str: return render_list(self, token, state) -def _get_fenced_marker(code): +def _get_fenced_marker(code: str) -> str: found = fenced_re.findall(code) if not found: return '```' diff --git a/src/mistune/renderers/rst.py b/src/mistune/renderers/rst.py index fa12c21..7022d15 100644 --- a/src/mistune/renderers/rst.py +++ b/src/mistune/renderers/rst.py @@ -1,8 +1,9 @@ -from typing import Dict, Any from textwrap import indent -from ._list import render_list +from typing import Any, Dict, Iterable, List, cast + from ..core import BaseRenderer, BlockState from ..util import strip_end +from ._list import render_list class RSTRenderer(BaseRenderer): @@ -20,7 +21,9 @@ class RSTRenderer(BaseRenderer): } INLINE_IMAGE_PREFIX = 'img-' - def iter_tokens(self, tokens, state): + def iter_tokens( + self, tokens: Iterable[Dict[str, Any]], state: BlockState + ) -> Iterable[str]: prev = None for tok in tokens: # ignore blank line @@ -30,14 +33,14 @@ def iter_tokens(self, tokens, state): prev = tok yield self.render_token(tok, state) - def __call__(self, tokens, state: BlockState): + def __call__(self, tokens: Iterable[Dict[str, Any]], state: BlockState) -> str: state.env['inline_images'] = [] out = self.render_tokens(tokens, state) # special handle for line breaks out += '\n\n'.join(self.render_referrences(state)) + '\n' return strip_end(out) - def render_referrences(self, state: BlockState): + def render_referrences(self, state: BlockState) -> Iterable[str]: images = state.env['inline_images'] for index, token in enumerate(images): attrs = token['attrs'] @@ -45,13 +48,13 @@ def render_referrences(self, state: BlockState): ident = self.INLINE_IMAGE_PREFIX + str(index) yield '.. |' + ident + '| image:: ' + attrs['url'] + '\n :alt: ' + alt - def render_children(self, token, state: BlockState): + def render_children(self, token: Dict[str, Any], state: BlockState) -> str: children = token['children'] return self.render_tokens(children, state) def text(self, token: Dict[str, Any], state: BlockState) -> str: - text = token['raw'] - return text.replace('|', r'\|') + text = cast(str, token["raw"]) + return text.replace("|", r"\|") def emphasis(self, token: Dict[str, Any], state: BlockState) -> str: return '*' + self.render_children(token, state) + '*' @@ -62,16 +65,16 @@ def strong(self, token: Dict[str, Any], state: BlockState) -> str: def link(self, token: Dict[str, Any], state: BlockState) -> str: attrs = token['attrs'] text = self.render_children(token, state) - return '`' + text + ' <' + attrs['url'] + '>`__' + return "`" + text + " <" + cast(str, attrs["url"]) + ">`__" def image(self, token: Dict[str, Any], state: BlockState) -> str: - refs: list = state.env['inline_images'] + refs: List[Dict[str, Any]] = state.env["inline_images"] index = len(refs) refs.append(token) return '|' + self.INLINE_IMAGE_PREFIX + str(index) + '|' def codespan(self, token: Dict[str, Any], state: BlockState) -> str: - return '``' + token['raw'] + '``' + return "``" + cast(str, token["raw"]) + "``" def linebreak(self, token: Dict[str, Any], state: BlockState) -> str: return '' @@ -87,10 +90,10 @@ def paragraph(self, token: Dict[str, Any], state: BlockState) -> str: children = token['children'] if len(children) == 1 and children[0]['type'] == 'image': image = children[0] - attrs = image['attrs'] - title = attrs.get('title') + attrs = image["attrs"] + title = cast(str, attrs.get("title")) alt = self.render_children(image, state) - text = '.. figure:: ' + attrs['url'] + text = ".. figure:: " + cast(str, attrs["url"]) if title: text += '\n :alt: ' + title text += '\n\n' + indent(alt, ' ') @@ -114,9 +117,9 @@ def block_text(self, token: Dict[str, Any], state: BlockState) -> str: return self.render_children(token, state) + '\n' def block_code(self, token: Dict[str, Any], state: BlockState) -> str: - attrs = token.get('attrs', {}) - info = attrs.get('info') - code = indent(token['raw'], ' ') + attrs = token.get("attrs", {}) + info = cast(str, attrs.get("info")) + code = indent(cast(str, token["raw"]), " ") if info: lang = info.split()[0] return '.. code:: ' + lang + '\n\n' + code + '\n' diff --git a/src/mistune/toc.py b/src/mistune/toc.py index 0c5787f..be4b8b3 100644 --- a/src/mistune/toc.py +++ b/src/mistune/toc.py @@ -1,7 +1,18 @@ +from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Optional, Tuple + +from .core import BlockState from .util import striptags +if TYPE_CHECKING: + from .markdown import Markdown + -def add_toc_hook(md, min_level=1, max_level=3, heading_id=None): +def add_toc_hook( + md: "Markdown", + min_level: int = 1, + max_level: int = 3, + heading_id: Optional[Callable[[Dict[str, Any], int], str]] = None, +) -> None: """Add a hook to save toc items into ``state.env``. This is usually helpful for doc generator:: @@ -21,10 +32,11 @@ def add_toc_hook(md, min_level=1, max_level=3, heading_id=None): :param heading_id: a function to generate heading_id """ if heading_id is None: - def heading_id(token, index): + + def heading_id(token: Dict[str, Any], index: int) -> str: return 'toc_' + str(index + 1) - def toc_hook(md, state): + def toc_hook(md: "Markdown", state: "BlockState") -> None: headings = [] for tok in state.tokens: @@ -44,16 +56,17 @@ def toc_hook(md, state): md.before_render_hooks.append(toc_hook) -def normalize_toc_item(md, token): - text = token['text'] +def normalize_toc_item(md: "Markdown", token: Dict[str, Any]) -> Tuple[int, str, str]: + text = token["text"] tokens = md.inline(text, {}) - html = md.renderer(tokens, {}) + assert md.renderer is not None + html = md.renderer(tokens, BlockState()) text = striptags(html) attrs = token['attrs'] return attrs['level'], attrs['id'], text -def render_toc_ul(toc): +def render_toc_ul(toc: Iterable[Tuple[int, str, str]]) -> str: """Render a
      table of content HTML. The param "toc" should be formatted into this structure:: @@ -74,7 +87,7 @@ def render_toc_ul(toc): return '' s = '
        \n' - levels = [] + levels: List[int] = [] for level, k, text in toc: item = '{}'.format(k, text) if not levels: diff --git a/src/mistune/util.py b/src/mistune/util.py index 5e1b9ed..80275ff 100644 --- a/src/mistune/util.py +++ b/src/mistune/util.py @@ -1,24 +1,24 @@ import re +from html import _replace_charref # type: ignore[attr-defined] +from typing import Match from urllib.parse import quote -from html import _replace_charref - _expand_tab_re = re.compile(r'^( {0,3})\t', flags=re.M) -def expand_leading_tab(text: str, width=4): - def repl(m): +def expand_leading_tab(text: str, width: int = 4) -> str: + def repl(m: Match[str]) -> str: s = m.group(1) return s + ' ' * (width - len(s)) return _expand_tab_re.sub(repl, text) -def expand_tab(text: str, space: str=' '): - repl = r'\1' + space +def expand_tab(text: str, space: str = " ") -> str: + repl = r"\1" + space return _expand_tab_re.sub(repl, text) -def escape(s: str, quote: bool=True): +def escape(s: str, quote: bool = True) -> str: """Escape characters of ``&<>``. If quote=True, ``"`` will be converted to ``"e;``.""" s = s.replace("&", "&") @@ -29,7 +29,7 @@ def escape(s: str, quote: bool=True): return s -def escape_url(link: str): +def escape_url(link: str) -> str: """Escape URL for safety.""" safe = ( ':/?#@' # gen-delims - '[]' (rfc3986) @@ -39,12 +39,12 @@ def escape_url(link: str): return escape(quote(unescape(link), safe=safe)) -def safe_entity(s: str): +def safe_entity(s: str) -> str: """Escape characters for safety.""" return escape(unescape(s)) -def unikey(s: str): +def unikey(s: str) -> str: """Generate a unique key for links and footnotes.""" key = ' '.join(s.split()).strip() return key.lower().upper() @@ -57,7 +57,7 @@ def unikey(s: str): ) -def unescape(s: str): +def unescape(s: str) -> str: """ Copy from `html.unescape`, but `_charref` is different. CommonMark does not accept entity references without a trailing semicolon @@ -70,12 +70,12 @@ def unescape(s: str): _striptags_re = re.compile(r'(|<[^>]*>)') -def striptags(s: str): +def striptags(s: str) -> str: return _striptags_re.sub('', s) _strip_end_re = re.compile(r'\n\s+$') -def strip_end(src: str): +def strip_end(src: str) -> str: return _strip_end_re.sub('\n', src) diff --git a/tests/__init__.py b/tests/__init__.py index 3e4e980..b715104 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,13 +1,15 @@ import re -from tests import fixtures +from abc import abstractmethod from unittest import TestCase +from tests import fixtures + class BaseTestCase(TestCase): @classmethod - def load_fixtures(cls, case_file): - def attach_case(n, text, html): - def method(self): + def load_fixtures(cls, case_file: str) -> None: + def attach_case(n: str, text: str, html: str) -> None: + def method(self: 'BaseTestCase') -> None: self.assert_case(n, text, html) name = 'test_{}'.format(n) @@ -21,15 +23,18 @@ def method(self): attach_case(n, text, html) @classmethod - def ignore_case(cls, name): + def ignore_case(cls, name: str) -> bool: return False + + @abstractmethod + def parse(self, text: str) -> str: ... - def assert_case(self, name, text, html): + def assert_case(self, name: str, text: str, html: str) -> None: result = self.parse(text) self.assertEqual(result, html) -def normalize_html(html): +def normalize_html(html: str) -> str: html = re.sub(r'>\n+', '>', html) html = re.sub(r'\n+<', '<', html) return html.strip() diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index c1ad3c1..9a1abe2 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,6 +1,7 @@ import os import re import json +from typing import Any, Iterable, Tuple ROOT = os.path.join(os.path.dirname(__file__)) @@ -12,17 +13,17 @@ ) -def load_ast(filename): +def load_ast(filename: str) -> Any: with open(os.path.join(ROOT, 'ast', filename)) as f: return json.load(f) -def load_json(filename): +def load_json(filename: str) -> Any: with open(os.path.join(ROOT, filename)) as f: return json.load(f) -def load_examples(filename): +def load_examples(filename: str) -> Iterable[Tuple[str, str, str]]: if filename.endswith('.json'): data = load_json(filename) for item in data: @@ -37,7 +38,7 @@ def load_examples(filename): -def parse_examples(text): +def parse_examples(text: str) -> Iterable[Tuple[str, str, str]]: data = EXAMPLE_PATTERN.findall(text) section = None diff --git a/tests/test_directives.py b/tests/test_directives.py index cd3bd01..388963a 100644 --- a/tests/test_directives.py +++ b/tests/test_directives.py @@ -77,7 +77,7 @@ def test_colon_fenced_toc(self): class TestDirectiveInclude(BaseTestCase): - md = create_markdown(escape=False, plugins=[RSTDirective([Include()])]) + md = create_markdown(escape=False, plugins=[RSTDirective([Include()])]) # type: ignore[list-item] def test_html_include(self): html = self.md.read(os.path.join(ROOT, 'include/text.md'))[0]