diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b29be3f..1132241 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 3.6.1 +current_version = 3.7.0 [bumpversion:file:mkdocs_include_markdown_plugin/__init__.py] diff --git a/.gitignore b/.gitignore index b139e0b..4829dcf 100644 --- a/.gitignore +++ b/.gitignore @@ -120,6 +120,9 @@ venv.bak/ # Rope project settings .ropeproject +# VS Code +.vscode + # mkdocs documentation /site diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 98d80d0..5fab760 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,11 +22,13 @@ repos: args: - --py36-plus - repo: https://github.com/asottile/setup-cfg-fmt - rev: v1.20.2 + rev: v2.0.0 hooks: - id: setup-cfg-fmt + args: + - --include-version-classifiers - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 + rev: 5.0.4 hooks: - id: flake8 additional_dependencies: @@ -71,7 +73,7 @@ repos: - -c - .yamllint - repo: https://github.com/DavidAnson/markdownlint-cli2 - rev: v0.5.0 + rev: v0.5.1 hooks: - id: markdownlint-cli2 name: markdownlint-readme diff --git a/README.md b/README.md index 845651d..7ebbe90 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,15 @@ content to include. `true` and `false`. - # **dedent** (*false*): If enabled, the included content will be dedented. +- # + **exclude**: Specify with a glob which files should be ignored. Only useful + when passing globs to include multiple files. +- # + **trailing-newlines** (*true*): When this option is disabled, the trailing newlines + found in the content to include are stripped. Possible values are `true` and `false`. +- # + **encoding** (*utf-8*): Specify the encoding of the included file. + If not defined `utf-8` will be used. - # **rewrite-relative-urls** (*true*): When this option is enabled (default), Markdown links and images in the content that are specified by a relative URL @@ -97,12 +106,6 @@ content to include. **heading-offset** (0): Increases or decreases the Markdown headings depth by this number. Only supports number sign (`#`) heading syntax. Accepts negative values to drop leading `#` characters. -- # - **exclude**: Specify with a glob which files should be ignored. Only useful - when passing globs to include multiple files. -- # - **trailing-newlines** (*true*): When this option is disabled, the trailing newlines - found in the content to include are stripped. Possible values are `true` and `false`. ##### Examples @@ -166,6 +169,9 @@ Includes the content of a file or a group of files. - # **trailing-newlines** (*true*): When this option is disabled, the trailing newlines found in the content to include are stripped. Possible values are `true` and `false`. +- # + **encoding** (*utf-8*): Specify the encoding of the included file. + If not defined `utf-8` will be used. ##### Examples diff --git a/locale/es/README.md b/locale/es/README.md index 65cb184..835803e 100644 --- a/locale/es/README.md +++ b/locale/es/README.md @@ -78,6 +78,17 @@ indentar la plantilla `{% %}` incluidora. Los valores posibles son `true` y `false`. - # **dedent** (*false*): Si se habilita, el contenido incluido será dedentado. +- # **exclude**: +Expecifica mediante un glob los archivos que deben ser ignorados. Sólo es útil +pasando globs para incluir múltiples archivos. +- # **trailing-newlines** (*true*): +Cuando esta opción está deshabilitada, los saltos de línea finales que se +encuentran en el contenido a incluir se eliminan. Los valores posibles son +`true` y `false`. +- # **encoding** +(*utf-8*): Especifica la codificación del archivo incluído. Si no se define, se +usará `utf-8`. - # **rewrite-relative-urls** (*true*): Cuando esta opción está habilitada (por defecto), los enlaces e imágenes @@ -94,14 +105,6 @@ href="#include-markdown_heading-offset"># **heading-offset** (0): Incrementa o disminuye la profundidad de encabezados Markdown por el número especificado. Sólo soporta la sintaxis de encabezado de caracteres de hash (`#`). Acepta valores negativos para eliminar caracteres `#` a la izquierda. -- # **exclude**: -Expecifica mediante un glob los archivos que deben ser ignorados. Sólo es útil -pasando globs para incluir múltiples archivos. -- # **trailing-newlines** (*true*): -Cuando esta opción está deshabilitada, los saltos de línea finales que se -encuentran en el contenido a incluir se eliminan. Los valores posibles son -`true` y `false`. ##### Ejemplos @@ -165,6 +168,9 @@ pasando globs para incluir múltiples archivos. (*true*): Cuando esta opción está deshabilitada, los saltos de línea finales que se encuentran en el contenido a incluir se eliminan. Los valores posibles son `true` y `false`. +- # **encoding** (*utf-8*): +Especifica la codificación del archivo incluído. Si no se define, se usará +`utf-8`. ##### Ejemplos diff --git a/locale/es/README.md.po b/locale/es/README.md.po index 4ba8861..bbadaf5 100644 --- a/locale/es/README.md.po +++ b/locale/es/README.md.po @@ -315,3 +315,21 @@ msgstr "" "Las etiquetas de apertura y cierre por defecto son `{%` y `%}`. Se puede " "cambiar este valor por defecto con los campos de configuración `opening_tag`" " y `closing_tag`." + +msgid "" +"# **encoding** (*utf-8*): Specify the encoding of " +"the included file. If not defined `utf-8` will be used." +msgstr "" +"# **encoding** (*utf-8*): Especifica la codificación" +" del archivo incluído. Si no se define, se usará `utf-8`." + +msgid "" +"# **encoding** " +"(*utf-8*): Specify the encoding of the included file. If not defined `utf-8`" +" will be used." +msgstr "" +"# **encoding** " +"(*utf-8*): Especifica la codificación del archivo incluído. Si no se define," +" se usará `utf-8`." diff --git a/locale/fr/README.md b/locale/fr/README.md index c91128f..05d5cdf 100644 --- a/locale/fr/README.md +++ b/locale/fr/README.md @@ -77,6 +77,17 @@ href="#include-markdown_preserve-includer-indent"># **preserve-includer-inde l'incluseur modèle `{% %}`. Les valeurs possibles sont `true` et `false`. - # **dedent** (*false*): Lorsque est activée, le contenu inclus sera déchiqueté. +- # **exclude**: +Spécifiez avec un glob quels fichiers doivent être ignorés. Uniquement utile +lors du passage de globs pour inclure plusieurs fichiers. +- # **trailing-newlines** (*true*): +Lorsque cette option est désactivée, les nouvelles lignes de fin trouvées dans +le contenu à inclure sont supprimées. Les valeurs possibles sont `true` et +`false`. +- # **encoding** +(*utf-8*): Spécifiez l'encodage du fichier inclus. S'il n'est pas défini, +`utf-8` sera utilisé. - # **rewrite-relative-urls** (*true*): Lorsque cette option est activée (par défaut), liens et images Markdown dans le @@ -93,14 +104,6 @@ href="#include-markdown_heading-offset"># **heading-offset** (0): Augmente ou diminue la profondeur des en-têtes Markdown de ce nombre. Ne prend en charge que la syntaxe d'en-tête du signe dièse (`#`). Cet argument accepte les valeurs négatives pour supprimer les caractères `#` de tête. -- # **exclude**: -Spécifiez avec un glob quels fichiers doivent être ignorés. Uniquement utile -lors du passage de globs pour inclure plusieurs fichiers. -- # **trailing-newlines** (*true*): -Lorsque cette option est désactivée, les nouvelles lignes de fin trouvées dans -le contenu à inclure sont supprimées. Les valeurs possibles sont `true` et -`false`. ##### Exemples @@ -164,6 +167,9 @@ passage de globs pour inclure plusieurs fichiers. (*true*): Lorsque cette option est désactivée, les nouvelles lignes de fin trouvées dans le contenu à inclure sont supprimées. Les valeurs possibles sont `true` et `false`. +- # **encoding** (*utf-8*): +Spécifiez l'encodage du fichier inclus. S'il n'est pas défini, `utf-8` sera +utilisé. ##### Exemples diff --git a/locale/fr/README.md.po b/locale/fr/README.md.po index e86deb7..8b20afc 100644 --- a/locale/fr/README.md.po +++ b/locale/fr/README.md.po @@ -315,3 +315,21 @@ msgstr "" "Les balises d'ouverture et de fermeture par défaut sont `{%` et `%}`. Vous " "pouvez changer ces balises avec les paramètres de configuration " "`opening_tag` et `closing_tag`:" + +msgid "" +"# **encoding** (*utf-8*): Specify the encoding of " +"the included file. If not defined `utf-8` will be used." +msgstr "" +"# **encoding** (*utf-8*): Spécifiez l'encodage du " +"fichier inclus. S'il n'est pas défini, `utf-8` sera utilisé." + +msgid "" +"# **encoding** " +"(*utf-8*): Specify the encoding of the included file. If not defined `utf-8`" +" will be used." +msgstr "" +"# **encoding** " +"(*utf-8*): Spécifiez l'encodage du fichier inclus. S'il n'est pas défini, " +"`utf-8` sera utilisé." diff --git a/mkdocs_include_markdown_plugin/__init__.py b/mkdocs_include_markdown_plugin/__init__.py index cee7139..0a4d94b 100644 --- a/mkdocs_include_markdown_plugin/__init__.py +++ b/mkdocs_include_markdown_plugin/__init__.py @@ -1,2 +1,2 @@ __title__ = 'mkdocs_include_markdown_plugin' -__version__ = '3.6.1' +__version__ = '3.7.0' diff --git a/mkdocs_include_markdown_plugin/event.py b/mkdocs_include_markdown_plugin/event.py index 1080cba..3631d97 100644 --- a/mkdocs_include_markdown_plugin/event.py +++ b/mkdocs_include_markdown_plugin/event.py @@ -48,33 +48,27 @@ flags=INCLUDE_TAG_REGEX.flags, ) +str_arg = lambda arg: re.compile( + rf'{arg}=(?:"({DOUBLE_QUOTED_STR_ARGUMENT_PATTERN})")?' + rf"(?:'({SINGLE_QUOTED_STR_ARGUMENT_PATTERN})')?", +) + +bool_arg = lambda arg: re.compile( + rf'{arg}=({BOOL_ARGUMENT_PATTERN})', +) + ARGUMENT_REGEXES = { - # str - 'start': re.compile( - rf'start=(?:"({DOUBLE_QUOTED_STR_ARGUMENT_PATTERN})")?' - rf"(?:'({SINGLE_QUOTED_STR_ARGUMENT_PATTERN})')?", - ), - 'end': re.compile( - rf'end=(?:"({DOUBLE_QUOTED_STR_ARGUMENT_PATTERN})")?' - rf"(?:'({SINGLE_QUOTED_STR_ARGUMENT_PATTERN})')?", - ), - 'exclude': re.compile( - rf'exclude=(?:"({DOUBLE_QUOTED_STR_ARGUMENT_PATTERN})")?' - rf"(?:'({SINGLE_QUOTED_STR_ARGUMENT_PATTERN})')?", - ), + 'start': str_arg('start'), + 'end': str_arg('end'), + 'exclude': str_arg('exclude'), + 'encoding': str_arg('encoding'), # bool - 'rewrite-relative-urls': re.compile( - rf'rewrite-relative-urls=({BOOL_ARGUMENT_PATTERN})', - ), - 'comments': re.compile(rf'comments=({BOOL_ARGUMENT_PATTERN})'), - 'preserve-includer-indent': re.compile( - rf'preserve-includer-indent=({BOOL_ARGUMENT_PATTERN})', - ), - 'dedent': re.compile(rf'dedent=({BOOL_ARGUMENT_PATTERN})'), - 'trailing-newlines': re.compile( - rf'trailing-newlines=({BOOL_ARGUMENT_PATTERN})', - ), + 'rewrite-relative-urls': bool_arg('rewrite-relative-urls'), + 'comments': bool_arg('comments'), + 'preserve-includer-indent': bool_arg('preserve-includer-indent'), + 'dedent': bool_arg('dedent'), + 'trailing-newlines': bool_arg('trailing-newlines'), # int 'heading-offset': re.compile(r'heading-offset=(-?\d+)'), @@ -109,6 +103,11 @@ def lineno_from_content_start(content, start): return content[:start].count('\n') + 1 +def read_file(file_path, encoding): + with open(file_path, encoding=encoding) as f: + return f.read() + + def get_file_content( markdown, page_src_path, @@ -259,11 +258,29 @@ def found_include_tag(match): else: end = None + encoding_match = re.search( + ARGUMENT_REGEXES['encoding'], + arguments_string, + ) + if encoding_match: + encoding = parse_string_argument(encoding_match) + if encoding is None: + lineno = lineno_from_content_start( + markdown, + directive_match_start, + ) + logger.error( + "Invalid empty 'encoding' argument in 'include'" + ' directive at ' + f'{os.path.relpath(page_src_path, docs_dir)}:{lineno}', + ) + else: + encoding = 'utf-8' + text_to_include = '' expected_but_any_found = [start is not None, end is not None] for file_path in file_paths_to_include: - with open(file_path, encoding='utf-8') as f: - new_text_to_include = f.read() + new_text_to_include = read_file(file_path, encoding) if start is not None or end is not None: new_text_to_include, *expected_not_found = ( @@ -481,6 +498,25 @@ def found_include_markdown_tag(match): else: end = None + encoding_match = re.search( + ARGUMENT_REGEXES['encoding'], + arguments_string, + ) + if encoding_match: + encoding = parse_string_argument(encoding_match) + if encoding is None: + lineno = lineno_from_content_start( + markdown, + directive_match_start, + ) + logger.error( + "Invalid empty 'encoding' argument in 'include-markdown'" + ' directive at ' + f'{os.path.relpath(page_src_path, docs_dir)}:{lineno}', + ) + else: + encoding = 'utf-8' + # heading offset offset = 0 offset_match = re.search( @@ -499,8 +535,7 @@ def found_include_markdown_tag(match): # but they have been specified, so the warning(s) must be raised expected_but_any_found = [start is not None, end is not None] for file_path in file_paths_to_include: - with open(file_path, encoding='utf-8') as f: - new_text_to_include = f.read() + new_text_to_include = read_file(file_path, encoding) if start is not None or end is not None: new_text_to_include, *expected_not_found = ( diff --git a/mkdocs_include_markdown_plugin/process.py b/mkdocs_include_markdown_plugin/process.py index 5bc1fcb..c89108c 100644 --- a/mkdocs_include_markdown_plugin/process.py +++ b/mkdocs_include_markdown_plugin/process.py @@ -107,8 +107,8 @@ def process_current_paragraph(): if not _current_fcodeblock_delimiter and not _inside_icodeblock: lstripped_line = line.lstrip() if ( - lstripped_line.startswith('```') or - lstripped_line.startswith('~~~') + lstripped_line.startswith('```') + or lstripped_line.startswith('~~~') ): _current_fcodeblock_delimiter = lstripped_line[:3] if current_paragraph: @@ -157,8 +157,8 @@ def transform_line_by_line_skipping_codeblocks(markdown, func): if not _current_fcodeblock_delimiter: lstripped_line = line.lstrip() if ( - lstripped_line.startswith('```') or - lstripped_line.startswith('~~~') + lstripped_line.startswith('```') + or lstripped_line.startswith('~~~') ): _current_fcodeblock_delimiter = lstripped_line[:3] else: diff --git a/setup.cfg b/setup.cfg index a118b14..d4959ad 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = mkdocs_include_markdown_plugin -version = 3.6.1 +version = 3.7.0 description = Mkdocs Markdown includer plugin. long_description = file: README.md long_description_content_type = text/markdown @@ -47,13 +47,14 @@ dev = flake8-print==4.0.0 isort==5.9.1 mdpo==0.3.61 - mkdocs==1.2.3 + mkdocs==1.3.1 pre-commit==2.13.0 pytest==6.2.5 pytest-cov==3.0.0 pyupgrade==2.19.4 yamllint==1.26.1 test = + mkdocs==1.3.1 pytest==6.2.5 pytest-cov==3.0.0 @@ -74,5 +75,10 @@ line_length = 79 use_parentheses = True combine_as_imports = True include_trailing_comma = True -known_tests = testing_utils +known_tests = testing_helpers sections = STDLIB,THIRDPARTY,FIRSTPARTY,TESTS,LOCALFOLDER + +[flake8] +max-line-length = 79 +ignore = + E731,W503 diff --git a/tests/test_arguments.py b/tests/test_arguments.py index fef081c..44a46b6 100644 --- a/tests/test_arguments.py +++ b/tests/test_arguments.py @@ -7,7 +7,7 @@ from mkdocs_include_markdown_plugin.event import on_page_markdown -from testing_utils import parametrize_directives +from testing_helpers import parametrize_directives WINDOWS_DOUBLE_QUOTES_PATHS_NOT_ALLOWED_REASON = ( diff --git a/tests/test_encoding.py b/tests/test_encoding.py new file mode 100644 index 0000000..7681cdb --- /dev/null +++ b/tests/test_encoding.py @@ -0,0 +1,86 @@ +import sys + +import pytest + +from mkdocs_include_markdown_plugin.event import on_page_markdown + +from testing_helpers import parametrize_directives + + +@parametrize_directives +def test_encoding(directive, page, tmp_path): + page_to_include_file = tmp_path / 'included.md' + page_to_include_file.write_text('''Á + +Content to include + +É +''') + + with pytest.raises(UnicodeDecodeError): + on_page_markdown( + f'''{{% + {directive} "{page_to_include_file}" + comments=false + start='' + end="" + encoding="ascii" +%}}''', + page(tmp_path / 'includer.md'), + tmp_path, + ) + + +@pytest.mark.skipif( + sys.platform != 'linux', + reason='On Windows CI the utf-8 encoding does not work', +) +@parametrize_directives +def test_default_encoding(directive, page, tmp_path): + page_to_include_file = tmp_path / 'included.md' + page_to_include_file.write_text('''Á + +Content to include + +É +''') + + result = on_page_markdown( + f'''{{% + {directive} "{page_to_include_file}" + comments=false + start='' + end="" +%}}''', + page(tmp_path / 'includer.md'), + tmp_path, + ) + assert result == '\nContent to include\n' + + +@pytest.mark.skipif( + sys.platform != 'linux', + reason='On Windows CI the utf-8 encoding does not work', +) +@parametrize_directives +def test_explicit_default_encoding(directive, page, tmp_path): + page_to_include_file = tmp_path / 'included.md' + page_to_include_file.write_text('''Á + +Content to include + +É +''') + + result = on_page_markdown( + f'''{{% + {directive} "{page_to_include_file}" + comments=false + start='' + end="" + encoding="utf-8" +%}}''', + page(tmp_path / 'includer.md'), + tmp_path, + ) + assert result == '\nContent to include\n' diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 0000000..d3324dc --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,21 @@ +import os +import subprocess +import sys + +import pytest + +from testing_helpers import rootdir + + +EXAMPLES_DIR = os.path.join(rootdir, 'examples') + + +@pytest.mark.parametrize('dirname', os.listdir(EXAMPLES_DIR)) +def test_examples(dirname): + with open(os.devnull, 'w') as devnull: + assert subprocess.call( + [sys.executable, '-mmkdocs', 'build'], + cwd=os.path.join(EXAMPLES_DIR, dirname), + stdout=devnull, + stderr=devnull, + ) == 0 diff --git a/tests/test_exclude.py b/tests/test_exclude.py index d7e1755..dc07500 100644 --- a/tests/test_exclude.py +++ b/tests/test_exclude.py @@ -8,7 +8,7 @@ from mkdocs_include_markdown_plugin.event import on_page_markdown -from testing_utils import parametrize_directives +from testing_helpers import parametrize_directives @parametrize_directives diff --git a/tests/test_glob_include.py b/tests/test_glob_include.py index a856dd7..6e40f61 100644 --- a/tests/test_glob_include.py +++ b/tests/test_glob_include.py @@ -6,7 +6,7 @@ from mkdocs_include_markdown_plugin.event import on_page_markdown -from testing_utils import parametrize_directives +from testing_helpers import parametrize_directives def test_glob_include_simple(page, tmp_path): diff --git a/tests/testing_utils.py b/tests/testing_helpers.py similarity index 72% rename from tests/testing_utils.py rename to tests/testing_helpers.py index 2462f17..87c1cb2 100644 --- a/tests/testing_utils.py +++ b/tests/testing_helpers.py @@ -1,3 +1,5 @@ +import os + import pytest @@ -6,3 +8,5 @@ ('include', 'include-markdown'), ids=('directive=include', 'directive=include-markdown'), ) + +rootdir = os.path.join(os.path.dirname(__file__), '..')