diff --git a/doc/extdev/deprecated.rst b/doc/extdev/deprecated.rst index f496dceae0..18b0e6d04e 100644 --- a/doc/extdev/deprecated.rst +++ b/doc/extdev/deprecated.rst @@ -22,6 +22,16 @@ The following is a list of deprecated interfaces. - (will be) Removed - Alternatives + * - ``sphinx.util.path_stabilize`` + - 5.1 + - 7.0 + - ``sphinx.util.osutil.path_stabilize`` + + * - ``sphinx.util.get_matching_files`` + - 5.1 + - 7.0 + - ``sphinx.util.matching.get_matching_files`` + * - ``sphinx.ext.napoleon.iterators`` - 5.1 - 7.0 diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 76f7642f79..d2ba22c26c 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -200,15 +200,14 @@ General configuration .. confval:: exclude_patterns - A list of glob-style patterns that should be excluded when looking for - source files. [1]_ They are matched against the source file names relative + A list of glob-style patterns [1]_ that should be excluded when looking for + source files. They are matched against the source file names relative to the source directory, using slashes as directory separators on all platforms. Example patterns: - - ``'library/xml.rst'`` -- ignores the ``library/xml.rst`` file (replaces - entry in :confval:`unused_docs`) + - ``'library/xml.rst'`` -- ignores the ``library/xml.rst`` file - ``'library/xml'`` -- ignores the ``library/xml`` directory - ``'library/xml*'`` -- ignores all files and directories starting with ``library/xml`` @@ -219,6 +218,26 @@ General configuration .. versionadded:: 1.0 +.. confval:: include_patterns + + A list of glob-style patterns [1]_ that are used to find source files. They + are matched against the source file names relative to the source directory, + using slashes as directory separators on all platforms. The default is ``**``, + meaning that all files are recursively included from the source directory. 
+ + Example patterns: + + - ``'**'`` -- all files in the source directory and subdirectories, recursively + - ``'library/xml'`` -- just the ``library/xml`` directory + - ``'library/xml*'`` -- all files and directories starting with ``library/xml`` + - ``'**/doc'`` -- all ``doc`` directories (this might be useful if + documentation is co-located with source files) + + :confval:`include_patterns` is also consulted when looking for static files + in :confval:`html_static_path` and :confval:`html_extra_path`. + + .. versionadded:: 5.1 + .. confval:: templates_path A list of paths that contain extra templates (or templates that overwrite diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index c9832d1c59..cfd4b5c968 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -840,7 +840,8 @@ def onerror(filename: str, error: Exception) -> None: logger.warning(__('Failed to copy a file in html_static_file: %s: %r'), filename, error) - excluded = Matcher(self.config.exclude_patterns + ["**/.*"]) + excluded = Matcher(self.config.exclude_patterns + ["**/.*"], + self.config.include_patterns) for entry in self.config.html_static_path: copy_asset(path.join(self.confdir, entry), path.join(self.outdir, '_static'), @@ -880,7 +881,7 @@ def copy_extra_files(self) -> None: """copy html_extra_path files.""" try: with progress_message(__('copying extra files')): - excluded = Matcher(self.config.exclude_patterns) + excluded = Matcher(self.config.exclude_patterns, self.config.include_patterns) for extra_path in self.config.html_extra_path: entry = path.join(self.confdir, extra_path) copy_asset(entry, self.outdir, excluded) diff --git a/sphinx/config.py b/sphinx/config.py index 318173f275..2fb41f066b 100644 --- a/sphinx/config.py +++ b/sphinx/config.py @@ -101,7 +101,8 @@ class Config: 'root_doc': (lambda config: config.master_doc, 'env', []), 'source_suffix': ({'.rst': 'restructuredtext'}, 'env', Any), 'source_encoding': 
('utf-8-sig', 'env', []), - 'exclude_patterns': ([], 'env', []), + 'exclude_patterns': ([], 'env', [str]), + 'include_patterns': (["**"], 'env', [str]), 'default_role': (None, 'env', [str]), 'add_function_parentheses': (True, 'env', []), 'add_module_names': (True, 'env', []), diff --git a/sphinx/directives/other.py b/sphinx/directives/other.py index 083fa088a7..38a332674e 100644 --- a/sphinx/directives/other.py +++ b/sphinx/directives/other.py @@ -84,7 +84,7 @@ def parse_content(self, toctree: addnodes.toctree) -> List[Node]: all_docnames.remove(self.env.docname) # remove current document ret: List[Node] = [] - excluded = Matcher(self.config.exclude_patterns) + excluded = Matcher(self.config.exclude_patterns, self.config.include_patterns) for entry in self.content: if not entry: continue diff --git a/sphinx/environment/__init__.py b/sphinx/environment/__init__.py index 036aa3666d..df82c5a8a5 100644 --- a/sphinx/environment/__init__.py +++ b/sphinx/environment/__init__.py @@ -359,7 +359,7 @@ def find_files(self, config: Config, builder: "Builder") -> None: exclude_paths = (self.config.exclude_patterns + self.config.templates_path + builder.get_asset_paths()) - self.project.discover(exclude_paths) + self.project.discover(exclude_paths, self.config.include_patterns) # Current implementation is applying translated messages in the reading # phase.Therefore, in order to apply the updated message catalog, it is diff --git a/sphinx/environment/adapters/toctree.py b/sphinx/environment/adapters/toctree.py index 631060e75a..a04c4da8fe 100644 --- a/sphinx/environment/adapters/toctree.py +++ b/sphinx/environment/adapters/toctree.py @@ -74,7 +74,7 @@ def resolve(self, docname: str, builder: "Builder", toctree: addnodes.toctree, # interactions between marking and pruning the tree (see bug #1046). 
toctree_ancestors = self.get_toctree_ancestors(docname) - excluded = Matcher(self.env.config.exclude_patterns) + excluded = Matcher(self.env.config.exclude_patterns, self.env.config.include_patterns) def _toctree_add_classes(node: Element, depth: int) -> None: """Add 'toctree-l%d' and 'current' classes to the toctree.""" diff --git a/sphinx/ext/autosummary/__init__.py b/sphinx/ext/autosummary/__init__.py index 2b9055d3e8..eb5498e168 100644 --- a/sphinx/ext/autosummary/__init__.py +++ b/sphinx/ext/autosummary/__init__.py @@ -236,7 +236,7 @@ def run(self) -> List[Node]: tree_prefix = self.options['toctree'].strip() docnames = [] - excluded = Matcher(self.config.exclude_patterns) + excluded = Matcher(self.config.exclude_patterns, self.config.include_patterns) filename_map = self.config.autosummary_filename_map for _name, _sig, _summary, real_name in items: real_name = filename_map.get(real_name, real_name) diff --git a/sphinx/project.py b/sphinx/project.py index 1340558665..9e046faa76 100644 --- a/sphinx/project.py +++ b/sphinx/project.py @@ -2,12 +2,12 @@ import os from glob import glob -from typing import Dict, List, Optional, Set +from typing import Dict, Iterable, Optional, Set from sphinx.locale import __ -from sphinx.util import get_matching_files, logging, path_stabilize -from sphinx.util.matching import compile_matchers -from sphinx.util.osutil import SEP, relpath +from sphinx.util import logging +from sphinx.util.matching import get_matching_files +from sphinx.util.osutil import SEP, path_stabilize, relpath logger = logging.getLogger(__name__) EXCLUDE_PATHS = ['**/_sources', '.#*', '**/.#*', '*.lproj/**'] @@ -30,13 +30,17 @@ def restore(self, other: "Project") -> None: """Take over a result of last build.""" self.docnames = other.docnames - def discover(self, exclude_paths: List[str] = []) -> Set[str]: + def discover(self, exclude_paths: Iterable[str] = (), + include_paths: Iterable[str] = ("**",)) -> Set[str]: """Find all document files in the source 
directory and put them in :attr:`docnames`. """ self.docnames = set() - excludes = compile_matchers(exclude_paths + EXCLUDE_PATHS) - for filename in get_matching_files(self.srcdir, excludes): # type: ignore + for filename in get_matching_files( + self.srcdir, + [*exclude_paths] + EXCLUDE_PATHS, + include_paths, + ): docname = self.path2doc(filename) if docname: if docname in self.docnames: diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py index c80857a83b..0b0e1c952f 100644 --- a/sphinx/util/__init__.py +++ b/sphinx/util/__init__.py @@ -8,7 +8,7 @@ import sys import tempfile import traceback -import unicodedata +import warnings from datetime import datetime from importlib import import_module from os import path @@ -17,6 +17,7 @@ Optional, Pattern, Set, Tuple, Type, TypeVar) from urllib.parse import parse_qsl, quote_plus, urlencode, urlsplit, urlunsplit +from sphinx.deprecation import RemovedInSphinx70Warning from sphinx.errors import ExtensionError, FiletypeNotFoundError, SphinxParallelError from sphinx.locale import __ from sphinx.util import logging @@ -50,16 +51,25 @@ def docname_join(basedocname: str, docname: str) -> str: def path_stabilize(filepath: str) -> str: "Normalize path separator and unicode string" - newpath = filepath.replace(os.path.sep, SEP) - return unicodedata.normalize('NFC', newpath) + warnings.warn("'sphinx.util.path_stabilize' is deprecated, use " + "'sphinx.util.osutil.path_stabilize' instead.", + RemovedInSphinx70Warning, stacklevel=2) + from sphinx.util import osutil + + return osutil.path_stabilize(filepath) def get_matching_files(dirname: str, - exclude_matchers: Tuple[PathMatcher, ...] = ()) -> Iterable[str]: # NOQA + exclude_matchers: Tuple[PathMatcher, ...] = (), + include_matchers: Tuple[PathMatcher, ...] = ()) -> Iterable[str]: # NOQA """Get all file names in a directory, recursively. Exclude files and dirs matching some matcher in *exclude_matchers*. 
""" + warnings.warn("'sphinx.util.get_matching_files' is deprecated, use " + "'sphinx.util.matching.get_matching_files' instead. Note that " + "the types of the arguments have changed from callables to " + "plain string glob patterns.", RemovedInSphinx70Warning, stacklevel=2) # dirname is a normalized absolute path. dirname = path.normpath(path.abspath(dirname)) diff --git a/sphinx/util/matching.py b/sphinx/util/matching.py index 53a8933386..de4a776cfd 100644 --- a/sphinx/util/matching.py +++ b/sphinx/util/matching.py @@ -1,9 +1,10 @@ """Pattern-matching utility functions for Sphinx.""" +import os.path import re -from typing import Callable, Dict, Iterable, List, Match, Optional, Pattern +from typing import Callable, Dict, Iterable, Iterator, List, Match, Optional, Pattern -from sphinx.util.osutil import canon_path +from sphinx.util.osutil import canon_path, path_stabilize def _translate_pattern(pat: str) -> str: @@ -52,7 +53,7 @@ def _translate_pattern(pat: str) -> str: return res + '$' -def compile_matchers(patterns: List[str]) -> List[Callable[[str], Optional[Match[str]]]]: +def compile_matchers(patterns: Iterable[str]) -> List[Callable[[str], Optional[Match[str]]]]: return [re.compile(_translate_pattern(pat)).match for pat in patterns] @@ -63,9 +64,10 @@ class Matcher: For example, "**/index.rst" matches with "index.rst" """ - def __init__(self, patterns: List[str]) -> None: - expanded = [pat[3:] for pat in patterns if pat.startswith('**/')] - self.patterns = compile_matchers(patterns + expanded) + def __init__(self, exclude_patterns: Iterable[str], + include_patterns: Iterable[str] = ()) -> None: + expanded = [pat[3:] for pat in exclude_patterns if pat.startswith('**/')] + self.patterns = compile_matchers(list(exclude_patterns) + expanded) def __call__(self, string: str) -> bool: return self.match(string) @@ -99,3 +101,63 @@ def patfilter(names: Iterable[str], pat: str) -> List[str]: _pat_cache[pat] = re.compile(_translate_pattern(pat)) match = 
_pat_cache[pat].match return list(filter(match, names)) + + +def get_matching_files( + dirname: str, + exclude_patterns: Iterable[str] = (), + include_patterns: Iterable[str] = ("**",) +) -> Iterator[str]: + """Get all file names in a directory, recursively. + + Filter file names by the glob-style include_patterns and exclude_patterns. + The default values include all files (``("**",)``) and exclude nothing (``()``). + + Only files matching some pattern in *include_patterns* are included, and + exclusions from *exclude_patterns* take priority over inclusions. + + """ + # dirname is a normalized absolute path. + dirname = os.path.normpath(os.path.abspath(dirname)) + + exclude_matchers = compile_matchers(exclude_patterns) + include_matchers = compile_matchers(include_patterns) + + for root, dirs, files in os.walk(dirname, followlinks=True): + relative_root = os.path.relpath(root, dirname) + if relative_root == ".": + relative_root = "" # suppress dirname for files on the target dir + + # Filter files + included_files = [] + for entry in sorted(files): + entry = path_stabilize(os.path.join(relative_root, entry)) + keep = False + for matcher in include_matchers: + if matcher(entry): + keep = True + break # break the inner loop + + for matcher in exclude_matchers: + if matcher(entry): + keep = False + break # break the inner loop + + if keep: + included_files.append(entry) + + # Filter directories + filtered_dirs = [] + for dir_name in sorted(dirs): + normalised = path_stabilize(os.path.join(relative_root, dir_name)) + for matcher in exclude_matchers: + if matcher(normalised): + break # break the inner loop + else: + # if the loop didn't break + filtered_dirs.append(dir_name) + + dirs[:] = filtered_dirs + + # Yield filtered files + yield from included_files diff --git a/sphinx/util/osutil.py b/sphinx/util/osutil.py index 840655e425..d0ed42c8cd 100644 --- a/sphinx/util/osutil.py +++ b/sphinx/util/osutil.py @@ -6,6 +6,7 @@ import re import shutil import sys +import unicodedata from 
io import StringIO from os import path from typing import Any, Generator, Iterator, List, Optional, Type @@ -34,6 +35,12 @@ def canon_path(nativepath: str) -> str: return nativepath.replace(path.sep, SEP) +def path_stabilize(filepath: str) -> str: + "Normalize path separator and unicode string" + new_path = canon_path(filepath) + return unicodedata.normalize('NFC', new_path) + + def relative_uri(base: str, to: str) -> str: """Return a relative URL from ``base`` to ``to``.""" if to.startswith(SEP): diff --git a/tests/test_util_matching.py b/tests/test_util_matching.py index ee1d3b2cb6..ecff0e2d83 100644 --- a/tests/test_util_matching.py +++ b/tests/test_util_matching.py @@ -1,5 +1,5 @@ """Tests sphinx.util.matching functions.""" -from sphinx.util.matching import Matcher, compile_matchers +from sphinx.util.matching import Matcher, compile_matchers, get_matching_files def test_compile_matchers(): @@ -80,3 +80,95 @@ def test_Matcher(): assert not matcher('subdir/hello.py') assert matcher('world.py') assert matcher('subdir/world.py') + + +def test_get_matching_files_all(rootdir): + files = get_matching_files(rootdir / "test-root") + assert sorted(files) == [ + 'Makefile', '_templates/contentssb.html', '_templates/customsb.html', + '_templates/layout.html', 'autodoc.txt', 'autodoc_target.py', 'bom.txt', 'conf.py', + 'extapi.txt', 'extensions.txt', 'file_with_special_#_chars.xyz', 'footnote.txt', + 'images.txt', 'img.foo.png', 'img.gif', 'img.pdf', 'img.png', 'includes.txt', + 'index.txt', 'lists.txt', 'literal.inc', 'literal_orig.inc', 'markup.txt', 'math.txt', + 'objects.txt', 'otherext.foo', 'parsermod.py', 'quotes.inc', 'rimg.png', + 'special/api.h', 'special/code.py', 'subdir/excluded.txt', 'subdir/images.txt', + 'subdir/img.png', 'subdir/include.inc', 'subdir/includes.txt', 'subdir/simg.png', + 'svgimg.pdf', 'svgimg.svg', 'tabs.inc', 'test.inc', 'wrongenc.inc', + ] + + +def test_get_matching_files_all_exclude_single(rootdir): + files = get_matching_files(rootdir / 
"test-root", ["**.html"]) + assert sorted(files) == [ + 'Makefile', 'autodoc.txt', 'autodoc_target.py', 'bom.txt', 'conf.py', + 'extapi.txt', 'extensions.txt', 'file_with_special_#_chars.xyz', 'footnote.txt', + 'images.txt', 'img.foo.png', 'img.gif', 'img.pdf', 'img.png', 'includes.txt', + 'index.txt', 'lists.txt', 'literal.inc', 'literal_orig.inc', 'markup.txt', 'math.txt', + 'objects.txt', 'otherext.foo', 'parsermod.py', 'quotes.inc', 'rimg.png', + 'special/api.h', 'special/code.py', 'subdir/excluded.txt', 'subdir/images.txt', + 'subdir/img.png', 'subdir/include.inc', 'subdir/includes.txt', 'subdir/simg.png', + 'svgimg.pdf', 'svgimg.svg', 'tabs.inc', 'test.inc', 'wrongenc.inc', + ] + + +def test_get_matching_files_all_exclude_multiple(rootdir): + files = get_matching_files(rootdir / "test-root", ["**.html", "**.inc"]) + assert sorted(files) == [ + 'Makefile', 'autodoc.txt', 'autodoc_target.py', 'bom.txt', 'conf.py', + 'extapi.txt', 'extensions.txt', 'file_with_special_#_chars.xyz', 'footnote.txt', + 'images.txt', 'img.foo.png', 'img.gif', 'img.pdf', 'img.png', 'includes.txt', + 'index.txt', 'lists.txt', 'markup.txt', 'math.txt', 'objects.txt', 'otherext.foo', + 'parsermod.py', 'rimg.png', 'special/api.h', 'special/code.py', 'subdir/excluded.txt', + 'subdir/images.txt', 'subdir/img.png', 'subdir/includes.txt', 'subdir/simg.png', + 'svgimg.pdf', 'svgimg.svg', + ] + + +def test_get_matching_files_all_exclude_nonexistent(rootdir): + files = get_matching_files(rootdir / "test-root", ["halibut/**"]) + assert sorted(files) == [ + 'Makefile', '_templates/contentssb.html', '_templates/customsb.html', + '_templates/layout.html', 'autodoc.txt', 'autodoc_target.py', 'bom.txt', 'conf.py', + 'extapi.txt', 'extensions.txt', 'file_with_special_#_chars.xyz', 'footnote.txt', + 'images.txt', 'img.foo.png', 'img.gif', 'img.pdf', 'img.png', 'includes.txt', + 'index.txt', 'lists.txt', 'literal.inc', 'literal_orig.inc', 'markup.txt', 'math.txt', + 'objects.txt', 'otherext.foo', 
'parsermod.py', 'quotes.inc', 'rimg.png', + 'special/api.h', 'special/code.py', 'subdir/excluded.txt', 'subdir/images.txt', + 'subdir/img.png', 'subdir/include.inc', 'subdir/includes.txt', 'subdir/simg.png', + 'svgimg.pdf', 'svgimg.svg', 'tabs.inc', 'test.inc', 'wrongenc.inc', + ] + + +def test_get_matching_files_all_include_single(rootdir): + files = get_matching_files(rootdir / "test-root", [], ["subdir/**"]) + assert sorted(files) == [ + 'subdir/excluded.txt', 'subdir/images.txt', 'subdir/img.png', 'subdir/include.inc', + 'subdir/includes.txt', 'subdir/simg.png', + ] + + +def test_get_matching_files_all_include_multiple(rootdir): + files = get_matching_files(rootdir / "test-root", [], ["special/**", "subdir/**"]) + assert sorted(files) == [ + 'special/api.h', 'special/code.py', 'subdir/excluded.txt', 'subdir/images.txt', + 'subdir/img.png', 'subdir/include.inc', 'subdir/includes.txt', 'subdir/simg.png', + ] + + +def test_get_matching_files_all_include_nonexistent(rootdir): + files = get_matching_files(rootdir / "test-root", [], ["halibut/**"]) + assert sorted(files) == [] + + +def test_get_matching_files_all_include_prefix(rootdir): + files = get_matching_files(rootdir / "test-root", [], ["autodoc*"]) + assert sorted(files) == [ + 'autodoc.txt', 'autodoc_target.py', + ] + + +def test_get_matching_files_all_include_question_mark(rootdir): + files = get_matching_files(rootdir / "test-root", [], ["img.???"]) + assert sorted(files) == [ + 'img.gif', 'img.pdf', 'img.png', + ]