Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add include_patterns as the opposite of exclude_patterns #10518

Merged
merged 10 commits into from Jul 17, 2022
10 changes: 10 additions & 0 deletions doc/extdev/deprecated.rst
Expand Up @@ -22,6 +22,16 @@ The following is a list of deprecated interfaces.
- (will be) Removed
- Alternatives

* - ``sphinx.util.path_stabilize``
- 5.1
- 7.0
- ``sphinx.util.osutil.path_stabilize``

* - ``sphinx.util.get_matching_files``
- 5.1
- 7.0
- ``sphinx.util.matching.get_matching_files``

* - ``sphinx.ext.napoleon.iterators``
- 5.1
- 7.0
Expand Down
27 changes: 23 additions & 4 deletions doc/usage/configuration.rst
Expand Up @@ -200,15 +200,14 @@ General configuration

.. confval:: exclude_patterns

A list of glob-style patterns that should be excluded when looking for
source files. [1]_ They are matched against the source file names relative
A list of glob-style patterns [1]_ that should be excluded when looking for
source files. They are matched against the source file names relative
to the source directory, using slashes as directory separators on all
platforms.

Example patterns:

- ``'library/xml.rst'`` -- ignores the ``library/xml.rst`` file (replaces
entry in :confval:`unused_docs`)
- ``'library/xml.rst'`` -- ignores the ``library/xml.rst`` file
- ``'library/xml'`` -- ignores the ``library/xml`` directory
- ``'library/xml*'`` -- ignores all files and directories starting with
``library/xml``
Expand All @@ -219,6 +218,26 @@ General configuration

.. versionadded:: 1.0

.. confval:: include_patterns

A list of glob-style patterns [1]_ that are used to find source files. They
are matched against the source file names relative to the source directory,
using slashes as directory separators on all platforms. The default is ``**``,
meaning that all files are recursively included from the source directory.

Example patterns:

- ``'**'`` -- all files in the source directory and subdirectories, recursively
- ``'library/xml'`` -- just the ``library/xml`` directory
- ``'library/xml*'`` -- all files and directories starting with ``library/xml``
- ``'**/doc'`` -- all ``doc`` directories (this might be useful if
documentation is co-located with source files)

:confval:`include_patterns` is also consulted when looking for static files
in :confval:`html_static_path` and :confval:`html_extra_path`.
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved

.. versionadded:: 5.1

.. confval:: templates_path

A list of paths that contain extra templates (or templates that overwrite
Expand Down
5 changes: 3 additions & 2 deletions sphinx/builders/html/__init__.py
Expand Up @@ -840,7 +840,8 @@ def onerror(filename: str, error: Exception) -> None:
logger.warning(__('Failed to copy a file in html_static_file: %s: %r'),
filename, error)

excluded = Matcher(self.config.exclude_patterns + ["**/.*"])
excluded = Matcher(self.config.exclude_patterns + ["**/.*"],
self.config.include_patterns)
for entry in self.config.html_static_path:
copy_asset(path.join(self.confdir, entry),
path.join(self.outdir, '_static'),
Expand Down Expand Up @@ -880,7 +881,7 @@ def copy_extra_files(self) -> None:
"""copy html_extra_path files."""
try:
with progress_message(__('copying extra files')):
excluded = Matcher(self.config.exclude_patterns)
excluded = Matcher(self.config.exclude_patterns, self.config.include_patterns)
for extra_path in self.config.html_extra_path:
entry = path.join(self.confdir, extra_path)
copy_asset(entry, self.outdir, excluded)
Expand Down
3 changes: 2 additions & 1 deletion sphinx/config.py
Expand Up @@ -101,7 +101,8 @@ class Config:
'root_doc': (lambda config: config.master_doc, 'env', []),
'source_suffix': ({'.rst': 'restructuredtext'}, 'env', Any),
'source_encoding': ('utf-8-sig', 'env', []),
'exclude_patterns': ([], 'env', []),
'exclude_patterns': ([], 'env', [str]),
'include_patterns': (["**"], 'env', [str]),
'default_role': (None, 'env', [str]),
'add_function_parentheses': (True, 'env', []),
'add_module_names': (True, 'env', []),
Expand Down
2 changes: 1 addition & 1 deletion sphinx/directives/other.py
Expand Up @@ -84,7 +84,7 @@ def parse_content(self, toctree: addnodes.toctree) -> List[Node]:
all_docnames.remove(self.env.docname) # remove current document

ret: List[Node] = []
excluded = Matcher(self.config.exclude_patterns)
excluded = Matcher(self.config.exclude_patterns, self.config.include_patterns)
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
for entry in self.content:
if not entry:
continue
Expand Down
2 changes: 1 addition & 1 deletion sphinx/environment/__init__.py
Expand Up @@ -359,7 +359,7 @@ def find_files(self, config: Config, builder: "Builder") -> None:
exclude_paths = (self.config.exclude_patterns +
self.config.templates_path +
builder.get_asset_paths())
self.project.discover(exclude_paths)
self.project.discover(exclude_paths, self.config.include_patterns)

# Current implementation is applying translated messages in the reading
# phase. Therefore, in order to apply the updated message catalog, it is
Expand Down
2 changes: 1 addition & 1 deletion sphinx/environment/adapters/toctree.py
Expand Up @@ -74,7 +74,7 @@ def resolve(self, docname: str, builder: "Builder", toctree: addnodes.toctree,
# interactions between marking and pruning the tree (see bug #1046).

toctree_ancestors = self.get_toctree_ancestors(docname)
excluded = Matcher(self.env.config.exclude_patterns)
excluded = Matcher(self.env.config.exclude_patterns, self.env.config.include_patterns)

def _toctree_add_classes(node: Element, depth: int) -> None:
"""Add 'toctree-l%d' and 'current' classes to the toctree."""
Expand Down
2 changes: 1 addition & 1 deletion sphinx/ext/autosummary/__init__.py
Expand Up @@ -236,7 +236,7 @@ def run(self) -> List[Node]:

tree_prefix = self.options['toctree'].strip()
docnames = []
excluded = Matcher(self.config.exclude_patterns)
excluded = Matcher(self.config.exclude_patterns, self.config.include_patterns)
filename_map = self.config.autosummary_filename_map
for _name, _sig, _summary, real_name in items:
real_name = filename_map.get(real_name, real_name)
Expand Down
18 changes: 11 additions & 7 deletions sphinx/project.py
Expand Up @@ -2,12 +2,12 @@

import os
from glob import glob
from typing import Dict, List, Optional, Set
from typing import Dict, Iterable, Optional, Set

from sphinx.locale import __
from sphinx.util import get_matching_files, logging, path_stabilize
from sphinx.util.matching import compile_matchers
from sphinx.util.osutil import SEP, relpath
from sphinx.util import logging
from sphinx.util.matching import get_matching_files
from sphinx.util.osutil import SEP, path_stabilize, relpath

logger = logging.getLogger(__name__)
EXCLUDE_PATHS = ['**/_sources', '.#*', '**/.#*', '*.lproj/**']
Expand All @@ -30,13 +30,17 @@ def restore(self, other: "Project") -> None:
"""Take over a result of last build."""
self.docnames = other.docnames

def discover(self, exclude_paths: List[str] = []) -> Set[str]:
def discover(self, exclude_paths: Iterable[str] = (),
include_paths: Iterable[str] = ("**",)) -> Set[str]:
"""Find all document files in the source directory and put them in
:attr:`docnames`.
"""
self.docnames = set()
excludes = compile_matchers(exclude_paths + EXCLUDE_PATHS)
for filename in get_matching_files(self.srcdir, excludes): # type: ignore
for filename in get_matching_files(
self.srcdir,
[*exclude_paths] + EXCLUDE_PATHS,
include_paths,
):
docname = self.path2doc(filename)
if docname:
if docname in self.docnames:
Expand Down
18 changes: 14 additions & 4 deletions sphinx/util/__init__.py
Expand Up @@ -8,7 +8,7 @@
import sys
import tempfile
import traceback
import unicodedata
import warnings
from datetime import datetime
from importlib import import_module
from os import path
Expand All @@ -17,6 +17,7 @@
Optional, Pattern, Set, Tuple, Type, TypeVar)
from urllib.parse import parse_qsl, quote_plus, urlencode, urlsplit, urlunsplit

from sphinx.deprecation import RemovedInSphinx70Warning
from sphinx.errors import ExtensionError, FiletypeNotFoundError, SphinxParallelError
from sphinx.locale import __
from sphinx.util import logging
Expand Down Expand Up @@ -50,16 +51,25 @@ def docname_join(basedocname: str, docname: str) -> str:

def path_stabilize(filepath: str) -> str:
"Normalize path separator and unicode string"
newpath = filepath.replace(os.path.sep, SEP)
return unicodedata.normalize('NFC', newpath)
warnings.warn("'sphinx.util.path_stabilize' is deprecated, use "
"'sphinx.util.osutil.path_stabilize' instead.",
RemovedInSphinx70Warning, stacklevel=2)
from sphinx.util import osutil

return osutil.path_stabilize(filepath)


def get_matching_files(dirname: str,
exclude_matchers: Tuple[PathMatcher, ...] = ()) -> Iterable[str]: # NOQA
exclude_matchers: Tuple[PathMatcher, ...] = (),
include_matchers: Tuple[PathMatcher, ...] = ()) -> Iterable[str]: # NOQA
"""Get all file names in a directory, recursively.

Exclude files and dirs matching some matcher in *exclude_matchers*.
"""
warnings.warn("'sphinx.util.get_matching_files' is deprecated, use "
"'sphinx.util.matching.get_matching_files' instead. Note that "
"the types of the arguments have changed from callables to "
"plain string glob patterns.", RemovedInSphinx70Warning, stacklevel=2)
# dirname is a normalized absolute path.
dirname = path.normpath(path.abspath(dirname))

Expand Down
74 changes: 68 additions & 6 deletions sphinx/util/matching.py
@@ -1,9 +1,10 @@
"""Pattern-matching utility functions for Sphinx."""

import os.path
import re
from typing import Callable, Dict, Iterable, List, Match, Optional, Pattern
from typing import Callable, Dict, Iterable, Iterator, List, Match, Optional, Pattern

from sphinx.util.osutil import canon_path
from sphinx.util.osutil import canon_path, path_stabilize


def _translate_pattern(pat: str) -> str:
Expand Down Expand Up @@ -52,7 +53,7 @@ def _translate_pattern(pat: str) -> str:
return res + '$'


def compile_matchers(patterns: List[str]) -> List[Callable[[str], Optional[Match[str]]]]:
def compile_matchers(patterns: Iterable[str]) -> List[Callable[[str], Optional[Match[str]]]]:
return [re.compile(_translate_pattern(pat)).match for pat in patterns]


Expand All @@ -63,9 +64,10 @@ class Matcher:
For example, "**/index.rst" matches with "index.rst"
"""

def __init__(self, patterns: List[str]) -> None:
expanded = [pat[3:] for pat in patterns if pat.startswith('**/')]
self.patterns = compile_matchers(patterns + expanded)
def __init__(self, exclude_patterns: Iterable[str],
             include_patterns: Iterable[str] = ()) -> None:
    """Compile *exclude_patterns* into matcher callables.

    Every pattern that starts with ``**/`` is additionally registered
    without that prefix, so that e.g. ``**/index.rst`` also matches
    ``index.rst``.

    :param exclude_patterns: glob-style patterns to compile.
    :param include_patterns: accepted so call sites can pass it, but not
        used by this constructor.
    """
    # Materialise once: the annotation allows any iterable, and a one-shot
    # iterator would otherwise be exhausted by the first pass below, leaving
    # the second pass with nothing.
    patterns = list(exclude_patterns)
    expanded = [pat[3:] for pat in patterns if pat.startswith('**/')]
    self.patterns = compile_matchers(patterns + expanded)

def __call__(self, string: str) -> bool:
return self.match(string)
Expand Down Expand Up @@ -99,3 +101,63 @@ def patfilter(names: Iterable[str], pat: str) -> List[str]:
_pat_cache[pat] = re.compile(_translate_pattern(pat))
match = _pat_cache[pat].match
return list(filter(match, names))


def get_matching_files(
    dirname: str,
    exclude_patterns: Iterable[str] = (),
    include_patterns: Iterable[str] = ("**",)
) -> Iterator[str]:
    """Recursively yield the files below *dirname* selected by glob patterns.

    Yielded paths are relative to *dirname* and passed through
    ``path_stabilize``.  A file is yielded when it matches at least one
    pattern in *include_patterns* and no pattern in *exclude_patterns*;
    exclusions always win over inclusions.  Directories matching an exclude
    pattern are removed from the walk.

    By default every file is included (``"**"``) and nothing is excluded
    (``()``).
    """
    # Work from a normalized absolute path.
    top = os.path.normpath(os.path.abspath(dirname))

    is_excluded = compile_matchers(exclude_patterns)
    is_included = compile_matchers(include_patterns)

    for current, subdirs, filenames in os.walk(top, followlinks=True):
        prefix = os.path.relpath(current, top)
        if prefix == ".":
            prefix = ""  # files directly in the top directory get no prefix

        # Select files: must be included and must not be excluded.
        selected = []
        for filename in sorted(filenames):
            candidate = path_stabilize(os.path.join(prefix, filename))
            wanted = any(match(candidate) for match in is_included)
            if wanted and not any(match(candidate) for match in is_excluded):
                selected.append(candidate)

        # Prune excluded directories in place so os.walk skips them.
        subdirs[:] = [
            name for name in sorted(subdirs)
            if not any(
                match(path_stabilize(os.path.join(prefix, name)))
                for match in is_excluded
            )
        ]

        yield from selected
7 changes: 7 additions & 0 deletions sphinx/util/osutil.py
Expand Up @@ -6,6 +6,7 @@
import re
import shutil
import sys
import unicodedata
from io import StringIO
from os import path
from typing import Any, Generator, Iterator, List, Optional, Type
Expand Down Expand Up @@ -34,6 +35,12 @@ def canon_path(nativepath: str) -> str:
return nativepath.replace(path.sep, SEP)


def path_stabilize(filepath: str) -> str:
    """Return *filepath* with canonical separators, NFC-normalized."""
    return unicodedata.normalize('NFC', canon_path(filepath))


def relative_uri(base: str, to: str) -> str:
"""Return a relative URL from ``base`` to ``to``."""
if to.startswith(SEP):
Expand Down