Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement cache busting: append a suffix to CSS and JS URLs #3018

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
44 changes: 44 additions & 0 deletions docs/user-guide/configuration.md
Expand Up @@ -497,6 +497,50 @@ Only the plain string variant detects the `.mjs` extension and adds `type="modul

NOTE: `*.js` and `*.css` files, just like any other type of file, are always copied from `docs_dir` into the site's deployed copy, regardless if they're linked to the pages via the above configs or not.

### hash_rename_assets

NEW: **New in version 1.5.**

Set patterns of files to rename (on the fly, upon copying to the built site) by inserting a hash of the content. This is done for purposes of *cache busting*.

It is recommended to use this setting whenever possible.

The patterns follow the [.gitignore pattern format](https://git-scm.com/docs/gitignore#_pattern_format). In this case, however, a matching pattern *selects* a file for renaming rather than ignoring it.

For example:

```yaml
extra_javascript:
- js/foo.js
- vendored/jquery-1.2.3.js
hash_rename_assets: |
*.css
*.js
!/vendored/*.js
```

Then the matched file is copied with a modified name and references to it are modified accordingly, e.g.:

```html
<script src="../js/foo.e3b0c442.js">
```

You don't need to remember to update this hash yourself: just keep modifying `foo.js` and referring to it by its original name, as usual.

Note that in this example we chose not to hash the file that already has a version in its name (`vendored/jquery-1.2.3.js`), although it could be renamed, too.

### hash_append_assets

NEW: **New in version 1.5.**

Same as [hash_rename_assets](#hash_rename_assets), but the file doesn't get renamed; instead, whenever the file is referred to, a URL parameter containing the hash is appended to its URL.

E.g. a script might get linked to as:

```html
<script src="../js/foo.js?h=e3b0c442">
```

### extra_templates

Set a list of templates in your `docs_dir` to be built by MkDocs. To see more
Expand Down
26 changes: 13 additions & 13 deletions mkdocs/commands/build.py
Expand Up @@ -4,7 +4,7 @@
import logging
import os
import time
from typing import TYPE_CHECKING, Sequence
from typing import TYPE_CHECKING
from urllib.parse import urljoin, urlsplit

import jinja2
Expand All @@ -13,7 +13,7 @@
import mkdocs
from mkdocs import utils
from mkdocs.exceptions import Abort, BuildError
from mkdocs.structure.files import File, Files, InclusionLevel, _set_exclusions, get_files
from mkdocs.structure.files import Files, InclusionLevel, _set_exclusions, get_files
from mkdocs.structure.nav import Navigation, get_navigation
from mkdocs.structure.pages import Page
from mkdocs.utils import DuplicateFilter # noqa - legacy re-export
Expand All @@ -30,7 +30,7 @@

def get_context(
nav: Navigation,
files: Sequence[File] | Files,
files: Files,
config: MkDocsConfig,
page: Page | None = None,
base_url: str = '',
Expand All @@ -42,23 +42,22 @@ def get_context(
base_url = utils.get_relative_url('.', page.url)

extra_javascript = [
utils.normalize_url(str(script), page, base_url) for script in config.extra_javascript
utils.normalize_url(str(script), page, base_url, files)
for script in config.extra_javascript
]
extra_css = [utils.normalize_url(path, page, base_url) for path in config.extra_css]

if isinstance(files, Files):
files = files.documentation_pages()
extra_css = [utils.normalize_url(path, page, base_url, files) for path in config.extra_css]

return templates.TemplateContext(
nav=nav,
pages=files,
pages=files.documentation_pages(),
base_url=base_url,
extra_css=extra_css,
extra_javascript=extra_javascript,
mkdocs_version=mkdocs.__version__,
build_date_utc=utils.get_build_datetime(),
config=config,
page=page,
_files=files,
)


Expand Down Expand Up @@ -190,7 +189,7 @@ def _populate_page(page: Page, config: MkDocsConfig, files: Files, dirty: bool =
def _build_page(
page: Page,
config: MkDocsConfig,
doc_files: Sequence[File],
files: Files,
nav: Navigation,
env: jinja2.Environment,
dirty: bool = False,
Expand All @@ -209,7 +208,7 @@ def _build_page(
# Activate page. Signals to theme that this is the current page.
page.active = True

context = get_context(nav, doc_files, config, page)
context = get_context(nav, files, config, page)

# Allow 'template:' override in md source files.
if 'template' in page.meta:
Expand Down Expand Up @@ -302,7 +301,8 @@ def build(
# Run `files` plugin events.
files = config.plugins.run_event('files', files, config=config)
# If plugins have added files but haven't set their inclusion level, calculate it again.
_set_exclusions(files._files, config)
_set_exclusions(files, config)
files._documentation_pages = None # Drop cache, if any.

nav = get_navigation(files, config)

Expand Down Expand Up @@ -346,7 +346,7 @@ def build(
for file in doc_files:
assert file.page is not None
_build_page(
file.page, config, doc_files, nav, env, dirty, excluded=file.inclusion.is_excluded()
file.page, config, files, nav, env, dirty, excluded=file.inclusion.is_excluded()
)

# Run `post_build` plugin events.
Expand Down
2 changes: 1 addition & 1 deletion mkdocs/config/config_options.py
Expand Up @@ -1164,6 +1164,6 @@ def run_validation(self, value: object) -> pathspec.gitignore.GitIgnoreSpec:
if not isinstance(value, str):
raise ValidationError(f'Expected a multiline string, but a {type(value)} was given.')
try:
return pathspec.gitignore.GitIgnoreSpec.from_lines(lines=value.splitlines())
return pathspec.gitignore.GitIgnoreSpec.from_lines(value.splitlines())
except ValueError as e:
raise ValidationError(str(e))
5 changes: 5 additions & 0 deletions mkdocs/config/defaults.py
Expand Up @@ -96,6 +96,11 @@ class MkDocsConfig(base.Config):
"""Specify which css or javascript files from the docs directory should be
additionally included in the site."""

hash_rename_assets = c.Optional(c.PathSpec())
hash_append_assets = c.Optional(c.PathSpec())
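"""Specify which css or javascript files from the docs directory should get a
content hash for cache busting: either inserted into the file name
(`hash_rename_assets`) or appended to the URL as a parameter (`hash_append_assets`)."""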

extra_templates = c.Type(list, default=[])
"""Similar to the above, but each template (HTML or XML) will be build with
Jinja2 and the global context."""
Expand Down
98 changes: 86 additions & 12 deletions mkdocs/structure/files.py
Expand Up @@ -2,6 +2,7 @@

import enum
import fnmatch
import hashlib
import logging
import os
import posixpath
Expand All @@ -21,6 +22,7 @@
if TYPE_CHECKING:
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.pages import Page
from mkdocs.theme import Theme


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -52,12 +54,22 @@ def is_not_in_nav(self):
return self.value <= self.NOT_IN_NAV.value


class AssetVersioning(enum.Enum):
    """The available cache-busting strategies for an asset file."""

    NONE = 'NONE'
    """Copy the asset file unchanged."""

    HASH_RENAME = 'HASH_RENAME'
    """Rename the file to embed a hash, e.g. 'main.js' becomes 'main.e3b0c442.js'."""

    HASH_SUFFIX = 'HASH_SUFFIX'
    """Keep the file name but append a hash to every link to it, e.g. 'main.js?h=e3b0c442'."""


class Files:
"""A collection of [File][mkdocs.structure.files.File] objects."""

def __init__(self, files: list[File]) -> None:
self._files = files
self._src_uris: dict[str, File] | None = None
self._documentation_pages: Sequence[File] | None = None

def __iter__(self) -> Iterator[File]:
"""Iterate over the files within."""
Expand Down Expand Up @@ -91,11 +103,13 @@ def get_file_from_path(self, path: str) -> File | None:
def append(self, file: File) -> None:
"""Append file to Files collection."""
self._src_uris = None
self._documentation_pages = None
self._files.append(file)

def remove(self, file: File) -> None:
"""Remove file from Files collection."""
self._src_uris = None
self._documentation_pages = None
self._files.remove(file)

def copy_static_files(
Expand All @@ -113,7 +127,15 @@ def documentation_pages(
self, *, inclusion: Callable[[InclusionLevel], bool] = InclusionLevel.is_included
) -> Sequence[File]:
"""Return iterable of all Markdown page file objects."""
return [file for file in self if file.is_documentation_page() and inclusion(file.inclusion)]
cached = inclusion is InclusionLevel.is_included and self._documentation_pages
if cached:
return cached
result = [
file for file in self if file.is_documentation_page() and inclusion(file.inclusion)
]
if cached is None:
self._documentation_pages = result
return result

def static_pages(self) -> Sequence[File]:
"""Return iterable of all static page file objects."""
Expand Down Expand Up @@ -153,7 +175,15 @@ def filter(name):
for dir in config.theme.dirs:
# Find the first theme dir which contains path
if os.path.isfile(os.path.join(dir, path)):
self.append(File(path, dir, config.site_dir, config.use_directory_urls))
self.append(
File(
path,
dir,
config.site_dir,
config.use_directory_urls,
asset_versioning=_asset_versioning(path, config.theme),
)
)
break


Expand Down Expand Up @@ -222,15 +252,17 @@ def __init__(
*,
dest_uri: str | None = None,
inclusion: InclusionLevel = InclusionLevel.UNDEFINED,
asset_versioning: AssetVersioning = AssetVersioning.NONE,
) -> None:
self.page = None
self.src_path = path
self.name = self._get_stem()
self.abs_src_path = os.path.normpath(os.path.join(src_dir, self.src_uri))
self.asset_versioning = asset_versioning
if dest_uri is None:
dest_uri = self._get_dest_path(use_directory_urls)
self.dest_uri = dest_uri
self.url = self._get_url(use_directory_urls)
self.abs_src_path = os.path.normpath(os.path.join(src_dir, self.src_uri))
self.abs_dest_path = os.path.normpath(os.path.join(dest_dir, self.dest_uri))
self.inclusion = inclusion

Expand All @@ -239,7 +271,7 @@ def __eq__(self, other) -> bool:
isinstance(other, self.__class__)
and self.src_uri == other.src_uri
and self.abs_src_path == other.abs_src_path
and self.url == other.url
and self.dest_uri == other.dest_uri
)

def __repr__(self):
Expand All @@ -265,15 +297,31 @@ def _get_dest_path(self, use_directory_urls: bool) -> str:
else:
# foo.md => foo/index.html
return posixpath.join(parent, self.name, 'index.html')

if self.asset_versioning is AssetVersioning.HASH_RENAME:
try:
suf = _hash_suffix(self.abs_src_path)
except FileNotFoundError:
pass
else:
name, ext = posixpath.splitext(self.src_uri)
return f'{name}.{suf}{ext}'

return self.src_uri

def _get_url(self, use_directory_urls: bool) -> str:
"""Return url based in destination path."""
"""Return url based on destination path."""
url = self.dest_uri
dirname, filename = posixpath.split(url)
if use_directory_urls and filename == 'index.html':
url = (dirname or '.') + '/'
return urlquote(url)
if use_directory_urls:
if url == 'index.html' or url.endswith('/index.html'):
url = (posixpath.dirname(url) or '.') + '/'
url = urlquote(url)
if self.asset_versioning is AssetVersioning.HASH_SUFFIX:
try:
url += '?h=' + _hash_suffix(self.abs_src_path)
except FileNotFoundError:
pass
return url

def url_relative_to(self, other: File | str) -> str:
"""Return url for file relative to other file."""
Expand Down Expand Up @@ -316,6 +364,30 @@ def is_css(self) -> bool:
return self.src_uri.endswith('.css')


def _asset_versioning(
    src_uri: str,
    config: MkDocsConfig | Theme,
) -> AssetVersioning:
    """Pick the versioning strategy for the file at `src_uri`.

    The `hash_rename_assets` attribute of `config` is checked first and
    `hash_append_assets` second. An attribute that is missing or falsy is
    treated as matching nothing.
    """
    # Look both options up front; either may be absent on the given object.
    rename_spec, append_spec = (
        getattr(config, option, None)
        for option in ('hash_rename_assets', 'hash_append_assets')
    )
    if rename_spec and rename_spec.match_file(src_uri):
        return AssetVersioning.HASH_RENAME
    if append_spec and append_spec.match_file(src_uri):
        return AssetVersioning.HASH_SUFFIX
    return AssetVersioning.NONE


def _hash_suffix(abs_src_path: str) -> str:
    """Return the first 8 hex digits of the SHA-256 digest of a file's content.

    The file at `abs_src_path` is read in binary mode, 65536 bytes at a time.
    Errors from `open` (such as `FileNotFoundError`) propagate to the caller.
    """
    hasher = hashlib.sha256()
    with open(abs_src_path, 'rb') as stream:
        # `read` returns b'' at end of file, which stops the iteration.
        for chunk in iter(lambda: stream.read(65536), b''):
            hasher.update(chunk)
    return hasher.hexdigest()[:8]


_default_exclude = pathspec.gitignore.GitIgnoreSpec.from_lines(['.*', '/templates/'])


Expand All @@ -335,22 +407,24 @@ def _set_exclusions(files: Iterable[File], config: MkDocsConfig | Mapping[str, A
file.inclusion = InclusionLevel.INCLUDED


def get_files(config: MkDocsConfig | Mapping[str, Any]) -> Files:
def get_files(config: MkDocsConfig) -> Files:
"""Walk the `docs_dir` and return a Files collection."""
files: list[File] = []
conflicting_files: list[tuple[File, File]] = []
for source_dir, dirnames, filenames in os.walk(config['docs_dir'], followlinks=True):
relative_dir = os.path.relpath(source_dir, config['docs_dir'])
relative_dir = PurePath(os.path.relpath(source_dir, config['docs_dir'])).as_posix()
dirnames.sort()
filenames.sort(key=_file_sort_key)

files_by_dest: dict[str, File] = {}
for filename in filenames:
src_uri = posixpath.join(relative_dir, filename)
file = File(
os.path.join(relative_dir, filename),
src_uri,
config['docs_dir'],
config['site_dir'],
config['use_directory_urls'],
asset_versioning=_asset_versioning(src_uri, config),
)
# Skip README.md if an index file also exists in dir (part 1)
prev_file = files_by_dest.setdefault(file.dest_uri, file)
Expand Down
2 changes: 1 addition & 1 deletion mkdocs/tests/build_tests.py
Expand Up @@ -220,7 +220,7 @@ def test_context_extra_css_path_warning(self):

def test_context_extra_css_js_no_page(self):
cfg = load_config(extra_css=['style.css'], extra_javascript=['script.js'])
context = build.get_context(mock.Mock(), mock.Mock(), cfg, base_url='..')
context = build.get_context(mock.Mock(), Files([]), cfg, base_url='..')
self.assertEqual(context['extra_css'], ['../style.css'])
self.assertEqual(context['extra_javascript'], ['../script.js'])

Expand Down