Skip to content

Commit

Permalink
Make compressed sitemap deterministic (#2100)
Browse files Browse the repository at this point in the history
  • Loading branch information
ofek committed May 12, 2020
1 parent a4eb4eb commit fa5aa4a
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 22 deletions.
2 changes: 2 additions & 0 deletions docs/about/release-notes.md
Expand Up @@ -23,6 +23,8 @@ The current and past members of the MkDocs team.

## Version 1.1.1 (in development)

* Bugfix: Allow compressed sitemap to be deterministic by supporting the
`SOURCE_DATE_EPOCH` environment variable (#2100).
* Bugfix: Use README.md as index.html even if use_directory_urls is false (#2081).
* Bugfix: Ignore links which start with a backslash (#1680).
* Bugfix: Pass `builder` to the `on_serve` event so that it can be passed to
Expand Down
15 changes: 6 additions & 9 deletions mkdocs/commands/build.py
@@ -1,5 +1,3 @@
from datetime import datetime
from calendar import timegm
import logging
import os
import gzip
Expand Down Expand Up @@ -42,10 +40,6 @@ def get_context(nav, files, config, page=None, base_url=''):

extra_css = utils.create_media_urls(config['extra_css'], page, base_url)

# Support SOURCE_DATE_EPOCH environment variable for "reproducible" builds.
# See https://reproducible-builds.org/specs/source-date-epoch/
timestamp = int(os.environ.get('SOURCE_DATE_EPOCH', timegm(datetime.utcnow().utctimetuple())))

return {
'nav': nav,
'pages': files.documentation_pages(),
Expand All @@ -56,7 +50,7 @@ def get_context(nav, files, config, page=None, base_url=''):
'extra_javascript': extra_javascript,

'mkdocs_version': mkdocs.__version__,
'build_date_utc': datetime.utcfromtimestamp(timestamp),
'build_date_utc': utils.get_build_datetime(),

'config': config,
'page': page,
Expand Down Expand Up @@ -119,8 +113,11 @@ def _build_theme_template(template_name, env, files, config, nav):

if template_name == 'sitemap.xml':
log.debug("Gzipping template: %s", template_name)
with gzip.open('{}.gz'.format(output_path), 'wb') as f:
f.write(output.encode('utf-8'))
gz_filename = '{}.gz'.format(output_path)
with open(gz_filename, 'wb') as f:
timestamp = utils.get_build_timestamp()
with gzip.GzipFile(fileobj=f, filename=gz_filename, mode='wb', mtime=timestamp) as gz_buf:
gz_buf.write(output.encode('utf-8'))
else:
log.info("Template skipped: '{}' generated empty output.".format(template_name))

Expand Down
12 changes: 2 additions & 10 deletions mkdocs/structure/pages.py
@@ -1,5 +1,4 @@
import os
import datetime
import logging
from urllib.parse import urlparse, urlunparse, urljoin
from urllib.parse import unquote as urlunquote
Expand All @@ -10,7 +9,7 @@
from markdown.util import AMP_SUBSTITUTE

from mkdocs.structure.toc import get_toc
from mkdocs.utils import meta, get_markdown_title, warning_filter
from mkdocs.utils import meta, get_build_date, get_markdown_title, warning_filter

log = logging.getLogger(__name__)
log.addFilter(warning_filter)
Expand All @@ -33,14 +32,7 @@ def __init__(self, title, file, config):
self.is_page = True
self.is_link = False

# Support SOURCE_DATE_EPOCH environment variable for "reproducible" builds.
# See https://reproducible-builds.org/specs/source-date-epoch/
if 'SOURCE_DATE_EPOCH' in os.environ:
self.update_date = datetime.datetime.utcfromtimestamp(
int(os.environ['SOURCE_DATE_EPOCH'])
).strftime("%Y-%m-%d")
else:
self.update_date = datetime.datetime.now().strftime("%Y-%m-%d")
self.update_date = get_build_date()

self._set_canonical_url(config.get('site_url', None))
self._set_edit_url(config.get('repo_url', None), config.get('edit_uri', None))
Expand Down
6 changes: 3 additions & 3 deletions mkdocs/tests/build_tests.py
Expand Up @@ -202,14 +202,14 @@ def test_build_theme_template(self, mock_build_template, mock_write_file):

@mock.patch('mkdocs.utils.write_file')
@mock.patch('mkdocs.commands.build._build_template', return_value='some content')
@mock.patch('gzip.open')
def test_build_sitemap_template(self, mock_gzip_open, mock_build_template, mock_write_file):
@mock.patch('gzip.GzipFile')
def test_build_sitemap_template(self, mock_gzip_gzipfile, mock_build_template, mock_write_file):
cfg = load_config()
env = cfg['theme'].get_env()
build._build_theme_template('sitemap.xml', env, mock.Mock(), cfg, mock.Mock())
self.assert_mock_called_once(mock_write_file)
self.assert_mock_called_once(mock_build_template)
self.assert_mock_called_once(mock_gzip_open)
self.assert_mock_called_once(mock_gzip_gzipfile)

@mock.patch('mkdocs.utils.write_file')
@mock.patch('mkdocs.commands.build._build_template', return_value='')
Expand Down
39 changes: 39 additions & 0 deletions mkdocs/utils/__init__.py
Expand Up @@ -14,6 +14,7 @@
import yaml
import fnmatch
import posixpath
from datetime import datetime, timezone
from urllib.parse import urlparse

from mkdocs import exceptions
Expand Down Expand Up @@ -79,6 +80,44 @@ def modified_time(file_path):
return 0.0


def get_build_timestamp():
    """
    Return the build time as a whole number of seconds since the Unix epoch.

    Supports the SOURCE_DATE_EPOCH environment variable for reproducible
    builds: when the variable is set, its value (converted to ``int``) is
    returned instead of the current time.
    See https://reproducible-builds.org/specs/source-date-epoch/

    Raises ``ValueError`` if SOURCE_DATE_EPOCH is set but cannot be parsed
    as an integer.
    """
    source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
    if source_date_epoch is None:
        # No override requested: fall back to the current time, truncated
        # to whole seconds.
        return int(datetime.now(timezone.utc).timestamp())

    return int(source_date_epoch)


def get_build_datetime():
    """
    Return the build time as a timezone-aware ``datetime`` in UTC.

    Supports the SOURCE_DATE_EPOCH environment variable for reproducible
    builds: when the variable is set, the returned datetime is derived from
    its integer value instead of the current time.
    See https://reproducible-builds.org/specs/source-date-epoch/

    Raises ``ValueError`` if SOURCE_DATE_EPOCH is set but cannot be parsed
    as an integer.
    """
    source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
    if source_date_epoch is None:
        # No override requested: use the current moment, tagged as UTC.
        return datetime.now(timezone.utc)

    # Passing the tz explicitly keeps the result aware (UTC) rather than
    # converting to the machine's local time.
    return datetime.fromtimestamp(int(source_date_epoch), timezone.utc)


def get_build_date():
    """
    Return the build date as a displayable ``YYYY-MM-DD`` string.

    The date is taken from ``get_build_datetime()``, so it honours the
    SOURCE_DATE_EPOCH environment variable for reproducible builds.
    See https://reproducible-builds.org/specs/source-date-epoch/
    """
    return get_build_datetime().strftime('%Y-%m-%d')


def reduce_list(data_set):
""" Reduce duplicate items in a list and preserve order """
seen = set()
Expand Down

0 comments on commit fa5aa4a

Please sign in to comment.