Skip to content

Commit

Permalink
Make `searchindex.js deterministic (#11665)
Browse files Browse the repository at this point in the history
  • Loading branch information
pietroalbini committed Sep 16, 2023
1 parent f0c25a0 commit 8e768e6
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Bugs fixed

* #11668: Raise a useful error when ``theme.conf`` is missing.
Patch by Vinay Sajip.
* #11622: Ensure that the order of keys in ``searchindex.js`` is deterministic.
Patch by Pietro Albini.

Testing
-------
Expand Down
2 changes: 1 addition & 1 deletion sphinx/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class _JavaScriptIndex:
SUFFIX = ')'

def dumps(self, data: Any) -> str:
return self.PREFIX + json.dumps(data) + self.SUFFIX
return self.PREFIX + json.dumps(data, sort_keys=True) + self.SUFFIX

def loads(self, s: str) -> Any:
data = s[len(self.PREFIX):-len(self.SUFFIX)]
Expand Down
37 changes: 37 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,40 @@ def test_parallel(app):
app.build()
index = load_searchindex(app.outdir / 'searchindex.js')
assert index['docnames'] == ['index', 'nosearch', 'tocitem']


@pytest.mark.sphinx(testroot='search')
def test_search_index_is_deterministic(app):
lists_not_to_sort = {
# Each element of .titles is related to the element of .docnames in the same position.
# The ordering is deterministic because .docnames is sorted.
'.titles',
# Each element of .filenames is related to the element of .docnames in the same position.
# The ordering is deterministic because .docnames is sorted.
'.filenames',
}

# In the search index, titles inside .alltitles are stored as a tuple of
# (document_idx, title_anchor). Tuples are represented as lists in JSON,
# but their contents must not be sorted. We cannot sort them anyway, as
# document_idx is an int and title_anchor is a str.
def is_title_tuple_type(item):
return len(item) == 2 and isinstance(item[0], int) and isinstance(item[1], str)

def assert_is_sorted(item, path):
err_path = path if path else '<root>'
if isinstance(item, dict):
assert list(item.keys()) == sorted(item.keys()), f'{err_path} is not sorted'
for key, value in item.items():
assert_is_sorted(value, f'{path}.{key}')
elif isinstance(item, list):
if not is_title_tuple_type(item) and path not in lists_not_to_sort:
assert item == sorted(item), f'{err_path} is not sorted'
for i, child in enumerate(item):
assert_is_sorted(child, f'{path}[{i}]')

app.builder.build_all()
index = load_searchindex(app.outdir / 'searchindex.js')
# Pretty print the index. Only shown by pytest on failure.
print(f'searchindex.js contents:\n\n{json.dumps(index, indent=2)}')
assert_is_sorted(index, '')

0 comments on commit 8e768e6

Please sign in to comment.