Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

search: support searching for (sub)titles #10717

Merged
merged 4 commits into from Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES
Expand Up @@ -18,6 +18,8 @@ Features added
* #10755: linkcheck: Check the source URL of raw directives that use the ``url``
option.
* #10781: Allow :rst:role:`ref` role to be used with definitions and fields.
* #10717: HTML Search: Increase priority for full title and
subtitle matches in search results.

Bugs fixed
----------
Expand Down
2 changes: 1 addition & 1 deletion sphinx/environment/__init__.py
Expand Up @@ -59,7 +59,7 @@

# This is increased every time an environment attribute is added
# or changed to properly invalidate pickle files.
ENV_VERSION = 56
ENV_VERSION = 57

# config status
CONFIG_OK = 1
Expand Down
26 changes: 24 additions & 2 deletions sphinx/search/__init__.py
Expand Up @@ -183,6 +183,7 @@ class WordCollector(nodes.NodeVisitor):
def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
    """Collect searchable words and section titles while visiting *document*.

    :param document: the docutils document tree to walk
    :param lang: language helper used to split text into words
    """
    super().__init__(document)
    # Words found in body text, in document order (unstemmed).
    self.found_words: List[str] = []
    # (title text, anchor id) pairs for every title node encountered.
    # NOTE(review): the anchor id may be None when the title's parent has no
    # ids (see dispatch_visit) — annotation arguably should be
    # Tuple[str, Optional[str]]; confirm against typing imports.
    self.found_titles: List[Tuple[str, str]] = []
    # Words found inside title nodes (unstemmed).
    self.found_title_words: List[str] = []
    self.lang = lang

Expand Down Expand Up @@ -213,7 +214,10 @@ def dispatch_visit(self, node: Node) -> None:
elif isinstance(node, nodes.Text):
self.found_words.extend(self.lang.split(node.astext()))
elif isinstance(node, nodes.title):
self.found_title_words.extend(self.lang.split(node.astext()))
title = node.astext()
ids = node.parent['ids']
self.found_titles.append((title, ids[0] if ids else None))
self.found_title_words.extend(self.lang.split(title))
elif isinstance(node, Element) and self.is_meta_keywords(node):
keywords = node['content']
keywords = [keyword.strip() for keyword in keywords.split(',')]
Expand All @@ -237,6 +241,7 @@ def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str
self._mapping: Dict[str, Set[str]] = {} # stemmed word -> set(docname)
# stemmed words in titles -> set(docname)
self._title_mapping: Dict[str, Set[str]] = {}
self._all_titles: Dict[str, List[Tuple[str, str]]] = {} # docname -> all titles
self._stem_cache: Dict[str, str] = {} # word -> stemmed word
self._objtypes: Dict[Tuple[str, str], int] = {} # objtype -> index
# objtype index -> (domain, type, objname (localized))
Expand Down Expand Up @@ -281,6 +286,11 @@ def load(self, stream: IO, format: Any) -> None:
index2fn = frozen['docnames']
self._filenames = dict(zip(index2fn, frozen['filenames']))
self._titles = dict(zip(index2fn, frozen['titles']))
self._all_titles = {}

for title, doc_tuples in frozen['alltitles'].items():
for doc, titleid in doc_tuples:
self._all_titles.setdefault(index2fn[doc], []).append((title, titleid))

def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]:
rv = {}
Expand Down Expand Up @@ -364,23 +374,33 @@ def freeze(self) -> Dict[str, Any]:
objects = self.get_objects(fn2index) # populates _objtypes
objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
objnames = self._objnames

alltitles: Dict[str, List[Tuple[int, str]]] = {}
for docname, titlelist in self._all_titles.items():
for title, titleid in titlelist:
alltitles.setdefault(title.lower(), []).append((fn2index[docname], titleid))

return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
objects=objects, objtypes=objtypes, objnames=objnames,
titleterms=title_terms, envversion=self.env.version)
titleterms=title_terms, envversion=self.env.version,
alltitles=alltitles)

def label(self) -> str:
    """Return a human-readable label for the index language, e.g. "English (code: en)"."""
    language = self.lang
    return f"{language.language_name} (code: {language.lang})"

def prune(self, docnames: Iterable[str]) -> None:
"""Remove data for all docnames not in the list."""
new_titles = {}
new_alltitles = {}
new_filenames = {}
for docname in docnames:
if docname in self._titles:
new_titles[docname] = self._titles[docname]
new_alltitles[docname] = self._all_titles[docname]
new_filenames[docname] = self._filenames[docname]
self._titles = new_titles
self._filenames = new_filenames
self._all_titles = new_alltitles
for wordnames in self._mapping.values():
wordnames.intersection_update(docnames)
for wordnames in self._title_mapping.values():
Expand All @@ -403,6 +423,8 @@ def stem(word: str) -> str:
return self._stem_cache[word]
_filter = self.lang.word_filter

self._all_titles[docname] = visitor.found_titles

for word in visitor.found_title_words:
stemmed_word = stem(word)
if _filter(stemmed_word):
Expand Down
24 changes: 23 additions & 1 deletion sphinx/themes/basic/static/searchtools.js
Expand Up @@ -237,6 +237,11 @@ const Search = {
* execute search (requires search index to be loaded)
*/
query: (query) => {
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const titles = Search._index.titles;
const allTitles = Search._index.alltitles;

// stem the search terms and add them to the correct list
const stemmer = new Stemmer();
const searchTerms = new Set();
Expand Down Expand Up @@ -272,6 +277,23 @@ const Search = {
let results = [];
_removeChildren(document.getElementById("search-progress"));

const queryLower = query.toLowerCase();
for (const [title, foundTitles] of Object.entries(allTitles)) {
if (title.includes(queryLower) && (queryLower.length >= title.length/2)) {
for (const [file, id] of foundTitles) {
let score = Math.round(100 * queryLower.length / title.length)
results.push([
docNames[file],
titles[file],
id !== null ? "#" + id : "",
null,
score,
filenames[file],
]);
}
}
}

// lookup as object
objectTerms.forEach((term) =>
results.push(...Search.performObjectSearch(term, objectTerms))
Expand Down Expand Up @@ -399,8 +421,8 @@ const Search = {
// prepare search
const terms = Search._index.terms;
const titleTerms = Search._index.titleterms;
const docNames = Search._index.docnames;
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const titles = Search._index.titles;

const scoreMap = new Map();
Expand Down
6 changes: 4 additions & 2 deletions tests/test_search.py
Expand Up @@ -177,7 +177,8 @@ def test_IndexBuilder():
'non': [0, 1, 2, 3],
'test': [0, 1, 2, 3]},
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
'titleterms': {'section_titl': [0, 1, 2, 3]}
'titleterms': {'section_titl': [0, 1, 2, 3]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]}
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down Expand Up @@ -234,7 +235,8 @@ def test_IndexBuilder():
'non': [0, 1],
'test': [0, 1]},
'titles': ('title1_2', 'title2_2'),
'titleterms': {'section_titl': [0, 1]}
'titleterms': {'section_titl': [0, 1]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]}
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down