Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

search: support searching for (sub)titles #10717

Merged
merged 4 commits into from Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES
Expand Up @@ -18,6 +18,7 @@ Features added
* #10755: linkcheck: Check the source URL of raw directives that use the ``url``
option.
* #10781: Allow :rst:role:`ref` role to be used with definitions and fields.
* #10717: HTML Search: support searching for (sub)titles.
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved

Bugs fixed
----------
Expand Down
26 changes: 24 additions & 2 deletions sphinx/search/__init__.py
Expand Up @@ -183,6 +183,7 @@ class WordCollector(nodes.NodeVisitor):
def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
super().__init__(document)
self.found_words: List[str] = []
self.found_titles: List[Tuple[str, str]] = []
self.found_title_words: List[str] = []
self.lang = lang

Expand Down Expand Up @@ -213,7 +214,10 @@ def dispatch_visit(self, node: Node) -> None:
elif isinstance(node, nodes.Text):
self.found_words.extend(self.lang.split(node.astext()))
elif isinstance(node, nodes.title):
self.found_title_words.extend(self.lang.split(node.astext()))
title = node.astext()
ids = node.parent['ids']
self.found_titles.append((title, ids[0] if ids else None))
self.found_title_words.extend(self.lang.split(title))
elif isinstance(node, Element) and self.is_meta_keywords(node):
keywords = node['content']
keywords = [keyword.strip() for keyword in keywords.split(',')]
Expand All @@ -237,6 +241,7 @@ def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str
self._mapping: Dict[str, Set[str]] = {} # stemmed word -> set(docname)
# stemmed words in titles -> set(docname)
self._title_mapping: Dict[str, Set[str]] = {}
self._all_titles: Dict[str, List[Tuple[str, str]]] = {} # docname -> all titles
self._stem_cache: Dict[str, str] = {} # word -> stemmed word
self._objtypes: Dict[Tuple[str, str], int] = {} # objtype -> index
# objtype index -> (domain, type, objname (localized))
Expand Down Expand Up @@ -281,6 +286,11 @@ def load(self, stream: IO, format: Any) -> None:
index2fn = frozen['docnames']
self._filenames = dict(zip(index2fn, frozen['filenames']))
self._titles = dict(zip(index2fn, frozen['titles']))
self._all_titles = {}

for title, doc_tuples in frozen['alltitles'].items():
for doc, titleid in doc_tuples:
self._all_titles.setdefault(index2fn[doc], []).append((title, titleid))

def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]:
rv = {}
Expand Down Expand Up @@ -364,23 +374,33 @@ def freeze(self) -> Dict[str, Any]:
objects = self.get_objects(fn2index) # populates _objtypes
objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
objnames = self._objnames

alltitles: Dict[str, List[Tuple[int, str]]] = {}
for docname, titlelist in self._all_titles.items():
for title, titleid in titlelist:
alltitles.setdefault(title.lower(), []).append((fn2index[docname], titleid))

return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
objects=objects, objtypes=objtypes, objnames=objnames,
titleterms=title_terms, envversion=self.env.version)
titleterms=title_terms, envversion=self.env.version,
alltitles=alltitles)

def label(self) -> str:
    """Return a human-readable label for the search language.

    Example: ``"English (code: en)"``.
    """
    lang = self.lang
    return "%s (code: %s)" % (lang.language_name, lang.lang)

def prune(self, docnames: Iterable[str]) -> None:
"""Remove data for all docnames not in the list."""
new_titles = {}
new_alltitles = {}
new_filenames = {}
for docname in docnames:
if docname in self._titles:
new_titles[docname] = self._titles[docname]
new_alltitles[docname] = self._all_titles[docname]
new_filenames[docname] = self._filenames[docname]
self._titles = new_titles
self._filenames = new_filenames
self._all_titles = new_alltitles
for wordnames in self._mapping.values():
wordnames.intersection_update(docnames)
for wordnames in self._title_mapping.values():
Expand All @@ -403,6 +423,8 @@ def stem(word: str) -> str:
return self._stem_cache[word]
_filter = self.lang.word_filter

self._all_titles[docname] = visitor.found_titles

for word in visitor.found_title_words:
stemmed_word = stem(word)
if _filter(stemmed_word):
Expand Down
20 changes: 20 additions & 0 deletions sphinx/themes/basic/static/searchtools.js
Expand Up @@ -237,6 +237,11 @@ const Search = {
* execute search (requires search index to be loaded)
*/
query: (query) => {
const docNames = Search._index.docnames;
const filenames = Search._index.filenames;
const titles = Search._index.titles;
const allTitles = Search._index.alltitles;

// stem the search terms and add them to the correct list
const stemmer = new Stemmer();
const searchTerms = new Set();
Expand Down Expand Up @@ -272,6 +277,21 @@ const Search = {
let results = [];
_removeChildren(document.getElementById("search-progress"));

const queryLower = query.toLowerCase();
Object.keys(allTitles).forEach((title) => {
if (title.includes(queryLower) && (queryLower.length >= title.length * 0.75))
allTitles[title].forEach((titlematch) => {
results.push([
docNames[titlematch[0]],
titles[titlematch[0]],
titlematch[1] !== null ? "#" + titlematch[1] : "",
null,
Math.round(100 * (queryLower.length / title.length)),
filenames[titlematch[0]],
]);
})
});

// lookup as object
objectTerms.forEach((term) =>
results.push(...Search.performObjectSearch(term, objectTerms))
Expand Down
6 changes: 4 additions & 2 deletions tests/test_search.py
Expand Up @@ -177,7 +177,8 @@ def test_IndexBuilder():
'non': [0, 1, 2, 3],
'test': [0, 1, 2, 3]},
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
'titleterms': {'section_titl': [0, 1, 2, 3]}
'titleterms': {'section_titl': [0, 1, 2, 3]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]}
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down Expand Up @@ -234,7 +235,8 @@ def test_IndexBuilder():
'non': [0, 1],
'test': [0, 1]},
'titles': ('title1_2', 'title2_2'),
'titleterms': {'section_titl': [0, 1]}
'titleterms': {'section_titl': [0, 1]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]}
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down