Skip to content

Commit

Permalink
search: support searching for (sub)titles
Browse files Browse the repository at this point in the history
Collect all titles from all pages and use a case-insensitive
substring ("contains") match against them on the Search page.

Fixes: #10689
  • Loading branch information
marxin authored and AA-Turner committed Sep 9, 2022
1 parent 7473b05 commit 39a1301
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGES
Expand Up @@ -18,6 +18,7 @@ Features added
* #10755: linkcheck: Check the source URL of raw directives that use the ``url``
option.
* #10781: Allow :rst:role:`ref` role to be used with definitions and fields.
* #10717: HTML Search: support searching for (sub)titles.

Bugs fixed
----------
Expand Down
26 changes: 24 additions & 2 deletions sphinx/search/__init__.py
Expand Up @@ -183,6 +183,7 @@ class WordCollector(nodes.NodeVisitor):
def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
    """Create an empty collector for a single document.

    :param document: document handed on to the base visitor's initializer
    :param lang: search language object; kept so visited text can be split
                 into words with ``lang.split``
    """
    super().__init__(document)
    self.lang = lang
    # Words split out of every text node that is visited.
    self.found_words: List[str] = []
    # Words split out of title nodes only.
    self.found_title_words: List[str] = []
    # One (title text, anchor id) pair per visited title node.
    # NOTE(review): the id is stored as None when the title's parent has no
    # ids, so the second tuple element is effectively optional.
    self.found_titles: List[Tuple[str, str]] = []

Expand Down Expand Up @@ -213,7 +214,10 @@ def dispatch_visit(self, node: Node) -> None:
elif isinstance(node, nodes.Text):
self.found_words.extend(self.lang.split(node.astext()))
elif isinstance(node, nodes.title):
self.found_title_words.extend(self.lang.split(node.astext()))
title = node.astext()
ids = node.parent['ids']
self.found_titles.append((title, ids[0] if ids else None))
self.found_title_words.extend(self.lang.split(title))
elif isinstance(node, Element) and self.is_meta_keywords(node):
keywords = node['content']
keywords = [keyword.strip() for keyword in keywords.split(',')]
Expand All @@ -237,6 +241,7 @@ def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str
self._mapping: Dict[str, Set[str]] = {} # stemmed word -> set(docname)
# stemmed words in titles -> set(docname)
self._title_mapping: Dict[str, Set[str]] = {}
self._all_titles: Dict[str, List[Tuple[str, str]]] = {} # docname -> all titles
self._stem_cache: Dict[str, str] = {} # word -> stemmed word
self._objtypes: Dict[Tuple[str, str], int] = {} # objtype -> index
# objtype index -> (domain, type, objname (localized))
Expand Down Expand Up @@ -281,6 +286,11 @@ def load(self, stream: IO, format: Any) -> None:
index2fn = frozen['docnames']
self._filenames = dict(zip(index2fn, frozen['filenames']))
self._titles = dict(zip(index2fn, frozen['titles']))
self._all_titles = {}

for title, doc_tuples in frozen['alltitles'].items():
for doc, titleid in doc_tuples:
self._all_titles.setdefault(index2fn[doc], []).append((title, titleid))

def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]:
rv = {}
Expand Down Expand Up @@ -364,23 +374,33 @@ def freeze(self) -> Dict[str, Any]:
objects = self.get_objects(fn2index) # populates _objtypes
objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
objnames = self._objnames

alltitles: Dict[str, List[Tuple[int, str]]] = {}
for docname, titlelist in self._all_titles.items():
for title, titleid in titlelist:
alltitles.setdefault(title.lower(), []).append((fn2index[docname], titleid))

return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
objects=objects, objtypes=objtypes, objnames=objnames,
titleterms=title_terms, envversion=self.env.version)
titleterms=title_terms, envversion=self.env.version,
alltitles=alltitles)

def label(self) -> str:
    """Return a display label for the index language.

    The label is the language's human-readable name followed by its
    language code, e.g. ``"English (code: en)"``.
    """
    return f"{self.lang.language_name} (code: {self.lang.lang})"

def prune(self, docnames: Iterable[str]) -> None:
"""Remove data for all docnames not in the list."""
new_titles = {}
new_alltitles = {}
new_filenames = {}
for docname in docnames:
if docname in self._titles:
new_titles[docname] = self._titles[docname]
new_alltitles[docname] = self._all_titles[docname]
new_filenames[docname] = self._filenames[docname]
self._titles = new_titles
self._filenames = new_filenames
self._all_titles = new_alltitles
for wordnames in self._mapping.values():
wordnames.intersection_update(docnames)
for wordnames in self._title_mapping.values():
Expand All @@ -403,6 +423,8 @@ def stem(word: str) -> str:
return self._stem_cache[word]
_filter = self.lang.word_filter

self._all_titles[docname] = visitor.found_titles

for word in visitor.found_title_words:
stemmed_word = stem(word)
if _filter(stemmed_word):
Expand Down
20 changes: 20 additions & 0 deletions sphinx/themes/basic/static/searchtools.js
Expand Up @@ -237,6 +237,11 @@ const Search = {
* execute search (requires search index to be loaded)
*/
query: (query) => {
const docNames = Search._index.docnames;
const filenames = Search._index.filenames;
const titles = Search._index.titles;
const allTitles = Search._index.alltitles;

// stem the search terms and add them to the correct list
const stemmer = new Stemmer();
const searchTerms = new Set();
Expand Down Expand Up @@ -272,6 +277,21 @@ const Search = {
let results = [];
_removeChildren(document.getElementById("search-progress"));

const queryLower = query.toLowerCase();
Object.keys(allTitles).forEach((title) => {
if (title.includes(queryLower) && (queryLower.length >= title.length * 0.75))
allTitles[title].forEach((titlematch) => {
results.push([
docNames[titlematch[0]],
titles[titlematch[0]],
titlematch[1] !== null ? "#" + titlematch[1] : "",
null,
Math.round(100 * (queryLower.length / title.length)),
filenames[titlematch[0]],
]);
})
});

// lookup as object
objectTerms.forEach((term) =>
results.push(...Search.performObjectSearch(term, objectTerms))
Expand Down
6 changes: 4 additions & 2 deletions tests/test_search.py
Expand Up @@ -177,7 +177,8 @@ def test_IndexBuilder():
'non': [0, 1, 2, 3],
'test': [0, 1, 2, 3]},
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
'titleterms': {'section_titl': [0, 1, 2, 3]}
'titleterms': {'section_titl': [0, 1, 2, 3]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]}
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down Expand Up @@ -234,7 +235,8 @@ def test_IndexBuilder():
'non': [0, 1],
'test': [0, 1]},
'titles': ('title1_2', 'title2_2'),
'titleterms': {'section_titl': [0, 1]}
'titleterms': {'section_titl': [0, 1]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]}
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down

0 comments on commit 39a1301

Please sign in to comment.