Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitizing search entry titles #3560

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/user-guide/writing-your-docs.md
Expand Up @@ -538,3 +538,16 @@ Note that fenced code blocks can not be indented. Therefore, they cannot be
nested inside list items, blockquotes, etc.

[fenced code blocks]: https://python-markdown.github.io/extensions/fenced_code_blocks/

### Search Keywords

The [search plugin][] supports defining keywords for individual sections of a page. When search terms match the defined keywords, it ensures that the relevant section will be included in the search results. To use the feature, [enable][markdown_extensions] the [attr_list][] extension for Markdown.

To define keywords for a section, assign a string of space-separated words to the `data-search-keywords` attribute of any heading. Specifically, define an attribute list at the end of the heading which contains the attribute.

```markdown
# Section Title {data-search-keywords='space separated list of words'}
```

[search plugin]: configuration.md#search
[attr_list]: https://python-markdown.github.io/extensions/attr_list/
1 change: 1 addition & 0 deletions mkdocs/contrib/search/prebuild-index.js
Expand Up @@ -45,6 +45,7 @@ stdin.on('end', function () {
}
this.field('title');
this.field('text');
this.field('keywords', {'boost': 10});
this.ref('location');

data.docs.forEach(function (doc) {
Expand Down
81 changes: 45 additions & 36 deletions mkdocs/contrib/search/search_index.py
Expand Up @@ -10,7 +10,6 @@

if TYPE_CHECKING:
from mkdocs.structure.pages import Page
from mkdocs.structure.toc import AnchorLink, TableOfContents

try:
from lunr import lunr # type: ignore
Expand All @@ -32,25 +31,12 @@ def __init__(self, **config) -> None:
self._entries: list[dict] = []
self.config = config

def _find_toc_by_id(self, toc, id_: str | None) -> AnchorLink | None:
"""
Given a table of contents and HTML ID, iterate through
and return the matched item in the TOC.
"""
for toc_item in toc:
if toc_item.id == id_:
return toc_item
toc_item_r = self._find_toc_by_id(toc_item.children, id_)
if toc_item_r is not None:
return toc_item_r
return None

def _add_entry(self, title: str | None, text: str, keywords: str | None, loc: str) -> None:
    """A simple wrapper to add an entry, dropping bad characters.

    Normalizes non-breaking spaces and collapses runs of whitespace in
    ``text``, then appends a dict with the indexed fields (``title``,
    ``text``, ``keywords``, ``location``) to ``self._entries``.
    """
    # Non-breaking spaces should index as ordinary spaces.
    text = text.replace('\u00a0', ' ')
    # Collapse any run of ASCII whitespace into a single space.
    text = re.sub(r'[ \t\n\r\f\v]+', ' ', text.strip())

    self._entries.append({'title': title, 'text': text, 'keywords': keywords, 'location': loc})

def add_entry_from_context(self, page: Page) -> None:
"""
Expand All @@ -72,25 +58,21 @@ def add_entry_from_context(self, page: Page) -> None:

# Create an entry for the full page.
text = parser.stripped_html.rstrip('\n') if self.config['indexing'] == 'full' else ''
self._add_entry(title=page.title, text=text, loc=url)
self._add_entry(title=page.title, text=text, keywords='', loc=url)

if self.config['indexing'] in ['full', 'sections']:
for section in parser.data:
self.create_entry_for_section(section, page.toc, url)
self.create_entry_for_section(section, url)

def create_entry_for_section(self, section: ContentSection, abs_url: str) -> None:
    """
    Given a section of a page and the absolute url for the page
    create an entry in the index.

    The section's own title, keywords, and id (as the URL fragment)
    are used for the entry; the body text is included only when
    full-content indexing is configured.
    """
    text = ' '.join(section.text) if self.config['indexing'] == 'full' else ''
    self._add_entry(
        title=section.title, text=text, keywords=section.keywords, loc=f'{abs_url}#{section.id}'
    )

def generate_search_index(self) -> str:
"""Python to json conversion."""
Expand Down Expand Up @@ -122,7 +104,7 @@ def generate_search_index(self) -> str:
if haslunrpy:
lunr_idx = lunr(
ref='location',
fields=('title', 'text'),
fields=('title', 'text', dict(field_name='keywords', boost=10)),
documents=self._entries,
languages=self.config['lang'],
)
Expand Down Expand Up @@ -150,13 +132,26 @@ def __init__(
text: list[str] | None = None,
id_: str | None = None,
title: str | None = None,
keywords: str | None = None,
) -> None:
self.text = text or []
self.id = id_
self.title = title
self.title = title or ''
self.keywords = keywords or ''
oprypin marked this conversation as resolved.
Show resolved Hide resolved

def __eq__(self, other):
    # Sections are equal when every indexed field matches.
    return (
        self.text == other.text
        and self.id == other.id
        and self.title == other.title
        and self.keywords == other.keywords
    )

def __repr__(self):
    # Debug-friendly representation mirroring the indexed fields.
    return (
        f"{self.__class__.__name__}("
        f"text={self.text}, id='{self.id}', title='{self.title}', keywords='{self.keywords}')"
    )


_HEADER_TAGS = tuple(f"h{x}" for x in range(1, 7))
Expand All @@ -175,10 +170,17 @@ def __init__(self, *args, **kwargs) -> None:
self.data: list[ContentSection] = []
self.section: ContentSection | None = None
self.is_header_tag = False
self.is_permalink = False
self._stripped_html: list[str] = []

def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
"""Called at the start of every HTML tag."""
atts = dict(attrs)
waylan marked this conversation as resolved.
Show resolved Hide resolved
# Check for permalink in header
if self.is_header_tag and tag == 'a' and 'headerlink' in (atts.get('class') or ''):
waylan marked this conversation as resolved.
Show resolved Hide resolved
self.is_permalink = True
return

# We only care about the opening tag for headings.
if tag not in _HEADER_TAGS:
return
Expand All @@ -187,14 +189,17 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
# for it and assign the ID if it has one.
self.is_header_tag = True
self.section = ContentSection()
self.section.id = atts.get('id')
self.section.keywords = atts.get('data-search-keywords') or ''
self.data.append(self.section)

for attr in attrs:
if attr[0] == "id":
self.section.id = attr[1]

def handle_endtag(self, tag: str) -> None:
"""Called at the end of every HTML tag."""
# Check for permalinks
if self.is_permalink and tag == 'a':
self.is_permalink = False
return

# We only care about the opening tag for headings.
if tag not in _HEADER_TAGS:
return
Expand All @@ -203,6 +208,10 @@ def handle_endtag(self, tag: str) -> None:

def handle_data(self, data: str) -> None:
"""Called for the text contents of each tag."""
# Do not retain permalink text.
if self.is_permalink:
return

self._stripped_html.append(data)

if self.section is None:
Expand All @@ -216,7 +225,7 @@ def handle_data(self, data: str) -> None:
# Otherwise it is content of something under that header
# section.
if self.is_header_tag:
self.section.title = data
self.section.title = self.section.title + data
waylan marked this conversation as resolved.
Show resolved Hide resolved
else:
self.section.text.append(data.rstrip('\n'))

Expand Down
4 changes: 2 additions & 2 deletions mkdocs/contrib/search/templates/search/main.js
Expand Up @@ -28,7 +28,7 @@ function escapeHtml (value) {
.replace(/>/g, '>');
}

// Render a single search hit as an HTML <article> element.
// NOTE(review): `keywords` is accepted for parity with the index fields
// but is not currently displayed in the rendered result.
function formatResult (location, title, summary, keywords) {
  return '<article><h3><a href="' + joinUrl(base_url, location) + '">'+ escapeHtml(title) + '</a></h3><p>' + escapeHtml(summary) +'</p></article>';
}

Expand All @@ -40,7 +40,7 @@ function displayResults (results) {
if (results.length > 0){
for (var i=0; i < results.length; i++){
var result = results[i];
var html = formatResult(result.location, result.title, result.summary);
var html = formatResult(result.location, result.title, result.summary, result.keywords);
search_results.insertAdjacentHTML('beforeend', html);
}
} else {
Expand Down
1 change: 1 addition & 0 deletions mkdocs/contrib/search/templates/search/worker.js
Expand Up @@ -77,6 +77,7 @@ function onScriptsLoaded () {
}
this.field('title');
this.field('text');
this.field('keywords', {'boost': 10});
this.ref('location');

for (var i=0; i < data.docs.length; i++) {
Expand Down
64 changes: 34 additions & 30 deletions mkdocs/tests/search_tests.py
Expand Up @@ -9,8 +9,7 @@
from mkdocs.contrib.search import search_index
from mkdocs.structure.files import File
from mkdocs.structure.pages import Page
from mkdocs.structure.toc import get_toc
from mkdocs.tests.base import dedent, get_markdown_toc, load_config
from mkdocs.tests.base import dedent, load_config


def strip_whitespace(string):
Expand Down Expand Up @@ -283,7 +282,23 @@ def test_content_parser(self):
parser.close()

self.assertEqual(
parser.data, [search_index.ContentSection(text=["TEST"], id_="title", title="Title")]
parser.data,
[search_index.ContentSection(text=["TEST"], id_="title", title="Title", keywords='')],
)

def test_content_parser_header_has_child(self):
    # Text interleaved with child elements inside a heading should be
    # concatenated into a single section title.
    parser = search_index.ContentParser()
    parser.feed('<h1 id="title">Title <span>title</span> TITLE</h1>TEST')
    parser.close()

    expected = [
        search_index.ContentSection(
            text=["TEST"], id_="title", title="Title title TITLE", keywords=''
        )
    ]
    self.assertEqual(parser.data, expected)

def test_content_parser_no_id(self):
Expand All @@ -293,7 +308,8 @@ def test_content_parser_no_id(self):
parser.close()

self.assertEqual(
parser.data, [search_index.ContentSection(text=["TEST"], id_=None, title="Title")]
parser.data,
[search_index.ContentSection(text=["TEST"], id_=None, title="Title", keywords='')],
)

def test_content_parser_content_before_header(self):
Expand All @@ -303,7 +319,8 @@ def test_content_parser_content_before_header(self):
parser.close()

self.assertEqual(
parser.data, [search_index.ContentSection(text=["TEST"], id_=None, title="Title")]
parser.data,
[search_index.ContentSection(text=["TEST"], id_=None, title="Title", keywords='')],
)

def test_content_parser_no_sections(self):
Expand All @@ -313,30 +330,20 @@ def test_content_parser_no_sections(self):

self.assertEqual(parser.data, [])

def test_find_toc_by_id(self):
"""Test finding the relevant TOC item by the tag ID."""
index = search_index.SearchIndex()

md = dedent(
"""
# Heading 1
## Heading 2
### Heading 3
"""
)
toc = get_toc(get_markdown_toc(md))

toc_item = index._find_toc_by_id(toc, "heading-1")
self.assertEqual(toc_item.url, "#heading-1")
self.assertEqual(toc_item.title, "Heading 1")
def test_data_search_keywords(self):
    # A heading's data-search-keywords attribute should be captured on
    # the resulting ContentSection.
    parser = search_index.ContentParser()

    parser.feed('<h1 id="title" data-search-keywords="search keywords">Title</h1>TEST')
    parser.close()

    self.assertEqual(
        parser.data,
        [
            search_index.ContentSection(
                text=["TEST"], id_="title", title="Title", keywords="search keywords"
            )
        ],
    )

def test_create_search_index(self):
html_content = """
Expand Down Expand Up @@ -369,7 +376,6 @@ def test_create_search_index(self):
### Heading 3
"""
)
toc = get_toc(get_markdown_toc(md))

full_content = ''.join(f"Heading{i}Content{i}" for i in range(1, 4))

Expand All @@ -379,7 +385,6 @@ def test_create_search_index(self):
for page in pages:
# Fake page.read_source() and page.render()
page.markdown = md
page.toc = toc
page.content = html_content

index = search_index.SearchIndex(**plugin.config)
Expand Down Expand Up @@ -425,7 +430,6 @@ def test_page(title, filename, config):
## Heading 2
### Heading 3"""
)
test_page.toc = get_toc(get_markdown_toc(test_page.markdown))
return test_page

def validate_full(data, page):
Expand Down