Skip to content

Commit dcd276d

Browse files
committed
Extract _feed_visit_nodes
1 parent c87b758 commit dcd276d

File tree

1 file changed: +48 additions, -39 deletions

sphinx/search/__init__.py

Lines changed: 48 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from sphinx.util.index_entries import split_index_msg
2020

2121
if TYPE_CHECKING:
22-
from collections.abc import Iterable
22+
from collections.abc import Callable, Iterable
2323

2424
from sphinx.environment import BuildEnvironment
2525

@@ -525,47 +525,12 @@ def stem(word_to_stem: str) -> str:
525525
self._index_entries[docname] = sorted(_index_entries)
526526

527527
def _word_collector(self, doctree: nodes.document) -> WordStore:
    """Gather the searchable words and titles found in *doctree*.

    A fresh ``WordStore`` is created and filled by ``_feed_visit_nodes``,
    which is given this index's language splitter (``self.lang.split``)
    and language code (``self.lang.lang``).

    :param doctree: The document tree to walk.
    :returns: The populated ``WordStore``.
    """
    collected = WordStore()
    lang = self.lang
    _feed_visit_nodes(
        doctree,
        word_store=collected,
        split=lang.split,
        language=lang.lang,
    )
    return collected
570535

571536
def context_for_searchtool(self) -> dict[str, Any]:
@@ -611,3 +576,47 @@ def get_js_stemmer_code(self) -> str:
611576
)
612577
else:
613578
return self.lang.js_stemmer_code
579+
580+
581+
def _feed_visit_nodes(
    node: nodes.Node,
    *,
    word_store: WordStore,
    split: Callable[[str], list[str]],
    language: str,
) -> None:
    """Recursively harvest searchable text from *node* into *word_store*.

    Handling per node type:

    * ``nodes.comment``: ignored, children are not visited.
    * ``nodes.raw``: when ``'html'`` is among its formats, the text is
      stripped of ``<style>``/``<script>`` elements and remaining tags,
      split, and added to ``word_store.words``.  Children are never visited.
    * ``nodes.meta`` accepted by ``_is_meta_keywords``: the comma-separated
      ``content`` attribute is added to ``word_store.words``.
    * ``nodes.Text``: its text is split and added to ``word_store.words``.
    * ``nodes.title``: the title and an anchor id are appended to
      ``word_store.titles`` and its words to ``word_store.title_words``.

    Every node that does not return early has its children visited in turn.

    :param node: The node to inspect.
    :param word_store: Accumulator that is mutated in place.
    :param split: Callable turning a string into a list of words.
    :param language: Language code forwarded to ``_is_meta_keywords``.
    """
    if isinstance(node, nodes.comment):
        return

    if isinstance(node, nodes.raw):
        formats = node.get('format', '').split()
        if 'html' in formats:
            # Raw HTML may carry text worth searching, so crudely drop
            # whole <style> and <script> elements first, then every
            # remaining tag, and index whatever text is left.
            element_flags = re.IGNORECASE | re.DOTALL
            stripped = node.astext()
            for element_pattern in (r'<style.*?</style>', r'<script.*?</script>'):
                stripped = re.sub(
                    element_pattern, '', stripped, flags=element_flags
                )
            stripped = re.sub(r'<[^<]+?>', '', stripped)
            word_store.words.extend(split(stripped))
        # Raw nodes are never descended into, whatever their format.
        return

    if isinstance(node, nodes.meta) and _is_meta_keywords(node, language):
        meta_keywords = [part.strip() for part in node['content'].split(',')]
        word_store.words.extend(meta_keywords)
    elif isinstance(node, nodes.Text):
        text_words = split(node.astext())
        word_store.words.extend(text_words)
    elif isinstance(node, nodes.title):
        heading = node.astext()
        # The first title recorded is the main title and gets no anchor.
        is_main_title = not word_store.titles
        parent_ids = node.parent['ids']
        if is_main_title or not parent_ids:
            anchor = None
        else:
            anchor = parent_ids[0]
        word_store.titles.append((heading, anchor))
        word_store.title_words.extend(split(heading))

    for subnode in node.children:
        _feed_visit_nodes(
            subnode, word_store=word_store, split=split, language=language
        )

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy