|
2 | 2 | sphinxnotes.snippet
|
3 | 3 | ~~~~~~~~~~~~~~~~~~~
|
4 | 4 |
|
5 |
| -:copyright: Copyright 2020 Shengyu Zhang |
| 5 | +Sphinx extension entrypoint. |
| 6 | +
|
| 7 | +:copyright: Copyright 2024 Shengyu Zhang |
6 | 8 | :license: BSD, see LICENSE for details.
|
7 | 9 | """
|
8 | 10 |
|
9 | 11 | from __future__ import annotations
|
10 |
| -from typing import List, Tuple, Optional, TYPE_CHECKING |
11 |
| -import itertools |
| 12 | +from typing import List, Set, TYPE_CHECKING, Dict |
| 13 | +import re |
| 14 | +from os import path |
| 15 | +import time |
| 16 | +from importlib.metadata import version |
12 | 17 |
|
13 | 18 | from docutils import nodes
|
| 19 | +from sphinx.locale import __ |
| 20 | +from sphinx.util import logging |
| 21 | +from sphinx.builders.dummy import DummyBuilder |
14 | 22 |
|
15 | 23 | if TYPE_CHECKING:
|
| 24 | + from sphinx.application import Sphinx |
16 | 25 | from sphinx.environment import BuildEnvironment
|
17 |
| - |
18 |
| -__version__ = '1.1.1' |
19 |
| - |
20 |
| - |
21 |
| -class Snippet(object): |
22 |
| - """ |
23 |
| - Snippet is base class of reStructuredText snippet. |
24 |
| -
|
25 |
| - :param nodes: Document nodes that make up this snippet |
26 |
| - """ |
27 |
| - |
28 |
| - #: docname where the snippet is located, can be referenced by |
29 |
| - # :rst:role:`doc`. |
30 |
| - docname: str |
31 |
| - |
32 |
| - #: Source file path of snippet |
33 |
| - file: str |
34 |
| - |
35 |
| - #: Line number range of snippet, in the source file which is left closed |
36 |
| - #: and right opened. |
37 |
| - lineno: Tuple[int, int] |
38 |
| - |
39 |
| - #: The original reStructuredText of snippet |
40 |
| - rst: List[str] |
41 |
| - |
42 |
| - #: The possible identifier key of snippet, which is picked from nodes' |
43 |
| - #: (or nodes' parent's) `ids attr`_. |
44 |
| - #: |
45 |
| - #: .. _ids attr: https://docutils.sourceforge.io/docs/ref/doctree.html#ids |
46 |
| - refid: Optional[str] |
47 |
| - |
48 |
| - def __init__(self, *nodes: nodes.Node) -> None: |
49 |
| - assert len(nodes) != 0 |
50 |
| - |
51 |
| - env: BuildEnvironment = nodes[0].document.settings.env |
52 |
| - self.file = nodes[0].source |
53 |
| - self.docname = env.path2doc(self.file) |
54 |
| - |
55 |
| - lineno = [float('inf'), -float('inf')] |
56 |
| - for node in nodes: |
57 |
| - if not node.line: |
58 |
| - continue # Skip node that have None line, I dont know why |
59 |
| - lineno[0] = min(lineno[0], _line_of_start(node)) |
60 |
| - lineno[1] = max(lineno[1], _line_of_end(node)) |
61 |
| - self.lineno = lineno |
62 |
| - |
63 |
| - lines = [] |
64 |
| - with open(self.file, 'r') as f: |
65 |
| - start = self.lineno[0] - 1 |
66 |
| - stop = self.lineno[1] - 1 |
67 |
| - for line in itertools.islice(f, start, stop): |
68 |
| - lines.append(line.strip('\n')) |
69 |
| - self.rst = lines |
70 |
| - |
71 |
| - # Find exactly one ID attr in nodes |
72 |
| - self.refid = None |
73 |
| - for node in nodes: |
74 |
| - if node['ids']: |
75 |
| - self.refid = node['ids'][0] |
76 |
| - break |
77 |
| - |
78 |
| - # If no ID found, try parent |
79 |
| - if not self.refid: |
80 |
| - for node in nodes: |
81 |
| - if node.parent['ids']: |
82 |
| - self.refid = node.parent['ids'][0] |
83 |
| - break |
84 |
| - |
85 |
| - |
86 |
| -class Text(Snippet): |
87 |
| - #: Text of snippet |
88 |
| - text: str |
89 |
| - |
90 |
| - def __init__(self, node: nodes.Node) -> None: |
91 |
| - super().__init__(node) |
92 |
| - self.text = node.astext() |
93 |
| - |
94 |
| - |
95 |
| -class CodeBlock(Text): |
96 |
| - #: Language of code block |
97 |
| - language: str |
98 |
| - #: Caption of code block |
99 |
| - caption: Optional[str] |
100 |
| - |
101 |
| - def __init__(self, node: nodes.literal_block) -> None: |
102 |
| - assert isinstance(node, nodes.literal_block) |
103 |
| - super().__init__(node) |
104 |
| - self.language = node['language'] |
105 |
| - self.caption = node.get('caption') |
106 |
| - |
107 |
| - |
108 |
| -class WithCodeBlock(object): |
109 |
| - code_blocks: List[CodeBlock] |
110 |
| - |
111 |
| - def __init__(self, nodes: nodes.Nodes) -> None: |
112 |
| - self.code_blocks = [] |
113 |
| - for n in nodes.traverse(nodes.literal_block): |
114 |
| - self.code_blocks.append(self.CodeBlock(n)) |
115 |
| - |
116 |
| - |
117 |
| -class Title(Text): |
118 |
| - def __init__(self, node: nodes.title) -> None: |
119 |
| - assert isinstance(node, nodes.title) |
120 |
| - super().__init__(node) |
121 |
| - |
122 |
| - |
123 |
| -class WithTitle(object): |
124 |
| - title: Optional[Title] |
125 |
| - |
126 |
| - def __init__(self, node: nodes.Node) -> None: |
127 |
| - title_node = node.next_node(nodes.title) |
128 |
| - self.title = Title(title_node) if title_node else None |
129 |
| - |
130 |
| - |
131 |
| -class Section(Snippet, WithTitle): |
132 |
| - def __init__(self, node: nodes.section) -> None: |
133 |
| - assert isinstance(node, nodes.section) |
134 |
| - Snippet.__init__(self, node) |
135 |
| - WithTitle.__init__(self, node) |
136 |
| - |
137 |
| - |
138 |
| -class Document(Section): |
139 |
| - def __init__(self, node: nodes.document) -> None: |
140 |
| - assert isinstance(node, nodes.document) |
141 |
| - super().__init__(node.next_node(nodes.section)) |
142 |
| - |
143 |
| - |
144 |
| -################ |
145 |
| -# Nodes helper # |
146 |
| -################ |
147 |
| - |
148 |
| - |
149 |
| -def _line_of_start(node: nodes.Node) -> int: |
150 |
| - assert node.line |
151 |
| - if isinstance(node, nodes.title): |
152 |
| - if isinstance(node.parent.parent, nodes.document): |
153 |
| - # Spceial case for Document Title / Subtitle |
154 |
| - return 1 |
155 |
| - else: |
156 |
| - # Spceial case for section title |
157 |
| - return node.line - 1 |
158 |
| - elif isinstance(node, nodes.section): |
159 |
| - if isinstance(node.parent, nodes.document): |
160 |
| - # Spceial case for top level section |
161 |
| - return 1 |
162 |
| - else: |
163 |
| - # Spceial case for section |
164 |
| - return node.line - 1 |
165 |
| - return node.line |
166 |
| - |
167 |
| - |
168 |
| -def _line_of_end(node: nodes.Node) -> Optional[int]: |
169 |
| - next_node = node.next_node(descend=False, siblings=True, ascend=True) |
170 |
| - while next_node: |
171 |
| - if next_node.line: |
172 |
| - return _line_of_start(next_node) |
173 |
| - next_node = next_node.next_node( |
174 |
| - # Some nodes' line attr is always None, but their children has |
175 |
| - # valid line attr |
176 |
| - descend=True, |
177 |
| - # If node and its children have not valid line attr, try use line |
178 |
| - # of next node |
179 |
| - ascend=True, |
180 |
| - siblings=True, |
| 26 | + from sphinx.config import Config as SphinxConfig |
| 27 | + from collections.abc import Iterator |
| 28 | + |
| 29 | +from .config import Config |
| 30 | +from .snippet import Snippet, WithTitle, Document, Section |
| 31 | +from .picker import pick |
| 32 | +from .cache import Cache, Item |
| 33 | +from .keyword import Extractor |
| 34 | +from .utils import titlepath |
| 35 | + |
| 36 | + |
logger = logging.getLogger(__name__)

# Snippet cache shared by the event handlers in this module; it stays None
# until on_config_inited() builds it from the ``snippet_config`` value.
cache: Cache | None = None
# Keyword extractor instance used by extract_keywords().
extractor: Extractor = Extractor()
| 41 | + |
| 42 | + |
def extract_tags(s: Snippet) -> str:
    """Return the tag string describing the kind of snippet *s*.

    ``'d'`` for a :class:`Document`, ``'s'`` for a :class:`Section`, and an
    empty string for anything else. The Document check runs first, so a
    snippet that is both is tagged ``'d'``.
    """
    if isinstance(s, Document):
        return 'd'
    if isinstance(s, Section):
        return 's'
    return ''
| 50 | + |
| 51 | + |
def extract_excerpt(s: Snippet) -> str:
    """Return a short, bracketed excerpt built from the snippet's title.

    Documents are rendered as ``<title>``, sections as ``[title]``. Snippets
    of any other kind, or without a title, yield an empty string.
    """
    if isinstance(s, Document):
        opening, closing = '<', '>'
    elif isinstance(s, Section):
        opening, closing = '[', ']'
    else:
        return ''

    title = s.title
    if title is None:
        return ''
    return opening + title.text + closing
| 58 | + |
| 59 | + |
def extract_keywords(s: Snippet) -> List[str]:
    """Collect the search keywords for snippet *s*.

    The docname always comes first. When the snippet carries a title, the
    keywords extracted from the title text (stopwords kept) are appended.
    """
    keywords = [s.docname]
    # TODO: Deal with more snippet
    if not isinstance(s, WithTitle):
        return keywords

    title = s.title
    if title is not None:
        keywords += extractor.extract(title.text, strip_stopwords=False)
    return keywords
| 66 | + |
| 67 | + |
def is_document_matched(
    pats: Dict[str, List[str]], docname: str
) -> Dict[str, List[str]]:
    """Filter *pats* down to the patterns that match *docname*.

    Every pattern is tried with :func:`re.match`, i.e. anchored at the start
    of *docname*. Tags left without a single matching pattern are dropped, so
    an empty result means the document is not matched at all.
    """
    matched: Dict[str, List[str]] = {}
    for tag, candidates in pats.items():
        hits = [pat for pat in candidates if re.match(pat, docname)]
        if hits:
            matched[tag] = hits
    return matched
| 78 | + |
| 79 | + |
def is_snippet_matched(pats: Dict[str, List[str]], s: Snippet, docname: str) -> bool:
    """Whether the snippet's tags and docname are matched by the patterns *pats*.

    :param pats: Mapping of tag (or the wildcard ``'*'``) to regex patterns.
    :param s: Snippet whose tags, as given by :func:`extract_tags`, are checked.
    :param docname: Name of the document the snippet comes from; patterns are
        applied to it with :func:`re.match`.
    :returns: True when a wildcard pattern matches, when a pattern of one of
        the snippet's tags matches, or when none of the snippet's tags has an
        entry in *pats*.
    """

    def any_match(patterns) -> bool:
        return any(re.match(pat, docname) for pat in patterns)

    # Wildcard patterns apply to every snippet regardless of its tags.
    if any_match(pats.get('*', ())):
        return True

    configured_tags = [tag for tag in extract_tags(s) if tag in pats]
    if not configured_tags:
        # NOTE(review): a snippet none of whose tags are configured is
        # accepted. This preserves the existing behavior -- confirm it is
        # the intended default.
        return True
    return any(any_match(pats[tag]) for tag in configured_tags)
| 96 | + |
| 97 | + |
def on_config_inited(app: Sphinx, appcfg: SphinxConfig) -> None:
    """Create the module-level snippet cache and try to load its contents.

    Handler for the ``config-inited`` event. The cache directory is taken
    from the ``snippet_config`` configuration value.

    :param app: The Sphinx application (unused here).
    :param appcfg: The initialized Sphinx configuration.
    """
    global cache
    cfg = Config(appcfg.snippet_config)
    cache = Cache(cfg.cache_dir)

    try:
        cache.load()
    except Exception as e:
        # Best effort: a cache that cannot be loaded is reported and the
        # build continues with whatever state the cache object is in.
        logger.warning('[snippet] failed to load cache: %s', e)
| 107 | + |
| 108 | + |
def on_env_get_outdated(
    app: Sphinx,
    env: BuildEnvironment,
    added: Set[str],
    changed: Set[str],
    removed: Set[str],
) -> List[str]:
    """Drop the cached snippets of every removed document.

    Handler for the ``env-get-outdated`` event.

    :param app: The Sphinx application; its ``project`` config value is part
        of the cache key.
    :param env: The build environment (unused here).
    :param added: Docnames added since the last build (unused here).
    :param changed: Docnames changed since the last build (unused here).
    :param removed: Docnames removed since the last build.
    :returns: Always an empty list.
    """
    # Remove purged indexes and snippets from db
    for docname in removed:
        cache_key = (app.config.project, docname)
        # A document that produced no snippets is never stored (see
        # on_doctree_resolved), so only delete keys that are present.
        if cache_key in cache:
            del cache[cache_key]
    return []
| 120 | + |
| 121 | + |
def on_doctree_resolved(app: Sphinx, doctree: nodes.document, docname: str) -> None:
    """Pick the snippets of a resolved doctree and record them in the cache.

    Handler for the ``doctree-resolved`` event. Documents whose name matches
    none of the ``snippet_patterns`` are skipped. For matching documents the
    picked snippets are filtered per tag and stored under the key
    ``(project, docname)``; a stale entry is removed when nothing remains.
    """
    if not isinstance(doctree, nodes.document):
        # XXX: It may caused by ablog
        logger.debug(
            '[snippet] node %s is not nodes.document', type(doctree), location=doctree
        )
        return

    matched_pats = is_document_matched(app.config.snippet_patterns, docname)
    if not matched_pats:
        logger.debug('[snippet] skip picking because %s is not matched', docname)
        return

    picked = pick(app, doctree, docname)
    items = []
    for snippet, node in picked:
        if not is_snippet_matched(matched_pats, snippet, docname):
            continue
        titles = [t.astext() for t in titlepath.resolve(app.env, docname, node)]
        items.append(
            Item(
                snippet=snippet,
                tags=extract_tags(snippet),
                excerpt=extract_excerpt(snippet),
                keywords=extract_keywords(snippet),
                # Sections drop the first resolved title -- presumably their
                # own; TODO confirm against titlepath.resolve.
                titlepath=titles[1:] if isinstance(snippet, Section) else titles,
            )
        )

    key = (app.config.project, docname)
    if items:
        cache[key] = items
    elif key in cache:
        del cache[key]

    logger.debug(
        '[snippet] picked %s/%s snippetes in %s', len(items), len(picked), docname
    )
| 162 | + |
| 163 | + |
def on_builder_finished(app: Sphinx, exception) -> None:
    """Write the snippet cache out once the build has finished.

    Handler for the ``build-finished`` event. The cache is dumped whether or
    not *exception* is set.
    """
    snippet_cache = cache
    snippet_cache.dump()
| 166 | + |
| 167 | + |
class SnippetBuilder(DummyBuilder):  # DummyBuilder has dummy impls we need.
    """Builder registered under the name ``snippet``.

    It only overrides :meth:`get_outdated_docs`, so that a document counts as
    outdated when its source is newer than its item file in the snippet cache.
    """

    name = 'snippet'
    epilog = __(
        'The snippet builder produces snippets (not to OUTPUTDIR) for use by snippet CLI tool'
    )

    def get_outdated_docs(self) -> Iterator[str]:
        """Modified from :py:meth:`sphinx.builders.html.StandaloneHTMLBuilder.get_outdated_docs`.

        Yields every found docname that is unknown to the environment, or
        whose source file is newer than its cache item file. Documents whose
        source file can no longer be stat'ed are skipped.
        """
        for docname in self.env.found_docs:
            if docname not in self.env.all_docs:
                logger.debug('[build target] did not in env: %r', docname)
                yield docname
                continue

            assert cache is not None
            targetname = cache.itemfile((self.app.config.project, docname))
            try:
                targetmtime = path.getmtime(targetname)
            except Exception:
                # No readable item file yet: treat it as infinitely old.
                targetmtime = 0
            try:
                srcmtime = path.getmtime(self.env.doc2path(docname))
            except OSError:
                # source doesn't exist anymore
                continue

            if srcmtime > targetmtime:
                # Log the mtime that was actually compared instead of
                # stat'ing the source file a second time.
                logger.debug(
                    '[build target] targetname %r(%s), docname %r(%s)',
                    targetname,
                    _format_modified_time(targetmtime),
                    docname,
                    _format_modified_time(srcmtime),
                )
                yield docname
| 204 | + |
| 205 | + |
| 206 | +def _format_modified_time(timestamp: float) -> str: |
| 207 | + """Return an RFC 3339 formatted string representing the given timestamp.""" |
| 208 | + seconds, fraction = divmod(timestamp, 1) |
| 209 | + return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(seconds)) + f'.{fraction:.3f}' |
| 210 | + |
| 211 | + |
def setup(app: Sphinx):
    """Sphinx extension entrypoint.

    Registers the ``snippet`` builder, the ``snippet_config`` and
    ``snippet_patterns`` configuration values, and the event handlers of
    this module.

    :param app: The Sphinx application being set up.
    :returns: Extension metadata containing the installed package version.
    """
    app.add_builder(SnippetBuilder)

    app.add_config_value('snippet_config', {}, '')
    app.add_config_value('snippet_patterns', {'*': ['.*']}, '')

    app.connect('config-inited', on_config_inited)
    app.connect('env-get-outdated', on_env_get_outdated)
    app.connect('doctree-resolved', on_doctree_resolved)
    app.connect('build-finished', on_builder_finished)

    # Report the version of this package; looking up another distribution
    # fails with PackageNotFoundError whenever that one is not installed.
    return {'version': version('sphinxnotes.snippet')}
0 commit comments