Skip to content

Commit a926c82

Browse files
committed
refactor: Change extension entrypoint to sphinxnotes.snippet
1 parent 730e885 commit a926c82

File tree

6 files changed

+404
-397
lines changed

6 files changed

+404
-397
lines changed

src/sphinxnotes/snippet/__init__.py

Lines changed: 209 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -2,185 +2,222 @@
22
sphinxnotes.snippet
33
~~~~~~~~~~~~~~~~~~~
44
5-
:copyright: Copyright 2020 Shengyu Zhang
5+
Sphinx extension entrypoint.
6+
7+
:copyright: Copyright 2024 Shengyu Zhang
68
:license: BSD, see LICENSE for details.
79
"""
810

911
from __future__ import annotations
10-
from typing import List, Tuple, Optional, TYPE_CHECKING
11-
import itertools
12+
from typing import List, Set, TYPE_CHECKING, Dict
13+
import re
14+
from os import path
15+
import time
16+
from importlib.metadata import version
1217

1318
from docutils import nodes
19+
from sphinx.locale import __
20+
from sphinx.util import logging
21+
from sphinx.builders.dummy import DummyBuilder
1422

1523
if TYPE_CHECKING:
24+
from sphinx.application import Sphinx
1625
from sphinx.environment import BuildEnvironment
17-
18-
__version__ = '1.1.1'
19-
20-
21-
class Snippet(object):
22-
"""
23-
Snippet is base class of reStructuredText snippet.
24-
25-
:param nodes: Document nodes that make up this snippet
26-
"""
27-
28-
#: docname where the snippet is located, can be referenced by
29-
# :rst:role:`doc`.
30-
docname: str
31-
32-
#: Source file path of snippet
33-
file: str
34-
35-
#: Line number range of snippet, in the source file which is left closed
36-
#: and right opened.
37-
lineno: Tuple[int, int]
38-
39-
#: The original reStructuredText of snippet
40-
rst: List[str]
41-
42-
#: The possible identifier key of snippet, which is picked from nodes'
43-
#: (or nodes' parent's) `ids attr`_.
44-
#:
45-
#: .. _ids attr: https://docutils.sourceforge.io/docs/ref/doctree.html#ids
46-
refid: Optional[str]
47-
48-
def __init__(self, *nodes: nodes.Node) -> None:
49-
assert len(nodes) != 0
50-
51-
env: BuildEnvironment = nodes[0].document.settings.env
52-
self.file = nodes[0].source
53-
self.docname = env.path2doc(self.file)
54-
55-
lineno = [float('inf'), -float('inf')]
56-
for node in nodes:
57-
if not node.line:
58-
continue # Skip node that have None line, I dont know why
59-
lineno[0] = min(lineno[0], _line_of_start(node))
60-
lineno[1] = max(lineno[1], _line_of_end(node))
61-
self.lineno = lineno
62-
63-
lines = []
64-
with open(self.file, 'r') as f:
65-
start = self.lineno[0] - 1
66-
stop = self.lineno[1] - 1
67-
for line in itertools.islice(f, start, stop):
68-
lines.append(line.strip('\n'))
69-
self.rst = lines
70-
71-
# Find exactly one ID attr in nodes
72-
self.refid = None
73-
for node in nodes:
74-
if node['ids']:
75-
self.refid = node['ids'][0]
76-
break
77-
78-
# If no ID found, try parent
79-
if not self.refid:
80-
for node in nodes:
81-
if node.parent['ids']:
82-
self.refid = node.parent['ids'][0]
83-
break
84-
85-
86-
class Text(Snippet):
87-
#: Text of snippet
88-
text: str
89-
90-
def __init__(self, node: nodes.Node) -> None:
91-
super().__init__(node)
92-
self.text = node.astext()
93-
94-
95-
class CodeBlock(Text):
96-
#: Language of code block
97-
language: str
98-
#: Caption of code block
99-
caption: Optional[str]
100-
101-
def __init__(self, node: nodes.literal_block) -> None:
102-
assert isinstance(node, nodes.literal_block)
103-
super().__init__(node)
104-
self.language = node['language']
105-
self.caption = node.get('caption')
106-
107-
108-
class WithCodeBlock(object):
109-
code_blocks: List[CodeBlock]
110-
111-
def __init__(self, nodes: nodes.Nodes) -> None:
112-
self.code_blocks = []
113-
for n in nodes.traverse(nodes.literal_block):
114-
self.code_blocks.append(self.CodeBlock(n))
115-
116-
117-
class Title(Text):
118-
def __init__(self, node: nodes.title) -> None:
119-
assert isinstance(node, nodes.title)
120-
super().__init__(node)
121-
122-
123-
class WithTitle(object):
124-
title: Optional[Title]
125-
126-
def __init__(self, node: nodes.Node) -> None:
127-
title_node = node.next_node(nodes.title)
128-
self.title = Title(title_node) if title_node else None
129-
130-
131-
class Section(Snippet, WithTitle):
132-
def __init__(self, node: nodes.section) -> None:
133-
assert isinstance(node, nodes.section)
134-
Snippet.__init__(self, node)
135-
WithTitle.__init__(self, node)
136-
137-
138-
class Document(Section):
139-
def __init__(self, node: nodes.document) -> None:
140-
assert isinstance(node, nodes.document)
141-
super().__init__(node.next_node(nodes.section))
142-
143-
144-
################
145-
# Nodes helper #
146-
################
147-
148-
149-
def _line_of_start(node: nodes.Node) -> int:
150-
assert node.line
151-
if isinstance(node, nodes.title):
152-
if isinstance(node.parent.parent, nodes.document):
153-
# Spceial case for Document Title / Subtitle
154-
return 1
155-
else:
156-
# Spceial case for section title
157-
return node.line - 1
158-
elif isinstance(node, nodes.section):
159-
if isinstance(node.parent, nodes.document):
160-
# Spceial case for top level section
161-
return 1
162-
else:
163-
# Spceial case for section
164-
return node.line - 1
165-
return node.line
166-
167-
168-
def _line_of_end(node: nodes.Node) -> Optional[int]:
169-
next_node = node.next_node(descend=False, siblings=True, ascend=True)
170-
while next_node:
171-
if next_node.line:
172-
return _line_of_start(next_node)
173-
next_node = next_node.next_node(
174-
# Some nodes' line attr is always None, but their children has
175-
# valid line attr
176-
descend=True,
177-
# If node and its children have not valid line attr, try use line
178-
# of next node
179-
ascend=True,
180-
siblings=True,
26+
from sphinx.config import Config as SphinxConfig
27+
from collections.abc import Iterator
28+
29+
from .config import Config
30+
from .snippet import Snippet, WithTitle, Document, Section
31+
from .picker import pick
32+
from .cache import Cache, Item
33+
from .keyword import Extractor
34+
from .utils import titlepath
35+
36+
37+
# Module-level logger obtained from sphinx.util.logging.
logger = logging.getLogger(__name__)
38+
39+
# Snippet cache shared by the event handlers in this module; it stays None
# until on_config_inited() assigns a Cache instance.
cache: Cache | None = None
40+
# Keyword extractor instance used by extract_keywords().
extractor: Extractor = Extractor()
41+
42+
43+
def extract_tags(s: Snippet) -> str:
    """Return the tag string describing what kind of snippet ``s`` is.

    :param s: The snippet to classify.
    :returns: ``'d'`` for a :class:`Document`, ``'s'`` for a
        :class:`Section`, otherwise an empty string.
    """
    # Document is tested first, so a snippet that is both a Document and a
    # Section is tagged 'd' only.
    if isinstance(s, Document):
        return 'd'
    if isinstance(s, Section):
        return 's'
    return ''
50+
51+
52+
def extract_excerpt(s: Snippet) -> str:
    """Return a short, bracketed excerpt built from the snippet's title.

    :param s: The snippet to summarise.
    :returns: ``<title>`` for a :class:`Document`, ``[title]`` for a
        :class:`Section`, and an empty string for any other snippet or when
        the snippet has no title.
    """
    # Pick the delimiters first; Document wins over Section.
    if isinstance(s, Document):
        opening, closing = '<', '>'
    elif isinstance(s, Section):
        opening, closing = '[', ']'
    else:
        return ''

    if s.title is None:
        return ''
    return opening + s.title.text + closing
58+
59+
60+
def extract_keywords(s: Snippet) -> List[str]:
    """Collect the search keywords of snippet ``s``.

    The docname is always the first keyword. When the snippet carries a
    title, the keywords extracted from the title text (stop words kept) are
    appended after it.

    :param s: The snippet to extract keywords from.
    :returns: The list of keywords, starting with ``s.docname``.
    """
    result = [s.docname]
    # TODO: Deal with more snippet
    title = s.title if isinstance(s, WithTitle) else None
    if title is not None:
        result += extractor.extract(title.text, strip_stopwords=False)
    return result
66+
67+
68+
def is_document_matched(
    pats: Dict[str, List[str]], docname: str
) -> Dict[str, List[str]]:
    """Filter ``pats`` down to the patterns that match ``docname``.

    :param pats: Mapping of tag to a list of regular expressions.
    :param docname: Name of the document being tested.
    :returns: A new mapping holding, for every tag, only the patterns that
        match ``docname`` (via :func:`re.match`). Tags without any matching
        pattern are omitted, so an empty result means the document is not
        matched at all.
    """
    matched = {}
    for tag, candidates in pats.items():
        hits = [pat for pat in candidates if re.match(pat, docname)]
        if hits:
            matched[tag] = hits
    return matched
78+
79+
80+
def is_snippet_matched(pats: Dict[str, List[str]], s: Snippet, docname: str) -> bool:
    """Whether the snippet's tags and docname matched by given patterns pats.

    :param pats: Mapping of tag (or the wildcard ``'*'``) to a list of
        regular expressions that are tested against ``docname``.
    :param s: The snippet whose tags (see :func:`extract_tags`) are looked
        up in ``pats``.
    :param docname: Name of the document the snippet belongs to.
    :returns: ``True`` when a wildcard pattern matches, or when a pattern
        listed under one of the snippet's tags matches. A snippet none of
        whose tags appear in ``pats`` (including a snippet without tags) is
        also accepted. ``False`` only when at least one of its tags is
        configured and none of that tag's patterns match.
    """
    # Wildcard patterns apply to every snippet regardless of its tags.
    if any(re.match(pat, docname) for pat in pats.get('*', ())):
        return True

    tag_configured = False
    for tag in extract_tags(s):
        if tag not in pats:
            continue
        tag_configured = True
        if any(re.match(pat, docname) for pat in pats[tag]):
            return True
    # No configured tag means nothing restricts this snippet.
    return not tag_configured
96+
97+
98+
def on_config_inited(app: Sphinx, appcfg: SphinxConfig) -> None:
    """Create the module-level snippet cache once the Sphinx config is ready.

    Builds a :class:`Config` from the ``snippet_config`` value, creates the
    :class:`Cache` in its ``cache_dir`` and tries to load existing content.

    :param app: The Sphinx application (unused).
    :param appcfg: The initialised Sphinx configuration.
    """
    global cache
    cfg = Config(appcfg.snippet_config)
    cache = Cache(cfg.cache_dir)

    try:
        cache.load()
    except Exception as e:
        # Best effort: a cache that cannot be loaded must not abort the
        # build, so the failure is only reported.
        logger.warning('[snippet] failed to load cache: %s', e)
107+
108+
109+
def on_env_get_outdated(
    app: Sphinx,
    env: BuildEnvironment,
    added: Set[str],
    changed: Set[str],
    removed: Set[str],
) -> List[str]:
    """Drop cache entries of documents that were removed from the project.

    :param app: The Sphinx application; its ``config.project`` is part of
        the cache key.
    :param env: The build environment (unused).
    :param added: Docnames added since the last build (unused).
    :param changed: Docnames changed since the last build (unused).
    :param removed: Docnames removed since the last build.
    :returns: Always an empty list; no extra documents are requested for
        re-reading.
    """
    # Remove purged indexes and snippets from db
    for docname in removed:
        cache_key = (app.config.project, docname)
        # A removed document may never have had snippets cached (e.g. it did
        # not match any pattern), so only delete entries that exist.
        if cache_key in cache:
            del cache[cache_key]
    return []
120+
121+
122+
def on_doctree_resolved(app: Sphinx, doctree: nodes.document, docname: str) -> None:
# Handler connected to the 'doctree-resolved' event in setup().
#
# Steps performed for the resolved doctree of `docname`:
#   1. Return early when `doctree` is not a nodes.document.
#   2. Narrow app.config.snippet_patterns to the patterns matching `docname`
#      (is_document_matched); return early when nothing matches.
#   3. Pick (snippet, node) pairs via pick() and keep those accepted by
#      is_snippet_matched().
#   4. Wrap every kept snippet in an Item carrying its tags, excerpt,
#      keywords and title path; for a Section the first title path element
#      is dropped.
#   5. Store the items in the module-level cache under (project, docname),
#      or delete an existing entry when no item was produced.
123+
if not isinstance(doctree, nodes.document):
124+
# XXX: It may be caused by ablog
125+
logger.debug(
126+
'[snippet] node %s is not nodes.document', type(doctree), location=doctree
181127
)
# NOTE(review): the lines below carrying an "NNN-" marker are deletion-side
# diff lines (the tail of a removed helper); they are not part of this handler.
182-
# No line found, return the max line of source file
183-
if node.source:
184-
with open(node.source) as f:
185-
return sum(1 for line in f)
186-
raise AttributeError('None source attr of node %s' % node)
128+
return
129+
130+
# Keep only the tag -> patterns entries that match this document.
pats = is_document_matched(app.config.snippet_patterns, docname)
131+
if len(pats) == 0:
132+
logger.debug('[snippet] skip picking because %s is not matched', docname)
133+
return
134+
135+
# Items collected for this document.
doc = []
136+
snippets = pick(app, doctree, docname)
137+
for s, n in snippets:
138+
if not is_snippet_matched(pats, s, docname):
139+
continue
140+
tpath = [x.astext() for x in titlepath.resolve(app.env, docname, n)]
141+
if isinstance(s, Section):
142+
# Drop the first resolved title for sections.
# NOTE(review): presumably that element is the section's own title - confirm against titlepath.resolve.
tpath = tpath[1:]
143+
doc.append(
144+
Item(
145+
snippet=s,
146+
tags=extract_tags(s),
147+
excerpt=extract_excerpt(s),
148+
keywords=extract_keywords(s),
149+
titlepath=tpath,
150+
)
151+
)
152+
153+
cache_key = (app.config.project, docname)
154+
if len(doc) != 0:
155+
cache[cache_key] = doc
156+
elif cache_key in cache:
157+
# Nothing picked this time: remove the entry left by an earlier build.
del cache[cache_key]
158+
159+
logger.debug(
160+
'[snippet] picked %s/%s snippetes in %s', len(doc), len(snippets), docname
161+
)
162+
163+
164+
def on_builder_finished(app: Sphinx, exception) -> None:
    """Write the module-level snippet cache out via ``cache.dump()``.

    Connected to the ``build-finished`` event in ``setup()``.

    :param app: The Sphinx application (unused).
    :param exception: Second argument of the event handler (unused).
    """
    cache.dump()
166+
167+
168+
class SnippetBuilder(DummyBuilder):  # DummyBuilder has dummy impls we need.
    """Builder named ``snippet`` that only decides which documents are outdated.

    Everything else is inherited from :class:`DummyBuilder`.
    """

    name = 'snippet'
    epilog = __(
        'The snippet builder produces snippets (not to OUTPUTDIR) for use by snippet CLI tool'
    )

    def get_outdated_docs(self) -> Iterator[str]:
        """Modified from :py:meth:`sphinx.builders.html.StandaloneHTMLBuilder.get_outdated_docs`.

        Yields every found docname that is either unknown to the environment
        or whose source file is newer than its cache item file.
        """
        for docname in self.env.found_docs:
            if docname not in self.env.all_docs:
                logger.debug('[build target] did not in env: %r', docname)
                yield docname
                continue

            assert cache is not None
            targetname = cache.itemfile((self.app.config.project, docname))
            try:
                targetmtime = path.getmtime(targetname)
            except Exception:
                # The item file cannot be inspected (typically it does not
                # exist yet): treat it as older than any source file.
                targetmtime = 0

            try:
                srcmtime = path.getmtime(self.env.doc2path(docname))
            except OSError:
                # source doesn't exist anymore
                continue

            if srcmtime > targetmtime:
                # Log the very timestamp that was compared instead of
                # stat-ing the source file a second time.
                logger.debug(
                    '[build target] targetname %r(%s), docname %r(%s)',
                    targetname,
                    _format_modified_time(targetmtime),
                    docname,
                    _format_modified_time(srcmtime),
                )
                yield docname
204+
205+
206+
def _format_modified_time(timestamp: float) -> str:
    """Format ``timestamp`` as ``YYYY-MM-DD HH:MM:SS.mmm`` in UTC.

    :param timestamp: Seconds since the epoch, e.g. a file modification time.
    :returns: The UTC date and time with the sub-second part truncated to
        milliseconds.
    """
    seconds, fraction = divmod(timestamp, 1)
    # ``fraction`` lies in [0, 1). Formatting it with ``.3f`` would emit its
    # leading "0." as well (".0.500"), so render the milliseconds as an int.
    millis = int(fraction * 1000)
    return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(seconds)) + f'.{millis:03d}'
210+
211+
212+
def setup(app: Sphinx):
    """Sphinx extension entrypoint: register the builder, config values and handlers.

    :param app: The Sphinx application being set up.
    :returns: Extension metadata holding the installed version of this
        extension.
    """
    app.add_builder(SnippetBuilder)

    app.add_config_value('snippet_config', {}, '')
    app.add_config_value('snippet_patterns', {'*': ['.*']}, '')

    app.connect('config-inited', on_config_inited)
    app.connect('env-get-outdated', on_env_get_outdated)
    app.connect('doctree-resolved', on_doctree_resolved)
    app.connect('build-finished', on_builder_finished)

    # Report this distribution's own version, not the one of the unrelated
    # ``sphinxnotes.any`` package (which may not even be installed).
    return {'version': version('sphinxnotes.snippet')}

0 commit comments

Comments
 (0)