update

sphinx-notes · Oct 20, 2024 · 860fd1f · 860fd1f
1 parent 41473e7
commit 860fd1f
Show file tree

Hide file tree

Showing 5 changed files with 66 additions and 62 deletions.
diff --git a/src/sphinxnotes/snippet/cache.py b/src/sphinxnotes/snippet/cache.py
@@ -93,7 +93,7 @@ def post_purge(self, key: DocID, value: list[Item]) -> None:
     def get_by_index_id(self, key: IndexID) -> Item | None:
         """Like get(), but use IndexID as key."""
         doc_id, item_index = self.index_id_to_doc_id.get(key, (None, None))
-        if not doc_id or not item_index:
+        if not doc_id or item_index is None:
             return None
         return self[doc_id][item_index]
 

diff --git a/src/sphinxnotes/snippet/ext.py b/src/sphinxnotes/snippet/ext.py
@@ -52,24 +52,20 @@ def extract_tags(s: Snippet) -> str:
 
 def extract_excerpt(s: Snippet) -> str:
     if isinstance(s, Document) and s.title is not None:
-        return '<' + s.title.text + '>'
+        return '<' + s.title + '>'
     elif isinstance(s, Section) and s.title is not None:
-        return '[' + s.title.text + ']'
+        return '[' + s.title + ']'
     elif isinstance(s, Code):
-        excerpt = s.desc.astext() if isinstance(s.desc, nodes.paragraph) else s.desc
-        return '`' + s.lang + ':' + excerpt + '`'
+        return s.lang + '`' + s.desc + '`'
     return ''
 
 
 def extract_keywords(s: Snippet) -> list[str]:
     keywords = [s.docname]
     if isinstance(s, WithTitle) and s.title is not None:
-        keywords.extend(extractor.extract(s.title.text, strip_stopwords=False))
+        keywords.extend(extractor.extract(s.title, strip_stopwords=False))
     if isinstance(s, Code):
-        if isinstance(s.desc, nodes.paragraph):
-            keywords.extend(extractor.extract(s.desc.astext(), strip_stopwords=False))
-        else:
-            keywords.extend(extractor.extract(s.desc, strip_stopwords=False))
+        keywords.extend(extractor.extract(s.desc, strip_stopwords=False))
     return keywords
 
 

diff --git a/src/sphinxnotes/snippet/picker.py b/src/sphinxnotes/snippet/picker.py
@@ -25,19 +25,22 @@
 
 def pick(
     app: Sphinx, doctree: nodes.document, docname: str
-) -> list[tuple[Snippet, nodes.section]]:
+) -> list[tuple[Snippet, nodes.Element]]:
     """
-    Pick snippets from document, return a list of snippet and the section
-    it belongs to.
+    Pick snippets from document, return a list of (snippet, related node) pairs.
+
+    As a :class:`Snippet` cannot hold any reference to the doctree, we additionally
+    return the related node here, so that the caller can refer back to the original
+    document node and do more things (e.g. generate the title path).
     """
     # FIXME: Why doctree.source is always None?
     if not doctree.attributes.get('source'):
-        logger.debug('Skipped document without source')
+        logger.debug('Skip document without source')
         return []
 
     metadata = app.env.metadata.get(docname, {})
     if 'no-search' in metadata or 'nosearch' in metadata:
-        logger.debug('Skipped document with nosearch metadata')
+        logger.debug('Skip document with nosearch metadata')
         return []
 
     # Walk doctree and pick snippets.
@@ -51,7 +54,7 @@ class SnippetPicker(nodes.SparseNodeVisitor):
     """Node visitor for picking snippets from document."""
 
     #: List of picked snippets and the section it belongs to
-    snippets: list[tuple[Snippet, nodes.section]]
+    snippets: list[tuple[Snippet, nodes.Element]]
 
     #: Stack of nested sections.
     _sections: list[nodes.section]
@@ -71,7 +74,7 @@ def visit_literal_block(self, node: nodes.literal_block) -> None:
         except ValueError as e:
             logger.debug(f'skip {node}: {e}')
             raise nodes.SkipNode
-        self.snippets.append((code, self._sections[-1]))
+        self.snippets.append((code, node))
 
     def visit_section(self, node: nodes.section) -> None:
         self._sections.append(node)

diff --git a/src/sphinxnotes/snippet/snippets.py b/src/sphinxnotes/snippet/snippets.py
@@ -12,6 +12,7 @@
 from typing import TYPE_CHECKING
 import itertools
 from os import path
+import sys
 
 from docutils import nodes
 
@@ -22,9 +23,15 @@
 class Snippet(object):
     """
     Snippet is structured fragments extracted from a single Sphinx document
-    (can also be said to be a reStructuredText file).
+    (usually, also a single reStructuredText file).
 
     :param nodes: nodes of doctree that make up this snippet.
+
+    .. warning::
+
+       Snippet will be persisted to disk via pickle. To keep it simple,
+       it CAN NOT hold references to any doctree ``nodes``
+       (or even to any non-std module).
     """
 
     #: docname where the snippet is located, can be referenced by
@@ -47,20 +54,29 @@ class Snippet(object):
     #: .. _ids attr: https://docutils.sourceforge.io/docs/ref/doctree.html#ids
     refid: str | None
 
-    def __init__(self, *nodes: nodes.Node) -> None:
+    def __init__(self, *nodes: nodes.Element) -> None:
         assert len(nodes) != 0
 
         env: BuildEnvironment = nodes[0].document.settings.env
-        self.file = nodes[0].source
-        self.docname = env.path2doc(self.file)
 
-        lineno = [float('inf'), -float('inf')]
+        file, docname = None, None
+        for node in nodes:
+            if (src := nodes[0].source) and path.exists(src):
+                file = src
+                docname = env.path2doc(file)
+                break
+        if not file or not docname:
+            raise ValueError('Missing source file or docname')
+        self.file = file
+        self.docname = docname
+
+        lineno = [sys.maxsize, -sys.maxsize]
         for node in nodes:
             if not node.line:
                 continue  # Skip node that have None line, I dont know why
             lineno[0] = min(lineno[0], _line_of_start(node))
             lineno[1] = max(lineno[1], _line_of_end(node))
-        self.lineno = lineno
+        self.lineno = (lineno[0], lineno[1])
 
         lines = []
         with open(self.file, 'r') as f:
@@ -85,26 +101,22 @@ def __init__(self, *nodes: nodes.Node) -> None:
                     break
 
 
-class Text(Snippet):
-    #: Text of snippet
-    text: str
-
-    def __init__(self, node: nodes.Node) -> None:
-        super().__init__(node)
-        self.text = node.astext()
-
-
 class Code(Snippet):
     #: Language of code block
     lang: str
     #: Description of code block, usually the text of preceding paragraph
-    desc: nodes.paragraph | str
+    desc: str
+    #: The code itself.
+    code: str
 
     def __init__(self, node: nodes.literal_block) -> None:
         assert isinstance(node, nodes.literal_block)
+        super().__init__(node)
 
         self.lang = node['language']
+        self.code = node.astext()
 
+        self.desc = ''
         if isinstance(para := node.previous_sibling(), nodes.paragraph):
             # Use the preceding paragraph as descritpion.
             #
@@ -119,30 +131,26 @@ def __init__(self, node: nodes.literal_block) -> None:
             # In this case, the preceding paragraph "Foo:" is the descritpion
             # of the code block. This convention also applies to the code,
             # code-block, sourcecode directive.
-            self.desc = para
-            super().__init__(para, node)
-        elif caption := node.get('caption'):
+            self.desc += para.astext().replace('\n', ' ')
+        if caption := node.get('caption'):
             # Use caption as descritpion.
-            # In sphinx, code-block, sourcecode and code may have caption option.
+            # In Sphinx, code-block, sourcecode and code all have a caption option.
             # https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block
-            self.desc = caption
-            super().__init__(node)
-        else:
-            raise ValueError('Lack of description: preceding paragraph or caption')
-
-
-class Title(Text):
-    def __init__(self, node: nodes.title) -> None:
-        assert isinstance(node, nodes.title)
-        super().__init__(node)
+            self.desc += caption
+        if not self.desc:
+            raise ValueError(
+                f'Node f{node} lacks description: a preceding paragraph or a caption'
+            )
 
 
 class WithTitle(object):
-    title: Title | None
+    title: str
 
-    def __init__(self, node: nodes.Node) -> None:
-        title_node = node.next_node(nodes.title)
-        self.title = Title(title_node) if title_node else None
+    def __init__(self, node: nodes.Element) -> None:
+        if title := node.next_node(nodes.title):
+            self.title = title.astext()
+        else:
+            raise ValueError(f'Node f{node} lacks title')
 
 
 class Section(Snippet, WithTitle):
@@ -193,7 +201,7 @@ def _line_of_start(node: nodes.Node) -> int:
     return node.line
 
 
-def _line_of_end(node: nodes.Node) -> int | None:
+def _line_of_end(node: nodes.Node) -> int:
     next_node = node.next_node(descend=False, siblings=True, ascend=True)
     while next_node:
         if next_node.line:
@@ -208,7 +216,7 @@ def _line_of_end(node: nodes.Node) -> int | None:
             siblings=True,
         )
     # No line found, return the max line of source file
-    if node.source:
+    if node.source and path.exists(node.source):
         with open(node.source) as f:
-            return sum(1 for line in f)
+            return sum(1 for _ in f)
     raise AttributeError('None source attr of node %s' % node)
diff --git a/src/sphinxnotes/snippet/utils/titlepath.py b/src/sphinxnotes/snippet/utils/titlepath.py
@@ -14,17 +14,16 @@
 from docutils import nodes
 
 if TYPE_CHECKING:
-    from sphinx.enviornment import BuilderEnviornment
+    from sphinx.environment import BuildEnvironment
 
 
 def resolve(
-    env: BuilderEnviornment, docname: str, node: nodes.Node
+    env: BuildEnvironment, docname: str, node: nodes.Element
 ) -> list[nodes.title]:
     return resolve_section(node) + resolve_document(env, docname)
 
 
-def resolve_section(node: nodes.section) -> list[nodes.title]:
-    # FIXME: doc is None
+def resolve_section(node: nodes.Element) -> list[nodes.title]:
     titlenodes = []
     while node:
         if len(node) > 0 and isinstance(node[0], nodes.title):
@@ -33,10 +32,8 @@ def resolve_section(node: nodes.section) -> list[nodes.title]:
     return titlenodes
 
 
-def resolve_document(env: BuilderEnviornment, docname: str) -> list[nodes.title]:
-    """
-    .. note:: Title of document itself does not included in the returned list
-    """
+def resolve_document(env: BuildEnvironment, docname: str) -> list[nodes.title]:
+    """NOTE: The title of the document itself is not included in the returned list"""
     titles = []
     master_doc = env.config.master_doc
     v = docname.split('/')