Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
SilverRainZ committed Oct 20, 2024
1 parent 41473e7 commit 860fd1f
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 62 deletions.
2 changes: 1 addition & 1 deletion src/sphinxnotes/snippet/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def post_purge(self, key: DocID, value: list[Item]) -> None:
def get_by_index_id(self, key: IndexID) -> Item | None:
"""Like get(), but use IndexID as key."""
doc_id, item_index = self.index_id_to_doc_id.get(key, (None, None))
if not doc_id or not item_index:
if not doc_id or item_index is None:
return None
return self[doc_id][item_index]

Expand Down
14 changes: 5 additions & 9 deletions src/sphinxnotes/snippet/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,24 +52,20 @@ def extract_tags(s: Snippet) -> str:

def extract_excerpt(s: Snippet) -> str:
if isinstance(s, Document) and s.title is not None:
return '<' + s.title.text + '>'
return '<' + s.title + '>'
elif isinstance(s, Section) and s.title is not None:
return '[' + s.title.text + ']'
return '[' + s.title + ']'
elif isinstance(s, Code):
excerpt = s.desc.astext() if isinstance(s.desc, nodes.paragraph) else s.desc
return '`' + s.lang + ':' + excerpt + '`'
return s.lang + '`' + s.desc + '`'
return ''


def extract_keywords(s: Snippet) -> list[str]:
keywords = [s.docname]
if isinstance(s, WithTitle) and s.title is not None:
keywords.extend(extractor.extract(s.title.text, strip_stopwords=False))
keywords.extend(extractor.extract(s.title, strip_stopwords=False))
if isinstance(s, Code):
if isinstance(s.desc, nodes.paragraph):
keywords.extend(extractor.extract(s.desc.astext(), strip_stopwords=False))
else:
keywords.extend(extractor.extract(s.desc, strip_stopwords=False))
keywords.extend(extractor.extract(s.desc, strip_stopwords=False))
return keywords


Expand Down
17 changes: 10 additions & 7 deletions src/sphinxnotes/snippet/picker.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,22 @@

def pick(
app: Sphinx, doctree: nodes.document, docname: str
) -> list[tuple[Snippet, nodes.section]]:
) -> list[tuple[Snippet, nodes.Element]]:
"""
Pick snippets from document, return a list of snippet and the section
it belongs to.
Pick snippets from document, return a list of snippet and the related node.
As :class:`Snippet` cannot hold any refs to the doctree, we additionally return
the related nodes here, so that the caller can refer back to the original
document node and do more things (e.g. generate the title path).
"""
# FIXME: Why doctree.source is always None?
if not doctree.attributes.get('source'):
logger.debug('Skipped document without source')
logger.debug('Skip document without source')
return []

metadata = app.env.metadata.get(docname, {})
if 'no-search' in metadata or 'nosearch' in metadata:
logger.debug('Skipped document with nosearch metadata')
logger.debug('Skip document with nosearch metadata')
return []

# Walk doctree and pick snippets.
Expand All @@ -51,7 +54,7 @@ class SnippetPicker(nodes.SparseNodeVisitor):
"""Node visitor for picking snippets from document."""

#: List of picked snippets, each paired with the doctree node it relates to
snippets: list[tuple[Snippet, nodes.section]]
snippets: list[tuple[Snippet, nodes.Element]]

#: Stack of nested sections.
_sections: list[nodes.section]
Expand All @@ -71,7 +74,7 @@ def visit_literal_block(self, node: nodes.literal_block) -> None:
except ValueError as e:
logger.debug(f'skip {node}: {e}')
raise nodes.SkipNode
self.snippets.append((code, self._sections[-1]))
self.snippets.append((code, node))

def visit_section(self, node: nodes.section) -> None:
self._sections.append(node)
Expand Down
82 changes: 45 additions & 37 deletions src/sphinxnotes/snippet/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import TYPE_CHECKING
import itertools
from os import path
import sys

from docutils import nodes

Expand All @@ -22,9 +23,15 @@
class Snippet(object):
"""
Snippet is structured fragments extracted from a single Sphinx document
(can also be said to be a reStructuredText file).
(usually, also a single reStructuredText file).
:param nodes: nodes of doctree that make up this snippet.
.. warning::
Snippet will be persisted to disk via pickle; to keep it simple,
it CANNOT hold references to any doctree ``nodes``
(or even to any non-std module).
"""

#: docname where the snippet is located, can be referenced by
Expand All @@ -47,20 +54,29 @@ class Snippet(object):
#: .. _ids attr: https://docutils.sourceforge.io/docs/ref/doctree.html#ids
refid: str | None

def __init__(self, *nodes: nodes.Node) -> None:
def __init__(self, *nodes: nodes.Element) -> None:
assert len(nodes) != 0

env: BuildEnvironment = nodes[0].document.settings.env
self.file = nodes[0].source
self.docname = env.path2doc(self.file)

lineno = [float('inf'), -float('inf')]
file, docname = None, None
for node in nodes:
if (src := nodes[0].source) and path.exists(src):
file = src
docname = env.path2doc(file)
break
if not file or not docname:
raise ValueError('Missing source file or docname')
self.file = file
self.docname = docname

lineno = [sys.maxsize, -sys.maxsize]
for node in nodes:
if not node.line:
continue # Skip nodes that have a None line, I don't know why
lineno[0] = min(lineno[0], _line_of_start(node))
lineno[1] = max(lineno[1], _line_of_end(node))
self.lineno = lineno
self.lineno = (lineno[0], lineno[1])

lines = []
with open(self.file, 'r') as f:
Expand All @@ -85,26 +101,22 @@ def __init__(self, *nodes: nodes.Node) -> None:
break


class Text(Snippet):
#: Text of snippet
text: str

def __init__(self, node: nodes.Node) -> None:
super().__init__(node)
self.text = node.astext()


class Code(Snippet):
#: Language of code block
lang: str
#: Description of code block, usually the text of preceding paragraph
desc: nodes.paragraph | str
desc: str
#: The code itself.
code: str

def __init__(self, node: nodes.literal_block) -> None:
assert isinstance(node, nodes.literal_block)
super().__init__(node)

self.lang = node['language']
self.code = node.astext()

self.desc = ''
if isinstance(para := node.previous_sibling(), nodes.paragraph):
# Use the preceding paragraph as description.
#
Expand All @@ -119,30 +131,26 @@ def __init__(self, node: nodes.literal_block) -> None:
# In this case, the preceding paragraph "Foo:" is the description
# of the code block. This convention also applies to the code,
# code-block, sourcecode directive.
self.desc = para
super().__init__(para, node)
elif caption := node.get('caption'):
self.desc += para.astext().replace('\n', ' ')
if caption := node.get('caption'):
# Use caption as description.
# In sphinx, code-block, sourcecode and code may have caption option.
# In sphinx, all of code-block, sourcecode and code have caption option.
# https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block
self.desc = caption
super().__init__(node)
else:
raise ValueError('Lack of description: preceding paragraph or caption')


class Title(Text):
def __init__(self, node: nodes.title) -> None:
assert isinstance(node, nodes.title)
super().__init__(node)
self.desc += caption
if not self.desc:
raise ValueError(
f'Node f{node} lacks description: a preceding paragraph or a caption'
)


class WithTitle(object):
title: Title | None
title: str

def __init__(self, node: nodes.Node) -> None:
title_node = node.next_node(nodes.title)
self.title = Title(title_node) if title_node else None
def __init__(self, node: nodes.Element) -> None:
if title := node.next_node(nodes.title):
self.title = title.astext()
else:
raise ValueError(f'Node f{node} lacks title')


class Section(Snippet, WithTitle):
Expand Down Expand Up @@ -193,7 +201,7 @@ def _line_of_start(node: nodes.Node) -> int:
return node.line


def _line_of_end(node: nodes.Node) -> int | None:
def _line_of_end(node: nodes.Node) -> int:
next_node = node.next_node(descend=False, siblings=True, ascend=True)
while next_node:
if next_node.line:
Expand All @@ -208,7 +216,7 @@ def _line_of_end(node: nodes.Node) -> int | None:
siblings=True,
)
# No line found, return the max line of source file
if node.source:
if node.source and path.exists(node.source):
with open(node.source) as f:
return sum(1 for line in f)
return sum(1 for _ in f)
raise AttributeError('None source attr of node %s' % node)
13 changes: 5 additions & 8 deletions src/sphinxnotes/snippet/utils/titlepath.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,16 @@
from docutils import nodes

if TYPE_CHECKING:
from sphinx.enviornment import BuilderEnviornment
from sphinx.environment import BuildEnvironment


def resolve(
env: BuilderEnviornment, docname: str, node: nodes.Node
env: BuildEnvironment, docname: str, node: nodes.Element
) -> list[nodes.title]:
return resolve_section(node) + resolve_document(env, docname)


def resolve_section(node: nodes.section) -> list[nodes.title]:
# FIXME: doc is None
def resolve_section(node: nodes.Element) -> list[nodes.title]:
titlenodes = []
while node:
if len(node) > 0 and isinstance(node[0], nodes.title):
Expand All @@ -33,10 +32,8 @@ def resolve_section(node: nodes.section) -> list[nodes.title]:
return titlenodes


def resolve_document(env: BuilderEnviornment, docname: str) -> list[nodes.title]:
"""
.. note:: Title of document itself is not included in the returned list
"""
def resolve_document(env: BuildEnvironment, docname: str) -> list[nodes.title]:
"""NOTE: Title of document itself is not included in the returned list"""
titles = []
master_doc = env.config.master_doc
v = docname.split('/')
Expand Down

0 comments on commit 860fd1f

Please sign in to comment.