diff --git a/README.md b/README.md index bb60430..7eebcd7 100644 --- a/README.md +++ b/README.md @@ -49,4 +49,97 @@ This works the same as [a normal link to that heading](../doc1.md#hello-world). Linking to a heading without needing to know the destination page can be useful if specifying that path is cumbersome, e.g. when the pages have deeply nested paths, are far apart, or are moved around frequently. And the issue is somewhat exacerbated by the fact that [MkDocs supports only *relative* links between pages](https://github.com/mkdocs/mkdocs/issues/1592). -Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. +Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. In such cases, use [Markdown anchors](#markdown-anchors) to add unique aliases to your headings. + +### Markdown anchors + +The autorefs plugin offers a feature called "Markdown anchors". Such anchors can be added anywhere in a document, and linked to from any other place. + +The syntax is: + +```md +[](){#id-of-the-anchor} +``` + +If you look closely, it starts with the usual syntax for a link, `[]()`, except both the text value and URL of the link are empty. Then we see `{#id-of-the-anchor}`, which is the syntax supported by the [`attr_list`](https://python-markdown.github.io/extensions/attr_list/) extension. It sets an HTML id to the anchor element. The autorefs plugin simply gives a meaning to such anchors with ids. Note that raw HTML anchors like `<a id="foo"></a>` are not supported. + +The `attr_list` extension must be enabled for the Markdown anchors feature to work: + +```yaml +# mkdocs.yml +plugins: + - search + - autorefs + +markdown_extensions: + - attr_list +``` + +Now, you can add anchors to documents: + +```md +Somewhere in a document. 
+ +[](){#foobar-paragraph} + +Paragraph about foobar. +``` + +...making it possible to link to this anchor with our automatic links: + +```md +In any document. + +Check out the [paragraph about foobar][foobar-paragraph]. +``` + +If you add a Markdown anchor right above a heading, this anchor will redirect to the heading itself: + +```md +[](){#foobar} +## A verbose title about foobar +``` + +Linking to the `foobar` anchor will bring you directly to the heading, not the anchor itself, so the URL will show `#a-verbose-title-about-foobar` instead of `#foobar`. These anchors therefore act as "aliases" for headings. It is possible to define multiple aliases per heading: + +```md +[](){#contributing} +[](){#development-setup} +## How to contribute to the project? +``` + +Such aliases are especially useful when the same headings appear in several different pages. Without aliases, linking to the heading is undefined behavior (it could lead to any one of the headings). With unique aliases above headings, you can make sure to link to the right heading. + +For example, consider the following setup. You have one document per operating system describing how to install a project with the OS package manager or from sources: + +```tree +docs/ + install/ + arch.md + debian.md + gentoo.md +``` + +Each page has: + +```md +## Install with package manager +... + +## Install from sources +... +``` + +You don't want to change headings and make them redundant, like `## Arch: Install with package manager` and `## Debian: Install with package manager` just to be able to reference the right one with autorefs. Instead you can do this: + +```md +[](){#arch-install-pkg} +## Install with package manager +... + +[](){#arch-install-src} +## Install from sources +... +``` + +...replacing `arch` with `debian`, `gentoo`, etc. in the other pages. 
diff --git a/mkdocs.yml b/mkdocs.yml index 98661b8..c120b5b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,6 +95,7 @@ markdown_extensions: permalink: "ยค" plugins: +- autorefs - search - markdown-exec - gen-files: @@ -109,6 +110,7 @@ plugins: import: - https://docs.python.org/3/objects.inv - https://www.mkdocs.org/objects.inv + - https://python-markdown.github.io/objects.inv paths: [src] options: docstring_options: diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py index 64b9feb..5ebc618 100644 --- a/src/mkdocs_autorefs/plugin.py +++ b/src/mkdocs_autorefs/plugin.py @@ -18,7 +18,9 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence from urllib.parse import urlsplit +from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin +from mkdocs.structure.pages import Page from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url @@ -59,14 +61,14 @@ def __init__(self) -> None: self._abs_url_map: dict[str, str] = {} self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None - def register_anchor(self, page: str, identifier: str) -> None: + def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None: """Register that an anchor corresponding to an identifier was encountered when rendering the page. Arguments: page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'` identifier: The HTML anchor (without '#') as a string. """ - self._url_map[identifier] = f"{page}#{identifier}" + self._url_map[identifier] = f"{page}#{anchor or identifier}" def register_url(self, identifier: str, url: str) -> None: """Register that the identifier should be turned into a link to this URL. @@ -133,7 +135,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: The modified config. 
""" log.debug("Adding AutorefsExtension to the list") - config["markdown_extensions"].append(AutorefsExtension()) + config["markdown_extensions"].append(AutorefsExtension(self)) return config def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002 @@ -145,7 +147,8 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # kwargs: Additional arguments passed by MkDocs. Returns: - The same Markdown. We only use this hook to map anchors to URLs. + The same Markdown. We only use this hook to keep a reference to the current page URL, + used during Markdown conversion by the anchor scanner tree processor. """ self.current_page = page.url return markdown diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py index 7d2dbae..59999c0 100644 --- a/src/mkdocs_autorefs/references.py +++ b/src/mkdocs_autorefs/references.py @@ -2,20 +2,33 @@ from __future__ import annotations +import logging import re from html import escape, unescape -from typing import TYPE_CHECKING, Any, Callable, Match +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match from urllib.parse import urlsplit from xml.etree.ElementTree import Element import markupsafe +from markdown.core import Markdown from markdown.extensions import Extension from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor +from markdown.treeprocessors import Treeprocessor from markdown.util import HTML_PLACEHOLDER_RE, INLINE_PLACEHOLDER_RE if TYPE_CHECKING: from markdown import Markdown + from mkdocs_autorefs.plugin import AutorefsPlugin + +try: + from mkdocs.plugins import get_plugin_logger + + log = get_plugin_logger(__name__) +except ImportError: + # TODO: remove once support for MkDocs <1.5 is dropped + log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment] + _ATTR_VALUE = r'"[^"<>]+"|[^"<> ]+' # Possibly with double quotes around AUTO_REF_RE = re.compile( 
rf"autorefs-(?:identifier|optional|optional-hover))=(?P{_ATTR_VALUE})" @@ -208,13 +221,96 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str return html, unmapped +class AnchorScannerTreeProcessor(Treeprocessor): + """Tree processor to scan and register HTML anchors.""" + + _htags: ClassVar[set[str]] = {"h1", "h2", "h3", "h4", "h5", "h6"} + + def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None: + """Initialize the tree processor. + + Parameters: + plugin: A reference to the autorefs plugin, to use its `register_anchor` method. + """ + super().__init__(md) + self.plugin = plugin + + def run(self, root: Element) -> None: # noqa: D102 + if self.plugin.current_page is not None: + pending_anchors = _PendingAnchors(self.plugin, self.plugin.current_page) + self._scan_anchors(root, pending_anchors) + pending_anchors.flush() + + def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors) -> None: + for el in parent: + if el.tag == "a": + # We found an anchor. Record its id if it has one. + if anchor_id := el.get("id"): + pending_anchors.append(anchor_id) + # If the element has text or a link, it's not an alias. + # Non-whitespace text after the element interrupts the chain, aliases can't apply. + if el.text or el.get("href") or (el.tail and el.tail.strip()): + pending_anchors.flush() + + elif el.tag == "p": + # A `p` tag is a no-op for our purposes, just recurse into it in the context + # of the current collection of anchors. + self._scan_anchors(el, pending_anchors) + # Non-whitespace text after the element interrupts the chain, aliases can't apply. + if el.tail and el.tail.strip(): + pending_anchors.flush() + + elif el.tag in self._htags: + # If the element is a heading, that turns the pending anchors into aliases. + pending_anchors.flush(el.get("id")) + + else: + # But if it's some other interruption, flush anchors anyway as non-aliases. 
+ pending_anchors.flush() + # Recurse into sub-elements, in a *separate* context. + self.run(el) + + +class _PendingAnchors: + """A collection of HTML anchors that may or may not become aliased to an upcoming heading.""" + + def __init__(self, plugin: AutorefsPlugin, current_page: str): + self.plugin = plugin + self.current_page = current_page + self.anchors: list[str] = [] + + def append(self, anchor: str) -> None: + self.anchors.append(anchor) + + def flush(self, alias_to: str | None = None) -> None: + for anchor in self.anchors: + self.plugin.register_anchor(self.current_page, anchor, alias_to) + self.anchors.clear() + + class AutorefsExtension(Extension): """Extension that inserts auto-references in Markdown.""" + def __init__( + self, + plugin: AutorefsPlugin | None = None, + **kwargs: Any, + ) -> None: + """Initialize the Markdown extension. + + Parameters: + plugin: An optional reference to the autorefs plugin (to pass it to the anchor scanner tree processor). + **kwargs: Keyword arguments passed to the [base constructor][markdown.extensions.Extension]. + """ + super().__init__(**kwargs) + self.plugin = plugin + def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name) """Register the extension. Add an instance of our [`AutoRefInlineProcessor`][mkdocs_autorefs.references.AutoRefInlineProcessor] to the Markdown parser. + Also optionally add an instance of our [`AnchorScannerTreeProcessor`][mkdocs_autorefs.references.AnchorScannerTreeProcessor] + to the Markdown parser if a reference to the autorefs plugin was passed to this extension. Arguments: md: A `markdown.Markdown` instance. 
@@ -224,3 +320,10 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me "mkdocs-autorefs", priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor ) + if self.plugin is not None and self.plugin.scan_toc and "attr_list" in md.treeprocessors: + log.debug("Enabling Markdown anchors feature") + md.treeprocessors.register( + AnchorScannerTreeProcessor(self.plugin, md), + "mkdocs-autorefs-anchors-scanner", + priority=0, + ) diff --git a/tests/test_references.py b/tests/test_references.py index 5047754..f687afb 100644 --- a/tests/test_references.py +++ b/tests/test_references.py @@ -2,11 +2,13 @@ from __future__ import annotations +from textwrap import dedent from typing import Mapping import markdown import pytest +from mkdocs_autorefs.plugin import AutorefsPlugin from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url @@ -249,6 +251,88 @@ def test_external_references() -> None: assert unmapped == [] +def test_register_markdown_anchors() -> None: + """Check that Markdown anchors are registered when enabled.""" + plugin = AutorefsPlugin() + md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)]) + plugin.current_page = "page" + md.convert( + dedent( + """ + [](){#foo} + ## Heading foo + + Paragraph 1. + + [](){#bar} + Paragraph 2. + + [](){#alias1} + [](){#alias2} + ## Heading bar + + [](){#alias3} + Text. + [](){#alias4} + ## Heading baz + + [](){#alias5} + [](){#alias6} + Decoy. 
+ ## Heading more1 + + [](){#alias7} + [decoy](){#alias8} + [](){#alias9} + ## Heading more2 {#heading-custom2} + + [](){#alias10} + """, + ), + ) + assert plugin._url_map == { + "foo": "page#heading-foo", + "bar": "page#bar", + "alias1": "page#heading-bar", + "alias2": "page#heading-bar", + "alias3": "page#alias3", + "alias4": "page#heading-baz", + "alias5": "page#alias5", + "alias6": "page#alias6", + "alias7": "page#alias7", + "alias8": "page#alias8", + "alias9": "page#heading-custom2", + "alias10": "page#alias10", + } + + +def test_register_markdown_anchors_with_admonition() -> None: + """Check that Markdown anchors are registered inside a nested admonition element.""" + plugin = AutorefsPlugin() + md = markdown.Markdown(extensions=["attr_list", "toc", "admonition", AutorefsExtension(plugin)]) + plugin.current_page = "page" + md.convert( + dedent( + """ + [](){#alias1} + !!! note + ## Heading foo + + [](){#alias2} + ## Heading bar + + [](){#alias3} + ## Heading baz + """, + ), + ) + assert plugin._url_map == { + "alias1": "page#alias1", + "alias2": "page#heading-bar", + "alias3": "page#alias3", + } + + def test_keep_data_attributes() -> None: """Keep HTML data attributes from autorefs spans.""" url_map = {"example": "https://e.com"}