diff --git a/docs/changelog.md b/docs/changelog.md
index 786b75d..3c03394 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1 +1,10 @@
--8<-- "CHANGELOG.md"
+
+[](#hello){#hello2}
+
+## Hello
+
+Hello.
+
+Link to [Hello 1][hello1].
+Link to [Hello 2][hello2].
diff --git a/docs/index.md b/docs/index.md
index 612c7a5..78b84ee 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1 +1,10 @@
--8<-- "README.md"
+
+[](#hello){#hello1}
+
+## Hello
+
+Hello.
+
+Link to [Hello 1][hello1].
+Link to [Hello 2][hello2].
diff --git a/mkdocs.yml b/mkdocs.yml
index 98661b8..4d4cb75 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -95,6 +95,8 @@ markdown_extensions:
permalink: "¤"
plugins:
+- autorefs:
+ scan_anchors: true
- search
- markdown-exec
- gen-files:
@@ -109,6 +111,7 @@ plugins:
import:
- https://docs.python.org/3/objects.inv
- https://www.mkdocs.org/objects.inv
+ - https://python-markdown.github.io/objects.inv
paths: [src]
options:
docstring_options:
diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py
index 5eca316..9f1a31e 100644
--- a/src/mkdocs_autorefs/plugin.py
+++ b/src/mkdocs_autorefs/plugin.py
@@ -15,12 +15,18 @@
import contextlib
import functools
import logging
+import re
+from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Sequence
from urllib.parse import urlsplit
+from mkdocs.config.base import Config
+from mkdocs.config.config_options import Type
+from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import BasePlugin
+from mkdocs.structure.pages import Page
-from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
+from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url
if TYPE_CHECKING:
from mkdocs.config.defaults import MkDocsConfig
@@ -36,7 +42,14 @@
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]
-class AutorefsPlugin(BasePlugin):
+class AutorefsConfig(Config):
+ """Configuration options for the Autorefs plugin."""
+
+ scan_anchors = Type(bool, default=False)
+ """Whether to scan HTML pages for anchors defining references."""
+
+
+class AutorefsPlugin(BasePlugin[AutorefsConfig]):
"""An `mkdocs` plugin.
This plugin defines the following event hooks:
@@ -50,23 +63,28 @@ class AutorefsPlugin(BasePlugin):
"""
scan_toc: bool = True
+ scan_anchors: bool = False
current_page: str | None = None
+ _re_anchors = re.compile(r'')
+
def __init__(self) -> None:
"""Initialize the object."""
super().__init__()
self._url_map: dict[str, str] = {}
self._abs_url_map: dict[str, str] = {}
+ self._extension: AutorefsExtension | None = None
self.get_fallback_anchor: Callable[[str], str | None] | None = None
+ self.current_page: str | None = None
- def register_anchor(self, page: str, identifier: str) -> None:
+ def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.
Arguments:
page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
identifier: The HTML anchor (without '#') as a string.
"""
- self._url_map[identifier] = f"{page}#{identifier}"
+ self._url_map[identifier] = f"{page}#{anchor or identifier}"
def register_url(self, identifier: str, url: str) -> None:
"""Register that the identifier should be turned into a link to this URL.
@@ -133,20 +151,15 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
The modified config.
"""
log.debug("Adding AutorefsExtension to the list")
- config["markdown_extensions"].append(AutorefsExtension())
+ anchor_scanner_factory = (
+ partial(AnchorScannerTreeProcessor, self) if self.scan_anchors or self.config.scan_anchors else None
+ )
+ # anchor_scanner_factory = None
+ self._extension = AutorefsExtension(anchor_scanner_factory=anchor_scanner_factory)
+ config["markdown_extensions"].append(self._extension)
return config
- def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
- """Remember which page is the current one.
-
- Arguments:
- markdown: Input Markdown.
- page: The related MkDocs page instance.
- kwargs: Additional arguments passed by MkDocs.
-
- Returns:
- The same Markdown. We only use this hook to map anchors to URLs.
- """
+ def on_page_markdown(self, markdown: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002, D102
self.current_page = page.url
return markdown
@@ -170,6 +183,11 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
log.debug(f"Mapping identifiers to URLs for page {page.file.src_path}")
for item in page.toc.items:
self.map_urls(page.url, item)
+
+ # if self.scan_anchors or self.config.scan_anchors:
+ # for href, hid in re.findall(self._re_anchors, html):
+ # self.register_anchor(page.url, identifier=hid, anchor=href.lstrip("#"))
+
return html
def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py
index 66b4931..5ce86a7 100644
--- a/src/mkdocs_autorefs/references.py
+++ b/src/mkdocs_autorefs/references.py
@@ -8,13 +8,17 @@
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element
+from markdown.core import Markdown
from markdown.extensions import Extension
from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor
+from markdown.treeprocessors import Treeprocessor
from markdown.util import INLINE_PLACEHOLDER_RE
if TYPE_CHECKING:
from markdown import Markdown
+ from mkdocs_autorefs.plugin import AutorefsPlugin
+
AUTO_REF_RE = re.compile(
r"<span data-(?P<kind>autorefs-identifier|autorefs-optional|autorefs-optional-hover)="
r'("?)(?P<identifier>[^"<>]*)\2>(?P<title>.*?)</span>',
@@ -197,9 +201,48 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
return html, unmapped
+class AnchorScannerTreeProcessor(Treeprocessor):
+ """Tree processor to scan and register HTML anchors."""
+
+ def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
+ """Initialize the tree processor.
+
+ Parameters:
+ plugin: A reference to the autorefs plugin, to use its `register_anchor` method.
+ """
+ super().__init__(md)
+ self.plugin = plugin
+
+ def run(self, root: Element) -> None: # noqa: D102
+ if self.plugin.current_page is not None:
+ self._scan_anchors(root)
+
+ def _scan_anchors(self, parent: Element) -> None:
+ for el in parent:
+ if el.tag == "a" and (hid := el.get("id")):
+ self.plugin.register_anchor(self.plugin.current_page, hid, el.get("href", "").lstrip("#")) # type: ignore[arg-type]
+ else:
+ self._scan_anchors(el)
+
+
class AutorefsExtension(Extension):
"""Extension that inserts auto-references in Markdown."""
+ def __init__(
+ self,
+ anchor_scanner_factory: Callable[[Markdown], AnchorScannerTreeProcessor] | None = None,
+ **kwargs: Any,
+ ) -> None:
+ """Initialize the Markdown extension.
+
+ Parameters:
+ anchor_scanner_factory: A callable that returns an instance of the anchor scanner tree processor.
+ **kwargs: Keyword arguments passed to the [base constructor][markdown.extensions.Extension].
+ """
+ super().__init__(**kwargs)
+ self.anchor_scanner_factory = anchor_scanner_factory
+ self.anchor_scanner: AnchorScannerTreeProcessor | None = None
+
def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name)
"""Register the extension.
@@ -213,3 +256,10 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
"mkdocs-autorefs",
priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor
)
+ if self.anchor_scanner_factory:
+ self.anchor_scanner = self.anchor_scanner_factory(md)
+ md.treeprocessors.register(
+ self.anchor_scanner,
+ "mkdocs-autorefs-anchors-scanner",
+ priority=0,
+ )
diff --git a/tests/test_plugin.py b/tests/test_plugin.py
index 8acd446..8fcae75 100644
--- a/tests/test_plugin.py
+++ b/tests/test_plugin.py
@@ -60,3 +60,27 @@ def test_dont_make_relative_urls_relative_again() -> None:
plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",))
== "../../foo/bar/baz.html#foo.bar.baz"
)
+
+
+def test_register_html_anchors() -> None:
+ """Check that HTML anchors are registered when enabled."""
+ plugin = AutorefsPlugin()
+ plugin.scan_toc = False
+ plugin.scan_anchors = True
+
+ class Page:
+ url = "/page/url"
+
+ plugin.on_page_content(
+ """
+
+
+
+
+ """,
+ page=Page(), # type: ignore[arg-type]
+ )
+ assert "foo.bar" in plugin._url_map
+ assert "foo.baz" not in plugin._url_map
+ assert "foo.qux" in plugin._url_map
+ assert "qux.foo" in plugin._url_map