Skip to content

Commit

Permalink
Auto-generate anchors for section headings to allow GitHub-style same…
Browse files Browse the repository at this point in the history
…-page links
  • Loading branch information
hunyadi committed Sep 11, 2024
1 parent b8f92f6 commit 487514d
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 1 deletion.
8 changes: 8 additions & 0 deletions md2conf/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Arguments(argparse.Namespace):
space: str
loglevel: str
ignore_invalid_url: bool
heading_anchors: bool
generated_by: Optional[str]


Expand Down Expand Up @@ -100,6 +101,12 @@ def main() -> None:
default="png",
help="Format for rendering Mermaid diagrams (default: 'png').",
)
parser.add_argument(
"--heading-anchors",
action="store_true",
default=False,
help="Place an anchor at each section heading with GitHub-style same-page identifiers.",
)
parser.add_argument(
"--ignore-invalid-url",
action="store_true",
Expand All @@ -126,6 +133,7 @@ def main() -> None:
)

options = ConfluenceDocumentOptions(
heading_anchors=args.heading_anchors,
ignore_invalid_url=args.ignore_invalid_url,
generated_by=args.generated_by,
root_page_id=args.root_page,
Expand Down
54 changes: 53 additions & 1 deletion md2conf/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,16 +220,30 @@ def transform(self, child: ET._Element) -> Optional[ET._Element]:
pass


def title_to_identifier(title: str) -> str:
    """
    Converts a section heading title to a GitHub-style Markdown same-page anchor.

    The title is stripped of surrounding whitespace and lower-cased, every character other than an ASCII letter,
    digit, space, hyphen or underscore is dropped, and each remaining space is replaced with a hyphen. Hyphens and
    underscores are preserved because GitHub keeps them in its generated identifiers, e.g. the heading
    `Well-known types` maps to `well-known-types`.

    :param title: Plain text of the section heading.
    :returns: Identifier for the anchor; an empty string if the title contains no permitted characters.
    """

    s = title.strip().lower()
    # the text is already lower-case, so upper-case letters need not be listed in the character class
    s = re.sub(r"[^ a-z0-9_-]", "", s)
    return s.replace(" ", "-")


@dataclass
class ConfluenceConverterOptions:
    """
    Settings that control how an HTML tree is turned into Confluence storage format.

    :param ignore_invalid_url: If set, an invalid URL in the input produces a warning and the anchor is replaced
        with plain text; if not set, an invalid URL raises an exception.
    :param heading_anchors: If set, each section heading receives a structured macro *anchor* whose identifier
        follows GitHub conversion rules.
    :param render_mermaid: Whether Mermaid diagrams are pre-rendered into PNG/SVG images.
    :param diagram_output_format: Image format that diagrams are rendered to.
    """

    ignore_invalid_url: bool = False
    heading_anchors: bool = False
    render_mermaid: bool = False
    diagram_output_format: Literal["png", "svg"] = "png"

Expand Down Expand Up @@ -260,6 +274,30 @@ def __init__(
self.embedded_images = {}
self.page_metadata = page_metadata

def _transform_heading(self, heading: ET._Element) -> None:
    """
    Prepends a Confluence *anchor* structured macro to a heading element.

    The anchor identifier is derived from the heading's concatenated text content via `title_to_identifier`.
    Child elements of the heading are visited before the macro is inserted.

    :param heading: The heading element to modify in place.
    """

    # capture the text content before child elements are visited
    heading_text = "".join(heading.itertext()).strip()

    for child in heading:
        self.visit(child)

    macro_attrs = {
        ET.QName(namespaces["ac"], "name"): "anchor",
        ET.QName(namespaces["ac"], "schema-version"): "1",
    }
    # the anchor macro takes its identifier in a parameter with an empty name
    identifier_param = AC(
        "parameter",
        {ET.QName(namespaces["ac"], "name"): ""},
        title_to_identifier(heading_text),
    )
    macro = AC("structured-macro", macro_attrs, identifier_param)

    # the macro becomes the first child; text that used to lead the heading now trails the macro
    heading.insert(0, macro)
    macro.tail = heading.text
    heading.text = None

def _transform_link(self, anchor: ET._Element) -> None:
url = anchor.attrib["href"]
if is_absolute_url(url):
Expand Down Expand Up @@ -565,7 +603,7 @@ def _transform_section(self, elem: ET._Element) -> ET._Element:
if elem[0].tail is not None:
raise DocumentError('expected: attribute `markdown="1"` on `<details>`')

summary = "".join(elem[0].itertext()).strip() or ""
summary = "".join(elem[0].itertext()).strip()
elem.remove(elem[0])

self.visit(elem)
Expand Down Expand Up @@ -593,6 +631,13 @@ def transform(self, child: ET._Element) -> Optional[ET._Element]:
tail: str = child.tail
child.tail = tail.replace("\n", " ")

if self.options.heading_anchors:
# <h1>...</h1>
# <h2>...</h2> ...
if re.match(r"^h[1-6]$", child.tag, flags=re.IGNORECASE) is not None:
self._transform_heading(child)
return None

# <p><img src="..." /></p>
if child.tag == "p" and len(child) == 1 and child[0].tag == "img":
return self._transform_image(child[0])
Expand Down Expand Up @@ -703,10 +748,16 @@ class ConfluenceDocumentOptions:
:param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
plain text; when false, raise an exception.
:param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
conversion rules for the identifier.
:param generated_by: Text to use as the generated-by prompt.
:param show_generated: Whether to display a prompt "This page has been generated with a tool."
:param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
:param diagram_output_format: Target image format for diagrams.
"""

ignore_invalid_url: bool = False
heading_anchors: bool = False
generated_by: Optional[str] = "This page has been generated with a tool."
root_page_id: Optional[str] = None
render_mermaid: bool = False
Expand Down Expand Up @@ -769,6 +820,7 @@ def __init__(
converter = ConfluenceStorageFormatConverter(
ConfluenceConverterOptions(
ignore_invalid_url=self.options.ignore_invalid_url,
heading_anchors=self.options.heading_anchors,
render_mermaid=self.options.render_mermaid,
diagram_output_format=self.options.diagram_output_format,
),
Expand Down
11 changes: 11 additions & 0 deletions tests/target/anchors.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<ac:structured-macro ac:name="info" ac:schema-version="1">
<ac:rich-text-body>
<p>This page has been generated with a tool.</p>
</ac:rich-text-body>
</ac:structured-macro>
<h2><ac:structured-macro ac:name="anchor" ac:schema-version="1"><ac:parameter ac:name="">sections</ac:parameter></ac:structured-macro>Sections</h2>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam dignissim sem vel tortor scelerisque sodales. Quisque id faucibus massa. Integer enim nulla, cursus vel facilisis id, convallis in nunc. Suspendisse potenti. Aenean vehicula semper fringilla. Nulla pretium vitae enim sed feugiat. Integer molestie a elit ac fermentum.</p>
<h3><ac:structured-macro ac:name="anchor" ac:schema-version="1"><ac:parameter ac:name="">subsection-1</ac:parameter></ac:structured-macro>Subsection 1</h3>
<p>Integer vel cursus neque. Ut a ex orci. Nulla elementum lacus sed accumsan rutrum. Donec dui augue, porta at diam quis, ultricies rhoncus ipsum. Aliquam posuere mollis varius. Ut tempor velit facilisis ligula mattis consectetur. Praesent sit amet enim vitae est facilisis auctor vitae at nisi. Aliquam erat volutpat. Donec commodo vestibulum lorem sit amet semper. Vivamus vestibulum sem bibendum, pretium risus aliquam, pharetra augue. Donec id quam sit amet est cursus elementum ut eget massa. Duis vel sapien nunc. Phasellus dui risus, auctor eu erat vitae, vehicula dignissim justo. Morbi nec auctor leo. Praesent tincidunt faucibus tortor in iaculis.</p>
<h3><ac:structured-macro ac:name="anchor" ac:schema-version="1"><ac:parameter ac:name="">subsection-2</ac:parameter></ac:structured-macro>Subsection 2</h3>
<p>Nullam a dictum urna, volutpat convallis purus. Fusce at vulputate mauris. Donec dignissim id nisl quis ultricies. Quisque pulvinar nulla risus, vel tincidunt felis pretium hendrerit. Pellentesque placerat, mi vel luctus tristique, sem nisi volutpat dolor, vitae pulvinar ipsum orci vitae ligula. Morbi tempus at tortor sed volutpat. Vivamus facilisis orci libero, ac fermentum metus iaculis eget. Morbi aliquam enim et semper dapibus. Suspendisse ut velit sit amet metus convallis elementum. Cras sagittis justo ac venenatis pulvinar. Integer est massa, blandit vel nulla at, dignissim finibus dui.</p>
13 changes: 13 additions & 0 deletions tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ def test_markdown(self) -> None:

self.assertEqual(actual, expected)

def test_heading_anchors(self) -> None:
    "Checks that converting `sections.md` with heading anchors enabled matches `anchors.xml`."

    options = ConfluenceDocumentOptions(ignore_invalid_url=True, heading_anchors=True)
    document = ConfluenceDocument(self.source_dir / "sections.md", options, {})
    actual = standardize(document.xhtml())

    with open(self.target_dir / "anchors.xml", "r", encoding="utf-8") as target_file:
        expected = canonicalize(target_file.read())

    self.assertEqual(actual, expected)

def test_mermaid_embedded_svg(self) -> None:
document = ConfluenceDocument(
self.source_dir / "mermaid.md",
Expand Down

0 comments on commit 487514d

Please sign in to comment.