Skip to content

Commit

Permalink
Raise meaningful error when Markdown has src with absolute URL
Browse files (browse the repository at this point in the history)
  • Loading branch information
hunyadi committed Nov 21, 2024
1 parent c5025f2 commit 582fd94
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 19 deletions.
2 changes: 1 addition & 1 deletion integration_tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_markdown(self) -> None:
self.assertListEqual(document.links, [])
self.assertListEqual(
document.images,
["figure/interoperability.png", "figure/interoperability.png"],
[Path("figure/interoperability.png"), Path("figure/interoperability.png")],
)

with open(self.out_dir / "document.html", "w", encoding="utf-8") as f:
Expand Down
6 changes: 3 additions & 3 deletions md2conf/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,11 @@ def _update_document(self, document: ConfluenceDocument, base_path: Path) -> Non
attachment_name(image),
)

for image, data in document.embedded_images.items():
for name, data in document.embedded_images.items():
self.api.upload_attachment(
document.id.page_id,
Path("EMB") / image,
attachment_name(image),
Path("EMB") / name,
name,
raw_data=data,
)

Expand Down
38 changes: 23 additions & 15 deletions md2conf/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import xml.etree.ElementTree
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from urllib.parse import ParseResult, urlparse, urlunparse

import lxml.etree as ET
Expand Down Expand Up @@ -304,7 +304,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
base_dir: Path
root_dir: Path
links: List[str]
images: List[str]
images: List[Path]
embedded_images: Dict[str, bytes]
page_metadata: Dict[Path, ConfluencePageMetadata]

Expand Down Expand Up @@ -434,17 +434,24 @@ def _transform_link(self, anchor: ET._Element) -> Optional[ET._Element]:
def _transform_image(self, image: ET._Element) -> ET._Element:
path: str = image.attrib["src"]

if not path:
raise DocumentError("image lacks `src` attribute")

if is_absolute_url(path):
# images whose `src` attribute is an absolute URL cannot be converted into an `ac:image`;
# Confluence images are expected to refer to an uploaded attachment
raise DocumentError("image has a `src` attribute that is an absolute URL")

relative_path = Path(path)

# prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
if path and is_relative_url(path):
relative_path = Path(path)
if (
relative_path.suffix == ".svg"
and (self.base_dir / relative_path.with_suffix(".png")).exists()
):
path = str(relative_path.with_suffix(".png"))

self.images.append(path)
png_file = relative_path.with_suffix(".png")
if relative_path.suffix == ".svg" and (self.base_dir / png_file).exists():
relative_path = png_file

self.images.append(relative_path)
caption = image.attrib["alt"]
image_name = attachment_name(relative_path)
return AC(
"image",
{
Expand All @@ -453,7 +460,8 @@ def _transform_image(self, image: ET._Element) -> ET._Element:
},
RI(
"attachment",
{ET.QName(namespaces["ri"], "filename"): attachment_name(path)},
# refers to an attachment uploaded alongside the page
{ET.QName(namespaces["ri"], "filename"): image_name},
),
AC("caption", HTML.p(caption)),
)
Expand Down Expand Up @@ -937,7 +945,7 @@ class ConfluenceDocumentOptions:
class ConfluenceDocument:
id: ConfluenceQualifiedID
links: List[str]
images: List[str]
images: List[Path]

options: ConfluenceDocumentOptions
root: ET._Element
Expand Down Expand Up @@ -1016,7 +1024,7 @@ def xhtml(self) -> str:
return elements_to_string(self.root)


def attachment_name(name: str) -> str:
def attachment_name(name: Union[Path, str]) -> str:
"""
Safe name for use with attachment uploads.
Expand All @@ -1025,7 +1033,7 @@ def attachment_name(name: str) -> str:
* Special characters: hyphen (-), underscore (_), period (.)
"""

return re.sub(r"[^\-0-9A-Za-z_.]", "_", name)
return re.sub(r"[^\-0-9A-Za-z_.]", "_", str(name))


def sanitize_confluence(html: str) -> str:
Expand Down

0 comments on commit 582fd94

Please sign in to comment.