Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dump-html: remove item id of links to binary data (#1742) #1861

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions src/moin/cli/maint/dump_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ def Dump(directory="HTML", theme="topside_cms", exclude_ns="userprofiles", user=
# to: <img alt="svg" src="+get/svg" />
invalid_src = re.compile(r' src="/\+get/\+[0-9a-f]{32}/')

invalid_href = re.compile(r' href="/\+get/\+[0-9a-f]{32}/')

# get ready to render and copy individual items
names = []
home_page = None
Expand All @@ -173,9 +175,11 @@ def Dump(directory="HTML", theme="topside_cms", exclude_ns="userprofiles", user=
# In the filesystem, an item cannot have the same name as a directory,
# so we append .html to the filename for items in used_dirs.
for current_rev in app.storage.search(q, limit=None, sortedby=("namespace", "name")):

if current_rev.namespace in exclude_ns:
# we usually do not copy userprofiles; no one can log in to a static wiki
continue

if not current_rev.name:
# TODO: we skip nameless tickets, but named tickets and comments are processed with ugly names
continue
Expand All @@ -199,21 +203,28 @@ def Dump(directory="HTML", theme="topside_cms", exclude_ns="userprofiles", user=
if not isinstance(rendered, str):
print(f"Rendering failed for {file_name} with response {rendered}")
continue

# remove item ID from: href="/+get/+7cb364b8ca5d4b7e960a4927c99a2912/example.drawio"
rendered = re.sub(invalid_href, ' href="/+get/', rendered)

# remove item ID from: src="/+get/+7cb364b8ca5d4b7e960a4927c99a2912/svg"
rendered = re.sub(invalid_src, ' src="/+get/', rendered)

# make hrefs relative to root folder
rel_path2root = PARENT_DIR * len(re.findall("/", item_name))
rendered = rendered.replace('href="/', 'href="' + rel_path2root)
rendered = rendered.replace('src="/static/', 'src="' + rel_path2root + "static/")
rendered = rendered.replace('src="/+serve/', 'src="+serve/')
rendered = rendered.replace('href="+index/"', 'href="+index"') # trailing slash changes relative position

# TODO: fix basic theme
rendered = rendered.replace('<a href="">', f'<a href="{app.cfg.default_root}">')
# remove item ID from: src="/+get/+7cb364b8ca5d4b7e960a4927c99a2912/svg"
valid_src = f' src="{rel_path2root}+get/'
rendered = re.sub(invalid_src, valid_src, rendered)

# correct links inside document
for node in used_dirs:
node_href = f'href="{rel_path2root}{node}"'
rendered = rendered.replace(node_href, node_href[:-1] + '.html"')

# copy raw data for all items to output /+get directory;
# images are required, text items are of marginal/no benefit
item = app.storage[current_rev.fqname.fullname]
Expand Down