Skip to content

Commit

Permalink
🗒️
Browse files Browse the repository at this point in the history
  • Loading branch information
riaf committed Dec 3, 2023
1 parent b3ea9c9 commit a8d4896
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 29 deletions.
1 change: 1 addition & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
- 'script.py'
- 'Dockerfile'
- 'requirements.txt'
- '.github/workflows/docker-build.yml'

jobs:
build:
Expand Down
7 changes: 7 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"recommendations": [
"github.vscode-github-actions",
"ms-python.black-formatter",
"ms-python.python"
]
}
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
}
}
69 changes: 40 additions & 29 deletions script.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,77 +9,83 @@
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom


def parse_hugo_markdown(file_path):
    """Read a Hugo Markdown file and split it into metadata and HTML.

    The file is expected to open with a front-matter block fenced by lines
    consisting solely of ``---``. Only flat ``key: value`` lines inside that
    block are picked up; every value is returned as a string.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A ``(meta_dict, html_body)`` tuple: the front-matter mapping and the
        Markdown body rendered to HTML with ``markdown2``. When the file has
        no front-matter block, ``meta_dict`` is empty and the whole file is
        rendered as the body.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()

    # A UTF-8 byte-order mark would stop the opening fence from being
    # recognised as the start of a line.
    content = content.lstrip("\ufeff")

    # Match the fence only when it fills a whole line, and split at most
    # twice, so that "---" appearing later in the body (horizontal rules,
    # table separators) does not truncate the post.
    parts = re.split(
        r"^---[ \t]*\r?$\n?", content, maxsplit=2, flags=re.MULTILINE
    )
    if len(parts) == 3 and not parts[0].strip():
        metadata, body = parts[1], parts[2]
    else:
        # No complete front-matter block at the top of the file.
        metadata, body = "", content

    meta_dict = {}
    for line in metadata.splitlines():
        if ": " not in line:
            continue
        key, value = line.split(": ", 1)
        value = value.strip()
        # Drop one pair of matching quotes so `title: "Post"` yields `Post`.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        meta_dict[key.strip()] = value

    html_body = markdown2.markdown(body)

    return meta_dict, html_body


def parse_date(date_str):
    """Parse a front-matter date string, returning ``None`` when unusable.

    Args:
        date_str: The raw date text taken from the post metadata.

    Returns:
        The value produced by ``date_parser.parse`` on success, or ``None``
        when the input is empty or cannot be interpreted as a date.
    """
    if not date_str:
        # Nothing to parse; report "no date" without invoking the parser.
        return None
    try:
        return date_parser.parse(date_str)
    except (ValueError, OverflowError, TypeError):
        # Besides malformed text (ValueError), the parser can raise
        # OverflowError for out-of-range values and TypeError for
        # non-string input; none of these should abort the whole export.
        return None


def create_guid(file_path, date):
    """Derive a stable identifier from a file path and its date.

    Args:
        file_path: Path of the source file.
        date: Date value associated with the file.

    Returns:
        The hexadecimal SHA-256 digest of ``"<file_path>-<date>"``.
    """
    digest = hashlib.sha256()
    digest.update("{}-{}".format(file_path, date).encode())
    return digest.hexdigest()


def _first_category(raw, default="Uncategorized"):
    """Return the first non-empty category name found in *raw*.

    *raw* may be a list/tuple of names, a plain string (``Tech``) or the
    text of an inline list (``["Tech", "News"]``). *default* is returned
    when no usable name is present.
    """
    if raw is None:
        return default
    if isinstance(raw, (list, tuple)):
        candidates = raw
    else:
        text = str(raw).strip()
        if text.startswith("[") and text.endswith("]"):
            text = text[1:-1]
        candidates = text.split(",")
    for candidate in candidates:
        name = str(candidate).strip().strip("\"'").strip()
        if name:
            return name
    return default


def create_wxr_element(meta_data, html_content, file_path):
    """Build the ``<item>`` element describing one post.

    Args:
        meta_data: Mapping of front-matter keys to values; ``title``,
            ``date`` and ``categories`` are consulted.
        html_content: Post body as HTML, stored in ``content:encoded``.
        file_path: Path of the source file, used when deriving the GUID.

    Returns:
        An ``Element`` named ``item`` with ``title``, ``pubDate``,
        ``dc:creator``, ``guid``, ``content:encoded`` and ``category``
        children, in that order.

    The creator name and the GUID prefix are read from the
    ``HUGO_WXR_CREATOR`` and ``HUGO_WXR_GUID`` environment variables,
    falling back to ``admin`` and ``http://example.com/``.
    """
    item = Element("item")
    title = SubElement(item, "title")
    pub_date = SubElement(item, "pubDate")
    creator = SubElement(item, "dc:creator")
    guid = SubElement(item, "guid", isPermaLink="false")
    content_encoded = SubElement(item, "content:encoded")
    category = SubElement(item, "category")

    title.text = meta_data.get("title", "No Title")
    creator.text = os.getenv("HUGO_WXR_CREATOR", "admin")

    date_str = meta_data.get("date", "")
    parsed_date = parse_date(date_str)
    if parsed_date:
        pub_date.text = parsed_date.strftime("%a, %d %b %Y %H:%M:%S %z")
    else:
        pub_date.text = "Invalid Date"

    guid_base = os.getenv("HUGO_WXR_GUID", "http://example.com/")
    guid.text = f"{guid_base}{create_guid(file_path, date_str)}"

    content_encoded.text = html_content
    # Metadata values are usually strings, so indexing with [0] would yield
    # a single character; extract the first category name instead.
    category.text = _first_category(meta_data.get("categories"))

    return item


def create_wxr_document(items):
rss = Element('rss')
rss.set('version', '2.0')
rss.set('xmlns:content', 'http://purl.org/rss/1.0/modules/content/')
rss.set('xmlns:dc', 'http://purl.org/dc/elements/1.1/')
rss.set('xmlns:wp', 'http://wordpress.org/export/1.2/')
rss = Element("rss")
rss.set("version", "2.0")
rss.set("xmlns:content", "http://purl.org/rss/1.0/modules/content/")
rss.set("xmlns:dc", "http://purl.org/dc/elements/1.1/")
rss.set("xmlns:wp", "http://wordpress.org/export/1.2/")

channel = SubElement(rss, 'channel')
channel = SubElement(rss, "channel")
for item in items:
channel.append(item)

return minidom.parseString(tostring(rss)).toprettyxml(indent=" ")


def process_hugo_directory(content_dir):
markdown_files = glob.glob(content_dir + '/**/*.md', recursive=True)
markdown_files = glob.glob(content_dir + "/**/*.md", recursive=True)
wxr_items = []

for file_path in markdown_files:
Expand All @@ -92,17 +98,22 @@ def process_hugo_directory(content_dir):

return wxr_items


def parse_arguments():
    """Parse the command line of the converter.

    Returns:
        The ``argparse`` namespace; its ``content_dir`` attribute holds the
        single positional argument.
    """
    cli = argparse.ArgumentParser(
        description="Convert Hugo content to WordPress WXR format."
    )
    cli.add_argument("content_dir", help="Path to the Hugo content directory.")
    namespace = cli.parse_args()
    return namespace


def main():
    """Convert the directory named on the command line and print the result."""
    content_dir = parse_arguments().content_dir
    document = create_wxr_document(process_hugo_directory(content_dir))
    print(document)


if __name__ == "__main__":
    main()

0 comments on commit a8d4896

Please sign in to comment.