Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ A tool to convert Powerpoint pptx file into markdown.
* [Tiddlywiki](https://tiddlywiki.com/)'s wikitext
* [Madoko](https://www.madoko.net/)
* [Quarto](https://quarto.org/)
* [Wiki.js](https://js.wiki/) Markdown, with image links corrected for Wiki.js (forward slashes and lowercase file names)

_Please star this repo if you like it!_

Expand Down Expand Up @@ -87,7 +88,7 @@ Use it with `pptx2md [filename] -t titles.txt`.
* `--enable-slides` delineate slides with `\n---\n`; this can help if you want to convert pptx slides to markdown slides
* `--try-multi-column` try to detect multi-column slides (very slow)
* `--min-block-size [size]` the minimum number of characters for a text block to be outputted
* `--wiki` / `--mdk` if you happen to be using tiddlywiki or madoko, this argument outputs the corresponding markup language
* `--wiki` / `--mdk` / `--wikijs` if you happen to be using tiddlywiki, madoko, or wikijs, this argument outputs the corresponding markup language
* `--qmd` outputs to the qmd markup language used for [quarto](https://quarto.org/docs/presentations/revealjs/) powered presentations
* `--page [number]` only convert the specified page
* `--keep-similar-titles` keep similar titles and add "(cont.)" to repeated slide titles
Expand Down Expand Up @@ -160,6 +161,7 @@ The `ConversionConfig` class accepts the same parameters as the command line arg
- `try_multi_column`: Attempt to detect multi-column slides
- `min_block_size`: Minimum text block size
- `wiki`: Output in TiddlyWiki format
- `wikijs`: Output in Wiki.js format
- `mdk`: Output in Madoko format
- `qmd`: Output in Quarto format
- `page`: Convert only specified page number
Expand Down
3 changes: 3 additions & 0 deletions pptx2md/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def parse_args() -> ConversionConfig:
arg_parser.add_argument('--enable-slides', action="store_true", help='deliniate slides `\n---\n`')
arg_parser.add_argument('--try-multi-column', action="store_true", help='try to detect multi-column slides')
arg_parser.add_argument('--wiki', action="store_true", help='generate output as wikitext(TiddlyWiki)')
arg_parser.add_argument('--wikijs', action="store_true", help='generate output and imagelinks compatible to wikijs')
arg_parser.add_argument('--mdk', action="store_true", help='generate output as madoko markdown')
arg_parser.add_argument('--qmd', action="store_true", help='generate output as quarto markdown presentation')
arg_parser.add_argument('--min-block-size',
Expand Down Expand Up @@ -73,9 +74,11 @@ def parse_args() -> ConversionConfig:
disable_color=args.disable_color,
disable_escaping=args.disable_escaping,
disable_notes=args.disable_notes,
disable_master=args.disable_master,
enable_slides=args.enable_slides,
try_multi_column=args.try_multi_column,
is_wiki=args.wiki,
is_wiki_js=args.wikijs,
is_mdk=args.mdk,
is_qmd=args.qmd,
min_block_size=args.min_block_size,
Expand Down
2 changes: 2 additions & 0 deletions pptx2md/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def convert(config: ConversionConfig):

if config.is_wiki:
out = outputter.WikiFormatter(config)
elif config.is_wiki_js:
out = outputter.WikijsFormatter(config)
elif config.is_mdk:
out = outputter.MadokoFormatter(config)
elif config.is_qmd:
Expand Down
8 changes: 8 additions & 0 deletions pptx2md/outputter.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,14 @@ def get_escaped(self, text):
text = re.sub(self.esc_re2, self.esc_repl, text)
return text

class WikijsFormatter(MarkdownFormatter):
    """Markdown formatter that emits image links with a leading slash."""

    def put_image(self, path, max_width=None):
        """Write an image reference for ``path`` to the output file.

        The path is percent-encoded and prefixed with ``/``. Without
        ``max_width`` a Markdown image is written; with it, an HTML ``<img>``
        tag carrying a ``max-width`` style (in pixels) is written instead.
        Either form is followed by a blank line.
        """
        # Encode once; both output forms share the same slash-prefixed URL.
        image_url = '/' + urllib.parse.quote(path)

        if max_width is not None:
            self.ofile.write(f'<img src="{image_url}" style="max-width:{max_width}px;" />\n\n')
            return

        self.ofile.write(f'![]({image_url})\n\n')


class WikiFormatter(Formatter):
# write outputs to wikitext
Expand Down
14 changes: 12 additions & 2 deletions pptx2md/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from operator import attrgetter
from typing import List, Union

from pathlib import Path

from PIL import Image
from pptx import Presentation
from pptx.enum.dml import MSO_COLOR_TYPE, MSO_THEME_COLOR
Expand Down Expand Up @@ -152,13 +154,15 @@ def process_picture(config: ConversionConfig, shape, slide_idx) -> Union[ImageEl

file_prefix = ''.join(os.path.basename(config.pptx_path).split('.')[:-1])
pic_name = file_prefix + f'_{picture_count}'
if(config.is_wiki_js):
pic_name = pic_name.lower()
pic_ext = shape.image.ext
if not os.path.exists(config.image_dir):
os.makedirs(config.image_dir)

output_path = config.image_dir / f'{pic_name}.{pic_ext}'
common_path = os.path.commonpath([config.output_path, config.image_dir])
img_outputter_path = os.path.relpath(output_path, common_path)
img_outputter_path = Path(os.path.relpath(output_path, common_path)).as_posix()
with open(output_path, 'wb') as f:
f.write(shape.image.blob)
picture_count += 1
Expand Down Expand Up @@ -240,7 +244,13 @@ def process_shapes(config: ConversionConfig, current_shapes, slide_id: int) -> L

def parse(config: ConversionConfig, prs: Presentation) -> ParsedPresentation:
result = ParsedPresentation(slides=[])

if(config.disable_master):
for slide in prs.slides:
for shape in slide.shapes:
# You can selectively remove shapes (like text, pictures, etc.)
if shape in prs.slide_master.slide_layouts[0]:
sp = shape
slide.shapes._spTree.remove(sp._element)
for idx, slide in enumerate(tqdm(prs.slides, desc='Converting slides')):
if config.page is not None and idx + 1 != config.page:
continue
Expand Down
6 changes: 6 additions & 0 deletions pptx2md/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,18 @@ class ConversionConfig(BaseModel):
disable_notes: bool = False
"""Do not add presenter notes"""

disable_master: bool = False
"""Do not add master layout"""

enable_slides: bool = False
"""Deliniate slides with `\n---\n`"""

is_wiki: bool = False
"""Generate output as wikitext (TiddlyWiki)"""

is_wiki_js: bool = False
"""Generate output and especially img links compatible to Wiki.js"""

is_mdk: bool = False
"""Generate output as madoko markdown"""

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pptx2md"
version = "2.0.6"
version = "2.0.18"
description = "This package converts pptx to markdown"
repository = "https://github.com/ssine/pptx2md"
authors = ["Liu Siyao <liu.siyao@qq.com>"]
Expand Down