diff --git a/README.md b/README.md index 02a63a5..b0ba1ce 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ A tool to convert Powerpoint pptx file into markdown. * [Tiddlywiki](https://tiddlywiki.com/)'s wikitext * [Madoko](https://www.madoko.net/) * [Quarto](https://quarto.org/) +* [Wiki.js](https://js.wiki/): Markdown with corrected image links for Wiki.js (forward slashes and lowercase names) _Please star this repo if you like it!_ @@ -87,7 +88,7 @@ Use it with `pptx2md [filename] -t titles.txt`. * `--enable-slides` deliniate slides `\n---\n`, this can help if you want to convert pptx slides to markdown slides * `--try-multi-column` try to detect multi-column slides (very slow) * `--min-block-size [size]` the minimum number of characters for a text block to be outputted -* `--wiki` / `--mdk` if you happen to be using tiddlywiki or madoko, this argument outputs the corresponding markup language +* `--wiki` / `--mdk` / `--wikijs` if you happen to be using tiddlywiki, madoko, or Wiki.js, this argument outputs the corresponding markup language * `--qmd` outputs to the qmd markup language used for [quarto](https://quarto.org/docs/presentations/revealjs/) powered presentations * `--page [number]` only convert the specified page * `--keep-similar-titles` keep similar titles and add "(cont.)" to repeated slide titles @@ -160,6 +161,7 @@ The `ConversionConfig` class accepts the same parameters as the command line arg - `try_multi_column`: Attempt to detect multi-column slides - `min_block_size`: Minimum text block size - `wiki`: Output in TiddlyWiki format +- `wikijs`: Output in Wiki.js format - `mdk`: Output in Madoko format - `qmd`: Output in Quarto format - `page`: Convert only specified page number diff --git a/pptx2md/__main__.py b/pptx2md/__main__.py index 2f5d592..d6713a9 100644 --- a/pptx2md/__main__.py +++ b/pptx2md/__main__.py @@ -43,6 +43,7 @@ def parse_args() -> ConversionConfig: arg_parser.add_argument('--enable-slides', action="store_true", 
help='deliniate slides `\n---\n`') arg_parser.add_argument('--try-multi-column', action="store_true", help='try to detect multi-column slides') arg_parser.add_argument('--wiki', action="store_true", help='generate output as wikitext(TiddlyWiki)') + arg_parser.add_argument('--wikijs', action="store_true", help='generate output and imagelinks compatible to wikijs') arg_parser.add_argument('--mdk', action="store_true", help='generate output as madoko markdown') arg_parser.add_argument('--qmd', action="store_true", help='generate output as quarto markdown presentation') arg_parser.add_argument('--min-block-size', @@ -73,9 +74,11 @@ def parse_args() -> ConversionConfig: disable_color=args.disable_color, disable_escaping=args.disable_escaping, disable_notes=args.disable_notes, + disable_master=args.disable_master, enable_slides=args.enable_slides, try_multi_column=args.try_multi_column, is_wiki=args.wiki, + is_wiki_js=args.wikijs, is_mdk=args.mdk, is_qmd=args.qmd, min_block_size=args.min_block_size, diff --git a/pptx2md/entry.py b/pptx2md/entry.py index 6ad1bb8..35ba915 100644 --- a/pptx2md/entry.py +++ b/pptx2md/entry.py @@ -40,6 +40,8 @@ def convert(config: ConversionConfig): if config.is_wiki: out = outputter.WikiFormatter(config) + elif config.is_wiki_js: + out = outputter.WikijsFormatter(config) elif config.is_mdk: out = outputter.MadokoFormatter(config) elif config.is_qmd: diff --git a/pptx2md/outputter.py b/pptx2md/outputter.py index 3beadd6..9200c1e 100644 --- a/pptx2md/outputter.py +++ b/pptx2md/outputter.py @@ -200,6 +200,14 @@ def get_escaped(self, text): text = re.sub(self.esc_re2, self.esc_repl, text) return text +class WikijsFormatter(MarkdownFormatter): + + def put_image(self, path, max_width=None): + if max_width is None: + self.ofile.write(f'![](/{urllib.parse.quote(path)})\n\n') + else: + self.ofile.write(f'<img src="/{path}" style="max-width:{max_width}px;" />\n\n') + class WikiFormatter(Formatter): # write outputs to wikitext diff --git a/pptx2md/parser.py b/pptx2md/parser.py index edcc10a..3c3e6aa 
100644 --- a/pptx2md/parser.py +++ b/pptx2md/parser.py @@ -20,6 +20,8 @@ from operator import attrgetter from typing import List, Union +from pathlib import Path + from PIL import Image from pptx import Presentation from pptx.enum.dml import MSO_COLOR_TYPE, MSO_THEME_COLOR @@ -152,13 +154,15 @@ def process_picture(config: ConversionConfig, shape, slide_idx) -> Union[ImageEl file_prefix = ''.join(os.path.basename(config.pptx_path).split('.')[:-1]) pic_name = file_prefix + f'_{picture_count}' + if(config.is_wiki_js): + pic_name = pic_name.lower() pic_ext = shape.image.ext if not os.path.exists(config.image_dir): os.makedirs(config.image_dir) output_path = config.image_dir / f'{pic_name}.{pic_ext}' common_path = os.path.commonpath([config.output_path, config.image_dir]) - img_outputter_path = os.path.relpath(output_path, common_path) + img_outputter_path = Path(os.path.relpath(output_path, common_path)).as_posix() with open(output_path, 'wb') as f: f.write(shape.image.blob) picture_count += 1 @@ -240,7 +244,13 @@ def process_shapes(config: ConversionConfig, current_shapes, slide_id: int) -> L def parse(config: ConversionConfig, prs: Presentation) -> ParsedPresentation: result = ParsedPresentation(slides=[]) - + if(config.disable_master): + for slide in prs.slides: + for shape in slide.shapes: + # You can selectively remove shapes (like text, pictures, etc.) 
+ if shape in prs.slide_master.slide_layouts[0]: + sp = shape + slide.shapes._spTree.remove(sp._element) for idx, slide in enumerate(tqdm(prs.slides, desc='Converting slides')): if config.page is not None and idx + 1 != config.page: continue diff --git a/pptx2md/types.py b/pptx2md/types.py index 0de9e4f..d02e82b 100644 --- a/pptx2md/types.py +++ b/pptx2md/types.py @@ -54,12 +54,18 @@ class ConversionConfig(BaseModel): disable_notes: bool = False """Do not add presenter notes""" + disable_master: bool = False + """Do not add master layout""" + enable_slides: bool = False """Deliniate slides with `\n---\n`""" is_wiki: bool = False """Generate output as wikitext (TiddlyWiki)""" + is_wiki_js: bool = False + """Generate output and especially img links compatible to Wiki.js""" + is_mdk: bool = False """Generate output as madoko markdown""" diff --git a/pyproject.toml b/pyproject.toml index 9d21af0..d502de4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pptx2md" -version = "2.0.6" +version = "2.0.18" description = "This package converts pptx to markdown" repository = "https://github.com/ssine/pptx2md" authors = ["Liu Siyao "]