From 1734c1d2b9409d03ab28ab68a0a8f23321ce2be7 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 19 Jul 2024 14:25:45 +0300 Subject: [PATCH 01/41] added includes map generation --- foliant/preprocessors/includes.py | 44 ++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index eb6a582..e3e1deb 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -1,5 +1,7 @@ import re import urllib +import json +import os from shutil import rmtree from io import StringIO from hashlib import md5 @@ -43,6 +45,8 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' + self.src_dir = self.config.get("src_dir") + self.includes_map = {} self.logger = self.logger.getChild('includes') @@ -850,6 +854,7 @@ def process_includes( :returns: Markdown content with resolved includes ''' + recipient_md_path = markdown_file_path.relative_to(self.working_dir).as_posix() markdown_file_path = markdown_file_path.resolve() self.logger.debug(f'Processing Markdown file: {markdown_file_path}') @@ -867,11 +872,19 @@ def process_includes( include_statement = self.pattern.fullmatch(content_part) if include_statement: + donor_md_path = None + current_project_root_path = project_root_path body = self._tag_body_pattern.match(include_statement.group('body').strip()) options = self.get_options(include_statement.group('options')) + self.logger.debug(f'Include pair: {markdown_file_path} <- {options} {body}') + + # TODO: + # :param markdown_file_path: + # :returns date: + self.logger.debug( f'Processing include statement; body: {body}, options: {options}, ' + f'current project root path: {current_project_root_path}' @@ -949,6 +962,8 @@ def process_includes( self.logger.debug(f'Local path of the repo: {repo_path}') included_file_path = repo_path / body.group('path') + + donor_md_path = included_file_path.as_posix() + "1" if included_file_path.name.startswith('^'): included_file_path = self._find_file( @@ -975,6 +990,7 @@ def process_includes( else: self.logger.debug('Local file referenced') + donor_md_path = f"{self.src_dir}/{markdown_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" + "2" included_file_path = self._get_included_file_path(body.group('path'), markdown_file_path) if included_file_path.name.startswith('^'): @@ -1000,7 +1016,7 @@ def process_includes( nohead=options.get('nohead') ) - else: # if body + else: # if body missed self.logger.debug('Using the new syntax rules') if options.get('repo_url') and options.get('path'): @@ -1036,6 +1052,8 @@ def process_includes( include_link=include_link ) + donor_md_path = include_link + "3" + elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1061,13 +1079,22 @@ def process_includes( sethead=current_sethead, nohead=options.get('nohead') ) + + donor_md_path = options['url'] + "4" elif options.get('src'): self.logger.debug('Local file referenced') + # donor_md_path = f"{self.src_dir}/{markdown_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" + "5" included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) - self.logger.debug(f'Resolved path to the included file: {included_file_path}') + + if included_file_path.as_posix().startswith(os.getcwd()): + _path = included_file_path.relative_to(os.getcwd()) + if _path.as_posix().startswith(self.working_dir.as_posix()): + donor_md_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" + "5" + else: + donor_md_path = _path.as_posix() + "6" if options.get('project_root'): current_project_root_path = ( @@ -1087,6 +1114,7 @@ def process_includes( sethead=current_sethead, nohead=options.get('nohead') ) + else: self.logger.warning( 'Neither repo_url+path nor src specified, ignoring the include statement' @@ -1144,6 +1172,12 @@ def process_includes( processed_content_part = re.sub(r'\s+', ' ', processed_content_part).strip() + if donor_md_path: + if self.includes_map.get(recipient_md_path) == None : + self.includes_map[recipient_md_path] = [] + + self.includes_map[recipient_md_path].append({"path": donor_md_path}) + else: processed_content_part = content_part @@ -1191,7 +1225,6 @@ def apply(self): for source_file_path in self.working_dir.rglob(source_files_extension): with open(source_file_path, encoding='utf8') as source_file: source_content = source_file.read() - processed_content = self.process_includes( source_file_path, source_content, @@ -1201,5 +1234,10 @@ def apply(self): if processed_content: with open(source_file_path, 'w', encoding='utf8') as processed_file: processed_file.write(processed_content) + + # Write includes map + Path(f'{self.working_dir}/static/').mkdir(parents=True, exist_ok=True) + with open(f'{self.working_dir}/static/includes_map.json', 'w', encoding='utf8') as f: + json.dump(self.includes_map, f) self.logger.info('Preprocessor applied') From 443b5163fe95dd46a81de1368c51612f283ca23e Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 19 Jul 2024 14:30:01 +0300 Subject: [PATCH 02/41] fix --- foliant/preprocessors/includes.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index e3e1deb..243131b 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -963,7 +963,7 @@ def process_includes( included_file_path = repo_path / body.group('path') - donor_md_path = included_file_path.as_posix() + "1" + donor_md_path = included_file_path.as_posix() if included_file_path.name.startswith('^'): included_file_path = self._find_file( @@ -990,7 +990,7 @@ def process_includes( else: self.logger.debug('Local file referenced') - donor_md_path = f"{self.src_dir}/{markdown_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" + "2" + donor_md_path = f"{self.src_dir}/{markdown_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" included_file_path = self._get_included_file_path(body.group('path'), markdown_file_path) if included_file_path.name.startswith('^'): @@ -1016,7 +1016,7 @@ def process_includes( nohead=options.get('nohead') ) - else: # if body missed + else: # if body is missing self.logger.debug('Using the new syntax rules') if options.get('repo_url') and options.get('path'): @@ -1052,7 +1052,7 @@ def process_includes( include_link=include_link ) - donor_md_path = include_link + "3" + donor_md_path = include_link elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1080,21 +1080,20 @@ def process_includes( nohead=options.get('nohead') ) - donor_md_path = options['url'] + "4" + donor_md_path = options['url'] elif options.get('src'): self.logger.debug('Local file referenced') - # donor_md_path = f"{self.src_dir}/{markdown_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" + "5" included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) self.logger.debug(f'Resolved path to the included file: {included_file_path}') if included_file_path.as_posix().startswith(os.getcwd()): _path = included_file_path.relative_to(os.getcwd()) if _path.as_posix().startswith(self.working_dir.as_posix()): - donor_md_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" + "5" + donor_md_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" else: - donor_md_path = _path.as_posix() + "6" + donor_md_path = _path.as_posix() if options.get('project_root'): current_project_root_path = ( From 17643be6104aca61fdf38b37619f47609f476afc Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 19 Jul 2024 16:01:45 +0300 Subject: [PATCH 03/41] update --- foliant/preprocessors/includes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 243131b..565200c 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -854,7 +854,7 @@ def process_includes( :returns: Markdown content with resolved includes ''' - recipient_md_path = markdown_file_path.relative_to(self.working_dir).as_posix() + recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' markdown_file_path = markdown_file_path.resolve() self.logger.debug(f'Processing Markdown file: {markdown_file_path}') @@ -1175,7 +1175,7 @@ def process_includes( if self.includes_map.get(recipient_md_path) == None : self.includes_map[recipient_md_path] = [] - self.includes_map[recipient_md_path].append({"path": donor_md_path}) + self.includes_map[recipient_md_path].append(donor_md_path) else: processed_content_part = content_part @@ -1224,6 +1224,7 @@ def apply(self): for source_file_path in self.working_dir.rglob(source_files_extension): with open(source_file_path, encoding='utf8') as source_file: source_content = source_file.read() + processed_content = self.process_includes( source_file_path, source_content, From ff5e3b43ec9e9be4ec6a46242558f059bf0952e1 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 19 Jul 2024 16:45:24 +0300 Subject: [PATCH 04/41] fix --- foliant/preprocessors/includes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 565200c..48abeb2 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -990,8 +990,8 @@ def process_includes( else: self.logger.debug('Local file referenced') - donor_md_path = f"{self.src_dir}/{markdown_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" included_file_path = self._get_included_file_path(body.group('path'), markdown_file_path) + donor_md_path = f"{self.src_dir}/{included_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" if included_file_path.name.startswith('^'): included_file_path = self._find_file( From 2d91a1fb2c7b2ade22b2e71d8ab183cc4a964f0b Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 19 Jul 2024 20:55:46 +0300 Subject: [PATCH 05/41] update and add debug --- foliant/preprocessors/includes.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 48abeb2..dfe7dcb 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -855,6 +855,7 @@ def process_includes( ''' recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' + markdown_file_path = markdown_file_path.resolve() self.logger.debug(f'Processing Markdown file: {markdown_file_path}') @@ -964,6 +965,8 @@ def process_includes( included_file_path = repo_path / body.group('path') donor_md_path = included_file_path.as_posix() + + self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path}') if included_file_path.name.startswith('^'): included_file_path = self._find_file( @@ -991,7 +994,6 @@ def process_includes( self.logger.debug('Local file referenced') included_file_path = self._get_included_file_path(body.group('path'), markdown_file_path) - donor_md_path = f"{self.src_dir}/{included_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" if included_file_path.name.startswith('^'): included_file_path = self._find_file( @@ -1016,6 +1018,10 @@ def process_includes( nohead=options.get('nohead') ) + donor_md_path = f"{self.src_dir}/{included_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" + + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path}') + else: # if body is missing self.logger.debug('Using the new syntax rules') @@ -1053,6 +1059,8 @@ def process_includes( ) donor_md_path = include_link + + self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path}') elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1081,6 +1089,8 @@ def process_includes( ) donor_md_path = options['url'] + + self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path}') elif options.get('src'): self.logger.debug('Local file referenced') @@ -1094,6 +1104,8 @@ def process_includes( donor_md_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" else: donor_md_path = _path.as_posix() + + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path}') if options.get('project_root'): current_project_root_path = ( From 5203691159376d8251761ff7a418510e4a365aa0 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 19 Jul 2024 21:23:25 +0300 Subject: [PATCH 06/41] update debug --- foliant/preprocessors/includes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index dfe7dcb..9d6c928 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -966,7 +966,7 @@ def process_includes( donor_md_path = included_file_path.as_posix() - self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path}') + self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') if included_file_path.name.startswith('^'): included_file_path = self._find_file( @@ -1020,7 +1020,7 @@ def process_includes( donor_md_path = f"{self.src_dir}/{included_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" - self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path}') + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') else: # if body is missing self.logger.debug('Using the new syntax rules') @@ -1060,7 +1060,7 @@ def process_includes( donor_md_path = include_link - self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path}') + self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1090,7 +1090,7 @@ def process_includes( donor_md_path = options['url'] - self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path}') + self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') elif options.get('src'): self.logger.debug('Local file referenced') @@ -1105,7 +1105,7 @@ def process_includes( else: donor_md_path = _path.as_posix() - self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path}') + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') if options.get('project_root'): current_project_root_path = ( From 22542cfe8c5db7e797a1aeba1dbe830db1d8a5b1 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Sat, 20 Jul 2024 15:07:27 +0300 Subject: [PATCH 07/41] fix case 3 --- foliant/preprocessors/includes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 9d6c928..cd3bbfa 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -1058,7 +1058,7 @@ def process_includes( include_link=include_link ) - donor_md_path = include_link + donor_md_path = include_link + options.get('path') self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') From 518e8d7f00cbc3f2b8c0c3dc465127c600077aec Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Mon, 22 Jul 2024 13:56:37 +0300 Subject: [PATCH 08/41] update --- foliant/preprocessors/includes.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index cd3bbfa..d8a3ed1 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -835,6 +835,16 @@ def _process_include( return included_content + def _prepare_path_for_includes_map(self, path: Path) -> str: + donor_path = None + if path.as_posix().startswith(os.getcwd()): + _path = path.relative_to(os.getcwd()) + if _path.as_posix().startswith(self.working_dir.as_posix()): + donor_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" + else: + donor_path = _path.as_posix() + return donor_path + def process_includes( self, markdown_file_path: Path, @@ -965,7 +975,6 @@ def process_includes( included_file_path = repo_path / body.group('path') donor_md_path = included_file_path.as_posix() - self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') if included_file_path.name.startswith('^'): @@ -1018,8 +1027,8 @@ def process_includes( nohead=options.get('nohead') ) - donor_md_path = f"{self.src_dir}/{included_file_path.relative_to(os.getcwd()).relative_to(self.working_dir).as_posix()}" + donor_md_path = f"{self.src_dir}/{self._prepare_path_for_includes_map(included_file_path)}" self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') else: # if body is missing @@ -1059,7 +1068,6 @@ def process_includes( ) donor_md_path = include_link + options.get('path') - self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') elif options.get('url'): @@ -1089,7 +1097,6 @@ def process_includes( ) donor_md_path = options['url'] - self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') elif options.get('src'): @@ -1098,13 +1105,7 @@ def process_includes( included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) self.logger.debug(f'Resolved path to the included file: {included_file_path}') - if included_file_path.as_posix().startswith(os.getcwd()): - _path = included_file_path.relative_to(os.getcwd()) - if _path.as_posix().startswith(self.working_dir.as_posix()): - donor_md_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" - else: - donor_md_path = _path.as_posix() - + donor_md_path = self._prepare_path_for_includes_map(included_file_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') if options.get('project_root'): From c3a04d4854551266fbb9cce2c441db0274effdff Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 23 Jul 2024 14:30:26 +0300 Subject: [PATCH 09/41] add option --- foliant/preprocessors/includes.py | 68 ++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index d8a3ed1..54ed724 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -22,7 +22,8 @@ class Preprocessor(BasePreprocessor): 'allow_failure': True, 'cache_dir': Path('.includescache'), 'aliases': {}, - 'extensions': ['md'] + 'extensions': ['md'], + 'includes_map': False } tags = 'include', @@ -46,7 +47,9 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' self.src_dir = self.config.get("src_dir") - self.includes_map = {} + self.includes_map_enable = self.options['includes_map'] + if self.includes_map_enable: + self.includes_map = {} self.logger = self.logger.getChild('includes') @@ -837,10 +840,17 @@ def _process_include( def _prepare_path_for_includes_map(self, path: Path) -> str: donor_path = None - if path.as_posix().startswith(os.getcwd()): + if path.as_posix().startswith(self.working_dir.as_posix()): + _path = path.relative_to(self.working_dir) + donor_path = f"{self.src_dir}/{_path.as_posix()}" + elif path.as_posix().startswith(os.getcwd()): _path = path.relative_to(os.getcwd()) if _path.as_posix().startswith(self.working_dir.as_posix()): - donor_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" + _path = _path.relative_to(self.working_dir) + if _path.as_posix().startswith(self.working_dir.as_posix()): + donor_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" + else: + donor_path = f"{self.src_dir}/{_path.as_posix()}" else: donor_path = _path.as_posix() return donor_path @@ -864,7 +874,8 @@ def process_includes( :returns: Markdown content with resolved includes ''' - recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' + if self.includes_map_enable: + recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' markdown_file_path = markdown_file_path.resolve() @@ -883,7 +894,8 @@ def process_includes( include_statement = self.pattern.fullmatch(content_part) if include_statement: - donor_md_path = None + if self.includes_map_enable: + donor_md_path = None current_project_root_path = project_root_path @@ -974,8 +986,9 @@ def process_includes( included_file_path = repo_path / body.group('path') - donor_md_path = included_file_path.as_posix() - self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') + if self.includes_map_enable: + donor_md_path = included_file_path.as_posix() + self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') if included_file_path.name.startswith('^'): included_file_path = self._find_file( @@ -1027,9 +1040,9 @@ def process_includes( nohead=options.get('nohead') ) - - donor_md_path = f"{self.src_dir}/{self._prepare_path_for_includes_map(included_file_path)}" - self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') + if self.includes_map_enable: + donor_md_path = f"{self.src_dir}/{self._prepare_path_for_includes_map(included_file_path)}" + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') else: # if body is missing self.logger.debug('Using the new syntax rules') @@ -1067,8 +1080,9 @@ def process_includes( include_link=include_link ) - donor_md_path = include_link + options.get('path') - self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') + if self.includes_map_enable: + donor_md_path = include_link + options.get('path') + self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1096,8 +1110,9 @@ def process_includes( nohead=options.get('nohead') ) - donor_md_path = options['url'] - self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') + if self.includes_map_enable: + donor_md_path = options['url'] + self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') elif options.get('src'): self.logger.debug('Local file referenced') @@ -1105,8 +1120,9 @@ def process_includes( included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) self.logger.debug(f'Resolved path to the included file: {included_file_path}') - donor_md_path = self._prepare_path_for_includes_map(included_file_path) - self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') + if self.includes_map_enable: + donor_md_path = self._prepare_path_for_includes_map(included_file_path) + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') if options.get('project_root'): current_project_root_path = ( @@ -1184,11 +1200,12 @@ def process_includes( processed_content_part = re.sub(r'\s+', ' ', processed_content_part).strip() - if donor_md_path: - if self.includes_map.get(recipient_md_path) == None : - self.includes_map[recipient_md_path] = [] + if self.includes_map_enable: + if donor_md_path: + if self.includes_map.get(recipient_md_path) == None : + self.includes_map[recipient_md_path] = [] - self.includes_map[recipient_md_path].append(donor_md_path) + self.includes_map[recipient_md_path].append(donor_md_path) else: processed_content_part = content_part @@ -1249,8 +1266,11 @@ def apply(self): processed_file.write(processed_content) # Write includes map - Path(f'{self.working_dir}/static/').mkdir(parents=True, exist_ok=True) - with open(f'{self.working_dir}/static/includes_map.json', 'w', encoding='utf8') as f: - json.dump(self.includes_map, f) + if self.includes_map_enable: + output = f'{self.working_dir}/static/includes_map.json' + Path(f'{self.working_dir}/static/').mkdir(parents=True, exist_ok=True) + with open(f'{self.working_dir}/static/includes_map.json', 'w', encoding='utf8') as f: + json.dump(self.includes_map, f) + self.logger.debug(f'includes_map write to {output}') self.logger.info('Preprocessor applied') From 8f7845dfb5a71e145b747af3fbabe4c071eca743 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 23 Jul 2024 15:39:30 +0300 Subject: [PATCH 10/41] fix import --- foliant/preprocessors/includes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 54ed724..d44f906 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -1,13 +1,13 @@ import re import urllib -import json -import os from shutil import rmtree from io import StringIO from hashlib import md5 from pathlib import Path import socket from subprocess import run, CalledProcessError, PIPE, STDOUT +from json import dump +from os import getcwd from foliant.preprocessors.base import BasePreprocessor @@ -23,7 +23,7 @@ class Preprocessor(BasePreprocessor): 'cache_dir': Path('.includescache'), 'aliases': {}, 'extensions': ['md'], - 'includes_map': False + 'includes_map': True } tags = 'include', @@ -843,8 +843,8 @@ def _prepare_path_for_includes_map(self, path: Path) -> str: if path.as_posix().startswith(self.working_dir.as_posix()): _path = path.relative_to(self.working_dir) donor_path = f"{self.src_dir}/{_path.as_posix()}" - elif path.as_posix().startswith(os.getcwd()): - _path = path.relative_to(os.getcwd()) + elif path.as_posix().startswith(getcwd()): + _path = path.relative_to(getcwd()) if _path.as_posix().startswith(self.working_dir.as_posix()): _path = _path.relative_to(self.working_dir) if _path.as_posix().startswith(self.working_dir.as_posix()): @@ -1270,7 +1270,7 @@ def apply(self): output = f'{self.working_dir}/static/includes_map.json' Path(f'{self.working_dir}/static/').mkdir(parents=True, exist_ok=True) with open(f'{self.working_dir}/static/includes_map.json', 'w', encoding='utf8') as f: - json.dump(self.includes_map, f) + dump(self.includes_map, f) self.logger.debug(f'includes_map write to {output}') self.logger.info('Preprocessor applied') From d6ae30c2a749221713e170e037ef321c9bef640f Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 23 Jul 2024 16:53:50 +0300 Subject: [PATCH 11/41] remove uncessary comments --- foliant/preprocessors/includes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index d44f906..1e7f215 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -902,12 +902,6 @@ def process_includes( body = self._tag_body_pattern.match(include_statement.group('body').strip()) options = self.get_options(include_statement.group('options')) - self.logger.debug(f'Include pair: {markdown_file_path} <- {options} {body}') - - # TODO: - # :param markdown_file_path: - # :returns date: - self.logger.debug( f'Processing include statement; body: {body}, options: {options}, ' + f'current project root path: {current_project_root_path}' From d1673a14b41a59d1f4e148f243ab7ec4069f6f62 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 25 Jul 2024 14:44:57 +0300 Subject: [PATCH 12/41] update --- foliant/preprocessors/includes.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 1e7f215..b80e684 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -49,7 +49,7 @@ def __init__(self, *args, **kwargs): self.src_dir = self.config.get("src_dir") self.includes_map_enable = self.options['includes_map'] if self.includes_map_enable: - self.includes_map = {} + self.includes_map = [] self.logger = self.logger.getChild('includes') @@ -855,6 +855,12 @@ def _prepare_path_for_includes_map(self, path: Path) -> str: donor_path = _path.as_posix() return donor_path + def _exist_in_includes_map(self, map: list, path: str) -> bool: + for obj in map: + if obj["file"] == path: + return True + return False + def process_includes( self, markdown_file_path: Path, @@ -1035,7 +1041,7 @@ def process_includes( ) if self.includes_map_enable: - donor_md_path = f"{self.src_dir}/{self._prepare_path_for_includes_map(included_file_path)}" + donor_md_path = self._prepare_path_for_includes_map(included_file_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') else: # if body is missing @@ -1196,10 +1202,11 @@ def process_includes( if self.includes_map_enable: if donor_md_path: - if self.includes_map.get(recipient_md_path) == None : - self.includes_map[recipient_md_path] = [] - - self.includes_map[recipient_md_path].append(donor_md_path) + if not self._exist_in_includes_map(self.includes_map, recipient_md_path): + self.includes_map.append({ 'file': recipient_md_path, "includes": [] }) + for i, f in enumerate(self.includes_map): + if f['file'] == recipient_md_path: + self.includes_map[i]['includes'].append(donor_md_path) else: processed_content_part = content_part From 7e18b4185b5165a62d2f13d20490cb94fb143dd3 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 25 Jul 2024 15:07:49 +0300 Subject: [PATCH 13/41] add test includes map --- foliant/preprocessors/includes.py | 2 +- test/test_includes.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index b80e684..fbf1f76 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -23,7 +23,7 @@ class Preprocessor(BasePreprocessor): 'cache_dir': Path('.includescache'), 'aliases': {}, 'extensions': ['md'], - 'includes_map': True + 'includes_map': False } tags = 'include', diff --git a/test/test_includes.py b/test/test_includes.py index d5638ce..049de10 100644 --- a/test/test_includes.py +++ b/test/test_includes.py @@ -243,3 +243,21 @@ def test_extensions(self): 'index.j2': '# My title\n\nIncluded content', 'sub/sub.md': 'Included content' } + + def test_includes_map(self): + self.ptf.options = {'includes_map': True } + input_map = { + 'index.md': '# My title\n\n\n\n', + 'sub/sub-1.md': 'Included content 1', + 'sub/sub-2.md': 'Included content 2' + } + expected_map = { + 'index.md': '# My title\n\nIncluded content 1\n\nIncluded content 2', + 'static/includes_map.json': "[{\"file\": \"__src__/index.md\", \"includes\": [\"__src__/sub/sub-1.md\", \"__src__/sub/sub-2.md\"]}]", + 'sub/sub-1.md': 'Included content 1', + 'sub/sub-2.md': 'Included content 2' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + ) From d75579f19e82a6ccd7e0844d014aece80368d8df Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 25 Jul 2024 17:46:52 +0300 Subject: [PATCH 14/41] update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f246c0f..b90c8ec 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) +[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) [![Tests](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml/badge.svg)](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml) # Includes for Foliant From 8987e8d0b94b848b88170b6d96ab38054c3adce6 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 25 Jul 2024 17:52:38 +0300 Subject: [PATCH 15/41] update python-test.yml --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index f516353..197a2e2 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,4 +1,4 @@ -name: Python package +name: Python package tests on: [push] From 0624b849499f6d67de410155b23e2d00afb75280 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 13 Aug 2024 10:20:04 +0300 Subject: [PATCH 16/41] test --- foliant/preprocessors/includes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index fbf1f76..b80e684 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -23,7 +23,7 @@ class Preprocessor(BasePreprocessor): 'cache_dir': Path('.includescache'), 'aliases': {}, 'extensions': ['md'], - 'includes_map': False + 'includes_map': True } tags = 'include', From 05fef21f4e7a3dbf9954f61fcbe0867d5223c1b0 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 3 Sep 2024 14:42:52 +0300 Subject: [PATCH 17/41] add: anchors into include map --- foliant/preprocessors/includes.py | 89 ++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 18 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index b80e684..861b366 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -22,8 +22,7 @@ class Preprocessor(BasePreprocessor): 'allow_failure': True, 'cache_dir': Path('.includescache'), 'aliases': {}, - 'extensions': ['md'], - 'includes_map': True + 'extensions': ['md'] } tags = 'include', @@ -47,9 +46,13 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' self.src_dir = self.config.get("src_dir") - self.includes_map_enable = self.options['includes_map'] - if self.includes_map_enable: + self.includes_map_enable = False + self.includes_map_anchors = False + if 'includes_map' in self.options: + self.includes_map_enable = True self.includes_map = [] + if 'anchors' in self.options['includes_map']: + self.includes_map_anchors = True self.logger = self.logger.getChild('includes') @@ -169,7 +172,7 @@ def _download_file_from_url(self, url: str) -> Path: for line in dict_new_link: downloaded_content = downloaded_content.replace(line, dict_new_link[line]) - # End of the conversion code block + # End of the conversion code block with open(downloaded_file_path, 'w', encoding='utf8') as downloaded_file: @@ -224,6 +227,8 @@ def _sync_repo( except CalledProcessError as exception: self.logger.warning(str(exception)) + except Exception as exception: + self.logger.warning(str(exception)) else: self.logger.error(str(exception)) @@ -691,7 +696,7 @@ def _get_included_file_path( ) self.logger.debug(f'Finally, included file path: {included_file_path}') - + return included_file_path def _process_include( @@ -730,8 +735,8 @@ def _process_include( f'Included file path: {included_file_path}, from heading: {from_heading}, ' + f'to heading: {to_heading}, sethead: {sethead}, nohead: {nohead}' ) - - + + if included_file_path.exists(): included_file_path = included_file_path else: @@ -764,9 +769,9 @@ def _process_include( old_found_link = regexp_find_link.findall(included_content) - for line in old_found_link: + for line in old_found_link: relative_path = regexp_find_path.findall(line) - + for ex_line in relative_path: exceptions_characters = re.findall(r'https?://[^\s]+|@|:|\.png|\.jpeg|.svg', ex_line) if exceptions_characters: @@ -778,7 +783,7 @@ def _process_include( for line in dict_new_link: included_content = included_content.replace(line, dict_new_link[line]) - # End of the conversion code block + # End of the conversion code block if self.config.get('escape_code', False): if isinstance(self.config['escape_code'], dict): @@ -861,6 +866,27 @@ def _exist_in_includes_map(self, map: list, path: str) -> bool: return True return False + def _find_anchors(self, content: str) -> list: + anchors_list = [] + + anchors = re.findall(r'\([\-\_A-Za-z0-9]+)\<\/anchor\>', content) + for anchor in anchors: + anchors_list.append(anchor) + custom_ids = re.findall(r'\{\#([\-A-Za-z0-9]+)\}', content) + for anchor in custom_ids: + anchors_list.append(anchor) + elements_with_ids = re.findall(r'id\=[\"\']([\-A-Za-z0-9]+)[\"\']', content) + for anchor in elements_with_ids: + anchors_list.append(anchor) + return anchors_list + + def _add_anchors(self, l: list, content: str) -> list: + anchors = self._find_anchors(content) + if len(anchors) > 0: + for anchor in anchors: + l.append(anchor) + return l + def process_includes( self, markdown_file_path: Path, @@ -881,7 +907,10 @@ def process_includes( ''' if self.includes_map_enable: - recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' + if markdown_file_path.as_posix().startswith(self.working_dir.as_posix()): + recipient_md_path = f'{self.src_dir}/{markdown_file_path.relative_to(self.working_dir).as_posix()}' + else: + recipient_md_path = f'{self.src_dir}/{markdown_file_path.as_posix()}' markdown_file_path = markdown_file_path.resolve() @@ -902,6 +931,7 @@ def process_includes( if include_statement: if self.includes_map_enable: donor_md_path = None + donor_anchors = [] current_project_root_path = project_root_path @@ -985,11 +1015,14 @@ def process_includes( self.logger.debug(f'Local path of the repo: {repo_path}') included_file_path = repo_path / body.group('path') - + if self.includes_map_enable: donor_md_path = included_file_path.as_posix() self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') + if self.includes_map_anchors: + donor_anchors = self._add_anchors(donor_anchors, processed_content_part) + if included_file_path.name.startswith('^'): included_file_path = self._find_file( included_file_path.name[1:], included_file_path.parent @@ -1044,6 +1077,9 @@ def process_includes( donor_md_path = self._prepare_path_for_includes_map(included_file_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') + if self.includes_map_anchors: + donor_anchors = self._add_anchors(donor_anchors, processed_content_part) + else: # if body is missing self.logger.debug('Using the new syntax rules') @@ -1084,6 +1120,9 @@ def process_includes( donor_md_path = include_link + options.get('path') self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') + if self.includes_map_anchors: + donor_anchors = self._add_anchors(donor_anchors, processed_content_part) + elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1109,21 +1148,27 @@ def process_includes( sethead=current_sethead, nohead=options.get('nohead') ) - + if self.includes_map_enable: donor_md_path = options['url'] self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') + if self.includes_map_anchors: + donor_anchors = self._add_anchors(donor_anchors, processed_content_part) + elif options.get('src'): self.logger.debug('Local file referenced') included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) self.logger.debug(f'Resolved path to the included file: {included_file_path}') - + if self.includes_map_enable: donor_md_path = self._prepare_path_for_includes_map(included_file_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') + if self.includes_map_anchors: + donor_anchors = self._add_anchors(donor_anchors, processed_content_part) + if options.get('project_root'): current_project_root_path = ( markdown_file_path.parent / options.get('project_root') @@ -1203,11 +1248,19 @@ def process_includes( if self.includes_map_enable: if donor_md_path: if not self._exist_in_includes_map(self.includes_map, recipient_md_path): - self.includes_map.append({ 'file': recipient_md_path, "includes": [] }) + if not self.includes_map_anchors: + self.includes_map.append({ 'file': recipient_md_path, "includes": []}) + else: + self.includes_map.append({ 'file': recipient_md_path, "includes": [], 'anchors': []}) + for i, f in enumerate(self.includes_map): if f['file'] == recipient_md_path: self.includes_map[i]['includes'].append(donor_md_path) + if self.includes_map_anchors: + for anchor in donor_anchors: + self.includes_map[i]['anchors'].append(anchor) + else: processed_content_part = content_part @@ -1243,7 +1296,7 @@ def _get_source_files_extensions(self) -> list: return source_files_extensions def apply(self): - + self.logger.info('Applying preprocessor') # Cleaning up downloads because the content of remote source may have modified @@ -1265,7 +1318,7 @@ def apply(self): if processed_content: with open(source_file_path, 'w', encoding='utf8') as processed_file: processed_file.write(processed_content) - + # Write includes map if self.includes_map_enable: output = f'{self.working_dir}/static/includes_map.json' From 06896734d81dc56bbd5c1b28216ee88340302b0e Mon Sep 17 00:00:00 2001 From: Timur Osmanov <54434686+TOsmanov@users.noreply.github.com> Date: Mon, 9 Sep 2024 13:57:23 +0300 Subject: [PATCH 18/41] test: enabled by default --- foliant/preprocessors/includes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 861b366..7cb1747 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -46,13 +46,14 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' self.src_dir = self.config.get("src_dir") - self.includes_map_enable = False - self.includes_map_anchors = False + self.includes_map_enable = True # TODO: the default value is False + self.includes_map_anchors = True # TODO: the default value is False if 'includes_map' in self.options: self.includes_map_enable = True - self.includes_map = [] if 'anchors' in self.options['includes_map']: self.includes_map_anchors = True + if self.includes_map_enable: + self.includes_map = [] self.logger = self.logger.getChild('includes') From 0bd8ed7dfcfe34424418a741f97701ba42ddc275 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Wed, 2 Oct 2024 09:45:02 +0300 Subject: [PATCH 19/41] fix: tests and remove anchors --- foliant/preprocessors/includes.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 7cb1747..7c24ce6 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -46,12 +46,9 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' self.src_dir = self.config.get("src_dir") - self.includes_map_enable = True # TODO: the default value is False - self.includes_map_anchors = True # TODO: the default value is False + self.includes_map_enable = False if 'includes_map' in self.options: self.includes_map_enable = True - if 'anchors' in self.options['includes_map']: - self.includes_map_anchors = True if self.includes_map_enable: self.includes_map = [] @@ -1021,9 +1018,6 @@ def process_includes( donor_md_path = included_file_path.as_posix() self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') - if self.includes_map_anchors: - donor_anchors = self._add_anchors(donor_anchors, processed_content_part) - if included_file_path.name.startswith('^'): included_file_path = self._find_file( included_file_path.name[1:], included_file_path.parent @@ -1078,9 +1072,6 @@ def process_includes( donor_md_path = self._prepare_path_for_includes_map(included_file_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') - if self.includes_map_anchors: - donor_anchors = self._add_anchors(donor_anchors, processed_content_part) - else: # if body is missing self.logger.debug('Using the new syntax rules') @@ -1121,9 +1112,6 @@ def process_includes( donor_md_path = include_link + options.get('path') self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') - if self.includes_map_anchors: - donor_anchors = self._add_anchors(donor_anchors, processed_content_part) - elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1154,9 +1142,6 @@ def process_includes( donor_md_path = options['url'] self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') - if self.includes_map_anchors: - donor_anchors = self._add_anchors(donor_anchors, processed_content_part) - elif options.get('src'): self.logger.debug('Local file referenced') @@ -1167,9 +1152,6 @@ def process_includes( donor_md_path = self._prepare_path_for_includes_map(included_file_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') - if self.includes_map_anchors: - donor_anchors = self._add_anchors(donor_anchors, processed_content_part) - if options.get('project_root'): current_project_root_path = ( markdown_file_path.parent / options.get('project_root') @@ -1249,19 +1231,12 @@ def process_includes( if self.includes_map_enable: if donor_md_path: if not self._exist_in_includes_map(self.includes_map, recipient_md_path): - if not self.includes_map_anchors: - self.includes_map.append({ 'file': recipient_md_path, "includes": []}) - else: - self.includes_map.append({ 'file': recipient_md_path, "includes": [], 'anchors': []}) + self.includes_map.append({ 'file': recipient_md_path, "includes": []}) for i, f in enumerate(self.includes_map): if f['file'] == recipient_md_path: self.includes_map[i]['includes'].append(donor_md_path) - if self.includes_map_anchors: - for anchor in donor_anchors: - self.includes_map[i]['anchors'].append(anchor) - else: processed_content_part = content_part From 1fefcd2e4c8f0fb25eba25f719e059afae514598 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Wed, 2 Oct 2024 09:55:31 +0300 Subject: [PATCH 20/41] update: README --- README.md | 6 ++++++ README_ru.md | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b90c8ec..55688c9 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ preprocessors: - j2 aliases: ... + includes_map: true ``` `cache_dir` @@ -79,6 +80,11 @@ Default `true`. Note that in the second example the default revision (`develop`) will be overridden with the custom one (`master`). +`includes_map` +: Enables generation of the `includes_map.json` file containing information about files inserted using the includes preprocessor. + + From this file, third-party services can receive information about the presence of inclusions in files, for example, to check links using a linter. + ## Usage The preprocessor allows two syntax variants for include statements. diff --git a/README_ru.md b/README_ru.md index ddede50..dd652e2 100644 --- a/README_ru.md +++ b/README_ru.md @@ -1,4 +1,4 @@ -[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) +[![](https://img.shields.io/pypi/v/foliantcontrib.includes.svg)](https://pypi.org/project/foliantcontrib.includes/) [![](https://img.shields.io/github/v/tag/foliant-docs/foliantcontrib.includes.svg?label=GitHub)](https://github.com/foliant-docs/foliantcontrib.includes) [![Tests](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml/badge.svg)](https://github.com/foliant-docs/foliantcontrib.includes/actions/workflows/python-test.yml) # Препроцессор Includes для Foliant @@ -34,6 +34,7 @@ preprocessors: - j2 aliases: ... + includes_map: true ``` `cache_dir` @@ -64,6 +65,10 @@ preprocessors: `aliases` : Сопоставление псевдонимов с URL-адресами репозитория Git. После определения этого параметра псевдоним может использоваться для ссылки на репозиторий вместо его полного URL-адреса. +`includes_map` +: Включает генерацию файла `includes_map.json`, содержащего информацию о файлах, вставленных с помощью препроцессора includes. + Из этого файла сторонние сервисы могут получать информацию о наличии текста вставленного в файл с помощью препроцессора, например, для проверки ссылок с помощью линтера. + >**Внимание!** > > Псевдонимы доступны только в рамках устаревшего синтаксиса инструкций include (см. ниже) From 54d782b4c182412ede9a54f4cf9d188acb3f508b Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Wed, 2 Oct 2024 10:02:19 +0300 Subject: [PATCH 21/41] add: anchors to includes map --- foliant/preprocessors/includes.py | 155 +++++++++++++++++++++--------- 1 file changed, 108 insertions(+), 47 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 7c24ce6..431ca3d 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -47,10 +47,12 @@ def __init__(self, *args, **kwargs): self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' self.src_dir = self.config.get("src_dir") self.includes_map_enable = False + self.includes_map_anchors = False if 'includes_map' in self.options: self.includes_map_enable = True - if self.includes_map_enable: self.includes_map = [] + if 'anchors' in self.options['includes_map']: + self.includes_map_anchors = True self.logger = self.logger.getChild('includes') @@ -709,7 +711,7 @@ def _process_include( sethead: int or None = None, nohead: bool = False, include_link: str or None = None - ) -> str: + ) -> (str, list): '''Replace a local include statement with the file content. Necessary adjustments are applied to the content: cut between certain headings, strip the top heading, set heading level. @@ -734,6 +736,8 @@ def _process_include( f'to heading: {to_heading}, sethead: {sethead}, nohead: {nohead}' ) + anchors = [] + if included_file_path.exists(): included_file_path = included_file_path @@ -783,6 +787,25 @@ def _process_include( included_content = included_content.replace(line, dict_new_link[line]) # End of the conversion code block + # Removing metadata from content before including + + included_content = remove_meta(included_content) + + included_content = self._cut_from_position_to_position( + included_content, + from_heading, + to_heading, + from_id, + to_id, + to_end, + sethead, + nohead + ) + + # Find anchors + if self.includes_map_anchors: + anchors = self._add_anchors(anchors, included_content) + if self.config.get('escape_code', False): if isinstance(self.config['escape_code'], dict): escapecode_options = self.config['escape_code'].get('options', {}) @@ -803,21 +826,6 @@ def _process_include( escapecode_options ).escape(included_content) - # Removing metadata from content before including - - included_content = remove_meta(included_content) - - included_content = self._cut_from_position_to_position( - included_content, - from_heading, - to_heading, - from_id, - to_id, - to_end, - sethead, - nohead - ) - included_content = self._adjust_image_paths(included_content, included_file_path) if project_root_path: @@ -839,9 +847,15 @@ def _process_include( included_file_path.parent ) - return included_content + return included_content, anchors def _prepare_path_for_includes_map(self, path: Path) -> str: + """Preparing the path of the inserted file for the includes map + + :param path: The path to the Markdown file to be inserted + + :returns: The path that will be used in the includes map + """ donor_path = None if path.as_posix().startswith(self.working_dir.as_posix()): _path = path.relative_to(self.working_dir) @@ -859,12 +873,25 @@ def _prepare_path_for_includes_map(self, path: Path) -> str: return donor_path def _exist_in_includes_map(self, map: list, path: str) -> bool: + """Is there a path on the includes map + + :param map: Includes map + :param path: Path + + :returns: True or False + """ for obj in map: if obj["file"] == path: return True return False def _find_anchors(self, content: str) -> list: + """Search for anchor links in the text + + :param content: Markdown content + + :returns: List of anchor links + """ anchors_list = [] anchors = re.findall(r'\([\-\_A-Za-z0-9]+)\<\/anchor\>', content) @@ -879,6 +906,13 @@ def _find_anchors(self, content: str) -> list: return anchors_list def _add_anchors(self, l: list, content: str) -> list: + """Add an anchor link to the list of anchor links + + :param l: The original list + :param content: Markdown content + + :returns: A list with added anchors + """ anchors = self._find_anchors(content) if len(anchors) > 0: for anchor in anchors: @@ -957,25 +991,27 @@ def process_includes( self.logger.debug(f'Set new current sethead: {current_sethead}') - # If the tag body is not empty, the legacy syntax is expected: - # - # - # ($repo_url#revision$path|src)#from_heading:to_heading - # - # - # If the tag body is empty, the new syntax is expected: - # - # + """ + If the tag body is not empty, the legacy syntax is expected: + + + ($repo_url#revision$path|src)#from_heading:to_heading + + + If the tag body is empty, the new syntax is expected: + + + """ if body: self.logger.debug('Using the legacy syntax rules') @@ -1018,6 +1054,7 @@ def process_includes( donor_md_path = included_file_path.as_posix() self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') + if included_file_path.name.startswith('^'): included_file_path = self._find_file( included_file_path.name[1:], included_file_path.parent @@ -1031,7 +1068,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=body.group('from_heading'), @@ -1040,6 +1077,9 @@ def process_includes( nohead=options.get('nohead') ) + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + else: self.logger.debug('Local file referenced') @@ -1059,7 +1099,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=body.group('from_heading'), @@ -1072,6 +1112,9 @@ def process_includes( donor_md_path = self._prepare_path_for_includes_map(included_file_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + else: # if body is missing self.logger.debug('Using the new syntax rules') @@ -1095,7 +1138,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=options.get('from_heading'), @@ -1112,6 +1155,9 @@ def process_includes( donor_md_path = include_link + options.get('path') self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + elif options.get('url'): self.logger.debug('File to get by URL referenced') @@ -1126,7 +1172,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=options.get('from_heading'), @@ -1142,16 +1188,15 @@ def process_includes( donor_md_path = options['url'] self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + elif options.get('src'): self.logger.debug('Local file referenced') included_file_path = self._get_included_file_path(options.get('src'), markdown_file_path) self.logger.debug(f'Resolved path to the included file: {included_file_path}') - if self.includes_map_enable: - donor_md_path = self._prepare_path_for_includes_map(included_file_path) - self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') - if options.get('project_root'): current_project_root_path = ( markdown_file_path.parent / options.get('project_root') @@ -1159,7 +1204,7 @@ def process_includes( self.logger.debug(f'Set new current project root path: {current_project_root_path}') - processed_content_part = self._process_include( + processed_content_part, anchors = self._process_include( included_file_path=included_file_path, project_root_path=current_project_root_path, from_heading=options.get('from_heading'), @@ -1171,6 +1216,13 @@ def process_includes( nohead=options.get('nohead') ) + if self.includes_map_enable: + donor_md_path = self._prepare_path_for_includes_map(included_file_path) + self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') + + if self.includes_map_enable and self.includes_map_anchors: + donor_anchors = donor_anchors + anchors + else: self.logger.warning( 'Neither repo_url+path nor src specified, ignoring the include statement' @@ -1231,12 +1283,21 @@ def process_includes( if self.includes_map_enable: if donor_md_path: if not self._exist_in_includes_map(self.includes_map, recipient_md_path): - self.includes_map.append({ 'file': recipient_md_path, "includes": []}) + if not self.includes_map_anchors or len(donor_anchors) == 0: + self.includes_map.append({ 'file': recipient_md_path, "includes": []}) + else: + self.includes_map.append({ 'file': recipient_md_path, "includes": [], 'anchors': []}) for i, f in enumerate(self.includes_map): if f['file'] == recipient_md_path: self.includes_map[i]['includes'].append(donor_md_path) + if self.includes_map_anchors: + for anchor in donor_anchors: + if not 'anchors' in self.includes_map[i]: + self.includes_map[i]['anchors'] = [] + self.includes_map[i]['anchors'].append(anchor) + else: processed_content_part = content_part From cb200ef0c05fceb7b5d11bf50e7f954dc177718a Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 10 Oct 2024 11:34:31 +0300 Subject: [PATCH 22/41] fix: regex --- foliant/preprocessors/includes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 431ca3d..ffb82d3 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -897,10 +897,10 @@ def _find_anchors(self, content: str) -> list: anchors = re.findall(r'\([\-\_A-Za-z0-9]+)\<\/anchor\>', content) for anchor in anchors: anchors_list.append(anchor) - custom_ids = re.findall(r'\{\#([\-A-Za-z0-9]+)\}', content) + custom_ids = re.findall(r'\{\#([\-\_A-Za-z0-9]+)\}', content) for anchor in custom_ids: anchors_list.append(anchor) - elements_with_ids = re.findall(r'id\=[\"\']([\-A-Za-z0-9]+)[\"\']', content) + elements_with_ids = re.findall(r'id\=[\"\']([\-\_A-Za-z0-9]+)[\"\']', content) for anchor in elements_with_ids: anchors_list.append(anchor) return anchors_list From 63d3c2d1fcc553cee0fdfc2a741d2275c4290ee4 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 15 Oct 2024 11:15:27 +0300 Subject: [PATCH 23/41] test: enabled by default --- foliant/preprocessors/includes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index ffb82d3..f5e8ae0 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -46,8 +46,8 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' self.src_dir = self.config.get("src_dir") - self.includes_map_enable = False - self.includes_map_anchors = False + self.includes_map_enable = True # TODO:set the default value to False + self.includes_map_anchors = True # TODO:set the default value to False if 'includes_map' in self.options: self.includes_map_enable = True self.includes_map = [] From 882ed6482be014ba6cbbb4fe1499537a0803bcaf Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 15 Oct 2024 12:09:19 +0300 Subject: [PATCH 24/41] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d518e3e..7c430a3 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ description=SHORT_DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type='text/markdown', - version='1.1.17', + version='1.1.19', author='Konstantin Molchanov', author_email='moigagoo@live.com', url='https://github.com/foliant-docs/foliantcontrib.includes', From 784137bef435ba48a7c97b276a800b5c27437884 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 15 Oct 2024 13:00:41 +0300 Subject: [PATCH 25/41] fix: error --- foliant/preprocessors/includes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index f5e8ae0..5a4e31a 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -50,10 +50,12 @@ def __init__(self, *args, **kwargs): self.includes_map_anchors = True # TODO:set the default value to False if 'includes_map' in self.options: self.includes_map_enable = True - self.includes_map = [] if 'anchors' in self.options['includes_map']: self.includes_map_anchors = True + if self.includes_map_enable: + self.includes_map = [] + self.logger = self.logger.getChild('includes') self.logger.debug(f'Preprocessor inited: {self.__dict__}') From f3d9d8c9db66e6bcf665c8fb061ab57f016d33e2 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 14 Nov 2024 14:04:27 +0300 Subject: [PATCH 26/41] add: clean urls --- foliant/preprocessors/includes.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 5a4e31a..2169c74 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -921,6 +921,9 @@ def _add_anchors(self, l: list, content: str) -> list: l.append(anchor) return l + def clean_tokens(self, url: str) -> str: + return re.sub(r"(https*://)(.*)@(.*)", r"\1\3", url) + def process_includes( self, markdown_file_path: Path, @@ -1054,6 +1057,7 @@ def process_includes( if self.includes_map_enable: donor_md_path = included_file_path.as_posix() + donor_md_path = self.clean_tokens(donor_md_path) self.logger.debug(f'Set the repo URL of the included file to {recipient_md_path}: {donor_md_path} (1)') @@ -1112,6 +1116,7 @@ def process_includes( if self.includes_map_enable: donor_md_path = self._prepare_path_for_includes_map(included_file_path) + donor_md_path = self.clean_tokens(donor_md_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (2)') if self.includes_map_enable and self.includes_map_anchors: @@ -1155,6 +1160,7 @@ def process_includes( if self.includes_map_enable: donor_md_path = include_link + options.get('path') + donor_md_path = self.clean_tokens(donor_md_path) self.logger.debug(f'Set the link of the included file to {recipient_md_path}: {donor_md_path} (3)') if self.includes_map_enable and self.includes_map_anchors: @@ -1188,6 +1194,7 @@ def process_includes( if self.includes_map_enable: donor_md_path = options['url'] + donor_md_path = self.clean_tokens(donor_md_path) self.logger.debug(f'Set the URL of the included file to {recipient_md_path}: {donor_md_path} (4)') if self.includes_map_enable and self.includes_map_anchors: @@ -1220,6 +1227,7 @@ def process_includes( if self.includes_map_enable: donor_md_path = self._prepare_path_for_includes_map(included_file_path) + donor_md_path = self.clean_tokens(donor_md_path) self.logger.debug(f'Set the path of the included file to {recipient_md_path}: {donor_md_path} (5)') if self.includes_map_enable and self.includes_map_anchors: From 82f3e0b95d00b1aab1f274c4e63c29d50c7453b8 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Mon, 18 Nov 2024 10:25:47 +0300 Subject: [PATCH 27/41] add: convert chapters to list --- foliant/preprocessors/includes.py | 54 ++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 2169c74..df7dd71 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -56,10 +56,33 @@ def __init__(self, *args, **kwargs): if self.includes_map_enable: self.includes_map = [] + self.chapters = [] + self.chapters_list(self.config["chapters"], self.chapters) # converting chapters to a list + self.logger = self.logger.getChild('includes') self.logger.debug(f'Preprocessor inited: {self.__dict__}') + def chapters_list(self, obj, chapters: list) -> list: + '''Converting chapters to a list + :param config_chapters: Chapters from config + :param chapters: List of chapters + ''' + if isinstance(obj, list): + for item in obj: + if isinstance(item, str): + chapters.append(f"{self.src_dir}/{item}") + else: + self.chapters_list(item, chapters) + elif isinstance(obj, Path): + chapters.append(f"{self.src_dir}/{obj.as_posix()}") + elif isinstance(obj, object): + for k, v in obj.items(): + if isinstance(v, str): + chapters.append(f"{self.src_dir}/{v}") + else: + self.chapters_list(v, chapters) + def _find_file( self, file_name: str, @@ -1292,21 +1315,22 @@ def process_includes( if self.includes_map_enable: if donor_md_path: - if not self._exist_in_includes_map(self.includes_map, recipient_md_path): - if not self.includes_map_anchors or len(donor_anchors) == 0: - self.includes_map.append({ 'file': recipient_md_path, "includes": []}) - else: - self.includes_map.append({ 'file': recipient_md_path, "includes": [], 'anchors': []}) - - for i, f in enumerate(self.includes_map): - if f['file'] == recipient_md_path: - self.includes_map[i]['includes'].append(donor_md_path) - - if self.includes_map_anchors: - for anchor in donor_anchors: - if not 'anchors' in self.includes_map[i]: - self.includes_map[i]['anchors'] = [] - self.includes_map[i]['anchors'].append(anchor) + if recipient_md_path in self.chapters: + if not self._exist_in_includes_map(self.includes_map, recipient_md_path): + if not self.includes_map_anchors or len(donor_anchors) == 0: + self.includes_map.append({ 'file': recipient_md_path, "includes": []}) + else: + self.includes_map.append({ 'file': recipient_md_path, "includes": [], 'anchors': []}) + + for i, f in enumerate(self.includes_map): + if f['file'] == recipient_md_path: + self.includes_map[i]['includes'].append(donor_md_path) + + if self.includes_map_anchors: + for anchor in donor_anchors: + if not 'anchors' in self.includes_map[i]: + self.includes_map[i]['anchors'] = [] + self.includes_map[i]['anchors'].append(anchor) else: processed_content_part = content_part From c408fc8298d7c6c4aee10d5805142199e690bafb Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Wed, 27 Nov 2024 15:00:05 +0300 Subject: [PATCH 28/41] fix: clean_tokens bug --- foliant/preprocessors/includes.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index df7dd71..129a26f 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -55,6 +55,7 @@ def __init__(self, *args, **kwargs): if self.includes_map_enable: self.includes_map = [] + self.enable_clean_tokens = True self.chapters = [] self.chapters_list(self.config["chapters"], self.chapters) # converting chapters to a list @@ -945,7 +946,13 @@ def _add_anchors(self, l: list, content: str) -> list: return l def clean_tokens(self, url: str) -> str: - return re.sub(r"(https*://)(.*)@(.*)", r"\1\3", url) + if self.enable_clean_tokens: + try: + s = re.sub(r"(https*://)(.*)@(.*)", r"\1\3", url) + except: + s = url + + return s def process_includes( self, From 5e62479267a0250a19537104fbaf02b12210ff50 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 12 Dec 2024 19:21:33 +0300 Subject: [PATCH 29/41] fix: for index.md --- foliant/preprocessors/includes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 129a26f..4ca39a6 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -1322,7 +1322,7 @@ def process_includes( if self.includes_map_enable: if donor_md_path: - if recipient_md_path in self.chapters: + if recipient_md_path in self.chapters or if "index.md" in recipient_md_path: if not self._exist_in_includes_map(self.includes_map, recipient_md_path): if not self.includes_map_anchors or len(donor_anchors) == 0: self.includes_map.append({ 'file': recipient_md_path, "includes": []}) From d84217db6b97d21588dcc62d74c5ae99e211b6e1 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 12 Dec 2024 20:57:12 +0300 Subject: [PATCH 30/41] fix --- .gitignore | 1 + foliant/preprocessors/includes.py | 4 ++-- test_in_docker.sh | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100755 test_in_docker.sh diff --git a/.gitignore b/.gitignore index 7bbc71c..b4cf3c6 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ nosetests.xml coverage.xml *.cover .hypothesis/ +.includescache # Translations *.mo diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 4ca39a6..c153e3c 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -50,7 +50,7 @@ def __init__(self, *args, **kwargs): self.includes_map_anchors = True # TODO:set the default value to False if 'includes_map' in self.options: self.includes_map_enable = True - if 'anchors' in self.options['includes_map']: + if type(self.options['includes_map']) != bool and 'anchors' in self.options['includes_map']: self.includes_map_anchors = True if self.includes_map_enable: @@ -1322,7 +1322,7 @@ def process_includes( if self.includes_map_enable: if donor_md_path: - if recipient_md_path in self.chapters or if "index.md" in recipient_md_path: + if recipient_md_path in self.chapters or "index.md" in recipient_md_path: if not self._exist_in_includes_map(self.includes_map, recipient_md_path): if not self.includes_map_anchors or len(donor_anchors) == 0: self.includes_map.append({ 'file': recipient_md_path, "includes": []}) diff --git a/test_in_docker.sh b/test_in_docker.sh new file mode 100755 index 0000000..66dc98b --- /dev/null +++ b/test_in_docker.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +docker run --rm -it \ + -v "./:/app/" \ + --workdir "/app/" \ + python:3.9 "/app/test.sh" \ No newline at end of file From 5fe9c0ef672f9aae1b425bf5e2c1864033ad5719 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 21 Feb 2025 16:08:37 +0300 Subject: [PATCH 31/41] add: adjust links for included content --- foliant/preprocessors/includes.py | 97 ++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 21 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index c153e3c..42349f7 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -33,6 +33,7 @@ class Preprocessor(BasePreprocessor): ) _image_pattern = re.compile(r'\!\[(?P.*?)\]\((?P((?!:\/\/).)+?)\)') + _link_pattern = re.compile(r'\[(?P.*?)\]\((?P((?!:\/\/).)+?)\)') _tag_body_pattern = re.compile( r'(\$(?P[^\#^\$]+)(\#(?P[^\$]+))?\$)?' + @@ -45,7 +46,9 @@ def __init__(self, *args, **kwargs): self._cache_dir_path = self.project_path / self.options['cache_dir'] self._downloaded_dir_path = self._cache_dir_path / '_downloaded_content' - self.src_dir = self.config.get("src_dir") + self.src_dir = self.config.get('src_dir') + self.tmp_dir = self.config.get('tmp_dir', '__folianttmp__') + self.includes_map_enable = True # TODO:set the default value to False self.includes_map_anchors = True # TODO:set the default value to False if 'includes_map' in self.options: @@ -78,7 +81,7 @@ def chapters_list(self, obj, chapters: list) -> list: elif isinstance(obj, Path): chapters.append(f"{self.src_dir}/{obj.as_posix()}") elif isinstance(obj, object): - for k, v in obj.items(): + for _, v in obj.items(): if isinstance(v, str): chapters.append(f"{self.src_dir}/{v}") else: @@ -180,8 +183,8 @@ def _download_file_from_url(self, url: str) -> Path: # The beginning of the block codes for converting relative paths to links dict_new_link = {} - regexp_find_link = re.compile('\[.+?\]\(.+?\)') - regexp_find_path = re.compile('\(.+?\)') + regexp_find_link = re.compile(r'\[.+?\]\(.+?\)') + regexp_find_path = re.compile(r'\(.+?\)') old_found_link = regexp_find_link.findall(downloaded_content) @@ -587,6 +590,52 @@ def _sub(image): return self._image_pattern.sub(_sub, content) + def _adjust_links( + self, + content: str, + markdown_file_path: Path, + origin_file_path: Path + ) -> str: + '''Locate internal link referenced in a Markdown string and replace their paths + with the relative ones. + + :param content: Markdown content + :param markdown_file_path: Path to the Markdown file containing the content + + :returns: Markdown content with relative internal link paths + ''' + + def _sub(m): + caption = m.group('text') + link = m.group('path') + anchor = '' + + link_array = m.group('path').split('#') + if len(link_array) > 1: + link = link_array[0] + anchor = f'#{link_array[1]}' + root_path = self.project_path.absolute() / self.tmp_dir + + if Path(link).is_absolute() is False: + try: + origin_root = origin_file_path.relative_to(root_path) + depth_origin = len(origin_root.parts) + link = (markdown_file_path.absolute().parent / Path(link)).resolve() + link = link.relative_to(root_path) + link = '../' * depth_origin + link.as_posix() + self.logger.debug( + f'Updating link reference; user specified path: {m.group("path")}, ' + + f'absolute path: {link}' + ) + except Exception as exception: + self.logger.debug( + f'An error {exception} occurred when resolving the link: {m.group("path")}' + ) + link = m.group('path') + return f'[{caption}]({link}{anchor})' + + return self._link_pattern.sub(_sub, content) + def _adjust_paths_in_tags_attributes( self, content: str, @@ -736,7 +785,8 @@ def _process_include( to_end: bool = False, sethead: int or None = None, nohead: bool = False, - include_link: str or None = None + include_link: str or None = None, + origin_file_path: Path = None ) -> (str, list): '''Replace a local include statement with the file content. Necessary adjustments are applied to the content: cut between certain headings, @@ -764,10 +814,7 @@ def _process_include( anchors = [] - - if included_file_path.exists(): - included_file_path = included_file_path - else: + if not included_file_path.exists(): if self.options['allow_failure']: self.logger.error(f'The url or repo_url link is not correct, file not found: {included_file_path}') @@ -853,6 +900,7 @@ def _process_include( ).escape(included_content) included_content = self._adjust_image_paths(included_content, included_file_path) + included_content = self._adjust_links(included_content, included_file_path, origin_file_path) if project_root_path: included_content = self._adjust_paths_in_tags_attributes( @@ -898,7 +946,7 @@ def _prepare_path_for_includes_map(self, path: Path) -> str: donor_path = _path.as_posix() return donor_path - def _exist_in_includes_map(self, map: list, path: str) -> bool: + def _exist_in_includes_map(self, includes_map: list, path: str) -> bool: """Is there a path on the includes map :param map: Includes map @@ -906,7 +954,7 @@ def _exist_in_includes_map(self, map: list, path: str) -> bool: :returns: True or False """ - for obj in map: + for obj in includes_map: if obj["file"] == path: return True return False @@ -946,11 +994,11 @@ def _add_anchors(self, l: list, content: str) -> list: return l def clean_tokens(self, url: str) -> str: + token_pattern = r"(https*://)(.*)@(.*)" + s = url if self.enable_clean_tokens: - try: - s = re.sub(r"(https*://)(.*)@(.*)", r"\1\3", url) - except: - s = url + if re.search(str(token_pattern), str(url)): + s = re.sub(str(token_pattern), r"\1\3", str(url)) return s @@ -1110,7 +1158,8 @@ def process_includes( from_heading=body.group('from_heading'), to_heading=body.group('to_heading'), sethead=current_sethead, - nohead=options.get('nohead') + nohead=options.get('nohead'), + origin_file_path=markdown_file_path ) if self.includes_map_enable and self.includes_map_anchors: @@ -1141,7 +1190,8 @@ def process_includes( from_heading=body.group('from_heading'), to_heading=body.group('to_heading'), sethead=current_sethead, - nohead=options.get('nohead') + nohead=options.get('nohead'), + origin_file_path=markdown_file_path ) if self.includes_map_enable: @@ -1185,7 +1235,8 @@ def process_includes( to_end=options.get('to_end'), sethead=current_sethead, nohead=options.get('nohead'), - include_link=include_link + include_link=include_link, + origin_file_path=markdown_file_path ) if self.includes_map_enable: @@ -1219,7 +1270,8 @@ def process_includes( to_id=options.get('to_id'), to_end=options.get('to_end'), sethead=current_sethead, - nohead=options.get('nohead') + nohead=options.get('nohead'), + origin_file_path=markdown_file_path ) if self.includes_map_enable: @@ -1252,7 +1304,8 @@ def process_includes( to_id=options.get('to_id'), to_end=options.get('to_end'), sethead=current_sethead, - nohead=options.get('nohead') + nohead=options.get('nohead'), + origin_file_path=markdown_file_path ) if self.includes_map_enable: @@ -1283,6 +1336,7 @@ def process_includes( wrap_code = options.get('wrap_code', '') if wrap_code == 'triple_backticks' or wrap_code == 'triple_tildas': + wrapper = '' if wrap_code == 'triple_backticks': self.logger.debug('Wrapping included content as fence code block with triple backticks') @@ -1337,7 +1391,8 @@ def process_includes( for anchor in donor_anchors: if not 'anchors' in self.includes_map[i]: self.includes_map[i]['anchors'] = [] - self.includes_map[i]['anchors'].append(anchor) + if anchor not in self.includes_map[i]['anchors']: + self.includes_map[i]['anchors'].append(anchor) else: processed_content_part = content_part From 55ec7789f051ac5f6f633682448b98f9e0fe6708 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Fri, 21 Feb 2025 16:53:01 +0300 Subject: [PATCH 32/41] add: tests for adjust_links --- .gitignore | 1 + foliant/preprocessors/includes.py | 4 ++-- test/test_includes.py | 40 +++++++++++++++++++++++++++++-- test_in_docker.sh | 4 ++-- 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index b4cf3c6..b75d941 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ coverage.xml *.cover .hypothesis/ .includescache +.error_link # Translations *.mo diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 42349f7..c2e12ef 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -49,8 +49,8 @@ def __init__(self, *args, **kwargs): self.src_dir = self.config.get('src_dir') self.tmp_dir = self.config.get('tmp_dir', '__folianttmp__') - self.includes_map_enable = True # TODO:set the default value to False - self.includes_map_anchors = True # TODO:set the default value to False + self.includes_map_enable = False + self.includes_map_anchors = False if 'includes_map' in self.options: self.includes_map_enable = True if type(self.options['includes_map']) != bool and 'anchors' in self.options['includes_map']: diff --git a/test/test_includes.py b/test/test_includes.py index 049de10..dc07674 100644 --- a/test/test_includes.py +++ b/test/test_includes.py @@ -29,7 +29,7 @@ def test_src(self): input_mapping=input_map, expected_mapping=expected_map, ) - + def test_url(self): input_map = { 'index.md': '# My title\n\n', @@ -53,7 +53,7 @@ def test_repo_path(self): input_mapping=input_map, expected_mapping=expected_map, ) - + def test_include_link(self): input_map = { 'index.md': '# My title\n\n', @@ -261,3 +261,39 @@ def test_includes_map(self): input_mapping=input_map, expected_mapping=expected_map, ) + + def test_adjust_link(self): + input_map = { + 'sub/file_a.md': '# Title file_a\n\n', + 'sub/file_b.md': 'Included [file_c link](./file_c.md#anchor)', + 'sub/file_c.md': '# Included content \n\n## Header with anchor {#anchor}', + 'file_d.md': '# Title file_d\n\n' + } + expected_map = { + 'sub/file_a.md': '# Title file_a\n\nIncluded [file_c link](../../sub/file_c.md#anchor)', + 'sub/file_b.md': 'Included [file_c link](./file_c.md#anchor)', + 'sub/file_c.md': '# Included content \n\n## Header with anchor {#anchor}', + 'file_d.md': '# Title file_d\n\nIncluded [file_c link](../sub/file_c.md#anchor)' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + ) + + def test_adjust_link_two(self): + input_map = { + 'file_a.md': '# Title file_a\n\n', + 'file_b.md': 'Included [file_c link](./file_c.md#anchor)', + 'file_c.md': '# Included content \n\n## Header with anchor {#anchor}', + 'sub/file_d.md': '# Title file_d\n\n' + } + expected_map = { + 'file_a.md': '# Title file_a\n\nIncluded [file_c link](../file_c.md#anchor)', + 'file_b.md': 'Included [file_c link](./file_c.md#anchor)', + 'file_c.md': '# Included content \n\n## Header with anchor {#anchor}', + 'sub/file_d.md': '# Title file_d\n\nIncluded [file_c link](../../file_c.md#anchor)' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + ) diff --git a/test_in_docker.sh b/test_in_docker.sh index 66dc98b..c8bd732 100755 --- a/test_in_docker.sh +++ b/test_in_docker.sh @@ -2,5 +2,5 @@ docker run --rm -it \ -v "./:/app/" \ - --workdir "/app/" \ - python:3.9 "/app/test.sh" \ No newline at end of file + -w "/app/" \ + python:3.9 "/app/test.sh" From b76f3e39009ea6c02177173edd71eda1883d6e1f Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 25 Feb 2025 17:30:05 +0300 Subject: [PATCH 33/41] update: adjust_links --- foliant/preprocessors/includes.py | 51 ++++++++++++++++++++----------- test/test_includes.py | 22 +++++++++++-- 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 15568ec..a33612f 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -609,29 +609,46 @@ def _sub(m): caption = m.group('text') link = m.group('path') anchor = '' - link_array = m.group('path').split('#') if len(link_array) > 1: link = link_array[0] anchor = f'#{link_array[1]}' root_path = self.project_path.absolute() / self.tmp_dir - if Path(link).is_absolute() is False: - try: - origin_root = origin_file_path.relative_to(root_path) - depth_origin = len(origin_root.parts) - link = (markdown_file_path.absolute().parent / Path(link)).resolve() - link = link.relative_to(root_path) - link = '../' * depth_origin + link.as_posix() - self.logger.debug( - f'Updating link reference; user specified path: {m.group("path")}, ' + - f'absolute path: {link}' - ) - except Exception as exception: - self.logger.debug( - f'An error {exception} occurred when resolving the link: {m.group("path")}' - ) - link = m.group('path') + extension = Path(link).suffix + if extension == ".md": + try: + origin_rel = origin_file_path.relative_to(root_path) + depth_origin = len(origin_rel.parts) + link = (markdown_file_path.absolute().parent / Path(link)).resolve() + link = link.relative_to(root_path) + link = '../' * depth_origin + link.as_posix() + self.logger.debug( + f'Updating link reference; user specified path: {m.group("path")}, ' + + f'absolute path: {link}' + ) + except Exception as exception: + self.logger.debug( + f'An error {exception} occurred when resolving the link: {m.group("path")}' + ) + link = m.group('path') + elif extension == "": + try: + origin_rel = origin_file_path.relative_to(root_path) + depth_origin = len(origin_rel.parts) + depth_markdown_file = len(markdown_file_path.relative_to(root_path).parts) + if depth_origin > depth_markdown_file: + link = '../' * (depth_origin - depth_markdown_file) + link + self.logger.debug( + f'Updating link reference; user specified path: {m.group("path")}, ' + + f'absolute path: {link}' + ) + except Exception as exception: + self.logger.debug( + f'An error {exception} occurred when resolving the link: {m.group("path")}' + ) + link = m.group('path') + return f'[{caption}]({link}{anchor})' return self._link_pattern.sub(_sub, content) diff --git a/test/test_includes.py b/test/test_includes.py index dc07674..0d48c43 100644 --- a/test/test_includes.py +++ b/test/test_includes.py @@ -262,7 +262,7 @@ def test_includes_map(self): expected_mapping=expected_map, ) - def test_adjust_link(self): + def test_adjust_links_with_md(self): input_map = { 'sub/file_a.md': '# Title file_a\n\n', 'sub/file_b.md': 'Included [file_c link](./file_c.md#anchor)', @@ -280,7 +280,7 @@ def test_adjust_link(self): expected_mapping=expected_map, ) - def test_adjust_link_two(self): + def test_adjust_links_with_md_two(self): input_map = { 'file_a.md': '# Title file_a\n\n', 'file_b.md': 'Included [file_c link](./file_c.md#anchor)', @@ -297,3 +297,21 @@ def test_adjust_link_two(self): input_mapping=input_map, expected_mapping=expected_map, ) + + def test_adjust_links(self): + input_map = { + 'file_a.md': '# Title file_a\n\n', + 'file_b.md': 'Included [file_c link](../file_c/)', + 'file_c.md': '# Included content \n\n## Header', + 'sub/file_d.md': '# Title file_d\n\n' + } + expected_map = { + 'file_a.md': '# Title file_a\n\nIncluded [file_c link](../file_c/)', + 'file_b.md': 'Included [file_c link](../file_c/)', + 'file_c.md': '# Included content \n\n## Header', + 'sub/file_d.md': '# Title file_d\n\nIncluded [file_c link](../../file_c/)' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + ) From e89f2728ad5c431195ab388674f39ae01977261f Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Thu, 27 Feb 2025 11:40:31 +0300 Subject: [PATCH 34/41] add: resolving links as links to md files --- foliant/preprocessors/includes.py | 65 +++++++++++++++++-------------- test/test_includes.py | 35 +++++++++++++++++ 2 files changed, 70 insertions(+), 30 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index a33612f..d963441 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -604,6 +604,16 @@ def _adjust_links( :returns: Markdown content with relative internal link paths ''' + def _resolve_md_link(link, root_path, depth_origin): + try: + resolved_link = (markdown_file_path.absolute().parent / Path(link)).resolve() + resolved_link = resolved_link.relative_to(root_path) + resolved_link = '../' * depth_origin + resolved_link.as_posix() + return resolved_link + except Exception as exception: + self.logger.debug( + f'An error {exception} occurred when resolving the link: {link}' + ) def _sub(m): caption = m.group('text') @@ -616,38 +626,33 @@ def _sub(m): root_path = self.project_path.absolute() / self.tmp_dir if Path(link).is_absolute() is False: extension = Path(link).suffix - if extension == ".md": - try: - origin_rel = origin_file_path.relative_to(root_path) - depth_origin = len(origin_rel.parts) - link = (markdown_file_path.absolute().parent / Path(link)).resolve() - link = link.relative_to(root_path) - link = '../' * depth_origin + link.as_posix() - self.logger.debug( - f'Updating link reference; user specified path: {m.group("path")}, ' + - f'absolute path: {link}' - ) - except Exception as exception: - self.logger.debug( - f'An error {exception} occurred when resolving the link: {m.group("path")}' - ) - link = m.group('path') - elif extension == "": - try: - origin_rel = origin_file_path.relative_to(root_path) - depth_origin = len(origin_rel.parts) + try: + origin_rel = origin_file_path.relative_to(root_path) + depth_origin = len(origin_rel.parts) + if extension == ".md": + link = _resolve_md_link(link, root_path, depth_origin) + elif extension == "": depth_markdown_file = len(markdown_file_path.relative_to(root_path).parts) - if depth_origin > depth_markdown_file: + if depth_origin >= depth_markdown_file: link = '../' * (depth_origin - depth_markdown_file) + link - self.logger.debug( - f'Updating link reference; user specified path: {m.group("path")}, ' + - f'absolute path: {link}' - ) - except Exception as exception: - self.logger.debug( - f'An error {exception} occurred when resolving the link: {m.group("path")}' - ) - link = m.group('path') + else: + link_split = link.split('/') + if link_split[0] == '..': + if link_split[-1] == '': + link_split = link_split[:-1] + link_split = link_split[1:] + link = f"{'/'.join(link_split)}.md" + link = _resolve_md_link(link, root_path, depth_origin) + print(link) + self.logger.debug( + f'Updating link reference; user specified path: {m.group("path")}, ' + + f'absolute path: {link}' + ) + except Exception as exception: + self.logger.debug( + f'An error {exception} occurred when resolving the link: {m.group("path")}' + ) + link = m.group('path') return f'[{caption}]({link}{anchor})' diff --git a/test/test_includes.py b/test/test_includes.py index 0d48c43..40435e8 100644 --- a/test/test_includes.py +++ b/test/test_includes.py @@ -315,3 +315,38 @@ def test_adjust_links(self): input_mapping=input_map, expected_mapping=expected_map, ) + + + def test_adjust_links_two(self): + input_map = { + 'sub/file_a.md': '# Title file_a\n\n', + 'sub/file_b.md': 'Included [file_c link](../file_c/)', + 'file_d.md': '# Title file_d\n\n' + } + expected_map = { + 'sub/file_a.md': '# Title file_a\n\nIncluded [file_c link](../file_c/)', + 'sub/file_b.md': 'Included [file_c link](../file_c/)', + 'file_d.md': '# Title file_d\n\nIncluded [file_c link](../sub/file_c.md)' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + ) + + def test_adjust_links_three(self): + input_map = { + 'sub/file_a.md': '# Title file_a\n\n', + 'sub/file_b.md': 'Included [file_c link](../file_c)', + 'sub/file_c.md': '# Included content \n\n## Header', + 'file_d.md': '# Title file_d\n\n' + } + expected_map = { + 'sub/file_a.md': '# Title file_a\n\nIncluded [file_c link](../file_c)', + 'sub/file_b.md': 'Included [file_c link](../file_c)', + 'sub/file_c.md': '# Included content \n\n## Header', + 'file_d.md': '# Title file_d\n\nIncluded [file_c link](../sub/file_c.md)' + } + self.ptf.test_preprocessor( + input_mapping=input_map, + expected_mapping=expected_map, + ) From c0a7570c394f9e60fbf5c68d0a5f644b36269d68 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Mon, 24 Mar 2025 10:25:49 +0300 Subject: [PATCH 35/41] fix: unnecessary code removed --- foliant/preprocessors/includes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index d963441..025a6aa 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -643,7 +643,6 @@ def _sub(m): link_split = link_split[1:] link = f"{'/'.join(link_split)}.md" link = _resolve_md_link(link, root_path, depth_origin) - print(link) self.logger.debug( f'Updating link reference; user specified path: {m.group("path")}, ' + f'absolute path: {link}' From adc17beca4c35dd98ab684492dd938eb5ee56b2e Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 13 May 2025 09:55:20 +0300 Subject: [PATCH 36/41] fix: resolve md links --- foliant/preprocessors/includes.py | 6 +++--- test/test_includes.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 025a6aa..dd31c4e 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -604,7 +604,7 @@ def _adjust_links( :returns: Markdown content with relative internal link paths ''' - def _resolve_md_link(link, root_path, depth_origin): + def _resolve_link(link, root_path, depth_origin): try: resolved_link = (markdown_file_path.absolute().parent / Path(link)).resolve() resolved_link = resolved_link.relative_to(root_path) @@ -630,7 +630,7 @@ def _sub(m): origin_rel = origin_file_path.relative_to(root_path) depth_origin = len(origin_rel.parts) if extension == ".md": - link = _resolve_md_link(link, root_path, depth_origin) + link = _resolve_link(link, root_path, depth_origin - 1) elif extension == "": depth_markdown_file = len(markdown_file_path.relative_to(root_path).parts) if depth_origin >= depth_markdown_file: @@ -642,7 +642,7 @@ def _sub(m): link_split = link_split[:-1] link_split = link_split[1:] link = f"{'/'.join(link_split)}.md" - link = _resolve_md_link(link, root_path, depth_origin) + link = _resolve_link(link, root_path, depth_origin) self.logger.debug( f'Updating link reference; user specified path: {m.group("path")}, ' + f'absolute path: {link}' diff --git a/test/test_includes.py b/test/test_includes.py index 40435e8..c52d8c7 100644 --- a/test/test_includes.py +++ b/test/test_includes.py @@ -270,10 +270,10 @@ def test_adjust_links_with_md(self): 'file_d.md': '# Title file_d\n\n' } expected_map = { - 'sub/file_a.md': '# Title file_a\n\nIncluded [file_c link](../../sub/file_c.md#anchor)', + 'sub/file_a.md': '# Title file_a\n\nIncluded [file_c link](../sub/file_c.md#anchor)', 'sub/file_b.md': 'Included [file_c link](./file_c.md#anchor)', 'sub/file_c.md': '# Included content \n\n## Header with anchor {#anchor}', - 'file_d.md': '# Title file_d\n\nIncluded [file_c link](../sub/file_c.md#anchor)' + 'file_d.md': '# Title file_d\n\nIncluded [file_c link](sub/file_c.md#anchor)' } self.ptf.test_preprocessor( input_mapping=input_map, @@ -288,10 +288,10 @@ def test_adjust_links_with_md_two(self): 'sub/file_d.md': '# Title file_d\n\n' } expected_map = { - 'file_a.md': '# Title file_a\n\nIncluded [file_c link](../file_c.md#anchor)', + 'file_a.md': '# Title file_a\n\nIncluded [file_c link](file_c.md#anchor)', 'file_b.md': 'Included [file_c link](./file_c.md#anchor)', 'file_c.md': '# Included content \n\n## Header with anchor {#anchor}', - 'sub/file_d.md': '# Title file_d\n\nIncluded [file_c link](../../file_c.md#anchor)' + 'sub/file_d.md': '# Title file_d\n\nIncluded [file_c link](../file_c.md#anchor)' } self.ptf.test_preprocessor( input_mapping=input_map, From 66f8de2880734eacfab447b08e704c41899d1a86 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Mon, 21 Jul 2025 13:24:14 +0300 Subject: [PATCH 37/41] fix: anchor in custom_id --- foliant/preprocessors/includes.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index dd31c4e..b27eff4 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -1045,27 +1045,6 @@ def _exist_in_includes_map(self, map: list, path: str) -> bool: return True return False - def _find_anchors(self, content: str) -> list: - anchors_list = [] - - anchors = re.findall(r'\([\-\_A-Za-z0-9]+)\<\/anchor\>', content) - for anchor in anchors: - anchors_list.append(anchor) - custom_ids = re.findall(r'\{\#([\-A-Za-z0-9]+)\}', content) - for anchor in custom_ids: - anchors_list.append(anchor) - elements_with_ids = re.findall(r'id\=[\"\']([\-A-Za-z0-9]+)[\"\']', content) - for anchor in elements_with_ids: - anchors_list.append(anchor) - return anchors_list - - def _add_anchors(self, l: list, content: str) -> list: - anchors = self._find_anchors(content) - if len(anchors) > 0: - for anchor in anchors: - l.append(anchor) - return l - def process_includes( self, markdown_file_path: Path, From 40dac624f861621ce92f374c7a1922cc5cc3f00f Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Mon, 28 Jul 2025 15:59:23 +0300 Subject: [PATCH 38/41] add: resolve link to same file --- foliant/preprocessors/includes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index b27eff4..28c5f1b 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -643,6 +643,12 @@ def _sub(m): link_split = link_split[1:] link = f"{'/'.join(link_split)}.md" link = _resolve_link(link, root_path, depth_origin) + if ( + (depth_origin - depth_markdown_file) == 0 + )and ( + Path(Path(link).name).with_suffix('').as_posix() == Path(origin_rel.name).with_suffix('').as_posix() + ): + link = '' self.logger.debug( f'Updating link reference; user specified path: {m.group("path")}, ' + f'absolute path: {link}' From 96b5bdc8719883b795b1a1fb9618ecab3e092b06 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 29 Jul 2025 09:14:32 +0300 Subject: [PATCH 39/41] fix: tests --- foliant/preprocessors/includes.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 28c5f1b..009abe2 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -629,12 +629,13 @@ def _sub(m): try: origin_rel = origin_file_path.relative_to(root_path) depth_origin = len(origin_rel.parts) + depth_markdown_file = len(markdown_file_path.relative_to(root_path).parts) + depth_difference = depth_origin - depth_markdown_file if extension == ".md": link = _resolve_link(link, root_path, depth_origin - 1) elif extension == "": - depth_markdown_file = len(markdown_file_path.relative_to(root_path).parts) if depth_origin >= depth_markdown_file: - link = '../' * (depth_origin - depth_markdown_file) + link + link = '../' * depth_difference + link else: link_split = link.split('/') if link_split[0] == '..': @@ -644,9 +645,9 @@ def _sub(m): link = f"{'/'.join(link_split)}.md" link = _resolve_link(link, root_path, depth_origin) if ( - (depth_origin - depth_markdown_file) == 0 - )and ( - Path(Path(link).name).with_suffix('').as_posix() == Path(origin_rel.name).with_suffix('').as_posix() + depth_difference == 0 + ) and ( + Path(Path(link).name).with_suffix('').as_posix() == Path(origin_rel.name).with_suffix('').as_posix() ): link = '' self.logger.debug( From 887e205fda4c3674d7f868d25212268b89f95853 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Mon, 15 Sep 2025 17:47:55 +0300 Subject: [PATCH 40/41] fix: mistakes --- foliant/preprocessors/includes.py | 83 ++++++++----------------------- 1 file changed, 22 insertions(+), 61 deletions(-) diff --git a/foliant/preprocessors/includes.py b/foliant/preprocessors/includes.py index 009abe2..2bcd18c 100644 --- a/foliant/preprocessors/includes.py +++ b/foliant/preprocessors/includes.py @@ -1,5 +1,6 @@ import re -import urllib +import urllib.request +import urllib.error from shutil import rmtree from io import StringIO from hashlib import md5 @@ -204,7 +205,6 @@ def _download_file_from_url(self, url: str) -> Path: # End of the conversion code block with open(downloaded_file_path, 'w', encoding='utf8') as downloaded_file: - downloaded_file.write(downloaded_content) else: self.logger.debug('File found in cache, it was already downloaded at this run') @@ -624,7 +624,7 @@ def _sub(m): link = link_array[0] anchor = f'#{link_array[1]}' root_path = self.project_path.absolute() / self.tmp_dir - if Path(link).is_absolute() is False: + if not Path(link).is_absolute(): extension = Path(link).suffix try: origin_rel = origin_file_path.relative_to(root_path) @@ -774,7 +774,6 @@ def _get_included_file_path( included_file_path = (current_processed_file_path.parent / user_specified_path).resolve() - self.logger.debug(f'User-specified included file path: {included_file_path}') if ( @@ -851,15 +850,16 @@ def _process_include( if not Path(path_error_link).exists(): Path(path_error_link).mkdir() - path_error_file = open(path_error_link/included_file_path.name, 'w+') + path_error_file = open(path_error_link/included_file_path.name, 'w+', encoding='utf8') if self.options['stub_text']: path_error_file.write(f'The url or repo_url link is not correct, file not found: {included_file_path}') path_error_file.close() - included_file_path=path_error_link/included_file_path.name + included_file_path = path_error_link/included_file_path.name else: - self.logger.error(f'The url or repo_url link is not correct, file not found: {included_file_path}') + self.logger.error(f'The url or repo_url link is not correct, file not found: {included_file_path}') + return '', anchors with open(included_file_path, encoding='utf8') as included_file: included_content = included_file.read() @@ -867,8 +867,8 @@ def _process_include( # The beginning of the block codes for converting relative paths to links if include_link: dict_new_link = {} - regexp_find_link = re.compile('\[.+?\]\(.+?\)') - regexp_find_path = re.compile('\(.+?\)') + regexp_find_link = re.compile(r'\[.+?\]\(.+?\)') + regexp_find_path = re.compile(r'\(.+?\)') old_found_link = regexp_find_link.findall(included_content) @@ -887,10 +887,9 @@ def _process_include( for line in dict_new_link: included_content = included_content.replace(line, dict_new_link[line]) # End of the conversion code block - # Removing metadata from content before including + # Removing metadata from content before including included_content = remove_meta(included_content) - included_content = self._cut_from_position_to_position( included_content, from_heading, @@ -909,7 +908,6 @@ def _process_include( if self.config.get('escape_code', False): if isinstance(self.config['escape_code'], dict): escapecode_options = self.config['escape_code'].get('options', {}) - else: escapecode_options = {} @@ -950,42 +948,6 @@ def _process_include( return included_content, anchors - def _prepare_path_for_includes_map(self, path: Path) -> str: - """Preparing the path of the inserted file for the includes map - - :param path: The path to the Markdown file to be inserted - - :returns: The path that will be used in the includes map - """ - donor_path = None - if path.as_posix().startswith(self.working_dir.as_posix()): - _path = path.relative_to(self.working_dir) - donor_path = f"{self.src_dir}/{_path.as_posix()}" - elif path.as_posix().startswith(getcwd()): - _path = path.relative_to(getcwd()) - if _path.as_posix().startswith(self.working_dir.as_posix()): - _path = _path.relative_to(self.working_dir) - if _path.as_posix().startswith(self.working_dir.as_posix()): - donor_path = f"{self.src_dir}/{_path.relative_to(self.working_dir).as_posix()}" - else: - donor_path = f"{self.src_dir}/{_path.as_posix()}" - else: - donor_path = _path.as_posix() - return donor_path - - def _exist_in_includes_map(self, includes_map: list, path: str) -> bool: - """Is there a path on the includes map - - :param map: Includes map - :param path: Path - - :returns: True or False - """ - for obj in includes_map: - if obj["file"] == path: - return True - return False - def _find_anchors(self, content: str) -> list: """Search for anchor links in the text @@ -1015,18 +977,16 @@ def _add_anchors(self, l: list, content: str) -> list: :returns: A list with added anchors """ anchors = self._find_anchors(content) - if len(anchors) > 0: - for anchor in anchors: - l.append(anchor) + if anchors: + l.extend(anchors) return l def clean_tokens(self, url: str) -> str: token_pattern = r"(https*://)(.*)@(.*)" s = url if self.enable_clean_tokens: - if re.search(str(token_pattern), str(url)): - s = re.sub(str(token_pattern), r"\1\3", str(url)) - + if re.search(token_pattern, str(url)): + s = re.sub(token_pattern, r"\1\3", str(url)) return s def _prepare_path_for_includes_map(self, path: Path) -> str: @@ -1046,8 +1006,8 @@ def _prepare_path_for_includes_map(self, path: Path) -> str: donor_path = _path.as_posix() return donor_path - def _exist_in_includes_map(self, map: list, path: str) -> bool: - for obj in map: + def _exist_in_includes_map(self, includes_map: list, path: str) -> bool: + for obj in includes_map: if obj["file"] == path: return True return False @@ -1430,18 +1390,18 @@ def process_includes( if recipient_md_path in self.chapters or "index.md" in recipient_md_path: if not self._exist_in_includes_map(self.includes_map, recipient_md_path): if not self.includes_map_anchors or len(donor_anchors) == 0: - self.includes_map.append({ 'file': recipient_md_path, "includes": []}) + self.includes_map.append({'file': recipient_md_path, "includes": []}) else: - self.includes_map.append({ 'file': recipient_md_path, "includes": [], 'anchors': []}) + self.includes_map.append({'file': recipient_md_path, "includes": [], 'anchors': []}) for i, f in enumerate(self.includes_map): if f['file'] == recipient_md_path: self.includes_map[i]['includes'].append(donor_md_path) if self.includes_map_anchors: + if 'anchors' not in self.includes_map[i]: + self.includes_map[i]['anchors'] = [] for anchor in donor_anchors: - if not 'anchors' in self.includes_map[i]: - self.includes_map[i]['anchors'] = [] if anchor not in self.includes_map[i]['anchors']: self.includes_map[i]['anchors'].append(anchor) @@ -1484,7 +1444,8 @@ def apply(self): self.logger.info('Applying preprocessor') # Cleaning up downloads because the content of remote source may have modified - rmtree(self._downloaded_dir_path, ignore_errors=True) + if self._downloaded_dir_path.exists(): + rmtree(self._downloaded_dir_path, ignore_errors=True) source_files_extensions = self._get_source_files_extensions() From b95189f3a5c4f0f7756fba8012c3d38b14fad3a8 Mon Sep 17 00:00:00 2001 From: Timur Osmanov Date: Tue, 16 Sep 2025 12:05:23 +0300 Subject: [PATCH 41/41] update: changelog.md --- changelog.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/changelog.md b/changelog.md index f012ea8..02e7031 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,9 @@ +# 1.1.19 + +- Add: anchor link parsing for the includes map. +- Add: adjust of links in the included content depending on parent file. +- Fix: bugs. + # 1.1.18 - Add: option for generation of the includes map containing information about files inserted using the preprocessor.