From 935382017015f563c34d7f61abd3d474ab1fc4b7 Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Mon, 12 Dec 2022 22:22:13 -0800 Subject: [PATCH 01/11] Add sitemap_validator --- sphinx_sitemap/__init__.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sphinx_sitemap/__init__.py b/sphinx_sitemap/__init__.py index 42ba008..d4d17ad 100644 --- a/sphinx_sitemap/__init__.py +++ b/sphinx_sitemap/__init__.py @@ -13,9 +13,13 @@ import os import xml.etree.ElementTree as ET +import sphinx +from sphinx.util.logging import getLogger __version__ = "2.3.0" +logger = getLogger(__name__) + def setup(app): """Setup connects events to the sitemap builder""" @@ -24,7 +28,7 @@ def setup(app): "sitemap_url_scheme", default="{lang}{version}{link}", rebuild="" ) app.add_config_value("sitemap_locales", default=None, rebuild="") - + app.add_config_value("sitemap_validator", default={}, rebuild="") app.add_config_value("sitemap_filename", default="sitemap.xml", rebuild="") try: @@ -160,6 +164,18 @@ def create_sitemap(app, exception): ET.ElementTree(root).write( filename, xml_declaration=True, encoding="utf-8", method="xml" ) + + key = "{}{}".format(app.config.language or '', app.config.version or '') + if app.config.sitemap_validator and key in app.config.sitemap_validator: + with open(filename, "r") as myfile: + sitemap = myfile.read() + # if any of the urls don't match, throw a warning + for url in app.config.sitemap_validator[key]: + if url not in sitemap: + logger.warning( + 'Sitemap failed validation. 
{} not found in {}'.format(url, filename), + type='sitemap', subtype='validation') + print( "%s was generated for URL %s in %s" % (app.config.sitemap_filename, site_url, filename) From 9a2c498265450f0f6529cb3bca0a1c2683710e9f Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Mon, 12 Dec 2022 22:24:23 -0800 Subject: [PATCH 02/11] Fix typo --- sphinx_sitemap/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sphinx_sitemap/__init__.py b/sphinx_sitemap/__init__.py index d4d17ad..c15af9b 100644 --- a/sphinx_sitemap/__init__.py +++ b/sphinx_sitemap/__init__.py @@ -13,7 +13,6 @@ import os import xml.etree.ElementTree as ET -import sphinx from sphinx.util.logging import getLogger __version__ = "2.3.0" From 783342cc3891a49f314e2484ee1976c70389c84c Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Mon, 12 Dec 2022 22:25:52 -0800 Subject: [PATCH 03/11] Run through autoformatter --- sphinx_sitemap/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sphinx_sitemap/__init__.py b/sphinx_sitemap/__init__.py index c15af9b..21ef6c2 100644 --- a/sphinx_sitemap/__init__.py +++ b/sphinx_sitemap/__init__.py @@ -13,6 +13,7 @@ import os import xml.etree.ElementTree as ET + from sphinx.util.logging import getLogger __version__ = "2.3.0" @@ -164,16 +165,20 @@ def create_sitemap(app, exception): filename, xml_declaration=True, encoding="utf-8", method="xml" ) - key = "{}{}".format(app.config.language or '', app.config.version or '') + key = "{}{}".format(app.config.language or "", app.config.version or "") if app.config.sitemap_validator and key in app.config.sitemap_validator: with open(filename, "r") as myfile: - sitemap = myfile.read() + sitemap = myfile.read() # if any of the urls don't match, throw a warning for url in app.config.sitemap_validator[key]: if url not in sitemap: logger.warning( - 'Sitemap failed validation. {} not found in {}'.format(url, filename), - type='sitemap', subtype='validation') + "Sitemap failed validation. 
{} not found in {}".format( + url, filename + ), + type="sitemap", + subtype="validation", + ) print( "%s was generated for URL %s in %s" From 8ecfda4d43f9e29473300354f0f415476aa6fca6 Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Tue, 13 Dec 2022 19:00:11 -0800 Subject: [PATCH 04/11] Add rough draft of docs --- README.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.rst b/README.rst index cdca082..8ab8b47 100644 --- a/README.rst +++ b/README.rst @@ -65,6 +65,21 @@ Or for nested deployments, something like: appends trailing slashes to both the ``language`` and ``version`` values. You can also omit values from the scheme for desired behavior. +Setting up URL Validation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use `sitemap_validator` to setup URL validation, where a dictionary of lists is used to validate +one or more URLS for a given build based on the ``language`` and ``version``. + +For example: + +.. code-block:: python + + sitemap_validator = { + 'enlatest': ['https://example.com/en/latest/index.html', 'https://example.com/en/latest/test.html'], + 'delatest': ['https://example.com/de/latest/index.html', 'https://example.com/de/latest/test.html'] + } + Changing the Filename ^^^^^^^^^^^^^^^^^^^^^ From e924e56d0a9a98f05495711992544ac29ba26f6f Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Tue, 13 Dec 2022 19:09:09 -0800 Subject: [PATCH 05/11] Expand on docs a little --- README.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 8ab8b47..d522985 100644 --- a/README.rst +++ b/README.rst @@ -68,10 +68,11 @@ can also omit values from the scheme for desired behavior. Setting up URL Validation ^^^^^^^^^^^^^^^^^^^^^^^^^ -Use `sitemap_validator` to setup URL validation, where a dictionary of lists is used to validate -one or more URLS for a given build based on the ``language`` and ``version``. 
+Use ``sitemap_validator`` to setup URL validation, where a dictionary of lists is used to +validate one or more URLS for a given build. -For example: +The keys for the dictionary are a concatenation of the `language`_ and `version`_ config +values for that build. For example: .. code-block:: python From 58b10a6e745582a59606b1333a2c98f9a6bd918e Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Sat, 17 Dec 2022 18:27:34 -0800 Subject: [PATCH 06/11] Move to a definition --- sphinx_sitemap/__init__.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/sphinx_sitemap/__init__.py b/sphinx_sitemap/__init__.py index 21ef6c2..76d322c 100644 --- a/sphinx_sitemap/__init__.py +++ b/sphinx_sitemap/__init__.py @@ -108,6 +108,29 @@ def add_html_link(app, pagename, templatename, context, doctree): app.sitemap_links.append(pagename + ".html") +def validate_sitemap(app, filename): + """ if sitemap_validator is set, then use to check that all of the given URLs + for the current language and version are included in the sitemap. """ + key = "{}{}".format(app.config.language or "", app.config.version or "") + key = key or "nil" + passed = True + if app.config.sitemap_validator and key in app.config.sitemap_validator: + with open(filename, "r") as myfile: + sitemap = myfile.read() + # if any of the urls don't match, throw a warning + for url in app.config.sitemap_validator[key]: + if url not in sitemap: + passed = False + logger.warning( + "Sitemap failed validation. 
{} not found in {}".format( + url, filename + ), + type="sitemap", + subtype="validation", + ) + return passed + + def create_sitemap(app, exception): """Generates the sitemap.xml from the collected HTML page links""" site_url = app.builder.config.site_url or app.builder.config.html_baseurl @@ -165,20 +188,7 @@ def create_sitemap(app, exception): filename, xml_declaration=True, encoding="utf-8", method="xml" ) - key = "{}{}".format(app.config.language or "", app.config.version or "") - if app.config.sitemap_validator and key in app.config.sitemap_validator: - with open(filename, "r") as myfile: - sitemap = myfile.read() - # if any of the urls don't match, throw a warning - for url in app.config.sitemap_validator[key]: - if url not in sitemap: - logger.warning( - "Sitemap failed validation. {} not found in {}".format( - url, filename - ), - type="sitemap", - subtype="validation", - ) + validate_sitemap(app, filename) print( "%s was generated for URL %s in %s" From 49765ad919da7c40872b7ba7ae657bc89f5481ed Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Sat, 17 Dec 2022 18:51:20 -0800 Subject: [PATCH 07/11] Add docs for sitemap_validator_required --- README.rst | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index d522985..2e0f33c 100644 --- a/README.rst +++ b/README.rst @@ -68,19 +68,37 @@ can also omit values from the scheme for desired behavior. Setting up URL Validation ^^^^^^^^^^^^^^^^^^^^^^^^^ -Use ``sitemap_validator`` to setup URL validation, where a dictionary of lists is used to +Use ``sitemap_validator_urls`` to setup URL validation, where a dictionary of lists is used to validate one or more URLS for a given build. The keys for the dictionary are a concatenation of the `language`_ and `version`_ config -values for that build. For example: +values for that build, where the string ``nil`` is set for the key if both the language and +version are not set. 
For example, to setup validation for multiple builds: .. code-block:: python - sitemap_validator = { - 'enlatest': ['https://example.com/en/latest/index.html', 'https://example.com/en/latest/test.html'], - 'delatest': ['https://example.com/de/latest/index.html', 'https://example.com/de/latest/test.html'] + sitemap_validator_urls = { + 'enlatest': ['https://my-site..com/en/latest/index.html', 'https://my-site..com/en/latest/test.html'], + 'delatest': ['https://my-site..com/de/latest/index.html', 'https://my-site..com/de/latest/test.html'], } +or an example for a single build: + +.. code-block:: python + + sitemap_validator_urls = { + 'nil': ['https://my-site..com/en/latest/index.html', 'https://my-site..com/en/latest/test.html'], + } + +For single builds, you can choose to set ``sitemap_validator_required`` to validate that the +`language`_ and `version`_ config values are concatenated as expected, where the string +``nil`` is used if both the language and version are not set. For example, if `language`_ is +set to ``en`` and `version`_ is set to ``latest``: + +.. code-block:: python + + sitemap_validator_required = 'enlatest' + Changing the Filename ^^^^^^^^^^^^^^^^^^^^^ From fa19c7183613bcf29b98eacde1805b3b3d9ad94f Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Sat, 17 Dec 2022 18:52:15 -0800 Subject: [PATCH 08/11] Fix typos --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 2e0f33c..34b1c5c 100644 --- a/README.rst +++ b/README.rst @@ -78,8 +78,8 @@ version are not set. For example, to setup validation for multiple builds: .. 
code-block:: python sitemap_validator_urls = { - 'enlatest': ['https://my-site..com/en/latest/index.html', 'https://my-site..com/en/latest/test.html'], - 'delatest': ['https://my-site..com/de/latest/index.html', 'https://my-site..com/de/latest/test.html'], + 'enlatest': ['https://my-site.com/en/latest/index.html', 'https://my-site..com/en/latest/test.html'], + 'delatest': ['https://my-site.com/de/latest/index.html', 'https://my-site..com/de/latest/test.html'], } or an example for a single build: @@ -87,7 +87,7 @@ or an example for a single build: .. code-block:: python sitemap_validator_urls = { - 'nil': ['https://my-site..com/en/latest/index.html', 'https://my-site..com/en/latest/test.html'], + 'nil': ['https://my-site.com/en/latest/index.html', 'https://my-site..com/en/latest/test.html'], } For single builds, you can choose to set ``sitemap_validator_required`` to validate that the From b6193421c04572d9f4f052c8d209321288330e2e Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Sat, 17 Dec 2022 19:00:35 -0800 Subject: [PATCH 09/11] Add sitemap_validator_required --- sphinx_sitemap/__init__.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/sphinx_sitemap/__init__.py b/sphinx_sitemap/__init__.py index 76d322c..f9a4364 100644 --- a/sphinx_sitemap/__init__.py +++ b/sphinx_sitemap/__init__.py @@ -28,7 +28,8 @@ def setup(app): "sitemap_url_scheme", default="{lang}{version}{link}", rebuild="" ) app.add_config_value("sitemap_locales", default=None, rebuild="") - app.add_config_value("sitemap_validator", default={}, rebuild="") + app.add_config_value("sitemap_validator_urls", default={}, rebuild="") + app.add_config_value("sitemap_validator_required", default=None, rebuild="") app.add_config_value("sitemap_filename", default="sitemap.xml", rebuild="") try: @@ -109,16 +110,31 @@ def add_html_link(app, pagename, templatename, context, doctree): def validate_sitemap(app, filename): - """ if sitemap_validator is set, then use to 
check that all of the given URLs - for the current language and version are included in the sitemap. """ + """ + If sitemap_validator_required is set, then make sure the concatenated language and + version match the given string. + + If sitemap_validator_urls is set, then use to check that all of the given URLs + for the current language and version are included in the sitemap. + """ key = "{}{}".format(app.config.language or "", app.config.version or "") key = key or "nil" + + if app.config.sitemap_validator_required and key != app.config.sitemap_validator_required: + logger.warning( + "Sitemap failed validation. {} does not match the required {}".format( + key, app.config.sitemap_validator_required + ), + type="sitemap", + subtype="validation", + ) + passed = True - if app.config.sitemap_validator and key in app.config.sitemap_validator: + if app.config.sitemap_validator_urls and key in app.config.sitemap_validator_urls: with open(filename, "r") as myfile: sitemap = myfile.read() # if any of the urls don't match, throw a warning - for url in app.config.sitemap_validator[key]: + for url in app.config.sitemap_validator_urls[key]: if url not in sitemap: passed = False logger.warning( From bacdfd2bdc4ba826f28acaecda6e1515170b9995 Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Sat, 17 Dec 2022 22:18:02 -0800 Subject: [PATCH 10/11] Improve Sphinx 5 note --- README.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 34b1c5c..c524317 100644 --- a/README.rst +++ b/README.rst @@ -45,8 +45,10 @@ Customizing the URL Scheme The default URL format is ``{lang}{version}{link}``. ``{lang}`` and ``{version}`` are controlled by the `language`_ and `version`_ config variables. -**Note:** As of Sphinx version 5, the ``language`` config value defaults to ``"en"``, if that -makes the default scheme produce the incorrect url, then change the default behavior. 
+ **Note:** As of Sphinx version 5, the ``language`` config value defaults to ``"en"``, as + opposed to the previous behavior of being blank, if that makes the default scheme produce + the incorrect url, then change the default scheme. You may also want to look at **Setting + up URL Validation** section below to help ensure the sitemap stays accurate. To change the default behavior, set the value of ``sitemap_url_scheme`` in **conf.py** to the desired format. For example: @@ -72,7 +74,7 @@ Use ``sitemap_validator_urls`` to setup URL validation, where a dictionary of li validate one or more URLS for a given build. The keys for the dictionary are a concatenation of the `language`_ and `version`_ config -values for that build, where the string ``nil`` is set for the key if both the language and +values for that build, where the string ``"nil"`` is set for the key if both the language and version are not set. For example, to setup validation for multiple builds: .. code-block:: python @@ -93,7 +95,7 @@ or an example for a single build: For single builds, you can choose to set ``sitemap_validator_required`` to validate that the `language`_ and `version`_ config values are concatenated as expected, where the string ``nil`` is used if both the language and version are not set. For example, if `language`_ is -set to ``en`` and `version`_ is set to ``latest``: +set to ``"en"`` and `version`_ is set to ``"latest"``: .. 
code-block:: python From 92dd444886df50ee732a9eaa67779b876bd8ef4c Mon Sep 17 00:00:00 2001 From: Jared Dillard Date: Tue, 27 Dec 2022 21:12:37 -0800 Subject: [PATCH 11/11] Fix some vale issues --- docs/advanced-configuration.rst | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/docs/advanced-configuration.rst b/docs/advanced-configuration.rst index 8028a15..c7367a5 100644 --- a/docs/advanced-configuration.rst +++ b/docs/advanced-configuration.rst @@ -8,9 +8,8 @@ Customizing the URL Scheme :confval:`sitemap_url_scheme` defaults to ``{lang}{version}{link}``, where ``{lang}`` and ``{version}`` get set by :confval:`language` and :confval:`version` in **conf.py**. -.. important:: As of Sphinx version 5, the :confval:`language` config value defaults to ``"en"``, as - opposed to the previous behavior of being blank, if that makes the default scheme produce - the incorrect url, then change the default scheme. You may also want to look at :ref:`` section below to help ensure the sitemap stays accurate. +.. important:: As of Sphinx version 5, :confval:`language` defaults to ``"en"``. If that makes the default scheme produce + the incorrect URL, then change the default scheme. You may also want to look at the **Setting up URL Validation** section below to help ensure the sitemap stays accurate. To change the default behavior, set the value of :confval:`sitemap_url_scheme` in **conf.py** to the desired format. For example: @@ -37,9 +36,8 @@ Setting up URL Validation Use :confval:`sitemap_validator_urls` to setup URL validation, where a dictionary of lists is used to validate one or more URLS for a given build. -The keys for the dictionary are a concatenation of the :confval:`language` and :confval:`version` config -values for that build, where the string ``"nil"`` is set for the key if both the language and -version are not set.
For example, to setup validation for multiple builds: +The keys for the dictionary are a concatenation of :confval:`language` and :confval:`version` for that build, where the string ``"nil"`` is set for the key if both the language and version are not set. +For example, to set up validation for multiple builds: .. code-block:: python @@ -56,10 +54,8 @@ or an example for a single build: 'nil': ['https://my-site.com/en/latest/index.html', 'https://my-site..com/en/latest/test.html'], } -For single builds, you can choose to set :confval:`sitemap_validator_required` to validate that the -:confval:`language` and :confval:`version` config values are concatenated as expected, where the string -``nil`` is used if both the language and version are not set. For example, if :confval:`language` is -set to ``"en"`` and :confval:`version` is set to ``"latest"``: +For single builds, set :confval:`sitemap_validator_required` to validate that :confval:`language` and :confval:`version` are concatenated as expected, with ``"nil"`` used when neither :confval:`language` nor :confval:`version` is set. +For example, with :confval:`language` set to ``"en"`` and :confval:`version` set to ``"latest"``: .. code-block:: python