diff --git a/docs/advanced-configuration.rst b/docs/advanced-configuration.rst index 0aa0f8d..c7367a5 100644 --- a/docs/advanced-configuration.rst +++ b/docs/advanced-configuration.rst @@ -8,8 +8,8 @@ Customizing the URL Scheme :confval:`sitemap_url_scheme` defaults to ``{lang}{version}{link}``, where ``{lang}`` and ``{version}`` get set by :confval:`language` and :confval:`version` in **conf.py**. -.. important:: As of Sphinx version 5, ``language`` defaults to ``"en"``, if that - makes the default scheme produce the incorrect URL, then change the default behavior. +.. important:: As of Sphinx version 5, :confval:`language` defaults to ``"en"``. If that makes the default scheme produce + the incorrect URL, then change the default scheme. You may also want to look at the :ref:`configuration_url_validation` section below to help ensure the sitemap stays accurate. To change the default behavior, set the value of :confval:`sitemap_url_scheme` in **conf.py** to the desired format. For example: @@ -28,6 +28,39 @@ Or for nested deployments, something like: You can also omit values from the scheme for desired behavior. +.. _configuration_url_validation: + +Setting up URL Validation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use :confval:`sitemap_validator_urls` to set up URL validation, where a dictionary of lists is used to +validate one or more URLs for a given build. + +The keys for the dictionary are a concatenation of :confval:`language` and :confval:`version` for that build, where the string ``"nil"`` is set for the key if neither the language nor the version is set. +For example, to set up validation for multiple builds: + +.. code-block:: python + + sitemap_validator_urls = { + 'enlatest': ['https://my-site.com/en/latest/index.html', 'https://my-site.com/en/latest/test.html'], + 'delatest': ['https://my-site.com/de/latest/index.html', 'https://my-site.com/de/latest/test.html'], + } + +or an example for a single build: + +.. 
code-block:: python + + sitemap_validator_urls = { + 'nil': ['https://my-site.com/en/latest/index.html', 'https://my-site.com/en/latest/test.html'], + } + +For single builds, set :confval:`sitemap_validator_required` to validate that :confval:`language` and :confval:`version` are concatenated as expected, with ``"nil"`` being used when neither :confval:`language` nor :confval:`version` is set. +For example, with :confval:`language` set to ``"en"`` and :confval:`version` set to ``"latest"``: + +.. code-block:: python + + sitemap_validator_required = 'enlatest' + .. _configuration_changing_filename: Changing the Filename diff --git a/docs/configuration-values.rst b/docs/configuration-values.rst index 46d1b27..9b0b7ce 100644 --- a/docs/configuration-values.rst +++ b/docs/configuration-values.rst @@ -11,6 +11,15 @@ A list of possible configuration values to configure in **conf.py**: .. versionadded:: 2.0.0 +.. confval:: sitemap_validator_urls + + A dictionary that maps a combination of :confval:`language` and :confval:`version` to the list of URLs to check for that build. + Default is ``{}``. + + See :ref:`configuration_url_validation` for more information. + + .. versionadded:: 2.5.0 + .. confval:: sitemap_filename The filename used for the sitemap. Default is ``sitemap.xml``. 
diff --git a/sphinx_sitemap/__init__.py b/sphinx_sitemap/__init__.py
index 0a280c1..7d81d90 100644
--- a/sphinx_sitemap/__init__.py
+++ b/sphinx_sitemap/__init__.py
@@ -30,7 +30,8 @@ def setup(app):
         "sitemap_url_scheme", default="{lang}{version}{link}", rebuild=""
     )
     app.add_config_value("sitemap_locales", default=None, rebuild="")
-
+    app.add_config_value("sitemap_validator_urls", default={}, rebuild="")
+    app.add_config_value("sitemap_validator_required", default=None, rebuild="")
     app.add_config_value("sitemap_filename", default="sitemap.xml", rebuild="")
 
     try:
@@ -111,6 +112,60 @@ def add_html_link(app, pagename, templatename, context, doctree):
     env.sitemap_links.put(sitemap_link)
 
 
+def validate_sitemap(app, filename):
+    """
+    Validate the generated sitemap against the validator settings in conf.py.
+
+    If sitemap_validator_required is set, then make sure the concatenated language and
+    version match the given string.
+
+    If sitemap_validator_urls is set, then use it to check that all of the given URLs
+    for the current language and version are included in the sitemap.
+
+    A warning of type ``sitemap`` and subtype ``validation`` is logged for each
+    check that fails.
+
+    :param app: the Sphinx application whose config holds the validator settings
+    :param filename: path of the sitemap file to check
+    :return: ``True`` if every configured check passed, otherwise ``False``
+    """
+    # Builds that set neither a language nor a version use the key "nil".
+    key = "{}{}".format(app.config.language or "", app.config.version or "")
+    key = key or "nil"
+
+    passed = True
+
+    required = app.config.sitemap_validator_required
+    if required and key != required:
+        passed = False
+        logger.warning(
+            "Sitemap failed validation. %s does not match the required %s",
+            key,
+            required,
+            type="sitemap",
+            subtype="validation",
+        )
+
+    urls = (app.config.sitemap_validator_urls or {}).get(key)
+    if urls:
+        # The sitemap is written as UTF-8, so read it back as UTF-8 rather than
+        # with the platform's default encoding.
+        with open(filename, "r", encoding="utf-8") as myfile:
+            sitemap = myfile.read()
+        # if any of the urls don't match, throw a warning
+        for url in urls:
+            if url not in sitemap:
+                passed = False
+                logger.warning(
+                    "Sitemap failed validation. %s not found in %s",
+                    url,
+                    filename,
+                    type="sitemap",
+                    subtype="validation",
+                )
+    return passed
+
+
 def create_sitemap(app, exception):
     """Generates the sitemap.xml from the collected HTML page links"""
     site_url = app.builder.config.site_url or app.builder.config.html_baseurl
@@ -176,6 +231,8 @@ def create_sitemap(app, exception):
         filename, xml_declaration=True, encoding="utf-8", method="xml"
     )
 
+    validate_sitemap(app, filename)
+
     logger.info(
         "sphinx-sitemap: %s was generated for URL %s in %s"
         % (app.config.sitemap_filename, site_url, filename),