From 1c70aa9b9e321fc2796509a0e948c7e94452a3aa Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 27 Mar 2023 21:14:06 +0300 Subject: [PATCH 1/4] extensions: overhaul javadoc/doxygen comment parsing Full javadoc/doxygen compatibility has never been a goal for hawkmoth. We've always promoted using pure reStructuredText and Sphinx, because it avoids any problematic conversions. If you want all the bells and whistles of doxygen, you should use doxygen. However, we have to acknowledge there are a lot of codebases full of javadoc/doxygen style documentation comments, and a lot of people who are familiar with that style of code documentation. Requiring the use of reStructuredText to even try hawkmoth can be quite a hurdle. To that end, we've always had a rudimentary regex based conversion available. But let's face the fact, it is too simple, and too difficult to maintain or extend as it is. Try to find a middle ground with an improved parser that understands paragraphs, inline markup, code blocks, and the like. Make it easier to extend. Recognize all doxygen commands (more than 180 of them!), even though we only implement a handful, making it possible to warn about the unimplemented ones (this is for future improvement, not done yet). --- src/hawkmoth/ext/javadoc/__init__.py | 426 ++++++++++++++++++++++++++- 1 file changed, 419 insertions(+), 7 deletions(-) diff --git a/src/hawkmoth/ext/javadoc/__init__.py b/src/hawkmoth/ext/javadoc/__init__.py index dff40432..25ea4916 100644 --- a/src/hawkmoth/ext/javadoc/__init__.py +++ b/src/hawkmoth/ext/javadoc/__init__.py @@ -1,21 +1,433 @@ # Copyright (c) 2023, Jani Nikula # Licensed under the terms of BSD 2-Clause, see LICENSE for details. 
-from hawkmoth.util.doccompat import javadoc_liberal +import re + +# The "operator" character, either \ or @, but not escaped with \ +OP = r'(?[\\@])' + +class _handler: + """Base class for all command handlers.""" + _indented_paragraph = False + + def __init__(self, app=None, indent=None, op=None, command=None, rest=None): + self._app = app + self._indent = indent + self._op = op + self._command = command + self._rest = rest + + def blank_line_ends(self): + """Does a blank line end this command?""" + return True + + def command_ends(self, command): + """Does the provided command end this command?""" + return True + + @staticmethod + def _inline_markup(line): + """Handle inline markup.""" + + word_regex = r'[^\s.]+' + tagged_phrase_regex = r'[^<]*' + + # italics: \a \e \em ... + line = re.sub(fr'{OP}(a|e|em)\s+(?P{word_regex})', r'*\g*', line) + line = re.sub(fr'(?P{tagged_phrase_regex})', r'*\g*', line) + + # bold: \b ... + line = re.sub(fr'{OP}b\s+(?P{word_regex})', r'**\g**', line) + line = re.sub(fr'(?P{tagged_phrase_regex})', r'**\g**', line) + + # monospace: \c \p ... 
+ line = re.sub(fr'{OP}(c|p)\s+(?P{word_regex})', r'``\g``', line) + line = re.sub(fr'(?P{tagged_phrase_regex})', r'``\g``', line) + + # references to previous anchors + # FIXME: link title + line = re.sub(fr'{OP}ref\s+(?P\w+)', r':ref:`\g`', line) + + # FIXME: + # - copybrief + # - copydetails + # - emoji + # - f + # - n + + return line + + def rest(self): + """Return the "rest" of the line after @command.""" + return self._inline_markup(self._rest) + + def header(self): + """Convert the first line of this command.""" + yield '' + + def convert(self, line): + """Convert a regular line within this paragraph.""" + line = self._inline_markup(line) + + if self._indented_paragraph: + line = f' {line}' + + yield line + +class _plain(_handler): + pass + +class _not_implemented(_plain): + """Placeholder for commands that have not been implemented.""" + # FIXME: warn about not implemented commands + pass + +class _block_with_end_command(_handler): + """Paragraph with a dedicated command to end it. + + For example, @code/@endcode.""" + _end_command = None + + def end_command(self): + """Get the name of the command that ends this paragraph.""" + return self._end_command if self._end_command else f'end{self._command}' + + def blank_line_ends(self): + return False + + def command_ends(self, command): + return self.end_command() == command + +class _ignore_until_end_command(_block_with_end_command): + """Ignore the paragraph until dedicated command ends it.""" + # FIXME: warn about ignored commands + def header(self): + yield '' + + def convert(self, line): + yield '' + +class _startuml(_ignore_until_end_command): + # Needed because it's @startuml/@enduml, not @uml/@enduml. + _end_command = 'enduml' + +class _code(_block_with_end_command): + def header(self): + yield '' + yield '.. code-block::' + yield '' + + def convert(self, line): + yield f' {line}' + +class _anchor(_handler): + def header(self): + anchor = self._rest.strip() + + yield '' + yield f'.. 
{anchor}:' + yield '' + +class _strip_command(_handler): + """Strip command, treat everything else as normal.""" + def header(self): + yield f'{self._indent}{self.rest().strip()}' + +class _field_list(_handler): + """Paragraph which becomes a single field list item.""" + _field_name = None + _indented_paragraph = True + + def field_name(self): + return self._field_name if self._field_name else self._command + + def header(self): + yield '' + yield f'{self._indent}:{self.field_name()}:{self.rest()}' + +class _author(_field_list): + _field_name = 'author' + +class _return(_field_list): + _field_name = 'return' + +class _see(_field_list): + _field_name = 'see' + +class _param(_field_list): + """Parameter description.""" + _field_name = 'param' + + def header(self): + mo = re.match(r'^(\[(?P[a-zA-Z, ]+)\])?(?P\s*)(?P([a-zA-Z0-9_]+|\.\.\.))(?P\s*(?P.*))', # noqa: E501 + self.rest()) + if mo is None: + # FIXME + yield '' + + direction = mo.group('direction') + name = mo.group('name') + desc = mo.group('desc') + + yield '' + if direction: + yield f'{self._indent}:param {name}: **[{direction}]** {desc}' + else: + yield f'{self._indent}:param {name}: {desc}' + +class _admonition(_handler): + """Admonitions such as @note and @warning.""" + _indented_paragraph = True + _directive = None + + def directive(self): + return self._directive if self._directive else self._command + + def header(self): + yield '' + yield f'.. {self.directive()}::' + yield '' + rest = self.rest().strip() + if rest: + yield f' {rest}' + +# Map non-inline commands to handler classes. 
+# +# All the non-inline commands in the order listed at +# https://www.doxygen.nl/manual/commands.html +_handlers = { + # structural indicators + 'addtogroup': _not_implemented, + 'callgraph': _not_implemented, + 'hidecallgraph': _not_implemented, + 'callergraph': _not_implemented, + 'hidecallergraph': _not_implemented, + 'showrefby': _not_implemented, + 'hiderefby': _not_implemented, + 'showrefs': _not_implemented, + 'hiderefs': _not_implemented, + 'showinlinesource': _not_implemented, + 'hideinlinesource': _not_implemented, + 'includegraph': _not_implemented, + 'hideincludegraph': _not_implemented, + 'includedbygraph': _not_implemented, + 'hideincludedbygraph': _not_implemented, + 'directorygraph': _not_implemented, + 'hidedirectorygraph': _not_implemented, + 'collaborationgraph': _not_implemented, + 'hidecollaborationgraph': _not_implemented, + 'inheritancegraph': _not_implemented, + 'hideinheritancegraph': _not_implemented, + 'groupgraph': _not_implemented, + 'hidegroupgraph': _not_implemented, + 'qualifier': _not_implemented, + 'category': _not_implemented, + 'class': _not_implemented, # WARN + 'concept': _not_implemented, + 'def': _not_implemented, # WARN + 'defgroup': _not_implemented, + 'dir': _not_implemented, + 'enum': _not_implemented, # WARN + 'example': _not_implemented, # FIXME + 'endinternal': _not_implemented, # WARN + 'extends': _not_implemented, # FIXME + 'file': _not_implemented, # FIXME + 'fileinfo': _not_implemented, + 'lineinfo': _not_implemented, # WARN + 'fn': _not_implemented, # WARN + 'headerfile': _not_implemented, + 'hideinitializer': _not_implemented, + 'idlexcept': _not_implemented, + 'implements': _not_implemented, + 'ingroup': _not_implemented, + 'interface': _not_implemented, # WARN + 'internal': _not_implemented, + 'mainpage': _not_implemented, # FIXME + 'memberof': _not_implemented, + 'module': _not_implemented, + 'name': _not_implemented, + 'namespace': _not_implemented, + 'nosubgrouping': _not_implemented, + 'overload': 
_not_implemented, + 'package': _not_implemented, + 'page': _not_implemented, # FIXME + 'private': _not_implemented, + 'privatesection': _not_implemented, + 'property': _not_implemented, + 'protected': _not_implemented, + 'protectedsection': _not_implemented, + 'protocol': _not_implemented, + 'public': _not_implemented, + 'publicsection': _not_implemented, + 'pure': _not_implemented, + 'relates': _not_implemented, # FIXME + 'related': _not_implemented, # FIXME + 'relatesalso': _not_implemented, + 'relatedalso': _not_implemented, + 'showinitializer': _not_implemented, + 'static': _not_implemented, + 'typedef': _not_implemented, # WARN + 'union': _not_implemented, # WARN + 'var': _not_implemented, # WARN + 'vhdlflow': _not_implemented, + 'weakgroup': _not_implemented, + # section indicators + 'attention': _admonition, + 'author': _author, + 'authors': _author, + 'brief': _strip_command, + 'bug': _field_list, + 'cond': _not_implemented, + 'copyright': _field_list, + 'date': _field_list, + 'showdate': _not_implemented, + 'deprecated': _field_list, + 'details': _strip_command, + 'noop': _not_implemented, + 'raisewarning': _not_implemented, + 'else': _not_implemented, + 'elseif': _not_implemented, + 'endcond': _not_implemented, + 'endif': _not_implemented, + 'exception': _field_list, + 'if': _not_implemented, + 'ifnot': _not_implemented, + 'invariant': _field_list, + 'note': _admonition, + 'par': _not_implemented, + 'param': _param, + 'parblock': _not_implemented, + 'endparblock': _not_implemented, + 'tparam': _field_list, + 'post': _field_list, + 'pre': _field_list, + 'remark': _field_list, + 'remarks': _field_list, + 'result': _return, + 'return': _return, + 'returns': _return, + 'retval': _return, + 'sa': _see, + 'see': _see, + 'short': _strip_command, + 'since': _field_list, + 'test': _field_list, + 'throw': _field_list, + 'throws': _field_list, + 'todo': _field_list, + 'version': _field_list, + 'warning': _admonition, + 'xrefitem': _not_implemented, + 'addindex': 
_not_implemented, + 'anchor': _anchor, + 'cite': _field_list, + 'endlink': _not_implemented, + 'link': _not_implemented, + 'refitem': _not_implemented, + 'secreflist': _not_implemented, + 'endsecreflist': _not_implemented, + 'subpage': _not_implemented, + 'tableofcontents': _not_implemented, + 'section': _not_implemented, + 'subsection': _not_implemented, + 'subsubsection': _not_implemented, + 'paragraph': _not_implemented, + 'dontinclude': _not_implemented, + 'include': _not_implemented, + 'includelineno': _not_implemented, + 'includedoc': _not_implemented, + 'line': _not_implemented, + 'skip': _not_implemented, + 'skipline': _not_implemented, + 'snippet': _not_implemented, + 'snippetlineno': _not_implemented, + 'snippetdoc': _not_implemented, + 'until': _not_implemented, + 'verbinclude': _not_implemented, + 'htmlinclude': _not_implemented, + 'latexinclude': _not_implemented, + 'rtfinclude': _not_implemented, + 'maninclude': _not_implemented, + 'docbookinclude': _not_implemented, + 'xmlinclude': _not_implemented, + # visual enhancements + 'arg': _not_implemented, # FIXME + 'code': _code, + 'copydoc': _not_implemented, + 'docbookonly': _ignore_until_end_command, + 'dot': _ignore_until_end_command, + 'msc': _ignore_until_end_command, + 'startuml': _startuml, + 'dotfile': _not_implemented, + 'mscfile': _not_implemented, + 'diafile': _not_implemented, + 'doxyconfig': _not_implemented, + 'endcode': _plain, + 'enddocbookonly': _plain, + 'enddot': _plain, + 'endmsc': _plain, + 'enduml': _plain, + 'endhtmlonly': _plain, + 'endlatexonly': _plain, + 'endmanonly': _plain, + 'endrtfonly': _plain, + 'endverbatim': _plain, + 'endxmlonly': _plain, + 'htmlonly': _ignore_until_end_command, + 'image': _not_implemented, # FIXME + 'latexonly': _ignore_until_end_command, + 'manonly': _ignore_until_end_command, + 'li': _not_implemented, # FIXME + 'rtfonly': _ignore_until_end_command, + 'verbatim': _code, + 'xmlonly': _ignore_until_end_command, +} + +# Ensure at least this regex is 
compiled. +_command_pattern = re.compile(fr'(?P\s*){OP}(?P[a-zA-Z]+)(?P.*)') + +def _convert(lines, app=None): + handler = _plain(app=app) + + for line in lines: + if line.strip() == '' and handler.blank_line_ends(): + handler = _plain(app=app) + yield from handler.convert(line) + continue + + mo = _command_pattern.match(line) + if mo is None: + # No command match, continue with current handler + yield from handler.convert(line) + continue + + command = mo.group('command') + + handler_cls = _handlers.get(command) + if handler_cls is None: + # Unknown command, continue with current handler + yield from handler.convert(line) + continue + + if not handler.command_ends(command): + # Command does not finish block, continue with current handler + yield from handler.convert(line) + continue + + # Switch paragraph handler, and emit header for it + handler = handler_cls(app=app, **mo.groupdict()) + + yield from handler.header() def _process_docstring(app, lines, transform, options): if transform != app.config.hawkmoth_javadoc_transform: return - comment = '\n'.join(lines) - comment = javadoc_liberal(comment) - lines[:] = comment.splitlines()[:] + lines[:] = [line for line in _convert(app=app, lines=lines)] def process_docstring(lines): """Simple interface for CLI and testing.""" - comment = '\n'.join(lines) - comment = javadoc_liberal(comment) - lines[:] = comment.splitlines()[:] + lines[:] = [line for line in _convert(lines=lines)] def setup(app): app.setup_extension('hawkmoth') From 54ca33e961132d31c071e3ae27baa4c26ad29f9a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 17 Nov 2023 12:29:22 +0200 Subject: [PATCH 2/4] examples: update the javadoc/doxygen example Improve the description of the javadoc builtin extension. This also expands the tests slightly. 
--- doc/examples.rst | 4 ++-- test/examples/javadoc.c | 20 +++++++++++++++++--- test/examples/javadoc.rst | 24 +++++++++++++++++++++--- test/examples/javadoc.yaml | 2 +- 4 files changed, 41 insertions(+), 9 deletions(-) diff --git a/doc/examples.rst b/doc/examples.rst index 647e72b4..30b7757c 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -387,8 +387,8 @@ Output :transform: napoleon -Javadoc-style comments ----------------------- +Javadoc/Doxygen-style comments +------------------------------ Source ~~~~~~ diff --git a/test/examples/javadoc.c b/test/examples/javadoc.c index 7e1ab40f..0b90d8a9 100644 --- a/test/examples/javadoc.c +++ b/test/examples/javadoc.c @@ -8,11 +8,25 @@ enum mode; * Sphinx event. You can use the built-in extensions for this, or create your * own. * - * In this example, hawkmoth.ext.javadoc built-in extension is used to support - * Javadoc-style documentation comments. + * In this example, hawkmoth.ext.javadoc built-in extension is used to + * support Javadoc/Doxygen-style documentation comments. You can use both \@ and + * \\ for the commands. + * + * \note + * While the most common commands and inline markup \a should work, the + * Javadoc/Doxygen support is nowhere near complete. + * + * The support should be good enough for basic API documentation, including + * things like code blocks: + * + * \code + * ¯\_(ツ)_/¯ + * \endcode + * + * And parameter and return value descriptions, and the like: * * @param list The list to frob. - * @param mode The frobnication mode. + * @param[in] mode The frobnication mode. * @return 0 on success, non-zero error code on error. * @since v0.1 */ diff --git a/test/examples/javadoc.rst b/test/examples/javadoc.rst index 570ed28f..6bc4e5d5 100644 --- a/test/examples/javadoc.rst +++ b/test/examples/javadoc.rst @@ -7,13 +7,31 @@ Sphinx event. You can use the built-in extensions for this, or create your own. 
- In this example, hawkmoth.ext.javadoc built-in extension is used to support - Javadoc-style documentation comments. + In this example, ``hawkmoth.ext.javadoc`` built-in extension is used to + support Javadoc/Doxygen-style documentation comments. You can use both \@ and + \\ for the commands. + + + .. note:: + + While the most common commands and inline markup *should* work, the + Javadoc/Doxygen support is nowhere near complete. + + The support should be good enough for basic API documentation, including + things like code blocks: + + + .. code-block:: + + ¯\_(ツ)_/¯ + + + And parameter and return value descriptions, and the like: :param list: The list to frob. - :param mode: The frobnication mode. + :param mode: **[in]** The frobnication mode. :return: 0 on success, non-zero error code on error. diff --git a/test/examples/javadoc.yaml b/test/examples/javadoc.yaml index e9fbb630..d7691f62 100644 --- a/test/examples/javadoc.yaml +++ b/test/examples/javadoc.yaml @@ -5,6 +5,6 @@ directives: - javadoc.c options: transform: javadoc -example-title: Javadoc-style comments +example-title: Javadoc/Doxygen-style comments example-priority: 95 expected: javadoc.rst From 75f24e5768e76411de1b2113a1f75c8464853266 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 17 Nov 2023 23:38:23 +0200 Subject: [PATCH 3/4] doc: clarify the transform option for built-in extensions --- doc/built-in-extensions.rst | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/built-in-extensions.rst b/doc/built-in-extensions.rst index a0bfc8f3..2175dbca 100644 --- a/doc/built-in-extensions.rst +++ b/doc/built-in-extensions.rst @@ -31,7 +31,7 @@ Installation and configuration in ``conf.py``: Name of the transformation to handle. Defaults to ``'javadoc'``. Only convert the comment if the ``transform`` option matches this name, otherwise do - nothing. + nothing. Usually there's no need to modify this option. 
For example: @@ -39,6 +39,10 @@ For example: :caption: conf.py extensions.append('hawkmoth.ext.javadoc') + hawkmoth_transform_default = 'javadoc' # Transform everything + +:data:`hawkmoth_transform_default` sets the default for the ``transform`` +option. .. code-block:: c :caption: file.c @@ -58,7 +62,6 @@ For example: .. c:autofunction:: baz :file: file.c - :transform: javadoc .. _hawkmoth.ext.napoleon: @@ -81,7 +84,7 @@ Installation and configuration in ``conf.py``: Name of the transformation to handle. Defaults to ``'napoleon'``. Only convert the comment if the ``transform`` option matches this name, otherwise - do nothing. + do nothing. Usually there's no need to modify this option. For example: @@ -89,7 +92,8 @@ For example: :caption: conf.py extensions.append('hawkmoth.ext.napoleon') - hawkmoth_transform_default = 'napoleon' + # Uncomment to transform everything, example below uses :transform: option + # hawkmoth_transform_default = 'napoleon' .. code-block:: c :caption: file.c @@ -111,6 +115,7 @@ For example: .. c:autofunction:: baz :file: file.c + :transform: napoleon .. _hawkmoth.ext.transformations: From b2ed2d2ddbf5a0783f03e717df70532b561b2ab2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sat, 18 Nov 2023 00:12:42 +0200 Subject: [PATCH 4/4] doc: better describe what to expect from the javadoc extension It's a little better than "rudimentary at best", but still keep expectations in check. --- doc/built-in-extensions.rst | 13 +++++++++---- doc/syntax.rst | 9 ++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/built-in-extensions.rst b/doc/built-in-extensions.rst index 2175dbca..2c600ba7 100644 --- a/doc/built-in-extensions.rst +++ b/doc/built-in-extensions.rst @@ -11,15 +11,20 @@ extensions. hawkmoth.ext.javadoc -------------------- -This extension converts Javadoc_ comments to reStructuredText, using the -:event:`hawkmoth-process-docstring` event. 
+This extension converts Javadoc_ and Doxygen_ comments to reStructuredText, +using the :event:`hawkmoth-process-docstring` event. -.. note:: +The most commonly used commands are covered, including some inline markup, using +either \@ or \\ command character. The support is not complete, and mainly +covers the basic API documentation needs. - The Javadoc support is rudimentary at best. +Note that this does not change the comment block format, only the contents of +the comments. Only the ``/** ... */`` format is supported. .. _Javadoc: https://www.oracle.com/java/technologies/javase/javadoc.html +.. _Doxygen: https://www.doxygen.nl/ + Installation and configuration in ``conf.py``: .. code-block:: python diff --git a/doc/syntax.rst b/doc/syntax.rst index aeb41914..ae5b5f37 100644 --- a/doc/syntax.rst +++ b/doc/syntax.rst @@ -8,7 +8,7 @@ source code must be documented using specific documentation comment style, and the comments must follow reStructuredText markup. Optionally, the syntax may be :ref:`extended ` to support -e.g. Javadoc and Napoleon style comments. +e.g. Javadoc/Doxygen and Napoleon style comments. See :ref:`the examples section ` for a quick tour of what's possible, and read on for documentation comment formatting details. @@ -78,11 +78,14 @@ Hawkmoth supports :ref:`extending ` the syntax using :ref:`built-in reStructuredText. The :ref:`hawkmoth.ext.javadoc` extension provides limited support for Javadoc_ -style comments, and the :ref:`hawkmoth.ext.napoleon` extension provides support -for :external+sphinx:py:mod:`sphinx.ext.napoleon` style comments. +and Doxygen_ style comments, and the :ref:`hawkmoth.ext.napoleon` extension +provides support for :external+sphinx:py:mod:`sphinx.ext.napoleon` style +comments. .. _Javadoc: https://www.oracle.com/java/technologies/javase/javadoc.html +.. _Doxygen: https://www.doxygen.nl/ + .. _cross-referencing: Cross-Referencing C and C++ Constructs