docs: improve autogenerated API docs (#606)

Related to #324
apify · Oct 22, 2024 · 01f0746 · 01f0746
1 parent fef0874
commit 01f0746
Show file tree

Hide file tree

Showing 10 changed files with 504 additions and 55 deletions.
diff --git a/src/crawlee/configuration.py b/src/crawlee/configuration.py
@@ -18,8 +18,8 @@ class Configuration(BaseSettings):
     """Configuration of the Crawler.
 
     Args:
-        internal_timeout: timeout for internal operations such as marking a request as processed
-        verbose_log: allows verbose logging
+        internal_timeout: Timeout for internal operations such as marking a request as processed.
+        verbose_log: Allows verbose logging.
         default_storage_id: The default storage ID.
         purge_on_start: Whether to purge the storage on start.
     """

diff --git a/website/build_api_reference.sh b/website/build_api_reference.sh
@@ -11,7 +11,7 @@ sed_no_backup() {
 }
 
 # Create docspec dump of this package's source code through pydoc-markdown
-poetry run pydoc-markdown --quiet --dump > docspec-dump.jsonl
+python ./pydoc-markdown/generate_ast.py > docspec-dump.jsonl
 sed_no_backup "s#${PWD}/..#REPO_ROOT_PLACEHOLDER#g" docspec-dump.jsonl
 
 rm -rf "${apify_shared_tempdir}"

diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
@@ -80,6 +80,7 @@ module.exports = {
                 sortSidebar: groupSort,
                 pathToCurrentVersionTypedocJSON: `${__dirname}/api-typedoc-generated.json`,
                 routeBasePath: 'api',
+                python: true,
             },
         ],
         // [

diff --git a/website/package.json b/website/package.json
@@ -33,7 +33,7 @@
         "typescript": "5.6.2"
     },
     "dependencies": {
-        "@apify/docusaurus-plugin-typedoc-api": "^4.2.2",
+        "@apify/docusaurus-plugin-typedoc-api": "^4.2.6",
         "@apify/utilities": "^2.8.0",
         "@docusaurus/core": "^3.5.2",
         "@docusaurus/mdx-loader": "^3.5.2",
@@ -56,5 +56,5 @@
         "stream-browserify": "^3.0.0",
         "unist-util-visit": "^5.0.0"
     },
-    "packageManager": "yarn@4.4.1"
+    "packageManager": "yarn@4.5.1"
 }
diff --git a/website/pydoc-markdown/__init__.py b/website/pydoc-markdown/__init__.py
diff --git a/website/pydoc-markdown/generate_ast.py b/website/pydoc-markdown/generate_ast.py
@@ -0,0 +1,46 @@
+"""
+Replaces the default pydoc-markdown shell script with a custom Python script calling the pydoc-markdown API directly.
+
+This script loads the Python source code in the `src` directory, runs the documentation processors over it,
+and prints the result as a JSON array containing one docspec dump per module.
+"""
+
+from pydoc_markdown.interfaces import Context
+from pydoc_markdown.contrib.loaders.python import PythonLoader
+from pydoc_markdown.contrib.processors.filter import FilterProcessor
+from pydoc_markdown.contrib.processors.crossref import CrossrefProcessor
+from pydoc_markdown.contrib.renderers.markdown import MarkdownReferenceResolver
+from google_docstring_processor import ApifyGoogleProcessor
+from docspec import dump_module
+
+import json
+import os
+
+project_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../src')
+
+context = Context(directory='.')
+loader = PythonLoader(search_path=[project_path])
+filter = FilterProcessor(
+    documented_only=False,
+    skip_empty_modules=False,
+)
+crossref = CrossrefProcessor()
+google = ApifyGoogleProcessor()
+
+loader.init(context)
+filter.init(context)
+google.init(context)
+crossref.init(context)
+
+processors = [filter, google, crossref]
+
+dump = []
+
+modules = list(loader.load())
+
+for processor in processors:
+    processor.process(modules, None)
+
+for module in modules:
+    dump.append(dump_module(module))
+
+print(json.dumps(dump, indent=4))
diff --git a/website/pydoc-markdown/google_docstring_processor.py b/website/pydoc-markdown/google_docstring_processor.py
@@ -0,0 +1,185 @@
+# -*- coding: utf8 -*-
+# Copyright (c) 2019 Niklas Rosenstein
+# !!! Modified 2024 Jindřich Bär
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import dataclasses
+import re
+import typing as t
+
+import docspec
+
+from pydoc_markdown.contrib.processors.sphinx import generate_sections_markdown
+from pydoc_markdown.interfaces import Processor, Resolver
+
+import json
+
+
+@dataclasses.dataclass
+class ApifyGoogleProcessor(Processor):
+    """
+    This class implements the preprocessor for Google and PEP 257 docstrings. It converts
+    docstrings formatted in the Google docstyle to Markdown syntax.
+
+    References:
+
+    * https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
+    * https://www.python.org/dev/peps/pep-0257/
+
+    Example:
+
+    ```
+    Attributes:
+        module_level_variable1 (int): Module level variables may be documented in
+            either the ``Attributes`` section of the module docstring, or in an
+            inline docstring immediately following the variable.
+
+            Either form is acceptable, but the two should not be mixed. Choose
+            one convention to document module level variables and be consistent
+            with it.
+
+    Todo:
+        * For module TODOs
+        * You have to also use ``sphinx.ext.todo`` extension
+    ```
+
+    Renders as:
+
+    Attributes:
+        module_level_variable1 (int): Module level variables may be documented in
+            either the ``Attributes`` section of the module docstring, or in an
+            inline docstring immediately following the variable.
+
+            Either form is acceptable, but the two should not be mixed. Choose
+            one convention to document module level variables and be consistent
+            with it.
+
+    Todo:
+        * For module TODOs
+        * You have to also use ``sphinx.ext.todo`` extension
+
+    @doc:fmt:google
+    """
+
+    _param_res = [
+        re.compile(r"^(?P<param>\S+):\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+--\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$"),
+        re.compile(r"^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$"),
+    ]
+
+    _keywords_map = {
+        "Args:": "Arguments",
+        "Arguments:": "Arguments",
+        "Attributes:": "Attributes",
+        "Example:": "Example",
+        "Examples:": "Examples",
+        "Keyword Args:": "Arguments",
+        "Keyword Arguments:": "Arguments",
+        "Methods:": "Methods",
+        "Note:": "Notes",
+        "Notes:": "Notes",
+        "Other Parameters:": "Arguments",
+        "Parameters:": "Arguments",
+        "Return:": "Returns",
+        "Returns:": "Returns",
+        "Raises:": "Raises",
+        "References:": "References",
+        "See Also:": "See Also",
+        "Todo:": "Todo",
+        "Warning:": "Warnings",
+        "Warnings:": "Warnings",
+        "Warns:": "Warns",
+        "Yield:": "Yields",
+        "Yields:": "Yields",
+    }
+
+    def check_docstring_format(self, docstring: str) -> bool:
+        for section_name in self._keywords_map:
+            if section_name in docstring:
+                return True
+        return False
+
+    def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
+        docspec.visit(modules, self._process)
+
+    def _process(self, node: docspec.ApiObject):
+        if not node.docstring:
+            return
+
+        lines = []
+        sections = []
+        current_lines: t.List[str] = []
+        in_codeblock = False
+        keyword = None
+        multiline_argument_offset = -1
+
+        def _commit():
+            if keyword:
+                sections.append({keyword: list(current_lines)})
+            else:
+                lines.extend(current_lines)
+            current_lines.clear()
+
+        for line in node.docstring.content.split("\n"):
+            multiline_argument_offset += 1
+            if line.lstrip().startswith("```"):
+                in_codeblock = not in_codeblock
+                current_lines.append(line)
+                if not in_codeblock:
+                    _commit()
+                continue
+
+            if in_codeblock:
+                current_lines.append(line)
+                continue
+
+            line = line.strip()
+            if line in self._keywords_map:
+                _commit()
+                keyword = self._keywords_map[line]
+                continue
+
+            if keyword is None:
+                lines.append(line)
+                continue
+
+            for param_re in self._param_res:
+                param_match = param_re.match(line)
+                if param_match:
+                    current_lines.append(param_match.groupdict())
+                    multiline_argument_offset = 0
+                    break
+
+            if not param_match:
+                if multiline_argument_offset == 1:
+                    current_lines[-1]["desc"] += "\n" + line
+                    multiline_argument_offset = 0
+                else:
+                    current_lines.append(line)
+
+        _commit()
+        node.docstring.content = json.dumps({
+            "text": "\n".join(lines),
+            "sections": sections,
+        }, indent=None)
+
+
diff --git a/website/src/css/custom.css b/website/src/css/custom.css
@@ -541,3 +541,20 @@ div[class^=announcementBar_] button {
     box-shadow: var(--ifm-alert-shadow);
     padding: var(--ifm-alert-padding-vertical) var(--ifm-alert-padding-horizontal);
 }
+
+/* Overrides for `tsd-*` classes (presumably emitted by the API reference plugin; confirm). */
+
+/* Vertical spacing between list items inside a parameters block. */
+.tsd-parameters li {
+    margin-bottom: 16px;
+}
+
+/* Parameters heading: fixed size; the margin uses !important to win over other rules. */
+.tsd-parameters-title {
+    font-size: 16px;
+    margin-bottom: 16px !important;
+}
+
+/* Returns heading uses the same font size as the parameters heading. */
+.tsd-returns-title {
+    font-size: 16px;
+}
+
+/* Elements with this class are hidden entirely. */
+.tsd-api-options {
+    display: none;
+}