Skip to content

Commit

Permalink
docs: improve autogenerated API docs (#606)
Browse files Browse the repository at this point in the history
Related to #324
  • Loading branch information
barjin authored Oct 22, 2024
1 parent fef0874 commit 01f0746
Show file tree
Hide file tree
Showing 10 changed files with 504 additions and 55 deletions.
4 changes: 2 additions & 2 deletions src/crawlee/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class Configuration(BaseSettings):
"""Configuration of the Crawler.
Args:
internal_timeout: timeout for internal operations such as marking a request as processed
verbose_log: allows verbose logging
internal_timeout: Timeout for internal operations such as marking a request as processed.
verbose_log: Allows verbose logging.
default_storage_id: The default storage ID.
purge_on_start: Whether to purge the storage on start.
"""
Expand Down
2 changes: 1 addition & 1 deletion website/build_api_reference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ sed_no_backup() {
}

# Create docspec dump of this package's source code through pydoc-markdown
poetry run pydoc-markdown --quiet --dump > docspec-dump.jsonl
python ./pydoc-markdown/generate_ast.py > docspec-dump.jsonl
sed_no_backup "s#${PWD}/..#REPO_ROOT_PLACEHOLDER#g" docspec-dump.jsonl

rm -rf "${apify_shared_tempdir}"
Expand Down
1 change: 1 addition & 0 deletions website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ module.exports = {
sortSidebar: groupSort,
pathToCurrentVersionTypedocJSON: `${__dirname}/api-typedoc-generated.json`,
routeBasePath: 'api',
python: true,
},
],
// [
Expand Down
4 changes: 2 additions & 2 deletions website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"typescript": "5.6.2"
},
"dependencies": {
"@apify/docusaurus-plugin-typedoc-api": "^4.2.2",
"@apify/docusaurus-plugin-typedoc-api": "^4.2.6",
"@apify/utilities": "^2.8.0",
"@docusaurus/core": "^3.5.2",
"@docusaurus/mdx-loader": "^3.5.2",
Expand All @@ -56,5 +56,5 @@
"stream-browserify": "^3.0.0",
"unist-util-visit": "^5.0.0"
},
"packageManager": "yarn@4.4.1"
"packageManager": "yarn@4.5.1"
}
Empty file.
46 changes: 46 additions & 0 deletions website/pydoc-markdown/generate_ast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Replaces the default pydoc-markdown shell script with a custom Python script calling the pydoc-markdown API directly.
This script generates an AST from the Python source code in the `src` directory and prints it as a JSON array with one dumped module per element.
"""

from pydoc_markdown.interfaces import Context
from pydoc_markdown.contrib.loaders.python import PythonLoader
from pydoc_markdown.contrib.processors.filter import FilterProcessor
from pydoc_markdown.contrib.processors.crossref import CrossrefProcessor
from pydoc_markdown.contrib.renderers.markdown import MarkdownReferenceResolver
from google_docstring_processor import ApifyGoogleProcessor
from docspec import dump_module

import json
import os

# Locate the package sources relative to this file rather than the current
# working directory.
project_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../src')

context = Context(directory='.')
loader = PythonLoader(search_path=[project_path])
# Named `filter_processor` so the builtin `filter` is not shadowed at module level.
filter_processor = FilterProcessor(
    documented_only=False,
    skip_empty_modules=False,
)
crossref = CrossrefProcessor()
google = ApifyGoogleProcessor()

# Every component is initialised with the same context before any module is loaded.
loader.init(context)
filter_processor.init(context)
google.init(context)
crossref.init(context)

# Processors run in this order over the full module list.
processors = [filter_processor, google, crossref]

modules = list(loader.load())

for processor in processors:
    # No reference resolver is supplied; processors receive `None` instead.
    processor.process(modules, None)

# Serialise each processed module and print the whole list as one indented
# JSON document on stdout.
dump = [dump_module(module) for module in modules]

print(json.dumps(dump, indent=4))
185 changes: 185 additions & 0 deletions website/pydoc-markdown/google_docstring_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# -*- coding: utf8 -*-
# Copyright (c) 2019 Niklas Rosenstein
# !!! Modified 2024 Jindřich Bär
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import dataclasses
import re
import typing as t

import docspec

from pydoc_markdown.contrib.processors.sphinx import generate_sections_markdown
from pydoc_markdown.interfaces import Processor, Resolver

import json


@dataclasses.dataclass
class ApifyGoogleProcessor(Processor):
    """
    Preprocessor for Google-style and PEP 257 docstrings.

    The docstring of every visited API object is replaced with a compact JSON
    document of the form ``{"text": ..., "sections": [...]}``:

    * ``text`` holds the lines that precede the first section header (including
      fenced code blocks found there), joined by newlines.
    * ``sections`` is a list of single-key objects mapping a normalized section
      title (for example ``Arguments`` for ``Args:``) to the list of its entries.
      An entry is a plain string, or, for a line that looks like a parameter
      declaration, an object with ``param``, ``desc`` and (when present in the
      source line) ``type``.

    Lines outside fenced code blocks are stripped of surrounding whitespace, so
    indentation does not influence parsing.

    References:

    * https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
    * https://www.python.org/dev/peps/pep-0257/

    Example:

    ```
    Attributes:
        module_level_variable1 (int): Module level variables may be documented in
            either the ``Attributes`` section of the module docstring, or in an
            inline docstring immediately following the variable.
            Either form is acceptable, but the two should not be mixed. Choose
            one convention to document module level variables and be consistent
            with it.
    Todo:
        * For module TODOs
        * You have to also use ``sphinx.ext.todo`` extension
    ```

    is split into an ``Attributes`` section with a single parameter entry
    (``param`` = ``module_level_variable1``, ``type`` = ``int``, and a ``desc``
    that carries all the following lines) and a ``Todo`` section with two plain
    string entries.

    @doc:fmt:google
    """

    # Patterns that recognise a parameter declaration. They are tried in order
    # and the first one that matches wins; only some of them capture a `type`.
    _param_res = [
        re.compile(r"^(?P<param>\S+):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$"),
    ]

    # Section header (exactly as written in the docstring, colon included)
    # mapped to the normalized title used as the key in the JSON output.
    _keywords_map = {
        "Args:": "Arguments",
        "Arguments:": "Arguments",
        "Attributes:": "Attributes",
        "Example:": "Example",
        "Examples:": "Examples",
        "Keyword Args:": "Arguments",
        "Keyword Arguments:": "Arguments",
        "Methods:": "Methods",
        "Note:": "Notes",
        "Notes:": "Notes",
        "Other Parameters:": "Arguments",
        "Parameters:": "Arguments",
        "Return:": "Returns",
        "Returns:": "Returns",
        "Raises:": "Raises",
        "References:": "References",
        "See Also:": "See Also",
        "Todo:": "Todo",
        "Warning:": "Warnings",
        "Warnings:": "Warnings",
        "Warns:": "Warns",
        "Yield:": "Yields",
        "Yields:": "Yields",
    }

    def check_docstring_format(self, docstring: str) -> bool:
        """Return True if any known section header occurs anywhere in *docstring*.

        This is a plain substring test, so a header mentioned in running text
        also counts.
        """
        return any(section_name in docstring for section_name in self._keywords_map)

    def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
        """Rewrite the docstring of every API object in *modules* in place.

        Args:
            modules: Modules whose objects are visited.
            resolver: Accepted for interface compatibility; not used here.
        """
        docspec.visit(modules, self._process)

    def _process(self, node: docspec.ApiObject) -> None:
        """Parse the docstring of *node* and replace its content with JSON.

        Objects without a docstring are left untouched.
        """
        if not node.docstring:
            return

        lines: t.List[str] = []
        sections: t.List[t.Dict[str, t.List[t.Any]]] = []
        # Entries collected since the last commit: plain strings, or dicts
        # produced by a parameter pattern.
        current_lines: t.List[t.Any] = []
        in_codeblock = False
        keyword: t.Optional[str] = None

        def _commit() -> None:
            # Flush pending entries into the active section, or into the
            # free-form text when no section header has been seen yet.
            if keyword:
                sections.append({keyword: list(current_lines)})
            else:
                lines.extend(current_lines)
            current_lines.clear()

        for line in node.docstring.content.split("\n"):
            if line.lstrip().startswith("```"):
                in_codeblock = not in_codeblock
                current_lines.append(line)
                # A closing fence flushes the block. Inside a section this
                # ends the current section object; later lines of the same
                # section start a new object under the same title.
                if not in_codeblock:
                    _commit()
                continue

            if in_codeblock:
                # Code is kept verbatim, indentation included.
                current_lines.append(line)
                continue

            line = line.strip()
            if line in self._keywords_map:
                _commit()
                keyword = self._keywords_map[line]
                continue

            if keyword is None:
                lines.append(line)
                continue

            for param_re in self._param_res:
                param_match = param_re.match(line)
                if param_match:
                    current_lines.append(param_match.groupdict())
                    break
            else:
                # Not a parameter declaration. It continues the previous
                # parameter only if the last collected entry is a parameter
                # dict. The previous counter-based check started at -1, so the
                # second docstring line was always treated as a continuation;
                # a docstring opening with a section header followed by plain
                # text then indexed an empty list and raised IndexError.
                previous = current_lines[-1] if current_lines else None
                if isinstance(previous, dict):
                    previous["desc"] += "\n" + line
                else:
                    current_lines.append(line)

        _commit()
        node.docstring.content = json.dumps({
            "text": "\n".join(lines),
            "sections": sections,
        }, indent=None)


17 changes: 17 additions & 0 deletions website/src/css/custom.css
Original file line number Diff line number Diff line change
Expand Up @@ -541,3 +541,20 @@ div[class^=announcementBar_] button {
box-shadow: var(--ifm-alert-shadow);
padding: var(--ifm-alert-padding-vertical) var(--ifm-alert-padding-horizontal);
}

/* NOTE(review): the `tsd-` classes below presumably come from the generated
   API reference markup (typedoc API plugin) — confirm against the rendered pages. */

/* Adds vertical spacing below each item of a `.tsd-parameters` list. */
.tsd-parameters li {
margin-bottom: 16px;
}

/* Sets the title to 16px; `!important` forces the bottom margin over competing rules. */
.tsd-parameters-title {
font-size: 16px;
margin-bottom: 16px !important;
}

/* Gives the returns title the same 16px font size as the parameters title. */
.tsd-returns-title {
font-size: 16px;
}

/* Removes `.tsd-api-options` elements from the layout entirely. */
.tsd-api-options {
display: none;
}
Loading

0 comments on commit 01f0746

Please sign in to comment.