Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.DS_Store
workspaces_api/.DS_Store
__pycache__/
2 changes: 1 addition & 1 deletion create_wandb_sdk_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fi
python generate_sdk_docs.py --temp_output_directory=$TEMP_DIR

# Process output doc created by lazydocs so it works with Docusaurus
python process_sdk_markdown.py --output_directory=$TEMP_DIR
python process_sdk_markdown.py --markdown_directory=$TEMP_DIR

# Make destination directory
mkdir -p $DESTINATION_DIR
Expand Down
31 changes: 31 additions & 0 deletions create_workspaces_reports_docs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
#
# Generate reference docs for the W&B Workspaces and Reports modules.
#
# For each module this script:
#   1. runs lazydocs to emit markdown into $OUTPUT_DIR,
#   2. post-processes that markdown with process_sdk_markdown.py,
#   3. runs mintlify_workspaces_report_docs.py on the same directory.

MODULE1=wandb_workspaces.workspaces.interface
MODULE2=wandb_workspaces.reports.v2.interface
OUTPUT_DIR=wandb_workspaces


# Make sure the output directory exists before lazydocs writes into it.
if [ -d "$OUTPUT_DIR" ]; then
    echo "Workspace and Reports directory exists"
else
    echo "Workspace and Reports directory does not exist. Creating it now..."
    mkdir -p "$OUTPUT_DIR"
    echo "Directory created: $OUTPUT_DIR"
fi

## Array of modules to process
declare -a arr=("$MODULE1" "$MODULE2")

# Create initial doc for each module
for MODULE in "${arr[@]}"
do
    echo "Generating docs for module: $MODULE"
    lazydocs --output-path="./$OUTPUT_DIR" "$MODULE" --src-base-url="https://github.com/wandb/wandb-workspaces/tree/main"

    # Both post-processing scripts take the directory, not a single file:
    # they act on every *.md file currently present in $OUTPUT_DIR.
    echo "Processing markdown files for module: $OUTPUT_DIR/$MODULE.md"
    python process_sdk_markdown.py --markdown_directory "$OUTPUT_DIR"

    echo "Mintlify docs..."
    # Clean up the directory: add admonitions, extract mdx files, etc.
    python mintlify_workspaces_report_docs.py --markdown_directory "$OUTPUT_DIR"
done
4 changes: 2 additions & 2 deletions generate_sdk_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def _type_key_string(docodile):
return SOURCE["SDK"]["hugo_specs"]["frontmatter"] + "\n"


def _add_frontmatter(docodile):
def add_frontmatter(docodile):
"""Add frontmatter to the markdown file.

Args:
Expand Down Expand Up @@ -344,7 +344,7 @@ def create_markdown(docodile, generator):


with open(docodile.filename, 'w') as file:
file.write(_add_frontmatter(docodile))
file.write(add_frontmatter(docodile))
file.write(add_github_import_statement())
file.write(format_github_button(docodile.getfile_path))
file.write("\n\n")
Expand Down
205 changes: 205 additions & 0 deletions mintlify_workspaces_report_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/usr/bin/env python
import os
import re
import glob
import argparse

def remove_images(markdown_text):
    """Strip every ``<img ...>`` tag out of the markdown text.

    Args:
        markdown_text (str): Markdown content that may embed HTML image tags.

    Returns:
        str: The text with all ``<img>`` tags deleted and leading/trailing
            whitespace removed.
    """
    image_tag = re.compile(r'<img[^>]*>')
    without_images = image_tag.sub('', markdown_text)
    return without_images.strip()

def remove_markdownlint_disable(markdown_text):
    """Delete ``<!-- markdownlint-disable -->`` comments from the text.

    Only the bare directive is matched; whitespace around the words inside
    the comment is tolerated.

    Args:
        markdown_text (str): Markdown content to clean.

    Returns:
        str: The text without the directive comments and with
            leading/trailing whitespace removed.
    """
    directive = re.compile(r'<!--\s*markdownlint-disable\s*-->')
    return directive.sub('', markdown_text).strip()


def remove_module_header(markdown_text):
    """Drop module heading lines such as ``# <kbd>module</kbd> `module.name```.

    Whitespace that follows the heading (including newlines) is removed
    together with it.

    Args:
        markdown_text (str): Markdown content to clean.

    Returns:
        str: The text without module headings and with leading/trailing
            whitespace removed.
    """
    module_heading = re.compile(r'#\s*<kbd>module</kbd>\s*`[^`]*`\s*\n?')
    return module_heading.sub('', markdown_text).strip()


def remove_internal_classes(markdown_text):
    """Delete class headings whose description starts with ``INTERNAL:``.

    A match consists of a class heading, e.g.::

        ## <kbd>class</kbd> `ClassName`
        INTERNAL: This class is not for public use.
        ---

    followed by the ``INTERNAL:`` line, any whitespace after it, and a
    trailing ``---`` separator when one is present. Only that span is
    removed; anything after the separator is left untouched.

    Args:
        markdown_text (str): Markdown content to clean.

    Returns:
        str: The text without the matched spans and with leading/trailing
            whitespace removed.
    """
    internal_class = re.compile(
        r'#{1,}\s*<kbd>class</kbd>\s*`[^`]*`\s*\nINTERNAL:[^\n]*[\s\n]*(?:---)?'
    )
    return internal_class.sub('', markdown_text).strip()


def remove_empty_lines(markdown_text):
    """Collapse blank (or whitespace-only) lines out of the markdown text.

    Args:
        markdown_text (str): Markdown content to compact.

    Returns:
        str: The text where each run of blank lines is reduced to a single
            newline, with leading/trailing whitespace removed.
    """
    blank_run = re.compile(r'\n\s*\n')
    compacted = blank_run.sub('\n', markdown_text)
    return compacted.strip()


def strip_trailing_whitespace(text):
    """Remove spaces and tabs at the end of every line.

    Newlines themselves are kept, so the number of lines does not change.

    Args:
        text (str): Text to clean.

    Returns:
        str: The text with trailing spaces/tabs removed from each line.
    """
    line_tail = re.compile(r'[ \t]+$', flags=re.MULTILINE)
    return line_tail.sub('', text)


def rename_markdown_file(old_filename, output_directory="."):
    """Rename a generated markdown file to a short ``.mdx`` file.

    For module-style names such as ``package.module.interface.md`` the new
    name is the second dot-separated component (``module.mdx``). Names with
    fewer components fall back to the first component, so ``notes.md``
    becomes ``notes.mdx`` rather than ``md.mdx`` and a name without any dot
    no longer raises ``IndexError``.

    Args:
        old_filename (str): Path of the existing markdown file.
        output_directory (str): Directory that receives the renamed file.

    Raises:
        OSError: If the underlying ``os.rename`` call fails.
    """
    name_parts = os.path.basename(old_filename).split('.')
    # Three or more parts means "<package>.<module>...<ext>": keep the module.
    base_name = name_parts[1] if len(name_parts) > 2 else name_parts[0]
    os.rename(old_filename, os.path.join(output_directory, base_name + ".mdx"))

def _markdown_title(filename):
    """Build the frontmatter ``title:`` line from a markdown filename.

    For module-style names such as ``package.module.interface.md`` the title
    is the second dot-separated component, capitalized (``Module``). Names
    with fewer components fall back to the first component, so ``notes.md``
    yields ``Notes`` instead of ``Md`` and a name without any dot no longer
    raises ``IndexError``.

    Args:
        filename (str): Path or name of the markdown file.

    Returns:
        str: A line of the form ``"title: <Name>\\n"``.
    """
    name_parts = os.path.basename(filename).split('.')
    # Three or more parts means "<package>.<module>...<ext>": use the module.
    base_name = name_parts[1] if len(name_parts) > 2 else name_parts[0]
    return f"title: {base_name.capitalize()}\n"


def add_frontmatter(filename):
    """Return the frontmatter block for a markdown file.

    The block is the title line produced by ``_markdown_title`` wrapped
    between two ``---`` fence lines.

    Args:
        filename (str): Name of the file the title is derived from.

    Returns:
        str: The frontmatter text, ending with a newline.
    """
    fence = "---\n"
    return "".join((fence, _markdown_title(filename), fence))


def add_github_import_statement():
    """Return the import line for the ``GitHubLink`` snippet.

    Takes no arguments.

    Returns:
        str: The import statement followed by a blank line.
    """
    import_line = "import { GitHubLink } from '/snippets/en/_includes/github-source-link.mdx';"
    return import_line + "\n\n"

def format_github_button(filename, base_url="https://github.com/wandb/wandb-workspaces/blob/main/wandb_workspaces/"):
    """Build the GitHub source link component for a generated markdown file.

    The second dot-separated component of the file's basename selects the
    sub-package. When that component contains ``"reports"``, the third
    component is inserted as a version directory. In both cases the link
    targets ``internal.py``.

    Args:
        filename (str): Name of the file.
        base_url (str): Base URL for the GitHub button.

    Returns:
        str: The markup produced by ``_github_button`` for the built URL.

    Raises:
        IndexError: If the basename has too few dot-separated components.
    """
    name_parts = os.path.basename(filename).split('.')
    package = name_parts[1]

    # NOTE(review): os.path.join uses the host OS separator rules; the
    # default base_url ends with "/" so nothing extra is inserted here.
    if "reports" not in package:
        return _github_button(os.path.join(base_url, package + "/internal.py"))

    # Reports modules carry a version component (e.g. "v2") after the package.
    version = name_parts[2]
    return _github_button(
        os.path.join(base_url, package + "/" + version + "/internal.py")
    )

def _github_button(href_links):
    """Render the ``<GitHubLink>`` component for a source URL.

    Args:
        href_links (str): URL for the GitHub button.

    Returns:
        str: ``<GitHubLink url="..." />`` followed by a blank line.
    """
    pieces = ('<GitHubLink url="', href_links, '" />', "\n\n")
    return "".join(pieces)

def add_public_preview_note():
    """Return the ``<Note>`` admonition announcing the Public Preview status.

    Returns:
        str: The admonition markup followed by a blank line.
    """
    return "<Note>\nW&B Report and Workspace API is in Public Preview.\n</Note>\n\n"


def alphabetize_headings(markdown_text):
    """Sort the class sections of a markdown page alphabetically.

    The text is cut at the first ``---`` separator. Everything before it is
    kept (stripped) as the module docstring. The remainder is divided into
    blocks that each begin at a ``---`` separator:

    * a block containing a ``## <kbd>class</kbd> `Name``` heading opens a new
      section keyed by ``Name``;
    * a block without such a heading (e.g. an H3 block) is attached to the
      section opened most recently;
    * blocks that appear before the first class heading are kept, in their
      original order, ahead of the sorted classes. Previously they were
      silently dropped from the output.

    Sections are ordered with a stable, case-sensitive sort on the class
    name, so classes sharing a name keep their relative order.

    Args:
        markdown_text (str): Markdown content to reorder.

    Returns:
        str: The docstring, a blank line, then any leading non-class blocks
            and the sorted class sections joined by blank lines.
    """
    docstring, separator, remainder = markdown_text.partition('---')
    docstring = docstring.strip()
    if not separator:
        # No separator at all: the whole text is the module docstring.
        return docstring + "\n\n"

    # Pattern to match H2 headings (classes)
    class_heading = re.compile(r'## <kbd>class</kbd> `([^`]+)`')

    preamble = []   # content-bearing blocks seen before the first class
    sections = []   # (class_name, [blocks belonging to that class])

    # The lookahead keeps each separator at the start of its own block.
    for block in re.split(r'(?=---)', '---' + remainder):
        heading = class_heading.search(block)
        if heading:
            sections.append((heading.group(1), [block]))
        elif sections:
            sections[-1][1].append(block)
        elif block.strip().strip('-'):
            # Keep real content; ignore empty or separator-only fragments.
            preamble.append(block)

    # Sort the sections alphabetically by the class name
    sections.sort(key=lambda section: section[0])

    ordered = ["".join(blocks) for _, blocks in sections]
    if preamble:
        ordered.insert(0, "".join(preamble))

    return docstring + "\n\n" + "\n\n".join(ordered)


def main(args):
    """Convert every ``*.md`` file in the target directory into an ``.mdx`` page.

    Each file is read, cleaned by a fixed sequence of text transforms,
    rewritten in place with a generated header (frontmatter, GitHub import
    statement, GitHub link, Public Preview note) and finally renamed via
    ``rename_markdown_file``.

    Args:
        args: Parsed command-line arguments; only ``markdown_directory`` is
            read. It is resolved relative to the current working directory.
    """
    # Order matters: each transform receives the output of the previous one.
    # remove_empty_lines is not part of the pipeline (its call was disabled).
    transforms = (
        remove_markdownlint_disable,
        remove_module_header,
        remove_images,
        remove_internal_classes,
        alphabetize_headings,
        strip_trailing_whitespace,
    )

    search_pattern = os.path.join(os.getcwd(), args.markdown_directory, "*.md")
    for filename in glob.glob(search_pattern):
        print("Processing...", filename)

        with open(filename, "r+") as file:
            markdown_text = file.read()
            for transform in transforms:
                markdown_text = transform(markdown_text)

            # Rewind and overwrite: header pieces first, then the cleaned body.
            file.seek(0)
            file.write(add_frontmatter(filename))
            file.write(add_github_import_statement())
            file.write(format_github_button(filename))
            file.write(add_public_preview_note())
            file.write(markdown_text)
            # Drop leftover bytes if the new content is shorter than the old.
            file.truncate()

        # Rename markdown file and change extension to .mdx
        rename_markdown_file(filename, output_directory=args.markdown_directory)



if __name__ == "__main__":
    # Command-line entry point: parse the target directory and process it.
    cli = argparse.ArgumentParser(
        description="Add frontmatter and GitHub link to markdown files."
    )
    cli.add_argument(
        "--markdown_directory",
        default="wandb_sdk_docs",
        help="Directory containing markdown files to process",
    )
    main(cli.parse_args())
5 changes: 3 additions & 2 deletions process_sdk_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ def process_text(markdown_text: str) -> str:


def main(args):
for filename in glob.glob(os.path.join(os.getcwd(), args.output_directory, "*.md")):
for filename in glob.glob(os.path.join(os.getcwd(), args.markdown_directory, "*.md")):
print("Reading in...", filename)
with open(filename, "r") as f:
text = f.read()
with open(filename, "w") as f:
Expand All @@ -144,6 +145,6 @@ def main(args):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Post‑process lazydocs markdown.")
parser.add_argument("--output_directory", default="wandb_sdk_docs",
parser.add_argument("--markdown_directory", default="wandb_sdk_docs",
help="Directory containing markdown files to process")
main(parser.parse_args())