Merge pull request #27 from Yukino2002/revamp
Update scripts to use the Github API
rbharath authored Aug 9, 2023
2 parents e5c469a + 7b52bf2 commit 4379f5c
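
The change described by the commit message is a switch from scraping the repository's HTML pages with requests + BeautifulSoup to listing files through the GitHub repository contents API. A minimal sketch of that call pattern (an illustration using the public, unauthenticated endpoint, not code taken verbatim from the diff):

import requests

# List the files under examples/tutorials via the GitHub contents API.
url = "https://api.github.com/repos/deepchem/deepchem/contents/examples/tutorials"
response = requests.get(url)
response.raise_for_status()

# Each entry is a dict with metadata such as 'name' and 'download_url'.
for entry in response.json():
    if entry["name"].endswith(".ipynb"):
        print(entry["name"], entry["download_url"])

The unauthenticated endpoint is rate limited, so the response status is worth checking, as the updated script does.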
Showing 4 changed files with 200 additions and 2,012 deletions.
new-website/utils/tutorials/fetch_tutorials.py (127 changes: 78 additions & 49 deletions)
@@ -12,83 +12,102 @@
import os
import shutil
import subprocess
import re
import requests
-from bs4 import BeautifulSoup
from utils import clean

+DEEPCHEM_REPO_OWNER = "deepchem"
+DEEPCHEM_REPO_NAME = "deepchem"
+TUTORIALS_PATH = "examples/tutorials"
+TUTORIAL_RENDER_ORDER_PATH = "examples/tutorials/website-render-order"


-def fetch_file_list_from_repo(path_to_directory):
+def fetch_file_list_from_repo(repo_owner, repo_name, path):
"""
Fetches the names of all the files from a given directory in a Github repository.
Fetches the metadata like name, download_url etc of all the files in a given path in a Github repository .
Parameters
----------
path_to_directory: str
The URL of the directory in the Github repository.
repo_owner: str
The owner of the Github repository.
repo_name: str
The name of the Github repository.
path: str
The path in the Github repository from which the file names are to be fetched.
Returns
-------
files: list
A list of strings, where each string represents the name of a file present in the directory.
"""
files = []
response = requests.get(path_to_directory)
soup = BeautifulSoup(response.content, 'html.parser')

fileNames = soup.find_all(
'a', attrs={'class': 'js-navigation-open Link--primary'})
for fileName in fileNames:
fileName = fileName.text
files.append(fileName)

return files

data: list
A list of dictionaries, where each dictionary contains various metadata like the name and download_url of a file.
def fetch_tutorial_data():
Raises
------
Exception
If the response status code is not 200.
"""
Fetches the names of all the tutorials from the given Github URL.
url = "https://api.github.com/repos/{}/{}/contents/{}".format(
repo_owner, repo_name, path)
response = requests.get(url)
if response.status_code == 200:
data = response.json()
return data
else:
raise Exception(
"Error fetching file names: {}".format(response.status_code))


def get_tutorial_list():
"""
Fetches the names of all the tutorials from the Deepchem Github repository.
Returns
-------
tutorials: list
A list of strings, where each string represents the name of a tutorial.
"""
tutorials_url = 'https://github.com/deepchem/deepchem/tree/master/examples/tutorials'
tutorials = fetch_file_list_from_repo(tutorials_url)
tutorials = fetch_file_list_from_repo(
DEEPCHEM_REPO_OWNER, DEEPCHEM_REPO_NAME, TUTORIALS_PATH)

tutorial_names = [tutorial['name'] for tutorial in tutorials]

# Filter only the ipynb files
tutorials = [
tutorial for tutorial in tutorials if tutorial.endswith('.ipynb')]
return tutorials
tutorial_names = [
tutorial for tutorial in tutorial_names if tutorial.endswith('.ipynb')]
return tutorial_names


def fetch_tutorial_render_order():
    """
    Downloads the CSV files containing the tutorial order from the Deepchem repository.
    """

-    raw_path = 'https://raw.githubusercontent.com/deepchem/deepchem/master/examples/tutorials/website-render-order/'
-    csv_directory = 'https://github.com/deepchem/deepchem/tree/master/examples/tutorials/website-render-order'
-    tutorial_order = fetch_file_list_from_repo(csv_directory)
+    tutorial_order_csv_data = fetch_file_list_from_repo(
+        DEEPCHEM_REPO_OWNER, DEEPCHEM_REPO_NAME, TUTORIAL_RENDER_ORDER_PATH)
+
+    tutorial_order_csv_names = [tutorial['name']
+                                for tutorial in tutorial_order_csv_data]

    # Filter only the csv files
-    tutorial_order = [
-        tutorial for tutorial in tutorial_order if tutorial.endswith('.csv')]
+    tutorial_order_csv_names = [
+        tutorial for tutorial in tutorial_order_csv_names if tutorial.endswith('.csv')]

-    for tutorial_group in tutorial_order:
-        response = requests.get(raw_path + tutorial_group)
-        with open(f"./website-render-order/{tutorial_group}", "wb") as tutorial_file:
-            tutorial_file.write(response.content)
+    for tutorial_group in tutorial_order_csv_data:
+        response = requests.get(tutorial_group.get('download_url'))
+        if response.status_code == 200:
+            with open(f"./website-render-order/{tutorial_group.get('name')}", "wb") as tutorial_file:
+                tutorial_file.write(response.content)
+        else:
+            raise Exception(
+                "Error fetching tutorial render order: {}".format(response.status_code))


def create_directories():
    """
    Creates the required directories
    """
-    os.makedirs('./html-notebooks',exist_ok=True)
+    os.makedirs('./html-notebooks', exist_ok=True)
    os.makedirs('./ipynb-notebooks', exist_ok=True)
    os.makedirs('./website-render-order', exist_ok=True)


def convert_to_html(tutorials):
    """
    Converts the Jupyter notebooks in the './ipynb-notebooks' directory to HTML files and stores them in the './html-notebooks' directory.
@@ -101,31 +120,41 @@ def convert_to_html(tutorials):
    fromPath = "./ipynb-notebooks/"
    toPath = "./html-notebooks/"

-    tutorialURL = 'https://raw.githubusercontent.com/deepchem/deepchem/master/examples/tutorials/'
-
    for tutorial in tutorials:
        try:
-            file_name_html = f'{tutorial.rsplit(".")[0]}.html'
-            response = requests.get(tutorialURL + tutorial)
-            with open(f"./ipynb-notebooks/{tutorial}", "wb") as tutorial_file:
+            tutorial_file_name = tutorial["name"]
+            tutorial_download_link = tutorial["download_url"]
+
+            if not tutorial_file_name.endswith('.ipynb'):
+                continue
+
+            file_name_html = f'{tutorial_file_name.rsplit(".")[0]}.html'
+            response = requests.get(tutorial_download_link)
+            with open(f"./ipynb-notebooks/{tutorial_file_name}", "wb") as tutorial_file:
                tutorial_file.write(response.content)

            subprocess.call(
-                f'jq -M "del(.metadata.widgets)" ./ipynb-notebooks/{tutorial} > ./ipynb-notebooks/fixed-{tutorial}', shell=True)
+                f'jq -M "del(.metadata.widgets)" ./ipynb-notebooks/{tutorial_file_name} > ./ipynb-notebooks/fixed-{tutorial_file_name}', shell=True
+            )
            subprocess.call(
-                f'python -m nbconvert --to html ./ipynb-notebooks/fixed-{tutorial}', shell=True)
+                f'python -m nbconvert --to html ./ipynb-notebooks/fixed-{tutorial_file_name}', shell=True)
            shutil.copyfile(f'{fromPath}fixed-{file_name_html}',
                            toPath + file_name_html)

            with open('./notebooks.txt', "a") as notebook_list:
                notebook_list.write(file_name_html + '\n')
        except Exception as exception:
            print(exception)
-            print(f"Could not process {tutorial}")
+            print(f"Could not process {tutorial_file_name}")


if __name__ == "__main__":
-    tutorials = fetch_tutorial_data()
    create_directories()
-    convert_to_html(tutorials)
+    tutorials = fetch_file_list_from_repo(
+        DEEPCHEM_REPO_OWNER, DEEPCHEM_REPO_NAME, TUTORIALS_PATH)
+
+    # Throw an AssertionError if no tutorials were fetched, to prevent deploying the website without tutorials.
+    assert len(tutorials) > 0
+
+    fetch_tutorial_render_order()
+    convert_to_html(tutorials)
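
For readers unfamiliar with the jq step in convert_to_html: del(.metadata.widgets) strips the notebook's widget-state metadata, which nbconvert sometimes fails to render. A rough pure-Python sketch of that cleanup plus the conversion call (illustrative only; the file name is hypothetical, and the script itself keeps shelling out to jq):

import json
import subprocess

def strip_widget_metadata(src_path, dst_path):
    # Pure-Python equivalent of: jq -M "del(.metadata.widgets)" src > dst
    with open(src_path, "r", encoding="utf-8") as src:
        notebook = json.load(src)
    notebook.get("metadata", {}).pop("widgets", None)  # drop widget state if present
    with open(dst_path, "w", encoding="utf-8") as dst:
        json.dump(notebook, dst)

# Hypothetical file name, mirroring the script's fixed-<name>.ipynb convention.
strip_widget_metadata("./ipynb-notebooks/Example_Tutorial.ipynb",
                      "./ipynb-notebooks/fixed-Example_Tutorial.ipynb")
subprocess.call(
    "python -m nbconvert --to html ./ipynb-notebooks/fixed-Example_Tutorial.ipynb",
    shell=True)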
