diff --git a/new-website/.gitignore b/new-website/.gitignore index d550aaa5..bc76c665 100644 --- a/new-website/.gitignore +++ b/new-website/.gitignore @@ -2,5 +2,6 @@ /docs /utils/tutorials/html-notebooks /utils/tutorials/ipynb-notebooks +/utils/tutorials/storage /utils/tutorials/website-render-order /utils/tutorials/notebooks.txt \ No newline at end of file diff --git a/new-website/README.md b/new-website/README.md index 01e6e236..13cd2e9c 100644 --- a/new-website/README.md +++ b/new-website/README.md @@ -80,13 +80,18 @@ A detailed description of the working of the scripts is given below. - The CSV file itself contains the Titles and File names of the tutorials in the order in which they should be read. - ### `export_tutorials.py` + - This script reads the list of notebooks from `/utils/tutorials/notebooks.txt` and parses the HTML files (downloaded temporarily to `/utils/tutorials/html-notebooks`) using `BeautifulSoup`. - The script then creates a react component for each tutorial and exports it to the `/deepchem/pages/tutorials` directory. - The script also creates a json data file for each tutorial and exports it to the `/deepchem/data/tutorials` directory. - The template for the react components is stored in `utils/tutorials/tutorial_component_template.py`. Please note, that any files required by scripts are generated by the scripts themselves and are not stored in the repository. +- ### `build_pdf_book.py` + - The script reads the chapter CSV files in `/utils/tutorials/website-render-order` and converts the HTML file of each listed notebook (downloaded temporarily to `/utils/tutorials/html-notebooks`) to a PDF file using `pdfkit`, storing the results in `/utils/tutorials/storage/`. + - The script then merges these PDFs, in chapter order, into a single file named `merged.pdf`. + - Please note that the `pdfunite` command-line tool (part of `poppler-utils`) must be installed for the merge step.
"""
Build the PDF book from the DeepChem tutorials.

Requirements:
    - pdfunite (command-line tool, installed with `apt install poppler-utils`)
    - pdfkit

Example usage:
    - Run "fetch_tutorials.py" first; it fetches all the tutorials.
    - Run "build_pdf_book.py".
    - The conversion may fail, mostly because of graphics in some tutorials
      that do not render properly. Remove those tutorials from the
      website-render-order files (or fix them) and run this script again.

NOTE:
    - NO FILES OR DIRECTORIES HAVE TO BE CREATED MANUALLY. The script creates
      the required directories and files.
    - Run scripts in the top-level folder; "merged.pdf" is written to the
      current working directory.

"""
import os
import subprocess

import pandas as pd


# ---------------------------------------------------------------------------
# new-website/utils/tutorials/utils.py -- helper added alongside the script.
# (build_pdf_book.py obtains it with `from utils import numeric_sorter`.)
# ---------------------------------------------------------------------------
def numeric_sorter(s):
    """
    Sorts the tutorials according to their serial number.

    Every name is split on "-" and the resulting part lists are ordered by the
    integer value of their first part, so "10-x.csv" sorts after "2-y.csv".
    Names that share a serial number keep their input order.

    Parameters
    ----------
    s: List[str]
        Names of the form "<serial>-<rest>", e.g. "3-Intro-to-X.csv".

    Returns
    -------
    s_sorted: List[List[str]]
        One list of "-"-separated parts per input name, in serial order.

    Raises
    ------
    ValueError
        If the text before the first "-" of a name is not an integer.

    """
    # One stable sort keeps every entry. The previous implementation scanned
    # serial numbers 0..len(s) only, so a name whose serial exceeded the
    # number of names (e.g. 1, 2 and 10) was silently dropped.
    return sorted((name.split("-") for name in s),
                  key=lambda parts: int(parts[0]))


# ---------------------------------------------------------------------------
# new-website/utils/tutorials/build_pdf_book.py
# ---------------------------------------------------------------------------
# NOTE(review): these are absolute paths -- presumably the location of the
# checkout inside the development container; confirm before running elsewhere.
INFO_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/website-render-order/"
DATA_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/html-notebooks/"
PDF_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/storage/"


def _with_extension(file_name, extension):
    """Return `file_name` with its last extension replaced by `extension`."""
    # The original sliced off the last five characters, which is only right
    # when every name ends in "ipynb"; splitext gives the same result for
    # those names and stays correct for any other extension.
    return os.path.splitext(file_name)[0] + extension


def _iter_tutorials():
    """Yield every tutorial file name, chapter by chapter, in render order."""
    # Read the directory on each call instead of at import time, so importing
    # this module has no file-system side effects.
    chapters = numeric_sorter(sorted(os.listdir(INFO_PATH)))
    for parts in chapters:
        # The parts are re-joined to recover the chapter's CSV file name.
        chapter = pd.read_csv(os.path.join(INFO_PATH, "-".join(parts)))
        for file_name in chapter["File Name"]:
            print(parts, file_name)  # progress output
            yield file_name


def html_to_pdf():
    """
    Converts HTML files to PDF files.

    For every tutorial listed in the chapter CSV files under INFO_PATH, the
    matching HTML file in DATA_PATH is rendered to a PDF file in PDF_PATH.

    Raises
    ------
    ProtocolUnknownError
        Reported when the converter faces some unknown kind of graphic. Any
        exception raised by pdfkit propagates and stops the conversion.

    """
    # Deferred import: only this function needs pdfkit.
    import pdfkit

    for file_name in _iter_tutorials():
        pdfkit.from_file(
            os.path.join(DATA_PATH, _with_extension(file_name, ".html")),
            os.path.join(PDF_PATH, _with_extension(file_name, ".pdf")),
        )


def merge_pdf():
    """
    Merges the compiled PDFs into "merged.pdf" in the working directory.

    Raises
    ------
    FileNotFoundError
        If the `pdfunite` executable cannot be found.
    subprocess.CalledProcessError
        If `pdfunite` exits with a non-zero status.

    """
    pdf_paths = [
        os.path.join(PDF_PATH, _with_extension(file_name, ".pdf"))
        for file_name in _iter_tutorials()
    ]
    if not pdf_paths:
        print("No tutorial PDFs found; nothing to merge.")
        return
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact; check=True surfaces a failed merge instead of
    # silently discarding the exit status.
    subprocess.run(["pdfunite", *pdf_paths, "merged.pdf"], check=True)


if __name__ == "__main__":
    # Unlike a bare `mkdir`, this also succeeds when the directory already
    # exists or when intermediate directories are missing.
    os.makedirs(PDF_PATH, exist_ok=True)
    html_to_pdf()
    merge_pdf()