From a9728e1b634b7468724c3a122645f7ed3c716458 Mon Sep 17 00:00:00 2001 From: Rakshit Kumar Singh Date: Sun, 3 Dec 2023 18:49:44 +0530 Subject: [PATCH 1/4] Create devcontainer.json --- .devcontainer/devcontainer.json | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..0e56f819 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,3 @@ +{ + "image": "mcr.microsoft.com/devcontainers/base:ubuntu", +} From e69ca4ba601c1c03c040815cdf424ff3f1b7c114 Mon Sep 17 00:00:00 2001 From: GreatRSingh Date: Mon, 4 Dec 2023 01:49:00 +0530 Subject: [PATCH 2/4] Revert "Create devcontainer.json" This reverts commit a9728e1b634b7468724c3a122645f7ed3c716458. --- .devcontainer/devcontainer.json | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 0e56f819..00000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "image": "mcr.microsoft.com/devcontainers/base:ubuntu", -} From 8a7585c19c2f6218a1786f75f8d10fca1a483a5b Mon Sep 17 00:00:00 2001 From: Rakshit Kumar Singh Date: Fri, 8 Dec 2023 09:14:59 +0000 Subject: [PATCH 3/4] Building PDFs --- new-website/utils/tutorials/build_pdf_book.py | 64 +++++++++++++++++++ new-website/utils/tutorials/utils.py | 26 ++++++++ 2 files changed, 90 insertions(+) create mode 100644 new-website/utils/tutorials/build_pdf_book.py diff --git a/new-website/utils/tutorials/build_pdf_book.py b/new-website/utils/tutorials/build_pdf_book.py new file mode 100644 index 00000000..28bdf0bd --- /dev/null +++ b/new-website/utils/tutorials/build_pdf_book.py @@ -0,0 +1,64 @@ +""" +This script is used to build the pdf book from DeepChem Tutorials. 
+ +Requirements: + - pdfunite + +Example Usage: + - Run the script "fetch_tutorials.py" // It will fetch all the tutorials. + - Run the script "build_pdf_book.py" + - It may cause an error, mostly due to the type of graphic used in some tutorials + which do not compile properly; remove them from the website-render-order or fix + them, and run this script again. + +NOTE: + - NO FILES OR DIRECTORIES HAVE TO BE CREATED MANUALLY. The script will create the required directories and files. + - Run scripts in the Top-Level folder. + +""" +import os +import pandas as pd +import pdfkit +from utils import numeric_sorter + + +INFO_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/website-render-order/" +DATA_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/html-notebooks/" +PDF_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/storage/" + +files = os.listdir(INFO_PATH) +files = sorted(files) + +files_list = numeric_sorter(files) + +def html_to_pdf(): + """ + Converts HTML files to PDF files. + + Raises + ------ + ProtocolUnknownError + If it faces some unknown kind of graphic. 
+ + """ + for i in files_list: + chapter = pd.read_csv(INFO_PATH + "-".join(i)) + for j in chapter["File Name"]: + print(i, j) + pdfkit.from_file(DATA_PATH + j[:-5] + "html", PDF_PATH + j[:-5] + "pdf") + +def merge_pdf(): + """Merges the compiled PDFs.""" + command = "pdfunite " + for i in files_list: + chapter = pd.read_csv(INFO_PATH + "-".join(i)) + for j in chapter["File Name"]: + print(i, j) + command = command + PDF_PATH + j[:-5] + "pdf " + os.system(command + "merged.pdf") + + +if __name__ == "__main__": + os.system("mkdir " + PDF_PATH) + html_to_pdf() + merge_pdf() diff --git a/new-website/utils/tutorials/utils.py b/new-website/utils/tutorials/utils.py index aa5c0478..57d443f1 100644 --- a/new-website/utils/tutorials/utils.py +++ b/new-website/utils/tutorials/utils.py @@ -8,6 +8,32 @@ import re +def numeric_sorter(s): + """ + Sorts the tutorials according to their serial number. + + Parameters + ---------- + s: List[str] + The List to be sorted. + + Returns + ------- + s_sorted: List[List[str]] + The sorted and Broken into parts list. + + """ + s_splitted_list = [] + s_sorted = [] + for i in s: + s_splitted_list.append(i.split("-")) + for i in range(len(s_splitted_list)+1): + for j in s_splitted_list: + if i == int(j[0]): + s_sorted.append(j) + return s_sorted + + def to_valid_identifier(s): """ Converts a given string into a valid identifier. 
From 9b8cb56b43ee3260695a35513efe0a3685506053 Mon Sep 17 00:00:00 2001 From: Rakshit Kumar Singh Date: Sat, 9 Dec 2023 10:08:55 +0000 Subject: [PATCH 4/4] Docs --- new-website/.gitignore | 1 + new-website/README.md | 5 +++++ new-website/utils/requirements.txt | Bin 740 -> 770 bytes new-website/utils/tutorials/build_pdf_book.py | 1 + 4 files changed, 7 insertions(+) diff --git a/new-website/.gitignore b/new-website/.gitignore index d550aaa5..bc76c665 100644 --- a/new-website/.gitignore +++ b/new-website/.gitignore @@ -2,5 +2,6 @@ /docs /utils/tutorials/html-notebooks /utils/tutorials/ipynb-notebooks +/utils/tutorials/storage /utils/tutorials/website-render-order /utils/tutorials/notebooks.txt \ No newline at end of file diff --git a/new-website/README.md b/new-website/README.md index 01e6e236..13cd2e9c 100644 --- a/new-website/README.md +++ b/new-website/README.md @@ -80,13 +80,18 @@ A detailed description of the working of the scripts is given below. - The CSV file itself contains the Titles and File names of the tutorials in the order in which they should be read. - ### `export_tutorials.py` + - This script reads the list of notebooks from `/utils/tutorials/notebooks.txt` and parses the HTML files (downloaded temporarily to `/utils/tutorials/html-notebooks`) using `BeautifulSoup`. - The script then creates a react component for each tutorial and exports it to the `/deepchem/pages/tutorials` directory. - The script also creates a json data file for each tutorial and exports it to the `/deepchem/data/tutorials` directory. - The template for the react components is stored in `utils/tutorials/tutorial_component_template.py`. Please note, that any files required by scripts are generated by the scripts themselves and are not stored in the repository. 
+- ### `build_pdf_book.py` + - The script reads the list of notebooks from `utils/tutorials/website-render-order` and converts the HTML files (downloaded temporarily to `/utils/tutorials/html-notebooks`) to PDF files using `pdfkit` and stores them in `/utils/tutorials/storage/`. + - The script then merges these PDFs and creates the file `merged.pdf`. + - Please note that the `pdfunite` tool must be installed for merging: `apt install poppler-utils` ## Deployment diff --git a/new-website/utils/requirements.txt b/new-website/utils/requirements.txt index fdb8b98b436c78222be1dd4807aec28cb58eace1..0fed149c707087d4271f1fc90ee3b5bb5e468536 100644 GIT binary patch delta 38 ocmaFD+Qhcu36mTz0~bR9LkdG0LpDPuLkWW|5E?S*F&Ka`0I^#KCIA2c delta 7 OcmZo-d&0Wm2@?Pe8v_Uc diff --git a/new-website/utils/tutorials/build_pdf_book.py b/new-website/utils/tutorials/build_pdf_book.py index 28bdf0bd..aa9b3c6d 100644 --- a/new-website/utils/tutorials/build_pdf_book.py +++ b/new-website/utils/tutorials/build_pdf_book.py @@ -3,6 +3,7 @@ Requirements: - pdfunite + - pdfkit Example Usage: - Run the script "fetch_tutorials.py" // It will fetch all the tutorials.