From a9728e1b634b7468724c3a122645f7ed3c716458 Mon Sep 17 00:00:00 2001
From: Rakshit Kumar Singh <rakshitsingh421@gmail.com>
Date: Sun, 3 Dec 2023 18:49:44 +0530
Subject: [PATCH 1/4] Create devcontainer.json

---
 .devcontainer/devcontainer.json | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .devcontainer/devcontainer.json

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000..0e56f819
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,3 @@
+{
+  "image": "mcr.microsoft.com/devcontainers/base:ubuntu",
+}

From e69ca4ba601c1c03c040815cdf424ff3f1b7c114 Mon Sep 17 00:00:00 2001
From: GreatRSingh <rakshitsingh421@gmail.com>
Date: Mon, 4 Dec 2023 01:49:00 +0530
Subject: [PATCH 2/4] Revert "Create devcontainer.json"

This reverts commit a9728e1b634b7468724c3a122645f7ed3c716458.
---
 .devcontainer/devcontainer.json | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 .devcontainer/devcontainer.json

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
deleted file mode 100644
index 0e56f819..00000000
--- a/.devcontainer/devcontainer.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "image": "mcr.microsoft.com/devcontainers/base:ubuntu",
-}

From 8a7585c19c2f6218a1786f75f8d10fca1a483a5b Mon Sep 17 00:00:00 2001
From: Rakshit Kumar Singh <rakshitsingh421@gmail.com>
Date: Fri, 8 Dec 2023 09:14:59 +0000
Subject: [PATCH 3/4] Building PDFs

---
 new-website/utils/tutorials/build_pdf_book.py | 64 +++++++++++++++++++
 new-website/utils/tutorials/utils.py          | 26 ++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 new-website/utils/tutorials/build_pdf_book.py

diff --git a/new-website/utils/tutorials/build_pdf_book.py b/new-website/utils/tutorials/build_pdf_book.py
new file mode 100644
index 00000000..28bdf0bd
--- /dev/null
+++ b/new-website/utils/tutorials/build_pdf_book.py
@@ -0,0 +1,64 @@
+"""
+This script is used to build the pdf book from DeepChem Tutorials.
+
+Requirements:
+    - pdfunite
+
+Example Usage:
+    - Run the script "fetch_tutorials.py" // It will fetch all the tutorials.
+    - Run the script "build_pdf_book.py"
+    - It may fail, mostly because some tutorials use graphics that do not
+    convert properly; remove those tutorials from the website-render-order
+    or fix them, and run this script again.
+    
+NOTE:
+    - NO FILES OR DIRECTORIES HAVE TO BE CREATED MANUALLY. The script will create the required directories and files.
+    - Run scripts in the Top-Level folder.
+
+"""
+import os
+import pandas as pd
+import pdfkit
+from utils import numeric_sorter
+
+
+# Resolve data directories next to this script rather than a fixed Codespace path.
+_HERE = os.path.dirname(os.path.abspath(__file__))
+INFO_PATH = os.path.join(_HERE, "website-render-order", "")
+DATA_PATH = os.path.join(_HERE, "html-notebooks", "")
+PDF_PATH = os.path.join(_HERE, "storage", "")
+
+files = sorted(os.listdir(INFO_PATH))
+files_list = numeric_sorter(files)  # chapter CSV names split on "-", by serial number
+
+def html_to_pdf():
+    """
+    Render every tutorial listed in the chapter CSVs from HTML to PDF.
+
+    Raises
+    ------
+    OSError
+        Raised by pdfkit when wkhtmltopdf fails, e.g. "ProtocolUnknownError".
+    """
+    for parts in files_list:
+        chapter = pd.read_csv(INFO_PATH + "-".join(parts))
+        for name in chapter["File Name"]:
+            stem = os.path.splitext(name)[0]  # drop the extension, whatever its length
+            print(parts, name)
+            pdfkit.from_file(DATA_PATH + stem + ".html", PDF_PATH + stem + ".pdf")
+
+def merge_pdf():
+    """Merge the per-tutorial PDFs, in render order, into merged.pdf using pdfunite."""
+    import subprocess  # local import; an argument list avoids shell quoting problems
+    command = ["pdfunite"]
+    for parts in files_list:
+        for name in pd.read_csv(INFO_PATH + "-".join(parts))["File Name"]:
+            print(parts, name)
+            command.append(PDF_PATH + os.path.splitext(name)[0] + ".pdf")
+    subprocess.run(command + ["merged.pdf"], check=False)  # exit status ignored, as before
+
+
+if __name__ == "__main__":
+    os.makedirs(PDF_PATH, exist_ok=True)  # idempotent: no shell, no error on re-runs
+    html_to_pdf()
+    merge_pdf()
diff --git a/new-website/utils/tutorials/utils.py b/new-website/utils/tutorials/utils.py
index aa5c0478..57d443f1 100644
--- a/new-website/utils/tutorials/utils.py
+++ b/new-website/utils/tutorials/utils.py
@@ -8,6 +8,32 @@
 import re
 
 
+def numeric_sorter(s):
+    """
+    Sorts the tutorials according to their serial number.
+
+    Each name is split on "-" and ordered by the integer value of its first
+    part, so "10-x" sorts after "2-y". Names sharing a serial number keep
+    their input order, and no name is dropped, however large its number.
+
+    Parameters
+    ----------
+    s: List[str]
+        Names of the form "<serial>-<rest>" to be sorted.
+
+    Returns
+    -------
+    s_sorted: List[List[str]]
+        The names split on "-", in ascending serial order.
+
+    Raises
+    ------
+    ValueError
+        If the first "-"-separated part of a name is not an integer.
+    """
+    return sorted((name.split("-") for name in s), key=lambda parts: int(parts[0]))
+
+
 def to_valid_identifier(s):
     """
     Converts a given string into a valid identifier.

From 9b8cb56b43ee3260695a35513efe0a3685506053 Mon Sep 17 00:00:00 2001
From: Rakshit Kumar Singh <rakshitsingh421@gmail.com>
Date: Sat, 9 Dec 2023 10:08:55 +0000
Subject: [PATCH 4/4] Docs

---
 new-website/.gitignore                        |   1 +
 new-website/README.md                         |   5 +++++
 new-website/utils/requirements.txt            | Bin 740 -> 770 bytes
 new-website/utils/tutorials/build_pdf_book.py |   1 +
 4 files changed, 7 insertions(+)

diff --git a/new-website/.gitignore b/new-website/.gitignore
index d550aaa5..bc76c665 100644
--- a/new-website/.gitignore
+++ b/new-website/.gitignore
@@ -2,5 +2,6 @@
 /docs
 /utils/tutorials/html-notebooks
 /utils/tutorials/ipynb-notebooks
+/utils/tutorials/storage
 /utils/tutorials/website-render-order
 /utils/tutorials/notebooks.txt
\ No newline at end of file
diff --git a/new-website/README.md b/new-website/README.md
index 01e6e236..13cd2e9c 100644
--- a/new-website/README.md
+++ b/new-website/README.md
@@ -80,13 +80,18 @@ A detailed description of the working of the scripts is given below.
     - The CSV file itself contains the Titles and File names of the tutorials in the order in which they should be read.
 
 - ### `export_tutorials.py`
+
   - This script reads the list of notebooks from `/utils/tutorials/notebooks.txt` and parses the HTML files (downloaded temporarily to `/utils/tutorials/html-notebooks`) using `BeautifulSoup`.
   - The script then creates a react component for each tutorial and exports it to the `/deepchem/pages/tutorials` directory.
   - The script also creates a json data file for each tutorial and exports it to the `/deepchem/data/tutorials` directory.
   - The template for the react components is stored in `utils/tutorials/tutorial_component_template.py`.
     Please note, that any files required by scripts are generated by the scripts themselves and are not stored in the repository.
 
+- ### `build_pdf_book.py`
 
+  - The script reads the list of notebooks from `utils/tutorials/website-render-order` and converts the HTML files (downloaded temporarily to `/utils/tutorials/html-notebooks`) to PDF files using `pdfkit` and stores them in `/utils/tutorials/storage/`.
+  - The script then merges these PDFs and creates the file `merged.pdf`.
+    - Please note that `pdfunite` must be installed for merging; it is provided by `poppler-utils` (`apt install poppler-utils`).
 
 
 ## Deployment
diff --git a/new-website/utils/requirements.txt b/new-website/utils/requirements.txt
index fdb8b98b436c78222be1dd4807aec28cb58eace1..0fed149c707087d4271f1fc90ee3b5bb5e468536 100644
GIT binary patch
delta 38
ocmaFD+Qhcu36mTz0~bR9LkdG0LpDPuLkWW|5E?S*F&Ka`0I^#KCIA2c

delta 7
OcmZo-d&0Wm2@?Pe8v_Uc

diff --git a/new-website/utils/tutorials/build_pdf_book.py b/new-website/utils/tutorials/build_pdf_book.py
index 28bdf0bd..aa9b3c6d 100644
--- a/new-website/utils/tutorials/build_pdf_book.py
+++ b/new-website/utils/tutorials/build_pdf_book.py
@@ -3,6 +3,7 @@
 
 Requirements:
     - pdfunite
+    - pdfkit
 
 Example Usage:
     - Run the script "fetch_tutorials.py" // It will fetch all the tutorials.