diff --git a/.github/workflows/build_pdf_book.yml b/.github/workflows/build_pdf_book.yml new file mode 100644 index 00000000..43641804 --- /dev/null +++ b/.github/workflows/build_pdf_book.yml @@ -0,0 +1,42 @@ +name: Build latest version of PDF Book + +on: + workflow_dispatch: + repository_dispatch: + types: [rebuild-book] + +permissions: + contents: write + +jobs: + build_pdf_book: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: install requirements + run: | + cd new-website + cd utils + pip install -r requirements.txt + sudo apt-get install -y poppler-utils + sudo apt-get install -y wkhtmltopdf + + - name: fetch latest version of tutorials + run: | + sudo apt-get install jq + cd new-website + cd utils/tutorials + python3 fetch_tutorials.py + + - name: build pdf book + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + run: | + cd new-website + cd utils/tutorials + python3 build_pdf_book.py + + \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 96643bb4..606f3567 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,6 +24,8 @@ jobs: cd new-website cd utils pip install -r requirements.txt + sudo apt-get install -y poppler-utils + sudo apt-get install -y wkhtmltopdf - name: Test tutorial fetching and export run: | @@ -37,4 +39,10 @@ jobs: cd utils/tutorials python3 test_utils.py + - name: Test tutorials build pdf book functions + run: | + cd new-website + cd utils/tutorials + python3 test_build_pdf_book.py + \ No newline at end of file diff --git a/new-website/README.md b/new-website/README.md index 13cd2e9c..d8caf792 100644 --- a/new-website/README.md +++ b/new-website/README.md @@ -90,8 +90,10 @@ A detailed description of the working of the scripts is given below. 
- ### `build_pdf_book.py` - The script reads the list of notebooks from `utils/tutorials/website-render-order` and converts the HTML files (downloaded temporarily to `/utils/tutorials/html-notebooks`) to PDF files using `pdfkit` and stores them in `/utils/tutorials/storage/`. - - The script then merged these PDFs and creates the file `merged.pdf`. + - The script then merges these PDFs and creates the file `merged.pdf`. + - The script then combines `merged.pdf` with the title, acknowledgement and contents pages into `full_pdf.pdf`, which is uploaded to the `deepchemtutorials` S3 bucket as `TutorialsBook.pdf`. - Please note, pdfunite package is required to be installed for merging. `apt install poppler-utils` + ## Deployment @@ -106,11 +108,21 @@ A detailed description of the working of the scripts is given below. ## Workflow script -- The `deploy_gh_pages.yml` workflow script in `.github/workflows` is triggered on updates to the main branch. -- The workflow runs a single job comprising of 3 steps - - Fetch version data: This step fetches the latest deepchem release version from the github [api endpoint](https://api.github.com/repos/deepchem/deepchem/releases) and updates the terminal commands in `deepchem/data/home/terminal-commands.json` - - Install and build: This step checks out the repository, installs the required dependencies using npm i, runs the linting process with npm run lint, and generates the static website with npm run export. - - Deploy: This step deploys the website to the gh-pages branch using the [JamesIves/github-pages-deploy-action](https://github.com/JamesIves/github-pages-deploy-action). The website files are copied from the deepchem/out directory, and any files listed in the clean-exclude parameter are excluded from the cleaning process. +- ### `deploy_gh_pages.yml` + + - The `deploy_gh_pages.yml` workflow script in `.github/workflows` is triggered on updates to the main branch. 
+ - The workflow runs a single job comprising 3 steps + - Fetch version data: This step fetches the latest deepchem release version from the github [api endpoint](https://api.github.com/repos/deepchem/deepchem/releases) and updates the terminal commands in `deepchem/data/home/terminal-commands.json` + - Install and build: This step checks out the repository, installs the required dependencies using npm i, runs the linting process with npm run lint, and generates the static website with npm run export. + - Deploy: This step deploys the website to the gh-pages branch using the [JamesIves/github-pages-deploy-action](https://github.com/JamesIves/github-pages-deploy-action). The website files are copied from the deepchem/out directory, and any files listed in the clean-exclude parameter are excluded from the cleaning process. + + - ### `build_pdf_book.yml` + + - The `build_pdf_book.yml` workflow script in `.github/workflows` is triggered manually (`workflow_dispatch`) or by a `repository_dispatch` event of type `rebuild-book`, which is sent on updates to the `deepchem/examples/tutorials` directory in the `deepchem` repository. + - The workflow runs a single job comprising 3 steps + - Install requirements: This step installs the dependencies specified in the `requirements.txt` file in `new-website/utils`. It also installs the poppler-utils and wkhtmltopdf packages. + - Fetch latest version of tutorials: It installs the jq package and then runs the `fetch_tutorials.py` script. + - Build pdf book: This step runs the `build_pdf_book.py` script. 
## Workflow overview diff --git a/new-website/utils/requirements.txt b/new-website/utils/requirements.txt index 299b57f7..40024653 100644 Binary files a/new-website/utils/requirements.txt and b/new-website/utils/requirements.txt differ diff --git a/new-website/utils/tutorials/build_pdf_book.py b/new-website/utils/tutorials/build_pdf_book.py index e289939f..dc41092f 100644 --- a/new-website/utils/tutorials/build_pdf_book.py +++ b/new-website/utils/tutorials/build_pdf_book.py @@ -5,6 +5,7 @@ - pdfunite - pdfkit - mdpdf + - boto3 Example Usage: - Run the script "fetch_tutorials.py" // It will fetch all the tutorials. @@ -23,42 +24,131 @@ import pdfkit from utils import numeric_sorter from typing import List +import signal +import logging +import boto3 +from botocore.exceptions import ClientError +INFO_PATH = "website-render-order/" +DATA_PATH = "html-notebooks/" +PDF_PATH = "storage/" -INFO_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/website-render-order/" -DATA_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/html-notebooks/" -PDF_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/storage/" -files = os.listdir(INFO_PATH) -files = sorted(files) +def timeout_handler(signum, frame): + """ + For terminating a function call. + + Raises + ------ + Exception + If the function is called. -files_list = numeric_sorter(files) + """ + raise Exception("Coversion Timed out") + -def html_to_pdf(): +def html_to_pdf(data_path=DATA_PATH, info_path=INFO_PATH, pdf_path=PDF_PATH): """ Converts HTML files to PDF files. + Parameters + ---------- + data_path: str + Path of the html files to be converted. Defaults to DATA_PATH. + info_path: str + Path for Tutorial Render Order. Defaults to INFO_PATH. + pdf_path: str + Path where the converted pdf files will be stored. Defaults to PDF_PATH. + Raises ------ ProtocolUnknownError If it faces some unknown kind of graphic. 
+ IOError + If the file specified in the website-render-order is not present in /html-notebooks. + Exception + If the Conversion takes longer than 60 seconds. """ + files = os.listdir(info_path) + files = sorted(files) + files_list = numeric_sorter(files) + for i in files_list: - chapter = pd.read_csv(INFO_PATH + "-".join(i)) + chapter = pd.read_csv(info_path + "-".join(i)) for j in chapter["File Name"]: - print(i, j) - pdfkit.from_file(DATA_PATH + j[:-5] + "html", PDF_PATH + j[:-5] + "pdf") + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(60) + try: + print(i, j) + pdfkit.from_file(data_path + j[:-5] + "html", pdf_path + j[:-5] + "pdf") + print("Conversion Successful") + except Exception as e: + print("Exception occured: {}".format(e)) + + +def upload_file(file_name, bucket, object_name=None): + """ + Upload a file to an S3 bucket + + Parameters + ---------- + file_name: str + Path of the File to be uploaded. + bucket: str + Name of the Bucket to upload the file to. + object_name: str + S3 object name. If not specified then file_name is used. + + Returns + ------- + boolean: + True if file was uploaded, else False + + """ + + # If S3 object_name was not specified, use file_name + if object_name is None: + object_name = os.path.basename(file_name) + + # Upload the file + s3_client = boto3.client('s3') + try: + response = s3_client.upload_file(file_name, bucket, object_name) + except ClientError as e: + logging.error(e) + return False + return True + + +def merge_pdf(info_path=INFO_PATH, pdf_path=PDF_PATH): + """ + Merges the compiled PDFs. + + Parameters + ---------- + info_path: str + Path for Tutorial Render Order. Defaults to INFO_PATH. + pdf_path: str + Path where the merged pdf file will be stored. Defaults to PDF_PATH. 
+ + """ + files = os.listdir(info_path) + files = sorted(files) + + files_list = numeric_sorter(files) -def merge_pdf(): - """Merges the compiled PDFs.""" command = "pdfunite " for i in files_list: - chapter = pd.read_csv(INFO_PATH + "-".join(i)) + print(i) + chapter = pd.read_csv(info_path + "-".join(i)) for j in chapter["File Name"]: - print(i, j) - command = command + PDF_PATH + j[:-5] + "pdf " - os.system(command + "merged.pdf") + file_path = pdf_path + j[:-5] + "pdf" + if (os.path.exists(file_path)): + print(i, j) + command = command + pdf_path + j[:-5] + "pdf " + os.system(command + f"{pdf_path}merged.pdf") + def merge_pdf_pages(a: List[str]): """Merges the PDFs. @@ -73,7 +163,8 @@ def merge_pdf_pages(a: List[str]): command = "pdfunite " for i in a: command = command + i + ' ' - os.system(command + "storage/merged.pdf") + os.system(command + "storage/full_pdf.pdf") + def compile_information_pages(): """Converts the Acknowledgent page and content page from @@ -85,9 +176,12 @@ def compile_information_pages(): pdfkit.from_file('contents.html', 'storage/contents.pdf') pdfkit.from_file('acknowledgement.html', 'storage/acknowledgement.pdf') + if __name__ == "__main__": os.system("mkdir " + PDF_PATH) html_to_pdf() merge_pdf() compile_information_pages() - merge_pdf_pages(['storage/title.pdf', 'storage/acknowledgement.pdf', 'storage/contents.pdf', 'storage/full_pdf.pdf']) + merge_pdf_pages(['storage/title.pdf', 'storage/acknowledgement.pdf', 'storage/contents.pdf', 'storage/merged.pdf']) + upload_file('storage/full_pdf.pdf', 'deepchemtutorials', 'TutorialsBook.pdf') + \ No newline at end of file diff --git a/new-website/utils/tutorials/mocks/mock_html-notebooks/About_nODE_Using_Torchdiffeq_in_Deepchem.html b/new-website/utils/tutorials/mocks/mock_html-notebooks/About_nODE_Using_Torchdiffeq_in_Deepchem.html new file mode 100644 index 00000000..0cb6d8be --- /dev/null +++ 
b/new-website/utils/tutorials/mocks/mock_html-notebooks/About_nODE_Using_Torchdiffeq_in_Deepchem.html @@ -0,0 +1,8084 @@ + + + + + +fixed-About_nODE_Using_Torchdiffeq_in_Deepchem + + + + + + + + + + + + +
+
+ +
+
+ +
+
+ +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+
+ +
+
+ +
+ + +
+
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+
+ +
+
+ + diff --git a/new-website/utils/tutorials/mocks/mock_html-notebooks/Advanced_Model_Training.html b/new-website/utils/tutorials/mocks/mock_html-notebooks/Advanced_Model_Training.html new file mode 100644 index 00000000..23a19dfb --- /dev/null +++ b/new-website/utils/tutorials/mocks/mock_html-notebooks/Advanced_Model_Training.html @@ -0,0 +1,7835 @@ + + + + + +fixed-Advanced_Model_Training + + + + + + + + + + + + +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+
+ +
+ +
+
+ + diff --git a/new-website/utils/tutorials/mocks/mock_html-notebooks/Advanced_model_training_using_hyperopt.html b/new-website/utils/tutorials/mocks/mock_html-notebooks/Advanced_model_training_using_hyperopt.html new file mode 100644 index 00000000..dd47580c --- /dev/null +++ b/new-website/utils/tutorials/mocks/mock_html-notebooks/Advanced_model_training_using_hyperopt.html @@ -0,0 +1,7973 @@ + + + + + +fixed-Advanced_model_training_using_hyperopt + + + + + + + + + + + + +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+
+ +
+
+ + diff --git a/new-website/utils/tutorials/mocks/mock_html-notebooks/An_Introduction_To_MoleculeNet.html b/new-website/utils/tutorials/mocks/mock_html-notebooks/An_Introduction_To_MoleculeNet.html new file mode 100644 index 00000000..d5ab40fc --- /dev/null +++ b/new-website/utils/tutorials/mocks/mock_html-notebooks/An_Introduction_To_MoleculeNet.html @@ -0,0 +1,8207 @@ + + + + + +fixed-An_Introduction_To_MoleculeNet + + + + + + + + + + + + +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ +
+ + +
+ + +
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ +
+ +
+ + +
+ + +
+
+ +
+
+ +
+
+ + diff --git a/new-website/utils/tutorials/mocks/mock_html-notebooks/Atomic_Contributions_for_Molecules.html b/new-website/utils/tutorials/mocks/mock_html-notebooks/Atomic_Contributions_for_Molecules.html new file mode 100644 index 00000000..c5764358 --- /dev/null +++ b/new-website/utils/tutorials/mocks/mock_html-notebooks/Atomic_Contributions_for_Molecules.html @@ -0,0 +1,8924 @@ + + + + + +fixed-Atomic_Contributions_for_Molecules + + + + + + + + + + + + +
+
+ +
+ + +
+ + +
+
+ +
+ +
+
+ +
+ +
+ + +
+
+ +
+ + +
+
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ +
+ + +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+
+ +
+
+ +
+ +
+ +
+
+ +
+
+ +
+
+ + diff --git a/new-website/utils/tutorials/mocks/mock_html-notebooks/Conditional_Generative_Adversarial_Networks.html b/new-website/utils/tutorials/mocks/mock_html-notebooks/Conditional_Generative_Adversarial_Networks.html new file mode 100644 index 00000000..ae80f332 --- /dev/null +++ b/new-website/utils/tutorials/mocks/mock_html-notebooks/Conditional_Generative_Adversarial_Networks.html @@ -0,0 +1,7836 @@ + + + + + +fixed-Conditional_Generative_Adversarial_Networks + + + + + + + + + + + + +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+
+ + diff --git a/new-website/utils/tutorials/mocks/mock_storage/About_nODE_Using_Torchdiffeq_in_Deepchem.pdf b/new-website/utils/tutorials/mocks/mock_storage/About_nODE_Using_Torchdiffeq_in_Deepchem.pdf new file mode 100644 index 00000000..8ec4a0cd Binary files /dev/null and b/new-website/utils/tutorials/mocks/mock_storage/About_nODE_Using_Torchdiffeq_in_Deepchem.pdf differ diff --git a/new-website/utils/tutorials/mocks/mock_storage/Advanced_Model_Training.pdf b/new-website/utils/tutorials/mocks/mock_storage/Advanced_Model_Training.pdf new file mode 100644 index 00000000..a45aabc7 Binary files /dev/null and b/new-website/utils/tutorials/mocks/mock_storage/Advanced_Model_Training.pdf differ diff --git a/new-website/utils/tutorials/mocks/mock_storage/Advanced_model_training_using_hyperopt.pdf b/new-website/utils/tutorials/mocks/mock_storage/Advanced_model_training_using_hyperopt.pdf new file mode 100644 index 00000000..4eaed0c9 Binary files /dev/null and b/new-website/utils/tutorials/mocks/mock_storage/Advanced_model_training_using_hyperopt.pdf differ diff --git a/new-website/utils/tutorials/mocks/mock_storage/An_Introduction_To_MoleculeNet.pdf b/new-website/utils/tutorials/mocks/mock_storage/An_Introduction_To_MoleculeNet.pdf new file mode 100644 index 00000000..5193a066 Binary files /dev/null and b/new-website/utils/tutorials/mocks/mock_storage/An_Introduction_To_MoleculeNet.pdf differ diff --git a/new-website/utils/tutorials/mocks/mock_storage/Atomic_Contributions_for_Molecules.pdf b/new-website/utils/tutorials/mocks/mock_storage/Atomic_Contributions_for_Molecules.pdf new file mode 100644 index 00000000..a2dff6e1 Binary files /dev/null and b/new-website/utils/tutorials/mocks/mock_storage/Atomic_Contributions_for_Molecules.pdf differ diff --git a/new-website/utils/tutorials/mocks/mock_storage/Conditional_Generative_Adversarial_Networks.pdf b/new-website/utils/tutorials/mocks/mock_storage/Conditional_Generative_Adversarial_Networks.pdf new file mode 100644 index 
00000000..da674275 Binary files /dev/null and b/new-website/utils/tutorials/mocks/mock_storage/Conditional_Generative_Adversarial_Networks.pdf differ diff --git a/new-website/utils/tutorials/mocks/mock_website_render_order/1-mock_website_render_order.csv b/new-website/utils/tutorials/mocks/mock_website_render_order/1-mock_website_render_order.csv new file mode 100644 index 00000000..b37c16c3 --- /dev/null +++ b/new-website/utils/tutorials/mocks/mock_website_render_order/1-mock_website_render_order.csv @@ -0,0 +1,7 @@ +Title,File Name +About_nODE_Using_Torchdiffeq_in_Deepchem,About_nODE_Using_Torchdiffeq_in_Deepchem.ipynb +Advanced model training using Hyperopt,Advanced_model_training_using_hyperopt.ipynb +Advanced Model Training,Advanced_Model_Training.ipynb +An Introduction to MoleculeNet,An_Introduction_To_MoleculeNet.ipynb +Atomic Contributions for Molecules,Atomic_Contributions_for_Molecules.ipynb +Conditional Generative Adversarial Networks,Conditional_Generative_Adversarial_Networks.ipynb \ No newline at end of file diff --git a/new-website/utils/tutorials/test_build_pdf_book.py b/new-website/utils/tutorials/test_build_pdf_book.py new file mode 100644 index 00000000..154bf9cd --- /dev/null +++ b/new-website/utils/tutorials/test_build_pdf_book.py @@ -0,0 +1,81 @@ +""" +Unit tests for the functions in build_pdf_book.py modules. + +These tests verify the correctness of the following functions: +- build_pdf_book.html_to_pdf(): Converts HTML files to PDF files. +- build_pdf_book.merge_pdf(): Merges a list of PDF files into a single PDF file + +Example usage: +$ python test_build_pdf_book.py + +""" + +import unittest +from unittest.mock import patch +import build_pdf_book +import os +import json + + +class TestConvertHTMLToPDF(unittest.TestCase): + """ + Test cases for the html_to_pdf function. 
+ """ + + def test_convert_html_to_pdf(self): + """ + This function tests whether the html_to_pdf function correctly converts + HTML files to PDF files and saves them in the appropriate directory. + + """ + INFO_PATH = "mocks/mock_website_render_order/" + DATA_PATH = "mocks/mock_html-notebooks/" + PDF_PATH = "mocks/mock_temp_storage/" + + with open('./mocks/github-response-mock.json', 'rb') as f: + tutorials = json.load(f) + + try: + os.makedirs('mocks/mock_temp_storage/') + + except Exception as exception: + print("Directory already exist, or could not create directory. ") + print(exception) + + build_pdf_book.html_to_pdf(DATA_PATH, INFO_PATH, PDF_PATH) + + for tutorial in tutorials: + tutorial_file_name = tutorial["name"] + file_name_pdf = f'{tutorial_file_name.rsplit(".")[0]}.pdf' + self.assertTrue(os.path.isfile( + f"mocks/mock_temp_storage/{file_name_pdf}")) + os.remove(f"mocks/mock_temp_storage/{file_name_pdf}") + + os.rmdir("mocks/mock_temp_storage") + + +class TestMergePDF(unittest.TestCase): + """ + Test cases for the merge_pdf function. + """ + + def test_convert_html_to_pdf(self): + """ + This function tests whether the merge_pdf function correctly merges + a list of PDF files into a single PDF file and saves it in the specified directory. + + """ + + INFO_PATH = "mocks/mock_website_render_order/" + PDF_PATH = "mocks/mock_storage/" + + build_pdf_book.merge_pdf(INFO_PATH, PDF_PATH) + + self.assertTrue(os.path.isfile( + f"{PDF_PATH}merged.pdf")) + os.remove(f"{PDF_PATH}merged.pdf") + + +if __name__ == "__main__": + unittest.main() + \ No newline at end of file