deepchem · Cannon07 · Apr 29, 2024 · Apr 29, 2024 · May 1, 2024 · May 2, 2024
diff --git a/.github/workflows/build_pdf_book.yml b/.github/workflows/build_pdf_book.yml
@@ -0,0 +1,42 @@
+name: Build latest version of PDF Book
+
+on:
+  workflow_dispatch:
+  repository_dispatch:
+    types: [rebuild-book]
+
+permissions:
+  contents: write
+
+jobs:
+  build_pdf_book:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: install requirements
+        run: |
+          cd new-website
+          cd utils
+          pip install -r requirements.txt
+          sudo apt-get install -y poppler-utils
+          sudo apt-get install -y wkhtmltopdf
+
+      - name: fetch latest version of tutorials
+        run: |
+          sudo apt-get install jq
+          cd new-website
+          cd utils/tutorials
+          python3 fetch_tutorials.py
+
+      - name: build pdf book
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+        run: |
+          cd new-website
+          cd utils/tutorials
+          python3 build_pdf_book.py
+
+
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -24,6 +24,8 @@ jobs:
           cd new-website
           cd utils
           pip install -r requirements.txt
+          sudo apt-get install -y poppler-utils
+          sudo apt-get install -y wkhtmltopdf
 
       - name: Test tutorial fetching and export
         run: |
@@ -37,4 +39,10 @@ jobs:
           cd utils/tutorials
           python3 test_utils.py
 
+      - name: Test tutorials build pdf book functions
+        run: |
+          cd new-website
+          cd utils/tutorials
+          python3 test_build_pdf_book.py
+
 
diff --git a/new-website/utils/requirements.txt b/new-website/utils/requirements.txt
diff --git a/new-website/utils/tutorials/build_pdf_book.py b/new-website/utils/tutorials/build_pdf_book.py
@@ -5,6 +5,7 @@
     - pdfunite
     - pdfkit
     - mdpdf
+    - boto3
 
 Example Usage:
     - Run the script "fetch_tutorials.py" // It will fetch all the tutorials.
@@ -23,42 +24,131 @@
 import pdfkit
 from utils import numeric_sorter
 from typing import List
+import signal
+import logging
+import boto3
+from botocore.exceptions import ClientError
 
+INFO_PATH = "website-render-order/"
+DATA_PATH = "html-notebooks/"
+PDF_PATH = "storage/"
 
-INFO_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/website-render-order/"
-DATA_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/html-notebooks/"
-PDF_PATH = "/workspaces/deepchem.github.io/new-website/utils/tutorials/storage/"
 
-files = os.listdir(INFO_PATH)
-files = sorted(files)
+def timeout_handler(signum, frame):
+    """
+    For terminating a function call.
+
+    Raises
+    ------
+    Exception
+        If the function is called.
 
-files_list = numeric_sorter(files)
+    """
+    raise Exception("Coversion Timed out")
+
 
-def html_to_pdf():
+def html_to_pdf(data_path=DATA_PATH, info_path=INFO_PATH, pdf_path=PDF_PATH):
     """
     Converts HTML files to PDF files.
 
+    Parameters
+    ----------
+    data_path: str
+        Path of the html files to be converted. Defaults to DATA_PATH.
+    info_path: str
+        Path for Tutorial Render Order. Defaults to INFO_PATH.
+    pdf_path: str
+        Path where the converted pdf files will be stored. Defaults to PDF_PATH.
+
     Raises
     ------
     ProtocolUnknownError
         If it faces some unknown kind of graphic.
+    IOError
+        If the file specified in the website-render-order is not present in /html-notebooks.
+    Exception
+        If the Conversion takes longer than 60 seconds.
 
     """
+    files = os.listdir(info_path)
+    files = sorted(files)
+    files_list = numeric_sorter(files)
+
     for i in files_list:
-        chapter = pd.read_csv(INFO_PATH + "-".join(i))
+        chapter = pd.read_csv(info_path + "-".join(i))
         for j in chapter["File Name"]:
-            print(i, j)
-            pdfkit.from_file(DATA_PATH + j[:-5] + "html", PDF_PATH + j[:-5] + "pdf")
+            signal.signal(signal.SIGALRM, timeout_handler)
+            signal.alarm(60)
+            try:
+                print(i, j)
+                pdfkit.from_file(data_path + j[:-5] + "html", pdf_path + j[:-5] + "pdf")
+                print("Conversion Successful")
+            except Exception as e:
+                print("Exception occured: {}".format(e))   
+
+
+def upload_file(file_name, bucket, object_name=None):
+    """
+    Upload a file to an S3 bucket
+
+    Parameters
+    ----------
+    file_name: str 
+        Path of the File to be uploaded.
+    bucket: str
+        Name of the Bucket to upload the file to.
+    object_name: str
+        S3 object name. If not specified then file_name is used.
+
+    Returns
+    -------
+    boolean: 
+        True if file was uploaded, else False
+
+    """
+
+    # If S3 object_name was not specified, use file_name
+    if object_name is None:
+        object_name = os.path.basename(file_name)
+
+    # Upload the file
+    s3_client = boto3.client('s3')
+    try:
+        response = s3_client.upload_file(file_name, bucket, object_name)
+    except ClientError as e:
+        logging.error(e)
+        return False
+    return True 
+
+
+def merge_pdf(info_path=INFO_PATH, pdf_path=PDF_PATH):
+    """
+    Merges the compiled PDFs.
+
+    Parameters
+    ----------
+    info_path: str
+        Path for Tutorial Render Order. Defaults to INFO_PATH.
+    pdf_path: str
+        Path where the merged pdf file will be stored. Defaults to PDF_PATH.
+
+    """
+    files = os.listdir(info_path)
+    files = sorted(files)
+
+    files_list = numeric_sorter(files)
 
-def merge_pdf():
-    """Merges the compiled PDFs."""
     command = "pdfunite "
     for i in files_list:
-        chapter = pd.read_csv(INFO_PATH + "-".join(i))
+        print(i)
+        chapter = pd.read_csv(info_path + "-".join(i))
         for j in chapter["File Name"]:
-            print(i, j)
-            command = command + PDF_PATH + j[:-5] + "pdf "
-    os.system(command + "merged.pdf")
+            file_path = pdf_path + j[:-5] + "pdf"
+            if (os.path.exists(file_path)):
+                print(i, j)
+                command = command + pdf_path + j[:-5] + "pdf "
+    os.system(command + f"{pdf_path}merged.pdf")
+
 
 def merge_pdf_pages(a: List[str]):
     """Merges the PDFs.
@@ -73,7 +163,8 @@ def merge_pdf_pages(a: List[str]):
     command = "pdfunite "
     for i in a:
         command = command + i + ' '
-    os.system(command + "storage/merged.pdf")
+    os.system(command + "storage/full_pdf.pdf")
+
 
 def compile_information_pages():
     """Converts the Acknowledgent page and content page from
@@ -85,9 +176,12 @@ def compile_information_pages():
     pdfkit.from_file('contents.html', 'storage/contents.pdf')
     pdfkit.from_file('acknowledgement.html', 'storage/acknowledgement.pdf')
 
+
 if __name__ == "__main__":
     os.system("mkdir " + PDF_PATH)
     html_to_pdf()
     merge_pdf()
     compile_information_pages()
-    merge_pdf_pages(['storage/title.pdf', 'storage/acknowledgement.pdf', 'storage/contents.pdf', 'storage/full_pdf.pdf'])
+    merge_pdf_pages(['storage/title.pdf', 'storage/acknowledgement.pdf', 'storage/contents.pdf', 'storage/merged.pdf'])
+    upload_file('storage/full_pdf.pdf', 'deepchemtutorials', 'TutorialsBook.pdf')
+