Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic background assembly implementation #890

Merged
merged 7 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 169 additions & 55 deletions docassemble/AssemblyLine/al_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import subprocess
from collections import ChainMap
import pikepdf
from typing import Tuple

__all__ = [
"ALAddendumField",
Expand Down Expand Up @@ -1822,6 +1823,107 @@ def as_editable_list(
editable.append(doc)
return editable

def get_cacheable_documents(
    self,
    key: str = "final",
    pdf: bool = True,
    docx: bool = False,
    original: bool = False,
    refresh: bool = True,
    pdfa: bool = False,
    include_zip: bool = True,
    include_full_pdf: bool = False,
    append_matching_suffix: bool = True,
) -> Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]:
    """
    Assembles every enabled document in the bundle and returns the files in a plain structure
    that can be cached and handed back from a background process.

    The result looks like this:

        (
            [
                {"title": "Document 1", "pdf": DAFile, "docx": DAFile, "download_filename": str},
            ],
            DAFile,  # Zip of whole bundle, or None
            DAFile,  # PDF of whole bundle, or None
        )

    Each dictionary always has a "title" key. The "pdf", "docx" and "original" keys are only
    present for the formats that were actually assembled for that document. If DOCX is the only
    requested format and a document cannot be produced as DOCX, a PDF is assembled instead so the
    document still has a downloadable file.

    Args:
        key (str): Identifier for the document version, default is "final".
        pdf (bool): Flag to include a PDF version of each file, default is True.
        docx (bool): Flag to include a DOCX version of each file, default is False.
        original (bool): Flag to include the original version of each file, default is False.
        refresh (bool): Flag to reconsider the 'enabled' attribute, default is True.
        pdfa (bool): Flag to return documents in PDF/A format, default is False.
        include_zip (bool): Flag to include a zip option, default is True.
        include_full_pdf (bool): Flag to include a PDF version of the whole bundle, default is False.
        append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True.

    Returns:
        Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]: A list of dictionaries
            describing the enabled documents, a zip file of the whole bundle, and a PDF of the
            whole bundle. The zip and the PDF are None unless they were requested and the bundle
            has more than one enabled document.
    """
    enabled_docs = self.enabled_documents(refresh=refresh)
    # Reduce idempotency delays: touch every title before any file is assembled.
    for doc in enabled_docs:
        doc.title

    pdf_options = {
        "key": key,
        "refresh": refresh,
        "pdfa": pdfa,
        "append_matching_suffix": append_matching_suffix,
    }

    results = []
    for doc in enabled_docs:
        result = {"title": doc.title}
        filename_root = os.path.splitext(str(doc.filename))[0]

        if pdf:
            result["pdf"] = doc.as_pdf(**pdf_options)
            result["download_filename"] = filename_root + ".pdf"

        if docx and doc._is_docx(key=key):
            result["docx"] = doc.as_docx(
                key=key,
                refresh=refresh,
                append_matching_suffix=append_matching_suffix,
            )
            result["download_filename"] = filename_root + ".docx"
        elif docx and not pdf and not original:
            # DOCX was the only format requested but this document has no DOCX version.
            # Fall back to a PDF so the document is not left without any file.
            result["pdf"] = doc.as_pdf(**pdf_options)
            result["download_filename"] = filename_root + ".pdf"

        if original:
            result["original"] = doc[key]
            result["download_filename"] = doc.filename

        # If it's possible, set the file extension to the actual filetype.
        # This is mostly necessary if people omit the file extension in the attachment block
        # for filetype="original".
        source_file = result.get("original") if original else result.get("docx")
        if source_file is not None:
            try:
                extensions = mimetypes.guess_all_extensions(
                    source_file.mimetype, strict=True
                )
            except (AttributeError, TypeError):
                # The file has no usable mimetype; keep the filename chosen above.
                extensions = []
            if extensions:
                result["download_filename"] = filename_root + extensions[0]

        results.append(result)

    has_multiple_docs = len(enabled_docs) > 1

    bundled_zip = None
    if has_multiple_docs and include_zip:
        # The zip matches the "richest" requested format: original, then docx, then pdf.
        bundled_zip = self.as_zip(
            key=key, format="original" if original else "docx" if docx else "pdf"
        )

    bundled_pdf = None
    if has_multiple_docs and include_full_pdf:
        bundled_pdf = self.as_pdf(key=key, pdfa=pdfa)

    return results, bundled_zip, bundled_pdf

def download_list_html(
self,
key: str = "final",
Expand All @@ -1838,6 +1940,9 @@ def download_list_html(
zip_icon: str = "file-archive",
append_matching_suffix: bool = True,
include_email: bool = False,
use_previously_cached_files: bool = False,
include_full_pdf: bool = False,
full_pdf_label: Optional[str] = None,
) -> str:
"""
Constructs an HTML table displaying a list of documents with 'view' and 'download' buttons.
Expand All @@ -1857,59 +1962,50 @@ def download_list_html(
zip_icon (str): Icon for the zip option, default is "file-archive".
append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True.
include_email (bool): Flag to include an email option, default is False.
use_previously_cached_files (bool): Flag to use previously cached files (e.g., made in background) if defined. default is False.
include_full_pdf (bool): Flag to include a full PDF option, default is False.
full_pdf_label (Optional[str]): Label for the full PDF option. If not provided, uses the generic template for `self.full_pdf_label` ("Download all").

Returns:
str: HTML representation of a table with documents and their associated actions.
"""
if not hasattr(self, "_cached_zip_label"):
self._cached_zip_label = str(self.zip_label)

# Trigger some variables up top to avoid idempotency issues
enabled_docs = self.enabled_documents(refresh=refresh)
for doc in enabled_docs:
doc.title
if format == "pdf":
doc.as_pdf(
key=key,
refresh=refresh,
pdfa=pdfa,
append_matching_suffix=append_matching_suffix,
) # Generate cached file for this session
if not hasattr(self, "_cached_full_pdf_label"):
self._cached_full_pdf_label = str(self.full_pdf_label)

if use_previously_cached_files and hasattr(self, "_downloadable_files"):
downloadable_files, bundled_zip, bundled_pdf = self._downloadable_files
else:
downloadable_files, bundled_zip, bundled_pdf = self.get_cacheable_documents(
key=key,
pdf=(format == "pdf" or view == True),
docx=format == "docx",
original=format == "original",
refresh=refresh,
pdfa=pdfa,
include_zip=include_zip,
include_full_pdf=include_full_pdf,
append_matching_suffix=append_matching_suffix,
)

html = f'<div class="container al_table al_doc_table" id="{ html_safe_str(self.instanceName) }">'

for doc in enabled_docs:
filename_root = os.path.splitext(str(doc.filename))[0]
# Do our best to use the provided filename + the extension from requested filetype
if format == "original":
download_doc = doc[key]
download_filename = doc.filename
if format == "docx" and doc._is_docx(key=key):
download_doc = doc.as_docx(
key=key, append_matching_suffix=append_matching_suffix
)
download_filename = filename_root + ".docx"
else:
download_doc = doc.as_pdf(
key=key, append_matching_suffix=append_matching_suffix
)
download_filename = filename_root + ".pdf"
for result in downloadable_files:
title = result["title"]
download_filename = result.get("download_filename", "document")

try:
# If it's possible, set the file extension to the actual filetype
# This is mostly necessary if people omit the file extension in attachment block
# for filetype="original"
ext = next(
iter(
mimetypes.guess_all_extensions(
download_doc.mimetype, strict=True
)
)
)
download_filename = filename_root + ext
except:
pass
if format == "original" and "original" in result:
download_doc = result["original"]
elif format == "docx" and "docx" in result:
download_doc = result["docx"]
elif "pdf" in result:
download_doc = result["pdf"]
else:
continue # Skip if the desired format is not available

# Construct the download button
doc_download_button = action_button_html(
download_doc.url_for(
attachment=True, display_filename=download_filename
Expand All @@ -1920,12 +2016,14 @@ def download_list_html(
size="md",
classname="al_download al_button",
)
if view and doc.as_pdf().url_for().endswith(".pdf"):

# Construct the view button if needed
if view and "pdf" in result and result["pdf"].url_for().endswith(".pdf"):
# Use .pdf as the filename extension
view_filename = os.path.splitext(download_filename)[0] + ".pdf"
doc_view_button = action_button_html(
doc.as_pdf(key=key).url_for(
attachment=False,
display_filename=filename_root + ".pdf",
append_matching_suffix=append_matching_suffix,
result["pdf"].url_for(
attachment=False, display_filename=view_filename
),
label=view_label,
icon=view_icon,
Expand All @@ -1936,31 +2034,47 @@ def download_list_html(
buttons = [doc_view_button, doc_download_button]
else:
buttons = [doc_download_button]
html += table_row(doc.title, buttons)

# Add a zip file row if there's more than one doc
filename_root = os.path.splitext(str(self.filename))[0]
if len(enabled_docs) > 1 and include_zip:
html += table_row(title, buttons)

# Add a zip file row if included
if include_zip and bundled_zip:
if not zip_label:
zip_label = self._cached_zip_label
# Zip file will match the format of the download table
zip = self.as_zip(key=key, format=format, include_pdf=view)
filename_root = os.path.splitext(str(self.filename))[0]
zip_button = action_button_html(
zip.url_for(attachment=False, display_filename=filename_root + ".zip"),
bundled_zip.url_for(
attachment=False, display_filename=filename_root + ".zip"
),
label=zip_label,
icon=zip_icon,
color="primary",
size="md",
classname="al_zip al_button",
)
html += table_row(zip.title, zip_button)
html += table_row(zip_label, zip_button)

if include_full_pdf and bundled_pdf:
if not full_pdf_label:
full_pdf_label = self._cached_full_pdf_label
filename_root = os.path.splitext(str(self.filename))[0]
full_pdf_button = action_button_html(
bundled_pdf.url_for(
attachment=False, display_filename=filename_root + ".pdf"
),
label=full_pdf_label,
icon="file-pdf",
color="primary",
size="md",
classname="al_full_pdf al_button",
)
html += table_row(full_pdf_label, full_pdf_button)

if include_email:
html += self.send_email_table_row(key=key)

html += "\n</div>"

# Discuss: Do we want a table with the ability to have a merged pdf row?
return html

def download_html(
Expand Down
78 changes: 77 additions & 1 deletion docassemble/AssemblyLine/data/questions/al_document.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,85 @@ template: x.zip_label
content: |
Download all
---
# Default label for the "whole bundle as a single PDF" row of the download table.
# download_list_html() caches str(self.full_pdf_label) and uses it when no
# full_pdf_label argument is passed.
generic object: ALDocumentBundle
template: x.full_pdf_label
content: |
Download as one PDF
---
# Background event: runs ocrmypdf_task() on the file passed as the `from_file`
# action argument, targeting `to_pdf`, and returns the task's result as the
# background response.
id: al exhibit ocr pages bg
event: al_exhibit_ocr_pages
code: |
  to_pdf = action_argument('to_pdf')
  from_file = action_argument('from_file')
  background_response(ocrmypdf_task(from_file, to_pdf))
---
############## Background document assembly #################
---
# Defines x.generate_downloads_task by launching the `x.create_downloads` event
# through background_action(); the attribute holds whatever background_action() returns.
generic object: ALDocumentBundle
code: |
x.generate_downloads_task = background_action(x.attr_name('create_downloads'))
---
# Event behind x.generate_downloads_task: assembles the "final" PDFs plus a PDF of the
# whole bundle via get_cacheable_documents(), then passes the result to the
# `x.save_downloads` event as the `download_response` action argument.
generic object: ALDocumentBundle
event: x.create_downloads
code: |
download_response = x.get_cacheable_documents(key="final", pdf=True, include_full_pdf=True)
background_response_action(x.attr_name('save_downloads'), download_response=download_response)
---
# Same as x.generate_downloads_task, but launches the `x.create_downloads_with_docx`
# event so that DOCX versions are requested as well.
generic object: ALDocumentBundle
code: |
x.generate_downloads_with_docx_task = background_action(x.attr_name('create_downloads_with_docx'))
---
# Event behind x.generate_downloads_with_docx_task: like `x.create_downloads`, but also
# requests DOCX versions (docx=True). The result goes to the same `x.save_downloads` event.
generic object: ALDocumentBundle
event: x.create_downloads_with_docx
code: |
download_response = x.get_cacheable_documents(key="final", pdf=True, docx=True, include_full_pdf=True)
background_response_action(x.attr_name('save_downloads'), download_response=download_response)
---
# Stores the (documents, zip, full PDF) result produced in the background on the bundle
# as x._downloadable_files. download_list_html(use_previously_cached_files=True) reads
# this attribute instead of assembling the documents again.
generic object: ALDocumentBundle
event: x.save_downloads
code: |
x._downloadable_files = action_argument('download_response')

background_response()
---
# Screen to show while the background download assembly is still running.
# `reload: True` asks docassemble to reload the screen automatically.
# The status text carries both the Bootstrap 5 (`visually-hidden`) and the
# Bootstrap 4 (`sr-only`) class so it stays screen-reader-only under either version.
id: waiting screen
question: |
  Please wait while we make your documents
subquestion: |
  This can take a few minutes.

  <div class="spinner-border text-primary d-flex justify-content-center" role="status">
    <span class="visually-hidden sr-only">Making documents...</span>
  </div>
event: al_download_waiting_screen
reload: True
---
########## Background preview assembly ################
---
# Defines x.generate_preview_task by launching the `x.generate_preview_event` event
# through background_action(); the attribute holds whatever background_action() returns.
generic object: ALDocumentBundle
code: |
x.generate_preview_task = background_action(x.attr_name('generate_preview_event'))
---
# Event behind x.generate_preview_task: assembles the bundle as one PDF with key="preview"
# and passes it to the `x.save_preview` event as the `preview_response` action argument.
generic object: ALDocumentBundle
event: x.generate_preview_event
code: |
preview_response = x.as_pdf(key="preview")
background_response_action(x.attr_name('save_preview'), preview_response=preview_response)
---
# Stores the preview PDF produced in the background on the bundle as x._preview_file.
# NOTE(review): the code that reads x._preview_file is not visible in this section.
generic object: ALDocumentBundle
event: x.save_preview
code: |
x._preview_file = action_argument('preview_response')
background_response()
---
# Screen to show while the background preview assembly is still running.
# `reload: True` asks docassemble to reload the screen automatically.
# The status text carries both the Bootstrap 5 (`visually-hidden`) and the
# Bootstrap 4 (`sr-only`) class so it stays screen-reader-only under either version.
id: preview waiting screen
question: |
  Please wait while we generate a preview of your document
subquestion: |
  This can take a few minutes.

  <div class="spinner-border text-primary d-flex justify-content-center" role="status">
    <span class="visually-hidden sr-only">Generating preview...</span>
  </div>
event: al_preview_waiting_screen
reload: True
Loading
Loading