diff --git a/.gitignore b/.gitignore index 73a4dc1..c1a60e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,8 @@ **.DS_Store .gitignore -frontend/node_modules \ No newline at end of file +frontend/node_modules +frontend/build +attachments +__pycache__ +backend/env \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile index 0a50ca1..34deee2 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -3,7 +3,8 @@ FROM tiangolo/uwsgi-nginx:python3.6 RUN apt-get update && \ apt-get install -y \ libldap2-dev \ - libsasl2-dev + libsasl2-dev \ + wkhtmltopdf RUN mkdir /app/elogy @@ -20,4 +21,4 @@ RUN mkdir /var/elogy/attachments EXPOSE 80 -ENTRYPOINT ["uwsgi", "--socket=0.0.0.0:80", "--protocol=http", "--file=run.py", "--callable=app"] +ENTRYPOINT ["uwsgi", "--socket=0.0.0.0:80", "--protocol=http", "--file=run.py", "--callable=app", "--wsgi-disable-file-wrapper"] diff --git a/backend/Makefile b/backend/Makefile index d1a15be..25271bc 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -3,5 +3,4 @@ install: env/bin/pip install -e .[dev] run: - FLASK_APP=backend.app ELOGY_CONFIG_FILE=../config.py env/bin/flask run -p 8888 - + FLASK_APP=backend.app ELOGY_CONFIG_FILE=../config.py env/bin/flask run -p 8888 --reload --debugger diff --git a/backend/backend/api/entries.py b/backend/backend/api/entries.py index 712cecf..690e895 100644 --- a/backend/backend/api/entries.py +++ b/backend/backend/api/entries.py @@ -1,4 +1,4 @@ -import logging +import io from flask import request, send_file from flask_restful import Resource, marshal, marshal_with, abort @@ -8,7 +8,7 @@ from ..db import Entry, Logbook, EntryLock from ..attachments import handle_img_tags -from ..export import export_entries_as_pdf +from ..export import export_entries from ..actions import new_entry, edit_entry from . import fields, send_signal @@ -141,9 +141,10 @@ def put(self, args, entry_id, logbook_id=None): "archived": Boolean(), "ignore_children": Boolean(), "n": Integer(missing=50), - "offset": Integer(), + "offset": Integer(missing=0), "download": Str(), "sort_by_timestamp": Boolean(missing=True), + "reverse_order": Boolean(missing=True) } @@ -159,6 +160,9 @@ def get(self, args, logbook_id=None): metadata = [meta.split(":") for meta in args.get("metadata", [])] + n_entries = args["n"] + offset = args["offset"] + if logbook_id: # restrict search to the given logbook and its descendants logbook = Logbook.get(Logbook.id == logbook_id) @@ -169,8 +173,9 @@ def get(self, args, logbook_id=None): attachment_filter=args.get("attachments"), attribute_filter=attributes, metadata_filter=metadata, - n=args["n"], offset=args.get("offset"), - sort_by_timestamp=args.get("sort_by_timestamp")) + n=n_entries, offset=offset, + sort_by_timestamp=args.get("sort_by_timestamp"), + reverse_order=args.get("reverse_order")) entries = logbook.get_entries(**search_args) else: # global search (all logbooks) @@ -182,23 +187,26 @@ def get(self, args, logbook_id=None): attachment_filter=args.get("attachments"), attribute_filter=attributes, metadata_filter=metadata, - n=args["n"], offset=args.get("offset"), - sort_by_timestamp=args.get("sort_by_timestamp")) + n=n_entries, offset=offset, + sort_by_timestamp=args.get("sort_by_timestamp"), + reverse_order=args.get("reverse_order")) entries = Entry.search(**search_args) - if args.get("download") == "pdf": - # return a PDF version - # TODO: not sure if this belongs in the API - pdf = export_entries_as_pdf(logbook, entries) - if pdf is None: - abort(400, message="Could not create PDF!") - return send_file(pdf, mimetype="application/pdf", - as_attachment=True, - attachment_filename=("{logbook.name}.pdf" - .format(logbook=logbook))) - - return marshal(dict(logbook=logbook, - entries=list(entries)), fields.entries) + if args.get("download"): + # Allow getting the entries as one big file (html or pdf), to go. + try: + data, real_n_entries, mimetype, filename = export_entries( + logbook, entries, n_entries, offset, args["download"]) + except ValueError: + abort(400) + buf = io.BytesIO() + buf.write(data) + buf.seek(0) + return send_file(buf, mimetype=mimetype, as_attachment=True, + attachment_filename=filename) + + return marshal(dict(logbook=logbook, entries=list(entries)), + fields.entries) class EntryLockResource(Resource): diff --git a/backend/backend/app.py b/backend/backend/app.py index 44ed026..3eb2145 100644 --- a/backend/backend/app.py +++ b/backend/backend/app.py @@ -4,9 +4,8 @@ from time import time -from flask import Flask, current_app, send_from_directory, g, request +from flask import Flask, current_app, send_from_directory, g from flask_restful import Api -import logging from .api.errors import errors as api_errors from .api.logbooks import LogbooksResource, LogbookChangesResource @@ -23,7 +22,7 @@ static_folder="frontend/build/static", static_url_path="/static") app.config.from_envvar('ELOGY_CONFIG_FILE') - +app.root_path = app.config["ROOT_PATH"] app.secret_key = app.config["SECRET"] # add some hooks for debugging purposes diff --git a/backend/backend/attachments.py b/backend/backend/attachments.py index 02e39f2..aa8e2b5 100644 --- a/backend/backend/attachments.py +++ b/backend/backend/attachments.py @@ -3,16 +3,14 @@ original files, in a configurable location on disk. """ -from base64 import decodestring +from base64 import decodestring, encodestring import binascii from datetime import datetime -from dateutil.parser import parse import io import mimetypes import os -from flask import (Blueprint, abort, request, url_for, redirect, - current_app, jsonify, send_from_directory) +from flask import url_for, current_app from lxml import html, etree from lxml.html.clean import Cleaner from PIL import Image @@ -39,6 +37,19 @@ def get_content_type(file_): return type_ +def load_attachment(path): + upload_dir = os.path.join(current_app.config["UPLOAD_FOLDER"]) + full_path = os.path.join(upload_dir, path) + with open(full_path, "rb") as f: + return f.read() + + +def get_full_attachment_path(path): + upload_dir = os.path.join(current_app.config["UPLOAD_FOLDER"]) + full_path = os.path.join(upload_dir, path) + return full_path + + def save_attachment(file_, timestamp, entry_id, metadata=None, embedded=False): "Store an attachment in the proper place" # make up a path and unique filename using the timestamp @@ -188,3 +199,31 @@ def handle_img_tags(text, entry_id=None, timestamp=None): return content, attachments +def embed_attachments(text, entry_id=None, timestamp=None): + + """ Embed inlined images in HTML, for export. """ + # TODO I guess we could embed some other types too, e.g. text files..? + + try: + doc = html.document_fromstring(text) + except etree.ParserError: + return text + + for i, element in enumerate(doc.xpath("//*[@src]")): + src = element.attrib['src'].split("?", 1)[0] + if src.startswith("/attachments/"): # TODO This is a bit crude + # Now we know it's an attachment and not an external link. + mimetype = mimetypes.guess_type(src)[0] + if mimetype and mimetype.startswith("image/"): + # We know it's an image and what type it is + try: + image = load_attachment(src[13:]) + except IOError: + # TODO Guess the file does not exist. Put something here? + continue + encoded = encodestring(image) + element.attrib["src"] = (f"data:{mimetype};base64, " + + encoded.decode("utf-8")) + + # TODO maybe replace broken images with something more explanatory? + return etree.tostring(doc).decode("utf-8") diff --git a/backend/backend/db.py b/backend/backend/db.py index c60500c..eae93d1 100644 --- a/backend/backend/db.py +++ b/backend/backend/db.py @@ -571,7 +571,7 @@ def search(cls, logbook=None, followups=False, attribute_filter=None, content_filter=None, title_filter=None, author_filter=None, attachment_filter=None, metadata_filter=None, - sort_by_timestamp=True): + sort_by_timestamp=True, reverse_order=False): # Note: this is all pretty messy. The reason we're building # the query as a raw string is that peewee does not (currently) @@ -739,7 +739,8 @@ def search(cls, logbook=None, followups=False, # Check if we're searching, in that case we want to show all entries. if followups or any([title_filter, content_filter, author_filter, - metadata_filter, attribute_filter, attachment_filter]): + metadata_filter, attribute_filter, + attachment_filter]): query += " GROUP BY thread" else: # We're not searching. In this case we'll only show @@ -748,7 +749,9 @@ def search(cls, logbook=None, followups=False, # sort newest first, taking into account the last edit if any # TODO: does this make sense? Should we only consider creation date? order_by = sort_by_timestamp and "timestamp" or "entry.created_at" - query += " ORDER BY entry.priority DESC, {} DESC".format(order_by) + ordering = "DESC" if reverse_order else "ASC" + query += " ORDER BY entry.priority DESC, {} {}".format(order_by, + ordering) if n: query += " LIMIT {}".format(n) if offset: diff --git a/backend/backend/export.py b/backend/backend/export.py index 2ff416a..83e8914 100644 --- a/backend/backend/export.py +++ b/backend/backend/export.py @@ -1,40 +1,35 @@ +from datetime import datetime from tempfile import NamedTemporaryFile +from flask import current_app +from slugify import slugify + try: import pdfkit except ImportError: pdfkit = None +from .attachments import embed_attachments + -def export_entries_as_pdf(logbook, entries): +def export_entries_as_pdf(logbook, entries, n, offset): """ - Super basic "proof-of-concept" PDF export - No proper formatting, and does not embed images. + Export entries as a PDF. Simply uses the HTML export function below, + and converts the HTML into PDF using pdfkit. Note that pdfkit relies on the external library "wkhtmltopdf". TODO: pdfkit seems a bit limited, look for a more flexible alternative. "reportlab" looks pretty good (https://bitbucket.org/rptlab/reportlab) """ if pdfkit is None: - return None - - entries_html = [ - """ -

Created at: {created_at}

-

Title: {title}

-

Authors: {authors}

-

{content}

- """.format(title=entry.title or "(No title)", - authors=", ".join(a["name"] for a in entry.authors), - created_at=entry.created_at, - content=entry.content or "---") - for entry in entries - ] + raise ValueError("No pdfkit/wkhtmltopdf available.") + + html, n_entries = export_entries_as_html(logbook, entries, n, offset) with NamedTemporaryFile(prefix=logbook.name, suffix=".pdf", - delete=False) as f: + delete=True) as f: options = { "load-error-handling": "ignore", "load-media-error-handling": "ignore", @@ -45,10 +40,59 @@ def export_entries_as_pdf(logbook, entries): 'encoding': "UTF-8", } try: - pdfkit.from_string("
".join(entries_html), f.name, options) + pdfkit.from_string(html, f.name, options) except OSError: # Apparently there's some issue with wkhtmltopdf which produces # errors, but it works anyway. See # https://github.com/wkhtmltopdf/wkhtmltopdf/issues/2051 pass - return f.name + + f.seek(0) + return f.read(), n_entries + + +def export_entries_as_html(logbook, entries, n, offset): + + """ + Takes the given logbook entries and generates a single HTML string. + Inline images are embedded, to make it a standalone document. + """ + + entries = list(entries) + if not entries: + raise ValueError("No entries!") + + template = current_app.jinja_env.get_or_select_template( + "entry_export.html.jinja2") + + current_app.logger.info("Rendering HTML for logbook %s; n=%d, offset=%d", + logbook.id, n, offset) + html = template.render(logbook=logbook, entries=entries, + n=len(entries), offset=offset, + embed=embed_attachments, + export_time=datetime.now()) + + return html, len(entries) + + +def export_entries(logbook, entries, n_entries, offset, filetype): + + if filetype == "html": + html, real_n_entries = export_entries_as_html( + logbook, entries, n=n_entries, offset=offset) + data = html.encode("utf-8") + mimetype = "text/html" + elif filetype == "pdf": + pdf, real_n_entries = export_entries_as_pdf( + logbook, entries, n=n_entries, offset=offset) + data = pdf + mimetype = "application/pdf" + + filename = "{}_{}_{}-{}.{}".format( + slugify(logbook.name), + datetime.now().strftime("%Y%m%dT%H%M%S"), + offset, + offset + real_n_entries, + filetype + ) + return data, real_n_entries, mimetype, filename diff --git a/backend/config.py b/backend/config.py index 4b086ae..efe8d78 100644 --- a/backend/config.py +++ b/backend/config.py @@ -5,6 +5,8 @@ BASEURL = os.getenv('ELOGY_URL', 'https://elogy.maxiv.lu.se') +ROOT_PATH = os.getenv('ELOGY_ROOT', '') + DEBUG = bool(os.getenv('ELOGY_DEBUG', 1)) # !!!Change this to False for production use!!! # The name of the database file diff --git a/backend/requirements.txt b/backend/requirements.txt index f59ed98..f1d88d3 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -14,6 +14,7 @@ peewee==2.10.2 Pillow==5.0.0 pyldap==2.4.45 python-dateutil==2.6.1 +python-slugify==2.0.1 pytz==2017.3 six==1.11.0 uWSGI==2.0.15 @@ -21,3 +22,4 @@ webargs==1.8.1 Werkzeug==0.14.1 wtf-peewee==0.2.6 WTForms==2.1 +pdfkit==0.6.1 diff --git a/backend/setup.py b/backend/setup.py index 2694095..165dfad 100644 --- a/backend/setup.py +++ b/backend/setup.py @@ -18,7 +18,9 @@ 'flask-restful', 'pyldap', # should be optional, depends on libpdap and libsasl! 'flask-admin', # maybe also optional? - 'wtf-peewee' + 'wtf-peewee', + 'python-slugify', # optional, only used for file export + 'pdfkit' # optional, for export and depends on wkhtmltopdf ], extras_require={ "dev": [ diff --git a/backend/templates/entry_export.html.jinja2 b/backend/templates/entry_export.html.jinja2 new file mode 100644 index 0000000..2f0de56 --- /dev/null +++ b/backend/templates/entry_export.html.jinja2 @@ -0,0 +1,77 @@ +'{{logbook.name}}', entries {{offset}}-{{offset+n}} + + + + + + + + +{% macro render_entry(logbook, entry, embed) %} +
+
+
+ Entry: #{{entry.id}} + {% if entry.follows %} (Follows #{{entry.follows_id}}){% endif %} +
+
+ Created at: {{ entry.created_at }} + {%- if entry.last_changed_at -%}, + Last modified at: {{ entry.last_changed_at }} + {% endif %} +
+
Title: {{ entry.title }}
+
Authors: {% for author in entry.authors %}{{ author.name }}{{", " if not loop.last else ""}}{% endfor %}
+
+
{{ embed(entry.content) }}
+ + {% for followup in entry.followups %} +
+ {{ render_entry(logbook, followup, embed) }} +
+ {% endfor %} +
+{% endmacro %} + + + +
+

{{ logbook.name }}

+
{{ logbook.description or "(No description)" }}
+
Exported from Elogy at {{ url_for("entriesresource", logbook_id=logbook.id, _external=True) }} on .
+
+ + +
+ {% for entry in entries %} + {{ render_entry(logbook, entry, embed) }} + {% endfor %} +
+ diff --git a/frontend/src/logbook.js b/frontend/src/logbook.js index e6cc261..a24df50 100644 --- a/frontend/src/logbook.js +++ b/frontend/src/logbook.js @@ -264,6 +264,17 @@ class Logbook extends React.Component { Configure {" "} |  + + Export HTML + {" "} + |  ) : null} \n", + " \n" + "" + ]) + + if args.zip: + zip_file = dirname + ".zip" + print("Zipping data into destination '{}'...".format(zip_file)) + with ZipFile(zip_file, mode="w", compression=ZIP_DEFLATED) as zf: + zf.write(os.path.join(tmpdir, "index.html"), os.path.join(dirname, "index.html")) + for filename in html_filenames: + zf.write(os.path.join(tmpdir, filename), os.path.join(dirname, filename)) + else: + print("Putting results in destination '{}'...".format(dirname)) + shutil.copytree(tmpdir, dirname)