diff --git a/.gitignore b/.gitignore index 73a4dc1..c1a60e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,8 @@ **.DS_Store .gitignore -frontend/node_modules \ No newline at end of file +frontend/node_modules +frontend/build +attachments +__pycache__ +backend/env \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile index 0a50ca1..34deee2 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -3,7 +3,8 @@ FROM tiangolo/uwsgi-nginx:python3.6 RUN apt-get update && \ apt-get install -y \ libldap2-dev \ - libsasl2-dev + libsasl2-dev \ + wkhtmltopdf RUN mkdir /app/elogy @@ -20,4 +21,4 @@ RUN mkdir /var/elogy/attachments EXPOSE 80 -ENTRYPOINT ["uwsgi", "--socket=0.0.0.0:80", "--protocol=http", "--file=run.py", "--callable=app"] +ENTRYPOINT ["uwsgi", "--socket=0.0.0.0:80", "--protocol=http", "--file=run.py", "--callable=app", "--wsgi-disable-file-wrapper"] diff --git a/backend/Makefile b/backend/Makefile index d1a15be..25271bc 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -3,5 +3,4 @@ install: env/bin/pip install -e .[dev] run: - FLASK_APP=backend.app ELOGY_CONFIG_FILE=../config.py env/bin/flask run -p 8888 - + FLASK_APP=backend.app ELOGY_CONFIG_FILE=../config.py env/bin/flask run -p 8888 --reload --debugger diff --git a/backend/backend/api/entries.py b/backend/backend/api/entries.py index 712cecf..690e895 100644 --- a/backend/backend/api/entries.py +++ b/backend/backend/api/entries.py @@ -1,4 +1,4 @@ -import logging +import io from flask import request, send_file from flask_restful import Resource, marshal, marshal_with, abort @@ -8,7 +8,7 @@ from ..db import Entry, Logbook, EntryLock from ..attachments import handle_img_tags -from ..export import export_entries_as_pdf +from ..export import export_entries from ..actions import new_entry, edit_entry from . import fields, send_signal @@ -141,9 +141,10 @@ def put(self, args, entry_id, logbook_id=None): "archived": Boolean(), "ignore_children": Boolean(), "n": Integer(missing=50), - "offset": Integer(), + "offset": Integer(missing=0), "download": Str(), "sort_by_timestamp": Boolean(missing=True), + "reverse_order": Boolean(missing=True) } @@ -159,6 +160,9 @@ def get(self, args, logbook_id=None): metadata = [meta.split(":") for meta in args.get("metadata", [])] + n_entries = args["n"] + offset = args["offset"] + if logbook_id: # restrict search to the given logbook and its descendants logbook = Logbook.get(Logbook.id == logbook_id) @@ -169,8 +173,9 @@ def get(self, args, logbook_id=None): attachment_filter=args.get("attachments"), attribute_filter=attributes, metadata_filter=metadata, - n=args["n"], offset=args.get("offset"), - sort_by_timestamp=args.get("sort_by_timestamp")) + n=n_entries, offset=offset, + sort_by_timestamp=args.get("sort_by_timestamp"), + reverse_order=args.get("reverse_order")) entries = logbook.get_entries(**search_args) else: # global search (all logbooks) @@ -182,23 +187,26 @@ def get(self, args, logbook_id=None): attachment_filter=args.get("attachments"), attribute_filter=attributes, metadata_filter=metadata, - n=args["n"], offset=args.get("offset"), - sort_by_timestamp=args.get("sort_by_timestamp")) + n=n_entries, offset=offset, + sort_by_timestamp=args.get("sort_by_timestamp"), + reverse_order=args.get("reverse_order")) entries = Entry.search(**search_args) - if args.get("download") == "pdf": - # return a PDF version - # TODO: not sure if this belongs in the API - pdf = export_entries_as_pdf(logbook, entries) - if pdf is None: - abort(400, message="Could not create PDF!") - return send_file(pdf, mimetype="application/pdf", - as_attachment=True, - attachment_filename=("{logbook.name}.pdf" - .format(logbook=logbook))) - - return marshal(dict(logbook=logbook, - entries=list(entries)), fields.entries) + if args.get("download"): + # Allow getting the entries as one big file (html or pdf), to go. + try: + data, real_n_entries, mimetype, filename = export_entries( + logbook, entries, n_entries, offset, args["download"]) + except ValueError: + abort(400) + buf = io.BytesIO() + buf.write(data) + buf.seek(0) + return send_file(buf, mimetype=mimetype, as_attachment=True, + attachment_filename=filename) + + return marshal(dict(logbook=logbook, entries=list(entries)), + fields.entries) class EntryLockResource(Resource): diff --git a/backend/backend/app.py b/backend/backend/app.py index 44ed026..3eb2145 100644 --- a/backend/backend/app.py +++ b/backend/backend/app.py @@ -4,9 +4,8 @@ from time import time -from flask import Flask, current_app, send_from_directory, g, request +from flask import Flask, current_app, send_from_directory, g from flask_restful import Api -import logging from .api.errors import errors as api_errors from .api.logbooks import LogbooksResource, LogbookChangesResource @@ -23,7 +22,7 @@ static_folder="frontend/build/static", static_url_path="/static") app.config.from_envvar('ELOGY_CONFIG_FILE') - +app.root_path = app.config["ROOT_PATH"] app.secret_key = app.config["SECRET"] # add some hooks for debugging purposes diff --git a/backend/backend/attachments.py b/backend/backend/attachments.py index 02e39f2..aa8e2b5 100644 --- a/backend/backend/attachments.py +++ b/backend/backend/attachments.py @@ -3,16 +3,14 @@ original files, in a configurable location on disk. """ -from base64 import decodestring +from base64 import decodestring, encodestring import binascii from datetime import datetime -from dateutil.parser import parse import io import mimetypes import os -from flask import (Blueprint, abort, request, url_for, redirect, - current_app, jsonify, send_from_directory) +from flask import url_for, current_app from lxml import html, etree from lxml.html.clean import Cleaner from PIL import Image @@ -39,6 +37,19 @@ def get_content_type(file_): return type_ +def load_attachment(path): + upload_dir = os.path.join(current_app.config["UPLOAD_FOLDER"]) + full_path = os.path.join(upload_dir, path) + with open(full_path, "rb") as f: + return f.read() + + +def get_full_attachment_path(path): + upload_dir = os.path.join(current_app.config["UPLOAD_FOLDER"]) + full_path = os.path.join(upload_dir, path) + return full_path + + def save_attachment(file_, timestamp, entry_id, metadata=None, embedded=False): "Store an attachment in the proper place" # make up a path and unique filename using the timestamp @@ -188,3 +199,31 @@ def handle_img_tags(text, entry_id=None, timestamp=None): return content, attachments +def embed_attachments(text, entry_id=None, timestamp=None): + + """ Embed inlined images in HTML, for export. """ + # TODO I guess we could embed some other types too, e.g. text files..? + + try: + doc = html.document_fromstring(text) + except etree.ParserError: + return text + + for i, element in enumerate(doc.xpath("//*[@src]")): + src = element.attrib['src'].split("?", 1)[0] + if src.startswith("/attachments/"): # TODO This is a bit crude + # Now we know it's an attachment and not an external link. + mimetype = mimetypes.guess_type(src)[0] + if mimetype and mimetype.startswith("image/"): + # We know it's an image and what type it is + try: + image = load_attachment(src[13:]) + except IOError: + # TODO Guess the file does not exist. Put something here? + continue + encoded = encodestring(image) + element.attrib["src"] = (f"data:{mimetype};base64, " + + encoded.decode("utf-8")) + + # TODO maybe replace broken images with something more explanatory? + return etree.tostring(doc).decode("utf-8") diff --git a/backend/backend/db.py b/backend/backend/db.py index c60500c..eae93d1 100644 --- a/backend/backend/db.py +++ b/backend/backend/db.py @@ -571,7 +571,7 @@ def search(cls, logbook=None, followups=False, attribute_filter=None, content_filter=None, title_filter=None, author_filter=None, attachment_filter=None, metadata_filter=None, - sort_by_timestamp=True): + sort_by_timestamp=True, reverse_order=False): # Note: this is all pretty messy. The reason we're building # the query as a raw string is that peewee does not (currently) @@ -739,7 +739,8 @@ def search(cls, logbook=None, followups=False, # Check if we're searching, in that case we want to show all entries. if followups or any([title_filter, content_filter, author_filter, - metadata_filter, attribute_filter, attachment_filter]): + metadata_filter, attribute_filter, + attachment_filter]): query += " GROUP BY thread" else: # We're not searching. In this case we'll only show @@ -748,7 +749,9 @@ def search(cls, logbook=None, followups=False, # sort newest first, taking into account the last edit if any # TODO: does this make sense? Should we only consider creation date? order_by = sort_by_timestamp and "timestamp" or "entry.created_at" - query += " ORDER BY entry.priority DESC, {} DESC".format(order_by) + ordering = "DESC" if reverse_order else "ASC" + query += " ORDER BY entry.priority DESC, {} {}".format(order_by, + ordering) if n: query += " LIMIT {}".format(n) if offset: diff --git a/backend/backend/export.py b/backend/backend/export.py index 2ff416a..83e8914 100644 --- a/backend/backend/export.py +++ b/backend/backend/export.py @@ -1,40 +1,35 @@ +from datetime import datetime from tempfile import NamedTemporaryFile +from flask import current_app +from slugify import slugify + try: import pdfkit except ImportError: pdfkit = None +from .attachments import embed_attachments + -def export_entries_as_pdf(logbook, entries): +def export_entries_as_pdf(logbook, entries, n, offset): """ - Super basic "proof-of-concept" PDF export - No proper formatting, and does not embed images. + Export entries as a PDF. Simply uses the HTML export function below, + and converts the HTML into PDF using pdfkit. Note that pdfkit relies on the external library "wkhtmltopdf". TODO: pdfkit seems a bit limited, look for a more flexible alternative. "reportlab" looks pretty good (https://bitbucket.org/rptlab/reportlab) """ if pdfkit is None: - return None - - entries_html = [ - """ -
Created at: {created_at}
-Title: {title}
-Authors: {authors}
-{content}
- """.format(title=entry.title or "(No title)", - authors=", ".join(a["name"] for a in entry.authors), - created_at=entry.created_at, - content=entry.content or "---") - for entry in entries - ] + raise ValueError("No pdfkit/wkhtmltopdf available.") + + html, n_entries = export_entries_as_html(logbook, entries, n, offset) with NamedTemporaryFile(prefix=logbook.name, suffix=".pdf", - delete=False) as f: + delete=True) as f: options = { "load-error-handling": "ignore", "load-media-error-handling": "ignore", @@ -45,10 +40,59 @@ def export_entries_as_pdf(logbook, entries): 'encoding': "UTF-8", } try: - pdfkit.from_string("