Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle various termination scenarios of the conversion process #772

Merged
merged 10 commits into from
Apr 24, 2024
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
command: |
apt-get update
apt-get install -y git make python3 python3-poetry --no-install-recommends
poetry install --no-ansi --only lint
poetry install --no-ansi --only lint,test
- run:
name: Run linters to enforce code style
command: poetry run make lint
Expand Down
104 changes: 92 additions & 12 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import contextlib
import logging
import os
import subprocess
import sys
import tempfile
from abc import ABC, abstractmethod
from pathlib import Path
from typing import IO, Callable, Optional
from typing import IO, Callable, Iterator, Optional

from colorama import Fore, Style

Expand All @@ -22,6 +23,10 @@
PIXELS_TO_PDF_LOG_START = "----- PIXELS TO PDF LOG START -----"
PIXELS_TO_PDF_LOG_END = "----- PIXELS TO PDF LOG END -----"

TIMEOUT_EXCEPTION = 15
TIMEOUT_GRACE = 15
TIMEOUT_FORCE = 5


def read_bytes(f: IO[bytes], size: int, exact: bool = True) -> bytes:
"""Read bytes from a file-like object."""
Expand Down Expand Up @@ -69,20 +74,14 @@ def convert(
self.progress_callback = progress_callback
document.mark_as_converting()
try:
conversion_proc = self.start_doc_to_pixels_proc()
with tempfile.TemporaryDirectory() as t:
Path(f"{t}/pixels").mkdir()
self.doc_to_pixels(document, t, conversion_proc)
conversion_proc.wait(3)
# TODO: validate convert to pixels output
with self.doc_to_pixels_proc(document) as conversion_proc:
self.doc_to_pixels(document, t, conversion_proc)
self.pixels_to_pdf(document, t, ocr_lang)
document.mark_as_safe()
if document.archive_after_conversion:
document.archive()
except errors.ConverterProcException as e:
exception = self.get_proc_exception(conversion_proc)
self.print_progress(document, True, str(exception), 0)
document.mark_as_failed()
except errors.ConversionException as e:
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
Expand Down Expand Up @@ -174,9 +173,23 @@ def print_progress(
if self.progress_callback:
self.progress_callback(error, text, percentage)

def get_proc_exception(self, p: subprocess.Popen) -> Exception:
def get_proc_exception(
self, p: subprocess.Popen, timeout: int = TIMEOUT_EXCEPTION
) -> Exception:
"""Returns an exception associated with a process exit code"""
error_code = p.wait(3)
try:
error_code = p.wait(timeout)
except subprocess.TimeoutExpired:
return errors.UnexpectedConversionError(
"Encountered an I/O error during document to pixels conversion,"
f" but the conversion process is still running after {timeout} seconds"
f" (PID: {p.pid})"
)
except Exception:
return errors.UnexpectedConversionError(
"Encountered an I/O error during document to pixels conversion,"
f" but the status of the conversion process is unknown (PID: {p.pid})"
)
return errors.exception_from_error_code(error_code)

@abstractmethod
Expand All @@ -192,9 +205,76 @@ def sanitize_conversion_str(self, untrusted_conversion_str: str) -> str:
return armor_start + conversion_string + armor_end

@abstractmethod
def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
pass

@abstractmethod
def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
"""Terminate gracefully the process started for the doc-to-pixels phase."""
pass

def ensure_stop_doc_to_pixels_proc(
self,
document: Document,
p: subprocess.Popen,
timeout_grace: int = TIMEOUT_GRACE,
timeout_force: int = TIMEOUT_FORCE,
) -> None:
"""Stop the conversion process, or ensure it has exited.

This method should be called when we want to verify that the doc-to-pixels
process has exited, or terminate it ourselves. The termination should happen as
gracefully as possible, and we should not block indefinitely until the process
has exited.
"""
# Check if the process completed.
ret = p.poll()
if ret is not None:
return

# At this point, the process is still running. This may be benign, as we haven't
# waited for it yet. Terminate it gracefully.
self.terminate_doc_to_pixels_proc(document, p)
try:
p.wait(timeout_grace)
except subprocess.TimeoutExpired:
log.warning(
f"Conversion process did not terminate gracefully after {timeout_grace}"
" seconds. Killing it forcefully..."
)

# Forcefully kill the running process.
p.kill()
try:
p.wait(timeout_force)
except subprocess.TimeoutExpired:
log.warning(
"Conversion process did not terminate forcefully after"
f" {timeout_force} seconds. Resources may linger..."
)

@contextlib.contextmanager
def doc_to_pixels_proc(
self,
document: Document,
timeout_exception: int = TIMEOUT_EXCEPTION,
timeout_grace: int = TIMEOUT_GRACE,
timeout_force: int = TIMEOUT_FORCE,
) -> Iterator[subprocess.Popen]:
"""Start a conversion process, pass it to the caller, and then clean it up."""
p = self.start_doc_to_pixels_proc(document)
try:
yield p
except errors.ConverterProcException as e:
deeplow marked this conversation as resolved.
Show resolved Hide resolved
exception = self.get_proc_exception(p, timeout_exception)
raise exception from e
finally:
self.ensure_stop_doc_to_pixels_proc(
document, p, timeout_grace=timeout_grace, timeout_force=timeout_force
)


# From global_common:

Expand Down
46 changes: 43 additions & 3 deletions dangerzone/isolation_provider/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ def is_container_installed() -> bool:

return installed

def doc_to_pixels_container_name(self, document: Document) -> str:
"""Unique container name for the doc-to-pixels phase."""
return f"dangerzone-doc-to-pixels-{document.id}"

def pixels_to_pdf_container_name(self, document: Document) -> str:
"""Unique container name for the pixels-to-pdf phase."""
return f"dangerzone-pixels-to-pdf-{document.id}"

def assert_field_type(self, val: Any, _type: object) -> None:
# XXX: Use a stricter check than isinstance because `bool` is a subclass of
# `int`.
Expand Down Expand Up @@ -172,6 +180,7 @@ def exec(
def exec_container(
self,
command: List[str],
name: str,
extra_args: List[str] = [],
) -> subprocess.Popen:
container_runtime = self.get_runtime()
Expand All @@ -187,6 +196,7 @@ def exec_container(
security_args += ["--cap-drop", "all"]
user_args = ["-u", "dangerzone"]
enable_stdin = ["-i"]
set_name = ["--name", name]

prevent_leakage_args = ["--rm"]

Expand All @@ -196,6 +206,7 @@ def exec_container(
+ security_args
+ prevent_leakage_args
+ enable_stdin
+ set_name
+ extra_args
+ [self.CONTAINER_NAME]
+ command
Expand All @@ -204,6 +215,28 @@ def exec_container(
args = [container_runtime] + args
return self.exec(args)

def kill_container(self, name: str) -> None:
"""Terminate a spawned container.

We choose to terminate spawned containers using the `kill` action that the
container runtime provides, instead of terminating the process that spawned
them. The reason is that this process is not always tied to the underlying
container. For instance, in Docker containers, this process is actually
connected to the Docker daemon, and killing it will just close the associated
standard streams.
"""
container_runtime = self.get_runtime()
cmd = [container_runtime, "kill", name]
try:
subprocess.run(cmd, capture_output=True, check=True)
deeplow marked this conversation as resolved.
Show resolved Hide resolved
except subprocess.CalledProcessError as e:
log.warning(f"Failed to kill container {name}: {str(e)}")
log.warning(f"Output from the kill command: {e.output}")
except Exception as e:
log.exception(
f"Unexpected error occurred while killing container {name}: {str(e)}"
)

def pixels_to_pdf(
self, document: Document, tempdir: str, ocr_lang: Optional[str]
) -> None:
Expand All @@ -222,7 +255,8 @@ def pixels_to_pdf(
f"OCR_LANGUAGE={ocr_lang}",
]

pixels_to_pdf_proc = self.exec_container(command, extra_args)
name = self.pixels_to_pdf_container_name(document)
pixels_to_pdf_proc = self.exec_container(command, name, extra_args)
if pixels_to_pdf_proc.stdout:
for line in pixels_to_pdf_proc.stdout:
self.parse_progress_trusted(document, line.decode())
Expand Down Expand Up @@ -251,14 +285,20 @@ def pixels_to_pdf(
)
shutil.move(container_output_filename, document.output_filename)

def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
# Convert document to pixels
command = [
"/usr/bin/python3",
"-m",
"dangerzone.conversion.doc_to_pixels",
]
return self.exec_container(command)
name = self.doc_to_pixels_container_name(document)
return self.exec_container(command, name=name)

def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
self.kill_container(self.doc_to_pixels_container_name(document))

def get_max_parallel_conversions(self) -> int:
# FIXME hardcoded 1 until length conversions are better handled
Expand Down
7 changes: 6 additions & 1 deletion dangerzone/isolation_provider/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,13 @@ def pixels_to_pdf(
) -> None:
pass

def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
return subprocess.Popen("True")

def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
pass

def get_max_parallel_conversions(self) -> int:
return 1
28 changes: 27 additions & 1 deletion dangerzone/isolation_provider/qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
def get_max_parallel_conversions(self) -> int:
return 1

def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
dev_mode = getattr(sys, "dangerzone_dev", False) == True
if dev_mode:
# Use dz.ConvertDev RPC call instead, if we are in development mode.
Expand All @@ -77,6 +77,32 @@ def start_doc_to_pixels_proc(self) -> subprocess.Popen:

return p

def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
"""Terminate a spawned disposable qube.

Qubes does not offer a way out of the box to terminate disposable Qubes from
domU [1]. Our best bet is to close the standard streams of the process, and hope
that the disposable qube will attempt to read/write to them, and thus receive an
EOF.

There are two ways we can do the above; close the standard streams explicitly,
or terminate the process. The problem with the latter is that terminating
`qrexec-client-vm` happens immediately, and we no longer have a way to learn if
the disposable qube actually terminated. That's why we prefer closing the
standard streams explicitly, so that we can afterwards use `Popen.wait()` to
learn if the qube terminated.
deeplow marked this conversation as resolved.
Show resolved Hide resolved

[1]: https://github.com/freedomofpress/dangerzone/issues/563#issuecomment-2034803232
"""
if p.stdin:
p.stdin.close()
if p.stdout:
p.stdout.close()
if p.stderr:
p.stderr.close()

def teleport_dz_module(self, wpipe: IO[bytes]) -> None:
"""Send the dangerzone module to another qube, as a zipfile."""
# Grab the absolute file path of the dangerzone module.
Expand Down
Loading
Loading