Implement filesystem storage for PFCON in-network
jbernal0019 committed Aug 11, 2023
1 parent 18b4124 commit f0bf5e3
Showing 8 changed files with 508 additions and 24 deletions.
52 changes: 44 additions & 8 deletions make.sh
@@ -8,6 +8,7 @@
#
# make.sh [-h] [-i] [-s] [-N] [-U] \
# [-O <swarm|kubernetes>] \
# [-F <swift|filesystem|zipfile>] \
# [-S <storeBase>] \
# [local|fnndsc[:dev]]
#
@@ -23,9 +24,13 @@
#
# unmake.sh ; sudo rm -fr CHRIS_REMOTE_FS; rm -fr CHRIS_REMOTE_FS; make.sh
#
# Run full pfcon instantiation operating in-network on Swarm:
# Run full pfcon instantiation operating in-network on Swarm using Swift storage:
#
# unmake.sh -N; sudo rm -fr CHRIS_REMOTE_FS; rm -fr CHRIS_REMOTE_FS; make.sh -N
# unmake.sh -N -F swift; sudo rm -fr CHRIS_REMOTE_FS; rm -fr CHRIS_REMOTE_FS; make.sh -N -F swift
#
# Run full pfcon instantiation operating in-network on Swarm using mounted filesystem storage:
#
# unmake.sh -N -F filesystem; sudo rm -fr CHRIS_REMOTE_FS; rm -fr CHRIS_REMOTE_FS; make.sh -N -F filesystem
#
# Skip the intro:
#
@@ -61,6 +66,12 @@
# Optionally set pfcon to operate in-network mode (using swift or filesystem
# storage instead of a zip file).
#
# -F <swift|filesystem|zipfile>
#
# Explicitly set the storage environment. This must be set to swift or filesystem
# for pfcon operating in-network mode, and to zipfile (the default) for pfcon
# operating in out-of-network mode.
#
# -U
#
# Optionally skip the UNIT tests.
@@ -90,14 +101,15 @@ source ./cparse.sh

declare -i STEP=0
ORCHESTRATOR=swarm
STORAGE=zipfile
HERE=$(pwd)

print_usage () {
echo "Usage: ./make.sh [-h] [-i] [-s] [-N] [-U] [-O <swarm|kubernetes>] [-S <storeBase>] [local|fnndsc[:dev]]"
echo "Usage: ./make.sh [-h] [-i] [-s] [-N] [-F <swift|filesystem|zipfile>] [-U] [-O <swarm|kubernetes>] [-S <storeBase>] [local|fnndsc[:dev]]"
exit 1
}

while getopts ":hsiNUO:S:" opt; do
while getopts ":hsiNUF:O:S:" opt; do
case $opt in
h) print_usage
;;
@@ -107,6 +119,12 @@ while getopts ":hsiNUO:S:" opt; do
;;
N) b_pfconInNetwork=1
;;
F) STORAGE=$OPTARG
if ! [[ "$STORAGE" =~ ^(swift|filesystem|zipfile)$ ]]; then
echo "Invalid value for option -- F"
print_usage
fi
;;
U) b_skipUnitTests=1
;;
O) ORCHESTRATOR=$OPTARG
@@ -157,10 +175,15 @@ title -d 1 "Setting global exports..."
fi
if (( b_pfconInNetwork )) ; then
echo -e "PFCON_INNETWORK=True" | ./boxes.sh
if [[ $STORAGE == 'zipfile' ]]; then
echo -e "Need to pass '-F <swift|filesystem>' when PFCON_INNETWORK=True" | ./boxes.sh
exit 1
fi
else
echo -e "PFCON_INNETWORK=False" | ./boxes.sh
fi
echo -e "ORCHESTRATOR=$ORCHESTRATOR" | ./boxes.sh
echo -e "STORAGE=$STORAGE" | ./boxes.sh
echo -e "exporting STOREBASE=$STOREBASE " | ./boxes.sh
export STOREBASE=$STOREBASE
export SOURCEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
@@ -187,7 +210,11 @@ windowBottom
title -d 1 "Building :dev"
cd $HERE
if (( b_pfconInNetwork )) ; then
CMD="docker compose -f swarm/docker-compose_dev_innetwork.yml build"
if [[ $STORAGE == 'swift' ]]; then
CMD="docker compose -f swarm/docker-compose_dev_innetwork.yml build"
elif [[ $STORAGE == 'filesystem' ]]; then
CMD="docker compose -f swarm/docker-compose_dev_innetwork_fs.yml build"
fi
else
CMD="docker compose -f swarm/docker-compose_dev.yml build"
fi
@@ -232,8 +259,13 @@ windowBottom
title -d 1 "Starting pfcon containerized dev environment on $ORCHESTRATOR"
if [[ $ORCHESTRATOR == swarm ]]; then
if (( b_pfconInNetwork )) ; then
echo "docker stack deploy -c swarm/docker-compose_dev_innetwork.yml pfcon_dev_stack" | ./boxes.sh ${LightCyan}
docker stack deploy -c swarm/docker-compose_dev_innetwork.yml pfcon_dev_stack
if [[ $STORAGE == 'swift' ]]; then
echo "docker stack deploy -c swarm/docker-compose_dev_innetwork.yml pfcon_dev_stack" | ./boxes.sh ${LightCyan}
docker stack deploy -c swarm/docker-compose_dev_innetwork.yml pfcon_dev_stack
elif [[ $STORAGE == 'filesystem' ]]; then
echo "docker stack deploy -c swarm/docker-compose_dev_innetwork_fs.yml pfcon_dev_stack" | ./boxes.sh ${LightCyan}
docker stack deploy -c swarm/docker-compose_dev_innetwork_fs.yml pfcon_dev_stack
fi
else
echo "docker stack deploy -c swarm/docker-compose_dev.yml pfcon_dev_stack" | ./boxes.sh ${LightCyan}
docker stack deploy -c swarm/docker-compose_dev.yml pfcon_dev_stack
@@ -274,7 +306,11 @@ if (( ! b_skipUnitTests )) ; then
sleep 5
if [[ $ORCHESTRATOR == swarm ]]; then
if (( b_pfconInNetwork )) ; then
docker exec $pfcon_dev pytest tests/test_resources_innetwork.py --color=yes
if [[ $STORAGE == 'swift' ]]; then
docker exec $pfcon_dev pytest tests/test_resources_innetwork.py --color=yes
elif [[ $STORAGE == 'filesystem' ]]; then
docker exec $pfcon_dev pytest tests/test_resources_innetwork_fs.py --color=yes
fi
else
docker exec $pfcon_dev pytest tests/test_resources.py --color=yes
fi
5 changes: 4 additions & 1 deletion pfcon/config.py
@@ -27,13 +27,16 @@ def __init__(self):

if self.PFCON_INNETWORK:
self.STORAGE_ENV = env('STORAGE_ENV', 'swift')
if self.STORAGE_ENV != 'swift':
if self.STORAGE_ENV not in ('swift', 'filesystem'):
raise ValueError(f"Unsupported value '{self.STORAGE_ENV}' for STORAGE_ENV")
else:
self.STORAGE_ENV = env('STORAGE_ENV', 'zipfile')
if self.STORAGE_ENV != 'zipfile':
raise ValueError(f"Unsupported value '{self.STORAGE_ENV}' for STORAGE_ENV")

if self.STORAGE_ENV == 'filesystem':
self.FILESYSTEM_BASEDIR = env('FILESYSTEM_BASEDIR', '/filesystem')

self.STORE_BASE = env('STOREBASE', '/var/local/storeBase')
self.env = env
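
As a rough sketch (not part of this commit), the storage settings above resolve from the environment as follows. The helper function and its boolean parsing are illustrative only; the variable names (PFCON_INNETWORK, STORAGE_ENV, FILESYSTEM_BASEDIR, STOREBASE) and their defaults come from config.py:

    import os

    def resolve_storage_settings(environ=os.environ):
        """Illustrative only: mirrors how pfcon/config.py picks the storage settings."""
        innetwork = environ.get('PFCON_INNETWORK', 'false').lower() in ('true', '1', 'yes')
        if innetwork:
            storage_env = environ.get('STORAGE_ENV', 'swift')
            if storage_env not in ('swift', 'filesystem'):
                raise ValueError(f"Unsupported value '{storage_env}' for STORAGE_ENV")
        else:
            storage_env = environ.get('STORAGE_ENV', 'zipfile')
            if storage_env != 'zipfile':
                raise ValueError(f"Unsupported value '{storage_env}' for STORAGE_ENV")
        settings = {'PFCON_INNETWORK': innetwork,
                    'STORAGE_ENV': storage_env,
                    'STORE_BASE': environ.get('STOREBASE', '/var/local/storeBase')}
        if storage_env == 'filesystem':
            # New in this commit: base dir of the shared filesystem, default /filesystem.
            settings['FILESYSTEM_BASEDIR'] = environ.get('FILESYSTEM_BASEDIR', '/filesystem')
        return settings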

91 changes: 91 additions & 0 deletions pfcon/filesystem_storage.py
@@ -0,0 +1,91 @@
"""
Handle filesystem-based (e.g. mounted directory) storage. This is used when pfcon is
in-network and configured to directly copy the data from a filesystem.
"""

import logging
import datetime
import os
import json
import io
import shutil


from .base_storage import BaseStorage


logger = logging.getLogger(__name__)


class FileSystemStorage(BaseStorage):

def __init__(self, config):

super().__init__(config)

self.base_dir = config.get('FILESYSTEM_BASEDIR')


def store_data(self, job_id, job_incoming_dir, data, **kwargs):
"""
Copy all the files/folders under each input folder in the specified data list
into the specified incoming directory.
"""
nfiles = 0
for rel_path in data:
abs_path = os.path.join(self.base_dir, rel_path.strip('/'))

for root, dirs, files in os.walk(abs_path):
local_path = root.replace(abs_path, job_incoming_dir, 1)
os.makedirs(local_path, exist_ok=True)

for filename in files:
fs_file_path = os.path.join(root, filename)
try:
shutil.copy(fs_file_path, local_path)
except Exception as e:
logger.error(f'Failed to copy file {fs_file_path} for '
f'job {job_id}, detail: {str(e)}')
raise
nfiles += 1

logger.info(f'{nfiles} files copied from file system for job {job_id}')
return {
'jid': job_id,
'nfiles': nfiles,
'timestamp': f'{datetime.datetime.now()}',
'path': job_incoming_dir
}

def get_data(self, job_id, job_outgoing_dir, **kwargs):
"""
Copy output files/folders from the specified outgoing directory into the folder
specified by the job_output_path keyword argument (relative to the FS base dir).
Then create a job JSON file ready for transmission to a remote origin. The JSON
file contains the job_output_path prefix and the list of relative file paths.
"""
job_output_path = kwargs['job_output_path']
fs_output_path = os.path.join(self.base_dir, job_output_path)
fs_rel_file_paths = []

for root, dirs, files in os.walk(job_outgoing_dir):
rel_path = os.path.relpath(root, job_outgoing_dir)
if rel_path == '.':
rel_path = ''
fs_path = os.path.join(fs_output_path, rel_path)
os.makedirs(fs_path, exist_ok=True)

for filename in files:
local_file_path = os.path.join(root, filename)
if not os.path.islink(local_file_path):
try:
shutil.copy(local_file_path, fs_path)
except Exception as e:
logger.error(f'Failed to copy file {local_file_path} for '
f'job {job_id}, detail: {str(e)}')
raise
fs_rel_file_paths.append(os.path.join(rel_path, filename))

data = {'job_output_path': job_output_path,
'rel_file_paths': fs_rel_file_paths}
return io.BytesIO(json.dumps(data).encode())
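
For orientation, a minimal driver for the class above might look like the sketch below (not part of the commit). The temporary directories, the job id and the 'userdata'/'jobs/...' paths are made up for the example, and the plain dict stands in for the Flask app.config that pfcon actually passes in (BaseStorage is assumed to accept it):

    import os, json, tempfile
    from pfcon.filesystem_storage import FileSystemStorage

    base_dir = tempfile.mkdtemp()    # plays the role of FILESYSTEM_BASEDIR
    incoming = tempfile.mkdtemp()    # job's incoming dir (inputs are copied here)
    outgoing = tempfile.mkdtemp()    # job's outgoing dir (the plugin writes here)

    # Seed one input file under the shared filesystem: <base_dir>/userdata/img.txt
    os.makedirs(os.path.join(base_dir, 'userdata'), exist_ok=True)
    with open(os.path.join(base_dir, 'userdata', 'img.txt'), 'w') as f:
        f.write('hello')

    storage = FileSystemStorage({'FILESYSTEM_BASEDIR': base_dir})

    # store_data copies everything under each listed dir (relative to base_dir)
    # into the job's incoming dir.
    d_info = storage.store_data('chris-jid-1', incoming, ['userdata'])
    print(d_info['nfiles'])          # -> 1

    # Pretend the plugin produced an output, then publish it back under base_dir
    # and get the JSON manifest that pfcon would send to the remote origin.
    with open(os.path.join(outgoing, 'result.txt'), 'w') as f:
        f.write('done')
    manifest = storage.get_data('chris-jid-1', outgoing,
                                job_output_path='jobs/chris-jid-1/output')
    print(json.loads(manifest.getvalue()))
    # -> {'job_output_path': 'jobs/chris-jid-1/output', 'rel_file_paths': ['result.txt']}
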
27 changes: 23 additions & 4 deletions pfcon/resources.py
@@ -12,6 +12,8 @@
from .services import PmanService, ServiceException
from .zip_file_storage import ZipFileStorage
from .swift_storage import SwiftStorage
from .filesystem_storage import FileSystemStorage


logger = logging.getLogger(__name__)

@@ -101,6 +103,15 @@ def post(self):
logger.error(f'Error while fetching files from swift and '
f'storing job {job_id} data, detail: {str(e)}')
abort(400, message='input_dirs: Error fetching files from swift')

elif self.storage_env == 'filesystem':
storage = FileSystemStorage(app.config)
try:
d_info = storage.store_data(job_id, incoming_dir, args.input_dirs)
except Exception as e:
logger.error(f'Error while copying files from filesystem and '
f'storing job {job_id} data, detail: {str(e)}')
abort(400, message='input_dirs: Error copying files from filesystem')
else:
if self.storage_env == 'zipfile':
storage = ZipFileStorage(app.config)
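
In other words, the storage backend used by the job resources is selected purely from the (PFCON_INNETWORK, STORAGE_ENV) pair. A condensed sketch of that dispatch (illustrative only, not code from the commit):

    from pfcon.swift_storage import SwiftStorage
    from pfcon.zip_file_storage import ZipFileStorage
    from pfcon.filesystem_storage import FileSystemStorage

    # (pfcon_innetwork, storage_env) -> storage class, mirroring the branches above.
    STORAGE_DISPATCH = {
        (True,  'swift'):      SwiftStorage,
        (True,  'filesystem'): FileSystemStorage,
        (False, 'zipfile'):    ZipFileStorage,
    }

    def make_storage(pfcon_innetwork, storage_env, config):
        try:
            return STORAGE_DISPATCH[(pfcon_innetwork, storage_env)](config)
        except KeyError:
            raise ValueError(f"Unsupported storage environment '{storage_env}'")
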
@@ -165,6 +176,9 @@ def delete(self, job_id):
if self.pfcon_innetwork:
if self.storage_env == 'swift':
storage = SwiftStorage(app.config)

elif self.storage_env == 'filesystem':
storage = FileSystemStorage(app.config)
else:
if self.storage_env == 'zipfile':
storage = ZipFileStorage(app.config)
@@ -214,13 +228,18 @@ def get(self, job_id):

if self.pfcon_innetwork:
job_output_path = request.args.get('job_output_path')

if job_output_path:
storage = None
if self.storage_env == 'swift':
storage = SwiftStorage(app.config)
content = storage.get_data(job_id, outgoing_dir,
job_output_path=job_output_path)
download_name = f'{job_id}.json'
mimetype = 'application/json'
elif self.storage_env == 'filesystem':
storage = FileSystemStorage(app.config)

content = storage.get_data(job_id, outgoing_dir,
job_output_path=job_output_path.lstrip('/'))
download_name = f'{job_id}.json'
mimetype = 'application/json'
else:
# if no query parameter passed then the job's zip file is returned
storage = ZipFileStorage(app.config)
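
A rough client-side sketch of the in-network GET flow above (not from the commit): the URL path, port and job id are hypothetical and authentication is omitted; only the shape of the JSON manifest comes from FileSystemStorage.get_data:

    import requests  # any HTTP client would do; requests is used here for brevity

    # Hypothetical pfcon endpoint for a job's files (port 30006 as in the dev compose file).
    url = 'http://localhost:30006/api/v1/chris-jid-1/file'
    resp = requests.get(url, params={'job_output_path': 'jobs/chris-jid-1/output'}, timeout=30)

    manifest = resp.json()
    # e.g. {'job_output_path': 'jobs/chris-jid-1/output',
    #       'rel_file_paths': ['prediction.csv', 'logs/run.log']}
    for rel in manifest['rel_file_paths']:
        # The actual files now live under FILESYSTEM_BASEDIR on the shared volume.
        print(f"{manifest['job_output_path']}/{rel}")
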
4 changes: 2 additions & 2 deletions pfcon/zip_file_storage.py
@@ -59,8 +59,8 @@ def get_data(self, job_id, job_outgoing_dir, **kwargs):
except Exception as e:
logger.error(f'Failed to read file {local_file_path} for '
f'job {job_id}, detail: {str(e)}')
else:
nfiles += 1
raise
nfiles += 1
memory_zip_file.seek(0)

logger.info(f'{nfiles} files compressed for job {job_id}')
73 changes: 73 additions & 0 deletions swarm/docker-compose_dev_innetwork_fs.yml
@@ -0,0 +1,73 @@
# https://docs.docker.com/compose/yml/
# Each service defined in docker-compose.yml must specify exactly one of
# image or build. Other keys are optional, and are analogous to their
# docker run command-line counterparts.
#
# As with docker run, options specified in the Dockerfile (e.g., CMD,
# EXPOSE, VOLUME, ENV) are respected by default - you don't need to
# specify them again in docker-compose.yml.
#

version: '3.7'

services:
pfcon:
image: localhost:5000/fnndsc/pfcon:dev
build:
context: ..
args:
ENVIRONMENT: local
stdin_open: true # docker run -i
tty: true # docker run -t
# We need to mount a physical dir on the HOST onto pfcon's store base dir. This dir
# is given by the STOREBASE env variable substitution. The store base can be specified
# by the --storeBase flag during development.
command: ["python", "-m", "pfcon"]
environment:
- APPLICATION_MODE=development
- PFCON_INNETWORK=true
- STORAGE_ENV=filesystem
volumes:
- fs_storage_dev:/filesystem
- ${STOREBASE:?}:/var/local/storeBase:z
- ../pfcon:/app/pfcon:z
- ../tests:/app/tests:z
ports:
- "30006:5005"
depends_on:
- pman
networks:
- remote
labels:
name: "pfcon"
role: "pfcon service"

pman:
image: ${PMANREPO:?}/pman
# Since pman spins off containers of its own it needs to mount the storeBase dir
# (where pfcon shares the data) into the spawned container. This directory is passed
# in the STOREBASE env variable.
environment:
- STORAGE_TYPE=host
- STOREBASE
- SECRET_KEY="w1kxu^l=@pnsf!5piqz6!!5kdcdpo79y6jebbp+2244yjm*#+k"
- CONTAINER_ENV=swarm
volumes:
- /var/run/docker.sock:/var/run/docker.sock:z
deploy:
placement:
constraints:
- "node.role==manager"
networks:
- remote
labels:
name: "pman"
role: "pman service"


networks:
remote:

volumes:
fs_storage_dev: