Skip to content

Commit

Permalink
Feat/opt image s3 (#1411)
Browse files Browse the repository at this point in the history
* [ImageAlignment] using uppy to upload opt img to s3

* [OpticalImage] fixed pylint

* [OptImageTest] fixed add optical image test

* [OpticalImage] Added script to clone optical image to s3

* [OpticalImageScript] fixed pylint warnings

* [OptImageUpload] limit number of files to one and fixed PR suggestions

* [RawOpticalImage] changed raw optical image upload bucket

* [RawOpticalImage] fixed tests

* added upload bucket to config

* [engine] fixed typo in the config file

* [RawOpt] added nginx template config and changed script bucket
  • Loading branch information
lmacielvieira authored Sep 11, 2023
1 parent eda7770 commit 28a598c
Show file tree
Hide file tree
Showing 17 changed files with 360 additions and 89 deletions.
4 changes: 4 additions & 0 deletions ansible/aws/templates/all/vars.yml.template
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,10 @@ nginx_servers:
params:
- proxy_pass http://isotope_image_server/database_upload;
- client_max_body_size 5M;
- path: /raw_opt_upload
params:
- proxy_pass http://isotope_image_server/raw_opt_upload;
- client_max_body_size 50M;
- path: /graphql
params:
- proxy_pass http://graphql_server/graphql;
Expand Down
6 changes: 6 additions & 0 deletions docker/nginx/config/sites-enabled/default
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ server {
# include proxy-params.conf;
client_max_body_size 5M;
}
location /raw_opt_upload {
set $graphql_storage graphql:4201;
proxy_pass http://$graphql_storage$request_uri;
# include proxy-params.conf;
client_max_body_size 50M;
}
location /graphql {
set $graphql graphql:3010;
proxy_pass http://$graphql$request_uri;
Expand Down
3 changes: 2 additions & 1 deletion metaspace/engine/conf/config.docker.json
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@
"secret_access_key": "minioadmin"
},
"image_storage": {
"bucket": "sm-image-storage-dev"
"bucket": "sm-image-storage-dev",
"raw_img_bucket": "sm-engine-dev"
},
"imzml_browser_storage": {
"bucket": "sm-imzml-browser-dev"
Expand Down
3 changes: 2 additions & 1 deletion metaspace/engine/conf/config.json.template
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ The Prefix is necessary and should not have a leading or trailing slash
"adducts": {{ sm_default_adducts | to_json }}
},
"image_storage": {
"bucket": "{{ sm_image_storage_bucket }}"
"bucket": "{{ sm_image_storage_bucket }}",
"raw_img_bucket": "{{ sm_graphql_s3_upload_bucket }}"
},
"imzml_browser_storage": {
"bucket": "{{ sm_imzml_browser_bucket }}"
Expand Down
3 changes: 2 additions & 1 deletion metaspace/engine/conf/scitest_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@
"secret_access_key": "minioadmin"
},
"image_storage": {
"bucket": "sm-image-storage-sci-test"
"bucket": "sm-image-storage-sci-test",
"raw_img_bucket": "sm-engine-dev"
},
"imzml_browser_storage": {
"bucket": "sm-imzml-browser-sci-test"
Expand Down
3 changes: 2 additions & 1 deletion metaspace/engine/conf/test_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@
"secret_access_key": "minioadmin"
},
"image_storage": {
"bucket": "sm-image-storage-test"
"bucket": "sm-image-storage-test",
"raw_img_bucket": "sm-engine-tests"
},
"imzml_browser_storage": {
"bucket": "sm-imzml-browser-test"
Expand Down
132 changes: 132 additions & 0 deletions metaspace/engine/scripts/update_raw_optical_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
This script provides a simple way to update raw optical images paths from storage to s3.
"""

import argparse
import logging
import os
import sys

import botocore
import pandas as pd
from botocore.exceptions import NoCredentialsError

from sm.engine.config import SMConfig
from sm.engine.db import DB
from sm.engine.storage import get_s3_client
from sm.engine.util import GlobalInit

logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger('engine')


def get_datasets(sql_where):
    """
    Fetch the datasets that have an optical image registered in the database.

    Args:
        sql_where: optional raw SQL condition that is ANDed onto the base filter.
            It is interpolated into the query text as-is, so it must only ever be
            supplied by a trusted operator.

    Returns:
        A list of ``{'ds_id': ..., 'uuid': ...}`` dicts ordered by dataset id, or an
        empty list if a botocore ``ClientError`` is raised while querying.
    """
    try:
        if sql_where:
            query = (
                f"SELECT id, optical_image FROM "
                f"public.dataset WHERE optical_image is not null AND {sql_where} ORDER BY id"
            )
        else:
            query = (
                "SELECT id, optical_image FROM public.dataset "
                "WHERE optical_image is not null ORDER BY id"
            )
        rows = DB().select(query)

        # Each row is an (id, optical_image) pair; the optical_image column is exposed as 'uuid'.
        records = [{'ds_id': ds_id, 'uuid': optical_image} for ds_id, optical_image in rows]

        logger.info(f'Got {len(rows)} datasets')
        return records
    # NOTE(review): this guards a database query with a botocore exception type; DB errors
    # presumably surface as a different exception class - confirm which one should be caught.
    except botocore.exceptions.ClientError:
        logger.error("No datasets selected")
        return []


def check_s3_file(config, bucket_name, file_key):
    """
    Report whether ``file_key`` can be found in the S3 bucket ``bucket_name``.

    The check is a ``head_object`` call; any botocore ``ClientError`` raised by it
    (not only "not found") is reported as ``False``.
    """
    try:
        s3_client = get_s3_client(sm_config=config)
        s3_client.head_object(Bucket=bucket_name, Key=file_key)
    except botocore.exceptions.ClientError:
        found = False
    else:
        found = True
    return found


def check_storage_file(file_path):
    """
    Check whether the optical image exists in local storage as a regular file.

    Args:
        file_path: path of the raw optical image on the local file system.

    Returns:
        True only when ``file_path`` points to an existing file. Directories are
        rejected because the caller hands the path straight to a file upload,
        which cannot handle a directory.
    """
    return os.path.isfile(file_path)


def upload_to_s3(config, local_file, bucket, s3_file):
    """
    Copy ``local_file`` from local storage into ``bucket`` under the key ``s3_file``.

    Failures are logged rather than raised: missing credentials and botocore
    ``ClientError`` are each reported with their own log message.
    """
    try:
        s3_client = get_s3_client(sm_config=config)
        s3_client.upload_file(local_file, bucket, s3_file)
    except NoCredentialsError:
        logger.error("Credentials not available")
    # NOTE(review): boto3's upload_file may wrap client errors in its own
    # S3UploadFailedError - confirm this handler is actually reached on a failed upload.
    except botocore.exceptions.ClientError:
        logger.error(f'Failed to upload to s3: {s3_file}')
    else:
        logger.info(f'Upload to s3 Successful: {s3_file}')


def main():
    """
    Copy raw optical images of the selected datasets from local storage to s3.

    Datasets with an optical image are listed (optionally narrowed by ``--sql-where``).
    For each one, the image is uploaded to the raw image bucket under
    ``raw_optical/<ds_id>/<uuid>`` unless an object with that key already exists.
    A missing local file is logged and skipped.

    Exits with status 0 when no datasets match.
    """
    parser = argparse.ArgumentParser(
        description='Update raw optical images paths from storage to s3'
    )
    parser.add_argument(
        '--config', dest='config_path', default='conf/config.json', help='SM config path'
    )
    parser.add_argument(
        '--sql-where',
        dest='sql_where',
        default=None,
        help='SQL WHERE clause for picking rows from the dataset table, '
        'e.g. "id = \'2023-01-01_09h51m24s\' AND status = \'FINISHED\' AND size_hash IS NULL"',
    )
    args = parser.parse_args()

    with GlobalInit(args.config_path):
        # Read the config only after GlobalInit has been given --config. Fetching it
        # earlier (as before) could not take args.config_path into account.
        # NOTE(review): presumably GlobalInit points SMConfig at config_path - confirm.
        sm_config = SMConfig.get_conf()

        datasets = pd.DataFrame(get_datasets(sql_where=args.sql_where))
        if datasets.empty:
            logger.info('No datasets found')
            sys.exit(0)

        # The target bucket is the same for every dataset, so look it up once.
        bucket = sm_config['image_storage']['raw_img_bucket']

        for _, row in datasets.iterrows():
            ds_id, img_uuid = row['ds_id'], row['uuid']
            opt_img_s3_path = f'raw_optical/{ds_id}/{img_uuid}'

            # Already migrated: nothing to do for this dataset.
            if check_s3_file(sm_config, bucket, opt_img_s3_path):
                continue

            # On disk the image sits in a directory named after the first three
            # characters of its uuid, with the remainder as the file name.
            file_path = (
                f'/opt/data/metaspace/public/raw_optical_images/{img_uuid[0:3]}/{img_uuid[3:]}'
            )

            if check_storage_file(file_path):
                upload_to_s3(sm_config, file_path, bucket, opt_img_s3_path)
            else:
                logger.error(f'File does not exists on storage: {file_path}')


if __name__ == '__main__':
main()
16 changes: 14 additions & 2 deletions metaspace/engine/sm/engine/image_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@
class ImageType(str, Enum):
    """Kinds of image handled by the image storage.

    Members are interpolated as the leading segment of storage keys (see ``_make_key``).
    """

    ISO = 'iso'
    OPTICAL = 'optical'
    # Objects of this type are looked up in the separate raw image bucket.
    RAW = 'raw_optical'
    THUMB = 'thumb'
    DIAG = 'diag'


class ImageStorage:
ISO = ImageType.ISO
OPTICAL = ImageType.OPTICAL
RAW = ImageType.RAW
THUMB = ImageType.THUMB
DIAG = ImageType.DIAG

Expand All @@ -48,13 +50,18 @@ def __init__(self, sm_config: Dict = None):
self.s3: S3ServiceResource = get_s3_resource(sm_config)
self.s3_client: S3Client = self.s3.meta.client
self.bucket = self.s3.Bucket(sm_config['image_storage']['bucket'])
self.raw_img_bucket = self.s3.Bucket(sm_config['image_storage']['raw_img_bucket'])

@staticmethod
def _make_key(image_type, ds_id, img_id):
    """Build the storage key for an image in the form ``<image_type>/<ds_id>/<img_id>``."""
    return f'{image_type}/{ds_id}/{img_id}'

def _get_object(self, image_type, ds_id, img_id):
    """Return the bucket ``Object`` handle for an image, choosing the bucket by image type."""
    object_key = self._make_key(image_type, ds_id, img_id)
    # Raw images are kept in their own bucket; every other type uses the main one.
    target_bucket = self.raw_img_bucket if image_type == self.RAW else self.bucket
    return target_bucket.Object(object_key)

@staticmethod
Expand Down Expand Up @@ -95,6 +102,10 @@ def delete_images(self, image_type: ImageType, ds_id: str, image_ids: List[str])
def get_image_url(self, image_type: ImageType, ds_id: str, image_id: str) -> str:
    """Compose the URL of an image as ``<endpoint>/<bucket name>/<key>``.

    Raw images resolve against the raw image bucket, all other types against the main bucket.
    """
    base_url = self.s3_client.meta.endpoint_url
    object_key = self._make_key(image_type, ds_id, image_id)
    source_bucket = self.raw_img_bucket if image_type == self.RAW else self.bucket
    return f'{base_url}/{source_bucket.name}/{object_key}'

def get_ion_images_for_analysis(
Expand Down Expand Up @@ -209,6 +220,7 @@ def __call__(
OPTICAL = ImageType.OPTICAL
THUMB = ImageType.THUMB
DIAG = ImageType.DIAG
RAW = ImageType.RAW

get_image: Callable[[ImageType, str, str], bytes]
post_image: Callable[[ImageType, str, Union[bytes, BytesIO]], str]
Expand Down Expand Up @@ -257,8 +269,8 @@ def configure_bucket(sm_config: Dict):

def init(sm_config: Dict):
# pylint: disable=global-statement
global _instance, get_image, post_image, delete_image, delete_images, get_image_url
global get_ion_images_for_analysis
global _instance, get_image, post_image, delete_image, delete_images
global get_image_url, get_ion_images_for_analysis
_instance = ImageStorage(sm_config)
get_image = _instance.get_image
post_image = _instance.post_image
Expand Down
14 changes: 6 additions & 8 deletions metaspace/engine/sm/engine/optical_image.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import io
import logging
import numpy as np
import requests
from PIL import Image

from sm.engine import image_storage
from sm.engine.dataset import Dataset


SEL_DATASET_RAW_OPTICAL_IMAGE = 'SELECT optical_image from dataset WHERE id = %s'
UPD_DATASET_RAW_OPTICAL_IMAGE = (
'update dataset set optical_image = %s, transform = %s WHERE id = %s'
Expand Down Expand Up @@ -180,19 +178,19 @@ def _add_thumbnail_optical_image(db, ds, dims, optical_img, transform):
db.alter(UPD_DATASET_THUMB_OPTICAL_IMAGE, params=(img_thumb_id, img_thumb_url, ds.id))


def add_optical_image(db, ds_id, url, transform, zoom_levels=(1, 2, 4, 8)):
def add_optical_image(db, ds_id, raw_optical_img_id, transform, zoom_levels=(1, 2, 4, 8)):
"""Add optical image to dataset.
Generates scaled and transformed versions of the provided optical image + creates the thumbnail
"""

ds = Dataset.load(db, ds_id)
logger.info(f'Adding optical image {url} to "{ds.id}" dataset')
logger.info(f'Adding optical image {raw_optical_img_id} to "{ds.id}" dataset')

dims = _annotation_image_shape(db, ds)
resp = requests.get(url)
optical_img = Image.open(io.BytesIO(resp.content))
image_bytes = image_storage.get_image(image_storage.RAW, ds_id, raw_optical_img_id)
optical_img = Image.open(io.BytesIO(image_bytes))

raw_optical_img_id = url.split('/')[-1]
_add_raw_optical_image(db, ds, raw_optical_img_id, transform)
_add_zoom_optical_images(db, ds, dims, optical_img, transform, zoom_levels)
_add_thumbnail_optical_image(db, ds, dims, optical_img, transform)
Expand All @@ -205,7 +203,7 @@ def del_optical_image(db, ds_id):
logger.info(f'Deleting optical image of "{ds.id}" dataset')
(raw_img_id,) = db.select_one(SEL_DATASET_RAW_OPTICAL_IMAGE, params=(ds.id,))
if raw_img_id:
image_storage.delete_image(image_storage.OPTICAL, ds_id, raw_img_id)
image_storage.delete_image(image_storage.RAW, ds_id, raw_img_id)
for img_id in db.select_onecol(SEL_OPTICAL_IMAGE, params=(ds.id,)):
image_storage.delete_image(image_storage.OPTICAL, ds_id, img_id)
(thumbnail_img_id,) = db.select_one(SEL_OPTICAL_IMAGE_THUMBNAIL, params=(ds.id,))
Expand Down
23 changes: 18 additions & 5 deletions metaspace/engine/tests/test_optical_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,32 @@
from unittest import mock

import PIL.Image
import pytest

from sm.engine.db import DB
from sm.engine.optical_image import add_optical_image, OpticalImageType
from tests.utils import create_test_ds


from sm.engine import image_storage
from sm.engine.storage import get_s3_bucket


@pytest.fixture(autouse=True, scope='module')
def clean_storage(sm_config):
    """Delete every object from the configured image storage bucket once the module's tests end."""
    yield
    bucket_name = sm_config['image_storage']['bucket']
    image_bucket = get_s3_bucket(bucket_name, sm_config)
    image_bucket.objects.all().delete()


def create_image_bytes():
    """Return the PNG-encoded bytes of a newly created 100x100 RGB image."""
    buffer = io.BytesIO()
    test_image = PIL.Image.new('RGB', (100, 100))
    test_image.save(buffer, format='PNG')
    # getvalue() yields the whole buffer regardless of the current stream position.
    return buffer.getvalue()


@mock.patch('sm.engine.optical_image.requests')
@mock.patch('sm.engine.optical_image.image_storage')
def test_add_optical_image(image_storage_mock, requests_mock, fill_db, metadata, ds_config):
def test_add_optical_image(image_storage_mock, fill_db, metadata, ds_config):
image_ids = [
'opt_img_scaled_id1',
'opt_img_id1',
Expand All @@ -28,17 +38,20 @@ def test_add_optical_image(image_storage_mock, requests_mock, fill_db, metadata,
'opt_img_id3',
'thumbnail_id',
]

image_storage_mock.post_image.side_effect = image_ids
image_storage_mock.get_image_url.return_value = [f'http://{img_id}' for img_id in image_ids]
image_storage_mock.get_image.return_value = create_image_bytes()

requests_mock.get.return_value = mock.Mock(content=create_image_bytes())

db = DB()
ds = create_test_ds()

zoom_levels = [1, 2, 3]
raw_img_id = 'raw_opt_img_id'
test_image_bytes = create_image_bytes()
raw_img_id = image_storage.post_image(image_storage.RAW, ds.id, test_image_bytes)
print(raw_img_id)
print(ds.id)

add_optical_image(
db, ds.id, raw_img_id, [[1, 0, 0], [0, 1, 0], [0, 0, 1]], zoom_levels=zoom_levels
)
Expand Down
1 change: 1 addition & 0 deletions metaspace/graphql/schemas/dataset.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ type OpticalImage {

type RawOpticalImage {
url: String
uuid: String
transform: [[Float]]
}

Expand Down
11 changes: 10 additions & 1 deletion metaspace/graphql/src/modules/dataset/controller/Dataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,17 @@ export const rawOpticalImage = async(datasetId: string, ctx: Context) => {
if (ds) {
const engineDataset = await ctx.entityManager.getRepository(EngineDataset).findOne(datasetId)
if (engineDataset && engineDataset.opticalImage) {
const s3 = getS3Client()
const imageUrl = s3.getSignedUrl('getObject',
{
Bucket: `${config.upload.bucket}/raw_optical/${datasetId}`,
Key: engineDataset.opticalImage,
Expires: 1800,
})

return {
url: `/fs/raw_optical_images/${engineDataset.opticalImage}`,
url: imageUrl,
uuid: engineDataset.opticalImage,
transform: engineDataset.transform,
}
}
Expand Down
Loading

0 comments on commit 28a598c

Please sign in to comment.