diff --git a/CHANGES.rst b/CHANGES.rst index 42d5758..4318e9b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,14 +10,18 @@ Changes ======= +Version 0.8.3 (2022-04-18) +-------------------------- + +- Fix build error and snippet for ssl disable. + Version 0.8.2 (2022-03-25) -------------------------- - Fix dependency deprecation git protocol for pip - Fix MODIS nodata publishing -- Add support to use custom parameters in Catalog -- Publish Sentinel-2 PVI as quicklook +- Version 0.8.1 (2021-05-07) -------------------------- diff --git a/bdc_collection_builder/celery/publish.py b/bdc_collection_builder/celery/publish.py index 3ba9cdf..19dc71d 100644 --- a/bdc_collection_builder/celery/publish.py +++ b/bdc_collection_builder/celery/publish.py @@ -498,4 +498,7 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti if not destination_file.exists(): shutil.move(str(old_file_path), str(destination)) + logging.info(f'Cleaning up {temporary_dir.name}') + shutil.rmtree(temporary_dir.name) + return item diff --git a/bdc_collection_builder/celery/tasks.py b/bdc_collection_builder/celery/tasks.py index 8d101a0..e8a8d75 100644 --- a/bdc_collection_builder/celery/tasks.py +++ b/bdc_collection_builder/celery/tasks.py @@ -24,8 +24,9 @@ from flask import current_app as flask_app from ..collections.models import RadcorActivity, RadcorActivityHistory +from ..collections.processor import sen2cor from ..collections.utils import (get_or_create_model, get_provider, - is_valid_compressed_file, post_processing) + is_valid_compressed_file, post_processing, safe_request) from ..config import Config from .publish import get_item_path, publish_collection @@ -151,8 +152,10 @@ def download(activity: dict, **kwargs): catalog_name = activity['args']['catalog'] catalog_args.update(parallel=True, progress=False, lazy=True) - provider = get_provider(catalog=catalog_name, **catalog_args) - download_order = [provider] + provider, collector = 
get_provider(catalog=catalog_name, **catalog_args) + setattr(collector, 'instance', provider) + setattr(collector, 'provider_name', f'{provider.name} (CUSTOM)') + download_order = [collector] else: # Use parallel flag for providers which has number maximum of connections per client (Sentinel-Hub only) download_order = collector_extension.get_provider_order(collection, lazy=True, parallel=True, progress=False, @@ -182,9 +185,9 @@ def download(activity: dict, **kwargs): Item.name == scene_id ).first() - if item: + if item and item.assets.get('asset'): # TODO: Get asset name of download file - item_path = item.assets['asset']['href'] + item_path = item.assets['asset'].get('href', '') item_path = item_path if not item_path.startswith('/') else item_path[1:] item_path = Path(prefix) / item_path @@ -215,7 +218,9 @@ def download(activity: dict, **kwargs): for collector in download_order: try: logging.info(f'Trying to download from {collector.provider_name}(id={collector.instance.id})') - temp_file = Path(collector.download(scene_id, output=tmp, dataset=activity['args']['dataset'])) + + with safe_request(): + temp_file = Path(collector.download(scene_id, output=tmp, dataset=activity['args']['dataset'])) activity['args']['provider_id'] = collector.instance.id @@ -231,6 +236,9 @@ def download(activity: dict, **kwargs): raise RuntimeError(f'Download fails {activity["sceneid"]}.') shutil.move(str(temp_file), str(download_file)) + if tmp and Path(tmp).exists(): + logging.info(f'Cleaning up {tmp}') + shutil.rmtree(tmp) refresh_execution_args(execution, activity, compressed_file=str(download_file)) @@ -248,6 +256,7 @@ def correction(activity: dict, collection_id=None, **kwargs): logging.info(f'Starting Correction Task for {collection.name}(id={collection.id}, scene_id={scene_id})') data_collection = get_provider_collection_from_activity(activity) + tmp = None try: output_path = data_collection.path(collection, prefix=Config.PUBLISH_DATA_DIR) @@ -293,15 +302,12 @@ def 
correction(activity: dict, collection_id=None, **kwargs): logging.info(f'Removing {str(output_path_entry)} sen2cor file before.') output_path_entry.unlink() - sen2cor_conf = Config.SEN2COR_CONFIG - logging.info(f'Using {entry} of sceneid {scene_id}') - # TODO: Use custom sen2cor version (2.5 or 2.8) - cmd = f'''docker run --rm -i \ - -v $INDIR:/mnt/input-dir \ - -v $OUTDIR:/mnt/output-dir \ - -v {sen2cor_conf["SEN2COR_AUX_DIR"]}:/home/lib/python2.7/site-packages/sen2cor/aux_data \ - {container_workdir} {sen2cor_conf["SEN2COR_DOCKER_IMAGE"]} {entry}''' env['OUTDIR'] = str(Path(tmp) / 'output') + + sen2cor(scene_id, input_dir=str(tmp), output_dir=env['OUTDIR'], + docker_container_work_dir=container_workdir.split(' '), **env) + + logging.info(f'Using {entry} of sceneid {scene_id}') else: lasrc_conf = Config.LASRC_CONFIG @@ -314,13 +320,13 @@ def correction(activity: dict, collection_id=None, **kwargs): -v {lasrc_conf["LEDAPS_AUX_DIR"]}:/mnt/ledaps-aux:ro \ {container_workdir} {lasrc_conf["LASRC_DOCKER_IMAGE"]} {entry}''' - logging.debug(cmd) + logging.debug(cmd) - # Execute command line - process = subprocess.Popen(cmd, shell=True, env=env, stdin=subprocess.PIPE) - process.wait() + # Execute command line + process = subprocess.Popen(cmd, shell=True, env=env, stdin=subprocess.PIPE) + process.wait() - assert process.returncode == 0 + assert process.returncode == 0 # TODO: We should be able to get output name from execution if processor_name.lower() == 'sen2cor': @@ -338,6 +344,10 @@ def correction(activity: dict, collection_id=None, **kwargs): except Exception as e: logging.error(f'Error in correction {scene_id} - {str(e)}', exc_info=True) raise e + finally: + if tmp and Path(tmp).exists(): + logging.info(f'Cleaning up {tmp}') + shutil.rmtree(tmp) return activity diff --git a/bdc_collection_builder/collections/processor.py b/bdc_collection_builder/collections/processor.py new file mode 100644 index 0000000..298031c --- /dev/null +++ 
def sen2cor(scene_id: str, input_dir: str, output_dir: str,
            docker_container_work_dir: list, version: Optional[str] = None, **env):
    """Run the Sen2Cor processor for a scene inside a Docker container.

    When ``version`` is given, a single ``docker run`` is executed with that
    image tag. When ``version`` is ``None``, the supported versions are tried
    in order (``2.10.0``, ``2.8.0``, ``2.5.5``) and the first successful
    result is returned.

    Args:
        scene_id: Scene identifier. Used as the container name and, with the
            ``.SAFE`` suffix, as the argument given to the image.
        input_dir: Host directory mounted on ``/mnt/input_dir``.
        output_dir: Host directory mounted on ``/mnt/output_dir``.
        docker_container_work_dir: Extra ``docker run`` arguments, spliced in
            right before the image name.
        version: Sen2Cor version used as image tag, or ``None`` to try every
            supported version.
        **env: Environment variables handed to the ``docker`` subprocess.

    Returns:
        Path of the first entry found in ``output_dir`` after a successful run.

    Raises:
        RuntimeError: When the container exits with a non-zero status, when it
            produces no output, or when every supported version fails.
    """
    if version is None:
        versions_supported = ['2.10.0', '2.8.0', '2.5.5']

        last_error = None
        for candidate in versions_supported:
            try:
                return sen2cor(scene_id, input_dir, output_dir, docker_container_work_dir,
                               version=candidate, **env)
            except RuntimeError as e:
                # Remember the failure and fall back to the next version.
                last_error = e

        raise RuntimeError(
            f'Could not execute Sen2Cor using {versions_supported} - {last_error}'
        ) from last_error

    sen2cor_dir = Config.SEN2COR_CONFIG["SEN2COR_DIR"]
    # The configuration folder is keyed by "major.minor" (e.g. "2.8" for "2.8.0").
    version_minor = '.'.join(version.split('.')[:-1])
    args = [
        'docker', 'run', '--rm', '-i',
        '--name', scene_id,
        '-v', f'{input_dir}:/mnt/input_dir',
        '-v', f'{output_dir}:/mnt/output_dir',
        '-v', f'{sen2cor_dir}/CCI4SEN2COR:/mnt/aux_data',
        '-v', f'{sen2cor_dir}/{version_minor}/cfg/L2A_GIPP.xml:/opt/sen2cor/{version}/cfg/L2A_GIPP.xml',
        *docker_container_work_dir,
        f'{Config.SEN2COR_CONFIG["SEN2COR_DOCKER_IMAGE"]}:{version}',
        f'{scene_id}.SAFE'
    ]

    logging.info(f'Using Sen2Cor {version}')

    # The context manager closes the stdin pipe once the process has finished.
    with subprocess.Popen(args, env=env, stdin=subprocess.PIPE) as process:
        process.wait()

    if process.returncode != 0:
        raise RuntimeError(f'Could not execute Sen2Cor using {version}')

    # iterdir() already yields paths joined to output_dir; take the first one lazily.
    output_path = next(Path(output_dir).iterdir(), None)
    if output_path is None:
        # Raise RuntimeError (not IndexError) so the version fallback can handle it.
        raise RuntimeError(f'Sen2Cor {version} produced no output in {output_dir}')

    return output_path
@contextlib.contextmanager
def safe_request():
    """Context manager that disables SSL certificate validation in ``requests``.

    When ``Config.DISABLE_SSL`` is false the block runs untouched. Otherwise
    ``requests.Session.merge_environment_settings`` is replaced, for the
    duration of the ``with`` block, by a wrapper that forces ``verify=False``.
    ``InsecureRequestWarning`` is silenced meanwhile. On exit the previous
    method is restored and every adapter used inside the block is closed.

    Note:
        The patch is applied on the ``requests.Session`` class itself, so it
        affects every session of the process while the block is active.

    This snippet was adapted from https://stackoverflow.com/questions/15445981/how-do-i-disable-the-security-certificate-check-in-python-requests.
    """
    if not Config.DISABLE_SSL:
        yield
        # Stop here: a context-manager generator must yield exactly once.
        return

    logging.info('Disabling SSL validation')

    # Capture whatever is installed right now so nested contexts unwind correctly.
    previous_merge = requests.Session.merge_environment_settings
    opened_adapters = set()

    def _merge_environment_settings(self, url, proxies, stream, verify, cert):
        """Track the adapter used for ``url`` and force ``verify=False``."""
        opened_adapters.add(self.get_adapter(url))

        settings = previous_merge(self, url, proxies, stream, verify, cert)
        settings['verify'] = False

        return settings

    requests.Session.merge_environment_settings = _merge_environment_settings

    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', InsecureRequestWarning)
            yield
    finally:
        requests.Session.merge_environment_settings = previous_merge

        for adapter in opened_adapters:
            try:
                adapter.close()
            except Exception:
                # Best-effort cleanup: a failing close must not mask the real error.
                logging.debug('Could not close adapter after SSL-disabled request', exc_info=True)
+ DISABLE_SSL = strtobool(os.getenv('DISABLE_SSL', 'YES')) + TASK_RETRY_DELAY = int(os.environ.get('TASK_RETRY_DELAY', 60 * 60)) # a hour CELERYD_PREFETCH_MULTIPLIER = int(os.environ.get('CELERYD_PREFETCH_MULTIPLIER', 4)) # disable diff --git a/bdc_collection_builder/controller.py b/bdc_collection_builder/controller.py index 9b5988c..68461e1 100644 --- a/bdc_collection_builder/controller.py +++ b/bdc_collection_builder/controller.py @@ -26,7 +26,7 @@ from .celery.tasks import correction, download, harmonization, post, publish from .collections.models import (ActivitySRC, RadcorActivity, RadcorActivityHistory, db) -from .collections.utils import get_or_create_model, get_provider +from .collections.utils import get_or_create_model, get_provider, safe_request from .forms import CollectionForm, RadcorActivityForm, SimpleActivityForm @@ -247,39 +247,40 @@ def radcor(cls, args: dict): try: catalog_provider, provider = get_provider(catalog=args['catalog'], **catalog_args) - if 'scenes' in args: - result = [] + with safe_request(): + if 'scenes' in args: + result = [] - unique_scenes = set(args['scenes']) + unique_scenes = set(args['scenes']) - for scene in unique_scenes: - query_result = provider.search( - query=args['dataset'], - filename=f'{scene}*', - **options - ) + for scene in unique_scenes: + query_result = provider.search( + query=args['dataset'], + filename=f'{scene}*', + **options + ) - result.extend(query_result) - elif 'tiles' in args: - result = [] - for tile in args['tiles']: - query_result = provider.search( + result.extend(query_result) + elif 'tiles' in args: + result = [] + for tile in args['tiles']: + query_result = provider.search( + query=args['dataset'], + tile=tile, + start_date=args['start'], + end_date=args['end'], + cloud_cover=cloud, + **options + ) + result.extend(query_result) + else: + result = provider.search( query=args['dataset'], - tile=tile, start_date=args['start'], end_date=args['end'], cloud_cover=cloud, **options ) - 
result.extend(query_result) - else: - result = provider.search( - query=args['dataset'], - start_date=args['start'], - end_date=args['end'], - cloud_cover=cloud, - **options - ) def _recursive(scene, task, parent=None, parallel=True, pass_args=True): """Create task dispatcher recursive.""" diff --git a/bdc_collection_builder/version.py b/bdc_collection_builder/version.py index e345525..6118e8b 100644 --- a/bdc_collection_builder/version.py +++ b/bdc_collection_builder/version.py @@ -13,4 +13,4 @@ """ -__version__ = '0.8.2' +__version__ = '0.8.3' diff --git a/docker/Dockerfile.atm b/docker/Dockerfile.atm index 7764b35..112aede 100644 --- a/docker/Dockerfile.atm +++ b/docker/Dockerfile.atm @@ -28,4 +28,5 @@ WORKDIR /app RUN pip3 install -U pip && \ pip3 install wheel && \ + pip3 install "Flask<2.1" "numpy==1.17.4" "imageio==2.10.3" && \ pip3 install -e . \ No newline at end of file diff --git a/setup.py b/setup.py index a976bb9..744a830 100644 --- a/setup.py +++ b/setup.py @@ -50,20 +50,23 @@ 'beautifulsoup4>=4.8.1', 'boto3>=1.11', 'docutils>=0.10,<0.15' - 'Flask>=1.1.1', + 'Flask>=1.1,<2.1', 'marshmallow-sqlalchemy>=0.19.0', - 'rasterio>=1.1.2,<1.2', - 'rio-cogeo>=1.1,<2', - 'numpy>=1.17.2', + 'rasterio==1.2.1', + 'rio-cogeo==3.0.2', + 'numpy>=1.17,<1.20', 'numpngw>=0.0.8', - 'scikit-image>=0.16.2', 'SQLAlchemy[postgresql_psycopg2binary]>=1.3,<1.4', 'bdc-collectors @ git+https://github.com/brazil-data-cube/bdc-collectors.git@v0.6.0#egg=bdc-collectors', 'bdc-catalog @ git+https://github.com/brazil-data-cube/bdc-catalog.git@v0.8.2#egg=bdc-catalog', 'celery[librabbitmq]>=4.3,<4.4.3', 'python-dateutil>=2,<3', - 'Werkzeug>=0.16,<1.0', 'shapely>=1.7,<2', + # Build Error Fix + 'tifffile==2021.11.2', + 'scipy==1.7.2', + 'scikit-image==0.18.3', + 'imageio==2.10.3', 'MarkupSafe==2.0.1', ]