From b8bdb495dc622a72b3c8804912f439a22e0d5c79 Mon Sep 17 00:00:00 2001 From: Tyler Sutterley Date: Wed, 15 Jun 2022 15:08:13 -0700 Subject: [PATCH] feat: use attempt login for nsidc utilities (#55) * feat: add NASA CMR spatial bounding box queries * feat: update classify photons parameters to match current GSFC version * feat: updates for uploading to pypi * refactor: place some imports behind try/except statements --- .github/workflows/Dockerfile | 1 + README.rst | 4 + doc/source/getting_started/Install.rst | 15 +- doc/source/getting_started/Parallel-HDF5.rst | 22 +- environment.yml | 26 +- icesat2_toolkit/__init__.py | 3 +- icesat2_toolkit/convert.py | 20 +- icesat2_toolkit/spatial.py | 13 +- icesat2_toolkit/utilities.py | 408 +++++++++---------- notebooks/Fit ICESat-2 ATL03.ipynb | 4 +- notebooks/Read ICESat-2 ATL03.ipynb | 4 +- requirements.txt | 29 +- scripts/MPI_ICESat2_ATL03.py | 7 +- scripts/MPI_ICESat2_ATL03_histogram.py | 7 +- scripts/convert_ICESat2_format.py | 7 +- setup.py | 3 - version.txt | 2 +- 17 files changed, 302 insertions(+), 273 deletions(-) diff --git a/.github/workflows/Dockerfile b/.github/workflows/Dockerfile index 156a656..8971f3f 100644 --- a/.github/workflows/Dockerfile +++ b/.github/workflows/Dockerfile @@ -300,6 +300,7 @@ RUN pip3 install --no-cache-dir --no-binary=h5py,cartopy \ paramiko \ pyproj \ python-dateutil \ + pyYAPC \ scikit-learn \ scipy \ scp \ diff --git a/README.rst b/README.rst index e3a012a..2dfad96 100644 --- a/README.rst +++ b/README.rst @@ -4,6 +4,7 @@ read-ICESat-2 |Language| |License| +|PyPI Version| |Documentation Status| |Binder| |Pangeo| @@ -15,6 +16,9 @@ read-ICESat-2 .. |License| image:: https://img.shields.io/badge/license-MIT-green.svg :target: https://github.com/tsutterley/read-ICESat-2/blob/main/LICENSE +.. |PyPI Version| image:: https://img.shields.io/pypi/v/icesat2-toolkit.svg + :target: https://pypi.python.org/pypi/icesat2-toolkit/ + .. |Documentation Status| image:: https://readthedocs.org/projects/read-icesat-2/badge/?version=latest :target: https://read-icesat-2.readthedocs.io/en/latest/?badge=latest diff --git a/doc/source/getting_started/Install.rst b/doc/source/getting_started/Install.rst index a114472..76c10d2 100644 --- a/doc/source/getting_started/Install.rst +++ b/doc/source/getting_started/Install.rst @@ -27,8 +27,8 @@ The ``read-ICESat-2`` installation uses the ``gdal-config`` routines to set the Installation ############ -Presently ``read-ICESat-2`` is only available for use as a -`GitHub repository `_. +``read-ICESat-2`` is available for download from the `GitHub repository `_, +and the `Python Package Index (pypi) `_, The contents of the repository can be download as a `zipped file `_ or cloned. To use this repository, please fork into your own account and then clone onto your system. @@ -55,6 +55,11 @@ Alternatively can install the utilities directly from GitHub with ``pip``: python3 -m pip install --user git+https://github.com/tsutterley/read-ICESat-2.git -Executable versions of this repository can also be tested using -`Binder `_ or -`Pangeo `_. +| This repository can be also tested using `BinderHub `_ platforms: +| |Binder| |Pangeo| + +.. |Binder| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/tsutterley/read-ICESat-2/main + +.. 
|Pangeo| image:: https://img.shields.io/static/v1.svg?logo=Jupyter&label=PangeoBinderAWS&message=us-west-2&color=orange + :target: https://aws-uswest2-binder.pangeo.io/v2/gh/tsutterley/read-ICESat-2/main?urlpath=lab diff --git a/doc/source/getting_started/Parallel-HDF5.rst b/doc/source/getting_started/Parallel-HDF5.rst index 817d090..5eca588 100644 --- a/doc/source/getting_started/Parallel-HDF5.rst +++ b/doc/source/getting_started/Parallel-HDF5.rst @@ -41,14 +41,14 @@ Dependencies .. code-block:: bash - curl -O http://zlib.net/zlib-1.2.11.tar.gz + curl -O http://zlib.net/zlib-1.2.12.tar.gz export CFLAGS=-fPIC - mkdir -p $HOME/packages/zlib/1.2.11 - ./configure --prefix=$HOME/packages/zlib/1.2.11 + mkdir -p $HOME/packages/zlib/1.2.12 + ./configure --prefix=$HOME/packages/zlib/1.2.12 make make install -modulefile for local installation of zlib (``~/privatemodules/zlib/1.2.11``): +modulefile for local installation of zlib (``~/privatemodules/zlib/1.2.12``): .. code-block:: tcl @@ -56,12 +56,12 @@ modulefile for local installation of zlib (``~/privatemodules/zlib/1.2.11``): # # zlib module for use with 'environment-modules' package: # - module-whatis "Provides zlib 1.2.11 (local)" + module-whatis "Provides zlib 1.2.12 (local)" global env - prepend-path PATH $env(HOME)/packages/zlib/1.2.11/bin - prepend-path LD_LIBRARY_PATH $env(HOME)/packages/zlib/1.2.11/lib - prepend-path MANPATH $env(HOME)/packages/zlib/1.2.11/share/man/ - append-path ZLIB_DIR $env(HOME)/packages/zlib/1.2.11/ + prepend-path PATH $env(HOME)/packages/zlib/1.2.12/bin + prepend-path LD_LIBRARY_PATH $env(HOME)/packages/zlib/1.2.12/lib + prepend-path MANPATH $env(HOME)/packages/zlib/1.2.12/share/man/ + append-path ZLIB_DIR $env(HOME)/packages/zlib/1.2.12/ - `szip `_ @@ -127,7 +127,7 @@ modulefile for local installation of OpenMPI (``~/privatemodules/mpi/openmpi/4.0 CC=~/packages/mpi/openmpi/4.0.3/bin/mpicc ./configure \ --enable-parallel --enable-hl --enable-shared \ --prefix=$HOME/packages/hdf5/1.10.5 \ - --with-zlib=$HOME/packages/zlib/1.2.11 \ + --with-zlib=$HOME/packages/zlib/1.2.12 \ --with-szip=$HOME/packages/szip/2.1.1 make make check @@ -143,7 +143,7 @@ modulefile for local installation of HDF5 (``~/privatemodules/hdf5/1.10.5``): # module-whatis "Provides hdf5 1.10.5 (local)" global env - prereq $env(HOME)/privatemodules/zlib/1.2.11 $env(HOME)/privatemodules/szip/2.1.1 $env(HOME)/privatemodules/mpi/openmpi/4.0.3 + prereq $env(HOME)/privatemodules/zlib/1.2.12 $env(HOME)/privatemodules/szip/2.1.1 $env(HOME)/privatemodules/mpi/openmpi/4.0.3 prepend-path PATH $env(HOME)/packages/hdf5/1.10.5/bin prepend-path LD_LIBRARY_PATH $env(HOME)/packages/hdf5/1.10.5/lib prepend-path MANPATH $env(HOME)/packages/hdf5/1.10.5/share/man/ diff --git a/environment.yml b/environment.yml index 0acb0eb..172094a 100644 --- a/environment.yml +++ b/environment.yml @@ -1,26 +1,26 @@ -name: read-ICESat-2 +name: icesat2_toolkit channels: - conda-forge dependencies: - python>=3.6 - notebook - - numpy - - scipy - - scikit-learn - - python-dateutil - - pandas - - pyproj - - matplotlib + - boto3 - cartopy - - mpi4py + - future - gdal - h5py>=2.9=mpi* - - netCDF4 - - zarr - - future - - boto3 - lxml + - matplotlib + - mpi4py + - netCDF4 + - numpy + - pandas - paramiko + - pyproj + - python-dateutil + - scikit-learn + - scipy - scp + - zarr - pip: - git+https://github.com/tsutterley/yapc.git diff --git a/icesat2_toolkit/__init__.py b/icesat2_toolkit/__init__.py index 400a221..23b8b5c 100644 --- a/icesat2_toolkit/__init__.py +++ b/icesat2_toolkit/__init__.py @@ 
-19,7 +19,6 @@ import icesat2_toolkit.time import icesat2_toolkit.utilities import icesat2_toolkit.version -from icesat2_toolkit.convert import convert from icesat2_toolkit.convert_delta_time import convert_delta_time from icesat2_toolkit.read_ICESat2_ATL03 import read_HDF5_ATL03, \ find_HDF5_ATL03_beams, read_HDF5_ATL09, read_HDF5_ATL03_main, read_HDF5_ATL03_beam @@ -33,4 +32,4 @@ from icesat2_toolkit.read_ICESat2_ATL12 import read_HDF5_ATL12, \ find_HDF5_ATL12_beams # get version number -__version__ = icesat2_toolkit.version.version \ No newline at end of file +__version__ = icesat2_toolkit.version.version diff --git a/icesat2_toolkit/convert.py b/icesat2_toolkit/convert.py index f22fb35..ccfab31 100644 --- a/icesat2_toolkit/convert.py +++ b/icesat2_toolkit/convert.py @@ -1,6 +1,6 @@ """ convert.py -Written by Tyler Sutterley (04/2022) +Written by Tyler Sutterley (06/2022) Utilities for converting ICESat-2 HDF5 files into different formats PYTHON DEPENDENCIES: @@ -21,6 +21,7 @@ time.py: Utilities for calculating time operations UPDATE HISTORY: + Updated 06/2022: place zarr and pandas imports behind try/except statements Updated 04/2022: updated docstrings to numpy documentation format Updated 01/2022: added ascii and dataframe outputs for ATL07 Updated 09/2021: added ground track and time to output dataframes @@ -33,13 +34,26 @@ import os import re import h5py -import zarr -import pandas +import warnings import itertools import posixpath import numpy as np from icesat2_toolkit.convert_delta_time import convert_delta_time +try: + import pandas +except ModuleNotFoundError: + warnings.filterwarnings("always") + warnings.warn("pandas not available") + warnings.warn("Some functions will throw an exception if called") + +try: + import zarr +except ModuleNotFoundError: + warnings.filterwarnings("always") + warnings.warn("zarr not available") + warnings.warn("Some functions will throw an exception if called") + class convert(): np.seterr(invalid='ignore') def __init__(self, filename=None, reformat=None): diff --git a/icesat2_toolkit/spatial.py b/icesat2_toolkit/spatial.py index 70b2a82..90d9d0e 100644 --- a/icesat2_toolkit/spatial.py +++ b/icesat2_toolkit/spatial.py @@ -1,7 +1,7 @@ #!/usr/bin/env python u""" spatial.py -Written by Tyler Sutterley (04/2022) +Written by Tyler Sutterley (06/2022) Utilities for reading and operating on spatial data @@ -17,6 +17,7 @@ https://pypi.python.org/pypi/GDAL UPDATE HISTORY: + Updated 06/2022: place netCDF4 import behind try/except statements Updated 04/2022: updated docstrings to numpy documentation format Updated 01/2022: use iteration breaks in convert ellipsoid function Written 11/2021 @@ -28,14 +29,22 @@ import uuid import h5py import logging -import netCDF4 import warnings import numpy as np + +try: + import netCDF4 +except ModuleNotFoundError: + warnings.filterwarnings("always") + warnings.warn("netCDF4 not available") + warnings.warn("Some functions will throw an exception if called") + try: import osgeo.gdal, osgeo.osr, osgeo.gdalconst except ModuleNotFoundError: warnings.filterwarnings("always") warnings.warn("GDAL not available") + warnings.warn("Some functions will throw an exception if called") def case_insensitive_filename(filename): """ diff --git a/icesat2_toolkit/utilities.py b/icesat2_toolkit/utilities.py index 9cfd993..126f441 100644 --- a/icesat2_toolkit/utilities.py +++ b/icesat2_toolkit/utilities.py @@ -1,7 +1,7 @@ #!/usr/bin/env python u""" utilities.py -Written by Tyler Sutterley (04/2022) +Written by Tyler Sutterley (06/2022) Download 
and management utilities for syncing time and auxiliary files PYTHON DEPENDENCIES: @@ -9,6 +9,7 @@ https://pypi.python.org/pypi/lxml UPDATE HISTORY: + Updated 06/2022: add NASA CMR spatial bounding box queries Updated 04/2022: updated docstrings to numpy documentation format Updated 03/2022: added NASA CMR query parameters for ATL14/15 added attempt login function to recursively check credentials @@ -65,7 +66,7 @@ from urllib.parse import urlencode import urllib.request as urllib2 -#-- PURPOSE: get absolute path within a package from a relative path +# PURPOSE: get absolute path within a package from a relative path def get_data_path(relpath): """ Get the absolute path within a package from a relative path @@ -75,16 +76,16 @@ def get_data_path(relpath): relpath: str, relative path """ - #-- current file path + # current file path filename = inspect.getframeinfo(inspect.currentframe()).filename filepath = os.path.dirname(os.path.abspath(filename)) if isinstance(relpath,list): - #-- use *splat operator to extract from list + # use *splat operator to extract from list return os.path.join(filepath,*relpath) elif isinstance(relpath,str): return os.path.join(filepath,relpath) -#-- PURPOSE: get the hash value of a file +# PURPOSE: get the hash value of a file def get_hash(local, algorithm='MD5'): """ Get the hash value from a local file or BytesIO object @@ -99,17 +100,17 @@ def get_hash(local, algorithm='MD5'): - ``'MD5'``: Message Digest - ``'sha1'``: Secure Hash Algorithm """ - #-- check if open file object or if local file exists + # check if open file object or if local file exists if isinstance(local, io.IOBase): if (algorithm == 'MD5'): return hashlib.md5(local.getvalue()).hexdigest() elif (algorithm == 'sha1'): return hashlib.sha1(local.getvalue()).hexdigest() elif os.access(os.path.expanduser(local),os.F_OK): - #-- generate checksum hash for local file - #-- open the local_file in binary read mode + # generate checksum hash for local file + # open the local_file in binary read mode with open(os.path.expanduser(local), 'rb') as local_buffer: - #-- generate checksum hash for a given type + # generate checksum hash for a given type if (algorithm == 'MD5'): return hashlib.md5(local_buffer.read()).hexdigest() elif (algorithm == 'sha1'): @@ -117,7 +118,7 @@ def get_hash(local, algorithm='MD5'): else: return '' -#-- PURPOSE: recursively split a url path +# PURPOSE: recursively split a url path def url_split(s): """ Recursively split a url path into a list @@ -134,7 +135,7 @@ def url_split(s): return tail, return url_split(head) + (tail,) -#-- PURPOSE: convert file lines to arguments +# PURPOSE: convert file lines to arguments def convert_arg_line_to_args(arg_line): """ Convert file lines to arguments @@ -144,13 +145,13 @@ def convert_arg_line_to_args(arg_line): arg_line: str line string containing a single argument and/or comments """ - #-- remove commented lines and after argument comments + # remove commented lines and after argument comments for arg in re.sub(r'\#(.*?)$',r'',arg_line).split(): if not arg.strip(): continue yield arg -#-- PURPOSE: returns the Unix timestamp value for a formatted date string +# PURPOSE: returns the Unix timestamp value for a formatted date string def get_unix_time(time_string, format='%Y-%m-%d %H:%M:%S'): """ Get the Unix timestamp value for a formatted date string @@ -168,7 +169,7 @@ def get_unix_time(time_string, format='%Y-%m-%d %H:%M:%S'): pass else: return calendar.timegm(parsed_time) - #-- try parsing with dateutil + # try parsing with dateutil try: 
parsed_time = dateutil.parser.parse(time_string.rstrip()) except (TypeError, ValueError): @@ -176,7 +177,7 @@ def get_unix_time(time_string, format='%Y-%m-%d %H:%M:%S'): else: return parsed_time.timestamp() -#-- PURPOSE: output a time string in isoformat +# PURPOSE: output a time string in isoformat def isoformat(time_string): """ Reformat a date string to ISO formatting @@ -186,7 +187,7 @@ def isoformat(time_string): time_string: str formatted time string to parse """ - #-- try parsing with dateutil + # try parsing with dateutil try: parsed_time = dateutil.parser.parse(time_string.rstrip()) except (TypeError, ValueError): @@ -194,7 +195,7 @@ def isoformat(time_string): else: return parsed_time.isoformat() -#-- PURPOSE: rounds a number to an even number less than or equal to original +# PURPOSE: rounds a number to an even number less than or equal to original def even(value): """ Rounds a number to an even number less than or equal to original @@ -206,7 +207,7 @@ def even(value): """ return 2*int(value//2) -#-- PURPOSE: rounds a number upward to its nearest integer +# PURPOSE: rounds a number upward to its nearest integer def ceil(value): """ Rounds a number upward to its nearest integer @@ -218,7 +219,7 @@ def ceil(value): """ return -int(-value//1) -#-- PURPOSE: make a copy of a file with all system information +# PURPOSE: make a copy of a file with all system information def copy(source, destination, move=False, **kwargs): """ Copy or move a file with all system information @@ -234,14 +235,14 @@ def copy(source, destination, move=False, **kwargs): """ source = os.path.abspath(os.path.expanduser(source)) destination = os.path.abspath(os.path.expanduser(destination)) - #-- log source and destination + # log source and destination logging.info('{0} -->\n\t{1}'.format(source,destination)) shutil.copyfile(source, destination) shutil.copystat(source, destination) if move: os.remove(source) -#-- PURPOSE: check ftp connection +# PURPOSE: check ftp connection def check_ftp_connection(HOST, username=None, password=None): """ Check internet connection with ftp host @@ -255,7 +256,7 @@ def check_ftp_connection(HOST, username=None, password=None): password: str or NoneType ftp password """ - #-- attempt to connect to ftp host + # attempt to connect to ftp host try: f = ftplib.FTP(HOST) f.login(username, password) @@ -267,7 +268,7 @@ def check_ftp_connection(HOST, username=None, password=None): else: return True -#-- PURPOSE: list a directory on a ftp host +# PURPOSE: list a directory on a ftp host def ftp_list(HOST, username=None, password=None, timeout=None, basename=False, pattern=None, sort=False): """ @@ -297,52 +298,52 @@ def ftp_list(HOST, username=None, password=None, timeout=None, mtimes: list last modification times for items in the directory """ - #-- verify inputs for remote ftp host + # verify inputs for remote ftp host if isinstance(HOST, str): HOST = url_split(HOST) - #-- try to connect to ftp host + # try to connect to ftp host try: ftp = ftplib.FTP(HOST[0],timeout=timeout) except (socket.gaierror,IOError): raise RuntimeError('Unable to connect to {0}'.format(HOST[0])) else: ftp.login(username,password) - #-- list remote path + # list remote path output = ftp.nlst(posixpath.join(*HOST[1:])) - #-- get last modified date of ftp files and convert into unix time + # get last modified date of ftp files and convert into unix time mtimes = [None]*len(output) - #-- iterate over each file in the list and get the modification time + # iterate over each file in the list and get the modification 
time for i,f in enumerate(output): try: - #-- try sending modification time command + # try sending modification time command mdtm = ftp.sendcmd('MDTM {0}'.format(f)) except ftplib.error_perm: - #-- directories will return with an error + # directories will return with an error pass else: - #-- convert the modification time into unix time + # convert the modification time into unix time mtimes[i] = get_unix_time(mdtm[4:], format="%Y%m%d%H%M%S") - #-- reduce to basenames + # reduce to basenames if basename: output = [posixpath.basename(i) for i in output] - #-- reduce using regular expression pattern + # reduce using regular expression pattern if pattern: i = [i for i,f in enumerate(output) if re.search(pattern,f)] - #-- reduce list of listed items and last modified times + # reduce list of listed items and last modified times output = [output[indice] for indice in i] mtimes = [mtimes[indice] for indice in i] - #-- sort the list + # sort the list if sort: i = [i for i,j in sorted(enumerate(output), key=lambda i: i[1])] - #-- sort list of listed items and last modified times + # sort list of listed items and last modified times output = [output[indice] for indice in i] mtimes = [mtimes[indice] for indice in i] - #-- close the ftp connection + # close the ftp connection ftp.close() - #-- return the list of items and last modified times + # return the list of items and last modified times return (output,mtimes) -#-- PURPOSE: download a file from a ftp host +# PURPOSE: download a file from a ftp host def from_ftp(HOST, username=None, password=None, timeout=None, local=None, hash='', chunk=8192, verbose=False, fid=sys.stdout, mode=0o775): @@ -377,59 +378,59 @@ def from_ftp(HOST, username=None, password=None, timeout=None, remote_buffer: obj BytesIO representation of file """ - #-- create logger + # create logger loglevel = logging.INFO if verbose else logging.CRITICAL logging.basicConfig(stream=fid, level=loglevel) - #-- verify inputs for remote ftp host + # verify inputs for remote ftp host if isinstance(HOST, str): HOST = url_split(HOST) - #-- try downloading from ftp + # try downloading from ftp try: - #-- try to connect to ftp host + # try to connect to ftp host ftp = ftplib.FTP(HOST[0],timeout=timeout) except (socket.gaierror,IOError): raise RuntimeError('Unable to connect to {0}'.format(HOST[0])) else: ftp.login(username,password) - #-- remote path + # remote path ftp_remote_path = posixpath.join(*HOST[1:]) - #-- copy remote file contents to bytesIO object + # copy remote file contents to bytesIO object remote_buffer = io.BytesIO() ftp.retrbinary('RETR {0}'.format(ftp_remote_path), remote_buffer.write, blocksize=chunk) remote_buffer.seek(0) - #-- save file basename with bytesIO object + # save file basename with bytesIO object remote_buffer.filename = HOST[-1] - #-- generate checksum hash for remote file + # generate checksum hash for remote file remote_hash = hashlib.md5(remote_buffer.getvalue()).hexdigest() - #-- get last modified date of remote file and convert into unix time + # get last modified date of remote file and convert into unix time mdtm = ftp.sendcmd('MDTM {0}'.format(ftp_remote_path)) remote_mtime = get_unix_time(mdtm[4:], format="%Y%m%d%H%M%S") - #-- compare checksums + # compare checksums if local and (hash != remote_hash): - #-- convert to absolute path + # convert to absolute path local = os.path.abspath(local) - #-- create directory if non-existent + # create directory if non-existent if not os.access(os.path.dirname(local), os.F_OK): 
os.makedirs(os.path.dirname(local), mode) - #-- print file information + # print file information args = (posixpath.join(*HOST),local) logging.info('{0} -->\n\t{1}'.format(*args)) - #-- store bytes to file using chunked transfer encoding + # store bytes to file using chunked transfer encoding remote_buffer.seek(0) with open(os.path.expanduser(local), 'wb') as f: shutil.copyfileobj(remote_buffer, f, chunk) - #-- change the permissions mode + # change the permissions mode os.chmod(local,mode) - #-- keep remote modification time of file and local access time + # keep remote modification time of file and local access time os.utime(local, (os.stat(local).st_atime, remote_mtime)) - #-- close the ftp connection + # close the ftp connection ftp.close() - #-- return the bytesIO object + # return the bytesIO object remote_buffer.seek(0) return remote_buffer -#-- PURPOSE: check internet connection +# PURPOSE: check internet connection def check_connection(HOST): """ Check internet connection with http host @@ -439,7 +440,7 @@ def check_connection(HOST): HOST: str remote http host """ - #-- attempt to connect to http host + # attempt to connect to http host try: urllib2.urlopen(HOST,timeout=20,context=ssl.SSLContext()) except urllib2.URLError: @@ -447,7 +448,7 @@ def check_connection(HOST): else: return True -#-- PURPOSE: list a directory on an Apache http Server +# PURPOSE: list a directory on an Apache http Server def http_list(HOST, timeout=None, context=ssl.SSLContext(), parser=lxml.etree.HTMLParser(), format='%Y-%m-%d %H:%M', pattern='', sort=False): @@ -480,40 +481,40 @@ def http_list(HOST, timeout=None, context=ssl.SSLContext(), colerror: list notification for list error """ - #-- verify inputs for remote http host + # verify inputs for remote http host if isinstance(HOST, str): HOST = url_split(HOST) - #-- try listing from http + # try listing from http try: - #-- Create and submit request. + # Create and submit request. 
request=urllib2.Request(posixpath.join(*HOST)) response=urllib2.urlopen(request,timeout=timeout,context=context) except (urllib2.HTTPError, urllib2.URLError) as e: colerror = 'List error from {0}'.format(posixpath.join(*HOST)) return (False,False,colerror) else: - #-- read and parse request for files (column names and modified times) + # read and parse request for files (column names and modified times) tree = lxml.etree.parse(response,parser) colnames = tree.xpath('//tr/td[not(@*)]//a/@href') - #-- get the Unix timestamp value for a modification time + # get the Unix timestamp value for a modification time collastmod = [get_unix_time(i,format=format) for i in tree.xpath('//tr/td[@align="right"][1]/text()')] - #-- reduce using regular expression pattern + # reduce using regular expression pattern if pattern: i = [i for i,f in enumerate(colnames) if re.search(pattern,f)] - #-- reduce list of column names and last modified times + # reduce list of column names and last modified times colnames = [colnames[indice] for indice in i] collastmod = [collastmod[indice] for indice in i] - #-- sort the list + # sort the list if sort: i = [i for i,j in sorted(enumerate(colnames), key=lambda i: i[1])] - #-- sort list of column names and last modified times + # sort list of column names and last modified times colnames = [colnames[indice] for indice in i] collastmod = [collastmod[indice] for indice in i] - #-- return the list of column names and last modified times + # return the list of column names and last modified times return (colnames,collastmod,None) -#-- PURPOSE: download a file from a http host +# PURPOSE: download a file from a http host def from_http(HOST, timeout=None, context=ssl.SSLContext(), local=None, hash='', chunk=16384, verbose=False, fid=sys.stdout, mode=0o775): @@ -548,45 +549,45 @@ def from_http(HOST, timeout=None, context=ssl.SSLContext(), remote_buffer: obj BytesIO representation of file """ - #-- create logger + # create logger loglevel = logging.INFO if verbose else logging.CRITICAL logging.basicConfig(stream=fid, level=loglevel) - #-- verify inputs for remote http host + # verify inputs for remote http host if isinstance(HOST, str): HOST = url_split(HOST) - #-- try downloading from http + # try downloading from http try: - #-- Create and submit request. + # Create and submit request. 
request = urllib2.Request(posixpath.join(*HOST)) response = urllib2.urlopen(request,timeout=timeout,context=context) except (urllib2.HTTPError, urllib2.URLError): raise Exception('Download error from {0}'.format(posixpath.join(*HOST))) else: - #-- copy remote file contents to bytesIO object + # copy remote file contents to bytesIO object remote_buffer = io.BytesIO() shutil.copyfileobj(response, remote_buffer, chunk) remote_buffer.seek(0) - #-- save file basename with bytesIO object + # save file basename with bytesIO object remote_buffer.filename = HOST[-1] - #-- generate checksum hash for remote file + # generate checksum hash for remote file remote_hash = hashlib.md5(remote_buffer.getvalue()).hexdigest() - #-- compare checksums + # compare checksums if local and (hash != remote_hash): - #-- convert to absolute path + # convert to absolute path local = os.path.abspath(local) - #-- create directory if non-existent + # create directory if non-existent if not os.access(os.path.dirname(local), os.F_OK): os.makedirs(os.path.dirname(local), mode) - #-- print file information + # print file information args = (posixpath.join(*HOST),local) logging.info('{0} -->\n\t{1}'.format(*args)) - #-- store bytes to file using chunked transfer encoding + # store bytes to file using chunked transfer encoding remote_buffer.seek(0) with open(os.path.expanduser(local), 'wb') as f: shutil.copyfileobj(remote_buffer, f, chunk) - #-- change the permissions mode + # change the permissions mode os.chmod(local,mode) - #-- return the bytesIO object + # return the bytesIO object remote_buffer.seek(0) return remote_buffer @@ -668,7 +669,7 @@ def attempt_login(urs, context=ssl.SSLContext(), # reached end of available retries raise RuntimeError('End of Retries: Check NASA Earthdata credentials') -#-- PURPOSE: "login" to NASA Earthdata with supplied credentials +# PURPOSE: "login" to NASA Earthdata with supplied credentials def build_opener(username, password, context=ssl.SSLContext(), password_manager=True, get_ca_certs=False, redirect=False, authorization_header=False, urs='https://urs.earthdata.nasa.gov'): @@ -699,41 +700,41 @@ def build_opener(username, password, context=ssl.SSLContext(), opener: obj OpenerDirector instance """ - #-- https://docs.python.org/3/howto/urllib2.html#id5 + # https://docs.python.org/3/howto/urllib2.html#id5 handler = [] - #-- create a password manager + # create a password manager if password_manager: password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() - #-- Add the username and password for NASA Earthdata Login system + # Add the username and password for NASA Earthdata Login system password_mgr.add_password(None,urs,username,password) handler.append(urllib2.HTTPBasicAuthHandler(password_mgr)) - #-- Create cookie jar for storing cookies. This is used to store and return - #-- the session cookie given to use by the data server (otherwise will just - #-- keep sending us back to Earthdata Login to authenticate). + # Create cookie jar for storing cookies. This is used to store and return + # the session cookie given to use by the data server (otherwise will just + # keep sending us back to Earthdata Login to authenticate). 
cookie_jar = CookieJar() handler.append(urllib2.HTTPCookieProcessor(cookie_jar)) - #-- SSL context handler + # SSL context handler if get_ca_certs: context.get_ca_certs() handler.append(urllib2.HTTPSHandler(context=context)) - #-- redirect handler + # redirect handler if redirect: handler.append(urllib2.HTTPRedirectHandler()) - #-- create "opener" (OpenerDirector instance) + # create "opener" (OpenerDirector instance) opener = urllib2.build_opener(*handler) - #-- Encode username/password for request authorization headers - #-- add Authorization header to opener + # Encode username/password for request authorization headers + # add Authorization header to opener if authorization_header: b64 = base64.b64encode('{0}:{1}'.format(username,password).encode()) opener.addheaders = [("Authorization","Basic {0}".format(b64.decode()))] - #-- Now all calls to urllib2.urlopen use our opener. + # Now all calls to urllib2.urlopen use our opener. urllib2.install_opener(opener) - #-- All calls to urllib2.urlopen will now use handler - #-- Make sure not to include the protocol in with the URL, or - #-- HTTPPasswordMgrWithDefaultRealm will be confused. + # All calls to urllib2.urlopen will now use handler + # Make sure not to include the protocol in with the URL, or + # HTTPPasswordMgrWithDefaultRealm will be confused. return opener -#-- PURPOSE: check that entered NASA Earthdata credentials are valid +# PURPOSE: check that entered NASA Earthdata credentials are valid def check_credentials(): """ Check that entered NASA Earthdata credentials are valid @@ -749,7 +750,7 @@ def check_credentials(): else: return True -#-- PURPOSE: list a directory on NSIDC https server +# PURPOSE: list a directory on NSIDC https server def nsidc_list(HOST, username=None, password=None, build=True, timeout=None, urs='urs.earthdata.nasa.gov', parser=lxml.etree.HTMLParser(), pattern='', sort=False): @@ -786,48 +787,42 @@ def nsidc_list(HOST, username=None, password=None, build=True, colerror: list Notification for list error """ - #-- use netrc credentials - if build and not (username or password): - username,_,password = netrc.netrc().authenticators(urs) - #-- build urllib2 opener and check credentials + # attempt to build urllib2 opener and check credentials if build: - #-- build urllib2 opener with credentials - build_opener(username, password) - #-- check credentials - check_credentials() - #-- verify inputs for remote https host + attempt_login(urs, username=username, password=password) + # verify inputs for remote https host if isinstance(HOST, str): HOST = url_split(HOST) - #-- try listing from https + # try listing from https try: - #-- Create and submit request. + # Create and submit request. 
request = urllib2.Request(posixpath.join(*HOST)) tree = lxml.etree.parse(urllib2.urlopen(request,timeout=timeout),parser) except (urllib2.HTTPError, urllib2.URLError) as e: colerror = 'List error from {0}'.format(posixpath.join(*HOST)) return (False,False,colerror) else: - #-- read and parse request for files (column names and modified times) + # read and parse request for files (column names and modified times) colnames = tree.xpath('//td[@class="indexcolname"]//a/@href') - #-- get the Unix timestamp value for a modification time + # get the Unix timestamp value for a modification time collastmod = [get_unix_time(i,format='%Y-%m-%d %H:%M') for i in tree.xpath('//td[@class="indexcollastmod"]/text()')] - #-- reduce using regular expression pattern + # reduce using regular expression pattern if pattern: i = [i for i,f in enumerate(colnames) if re.search(pattern,f)] - #-- reduce list of column names and last modified times + # reduce list of column names and last modified times colnames = [colnames[indice] for indice in i] collastmod = [collastmod[indice] for indice in i] - #-- sort the list + # sort the list if sort: i = [i for i,j in sorted(enumerate(colnames), key=lambda i: i[1])] - #-- sort list of column names and last modified times + # sort list of column names and last modified times colnames = [colnames[indice] for indice in i] collastmod = [collastmod[indice] for indice in i] - #-- return the list of column names and last modified times + # return the list of column names and last modified times return (colnames,collastmod,None) -#-- PURPOSE: download a file from a NSIDC https server +# PURPOSE: download a file from a NSIDC https server def from_nsidc(HOST, username=None, password=None, build=True, timeout=None, urs='urs.earthdata.nasa.gov', local=None, hash='', chunk=16384, verbose=False, fid=sys.stdout, mode=0o775): @@ -868,59 +863,53 @@ def from_nsidc(HOST, username=None, password=None, build=True, response_error: str or None notification for response error """ - #-- create logger + # create logger loglevel = logging.INFO if verbose else logging.CRITICAL logging.basicConfig(stream=fid, level=loglevel) - #-- use netrc credentials - if build and not (username or password): - username,_,password = netrc.netrc().authenticators(urs) - #-- build urllib2 opener and check credentials + # attempt to build urllib2 opener and check credentials if build: - #-- build urllib2 opener with credentials - build_opener(username, password) - #-- check credentials - check_credentials() - #-- verify inputs for remote https host + attempt_login(urs, username=username, password=password) + # verify inputs for remote https host if isinstance(HOST, str): HOST = url_split(HOST) - #-- try downloading from https + # try downloading from https try: - #-- Create and submit request. + # Create and submit request. 
request = urllib2.Request(posixpath.join(*HOST)) response = urllib2.urlopen(request,timeout=timeout) except (urllib2.HTTPError, urllib2.URLError) as e: response_error = 'Download error from {0}'.format(posixpath.join(*HOST)) return (False,response_error) else: - #-- copy remote file contents to bytesIO object + # copy remote file contents to bytesIO object remote_buffer = io.BytesIO() shutil.copyfileobj(response, remote_buffer, chunk) remote_buffer.seek(0) - #-- save file basename with bytesIO object + # save file basename with bytesIO object remote_buffer.filename = HOST[-1] - #-- generate checksum hash for remote file + # generate checksum hash for remote file remote_hash = hashlib.md5(remote_buffer.getvalue()).hexdigest() - #-- compare checksums + # compare checksums if local and (hash != remote_hash): - #-- convert to absolute path + # convert to absolute path local = os.path.abspath(local) - #-- create directory if non-existent + # create directory if non-existent if not os.access(os.path.dirname(local), os.F_OK): os.makedirs(os.path.dirname(local), mode) - #-- print file information + # print file information args = (posixpath.join(*HOST),local) logging.info('{0} -->\n\t{1}'.format(*args)) - #-- store bytes to file using chunked transfer encoding + # store bytes to file using chunked transfer encoding remote_buffer.seek(0) with open(os.path.expanduser(local), 'wb') as f: shutil.copyfileobj(remote_buffer, f, chunk) - #-- change the permissions mode + # change the permissions mode os.chmod(local,mode) - #-- return the bytesIO object + # return the bytesIO object remote_buffer.seek(0) return (remote_buffer,None) -#-- PURPOSE: build formatted query string for ICESat-2 release +# PURPOSE: build formatted query string for ICESat-2 release def query_release(release): """ Build formatted query string for ICESat-2 release @@ -937,11 +926,11 @@ def query_release(release): """ if release is None: return '' - #-- maximum length of version in CMR queries + # maximum length of version in CMR queries desired_pad_length = 3 if len(str(release)) > desired_pad_length: raise RuntimeError('Release string too long: "{0}"'.format(release)) - #-- Strip off any leading zeros + # Strip off any leading zeros release = str(release).lstrip('0') query_params = '' while len(release) <= desired_pad_length: @@ -950,7 +939,7 @@ def query_release(release): desired_pad_length -= 1 return query_params -#-- PURPOSE: check if the submitted cycles are valid +# PURPOSE: check if the submitted cycles are valid def cycles(cycle): """ Check if the submitted cycles are valid @@ -965,19 +954,19 @@ def cycles(cycle): cycle_list: list formatted available 91-day orbital cycles """ - #-- string length of cycles in granules + # string length of cycles in granules cycle_length = 2 - #-- number of GPS seconds between the GPS epoch and ATLAS SDP epoch + # number of GPS seconds between the GPS epoch and ATLAS SDP epoch atlas_sdp_gps_epoch = 1198800018.0 - #-- number of GPS seconds since the GPS epoch for first ATLAS data point + # number of GPS seconds since the GPS epoch for first ATLAS data point atlas_gps_start_time = atlas_sdp_gps_epoch + 24710205.39202261 epoch1 = datetime.datetime(1980, 1, 6, 0, 0, 0) epoch2 = datetime.datetime(1970, 1, 1, 0, 0, 0) - #-- get the total number of seconds since the start of ATLAS and now + # get the total number of seconds since the start of ATLAS and now delta_time_epochs = (epoch2 - epoch1).total_seconds() atlas_UNIX_start_time = atlas_gps_start_time - delta_time_epochs present_time = 
datetime.datetime.now().timestamp() - #-- divide total time by cycle length to get the maximum number of orbital cycles + # divide total time by cycle length to get the maximum number of orbital cycles ncycles = ceil((present_time - atlas_UNIX_start_time) / (86400 * 91)) all_cycles = [str(c + 1).zfill(cycle_length) for c in range(ncycles)] if cycle is None: @@ -993,13 +982,13 @@ def cycles(cycle): cycle_list.append(str(c).zfill(cycle_length)) else: raise TypeError("Please enter the cycle number as a list or string") - #-- check if user-entered cycle is outside of currently available range + # check if user-entered cycle is outside of currently available range if not set(all_cycles) & set(cycle_list): warnings.filterwarnings("always") warnings.warn("Listed cycle is not presently available") return cycle_list -#-- PURPOSE: check if the submitted RGTs are valid +# PURPOSE: check if the submitted RGTs are valid def tracks(track): """ Check if the submitted RGTs are valid @@ -1014,9 +1003,9 @@ def tracks(track): track_list: list formatted available reference ground tracks (RGTs) """ - #-- string length of RGTs in granules + # string length of RGTs in granules track_length = 4 - #-- total number of ICESat-2 satellite RGTs is 1387 + # total number of ICESat-2 satellite RGTs is 1387 all_tracks = [str(tr + 1).zfill(track_length) for tr in range(1387)] if track is None: return ["????"] @@ -1033,13 +1022,13 @@ def tracks(track): raise TypeError( "Reference Ground Track as a list or string" ) - #-- check if user-entered RGT is outside of the valid range + # check if user-entered RGT is outside of the valid range if not set(all_tracks) & set(track_list): warnings.filterwarnings("always") warnings.warn("Listed Reference Ground Track is not available") return track_list -#-- PURPOSE: check if the submitted granule regions are valid +# PURPOSE: check if the submitted granule regions are valid def granules(granule): """ Check if the submitted granule regions are valid @@ -1054,9 +1043,9 @@ def granules(granule): granule_list: list formatted available granule regions """ - #-- string length of granule regions in granule files + # string length of granule regions in granule files granule_length = 2 - #-- total number of ICESat-2 granule regions is 14 + # total number of ICESat-2 granule regions is 14 all_granules = [str(g).zfill(granule_length) for g in range(1,15)] if granule is None: return ["??"] @@ -1071,13 +1060,13 @@ def granules(granule): granule_list.append(str(g).zfill(granule_length)) else: raise TypeError("Please enter the cycle number as a list or string") - #-- check if user-entered granule is outside of currently available range + # check if user-entered granule is outside of currently available range if not set(all_granules) & set(granule_list): warnings.filterwarnings("always") warnings.warn("Listed cycle is not presently available") return granule_list -#-- PURPOSE: check if the submitted ATL14/ATL15 regions are valid +# PURPOSE: check if the submitted ATL14/ATL15 regions are valid def regions(region): """ Check if the submitted ATL14/ATL15 regions are valid @@ -1092,7 +1081,7 @@ def regions(region): region_list: list formatted available ATL14/ATL15 regions """ - #-- all available ICESat-2 ATL14/15 regions + # all available ICESat-2 ATL14/15 regions all_regions = ['AA','AK','CN','CS','GL','IS','SV','RA'] if region is None: return ["??"] @@ -1107,13 +1096,13 @@ def regions(region): region_list.append(str(r)) else: raise TypeError("Please enter the region as a list or string") - #-- check if 
user-entered region is currently not available + # check if user-entered region is currently not available if not set(all_regions) & set(region_list): warnings.filterwarnings("always") warnings.warn("Listed region is not presently available") return region_list -#-- PURPOSE: check if the submitted ATL14/ATL15 regions are valid +# PURPOSE: check if the submitted ATL14/ATL15 regions are valid def resolutions(resolution): """ Check if the submitted ATL14/ATL15 resolutions are valid @@ -1128,7 +1117,7 @@ def resolutions(resolution): resolution_list: list formatted available ATL14/ATL15 resolutions """ - #-- all available ICESat-2 ATL14/15 resolutions + # all available ICESat-2 ATL14/15 resolutions all_resolutions = ['100m','01km','10km','20km','40km'] if resolution is None: return ["????"] @@ -1143,7 +1132,7 @@ def resolutions(resolution): resolution_list.append(str(r)) else: raise TypeError("Please enter the resolution as a list or string") - #-- check if user-entered resolution is currently not available + # check if user-entered resolution is currently not available if not set(all_resolutions) & set(resolution_list): warnings.filterwarnings("always") warnings.warn("Listed resolution is not presently available") @@ -1169,18 +1158,18 @@ def readable_granules(product, **kwargs): readable_granule_list: list readable granule names for CMR queries """ - #-- default keyword arguments + # default keyword arguments kwargs.setdefault("cycles", None) kwargs.setdefault("tracks", None) kwargs.setdefault("granules", None) kwargs.setdefault("regions", None) kwargs.setdefault("resolutions", None) - #-- list of readable granule names + # list of readable granule names readable_granule_list = [] - #-- check if querying along-track or gridded products + # check if querying along-track or gridded products if product in ("ATL14","ATL15"): - #-- gridded land ice products - #-- for each ATL14/ATL15 parameter + # gridded land ice products + # for each ATL14/ATL15 parameter for r in regions(kwargs["regions"]): for s in resolutions(kwargs["resolutions"]): args = (product, r, s) @@ -1188,15 +1177,15 @@ def readable_granules(product, **kwargs): # append the granule pattern readable_granule_list.append(pattern.format(*args)) else: - #-- along-track products - #-- for each available cycle of interest + # along-track products + # for each available cycle of interest for c in cycles(kwargs["cycles"]): - #-- for each available track of interest + # for each available track of interest for t in tracks(kwargs["tracks"]): - #-- for each available granule region of interest + # for each available granule region of interest for g in granules(kwargs["granules"]): - #-- use single character wildcards "?" for date strings, - #-- sea ice product hemispheres, and any unset parameters + # use single character wildcards "?" 
for date strings, + # sea ice product hemispheres, and any unset parameters if product in ("ATL07", "ATL10", "ATL20", "ATL21"): args = (product, 14 * "?", t, c, g) pattern = "{0}-??_{1}_{2}{3}{4}_*" @@ -1206,12 +1195,12 @@ def readable_granules(product, **kwargs): else: args = (product, 14 * "?", t, c, g) pattern = "{0}_{1}_{2}{3}{4}_*" - #-- append the granule pattern + # append the granule pattern readable_granule_list.append(pattern.format(*args)) - #-- return readable granules list + # return readable granules list return readable_granule_list -#-- PURPOSE: filter the CMR json response for desired data files +# PURPOSE: filter the CMR json response for desired data files def cmr_filter_json(search_results, request_type="application/x-hdfeos"): """ Filter the CMR json response for desired data files @@ -1230,25 +1219,25 @@ def cmr_filter_json(search_results, request_type="application/x-hdfeos"): granule_urls: list ICESat-2 granule urls from NSIDC """ - #-- output list of granule ids and urls + # output list of granule ids and urls producer_granule_ids = [] granule_urls = [] - #-- check that there are urls for request + # check that there are urls for request if ('feed' not in search_results) or ('entry' not in search_results['feed']): return (producer_granule_ids,granule_urls) - #-- iterate over references and get cmr location + # iterate over references and get cmr location for entry in search_results['feed']['entry']: producer_granule_ids.append(entry['producer_granule_id']) for link in entry['links']: if (link['type'] == request_type): granule_urls.append(link['href']) break - #-- return the list of urls and granule ids + # return the list of urls and granule ids return (producer_granule_ids,granule_urls) -#-- PURPOSE: cmr queries for orbital parameters +# PURPOSE: cmr queries for orbital parameters def cmr(product=None, release=None, cycles=None, tracks=None, - granules=None, regions=None, resolutions=None, + granules=None, regions=None, resolutions=None, bbox=None, start_date=None, end_date=None, provider='NSIDC_ECS', request_type="application/x-hdfeos", verbose=False, fid=sys.stdout): @@ -1271,6 +1260,9 @@ def cmr(product=None, release=None, cycles=None, tracks=None, ICESat-2 ATL14/15 region strings to query resolutions: str, list or NoneType, default None ICESat-2 ATL14/15 resolution strings to query + bbox: list or NoneType, default None + Spatial bounding box for CMR query in form + (``lon_min``, ``lat_min``, ``lon_max``, ``lat_max``) start_date: str or NoneType, default None starting date for CMR product query end_date: str or NoneType, default None @@ -1291,40 +1283,44 @@ def cmr(product=None, release=None, cycles=None, tracks=None, granule_urls: list ICESat-2 granule urls from NSIDC """ - #-- create logger + # create logger loglevel = logging.INFO if verbose else logging.CRITICAL logging.basicConfig(stream=fid, level=loglevel) - #-- build urllib2 opener with SSL context - #-- https://docs.python.org/3/howto/urllib2.html#id5 + # build urllib2 opener with SSL context + # https://docs.python.org/3/howto/urllib2.html#id5 handler = [] - #-- Create cookie jar for storing cookies + # Create cookie jar for storing cookies cookie_jar = CookieJar() handler.append(urllib2.HTTPCookieProcessor(cookie_jar)) handler.append(urllib2.HTTPSHandler(context=ssl.SSLContext())) - #-- create "opener" (OpenerDirector instance) + # create "opener" (OpenerDirector instance) opener = urllib2.build_opener(*handler) - #-- build CMR query + # build CMR query cmr_format = 'json' cmr_page_size = 2000 
CMR_HOST = ['https://cmr.earthdata.nasa.gov','search', 'granules.{0}'.format(cmr_format)] - #-- build list of CMR query parameters + # build list of CMR query parameters CMR_KEYS = [] CMR_KEYS.append('?provider={0}'.format(provider)) CMR_KEYS.append('&sort_key[]=start_date') CMR_KEYS.append('&sort_key[]=producer_granule_id') CMR_KEYS.append('&scroll=true') CMR_KEYS.append('&page_size={0}'.format(cmr_page_size)) - #-- append product string + # append product string CMR_KEYS.append('&short_name={0}'.format(product)) - #-- append release strings + # append release strings CMR_KEYS.append(query_release(release)) - #-- append keys for start and end time - #-- verify that start and end times are in ISO format + # append keys for start and end time + # verify that start and end times are in ISO format start_date = isoformat(start_date) if start_date else '' end_date = isoformat(end_date) if end_date else '' CMR_KEYS.append('&temporal={0},{1}'.format(start_date, end_date)) - #-- append keys for querying specific granules + # append keys for spatial bounding box + if bbox is not None: + bounding_box = ','.join([str(b) for b in bbox]) + CMR_KEYS.append('&bounding_box={0}'.format(bounding_box)) + # append keys for querying specific granules CMR_KEYS.append("&options[readable_granule_name][pattern]=true") CMR_KEYS.append("&options[spatial][or]=true") readable_granule_list = readable_granules(product, @@ -1332,10 +1328,10 @@ def cmr(product=None, release=None, cycles=None, tracks=None, regions=regions, resolutions=resolutions) for gran in readable_granule_list: CMR_KEYS.append("&readable_granule_name[]={0}".format(gran)) - #-- full CMR query url + # full CMR query url cmr_query_url = "".join([posixpath.join(*CMR_HOST),*CMR_KEYS]) logging.info('CMR request={0}'.format(cmr_query_url)) - #-- output list of granule names and urls + # output list of granule names and urls producer_granule_ids = [] granule_urls = [] cmr_scroll_id = None @@ -1344,17 +1340,17 @@ def cmr(product=None, release=None, cycles=None, tracks=None, if cmr_scroll_id: req.add_header('cmr-scroll-id', cmr_scroll_id) response = opener.open(req) - #-- get scroll id for next iteration + # get scroll id for next iteration if not cmr_scroll_id: headers = {k.lower():v for k,v in dict(response.info()).items()} cmr_scroll_id = headers['cmr-scroll-id'] - #-- read the CMR search as JSON + # read the CMR search as JSON search_page = json.loads(response.read().decode('utf-8')) ids,urls = cmr_filter_json(search_page, request_type=request_type) if not urls: break - #-- extend lists + # extend lists producer_granule_ids.extend(ids) granule_urls.extend(urls) - #-- return the list of granule ids and urls + # return the list of granule ids and urls return (producer_granule_ids, granule_urls) diff --git a/notebooks/Fit ICESat-2 ATL03.ipynb b/notebooks/Fit ICESat-2 ATL03.ipynb index e2f746f..1706e91 100644 --- a/notebooks/Fit ICESat-2 ATL03.ipynb +++ b/notebooks/Fit ICESat-2 ATL03.ipynb @@ -954,8 +954,8 @@ " i2, = np.nonzero(photon_mframes[i1] == unique_major_frames[iteration])\n", " #-- calculate photon event weights\n", " pe_weights[i1[i2]] = classify_photons(x_atc[i1], h_ph[i1],\n", - " h_win_width, i2, K=3, min_ph=3, min_xspread=1.0,\n", - " min_hspread=0.01, aspect=3, method='linear')\n", + " h_win_width, i2, K=0, min_knn=5, min_ph=3, min_xspread=1.0,\n", + " min_hspread=0.01, win_x=15.0, win_h=6.0, method='linear')\n", "\n", " #-- photon event weights scaled to a single byte\n", " weight_ph = np.array(255*pe_weights,dtype=np.uint8)\n", diff --git 
a/notebooks/Read ICESat-2 ATL03.ipynb b/notebooks/Read ICESat-2 ATL03.ipynb index bc56ed8..adf9a5b 100644 --- a/notebooks/Read ICESat-2 ATL03.ipynb +++ b/notebooks/Read ICESat-2 ATL03.ipynb @@ -257,8 +257,8 @@ " i2, = np.nonzero(photon_mframes[i1] == unique_major_frames[iteration])\n", " #-- calculate photon event weights\n", " pe_weights[i1[i2]] = classify_photons(x_atc[i1], h_ph[i1],\n", - " h_win_width, i2, K=3, min_ph=3, min_xspread=1.0,\n", - " min_hspread=0.01, aspect=3, method='linear')\n", + " h_win_width, i2, K=0, min_knn=5, min_ph=3, min_xspread=1.0,\n", + " min_hspread=0.01, win_x=15.0, win_h=6.0, method='linear')\n", "\n", " #-- photon event weights scaled to a single byte\n", " weight_ph = np.array(255*pe_weights,dtype=np.uint8)\n", diff --git a/requirements.txt b/requirements.txt index d9684b7..9cfe054 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,21 @@ -numpy -scipy -mpi4py -h5py --no-binary=h5py -netCDF4 -zarr -gdal -pandas -pyproj -scikit-learn -python-dateutil -matplotlib +boto3 cartopy --no-binary=cartopy -shapely fiona future -boto3 +gdal +h5py --no-binary=h5py lxml +matplotlib +mpi4py +netCDF4 +numpy +pandas paramiko +pyproj +python-dateutil +pyYAPC +scikit-learn +scipy scp +shapely +zarr \ No newline at end of file diff --git a/scripts/MPI_ICESat2_ATL03.py b/scripts/MPI_ICESat2_ATL03.py index d728a05..03ba05b 100644 --- a/scripts/MPI_ICESat2_ATL03.py +++ b/scripts/MPI_ICESat2_ATL03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python u""" -MPI_ICESat2_ATL03.py (10/2021) +MPI_ICESat2_ATL03.py (06/2022) Read ICESat-2 ATL03 and ATL09 data files to calculate average segment surfaces ATL03 datasets: Global Geolocated Photons ATL09 datasets: Atmospheric Characteristics @@ -42,6 +42,7 @@ classify_photons.py: Yet Another Photon Classifier for Geolocated Photon Data UPDATE HISTORY: + Updated 06/2022: update classify photons to match current GSFC version Updated 05/2022: use argparse descriptions within sphinx documentation Updated 10/2021: using python logging for handling verbose output do not use possible TEP photons in photon classification calculation @@ -481,8 +482,8 @@ def main(): h_win_width += tlm_height[b][idx] #-- calculate photon event weights Distributed_Weights[i1[i2]] = classify_photons(x_atc[i1], h_ph[i1], - h_win_width, i2, K=3, min_ph=3, min_xspread=1.0, - min_hspread=0.01, aspect=3, method='linear') + h_win_width, i2, K=0, min_knn=5, min_ph=3, min_xspread=1.0, + min_hspread=0.01, win_x=15.0, win_h=6.0, method='linear') #-- photon event weights pe_weights = np.zeros((n_pe),dtype=np.float64) comm.Allreduce(sendbuf=[Distributed_Weights, MPI.DOUBLE], \ diff --git a/scripts/MPI_ICESat2_ATL03_histogram.py b/scripts/MPI_ICESat2_ATL03_histogram.py index 8452814..51ec91d 100644 --- a/scripts/MPI_ICESat2_ATL03_histogram.py +++ b/scripts/MPI_ICESat2_ATL03_histogram.py @@ -1,6 +1,6 @@ #!/usr/bin/env python u""" -MPI_ICESat2_ATL03_histogram.py (10/2021) +MPI_ICESat2_ATL03_histogram.py (06/2022) Read ICESat-2 ATL03 and ATL09 data files to calculate average segment surfaces ATL03 datasets: Global Geolocated Photons ATL09 datasets: Atmospheric Characteristics @@ -75,6 +75,7 @@ Geophysical Journal International (1997) 131, 267-280 UPDATE HISTORY: + Updated 06/2022: update classify photons to match current GSFC version Updated 05/2022: use argparse descriptions within sphinx documentation Updated 10/2021: using python logging for handling verbose output do not use possible TEP photons in photon classification calculation @@ -554,8 +555,8 @@ def main(): h_win_width += 
tlm_height[b][idx] #-- calculate photon event weights Distributed_Weights[i1[i2]] = classify_photons(x_atc[i1], h_ph[i1], - h_win_width, i2, K=3, min_ph=3, min_xspread=1.0, - min_hspread=0.01, aspect=3, method='linear') + h_win_width, i2, K=0, min_knn=5, min_ph=3, min_xspread=1.0, + min_hspread=0.01, win_x=15.0, win_h=6.0, method='linear') #-- photon event weights pe_weights = np.zeros((n_pe),dtype=np.float) comm.Allreduce(sendbuf=[Distributed_Weights, MPI.DOUBLE], \ diff --git a/scripts/convert_ICESat2_format.py b/scripts/convert_ICESat2_format.py index f510e6e..e211668 100644 --- a/scripts/convert_ICESat2_format.py +++ b/scripts/convert_ICESat2_format.py @@ -56,6 +56,7 @@ https://pandas.pydata.org/ UPDATE HISTORY: + Updated 06/2022: use explicit import of convert functions Updated 05/2022: use argparse descriptions within sphinx documentation Updated 10/2021: using python logging for handling verbose output added parsing for converting file lines to arguments @@ -75,8 +76,8 @@ import argparse import traceback import multiprocessing as mp -import icesat2_toolkit.convert import icesat2_toolkit.utilities +from icesat2_toolkit.convert import convert #-- PURPOSE: convert the ICESat-2 elevation data from HDF5 to zarr #-- or rechunked HDF5 formats @@ -202,9 +203,9 @@ def convert_HDF5(hdf5_file,FORMAT=None,CHUNKS=None,CLOBBER=False,MODE=0o775): #-- if file does not exist, is to be overwritten, or CLOBBER is set if TEST or CLOBBER: #-- output string for printing files transferred - output='{0} -->\n\t{1}{2}\n'.format(hdf5_file,output_file,OVERWRITE) + output = '{0} -->\n\t{1}{2}\n'.format(hdf5_file,output_file,OVERWRITE) #-- copy everything from the HDF5 file to the output file - conv = icesat2_toolkit.convert(filename=hdf5_file,reformat=FORMAT) + conv = convert(filename=hdf5_file, reformat=FORMAT) conv.file_converter(chunks=CHUNKS) #-- keep remote modification time of file and local access time os.utime(output_file, (os.stat(output_file).st_atime, hdf5_mtime)) diff --git a/setup.py b/setup.py index a81d81d..a0c356f 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,6 @@ # get install requirements with open('requirements.txt') as fh: install_requires = [line.split().pop(0) for line in fh.read().splitlines()] -# dependency links -dependency_links = ['https://github.com/tsutterley/yapc/tarball/main'] # get version with open('version.txt') as fh: @@ -76,7 +74,6 @@ def check_output(cmd): keywords=keywords, packages=find_packages(), install_requires=install_requires, - dependency_links=dependency_links, scripts=scripts, include_package_data=True, ) diff --git a/version.txt b/version.txt index 447ad9f..a2eec8e 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.0.21 \ No newline at end of file +1.0.0.22 \ No newline at end of file
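
A minimal usage sketch of the updated ``icesat2_toolkit.utilities`` module (not part of the patch above): it assumes ``icesat2-toolkit`` is installed and that NASA Earthdata credentials are available to ``attempt_login`` (for example through a ``~/.netrc`` entry for ``urs.earthdata.nasa.gov``); the product, release, bounding box, dates, and local filename are illustrative placeholders.

.. code-block:: python

    # sketch under the assumptions noted above: query NASA CMR for granules
    # within a spatial bounding box and download the first match from NSIDC
    import posixpath
    import icesat2_toolkit.utilities as utilities

    # CMR query using the new bbox keyword (lon_min, lat_min, lon_max, lat_max)
    ids, urls = utilities.cmr(product='ATL06', release='005',
        bbox=[-50.0, 68.0, -48.0, 70.0],
        start_date='2021-01-01', end_date='2021-01-31',
        verbose=True)

    # with build=True the NSIDC utilities now call attempt_login() to build
    # the urllib2 opener and verify the Earthdata credentials before download
    if urls:
        granule = posixpath.basename(urls[0])
        buffer, error = utilities.from_nsidc(urls[0], build=True,
            local=granule, verbose=True)

With ``build=True``, ``nsidc_list`` and ``from_nsidc`` defer credential handling to ``attempt_login``, which retries until a valid opener is built or raises ``RuntimeError('End of Retries: Check NASA Earthdata credentials')``.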