From 97ac6df7109d280bcfc16b5a39f8920e556a695c Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 12 Jan 2017 14:44:44 -0800 Subject: [PATCH 01/47] Initial restructure --- .coveragerc | 1 + .gitignore | 1 + .travis.yml | 1 + README.md | 18 + README.rst | 426 ----- blobxfer.py | 3033 ----------------------------------- blobxfer/__init__.py | 25 + blobxfer/util.py | 213 +++ blobxfer/version.py | 25 + setup.py | 68 +- test/test_blobxfer.py | 1436 ----------------- test_requirements.txt | 5 + tests/test_blobxfer_util.py | 133 ++ tox.ini | 18 + 14 files changed, 492 insertions(+), 4911 deletions(-) create mode 100644 README.md delete mode 100644 README.rst delete mode 100755 blobxfer.py create mode 100644 blobxfer/__init__.py create mode 100644 blobxfer/util.py create mode 100644 blobxfer/version.py delete mode 100644 test/test_blobxfer.py create mode 100644 test_requirements.txt create mode 100644 tests/test_blobxfer_util.py create mode 100644 tox.ini diff --git a/.coveragerc b/.coveragerc index b710cba..5fc34c3 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,6 +6,7 @@ omit = exclude_lines = # Have to re-enable the standard pragma pragma: no cover + noqa # Don't complain about missing debug-only code: def __repr__ diff --git a/.gitignore b/.gitignore index ddc86bb..21d27b6 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ htmlcov/ nosetests.xml coverage.xml *,cover +junit-*.xml # Translations *.mo diff --git a/.travis.yml b/.travis.yml index cdf9217..5bc451d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ python: - 3.3 - 3.4 - 3.5 + - 3.6 - pypy # disable pypy3 until 3.3 compliance #- pypy3 diff --git a/README.md b/README.md new file mode 100644 index 0000000..6853cb4 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +blobxfer +======== + +AzCopy-like OS independent Azure storage blob and file share transfer tool + +Change Log +---------- + +See the [CHANGELOG.md](https://github.com/Azure/blobxfer/blob/master/CHANGELOG.md) file. + +------------------------------------------------------------------------ + +This project has adopted the +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the +[Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +or contact [](mailto:opencode@microsoft.com) with any +additional questions or comments. diff --git a/README.rst b/README.rst deleted file mode 100644 index 882d883..0000000 --- a/README.rst +++ /dev/null @@ -1,426 +0,0 @@ -.. image:: https://travis-ci.org/Azure/blobxfer.svg?branch=master - :target: https://travis-ci.org/Azure/blobxfer -.. image:: https://coveralls.io/repos/github/Azure/blobxfer/badge.svg?branch=master - :target: https://coveralls.io/github/Azure/blobxfer?branch=master -.. image:: https://img.shields.io/pypi/v/blobxfer.svg - :target: https://pypi.python.org/pypi/blobxfer -.. image:: https://img.shields.io/pypi/pyversions/blobxfer.svg - :target: https://pypi.python.org/pypi/blobxfer -.. image:: https://img.shields.io/pypi/l/blobxfer.svg - :target: https://pypi.python.org/pypi/blobxfer -.. image:: https://img.shields.io/docker/pulls/alfpark/blobxfer.svg - :target: https://hub.docker.com/r/alfpark/blobxfer -.. 
image:: https://images.microbadger.com/badges/image/alfpark/blobxfer.svg - :target: https://microbadger.com/images/alfpark/blobxfer - -blobxfer -======== -AzCopy-like OS independent Azure storage blob and file share transfer tool - -Installation ------------- -`blobxfer`_ is on PyPI and can be installed via: - -:: - - pip install blobxfer - -blobxfer is compatible with Python 2.7 and 3.3+. To install for Python 3, some -distributions may use ``pip3`` instead. If you do not want to install blobxfer -as a system-wide binary and modify system-wide python packages, use the -``--user`` flag with ``pip`` or ``pip3``. - -blobxfer is also on `Docker Hub`_, and the Docker image for Linux can be -pulled with the following command: - -:: - - docker pull alfpark/blobxfer - -Please see example usage below on how to use the docker image. - -If you encounter difficulties installing the script, it may be due to the -``cryptography`` dependency. Please ensure that your system is able to install -binary wheels provided by these dependencies (e.g., on Windows) or is able to -compile the dependencies (i.e., ensure you have a C compiler, python, ssl, -and ffi development libraries/headers installed prior to invoking pip). For -instance, to install blobxfer on a fresh Ubuntu 14.04/16.04 installation for -Python 2.7, issue the following commands: - -:: - - apt-get update - apt-get install -y build-essential libssl-dev libffi-dev libpython-dev python-dev python-pip - pip install --upgrade blobxfer - -If you need more fine-grained control on installing dependencies, continue -reading this section. Depending upon the desired mode of authentication with -Azure and options, the script will require the following packages, some of -which will automatically pull required dependent packages. Below is a list of -dependent packages: - -- Base Requirements - - - `azure-common`_ - - `azure-storage`_ - - `requests`_ - -- Encryption Support - - - `cryptography`_ - -- Service Management Certificate Support - - - `azure-servicemanagement-legacy`_ - -You can install these packages using pip, easy_install or through standard -setup.py procedures. These dependencies will be automatically installed if -using a package-based install or setup.py. The required versions of these -dependent packages can be found in ``setup.py``. - -.. _blobxfer: https://pypi.python.org/pypi/blobxfer -.. _Docker Hub: https://hub.docker.com/r/alfpark/blobxfer -.. _azure-common: https://pypi.python.org/pypi/azure-common -.. _azure-storage: https://pypi.python.org/pypi/azure-storage -.. _requests: https://pypi.python.org/pypi/requests -.. _cryptography: https://pypi.python.org/pypi/cryptography -.. _azure-servicemanagement-legacy: https://pypi.python.org/pypi/azure-servicemanagement-legacy - -Introduction ------------- - -The blobxfer.py script allows interacting with storage accounts using any of -the following methods: (1) management certificate, (2) shared account key, -(3) SAS key. The script can, in addition to working with single files, mirror -entire directories into and out of containers or file shares from Azure -Storage, respectively. File and block/page level MD5 integrity checking is -supported along with various transfer optimizations, built-in retries, -user-specified timeouts, and client-side encryption. - -Program parameters and command-line options can be listed via the ``-h`` -switch. Please invoke this first if you are unfamiliar with blobxfer operation -as not all options are explained below. 
At the minimum, three positional -arguments are required: storage account name, container or share name, and -local resource. Additionally, one of the following authentication switches -must be supplied: ``--subscriptionid`` with ``--managementcert``, -``--storageaccountkey``, or ``--saskey``. Do not combine different -authentication schemes together. - -Environment variables ``BLOBXFER_STORAGEACCOUNTKEY``, ``BLOBXFER_SASKEY``, -and ``BLOBXFER_RSAKEYPASSPHRASE`` can take the place of -``--storageaccountkey``, ``--saskey``, and ``--rsakeypassphrase`` respectively -if you do not want to expose credentials on a command line. - -It is generally recommended to use SAS keys wherever appropriate; only HTTPS -transport is used in the script. Please note that when using SAS keys that -only container- or fileshare-level SAS keys will allow for entire directory -uploading or container/fileshare downloading. The container/fileshare must -also have been created beforehand if using a service SAS, as -containers/fileshares cannot be created using service SAS keys. Account-level -SAS keys with a signed resource type of ``c`` or container will allow -containers/fileshares to be created with SAS keys. - -Example Usage -------------- - -The following examples show how to invoke the script with commonly used -options. Note that the authentication parameters are missing from the below -examples. You will need to select a preferred method of authenticating with -Azure and add the authentication switches (or as environment variables) as -noted above. - -The script will attempt to perform a smart transfer, by detecting if the local -resource exists. For example: - -:: - - blobxfer mystorageacct container0 mylocalfile.txt - -Note: if you downloaded the script directly from github, then you should append -``.py`` to the blobxfer command. - -If mylocalfile.txt exists locally, then the script will attempt to upload the -file to container0 on mystorageacct. If the file does not exist, then it will -attempt to download the resource. If the desired behavior is to download the -file from Azure even if the local file exists, one can override the detection -mechanism with ``--download``. ``--upload`` is available to force the transfer -to Azure storage. Note that specifying a particular direction does not force -the actual operation to occur as that depends on other options specified such -as skipping on MD5 matches. Note that you may use the ``--remoteresource`` flag -to rename the local file as the blob name on Azure storage if uploading, -however, ``--remoteresource`` has no effect if uploading a directory of files. -Please refer to the ``--collate`` option as explained below. - -If the local resource is a directory that exists, the script will attempt to -mirror (recursively copy) the entire directory to Azure storage while -maintaining subdirectories as virtual directories in Azure storage. You can -disable the recursive copy (i.e., upload only the files in the directory) -using the ``--no-recursive`` flag. - -To upload a directory with files only matching a Unix-style shell wildcard -pattern, an example commandline would be: - -:: - - blobxfer mystorageacct container0 mylocaldir --upload --include '**/*.txt' - -This would attempt to recursively upload the contents of mylocaldir -to container0 for any file matching the wildcard pattern ``*.txt`` within -all subdirectories. Include patterns can be applied for uploads as well as -downloads. 
Note that you will need to prevent globbing by your shell such -that wildcard expansion does not take place before script interprets the -argument. If ``--include`` is not specified, all files will be uploaded -or downloaded for the specific context. - -To download an entire container from your storage account, an example -commandline would be: - -:: - - blobxfer mystorageacct container0 mylocaldir --remoteresource . - -Assuming mylocaldir directory does not exist, the script will attempt to -download all of the contents in container0 because “.” is set with -``--remoteresource`` flag. To download individual blobs, one would specify the -blob name instead of “.” with the ``--remoteresource`` flag. If mylocaldir -directory exists, the script will attempt to upload the directory instead of -downloading it. If you want to force the download direction even if the -directory exists, indicate that with the ``--download`` flag. When downloading -an entire container, the script will attempt to pre-allocate file space and -recreate the sub-directory structure as needed. - -To collate files into specified virtual directories or local paths, use -the ``--collate`` flag with the appropriate parameter. For example, the -following commandline: - -:: - - blobxfer mystorageacct container0 myvhds --upload --collate vhds --autovhd - -If the directory ``myvhds`` had two vhd files a.vhd and subdir/b.vhd, these -files would be uploaded into ``container0`` under the virtual directory named -``vhds``, and b.vhd would not contain the virtual directory subdir; thus, -flattening the directory structure. The ``--autovhd`` flag would automatically -enable page blob uploads for these files. If you wish to collate all files -into the container directly, you would replace ``--collate vhds`` with -``--collate .`` - -To strip leading components of a path on upload, use ``--strip-components`` -with a number argument which will act similarly to tar's -``--strip-components=NUMBER`` parameter. This parameter is only applied -during an upload. - -To encrypt or decrypt files, the option ``--rsapublickey`` and -``--rsaprivatekey`` is available. This option requires a file location for a -PEM encoded RSA public or private key. An optional parameter, -``--rsakeypassphrase`` is available for passphrase protected RSA private keys. - -To encrypt and upload, only the RSA public key is required although an RSA -private key may be specified. To download and decrypt blobs which are -encrypted, the RSA private key is required. - -:: - - blobxfer mystorageacct container0 myblobs --upload --rsapublickey mypublickey.pem - -The above example commandline would encrypt and upload files contained in -``myblobs`` using an RSA public key named ``mypublickey.pem``. An RSA private -key may be specified instead for uploading (public parts will be used). - -:: - - blobxfer mystorageacct container0 myblobs --remoteresource . --download --rsaprivatekey myprivatekey.pem - -The above example commandline would download and decrypt all blobs in the -container ``container0`` using an RSA private key named ``myprivatekey.pem``. -An RSA private key must be specified for downloading and decryption of -encrypted blobs. - -Currently only the ``FullBlob`` encryption mode is supported for the -parameter ``--encmode``. The ``FullBlob`` encryption mode either uploads or -downloads Azure Storage .NET/Java compatible client-side encrypted block blobs. - -Please read important points in the Encryption Notes below for more -information. 
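
For background on what ``--rsapublickey`` does with the supplied key: in the
``FullBlob`` scheme a random AES-256 content key and an HMAC-SHA256 signing key
are generated per blob and wrapped with RSAES-OAEP before being stored in the
blob's ``encryptiondata`` metadata (see the Encryption Notes below). The
following is a minimal sketch of that key wrapping only, assuming the
``cryptography`` package is installed; the key path is illustrative and this is
not part of the blobxfer command line:

::

    import os
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import hashes, serialization
    from cryptography.hazmat.primitives.asymmetric import padding

    # load a PEM-encoded RSA public key (the path is illustrative)
    with open('mypublickey.pem', 'rb') as keyfile:
        pubkey = serialization.load_pem_public_key(
            keyfile.read(), backend=default_backend())

    # generate per-blob keys: an AES-256 content key and an
    # HMAC-SHA256 signing key
    symkey = os.urandom(32)
    signkey = os.urandom(32)

    # wrap each key with RSAES-OAEP; the wrapped keys are what end up in
    # the blob's encryptiondata metadata alongside the IV and MAC
    oaep = padding.OAEP(
        mgf=padding.MGF1(algorithm=hashes.SHA1()),
        algorithm=hashes.SHA1(),
        label=None)
    wrapped_symkey = pubkey.encrypt(symkey, oaep)
    wrapped_signkey = pubkey.encrypt(signkey, oaep)
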
- -To transfer to an Azure Files share, specify the ``--fileshare`` option and -specify the share name as the second positional argument. - -:: - - blobxfer mystorageacct myshare localfiles --fileshare --upload - -The above example would upload all files in the ``localfiles`` directory to -the share named ``myshare``. Encryption/decryption options are compatible with -Azure Files as the destination or source. Please refer to this `MSDN article`_ -for features not supported by the Azure File Service. - -.. _MSDN article: https://msdn.microsoft.com/en-us/library/azure/dn744326.aspx - -Docker Usage ------------- - -An example execution for uploading the host path ``/example/host/path`` -to a storage container named ``container0`` would be: - -:: - - docker run --rm -t -v /example/host/path:/path/in/container alfpark/blobxfer mystorageacct container0 /path/in/container --upload - -Note that docker volume mount mappings must be crafted with care to ensure -consistency with directory depth between the host and the container. -Optionally, you can utilize the ``--strip-components`` flag to remove leading -path components as desired. - -General Notes -------------- - -- If the pyOpenSSL package is present, urllib3/requests may use this package - (as discussed in the Performance Notes below), which may result in - exceptions being thrown that are not normalized by urllib3. This may - result in exceptions that should be retried, but are not. It is recommended - to upgrade your Python where pyOpenSSL is not required for fully validating - peers and such that blobxfer can operate without pyOpenSSL in a secure - fashion. You can also run blobxfer via Docker or in a virtualenv - environment without pyOpenSSL. -- blobxfer does not take any leases on blobs or containers. It is up to - the user to ensure that blobs are not modified while download/uploads - are being performed. -- No validation is performed regarding container and file naming and length - restrictions. -- blobxfer will attempt to download from blob storage as-is. If the source - filename is incompatible with the destination operating system, then - failure may result. -- When using SAS, the SAS key must be a container- or share-level SAS if - performing recursive directory upload or container/file share download. -- If uploading via service-level SAS keys, the container or file share must - already be created in Azure storage prior to upload. Account-level SAS keys - with the signed resource type of ``c`` or container-level permission will - allow conatiner or file share creation. -- For non-SAS requests, timeouts may not be properly honored due to - limitations of the Azure Python SDK. -- By default, files with matching MD5 checksums will be skipped for both - download (if MD5 information is present on the blob) and upload. Specify - ``--no-skiponmatch`` to disable this functionality. -- When uploading files as page blobs, the content is page boundary - byte-aligned. The MD5 for the blob is computed using the final aligned - data if the source is not page boundary byte-aligned. This enables these - page blobs or files to be skipped during subsequent download or upload by - default (i.e., ``--no-skiponmatch`` parameter is not specified). -- If ``--delete`` is specified, any remote files found that have no - corresponding local file in directory upload mode will be deleted. Deletion - occurs prior to any transfers, analogous to the delete-before rsync option. 
- Please note that this parameter will interact with ``--include`` and any - file not included from the include pattern will be deleted. -- ``--include`` has no effect when specifying a single file to upload or - blob to download. When specifying ``--include`` on container download, - the pattern will be applied to the blob name without the container name. - Globbing of wildcards must be disabled such that the script can read - the include pattern without the shell expanding the wildcards, if specified. -- Empty directories are not created locally when downloading from an Azure - file share which has empty directories. -- Empty directories are not deleted if ``--delete`` is specified and no - files remain in the directory on the Azure file share. - -Performance Notes ------------------ - -- Most likely, you will need to tweak the ``--numworkers`` argument that best - suits your environment. The default is the number of CPUs on the running - machine multiplied by 3 (except when transferring to/from file shares). - Increasing this number (or even using the default) may not provide the - optimal balance between concurrency and your network conditions. - Additionally, this number may not work properly if you are attempting to - run multiple blobxfer sessions in parallel from one machine or IP address. - Futhermore, this number may be defaulted to be set too high if encryption - is enabled and the machine cannot handle processing multiple threads in - parallel. -- Computing file MD5 can be time consuming for large files. If integrity - checking or rsync-like capability is not required, specify - ``--no-computefilemd5`` to disable MD5 computation for files. -- File share performance can be "slow" or become a bottleneck, especially for - file shares containing thousands of files as multiple REST calls must be - performed for each file. Currently, a single file share has a limit of up - to 60 MB/s and 1000 8KB IOPS. Please refer to the - `Azure Storage Scalability and Performance Targets`_ for performance targets - and limits regarding Azure Storage Blobs and Files. If scalable high - performance is required, consider using blob storage or multiple file - shares. -- Using SAS keys may provide the best performance as the script bypasses - the Azure Storage Python SDK and uses requests/urllib3 directly with - Azure Storage endpoints. Transfers to/from Azure Files will always use - the Azure Storage Python SDK even with SAS keys. -- As of requests 2.6.0 and Python versions < 2.7.9 (i.e., interpreter found - on default Ubuntu 14.04 installations), if certain packages are installed, - as those found in ``requests[security]`` then the underlying ``urllib3`` - package will utilize the ``ndg-httpsclient`` package which will use - `pyOpenSSL`_. This will ensure the peers are `fully validated`_. However, - this incurs a rather larger performance penalty. If you understand the - potential security risks for disabling this behavior due to high performance - requirements, you can either remove ``ndg-httpsclient`` or use the script - in a ``virtualenv`` environment without the ``ndg-httpsclient`` package. - Python versions >= 2.7.9 are not affected by this issue. These warnings can - be suppressed using ``--disable-urllib-warnings``, but is not recommended - unless you understand the security implications. - -.. _Azure Storage Scalability and Performance Targets: https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/ -.. 
_pyOpenSSL: https://urllib3.readthedocs.org/en/latest/security.html#pyopenssl -.. _fully validated: https://urllib3.readthedocs.org/en/latest/security.html#insecureplatformwarning - - -Encryption Notes ----------------- - -- All required information regarding the encryption process is stored on - each blob's ``encryptiondata`` and ``encryptiondata_authentication`` - metadata. These metadata entries are used on download to configure the proper - download and parameters for the decryption process as well as to authenticate - the encryption. Encryption metadata set by blobxfer (or the Azure Storage - .NET/Java client library) should not be modified or blobs/files may be - unrecoverable. -- Local files can be encrypted by blobxfer and stored in Azure Files and, - correspondingly, remote files on Azure File shares can be decrypted by - blobxfer as long as the metdata portions remain in-tact. -- Keys for AES256 block cipher are generated on a per-blob/file basis. These - keys are encrypted using RSAES-OAEP. -- MD5 for both the pre-encrypted and encrypted version of the file is stored - in blob/file metadata. Rsync-like synchronization is still supported - transparently with encrypted blobs/files. -- Whole file MD5 checks are skipped if a message authentication code is found - to validate the integrity of the encrypted data. -- Attempting to upload the same file as an encrypted blob with a different RSA - key or under a different encryption mode will not occur if the file content - MD5 is the same. This behavior can be overridden by including the option - ``--no-skiponmatch``. -- If one wishes to apply encryption to a blob/file already uploaded to Azure - Storage that has not changed, the upload will not occur since the underlying - file content MD5 has not changed; this behavior can be overriden by - including the option ``--no-skiponmatch``. -- Encryption is only applied to block blobs (or fileshare files). Encrypted - page blobs appear to be of minimal value stored in Azure Storage via - blobxfer. Thus, if uploading VHDs while enabling encryption in the script, - do not enable the option ``--pageblob``. ``--autovhd`` will continue to work - transparently where vhd files will be uploaded as page blobs in unencrypted - form while other files will be uploaded as encrypted block blobs. Note that - using ``--autovhd`` with encryption will force set the max chunk size to - 4 MiB for non-encrypted vhd files. -- Downloading encrypted blobs/files may not fully preallocate each file due to - padding. Script failure can result during transfer if there is insufficient - disk space. -- Zero-byte (empty) files are not encrypted. - -Change Log ----------- - -See the `CHANGELOG.md`_ file. - -.. _CHANGELOG.md: https://github.com/Azure/blobxfer/blob/master/CHANGELOG.md - ----- - -This project has adopted the -`Microsoft Open Source Code of Conduct `__. -For more information see the -`Code of Conduct FAQ `__ -or contact `opencode@microsoft.com `__ with any -additional questions or comments. diff --git a/blobxfer.py b/blobxfer.py deleted file mode 100755 index 5cadcba..0000000 --- a/blobxfer.py +++ /dev/null @@ -1,3033 +0,0 @@ -#!/usr/bin/env python - -# blobxfer Tool -# -# Copyright (c) Microsoft Corporation -# -# All rights reserved. 
-# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -""" -Data transfer tool for Azure blob and file storage - -See notes in the README.rst file. - -TODO list: -- convert from threading to multiprocessing -- move instruction queue data to class -- migrate connections with sas to azure-storage -""" - -# pylint: disable=R0913,R0914 - -# stdlib imports -from __future__ import print_function -import argparse -import base64 -import errno -import fnmatch -import hashlib -import hmac -import json -import mimetypes -import multiprocessing -import os -import platform -# pylint: disable=F0401 -try: - import queue -except ImportError: # pragma: no cover - import Queue as queue -# pylint: enable=F0401 -import socket -import sys -import threading -import time -import traceback -try: - from urllib.parse import quote as urlquote -except ImportError: # pramga: no cover - from urllib import quote as urlquote -import xml.etree.ElementTree as ET -# non-stdlib imports -import azure.common -try: - import azure.servicemanagement -except ImportError: # pragma: no cover - pass -import azure.storage.blob -import azure.storage.file -try: - import cryptography.hazmat.backends - import cryptography.hazmat.primitives.asymmetric.padding - import cryptography.hazmat.primitives.asymmetric.rsa - import cryptography.hazmat.primitives.ciphers - import cryptography.hazmat.primitives.ciphers.algorithms - import cryptography.hazmat.primitives.ciphers.modes - import cryptography.hazmat.primitives.constant_time - import cryptography.hazmat.primitives.hashes - import cryptography.hazmat.primitives.padding - import cryptography.hazmat.primitives.serialization -except ImportError: # pragma: no cover - pass -import requests - -# remap keywords for Python3 -# pylint: disable=W0622,C0103 -try: - xrange -except NameError: # pragma: no cover - xrange = range -try: - long -except NameError: # pragma: no cover - long = int -# pylint: enable=W0622,C0103 - -# global defines -_SCRIPT_VERSION = '0.12.1' -_PY2 = sys.version_info.major == 2 -_DEFAULT_MAX_STORAGEACCOUNT_WORKERS = multiprocessing.cpu_count() * 3 -_MAX_BLOB_CHUNK_SIZE_BYTES = 4194304 -_EMPTY_MAX_PAGE_SIZE_MD5 = 'tc+p1sj+vWGPkawoQ9UKHA==' -_MAX_LISTBLOBS_RESULTS = 1000 -_PAGEBLOB_BOUNDARY = 512 -_DEFAULT_STORAGE_ENDPOINT = 'core.windows.net' -_DEFAULT_MANAGEMENT_ENDPOINT = 'management.core.windows.net' -_ENVVAR_STORAGEACCOUNTKEY = 'BLOBXFER_STORAGEACCOUNTKEY' -_ENVVAR_SASKEY = 'BLOBXFER_SASKEY' -_ENVVAR_RSAKEYPASSPHRASE = 'BLOBXFER_RSAKEYPASSPHRASE' -# 
encryption defines -_AES256_KEYLENGTH_BYTES = 32 -_AES256_BLOCKSIZE_BYTES = 16 -_HMACSHA256_DIGESTSIZE_BYTES = 32 -_AES256CBC_HMACSHA256_OVERHEAD_BYTES = _AES256_BLOCKSIZE_BYTES + \ - _HMACSHA256_DIGESTSIZE_BYTES -_ENCRYPTION_MODE_FULLBLOB = 'FullBlob' -_ENCRYPTION_MODE_CHUNKEDBLOB = 'ChunkedBlob' -_DEFAULT_ENCRYPTION_MODE = _ENCRYPTION_MODE_FULLBLOB -_ENCRYPTION_PROTOCOL_VERSION = '1.0' -_ENCRYPTION_ALGORITHM = 'AES_CBC_256' -_ENCRYPTION_AUTH_ALGORITHM = 'HMAC-SHA256' -_ENCRYPTION_CHUNKSTRUCTURE = 'IV || EncryptedData || Signature' -_ENCRYPTION_ENCRYPTED_KEY_SCHEME = 'RSA-OAEP' -_ENCRYPTION_METADATA_NAME = 'encryptiondata' -_ENCRYPTION_METADATA_MODE = 'EncryptionMode' -_ENCRYPTION_METADATA_ALGORITHM = 'Algorithm' -_ENCRYPTION_METADATA_MAC = 'MessageAuthenticationCode' -_ENCRYPTION_METADATA_LAYOUT = 'EncryptedDataLayout' -_ENCRYPTION_METADATA_CHUNKOFFSETS = 'ChunkByteOffsets' -_ENCRYPTION_METADATA_CHUNKSTRUCTURE = 'ChunkStructure' -_ENCRYPTION_METADATA_AGENT = 'EncryptionAgent' -_ENCRYPTION_METADATA_PROTOCOL = 'Protocol' -_ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM = 'EncryptionAlgorithm' -_ENCRYPTION_METADATA_INTEGRITY_AUTH = 'EncryptionAuthentication' -_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY = 'WrappedContentKey' -_ENCRYPTION_METADATA_ENCRYPTEDKEY = 'EncryptedKey' -_ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY = 'EncryptedAuthenticationKey' -_ENCRYPTION_METADATA_CONTENT_IV = 'ContentEncryptionIV' -_ENCRYPTION_METADATA_KEYID = 'KeyId' -_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS = 'BlobxferExtensions' -_ENCRYPTION_METADATA_PREENCRYPTED_MD5 = 'PreEncryptedContentMD5' -_ENCRYPTION_METADATA_AUTH_NAME = 'encryptiondata_authentication' -_ENCRYPTION_METADATA_AUTH_METAAUTH = 'EncryptionMetadataAuthentication' -_ENCRYPTION_METADATA_AUTH_ENCODING = 'Encoding' -_ENCRYPTION_METADATA_AUTH_ENCODING_TYPE = 'UTF-8' - - -class EncryptionMetadataJson(object): - """Class for handling encryption metadata json""" - def __init__( - self, args, symkey, signkey, iv, encdata_signature, - preencrypted_md5, rsakeyid=None): - """Ctor for EncryptionMetadataJson - Parameters: - args - program arguments - symkey - symmetric key - signkey - signing key - iv - initialization vector - encdata_signature - encrypted data signature (MAC) - preencrypted_md5 - pre-encrypted md5 hash - rsakeyid - symmetric key id - Returns: - Nothing - Raises: - Nothing - """ - self.encmode = args.encmode - self.rsaprivatekey = args.rsaprivatekey - self.rsapublickey = args.rsapublickey - self.chunksizebytes = args.chunksizebytes - self.symkey = symkey - self.signkey = signkey - if rsakeyid is None: - self.rsakeyid = 'private:key1' - else: - self.rsakeyid = rsakeyid - self.iv = iv - self.hmac = encdata_signature - self.md5 = preencrypted_md5 - - def construct_metadata_json(self): - """Constructs encryptiondata metadata - Paramters: - None - Returns: - dict of encryptiondata and encryptiondata_authentiation json - Raises: - Nothing - """ - encsymkey, _ = rsa_encrypt_key( - self.rsaprivatekey, self.rsapublickey, self.symkey) - encsignkey, _ = rsa_encrypt_key( - self.rsaprivatekey, self.rsapublickey, self.signkey) - encjson = { - _ENCRYPTION_METADATA_MODE: self.encmode, - _ENCRYPTION_METADATA_WRAPPEDCONTENTKEY: { - _ENCRYPTION_METADATA_KEYID: self.rsakeyid, - _ENCRYPTION_METADATA_ENCRYPTEDKEY: encsymkey, - _ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY: encsignkey, - _ENCRYPTION_METADATA_ALGORITHM: - _ENCRYPTION_ENCRYPTED_KEY_SCHEME, - }, - _ENCRYPTION_METADATA_AGENT: { - _ENCRYPTION_METADATA_PROTOCOL: _ENCRYPTION_PROTOCOL_VERSION, - 
_ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM: - _ENCRYPTION_ALGORITHM - }, - _ENCRYPTION_METADATA_INTEGRITY_AUTH: { - _ENCRYPTION_METADATA_ALGORITHM: - _ENCRYPTION_AUTH_ALGORITHM, - }, - 'KeyWrappingMetadata': {}, - } - if self.md5 is not None: - encjson[_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS] = { - _ENCRYPTION_METADATA_PREENCRYPTED_MD5: self.md5 - } - if self.encmode == _ENCRYPTION_MODE_FULLBLOB: - encjson[_ENCRYPTION_METADATA_CONTENT_IV] = base64encode(self.iv) - encjson[_ENCRYPTION_METADATA_INTEGRITY_AUTH][ - _ENCRYPTION_METADATA_MAC] = base64encode(self.hmac) - elif self.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - encjson[_ENCRYPTION_METADATA_LAYOUT] = {} - encjson[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKOFFSETS] = \ - self.chunksizebytes + _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - encjson[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKSTRUCTURE] = \ - _ENCRYPTION_CHUNKSTRUCTURE - else: - raise RuntimeError( - 'Unknown encryption mode: {}'.format(self.encmode)) - bencjson = json.dumps( - encjson, sort_keys=True, ensure_ascii=False).encode( - _ENCRYPTION_METADATA_AUTH_ENCODING_TYPE) - encjson = {_ENCRYPTION_METADATA_NAME: - json.dumps(encjson, sort_keys=True)} - # compute MAC over encjson - hmacsha256 = hmac.new(self.signkey, digestmod=hashlib.sha256) - hmacsha256.update(bencjson) - authjson = { - _ENCRYPTION_METADATA_AUTH_METAAUTH: { - _ENCRYPTION_METADATA_ALGORITHM: _ENCRYPTION_AUTH_ALGORITHM, - _ENCRYPTION_METADATA_AUTH_ENCODING: - _ENCRYPTION_METADATA_AUTH_ENCODING_TYPE, - _ENCRYPTION_METADATA_MAC: base64encode(hmacsha256.digest()), - } - } - encjson[_ENCRYPTION_METADATA_AUTH_NAME] = json.dumps( - authjson, sort_keys=True) - return encjson - - def parse_metadata_json( - self, blobname, rsaprivatekey, rsapublickey, mddict): - """Parses a meta data dictionary containing the encryptiondata - metadata - Parameters: - blobname - name of blob - rsaprivatekey - RSA private key - rsapublickey - RSA public key - mddict - metadata dictionary - Returns: - Nothing - Raises: - RuntimeError if encryptiondata metadata contains invalid or - unknown fields - """ - if _ENCRYPTION_METADATA_NAME not in mddict: - return - # json parse internal dict - meta = json.loads(mddict[_ENCRYPTION_METADATA_NAME]) - # populate preencryption md5 - if (_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS in meta and - _ENCRYPTION_METADATA_PREENCRYPTED_MD5 in meta[ - _ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS]): - self.md5 = meta[_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS][ - _ENCRYPTION_METADATA_PREENCRYPTED_MD5] - else: - self.md5 = None - # if RSA key is not present return - if rsaprivatekey is None and rsapublickey is None: - return - # check for required metadata fields - if (_ENCRYPTION_METADATA_MODE not in meta or - _ENCRYPTION_METADATA_AGENT not in meta): - return - # populate encryption mode - self.encmode = meta[_ENCRYPTION_METADATA_MODE] - # validate known encryption metadata is set to proper values - if self.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - chunkstructure = meta[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKSTRUCTURE] - if chunkstructure != _ENCRYPTION_CHUNKSTRUCTURE: - raise RuntimeError( - '{}: unknown encrypted chunk structure {}'.format( - blobname, chunkstructure)) - protocol = meta[_ENCRYPTION_METADATA_AGENT][ - _ENCRYPTION_METADATA_PROTOCOL] - if protocol != _ENCRYPTION_PROTOCOL_VERSION: - raise RuntimeError('{}: unknown encryption protocol: {}'.format( - blobname, protocol)) - blockcipher = meta[_ENCRYPTION_METADATA_AGENT][ - 
_ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM] - if blockcipher != _ENCRYPTION_ALGORITHM: - raise RuntimeError('{}: unknown block cipher: {}'.format( - blobname, blockcipher)) - if _ENCRYPTION_METADATA_INTEGRITY_AUTH in meta: - intauth = meta[_ENCRYPTION_METADATA_INTEGRITY_AUTH][ - _ENCRYPTION_METADATA_ALGORITHM] - if intauth != _ENCRYPTION_AUTH_ALGORITHM: - raise RuntimeError( - '{}: unknown integrity/auth method: {}'.format( - blobname, intauth)) - symkeyalg = meta[_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - _ENCRYPTION_METADATA_ALGORITHM] - if symkeyalg != _ENCRYPTION_ENCRYPTED_KEY_SCHEME: - raise RuntimeError('{}: unknown key encryption scheme: {}'.format( - blobname, symkeyalg)) - # populate iv and hmac - if self.encmode == _ENCRYPTION_MODE_FULLBLOB: - self.iv = base64.b64decode(meta[_ENCRYPTION_METADATA_CONTENT_IV]) - # don't base64 decode hmac - if _ENCRYPTION_METADATA_INTEGRITY_AUTH in meta: - self.hmac = meta[_ENCRYPTION_METADATA_INTEGRITY_AUTH][ - _ENCRYPTION_METADATA_MAC] - else: - self.hmac = None - # populate chunksize - if self.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - self.chunksizebytes = long( - meta[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKOFFSETS]) - # if RSA key is a public key, stop here as keys cannot be decrypted - if rsaprivatekey is None: - return - # decrypt symmetric key - self.symkey = rsa_decrypt_key( - rsaprivatekey, - meta[_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - _ENCRYPTION_METADATA_ENCRYPTEDKEY], None) - # decrypt signing key, if it exists - if _ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY in meta[ - _ENCRYPTION_METADATA_WRAPPEDCONTENTKEY]: - self.signkey = rsa_decrypt_key( - rsaprivatekey, - meta[_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - _ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY], None) - else: - self.signkey = None - # validate encryptiondata metadata using the signing key - if (self.signkey is not None and - _ENCRYPTION_METADATA_AUTH_NAME in mddict): - authmeta = json.loads(mddict[_ENCRYPTION_METADATA_AUTH_NAME]) - if _ENCRYPTION_METADATA_AUTH_METAAUTH not in authmeta: - raise RuntimeError( - '{}: encryption metadata auth block not found'.format( - blobname)) - if _ENCRYPTION_METADATA_AUTH_ENCODING not in authmeta[ - _ENCRYPTION_METADATA_AUTH_METAAUTH]: - raise RuntimeError( - '{}: encryption metadata auth encoding not found'.format( - blobname)) - intauth = authmeta[_ENCRYPTION_METADATA_AUTH_METAAUTH][ - _ENCRYPTION_METADATA_ALGORITHM] - if intauth != _ENCRYPTION_AUTH_ALGORITHM: - raise RuntimeError( - '{}: unknown integrity/auth method: {}'.format( - blobname, intauth)) - authhmac = base64.b64decode( - authmeta[_ENCRYPTION_METADATA_AUTH_METAAUTH][ - _ENCRYPTION_METADATA_MAC]) - bmeta = mddict[_ENCRYPTION_METADATA_NAME].encode( - authmeta[_ENCRYPTION_METADATA_AUTH_METAAUTH][ - _ENCRYPTION_METADATA_AUTH_ENCODING]) - hmacsha256 = hmac.new(self.signkey, digestmod=hashlib.sha256) - hmacsha256.update(bmeta) - if hmacsha256.digest() != authhmac: - raise RuntimeError( - '{}: encryption metadata authentication failed'.format( - blobname)) - - -class PqTupleSort(tuple): - """Priority Queue tuple sorter: handles priority collisions. 
- 0th item in the tuple is the priority number.""" - def __lt__(self, rhs): - return self[0] < rhs[0] - - def __gt__(self, rhs): - return self[0] > rhs[0] - - def __le__(self, rhs): - return self[0] <= rhs[0] - - def __ge__(self, rhs): - return self[0] >= rhs[0] - - -class SasBlobList(object): - """Sas Blob listing object""" - def __init__(self): - """Ctor for SasBlobList""" - self.blobs = [] - self.next_marker = None - - def __iter__(self): - """Iterator""" - return iter(self.blobs) - - def __len__(self): - """Length""" - return len(self.blobs) - - def __getitem__(self, index): - """Accessor""" - return self.blobs[index] - - def add_blob(self, name, content_length, content_md5, blobtype, mddict): - """Adds a blob to the list - Parameters: - name - blob name - content_length - content length - content_md5 - content md5 - blobtype - blob type - mddict - metadata dictionary - Returns: - Nothing - Raises: - Nothing - """ - obj = type('bloblistobject', (object,), {}) - obj.name = name - obj.metadata = mddict - obj.properties = type('properties', (object,), {}) - obj.properties.content_length = content_length - obj.properties.content_settings = azure.storage.blob.ContentSettings() - if content_md5 is not None and len(content_md5) > 0: - obj.properties.content_settings.content_md5 = content_md5 - obj.properties.blobtype = blobtype - self.blobs.append(obj) - - def set_next_marker(self, marker): - """Set the continuation token - Parameters: - marker - next marker - Returns: - Nothing - Raises: - Nothing - """ - if marker is not None and len(marker) > 0: - self.next_marker = marker - - -class SasBlobService(object): - """BlobService supporting SAS for functions used in the Python SDK. - create_container method does not exist because it is not a supported - operation under SAS""" - def __init__(self, endpoint, saskey, timeout): - """SAS Blob Service ctor - Parameters: - endpoint - storage endpoint - saskey - saskey - timeout - timeout - Returns: - Nothing - Raises: - Nothing - """ - self.endpoint = endpoint - # normalize sas key - if saskey[0] != '?': - self.saskey = '?' 
+ saskey - else: - self.saskey = saskey - self.timeout = timeout - - def _parse_blob_list_xml(self, content): - """Parse blob list in xml format to an attribute-based object - Parameters: - content - http response content in xml - Returns: - attribute-based object - Raises: - No special exception handling - """ - result = SasBlobList() - root = ET.fromstring(content) - blobs = root.find('Blobs') - for blob in blobs.iter('Blob'): - name = blob.find('Name').text - props = blob.find('Properties') - cl = long(props.find('Content-Length').text) - md5 = props.find('Content-MD5').text - bt = props.find('BlobType').text - metadata = blob.find('Metadata') - mddict = {} - for md in metadata: - mddict[md.tag] = md.text - result.add_blob(name, cl, md5, bt, mddict) - try: - result.set_next_marker(root.find('NextMarker').text) - except Exception: - pass - return result - - def list_blobs( - self, container_name, marker=None, - max_results=_MAX_LISTBLOBS_RESULTS, include=None): - """List blobs in container - Parameters: - container_name - container name - marker - marker - max_results - max results - include - `azure.storage.models.Include` include object - Returns: - List of blobs - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - saskey=self.saskey) - reqparams = { - 'restype': 'container', - 'comp': 'list', - 'maxresults': str(max_results)} - if marker is not None: - reqparams['marker'] = marker - if include is not None and include.metadata: - reqparams['include'] = 'metadata' - response = azure_request( - requests.get, url=url, params=reqparams, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError( - 'incorrect status code returned for list_blobs: {}'.format( - response.status_code)) - return self._parse_blob_list_xml(response.content) - - def _get_blob(self, container_name, blob_name, start_range, end_range): - """Get blob - Parameters: - container_name - container name - blob_name - name of blob - start_range - start range of bytes - end_range - end range of bytes - Returns: - `azure.storage.blob.Blob` object - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = { - 'x-ms-range': 'bytes={}-{}'.format(start_range, end_range) - } - response = azure_request( - requests.get, url=url, headers=reqheaders, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200 and response.status_code != 206: - raise IOError( - 'incorrect status code returned for get_blob: {}'.format( - response.status_code)) - return azure.storage.blob.Blob(content=response.content) - - def get_blob_properties(self, container_name, blob_name): - """Get blob properties - Parameters: - container_name - container name - blob_name - name of blob - Returns: - `azure.storage.blob.Blob` object - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - response = azure_request( - requests.head, url=url, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError('incorrect status code returned for ' - 'get_blob_properties: {}'.format( - response.status_code)) - # parse response 
headers into blob object - blob = azure.storage.blob.Blob() - blob.propertes = azure.storage.blob.BlobProperties() - blob.properties.content_length = \ - long(response.headers['content-length']) - blob.properties.content_settings = azure.storage.blob.ContentSettings() - if 'content-md5' in response.headers: - blob.properties.content_settings.content_md5 = \ - response.headers['content-md5'] - # read meta values, all meta values are lowercased - mddict = {} - for res in response.headers: - if res.startswith('x-ms-meta-'): - mddict[res[10:]] = response.headers[res] - blob.metadata = mddict - return blob - - def set_blob_metadata( - self, container_name, blob_name, metadata): - """Set blob metadata. Clearing is not supported. - Parameters: - container_name - container name - blob_name - name of blob - metadata - blob metadata dictionary - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - if metadata is None or len(metadata) == 0: - return - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqparams = {'comp': 'metadata'} - reqheaders = {} - for key in metadata: - reqheaders['x-ms-meta-' + key] = metadata[key] - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError( - 'incorrect status code returned for ' - 'set_blob_metadata: {}'.format(response.status_code)) - - def create_blob( - self, container_name, blob_name, content_length, content_settings): - """Create blob for initializing page blobs - Parameters: - container_name - container name - blob_name - name of blob - content_length - content length aligned to 512-byte boundary - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - response content - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = { - 'x-ms-blob-type': 'PageBlob', - 'x-ms-blob-content-length': str(content_length), - } - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - if content_settings.content_type is not None: - reqheaders['x-ms-blob-content-type'] = \ - content_settings.content_type - response = azure_request( - requests.put, url=url, headers=reqheaders, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for create_blob: {}'.format( - response.status_code)) - return response.content - - def _put_blob( - self, container_name, blob_name, blob, content_settings): - """Put blob for creating/updated block blobs - Parameters: - container_name - container name - blob_name - name of blob - blob - blob content - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - response content - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = {'x-ms-blob-type': 'BlockBlob'} - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - if 
content_settings.content_type is not None: - reqheaders['x-ms-blob-content-type'] = \ - content_settings.content_type - response = azure_request( - requests.put, url=url, headers=reqheaders, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for put_blob: {}'.format( - response.status_code)) - return response.content - - def update_page( - self, container_name, blob_name, page, start_range, end_range, - validate_content=False, content_md5=None): - """Put page for page blob. This API differs from the Python storage - sdk to maintain efficiency for block md5 computation. - Parameters: - container_name - container name - blob_name - name of blob - page - page data - start_range - start range of bytes - end_range - end range of bytes - validate_content - validate content - content_md5 - md5 hash for page data - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = { - 'x-ms-range': 'bytes={}-{}'.format(start_range, end_range), - 'x-ms-page-write': 'update'} - if validate_content and content_md5 is not None: - reqheaders['Content-MD5'] = content_md5 - reqparams = {'comp': 'page'} - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - data=page, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for update_page: {}'.format( - response.status_code)) - - def put_block( - self, container_name, blob_name, block, block_id, - validate_content=False): - """Put block for blob - Parameters: - container_name - container name - blob_name - name of blob - block - block data - block_id - block id - validate_content - validate content - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - # compute block md5 - if validate_content: - reqheaders = {'Content-MD5': compute_md5_for_data_asbase64(block)} - else: - reqheaders = None - reqparams = {'comp': 'block', 'blockid': block_id} - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - data=block, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for put_block: {}'.format( - response.status_code)) - - def put_block_list( - self, container_name, blob_name, block_list, - content_settings): - """Put block list for blob - Parameters: - container_name - container name - blob_name - name of blob - block_list - list of `azure.storage.blob.BlobBlock` - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = {} - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - if content_settings.content_type is not None: - reqheaders['x-ms-blob-content-type'] = \ - content_settings.content_type - reqparams = {'comp': 
'blocklist'} - body = [''] - for block in block_list: - body.append('{}'.format(block.id)) - body.append('') - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - data=''.join(body), timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for put_block_list: {}'.format( - response.status_code)) - - def set_blob_properties( - self, container_name, blob_name, content_settings): - """Sets blob properties (MD5 only) - Parameters: - container_name - container name - blob_name - name of blob - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = {} - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - reqparams = {'comp': 'properties'} - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError('incorrect status code returned for ' - 'set_blob_properties: {}'.format( - response.status_code)) - - def delete_blob( - self, container_name, blob_name): - """Deletes a blob - Parameters: - container_name - container name - blob_name - name of blob - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - response = azure_request( - requests.delete, url=url, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 202: - raise IOError( - 'incorrect status code returned for delete_blob: {}'.format( - response.status_code)) - - def create_container( - self, container_name, fail_on_exist=False): - """Create a container - Parameters: - container_name - container name - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - saskey=self.saskey) - reqparams = {'restype': 'container'} - response = azure_request( - requests.put, url=url, params=reqparams, timeout=self.timeout) - if response.status_code != 201: - if response.status_code == 409: - if fail_on_exist: - response.raise_for_status() - else: - return - raise IOError('incorrect status code returned for ' - 'create_container: {}'.format( - response.status_code)) - - -class StorageChunkWorker(threading.Thread): - """Chunk worker for a storage entity""" - def __init__( - self, exc, s_in_queue, s_out_queue, args, xfertoazure, - blob_service, file_service): - """Storage Chunk worker Thread ctor - Parameters: - exc - exception list - s_in_queue - storage in queue - s_out_queue - storage out queue - args - program arguments - xfertoazure - xfer to azure (direction) - blob_service - blob service - file_service - file service - Returns: - Nothing - Raises: - Nothing - """ - threading.Thread.__init__(self) - self.terminate = False - self._exc = exc - self._in_queue = s_in_queue - self._out_queue = s_out_queue - self.args = args - self.xfertoazure = xfertoazure - self.blob_service = blob_service - self.file_service = 
file_service - - def run(self): - """Thread code - Parameters: - Nothing - Returns: - Nothing - Raises: - Nothing - """ - while not self.terminate: - try: - pri, (localresource, container, remoteresource, blockid, - offset, bytestoxfer, encparam, flock, filedesc) = \ - self._in_queue.get_nowait() - except queue.Empty: - break - # detect termination early and break if necessary - if self.terminate: - break - try: - if self.xfertoazure: - # if iv is not ready for this chunk, re-add back to queue - if (not as_page_blob(self.args, localresource) and - ((self.args.rsaprivatekey is not None or - self.args.rsapublickey is not None) and - self.args.encmode == _ENCRYPTION_MODE_FULLBLOB)): - _iblockid = int(blockid) - if _iblockid not in encparam[2]: - self._in_queue.put( - PqTupleSort(( - pri, - (localresource, container, remoteresource, - blockid, offset, bytestoxfer, encparam, - flock, filedesc)))) - continue - # upload block/page - self.put_storage_data( - localresource, container, remoteresource, blockid, - offset, bytestoxfer, encparam, flock, filedesc) - else: - # download range - self.get_storage_range( - localresource, container, remoteresource, blockid, - offset, bytestoxfer, encparam, flock, filedesc) - # pylint: disable=W0703 - except Exception: - # pylint: enable=W0703 - self._exc.append(traceback.format_exc()) - self._out_queue.put((localresource, encparam)) - if len(self._exc) > 0: - break - - def put_storage_data( - self, localresource, container, remoteresource, blockid, offset, - bytestoxfer, encparam, flock, filedesc): - """Puts data (blob, page or file bits) into Azure storage - Parameters: - localresource - name of local resource - container - blob container - remoteresource - name of remote resource - blockid - block id (ignored for page blobs) - offset - file offset - bytestoxfer - number of bytes to xfer - encparam - encryption metadata: (symkey, signkey, ivmap, pad) - flock - file lock - filedesc - file handle - Returns: - Nothing - Raises: - IOError if file cannot be read - """ - # if bytestoxfer is zero, then we're transferring a zero-byte - # file, use put blob instead of page/block ops - if bytestoxfer == 0: - contentmd5 = compute_md5_for_data_asbase64(b'') - if as_page_blob(self.args, localresource): - azure_request( - self.blob_service[1].create_blob, container_name=container, - blob_name=remoteresource, content_length=bytestoxfer, - content_settings=azure.storage.blob.ContentSettings( - content_type=get_mime_type(localresource), - content_md5=contentmd5)) - elif self.args.fileshare: - fsfile = split_fileshare_path_into_parts(remoteresource) - azure_request( - self.file_service.create_file, share_name=container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=bytestoxfer, - content_settings=azure.storage.file.ContentSettings( - content_type=get_mime_type(localresource), - content_md5=contentmd5)) - else: - azure_request( - self.blob_service[0]._put_blob, container_name=container, - blob_name=remoteresource, blob=None, - content_settings=azure.storage.blob.ContentSettings( - content_type=get_mime_type(localresource), - content_md5=contentmd5)) - return - # read the file at specified offset, must take lock - data = None - with flock: - closefd = False - if not filedesc: - filedesc = open(localresource, 'rb') - closefd = True - filedesc.seek(offset, 0) - data = filedesc.read(bytestoxfer) - if closefd: - filedesc.close() - if not data: - raise IOError('could not read {}: {} -> {}'.format( - localresource, offset, offset + bytestoxfer)) - # issue REST put 
- if as_page_blob(self.args, localresource): - aligned = page_align_content_length(bytestoxfer) - # fill data to boundary - if aligned != bytestoxfer: - data = data.ljust(aligned, b'\0') - # compute page md5 - contentmd5 = compute_md5_for_data_asbase64(data) - # check if this page is empty - if contentmd5 == _EMPTY_MAX_PAGE_SIZE_MD5: - return - elif len(data) != _MAX_BLOB_CHUNK_SIZE_BYTES: - data_chk = b'\0' * len(data) - data_chk_md5 = compute_md5_for_data_asbase64(data_chk) - del data_chk - if data_chk_md5 == contentmd5: - return - del data_chk_md5 - # upload page range - if self.args.saskey: - azure_request( - self.blob_service[1].update_page, container_name=container, - blob_name=remoteresource, page=data, start_range=offset, - end_range=offset + aligned - 1, - validate_content=self.args.computeblockmd5, - content_md5=contentmd5, timeout=self.args.timeout) - else: - azure_request( - self.blob_service[1].update_page, container_name=container, - blob_name=remoteresource, page=data, start_range=offset, - end_range=offset + aligned - 1, - validate_content=self.args.computeblockmd5, - timeout=self.args.timeout) - else: - # encrypt block if required - if (encparam is not None and - (self.args.rsaprivatekey is not None or - self.args.rsapublickey is not None)): - symkey = encparam[0] - signkey = encparam[1] - if self.args.encmode == _ENCRYPTION_MODE_FULLBLOB: - _blkid = int(blockid) - iv = encparam[2][_blkid] - pad = encparam[3] - else: - iv = None - pad = True - data = encrypt_chunk( - symkey, signkey, data, self.args.encmode, iv=iv, pad=pad) - with flock: - if self.args.encmode == _ENCRYPTION_MODE_FULLBLOB: - # compute hmac for chunk - if _blkid == 0: - encparam[2]['hmac'].update(iv + data) - else: - encparam[2]['hmac'].update(data) - # store iv for next chunk - encparam[2][_blkid + 1] = data[ - len(data) - _AES256_BLOCKSIZE_BYTES:] - # compute md5 for encrypted data chunk - encparam[2]['md5'].update(data) - if self.args.fileshare: - bytestoxfer = len(data) - encparam[2]['filesize'] += bytestoxfer - if self.args.fileshare: - fsfile = split_fileshare_path_into_parts(remoteresource) - # subtract 1 from end_range - azure_request( - self.file_service.update_range, share_name=container, - directory_name=fsfile[0], file_name=fsfile[1], - data=data, start_range=offset, - end_range=offset + bytestoxfer - 1, - validate_content=self.args.computeblockmd5, - timeout=self.args.timeout) - else: - azure_request( - self.blob_service[0].put_block, container_name=container, - blob_name=remoteresource, block=data, block_id=blockid, - validate_content=self.args.computeblockmd5, - timeout=self.args.timeout) - del data - - def get_storage_range( - self, localresource, container, remoteresource, blockid, offset, - bytestoxfer, encparam, flock, filedesc): - """Get a segment of a blob/page/file using range offset downloading - Parameters: - localresource - name of local resource - container - blob container - remoteresource - name of remote resource - blockid - block id (integral) - offset - file offset - bytestoxfer - number of bytes to xfer - encparam - decryption metadata: - (symkey, signkey, offset_mod, encmode, ivmap, unpad) - flock - file lock - filedesc - file handle - Returns: - Nothing - Raises: - Nothing - """ - if (encparam[0] is not None and - encparam[3] == _ENCRYPTION_MODE_FULLBLOB): - if offset == 0: - start_range = offset - end_range = offset + bytestoxfer - else: - # retrieve block size data prior for IV - start_range = offset - _AES256_BLOCKSIZE_BYTES - end_range = offset + bytestoxfer - else: - 
start_range = offset - end_range = offset + bytestoxfer - if self.args.fileshare: - fsfile = split_fileshare_path_into_parts(remoteresource) - _blob = azure_request( - self.file_service._get_file, share_name=container, - directory_name=fsfile[0], file_name=fsfile[1], - start_range=start_range, end_range=end_range, - timeout=self.args.timeout) - else: - if as_page_blob(self.args, localresource): - blob_service = self.blob_service[1] - else: - blob_service = self.blob_service[0] - _blob = azure_request( - blob_service._get_blob, timeout=self.args.timeout, - container_name=container, blob_name=remoteresource, - start_range=start_range, end_range=end_range) - blobdata = _blob.content - # decrypt block if required - if encparam[0] is not None: - if encparam[3] == _ENCRYPTION_MODE_FULLBLOB: - if offset == 0: - iv = encparam[4][0] - else: - iv = blobdata[:_AES256_BLOCKSIZE_BYTES] - blobdata = blobdata[_AES256_BLOCKSIZE_BYTES:] - unpad = encparam[5] - # update any buffered data to hmac - hmacdict = encparam[4]['hmac'] - if hmacdict['hmac'] is not None: - # grab file lock to manipulate hmac - with flock: - # include iv in first hmac calculation - if offset == 0: - hmacdict['buffered'][blockid] = iv + blobdata - else: - hmacdict['buffered'][blockid] = blobdata - # try to process hmac data - while True: - curr = hmacdict['curr'] - if curr in hmacdict['buffered']: - hmacdict['hmac'].update( - hmacdict['buffered'][curr]) - hmacdict['buffered'].pop(curr) - hmacdict['curr'] = curr + 1 - else: - break - else: - iv = None - unpad = True - blobdata = decrypt_chunk( - encparam[0], encparam[1], blobdata, encparam[3], iv=iv, - unpad=unpad) - if blobdata is not None: - with flock: - closefd = False - if not filedesc: - filedesc = open(localresource, 'r+b') - closefd = True - filedesc.seek(offset - (encparam[2] or 0), 0) - filedesc.write(blobdata) - if closefd: - filedesc.close() - del blobdata - del _blob - - -def pad_pkcs7(buf): - """Appends PKCS7 padding to an input buffer. - Parameters: - buf - buffer to add padding - Returns: - buffer with PKCS7_PADDING - Raises: - No special exception handling - """ - padder = cryptography.hazmat.primitives.padding.PKCS7( - cryptography.hazmat.primitives.ciphers. - algorithms.AES.block_size).padder() - return padder.update(buf) + padder.finalize() - - -def unpad_pkcs7(buf): - """Removes PKCS7 padding a decrypted object. - Parameters: - buf - buffer to remove padding - Returns: - buffer without PKCS7_PADDING - Raises: - No special exception handling - """ - unpadder = cryptography.hazmat.primitives.padding.PKCS7( - cryptography.hazmat.primitives.ciphers. 
- algorithms.AES.block_size).unpadder() - return unpadder.update(buf) + unpadder.finalize() - - -def generate_aes256_keys(): - """Generate AES256 symmetric key and signing key - Parameters: - None - Returns: - Tuple of symmetric key and signing key - Raises: - Nothing - """ - symkey = os.urandom(_AES256_KEYLENGTH_BYTES) - signkey = os.urandom(_AES256_KEYLENGTH_BYTES) - return symkey, signkey - - -def rsa_encrypt_key(rsaprivatekey, rsapublickey, plainkey, asbase64=True): - """Encrypt a plaintext key using RSA and PKCS1_OAEP padding - Parameters: - rsaprivatekey - rsa private key for encryption - rsapublickey - rsa public key for encryption - plainkey - plaintext key - asbase64 - encode as base64 - Returns: - Tuple of encrypted key and signature (if RSA private key is given) - Raises: - Nothing - """ - if rsapublickey is None: - rsapublickey = rsaprivatekey.public_key() - if rsaprivatekey is None: - signature = None - else: - signer = rsaprivatekey.signer( - cryptography.hazmat.primitives.asymmetric.padding.PSS( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - cryptography.hazmat.primitives.hashes.SHA256()), - salt_length=cryptography.hazmat.primitives.asymmetric. - padding.PSS.MAX_LENGTH), - cryptography.hazmat.primitives.hashes.SHA256()) - signer.update(plainkey) - signature = signer.finalize() - enckey = rsapublickey.encrypt( - plainkey, cryptography.hazmat.primitives.asymmetric.padding.OAEP( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - algorithm=cryptography.hazmat.primitives.hashes.SHA1()), - algorithm=cryptography.hazmat.primitives.hashes.SHA1(), - label=None)) - if asbase64: - return base64encode(enckey), base64encode( - signature) if signature is not None else signature - else: - return enckey, signature - - -def rsa_decrypt_key(rsaprivatekey, enckey, signature, isbase64=True): - """Decrypt an RSA encrypted key and optional signature verification - Parameters: - rsaprivatekey - rsa private key for decryption - enckey - encrypted key - signature - optional signature to verify encrypted data - isbase64 - if keys are base64 encoded - Returns: - Decrypted key - Raises: - RuntimeError if RSA signature validation fails - """ - if isbase64: - enckey = base64.b64decode(enckey) - deckey = rsaprivatekey.decrypt( - enckey, cryptography.hazmat.primitives.asymmetric.padding.OAEP( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - algorithm=cryptography.hazmat.primitives.hashes.SHA1()), - algorithm=cryptography.hazmat.primitives.hashes.SHA1(), - label=None)) - if signature is not None and len(signature) > 0: - rsapublickey = rsaprivatekey.public_key() - if isbase64: - signature = base64.b64decode(signature) - verifier = rsapublickey.verifier( - signature, cryptography.hazmat.primitives.asymmetric.padding.PSS( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - cryptography.hazmat.primitives.hashes.SHA256()), - salt_length=cryptography.hazmat.primitives.asymmetric. 
- padding.PSS.MAX_LENGTH), - cryptography.hazmat.primitives.hashes.SHA256()) - verifier.update(deckey) - verifier.verify() - return deckey - - -def encrypt_chunk(symkey, signkey, data, encmode, iv=None, pad=False): - """Encrypt a chunk of data - Parameters: - symkey - symmetric key - signkey - signing key - data - data to encrypt - encmode - encryption mode - iv - initialization vector - pad - pad data - Returns: - iv and hmac not specified: iv || encrypted data || signature - else: encrypted data - Raises: - No special exception handling - """ - # create iv - if encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - iv = os.urandom(_AES256_BLOCKSIZE_BYTES) - # force padding on since this will be an individual encrypted chunk - pad = True - # encrypt data - cipher = cryptography.hazmat.primitives.ciphers.Cipher( - cryptography.hazmat.primitives.ciphers.algorithms.AES(symkey), - cryptography.hazmat.primitives.ciphers.modes.CBC(iv), - backend=cryptography.hazmat.backends.default_backend()).encryptor() - if pad: - encdata = cipher.update(pad_pkcs7(data)) + cipher.finalize() - else: - encdata = cipher.update(data) + cipher.finalize() - # sign encrypted data - if encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - hmacsha256 = hmac.new(signkey, digestmod=hashlib.sha256) - hmacsha256.update(iv + encdata) - return iv + encdata + hmacsha256.digest() - else: - return encdata - - -def decrypt_chunk( - symkey, signkey, encchunk, encmode, iv=None, unpad=False): - """Decrypt a chunk of data - Parameters: - symkey - symmetric key - signkey - signing key - encchunk - data to decrypt - encmode - encryption mode - blockid - block id - iv - initialization vector - unpad - unpad data - Returns: - decrypted data - Raises: - RuntimeError if signature verification fails - """ - # if chunked blob, then preprocess for iv and signature - if encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - # retrieve iv - iv = encchunk[:_AES256_BLOCKSIZE_BYTES] - # retrieve encrypted data - encdata = encchunk[ - _AES256_BLOCKSIZE_BYTES:-_HMACSHA256_DIGESTSIZE_BYTES] - # retrieve signature - sig = encchunk[-_HMACSHA256_DIGESTSIZE_BYTES:] - # validate integrity of data - hmacsha256 = hmac.new(signkey, digestmod=hashlib.sha256) - # compute hmac over iv + encdata - hmacsha256.update(encchunk[:-_HMACSHA256_DIGESTSIZE_BYTES]) - if not cryptography.hazmat.primitives.constant_time.bytes_eq( - hmacsha256.digest(), sig): - raise RuntimeError( - 'Encrypted data integrity check failed for chunk') - else: - encdata = encchunk - # decrypt data - cipher = cryptography.hazmat.primitives.ciphers.Cipher( - cryptography.hazmat.primitives.ciphers.algorithms.AES(symkey), - cryptography.hazmat.primitives.ciphers.modes.CBC(iv), - backend=cryptography.hazmat.backends.default_backend()).decryptor() - decrypted = cipher.update(encdata) + cipher.finalize() - if unpad: - return unpad_pkcs7(decrypted) - else: - return decrypted - - -def azure_request(req, timeout=None, *args, **kwargs): - """Wrapper method to issue/retry requests to Azure, works with both - the Azure Python SDK and Requests - Parameters: - req - request to issue - timeout - timeout in seconds - args - positional args to req - kwargs - keyworded args to req - Returns: - result of request - Raises: - Any uncaught exceptions - IOError if timeout - """ - start = time.clock() - lastwait = None - while True: - try: - return req(*args, **kwargs) - except requests.Timeout: - pass - except (requests.ConnectionError, - requests.exceptions.ChunkedEncodingError) as exc: - if (isinstance(exc.args[0], requests.packages.urllib3. 
- exceptions.ProtocolError) and - isinstance(exc.args[0].args[1], socket.error)): - err = exc.args[0].args[1].errno - if (err != errno.ECONNRESET and - err != errno.ECONNREFUSED and - err != errno.ECONNABORTED and - err != errno.ENETRESET and - err != errno.ETIMEDOUT): - raise - except requests.HTTPError as exc: - if (exc.response.status_code < 500 or - exc.response.status_code == 501 or - exc.response.status_code == 505): - raise - except azure.common.AzureHttpError as exc: - if (exc.status_code < 500 or - exc.status_code == 501 or - exc.status_code == 505): - raise - if timeout is not None and time.clock() - start > timeout: - raise IOError( - 'waited {} sec for request {}, exceeded timeout of {}'.format( - time.clock() - start, req.__name__, timeout)) - if lastwait is None or lastwait > 8: - wait = 1 - else: - wait = lastwait << 1 - lastwait = wait - time.sleep(wait) - - -def create_dir_ifnotexists(dirname): - """Create a directory if it doesn't exist - Parameters: - dirname - name of directory to create - Returns: - Nothing - Raises: - Unhandled exceptions - """ - try: - os.makedirs(dirname) - print('created local directory: {}'.format(dirname)) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise # pragma: no cover - - -def get_mime_type(filename): - """Guess the type of a file based on its filename - Parameters: - filename - filename to guess the content-type - Returns: - A string of the form 'type/subtype', - usable for a MIME content-type header - Raises: - Nothing - """ - return (mimetypes.guess_type(filename)[0] or 'application/octet-stream') - - -def encode_blobname(args, blobname): - """Encode blob name: url encode. Due to current Azure Python Storage SDK - limitations, does not apply to non-SAS requests. - Parameters: - args - program arguments - Returns: - urlencoded blob name - Raises: - Nothing - """ - if args.saskey is None or args.fileshare: - return blobname - else: - return urlquote(blobname) - - -def base64encode(obj): - """Encode object to base64 - Parameters: - obj - object to encode - Returns: - base64 encoded string - Raises: - Nothing - """ - if _PY2: - return base64.b64encode(obj) - else: - return str(base64.b64encode(obj), 'ascii') - - -def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): - """Compute MD5 hash for file and encode as Base64 - Parameters: - filename - filename to compute md5 - pagealign - align bytes for page boundary - blocksize - block size in bytes - Returns: - MD5 for file encoded as Base64 - Raises: - Nothing - """ - hasher = hashlib.md5() - with open(filename, 'rb') as filedesc: - while True: - buf = filedesc.read(blocksize) - if not buf: - break - buflen = len(buf) - if pagealign and buflen < blocksize: - aligned = page_align_content_length(buflen) - if aligned != buflen: - buf = buf.ljust(aligned, b'\0') - hasher.update(buf) - return base64encode(hasher.digest()) - - -def compute_md5_for_data_asbase64(data): - """Compute MD5 hash for bits and encode as Base64 - Parameters: - data - data to compute MD5 hash over - Returns: - MD5 for data encoded as Base64 - Raises: - Nothing - """ - hasher = hashlib.md5() - hasher.update(data) - return base64encode(hasher.digest()) - - -def page_align_content_length(length): - """Compute page boundary alignment - Parameters: - length - content length - Returns: - aligned byte boundary - Raises: - Nothing - """ - mod = length % _PAGEBLOB_BOUNDARY - if mod != 0: - return length + (_PAGEBLOB_BOUNDARY - mod) - return length - - -def as_page_blob(args, name): - """Determines 
if the file should be a pageblob depending upon args - Parameters: - args - program args - name - file name - Returns: - True if file should be a pageblob - Raises: - Nothing - """ - if not args.fileshare and ( - args.pageblob or (args.autovhd and name.lower().endswith('.vhd'))): - return True - return False - - -def get_blob_listing(blob_service, args, metadata=True): - """Convenience method for generating a blob listing of a container - Parameters: - blob_service - blob service - args - program arguments - metadata - include metadata - Returns: - dictionary of blob -> list [content length, content md5, enc metadata] - Raises: - Nothing - """ - marker = None - blobdict = {} - if metadata: - incl = azure.storage.blob.Include.METADATA - else: - incl = None - while True: - try: - result = azure_request( - blob_service.list_blobs, timeout=args.timeout, - container_name=args.container, marker=marker, include=incl) - except azure.common.AzureMissingResourceHttpError: - break - for blob in result: - blobdict[blob.name] = [ - blob.properties.content_length, - blob.properties.content_settings.content_md5, None] - if (blob.metadata is not None and - _ENCRYPTION_METADATA_NAME in blob.metadata): - encmeta = EncryptionMetadataJson( - args, None, None, None, None, None) - encmeta.parse_metadata_json( - blob.name, args.rsaprivatekey, args.rsapublickey, - blob.metadata) - blobdict[blob.name][1] = encmeta.md5 - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - blobdict[blob.name][2] = encmeta - marker = result.next_marker - if marker is None or len(marker) < 1: - break - return blobdict - - -def get_fileshare_listing(file_service, args, metadata=True): - """Retrieve all files and directories under a file share - Parameters: - file_service - file service - args - program args - metadata - retrieve metadata - Returns: - dictionary of files -> list [content length, content md5, enc metadata] - Raises: - Nothing - """ - blobdict = {} - dirs = [None] - while len(dirs) > 0: - dir = dirs.pop() - fsfiles = file_service.list_directories_and_files( - share_name=args.container, directory_name=dir, - timeout=args.timeout) - if dir is None: - dir = '' - for fsfile in fsfiles: - fspath = os.path.join(dir, fsfile.name) - if isinstance(fsfile, azure.storage.file.File): - fsprop = get_fileshare_file_properties( - file_service, args, fspath) - blobdict[fspath] = fsprop[1] - else: - dirs.append(fspath) - return blobdict - - -def split_fileshare_path_into_parts(remotefname): - """Split fileshare name into parts - Parameters: - remotefname - remote file name - Returns: - tuple of (directory name, file name) - Raises: - Nothing - """ - parts = remotefname.split(os.path.sep) - dirname = os.path.sep.join(parts[:len(parts) - 1]) - return (dirname, parts[-1]) - - -def get_fileshare_file_properties(file_service, args, remotefname): - """Convenience method for retrieving a file share file's properties and - metadata - Parameters: - file_service - file service - args - program arguments - remotefname - remote file name - Returns: - blobdict entry tuple (file name, blobdict value) - Raises: - Nothing - """ - # split directory and file name - dirname, fname = split_fileshare_path_into_parts(remotefname) - try: - fsfile = file_service.get_file_properties( - args.container, dirname, fname, timeout=args.timeout) - except azure.common.AzureMissingResourceHttpError: - return None - fsmeta = file_service.get_file_metadata( - args.container, dirname, fname, timeout=args.timeout) - entry = [ - 
fsfile.properties.content_length, - fsfile.properties.content_settings.content_md5, None] - if fsmeta is not None and _ENCRYPTION_METADATA_NAME in fsmeta: - encmeta = EncryptionMetadataJson( - args, None, None, None, None, None) - encmeta.parse_metadata_json( - fsfile.name, args.rsaprivatekey, args.rsapublickey, - fsmeta) - entry[1] = encmeta.md5 - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - entry[2] = encmeta - return (fsfile.name, entry) - - -def create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated): - """Create all parent directories of a given file share path - Parameters - file_service - file service - args - program args - fsfile - file share path - dirscreated - directories created set - Returns: - Nothing - Raises: - Nothing - """ - dirs = fsfile[0].split(os.path.sep) - for i in xrange(0, len(dirs)): - dir = os.path.join(*(dirs[0:i + 1])) - if dir not in dirscreated: - file_service.create_directory( - share_name=args.container, - directory_name=dir, fail_on_exist=False, - timeout=args.timeout) - dirscreated.add(dir) - - -def generate_xferspec_download( - blob_service, file_service, args, storage_in_queue, localfile, - remoteresource, addfd, blobprop): - """Generate an xferspec for download - Parameters: - blob_service - blob service - file_service - file service - args - program arguments - storage_in_queue - storage input queue - localfile - name of local resource - remoteresource - name of remote resource - addfd - create and add file handle - blobprop - blob properties list [length, md5, metadatadict] - Returns: - xferspec containing instructions - Raises: - ValueError if get_blob_properties returns an invalid result or - contentlength is invalid - """ - contentlength = blobprop[0] - contentmd5 = blobprop[1] - encmeta = blobprop[2] - remoteresource = encode_blobname(args, remoteresource) - # get the blob metadata if missing - if not args.fileshare and ( - contentlength is None or contentmd5 is None or - (args.rsaprivatekey is not None and encmeta is None)): - result = azure_request( - blob_service.get_blob_properties, timeout=args.timeout, - container_name=args.container, blob_name=remoteresource) - if not result: - raise ValueError( - 'unexpected result for get_blob_properties is None') - contentmd5 = result.properties.content_settings.content_md5 - contentlength = result.properties.content_length - if (args.rsaprivatekey is not None and - _ENCRYPTION_METADATA_NAME in result.metadata): - encmeta = EncryptionMetadataJson( - args, None, None, None, None, None) - encmeta.parse_metadata_json( - remoteresource, args.rsaprivatekey, args.rsapublickey, - result.metadata) - if contentlength < 0: - raise ValueError( - 'contentlength is invalid for {}'.format(remoteresource)) - # overwrite content md5 if encryption metadata exists - if encmeta is not None: - contentmd5 = encmeta.md5 - # check if download is needed - if (args.skiponmatch and contentmd5 is not None and - os.path.exists(localfile)): - print('computing file md5 on: {} length: {}'.format( - localfile, contentlength)) - lmd5 = compute_md5_for_file_asbase64(localfile) - print(' >> {} {} {} '.format( - lmd5, contentmd5, remoteresource), end='') - if lmd5 != contentmd5: - print('MISMATCH: re-download') - else: - print('match: skip') - return None, None, None, None - else: - print('remote blob: {} length: {} bytes, md5: {}'.format( - remoteresource, contentlength, contentmd5)) - tmpfilename = localfile + '.blobtmp' - if encmeta is not None: - chunksize = 
encmeta.chunksizebytes - symkey = encmeta.symkey - signkey = encmeta.signkey - if encmeta.encmode == _ENCRYPTION_MODE_FULLBLOB: - ivmap = { - 0: encmeta.iv, - 'hmac': { - 'hmac': None, - 'buffered': {}, - 'curr': 0, - 'sig': encmeta.hmac, - } - } - if signkey is not None: - ivmap['hmac']['hmac'] = hmac.new( - signkey, digestmod=hashlib.sha256) - offset_mod = 0 - elif encmeta.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - ivmap = None - offset_mod = _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - else: - raise RuntimeError('Unknown encryption mode: {}'.format( - encmeta.encmode)) - else: - chunksize = args.chunksizebytes - offset_mod = 0 - symkey = None - signkey = None - ivmap = None - nchunks = contentlength // chunksize - # compute allocation size, if encrypted this will be an - # underallocation estimate - if contentlength > 0: - if encmeta is not None: - if encmeta.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - allocatesize = contentlength - ((nchunks + 2) * offset_mod) - else: - allocatesize = contentlength - _AES256_BLOCKSIZE_BYTES - else: - allocatesize = contentlength - if allocatesize < 0: - allocatesize = 0 - else: - allocatesize = 0 - currfileoffset = 0 - nstorageops = 0 - flock = threading.Lock() - filedesc = None - # preallocate file - with flock: - filedesc = open(tmpfilename, 'wb') - if allocatesize > 0: - filedesc.seek(allocatesize - 1) - filedesc.write(b'\0') - filedesc.close() - if addfd: - # reopen under r+b mode - filedesc = open(tmpfilename, 'r+b') - else: - filedesc = None - chunktoadd = min(chunksize, contentlength) - for i in xrange(nchunks + 1): - if chunktoadd + currfileoffset > contentlength: - chunktoadd = contentlength - currfileoffset - # on download, chunktoadd must be offset by 1 as the x-ms-range - # header expects it that way. x -> y bytes means first bits of the - # (x+1)th byte to the last bits of the (y+1)th byte. 
for example, - # 0 -> 511 means byte 1 to byte 512 - encparam = [ - symkey, signkey, i * offset_mod, - encmeta.encmode if encmeta is not None else None, ivmap, False] - xferspec = (tmpfilename, args.container, remoteresource, i, - currfileoffset, chunktoadd - 1, encparam, flock, filedesc) - currfileoffset = currfileoffset + chunktoadd - nstorageops = nstorageops + 1 - storage_in_queue.put(PqTupleSort((i, xferspec))) - if currfileoffset >= contentlength: - encparam[5] = True - break - return contentlength, nstorageops, contentmd5, filedesc - - -def generate_xferspec_upload( - args, storage_in_queue, blobskipdict, blockids, localfile, - remoteresource, addfd): - """Generate an xferspec for upload - Parameters: - args - program arguments - storage_in_queue - storage input queue - blobskipdict - blob skip dictionary - blockids - block id dictionary - localfile - name of local resource - remoteresource - name of remote resource - addfd - create and add file handle - Returns: - xferspec containing instructions - Raises: - Nothing - """ - # compute md5 hash - md5digest = None - if args.computefilemd5: - print('computing file md5 on: {}'.format(localfile)) - md5digest = compute_md5_for_file_asbase64( - localfile, as_page_blob(args, localfile)) - # check if upload is needed - if args.skiponmatch and remoteresource in blobskipdict: - print(' >> {} {} {} '.format( - md5digest, blobskipdict[remoteresource][1], - remoteresource), end='') - if md5digest != blobskipdict[remoteresource][1]: - print('MISMATCH: re-upload') - else: - print('match: skip') - return None, 0, None, None - else: - print(' >> md5: {}'.format(md5digest)) - # create blockids entry - if localfile not in blockids: - blockids[localfile] = [] - # partition local file into chunks - filesize = os.path.getsize(localfile) - if as_page_blob(args, localfile) and ( - args.rsaprivatekey is not None or - args.rsapublickey is not None): - chunksizebytes = _MAX_BLOB_CHUNK_SIZE_BYTES - nchunks = filesize // chunksizebytes - if nchunks > 250000: - raise RuntimeError( - '{} chunks for file {} exceeds Azure Storage limits for a ' - 'single page blob'.format(nchunks, localfile)) - else: - chunksizebytes = args.chunksizebytes - nchunks = filesize // chunksizebytes - if nchunks > 50000: - raise RuntimeError( - '{} chunks for file {} exceeds Azure Storage limits for a ' - 'single block blob'.format(nchunks, localfile)) - chunktoadd = min(chunksizebytes, filesize) - currfileoffset = 0 - nstorageops = 0 - flock = threading.Lock() - filedesc = None - if addfd: - with flock: - filedesc = open(localfile, 'rb') - symkey = None - signkey = None - ivmap = None - for i in xrange(nchunks + 1): - if chunktoadd + currfileoffset > filesize: - chunktoadd = filesize - currfileoffset - blockid = '{0:08d}'.format(currfileoffset // chunksizebytes) - # generate the ivmap for the first block - if (not as_page_blob(args, localfile) and - (args.rsaprivatekey is not None or - args.rsapublickey is not None) and currfileoffset == 0): - # generate sym/signing keys - symkey, signkey = generate_aes256_keys() - if args.encmode == _ENCRYPTION_MODE_FULLBLOB: - ivmap = { - i: os.urandom(_AES256_BLOCKSIZE_BYTES), - 'hmac': hmac.new(signkey, digestmod=hashlib.sha256), - } - else: - ivmap = {} - ivmap['md5'] = hashlib.md5() - ivmap['filesize'] = 0 - blockids[localfile].append(blockid) - encparam = [symkey, signkey, ivmap, False] - xferspec = (localfile, args.container, - encode_blobname(args, remoteresource), blockid, - currfileoffset, chunktoadd, encparam, flock, filedesc) - 
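# Illustrative sketch, not part of the original blobxfer.py: the enclosing
# upload loop partitions the local file into fixed-size chunks, derives a
# zero-padded block id from the running offset, and enforces the 50,000
# block limit for a single block blob. A standalone sketch of that
# partitioning; the helper name is hypothetical.
def _partition_blocks(filesize, chunksize, maxblocks=50000):
    # yield (block_id, offset, length) triples covering the whole file
    if filesize // chunksize > maxblocks:
        raise RuntimeError('chunk count exceeds block blob limits')
    offset = 0
    while offset < filesize:
        length = min(chunksize, filesize - offset)
        yield '{0:08d}'.format(offset // chunksize), offset, length
        offset += length

assert list(_partition_blocks(10, 4)) == [
    ('00000000', 0, 4), ('00000001', 4, 4), ('00000002', 8, 2)]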
currfileoffset = currfileoffset + chunktoadd - nstorageops = nstorageops + 1 - storage_in_queue.put(PqTupleSort((i, xferspec))) - if currfileoffset >= filesize: - encparam[3] = True - break - return filesize, nstorageops, md5digest, filedesc - - -def apply_file_collation_and_strip(args, fname): - """Apply collation path or component strip to a remote filename - Parameters: - args - arguments - fname - file name - Returns: - remote filename - Raises: - No special exception handling - """ - remotefname = fname.strip(os.path.sep) - if args.collate is not None: - remotefname = remotefname.split(os.path.sep)[-1] - if args.collate != '.': - remotefname = os.path.sep.join((args.collate, remotefname)) - elif args.stripcomponents > 0: - rtmp = remotefname.split(os.path.sep) - nsc = min((len(rtmp) - 1, args.stripcomponents)) - if nsc > 0: - remotefname = os.path.sep.join(rtmp[nsc:]) - return remotefname - - -def main(): - """Main function - Parameters: - None - Returns: - Nothing - Raises: - ValueError for invalid arguments - """ - # get command-line args - args = parseargs() - - # populate args from env vars - if args.storageaccountkey is None: - args.storageaccountkey = os.getenv(_ENVVAR_STORAGEACCOUNTKEY) - if args.saskey is None: - args.saskey = os.getenv(_ENVVAR_SASKEY) - if args.rsakeypassphrase is None: - args.rsakeypassphrase = os.getenv(_ENVVAR_RSAKEYPASSPHRASE) - - # check some parameters - if (len(args.localresource) < 1 or len(args.storageaccount) < 1 or - len(args.container) < 1): - raise ValueError('invalid positional arguments') - if len(args.endpoint) < 1: - raise ValueError('storage endpoint is invalid') - if args.upload and args.download: - raise ValueError( - 'cannot specify both download and upload transfer direction ' - 'within the same invocation') - if args.subscriptionid is not None and args.managementcert is None: - raise ValueError( - 'cannot specify subscription id without a management cert') - if args.subscriptionid is None and args.managementcert is not None: - raise ValueError( - 'cannot specify a management cert without a subscription id') - if args.storageaccountkey is not None and args.saskey is not None: - raise ValueError('cannot use both a sas key and storage account key') - if args.pageblob and args.fileshare: - raise ValueError( - 'cannot specify both page blob and file share destinations') - if args.autovhd and args.fileshare: - raise ValueError( - 'cannot specify both autovhd and file share destination') - if args.pageblob and args.autovhd: - raise ValueError('cannot specify both pageblob and autovhd parameters') - if args.collate is not None and args.stripcomponents is not None: - raise ValueError( - 'cannot specify collate and non-default component ' - 'strip: {}'.format(args.stripcomponents)) - if args.stripcomponents is None: - args.stripcomponents = 1 - if args.stripcomponents < 0: - raise ValueError('invalid component strip number: {}'.format( - args.stripcomponents)) - if args.rsaprivatekey is not None and args.rsapublickey is not None: - raise ValueError('cannot specify both RSA private and public keys') - if args.rsapublickey is not None and args.rsakeypassphrase is not None: - raise ValueError('cannot specify an RSA public key and passphrase') - if args.timeout is not None and args.timeout <= 0: - args.timeout = None - - # get key if we don't have a handle on one - sms = None - if args.saskey is not None: - if len(args.saskey) < 1: - raise ValueError('invalid sas key specified') - elif args.storageaccountkey is None: - if (args.managementcert is not 
None and - args.subscriptionid is not None): - # check to ensure management cert is valid - if len(args.managementcert) == 0 or \ - args.managementcert.split('.')[-1].lower() != 'pem': - raise ValueError('management cert appears to be invalid') - if args.managementep is None or len(args.managementep) == 0: - raise ValueError('management endpoint is invalid') - # expand management cert path out if contains ~ - args.managementcert = os.path.abspath(args.managementcert) - # get sms reference - sms = azure.servicemanagement.ServiceManagementService( - args.subscriptionid, args.managementcert, args.managementep) - # get keys - service_keys = azure_request( - sms.get_storage_account_keys, timeout=args.timeout, - service_name=args.storageaccount) - args.storageaccountkey = service_keys.storage_service_keys.primary - else: - raise ValueError('could not determine authentication to use') - - # check storage account key validity - if args.storageaccountkey is not None and \ - len(args.storageaccountkey) < 1: - raise ValueError('storage account key is invalid') - - # set valid num workers - if args.numworkers < 1: - args.numworkers = 1 - if (args.fileshare and - args.numworkers == _DEFAULT_MAX_STORAGEACCOUNT_WORKERS): - args.numworkers //= 2 - - # expand any paths - args.localresource = os.path.expanduser(args.localresource) - - # sanitize remote file name - if args.remoteresource: - args.remoteresource = args.remoteresource.strip(os.path.sep) - - # set chunk size - if (args.chunksizebytes is None or args.chunksizebytes < 64 or - args.chunksizebytes > _MAX_BLOB_CHUNK_SIZE_BYTES): - args.chunksizebytes = _MAX_BLOB_CHUNK_SIZE_BYTES - - # set storage ep - endpoint = None - if sms: - storage_acct = azure_request( - sms.get_storage_account_properties, timeout=args.timeout, - service_name=args.storageaccount) - if args.fileshare: - endpoint = storage_acct.storage_service_properties.endpoints[3] - else: - endpoint = storage_acct.storage_service_properties.endpoints[0] - else: - if args.fileshare: - endpoint = 'https://{}.file.{}/'.format( - args.storageaccount, args.endpoint) - else: - endpoint = 'https://{}.blob.{}/'.format( - args.storageaccount, args.endpoint) - - # create master block blob, page blob and file service - blob_service = None - if args.storageaccountkey: - if args.endpoint[0] == '.': - args.endpoint = args.endpoint[1:] - block_blob_service = azure.storage.blob.BlockBlobService( - account_name=args.storageaccount, - account_key=args.storageaccountkey, - endpoint_suffix=args.endpoint) - page_blob_service = azure.storage.blob.PageBlobService( - account_name=args.storageaccount, - account_key=args.storageaccountkey, - endpoint_suffix=args.endpoint) - file_service = azure.storage.file.FileService( - account_name=args.storageaccount, - account_key=args.storageaccountkey, - endpoint_suffix=args.endpoint) - blob_service = (block_blob_service, page_blob_service) - elif args.saskey: - _bs = SasBlobService(endpoint, args.saskey, args.timeout) - blob_service = (_bs, _bs) - # normalize sas key for python sdk - if args.saskey[0] == '?': - args.saskey = args.saskey[1:] - file_service = azure.storage.file.FileService( - account_name=args.storageaccount, - sas_token=args.saskey, - endpoint_suffix=args.endpoint) - # disable container/share creation if SAS is not account-level and - # does not contain a signed resource type with container-level access - if args.createcontainer: - args.createcontainer = False - sasparts = args.saskey.split('&') - for part in sasparts: - tmp = part.split('=') - if tmp[0] == 
'srt': - if 'c' in tmp[1]: - args.createcontainer = True - break - del sasparts - if blob_service is None: - raise ValueError('blob_service is invalid') - if args.fileshare and file_service is None: - raise ValueError('file_service is invalid') - - # check which way we're transfering - xfertoazure = False - if (args.upload or - (not args.download and os.path.exists(args.localresource))): - xfertoazure = True - else: - if args.remoteresource is None: - raise ValueError('cannot download remote file if not specified') - - # import rsa key - if args.rsaprivatekey is not None: - rsakeyfile = args.rsaprivatekey - elif args.rsapublickey is not None: - rsakeyfile = args.rsapublickey - else: - rsakeyfile = None - if rsakeyfile is not None: - # check for conflicting options - if args.pageblob: - raise ValueError( - 'cannot operate in page blob mode with encryption enabled') - # check for supported encryption modes - if (args.encmode != _ENCRYPTION_MODE_FULLBLOB and - args.encmode != _ENCRYPTION_MODE_CHUNKEDBLOB): - raise RuntimeError( - 'Unknown encryption mode: {}'.format(args.encmode)) - # only allow full blob encryption mode for now due to - # possible compatibility issues - if args.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - raise RuntimeError( - '{} encryption mode not allowed'.format(args.encmode)) - with open(rsakeyfile, 'rb') as keyfile: - if args.rsaprivatekey is not None: - args.rsaprivatekey = cryptography.hazmat.primitives.\ - serialization.load_pem_private_key( - keyfile.read(), args.rsakeypassphrase, - backend=cryptography.hazmat.backends.default_backend()) - else: - args.rsapublickey = cryptography.hazmat.primitives.\ - serialization.load_pem_public_key( - keyfile.read(), - backend=cryptography.hazmat.backends.default_backend()) - if args.rsaprivatekey is None and not xfertoazure: - raise ValueError('imported RSA key does not have a private key') - # adjust chunk size for padding for chunked mode - if xfertoazure: - if args.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - args.chunksizebytes -= _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - elif args.encmode == _ENCRYPTION_MODE_FULLBLOB: - nchunks = args.chunksizebytes // \ - _AES256CBC_HMACSHA256_OVERHEAD_BYTES - args.chunksizebytes = (nchunks - 1) * \ - _AES256CBC_HMACSHA256_OVERHEAD_BYTES - del nchunks - # ensure chunk size is greater than overhead - if args.chunksizebytes <= ( - _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1) << 1: - raise ValueError('chunksizebytes {} <= encryption min {}'.format( - args.chunksizebytes, - (_AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1) << 1)) - - # disable urllib3 warnings if specified - if args.disableurllibwarnings: - print('!!! 
WARNING: DISABLING URLLIB3 WARNINGS !!!') - requests.packages.urllib3.disable_warnings( - requests.packages.urllib3.exceptions.InsecurePlatformWarning) - requests.packages.urllib3.disable_warnings( - requests.packages.urllib3.exceptions.SNIMissingWarning) - - # collect package versions - packages = ['az.common=' + azure.common.__version__] - try: - packages.append('az.sml=' + azure.servicemanagement.__version__) - except Exception: - pass - try: - packages.append('az.stor=' + azure.storage.__version__) - except Exception: - pass - try: - packages.append('crypt=' + cryptography.__version__) - except Exception: - pass - packages.append( - 'req=' + requests.__version__) - - # print all parameters - print('=====================================') - print(' azure blobxfer parameters [v{}]'.format(_SCRIPT_VERSION)) - print('=====================================') - print(' platform: {}'.format(platform.platform())) - print(' python interpreter: {} {}'.format( - platform.python_implementation(), platform.python_version())) - print(' package versions: {}'.format(' '.join(packages))) - del packages - print(' subscription id: {}'.format(args.subscriptionid)) - print(' management cert: {}'.format(args.managementcert)) - print(' transfer direction: {}'.format( - 'local->Azure' if xfertoazure else 'Azure->local')) - print(' local resource: {}'.format(args.localresource)) - print(' include pattern: {}'.format(args.include)) - print(' remote resource: {}'.format(args.remoteresource)) - print(' max num of workers: {}'.format(args.numworkers)) - print(' timeout: {}'.format(args.timeout)) - print(' storage account: {}'.format(args.storageaccount)) - print(' use SAS: {}'.format(True if args.saskey else False)) - print(' upload as page blob: {}'.format(args.pageblob)) - print(' auto vhd->page blob: {}'.format(args.autovhd)) - print(' upload to file share: {}'.format(args.fileshare)) - print(' container/share name: {}'.format(args.container)) - print(' container/share URI: {}'.format(endpoint + args.container)) - print(' compute block MD5: {}'.format(args.computeblockmd5)) - print(' compute file MD5: {}'.format(args.computefilemd5)) - print(' skip on MD5 match: {}'.format(args.skiponmatch)) - print(' chunk size (bytes): {}'.format(args.chunksizebytes)) - print(' create container: {}'.format(args.createcontainer)) - print(' keep mismatched MD5: {}'.format(args.keepmismatchedmd5files)) - print(' recursive if dir: {}'.format(args.recursive)) - print('component strip on up: {}'.format(args.stripcomponents)) - print(' remote delete: {}'.format(args.delete)) - print(' collate to: {}'.format(args.collate or 'disabled')) - print(' local overwrite: {}'.format(args.overwrite)) - print(' encryption mode: {}'.format( - (args.encmode or 'disabled' if xfertoazure else 'file dependent') - if args.rsaprivatekey is not None or args.rsapublickey is not None - else 'disabled')) - print(' RSA key file: {}'.format(rsakeyfile or 'disabled')) - print(' RSA key type: {}'.format( - 'private' if args.rsaprivatekey is not None else 'public' - if args.rsapublickey is not None else 'disabled')) - print('=======================================\n') - - # mark start time after init - print('script start time: {}'.format(time.strftime("%Y-%m-%d %H:%M:%S"))) - start = time.time() - - # populate instruction queues - allfilesize = 0 - storage_in_queue = queue.PriorityQueue() - nstorageops = 0 - blockids = {} - completed_blockids = {} - filemap = {} - filesizes = {} - delblobs = None - md5map = {} - filedesc = None - if xfertoazure: - # if 
skiponmatch is enabled, list blobs first and check - if args.skiponmatch and not args.fileshare: - blobskipdict = get_blob_listing(blob_service[0], args) - else: - blobskipdict = {} - if os.path.isdir(args.localresource): - if args.remoteresource is not None: - print('WARNING: ignorning specified remoteresource {} for ' - 'directory upload'.format(args.remoteresource)) - _remotefiles = set() - # mirror directory - if args.recursive: - for root, _, files in os.walk(args.localresource): - for dirfile in files: - fname = os.path.join(root, dirfile) - if args.include is not None and not fnmatch.fnmatch( - fname, args.include): - continue - remotefname = apply_file_collation_and_strip( - args, fname) - _remotefiles.add(remotefname) - # manually pull file properties for file service - if args.fileshare and args.skiponmatch: - fsfile = get_fileshare_file_properties( - file_service, args, remotefname) - if fsfile is not None: - blobskipdict[fsfile[0]] = fsfile[1] - filesize, ops, md5digest, filedesc = \ - generate_xferspec_upload( - args, storage_in_queue, blobskipdict, - blockids, fname, remotefname, False) - if filesize is not None: - completed_blockids[fname] = 0 - md5map[fname] = md5digest - filemap[fname] = encode_blobname(args, remotefname) - filesizes[fname] = filesize - allfilesize = allfilesize + filesize - nstorageops = nstorageops + ops - else: - # copy just directory contents, non-recursively - for lfile in os.listdir(args.localresource): - fname = os.path.join(args.localresource, lfile) - if os.path.isdir(fname) or ( - args.include is not None and not fnmatch.fnmatch( - fname, args.include)): - continue - remotefname = apply_file_collation_and_strip(args, fname) - _remotefiles.add(remotefname) - # manually pull file properties for file service - if args.fileshare and args.skiponmatch: - fsfile = get_fileshare_file_properties( - file_service, args, remotefname) - if fsfile is not None: - blobskipdict[fsfile[0]] = fsfile[1] - filesize, ops, md5digest, filedesc = \ - generate_xferspec_upload( - args, storage_in_queue, blobskipdict, - blockids, fname, remotefname, False) - if filesize is not None: - completed_blockids[fname] = 0 - md5map[fname] = md5digest - filemap[fname] = encode_blobname(args, remotefname) - filesizes[fname] = filesize - allfilesize = allfilesize + filesize - nstorageops = nstorageops + ops - # fill deletion list - if args.delete: - # get blob skip dict if it hasn't been populated - if len(blobskipdict) == 0: - if args.fileshare: - blobskipdict = get_fileshare_listing( - file_service, args) - else: - blobskipdict = get_blob_listing( - blob_service[0], args, metadata=False) - delblobs = [x for x in blobskipdict if x not in _remotefiles] - del _remotefiles - else: - # upload single file - if args.remoteresource is None: - args.remoteresource = args.localresource - else: - if args.stripcomponents > 0: - args.stripcomponents -= 1 - args.remoteresource = apply_file_collation_and_strip( - args, args.remoteresource) - # manually pull file properties for file service - if args.fileshare and args.skiponmatch: - fsfile = get_fileshare_file_properties( - file_service, args, args.remoteresource) - if fsfile is not None: - blobskipdict[fsfile[0]] = fsfile[1] - filesize, nstorageops, md5digest, filedesc = \ - generate_xferspec_upload( - args, storage_in_queue, blobskipdict, blockids, - args.localresource, args.remoteresource, True) - if filesize is not None: - completed_blockids[args.localresource] = 0 - md5map[args.localresource] = md5digest - filemap[args.localresource] = 
encode_blobname( - args, args.remoteresource) - filesizes[args.localresource] = filesize - allfilesize = allfilesize + filesize - del blobskipdict - # create container/file share if needed - if args.createcontainer: - if args.fileshare: - print('creating file share, if needed: {}'.format( - args.container)) - try: - azure_request( - file_service.create_share, share_name=args.container, - fail_on_exist=False, timeout=args.timeout) - except azure.common.AzureConflictHttpError: - pass - else: - print('creating container, if needed: {}'.format( - args.container)) - try: - azure_request( - blob_service[0].create_container, timeout=args.timeout, - container_name=args.container, fail_on_exist=False) - except azure.common.AzureConflictHttpError: - pass - # initialize page blobs or file share files - if len(filemap) > 0: - if args.pageblob or args.autovhd: - print('initializing page blobs') - for key in filemap: - if as_page_blob(args, key): - blob_service[1].create_blob( - container_name=args.container, - blob_name=filemap[key], - content_length=page_align_content_length( - filesizes[key]), content_settings=None) - elif args.fileshare: - print('initializing files on fileshare') - dirscreated = set() - for key in filemap: - fsfile = split_fileshare_path_into_parts(filemap[key]) - if args.rsaprivatekey or args.rsapublickey: - fspad = _AES256_BLOCKSIZE_BYTES - else: - fspad = 0 - # try to create the file first, if preconditon failure - # then try creating the parent directory - try: - file_service.create_file( - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=filesizes[key] + fspad, - content_settings=None, timeout=args.timeout) - except azure.common.AzureMissingResourceHttpError as exc: - create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - file_service.create_file( - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=filesizes[key] + fspad, - content_settings=None, timeout=args.timeout) - del dirscreated - else: - if args.remoteresource == '.': - print('attempting to copy entire {} {} to {}'.format( - 'file share' if args.fileshare else 'container', - args.container, args.localresource)) - if args.fileshare: - blobdict = get_fileshare_listing(file_service, args) - else: - blobdict = get_blob_listing(blob_service[0], args) - else: - if args.fileshare: - fsfile = get_fileshare_file_properties( - file_service, args, args.remoteresource) - if fsfile is None: - raise RuntimeError('file {} not found on share {}'.format( - args.remoteresource, args.container)) - blobdict = {args.remoteresource: fsfile[1]} - else: - blobdict = {args.remoteresource: [None, None, None]} - if len(blobdict) > 0: - print('generating local directory structure and ' - 'pre-allocating space') - # make the localresource directory - created_dirs = set() - create_dir_ifnotexists(args.localresource) - created_dirs.add(args.localresource) - # generate xferspec for all blobs - for blob in blobdict: - # filter results - if args.include is not None and not fnmatch.fnmatch( - blob, args.include): - continue - if args.collate is not None: - localfile = os.path.join( - args.localresource, args.collate, blob) - else: - localfile = os.path.join(args.localresource, blob) - # create any subdirectories if required - localdir = os.path.dirname(localfile) - if localdir not in created_dirs: - create_dir_ifnotexists(localdir) - created_dirs.add(localdir) - # add instructions - filesize, ops, md5digest, filedesc = \ - 
generate_xferspec_download( - blob_service[0], file_service, args, storage_in_queue, - localfile, blob, False, blobdict[blob]) - if filesize is not None: - md5map[localfile] = md5digest - filemap[localfile] = localfile + '.blobtmp' - allfilesize = allfilesize + filesize - nstorageops = nstorageops + ops - if len(blobdict) > 0: - del created_dirs - del blobdict - - # delete any remote blobs if specified - if xfertoazure and delblobs is not None: - if args.fileshare: - print('deleting {} remote files'.format(len(delblobs))) - for blob in delblobs: - fsfile = split_fileshare_path_into_parts(blob) - azure_request( - file_service.delete_file, - share_name=args.container, directory_name=fsfile[0], - file_name=fsfile[1], timeout=args.timeout) - else: - print('deleting {} remote blobs'.format(len(delblobs))) - for blob in delblobs: - azure_request( - blob_service[0].delete_blob, timeout=args.timeout, - container_name=args.container, blob_name=blob) - print('deletion complete.') - - if nstorageops == 0: - print('detected no transfer actions needed to be taken, exiting...') - sys.exit(0) - - if xfertoazure: - # count number of empty files - emptyfiles = 0 - for fsize in filesizes.items(): - if fsize[1] == 0: - emptyfiles += 1 - print('detected {} empty files to upload'.format(emptyfiles)) - if args.fileshare: - print('performing {} put ranges and {} set file properties'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'ranges' - elif args.pageblob: - print('performing {} put pages/blobs and {} set blob ' - 'properties'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'pages' - elif args.autovhd: - print('performing {} mixed page/block operations with {} ' - 'finalizing operations'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'chunks' - else: - print('performing {} put blocks/blobs and {} put block ' - 'lists'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'blocks' - else: - print('performing {} range-gets'.format(nstorageops)) - progress_text = 'range-gets' - - # spawn workers - storage_out_queue = queue.Queue(nstorageops) - maxworkers = min((args.numworkers, nstorageops)) - print('spawning {} worker threads'.format(maxworkers)) - exc_list = [] - threads = [] - for _ in xrange(maxworkers): - thr = StorageChunkWorker( - exc_list, storage_in_queue, storage_out_queue, args, xfertoazure, - blob_service, file_service) - thr.start() - threads.append(thr) - - done_ops = 0 - hmacs = {} - storage_start = time.time() - progress_bar( - args.progressbar, 'xfer', progress_text, nstorageops, - done_ops, storage_start) - while True: - try: - localresource, encparam = storage_out_queue.get() - except KeyboardInterrupt: - print('\n\nKeyboardInterrupt detected, force terminating ' - 'threads (this may take a while)...') - for thr in threads: - thr.terminate = True - for thr in threads: - thr.join() - raise - if len(exc_list) > 0: - for exc in exc_list: - print(exc) - sys.exit(1) - if xfertoazure: - completed_blockids[localresource] = completed_blockids[ - localresource] + 1 - if completed_blockids[localresource] == len( - blockids[localresource]): - if as_page_blob(args, localresource): - if args.computefilemd5: - azure_request( - blob_service[1].set_blob_properties, - timeout=args.timeout, - container_name=args.container, - blob_name=filemap[localresource], - content_settings=azure.storage.blob. 
- ContentSettings(content_md5=md5map[localresource])) - elif args.fileshare: - fsfile = split_fileshare_path_into_parts( - filemap[localresource]) - # set file metadata for encrypted files - if filesizes[localresource] > 0 and ( - args.rsaprivatekey is not None or - args.rsapublickey is not None): - if args.encmode == _ENCRYPTION_MODE_FULLBLOB: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], - encparam[2][0], - encparam[2]['hmac'].digest(), - md5map[localresource] - ).construct_metadata_json() - else: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], None, - None, md5map[localresource] - ).construct_metadata_json() - azure_request( - file_service.set_file_metadata, - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - metadata=encmetadata, - timeout=args.timeout) - # resize file to final encrypted size if required - if (filesizes[localresource] + - _AES256_BLOCKSIZE_BYTES != - encparam[2]['filesize']): - azure_request( - file_service.resize_file, - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=encparam[2]['filesize'], - timeout=args.timeout) - if args.computefilemd5: - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - md5 = base64encode(encparam[2]['md5'].digest()) - else: - md5 = md5map[localresource] - azure_request( - file_service.set_file_properties, - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_settings=azure.storage.file. - ContentSettings(content_md5=md5), - timeout=args.timeout) - else: - # only perform put block list on non-zero byte files - if filesizes[localresource] > 0: - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - md5 = base64encode(encparam[2]['md5'].digest()) - else: - md5 = md5map[localresource] - block_list = [] - for bid in blockids[localresource]: - block_list.append( - azure.storage.blob.BlobBlock(id=bid)) - azure_request( - blob_service[0].put_block_list, - timeout=args.timeout, - container_name=args.container, - blob_name=filemap[localresource], - block_list=block_list, - content_settings=azure.storage.blob. - ContentSettings( - content_type=get_mime_type(localresource), - content_md5=md5)) - # set blob metadata for encrypted blobs - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - if args.encmode == _ENCRYPTION_MODE_FULLBLOB: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], - encparam[2][0], - encparam[2]['hmac'].digest(), - md5map[localresource] - ).construct_metadata_json() - else: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], None, - None, md5map[localresource] - ).construct_metadata_json() - azure_request( - blob_service[0].set_blob_metadata, - timeout=args.timeout, - container_name=args.container, - blob_name=filemap[localresource], - metadata=encmetadata) - else: - if (args.rsaprivatekey is not None and - encparam[3] == _ENCRYPTION_MODE_FULLBLOB and - not as_page_blob(args, localresource) and - encparam[4]['hmac']['hmac'] is not None): - hmacs[localresource] = encparam[4]['hmac'] - done_ops += 1 - progress_bar( - args.progressbar, 'xfer', progress_text, nstorageops, - done_ops, storage_start) - if done_ops == nstorageops: - break - endtime = time.time() - if filedesc is not None: - filedesc.close() - progress_bar( - args.progressbar, 'xfer', progress_text, nstorageops, - done_ops, storage_start) - print('\n\n{} MiB transfered, elapsed {} sec. 
' - 'Throughput = {} Mbit/sec\n'.format( - allfilesize / 1048576.0, endtime - storage_start, - (8.0 * allfilesize / 1048576.0) / (endtime - storage_start))) - - # finalize files/blobs - if not xfertoazure: - print( - 'performing finalization (if applicable): {}: {}, MD5: {}'.format( - _ENCRYPTION_AUTH_ALGORITHM, - args.rsaprivatekey is not None, args.computefilemd5)) - for localfile in filemap: - tmpfilename = filemap[localfile] - finalizefile = True - skipmd5 = False - # check hmac - if (args.rsaprivatekey is not None and - args.encmode == _ENCRYPTION_MODE_FULLBLOB): - if tmpfilename in hmacs: - hmacdict = hmacs[tmpfilename] - # process any remaining hmac data - while len(hmacdict['buffered']) > 0: - curr = hmacdict['curr'] - if curr in hmacdict['buffered']: - hmacdict['hmac'].update(hmacdict['buffered'][curr]) - hmacdict['buffered'].pop(curr) - hmacdict['curr'] = curr + 1 - else: - break - digest = base64encode(hmacdict['hmac'].digest()) - res = 'OK' - if digest != hmacdict['sig']: - res = 'MISMATCH' - finalizefile = False - else: - skipmd5 = True - print('[{}: {}, {}] {} {}'.format( - _ENCRYPTION_AUTH_ALGORITHM, res, localfile, - digest, hmacdict['sig'])) - # compare md5 hash - if args.computefilemd5 and not skipmd5: - lmd5 = compute_md5_for_file_asbase64(tmpfilename) - if md5map[localfile] is None: - print('[MD5: SKIPPED, {}] {} {}'.format( - localfile, lmd5, md5map[localfile])) - else: - if lmd5 != md5map[localfile]: - res = 'MISMATCH' - if not args.keepmismatchedmd5files: - finalizefile = False - else: - res = 'OK' - print('[MD5: {}, {}] {} {}'.format( - res, localfile, lmd5, md5map[localfile])) - if finalizefile: - # check for existing file first - if os.path.exists(localfile): - if args.overwrite: - os.remove(localfile) - else: - raise IOError( - 'cannot overwrite existing file: {}'.format( - localfile)) - # move tmp file to real file - os.rename(tmpfilename, localfile) - else: - os.remove(tmpfilename) - print('finalization complete.') - - # output final log lines - print('\nscript elapsed time: {} sec'.format(time.time() - start)) - print('script end time: {}'.format(time.strftime("%Y-%m-%d %H:%M:%S"))) - - -def progress_bar(display, sprefix, rtext, value, qsize, start): - """Display a progress bar - Parameters: - display - display bar - sprefix - progress prefix - rtext - rate text - value - value input value - qsize - queue size - start - start time - Returns: - Nothing - Raises: - Nothing - """ - if not display: - return - done = float(qsize) / value - diff = time.time() - start - if diff <= 0: - # arbitrarily give a small delta - diff = 1e-6 - rate = float(qsize) / (diff / 60) - sys.stdout.write( - '\r{0} progress: [{1:30s}] {2:.2f}% {3:10.2f} {4}/min '.format( - sprefix, '>' * int(done * 30), done * 100, rate, rtext)) - sys.stdout.flush() - - -def parseargs(): # pragma: no cover - """Sets up command-line arguments and parser - Parameters: - Nothing - Returns: - Parsed command line arguments - Raises: - Nothing - """ - parser = argparse.ArgumentParser( - description='Transfer files/blobs to/from Azure blob or file storage') - parser.set_defaults( - autovhd=False, endpoint=_DEFAULT_STORAGE_ENDPOINT, - chunksizebytes=_MAX_BLOB_CHUNK_SIZE_BYTES, collate=None, - computeblockmd5=False, computefilemd5=True, createcontainer=True, - delete=False, disableurllibwarnings=False, - encmode=_DEFAULT_ENCRYPTION_MODE, fileshare=False, include=None, - managementep=_DEFAULT_MANAGEMENT_ENDPOINT, - numworkers=_DEFAULT_MAX_STORAGEACCOUNT_WORKERS, overwrite=True, - pageblob=False, progressbar=True, 
recursive=True, rsaprivatekey=None, - rsapublickey=None, rsakeypassphrase=None, skiponmatch=True, - stripcomponents=None, timeout=None) - parser.add_argument('storageaccount', help='name of storage account') - parser.add_argument( - 'container', - help='name of blob container or file share') - parser.add_argument( - 'localresource', - help='name of the local file or directory, if mirroring. "."=use ' - 'current directory') - parser.add_argument( - '--autovhd', action='store_true', - help='automatically upload files ending in .vhd as page blobs') - parser.add_argument( - '--collate', nargs='?', - help='collate all files into a specified path') - parser.add_argument( - '--computeblockmd5', dest='computeblockmd5', action='store_true', - help='compute block/page level MD5 during upload') - parser.add_argument( - '--chunksizebytes', type=int, - help='maximum chunk size to transfer in bytes [{}]'.format( - _MAX_BLOB_CHUNK_SIZE_BYTES)) - parser.add_argument( - '--delete', action='store_true', - help='delete extraneous remote blobs that have no corresponding ' - 'local file when uploading directories') - parser.add_argument( - '--disable-urllib-warnings', action='store_true', - dest='disableurllibwarnings', - help='disable urllib warnings (not recommended)') - parser.add_argument( - '--download', action='store_true', - help='force transfer direction to download from Azure') - parser.add_argument( - '--encmode', - help='encryption mode [{}]'.format(_DEFAULT_ENCRYPTION_MODE)) - parser.add_argument( - '--endpoint', - help='storage endpoint [{}]'.format(_DEFAULT_STORAGE_ENDPOINT)) - parser.add_argument( - '--fileshare', action='store_true', - help='transfer to a file share rather than block/page blob') - parser.add_argument( - '--include', type=str, - help='include pattern (Unix shell-style wildcards)') - parser.add_argument( - '--keepmismatchedmd5files', action='store_true', - help='keep files with MD5 mismatches') - parser.add_argument( - '--managementcert', - help='path to management certificate .pem file') - parser.add_argument( - '--managementep', - help='management endpoint [{}]'.format(_DEFAULT_MANAGEMENT_ENDPOINT)) - parser.add_argument( - '--no-computefilemd5', dest='computefilemd5', action='store_false', - help='do not compute file MD5 and either upload as metadata ' - 'or validate on download') - parser.add_argument( - '--no-createcontainer', dest='createcontainer', action='store_false', - help='do not create container if it does not exist') - parser.add_argument( - '--no-overwrite', dest='overwrite', action='store_false', - help='do not overwrite local files on download') - parser.add_argument( - '--no-progressbar', dest='progressbar', action='store_false', - help='disable progress bar') - parser.add_argument( - '--no-recursive', dest='recursive', action='store_false', - help='do not mirror local directory recursively') - parser.add_argument( - '--no-skiponmatch', dest='skiponmatch', action='store_false', - help='do not skip upload/download on MD5 match') - parser.add_argument( - '--numworkers', type=int, - help='max number of workers [{}]'.format( - _DEFAULT_MAX_STORAGEACCOUNT_WORKERS)) - parser.add_argument( - '--pageblob', action='store_true', - help='upload as page blob rather than block blob, blobs will ' - 'be page-aligned in Azure storage') - parser.add_argument( - '--rsaprivatekey', - help='RSA private key file in PEM format. Specifying an RSA private ' - 'key will turn on decryption (or encryption). 
An RSA private key is ' - 'required for downloading and decrypting blobs and may be specified ' - 'for encrypting and uploading blobs.') - parser.add_argument( - '--rsapublickey', - help='RSA public key file in PEM format. Specifying an RSA public ' - 'key will turn on encryption. An RSA public key can only be used ' - 'for encrypting and uploading blobs.') - parser.add_argument( - '--rsakeypassphrase', - help='Optional passphrase for decrypting an RSA private key; can be ' - 'specified as {} environment variable instead'.format( - _ENVVAR_RSAKEYPASSPHRASE)) - parser.add_argument( - '--remoteresource', - help='name of remote resource on Azure storage. "."=container ' - 'copy recursive implied') - parser.add_argument( - '--saskey', - help='SAS key to use, if recursive upload or container download, ' - 'this must be a container SAS; can be specified as ' - '{} environment variable instead'.format(_ENVVAR_SASKEY)) - parser.add_argument( - '--storageaccountkey', - help='storage account shared key; can be specified as ' - '{} environment variable instead'.format(_ENVVAR_STORAGEACCOUNTKEY)) - parser.add_argument( - '--strip-components', dest='stripcomponents', type=int, - help='strip N leading components from path on upload [1]') - parser.add_argument('--subscriptionid', help='subscription id') - parser.add_argument( - '--timeout', type=float, - help='timeout in seconds for any operation to complete') - parser.add_argument( - '--upload', action='store_true', - help='force transfer direction to upload to Azure') - parser.add_argument('--version', action='version', version=_SCRIPT_VERSION) - return parser.parse_args() - - -if __name__ == '__main__': - main() diff --git a/blobxfer/__init__.py b/blobxfer/__init__.py new file mode 100644 index 0000000..157d59f --- /dev/null +++ b/blobxfer/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from .version import __version__ # noqa diff --git a/blobxfer/util.py b/blobxfer/util.py new file mode 100644 index 0000000..bf3a9a8 --- /dev/null +++ b/blobxfer/util.py @@ -0,0 +1,213 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. 
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# compat imports
+from __future__ import absolute_import, division, print_function
+from builtins import (  # noqa
+    bytes, dict, int, list, object, range, str, ascii, chr, hex, input,
+    next, oct, open, pow, round, super, filter, map, zip
+)
+# stdlib imports
+import base64
+import copy
+import hashlib
+import logging
+import logging.handlers
+import mimetypes
+try:
+    from os import scandir as scandir
+except ImportError:  # noqa
+    from scandir import scandir as scandir
+import sys
+# non-stdlib imports
+# local imports
+
+# global defines
+_PY2 = sys.version_info.major == 2
+_PAGEBLOB_BOUNDARY = 512
+
+
+def on_python2():
+    # type: (None) -> bool
+    """Execution on python2
+    :rtype: bool
+    :return: if on Python2
+    """
+    return _PY2
+
+
+def setup_logger(logger):  # noqa
+    # type: (logger) -> None
+    """Set up logger"""
+    logger.setLevel(logging.DEBUG)
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter(
+        '%(asctime)sZ %(levelname)s %(name)s:%(funcName)s:%(lineno)d '
+        '%(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+
+def is_none_or_empty(obj):
+    # type: (any) -> bool
+    """Determine if object is None or empty
+    :type any obj: object
+    :rtype: bool
+    :return: if object is None or empty
+    """
+    if obj is None or len(obj) == 0:
+        return True
+    return False
+
+
+def is_not_empty(obj):
+    # type: (any) -> bool
+    """Determine if object is not None and length is > 0
+    :type any obj: object
+    :rtype: bool
+    :return: if object is not None and length is > 0
+    """
+    if obj is not None and len(obj) > 0:
+        return True
+    return False
+
+
+def merge_dict(dict1, dict2):
+    # type: (dict, dict) -> dict
+    """Recursively merge dictionaries: dict2 on to dict1. This differs
+    from dict.update() in that values that are dicts are recursively merged.
+    Note that only dict value types are merged, not lists, etc.
+
+    :param dict dict1: dictionary to merge to
+    :param dict dict2: dictionary to merge with
+    :rtype: dict
+    :return: merged dictionary
+    """
+    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
+        raise ValueError('dict1 or dict2 is not a dictionary')
+    result = copy.deepcopy(dict1)
+    for k, v in dict2.items():
+        if k in result and isinstance(result[k], dict):
+            result[k] = merge_dict(result[k], v)
+        else:
+            result[k] = copy.deepcopy(v)
+    return result
+
+
+def scantree(path):
+    # type: (str) -> os.DirEntry
+    """Recursively scan a directory tree
+    :param str path: path to scan
+    :rtype: DirEntry
+    :return: DirEntry via generator
+    """
+    for entry in scandir(path):
+        if entry.is_dir(follow_symlinks=True):
+            # due to python2 compat, cannot use yield from here
+            for t in scantree(entry.path):
+                yield t
+        else:
+            yield entry
+
+
+def get_mime_type(filename):
+    # type: (str) -> str
+    """Guess the type of a file based on its filename
+    :param str filename: filename to guess the content-type
+    :rtype: str
+    :return: string of form 'class/type' for MIME content-type header
+    """
+    return (mimetypes.guess_type(filename)[0] or 'application/octet-stream')
+
+
+def base64_encode_as_string(obj):  # noqa
+    # type: (any) -> str
+    """Encode object to base64
+    :param any obj: object to encode
+    :rtype: str
+    :return: base64 encoded string
+    """
+    if _PY2:
+        return base64.b64encode(obj)
+    else:
+        return str(base64.b64encode(obj), 'ascii')
+
+
+def base64_decode_string(string):
+    # type: (str) -> str
+    """Base64 decode a string
+    :param str string: string to decode
+    :rtype: str
+    :return: decoded string
+    """
+    return base64.b64decode(string)
+
+
+def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536):
+    # type: (str, bool, int) -> str
+    """Compute MD5 hash for file and encode as Base64
+    :param str filename: file to compute MD5 for
+    :param bool pagealign: page align data
+    :param int blocksize: block size
+    :rtype: str
+    :return: MD5 for file encoded as Base64
+    """
+    hasher = hashlib.md5()
+    with open(filename, 'rb') as filedesc:
+        while True:
+            buf = filedesc.read(blocksize)
+            if not buf:
+                break
+            buflen = len(buf)
+            if pagealign and buflen < blocksize:
+                aligned = page_align_content_length(buflen)
+                if aligned != buflen:
+                    buf = buf.ljust(aligned, b'\0')
+            hasher.update(buf)
+    return base64_encode_as_string(hasher.digest())
+
+
+def compute_md5_for_data_asbase64(data):
+    # type: (obj) -> str
+    """Compute MD5 hash for bits and encode as Base64
+    :param any data: data to compute MD5 for
+    :rtype: str
+    :return: MD5 for data
+    """
+    hasher = hashlib.md5()
+    hasher.update(data)
+    return base64_encode_as_string(hasher.digest())
+
+
+def page_align_content_length(length):
+    # type: (int) -> int
+    """Compute page boundary alignment
+    :param int length: content length
+    :rtype: int
+    :return: aligned byte boundary
+    """
+    mod = length % _PAGEBLOB_BOUNDARY
+    if mod != 0:
+        return length + (_PAGEBLOB_BOUNDARY - mod)
+    return length
diff --git a/blobxfer/version.py b/blobxfer/version.py
new file mode 100644
index 0000000..9e8b65b
--- /dev/null
+++ b/blobxfer/version.py
@@ -0,0 +1,25 @@
+# Copyright (c) Microsoft Corporation
+#
+# All rights reserved.
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +__version__ = '1.0.0a1' diff --git a/setup.py b/setup.py index 7709e0e..83d5abb 100644 --- a/setup.py +++ b/setup.py @@ -1,41 +1,76 @@ +from codecs import open +import os import re try: from setuptools import setup except ImportError: from distutils.core import setup +import sys -with open('blobxfer.py', 'r') as fd: +if sys.argv[-1] == 'publish': + os.system('rm -rf blobxfer.egg-info/ build dist __pycache__/') + os.system('python setup.py sdist bdist_wheel') + os.unlink('README.rst') + sys.exit() +elif sys.argv[-1] == 'upload': + os.system('twine upload dist/*') + sys.exit() +elif sys.argv[-1] == 'sdist' or sys.argv[-1] == 'bdist_wheel': + import pypandoc + long_description = pypandoc.convert('README.md', 'rst') +else: + long_description = '' + +with open('blobxfer/version.py', 'r', 'utf-8') as fd: version = re.search( - r'^_SCRIPT_VERSION\s*=\s*[\'"]([^\'"]*)[\'"]', + r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1) -with open('README.rst') as readme: - long_description = ''.join(readme).strip() +if not version: + raise RuntimeError('Cannot find version') + +packages = [ + 'blobxfer', + 'blobxfer.blob', + 'blobxfer.blob.block', + 'blobxfer_cli', +] + +install_requires = [ + 'azure-common==1.1.4', + 'azure-storage==0.33.0', + 'click==6.6', + 'cryptography>=1.7.1', + 'future==0.16.0', + 'ruamel.yaml==0.13.11', +] + +if sys.version_info < (3, 5): + install_requires.append('pathlib2') + install_requires.append('scandir') setup( name='blobxfer', version=version, author='Microsoft Corporation, Azure Batch and HPC Team', author_email='', - description='Azure storage transfer tool with AzCopy-like features', + description=( + 'Azure storage transfer tool and library with AzCopy-like features'), long_description=long_description, platforms='any', url='https://github.com/Azure/blobxfer', license='MIT', - py_modules=['blobxfer'], + packages=packages, + package_data={'blobxfer': ['LICENSE']}, + package_dir={'blobxfer': 'blobxfer', 'blobxfer_cli': 'cli'}, entry_points={ - 'console_scripts': 'blobxfer=blobxfer:main', + 'console_scripts': 'blobxfer=blobxfer_cli.cli:cli', }, - install_requires=[ - 'azure-common==1.1.4', - 'azure-storage==0.33.0', - 'azure-servicemanagement-legacy==0.20.5', - 'cryptography>=1.6', - 'requests==2.12.3' - ], + zip_safe=False, + install_requires=install_requires, tests_require=['pytest'], classifiers=[ - 'Development Status :: 4 - Beta', + 'Development Status :: 3 - Alpha', 
'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: System Administrators', @@ -47,7 +82,8 @@ 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', 'Topic :: Utilities', ], - keywords='azcopy azure storage blob files transfer copy smb', + keywords='azcopy azure storage blob files transfer copy smb cifs', ) diff --git a/test/test_blobxfer.py b/test/test_blobxfer.py deleted file mode 100644 index 28208af..0000000 --- a/test/test_blobxfer.py +++ /dev/null @@ -1,1436 +0,0 @@ -# coding=utf-8 -"""Tests for blobxfer""" - -# stdlib imports -import base64 -import copy -import errno -import json -import math -import os -try: - import queue -except ImportError: - import Queue as queue -import socket -import sys -import threading -import uuid -# non-stdlib imports -import azure.common -import azure.storage.blob -import cryptography.exceptions -import cryptography.hazmat.backends -import cryptography.hazmat.primitives.asymmetric.rsa -import cryptography.hazmat.primitives.serialization -from mock import (MagicMock, Mock, patch) -import pytest -import requests -import requests_mock -# module under test -sys.path.append('..') -import blobxfer # noqa - - -# global defines -_RSAKEY = cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key( - public_exponent=65537, key_size=2048, - backend=cryptography.hazmat.backends.default_backend()) - - -def test_encrypt_decrypt_chunk(): - enckey, signkey = blobxfer.generate_aes256_keys() - assert len(enckey) == blobxfer._AES256_KEYLENGTH_BYTES - assert len(signkey) == blobxfer._AES256_KEYLENGTH_BYTES - - # test random binary data, unaligned - iv = os.urandom(16) - plaindata = os.urandom(31) - encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB, - pad=True) - assert encdata != plaindata - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB, - unpad=True) - assert decdata == plaindata - with pytest.raises(RuntimeError): - badsig = base64.b64encode(b'0') - blobxfer.decrypt_chunk( - enckey, badsig, encdata, blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB, - unpad=True) - - encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, pad=True) - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, unpad=True) - assert decdata == plaindata - - # test random binary data aligned on boundary - plaindata = os.urandom(32) - encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, pad=True) - assert encdata != plaindata - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, unpad=True) - assert decdata == plaindata - - # test text data - plaindata = b'attack at dawn!' 
- encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv, pad=True) - assert encdata != plaindata - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv, unpad=True) - assert decdata == plaindata - - -def test_rsa_keys(): - symkey = os.urandom(32) - enckey, sig = blobxfer.rsa_encrypt_key( - _RSAKEY, None, symkey, asbase64=False) - assert enckey is not None - assert sig is not None - plainkey = blobxfer.rsa_decrypt_key(_RSAKEY, enckey, sig, isbase64=False) - assert symkey == plainkey - - with pytest.raises(cryptography.exceptions.InvalidSignature): - badsig = base64.b64encode(b'0') - blobxfer.rsa_decrypt_key(_RSAKEY, enckey, badsig, isbase64=False) - - enckey, sig = blobxfer.rsa_encrypt_key( - _RSAKEY, None, symkey, asbase64=True) - assert enckey is not None - assert sig is not None - plainkey = blobxfer.rsa_decrypt_key(_RSAKEY, enckey, sig, isbase64=True) - assert symkey == plainkey - - with pytest.raises(cryptography.exceptions.InvalidSignature): - badsig = base64.b64encode(b'0') - blobxfer.rsa_decrypt_key(_RSAKEY, enckey, badsig, isbase64=True) - - -def test_compute_md5(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - testdata = str(uuid.uuid4()) - with open(lpath, 'wt') as f: - f.write(testdata) - md5_file = blobxfer.compute_md5_for_file_asbase64(lpath) - md5_data = blobxfer.compute_md5_for_data_asbase64(testdata.encode('utf8')) - assert md5_file == md5_data - - # test non-existent file - with pytest.raises(IOError): - blobxfer.compute_md5_for_file_asbase64(testdata) - - -def test_page_align_content_length(): - assert 0 == blobxfer.page_align_content_length(0) - assert 512 == blobxfer.page_align_content_length(511) - assert 512 == blobxfer.page_align_content_length(512) - assert 1024 == blobxfer.page_align_content_length(513) - - -def _func_successful_requests_call(timeout=None): - response = MagicMock() - response.raise_for_status = lambda: None - return response - - -def _func_raise_requests_exception_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - response.raise_for_status = lambda: None - return response - val.append(0) - ex = requests.Timeout() - raise ex - - -def _func_raise_requests_connection_error_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - response.raise_for_status = lambda: None - return response - val.append(0) - ex = requests.ConnectionError( - requests.packages.urllib3.exceptions.ProtocolError( - 'Connection aborted.', - socket.error(errno.ECONNRESET, 'Connection reset by peer'))) - raise ex - - -def _func_raise_requests_chunked_encoding_error_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - response.raise_for_status = lambda: None - return response - val.append(0) - ex = requests.exceptions.ChunkedEncodingError( - requests.packages.urllib3.exceptions.ProtocolError( - 'Connection aborted.', - socket.error(errno.ECONNRESET, 'Connection reset by peer'))) - raise ex - - -def _func_raise_azurehttperror_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - return response - val.append(0) - ex = azure.common.AzureHttpError('ServerBusy', 503) - raise ex - - -@patch('time.sleep', return_value=None) -def test_azure_request(patched_time_sleep): - azcomerr = azure.common.AzureHttpError('ServerBusy', 503) - - with pytest.raises(IOError): - mock = Mock(side_effect=azcomerr) - mock.__name__ = 'name' - blobxfer.azure_request(mock, timeout=0.001) - - with pytest.raises(Exception): - ex = Exception() - 
ex.message = 'Uncaught' - blobxfer.azure_request(Mock(side_effect=ex)) - - with pytest.raises(Exception): - ex = Exception() - ex.__delattr__('message') - blobxfer.azure_request(Mock(side_effect=ex)) - - blobxfer.azure_request( - _func_raise_requests_connection_error_once, val=[], timeout=1) - - blobxfer.azure_request( - _func_raise_requests_chunked_encoding_error_once, val=[], timeout=1) - - blobxfer.azure_request( - _func_raise_azurehttperror_once, val=[], timeout=1) - - with pytest.raises(requests.HTTPError): - exc = requests.HTTPError() - exc.response = MagicMock() - exc.response.status_code = 404 - mock = Mock(side_effect=exc) - blobxfer.azure_request(mock) - - try: - blobxfer.azure_request( - _func_raise_requests_exception_once, val=[], timeout=1) - except Exception: - pytest.fail('unexpected Exception raised') - - try: - blobxfer.azure_request(_func_successful_requests_call) - except Exception: - pytest.fail('unexpected Exception raised') - - -def test_sasblobservice_listblobs(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - content = b'string-value' + \ - b'string-valueint-value' + \ - b'string-valueblob-name' + \ - b'date-time-value' + \ - b'date-time-valueetag' + \ - b'2147483648' + \ - b'blob-content-type' + \ - b'abc' + \ - b'sequence-number' + \ - b'BlockBlob' + \ - b'locked|unlocked' + \ - b'available | leased | expired | breaking | broken' + \ - b'infinite | fixedid' + \ - b'pending | success | aborted | failed' + \ - b'source url' + \ - b'bytes copied/bytes total' + \ - b'datetime' + \ - b'error string' + \ - b'value' + \ - b'blob-prefixnm' + \ - b'' - - with requests_mock.mock() as m: - m.get('mock://blobepcontainer?saskey', content=content) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - metamock = MagicMock() - metamock.metadata = True - result = sbs.list_blobs('container', 'marker', include=metamock) - assert len(result) == 1 - assert result[0].name == 'blob-name' - assert result[0].properties.content_length == 2147483648 - assert result[0].properties.content_settings.content_md5 == 'abc' - assert result[0].properties.blobtype == 'BlockBlob' - assert result[0].metadata['Name'] == 'value' - assert result.next_marker == 'nm' - - m.get('mock://blobepcontainer?saskey', content=b'', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.list_blobs('container', 'marker') - - -def test_sasblobservice_setblobmetadata(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey') - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - sbs.set_blob_metadata('container', 'blob', None) - sbs.set_blob_metadata('container', 'blob', {'name': 'value'}) - - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - with pytest.raises(IOError): - sbs.set_blob_metadata('container', 'blob', {'name': 'value'}) - - -def test_sasblobservice_getblob(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.get('mock://blobepcontainer/blob?saskey', content=b'data') - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - results = sbs._get_blob('container', 'blob', 0, 1) - assert results.content == b'data' - - m.get('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', 
None) - with pytest.raises(IOError): - sbs._get_blob('container', 'blob', 0, 1) - - -def test_sasblobservice_getblobproperties(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.head('mock://blobepcontainer/blob?saskey', - headers={'x-ms-meta-hello': 'world', 'content-length': '1'}) - sbs = blobxfer.SasBlobService('mock://blobep', '?saskey', None) - results = sbs.get_blob_properties('container', 'blob') - assert results.metadata['hello'] == 'world' - - m.head('mock://blobepcontainer/blob?saskey', text='', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.get_blob_properties('container', 'blob') - - -def test_sasblobservice_putblock(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', '?saskey', None) - try: - sbs.put_block( - 'container', 'blob', 'block', 'blockid', - validate_content=False) - except Exception: - pytest.fail('unexpected Exception raised') - - m.put('mock://blobepcontainer/blob?saskey', text='', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.put_block( - 'container', 'blob', 'block', 'blockid', - validate_content=False) - - -def test_sasblobservice_putblocklist(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - block_list = [ - azure.storage.blob.BlobBlock(id='1'), - azure.storage.blob.BlobBlock(id='2') - ] - cs = azure.storage.blob.ContentSettings(content_md5='md5') - sbs.put_block_list('container', 'blob', block_list, cs) - - m.put('mock://blobepcontainer/blob?saskey', text='', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.put_block_list('container', 'blob', block_list, cs) - - -def test_sasblobservice_setblobproperties(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - cs = azure.storage.blob.ContentSettings(content_md5='md5') - sbs.set_blob_properties('container', 'blob', cs) - - m.put('mock://blobepcontainer/blob?saskey', text='', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.set_blob_properties('container', 'blob', cs) - - -def test_sasblobservice_putblob(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - cs = azure.storage.blob.ContentSettings( - content_type='a', content_md5='md5') - sbs._put_blob('container', 'blob', None, cs) - - m.put('mock://blobepcontainer/blob?saskey', content=b'', - status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs._put_blob('container', 'blob', None, cs) - - -def 
test_sasblobservice_createblob(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', content=b'', - status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - cs = azure.storage.blob.ContentSettings(content_md5='md5') - sbs.create_blob('container', 'blob', 0, cs) - - m.put('mock://blobepcontainer/blob?saskey', content=b'', - status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.create_blob('container', 'blob', 0, cs) - - -def test_sasblobservice_createcontainer(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - sbs.create_container('container', fail_on_exist=False) - - m.put('mock://blobepcontainer?saskey', status_code=409) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(requests.exceptions.HTTPError): - sbs.create_container('container', fail_on_exist=True) - - -def test_storagechunkworker_run(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - args = MagicMock() - args.rsakey = None - args.pageblob = True - args.autovhd = False - args.timeout = None - args.fileshare = False - - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - exc_list = [] - flock = threading.Lock() - sa_in_queue = queue.PriorityQueue() - sa_out_queue = queue.Queue() - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, True, (sbs, sbs), None) - with pytest.raises(IOError): - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4, None, flock, None) - - args.pageblob = False - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, True, (sbs, sbs), None) - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4, None, flock, None) - - m.get('mock://blobepcontainer/blob?saskey', status_code=200) - bcw.get_storage_range( - lpath, 'container', 'blob', 0, 0, 4, - [None, None, None, None, None, False], flock, None) - - # test zero-length putblob - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 0, None, flock, None) - bcw._pageblob = True - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 0, None, flock, None) - - # test empty page - with open(lpath, 'wb') as f: - f.write(b'\0' * 4 * 1024 * 1024) - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4 * 1024 * 1024, - None, flock, None) - with open(lpath, 'wb') as f: - f.write(b'\0' * 4 * 1024) - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4 * 1024, - None, flock, None) - - sa_in_queue.put((0, (lpath, 'container', 'blob', 'blockid', 0, 4, - [None, None, None, None], flock, None))) - with requests_mock.mock() as m: - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, 
sa_out_queue, args, False, (sbs, sbs), None) - m.get('mock://blobepcontainer/blob?saskey', status_code=201) - bcw.run() - assert len(exc_list) > 0 - - -@patch('azure.storage.file.FileService.update_range') -@patch('azure.storage.file.FileService._get_file') -def test_storagechunkworker_files_run( - patched_get_file, patched_update_range, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - args = MagicMock() - args.rsakey = None - args.pageblob = False - args.autovhd = False - args.timeout = None - args.fileshare = True - - exc_list = [] - flock = threading.Lock() - sa_in_queue = queue.PriorityQueue() - sa_out_queue = queue.Queue() - fs = azure.storage.file.FileService(account_name='sa', account_key='key') - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, True, None, fs) - patched_update_range.return_value = MagicMock() - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4, None, flock, None) - - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, False, None, fs) - patched_get_file.return_value = MagicMock() - patched_get_file.return_value.content = b'' - bcw.get_storage_range( - lpath, 'container', 'blob', 0, 0, 4, - [None, None, None, None, None, False], flock, None) - - -@patch('blobxfer.azure_request', return_value=None) -def test_generate_xferspec_download_invalid(patched_azure_request): - args = MagicMock() - args.storageaccount = 'blobep' - args.container = 'container' - args.storageaccountkey = 'saskey' - args.chunksizebytes = 5 - args.timeout = None - args.fileshare = False - sa_in_queue = queue.PriorityQueue() - - with requests_mock.mock() as m: - m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '-1', 'content-md5': 'md5'}) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(ValueError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, 'tmppath', 'blob', True, - [None, None, None]) - - -def test_generate_xferspec_download(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - args.rsakey = None - args.storageaccount = 'blobep' - args.container = 'container' - args.storageaccountkey = 'saskey' - args.chunksizebytes = 5 - args.timeout = None - args.fileshare = False - sa_in_queue = queue.PriorityQueue() - - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '-1', 'content-md5': 'md5'}) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(ValueError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', True, - [None, None, None]) - assert sa_in_queue.qsize() == 0 - m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '6', 'content-md5': 'md5'}) - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', True, - [None, None, None]) - assert sa_in_queue.qsize() == 2 - assert 2 == nsops - assert 6 == cl - assert 2 == nsops - assert 'md5' == md5 - assert fd is not None - fd.close() - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - assert 2 == nsops - assert fd is None - assert sa_in_queue.qsize() == 4 - with open(lpath, 'wt') as f: - f.write('012345') - 
m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '6', 'content-md5': '1qmpM8iq/FHlWsBmK25NSg=='}) - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', True, - [None, None, None]) - assert nsops is None - assert cl is None - assert sa_in_queue.qsize() == 4 - - sa_in_queue = queue.PriorityQueue() - args.rsaprivatekey = _RSAKEY - args.rsapublickey = None - symkey, signkey = blobxfer.generate_aes256_keys() - args.encmode = blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB - metajson = blobxfer.EncryptionMetadataJson( - args, symkey, signkey, iv=b'0', encdata_signature=b'0', - preencrypted_md5=None) - encmeta = metajson.construct_metadata_json() - goodencjson = json.loads(encmeta[blobxfer._ENCRYPTION_METADATA_NAME]) - goodauthjson = json.loads( - encmeta[blobxfer._ENCRYPTION_METADATA_AUTH_NAME]) - metajson2 = blobxfer.EncryptionMetadataJson( - args, None, None, None, None, None) - metajson2.parse_metadata_json( - 'blob', args.rsaprivatekey, args.rsapublickey, encmeta) - assert metajson2.symkey == symkey - assert metajson2.signkey == signkey - assert metajson2.encmode == args.encmode - assert metajson2.chunksizebytes == args.chunksizebytes + \ - blobxfer._AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - encjson = json.loads(encmeta[blobxfer._ENCRYPTION_METADATA_NAME]) - encjson[blobxfer._ENCRYPTION_METADATA_LAYOUT][ - blobxfer._ENCRYPTION_METADATA_CHUNKSTRUCTURE] = 'X' - headers = { - 'content-length': '64', - 'content-md5': 'md5', - 'x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME: - json.dumps(encjson), - 'x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME: - json.dumps(goodauthjson), - } - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - # switch to full blob mode tests - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - metajson = blobxfer.EncryptionMetadataJson( - args, symkey, signkey, iv=b'0', encdata_signature=b'0', - preencrypted_md5=None) - encmeta = metajson.construct_metadata_json() - goodencjson = json.loads(encmeta[blobxfer._ENCRYPTION_METADATA_NAME]) - goodauthjson = json.loads( - encmeta[blobxfer._ENCRYPTION_METADATA_AUTH_NAME]) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(goodauthjson) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_AGENT][ - blobxfer._ENCRYPTION_METADATA_PROTOCOL] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_AGENT][ - blobxfer._ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_INTEGRITY_AUTH][ - blobxfer._ENCRYPTION_METADATA_ALGORITHM] = 'X' - 
headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - blobxfer._ENCRYPTION_METADATA_ALGORITHM] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson.pop(blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH, None) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson[blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH].pop( - blobxfer._ENCRYPTION_METADATA_AUTH_ENCODING, None) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson[blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH][ - blobxfer._ENCRYPTION_METADATA_ALGORITHM] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson[blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH][ - blobxfer._ENCRYPTION_METADATA_MAC] = blobxfer.base64encode(b'X') - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - args.chunksizebytes = 5 - metajson.chunksizebytes = args.chunksizebytes - metajson.md5 = headers['content-md5'] - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - encjson = copy.deepcopy(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(goodauthjson) - hcl = int(headers['content-length']) - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [hcl, headers['content-md5'], metajson]) - assert hcl == cl - calcops = hcl // 
args.chunksizebytes - hclmod = hcl % args.chunksizebytes - if hclmod > 0: - calcops += 1 - assert calcops == nsops - assert headers['content-md5'] == md5 - assert fd is None - assert sa_in_queue.qsize() == nsops - data = sa_in_queue.get() - assert data is not None - - -def test_generate_xferspec_upload(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - args = MagicMock() - args.storageaccount = 'sa' - args.container = 'container' - args.storageaccountkey = 'key' - args.chunksizebytes = 5 - args.skiponmatch = False - args.pageblob = False - args.autovhd = False - sa_in_queue = queue.PriorityQueue() - fs, nsops, md5, fd = blobxfer.generate_xferspec_upload( - args, sa_in_queue, {}, {}, lpath, 'rr', True) - stat = os.stat(lpath) - assert stat.st_size == fs - assert math.ceil(stat.st_size / 5.0) == nsops - assert fd is not None - fd.close() - args.skiponmatch = True - with open(lpath, 'wt') as f: - f.write('012345') - sd = {} - sd['rr'] = [6, '1qmpM8iq/FHlWsBmK25NSg=='] - fs, nsops, md5, fd = blobxfer.generate_xferspec_upload( - args, sa_in_queue, sd, {}, lpath, 'rr', False) - assert fs is None - - -def test_apply_file_collation_and_strip(): - args = MagicMock() - args.collate = 'collatedir' - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'collatedir/file0' - - args.collate = None - args.stripcomponents = 0 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'tmpdir/file0' - args.stripcomponents = 1 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'file0' - args.stripcomponents = 2 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'file0' - args.stripcomponents = 1 - rfname = blobxfer.apply_file_collation_and_strip( - args, '/tmpdir/tmpdir2/file0') - assert rfname == 'tmpdir2/file0' - args.stripcomponents = 2 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/tmpdir2/file0') - assert rfname == 'file0' - - -@patch('azure.storage.file.FileService.create_directory') -def test_create_all_parent_directories_fileshare(patched_cd): - patched_cd.return_value = MagicMock() - fsfile = ['tmp/a/b', None] - file_service = MagicMock() - args = MagicMock() - args.container = 'fshare' - args.timeout = None - dirscreated = set() - blobxfer.create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - assert len(dirscreated) == 3 - assert 'tmp' in dirscreated - assert 'tmp/a' in dirscreated - assert 'tmp/a/b' in dirscreated - fsfile = ['tmp/a/b/c', None] - blobxfer.create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - assert len(dirscreated) == 4 - assert 'tmp/a/b/c' in dirscreated - fsfile = ['x/a/b/c', None] - blobxfer.create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - assert len(dirscreated) == 8 - assert 'x/a/b/c' in dirscreated - - -def _mock_get_storage_account_keys(timeout=None, service_name=None): - ret = MagicMock() - ret.storage_service_keys.primary = 'mmkey' - return ret - - -def _mock_get_storage_account_properties(timeout=None, service_name=None): - ret = MagicMock() - ret.storage_service_properties.endpoints = [None] - return ret - - -def _mock_blobservice_create_container(timeout=None, container_name=None, - fail_on_exist=None): - raise azure.common.AzureConflictHttpError('conflict', 409) - - -@patch('blobxfer.parseargs') 
-@patch('azure.servicemanagement.ServiceManagementService.' - 'get_storage_account_keys') -@patch('azure.servicemanagement.ServiceManagementService.' - 'get_storage_account_properties') -def test_main1( - patched_sms_saprops, patched_sms_sakeys, patched_parseargs, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - args.include = None - args.stripcomponents = 0 - args.delete = False - args.rsaprivatekey = None - args.rsapublickey = None - args.rsakeypassphrase = None - args.numworkers = 0 - args.localresource = '' - args.storageaccount = 'blobep' - args.container = 'container' - args.storageaccountkey = None - os.environ[blobxfer._ENVVAR_STORAGEACCOUNTKEY] = 'saskey' - args.chunksizebytes = 5 - args.pageblob = False - args.autovhd = False - args.fileshare = False - patched_parseargs.return_value = args - with pytest.raises(ValueError): - blobxfer.main() - args.localresource = lpath - args.endpoint = '' - with pytest.raises(ValueError): - blobxfer.main() - args.endpoint = 'blobep' - args.upload = True - args.download = True - with pytest.raises(ValueError): - blobxfer.main() - args.upload = None - args.download = None - with pytest.raises(ValueError): - blobxfer.main() - os.environ.pop(blobxfer._ENVVAR_STORAGEACCOUNTKEY) - args.storageaccountkey = None - args.timeout = -1 - args.saskey = '' - with pytest.raises(ValueError): - blobxfer.main() - args.saskey = None - args.storageaccountkey = None - args.managementcert = 'cert.spam' - args.subscriptionid = '1234' - with pytest.raises(ValueError): - blobxfer.main() - args.managementcert = 'cert.pem' - args.managementep = None - with pytest.raises(ValueError): - blobxfer.main() - args.managementep = 'mep' - args.subscriptionid = None - with pytest.raises(ValueError): - blobxfer.main() - args.subscriptionid = '1234' - args.pageblob = True - args.autovhd = True - with pytest.raises(ValueError): - blobxfer.main() - args.autovhd = False - args.fileshare = True - with pytest.raises(ValueError): - blobxfer.main() - args.pageblob = False - args.autovhd = True - with pytest.raises(ValueError): - blobxfer.main() - args.autovhd = False - args.fileshare = False - with patch('azure.servicemanagement.ServiceManagementService') as mock: - mock.return_value = MagicMock() - mock.return_value.get_storage_account_keys = \ - _mock_get_storage_account_keys - mock.return_value.get_storage_account_properties = \ - _mock_get_storage_account_properties - with pytest.raises(ValueError): - blobxfer.main() - args.managementep = None - args.managementcert = None - args.subscriptionid = None - args.remoteresource = 'blob' - args.chunksizebytes = None - with patch('azure.storage.blob.BlockBlobService') as mock: - mock.return_value = None - with pytest.raises(ValueError): - blobxfer.main() - args.storageaccountkey = None - args.saskey = None - os.environ[blobxfer._ENVVAR_SASKEY] = 'saskey' - args.remoteresource = None - args.download = True - with pytest.raises(ValueError): - blobxfer.main() - - args.download = False - args.upload = True - args.remoteresource = None - args.storageaccountkey = '' - args.saskey = None - with pytest.raises(ValueError): - blobxfer.main() - - args.collate = 'collatetmp' - with pytest.raises(ValueError): - blobxfer.main() - - args.collate = None - args.storageaccountkey = None - args.saskey = '' - with pytest.raises(ValueError): - blobxfer.main() - - args.saskey = None - os.environ.pop(blobxfer._ENVVAR_SASKEY) - with pytest.raises(ValueError): - blobxfer.main() - args.managementcert = '0' - args.managementep = '' - 
args.subscriptionid = '0' - with pytest.raises(ValueError): - blobxfer.main() - args.managementcert = 'test.pem' - with pytest.raises(ValueError): - blobxfer.main() - args.managementep = 'mep.mep' - ssk = MagicMock() - ssk.storage_service_keys = MagicMock() - ssk.storage_service_keys.primary = '' - patched_sms_sakeys.return_value = ssk - ssp = MagicMock() - ssp.storage_service_properties = MagicMock() - ssp.storage_service_properties.endpoints = ['blobep'] - patched_sms_saprops.return_value = ssp - with pytest.raises(ValueError): - blobxfer.main() - ssk.storage_service_keys.primary = 'key1' - args.storageaccountkey = None - args.rsaprivatekey = '' - args.rsapublickey = '' - with pytest.raises(ValueError): - blobxfer.main() - args.rsaprivatekey = '' - args.rsapublickey = None - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - with pytest.raises(IOError): - blobxfer.main() - - args.rsaprivatekey = None - args.storageaccountkey = None - args.managementcert = None - args.managementep = None - args.subscriptionid = None - - args.upload = False - args.download = True - args.remoteresource = None - args.saskey = 'saskey&srt=c' - with pytest.raises(ValueError): - blobxfer.main() - args.upload = True - args.download = False - args.saskey = None - - os.environ[blobxfer._ENVVAR_SASKEY] = 'saskey' - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - with requests_mock.mock() as m: - m.put('https://blobep.blob.blobep/container/blob?saskey' - '&comp=block&blockid=00000000', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&blockid=00000000&comp=block', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=block&blockid=00000000', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=metadata', status_code=200) - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - '' + lpath + '' - '6' - 'md5BlockBlob' - '') - args.progressbar = False - args.skiponmatch = True - blobxfer.main() - - args.progressbar = True - args.download = True - args.upload = False - args.remoteresource = None - with pytest.raises(ValueError): - blobxfer.main() - - args.remoteresource = 'blob' - args.localresource = str(tmpdir) - m.head('https://blobep.blob.blobep/container/blob?saskey', headers={ - 'content-length': '6', 'content-md5': '1qmpM8iq/FHlWsBmK25NSg=='}) - m.get('https://blobep.blob.blobep/container/blob?saskey', - content=b'012345') - blobxfer.main() - - args.pageblob = False - args.autovhd = False - args.skiponmatch = False - pemcontents = _RSAKEY.private_bytes( - encoding=cryptography.hazmat.primitives.serialization. - Encoding.PEM, - format=cryptography.hazmat.primitives.serialization. - PrivateFormat.PKCS8, - encryption_algorithm=cryptography.hazmat.primitives. - serialization.NoEncryption()) - pempath = str(tmpdir.join('rsa.pem')) - with open(pempath, 'wb') as f: - f.write(pemcontents) - args.rsaprivatekey = pempath - blobxfer.main() - os.remove(pempath) - - args.rsaprivatekey = None - args.skiponmatch = True - args.remoteresource = '.' 
- args.keepmismatchedmd5files = False - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - 'blob' - '6' - 'BlockBlob' - '') - m.get('https://blobep.blob.blobep/container/?saskey') - with pytest.raises(SystemExit): - blobxfer.main() - - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - 'blob' - '6md5' - 'BlockBlob' - '') - blobxfer.main() - - tmplpath = str(tmpdir.join('test', 'test2', 'test3')) - args.localresource = tmplpath - blobxfer.main() - - args.localresource = str(tmpdir) - notmp_lpath = '/'.join(lpath.strip('/').split('/')[1:]) - - with requests_mock.mock() as m: - args.delete = True - args.download = False - args.upload = True - args.remoteresource = None - args.skiponmatch = False - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/' + notmp_lpath + - '?saskey&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container/' + notmp_lpath + - '?saskey&comp=blocklist', status_code=201) - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - 'blob' - '6md5' - 'BlockBlob' - '') - m.delete('https://blobep.blob.blobep/container/blob?saskey', - status_code=202) - with pytest.raises(SystemExit): - blobxfer.main() - - args.recursive = False - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/test.tmp.blobtmp?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container/blob?saskey' - '&comp=blocklist', status_code=201) - with pytest.raises(SystemExit): - blobxfer.main() - - args.stripcomponents = None - args.collate = '.' 
- args.pageblob = True - args.upload = True - args.download = False - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey', - status_code=201) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey', - status_code=201) - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=properties', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=properties', status_code=200) - m.put('https://blobep.blob.blobep/container/blob?saskey', - status_code=201) - with pytest.raises(IOError): - blobxfer.main() - - args.stripcomponents = None - m.put('https://blobep.blob.blobep/container/blobsaskey', - status_code=200) - with pytest.raises(IOError): - blobxfer.main() - - args.stripcomponents = None - args.pageblob = False - m.put('https://blobep.blob.blobep/container/' + notmp_lpath + - '?saskey&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/blob?saskey', - status_code=201) - blobxfer.main() - - args.stripcomponents = None - args.autovhd = True - blobxfer.main() - - args.stripcomponents = None - args.include = 'nofiles' - with pytest.raises(SystemExit): - blobxfer.main() - - args.stripcomponents = None - args.include = '*' - blobxfer.main() - - args.include = None - args.stripcomponents = None - args.pageblob = False - args.autovhd = False - pempath = str(tmpdir.join('rsa.pem')) - with open(pempath, 'wb') as f: - f.write(pemcontents) - args.rsaprivatekey = pempath - m.put('https://blobep.blob.blobep/container/rsa.pem?saskey&comp=block' - '&blockid=00000000', status_code=201) - m.put('https://blobep.blob.blobep/container/rsa.pem?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/rsa.pem?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/blob?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp.blobtmp?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=metadata', status_code=200) - blobxfer.main() - - args.stripcomponents = None - args.download = True - args.upload = False - args.rsaprivatekey = pempath - args.remoteresource = 'blob' - args.localresource = str(tmpdir) - m.head('https://blobep.blob.blobep/container/blob?saskey', headers={ - 'content-length': '6', 'content-md5': '1qmpM8iq/FHlWsBmK25NSg=='}) - m.get('https://blobep.blob.blobep/container/blob?saskey', - content=b'012345') - # TODO add encrypted data json - blobxfer.main() - - os.environ.pop(blobxfer._ENVVAR_SASKEY) - - -@patch('blobxfer.parseargs') -def test_main2(patched_parseargs, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - patched_parseargs.return_value = args - args.include = None - args.stripcomponents = 1 - args.delete = False - args.rsaprivatekey = None - args.rsapublickey = None - args.numworkers = 64 - args.storageaccount = 'blobep' - args.container = 'container' - args.chunksizebytes = 5 - args.localresource = lpath - args.endpoint = '.blobep' - args.timeout = 10 - args.managementep = None - args.managementcert = None - args.subscriptionid = None - args.chunksizebytes = None - args.download = False - args.upload = True - args.remoteresource = None - args.collate = None - args.saskey = None - args.storageaccountkey = 'key' - args.fileshare = False - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - - 
session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with patch('azure.storage.blob.BlockBlobService') as mock: - args.createcontainer = True - args.pageblob = False - args.autovhd = False - mock.return_value = MagicMock() - mock.return_value.create_container = _mock_blobservice_create_container - blobxfer.main() - - -@patch('azure.storage.file.FileService.create_share') -@patch('azure.storage.file.FileService.create_file') -@patch('azure.storage.file.FileService.create_directory') -@patch('azure.storage.file.FileService.get_file_properties') -@patch('azure.storage.file.FileService.get_file_metadata') -@patch('azure.storage.file.FileService.list_directories_and_files') -@patch('azure.storage.file.FileService.update_range') -@patch('azure.storage.file.FileService._get_file') -@patch('azure.storage.file.FileService.set_file_properties') -@patch('azure.storage.file.FileService.set_file_metadata') -@patch('azure.storage.file.FileService.resize_file') -@patch('blobxfer.parseargs') -def test_main3( - patched_parseargs, patched_rf, patched_sfm, patched_sfp, - patched_get_file, patched_update_range, patched_ldaf, patched_gfm, - patched_gfp, patched_cd, patched_cf, patched_cs, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - patched_parseargs.return_value = args - args.include = None - args.stripcomponents = 1 - args.delete = False - args.rsaprivatekey = None - args.rsapublickey = None - args.numworkers = 64 - args.storageaccount = 'sa' - args.container = 'myshare' - args.chunksizebytes = 5 - args.localresource = lpath - args.endpoint = 'core.windows.net' - args.timeout = 10 - args.managementep = None - args.managementcert = None - args.subscriptionid = None - args.chunksizebytes = None - args.download = False - args.upload = True - args.remoteresource = None - args.collate = None - args.saskey = None - args.storageaccountkey = 'key' - args.pageblob = False - args.autovhd = False - args.fileshare = True - args.computefilemd5 = True - args.skiponmatch = True - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - - patched_cs.return_value = MagicMock() - patched_cf.return_value = MagicMock() - patched_gfp.return_value = MagicMock() - patched_update_range.return_value = MagicMock() - patched_get_file.return_value = MagicMock() - patched_get_file.return_value.content = b'\0' * 8 - - pemcontents = _RSAKEY.private_bytes( - encoding=cryptography.hazmat.primitives.serialization. - Encoding.PEM, - format=cryptography.hazmat.primitives.serialization. - PrivateFormat.PKCS8, - encryption_algorithm=cryptography.hazmat.primitives. - serialization.NoEncryption()) - pempath = str(tmpdir.join('rsa.pem')) - with open(pempath, 'wb') as f: - f.write(pemcontents) - - args.rsaprivatekey = pempath - args.rsakeypassphrase = None - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - blobxfer.main() - - args.download = True - args.upload = False - args.rsaprivatekey = pempath - args.remoteresource = '.' 
- with pytest.raises(SystemExit): - blobxfer.main() - - patched_ldaf.return_value = [azure.storage.file.File(name='test.tmp')] - patched_gfp.return_value = MagicMock() - patched_gfp.return_value.properties = MagicMock() - patched_gfp.return_value.properties.content_length = 1 - patched_gfp.return_value.properties.content_settings = MagicMock() - patched_gfp.return_value.properties.content_settings.content_md5 = 'md5' - args.rsaprivatekey = pempath - args.localresource = lpath.rstrip(os.path.sep + 'test.tmp') - blobxfer.main() - - os.remove(pempath) diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 0000000..925320c --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,5 @@ +flake8>=3.2.1 +mock>=2.0.0 +pypandoc>=1.3.3 +pytest>=3.0.5 +pytest-cov>=2.4.0 diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py new file mode 100644 index 0000000..bc17d06 --- /dev/null +++ b/tests/test_blobxfer_util.py @@ -0,0 +1,133 @@ +# coding=utf-8 +"""Tests for util""" + +# stdlib imports +import sys +import uuid +# non-stdlib imports +import pytest +# module under test +import blobxfer.util + + +def test_on_python2(): + py2 = sys.version_info.major == 2 + assert py2 == blobxfer.util.on_python2() + + +def test_is_none_or_empty(): + a = None + assert blobxfer.util.is_none_or_empty(a) + a = [] + assert blobxfer.util.is_none_or_empty(a) + a = {} + assert blobxfer.util.is_none_or_empty(a) + a = '' + assert blobxfer.util.is_none_or_empty(a) + a = 'asdf' + assert not blobxfer.util.is_none_or_empty(a) + a = ['asdf'] + assert not blobxfer.util.is_none_or_empty(a) + a = {'asdf': 0} + assert not blobxfer.util.is_none_or_empty(a) + a = [None] + assert not blobxfer.util.is_none_or_empty(a) + + +def test_is_not_empty(): + a = None + assert not blobxfer.util.is_not_empty(a) + a = [] + assert not blobxfer.util.is_not_empty(a) + a = {} + assert not blobxfer.util.is_not_empty(a) + a = '' + assert not blobxfer.util.is_not_empty(a) + a = 'asdf' + assert blobxfer.util.is_not_empty(a) + a = ['asdf'] + assert blobxfer.util.is_not_empty(a) + a = {'asdf': 0} + assert blobxfer.util.is_not_empty(a) + a = [None] + assert blobxfer.util.is_not_empty(a) + + +def test_merge_dict(): + with pytest.raises(ValueError): + blobxfer.util.merge_dict(1, 2) + + a = {'a_only': 42, 'a_and_b': 43, + 'a_only_dict': {'a': 44}, 'a_and_b_dict': {'a_o': 45, 'a_a_b': 46}} + b = {'b_only': 45, 'a_and_b': 46, + 'b_only_dict': {'a': 47}, 'a_and_b_dict': {'b_o': 48, 'a_a_b': 49}} + c = blobxfer.util.merge_dict(a, b) + assert c['a_only'] == 42 + assert c['b_only'] == 45 + assert c['a_and_b_dict']['a_o'] == 45 + assert c['a_and_b_dict']['b_o'] == 48 + assert c['a_and_b_dict']['a_a_b'] == 49 + assert c['b_only_dict']['a'] == 47 + assert c['a_and_b'] == 46 + assert a['a_only'] == 42 + assert a['a_and_b'] == 43 + assert b['b_only'] == 45 + assert b['a_and_b'] == 46 + + +def test_scantree(tmpdir): + tmpdir.mkdir('abc') + abcpath = tmpdir.join('abc') + abcpath.join('hello.txt').write('hello') + abcpath.mkdir('def') + defpath = abcpath.join('def') + defpath.join('world.txt').write('world') + found = set() + for de in blobxfer.util.scantree(str(tmpdir.dirpath())): + if de.name != '.lock': + found.add(de.name) + assert 'hello.txt' in found + assert 'world.txt' in found + assert len(found) == 2 + + +def test_get_mime_type(): + a = 'b.txt' + mt = blobxfer.util.get_mime_type(a) + assert mt == 'text/plain' + a = 'c.probably_cant_determine_this' + mt = blobxfer.util.get_mime_type(a) + assert mt == 'application/octet-stream' + + 
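+
+# Hypothetical sketch (assumption, for illustration only): the expectations
+# in test_get_mime_type above are consistent with a thin wrapper over the
+# stdlib mimetypes module that falls back to octet-stream, e.g.:
+#
+#   def get_mime_type(filename):
+#       return (mimetypes.guess_type(filename)[0] or
+#               'application/octet-stream')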
+def test_base64_encode_as_string(): + a = b'abc' + enc = blobxfer.util.base64_encode_as_string(a) + assert type(enc) != bytes + dec = blobxfer.util.base64_decode_string(enc) + assert a == dec + + +def test_compute_md5(tmpdir): + lpath = str(tmpdir.join('test.tmp')) + testdata = str(uuid.uuid4()) + with open(lpath, 'wt') as f: + f.write(testdata) + md5_file = blobxfer.util.compute_md5_for_file_asbase64(lpath) + md5_data = blobxfer.util.compute_md5_for_data_asbase64( + testdata.encode('utf8')) + assert md5_file == md5_data + + md5_file_page = blobxfer.util.compute_md5_for_file_asbase64(lpath, True) + assert md5_file != md5_file_page + + # test non-existent file + with pytest.raises(IOError): + blobxfer.util.compute_md5_for_file_asbase64(testdata) + + +def test_page_align_content_length(): + assert 0 == blobxfer.util.page_align_content_length(0) + assert 512 == blobxfer.util.page_align_content_length(511) + assert 512 == blobxfer.util.page_align_content_length(512) + assert 1024 == blobxfer.util.page_align_content_length(513) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..f2b110d --- /dev/null +++ b/tox.ini @@ -0,0 +1,18 @@ +[tox] +envlist = py35 + +[testenv] +deps = -rtest_requirements.txt +commands = + #flake8 {envsitepackagesdir}/blobxfer_cli/ + #flake8 {envsitepackagesdir}/blobxfer/ + py.test \ + -x -l -s \ + --ignore venv/ \ + --cov-config .coveragerc \ + --cov-report term-missing \ + --cov {envsitepackagesdir}/blobxfer + +[flake8] +max-line-length = 79 +select = F,E,W From 8053cf69ec591df4bd2714e0a4a1f9f4583ec0f5 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 7 Feb 2017 08:18:16 -0800 Subject: [PATCH 02/47] Continue layout restructure - Add base models - Add client create ops - Travis CI to tox --- .travis.yml | 23 +- blobxfer/api.py | 47 ++ blobxfer/blob/__init__.py | 0 blobxfer/blob/append/__init__.py | 0 blobxfer/blob/append/operations.py | 69 ++ blobxfer/blob/block/__init__.py | 0 blobxfer/blob/block/operations.py | 63 ++ blobxfer/blob/operations.py | 64 ++ blobxfer/blob/page/__init__.py | 0 blobxfer/blob/page/operations.py | 58 ++ blobxfer/file/__init__.py | 0 blobxfer/file/operations.py | 85 +++ blobxfer/models.py | 448 ++++++++++++ blobxfer/operations.py | 81 +++ blobxfer/util.py | 2 +- cli/__init__.py | 0 cli/cli.py | 643 ++++++++++++++++++ cli/settings.py | 211 ++++++ setup.py | 6 + tests/test_blobxfer_blob_append_operations.py | 29 + tests/test_blobxfer_blob_block_operations.py | 29 + tests/test_blobxfer_blob_page_operations.py | 29 + tests/test_blobxfer_file_operations.py | 29 + tests/test_blobxfer_models.py | 132 ++++ tests/test_blobxfer_util.py | 7 +- tox.ini | 2 +- 26 files changed, 2032 insertions(+), 25 deletions(-) create mode 100644 blobxfer/api.py create mode 100644 blobxfer/blob/__init__.py create mode 100644 blobxfer/blob/append/__init__.py create mode 100644 blobxfer/blob/append/operations.py create mode 100644 blobxfer/blob/block/__init__.py create mode 100644 blobxfer/blob/block/operations.py create mode 100644 blobxfer/blob/operations.py create mode 100644 blobxfer/blob/page/__init__.py create mode 100644 blobxfer/blob/page/operations.py create mode 100644 blobxfer/file/__init__.py create mode 100644 blobxfer/file/operations.py create mode 100644 blobxfer/models.py create mode 100644 blobxfer/operations.py create mode 100644 cli/__init__.py create mode 100644 cli/cli.py create mode 100644 cli/settings.py create mode 100644 tests/test_blobxfer_blob_append_operations.py create mode 100644 tests/test_blobxfer_blob_block_operations.py 
create mode 100644 tests/test_blobxfer_blob_page_operations.py create mode 100644 tests/test_blobxfer_file_operations.py create mode 100644 tests/test_blobxfer_models.py diff --git a/.travis.yml b/.travis.yml index 5bc451d..b183124 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,28 +6,9 @@ python: - 3.4 - 3.5 - 3.6 - - pypy - # disable pypy3 until 3.3 compliance - #- pypy3 install: - - | - if [ "$TRAVIS_PYTHON_VERSION" = "pypy" ]; then - export PYENV_ROOT="$HOME/.pyenv" - if [ -f "$PYENV_ROOT/bin/pyenv" ]; then - pushd "$PYENV_ROOT" && git pull && popd - else - rm -rf "$PYENV_ROOT" && git clone --depth 1 https://github.com/yyuu/pyenv.git "$PYENV_ROOT" - fi - export PYPY_VERSION="5.4.1" - "$PYENV_ROOT/bin/pyenv" install --skip-existing "pypy-$PYPY_VERSION" - virtualenv --python="$PYENV_ROOT/versions/pypy-$PYPY_VERSION/bin/python" "$HOME/virtualenvs/pypy-$PYPY_VERSION" - source "$HOME/virtualenvs/pypy-$PYPY_VERSION/bin/activate" - fi - - travis_retry pip install -e . - - travis_retry pip install coveralls flake8 mock pytest pytest-cov requests_mock + - travis_retry pip install tox-travis coveralls script: - - flake8 blobxfer.py test/test_blobxfer.py - - PYTHONPATH=. py.test -l --full-trace --cov-config .coveragerc --cov-report term-missing --cov blobxfer test/test_blobxfer.py + - tox after_success: - coveralls --rcfile=.coveragerc --verbose - diff --git a/blobxfer/api.py b/blobxfer/api.py new file mode 100644 index 0000000..45f2145 --- /dev/null +++ b/blobxfer/api.py @@ -0,0 +1,47 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
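+
+"""blobxfer API: re-exports blob/file client creation and blob helpers"""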
+ +# compat imports +from __future__ import absolute_import, division, print_function # noqa +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +# non-stdlib imports +# local imports + +from .blob.operations import check_if_single_blob # noqa +from .blob.append.operations import ( # noqa + create_client as create_append_blob_client +) +from .blob.block.operations import ( # noqa + create_client as create_block_blob_client +) +from .blob.page.operations import ( # noqa + create_client as create_page_blob_client +) +from .file.operations import ( # noqa + create_client as create_file_client +) diff --git a/blobxfer/blob/__init__.py b/blobxfer/blob/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blobxfer/blob/append/__init__.py b/blobxfer/blob/append/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blobxfer/blob/append/operations.py b/blobxfer/blob/append/operations.py new file mode 100644 index 0000000..88d5b58 --- /dev/null +++ b/blobxfer/blob/append/operations.py @@ -0,0 +1,69 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
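+
+"""Append blob operations: AppendBlobService client creation and listing"""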
+ +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +# non-stdlib imports +from azure.storage.blob import AppendBlobService +# local imports + +# create logger +logger = logging.getLogger(__name__) + + +def create_client(storage_account): + # type: (blobxfer.models.AzureStorageAccount) -> AppendBlobService + """Create Append blob client + :param blobxfer.models.AzureStorageAccount storage_account: storage account + :rtype: AppendBlobService + :return: append blob service client + """ + if storage_account.is_sas: + client = AppendBlobService( + account_name=storage_account.name, + sas_token=storage_account.key, + endpoint_suffix=storage_account.endpoint) + else: + client = AppendBlobService( + account_name=storage_account.name, + account_key=storage_account.key, + endpoint_suffix=storage_account.endpoint) + return client + + +def list_blobs(client, container, prefix): + # type: (azure.storage.blob.AppendBlobService, str, str) -> list + """List append blobs in path + :param AppendBlobService client: append blob client + :param str container: container + :param str prefix: path prefix + """ + + pass diff --git a/blobxfer/blob/block/__init__.py b/blobxfer/blob/block/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blobxfer/blob/block/operations.py b/blobxfer/blob/block/operations.py new file mode 100644 index 0000000..94fd534 --- /dev/null +++ b/blobxfer/blob/block/operations.py @@ -0,0 +1,63 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
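+
+"""Block blob operations: BlockBlobService client creation and upload"""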
+ +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +# non-stdlib imports +from azure.storage.blob import BlockBlobService +# local imports + +# create logger +logger = logging.getLogger(__name__) + + +def create_client(storage_account): + # type: (blobxfer.models.AzureStorageAccount) -> BlockBlobService + """Create block blob client + :param blobxfer.models.AzureStorageAccount storage_account: storage account + :rtype: BlockBlobService + :return: block blob service client + """ + if storage_account.is_sas: + client = BlockBlobService( + account_name=storage_account.name, + sas_token=storage_account.key, + endpoint_suffix=storage_account.endpoint) + else: + client = BlockBlobService( + account_name=storage_account.name, + account_key=storage_account.key, + endpoint_suffix=storage_account.endpoint) + return client + + +def upload_block(): + logger.info('upload block') + print('upload') diff --git a/blobxfer/blob/operations.py b/blobxfer/blob/operations.py new file mode 100644 index 0000000..567d9aa --- /dev/null +++ b/blobxfer/blob/operations.py @@ -0,0 +1,64 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
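+
+"""Blob operations shared across blob types: single-blob check and listing"""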
+ +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +# non-stdlib imports +# local imports +from ..util import is_none_or_empty + +# create logger +logger = logging.getLogger(__name__) + + +def check_if_single_blob(client, container, prefix): + # type: (azure.storage.blob.BaseBlobService, str, str) -> bool + """List append blobs in path + :param azure.storage.blob.BaseBlobService client: blob client + :param str container: container + :param str prefix: path prefix + :rtype: bool + :return: if prefix in container is a single blob + """ + blobs = client.list_blobs( + container_name=container, prefix=prefix, num_results=1) + return is_none_or_empty(blobs.next_marker) + + +def list_blobs(client, container, prefix, mode): + # type: (azure.storage.blob.BaseBlobService, str, str, + # blobxfer.models.AzureStorageModes) -> list + """List blobs in path conforming to mode + :param azure.storage.blob.BaseBlobService client: blob client + :param str container: container + :param str prefix: path prefix + """ + + pass diff --git a/blobxfer/blob/page/__init__.py b/blobxfer/blob/page/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blobxfer/blob/page/operations.py b/blobxfer/blob/page/operations.py new file mode 100644 index 0000000..f23520b --- /dev/null +++ b/blobxfer/blob/page/operations.py @@ -0,0 +1,58 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
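+
+"""Page blob operations: PageBlobService client creation"""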
+ +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +# non-stdlib imports +from azure.storage.blob import PageBlobService +# local imports + +# create logger +logger = logging.getLogger(__name__) + + +def create_client(storage_account): + # type: (blobxfer.models.AzureStorageAccount) -> PageBlobService + """Create block blob client + :param blobxfer.models.AzureStorageAccount storage_account: storage account + :rtype: PageBlobService + :return: block blob service client + """ + if storage_account.is_sas: + client = PageBlobService( + account_name=storage_account.name, + sas_token=storage_account.key, + endpoint_suffix=storage_account.endpoint) + else: + client = PageBlobService( + account_name=storage_account.name, + account_key=storage_account.key, + endpoint_suffix=storage_account.endpoint) + return client diff --git a/blobxfer/file/__init__.py b/blobxfer/file/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blobxfer/file/operations.py b/blobxfer/file/operations.py new file mode 100644 index 0000000..38705d6 --- /dev/null +++ b/blobxfer/file/operations.py @@ -0,0 +1,85 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
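+
+"""Azure file share operations: FileService client creation and listing"""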
+ +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +# non-stdlib imports +from azure.storage.file import FileService +# local imports +from ..util import is_none_or_empty + +# create logger +logger = logging.getLogger(__name__) + + +def create_client(storage_account): + # type: (blobxfer.models.AzureStorageAccount) -> FileService + """Create file client + :param blobxfer.models.AzureStorageAccount storage_account: storage account + :rtype: FileService + :return: file service client + """ + if storage_account.is_sas: + client = FileService( + account_name=storage_account.name, + sas_token=storage_account.key, + endpoint_suffix=storage_account.endpoint) + else: + client = FileService( + account_name=storage_account.name, + account_key=storage_account.key, + endpoint_suffix=storage_account.endpoint) + return client + + +def check_if_single_file(client, container, prefix): + # type: (azure.storage.blob.BaseBlobService, str, str) -> bool + """List append blobs in path + :param azure.storage.blob.BaseBlobService client: blob client + :param str container: container + :param str prefix: path prefix + :rtype: bool + :return: if prefix in container is a single blob + """ + blobs = client.list_blobs( + container_name=container, prefix=prefix, num_results=1) + return is_none_or_empty(blobs.next_marker) + + +def list_blobs(client, container, prefix, mode): + # type: (azure.storage.blob.BaseBlobService, str, str, + # blobxfer.models.AzureStorageModes) -> list + """List blobs in path conforming to mode + :param azure.storage.blob.BaseBlobService client: blob client + :param str container: container + :param str prefix: path prefix + """ + + pass diff --git a/blobxfer/models.py b/blobxfer/models.py new file mode 100644 index 0000000..d511e15 --- /dev/null +++ b/blobxfer/models.py @@ -0,0 +1,448 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
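+
+"""Core models: option tuples, storage credentials/accounts, and
+local/Azure source and destination path abstractions"""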
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import collections +import enum +import fnmatch +import logging +import os +try: + import pathlib2 as pathlib +except ImportError: + import pathlib +# non-stdlib imports +# local imports +from .util import scantree + +# create logger +logger = logging.getLogger(__name__) + + +# enums +class AzureStorageModes(enum.Enum): + Auto = 1 + Append = 2 + Block = 3 + File = 4 + Page = 5 + + +# named tuples +GeneralOptions = collections.namedtuple( + 'GeneralOptions', [ + 'progress_bar', + 'timeout_sec', + 'verbose', + ] +) +VectoredIoOptions = collections.namedtuple( + 'VectoredIoOptions', [ + 'stripe_chunk_size_bytes', + 'multi_storage_account_distribution_mode', + ] +) +SkipOnOptions = collections.namedtuple( + 'SkipOnOptions', [ + 'filesize_match', + 'lmt_ge', + 'md5_match', + ] +) +UploadOptions = collections.namedtuple( + 'UploadOptions', [ + 'chunk_size_bytes', + 'delete_extraneous_destination', + 'exclude', + 'include', + 'mode', + 'overwrite', + 'recursive', + 'rsa_private_key', + 'rsa_private_key_passphrase', + 'rsa_public_key', + 'skip_on', + 'store_file_attributes', + 'store_file_md5', + 'strip_components', + 'vectored_io', + 'split_size_bytes', + ] +) +DownloadOptions = collections.namedtuple( + 'DownloadOptions', [ + 'check_file_md5', + 'delete_extraneous_destination', + 'exclude', + 'include', + 'mode', + 'overwrite', + 'recursive', + 'restore_file_attributes', + 'rsa_private_key', + 'rsa_private_key_passphrase', + 'skip_on', + ] +) +SyncCopyOptions = collections.namedtuple( + 'SyncCopyOptions', [ + 'exclude', + 'include', + 'mode', + 'overwrite', + 'skip_on', + ] +) +LocalPath = collections.namedtuple( + 'LocalPath', [ + 'parent_path', 'relative_path' + ] +) +AzureDestinationOptions = collections.namedtuple( + 'AzureDestinationOptions', [ + 'path', + ] +) +AzureSourceOptions = collections.namedtuple( + 'AzureSourceOptions', [ + ] +) + + +class AzureStorageCredentials(object): + """Azure Storage Credentials""" + def __init__(self): + # type: (AzureStorageCredentials) -> None + """Ctor for AzureStorageCredentials""" + self._storage_accounts = {} + + def add_storage_account(self, name, key, endpoint): + # type: (AzureStorageCredentials, str, str, str) -> None + """Add a storage account + :param AzureStorageCredentials self: this + :param str name: name of storage account to store + :param str key: storage key or sas + :param str endpoint: endpoint + """ + if name in self._storage_accounts: + raise ValueError( + '{} already exists in storage accounts'.format(name)) + self._storage_accounts[name] = AzureStorageAccount(name, key, endpoint) + + def get_storage_account(self, name): + # type: (AzureStorageCredentials, str) -> AzureStorageAccount + """Get storage account details + :param AzureStorageCredentials self: this + :param str name: name of storage account to retrieve + :rtype: AzureStorageAccount + :return: storage account details + """ + return self._storage_accounts[name] + + +class AzureStorageAccount(object): + """Azure Storage Account""" + def __init__(self, name, key, endpoint): + # type: (AzureStorageAccount, str, str, str) -> None + """Ctor for AzureStorageAccount + :param str name: name of storage account + :param str key: storage key or sas + :param str endpoint: endpoint + """ + self.name = name + 
self.key = key + self.endpoint = endpoint + self.is_sas = self._key_is_sas(self.key) + + @staticmethod + def _key_is_sas(key): + # type: (str) -> bool + """Determine if key is a sas + :param str key: key to parse + :rtype: bool + :return: if key is a sas + """ + # keys starting with ? are sas keys as ? is not in the base-64 + # character range + if key.startswith('?'): + return True + else: + # & is not in the base-64 character range, so technically + # the presence of this character means the key is a sas. however, + # perform a stronger check for the sig= parameter. + tmp = key.split('&') + if len(tmp) == 1: + return False + elif any(x.startswith('sig=') for x in tmp): + return True + return False + + +class _BaseSourcePaths(object): + """Base Source Paths""" + def __init__(self): + # type: (_BaseSourcePaths) -> None + """Ctor for _BaseSourcePaths + :param _BaseSourcePaths self: this + """ + self._include = None + self._exclude = None + self._paths = [] + + def add_include(self, incl): + # type: (_BaseSourcePaths, str) -> None + """Add an include + :param _BaseSourcePaths self: this + :param str incl: include filter + """ + if self._include is None: + self._include = [incl] + else: + self._include.append(incl) + + def add_includes(self, includes): + # type: (_BaseSourcePaths, list) -> None + """Add a list of includes + :param _BaseSourcePaths self: this + :param list includes: list of includes + """ + if not isinstance(includes, list): + raise ValueError('includes is not of type list') + if self._include is None: + self._include = includes + else: + self._include.extend(includes) + + def add_exclude(self, excl): + # type: (_BaseSourcePaths, str) -> None + """Add an exclude + :param _BaseSourcePaths self: this + :param str excl: exclude filter + """ + if self._exclude is None: + self._exclude = [excl] + else: + self._exclude.append(excl) + + def add_excludes(self, excludes): + # type: (_BaseSourcePaths, list) -> None + """Add a list of excludes + :param _BaseSourcePaths self: this + :param list excludes: list of excludes + """ + if not isinstance(excludes, list): + raise ValueError('excludes is not of type list') + if self._exclude is None: + self._exclude = excludes + else: + self._exclude.extend(excludes) + + def add_path(self, path): + # type: (_BaseSourcePaths, str) -> None + """Add a local path + :param _BaseSourcePaths self: this + :param str path: path to add + """ + self._paths.append(pathlib.Path(path)) + + def add_paths(self, paths): + # type: (_BaseSourcePaths, list) -> None + """Add a list of local paths + :param _BaseSourcePaths self: this + :param list paths: paths to add + """ + for path in paths: + self.add_path(path) + + def _inclusion_check(self, path): + # type: (_BaseSourcePaths, pathlib.Path) -> bool + """Check file for inclusion against filters + :param _BaseSourcePaths self: this + :param pathlib.Path path: path to check + :rtype: bool + :return: if file should be included + """ + _spath = str(path) + inc = True + if self._include is not None: + inc = any([fnmatch.fnmatch(_spath, x) for x in self._include]) + if inc and self._exclude is not None: + inc = not any([fnmatch.fnmatch(_spath, x) for x in self._exclude]) + return inc + + +class LocalSourcePaths(_BaseSourcePaths): + """Local Source Paths""" + def files(self): + # type: (LocalSourcePaths) -> LocalPath + """Generator for files in paths + :param LocalSourcePaths self: this + :rtype: LocalPath + :return: LocalPath + """ + for _path in self._paths: + _ppath = os.path.expandvars(os.path.expanduser(str(_path))) + 
_expath = pathlib.Path(_ppath) + for entry in scantree(_ppath): + _rpath = pathlib.Path(entry.path).relative_to(_ppath) + if not self._inclusion_check(_rpath): + logger.debug( + 'skipping file {} due to filters'.format(_rpath)) + continue + yield LocalPath(parent_path=_expath, relative_path=_rpath) + + +class LocalDestinationPath(object): + """Local Destination Path""" + def __init__(self, path=None): + # type: (LocalDestinationPath, str) -> None + """Ctor for LocalDestinationPath + :param LocalDestinationPath self: this + :param str path: path + """ + self._is_dir = None + if path is not None: + self.path = path + + @property + def path(self): + # type: (LocalDestinationPath) -> pathlib.Path + """Path property + :param LocalDestinationPath self: this + :rtype: pathlib.Path + :return: local destination path + """ + return self._path + + @path.setter + def path(self, value): + # type: (LocalDestinationPath, str) -> None + """Path property setter + :param LocalDestinationPath self: this + :param str value: value to set path to + """ + self._path = pathlib.Path(value) + + @property + def is_dir(self): + # type: (LocalDestinationPath) -> bool + """is_dir property + :param LocalDestinationPath self: this + :rtype: bool + :return: if local destination path is a directory + """ + return self._is_dir + + @is_dir.setter + def is_dir(self, value): + # type: (LocalDestinationPath, bool) -> None + """is_dir property setter + :param LocalDestinationPath self: this + :param bool value: value to set is_dir to + """ + self._is_dir = value + + def ensure_path_exists(self): + # type: (LocalDestinationPath) -> None + """Ensure path exists + :param LocalDestinationPath self: this + """ + if self._is_dir is None: + raise RuntimeError('is_dir not set') + if self._is_dir: + self._path.mkdir(mode=0o750, parents=True, exist_ok=True) + else: + if self._path.exists() and self._path.is_dir(): + raise RuntimeError( + ('destination path {} already exists and is a ' + 'directory').format(self._path)) + else: + # ensure parent path exists and is created + self._path.parent.mkdir( + mode=0o750, parents=True, exist_ok=True) + + +class AzureSourcePaths(_BaseSourcePaths): + def __init__(self, mode): + super.__init__() + self._mode = mode + + def set_clients(self, append, block, file, page): + pass + + def files(self): + if self._mode == AzureStorageModes.Auto: + pass + elif self._mode == AzureStorageModes.Append: + pass + elif self._mode == AzureStorageModes.Block: + pass + elif self._mode == AzureStorageModes.File: + pass + elif self._mode == AzureStorageModes.Page: + pass + else: + raise RuntimeError('unknown Azure Storage Mode: {}'.format( + self._mode)) + + def _append_files(self): + for _path in self._paths: + + pass + + +class AzureDestinationPaths(object): + def __init__(self): + pass + + +class FileDescriptor(object): + def __init__(self, filepath): + if filepath == '-': + self.stdin = True + self.path = None + else: + self.stdin = False + self.path = pathlib.Path(filepath) + self.size = None + self.hmac = None + self.md5 = None + self.bytes_xferred = 0 + + +class ReadFileDescriptor(FileDescriptor): + def __init__(self, filepath): + super().__init__(filepath) + + +class WriteFileDescriptor(FileDescriptor): + def __init__(self, filepath): + super().__init__(filepath) diff --git a/blobxfer/operations.py b/blobxfer/operations.py new file mode 100644 index 0000000..b073367 --- /dev/null +++ b/blobxfer/operations.py @@ -0,0 +1,81 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +# non-stdlib imports +# local imports +from .models import FileDescriptor + + +def file_chunks(fd, chunk_size): + # type: (FileDescriptor, int) -> bytes + """Generator for getting file chunks of a file + :param FileDescriptor fd: file descriptor + :param int chunk_size: the amount of data to read + :rtype: bytes + :return: file data + """ + with fd.path.open('rb') as f: + while True: + data = f.read(chunk_size) + if not data: + break + yield data + + +def read_file_chunk(fd, chunk_num, chunk_size): + # type: (FileDescriptor, int, int) -> bytes + """Read file chunk + :param FileDescriptor fd: file descriptor + :param int chunk_num: chunk number + :param int chunk_size: the amount of data to read + :rtype: bytes + :return: file data + """ + offset = chunk_num * chunk_size + with fd.path.open('rb') as f: + f.seek(offset, 0) + return f.read(chunk_size) + + +def write_file_chunk(fd, chunk_num, chunk_size, data): + # type: (FileDescriptor, int, int, bytes) -> None + """Write file chunk + :param FileDescriptor fd: file descriptor + :param int chunk_num: chunk number + :param int chunk_size: the amount of data to read + :rtype: bytes + :return: file data + """ + offset = chunk_num * chunk_size + with fd.path.open('wb') as f: + f.seek(offset, 0) + f.write(data) diff --git a/blobxfer/util.py b/blobxfer/util.py index bf3a9a8..f498ff6 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -25,7 +25,7 @@ # compat imports from __future__ import absolute_import, division, print_function from builtins import ( # noqa - bytes, dict, int, list, object, range, str, ascii, chr, hex, input, + bytes, dict, int, list, object, range, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip ) # stdlib imports diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cli/cli.py b/cli/cli.py new file mode 100644 index 0000000..cb23a31 --- /dev/null +++ b/cli/cli.py @@ -0,0 +1,643 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import json +import logging +try: + import pathlib2 as pathlib +except ImportError: + import pathlib +# non-stdlib imports +import click +import ruamel.yaml +# blobxfer library imports +import blobxfer.api +import blobxfer.util +# local imports +import settings + +# create logger +logger = logging.getLogger('blobxfer') +blobxfer.util.setup_logger(logger) +# global defines +_CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) + + +class CliContext(object): + """CliContext class: holds context for CLI commands""" + def __init__(self): + """Ctor for CliContext""" + self.yaml_config = None + self.config = {} + self.cli_options = {} + self.block_blob_client = None + self.page_blob_client = None + self.append_blob_client = None + self.smb_file_client = None + + def initialize(self): + # type: (CliContext) -> None + """Initialize context + :param CliContext self: this + """ + self._init_config() + + def _read_yaml_file(self, yaml_file): + # type: (CliContext, pathlib.Path) -> None + """Read a yaml file into self.config + :param CliContext self: this + :param pathlib.Path yaml_file: yaml file to load + """ + with yaml_file.open('r') as f: + if self.config is None: + self.config = ruamel.yaml.load( + f, Loader=ruamel.yaml.RoundTripLoader) + else: + self.config = blobxfer.util.merge_dict( + self.config, ruamel.yaml.load( + f, Loader=ruamel.yaml.RoundTripLoader)) + + def _init_config(self): + # type: (CliContext) -> None + """Initializes configuration of the context + :param CliContext self: this + """ + # load yaml config file into memory + if blobxfer.util.is_not_empty(self.yaml_config): + self.yaml_config = pathlib.Path(self.yaml_config) + self._read_yaml_file(self.yaml_config) + # merge cli options with config + settings.merge_settings(self.config, self.cli_options) + if self.config['options']['verbose']: + logger.debug('config: \n' + json.dumps(self.config, indent=4)) + # free mem + del self.yaml_config + del self.cli_options + + +# create a pass decorator for shared context between commands +pass_cli_context = click.make_pass_decorator(CliContext, ensure=True) + + +def _progress_bar_option(f): + def callback(ctx, param, value): + clictx = 
ctx.ensure_object(CliContext) + clictx.cli_options['progress_bar'] = value + return value + return click.option( + '--progress-bar/--no-progress-bar', + expose_value=False, + default=True, + help='Display progress bar', + callback=callback)(f) + + +def _timeout_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['timeout'] = value + return value + return click.option( + '--timeout', + expose_value=False, + type=int, + help='Individual chunk transfer timeout', + callback=callback)(f) + + +def _verbose_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['verbose'] = value + return value + return click.option( + '-v', '--verbose', + expose_value=False, + is_flag=True, + help='Verbose output', + callback=callback)(f) + + +def common_options(f): + f = _verbose_option(f) + f = _timeout_option(f) + f = _progress_bar_option(f) + return f + + +def _local_resource_argument(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.local_resource = value + return value + return click.argument( + 'local-resource', + callback=callback)(f) + + +def _storage_account_argument(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['storage_account'] = value + return value + return click.argument( + 'storage-account', + callback=callback)(f) + + +def _remote_path_argument(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['remote_path'] = value + return value + return click.argument( + 'remote-path', + callback=callback)(f) + + +def upload_download_arguments(f): + f = _remote_path_argument(f) + f = _storage_account_argument(f) + f = _local_resource_argument(f) + return f + + +def _sync_copy_dest_storage_account_argument(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sync_copy_dest_storage_account'] = value + return value + return click.argument( + 'sync-copy-dest-storage-account', + callback=callback)(f) + + +def _sync_copy_dest_remote_path_argument(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sync_copy_dest_remote_path'] = value + return value + return click.argument( + 'sync-copy-dest-remote-path', + callback=callback)(f) + + +def sync_copy_arguments(f): + f = _sync_copy_dest_remote_path_argument(f) + f = _sync_copy_dest_storage_account_argument(f) + f = _remote_path_argument(f) + f = _storage_account_argument(f) + return f + + +def _access_key_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['access_key'] = value + return value + return click.option( + '--access-key', + expose_value=False, + help='Storage account access key', + envvar='BLOBXFER_ACCESS_KEY', + callback=callback)(f) + + +def _chunk_size_bytes_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['chunk_size_bytes'] = value + return value + return click.option( + '--chunk-size-bytes', + expose_value=False, + type=int, + default=4194304, + help='Chunk size in bytes [4194304]', + callback=callback)(f) + + +def _delete_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['delete'] = value + return value + return click.option( + '--delete', + expose_value=False, + is_flag=True, + help='Delete extraneous files on target [False]', + 
callback=callback)(f) + + +def _endpoint_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['endpoint'] = value + return value + return click.option( + '--endpoint', + expose_value=False, + default='core.windows.net', + help='Azure Storage endpoint [core.windows.net]', + callback=callback)(f) + + +def _exclude_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['exclude'] = value + return value + return click.option( + '--exclude', + expose_value=False, + default=None, + help='Exclude pattern', + callback=callback)(f) + + +def _file_attributes(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['file_attributes'] = value + return value + return click.option( + '--file-attributes', + expose_value=False, + is_flag=True, + help='Store or restore file attributes [False]', + callback=callback)(f) + + +def _file_md5_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['file_md5'] = value + return value + return click.option( + '--file-md5/--no-file-md5', + expose_value=False, + default=True, + help='Compute file MD5 [True]', + callback=callback)(f) + + +def _include_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['include'] = value + return value + return click.option( + '--include', + expose_value=False, + default=None, + help='Include pattern', + callback=callback)(f) + + +def _mode_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['mode'] = value + return value + return click.option( + '--mode', + expose_value=False, + default='auto', + help='Transfer mode: auto, append, block, file, page [auto]', + callback=callback)(f) + + +def _overwrite_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['overwrite'] = value + return value + return click.option( + '--overwrite/--no-overwrite', + expose_value=False, + default=True, + help='Overwrite destination if exists [True]', + callback=callback)(f) + + +def _recursive_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['recursive'] = value + return value + return click.option( + '--recursive/--no-recursive', + expose_value=False, + default=True, + help='Recursive [True]', + callback=callback)(f) + + +def _rsa_private_key_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['rsa_private_key'] = value + return value + return click.option( + '--rsa-private-key', + expose_value=False, + default=None, + help='RSA private key', + envvar='BLOBXFER_RSA_PRIVATE_KEY', + callback=callback)(f) + + +def _rsa_private_key_passphrase_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['rsa_private_key_passphrase'] = value + return value + return click.option( + '--rsa-private-key-passphrase', + expose_value=False, + default=None, + help='RSA private key passphrase', + envvar='BLOBXFER_RSA_PRIVATE_KEY_PASSPHRASE', + callback=callback)(f) + + +def _rsa_public_key_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['rsa_public_key'] = value + return value + return click.option( + '--rsa-public-key', + expose_value=False, + default=None, + help='RSA public key', + 
envvar='BLOBXFER_RSA_PUBLIC_KEY', + callback=callback)(f) + + +def _sas_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sas'] = value + return value + return click.option( + '--sas', + expose_value=False, + help='Shared access signature', + envvar='BLOBXFER_SAS', + callback=callback)(f) + + +def _skip_on_filesize_match_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['skip_on_filesize_match'] = value + return value + return click.option( + '--skip-on-filesize-match', + expose_value=False, + is_flag=True, + help='Skip on equivalent file size [False]', + callback=callback)(f) + + +def _skip_on_lmt_ge_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['skip_on_lmt_ge'] = value + return value + return click.option( + '--skip-on-lmt-ge', + expose_value=False, + is_flag=True, + help='Skip on last modified time greater than or equal to [False]', + callback=callback)(f) + + +def _skip_on_md5_match_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['skip_on_md5_match'] = value + return value + return click.option( + '--skip-on-md5-match', + expose_value=False, + is_flag=True, + help='Skip on MD5 match [False]', + callback=callback)(f) + + +def _strip_components_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['strip_components'] = value + return value + return click.option( + '--strip-components', + expose_value=False, + type=int, + default=1, + help='Strip leading file path components [1]', + callback=callback)(f) + + +def _sync_copy_dest_access_key_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sync_copy_dest_access_key'] = value + return value + return click.option( + '--sync-copy-dest-access-key', + expose_value=False, + help='Storage account access key for synccopy destination', + envvar='BLOBXFER_SYNC_COPY_DEST_ACCESS_KEY', + callback=callback)(f) + + +def _sync_copy_dest_sas_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sync_copy_dest_sas'] = value + return value + return click.option( + '--sync-copy-dest-sas', + expose_value=False, + help='Shared access signature for synccopy destination', + envvar='BLOBXFER_SYNC_COPY_SAS', + callback=callback)(f) + + +def upload_options(f): + f = _strip_components_option(f) + f = _skip_on_md5_match_option(f) + f = _skip_on_lmt_ge_option(f) + f = _skip_on_filesize_match_option(f) + f = _sas_option(f) + f = _rsa_public_key_option(f) + f = _rsa_private_key_passphrase_option(f) + f = _rsa_private_key_option(f) + f = _recursive_option(f) + f = _overwrite_option(f) + f = _mode_option(f) + f = _include_option(f) + f = _file_md5_option(f) + f = _file_attributes(f) + f = _exclude_option(f) + f = _endpoint_option(f) + f = _delete_option(f) + f = _chunk_size_bytes_option(f) + f = _access_key_option(f) + return f + + +def download_options(f): + f = _skip_on_md5_match_option(f) + f = _skip_on_lmt_ge_option(f) + f = _skip_on_filesize_match_option(f) + f = _sas_option(f) + f = _rsa_private_key_passphrase_option(f) + f = _rsa_private_key_option(f) + f = _recursive_option(f) + f = _overwrite_option(f) + f = _mode_option(f) + f = _include_option(f) + f = _file_md5_option(f) + f = _file_attributes(f) + f = _exclude_option(f) + f = _endpoint_option(f) + f = 
_delete_option(f) + f = _access_key_option(f) + return f + + +def sync_copy_options(f): + f = _sync_copy_dest_sas_option(f) + f = _sync_copy_dest_access_key_option(f) + f = _skip_on_md5_match_option(f) + f = _skip_on_lmt_ge_option(f) + f = _skip_on_filesize_match_option(f) + f = _sas_option(f) + f = _overwrite_option(f) + f = _mode_option(f) + f = _include_option(f) + f = _exclude_option(f) + f = _endpoint_option(f) + f = _access_key_option(f) + return f + + +def _config_argument(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.yaml_config = value + return value + return click.argument( + 'config', + callback=callback)(f) + + +def config_arguments(f): + f = _config_argument(f) + return f + + +@click.group(context_settings=_CONTEXT_SETTINGS) +@click.version_option(version=blobxfer.__version__) +@click.pass_context +def cli(ctx): + """Blobxfer-CLI: Azure Storage transfer tool""" + pass + + +@cli.command('download') +@upload_download_arguments +@download_options +@common_options +@pass_cli_context +def download(ctx, local_resource, storage_account, remote_path): + """Download blobs or files from Azure Storage""" + settings.add_cli_options( + ctx.cli_options, settings.TransferAction.Download, local_resource, + storage_account, remote_path) + ctx.initialize() + raise NotImplementedError() + + +@cli.command('synccopy') +@sync_copy_arguments +@sync_copy_options +@common_options +@pass_cli_context +def synccopy( + ctx, local_resource, storage_account, remote_path, + sync_copy_dest_storage_account, sync_copy_dest_remote_path): + """Synchronously copy blobs between Azure Storage accounts""" + settings.add_cli_options( + ctx.cli_options, settings.TransferAction.Synccopy, local_resource, + storage_account, remote_path, sync_copy_dest_storage_account, + sync_copy_dest_remote_path) + ctx.initialize() + raise NotImplementedError() + + +@cli.command('upload') +@upload_download_arguments +@upload_options +@common_options +@pass_cli_context +def upload(ctx, local_resource, storage_account, remote_path): + """Upload files to Azure Storage""" + settings.add_cli_options( + ctx.cli_options, settings.TransferAction.Upload, local_resource, + storage_account, remote_path) + ctx.initialize() + blobxfer.api.upload_block() + + +@cli.group() +@pass_cli_context +def useconfig(ctx): + """Use config file for transfer""" + pass + + +@useconfig.command('upload') +@config_arguments +@common_options +@pass_cli_context +def useconfig_upload(ctx): + """Upload files to Azure File Storage""" + ctx.initialize() + raise NotImplementedError() + + +if __name__ == '__main__': + cli() diff --git a/cli/settings.py b/cli/settings.py new file mode 100644 index 0000000..9d54d42 --- /dev/null +++ b/cli/settings.py @@ -0,0 +1,211 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
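The option and argument decorators defined in cli/cli.py above all follow a single pattern: each click parameter is declared with expose_value=False and a callback that stashes the parsed value on a shared CliContext obtained via ctx.ensure_object, so commands only receive the context object. A minimal standalone sketch of that mechanism follows; the names (DemoContext, --level, demo) are hypothetical and only illustrate the pattern, they are not part of blobxfer:

    # Minimal sketch of the callback-stashing pattern used by cli/cli.py.
    # All names below (DemoContext, _level_option, demo) are illustrative only.
    import click


    class DemoContext(object):
        def __init__(self):
            self.cli_options = {}


    def _level_option(f):
        def callback(ctx, param, value):
            clictx = ctx.ensure_object(DemoContext)
            clictx.cli_options['level'] = value
            return value
        return click.option(
            '--level',
            expose_value=False,   # value is not passed to the command function
            type=int,
            default=1,
            help='Example level [1]',
            callback=callback)(f)  # callback stores the value on the context


    @click.command()
    @_level_option
    @click.make_pass_decorator(DemoContext, ensure=True)
    def demo(ctx):
        # the command only receives the context; options live in cli_options
        click.echo('level={}'.format(ctx.cli_options['level']))


    if __name__ == '__main__':
        demo()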
+# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import enum +# non-stdlib imports +# local imports +from blobxfer.util import is_none_or_empty, is_not_empty, merge_dict + + +# enums +class TransferAction(enum.Enum): + Download = 1, + Upload = 2, + Synccopy = 3, + + +def add_cli_options( + cli_options, action, local_resource=None, storage_account=None, + remote_path=None, sync_copy_dest_storage_account=None, + sync_copy_dest_remote_path=None): + # type: (dict, str, str, str, str, str, str) -> None + """Adds CLI options to the configuration object + :param dict cli_options: CLI options dict + :param TransferAction action: action + :param str local_resource: local resource + :param str storage_account: storage account + :param str remote_path: remote path + :param str sync_copy_dest_storage_account: synccopy dest sa + :param str sync_copy_dest_remote_path: synccopy dest rp + """ + cli_options['_action'] = action.name.lower() + if is_not_empty(storage_account): + # add credentials + try: + key = cli_options['access_key'] + if is_none_or_empty(key): + raise KeyError() + except KeyError: + try: + key = cli_options['sas'] + if is_none_or_empty(key): + raise KeyError() + except KeyError: + raise RuntimeError('access key or sas must be provided') + azstorage = { + 'endpoint': cli_options['endpoint'], + 'accounts': { + storage_account: key + } + } + del key + # construct "argument" from cli options + sa_rp = {storage_account: remote_path} + if action == TransferAction.Upload: + arg = { + 'source': [local_resource], + 'destination': [sa_rp], + 'include': cli_options['include'], + 'exclude': cli_options['exclude'], + 'options': { + 'chunk_size_bytes': cli_options['chunk_size_bytes'], + 'delete_extraneous_destination': cli_options['delete'], + 'mode': cli_options['mode'], + 'overwrite': cli_options['overwrite'], + 'recursive': cli_options['recursive'], + 'rsa_private_key': cli_options['rsa_private_key'], + 'rsa_private_key_passphrase': cli_options[ + 'rsa_private_key_passphrase'], + 'rsa_public_key': cli_options['rsa_public_key'], + 'skip_on': { + 'filesize_match': cli_options[ + 'skip_on_filesize_match'], + 'lmt_ge': cli_options['skip_on_lmt_ge'], + 'md5_match': cli_options['skip_on_md5_match'], + }, + 'store_file_attributes': cli_options['file_attributes'], + 'store_file_md5': cli_options['file_md5'], + 'strip_components': cli_options['strip_components'], + }, + } + elif action == TransferAction.Download: + arg = { + 'source': [sa_rp], + 'destination': local_resource, + 'include': cli_options['include'], + 'exclude': cli_options['exclude'], + 'options': { + 'check_file_md5': cli_options['file_md5'], + 'delete_extraneous_destination': cli_options['delete'], + 'mode': cli_options['mode'], + 'overwrite': cli_options['overwrite'], + 'recursive': cli_options['recursive'], + 'rsa_private_key': 
cli_options['rsa_private_key'], + 'rsa_private_key_passphrase': cli_options[ + 'rsa_private_key_passphrase'], + 'restore_file_attributes': cli_options['file_attributes'], + 'skip_on': { + 'filesize_match': cli_options[ + 'skip_on_filesize_match'], + 'lmt_ge': cli_options['skip_on_lmt_ge'], + 'md5_match': cli_options['skip_on_md5_match'], + }, + }, + } + elif action == TransferAction.Synccopy: + if is_none_or_empty(sync_copy_dest_storage_account): + raise RuntimeError( + 'must specify a destination storage account') + arg = { + 'source': sa_rp, + 'destination': [ + { + sync_copy_dest_storage_account: + sync_copy_dest_remote_path + } + ], + 'include': cli_options['include'], + 'exclude': cli_options['exclude'], + 'options': { + 'mode': cli_options['mode'], + 'overwrite': cli_options['overwrite'], + 'skip_on': { + 'filesize_match': cli_options[ + 'skip_on_filesize_match'], + 'lmt_ge': cli_options['skip_on_lmt_ge'], + 'md5_match': cli_options['skip_on_md5_match'], + }, + }, + } + try: + destkey = cli_options['sync_copy_dest_access_key'] + if is_none_or_empty(destkey): + raise KeyError() + except KeyError: + try: + destkey = cli_options['sync_copy_dest_sas'] + if is_none_or_empty(destkey): + raise KeyError() + except KeyError: + raise RuntimeError( + 'destination access key or sas must be provided') + azstorage['accounts'][ + cli_options['sync_copy_dest_storage_account']] = destkey + del destkey + cli_options[action.name.lower()] = arg + cli_options['azure_storage'] = azstorage + + +def merge_settings(config, cli_options): + # type: (dict, dict) -> None + """Merge CLI options into main config + :param dict config: config dict + :param dict cli_options: cli options + """ + action = cli_options['_action'] + if (action != TransferAction.Upload.name.lower() and + action != TransferAction.Download.name.lower() and + action == TransferAction.Synccopy.name.lower()): + raise ValueError('invalid action: {}'.format(action)) + # create action options + if action not in config: + config[action] = [] + # merge any argument options + if action in cli_options: + config[action].append(cli_options[action]) + # merge credentials + if 'azure_storage' in cli_options: + if 'azure_storage' not in config: + config['azure_storage'] = {} + config['azure_storage'] = merge_dict( + config['azure_storage'], cli_options['azure_storage']) + # merge general options + if 'options' not in config: + config['options'] = {} + try: + config['options']['verbose'] = cli_options['verbose'] + except KeyError: + pass + try: + config['options']['timeout_sec'] = cli_options['timeout'] + except KeyError: + pass diff --git a/setup.py b/setup.py index 83d5abb..6ea9896 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,10 @@ packages = [ 'blobxfer', 'blobxfer.blob', + 'blobxfer.blob.append', 'blobxfer.blob.block', + 'blobxfer.blob.page', + 'blobxfer.file', 'blobxfer_cli', ] @@ -45,6 +48,9 @@ 'ruamel.yaml==0.13.11', ] +if sys.version_info < (3, 4): + install_requires.append('enum34') + if sys.version_info < (3, 5): install_requires.append('pathlib2') install_requires.append('scandir') diff --git a/tests/test_blobxfer_blob_append_operations.py b/tests/test_blobxfer_blob_append_operations.py new file mode 100644 index 0000000..b4ad982 --- /dev/null +++ b/tests/test_blobxfer_blob_append_operations.py @@ -0,0 +1,29 @@ +# coding=utf-8 +"""Tests for models""" + +# stdlib imports +# non-stdlib imports +import azure.storage +import pytest +# local imports +import blobxfer.models as models +# module under test +import blobxfer.blob.append.operations as 
ops + + +def test_create_client(): + sa = models.AzureStorageAccount('name', 'key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.blob.AppendBlobService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSharedKeyAuthentication) + + sa = models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.blob.AppendBlobService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSASAuthentication) diff --git a/tests/test_blobxfer_blob_block_operations.py b/tests/test_blobxfer_blob_block_operations.py new file mode 100644 index 0000000..dc83b8b --- /dev/null +++ b/tests/test_blobxfer_blob_block_operations.py @@ -0,0 +1,29 @@ +# coding=utf-8 +"""Tests for models""" + +# stdlib imports +# non-stdlib imports +import azure.storage +import pytest +# local imports +import blobxfer.models as models +# module under test +import blobxfer.blob.block.operations as ops + + +def test_create_client(): + sa = models.AzureStorageAccount('name', 'key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.blob.BlockBlobService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSharedKeyAuthentication) + + sa = models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.blob.BlockBlobService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSASAuthentication) diff --git a/tests/test_blobxfer_blob_page_operations.py b/tests/test_blobxfer_blob_page_operations.py new file mode 100644 index 0000000..8ae5989 --- /dev/null +++ b/tests/test_blobxfer_blob_page_operations.py @@ -0,0 +1,29 @@ +# coding=utf-8 +"""Tests for models""" + +# stdlib imports +# non-stdlib imports +import azure.storage +import pytest +# local imports +import blobxfer.models as models +# module under test +import blobxfer.blob.page.operations as ops + + +def test_create_client(): + sa = models.AzureStorageAccount('name', 'key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.blob.PageBlobService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSharedKeyAuthentication) + + sa = models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.blob.PageBlobService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSASAuthentication) diff --git a/tests/test_blobxfer_file_operations.py b/tests/test_blobxfer_file_operations.py new file mode 100644 index 0000000..56c4b95 --- /dev/null +++ b/tests/test_blobxfer_file_operations.py @@ -0,0 +1,29 @@ +# coding=utf-8 +"""Tests for models""" + +# stdlib imports +# non-stdlib imports +import azure.storage +import pytest +# local imports +import blobxfer.models as models +# module under test +import blobxfer.file.operations as ops + + +def test_create_client(): + sa = models.AzureStorageAccount('name', 'key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.file.FileService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSharedKeyAuthentication) + + sa = 
models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + client = ops.create_client(sa) + assert client is not None + assert isinstance(client, azure.storage.file.FileService) + assert isinstance( + client.authentication, + azure.storage._auth._StorageSASAuthentication) diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py new file mode 100644 index 0000000..2de4bc2 --- /dev/null +++ b/tests/test_blobxfer_models.py @@ -0,0 +1,132 @@ +# coding=utf-8 +"""Tests for models""" + +# stdlib imports +import os +# non-stdlib imports +import pytest +# module under test +import blobxfer.models + + +def test_storage_credentials(): + creds = blobxfer.models.AzureStorageCredentials() + creds.add_storage_account('sa1', 'somekey1', 'endpoint') + + a = creds.get_storage_account('sa1') + assert a.name == 'sa1' + assert a.key == 'somekey1' + assert a.endpoint == 'endpoint' + + with pytest.raises(KeyError): + a = creds.get_storage_account('sa2') + + with pytest.raises(ValueError): + creds.add_storage_account('sa1', 'somekeyxx', 'endpoint') + + creds.add_storage_account('sa2', 'somekey2', 'endpoint2') + a = creds.get_storage_account('sa1') + b = creds.get_storage_account('sa2') + assert a.name == 'sa1' + assert a.key == 'somekey1' + assert a.endpoint == 'endpoint' + assert b.name == 'sa2' + assert b.key == 'somekey2' + assert b.endpoint == 'endpoint2' + + +def test_key_is_sas(): + a = blobxfer.models.AzureStorageAccount('name', 'abcdef', 'endpoint') + assert not a.is_sas + + a = blobxfer.models.AzureStorageAccount('name', 'abcdef&blah', 'endpoint') + assert not a.is_sas + + a = blobxfer.models.AzureStorageAccount('name', '?abcdef', 'endpoint') + assert a.is_sas + + a = blobxfer.models.AzureStorageAccount( + 'name', '?sv=0&sr=1&sig=2', 'endpoint') + assert a.is_sas + + a = blobxfer.models.AzureStorageAccount( + 'name', 'sv=0&sr=1&sig=2', 'endpoint') + assert a.is_sas + + a = blobxfer.models.AzureStorageAccount( + 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint') + assert a.is_sas + + +def test_localsourcepaths_files(tmpdir): + tmpdir.mkdir('abc') + tmpdir.join('moo.cow').write('z') + abcpath = tmpdir.join('abc') + abcpath.join('hello.txt').write('hello') + abcpath.join('blah.x').write('x') + abcpath.join('blah.y').write('x') + abcpath.join('blah.z').write('x') + abcpath.mkdir('def') + defpath = abcpath.join('def') + defpath.join('world.txt').write('world') + defpath.join('moo.cow').write('y') + + a = blobxfer.models.LocalSourcePaths() + a.add_include('*.txt') + a.add_includes(['moo.cow', '*blah*']) + with pytest.raises(ValueError): + a.add_includes('abc') + a.add_exclude('**/blah.x') + a.add_excludes(['world.txt']) + with pytest.raises(ValueError): + a.add_excludes('abc') + a.add_path(str(tmpdir)) + a_set = set() + for file in a.files(): + sfile = str(file.parent_path / file.relative_path) + a_set.add(sfile) + + assert str(abcpath.join('blah.x')) not in a_set + assert str(defpath.join('world.txt')) in a_set + assert str(defpath.join('moo.cow')) not in a_set + + b = blobxfer.models.LocalSourcePaths() + b.add_includes(['moo.cow', '*blah*']) + b.add_include('*.txt') + b.add_excludes(['world.txt']) + b.add_exclude('**/blah.x') + b.add_paths([str(tmpdir)]) + for file in a.files(): + sfile = str(file.parent_path / file.relative_path) + assert sfile in a_set + + +def test_localdestinationpath(tmpdir): + tmpdir.mkdir('1') + path = tmpdir.join('1') + + a = blobxfer.models.LocalDestinationPath(str(path)) + a.is_dir = True + assert str(a.path) == str(path) + assert a.is_dir + + 
a.ensure_path_exists() + assert os.path.exists(str(a.path)) + + b = blobxfer.models.LocalDestinationPath() + b.is_dir = False + b.path = str(path) + with pytest.raises(RuntimeError): + b.ensure_path_exists() + assert not b.is_dir + + path2 = tmpdir.join('2') + path3 = path2.join('3') + c = blobxfer.models.LocalDestinationPath(str(path3)) + with pytest.raises(RuntimeError): + c.ensure_path_exists() + c.is_dir = False + c.ensure_path_exists() + assert os.path.exists(str(path2)) + assert os.path.isdir(str(path2)) + assert not c.is_dir diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py index bc17d06..37e070c 100644 --- a/tests/test_blobxfer_util.py +++ b/tests/test_blobxfer_util.py @@ -83,7 +83,7 @@ def test_scantree(tmpdir): defpath = abcpath.join('def') defpath.join('world.txt').write('world') found = set() - for de in blobxfer.util.scantree(str(tmpdir.dirpath())): + for de in blobxfer.util.scantree(str(tmpdir)): if de.name != '.lock': found.add(de.name) assert 'hello.txt' in found @@ -103,7 +103,10 @@ def test_get_mime_type(): def test_base64_encode_as_string(): a = b'abc' enc = blobxfer.util.base64_encode_as_string(a) - assert type(enc) != bytes + if blobxfer.util.on_python2(): + assert type(enc) == str + else: + assert type(enc) != bytes dec = blobxfer.util.base64_decode_string(enc) assert a == dec diff --git a/tox.ini b/tox.ini index f2b110d..58a6df6 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py35 +envlist = py27, py35 [testenv] deps = -rtest_requirements.txt From f0a4209e438420171927e742e03c686e1ab43533 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 14 Feb 2017 14:57:29 -0800 Subject: [PATCH 03/47] Continue download model evolution with tests --- blobxfer/__init__.py | 5 + blobxfer/api.py | 1 - blobxfer/blob/operations.py | 2 +- blobxfer/file/operations.py | 37 +++++-- blobxfer/models.py | 177 ++++++++++++++++++++++++------ blobxfer/operations.py | 38 ++++++- blobxfer/util.py | 38 ++++++- cli/cli.py | 14 ++- cli/settings.py | 104 ++++++++++++++++-- tests/test_blobxfer.py | 13 +++ tests/test_blobxfer_models.py | 61 +++++++++- tests/test_blobxfer_operations.py | 83 ++++++++++++++ tests/test_blobxfer_util.py | 43 ++++++++ 13 files changed, 552 insertions(+), 64 deletions(-) create mode 100644 tests/test_blobxfer.py create mode 100644 tests/test_blobxfer_operations.py diff --git a/blobxfer/__init__.py b/blobxfer/__init__.py index 157d59f..e05319c 100644 --- a/blobxfer/__init__.py +++ b/blobxfer/__init__.py @@ -23,3 +23,8 @@ # DEALINGS IN THE SOFTWARE. 
from .version import __version__ # noqa + +# monkeypatch User-Agent string +import azure.storage +azure.storage._constants.USER_AGENT_STRING = 'blobxfer/{} {}'.format( + __version__, azure.storage._constants.USER_AGENT_STRING) diff --git a/blobxfer/api.py b/blobxfer/api.py index 45f2145..65e91ad 100644 --- a/blobxfer/api.py +++ b/blobxfer/api.py @@ -32,7 +32,6 @@ # non-stdlib imports # local imports -from .blob.operations import check_if_single_blob # noqa from .blob.append.operations import ( # noqa create_client as create_append_blob_client ) diff --git a/blobxfer/blob/operations.py b/blobxfer/blob/operations.py index 567d9aa..126c267 100644 --- a/blobxfer/blob/operations.py +++ b/blobxfer/blob/operations.py @@ -40,7 +40,7 @@ def check_if_single_blob(client, container, prefix): # type: (azure.storage.blob.BaseBlobService, str, str) -> bool - """List append blobs in path + """Check if prefix is a single blob or multiple blobs :param azure.storage.blob.BaseBlobService client: blob client :param str container: container :param str prefix: path prefix diff --git a/blobxfer/file/operations.py b/blobxfer/file/operations.py index 38705d6..3501b65 100644 --- a/blobxfer/file/operations.py +++ b/blobxfer/file/operations.py @@ -30,10 +30,14 @@ ) # stdlib imports import logging +try: + import pathlib2 as pathlib +except ImportError: + import pathlib # non-stdlib imports +import azure.common from azure.storage.file import FileService # local imports -from ..util import is_none_or_empty # create logger logger = logging.getLogger(__name__) @@ -59,18 +63,30 @@ def create_client(storage_account): return client -def check_if_single_file(client, container, prefix): - # type: (azure.storage.blob.BaseBlobService, str, str) -> bool - """List append blobs in path - :param azure.storage.blob.BaseBlobService client: blob client - :param str container: container +def parse_file_path(filepath): + # type: (pathlib.Path) -> tuple + if not isinstance(filepath, pathlib.Path): + filepath = pathlib.Path(filepath) + dirname = '/'.join(filepath.parts[:len(filepath.parts) - 1]) + return (dirname, filepath.parts[-1]) + + +def check_if_single_file(client, fileshare, prefix): + # type: (azure.storage.file.FileService, str, str) -> bool + """Check if prefix is a single file or multiple files + :param FileService client: blob client + :param str fileshare: file share name :param str prefix: path prefix :rtype: bool - :return: if prefix in container is a single blob + :return: if prefix in fileshare is a single file """ - blobs = client.list_blobs( - container_name=container, prefix=prefix, num_results=1) - return is_none_or_empty(blobs.next_marker) + dirname, fname = parse_file_path(prefix) + try: + client.get_file_properties( + share_name=fileshare, directory_name=dirname, file_name=fname) + except azure.common.AzureMissingResourceHttpError: + return False + return True def list_blobs(client, container, prefix, mode): @@ -81,5 +97,4 @@ def list_blobs(client, container, prefix, mode): :param str container: container :param str prefix: path prefix """ - pass diff --git a/blobxfer/models.py b/blobxfer/models.py index d511e15..1e7e583 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -41,7 +41,16 @@ import pathlib # non-stdlib imports # local imports -from .util import scantree +from .api import ( + create_append_blob_client, + create_block_blob_client, + create_file_client, + create_page_blob_client, +) +from .util import ( + normalize_azure_path, + scantree, +) # create logger logger = logging.getLogger(__name__) @@ 
-81,15 +90,12 @@ class AzureStorageModes(enum.Enum): 'UploadOptions', [ 'chunk_size_bytes', 'delete_extraneous_destination', - 'exclude', - 'include', 'mode', 'overwrite', 'recursive', 'rsa_private_key', 'rsa_private_key_passphrase', 'rsa_public_key', - 'skip_on', 'store_file_attributes', 'store_file_md5', 'strip_components', @@ -101,15 +107,12 @@ class AzureStorageModes(enum.Enum): 'DownloadOptions', [ 'check_file_md5', 'delete_extraneous_destination', - 'exclude', - 'include', 'mode', 'overwrite', 'recursive', 'restore_file_attributes', 'rsa_private_key', - 'rsa_private_key_passphrase', - 'skip_on', + 'rsa_private_key_passphrase' ] ) SyncCopyOptions = collections.namedtuple( @@ -126,15 +129,6 @@ class AzureStorageModes(enum.Enum): 'parent_path', 'relative_path' ] ) -AzureDestinationOptions = collections.namedtuple( - 'AzureDestinationOptions', [ - 'path', - ] -) -AzureSourceOptions = collections.namedtuple( - 'AzureSourceOptions', [ - ] -) class AzureStorageCredentials(object): @@ -177,10 +171,15 @@ def __init__(self, name, key, endpoint): :param str key: storage key or sas :param str endpoint: endpoint """ + self._append_blob_client = None + self._block_blob_client = None + self._file_client = None + self._page_blob_client = None self.name = name self.key = key self.endpoint = endpoint self.is_sas = self._key_is_sas(self.key) + self._create_clients() @staticmethod def _key_is_sas(key): @@ -205,6 +204,56 @@ def _key_is_sas(key): return True return False + def _create_clients(self): + # type: (AzureStorageAccount) -> None + """Create Azure Storage clients + :param AzureStorageAccount self: this + """ + self._append_blob_client = create_append_blob_client(self) + self._block_blob_client = create_block_blob_client(self) + self._file_client = create_file_client(self) + self._page_blob_client = create_page_blob_client(self) + + @property + def append_blob_client(self): + # type: (AzureStorageAccount) -> azure.storage.blob.AppendBlobService + """Get append blob client + :param AzureStorageAccount self: this + :rtype: azure.storage.blob.AppendBlobService + :return: append blob client + """ + return self._append_blob_client + + @property + def block_blob_client(self): + # type: (AzureStorageAccount) -> azure.storage.blob.BlockBlobService + """Get block blob client + :param AzureStorageAccount self: this + :rtype: azure.storage.blob.BlockBlobService + :return: block blob client + """ + return self._block_blob_client + + @property + def file_client(self): + # type: (AzureStorageAccount) -> azure.storage.file.FileService + """Get file client + :param AzureStorageAccount self: this + :rtype: azure.storage.file.FileService + :return: file client + """ + return self._file_client + + @property + def page_blob_client(self): + # type: (AzureStorageAccount) -> azure.storage.blob.PageBlobService + """Get page blob client + :param AzureStorageAccount self: this + :rtype: azure.storage.blob.PageBlobService + :return: page blob client + """ + return self._page_blob_client + class _BaseSourcePaths(object): """Base Source Paths""" @@ -217,6 +266,16 @@ def __init__(self): self._exclude = None self._paths = [] + @property + def paths(self): + # type: (_BaseSourcePaths) -> List[pathlib.Path] + """Stored paths + :param _BaseSourcePaths self: this + :rtype: list + :return: list of pathlib.Path + """ + return self._paths + def add_include(self, incl): # type: (_BaseSourcePaths, str) -> None """Add an include @@ -271,7 +330,10 @@ def add_path(self, path): :param _BaseSourcePaths self: this :param str path: path to add 
""" - self._paths.append(pathlib.Path(path)) + if isinstance(path, pathlib.Path): + self._paths.append(path) + else: + self._paths.append(pathlib.Path(path)) def add_paths(self, paths): # type: (_BaseSourcePaths, list) -> None @@ -390,28 +452,79 @@ def ensure_path_exists(self): mode=0o750, parents=True, exist_ok=True) -class AzureSourcePaths(_BaseSourcePaths): - def __init__(self, mode): - super.__init__() - self._mode = mode +class DownloadSpecification(object): + """DownloadSpecification""" + def __init__( + self, download_options, skip_on_options, local_destination_path): + # type: (DownloadSpecification, DownloadOptions, SkipOnOptions, + # LocalDestinationPath) -> None + """Ctor for DownloadSpecification + :param DownloadSepcification self: this + :param DownloadOptions download_options: download options + :param SkipOnOptions skip_on_options: skip on options + :param LocalDestinationPath local_destination_path: local dest path + """ + self.options = download_options + self.skip_on = skip_on_options + self.destination = local_destination_path + self.sources = [] + + def add_azure_source_path(self, source): + # type: (DownloadSpecification, AzureSourcePath) -> None + """Add an Azure Source Path + :param DownloadSepcification self: this + :param AzureSourcePath source: Azure source path to add + """ + self.sources.append(source) - def set_clients(self, append, block, file, page): - pass - def files(self): - if self._mode == AzureStorageModes.Auto: +class AzureSourcePath(_BaseSourcePaths): + """AzureSourcePath""" + def __init__(self): + # type: (AzureSourcePath) -> None + """Ctor for AzureSourcePath + :param AzureSourcePath self: this + """ + super(AzureSourcePath, self).__init__() + self._path_map = {} + + def add_path_with_storage_account(self, remote_path, storage_account): + # type: (AzureSourcePath, str, str) -> None + """Add a path with an associated storage account + :param AzureSourcePath self: this + :param str remote_path: remote path + :param str storage_account: storage account to associate with path + """ + if len(self._path_map) >= 1: + raise RuntimeError( + 'cannot add multiple remote paths to AzureSourcePath objects') + rpath = normalize_azure_path(remote_path) + self.add_path(rpath) + self._path_map[rpath] = storage_account + + def lookup_storage_account(self, remote_path): + # type: (AzureSourcePath, str) -> str + """Lookup the storage account associated with the remote path + :param AzureSourcePath self: this + :param str remote_path: remote path + :rtype: str + :return: storage account associated with path + """ + return self._path_map[normalize_azure_path(remote_path)] + + def files(self, mode): + if mode == AzureStorageModes.Auto: pass - elif self._mode == AzureStorageModes.Append: + elif mode == AzureStorageModes.Append: pass - elif self._mode == AzureStorageModes.Block: + elif mode == AzureStorageModes.Block: pass - elif self._mode == AzureStorageModes.File: + elif mode == AzureStorageModes.File: pass - elif self._mode == AzureStorageModes.Page: + elif mode == AzureStorageModes.Page: pass else: - raise RuntimeError('unknown Azure Storage Mode: {}'.format( - self._mode)) + raise RuntimeError('unknown Azure Storage Mode: {}'.format(mode)) def _append_files(self): for _path in self._paths: diff --git a/blobxfer/operations.py b/blobxfer/operations.py index b073367..476e597 100644 --- a/blobxfer/operations.py +++ b/blobxfer/operations.py @@ -32,7 +32,43 @@ import logging # non-stdlib imports # local imports -from .models import FileDescriptor +from .models import ( # 
noqa + AzureStorageCredentials, + AzureStorageModes, + DownloadSpecification, + FileDescriptor, +) +from .blob.operations import check_if_single_blob +from .file.operations import check_if_single_file +from .util import explode_azure_path + + +def ensure_local_destination(creds, spec): + """Ensure a local destination path given a download spec + :param AzureStorageCredentials creds: creds + :param DownloadSpecification spec: download spec + """ + # ensure destination path is writable given the source + if len(spec.sources) < 1: + raise RuntimeError('no sources to download from specified') + # set is_dir for destination + spec.destination.is_dir = True + if len(spec.sources) == 1: + # we need to query the source to see if this is a directory + rpath = str(spec.sources[0].paths[0]) + sa = creds.get_storage_account( + spec.sources[0].lookup_storage_account(rpath)) + cont, dir = explode_azure_path(rpath) + if spec.options.mode == AzureStorageModes.File: + if check_if_single_file(sa.file_client, cont, dir): + spec.destination.is_dir = False + else: + if check_if_single_blob(sa.block_blob_client, cont, dir): + spec.destination.is_dir = False + logging.debug('dest is_dir={} for {} specs'.format( + spec.destination.is_dir, len(spec.sources))) + # ensure destination path + spec.destination.ensure_path_exists() def file_chunks(fd, chunk_size): diff --git a/blobxfer/util.py b/blobxfer/util.py index f498ff6..9b778bd 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -39,12 +39,12 @@ from os import scandir as scandir except ImportError: # noqa from scandir import scandir as scandir -import sys +import re # non-stdlib imports +import future.utils # local imports # global defines -_PY2 = sys.version_info.major == 2 _PAGEBLOB_BOUNDARY = 512 @@ -54,7 +54,7 @@ def on_python2(): :rtype: bool :return: if on Python2 """ - return _PY2 + return future.utils.PY2 def setup_logger(logger): # noqa @@ -148,7 +148,7 @@ def base64_encode_as_string(obj): # noqa :rtype: str :return: base64 encoded string """ - if _PY2: + if on_python2(): return base64.b64encode(obj) else: return str(base64.b64encode(obj), 'ascii') @@ -211,3 +211,33 @@ def page_align_content_length(length): if mod != 0: return length + (_PAGEBLOB_BOUNDARY - mod) return length + + +def normalize_azure_path(path): + # type: (str) -> str + """Normalize remote path (strip slashes and use forward slashes) + :param str path: path to normalize + :rtype: str + :return: normalized path + """ + if is_none_or_empty(path): + raise ValueError('provided path is invalid') + _path = path.strip('/').strip('\\') + return '/'.join(re.split('/|\\\\', _path)) + + +def explode_azure_path(path): + # type: (str) -> Tuple[str, str] + """Explodes an azure path into a container or fileshare and the + remaining virtual path + :param str path: path to explode + :rtype: tuple + :return: container, vpath + """ + rpath = normalize_azure_path(path).split('/') + container = rpath[0] + if len(rpath) > 1: + rpath = '/'.join(rpath[1:]) + else: + rpath = '' + return container, rpath diff --git a/cli/cli.py b/cli/cli.py index cb23a31..1ec99d4 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -41,6 +41,7 @@ import blobxfer.api import blobxfer.util # local imports +import download as dl import settings # create logger @@ -57,10 +58,8 @@ def __init__(self): self.yaml_config = None self.config = {} self.cli_options = {} - self.block_blob_client = None - self.page_blob_client = None - self.append_blob_client = None - self.smb_file_client = None + self.credentials = None + self.general_options = None 
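The two path helpers added to blobxfer/util.py above determine how a remote path is normalized and split into a container (or file share) and the remaining virtual path. An illustrative snippet, with behavior inferred from the helpers and their tests:

    # Illustrative only: expected behavior of the blobxfer/util.py helpers above
    from blobxfer.util import explode_azure_path, normalize_azure_path

    print(normalize_azure_path('\\cont\\r1\\r2\\'))  # cont/r1/r2
    print(normalize_azure_path('/cont\\r1/r2/'))     # cont/r1/r2
    print(explode_azure_path('cont'))                # ('cont', '')
    print(explode_azure_path('/some/remote/path'))   # ('some', 'remote/path')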
def initialize(self): # type: (CliContext) -> None @@ -68,6 +67,9 @@ def initialize(self): :param CliContext self: this """ self._init_config() + self.credentials = settings.create_azure_storage_credentials( + self.config) + self.general_options = settings.create_general_options(self.config) def _read_yaml_file(self, yaml_file): # type: (CliContext, pathlib.Path) -> None @@ -588,7 +590,9 @@ def download(ctx, local_resource, storage_account, remote_path): ctx.cli_options, settings.TransferAction.Download, local_resource, storage_account, remote_path) ctx.initialize() - raise NotImplementedError() + specs = settings.create_download_specifications(ctx.config) + for spec in specs: + dl.download(ctx.general_options, ctx.credentials, spec) @cli.command('synccopy') diff --git a/cli/settings.py b/cli/settings.py index 9d54d42..ad71bf7 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -33,6 +33,7 @@ import enum # non-stdlib imports # local imports +import blobxfer.models from blobxfer.util import is_none_or_empty, is_not_empty, merge_dict @@ -201,11 +202,98 @@ def merge_settings(config, cli_options): # merge general options if 'options' not in config: config['options'] = {} - try: - config['options']['verbose'] = cli_options['verbose'] - except KeyError: - pass - try: - config['options']['timeout_sec'] = cli_options['timeout'] - except KeyError: - pass + config['options']['progress_bar'] = cli_options['progress_bar'] + config['options']['timeout_sec'] = cli_options['timeout'] + config['options']['verbose'] = cli_options['verbose'] + + +def create_azure_storage_credentials(config): + # type: (dict) -> blobxfer.models.AzureStorageCredentials + """Create an AzureStorageCredentials object from configuration + :param dict config: config dict + :rtype: blobxfer.models.AzureStorageCredentials + :return: credentials object + """ + creds = blobxfer.models.AzureStorageCredentials() + endpoint = config['azure_storage']['endpoint'] + for name in config['azure_storage']['accounts']: + key = config['azure_storage']['accounts'][name] + creds.add_storage_account(name, key, endpoint) + return creds + + +def create_general_options(config): + # type: (dict) -> blobxfer.models.GeneralOptions + """Create a GeneralOptions object from configuration + :param dict config: config dict + :rtype: blobxfer.models.GeneralOptions + :return: general options object + """ + return blobxfer.models.GeneralOptions( + progress_bar=config['options']['progress_bar'], + timeout_sec=config['options']['timeout_sec'], + verbose=config['options']['verbose'], + ) + + +def create_download_specifications(config): + # type: (dict) -> List[blobxfer.models.DownloadSpecification] + """Create a list of DownloadSpecification objects from configuration + :param dict config: config dict + :rtype: list + :return: list of DownloadSpecification objects + """ + specs = [] + for conf in config['download']: + # create download options + confmode = conf['options']['mode'].lower() + if confmode == 'auto': + mode = blobxfer.models.AzureStorageModes.Auto + elif confmode == 'append': + mode = blobxfer.models.AzureStorageModes.Append + elif confmode == 'block': + mode = blobxfer.models.AzureStorageModes.Block + elif confmode == 'file': + mode == blobxfer.models.AzureStorageModes.File + elif confmode == 'page': + mode == blobxfer.models.AzureStorageModes.Page + else: + raise ValueError('unknown mode: {}'.format(confmode)) + ds = blobxfer.models.DownloadSpecification( + download_options=blobxfer.models.DownloadOptions( + 
check_file_md5=conf['options']['check_file_md5'], + delete_extraneous_destination=conf[ + 'options']['delete_extraneous_destination'], + mode=mode, + overwrite=conf['options']['overwrite'], + recursive=conf['options']['recursive'], + restore_file_attributes=conf[ + 'options']['restore_file_attributes'], + rsa_private_key=conf['options']['rsa_private_key'], + rsa_private_key_passphrase=conf[ + 'options']['rsa_private_key_passphrase'], + ), + skip_on_options=blobxfer.models.SkipOnOptions( + filesize_match=conf['options']['skip_on']['filesize_match'], + lmt_ge=conf['options']['skip_on']['lmt_ge'], + md5_match=conf['options']['skip_on']['md5_match'], + ), + local_destination_path=blobxfer.models.LocalDestinationPath( + conf['destination'] + ) + ) + # create remote source paths + for src in conf['source']: + if len(src) != 1: + raise RuntimeError( + 'invalid number of source pairs specified per entry') + sa = next(iter(src)) + asp = blobxfer.models.AzureSourcePath() + asp.add_path_with_storage_account(src[sa], sa) + if is_not_empty(conf['include']): + asp.add_includes(conf['include']) + if is_not_empty(conf['exclude']): + asp.add_excludes(conf['exclude']) + ds.add_azure_source_path(asp) + specs.append(ds) + return specs diff --git a/tests/test_blobxfer.py b/tests/test_blobxfer.py new file mode 100644 index 0000000..f64c084 --- /dev/null +++ b/tests/test_blobxfer.py @@ -0,0 +1,13 @@ +# coding=utf-8 +"""Tests for miscellaneous""" + +# stdlib imports +# non-stdlib imports +import azure.storage +# module under test +import blobxfer.version + + +def test_user_agent_monkey_patch(): + verstr = 'blobxfer/{}'.format(blobxfer.version.__version__) + assert azure.storage._constants.USER_AGENT_STRING.startswith(verstr) diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 2de4bc2..df1790d 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -3,7 +3,12 @@ # stdlib imports import os +try: + import pathlib2 as pathlib +except ImportError: + import pathlib # non-stdlib imports +import azure.storage import pytest # module under test import blobxfer.models @@ -17,6 +22,14 @@ def test_storage_credentials(): assert a.name == 'sa1' assert a.key == 'somekey1' assert a.endpoint == 'endpoint' + assert isinstance( + a.append_blob_client, azure.storage.blob.AppendBlobService) + assert isinstance( + a.block_blob_client, azure.storage.blob.BlockBlobService) + assert isinstance( + a.file_client, azure.storage.file.FileService) + assert isinstance( + a.page_blob_client, azure.storage.blob.PageBlobService) with pytest.raises(KeyError): a = creds.get_storage_account('sa2') @@ -86,6 +99,7 @@ def test_localsourcepaths_files(tmpdir): sfile = str(file.parent_path / file.relative_path) a_set.add(sfile) + assert len(a.paths) == 1 assert str(abcpath.join('blah.x')) not in a_set assert str(defpath.join('world.txt')) in a_set assert str(defpath.join('moo.cow')) not in a_set @@ -95,7 +109,7 @@ def test_localsourcepaths_files(tmpdir): b.add_include('*.txt') b.add_excludes(['world.txt']) b.add_exclude('**/blah.x') - b.add_paths([str(tmpdir)]) + b.add_paths([pathlib.Path(str(tmpdir))]) for file in a.files(): sfile = str(file.parent_path / file.relative_path) assert sfile in a_set @@ -130,3 +144,48 @@ def test_localdestinationpath(tmpdir): assert os.path.exists(str(path2)) assert os.path.isdir(str(path2)) assert not c.is_dir + + +def test_azuresourcepath(): + p = '/cont/remote/path' + asp = blobxfer.models.AzureSourcePath() + asp.add_path_with_storage_account(p, 'sa') + + with 
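For reference, a hedged sketch of the configuration shape that create_download_specifications in cli/settings.py above expects after merge_settings has run; the account name, key, and paths are placeholders, and the nested keys mirror what the function reads:

    # Hypothetical input for settings.create_download_specifications;
    # 'mysa', the key, and the paths below are placeholders.
    config = {
        'options': {'progress_bar': True, 'timeout_sec': None, 'verbose': False},
        'azure_storage': {
            'endpoint': 'core.windows.net',
            'accounts': {'mysa': 'account-key-or-sas'},
        },
        'download': [{
            'source': [{'mysa': 'container/remote/path'}],
            'destination': '/tmp/download',
            'include': None,
            'exclude': None,
            'options': {
                'check_file_md5': True,
                'delete_extraneous_destination': False,
                'mode': 'auto',
                'overwrite': True,
                'recursive': True,
                'restore_file_attributes': False,
                'rsa_private_key': None,
                'rsa_private_key_passphrase': None,
                'skip_on': {
                    'filesize_match': False,
                    'lmt_ge': False,
                    'md5_match': True,
                },
            },
        }],
    }

Each entry under 'download' yields one DownloadSpecification, with one AzureSourcePath added per source pair.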
pytest.raises(RuntimeError): + asp.add_path_with_storage_account('x', 'x') + + assert 'sa' == asp.lookup_storage_account(p) + + +def test_downloadspecification(): + ds = blobxfer.models.DownloadSpecification( + download_options=blobxfer.models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=blobxfer.models.AzureStorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + rsa_private_key_passphrase=None, + ), + skip_on_options=blobxfer.models.SkipOnOptions( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_destination_path=blobxfer.models.LocalDestinationPath('dest'), + ) + + asp = blobxfer.models.AzureSourcePath() + p = 'some/remote/path' + asp.add_path_with_storage_account(p, 'sa') + + ds.add_azure_source_path(asp) + + assert ds.options.check_file_md5 + assert not ds.skip_on.lmt_ge + assert ds.destination.path == pathlib.Path('dest') + assert len(ds.sources) == 1 + assert p in ds.sources[0]._path_map + assert ds.sources[0]._path_map[p] == 'sa' diff --git a/tests/test_blobxfer_operations.py b/tests/test_blobxfer_operations.py new file mode 100644 index 0000000..4ddc104 --- /dev/null +++ b/tests/test_blobxfer_operations.py @@ -0,0 +1,83 @@ +# coding=utf-8 +"""Tests for operations""" + +# stdlib imports +from mock import ( + MagicMock, + patch, +) +# non-stdlib imports +import pytest +# local imports +import blobxfer.models +# module under test +import blobxfer.operations as ops + + +@patch('blobxfer.operations.check_if_single_file') +@patch('blobxfer.operations.check_if_single_blob') +def test_ensure_local_destination(patched_blob, patched_file, tmpdir): + downdir = tmpdir.join('down') + + # non-file tests + ds = blobxfer.models.DownloadSpecification( + download_options=blobxfer.models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=blobxfer.models.AzureStorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + rsa_private_key_passphrase=None, + ), + skip_on_options=MagicMock(), + local_destination_path=blobxfer.models.LocalDestinationPath( + str(downdir) + ), + ) + + with pytest.raises(RuntimeError): + ops.ensure_local_destination(MagicMock(), ds) + + asp = blobxfer.models.AzureSourcePath() + p = 'cont/remote/path' + asp.add_path_with_storage_account(p, 'sa') + + ds.add_azure_source_path(asp) + + patched_blob.return_value = False + ops.ensure_local_destination(MagicMock(), ds) + assert ds.destination.is_dir + + patched_blob.return_value = True + with pytest.raises(RuntimeError): + ops.ensure_local_destination(MagicMock(), ds) + + # file tests + ds = blobxfer.models.DownloadSpecification( + download_options=blobxfer.models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=blobxfer.models.AzureStorageModes.File, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + rsa_private_key_passphrase=None, + ), + skip_on_options=MagicMock(), + local_destination_path=blobxfer.models.LocalDestinationPath( + str(downdir) + ), + ) + + ds.add_azure_source_path(asp) + + patched_file.return_value = False + ops.ensure_local_destination(MagicMock(), ds) + assert ds.destination.is_dir + + patched_file.return_value = True + with pytest.raises(RuntimeError): + ops.ensure_local_destination(MagicMock(), ds) diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py index 37e070c..9b6084e 100644 --- 
a/tests/test_blobxfer_util.py +++ b/tests/test_blobxfer_util.py @@ -131,6 +131,49 @@ def test_compute_md5(tmpdir): def test_page_align_content_length(): assert 0 == blobxfer.util.page_align_content_length(0) + assert 512 == blobxfer.util.page_align_content_length(1) assert 512 == blobxfer.util.page_align_content_length(511) assert 512 == blobxfer.util.page_align_content_length(512) assert 1024 == blobxfer.util.page_align_content_length(513) + assert 1024 == blobxfer.util.page_align_content_length(1023) + assert 1024 == blobxfer.util.page_align_content_length(1024) + assert 1536 == blobxfer.util.page_align_content_length(1025) + + +def test_normalize_azure_path(): + a = '\\cont\\r1\\r2\\r3\\' + b = blobxfer.util.normalize_azure_path(a) + assert b == 'cont/r1/r2/r3' + + a = '/cont/r1/r2/r3/' + b = blobxfer.util.normalize_azure_path(a) + assert b == 'cont/r1/r2/r3' + + a = '/cont\\r1/r2\\r3/' + b = blobxfer.util.normalize_azure_path(a) + assert b == 'cont/r1/r2/r3' + + with pytest.raises(ValueError): + blobxfer.util.normalize_azure_path('') + + +def test_explode_azure_path(): + p = 'cont' + cont, rpath = blobxfer.util.explode_azure_path(p) + assert cont == 'cont' + assert rpath == '' + + p = 'cont/' + cont, rpath = blobxfer.util.explode_azure_path(p) + assert cont == 'cont' + assert rpath == '' + + p = 'cont/a/' + cont, rpath = blobxfer.util.explode_azure_path(p) + assert cont == 'cont' + assert rpath == 'a' + + p = '/some/remote/path' + cont, rpath = blobxfer.util.explode_azure_path(p) + assert cont == 'some' + assert rpath == 'remote/path' From 616c7666bc8920d014320c4c029d0000dc286b52 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 16 Feb 2017 15:00:39 -0800 Subject: [PATCH 04/47] Add some crypto models --- blobxfer/blob/operations.py | 29 ++- blobxfer/crypto/__init__.py | 0 blobxfer/crypto/models.py | 286 +++++++++++++++++++++++ blobxfer/crypto/operations.py | 130 +++++++++++ blobxfer/models.py | 50 ++-- blobxfer/operations.py | 27 ++- cli/settings.py | 33 +-- setup.py | 1 + tests/test_blobxfer_crypto_models.py | 202 ++++++++++++++++ tests/test_blobxfer_crypto_operations.py | 44 ++++ tests/test_blobxfer_models.py | 1 - tests/test_blobxfer_operations.py | 6 +- 12 files changed, 757 insertions(+), 52 deletions(-) create mode 100644 blobxfer/crypto/__init__.py create mode 100644 blobxfer/crypto/models.py create mode 100644 blobxfer/crypto/operations.py create mode 100644 tests/test_blobxfer_crypto_models.py create mode 100644 tests/test_blobxfer_crypto_operations.py diff --git a/blobxfer/blob/operations.py b/blobxfer/blob/operations.py index 126c267..405e384 100644 --- a/blobxfer/blob/operations.py +++ b/blobxfer/blob/operations.py @@ -31,8 +31,9 @@ # stdlib imports import logging # non-stdlib imports +import azure.common +import azure.storage.blob.models # local imports -from ..util import is_none_or_empty # create logger logger = logging.getLogger(__name__) @@ -47,18 +48,28 @@ def check_if_single_blob(client, container, prefix): :rtype: bool :return: if prefix in container is a single blob """ - blobs = client.list_blobs( - container_name=container, prefix=prefix, num_results=1) - return is_none_or_empty(blobs.next_marker) + try: + client.get_blob_properties( + container_name=container, blob_name=prefix) + except azure.common.AzureMissingResourceHttpError: + return False + return True -def list_blobs(client, container, prefix, mode): - # type: (azure.storage.blob.BaseBlobService, str, str, - # blobxfer.models.AzureStorageModes) -> list +def list_blobs(client, container, prefix): + # 
type: (azure.storage.blob.BaseBlobService, str, + # str) -> azure.storage.blob.models.Blob """List blobs in path conforming to mode :param azure.storage.blob.BaseBlobService client: blob client :param str container: container :param str prefix: path prefix + :rtype: azure.storage.blob.models.Blob + :return: generator of blobs """ - - pass + blobs = client.list_blobs( + container_name=container, + prefix=prefix, + include=azure.storage.blob.models.Include.METADATA, + ) + for blob in blobs: + yield blob diff --git a/blobxfer/crypto/__init__.py b/blobxfer/crypto/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blobxfer/crypto/models.py b/blobxfer/crypto/models.py new file mode 100644 index 0000000..4300b58 --- /dev/null +++ b/blobxfer/crypto/models.py @@ -0,0 +1,286 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
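The reworked helpers in blobxfer/blob/operations.py above are thin wrappers over the storage SDK: check_if_single_blob now probes with get_blob_properties instead of listing, and list_blobs yields Blob objects with metadata included. A hedged usage sketch, where the account name, key, container, and prefix are placeholders:

    # Hypothetical usage of blobxfer.blob.operations with an AzureStorageAccount;
    # 'mysa', 'key', 'mycontainer', and the prefixes are placeholders.
    import blobxfer.blob.operations as blob_ops
    import blobxfer.models as models

    sa = models.AzureStorageAccount('mysa', 'key', 'core.windows.net')
    if blob_ops.check_if_single_blob(
            sa.block_blob_client, 'mycontainer', 'path/blob.bin'):
        print('prefix resolves to a single blob')
    for blob in blob_ops.list_blobs(sa.block_blob_client, 'mycontainer', 'path/'):
        # each yielded object is an azure.storage.blob.models.Blob with metadata
        print(blob.name)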
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import base64 +import collections +import hashlib +import hmac +import json +import logging +# non-stdlib imports +# local imports +import blobxfer.crypto.operations +import blobxfer.util + + +# encryption constants +_AES256_KEYLENGTH_BYTES = 32 +_AES256_BLOCKSIZE_BYTES = 16 +_HMACSHA256_DIGESTSIZE_BYTES = 32 +_AES256CBC_HMACSHA256_OVERHEAD_BYTES = ( + _AES256_BLOCKSIZE_BYTES + _HMACSHA256_DIGESTSIZE_BYTES +) + +# named tuples +EncryptionBlobxferExtensions = collections.namedtuple( + 'EncryptionBlobxferExtensions', [ + 'pre_encrypted_content_md5', + ] +) +EncryptionAgent = collections.namedtuple( + 'EncryptionAgent', [ + 'encryption_algorithm', + 'protocol', + ] +) +EncryptionAuthentication = collections.namedtuple( + 'EncryptionAuthentication', [ + 'algorithm', + 'message_authentication_code', + ] +) +EncryptionWrappedContentKey = collections.namedtuple( + 'EncryptionWrappedContentKey', [ + 'algorithm', + 'encrypted_authentication_key', + 'encrypted_key', + 'key_id', + ] +) +EncryptionMetadataAuthentication = collections.namedtuple( + 'EncryptionMetadataAuthentication', [ + 'algorithm', + 'encoding', + 'message_authentication_code', + ] +) + + +class EncryptionMetadata(object): + """EncryptionMetadata""" + + # constants + _ENCRYPTION_MODE = 'FullBlob' + _ENCRYPTION_PROTOCOL_VERSION = '1.0' + _ENCRYPTION_ALGORITHM = 'AES_CBC_256' + _ENCRYPTED_KEY_SCHEME = 'RSA-OAEP' + _AUTH_ALGORITHM = 'HMAC-SHA256' + _AUTH_ENCODING_TYPE = 'UTF-8' + + _METADATA_KEY_NAME = 'encryptiondata' + _METADATA_KEY_AUTH_NAME = 'encryptiondata_authentication' + + _JSON_KEY_ENCRYPTION_MODE = 'EncryptionMode' + _JSON_KEY_ALGORITHM = 'Algorithm' + _JSON_KEY_MAC = 'MessageAuthenticationCode' + _JSON_KEY_ENCRYPTION_AGENT = 'EncryptionAgent' + _JSON_KEY_PROTOCOL = 'Protocol' + _JSON_KEY_ENCRYPTION_ALGORITHM = 'EncryptionAlgorithm' + _JSON_KEY_INTEGRITY_AUTH = 'EncryptionAuthentication' + _JSON_KEY_WRAPPEDCONTENTKEY = 'WrappedContentKey' + _JSON_KEY_ENCRYPTED_KEY = 'EncryptedKey' + _JSON_KEY_ENCRYPTED_AUTHKEY = 'EncryptedAuthenticationKey' + _JSON_KEY_CONTENT_IV = 'ContentEncryptionIV' + _JSON_KEY_KEYID = 'KeyId' + _JSON_KEY_BLOBXFER_EXTENSIONS = 'BlobxferExtensions' + _JSON_KEY_PREENCRYPTED_MD5 = 'PreEncryptedContentMD5' + + _JSON_KEY_AUTH_METAAUTH = 'EncryptionMetadataAuthentication' + _JSON_KEY_AUTH_ENCODING = 'Encoding' + + def __init__(self): + # type: (EncryptionMetadata) -> None + """Ctor for EncryptionMetadata + :param EncryptionMetadata self: this + """ + self.blobxfer_extensions = None + self.content_encryption_iv = None + self.encryption_agent = None + self.encryption_authentication = None + self.encryption_mode = None + self.key_wrapping_metadata = {} + self.wrapped_content_key = None + self.encryption_metadata_authentication = None + self._symkey = None + self._signkey = None + + @staticmethod + def encryption_metadata_exists(md): + # type: (dict) -> bool + """Check if encryption metadata exists in json metadata + :param dict md: metadata dictionary + :rtype: bool + :return: if encryption metadata exists + """ + try: + if blobxfer.util.is_not_empty( + md[EncryptionMetadata._METADATA_KEY_NAME]): + return True + except (KeyError, TypeError): + pass + return False + + def convert_from_json(self, md, blobname, rsaprivatekey): + # type: 
(EncryptionMetadata, dict, str, + # cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey) + # -> None + """Read metadata json into objects + :param EncryptionMetadata self: this + :param dict md: metadata dictionary + :param str blobname: blob name + :param rsaprivatekey: RSA private key + :type rsaprivatekey: + cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey + """ + # populate from encryption data + ed = json.loads(md[EncryptionMetadata._METADATA_KEY_NAME]) + try: + self.blobxfer_extensions = EncryptionBlobxferExtensions( + pre_encrypted_content_md5=ed[ + EncryptionMetadata._JSON_KEY_BLOBXFER_EXTENSIONS][ + EncryptionMetadata._JSON_KEY_PREENCRYPTED_MD5], + ) + except KeyError: + pass + self.content_encryption_iv = ed[ + EncryptionMetadata._JSON_KEY_CONTENT_IV] + self.encryption_agent = EncryptionAgent( + encryption_algorithm=ed[ + EncryptionMetadata._JSON_KEY_ENCRYPTION_AGENT][ + EncryptionMetadata._JSON_KEY_ENCRYPTION_ALGORITHM], + protocol=ed[ + EncryptionMetadata._JSON_KEY_ENCRYPTION_AGENT][ + EncryptionMetadata._JSON_KEY_PROTOCOL], + ) + if (self.encryption_agent.encryption_algorithm != + EncryptionMetadata._ENCRYPTION_ALGORITHM): + raise RuntimeError('{}: unknown block cipher: {}'.format( + blobname, self.encryption_agent.encryption_algorithm)) + if (self.encryption_agent.protocol != + EncryptionMetadata._ENCRYPTION_PROTOCOL_VERSION): + raise RuntimeError('{}: unknown encryption protocol: {}'.format( + blobname, self.encryption_agent.protocol)) + self.encryption_authentication = EncryptionAuthentication( + algorithm=ed[ + EncryptionMetadata._JSON_KEY_INTEGRITY_AUTH][ + EncryptionMetadata._JSON_KEY_ALGORITHM], + message_authentication_code=ed[ + EncryptionMetadata._JSON_KEY_INTEGRITY_AUTH][ + EncryptionMetadata._JSON_KEY_MAC], + ) + if (self.encryption_authentication.algorithm != + EncryptionMetadata._AUTH_ALGORITHM): + raise RuntimeError( + '{}: unknown integrity/auth method: {}'.format( + blobname, self.encryption_authentication.algorithm)) + self.encryption_mode = ed[ + EncryptionMetadata._JSON_KEY_ENCRYPTION_MODE] + if self.encryption_mode != EncryptionMetadata._ENCRYPTION_MODE: + raise RuntimeError( + '{}: unknown encryption mode: {}'.format( + blobname, self.encryption_mode)) + try: + _eak = ed[EncryptionMetadata._JSON_KEY_WRAPPEDCONTENTKEY][ + EncryptionMetadata._JSON_KEY_ENCRYPTED_AUTHKEY] + except KeyError: + _eak = None + self.wrapped_content_key = EncryptionWrappedContentKey( + algorithm=ed[ + EncryptionMetadata._JSON_KEY_WRAPPEDCONTENTKEY][ + EncryptionMetadata._JSON_KEY_ALGORITHM], + encrypted_authentication_key=_eak, + encrypted_key=ed[ + EncryptionMetadata._JSON_KEY_WRAPPEDCONTENTKEY][ + EncryptionMetadata._JSON_KEY_ENCRYPTED_KEY], + key_id=ed[ + EncryptionMetadata._JSON_KEY_WRAPPEDCONTENTKEY][ + EncryptionMetadata._JSON_KEY_KEYID], + ) + if (self.wrapped_content_key.algorithm != + EncryptionMetadata._ENCRYPTED_KEY_SCHEME): + raise RuntimeError('{}: unknown key encryption scheme: {}'.format( + blobname, self.wrapped_content_key.algorithm)) + # if RSA key is a public key, stop here as keys cannot be decrypted + if rsaprivatekey is None: + return + # decrypt symmetric key + self._symkey = blobxfer.crypto.operations.\ + rsa_decrypt_base64_encoded_key( + rsaprivatekey, self.wrapped_content_key.encrypted_key) + # decrypt signing key, if it exists + if blobxfer.util.is_not_empty( + self.wrapped_content_key.encrypted_authentication_key): + self._signkey = blobxfer.crypto.operations.\ + rsa_decrypt_base64_encoded_key( + rsaprivatekey, + 
self.wrapped_content_key.encrypted_authentication_key) + else: + self._signkey = None + # populate from encryption data authentication + try: + eda = json.loads(md[EncryptionMetadata._METADATA_KEY_AUTH_NAME]) + except KeyError: + pass + else: + self.encryption_metadata_authentication = \ + EncryptionMetadataAuthentication( + algorithm=eda[ + EncryptionMetadata._JSON_KEY_AUTH_METAAUTH][ + EncryptionMetadata._JSON_KEY_ALGORITHM], + encoding=eda[ + EncryptionMetadata._JSON_KEY_AUTH_METAAUTH][ + EncryptionMetadata._JSON_KEY_AUTH_ENCODING], + message_authentication_code=eda[ + EncryptionMetadata._JSON_KEY_AUTH_METAAUTH][ + EncryptionMetadata._JSON_KEY_MAC], + ) + if (self.encryption_metadata_authentication.algorithm != + EncryptionMetadata._AUTH_ALGORITHM): + raise RuntimeError( + '{}: unknown integrity/auth method: {}'.format( + blobname, + self.encryption_metadata_authentication.algorithm)) + # verify hmac + authhmac = base64.b64decode( + self.encryption_metadata_authentication. + message_authentication_code) + bmeta = md[EncryptionMetadata._METADATA_KEY_NAME].encode( + self.encryption_metadata_authentication.encoding) + hmacsha256 = hmac.new(self._signkey, digestmod=hashlib.sha256) + hmacsha256.update(bmeta) + if hmacsha256.digest() != authhmac: + raise RuntimeError( + '{}: encryption metadata authentication failed'.format( + blobname)) + + def convert_to_json_with_mac(self): + pass diff --git a/blobxfer/crypto/operations.py b/blobxfer/crypto/operations.py new file mode 100644 index 0000000..9a0f099 --- /dev/null +++ b/blobxfer/crypto/operations.py @@ -0,0 +1,130 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
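Note: convert_from_json above is driven entirely by the two metadata keys
"encryptiondata" and "encryptiondata_authentication"; it raises RuntimeError
on an unknown cipher, protocol, mode or key wrapping scheme and on a failed
HMAC, and if the RSA private key argument is None it only populates the
metadata fields without unwrapping the symmetric or signing keys. A short
usage sketch, mirroring the unit tests added later in this patch; blob and
rsa_private_key are assumed placeholders:

    import blobxfer.crypto.models as crypto_models

    md = blob.metadata  # blob metadata dict, e.g. from list_blobs
    if crypto_models.EncryptionMetadata.encryption_metadata_exists(md):
        em = crypto_models.EncryptionMetadata()
        em.convert_from_json(md, blob.name, rsa_private_key)
    else:
        em = None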
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import base64 +import logging +# non-stdlib imports +import cryptography.hazmat.backends +import cryptography.hazmat.primitives.asymmetric.padding +import cryptography.hazmat.primitives.asymmetric.rsa +import cryptography.hazmat.primitives.ciphers +import cryptography.hazmat.primitives.ciphers.algorithms +import cryptography.hazmat.primitives.ciphers.modes +import cryptography.hazmat.primitives.constant_time +import cryptography.hazmat.primitives.hashes +import cryptography.hazmat.primitives.padding +import cryptography.hazmat.primitives.serialization +# local imports +import blobxfer.util + + +def load_rsa_private_key_file(rsakeyfile, passphrase): + # type: (str, str) -> + # cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey + """Load an RSA Private key PEM file with passphrase if specified + :param str rsakeyfile: RSA private key PEM file to load + :param str passphrase: optional passphrase + :rtype: cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey + :return: RSAPrivateKey + """ + with open(rsakeyfile, 'rb') as keyfile: + return cryptography.hazmat.primitives.serialization.\ + load_pem_private_key( + keyfile.read(), + passphrase, + backend=cryptography.hazmat.backends.default_backend() + ) + + +def load_rsa_public_key_file(rsakeyfile): + # type: (str, str) -> + # cryptography.hazmat.primitives.asymmetric.rsa.RSAPublicKey + """Load an RSA Public key PEM file + :param str rsakeyfile: RSA public key PEM file to load + :rtype: cryptography.hazmat.primitives.asymmetric.rsa.RSAPublicKey + :return: RSAPublicKey + """ + with open(rsakeyfile, 'rb') as keyfile: + return cryptography.hazmat.primitives.serialization.\ + load_pem_public_key( + keyfile.read(), + backend=cryptography.hazmat.backends.default_backend() + ) + + +def rsa_decrypt_base64_encoded_key(rsaprivatekey, enckey): + # type: (cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey, + # str) -> bytes + """Decrypt an RSA encrypted key encoded as base64 + :param rsaprivatekey: RSA private key + :type rsaprivatekey: + cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey + :param str enckey: base64-encoded key + :rtype: bytes + :return: decrypted key + """ + return rsaprivatekey.decrypt( + base64.b64decode(enckey), + cryptography.hazmat.primitives.asymmetric.padding.OAEP( + mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( + algorithm=cryptography.hazmat.primitives.hashes.SHA1() + ), + algorithm=cryptography.hazmat.primitives.hashes.SHA1(), + label=None, + ) + ) + + +def rsa_encrypt_key_base64_encoded(rsaprivatekey, rsapublickey, plainkey): + # type: (cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey, + # cryptography.hazmat.primitives.asymmetric.rsa.RSAPublicKey, + # bytes) -> str + """Encrypt a plaintext key using RSA and PKCS1_OAEP padding + :param rsaprivatekey: RSA private key + :type rsaprivatekey: + cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey + :param rsapublickey: RSA public key + :type rsapublickey: + cryptography.hazmat.primitives.asymmetric.rsa.RSAPublicKey + :param bytes plainkey: plain key + :rtype: str + :return: encrypted key + """ + if rsapublickey is None: + rsapublickey = rsaprivatekey.public_key() + enckey = rsapublickey.encrypt( + plainkey, 
cryptography.hazmat.primitives.asymmetric.padding.OAEP( + mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( + algorithm=cryptography.hazmat.primitives.hashes.SHA1()), + algorithm=cryptography.hazmat.primitives.hashes.SHA1(), + label=None)) + return blobxfer.util.base64_encode_as_string(enckey) diff --git a/blobxfer/models.py b/blobxfer/models.py index 1e7e583..921eef8 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -47,10 +47,9 @@ create_file_client, create_page_blob_client, ) -from .util import ( - normalize_azure_path, - scantree, -) +import blobxfer.blob.operations +import blobxfer.crypto +import blobxfer.util # create logger logger = logging.getLogger(__name__) @@ -94,7 +93,6 @@ class AzureStorageModes(enum.Enum): 'overwrite', 'recursive', 'rsa_private_key', - 'rsa_private_key_passphrase', 'rsa_public_key', 'store_file_attributes', 'store_file_md5', @@ -112,7 +110,6 @@ class AzureStorageModes(enum.Enum): 'recursive', 'restore_file_attributes', 'rsa_private_key', - 'rsa_private_key_passphrase' ] ) SyncCopyOptions = collections.namedtuple( @@ -179,6 +176,9 @@ def __init__(self, name, key, endpoint): self.key = key self.endpoint = endpoint self.is_sas = self._key_is_sas(self.key) + # normalize sas keys + if self.is_sas and self.key.startswith('?'): + self.key = self.key[1:] self._create_clients() @staticmethod @@ -373,7 +373,7 @@ def files(self): for _path in self._paths: _ppath = os.path.expandvars(os.path.expanduser(str(_path))) _expath = pathlib.Path(_ppath) - for entry in scantree(_ppath): + for entry in blobxfer.util.scantree(_ppath): _rpath = pathlib.Path(entry.path).relative_to(_ppath) if not self._inclusion_check(_rpath): logger.debug( @@ -498,7 +498,7 @@ def add_path_with_storage_account(self, remote_path, storage_account): if len(self._path_map) >= 1: raise RuntimeError( 'cannot add multiple remote paths to AzureSourcePath objects') - rpath = normalize_azure_path(remote_path) + rpath = blobxfer.util.normalize_azure_path(remote_path) self.add_path(rpath) self._path_map[rpath] = storage_account @@ -510,11 +510,12 @@ def lookup_storage_account(self, remote_path): :rtype: str :return: storage account associated with path """ - return self._path_map[normalize_azure_path(remote_path)] + return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] - def files(self, mode): + def files(self, creds, mode): if mode == AzureStorageModes.Auto: - pass + for blob in self._auto_blobs(creds): + yield blob elif mode == AzureStorageModes.Append: pass elif mode == AzureStorageModes.Block: @@ -526,11 +527,34 @@ def files(self, mode): else: raise RuntimeError('unknown Azure Storage Mode: {}'.format(mode)) - def _append_files(self): + def _append_blobs(self): for _path in self._paths: - pass + def _auto_blobs(self, creds): + for _path in self._paths: + rpath = str(_path) + cont, dir = blobxfer.util.explode_azure_path(rpath) + sa = creds.get_storage_account(self.lookup_storage_account(rpath)) + for blob in blobxfer.blob.operations.list_blobs( + sa.block_blob_client, cont, dir): + if blobxfer.crypto.models.EncryptionMetadata.\ + encryption_metadata_exists(blob.metadata): + ed = blobxfer.crypto.models.EncryptionMetadata() + ed.convert_from_json(blob.metadata) + else: + ed = None + yield (_path, blob.name, ed) + + +class AzureStorageEntity(object): + def __init__(self): + self._name = None + self._size = None + self._md5 = None + self._enc = None + self._vio = None + class AzureDestinationPaths(object): def __init__(self): diff --git a/blobxfer/operations.py 
b/blobxfer/operations.py index 476e597..df1f46f 100644 --- a/blobxfer/operations.py +++ b/blobxfer/operations.py @@ -38,9 +38,9 @@ DownloadSpecification, FileDescriptor, ) -from .blob.operations import check_if_single_blob -from .file.operations import check_if_single_file -from .util import explode_azure_path +import blobxfer.blob.operations +import blobxfer.file.operations +import blobxfer.util def ensure_local_destination(creds, spec): @@ -56,15 +56,18 @@ def ensure_local_destination(creds, spec): if len(spec.sources) == 1: # we need to query the source to see if this is a directory rpath = str(spec.sources[0].paths[0]) - sa = creds.get_storage_account( - spec.sources[0].lookup_storage_account(rpath)) - cont, dir = explode_azure_path(rpath) - if spec.options.mode == AzureStorageModes.File: - if check_if_single_file(sa.file_client, cont, dir): - spec.destination.is_dir = False - else: - if check_if_single_blob(sa.block_blob_client, cont, dir): - spec.destination.is_dir = False + cont, dir = blobxfer.util.explode_azure_path(rpath) + if not blobxfer.util.is_none_or_empty(dir): + sa = creds.get_storage_account( + spec.sources[0].lookup_storage_account(rpath)) + if spec.options.mode == AzureStorageModes.File: + if blobxfer.file.operations.check_if_single_file( + sa.file_client, cont, dir): + spec.destination.is_dir = False + else: + if blobxfer.blob.operations.check_if_single_blob( + sa.block_blob_client, cont, dir): + spec.destination.is_dir = False logging.debug('dest is_dir={} for {} specs'.format( spec.destination.is_dir, len(spec.sources))) # ensure destination path diff --git a/cli/settings.py b/cli/settings.py index ad71bf7..db5c643 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -33,8 +33,9 @@ import enum # non-stdlib imports # local imports +import blobxfer.crypto.operations import blobxfer.models -from blobxfer.util import is_none_or_empty, is_not_empty, merge_dict +import blobxfer.util # enums @@ -59,16 +60,16 @@ def add_cli_options( :param str sync_copy_dest_remote_path: synccopy dest rp """ cli_options['_action'] = action.name.lower() - if is_not_empty(storage_account): + if blobxfer.util.is_not_empty(storage_account): # add credentials try: key = cli_options['access_key'] - if is_none_or_empty(key): + if blobxfer.util.is_none_or_empty(key): raise KeyError() except KeyError: try: key = cli_options['sas'] - if is_none_or_empty(key): + if blobxfer.util.is_none_or_empty(key): raise KeyError() except KeyError: raise RuntimeError('access key or sas must be provided') @@ -133,7 +134,7 @@ def add_cli_options( }, } elif action == TransferAction.Synccopy: - if is_none_or_empty(sync_copy_dest_storage_account): + if blobxfer.util.is_none_or_empty(sync_copy_dest_storage_account): raise RuntimeError( 'must specify a destination storage account') arg = { @@ -159,12 +160,12 @@ def add_cli_options( } try: destkey = cli_options['sync_copy_dest_access_key'] - if is_none_or_empty(destkey): + if blobxfer.util.is_none_or_empty(destkey): raise KeyError() except KeyError: try: destkey = cli_options['sync_copy_dest_sas'] - if is_none_or_empty(destkey): + if blobxfer.util.is_none_or_empty(destkey): raise KeyError() except KeyError: raise RuntimeError( @@ -197,7 +198,7 @@ def merge_settings(config, cli_options): if 'azure_storage' in cli_options: if 'azure_storage' not in config: config['azure_storage'] = {} - config['azure_storage'] = merge_dict( + config['azure_storage'] = blobxfer.util.merge_dict( config['azure_storage'], cli_options['azure_storage']) # merge general options if 'options' not in 
config: @@ -259,6 +260,14 @@ def create_download_specifications(config): mode == blobxfer.models.AzureStorageModes.Page else: raise ValueError('unknown mode: {}'.format(confmode)) + # load RSA private key PEM file if specified + rpk = conf['options']['rsa_private_key'] + if blobxfer.util.is_not_empty(rpk): + rpkp = conf['options']['rsa_private_key_passphrase'] + rpk = blobxfer.crypto.operations.load_rsa_private_key_file( + rpk, rpkp) + else: + rpk = None ds = blobxfer.models.DownloadSpecification( download_options=blobxfer.models.DownloadOptions( check_file_md5=conf['options']['check_file_md5'], @@ -269,9 +278,7 @@ def create_download_specifications(config): recursive=conf['options']['recursive'], restore_file_attributes=conf[ 'options']['restore_file_attributes'], - rsa_private_key=conf['options']['rsa_private_key'], - rsa_private_key_passphrase=conf[ - 'options']['rsa_private_key_passphrase'], + rsa_private_key=rpk, ), skip_on_options=blobxfer.models.SkipOnOptions( filesize_match=conf['options']['skip_on']['filesize_match'], @@ -290,9 +297,9 @@ def create_download_specifications(config): sa = next(iter(src)) asp = blobxfer.models.AzureSourcePath() asp.add_path_with_storage_account(src[sa], sa) - if is_not_empty(conf['include']): + if blobxfer.util.is_not_empty(conf['include']): asp.add_includes(conf['include']) - if is_not_empty(conf['exclude']): + if blobxfer.util.is_not_empty(conf['exclude']): asp.add_excludes(conf['exclude']) ds.add_azure_source_path(asp) specs.append(ds) diff --git a/setup.py b/setup.py index 6ea9896..889f709 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ 'blobxfer.blob.append', 'blobxfer.blob.block', 'blobxfer.blob.page', + 'blobxfer.crypto', 'blobxfer.file', 'blobxfer_cli', ] diff --git a/tests/test_blobxfer_crypto_models.py b/tests/test_blobxfer_crypto_models.py new file mode 100644 index 0000000..33045c3 --- /dev/null +++ b/tests/test_blobxfer_crypto_models.py @@ -0,0 +1,202 @@ +# coding=utf-8 +"""Tests for crypto operations""" + +# stdlib imports +import copy +import json +# non-stdlib imports +import pytest +# local imports +# module under test +import blobxfer.crypto.models as models +import blobxfer.crypto.operations as ops + + +_SAMPLE_RSA_KEY = """ +-----BEGIN RSA PRIVATE KEY----- +MIICXQIBAAKBgQDwlQ0W6O2ixhZM+LYl/ZtUi4lpjFu6+Kt/fyim/LQojaa389yD +e3lqWnAitj13n8uLpv1XuysG2fL+G0AvzT9JJj8gageJRC/8uffhOlxvH/vzfFqU +wQEgwhuv9LXdFcl+mON4TiHqbKsUmggNNPNzSN/P0aohMG8pG8ihyO3uOQIDAQAB +AoGBAIkaKA96RpKQmHzc79DOqgqQSorf9hajR/ismpovQOwrbWs/iddUMmktiOH/ +QSA+7Fx1mcK5Y1fQNO4i0X1sVjdasoPvmU7iGVgHQ9TX6F5LGQtDqAKXAH6GpjkF +V7I7nEBs2vtetpzzq8up2nY7fuwPwse44jdLGZjh1pc0HcFRAkEA/F5XdWq5ZYVo +hMyxxhdb+6J8NKZTsWn92tW0s/pGlkgDwrryglpLqNf9MR+Mm906UUVh6ZmsKoxD +kZzA+4S3bwJBAPQLSryk8CUE0uFviYYANq3asn9sDDTGcvEceSGGwbaZOTDVQNQg +7BhLL5vA8Be/xvkXfEaWa1XipmaBI+4WINcCQGQLEiid0jkIldJvQtoAUJqEYzCL +7wmZtuSVazkdsfXJPpRnf9Nk8DFSzjA3DYqMPJ4THyl3neSQDgkfVvFeP0kCQQDu +0OIJKwsJ3ueSznhw1mKrzTkh8pUbTBwNEQUEpv+H9fd+byGqtLD1sRXcwHjzdKt8 +9Nubo/VTraGS68tCYQsvAkAYxzwSeX7Gj9/mMBFx1Y5v9sSCqLZQeF7q1ltzkwlK +n3by7Z7RvxXXPjv1YoFQPV0WlA6zo4sm0HwFzA0sbOql +-----END RSA PRIVATE KEY----- +""" + +_SAMPLE_ED = \ + { + "BlobxferExtensions": { + "PreEncryptedContentMD5": "tc+p1sj+vWGPkawoQ9UKHA==" + }, + "ContentEncryptionIV": "KjA4Y14+J1p7EJcYWhnKNQ==", + "EncryptionAgent": { + "EncryptionAlgorithm": "AES_CBC_256", + "Protocol": "1.0" + }, + "EncryptionAuthentication": { + "Algorithm": "HMAC-SHA256", + "MessageAuthenticationCode": + "9oKt5Ett7t1AWahxNq3qcGd5NbZMxLtzSN8Lwqy3PgU=" + }, + "EncryptionMode": 
"FullBlob", + "KeyWrappingMetadata": {}, + "WrappedContentKey": { + "Algorithm": "RSA-OAEP", + "EncryptedAuthenticationKey": + "1kO63RxIqIyUp1EW+v2o5VwyhAlrrJiLc+seXnNcVRm0YLHzJYqOrBCz2+" + "c2do2dJKhzTOXyPsJSwkvQVJ0NuYVUTxf6bzDNip2Ge1jTHnsd5IsljMKy" + "rSAvHaKs9NxdvDu5Ex6lhKEChnuMtJBq52zCML5+LUd98WkBxdB2az4=", + "EncryptedKey": + "yOuWT2txNNzOITtDcjV1Uf3/V+TRn5AKjvOtHt+PRuBgMhq6fOFV8kcJhO" + "zPxh8bHqydIFM2OQ+ktiETQ5Ibg7OA24hhr+n8Y6nJNpw3cGtP6L/23n8a" + "a7RMKhmactl3sToFM3xvaXRO0DYuDZeQtPR/DDKPgi2gK641y1THAoc=", + "KeyId": "private:key1" + } + } + +_SAMPLE_EDA = \ + { + "EncryptionMetadataAuthentication": { + "Algorithm": "HMAC-SHA256", + "Encoding": "UTF-8", + "MessageAuthenticationCode": + "BhJjehtHxgSRIBaITDB6o6ZUt6mdehN0PDkhHtwXTP8=" + } + } + + +def test_encryption_metadata_exists(): + md = None + assert not models.EncryptionMetadata.encryption_metadata_exists(md) + + md = {} + assert not models.EncryptionMetadata.encryption_metadata_exists(md) + + md = {'encryptiondata': {}} + assert not models.EncryptionMetadata.encryption_metadata_exists(md) + + md = {'encryptiondata': {'key': 'value'}} + assert models.EncryptionMetadata.encryption_metadata_exists(md) + + +def test_convert_from_json(tmpdir): + keyfile = tmpdir.join('keyfile') + keyfile.write(_SAMPLE_RSA_KEY) + rsaprivatekey = ops.load_rsa_private_key_file(str(keyfile), None) + + # test various missing metadata fields + ced = copy.deepcopy(_SAMPLE_ED) + ced['EncryptionAgent']['EncryptionAlgorithm'] = 'OOPS' + md = { + 'encryptiondata': json.dumps( + ced, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) + } + em = models.EncryptionMetadata() + with pytest.raises(RuntimeError): + em.convert_from_json(md, 'blob', rsaprivatekey) + + ced = copy.deepcopy(_SAMPLE_ED) + ced['EncryptionAgent']['Protocol'] = 'OOPS' + md = { + 'encryptiondata': json.dumps( + ced, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) + } + em = models.EncryptionMetadata() + with pytest.raises(RuntimeError): + em.convert_from_json(md, 'blob', rsaprivatekey) + + ced = copy.deepcopy(_SAMPLE_ED) + ced['EncryptionAuthentication']['Algorithm'] = 'OOPS' + md = { + 'encryptiondata': json.dumps( + ced, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) + } + em = models.EncryptionMetadata() + with pytest.raises(RuntimeError): + em.convert_from_json(md, 'blob', rsaprivatekey) + + ced = copy.deepcopy(_SAMPLE_ED) + ced['EncryptionMode'] = 'OOPS' + md = { + 'encryptiondata': json.dumps( + ced, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) + } + em = models.EncryptionMetadata() + with pytest.raises(RuntimeError): + em.convert_from_json(md, 'blob', rsaprivatekey) + + ced = copy.deepcopy(_SAMPLE_ED) + ced['WrappedContentKey'].pop('EncryptedAuthenticationKey') + ced['WrappedContentKey']['Algorithm'] = 'OOPS' + md = { + 'encryptiondata': json.dumps( + ced, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) + } + em = models.EncryptionMetadata() + with pytest.raises(RuntimeError): + em.convert_from_json(md, 'blob', rsaprivatekey) + + ceda = copy.deepcopy(_SAMPLE_EDA) + ceda['EncryptionMetadataAuthentication']['Algorithm'] = 'OOPS' + md = { + 'encryptiondata': json.dumps( + _SAMPLE_ED, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(ceda) + } + em = models.EncryptionMetadata() + with pytest.raises(RuntimeError): + 
em.convert_from_json(md, 'blob', rsaprivatekey) + + # test failed hmac + ced = copy.deepcopy(_SAMPLE_ED) + ced.pop('BlobxferExtensions') + md = { + 'encryptiondata': json.dumps( + ced, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) + } + em = models.EncryptionMetadata() + with pytest.raises(RuntimeError): + em.convert_from_json(md, 'blob', rsaprivatekey) + + # test correct path + md = { + 'encryptiondata': json.dumps( + _SAMPLE_ED, sort_keys=True, ensure_ascii=False), + 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) + } + em = models.EncryptionMetadata() + em.convert_from_json(md, 'blob', rsaprivatekey) + assert em.wrapped_content_key is not None + assert em._symkey is not None + assert em._signkey is not None + + em = models.EncryptionMetadata() + em.convert_from_json(md, 'blob', None) + assert em.wrapped_content_key is not None + assert em._symkey is None + assert em._signkey is None + + ced = copy.deepcopy(_SAMPLE_ED) + ced['WrappedContentKey'].pop('EncryptedAuthenticationKey') + md = { + 'encryptiondata': json.dumps( + ced, sort_keys=True, ensure_ascii=False) + } + em = models.EncryptionMetadata() + em.convert_from_json(md, 'blob', rsaprivatekey) + assert em.wrapped_content_key is not None + assert em._symkey is not None + assert em._signkey is None diff --git a/tests/test_blobxfer_crypto_operations.py b/tests/test_blobxfer_crypto_operations.py new file mode 100644 index 0000000..1760701 --- /dev/null +++ b/tests/test_blobxfer_crypto_operations.py @@ -0,0 +1,44 @@ +# coding=utf-8 +"""Tests for crypto operations""" + +# stdlib imports +from mock import patch +import os +# non-stdlib imports +import cryptography.hazmat.primitives.asymmetric.rsa +# local imports +# module under test +import blobxfer.crypto.operations as ops + + +_RSAKEY = cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key( + public_exponent=65537, key_size=2048, + backend=cryptography.hazmat.backends.default_backend()) + + +@patch('cryptography.hazmat.primitives.serialization.load_pem_private_key') +def test_load_rsa_private_key_file(patched_load, tmpdir): + keyfile = tmpdir.join('keyfile') + keyfile.write('a') + patched_load.return_value = _RSAKEY + + rv = ops.load_rsa_private_key_file(str(keyfile), None) + assert rv == _RSAKEY + + +@patch('cryptography.hazmat.primitives.serialization.load_pem_public_key') +def test_load_rsa_public_key_file(patched_load, tmpdir): + keyfile = tmpdir.join('keyfile') + keyfile.write('b') + patched_load.return_value = 'rv' + + rv = ops.load_rsa_public_key_file(str(keyfile)) + assert rv == 'rv' + + +def test_rsa_encrypt_decrypt_keys(): + symkey = os.urandom(32) + enckey = ops.rsa_encrypt_key_base64_encoded(_RSAKEY, None, symkey) + assert enckey is not None + plainkey = ops.rsa_decrypt_base64_encoded_key(_RSAKEY, enckey) + assert symkey == plainkey diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index df1790d..2042a15 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -167,7 +167,6 @@ def test_downloadspecification(): recursive=True, restore_file_attributes=False, rsa_private_key=None, - rsa_private_key_passphrase=None, ), skip_on_options=blobxfer.models.SkipOnOptions( filesize_match=True, diff --git a/tests/test_blobxfer_operations.py b/tests/test_blobxfer_operations.py index 4ddc104..f24703c 100644 --- a/tests/test_blobxfer_operations.py +++ b/tests/test_blobxfer_operations.py @@ -14,8 +14,8 @@ import blobxfer.operations as ops 
-@patch('blobxfer.operations.check_if_single_file') -@patch('blobxfer.operations.check_if_single_blob') +@patch('blobxfer.file.operations.check_if_single_file') +@patch('blobxfer.blob.operations.check_if_single_blob') def test_ensure_local_destination(patched_blob, patched_file, tmpdir): downdir = tmpdir.join('down') @@ -29,7 +29,6 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): recursive=True, restore_file_attributes=False, rsa_private_key=None, - rsa_private_key_passphrase=None, ), skip_on_options=MagicMock(), local_destination_path=blobxfer.models.LocalDestinationPath( @@ -64,7 +63,6 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): recursive=True, restore_file_attributes=False, rsa_private_key=None, - rsa_private_key_passphrase=None, ), skip_on_options=MagicMock(), local_destination_path=blobxfer.models.LocalDestinationPath( From d862d74aa2ef69523266f384bc3a0d787162726b Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 21 Feb 2017 15:06:43 -0800 Subject: [PATCH 05/47] Begin list parsing for download and MD5 offload - Add AzureStorageEntity model - Start MD5 offload api - Start download api --- blobxfer/api.py | 4 + blobxfer/blob/operations.py | 33 +++- blobxfer/download.py | 222 +++++++++++++++++++++++++ blobxfer/file/operations.py | 87 +++++++--- blobxfer/md5.py | 116 +++++++++++++ blobxfer/models.py | 213 ++++++++++++++++++++---- blobxfer/operations.py | 22 +-- blobxfer/util.py | 2 +- cli/cli.py | 3 +- setup.py | 1 + tests/test_blobxfer_blob_operations.py | 75 +++++++++ tests/test_blobxfer_file_operations.py | 97 ++++++++++- tests/test_blobxfer_models.py | 35 ++++ tests/test_blobxfer_operations.py | 4 +- 14 files changed, 842 insertions(+), 72 deletions(-) create mode 100644 blobxfer/download.py create mode 100644 blobxfer/md5.py create mode 100644 tests/test_blobxfer_blob_operations.py diff --git a/blobxfer/api.py b/blobxfer/api.py index 65e91ad..550f265 100644 --- a/blobxfer/api.py +++ b/blobxfer/api.py @@ -44,3 +44,7 @@ from .file.operations import ( # noqa create_client as create_file_client ) + +from .download import ( # noqa + download +) diff --git a/blobxfer/blob/operations.py b/blobxfer/blob/operations.py index 405e384..ef2c976 100644 --- a/blobxfer/blob/operations.py +++ b/blobxfer/blob/operations.py @@ -34,42 +34,63 @@ import azure.common import azure.storage.blob.models # local imports +import blobxfer.models # create logger logger = logging.getLogger(__name__) -def check_if_single_blob(client, container, prefix): - # type: (azure.storage.blob.BaseBlobService, str, str) -> bool +def check_if_single_blob(client, container, prefix, timeout=None): + # type: (azure.storage.blob.BaseBlobService, str, str, int) -> bool """Check if prefix is a single blob or multiple blobs :param azure.storage.blob.BaseBlobService client: blob client :param str container: container :param str prefix: path prefix + :param int timeout: timeout :rtype: bool :return: if prefix in container is a single blob """ try: client.get_blob_properties( - container_name=container, blob_name=prefix) + container_name=container, blob_name=prefix, timeout=timeout) except azure.common.AzureMissingResourceHttpError: return False return True -def list_blobs(client, container, prefix): - # type: (azure.storage.blob.BaseBlobService, str, - # str) -> azure.storage.blob.models.Blob +def list_blobs(client, container, prefix, mode, timeout=None): + # type: (azure.storage.blob.BaseBlobService, str, str, int, + # blobxfer.models.AzureStorageModes) -> + # 
azure.storage.blob.models.Blob """List blobs in path conforming to mode :param azure.storage.blob.BaseBlobService client: blob client :param str container: container :param str prefix: path prefix + :param blobxfer.models.AzureStorageModes mode: storage mode + :param int timeout: timeout :rtype: azure.storage.blob.models.Blob :return: generator of blobs """ + if mode == blobxfer.models.AzureStorageModes.File: + raise RuntimeError('cannot list Azure Files from blob client') blobs = client.list_blobs( container_name=container, prefix=prefix, include=azure.storage.blob.models.Include.METADATA, + timeout=timeout, ) for blob in blobs: + if (mode == blobxfer.models.AzureStorageModes.Append and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.AppendBlob): + continue + elif (mode == blobxfer.models.AzureStorageModes.Block and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.BlockBlob): + continue + elif (mode == blobxfer.models.AzureStorageModes.Page and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.PageBlob): + continue + # auto or match, yield the blob yield blob diff --git a/blobxfer/download.py b/blobxfer/download.py new file mode 100644 index 0000000..11264c9 --- /dev/null +++ b/blobxfer/download.py @@ -0,0 +1,222 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
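Note: list_blobs now filters on the requested storage mode: Append, Block and
Page yield only blobs whose service-side blob_type matches, Auto yields every
blob, and File raises RuntimeError because file shares cannot be listed
through a blob client. A small illustrative call, mirroring the unit tests
added below; block_blob_client is an assumed placeholder:

    import blobxfer.models
    import blobxfer.blob.operations as blob_ops

    # Block mode: page and append blobs under the prefix are skipped
    for blob in blob_ops.list_blobs(
            block_blob_client, 'container', 'path/prefix',
            blobxfer.models.AzureStorageModes.Block, timeout=None):
        print(blob.name, blob.properties.blob_type)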
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import datetime +import dateutil.tz +import enum +import logging +try: + import pathlib2 as pathlib +except ImportError: + import pathlib +import threading +# non-stdlib imports +import dateutil +# local imports +import blobxfer.md5 +import blobxfer.operations +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + +# global defines +_MD5_MAP = {} +_MD5_META_LOCK = threading.Lock() +_ALL_REMOTE_FILES_PROCESSED = False + + +class DownloadAction(enum.Enum): + Skip = 1 + CheckMd5 = 2 + Download = 3 + + +def _check_download_conditions(lpath, rfile, spec): + # type: (pathlib.Path, blobxfer.models.AzureStorageEntity, + # blobxfer.models.DownloadSpecification) -> DownloadAction + """Check for download conditions + :param pathlib.Path lpath: local path + :param blobxfer.models.AzureStorageEntity rfile: remote file + :param blobxfer.models.DownloadSpecification spec: download spec + :rtype: DownloadAction + :return: download action + """ + if not lpath.exists(): + return DownloadAction.Download + if not spec.options.overwrite: + logger.info( + 'not overwriting local file: {} (remote: {}/{})'.format( + lpath, rfile.container, rfile.name)) + return DownloadAction.Skip + # check skip on options, MD5 match takes priority + if spec.skip_on.md5_match: + return DownloadAction.CheckMd5 + # if neither of the remaining skip on actions are activated, download + if not spec.skip_on.filesize_match and not spec.skip_on.lmt_ge: + return DownloadAction.Download + # check skip on file size match + dl_fs = None + if spec.skip_on.filesize_match: + lsize = lpath.stat().st_size + if rfile.mode == blobxfer.models.AzureStorageModes.Page: + lsize = blobxfer.util.page_align_content_length(lsize) + if rfile.size == lsize: + dl_fs = False + else: + dl_fs = True + # check skip on lmt ge + dl_lmt = None + if spec.skip_on.lmt_ge: + mtime = datetime.datetime.fromtimestamp( + lpath.stat().st_mtime, tz=dateutil.tz.tzlocal()) + if mtime >= rfile.lmt: + dl_lmt = False + else: + dl_lmt = True + # download if either skip on mismatch is True + if dl_fs or dl_lmt: + return DownloadAction.Download + else: + return DownloadAction.Skip + + +def pre_md5_skip_on_check(lpath, rfile): + # type: (pathlib.Path, blobxfer.models.AzureStorageEntity) -> None + """Perform pre MD5 skip on check + :param pathlib.Path lpath: local path + :param blobxfer.models.AzureStorageEntity rfile: remote file + """ + global _MD5_META_LOCK, _MD5_MAP + # if encryption metadata is present, check for pre-encryption + # md5 in blobxfer extensions + md5 = None + if rfile.encryption_metadata is not None: + md5 = rfile.encryption_metadata.blobxfer_extensions.\ + pre_encrypted_content_md5 + if md5 is None: + md5 = rfile.md5 + slpath = str(lpath) + with _MD5_META_LOCK: + _MD5_MAP[slpath] = rfile + print('pre', lpath, len(_MD5_MAP)) + blobxfer.md5.add_file_for_md5_check( + slpath, md5, rfile.mode) + + +def post_md5_skip_on_check(filename, md5_match): + # type: (str, bool) -> None + """Perform post MD5 skip on check + :param str filename: local filename + :param bool md5_match: if MD5 matches + """ + global _MD5_META_LOCK, _MD5_MAP + if not md5_match: + lpath = pathlib.Path(filename) + # TODO enqueue file for download + with _MD5_META_LOCK: + _MD5_MAP.pop(filename) + + +def 
check_md5_downloads_thread(): + def check_for_downloads_from_md5(): + # type: (None) -> str + """Check queue for a file to download + :rtype: str + :return: local file path + """ + global _MD5_META_LOCK, _MD5_MAP, _ALL_REMOTE_FILES_PROCESSED + cv = blobxfer.md5.get_done_cv() + while True: + with _MD5_META_LOCK: + if len(_MD5_MAP) == 0 and _ALL_REMOTE_FILES_PROCESSED: + break + cv.acquire() + while True: + result = blobxfer.md5.check_md5_file_for_download() + if result is None: + # use cv timeout due to possible non-wake while running + cv.wait(1) + else: + break + cv.release() + if result is not None: + post_md5_skip_on_check(result[0], result[1]) + + thr = threading.Thread(target=check_for_downloads_from_md5) + thr.start() + return thr + + +def download(general_options, creds, spec): + # type: (blobxfer.models.GeneralOptions, + # blobxfer.models.AzureStorageCredentials, + # blobxfer.models.DownloadSpecification) -> None + """Download action + :param blobxfer.models.GeneralOptions general_options: general opts + :param blobxfer.models.AzureStorageCredentials creds: creds + :param blobxfer.models.DownloadSpecification spec: download spec + """ + # ensure destination path + blobxfer.operations.ensure_local_destination(creds, spec) + logger.info('downloading to local path: {}'.format(spec.destination.path)) + # initialize MD5 processes + blobxfer.md5.initialize_md5_processes() + md5_thread = check_md5_downloads_thread() + # iterate through source paths to download + for src in spec.sources: + for rfile in src.files(creds, spec.options, general_options): + # form local path for remote file + lpath = pathlib.Path(spec.destination.path, rfile.name) + # check on download conditions + action = _check_download_conditions(lpath, rfile, spec) + if action == DownloadAction.Skip: + continue + elif action == DownloadAction.CheckMd5: + pre_md5_skip_on_check(lpath, rfile) + elif action == DownloadAction.Download: + # add to download queue + ### TODO + pass + # cond checks? 
+ print(rfile.container, rfile.name, rfile.lmt, rfile.size, + rfile.md5, rfile.mode, rfile.encryption_metadata) + + global _MD5_META_LOCK, _ALL_REMOTE_FILES_PROCESSED + with _MD5_META_LOCK: + _ALL_REMOTE_FILES_PROCESSED = True + md5_thread.join() + blobxfer.md5.finalize_md5_processes() + + import time + time.sleep(5) + diff --git a/blobxfer/file/operations.py b/blobxfer/file/operations.py index 3501b65..eae7640 100644 --- a/blobxfer/file/operations.py +++ b/blobxfer/file/operations.py @@ -36,7 +36,7 @@ import pathlib # non-stdlib imports import azure.common -from azure.storage.file import FileService +import azure.storage.file # local imports # create logger @@ -51,12 +51,12 @@ def create_client(storage_account): :return: file service client """ if storage_account.is_sas: - client = FileService( + client = azure.storage.file.FileService( account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint) else: - client = FileService( + client = azure.storage.file.FileService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint) @@ -64,37 +64,84 @@ def create_client(storage_account): def parse_file_path(filepath): - # type: (pathlib.Path) -> tuple + # type: (pathlib.Path) -> Tuple[str, str] + """Parse file path from file path + :param str filepath: file path + :rtype: tuple + :return: (dirname, rest of path) + """ if not isinstance(filepath, pathlib.Path): filepath = pathlib.Path(filepath) dirname = '/'.join(filepath.parts[:len(filepath.parts) - 1]) - return (dirname, filepath.parts[-1]) + if len(dirname) == 0: + dirname = None + if len(filepath.parts) > 0: + fname = filepath.parts[-1] + else: + fname = None + return (dirname, fname) -def check_if_single_file(client, fileshare, prefix): - # type: (azure.storage.file.FileService, str, str) -> bool +def check_if_single_file(client, fileshare, prefix, timeout=None): + # type: (azure.storage.file.FileService, str, str, int) -> + # Tuple[bool, azure.storage.file.models.File] """Check if prefix is a single file or multiple files :param FileService client: blob client :param str fileshare: file share name :param str prefix: path prefix - :rtype: bool - :return: if prefix in fileshare is a single file + :param int timeout: timeout + :rtype: tuple + :return: (if prefix in fileshare is a single file, file) """ dirname, fname = parse_file_path(prefix) + file = None try: - client.get_file_properties( - share_name=fileshare, directory_name=dirname, file_name=fname) + file = client.get_file_properties( + share_name=fileshare, + directory_name=dirname, + file_name=fname, + timeout=timeout, + ) except azure.common.AzureMissingResourceHttpError: - return False - return True + return (False, file) + return (True, file) -def list_blobs(client, container, prefix, mode): - # type: (azure.storage.blob.BaseBlobService, str, str, - # blobxfer.models.AzureStorageModes) -> list - """List blobs in path conforming to mode - :param azure.storage.blob.BaseBlobService client: blob client - :param str container: container +def list_files(client, fileshare, prefix, timeout=None): + # type: (azure.storage.file.FileService, str, str, int) -> + # azure.storage.file.models.File + """List files in path + :param azure.storage.file.FileService client: file client + :param str fileshare: file share :param str prefix: path prefix + :param int timeout: timeout + :rtype: azure.storage.file.models.File + :return: generator of files """ - pass + # if single file, then yield file and return 
+ _check = check_if_single_file(client, fileshare, prefix, timeout) + if _check[0]: + yield _check[1] + return + # else recursively list from prefix path + dirs = [prefix] + while len(dirs) > 0: + dir = dirs.pop() + files = client.list_directories_and_files( + share_name=fileshare, + directory_name=dir, + timeout=timeout, + ) + for file in files: + fspath = str(pathlib.Path( + dir if dir is not None else '' / file.name)) + if isinstance(file, azure.storage.file.File): + fsprop = client.get_file_properties( + share_name=fileshare, + directory_name=dir, + file_name=file.name, + timeout=timeout, + ) + yield fsprop + else: + dirs.append(fspath) diff --git a/blobxfer/md5.py b/blobxfer/md5.py new file mode 100644 index 0000000..fafd3f8 --- /dev/null +++ b/blobxfer/md5.py @@ -0,0 +1,116 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
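Note: the md5 module beginning here is the offload half of the skip-on-MD5
flow wired up in download.py above: candidate files go onto a multiprocessing
task queue, worker processes compute the local MD5 (page-aligned for page
blobs), and (filename, match) results come back on a done queue guarded by a
condition variable. A condensed sketch of that handshake, assembled from the
download.py and md5.py code in this patch; remote_md5 and the local path are
assumed placeholders, and the polling thread from download.py is omitted:

    import blobxfer.md5
    import blobxfer.models

    blobxfer.md5.initialize_md5_processes()
    blobxfer.md5.add_file_for_md5_check(
        '/tmp/file.bin', remote_md5, blobxfer.models.AzureStorageModes.Block)
    result = blobxfer.md5.check_md5_file_for_download()
    if result is not None:
        filename, md5_match = result  # True if local MD5 equals remote_md5
    blobxfer.md5.finalize_md5_processes()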
+ +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +import multiprocessing +try: + import queue +except ImportError: + import Queue as queue +# non-stdlib imports +# local imports +import blobxfer.download +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + +# global defines +_TASK_QUEUE = multiprocessing.Queue() +_DONE_QUEUE = multiprocessing.Queue() +_DONE_CV = multiprocessing.Condition() +_MD5_PROCS = [] + + +def _worker_md5_file_process(): + global _TASK_QUEUE, _DONE_QUEUE + while True: + filename, remote_md5, pagealign = _TASK_QUEUE.get() + md5 = blobxfer.util.compute_md5_for_file_asbase64(filename, pagealign) + logger.debug('MD5: {} {} {}'.format(md5, remote_md5, filename)) + _DONE_CV.acquire() + _DONE_QUEUE.put((filename, md5 == remote_md5)) + _DONE_CV.notify() + _DONE_CV.release() + + +def get_done_cv(): + global _DONE_CV + return _DONE_CV + + +def check_md5_file_for_download(): + # type: (None) -> str + """Check queue for a file to download + :rtype: str + :return: local file path + """ + global _DONE_QUEUE + try: + return _DONE_QUEUE.get_nowait() + except queue.Empty: + return None + + +def add_file_for_md5_check(filename, remote_md5, mode): + # type: (str, str, blobxfer.models.AzureStorageModes) -> bool + """Check an MD5 for a file for download + :param str filename: file to compute MD5 for + :param str remote_md5: remote MD5 to compare against + :param blobxfer.models.AzureStorageModes mode: mode + :rtype: bool + :return: MD5 match comparison + """ + global _TASK_QUEUE + if mode == blobxfer.models.AzureStorageModes.Page: + pagealign = True + else: + pagealign = False + _TASK_QUEUE.put((filename, remote_md5, pagealign)) + + +def initialize_md5_processes(num_workers=None): + global _MD5_PROCS + if num_workers is None or num_workers < 1: + num_workers = multiprocessing.cpu_count() // 2 + if num_workers < 1: + num_workers = 1 + for _ in range(num_workers): + proc = multiprocessing.Process(target=_worker_md5_file_process) + proc.start() + _MD5_PROCS.append(proc) + + +def finalize_md5_processes(): + global _MD5_PROCS + for proc in _MD5_PROCS: + proc.terminate() + proc.join() diff --git a/blobxfer/models.py b/blobxfer/models.py index 921eef8..918eaaa 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -47,8 +47,10 @@ create_file_client, create_page_blob_client, ) +from azure.storage.blob.models import _BlobTypes as BlobTypes import blobxfer.blob.operations -import blobxfer.crypto +import blobxfer.file.operations +import blobxfer.crypto.models import blobxfer.util # create logger @@ -57,11 +59,11 @@ # enums class AzureStorageModes(enum.Enum): - Auto = 1 - Append = 2 - Block = 3 - File = 4 - Page = 5 + Auto = 10 + Append = 20 + Block = 30 + File = 40 + Page = 50 # named tuples @@ -512,49 +514,204 @@ def lookup_storage_account(self, remote_path): """ return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] - def files(self, creds, mode): - if mode == AzureStorageModes.Auto: - for blob in self._auto_blobs(creds): - yield blob - elif mode == AzureStorageModes.Append: - pass - elif mode == AzureStorageModes.Block: - pass - elif mode == AzureStorageModes.File: - pass - elif mode == AzureStorageModes.Page: - pass + def files(self, creds, options, general_options): + # type: (AzureSourcePath, AzureStorageCredentials, 
DownloadOptions, + # GeneralOptions) -> AzureStorageEntity + """Generator of Azure remote files or blobs + :param AzureSourcePath self: this + :param AzureStorageCredentials creds: storage creds + :param DownloadOptions options: download options + :param GeneralOptions general_options: general options + :rtype: AzureStorageEntity + :return: Azure storage entity object + """ + if options.mode == AzureStorageModes.File: + for file in self._populate_from_list_files( + creds, options, general_options): + yield file else: - raise RuntimeError('unknown Azure Storage Mode: {}'.format(mode)) + for blob in self._populate_from_list_blobs( + creds, options, general_options): + yield blob - def _append_blobs(self): + def _populate_from_list_files(self, creds, options, general_options): + # type: (AzureSourcePath, AzureStorageCredentials, DownloadOptions, + # GeneralOptions) -> AzureStorageEntity + """Internal generator for Azure remote files + :param AzureSourcePath self: this + :param AzureStorageCredentials creds: storage creds + :param DownloadOptions options: download options + :param GeneralOptions general_options: general options + :rtype: AzureStorageEntity + :return: Azure storage entity object + """ for _path in self._paths: - pass - - def _auto_blobs(self, creds): + rpath = str(_path) + cont, dir = blobxfer.util.explode_azure_path(rpath) + sa = creds.get_storage_account(self.lookup_storage_account(rpath)) + for file in blobxfer.file.operations.list_files( + sa.file_client, cont, dir, general_options.timeout_sec): + if blobxfer.crypto.models.EncryptionMetadata.\ + encryption_metadata_exists(file.metadata): + ed = blobxfer.crypto.models.EncryptionMetadata() + ed.convert_from_json( + file.metadata, file.name, options.rsa_private_key) + else: + ed = None + ase = AzureStorageEntity(cont, ed) + ase.populate_from_file(file) + yield ase + + def _populate_from_list_blobs(self, creds, options, general_options): + # type: (AzureSourcePath, AzureStorageCredentials, DownloadOptions, + # GeneralOptions) -> AzureStorageEntity + """Internal generator for Azure remote blobs + :param AzureSourcePath self: this + :param AzureStorageCredentials creds: storage creds + :param DownloadOptions options: download options + :param GeneralOptions general_options: general options + :rtype: AzureStorageEntity + :return: Azure storage entity object + """ for _path in self._paths: rpath = str(_path) cont, dir = blobxfer.util.explode_azure_path(rpath) sa = creds.get_storage_account(self.lookup_storage_account(rpath)) for blob in blobxfer.blob.operations.list_blobs( - sa.block_blob_client, cont, dir): + sa.block_blob_client, cont, dir, options.mode, + general_options.timeout_sec): if blobxfer.crypto.models.EncryptionMetadata.\ encryption_metadata_exists(blob.metadata): ed = blobxfer.crypto.models.EncryptionMetadata() - ed.convert_from_json(blob.metadata) + ed.convert_from_json( + blob.metadata, blob.name, options.rsa_private_key) else: ed = None - yield (_path, blob.name, ed) + ase = AzureStorageEntity(cont, ed) + ase.populate_from_blob(blob) + yield ase class AzureStorageEntity(object): - def __init__(self): + """Azure Storage Entity""" + def __init__(self, container, ed=None): + # type: (AzureStorageEntity, str + # blobxfer.crypto.models.EncryptionMetadata) -> None + """Ctor for AzureStorageEntity + :param AzureStorageEntity self: this + :param str container: container name + :param blobxfer.crypto.models.EncryptionMetadata ed: + encryption metadata + """ + self._container = container self._name = None + self._mode = None 
+ self._lmt = None self._size = None self._md5 = None - self._enc = None + self._encryption = ed self._vio = None + def populate_from_blob(self, blob): + # type: (AzureStorageEntity, azure.storage.blob.models.Blob) -> None + """Populate properties from Blob + :param AzureStorageEntity self: this + :param azure.storage.blob.models.Blob blob: blob to populate from + """ + self._name = blob.name + self._lmt = blob.properties.last_modified + self._size = blob.properties.content_length + self._md5 = blob.properties.content_settings.content_md5 + if blob.properties.blob_type == BlobTypes.AppendBlob: + self._mode = AzureStorageModes.Append + elif blob.properties.blob_type == BlobTypes.BlockBlob: + self._mode = AzureStorageModes.Block + elif blob.properties.blob_type == BlobTypes.PageBlob: + self._mode = AzureStorageModes.Page + + def populate_from_file(self, file): + # type: (AzureStorageEntity, azure.storage.file.models.File) -> None + """Populate properties from File + :param AzureStorageEntity self: this + :param azure.storage.file.models.File file: file to populate from + """ + self._name = file.name + self._lmt = file.properties.last_modified + self._size = file.properties.content_length + self._md5 = file.properties.content_settings.content_md5 + self._mode = AzureStorageModes.File + + @property + def container(self): + # type: (AzureStorageEntity) -> str + """Container name + :param AzureStorageEntity self: this + :rtype: str + :return: name of container or file share + """ + return self._container + + @property + def name(self): + # type: (AzureStorageEntity) -> str + """Entity name + :param AzureStorageEntity self: this + :rtype: str + :return: name of entity + """ + return self._name + + @property + def lmt(self): + # type: (AzureStorageEntity) -> datetime.datetime + """Entity last modified time + :param AzureStorageEntity self: this + :rtype: datetime.datetime + :return: LMT of entity + """ + return self._lmt + + @property + def size(self): + # type: (AzureStorageEntity) -> int + """Entity size + :param AzureStorageEntity self: this + :rtype: int + :return: size of entity + """ + return self._size + + @property + def md5(self): + # type: (AzureStorageEntity) -> str + """Base64-encoded MD5 + :param AzureStorageEntity self: this + :rtype: str + :return: md5 of entity + """ + return self._md5 + + @property + def mode(self): + # type: (AzureStorageEntity) -> AzureStorageModes + """Entity mode (type) + :param AzureStorageEntity self: this + :rtype: AzureStorageModes + :return: type of entity + """ + return self._mode + + @property + def encryption_metadata(self): + # type: (AzureStorageEntity) -> + # blobxfer.crypto.models.EncryptionMetadata + """Entity mode (type) + :param AzureStorageEntity self: this + :rtype: blobxfer.crypto.models.EncryptionMetadata + :return: encryption metadata of entity + """ + return self._encryption + class AzureDestinationPaths(object): def __init__(self): diff --git a/blobxfer/operations.py b/blobxfer/operations.py index df1f46f..82e4024 100644 --- a/blobxfer/operations.py +++ b/blobxfer/operations.py @@ -32,21 +32,21 @@ import logging # non-stdlib imports # local imports -from .models import ( # noqa - AzureStorageCredentials, - AzureStorageModes, - DownloadSpecification, - FileDescriptor, -) +import blobxfer.models import blobxfer.blob.operations import blobxfer.file.operations import blobxfer.util +# create logger +logger = logging.getLogger(__name__) + def ensure_local_destination(creds, spec): + # type: (blobxfer.models.AzureStorageCredentials, + # 
blobxfer.models.DownloadSpecification) -> None """Ensure a local destination path given a download spec - :param AzureStorageCredentials creds: creds - :param DownloadSpecification spec: download spec + :param blobxfer.models.AzureStorageCredentials creds: creds + :param blobxfer.models.DownloadSpecification spec: download spec """ # ensure destination path is writable given the source if len(spec.sources) < 1: @@ -60,15 +60,15 @@ def ensure_local_destination(creds, spec): if not blobxfer.util.is_none_or_empty(dir): sa = creds.get_storage_account( spec.sources[0].lookup_storage_account(rpath)) - if spec.options.mode == AzureStorageModes.File: + if spec.options.mode == blobxfer.models.AzureStorageModes.File: if blobxfer.file.operations.check_if_single_file( - sa.file_client, cont, dir): + sa.file_client, cont, dir)[0]: spec.destination.is_dir = False else: if blobxfer.blob.operations.check_if_single_blob( sa.block_blob_client, cont, dir): spec.destination.is_dir = False - logging.debug('dest is_dir={} for {} specs'.format( + logger.debug('dest is_dir={} for {} specs'.format( spec.destination.is_dir, len(spec.sources))) # ensure destination path spec.destination.ensure_path_exists() diff --git a/blobxfer/util.py b/blobxfer/util.py index 9b778bd..dd116bd 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -235,7 +235,7 @@ def explode_azure_path(path): :return: container, vpath """ rpath = normalize_azure_path(path).split('/') - container = rpath[0] + container = str(rpath[0]) if len(rpath) > 1: rpath = '/'.join(rpath[1:]) else: diff --git a/cli/cli.py b/cli/cli.py index 1ec99d4..508013a 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -41,7 +41,6 @@ import blobxfer.api import blobxfer.util # local imports -import download as dl import settings # create logger @@ -592,7 +591,7 @@ def download(ctx, local_resource, storage_account, remote_path): ctx.initialize() specs = settings.create_download_specifications(ctx.config) for spec in specs: - dl.download(ctx.general_options, ctx.credentials, spec) + blobxfer.api.download(ctx.general_options, ctx.credentials, spec) @cli.command('synccopy') diff --git a/setup.py b/setup.py index 889f709..475cb90 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ 'click==6.6', 'cryptography>=1.7.1', 'future==0.16.0', + 'python-dateutil==2.6.0', 'ruamel.yaml==0.13.11', ] diff --git a/tests/test_blobxfer_blob_operations.py b/tests/test_blobxfer_blob_operations.py new file mode 100644 index 0000000..1b7d300 --- /dev/null +++ b/tests/test_blobxfer_blob_operations.py @@ -0,0 +1,75 @@ +# coding=utf-8 +"""Tests for general blob operations""" + +# stdlib imports +import mock +# non-stdlib imports +import azure.common +import azure.storage.blob +import pytest +# local imports +import blobxfer.models as models +# module under test +import blobxfer.blob.operations as ops + + +def test_check_if_single_blob(): + client = mock.MagicMock() + client.get_blob_properties = mock.MagicMock() + client.get_blob_properties.return_value = mock.MagicMock() + + result = ops.check_if_single_blob(client, 'a', 'b/c') + assert result + + client = mock.MagicMock() + client.get_blob_properties = mock.MagicMock() + client.get_blob_properties.side_effect = \ + azure.common.AzureMissingResourceHttpError('msg', 404) + + result = ops.check_if_single_blob(client, 'a', 'b/c') + assert not result + + +def test_list_blobs(): + with pytest.raises(RuntimeError): + for blob in ops.list_blobs( + None, 'cont', 'prefix', models.AzureStorageModes.File): + pass + + client = mock.MagicMock() + client.list_blobs 
= mock.MagicMock() + _blob = azure.storage.blob.models.Blob(name='name') + _blob.properties = azure.storage.blob.models.BlobProperties() + client.list_blobs.return_value = [_blob] + + i = 0 + for blob in ops.list_blobs( + client, 'cont', 'prefix', models.AzureStorageModes.Auto): + i += 1 + assert blob.name == 'name' + assert i == 1 + + _blob.properties.blob_type = \ + azure.storage.blob.models._BlobTypes.AppendBlob + i = 0 + for blob in ops.list_blobs( + client, 'dir', 'prefix', models.AzureStorageModes.Block): + i += 1 + assert blob.name == 'name' + assert i == 0 + + i = 0 + for blob in ops.list_blobs( + client, 'dir', 'prefix', models.AzureStorageModes.Page): + i += 1 + assert blob.name == 'name' + assert i == 0 + + _blob.properties.blob_type = \ + azure.storage.blob.models._BlobTypes.BlockBlob + i = 0 + for blob in ops.list_blobs( + client, 'dir', 'prefix', models.AzureStorageModes.Append): + i += 1 + assert blob.name == 'name' + assert i == 0 diff --git a/tests/test_blobxfer_file_operations.py b/tests/test_blobxfer_file_operations.py index 56c4b95..fd39912 100644 --- a/tests/test_blobxfer_file_operations.py +++ b/tests/test_blobxfer_file_operations.py @@ -1,12 +1,14 @@ # coding=utf-8 -"""Tests for models""" +"""Tests for file operations""" # stdlib imports +import mock # non-stdlib imports +import azure.common import azure.storage -import pytest # local imports import blobxfer.models as models +import blobxfer.util as util # module under test import blobxfer.file.operations as ops @@ -27,3 +29,94 @@ def test_create_client(): assert isinstance( client.authentication, azure.storage._auth._StorageSASAuthentication) + + +def test_parse_file_path(): + rpath = '/a/b/c' + fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) + dir, fname = ops.parse_file_path(path) + assert fshare == 'a' + assert dir == 'b' + assert fname == 'c' + + rpath = 'a/b/c/d' + fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) + dir, fname = ops.parse_file_path(path) + assert fshare == 'a' + assert dir == 'b/c' + assert fname == 'd' + + rpath = 'a/b' + fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) + dir, fname = ops.parse_file_path(path) + assert fshare == 'a' + assert dir is None + assert fname == 'b' + + rpath = 'a' + fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) + dir, fname = ops.parse_file_path(path) + assert fshare == 'a' + assert dir is None + assert fname is None + + +def test_check_if_single_file(): + client = mock.MagicMock() + client.get_file_properties = mock.MagicMock() + client.get_file_properties.return_value = mock.MagicMock() + + result = ops.check_if_single_file(client, 'a', 'b/c') + assert result[0] + + client = mock.MagicMock() + client.get_file_properties = mock.MagicMock() + client.get_file_properties.side_effect = \ + azure.common.AzureMissingResourceHttpError('msg', 404) + + result = ops.check_if_single_file(client, 'a', 'b/c') + assert not result[0] + + +def test_list_files_single_file(): + client = mock.MagicMock() + client.get_file_properties = mock.MagicMock() + client.get_file_properties.return_value = 'fp' + + i = 0 + for file in ops.list_files(client, 'a', 'b/c'): + i += 1 + assert file == 'fp' + assert i == 1 + + +@mock.patch( + 'blobxfer.file.operations.check_if_single_file', + return_value=(False, None) +) +def test_list_files_directory(patched_cisf): + client = mock.MagicMock() + client.list_directories_and_files = mock.MagicMock() + _file = azure.storage.file.models.File(name='name') + 
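# Illustrative sketch of how the path helpers exercised by test_parse_file_path
# above compose: blobxfer.util.explode_azure_path splits off the share/container
# and blobxfer.file.operations.parse_file_path splits the remainder into
# (directory, filename); the values below mirror the test's assertions.
import blobxfer.file.operations as file_ops
import blobxfer.util as util

share, rest = util.explode_azure_path(util.normalize_azure_path('a/b/c/d'))
directory, filename = file_ops.parse_file_path(rest)
assert (share, directory, filename) == ('a', 'b/c', 'd')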
client.list_directories_and_files.return_value = [_file] + client.get_file_properties = mock.MagicMock() + client.get_file_properties.return_value = _file + + i = 0 + for file in ops.list_files(client, 'dir', ''): + i += 1 + assert file.name == 'name' + assert i == 1 + + client = mock.MagicMock() + client.list_directories_and_files = mock.MagicMock() + _file = azure.storage.file.models.File(name='name') + client.list_directories_and_files.side_effect = [['dir'], [file]] + client.get_file_properties = mock.MagicMock() + client.get_file_properties.return_value = _file + + i = 0 + for file in ops.list_files(client, 'dir', ''): + i += 1 + assert file.name == 'name' + assert i == 1 diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 2042a15..c9a8d81 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -2,6 +2,7 @@ """Tests for models""" # stdlib imports +import mock import os try: import pathlib2 as pathlib @@ -9,6 +10,7 @@ import pathlib # non-stdlib imports import azure.storage +import azure.storage.blob import pytest # module under test import blobxfer.models @@ -188,3 +190,36 @@ def test_downloadspecification(): assert len(ds.sources) == 1 assert p in ds.sources[0]._path_map assert ds.sources[0]._path_map[p] == 'sa' + + +def test_azurestorageentity(): + ase = blobxfer.models.AzureStorageEntity('cont') + assert ase.container == 'cont' + assert ase.encryption_metadata is None + + blob = mock.MagicMock() + blob.name = 'name' + blob.properties = mock.MagicMock() + blob.properties.last_modified = 'lmt' + blob.properties.content_length = 123 + blob.properties.content_settings = mock.MagicMock() + blob.properties.content_settings.content_md5 = 'abc' + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob + ase.populate_from_blob(blob) + + assert ase.name == 'name' + assert ase.lmt == 'lmt' + assert ase.size == 123 + assert ase.md5 == 'abc' + assert ase.mode == blobxfer.models.AzureStorageModes.Block + + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.AppendBlob + ase.populate_from_blob(blob) + assert ase.mode == blobxfer.models.AzureStorageModes.Append + + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob + ase.populate_from_blob(blob) + assert ase.mode == blobxfer.models.AzureStorageModes.Page + + ase.populate_from_file(blob) + assert ase.mode == blobxfer.models.AzureStorageModes.File diff --git a/tests/test_blobxfer_operations.py b/tests/test_blobxfer_operations.py index f24703c..78aef22 100644 --- a/tests/test_blobxfer_operations.py +++ b/tests/test_blobxfer_operations.py @@ -72,10 +72,10 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): ds.add_azure_source_path(asp) - patched_file.return_value = False + patched_file.return_value = (False, None) ops.ensure_local_destination(MagicMock(), ds) assert ds.destination.is_dir - patched_file.return_value = True + patched_file.return_value = (True, MagicMock()) with pytest.raises(RuntimeError): ops.ensure_local_destination(MagicMock(), ds) From 290a1ebdf903806f187dc212663f8fc1f7bd8ca8 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 21 Feb 2017 21:20:16 -0800 Subject: [PATCH 06/47] Refactor MD5/Downloader to classes - Eliminate global state --- blobxfer/api.py | 2 +- blobxfer/download.py | 331 ++++++++++++++++++++++--------------------- blobxfer/md5.py | 155 +++++++++++--------- cli/cli.py | 4 +- 4 files changed, 265 insertions(+), 227 deletions(-) diff --git a/blobxfer/api.py b/blobxfer/api.py index 
550f265..69444ae 100644 --- a/blobxfer/api.py +++ b/blobxfer/api.py @@ -46,5 +46,5 @@ ) from .download import ( # noqa - download + Downloader ) diff --git a/blobxfer/download.py b/blobxfer/download.py index 11264c9..b5a87d4 100644 --- a/blobxfer/download.py +++ b/blobxfer/download.py @@ -49,11 +49,6 @@ # create logger logger = logging.getLogger(__name__) -# global defines -_MD5_MAP = {} -_MD5_META_LOCK = threading.Lock() -_ALL_REMOTE_FILES_PROCESSED = False - class DownloadAction(enum.Enum): Skip = 1 @@ -61,162 +56,176 @@ class DownloadAction(enum.Enum): Download = 3 -def _check_download_conditions(lpath, rfile, spec): - # type: (pathlib.Path, blobxfer.models.AzureStorageEntity, - # blobxfer.models.DownloadSpecification) -> DownloadAction - """Check for download conditions - :param pathlib.Path lpath: local path - :param blobxfer.models.AzureStorageEntity rfile: remote file - :param blobxfer.models.DownloadSpecification spec: download spec - :rtype: DownloadAction - :return: download action - """ - if not lpath.exists(): - return DownloadAction.Download - if not spec.options.overwrite: - logger.info( - 'not overwriting local file: {} (remote: {}/{})'.format( - lpath, rfile.container, rfile.name)) - return DownloadAction.Skip - # check skip on options, MD5 match takes priority - if spec.skip_on.md5_match: - return DownloadAction.CheckMd5 - # if neither of the remaining skip on actions are activated, download - if not spec.skip_on.filesize_match and not spec.skip_on.lmt_ge: - return DownloadAction.Download - # check skip on file size match - dl_fs = None - if spec.skip_on.filesize_match: - lsize = lpath.stat().st_size - if rfile.mode == blobxfer.models.AzureStorageModes.Page: - lsize = blobxfer.util.page_align_content_length(lsize) - if rfile.size == lsize: - dl_fs = False - else: - dl_fs = True - # check skip on lmt ge - dl_lmt = None - if spec.skip_on.lmt_ge: - mtime = datetime.datetime.fromtimestamp( - lpath.stat().st_mtime, tz=dateutil.tz.tzlocal()) - if mtime >= rfile.lmt: - dl_lmt = False +class Downloader(object): + """Downloader""" + def __init__(self, general_options, creds, spec): + # type: (Downloader, blobxfer.models.GeneralOptions, + # blobxfer.models.AzureStorageCredentials, + # blobxfer.models.DownloadSpecification) -> None + """Ctor for Downloader + :param Downloader self: this + :param blobxfer.models.GeneralOptions general_options: general opts + :param blobxfer.models.AzureStorageCredentials creds: creds + :param blobxfer.models.DownloadSpecification spec: download spec + """ + self._md5_meta_lock = threading.Lock() + self._all_remote_files_processed = False + self._md5_map = {} + self._md5_offload = None + self._md5_check_thread = None + self._general_options = general_options + self._creds = creds + self._spec = spec + + def _check_download_conditions(self, lpath, rfile, spec): + # type: (Downloader, pathlib.Path, blobxfer.models.AzureStorageEntity, + # blobxfer.models.DownloadSpecification) -> DownloadAction + """Check for download conditions + :param Downloader self: this + :param pathlib.Path lpath: local path + :param blobxfer.models.AzureStorageEntity rfile: remote file + :param blobxfer.models.DownloadSpecification spec: download spec + :rtype: DownloadAction + :return: download action + """ + if not lpath.exists(): + return DownloadAction.Download + if not spec.options.overwrite: + logger.info( + 'not overwriting local file: {} (remote: {}/{})'.format( + lpath, rfile.container, rfile.name)) + return DownloadAction.Skip + # check skip on options, MD5 match takes 
priority + if spec.skip_on.md5_match: + return DownloadAction.CheckMd5 + # if neither of the remaining skip on actions are activated, download + if not spec.skip_on.filesize_match and not spec.skip_on.lmt_ge: + return DownloadAction.Download + # check skip on file size match + dl_fs = None + if spec.skip_on.filesize_match: + lsize = lpath.stat().st_size + if rfile.mode == blobxfer.models.AzureStorageModes.Page: + lsize = blobxfer.util.page_align_content_length(lsize) + if rfile.size == lsize: + dl_fs = False + else: + dl_fs = True + # check skip on lmt ge + dl_lmt = None + if spec.skip_on.lmt_ge: + mtime = datetime.datetime.fromtimestamp( + lpath.stat().st_mtime, tz=dateutil.tz.tzlocal()) + if mtime >= rfile.lmt: + dl_lmt = False + else: + dl_lmt = True + # download if either skip on mismatch is True + if dl_fs or dl_lmt: + return DownloadAction.Download else: - dl_lmt = True - # download if either skip on mismatch is True - if dl_fs or dl_lmt: - return DownloadAction.Download - else: - return DownloadAction.Skip - - -def pre_md5_skip_on_check(lpath, rfile): - # type: (pathlib.Path, blobxfer.models.AzureStorageEntity) -> None - """Perform pre MD5 skip on check - :param pathlib.Path lpath: local path - :param blobxfer.models.AzureStorageEntity rfile: remote file - """ - global _MD5_META_LOCK, _MD5_MAP - # if encryption metadata is present, check for pre-encryption - # md5 in blobxfer extensions - md5 = None - if rfile.encryption_metadata is not None: - md5 = rfile.encryption_metadata.blobxfer_extensions.\ - pre_encrypted_content_md5 - if md5 is None: - md5 = rfile.md5 - slpath = str(lpath) - with _MD5_META_LOCK: - _MD5_MAP[slpath] = rfile - print('pre', lpath, len(_MD5_MAP)) - blobxfer.md5.add_file_for_md5_check( - slpath, md5, rfile.mode) - - -def post_md5_skip_on_check(filename, md5_match): - # type: (str, bool) -> None - """Perform post MD5 skip on check - :param str filename: local filename - :param bool md5_match: if MD5 matches - """ - global _MD5_META_LOCK, _MD5_MAP - if not md5_match: - lpath = pathlib.Path(filename) - # TODO enqueue file for download - with _MD5_META_LOCK: - _MD5_MAP.pop(filename) - - -def check_md5_downloads_thread(): - def check_for_downloads_from_md5(): - # type: (None) -> str - """Check queue for a file to download - :rtype: str - :return: local file path + return DownloadAction.Skip + + def _pre_md5_skip_on_check(self, lpath, rfile): + # type: (Downloader, pathlib.Path, + # blobxfer.models.AzureStorageEntity) -> None + """Perform pre MD5 skip on check + :param Downloader self: this + :param pathlib.Path lpath: local path + :param blobxfer.models.AzureStorageEntity rfile: remote file + """ + # if encryption metadata is present, check for pre-encryption + # md5 in blobxfer extensions + md5 = None + if rfile.encryption_metadata is not None: + md5 = rfile.encryption_metadata.blobxfer_extensions.\ + pre_encrypted_content_md5 + if md5 is None: + md5 = rfile.md5 + slpath = str(lpath) + with self._md5_meta_lock: + self._md5_map[slpath] = rfile + self._md5_offload.add_localfile_for_md5_check(slpath, md5, rfile.mode) + + def _post_md5_skip_on_check(self, filename, md5_match): + # type: (Downloader, str, bool) -> None + """Perform post MD5 skip on check + :param Downloader self: this + :param str filename: local filename + :param bool md5_match: if MD5 matches + """ + if not md5_match: + lpath = pathlib.Path(filename) + # TODO enqueue file for download + with self._md5_meta_lock: + self._md5_map.pop(filename) + + def _initialize_check_md5_downloads_thread(self): + # 
type: (Downloader) -> None + """Initialize the md5 done queue check thread + :param Downloader self: this """ - global _MD5_META_LOCK, _MD5_MAP, _ALL_REMOTE_FILES_PROCESSED - cv = blobxfer.md5.get_done_cv() - while True: - with _MD5_META_LOCK: - if len(_MD5_MAP) == 0 and _ALL_REMOTE_FILES_PROCESSED: - break - cv.acquire() + def _check_for_downloads_from_md5(self): + # type: (Downloader) -> None + """Check queue for a file to download + :param Downloader self: this + """ + cv = self._md5_offload.done_cv while True: - result = blobxfer.md5.check_md5_file_for_download() - if result is None: - # use cv timeout due to possible non-wake while running - cv.wait(1) - else: - break - cv.release() - if result is not None: - post_md5_skip_on_check(result[0], result[1]) - - thr = threading.Thread(target=check_for_downloads_from_md5) - thr.start() - return thr - - -def download(general_options, creds, spec): - # type: (blobxfer.models.GeneralOptions, - # blobxfer.models.AzureStorageCredentials, - # blobxfer.models.DownloadSpecification) -> None - """Download action - :param blobxfer.models.GeneralOptions general_options: general opts - :param blobxfer.models.AzureStorageCredentials creds: creds - :param blobxfer.models.DownloadSpecification spec: download spec - """ - # ensure destination path - blobxfer.operations.ensure_local_destination(creds, spec) - logger.info('downloading to local path: {}'.format(spec.destination.path)) - # initialize MD5 processes - blobxfer.md5.initialize_md5_processes() - md5_thread = check_md5_downloads_thread() - # iterate through source paths to download - for src in spec.sources: - for rfile in src.files(creds, spec.options, general_options): - # form local path for remote file - lpath = pathlib.Path(spec.destination.path, rfile.name) - # check on download conditions - action = _check_download_conditions(lpath, rfile, spec) - if action == DownloadAction.Skip: - continue - elif action == DownloadAction.CheckMd5: - pre_md5_skip_on_check(lpath, rfile) - elif action == DownloadAction.Download: - # add to download queue - ### TODO - pass - # cond checks? 
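# Hypothetical standalone restatement of the precedence encoded by
# _check_download_conditions above (names are illustrative, not from this patch):
# a missing local file always downloads, overwrite=False always skips, an enabled
# MD5 skip-on defers the decision to the offload workers, and otherwise any
# enabled size/mtime check that finds a mismatch forces a download.
def decide(local_exists, overwrite, skip_md5, size_ok, mtime_ok):
    # size_ok/mtime_ok are True/False when the corresponding skip-on check is
    # enabled, or None when that check is disabled
    if not local_exists:
        return 'Download'
    if not overwrite:
        return 'Skip'
    if skip_md5:
        return 'CheckMd5'
    if size_ok is None and mtime_ok is None:
        return 'Download'
    return 'Download' if (size_ok is False or mtime_ok is False) else 'Skip'

assert decide(True, True, False, size_ok=True, mtime_ok=None) == 'Skip'
assert decide(True, True, False, size_ok=False, mtime_ok=True) == 'Download'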
- print(rfile.container, rfile.name, rfile.lmt, rfile.size, - rfile.md5, rfile.mode, rfile.encryption_metadata) - - global _MD5_META_LOCK, _ALL_REMOTE_FILES_PROCESSED - with _MD5_META_LOCK: - _ALL_REMOTE_FILES_PROCESSED = True - md5_thread.join() - blobxfer.md5.finalize_md5_processes() - - import time - time.sleep(5) - + with self._md5_meta_lock: + if (len(self._md5_map) == 0 and + self._all_remote_files_processed): + break + cv.acquire() + while True: + result = self._md5_offload.get_localfile_md5_done() + if result is None: + # use cv timeout due to possible non-wake while running + cv.wait(1) + else: + break + cv.release() + if result is not None: + self._post_md5_skip_on_check(result[0], result[1]) + + self._md5_check_thread = threading.Thread( + target=_check_for_downloads_from_md5, + args=(self,) + ) + self._md5_check_thread.start() + + def start(self): + # type: (None) -> None + """Start Downloader""" + # ensure destination path + blobxfer.operations.ensure_local_destination(self._creds, self._spec) + logger.info('downloading blobs/files to local path: {}'.format( + self._spec.destination.path)) + # initialize MD5 processes + self._md5_offload = blobxfer.md5.LocalFileMd5Offload() + self._initialize_check_md5_downloads_thread() + # iterate through source paths to download + for src in self._spec.sources: + for rfile in src.files( + self._creds, self._spec.options, self._general_options): + # form local path for remote file + lpath = pathlib.Path(self._spec.destination.path, rfile.name) + # check on download conditions + action = self._check_download_conditions( + lpath, rfile, self._spec) + if action == DownloadAction.Skip: + continue + elif action == DownloadAction.CheckMd5: + self._pre_md5_skip_on_check(lpath, rfile) + elif action == DownloadAction.Download: + # TODO add to download queue + pass + # cond checks? 
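# Minimal standalone sketch of the notify/timed-wait pattern used by the MD5
# checker thread above (illustrative names): the producer queues a result and
# notifies the condition variable; the consumer polls with a one second wait so
# a notify that fires between its get() and its wait() cannot leave it asleep
# indefinitely.
import multiprocessing
try:
    import queue
except ImportError:  # noqa
    import Queue as queue

done_queue = multiprocessing.Queue()
done_cv = multiprocessing.Condition()

def produce(item):
    done_cv.acquire()
    done_queue.put(item)
    done_cv.notify()
    done_cv.release()

def consume_one():
    done_cv.acquire()
    while True:
        try:
            result = done_queue.get_nowait()
            break
        except queue.Empty:
            done_cv.wait(1)
    done_cv.release()
    return result

produce(('/tmp/somefile', True))
print(consume_one())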
+ print(rfile.container, rfile.name, rfile.lmt, rfile.size, + rfile.md5, rfile.mode, rfile.encryption_metadata) + + # clean up processes and threads + with self._md5_meta_lock: + self._all_remote_files_processed = True + self._md5_check_thread.join() + self._md5_offload.finalize_md5_processes() diff --git a/blobxfer/md5.py b/blobxfer/md5.py index fafd3f8..7d84439 100644 --- a/blobxfer/md5.py +++ b/blobxfer/md5.py @@ -43,74 +43,101 @@ # create logger logger = logging.getLogger(__name__) -# global defines -_TASK_QUEUE = multiprocessing.Queue() -_DONE_QUEUE = multiprocessing.Queue() -_DONE_CV = multiprocessing.Condition() -_MD5_PROCS = [] +class LocalFileMd5Offload(object): + """LocalFileMd5Offload""" + def __init__(self, num_workers=None): + # type: (LocalFileMd5Offload, int) -> None + """Ctor for Local File Md5 Offload + :param LocalFileMd5Offload self: this + :param int num_workers: number of worker processes + """ + self._task_queue = multiprocessing.Queue() + self._done_queue = multiprocessing.Queue() + self._done_cv = multiprocessing.Condition() + self._term_signal = multiprocessing.Value('i', 0) + self._md5_procs = [] + self._initialize_md5_processes(num_workers) -def _worker_md5_file_process(): - global _TASK_QUEUE, _DONE_QUEUE - while True: - filename, remote_md5, pagealign = _TASK_QUEUE.get() - md5 = blobxfer.util.compute_md5_for_file_asbase64(filename, pagealign) - logger.debug('MD5: {} {} {}'.format(md5, remote_md5, filename)) - _DONE_CV.acquire() - _DONE_QUEUE.put((filename, md5 == remote_md5)) - _DONE_CV.notify() - _DONE_CV.release() + @property + def done_cv(self): + # type: (LocalFileMd5Offload) -> multiprocessing.Condition + """Get Download Done condition variable + :param LocalFileMd5Offload self: this + :rtype: multiprocessing.Condition + :return: cv for download done + """ + return self._done_cv + def _initialize_md5_processes(self, num_workers=None): + # type: (LocalFileMd5Offload, int) -> None + """Initialize MD5 checking processes for files for download + :param LocalFileMd5Offload self: this + :param int num_workers: number of worker processes + """ + if num_workers is None or num_workers < 1: + num_workers = multiprocessing.cpu_count() // 2 + if num_workers < 1: + num_workers = 1 + for _ in range(num_workers): + proc = multiprocessing.Process( + target=self._worker_compute_md5_localfile_process) + proc.start() + self._md5_procs.append(proc) -def get_done_cv(): - global _DONE_CV - return _DONE_CV + def finalize_md5_processes(self): + # type: (LocalFileMd5Offload) -> None + """Finalize MD5 checking processes for files for download + :param LocalFileMd5Offload self: this + """ + self._term_signal.value = 1 + for proc in self._md5_procs: + proc.join() + def _worker_compute_md5_localfile_process(self): + # type: (LocalFileMd5Offload) -> None + """Compute MD5 for local file + :param LocalFileMd5Offload self: this + """ + while self._term_signal.value == 0: + try: + filename, remote_md5, pagealign = self._task_queue.get(True, 1) + except queue.Empty: + continue + md5 = blobxfer.util.compute_md5_for_file_asbase64( + filename, pagealign) + logger.debug('MD5: {} {} {}'.format( + md5, remote_md5, filename)) + self._done_cv.acquire() + self._done_queue.put((filename, md5 == remote_md5)) + self.done_cv.notify() + self.done_cv.release() -def check_md5_file_for_download(): - # type: (None) -> str - """Check queue for a file to download - :rtype: str - :return: local file path - """ - global _DONE_QUEUE - try: - return _DONE_QUEUE.get_nowait() - except queue.Empty: - return None + 
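# Usage sketch for LocalFileMd5Offload exercising the methods the Downloader
# calls above ('somefile.bin' is an illustrative path): enqueue a local file with
# the remote MD5 to compare against, poll the done queue for the
# (filename, match) result, then shut the worker processes down. Note that
# compute_md5_for_file_asbase64 still lives in blobxfer.util at this point in
# the series.
import time

import blobxfer.md5 as md5
import blobxfer.models as models
import blobxfer.util as util

offload = md5.LocalFileMd5Offload(num_workers=1)
try:
    remote_md5 = util.compute_md5_for_file_asbase64('somefile.bin')
    offload.add_localfile_for_md5_check(
        'somefile.bin', remote_md5, models.AzureStorageModes.Block)
    result = None
    for _ in range(30):
        result = offload.get_localfile_md5_done()
        if result is not None:
            break
        time.sleep(0.3)
    if result is not None:
        filename, matched = result  # matched is True when the local MD5 agrees
finally:
    offload.finalize_md5_processes()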
def get_localfile_md5_done(self): + # type: (LocalFileMd5Offload) -> Tuple[str, bool] + """Get from done queue of local files with MD5 completed + :param LocalFileMd5Offload self: this + :rtype: tuple or None + :return: (local file path, md5 match) + """ + try: + return self._done_queue.get_nowait() + except queue.Empty: + return None - -def add_file_for_md5_check(filename, remote_md5, mode): - # type: (str, str, blobxfer.models.AzureStorageModes) -> bool - """Check an MD5 for a file for download - :param str filename: file to compute MD5 for - :param str remote_md5: remote MD5 to compare against - :param blobxfer.models.AzureStorageModes mode: mode - :rtype: bool - :return: MD5 match comparison - """ - global _TASK_QUEUE - if mode == blobxfer.models.AzureStorageModes.Page: - pagealign = True - else: - pagealign = False - _TASK_QUEUE.put((filename, remote_md5, pagealign)) - - -def initialize_md5_processes(num_workers=None): - global _MD5_PROCS - if num_workers is None or num_workers < 1: - num_workers = multiprocessing.cpu_count() // 2 - if num_workers < 1: - num_workers = 1 - for _ in range(num_workers): - proc = multiprocessing.Process(target=_worker_md5_file_process) - proc.start() - _MD5_PROCS.append(proc) - - -def finalize_md5_processes(): - global _MD5_PROCS - for proc in _MD5_PROCS: - proc.terminate() - proc.join() + def add_localfile_for_md5_check(self, filename, remote_md5, mode): + # type: (LocalFileMd5Offload, str, str, + # blobxfer.models.AzureStorageModes) -> bool + """Check an MD5 for a file for download + :param LocalFileMd5Offload self: this + :param str filename: file to compute MD5 for + :param str remote_md5: remote MD5 to compare against + :param blobxfer.models.AzureStorageModes mode: mode + :rtype: bool + :return: MD5 match comparison + """ + if mode == blobxfer.models.AzureStorageModes.Page: + pagealign = True + else: + pagealign = False + self._task_queue.put((filename, remote_md5, pagealign)) diff --git a/cli/cli.py b/cli/cli.py index 508013a..2a6d8d9 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -591,7 +591,9 @@ def download(ctx, local_resource, storage_account, remote_path): ctx.initialize() specs = settings.create_download_specifications(ctx.config) for spec in specs: - blobxfer.api.download(ctx.general_options, ctx.credentials, spec) + blobxfer.api.Downloader( + ctx.general_options, ctx.credentials, spec + ).start() @cli.command('synccopy') From e1d5f9f8025ce192590cf5b4faee45ecb9cc5da8 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Wed, 22 Feb 2017 10:08:54 -0800 Subject: [PATCH 07/47] Add tests for MD5/download - Add more model tests - Add noqa tags for ImportError except blocks --- blobxfer/download.py | 23 ++- blobxfer/file/operations.py | 2 +- blobxfer/md5.py | 4 +- blobxfer/models.py | 2 +- cli/cli.py | 2 +- setup.py | 2 +- tests/test_blobxfer_download.py | 253 ++++++++++++++++++++++++++++++ tests/test_blobxfer_md5.py | 99 ++++++++++++ tests/test_blobxfer_models.py | 133 +++++++++++++--- tests/test_blobxfer_operations.py | 25 ++- 10 files changed, 487 insertions(+), 58 deletions(-) create mode 100644 tests/test_blobxfer_download.py create mode 100644 tests/test_blobxfer_md5.py diff --git a/blobxfer/download.py b/blobxfer/download.py index b5a87d4..0b7d837 100644 --- a/blobxfer/download.py +++ b/blobxfer/download.py @@ -36,7 +36,7 @@ import logging try: import pathlib2 as pathlib -except ImportError: +except ImportError: # noqa import pathlib import threading # non-stdlib imports @@ -77,33 +77,33 @@ def __init__(self, general_options, creds, spec): 
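# Sketch of driving the refactored Downloader through blobxfer.api, as cli.py
# now does above; general_options, credentials and the download specification
# are assumed to have been built elsewhere (e.g. by cli/settings.py).
import blobxfer.api

def run_download(general_options, credentials, spec):
    # one Downloader per download specification; start() returns once the MD5
    # checker thread and offload processes have been cleaned up
    blobxfer.api.Downloader(general_options, credentials, spec).start()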
self._creds = creds self._spec = spec - def _check_download_conditions(self, lpath, rfile, spec): - # type: (Downloader, pathlib.Path, blobxfer.models.AzureStorageEntity, - # blobxfer.models.DownloadSpecification) -> DownloadAction + def _check_download_conditions(self, lpath, rfile): + # type: (Downloader, pathlib.Path, + # blobxfer.models.AzureStorageEntity) -> DownloadAction """Check for download conditions :param Downloader self: this :param pathlib.Path lpath: local path :param blobxfer.models.AzureStorageEntity rfile: remote file - :param blobxfer.models.DownloadSpecification spec: download spec :rtype: DownloadAction :return: download action """ if not lpath.exists(): return DownloadAction.Download - if not spec.options.overwrite: + if not self._spec.options.overwrite: logger.info( 'not overwriting local file: {} (remote: {}/{})'.format( lpath, rfile.container, rfile.name)) return DownloadAction.Skip # check skip on options, MD5 match takes priority - if spec.skip_on.md5_match: + if self._spec.skip_on.md5_match: return DownloadAction.CheckMd5 # if neither of the remaining skip on actions are activated, download - if not spec.skip_on.filesize_match and not spec.skip_on.lmt_ge: + if (not self._spec.skip_on.filesize_match and + not self._spec.skip_on.lmt_ge): return DownloadAction.Download # check skip on file size match dl_fs = None - if spec.skip_on.filesize_match: + if self._spec.skip_on.filesize_match: lsize = lpath.stat().st_size if rfile.mode == blobxfer.models.AzureStorageModes.Page: lsize = blobxfer.util.page_align_content_length(lsize) @@ -113,7 +113,7 @@ def _check_download_conditions(self, lpath, rfile, spec): dl_fs = True # check skip on lmt ge dl_lmt = None - if spec.skip_on.lmt_ge: + if self._spec.skip_on.lmt_ge: mtime = datetime.datetime.fromtimestamp( lpath.stat().st_mtime, tz=dateutil.tz.tzlocal()) if mtime >= rfile.lmt: @@ -211,8 +211,7 @@ def start(self): # form local path for remote file lpath = pathlib.Path(self._spec.destination.path, rfile.name) # check on download conditions - action = self._check_download_conditions( - lpath, rfile, self._spec) + action = self._check_download_conditions(lpath, rfile) if action == DownloadAction.Skip: continue elif action == DownloadAction.CheckMd5: diff --git a/blobxfer/file/operations.py b/blobxfer/file/operations.py index eae7640..221f412 100644 --- a/blobxfer/file/operations.py +++ b/blobxfer/file/operations.py @@ -32,7 +32,7 @@ import logging try: import pathlib2 as pathlib -except ImportError: +except ImportError: # noqa import pathlib # non-stdlib imports import azure.common diff --git a/blobxfer/md5.py b/blobxfer/md5.py index 7d84439..dbbe6fd 100644 --- a/blobxfer/md5.py +++ b/blobxfer/md5.py @@ -33,7 +33,7 @@ import multiprocessing try: import queue -except ImportError: +except ImportError: # noqa import Queue as queue # non-stdlib imports # local imports @@ -75,7 +75,7 @@ def _initialize_md5_processes(self, num_workers=None): :param LocalFileMd5Offload self: this :param int num_workers: number of worker processes """ - if num_workers is None or num_workers < 1: + if num_workers is None: num_workers = multiprocessing.cpu_count() // 2 if num_workers < 1: num_workers = 1 diff --git a/blobxfer/models.py b/blobxfer/models.py index 918eaaa..04e4c12 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -37,7 +37,7 @@ import os try: import pathlib2 as pathlib -except ImportError: +except ImportError: # noqa import pathlib # non-stdlib imports # local imports diff --git a/cli/cli.py b/cli/cli.py index 2a6d8d9..91600b5 
100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -32,7 +32,7 @@ import logging try: import pathlib2 as pathlib -except ImportError: +except ImportError: # noqa import pathlib # non-stdlib imports import click diff --git a/setup.py b/setup.py index 475cb90..11ba002 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ import re try: from setuptools import setup -except ImportError: +except ImportError: # noqa from distutils.core import setup import sys diff --git a/tests/test_blobxfer_download.py b/tests/test_blobxfer_download.py new file mode 100644 index 0000000..99c9711 --- /dev/null +++ b/tests/test_blobxfer_download.py @@ -0,0 +1,253 @@ +# coding=utf-8 +"""Tests for download""" + +# stdlib imports +import datetime +import dateutil.tz +import mock +import multiprocessing +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +import azure.storage.blob +import pytest +# local imports +import blobxfer.models as models +import blobxfer.util as util +# module under test +import blobxfer.download as dl + + +def test_check_download_conditions(tmpdir): + ap = tmpdir.join('a') + ap.write('abc') + ep = pathlib.Path(str(ap)) + nep = pathlib.Path(str(tmpdir.join('nep'))) + + ds = models.DownloadSpecification( + download_options=models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=models.AzureStorageModes.Auto, + overwrite=False, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=models.SkipOnOptions( + filesize_match=True, + lmt_ge=True, + md5_match=True, + ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + result = d._check_download_conditions(nep, mock.MagicMock()) + assert result == dl.DownloadAction.Download + result = d._check_download_conditions(ep, mock.MagicMock()) + assert result == dl.DownloadAction.Skip + + ds = models.DownloadSpecification( + download_options=models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=models.AzureStorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=models.SkipOnOptions( + filesize_match=True, + lmt_ge=True, + md5_match=True, + ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + result = d._check_download_conditions(ep, mock.MagicMock()) + assert result == dl.DownloadAction.CheckMd5 + + ds = models.DownloadSpecification( + download_options=models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=models.AzureStorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=models.SkipOnOptions( + filesize_match=False, + lmt_ge=False, + md5_match=False, + ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + result = d._check_download_conditions(ep, mock.MagicMock()) + assert result == dl.DownloadAction.Download + + ds = models.DownloadSpecification( + download_options=models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=models.AzureStorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=models.SkipOnOptions( + filesize_match=True, + lmt_ge=False, + md5_match=False, 
+ ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + rfile = models.AzureStorageEntity('cont') + rfile._size = util.page_align_content_length(ep.stat().st_size) + rfile._mode = models.AzureStorageModes.Page + result = d._check_download_conditions(ep, rfile) + assert result == dl.DownloadAction.Skip + + rfile._size = ep.stat().st_size + rfile._mode = models.AzureStorageModes.Page + result = d._check_download_conditions(ep, rfile) + assert result == dl.DownloadAction.Download + + ds = models.DownloadSpecification( + download_options=models.DownloadOptions( + check_file_md5=True, + delete_extraneous_destination=False, + mode=models.AzureStorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=models.SkipOnOptions( + filesize_match=False, + lmt_ge=True, + md5_match=False, + ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + rfile = models.AzureStorageEntity('cont') + rfile._lmt = datetime.datetime.now(dateutil.tz.tzutc()) + \ + datetime.timedelta(days=1) + result = d._check_download_conditions(ep, rfile) + assert result == dl.DownloadAction.Download + + rfile._lmt = datetime.datetime.now(dateutil.tz.tzutc()) - \ + datetime.timedelta(days=1) + result = d._check_download_conditions(ep, rfile) + assert result == dl.DownloadAction.Skip + + +def test_pre_md5_skip_on_check(): + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._md5_offload = mock.MagicMock() + + rfile = models.AzureStorageEntity('cont') + rfile._encryption = mock.MagicMock() + rfile._encryption.blobxfer_extensions = mock.MagicMock() + rfile._encryption.blobxfer_extensions.pre_encrypted_content_md5 = \ + 'abc' + + lpath = 'lpath' + d._pre_md5_skip_on_check(lpath, rfile) + assert lpath in d._md5_map + + lpath = 'lpath2' + rfile._encryption = None + rfile._md5 = 'abc' + d._pre_md5_skip_on_check(lpath, rfile) + assert lpath in d._md5_map + + +def test_post_md5_skip_on_check(): + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._md5_offload = mock.MagicMock() + + lpath = 'lpath' + rfile = models.AzureStorageEntity('cont') + rfile._md5 = 'abc' + d._pre_md5_skip_on_check(lpath, rfile) + assert lpath in d._md5_map + + d._post_md5_skip_on_check(lpath, True) + assert lpath not in d._md5_map + + # TODO test mismatch + + +def test_initialize_check_md5_downloads_thread(): + lpath = 'lpath' + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._md5_map[lpath] = mock.MagicMock() + d._md5_offload = mock.MagicMock() + d._md5_offload.done_cv = multiprocessing.Condition() + d._md5_offload.get_localfile_md5_done = mock.MagicMock() + d._md5_offload.get_localfile_md5_done.side_effect = [None, (lpath, True)] + d._post_md5_skip_on_check = mock.MagicMock() + + d._initialize_check_md5_downloads_thread() + d._all_remote_files_processed = True + d._md5_map.clear() + d._md5_offload.done_cv.acquire() + d._md5_offload.done_cv.notify() + d._md5_offload.done_cv.release() + d._md5_check_thread.join() + + assert d._post_md5_skip_on_check.call_count == 1 + + +@mock.patch('blobxfer.md5.LocalFileMd5Offload') +@mock.patch('blobxfer.blob.operations.list_blobs') +@mock.patch('blobxfer.operations.ensure_local_destination', return_value=True) +def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), 
mock.MagicMock()) + d._initialize_check_md5_downloads_thread = mock.MagicMock() + d._md5_check_thread = mock.MagicMock() + d._spec.sources = [] + d._spec.options = mock.MagicMock() + d._spec.options.mode = models.AzureStorageModes.Auto + d._spec.options.overwrite = True + d._spec.skip_on = mock.MagicMock() + d._spec.skip_on.md5_match = False + d._spec.skip_on.lmt_ge = False + d._spec.skip_on.filesize_match = False + d._spec.destination = mock.MagicMock() + d._spec.destination.path = pathlib.Path(str(tmpdir)) + + p = '/cont/remote/path' + asp = models.AzureSourcePath() + asp.add_path_with_storage_account(p, 'sa') + d._spec.sources.append(asp) + + b = azure.storage.blob.models.Blob(name='name') + patched_lb.side_effect = [[b]] + + d._check_download_conditions = mock.MagicMock() + d._check_download_conditions.return_value = dl.DownloadAction.Skip + d.start() + # TODO assert + + patched_lb.side_effect = [[b]] + d._all_remote_files_processed = False + d._check_download_conditions.return_value = dl.DownloadAction.CheckMd5 + d._pre_md5_skip_on_check = mock.MagicMock() + d.start() + assert d._pre_md5_skip_on_check.call_count == 1 + + patched_lb.side_effect = [[b]] + d._all_remote_files_processed = False + d._check_download_conditions.return_value = dl.DownloadAction.Download + d.start() + # TODO assert diff --git a/tests/test_blobxfer_md5.py b/tests/test_blobxfer_md5.py new file mode 100644 index 0000000..7faa1ce --- /dev/null +++ b/tests/test_blobxfer_md5.py @@ -0,0 +1,99 @@ +# coding=utf-8 +"""Tests for md5""" + +# stdlib imports +import time +# non-stdlib imports +# local imports +import blobxfer.models as models +import blobxfer.util as util +# module under test +import blobxfer.md5 as md5 + + +def test_done_cv(): + a = None + try: + a = md5.LocalFileMd5Offload() + assert a.done_cv == a._done_cv + finally: + if a: + a.finalize_md5_processes() + + +def test_finalize_md5_processes(): + a = None + try: + a = md5.LocalFileMd5Offload(num_workers=0) + finally: + if a: + a.finalize_md5_processes() + + for proc in a._md5_procs: + assert not proc.is_alive() + + +def test_from_add_to_done_non_pagealigned(tmpdir): + file = tmpdir.join('a') + file.write('abc') + + remote_md5 = util.compute_md5_for_file_asbase64(str(file)) + + a = None + try: + a = md5.LocalFileMd5Offload(num_workers=1) + result = a.get_localfile_md5_done() + assert result is None + + a.add_localfile_for_md5_check( + str(file), remote_md5, models.AzureStorageModes.Block) + i = 33 + checked = False + while i > 0: + result = a.get_localfile_md5_done() + if result is None: + time.sleep(0.3) + i -= 1 + continue + assert len(result) == 2 + assert result[0] == str(file) + assert result[1] + checked = True + break + assert checked + finally: + if a: + a.finalize_md5_processes() + + +def test_from_add_to_done_pagealigned(tmpdir): + file = tmpdir.join('a') + file.write('abc') + + remote_md5 = util.compute_md5_for_file_asbase64(str(file), True) + + a = None + try: + a = md5.LocalFileMd5Offload(num_workers=1) + result = a.get_localfile_md5_done() + assert result is None + + a.add_localfile_for_md5_check( + str(file), remote_md5, models.AzureStorageModes.Page) + i = 33 + checked = False + while i > 0: + result = a.get_localfile_md5_done() + if result is None: + time.sleep(0.3) + i -= 1 + continue + assert len(result) == 2 + assert result[0] == str(file) + assert result[1] + checked = True + break + assert checked + finally: + if a: + a.finalize_md5_processes() diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 
c9a8d81..0612a9e 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -6,18 +6,19 @@ import os try: import pathlib2 as pathlib -except ImportError: +except ImportError: # noqa import pathlib # non-stdlib imports import azure.storage import azure.storage.blob +import azure.storage.file import pytest # module under test -import blobxfer.models +import blobxfer.models as models def test_storage_credentials(): - creds = blobxfer.models.AzureStorageCredentials() + creds = models.AzureStorageCredentials() creds.add_storage_account('sa1', 'somekey1', 'endpoint') a = creds.get_storage_account('sa1') @@ -51,24 +52,24 @@ def test_storage_credentials(): def test_key_is_sas(): - a = blobxfer.models.AzureStorageAccount('name', 'abcdef', 'endpoint') + a = models.AzureStorageAccount('name', 'abcdef', 'endpoint') assert not a.is_sas - a = blobxfer.models.AzureStorageAccount('name', 'abcdef&blah', 'endpoint') + a = models.AzureStorageAccount('name', 'abcdef&blah', 'endpoint') assert not a.is_sas - a = blobxfer.models.AzureStorageAccount('name', '?abcdef', 'endpoint') + a = models.AzureStorageAccount('name', '?abcdef', 'endpoint') assert a.is_sas - a = blobxfer.models.AzureStorageAccount( + a = models.AzureStorageAccount( 'name', '?sv=0&sr=1&sig=2', 'endpoint') assert a.is_sas - a = blobxfer.models.AzureStorageAccount( + a = models.AzureStorageAccount( 'name', 'sv=0&sr=1&sig=2', 'endpoint') assert a.is_sas - a = blobxfer.models.AzureStorageAccount( + a = models.AzureStorageAccount( 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint') assert a.is_sas @@ -86,7 +87,7 @@ def test_localsourcepaths_files(tmpdir): defpath.join('world.txt').write('world') defpath.join('moo.cow').write('y') - a = blobxfer.models.LocalSourcePaths() + a = models.LocalSourcePaths() a.add_include('*.txt') a.add_includes(['moo.cow', '*blah*']) with pytest.raises(ValueError): @@ -106,7 +107,7 @@ def test_localsourcepaths_files(tmpdir): assert str(defpath.join('world.txt')) in a_set assert str(defpath.join('moo.cow')) not in a_set - b = blobxfer.models.LocalSourcePaths() + b = models.LocalSourcePaths() b.add_includes(['moo.cow', '*blah*']) b.add_include('*.txt') b.add_excludes(['world.txt']) @@ -121,7 +122,7 @@ def test_localdestinationpath(tmpdir): tmpdir.mkdir('1') path = tmpdir.join('1') - a = blobxfer.models.LocalDestinationPath(str(path)) + a = models.LocalDestinationPath(str(path)) a.is_dir = True assert str(a.path) == str(path) assert a.is_dir @@ -129,7 +130,7 @@ def test_localdestinationpath(tmpdir): a.ensure_path_exists() assert os.path.exists(str(a.path)) - b = blobxfer.models.LocalDestinationPath() + b = models.LocalDestinationPath() b.is_dir = False b.path = str(path) with pytest.raises(RuntimeError): @@ -138,7 +139,7 @@ def test_localdestinationpath(tmpdir): path2 = tmpdir.join('2') path3 = path2.join('3') - c = blobxfer.models.LocalDestinationPath(str(path3)) + c = models.LocalDestinationPath(str(path3)) with pytest.raises(RuntimeError): c.ensure_path_exists() c.is_dir = False @@ -150,7 +151,7 @@ def test_localdestinationpath(tmpdir): def test_azuresourcepath(): p = '/cont/remote/path' - asp = blobxfer.models.AzureSourcePath() + asp = models.AzureSourcePath() asp.add_path_with_storage_account(p, 'sa') with pytest.raises(RuntimeError): @@ -159,26 +160,106 @@ def test_azuresourcepath(): assert 'sa' == asp.lookup_storage_account(p) +@mock.patch('blobxfer.crypto.models.EncryptionMetadata') +@mock.patch('blobxfer.file.operations.list_files') +def test_azuresourcepath_files(patched_lf, patched_em): + p = 
'/cont/remote/path' + asp = models.AzureSourcePath() + asp.add_path_with_storage_account(p, 'sa') + + options = mock.MagicMock() + options.mode = models.AzureStorageModes.File + creds = mock.MagicMock() + creds.get_storage_account = mock.MagicMock() + sa = mock.MagicMock() + sa.file_client = mock.MagicMock() + creds.get_storage_account.return_value = sa + f = azure.storage.file.models.File(name='name') + patched_lf.side_effect = [[f]] + patched_em.encryption_metadata_exists = mock.MagicMock() + patched_em.encryption_metadata_exists.return_value = False + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is None + assert i == 1 + + fe = azure.storage.file.models.File(name='name') + fe.metadata = {'encryptiondata': {'a': 'b'}} + patched_lf.side_effect = [[fe]] + patched_em.encryption_metadata_exists.return_value = True + patched_em.convert_from_json = mock.MagicMock() + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is not None + assert i == 1 + + +@mock.patch('blobxfer.crypto.models.EncryptionMetadata') +@mock.patch('blobxfer.blob.operations.list_blobs') +def test_azuresourcepath_blobs(patched_lb, patched_em): + p = '/cont/remote/path' + asp = models.AzureSourcePath() + asp.add_path_with_storage_account(p, 'sa') + + options = mock.MagicMock() + options.mode = models.AzureStorageModes.Auto + creds = mock.MagicMock() + creds.get_storage_account = mock.MagicMock() + sa = mock.MagicMock() + sa.block_blob_client = mock.MagicMock() + creds.get_storage_account.return_value = sa + b = azure.storage.blob.models.Blob(name='name') + patched_lb.side_effect = [[b]] + patched_em.encryption_metadata_exists = mock.MagicMock() + patched_em.encryption_metadata_exists.return_value = False + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is None + assert i == 1 + + be = azure.storage.blob.models.Blob(name='name') + be.metadata = {'encryptiondata': {'a': 'b'}} + patched_lb.side_effect = [[be]] + patched_em.encryption_metadata_exists.return_value = True + patched_em.convert_from_json = mock.MagicMock() + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is not None + assert i == 1 + + def test_downloadspecification(): - ds = blobxfer.models.DownloadSpecification( - download_options=blobxfer.models.DownloadOptions( + ds = models.DownloadSpecification( + download_options=models.DownloadOptions( check_file_md5=True, delete_extraneous_destination=False, - mode=blobxfer.models.AzureStorageModes.Auto, + mode=models.AzureStorageModes.Auto, overwrite=True, recursive=True, restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=blobxfer.models.SkipOnOptions( + skip_on_options=models.SkipOnOptions( filesize_match=True, lmt_ge=False, md5_match=True, ), - local_destination_path=blobxfer.models.LocalDestinationPath('dest'), + local_destination_path=models.LocalDestinationPath('dest'), ) - asp = blobxfer.models.AzureSourcePath() + asp = models.AzureSourcePath() p = 'some/remote/path' asp.add_path_with_storage_account(p, 'sa') @@ -193,7 +274,7 @@ def test_downloadspecification(): def test_azurestorageentity(): - ase = blobxfer.models.AzureStorageEntity('cont') + ase = models.AzureStorageEntity('cont') assert ase.container == 'cont' assert 
ase.encryption_metadata is None @@ -211,15 +292,15 @@ def test_azurestorageentity(): assert ase.lmt == 'lmt' assert ase.size == 123 assert ase.md5 == 'abc' - assert ase.mode == blobxfer.models.AzureStorageModes.Block + assert ase.mode == models.AzureStorageModes.Block blob.properties.blob_type = azure.storage.blob.models._BlobTypes.AppendBlob ase.populate_from_blob(blob) - assert ase.mode == blobxfer.models.AzureStorageModes.Append + assert ase.mode == models.AzureStorageModes.Append blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob ase.populate_from_blob(blob) - assert ase.mode == blobxfer.models.AzureStorageModes.Page + assert ase.mode == models.AzureStorageModes.Page ase.populate_from_file(blob) - assert ase.mode == blobxfer.models.AzureStorageModes.File + assert ase.mode == models.AzureStorageModes.File diff --git a/tests/test_blobxfer_operations.py b/tests/test_blobxfer_operations.py index 78aef22..9926bab 100644 --- a/tests/test_blobxfer_operations.py +++ b/tests/test_blobxfer_operations.py @@ -2,10 +2,7 @@ """Tests for operations""" # stdlib imports -from mock import ( - MagicMock, - patch, -) +import mock # non-stdlib imports import pytest # local imports @@ -14,8 +11,8 @@ import blobxfer.operations as ops -@patch('blobxfer.file.operations.check_if_single_file') -@patch('blobxfer.blob.operations.check_if_single_blob') +@mock.patch('blobxfer.file.operations.check_if_single_file') +@mock.patch('blobxfer.blob.operations.check_if_single_blob') def test_ensure_local_destination(patched_blob, patched_file, tmpdir): downdir = tmpdir.join('down') @@ -30,14 +27,14 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=MagicMock(), + skip_on_options=mock.MagicMock(), local_destination_path=blobxfer.models.LocalDestinationPath( str(downdir) ), ) with pytest.raises(RuntimeError): - ops.ensure_local_destination(MagicMock(), ds) + ops.ensure_local_destination(mock.MagicMock(), ds) asp = blobxfer.models.AzureSourcePath() p = 'cont/remote/path' @@ -46,12 +43,12 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): ds.add_azure_source_path(asp) patched_blob.return_value = False - ops.ensure_local_destination(MagicMock(), ds) + ops.ensure_local_destination(mock.MagicMock(), ds) assert ds.destination.is_dir patched_blob.return_value = True with pytest.raises(RuntimeError): - ops.ensure_local_destination(MagicMock(), ds) + ops.ensure_local_destination(mock.MagicMock(), ds) # file tests ds = blobxfer.models.DownloadSpecification( @@ -64,7 +61,7 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=MagicMock(), + skip_on_options=mock.MagicMock(), local_destination_path=blobxfer.models.LocalDestinationPath( str(downdir) ), @@ -73,9 +70,9 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): ds.add_azure_source_path(asp) patched_file.return_value = (False, None) - ops.ensure_local_destination(MagicMock(), ds) + ops.ensure_local_destination(mock.MagicMock(), ds) assert ds.destination.is_dir - patched_file.return_value = (True, MagicMock()) + patched_file.return_value = (True, mock.MagicMock()) with pytest.raises(RuntimeError): - ops.ensure_local_destination(MagicMock(), ds) + ops.ensure_local_destination(mock.MagicMock(), ds) From 146fc24d35ca680a9b1c8010279204bbec777ef0 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 23 Feb 2017 14:57:01 -0800 Subject: 
[PATCH 08/47] Pre-download logic - Add concurrency options - Centralize MD5 logic --- blobxfer/blob/operations.py | 7 + blobxfer/crypto/models.py | 33 +++++ blobxfer/download.py | 103 +++++++++++--- blobxfer/md5.py | 51 ++++++- blobxfer/models.py | 178 ++++++++++++++++++++----- blobxfer/util.py | 54 +++----- cli/cli.py | 45 +++++++ cli/settings.py | 8 ++ tests/test_blobxfer_blob_operations.py | 7 +- tests/test_blobxfer_crypto_models.py | 6 + tests/test_blobxfer_download.py | 40 +++++- tests/test_blobxfer_md5.py | 24 +++- tests/test_blobxfer_models.py | 79 +++++++++++ tests/test_blobxfer_util.py | 33 ++--- 14 files changed, 548 insertions(+), 120 deletions(-) diff --git a/blobxfer/blob/operations.py b/blobxfer/blob/operations.py index ef2c976..411ad52 100644 --- a/blobxfer/blob/operations.py +++ b/blobxfer/blob/operations.py @@ -35,6 +35,7 @@ import azure.storage.blob.models # local imports import blobxfer.models +import blobxfer.util # create logger logger = logging.getLogger(__name__) @@ -50,6 +51,8 @@ def check_if_single_blob(client, container, prefix, timeout=None): :rtype: bool :return: if prefix in container is a single blob """ + if blobxfer.util.blob_is_snapshot(prefix): + return True try: client.get_blob_properties( container_name=container, blob_name=prefix, timeout=timeout) @@ -94,3 +97,7 @@ def list_blobs(client, container, prefix, mode, timeout=None): continue # auto or match, yield the blob yield blob + + +def get_blob_range(client, container, blob_name, snapshot): + pass diff --git a/blobxfer/crypto/models.py b/blobxfer/crypto/models.py index 4300b58..c4bb5b4 100644 --- a/blobxfer/crypto/models.py +++ b/blobxfer/crypto/models.py @@ -133,6 +133,26 @@ def __init__(self): self._symkey = None self._signkey = None + @property + def symmetric_key(self): + # type: (EncryptionMetadata) -> bytes + """Get symmetric key + :param EncryptionMetadata self: this + :rtype: bytes + :return: symmetric key + """ + return self._symkey + + @property + def signing_key(self): + # type: (EncryptionMetadata) -> bytes + """Get singing key + :param EncryptionMetadata self: this + :rtype: bytes + :return: signing key + """ + return self._signkey + @staticmethod def encryption_metadata_exists(md): # type: (dict) -> bool @@ -283,4 +303,17 @@ def convert_from_json(self, md, blobname, rsaprivatekey): blobname)) def convert_to_json_with_mac(self): + # TODO pass + + def initialize_hmac(self): + # type: (EncryptionMetadata) -> hmac.HMAC + """Initialize an hmac from a signing key if it exists + :param EncryptionMetadata self: this + :rtype: hmac.HMAC or None + :return: hmac + """ + if self._signkey is not None: + return hmac.new(self._signkey, digestmod=hashlib.sha256) + else: + return None diff --git a/blobxfer/download.py b/blobxfer/download.py index 0b7d837..dbbecac 100644 --- a/blobxfer/download.py +++ b/blobxfer/download.py @@ -38,6 +38,10 @@ import pathlib2 as pathlib except ImportError: # noqa import pathlib +try: + import queue +except ImportError: # noqa + import Queue as queue import threading # non-stdlib imports import dateutil @@ -73,6 +77,9 @@ def __init__(self, general_options, creds, spec): self._md5_map = {} self._md5_offload = None self._md5_check_thread = None + self._download_queue = queue.Queue() + self._download_threads = [] + self._download_terminate = False self._general_options = general_options self._creds = creds self._spec = spec @@ -154,11 +161,11 @@ def _post_md5_skip_on_check(self, filename, md5_match): :param str filename: local filename :param bool md5_match: if MD5 matches """ 
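# Sketch of how the new EncryptionMetadata.initialize_hmac helper could be used
# to verify downloaded content; encryption_metadata is assumed to be a
# blobxfer.crypto.models.EncryptionMetadata whose signing key has already been
# unwrapped, and chunks/expected_digest are illustrative inputs.
import hmac

def verify_signature(encryption_metadata, chunks, expected_digest):
    hmac_obj = encryption_metadata.initialize_hmac()
    if hmac_obj is None:
        return True  # no signing key present, nothing to verify
    for chunk in chunks:
        hmac_obj.update(chunk)
    return hmac.compare_digest(hmac_obj.digest(), expected_digest)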
+ with self._md5_meta_lock: + rfile = self._md5_map.pop(filename) if not md5_match: lpath = pathlib.Path(filename) - # TODO enqueue file for download - with self._md5_meta_lock: - self._md5_map.pop(filename) + self._add_to_download_queue(lpath, rfile) def _initialize_check_md5_downloads_thread(self): # type: (Downloader) -> None @@ -173,11 +180,12 @@ def _check_for_downloads_from_md5(self): cv = self._md5_offload.done_cv while True: with self._md5_meta_lock: - if (len(self._md5_map) == 0 and - self._all_remote_files_processed): + if (self._download_terminate or + (len(self._md5_map) == 0 and + self._all_remote_files_processed)): break cv.acquire() - while True: + while not self._download_terminate: result = self._md5_offload.get_localfile_md5_done() if result is None: # use cv timeout due to possible non-wake while running @@ -194,16 +202,69 @@ def _check_for_downloads_from_md5(self): ) self._md5_check_thread.start() - def start(self): - # type: (None) -> None - """Start Downloader""" + def _add_to_download_queue(self, lpath, rfile): + # type: (Downloader, pathlib.Path, + # blobxfer.models.AzureStorageEntity) -> None + """Add remote file to download queue + :param Downloader self: this + :param pathlib.Path lpath: local path + :param blobxfer.models.AzureStorageEntity rfile: remote file + """ + # prepare remote file for download + rfile.prepare_for_download(lpath, self._spec.options) + # add remote file to queue + self._download_queue.put(rfile) + + def _initialize_download_threads(self): + # type: (Downloader) -> None + """Initialize download threads + :param Downloader self: this + """ + for _ in range(self._general_options.concurrency.transfer_threads): + thr = threading.Thread(target=self._worker_thread_download) + self._download_threads.append(thr) + thr.start() + + def _terminate_download_threads(self): + # type: (Downloader) -> None + """Terminate download threads + :param Downloader self: this + """ + self._download_terminate = True + for thr in self._download_threads: + thr.join() + + def _worker_thread_download(self): + # type: (Downloader) -> None + """Worker thread download + :param Downloader self: this + """ + while True: + if self._download_terminate: + break + try: + rfile = self._download_queue.get(False, 1) + except queue.Empty: + continue + # TODO + # get next offset with respect to chunk size + + print('<<', rfile.container, rfile.name, rfile.lmt, rfile.size, + rfile.md5, rfile.mode, rfile.encryption_metadata) + + def _run(self): + # type: (Downloader) -> None + """Execute Downloader""" # ensure destination path blobxfer.operations.ensure_local_destination(self._creds, self._spec) logger.info('downloading blobs/files to local path: {}'.format( self._spec.destination.path)) # initialize MD5 processes - self._md5_offload = blobxfer.md5.LocalFileMd5Offload() + self._md5_offload = blobxfer.md5.LocalFileMd5Offload( + num_workers=self._general_options.concurrency.md5_processes) self._initialize_check_md5_downloads_thread() + # initialize download threads + self._initialize_download_threads() # iterate through source paths to download for src in self._spec.sources: for rfile in src.files( @@ -217,14 +278,24 @@ def start(self): elif action == DownloadAction.CheckMd5: self._pre_md5_skip_on_check(lpath, rfile) elif action == DownloadAction.Download: - # TODO add to download queue - pass - # cond checks? 
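# Standalone sketch of the queue-fed worker-thread pattern that
# _worker_thread_download above follows (illustrative names; the real workers
# consume AzureStorageEntity objects): a shared queue feeds the threads, and a
# terminate flag plus a short get() timeout lets them exit promptly on shutdown.
import threading
try:
    import queue
except ImportError:  # noqa
    import Queue as queue

work_queue = queue.Queue()
terminate = False

def worker():
    while not terminate:
        try:
            item = work_queue.get(True, 1)
        except queue.Empty:
            continue
        print('downloading {}'.format(item))
        work_queue.task_done()

threads = []
for _ in range(4):
    thr = threading.Thread(target=worker)
    thr.start()
    threads.append(thr)

for name in ('blob-a', 'blob-b'):
    work_queue.put(name)

work_queue.join()  # wait for queued items to be processed
terminate = True   # then let workers exit on their next loop iteration
for thr in threads:
    thr.join()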
- print(rfile.container, rfile.name, rfile.lmt, rfile.size, - rfile.md5, rfile.mode, rfile.encryption_metadata) - + self._add_to_download_queue(lpath, rfile) # clean up processes and threads with self._md5_meta_lock: self._all_remote_files_processed = True self._md5_check_thread.join() + # TODO wait for download threads + self._md5_offload.finalize_md5_processes() + + def start(self): + # type: (Downloader) -> None + """Start the Downloader""" + try: + self._run() + except KeyboardInterrupt: + logger.error( + 'KeyboardInterrupt detected, force terminating ' + 'processes and threads (this may take a while)...') + self._terminate_download_threads() + self._md5_offload.finalize_md5_processes() + raise diff --git a/blobxfer/md5.py b/blobxfer/md5.py index dbbe6fd..86dbd30 100644 --- a/blobxfer/md5.py +++ b/blobxfer/md5.py @@ -30,6 +30,7 @@ ) # stdlib imports import logging +import hashlib import multiprocessing try: import queue @@ -44,6 +45,51 @@ logger = logging.getLogger(__name__) +def new_md5_hasher(): + # type: (None) -> md5.MD5 + """Create a new MD5 hasher + :rtype: md5.MD5 + :return: new MD5 hasher + """ + return hashlib.md5() + + +def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): + # type: (str, bool, int) -> str + """Compute MD5 hash for file and encode as Base64 + :param str filename: file to compute MD5 for + :param bool pagealign: page align data + :param int blocksize: block size + :rtype: str + :return: MD5 for file encoded as Base64 + """ + hasher = new_md5_hasher() + with open(filename, 'rb') as filedesc: + while True: + buf = filedesc.read(blocksize) + if not buf: + break + buflen = len(buf) + if pagealign and buflen < blocksize: + aligned = blobxfer.util.page_align_content_length(buflen) + if aligned != buflen: + buf = buf.ljust(aligned, b'\0') + hasher.update(buf) + return blobxfer.util.base64_encode_as_string(hasher.digest()) + + +def compute_md5_for_data_asbase64(data): + # type: (obj) -> str + """Compute MD5 hash for bits and encode as Base64 + :param any data: data to compute MD5 for + :rtype: str + :return: MD5 for data + """ + hasher = new_md5_hasher() + hasher.update(data) + return blobxfer.util.base64_encode_as_string(hasher.digest()) + + class LocalFileMd5Offload(object): """LocalFileMd5Offload""" def __init__(self, num_workers=None): @@ -76,7 +122,7 @@ def _initialize_md5_processes(self, num_workers=None): :param int num_workers: number of worker processes """ if num_workers is None: - num_workers = multiprocessing.cpu_count() // 2 + num_workers = multiprocessing.cpu_count() // 2 - 1 if num_workers < 1: num_workers = 1 for _ in range(num_workers): @@ -104,8 +150,7 @@ def _worker_compute_md5_localfile_process(self): filename, remote_md5, pagealign = self._task_queue.get(True, 1) except queue.Empty: continue - md5 = blobxfer.util.compute_md5_for_file_asbase64( - filename, pagealign) + md5 = compute_md5_for_file_asbase64(filename, pagealign) logger.debug('MD5: {} {} {}'.format( md5, remote_md5, filename)) self._done_cv.acquire() diff --git a/blobxfer/models.py b/blobxfer/models.py index 04e4c12..34d05ce 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -51,6 +51,7 @@ import blobxfer.blob.operations import blobxfer.file.operations import blobxfer.crypto.models +import blobxfer.md5 import blobxfer.util # create logger @@ -67,13 +68,6 @@ class AzureStorageModes(enum.Enum): # named tuples -GeneralOptions = collections.namedtuple( - 'GeneralOptions', [ - 'progress_bar', - 'timeout_sec', - 'verbose', - ] -) VectoredIoOptions = 
collections.namedtuple( 'VectoredIoOptions', [ 'stripe_chunk_size_bytes', @@ -130,6 +124,46 @@ class AzureStorageModes(enum.Enum): ) +class ConcurrencyOptions(object): + """Concurrency Options""" + def __init__(self, crypto_processes, md5_processes, transfer_threads): + """Ctor for Concurrency Options + :param ConcurrencyOptions self: this + :param int crypto_processes: number of crypto procs + :param int md5_processes: number of md5 procs + :param int transfer_threads: number of transfer threads + """ + self.crypto_processes = crypto_processes + self.md5_processes = md5_processes + self.transfer_threads = transfer_threads + if self.crypto_processes is None or self.crypto_processes < 1: + self.crypto_processes = 1 + if self.md5_processes is None or self.md5_processes < 1: + self.md5_processes = 1 + if self.transfer_threads is None or self.transfer_threads < 1: + self.transfer_threads = 1 + + +class GeneralOptions(object): + """General Options""" + def __init__( + self, concurrency, progress_bar=True, timeout_sec=None, + verbose=False): + """Ctor for General Options + :param GeneralOptions self: this + :param ConcurrencyOptions concurrency: concurrency options + :param bool progress_bar: progress bar + :param int timeout_sec: timeout in seconds + :param bool verbose: verbose output + """ + if concurrency is None: + raise ValueError('concurrency option is unspecified') + self.concurrency = concurrency + self.progress_bar = progress_bar + self.timeout_sec = timeout_sec + self.verbose = verbose + + class AzureStorageCredentials(object): """Azure Storage Credentials""" def __init__(self): @@ -608,38 +642,11 @@ def __init__(self, container, ed=None): self._mode = None self._lmt = None self._size = None + self._snapshot = None self._md5 = None self._encryption = ed self._vio = None - - def populate_from_blob(self, blob): - # type: (AzureStorageEntity, azure.storage.blob.models.Blob) -> None - """Populate properties from Blob - :param AzureStorageEntity self: this - :param azure.storage.blob.models.Blob blob: blob to populate from - """ - self._name = blob.name - self._lmt = blob.properties.last_modified - self._size = blob.properties.content_length - self._md5 = blob.properties.content_settings.content_md5 - if blob.properties.blob_type == BlobTypes.AppendBlob: - self._mode = AzureStorageModes.Append - elif blob.properties.blob_type == BlobTypes.BlockBlob: - self._mode = AzureStorageModes.Block - elif blob.properties.blob_type == BlobTypes.PageBlob: - self._mode = AzureStorageModes.Page - - def populate_from_file(self, file): - # type: (AzureStorageEntity, azure.storage.file.models.File) -> None - """Populate properties from File - :param AzureStorageEntity self: this - :param azure.storage.file.models.File file: file to populate from - """ - self._name = file.name - self._lmt = file.properties.last_modified - self._size = file.properties.content_length - self._md5 = file.properties.content_settings.content_md5 - self._mode = AzureStorageModes.File + self.download = None @property def container(self): @@ -712,6 +719,105 @@ def encryption_metadata(self): """ return self._encryption + def populate_from_blob(self, blob): + # type: (AzureStorageEntity, azure.storage.blob.models.Blob) -> None + """Populate properties from Blob + :param AzureStorageEntity self: this + :param azure.storage.blob.models.Blob blob: blob to populate from + """ + self._name = blob.name + self._snapshot = blob.snapshot + self._lmt = blob.properties.last_modified + self._size = blob.properties.content_length + self._md5 = 
blob.properties.content_settings.content_md5 + if blob.properties.blob_type == BlobTypes.AppendBlob: + self._mode = AzureStorageModes.Append + elif blob.properties.blob_type == BlobTypes.BlockBlob: + self._mode = AzureStorageModes.Block + elif blob.properties.blob_type == BlobTypes.PageBlob: + self._mode = AzureStorageModes.Page + + def populate_from_file(self, file): + # type: (AzureStorageEntity, azure.storage.file.models.File) -> None + """Populate properties from File + :param AzureStorageEntity self: this + :param azure.storage.file.models.File file: file to populate from + """ + self._name = file.name + self._lmt = file.properties.last_modified + self._size = file.properties.content_length + self._md5 = file.properties.content_settings.content_md5 + self._mode = AzureStorageModes.File + + def prepare_for_download(self, lpath, options): + # type: (AzureStorageEntity, pathlib.Path, DownloadOptions) -> None + """Prepare entity for download + :param AzureStorageEntity self: this + :param pathlib.Path lpath: local path + :param DownloadOptions options: download options + """ + if self._encryption is not None: + hmac = self._encryption.initialize_hmac() + else: + hmac = None + if hmac is None and options.check_file_md5: + md5 = blobxfer.md5.new_md5_hasher() + else: + md5 = None + self.download = DownloadDescriptor(lpath, hmac, md5) + self.download.allocate_disk_space( + self._size, self._encryption is not None) + + +class DownloadDescriptor(object): + """DownloadDescriptor""" + def __init__(self, lpath, hmac, md5): + # type: (DownloadDescriptior, pathlib.Path, hmac.HMAC, md5.MD5) -> None + """Ctor for Download Descriptor + :param DownloadDescriptor self: this + :param pathlib.Path lpath: local path + :param hmac.HMAC hmac: hmac + :param md5.MD5 md5: md5 + """ + self.final_path = lpath + # create path holding the temporary file to download to + _tmp = list(lpath.parts[:-1]) + _tmp.append(lpath.name + '.bxtmp') + self.local_path = pathlib.Path(*_tmp) + self.hmac = hmac + self.md5 = md5 + self.current_position = 0 + + def allocate_disk_space(self, size, encryption): + # type: (DownloadDescriptor, int, bool) -> None + """Perform file allocation (possibly sparse), if encrypted this may + be an underallocation + :param DownloadDescriptor self: this + :param int size: size + :param bool encryption: encryption enabled + """ + # compute size + if size > 0: + if encryption: + allocatesize = size - \ + blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES + else: + allocatesize = size + if allocatesize < 0: + allocatesize = 0 + else: + allocatesize = 0 + # create parent path + self.local_path.parent.mkdir(mode=0o750, parents=True, exist_ok=True) + # allocate file + with self.local_path.open('wb') as fd: + if allocatesize > 0: + try: + os.posix_fallocate(fd.fileno(), 0, allocatesize) + except AttributeError: + fd.seek(allocatesize - 1) + fd.write(b'\0') + class AzureDestinationPaths(object): def __init__(self): diff --git a/blobxfer/util.py b/blobxfer/util.py index dd116bd..9029fb1 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -31,7 +31,7 @@ # stdlib imports import base64 import copy -import hashlib +import dateutil import logging import logging.handlers import mimetypes @@ -164,42 +164,6 @@ def base64_decode_string(string): return base64.b64decode(string) -def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): - # type: (str, bool, int) -> str - """Compute MD5 hash for file and encode as Base64 - :param str filename: file to compute MD5 for - :param bool pagealign: page 
align data - :param int blocksize: block size - :rtype: str - :return: MD5 for file encoded as Base64 - """ - hasher = hashlib.md5() - with open(filename, 'rb') as filedesc: - while True: - buf = filedesc.read(blocksize) - if not buf: - break - buflen = len(buf) - if pagealign and buflen < blocksize: - aligned = page_align_content_length(buflen) - if aligned != buflen: - buf = buf.ljust(aligned, b'\0') - hasher.update(buf) - return base64_encode_as_string(hasher.digest()) - - -def compute_md5_for_data_asbase64(data): - # type: (obj) -> str - """Compute MD5 hash for bits and encode as Base64 - :param any data: data to compute MD5 for - :rtype: str - :return: MD5 for data - """ - hasher = hashlib.md5() - hasher.update(data) - return base64_encode_as_string(hasher.digest()) - - def page_align_content_length(length): # type: (int) -> int """Compute page boundary alignment @@ -241,3 +205,19 @@ def explode_azure_path(path): else: rpath = '' return container, rpath + + +def blob_is_snapshot(url): + # type: (str) -> bool + """Checks if the blob is a snapshot blob + :param url str: blob url + :rtype: bool + :return: if blob is a snapshot blob + """ + if '?snapshot=' in url: + try: + dateutil.parser.parse(url.split('?snapshot=')[-1]) + return True + except (ValueError, OverflowError): + pass + return False diff --git a/cli/cli.py b/cli/cli.py index 91600b5..64be863 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -107,6 +107,34 @@ def _init_config(self): pass_cli_context = click.make_pass_decorator(CliContext, ensure=True) +def _crypto_processes_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['crypto_processes'] = value + return value + return click.option( + '--crypto-processes', + expose_value=False, + type=int, + default=0, + help='Concurrent crypto processes', + callback=callback)(f) + + +def _md5_processes_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['md5_processes'] = value + return value + return click.option( + '--md5-processes', + expose_value=False, + type=int, + default=0, + help='Concurrent MD5 processes', + callback=callback)(f) + + def _progress_bar_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -133,6 +161,20 @@ def callback(ctx, param, value): callback=callback)(f) +def _transfer_threads_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['transfer_threads'] = value + return value + return click.option( + '--transfer-threads', + expose_value=False, + type=int, + default=0, + help='Concurrent transfer threads', + callback=callback)(f) + + def _verbose_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -148,8 +190,11 @@ def callback(ctx, param, value): def common_options(f): f = _verbose_option(f) + f = _transfer_threads_option(f) f = _timeout_option(f) f = _progress_bar_option(f) + f = _md5_processes_option(f) + f = _crypto_processes_option(f) return f diff --git a/cli/settings.py b/cli/settings.py index db5c643..f567bea 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -203,8 +203,11 @@ def merge_settings(config, cli_options): # merge general options if 'options' not in config: config['options'] = {} + config['options']['crypto_processes'] = cli_options['crypto_processes'] + config['options']['md5_processes'] = cli_options['md5_processes'] config['options']['progress_bar'] = cli_options['progress_bar'] config['options']['timeout_sec'] = 
cli_options['timeout'] + config['options']['transfer_threads'] = cli_options['transfer_threads'] config['options']['verbose'] = cli_options['verbose'] @@ -231,6 +234,11 @@ def create_general_options(config): :return: general options object """ return blobxfer.models.GeneralOptions( + concurrency=blobxfer.models.ConcurrencyOptions( + crypto_processes=config['options']['crypto_processes'], + md5_processes=config['options']['md5_processes'], + transfer_threads=config['options']['transfer_threads'], + ), progress_bar=config['options']['progress_bar'], timeout_sec=config['options']['timeout_sec'], verbose=config['options']['verbose'], diff --git a/tests/test_blobxfer_blob_operations.py b/tests/test_blobxfer_blob_operations.py index 1b7d300..5c078ac 100644 --- a/tests/test_blobxfer_blob_operations.py +++ b/tests/test_blobxfer_blob_operations.py @@ -15,12 +15,15 @@ def test_check_if_single_blob(): client = mock.MagicMock() - client.get_blob_properties = mock.MagicMock() - client.get_blob_properties.return_value = mock.MagicMock() + client.get_blob_properties.return_value = True result = ops.check_if_single_blob(client, 'a', 'b/c') assert result + result = ops.check_if_single_blob( + client, 'a', 'a?snapshot=2017-02-23T22:21:14.8121864Z') + assert result + client = mock.MagicMock() client.get_blob_properties = mock.MagicMock() client.get_blob_properties.side_effect = \ diff --git a/tests/test_blobxfer_crypto_models.py b/tests/test_blobxfer_crypto_models.py index 33045c3..8d58419 100644 --- a/tests/test_blobxfer_crypto_models.py +++ b/tests/test_blobxfer_crypto_models.py @@ -179,9 +179,13 @@ def test_convert_from_json(tmpdir): } em = models.EncryptionMetadata() em.convert_from_json(md, 'blob', rsaprivatekey) + hmac = em.initialize_hmac() assert em.wrapped_content_key is not None + assert em._symkey == em.symmetric_key + assert em._signkey == em.signing_key assert em._symkey is not None assert em._signkey is not None + assert hmac is not None em = models.EncryptionMetadata() em.convert_from_json(md, 'blob', None) @@ -197,6 +201,8 @@ def test_convert_from_json(tmpdir): } em = models.EncryptionMetadata() em.convert_from_json(md, 'blob', rsaprivatekey) + hmac = em.initialize_hmac() assert em.wrapped_content_key is not None assert em._symkey is not None assert em._signkey is None + assert hmac is None diff --git a/tests/test_blobxfer_download.py b/tests/test_blobxfer_download.py index 99c9711..56e8999 100644 --- a/tests/test_blobxfer_download.py +++ b/tests/test_blobxfer_download.py @@ -184,7 +184,10 @@ def test_post_md5_skip_on_check(): d._post_md5_skip_on_check(lpath, True) assert lpath not in d._md5_map - # TODO test mismatch + d._add_to_download_queue = mock.MagicMock() + d._pre_md5_skip_on_check(lpath, rfile) + d._post_md5_skip_on_check(lpath, False) + assert d._add_to_download_queue.call_count == 1 def test_initialize_check_md5_downloads_thread(): @@ -208,12 +211,28 @@ def test_initialize_check_md5_downloads_thread(): assert d._post_md5_skip_on_check.call_count == 1 +def test_initialize_and_terminate_download_threads(): + opts = mock.MagicMock() + opts.concurrency.transfer_threads = 2 + d = dl.Downloader(opts, mock.MagicMock(), mock.MagicMock()) + d._worker_thread_download = mock.MagicMock() + + d._initialize_download_threads() + assert len(d._download_threads) == 2 + + d._terminate_download_threads() + assert d._download_terminate + for thr in d._download_threads: + assert not thr.is_alive() + + @mock.patch('blobxfer.md5.LocalFileMd5Offload') @mock.patch('blobxfer.blob.operations.list_blobs') 
@mock.patch('blobxfer.operations.ensure_local_destination', return_value=True) def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._initialize_check_md5_downloads_thread = mock.MagicMock() + d._initialize_download_threads = mock.MagicMock() d._md5_check_thread = mock.MagicMock() d._spec.sources = [] d._spec.options = mock.MagicMock() @@ -232,17 +251,19 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d._spec.sources.append(asp) b = azure.storage.blob.models.Blob(name='name') + b.properties.content_length = 0 patched_lb.side_effect = [[b]] + d._pre_md5_skip_on_check = mock.MagicMock() + d._check_download_conditions = mock.MagicMock() d._check_download_conditions.return_value = dl.DownloadAction.Skip d.start() - # TODO assert + assert d._pre_md5_skip_on_check.call_count == 0 patched_lb.side_effect = [[b]] d._all_remote_files_processed = False d._check_download_conditions.return_value = dl.DownloadAction.CheckMd5 - d._pre_md5_skip_on_check = mock.MagicMock() d.start() assert d._pre_md5_skip_on_check.call_count == 1 @@ -250,4 +271,15 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d._all_remote_files_processed = False d._check_download_conditions.return_value = dl.DownloadAction.Download d.start() - # TODO assert + assert d._download_queue.qsize() == 1 + + +def test_start_keyboard_interrupt(): + d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._run = mock.MagicMock(side_effect=KeyboardInterrupt) + d._terminate_download_threads = mock.MagicMock() + d._md5_offload = mock.MagicMock() + + with pytest.raises(KeyboardInterrupt): + d.start() + assert d._terminate_download_threads.call_count == 1 diff --git a/tests/test_blobxfer_md5.py b/tests/test_blobxfer_md5.py index 7faa1ce..05a66d2 100644 --- a/tests/test_blobxfer_md5.py +++ b/tests/test_blobxfer_md5.py @@ -3,14 +3,32 @@ # stdlib imports import time +import uuid # non-stdlib imports +import pytest # local imports import blobxfer.models as models -import blobxfer.util as util # module under test import blobxfer.md5 as md5 +def test_compute_md5(tmpdir): + lpath = str(tmpdir.join('test.tmp')) + testdata = str(uuid.uuid4()) + with open(lpath, 'wt') as f: + f.write(testdata) + md5_file = md5.compute_md5_for_file_asbase64(lpath) + md5_data = md5.compute_md5_for_data_asbase64(testdata.encode('utf8')) + assert md5_file == md5_data + + md5_file_page = md5.compute_md5_for_file_asbase64(lpath, True) + assert md5_file != md5_file_page + + # test non-existent file + with pytest.raises(IOError): + md5.compute_md5_for_file_asbase64(testdata) + + def test_done_cv(): a = None try: @@ -37,7 +55,7 @@ def test_from_add_to_done_non_pagealigned(tmpdir): file = tmpdir.join('a') file.write('abc') - remote_md5 = util.compute_md5_for_file_asbase64(str(file)) + remote_md5 = md5.compute_md5_for_file_asbase64(str(file)) a = None try: @@ -70,7 +88,7 @@ def test_from_add_to_done_pagealigned(tmpdir): file = tmpdir.join('a') file.write('abc') - remote_md5 = util.compute_md5_for_file_asbase64(str(file), True) + remote_md5 = md5.compute_md5_for_file_asbase64(str(file), True) a = None try: diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 0612a9e..7b81332 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -17,6 +17,41 @@ import blobxfer.models as models +def test_concurrency_options(): + a = models.ConcurrencyOptions( + crypto_processes=-1, + md5_processes=0, + 
transfer_threads=-2, + ) + + assert a.crypto_processes == 1 + assert a.md5_processes == 1 + assert a.transfer_threads == 1 + + +def test_general_options(): + a = models.GeneralOptions( + concurrency=models.ConcurrencyOptions( + crypto_processes=1, + md5_processes=2, + transfer_threads=3, + ), + progress_bar=False, + timeout_sec=1, + verbose=True, + ) + + assert a.concurrency.crypto_processes == 1 + assert a.concurrency.md5_processes == 2 + assert a.concurrency.transfer_threads == 3 + assert not a.progress_bar + assert a.timeout_sec == 1 + assert a.verbose + + with pytest.raises(ValueError): + a = models.GeneralOptions(None) + + def test_storage_credentials(): creds = models.AzureStorageCredentials() creds.add_storage_account('sa1', 'somekey1', 'endpoint') @@ -304,3 +339,47 @@ def test_azurestorageentity(): ase.populate_from_file(blob) assert ase.mode == models.AzureStorageModes.File + + +def test_azurestorageentity_prepare_for_download(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + opts = mock.MagicMock() + opts.check_file_md5 = True + + ase = models.AzureStorageEntity('cont') + ase._size = 0 + ase.prepare_for_download(lp, opts) + + assert ase.download.hmac is None + assert ase.download.md5 is not None + assert ase.download.final_path == lp + assert ase.download.current_position == 0 + + ase._encryption = mock.MagicMock() + ase.prepare_for_download(lp, opts) + + assert ase.download.hmac is not None + assert ase.download.md5 is None + + +def test_downloaddescriptor(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + d = models.DownloadDescriptor(lp, None, None) + assert d.current_position == 0 + assert d.final_path == lp + assert str(d.local_path) == str(lp) + '.bxtmp' + + d.allocate_disk_space(1024, True) + assert d.local_path.stat().st_size == 1024 - 16 + + d.local_path.unlink() + d.allocate_disk_space(1, True) + assert d.local_path.stat().st_size == 0 + + d.local_path.unlink() + d.allocate_disk_space(1024, False) + assert d.local_path.stat().st_size == 1024 + + # pre-existing file check + d.allocate_disk_space(0, False) + assert d.local_path.stat().st_size == 0 diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py index 9b6084e..4dd0ebc 100644 --- a/tests/test_blobxfer_util.py +++ b/tests/test_blobxfer_util.py @@ -3,7 +3,6 @@ # stdlib imports import sys -import uuid # non-stdlib imports import pytest # module under test @@ -111,24 +110,6 @@ def test_base64_encode_as_string(): assert a == dec -def test_compute_md5(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - testdata = str(uuid.uuid4()) - with open(lpath, 'wt') as f: - f.write(testdata) - md5_file = blobxfer.util.compute_md5_for_file_asbase64(lpath) - md5_data = blobxfer.util.compute_md5_for_data_asbase64( - testdata.encode('utf8')) - assert md5_file == md5_data - - md5_file_page = blobxfer.util.compute_md5_for_file_asbase64(lpath, True) - assert md5_file != md5_file_page - - # test non-existent file - with pytest.raises(IOError): - blobxfer.util.compute_md5_for_file_asbase64(testdata) - - def test_page_align_content_length(): assert 0 == blobxfer.util.page_align_content_length(0) assert 512 == blobxfer.util.page_align_content_length(1) @@ -177,3 +158,17 @@ def test_explode_azure_path(): cont, rpath = blobxfer.util.explode_azure_path(p) assert cont == 'some' assert rpath == 'remote/path' + + +def test_blob_is_snapshot(): + a = '/cont/a?snapshot=2017-02-23T22:21:14.8121864Z' + assert blobxfer.util.blob_is_snapshot(a) + + a = '/cont/a?snapshot=abc' + assert not blobxfer.util.blob_is_snapshot(a) + + a = 
'/cont/a?snapshot=' + assert not blobxfer.util.blob_is_snapshot(a) + + a = '/cont/a?snapshot=2017-02-23T22:21:14.8121864Z?snapshot=' + assert not blobxfer.util.blob_is_snapshot(a) From e82890ada974bbba7b2dd98a06b1f3761bbfa3b3 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 23 Feb 2017 23:47:16 -0800 Subject: [PATCH 09/47] Download offsets - Bind ASE to DownloadDescriptor instead - Add chunk size option to download and synccopy --- blobxfer/crypto/operations.py | 26 +++++ blobxfer/download.py | 34 +++++- blobxfer/models.py | 134 ++++++++++++++++------- cli/cli.py | 4 +- cli/settings.py | 3 + tests/test_blobxfer_crypto_operations.py | 7 ++ tests/test_blobxfer_download.py | 6 + tests/test_blobxfer_models.py | 134 +++++++++++++++++++---- tests/test_blobxfer_operations.py | 2 + 9 files changed, 279 insertions(+), 71 deletions(-) diff --git a/blobxfer/crypto/operations.py b/blobxfer/crypto/operations.py index 9a0f099..1931407 100644 --- a/blobxfer/crypto/operations.py +++ b/blobxfer/crypto/operations.py @@ -128,3 +128,29 @@ def rsa_encrypt_key_base64_encoded(rsaprivatekey, rsapublickey, plainkey): algorithm=cryptography.hazmat.primitives.hashes.SHA1(), label=None)) return blobxfer.util.base64_encode_as_string(enckey) + + +def pad_pkcs7(buf): + # type: (bytes) -> bytes + """Appends PKCS7 padding to an input buffer + :param bytes buf: buffer to add padding + :rtype: bytes + :return: buffer with PKCS7_PADDING + """ + padder = cryptography.hazmat.primitives.padding.PKCS7( + cryptography.hazmat.primitives.ciphers. + algorithms.AES.block_size).padder() + return padder.update(buf) + padder.finalize() + + +def unpad_pkcs7(buf): + # type: (bytes) -> bytes + """Removes PKCS7 padding a decrypted object + :param bytes buf: buffer to remove padding + :rtype: bytes + :return: buffer without PKCS7_PADDING + """ + unpadder = cryptography.hazmat.primitives.padding.PKCS7( + cryptography.hazmat.primitives.ciphers. + algorithms.AES.block_size).unpadder() + return unpadder.update(buf) + unpadder.finalize() diff --git a/blobxfer/download.py b/blobxfer/download.py index dbbecac..49eea54 100644 --- a/blobxfer/download.py +++ b/blobxfer/download.py @@ -47,6 +47,7 @@ import dateutil # local imports import blobxfer.md5 +import blobxfer.models import blobxfer.operations import blobxfer.util @@ -211,9 +212,10 @@ def _add_to_download_queue(self, lpath, rfile): :param blobxfer.models.AzureStorageEntity rfile: remote file """ # prepare remote file for download - rfile.prepare_for_download(lpath, self._spec.options) + dd = blobxfer.models.DownloadDescriptor( + lpath, rfile, self._spec.options) # add remote file to queue - self._download_queue.put(rfile) + self._download_queue.put(dd) def _initialize_download_threads(self): # type: (Downloader) -> None @@ -243,12 +245,34 @@ def _worker_thread_download(self): if self._download_terminate: break try: - rfile = self._download_queue.get(False, 1) + dd = self._download_queue.get(False, 1) except queue.Empty: continue - # TODO - # get next offset with respect to chunk size + # get download offsets + # issue get range + + # if encryption: + # 1. compute rolling hmac if present + # - roll through any subsequent unchecked parts + # 2. decrypt chunk + + # compute rolling md5 if present + # - roll through any subsequent unchecked parts + + # write data to disk + + # if no integrity check could be performed due to current + # integrity offset mismatch, add to unchecked set + + # check if last chunk to write + # 1. complete integrity checks + # 2. set file uid/gid + # 3. 
set file modes + + # pickle dd to resume file + + rfile = dd._ase print('<<', rfile.container, rfile.name, rfile.lmt, rfile.size, rfile.md5, rfile.mode, rfile.encryption_metadata) diff --git a/blobxfer/models.py b/blobxfer/models.py index 34d05ce..6d0f753 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -100,6 +100,7 @@ class AzureStorageModes(enum.Enum): DownloadOptions = collections.namedtuple( 'DownloadOptions', [ 'check_file_md5', + 'chunk_size_bytes', 'delete_extraneous_destination', 'mode', 'overwrite', @@ -110,16 +111,24 @@ class AzureStorageModes(enum.Enum): ) SyncCopyOptions = collections.namedtuple( 'SyncCopyOptions', [ - 'exclude', - 'include', + 'chunk_size_bytes', 'mode', 'overwrite', - 'skip_on', ] ) LocalPath = collections.namedtuple( 'LocalPath', [ - 'parent_path', 'relative_path' + 'parent_path', + 'relative_path', + ] +) +DownloadOffsets = collections.namedtuple( + 'DownloadOffsets', [ + 'fd_start', + 'num_bytes', + 'range_end', + 'range_start', + 'unpad', ] ) @@ -749,58 +758,60 @@ def populate_from_file(self, file): self._md5 = file.properties.content_settings.content_md5 self._mode = AzureStorageModes.File - def prepare_for_download(self, lpath, options): - # type: (AzureStorageEntity, pathlib.Path, DownloadOptions) -> None - """Prepare entity for download - :param AzureStorageEntity self: this - :param pathlib.Path lpath: local path - :param DownloadOptions options: download options - """ - if self._encryption is not None: - hmac = self._encryption.initialize_hmac() - else: - hmac = None - if hmac is None and options.check_file_md5: - md5 = blobxfer.md5.new_md5_hasher() - else: - md5 = None - self.download = DownloadDescriptor(lpath, hmac, md5) - self.download.allocate_disk_space( - self._size, self._encryption is not None) - class DownloadDescriptor(object): - """DownloadDescriptor""" - def __init__(self, lpath, hmac, md5): - # type: (DownloadDescriptior, pathlib.Path, hmac.HMAC, md5.MD5) -> None - """Ctor for Download Descriptor + """Download Descriptor""" + + _AES_BLOCKSIZE = blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES + + def __init__(self, lpath, ase, options): + # type: (DownloadDescriptior, pathlib.Path, AzureStorageEntity, + # DownloadOptions) -> None + """Ctor for DownloadDescriptor :param DownloadDescriptor self: this :param pathlib.Path lpath: local path - :param hmac.HMAC hmac: hmac - :param md5.MD5 md5: md5 + :param AzureStorageEntity ase: Azure Storage Entity + :param DownloadOptions options: download options """ self.final_path = lpath # create path holding the temporary file to download to _tmp = list(lpath.parts[:-1]) _tmp.append(lpath.name + '.bxtmp') self.local_path = pathlib.Path(*_tmp) - self.hmac = hmac - self.md5 = md5 - self.current_position = 0 - - def allocate_disk_space(self, size, encryption): - # type: (DownloadDescriptor, int, bool) -> None - """Perform file allocation (possibly sparse), if encrypted this may - be an underallocation + self._ase = ase + self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) + self.hmac = None + self.md5 = None + self.offset = 0 + self.integrity_counter = 0 + self.unchecked_chunks = set() + self._initialize_integrity_checkers(options) + self._allocate_disk_space() + + def _initialize_integrity_checkers(self, options): + # type: (DownloadDescriptor, DownloadOptions) -> None + """Initialize file integrity checkers + :param DownloadDescriptor self: this + :param DownloadOptions options: download options + """ + if self._ase.encryption_metadata is not None: + self.hmac = 
self._ase.encryption_metadata.initialize_hmac() + if self.hmac is None and options.check_file_md5: + self.md5 = blobxfer.md5.new_md5_hasher() + + def _allocate_disk_space(self): + # type: (DownloadDescriptor, int) -> None + """Perform file allocation (possibly sparse) :param DownloadDescriptor self: this :param int size: size - :param bool encryption: encryption enabled """ + size = self._ase.size # compute size if size > 0: - if encryption: - allocatesize = size - \ - blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES + if self._ase.encryption_metadata is not None: + # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs + allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ + self._AES_BLOCKSIZE else: allocatesize = size if allocatesize < 0: @@ -818,6 +829,47 @@ def allocate_disk_space(self, size, encryption): fd.seek(allocatesize - 1) fd.write(b'\0') + def next_offsets(self): + # type: (DownloadDescriptor) -> DownloadOffsets + """Retrieve the next offsets + :param DownloadDescriptor self: this + :rtype: DownloadOffsets + :return: download offsets + """ + if self.offset >= self._ase.size: + return None + if self.offset + self._chunk_size > self._ase.size: + chunk = self._ase.size - self.offset + else: + chunk = self._chunk_size + # on download, num_bytes must be offset by -1 as the x-ms-range + # header expects it that way. x -> y bytes means first bits of the + # (x+1)th byte to the last bits of the (y+1)th byte. for example, + # 0 -> 511 means byte 1 to byte 512 + num_bytes = chunk - 1 + fd_start = self.offset + range_start = self.offset + if self._ase.encryption_metadata is not None: + # ensure start is AES block size aligned + range_start = range_start - (range_start % self._AES_BLOCKSIZE) - \ + self._AES_BLOCKSIZE + if range_start <= 0: + range_start = 0 + range_end = self.offset + num_bytes + self.offset += chunk + if (self._ase.encryption_metadata is not None and + self.offset >= self._ase.size): + unpad = True + else: + unpad = False + return DownloadOffsets( + fd_start=fd_start, + num_bytes=num_bytes, + range_start=range_start, + range_end=range_end, + unpad=unpad, + ) + class AzureDestinationPaths(object): def __init__(self): diff --git a/cli/cli.py b/cli/cli.py index 64be863..0c085c7 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -286,7 +286,7 @@ def callback(ctx, param, value): expose_value=False, type=int, default=4194304, - help='Chunk size in bytes [4194304]', + help='Block or chunk size in bytes [4194304]', callback=callback)(f) @@ -580,6 +580,7 @@ def download_options(f): f = _exclude_option(f) f = _endpoint_option(f) f = _delete_option(f) + f = _chunk_size_bytes_option(f) f = _access_key_option(f) return f @@ -596,6 +597,7 @@ def sync_copy_options(f): f = _include_option(f) f = _exclude_option(f) f = _endpoint_option(f) + f = _chunk_size_bytes_option(f) f = _access_key_option(f) return f diff --git a/cli/settings.py b/cli/settings.py index f567bea..448d0a8 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -117,6 +117,7 @@ def add_cli_options( 'exclude': cli_options['exclude'], 'options': { 'check_file_md5': cli_options['file_md5'], + 'chunk_size_bytes': cli_options['chunk_size_bytes'], 'delete_extraneous_destination': cli_options['delete'], 'mode': cli_options['mode'], 'overwrite': cli_options['overwrite'], @@ -148,6 +149,7 @@ def add_cli_options( 'include': cli_options['include'], 'exclude': cli_options['exclude'], 'options': { + 'chunk_size_bytes': cli_options['chunk_size_bytes'], 'mode': cli_options['mode'], 'overwrite': cli_options['overwrite'], 'skip_on': { @@ 
-279,6 +281,7 @@ def create_download_specifications(config): ds = blobxfer.models.DownloadSpecification( download_options=blobxfer.models.DownloadOptions( check_file_md5=conf['options']['check_file_md5'], + chunk_size_bytes=conf['options']['chunk_size_bytes'], delete_extraneous_destination=conf[ 'options']['delete_extraneous_destination'], mode=mode, diff --git a/tests/test_blobxfer_crypto_operations.py b/tests/test_blobxfer_crypto_operations.py index 1760701..a37be4f 100644 --- a/tests/test_blobxfer_crypto_operations.py +++ b/tests/test_blobxfer_crypto_operations.py @@ -42,3 +42,10 @@ def test_rsa_encrypt_decrypt_keys(): assert enckey is not None plainkey = ops.rsa_decrypt_base64_encoded_key(_RSAKEY, enckey) assert symkey == plainkey + + +def test_pkcs7_padding(): + buf = os.urandom(32) + pbuf = ops.pad_pkcs7(buf) + buf2 = ops.unpad_pkcs7(pbuf) + assert buf == buf2 diff --git a/tests/test_blobxfer_download.py b/tests/test_blobxfer_download.py index 56e8999..a80c629 100644 --- a/tests/test_blobxfer_download.py +++ b/tests/test_blobxfer_download.py @@ -29,6 +29,7 @@ def test_check_download_conditions(tmpdir): ds = models.DownloadSpecification( download_options=models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=models.AzureStorageModes.Auto, overwrite=False, @@ -52,6 +53,7 @@ def test_check_download_conditions(tmpdir): ds = models.DownloadSpecification( download_options=models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=models.AzureStorageModes.Auto, overwrite=True, @@ -73,6 +75,7 @@ def test_check_download_conditions(tmpdir): ds = models.DownloadSpecification( download_options=models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=models.AzureStorageModes.Auto, overwrite=True, @@ -94,6 +97,7 @@ def test_check_download_conditions(tmpdir): ds = models.DownloadSpecification( download_options=models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=models.AzureStorageModes.Auto, overwrite=True, @@ -123,6 +127,7 @@ def test_check_download_conditions(tmpdir): ds = models.DownloadSpecification( download_options=models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=models.AzureStorageModes.Auto, overwrite=True, @@ -236,6 +241,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d._md5_check_thread = mock.MagicMock() d._spec.sources = [] d._spec.options = mock.MagicMock() + d._spec.options.chunk_size_bytes = 1 d._spec.options.mode = models.AzureStorageModes.Auto d._spec.options.overwrite = True d._spec.skip_on = mock.MagicMock() diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 7b81332..3227250 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -279,6 +279,7 @@ def test_downloadspecification(): ds = models.DownloadSpecification( download_options=models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=models.AzureStorageModes.Auto, overwrite=True, @@ -341,45 +342,130 @@ def test_azurestorageentity(): assert ase.mode == models.AzureStorageModes.File -def test_azurestorageentity_prepare_for_download(tmpdir): +def test_downloaddescriptor(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) + opts = mock.MagicMock() opts.check_file_md5 = True - + opts.chunk_size_bytes = 1 
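[Editorial note] The inclusive-range arithmetic in next_offsets can be checked by hand. For an encrypted 288-byte (256 + 32) entity with a 256-byte chunk size, the second chunk starts at file offset 256, but the requested range start is pulled back one extra AES block, presumably so the preceding ciphertext block is available during decryption. The values below match the assertions in test_downloaddescriptor_next_offsets further down:

    AES_BLOCKSIZE = 16
    size, chunk_size = 256 + 32, 256

    # second chunk: file offset 256, 32 bytes remain
    offset = 256
    chunk = min(chunk_size, size - offset)    # 32
    num_bytes = chunk - 1                     # 31 (x-ms-range is inclusive)
    fd_start = offset                         # 256
    range_start = (offset - (offset % AES_BLOCKSIZE)
                   - AES_BLOCKSIZE)           # 240 == 256 - 16
    range_end = offset + num_bytes            # 287 == 256 + 31
    unpad = True                              # final chunk of an encrypted blob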
ase = models.AzureStorageEntity('cont') - ase._size = 0 - ase.prepare_for_download(lp, opts) - - assert ase.download.hmac is None - assert ase.download.md5 is not None - assert ase.download.final_path == lp - assert ase.download.current_position == 0 - + ase._size = 1024 ase._encryption = mock.MagicMock() - ase.prepare_for_download(lp, opts) + d = models.DownloadDescriptor(lp, ase, opts) - assert ase.download.hmac is not None - assert ase.download.md5 is None - - -def test_downloaddescriptor(tmpdir): - lp = pathlib.Path(str(tmpdir.join('a'))) - d = models.DownloadDescriptor(lp, None, None) - assert d.current_position == 0 + assert d.offset == 0 assert d.final_path == lp assert str(d.local_path) == str(lp) + '.bxtmp' - - d.allocate_disk_space(1024, True) assert d.local_path.stat().st_size == 1024 - 16 d.local_path.unlink() - d.allocate_disk_space(1, True) + ase._size = 1 + d._allocate_disk_space() assert d.local_path.stat().st_size == 0 d.local_path.unlink() - d.allocate_disk_space(1024, False) + ase._encryption = None + ase._size = 1024 + d._allocate_disk_space() assert d.local_path.stat().st_size == 1024 # pre-existing file check - d.allocate_disk_space(0, False) + ase._size = 0 + d._allocate_disk_space() assert d.local_path.stat().st_size == 0 + + +def test_downloaddescriptor_next_offsets(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 256 + ase = models.AzureStorageEntity('cont') + ase._size = 128 + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + assert offsets.fd_start == 0 + assert offsets.num_bytes == 127 + assert offsets.range_start == 0 + assert offsets.range_end == 127 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._size = 0 + d = models.DownloadDescriptor(lp, ase, opts) + assert d.next_offsets() is None + + ase._size = 1 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert offsets.fd_start == 0 + assert offsets.num_bytes == 0 + assert offsets.range_start == 0 + assert offsets.range_end == 0 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._size = 256 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert offsets.fd_start == 0 + assert offsets.num_bytes == 255 + assert offsets.range_start == 0 + assert offsets.range_end == 255 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._size = 256 + 16 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert offsets.fd_start == 0 + assert offsets.num_bytes == 255 + assert offsets.range_start == 0 + assert offsets.range_end == 255 + assert not offsets.unpad + offsets = d.next_offsets() + assert offsets.fd_start == 256 + assert offsets.num_bytes == 15 + assert offsets.range_start == 256 + assert offsets.range_end == 256 + 15 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._encryption = mock.MagicMock() + ase._size = 128 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert offsets.fd_start == 0 + assert offsets.num_bytes == 127 + assert offsets.range_start == 0 + assert offsets.range_end == 127 + assert offsets.unpad + assert d.next_offsets() is None + + ase._size = 256 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert offsets.fd_start == 0 + assert offsets.num_bytes == 255 + assert offsets.range_start == 0 + assert offsets.range_end == 255 + assert offsets.unpad + assert d.next_offsets() is 
None + + ase._size = 256 + 32 # 16 bytes over + padding + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert offsets.fd_start == 0 + assert offsets.num_bytes == 255 + assert offsets.range_start == 0 + assert offsets.range_end == 255 + assert not offsets.unpad + offsets = d.next_offsets() + assert offsets.fd_start == 256 + assert offsets.num_bytes == 31 + assert offsets.range_start == 256 - 16 + assert offsets.range_end == 256 + 31 + assert offsets.unpad + assert d.next_offsets() is None diff --git a/tests/test_blobxfer_operations.py b/tests/test_blobxfer_operations.py index 9926bab..9b648f6 100644 --- a/tests/test_blobxfer_operations.py +++ b/tests/test_blobxfer_operations.py @@ -20,6 +20,7 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): ds = blobxfer.models.DownloadSpecification( download_options=blobxfer.models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=blobxfer.models.AzureStorageModes.Auto, overwrite=True, @@ -54,6 +55,7 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): ds = blobxfer.models.DownloadSpecification( download_options=blobxfer.models.DownloadOptions( check_file_md5=True, + chunk_size_bytes=4194304, delete_extraneous_destination=False, mode=blobxfer.models.AzureStorageModes.File, overwrite=True, From 31ef912cd675fc5dcd81953d4aa84b990b60c55f Mon Sep 17 00:00:00 2001 From: Fred Park Date: Fri, 24 Feb 2017 19:32:56 -0800 Subject: [PATCH 10/47] More progress on download - Add custom retry handler - Add snapshot support - Add range gets --- blobxfer/blob/append/operations.py | 20 ++-- blobxfer/blob/block/operations.py | 11 ++- blobxfer/blob/operations.py | 28 +++++- blobxfer/blob/page/operations.py | 9 +- blobxfer/download.py | 128 ++++++++++++++++++++----- blobxfer/file/operations.py | 25 +++++ blobxfer/md5.py | 10 +- blobxfer/models.py | 118 ++++++++++++++++++++--- blobxfer/retry.py | 85 ++++++++++++++++ blobxfer/util.py | 14 +++ tests/test_blobxfer_blob_operations.py | 30 +++++- tests/test_blobxfer_download.py | 29 ++++-- tests/test_blobxfer_file_operations.py | 14 +++ tests/test_blobxfer_md5.py | 7 +- tests/test_blobxfer_models.py | 59 ++++++++---- tests/test_blobxfer_retry.py | 39 ++++++++ tests/test_blobxfer_util.py | 9 ++ 17 files changed, 533 insertions(+), 102 deletions(-) create mode 100644 blobxfer/retry.py create mode 100644 tests/test_blobxfer_retry.py diff --git a/blobxfer/blob/append/operations.py b/blobxfer/blob/append/operations.py index 88d5b58..cbe4008 100644 --- a/blobxfer/blob/append/operations.py +++ b/blobxfer/blob/append/operations.py @@ -31,8 +31,9 @@ # stdlib imports import logging # non-stdlib imports -from azure.storage.blob import AppendBlobService +import azure.storage.blob # local imports +import blobxfer.retry # create logger logger = logging.getLogger(__name__) @@ -46,24 +47,15 @@ def create_client(storage_account): :return: append blob service client """ if storage_account.is_sas: - client = AppendBlobService( + client = azure.storage.blob.AppendBlobService( account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint) else: - client = AppendBlobService( + client = azure.storage.blob.AppendBlobService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint) + # set retry policy + client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client - - -def list_blobs(client, container, 
prefix): - # type: (azure.storage.blob.AppendBlobService, str, str) -> list - """List append blobs in path - :param AppendBlobService client: append blob client - :param str container: container - :param str prefix: path prefix - """ - - pass diff --git a/blobxfer/blob/block/operations.py b/blobxfer/blob/block/operations.py index 94fd534..c07fda7 100644 --- a/blobxfer/blob/block/operations.py +++ b/blobxfer/blob/block/operations.py @@ -31,8 +31,9 @@ # stdlib imports import logging # non-stdlib imports -from azure.storage.blob import BlockBlobService +import azure.storage.blob # local imports +import blobxfer.retry # create logger logger = logging.getLogger(__name__) @@ -42,19 +43,21 @@ def create_client(storage_account): # type: (blobxfer.models.AzureStorageAccount) -> BlockBlobService """Create block blob client :param blobxfer.models.AzureStorageAccount storage_account: storage account - :rtype: BlockBlobService + :rtype: azure.storage.blob.BlockBlobService :return: block blob service client """ if storage_account.is_sas: - client = BlockBlobService( + client = azure.storage.blob.BlockBlobService( account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint) else: - client = BlockBlobService( + client = azure.storage.blob.BlockBlobService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint) + # set retry policy + client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client diff --git a/blobxfer/blob/operations.py b/blobxfer/blob/operations.py index 411ad52..4a8f0eb 100644 --- a/blobxfer/blob/operations.py +++ b/blobxfer/blob/operations.py @@ -76,6 +76,13 @@ def list_blobs(client, container, prefix, mode, timeout=None): """ if mode == blobxfer.models.AzureStorageModes.File: raise RuntimeError('cannot list Azure Files from blob client') + if blobxfer.util.blob_is_snapshot(prefix): + snapshot = blobxfer.util.parse_blob_snapshot_parameter(prefix) + blob = client.get_blob_properties( + container_name=container, blob_name=prefix, snapshot=snapshot, + timeout=timeout) + yield blob + return blobs = client.list_blobs( container_name=container, prefix=prefix, @@ -99,5 +106,22 @@ def list_blobs(client, container, prefix, mode, timeout=None): yield blob -def get_blob_range(client, container, blob_name, snapshot): - pass +def get_blob_range(ase, offsets, timeout=None): + # type: (blobxfer.models.AzureStorageEntity, + # blobxfer.models.DownloadOffsets, int) -> bytes + """Retrieve blob range + :param blobxfer.models.AzureStorageEntity ase: AzureStorageEntity + :param blobxfer.models.DownloadOffsets offsets: downlaod offsets + :param int timeout: timeout + :rtype: bytes + :return: content for blob range + """ + return ase.client._get_blob( + container_name=ase.container, + blob_name=ase.name, + snapshot=ase.snapshot, + start_range=offsets.range_start, + end_range=offsets.range_end, + validate_content=False, # HTTPS takes care of integrity during xfer + timeout=timeout, + ).content diff --git a/blobxfer/blob/page/operations.py b/blobxfer/blob/page/operations.py index f23520b..359e207 100644 --- a/blobxfer/blob/page/operations.py +++ b/blobxfer/blob/page/operations.py @@ -31,8 +31,9 @@ # stdlib imports import logging # non-stdlib imports -from azure.storage.blob import PageBlobService +import azure.storage.blob # local imports +import blobxfer.retry # create logger logger = logging.getLogger(__name__) @@ -46,13 +47,15 @@ def create_client(storage_account): :return: block blob service 
client """ if storage_account.is_sas: - client = PageBlobService( + client = azure.storage.blob.PageBlobService( account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint) else: - client = PageBlobService( + client = azure.storage.blob.PageBlobService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint) + # set retry policy + client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client diff --git a/blobxfer/download.py b/blobxfer/download.py index 49eea54..970bf18 100644 --- a/blobxfer/download.py +++ b/blobxfer/download.py @@ -49,6 +49,8 @@ import blobxfer.md5 import blobxfer.models import blobxfer.operations +import blobxfer.blob.operations +import blobxfer.file.operations import blobxfer.util # create logger @@ -74,12 +76,16 @@ def __init__(self, general_options, creds, spec): :param blobxfer.models.DownloadSpecification spec: download spec """ self._md5_meta_lock = threading.Lock() + self._download_lock = threading.Lock() self._all_remote_files_processed = False self._md5_map = {} self._md5_offload = None self._md5_check_thread = None self._download_queue = queue.Queue() + self._download_set = set() self._download_threads = [] + self._download_count = 0 + self._download_total_bytes = 0 self._download_terminate = False self._general_options = general_options self._creds = creds @@ -164,8 +170,11 @@ def _post_md5_skip_on_check(self, filename, md5_match): """ with self._md5_meta_lock: rfile = self._md5_map.pop(filename) - if not md5_match: - lpath = pathlib.Path(filename) + lpath = pathlib.Path(filename) + if md5_match: + with self._download_lock: + self._download_set.remove(lpath) + else: self._add_to_download_queue(lpath, rfile) def _initialize_check_md5_downloads_thread(self): @@ -185,12 +194,18 @@ def _check_for_downloads_from_md5(self): (len(self._md5_map) == 0 and self._all_remote_files_processed)): break + result = None cv.acquire() while not self._download_terminate: result = self._md5_offload.get_localfile_md5_done() if result is None: # use cv timeout due to possible non-wake while running cv.wait(1) + # check for terminating conditions + with self._md5_meta_lock: + if (len(self._md5_map) == 0 and + self._all_remote_files_processed): + break else: break cv.release() @@ -214,7 +229,7 @@ def _add_to_download_queue(self, lpath, rfile): # prepare remote file for download dd = blobxfer.models.DownloadDescriptor( lpath, rfile, self._spec.options) - # add remote file to queue + # add download descriptor to queue self._download_queue.put(dd) def _initialize_download_threads(self): @@ -222,17 +237,20 @@ def _initialize_download_threads(self): """Initialize download threads :param Downloader self: this """ + logger.debug('spawning {} transfer threads'.format( + self._general_options.concurrency.transfer_threads)) for _ in range(self._general_options.concurrency.transfer_threads): thr = threading.Thread(target=self._worker_thread_download) self._download_threads.append(thr) thr.start() - def _terminate_download_threads(self): - # type: (Downloader) -> None + def _wait_for_download_threads(self, terminate): + # type: (Downloader, bool) -> None """Terminate download threads :param Downloader self: this + :param bool terminate: terminate threads """ - self._download_terminate = True + self._download_terminate = terminate for thr in self._download_threads: thr.join() @@ -244,37 +262,68 @@ def _worker_thread_download(self): while True: if self._download_terminate: break 
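[Editorial note] Patch 10 replaces the storage SDK's default retry policy with a custom ExponentialRetryWithMaxWait on every client. Its implementation is not shown in this hunk; the sketch below is only a generic illustration of exponential backoff capped at a maximum wait, not the blobxfer retry code:

    import random

    def backoff_wait(attempt, base=1.0, max_wait=8.0):
        # generic capped exponential backoff; illustrative only
        wait = min(base * (2 ** attempt), max_wait)
        # small jitter so concurrent clients do not retry in lockstep
        return wait + random.uniform(0, wait / 10)

    # approximate waits: 1, 2, 4, 8, 8 seconds (plus jitter)
    print([round(backoff_wait(n), 2) for n in range(5)])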
+ with self._download_lock: + if (self._all_remote_files_processed and + len(self._download_set) == 0): + break try: dd = self._download_queue.get(False, 1) except queue.Empty: continue # get download offsets - + offsets = dd.next_offsets() + # check if all operations completed + if offsets is None and dd.outstanding_operations == 0: + # TODO + # 1. complete integrity checks + # 2. set file uid/gid + # 3. set file modes + # 4. move file to final path + with self._download_lock: + self._download_set.remove(dd.final_path) + self._download_count += 1 + logger.info('download complete: {}/{} to {}'.format( + dd.entity.container, dd.entity.name, dd.final_path)) + continue + # re-enqueue for other threads to download + self._download_queue.put(dd) + if offsets is None: + continue # issue get range - - # if encryption: - # 1. compute rolling hmac if present - # - roll through any subsequent unchecked parts - # 2. decrypt chunk - - # compute rolling md5 if present - # - roll through any subsequent unchecked parts + if dd.entity.mode == blobxfer.models.AzureStorageModes.File: + chunk = blobxfer.file.operations.get_file_range( + dd.entity, offsets, self._general_options.timeout_sec) + else: + chunk = blobxfer.blob.operations.get_blob_range( + dd.entity, offsets, self._general_options.timeout_sec) + # accounting + with self._download_lock: + self._download_total_bytes += offsets.num_bytes + # decrypt if necessary + if dd.entity.is_encrypted: + # TODO via crypto pool + # 1. compute rolling hmac if present + # - roll through any subsequent unchecked parts + # 2. decrypt chunk + pass + # compute rolling md5 via md5 pool + if dd.must_compute_md5: + # TODO + # - roll through any subsequent unchecked parts + pass # write data to disk # if no integrity check could be performed due to current # integrity offset mismatch, add to unchecked set - # check if last chunk to write - # 1. complete integrity checks - # 2. set file uid/gid - # 3. 
set file modes + dd.dec_outstanding_operations() # pickle dd to resume file - rfile = dd._ase - print('<<', rfile.container, rfile.name, rfile.lmt, rfile.size, - rfile.md5, rfile.mode, rfile.encryption_metadata) +# rfile = dd._ase +# print('<<', rfile.container, rfile.name, rfile.lmt, rfile.size, +# rfile.md5, rfile.mode, rfile.encryption_metadata) def _run(self): # type: (Downloader) -> None @@ -290,26 +339,52 @@ def _run(self): # initialize download threads self._initialize_download_threads() # iterate through source paths to download + nfiles = 0 + empty_files = 0 + skipped_files = 0 + total_size = 0 + skipped_size = 0 for src in self._spec.sources: for rfile in src.files( self._creds, self._spec.options, self._general_options): + nfiles += 1 + total_size += rfile.size + if rfile.size == 0: + empty_files += 1 # form local path for remote file lpath = pathlib.Path(self._spec.destination.path, rfile.name) # check on download conditions action = self._check_download_conditions(lpath, rfile) if action == DownloadAction.Skip: + skipped_files += 1 + skipped_size += rfile.size continue - elif action == DownloadAction.CheckMd5: + # add potential download to set + with self._download_lock: + self._download_set.add(lpath) + # either MD5 check or download now + if action == DownloadAction.CheckMd5: self._pre_md5_skip_on_check(lpath, rfile) elif action == DownloadAction.Download: self._add_to_download_queue(lpath, rfile) + download_files = nfiles - skipped_files + download_size = total_size - skipped_size # clean up processes and threads with self._md5_meta_lock: self._all_remote_files_processed = True + logger.debug( + ('{0} remote files processed, waiting for download completion ' + 'of {1:.4f} MiB').format(nfiles, download_size / 1048576)) self._md5_check_thread.join() - # TODO wait for download threads - + self._wait_for_download_threads(terminate=False) self._md5_offload.finalize_md5_processes() + if (self._download_count != download_files or + self._download_total_bytes != download_size): + raise RuntimeError( + 'download mismatch: [count={}/{} bytes={}/{}]'.format( + self._download_count, download_files, + self._download_total_bytes, download_size)) + logger.info('all files downloaded') def start(self): # type: (Downloader) -> None @@ -320,6 +395,7 @@ def start(self): logger.error( 'KeyboardInterrupt detected, force terminating ' 'processes and threads (this may take a while)...') - self._terminate_download_threads() + self._wait_for_download_threads(terminate=True) self._md5_offload.finalize_md5_processes() + # TODO close resume file in finally? 
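[Editorial note] The cleanup at the end of _run proceeds in a fixed order: signal the end of remote-file enumeration, drain the MD5-check thread, join the transfer threads, then stop the MD5 offload processes. A condensed sketch of that ordering, using hypothetical attribute names (the real Downloader attributes are underscore-prefixed):

    def shutdown(dl, terminate=False):
        # condensed illustration of the _run cleanup ordering; not the
        # actual method
        # 1. tell the MD5-check thread no more remote files are coming
        with dl.md5_meta_lock:
            dl.all_remote_files_processed = True
        # 2. wait for outstanding MD5 comparisons to drain
        dl.md5_check_thread.join()
        # 3. wait for (or force-stop) the transfer threads
        dl.download_terminate = terminate
        for thr in dl.download_threads:
            thr.join()
        # 4. finally tear down the MD5 offload worker processes
        dl.md5_offload.finalize_md5_processes()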
            raise
diff --git a/blobxfer/file/operations.py b/blobxfer/file/operations.py
index 221f412..09f7d68 100644
--- a/blobxfer/file/operations.py
+++ b/blobxfer/file/operations.py
@@ -38,6 +38,7 @@
 import azure.common
 import azure.storage.file
 # local imports
+import blobxfer.retry

 # create logger
 logger = logging.getLogger(__name__)
@@ -60,6 +61,8 @@ def create_client(storage_account):
             account_name=storage_account.name,
             account_key=storage_account.key,
             endpoint_suffix=storage_account.endpoint)
+    # set retry policy
+    client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry
     return client


@@ -145,3 +148,25 @@ def list_files(client, fileshare, prefix, timeout=None):
                 yield fsprop
         else:
             dirs.append(fspath)
+
+
+def get_file_range(ase, offsets, timeout=None):
+    # type: (blobxfer.models.AzureStorageEntity,
+    #        blobxfer.models.DownloadOffsets, int) -> bytes
+    """Retrieve file range
+    :param blobxfer.models.AzureStorageEntity ase: AzureStorageEntity
+    :param blobxfer.models.DownloadOffsets offsets: download offsets
+    :param int timeout: timeout
+    :rtype: bytes
+    :return: content for file range
+    """
+    dir, fpath = parse_file_path(ase.name)
+    return ase.client._get_file(
+        share_name=ase.container,
+        directory_name=dir,
+        file_name=fpath,
+        start_range=offsets.range_start,
+        end_range=offsets.range_end,
+        validate_content=False,  # HTTPS takes care of integrity during xfer
+        timeout=timeout,
+    ).content
diff --git a/blobxfer/md5.py b/blobxfer/md5.py
index 86dbd30..741e360 100644
--- a/blobxfer/md5.py
+++ b/blobxfer/md5.py
@@ -92,7 +92,7 @@ def compute_md5_for_data_asbase64(data):

 class LocalFileMd5Offload(object):
     """LocalFileMd5Offload"""
-    def __init__(self, num_workers=None):
+    def __init__(self, num_workers):
         # type: (LocalFileMd5Offload, int) -> None
         """Ctor for Local File Md5 Offload
         :param LocalFileMd5Offload self: this
@@ -115,16 +115,14 @@ def done_cv(self):
         """
         return self._done_cv

-    def _initialize_md5_processes(self, num_workers=None):
+    def _initialize_md5_processes(self, num_workers):
         # type: (LocalFileMd5Offload, int) -> None
         """Initialize MD5 checking processes for files for download
         :param LocalFileMd5Offload self: this
         :param int num_workers: number of worker processes
         """
-        if num_workers is None:
-            num_workers = multiprocessing.cpu_count() // 2 - 1
-        if num_workers < 1:
-            num_workers = 1
+        if num_workers is None or num_workers < 1:
+            raise ValueError('invalid num_workers: {}'.format(num_workers))
         for _ in range(num_workers):
             proc = multiprocessing.Process(
                 target=self._worker_compute_md5_localfile_process)
diff --git a/blobxfer/models.py b/blobxfer/models.py
index 6d0f753..74809eb 100644
--- a/blobxfer/models.py
+++ b/blobxfer/models.py
@@ -34,11 +34,13 @@
 import enum
 import fnmatch
 import logging
+import math
 import os
 try:
     import pathlib2 as pathlib
 except ImportError:  # noqa
     import pathlib
+import multiprocessing
 # non-stdlib imports
 # local imports
 from .api import (
@@ -146,11 +148,15 @@ def __init__(self, crypto_processes, md5_processes, transfer_threads):
         self.md5_processes = md5_processes
         self.transfer_threads = transfer_threads
         if self.crypto_processes is None or self.crypto_processes < 1:
+            self.crypto_processes = multiprocessing.cpu_count() // 2 - 1
+        if self.crypto_processes < 1:
             self.crypto_processes = 1
         if self.md5_processes is None or self.md5_processes < 1:
+            self.md5_processes = multiprocessing.cpu_count() // 2
+        if self.md5_processes < 1:
             self.md5_processes = 1
         if self.transfer_threads is None or self.transfer_threads < 1:
-            self.transfer_threads = 1
+
self.transfer_threads = multiprocessing.cpu_count() * 2 class GeneralOptions(object): @@ -602,7 +608,7 @@ def _populate_from_list_files(self, creds, options, general_options): else: ed = None ase = AzureStorageEntity(cont, ed) - ase.populate_from_file(file) + ase.populate_from_file(sa, file) yield ase def _populate_from_list_blobs(self, creds, options, general_options): @@ -631,7 +637,7 @@ def _populate_from_list_blobs(self, creds, options, general_options): else: ed = None ase = AzureStorageEntity(cont, ed) - ase.populate_from_blob(blob) + ase.populate_from_blob(sa, blob) yield ase @@ -646,6 +652,7 @@ def __init__(self, container, ed=None): :param blobxfer.crypto.models.EncryptionMetadata ed: encryption metadata """ + self._client = None self._container = container self._name = None self._mode = None @@ -657,6 +664,16 @@ def __init__(self, container, ed=None): self._vio = None self.download = None + @property + def client(self): + # type: (AzureStorageEntity) -> object + """Associated storage client + :param AzureStorageEntity self: this + :rtype: object + :return: associated storage client + """ + return self._client + @property def container(self): # type: (AzureStorageEntity) -> str @@ -697,6 +714,16 @@ def size(self): """ return self._size + @property + def snapshot(self): + # type: (AzureStorageEntity) -> str + """Entity snapshot + :param AzureStorageEntity self: this + :rtype: str + :return: snapshot of entity + """ + return self._snapshot + @property def md5(self): # type: (AzureStorageEntity) -> str @@ -717,21 +744,33 @@ def mode(self): """ return self._mode + @property + def is_encrypted(self): + # type: (AzureStorageEntity) -> bool + """If data is encrypted + :param AzureStorageEntity self: this + :rtype: bool + :return: if encryption metadata is present + """ + return self._encryption is not None + @property def encryption_metadata(self): # type: (AzureStorageEntity) -> # blobxfer.crypto.models.EncryptionMetadata - """Entity mode (type) + """Entity metadata (type) :param AzureStorageEntity self: this :rtype: blobxfer.crypto.models.EncryptionMetadata :return: encryption metadata of entity """ return self._encryption - def populate_from_blob(self, blob): - # type: (AzureStorageEntity, azure.storage.blob.models.Blob) -> None + def populate_from_blob(self, sa, blob): + # type: (AzureStorageEntity, AzureStorageAccount, + # azure.storage.blob.models.Blob) -> None """Populate properties from Blob :param AzureStorageEntity self: this + :param AzureStorageAccount sa: storage account :param azure.storage.blob.models.Blob blob: blob to populate from """ self._name = blob.name @@ -741,22 +780,29 @@ def populate_from_blob(self, blob): self._md5 = blob.properties.content_settings.content_md5 if blob.properties.blob_type == BlobTypes.AppendBlob: self._mode = AzureStorageModes.Append + self._client = sa.append_blob_client elif blob.properties.blob_type == BlobTypes.BlockBlob: self._mode = AzureStorageModes.Block + self._client = sa.block_blob_client elif blob.properties.blob_type == BlobTypes.PageBlob: self._mode = AzureStorageModes.Page + self._client = sa.page_blob_client - def populate_from_file(self, file): - # type: (AzureStorageEntity, azure.storage.file.models.File) -> None + def populate_from_file(self, sa, file): + # type: (AzureStorageEntity, AzureStorageAccount, + # azure.storage.file.models.File) -> None """Populate properties from File :param AzureStorageEntity self: this + :param AzureStorageAccount sa: storage account :param azure.storage.file.models.File file: file to populate 
from """ self._name = file.name + self._snapshot = None self._lmt = file.properties.last_modified self._size = file.properties.content_length self._md5 = file.properties.content_settings.content_md5 self._mode = AzureStorageModes.File + self._client = sa.file_client class DownloadDescriptor(object): @@ -778,23 +824,53 @@ def __init__(self, lpath, ase, options): _tmp = list(lpath.parts[:-1]) _tmp.append(lpath.name + '.bxtmp') self.local_path = pathlib.Path(*_tmp) + self._meta_lock = multiprocessing.Lock() self._ase = ase + # calculate the total number of ops required for transfer self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) + try: + self._total_chunks = int( + math.ceil(self._ase.size / self._chunk_size)) + except ZeroDivisionError: + self._total_chunks = 0 self.hmac = None self.md5 = None self.offset = 0 self.integrity_counter = 0 self.unchecked_chunks = set() + self._outstanding_ops = self._total_chunks + self._completed_ops = 0 + # initialize checkers and allocate space self._initialize_integrity_checkers(options) self._allocate_disk_space() + @property + def entity(self): + # type: (DownloadDescriptor) -> AzureStorageEntity + """Get linked AzureStorageEntity + :param DownloadDescriptor self: this + :rtype: AzureStorageEntity + :return: AzureStorageEntity + """ + return self._ase + + @property + def must_compute_md5(self): + # type: (DownloadDescriptor) -> bool + """Check if MD5 must be computed + :param DownloadDescriptor self: this + :rtype: bool + :return: if MD5 must be computed + """ + return self.md5 is not None + def _initialize_integrity_checkers(self, options): # type: (DownloadDescriptor, DownloadOptions) -> None """Initialize file integrity checkers :param DownloadDescriptor self: this :param DownloadOptions options: download options """ - if self._ase.encryption_metadata is not None: + if self._ase.is_encrypted: self.hmac = self._ase.encryption_metadata.initialize_hmac() if self.hmac is None and options.check_file_md5: self.md5 = blobxfer.md5.new_md5_hasher() @@ -808,7 +884,7 @@ def _allocate_disk_space(self): size = self._ase.size # compute size if size > 0: - if self._ase.encryption_metadata is not None: + if self._ase.is_encrypted: # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ self._AES_BLOCKSIZE @@ -849,7 +925,7 @@ def next_offsets(self): num_bytes = chunk - 1 fd_start = self.offset range_start = self.offset - if self._ase.encryption_metadata is not None: + if self._ase.is_encrypted: # ensure start is AES block size aligned range_start = range_start - (range_start % self._AES_BLOCKSIZE) - \ self._AES_BLOCKSIZE @@ -857,19 +933,33 @@ def next_offsets(self): range_start = 0 range_end = self.offset + num_bytes self.offset += chunk - if (self._ase.encryption_metadata is not None and - self.offset >= self._ase.size): + if self._ase.is_encrypted and self.offset >= self._ase.size: unpad = True else: unpad = False return DownloadOffsets( fd_start=fd_start, - num_bytes=num_bytes, + num_bytes=chunk, range_start=range_start, range_end=range_end, unpad=unpad, ) + @property + def outstanding_operations(self): + with self._meta_lock: + return self._outstanding_ops + + @property + def completed_operations(self): + with self._meta_lock: + return self._completed_ops + + def dec_outstanding_operations(self): + with self._meta_lock: + self._outstanding_ops -= 1 + self._completed_ops += 1 + class AzureDestinationPaths(object): def __init__(self): diff --git a/blobxfer/retry.py b/blobxfer/retry.py new file 
mode 100644
index 0000000..ce42bd4
--- /dev/null
+++ b/blobxfer/retry.py
@@ -0,0 +1,85 @@
+# Copyright (c) Microsoft Corporation
+#
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# compat imports
+from __future__ import (
+    absolute_import, division, print_function, unicode_literals
+)
+from builtins import (  # noqa
+    bytes, dict, int, list, object, range, ascii, chr, hex, input,
+    next, oct, open, pow, round, super, filter, map, zip)
+# stdlib imports
+# non-stdlib imports
+import azure.storage.retry
+# local imports
+
+
+class ExponentialRetryWithMaxWait(azure.storage.retry._Retry):
+    """Exponential Retry with Max Wait (infinite retries)"""
+    def __init__(self, initial_backoff=1, max_backoff=8, reset_at_max=True):
+        # type: (ExponentialRetryWithMaxWait, int, int, bool) -> None
+        """Ctor for ExponentialRetryWithMaxWait
+        :param ExponentialRetryWithMaxWait self: this
+        :param int initial_backoff: initial backoff
+        :param int max_backoff: max backoff
+        :param bool reset_at_max: reset after reaching max wait
+        """
+        if max_backoff < initial_backoff:
+            raise ValueError(
+                'max backoff {} less than initial backoff {}'.format(
+                    max_backoff, initial_backoff))
+        self.initial_backoff = initial_backoff
+        self.max_backoff = max_backoff
+        self.reset_at_max = reset_at_max
+        super(ExponentialRetryWithMaxWait, self).__init__(
+            max_backoff if self.reset_at_max else 2147483647, False)
+
+    def retry(self, context):
+        # type: (ExponentialRetryWithMaxWait,
+        #        azure.storage.models.RetryContext) -> int
+        """Retry handler
+        :param ExponentialRetryWithMaxWait self: this
+        :param azure.storage.models.RetryContext context: retry context
+        :rtype: int or None
+        :return: int
+        """
+        return self._retry(context, self._backoff)
+
+    def _backoff(self, context):
+        # type: (ExponentialRetryWithMaxWait,
+        #        azure.storage.models.RetryContext) -> int
+        """Backoff calculator
+        :param ExponentialRetryWithMaxWait self: this
+        :param azure.storage.models.RetryContext context: retry context
+        :rtype: int
+        :return: backoff amount
+        """
+        if context.count == 1:
+            backoff = self.initial_backoff
+        else:
+            backoff = self.initial_backoff << (context.count - 1)
+        if backoff > self.max_backoff and self.reset_at_max:
+            backoff = self.initial_backoff
+            context.count = 1
+        return backoff
diff --git a/blobxfer/util.py b/blobxfer/util.py
index 9029fb1..c8885f7 100644
--- a/blobxfer/util.py
+++ b/blobxfer/util.py
@@ -221,3 +221,17 @@ def blob_is_snapshot(url):
     except (ValueError,
OverflowError): pass return False + + +def parse_blob_snapshot_parameter(url): + # type: (str) -> str + """Retrieves the blob snapshot parameter from a url + :param url str: blob url + :rtype: str + :return: snapshot parameter + """ + if blob_is_snapshot(url): + tmp = url.split('?snapshot=') + if len(tmp) > 1: + return tmp[-1] + return None diff --git a/tests/test_blobxfer_blob_operations.py b/tests/test_blobxfer_blob_operations.py index 5c078ac..dd635f2 100644 --- a/tests/test_blobxfer_blob_operations.py +++ b/tests/test_blobxfer_blob_operations.py @@ -39,10 +39,9 @@ def test_list_blobs(): None, 'cont', 'prefix', models.AzureStorageModes.File): pass - client = mock.MagicMock() - client.list_blobs = mock.MagicMock() _blob = azure.storage.blob.models.Blob(name='name') _blob.properties = azure.storage.blob.models.BlobProperties() + client = mock.MagicMock() client.list_blobs.return_value = [_blob] i = 0 @@ -76,3 +75,30 @@ def test_list_blobs(): i += 1 assert blob.name == 'name' assert i == 0 + + _blob.snapshot = '2017-02-23T22:21:14.8121864Z' + client.get_blob_properties.return_value = _blob + i = 0 + for blob in ops.list_blobs( + client, 'cont', + 'a?snapshot=2017-02-23T22:21:14.8121864Z', + models.AzureStorageModes.Auto): + i += 1 + assert blob.name == 'name' + assert blob.snapshot == _blob.snapshot + assert i == 1 + + +def test_get_blob_range(): + ase = mock.MagicMock() + ret = mock.MagicMock() + ret.content = b'\0' + ase.client._get_blob.return_value = ret + ase.container = 'cont' + ase.name = 'name' + ase.snapshot = None + offsets = mock.MagicMock() + offsets.start_range = 0 + offsets.end_range = 1 + + assert ops.get_blob_range(ase, offsets) == ret.content diff --git a/tests/test_blobxfer_download.py b/tests/test_blobxfer_download.py index a80c629..6e12bcc 100644 --- a/tests/test_blobxfer_download.py +++ b/tests/test_blobxfer_download.py @@ -184,6 +184,7 @@ def test_post_md5_skip_on_check(): rfile = models.AzureStorageEntity('cont') rfile._md5 = 'abc' d._pre_md5_skip_on_check(lpath, rfile) + d._download_set.add(pathlib.Path(lpath)) assert lpath in d._md5_map d._post_md5_skip_on_check(lpath, True) @@ -191,6 +192,7 @@ def test_post_md5_skip_on_check(): d._add_to_download_queue = mock.MagicMock() d._pre_md5_skip_on_check(lpath, rfile) + d._download_set.add(pathlib.Path(lpath)) d._post_md5_skip_on_check(lpath, False) assert d._add_to_download_queue.call_count == 1 @@ -199,21 +201,25 @@ def test_initialize_check_md5_downloads_thread(): lpath = 'lpath' d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._md5_map[lpath] = mock.MagicMock() + d._download_set.add(pathlib.Path(lpath)) d._md5_offload = mock.MagicMock() d._md5_offload.done_cv = multiprocessing.Condition() d._md5_offload.get_localfile_md5_done = mock.MagicMock() - d._md5_offload.get_localfile_md5_done.side_effect = [None, (lpath, True)] - d._post_md5_skip_on_check = mock.MagicMock() + d._md5_offload.get_localfile_md5_done.side_effect = [None, (lpath, False)] + d._add_to_download_queue = mock.MagicMock() d._initialize_check_md5_downloads_thread() + while len(d._md5_map) > 0: + d._md5_offload.done_cv.acquire() + d._md5_offload.done_cv.notify() + d._md5_offload.done_cv.release() d._all_remote_files_processed = True - d._md5_map.clear() d._md5_offload.done_cv.acquire() d._md5_offload.done_cv.notify() d._md5_offload.done_cv.release() d._md5_check_thread.join() - assert d._post_md5_skip_on_check.call_count == 1 + assert d._add_to_download_queue.call_count == 1 def test_initialize_and_terminate_download_threads(): @@ 
-225,7 +231,7 @@ def test_initialize_and_terminate_download_threads(): d._initialize_download_threads() assert len(d._download_threads) == 2 - d._terminate_download_threads() + d._wait_for_download_threads(terminate=True) assert d._download_terminate for thr in d._download_threads: assert not thr.is_alive() @@ -257,7 +263,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d._spec.sources.append(asp) b = azure.storage.blob.models.Blob(name='name') - b.properties.content_length = 0 + b.properties.content_length = 1 patched_lb.side_effect = [[b]] d._pre_md5_skip_on_check = mock.MagicMock() @@ -270,22 +276,25 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): patched_lb.side_effect = [[b]] d._all_remote_files_processed = False d._check_download_conditions.return_value = dl.DownloadAction.CheckMd5 - d.start() + with pytest.raises(RuntimeError): + d.start() assert d._pre_md5_skip_on_check.call_count == 1 + b.properties.content_length = 0 patched_lb.side_effect = [[b]] d._all_remote_files_processed = False d._check_download_conditions.return_value = dl.DownloadAction.Download - d.start() + with pytest.raises(RuntimeError): + d.start() assert d._download_queue.qsize() == 1 def test_start_keyboard_interrupt(): d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._run = mock.MagicMock(side_effect=KeyboardInterrupt) - d._terminate_download_threads = mock.MagicMock() + d._wait_for_download_threads = mock.MagicMock() d._md5_offload = mock.MagicMock() with pytest.raises(KeyboardInterrupt): d.start() - assert d._terminate_download_threads.call_count == 1 + assert d._wait_for_download_threads.call_count == 1 diff --git a/tests/test_blobxfer_file_operations.py b/tests/test_blobxfer_file_operations.py index fd39912..b221534 100644 --- a/tests/test_blobxfer_file_operations.py +++ b/tests/test_blobxfer_file_operations.py @@ -120,3 +120,17 @@ def test_list_files_directory(patched_cisf): i += 1 assert file.name == 'name' assert i == 1 + + +def test_get_file_range(): + ase = mock.MagicMock() + ret = mock.MagicMock() + ret.content = b'\0' + ase.client._get_file.return_value = ret + ase.container = 'cont' + ase.name = 'name' + offsets = mock.MagicMock() + offsets.start_range = 0 + offsets.end_range = 1 + + assert ops.get_file_range(ase, offsets) == ret.content diff --git a/tests/test_blobxfer_md5.py b/tests/test_blobxfer_md5.py index 05a66d2..7a37072 100644 --- a/tests/test_blobxfer_md5.py +++ b/tests/test_blobxfer_md5.py @@ -32,7 +32,7 @@ def test_compute_md5(tmpdir): def test_done_cv(): a = None try: - a = md5.LocalFileMd5Offload() + a = md5.LocalFileMd5Offload(num_workers=1) assert a.done_cv == a._done_cv finally: if a: @@ -40,9 +40,12 @@ def test_done_cv(): def test_finalize_md5_processes(): + with pytest.raises(ValueError): + md5.LocalFileMd5Offload(num_workers=0) + a = None try: - a = md5.LocalFileMd5Offload(num_workers=0) + a = md5.LocalFileMd5Offload(num_workers=1) finally: if a: a.finalize_md5_processes() diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 3227250..f0e636d 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -17,7 +17,8 @@ import blobxfer.models as models -def test_concurrency_options(): +@mock.patch('multiprocessing.cpu_count', return_value=1) +def test_concurrency_options(patched_cc): a = models.ConcurrencyOptions( crypto_processes=-1, md5_processes=0, @@ -26,7 +27,7 @@ def test_concurrency_options(): assert a.crypto_processes == 1 assert a.md5_processes == 1 - assert 
a.transfer_threads == 1 + assert a.transfer_threads == 2 def test_general_options(): @@ -316,30 +317,37 @@ def test_azurestorageentity(): blob = mock.MagicMock() blob.name = 'name' + blob.snapshot = None blob.properties = mock.MagicMock() blob.properties.last_modified = 'lmt' blob.properties.content_length = 123 blob.properties.content_settings = mock.MagicMock() blob.properties.content_settings.content_md5 = 'abc' blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob - ase.populate_from_blob(blob) + ase.populate_from_blob(mock.MagicMock(), blob) + assert ase.client is not None assert ase.name == 'name' assert ase.lmt == 'lmt' assert ase.size == 123 assert ase.md5 == 'abc' + assert ase.snapshot is None assert ase.mode == models.AzureStorageModes.Block blob.properties.blob_type = azure.storage.blob.models._BlobTypes.AppendBlob - ase.populate_from_blob(blob) + ase.populate_from_blob(mock.MagicMock(), blob) assert ase.mode == models.AzureStorageModes.Append blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob - ase.populate_from_blob(blob) + blob.snapshot = 'abc' + ase.populate_from_blob(mock.MagicMock(), blob) assert ase.mode == models.AzureStorageModes.Page + assert ase.snapshot is not None - ase.populate_from_file(blob) + blob.snapshot = None + ase.populate_from_file(mock.MagicMock(), blob) assert ase.mode == models.AzureStorageModes.File + assert ase.snapshot is None def test_downloaddescriptor(tmpdir): @@ -347,12 +355,15 @@ def test_downloaddescriptor(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True - opts.chunk_size_bytes = 1 + opts.chunk_size_bytes = 16 ase = models.AzureStorageEntity('cont') ase._size = 1024 ase._encryption = mock.MagicMock() d = models.DownloadDescriptor(lp, ase, opts) + assert d.entity == ase + assert not d.must_compute_md5 + assert d._total_chunks == 64 assert d.offset == 0 assert d.final_path == lp assert str(d.local_path) == str(lp) + '.bxtmp' @@ -360,18 +371,20 @@ def test_downloaddescriptor(tmpdir): d.local_path.unlink() ase._size = 1 - d._allocate_disk_space() + d = models.DownloadDescriptor(lp, ase, opts) + assert d._total_chunks == 1 assert d.local_path.stat().st_size == 0 d.local_path.unlink() ase._encryption = None ase._size = 1024 - d._allocate_disk_space() + d = models.DownloadDescriptor(lp, ase, opts) assert d.local_path.stat().st_size == 1024 # pre-existing file check ase._size = 0 - d._allocate_disk_space() + d = models.DownloadDescriptor(lp, ase, opts) + assert d._total_chunks == 0 assert d.local_path.stat().st_size == 0 @@ -386,8 +399,9 @@ def test_downloaddescriptor_next_offsets(tmpdir): d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() + assert d._total_chunks == 1 assert offsets.fd_start == 0 - assert offsets.num_bytes == 127 + assert offsets.num_bytes == 128 assert offsets.range_start == 0 assert offsets.range_end == 127 assert not offsets.unpad @@ -395,13 +409,15 @@ def test_downloaddescriptor_next_offsets(tmpdir): ase._size = 0 d = models.DownloadDescriptor(lp, ase, opts) + assert d._total_chunks == 0 assert d.next_offsets() is None ase._size = 1 d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() + assert d._total_chunks == 1 assert offsets.fd_start == 0 - assert offsets.num_bytes == 0 + assert offsets.num_bytes == 1 assert offsets.range_start == 0 assert offsets.range_end == 0 assert not offsets.unpad @@ -410,8 +426,9 @@ def test_downloaddescriptor_next_offsets(tmpdir): ase._size = 256 d = models.DownloadDescriptor(lp, ase, opts) offsets = 
d.next_offsets() + assert d._total_chunks == 1 assert offsets.fd_start == 0 - assert offsets.num_bytes == 255 + assert offsets.num_bytes == 256 assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad @@ -420,14 +437,15 @@ def test_downloaddescriptor_next_offsets(tmpdir): ase._size = 256 + 16 d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() + assert d._total_chunks == 2 assert offsets.fd_start == 0 - assert offsets.num_bytes == 255 + assert offsets.num_bytes == 256 assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad offsets = d.next_offsets() assert offsets.fd_start == 256 - assert offsets.num_bytes == 15 + assert offsets.num_bytes == 16 assert offsets.range_start == 256 assert offsets.range_end == 256 + 15 assert not offsets.unpad @@ -437,8 +455,9 @@ def test_downloaddescriptor_next_offsets(tmpdir): ase._size = 128 d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() + assert d._total_chunks == 1 assert offsets.fd_start == 0 - assert offsets.num_bytes == 127 + assert offsets.num_bytes == 128 assert offsets.range_start == 0 assert offsets.range_end == 127 assert offsets.unpad @@ -447,8 +466,9 @@ def test_downloaddescriptor_next_offsets(tmpdir): ase._size = 256 d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() + assert d._total_chunks == 1 assert offsets.fd_start == 0 - assert offsets.num_bytes == 255 + assert offsets.num_bytes == 256 assert offsets.range_start == 0 assert offsets.range_end == 255 assert offsets.unpad @@ -457,14 +477,15 @@ def test_downloaddescriptor_next_offsets(tmpdir): ase._size = 256 + 32 # 16 bytes over + padding d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() + assert d._total_chunks == 2 assert offsets.fd_start == 0 - assert offsets.num_bytes == 255 + assert offsets.num_bytes == 256 assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad offsets = d.next_offsets() assert offsets.fd_start == 256 - assert offsets.num_bytes == 31 + assert offsets.num_bytes == 32 assert offsets.range_start == 256 - 16 assert offsets.range_end == 256 + 31 assert offsets.unpad diff --git a/tests/test_blobxfer_retry.py b/tests/test_blobxfer_retry.py new file mode 100644 index 0000000..b66c41e --- /dev/null +++ b/tests/test_blobxfer_retry.py @@ -0,0 +1,39 @@ +# coding=utf-8 +"""Tests for retry""" + +# stdlib imports +import mock +# non-stdlib imports +import pytest +# module under test +import blobxfer.retry as retry + + +def test_exponentialretrywithmaxwait(): + with pytest.raises(ValueError): + er = retry.ExponentialRetryWithMaxWait( + initial_backoff=1, max_backoff=0) + + er = retry.ExponentialRetryWithMaxWait() + context = mock.MagicMock() + context.count = 0 + context.response.status = 500 + bo = er.retry(context) + assert context.count == 1 + assert bo == 1 + + bo = er.retry(context) + assert context.count == 2 + assert bo == 2 + + bo = er.retry(context) + assert context.count == 3 + assert bo == 4 + + bo = er.retry(context) + assert context.count == 4 + assert bo == 8 + + bo = er.retry(context) + assert context.count == 1 + assert bo == 1 diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py index 4dd0ebc..0f94c0e 100644 --- a/tests/test_blobxfer_util.py +++ b/tests/test_blobxfer_util.py @@ -172,3 +172,12 @@ def test_blob_is_snapshot(): a = '/cont/a?snapshot=2017-02-23T22:21:14.8121864Z?snapshot=' assert not blobxfer.util.blob_is_snapshot(a) + + +def 
test_parse_blob_snapshot_parameter(): + param = '2017-02-23T22:21:14.8121864Z' + a = '/cont/a?snapshot=' + param + assert blobxfer.util.parse_blob_snapshot_parameter(a) == param + + a = '/cont/a?snapshot=' + assert blobxfer.util.parse_blob_snapshot_parameter(a) is None From d70d404b466823ec007777cb91b47cb8f0ca7794 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Sun, 26 Feb 2017 02:02:56 -0800 Subject: [PATCH 11/47] Current download parity - Main download logic at parity with current blobxfer - Refactor multiprocess offload into base class - Add multiprocess crypto offload --- blobxfer/crypto/models.py | 11 +- blobxfer/crypto/operations.py | 122 ++++++++- blobxfer/download.py | 231 ++++++++++------ blobxfer/md5.py | 85 +----- blobxfer/models.py | 246 ++++++++++++++---- blobxfer/offload.py | 127 +++++++++ blobxfer/util.py | 10 + cli/settings.py | 4 +- tests/test_blobxfer_blob_append_operations.py | 1 - tests/test_blobxfer_crypto_operations.py | 40 ++- tests/test_blobxfer_download.py | 27 +- tests/test_blobxfer_md5.py | 18 +- tests/test_blobxfer_models.py | 20 +- 13 files changed, 695 insertions(+), 247 deletions(-) create mode 100644 blobxfer/offload.py diff --git a/blobxfer/crypto/models.py b/blobxfer/crypto/models.py index c4bb5b4..e08f6a7 100644 --- a/blobxfer/crypto/models.py +++ b/blobxfer/crypto/models.py @@ -35,20 +35,13 @@ import hashlib import hmac import json -import logging # non-stdlib imports # local imports import blobxfer.crypto.operations import blobxfer.util - # encryption constants -_AES256_KEYLENGTH_BYTES = 32 _AES256_BLOCKSIZE_BYTES = 16 -_HMACSHA256_DIGESTSIZE_BYTES = 32 -_AES256CBC_HMACSHA256_OVERHEAD_BYTES = ( - _AES256_BLOCKSIZE_BYTES + _HMACSHA256_DIGESTSIZE_BYTES -) # named tuples EncryptionBlobxferExtensions = collections.namedtuple( @@ -191,8 +184,8 @@ def convert_from_json(self, md, blobname, rsaprivatekey): ) except KeyError: pass - self.content_encryption_iv = ed[ - EncryptionMetadata._JSON_KEY_CONTENT_IV] + self.content_encryption_iv = base64.b64decode( + ed[EncryptionMetadata._JSON_KEY_CONTENT_IV]) self.encryption_agent = EncryptionAgent( encryption_algorithm=ed[ EncryptionMetadata._JSON_KEY_ENCRYPTION_AGENT][ diff --git a/blobxfer/crypto/operations.py b/blobxfer/crypto/operations.py index 1931407..deeb287 100644 --- a/blobxfer/crypto/operations.py +++ b/blobxfer/crypto/operations.py @@ -31,7 +31,13 @@ next, oct, open, pow, round, super, filter, map, zip) # stdlib imports import base64 +import enum import logging +import os +try: + import queue +except ImportError: # noqa + import Queue as queue # non-stdlib imports import cryptography.hazmat.backends import cryptography.hazmat.primitives.asymmetric.padding @@ -44,7 +50,13 @@ import cryptography.hazmat.primitives.padding import cryptography.hazmat.primitives.serialization # local imports -import blobxfer.util +import blobxfer.offload + +# create logger +logger = logging.getLogger(__name__) + +# encryption constants +_AES256_KEYLENGTH_BYTES = 32 def load_rsa_private_key_file(rsakeyfile, passphrase): @@ -130,7 +142,7 @@ def rsa_encrypt_key_base64_encoded(rsaprivatekey, rsapublickey, plainkey): return blobxfer.util.base64_encode_as_string(enckey) -def pad_pkcs7(buf): +def pkcs7_pad(buf): # type: (bytes) -> bytes """Appends PKCS7 padding to an input buffer :param bytes buf: buffer to add padding @@ -143,7 +155,7 @@ def pad_pkcs7(buf): return padder.update(buf) + padder.finalize() -def unpad_pkcs7(buf): +def pkcs7_unpad(buf): # type: (bytes) -> bytes """Removes PKCS7 padding a decrypted object :param bytes buf: 
buffer to remove padding @@ -154,3 +166,107 @@ def unpad_pkcs7(buf): cryptography.hazmat.primitives.ciphers. algorithms.AES.block_size).unpadder() return unpadder.update(buf) + unpadder.finalize() + + +def aes256_generate_random_key(): + # type: (None) -> bytes + """Generate random AES256 key + :rtype: bytes + :return: random key + """ + return os.urandom(_AES256_KEYLENGTH_BYTES) + + +def aes_cbc_decrypt_data(symkey, iv, encdata, unpad): + # type: (bytes, bytes, bytes, bool) -> bytes + """Decrypt data using AES CBC + :param bytes symkey: symmetric key + :param bytes iv: initialization vector + :param bytes encdata: data to decrypt + :param bool unpad: unpad data + :rtype: bytes + :return: decrypted data + """ + cipher = cryptography.hazmat.primitives.ciphers.Cipher( + cryptography.hazmat.primitives.ciphers.algorithms.AES(symkey), + cryptography.hazmat.primitives.ciphers.modes.CBC(iv), + backend=cryptography.hazmat.backends.default_backend()).decryptor() + decrypted = cipher.update(encdata) + cipher.finalize() + if unpad: + return pkcs7_unpad(decrypted) + else: + return decrypted + + +def aes_cbc_encrypt_data(symkey, iv, data, pad): + # type: (bytes, bytes, bytes, bool) -> bytes + """Encrypt data using AES CBC + :param bytes symkey: symmetric key + :param bytes iv: initialization vector + :param bytes data: data to encrypt + :param bool pad: pad data + :rtype: bytes + :return: encrypted data + """ + cipher = cryptography.hazmat.primitives.ciphers.Cipher( + cryptography.hazmat.primitives.ciphers.algorithms.AES(symkey), + cryptography.hazmat.primitives.ciphers.modes.CBC(iv), + backend=cryptography.hazmat.backends.default_backend()).encryptor() + if pad: + return cipher.update(pkcs7_pad(data)) + cipher.finalize() + else: + return cipher.update(data) + cipher.finalize() + + +class CryptoAction(enum.Enum): + Encrypt = 1 + Decrypt = 2 + + +class CryptoOffload(blobxfer.offload._MultiprocessOffload): + def __init__(self, num_workers): + # type: (CryptoOffload, int) -> None + """Ctor for Crypto Offload + :param CryptoOffload self: this + :param int num_workers: number of worker processes + """ + super(CryptoOffload, self).__init__(num_workers, 'Crypto') + + def _worker_process(self): + # type: (CryptoOffload) -> None + """Crypto worker + :param CryptoOffload self: this + """ + while not self.terminated: + try: + inst = self._task_queue.get(True, 1) + except queue.Empty: + continue + if inst[0] == CryptoAction.Encrypt: + # TODO on upload + raise NotImplementedError() + elif inst[0] == CryptoAction.Decrypt: + final_path, offsets, symkey, iv, encdata = \ + inst[1], inst[2], inst[3], inst[4], inst[5] + data = aes_cbc_decrypt_data(symkey, iv, encdata, offsets.unpad) + self._done_cv.acquire() + self._done_queue.put((final_path, offsets, data)) + self._done_cv.notify() + self._done_cv.release() + + def add_decrypt_chunk( + self, final_path, offsets, symkey, iv, encdata): + # type: (CryptoOffload, str, blobxfer.models.DownloadOffsets, bytes, + # bytes, bytes) -> None + """Add a chunk to decrypt + :param CryptoOffload self: this + :param str final_path: final path + :param blobxfer.models.DownloadOffsets offsets: offsets + :param bytes symkey: symmetric key + :param bytes iv: initialization vector + :param bytes encdata: encrypted data + """ + self._task_queue.put( + (CryptoAction.Decrypt, final_path, offsets, symkey, iv, + encdata) + ) diff --git a/blobxfer/download.py b/blobxfer/download.py index 970bf18..65878bb 100644 --- a/blobxfer/download.py +++ b/blobxfer/download.py @@ -43,9 +43,12 @@ except 
ImportError: # noqa import Queue as queue import threading +import time # non-stdlib imports import dateutil # local imports +import blobxfer.crypto.models +import blobxfer.crypto.operations import blobxfer.md5 import blobxfer.models import blobxfer.operations @@ -75,18 +78,20 @@ def __init__(self, general_options, creds, spec): :param blobxfer.models.AzureStorageCredentials creds: creds :param blobxfer.models.DownloadSpecification spec: download spec """ - self._md5_meta_lock = threading.Lock() - self._download_lock = threading.Lock() + self._time_start = None self._all_remote_files_processed = False + self._crypto_offload = None + self._md5_meta_lock = threading.Lock() self._md5_map = {} self._md5_offload = None - self._md5_check_thread = None + self._download_lock = threading.Lock() self._download_queue = queue.Queue() self._download_set = set() self._download_threads = [] self._download_count = 0 self._download_total_bytes = 0 self._download_terminate = False + self._dd_map = {} self._general_options = general_options self._creds = creds self._spec = spec @@ -177,46 +182,69 @@ def _post_md5_skip_on_check(self, filename, md5_match): else: self._add_to_download_queue(lpath, rfile) - def _initialize_check_md5_downloads_thread(self): + def _check_for_downloads_from_md5(self): # type: (Downloader) -> None - """Initialize the md5 done queue check thread + """Check queue for a file to download :param Downloader self: this """ - def _check_for_downloads_from_md5(self): - # type: (Downloader) -> None - """Check queue for a file to download - :param Downloader self: this - """ - cv = self._md5_offload.done_cv - while True: - with self._md5_meta_lock: - if (self._download_terminate or - (len(self._md5_map) == 0 and - self._all_remote_files_processed)): - break - result = None - cv.acquire() - while not self._download_terminate: - result = self._md5_offload.get_localfile_md5_done() - if result is None: - # use cv timeout due to possible non-wake while running - cv.wait(1) - # check for terminating conditions - with self._md5_meta_lock: - if (len(self._md5_map) == 0 and - self._all_remote_files_processed): - break - else: - break - cv.release() - if result is not None: - self._post_md5_skip_on_check(result[0], result[1]) + cv = self._md5_offload.done_cv + while True: + with self._md5_meta_lock: + if (self._download_terminate or + (self._all_remote_files_processed and + len(self._md5_map) == 0 and + len(self._download_set) == 0)): + break + result = None + cv.acquire() + while not self._download_terminate: + result = self._md5_offload.pop_done_queue() + if result is None: + # use cv timeout due to possible non-wake while running + cv.wait(1) + # check for terminating conditions + with self._md5_meta_lock: + if (self._all_remote_files_processed and + len(self._md5_map) == 0 and + len(self._download_set) == 0): + break + else: + break + cv.release() + if result is not None: + self._post_md5_skip_on_check(result[0], result[1]) - self._md5_check_thread = threading.Thread( - target=_check_for_downloads_from_md5, - args=(self,) - ) - self._md5_check_thread.start() + def _check_for_crypto_done(self): + # type: (Downloader) -> None + """Check queue for crypto done + :param Downloader self: this + """ + cv = self._crypto_offload.done_cv + while True: + with self._download_lock: + if (self._download_terminate or + (self._all_remote_files_processed and + len(self._download_set) == 0)): + break + result = None + cv.acquire() + while not self._download_terminate: + result = self._crypto_offload.pop_done_queue() 
+ if result is None: + # use cv timeout due to possible non-wake while running + cv.wait(1) + # check for terminating conditions + with self._download_lock: + if (self._all_remote_files_processed and + len(self._download_set) == 0): + break + else: + break + cv.release() + if result is not None: + with self._download_lock: + dd = self._dd_map[result[0]] + self._complete_chunk_download(result[1], result[2], dd) def _add_to_download_queue(self, lpath, rfile): # type: (Downloader, pathlib.Path, @@ -229,6 +257,9 @@ def _add_to_download_queue(self, lpath, rfile): # prepare remote file for download dd = blobxfer.models.DownloadDescriptor( lpath, rfile, self._spec.options) + if dd.entity.is_encrypted: + with self._download_lock: + self._dd_map[str(dd.final_path)] = dd # add download descriptor to queue self._download_queue.put(dd) @@ -250,7 +281,8 @@ def _wait_for_download_threads(self, terminate): :param Downloader self: this :param bool terminate: terminate threads """ - self._download_terminate = terminate + if terminate: + self._download_terminate = terminate for thr in self._download_threads: thr.join() @@ -273,17 +305,15 @@ def _worker_thread_download(self): # get download offsets offsets = dd.next_offsets() # check if all operations completed - if offsets is None and dd.outstanding_operations == 0: - # TODO - # 1. complete integrity checks - # 2. set file uid/gid - # 3. set file modes - # 4. move file to final path + if offsets is None and dd.all_operations_completed: + # finalize file + dd.finalize_file() + # accounting with self._download_lock: + if dd.entity.is_encrypted: + self._dd_map.pop(str(dd.final_path)) self._download_set.remove(dd.final_path) self._download_count += 1 - logger.info('download complete: {}/{} to {}'.format( - dd.entity.container, dd.entity.name, dd.final_path)) continue # re-enqueue for other threads to download self._download_queue.put(dd) @@ -291,39 +321,60 @@ def _worker_thread_download(self): continue # issue get range if dd.entity.mode == blobxfer.models.AzureStorageModes.File: - chunk = blobxfer.file.operations.get_file_range( + data = blobxfer.file.operations.get_file_range( dd.entity, offsets, self._general_options.timeout_sec) else: - chunk = blobxfer.blob.operations.get_blob_range( + data = blobxfer.blob.operations.get_blob_range( dd.entity, offsets, self._general_options.timeout_sec) # accounting with self._download_lock: self._download_total_bytes += offsets.num_bytes # decrypt if necessary if dd.entity.is_encrypted: - # TODO via crypto pool - # 1. compute rolling hmac if present - # - roll through any subsequent unchecked parts - # 2. 
decrypt chunk - pass - # compute rolling md5 via md5 pool - if dd.must_compute_md5: - # TODO - # - roll through any subsequent unchecked parts - pass - - # write data to disk - - # if no integrity check could be performed due to current - # integrity offset mismatch, add to unchecked set - - dd.dec_outstanding_operations() - - # pickle dd to resume file + # slice data to proper bounds + encdata = data[blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES:] + intdata = encdata + # get iv for chunk and compute hmac + if offsets.chunk_num == 0: + iv = dd.entity.encryption_metadata.content_encryption_iv + # integrity check for first chunk must include iv + intdata = iv + data + else: + iv = data[:blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES] + # integrity check data + dd.perform_chunked_integrity_check(offsets, intdata) + # decrypt data + if self._crypto_offload is not None: + self._crypto_offload.add_decrypt_chunk( + str(dd.final_path), offsets, + dd.entity.encryption_metadata.symmetric_key, + iv, encdata) + # data will be completed once retrieved from crypto queue + continue + else: + data = blobxfer.crypto.operations.aes_cbc_decrypt_data( + dd.entity.encryption_metadata.symmetric_key, + iv, encdata, offsets.unpad) + elif dd.must_compute_md5: + # rolling compute md5 + dd.perform_chunked_integrity_check(offsets, data) + # complete chunk download + self._complete_chunk_download(offsets, data, dd) -# rfile = dd._ase -# print('<<', rfile.container, rfile.name, rfile.lmt, rfile.size, -# rfile.md5, rfile.mode, rfile.encryption_metadata) + def _complete_chunk_download(self, offsets, data, dd): + # type: (Downloader, blobxfer.models.DownloadOffsets, bytes, + # blobxfer.models.DownloadDescriptor) -> None + """Complete chunk download + :param Downloader self: this + :param blobxfer.models.DownloadOffsets offsets: offsets + :param bytes data: data + :param blobxfer.models.DownloadDescriptor dd: download descriptor + """ + # write data to disk + dd.write_data(offsets, data) + # decrement outstanding operations + dd.dec_outstanding_operations() + # TODO pickle dd to resume file def _run(self): # type: (Downloader) -> None @@ -335,7 +386,14 @@ def _run(self): # initialize MD5 processes self._md5_offload = blobxfer.md5.LocalFileMd5Offload( num_workers=self._general_options.concurrency.md5_processes) - self._initialize_check_md5_downloads_thread() + self._md5_offload.initialize_check_thread( + self._check_for_downloads_from_md5) + # initialize crypto processes + if self._general_options.concurrency.crypto_processes > 0: + self._crypto_offload = blobxfer.crypto.operations.CryptoOffload( + num_workers=self._general_options.concurrency.crypto_processes) + self._crypto_offload.initialize_check_thread( + self._check_for_crypto_done) # initialize download threads self._initialize_download_threads() # iterate through source paths to download @@ -344,6 +402,7 @@ def _run(self): skipped_files = 0 total_size = 0 skipped_size = 0 + self._time_start = time.clock() for src in self._spec.sources: for rfile in src.files( self._creds, self._spec.options, self._general_options): @@ -369,33 +428,41 @@ def _run(self): self._add_to_download_queue(lpath, rfile) download_files = nfiles - skipped_files download_size = total_size - skipped_size + download_size_mib = download_size / 1048576 # clean up processes and threads with self._md5_meta_lock: self._all_remote_files_processed = True logger.debug( ('{0} remote files processed, waiting for download completion ' - 'of {1:.4f} MiB').format(nfiles, download_size / 1048576)) - 
self._md5_check_thread.join() + 'of {1:.4f} MiB').format(nfiles, download_size_mib)) self._wait_for_download_threads(terminate=False) - self._md5_offload.finalize_md5_processes() + end = time.clock() + runtime = end - self._time_start if (self._download_count != download_files or self._download_total_bytes != download_size): raise RuntimeError( 'download mismatch: [count={}/{} bytes={}/{}]'.format( self._download_count, download_files, self._download_total_bytes, download_size)) - logger.info('all files downloaded') + logger.info('all files downloaded: {0:.3f} sec {1:.4f} Mbps'.format( + runtime, download_size_mib * 8 / runtime)) def start(self): # type: (Downloader) -> None """Start the Downloader""" try: self._run() - except KeyboardInterrupt: - logger.error( - 'KeyboardInterrupt detected, force terminating ' - 'processes and threads (this may take a while)...') + except (KeyboardInterrupt, Exception) as ex: + if isinstance(ex, KeyboardInterrupt): + logger.error( + 'KeyboardInterrupt detected, force terminating ' + 'processes and threads (this may take a while)...') self._wait_for_download_threads(terminate=True) - self._md5_offload.finalize_md5_processes() + # TODO delete all temp files # TODO close resume file in finally? raise + finally: + if self._md5_offload is not None: + self._md5_offload.finalize_processes() + if self._crypto_offload is not None: + self._crypto_offload.finalize_processes() diff --git a/blobxfer/md5.py b/blobxfer/md5.py index 741e360..84e85cc 100644 --- a/blobxfer/md5.py +++ b/blobxfer/md5.py @@ -30,8 +30,6 @@ ) # stdlib imports import logging -import hashlib -import multiprocessing try: import queue except ImportError: # noqa @@ -39,21 +37,14 @@ # non-stdlib imports # local imports import blobxfer.download +import blobxfer.models +import blobxfer.offload import blobxfer.util # create logger logger = logging.getLogger(__name__) -def new_md5_hasher(): - # type: (None) -> md5.MD5 - """Create a new MD5 hasher - :rtype: md5.MD5 - :return: new MD5 hasher - """ - return hashlib.md5() - - def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): # type: (str, bool, int) -> str """Compute MD5 hash for file and encode as Base64 @@ -63,7 +54,7 @@ def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): :rtype: str :return: MD5 for file encoded as Base64 """ - hasher = new_md5_hasher() + hasher = blobxfer.util.new_md5_hasher() with open(filename, 'rb') as filedesc: while True: buf = filedesc.read(blocksize) @@ -85,12 +76,12 @@ def compute_md5_for_data_asbase64(data): :rtype: str :return: MD5 for data """ - hasher = new_md5_hasher() + hasher = blobxfer.util.new_md5_hasher() hasher.update(data) return blobxfer.util.base64_encode_as_string(hasher.digest()) -class LocalFileMd5Offload(object): +class LocalFileMd5Offload(blobxfer.offload._MultiprocessOffload): """LocalFileMd5Offload""" def __init__(self, num_workers): # type: (LocalFileMd5Offload, int) -> None @@ -98,52 +89,14 @@ def __init__(self, num_workers): :param LocalFileMd5Offload self: this :param int num_workers: number of worker processes """ - self._task_queue = multiprocessing.Queue() - self._done_queue = multiprocessing.Queue() - self._done_cv = multiprocessing.Condition() - self._term_signal = multiprocessing.Value('i', 0) - self._md5_procs = [] - self._initialize_md5_processes(num_workers) - - @property - def done_cv(self): - # type: (LocalFileMd5Offload) -> multiprocessing.Condition - """Get Download Done condition variable - :param LocalFileMd5Offload self: this - 
:rtype: multiprocessing.Condition - :return: cv for download done - """ - return self._done_cv - - def _initialize_md5_processes(self, num_workers): - # type: (LocalFileMd5Offload, int) -> None - """Initialize MD5 checking processes for files for download - :param LocalFileMd5Offload self: this - :param int num_workers: number of worker processes - """ - if num_workers is None or num_workers < 1: - raise ValueError('invalid num_workers: {}'.format(num_workers)) - for _ in range(num_workers): - proc = multiprocessing.Process( - target=self._worker_compute_md5_localfile_process) - proc.start() - self._md5_procs.append(proc) + super(LocalFileMd5Offload, self).__init__(num_workers, 'MD5') - def finalize_md5_processes(self): - # type: (LocalFileMd5Offload) -> None - """Finalize MD5 checking processes for files for download - :param LocalFileMd5Offload self: this - """ - self._term_signal.value = 1 - for proc in self._md5_procs: - proc.join() - - def _worker_compute_md5_localfile_process(self): + def _worker_process(self): # type: (LocalFileMd5Offload) -> None """Compute MD5 for local file :param LocalFileMd5Offload self: this """ - while self._term_signal.value == 0: + while not self.terminated: try: filename, remote_md5, pagealign = self._task_queue.get(True, 1) except queue.Empty: @@ -153,31 +106,17 @@ def _worker_compute_md5_localfile_process(self): md5, remote_md5, filename)) self._done_cv.acquire() self._done_queue.put((filename, md5 == remote_md5)) - self.done_cv.notify() - self.done_cv.release() - - def get_localfile_md5_done(self): - # type: (LocalFileMd5Offload) -> Tuple[str, bool] - """Get from done queue of local files with MD5 completed - :param LocalFileMd5Offload self: this - :rtype: tuple or None - :return: (local file path, md5 match) - """ - try: - return self._done_queue.get_nowait() - except queue.Empty: - return None + self._done_cv.notify() + self._done_cv.release() def add_localfile_for_md5_check(self, filename, remote_md5, mode): # type: (LocalFileMd5Offload, str, str, - # blobxfer.models.AzureStorageModes) -> bool - """Check an MD5 for a file for download + # blobxfer.models.AzureStorageModes) -> None + """Add a local file to MD5 check queue :param LocalFileMd5Offload self: this :param str filename: file to compute MD5 for :param str remote_md5: remote MD5 to compare against :param blobxfer.models.AzureStorageModes mode: mode - :rtype: bool - :return: MD5 match comparison """ if mode == blobxfer.models.AzureStorageModes.Page: pagealign = True diff --git a/blobxfer/models.py b/blobxfer/models.py index 74809eb..72b1291 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -41,6 +41,8 @@ except ImportError: # noqa import pathlib import multiprocessing +import tempfile +import threading # non-stdlib imports # local imports from .api import ( @@ -53,7 +55,6 @@ import blobxfer.blob.operations import blobxfer.file.operations import blobxfer.crypto.models -import blobxfer.md5 import blobxfer.util # create logger @@ -126,6 +127,7 @@ class AzureStorageModes(enum.Enum): ) DownloadOffsets = collections.namedtuple( 'DownloadOffsets', [ + 'chunk_num', 'fd_start', 'num_bytes', 'range_end', @@ -133,6 +135,14 @@ class AzureStorageModes(enum.Enum): 'unpad', ] ) +UncheckedChunk = collections.namedtuple( + 'UncheckedChunk', [ + 'data_len', + 'fd_start', + 'file_path', + 'temp', + ] +) class ConcurrencyOptions(object): @@ -147,16 +157,16 @@ def __init__(self, crypto_processes, md5_processes, transfer_threads): self.crypto_processes = crypto_processes self.md5_processes = md5_processes 
self.transfer_threads = transfer_threads + # allow crypto processes to be zero (which will inline crypto + # routines with main process) if self.crypto_processes is None or self.crypto_processes < 1: - self.crypto_processes = multiprocessing.cpu_count() // 2 - 1 - if self.crypto_processes < 1: - self.crypto_processes = 1 + self.crypto_processes = 0 if self.md5_processes is None or self.md5_processes < 1: self.md5_processes = multiprocessing.cpu_count() // 2 if self.md5_processes < 1: self.md5_processes = 1 if self.transfer_threads is None or self.transfer_threads < 1: - self.transfer_threads = multiprocessing.cpu_count() * 2 + self.transfer_threads = multiprocessing.cpu_count() * 3 class GeneralOptions(object): @@ -824,7 +834,8 @@ def __init__(self, lpath, ase, options): _tmp = list(lpath.parts[:-1]) _tmp.append(lpath.name + '.bxtmp') self.local_path = pathlib.Path(*_tmp) - self._meta_lock = multiprocessing.Lock() + self._meta_lock = threading.Lock() + self._hasher_lock = threading.Lock() self._ase = ase # calculate the total number of ops required for transfer self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) @@ -835,9 +846,10 @@ def __init__(self, lpath, ase, options): self._total_chunks = 0 self.hmac = None self.md5 = None - self.offset = 0 - self.integrity_counter = 0 - self.unchecked_chunks = set() + self._offset = 0 + self._chunk_num = 0 + self._next_integrity_chunk = 0 + self._unchecked_chunks = {} self._outstanding_ops = self._total_chunks self._completed_ops = 0 # initialize checkers and allocate space @@ -871,9 +883,15 @@ def _initialize_integrity_checkers(self, options): :param DownloadOptions options: download options """ if self._ase.is_encrypted: + # ensure symmetric key exists + if blobxfer.util.is_none_or_empty( + self._ase.encryption_metadata.symmetric_key): + raise RuntimeError( + 'symmetric key is invalid: provide RSA private key ' + 'or metadata corrupt') self.hmac = self._ase.encryption_metadata.initialize_hmac() if self.hmac is None and options.check_file_md5: - self.md5 = blobxfer.md5.new_md5_hasher() + self.md5 = blobxfer.util.new_md5_hasher() def _allocate_disk_space(self): # type: (DownloadDescriptor, int) -> None @@ -912,48 +930,182 @@ def next_offsets(self): :rtype: DownloadOffsets :return: download offsets """ - if self.offset >= self._ase.size: - return None - if self.offset + self._chunk_size > self._ase.size: - chunk = self._ase.size - self.offset - else: - chunk = self._chunk_size - # on download, num_bytes must be offset by -1 as the x-ms-range - # header expects it that way. x -> y bytes means first bits of the - # (x+1)th byte to the last bits of the (y+1)th byte. for example, - # 0 -> 511 means byte 1 to byte 512 - num_bytes = chunk - 1 - fd_start = self.offset - range_start = self.offset - if self._ase.is_encrypted: - # ensure start is AES block size aligned - range_start = range_start - (range_start % self._AES_BLOCKSIZE) - \ - self._AES_BLOCKSIZE - if range_start <= 0: - range_start = 0 - range_end = self.offset + num_bytes - self.offset += chunk - if self._ase.is_encrypted and self.offset >= self._ase.size: - unpad = True + with self._meta_lock: + if self._offset >= self._ase.size: + return None + if self._offset + self._chunk_size > self._ase.size: + chunk = self._ase.size - self._offset + else: + chunk = self._chunk_size + # on download, num_bytes must be offset by -1 as the x-ms-range + # header expects it that way. x -> y bytes means first bits of the + # (x+1)th byte to the last bits of the (y+1)th byte. 
for example, + # 0 -> 511 means byte 1 to byte 512 + num_bytes = chunk - 1 + chunk_num = self._chunk_num + fd_start = self._offset + range_start = self._offset + if self._ase.is_encrypted: + # ensure start is AES block size aligned + range_start = range_start - \ + (range_start % self._AES_BLOCKSIZE) - \ + self._AES_BLOCKSIZE + if range_start <= 0: + range_start = 0 + range_end = self._offset + num_bytes + self._offset += chunk + self._chunk_num += 1 + if self._ase.is_encrypted and self._offset >= self._ase.size: + unpad = True + else: + unpad = False + return DownloadOffsets( + chunk_num=chunk_num, + fd_start=fd_start, + num_bytes=chunk, + range_start=range_start, + range_end=range_end, + unpad=unpad, + ) + + def _postpone_integrity_check(self, offsets, data): + # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + """Postpone integrity check for chunk + :param DownloadDescriptor self: this + :param DownloadOffsets offsets: download offsets + :param bytes data: data + """ + if self.must_compute_md5: + with self.local_path.open('r+b') as fd: + fd.seek(offsets.fd_start, 0) + fd.write(data) + unchecked = UncheckedChunk( + data_len=len(data), + fd_start=offsets.fd_start, + file_path=self.local_path, + temp=False, + ) else: - unpad = False - return DownloadOffsets( - fd_start=fd_start, - num_bytes=chunk, - range_start=range_start, - range_end=range_end, - unpad=unpad, - ) - - @property - def outstanding_operations(self): + fname = None + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as fd: + fname = fd.name + fd.write(data) + unchecked = UncheckedChunk( + data_len=len(data), + fd_start=0, + file_path=pathlib.Path(fname), + temp=True, + ) with self._meta_lock: - return self._outstanding_ops + self._unchecked_chunks[offsets.chunk_num] = unchecked + + def perform_chunked_integrity_check(self, offsets, data): + # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + """Hash data against stored MD5 hasher safely + :param DownloadDescriptor self: this + :param DownloadOffsets offsets: download offsets + :param bytes data: data + """ + self_check = False + hasher = self.hmac or self.md5 + # iterate from next chunk to be checked + while True: + ucc = None + with self._meta_lock: + chunk_num = self._next_integrity_chunk + # check if the next chunk is ready + if chunk_num in self._unchecked_chunks: + ucc = self._unchecked_chunks.pop(chunk_num) + elif chunk_num != offsets.chunk_num: + break + # prepare data for hashing + if ucc is None: + chunk = data + self_check = True + else: + with ucc.file_path.open('rb') as fd: + fd.seek(ucc.fd_start, 0) + chunk = fd.read(ucc.data_len) + if ucc.temp: + ucc.file_path.unlink() + # hash data and set next integrity chunk + with self._hasher_lock: + hasher.update(chunk) + with self._meta_lock: + self._next_integrity_chunk += 1 + # store data that hasn't been checked + if not self_check: + self._postpone_integrity_check(offsets, data) + + def write_data(self, offsets, data): + # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + """Postpone integrity check for chunk + :param DownloadDescriptor self: this + :param DownloadOffsets offsets: download offsets + :param bytes data: data + """ + with self.local_path.open('r+b') as fd: + fd.seek(offsets.fd_start, 0) + fd.write(data) + + def finalize_file(self): + # type: (DownloadDescriptor) -> Tuple[bool, str] + """Finalize file download + :param DownloadDescriptor self: this + :rtype: tuple + :return (if integrity check passed or not, message) + """ + # check final file integrity + check = 
False + msg = None + if self.hmac is not None: + mac = self._ase.encryption_metadata.encryption_authentication.\ + message_authentication_code + digest = blobxfer.util.base64_encode_as_string(self.hmac.digest()) + if digest == mac: + check = True + msg = '{}: {}, {} {} {}'.format( + self._ase.encryption_metadata.encryption_authentication. + algorithm, + 'OK' if check else 'MISMATCH', + self._ase.name, + digest, + mac, + ) + elif self.md5 is not None: + digest = blobxfer.util.base64_encode_as_string(self.md5.digest()) + if digest == self._ase.md5: + check = True + msg = 'MD5: {}, {} {} {}'.format( + 'OK' if check else 'MISMATCH', + self._ase.name, + digest, + self._ase.md5, + ) + else: + check = True + msg = 'MD5: SKIPPED, {} None {}'.format( + self._ase.name, + self._ase.md5 + ) + # cleanup if download failed + if not check: + logger.error(msg) + # delete temp download file + self.local_path.unlink() + return + logger.debug(msg) + + # TODO set file uid/gid and mode + + # move temp download file to final path + self.local_path.rename(self.final_path) @property - def completed_operations(self): + def all_operations_completed(self): with self._meta_lock: - return self._completed_ops + return (self._outstanding_ops == 0 and + len(self._unchecked_chunks) == 0) def dec_outstanding_operations(self): with self._meta_lock: diff --git a/blobxfer/offload.py b/blobxfer/offload.py new file mode 100644 index 0000000..80f84fe --- /dev/null +++ b/blobxfer/offload.py @@ -0,0 +1,127 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
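The finalize_file logic above reduces to comparing base64-encoded digests: the HMAC-SHA256 (or MD5) computed over the downloaded bytes against the value stored in the entity metadata. A minimal standalone sketch of that comparison, assuming a raw signing key, the downloaded bytes, and the stored base64 MAC string (the names here are illustrative, not the module's API):

    import base64
    import hashlib
    import hmac

    def hmac_sha256_matches(sign_key, data, stored_mac_b64):
        # compute HMAC-SHA256 over the downloaded bytes, base64-encode the
        # digest, and compare it to the stored message authentication code
        digest = base64.b64encode(
            hmac.new(sign_key, data, digestmod=hashlib.sha256).digest()
        ).decode('ascii')
        return digest == stored_mac_b64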
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import logging +import multiprocessing +import threading +try: + import queue +except ImportError: # noqa + import Queue as queue + +# create logger +logger = logging.getLogger(__name__) + + +class _MultiprocessOffload(object): + def __init__(self, num_workers, description=None): + # type: (_MultiprocessOffload, int, str) -> None + """Ctor for Crypto Offload + :param _MultiprocessOffload self: this + :param int num_workers: number of worker processes + :param str description: description + """ + self._task_queue = multiprocessing.Queue() + self._done_queue = multiprocessing.Queue() + self._done_cv = multiprocessing.Condition() + self._term_signal = multiprocessing.Value('i', 0) + self._procs = [] + self._check_thread = None + self._initialize_processes(num_workers, description) + + @property + def done_cv(self): + # type: (_MultiprocessOffload) -> multiprocessing.Condition + """Get Done condition variable + :param _MultiprocessOffload self: this + :rtype: multiprocessing.Condition + :return: cv for download done + """ + return self._done_cv + + @property + def terminated(self): + # type: (_MultiprocessOffload) -> bool + """Check if terminated + :param _MultiprocessOffload self: this + :rtype: bool + :return: if terminated + """ + return self._term_signal.value == 1 + + def _initialize_processes(self, num_workers, description): + # type: (_MultiprocessOffload, int, str) -> None + """Initialize processes + :param _MultiprocessOffload self: this + :param int num_workers: number of worker processes + :param str description: description + """ + if num_workers is None or num_workers < 1: + raise ValueError('invalid num_workers: {}'.format(num_workers)) + logger.debug('initializing {}{} processes'.format( + num_workers, ' ' + description if not None else '')) + for _ in range(num_workers): + proc = multiprocessing.Process(target=self._worker_process) + proc.start() + self._procs.append(proc) + + def finalize_processes(self): + # type: (_MultiprocessOffload) -> None + """Finalize processes + :param _MultiprocessOffload self: this + """ + self._term_signal.value = 1 + if self._check_thread is not None: + self._check_thread.join() + for proc in self._procs: + proc.join() + + def pop_done_queue(self): + # type: (_MultiprocessOffload) -> object + """Get item from done queue + :param _MultiprocessOffload self: this + :rtype: object or None + :return: object from done queue, if exists + """ + try: + return self._done_queue.get_nowait() + except queue.Empty: + return None + + def initialize_check_thread(self, check_func): + # type: (_MultiprocessOffload, object) -> None + """Initialize the crypto done queue check thread + :param Downloader self: this + :param object check_func: check function + """ + self._check_thread = threading.Thread(target=check_func) + self._check_thread.start() diff --git a/blobxfer/util.py b/blobxfer/util.py index c8885f7..eec47a9 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -32,6 +32,7 @@ import base64 import copy import dateutil +import hashlib import logging import logging.handlers import mimetypes @@ -164,6 +165,15 @@ def base64_decode_string(string): return base64.b64decode(string) +def new_md5_hasher(): + # type: (None) -> md5.MD5 + """Create a new MD5 hasher + :rtype: md5.MD5 + 
:return: new MD5 hasher + """ + return hashlib.md5() + + def page_align_content_length(length): # type: (int) -> int """Compute page boundary alignment diff --git a/cli/settings.py b/cli/settings.py index 448d0a8..8e5db75 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -265,9 +265,9 @@ def create_download_specifications(config): elif confmode == 'block': mode = blobxfer.models.AzureStorageModes.Block elif confmode == 'file': - mode == blobxfer.models.AzureStorageModes.File + mode = blobxfer.models.AzureStorageModes.File elif confmode == 'page': - mode == blobxfer.models.AzureStorageModes.Page + mode = blobxfer.models.AzureStorageModes.Page else: raise ValueError('unknown mode: {}'.format(confmode)) # load RSA private key PEM file if specified diff --git a/tests/test_blobxfer_blob_append_operations.py b/tests/test_blobxfer_blob_append_operations.py index b4ad982..e207057 100644 --- a/tests/test_blobxfer_blob_append_operations.py +++ b/tests/test_blobxfer_blob_append_operations.py @@ -4,7 +4,6 @@ # stdlib imports # non-stdlib imports import azure.storage -import pytest # local imports import blobxfer.models as models # module under test diff --git a/tests/test_blobxfer_crypto_operations.py b/tests/test_blobxfer_crypto_operations.py index a37be4f..88990e5 100644 --- a/tests/test_blobxfer_crypto_operations.py +++ b/tests/test_blobxfer_crypto_operations.py @@ -46,6 +46,42 @@ def test_rsa_encrypt_decrypt_keys(): def test_pkcs7_padding(): buf = os.urandom(32) - pbuf = ops.pad_pkcs7(buf) - buf2 = ops.unpad_pkcs7(pbuf) + pbuf = ops.pkcs7_pad(buf) + buf2 = ops.pkcs7_unpad(pbuf) assert buf == buf2 + + +def test_aes_cbc_encryption(): + enckey = ops.aes256_generate_random_key() + assert len(enckey) == ops._AES256_KEYLENGTH_BYTES + + # test random binary data, unaligned + iv = os.urandom(16) + plaindata = os.urandom(31) + encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, True) + assert encdata != plaindata + decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, True) + assert decdata == plaindata + + # test random binary data aligned on boundary + plaindata = os.urandom(32) + encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, True) + assert encdata != plaindata + decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, True) + assert decdata == plaindata + + # test "text" data + plaintext = 'attack at dawn!' 
+ plaindata = plaintext.encode('utf8') + encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, True) + assert encdata != plaindata + decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, True) + assert decdata == plaindata + assert plaindata.decode('utf8') == plaintext + + # test unpadded + plaindata = os.urandom(32) + encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, False) + assert encdata != plaindata + decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, False) + assert decdata == plaindata diff --git a/tests/test_blobxfer_download.py b/tests/test_blobxfer_download.py index 6e12bcc..aef5e79 100644 --- a/tests/test_blobxfer_download.py +++ b/tests/test_blobxfer_download.py @@ -197,27 +197,18 @@ def test_post_md5_skip_on_check(): assert d._add_to_download_queue.call_count == 1 -def test_initialize_check_md5_downloads_thread(): +def test_check_for_downloads_from_md5(): lpath = 'lpath' d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._md5_map[lpath] = mock.MagicMock() d._download_set.add(pathlib.Path(lpath)) d._md5_offload = mock.MagicMock() d._md5_offload.done_cv = multiprocessing.Condition() - d._md5_offload.get_localfile_md5_done = mock.MagicMock() - d._md5_offload.get_localfile_md5_done.side_effect = [None, (lpath, False)] + d._md5_offload.pop_done_queue.side_effect = [None, (lpath, False)] d._add_to_download_queue = mock.MagicMock() - d._initialize_check_md5_downloads_thread() - while len(d._md5_map) > 0: - d._md5_offload.done_cv.acquire() - d._md5_offload.done_cv.notify() - d._md5_offload.done_cv.release() - d._all_remote_files_processed = True - d._md5_offload.done_cv.acquire() - d._md5_offload.done_cv.notify() - d._md5_offload.done_cv.release() - d._md5_check_thread.join() + with pytest.raises(StopIteration): + d._check_for_downloads_from_md5() assert d._add_to_download_queue.call_count == 1 @@ -237,14 +228,15 @@ def test_initialize_and_terminate_download_threads(): assert not thr.is_alive() +@mock.patch('time.clock') @mock.patch('blobxfer.md5.LocalFileMd5Offload') @mock.patch('blobxfer.blob.operations.list_blobs') @mock.patch('blobxfer.operations.ensure_local_destination', return_value=True) -def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): +def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._initialize_check_md5_downloads_thread = mock.MagicMock() d._initialize_download_threads = mock.MagicMock() - d._md5_check_thread = mock.MagicMock() + patched_lfmo._check_thread = mock.MagicMock() + d._general_options.concurrency.crypto_processes = 0 d._spec.sources = [] d._spec.options = mock.MagicMock() d._spec.options.chunk_size_bytes = 1 @@ -270,12 +262,14 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d._check_download_conditions = mock.MagicMock() d._check_download_conditions.return_value = dl.DownloadAction.Skip + patched_tc.side_effect = [1, 2] d.start() assert d._pre_md5_skip_on_check.call_count == 0 patched_lb.side_effect = [[b]] d._all_remote_files_processed = False d._check_download_conditions.return_value = dl.DownloadAction.CheckMd5 + patched_tc.side_effect = [1, 2] with pytest.raises(RuntimeError): d.start() assert d._pre_md5_skip_on_check.call_count == 1 @@ -284,6 +278,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): patched_lb.side_effect = [[b]] d._all_remote_files_processed = False d._check_download_conditions.return_value = dl.DownloadAction.Download + patched_tc.side_effect = [1, 2] 
with pytest.raises(RuntimeError): d.start() assert d._download_queue.qsize() == 1 diff --git a/tests/test_blobxfer_md5.py b/tests/test_blobxfer_md5.py index 7a37072..c38e758 100644 --- a/tests/test_blobxfer_md5.py +++ b/tests/test_blobxfer_md5.py @@ -36,7 +36,7 @@ def test_done_cv(): assert a.done_cv == a._done_cv finally: if a: - a.finalize_md5_processes() + a.finalize_processes() def test_finalize_md5_processes(): @@ -48,9 +48,9 @@ def test_finalize_md5_processes(): a = md5.LocalFileMd5Offload(num_workers=1) finally: if a: - a.finalize_md5_processes() + a.finalize_processes() - for proc in a._md5_procs: + for proc in a._procs: assert not proc.is_alive() @@ -63,7 +63,7 @@ def test_from_add_to_done_non_pagealigned(tmpdir): a = None try: a = md5.LocalFileMd5Offload(num_workers=1) - result = a.get_localfile_md5_done() + result = a.pop_done_queue() assert result is None a.add_localfile_for_md5_check( @@ -71,7 +71,7 @@ def test_from_add_to_done_non_pagealigned(tmpdir): i = 33 checked = False while i > 0: - result = a.get_localfile_md5_done() + result = a.pop_done_queue() if result is None: time.sleep(0.3) i -= 1 @@ -84,7 +84,7 @@ def test_from_add_to_done_non_pagealigned(tmpdir): assert checked finally: if a: - a.finalize_md5_processes() + a.finalize_processes() def test_from_add_to_done_pagealigned(tmpdir): @@ -96,7 +96,7 @@ def test_from_add_to_done_pagealigned(tmpdir): a = None try: a = md5.LocalFileMd5Offload(num_workers=1) - result = a.get_localfile_md5_done() + result = a.pop_done_queue() assert result is None a.add_localfile_for_md5_check( @@ -104,7 +104,7 @@ def test_from_add_to_done_pagealigned(tmpdir): i = 33 checked = False while i > 0: - result = a.get_localfile_md5_done() + result = a.pop_done_queue() if result is None: time.sleep(0.3) i -= 1 @@ -117,4 +117,4 @@ def test_from_add_to_done_pagealigned(tmpdir): assert checked finally: if a: - a.finalize_md5_processes() + a.finalize_processes() diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index f0e636d..897e28d 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -25,9 +25,9 @@ def test_concurrency_options(patched_cc): transfer_threads=-2, ) - assert a.crypto_processes == 1 + assert a.crypto_processes == 0 assert a.md5_processes == 1 - assert a.transfer_threads == 2 + assert a.transfer_threads == 3 def test_general_options(): @@ -359,12 +359,16 @@ def test_downloaddescriptor(tmpdir): ase = models.AzureStorageEntity('cont') ase._size = 1024 ase._encryption = mock.MagicMock() + with pytest.raises(RuntimeError): + d = models.DownloadDescriptor(lp, ase, opts) + + ase._encryption.symmetric_key = b'123' d = models.DownloadDescriptor(lp, ase, opts) assert d.entity == ase assert not d.must_compute_md5 assert d._total_chunks == 64 - assert d.offset == 0 + assert d._offset == 0 assert d.final_path == lp assert str(d.local_path) == str(lp) + '.bxtmp' assert d.local_path.stat().st_size == 1024 - 16 @@ -400,6 +404,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): offsets = d.next_offsets() assert d._total_chunks == 1 + assert offsets.chunk_num == 0 assert offsets.fd_start == 0 assert offsets.num_bytes == 128 assert offsets.range_start == 0 @@ -416,6 +421,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 + assert offsets.chunk_num == 0 assert offsets.fd_start == 0 assert offsets.num_bytes == 1 assert offsets.range_start == 0 @@ -427,6 +433,7 @@ def 
test_downloaddescriptor_next_offsets(tmpdir): d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 + assert offsets.chunk_num == 0 assert offsets.fd_start == 0 assert offsets.num_bytes == 256 assert offsets.range_start == 0 @@ -438,12 +445,14 @@ def test_downloaddescriptor_next_offsets(tmpdir): d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 2 + assert offsets.chunk_num == 0 assert offsets.fd_start == 0 assert offsets.num_bytes == 256 assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad offsets = d.next_offsets() + assert offsets.chunk_num == 1 assert offsets.fd_start == 256 assert offsets.num_bytes == 16 assert offsets.range_start == 256 @@ -452,10 +461,12 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert d.next_offsets() is None ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'123' ase._size = 128 d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 + assert offsets.chunk_num == 0 assert offsets.fd_start == 0 assert offsets.num_bytes == 128 assert offsets.range_start == 0 @@ -467,6 +478,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 + assert offsets.chunk_num == 0 assert offsets.fd_start == 0 assert offsets.num_bytes == 256 assert offsets.range_start == 0 @@ -478,12 +490,14 @@ def test_downloaddescriptor_next_offsets(tmpdir): d = models.DownloadDescriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 2 + assert offsets.chunk_num == 0 assert offsets.fd_start == 0 assert offsets.num_bytes == 256 assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad offsets = d.next_offsets() + assert offsets.chunk_num == 1 assert offsets.fd_start == 256 assert offsets.num_bytes == 32 assert offsets.range_start == 256 - 16 From 85895c1e3cbbea3c88244234930a265a4a986fa1 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Sun, 26 Feb 2017 20:15:43 -0800 Subject: [PATCH 12/47] Refactor download to module - Add more UT coverage --- blobxfer/api.py | 2 +- blobxfer/blob/operations.py | 4 +- blobxfer/crypto/operations.py | 9 +- blobxfer/download/__init__.py | 0 blobxfer/download/models.py | 377 ++++++++++++++++++ .../{download.py => download/operations.py} | 13 +- blobxfer/file/operations.py | 4 +- blobxfer/md5.py | 3 +- blobxfer/models.py | 319 --------------- blobxfer/offload.py | 18 +- setup.py | 1 + tests/test_blobxfer_crypto_operations.py | 43 +- tests/test_blobxfer_download_models.py | 377 ++++++++++++++++++ ...y => test_blobxfer_download_operations.py} | 55 +-- tests/test_blobxfer_models.py | 156 -------- tests/test_blobxfer_offload.py | 36 ++ 16 files changed, 890 insertions(+), 527 deletions(-) create mode 100644 blobxfer/download/__init__.py create mode 100644 blobxfer/download/models.py rename blobxfer/{download.py => download/operations.py} (97%) create mode 100644 tests/test_blobxfer_download_models.py rename tests/{test_blobxfer_download.py => test_blobxfer_download_operations.py} (84%) create mode 100644 tests/test_blobxfer_offload.py diff --git a/blobxfer/api.py b/blobxfer/api.py index 69444ae..57fcf09 100644 --- a/blobxfer/api.py +++ b/blobxfer/api.py @@ -45,6 +45,6 @@ create_client as create_file_client ) -from .download import ( # noqa +from .download.operations import ( # noqa Downloader ) diff --git a/blobxfer/blob/operations.py 
b/blobxfer/blob/operations.py index 4a8f0eb..27c19c7 100644 --- a/blobxfer/blob/operations.py +++ b/blobxfer/blob/operations.py @@ -108,10 +108,10 @@ def list_blobs(client, container, prefix, mode, timeout=None): def get_blob_range(ase, offsets, timeout=None): # type: (blobxfer.models.AzureStorageEntity, - # blobxfer.models.DownloadOffsets, int) -> bytes + # blobxfer.download.models.DownloadOffsets, int) -> bytes """Retrieve blob range :param blobxfer.models.AzureStorageEntity ase: AzureStorageEntity - :param blobxfer.models.DownloadOffsets offsets: downlaod offsets + :param blobxfer.download.models.DownloadOffsets offsets: download offsets :param int timeout: timeout :rtype: bytes :return: content for blob range diff --git a/blobxfer/crypto/operations.py b/blobxfer/crypto/operations.py index deeb287..2534148 100644 --- a/blobxfer/crypto/operations.py +++ b/blobxfer/crypto/operations.py @@ -230,7 +230,8 @@ def __init__(self, num_workers): :param CryptoOffload self: this :param int num_workers: number of worker processes """ - super(CryptoOffload, self).__init__(num_workers, 'Crypto') + super(CryptoOffload, self).__init__( + self._worker_process, num_workers, 'Crypto') def _worker_process(self): # type: (CryptoOffload) -> None @@ -256,12 +257,12 @@ def _worker_process(self): def add_decrypt_chunk( self, final_path, offsets, symkey, iv, encdata): - # type: (CryptoOffload, str, blobxfer.models.DownloadOffsets, bytes, - # bytes, bytes) -> None + # type: (CryptoOffload, str, blobxfer.download.models.DownloadOffsets, + # bytes, bytes, bytes) -> None """Add a chunk to decrypt :param CryptoOffload self: this :param str final_path: final path - :param blobxfer.models.DownloadOffsets offsets: offsets + :param blobxfer.download.models.DownloadOffsets offsets: offsets :param bytes symkey: symmetric key :param bytes iv: initialization vector :param bytes encdata: encrypted data diff --git a/blobxfer/download/__init__.py b/blobxfer/download/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blobxfer/download/models.py b/blobxfer/download/models.py new file mode 100644 index 0000000..39b641d --- /dev/null +++ b/blobxfer/download/models.py @@ -0,0 +1,377 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
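The CryptoOffload change above follows from the offload base class now taking the worker target explicitly: each worker process drains a task queue and pushes results to a done queue until a shared termination flag is set. A minimal standalone sketch of that pattern, with hypothetical names rather than the module's actual API:

    import multiprocessing
    try:
        import queue
    except ImportError:  # Python 2
        import Queue as queue

    def run_worker(task_queue, done_queue, term_signal):
        # drain tasks until the parent flips the termination flag
        while term_signal.value == 0:
            try:
                item = task_queue.get(timeout=1)
            except queue.Empty:
                continue
            # a real worker would decrypt or hash here; echo back for the sketch
            done_queue.put(item)

    if __name__ == '__main__':
        tasks = multiprocessing.Queue()
        done = multiprocessing.Queue()
        term = multiprocessing.Value('i', 0)
        proc = multiprocessing.Process(
            target=run_worker, args=(tasks, done, term))
        proc.start()
        tasks.put(b'chunk')
        print(done.get())
        term.value = 1
        proc.join()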
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import collections +import logging +import math +import os +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +import tempfile +import threading +# non-stdlib imports +# local imports +import blobxfer.blob.operations +import blobxfer.file.operations +import blobxfer.crypto.models +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + +# named tuples +DownloadOffsets = collections.namedtuple( + 'DownloadOffsets', [ + 'chunk_num', + 'fd_start', + 'num_bytes', + 'range_end', + 'range_start', + 'unpad', + ] +) +UncheckedChunk = collections.namedtuple( + 'UncheckedChunk', [ + 'data_len', + 'fd_start', + 'file_path', + 'temp', + ] +) + + +class DownloadDescriptor(object): + """Download Descriptor""" + + _AES_BLOCKSIZE = blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES + + def __init__(self, lpath, ase, options): + # type: (DownloadDescriptior, pathlib.Path, AzureStorageEntity, + # DownloadOptions) -> None + """Ctor for DownloadDescriptor + :param DownloadDescriptor self: this + :param pathlib.Path lpath: local path + :param AzureStorageEntity ase: Azure Storage Entity + :param DownloadOptions options: download options + """ + self.final_path = lpath + # create path holding the temporary file to download to + _tmp = list(lpath.parts[:-1]) + _tmp.append(lpath.name + '.bxtmp') + self.local_path = pathlib.Path(*_tmp) + self._meta_lock = threading.Lock() + self._hasher_lock = threading.Lock() + self._ase = ase + # calculate the total number of ops required for transfer + self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) + try: + self._total_chunks = int( + math.ceil(self._ase.size / self._chunk_size)) + except ZeroDivisionError: + self._total_chunks = 0 + self.hmac = None + self.md5 = None + self._offset = 0 + self._chunk_num = 0 + self._next_integrity_chunk = 0 + self._unchecked_chunks = {} + self._outstanding_ops = self._total_chunks + self._completed_ops = 0 + # initialize checkers and allocate space + self._initialize_integrity_checkers(options) + self._allocate_disk_space() + + @property + def entity(self): + # type: (DownloadDescriptor) -> AzureStorageEntity + """Get linked AzureStorageEntity + :param DownloadDescriptor self: this + :rtype: AzureStorageEntity + :return: AzureStorageEntity + """ + return self._ase + + @property + def must_compute_md5(self): + # type: (DownloadDescriptor) -> bool + """Check if MD5 must be computed + :param DownloadDescriptor self: this + :rtype: bool + :return: if MD5 must be computed + """ + return self.md5 is not None + + @property + def all_operations_completed(self): + # type: (DownloadDescriptor) -> bool + """All operations are completed + :param DownloadDescriptor self: this + :rtype: bool + :return: if all operations completed + """ + with self._meta_lock: + return (self._outstanding_ops == 0 and + len(self._unchecked_chunks) == 0) + + def dec_outstanding_operations(self): + # type: (DownloadDescriptor) -> None + """Decrement outstanding operations (and increment completed ops) + :param DownloadDescriptor self: this + """ + with self._meta_lock: + self._outstanding_ops -= 1 + self._completed_ops += 1 + + def _initialize_integrity_checkers(self, options): + # type: (DownloadDescriptor, DownloadOptions) -> None + 
"""Initialize file integrity checkers + :param DownloadDescriptor self: this + :param DownloadOptions options: download options + """ + if self._ase.is_encrypted: + # ensure symmetric key exists + if blobxfer.util.is_none_or_empty( + self._ase.encryption_metadata.symmetric_key): + raise RuntimeError( + 'symmetric key is invalid: provide RSA private key ' + 'or metadata corrupt') + self.hmac = self._ase.encryption_metadata.initialize_hmac() + if self.hmac is None and options.check_file_md5: + self.md5 = blobxfer.util.new_md5_hasher() + + def _allocate_disk_space(self): + # type: (DownloadDescriptor, int) -> None + """Perform file allocation (possibly sparse) + :param DownloadDescriptor self: this + :param int size: size + """ + size = self._ase.size + # compute size + if size > 0: + if self._ase.is_encrypted: + # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs + allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ + self._AES_BLOCKSIZE + else: + allocatesize = size + if allocatesize < 0: + allocatesize = 0 + else: + allocatesize = 0 + # create parent path + self.local_path.parent.mkdir(mode=0o750, parents=True, exist_ok=True) + # allocate file + with self.local_path.open('wb') as fd: + if allocatesize > 0: + try: + os.posix_fallocate(fd.fileno(), 0, allocatesize) + except AttributeError: + fd.seek(allocatesize - 1) + fd.write(b'\0') + + def next_offsets(self): + # type: (DownloadDescriptor) -> DownloadOffsets + """Retrieve the next offsets + :param DownloadDescriptor self: this + :rtype: DownloadOffsets + :return: download offsets + """ + with self._meta_lock: + if self._offset >= self._ase.size: + return None + if self._offset + self._chunk_size > self._ase.size: + chunk = self._ase.size - self._offset + else: + chunk = self._chunk_size + # on download, num_bytes must be offset by -1 as the x-ms-range + # header expects it that way. x -> y bytes means first bits of the + # (x+1)th byte to the last bits of the (y+1)th byte. 
for example, + # 0 -> 511 means byte 1 to byte 512 + num_bytes = chunk - 1 + chunk_num = self._chunk_num + fd_start = self._offset + range_start = self._offset + if self._ase.is_encrypted: + # ensure start is AES block size aligned + range_start = range_start - \ + (range_start % self._AES_BLOCKSIZE) - \ + self._AES_BLOCKSIZE + if range_start <= 0: + range_start = 0 + range_end = self._offset + num_bytes + self._offset += chunk + self._chunk_num += 1 + if self._ase.is_encrypted and self._offset >= self._ase.size: + unpad = True + else: + unpad = False + return DownloadOffsets( + chunk_num=chunk_num, + fd_start=fd_start, + num_bytes=chunk, + range_start=range_start, + range_end=range_end, + unpad=unpad, + ) + + def _postpone_integrity_check(self, offsets, data): + # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + """Postpone integrity check for chunk + :param DownloadDescriptor self: this + :param DownloadOffsets offsets: download offsets + :param bytes data: data + """ + if self.must_compute_md5: + with self.local_path.open('r+b') as fd: + fd.seek(offsets.fd_start, 0) + fd.write(data) + unchecked = UncheckedChunk( + data_len=len(data), + fd_start=offsets.fd_start, + file_path=self.local_path, + temp=False, + ) + else: + fname = None + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as fd: + fname = fd.name + fd.write(data) + unchecked = UncheckedChunk( + data_len=len(data), + fd_start=0, + file_path=pathlib.Path(fname), + temp=True, + ) + with self._meta_lock: + self._unchecked_chunks[offsets.chunk_num] = unchecked + + def perform_chunked_integrity_check(self, offsets, data): + # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + """Hash data against stored MD5 hasher safely + :param DownloadDescriptor self: this + :param DownloadOffsets offsets: download offsets + :param bytes data: data + """ + self_check = False + hasher = self.hmac or self.md5 + # iterate from next chunk to be checked + while True: + ucc = None + with self._meta_lock: + chunk_num = self._next_integrity_chunk + # check if the next chunk is ready + if chunk_num in self._unchecked_chunks: + ucc = self._unchecked_chunks.pop(chunk_num) + elif chunk_num != offsets.chunk_num: + break + # prepare data for hashing + if ucc is None: + chunk = data + self_check = True + else: + with ucc.file_path.open('rb') as fd: + fd.seek(ucc.fd_start, 0) + chunk = fd.read(ucc.data_len) + if ucc.temp: + ucc.file_path.unlink() + # hash data and set next integrity chunk + with self._hasher_lock: + hasher.update(chunk) + with self._meta_lock: + self._next_integrity_chunk += 1 + # store data that hasn't been checked + if not self_check: + self._postpone_integrity_check(offsets, data) + + def write_data(self, offsets, data): + # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + """Postpone integrity check for chunk + :param DownloadDescriptor self: this + :param DownloadOffsets offsets: download offsets + :param bytes data: data + """ + with self.local_path.open('r+b') as fd: + fd.seek(offsets.fd_start, 0) + fd.write(data) + + def finalize_file(self): + # type: (DownloadDescriptor) -> None + """Finalize file download + :param DownloadDescriptor self: this + """ + # check final file integrity + check = False + msg = None + if self.hmac is not None: + mac = self._ase.encryption_metadata.encryption_authentication.\ + message_authentication_code + digest = blobxfer.util.base64_encode_as_string(self.hmac.digest()) + if digest == mac: + check = True + msg = '{}: {}, {} {} {}'.format( + 
self._ase.encryption_metadata.encryption_authentication. + algorithm, + 'OK' if check else 'MISMATCH', + self._ase.name, + digest, + mac, + ) + elif self.md5 is not None: + digest = blobxfer.util.base64_encode_as_string(self.md5.digest()) + if digest == self._ase.md5: + check = True + msg = 'MD5: {}, {} {} {}'.format( + 'OK' if check else 'MISMATCH', + self._ase.name, + digest, + self._ase.md5, + ) + else: + check = True + msg = 'MD5: SKIPPED, {} None {}'.format( + self._ase.name, + self._ase.md5 + ) + # cleanup if download failed + if not check: + logger.error(msg) + # delete temp download file + self.local_path.unlink() + return + logger.debug(msg) + + # TODO set file uid/gid and mode + + # move temp download file to final path + self.local_path.rename(self.final_path) diff --git a/blobxfer/download.py b/blobxfer/download/operations.py similarity index 97% rename from blobxfer/download.py rename to blobxfer/download/operations.py index 65878bb..ac36ebe 100644 --- a/blobxfer/download.py +++ b/blobxfer/download/operations.py @@ -49,8 +49,8 @@ # local imports import blobxfer.crypto.models import blobxfer.crypto.operations +import blobxfer.download.models import blobxfer.md5 -import blobxfer.models import blobxfer.operations import blobxfer.blob.operations import blobxfer.file.operations @@ -255,7 +255,7 @@ def _add_to_download_queue(self, lpath, rfile): :param blobxfer.models.AzureStorageEntity rfile: remote file """ # prepare remote file for download - dd = blobxfer.models.DownloadDescriptor( + dd = blobxfer.download.models.DownloadDescriptor( lpath, rfile, self._spec.options) if dd.entity.is_encrypted: with self._download_lock: @@ -362,13 +362,14 @@ def _worker_thread_download(self): self._complete_chunk_download(offsets, data, dd) def _complete_chunk_download(self, offsets, data, dd): - # type: (Downloader, blobxfer.models.DownloadOffsets, bytes, - # blobxfer.models.DownloadDescriptor) -> None + # type: (Downloader, blobxfer.download.models.DownloadOffsets, bytes, + # blobxfer.models.download.DownloadDescriptor) -> None """Complete chunk download :param Downloader self: this - :param blobxfer.models.DownloadOffsets offsets: offsets + :param blobxfer.download.models.DownloadOffsets offsets: offsets :param bytes data: data - :param blobxfer.models.DownloadDescriptor dd: download descriptor + :param blobxfer.models.download.DownloadDescriptor dd: + download descriptor """ # write data to disk dd.write_data(offsets, data) diff --git a/blobxfer/file/operations.py b/blobxfer/file/operations.py index 09f7d68..ec654dd 100644 --- a/blobxfer/file/operations.py +++ b/blobxfer/file/operations.py @@ -152,10 +152,10 @@ def list_files(client, fileshare, prefix, timeout=None): def get_file_range(ase, offsets, timeout=None): # type: (blobxfer.models.AzureStorageEntity, - # blobxfer.models.DownloadOffsets, int) -> bytes + # blobxfer.download.models.DownloadOffsets, int) -> bytes """Retrieve file range :param blobxfer.models.AzureStorageEntity ase: AzureStorageEntity - :param blobxfer.models.DownloadOffsets offsets: downlaod offsets + :param blobxfer.download.models.DownloadOffsets offsets: download offsets :param int timeout: timeout :rtype: bytes :return: content for file range diff --git a/blobxfer/md5.py b/blobxfer/md5.py index 84e85cc..1c403cc 100644 --- a/blobxfer/md5.py +++ b/blobxfer/md5.py @@ -89,7 +89,8 @@ def __init__(self, num_workers): :param LocalFileMd5Offload self: this :param int num_workers: number of worker processes """ - super(LocalFileMd5Offload, self).__init__(num_workers, 'MD5') + 
super(LocalFileMd5Offload, self).__init__( + self._worker_process, num_workers, 'MD5') def _worker_process(self): # type: (LocalFileMd5Offload) -> None diff --git a/blobxfer/models.py b/blobxfer/models.py index 72b1291..8a91885 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -34,15 +34,12 @@ import enum import fnmatch import logging -import math import os try: import pathlib2 as pathlib except ImportError: # noqa import pathlib import multiprocessing -import tempfile -import threading # non-stdlib imports # local imports from .api import ( @@ -125,24 +122,6 @@ class AzureStorageModes(enum.Enum): 'relative_path', ] ) -DownloadOffsets = collections.namedtuple( - 'DownloadOffsets', [ - 'chunk_num', - 'fd_start', - 'num_bytes', - 'range_end', - 'range_start', - 'unpad', - ] -) -UncheckedChunk = collections.namedtuple( - 'UncheckedChunk', [ - 'data_len', - 'fd_start', - 'file_path', - 'temp', - ] -) class ConcurrencyOptions(object): @@ -815,304 +794,6 @@ def populate_from_file(self, sa, file): self._client = sa.file_client -class DownloadDescriptor(object): - """Download Descriptor""" - - _AES_BLOCKSIZE = blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES - - def __init__(self, lpath, ase, options): - # type: (DownloadDescriptior, pathlib.Path, AzureStorageEntity, - # DownloadOptions) -> None - """Ctor for DownloadDescriptor - :param DownloadDescriptor self: this - :param pathlib.Path lpath: local path - :param AzureStorageEntity ase: Azure Storage Entity - :param DownloadOptions options: download options - """ - self.final_path = lpath - # create path holding the temporary file to download to - _tmp = list(lpath.parts[:-1]) - _tmp.append(lpath.name + '.bxtmp') - self.local_path = pathlib.Path(*_tmp) - self._meta_lock = threading.Lock() - self._hasher_lock = threading.Lock() - self._ase = ase - # calculate the total number of ops required for transfer - self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) - try: - self._total_chunks = int( - math.ceil(self._ase.size / self._chunk_size)) - except ZeroDivisionError: - self._total_chunks = 0 - self.hmac = None - self.md5 = None - self._offset = 0 - self._chunk_num = 0 - self._next_integrity_chunk = 0 - self._unchecked_chunks = {} - self._outstanding_ops = self._total_chunks - self._completed_ops = 0 - # initialize checkers and allocate space - self._initialize_integrity_checkers(options) - self._allocate_disk_space() - - @property - def entity(self): - # type: (DownloadDescriptor) -> AzureStorageEntity - """Get linked AzureStorageEntity - :param DownloadDescriptor self: this - :rtype: AzureStorageEntity - :return: AzureStorageEntity - """ - return self._ase - - @property - def must_compute_md5(self): - # type: (DownloadDescriptor) -> bool - """Check if MD5 must be computed - :param DownloadDescriptor self: this - :rtype: bool - :return: if MD5 must be computed - """ - return self.md5 is not None - - def _initialize_integrity_checkers(self, options): - # type: (DownloadDescriptor, DownloadOptions) -> None - """Initialize file integrity checkers - :param DownloadDescriptor self: this - :param DownloadOptions options: download options - """ - if self._ase.is_encrypted: - # ensure symmetric key exists - if blobxfer.util.is_none_or_empty( - self._ase.encryption_metadata.symmetric_key): - raise RuntimeError( - 'symmetric key is invalid: provide RSA private key ' - 'or metadata corrupt') - self.hmac = self._ase.encryption_metadata.initialize_hmac() - if self.hmac is None and options.check_file_md5: - self.md5 = 
blobxfer.util.new_md5_hasher() - - def _allocate_disk_space(self): - # type: (DownloadDescriptor, int) -> None - """Perform file allocation (possibly sparse) - :param DownloadDescriptor self: this - :param int size: size - """ - size = self._ase.size - # compute size - if size > 0: - if self._ase.is_encrypted: - # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs - allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ - self._AES_BLOCKSIZE - else: - allocatesize = size - if allocatesize < 0: - allocatesize = 0 - else: - allocatesize = 0 - # create parent path - self.local_path.parent.mkdir(mode=0o750, parents=True, exist_ok=True) - # allocate file - with self.local_path.open('wb') as fd: - if allocatesize > 0: - try: - os.posix_fallocate(fd.fileno(), 0, allocatesize) - except AttributeError: - fd.seek(allocatesize - 1) - fd.write(b'\0') - - def next_offsets(self): - # type: (DownloadDescriptor) -> DownloadOffsets - """Retrieve the next offsets - :param DownloadDescriptor self: this - :rtype: DownloadOffsets - :return: download offsets - """ - with self._meta_lock: - if self._offset >= self._ase.size: - return None - if self._offset + self._chunk_size > self._ase.size: - chunk = self._ase.size - self._offset - else: - chunk = self._chunk_size - # on download, num_bytes must be offset by -1 as the x-ms-range - # header expects it that way. x -> y bytes means first bits of the - # (x+1)th byte to the last bits of the (y+1)th byte. for example, - # 0 -> 511 means byte 1 to byte 512 - num_bytes = chunk - 1 - chunk_num = self._chunk_num - fd_start = self._offset - range_start = self._offset - if self._ase.is_encrypted: - # ensure start is AES block size aligned - range_start = range_start - \ - (range_start % self._AES_BLOCKSIZE) - \ - self._AES_BLOCKSIZE - if range_start <= 0: - range_start = 0 - range_end = self._offset + num_bytes - self._offset += chunk - self._chunk_num += 1 - if self._ase.is_encrypted and self._offset >= self._ase.size: - unpad = True - else: - unpad = False - return DownloadOffsets( - chunk_num=chunk_num, - fd_start=fd_start, - num_bytes=chunk, - range_start=range_start, - range_end=range_end, - unpad=unpad, - ) - - def _postpone_integrity_check(self, offsets, data): - # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None - """Postpone integrity check for chunk - :param DownloadDescriptor self: this - :param DownloadOffsets offsets: download offsets - :param bytes data: data - """ - if self.must_compute_md5: - with self.local_path.open('r+b') as fd: - fd.seek(offsets.fd_start, 0) - fd.write(data) - unchecked = UncheckedChunk( - data_len=len(data), - fd_start=offsets.fd_start, - file_path=self.local_path, - temp=False, - ) - else: - fname = None - with tempfile.NamedTemporaryFile(mode='wb', delete=False) as fd: - fname = fd.name - fd.write(data) - unchecked = UncheckedChunk( - data_len=len(data), - fd_start=0, - file_path=pathlib.Path(fname), - temp=True, - ) - with self._meta_lock: - self._unchecked_chunks[offsets.chunk_num] = unchecked - - def perform_chunked_integrity_check(self, offsets, data): - # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None - """Hash data against stored MD5 hasher safely - :param DownloadDescriptor self: this - :param DownloadOffsets offsets: download offsets - :param bytes data: data - """ - self_check = False - hasher = self.hmac or self.md5 - # iterate from next chunk to be checked - while True: - ucc = None - with self._meta_lock: - chunk_num = self._next_integrity_chunk - # check if the next chunk is ready - if 
chunk_num in self._unchecked_chunks: - ucc = self._unchecked_chunks.pop(chunk_num) - elif chunk_num != offsets.chunk_num: - break - # prepare data for hashing - if ucc is None: - chunk = data - self_check = True - else: - with ucc.file_path.open('rb') as fd: - fd.seek(ucc.fd_start, 0) - chunk = fd.read(ucc.data_len) - if ucc.temp: - ucc.file_path.unlink() - # hash data and set next integrity chunk - with self._hasher_lock: - hasher.update(chunk) - with self._meta_lock: - self._next_integrity_chunk += 1 - # store data that hasn't been checked - if not self_check: - self._postpone_integrity_check(offsets, data) - - def write_data(self, offsets, data): - # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None - """Postpone integrity check for chunk - :param DownloadDescriptor self: this - :param DownloadOffsets offsets: download offsets - :param bytes data: data - """ - with self.local_path.open('r+b') as fd: - fd.seek(offsets.fd_start, 0) - fd.write(data) - - def finalize_file(self): - # type: (DownloadDescriptor) -> Tuple[bool, str] - """Finalize file download - :param DownloadDescriptor self: this - :rtype: tuple - :return (if integrity check passed or not, message) - """ - # check final file integrity - check = False - msg = None - if self.hmac is not None: - mac = self._ase.encryption_metadata.encryption_authentication.\ - message_authentication_code - digest = blobxfer.util.base64_encode_as_string(self.hmac.digest()) - if digest == mac: - check = True - msg = '{}: {}, {} {} {}'.format( - self._ase.encryption_metadata.encryption_authentication. - algorithm, - 'OK' if check else 'MISMATCH', - self._ase.name, - digest, - mac, - ) - elif self.md5 is not None: - digest = blobxfer.util.base64_encode_as_string(self.md5.digest()) - if digest == self._ase.md5: - check = True - msg = 'MD5: {}, {} {} {}'.format( - 'OK' if check else 'MISMATCH', - self._ase.name, - digest, - self._ase.md5, - ) - else: - check = True - msg = 'MD5: SKIPPED, {} None {}'.format( - self._ase.name, - self._ase.md5 - ) - # cleanup if download failed - if not check: - logger.error(msg) - # delete temp download file - self.local_path.unlink() - return - logger.debug(msg) - - # TODO set file uid/gid and mode - - # move temp download file to final path - self.local_path.rename(self.final_path) - - @property - def all_operations_completed(self): - with self._meta_lock: - return (self._outstanding_ops == 0 and - len(self._unchecked_chunks) == 0) - - def dec_outstanding_operations(self): - with self._meta_lock: - self._outstanding_ops -= 1 - self._completed_ops += 1 - - class AzureDestinationPaths(object): def __init__(self): pass diff --git a/blobxfer/offload.py b/blobxfer/offload.py index 80f84fe..ca2cc85 100644 --- a/blobxfer/offload.py +++ b/blobxfer/offload.py @@ -43,10 +43,11 @@ class _MultiprocessOffload(object): - def __init__(self, num_workers, description=None): - # type: (_MultiprocessOffload, int, str) -> None + def __init__(self, target, num_workers, description=None): + # type: (_MultiprocessOffload, function, int, str) -> None """Ctor for Crypto Offload :param _MultiprocessOffload self: this + :param function target: target function for process :param int num_workers: number of worker processes :param str description: description """ @@ -56,7 +57,7 @@ def __init__(self, num_workers, description=None): self._term_signal = multiprocessing.Value('i', 0) self._procs = [] self._check_thread = None - self._initialize_processes(num_workers, description) + self._initialize_processes(target, num_workers, description) 
@property def done_cv(self): @@ -78,10 +79,11 @@ def terminated(self): """ return self._term_signal.value == 1 - def _initialize_processes(self, num_workers, description): - # type: (_MultiprocessOffload, int, str) -> None + def _initialize_processes(self, target, num_workers, description): + # type: (_MultiprocessOffload, function, int, str) -> None """Initialize processes :param _MultiprocessOffload self: this + :param function target: target function for process :param int num_workers: number of worker processes :param str description: description """ @@ -90,7 +92,7 @@ def _initialize_processes(self, num_workers, description): logger.debug('initializing {}{} processes'.format( num_workers, ' ' + description if not None else '')) for _ in range(num_workers): - proc = multiprocessing.Process(target=self._worker_process) + proc = multiprocessing.Process(target=target) proc.start() self._procs.append(proc) @@ -118,10 +120,10 @@ def pop_done_queue(self): return None def initialize_check_thread(self, check_func): - # type: (_MultiprocessOffload, object) -> None + # type: (_MultiprocessOffload, function) -> None """Initialize the crypto done queue check thread :param Downloader self: this - :param object check_func: check function + :param function check_func: check function """ self._check_thread = threading.Thread(target=check_func) self._check_thread.start() diff --git a/setup.py b/setup.py index 11ba002..729dcc9 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ 'blobxfer.blob.block', 'blobxfer.blob.page', 'blobxfer.crypto', + 'blobxfer.download', 'blobxfer.file', 'blobxfer_cli', ] diff --git a/tests/test_blobxfer_crypto_operations.py b/tests/test_blobxfer_crypto_operations.py index 88990e5..84d633a 100644 --- a/tests/test_blobxfer_crypto_operations.py +++ b/tests/test_blobxfer_crypto_operations.py @@ -2,11 +2,13 @@ """Tests for crypto operations""" # stdlib imports -from mock import patch +import mock import os +import time # non-stdlib imports import cryptography.hazmat.primitives.asymmetric.rsa # local imports +import blobxfer.download.models # module under test import blobxfer.crypto.operations as ops @@ -16,7 +18,8 @@ backend=cryptography.hazmat.backends.default_backend()) -@patch('cryptography.hazmat.primitives.serialization.load_pem_private_key') +@mock.patch( + 'cryptography.hazmat.primitives.serialization.load_pem_private_key') def test_load_rsa_private_key_file(patched_load, tmpdir): keyfile = tmpdir.join('keyfile') keyfile.write('a') @@ -26,7 +29,7 @@ def test_load_rsa_private_key_file(patched_load, tmpdir): assert rv == _RSAKEY -@patch('cryptography.hazmat.primitives.serialization.load_pem_public_key') +@mock.patch('cryptography.hazmat.primitives.serialization.load_pem_public_key') def test_load_rsa_public_key_file(patched_load, tmpdir): keyfile = tmpdir.join('keyfile') keyfile.write('b') @@ -85,3 +88,37 @@ def test_aes_cbc_encryption(): assert encdata != plaindata decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, False) assert decdata == plaindata + + +def test_cryptooffload_decrypt(): + a = None + try: + a = ops.CryptoOffload(1) + offsets = blobxfer.download.models.DownloadOffsets( + chunk_num=0, + fd_start=1, + num_bytes=2, + range_end=3, + range_start=4, + unpad=False, + ) + a.add_decrypt_chunk( + 'fp', offsets, ops.aes256_generate_random_key(), os.urandom(16), + os.urandom(16)) + i = 33 + checked = False + while i > 0: + result = a.pop_done_queue() + if result is None: + time.sleep(0.3) + i -= 1 + continue + assert len(result) == 3 + assert result[0] == 'fp' + 
assert result[1] == offsets + checked = True + break + assert checked + finally: + if a is not None: + a.finalize_processes() diff --git a/tests/test_blobxfer_download_models.py b/tests/test_blobxfer_download_models.py new file mode 100644 index 0000000..530e4ac --- /dev/null +++ b/tests/test_blobxfer_download_models.py @@ -0,0 +1,377 @@ +# coding=utf-8 +"""Tests for download models""" + +# stdlib imports +import hashlib +import hmac +import mock +import os +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +import pytest +# local imports +import blobxfer.models +import blobxfer.util as util +# module under test +import blobxfer.download.models as models + + +def test_downloaddescriptor(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 1024 + ase._encryption = mock.MagicMock() + with pytest.raises(RuntimeError): + d = models.DownloadDescriptor(lp, ase, opts) + + ase._encryption.symmetric_key = b'123' + d = models.DownloadDescriptor(lp, ase, opts) + + assert d.entity == ase + assert not d.must_compute_md5 + assert d._total_chunks == 64 + assert d._offset == 0 + assert d.final_path == lp + assert str(d.local_path) == str(lp) + '.bxtmp' + assert d.local_path.stat().st_size == 1024 - 16 + + d.local_path.unlink() + ase._size = 1 + d = models.DownloadDescriptor(lp, ase, opts) + assert d._total_chunks == 1 + assert d.local_path.stat().st_size == 0 + + d.local_path.unlink() + ase._encryption = None + ase._size = 1024 + d = models.DownloadDescriptor(lp, ase, opts) + assert d.local_path.stat().st_size == 1024 + + # pre-existing file check + ase._size = 0 + d = models.DownloadDescriptor(lp, ase, opts) + assert d._total_chunks == 0 + assert d.local_path.stat().st_size == 0 + + +def test_downloaddescriptor_next_offsets(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 256 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 128 + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + assert d._total_chunks == 1 + assert offsets.chunk_num == 0 + assert offsets.fd_start == 0 + assert offsets.num_bytes == 128 + assert offsets.range_start == 0 + assert offsets.range_end == 127 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._size = 0 + d = models.DownloadDescriptor(lp, ase, opts) + assert d._total_chunks == 0 + assert d.next_offsets() is None + + ase._size = 1 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert d._total_chunks == 1 + assert offsets.chunk_num == 0 + assert offsets.fd_start == 0 + assert offsets.num_bytes == 1 + assert offsets.range_start == 0 + assert offsets.range_end == 0 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._size = 256 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert d._total_chunks == 1 + assert offsets.chunk_num == 0 + assert offsets.fd_start == 0 + assert offsets.num_bytes == 256 + assert offsets.range_start == 0 + assert offsets.range_end == 255 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._size = 256 + 16 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert d._total_chunks == 2 + assert offsets.chunk_num == 0 + assert offsets.fd_start == 0 + assert offsets.num_bytes == 256 + assert 
offsets.range_start == 0 + assert offsets.range_end == 255 + assert not offsets.unpad + offsets = d.next_offsets() + assert offsets.chunk_num == 1 + assert offsets.fd_start == 256 + assert offsets.num_bytes == 16 + assert offsets.range_start == 256 + assert offsets.range_end == 256 + 15 + assert not offsets.unpad + assert d.next_offsets() is None + + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'123' + ase._size = 128 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert d._total_chunks == 1 + assert offsets.chunk_num == 0 + assert offsets.fd_start == 0 + assert offsets.num_bytes == 128 + assert offsets.range_start == 0 + assert offsets.range_end == 127 + assert offsets.unpad + assert d.next_offsets() is None + + ase._size = 256 + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert d._total_chunks == 1 + assert offsets.chunk_num == 0 + assert offsets.fd_start == 0 + assert offsets.num_bytes == 256 + assert offsets.range_start == 0 + assert offsets.range_end == 255 + assert offsets.unpad + assert d.next_offsets() is None + + ase._size = 256 + 32 # 16 bytes over + padding + d = models.DownloadDescriptor(lp, ase, opts) + offsets = d.next_offsets() + assert d._total_chunks == 2 + assert offsets.chunk_num == 0 + assert offsets.fd_start == 0 + assert offsets.num_bytes == 256 + assert offsets.range_start == 0 + assert offsets.range_end == 255 + assert not offsets.unpad + offsets = d.next_offsets() + assert offsets.chunk_num == 1 + assert offsets.fd_start == 256 + assert offsets.num_bytes == 32 + assert offsets.range_start == 256 - 16 + assert offsets.range_end == 256 + 31 + assert offsets.unpad + assert d.next_offsets() is None + + +def test_postpone_integrity_check(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 32 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + d._postpone_integrity_check(offsets, b'0' * ase._size) + + assert offsets.chunk_num in d._unchecked_chunks + ucc = d._unchecked_chunks[offsets.chunk_num] + assert ucc.data_len == ase._size + assert ucc.fd_start == offsets.fd_start + assert ucc.file_path == d.local_path + assert not ucc.temp + + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 32 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + d._postpone_integrity_check(offsets, b'0' * ase._size) + + assert offsets.chunk_num in d._unchecked_chunks + ucc = d._unchecked_chunks[offsets.chunk_num] + assert ucc.data_len == ase._size + assert ucc.fd_start == offsets.fd_start + assert ucc.file_path != d.local_path + assert ucc.temp + + +def test_perform_chunked_integrity_check(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + data = b'0' * opts.chunk_size_bytes + d._postpone_integrity_check(offsets, data) + d.perform_chunked_integrity_check(offsets, data) + + assert d._next_integrity_chunk == 1 + + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + ase._encryption = 
mock.MagicMock() + ase._encryption.symmetric_key = b'123' + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + data = b'0' * opts.chunk_size_bytes + offsets1 = d.next_offsets() + d._postpone_integrity_check(offsets1, data) + ucc = d._unchecked_chunks[offsets1.chunk_num] + d.perform_chunked_integrity_check(offsets, data) + + assert d._next_integrity_chunk == 2 + assert not ucc.file_path.exists() + assert not ucc.file_path.exists() + + +def test_write_data(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + data = b'0' * ase._size + d.write_data(offsets, data) + + assert d.local_path.exists() + assert d.local_path.stat().st_size == len(data) + + +def test_finalize_file(tmpdir): + # hmac check success + lp = pathlib.Path(str(tmpdir.join('a'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'123' + signkey = os.urandom(32) + ase._encryption.initialize_hmac = mock.MagicMock() + ase._encryption.initialize_hmac.return_value = hmac.new( + signkey, digestmod=hashlib.sha256) + + data = b'0' * (ase._size - 16) + _hmac = hmac.new(signkey, digestmod=hashlib.sha256) + _hmac.update(data) + ase._encryption.encryption_authentication.\ + message_authentication_code = util.base64_encode_as_string( + _hmac.digest()) + + d = models.DownloadDescriptor(lp, ase, opts) + d.hmac.update(data) + d.finalize_file() + + assert not d.local_path.exists() + assert d.final_path.exists() + assert d.final_path.stat().st_size == len(data) + + # md5 check success + lp = pathlib.Path(str(tmpdir.join('b'))) + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + + data = b'0' * ase._size + md5 = util.new_md5_hasher() + md5.update(data) + ase._md5 = util.base64_encode_as_string(md5.digest()) + + d = models.DownloadDescriptor(lp, ase, opts) + d.md5.update(data) + d.finalize_file() + + assert not d.local_path.exists() + assert d.final_path.exists() + assert d.final_path.stat().st_size == len(data) + + # no check + lp = pathlib.Path(str(tmpdir.join('c'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + + data = b'0' * ase._size + + d = models.DownloadDescriptor(lp, ase, opts) + d.finalize_file() + + assert not d.local_path.exists() + assert d.final_path.exists() + assert d.final_path.stat().st_size == len(data) + + # md5 mismatch + lp = pathlib.Path(str(tmpdir.join('d'))) + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + + data = b'0' * ase._size + ase._md5 = 'oops' + + d = models.DownloadDescriptor(lp, ase, opts) + d.md5.update(data) + d.finalize_file() + + assert not d.local_path.exists() + assert not d.final_path.exists() + + +def test_operations(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 32 + + d = models.DownloadDescriptor(lp, ase, opts) + 
d._outstanding_ops = 1 + d._unchecked_chunks = {0: None} + assert not d.all_operations_completed + + d.dec_outstanding_operations() + assert d._completed_ops == 1 + assert not d.all_operations_completed + + d._unchecked_chunks.pop(0) + assert d.all_operations_completed diff --git a/tests/test_blobxfer_download.py b/tests/test_blobxfer_download_operations.py similarity index 84% rename from tests/test_blobxfer_download.py rename to tests/test_blobxfer_download_operations.py index aef5e79..0aebc75 100644 --- a/tests/test_blobxfer_download.py +++ b/tests/test_blobxfer_download_operations.py @@ -1,5 +1,5 @@ # coding=utf-8 -"""Tests for download""" +"""Tests for download operations""" # stdlib imports import datetime @@ -17,7 +17,7 @@ import blobxfer.models as models import blobxfer.util as util # module under test -import blobxfer.download as dl +import blobxfer.download.operations as ops def test_check_download_conditions(tmpdir): @@ -44,11 +44,11 @@ def test_check_download_conditions(tmpdir): ), local_destination_path=models.LocalDestinationPath('dest'), ) - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) result = d._check_download_conditions(nep, mock.MagicMock()) - assert result == dl.DownloadAction.Download + assert result == ops.DownloadAction.Download result = d._check_download_conditions(ep, mock.MagicMock()) - assert result == dl.DownloadAction.Skip + assert result == ops.DownloadAction.Skip ds = models.DownloadSpecification( download_options=models.DownloadOptions( @@ -68,9 +68,9 @@ def test_check_download_conditions(tmpdir): ), local_destination_path=models.LocalDestinationPath('dest'), ) - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) result = d._check_download_conditions(ep, mock.MagicMock()) - assert result == dl.DownloadAction.CheckMd5 + assert result == ops.DownloadAction.CheckMd5 ds = models.DownloadSpecification( download_options=models.DownloadOptions( @@ -90,9 +90,9 @@ def test_check_download_conditions(tmpdir): ), local_destination_path=models.LocalDestinationPath('dest'), ) - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) result = d._check_download_conditions(ep, mock.MagicMock()) - assert result == dl.DownloadAction.Download + assert result == ops.DownloadAction.Download ds = models.DownloadSpecification( download_options=models.DownloadOptions( @@ -112,17 +112,17 @@ def test_check_download_conditions(tmpdir): ), local_destination_path=models.LocalDestinationPath('dest'), ) - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) rfile = models.AzureStorageEntity('cont') rfile._size = util.page_align_content_length(ep.stat().st_size) rfile._mode = models.AzureStorageModes.Page result = d._check_download_conditions(ep, rfile) - assert result == dl.DownloadAction.Skip + assert result == ops.DownloadAction.Skip rfile._size = ep.stat().st_size rfile._mode = models.AzureStorageModes.Page result = d._check_download_conditions(ep, rfile) - assert result == dl.DownloadAction.Download + assert result == ops.DownloadAction.Download ds = models.DownloadSpecification( download_options=models.DownloadOptions( @@ -142,21 +142,21 @@ def test_check_download_conditions(tmpdir): ), local_destination_path=models.LocalDestinationPath('dest'), ) - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), ds) + d = 
ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) rfile = models.AzureStorageEntity('cont') rfile._lmt = datetime.datetime.now(dateutil.tz.tzutc()) + \ datetime.timedelta(days=1) result = d._check_download_conditions(ep, rfile) - assert result == dl.DownloadAction.Download + assert result == ops.DownloadAction.Download rfile._lmt = datetime.datetime.now(dateutil.tz.tzutc()) - \ datetime.timedelta(days=1) result = d._check_download_conditions(ep, rfile) - assert result == dl.DownloadAction.Skip + assert result == ops.DownloadAction.Skip def test_pre_md5_skip_on_check(): - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._md5_offload = mock.MagicMock() rfile = models.AzureStorageEntity('cont') @@ -177,7 +177,7 @@ def test_pre_md5_skip_on_check(): def test_post_md5_skip_on_check(): - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._md5_offload = mock.MagicMock() lpath = 'lpath' @@ -199,7 +199,7 @@ def test_post_md5_skip_on_check(): def test_check_for_downloads_from_md5(): lpath = 'lpath' - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._md5_map[lpath] = mock.MagicMock() d._download_set.add(pathlib.Path(lpath)) d._md5_offload = mock.MagicMock() @@ -209,14 +209,19 @@ def test_check_for_downloads_from_md5(): with pytest.raises(StopIteration): d._check_for_downloads_from_md5() - assert d._add_to_download_queue.call_count == 1 + d._add_to_download_queue = mock.MagicMock() + d._all_remote_files_processed = False + d._download_terminate = True + d._check_for_downloads_from_md5() + assert d._add_to_download_queue.call_count == 0 + def test_initialize_and_terminate_download_threads(): opts = mock.MagicMock() opts.concurrency.transfer_threads = 2 - d = dl.Downloader(opts, mock.MagicMock(), mock.MagicMock()) + d = ops.Downloader(opts, mock.MagicMock(), mock.MagicMock()) d._worker_thread_download = mock.MagicMock() d._initialize_download_threads() @@ -233,7 +238,7 @@ def test_initialize_and_terminate_download_threads(): @mock.patch('blobxfer.blob.operations.list_blobs') @mock.patch('blobxfer.operations.ensure_local_destination', return_value=True) def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._initialize_download_threads = mock.MagicMock() patched_lfmo._check_thread = mock.MagicMock() d._general_options.concurrency.crypto_processes = 0 @@ -261,14 +266,14 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d._pre_md5_skip_on_check = mock.MagicMock() d._check_download_conditions = mock.MagicMock() - d._check_download_conditions.return_value = dl.DownloadAction.Skip + d._check_download_conditions.return_value = ops.DownloadAction.Skip patched_tc.side_effect = [1, 2] d.start() assert d._pre_md5_skip_on_check.call_count == 0 patched_lb.side_effect = [[b]] d._all_remote_files_processed = False - d._check_download_conditions.return_value = dl.DownloadAction.CheckMd5 + d._check_download_conditions.return_value = ops.DownloadAction.CheckMd5 patched_tc.side_effect = [1, 2] with pytest.raises(RuntimeError): d.start() @@ -277,7 +282,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): 
b.properties.content_length = 0 patched_lb.side_effect = [[b]] d._all_remote_files_processed = False - d._check_download_conditions.return_value = dl.DownloadAction.Download + d._check_download_conditions.return_value = ops.DownloadAction.Download patched_tc.side_effect = [1, 2] with pytest.raises(RuntimeError): d.start() @@ -285,7 +290,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): def test_start_keyboard_interrupt(): - d = dl.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._run = mock.MagicMock(side_effect=KeyboardInterrupt) d._wait_for_download_threads = mock.MagicMock() d._md5_offload = mock.MagicMock() diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 897e28d..2ad07cf 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -348,159 +348,3 @@ def test_azurestorageentity(): ase.populate_from_file(mock.MagicMock(), blob) assert ase.mode == models.AzureStorageModes.File assert ase.snapshot is None - - -def test_downloaddescriptor(tmpdir): - lp = pathlib.Path(str(tmpdir.join('a'))) - - opts = mock.MagicMock() - opts.check_file_md5 = True - opts.chunk_size_bytes = 16 - ase = models.AzureStorageEntity('cont') - ase._size = 1024 - ase._encryption = mock.MagicMock() - with pytest.raises(RuntimeError): - d = models.DownloadDescriptor(lp, ase, opts) - - ase._encryption.symmetric_key = b'123' - d = models.DownloadDescriptor(lp, ase, opts) - - assert d.entity == ase - assert not d.must_compute_md5 - assert d._total_chunks == 64 - assert d._offset == 0 - assert d.final_path == lp - assert str(d.local_path) == str(lp) + '.bxtmp' - assert d.local_path.stat().st_size == 1024 - 16 - - d.local_path.unlink() - ase._size = 1 - d = models.DownloadDescriptor(lp, ase, opts) - assert d._total_chunks == 1 - assert d.local_path.stat().st_size == 0 - - d.local_path.unlink() - ase._encryption = None - ase._size = 1024 - d = models.DownloadDescriptor(lp, ase, opts) - assert d.local_path.stat().st_size == 1024 - - # pre-existing file check - ase._size = 0 - d = models.DownloadDescriptor(lp, ase, opts) - assert d._total_chunks == 0 - assert d.local_path.stat().st_size == 0 - - -def test_downloaddescriptor_next_offsets(tmpdir): - lp = pathlib.Path(str(tmpdir.join('a'))) - - opts = mock.MagicMock() - opts.check_file_md5 = True - opts.chunk_size_bytes = 256 - ase = models.AzureStorageEntity('cont') - ase._size = 128 - d = models.DownloadDescriptor(lp, ase, opts) - - offsets = d.next_offsets() - assert d._total_chunks == 1 - assert offsets.chunk_num == 0 - assert offsets.fd_start == 0 - assert offsets.num_bytes == 128 - assert offsets.range_start == 0 - assert offsets.range_end == 127 - assert not offsets.unpad - assert d.next_offsets() is None - - ase._size = 0 - d = models.DownloadDescriptor(lp, ase, opts) - assert d._total_chunks == 0 - assert d.next_offsets() is None - - ase._size = 1 - d = models.DownloadDescriptor(lp, ase, opts) - offsets = d.next_offsets() - assert d._total_chunks == 1 - assert offsets.chunk_num == 0 - assert offsets.fd_start == 0 - assert offsets.num_bytes == 1 - assert offsets.range_start == 0 - assert offsets.range_end == 0 - assert not offsets.unpad - assert d.next_offsets() is None - - ase._size = 256 - d = models.DownloadDescriptor(lp, ase, opts) - offsets = d.next_offsets() - assert d._total_chunks == 1 - assert offsets.chunk_num == 0 - assert offsets.fd_start == 0 - assert offsets.num_bytes == 256 - assert 
offsets.range_start == 0 - assert offsets.range_end == 255 - assert not offsets.unpad - assert d.next_offsets() is None - - ase._size = 256 + 16 - d = models.DownloadDescriptor(lp, ase, opts) - offsets = d.next_offsets() - assert d._total_chunks == 2 - assert offsets.chunk_num == 0 - assert offsets.fd_start == 0 - assert offsets.num_bytes == 256 - assert offsets.range_start == 0 - assert offsets.range_end == 255 - assert not offsets.unpad - offsets = d.next_offsets() - assert offsets.chunk_num == 1 - assert offsets.fd_start == 256 - assert offsets.num_bytes == 16 - assert offsets.range_start == 256 - assert offsets.range_end == 256 + 15 - assert not offsets.unpad - assert d.next_offsets() is None - - ase._encryption = mock.MagicMock() - ase._encryption.symmetric_key = b'123' - ase._size = 128 - d = models.DownloadDescriptor(lp, ase, opts) - offsets = d.next_offsets() - assert d._total_chunks == 1 - assert offsets.chunk_num == 0 - assert offsets.fd_start == 0 - assert offsets.num_bytes == 128 - assert offsets.range_start == 0 - assert offsets.range_end == 127 - assert offsets.unpad - assert d.next_offsets() is None - - ase._size = 256 - d = models.DownloadDescriptor(lp, ase, opts) - offsets = d.next_offsets() - assert d._total_chunks == 1 - assert offsets.chunk_num == 0 - assert offsets.fd_start == 0 - assert offsets.num_bytes == 256 - assert offsets.range_start == 0 - assert offsets.range_end == 255 - assert offsets.unpad - assert d.next_offsets() is None - - ase._size = 256 + 32 # 16 bytes over + padding - d = models.DownloadDescriptor(lp, ase, opts) - offsets = d.next_offsets() - assert d._total_chunks == 2 - assert offsets.chunk_num == 0 - assert offsets.fd_start == 0 - assert offsets.num_bytes == 256 - assert offsets.range_start == 0 - assert offsets.range_end == 255 - assert not offsets.unpad - offsets = d.next_offsets() - assert offsets.chunk_num == 1 - assert offsets.fd_start == 256 - assert offsets.num_bytes == 32 - assert offsets.range_start == 256 - 16 - assert offsets.range_end == 256 + 31 - assert offsets.unpad - assert d.next_offsets() is None diff --git a/tests/test_blobxfer_offload.py b/tests/test_blobxfer_offload.py new file mode 100644 index 0000000..71cc97d --- /dev/null +++ b/tests/test_blobxfer_offload.py @@ -0,0 +1,36 @@ +# coding=utf-8 +"""Tests for offload""" + +# stdlib imports +import mock +# non-stdlib imports +import pytest +# local imports +# module under test +import blobxfer.offload as offload + + +def test_multiprocess_offload(): + with pytest.raises(ValueError): + a = offload._MultiprocessOffload(None, None) + + target = mock.MagicMock() + a = offload._MultiprocessOffload(target, 1, 'test') + assert len(a._procs) == 1 + assert not a.terminated + assert a._done_cv == a.done_cv + assert a._check_thread is None + assert a.pop_done_queue() is None + + item = (0, 'abc') + a._done_queue.put(item) + + check_func = mock.MagicMock() + a.initialize_check_thread(check_func) + + a.finalize_processes() + assert a.terminated + for proc in a._procs: + assert not proc.is_alive() + + assert a.pop_done_queue() == item From 8204d335aca804fa7e98819b8e4e8a66ab2ecf76 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Mon, 27 Feb 2017 13:32:56 -0800 Subject: [PATCH 13/47] More coverage for download operations - Move some class instance vars to properties for mocking - Simplify termination conditions to properties --- blobxfer/download/operations.py | 68 +++--- tests/test_blobxfer_download_operations.py | 254 ++++++++++++++++++++- 2 files changed, 285 insertions(+), 37 deletions(-) 
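Editorial note on the patch below: this commit turns the downloader's inline
termination checks into read-only properties (termination_check and
termination_check_md5) precisely so the test suite can replace them with
mock.PropertyMock and drive the worker loops for a fixed number of
iterations. A minimal, self-contained sketch of that testing pattern,
assuming only a toy Worker class (the real tests patch
blobxfer.download.operations.Downloader.termination_check, as shown further
down), not the actual blobxfer code:

    # Hedged illustration of the PropertyMock pattern this commit enables;
    # the Worker class is a stand-in. The repo's tests import the standalone
    # ``mock`` package; unittest.mock behaves the same for this purpose.
    from unittest import mock


    class Worker(object):
        @property
        def termination_check(self):
            # the real property inspects locks, queues and counters
            return False

        def run(self):
            iterations = 0
            while not self.termination_check:
                iterations += 1
            return iterations


    with mock.patch.object(
            Worker, 'termination_check',
            new_callable=mock.PropertyMock) as patched_tc:
        # property reads return False, False, then True: exactly two passes
        patched_tc.side_effect = [False, False, True]
        assert Worker().run() == 2

This is also why, in the diff that follows, the previous inline
"while True: ... break" constructs collapse into
"while not self.termination_check:" style loops.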
diff --git a/blobxfer/download/operations.py b/blobxfer/download/operations.py index ac36ebe..9679ca6 100644 --- a/blobxfer/download/operations.py +++ b/blobxfer/download/operations.py @@ -96,6 +96,34 @@ def __init__(self, general_options, creds, spec): self._creds = creds self._spec = spec + @property + def termination_check(self): + # type: (Downloader) -> bool + """Check if terminated + :param Downloader self: this + :rtype: bool + :return: if terminated + """ + with self._download_lock: + return (self._download_terminate or + (self._all_remote_files_processed and + len(self._download_set) == 0)) + + @property + def termination_check_md5(self): + # type: (Downloader) -> bool + """Check if terminated from MD5 context + :param Downloader self: this + :rtype: bool + :return: if terminated from MD5 context + """ + with self._md5_meta_lock: + with self._download_lock: + return (self._download_terminate or + (self._all_remote_files_processed and + len(self._md5_map) == 0 and + len(self._download_set) == 0)) + def _check_download_conditions(self, lpath, rfile): # type: (Downloader, pathlib.Path, # blobxfer.models.AzureStorageEntity) -> DownloadAction @@ -188,26 +216,17 @@ def _check_for_downloads_from_md5(self): :param Downloader self: this """ cv = self._md5_offload.done_cv - while True: - with self._md5_meta_lock: - if (self._download_terminate or - (self._all_remote_files_processed and - len(self._md5_map) == 0 and - len(self._download_set) == 0)): - break + while not self.termination_check_md5: result = None cv.acquire() - while not self._download_terminate: + while True: result = self._md5_offload.pop_done_queue() if result is None: # use cv timeout due to possible non-wake while running cv.wait(1) # check for terminating conditions - with self._md5_meta_lock: - if (self._all_remote_files_processed and - len(self._md5_map) == 0 and - len(self._download_set) == 0): - break + if self.termination_check_md5: + break else: break cv.release() @@ -220,24 +239,17 @@ def _check_for_crypto_done(self): :param Downloader self: this """ cv = self._crypto_offload.done_cv - while True: - with self._download_lock: - if (self._download_terminate or - (self._all_remote_files_processed and - len(self._download_set) == 0)): - break + while not self.termination_check: result = None cv.acquire() - while not self._download_terminate: + while True: result = self._crypto_offload.pop_done_queue() if result is None: # use cv timeout due to possible non-wake while running cv.wait(1) # check for terminating conditions - with self._download_lock: - if (self._all_remote_files_processed and - len(self._download_set) == 0): - break + if self.termination_check: + break else: break cv.release() @@ -291,13 +303,7 @@ def _worker_thread_download(self): """Worker thread download :param Downloader self: this """ - while True: - if self._download_terminate: - break - with self._download_lock: - if (self._all_remote_files_processed and - len(self._download_set) == 0): - break + while not self.termination_check: try: dd = self._download_queue.get(False, 1) except queue.Empty: diff --git a/tests/test_blobxfer_download_operations.py b/tests/test_blobxfer_download_operations.py index 0aebc75..eedbeb8 100644 --- a/tests/test_blobxfer_download_operations.py +++ b/tests/test_blobxfer_download_operations.py @@ -10,10 +10,15 @@ import pathlib2 as pathlib except ImportError: # noqa import pathlib +try: + import queue +except ImportError: # noqa + import Queue as queue # non-stdlib imports import azure.storage.blob import pytest # local 
imports +import blobxfer.download.models import blobxfer.models as models import blobxfer.util as util # module under test @@ -206,16 +211,93 @@ def test_check_for_downloads_from_md5(): d._md5_offload.done_cv = multiprocessing.Condition() d._md5_offload.pop_done_queue.side_effect = [None, (lpath, False)] d._add_to_download_queue = mock.MagicMock() + d._all_remote_files_processed = False + d._download_terminate = True + d._check_for_downloads_from_md5() + assert d._add_to_download_queue.call_count == 0 - with pytest.raises(StopIteration): + with mock.patch( + 'blobxfer.download.operations.Downloader.' + 'termination_check_md5', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._md5_map[lpath] = mock.MagicMock() + d._download_set.add(pathlib.Path(lpath)) + d._md5_offload = mock.MagicMock() + d._md5_offload.done_cv = multiprocessing.Condition() + d._md5_offload.pop_done_queue.side_effect = [None, (lpath, False)] + d._add_to_download_queue = mock.MagicMock() + patched_tc.side_effect = [False, False, True] d._check_for_downloads_from_md5() - assert d._add_to_download_queue.call_count == 1 + assert d._add_to_download_queue.call_count == 1 + + with mock.patch( + 'blobxfer.download.operations.Downloader.' + 'termination_check_md5', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._md5_map[lpath] = mock.MagicMock() + d._download_set.add(pathlib.Path(lpath)) + d._md5_offload = mock.MagicMock() + d._md5_offload.done_cv = multiprocessing.Condition() + d._md5_offload.pop_done_queue.side_effect = [None] + d._add_to_download_queue = mock.MagicMock() + patched_tc.side_effect = [False, True, True] + d._check_for_downloads_from_md5() + assert d._add_to_download_queue.call_count == 0 - d._add_to_download_queue = mock.MagicMock() + +def test_check_for_crypto_done(): + lpath = 'lpath' + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._download_set.add(pathlib.Path(lpath)) + d._dd_map[lpath] = mock.MagicMock() + d._crypto_offload = mock.MagicMock() + d._crypto_offload.done_cv = multiprocessing.Condition() + d._crypto_offload.pop_done_queue.side_effect = [ + None, + (lpath, mock.MagicMock(), mock.MagicMock()), + ] + d._complete_chunk_download = mock.MagicMock() d._all_remote_files_processed = False d._download_terminate = True - d._check_for_downloads_from_md5() - assert d._add_to_download_queue.call_count == 0 + d._check_for_crypto_done() + assert d._complete_chunk_download.call_count == 0 + + with mock.patch( + 'blobxfer.download.operations.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._download_set.add(pathlib.Path(lpath)) + d._dd_map[lpath] = mock.MagicMock() + d._crypto_offload = mock.MagicMock() + d._crypto_offload.done_cv = multiprocessing.Condition() + d._crypto_offload.pop_done_queue.side_effect = [ + None, + (lpath, mock.MagicMock(), mock.MagicMock()), + ] + patched_tc.side_effect = [False, False, True] + d._complete_chunk_download = mock.MagicMock() + d._check_for_crypto_done() + assert d._complete_chunk_download.call_count == 1 + + +def test_add_to_download_queue(tmpdir): + path = tmpdir.join('a') + lpath = pathlib.Path(str(path)) + ase = models.AzureStorageEntity('cont') + ase._size = 1 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'abc' + d = 
ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._spec.options.chunk_size_bytes = 1 + + d._add_to_download_queue(lpath, ase) + assert d._download_queue.qsize() == 1 + assert path in d._dd_map def test_initialize_and_terminate_download_threads(): @@ -233,6 +315,166 @@ def test_initialize_and_terminate_download_threads(): assert not thr.is_alive() +def test_complete_chunk_download(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 16 + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + offsets = dd.next_offsets() + data = b'0' * ase._size + + d._complete_chunk_download(offsets, data, dd) + + assert dd.local_path.exists() + assert dd.local_path.stat().st_size == len(data) + assert dd._completed_ops == 1 + + +@mock.patch('blobxfer.crypto.operations.aes_cbc_decrypt_data') +@mock.patch('blobxfer.file.operations.get_file_range') +@mock.patch('blobxfer.blob.operations.get_blob_range') +def test_worker_thread_download( + patched_gbr, patched_gfr, patched_acdd, tmpdir): + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._complete_chunk_download = mock.MagicMock() + d._download_terminate = True + d._worker_thread_download() + assert d._complete_chunk_download.call_count == 0 + + d._download_terminate = False + d._all_remote_files_processed = True + d._worker_thread_download() + assert d._complete_chunk_download.call_count == 0 + + with mock.patch( + 'blobxfer.download.operations.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + with mock.patch( + 'blobxfer.download.models.DownloadDescriptor.' 
+ 'all_operations_completed', + new_callable=mock.PropertyMock) as patched_aoc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._complete_chunk_download = mock.MagicMock() + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 16 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'abc' + lp = pathlib.Path(str(tmpdir.join('a'))) + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd.next_offsets = mock.MagicMock(side_effect=[None, None]) + dd.finalize_file = mock.MagicMock() + patched_aoc.side_effect = [False, True] + patched_tc.side_effect = [False, False, False, True] + d._dd_map[str(lp)] = mock.MagicMock() + d._download_set.add(lp) + d._download_queue = mock.MagicMock() + d._download_queue.get.side_effect = [queue.Empty, dd, dd] + d._worker_thread_download() + assert d._complete_chunk_download.call_count == 0 + assert str(lp) not in d._dd_map + assert dd.finalize_file.call_count == 1 + assert d._download_count == 1 + + with mock.patch( + 'blobxfer.download.operations.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._mode = blobxfer.models.AzureStorageModes.File + ase._size = 16 + patched_gfr.return_value = b'0' * ase._size + lp = pathlib.Path(str(tmpdir.join('b'))) + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd.finalize_file = mock.MagicMock() + dd.perform_chunked_integrity_check = mock.MagicMock() + d._dd_map[str(lp)] = mock.MagicMock() + d._download_set.add(lp) + d._download_queue = mock.MagicMock() + d._download_queue.get.side_effect = [dd] + d._complete_chunk_download = mock.MagicMock() + patched_tc.side_effect = [False, True] + d._worker_thread_download() + assert d._complete_chunk_download.call_count == 1 + assert dd.perform_chunked_integrity_check.call_count == 1 + + with mock.patch( + 'blobxfer.download.operations.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._mode = blobxfer.models.AzureStorageModes.Auto + ase._size = 32 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'abc' + ase._encryption.content_encryption_iv = b'0' * 16 + patched_gfr.return_value = b'0' * ase._size + lp = pathlib.Path(str(tmpdir.join('c'))) + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd.finalize_file = mock.MagicMock() + dd.perform_chunked_integrity_check = mock.MagicMock() + d._crypto_offload = mock.MagicMock() + d._crypto_offload.add_decrypt_chunk = mock.MagicMock() + d._dd_map[str(lp)] = mock.MagicMock() + d._download_set.add(lp) + d._download_queue = mock.MagicMock() + d._download_queue.get.side_effect = [dd] + d._complete_chunk_download = mock.MagicMock() + patched_tc.side_effect = [False, True] + d._worker_thread_download() + assert d._complete_chunk_download.call_count == 0 + assert d._crypto_offload.add_decrypt_chunk.call_count == 1 + assert dd.perform_chunked_integrity_check.call_count == 1 + + with mock.patch( + 
'blobxfer.download.operations.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.concurrency.crypto_processes = 0 + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._mode = blobxfer.models.AzureStorageModes.Auto + ase._size = 32 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'abc' + ase._encryption.content_encryption_iv = b'0' * 16 + patched_gfr.return_value = b'0' * ase._size + lp = pathlib.Path(str(tmpdir.join('d'))) + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd.next_offsets() + dd.perform_chunked_integrity_check = mock.MagicMock() + patched_acdd.return_value = b'0' * 16 + d._dd_map[str(lp)] = mock.MagicMock() + d._download_set.add(lp) + d._download_queue = mock.MagicMock() + d._download_queue.get.side_effect = [dd] + d._complete_chunk_download = mock.MagicMock() + patched_tc.side_effect = [False, True] + d._worker_thread_download() + assert d._complete_chunk_download.call_count == 1 + assert patched_acdd.call_count == 1 + assert dd.perform_chunked_integrity_check.call_count == 1 + + @mock.patch('time.clock') @mock.patch('blobxfer.md5.LocalFileMd5Offload') @mock.patch('blobxfer.blob.operations.list_blobs') @@ -241,7 +483,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._initialize_download_threads = mock.MagicMock() patched_lfmo._check_thread = mock.MagicMock() - d._general_options.concurrency.crypto_processes = 0 + d._general_options.concurrency.crypto_processes = 1 d._spec.sources = [] d._spec.options = mock.MagicMock() d._spec.options.chunk_size_bytes = 1 From fa72fc92fa998eaecb9eff9f1baa2b4525bfecb4 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 28 Feb 2017 09:55:28 -0800 Subject: [PATCH 14/47] Fix file download issues - Update dependencies to latest --- blobxfer/download/operations.py | 24 +++++++++++++++------- blobxfer/file/operations.py | 15 ++++++++------ cli/cli.py | 4 ++-- setup.py | 8 ++++---- tests/test_blobxfer_download_operations.py | 1 + tests/test_blobxfer_file_operations.py | 22 +++++++++++--------- 6 files changed, 45 insertions(+), 29 deletions(-) diff --git a/blobxfer/download/operations.py b/blobxfer/download/operations.py index 9679ca6..4c9a0a9 100644 --- a/blobxfer/download/operations.py +++ b/blobxfer/download/operations.py @@ -43,7 +43,6 @@ except ImportError: # noqa import Queue as queue import threading -import time # non-stdlib imports import dateutil # local imports @@ -78,7 +77,6 @@ def __init__(self, general_options, creds, spec): :param blobxfer.models.AzureStorageCredentials creds: creds :param blobxfer.models.DownloadSpecification spec: download spec """ - self._time_start = None self._all_remote_files_processed = False self._crypto_offload = None self._md5_meta_lock = threading.Lock() @@ -87,6 +85,7 @@ def __init__(self, general_options, creds, spec): self._download_lock = threading.Lock() self._download_queue = queue.Queue() self._download_set = set() + self._download_start = None self._download_threads = [] self._download_count = 0 self._download_total_bytes = 0 @@ -274,6 +273,11 @@ def _add_to_download_queue(self, lpath, rfile): self._dd_map[str(dd.final_path)] = dd # add download descriptor to queue self._download_queue.put(dd) + if self._download_start is None: + with 
self._download_lock: + if self._download_start is None: + self._download_start = datetime.datetime.now( + tz=dateutil.tz.tzlocal()) def _initialize_download_threads(self): # type: (Downloader) -> None @@ -386,6 +390,8 @@ def _complete_chunk_download(self, offsets, data, dd): def _run(self): # type: (Downloader) -> None """Execute Downloader""" + start_time = datetime.datetime.now(tz=dateutil.tz.tzlocal()) + logger.info('script start time: {0}'.format(start_time)) # ensure destination path blobxfer.operations.ensure_local_destination(self._creds, self._spec) logger.info('downloading blobs/files to local path: {}'.format( @@ -409,7 +415,6 @@ def _run(self): skipped_files = 0 total_size = 0 skipped_size = 0 - self._time_start = time.clock() for src in self._spec.sources: for rfile in src.files( self._creds, self._spec.options, self._general_options): @@ -443,16 +448,21 @@ def _run(self): ('{0} remote files processed, waiting for download completion ' 'of {1:.4f} MiB').format(nfiles, download_size_mib)) self._wait_for_download_threads(terminate=False) - end = time.clock() - runtime = end - self._time_start + end_time = datetime.datetime.now(tz=dateutil.tz.tzlocal()) if (self._download_count != download_files or self._download_total_bytes != download_size): raise RuntimeError( 'download mismatch: [count={}/{} bytes={}/{}]'.format( self._download_count, download_files, self._download_total_bytes, download_size)) - logger.info('all files downloaded: {0:.3f} sec {1:.4f} Mbps'.format( - runtime, download_size_mib * 8 / runtime)) + if self._download_start is not None: + dltime = (end_time - self._download_start).total_seconds() + logger.info( + ('elapsed download + verify time and throughput: {0:.3f} sec, ' + '{1:.4f} Mbps').format( + dltime, download_size_mib * 8 / dltime)) + logger.info('script end time: {0} (elapsed: {1:.3f} sec)'.format( + end_time, (end_time - start_time).total_seconds())) def start(self): # type: (Downloader) -> None diff --git a/blobxfer/file/operations.py b/blobxfer/file/operations.py index ec654dd..eff3d01 100644 --- a/blobxfer/file/operations.py +++ b/blobxfer/file/operations.py @@ -39,6 +39,7 @@ import azure.storage.file # local imports import blobxfer.retry +import blobxfer.util # create logger logger = logging.getLogger(__name__) @@ -96,8 +97,10 @@ def check_if_single_file(client, fileshare, prefix, timeout=None): :rtype: tuple :return: (if prefix in fileshare is a single file, file) """ - dirname, fname = parse_file_path(prefix) file = None + if blobxfer.util.is_none_or_empty(prefix): + return (False, file) + dirname, fname = parse_file_path(prefix) try: file = client.get_file_properties( share_name=fileshare, @@ -136,13 +139,13 @@ def list_files(client, fileshare, prefix, timeout=None): timeout=timeout, ) for file in files: - fspath = str(pathlib.Path( - dir if dir is not None else '' / file.name)) - if isinstance(file, azure.storage.file.File): + fspath = str( + pathlib.Path(dir if dir is not None else '') / file.name) + if type(file) == azure.storage.file.models.File: fsprop = client.get_file_properties( share_name=fileshare, - directory_name=dir, - file_name=file.name, + directory_name=None, + file_name=fspath, timeout=timeout, ) yield fsprop diff --git a/cli/cli.py b/cli/cli.py index 0c085c7..d273845 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -337,7 +337,7 @@ def callback(ctx, param, value): return click.option( '--file-attributes', expose_value=False, - is_flag=True, + is_flag=False, help='Store or restore file attributes [False]', callback=callback)(f) @@ 
-350,7 +350,7 @@ def callback(ctx, param, value): return click.option( '--file-md5/--no-file-md5', expose_value=False, - default=True, + default=False, help='Compute file MD5 [True]', callback=callback)(f) diff --git a/setup.py b/setup.py index 729dcc9..f6336db 100644 --- a/setup.py +++ b/setup.py @@ -43,12 +43,12 @@ install_requires = [ 'azure-common==1.1.4', - 'azure-storage==0.33.0', - 'click==6.6', - 'cryptography>=1.7.1', + 'azure-storage==0.34.0', + 'click==6.7', + 'cryptography>=1.7.2', 'future==0.16.0', 'python-dateutil==2.6.0', - 'ruamel.yaml==0.13.11', + 'ruamel.yaml==0.13.14', ] if sys.version_info < (3, 4): diff --git a/tests/test_blobxfer_download_operations.py b/tests/test_blobxfer_download_operations.py index eedbeb8..49d550e 100644 --- a/tests/test_blobxfer_download_operations.py +++ b/tests/test_blobxfer_download_operations.py @@ -481,6 +481,7 @@ def test_worker_thread_download( @mock.patch('blobxfer.operations.ensure_local_destination', return_value=True) def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._download_start = datetime.datetime.now(tz=dateutil.tz.tzlocal()) d._initialize_download_threads = mock.MagicMock() patched_lfmo._check_thread = mock.MagicMock() d._general_options.concurrency.crypto_processes = 1 diff --git a/tests/test_blobxfer_file_operations.py b/tests/test_blobxfer_file_operations.py index b221534..e354bda 100644 --- a/tests/test_blobxfer_file_operations.py +++ b/tests/test_blobxfer_file_operations.py @@ -69,6 +69,9 @@ def test_check_if_single_file(): result = ops.check_if_single_file(client, 'a', 'b/c') assert result[0] + result = ops.check_if_single_file(client, 'a', '') + assert not result[0] + client = mock.MagicMock() client.get_file_properties = mock.MagicMock() client.get_file_properties.side_effect = \ @@ -95,11 +98,9 @@ def test_list_files_single_file(): return_value=(False, None) ) def test_list_files_directory(patched_cisf): - client = mock.MagicMock() - client.list_directories_and_files = mock.MagicMock() _file = azure.storage.file.models.File(name='name') + client = mock.MagicMock() client.list_directories_and_files.return_value = [_file] - client.get_file_properties = mock.MagicMock() client.get_file_properties.return_value = _file i = 0 @@ -108,17 +109,18 @@ def test_list_files_directory(patched_cisf): assert file.name == 'name' assert i == 1 + print('test') + _dir = azure.storage.file.models.Directory(name='dirname') + _file = azure.storage.file.models.File(name='dirname/name') client = mock.MagicMock() - client.list_directories_and_files = mock.MagicMock() - _file = azure.storage.file.models.File(name='name') - client.list_directories_and_files.side_effect = [['dir'], [file]] - client.get_file_properties = mock.MagicMock() - client.get_file_properties.return_value = _file + client.list_directories_and_files.side_effect = [[_dir, _file]] + client.get_file_properties.side_effect = [_file] i = 0 - for file in ops.list_files(client, 'dir', ''): + for file in ops.list_files(client, '', ''): i += 1 - assert file.name == 'name' + assert file.name == _file.name + assert type(file) == azure.storage.file.models.File assert i == 1 From bc36c5d08b7bf7663beaf90b3c21208c38e5296d Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 28 Feb 2017 13:27:20 -0800 Subject: [PATCH 15/47] Add cleanup actions --- blobxfer/download/models.py | 20 +++++++++ blobxfer/download/operations.py | 25 ++++++++++- blobxfer/models.py | 9 +++- cli/cli.py | 14 +++++++ 
cli/settings.py | 2 + tests/test_blobxfer_download_models.py | 31 ++++++++++++++ tests/test_blobxfer_download_operations.py | 48 ++++++++++++++++++++++ tests/test_blobxfer_models.py | 22 ++++++++++ 8 files changed, 167 insertions(+), 4 deletions(-) diff --git a/blobxfer/download/models.py b/blobxfer/download/models.py index 39b641d..7fb4fe8 100644 --- a/blobxfer/download/models.py +++ b/blobxfer/download/models.py @@ -200,6 +200,26 @@ def _allocate_disk_space(self): fd.seek(allocatesize - 1) fd.write(b'\0') + def cleanup_all_temporary_files(self): + # type: (DownloadDescriptor) -> None + """Cleanup all temporary files in case of an exception or interrupt. + This function is not thread-safe. + :param DownloadDescriptor self: this + """ + # delete local file + try: + self.local_path.unlink() + except OSError: + pass + # iterate unchecked chunks and delete + for key in self._unchecked_chunks: + ucc = self._unchecked_chunks[key] + if ucc.temp: + try: + ucc.file_path.unlink() + except OSError: + pass + def next_offsets(self): # type: (DownloadDescriptor) -> DownloadOffsets """Retrieve the next offsets diff --git a/blobxfer/download/operations.py b/blobxfer/download/operations.py index 4c9a0a9..b947bcf 100644 --- a/blobxfer/download/operations.py +++ b/blobxfer/download/operations.py @@ -387,6 +387,26 @@ def _complete_chunk_download(self, offsets, data, dd): dd.dec_outstanding_operations() # TODO pickle dd to resume file + def _cleanup_temporary_files(self): + # type: (Downloader) -> None + """Cleanup temporary files in case of an exception or interrupt. + This function is not thread-safe. + :param Downloader self: this + """ + # do not clean up if resume file exists + if self._general_options.resume_file is not None: + logger.debug( + 'not cleaning up temporary files since resume file has ' + 'been specified') + return + # iterate through dd map and cleanup files + for key in self._dd_map: + dd = self._dd_map[key] + try: + dd.cleanup_all_temporary_files() + except Exception as e: + logger.exception(e) + def _run(self): # type: (Downloader) -> None """Execute Downloader""" @@ -475,10 +495,11 @@ def start(self): 'KeyboardInterrupt detected, force terminating ' 'processes and threads (this may take a while)...') self._wait_for_download_threads(terminate=True) - # TODO delete all temp files - # TODO close resume file in finally? 
+ self._cleanup_temporary_files() raise finally: + # TODO close resume file + # shutdown processes if self._md5_offload is not None: self._md5_offload.finalize_processes() if self._crypto_offload is not None: diff --git a/blobxfer/models.py b/blobxfer/models.py index 8a91885..3648722 100644 --- a/blobxfer/models.py +++ b/blobxfer/models.py @@ -151,12 +151,13 @@ def __init__(self, crypto_processes, md5_processes, transfer_threads): class GeneralOptions(object): """General Options""" def __init__( - self, concurrency, progress_bar=True, timeout_sec=None, - verbose=False): + self, concurrency, progress_bar=True, resume_file=None, + timeout_sec=None, verbose=False): """Ctor for General Options :param GeneralOptions self: this :param ConcurrencyOptions concurrency: concurrency options :param bool progress_bar: progress bar + :param str resume_file: resume file :param int timeout_sec: timeout in seconds :param bool verbose: verbose output """ @@ -164,6 +165,10 @@ def __init__( raise ValueError('concurrency option is unspecified') self.concurrency = concurrency self.progress_bar = progress_bar + if blobxfer.util.is_not_empty(resume_file): + self.resume_file = pathlib.Path(resume_file) + else: + self.resume_file = None self.timeout_sec = timeout_sec self.verbose = verbose diff --git a/cli/cli.py b/cli/cli.py index d273845..03fb231 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -148,6 +148,19 @@ def callback(ctx, param, value): callback=callback)(f) +def _resume_file_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['resume_file'] = value + return value + return click.option( + '--resume-file', + expose_value=False, + default=None, + help='Save or use resume file specified', + callback=callback)(f) + + def _timeout_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -192,6 +205,7 @@ def common_options(f): f = _verbose_option(f) f = _transfer_threads_option(f) f = _timeout_option(f) + f = _resume_file_option(f) f = _progress_bar_option(f) f = _md5_processes_option(f) f = _crypto_processes_option(f) diff --git a/cli/settings.py b/cli/settings.py index 8e5db75..4da2500 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -208,6 +208,7 @@ def merge_settings(config, cli_options): config['options']['crypto_processes'] = cli_options['crypto_processes'] config['options']['md5_processes'] = cli_options['md5_processes'] config['options']['progress_bar'] = cli_options['progress_bar'] + config['options']['resume_file'] = cli_options['resume_file'] config['options']['timeout_sec'] = cli_options['timeout'] config['options']['transfer_threads'] = cli_options['transfer_threads'] config['options']['verbose'] = cli_options['verbose'] @@ -242,6 +243,7 @@ def create_general_options(config): transfer_threads=config['options']['transfer_threads'], ), progress_bar=config['options']['progress_bar'], + resume_file=config['options']['resume_file'], timeout_sec=config['options']['timeout_sec'], verbose=config['options']['verbose'], ) diff --git a/tests/test_blobxfer_download_models.py b/tests/test_blobxfer_download_models.py index 530e4ac..e91607e 100644 --- a/tests/test_blobxfer_download_models.py +++ b/tests/test_blobxfer_download_models.py @@ -251,6 +251,37 @@ def test_perform_chunked_integrity_check(tmpdir): assert not ucc.file_path.exists() +def test_cleanup_all_temporary_files(tmpdir): + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size 
= 16 + lp = pathlib.Path(str(tmpdir.join('a'))) + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + data = b'0' * opts.chunk_size_bytes + d._postpone_integrity_check(offsets, data) + assert len(d._unchecked_chunks) == 1 + d.cleanup_all_temporary_files() + assert not d.local_path.exists() + assert not d._unchecked_chunks[0].file_path.exists() + + lp = pathlib.Path(str(tmpdir.join('b'))) + d = models.DownloadDescriptor(lp, ase, opts) + + offsets = d.next_offsets() + data = b'0' * opts.chunk_size_bytes + d._postpone_integrity_check(offsets, data) + assert len(d._unchecked_chunks) == 1 + d.local_path.unlink() + d._unchecked_chunks[0].file_path.unlink() + d.cleanup_all_temporary_files() + assert not d.local_path.exists() + assert not d._unchecked_chunks[0].file_path.exists() + + def test_write_data(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) diff --git a/tests/test_blobxfer_download_operations.py b/tests/test_blobxfer_download_operations.py index 49d550e..3645291 100644 --- a/tests/test_blobxfer_download_operations.py +++ b/tests/test_blobxfer_download_operations.py @@ -475,12 +475,58 @@ def test_worker_thread_download( assert dd.perform_chunked_integrity_check.call_count == 1 +def test_cleanup_temporary_files(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 16 + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd.cleanup_all_temporary_files = mock.MagicMock() + dd.cleanup_all_temporary_files.side_effect = Exception + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.resume_file = pathlib.Path('abc') + d._dd_map[0] = dd + d._cleanup_temporary_files() + assert dd.local_path.exists() + + lp = pathlib.Path(str(tmpdir.join('b'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 16 + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.resume_file = None + d._dd_map[0] = dd + d._cleanup_temporary_files() + assert not dd.local_path.exists() + + lp = pathlib.Path(str(tmpdir.join('c'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = blobxfer.models.AzureStorageEntity('cont') + ase._size = 16 + dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd.cleanup_all_temporary_files = mock.MagicMock() + dd.cleanup_all_temporary_files.side_effect = Exception + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.resume_file = None + d._dd_map[0] = dd + d._cleanup_temporary_files() + assert dd.local_path.exists() + + @mock.patch('time.clock') @mock.patch('blobxfer.md5.LocalFileMd5Offload') @mock.patch('blobxfer.blob.operations.list_blobs') @mock.patch('blobxfer.operations.ensure_local_destination', return_value=True) def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._cleanup_temporary_files = mock.MagicMock() d._download_start = datetime.datetime.now(tz=dateutil.tz.tzlocal()) d._initialize_download_threads = mock.MagicMock() patched_lfmo._check_thread = mock.MagicMock() @@ -536,8 +582,10 @@ def test_start_keyboard_interrupt(): d = ops.Downloader(mock.MagicMock(), 
mock.MagicMock(), mock.MagicMock()) d._run = mock.MagicMock(side_effect=KeyboardInterrupt) d._wait_for_download_threads = mock.MagicMock() + d._cleanup_temporary_files = mock.MagicMock() d._md5_offload = mock.MagicMock() with pytest.raises(KeyboardInterrupt): d.start() assert d._wait_for_download_threads.call_count == 1 + assert d._cleanup_temporary_files.call_count == 1 diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py index 2ad07cf..f200aaf 100644 --- a/tests/test_blobxfer_models.py +++ b/tests/test_blobxfer_models.py @@ -38,6 +38,7 @@ def test_general_options(): transfer_threads=3, ), progress_bar=False, + resume_file='abc', timeout_sec=1, verbose=True, ) @@ -46,6 +47,27 @@ def test_general_options(): assert a.concurrency.md5_processes == 2 assert a.concurrency.transfer_threads == 3 assert not a.progress_bar + assert a.resume_file == pathlib.Path('abc') + assert a.timeout_sec == 1 + assert a.verbose + + a = models.GeneralOptions( + concurrency=models.ConcurrencyOptions( + crypto_processes=1, + md5_processes=2, + transfer_threads=3, + ), + progress_bar=False, + resume_file=None, + timeout_sec=1, + verbose=True, + ) + + assert a.concurrency.crypto_processes == 1 + assert a.concurrency.md5_processes == 2 + assert a.concurrency.transfer_threads == 3 + assert not a.progress_bar + assert a.resume_file is None assert a.timeout_sec == 1 assert a.verbose From bb81f29802deedb3b30f97485b6c39a8850775d2 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 4 Apr 2017 11:37:27 -0700 Subject: [PATCH 16/47] Reorganize sources into sensible hierarchy --- blobxfer/api.py | 10 +- blobxfer/blob/block/__init__.py | 0 blobxfer/blob/page/__init__.py | 0 blobxfer/crypto/__init__.py | 0 blobxfer/download/__init__.py | 0 blobxfer/file/__init__.py | 0 blobxfer/models.py | 828 ------------------ blobxfer/models/__init__.py | 145 +++ blobxfer/models/azure.py | 460 ++++++++++ .../{crypto/models.py => models/crypto.py} | 68 +- .../models.py => models/download.py} | 189 +++- blobxfer/{ => models}/md5.py | 54 +- blobxfer/{ => models}/offload.py | 0 blobxfer/models/options.py | 147 ++++ blobxfer/models/upload.py | 75 ++ blobxfer/operations.py | 120 --- blobxfer/{blob => operations}/__init__.py | 0 .../append => operations/azure}/__init__.py | 0 .../azure/blob/__init__.py} | 22 +- .../azure/blob/append.py} | 5 +- .../azure/blob/block.py} | 5 +- .../azure/blob/page.py} | 5 +- .../azure/file.py} | 13 +- .../operations.py => operations/crypto.py} | 62 +- .../operations.py => operations/download.py} | 102 ++- blobxfer/operations/md5.py | 74 ++ blobxfer/util.py | 2 +- cli/settings.py | 53 +- setup.py | 10 +- 29 files changed, 1252 insertions(+), 1197 deletions(-) delete mode 100644 blobxfer/blob/block/__init__.py delete mode 100644 blobxfer/blob/page/__init__.py delete mode 100644 blobxfer/crypto/__init__.py delete mode 100644 blobxfer/download/__init__.py delete mode 100644 blobxfer/file/__init__.py delete mode 100644 blobxfer/models.py create mode 100644 blobxfer/models/__init__.py create mode 100644 blobxfer/models/azure.py rename blobxfer/{crypto/models.py => models/crypto.py} (84%) rename blobxfer/{download/models.py => models/download.py} (69%) rename blobxfer/{ => models}/md5.py (64%) rename blobxfer/{ => models}/offload.py (100%) create mode 100644 blobxfer/models/options.py create mode 100644 blobxfer/models/upload.py delete mode 100644 blobxfer/operations.py rename blobxfer/{blob => operations}/__init__.py (100%) rename blobxfer/{blob/append => operations/azure}/__init__.py (100%) rename 
blobxfer/{blob/operations.py => operations/azure/blob/__init__.py} (86%) rename blobxfer/{blob/append/operations.py => operations/azure/blob/append.py} (93%) rename blobxfer/{blob/block/operations.py => operations/azure/blob/block.py} (93%) rename blobxfer/{blob/page/operations.py => operations/azure/blob/page.py} (93%) rename blobxfer/{file/operations.py => operations/azure/file.py} (93%) rename blobxfer/{crypto/operations.py => operations/crypto.py} (79%) rename blobxfer/{download/operations.py => operations/download.py} (83%) create mode 100644 blobxfer/operations/md5.py diff --git a/blobxfer/api.py b/blobxfer/api.py index 57fcf09..f8c3378 100644 --- a/blobxfer/api.py +++ b/blobxfer/api.py @@ -32,19 +32,19 @@ # non-stdlib imports # local imports -from .blob.append.operations import ( # noqa +from .operations.azure.blob.append import ( # noqa create_client as create_append_blob_client ) -from .blob.block.operations import ( # noqa +from .operations.azure.blob.block import ( # noqa create_client as create_block_blob_client ) -from .blob.page.operations import ( # noqa +from .operations.azure.blob.page import ( # noqa create_client as create_page_blob_client ) -from .file.operations import ( # noqa +from .operations.azure.file import ( # noqa create_client as create_file_client ) -from .download.operations import ( # noqa +from .operations.download import ( # noqa Downloader ) diff --git a/blobxfer/blob/block/__init__.py b/blobxfer/blob/block/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/blobxfer/blob/page/__init__.py b/blobxfer/blob/page/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/blobxfer/crypto/__init__.py b/blobxfer/crypto/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/blobxfer/download/__init__.py b/blobxfer/download/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/blobxfer/file/__init__.py b/blobxfer/file/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/blobxfer/models.py b/blobxfer/models.py deleted file mode 100644 index 3648722..0000000 --- a/blobxfer/models.py +++ /dev/null @@ -1,828 +0,0 @@ -# Copyright (c) Microsoft Corporation -# -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -# compat imports -from __future__ import ( - absolute_import, division, print_function, unicode_literals -) -from builtins import ( # noqa - bytes, dict, int, list, object, range, ascii, chr, hex, input, - next, oct, open, pow, round, super, filter, map, zip) -# stdlib imports -import collections -import enum -import fnmatch -import logging -import os -try: - import pathlib2 as pathlib -except ImportError: # noqa - import pathlib -import multiprocessing -# non-stdlib imports -# local imports -from .api import ( - create_append_blob_client, - create_block_blob_client, - create_file_client, - create_page_blob_client, -) -from azure.storage.blob.models import _BlobTypes as BlobTypes -import blobxfer.blob.operations -import blobxfer.file.operations -import blobxfer.crypto.models -import blobxfer.util - -# create logger -logger = logging.getLogger(__name__) - - -# enums -class AzureStorageModes(enum.Enum): - Auto = 10 - Append = 20 - Block = 30 - File = 40 - Page = 50 - - -# named tuples -VectoredIoOptions = collections.namedtuple( - 'VectoredIoOptions', [ - 'stripe_chunk_size_bytes', - 'multi_storage_account_distribution_mode', - ] -) -SkipOnOptions = collections.namedtuple( - 'SkipOnOptions', [ - 'filesize_match', - 'lmt_ge', - 'md5_match', - ] -) -UploadOptions = collections.namedtuple( - 'UploadOptions', [ - 'chunk_size_bytes', - 'delete_extraneous_destination', - 'mode', - 'overwrite', - 'recursive', - 'rsa_private_key', - 'rsa_public_key', - 'store_file_attributes', - 'store_file_md5', - 'strip_components', - 'vectored_io', - 'split_size_bytes', - ] -) -DownloadOptions = collections.namedtuple( - 'DownloadOptions', [ - 'check_file_md5', - 'chunk_size_bytes', - 'delete_extraneous_destination', - 'mode', - 'overwrite', - 'recursive', - 'restore_file_attributes', - 'rsa_private_key', - ] -) -SyncCopyOptions = collections.namedtuple( - 'SyncCopyOptions', [ - 'chunk_size_bytes', - 'mode', - 'overwrite', - ] -) -LocalPath = collections.namedtuple( - 'LocalPath', [ - 'parent_path', - 'relative_path', - ] -) - - -class ConcurrencyOptions(object): - """Concurrency Options""" - def __init__(self, crypto_processes, md5_processes, transfer_threads): - """Ctor for Concurrency Options - :param ConcurrencyOptions self: this - :param int crypto_processes: number of crypto procs - :param int md5_processes: number of md5 procs - :param int transfer_threads: number of transfer threads - """ - self.crypto_processes = crypto_processes - self.md5_processes = md5_processes - self.transfer_threads = transfer_threads - # allow crypto processes to be zero (which will inline crypto - # routines with main process) - if self.crypto_processes is None or self.crypto_processes < 1: - self.crypto_processes = 0 - if self.md5_processes is None or self.md5_processes < 1: - self.md5_processes = multiprocessing.cpu_count() // 2 - if self.md5_processes < 1: - self.md5_processes = 1 - if self.transfer_threads is None or self.transfer_threads < 1: - self.transfer_threads = multiprocessing.cpu_count() * 3 - - -class GeneralOptions(object): - """General Options""" - def __init__( - self, concurrency, progress_bar=True, resume_file=None, - timeout_sec=None, verbose=False): - """Ctor for General Options - :param GeneralOptions self: this - :param ConcurrencyOptions concurrency: concurrency options - :param bool progress_bar: progress bar - :param str resume_file: resume file - :param int timeout_sec: timeout in seconds - :param bool verbose: verbose output - """ - if concurrency is None: - raise ValueError('concurrency option is 
unspecified') - self.concurrency = concurrency - self.progress_bar = progress_bar - if blobxfer.util.is_not_empty(resume_file): - self.resume_file = pathlib.Path(resume_file) - else: - self.resume_file = None - self.timeout_sec = timeout_sec - self.verbose = verbose - - -class AzureStorageCredentials(object): - """Azure Storage Credentials""" - def __init__(self): - # type: (AzureStorageCredentials) -> None - """Ctor for AzureStorageCredentials""" - self._storage_accounts = {} - - def add_storage_account(self, name, key, endpoint): - # type: (AzureStorageCredentials, str, str, str) -> None - """Add a storage account - :param AzureStorageCredentials self: this - :param str name: name of storage account to store - :param str key: storage key or sas - :param str endpoint: endpoint - """ - if name in self._storage_accounts: - raise ValueError( - '{} already exists in storage accounts'.format(name)) - self._storage_accounts[name] = AzureStorageAccount(name, key, endpoint) - - def get_storage_account(self, name): - # type: (AzureStorageCredentials, str) -> AzureStorageAccount - """Get storage account details - :param AzureStorageCredentials self: this - :param str name: name of storage account to retrieve - :rtype: AzureStorageAccount - :return: storage account details - """ - return self._storage_accounts[name] - - -class AzureStorageAccount(object): - """Azure Storage Account""" - def __init__(self, name, key, endpoint): - # type: (AzureStorageAccount, str, str, str) -> None - """Ctor for AzureStorageAccount - :param str name: name of storage account - :param str key: storage key or sas - :param str endpoint: endpoint - """ - self._append_blob_client = None - self._block_blob_client = None - self._file_client = None - self._page_blob_client = None - self.name = name - self.key = key - self.endpoint = endpoint - self.is_sas = self._key_is_sas(self.key) - # normalize sas keys - if self.is_sas and self.key.startswith('?'): - self.key = self.key[1:] - self._create_clients() - - @staticmethod - def _key_is_sas(key): - # type: (str) -> bool - """Determine if key is a sas - :param str key: key to parse - :rtype: bool - :return: if key is a sas - """ - # keys starting with ? are sas keys as ? is not in the base-64 - # character range - if key.startswith('?'): - return True - else: - # & is not in the base-64 character range, so technically - # the presence of this character means the key is a sas. however, - # perform a stronger check for the sig= parameter. 
- tmp = key.split('&') - if len(tmp) == 1: - return False - elif any(x.startswith('sig=') for x in tmp): - return True - return False - - def _create_clients(self): - # type: (AzureStorageAccount) -> None - """Create Azure Storage clients - :param AzureStorageAccount self: this - """ - self._append_blob_client = create_append_blob_client(self) - self._block_blob_client = create_block_blob_client(self) - self._file_client = create_file_client(self) - self._page_blob_client = create_page_blob_client(self) - - @property - def append_blob_client(self): - # type: (AzureStorageAccount) -> azure.storage.blob.AppendBlobService - """Get append blob client - :param AzureStorageAccount self: this - :rtype: azure.storage.blob.AppendBlobService - :return: append blob client - """ - return self._append_blob_client - - @property - def block_blob_client(self): - # type: (AzureStorageAccount) -> azure.storage.blob.BlockBlobService - """Get block blob client - :param AzureStorageAccount self: this - :rtype: azure.storage.blob.BlockBlobService - :return: block blob client - """ - return self._block_blob_client - - @property - def file_client(self): - # type: (AzureStorageAccount) -> azure.storage.file.FileService - """Get file client - :param AzureStorageAccount self: this - :rtype: azure.storage.file.FileService - :return: file client - """ - return self._file_client - - @property - def page_blob_client(self): - # type: (AzureStorageAccount) -> azure.storage.blob.PageBlobService - """Get page blob client - :param AzureStorageAccount self: this - :rtype: azure.storage.blob.PageBlobService - :return: page blob client - """ - return self._page_blob_client - - -class _BaseSourcePaths(object): - """Base Source Paths""" - def __init__(self): - # type: (_BaseSourcePaths) -> None - """Ctor for _BaseSourcePaths - :param _BaseSourcePaths self: this - """ - self._include = None - self._exclude = None - self._paths = [] - - @property - def paths(self): - # type: (_BaseSourcePaths) -> List[pathlib.Path] - """Stored paths - :param _BaseSourcePaths self: this - :rtype: list - :return: list of pathlib.Path - """ - return self._paths - - def add_include(self, incl): - # type: (_BaseSourcePaths, str) -> None - """Add an include - :param _BaseSourcePaths self: this - :param str incl: include filter - """ - if self._include is None: - self._include = [incl] - else: - self._include.append(incl) - - def add_includes(self, includes): - # type: (_BaseSourcePaths, list) -> None - """Add a list of includes - :param _BaseSourcePaths self: this - :param list includes: list of includes - """ - if not isinstance(includes, list): - raise ValueError('includes is not of type list') - if self._include is None: - self._include = includes - else: - self._include.extend(includes) - - def add_exclude(self, excl): - # type: (_BaseSourcePaths, str) -> None - """Add an exclude - :param _BaseSourcePaths self: this - :param str excl: exclude filter - """ - if self._exclude is None: - self._exclude = [excl] - else: - self._exclude.append(excl) - - def add_excludes(self, excludes): - # type: (_BaseSourcePaths, list) -> None - """Add a list of excludes - :param _BaseSourcePaths self: this - :param list excludes: list of excludes - """ - if not isinstance(excludes, list): - raise ValueError('excludes is not of type list') - if self._exclude is None: - self._exclude = excludes - else: - self._exclude.extend(excludes) - - def add_path(self, path): - # type: (_BaseSourcePaths, str) -> None - """Add a local path - :param _BaseSourcePaths self: this - :param 
str path: path to add - """ - if isinstance(path, pathlib.Path): - self._paths.append(path) - else: - self._paths.append(pathlib.Path(path)) - - def add_paths(self, paths): - # type: (_BaseSourcePaths, list) -> None - """Add a list of local paths - :param _BaseSourcePaths self: this - :param list paths: paths to add - """ - for path in paths: - self.add_path(path) - - def _inclusion_check(self, path): - # type: (_BaseSourcePaths, pathlib.Path) -> bool - """Check file for inclusion against filters - :param _BaseSourcePaths self: this - :param pathlib.Path path: path to check - :rtype: bool - :return: if file should be included - """ - _spath = str(path) - inc = True - if self._include is not None: - inc = any([fnmatch.fnmatch(_spath, x) for x in self._include]) - if inc and self._exclude is not None: - inc = not any([fnmatch.fnmatch(_spath, x) for x in self._exclude]) - return inc - - -class LocalSourcePaths(_BaseSourcePaths): - """Local Source Paths""" - def files(self): - # type: (LocalSourcePaths) -> LocalPath - """Generator for files in paths - :param LocalSourcePaths self: this - :rtype: LocalPath - :return: LocalPath - """ - for _path in self._paths: - _ppath = os.path.expandvars(os.path.expanduser(str(_path))) - _expath = pathlib.Path(_ppath) - for entry in blobxfer.util.scantree(_ppath): - _rpath = pathlib.Path(entry.path).relative_to(_ppath) - if not self._inclusion_check(_rpath): - logger.debug( - 'skipping file {} due to filters'.format(_rpath)) - continue - yield LocalPath(parent_path=_expath, relative_path=_rpath) - - -class LocalDestinationPath(object): - """Local Destination Path""" - def __init__(self, path=None): - # type: (LocalDestinationPath, str) -> None - """Ctor for LocalDestinationPath - :param LocalDestinationPath self: this - :param str path: path - """ - self._is_dir = None - if path is not None: - self.path = path - - @property - def path(self): - # type: (LocalDestinationPath) -> pathlib.Path - """Path property - :param LocalDestinationPath self: this - :rtype: pathlib.Path - :return: local destination path - """ - return self._path - - @path.setter - def path(self, value): - # type: (LocalDestinationPath, str) -> None - """Path property setter - :param LocalDestinationPath self: this - :param str value: value to set path to - """ - self._path = pathlib.Path(value) - - @property - def is_dir(self): - # type: (LocalDestinationPath) -> bool - """is_dir property - :param LocalDestinationPath self: this - :rtype: bool - :return: if local destination path is a directory - """ - return self._is_dir - - @is_dir.setter - def is_dir(self, value): - # type: (LocalDestinationPath, bool) -> None - """is_dir property setter - :param LocalDestinationPath self: this - :param bool value: value to set is_dir to - """ - self._is_dir = value - - def ensure_path_exists(self): - # type: (LocalDestinationPath) -> None - """Ensure path exists - :param LocalDestinationPath self: this - """ - if self._is_dir is None: - raise RuntimeError('is_dir not set') - if self._is_dir: - self._path.mkdir(mode=0o750, parents=True, exist_ok=True) - else: - if self._path.exists() and self._path.is_dir(): - raise RuntimeError( - ('destination path {} already exists and is a ' - 'directory').format(self._path)) - else: - # ensure parent path exists and is created - self._path.parent.mkdir( - mode=0o750, parents=True, exist_ok=True) - - -class DownloadSpecification(object): - """DownloadSpecification""" - def __init__( - self, download_options, skip_on_options, local_destination_path): - # type: 
(DownloadSpecification, DownloadOptions, SkipOnOptions, - # LocalDestinationPath) -> None - """Ctor for DownloadSpecification - :param DownloadSepcification self: this - :param DownloadOptions download_options: download options - :param SkipOnOptions skip_on_options: skip on options - :param LocalDestinationPath local_destination_path: local dest path - """ - self.options = download_options - self.skip_on = skip_on_options - self.destination = local_destination_path - self.sources = [] - - def add_azure_source_path(self, source): - # type: (DownloadSpecification, AzureSourcePath) -> None - """Add an Azure Source Path - :param DownloadSepcification self: this - :param AzureSourcePath source: Azure source path to add - """ - self.sources.append(source) - - -class AzureSourcePath(_BaseSourcePaths): - """AzureSourcePath""" - def __init__(self): - # type: (AzureSourcePath) -> None - """Ctor for AzureSourcePath - :param AzureSourcePath self: this - """ - super(AzureSourcePath, self).__init__() - self._path_map = {} - - def add_path_with_storage_account(self, remote_path, storage_account): - # type: (AzureSourcePath, str, str) -> None - """Add a path with an associated storage account - :param AzureSourcePath self: this - :param str remote_path: remote path - :param str storage_account: storage account to associate with path - """ - if len(self._path_map) >= 1: - raise RuntimeError( - 'cannot add multiple remote paths to AzureSourcePath objects') - rpath = blobxfer.util.normalize_azure_path(remote_path) - self.add_path(rpath) - self._path_map[rpath] = storage_account - - def lookup_storage_account(self, remote_path): - # type: (AzureSourcePath, str) -> str - """Lookup the storage account associated with the remote path - :param AzureSourcePath self: this - :param str remote_path: remote path - :rtype: str - :return: storage account associated with path - """ - return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] - - def files(self, creds, options, general_options): - # type: (AzureSourcePath, AzureStorageCredentials, DownloadOptions, - # GeneralOptions) -> AzureStorageEntity - """Generator of Azure remote files or blobs - :param AzureSourcePath self: this - :param AzureStorageCredentials creds: storage creds - :param DownloadOptions options: download options - :param GeneralOptions general_options: general options - :rtype: AzureStorageEntity - :return: Azure storage entity object - """ - if options.mode == AzureStorageModes.File: - for file in self._populate_from_list_files( - creds, options, general_options): - yield file - else: - for blob in self._populate_from_list_blobs( - creds, options, general_options): - yield blob - - def _populate_from_list_files(self, creds, options, general_options): - # type: (AzureSourcePath, AzureStorageCredentials, DownloadOptions, - # GeneralOptions) -> AzureStorageEntity - """Internal generator for Azure remote files - :param AzureSourcePath self: this - :param AzureStorageCredentials creds: storage creds - :param DownloadOptions options: download options - :param GeneralOptions general_options: general options - :rtype: AzureStorageEntity - :return: Azure storage entity object - """ - for _path in self._paths: - rpath = str(_path) - cont, dir = blobxfer.util.explode_azure_path(rpath) - sa = creds.get_storage_account(self.lookup_storage_account(rpath)) - for file in blobxfer.file.operations.list_files( - sa.file_client, cont, dir, general_options.timeout_sec): - if blobxfer.crypto.models.EncryptionMetadata.\ - 
encryption_metadata_exists(file.metadata): - ed = blobxfer.crypto.models.EncryptionMetadata() - ed.convert_from_json( - file.metadata, file.name, options.rsa_private_key) - else: - ed = None - ase = AzureStorageEntity(cont, ed) - ase.populate_from_file(sa, file) - yield ase - - def _populate_from_list_blobs(self, creds, options, general_options): - # type: (AzureSourcePath, AzureStorageCredentials, DownloadOptions, - # GeneralOptions) -> AzureStorageEntity - """Internal generator for Azure remote blobs - :param AzureSourcePath self: this - :param AzureStorageCredentials creds: storage creds - :param DownloadOptions options: download options - :param GeneralOptions general_options: general options - :rtype: AzureStorageEntity - :return: Azure storage entity object - """ - for _path in self._paths: - rpath = str(_path) - cont, dir = blobxfer.util.explode_azure_path(rpath) - sa = creds.get_storage_account(self.lookup_storage_account(rpath)) - for blob in blobxfer.blob.operations.list_blobs( - sa.block_blob_client, cont, dir, options.mode, - general_options.timeout_sec): - if blobxfer.crypto.models.EncryptionMetadata.\ - encryption_metadata_exists(blob.metadata): - ed = blobxfer.crypto.models.EncryptionMetadata() - ed.convert_from_json( - blob.metadata, blob.name, options.rsa_private_key) - else: - ed = None - ase = AzureStorageEntity(cont, ed) - ase.populate_from_blob(sa, blob) - yield ase - - -class AzureStorageEntity(object): - """Azure Storage Entity""" - def __init__(self, container, ed=None): - # type: (AzureStorageEntity, str - # blobxfer.crypto.models.EncryptionMetadata) -> None - """Ctor for AzureStorageEntity - :param AzureStorageEntity self: this - :param str container: container name - :param blobxfer.crypto.models.EncryptionMetadata ed: - encryption metadata - """ - self._client = None - self._container = container - self._name = None - self._mode = None - self._lmt = None - self._size = None - self._snapshot = None - self._md5 = None - self._encryption = ed - self._vio = None - self.download = None - - @property - def client(self): - # type: (AzureStorageEntity) -> object - """Associated storage client - :param AzureStorageEntity self: this - :rtype: object - :return: associated storage client - """ - return self._client - - @property - def container(self): - # type: (AzureStorageEntity) -> str - """Container name - :param AzureStorageEntity self: this - :rtype: str - :return: name of container or file share - """ - return self._container - - @property - def name(self): - # type: (AzureStorageEntity) -> str - """Entity name - :param AzureStorageEntity self: this - :rtype: str - :return: name of entity - """ - return self._name - - @property - def lmt(self): - # type: (AzureStorageEntity) -> datetime.datetime - """Entity last modified time - :param AzureStorageEntity self: this - :rtype: datetime.datetime - :return: LMT of entity - """ - return self._lmt - - @property - def size(self): - # type: (AzureStorageEntity) -> int - """Entity size - :param AzureStorageEntity self: this - :rtype: int - :return: size of entity - """ - return self._size - - @property - def snapshot(self): - # type: (AzureStorageEntity) -> str - """Entity snapshot - :param AzureStorageEntity self: this - :rtype: str - :return: snapshot of entity - """ - return self._snapshot - - @property - def md5(self): - # type: (AzureStorageEntity) -> str - """Base64-encoded MD5 - :param AzureStorageEntity self: this - :rtype: str - :return: md5 of entity - """ - return self._md5 - - @property - def mode(self): - # type: 
(AzureStorageEntity) -> AzureStorageModes - """Entity mode (type) - :param AzureStorageEntity self: this - :rtype: AzureStorageModes - :return: type of entity - """ - return self._mode - - @property - def is_encrypted(self): - # type: (AzureStorageEntity) -> bool - """If data is encrypted - :param AzureStorageEntity self: this - :rtype: bool - :return: if encryption metadata is present - """ - return self._encryption is not None - - @property - def encryption_metadata(self): - # type: (AzureStorageEntity) -> - # blobxfer.crypto.models.EncryptionMetadata - """Entity metadata (type) - :param AzureStorageEntity self: this - :rtype: blobxfer.crypto.models.EncryptionMetadata - :return: encryption metadata of entity - """ - return self._encryption - - def populate_from_blob(self, sa, blob): - # type: (AzureStorageEntity, AzureStorageAccount, - # azure.storage.blob.models.Blob) -> None - """Populate properties from Blob - :param AzureStorageEntity self: this - :param AzureStorageAccount sa: storage account - :param azure.storage.blob.models.Blob blob: blob to populate from - """ - self._name = blob.name - self._snapshot = blob.snapshot - self._lmt = blob.properties.last_modified - self._size = blob.properties.content_length - self._md5 = blob.properties.content_settings.content_md5 - if blob.properties.blob_type == BlobTypes.AppendBlob: - self._mode = AzureStorageModes.Append - self._client = sa.append_blob_client - elif blob.properties.blob_type == BlobTypes.BlockBlob: - self._mode = AzureStorageModes.Block - self._client = sa.block_blob_client - elif blob.properties.blob_type == BlobTypes.PageBlob: - self._mode = AzureStorageModes.Page - self._client = sa.page_blob_client - - def populate_from_file(self, sa, file): - # type: (AzureStorageEntity, AzureStorageAccount, - # azure.storage.file.models.File) -> None - """Populate properties from File - :param AzureStorageEntity self: this - :param AzureStorageAccount sa: storage account - :param azure.storage.file.models.File file: file to populate from - """ - self._name = file.name - self._snapshot = None - self._lmt = file.properties.last_modified - self._size = file.properties.content_length - self._md5 = file.properties.content_settings.content_md5 - self._mode = AzureStorageModes.File - self._client = sa.file_client - - -class AzureDestinationPaths(object): - def __init__(self): - pass - - -class FileDescriptor(object): - def __init__(self, filepath): - if filepath == '-': - self.stdin = True - self.path = None - else: - self.stdin = False - self.path = pathlib.Path(filepath) - self.size = None - self.hmac = None - self.md5 = None - self.bytes_xferred = 0 - - -class ReadFileDescriptor(FileDescriptor): - def __init__(self, filepath): - super().__init__(filepath) - - -class WriteFileDescriptor(FileDescriptor): - def __init__(self, filepath): - super().__init__(filepath) diff --git a/blobxfer/models/__init__.py b/blobxfer/models/__init__.py new file mode 100644 index 0000000..82ac224 --- /dev/null +++ b/blobxfer/models/__init__.py @@ -0,0 +1,145 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import fnmatch +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +# local imports + + +class _BaseSourcePaths(object): + """Base Source Paths""" + def __init__(self): + # type: (_BaseSourcePaths) -> None + """Ctor for _BaseSourcePaths + :param _BaseSourcePaths self: this + """ + self._include = None + self._exclude = None + self._paths = [] + + @property + def paths(self): + # type: (_BaseSourcePaths) -> List[pathlib.Path] + """Stored paths + :param _BaseSourcePaths self: this + :rtype: list + :return: list of pathlib.Path + """ + return self._paths + + def add_include(self, incl): + # type: (_BaseSourcePaths, str) -> None + """Add an include + :param _BaseSourcePaths self: this + :param str incl: include filter + """ + if self._include is None: + self._include = [incl] + else: + self._include.append(incl) + + def add_includes(self, includes): + # type: (_BaseSourcePaths, list) -> None + """Add a list of includes + :param _BaseSourcePaths self: this + :param list includes: list of includes + """ + if not isinstance(includes, list): + raise ValueError('includes is not of type list') + if self._include is None: + self._include = includes + else: + self._include.extend(includes) + + def add_exclude(self, excl): + # type: (_BaseSourcePaths, str) -> None + """Add an exclude + :param _BaseSourcePaths self: this + :param str excl: exclude filter + """ + if self._exclude is None: + self._exclude = [excl] + else: + self._exclude.append(excl) + + def add_excludes(self, excludes): + # type: (_BaseSourcePaths, list) -> None + """Add a list of excludes + :param _BaseSourcePaths self: this + :param list excludes: list of excludes + """ + if not isinstance(excludes, list): + raise ValueError('excludes is not of type list') + if self._exclude is None: + self._exclude = excludes + else: + self._exclude.extend(excludes) + + def add_path(self, path): + # type: (_BaseSourcePaths, str) -> None + """Add a local path + :param _BaseSourcePaths self: this + :param str path: path to add + """ + if isinstance(path, pathlib.Path): + self._paths.append(path) + else: + self._paths.append(pathlib.Path(path)) + + def 
add_paths(self, paths): + # type: (_BaseSourcePaths, list) -> None + """Add a list of local paths + :param _BaseSourcePaths self: this + :param list paths: paths to add + """ + for path in paths: + self.add_path(path) + + def _inclusion_check(self, path): + # type: (_BaseSourcePaths, pathlib.Path) -> bool + """Check file for inclusion against filters + :param _BaseSourcePaths self: this + :param pathlib.Path path: path to check + :rtype: bool + :return: if file should be included + """ + _spath = str(path) + inc = True + if self._include is not None: + inc = any([fnmatch.fnmatch(_spath, x) for x in self._include]) + if inc and self._exclude is not None: + inc = not any([fnmatch.fnmatch(_spath, x) for x in self._exclude]) + return inc diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py new file mode 100644 index 0000000..57d1f38 --- /dev/null +++ b/blobxfer/models/azure.py @@ -0,0 +1,460 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
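The blobxfer.models.azure module added below consolidates the storage credential and account models. A rough usage sketch follows (the account name, SAS token, and endpoint are placeholders, and constructing a StorageAccount eagerly creates the azure-storage service clients, so that SDK must be importable):

    import blobxfer.models.azure as azmodels

    creds = azmodels.StorageCredentials()
    # a key starting with '?' or carrying a 'sig=' parameter is detected as a SAS
    creds.add_storage_account('mystorageacct', '?sv=...&sig=...', 'core.windows.net')
    sa = creds.get_storage_account('mystorageacct')
    assert sa.is_sas                     # True for the token above; leading '?' is stripped
    blob_client = sa.block_blob_client   # BlockBlobService created during construction
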
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import enum +# non-stdlib imports +from azure.storage.blob.models import _BlobTypes as BlobTypes +# local imports +import blobxfer.models +import blobxfer.operations.azure.blob +import blobxfer.operations.azure.blob.append +import blobxfer.operations.azure.blob.block +import blobxfer.operations.azure.blob.page +import blobxfer.operations.azure.file + + +# enums +class StorageModes(enum.Enum): + Auto = 10 + Append = 20 + Block = 30 + File = 40 + Page = 50 + + +class StorageCredentials(object): + """Azure Storage Credentials""" + def __init__(self): + # type: (StorageCredentials) -> None + """Ctor for StorageCredentials""" + self._storage_accounts = {} + + def add_storage_account(self, name, key, endpoint): + # type: (StorageCredentials, str, str, str) -> None + """Add a storage account + :param StorageCredentials self: this + :param str name: name of storage account to store + :param str key: storage key or sas + :param str endpoint: endpoint + """ + if name in self._storage_accounts: + raise ValueError( + '{} already exists in storage accounts'.format(name)) + self._storage_accounts[name] = StorageAccount(name, key, endpoint) + + def get_storage_account(self, name): + # type: (StorageCredentials, str) -> StorageAccount + """Get storage account details + :param StorageCredentials self: this + :param str name: name of storage account to retrieve + :rtype: StorageAccount + :return: storage account details + """ + return self._storage_accounts[name] + + +class StorageAccount(object): + """Azure Storage Account""" + def __init__(self, name, key, endpoint): + # type: (StorageAccount, str, str, str) -> None + """Ctor for StorageAccount + :param str name: name of storage account + :param str key: storage key or sas + :param str endpoint: endpoint + """ + self._append_blob_client = None + self._block_blob_client = None + self._file_client = None + self._page_blob_client = None + self.name = name + self.key = key + self.endpoint = endpoint + self.is_sas = self._key_is_sas(self.key) + # normalize sas keys + if self.is_sas and self.key.startswith('?'): + self.key = self.key[1:] + self._create_clients() + + @staticmethod + def _key_is_sas(key): + # type: (str) -> bool + """Determine if key is a sas + :param str key: key to parse + :rtype: bool + :return: if key is a sas + """ + # keys starting with ? are sas keys as ? is not in the base-64 + # character range + if key.startswith('?'): + return True + else: + # & is not in the base-64 character range, so technically + # the presence of this character means the key is a sas. however, + # perform a stronger check for the sig= parameter. 
+ tmp = key.split('&') + if len(tmp) == 1: + return False + elif any(x.startswith('sig=') for x in tmp): + return True + return False + + def _create_clients(self): + # type: (StorageAccount) -> None + """Create Azure Storage clients + :param StorageAccount self: this + """ + self._append_blob_client = \ + blobxfer.operations.azure.blob.append.create_client(self) + self._block_blob_client = \ + blobxfer.operations.azure.blob.block.create_client(self) + self._file_client = blobxfer.operations.azure.file.create_client(self) + self._page_blob_client = \ + blobxfer.operations.azure.blob.page.create_client(self) + + @property + def append_blob_client(self): + # type: (StorageAccount) -> azure.storage.blob.AppendBlobService + """Get append blob client + :param StorageAccount self: this + :rtype: azure.storage.blob.AppendBlobService + :return: append blob client + """ + return self._append_blob_client + + @property + def block_blob_client(self): + # type: (StorageAccount) -> azure.storage.blob.BlockBlobService + """Get block blob client + :param StorageAccount self: this + :rtype: azure.storage.blob.BlockBlobService + :return: block blob client + """ + return self._block_blob_client + + @property + def file_client(self): + # type: (StorageAccount) -> azure.storage.file.FileService + """Get file client + :param StorageAccount self: this + :rtype: azure.storage.file.FileService + :return: file client + """ + return self._file_client + + @property + def page_blob_client(self): + # type: (StorageAccount) -> azure.storage.blob.PageBlobService + """Get page blob client + :param StorageAccount self: this + :rtype: azure.storage.blob.PageBlobService + :return: page blob client + """ + return self._page_blob_client + + +class StorageEntity(object): + """Azure Storage Entity""" + def __init__(self, container, ed=None): + # type: (StorageEntity, str + # blobxfer.models.crypto.EncryptionMetadata) -> None + """Ctor for StorageEntity + :param StorageEntity self: this + :param str container: container name + :param blobxfer.models.crypto.EncryptionMetadata ed: + encryption metadata + """ + self._client = None + self._container = container + self._name = None + self._mode = None + self._lmt = None + self._size = None + self._snapshot = None + self._md5 = None + self._encryption = ed + self._vio = None + self.download = None + + @property + def client(self): + # type: (StorageEntity) -> object + """Associated storage client + :param StorageEntity self: this + :rtype: object + :return: associated storage client + """ + return self._client + + @property + def container(self): + # type: (StorageEntity) -> str + """Container name + :param StorageEntity self: this + :rtype: str + :return: name of container or file share + """ + return self._container + + @property + def name(self): + # type: (StorageEntity) -> str + """Entity name + :param StorageEntity self: this + :rtype: str + :return: name of entity + """ + return self._name + + @property + def lmt(self): + # type: (StorageEntity) -> datetime.datetime + """Entity last modified time + :param StorageEntity self: this + :rtype: datetime.datetime + :return: LMT of entity + """ + return self._lmt + + @property + def size(self): + # type: (StorageEntity) -> int + """Entity size + :param StorageEntity self: this + :rtype: int + :return: size of entity + """ + return self._size + + @property + def snapshot(self): + # type: (StorageEntity) -> str + """Entity snapshot + :param StorageEntity self: this + :rtype: str + :return: snapshot of entity + """ + return self._snapshot 
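        # Entities are normally produced by the SourcePath generators later in this
        # module rather than built by hand: populate_from_blob() maps the service
        # blob type to a StorageModes value and selects the matching client from the
        # owning StorageAccount. A minimal sketch, assuming 'sa' is a StorageAccount
        # and 'blob' came from a blob listing:
        #
        #   ase = StorageEntity('mycontainer')
        #   ase.populate_from_blob(sa, blob)   # e.g. AppendBlob -> StorageModes.Append
        #   print(ase.name, ase.size, ase.md5, ase.mode)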
+ + @property + def md5(self): + # type: (StorageEntity) -> str + """Base64-encoded MD5 + :param StorageEntity self: this + :rtype: str + :return: md5 of entity + """ + return self._md5 + + @property + def mode(self): + # type: (StorageEntity) -> blobxfer.models.azure.StorageModes + """Entity mode (type) + :param StorageEntity self: this + :rtype: blobxfer.models.azure.StorageModes + :return: type of entity + """ + return self._mode + + @property + def is_encrypted(self): + # type: (StorageEntity) -> bool + """If data is encrypted + :param StorageEntity self: this + :rtype: bool + :return: if encryption metadata is present + """ + return self._encryption is not None + + @property + def encryption_metadata(self): + # type: (StorageEntity) -> + # blobxfer.models.crypto.EncryptionMetadata + """Entity metadata (type) + :param StorageEntity self: this + :rtype: blobxfer.models.crypto.EncryptionMetadata + :return: encryption metadata of entity + """ + return self._encryption + + def populate_from_blob(self, sa, blob): + # type: (StorageEntity, blobxfer.models.azure.StorageAccount, + # azure.storage.blob.models.Blob) -> None + """Populate properties from Blob + :param StorageEntity self: this + :param blobxfer.models.azure.StorageAccount sa: storage account + :param azure.storage.blob.models.Blob blob: blob to populate from + """ + self._name = blob.name + self._snapshot = blob.snapshot + self._lmt = blob.properties.last_modified + self._size = blob.properties.content_length + self._md5 = blob.properties.content_settings.content_md5 + if blob.properties.blob_type == BlobTypes.AppendBlob: + self._mode = StorageModes.Append + self._client = sa.append_blob_client + elif blob.properties.blob_type == BlobTypes.BlockBlob: + self._mode = StorageModes.Block + self._client = sa.block_blob_client + elif blob.properties.blob_type == BlobTypes.PageBlob: + self._mode = StorageModes.Page + self._client = sa.page_blob_client + + def populate_from_file(self, sa, file): + # type: (StorageEntity, blobxfer.models.azure.StorageAccount, + # azure.storage.file.models.File) -> None + """Populate properties from File + :param StorageEntity self: this + :param blobxfer.models.azure.StorageAccount sa: storage account + :param azure.storage.file.models.File file: file to populate from + """ + self._name = file.name + self._snapshot = None + self._lmt = file.properties.last_modified + self._size = file.properties.content_length + self._md5 = file.properties.content_settings.content_md5 + self._mode = StorageModes.File + self._client = sa.file_client + + +class SourcePath(blobxfer.models._BaseSourcePaths): + """Azure Source Path""" + def __init__(self): + # type: (SourcePath) -> None + """Ctor for SourcePath + :param SourcePath self: this + """ + super(SourcePath, self).__init__() + self._path_map = {} + + def add_path_with_storage_account(self, remote_path, storage_account): + # type: (SourcePath, str, str) -> None + """Add a path with an associated storage account + :param SourcePath self: this + :param str remote_path: remote path + :param str storage_account: storage account to associate with path + """ + if len(self._path_map) >= 1: + raise RuntimeError( + 'cannot add multiple remote paths to SourcePath objects') + rpath = blobxfer.util.normalize_azure_path(remote_path) + self.add_path(rpath) + self._path_map[rpath] = storage_account + + def lookup_storage_account(self, remote_path): + # type: (SourcePath, str) -> str + """Lookup the storage account associated with the remote path + :param SourcePath self: this + :param 
str remote_path: remote path + :rtype: str + :return: storage account associated with path + """ + return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] + + def files(self, creds, options, general_options): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, + # blobxfer.models.options.General) -> StorageEntity + """Generator of Azure remote files or blobs + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + :param blobxfer.models.options.General general_options: general options + :rtype: StorageEntity + :return: Azure storage entity object + """ + if options.mode == blobxfer.models.azure.StorageModes.File: + for file in self._populate_from_list_files( + creds, options, general_options): + yield file + else: + for blob in self._populate_from_list_blobs( + creds, options, general_options): + yield blob + + def _populate_from_list_files(self, creds, options, general_options): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, + # blobxfer.models.options.General) -> StorageEntity + """Internal generator for Azure remote files + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + :param blobxfer.models.options.General general_options: general options + :rtype: StorageEntity + :return: Azure storage entity object + """ + for _path in self._paths: + rpath = str(_path) + cont, dir = blobxfer.util.explode_azure_path(rpath) + sa = creds.get_storage_account(self.lookup_storage_account(rpath)) + for file in blobxfer.operations.azure.file.list_files( + sa.file_client, cont, dir, general_options.timeout_sec): + if blobxfer.models.crypto.EncryptionMetadata.\ + encryption_metadata_exists(file.metadata): + ed = blobxfer.models.crypto.EncryptionMetadata() + ed.convert_from_json( + file.metadata, file.name, options.rsa_private_key) + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + ase.populate_from_file(sa, file) + yield ase + + def _populate_from_list_blobs(self, creds, options, general_options): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, + # blobxfer.models.options.General) -> StorageEntity + """Internal generator for Azure remote blobs + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + :param blobxfer.models.options.General general_options: general options + :rtype: StorageEntity + :return: Azure storage entity object + """ + for _path in self._paths: + rpath = str(_path) + cont, dir = blobxfer.util.explode_azure_path(rpath) + sa = creds.get_storage_account(self.lookup_storage_account(rpath)) + for blob in blobxfer.operations.azure.blob.list_blobs( + sa.block_blob_client, cont, dir, options.mode, + general_options.timeout_sec): + if blobxfer.models.crypto.EncryptionMetadata.\ + encryption_metadata_exists(blob.metadata): + ed = blobxfer.models.crypto.EncryptionMetadata() + ed.convert_from_json( + blob.metadata, blob.name, options.rsa_private_key) + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + ase.populate_from_blob(sa, blob) + yield ase diff --git a/blobxfer/crypto/models.py b/blobxfer/models/crypto.py similarity index 84% rename from blobxfer/crypto/models.py rename to blobxfer/models/crypto.py index e08f6a7..904da80 100644 --- 
a/blobxfer/crypto/models.py +++ b/blobxfer/models/crypto.py @@ -32,12 +32,18 @@ # stdlib imports import base64 import collections +import enum import hashlib import hmac import json +try: + import queue +except ImportError: # noqa + import Queue as queue # non-stdlib imports # local imports -import blobxfer.crypto.operations +import blobxfer.models.offload +import blobxfer.operations.crypto import blobxfer.util # encryption constants @@ -246,13 +252,13 @@ def convert_from_json(self, md, blobname, rsaprivatekey): if rsaprivatekey is None: return # decrypt symmetric key - self._symkey = blobxfer.crypto.operations.\ + self._symkey = blobxfer.operations.crypto.\ rsa_decrypt_base64_encoded_key( rsaprivatekey, self.wrapped_content_key.encrypted_key) # decrypt signing key, if it exists if blobxfer.util.is_not_empty( self.wrapped_content_key.encrypted_authentication_key): - self._signkey = blobxfer.crypto.operations.\ + self._signkey = blobxfer.operations.crypto.\ rsa_decrypt_base64_encoded_key( rsaprivatekey, self.wrapped_content_key.encrypted_authentication_key) @@ -310,3 +316,59 @@ def initialize_hmac(self): return hmac.new(self._signkey, digestmod=hashlib.sha256) else: return None + + +class CryptoAction(enum.Enum): + Encrypt = 1 + Decrypt = 2 + + +class CryptoOffload(blobxfer.models.offload._MultiprocessOffload): + def __init__(self, num_workers): + # type: (CryptoOffload, int) -> None + """Ctor for Crypto Offload + :param CryptoOffload self: this + :param int num_workers: number of worker processes + """ + super(CryptoOffload, self).__init__( + self._worker_process, num_workers, 'Crypto') + + def _worker_process(self): + # type: (CryptoOffload) -> None + """Crypto worker + :param CryptoOffload self: this + """ + while not self.terminated: + try: + inst = self._task_queue.get(True, 1) + except queue.Empty: + continue + if inst[0] == CryptoAction.Encrypt: + # TODO on upload + raise NotImplementedError() + elif inst[0] == CryptoAction.Decrypt: + final_path, offsets, symkey, iv, encdata = \ + inst[1], inst[2], inst[3], inst[4], inst[5] + data = blobxfer.operations.crypto.aes_cbc_decrypt_data( + symkey, iv, encdata, offsets.unpad) + self._done_cv.acquire() + self._done_queue.put((final_path, offsets, data)) + self._done_cv.notify() + self._done_cv.release() + + def add_decrypt_chunk( + self, final_path, offsets, symkey, iv, encdata): + # type: (CryptoOffload, str, blobxfer.models.download.Offsets, + # bytes, bytes, bytes) -> None + """Add a chunk to decrypt + :param CryptoOffload self: this + :param str final_path: final path + :param blobxfer.models.download.Offsets offsets: offsets + :param bytes symkey: symmetric key + :param bytes iv: initialization vector + :param bytes encdata: encrypted data + """ + self._task_queue.put( + (CryptoAction.Decrypt, final_path, offsets, symkey, iv, + encdata) + ) diff --git a/blobxfer/download/models.py b/blobxfer/models/download.py similarity index 69% rename from blobxfer/download/models.py rename to blobxfer/models/download.py index 7fb4fe8..d4d8f06 100644 --- a/blobxfer/download/models.py +++ b/blobxfer/models/download.py @@ -42,17 +42,16 @@ import threading # non-stdlib imports # local imports -import blobxfer.blob.operations -import blobxfer.file.operations -import blobxfer.crypto.models +import blobxfer.models.options +import blobxfer.models.crypto import blobxfer.util # create logger logger = logging.getLogger(__name__) # named tuples -DownloadOffsets = collections.namedtuple( - 'DownloadOffsets', [ +Offsets = collections.namedtuple( + 'Offsets', [ 
'chunk_num', 'fd_start', 'num_bytes', @@ -71,19 +70,117 @@ ) -class DownloadDescriptor(object): +class LocalDestinationPath(object): + """Local Destination Path""" + def __init__(self, path=None): + # type: (LocalDestinationPath, str) -> None + """Ctor for LocalDestinationPath + :param LocalDestinationPath self: this + :param str path: path + """ + self._is_dir = None + if path is not None: + self.path = path + + @property + def path(self): + # type: (LocalDestinationPath) -> pathlib.Path + """Path property + :param LocalDestinationPath self: this + :rtype: pathlib.Path + :return: local destination path + """ + return self._path + + @path.setter + def path(self, value): + # type: (LocalDestinationPath, str) -> None + """Path property setter + :param LocalDestinationPath self: this + :param str value: value to set path to + """ + self._path = pathlib.Path(value) + + @property + def is_dir(self): + # type: (LocalDestinationPath) -> bool + """is_dir property + :param LocalDestinationPath self: this + :rtype: bool + :return: if local destination path is a directory + """ + return self._is_dir + + @is_dir.setter + def is_dir(self, value): + # type: (LocalDestinationPath, bool) -> None + """is_dir property setter + :param LocalDestinationPath self: this + :param bool value: value to set is_dir to + """ + self._is_dir = value + + def ensure_path_exists(self): + # type: (LocalDestinationPath) -> None + """Ensure path exists + :param LocalDestinationPath self: this + """ + if self._is_dir is None: + raise RuntimeError('is_dir not set') + if self._is_dir: + self._path.mkdir(mode=0o750, parents=True, exist_ok=True) + else: + if self._path.exists() and self._path.is_dir(): + raise RuntimeError( + ('destination path {} already exists and is a ' + 'directory').format(self._path)) + else: + # ensure parent path exists and is created + self._path.parent.mkdir( + mode=0o750, parents=True, exist_ok=True) + + +class Specification(object): + """Download Specification""" + def __init__( + self, download_options, skip_on_options, local_destination_path): + # type: (Specification, blobxfer.models.options.Download, + # blobxfer.models.options.SkipOn, LocalDestinationPath) -> None + """Ctor for Specification + :param DownloadSepcification self: this + :param blobxfer.models.options.Download download_options: + download options + :param blobxfer.models.options.SkipOn skip_on_options: skip on options + :param LocalDestinationPath local_destination_path: local dest path + """ + self.options = download_options + self.skip_on = skip_on_options + self.destination = local_destination_path + self.sources = [] + + def add_azure_source_path(self, source): + # type: (Specification, AzureSourcePath) -> None + """Add an Azure Source Path + :param DownloadSepcification self: this + :param AzureSourcePath source: Azure source path to add + """ + self.sources.append(source) + + +class Descriptor(object): """Download Descriptor""" - _AES_BLOCKSIZE = blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES + _AES_BLOCKSIZE = blobxfer.models.crypto._AES256_BLOCKSIZE_BYTES def __init__(self, lpath, ase, options): - # type: (DownloadDescriptior, pathlib.Path, AzureStorageEntity, - # DownloadOptions) -> None - """Ctor for DownloadDescriptor - :param DownloadDescriptor self: this + # type: (DownloadDescriptior, pathlib.Path, + # blobxfer.models.azure.StorageEntity, + # blobxfer.models.options.Download) -> None + """Ctor for Descriptor + :param Descriptor self: this :param pathlib.Path lpath: local path - :param AzureStorageEntity ase: Azure 
Storage Entity - :param DownloadOptions options: download options + :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity + :param blobxfer.models.options.Download options: download options """ self.final_path = lpath # create path holding the temporary file to download to @@ -114,19 +211,19 @@ def __init__(self, lpath, ase, options): @property def entity(self): - # type: (DownloadDescriptor) -> AzureStorageEntity - """Get linked AzureStorageEntity - :param DownloadDescriptor self: this - :rtype: AzureStorageEntity - :return: AzureStorageEntity + # type: (Descriptor) -> blobxfer.models.azure.StorageEntity + """Get linked blobxfer.models.azure.StorageEntity + :param Descriptor self: this + :rtype: blobxfer.models.azure.StorageEntity + :return: blobxfer.models.azure.StorageEntity """ return self._ase @property def must_compute_md5(self): - # type: (DownloadDescriptor) -> bool + # type: (Descriptor) -> bool """Check if MD5 must be computed - :param DownloadDescriptor self: this + :param Descriptor self: this :rtype: bool :return: if MD5 must be computed """ @@ -134,9 +231,9 @@ def must_compute_md5(self): @property def all_operations_completed(self): - # type: (DownloadDescriptor) -> bool + # type: (Descriptor) -> bool """All operations are completed - :param DownloadDescriptor self: this + :param Descriptor self: this :rtype: bool :return: if all operations completed """ @@ -145,19 +242,19 @@ def all_operations_completed(self): len(self._unchecked_chunks) == 0) def dec_outstanding_operations(self): - # type: (DownloadDescriptor) -> None + # type: (Descriptor) -> None """Decrement outstanding operations (and increment completed ops) - :param DownloadDescriptor self: this + :param Descriptor self: this """ with self._meta_lock: self._outstanding_ops -= 1 self._completed_ops += 1 def _initialize_integrity_checkers(self, options): - # type: (DownloadDescriptor, DownloadOptions) -> None + # type: (Descriptor, blobxfer.models.options.Download) -> None """Initialize file integrity checkers - :param DownloadDescriptor self: this - :param DownloadOptions options: download options + :param Descriptor self: this + :param blobxfer.models.options.Download options: download options """ if self._ase.is_encrypted: # ensure symmetric key exists @@ -171,9 +268,9 @@ def _initialize_integrity_checkers(self, options): self.md5 = blobxfer.util.new_md5_hasher() def _allocate_disk_space(self): - # type: (DownloadDescriptor, int) -> None + # type: (Descriptor, int) -> None """Perform file allocation (possibly sparse) - :param DownloadDescriptor self: this + :param Descriptor self: this :param int size: size """ size = self._ase.size @@ -201,10 +298,10 @@ def _allocate_disk_space(self): fd.write(b'\0') def cleanup_all_temporary_files(self): - # type: (DownloadDescriptor) -> None + # type: (Descriptor) -> None """Cleanup all temporary files in case of an exception or interrupt. This function is not thread-safe. 
- :param DownloadDescriptor self: this + :param Descriptor self: this """ # delete local file try: @@ -221,10 +318,10 @@ def cleanup_all_temporary_files(self): pass def next_offsets(self): - # type: (DownloadDescriptor) -> DownloadOffsets + # type: (Descriptor) -> Offsets """Retrieve the next offsets - :param DownloadDescriptor self: this - :rtype: DownloadOffsets + :param Descriptor self: this + :rtype: Offsets :return: download offsets """ with self._meta_lock: @@ -256,7 +353,7 @@ def next_offsets(self): unpad = True else: unpad = False - return DownloadOffsets( + return Offsets( chunk_num=chunk_num, fd_start=fd_start, num_bytes=chunk, @@ -266,10 +363,10 @@ def next_offsets(self): ) def _postpone_integrity_check(self, offsets, data): - # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + # type: (Descriptor, Offsets, bytes) -> None """Postpone integrity check for chunk - :param DownloadDescriptor self: this - :param DownloadOffsets offsets: download offsets + :param Descriptor self: this + :param Offsets offsets: download offsets :param bytes data: data """ if self.must_compute_md5: @@ -297,10 +394,10 @@ def _postpone_integrity_check(self, offsets, data): self._unchecked_chunks[offsets.chunk_num] = unchecked def perform_chunked_integrity_check(self, offsets, data): - # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + # type: (Descriptor, Offsets, bytes) -> None """Hash data against stored MD5 hasher safely - :param DownloadDescriptor self: this - :param DownloadOffsets offsets: download offsets + :param Descriptor self: this + :param Offsets offsets: download offsets :param bytes data: data """ self_check = False @@ -335,10 +432,10 @@ def perform_chunked_integrity_check(self, offsets, data): self._postpone_integrity_check(offsets, data) def write_data(self, offsets, data): - # type: (DownloadDescriptor, DownloadOffsets, bytes) -> None + # type: (Descriptor, Offsets, bytes) -> None """Postpone integrity check for chunk - :param DownloadDescriptor self: this - :param DownloadOffsets offsets: download offsets + :param Descriptor self: this + :param Offsets offsets: download offsets :param bytes data: data """ with self.local_path.open('r+b') as fd: @@ -346,9 +443,9 @@ def write_data(self, offsets, data): fd.write(data) def finalize_file(self): - # type: (DownloadDescriptor) -> None + # type: (Descriptor) -> None """Finalize file download - :param DownloadDescriptor self: this + :param Descriptor self: this """ # check final file integrity check = False diff --git a/blobxfer/md5.py b/blobxfer/models/md5.py similarity index 64% rename from blobxfer/md5.py rename to blobxfer/models/md5.py index 1c403cc..f8c1d3a 100644 --- a/blobxfer/md5.py +++ b/blobxfer/models/md5.py @@ -36,52 +36,15 @@ import Queue as queue # non-stdlib imports # local imports -import blobxfer.download -import blobxfer.models -import blobxfer.offload -import blobxfer.util +import blobxfer.models.azure +import blobxfer.models.offload +import blobxfer.operations.md5 # create logger logger = logging.getLogger(__name__) -def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): - # type: (str, bool, int) -> str - """Compute MD5 hash for file and encode as Base64 - :param str filename: file to compute MD5 for - :param bool pagealign: page align data - :param int blocksize: block size - :rtype: str - :return: MD5 for file encoded as Base64 - """ - hasher = blobxfer.util.new_md5_hasher() - with open(filename, 'rb') as filedesc: - while True: - buf = filedesc.read(blocksize) - if not buf: - 
break - buflen = len(buf) - if pagealign and buflen < blocksize: - aligned = blobxfer.util.page_align_content_length(buflen) - if aligned != buflen: - buf = buf.ljust(aligned, b'\0') - hasher.update(buf) - return blobxfer.util.base64_encode_as_string(hasher.digest()) - - -def compute_md5_for_data_asbase64(data): - # type: (obj) -> str - """Compute MD5 hash for bits and encode as Base64 - :param any data: data to compute MD5 for - :rtype: str - :return: MD5 for data - """ - hasher = blobxfer.util.new_md5_hasher() - hasher.update(data) - return blobxfer.util.base64_encode_as_string(hasher.digest()) - - -class LocalFileMd5Offload(blobxfer.offload._MultiprocessOffload): +class LocalFileMd5Offload(blobxfer.models.offload._MultiprocessOffload): """LocalFileMd5Offload""" def __init__(self, num_workers): # type: (LocalFileMd5Offload, int) -> None @@ -102,7 +65,8 @@ def _worker_process(self): filename, remote_md5, pagealign = self._task_queue.get(True, 1) except queue.Empty: continue - md5 = compute_md5_for_file_asbase64(filename, pagealign) + md5 = blobxfer.operations.md5.compute_md5_for_file_asbase64( + filename, pagealign) logger.debug('MD5: {} {} {}'.format( md5, remote_md5, filename)) self._done_cv.acquire() @@ -112,14 +76,14 @@ def _worker_process(self): def add_localfile_for_md5_check(self, filename, remote_md5, mode): # type: (LocalFileMd5Offload, str, str, - # blobxfer.models.AzureStorageModes) -> None + # blobxfer.models.azure.StorageModes) -> None """Add a local file to MD5 check queue :param LocalFileMd5Offload self: this :param str filename: file to compute MD5 for :param str remote_md5: remote MD5 to compare against - :param blobxfer.models.AzureStorageModes mode: mode + :param blobxfer.models.azure.StorageModes mode: mode """ - if mode == blobxfer.models.AzureStorageModes.Page: + if mode == blobxfer.models.azure.StorageModes.Page: pagealign = True else: pagealign = False diff --git a/blobxfer/offload.py b/blobxfer/models/offload.py similarity index 100% rename from blobxfer/offload.py rename to blobxfer/models/offload.py diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py new file mode 100644 index 0000000..f7c9f6f --- /dev/null +++ b/blobxfer/models/options.py @@ -0,0 +1,147 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
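The blobxfer.models.options module added below keeps the transfer tunables as named tuples plus the Concurrency and General classes. Concurrency normalizes its arguments, so passing None for everything yields machine-dependent defaults; a minimal sketch:

    from blobxfer.models.options import Concurrency, General

    conc = Concurrency(crypto_processes=None, md5_processes=None, transfer_threads=None)
    # crypto_processes  -> 0 (crypto routines run inline with the main process)
    # md5_processes     -> cpu_count() // 2, but never below 1
    # transfer_threads  -> cpu_count() * 3, capped at 24
    opts = General(concurrency=conc, progress_bar=False, timeout_sec=30)
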
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import collections +import logging +import multiprocessing +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +# local imports +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + +# named tuples +VectoredIo = collections.namedtuple( + 'VectoredIoOptions', [ + 'stripe_chunk_size_bytes', + 'multi_storage_account_distribution_mode', + ] +) +SkipOn = collections.namedtuple( + 'SkipOn', [ + 'filesize_match', + 'lmt_ge', + 'md5_match', + ] +) +Upload = collections.namedtuple( + 'Upload', [ + 'chunk_size_bytes', + 'delete_extraneous_destination', + 'mode', + 'overwrite', + 'recursive', + 'rsa_private_key', + 'rsa_public_key', + 'store_file_attributes', + 'store_file_md5', + 'strip_components', + 'vectored_io', + 'split_size_bytes', + ] +) +Download = collections.namedtuple( + 'Download', [ + 'check_file_md5', + 'chunk_size_bytes', + 'delete_extraneous_destination', + 'mode', + 'overwrite', + 'recursive', + 'restore_file_attributes', + 'rsa_private_key', + ] +) +SyncCopy = collections.namedtuple( + 'SyncCopy', [ + 'chunk_size_bytes', + 'mode', + 'overwrite', + ] +) + + +class Concurrency(object): + """Concurrency Options""" + def __init__(self, crypto_processes, md5_processes, transfer_threads): + """Ctor for Concurrency Options + :param Concurrency self: this + :param int crypto_processes: number of crypto procs + :param int md5_processes: number of md5 procs + :param int transfer_threads: number of transfer threads + """ + self.crypto_processes = crypto_processes + self.md5_processes = md5_processes + self.transfer_threads = transfer_threads + # allow crypto processes to be zero (which will inline crypto + # routines with main process) + if self.crypto_processes is None or self.crypto_processes < 1: + self.crypto_processes = 0 + if self.md5_processes is None or self.md5_processes < 1: + self.md5_processes = multiprocessing.cpu_count() // 2 + if self.md5_processes < 1: + self.md5_processes = 1 + if self.transfer_threads is None or self.transfer_threads < 1: + self.transfer_threads = multiprocessing.cpu_count() * 3 + # cap maximum number of threads from cpu count to 24 + if self.transfer_threads > 24: + self.transfer_threads = 24 + + +class General(object): + """General Options""" + def __init__( + self, concurrency, progress_bar=True, resume_file=None, + timeout_sec=None, verbose=False): + """Ctor for General Options + :param General self: this + :param Concurrency concurrency: concurrency options + :param bool progress_bar: progress bar + :param str resume_file: resume file + :param int timeout_sec: timeout in seconds + :param bool verbose: verbose output + """ + if concurrency is None: + raise ValueError('concurrency option is unspecified') + self.concurrency = concurrency + self.progress_bar = progress_bar + if blobxfer.util.is_not_empty(resume_file): + self.resume_file = pathlib.Path(resume_file) + else: + self.resume_file = None + self.timeout_sec = timeout_sec + self.verbose = verbose diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py new file mode 100644 index 0000000..607b001 --- /dev/null +++ b/blobxfer/models/upload.py @@ -0,0 +1,75 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import collections +import logging +import os +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +# local imports +import blobxfer.models +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + + +LocalPath = collections.namedtuple( + 'LocalPath', [ + 'parent_path', + 'relative_path', + ] +) + + +class LocalSourcePaths(blobxfer.models._BaseSourcePaths): + """Local Source Paths""" + def files(self): + # type: (LocalSourcePaths) -> LocalPath + """Generator for files in paths + :param LocalSourcePaths self: this + :rtype: LocalPath + :return: LocalPath + """ + for _path in self._paths: + _ppath = os.path.expandvars(os.path.expanduser(str(_path))) + _expath = pathlib.Path(_ppath) + for entry in blobxfer.util.scantree(_ppath): + _rpath = pathlib.Path(entry.path).relative_to(_ppath) + if not self._inclusion_check(_rpath): + logger.debug( + 'skipping file {} due to filters'.format(_rpath)) + continue + yield LocalPath(parent_path=_expath, relative_path=_rpath) diff --git a/blobxfer/operations.py b/blobxfer/operations.py deleted file mode 100644 index 82e4024..0000000 --- a/blobxfer/operations.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) Microsoft Corporation -# -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# compat imports -from __future__ import absolute_import, division, print_function -from builtins import ( # noqa - bytes, dict, int, list, object, range, ascii, chr, hex, input, - next, oct, open, pow, round, super, filter, map, zip -) -# stdlib imports -import logging -# non-stdlib imports -# local imports -import blobxfer.models -import blobxfer.blob.operations -import blobxfer.file.operations -import blobxfer.util - -# create logger -logger = logging.getLogger(__name__) - - -def ensure_local_destination(creds, spec): - # type: (blobxfer.models.AzureStorageCredentials, - # blobxfer.models.DownloadSpecification) -> None - """Ensure a local destination path given a download spec - :param blobxfer.models.AzureStorageCredentials creds: creds - :param blobxfer.models.DownloadSpecification spec: download spec - """ - # ensure destination path is writable given the source - if len(spec.sources) < 1: - raise RuntimeError('no sources to download from specified') - # set is_dir for destination - spec.destination.is_dir = True - if len(spec.sources) == 1: - # we need to query the source to see if this is a directory - rpath = str(spec.sources[0].paths[0]) - cont, dir = blobxfer.util.explode_azure_path(rpath) - if not blobxfer.util.is_none_or_empty(dir): - sa = creds.get_storage_account( - spec.sources[0].lookup_storage_account(rpath)) - if spec.options.mode == blobxfer.models.AzureStorageModes.File: - if blobxfer.file.operations.check_if_single_file( - sa.file_client, cont, dir)[0]: - spec.destination.is_dir = False - else: - if blobxfer.blob.operations.check_if_single_blob( - sa.block_blob_client, cont, dir): - spec.destination.is_dir = False - logger.debug('dest is_dir={} for {} specs'.format( - spec.destination.is_dir, len(spec.sources))) - # ensure destination path - spec.destination.ensure_path_exists() - - -def file_chunks(fd, chunk_size): - # type: (FileDescriptor, int) -> bytes - """Generator for getting file chunks of a file - :param FileDescriptor fd: file descriptor - :param int chunk_size: the amount of data to read - :rtype: bytes - :return: file data - """ - with fd.path.open('rb') as f: - while True: - data = f.read(chunk_size) - if not data: - break - yield data - - -def read_file_chunk(fd, chunk_num, chunk_size): - # type: (FileDescriptor, int, int) -> bytes - """Read file chunk - :param FileDescriptor fd: file descriptor - :param int chunk_num: chunk number - :param int chunk_size: the amount of data to read - :rtype: bytes - :return: file data - """ - offset = chunk_num * chunk_size - with fd.path.open('rb') as f: - f.seek(offset, 0) - return f.read(chunk_size) - - -def write_file_chunk(fd, chunk_num, chunk_size, data): - # type: (FileDescriptor, int, int, bytes) -> None - """Write file chunk - :param FileDescriptor fd: file descriptor - :param int chunk_num: chunk number - :param int chunk_size: the amount of data to read - :rtype: bytes - :return: file data - """ - offset = chunk_num * chunk_size - with fd.path.open('wb') as f: - f.seek(offset, 0) - f.write(data) diff --git a/blobxfer/blob/__init__.py b/blobxfer/operations/__init__.py similarity index 100% rename from blobxfer/blob/__init__.py rename to blobxfer/operations/__init__.py diff --git a/blobxfer/blob/append/__init__.py 
b/blobxfer/operations/azure/__init__.py similarity index 100% rename from blobxfer/blob/append/__init__.py rename to blobxfer/operations/azure/__init__.py diff --git a/blobxfer/blob/operations.py b/blobxfer/operations/azure/blob/__init__.py similarity index 86% rename from blobxfer/blob/operations.py rename to blobxfer/operations/azure/blob/__init__.py index 27c19c7..e0cf878 100644 --- a/blobxfer/blob/operations.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -34,7 +34,7 @@ import azure.common import azure.storage.blob.models # local imports -import blobxfer.models +import blobxfer.models.azure import blobxfer.util # create logger @@ -63,18 +63,18 @@ def check_if_single_blob(client, container, prefix, timeout=None): def list_blobs(client, container, prefix, mode, timeout=None): # type: (azure.storage.blob.BaseBlobService, str, str, int, - # blobxfer.models.AzureStorageModes) -> + # blobxfer.models.azure.StorageModes) -> # azure.storage.blob.models.Blob """List blobs in path conforming to mode :param azure.storage.blob.BaseBlobService client: blob client :param str container: container :param str prefix: path prefix - :param blobxfer.models.AzureStorageModes mode: storage mode + :param blobxfer.models.azure.StorageModes mode: storage mode :param int timeout: timeout :rtype: azure.storage.blob.models.Blob :return: generator of blobs """ - if mode == blobxfer.models.AzureStorageModes.File: + if mode == blobxfer.models.azure.StorageModes.File: raise RuntimeError('cannot list Azure Files from blob client') if blobxfer.util.blob_is_snapshot(prefix): snapshot = blobxfer.util.parse_blob_snapshot_parameter(prefix) @@ -90,15 +90,15 @@ def list_blobs(client, container, prefix, mode, timeout=None): timeout=timeout, ) for blob in blobs: - if (mode == blobxfer.models.AzureStorageModes.Append and + if (mode == blobxfer.models.azure.StorageModes.Append and blob.properties.blob_type != azure.storage.blob.models._BlobTypes.AppendBlob): continue - elif (mode == blobxfer.models.AzureStorageModes.Block and + elif (mode == blobxfer.models.azure.StorageModes.Block and blob.properties.blob_type != azure.storage.blob.models._BlobTypes.BlockBlob): continue - elif (mode == blobxfer.models.AzureStorageModes.Page and + elif (mode == blobxfer.models.azure.StorageModes.Page and blob.properties.blob_type != azure.storage.blob.models._BlobTypes.PageBlob): continue @@ -107,11 +107,11 @@ def list_blobs(client, container, prefix, mode, timeout=None): def get_blob_range(ase, offsets, timeout=None): - # type: (blobxfer.models.AzureStorageEntity, - # blobxfer.download.models.DownloadOffsets, int) -> bytes + # type: (blobxfer.models.azure.StorageEntity, + # blobxfer.models.download.Offsets, int) -> bytes """Retrieve blob range - :param blobxfer.models.AzureStorageEntity ase: AzureStorageEntity - :param blobxfer.download.models.DownloadOffsets offsets: download offsets + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param blobxfer.models.download.Offsets offsets: download offsets :param int timeout: timeout :rtype: bytes :return: content for blob range diff --git a/blobxfer/blob/append/operations.py b/blobxfer/operations/azure/blob/append.py similarity index 93% rename from blobxfer/blob/append/operations.py rename to blobxfer/operations/azure/blob/append.py index cbe4008..296e8c7 100644 --- a/blobxfer/blob/append/operations.py +++ b/blobxfer/operations/azure/blob/append.py @@ -40,9 +40,10 @@ def create_client(storage_account): - # type: (blobxfer.models.AzureStorageAccount) -> AppendBlobService + 
# type: (blobxfer.models.azure.StorageAccount) -> AppendBlobService """Create Append blob client - :param blobxfer.models.AzureStorageAccount storage_account: storage account + :param blobxfer.models.azure.StorageAccount storage_account: + storage account :rtype: AppendBlobService :return: append blob service client """ diff --git a/blobxfer/blob/block/operations.py b/blobxfer/operations/azure/blob/block.py similarity index 93% rename from blobxfer/blob/block/operations.py rename to blobxfer/operations/azure/blob/block.py index c07fda7..c68ac32 100644 --- a/blobxfer/blob/block/operations.py +++ b/blobxfer/operations/azure/blob/block.py @@ -40,9 +40,10 @@ def create_client(storage_account): - # type: (blobxfer.models.AzureStorageAccount) -> BlockBlobService + # type: (blobxfer.models.azure.StorageAccount) -> BlockBlobService """Create block blob client - :param blobxfer.models.AzureStorageAccount storage_account: storage account + :param blobxfer.models.azure.StorageAccount storage_account: + storage account :rtype: azure.storage.blob.BlockBlobService :return: block blob service client """ diff --git a/blobxfer/blob/page/operations.py b/blobxfer/operations/azure/blob/page.py similarity index 93% rename from blobxfer/blob/page/operations.py rename to blobxfer/operations/azure/blob/page.py index 359e207..8a64622 100644 --- a/blobxfer/blob/page/operations.py +++ b/blobxfer/operations/azure/blob/page.py @@ -40,9 +40,10 @@ def create_client(storage_account): - # type: (blobxfer.models.AzureStorageAccount) -> PageBlobService + # type: (blobxfer.models.azure.StorageAccount) -> PageBlobService """Create block blob client - :param blobxfer.models.AzureStorageAccount storage_account: storage account + :param blobxfer.models.azure.StorageAccount storage_account: + storage account :rtype: PageBlobService :return: block blob service client """ diff --git a/blobxfer/file/operations.py b/blobxfer/operations/azure/file.py similarity index 93% rename from blobxfer/file/operations.py rename to blobxfer/operations/azure/file.py index eff3d01..e531fc2 100644 --- a/blobxfer/file/operations.py +++ b/blobxfer/operations/azure/file.py @@ -46,9 +46,10 @@ def create_client(storage_account): - # type: (blobxfer.models.AzureStorageAccount) -> FileService + # type: (blobxfer.models.azure.StorageAccount) -> FileService """Create file client - :param blobxfer.models.AzureStorageAccount storage_account: storage account + :param blobxfer.models.azure.StorageAccount storage_account: + storage account :rtype: FileService :return: file service client """ @@ -154,11 +155,11 @@ def list_files(client, fileshare, prefix, timeout=None): def get_file_range(ase, offsets, timeout=None): - # type: (blobxfer.models.AzureStorageEntity, - # blobxfer.download.models.DownloadOffsets, int) -> bytes + # type: (blobxfer.models.azure.StorageEntity, + # blobxfer.models.download.Offsets, int) -> bytes """Retrieve file range - :param blobxfer.models.AzureStorageEntity ase: AzureStorageEntity - :param blobxfer.download.models.DownloadOffsets offsets: download offsets + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param blobxfer.models.download.Offsets offsets: download offsets :param int timeout: timeout :rtype: bytes :return: content for file range diff --git a/blobxfer/crypto/operations.py b/blobxfer/operations/crypto.py similarity index 79% rename from blobxfer/crypto/operations.py rename to blobxfer/operations/crypto.py index 2534148..44202c3 100644 --- a/blobxfer/crypto/operations.py +++ 
b/blobxfer/operations/crypto.py @@ -31,13 +31,8 @@ next, oct, open, pow, round, super, filter, map, zip) # stdlib imports import base64 -import enum import logging import os -try: - import queue -except ImportError: # noqa - import Queue as queue # non-stdlib imports import cryptography.hazmat.backends import cryptography.hazmat.primitives.asymmetric.padding @@ -50,7 +45,7 @@ import cryptography.hazmat.primitives.padding import cryptography.hazmat.primitives.serialization # local imports -import blobxfer.offload +import blobxfer.util # create logger logger = logging.getLogger(__name__) @@ -216,58 +211,3 @@ def aes_cbc_encrypt_data(symkey, iv, data, pad): return cipher.update(pkcs7_pad(data)) + cipher.finalize() else: return cipher.update(data) + cipher.finalize() - - -class CryptoAction(enum.Enum): - Encrypt = 1 - Decrypt = 2 - - -class CryptoOffload(blobxfer.offload._MultiprocessOffload): - def __init__(self, num_workers): - # type: (CryptoOffload, int) -> None - """Ctor for Crypto Offload - :param CryptoOffload self: this - :param int num_workers: number of worker processes - """ - super(CryptoOffload, self).__init__( - self._worker_process, num_workers, 'Crypto') - - def _worker_process(self): - # type: (CryptoOffload) -> None - """Crypto worker - :param CryptoOffload self: this - """ - while not self.terminated: - try: - inst = self._task_queue.get(True, 1) - except queue.Empty: - continue - if inst[0] == CryptoAction.Encrypt: - # TODO on upload - raise NotImplementedError() - elif inst[0] == CryptoAction.Decrypt: - final_path, offsets, symkey, iv, encdata = \ - inst[1], inst[2], inst[3], inst[4], inst[5] - data = aes_cbc_decrypt_data(symkey, iv, encdata, offsets.unpad) - self._done_cv.acquire() - self._done_queue.put((final_path, offsets, data)) - self._done_cv.notify() - self._done_cv.release() - - def add_decrypt_chunk( - self, final_path, offsets, symkey, iv, encdata): - # type: (CryptoOffload, str, blobxfer.download.models.DownloadOffsets, - # bytes, bytes, bytes) -> None - """Add a chunk to decrypt - :param CryptoOffload self: this - :param str final_path: final path - :param blobxfer.download.models.DownloadOffsets offsets: offsets - :param bytes symkey: symmetric key - :param bytes iv: initialization vector - :param bytes encdata: encrypted data - """ - self._task_queue.put( - (CryptoAction.Decrypt, final_path, offsets, symkey, iv, - encdata) - ) diff --git a/blobxfer/download/operations.py b/blobxfer/operations/download.py similarity index 83% rename from blobxfer/download/operations.py rename to blobxfer/operations/download.py index b947bcf..7a843e7 100644 --- a/blobxfer/download/operations.py +++ b/blobxfer/operations/download.py @@ -46,13 +46,11 @@ # non-stdlib imports import dateutil # local imports -import blobxfer.crypto.models -import blobxfer.crypto.operations -import blobxfer.download.models -import blobxfer.md5 -import blobxfer.operations -import blobxfer.blob.operations -import blobxfer.file.operations +import blobxfer.models.crypto +import blobxfer.models.md5 +import blobxfer.operations.azure.blob +import blobxfer.operations.azure.file +import blobxfer.operations.crypto import blobxfer.util # create logger @@ -68,14 +66,14 @@ class DownloadAction(enum.Enum): class Downloader(object): """Downloader""" def __init__(self, general_options, creds, spec): - # type: (Downloader, blobxfer.models.GeneralOptions, - # blobxfer.models.AzureStorageCredentials, - # blobxfer.models.DownloadSpecification) -> None + # type: (Downloader, blobxfer.models.options.General, + # 
blobxfer.models.azure.StorageCredentials, + # blobxfer.models.download.Specification) -> None """Ctor for Downloader :param Downloader self: this - :param blobxfer.models.GeneralOptions general_options: general opts - :param blobxfer.models.AzureStorageCredentials creds: creds - :param blobxfer.models.DownloadSpecification spec: download spec + :param blobxfer.models.options.General general_options: general opts + :param blobxfer.models.azure.StorageCredentials creds: creds + :param blobxfer.models.download.Specification spec: download spec """ self._all_remote_files_processed = False self._crypto_offload = None @@ -123,13 +121,47 @@ def termination_check_md5(self): len(self._md5_map) == 0 and len(self._download_set) == 0)) + @staticmethod + def ensure_local_destination(creds, spec): + # type: (blobxfer.models.azure.StorageCredentials, + # blobxfer.models.download.Specification) -> None + """Ensure a local destination path given a download spec + :param blobxfer.models.azure.StorageCredentials creds: creds + :param blobxfer.models.download.Specification spec: download spec + """ + # ensure destination path is writable given the source + if len(spec.sources) < 1: + raise RuntimeError('no sources to download from specified') + # set is_dir for destination + spec.destination.is_dir = True + if len(spec.sources) == 1: + # we need to query the source to see if this is a directory + rpath = str(spec.sources[0].paths[0]) + cont, dir = blobxfer.util.explode_azure_path(rpath) + if not blobxfer.util.is_none_or_empty(dir): + sa = creds.get_storage_account( + spec.sources[0].lookup_storage_account(rpath)) + if (spec.options.mode == + blobxfer.models.azure.StorageModes.File): + if blobxfer.operations.azure.file.check_if_single_file( + sa.file_client, cont, dir)[0]: + spec.destination.is_dir = False + else: + if blobxfer.operations.azure.blob.check_if_single_blob( + sa.block_blob_client, cont, dir): + spec.destination.is_dir = False + logger.debug('dest is_dir={} for {} specs'.format( + spec.destination.is_dir, len(spec.sources))) + # ensure destination path + spec.destination.ensure_path_exists() + def _check_download_conditions(self, lpath, rfile): # type: (Downloader, pathlib.Path, - # blobxfer.models.AzureStorageEntity) -> DownloadAction + # blobxfer.models.azure.StorageEntity) -> DownloadAction """Check for download conditions :param Downloader self: this :param pathlib.Path lpath: local path - :param blobxfer.models.AzureStorageEntity rfile: remote file + :param blobxfer.models.azure.StorageEntity rfile: remote file :rtype: DownloadAction :return: download action """ @@ -151,7 +183,7 @@ def _check_download_conditions(self, lpath, rfile): dl_fs = None if self._spec.skip_on.filesize_match: lsize = lpath.stat().st_size - if rfile.mode == blobxfer.models.AzureStorageModes.Page: + if rfile.mode == blobxfer.models.azure.StorageModes.Page: lsize = blobxfer.util.page_align_content_length(lsize) if rfile.size == lsize: dl_fs = False @@ -174,11 +206,11 @@ def _check_download_conditions(self, lpath, rfile): def _pre_md5_skip_on_check(self, lpath, rfile): # type: (Downloader, pathlib.Path, - # blobxfer.models.AzureStorageEntity) -> None + # blobxfer.models.azure.StorageEntity) -> None """Perform pre MD5 skip on check :param Downloader self: this :param pathlib.Path lpath: local path - :param blobxfer.models.AzureStorageEntity rfile: remote file + :param blobxfer.models.azure.StorageEntity rfile: remote file """ # if encryption metadata is present, check for pre-encryption # md5 in blobxfer extensions @@ 
-259,14 +291,14 @@ def _check_for_crypto_done(self): def _add_to_download_queue(self, lpath, rfile): # type: (Downloader, pathlib.Path, - # blobxfer.models.AzureStorageEntity) -> None + # blobxfer.models.azure.StorageEntity) -> None """Add remote file to download queue :param Downloader self: this :param pathlib.Path lpath: local path - :param blobxfer.models.AzureStorageEntity rfile: remote file + :param blobxfer.models.azure.StorageEntity rfile: remote file """ # prepare remote file for download - dd = blobxfer.download.models.DownloadDescriptor( + dd = blobxfer.models.download.Descriptor( lpath, rfile, self._spec.options) if dd.entity.is_encrypted: with self._download_lock: @@ -330,11 +362,11 @@ def _worker_thread_download(self): if offsets is None: continue # issue get range - if dd.entity.mode == blobxfer.models.AzureStorageModes.File: - data = blobxfer.file.operations.get_file_range( + if dd.entity.mode == blobxfer.models.azure.StorageModes.File: + data = blobxfer.operations.azure.file.get_file_range( dd.entity, offsets, self._general_options.timeout_sec) else: - data = blobxfer.blob.operations.get_blob_range( + data = blobxfer.operations.azure.blob.get_blob_range( dd.entity, offsets, self._general_options.timeout_sec) # accounting with self._download_lock: @@ -342,7 +374,7 @@ def _worker_thread_download(self): # decrypt if necessary if dd.entity.is_encrypted: # slice data to proper bounds - encdata = data[blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES:] + encdata = data[blobxfer.models.crypto._AES256_BLOCKSIZE_BYTES:] intdata = encdata # get iv for chunk and compute hmac if offsets.chunk_num == 0: @@ -350,7 +382,7 @@ def _worker_thread_download(self): # integrity check for first chunk must include iv intdata = iv + data else: - iv = data[:blobxfer.crypto.models._AES256_BLOCKSIZE_BYTES] + iv = data[:blobxfer.models.crypto._AES256_BLOCKSIZE_BYTES] # integrity check data dd.perform_chunked_integrity_check(offsets, intdata) # decrypt data @@ -362,7 +394,7 @@ def _worker_thread_download(self): # data will be completed once retrieved from crypto queue continue else: - data = blobxfer.crypto.operations.aes_cbc_decrypt_data( + data = blobxfer.operations.crypto.aes_cbc_decrypt_data( dd.entity.encryption_metadata.symmetric_key, iv, encdata, offsets.unpad) elif dd.must_compute_md5: @@ -372,14 +404,13 @@ def _worker_thread_download(self): self._complete_chunk_download(offsets, data, dd) def _complete_chunk_download(self, offsets, data, dd): - # type: (Downloader, blobxfer.download.models.DownloadOffsets, bytes, - # blobxfer.models.download.DownloadDescriptor) -> None + # type: (Downloader, blobxfer.models.download.Offsets, bytes, + # blobxfer.models.download.Descriptor) -> None """Complete chunk download :param Downloader self: this - :param blobxfer.download.models.DownloadOffsets offsets: offsets + :param blobxfer.models.download.Offsets offsets: offsets :param bytes data: data - :param blobxfer.models.download.DownloadDescriptor dd: - download descriptor + :param blobxfer.models.download.Descriptor dd: download descriptor """ # write data to disk dd.write_data(offsets, data) @@ -413,17 +444,18 @@ def _run(self): start_time = datetime.datetime.now(tz=dateutil.tz.tzlocal()) logger.info('script start time: {0}'.format(start_time)) # ensure destination path - blobxfer.operations.ensure_local_destination(self._creds, self._spec) + blobxfer.operations.download.Downloader.ensure_local_destination( + self._creds, self._spec) logger.info('downloading blobs/files to local path: {}'.format( 
self._spec.destination.path)) # initialize MD5 processes - self._md5_offload = blobxfer.md5.LocalFileMd5Offload( + self._md5_offload = blobxfer.models.md5.LocalFileMd5Offload( num_workers=self._general_options.concurrency.md5_processes) self._md5_offload.initialize_check_thread( self._check_for_downloads_from_md5) # initialize crypto processes if self._general_options.concurrency.crypto_processes > 0: - self._crypto_offload = blobxfer.crypto.operations.CryptoOffload( + self._crypto_offload = blobxfer.models.crypto.CryptoOffload( num_workers=self._general_options.concurrency.crypto_processes) self._crypto_offload.initialize_check_thread( self._check_for_crypto_done) diff --git a/blobxfer/operations/md5.py b/blobxfer/operations/md5.py new file mode 100644 index 0000000..4a50d25 --- /dev/null +++ b/blobxfer/operations/md5.py @@ -0,0 +1,74 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# compat imports +from __future__ import absolute_import, division, print_function +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip +) +# stdlib imports +import logging +# non-stdlib imports +# local imports +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + + +def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): + # type: (str, bool, int) -> str + """Compute MD5 hash for file and encode as Base64 + :param str filename: file to compute MD5 for + :param bool pagealign: page align data + :param int blocksize: block size + :rtype: str + :return: MD5 for file encoded as Base64 + """ + hasher = blobxfer.util.new_md5_hasher() + with open(filename, 'rb') as filedesc: + while True: + buf = filedesc.read(blocksize) + if not buf: + break + buflen = len(buf) + if pagealign and buflen < blocksize: + aligned = blobxfer.util.page_align_content_length(buflen) + if aligned != buflen: + buf = buf.ljust(aligned, b'\0') + hasher.update(buf) + return blobxfer.util.base64_encode_as_string(hasher.digest()) + + +def compute_md5_for_data_asbase64(data): + # type: (obj) -> str + """Compute MD5 hash for bits and encode as Base64 + :param any data: data to compute MD5 for + :rtype: str + :return: MD5 for data + """ + hasher = blobxfer.util.new_md5_hasher() + hasher.update(data) + return blobxfer.util.base64_encode_as_string(hasher.digest()) diff --git a/blobxfer/util.py b/blobxfer/util.py index eec47a9..ec85fe5 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -64,7 +64,7 @@ def setup_logger(logger): # noqa logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() formatter = logging.Formatter( - '%(asctime)sZ %(levelname)s %(name)s:%(funcName)s:%(lineno)d ' + '%(asctime)s %(levelname)s %(name)s:%(funcName)s:%(lineno)d ' '%(message)s') handler.setFormatter(formatter) logger.addHandler(handler) diff --git a/cli/settings.py b/cli/settings.py index 4da2500..b62de58 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -33,8 +33,10 @@ import enum # non-stdlib imports # local imports -import blobxfer.crypto.operations -import blobxfer.models +import blobxfer.models.azure +import blobxfer.models.download +import blobxfer.models.options +import blobxfer.operations.crypto import blobxfer.util @@ -215,13 +217,13 @@ def merge_settings(config, cli_options): def create_azure_storage_credentials(config): - # type: (dict) -> blobxfer.models.AzureStorageCredentials - """Create an AzureStorageCredentials object from configuration + # type: (dict) -> blobxfer.models.azure.StorageCredentials + """Create an Azure StorageCredentials object from configuration :param dict config: config dict - :rtype: blobxfer.models.AzureStorageCredentials + :rtype: blobxfer.models.azure.StorageCredentials :return: credentials object """ - creds = blobxfer.models.AzureStorageCredentials() + creds = blobxfer.models.azure.StorageCredentials() endpoint = config['azure_storage']['endpoint'] for name in config['azure_storage']['accounts']: key = config['azure_storage']['accounts'][name] @@ -230,14 +232,14 @@ def create_azure_storage_credentials(config): def create_general_options(config): - # type: (dict) -> blobxfer.models.GeneralOptions - """Create a GeneralOptions object from configuration + # type: (dict) -> blobxfer.models.options.General + """Create a General Options object from configuration :param dict config: config dict - :rtype: blobxfer.models.GeneralOptions + :rtype: 
blobxfer.models.options.General :return: general options object """ - return blobxfer.models.GeneralOptions( - concurrency=blobxfer.models.ConcurrencyOptions( + return blobxfer.models.options.General( + concurrency=blobxfer.models.options.Concurrency( crypto_processes=config['options']['crypto_processes'], md5_processes=config['options']['md5_processes'], transfer_threads=config['options']['transfer_threads'], @@ -250,38 +252,38 @@ def create_general_options(config): def create_download_specifications(config): - # type: (dict) -> List[blobxfer.models.DownloadSpecification] - """Create a list of DownloadSpecification objects from configuration + # type: (dict) -> List[blobxfer.models.download.Specification] + """Create a list of Download Specification objects from configuration :param dict config: config dict :rtype: list - :return: list of DownloadSpecification objects + :return: list of Download Specification objects """ specs = [] for conf in config['download']: # create download options confmode = conf['options']['mode'].lower() if confmode == 'auto': - mode = blobxfer.models.AzureStorageModes.Auto + mode = blobxfer.models.azure.StorageModes.Auto elif confmode == 'append': - mode = blobxfer.models.AzureStorageModes.Append + mode = blobxfer.models.azure.StorageModes.Append elif confmode == 'block': - mode = blobxfer.models.AzureStorageModes.Block + mode = blobxfer.models.azure.StorageModes.Block elif confmode == 'file': - mode = blobxfer.models.AzureStorageModes.File + mode = blobxfer.models.azure.StorageModes.File elif confmode == 'page': - mode = blobxfer.models.AzureStorageModes.Page + mode = blobxfer.models.azure.StorageModes.Page else: raise ValueError('unknown mode: {}'.format(confmode)) # load RSA private key PEM file if specified rpk = conf['options']['rsa_private_key'] if blobxfer.util.is_not_empty(rpk): rpkp = conf['options']['rsa_private_key_passphrase'] - rpk = blobxfer.crypto.operations.load_rsa_private_key_file( + rpk = blobxfer.operations.crypto.load_rsa_private_key_file( rpk, rpkp) else: rpk = None - ds = blobxfer.models.DownloadSpecification( - download_options=blobxfer.models.DownloadOptions( + ds = blobxfer.models.download.Specification( + download_options=blobxfer.models.options.Download( check_file_md5=conf['options']['check_file_md5'], chunk_size_bytes=conf['options']['chunk_size_bytes'], delete_extraneous_destination=conf[ @@ -293,12 +295,13 @@ def create_download_specifications(config): 'options']['restore_file_attributes'], rsa_private_key=rpk, ), - skip_on_options=blobxfer.models.SkipOnOptions( + skip_on_options=blobxfer.models.options.SkipOn( filesize_match=conf['options']['skip_on']['filesize_match'], lmt_ge=conf['options']['skip_on']['lmt_ge'], md5_match=conf['options']['skip_on']['md5_match'], ), - local_destination_path=blobxfer.models.LocalDestinationPath( + local_destination_path=blobxfer.models.download. 
+ LocalDestinationPath( conf['destination'] ) ) @@ -308,7 +311,7 @@ def create_download_specifications(config): raise RuntimeError( 'invalid number of source pairs specified per entry') sa = next(iter(src)) - asp = blobxfer.models.AzureSourcePath() + asp = blobxfer.models.azure.SourcePath() asp.add_path_with_storage_account(src[sa], sa) if blobxfer.util.is_not_empty(conf['include']): asp.add_includes(conf['include']) diff --git a/setup.py b/setup.py index f6336db..7c61abf 100644 --- a/setup.py +++ b/setup.py @@ -45,18 +45,18 @@ 'azure-common==1.1.4', 'azure-storage==0.34.0', 'click==6.7', - 'cryptography>=1.7.2', + 'cryptography>=1.8.1', 'future==0.16.0', 'python-dateutil==2.6.0', - 'ruamel.yaml==0.13.14', + 'ruamel.yaml==0.14.5', ] if sys.version_info < (3, 4): - install_requires.append('enum34') + install_requires.append('enum34==1.1.6') if sys.version_info < (3, 5): - install_requires.append('pathlib2') - install_requires.append('scandir') + install_requires.append('pathlib2==2.2.1') + install_requires.append('scandir==1.5') setup( name='blobxfer', From 2bcab1e605e43a51b0485a3713b97aac731f2a0e Mon Sep 17 00:00:00 2001 From: Fred Park Date: Wed, 5 Apr 2017 14:14:41 -0700 Subject: [PATCH 17/47] Fix tests to align with refactor - Move some classes out of models to operations --- blobxfer/models/azure.py | 259 +----------- blobxfer/models/crypto.py | 63 +-- blobxfer/models/download.py | 2 +- blobxfer/models/md5.py | 90 ----- blobxfer/operations/azure/__init__.py | 284 +++++++++++++ blobxfer/operations/azure/blob/append.py | 4 +- blobxfer/operations/azure/blob/block.py | 4 +- blobxfer/operations/azure/blob/page.py | 4 +- blobxfer/operations/azure/file.py | 4 +- blobxfer/operations/crypto.py | 62 +++ blobxfer/operations/download.py | 18 +- blobxfer/operations/md5.py | 52 +++ cli/settings.py | 9 +- setup.py | 11 +- test_requirements.txt | 4 +- tests/test_blobxfer_models.py | 372 ------------------ tests/test_blobxfer_models_azure.py | 51 +++ ...dels.py => test_blobxfer_models_crypto.py} | 6 +- ...ls.py => test_blobxfer_models_download.py} | 147 +++++-- ...oad.py => test_blobxfer_models_offload.py} | 2 +- tests/test_blobxfer_models_options.py | 82 ++++ tests/test_blobxfer_models_upload.py | 56 +++ tests/test_blobxfer_operations.py | 80 ---- tests/test_blobxfer_operations_azure.py | 161 ++++++++ ...=> test_blobxfer_operations_azure_blob.py} | 16 +- ..._blobxfer_operations_azure_blob_append.py} | 10 +- ...t_blobxfer_operations_azure_blob_block.py} | 11 +- ...st_blobxfer_operations_azure_blob_page.py} | 9 +- ...=> test_blobxfer_operations_azure_file.py} | 10 +- ....py => test_blobxfer_operations_crypto.py} | 6 +- ...y => test_blobxfer_operations_download.py} | 204 +++++++--- ...md5.py => test_blobxfer_operations_md5.py} | 30 +- 32 files changed, 1077 insertions(+), 1046 deletions(-) delete mode 100644 blobxfer/models/md5.py delete mode 100644 tests/test_blobxfer_models.py create mode 100644 tests/test_blobxfer_models_azure.py rename tests/{test_blobxfer_crypto_models.py => test_blobxfer_models_crypto.py} (98%) rename tests/{test_blobxfer_download_models.py => test_blobxfer_models_download.py} (74%) rename tests/{test_blobxfer_offload.py => test_blobxfer_models_offload.py} (94%) create mode 100644 tests/test_blobxfer_models_options.py create mode 100644 tests/test_blobxfer_models_upload.py delete mode 100644 tests/test_blobxfer_operations.py create mode 100644 tests/test_blobxfer_operations_azure.py rename tests/{test_blobxfer_blob_operations.py => test_blobxfer_operations_azure_blob.py} (83%) 
rename tests/{test_blobxfer_blob_append_operations.py => test_blobxfer_operations_azure_blob_append.py} (71%) rename tests/{test_blobxfer_blob_block_operations.py => test_blobxfer_operations_azure_blob_block.py} (71%) rename tests/{test_blobxfer_blob_page_operations.py => test_blobxfer_operations_azure_blob_page.py} (74%) rename tests/{test_blobxfer_file_operations.py => test_blobxfer_operations_azure_file.py} (93%) rename tests/{test_blobxfer_crypto_operations.py => test_blobxfer_operations_crypto.py} (96%) rename tests/{test_blobxfer_download_operations.py => test_blobxfer_operations_download.py} (78%) rename tests/{test_blobxfer_md5.py => test_blobxfer_operations_md5.py} (72%) diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index 57d1f38..9f28ca3 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -34,12 +34,6 @@ # non-stdlib imports from azure.storage.blob.models import _BlobTypes as BlobTypes # local imports -import blobxfer.models -import blobxfer.operations.azure.blob -import blobxfer.operations.azure.blob.append -import blobxfer.operations.azure.blob.block -import blobxfer.operations.azure.blob.page -import blobxfer.operations.azure.file # enums @@ -51,136 +45,6 @@ class StorageModes(enum.Enum): Page = 50 -class StorageCredentials(object): - """Azure Storage Credentials""" - def __init__(self): - # type: (StorageCredentials) -> None - """Ctor for StorageCredentials""" - self._storage_accounts = {} - - def add_storage_account(self, name, key, endpoint): - # type: (StorageCredentials, str, str, str) -> None - """Add a storage account - :param StorageCredentials self: this - :param str name: name of storage account to store - :param str key: storage key or sas - :param str endpoint: endpoint - """ - if name in self._storage_accounts: - raise ValueError( - '{} already exists in storage accounts'.format(name)) - self._storage_accounts[name] = StorageAccount(name, key, endpoint) - - def get_storage_account(self, name): - # type: (StorageCredentials, str) -> StorageAccount - """Get storage account details - :param StorageCredentials self: this - :param str name: name of storage account to retrieve - :rtype: StorageAccount - :return: storage account details - """ - return self._storage_accounts[name] - - -class StorageAccount(object): - """Azure Storage Account""" - def __init__(self, name, key, endpoint): - # type: (StorageAccount, str, str, str) -> None - """Ctor for StorageAccount - :param str name: name of storage account - :param str key: storage key or sas - :param str endpoint: endpoint - """ - self._append_blob_client = None - self._block_blob_client = None - self._file_client = None - self._page_blob_client = None - self.name = name - self.key = key - self.endpoint = endpoint - self.is_sas = self._key_is_sas(self.key) - # normalize sas keys - if self.is_sas and self.key.startswith('?'): - self.key = self.key[1:] - self._create_clients() - - @staticmethod - def _key_is_sas(key): - # type: (str) -> bool - """Determine if key is a sas - :param str key: key to parse - :rtype: bool - :return: if key is a sas - """ - # keys starting with ? are sas keys as ? is not in the base-64 - # character range - if key.startswith('?'): - return True - else: - # & is not in the base-64 character range, so technically - # the presence of this character means the key is a sas. however, - # perform a stronger check for the sig= parameter. 
- tmp = key.split('&') - if len(tmp) == 1: - return False - elif any(x.startswith('sig=') for x in tmp): - return True - return False - - def _create_clients(self): - # type: (StorageAccount) -> None - """Create Azure Storage clients - :param StorageAccount self: this - """ - self._append_blob_client = \ - blobxfer.operations.azure.blob.append.create_client(self) - self._block_blob_client = \ - blobxfer.operations.azure.blob.block.create_client(self) - self._file_client = blobxfer.operations.azure.file.create_client(self) - self._page_blob_client = \ - blobxfer.operations.azure.blob.page.create_client(self) - - @property - def append_blob_client(self): - # type: (StorageAccount) -> azure.storage.blob.AppendBlobService - """Get append blob client - :param StorageAccount self: this - :rtype: azure.storage.blob.AppendBlobService - :return: append blob client - """ - return self._append_blob_client - - @property - def block_blob_client(self): - # type: (StorageAccount) -> azure.storage.blob.BlockBlobService - """Get block blob client - :param StorageAccount self: this - :rtype: azure.storage.blob.BlockBlobService - :return: block blob client - """ - return self._block_blob_client - - @property - def file_client(self): - # type: (StorageAccount) -> azure.storage.file.FileService - """Get file client - :param StorageAccount self: this - :rtype: azure.storage.file.FileService - :return: file client - """ - return self._file_client - - @property - def page_blob_client(self): - # type: (StorageAccount) -> azure.storage.blob.PageBlobService - """Get page blob client - :param StorageAccount self: this - :rtype: azure.storage.blob.PageBlobService - :return: page blob client - """ - return self._page_blob_client - - class StorageEntity(object): """Azure Storage Entity""" def __init__(self, container, ed=None): @@ -306,11 +170,11 @@ def encryption_metadata(self): return self._encryption def populate_from_blob(self, sa, blob): - # type: (StorageEntity, blobxfer.models.azure.StorageAccount, + # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, # azure.storage.blob.models.Blob) -> None """Populate properties from Blob :param StorageEntity self: this - :param blobxfer.models.azure.StorageAccount sa: storage account + :param blobxfer.operations.azure.StorageAccount sa: storage account :param azure.storage.blob.models.Blob blob: blob to populate from """ self._name = blob.name @@ -329,11 +193,11 @@ def populate_from_blob(self, sa, blob): self._client = sa.page_blob_client def populate_from_file(self, sa, file): - # type: (StorageEntity, blobxfer.models.azure.StorageAccount, + # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, # azure.storage.file.models.File) -> None """Populate properties from File :param StorageEntity self: this - :param blobxfer.models.azure.StorageAccount sa: storage account + :param blobxfer.operations.azure.StorageAccount sa: storage account :param azure.storage.file.models.File file: file to populate from """ self._name = file.name @@ -343,118 +207,3 @@ def populate_from_file(self, sa, file): self._md5 = file.properties.content_settings.content_md5 self._mode = StorageModes.File self._client = sa.file_client - - -class SourcePath(blobxfer.models._BaseSourcePaths): - """Azure Source Path""" - def __init__(self): - # type: (SourcePath) -> None - """Ctor for SourcePath - :param SourcePath self: this - """ - super(SourcePath, self).__init__() - self._path_map = {} - - def add_path_with_storage_account(self, remote_path, storage_account): - # type: (SourcePath, 
str, str) -> None - """Add a path with an associated storage account - :param SourcePath self: this - :param str remote_path: remote path - :param str storage_account: storage account to associate with path - """ - if len(self._path_map) >= 1: - raise RuntimeError( - 'cannot add multiple remote paths to SourcePath objects') - rpath = blobxfer.util.normalize_azure_path(remote_path) - self.add_path(rpath) - self._path_map[rpath] = storage_account - - def lookup_storage_account(self, remote_path): - # type: (SourcePath, str) -> str - """Lookup the storage account associated with the remote path - :param SourcePath self: this - :param str remote_path: remote path - :rtype: str - :return: storage account associated with path - """ - return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] - - def files(self, creds, options, general_options): - # type: (SourcePath, StorageCredentials, - # blobxfer.models.options.Download, - # blobxfer.models.options.General) -> StorageEntity - """Generator of Azure remote files or blobs - :param SourcePath self: this - :param StorageCredentials creds: storage creds - :param blobxfer.models.options.Download options: download options - :param blobxfer.models.options.General general_options: general options - :rtype: StorageEntity - :return: Azure storage entity object - """ - if options.mode == blobxfer.models.azure.StorageModes.File: - for file in self._populate_from_list_files( - creds, options, general_options): - yield file - else: - for blob in self._populate_from_list_blobs( - creds, options, general_options): - yield blob - - def _populate_from_list_files(self, creds, options, general_options): - # type: (SourcePath, StorageCredentials, - # blobxfer.models.options.Download, - # blobxfer.models.options.General) -> StorageEntity - """Internal generator for Azure remote files - :param SourcePath self: this - :param StorageCredentials creds: storage creds - :param blobxfer.models.options.Download options: download options - :param blobxfer.models.options.General general_options: general options - :rtype: StorageEntity - :return: Azure storage entity object - """ - for _path in self._paths: - rpath = str(_path) - cont, dir = blobxfer.util.explode_azure_path(rpath) - sa = creds.get_storage_account(self.lookup_storage_account(rpath)) - for file in blobxfer.operations.azure.file.list_files( - sa.file_client, cont, dir, general_options.timeout_sec): - if blobxfer.models.crypto.EncryptionMetadata.\ - encryption_metadata_exists(file.metadata): - ed = blobxfer.models.crypto.EncryptionMetadata() - ed.convert_from_json( - file.metadata, file.name, options.rsa_private_key) - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_file(sa, file) - yield ase - - def _populate_from_list_blobs(self, creds, options, general_options): - # type: (SourcePath, StorageCredentials, - # blobxfer.models.options.Download, - # blobxfer.models.options.General) -> StorageEntity - """Internal generator for Azure remote blobs - :param SourcePath self: this - :param StorageCredentials creds: storage creds - :param blobxfer.models.options.Download options: download options - :param blobxfer.models.options.General general_options: general options - :rtype: StorageEntity - :return: Azure storage entity object - """ - for _path in self._paths: - rpath = str(_path) - cont, dir = blobxfer.util.explode_azure_path(rpath) - sa = creds.get_storage_account(self.lookup_storage_account(rpath)) - for blob in blobxfer.operations.azure.blob.list_blobs( - 
sa.block_blob_client, cont, dir, options.mode, - general_options.timeout_sec): - if blobxfer.models.crypto.EncryptionMetadata.\ - encryption_metadata_exists(blob.metadata): - ed = blobxfer.models.crypto.EncryptionMetadata() - ed.convert_from_json( - blob.metadata, blob.name, options.rsa_private_key) - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_blob(sa, blob) - yield ase diff --git a/blobxfer/models/crypto.py b/blobxfer/models/crypto.py index 904da80..91a2f4a 100644 --- a/blobxfer/models/crypto.py +++ b/blobxfer/models/crypto.py @@ -32,14 +32,9 @@ # stdlib imports import base64 import collections -import enum import hashlib import hmac import json -try: - import queue -except ImportError: # noqa - import Queue as queue # non-stdlib imports # local imports import blobxfer.models.offload @@ -47,7 +42,7 @@ import blobxfer.util # encryption constants -_AES256_BLOCKSIZE_BYTES = 16 +AES256_BLOCKSIZE_BYTES = 16 # named tuples EncryptionBlobxferExtensions = collections.namedtuple( @@ -316,59 +311,3 @@ def initialize_hmac(self): return hmac.new(self._signkey, digestmod=hashlib.sha256) else: return None - - -class CryptoAction(enum.Enum): - Encrypt = 1 - Decrypt = 2 - - -class CryptoOffload(blobxfer.models.offload._MultiprocessOffload): - def __init__(self, num_workers): - # type: (CryptoOffload, int) -> None - """Ctor for Crypto Offload - :param CryptoOffload self: this - :param int num_workers: number of worker processes - """ - super(CryptoOffload, self).__init__( - self._worker_process, num_workers, 'Crypto') - - def _worker_process(self): - # type: (CryptoOffload) -> None - """Crypto worker - :param CryptoOffload self: this - """ - while not self.terminated: - try: - inst = self._task_queue.get(True, 1) - except queue.Empty: - continue - if inst[0] == CryptoAction.Encrypt: - # TODO on upload - raise NotImplementedError() - elif inst[0] == CryptoAction.Decrypt: - final_path, offsets, symkey, iv, encdata = \ - inst[1], inst[2], inst[3], inst[4], inst[5] - data = blobxfer.operations.crypto.aes_cbc_decrypt_data( - symkey, iv, encdata, offsets.unpad) - self._done_cv.acquire() - self._done_queue.put((final_path, offsets, data)) - self._done_cv.notify() - self._done_cv.release() - - def add_decrypt_chunk( - self, final_path, offsets, symkey, iv, encdata): - # type: (CryptoOffload, str, blobxfer.models.download.Offsets, - # bytes, bytes, bytes) -> None - """Add a chunk to decrypt - :param CryptoOffload self: this - :param str final_path: final path - :param blobxfer.models.download.Offsets offsets: offsets - :param bytes symkey: symmetric key - :param bytes iv: initialization vector - :param bytes encdata: encrypted data - """ - self._task_queue.put( - (CryptoAction.Decrypt, final_path, offsets, symkey, iv, - encdata) - ) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index d4d8f06..150e07e 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -170,7 +170,7 @@ def add_azure_source_path(self, source): class Descriptor(object): """Download Descriptor""" - _AES_BLOCKSIZE = blobxfer.models.crypto._AES256_BLOCKSIZE_BYTES + _AES_BLOCKSIZE = blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES def __init__(self, lpath, ase, options): # type: (DownloadDescriptior, pathlib.Path, diff --git a/blobxfer/models/md5.py b/blobxfer/models/md5.py deleted file mode 100644 index f8c1d3a..0000000 --- a/blobxfer/models/md5.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) Microsoft Corporation -# -# All rights reserved. 
-# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# compat imports -from __future__ import absolute_import, division, print_function -from builtins import ( # noqa - bytes, dict, int, list, object, range, ascii, chr, hex, input, - next, oct, open, pow, round, super, filter, map, zip -) -# stdlib imports -import logging -try: - import queue -except ImportError: # noqa - import Queue as queue -# non-stdlib imports -# local imports -import blobxfer.models.azure -import blobxfer.models.offload -import blobxfer.operations.md5 - -# create logger -logger = logging.getLogger(__name__) - - -class LocalFileMd5Offload(blobxfer.models.offload._MultiprocessOffload): - """LocalFileMd5Offload""" - def __init__(self, num_workers): - # type: (LocalFileMd5Offload, int) -> None - """Ctor for Local File Md5 Offload - :param LocalFileMd5Offload self: this - :param int num_workers: number of worker processes - """ - super(LocalFileMd5Offload, self).__init__( - self._worker_process, num_workers, 'MD5') - - def _worker_process(self): - # type: (LocalFileMd5Offload) -> None - """Compute MD5 for local file - :param LocalFileMd5Offload self: this - """ - while not self.terminated: - try: - filename, remote_md5, pagealign = self._task_queue.get(True, 1) - except queue.Empty: - continue - md5 = blobxfer.operations.md5.compute_md5_for_file_asbase64( - filename, pagealign) - logger.debug('MD5: {} {} {}'.format( - md5, remote_md5, filename)) - self._done_cv.acquire() - self._done_queue.put((filename, md5 == remote_md5)) - self._done_cv.notify() - self._done_cv.release() - - def add_localfile_for_md5_check(self, filename, remote_md5, mode): - # type: (LocalFileMd5Offload, str, str, - # blobxfer.models.azure.StorageModes) -> None - """Add a local file to MD5 check queue - :param LocalFileMd5Offload self: this - :param str filename: file to compute MD5 for - :param str remote_md5: remote MD5 to compare against - :param blobxfer.models.azure.StorageModes mode: mode - """ - if mode == blobxfer.models.azure.StorageModes.Page: - pagealign = True - else: - pagealign = False - self._task_queue.put((filename, remote_md5, pagealign)) diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index e69de29..20ddb50 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -0,0 +1,284 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +# non-stdlib imports +# local imports +import blobxfer.models +import blobxfer.operations.azure.blob.append +import blobxfer.operations.azure.blob.block +import blobxfer.operations.azure.blob.page +import blobxfer.operations.azure.file + + +class StorageCredentials(object): + """Azure Storage Credentials""" + def __init__(self): + # type: (StorageCredentials) -> None + """Ctor for StorageCredentials""" + self._storage_accounts = {} + + def add_storage_account(self, name, key, endpoint): + # type: (StorageCredentials, str, str, str) -> None + """Add a storage account + :param StorageCredentials self: this + :param str name: name of storage account to store + :param str key: storage key or sas + :param str endpoint: endpoint + """ + if name in self._storage_accounts: + raise ValueError( + '{} already exists in storage accounts'.format(name)) + self._storage_accounts[name] = StorageAccount(name, key, endpoint) + + def get_storage_account(self, name): + # type: (StorageCredentials, str) -> StorageAccount + """Get storage account details + :param StorageCredentials self: this + :param str name: name of storage account to retrieve + :rtype: StorageAccount + :return: storage account details + """ + return self._storage_accounts[name] + + +class StorageAccount(object): + """Azure Storage Account""" + def __init__(self, name, key, endpoint): + # type: (StorageAccount, str, str, str) -> None + """Ctor for StorageAccount + :param str name: name of storage account + :param str key: storage key or sas + :param str endpoint: endpoint + """ + self._append_blob_client = None + self._block_blob_client = None + self._file_client = None + self._page_blob_client = None + self.name = name + self.key = key + self.endpoint = endpoint + self.is_sas = self._key_is_sas(self.key) + # normalize sas keys + if self.is_sas and self.key.startswith('?'): + self.key = self.key[1:] + self._create_clients() + + @staticmethod + def _key_is_sas(key): + # type: (str) -> bool + """Determine if key is a sas + :param str key: key to parse + :rtype: bool + :return: if key is a sas + """ + # keys starting with ? are sas keys as ? 
is not in the base-64 + # character range + if key.startswith('?'): + return True + else: + # & is not in the base-64 character range, so technically + # the presence of this character means the key is a sas. however, + # perform a stronger check for the sig= parameter. + tmp = key.split('&') + if len(tmp) == 1: + return False + elif any(x.startswith('sig=') for x in tmp): + return True + return False + + def _create_clients(self): + # type: (StorageAccount) -> None + """Create Azure Storage clients + :param StorageAccount self: this + """ + self._append_blob_client = \ + blobxfer.operations.azure.blob.append.create_client(self) + self._block_blob_client = \ + blobxfer.operations.azure.blob.block.create_client(self) + self._file_client = blobxfer.operations.azure.file.create_client(self) + self._page_blob_client = \ + blobxfer.operations.azure.blob.page.create_client(self) + + @property + def append_blob_client(self): + # type: (StorageAccount) -> azure.storage.blob.AppendBlobService + """Get append blob client + :param StorageAccount self: this + :rtype: azure.storage.blob.AppendBlobService + :return: append blob client + """ + return self._append_blob_client + + @property + def block_blob_client(self): + # type: (StorageAccount) -> azure.storage.blob.BlockBlobService + """Get block blob client + :param StorageAccount self: this + :rtype: azure.storage.blob.BlockBlobService + :return: block blob client + """ + return self._block_blob_client + + @property + def file_client(self): + # type: (StorageAccount) -> azure.storage.file.FileService + """Get file client + :param StorageAccount self: this + :rtype: azure.storage.file.FileService + :return: file client + """ + return self._file_client + + @property + def page_blob_client(self): + # type: (StorageAccount) -> azure.storage.blob.PageBlobService + """Get page blob client + :param StorageAccount self: this + :rtype: azure.storage.blob.PageBlobService + :return: page blob client + """ + return self._page_blob_client + + +class SourcePath(blobxfer.models._BaseSourcePaths): + """Azure Source Path""" + def __init__(self): + # type: (SourcePath) -> None + """Ctor for SourcePath + :param SourcePath self: this + """ + super(SourcePath, self).__init__() + self._path_map = {} + + def add_path_with_storage_account(self, remote_path, storage_account): + # type: (SourcePath, str, str) -> None + """Add a path with an associated storage account + :param SourcePath self: this + :param str remote_path: remote path + :param str storage_account: storage account to associate with path + """ + if len(self._path_map) >= 1: + raise RuntimeError( + 'cannot add multiple remote paths to SourcePath objects') + rpath = blobxfer.util.normalize_azure_path(remote_path) + self.add_path(rpath) + self._path_map[rpath] = storage_account + + def lookup_storage_account(self, remote_path): + # type: (SourcePath, str) -> str + """Lookup the storage account associated with the remote path + :param SourcePath self: this + :param str remote_path: remote path + :rtype: str + :return: storage account associated with path + """ + return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] + + def files(self, creds, options, general_options): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, + # blobxfer.models.options.General) -> StorageEntity + """Generator of Azure remote files or blobs + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + 
:param blobxfer.models.options.General general_options: general options + :rtype: StorageEntity + :return: Azure storage entity object + """ + if options.mode == blobxfer.models.azure.StorageModes.File: + for file in self._populate_from_list_files( + creds, options, general_options): + yield file + else: + for blob in self._populate_from_list_blobs( + creds, options, general_options): + yield blob + + def _populate_from_list_files(self, creds, options, general_options): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, + # blobxfer.models.options.General) -> StorageEntity + """Internal generator for Azure remote files + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + :param blobxfer.models.options.General general_options: general options + :rtype: StorageEntity + :return: Azure storage entity object + """ + for _path in self._paths: + rpath = str(_path) + cont, dir = blobxfer.util.explode_azure_path(rpath) + sa = creds.get_storage_account(self.lookup_storage_account(rpath)) + for file in blobxfer.operations.azure.file.list_files( + sa.file_client, cont, dir, general_options.timeout_sec): + if blobxfer.models.crypto.EncryptionMetadata.\ + encryption_metadata_exists(file.metadata): + ed = blobxfer.models.crypto.EncryptionMetadata() + ed.convert_from_json( + file.metadata, file.name, options.rsa_private_key) + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + ase.populate_from_file(sa, file) + yield ase + + def _populate_from_list_blobs(self, creds, options, general_options): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, + # blobxfer.models.options.General) -> StorageEntity + """Internal generator for Azure remote blobs + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + :param blobxfer.models.options.General general_options: general options + :rtype: StorageEntity + :return: Azure storage entity object + """ + for _path in self._paths: + rpath = str(_path) + cont, dir = blobxfer.util.explode_azure_path(rpath) + sa = creds.get_storage_account(self.lookup_storage_account(rpath)) + for blob in blobxfer.operations.azure.blob.list_blobs( + sa.block_blob_client, cont, dir, options.mode, + general_options.timeout_sec): + if blobxfer.models.crypto.EncryptionMetadata.\ + encryption_metadata_exists(blob.metadata): + ed = blobxfer.models.crypto.EncryptionMetadata() + ed.convert_from_json( + blob.metadata, blob.name, options.rsa_private_key) + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + ase.populate_from_blob(sa, blob) + yield ase diff --git a/blobxfer/operations/azure/blob/append.py b/blobxfer/operations/azure/blob/append.py index 296e8c7..910ab5d 100644 --- a/blobxfer/operations/azure/blob/append.py +++ b/blobxfer/operations/azure/blob/append.py @@ -40,9 +40,9 @@ def create_client(storage_account): - # type: (blobxfer.models.azure.StorageAccount) -> AppendBlobService + # type: (blobxfer.operations.azure.StorageAccount) -> AppendBlobService """Create Append blob client - :param blobxfer.models.azure.StorageAccount storage_account: + :param blobxfer.operations.azure.StorageAccount storage_account: storage account :rtype: AppendBlobService :return: append blob service client diff --git a/blobxfer/operations/azure/blob/block.py b/blobxfer/operations/azure/blob/block.py index c68ac32..b6fd673 
100644 --- a/blobxfer/operations/azure/blob/block.py +++ b/blobxfer/operations/azure/blob/block.py @@ -40,9 +40,9 @@ def create_client(storage_account): - # type: (blobxfer.models.azure.StorageAccount) -> BlockBlobService + # type: (blobxfer.operations.azure.StorageAccount) -> BlockBlobService """Create block blob client - :param blobxfer.models.azure.StorageAccount storage_account: + :param blobxfer.operations.azure.StorageAccount storage_account: storage account :rtype: azure.storage.blob.BlockBlobService :return: block blob service client diff --git a/blobxfer/operations/azure/blob/page.py b/blobxfer/operations/azure/blob/page.py index 8a64622..6aedc8f 100644 --- a/blobxfer/operations/azure/blob/page.py +++ b/blobxfer/operations/azure/blob/page.py @@ -40,9 +40,9 @@ def create_client(storage_account): - # type: (blobxfer.models.azure.StorageAccount) -> PageBlobService + # type: (blobxfer.operations.azure.StorageAccount) -> PageBlobService """Create block blob client - :param blobxfer.models.azure.StorageAccount storage_account: + :param blobxfer.operations.azure.StorageAccount storage_account: storage account :rtype: PageBlobService :return: block blob service client diff --git a/blobxfer/operations/azure/file.py b/blobxfer/operations/azure/file.py index e531fc2..1b17f94 100644 --- a/blobxfer/operations/azure/file.py +++ b/blobxfer/operations/azure/file.py @@ -46,9 +46,9 @@ def create_client(storage_account): - # type: (blobxfer.models.azure.StorageAccount) -> FileService + # type: (blobxfer.operations.azure.StorageAccount) -> FileService """Create file client - :param blobxfer.models.azure.StorageAccount storage_account: + :param blobxfer.operations.azure.StorageAccount storage_account: storage account :rtype: FileService :return: file service client diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index 44202c3..98945d3 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -31,8 +31,13 @@ next, oct, open, pow, round, super, filter, map, zip) # stdlib imports import base64 +import enum import logging import os +try: + import queue +except ImportError: # noqa + import Queue as queue # non-stdlib imports import cryptography.hazmat.backends import cryptography.hazmat.primitives.asymmetric.padding @@ -54,6 +59,12 @@ _AES256_KEYLENGTH_BYTES = 32 +# enums +class CryptoAction(enum.Enum): + Encrypt = 1 + Decrypt = 2 + + def load_rsa_private_key_file(rsakeyfile, passphrase): # type: (str, str) -> # cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey @@ -211,3 +222,54 @@ def aes_cbc_encrypt_data(symkey, iv, data, pad): return cipher.update(pkcs7_pad(data)) + cipher.finalize() else: return cipher.update(data) + cipher.finalize() + + +class CryptoOffload(blobxfer.models.offload._MultiprocessOffload): + def __init__(self, num_workers): + # type: (CryptoOffload, int) -> None + """Ctor for Crypto Offload + :param CryptoOffload self: this + :param int num_workers: number of worker processes + """ + super(CryptoOffload, self).__init__( + self._worker_process, num_workers, 'Crypto') + + def _worker_process(self): + # type: (CryptoOffload) -> None + """Crypto worker + :param CryptoOffload self: this + """ + while not self.terminated: + try: + inst = self._task_queue.get(True, 1) + except queue.Empty: + continue + if inst[0] == CryptoAction.Encrypt: + # TODO on upload + raise NotImplementedError() + elif inst[0] == CryptoAction.Decrypt: + final_path, offsets, symkey, iv, encdata = \ + inst[1], inst[2], inst[3], inst[4], inst[5] + data = 
blobxfer.operations.crypto.aes_cbc_decrypt_data( + symkey, iv, encdata, offsets.unpad) + self._done_cv.acquire() + self._done_queue.put((final_path, offsets, data)) + self._done_cv.notify() + self._done_cv.release() + + def add_decrypt_chunk( + self, final_path, offsets, symkey, iv, encdata): + # type: (CryptoOffload, str, blobxfer.models.download.Offsets, + # bytes, bytes, bytes) -> None + """Add a chunk to decrypt + :param CryptoOffload self: this + :param str final_path: final path + :param blobxfer.models.download.Offsets offsets: offsets + :param bytes symkey: symmetric key + :param bytes iv: initialization vector + :param bytes encdata: encrypted data + """ + self._task_queue.put( + (CryptoAction.Decrypt, final_path, offsets, symkey, iv, + encdata) + ) diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index 7a843e7..61875fe 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -47,10 +47,10 @@ import dateutil # local imports import blobxfer.models.crypto -import blobxfer.models.md5 import blobxfer.operations.azure.blob import blobxfer.operations.azure.file import blobxfer.operations.crypto +import blobxfer.operations.md5 import blobxfer.util # create logger @@ -67,12 +67,12 @@ class Downloader(object): """Downloader""" def __init__(self, general_options, creds, spec): # type: (Downloader, blobxfer.models.options.General, - # blobxfer.models.azure.StorageCredentials, + # blobxfer.operations.azure.StorageCredentials, # blobxfer.models.download.Specification) -> None """Ctor for Downloader :param Downloader self: this :param blobxfer.models.options.General general_options: general opts - :param blobxfer.models.azure.StorageCredentials creds: creds + :param blobxfer.operations.azure.StorageCredentials creds: creds :param blobxfer.models.download.Specification spec: download spec """ self._all_remote_files_processed = False @@ -123,10 +123,10 @@ def termination_check_md5(self): @staticmethod def ensure_local_destination(creds, spec): - # type: (blobxfer.models.azure.StorageCredentials, + # type: (blobxfer.operations.azure.StorageCredentials, # blobxfer.models.download.Specification) -> None """Ensure a local destination path given a download spec - :param blobxfer.models.azure.StorageCredentials creds: creds + :param blobxfer.operations.azure.StorageCredentials creds: creds :param blobxfer.models.download.Specification spec: download spec """ # ensure destination path is writable given the source @@ -374,7 +374,7 @@ def _worker_thread_download(self): # decrypt if necessary if dd.entity.is_encrypted: # slice data to proper bounds - encdata = data[blobxfer.models.crypto._AES256_BLOCKSIZE_BYTES:] + encdata = data[blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES:] intdata = encdata # get iv for chunk and compute hmac if offsets.chunk_num == 0: @@ -382,7 +382,7 @@ def _worker_thread_download(self): # integrity check for first chunk must include iv intdata = iv + data else: - iv = data[:blobxfer.models.crypto._AES256_BLOCKSIZE_BYTES] + iv = data[:blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES] # integrity check data dd.perform_chunked_integrity_check(offsets, intdata) # decrypt data @@ -449,13 +449,13 @@ def _run(self): logger.info('downloading blobs/files to local path: {}'.format( self._spec.destination.path)) # initialize MD5 processes - self._md5_offload = blobxfer.models.md5.LocalFileMd5Offload( + self._md5_offload = blobxfer.operations.md5.LocalFileMd5Offload( num_workers=self._general_options.concurrency.md5_processes) 
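# Editor's note: a minimal, self-contained sketch of the offload pattern that
# LocalFileMd5Offload and CryptoOffload above build on -- worker processes pull
# work from a task queue, push results to a done queue, and notify a condition
# variable so a checker thread in the main process can collect results. The
# _MultiprocessOffload base class is not shown in this patch, so the names
# below (hash_worker, the None sentinel, the hexdigest comparison) are
# illustrative assumptions, not the actual blobxfer API.
import hashlib
import multiprocessing


def hash_worker(task_queue, done_queue, done_cv):
    # consume (filename, remote_md5) tuples until a None sentinel arrives;
    # blobxfer compares base64-encoded digests, this sketch simplifies to hex
    while True:
        item = task_queue.get()
        if item is None:
            break
        filename, remote_md5 = item
        hasher = hashlib.md5()
        with open(filename, 'rb') as f:
            for chunk in iter(lambda: f.read(4194304), b''):
                hasher.update(chunk)
        with done_cv:
            done_queue.put((filename, hasher.hexdigest() == remote_md5))
            done_cv.notify()


if __name__ == '__main__':
    task_queue = multiprocessing.Queue()
    done_queue = multiprocessing.Queue()
    done_cv = multiprocessing.Condition()
    proc = multiprocessing.Process(
        target=hash_worker, args=(task_queue, done_queue, done_cv))
    proc.start()
    task_queue.put((__file__, 'not-a-real-md5'))
    with done_cv:
        done_cv.wait(timeout=5)
    print(done_queue.get())      # (filename, False)
    task_queue.put(None)         # sentinel to stop the worker
    proc.join()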
self._md5_offload.initialize_check_thread( self._check_for_downloads_from_md5) # initialize crypto processes if self._general_options.concurrency.crypto_processes > 0: - self._crypto_offload = blobxfer.models.crypto.CryptoOffload( + self._crypto_offload = blobxfer.operations.crypto.CryptoOffload( num_workers=self._general_options.concurrency.crypto_processes) self._crypto_offload.initialize_check_thread( self._check_for_crypto_done) diff --git a/blobxfer/operations/md5.py b/blobxfer/operations/md5.py index 4a50d25..d260c9e 100644 --- a/blobxfer/operations/md5.py +++ b/blobxfer/operations/md5.py @@ -30,8 +30,14 @@ ) # stdlib imports import logging +try: + import queue +except ImportError: # noqa + import Queue as queue # non-stdlib imports # local imports +import blobxfer.models.azure +import blobxfer.models.offload import blobxfer.util # create logger @@ -72,3 +78,49 @@ def compute_md5_for_data_asbase64(data): hasher = blobxfer.util.new_md5_hasher() hasher.update(data) return blobxfer.util.base64_encode_as_string(hasher.digest()) + + +class LocalFileMd5Offload(blobxfer.models.offload._MultiprocessOffload): + """LocalFileMd5Offload""" + def __init__(self, num_workers): + # type: (LocalFileMd5Offload, int) -> None + """Ctor for Local File Md5 Offload + :param LocalFileMd5Offload self: this + :param int num_workers: number of worker processes + """ + super(LocalFileMd5Offload, self).__init__( + self._worker_process, num_workers, 'MD5') + + def _worker_process(self): + # type: (LocalFileMd5Offload) -> None + """Compute MD5 for local file + :param LocalFileMd5Offload self: this + """ + while not self.terminated: + try: + filename, remote_md5, pagealign = self._task_queue.get(True, 1) + except queue.Empty: + continue + md5 = blobxfer.operations.md5.compute_md5_for_file_asbase64( + filename, pagealign) + logger.debug('MD5: {} {} {}'.format( + md5, remote_md5, filename)) + self._done_cv.acquire() + self._done_queue.put((filename, md5 == remote_md5)) + self._done_cv.notify() + self._done_cv.release() + + def add_localfile_for_md5_check(self, filename, remote_md5, mode): + # type: (LocalFileMd5Offload, str, str, + # blobxfer.models.azure.StorageModes) -> None + """Add a local file to MD5 check queue + :param LocalFileMd5Offload self: this + :param str filename: file to compute MD5 for + :param str remote_md5: remote MD5 to compare against + :param blobxfer.models.azure.StorageModes mode: mode + """ + if mode == blobxfer.models.azure.StorageModes.Page: + pagealign = True + else: + pagealign = False + self._task_queue.put((filename, remote_md5, pagealign)) diff --git a/cli/settings.py b/cli/settings.py index b62de58..4faadd9 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -36,6 +36,7 @@ import blobxfer.models.azure import blobxfer.models.download import blobxfer.models.options +import blobxfer.operations.azure import blobxfer.operations.crypto import blobxfer.util @@ -217,13 +218,13 @@ def merge_settings(config, cli_options): def create_azure_storage_credentials(config): - # type: (dict) -> blobxfer.models.azure.StorageCredentials + # type: (dict) -> blobxfer.operations.azure.StorageCredentials """Create an Azure StorageCredentials object from configuration :param dict config: config dict - :rtype: blobxfer.models.azure.StorageCredentials + :rtype: blobxfer.operations.azure.StorageCredentials :return: credentials object """ - creds = blobxfer.models.azure.StorageCredentials() + creds = blobxfer.operations.azure.StorageCredentials() endpoint = config['azure_storage']['endpoint'] for name in 
config['azure_storage']['accounts']: key = config['azure_storage']['accounts'][name] @@ -311,7 +312,7 @@ def create_download_specifications(config): raise RuntimeError( 'invalid number of source pairs specified per entry') sa = next(iter(src)) - asp = blobxfer.models.azure.SourcePath() + asp = blobxfer.operations.azure.SourcePath() asp.add_path_with_storage_account(src[sa], sa) if blobxfer.util.is_not_empty(conf['include']): asp.add_includes(conf['include']) diff --git a/setup.py b/setup.py index 7c61abf..74b57cf 100644 --- a/setup.py +++ b/setup.py @@ -31,13 +31,10 @@ packages = [ 'blobxfer', - 'blobxfer.blob', - 'blobxfer.blob.append', - 'blobxfer.blob.block', - 'blobxfer.blob.page', - 'blobxfer.crypto', - 'blobxfer.download', - 'blobxfer.file', + 'blobxfer.models', + 'blobxfer.operations', + 'blobxfer.operations.azure', + 'blobxfer.operations.azure.blob', 'blobxfer_cli', ] diff --git a/test_requirements.txt b/test_requirements.txt index 925320c..f2315c3 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,5 +1,5 @@ -flake8>=3.2.1 +flake8>=3.3.0 mock>=2.0.0 pypandoc>=1.3.3 -pytest>=3.0.5 +pytest>=3.0.7 pytest-cov>=2.4.0 diff --git a/tests/test_blobxfer_models.py b/tests/test_blobxfer_models.py deleted file mode 100644 index f200aaf..0000000 --- a/tests/test_blobxfer_models.py +++ /dev/null @@ -1,372 +0,0 @@ -# coding=utf-8 -"""Tests for models""" - -# stdlib imports -import mock -import os -try: - import pathlib2 as pathlib -except ImportError: # noqa - import pathlib -# non-stdlib imports -import azure.storage -import azure.storage.blob -import azure.storage.file -import pytest -# module under test -import blobxfer.models as models - - -@mock.patch('multiprocessing.cpu_count', return_value=1) -def test_concurrency_options(patched_cc): - a = models.ConcurrencyOptions( - crypto_processes=-1, - md5_processes=0, - transfer_threads=-2, - ) - - assert a.crypto_processes == 0 - assert a.md5_processes == 1 - assert a.transfer_threads == 3 - - -def test_general_options(): - a = models.GeneralOptions( - concurrency=models.ConcurrencyOptions( - crypto_processes=1, - md5_processes=2, - transfer_threads=3, - ), - progress_bar=False, - resume_file='abc', - timeout_sec=1, - verbose=True, - ) - - assert a.concurrency.crypto_processes == 1 - assert a.concurrency.md5_processes == 2 - assert a.concurrency.transfer_threads == 3 - assert not a.progress_bar - assert a.resume_file == pathlib.Path('abc') - assert a.timeout_sec == 1 - assert a.verbose - - a = models.GeneralOptions( - concurrency=models.ConcurrencyOptions( - crypto_processes=1, - md5_processes=2, - transfer_threads=3, - ), - progress_bar=False, - resume_file=None, - timeout_sec=1, - verbose=True, - ) - - assert a.concurrency.crypto_processes == 1 - assert a.concurrency.md5_processes == 2 - assert a.concurrency.transfer_threads == 3 - assert not a.progress_bar - assert a.resume_file is None - assert a.timeout_sec == 1 - assert a.verbose - - with pytest.raises(ValueError): - a = models.GeneralOptions(None) - - -def test_storage_credentials(): - creds = models.AzureStorageCredentials() - creds.add_storage_account('sa1', 'somekey1', 'endpoint') - - a = creds.get_storage_account('sa1') - assert a.name == 'sa1' - assert a.key == 'somekey1' - assert a.endpoint == 'endpoint' - assert isinstance( - a.append_blob_client, azure.storage.blob.AppendBlobService) - assert isinstance( - a.block_blob_client, azure.storage.blob.BlockBlobService) - assert isinstance( - a.file_client, azure.storage.file.FileService) - assert isinstance( - 
a.page_blob_client, azure.storage.blob.PageBlobService) - - with pytest.raises(KeyError): - a = creds.get_storage_account('sa2') - - with pytest.raises(ValueError): - creds.add_storage_account('sa1', 'somekeyxx', 'endpoint') - - creds.add_storage_account('sa2', 'somekey2', 'endpoint2') - a = creds.get_storage_account('sa1') - b = creds.get_storage_account('sa2') - assert a.name == 'sa1' - assert a.key == 'somekey1' - assert a.endpoint == 'endpoint' - assert b.name == 'sa2' - assert b.key == 'somekey2' - assert b.endpoint == 'endpoint2' - - -def test_key_is_sas(): - a = models.AzureStorageAccount('name', 'abcdef', 'endpoint') - assert not a.is_sas - - a = models.AzureStorageAccount('name', 'abcdef&blah', 'endpoint') - assert not a.is_sas - - a = models.AzureStorageAccount('name', '?abcdef', 'endpoint') - assert a.is_sas - - a = models.AzureStorageAccount( - 'name', '?sv=0&sr=1&sig=2', 'endpoint') - assert a.is_sas - - a = models.AzureStorageAccount( - 'name', 'sv=0&sr=1&sig=2', 'endpoint') - assert a.is_sas - - a = models.AzureStorageAccount( - 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint') - assert a.is_sas - - -def test_localsourcepaths_files(tmpdir): - tmpdir.mkdir('abc') - tmpdir.join('moo.cow').write('z') - abcpath = tmpdir.join('abc') - abcpath.join('hello.txt').write('hello') - abcpath.join('blah.x').write('x') - abcpath.join('blah.y').write('x') - abcpath.join('blah.z').write('x') - abcpath.mkdir('def') - defpath = abcpath.join('def') - defpath.join('world.txt').write('world') - defpath.join('moo.cow').write('y') - - a = models.LocalSourcePaths() - a.add_include('*.txt') - a.add_includes(['moo.cow', '*blah*']) - with pytest.raises(ValueError): - a.add_includes('abc') - a.add_exclude('**/blah.x') - a.add_excludes(['world.txt']) - with pytest.raises(ValueError): - a.add_excludes('abc') - a.add_path(str(tmpdir)) - a_set = set() - for file in a.files(): - sfile = str(file.parent_path / file.relative_path) - a_set.add(sfile) - - assert len(a.paths) == 1 - assert str(abcpath.join('blah.x')) not in a_set - assert str(defpath.join('world.txt')) in a_set - assert str(defpath.join('moo.cow')) not in a_set - - b = models.LocalSourcePaths() - b.add_includes(['moo.cow', '*blah*']) - b.add_include('*.txt') - b.add_excludes(['world.txt']) - b.add_exclude('**/blah.x') - b.add_paths([pathlib.Path(str(tmpdir))]) - for file in a.files(): - sfile = str(file.parent_path / file.relative_path) - assert sfile in a_set - - -def test_localdestinationpath(tmpdir): - tmpdir.mkdir('1') - path = tmpdir.join('1') - - a = models.LocalDestinationPath(str(path)) - a.is_dir = True - assert str(a.path) == str(path) - assert a.is_dir - - a.ensure_path_exists() - assert os.path.exists(str(a.path)) - - b = models.LocalDestinationPath() - b.is_dir = False - b.path = str(path) - with pytest.raises(RuntimeError): - b.ensure_path_exists() - assert not b.is_dir - - path2 = tmpdir.join('2') - path3 = path2.join('3') - c = models.LocalDestinationPath(str(path3)) - with pytest.raises(RuntimeError): - c.ensure_path_exists() - c.is_dir = False - c.ensure_path_exists() - assert os.path.exists(str(path2)) - assert os.path.isdir(str(path2)) - assert not c.is_dir - - -def test_azuresourcepath(): - p = '/cont/remote/path' - asp = models.AzureSourcePath() - asp.add_path_with_storage_account(p, 'sa') - - with pytest.raises(RuntimeError): - asp.add_path_with_storage_account('x', 'x') - - assert 'sa' == asp.lookup_storage_account(p) - - -@mock.patch('blobxfer.crypto.models.EncryptionMetadata') 
-@mock.patch('blobxfer.file.operations.list_files') -def test_azuresourcepath_files(patched_lf, patched_em): - p = '/cont/remote/path' - asp = models.AzureSourcePath() - asp.add_path_with_storage_account(p, 'sa') - - options = mock.MagicMock() - options.mode = models.AzureStorageModes.File - creds = mock.MagicMock() - creds.get_storage_account = mock.MagicMock() - sa = mock.MagicMock() - sa.file_client = mock.MagicMock() - creds.get_storage_account.return_value = sa - f = azure.storage.file.models.File(name='name') - patched_lf.side_effect = [[f]] - patched_em.encryption_metadata_exists = mock.MagicMock() - patched_em.encryption_metadata_exists.return_value = False - - i = 0 - for file in asp.files(creds, options, mock.MagicMock()): - i += 1 - assert file.name == 'name' - assert file.encryption_metadata is None - assert i == 1 - - fe = azure.storage.file.models.File(name='name') - fe.metadata = {'encryptiondata': {'a': 'b'}} - patched_lf.side_effect = [[fe]] - patched_em.encryption_metadata_exists.return_value = True - patched_em.convert_from_json = mock.MagicMock() - - i = 0 - for file in asp.files(creds, options, mock.MagicMock()): - i += 1 - assert file.name == 'name' - assert file.encryption_metadata is not None - assert i == 1 - - -@mock.patch('blobxfer.crypto.models.EncryptionMetadata') -@mock.patch('blobxfer.blob.operations.list_blobs') -def test_azuresourcepath_blobs(patched_lb, patched_em): - p = '/cont/remote/path' - asp = models.AzureSourcePath() - asp.add_path_with_storage_account(p, 'sa') - - options = mock.MagicMock() - options.mode = models.AzureStorageModes.Auto - creds = mock.MagicMock() - creds.get_storage_account = mock.MagicMock() - sa = mock.MagicMock() - sa.block_blob_client = mock.MagicMock() - creds.get_storage_account.return_value = sa - b = azure.storage.blob.models.Blob(name='name') - patched_lb.side_effect = [[b]] - patched_em.encryption_metadata_exists = mock.MagicMock() - patched_em.encryption_metadata_exists.return_value = False - - i = 0 - for file in asp.files(creds, options, mock.MagicMock()): - i += 1 - assert file.name == 'name' - assert file.encryption_metadata is None - assert i == 1 - - be = azure.storage.blob.models.Blob(name='name') - be.metadata = {'encryptiondata': {'a': 'b'}} - patched_lb.side_effect = [[be]] - patched_em.encryption_metadata_exists.return_value = True - patched_em.convert_from_json = mock.MagicMock() - - i = 0 - for file in asp.files(creds, options, mock.MagicMock()): - i += 1 - assert file.name == 'name' - assert file.encryption_metadata is not None - assert i == 1 - - -def test_downloadspecification(): - ds = models.DownloadSpecification( - download_options=models.DownloadOptions( - check_file_md5=True, - chunk_size_bytes=4194304, - delete_extraneous_destination=False, - mode=models.AzureStorageModes.Auto, - overwrite=True, - recursive=True, - restore_file_attributes=False, - rsa_private_key=None, - ), - skip_on_options=models.SkipOnOptions( - filesize_match=True, - lmt_ge=False, - md5_match=True, - ), - local_destination_path=models.LocalDestinationPath('dest'), - ) - - asp = models.AzureSourcePath() - p = 'some/remote/path' - asp.add_path_with_storage_account(p, 'sa') - - ds.add_azure_source_path(asp) - - assert ds.options.check_file_md5 - assert not ds.skip_on.lmt_ge - assert ds.destination.path == pathlib.Path('dest') - assert len(ds.sources) == 1 - assert p in ds.sources[0]._path_map - assert ds.sources[0]._path_map[p] == 'sa' - - -def test_azurestorageentity(): - ase = models.AzureStorageEntity('cont') - assert 
ase.container == 'cont' - assert ase.encryption_metadata is None - - blob = mock.MagicMock() - blob.name = 'name' - blob.snapshot = None - blob.properties = mock.MagicMock() - blob.properties.last_modified = 'lmt' - blob.properties.content_length = 123 - blob.properties.content_settings = mock.MagicMock() - blob.properties.content_settings.content_md5 = 'abc' - blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob - ase.populate_from_blob(mock.MagicMock(), blob) - - assert ase.client is not None - assert ase.name == 'name' - assert ase.lmt == 'lmt' - assert ase.size == 123 - assert ase.md5 == 'abc' - assert ase.snapshot is None - assert ase.mode == models.AzureStorageModes.Block - - blob.properties.blob_type = azure.storage.blob.models._BlobTypes.AppendBlob - ase.populate_from_blob(mock.MagicMock(), blob) - assert ase.mode == models.AzureStorageModes.Append - - blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob - blob.snapshot = 'abc' - ase.populate_from_blob(mock.MagicMock(), blob) - assert ase.mode == models.AzureStorageModes.Page - assert ase.snapshot is not None - - blob.snapshot = None - ase.populate_from_file(mock.MagicMock(), blob) - assert ase.mode == models.AzureStorageModes.File - assert ase.snapshot is None diff --git a/tests/test_blobxfer_models_azure.py b/tests/test_blobxfer_models_azure.py new file mode 100644 index 0000000..37a40a5 --- /dev/null +++ b/tests/test_blobxfer_models_azure.py @@ -0,0 +1,51 @@ +# coding=utf-8 +"""Tests for models azure""" + +# stdlib imports +import mock +# non-stdlib imports +import azure.storage +import azure.storage.blob +import azure.storage.file +# module under test +import blobxfer.models.azure as azmodels + + +def test_azurestorageentity(): + ase = azmodels.StorageEntity('cont') + assert ase.container == 'cont' + assert ase.encryption_metadata is None + + blob = mock.MagicMock() + blob.name = 'name' + blob.snapshot = None + blob.properties = mock.MagicMock() + blob.properties.last_modified = 'lmt' + blob.properties.content_length = 123 + blob.properties.content_settings = mock.MagicMock() + blob.properties.content_settings.content_md5 = 'abc' + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob + ase.populate_from_blob(mock.MagicMock(), blob) + + assert ase.client is not None + assert ase.name == 'name' + assert ase.lmt == 'lmt' + assert ase.size == 123 + assert ase.md5 == 'abc' + assert ase.snapshot is None + assert ase.mode == azmodels.StorageModes.Block + + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.AppendBlob + ase.populate_from_blob(mock.MagicMock(), blob) + assert ase.mode == azmodels.StorageModes.Append + + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob + blob.snapshot = 'abc' + ase.populate_from_blob(mock.MagicMock(), blob) + assert ase.mode == azmodels.StorageModes.Page + assert ase.snapshot is not None + + blob.snapshot = None + ase.populate_from_file(mock.MagicMock(), blob) + assert ase.mode == azmodels.StorageModes.File + assert ase.snapshot is None diff --git a/tests/test_blobxfer_crypto_models.py b/tests/test_blobxfer_models_crypto.py similarity index 98% rename from tests/test_blobxfer_crypto_models.py rename to tests/test_blobxfer_models_crypto.py index 8d58419..8503a71 100644 --- a/tests/test_blobxfer_crypto_models.py +++ b/tests/test_blobxfer_models_crypto.py @@ -1,5 +1,5 @@ # coding=utf-8 -"""Tests for crypto operations""" +"""Tests for crypto models""" # stdlib imports import copy @@ -8,8 +8,8 @@ import 
pytest # local imports # module under test -import blobxfer.crypto.models as models -import blobxfer.crypto.operations as ops +import blobxfer.models.crypto as models +import blobxfer.operations.crypto as ops _SAMPLE_RSA_KEY = """ diff --git a/tests/test_blobxfer_download_models.py b/tests/test_blobxfer_models_download.py similarity index 74% rename from tests/test_blobxfer_download_models.py rename to tests/test_blobxfer_models_download.py index e91607e..69133e2 100644 --- a/tests/test_blobxfer_download_models.py +++ b/tests/test_blobxfer_models_download.py @@ -13,10 +13,77 @@ # non-stdlib imports import pytest # local imports -import blobxfer.models +import blobxfer.models.azure as azmodels +import blobxfer.models.options as options +import blobxfer.operations.azure as azops import blobxfer.util as util # module under test -import blobxfer.download.models as models +import blobxfer.models.download as models + + +def test_localdestinationpath(tmpdir): + tmpdir.mkdir('1') + path = tmpdir.join('1') + + a = models.LocalDestinationPath(str(path)) + a.is_dir = True + assert str(a.path) == str(path) + assert a.is_dir + + a.ensure_path_exists() + assert os.path.exists(str(a.path)) + + b = models.LocalDestinationPath() + b.is_dir = False + b.path = str(path) + with pytest.raises(RuntimeError): + b.ensure_path_exists() + assert not b.is_dir + + path2 = tmpdir.join('2') + path3 = path2.join('3') + c = models.LocalDestinationPath(str(path3)) + with pytest.raises(RuntimeError): + c.ensure_path_exists() + c.is_dir = False + c.ensure_path_exists() + assert os.path.exists(str(path2)) + assert os.path.isdir(str(path2)) + assert not c.is_dir + + +def test_downloadspecification(): + ds = models.Specification( + download_options=options.Download( + check_file_md5=True, + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + + asp = azops.SourcePath() + p = 'some/remote/path' + asp.add_path_with_storage_account(p, 'sa') + + ds.add_azure_source_path(asp) + + assert ds.options.check_file_md5 + assert not ds.skip_on.lmt_ge + assert ds.destination.path == pathlib.Path('dest') + assert len(ds.sources) == 1 + assert p in ds.sources[0]._path_map + assert ds.sources[0]._path_map[p] == 'sa' def test_downloaddescriptor(tmpdir): @@ -25,14 +92,14 @@ def test_downloaddescriptor(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 1024 ase._encryption = mock.MagicMock() with pytest.raises(RuntimeError): - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) ase._encryption.symmetric_key = b'123' - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) assert d.entity == ase assert not d.must_compute_md5 @@ -44,19 +111,19 @@ def test_downloaddescriptor(tmpdir): d.local_path.unlink() ase._size = 1 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) assert d._total_chunks == 1 assert d.local_path.stat().st_size == 0 d.local_path.unlink() ase._encryption = None ase._size = 1024 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) assert d.local_path.stat().st_size 
== 1024 # pre-existing file check ase._size = 0 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) assert d._total_chunks == 0 assert d.local_path.stat().st_size == 0 @@ -67,9 +134,9 @@ def test_downloaddescriptor_next_offsets(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 256 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 128 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 @@ -82,12 +149,12 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert d.next_offsets() is None ase._size = 0 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) assert d._total_chunks == 0 assert d.next_offsets() is None ase._size = 1 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 assert offsets.chunk_num == 0 @@ -99,7 +166,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert d.next_offsets() is None ase._size = 256 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 assert offsets.chunk_num == 0 @@ -111,7 +178,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert d.next_offsets() is None ase._size = 256 + 16 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 2 assert offsets.chunk_num == 0 @@ -132,7 +199,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'123' ase._size = 128 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 assert offsets.chunk_num == 0 @@ -144,7 +211,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert d.next_offsets() is None ase._size = 256 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 1 assert offsets.chunk_num == 0 @@ -156,7 +223,7 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert d.next_offsets() is None ase._size = 256 + 32 # 16 bytes over + padding - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() assert d._total_chunks == 2 assert offsets.chunk_num == 0 @@ -181,9 +248,9 @@ def test_postpone_integrity_check(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 32 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() d._postpone_integrity_check(offsets, b'0' * ase._size) @@ -198,9 +265,9 @@ def test_postpone_integrity_check(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 32 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() d._postpone_integrity_check(offsets, b'0' * ase._size) @@ -219,9 +286,9 @@ def test_perform_chunked_integrity_check(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 - ase = 
blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() data = b'0' * opts.chunk_size_bytes @@ -233,11 +300,11 @@ def test_perform_chunked_integrity_check(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'123' - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() data = b'0' * opts.chunk_size_bytes @@ -255,10 +322,10 @@ def test_cleanup_all_temporary_files(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 16 lp = pathlib.Path(str(tmpdir.join('a'))) - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() data = b'0' * opts.chunk_size_bytes @@ -269,7 +336,7 @@ def test_cleanup_all_temporary_files(tmpdir): assert not d._unchecked_chunks[0].file_path.exists() lp = pathlib.Path(str(tmpdir.join('b'))) - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() data = b'0' * opts.chunk_size_bytes @@ -288,9 +355,9 @@ def test_write_data(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) offsets = d.next_offsets() data = b'0' * ase._size @@ -306,7 +373,7 @@ def test_finalize_file(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'123' @@ -322,7 +389,7 @@ def test_finalize_file(tmpdir): message_authentication_code = util.base64_encode_as_string( _hmac.digest()) - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) d.hmac.update(data) d.finalize_file() @@ -335,7 +402,7 @@ def test_finalize_file(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 data = b'0' * ase._size @@ -343,7 +410,7 @@ def test_finalize_file(tmpdir): md5.update(data) ase._md5 = util.base64_encode_as_string(md5.digest()) - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) d.md5.update(data) d.finalize_file() @@ -356,12 +423,12 @@ def test_finalize_file(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 data = b'0' * ase._size - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) d.finalize_file() assert not d.local_path.exists() @@ -373,13 +440,13 @@ def test_finalize_file(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 data = 
b'0' * ase._size ase._md5 = 'oops' - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) d.md5.update(data) d.finalize_file() @@ -392,10 +459,10 @@ def test_operations(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.DownloadDescriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts) d._outstanding_ops = 1 d._unchecked_chunks = {0: None} assert not d.all_operations_completed diff --git a/tests/test_blobxfer_offload.py b/tests/test_blobxfer_models_offload.py similarity index 94% rename from tests/test_blobxfer_offload.py rename to tests/test_blobxfer_models_offload.py index 71cc97d..ca5a2bb 100644 --- a/tests/test_blobxfer_offload.py +++ b/tests/test_blobxfer_models_offload.py @@ -7,7 +7,7 @@ import pytest # local imports # module under test -import blobxfer.offload as offload +import blobxfer.models.offload as offload def test_multiprocess_offload(): diff --git a/tests/test_blobxfer_models_options.py b/tests/test_blobxfer_models_options.py new file mode 100644 index 0000000..1e7cb8b --- /dev/null +++ b/tests/test_blobxfer_models_options.py @@ -0,0 +1,82 @@ +# coding=utf-8 +"""Tests for models options""" + +# stdlib imports +import mock +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +import pytest +# module under test +import blobxfer.models.options as options + + +@mock.patch('multiprocessing.cpu_count', return_value=1) +def test_concurrency_options(patched_cc): + a = options.Concurrency( + crypto_processes=-1, + md5_processes=0, + transfer_threads=-2, + ) + + assert a.crypto_processes == 0 + assert a.md5_processes == 1 + assert a.transfer_threads == 3 + + +@mock.patch('multiprocessing.cpu_count', return_value=10) +def test_concurrency_options_max_transfer_threads(patched_cc): + a = options.Concurrency( + crypto_processes=1, + md5_processes=1, + transfer_threads=None, + ) + + assert a.transfer_threads == 24 + + +def test_general_options(): + a = options.General( + concurrency=options.Concurrency( + crypto_processes=1, + md5_processes=2, + transfer_threads=3, + ), + progress_bar=False, + resume_file='abc', + timeout_sec=1, + verbose=True, + ) + + assert a.concurrency.crypto_processes == 1 + assert a.concurrency.md5_processes == 2 + assert a.concurrency.transfer_threads == 3 + assert not a.progress_bar + assert a.resume_file == pathlib.Path('abc') + assert a.timeout_sec == 1 + assert a.verbose + + a = options.General( + concurrency=options.Concurrency( + crypto_processes=1, + md5_processes=2, + transfer_threads=3, + ), + progress_bar=False, + resume_file=None, + timeout_sec=1, + verbose=True, + ) + + assert a.concurrency.crypto_processes == 1 + assert a.concurrency.md5_processes == 2 + assert a.concurrency.transfer_threads == 3 + assert not a.progress_bar + assert a.resume_file is None + assert a.timeout_sec == 1 + assert a.verbose + + with pytest.raises(ValueError): + a = options.General(None) diff --git a/tests/test_blobxfer_models_upload.py b/tests/test_blobxfer_models_upload.py new file mode 100644 index 0000000..21d9494 --- /dev/null +++ b/tests/test_blobxfer_models_upload.py @@ -0,0 +1,56 @@ +# coding=utf-8 +"""Tests for models upload""" + +# stdlib imports +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +import pytest +# module under test +import blobxfer.models.upload as upload + + +def 
test_localsourcepaths_files(tmpdir): + tmpdir.mkdir('abc') + tmpdir.join('moo.cow').write('z') + abcpath = tmpdir.join('abc') + abcpath.join('hello.txt').write('hello') + abcpath.join('blah.x').write('x') + abcpath.join('blah.y').write('x') + abcpath.join('blah.z').write('x') + abcpath.mkdir('def') + defpath = abcpath.join('def') + defpath.join('world.txt').write('world') + defpath.join('moo.cow').write('y') + + a = upload.LocalSourcePaths() + a.add_include('*.txt') + a.add_includes(['moo.cow', '*blah*']) + with pytest.raises(ValueError): + a.add_includes('abc') + a.add_exclude('**/blah.x') + a.add_excludes(['world.txt']) + with pytest.raises(ValueError): + a.add_excludes('abc') + a.add_path(str(tmpdir)) + a_set = set() + for file in a.files(): + sfile = str(file.parent_path / file.relative_path) + a_set.add(sfile) + + assert len(a.paths) == 1 + assert str(abcpath.join('blah.x')) not in a_set + assert str(defpath.join('world.txt')) in a_set + assert str(defpath.join('moo.cow')) not in a_set + + b = upload.LocalSourcePaths() + b.add_includes(['moo.cow', '*blah*']) + b.add_include('*.txt') + b.add_excludes(['world.txt']) + b.add_exclude('**/blah.x') + b.add_paths([pathlib.Path(str(tmpdir))]) + for file in a.files(): + sfile = str(file.parent_path / file.relative_path) + assert sfile in a_set diff --git a/tests/test_blobxfer_operations.py b/tests/test_blobxfer_operations.py deleted file mode 100644 index 9b648f6..0000000 --- a/tests/test_blobxfer_operations.py +++ /dev/null @@ -1,80 +0,0 @@ -# coding=utf-8 -"""Tests for operations""" - -# stdlib imports -import mock -# non-stdlib imports -import pytest -# local imports -import blobxfer.models -# module under test -import blobxfer.operations as ops - - -@mock.patch('blobxfer.file.operations.check_if_single_file') -@mock.patch('blobxfer.blob.operations.check_if_single_blob') -def test_ensure_local_destination(patched_blob, patched_file, tmpdir): - downdir = tmpdir.join('down') - - # non-file tests - ds = blobxfer.models.DownloadSpecification( - download_options=blobxfer.models.DownloadOptions( - check_file_md5=True, - chunk_size_bytes=4194304, - delete_extraneous_destination=False, - mode=blobxfer.models.AzureStorageModes.Auto, - overwrite=True, - recursive=True, - restore_file_attributes=False, - rsa_private_key=None, - ), - skip_on_options=mock.MagicMock(), - local_destination_path=blobxfer.models.LocalDestinationPath( - str(downdir) - ), - ) - - with pytest.raises(RuntimeError): - ops.ensure_local_destination(mock.MagicMock(), ds) - - asp = blobxfer.models.AzureSourcePath() - p = 'cont/remote/path' - asp.add_path_with_storage_account(p, 'sa') - - ds.add_azure_source_path(asp) - - patched_blob.return_value = False - ops.ensure_local_destination(mock.MagicMock(), ds) - assert ds.destination.is_dir - - patched_blob.return_value = True - with pytest.raises(RuntimeError): - ops.ensure_local_destination(mock.MagicMock(), ds) - - # file tests - ds = blobxfer.models.DownloadSpecification( - download_options=blobxfer.models.DownloadOptions( - check_file_md5=True, - chunk_size_bytes=4194304, - delete_extraneous_destination=False, - mode=blobxfer.models.AzureStorageModes.File, - overwrite=True, - recursive=True, - restore_file_attributes=False, - rsa_private_key=None, - ), - skip_on_options=mock.MagicMock(), - local_destination_path=blobxfer.models.LocalDestinationPath( - str(downdir) - ), - ) - - ds.add_azure_source_path(asp) - - patched_file.return_value = (False, None) - ops.ensure_local_destination(mock.MagicMock(), ds) - assert 
ds.destination.is_dir - - patched_file.return_value = (True, mock.MagicMock()) - with pytest.raises(RuntimeError): - ops.ensure_local_destination(mock.MagicMock(), ds) diff --git a/tests/test_blobxfer_operations_azure.py b/tests/test_blobxfer_operations_azure.py new file mode 100644 index 0000000..c90340a --- /dev/null +++ b/tests/test_blobxfer_operations_azure.py @@ -0,0 +1,161 @@ +# coding=utf-8 +"""Tests for operations azure""" + +# stdlib imports +import mock +# non-stdlib imports +import azure.storage +import azure.storage.blob +import azure.storage.file +import pytest +# module under test +import blobxfer.models.azure as azmodels +import blobxfer.operations.azure as azops + + +def test_storage_credentials(): + creds = azops.StorageCredentials() + creds.add_storage_account('sa1', 'somekey1', 'endpoint') + + a = creds.get_storage_account('sa1') + assert a.name == 'sa1' + assert a.key == 'somekey1' + assert a.endpoint == 'endpoint' + assert isinstance( + a.append_blob_client, azure.storage.blob.AppendBlobService) + assert isinstance( + a.block_blob_client, azure.storage.blob.BlockBlobService) + assert isinstance( + a.file_client, azure.storage.file.FileService) + assert isinstance( + a.page_blob_client, azure.storage.blob.PageBlobService) + + with pytest.raises(KeyError): + a = creds.get_storage_account('sa2') + + with pytest.raises(ValueError): + creds.add_storage_account('sa1', 'somekeyxx', 'endpoint') + + creds.add_storage_account('sa2', 'somekey2', 'endpoint2') + a = creds.get_storage_account('sa1') + b = creds.get_storage_account('sa2') + assert a.name == 'sa1' + assert a.key == 'somekey1' + assert a.endpoint == 'endpoint' + assert b.name == 'sa2' + assert b.key == 'somekey2' + assert b.endpoint == 'endpoint2' + + +def test_key_is_sas(): + a = azops.StorageAccount('name', 'abcdef', 'endpoint') + assert not a.is_sas + + a = azops.StorageAccount('name', 'abcdef&blah', 'endpoint') + assert not a.is_sas + + a = azops.StorageAccount('name', '?abcdef', 'endpoint') + assert a.is_sas + + a = azops.StorageAccount( + 'name', '?sv=0&sr=1&sig=2', 'endpoint') + assert a.is_sas + + a = azops.StorageAccount( + 'name', 'sv=0&sr=1&sig=2', 'endpoint') + assert a.is_sas + + a = azops.StorageAccount( + 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint') + assert a.is_sas + + +def test_azuresourcepath(): + p = '/cont/remote/path' + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + + with pytest.raises(RuntimeError): + asp.add_path_with_storage_account('x', 'x') + + assert 'sa' == asp.lookup_storage_account(p) + + +@mock.patch('blobxfer.models.crypto.EncryptionMetadata') +@mock.patch('blobxfer.operations.azure.file.list_files') +def test_azuresourcepath_files(patched_lf, patched_em): + p = '/cont/remote/path' + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + + options = mock.MagicMock() + options.mode = azmodels.StorageModes.File + creds = mock.MagicMock() + creds.get_storage_account = mock.MagicMock() + sa = mock.MagicMock() + sa.file_client = mock.MagicMock() + creds.get_storage_account.return_value = sa + f = azure.storage.file.models.File(name='name') + patched_lf.side_effect = [[f]] + patched_em.encryption_metadata_exists = mock.MagicMock() + patched_em.encryption_metadata_exists.return_value = False + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is None + assert i == 1 + + fe = azure.storage.file.models.File(name='name') + fe.metadata = {'encryptiondata': {'a': 'b'}} 
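# Editor's note: the EncryptionMetadata helpers mocked in the tests below are
# not defined in this patch. As the tests suggest, detection hinges on an
# 'encryptiondata' key in the listed entity's metadata; the function here is a
# hypothetical stand-in for encryption_metadata_exists, not the real
# blobxfer.models.crypto implementation.
def encryption_metadata_exists(metadata):
    # metadata is the dict attached to a listed blob or file; treat any
    # non-empty 'encryptiondata' value as client-side encrypted content
    try:
        return bool(metadata and metadata.get('encryptiondata'))
    except AttributeError:  # metadata is not dict-like
        return False


# mirrors the branch taken in SourcePath._populate_from_list_files above:
# only entities that pass this check get an EncryptionMetadata object attached
assert encryption_metadata_exists({'encryptiondata': {'a': 'b'}})
assert not encryption_metadata_exists({})
assert not encryption_metadata_exists(None)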
+ patched_lf.side_effect = [[fe]] + patched_em.encryption_metadata_exists.return_value = True + patched_em.convert_from_json = mock.MagicMock() + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is not None + assert i == 1 + + +@mock.patch('blobxfer.models.crypto.EncryptionMetadata') +@mock.patch('blobxfer.operations.azure.blob.list_blobs') +def test_azuresourcepath_blobs(patched_lb, patched_em): + p = '/cont/remote/path' + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + + options = mock.MagicMock() + options.mode = azmodels.StorageModes.Auto + creds = mock.MagicMock() + creds.get_storage_account = mock.MagicMock() + sa = mock.MagicMock() + sa.block_blob_client = mock.MagicMock() + creds.get_storage_account.return_value = sa + b = azure.storage.blob.models.Blob(name='name') + patched_lb.side_effect = [[b]] + patched_em.encryption_metadata_exists = mock.MagicMock() + patched_em.encryption_metadata_exists.return_value = False + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is None + assert i == 1 + + be = azure.storage.blob.models.Blob(name='name') + be.metadata = {'encryptiondata': {'a': 'b'}} + patched_lb.side_effect = [[be]] + patched_em.encryption_metadata_exists.return_value = True + patched_em.convert_from_json = mock.MagicMock() + + i = 0 + for file in asp.files(creds, options, mock.MagicMock()): + i += 1 + assert file.name == 'name' + assert file.encryption_metadata is not None + assert i == 1 diff --git a/tests/test_blobxfer_blob_operations.py b/tests/test_blobxfer_operations_azure_blob.py similarity index 83% rename from tests/test_blobxfer_blob_operations.py rename to tests/test_blobxfer_operations_azure_blob.py index dd635f2..d6ad180 100644 --- a/tests/test_blobxfer_blob_operations.py +++ b/tests/test_blobxfer_operations_azure_blob.py @@ -8,9 +8,9 @@ import azure.storage.blob import pytest # local imports -import blobxfer.models as models +import blobxfer.models.azure as azmodels # module under test -import blobxfer.blob.operations as ops +import blobxfer.operations.azure.blob as ops def test_check_if_single_blob(): @@ -36,7 +36,7 @@ def test_check_if_single_blob(): def test_list_blobs(): with pytest.raises(RuntimeError): for blob in ops.list_blobs( - None, 'cont', 'prefix', models.AzureStorageModes.File): + None, 'cont', 'prefix', azmodels.StorageModes.File): pass _blob = azure.storage.blob.models.Blob(name='name') @@ -46,7 +46,7 @@ def test_list_blobs(): i = 0 for blob in ops.list_blobs( - client, 'cont', 'prefix', models.AzureStorageModes.Auto): + client, 'cont', 'prefix', azmodels.StorageModes.Auto): i += 1 assert blob.name == 'name' assert i == 1 @@ -55,14 +55,14 @@ def test_list_blobs(): azure.storage.blob.models._BlobTypes.AppendBlob i = 0 for blob in ops.list_blobs( - client, 'dir', 'prefix', models.AzureStorageModes.Block): + client, 'dir', 'prefix', azmodels.StorageModes.Block): i += 1 assert blob.name == 'name' assert i == 0 i = 0 for blob in ops.list_blobs( - client, 'dir', 'prefix', models.AzureStorageModes.Page): + client, 'dir', 'prefix', azmodels.StorageModes.Page): i += 1 assert blob.name == 'name' assert i == 0 @@ -71,7 +71,7 @@ def test_list_blobs(): azure.storage.blob.models._BlobTypes.BlockBlob i = 0 for blob in ops.list_blobs( - client, 'dir', 'prefix', models.AzureStorageModes.Append): + client, 'dir', 'prefix', azmodels.StorageModes.Append): i += 1 
assert blob.name == 'name' assert i == 0 @@ -82,7 +82,7 @@ def test_list_blobs(): for blob in ops.list_blobs( client, 'cont', 'a?snapshot=2017-02-23T22:21:14.8121864Z', - models.AzureStorageModes.Auto): + azmodels.StorageModes.Auto): i += 1 assert blob.name == 'name' assert blob.snapshot == _blob.snapshot diff --git a/tests/test_blobxfer_blob_append_operations.py b/tests/test_blobxfer_operations_azure_blob_append.py similarity index 71% rename from tests/test_blobxfer_blob_append_operations.py rename to tests/test_blobxfer_operations_azure_blob_append.py index e207057..5553b7d 100644 --- a/tests/test_blobxfer_blob_append_operations.py +++ b/tests/test_blobxfer_operations_azure_blob_append.py @@ -1,17 +1,17 @@ # coding=utf-8 -"""Tests for models""" +"""Tests for operations: blob append""" # stdlib imports # non-stdlib imports import azure.storage # local imports -import blobxfer.models as models # module under test -import blobxfer.blob.append.operations as ops +import blobxfer.operations.azure as azops +import blobxfer.operations.azure.blob.append as ops def test_create_client(): - sa = models.AzureStorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.AppendBlobService) @@ -19,7 +19,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.AppendBlobService) diff --git a/tests/test_blobxfer_blob_block_operations.py b/tests/test_blobxfer_operations_azure_blob_block.py similarity index 71% rename from tests/test_blobxfer_blob_block_operations.py rename to tests/test_blobxfer_operations_azure_blob_block.py index dc83b8b..4aece2d 100644 --- a/tests/test_blobxfer_blob_block_operations.py +++ b/tests/test_blobxfer_operations_azure_blob_block.py @@ -1,18 +1,17 @@ # coding=utf-8 -"""Tests for models""" +"""Tests for operations: block blob""" # stdlib imports # non-stdlib imports import azure.storage -import pytest # local imports -import blobxfer.models as models # module under test -import blobxfer.blob.block.operations as ops +import blobxfer.operations.azure as azops +import blobxfer.operations.azure.blob.block as ops def test_create_client(): - sa = models.AzureStorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.BlockBlobService) @@ -20,7 +19,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.BlockBlobService) diff --git a/tests/test_blobxfer_blob_page_operations.py b/tests/test_blobxfer_operations_azure_blob_page.py similarity index 74% rename from tests/test_blobxfer_blob_page_operations.py rename to tests/test_blobxfer_operations_azure_blob_page.py index 8ae5989..f70e83d 100644 --- a/tests/test_blobxfer_blob_page_operations.py +++ b/tests/test_blobxfer_operations_azure_blob_page.py @@ -4,15 +4,14 @@ # stdlib imports # non-stdlib imports 
import azure.storage -import pytest # local imports -import blobxfer.models as models # module under test -import blobxfer.blob.page.operations as ops +import blobxfer.operations.azure as azops +import blobxfer.operations.azure.blob.page as ops def test_create_client(): - sa = models.AzureStorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.PageBlobService) @@ -20,7 +19,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.PageBlobService) diff --git a/tests/test_blobxfer_file_operations.py b/tests/test_blobxfer_operations_azure_file.py similarity index 93% rename from tests/test_blobxfer_file_operations.py rename to tests/test_blobxfer_operations_azure_file.py index e354bda..c6bf764 100644 --- a/tests/test_blobxfer_file_operations.py +++ b/tests/test_blobxfer_operations_azure_file.py @@ -7,14 +7,14 @@ import azure.common import azure.storage # local imports -import blobxfer.models as models import blobxfer.util as util # module under test -import blobxfer.file.operations as ops +import blobxfer.operations.azure as azops +import blobxfer.operations.azure.file as ops def test_create_client(): - sa = models.AzureStorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.file.FileService) @@ -22,7 +22,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = models.AzureStorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.file.FileService) @@ -94,7 +94,7 @@ def test_list_files_single_file(): @mock.patch( - 'blobxfer.file.operations.check_if_single_file', + 'blobxfer.operations.azure.file.check_if_single_file', return_value=(False, None) ) def test_list_files_directory(patched_cisf): diff --git a/tests/test_blobxfer_crypto_operations.py b/tests/test_blobxfer_operations_crypto.py similarity index 96% rename from tests/test_blobxfer_crypto_operations.py rename to tests/test_blobxfer_operations_crypto.py index 84d633a..3ed2262 100644 --- a/tests/test_blobxfer_crypto_operations.py +++ b/tests/test_blobxfer_operations_crypto.py @@ -8,9 +8,9 @@ # non-stdlib imports import cryptography.hazmat.primitives.asymmetric.rsa # local imports -import blobxfer.download.models +import blobxfer.models.download # module under test -import blobxfer.crypto.operations as ops +import blobxfer.operations.crypto as ops _RSAKEY = cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key( @@ -94,7 +94,7 @@ def test_cryptooffload_decrypt(): a = None try: a = ops.CryptoOffload(1) - offsets = blobxfer.download.models.DownloadOffsets( + offsets = blobxfer.models.download.Offsets( chunk_num=0, fd_start=1, num_bytes=2, diff --git a/tests/test_blobxfer_download_operations.py b/tests/test_blobxfer_operations_download.py similarity index 78% rename from tests/test_blobxfer_download_operations.py rename to tests/test_blobxfer_operations_download.py 
index 3645291..4e05182 100644 --- a/tests/test_blobxfer_download_operations.py +++ b/tests/test_blobxfer_operations_download.py @@ -18,11 +18,82 @@ import azure.storage.blob import pytest # local imports -import blobxfer.download.models -import blobxfer.models as models +import blobxfer.models.azure as azmodels +import blobxfer.models.download as models +import blobxfer.models.options as options +import blobxfer.operations.azure as azops import blobxfer.util as util # module under test -import blobxfer.download.operations as ops +import blobxfer.operations.download as ops + + +@mock.patch('blobxfer.operations.azure.file.check_if_single_file') +@mock.patch('blobxfer.operations.azure.blob.check_if_single_blob') +def test_ensure_local_destination(patched_blob, patched_file, tmpdir): + downdir = tmpdir.join('down') + + # non-file tests + ds = models.Specification( + download_options=options.Download( + check_file_md5=True, + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=mock.MagicMock(), + local_destination_path=models.LocalDestinationPath( + str(downdir) + ), + ) + + with pytest.raises(RuntimeError): + ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) + + asp = azops.SourcePath() + p = 'cont/remote/path' + asp.add_path_with_storage_account(p, 'sa') + + ds.add_azure_source_path(asp) + + patched_blob.return_value = False + ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) + assert ds.destination.is_dir + + patched_blob.return_value = True + with pytest.raises(RuntimeError): + ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) + + # file tests + ds = models.Specification( + download_options=options.Download( + check_file_md5=True, + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.File, + overwrite=True, + recursive=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=mock.MagicMock(), + local_destination_path=models.LocalDestinationPath( + str(downdir) + ), + ) + + ds.add_azure_source_path(asp) + + patched_file.return_value = (False, None) + ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) + assert ds.destination.is_dir + + patched_file.return_value = (True, mock.MagicMock()) + with pytest.raises(RuntimeError): + ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) def test_check_download_conditions(tmpdir): @@ -31,18 +102,18 @@ def test_check_download_conditions(tmpdir): ep = pathlib.Path(str(ap)) nep = pathlib.Path(str(tmpdir.join('nep'))) - ds = models.DownloadSpecification( - download_options=models.DownloadOptions( + ds = models.Specification( + download_options=options.Download( check_file_md5=True, chunk_size_bytes=4194304, delete_extraneous_destination=False, - mode=models.AzureStorageModes.Auto, + mode=azmodels.StorageModes.Auto, overwrite=False, recursive=True, restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=models.SkipOnOptions( + skip_on_options=options.SkipOn( filesize_match=True, lmt_ge=True, md5_match=True, @@ -55,18 +126,18 @@ def test_check_download_conditions(tmpdir): result = d._check_download_conditions(ep, mock.MagicMock()) assert result == ops.DownloadAction.Skip - ds = models.DownloadSpecification( - download_options=models.DownloadOptions( + ds = models.Specification( + download_options=options.Download( check_file_md5=True, 
chunk_size_bytes=4194304, delete_extraneous_destination=False, - mode=models.AzureStorageModes.Auto, + mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=models.SkipOnOptions( + skip_on_options=options.SkipOn( filesize_match=True, lmt_ge=True, md5_match=True, @@ -77,18 +148,18 @@ def test_check_download_conditions(tmpdir): result = d._check_download_conditions(ep, mock.MagicMock()) assert result == ops.DownloadAction.CheckMd5 - ds = models.DownloadSpecification( - download_options=models.DownloadOptions( + ds = models.Specification( + download_options=options.Download( check_file_md5=True, chunk_size_bytes=4194304, delete_extraneous_destination=False, - mode=models.AzureStorageModes.Auto, + mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=models.SkipOnOptions( + skip_on_options=options.SkipOn( filesize_match=False, lmt_ge=False, md5_match=False, @@ -99,18 +170,18 @@ def test_check_download_conditions(tmpdir): result = d._check_download_conditions(ep, mock.MagicMock()) assert result == ops.DownloadAction.Download - ds = models.DownloadSpecification( - download_options=models.DownloadOptions( + ds = models.Specification( + download_options=options.Download( check_file_md5=True, chunk_size_bytes=4194304, delete_extraneous_destination=False, - mode=models.AzureStorageModes.Auto, + mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=models.SkipOnOptions( + skip_on_options=options.SkipOn( filesize_match=True, lmt_ge=False, md5_match=False, @@ -118,29 +189,29 @@ def test_check_download_conditions(tmpdir): local_destination_path=models.LocalDestinationPath('dest'), ) d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) - rfile = models.AzureStorageEntity('cont') + rfile = azmodels.StorageEntity('cont') rfile._size = util.page_align_content_length(ep.stat().st_size) - rfile._mode = models.AzureStorageModes.Page + rfile._mode = azmodels.StorageModes.Page result = d._check_download_conditions(ep, rfile) assert result == ops.DownloadAction.Skip rfile._size = ep.stat().st_size - rfile._mode = models.AzureStorageModes.Page + rfile._mode = azmodels.StorageModes.Page result = d._check_download_conditions(ep, rfile) assert result == ops.DownloadAction.Download - ds = models.DownloadSpecification( - download_options=models.DownloadOptions( + ds = models.Specification( + download_options=options.Download( check_file_md5=True, chunk_size_bytes=4194304, delete_extraneous_destination=False, - mode=models.AzureStorageModes.Auto, + mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, restore_file_attributes=False, rsa_private_key=None, ), - skip_on_options=models.SkipOnOptions( + skip_on_options=options.SkipOn( filesize_match=False, lmt_ge=True, md5_match=False, @@ -148,7 +219,7 @@ def test_check_download_conditions(tmpdir): local_destination_path=models.LocalDestinationPath('dest'), ) d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) - rfile = models.AzureStorageEntity('cont') + rfile = azmodels.StorageEntity('cont') rfile._lmt = datetime.datetime.now(dateutil.tz.tzutc()) + \ datetime.timedelta(days=1) result = d._check_download_conditions(ep, rfile) @@ -164,7 +235,7 @@ def test_pre_md5_skip_on_check(): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._md5_offload = mock.MagicMock() - rfile = 
models.AzureStorageEntity('cont') + rfile = azmodels.StorageEntity('cont') rfile._encryption = mock.MagicMock() rfile._encryption.blobxfer_extensions = mock.MagicMock() rfile._encryption.blobxfer_extensions.pre_encrypted_content_md5 = \ @@ -186,7 +257,7 @@ def test_post_md5_skip_on_check(): d._md5_offload = mock.MagicMock() lpath = 'lpath' - rfile = models.AzureStorageEntity('cont') + rfile = azmodels.StorageEntity('cont') rfile._md5 = 'abc' d._pre_md5_skip_on_check(lpath, rfile) d._download_set.add(pathlib.Path(lpath)) @@ -217,7 +288,7 @@ def test_check_for_downloads_from_md5(): assert d._add_to_download_queue.call_count == 0 with mock.patch( - 'blobxfer.download.operations.Downloader.' + 'blobxfer.operations.download.Downloader.' 'termination_check_md5', new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( @@ -233,7 +304,7 @@ def test_check_for_downloads_from_md5(): assert d._add_to_download_queue.call_count == 1 with mock.patch( - 'blobxfer.download.operations.Downloader.' + 'blobxfer.operations.download.Downloader.' 'termination_check_md5', new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( @@ -267,7 +338,7 @@ def test_check_for_crypto_done(): assert d._complete_chunk_download.call_count == 0 with mock.patch( - 'blobxfer.download.operations.Downloader.termination_check', + 'blobxfer.operations.download.Downloader.termination_check', new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) @@ -288,7 +359,7 @@ def test_check_for_crypto_done(): def test_add_to_download_queue(tmpdir): path = tmpdir.join('a') lpath = pathlib.Path(str(path)) - ase = models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 1 ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' @@ -320,9 +391,9 @@ def test_complete_chunk_download(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 16 - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) offsets = dd.next_offsets() @@ -335,9 +406,9 @@ def test_complete_chunk_download(tmpdir): assert dd._completed_ops == 1 -@mock.patch('blobxfer.crypto.operations.aes_cbc_decrypt_data') -@mock.patch('blobxfer.file.operations.get_file_range') -@mock.patch('blobxfer.blob.operations.get_blob_range') +@mock.patch('blobxfer.operations.crypto.aes_cbc_decrypt_data') +@mock.patch('blobxfer.operations.azure.file.get_file_range') +@mock.patch('blobxfer.operations.azure.blob.get_blob_range') def test_worker_thread_download( patched_gbr, patched_gfr, patched_acdd, tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) @@ -352,10 +423,10 @@ def test_worker_thread_download( assert d._complete_chunk_download.call_count == 0 with mock.patch( - 'blobxfer.download.operations.Downloader.termination_check', + 'blobxfer.operations.download.Downloader.termination_check', new_callable=mock.PropertyMock) as patched_tc: with mock.patch( - 'blobxfer.download.models.DownloadDescriptor.' + 'blobxfer.models.download.Descriptor.' 
'all_operations_completed', new_callable=mock.PropertyMock) as patched_aoc: d = ops.Downloader( @@ -364,12 +435,12 @@ def test_worker_thread_download( opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 16 ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' lp = pathlib.Path(str(tmpdir.join('a'))) - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) dd.next_offsets = mock.MagicMock(side_effect=[None, None]) dd.finalize_file = mock.MagicMock() patched_aoc.side_effect = [False, True] @@ -385,19 +456,19 @@ def test_worker_thread_download( assert d._download_count == 1 with mock.patch( - 'blobxfer.download.operations.Downloader.termination_check', + 'blobxfer.operations.download.Downloader.termination_check', new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') - ase._mode = blobxfer.models.AzureStorageModes.File + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.File ase._size = 16 patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('b'))) - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) dd.finalize_file = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() d._dd_map[str(lp)] = mock.MagicMock() @@ -411,22 +482,22 @@ def test_worker_thread_download( assert dd.perform_chunked_integrity_check.call_count == 1 with mock.patch( - 'blobxfer.download.operations.Downloader.termination_check', + 'blobxfer.operations.download.Downloader.termination_check', new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') - ase._mode = blobxfer.models.AzureStorageModes.Auto + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Auto ase._size = 32 ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' ase._encryption.content_encryption_iv = b'0' * 16 patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('c'))) - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) dd.finalize_file = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() d._crypto_offload = mock.MagicMock() @@ -443,7 +514,7 @@ def test_worker_thread_download( assert dd.perform_chunked_integrity_check.call_count == 1 with mock.patch( - 'blobxfer.download.operations.Downloader.termination_check', + 'blobxfer.operations.download.Downloader.termination_check', new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) @@ -451,15 +522,15 @@ def test_worker_thread_download( opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') - ase._mode = blobxfer.models.AzureStorageModes.Auto + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Auto ase._size = 32 ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' ase._encryption.content_encryption_iv = 
b'0' * 16 patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('d'))) - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) dd.next_offsets() dd.perform_chunked_integrity_check = mock.MagicMock() patched_acdd.return_value = b'0' * 16 @@ -480,9 +551,9 @@ def test_cleanup_temporary_files(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 16 - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) dd.cleanup_all_temporary_files = mock.MagicMock() dd.cleanup_all_temporary_files.side_effect = Exception d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) @@ -495,9 +566,9 @@ def test_cleanup_temporary_files(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 16 - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._general_options.resume_file = None d._dd_map[0] = dd @@ -508,9 +579,9 @@ def test_cleanup_temporary_files(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 - ase = blobxfer.models.AzureStorageEntity('cont') + ase = azmodels.StorageEntity('cont') ase._size = 16 - dd = blobxfer.download.models.DownloadDescriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts) dd.cleanup_all_temporary_files = mock.MagicMock() dd.cleanup_all_temporary_files.side_effect = Exception d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) @@ -521,9 +592,12 @@ def test_cleanup_temporary_files(tmpdir): @mock.patch('time.clock') -@mock.patch('blobxfer.md5.LocalFileMd5Offload') -@mock.patch('blobxfer.blob.operations.list_blobs') -@mock.patch('blobxfer.operations.ensure_local_destination', return_value=True) +@mock.patch('blobxfer.operations.md5.LocalFileMd5Offload') +@mock.patch('blobxfer.operations.azure.blob.list_blobs') +@mock.patch( + 'blobxfer.operations.download.Downloader.ensure_local_destination', + return_value=True +) def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._cleanup_temporary_files = mock.MagicMock() @@ -534,7 +608,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d._spec.sources = [] d._spec.options = mock.MagicMock() d._spec.options.chunk_size_bytes = 1 - d._spec.options.mode = models.AzureStorageModes.Auto + d._spec.options.mode = azmodels.StorageModes.Auto d._spec.options.overwrite = True d._spec.skip_on = mock.MagicMock() d._spec.skip_on.md5_match = False @@ -544,7 +618,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d._spec.destination.path = pathlib.Path(str(tmpdir)) p = '/cont/remote/path' - asp = models.AzureSourcePath() + asp = azops.SourcePath() asp.add_path_with_storage_account(p, 'sa') d._spec.sources.append(asp) diff --git a/tests/test_blobxfer_md5.py b/tests/test_blobxfer_operations_md5.py similarity index 72% rename from tests/test_blobxfer_md5.py rename to tests/test_blobxfer_operations_md5.py index c38e758..5bd7b20 100644 --- a/tests/test_blobxfer_md5.py +++ b/tests/test_blobxfer_operations_md5.py @@ -7,9 +7,9 @@ # 
non-stdlib imports import pytest # local imports -import blobxfer.models as models +import blobxfer.models.azure as azmodels # module under test -import blobxfer.md5 as md5 +import blobxfer.operations.md5 as ops def test_compute_md5(tmpdir): @@ -17,22 +17,22 @@ def test_compute_md5(tmpdir): testdata = str(uuid.uuid4()) with open(lpath, 'wt') as f: f.write(testdata) - md5_file = md5.compute_md5_for_file_asbase64(lpath) - md5_data = md5.compute_md5_for_data_asbase64(testdata.encode('utf8')) + md5_file = ops.compute_md5_for_file_asbase64(lpath) + md5_data = ops.compute_md5_for_data_asbase64(testdata.encode('utf8')) assert md5_file == md5_data - md5_file_page = md5.compute_md5_for_file_asbase64(lpath, True) + md5_file_page = ops.compute_md5_for_file_asbase64(lpath, True) assert md5_file != md5_file_page # test non-existent file with pytest.raises(IOError): - md5.compute_md5_for_file_asbase64(testdata) + ops.compute_md5_for_file_asbase64(testdata) def test_done_cv(): a = None try: - a = md5.LocalFileMd5Offload(num_workers=1) + a = ops.LocalFileMd5Offload(num_workers=1) assert a.done_cv == a._done_cv finally: if a: @@ -41,11 +41,11 @@ def test_done_cv(): def test_finalize_md5_processes(): with pytest.raises(ValueError): - md5.LocalFileMd5Offload(num_workers=0) + ops.LocalFileMd5Offload(num_workers=0) a = None try: - a = md5.LocalFileMd5Offload(num_workers=1) + a = ops.LocalFileMd5Offload(num_workers=1) finally: if a: a.finalize_processes() @@ -58,16 +58,16 @@ def test_from_add_to_done_non_pagealigned(tmpdir): file = tmpdir.join('a') file.write('abc') - remote_md5 = md5.compute_md5_for_file_asbase64(str(file)) + remote_md5 = ops.compute_md5_for_file_asbase64(str(file)) a = None try: - a = md5.LocalFileMd5Offload(num_workers=1) + a = ops.LocalFileMd5Offload(num_workers=1) result = a.pop_done_queue() assert result is None a.add_localfile_for_md5_check( - str(file), remote_md5, models.AzureStorageModes.Block) + str(file), remote_md5, azmodels.StorageModes.Block) i = 33 checked = False while i > 0: @@ -91,16 +91,16 @@ def test_from_add_to_done_pagealigned(tmpdir): file = tmpdir.join('a') file.write('abc') - remote_md5 = md5.compute_md5_for_file_asbase64(str(file), True) + remote_md5 = ops.compute_md5_for_file_asbase64(str(file), True) a = None try: - a = md5.LocalFileMd5Offload(num_workers=1) + a = ops.LocalFileMd5Offload(num_workers=1) result = a.pop_done_queue() assert result is None a.add_localfile_for_md5_check( - str(file), remote_md5, models.AzureStorageModes.Page) + str(file), remote_md5, azmodels.StorageModes.Page) i = 33 checked = False while i > 0: From a09bbfb65538c0fcf55e4b82277eb4babd481fe6 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Fri, 7 Apr 2017 14:38:57 -0700 Subject: [PATCH 18/47] Add file log, param preamble and progress bar --- blobxfer/models/options.py | 6 +- blobxfer/operations/download.py | 104 ++++++++++----- blobxfer/operations/progress.py | 141 +++++++++++++++++++++ blobxfer/util.py | 64 ++++++++-- cli/cli.py | 21 ++- cli/settings.py | 2 + tests/test_blobxfer_models_options.py | 3 + tests/test_blobxfer_operations_download.py | 2 +- tests/test_blobxfer_operations_progress.py | 38 ++++++ 9 files changed, 331 insertions(+), 50 deletions(-) create mode 100644 blobxfer/operations/progress.py create mode 100644 tests/test_blobxfer_operations_progress.py diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index f7c9f6f..c5f6da6 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -125,12 +125,13 @@ def __init__(self, crypto_processes, 
md5_processes, transfer_threads): class General(object): """General Options""" def __init__( - self, concurrency, progress_bar=True, resume_file=None, - timeout_sec=None, verbose=False): + self, concurrency, log_file=None, progress_bar=True, + resume_file=None, timeout_sec=None, verbose=False): """Ctor for General Options :param General self: this :param Concurrency concurrency: concurrency options :param bool progress_bar: progress bar + :param str log_file: log file :param str resume_file: resume file :param int timeout_sec: timeout in seconds :param bool verbose: verbose output @@ -138,6 +139,7 @@ def __init__( if concurrency is None: raise ValueError('concurrency option is unspecified') self.concurrency = concurrency + self.log_file = log_file self.progress_bar = progress_bar if blobxfer.util.is_not_empty(resume_file): self.resume_file = pathlib.Path(resume_file) diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index 61875fe..b9c592b 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -30,8 +30,6 @@ bytes, dict, int, list, object, range, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip) # stdlib imports -import datetime -import dateutil.tz import enum import logging try: @@ -44,13 +42,13 @@ import Queue as queue import threading # non-stdlib imports -import dateutil # local imports import blobxfer.models.crypto import blobxfer.operations.azure.blob import blobxfer.operations.azure.file import blobxfer.operations.crypto import blobxfer.operations.md5 +import blobxfer.operations.progress import blobxfer.util # create logger @@ -85,9 +83,12 @@ def __init__(self, general_options, creds, spec): self._download_set = set() self._download_start = None self._download_threads = [] - self._download_count = 0 - self._download_total_bytes = 0 + self._download_total = None + self._download_sofar = 0 + self._download_bytes_total = None + self._download_bytes_sofar = 0 self._download_terminate = False + self._start_time = None self._dd_map = {} self._general_options = general_options self._creds = creds @@ -155,6 +156,21 @@ def ensure_local_destination(creds, spec): # ensure destination path spec.destination.ensure_path_exists() + def _update_progress_bar(self): + # type: (Downloader) -> None + """Update progress bar + :param Downloader self: this + """ + blobxfer.operations.progress.update_progress_bar( + self._general_options, + 'download', + self._start_time, + self._download_total, + self._download_sofar, + self._download_bytes_total, + self._download_bytes_sofar, + ) + def _check_download_conditions(self, lpath, rfile): # type: (Downloader, pathlib.Path, # blobxfer.models.azure.StorageEntity) -> DownloadAction @@ -192,8 +208,8 @@ def _check_download_conditions(self, lpath, rfile): # check skip on lmt ge dl_lmt = None if self._spec.skip_on.lmt_ge: - mtime = datetime.datetime.fromtimestamp( - lpath.stat().st_mtime, tz=dateutil.tz.tzlocal()) + mtime = blobxfer.util.datetime_from_timestamp( + lpath.stat().st_mtime) if mtime >= rfile.lmt: dl_lmt = False else: @@ -308,8 +324,7 @@ def _add_to_download_queue(self, lpath, rfile): if self._download_start is None: with self._download_lock: if self._download_start is None: - self._download_start = datetime.datetime.now( - tz=dateutil.tz.tzlocal()) + self._download_start = blobxfer.util.datetime_now() def _initialize_download_threads(self): # type: (Downloader) -> None @@ -344,6 +359,8 @@ def _worker_thread_download(self): dd = self._download_queue.get(False, 1) except 
queue.Empty: continue + # update progress bar + self._update_progress_bar() # get download offsets offsets = dd.next_offsets() # check if all operations completed @@ -355,7 +372,7 @@ def _worker_thread_download(self): if dd.entity.is_encrypted: self._dd_map.pop(str(dd.final_path)) self._download_set.remove(dd.final_path) - self._download_count += 1 + self._download_sofar += 1 continue # re-enqueue for other threads to download self._download_queue.put(dd) @@ -370,7 +387,7 @@ def _worker_thread_download(self): dd.entity, offsets, self._general_options.timeout_sec) # accounting with self._download_lock: - self._download_total_bytes += offsets.num_bytes + self._download_bytes_sofar += offsets.num_bytes # decrypt if necessary if dd.entity.is_encrypted: # slice data to proper bounds @@ -440,14 +457,16 @@ def _cleanup_temporary_files(self): def _run(self): # type: (Downloader) -> None - """Execute Downloader""" - start_time = datetime.datetime.now(tz=dateutil.tz.tzlocal()) - logger.info('script start time: {0}'.format(start_time)) + """Execute Downloader + :param Downloader self: this + """ # ensure destination path blobxfer.operations.download.Downloader.ensure_local_destination( self._creds, self._spec) logger.info('downloading blobs/files to local path: {}'.format( self._spec.destination.path)) + # TODO catalog all local files if delete extraneous enabled + # initialize MD5 processes self._md5_offload = blobxfer.operations.md5.LocalFileMd5Offload( num_workers=self._general_options.concurrency.md5_processes) @@ -461,19 +480,22 @@ def _run(self): self._check_for_crypto_done) # initialize download threads self._initialize_download_threads() - # iterate through source paths to download + # initialize local counters nfiles = 0 - empty_files = 0 - skipped_files = 0 total_size = 0 + skipped_files = 0 skipped_size = 0 + # mark start + self._start_time = blobxfer.util.datetime_now() + logger.info('download start time: {0}'.format(self._start_time)) + # display progress bar if specified + self._update_progress_bar() + # iterate through source paths to download for src in self._spec.sources: for rfile in src.files( self._creds, self._spec.options, self._general_options): nfiles += 1 total_size += rfile.size - if rfile.size == 0: - empty_files += 1 # form local path for remote file lpath = pathlib.Path(self._spec.destination.path, rfile.name) # check on download conditions @@ -490,44 +512,60 @@ def _run(self): self._pre_md5_skip_on_check(lpath, rfile) elif action == DownloadAction.Download: self._add_to_download_queue(lpath, rfile) - download_files = nfiles - skipped_files - download_size = total_size - skipped_size - download_size_mib = download_size / 1048576 - # clean up processes and threads + self._download_total = nfiles - skipped_files + self._download_bytes_total = total_size - skipped_size + download_size_mib = self._download_bytes_total / blobxfer.util.MEGABYTE + # set remote files processed with self._md5_meta_lock: self._all_remote_files_processed = True logger.debug( ('{0} remote files processed, waiting for download completion ' 'of {1:.4f} MiB').format(nfiles, download_size_mib)) + del nfiles + del total_size + del skipped_files + del skipped_size + # TODO delete all remaining local files not accounted for if + # delete extraneous enabled + + # wait for downloads to complete self._wait_for_download_threads(terminate=False) - end_time = datetime.datetime.now(tz=dateutil.tz.tzlocal()) - if (self._download_count != download_files or - self._download_total_bytes != download_size): + # update 
progress bar + self._update_progress_bar() + end_time = blobxfer.util.datetime_now() + if (self._download_sofar != self._download_total or + self._download_bytes_sofar != self._download_bytes_total): raise RuntimeError( 'download mismatch: [count={}/{} bytes={}/{}]'.format( - self._download_count, download_files, - self._download_total_bytes, download_size)) + self._download_sofar, self._download_total, + self._download_bytes_sofar, self._download_bytes_total)) if self._download_start is not None: dltime = (end_time - self._download_start).total_seconds() logger.info( ('elapsed download + verify time and throughput: {0:.3f} sec, ' '{1:.4f} Mbps').format( dltime, download_size_mib * 8 / dltime)) - logger.info('script end time: {0} (elapsed: {1:.3f} sec)'.format( - end_time, (end_time - start_time).total_seconds())) + logger.info('download end time: {0} (elapsed: {1:.3f} sec)'.format( + end_time, (end_time - self._start_time).total_seconds())) def start(self): # type: (Downloader) -> None - """Start the Downloader""" + """Start the Downloader + :param Downloader self: this + """ try: + blobxfer.operations.progress.output_download_parameters( + self._general_options, self._spec) self._run() except (KeyboardInterrupt, Exception) as ex: if isinstance(ex, KeyboardInterrupt): logger.error( 'KeyboardInterrupt detected, force terminating ' 'processes and threads (this may take a while)...') - self._wait_for_download_threads(terminate=True) - self._cleanup_temporary_files() + try: + self._wait_for_download_threads(terminate=True) + finally: + self._cleanup_temporary_files() raise finally: # TODO close resume file diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py new file mode 100644 index 0000000..25539a3 --- /dev/null +++ b/blobxfer/operations/progress.py @@ -0,0 +1,141 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import logging +import os +import platform +import sys +# non-stdlib imports +import azure.storage +# local imports +import blobxfer.util +import blobxfer.version + +# create logger +logger = logging.getLogger(__name__) + + +def update_progress_bar( + go, optext, start, total_files, files_sofar, total_bytes, + bytes_sofar): + # type: (blobxfer.options.General, str, datetime.datetime, int, int, int, + # int) -> None + """Update the progress bar + :param blobxfer.options.General go: general options + :param str optext: operation prefix text + :param datetime.datetime start: start time + :param int total_files: total number of files + :param int files_sofar: files transfered so far + :param int total_bytes: total number of bytes + :param int bytes_sofar: bytes transferred so far + """ + if not go.progress_bar or blobxfer.util.is_none_or_empty(go.log_file): + return + diff = (blobxfer.util.datetime_now() - start).total_seconds() + if diff <= 0: + # arbitrarily give a small delta + diff = 1e-9 + if total_bytes is None: + done = 0 + else: + done = float(bytes_sofar) / total_bytes + rate = bytes_sofar / blobxfer.util.MEGABYTE / diff + if optext == 'synccopy': + rtext = 'sync-copied' + else: + rtext = optext + 'ed' + if total_files is None: + fprog = 'n/a' + else: + fprog = '{}/{}'.format(files_sofar, total_files) + sys.stdout.write( + ('\r{0} progress: [{1:30s}] {2:.2f}% {3:12.3f} MiB/sec, ' + '{4} {5}').format( + optext, '>' * int(done * 30), done * 100, rate, fprog, rtext) + ) + if files_sofar == total_files: + sys.stdout.write(os.linesep) + sys.stdout.flush() + + +def output_download_parameters(general_options, spec): + # type: (Downloader) -> None + """Output configuration block + :param Downloader downloader: this + """ + log = [] + log.append('===========================') + log.append(' azure blobxfer parameters') + log.append('===========================') + log.append(' blobxfer version: {}'.format( + blobxfer.version.__version__)) + log.append(' platform: {}'.format(platform.platform())) + log.append(' python: {} {} az.stor={}'.format( + platform.python_implementation(), platform.python_version(), + azure.storage._constants.__version__)) + log.append(' transfer direction: {}'.format('local->Azure')) + log.append(' workers: xfer={} md5={} crypto={}'.format( + general_options.concurrency.transfer_threads, + general_options.concurrency.md5_processes, + general_options.concurrency.crypto_processes)) + log.append(' timeout: {}'.format( + general_options.timeout_sec)) + log.append(' resume file: {}'.format( + general_options.resume_file)) + log.append(' skip on: fs_match={} lmt_ge={} md5={}'.format( + spec.skip_on.filesize_match, + spec.skip_on.lmt_ge, + spec.skip_on.md5_match)) + log.append(' mode: {}'.format( + spec.options.mode)) + log.append(' compute file md5: {}'.format( + spec.options.check_file_md5)) + log.append(' chunk size (bytes): {}'.format( + spec.options.chunk_size_bytes)) + log.append(' delete extraneous: {}'.format( + spec.options.delete_extraneous_destination)) + log.append(' overwrite: {}'.format( + spec.options.overwrite)) + log.append(' recursive: {}'.format( + spec.options.recursive)) + log.append(' file attributes: {}'.format( + spec.options.restore_file_attributes)) + log.append(' rsa private 
key: {}'.format( + 'Loaded' if spec.options.rsa_private_key else 'None')) + log.append(' local destination: {}'.format( + spec.destination.path)) + log.append('===========================') + log = os.linesep.join(log) + if blobxfer.util.is_not_empty(general_options.log_file): + print(log) + else: + logger.info('{}{}'.format(os.linesep, log)) diff --git a/blobxfer/util.py b/blobxfer/util.py index ec85fe5..82c20a7 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -31,7 +31,7 @@ # stdlib imports import base64 import copy -import dateutil +import datetime import hashlib import logging import logging.handlers @@ -42,10 +42,14 @@ from scandir import scandir as scandir import re # non-stdlib imports +import dateutil +import dateutil.tz import future.utils # local imports # global defines +MEGABYTE = 1048576 +_REGISTERED_LOGGER_HANDLERS = [] _PAGEBLOB_BOUNDARY = 512 @@ -58,16 +62,34 @@ def on_python2(): return future.utils.PY2 -def setup_logger(logger): # noqa - # type: (logger) -> None +def setup_logger(logger, logfile): # noqa + # type: (logger, str) -> None """Set up logger""" + global _REGISTERED_LOGGER_HANDLERS logger.setLevel(logging.DEBUG) - handler = logging.StreamHandler() + if is_none_or_empty(logfile): + handler = logging.StreamHandler() + else: + handler = logging.FileHandler(logfile, encoding='utf-8') + logging.getLogger().addHandler(handler) + formatter = logging.Formatter('%(asctime)s %(levelname)s - %(message)s') + formatter.default_msec_format = '%s.%03d' + handler.setFormatter(formatter) + logger.addHandler(handler) + logger.propagate = False + _REGISTERED_LOGGER_HANDLERS.append(handler) + + +def set_verbose_logger_handlers(): # noqa + # type: (None) -> None + """Set logger handler formatters to more detail""" + global _REGISTERED_LOGGER_HANDLERS formatter = logging.Formatter( '%(asctime)s %(levelname)s %(name)s:%(funcName)s:%(lineno)d ' '%(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) + formatter.default_msec_format = '%s.%03d' + for handler in _REGISTERED_LOGGER_HANDLERS: + handler.setFormatter(formatter) def is_none_or_empty(obj): @@ -77,9 +99,7 @@ def is_none_or_empty(obj): :rtype: bool :return: if object is None or empty """ - if obj is None or len(obj) == 0: - return True - return False + return obj is None or len(obj) == 0 def is_not_empty(obj): @@ -89,9 +109,7 @@ def is_not_empty(obj): :rtype: bool :return: if object is not None and length is > 0 """ - if obj is not None and len(obj) > 0: - return True - return False + return obj is not None and len(obj) > 0 def merge_dict(dict1, dict2): @@ -116,6 +134,28 @@ def merge_dict(dict1, dict2): return result +def datetime_now(): + # type: (None) -> datetime.datetime + """Return a timezone-aware datetime instance with local offset + :rtype: datetime.datetime + :return: datetime now with local tz + """ + return datetime.datetime.now(tz=dateutil.tz.tzlocal()) + + +def datetime_from_timestamp(ts, tz=None): + # type: (int, dateutil.tz) -> datetime.datetime + """Convert a timestamp into datetime with offset + :param int ts: timestamp + :param dateutil.tz tz: time zone or local tz if not specified + :rtype: datetime.datetime + :return: converted timestamp to datetime + """ + if tz is None: + tz = dateutil.tz.tzlocal() + return datetime.datetime.fromtimestamp(ts, tz=tz) + + def scantree(path): # type: (str) -> os.DirEntry """Recursively scan a directory tree diff --git a/cli/cli.py b/cli/cli.py index 03fb231..744ecdc 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -45,7 +45,6 @@ # create logger logger = 
logging.getLogger('blobxfer') -blobxfer.util.setup_logger(logger) # global defines _CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) @@ -96,7 +95,11 @@ def _init_config(self): self._read_yaml_file(self.yaml_config) # merge cli options with config settings.merge_settings(self.config, self.cli_options) + # set log file if specified + blobxfer.util.setup_logger(logger, self.config['options']['log_file']) + # output config if self.config['options']['verbose']: + blobxfer.util.set_verbose_logger_handlers() logger.debug('config: \n' + json.dumps(self.config, indent=4)) # free mem del self.yaml_config @@ -121,6 +124,19 @@ def callback(ctx, param, value): callback=callback)(f) +def _log_file_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['log_file'] = value + return value + return click.option( + '--log-file', + expose_value=False, + default=None, + help='Log to file specified', + callback=callback)(f) + + def _md5_processes_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -144,7 +160,7 @@ def callback(ctx, param, value): '--progress-bar/--no-progress-bar', expose_value=False, default=True, - help='Display progress bar', + help='Display progress bar instead of console logs', callback=callback)(f) @@ -208,6 +224,7 @@ def common_options(f): f = _resume_file_option(f) f = _progress_bar_option(f) f = _md5_processes_option(f) + f = _log_file_option(f) f = _crypto_processes_option(f) return f diff --git a/cli/settings.py b/cli/settings.py index 4faadd9..d464056 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -209,6 +209,7 @@ def merge_settings(config, cli_options): if 'options' not in config: config['options'] = {} config['options']['crypto_processes'] = cli_options['crypto_processes'] + config['options']['log_file'] = cli_options['log_file'] config['options']['md5_processes'] = cli_options['md5_processes'] config['options']['progress_bar'] = cli_options['progress_bar'] config['options']['resume_file'] = cli_options['resume_file'] @@ -245,6 +246,7 @@ def create_general_options(config): md5_processes=config['options']['md5_processes'], transfer_threads=config['options']['transfer_threads'], ), + log_file=config['options']['log_file'], progress_bar=config['options']['progress_bar'], resume_file=config['options']['resume_file'], timeout_sec=config['options']['timeout_sec'], diff --git a/tests/test_blobxfer_models_options.py b/tests/test_blobxfer_models_options.py index 1e7cb8b..e73f3e3 100644 --- a/tests/test_blobxfer_models_options.py +++ b/tests/test_blobxfer_models_options.py @@ -44,6 +44,7 @@ def test_general_options(): md5_processes=2, transfer_threads=3, ), + log_file='abc.log', progress_bar=False, resume_file='abc', timeout_sec=1, @@ -53,6 +54,7 @@ def test_general_options(): assert a.concurrency.crypto_processes == 1 assert a.concurrency.md5_processes == 2 assert a.concurrency.transfer_threads == 3 + assert a.log_file == 'abc.log' assert not a.progress_bar assert a.resume_file == pathlib.Path('abc') assert a.timeout_sec == 1 @@ -73,6 +75,7 @@ def test_general_options(): assert a.concurrency.crypto_processes == 1 assert a.concurrency.md5_processes == 2 assert a.concurrency.transfer_threads == 3 + assert a.log_file is None assert not a.progress_bar assert a.resume_file is None assert a.timeout_sec == 1 diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index 4e05182..ee0166f 100644 --- a/tests/test_blobxfer_operations_download.py +++ 
b/tests/test_blobxfer_operations_download.py @@ -453,7 +453,7 @@ def test_worker_thread_download( assert d._complete_chunk_download.call_count == 0 assert str(lp) not in d._dd_map assert dd.finalize_file.call_count == 1 - assert d._download_count == 1 + assert d._download_sofar == 1 with mock.patch( 'blobxfer.operations.download.Downloader.termination_check', diff --git a/tests/test_blobxfer_operations_progress.py b/tests/test_blobxfer_operations_progress.py new file mode 100644 index 0000000..7cb0776 --- /dev/null +++ b/tests/test_blobxfer_operations_progress.py @@ -0,0 +1,38 @@ +# coding=utf-8 +"""Tests for progress operations""" + +# stdlib imports +import mock +# non-stdlib imports +# local imports +import blobxfer.util as util +# module under test +import blobxfer.operations.progress as ops + + +def test_output_download_parameters(): + go = mock.MagicMock() + spec = mock.MagicMock() + go.log_file = 'abc' + + ops.output_download_parameters(go, spec) + + assert util.is_not_empty(go.log_file) + + +def test_update_progress_bar(): + go = mock.MagicMock() + go.progress_bar = True + go.log_file = 'abc' + + start = util.datetime_now() + + ops.update_progress_bar( + go, 'download', start, None, 1, None, 1) + + with mock.patch('blobxfer.util.datetime_now') as patched_dt: + patched_dt.return_value = start + ops.update_progress_bar( + go, 'synccopy', start, 1, 1, 1, 1) + + assert util.is_not_empty(go.log_file) From fbdd1b1f122e4863e6aff96abd1cb6f0248f4fbf Mon Sep 17 00:00:00 2001 From: Fred Park Date: Mon, 10 Apr 2017 14:40:03 -0700 Subject: [PATCH 19/47] More download features - Add delete support on download - Add recursive support on download - Add common requests session for connection pooling support with matched transfer thread count --- blobxfer/models/options.py | 8 +- blobxfer/operations/azure/__init__.py | 35 ++++++-- blobxfer/operations/azure/blob/__init__.py | 11 ++- blobxfer/operations/azure/blob/append.py | 6 +- blobxfer/operations/azure/blob/block.py | 6 +- blobxfer/operations/azure/blob/page.py | 6 +- blobxfer/operations/azure/file.py | 14 ++-- blobxfer/operations/crypto.py | 2 +- blobxfer/operations/download.py | 81 +++++++++++++------ blobxfer/operations/md5.py | 3 +- blobxfer/operations/progress.py | 15 ++-- cli/cli.py | 6 +- cli/settings.py | 14 +++- setup.py | 1 + tests/test_blobxfer_models_options.py | 6 +- tests/test_blobxfer_operations_azure.py | 14 ++-- tests/test_blobxfer_operations_azure_blob.py | 32 +++++--- ...t_blobxfer_operations_azure_blob_append.py | 4 +- ...st_blobxfer_operations_azure_blob_block.py | 4 +- ...est_blobxfer_operations_azure_blob_page.py | 4 +- tests/test_blobxfer_operations_azure_file.py | 10 +-- tests/test_blobxfer_operations_download.py | 37 +++++++++ 22 files changed, 222 insertions(+), 97 deletions(-) diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index c5f6da6..08ba42a 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -116,10 +116,10 @@ def __init__(self, crypto_processes, md5_processes, transfer_threads): if self.md5_processes < 1: self.md5_processes = 1 if self.transfer_threads is None or self.transfer_threads < 1: - self.transfer_threads = multiprocessing.cpu_count() * 3 - # cap maximum number of threads from cpu count to 24 - if self.transfer_threads > 24: - self.transfer_threads = 24 + self.transfer_threads = multiprocessing.cpu_count() * 4 + # cap maximum number of threads from cpu count to 96 + if self.transfer_threads > 96: + self.transfer_threads = 96 class General(object): diff --git 
a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index 20ddb50..cc33834 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -31,6 +31,7 @@ next, oct, open, pow, round, super, filter, map, zip) # stdlib imports # non-stdlib imports +import requests # local imports import blobxfer.models import blobxfer.operations.azure.blob.append @@ -41,10 +42,14 @@ class StorageCredentials(object): """Azure Storage Credentials""" - def __init__(self): - # type: (StorageCredentials) -> None - """Ctor for StorageCredentials""" + def __init__(self, general_options): + # type: (StorageCredentials, blobxfer.models.options.General) -> None + """Ctor for StorageCredentials + :param StorageCredentials self: this + :param blobxfer.models.options.General: general options + """ self._storage_accounts = {} + self._general_options = general_options def add_storage_account(self, name, key, endpoint): # type: (StorageCredentials, str, str, str) -> None @@ -57,7 +62,10 @@ def add_storage_account(self, name, key, endpoint): if name in self._storage_accounts: raise ValueError( '{} already exists in storage accounts'.format(name)) - self._storage_accounts[name] = StorageAccount(name, key, endpoint) + self._storage_accounts[name] = StorageAccount( + name, key, endpoint, + self._general_options.concurrency.transfer_threads + ) def get_storage_account(self, name): # type: (StorageCredentials, str) -> StorageAccount @@ -72,12 +80,13 @@ def get_storage_account(self, name): class StorageAccount(object): """Azure Storage Account""" - def __init__(self, name, key, endpoint): - # type: (StorageAccount, str, str, str) -> None + def __init__(self, name, key, endpoint, transfer_threads): + # type: (StorageAccount, str, str, str, int) -> None """Ctor for StorageAccount :param str name: name of storage account :param str key: storage key or sas :param str endpoint: endpoint + :param int transfer_threads: number of transfer threads """ self._append_blob_client = None self._block_blob_client = None @@ -90,6 +99,15 @@ def __init__(self, name, key, endpoint): # normalize sas keys if self.is_sas and self.key.startswith('?'): self.key = self.key[1:] + # create requests session for connection pooling + self.session = requests.Session() + self.session.mount( + 'https://', + requests.adapters.HTTPAdapter( + pool_connections=transfer_threads, + pool_maxsize=transfer_threads << 1, + ) + ) self._create_clients() @staticmethod @@ -241,7 +259,8 @@ def _populate_from_list_files(self, creds, options, general_options): cont, dir = blobxfer.util.explode_azure_path(rpath) sa = creds.get_storage_account(self.lookup_storage_account(rpath)) for file in blobxfer.operations.azure.file.list_files( - sa.file_client, cont, dir, general_options.timeout_sec): + sa.file_client, cont, dir, options.recursive, + general_options.timeout_sec): if blobxfer.models.crypto.EncryptionMetadata.\ encryption_metadata_exists(file.metadata): ed = blobxfer.models.crypto.EncryptionMetadata() @@ -271,7 +290,7 @@ def _populate_from_list_blobs(self, creds, options, general_options): sa = creds.get_storage_account(self.lookup_storage_account(rpath)) for blob in blobxfer.operations.azure.blob.list_blobs( sa.block_blob_client, cont, dir, options.mode, - general_options.timeout_sec): + options.recursive, general_options.timeout_sec): if blobxfer.models.crypto.EncryptionMetadata.\ encryption_metadata_exists(blob.metadata): ed = blobxfer.models.crypto.EncryptionMetadata() diff --git 
a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index e0cf878..0d49ed0 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -61,15 +61,16 @@ def check_if_single_blob(client, container, prefix, timeout=None): return True -def list_blobs(client, container, prefix, mode, timeout=None): - # type: (azure.storage.blob.BaseBlobService, str, str, int, - # blobxfer.models.azure.StorageModes) -> +def list_blobs(client, container, prefix, mode, recursive, timeout=None): + # type: (azure.storage.blob.BaseBlobService, str, str, + # blobxfer.models.azure.StorageModes, bool, int) -> # azure.storage.blob.models.Blob """List blobs in path conforming to mode :param azure.storage.blob.BaseBlobService client: blob client :param str container: container :param str prefix: path prefix :param blobxfer.models.azure.StorageModes mode: storage mode + :param bool recursive: recursive :param int timeout: timeout :rtype: azure.storage.blob.models.Blob :return: generator of blobs @@ -85,7 +86,7 @@ def list_blobs(client, container, prefix, mode, timeout=None): return blobs = client.list_blobs( container_name=container, - prefix=prefix, + prefix=prefix if blobxfer.util.is_not_empty(prefix) else None, include=azure.storage.blob.models.Include.METADATA, timeout=timeout, ) @@ -102,6 +103,8 @@ def list_blobs(client, container, prefix, mode, timeout=None): blob.properties.blob_type != azure.storage.blob.models._BlobTypes.PageBlob): continue + if not recursive and '/' in blob.name: + continue # auto or match, yield the blob yield blob diff --git a/blobxfer/operations/azure/blob/append.py b/blobxfer/operations/azure/blob/append.py index 910ab5d..087e33b 100644 --- a/blobxfer/operations/azure/blob/append.py +++ b/blobxfer/operations/azure/blob/append.py @@ -51,12 +51,14 @@ def create_client(storage_account): client = azure.storage.blob.AppendBlobService( account_name=storage_account.name, sas_token=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) else: client = azure.storage.blob.AppendBlobService( account_name=storage_account.name, account_key=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client diff --git a/blobxfer/operations/azure/blob/block.py b/blobxfer/operations/azure/blob/block.py index b6fd673..00f7eb3 100644 --- a/blobxfer/operations/azure/blob/block.py +++ b/blobxfer/operations/azure/blob/block.py @@ -51,12 +51,14 @@ def create_client(storage_account): client = azure.storage.blob.BlockBlobService( account_name=storage_account.name, sas_token=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) else: client = azure.storage.blob.BlockBlobService( account_name=storage_account.name, account_key=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client diff --git a/blobxfer/operations/azure/blob/page.py b/blobxfer/operations/azure/blob/page.py index 6aedc8f..05d36b6 100644 --- a/blobxfer/operations/azure/blob/page.py +++ 
b/blobxfer/operations/azure/blob/page.py @@ -51,12 +51,14 @@ def create_client(storage_account): client = azure.storage.blob.PageBlobService( account_name=storage_account.name, sas_token=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) else: client = azure.storage.blob.PageBlobService( account_name=storage_account.name, account_key=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client diff --git a/blobxfer/operations/azure/file.py b/blobxfer/operations/azure/file.py index 1b17f94..43e89ca 100644 --- a/blobxfer/operations/azure/file.py +++ b/blobxfer/operations/azure/file.py @@ -57,12 +57,14 @@ def create_client(storage_account): client = azure.storage.file.FileService( account_name=storage_account.name, sas_token=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) else: client = azure.storage.file.FileService( account_name=storage_account.name, account_key=storage_account.key, - endpoint_suffix=storage_account.endpoint) + endpoint_suffix=storage_account.endpoint, + request_session=storage_account.session) # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client @@ -114,13 +116,14 @@ def check_if_single_file(client, fileshare, prefix, timeout=None): return (True, file) -def list_files(client, fileshare, prefix, timeout=None): - # type: (azure.storage.file.FileService, str, str, int) -> +def list_files(client, fileshare, prefix, recursive, timeout=None): + # type: (azure.storage.file.FileService, str, str, bool, int) -> # azure.storage.file.models.File """List files in path :param azure.storage.file.FileService client: file client :param str fileshare: file share :param str prefix: path prefix + :param bool recursive: recursive :param int timeout: timeout :rtype: azure.storage.file.models.File :return: generator of files @@ -151,7 +154,8 @@ def list_files(client, fileshare, prefix, timeout=None): ) yield fsprop else: - dirs.append(fspath) + if recursive: + dirs.append(fspath) def get_file_range(ase, offsets, timeout=None): diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index 98945d3..58f65d8 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -241,7 +241,7 @@ def _worker_process(self): """ while not self.terminated: try: - inst = self._task_queue.get(True, 1) + inst = self._task_queue.get(True, 0.25) except queue.Empty: continue if inst[0] == CryptoAction.Encrypt: diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index b9c592b..78baa34 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -81,7 +81,7 @@ def __init__(self, general_options, creds, spec): self._download_lock = threading.Lock() self._download_queue = queue.Queue() self._download_set = set() - self._download_start = None + self._download_start_time = None self._download_threads = [] self._download_total = None self._download_sofar = 0 @@ -89,6 +89,7 @@ def __init__(self, general_options, creds, spec): self._download_bytes_sofar = 0 self._download_terminate = False self._start_time = None + self._delete_after = set() self._dd_map = {} self._general_options = general_options 
self._creds = creds @@ -164,7 +165,7 @@ def _update_progress_bar(self): blobxfer.operations.progress.update_progress_bar( self._general_options, 'download', - self._start_time, + self._download_start_time, self._download_total, self._download_sofar, self._download_bytes_total, @@ -321,10 +322,10 @@ def _add_to_download_queue(self, lpath, rfile): self._dd_map[str(dd.final_path)] = dd # add download descriptor to queue self._download_queue.put(dd) - if self._download_start is None: + if self._download_start_time is None: with self._download_lock: - if self._download_start is None: - self._download_start = blobxfer.util.datetime_now() + if self._download_start_time is None: + self._download_start_time = blobxfer.util.datetime_now() def _initialize_download_threads(self): # type: (Downloader) -> None @@ -356,7 +357,7 @@ def _worker_thread_download(self): """ while not self.termination_check: try: - dd = self._download_queue.get(False, 1) + dd = self._download_queue.get(False, 0.25) except queue.Empty: continue # update progress bar @@ -455,23 +456,52 @@ def _cleanup_temporary_files(self): except Exception as e: logger.exception(e) + def _catalog_local_files_for_deletion(self): + # type: (Downloader) -> None + """Catalog all local files if delete extraneous enabled + :param Downloader self: this + """ + if not (self._spec.options.delete_extraneous_destination and + self._spec.destination.is_dir): + return + dst = str(self._spec.destination.path) + for file in blobxfer.util.scantree(dst): + self._delete_after.add(pathlib.Path(file.path)) + + def _delete_extraneous_files(self): + # type: (Downloader) -> None + """Delete extraneous files cataloged + :param Downloader self: this + """ + logger.info('attempting to delete {} extraneous files'.format( + len(self._delete_after))) + for file in self._delete_after: + try: + file.unlink() + except OSError: + pass + def _run(self): # type: (Downloader) -> None """Execute Downloader :param Downloader self: this """ + # mark start + self._start_time = blobxfer.util.datetime_now() + logger.info('blobxfer start time: {0}'.format(self._start_time)) # ensure destination path blobxfer.operations.download.Downloader.ensure_local_destination( self._creds, self._spec) logger.info('downloading blobs/files to local path: {}'.format( self._spec.destination.path)) - # TODO catalog all local files if delete extraneous enabled - + self._catalog_local_files_for_deletion() # initialize MD5 processes - self._md5_offload = blobxfer.operations.md5.LocalFileMd5Offload( - num_workers=self._general_options.concurrency.md5_processes) - self._md5_offload.initialize_check_thread( - self._check_for_downloads_from_md5) + if (self._spec.options.check_file_md5 and + self._general_options.concurrency.md5_processes > 0): + self._md5_offload = blobxfer.operations.md5.LocalFileMd5Offload( + num_workers=self._general_options.concurrency.md5_processes) + self._md5_offload.initialize_check_thread( + self._check_for_downloads_from_md5) # initialize crypto processes if self._general_options.concurrency.crypto_processes > 0: self._crypto_offload = blobxfer.operations.crypto.CryptoOffload( @@ -485,11 +515,6 @@ def _run(self): total_size = 0 skipped_files = 0 skipped_size = 0 - # mark start - self._start_time = blobxfer.util.datetime_now() - logger.info('download start time: {0}'.format(self._start_time)) - # display progress bar if specified - self._update_progress_bar() # iterate through source paths to download for src in self._spec.sources: for rfile in src.files( @@ -498,6 +523,11 @@ def 
_run(self): total_size += rfile.size # form local path for remote file lpath = pathlib.Path(self._spec.destination.path, rfile.name) + # remove from delete after set + try: + self._delete_after.remove(lpath) + except KeyError: + pass # check on download conditions action = self._check_download_conditions(lpath, rfile) if action == DownloadAction.Skip: @@ -525,27 +555,30 @@ def _run(self): del total_size del skipped_files del skipped_size - # TODO delete all remaining local files not accounted for if - # delete extraneous enabled - # wait for downloads to complete self._wait_for_download_threads(terminate=False) + end_time = blobxfer.util.datetime_now() # update progress bar self._update_progress_bar() - end_time = blobxfer.util.datetime_now() + # check for mismatches if (self._download_sofar != self._download_total or self._download_bytes_sofar != self._download_bytes_total): raise RuntimeError( 'download mismatch: [count={}/{} bytes={}/{}]'.format( self._download_sofar, self._download_total, self._download_bytes_sofar, self._download_bytes_total)) - if self._download_start is not None: - dltime = (end_time - self._download_start).total_seconds() + # delete all remaining local files not accounted for if + # delete extraneous enabled + self._delete_extraneous_files() + # output throughput + if self._download_start_time is not None: + dltime = (end_time - self._download_start_time).total_seconds() logger.info( ('elapsed download + verify time and throughput: {0:.3f} sec, ' '{1:.4f} Mbps').format( dltime, download_size_mib * 8 / dltime)) - logger.info('download end time: {0} (elapsed: {1:.3f} sec)'.format( + end_time = blobxfer.util.datetime_now() + logger.info('blobxfer end time: {0} (elapsed: {1:.3f} sec)'.format( end_time, (end_time - self._start_time).total_seconds())) def start(self): diff --git a/blobxfer/operations/md5.py b/blobxfer/operations/md5.py index d260c9e..dbd05fb 100644 --- a/blobxfer/operations/md5.py +++ b/blobxfer/operations/md5.py @@ -98,7 +98,8 @@ def _worker_process(self): """ while not self.terminated: try: - filename, remote_md5, pagealign = self._task_queue.get(True, 1) + filename, remote_md5, pagealign = self._task_queue.get( + True, 0.25) except queue.Empty: continue md5 = blobxfer.operations.md5.compute_md5_for_file_asbase64( diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index 25539a3..b5ec8e9 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -36,6 +36,7 @@ import sys # non-stdlib imports import azure.storage +import requests # local imports import blobxfer.util import blobxfer.version @@ -58,7 +59,8 @@ def update_progress_bar( :param int total_bytes: total number of bytes :param int bytes_sofar: bytes transferred so far """ - if not go.progress_bar or blobxfer.util.is_none_or_empty(go.log_file): + if (not go.progress_bar or blobxfer.util.is_none_or_empty(go.log_file) or + start is None): return diff = (blobxfer.util.datetime_now() - start).total_seconds() if diff <= 0: @@ -99,13 +101,16 @@ def output_download_parameters(general_options, spec): log.append(' blobxfer version: {}'.format( blobxfer.version.__version__)) log.append(' platform: {}'.format(platform.platform())) - log.append(' python: {} {} az.stor={}'.format( - platform.python_implementation(), platform.python_version(), - azure.storage._constants.__version__)) + log.append(' python: {} {} az.stor={} req={}'.format( + platform.python_implementation(), + platform.python_version(), + azure.storage._constants.__version__, + 
requests.__version__)) log.append(' transfer direction: {}'.format('local->Azure')) log.append(' workers: xfer={} md5={} crypto={}'.format( general_options.concurrency.transfer_threads, - general_options.concurrency.md5_processes, + general_options.concurrency.md5_processes + if spec.options.check_file_md5 else 0, general_options.concurrency.crypto_processes)) log.append(' timeout: {}'.format( general_options.timeout_sec)) diff --git a/cli/cli.py b/cli/cli.py index 744ecdc..d27efc8 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -65,9 +65,9 @@ def initialize(self): :param CliContext self: this """ self._init_config() - self.credentials = settings.create_azure_storage_credentials( - self.config) self.general_options = settings.create_general_options(self.config) + self.credentials = settings.create_azure_storage_credentials( + self.config, self.general_options) def _read_yaml_file(self, yaml_file): # type: (CliContext, pathlib.Path) -> None @@ -382,7 +382,7 @@ def callback(ctx, param, value): '--file-md5/--no-file-md5', expose_value=False, default=False, - help='Compute file MD5 [True]', + help='Compute file MD5 [False]', callback=callback)(f) diff --git a/cli/settings.py b/cli/settings.py index d464056..088a4f4 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -218,14 +218,16 @@ def merge_settings(config, cli_options): config['options']['verbose'] = cli_options['verbose'] -def create_azure_storage_credentials(config): - # type: (dict) -> blobxfer.operations.azure.StorageCredentials +def create_azure_storage_credentials(config, general_options): + # type: (dict, blobxfer.models.options.General) -> + # blobxfer.operations.azure.StorageCredentials """Create an Azure StorageCredentials object from configuration :param dict config: config dict + :param blobxfer.models.options.General: general options :rtype: blobxfer.operations.azure.StorageCredentials :return: credentials object """ - creds = blobxfer.operations.azure.StorageCredentials() + creds = blobxfer.operations.azure.StorageCredentials(general_options) endpoint = config['azure_storage']['endpoint'] for name in config['azure_storage']['accounts']: key = config['azure_storage']['accounts'][name] @@ -285,6 +287,12 @@ def create_download_specifications(config): rpk, rpkp) else: rpk = None + # ensure compatible options + if (not conf['options']['check_file_md5'] and + conf['options']['skip_on']['md5_match']): + raise ValueError( + 'Cannot specify skip on MD5 match without file MD5 enabled') + # create specification ds = blobxfer.models.download.Specification( download_options=blobxfer.models.options.Download( check_file_md5=conf['options']['check_file_md5'], diff --git a/setup.py b/setup.py index 74b57cf..5a2d6c6 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ 'cryptography>=1.8.1', 'future==0.16.0', 'python-dateutil==2.6.0', + 'requests==2.13.0', 'ruamel.yaml==0.14.5', ] diff --git a/tests/test_blobxfer_models_options.py b/tests/test_blobxfer_models_options.py index e73f3e3..4716f27 100644 --- a/tests/test_blobxfer_models_options.py +++ b/tests/test_blobxfer_models_options.py @@ -23,10 +23,10 @@ def test_concurrency_options(patched_cc): assert a.crypto_processes == 0 assert a.md5_processes == 1 - assert a.transfer_threads == 3 + assert a.transfer_threads == 4 -@mock.patch('multiprocessing.cpu_count', return_value=10) +@mock.patch('multiprocessing.cpu_count', return_value=64) def test_concurrency_options_max_transfer_threads(patched_cc): a = options.Concurrency( crypto_processes=1, @@ -34,7 +34,7 @@ def 
test_concurrency_options_max_transfer_threads(patched_cc): transfer_threads=None, ) - assert a.transfer_threads == 24 + assert a.transfer_threads == 96 def test_general_options(): diff --git a/tests/test_blobxfer_operations_azure.py b/tests/test_blobxfer_operations_azure.py index c90340a..bfe976d 100644 --- a/tests/test_blobxfer_operations_azure.py +++ b/tests/test_blobxfer_operations_azure.py @@ -14,7 +14,7 @@ def test_storage_credentials(): - creds = azops.StorageCredentials() + creds = azops.StorageCredentials(mock.MagicMock()) creds.add_storage_account('sa1', 'somekey1', 'endpoint') a = creds.get_storage_account('sa1') @@ -48,25 +48,25 @@ def test_storage_credentials(): def test_key_is_sas(): - a = azops.StorageAccount('name', 'abcdef', 'endpoint') + a = azops.StorageAccount('name', 'abcdef', 'endpoint', 10) assert not a.is_sas - a = azops.StorageAccount('name', 'abcdef&blah', 'endpoint') + a = azops.StorageAccount('name', 'abcdef&blah', 'endpoint', 10) assert not a.is_sas - a = azops.StorageAccount('name', '?abcdef', 'endpoint') + a = azops.StorageAccount('name', '?abcdef', 'endpoint', 10) assert a.is_sas a = azops.StorageAccount( - 'name', '?sv=0&sr=1&sig=2', 'endpoint') + 'name', '?sv=0&sr=1&sig=2', 'endpoint', 10) assert a.is_sas a = azops.StorageAccount( - 'name', 'sv=0&sr=1&sig=2', 'endpoint') + 'name', 'sv=0&sr=1&sig=2', 'endpoint', 10) assert a.is_sas a = azops.StorageAccount( - 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint') + 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint', 10) assert a.is_sas diff --git a/tests/test_blobxfer_operations_azure_blob.py b/tests/test_blobxfer_operations_azure_blob.py index d6ad180..3880d1a 100644 --- a/tests/test_blobxfer_operations_azure_blob.py +++ b/tests/test_blobxfer_operations_azure_blob.py @@ -36,44 +36,51 @@ def test_check_if_single_blob(): def test_list_blobs(): with pytest.raises(RuntimeError): for blob in ops.list_blobs( - None, 'cont', 'prefix', azmodels.StorageModes.File): + None, 'cont', 'prefix', azmodels.StorageModes.File, True): pass - _blob = azure.storage.blob.models.Blob(name='name') + _blob = azure.storage.blob.models.Blob(name='dir/name') _blob.properties = azure.storage.blob.models.BlobProperties() client = mock.MagicMock() client.list_blobs.return_value = [_blob] i = 0 for blob in ops.list_blobs( - client, 'cont', 'prefix', azmodels.StorageModes.Auto): + client, 'cont', 'prefix', azmodels.StorageModes.Auto, False): i += 1 - assert blob.name == 'name' + assert blob.name == _blob.name + assert i == 0 + + i = 0 + for blob in ops.list_blobs( + client, 'cont', 'prefix', azmodels.StorageModes.Auto, True): + i += 1 + assert blob.name == _blob.name assert i == 1 _blob.properties.blob_type = \ azure.storage.blob.models._BlobTypes.AppendBlob i = 0 for blob in ops.list_blobs( - client, 'dir', 'prefix', azmodels.StorageModes.Block): + client, 'dir', 'prefix', azmodels.StorageModes.Block, True): i += 1 - assert blob.name == 'name' + assert blob.name == _blob.name assert i == 0 i = 0 for blob in ops.list_blobs( - client, 'dir', 'prefix', azmodels.StorageModes.Page): + client, 'dir', 'prefix', azmodels.StorageModes.Page, True): i += 1 - assert blob.name == 'name' + assert blob.name == _blob.name assert i == 0 _blob.properties.blob_type = \ azure.storage.blob.models._BlobTypes.BlockBlob i = 0 for blob in ops.list_blobs( - client, 'dir', 'prefix', azmodels.StorageModes.Append): + client, 'dir', 'prefix', azmodels.StorageModes.Append, True): i += 1 - assert blob.name == 'name' + assert blob.name == _blob.name assert i == 0 _blob.snapshot = 
'2017-02-23T22:21:14.8121864Z' @@ -82,9 +89,10 @@ def test_list_blobs(): for blob in ops.list_blobs( client, 'cont', 'a?snapshot=2017-02-23T22:21:14.8121864Z', - azmodels.StorageModes.Auto): + azmodels.StorageModes.Auto, + True): i += 1 - assert blob.name == 'name' + assert blob.name == _blob.name assert blob.snapshot == _blob.snapshot assert i == 1 diff --git a/tests/test_blobxfer_operations_azure_blob_append.py b/tests/test_blobxfer_operations_azure_blob_append.py index 5553b7d..f6e8c23 100644 --- a/tests/test_blobxfer_operations_azure_blob_append.py +++ b/tests/test_blobxfer_operations_azure_blob_append.py @@ -11,7 +11,7 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.AppendBlobService) @@ -19,7 +19,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.AppendBlobService) diff --git a/tests/test_blobxfer_operations_azure_blob_block.py b/tests/test_blobxfer_operations_azure_blob_block.py index 4aece2d..2af2f6f 100644 --- a/tests/test_blobxfer_operations_azure_blob_block.py +++ b/tests/test_blobxfer_operations_azure_blob_block.py @@ -11,7 +11,7 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.BlockBlobService) @@ -19,7 +19,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.BlockBlobService) diff --git a/tests/test_blobxfer_operations_azure_blob_page.py b/tests/test_blobxfer_operations_azure_blob_page.py index f70e83d..f1b4d8c 100644 --- a/tests/test_blobxfer_operations_azure_blob_page.py +++ b/tests/test_blobxfer_operations_azure_blob_page.py @@ -11,7 +11,7 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.PageBlobService) @@ -19,7 +19,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.blob.PageBlobService) diff --git a/tests/test_blobxfer_operations_azure_file.py b/tests/test_blobxfer_operations_azure_file.py index c6bf764..cb6b04e 100644 --- a/tests/test_blobxfer_operations_azure_file.py +++ b/tests/test_blobxfer_operations_azure_file.py @@ -14,7 +14,7 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint') + sa = azops.StorageAccount('name', 'key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert 
isinstance(client, azure.storage.file.FileService) @@ -22,7 +22,7 @@ def test_create_client(): client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint') + sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) client = ops.create_client(sa) assert client is not None assert isinstance(client, azure.storage.file.FileService) @@ -87,7 +87,7 @@ def test_list_files_single_file(): client.get_file_properties.return_value = 'fp' i = 0 - for file in ops.list_files(client, 'a', 'b/c'): + for file in ops.list_files(client, 'a', 'b/c', True): i += 1 assert file == 'fp' assert i == 1 @@ -104,7 +104,7 @@ def test_list_files_directory(patched_cisf): client.get_file_properties.return_value = _file i = 0 - for file in ops.list_files(client, 'dir', ''): + for file in ops.list_files(client, 'dir', '', True): i += 1 assert file.name == 'name' assert i == 1 @@ -117,7 +117,7 @@ def test_list_files_directory(patched_cisf): client.get_file_properties.side_effect = [_file] i = 0 - for file in ops.list_files(client, '', ''): + for file in ops.list_files(client, '', '', True): i += 1 assert file.name == _file.name assert type(file) == azure.storage.file.models.File diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index ee0166f..ea2be05 100644 --- a/tests/test_blobxfer_operations_download.py +++ b/tests/test_blobxfer_operations_download.py @@ -591,6 +591,41 @@ def test_cleanup_temporary_files(tmpdir): assert dd.local_path.exists() +def test_catalog_local_files_for_deletion(tmpdir): + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._spec.options.delete_extraneous_destination = False + + d._catalog_local_files_for_deletion() + assert len(d._delete_after) == 0 + + a = tmpdir.join('a') + a.write('abc') + d._spec.destination.path = tmpdir + d._spec.options.delete_extraneous_destination = True + d._spec.destination.is_dir = True + + d._catalog_local_files_for_deletion() + assert len(d._delete_after) == 1 + assert pathlib.Path(str(a)) in d._delete_after + + +def test_delete_extraneous_files(tmpdir): + a = tmpdir.join('a') + a.write('abc') + fp = pathlib.Path(str(a)) + + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._spec.options.delete_extraneous_destination = True + d._spec.destination.is_dir = True + d._delete_after.add(fp) + + d._delete_extraneous_files() + assert not fp.exists() + + # following should not throw exception + d._delete_extraneous_files() + + @mock.patch('time.clock') @mock.patch('blobxfer.operations.md5.LocalFileMd5Offload') @mock.patch('blobxfer.operations.azure.blob.list_blobs') @@ -605,6 +640,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d._initialize_download_threads = mock.MagicMock() patched_lfmo._check_thread = mock.MagicMock() d._general_options.concurrency.crypto_processes = 1 + d._general_options.concurrency.md5_processes = 1 d._spec.sources = [] d._spec.options = mock.MagicMock() d._spec.options.chunk_size_bytes = 1 @@ -616,6 +652,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): d._spec.skip_on.filesize_match = False d._spec.destination = mock.MagicMock() d._spec.destination.path = pathlib.Path(str(tmpdir)) + d._download_start_time = util.datetime_now() p = '/cont/remote/path' asp = azops.SourcePath() From 9ce5571e7fe25c8f9e8139f02ccd8b5b0606ea5b Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 18 Apr 2017 12:01:18 -0700 
Subject: [PATCH 20/47] Add resume support for downloads --- blobxfer/models/download.py | 362 ++++++++++++++++++++++++-------- blobxfer/models/resume.py | 171 +++++++++++++++ blobxfer/operations/crypto.py | 27 ++- blobxfer/operations/download.py | 98 +++++---- blobxfer/operations/resume.py | 149 +++++++++++++ 5 files changed, 662 insertions(+), 145 deletions(-) create mode 100644 blobxfer/models/resume.py create mode 100644 blobxfer/operations/resume.py diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index 150e07e..70ba7a7 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -42,8 +42,9 @@ import threading # non-stdlib imports # local imports -import blobxfer.models.options +import blobxfer.models.azure import blobxfer.models.crypto +import blobxfer.models.options import blobxfer.util # create logger @@ -172,42 +173,44 @@ class Descriptor(object): _AES_BLOCKSIZE = blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES - def __init__(self, lpath, ase, options): + def __init__(self, lpath, ase, options, resume_mgr): # type: (DownloadDescriptior, pathlib.Path, # blobxfer.models.azure.StorageEntity, - # blobxfer.models.options.Download) -> None + # blobxfer.models.options.Download, + # blobxfer.operations.resume.DownloadResumeManager) -> None """Ctor for Descriptor :param Descriptor self: this :param pathlib.Path lpath: local path :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity :param blobxfer.models.options.Download options: download options + :param blobxfer.operations.resume.DownloadResumeManager resume_mgr: + download resume manager """ + self._offset = 0 + self._chunk_num = 0 + self._next_integrity_chunk = 0 + self._unchecked_chunks = {} + self._allocated = False + self._finalized = False + self._meta_lock = threading.Lock() + self._hasher_lock = threading.Lock() + self._resume_mgr = resume_mgr + self._ase = ase + # set paths self.final_path = lpath # create path holding the temporary file to download to _tmp = list(lpath.parts[:-1]) _tmp.append(lpath.name + '.bxtmp') self.local_path = pathlib.Path(*_tmp) - self._meta_lock = threading.Lock() - self._hasher_lock = threading.Lock() - self._ase = ase + del _tmp # calculate the total number of ops required for transfer self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) - try: - self._total_chunks = int( - math.ceil(self._ase.size / self._chunk_size)) - except ZeroDivisionError: - self._total_chunks = 0 + self._total_chunks = self._compute_total_chunks(self._chunk_size) + self._outstanding_ops = self._total_chunks + # initialize integrity checkers self.hmac = None self.md5 = None - self._offset = 0 - self._chunk_num = 0 - self._next_integrity_chunk = 0 - self._unchecked_chunks = {} - self._outstanding_ops = self._total_chunks - self._completed_ops = 0 - # initialize checkers and allocate space self._initialize_integrity_checkers(options) - self._allocate_disk_space() @property def entity(self): @@ -241,14 +244,28 @@ def all_operations_completed(self): return (self._outstanding_ops == 0 and len(self._unchecked_chunks) == 0) - def dec_outstanding_operations(self): - # type: (Descriptor) -> None - """Decrement outstanding operations (and increment completed ops) + @property + def is_resumable(self): + # type: (Descriptor) -> bool + """Download is resume capable :param Descriptor self: this + :rtype: bool + :return: if resumable """ - with self._meta_lock: - self._outstanding_ops -= 1 - self._completed_ops += 1 + return self._resume_mgr is not None and self.hmac is None + 
+ def _compute_total_chunks(self, chunk_size): + # type: (Descriptor, int) -> int + """Compute total number of chunks for entity + :param Descriptor self: this + :param int chunk_size: chunk size + :rtype: int + :return: num chunks + """ + try: + return int(math.ceil(self._ase.size / chunk_size)) + except ZeroDivisionError: + return 0 def _initialize_integrity_checkers(self, options): # type: (Descriptor, blobxfer.models.options.Download) -> None @@ -273,29 +290,145 @@ def _allocate_disk_space(self): :param Descriptor self: this :param int size: size """ - size = self._ase.size - # compute size - if size > 0: - if self._ase.is_encrypted: - # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs - allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ - self._AES_BLOCKSIZE + with self._meta_lock: + if self._allocated: + return + size = self._ase.size + # compute size + if size > 0: + if self._ase.is_encrypted: + # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs + allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ + self._AES_BLOCKSIZE + else: + allocatesize = size + if allocatesize < 0: + allocatesize = 0 else: - allocatesize = size - if allocatesize < 0: allocatesize = 0 - else: - allocatesize = 0 - # create parent path - self.local_path.parent.mkdir(mode=0o750, parents=True, exist_ok=True) - # allocate file - with self.local_path.open('wb') as fd: - if allocatesize > 0: - try: - os.posix_fallocate(fd.fileno(), 0, allocatesize) - except AttributeError: - fd.seek(allocatesize - 1) - fd.write(b'\0') + # check if path already exists and is of sufficient size + if (not self.local_path.exists() or + self.local_path.stat().st_size != allocatesize): + # create parent path + self.local_path.parent.mkdir( + mode=0o750, parents=True, exist_ok=True) + # allocate file + with self.local_path.open('wb') as fd: + if allocatesize > 0: + try: + os.posix_fallocate(fd.fileno(), 0, allocatesize) + except AttributeError: + fd.seek(allocatesize - 1) + fd.write(b'\0') + self._allocated = True + + def _resume(self): + # type: (Descriptor) -> int + """Resume a download, if possible + :param Descriptor self: this + :rtype: int or None + :return: verified download offset + """ + if self._resume_mgr is None or self._offset != 0: + return None + # check if path exists in resume db + rr = self._resume_mgr.get_record(str(self.final_path)) + if rr is None: + logger.debug('no resume record for {}'.format(self.final_path)) + return None + # ensure lengths are the same + if rr.length != self._ase.size: + logger.warning('resume length mismatch {} -> {}'.format( + rr.length, self._ase.size)) + return None + # calculate current chunk and offset + if rr.next_integrity_chunk == 0: + logger.debug('nothing to resume for {}'.format(self.final_path)) + return None + curr_chunk = rr.next_integrity_chunk + curr_offset = curr_chunk * rr.chunk_size + # set offsets if completed and the final path exists + if rr.completed and self.final_path.exists(): + logger.debug('{} download already completed'.format( + self.final_path)) + with self._meta_lock: + self._offset = self._ase.size + self._chunk_num = curr_chunk + self._chunk_size = rr.chunk_size + self._total_chunks = self._compute_total_chunks(rr.chunk_size) + self._next_integrity_chunk = rr.next_integrity_chunk + self._outstanding_ops = 0 + self._finalized = True + return self._ase.size + # encrypted files are not resumable due to hmac requirement + if self._ase.is_encrypted: + logger.debug('cannot resume encrypted entity {}/{}'.format( + self._ase.container, 
self._ase.name)) + return None + # check if intermediate (blobtmp) exists + if not self.local_path.exists(): + logger.warning('temporary download file {} does not exist'.format( + rr.temp_path)) + return None + if self.hmac is not None: + raise RuntimeError( + 'unexpected hmac object for entity {}/{}'.format( + self._ase.container, self._ase.name)) + # re-hash from 0 to offset if needed + if self.md5 is not None and curr_chunk > 0: + pagealign = ( + self._ase.mode == blobxfer.models.azure.StorageModes.Page + ) + _fd_offset = 0 + _end_offset = min( + (curr_chunk * rr.chunk_size, rr.length) + ) + logger.debug( + 'integrity checking existing file {} to offset {}'.format( + self.final_path, _end_offset)) + with self._hasher_lock: + with self.local_path.open('rb') as filedesc: + while _fd_offset < _end_offset: + _blocksize = blobxfer.util.MEGABYTE << 2 + if (_fd_offset + _blocksize) > _end_offset: + _blocksize = _end_offset - _fd_offset + buf = filedesc.read(_blocksize) + buflen = len(buf) + if pagealign and buflen < _blocksize: + aligned = blobxfer.\ + util.page_align_content_length(buflen) + if aligned != buflen: + buf = buf.ljust(aligned, b'\0') + self.md5.update(buf) + _fd_offset += _blocksize + del _fd_offset + del _end_offset + # compare hashes + hexdigest = self.md5.hexdigest() + if rr.md5hexdigest != hexdigest: + logger.warning( + 'MD5 mismatch resume={} computed={} for {}'.format( + rr.md5hexdigest, hexdigest, self.local_path)) + # reset hasher + self.md5 = blobxfer.util.new_md5_hasher() + return None + # set values from resume + with self._meta_lock: + self._offset = curr_offset + self._chunk_num = curr_chunk + self._chunk_size = rr.chunk_size + self._total_chunks = self._compute_total_chunks(rr.chunk_size) + self._next_integrity_chunk = rr.next_integrity_chunk + self._outstanding_ops = \ + self._total_chunks - self._next_integrity_chunk + logger.debug( + ('resuming file {} from byte={} chunk={} chunk_size={} ' + 'total_chunks={} next_integrity_chunk={} ' + 'outstanding_ops={}').format( + self.final_path, self._offset, self._chunk_num, + self._chunk_size, self._total_chunks, + self._next_integrity_chunk, self._outstanding_ops)) + return curr_offset def cleanup_all_temporary_files(self): # type: (Descriptor) -> None @@ -324,9 +457,12 @@ def next_offsets(self): :rtype: Offsets :return: download offsets """ + resume_bytes = self._resume() + if resume_bytes is None and not self._allocated: + self._allocate_disk_space() with self._meta_lock: if self._offset >= self._ase.size: - return None + return None, resume_bytes if self._offset + self._chunk_size > self._ase.size: chunk = self._ase.size - self._offset else: @@ -360,47 +496,62 @@ def next_offsets(self): range_start=range_start, range_end=range_end, unpad=unpad, - ) + ), resume_bytes + + def hmac_iv(self, iv): + # type: (Descriptor, bytes) -> None + """Send IV through hasher + :param Descriptor self: this + :param bytes iv: iv + """ + with self._hasher_lock: + self.hmac.update(iv) - def _postpone_integrity_check(self, offsets, data): + def write_unchecked_data(self, offsets, data): # type: (Descriptor, Offsets, bytes) -> None - """Postpone integrity check for chunk + """Write unchecked data to disk :param Descriptor self: this :param Offsets offsets: download offsets :param bytes data: data """ - if self.must_compute_md5: - with self.local_path.open('r+b') as fd: - fd.seek(offsets.fd_start, 0) - fd.write(data) - unchecked = UncheckedChunk( - data_len=len(data), - fd_start=offsets.fd_start, - file_path=self.local_path, - temp=False, - ) 
- else: - fname = None - with tempfile.NamedTemporaryFile(mode='wb', delete=False) as fd: - fname = fd.name - fd.write(data) - unchecked = UncheckedChunk( - data_len=len(data), - fd_start=0, - file_path=pathlib.Path(fname), - temp=True, - ) + with self.local_path.open('r+b') as fd: + fd.seek(offsets.fd_start, 0) + fd.write(data) + unchecked = UncheckedChunk( + data_len=len(data), + fd_start=offsets.fd_start, + file_path=self.local_path, + temp=False, + ) with self._meta_lock: self._unchecked_chunks[offsets.chunk_num] = unchecked - def perform_chunked_integrity_check(self, offsets, data): + def write_unchecked_hmac_data(self, offsets, data): # type: (Descriptor, Offsets, bytes) -> None - """Hash data against stored MD5 hasher safely + """Write unchecked encrypted data to disk :param Descriptor self: this :param Offsets offsets: download offsets - :param bytes data: data + :param bytes data: hmac/encrypted data + """ + fname = None + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as fd: + fname = fd.name + fd.write(data) + unchecked = UncheckedChunk( + data_len=len(data), + fd_start=0, + file_path=pathlib.Path(fname), + temp=True, + ) + with self._meta_lock: + self._unchecked_chunks[offsets.chunk_num] = unchecked + return str(unchecked.file_path) + + def perform_chunked_integrity_check(self): + # type: (Descriptor) -> None + """Hash data against stored hasher safely + :param Descriptor self: this """ - self_check = False hasher = self.hmac or self.md5 # iterate from next chunk to be checked while True: @@ -410,26 +561,45 @@ def perform_chunked_integrity_check(self, offsets, data): # check if the next chunk is ready if chunk_num in self._unchecked_chunks: ucc = self._unchecked_chunks.pop(chunk_num) - elif chunk_num != offsets.chunk_num: + else: break - # prepare data for hashing - if ucc is None: - chunk = data - self_check = True - else: + # hash data and set next integrity chunk + md5hexdigest = None + if hasher is not None: with ucc.file_path.open('rb') as fd: - fd.seek(ucc.fd_start, 0) + if not ucc.temp: + fd.seek(ucc.fd_start, 0) chunk = fd.read(ucc.data_len) if ucc.temp: ucc.file_path.unlink() - # hash data and set next integrity chunk - with self._hasher_lock: - hasher.update(chunk) + with self._hasher_lock: + hasher.update(chunk) + if hasher == self.md5: + md5hexdigest = hasher.hexdigest() with self._meta_lock: + # update integrity counter and resume db self._next_integrity_chunk += 1 - # store data that hasn't been checked - if not self_check: - self._postpone_integrity_check(offsets, data) + if self.is_resumable: + self._resume_mgr.add_or_update_record( + self.final_path, self.local_path, self._ase.size, + self._chunk_size, self._next_integrity_chunk, False, + md5hexdigest, + ) + # decrement outstanding op counter + self._outstanding_ops -= 1 + + def _update_resume_for_completed(self): + # type: (Descriptor) -> None + """Update resume for completion + :param Descriptor self: this + """ + if not self.is_resumable: + return + with self._meta_lock: + self._resume_mgr.add_or_update_record( + self.final_path, self.local_path, self._ase.size, + self._chunk_size, self._next_integrity_chunk, True, None, + ) def write_data(self, offsets, data): # type: (Descriptor, Offsets, bytes) -> None @@ -438,15 +608,19 @@ def write_data(self, offsets, data): :param Offsets offsets: download offsets :param bytes data: data """ - with self.local_path.open('r+b') as fd: - fd.seek(offsets.fd_start, 0) - fd.write(data) + if len(data) > 0: + with self.local_path.open('r+b') as fd: + 
fd.seek(offsets.fd_start, 0) + fd.write(data) def finalize_file(self): # type: (Descriptor) -> None """Finalize file download :param Descriptor self: this """ + with self._meta_lock: + if self._finalized: + return # check final file integrity check = False msg = None @@ -491,4 +665,8 @@ def finalize_file(self): # TODO set file uid/gid and mode # move temp download file to final path - self.local_path.rename(self.final_path) + self.local_path.replace(self.final_path) + # update resume file + self._update_resume_for_completed() + with self._meta_lock: + self._finalized = True diff --git a/blobxfer/models/resume.py b/blobxfer/models/resume.py new file mode 100644 index 0000000..37a5acc --- /dev/null +++ b/blobxfer/models/resume.py @@ -0,0 +1,171 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +# non-stdlib imports +# local imports + + +class Download(object): + """Download resume object""" + def __init__( + self, final_path, temp_path, length, chunk_size, + next_integrity_chunk, completed, md5): + # type: (Download, str, str, int, int, int, str) -> None + """Ctor for Download + :param Download self: this + :param str final_path: final path + :param str temp_path: temporary path + :param int length: total bytes + :param int chunk_size: chunk size in bytes + :param int next_integrity_chunk: next integrity chunk + :param str md5: md5 hex digest + """ + self._final_path = final_path + self._temp_path = temp_path + self._length = length + self._chunk_size = chunk_size + self._next_integrity_chunk = next_integrity_chunk + self._completed = completed + self._md5hexdigest = md5 if md5 is not None else None + + @property + def final_path(self): + # type: (Download) -> str + """Final path + :param Download self: this + :rtype: str + :return: final path + """ + return self._final_path + + @property + def temp_path(self): + # type: (Download) -> str + """Temp path + :param Download self: this + :rtype: str + :return: temp path + """ + return self._temp_path + + @property + def length(self): + # type: (Download) -> int + """Content length + :param Download self: this + :rtype: int + :return: number of bytes + """ + return self._length + + @property + def chunk_size(self): + # type: (Download) -> int + """Chunk size + :param Download self: this + :rtype: int + :return: chunk size in bytes + """ + return self._chunk_size + + @property + def next_integrity_chunk(self): + # type: (Download) -> int + """Get Next integrity chunk + :param Download self: this + :rtype: int + :return: next integrity chunk + """ + return self._next_integrity_chunk + + @next_integrity_chunk.setter + def next_integrity_chunk(self, value): + # type: (Download) -> None + """Set Next integrity chunk + :param Download self: this + :param int value: next chunk num + """ + self._next_integrity_chunk = value + + @property + def completed(self): + # type: (Download) -> bool + """Get Completed + :param Download self: this + :rtype: bool + :return: if completed + """ + return self._completed + + @completed.setter + def completed(self, value): + # type: (Download) -> None + """Set Completed + :param Download self: this + :param bool value: completion value + """ + self._completed = value + + @property + def md5hexdigest(self): + # type: (Download) -> str + """Get md5 hex digest + :param Download self: this + :rtype: str + :return: md5 hex digest + """ + return self._md5hexdigest + + @md5hexdigest.setter + def md5hexdigest(self, value): + # type: (Download) -> None + """Set md5 hex digest value if value is not None + :param Download self: this + :param str value: md5 hex digest + """ + if value is None: + return + self._md5hexdigest = value + + def __repr__(self): + # type: (Download) -> str + """Return representation + :param Download self: this + :rtype: str + :return: representation string + """ + return ('Download').format( + self.final_path, self.temp_path, self.length, + self.chunk_size, self.next_integrity_chunk, + self.completed, self.md5hexdigest, + ) diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index 
58f65d8..f494d81 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -248,28 +248,37 @@ def _worker_process(self): # TODO on upload raise NotImplementedError() elif inst[0] == CryptoAction.Decrypt: - final_path, offsets, symkey, iv, encdata = \ - inst[1], inst[2], inst[3], inst[4], inst[5] + final_path, local_path, offsets, symkey, iv, hmac_datafile = \ + inst[1], inst[2], inst[3], inst[4], inst[5], inst[6] + # read encrypted data from disk + with open(hmac_datafile, 'rb') as fd: + encdata = fd.read() data = blobxfer.operations.crypto.aes_cbc_decrypt_data( symkey, iv, encdata, offsets.unpad) + # write decrypted data to disk + if len(data) > 0: + with open(local_path, 'r+b') as fd: + fd.seek(offsets.fd_start, 0) + fd.write(data) self._done_cv.acquire() - self._done_queue.put((final_path, offsets, data)) + self._done_queue.put(final_path) self._done_cv.notify() self._done_cv.release() def add_decrypt_chunk( - self, final_path, offsets, symkey, iv, encdata): - # type: (CryptoOffload, str, blobxfer.models.download.Offsets, - # bytes, bytes, bytes) -> None + self, final_path, local_path, offsets, symkey, iv, hmac_datafile): + # type: (CryptoOffload, str, str, blobxfer.models.download.Offsets, + # bytes, bytes, str) -> None """Add a chunk to decrypt :param CryptoOffload self: this :param str final_path: final path + :param str local_path: temp local path :param blobxfer.models.download.Offsets offsets: offsets :param bytes symkey: symmetric key :param bytes iv: initialization vector - :param bytes encdata: encrypted data + :param str hmac_datafile: encrypted data file """ self._task_queue.put( - (CryptoAction.Decrypt, final_path, offsets, symkey, iv, - encdata) + (CryptoAction.Decrypt, final_path, local_path, offsets, symkey, + iv, hmac_datafile) ) diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index 78baa34..bb63f3f 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -49,6 +49,7 @@ import blobxfer.operations.crypto import blobxfer.operations.md5 import blobxfer.operations.progress +import blobxfer.operations.resume import blobxfer.util # create logger @@ -94,6 +95,7 @@ def __init__(self, general_options, creds, spec): self._general_options = general_options self._creds = creds self._spec = spec + self._resume = None @property def termination_check(self): @@ -255,6 +257,8 @@ def _post_md5_skip_on_check(self, filename, md5_match): if md5_match: with self._download_lock: self._download_set.remove(lpath) + self._download_total -= 1 + self._download_bytes_total -= lpath.stat().st_size else: self._add_to_download_queue(lpath, rfile) @@ -302,9 +306,14 @@ def _check_for_crypto_done(self): break cv.release() if result is not None: - with self._download_lock: - dd = self._dd_map[result[0]] - self._complete_chunk_download(result[1], result[2], dd) + try: + with self._download_lock: + dd = self._dd_map[result] + dd.perform_chunked_integrity_check() + except KeyError: + # this can happen if all of the last integrity + # chunks are processed at once + pass def _add_to_download_queue(self, lpath, rfile): # type: (Downloader, pathlib.Path, @@ -316,7 +325,7 @@ def _add_to_download_queue(self, lpath, rfile): """ # prepare remote file for download dd = blobxfer.models.download.Descriptor( - lpath, rfile, self._spec.options) + lpath, rfile, self._spec.options, self._resume) if dd.entity.is_encrypted: with self._download_lock: self._dd_map[str(dd.final_path)] = dd @@ -363,7 +372,12 @@ def _worker_thread_download(self): # 
update progress bar self._update_progress_bar() # get download offsets - offsets = dd.next_offsets() + offsets, resume_bytes = dd.next_offsets() + # add resume bytes to counter + if resume_bytes is not None: + with self._download_lock: + self._download_bytes_sofar += resume_bytes + del resume_bytes # check if all operations completed if offsets is None and dd.all_operations_completed: # finalize file @@ -391,50 +405,43 @@ def _worker_thread_download(self): self._download_bytes_sofar += offsets.num_bytes # decrypt if necessary if dd.entity.is_encrypted: - # slice data to proper bounds - encdata = data[blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES:] - intdata = encdata - # get iv for chunk and compute hmac + # slice data to proper bounds and get iv for chunk if offsets.chunk_num == 0: + # set iv iv = dd.entity.encryption_metadata.content_encryption_iv - # integrity check for first chunk must include iv - intdata = iv + data + # set data to decrypt + encdata = data + # send iv through hmac + dd.hmac_iv(iv) else: + # set iv iv = data[:blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES] - # integrity check data - dd.perform_chunked_integrity_check(offsets, intdata) + # set data to decrypt + encdata = data[ + blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES:] + # write encdata to disk for hmac later + _hmac_datafile = dd.write_unchecked_hmac_data( + offsets, encdata) # decrypt data if self._crypto_offload is not None: self._crypto_offload.add_decrypt_chunk( - str(dd.final_path), offsets, + str(dd.final_path), str(dd.local_path), offsets, dd.entity.encryption_metadata.symmetric_key, - iv, encdata) - # data will be completed once retrieved from crypto queue + iv, _hmac_datafile) + # data will be integrity checked and written once + # retrieved from crypto queue continue else: data = blobxfer.operations.crypto.aes_cbc_decrypt_data( dd.entity.encryption_metadata.symmetric_key, iv, encdata, offsets.unpad) - elif dd.must_compute_md5: - # rolling compute md5 - dd.perform_chunked_integrity_check(offsets, data) - # complete chunk download - self._complete_chunk_download(offsets, data, dd) - - def _complete_chunk_download(self, offsets, data, dd): - # type: (Downloader, blobxfer.models.download.Offsets, bytes, - # blobxfer.models.download.Descriptor) -> None - """Complete chunk download - :param Downloader self: this - :param blobxfer.models.download.Offsets offsets: offsets - :param bytes data: data - :param blobxfer.models.download.Descriptor dd: download descriptor - """ - # write data to disk - dd.write_data(offsets, data) - # decrement outstanding operations - dd.dec_outstanding_operations() - # TODO pickle dd to resume file + dd.write_data(offsets, data) + else: + # write data to disk + dd.write_unchecked_data(offsets, data) + # integrity check data and write to disk (this is called + # regardless of md5/hmac enablement for resume purposes) + dd.perform_chunked_integrity_check() def _cleanup_temporary_files(self): # type: (Downloader) -> None @@ -442,12 +449,6 @@ def _cleanup_temporary_files(self): This function is not thread-safe. 
:param Downloader self: this """ - # do not clean up if resume file exists - if self._general_options.resume_file is not None: - logger.debug( - 'not cleaning up temporary files since resume file has ' - 'been specified') - return # iterate through dd map and cleanup files for key in self._dd_map: dd = self._dd_map[key] @@ -495,6 +496,10 @@ def _run(self): logger.info('downloading blobs/files to local path: {}'.format( self._spec.destination.path)) self._catalog_local_files_for_deletion() + # initialize resume db if specified + if self._general_options.resume_file is not None: + self._resume = blobxfer.operations.resume.DownloadResumeManager( + self._general_options.resume_file) # initialize MD5 processes if (self._spec.options.check_file_md5 and self._general_options.concurrency.md5_processes > 0): @@ -570,6 +575,9 @@ def _run(self): # delete all remaining local files not accounted for if # delete extraneous enabled self._delete_extraneous_files() + # delete resume file if we've gotten this far + if self._resume is not None: + self._resume.delete() # output throughput if self._download_start_time is not None: dltime = (end_time - self._download_start_time).total_seconds() @@ -592,7 +600,7 @@ def start(self): self._run() except (KeyboardInterrupt, Exception) as ex: if isinstance(ex, KeyboardInterrupt): - logger.error( + logger.info( 'KeyboardInterrupt detected, force terminating ' 'processes and threads (this may take a while)...') try: @@ -601,9 +609,11 @@ def start(self): self._cleanup_temporary_files() raise finally: - # TODO close resume file # shutdown processes if self._md5_offload is not None: self._md5_offload.finalize_processes() if self._crypto_offload is not None: self._crypto_offload.finalize_processes() + # close resume file + if self._resume is not None: + self._resume.close() diff --git a/blobxfer/operations/resume.py b/blobxfer/operations/resume.py new file mode 100644 index 0000000..0f76562 --- /dev/null +++ b/blobxfer/operations/resume.py @@ -0,0 +1,149 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import contextlib +import logging +import pickle +import shelve +import threading +# non-stdlib imports +# local imports +import blobxfer.models.resume +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + + +class DownloadResumeManager(): + """Download Resume Manager""" + def __init__(self, resume_file): + # type: (DownloadResumeManager, str) -> None + """Ctor for DownloadResumeManager + :param DownloadResumeManager self: this + :param pathlib.Path resume_file: resume file + """ + self._lock = threading.Lock() + self._resume_file = resume_file + self._data = shelve.open( + str(resume_file), protocol=pickle.HIGHEST_PROTOCOL) + + def close(self): + # type: (DownloadResumeManager) -> None + """Close the internal data store + :param DownloadResumeManager self: this + """ + if self._data is not None: + self._data.close() + self._data = None + + def delete(self): + # type: (DownloadResumeManager) -> None + """Delete the resume file db + :param DownloadResumeManager self: this + """ + self.close() + try: + self._resume_file.unlink() + except OSError as e: + logger.warning('could not unlink resume db: {}'.format(e)) + + @contextlib.contextmanager + def datalock(self, acquire=True): + # type: (DownloadResumeManager) -> None + """Delete the resume file db + :param DownloadResumeManager self: this + :param bool acquire: acquire lock + """ + if acquire: + self._lock.acquire() + try: + yield + finally: + if acquire: + self._lock.release() + + def get_record(self, final_path, lock=True): + # type: (DownloadResumeManager, str, + # bool) -> blobxfer.models.resume.Download + """Get a resume record + :param DownloadResumeManager self: this + :param str final_path: final path + :param bool lock: acquire lock + :rtype: blobxfer.models.resume.Download + :return: Download record + """ + with self.datalock(lock): + try: + return self._data[final_path] + except KeyError: + return None + + def add_or_update_record( + self, final_path, temp_path, length, chunk_size, + next_integrity_chunk, completed, md5): + # type: (DownloadResumeManager, pathlib.Path, pathlib.Path, int, int, + # int, bool, str) -> None + """Get a resume record + :param DownloadResumeManager self: this + :param pathlib.Path final_path: final path + :param pathlib.Path temp_path: temp local path + :param int length: content length + :param int chunk_size: chunk size in bytes + :param int next_integrity_chunk: next integrity chunk + :param bool completed: if completed + :param str md5: md5 hex digest + """ + sfp = str(final_path) + with self.datalock(): + dl = self.get_record(sfp, lock=False) + if dl is None: + dl = blobxfer.models.resume.Download( + final_path=sfp, + temp_path=str(temp_path), + length=length, + chunk_size=chunk_size, + next_integrity_chunk=next_integrity_chunk, + completed=completed, + md5=md5, + ) + else: + if (dl.completed or + next_integrity_chunk < dl.next_integrity_chunk): + return + if completed: + dl.completed = completed + else: + dl.next_integrity_chunk = next_integrity_chunk + dl.md5hexdigest = md5 + self._data[sfp] = dl + self._data.sync() From 9701cfc76fd4599aa342edc6972c5a1c4ae937d7 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 18 Apr 2017 19:58:12 -0700 Subject: [PATCH 21/47] Fix tests with resume changes - 
Add replace_file in util --- blobxfer/models/download.py | 2 +- blobxfer/util.py | 15 ++ tests/test_blobxfer_models_download.py | 161 +++++++++++-------- tests/test_blobxfer_operations_azure_file.py | 1 - tests/test_blobxfer_operations_crypto.py | 26 ++- tests/test_blobxfer_operations_download.py | 82 ++++------ 6 files changed, 164 insertions(+), 123 deletions(-) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index 70ba7a7..7780378 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -665,7 +665,7 @@ def finalize_file(self): # TODO set file uid/gid and mode # move temp download file to final path - self.local_path.replace(self.final_path) + blobxfer.util.replace_file(self.local_path, self.final_path) # update resume file self._update_resume_for_completed() with self._meta_lock: diff --git a/blobxfer/util.py b/blobxfer/util.py index 82c20a7..7d48ceb 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -41,6 +41,7 @@ except ImportError: # noqa from scandir import scandir as scandir import re +import sys # non-stdlib imports import dateutil import dateutil.tz @@ -172,6 +173,20 @@ def scantree(path): yield entry +def replace_file(src, dst): + # type: (pathlib.Path, pathlib.Path) -> None + """Replace a file, using atomic replace if available + :param pathlib.Path src: source path + :param pathlib.Path dst: destination path + """ + if sys.version_info < (3, 3): + if dst.exists(): + dst.unlink() + src.rename(dst) + else: + src.replace(dst) + + def get_mime_type(filename): # type: (str) -> str """Guess the type of a file based on its filename diff --git a/tests/test_blobxfer_models_download.py b/tests/test_blobxfer_models_download.py index 69133e2..548ebf8 100644 --- a/tests/test_blobxfer_models_download.py +++ b/tests/test_blobxfer_models_download.py @@ -96,10 +96,11 @@ def test_downloaddescriptor(tmpdir): ase._size = 1024 ase._encryption = mock.MagicMock() with pytest.raises(RuntimeError): - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) ase._encryption.symmetric_key = b'123' - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() assert d.entity == ase assert not d.must_compute_md5 @@ -107,24 +108,31 @@ def test_downloaddescriptor(tmpdir): assert d._offset == 0 assert d.final_path == lp assert str(d.local_path) == str(lp) + '.bxtmp' + assert d._allocated assert d.local_path.stat().st_size == 1024 - 16 d.local_path.unlink() ase._size = 1 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() assert d._total_chunks == 1 + assert d._allocated assert d.local_path.stat().st_size == 0 d.local_path.unlink() ase._encryption = None ase._size = 1024 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() + assert d._allocated assert d.local_path.stat().st_size == 1024 # pre-existing file check ase._size = 0 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() assert d._total_chunks == 0 + assert d._allocated assert d.local_path.stat().st_size == 0 @@ -136,9 +144,10 @@ def test_downloaddescriptor_next_offsets(tmpdir): opts.chunk_size_bytes = 256 ase = azmodels.StorageEntity('cont') ase._size = 128 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert 
d._total_chunks == 1 assert offsets.chunk_num == 0 assert offsets.fd_start == 0 @@ -146,16 +155,17 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert offsets.range_start == 0 assert offsets.range_end == 127 assert not offsets.unpad - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) ase._size = 0 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) assert d._total_chunks == 0 - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) ase._size = 1 - d = models.Descriptor(lp, ase, opts) - offsets = d.next_offsets() + d = models.Descriptor(lp, ase, opts, None) + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert d._total_chunks == 1 assert offsets.chunk_num == 0 assert offsets.fd_start == 0 @@ -163,11 +173,12 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert offsets.range_start == 0 assert offsets.range_end == 0 assert not offsets.unpad - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) ase._size = 256 - d = models.Descriptor(lp, ase, opts) - offsets = d.next_offsets() + d = models.Descriptor(lp, ase, opts, None) + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert d._total_chunks == 1 assert offsets.chunk_num == 0 assert offsets.fd_start == 0 @@ -175,11 +186,12 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) ase._size = 256 + 16 - d = models.Descriptor(lp, ase, opts) - offsets = d.next_offsets() + d = models.Descriptor(lp, ase, opts, None) + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert d._total_chunks == 2 assert offsets.chunk_num == 0 assert offsets.fd_start == 0 @@ -187,20 +199,22 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad - offsets = d.next_offsets() + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert offsets.chunk_num == 1 assert offsets.fd_start == 256 assert offsets.num_bytes == 16 assert offsets.range_start == 256 assert offsets.range_end == 256 + 15 assert not offsets.unpad - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'123' ase._size = 128 - d = models.Descriptor(lp, ase, opts) - offsets = d.next_offsets() + d = models.Descriptor(lp, ase, opts, None) + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert d._total_chunks == 1 assert offsets.chunk_num == 0 assert offsets.fd_start == 0 @@ -208,11 +222,12 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert offsets.range_start == 0 assert offsets.range_end == 127 assert offsets.unpad - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) ase._size = 256 - d = models.Descriptor(lp, ase, opts) - offsets = d.next_offsets() + d = models.Descriptor(lp, ase, opts, None) + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert d._total_chunks == 1 assert offsets.chunk_num == 0 assert offsets.fd_start == 0 @@ -220,11 +235,12 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert offsets.range_start == 0 assert offsets.range_end == 255 assert offsets.unpad - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) ase._size = 256 + 32 # 16 bytes over + padding - 
d = models.Descriptor(lp, ase, opts) - offsets = d.next_offsets() + d = models.Descriptor(lp, ase, opts, None) + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert d._total_chunks == 2 assert offsets.chunk_num == 0 assert offsets.fd_start == 0 @@ -232,17 +248,18 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert offsets.range_start == 0 assert offsets.range_end == 255 assert not offsets.unpad - offsets = d.next_offsets() + offsets, resume_bytes = d.next_offsets() + assert resume_bytes is None assert offsets.chunk_num == 1 assert offsets.fd_start == 256 assert offsets.num_bytes == 32 assert offsets.range_start == 256 - 16 assert offsets.range_end == 256 + 31 assert offsets.unpad - assert d.next_offsets() is None + assert d.next_offsets() == (None, None) -def test_postpone_integrity_check(tmpdir): +def test_write_unchecked_data(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) opts = mock.MagicMock() @@ -250,10 +267,10 @@ def test_postpone_integrity_check(tmpdir): opts.chunk_size_bytes = 32 ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() - d._postpone_integrity_check(offsets, b'0' * ase._size) + offsets, _ = d.next_offsets() + d.write_unchecked_data(offsets, b'0' * ase._size) assert offsets.chunk_num in d._unchecked_chunks ucc = d._unchecked_chunks[offsets.chunk_num] @@ -262,15 +279,19 @@ def test_postpone_integrity_check(tmpdir): assert ucc.file_path == d.local_path assert not ucc.temp + +def test_write_unchecked_hmac_data(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 32 ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() - d._postpone_integrity_check(offsets, b'0' * ase._size) + offsets, _ = d.next_offsets() + d.write_unchecked_hmac_data(offsets, b'0' * ase._size) assert offsets.chunk_num in d._unchecked_chunks ucc = d._unchecked_chunks[offsets.chunk_num] @@ -288,14 +309,16 @@ def test_perform_chunked_integrity_check(tmpdir): opts.chunk_size_bytes = 16 ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() + offsets, _ = d.next_offsets() data = b'0' * opts.chunk_size_bytes - d._postpone_integrity_check(offsets, data) - d.perform_chunked_integrity_check(offsets, data) + d.write_unchecked_data(offsets, data) + d.perform_chunked_integrity_check() assert d._next_integrity_chunk == 1 + assert 0 not in d._unchecked_chunks + assert len(d._unchecked_chunks) == 0 opts = mock.MagicMock() opts.check_file_md5 = False @@ -304,18 +327,23 @@ def test_perform_chunked_integrity_check(tmpdir): ase._size = 32 ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'123' - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() data = b'0' * opts.chunk_size_bytes - offsets1 = d.next_offsets() - d._postpone_integrity_check(offsets1, data) - ucc = d._unchecked_chunks[offsets1.chunk_num] - d.perform_chunked_integrity_check(offsets, data) + offsets, _ = d.next_offsets() + d.write_unchecked_hmac_data(offsets, data) + ucc = d._unchecked_chunks[offsets.chunk_num] + offsets1, _ = d.next_offsets() + d.write_unchecked_hmac_data(offsets1, data) + ucc1 = d._unchecked_chunks[offsets1.chunk_num] + 
d.perform_chunked_integrity_check() - assert d._next_integrity_chunk == 2 - assert not ucc.file_path.exists() assert not ucc.file_path.exists() + assert not ucc1.file_path.exists() + assert d._next_integrity_chunk == 2 + assert 0 not in d._unchecked_chunks + assert 1 not in d._unchecked_chunks + assert len(d._unchecked_chunks) == 0 def test_cleanup_all_temporary_files(tmpdir): @@ -325,22 +353,22 @@ def test_cleanup_all_temporary_files(tmpdir): ase = azmodels.StorageEntity('cont') ase._size = 16 lp = pathlib.Path(str(tmpdir.join('a'))) - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() + offsets, _ = d.next_offsets() data = b'0' * opts.chunk_size_bytes - d._postpone_integrity_check(offsets, data) + d.write_unchecked_data(offsets, data) assert len(d._unchecked_chunks) == 1 d.cleanup_all_temporary_files() assert not d.local_path.exists() assert not d._unchecked_chunks[0].file_path.exists() lp = pathlib.Path(str(tmpdir.join('b'))) - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() + offsets, _ = d.next_offsets() data = b'0' * opts.chunk_size_bytes - d._postpone_integrity_check(offsets, data) + d.write_unchecked_hmac_data(offsets, data) assert len(d._unchecked_chunks) == 1 d.local_path.unlink() d._unchecked_chunks[0].file_path.unlink() @@ -357,9 +385,9 @@ def test_write_data(tmpdir): opts.chunk_size_bytes = 16 ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) - offsets = d.next_offsets() + offsets, _ = d.next_offsets() data = b'0' * ase._size d.write_data(offsets, data) @@ -389,7 +417,8 @@ def test_finalize_file(tmpdir): message_authentication_code = util.base64_encode_as_string( _hmac.digest()) - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() d.hmac.update(data) d.finalize_file() @@ -410,7 +439,8 @@ def test_finalize_file(tmpdir): md5.update(data) ase._md5 = util.base64_encode_as_string(md5.digest()) - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() d.md5.update(data) d.finalize_file() @@ -428,7 +458,8 @@ def test_finalize_file(tmpdir): data = b'0' * ase._size - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() d.finalize_file() assert not d.local_path.exists() @@ -446,7 +477,8 @@ def test_finalize_file(tmpdir): data = b'0' * ase._size ase._md5 = 'oops' - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() d.md5.update(data) d.finalize_file() @@ -462,14 +494,11 @@ def test_operations(tmpdir): ase = azmodels.StorageEntity('cont') ase._size = 32 - d = models.Descriptor(lp, ase, opts) + d = models.Descriptor(lp, ase, opts, None) d._outstanding_ops = 1 d._unchecked_chunks = {0: None} assert not d.all_operations_completed - d.dec_outstanding_operations() - assert d._completed_ops == 1 - assert not d.all_operations_completed - + d._outstanding_ops -= 1 d._unchecked_chunks.pop(0) assert d.all_operations_completed diff --git a/tests/test_blobxfer_operations_azure_file.py b/tests/test_blobxfer_operations_azure_file.py index cb6b04e..34b3c32 100644 --- a/tests/test_blobxfer_operations_azure_file.py +++ b/tests/test_blobxfer_operations_azure_file.py @@ -109,7 +109,6 @@ def test_list_files_directory(patched_cisf): assert file.name == 'name' assert i == 1 - 
print('test') _dir = azure.storage.file.models.Directory(name='dirname') _file = azure.storage.file.models.File(name='dirname/name') client = mock.MagicMock() diff --git a/tests/test_blobxfer_operations_crypto.py b/tests/test_blobxfer_operations_crypto.py index 3ed2262..4dbe116 100644 --- a/tests/test_blobxfer_operations_crypto.py +++ b/tests/test_blobxfer_operations_crypto.py @@ -90,21 +90,32 @@ def test_aes_cbc_encryption(): assert decdata == plaindata -def test_cryptooffload_decrypt(): +def test_cryptooffload_decrypt(tmpdir): + symkey = ops.aes256_generate_random_key() + iv = os.urandom(16) + plainlen = 16 + plaindata = os.urandom(plainlen) + encdata = ops.aes_cbc_encrypt_data(symkey, iv, plaindata, False) + + afile = tmpdir.join('a') + afile.write(encdata, mode='wb') + hmacfile = str(afile) + bfile = tmpdir.join('b') + bfile.ensure(file=True) + a = None try: a = ops.CryptoOffload(1) offsets = blobxfer.models.download.Offsets( chunk_num=0, - fd_start=1, + fd_start=0, # this matters! num_bytes=2, range_end=3, range_start=4, unpad=False, ) a.add_decrypt_chunk( - 'fp', offsets, ops.aes256_generate_random_key(), os.urandom(16), - os.urandom(16)) + 'fp', str(bfile), offsets, symkey, iv, hmacfile) i = 33 checked = False while i > 0: @@ -113,12 +124,13 @@ def test_cryptooffload_decrypt(): time.sleep(0.3) i -= 1 continue - assert len(result) == 3 - assert result[0] == 'fp' - assert result[1] == offsets + assert result == 'fp' checked = True break assert checked + assert bfile.stat().size == plainlen + decdata = bfile.read(mode='rb') + assert decdata == plaindata finally: if a is not None: a.finalize_processes() diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index ea2be05..81ae200 100644 --- a/tests/test_blobxfer_operations_download.py +++ b/tests/test_blobxfer_operations_download.py @@ -252,11 +252,14 @@ def test_pre_md5_skip_on_check(): assert lpath in d._md5_map -def test_post_md5_skip_on_check(): +def test_post_md5_skip_on_check(tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._download_total = 0 + d._download_bytes_total = 0 d._md5_offload = mock.MagicMock() - lpath = 'lpath' + lp = tmpdir.join('lpath').ensure(file=True) + lpath = str(lp) rfile = azmodels.StorageEntity('cont') rfile._md5 = 'abc' d._pre_md5_skip_on_check(lpath, rfile) @@ -324,18 +327,18 @@ def test_check_for_crypto_done(): lpath = 'lpath' d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._download_set.add(pathlib.Path(lpath)) - d._dd_map[lpath] = mock.MagicMock() + dd = mock.MagicMock() + d._dd_map[lpath] = dd d._crypto_offload = mock.MagicMock() d._crypto_offload.done_cv = multiprocessing.Condition() d._crypto_offload.pop_done_queue.side_effect = [ None, - (lpath, mock.MagicMock(), mock.MagicMock()), + lpath, ] - d._complete_chunk_download = mock.MagicMock() d._all_remote_files_processed = False d._download_terminate = True d._check_for_crypto_done() - assert d._complete_chunk_download.call_count == 0 + assert dd.perform_chunked_integrity_check.call_count == 0 with mock.patch( 'blobxfer.operations.download.Downloader.termination_check', @@ -343,17 +346,18 @@ def test_check_for_crypto_done(): d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._download_set.add(pathlib.Path(lpath)) - d._dd_map[lpath] = mock.MagicMock() + dd = mock.MagicMock() + d._dd_map[lpath] = dd d._crypto_offload = mock.MagicMock() d._crypto_offload.done_cv = multiprocessing.Condition() 
d._crypto_offload.pop_done_queue.side_effect = [ None, - (lpath, mock.MagicMock(), mock.MagicMock()), + lpath, ] patched_tc.side_effect = [False, False, True] d._complete_chunk_download = mock.MagicMock() d._check_for_crypto_done() - assert d._complete_chunk_download.call_count == 1 + assert dd.perform_chunked_integrity_check.call_count == 1 def test_add_to_download_queue(tmpdir): @@ -386,26 +390,6 @@ def test_initialize_and_terminate_download_threads(): assert not thr.is_alive() -def test_complete_chunk_download(tmpdir): - lp = pathlib.Path(str(tmpdir.join('a'))) - opts = mock.MagicMock() - opts.check_file_md5 = False - opts.chunk_size_bytes = 16 - ase = azmodels.StorageEntity('cont') - ase._size = 16 - dd = models.Descriptor(lp, ase, opts) - - d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - offsets = dd.next_offsets() - data = b'0' * ase._size - - d._complete_chunk_download(offsets, data, dd) - - assert dd.local_path.exists() - assert dd.local_path.stat().st_size == len(data) - assert dd._completed_ops == 1 - - @mock.patch('blobxfer.operations.crypto.aes_cbc_decrypt_data') @mock.patch('blobxfer.operations.azure.file.get_file_range') @mock.patch('blobxfer.operations.azure.blob.get_blob_range') @@ -431,7 +415,6 @@ def test_worker_thread_download( new_callable=mock.PropertyMock) as patched_aoc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._complete_chunk_download = mock.MagicMock() opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 @@ -440,17 +423,18 @@ def test_worker_thread_download( ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' lp = pathlib.Path(str(tmpdir.join('a'))) - dd = models.Descriptor(lp, ase, opts) - dd.next_offsets = mock.MagicMock(side_effect=[None, None]) + dd = models.Descriptor(lp, ase, opts, None) + dd.next_offsets = mock.MagicMock( + side_effect=[(None, None), (None, None)]) dd.finalize_file = mock.MagicMock() + dd.perform_chunked_integrity_check = mock.MagicMock() patched_aoc.side_effect = [False, True] patched_tc.side_effect = [False, False, False, True] - d._dd_map[str(lp)] = mock.MagicMock() + d._dd_map[str(lp)] = dd d._download_set.add(lp) d._download_queue = mock.MagicMock() d._download_queue.get.side_effect = [queue.Empty, dd, dd] d._worker_thread_download() - assert d._complete_chunk_download.call_count == 0 assert str(lp) not in d._dd_map assert dd.finalize_file.call_count == 1 assert d._download_sofar == 1 @@ -468,17 +452,15 @@ def test_worker_thread_download( ase._size = 16 patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('b'))) - dd = models.Descriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts, None) dd.finalize_file = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() d._dd_map[str(lp)] = mock.MagicMock() d._download_set.add(lp) d._download_queue = mock.MagicMock() d._download_queue.get.side_effect = [dd] - d._complete_chunk_download = mock.MagicMock() patched_tc.side_effect = [False, True] d._worker_thread_download() - assert d._complete_chunk_download.call_count == 1 assert dd.perform_chunked_integrity_check.call_count == 1 with mock.patch( @@ -497,21 +479,20 @@ def test_worker_thread_download( ase._encryption.content_encryption_iv = b'0' * 16 patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('c'))) - dd = models.Descriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts, None) dd.finalize_file = mock.MagicMock() + dd.write_unchecked_hmac_data = 
mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() d._crypto_offload = mock.MagicMock() d._crypto_offload.add_decrypt_chunk = mock.MagicMock() - d._dd_map[str(lp)] = mock.MagicMock() + d._dd_map[str(lp)] = dd d._download_set.add(lp) d._download_queue = mock.MagicMock() d._download_queue.get.side_effect = [dd] - d._complete_chunk_download = mock.MagicMock() patched_tc.side_effect = [False, True] d._worker_thread_download() - assert d._complete_chunk_download.call_count == 0 assert d._crypto_offload.add_decrypt_chunk.call_count == 1 - assert dd.perform_chunked_integrity_check.call_count == 1 + assert dd.write_unchecked_hmac_data.call_count == 1 with mock.patch( 'blobxfer.operations.download.Downloader.termination_check', @@ -530,19 +511,19 @@ def test_worker_thread_download( ase._encryption.content_encryption_iv = b'0' * 16 patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('d'))) - dd = models.Descriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts, None) dd.next_offsets() + dd.write_unchecked_hmac_data = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() patched_acdd.return_value = b'0' * 16 d._dd_map[str(lp)] = mock.MagicMock() d._download_set.add(lp) d._download_queue = mock.MagicMock() d._download_queue.get.side_effect = [dd] - d._complete_chunk_download = mock.MagicMock() patched_tc.side_effect = [False, True] d._worker_thread_download() - assert d._complete_chunk_download.call_count == 1 assert patched_acdd.call_count == 1 + assert dd.write_unchecked_hmac_data.call_count == 1 assert dd.perform_chunked_integrity_check.call_count == 1 @@ -553,7 +534,8 @@ def test_cleanup_temporary_files(tmpdir): opts.chunk_size_bytes = 16 ase = azmodels.StorageEntity('cont') ase._size = 16 - dd = models.Descriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts, None) + dd._allocate_disk_space() dd.cleanup_all_temporary_files = mock.MagicMock() dd.cleanup_all_temporary_files.side_effect = Exception d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) @@ -568,7 +550,8 @@ def test_cleanup_temporary_files(tmpdir): opts.chunk_size_bytes = 16 ase = azmodels.StorageEntity('cont') ase._size = 16 - dd = models.Descriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts, None) + dd._allocate_disk_space() d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._general_options.resume_file = None d._dd_map[0] = dd @@ -581,7 +564,8 @@ def test_cleanup_temporary_files(tmpdir): opts.chunk_size_bytes = 16 ase = azmodels.StorageEntity('cont') ase._size = 16 - dd = models.Descriptor(lp, ase, opts) + dd = models.Descriptor(lp, ase, opts, None) + dd._allocate_disk_space() dd.cleanup_all_temporary_files = mock.MagicMock() dd.cleanup_all_temporary_files.side_effect = Exception d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) @@ -641,6 +625,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): patched_lfmo._check_thread = mock.MagicMock() d._general_options.concurrency.crypto_processes = 1 d._general_options.concurrency.md5_processes = 1 + d._general_options.resume_file = None d._spec.sources = [] d._spec.options = mock.MagicMock() d._spec.options.chunk_size_bytes = 1 @@ -691,6 +676,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): def test_start_keyboard_interrupt(): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.resume_file = None d._run = 
mock.MagicMock(side_effect=KeyboardInterrupt) d._wait_for_download_threads = mock.MagicMock() d._cleanup_temporary_files = mock.MagicMock() From 158a67fefc6ba0c404641b10bbb3303b13979d56 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Wed, 19 Apr 2017 14:36:10 -0700 Subject: [PATCH 22/47] Improve test coverage - Add exception handling for download worker threads - Add resume tests - Update test_requirements to conditionally install mock - Use unittest.mock if present --- blobxfer/models/azure.py | 1 - blobxfer/models/download.py | 38 ++- blobxfer/operations/download.py | 158 +++++++------ setup.py | 4 +- test_requirements.txt | 2 +- tests/test_blobxfer_models_azure.py | 5 +- tests/test_blobxfer_models_download.py | 230 ++++++++++++++++++- tests/test_blobxfer_models_offload.py | 5 +- tests/test_blobxfer_models_options.py | 5 +- tests/test_blobxfer_models_resume.py | 32 +++ tests/test_blobxfer_operations_azure.py | 5 +- tests/test_blobxfer_operations_azure_blob.py | 5 +- tests/test_blobxfer_operations_azure_file.py | 5 +- tests/test_blobxfer_operations_crypto.py | 5 +- tests/test_blobxfer_operations_download.py | 73 +++++- tests/test_blobxfer_operations_progress.py | 5 +- tests/test_blobxfer_operations_resume.py | 65 ++++++ tests/test_blobxfer_retry.py | 5 +- tests/test_blobxfer_util.py | 40 ++++ 19 files changed, 569 insertions(+), 119 deletions(-) create mode 100644 tests/test_blobxfer_models_resume.py create mode 100644 tests/test_blobxfer_operations_resume.py diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index 9f28ca3..afc971d 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -66,7 +66,6 @@ def __init__(self, container, ed=None): self._md5 = None self._encryption = ed self._vio = None - self.download = None @property def client(self): diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index 7780378..77f2757 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -329,7 +329,7 @@ def _resume(self): :rtype: int or None :return: verified download offset """ - if self._resume_mgr is None or self._offset != 0: + if self._resume_mgr is None or self._offset > 0 or self._finalized: return None # check if path exists in resume db rr = self._resume_mgr.get_record(str(self.final_path)) @@ -346,12 +346,11 @@ def _resume(self): logger.debug('nothing to resume for {}'.format(self.final_path)) return None curr_chunk = rr.next_integrity_chunk - curr_offset = curr_chunk * rr.chunk_size # set offsets if completed and the final path exists if rr.completed and self.final_path.exists(): - logger.debug('{} download already completed'.format( - self.final_path)) with self._meta_lock: + logger.debug('{} download already completed'.format( + self.final_path)) self._offset = self._ase.size self._chunk_num = curr_chunk self._chunk_size = rr.chunk_size @@ -375,34 +374,22 @@ def _resume(self): 'unexpected hmac object for entity {}/{}'.format( self._ase.container, self._ase.name)) # re-hash from 0 to offset if needed + _fd_offset = 0 + _end_offset = min((curr_chunk * rr.chunk_size, rr.length)) if self.md5 is not None and curr_chunk > 0: - pagealign = ( - self._ase.mode == blobxfer.models.azure.StorageModes.Page - ) - _fd_offset = 0 - _end_offset = min( - (curr_chunk * rr.chunk_size, rr.length) - ) + _blocksize = blobxfer.util.MEGABYTE << 2 logger.debug( 'integrity checking existing file {} to offset {}'.format( self.final_path, _end_offset)) with self._hasher_lock: with self.local_path.open('rb') as filedesc: while _fd_offset < 
_end_offset: - _blocksize = blobxfer.util.MEGABYTE << 2 if (_fd_offset + _blocksize) > _end_offset: _blocksize = _end_offset - _fd_offset - buf = filedesc.read(_blocksize) - buflen = len(buf) - if pagealign and buflen < _blocksize: - aligned = blobxfer.\ - util.page_align_content_length(buflen) - if aligned != buflen: - buf = buf.ljust(aligned, b'\0') - self.md5.update(buf) + _buf = filedesc.read(_blocksize) + self.md5.update(_buf) _fd_offset += _blocksize - del _fd_offset - del _end_offset + del _blocksize # compare hashes hexdigest = self.md5.hexdigest() if rr.md5hexdigest != hexdigest: @@ -414,13 +401,14 @@ def _resume(self): return None # set values from resume with self._meta_lock: - self._offset = curr_offset + self._offset = _end_offset self._chunk_num = curr_chunk self._chunk_size = rr.chunk_size self._total_chunks = self._compute_total_chunks(rr.chunk_size) self._next_integrity_chunk = rr.next_integrity_chunk - self._outstanding_ops = \ + self._outstanding_ops = ( self._total_chunks - self._next_integrity_chunk + ) logger.debug( ('resuming file {} from byte={} chunk={} chunk_size={} ' 'total_chunks={} next_integrity_chunk={} ' @@ -428,7 +416,7 @@ def _resume(self): self.final_path, self._offset, self._chunk_num, self._chunk_size, self._total_chunks, self._next_integrity_chunk, self._outstanding_ops)) - return curr_offset + return _end_offset def cleanup_all_temporary_files(self): # type: (Descriptor) -> None diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index bb63f3f..f1112e1 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -96,6 +96,7 @@ def __init__(self, general_options, creds, spec): self._creds = creds self._spec = spec self._resume = None + self._exceptions = [] @property def termination_check(self): @@ -107,6 +108,7 @@ def termination_check(self): """ with self._download_lock: return (self._download_terminate or + len(self._exceptions) > 0 or (self._all_remote_files_processed and len(self._download_set) == 0)) @@ -369,79 +371,92 @@ def _worker_thread_download(self): dd = self._download_queue.get(False, 0.25) except queue.Empty: continue - # update progress bar - self._update_progress_bar() - # get download offsets - offsets, resume_bytes = dd.next_offsets() - # add resume bytes to counter - if resume_bytes is not None: - with self._download_lock: - self._download_bytes_sofar += resume_bytes - del resume_bytes - # check if all operations completed - if offsets is None and dd.all_operations_completed: - # finalize file - dd.finalize_file() - # accounting + try: + self._process_download_descriptor(dd) + except Exception as e: with self._download_lock: - if dd.entity.is_encrypted: - self._dd_map.pop(str(dd.final_path)) - self._download_set.remove(dd.final_path) - self._download_sofar += 1 - continue - # re-enqueue for other threads to download - self._download_queue.put(dd) - if offsets is None: - continue - # issue get range - if dd.entity.mode == blobxfer.models.azure.StorageModes.File: - data = blobxfer.operations.azure.file.get_file_range( - dd.entity, offsets, self._general_options.timeout_sec) - else: - data = blobxfer.operations.azure.blob.get_blob_range( - dd.entity, offsets, self._general_options.timeout_sec) + self._exceptions.append(e) + + def _process_download_descriptor(self, dd): + # type: (Downloader, blobxfer.models.download.Descriptor) -> None + """Process download descriptor + :param Downloader self: this + :param blobxfer.models.download.Descriptor: download descriptor + """ + # update 
progress bar + self._update_progress_bar() + # get download offsets + offsets, resume_bytes = dd.next_offsets() + # add resume bytes to counter + if resume_bytes is not None: + with self._download_lock: + self._download_bytes_sofar += resume_bytes + logger.debug('adding {} sofar {} from {}'.format( + resume_bytes, self._download_bytes_sofar, dd._ase.name)) + del resume_bytes + # check if all operations completed + if offsets is None and dd.all_operations_completed: + # finalize file + dd.finalize_file() # accounting with self._download_lock: - self._download_bytes_sofar += offsets.num_bytes - # decrypt if necessary - if dd.entity.is_encrypted: - # slice data to proper bounds and get iv for chunk - if offsets.chunk_num == 0: - # set iv - iv = dd.entity.encryption_metadata.content_encryption_iv - # set data to decrypt - encdata = data - # send iv through hmac - dd.hmac_iv(iv) - else: - # set iv - iv = data[:blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES] - # set data to decrypt - encdata = data[ - blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES:] - # write encdata to disk for hmac later - _hmac_datafile = dd.write_unchecked_hmac_data( - offsets, encdata) - # decrypt data - if self._crypto_offload is not None: - self._crypto_offload.add_decrypt_chunk( - str(dd.final_path), str(dd.local_path), offsets, - dd.entity.encryption_metadata.symmetric_key, - iv, _hmac_datafile) - # data will be integrity checked and written once - # retrieved from crypto queue - continue - else: - data = blobxfer.operations.crypto.aes_cbc_decrypt_data( - dd.entity.encryption_metadata.symmetric_key, - iv, encdata, offsets.unpad) - dd.write_data(offsets, data) + if dd.entity.is_encrypted: + self._dd_map.pop(str(dd.final_path)) + self._download_set.remove(dd.final_path) + self._download_sofar += 1 + return + # re-enqueue for other threads to download + self._download_queue.put(dd) + if offsets is None: + return + # issue get range + if dd.entity.mode == blobxfer.models.azure.StorageModes.File: + data = blobxfer.operations.azure.file.get_file_range( + dd.entity, offsets, self._general_options.timeout_sec) + else: + data = blobxfer.operations.azure.blob.get_blob_range( + dd.entity, offsets, self._general_options.timeout_sec) + # accounting + with self._download_lock: + self._download_bytes_sofar += offsets.num_bytes + # decrypt if necessary + if dd.entity.is_encrypted: + # slice data to proper bounds and get iv for chunk + if offsets.chunk_num == 0: + # set iv + iv = dd.entity.encryption_metadata.content_encryption_iv + # set data to decrypt + encdata = data + # send iv through hmac + dd.hmac_iv(iv) else: - # write data to disk - dd.write_unchecked_data(offsets, data) - # integrity check data and write to disk (this is called - # regardless of md5/hmac enablement for resume purposes) - dd.perform_chunked_integrity_check() + # set iv + iv = data[:blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES] + # set data to decrypt + encdata = data[blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES:] + # write encdata to disk for hmac later + _hmac_datafile = dd.write_unchecked_hmac_data( + offsets, encdata) + # decrypt data + if self._crypto_offload is not None: + self._crypto_offload.add_decrypt_chunk( + str(dd.final_path), str(dd.local_path), offsets, + dd.entity.encryption_metadata.symmetric_key, + iv, _hmac_datafile) + # data will be integrity checked and written once + # retrieved from crypto queue + return + else: + data = blobxfer.operations.crypto.aes_cbc_decrypt_data( + dd.entity.encryption_metadata.symmetric_key, + iv, encdata, 
offsets.unpad) + dd.write_data(offsets, data) + else: + # write data to disk + dd.write_unchecked_data(offsets, data) + # integrity check data and write to disk (this is called + # regardless of md5/hmac enablement for resume purposes) + dd.perform_chunked_integrity_check() def _cleanup_temporary_files(self): # type: (Downloader) -> None @@ -565,6 +580,11 @@ def _run(self): end_time = blobxfer.util.datetime_now() # update progress bar self._update_progress_bar() + # check for exceptions + if len(self._exceptions) > 0: + logger.error('exceptions encountered while downloading') + # raise the first one + raise self._exceptions[0] # check for mismatches if (self._download_sofar != self._download_total or self._download_bytes_sofar != self._download_bytes_total): diff --git a/setup.py b/setup.py index 5a2d6c6..2725ade 100644 --- a/setup.py +++ b/setup.py @@ -39,14 +39,14 @@ ] install_requires = [ - 'azure-common==1.1.4', + 'azure-common==1.1.5', 'azure-storage==0.34.0', 'click==6.7', 'cryptography>=1.8.1', 'future==0.16.0', 'python-dateutil==2.6.0', 'requests==2.13.0', - 'ruamel.yaml==0.14.5', + 'ruamel.yaml==0.14.8', ] if sys.version_info < (3, 4): diff --git a/test_requirements.txt b/test_requirements.txt index f2315c3..bc58365 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,5 +1,5 @@ flake8>=3.3.0 -mock>=2.0.0 +mock>=2.0.0; python_version < '3.3' pypandoc>=1.3.3 pytest>=3.0.7 pytest-cov>=2.4.0 diff --git a/tests/test_blobxfer_models_azure.py b/tests/test_blobxfer_models_azure.py index 37a40a5..6ddc95b 100644 --- a/tests/test_blobxfer_models_azure.py +++ b/tests/test_blobxfer_models_azure.py @@ -2,7 +2,10 @@ """Tests for models azure""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import azure.storage import azure.storage.blob diff --git a/tests/test_blobxfer_models_download.py b/tests/test_blobxfer_models_download.py index 548ebf8..6c62ce1 100644 --- a/tests/test_blobxfer_models_download.py +++ b/tests/test_blobxfer_models_download.py @@ -4,18 +4,23 @@ # stdlib imports import hashlib import hmac -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock import os try: import pathlib2 as pathlib except ImportError: # noqa import pathlib +import unittest # non-stdlib imports import pytest # local imports import blobxfer.models.azure as azmodels import blobxfer.models.options as options import blobxfer.operations.azure as azops +import blobxfer.operations.resume as rops import blobxfer.util as util # module under test import blobxfer.models.download as models @@ -100,6 +105,7 @@ def test_downloaddescriptor(tmpdir): ase._encryption.symmetric_key = b'123' d = models.Descriptor(lp, ase, opts, None) + assert not d._allocated d._allocate_disk_space() assert d.entity == ase @@ -111,6 +117,9 @@ def test_downloaddescriptor(tmpdir): assert d._allocated assert d.local_path.stat().st_size == 1024 - 16 + d._allocate_disk_space() + assert d._allocated + d.local_path.unlink() ase._size = 1 d = models.Descriptor(lp, ase, opts, None) @@ -136,6 +145,146 @@ def test_downloaddescriptor(tmpdir): assert d.local_path.stat().st_size == 0 +@unittest.skipIf(util.on_python2(), 'fallocate does not exist') +def test_downloaddescriptor_allocate_disk_space_via_seek(tmpdir): + fp = pathlib.Path(str(tmpdir.join('fp'))) + lp = pathlib.Path(str(tmpdir.join('fp.bxtmp'))) + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 256 + ase = azmodels.StorageEntity('cont') 
+ ase._size = 128 + ase._name = 'blob' + d = models.Descriptor(fp, ase, opts, None) + + with mock.patch('os.posix_fallocate') as patched_fallocate: + patched_fallocate.side_effect = [AttributeError()] + d._allocate_disk_space() + assert d._allocated + assert not fp.exists() + assert lp.stat().st_size == ase._size + + +def test_downloaddescriptor_resume(tmpdir): + resumefile = pathlib.Path(str(tmpdir.join('resume'))) + fp = pathlib.Path(str(tmpdir.join('fp'))) + lp = pathlib.Path(str(tmpdir.join('fp.bxtmp'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 256 + ase = azmodels.StorageEntity('cont') + ase._size = 128 + ase._name = 'blob' + + # test no record + rmgr = rops.DownloadResumeManager(resumefile) + d = models.Descriptor(fp, ase, opts, rmgr) + rb = d._resume() + assert rb is None + + # test length mismatch + rmgr.add_or_update_record(str(fp), str(lp), 127, 0, 0, False, None) + rb = d._resume() + assert rb is None + + # test nothing to resume + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + + rmgr.add_or_update_record(str(fp), str(lp), ase._size, 0, 0, False, None) + d = models.Descriptor(fp, ase, opts, rmgr) + rb = d._resume() + assert rb is None + + # test completion + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + + rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, True, None) + d = models.Descriptor(fp, ase, opts, rmgr) + fp.touch() + rb = d._resume() + assert rb == ase._size + + # test encrypted no resume + fp.unlink() + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'123' + rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, False, None) + d = models.Descriptor(fp, ase, opts, rmgr) + rb = d._resume() + assert rb is None + + # test if intermediate file not exists + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + ase = azmodels.StorageEntity('cont') + ase._size = 128 + ase._name = 'blob' + + rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, False, None) + d = models.Descriptor(fp, ase, opts, rmgr) + rb = d._resume() + assert rb is None + + # ensure hmac not populated + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + ase = azmodels.StorageEntity('cont') + ase._size = 128 + ase._name = 'blob' + lp.touch() + + rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, False, None) + d = models.Descriptor(fp, ase, opts, rmgr) + d.hmac = True + with pytest.raises(RuntimeError): + d._resume() + + # md5 hash check + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + + data = os.urandom(32) + with lp.open('wb') as f: + f.write(data) + md5 = util.new_md5_hasher() + md5.update(data) + + rmgr.add_or_update_record( + str(fp), str(lp), ase._size, 32, 1, False, md5.hexdigest()) + d = models.Descriptor(fp, ase, opts, rmgr) + rb = d._resume() + assert rb == 32 + + # md5 hash mismatch + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + rmgr.add_or_update_record( + str(fp), str(lp), ase._size, 32, 1, False, 'abc') + d = models.Descriptor(fp, ase, opts, rmgr) + rb = d._resume() + assert rb is None + + # md5 hash check as page file + rmgr.delete() + rmgr = rops.DownloadResumeManager(resumefile) + ase = azmodels.StorageEntity('cont') + ase._size = 128 + ase._name = 'blob' + ase._mode = azmodels.StorageModes.Page + + rmgr.add_or_update_record( + str(fp), str(lp), ase._size, 32, 1, False, md5.hexdigest()) + d = models.Descriptor(fp, ase, opts, rmgr) + rb 
= d._resume() + assert rb == 32 + + def test_downloaddescriptor_next_offsets(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) @@ -259,6 +408,24 @@ def test_downloaddescriptor_next_offsets(tmpdir): assert d.next_offsets() == (None, None) +def test_hmac_iv(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 256 + ase = azmodels.StorageEntity('cont') + ase._size = 128 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'123' + ase._size = 128 + d = models.Descriptor(lp, ase, opts, None) + + iv = b'abc' + d.hmac_iv(iv) + assert d.hmac.update.call_count == 1 + + def test_write_unchecked_data(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) @@ -345,6 +512,48 @@ def test_perform_chunked_integrity_check(tmpdir): assert 1 not in d._unchecked_chunks assert len(d._unchecked_chunks) == 0 + # check integrity with resume + resumefile = pathlib.Path(str(tmpdir.join('resume'))) + fp = pathlib.Path(str(tmpdir.join('fp'))) + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = azmodels.StorageEntity('cont') + ase._size = 32 + ase._name = 'blob' + rmgr = rops.DownloadResumeManager(resumefile) + d = models.Descriptor(fp, ase, opts, rmgr) + + data = b'0' * opts.chunk_size_bytes + md5 = util.new_md5_hasher() + md5.update(data) + offsets, _ = d.next_offsets() + d.write_unchecked_hmac_data(offsets, data) + d.perform_chunked_integrity_check() + assert d._next_integrity_chunk == 1 + assert len(d._unchecked_chunks) == 0 + dr = rmgr.get_record(str(fp)) + assert dr.next_integrity_chunk == 1 + assert dr.md5hexdigest == md5.hexdigest() + + +def test_update_resume_for_completed(tmpdir): + resumefile = pathlib.Path(str(tmpdir.join('resume'))) + fp = pathlib.Path(str(tmpdir.join('fp'))) + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = azmodels.StorageEntity('cont') + ase._size = 32 + ase._name = 'blob' + rmgr = rops.DownloadResumeManager(resumefile) + d = models.Descriptor(fp, ase, opts, rmgr) + offsets, _ = d.next_offsets() + d._update_resume_for_completed() + dr = rmgr.get_record(str(fp)) + assert dr.completed + def test_cleanup_all_temporary_files(tmpdir): opts = mock.MagicMock() @@ -396,6 +605,25 @@ def test_write_data(tmpdir): def test_finalize_file(tmpdir): + # already finalized + lp = pathlib.Path(str(tmpdir.join('af'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + ase = azmodels.StorageEntity('cont') + ase._size = 32 + + data = b'0' * ase._size + + d = models.Descriptor(lp, ase, opts, None) + d._allocate_disk_space() + d._finalized = True + d.finalize_file() + + assert d.local_path.exists() + assert not d.final_path.exists() + d.local_path.unlink() + # hmac check success lp = pathlib.Path(str(tmpdir.join('a'))) opts = mock.MagicMock() diff --git a/tests/test_blobxfer_models_offload.py b/tests/test_blobxfer_models_offload.py index ca5a2bb..24351e3 100644 --- a/tests/test_blobxfer_models_offload.py +++ b/tests/test_blobxfer_models_offload.py @@ -2,7 +2,10 @@ """Tests for offload""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import pytest # local imports diff --git a/tests/test_blobxfer_models_options.py b/tests/test_blobxfer_models_options.py index 4716f27..1ee72bb 100644 --- a/tests/test_blobxfer_models_options.py +++ b/tests/test_blobxfer_models_options.py @@ -2,7 +2,10 @@ """Tests for models options""" # 
stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock try: import pathlib2 as pathlib except ImportError: # noqa diff --git a/tests/test_blobxfer_models_resume.py b/tests/test_blobxfer_models_resume.py new file mode 100644 index 0000000..55a6009 --- /dev/null +++ b/tests/test_blobxfer_models_resume.py @@ -0,0 +1,32 @@ +# coding=utf-8 +"""Tests for models resume""" + +# stdlib imports +# non-stdlib imports +# module under test +import blobxfer.models.resume as rmodels + + +def test_download(): + d = rmodels.Download('fp', 'tp', 1, 2, 0, False, '') + assert d.final_path == 'fp' + assert d.temp_path == 'tp' + assert d.length == 1 + assert d.chunk_size == 2 + assert d.next_integrity_chunk == 0 + assert not d.completed + assert d.md5hexdigest == '' + + d.md5hexdigest = None + assert d.md5hexdigest == '' + + d.md5hexdigest = 'abc' + assert d.md5hexdigest == 'abc' + + d.next_integrity_chunk = 1 + assert d.next_integrity_chunk == 1 + + d.completed = True + assert d.completed + + assert len(str(d)) > 0 diff --git a/tests/test_blobxfer_operations_azure.py b/tests/test_blobxfer_operations_azure.py index bfe976d..346fab6 100644 --- a/tests/test_blobxfer_operations_azure.py +++ b/tests/test_blobxfer_operations_azure.py @@ -2,7 +2,10 @@ """Tests for operations azure""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import azure.storage import azure.storage.blob diff --git a/tests/test_blobxfer_operations_azure_blob.py b/tests/test_blobxfer_operations_azure_blob.py index 3880d1a..0ed626a 100644 --- a/tests/test_blobxfer_operations_azure_blob.py +++ b/tests/test_blobxfer_operations_azure_blob.py @@ -2,7 +2,10 @@ """Tests for general blob operations""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import azure.common import azure.storage.blob diff --git a/tests/test_blobxfer_operations_azure_file.py b/tests/test_blobxfer_operations_azure_file.py index 34b3c32..2a45428 100644 --- a/tests/test_blobxfer_operations_azure_file.py +++ b/tests/test_blobxfer_operations_azure_file.py @@ -2,7 +2,10 @@ """Tests for file operations""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import azure.common import azure.storage diff --git a/tests/test_blobxfer_operations_crypto.py b/tests/test_blobxfer_operations_crypto.py index 4dbe116..d3fdc62 100644 --- a/tests/test_blobxfer_operations_crypto.py +++ b/tests/test_blobxfer_operations_crypto.py @@ -2,7 +2,10 @@ """Tests for crypto operations""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock import os import time # non-stdlib imports diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index 81ae200..749e835 100644 --- a/tests/test_blobxfer_operations_download.py +++ b/tests/test_blobxfer_operations_download.py @@ -3,8 +3,10 @@ # stdlib imports import datetime -import dateutil.tz -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock import multiprocessing try: import pathlib2 as pathlib @@ -16,6 +18,7 @@ import Queue as queue # non-stdlib imports import azure.storage.blob +import dateutil.tz import pytest # local imports import blobxfer.models.azure as azmodels @@ -340,6 +343,7 @@ def test_check_for_crypto_done(): 
d._check_for_crypto_done() assert dd.perform_chunked_integrity_check.call_count == 0 + # check successful integrity check call with mock.patch( 'blobxfer.operations.download.Downloader.termination_check', new_callable=mock.PropertyMock) as patched_tc: @@ -359,6 +363,25 @@ def test_check_for_crypto_done(): d._check_for_crypto_done() assert dd.perform_chunked_integrity_check.call_count == 1 + # check KeyError on result + with mock.patch( + 'blobxfer.operations.download.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._download_set.add(pathlib.Path(lpath)) + dd = mock.MagicMock() + d._crypto_offload = mock.MagicMock() + d._crypto_offload.done_cv = multiprocessing.Condition() + d._crypto_offload.pop_done_queue.side_effect = [ + None, + lpath, + ] + patched_tc.side_effect = [False, False, True] + d._complete_chunk_download = mock.MagicMock() + d._check_for_crypto_done() + assert dd.perform_chunked_integrity_check.call_count == 0 + def test_add_to_download_queue(tmpdir): path = tmpdir.join('a') @@ -406,6 +429,27 @@ def test_worker_thread_download( d._worker_thread_download() assert d._complete_chunk_download.call_count == 0 + with mock.patch( + 'blobxfer.operations.download.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + patched_tc.side_effect = [False, False, True] + ase = azmodels.StorageEntity('cont') + ase._size = 16 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = b'abc' + lp = pathlib.Path(str(tmpdir.join('exc'))) + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 16 + dd = models.Descriptor(lp, ase, opts, None) + d._download_queue = mock.MagicMock() + d._download_queue.get.side_effect = [queue.Empty, dd] + d._process_download_descriptor = mock.MagicMock() + d._process_download_descriptor.side_effect = RuntimeError('oops') + d._worker_thread_download() + assert len(d._exceptions) == 1 + assert d._process_download_descriptor.call_count == 1 + with mock.patch( 'blobxfer.operations.download.Downloader.termination_check', new_callable=mock.PropertyMock) as patched_tc: @@ -425,7 +469,7 @@ def test_worker_thread_download( lp = pathlib.Path(str(tmpdir.join('a'))) dd = models.Descriptor(lp, ase, opts, None) dd.next_offsets = mock.MagicMock( - side_effect=[(None, None), (None, None)]) + side_effect=[(None, 1), (None, 2)]) dd.finalize_file = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() patched_aoc.side_effect = [False, True] @@ -438,6 +482,7 @@ def test_worker_thread_download( assert str(lp) not in d._dd_map assert dd.finalize_file.call_count == 1 assert d._download_sofar == 1 + assert d._download_bytes_sofar == 3 with mock.patch( 'blobxfer.operations.download.Downloader.termination_check', @@ -610,14 +655,13 @@ def test_delete_extraneous_files(tmpdir): d._delete_extraneous_files() -@mock.patch('time.clock') @mock.patch('blobxfer.operations.md5.LocalFileMd5Offload') @mock.patch('blobxfer.operations.azure.blob.list_blobs') @mock.patch( 'blobxfer.operations.download.Downloader.ensure_local_destination', return_value=True ) -def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): +def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._cleanup_temporary_files = mock.MagicMock() d._download_start = datetime.datetime.now(tz=dateutil.tz.tzlocal()) @@ -625,7 +669,7 @@ def 
test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): patched_lfmo._check_thread = mock.MagicMock() d._general_options.concurrency.crypto_processes = 1 d._general_options.concurrency.md5_processes = 1 - d._general_options.resume_file = None + d._general_options.resume_file = pathlib.Path(str(tmpdir.join('rf'))) d._spec.sources = [] d._spec.options = mock.MagicMock() d._spec.options.chunk_size_bytes = 1 @@ -647,19 +691,15 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): b = azure.storage.blob.models.Blob(name='name') b.properties.content_length = 1 patched_lb.side_effect = [[b]] - d._pre_md5_skip_on_check = mock.MagicMock() - d._check_download_conditions = mock.MagicMock() d._check_download_conditions.return_value = ops.DownloadAction.Skip - patched_tc.side_effect = [1, 2] d.start() assert d._pre_md5_skip_on_check.call_count == 0 patched_lb.side_effect = [[b]] d._all_remote_files_processed = False d._check_download_conditions.return_value = ops.DownloadAction.CheckMd5 - patched_tc.side_effect = [1, 2] with pytest.raises(RuntimeError): d.start() assert d._pre_md5_skip_on_check.call_count == 1 @@ -668,11 +708,22 @@ def test_start(patched_eld, patched_lb, patched_lfmo, patched_tc, tmpdir): patched_lb.side_effect = [[b]] d._all_remote_files_processed = False d._check_download_conditions.return_value = ops.DownloadAction.Download - patched_tc.side_effect = [1, 2] with pytest.raises(RuntimeError): d.start() assert d._download_queue.qsize() == 1 + # test exception count + b = azure.storage.blob.models.Blob(name='name') + b.properties.content_length = 1 + patched_lb.side_effect = [[b]] + d._pre_md5_skip_on_check = mock.MagicMock() + d._check_download_conditions = mock.MagicMock() + d._check_download_conditions.return_value = ops.DownloadAction.Skip + d._exceptions = [RuntimeError('oops')] + with pytest.raises(RuntimeError): + d.start() + assert d._pre_md5_skip_on_check.call_count == 0 + def test_start_keyboard_interrupt(): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) diff --git a/tests/test_blobxfer_operations_progress.py b/tests/test_blobxfer_operations_progress.py index 7cb0776..75f9f79 100644 --- a/tests/test_blobxfer_operations_progress.py +++ b/tests/test_blobxfer_operations_progress.py @@ -2,7 +2,10 @@ """Tests for progress operations""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports # local imports import blobxfer.util as util diff --git a/tests/test_blobxfer_operations_resume.py b/tests/test_blobxfer_operations_resume.py new file mode 100644 index 0000000..52f11b8 --- /dev/null +++ b/tests/test_blobxfer_operations_resume.py @@ -0,0 +1,65 @@ +# coding=utf-8 +"""Tests for operations resume""" + +# stdlib imports +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +# module under test +import blobxfer.operations.resume as ops + + +def test_download_resume_manager(tmpdir): + tmpdb = pathlib.Path(str(tmpdir.join('tmp.db'))) + + drm = ops.DownloadResumeManager(tmpdb) + assert drm._data is not None + drm.close() + assert drm._data is None + assert tmpdb.exists() + drm.delete() + assert drm._data is None + assert not tmpdb.exists() + + final_path = 'fp' + drm = ops.DownloadResumeManager(tmpdb) + drm.add_or_update_record(final_path, 'tp', 1, 2, 0, False, None) + d = drm.get_record(final_path) + + assert d.final_path == final_path + + drm.add_or_update_record(final_path, 'tp', 1, 2, 1, False, 
'abc') + d = drm.get_record(final_path) + + assert d.final_path == final_path + assert not d.completed + assert d.next_integrity_chunk == 1 + assert d.md5hexdigest == 'abc' + + drm.add_or_update_record(final_path, 'tp', 1, 2, 1, True, None) + d = drm.get_record(final_path) + + assert d.final_path == final_path + assert d.completed + assert d.next_integrity_chunk == 1 + assert d.md5hexdigest == 'abc' + + # idempotent check after completed + drm.add_or_update_record(final_path, 'tp', 1, 2, 1, True, None) + d = drm.get_record(final_path) + + assert d.final_path == final_path + assert d.completed + assert d.next_integrity_chunk == 1 + assert d.md5hexdigest == 'abc' + + drm.close() + assert drm._data is None + assert tmpdb.exists() + + tmpdb.unlink() + drm.delete() + assert drm._data is None + assert not tmpdb.exists() diff --git a/tests/test_blobxfer_retry.py b/tests/test_blobxfer_retry.py index b66c41e..9d84b90 100644 --- a/tests/test_blobxfer_retry.py +++ b/tests/test_blobxfer_retry.py @@ -2,7 +2,10 @@ """Tests for retry""" # stdlib imports -import mock +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import pytest # module under test diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py index 0f94c0e..64294d3 100644 --- a/tests/test_blobxfer_util.py +++ b/tests/test_blobxfer_util.py @@ -2,6 +2,14 @@ """Tests for util""" # stdlib imports +try: + import unittest.mock as mock +except ImportError: # noqa + import mock +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib import sys # non-stdlib imports import pytest @@ -90,6 +98,38 @@ def test_scantree(tmpdir): assert len(found) == 2 +def test_replace_file(tmpdir): + src = pathlib.Path(str(tmpdir.join('src'))) + dst = pathlib.Path(str(tmpdir.join('dst'))) + src.touch() + dst.touch() + + replace_avail = sys.version_info >= (3, 3) + + with mock.patch( + 'sys.version_info', + new_callable=mock.PropertyMock(return_value=(3, 2, 0))): + blobxfer.util.replace_file(src, dst) + assert not src.exists() + assert dst.exists() + + dst.unlink() + src.touch() + dst.touch() + + with mock.patch( + 'sys.version_info', + new_callable=mock.PropertyMock(return_value=(3, 3, 0))): + if replace_avail: + blobxfer.util.replace_file(src, dst) + assert not src.exists() + assert dst.exists() + else: + src = mock.MagicMock() + blobxfer.util.replace_file(src, dst) + assert src.replace.call_count == 1 + + def test_get_mime_type(): a = 'b.txt' mt = blobxfer.util.get_mime_type(a) From 27126cbc7beb0de7550ab96d8b06af9eabda8c40 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 20 Apr 2017 09:03:53 -0700 Subject: [PATCH 23/47] Fix snapshot support - Add rename option for single file download - Disable MD5 checking if source doesn't have MD5 stored --- blobxfer/models/download.py | 7 +-- blobxfer/models/options.py | 2 + blobxfer/models/upload.py | 32 +++++++++-- blobxfer/operations/azure/blob/__init__.py | 5 +- blobxfer/operations/download.py | 20 ++++--- blobxfer/operations/progress.py | 2 + blobxfer/util.py | 4 +- tests/test_blobxfer_models_download.py | 14 +++-- tests/test_blobxfer_models_upload.py | 4 +- tests/test_blobxfer_operations_download.py | 63 +++++++++++++++++++--- tests/test_blobxfer_util.py | 5 +- 11 files changed, 126 insertions(+), 32 deletions(-) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index 77f2757..dd26515 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -148,7 +148,7 @@ def __init__( # type: (Specification, 
blobxfer.models.options.Download, # blobxfer.models.options.SkipOn, LocalDestinationPath) -> None """Ctor for Specification - :param DownloadSepcification self: this + :param DownloadSpecification self: this :param blobxfer.models.options.Download download_options: download options :param blobxfer.models.options.SkipOn skip_on_options: skip on options @@ -162,7 +162,7 @@ def __init__( def add_azure_source_path(self, source): # type: (Specification, AzureSourcePath) -> None """Add an Azure Source Path - :param DownloadSepcification self: this + :param DownloadSpecification self: this :param AzureSourcePath source: Azure source path to add """ self.sources.append(source) @@ -281,7 +281,8 @@ def _initialize_integrity_checkers(self, options): 'symmetric key is invalid: provide RSA private key ' 'or metadata corrupt') self.hmac = self._ase.encryption_metadata.initialize_hmac() - if self.hmac is None and options.check_file_md5: + if (self.hmac is None and options.check_file_md5 and + blobxfer.util.is_not_empty(self._ase.md5)): self.md5 = blobxfer.util.new_md5_hasher() def _allocate_disk_space(self): diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index 08ba42a..10b53b1 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -65,6 +65,7 @@ 'mode', 'overwrite', 'recursive', + 'rename', 'rsa_private_key', 'rsa_public_key', 'store_file_attributes', @@ -82,6 +83,7 @@ 'mode', 'overwrite', 'recursive', + 'rename', 'restore_file_attributes', 'rsa_private_key', ] diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index 607b001..d8c898e 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -54,12 +54,12 @@ ) -class LocalSourcePaths(blobxfer.models._BaseSourcePaths): - """Local Source Paths""" +class LocalSourcePath(blobxfer.models._BaseSourcePaths): + """Local Source Path""" def files(self): # type: (LocalSourcePaths) -> LocalPath """Generator for files in paths - :param LocalSourcePaths self: this + :param LocalSourcePath self: this :rtype: LocalPath :return: LocalPath """ @@ -73,3 +73,29 @@ def files(self): 'skipping file {} due to filters'.format(_rpath)) continue yield LocalPath(parent_path=_expath, relative_path=_rpath) + + +class Specification(object): + """Upload Specification""" + def __init__( + self, upload_options, skip_on_options, remote_destination_path): + # type: (Specification, blobxfer.models.options.Upload, + # blobxfer.models.options.SkipOn, RemoteDestinationPath) -> None + """Ctor for Specification + :param UploadSpecification self: this + :param blobxfer.models.options.Upload upload_options: upload options + :param blobxfer.models.options.SkipOn skip_on_options: skip on options + :param RemoteDestinationPath remote_destination_path: remote dest path + """ + self.options = upload_options + self.skip_on = skip_on_options + self.destination = remote_destination_path + self.sources = [] + + def add_local_source_path(self, source): + # type: (Specification, LocalSourcePath) -> None + """Add a Local Source Path + :param UploadSpecification self: this + :param LocalSourcePath source: Local source path to add + """ + self.sources.append(source) diff --git a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index 0d49ed0..c94c9c9 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -78,9 +78,10 @@ def list_blobs(client, container, prefix, mode, recursive, timeout=None): if mode == blobxfer.models.azure.StorageModes.File: raise 
RuntimeError('cannot list Azure Files from blob client') if blobxfer.util.blob_is_snapshot(prefix): - snapshot = blobxfer.util.parse_blob_snapshot_parameter(prefix) + base_blob, snapshot = blobxfer.util.parse_blob_snapshot_parameter( + prefix) blob = client.get_blob_properties( - container_name=container, blob_name=prefix, snapshot=snapshot, + container_name=container, blob_name=base_blob, snapshot=snapshot, timeout=timeout) yield blob return diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index f1112e1..f0e08ec 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -149,12 +149,14 @@ def ensure_local_destination(creds, spec): spec.sources[0].lookup_storage_account(rpath)) if (spec.options.mode == blobxfer.models.azure.StorageModes.File): - if blobxfer.operations.azure.file.check_if_single_file( - sa.file_client, cont, dir)[0]: + if (blobxfer.operations.azure.file.check_if_single_file( + sa.file_client, cont, dir)[0] and + spec.options.rename): spec.destination.is_dir = False else: - if blobxfer.operations.azure.blob.check_if_single_blob( - sa.block_blob_client, cont, dir): + if (blobxfer.operations.azure.blob.check_if_single_blob( + sa.block_blob_client, cont, dir) and + spec.options.rename): spec.destination.is_dir = False logger.debug('dest is_dir={} for {} specs'.format( spec.destination.is_dir, len(spec.sources))) @@ -194,7 +196,8 @@ def _check_download_conditions(self, lpath, rfile): lpath, rfile.container, rfile.name)) return DownloadAction.Skip # check skip on options, MD5 match takes priority - if self._spec.skip_on.md5_match: + if (self._spec.skip_on.md5_match and + blobxfer.util.is_not_empty(rfile.md5)): return DownloadAction.CheckMd5 # if neither of the remaining skip on actions are activated, download if (not self._spec.skip_on.filesize_match and @@ -542,7 +545,12 @@ def _run(self): nfiles += 1 total_size += rfile.size # form local path for remote file - lpath = pathlib.Path(self._spec.destination.path, rfile.name) + if (not self._spec.destination.is_dir and + self._spec.options.rename): + lpath = pathlib.Path(self._spec.destination.path) + else: + lpath = pathlib.Path( + self._spec.destination.path, rfile.name) # remove from delete after set try: self._delete_after.remove(lpath) diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index b5ec8e9..99d7445 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -132,6 +132,8 @@ def output_download_parameters(general_options, spec): spec.options.overwrite)) log.append(' recursive: {}'.format( spec.options.recursive)) + log.append(' rename single: {}'.format( + spec.options.rename)) log.append(' file attributes: {}'.format( spec.options.restore_file_attributes)) log.append(' rsa private key: {}'.format( diff --git a/blobxfer/util.py b/blobxfer/util.py index 7d48ceb..9b4e644 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -297,6 +297,6 @@ def parse_blob_snapshot_parameter(url): """ if blob_is_snapshot(url): tmp = url.split('?snapshot=') - if len(tmp) > 1: - return tmp[-1] + if len(tmp) == 2: + return tmp[0], tmp[1] return None diff --git a/tests/test_blobxfer_models_download.py b/tests/test_blobxfer_models_download.py index 6c62ce1..918a7f0 100644 --- a/tests/test_blobxfer_models_download.py +++ b/tests/test_blobxfer_models_download.py @@ -66,6 +66,7 @@ def test_downloadspecification(): mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, + rename=False, restore_file_attributes=False, 
rsa_private_key=None, ), @@ -266,6 +267,7 @@ def test_downloaddescriptor_resume(tmpdir): rmgr = rops.DownloadResumeManager(resumefile) rmgr.add_or_update_record( str(fp), str(lp), ase._size, 32, 1, False, 'abc') + ase._md5 = 'abc' d = models.Descriptor(fp, ase, opts, rmgr) rb = d._resume() assert rb is None @@ -519,17 +521,21 @@ def test_perform_chunked_integrity_check(tmpdir): opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 + + data = b'0' * opts.chunk_size_bytes + md5 = util.new_md5_hasher() + md5.update(data) + ase = azmodels.StorageEntity('cont') ase._size = 32 ase._name = 'blob' + ase._md5 = md5.hexdigest() + rmgr = rops.DownloadResumeManager(resumefile) d = models.Descriptor(fp, ase, opts, rmgr) - data = b'0' * opts.chunk_size_bytes - md5 = util.new_md5_hasher() - md5.update(data) offsets, _ = d.next_offsets() - d.write_unchecked_hmac_data(offsets, data) + d.write_unchecked_data(offsets, data) d.perform_chunked_integrity_check() assert d._next_integrity_chunk == 1 assert len(d._unchecked_chunks) == 0 diff --git a/tests/test_blobxfer_models_upload.py b/tests/test_blobxfer_models_upload.py index 21d9494..e6447d7 100644 --- a/tests/test_blobxfer_models_upload.py +++ b/tests/test_blobxfer_models_upload.py @@ -25,7 +25,7 @@ def test_localsourcepaths_files(tmpdir): defpath.join('world.txt').write('world') defpath.join('moo.cow').write('y') - a = upload.LocalSourcePaths() + a = upload.LocalSourcePath() a.add_include('*.txt') a.add_includes(['moo.cow', '*blah*']) with pytest.raises(ValueError): @@ -45,7 +45,7 @@ def test_localsourcepaths_files(tmpdir): assert str(defpath.join('world.txt')) in a_set assert str(defpath.join('moo.cow')) not in a_set - b = upload.LocalSourcePaths() + b = upload.LocalSourcePath() b.add_includes(['moo.cow', '*blah*']) b.add_include('*.txt') b.add_excludes(['world.txt']) diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index 749e835..08702b1 100644 --- a/tests/test_blobxfer_operations_download.py +++ b/tests/test_blobxfer_operations_download.py @@ -34,8 +34,9 @@ @mock.patch('blobxfer.operations.azure.blob.check_if_single_blob') def test_ensure_local_destination(patched_blob, patched_file, tmpdir): downdir = tmpdir.join('down') + downdir.mkdir() - # non-file tests + # no spec sources ds = models.Specification( download_options=options.Download( check_file_md5=True, @@ -44,6 +45,7 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, + rename=False, restore_file_attributes=False, rsa_private_key=None, ), @@ -52,25 +54,42 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): str(downdir) ), ) - with pytest.raises(RuntimeError): ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) + # blob directory asp = azops.SourcePath() p = 'cont/remote/path' asp.add_path_with_storage_account(p, 'sa') - ds.add_azure_source_path(asp) - patched_blob.return_value = False ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) assert ds.destination.is_dir + # blob single file + rename + ds = models.Specification( + download_options=options.Download( + check_file_md5=True, + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + rename=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=mock.MagicMock(), + local_destination_path=models.LocalDestinationPath( + str(downdir) + 
), + ) + ds.add_azure_source_path(asp) patched_blob.return_value = True with pytest.raises(RuntimeError): ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) - # file tests + # file directory ds = models.Specification( download_options=options.Download( check_file_md5=True, @@ -79,6 +98,7 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): mode=azmodels.StorageModes.File, overwrite=True, recursive=True, + rename=False, restore_file_attributes=False, rsa_private_key=None, ), @@ -87,13 +107,30 @@ def test_ensure_local_destination(patched_blob, patched_file, tmpdir): str(downdir) ), ) - ds.add_azure_source_path(asp) - patched_file.return_value = (False, None) ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) assert ds.destination.is_dir + # file single + rename + ds = models.Specification( + download_options=options.Download( + check_file_md5=True, + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.File, + overwrite=True, + recursive=True, + rename=True, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=mock.MagicMock(), + local_destination_path=models.LocalDestinationPath( + str(downdir) + ), + ) + ds.add_azure_source_path(asp) patched_file.return_value = (True, mock.MagicMock()) with pytest.raises(RuntimeError): ops.Downloader.ensure_local_destination(mock.MagicMock(), ds) @@ -113,6 +150,7 @@ def test_check_download_conditions(tmpdir): mode=azmodels.StorageModes.Auto, overwrite=False, recursive=True, + rename=False, restore_file_attributes=False, rsa_private_key=None, ), @@ -137,6 +175,7 @@ def test_check_download_conditions(tmpdir): mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, + rename=False, restore_file_attributes=False, rsa_private_key=None, ), @@ -148,7 +187,9 @@ def test_check_download_conditions(tmpdir): local_destination_path=models.LocalDestinationPath('dest'), ) d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) - result = d._check_download_conditions(ep, mock.MagicMock()) + rfile = mock.MagicMock() + rfile.md5 = 'abc' + result = d._check_download_conditions(ep, rfile) assert result == ops.DownloadAction.CheckMd5 ds = models.Specification( @@ -159,6 +200,7 @@ def test_check_download_conditions(tmpdir): mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, + rename=False, restore_file_attributes=False, rsa_private_key=None, ), @@ -181,6 +223,7 @@ def test_check_download_conditions(tmpdir): mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, + rename=False, restore_file_attributes=False, rsa_private_key=None, ), @@ -211,6 +254,7 @@ def test_check_download_conditions(tmpdir): mode=azmodels.StorageModes.Auto, overwrite=True, recursive=True, + rename=False, restore_file_attributes=False, rsa_private_key=None, ), @@ -675,6 +719,7 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d._spec.options.chunk_size_bytes = 1 d._spec.options.mode = azmodels.StorageModes.Auto d._spec.options.overwrite = True + d._spec.options.rename = False d._spec.skip_on = mock.MagicMock() d._spec.skip_on.md5_match = False d._spec.skip_on.lmt_ge = False @@ -716,6 +761,8 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): b = azure.storage.blob.models.Blob(name='name') b.properties.content_length = 1 patched_lb.side_effect = [[b]] + d._spec.destination.is_dir = False + d._spec.options.rename = True d._pre_md5_skip_on_check = mock.MagicMock() d._check_download_conditions = mock.MagicMock() 
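        # Illustrative sketch (not part of the patch itself): the rename
        # behavior being set up in this test changes how the Downloader forms
        # the local path for a single remote entity. Per the
        # blobxfer/operations/download.py hunk earlier in this patch, the
        # logic is roughly:
        #
        #   if not spec.destination.is_dir and spec.options.rename:
        #       lpath = pathlib.Path(spec.destination.path)
        #   else:
        #       lpath = pathlib.Path(spec.destination.path, rfile.name)
        #
        # so a single blob or file can be downloaded directly to the named
        # destination instead of destination/<remote name>.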
d._check_download_conditions.return_value = ops.DownloadAction.Skip diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py index 64294d3..e294a0e 100644 --- a/tests/test_blobxfer_util.py +++ b/tests/test_blobxfer_util.py @@ -215,9 +215,10 @@ def test_blob_is_snapshot(): def test_parse_blob_snapshot_parameter(): + base = '/cont/a' param = '2017-02-23T22:21:14.8121864Z' - a = '/cont/a?snapshot=' + param - assert blobxfer.util.parse_blob_snapshot_parameter(a) == param + a = base + '?snapshot=' + param + assert blobxfer.util.parse_blob_snapshot_parameter(a) == (base, param) a = '/cont/a?snapshot=' assert blobxfer.util.parse_blob_snapshot_parameter(a) is None From 6c33bc1d174da8e9aab426b3ba3256f851383c9f Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 16 May 2017 15:05:06 -0700 Subject: [PATCH 24/47] Monkeypatch python sdk timeout value --- blobxfer/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/blobxfer/__init__.py b/blobxfer/__init__.py index e05319c..8babc97 100644 --- a/blobxfer/__init__.py +++ b/blobxfer/__init__.py @@ -28,3 +28,6 @@ import azure.storage azure.storage._constants.USER_AGENT_STRING = 'blobxfer/{} {}'.format( __version__, azure.storage._constants.USER_AGENT_STRING) + +# monkeypatch SOCKET_TIMEOUT value in Azure Storage SDK +azure.storage._constants.SOCKET_TIMEOUT = (5, 300) From 85183bae71abcd294efc9fc6b3fe341a2cde89ce Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 16 May 2017 15:05:58 -0700 Subject: [PATCH 25/47] Begin upload work --- blobxfer/api.py | 3 + blobxfer/models/azure.py | 61 +- blobxfer/models/crypto.py | 23 + blobxfer/models/download.py | 24 +- blobxfer/models/options.py | 11 +- blobxfer/models/upload.py | 350 ++++++++++- blobxfer/operations/azure/__init__.py | 49 ++ blobxfer/operations/azure/blob/__init__.py | 29 + blobxfer/operations/azure/file.py | 118 +++- blobxfer/operations/crypto.py | 36 +- blobxfer/operations/download.py | 8 +- blobxfer/operations/md5.py | 3 + blobxfer/operations/progress.py | 80 ++- blobxfer/operations/upload.py | 681 +++++++++++++++++++++ cli/cli.py | 53 +- cli/settings.py | 114 +++- 16 files changed, 1560 insertions(+), 83 deletions(-) create mode 100644 blobxfer/operations/upload.py diff --git a/blobxfer/api.py b/blobxfer/api.py index f8c3378..9034b2e 100644 --- a/blobxfer/api.py +++ b/blobxfer/api.py @@ -48,3 +48,6 @@ from .operations.download import ( # noqa Downloader ) +from .operations.upload import ( # noqa + Uploader +) diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index afc971d..fbc319f 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -97,6 +97,16 @@ def name(self): """ return self._name + @property + def path(self): + # type: (StorageEntity) -> str + """Entity path + :param StorageEntity self: this + :rtype: str + :return: remote path of entity + """ + return '{}/{}'.format(self._container, self._name) + @property def lmt(self): # type: (StorageEntity) -> datetime.datetime @@ -117,6 +127,15 @@ def size(self): """ return self._size + @size.setter + def size(self, value): + # type: (StorageEntity, int) -> None + """Set entity size + :param StorageEntity self: this + :param int value: value + """ + self._size = value + @property def snapshot(self): # type: (StorageEntity) -> str @@ -161,13 +180,23 @@ def is_encrypted(self): def encryption_metadata(self): # type: (StorageEntity) -> # blobxfer.models.crypto.EncryptionMetadata - """Entity metadata (type) + """Get encryption metadata :param StorageEntity self: this :rtype: 
blobxfer.models.crypto.EncryptionMetadata :return: encryption metadata of entity """ return self._encryption + @encryption_metadata.setter + def encryption_metadata(self, value): + # type: (StorageEntity, + # blobxfer.models.crypto.EncryptionMetadata) -> None + """Set encryption metadata + :param StorageEntity self: this + :param blobxfer.models.crypto.EncryptionMetadata value: value + """ + self._encryption = value + def populate_from_blob(self, sa, blob): # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, # azure.storage.blob.models.Blob) -> None @@ -206,3 +235,33 @@ def populate_from_file(self, sa, file): self._md5 = file.properties.content_settings.content_md5 self._mode = StorageModes.File self._client = sa.file_client + + def populate_from_local(self, sa, container, name, mode): + # type: (StorageEntity, blobxfer.operations.azure.StorageAccount + # str, str, blobxfer.models.azure.StorageModes) -> None + """Populate properties from local + :param StorageEntity self: this + :param blobxfer.operations.azure.StorageAccount sa: storage account + :param str container: container + :param str name: name + :param blobxfer.models.azure.StorageModes mode: storage mode + """ + self._container = container + self._name = name + self._mode = mode + if mode == StorageModes.Append: + self._client = sa.append_blob_client + elif mode == StorageModes.Block: + self._client = sa.block_blob_client + elif mode == StorageModes.File: + self._client = sa.file_client + elif mode == StorageModes.Page: + self._client = sa.page_blob_client + elif mode == StorageModes.Auto: + name = self.name.lower() + if name.endswith('.vhd') or name.endswith('.vhdx'): + self._client = sa.page_blob_client + self._mode = StorageModes.Page + else: + self._client = sa.block_blob_client + self._mode = StorageModes.Block diff --git a/blobxfer/models/crypto.py b/blobxfer/models/crypto.py index 91a2f4a..56c0c79 100644 --- a/blobxfer/models/crypto.py +++ b/blobxfer/models/crypto.py @@ -35,6 +35,7 @@ import hashlib import hmac import json +import os # non-stdlib imports # local imports import blobxfer.models.offload @@ -126,6 +127,7 @@ def __init__(self): self.encryption_metadata_authentication = None self._symkey = None self._signkey = None + self._rsa_public_key = None @property def symmetric_key(self): @@ -163,6 +165,27 @@ def encryption_metadata_exists(md): pass return False + def create_new_metadata(self, rsa_public_key): + # type: (EncryptionMetadata, + # cryptography.hazmat.primitives.asymmetric.rsa.RSAPublicKey) + # -> None + """Create new metadata entries for encryption (upload) + :param EncryptionMetadata self: this + :param cryptography.hazmat.primitives.asymmetric.rsa.RSAPublicKey: + rsa public key + """ + self._rsa_public_key = rsa_public_key + self._symkey = os.urandom( + blobxfer.operations.crypto._AES256_KEYLENGTH_BYTES) + self._signkey = os.urandom( + blobxfer.operations.crypto._AES256_KEYLENGTH_BYTES) + self.content_encryption_iv = os.urandom(AES256_BLOCKSIZE_BYTES) + self.encryption_agent = EncryptionAgent( + encryption_algorithm=EncryptionMetadata._ENCRYPTION_ALGORITHM, + protocol=EncryptionMetadata._ENCRYPTION_PROTOCOL_VERSION, + ) + self.encryption_mode = EncryptionMetadata._ENCRYPTION_MODE + def convert_from_json(self, md, blobname, rsaprivatekey): # type: (EncryptionMetadata, dict, str, # cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index dd26515..e1c9266 100644 --- a/blobxfer/models/download.py +++ 
b/blobxfer/models/download.py @@ -158,12 +158,17 @@ def __init__( self.skip_on = skip_on_options self.destination = local_destination_path self.sources = [] + # validate compatible options + if not self.options.check_file_md5 and self.skip_on.md5_match: + raise ValueError( + 'Cannot specify skip on MD5 match without file MD5 enabled') def add_azure_source_path(self, source): - # type: (Specification, AzureSourcePath) -> None + # type: (Specification, blobxfer.operations.azure.SourcePath) -> None """Add an Azure Source Path :param DownloadSpecification self: this - :param AzureSourcePath source: Azure source path to add + :param blobxfer.operations.Azure.SourcePath source: + Azure source path to add """ self.sources.append(source) @@ -174,7 +179,7 @@ class Descriptor(object): _AES_BLOCKSIZE = blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES def __init__(self, lpath, ase, options, resume_mgr): - # type: (DownloadDescriptior, pathlib.Path, + # type: (Descriptior, pathlib.Path, # blobxfer.models.azure.StorageEntity, # blobxfer.models.options.Download, # blobxfer.operations.resume.DownloadResumeManager) -> None @@ -362,8 +367,8 @@ def _resume(self): return self._ase.size # encrypted files are not resumable due to hmac requirement if self._ase.is_encrypted: - logger.debug('cannot resume encrypted entity {}/{}'.format( - self._ase.container, self._ase.name)) + logger.debug('cannot resume encrypted entity {}'.format( + self._ase.path)) return None # check if intermediate (blobtmp) exists if not self.local_path.exists(): @@ -372,8 +377,7 @@ def _resume(self): return None if self.hmac is not None: raise RuntimeError( - 'unexpected hmac object for entity {}/{}'.format( - self._ase.container, self._ase.name)) + 'unexpected hmac object for entity {}'.format(self._ase.path)) # re-hash from 0 to offset if needed _fd_offset = 0 _end_offset = min((curr_chunk * rr.chunk_size, rr.length)) @@ -623,7 +627,7 @@ def finalize_file(self): self._ase.encryption_metadata.encryption_authentication. 
algorithm, 'OK' if check else 'MISMATCH', - self._ase.name, + self._ase.path, digest, mac, ) @@ -633,14 +637,14 @@ def finalize_file(self): check = True msg = 'MD5: {}, {} {} {}'.format( 'OK' if check else 'MISMATCH', - self._ase.name, + self._ase.path, digest, self._ase.md5, ) else: check = True msg = 'MD5: SKIPPED, {} None {}'.format( - self._ase.name, + self._ase.path, self._ase.md5 ) # cleanup if download failed diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index 10b53b1..9a3af9f 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -58,6 +58,12 @@ 'md5_match', ] ) +FileProperties = collections.namedtuple( + 'FileProperties', [ + 'attributes', + 'md5', + ] +) Upload = collections.namedtuple( 'Upload', [ 'chunk_size_bytes', @@ -66,13 +72,10 @@ 'overwrite', 'recursive', 'rename', - 'rsa_private_key', 'rsa_public_key', - 'store_file_attributes', - 'store_file_md5', + 'store_file_properties', 'strip_components', 'vectored_io', - 'split_size_bytes', ] ) Download = collections.namedtuple( diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index d8c898e..cbe1816 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -31,31 +31,85 @@ next, oct, open, pow, round, super, filter, map, zip) # stdlib imports import collections +import enum import logging +import math import os try: import pathlib2 as pathlib except ImportError: # noqa import pathlib +import threading # non-stdlib imports # local imports import blobxfer.models +import blobxfer.models.crypto import blobxfer.util # create logger logger = logging.getLogger(__name__) +# global defines +_MAX_BLOCK_CHUNKSIZE_BYTES = 268435456 +_MAX_NONBLOCK_CHUNKSIZE_BYTES = 4194304 -LocalPath = collections.namedtuple( - 'LocalPath', [ - 'parent_path', - 'relative_path', +# named tuples +Offsets = collections.namedtuple( + 'Offsets', [ + 'chunk_num', + 'block_id', + 'fd_start', + 'num_bytes', + 'range_end', + 'range_start', + 'pad', ] ) +class VectoredIoDistributionMode(enum.Enum): + Disabled = 'disabled' + Stripe = 'stripe' + Replica = 'replica' + + def __str__(self): + return self.value + + +class LocalPath(object): + def __init__(self, parent_path, relative_path): + self.parent_path = parent_path + self.relative_path = relative_path + # populate properties + self._stat = self.absolute_path.stat() + + @property + def absolute_path(self): + return self.parent_path / self.relative_path + + @property + def size(self): + return self._stat.st_size + + @property + def mode(self): + return str(oct(self._stat.st_mode)) + + @property + def uid(self): + return self._stat.st_uid + + @property + def gid(self): + return self._stat.st_gid + + class LocalSourcePath(blobxfer.models._BaseSourcePaths): """Local Source Path""" + + def can_rename(self): + return len(self._paths) == 1 and self._paths[0].is_file() + def files(self): # type: (LocalSourcePaths) -> LocalPath """Generator for files in paths @@ -65,7 +119,16 @@ def files(self): """ for _path in self._paths: _ppath = os.path.expandvars(os.path.expanduser(str(_path))) - _expath = pathlib.Path(_ppath) + _expath = pathlib.Path(_ppath).resolve() + # check if path is a single file + tmp = pathlib.Path(_ppath) + if tmp.is_file(): + yield LocalPath( + parent_path=tmp.parent, + relative_path=pathlib.Path(tmp.name) + ) + continue + del tmp for entry in blobxfer.util.scantree(_ppath): _rpath = pathlib.Path(entry.path).relative_to(_ppath) if not self._inclusion_check(_rpath): @@ -78,24 +141,281 @@ def files(self): class Specification(object): 
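# Illustrative usage sketch for the LocalSourcePath/LocalPath classes above
# (the path and filter pattern are hypothetical, not taken from the patch):
#
#   lsp = LocalSourcePath()
#   lsp.add_path('/data/photos')       # a directory or a single file
#   lsp.add_include('*.jpg')
#   if lsp.can_rename():               # True only for a single-file source
#       print('single file source, rename is permitted')
#   for lp in lsp.files():             # yields LocalPath objects
#       print(lp.parent_path, lp.relative_path, lp.size, lp.mode)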
"""Upload Specification""" def __init__( - self, upload_options, skip_on_options, remote_destination_path): + self, upload_options, skip_on_options, local_source_path): # type: (Specification, blobxfer.models.options.Upload, - # blobxfer.models.options.SkipOn, RemoteDestinationPath) -> None + # blobxfer.models.options.SkipOn, LocalSourcePath) -> None """Ctor for Specification :param UploadSpecification self: this :param blobxfer.models.options.Upload upload_options: upload options :param blobxfer.models.options.SkipOn skip_on_options: skip on options - :param RemoteDestinationPath remote_destination_path: remote dest path + :param LocalSourcePath local_source_path: local source path """ self.options = upload_options self.skip_on = skip_on_options - self.destination = remote_destination_path - self.sources = [] + self.destinations = [] + self.sources = local_source_path + # validate options + if self.options.rename: + # ensure only one internal path is present + if len(self.sources.paths) > 1: + raise ValueError( + 'cannot add more than one internal source path if rename ' + 'is specified') + # check if internal source path is directory and rename is enabled + if self.sources.paths[0].is_dir(): + raise ValueError( + 'cannot rename a directory of files to upload') + if (self.options.rsa_public_key and + self.options.vectored_io. + multi_storage_account_distribution_mode == + VectoredIoDistributionMode.Stripe): + raise ValueError( + 'cannot enable encryption and multi-storage account ' + 'vectored IO in stripe mode') + if self.options.chunk_size_bytes <= 0: + raise ValueError('chunk size must be positive') + if self.options.chunk_size_bytes > _MAX_BLOCK_CHUNKSIZE_BYTES: + raise ValueError( + ('chunk size value of {} exceeds maximum allowable ' + 'of {}').format( + self.options.chunk_size_bytes, + _MAX_BLOCK_CHUNKSIZE_BYTES)) - def add_local_source_path(self, source): - # type: (Specification, LocalSourcePath) -> None - """Add a Local Source Path + def add_azure_destination_path(self, dest): + # type: (Specification, + # blobxfer.operations.azure.DestinationPath) -> None + """Add a remote Azure Destination path :param UploadSpecification self: this - :param LocalSourcePath source: Local source path to add + :param blobxfer.operations.azure.DestinationPath dest: + Remote destination path + """ + self.destinations.append(dest) + + +class Descriptor(object): + """Upload Descriptor""" + + _AES_BLOCKSIZE = blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES + + def __init__(self, lpath, ase, uid, options, resume_mgr): + # type: (Descriptior, LocalPath, + # blobxfer.models.azure.StorageEntity, str, + # blobxfer.models.options.Upload, + # blobxfer.operations.resume.UploadResumeManager) -> None + """Ctor for Descriptor + :param Descriptor self: this + :param LocalPath lpath: local path + :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity + :param str uid: unique id + :param blobxfer.models.options.Upload options: download options + :param blobxfer.operations.resume.UploadResumeManager resume_mgr: + upload resume manager + """ + self.local_path = lpath + self.unique_id = uid + self._offset = 0 + self._chunk_num = 0 + self._next_integrity_chunk = 0 + self._finalized = False + self._meta_lock = threading.Lock() + self._hasher_lock = threading.Lock() + self._resume_mgr = resume_mgr + self._ase = ase + self.current_iv = None + self._initialize_encryption(options) + # calculate the total number of ops required for transfer + self._compute_remote_size() + self._adjust_chunk_size(options) + 
self._total_chunks = self._compute_total_chunks(self._chunk_size) + self._outstanding_ops = self._total_chunks + # initialize integrity checkers + self.hmac = None + self.md5 = None + self._initialize_integrity_checkers(options) + + @property + def entity(self): + # type: (Descriptor) -> blobxfer.models.azure.StorageEntity + """Get linked blobxfer.models.azure.StorageEntity + :param Descriptor self: this + :rtype: blobxfer.models.azure.StorageEntity + :return: blobxfer.models.azure.StorageEntity + """ + return self._ase + + @property + def must_compute_md5(self): + # type: (Descriptor) -> bool + """Check if MD5 must be computed + :param Descriptor self: this + :rtype: bool + :return: if MD5 must be computed + """ + return self.md5 is not None + + @property + def all_operations_completed(self): + # type: (Descriptor) -> bool + """All operations are completed + :param Descriptor self: this + :rtype: bool + :return: if all operations completed + """ + with self._meta_lock: + return (self._outstanding_ops == 0 and + len(self._unchecked_chunks) == 0) + + @property + def is_resumable(self): + # type: (Descriptor) -> bool + """Download is resume capable + :param Descriptor self: this + :rtype: bool + :return: if resumable + """ + return self._resume_mgr is not None and self.hmac is None + + def hmac_iv(self, iv): + # type: (Descriptor, bytes) -> None + """Send IV through hasher + :param Descriptor self: this + :param bytes iv: iv + """ + with self._hasher_lock: + self.hmac.update(iv) + + def _initialize_encryption(self, options): + # type: (Descriptor, blobxfer.models.options.Upload) -> None + """Download is resume capable + :param Descriptor self: this + :param blobxfer.models.options.Upload options: upload options + """ + if options.rsa_public_key is not None: + em = blobxfer.models.crypto.EncryptionMetadata() + em.create_new_metadata(options.rsa_public_key) + self.current_iv = em.content_encryption_iv + self._ase.encryption_metadata = em + + def _compute_remote_size(self): + # type: (Descriptor, int) -> None + """Compute total remote file size + :param Descriptor self: this + :rtype: int + :return: remote file size + """ + size = self.local_path.size + if size > 0: + if self._ase.is_encrypted: + # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs + allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ + self._AES_BLOCKSIZE + else: + allocatesize = size + if allocatesize < 0: + allocatesize = 0 + else: + allocatesize = 0 + self._ase.size = allocatesize + logger.debug('remote size for {} is {} bytes'.format( + self._ase.path, self._ase.size)) + + def _adjust_chunk_size(self, options): + # type: (Descriptor, blobxfer.models.options.Upload) -> None + """Adjust chunk size for entity mode + :param Descriptor self: this + :param blobxfer.models.options.Upload options: upload options + """ + self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) + # ensure chunk sizes are compatible with mode + if self._ase.mode == blobxfer.models.azure.StorageModes.Append: + if self._chunk_size > _MAX_NONBLOCK_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_NONBLOCK_CHUNKSIZE_BYTES + logger.debug( + 'adjusting chunk size to {} for append blobs'.format( + self._chunk_size)) + elif self._ase.mode == blobxfer.models.azure.StorageModes.Block: + if self._chunk_size > _MAX_BLOCK_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_BLOCK_CHUNKSIZE_BYTES + logger.debug( + 'adjusting chunk size to {} for block blobs'.format( + self._chunk_size)) + elif self._ase.mode == blobxfer.models.azure.StorageModes.File: + if 
self._chunk_size > _MAX_NONBLOCK_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_NONBLOCK_CHUNKSIZE_BYTES + logger.debug( + 'adjusting chunk size to {} for files'.format( + self._chunk_size)) + elif self._ase.mode == blobxfer.models.azure.StorageModes.Page: + if self._chunk_size > _MAX_NONBLOCK_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_NONBLOCK_CHUNKSIZE_BYTES + logger.debug( + 'adjusting chunk size to {} for page blobs'.format( + self._chunk_size)) + + def _compute_total_chunks(self, chunk_size): + # type: (Descriptor, int) -> int + """Compute total number of chunks for entity + :param Descriptor self: this + :param int chunk_size: chunk size + :rtype: int + :return: num chunks + """ + try: + return int(math.ceil(self._ase.size / chunk_size)) + except ZeroDivisionError: + return 0 + + def _initialize_integrity_checkers(self, options): + # type: (Descriptor, blobxfer.models.options.Upload) -> None + """Initialize file integrity checkers + :param Descriptor self: this + :param blobxfer.models.options.Upload options: upload options + """ + if self._ase.is_encrypted: + # ensure symmetric key exists + if blobxfer.util.is_none_or_empty( + self._ase.encryption_metadata.symmetric_key): + raise RuntimeError( + 'symmetric key is invalid: provide RSA private key ' + 'or metadata corrupt') + self.hmac = self._ase.encryption_metadata.initialize_hmac() + if self.hmac is None and options.store_file_properties.md5: + self.md5 = blobxfer.util.new_md5_hasher() + + def next_offsets(self): + # type: (Descriptor) -> Offsets + """Retrieve the next offsets + :param Descriptor self: this + :rtype: Offsets + :return: upload offsets """ - self.sources.append(source) + # TODO RESUME +# resume_bytes = self._resume() + resume_bytes = None + with self._meta_lock: +# if self._offset >= self._ase.size: +# return None, resume_bytes + if self._offset + self._chunk_size > self._ase.size: + chunk = self._ase.size - self._offset + else: + chunk = self._chunk_size + num_bytes = chunk + chunk_num = self._chunk_num + fd_start = self._offset + range_start = self._offset + range_end = self._offset + num_bytes - 1 + self._offset += chunk + self._chunk_num += 1 + if self._ase.is_encrypted and self._offset >= self._ase.size: + pad = True + else: + pad = False + return Offsets( + chunk_num=chunk_num, + block_id='{0:08d}'.format(chunk_num), + fd_start=fd_start, + num_bytes=chunk, + range_start=range_start, + range_end=range_end, + pad=pad, + ), resume_bytes diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index cc33834..8581c9a 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -301,3 +301,52 @@ def _populate_from_list_blobs(self, creds, options, general_options): ase = blobxfer.models.azure.StorageEntity(cont, ed) ase.populate_from_blob(sa, blob) yield ase + + +class DestinationPath(blobxfer.models._BaseSourcePaths): + """Azure Destination Path""" + def __init__(self): + # type: (SourcePath) -> None + """Ctor for SourcePath + :param SourcePath self: this + """ + super(DestinationPath, self).__init__() + self._path_map = {} + + def add_path_with_storage_account(self, remote_path, storage_account): + # type: (SourcePath, str, str) -> None + """Add a path with an associated storage account + :param SourcePath self: this + :param str remote_path: remote path + :param str storage_account: storage account to associate with path + """ + if len(self._path_map) >= 1: + raise RuntimeError( + 'cannot add multiple remote paths to SourcePath objects') + rpath = 
blobxfer.util.normalize_azure_path(remote_path) + self.add_path(rpath) + self._path_map[rpath] = storage_account + + def lookup_storage_account(self, remote_path): + # type: (SourcePath, str) -> str + """Lookup the storage account associated with the remote path + :param SourcePath self: this + :param str remote_path: remote path + :rtype: str + :return: storage account associated with path + """ + return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] + + # TODO IS THIS NEEDED? + def generate_entities_for_mode(self, creds, options): + for _path in self._paths: + rpath = str(_path) + cont, dir = blobxfer.util.explode_azure_path(rpath) + sa = creds.get_storage_account(self.lookup_storage_account(rpath)) + + if options.rsa_public_key is not None: + ed = blobxfer.models.crypto.EncryptionMetadata() + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + ase.populate_from_blob(sa, blob) diff --git a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index c94c9c9..dbe8263 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -61,6 +61,35 @@ def check_if_single_blob(client, container, prefix, timeout=None): return True +def get_blob_properties(client, container, prefix, mode, timeout=None): + if mode == blobxfer.models.azure.StorageModes.File: + raise RuntimeError('cannot list Azure Files from blob client') + try: + blob = client.get_blob_properties( + container_name=container, blob_name=prefix, timeout=timeout) + except azure.common.AzureMissingResourceHttpError: + return None + if (mode == blobxfer.models.azure.StorageModes.Append and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.AppendBlob): + raise RuntimeError( + 'existing blob type {} mismatch with mode {}'.format( + blob.properties.blob_type, mode)) + elif (mode == blobxfer.models.azure.StorageModes.Block and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.BlockBlob): + raise RuntimeError( + 'existing blob type {} mismatch with mode {}'.format( + blob.properties.blob_type, mode)) + elif (mode == blobxfer.models.azure.StorageModes.Page and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.PageBlob): + raise RuntimeError( + 'existing blob type {} mismatch with mode {}'.format( + blob.properties.blob_type, mode)) + return blob + + def list_blobs(client, container, prefix, mode, recursive, timeout=None): # type: (azure.storage.blob.BaseBlobService, str, str, # blobxfer.models.azure.StorageModes, bool, int) -> diff --git a/blobxfer/operations/azure/file.py b/blobxfer/operations/azure/file.py index 43e89ca..7a4e076 100644 --- a/blobxfer/operations/azure/file.py +++ b/blobxfer/operations/azure/file.py @@ -89,31 +89,47 @@ def parse_file_path(filepath): return (dirname, fname) -def check_if_single_file(client, fileshare, prefix, timeout=None): +def get_file_properties(client, fileshare, prefix, timeout=None): # type: (azure.storage.file.FileService, str, str, int) -> - # Tuple[bool, azure.storage.file.models.File] - """Check if prefix is a single file or multiple files + # azure.storage.file.models.File + """Get file properties :param FileService client: blob client :param str fileshare: file share name :param str prefix: path prefix :param int timeout: timeout - :rtype: tuple - :return: (if prefix in fileshare is a single file, file) + :rtype: azure.storage.file.models.File + :return: file properties """ - file = None - if 
blobxfer.util.is_none_or_empty(prefix): - return (False, file) dirname, fname = parse_file_path(prefix) try: - file = client.get_file_properties( + return client.get_file_properties( share_name=fileshare, directory_name=dirname, file_name=fname, timeout=timeout, ) except azure.common.AzureMissingResourceHttpError: + return None + + +def check_if_single_file(client, fileshare, prefix, timeout=None): + # type: (azure.storage.file.FileService, str, str, int) -> + # Tuple[bool, azure.storage.file.models.File] + """Check if prefix is a single file or multiple files + :param FileService client: blob client + :param str fileshare: file share name + :param str prefix: path prefix + :param int timeout: timeout + :rtype: tuple + :return: (if prefix in fileshare is a single file, file) + """ + if blobxfer.util.is_none_or_empty(prefix): + return (False, None) + file = get_file_properties(client, fileshare, prefix, timeout) + if file is None: return (False, file) - return (True, file) + else: + return (True, file) def list_files(client, fileshare, prefix, recursive, timeout=None): @@ -178,3 +194,85 @@ def get_file_range(ase, offsets, timeout=None): validate_content=False, # HTTPS takes care of integrity during xfer timeout=timeout, ).content + + +def create_share(ase, containers_created, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None + """Create file share + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param dict containers_created: containers already created map + :param int timeout: timeout + """ + key = ase.client.account_name + ':file=' + ase.container + if key not in containers_created: + ase.client.create_share( + share_name=ase.container, + fail_on_exist=False, + timeout=timeout) + containers_created.add(key) + logger.info('created file share {} on storage account {}'.format( + ase.container, ase.client.account_name)) + + +def create_all_parent_directories(ase, dirs_created, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None + """Create all parent directories for a file + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param dict dirs_created: directories already created map + :param int timeout: timeout + """ + dirs = pathlib.Path(ase.name).parts + if len(dirs) <= 1: + return + dk = ase.client.account_name + ':' + ase.container + for i in range(0, len(dirs)): + dir = str(pathlib.Path(*(dirs[0:i + 1]))) + if dk not in dirs_created or dir not in dirs_created[dk]: + ase.client.create_directory( + share_name=ase.container, + directory_name=dir, + fail_on_exist=False, + timeout=timeout) + if dk not in dirs_created: + dirs_created[dk] = set() + dirs_created[dk].add(dir) + + +def create_file(ase, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, int) -> None + """Create file remotely + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param int timeout: timeout + """ + dir, fpath = parse_file_path(ase.name) + ase.client.create_file( + share_name=ase.container, + directory_name=dir, + file_name=fpath, + content_length=ase.size, + content_settings=None, + timeout=timeout) + + +def put_file_range(ase, local_file, offsets, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, pathlib.path, + # blobxfer.models.upload.Offsets, int) -> None + """Puts a range of bytes into the remote file + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param pathlib.Path local_file: local file + :param blobxfer.models.upload.Offsets offsets: 
upload offsets + :param int timeout: timeout + """ + dir, fpath = parse_file_path(ase.name) + with local_file.open('rb') as fd: + fd.seek(offsets.range_start, 0) + data = fd.read(offsets.num_bytes) + ase.client.update_range( + share_name=ase.container, + directory_name=dir, + file_name=fpath, + data=data, + start_range=offsets.range_start, + end_range=offsets.range_end, + validate_content=False, # integrity is enforced with HTTPS + timeout=timeout) diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index f494d81..3097b96 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -38,6 +38,7 @@ import queue except ImportError: # noqa import Queue as queue +import tempfile # non-stdlib imports import cryptography.hazmat.backends import cryptography.hazmat.primitives.asymmetric.padding @@ -78,7 +79,7 @@ def load_rsa_private_key_file(rsakeyfile, passphrase): return cryptography.hazmat.primitives.serialization.\ load_pem_private_key( keyfile.read(), - passphrase, + passphrase.encode('utf8') if passphrase is not None else None, backend=cryptography.hazmat.backends.default_backend() ) @@ -245,8 +246,18 @@ def _worker_process(self): except queue.Empty: continue if inst[0] == CryptoAction.Encrypt: - # TODO on upload - raise NotImplementedError() + local_file, offsets, symkey, iv = \ + inst[1], inst[2], inst[3], inst[4] + with open(local_file, 'rb') as fd: + data = fd.read() + encdata = blobxfer.operations.crypto.aes_cbc_encrypt_data( + symkey, iv, data, offsets.pad) + with tempfile.NamedTemporaryFile( + mode='wb', delete=False) as fd: + fpath = fd.name + fd.write(encdata) + self._done_cv.acquire() + self._done_queue.put(fpath) elif inst[0] == CryptoAction.Decrypt: final_path, local_path, offsets, symkey, iv, hmac_datafile = \ inst[1], inst[2], inst[3], inst[4], inst[5], inst[6] @@ -260,8 +271,9 @@ def _worker_process(self): with open(local_path, 'r+b') as fd: fd.seek(offsets.fd_start, 0) fd.write(data) - self._done_cv.acquire() - self._done_queue.put(final_path) + self._done_cv.acquire() + self._done_queue.put(final_path) + # notify and release condition var self._done_cv.notify() self._done_cv.release() @@ -282,3 +294,17 @@ def add_decrypt_chunk( (CryptoAction.Decrypt, final_path, local_path, offsets, symkey, iv, hmac_datafile) ) + + def add_encrypt_chunk(self, local_file, offsets, symkey, iv): + # type: (CryptoOffload, pathlib.Path, blobxfer.models.upload.Offsets, + # bytes, bytes) -> None + """Add a chunk to encrypt + :param CryptoOffload self: this + :param pathlib.Path local_file: local file + :param blobxfer.models.upload.Offsets offsets: offsets + :param bytes symkey: symmetric key + :param bytes iv: initialization vector + """ + self._task_queue.put( + (CryptoAction.Encrypt, str(local_file), offsets, symkey, iv) + ) diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index f0e08ec..fa379fc 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -192,8 +192,8 @@ def _check_download_conditions(self, lpath, rfile): return DownloadAction.Download if not self._spec.options.overwrite: logger.info( - 'not overwriting local file: {} (remote: {}/{})'.format( - lpath, rfile.container, rfile.name)) + 'not overwriting local file: {} (remote: {})'.format( + lpath, rfile.path)) return DownloadAction.Skip # check skip on options, MD5 match takes priority if (self._spec.skip_on.md5_match and @@ -355,7 +355,7 @@ def _initialize_download_threads(self): def _wait_for_download_threads(self, terminate): # 
type: (Downloader, bool) -> None - """Terminate download threads + """Wait for download threads :param Downloader self: this :param bool terminate: terminate threads """ @@ -623,7 +623,7 @@ def start(self): :param Downloader self: this """ try: - blobxfer.operations.progress.output_download_parameters( + blobxfer.operations.progress.output_parameters( self._general_options, self._spec) self._run() except (KeyboardInterrupt, Exception) as ex: diff --git a/blobxfer/operations/md5.py b/blobxfer/operations/md5.py index dbd05fb..e04daec 100644 --- a/blobxfer/operations/md5.py +++ b/blobxfer/operations/md5.py @@ -120,6 +120,9 @@ def add_localfile_for_md5_check(self, filename, remote_md5, mode): :param str remote_md5: remote MD5 to compare against :param blobxfer.models.azure.StorageModes mode: mode """ + if blobxfer.util.is_none_or_empty(remote_md5): + raise ValueError('comparison MD5 is empty for file {}'.format( + filename)) if mode == blobxfer.models.azure.StorageModes.Page: pagealign = True else: diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index 99d7445..b0f3bf4 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -48,10 +48,10 @@ def update_progress_bar( go, optext, start, total_files, files_sofar, total_bytes, bytes_sofar): - # type: (blobxfer.options.General, str, datetime.datetime, int, int, int, - # int) -> None + # type: (blobxfer.models.options.General, str, datetime.datetime, int, + # int, int, int) -> None """Update the progress bar - :param blobxfer.options.General go: general options + :param blobxfer.models.options.General go: general options :param str optext: operation prefix text :param datetime.datetime start: start time :param int total_files: total number of files @@ -89,10 +89,11 @@ def update_progress_bar( sys.stdout.flush() -def output_download_parameters(general_options, spec): - # type: (Downloader) -> None - """Output configuration block - :param Downloader downloader: this +def output_parameters(general_options, spec): + # type: (blobxfer.models.options.General, object) -> None + """Output parameters + :param blobxfer.models.options.General general_options: general options + :param object spec: upload or download spec """ log = [] log.append('===========================') @@ -106,25 +107,37 @@ def output_download_parameters(general_options, spec): platform.python_version(), azure.storage._constants.__version__, requests.__version__)) - log.append(' transfer direction: {}'.format('local->Azure')) - log.append(' workers: xfer={} md5={} crypto={}'.format( - general_options.concurrency.transfer_threads, - general_options.concurrency.md5_processes - if spec.options.check_file_md5 else 0, - general_options.concurrency.crypto_processes)) - log.append(' timeout: {}'.format( - general_options.timeout_sec)) + # specific preamble + if isinstance(spec, blobxfer.models.download.Specification): + log.append(' transfer direction: {}'.format('Azure -> local')) + log.append(' workers: xfer={} md5={} crypto={}'.format( + general_options.concurrency.transfer_threads, + general_options.concurrency.md5_processes + if spec.options.check_file_md5 else 0, + general_options.concurrency.crypto_processes)) + elif isinstance(spec, blobxfer.models.upload.Specification): + log.append(' transfer direction: {}'.format('local -> Azure')) + log.append(' workers: xfer={} md5={} crypto={}'.format( + general_options.concurrency.transfer_threads, + general_options.concurrency.md5_processes + if spec.skip_on.md5_match or 
spec.options.store_file_properties.md5 + else 0, + general_options.concurrency.crypto_processes)) + + # TODO handle synccopy spec + + # common block log.append(' resume file: {}'.format( general_options.resume_file)) + log.append(' timeout: {}'.format( + general_options.timeout_sec)) + log.append(' mode: {}'.format( + spec.options.mode)) log.append(' skip on: fs_match={} lmt_ge={} md5={}'.format( spec.skip_on.filesize_match, spec.skip_on.lmt_ge, spec.skip_on.md5_match)) - log.append(' mode: {}'.format( - spec.options.mode)) - log.append(' compute file md5: {}'.format( - spec.options.check_file_md5)) - log.append(' chunk size (bytes): {}'.format( + log.append(' chunk size: {} bytes'.format( spec.options.chunk_size_bytes)) log.append(' delete extraneous: {}'.format( spec.options.delete_extraneous_destination)) @@ -132,14 +145,29 @@ def output_download_parameters(general_options, spec): spec.options.overwrite)) log.append(' recursive: {}'.format( spec.options.recursive)) + + # TODO only output rename single if not synccopy log.append(' rename single: {}'.format( spec.options.rename)) - log.append(' file attributes: {}'.format( - spec.options.restore_file_attributes)) - log.append(' rsa private key: {}'.format( - 'Loaded' if spec.options.rsa_private_key else 'None')) - log.append(' local destination: {}'.format( - spec.destination.path)) + + # specific epilog + if isinstance(spec, blobxfer.models.download.Specification): + log.append(' compute file md5: {}'.format( + spec.options.check_file_md5)) + log.append(' file attributes: {}'.format( + spec.options.restore_file_attributes)) + log.append(' rsa private key: {}'.format( + 'Loaded' if spec.options.rsa_private_key else 'None')) + log.append(' local destination: {}'.format( + spec.destination.path)) + elif isinstance(spec, blobxfer.models.upload.Specification): + log.append(' store properties: attr={} md5={}'.format( + spec.options.store_file_properties.attributes, + spec.options.store_file_properties.md5)) + log.append(' rsa public key: {}'.format( + 'Loaded' if spec.options.rsa_public_key else 'None')) + log.append(' local source paths: {}'.format( + ' '.join([str(src) for src in spec.sources.paths]))) log.append('===========================') log = os.linesep.join(log) if blobxfer.util.is_not_empty(general_options.log_file): diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py new file mode 100644 index 0000000..5f83b7b --- /dev/null +++ b/blobxfer/operations/upload.py @@ -0,0 +1,681 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import enum +import logging +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +try: + import queue +except ImportError: # noqa + import Queue as queue +import threading +# non-stdlib imports +# local imports +import blobxfer.models.crypto +import blobxfer.operations.azure.blob +import blobxfer.operations.azure.file +import blobxfer.operations.crypto +import blobxfer.operations.md5 +import blobxfer.operations.progress +import blobxfer.operations.resume +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) + + +class UploadAction(enum.Enum): + Skip = 1 + CheckMd5 = 2 + Upload = 3 + + +class Uploader(object): + """Uploader""" + def __init__(self, general_options, creds, spec): + # type: (Uploader, blobxfer.models.options.General, + # blobxfer.operations.azure.StorageCredentials, + # blobxfer.models.upload.Specification) -> None + """Ctor for Uploader + :param Uploader self: this + :param blobxfer.models.options.General general_options: general opts + :param blobxfer.operations.azure.StorageCredentials creds: creds + :param blobxfer.models.uplaod.Specification spec: upload spec + """ + self._all_remote_files_processed = False + self._crypto_offload = None + self._md5_meta_lock = threading.Lock() + self._md5_map = {} + self._md5_offload = None + self._upload_lock = threading.Lock() + self._upload_queue = queue.Queue() + self._upload_set = set() + self._upload_start_time = None + self._upload_threads = [] + self._upload_total = None + self._upload_sofar = 0 + self._upload_bytes_total = None + self._upload_bytes_sofar = 0 + self._upload_terminate = False + self._start_time = None + self._delete_after = set() + self._ud_map = {} + self._containers_created = set() + self._fileshare_dir_lock = threading.Lock() + self._dirs_created = {} + self._general_options = general_options + self._creds = creds + self._spec = spec + self._resume = None + self._exceptions = [] + + @property + def termination_check(self): + # type: (Uploader) -> bool + """Check if terminated + :param Uploader self: this + :rtype: bool + :return: if terminated + """ + with self._upload_lock: + return (self._upload_terminate or + len(self._exceptions) > 0 or + (self._all_remote_files_processed and + len(self._upload_set) == 0)) + + @property + def termination_check_md5(self): + # type: (Uploader) -> bool + """Check if terminated from MD5 context + :param Uploader self: this + :rtype: bool + :return: if terminated from MD5 context + """ + with self._md5_meta_lock: + with self._upload_lock: + return (self._upload_terminate or + (self._all_remote_files_processed and + len(self._md5_map) == 0 and + len(self._upload_set) == 0)) + + def _update_progress_bar(self): + # type: (Uploader) -> None + """Update progress bar + :param Uploader self: this + """ + blobxfer.operations.progress.update_progress_bar( + self._general_options, + 'upload', + self._upload_start_time, + self._upload_total, + self._upload_sofar, + self._upload_bytes_total, + 
self._upload_bytes_sofar, + ) + + def _pre_md5_skip_on_check(self, src, rfile): + # type: (Uploader, blobxfer.models.upload.LocalPath, + # blobxfer.models.azure.StorageEntity) -> None + """Perform pre MD5 skip on check + :param Uploader self: this + :param blobxfer.models.upload.LocalPath src: local path + :param blobxfer.models.azure.StorageEntity rfile: remote file + """ + # if encryption metadata is present, check for pre-encryption + # md5 in blobxfer extensions + md5 = None + if rfile.encryption_metadata is not None: + md5 = rfile.encryption_metadata.blobxfer_extensions.\ + pre_encrypted_content_md5 + if md5 is None: + md5 = rfile.md5 + slpath = str(src.absolute_path) + with self._md5_meta_lock: + self._md5_map[slpath] = (src, rfile) + self._md5_offload.add_localfile_for_md5_check(slpath, md5, rfile.mode) + + def _post_md5_skip_on_check(self, filename, md5_match): + # type: (Uploader, str, bool) -> None + """Perform post MD5 skip on check + :param Uploader self: this + :param str filename: local filename + :param bool md5_match: if MD5 matches + """ + uid = self._create_unique_id(src, rfile) + with self._md5_meta_lock: + src, rfile = self._md5_map.pop(filename) + if md5_match: + with self._upload_lock: + self._upload_set.remove(uid) + self._upload_total -= 1 + self._upload_bytes_total -= src.size + else: + self._add_to_upload_queue(src, rfile, uid) + + def _check_for_uploads_from_md5(self): + # type: (Uploader) -> None + """Check queue for a file to upload + :param Uploader self: this + """ + cv = self._md5_offload.done_cv + while not self.termination_check_md5: + result = None + cv.acquire() + while True: + result = self._md5_offload.pop_done_queue() + if result is None: + # use cv timeout due to possible non-wake while running + cv.wait(1) + # check for terminating conditions + if self.termination_check_md5: + break + else: + break + cv.release() + if result is not None: + self._post_md5_skip_on_check(result[0], result[1]) + + def _check_for_crypto_done(self): + # type: (Uploader) -> None + """Check queue for crypto done + :param Uploader self: this + """ + cv = self._crypto_offload.done_cv + while not self.termination_check: + result = None + cv.acquire() + while True: + result = self._crypto_offload.pop_done_queue() + if result is None: + # use cv timeout due to possible non-wake while running + cv.wait(1) + # check for terminating conditions + if self.termination_check: + break + else: + break + cv.release() + if result is not None: + try: + with self._upload_lock: + dd = self._ud_map[result] + dd.perform_chunked_integrity_check() + except KeyError: + # this can happen if all of the last integrity + # chunks are processed at once + pass + + def _add_to_upload_queue(self, src, rfile, uid): + # type: (Uploader, blobxfer.models.upload.LocalPath, + # blobxfer.models.azure.StorageEntity, str) -> None + """Add remote file to download queue + :param Uploader self: this + :param blobxfer.models.upload.LocalPath src: local path + :param blobxfer.models.azure.StorageEntity rfile: remote file + :param str uid: unique id + """ + # prepare local file for upload + ud = blobxfer.models.upload.Descriptor( + src, rfile, uid, self._spec.options, self._resume) + if ud.entity.is_encrypted: + with self._upload_lock: + self._ud_map[uid] = ud + # add download descriptor to queue + self._upload_queue.put(ud) + if self._upload_start_time is None: + with self._upload_lock: + if self._upload_start_time is None: + self._upload_start_time = blobxfer.util.datetime_now() + + def 
_initialize_upload_threads(self): + # type: (Uploader) -> None + """Initialize upload threads + :param Uploader self: this + """ + logger.debug('spawning {} transfer threads'.format( + self._general_options.concurrency.transfer_threads)) + for _ in range(self._general_options.concurrency.transfer_threads): + thr = threading.Thread(target=self._worker_thread_upload) + self._upload_threads.append(thr) + thr.start() + + def _wait_for_upload_threads(self, terminate): + # type: (Uploader, bool) -> None + """Wait for upload threads + :param Uploader self: this + :param bool terminate: terminate threads + """ + if terminate: + self._upload_terminate = terminate + for thr in self._upload_threads: + thr.join() + + def _worker_thread_upload(self): + # type: (Uploader) -> None + """Worker thread upload + :param Uploader self: this + """ + while not self.termination_check: + try: + ud = self._upload_queue.get(False, 0.25) + except queue.Empty: + continue + try: + self._process_upload_descriptor(ud) + except Exception as e: + with self._upload_lock: + self._exceptions.append(e) + + def _put_data(self, ud, offsets): + if ud.entity.mode == blobxfer.models.azure.StorageModes.File: + if offsets.chunk_num == 0: + # create container if necessary + blobxfer.operations.azure.file.create_share( + ud.entity, self._containers_created, + self._general_options.timeout_sec) + # create parent directories + with self._fileshare_dir_lock: + blobxfer.operations.azure.file.\ + create_all_parent_directories( + ud.entity, self._dirs_created, + self._general_options.timeout_sec) + # create remote file + blobxfer.operations.azure.file.create_file( + ud.entity, self._general_options.timeout_sec) + # upload chunk + blobxfer.operations.azure.file.put_file_range( + ud.entity, ud.local_path.absolute_path, offsets, + self._general_options.timeout_sec) + + else: + # TODO all upload types + data = blobxfer.operations.azure.blob.get_blob_range( + dd.entity, offsets, self._general_options.timeout_sec) + + def _process_upload_descriptor(self, ud): + # type: (Uploader, blobxfer.models.upload.Descriptor) -> None + """Process upload descriptor + :param Uploader self: this + :param blobxfer.models.upload.Descriptor: upload descriptor + """ + # update progress bar + self._update_progress_bar() + # get download offsets + offsets, resume_bytes = ud.next_offsets() + # add resume bytes to counter + if resume_bytes is not None: + with self._upload_lock: + self._upload_bytes_sofar += resume_bytes + logger.debug('adding {} sofar {} from {}'.format( + resume_bytes, self._upload_bytes_sofar, ud._ase.name)) + del resume_bytes + print(offsets) + # check if all operations completed + if offsets is None and ud.all_operations_completed: + # finalize file + ud.finalize_file() + # accounting + with self._upload_lock: + if ud.entity.is_encrypted: + self._ud_map.pop(ud.unique_id) + self._upload_set.remove(ud.unique_id) + self._upload_sofar += 1 + return + # re-enqueue for other threads to download + self._upload_queue.put(ud) + if offsets is None: + return + # encrypt if necessary + if ud.entity.is_encrypted: + # send iv through hmac + ud.hmac_iv(ud.current_iv) + # encrypt data + if self._crypto_offload is not None: + self._crypto_offload.add_encrypt_chunk( + str(ud.local_path.absolute_path), offsets, + ud.entity.encryption_metadata.symmetric_key, + ud.current_iv) + # encrypted data will be retrieved from a temp file once + # retrieved from crypto queue + return + else: + # TODO pickup here, read data from file + + encdata = 
blobxfer.operations.crypto.aes_cbc_decrypt_data( + ud.entity.encryption_metadata.symmetric_key, + ud.current_iv, data, offsets.pad) + # send encrypted data through hmac + + # TODO send data as optional param if encrypted + # issue put range + self._put_data(ud, offsets) + # accounting + with self._upload_lock: + self._upload_bytes_sofar += offsets.num_bytes + + def _cleanup_temporary_files(self): + # type: (Uploader) -> None + """Cleanup temporary files in case of an exception or interrupt. + This function is not thread-safe. + :param Uploader self: this + """ + # iterate through dd map and cleanup files + for key in self._ud_map: + dd = self._ud_map[key] + try: + dd.cleanup_all_temporary_files() + except Exception as e: + logger.exception(e) + + def _delete_extraneous_files(self): + # type: (Uploader) -> None + """Delete extraneous files cataloged + :param Uploader self: this + """ + logger.info('attempting to delete {} extraneous files'.format( + len(self._delete_after))) + for file in self._delete_after: + try: + file.unlink() + except OSError: + pass + + def _check_upload_conditions(self, lpath, rfile): + # type: (Uploader, pathlib.Path, + # blobxfer.models.azure.StorageEntity) -> UploadAction + """Check for upload conditions + :param Uploader self: this + :param pathlib.Path lpath: local path + :param blobxfer.models.azure.StorageEntity rfile: remote file + :rtype: UploadAction + :return: upload action + """ + # check if file still exists + if not lpath.exists(): + return UploadAction.Skip + # if remote file doesn't exist, upload + if rfile is None: + return UploadAction.Upload + # check overwrite option + if not self._spec.options.overwrite: + logger.info( + 'not overwriting remote file: {} (local: {})'.format( + rfile.path, lpath)) + return UploadAction.Skip + # check skip on options, MD5 match takes priority + if (self._spec.skip_on.md5_match and + blobxfer.util.is_not_empty(rfile.md5)): + return UploadAction.CheckMd5 + # if neither of the remaining skip on actions are activated, upload + if (not self._spec.skip_on.filesize_match and + not self._spec.skip_on.lmt_ge): + return UploadAction.Upload + # check skip on file size match + ul_fs = None + if self._spec.skip_on.filesize_match: + lsize = lpath.stat().st_size + if rfile.mode == blobxfer.models.azure.StorageModes.Page: + lsize = blobxfer.util.page_align_content_length(lsize) + if rfile.size == lsize: + ul_fs = False + else: + ul_fs = True + # check skip on lmt ge + ul_lmt = None + if self._spec.skip_on.lmt_ge: + mtime = blobxfer.util.datetime_from_timestamp( + lpath.stat().st_mtime) + if rfile.lmt >= mtime: + ul_lmt = False + else: + ul_lmt = True + # upload if either skip on mismatch is True + if ul_fs or ul_lmt: + return UploadAction.Upload + else: + return UploadAction.Skip + + def _generate_entity_for_source(self, local_path): + # type: (Uploader, blobxfer.models.upload.LocalSourcePath) -> ??? + """Generate entities for source path + :param Uploader self: this + :param blobxfer.models.upload.LocalSourcePath local_path: local path + """ + # construct stripped destination path + spath = local_path.relative_path + if self._spec.options.strip_components > 0: + _rparts = local_path.relative_path.parts + _strip = min( + (len(_rparts) - 1, self._spec.options.strip_components) + ) + if _strip > 0: + spath = pathlib.Path(*_rparts[_strip:]) + # for each destination: + # 1. prepend non-container path + # 2. bind client from mode + # 3. 
perform get blob or file properties + for dst in self._spec.destinations: + for dpath in dst.paths: + sdpath = str(dpath) + cont, dir = blobxfer.util.explode_azure_path(sdpath) + # apply rename + if self._spec.options.rename: + name = dir + else: + name = str(spath / dir) + if blobxfer.util.is_none_or_empty(name): + raise ValueError( + 'must specify a container for destination: {}'.format( + dpath)) + # apply strip components + print(cont, name) + sa = self._creds.get_storage_account( + dst.lookup_storage_account(sdpath)) + if (self._spec.options.mode == + blobxfer.models.azure.StorageModes.File): + fp = blobxfer.operations.azure.file.get_file_properties( + sa.file_client, cont, name, + timeout=self._general_options.timeout_sec) + else: + fp = blobxfer.operations.azure.blob.get_blob_properties( + sa.block_blob_client, cont, name, + self._spec.options.mode, + timeout=self._general_options.timeout_sec) + if fp is not None: + if blobxfer.models.crypto.EncryptionMetadata.\ + encryption_metadata_exists(fp.metadata): + ed = blobxfer.models.crypto.EncryptionMetadata() + ed.convert_from_json(fp.metadata, fp.name, None) + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + if (self._spec.options.mode == + blobxfer.models.azure.StorageModes.File): + ase.populate_from_file(sa, fp) + else: + ase.populate_from_blob(sa, fp) + else: + ase = None + lpath = local_path.parent_path / local_path.relative_path + action = self._check_upload_conditions(lpath, ase) + if ase is None: + if self._spec.options.rsa_public_key: + ed = blobxfer.models.crypto.EncryptionMetadata() + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + ase.populate_from_local( + sa, cont, name, self._spec.options.mode) + yield action, ase + + def _create_unique_id(self, src, ase): + return ';'.join( + (str(src.absolute_path), ase._client.account_name, ase.path) + ) + + def _run(self): + # type: (Uploader) -> None + """Execute Uploader + :param Uploader self: this + """ + # mark start + self._start_time = blobxfer.util.datetime_now() + logger.info('blobxfer start time: {0}'.format(self._start_time)) + # initialize resume db if specified +# if self._general_options.resume_file is not None: +# self._resume = blobxfer.operations.resume.DownloadResumeManager( +# self._general_options.resume_file) + # initialize MD5 processes + if ((self._spec.options.store_file_properties.md5 or + self._spec.skip_on.md5_match) and + self._general_options.concurrency.md5_processes > 0): + self._md5_offload = blobxfer.operations.md5.LocalFileMd5Offload( + num_workers=self._general_options.concurrency.md5_processes) + self._md5_offload.initialize_check_thread( + self._check_for_uploads_from_md5) + # initialize crypto processes + if self._general_options.concurrency.crypto_processes > 0: + self._crypto_offload = blobxfer.operations.crypto.CryptoOffload( + num_workers=self._general_options.concurrency.crypto_processes) + self._crypto_offload.initialize_check_thread( + self._check_for_crypto_done) + # initialize upload threads + self._initialize_upload_threads() + # initialize local counters + nfiles = 0 + total_size = 0 + skipped_files = 0 + skipped_size = 0 + if not self._spec.sources.can_rename() and self._spec.options.rename: + raise RuntimeError( + 'cannot rename to specified destination with multiple sources') + # iterate through source paths to upload + for sfile in self._spec.sources.files(): + # create associated storage entity (destination) for file + for action, ase in self._generate_entity_for_source(sfile): + 
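The action yielded for each source/destination pair follows the precedence implemented in _check_upload_conditions; a minimal illustrative sketch of that decision (SkipOn flags passed as plain booleans, the remote entity reduced to a namedtuple, and page-blob size alignment omitted) is:

import collections

Remote = collections.namedtuple('Remote', ['md5', 'size', 'lmt'])

def decide_action(local_size, local_mtime, remote, overwrite,
                  skip_md5, skip_size, skip_lmt):
    if remote is None:
        return 'Upload'
    if not overwrite:
        return 'Skip'
    if skip_md5 and remote.md5:
        return 'CheckMd5'  # defer the decision to the MD5 offload workers
    if not skip_size and not skip_lmt:
        return 'Upload'
    size_differs = skip_size and remote.size != local_size
    local_newer = skip_lmt and local_mtime > remote.lmt
    return 'Upload' if (size_differs or local_newer) else 'Skip'

# an unchanged file with a stored remote MD5 is routed to the MD5 check queue:
assert decide_action(1024, 0, Remote('abc', 1024, 1), True,
                     skip_md5=True, skip_size=True, skip_lmt=False) == 'CheckMd5'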
print(sfile.parent_path, sfile.relative_path, sfile.absolute_path, action, ase.container, ase.name) + print(sfile.size, sfile.mode, sfile.uid, sfile.gid) + print(self._create_unique_id(sfile, ase)) + if action == UploadAction.Skip: + skipped_files += 1 + skipped_size += ase.size if ase.size is not None else 0 + continue + # add to potential upload set + uid = self._create_unique_id(sfile, ase) + with self._upload_lock: + self._upload_set.add(uid) + if action == UploadAction.CheckMd5: + self._pre_md5_skip_on_check(sfile, ase) + elif action == UploadAction.Upload: + self._add_to_upload_queue(sfile, ase, uid) + + nfiles += 1 + total_size += sfile.size + + self._upload_total = nfiles - skipped_files + self._upload_bytes_total = total_size - skipped_size + upload_size_mib = self._upload_bytes_total / blobxfer.util.MEGABYTE + # set remote files processed + with self._md5_meta_lock: + self._all_remote_files_processed = True + logger.debug( + ('{0} remote files processed, waiting for upload completion ' + 'of {1:.4f} MiB').format(nfiles, upload_size_mib)) + del nfiles + del total_size + del skipped_files + del skipped_size + # wait for downloads to complete + self._wait_for_upload_threads(terminate=False) + end_time = blobxfer.util.datetime_now() + # update progress bar + self._update_progress_bar() + # check for exceptions + if len(self._exceptions) > 0: + logger.error('exceptions encountered while downloading') + # raise the first one + raise self._exceptions[0] + # check for mismatches + if (self._upload_sofar != self._upload_total or + self._upload_bytes_sofar != self._upload_bytes_total): + raise RuntimeError( + 'download mismatch: [count={}/{} bytes={}/{}]'.format( + self._upload_sofar, self._upload_total, + self._upload_bytes_sofar, self._upload_bytes_total)) + # delete all remaining local files not accounted for if + # delete extraneous enabled + self._delete_extraneous_files() + # delete resume file if we've gotten this far + if self._resume is not None: + self._resume.delete() + # output throughput + if self._upload_start_time is not None: + dltime = (end_time - self._upload_start_time).total_seconds() + logger.info( + ('elapsed download + verify time and throughput: {0:.3f} sec, ' + '{1:.4f} Mbps').format( + dltime, download_size_mib * 8 / dltime)) + end_time = blobxfer.util.datetime_now() + logger.info('blobxfer end time: {0} (elapsed: {1:.3f} sec)'.format( + end_time, (end_time - self._start_time).total_seconds())) + + def start(self): + # type: (Uploader) -> None + """Start the Uploader + :param Uploader self: this + """ + try: + blobxfer.operations.progress.output_parameters( + self._general_options, self._spec) + self._run() + except (KeyboardInterrupt, Exception) as ex: + if isinstance(ex, KeyboardInterrupt): + logger.info( + 'KeyboardInterrupt detected, force terminating ' + 'processes and threads (this may take a while)...') + try: + self._wait_for_upload_threads(terminate=True) + finally: + self._cleanup_temporary_files() + raise + finally: + # shutdown processes + if self._md5_offload is not None: + self._md5_offload.finalize_processes() + if self._crypto_offload is not None: + self._crypto_offload.finalize_processes() + # close resume file + if self._resume is not None: + self._resume.close() diff --git a/cli/cli.py b/cli/cli.py index d27efc8..fdb9c0f 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -368,6 +368,7 @@ def callback(ctx, param, value): return click.option( '--file-attributes', expose_value=False, + default=False, is_flag=False, help='Store or restore file attributes 
[False]', callback=callback)(f) @@ -412,6 +413,20 @@ def callback(ctx, param, value): callback=callback)(f) +def _multi_storage_account_distribution_mode(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options[ + 'multi_storage_account_distribution_mode'] = value.lower() + return value + return click.option( + '--multi-storage-account-distribution-mode', + expose_value=False, + default='disabled', + help='Multiple storage account distribution mode [stripe]', + callback=callback)(f) + + def _overwrite_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -438,6 +453,20 @@ def callback(ctx, param, value): callback=callback)(f) +def _rename_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['rename'] = value + return value + return click.option( + '--rename', + expose_value=False, + is_flag=True, + default=False, + help='Rename a single file upload or download [False]', + callback=callback)(f) + + def _rsa_private_key_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -572,7 +601,22 @@ def callback(ctx, param, value): callback=callback)(f) +def _stripe_chunk_size_bytes_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['stripe_chunk_size_bytes'] = value + return value + return click.option( + '--stripe-chunk-size-bytes', + expose_value=False, + type=int, + default=1073741824, + help='Stripe width in bytes [1073741824]', + callback=callback)(f) + + def upload_options(f): + f = _stripe_chunk_size_bytes_option(f) f = _strip_components_option(f) f = _skip_on_md5_match_option(f) f = _skip_on_lmt_ge_option(f) @@ -581,8 +625,10 @@ def upload_options(f): f = _rsa_public_key_option(f) f = _rsa_private_key_passphrase_option(f) f = _rsa_private_key_option(f) + f = _rename_option(f) f = _recursive_option(f) f = _overwrite_option(f) + f = _multi_storage_account_distribution_mode(f) f = _mode_option(f) f = _include_option(f) f = _file_md5_option(f) @@ -602,6 +648,7 @@ def download_options(f): f = _sas_option(f) f = _rsa_private_key_passphrase_option(f) f = _rsa_private_key_option(f) + f = _rename_option(f) f = _recursive_option(f) f = _overwrite_option(f) f = _mode_option(f) @@ -702,7 +749,11 @@ def upload(ctx, local_resource, storage_account, remote_path): ctx.cli_options, settings.TransferAction.Upload, local_resource, storage_account, remote_path) ctx.initialize() - blobxfer.api.upload_block() + specs = settings.create_upload_specifications(ctx.config) + for spec in specs: + blobxfer.api.Uploader( + ctx.general_options, ctx.credentials, spec + ).start() @cli.group() diff --git a/cli/settings.py b/cli/settings.py index 088a4f4..6dc4f72 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -36,6 +36,7 @@ import blobxfer.models.azure import blobxfer.models.download import blobxfer.models.options +import blobxfer.models.upload import blobxfer.operations.azure import blobxfer.operations.crypto import blobxfer.util @@ -97,6 +98,7 @@ def add_cli_options( 'mode': cli_options['mode'], 'overwrite': cli_options['overwrite'], 'recursive': cli_options['recursive'], + 'rename': cli_options['rename'], 'rsa_private_key': cli_options['rsa_private_key'], 'rsa_private_key_passphrase': cli_options[ 'rsa_private_key_passphrase'], @@ -107,9 +109,17 @@ def add_cli_options( 'lmt_ge': cli_options['skip_on_lmt_ge'], 'md5_match': cli_options['skip_on_md5_match'], }, - 'store_file_attributes': 
cli_options['file_attributes'], - 'store_file_md5': cli_options['file_md5'], + 'store_file_properties': { + 'attributes': cli_options['file_attributes'], + 'md5': cli_options['file_md5'], + }, 'strip_components': cli_options['strip_components'], + 'vectored_io': { + 'stripe_chunk_size_bytes': cli_options[ + 'stripe_chunk_size_bytes'], + 'multi_storage_account_distribution_mode': cli_options[ + 'multi_storage_account_distribution_mode'], + }, }, } elif action == TransferAction.Download: @@ -125,6 +135,7 @@ def add_cli_options( 'mode': cli_options['mode'], 'overwrite': cli_options['overwrite'], 'recursive': cli_options['recursive'], + 'rename': cli_options['rename'], 'rsa_private_key': cli_options['rsa_private_key'], 'rsa_private_key_passphrase': cli_options[ 'rsa_private_key_passphrase'], @@ -287,11 +298,6 @@ def create_download_specifications(config): rpk, rpkp) else: rpk = None - # ensure compatible options - if (not conf['options']['check_file_md5'] and - conf['options']['skip_on']['md5_match']): - raise ValueError( - 'Cannot specify skip on MD5 match without file MD5 enabled') # create specification ds = blobxfer.models.download.Specification( download_options=blobxfer.models.options.Download( @@ -302,6 +308,7 @@ def create_download_specifications(config): mode=mode, overwrite=conf['options']['overwrite'], recursive=conf['options']['recursive'], + rename=conf['options']['rename'], restore_file_attributes=conf[ 'options']['restore_file_attributes'], rsa_private_key=rpk, @@ -329,5 +336,98 @@ def create_download_specifications(config): if blobxfer.util.is_not_empty(conf['exclude']): asp.add_excludes(conf['exclude']) ds.add_azure_source_path(asp) + # append spec to list specs.append(ds) return specs + + +def create_upload_specifications(config): + # type: (dict) -> List[blobxfer.models.upload.Specification] + """Create a list of Upload Specification objects from configuration + :param dict config: config dict + :rtype: list + :return: list of Upload Specification objects + """ + specs = [] + for conf in config['upload']: + # create upload options + confmode = conf['options']['mode'].lower() + if confmode == 'auto': + mode = blobxfer.models.azure.StorageModes.Auto + elif confmode == 'append': + mode = blobxfer.models.azure.StorageModes.Append + elif confmode == 'block': + mode = blobxfer.models.azure.StorageModes.Block + elif confmode == 'file': + mode = blobxfer.models.azure.StorageModes.File + elif confmode == 'page': + mode = blobxfer.models.azure.StorageModes.Page + else: + raise ValueError('unknown mode: {}'.format(confmode)) + # load RSA public key PEM if specified + rpk = conf['options']['rsa_public_key'] + if blobxfer.util.is_not_empty(rpk): + rpk = blobxfer.operations.crypto.load_rsa_public_key_file(rpk) + if rpk is None: + # load RSA private key PEM file if specified + rpk = conf['options']['rsa_private_key'] + if blobxfer.util.is_not_empty(rpk): + rpkp = conf['options']['rsa_private_key_passphrase'] + rpk = blobxfer.operations.crypto.load_rsa_private_key_file( + rpk, rpkp) + rpk = rpk.public_key() + else: + rpk = None + # create local source paths + lsp = blobxfer.models.upload.LocalSourcePath() + lsp.add_paths(conf['source']) + if blobxfer.util.is_not_empty(conf['include']): + lsp.add_includes(conf['include']) + if blobxfer.util.is_not_empty(conf['exclude']): + lsp.add_excludes(conf['exclude']) + # create specification + us = blobxfer.models.upload.Specification( + upload_options=blobxfer.models.options.Upload( + chunk_size_bytes=conf['options']['chunk_size_bytes'], + 
delete_extraneous_destination=conf[ + 'options']['delete_extraneous_destination'], + mode=mode, + overwrite=conf['options']['overwrite'], + recursive=conf['options']['recursive'], + rename=conf['options']['rename'], + rsa_public_key=rpk, + store_file_properties=blobxfer.models.options.FileProperties( + attributes=conf[ + 'options']['store_file_properties']['attributes'], + md5=conf['options']['store_file_properties']['md5'], + ), + strip_components=conf['options']['strip_components'], + vectored_io=blobxfer.models.options.VectoredIo( + stripe_chunk_size_bytes=conf[ + 'options']['vectored_io']['stripe_chunk_size_bytes'], + multi_storage_account_distribution_mode=blobxfer. + models.upload.VectoredIoDistributionMode( + conf['options']['vectored_io'][ + 'multi_storage_account_distribution_mode'].lower( + )), + ), + ), + skip_on_options=blobxfer.models.options.SkipOn( + filesize_match=conf['options']['skip_on']['filesize_match'], + lmt_ge=conf['options']['skip_on']['lmt_ge'], + md5_match=conf['options']['skip_on']['md5_match'], + ), + local_source_path=lsp, + ) + # create remote destination paths + for dst in conf['destination']: + if len(dst) != 1: + raise RuntimeError( + 'invalid number of destination pairs specified per entry') + sa = next(iter(dst)) + adp = blobxfer.operations.azure.DestinationPath() + adp.add_path_with_storage_account(dst[sa], sa) + us.add_azure_destination_path(adp) + # append spec to list + specs.append(us) + return specs From 52c504ad6c53b2e13133be320963d49fe1d678c7 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 18 May 2017 08:18:14 -0700 Subject: [PATCH 26/47] Fix filter check on download - Retry interval shortened - Allow multiple include/exclude filters to be specified on the CLI --- blobxfer/models/__init__.py | 8 ++++---- blobxfer/models/upload.py | 11 +++++------ blobxfer/operations/azure/__init__.py | 18 ++++-------------- blobxfer/retry.py | 4 ++-- cli/cli.py | 2 ++ 5 files changed, 17 insertions(+), 26 deletions(-) diff --git a/blobxfer/models/__init__.py b/blobxfer/models/__init__.py index 82ac224..b9e9fbc 100644 --- a/blobxfer/models/__init__.py +++ b/blobxfer/models/__init__.py @@ -67,7 +67,7 @@ def add_include(self, incl): :param str incl: include filter """ if self._include is None: - self._include = [incl] + self._include = list(incl) else: self._include.append(incl) @@ -78,7 +78,7 @@ def add_includes(self, includes): :param list includes: list of includes """ if not isinstance(includes, list): - raise ValueError('includes is not of type list') + includes = list(includes) if self._include is None: self._include = includes else: @@ -91,7 +91,7 @@ def add_exclude(self, excl): :param str excl: exclude filter """ if self._exclude is None: - self._exclude = [excl] + self._exclude = list(excl) else: self._exclude.append(excl) @@ -102,7 +102,7 @@ def add_excludes(self, excludes): :param list excludes: list of excludes """ if not isinstance(excludes, list): - raise ValueError('excludes is not of type list') + excludes = list(excludes) if self._exclude is None: self._exclude = excludes else: diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index cbe1816..8d8a09d 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -123,17 +123,16 @@ def files(self): # check if path is a single file tmp = pathlib.Path(_ppath) if tmp.is_file(): - yield LocalPath( - parent_path=tmp.parent, - relative_path=pathlib.Path(tmp.name) - ) + if self._inclusion_check(tmp.name): + yield LocalPath( + parent_path=tmp.parent, + 
relative_path=pathlib.Path(tmp.name) + ) continue del tmp for entry in blobxfer.util.scantree(_ppath): _rpath = pathlib.Path(entry.path).relative_to(_ppath) if not self._inclusion_check(_rpath): - logger.debug( - 'skipping file {} due to filters'.format(_rpath)) continue yield LocalPath(parent_path=_expath, relative_path=_rpath) diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index 8581c9a..8664ac8 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -261,6 +261,8 @@ def _populate_from_list_files(self, creds, options, general_options): for file in blobxfer.operations.azure.file.list_files( sa.file_client, cont, dir, options.recursive, general_options.timeout_sec): + if not self._inclusion_check(file.name): + continue if blobxfer.models.crypto.EncryptionMetadata.\ encryption_metadata_exists(file.metadata): ed = blobxfer.models.crypto.EncryptionMetadata() @@ -291,6 +293,8 @@ def _populate_from_list_blobs(self, creds, options, general_options): for blob in blobxfer.operations.azure.blob.list_blobs( sa.block_blob_client, cont, dir, options.mode, options.recursive, general_options.timeout_sec): + if not self._inclusion_check(blob.name): + continue if blobxfer.models.crypto.EncryptionMetadata.\ encryption_metadata_exists(blob.metadata): ed = blobxfer.models.crypto.EncryptionMetadata() @@ -336,17 +340,3 @@ def lookup_storage_account(self, remote_path): :return: storage account associated with path """ return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] - - # TODO IS THIS NEEDED? - def generate_entities_for_mode(self, creds, options): - for _path in self._paths: - rpath = str(_path) - cont, dir = blobxfer.util.explode_azure_path(rpath) - sa = creds.get_storage_account(self.lookup_storage_account(rpath)) - - if options.rsa_public_key is not None: - ed = blobxfer.models.crypto.EncryptionMetadata() - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_blob(sa, blob) diff --git a/blobxfer/retry.py b/blobxfer/retry.py index ce42bd4..892b25c 100644 --- a/blobxfer/retry.py +++ b/blobxfer/retry.py @@ -37,7 +37,7 @@ class ExponentialRetryWithMaxWait(azure.storage.retry._Retry): """Exponential Retry with Max Wait (infinite retries)""" - def __init__(self, initial_backoff=1, max_backoff=8, reset_at_max=True): + def __init__(self, initial_backoff=0.1, max_backoff=2, reset_at_max=True): # type: (ExponentialRetryWithMaxWait, int, int, bool) -> None """Ctor for ExponentialRetryWithMaxWait :param ExponentialRetryWithMaxWait self: this @@ -78,7 +78,7 @@ def _backoff(self, context): if context.count == 1: backoff = self.initial_backoff else: - backoff = self.initial_backoff << (context.count - 1) + backoff = self.initial_backoff * (context.count - 1) if backoff > self.max_backoff and self.reset_at_max: backoff = self.initial_backoff context.count = 1 diff --git a/cli/cli.py b/cli/cli.py index fdb9c0f..f7a5f53 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -356,6 +356,7 @@ def callback(ctx, param, value): '--exclude', expose_value=False, default=None, + multiple=True, help='Exclude pattern', callback=callback)(f) @@ -396,6 +397,7 @@ def callback(ctx, param, value): '--include', expose_value=False, default=None, + multiple=True, help='Include pattern', callback=callback)(f) From 28b9bc9df57efc0ebb72d5d23795060b00e6c89b Mon Sep 17 00:00:00 2001 From: Fred Park Date: Fri, 19 May 2017 15:01:49 -0700 Subject: [PATCH 27/47] Continue upload work - Add one-shot parameter - Begin 
accommodating vectorized output --- blobxfer/models/azure.py | 1 + blobxfer/models/options.py | 3 +- blobxfer/models/upload.py | 83 +++++++++++----- blobxfer/operations/upload.py | 182 +++++++++++++++++++++++++--------- cli/cli.py | 59 +++++++---- cli/settings.py | 10 +- 6 files changed, 239 insertions(+), 99 deletions(-) diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index fbc319f..b2b34b8 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -66,6 +66,7 @@ def __init__(self, container, ed=None): self._md5 = None self._encryption = ed self._vio = None + self.replica_targets = None @property def client(self): diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index 9a3af9f..14e4e09 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -48,7 +48,7 @@ VectoredIo = collections.namedtuple( 'VectoredIoOptions', [ 'stripe_chunk_size_bytes', - 'multi_storage_account_distribution_mode', + 'distribution_mode', ] ) SkipOn = collections.namedtuple( @@ -69,6 +69,7 @@ 'chunk_size_bytes', 'delete_extraneous_destination', 'mode', + 'one_shot_bytes', 'overwrite', 'recursive', 'rename', diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index 8d8a09d..34584dd 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -49,8 +49,9 @@ # create logger logger = logging.getLogger(__name__) # global defines -_MAX_BLOCK_CHUNKSIZE_BYTES = 268435456 -_MAX_NONBLOCK_CHUNKSIZE_BYTES = 4194304 +_MAX_BLOCK_BLOB_ONESHOT_BYTES = 268435456 +_MAX_BLOCK_BLOB_CHUNKSIZE_BYTES = 268435456 +_MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES = 4194304 # named tuples @@ -65,6 +66,13 @@ 'pad', ] ) +LocalPathView = collections.namedtuple( + 'LocalPathView', [ + 'fd_end', + 'fd_start', + 'slice_num', + ] +) class VectoredIoDistributionMode(enum.Enum): @@ -77,11 +85,19 @@ def __str__(self): class LocalPath(object): - def __init__(self, parent_path, relative_path): + def __init__(self, parent_path, relative_path, view=None): self.parent_path = parent_path self.relative_path = relative_path # populate properties self._stat = self.absolute_path.stat() + if view is None: + self.view = LocalPathView( + fd_start=0, + fd_end=self.size, + slice_num=0, + ) + else: + self.view = view @property def absolute_path(self): @@ -91,6 +107,10 @@ def absolute_path(self): def size(self): return self._stat.st_size + @property + def lmt(self): + return self._stat.st_mtime + @property def mode(self): return str(oct(self._stat.st_mode)) @@ -164,21 +184,22 @@ def __init__( if self.sources.paths[0].is_dir(): raise ValueError( 'cannot rename a directory of files to upload') - if (self.options.rsa_public_key and - self.options.vectored_io. 
- multi_storage_account_distribution_mode == - VectoredIoDistributionMode.Stripe): - raise ValueError( - 'cannot enable encryption and multi-storage account ' - 'vectored IO in stripe mode') if self.options.chunk_size_bytes <= 0: raise ValueError('chunk size must be positive') - if self.options.chunk_size_bytes > _MAX_BLOCK_CHUNKSIZE_BYTES: + if self.options.chunk_size_bytes > _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES: raise ValueError( ('chunk size value of {} exceeds maximum allowable ' 'of {}').format( self.options.chunk_size_bytes, - _MAX_BLOCK_CHUNKSIZE_BYTES)) + _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES)) + if self.options.one_shot_bytes < 0: + raise ValueError('one shot bytes value must be at least 0') + if self.options.one_shot_bytes > _MAX_BLOCK_BLOB_ONESHOT_BYTES: + raise ValueError( + ('one shot bytes value of {} exceeds maximum allowable ' + 'of {}').format( + self.options.chunk_size_bytes, + _MAX_BLOCK_BLOB_ONESHOT_BYTES)) def add_azure_destination_path(self, dest): # type: (Specification, @@ -267,13 +288,24 @@ def all_operations_completed(self): @property def is_resumable(self): # type: (Descriptor) -> bool - """Download is resume capable + """Upload is resume capable :param Descriptor self: this :rtype: bool :return: if resumable """ return self._resume_mgr is not None and self.hmac is None + @property + def one_shot(self): + # type: (Descriptor) -> bool + """Upload is a one-shot block upload + :param Descriptor self: this + :rtype: bool + :return: is one-shot capable + """ + return (self._ase.mode == blobxfer.models.azure.StorageModes.Block and + self._total_chunks == 1) + def hmac_iv(self, iv): # type: (Descriptor, bytes) -> None """Send IV through hasher @@ -327,26 +359,29 @@ def _adjust_chunk_size(self, options): self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) # ensure chunk sizes are compatible with mode if self._ase.mode == blobxfer.models.azure.StorageModes.Append: - if self._chunk_size > _MAX_NONBLOCK_CHUNKSIZE_BYTES: - self._chunk_size = _MAX_NONBLOCK_CHUNKSIZE_BYTES + if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( 'adjusting chunk size to {} for append blobs'.format( self._chunk_size)) elif self._ase.mode == blobxfer.models.azure.StorageModes.Block: - if self._chunk_size > _MAX_BLOCK_CHUNKSIZE_BYTES: - self._chunk_size = _MAX_BLOCK_CHUNKSIZE_BYTES - logger.debug( - 'adjusting chunk size to {} for block blobs'.format( - self._chunk_size)) + if self._ase.size <= options.one_shot_bytes: + self._chunk_size = options.one_shot_bytes + else: + if self._chunk_size > _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES + logger.debug( + 'adjusting chunk size to {} for block blobs'.format( + self._chunk_size)) elif self._ase.mode == blobxfer.models.azure.StorageModes.File: - if self._chunk_size > _MAX_NONBLOCK_CHUNKSIZE_BYTES: - self._chunk_size = _MAX_NONBLOCK_CHUNKSIZE_BYTES + if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( 'adjusting chunk size to {} for files'.format( self._chunk_size)) elif self._ase.mode == blobxfer.models.azure.StorageModes.Page: - if self._chunk_size > _MAX_NONBLOCK_CHUNKSIZE_BYTES: - self._chunk_size = _MAX_NONBLOCK_CHUNKSIZE_BYTES + if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: + self._chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( 'adjusting chunk size to {} for page blobs'.format( self._chunk_size)) diff --git 
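The chunk size selection above can be condensed as follows; this is a simplified sketch that uses string mode names instead of StorageModes, with the byte limits taken from the constants defined in blobxfer/models/upload.py:

_MAX_BLOCK_BLOB_CHUNKSIZE_BYTES = 268435456    # 256 MiB
_MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES = 4194304   # 4 MiB

def adjusted_chunk_size(requested, file_size, mode, one_shot_bytes):
    chunk = min(requested, file_size)
    if mode == 'block':
        if file_size <= one_shot_bytes:
            # small block blobs are uploaded with a single put blob call
            return one_shot_bytes
        return min(chunk, _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES)
    # append blobs, page blobs and file shares cap at 4 MiB per operation
    return min(chunk, _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES)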
a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 5f83b7b..67d29e2 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -32,6 +32,7 @@ # stdlib imports import enum import logging +import math try: import pathlib2 as pathlib except ImportError: # noqa @@ -318,9 +319,9 @@ def _put_data(self, ud, offsets): blobxfer.operations.azure.file.put_file_range( ud.entity, ud.local_path.absolute_path, offsets, self._general_options.timeout_sec) - else: # TODO all upload types + # TODO handle one-shot uploads for block blobs data = blobxfer.operations.azure.blob.get_blob_range( dd.entity, offsets, self._general_options.timeout_sec) @@ -353,7 +354,7 @@ def _process_upload_descriptor(self, ud): self._upload_set.remove(ud.unique_id) self._upload_sofar += 1 return - # re-enqueue for other threads to download + # re-enqueue for other threads to upload self._upload_queue.put(ud) if offsets is None: return @@ -412,17 +413,18 @@ def _delete_extraneous_files(self): except OSError: pass - def _check_upload_conditions(self, lpath, rfile): - # type: (Uploader, pathlib.Path, + def _check_upload_conditions(self, local_path, rfile): + # type: (Uploader, blobxfer.models.upload.LocalPath, # blobxfer.models.azure.StorageEntity) -> UploadAction """Check for upload conditions :param Uploader self: this - :param pathlib.Path lpath: local path + :param blobxfer.models.LocalPath local_path: local path :param blobxfer.models.azure.StorageEntity rfile: remote file :rtype: UploadAction :return: upload action """ - # check if file still exists + lpath = local_path.absolute_path + # check if local file still exists if not lpath.exists(): return UploadAction.Skip # if remote file doesn't exist, upload @@ -445,7 +447,7 @@ def _check_upload_conditions(self, lpath, rfile): # check skip on file size match ul_fs = None if self._spec.skip_on.filesize_match: - lsize = lpath.stat().st_size + lsize = local_path.size if rfile.mode == blobxfer.models.azure.StorageModes.Page: lsize = blobxfer.util.page_align_content_length(lsize) if rfile.size == lsize: @@ -455,8 +457,7 @@ def _check_upload_conditions(self, lpath, rfile): # check skip on lmt ge ul_lmt = None if self._spec.skip_on.lmt_ge: - mtime = blobxfer.util.datetime_from_timestamp( - lpath.stat().st_mtime) + mtime = blobxfer.util.datetime_from_timestamp(local_path.lmt) if rfile.lmt >= mtime: ul_lmt = False else: @@ -467,7 +468,33 @@ def _check_upload_conditions(self, lpath, rfile): else: return UploadAction.Skip - def _generate_entity_for_source(self, local_path): + def _check_for_existing_remote(self, sa, cont, name): + if self._spec.options.mode == blobxfer.models.azure.StorageModes.File: + fp = blobxfer.operations.azure.file.get_file_properties( + sa.file_client, cont, name, + timeout=self._general_options.timeout_sec) + else: + fp = blobxfer.operations.azure.blob.get_blob_properties( + sa.block_blob_client, cont, name, self._spec.options.mode, + timeout=self._general_options.timeout_sec) + if fp is not None: + if blobxfer.models.crypto.EncryptionMetadata.\ + encryption_metadata_exists(fp.metadata): + ed = blobxfer.models.crypto.EncryptionMetadata() + ed.convert_from_json(fp.metadata, fp.name, None) + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + if (self._spec.options.mode == + blobxfer.models.azure.StorageModes.File): + ase.populate_from_file(sa, fp) + else: + ase.populate_from_blob(sa, fp) + else: + ase = None + return ase + + def _generate_destination_for_source(self, local_path): # type: (Uploader, 
blobxfer.models.upload.LocalSourcePath) -> ??? """Generate entities for source path :param Uploader self: this @@ -500,36 +527,16 @@ def _generate_entity_for_source(self, local_path): 'must specify a container for destination: {}'.format( dpath)) # apply strip components - print(cont, name) sa = self._creds.get_storage_account( dst.lookup_storage_account(sdpath)) - if (self._spec.options.mode == - blobxfer.models.azure.StorageModes.File): - fp = blobxfer.operations.azure.file.get_file_properties( - sa.file_client, cont, name, - timeout=self._general_options.timeout_sec) - else: - fp = blobxfer.operations.azure.blob.get_blob_properties( - sa.block_blob_client, cont, name, - self._spec.options.mode, - timeout=self._general_options.timeout_sec) - if fp is not None: - if blobxfer.models.crypto.EncryptionMetadata.\ - encryption_metadata_exists(fp.metadata): - ed = blobxfer.models.crypto.EncryptionMetadata() - ed.convert_from_json(fp.metadata, fp.name, None) - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) - if (self._spec.options.mode == - blobxfer.models.azure.StorageModes.File): - ase.populate_from_file(sa, fp) - else: - ase.populate_from_blob(sa, fp) - else: + # do not check for existing remote right now if striped + # vectored io mode + if (self._spec.options.vectored_io.distribution_mode == + blobxfer.models.upload. + VectoredIoDistributionMode.Stripe): ase = None - lpath = local_path.parent_path / local_path.relative_path - action = self._check_upload_conditions(lpath, ase) + else: + ase = self._check_for_existing_remote(sa, cont, name) if ase is None: if self._spec.options.rsa_public_key: ed = blobxfer.models.crypto.EncryptionMetadata() @@ -538,13 +545,87 @@ def _generate_entity_for_source(self, local_path): ase = blobxfer.models.azure.StorageEntity(cont, ed) ase.populate_from_local( sa, cont, name, self._spec.options.mode) - yield action, ase + yield sa, ase def _create_unique_id(self, src, ase): return ';'.join( (str(src.absolute_path), ase._client.account_name, ase.path) ) + def append_slice_suffix_to_name(self, name, slice): + return '{}.bxslice-{}'.format(name, slice) + + def _vectorize_and_bind(self, local_path, dest): + # type: (Uploader, blobxfer.models.upload.LocalPath, + # List[blobxfer.models.azure.StorageEntity]) -> None + """Vectorize local path to destinations and bind + :param Uploader self: this + :param blobxfer.models.LocalPath local_path: local path + :param list rfile: remote file + """ + if (self._spec.options.vectored_io.distribution_mode == + blobxfer.models.upload.VectoredIoDistributionMode.Stripe): + num_dest = len(dest) + # compute total number of slices + slices = int(math.ceil( + local_path.size / + self._spec.options.vectored_io.stripe_chunk_size_bytes)) + logger.debug( + '{} slices for vectored out of {} to {} destinations'.format( + slices, local_path.absolute_path, num_dest)) + # create new local path to ase mappings + curr = 0 + slice = 0 + for i in range(0, slices): + start = curr + end = ( + curr + + self._spec.options.vectored_io.stripe_chunk_size_bytes + ) + if end > local_path.size: + end = local_path.size + sa, ase = dest[i % num_dest] + name = self.append_slice_suffix_to_name(ase.name, slice) + ase = self._check_for_existing_remote(sa, ase.container, name) + lp_slice = blobxfer.models.upload.LocalPath( + parent_path=local_path.parent_path, + relative_path=local_path.relative_path, + view=blobxfer.models.upload.LocalPathView( + fd_start=start, + fd_end=end, + slice_num=slice, + ) + ) + action = 
self._check_upload_conditions(lp_slice, ase) + yield action, lp_slice, ase + start += curr + slice += 1 + elif (self._spec.options.vectored_io.distribution_mode == + blobxfer.models.upload.VectoredIoDistributionMode.Replica): + action_map = {} + for _, ase in dest: + action = self._check_upload_conditions(local_path, ase) + if action not in action_map: + action_map[action] = [] + action_map[action].append(ase) + for action in action_map: + dst = action_map[action] + if len(dst) == 1: + yield action, local_path, dst[0] + else: + if (action == UploadAction.CheckMd5 or + action == UploadAction.Skip): + for ase in dst: + yield action, local_path, ase + else: + primary_ase = dst[0] + primary_ase.replica_targets.extend(dst[1:]) + yield action, local_path, primary_ase + else: + for _, ase in dest: + action = self._check_upload_conditions(local_path, ase) + yield action, local_path, ase + def _run(self): # type: (Uploader) -> None """Execute Uploader @@ -582,27 +663,32 @@ def _run(self): raise RuntimeError( 'cannot rename to specified destination with multiple sources') # iterate through source paths to upload - for sfile in self._spec.sources.files(): - # create associated storage entity (destination) for file - for action, ase in self._generate_entity_for_source(sfile): - print(sfile.parent_path, sfile.relative_path, sfile.absolute_path, action, ase.container, ase.name) - print(sfile.size, sfile.mode, sfile.uid, sfile.gid) - print(self._create_unique_id(sfile, ase)) + for src in self._spec.sources.files(): + # create a destination array for the source + dest = [ + (sa, ase) for sa, ase in + self._generate_destination_for_source(src) + ] + for action, lp, ase in self._vectorize_and_bind(src, dest): + print(lp.parent_path, lp.relative_path, lp.absolute_path, action, ase.container, ase.name) + print(lp.size, lp.mode, lp.uid, lp.gid) + print(self._create_unique_id(lp, ase)) + print('replicas', len(ase.replica_targets) if ase.replica_targets is not None else 'none') if action == UploadAction.Skip: skipped_files += 1 skipped_size += ase.size if ase.size is not None else 0 continue # add to potential upload set - uid = self._create_unique_id(sfile, ase) + uid = self._create_unique_id(lp, ase) with self._upload_lock: self._upload_set.add(uid) if action == UploadAction.CheckMd5: - self._pre_md5_skip_on_check(sfile, ase) + self._pre_md5_skip_on_check(lp, ase) elif action == UploadAction.Upload: - self._add_to_upload_queue(sfile, ase, uid) + self._add_to_upload_queue(lp, ase, uid) nfiles += 1 - total_size += sfile.size + total_size += lp.size self._upload_total = nfiles - skipped_files self._upload_bytes_total = total_size - skipped_size diff --git a/cli/cli.py b/cli/cli.py index f7a5f53..bc11ef6 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -334,6 +334,20 @@ def callback(ctx, param, value): callback=callback)(f) +def _distribution_mode(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['distribution_mode'] = value.lower() + return value + return click.option( + '--distribution-mode', + expose_value=False, + default='disabled', + help='Vectored IO distribution mode: disabled, replica, ' + 'stripe [disabled]', + callback=callback)(f) + + def _endpoint_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -415,17 +429,19 @@ def callback(ctx, param, value): callback=callback)(f) -def _multi_storage_account_distribution_mode(f): +def _one_shot_bytes_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) 
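For the stripe distribution mode introduced above, the slice layout depends only on the file size, the stripe width, and the number of destinations; a minimal sketch of the intended round-robin assignment (the '.bxslice-N' suffix mirrors append_slice_suffix_to_name) is:

import math

def stripe_slices(file_size, stripe_chunk_size, destinations):
    """Yield (destination, slice suffix, fd_start, fd_end) for each slice."""
    num_slices = int(math.ceil(file_size / float(stripe_chunk_size)))
    start = 0
    for i in range(num_slices):
        end = min(start + stripe_chunk_size, file_size)
        yield (destinations[i % len(destinations)],
               '.bxslice-{}'.format(i), start, end)
        start = end

# a 2.5 GiB file with the default 1 GiB stripe width over two storage accounts
# yields three slices: account 0 receives bytes [0, 1 GiB) and [2 GiB, 2.5 GiB),
# account 1 receives bytes [1 GiB, 2 GiB)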
- clictx.cli_options[ - 'multi_storage_account_distribution_mode'] = value.lower() + clictx.cli_options['one_shot_bytes'] = value return value return click.option( - '--multi-storage-account-distribution-mode', + '--one-shot-bytes', expose_value=False, - default='disabled', - help='Multiple storage account distribution mode [stripe]', + type=int, + default=0, + help='File sizes less than or equal to the specified byte threshold ' + 'will be uploaded as one-shot for block blobs; the valid range that ' + 'can be specified is 0 to 256MiB [0]', callback=callback)(f) @@ -577,6 +593,20 @@ def callback(ctx, param, value): callback=callback)(f) +def _stripe_chunk_size_bytes_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['stripe_chunk_size_bytes'] = value + return value + return click.option( + '--stripe-chunk-size-bytes', + expose_value=False, + type=int, + default=1073741824, + help='Vectored IO stripe width in bytes [1073741824]', + callback=callback)(f) + + def _sync_copy_dest_access_key_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) @@ -603,20 +633,6 @@ def callback(ctx, param, value): callback=callback)(f) -def _stripe_chunk_size_bytes_option(f): - def callback(ctx, param, value): - clictx = ctx.ensure_object(CliContext) - clictx.cli_options['stripe_chunk_size_bytes'] = value - return value - return click.option( - '--stripe-chunk-size-bytes', - expose_value=False, - type=int, - default=1073741824, - help='Stripe width in bytes [1073741824]', - callback=callback)(f) - - def upload_options(f): f = _stripe_chunk_size_bytes_option(f) f = _strip_components_option(f) @@ -630,13 +646,14 @@ def upload_options(f): f = _rename_option(f) f = _recursive_option(f) f = _overwrite_option(f) - f = _multi_storage_account_distribution_mode(f) + f = _one_shot_bytes_option(f) f = _mode_option(f) f = _include_option(f) f = _file_md5_option(f) f = _file_attributes(f) f = _exclude_option(f) f = _endpoint_option(f) + f = _distribution_mode(f) f = _delete_option(f) f = _chunk_size_bytes_option(f) f = _access_key_option(f) diff --git a/cli/settings.py b/cli/settings.py index 6dc4f72..5834c12 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -96,6 +96,7 @@ def add_cli_options( 'chunk_size_bytes': cli_options['chunk_size_bytes'], 'delete_extraneous_destination': cli_options['delete'], 'mode': cli_options['mode'], + 'one_shot_bytes': cli_options['one_shot_bytes'], 'overwrite': cli_options['overwrite'], 'recursive': cli_options['recursive'], 'rename': cli_options['rename'], @@ -117,8 +118,7 @@ def add_cli_options( 'vectored_io': { 'stripe_chunk_size_bytes': cli_options[ 'stripe_chunk_size_bytes'], - 'multi_storage_account_distribution_mode': cli_options[ - 'multi_storage_account_distribution_mode'], + 'distribution_mode': cli_options['distribution_mode'], }, }, } @@ -392,6 +392,7 @@ def create_upload_specifications(config): delete_extraneous_destination=conf[ 'options']['delete_extraneous_destination'], mode=mode, + one_shot_bytes=conf['options']['one_shot_bytes'], overwrite=conf['options']['overwrite'], recursive=conf['options']['recursive'], rename=conf['options']['rename'], @@ -405,11 +406,10 @@ def create_upload_specifications(config): vectored_io=blobxfer.models.options.VectoredIo( stripe_chunk_size_bytes=conf[ 'options']['vectored_io']['stripe_chunk_size_bytes'], - multi_storage_account_distribution_mode=blobxfer. + distribution_mode=blobxfer. 
models.upload.VectoredIoDistributionMode( conf['options']['vectored_io'][ - 'multi_storage_account_distribution_mode'].lower( - )), + 'distribution_mode'].lower()), ), ), skip_on_options=blobxfer.models.options.SkipOn( From 813080ad5532fbe38356836b44c94cf3444f789b Mon Sep 17 00:00:00 2001 From: Fred Park Date: Sun, 21 May 2017 20:06:20 -0700 Subject: [PATCH 28/47] Basic file and page upload support --- blobxfer/models/upload.py | 80 ++++++-- blobxfer/operations/azure/blob/__init__.py | 18 ++ blobxfer/operations/azure/blob/append.py | 15 ++ blobxfer/operations/azure/blob/block.py | 19 +- blobxfer/operations/azure/blob/page.py | 53 ++++++ blobxfer/operations/azure/file.py | 34 +++- blobxfer/operations/md5.py | 22 +++ blobxfer/operations/upload.py | 212 ++++++++++++++++----- 8 files changed, 380 insertions(+), 73 deletions(-) diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index 34584dd..a0ee766 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -248,6 +248,8 @@ def __init__(self, lpath, ase, uid, options, resume_mgr): self._adjust_chunk_size(options) self._total_chunks = self._compute_total_chunks(self._chunk_size) self._outstanding_ops = self._total_chunks + if blobxfer.util.is_not_empty(self._ase.replica_targets): + self._outstanding_ops *= len(self._ase.replica_targets) # initialize integrity checkers self.hmac = None self.md5 = None @@ -282,8 +284,7 @@ def all_operations_completed(self): :return: if all operations completed """ with self._meta_lock: - return (self._outstanding_ops == 0 and - len(self._unchecked_chunks) == 0) + return self._outstanding_ops == 0 @property def is_resumable(self): @@ -296,24 +297,51 @@ def is_resumable(self): return self._resume_mgr is not None and self.hmac is None @property - def one_shot(self): + def requires_put_block_list(self): # type: (Descriptor) -> bool - """Upload is a one-shot block upload + """Requires a put block list operation to finalize :param Descriptor self: this :rtype: bool - :return: is one-shot capable + :return: if finalize requires a put block list """ return (self._ase.mode == blobxfer.models.azure.StorageModes.Block and - self._total_chunks == 1) + self._total_chunks > 1) - def hmac_iv(self, iv): + @property + def requires_set_blob_properties_md5(self): + # type: (Descriptor) -> bool + """Requires a set file properties for md5 to finalize + :param Descriptor self: this + :rtype: bool + :return: if finalize requires a put file properties + """ + return (not self.entity.is_encrypted and self.must_compute_md5 and + self.entity.mode == blobxfer.models.azure.StorageModes.Page) + + @property + def requires_set_file_properties_md5(self): + # type: (Descriptor) -> bool + """Requires a set file properties for md5 to finalize + :param Descriptor self: this + :rtype: bool + :return: if finalize requires a put file properties + """ + return (not self.entity.is_encrypted and self.must_compute_md5 and + self.entity.mode == blobxfer.models.azure.StorageModes.File) + + def complete_offset_upload(self): + with self._meta_lock: + self._outstanding_ops -= 1 + # TODO save resume state + + def hmac_data(self, data): # type: (Descriptor, bytes) -> None - """Send IV through hasher + """Send data through hmac hasher :param Descriptor self: this - :param bytes iv: iv + :param bytes data: data """ with self._hasher_lock: - self.hmac.update(iv) + self.hmac.update(data) def _initialize_encryption(self, options): # type: (Descriptor, blobxfer.models.options.Upload) -> None @@ -321,7 +349,10 @@ def 
_initialize_encryption(self, options): :param Descriptor self: this :param blobxfer.models.options.Upload options: upload options """ - if options.rsa_public_key is not None: + # TODO support append blobs? + if (options.rsa_public_key is not None and + (self._ase.mode == blobxfer.models.azure.StorageModes.Block or + self._ase.mode == blobxfer.models.azure.StorageModes.File)): em = blobxfer.models.crypto.EncryptionMetadata() em.create_new_metadata(options.rsa_public_key) self.current_iv = em.content_encryption_iv @@ -413,7 +444,8 @@ def _initialize_integrity_checkers(self, options): 'symmetric key is invalid: provide RSA private key ' 'or metadata corrupt') self.hmac = self._ase.encryption_metadata.initialize_hmac() - if self.hmac is None and options.store_file_properties.md5: + # both hmac and md5 can be enabled + if options.store_file_properties.md5: self.md5 = blobxfer.util.new_md5_hasher() def next_offsets(self): @@ -424,13 +456,13 @@ def next_offsets(self): :return: upload offsets """ # TODO RESUME -# resume_bytes = self._resume() resume_bytes = None +# resume_bytes = self._resume() with self._meta_lock: -# if self._offset >= self._ase.size: -# return None, resume_bytes - if self._offset + self._chunk_size > self._ase.size: - chunk = self._ase.size - self._offset + if self._offset >= self.local_path.view.fd_end: + return None, resume_bytes + if self._offset + self._chunk_size > self.local_path.view.fd_end: + chunk = self.local_path.view.fd_end - self._offset else: chunk = self._chunk_size num_bytes = chunk @@ -440,7 +472,8 @@ def next_offsets(self): range_end = self._offset + num_bytes - 1 self._offset += chunk self._chunk_num += 1 - if self._ase.is_encrypted and self._offset >= self._ase.size: + if (self._ase.is_encrypted and + self._offset >= self.local_path.view.fd_end): pad = True else: pad = False @@ -453,3 +486,14 @@ def next_offsets(self): range_end=range_end, pad=pad, ), resume_bytes + + def read_data(self, offsets): + # compute start from view + start = self.local_path.view.fd_start + offsets.range_start + with self.local_path.absolute_path.open('rb') as fd: + fd.seek(start, 0) + data = fd.read(offsets.num_bytes) + if self.must_compute_md5: + with self._hasher_lock: + self.md5.update(data) + return data diff --git a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index dbe8263..40b40c4 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -158,3 +158,21 @@ def get_blob_range(ase, offsets, timeout=None): validate_content=False, # HTTPS takes care of integrity during xfer timeout=timeout, ).content + + +def create_container(ase, containers_created, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None + """Create blob container + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param dict containers_created: containers already created map + :param int timeout: timeout + """ + key = ase.client.account_name + ':blob=' + ase.container + if key not in containers_created: + ase.client.create_container( + container_name=ase.container, + fail_on_exist=False, + timeout=timeout) + containers_created.add(key) + logger.info('created blob container {} on storage account {}'.format( + ase.container, ase.client.account_name)) diff --git a/blobxfer/operations/azure/blob/append.py b/blobxfer/operations/azure/blob/append.py index 087e33b..e28fcdb 100644 --- a/blobxfer/operations/azure/blob/append.py +++ b/blobxfer/operations/azure/blob/append.py @@ -62,3 
+62,18 @@ def create_client(storage_account): # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client + + +def create_blob(ase, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, int) -> None + """Create append blob + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param int timeout: timeout + """ + ase.client.create_blob( + container_name=ase.container, + blob_name=ase.name, + content_settings=azure.storage.blob.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(ase.name) + ), + timeout=timeout) diff --git a/blobxfer/operations/azure/blob/block.py b/blobxfer/operations/azure/blob/block.py index 00f7eb3..9318a76 100644 --- a/blobxfer/operations/azure/blob/block.py +++ b/blobxfer/operations/azure/blob/block.py @@ -64,6 +64,19 @@ def create_client(storage_account): return client -def upload_block(): - logger.info('upload block') - print('upload') +def create_blob(ase, data, md5, encmeta, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, int) -> None + """Create one shot block blob + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param int timeout: timeout + """ + if encmeta is not None: + raise NotImplementedError() + ase.client._put_blob( + container_name=ase.container, + blob_name=ase.name, + content_settings=azure.storage.blob.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(ase.name), + content_md5=md5, + ), + timeout=timeout) diff --git a/blobxfer/operations/azure/blob/page.py b/blobxfer/operations/azure/blob/page.py index 05d36b6..5601557 100644 --- a/blobxfer/operations/azure/blob/page.py +++ b/blobxfer/operations/azure/blob/page.py @@ -62,3 +62,56 @@ def create_client(storage_account): # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client + + +def create_blob(ase, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, int) -> None + """Create page blob + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param int timeout: timeout + """ + ase.client.create_blob( + container_name=ase.container, + blob_name=ase.name, + content_length=ase.size, + content_settings=azure.storage.blob.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(ase.name) + ), + timeout=timeout) + + +def put_page(ase, page_start, page_end, data, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, + # int, int, bytes, int) -> None + """Puts a page into remote blob + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param int page_start: page range start + :param int page_end: page range end + :param bytes data: data + :param int timeout: timeout + """ + ase.client.update_page( + container_name=ase.container, + blob_name=ase.name, + page=data, + start_range=page_start, + end_range=page_end, + validate_content=False, # integrity is enforced with HTTPS + timeout=timeout) + + +def set_blob_md5(ase, md5, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, str, int) -> None + """Set blob properties MD5 + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param str md5: md5 as base64 + :param int timeout: timeout + """ + ase.client.set_blob_properties( + container_name=ase.container, + blob_name=ase.name, + content_settings=azure.storage.blob.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(ase.name), + content_md5=md5, + ), + timeout=timeout) diff --git a/blobxfer/operations/azure/file.py 
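Putting the new page blob helpers together, an upload amounts to creating the blob at the page-aligned length and writing 512-byte-aligned ranges of at most 4 MiB each; a rough sketch against the same azure-storage calls the helpers wrap (buffering the whole file in memory only for brevity) is:

import azure.storage.blob

import blobxfer.util

_PAGE_CHUNK = 4194304  # max bytes per update_page call

def upload_page_blob(account, key, container, name, data, timeout=None):
    # minimal sketch: `data` is an in-memory bytes buffer; the real code
    # streams chunks from disk through the upload queue instead
    client = azure.storage.blob.PageBlobService(
        account_name=account, account_key=key)
    aligned = blobxfer.util.page_align_content_length(len(data))
    client.create_blob(
        container_name=container, blob_name=name,
        content_length=aligned, timeout=timeout)
    if len(data) < aligned:
        data = data + b'\0' * (aligned - len(data))  # pad to 512-byte boundary
    for start in range(0, aligned, _PAGE_CHUNK):
        end = min(start + _PAGE_CHUNK, aligned)
        client.update_page(
            container_name=container, blob_name=name, page=data[start:end],
            start_range=start, end_range=end - 1, timeout=timeout)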
b/blobxfer/operations/azure/file.py index 7a4e076..4970970 100644 --- a/blobxfer/operations/azure/file.py +++ b/blobxfer/operations/azure/file.py @@ -250,23 +250,22 @@ def create_file(ase, timeout=None): directory_name=dir, file_name=fpath, content_length=ase.size, - content_settings=None, + content_settings=azure.storage.file.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(fpath) + ), timeout=timeout) -def put_file_range(ase, local_file, offsets, timeout=None): - # type: (blobxfer.models.azure.StorageEntity, pathlib.path, - # blobxfer.models.upload.Offsets, int) -> None +def put_file_range(ase, offsets, data, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, + # blobxfer.models.upload.Offsets, bytes, int) -> None """Puts a range of bytes into the remote file :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity - :param pathlib.Path local_file: local file :param blobxfer.models.upload.Offsets offsets: upload offsets + :param bytes data: data :param int timeout: timeout """ dir, fpath = parse_file_path(ase.name) - with local_file.open('rb') as fd: - fd.seek(offsets.range_start, 0) - data = fd.read(offsets.num_bytes) ase.client.update_range( share_name=ase.container, directory_name=dir, @@ -276,3 +275,22 @@ def put_file_range(ase, local_file, offsets, timeout=None): end_range=offsets.range_end, validate_content=False, # integrity is enforced with HTTPS timeout=timeout) + + +def set_file_md5(ase, md5, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, str, int) -> None + """Set file properties MD5 + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param str md5: md5 as base64 + :param int timeout: timeout + """ + dir, fpath = parse_file_path(ase.name) + ase.client.set_file_properties( + share_name=ase.container, + directory_name=dir, + file_name=fpath, + content_settings=azure.storage.file.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(fpath), + content_md5=md5, + ), + timeout=timeout) diff --git a/blobxfer/operations/md5.py b/blobxfer/operations/md5.py index e04daec..7239a2d 100644 --- a/blobxfer/operations/md5.py +++ b/blobxfer/operations/md5.py @@ -42,6 +42,9 @@ # create logger logger = logging.getLogger(__name__) +# global defines +_EMPTY_MAX_PAGE_SIZE_MD5 = 'tc+p1sj+vWGPkawoQ9UKHA==' +_MAX_PAGE_SIZE_BYTES = 4194304 def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): @@ -80,6 +83,25 @@ def compute_md5_for_data_asbase64(data): return blobxfer.util.base64_encode_as_string(hasher.digest()) +def check_data_is_empty(data): + # type: (bytes) -> bool + """Check if data is empty via MD5 + :param bytes data: data to check + :rtype: bool + :return: if data is empty + """ + contentmd5 = compute_md5_for_data_asbase64(data) + datalen = len(data) + if datalen == _MAX_PAGE_SIZE_BYTES: + if contentmd5 == _EMPTY_MAX_PAGE_SIZE_MD5: + return True + else: + data_chk = b'\0' * datalen + if compute_md5_for_data_asbase64(data_chk) == contentmd5: + return True + return False + + class LocalFileMd5Offload(blobxfer.models.offload._MultiprocessOffload): """LocalFileMd5Offload""" def __init__(self, num_workers): diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 67d29e2..985a154 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -90,6 +90,10 @@ def __init__(self, general_options, creds, spec): self._upload_bytes_total = None self._upload_bytes_sofar = 0 self._upload_terminate = False + self._transfer_lock = threading.Lock() + 
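The check_data_is_empty helper added to md5.py above lets the page blob path skip ranges that are entirely zero: a full 4194304-byte page is compared against a precomputed base64 MD5 constant, and shorter trailing pages are compared against an equal-length zero buffer. A minimal stdlib sketch of that check follows; the constant is the one defined in the patch, everything else is illustrative:

import base64
import hashlib

_MAX_PAGE_SIZE_BYTES = 4194304
_EMPTY_MAX_PAGE_SIZE_MD5 = 'tc+p1sj+vWGPkawoQ9UKHA=='

def md5_base64(data):
    # same encoding as compute_md5_for_data_asbase64
    return base64.b64encode(hashlib.md5(data).digest()).decode('ascii')

def data_is_empty(data):
    # full-size pages hit the precomputed constant; short final pages
    # fall back to hashing an equal-length run of zero bytes
    if len(data) == _MAX_PAGE_SIZE_BYTES:
        return md5_base64(data) == _EMPTY_MAX_PAGE_SIZE_MD5
    return md5_base64(data) == md5_base64(b'\0' * len(data))

print(data_is_empty(b'\0' * 512))           # True, range can be skipped
print(data_is_empty(b'\1' + b'\0' * 511))   # False, range must be uploaded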
self._transfer_queue = queue.Queue() + self._transfer_set = set() + self._transfer_threads = [] self._start_time = None self._delete_after = set() self._ud_map = {} @@ -111,10 +115,12 @@ def termination_check(self): :return: if terminated """ with self._upload_lock: - return (self._upload_terminate or - len(self._exceptions) > 0 or - (self._all_remote_files_processed and - len(self._upload_set) == 0)) + with self._transfer_lock: + return (self._upload_terminate or + len(self._exceptions) > 0 or + (self._all_remote_files_processed and + len(self._upload_set) == 0 and + len(self._transfer_set) == 0)) @property def termination_check_md5(self): @@ -272,6 +278,18 @@ def _initialize_upload_threads(self): self._upload_threads.append(thr) thr.start() + def _initialize_transfer_threads(self): + # type: (Uploader) -> None + """Initialize transfer threads + :param Uploader self: this + """ + logger.debug('spawning {} transfer threads'.format( + self._general_options.concurrency.transfer_threads)) + for _ in range(self._general_options.concurrency.transfer_threads): + thr = threading.Thread(target=self._worker_thread_transfer) + self._transfer_threads.append(thr) + thr.start() + def _wait_for_upload_threads(self, terminate): # type: (Uploader, bool) -> None """Wait for upload threads @@ -283,47 +301,100 @@ def _wait_for_upload_threads(self, terminate): for thr in self._upload_threads: thr.join() - def _worker_thread_upload(self): + def _worker_thread_transfer(self): # type: (Uploader) -> None - """Worker thread upload + """Worker thread transfer :param Uploader self: this """ while not self.termination_check: try: - ud = self._upload_queue.get(False, 0.25) + ud, ase, offsets, data = self._transfer_queue.get( + block=False, timeout=0.03) except queue.Empty: continue try: - self._process_upload_descriptor(ud) + self._process_transfer(ud, ase, offsets, data) except Exception as e: with self._upload_lock: self._exceptions.append(e) - def _put_data(self, ud, offsets): - if ud.entity.mode == blobxfer.models.azure.StorageModes.File: + def _process_transfer(self, ud, ase, offsets, data): + # issue put range + self._put_data(ase, offsets, data) + # accounting + with self._transfer_lock: + self._transfer_set.remove( + self._create_unique_transfer_id(ud.local_path, ase, offsets)) + self._upload_bytes_sofar += offsets.num_bytes + ud.complete_offset_upload() + + def _put_data(self, ase, offsets, data): + print('UL', offsets) + if ase.mode == blobxfer.models.azure.StorageModes.File: if offsets.chunk_num == 0: # create container if necessary blobxfer.operations.azure.file.create_share( - ud.entity, self._containers_created, - self._general_options.timeout_sec) + ase, self._containers_created, + timeout=self._general_options.timeout_sec) # create parent directories with self._fileshare_dir_lock: blobxfer.operations.azure.file.\ create_all_parent_directories( - ud.entity, self._dirs_created, - self._general_options.timeout_sec) + ase, self._dirs_created, + timeout=self._general_options.timeout_sec) # create remote file blobxfer.operations.azure.file.create_file( - ud.entity, self._general_options.timeout_sec) - # upload chunk + ase, timeout=self._general_options.timeout_sec) + # upload range blobxfer.operations.azure.file.put_file_range( - ud.entity, ud.local_path.absolute_path, offsets, - self._general_options.timeout_sec) - else: - # TODO all upload types - # TODO handle one-shot uploads for block blobs - data = blobxfer.operations.azure.blob.get_blob_range( - dd.entity, offsets, self._general_options.timeout_sec) + 
ase, offsets, data, timeout=self._general_options.timeout_sec) + elif ase.mode == blobxfer.models.azure.StorageModes.Append: + raise NotImplementedError() + elif ase.mode == blobxfer.models.azure.StorageModes.Block: + # TODO handle one-shot uploads for block blobs (get md5 as well) + raise NotImplementedError() + elif ase.mode == blobxfer.models.azure.StorageModes.Page: + if offsets.chunk_num == 0: + # create container if necessary + blobxfer.operations.azure.blob.create_container( + ase, self._containers_created, + timeout=self._general_options.timeout_sec) + # create remote blob + blobxfer.operations.azure.blob.page.create_blob( + ase, timeout=self._general_options.timeout_sec) + # align page + aligned = blobxfer.util.page_align_content_length( + offsets.num_bytes) + if aligned != offsets.num_bytes: + data = data.ljust(aligned, b'\0') + if blobxfer.operations.md5.check_data_is_empty(data): + return + # upload page + blobxfer.operations.azure.blob.page.put_page( + ase, offsets.range_start, offsets.range_start + aligned - 1, + data, timeout=self._general_options.timeout_sec) + + def _worker_thread_upload(self): + # type: (Uploader) -> None + """Worker thread upload + :param Uploader self: this + """ + import time + while not self.termination_check: + try: + if (len(self._transfer_set) > + self._general_options.concurrency.transfer_threads): + time.sleep(0.03) + continue + else: + ud = self._upload_queue.get(False, 0.03) + except queue.Empty: + continue + try: + self._process_upload_descriptor(ud) + except Exception as e: + with self._upload_lock: + self._exceptions.append(e) def _process_upload_descriptor(self, ud): # type: (Uploader, blobxfer.models.upload.Descriptor) -> None @@ -342,11 +413,10 @@ def _process_upload_descriptor(self, ud): logger.debug('adding {} sofar {} from {}'.format( resume_bytes, self._upload_bytes_sofar, ud._ase.name)) del resume_bytes - print(offsets) # check if all operations completed if offsets is None and ud.all_operations_completed: # finalize file - ud.finalize_file() + self._finalize_file(ud) # accounting with self._upload_lock: if ud.entity.is_encrypted: @@ -354,14 +424,17 @@ def _process_upload_descriptor(self, ud): self._upload_set.remove(ud.unique_id) self._upload_sofar += 1 return - # re-enqueue for other threads to upload - self._upload_queue.put(ud) + # if nothing to upload, re-enqueue for finalization if offsets is None: + self._upload_queue.put(ud) return + + # TODO encryption + # encrypt if necessary if ud.entity.is_encrypted: # send iv through hmac - ud.hmac_iv(ud.current_iv) + ud.hmac_data(ud.current_iv) # encrypt data if self._crypto_offload is not None: self._crypto_offload.add_encrypt_chunk( @@ -372,19 +445,62 @@ def _process_upload_descriptor(self, ud): # retrieved from crypto queue return else: - # TODO pickup here, read data from file - - encdata = blobxfer.operations.crypto.aes_cbc_decrypt_data( + # read data from file and encrypt + data = ud.read_data(offsets) + encdata = blobxfer.operations.crypto.aes_cbc_encrypt_data( ud.entity.encryption_metadata.symmetric_key, ud.current_iv, data, offsets.pad) # send encrypted data through hmac - - # TODO send data as optional param if encrypted - # issue put range - self._put_data(ud, offsets) - # accounting - with self._upload_lock: - self._upload_bytes_sofar += offsets.num_bytes + ud.hmac_data(encdata) + data = encdata + # TODO save last 16 encrypted bytes for next IV + else: + data = ud.read_data(offsets) + # re-enqueue for other threads to upload + self._upload_queue.put(ud) + # add data to 
transfer queue + with self._transfer_lock: + self._transfer_set.add( + self._create_unique_transfer_id( + ud.local_path, ud.entity, offsets)) + self._transfer_queue.put((ud, ud.entity, offsets, data)) + # iterate replicas + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + with self._transfer_lock: + self._transfer_set.add( + self._create_unique_transfer_id( + ud.local_path, ase, offsets)) + self._transfer_queue.put((ud, ase, offsets, data)) + + def _finalize_file(self, ud): + # create encryption metadata for file/blob + if ud.entity.is_encrypted: + # TODO + pass + # put block list for non one-shot block blobs + if ud.requires_put_block_list: + # TODO + pass + # set md5 blob property if not encrypted + if ud.requires_set_blob_properties_md5: + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + blobxfer.operations.azure.blob.page.set_blob_md5( + ud.entity, digest, timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.blob.page.set_blob_md5( + ase, digest, timeout=self._general_options.timeout_sec) + # set md5 file property if not encrypted + if ud.requires_set_file_properties_md5: + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + blobxfer.operations.azure.file.set_file_md5( + ud.entity, digest, timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.file.set_file_md5( + ase, digest, timeout=self._general_options.timeout_sec) + # TODO set file metadata if encrypted def _cleanup_temporary_files(self): # type: (Uploader) -> None @@ -552,6 +668,12 @@ def _create_unique_id(self, src, ase): (str(src.absolute_path), ase._client.account_name, ase.path) ) + def _create_unique_transfer_id(self, local_path, ase, offsets): + return ';'.join( + (str(local_path.absolute_path), ase._client.account_name, ase.path, + str(local_path.view.fd_start), str(offsets.range_start)) + ) + def append_slice_suffix_to_name(self, name, slice): return '{}.bxslice-{}'.format(name, slice) @@ -654,6 +776,7 @@ def _run(self): self._check_for_crypto_done) # initialize upload threads self._initialize_upload_threads() + self._initialize_transfer_threads() # initialize local counters nfiles = 0 total_size = 0 @@ -710,14 +833,14 @@ def _run(self): self._update_progress_bar() # check for exceptions if len(self._exceptions) > 0: - logger.error('exceptions encountered while downloading') + logger.error('exceptions encountered while uploading') # raise the first one raise self._exceptions[0] # check for mismatches if (self._upload_sofar != self._upload_total or self._upload_bytes_sofar != self._upload_bytes_total): raise RuntimeError( - 'download mismatch: [count={}/{} bytes={}/{}]'.format( + 'upload mismatch: [count={}/{} bytes={}/{}]'.format( self._upload_sofar, self._upload_total, self._upload_bytes_sofar, self._upload_bytes_total)) # delete all remaining local files not accounted for if @@ -728,11 +851,12 @@ def _run(self): self._resume.delete() # output throughput if self._upload_start_time is not None: - dltime = (end_time - self._upload_start_time).total_seconds() + ultime = (end_time - self._upload_start_time).total_seconds() + mibps = upload_size_mib / ultime logger.info( - ('elapsed download + verify time and throughput: {0:.3f} sec, ' - '{1:.4f} Mbps').format( - dltime, download_size_mib * 8 / dltime)) + ('elapsed 
upload + verify time and throughput: {0:.3f} sec, ' + '{1:.4f} Mbps ({2:.3f} MiB/s)').format( + ultime, mibps * 8, mibps)) end_time = blobxfer.util.datetime_now() logger.info('blobxfer end time: {0} (elapsed: {1:.3f} sec)'.format( end_time, (end_time - self._start_time).total_seconds())) From ccd8db28d38229ff4481d854f9812a11308e4ed1 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Mon, 22 May 2017 20:26:52 -0700 Subject: [PATCH 29/47] Block uploading - Fix striped uploads - Add --delete support to upload - Block size auto-select support - Metadata support including file attr and vectored io - Disable container create on insufficient sas token privilege --- blobxfer/models/azure.py | 34 +- blobxfer/models/crypto.py | 18 +- blobxfer/models/metadata.py | 132 ++++++++ blobxfer/models/upload.py | 193 +++++++++-- blobxfer/operations/azure/__init__.py | 27 +- blobxfer/operations/azure/blob/__init__.py | 38 +++ blobxfer/operations/azure/blob/block.py | 69 +++- blobxfer/operations/azure/blob/page.py | 14 + blobxfer/operations/azure/file.py | 64 ++++ blobxfer/operations/upload.py | 356 +++++++++++++++------ blobxfer/util.py | 11 + cli/cli.py | 6 +- 12 files changed, 795 insertions(+), 167 deletions(-) create mode 100644 blobxfer/models/metadata.py diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index b2b34b8..d44af48 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -56,6 +56,7 @@ def __init__(self, container, ed=None): :param blobxfer.models.crypto.EncryptionMetadata ed: encryption metadata """ + self._create_containers = None self._client = None self._container = container self._name = None @@ -68,6 +69,16 @@ def __init__(self, container, ed=None): self._vio = None self.replica_targets = None + @property + def create_containers(self): + # type: (StorageEntity) -> bool + """Create containers + :param StorageEntity self: this + :rtype: bool + :return: create containers + """ + return self._create_containers + @property def client(self): # type: (StorageEntity) -> object @@ -198,15 +209,17 @@ def encryption_metadata(self, value): """ self._encryption = value - def populate_from_blob(self, sa, blob): + def populate_from_blob(self, sa, blob, path): # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, - # azure.storage.blob.models.Blob) -> None + # azure.storage.blob.models.Blob, str) -> None """Populate properties from Blob :param StorageEntity self: this :param blobxfer.operations.azure.StorageAccount sa: storage account :param azure.storage.blob.models.Blob blob: blob to populate from + :param str path: full path to blob """ - self._name = blob.name + self._create_containers = sa.create_containers + self._name = path self._snapshot = blob.snapshot self._lmt = blob.properties.last_modified self._size = blob.properties.content_length @@ -221,15 +234,17 @@ def populate_from_blob(self, sa, blob): self._mode = StorageModes.Page self._client = sa.page_blob_client - def populate_from_file(self, sa, file): + def populate_from_file(self, sa, file, path): # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, - # azure.storage.file.models.File) -> None + # azure.storage.file.models.File, str) -> None """Populate properties from File :param StorageEntity self: this :param blobxfer.operations.azure.StorageAccount sa: storage account :param azure.storage.file.models.File file: file to populate from + :param str path: full path to file """ - self._name = file.name + self._create_containers = sa.create_containers + self._name = path self._snapshot = None 
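The corrected summary line above reports elapsed upload plus verify time together with the bit rate and MiB/s. A tiny worked example of that arithmetic with hypothetical numbers (note that multiplying MiB/s by 8 strictly yields Mibit/s, which the log labels Mbps):

# hypothetical transfer: 2048 MiB uploaded and verified in 87.5 seconds
upload_size_mib = 2048.0
ultime = 87.5
mibps = upload_size_mib / ultime    # ~23.406 MiB/s
print(('elapsed upload + verify time and throughput: {0:.3f} sec, '
       '{1:.4f} Mbps ({2:.3f} MiB/s)').format(ultime, mibps * 8, mibps))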
self._lmt = file.properties.last_modified self._size = file.properties.content_length @@ -237,18 +252,19 @@ def populate_from_file(self, sa, file): self._mode = StorageModes.File self._client = sa.file_client - def populate_from_local(self, sa, container, name, mode): + def populate_from_local(self, sa, container, path, mode): # type: (StorageEntity, blobxfer.operations.azure.StorageAccount # str, str, blobxfer.models.azure.StorageModes) -> None """Populate properties from local :param StorageEntity self: this :param blobxfer.operations.azure.StorageAccount sa: storage account :param str container: container - :param str name: name + :param str path: full path to file :param blobxfer.models.azure.StorageModes mode: storage mode """ + self._create_containers = sa.create_containers self._container = container - self._name = name + self._name = path self._mode = mode if mode == StorageModes.Append: self._client = sa.append_blob_client diff --git a/blobxfer/models/crypto.py b/blobxfer/models/crypto.py index 56c0c79..6a50c9c 100644 --- a/blobxfer/models/crypto.py +++ b/blobxfer/models/crypto.py @@ -186,14 +186,14 @@ def create_new_metadata(self, rsa_public_key): ) self.encryption_mode = EncryptionMetadata._ENCRYPTION_MODE - def convert_from_json(self, md, blobname, rsaprivatekey): + def convert_from_json(self, md, entityname, rsaprivatekey): # type: (EncryptionMetadata, dict, str, # cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey) # -> None """Read metadata json into objects :param EncryptionMetadata self: this :param dict md: metadata dictionary - :param str blobname: blob name + :param str entityname: entity name :param rsaprivatekey: RSA private key :type rsaprivatekey: cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey @@ -221,11 +221,11 @@ def convert_from_json(self, md, blobname, rsaprivatekey): if (self.encryption_agent.encryption_algorithm != EncryptionMetadata._ENCRYPTION_ALGORITHM): raise RuntimeError('{}: unknown block cipher: {}'.format( - blobname, self.encryption_agent.encryption_algorithm)) + entityname, self.encryption_agent.encryption_algorithm)) if (self.encryption_agent.protocol != EncryptionMetadata._ENCRYPTION_PROTOCOL_VERSION): raise RuntimeError('{}: unknown encryption protocol: {}'.format( - blobname, self.encryption_agent.protocol)) + entityname, self.encryption_agent.protocol)) self.encryption_authentication = EncryptionAuthentication( algorithm=ed[ EncryptionMetadata._JSON_KEY_INTEGRITY_AUTH][ @@ -238,13 +238,13 @@ def convert_from_json(self, md, blobname, rsaprivatekey): EncryptionMetadata._AUTH_ALGORITHM): raise RuntimeError( '{}: unknown integrity/auth method: {}'.format( - blobname, self.encryption_authentication.algorithm)) + entityname, self.encryption_authentication.algorithm)) self.encryption_mode = ed[ EncryptionMetadata._JSON_KEY_ENCRYPTION_MODE] if self.encryption_mode != EncryptionMetadata._ENCRYPTION_MODE: raise RuntimeError( '{}: unknown encryption mode: {}'.format( - blobname, self.encryption_mode)) + entityname, self.encryption_mode)) try: _eak = ed[EncryptionMetadata._JSON_KEY_WRAPPEDCONTENTKEY][ EncryptionMetadata._JSON_KEY_ENCRYPTED_AUTHKEY] @@ -265,7 +265,7 @@ def convert_from_json(self, md, blobname, rsaprivatekey): if (self.wrapped_content_key.algorithm != EncryptionMetadata._ENCRYPTED_KEY_SCHEME): raise RuntimeError('{}: unknown key encryption scheme: {}'.format( - blobname, self.wrapped_content_key.algorithm)) + entityname, self.wrapped_content_key.algorithm)) # if RSA key is a public key, stop here as keys cannot be 
decrypted if rsaprivatekey is None: return @@ -304,7 +304,7 @@ def convert_from_json(self, md, blobname, rsaprivatekey): EncryptionMetadata._AUTH_ALGORITHM): raise RuntimeError( '{}: unknown integrity/auth method: {}'.format( - blobname, + entityname, self.encryption_metadata_authentication.algorithm)) # verify hmac authhmac = base64.b64decode( @@ -317,7 +317,7 @@ def convert_from_json(self, md, blobname, rsaprivatekey): if hmacsha256.digest() != authhmac: raise RuntimeError( '{}: encryption metadata authentication failed'.format( - blobname)) + entityname)) def convert_to_json_with_mac(self): # TODO diff --git a/blobxfer/models/metadata.py b/blobxfer/models/metadata.py new file mode 100644 index 0000000..5ebeea3 --- /dev/null +++ b/blobxfer/models/metadata.py @@ -0,0 +1,132 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# compat imports +from __future__ import ( + absolute_import, division, print_function, unicode_literals +) +from builtins import ( # noqa + bytes, dict, int, list, object, range, ascii, chr, hex, input, + next, oct, open, pow, round, super, filter, map, zip) +# stdlib imports +import logging +# non-stdlib imports +# local imports +import blobxfer.util + +# create logger +logger = logging.getLogger(__name__) +# global defines +JSON_KEY_BLOBXFER_METADATA = 'BlobxferMetadata' +# file attributes +_JSON_KEY_FILE_ATTRIBUTES = 'FileAttributes' +_JSON_KEY_FILE_ATTRIBUTES_POSIX = 'POSIX' +_JSON_KEY_FILE_ATTRIBUTES_WINDOWS = 'Windows' +_JSON_KEY_FILE_ATTRIBUTES_MODE = 'mode' +_JSON_KEY_FILE_ATTRIBUTES_UID = 'uid' +_JSON_KEY_FILE_ATTRIBUTES_GID = 'gid' +# vectored io +_JSON_KEY_VECTORED_IO = 'VectoredIO' +_JSON_KEY_VECTORED_IO_MODE = 'Mode' +_JSON_KEY_VECTORED_IO_STRIPE = 'Stripe' +_JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE = 'TotalSize' +_JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START = 'OffsetStart' +_JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES = 'TotalSlices' +_JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID = 'SliceId' +_JSON_KEY_VECTORED_IO_STRIPE_NEXT = 'Next' + + +def generate_fileattr_metadata(local_path, metadata): + # type: (blobxfer.models.upload.LocalPath, dict) -> dict + """Generate file attribute metadata dict + :param blobxfer.models.upload.LocalPath local_path: local path + :param dict metadata: existing metadata dict + :rtype: dict + :return: merged metadata dictionary + """ + if blobxfer.util.on_windows(): + logger.warning( + 'file attributes store/restore on Windows is not supported yet') + return None + else: + md = { + _JSON_KEY_FILE_ATTRIBUTES: { + _JSON_KEY_FILE_ATTRIBUTES_POSIX: { + _JSON_KEY_FILE_ATTRIBUTES_MODE: local_path.mode, + _JSON_KEY_FILE_ATTRIBUTES_UID: local_path.uid, + _JSON_KEY_FILE_ATTRIBUTES_GID: local_path.gid, + } + } + } + return blobxfer.util.merge_dict(metadata, md) + + +def restore_fileattr(path, metadata): + # type: (pathlib.Path, dict) -> None + """Restore file attributes from metadata + :param pathlib.Path path: path to modify + :param dict metadata: existing metadata dict + """ + if blobxfer.util.on_windows(): + logger.warning( + 'file attributes store/restore on Windows is not supported yet') + raise NotImplementedError() + + +def create_vectored_io_next_entry(ase): + # type: (blobxfer.models.upload.LocalPath) -> str + """Create Vectored IO next entry id + :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity + :rtype: str + :return: vectored io next entry + """ + return ';'.join( + (ase.client.primary_endpoint, ase.container, ase.name) + ) + + +def generate_vectored_io_stripe_metadata(local_path, metadata): + # type: (blobxfer.models.upload.LocalPath, dict) -> dict + """Generate vectored io stripe metadata dict + :param blobxfer.models.upload.LocalPath local_path: local path + :param dict metadata: existing metadata dict + :rtype: dict + :return: merged metadata dictionary + """ + md = { + _JSON_KEY_VECTORED_IO: { + _JSON_KEY_VECTORED_IO_MODE: _JSON_KEY_VECTORED_IO_STRIPE, + _JSON_KEY_VECTORED_IO_STRIPE: { + _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE: local_path.total_size, + _JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START: + local_path.view.fd_start, + _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES: + local_path.view.total_slices, + _JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID: + local_path.view.slice_num, + _JSON_KEY_VECTORED_IO_STRIPE_NEXT: local_path.view.next, + } + } + } + return blobxfer.util.merge_dict(metadata, md) diff --git a/blobxfer/models/upload.py 
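The new blobxfer/models/metadata.py above stores all blobxfer-generated metadata as a single JSON string under the BlobxferMetadata key. A sketch of roughly what the merged dictionary looks like before serialization, using the key names defined in the module; the mode/uid/gid, stripe values, and endpoint string below are placeholders:

import json

genmeta = {
    'FileAttributes': {
        'POSIX': {
            'mode': '0o100644',   # placeholder values; the real ones come
            'uid': 1000,          # from the LocalPath stat
            'gid': 1000,
        }
    },
    'VectoredIO': {
        'Mode': 'Stripe',
        'Stripe': {
            'TotalSize': 10485760,
            'OffsetStart': 0,
            'TotalSlices': 3,
            'SliceId': 0,
            # next entry chains to the following slice: endpoint;container;name
            'Next': 'account.blob.core.windows.net;container;file.bin.bxslice-1',
        }
    },
}
metadata = {
    'BlobxferMetadata': json.dumps(genmeta, ensure_ascii=False, sort_keys=True)
}
print(metadata)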
b/blobxfer/models/upload.py index a0ee766..07852cb 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -32,6 +32,7 @@ # stdlib imports import collections import enum +import json import logging import math import os @@ -43,7 +44,9 @@ # non-stdlib imports # local imports import blobxfer.models +import blobxfer.models.azure import blobxfer.models.crypto +import blobxfer.models.metadata import blobxfer.util # create logger @@ -52,14 +55,14 @@ _MAX_BLOCK_BLOB_ONESHOT_BYTES = 268435456 _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES = 268435456 _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES = 4194304 +_MAX_NUM_CHUNKS = 50000 +_DEFAULT_AUTO_CHUNKSIZE_BYTES = 16777216 # named tuples Offsets = collections.namedtuple( 'Offsets', [ 'chunk_num', - 'block_id', - 'fd_start', 'num_bytes', 'range_end', 'range_start', @@ -70,7 +73,10 @@ 'LocalPathView', [ 'fd_end', 'fd_start', + 'mode', + 'next', 'slice_num', + 'total_slices', ] ) @@ -93,11 +99,15 @@ def __init__(self, parent_path, relative_path, view=None): if view is None: self.view = LocalPathView( fd_start=0, - fd_end=self.size, + fd_end=self._stat.st_size, slice_num=0, + mode=VectoredIoDistributionMode.Disabled, + total_slices=1, + next=None, ) else: self.view = view + self._size = self.view.fd_end - self.view.fd_start @property def absolute_path(self): @@ -105,6 +115,10 @@ def absolute_path(self): @property def size(self): + return self._size + + @property + def total_size(self): return self._stat.st_size @property @@ -184,8 +198,8 @@ def __init__( if self.sources.paths[0].is_dir(): raise ValueError( 'cannot rename a directory of files to upload') - if self.options.chunk_size_bytes <= 0: - raise ValueError('chunk size must be positive') + if self.options.chunk_size_bytes < 0: + raise ValueError('chunk size cannot be negative') if self.options.chunk_size_bytes > _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES: raise ValueError( ('chunk size value of {} exceeds maximum allowable ' @@ -241,6 +255,7 @@ def __init__(self, lpath, ase, uid, options, resume_mgr): self._hasher_lock = threading.Lock() self._resume_mgr = resume_mgr self._ase = ase + self._store_file_attr = options.store_file_properties.attributes self.current_iv = None self._initialize_encryption(options) # calculate the total number of ops required for transfer @@ -286,6 +301,18 @@ def all_operations_completed(self): with self._meta_lock: return self._outstanding_ops == 0 + @property + def last_block_num(self): + # type: (Descriptor) -> bool + """Last used block number for block id, should only be called for + finalize operation + :param Descriptor self: this + :rtype: int + :return: block number + """ + with self._meta_lock: + return self._chunk_num - 1 + @property def is_resumable(self): # type: (Descriptor) -> bool @@ -296,6 +323,37 @@ def is_resumable(self): """ return self._resume_mgr is not None and self.hmac is None + @property + def remote_is_file(self): + # type: (Descriptor) -> bool + """Remote destination is an Azure File + :param Descriptor self: this + :rtype: bool + :return: remote is an Azure File + """ + return self.entity.mode == blobxfer.models.azure.StorageModes.File + + @property + def remote_is_page_blob(self): + # type: (Descriptor) -> bool + """Remote destination is an Azure Page Blob + :param Descriptor self: this + :rtype: bool + :return: remote is an Azure Page Blob + """ + return self.entity.mode == blobxfer.models.azure.StorageModes.Page + + @property + def is_one_shot_block_blob(self): + # type: (Descriptor) -> bool + """Is one shot block blob + :param Descriptor self: this + :rtype: 
bool + :return: if upload is a one-shot block blob + """ + return (self._ase.mode == blobxfer.models.azure.StorageModes.Block and + self._total_chunks == 1) + @property def requires_put_block_list(self): # type: (Descriptor) -> bool @@ -308,15 +366,14 @@ def requires_put_block_list(self): self._total_chunks > 1) @property - def requires_set_blob_properties_md5(self): + def requires_non_encrypted_md5_put(self): # type: (Descriptor) -> bool """Requires a set file properties for md5 to finalize :param Descriptor self: this :rtype: bool :return: if finalize requires a put file properties """ - return (not self.entity.is_encrypted and self.must_compute_md5 and - self.entity.mode == blobxfer.models.azure.StorageModes.Page) + return not self.entity.is_encrypted and self.must_compute_md5 @property def requires_set_file_properties_md5(self): @@ -327,7 +384,7 @@ def requires_set_file_properties_md5(self): :return: if finalize requires a put file properties """ return (not self.entity.is_encrypted and self.must_compute_md5 and - self.entity.mode == blobxfer.models.azure.StorageModes.File) + self.remote_is_file) def complete_offset_upload(self): with self._meta_lock: @@ -350,7 +407,7 @@ def _initialize_encryption(self, options): :param blobxfer.models.options.Upload options: upload options """ # TODO support append blobs? - if (options.rsa_public_key is not None and + if (options.rsa_public_key is not None and self._ase.size > 0 and (self._ase.mode == blobxfer.models.azure.StorageModes.Block or self._ase.mode == blobxfer.models.azure.StorageModes.File)): em = blobxfer.models.crypto.EncryptionMetadata() @@ -387,35 +444,57 @@ def _adjust_chunk_size(self, options): :param Descriptor self: this :param blobxfer.models.options.Upload options: upload options """ - self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) + chunk_size = options.chunk_size_bytes + # auto-select chunk size + if chunk_size == 0: + if self._ase.mode != blobxfer.models.azure.StorageModes.Block: + chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + else: + if self._ase.size == 0: + chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + else: + chunk_size = _DEFAULT_AUTO_CHUNKSIZE_BYTES + while chunk_size < _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES: + chunks = int(math.ceil(self._ase.size / chunk_size)) + if chunks <= _MAX_NUM_CHUNKS: + break + chunk_size = chunk_size << 1 + logger.debug( + 'auto-selected chunk size of {} for {}'.format( + chunk_size, self.local_path.absolute_path)) + self._chunk_size = min((chunk_size, self._ase.size)) # ensure chunk sizes are compatible with mode if self._ase.mode == blobxfer.models.azure.StorageModes.Append: if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: self._chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( - 'adjusting chunk size to {} for append blobs'.format( - self._chunk_size)) + ('adjusting chunk size to {} for append blob ' + 'from {}').format( + self._chunk_size, self.local_path.absolute_path)) elif self._ase.mode == blobxfer.models.azure.StorageModes.Block: if self._ase.size <= options.one_shot_bytes: - self._chunk_size = options.one_shot_bytes + self._chunk_size = min( + (self._ase.size, options.one_shot_bytes) + ) else: if self._chunk_size > _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES: self._chunk_size = _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( - 'adjusting chunk size to {} for block blobs'.format( - self._chunk_size)) + ('adjusting chunk size to {} for block blob ' + 'from {}').format( + self._chunk_size, self.local_path.absolute_path)) elif self._ase.mode == 
blobxfer.models.azure.StorageModes.File: if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: self._chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( - 'adjusting chunk size to {} for files'.format( - self._chunk_size)) + 'adjusting chunk size to {} for file from {}'.format( + self._chunk_size, self.local_path.absolute_path)) elif self._ase.mode == blobxfer.models.azure.StorageModes.Page: if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: self._chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( - 'adjusting chunk size to {} for page blobs'.format( - self._chunk_size)) + 'adjusting chunk size to {} for page blob from {}'.format( + self._chunk_size, self.local_path.absolute_path)) def _compute_total_chunks(self, chunk_size): # type: (Descriptor, int) -> int @@ -426,9 +505,30 @@ def _compute_total_chunks(self, chunk_size): :return: num chunks """ try: - return int(math.ceil(self._ase.size / chunk_size)) + chunks = int(math.ceil(self._ase.size / chunk_size)) except ZeroDivisionError: - return 0 + chunks = 1 + if chunks > 50000: + max_vector = False + if self._ase.mode == blobxfer.models.azure.StorageModes.Block: + if self._chunk_size == _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES: + max_vector = True + elif self._chunk_size == _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: + max_vector = True + if max_vector: + raise RuntimeError( + ('number of chunks {} exceeds maximum permissible ' + 'limit and chunk size is set at the maximum value ' + 'for {}. Please try using stripe mode ' + 'vectorization to overcome this limitation').format( + chunks, self.local_path.absolute_path)) + else: + raise RuntimeError( + ('number of chunks {} exceeds maximum permissible ' + 'limit for {}, please adjust chunk size higher or ' + 'set to -1 for automatic chunk size selection').format( + chunks, self.local_path.absolute_path)) + return chunks def _initialize_integrity_checkers(self, options): # type: (Descriptor, blobxfer.models.options.Upload) -> None @@ -459,35 +559,32 @@ def next_offsets(self): resume_bytes = None # resume_bytes = self._resume() with self._meta_lock: - if self._offset >= self.local_path.view.fd_end: + if self._chunk_num >= self._total_chunks: return None, resume_bytes - if self._offset + self._chunk_size > self.local_path.view.fd_end: - chunk = self.local_path.view.fd_end - self._offset + if self._offset + self._chunk_size > self._ase.size: + num_bytes = self._ase.size - self._offset else: - chunk = self._chunk_size - num_bytes = chunk + num_bytes = self._chunk_size chunk_num = self._chunk_num - fd_start = self._offset range_start = self._offset range_end = self._offset + num_bytes - 1 - self._offset += chunk + self._offset += num_bytes self._chunk_num += 1 - if (self._ase.is_encrypted and - self._offset >= self.local_path.view.fd_end): + if self._ase.is_encrypted and self._offset >= self._ase.size: pad = True else: pad = False return Offsets( chunk_num=chunk_num, - block_id='{0:08d}'.format(chunk_num), - fd_start=fd_start, - num_bytes=chunk, + num_bytes=num_bytes, range_start=range_start, range_end=range_end, pad=pad, ), resume_bytes def read_data(self, offsets): + if offsets.num_bytes == 0: + return None # compute start from view start = self.local_path.view.fd_start + offsets.range_start with self.local_path.absolute_path.open('rb') as fd: @@ -497,3 +594,31 @@ def read_data(self, offsets): with self._hasher_lock: self.md5.update(data) return data + + def generate_metadata(self): + genmeta = {} + encmeta = {} + # generate encryption metadata + if self._ase.is_encrypted: + raise 
NotImplementedError() + # generate file attribute metadata + if self._store_file_attr: + merged = blobxfer.models.metadata.generate_fileattr_metadata( + self.local_path, genmeta) + if merged is not None: + genmeta = merged + # generate vectored io metadata + if self.local_path.view.mode == VectoredIoDistributionMode.Stripe: + merged = blobxfer.models.metadata.\ + generate_vectored_io_stripe_metadata(self.local_path, genmeta) + if merged is not None: + genmeta = merged + metadata = {} + if len(genmeta) > 0: + metadata[blobxfer.models.metadata.JSON_KEY_BLOBXFER_METADATA] = \ + json.dumps(genmeta, ensure_ascii=False, sort_keys=True) + if len(encmeta) > 0: + raise NotImplementedError() + if len(metadata) == 0: + return None + return metadata diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index 8664ac8..a237532 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -95,7 +95,8 @@ def __init__(self, name, key, endpoint, transfer_threads): self.name = name self.key = key self.endpoint = endpoint - self.is_sas = self._key_is_sas(self.key) + self.is_sas = StorageAccount._key_is_sas(self.key) + self.create_containers = self._container_creation_allowed() # normalize sas keys if self.is_sas and self.key.startswith('?'): self.key = self.key[1:] @@ -133,6 +134,26 @@ def _key_is_sas(key): return True return False + def _container_creation_allowed(self): + # # type: (StorageAccount) -> bool + """Check if container creation is allowed + :param StorageAccount self: this + :rtype: bool + :return: if container creation is allowed + """ + if self.is_sas: + # search for account sas "c" resource + sasparts = self.key.split('&') + for part in sasparts: + tmp = part.split('=') + if tmp[0] == 'srt': + if 'c' in tmp[1]: + return True + else: + # storage account key always allows container creation + return True + return False + def _create_clients(self): # type: (StorageAccount) -> None """Create Azure Storage clients @@ -271,7 +292,7 @@ def _populate_from_list_files(self, creds, options, general_options): else: ed = None ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_file(sa, file) + ase.populate_from_file(sa, file, dir) yield ase def _populate_from_list_blobs(self, creds, options, general_options): @@ -303,7 +324,7 @@ def _populate_from_list_blobs(self, creds, options, general_options): else: ed = None ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_blob(sa, blob) + ase.populate_from_blob(sa, blob, dir) yield ase diff --git a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index 40b40c4..49b3678 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -139,6 +139,41 @@ def list_blobs(client, container, prefix, mode, recursive, timeout=None): yield blob +def list_all_blobs(client, container, timeout=None): + # type: (azure.storage.blob.BaseBlobService, str, int) -> + # azure.storage.blob.models.Blob + """List all blobs in a container + :param azure.storage.blob.BaseBlobService client: blob client + :param str container: container + :param int timeout: timeout + :rtype: azure.storage.blob.models.Blob + :return: generator of blobs + """ + blobs = client.list_blobs( + container_name=container, + prefix=None, + timeout=timeout, + ) + for blob in blobs: + yield blob + + +def delete_blob(client, container, name, timeout=None): + # type: (azure.storage.blob.BaseBlobService, str, str, int) -> None + 
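_container_creation_allowed above gates container and share creation on the credential type: a storage account key always allows it, while an account SAS must include the container resource type ('c') in its srt parameter. A standalone sketch of that parse; the SAS strings below are fabricated fragments, not real tokens:

def container_creation_allowed(key, is_sas):
    # storage account keys always permit container/share creation
    if not is_sas:
        return True
    # account SAS: look for the container resource type in the "srt" field
    for part in key.split('&'):
        kv = part.split('=')
        if kv[0] == 'srt' and len(kv) > 1 and 'c' in kv[1]:
            return True
    return False

print(container_creation_allowed('sv=2017-04-17&ss=bf&srt=sco&sp=rwdlac&sig=xyz', True))  # True
print(container_creation_allowed('sv=2017-04-17&ss=bf&srt=o&sp=rwl&sig=xyz', True))       # False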
"""Delete blob, including all associated snapshots + :param azure.storage.blob.BaseBlobService client: blob client + :param str container: container + :param str name: blob name + :param int timeout: timeout + """ + client.delete_blob( + container_name=container, + blob_name=name, + delete_snapshots=azure.storage.blob.models.DeleteSnapshot.Include, + timeout=timeout, + ) + + def get_blob_range(ase, offsets, timeout=None): # type: (blobxfer.models.azure.StorageEntity, # blobxfer.models.download.Offsets, int) -> bytes @@ -167,6 +202,9 @@ def create_container(ase, containers_created, timeout=None): :param dict containers_created: containers already created map :param int timeout: timeout """ + # check if auth allows create container + if not ase.create_containers: + return key = ase.client.account_name + ':blob=' + ase.container if key not in containers_created: ase.client.create_container( diff --git a/blobxfer/operations/azure/blob/block.py b/blobxfer/operations/azure/blob/block.py index 9318a76..b8e5bc5 100644 --- a/blobxfer/operations/azure/blob/block.py +++ b/blobxfer/operations/azure/blob/block.py @@ -64,19 +64,80 @@ def create_client(storage_account): return client -def create_blob(ase, data, md5, encmeta, timeout=None): - # type: (blobxfer.models.azure.StorageEntity, int) -> None +def create_blob(ase, data, md5, metadata, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, bytes, str, dict, + # int) -> None """Create one shot block blob :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param bytes data: blob data + :param str md5: md5 as base64 + :param dict metadata: metadata kv pairs :param int timeout: timeout """ - if encmeta is not None: - raise NotImplementedError() ase.client._put_blob( container_name=ase.container, blob_name=ase.name, + blob=data, content_settings=azure.storage.blob.models.ContentSettings( content_type=blobxfer.util.get_mime_type(ase.name), content_md5=md5, ), + metadata=metadata, + validate_content=False, # integrity is enforced with HTTPS + timeout=timeout) + + +def _format_block_id(chunk_num): + # type: (int) -> str + """Create a block id given a block (chunk) number + :param int chunk_num: chunk number + :rtype: str + :return: block id + """ + return '{0:08d}'.format(chunk_num) + + +def put_block(ase, offsets, data, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, + # blobxfer.models.upload.Offsets, bytes, int) -> None + """Puts a block into remote blob + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param blobxfer.models.upload.Offsets offsets: upload offsets + :param bytes data: data + :param int timeout: timeout + """ + ase.client.put_block( + container_name=ase.container, + blob_name=ase.name, + block=data, + block_id=_format_block_id(offsets.chunk_num), + validate_content=False, # integrity is enforced with HTTPS + timeout=timeout) + + +def put_block_list(ase, last_block_num, md5, metadata, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, bytes, str, dict, + # int) -> None + """Create block blob from blocks + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param int last_block_num: last block number (chunk_num) + :param str md5: md5 as base64 + :param dict metadata: metadata kv pairs + :param int timeout: timeout + """ + # construct block list + block_list = [ + azure.storage.blob.BlobBlock(id=_format_block_id(x)) + for x in range(0, last_block_num + 1) + ] + ase.client.put_block_list( + container_name=ase.container, + blob_name=ase.name, + 
block_list=block_list, + content_settings=azure.storage.blob.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(ase.name), + content_md5=md5, + ), + metadata=metadata, + validate_content=False, # integrity is enforced with HTTPS timeout=timeout) diff --git a/blobxfer/operations/azure/blob/page.py b/blobxfer/operations/azure/blob/page.py index 5601557..859b4bb 100644 --- a/blobxfer/operations/azure/blob/page.py +++ b/blobxfer/operations/azure/blob/page.py @@ -115,3 +115,17 @@ def set_blob_md5(ase, md5, timeout=None): content_md5=md5, ), timeout=timeout) + + +def set_blob_metadata(ase, metadata, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None + """Set blob metadata + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param dict metadata: metadata kv pairs + :param int timeout: timeout + """ + ase.client.set_blob_metadata( + container_name=ase.container, + blob_name=ase.name, + metadata=metadata, + timeout=timeout) diff --git a/blobxfer/operations/azure/file.py b/blobxfer/operations/azure/file.py index 4970970..32f1b13 100644 --- a/blobxfer/operations/azure/file.py +++ b/blobxfer/operations/azure/file.py @@ -174,6 +174,49 @@ def list_files(client, fileshare, prefix, recursive, timeout=None): dirs.append(fspath) +def list_all_files(client, fileshare, timeout=None): + # type: (azure.storage.file.FileService, str, int) -> str + """List all files in share + :param azure.storage.file.FileService client: file client + :param str fileshare: file share + :param int timeout: timeout + :rtype: str + :return: file name + """ + dirs = [None] + while len(dirs) > 0: + dir = dirs.pop() + files = client.list_directories_and_files( + share_name=fileshare, + directory_name=dir, + timeout=timeout, + ) + for file in files: + fspath = str( + pathlib.Path(dir if dir is not None else '') / file.name) + if type(file) == azure.storage.file.models.File: + yield fspath + else: + dirs.append(fspath) + + +def delete_file(client, fileshare, name, timeout=None): + # type: (azure.storage.file.FileService, str, str, int) -> None + """Delete file from share + :param azure.storage.file.FileService client: file client + :param str fileshare: file share + :param str name: file name + :param int timeout: timeout + """ + dir, fpath = parse_file_path(name) + client.delete_file( + share_name=fileshare, + directory_name=dir, + file_name=fpath, + timeout=timeout, + ) + + def get_file_range(ase, offsets, timeout=None): # type: (blobxfer.models.azure.StorageEntity, # blobxfer.models.download.Offsets, int) -> bytes @@ -203,6 +246,9 @@ def create_share(ase, containers_created, timeout=None): :param dict containers_created: containers already created map :param int timeout: timeout """ + # check if auth allows create container + if not ase.create_containers: + return key = ase.client.account_name + ':file=' + ase.container if key not in containers_created: ase.client.create_share( @@ -224,6 +270,8 @@ def create_all_parent_directories(ase, dirs_created, timeout=None): dirs = pathlib.Path(ase.name).parts if len(dirs) <= 1: return + # remove last part (which is the file) + dirs = dirs[:-1] dk = ase.client.account_name + ':' + ase.container for i in range(0, len(dirs)): dir = str(pathlib.Path(*(dirs[0:i + 1]))) @@ -294,3 +342,19 @@ def set_file_md5(ase, md5, timeout=None): content_md5=md5, ), timeout=timeout) + + +def set_file_metadata(ase, metadata, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None + """Set file metadata + :param 
blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param dict metadata: metadata kv pairs + :param int timeout: timeout + """ + dir, fpath = parse_file_path(ase.name) + ase.client.set_file_metadata( + share_name=ase.container, + directory_name=dir, + file_name=fpath, + metadata=metadata, + timeout=timeout) diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 985a154..3b38ea5 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -75,7 +75,7 @@ def __init__(self, general_options, creds, spec): :param blobxfer.operations.azure.StorageCredentials creds: creds :param blobxfer.models.uplaod.Specification spec: upload spec """ - self._all_remote_files_processed = False + self._all_local_files_processed = False self._crypto_offload = None self._md5_meta_lock = threading.Lock() self._md5_map = {} @@ -95,7 +95,7 @@ def __init__(self, general_options, creds, spec): self._transfer_set = set() self._transfer_threads = [] self._start_time = None - self._delete_after = set() + self._delete_exclude = set() self._ud_map = {} self._containers_created = set() self._fileshare_dir_lock = threading.Lock() @@ -118,7 +118,7 @@ def termination_check(self): with self._transfer_lock: return (self._upload_terminate or len(self._exceptions) > 0 or - (self._all_remote_files_processed and + (self._all_local_files_processed and len(self._upload_set) == 0 and len(self._transfer_set) == 0)) @@ -133,7 +133,7 @@ def termination_check_md5(self): with self._md5_meta_lock: with self._upload_lock: return (self._upload_terminate or - (self._all_remote_files_processed and + (self._all_local_files_processed and len(self._md5_map) == 0 and len(self._upload_set) == 0)) @@ -301,6 +301,17 @@ def _wait_for_upload_threads(self, terminate): for thr in self._upload_threads: thr.join() + def _wait_for_transfer_threads(self, terminate): + # type: (Uploader, bool) -> None + """Wait for transfer threads + :param Uploader self: this + :param bool terminate: terminate threads + """ + if terminate: + self._upload_terminate = terminate + for thr in self._transfer_threads: + thr.join() + def _worker_thread_transfer(self): # type: (Uploader) -> None """Worker thread transfer @@ -320,7 +331,7 @@ def _worker_thread_transfer(self): def _process_transfer(self, ud, ase, offsets, data): # issue put range - self._put_data(ase, offsets, data) + self._put_data(ud, ase, offsets, data) # accounting with self._transfer_lock: self._transfer_set.remove( @@ -328,9 +339,32 @@ def _process_transfer(self, ud, ase, offsets, data): self._upload_bytes_sofar += offsets.num_bytes ud.complete_offset_upload() - def _put_data(self, ase, offsets, data): + def _put_data(self, ud, ase, offsets, data): print('UL', offsets) - if ase.mode == blobxfer.models.azure.StorageModes.File: + if ase.mode == blobxfer.models.azure.StorageModes.Append: + raise NotImplementedError() + elif ase.mode == blobxfer.models.azure.StorageModes.Block: + if offsets.chunk_num == 0: + # create container if necessary + blobxfer.operations.azure.blob.create_container( + ase, self._containers_created, + timeout=self._general_options.timeout_sec) + # handle one-shot uploads + if ud.is_one_shot_block_blob: + metadata = ud.generate_metadata() + if ud.must_compute_md5: + digest = blobxfer.util.base64_encode_as_string( + ud.md5.digest()) + else: + digest = None + blobxfer.operations.azure.blob.block.create_blob( + ase, data, digest, metadata, + timeout=self._general_options.timeout_sec) + return + # upload block + 
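The uploader now splits work between upload threads, which read the local view, optionally encrypt, and stage chunks, and transfer threads, which issue the actual put calls and do the byte accounting. A minimal stdlib sketch of that producer/consumer hand-off with a shared in-flight set, using trivial stand-ins for the real descriptor and put operations:

import queue
import threading

transfer_queue = queue.Queue()
transfer_lock = threading.Lock()
transfer_set = set()
producer_done = threading.Event()

def stage_chunks(chunks):
    # stand-in for _worker_thread_upload: record each chunk as in-flight,
    # then hand it to the transfer queue
    for chunk_id, data in chunks:
        with transfer_lock:
            transfer_set.add(chunk_id)
        transfer_queue.put((chunk_id, data))
    producer_done.set()

def transfer_worker():
    # stand-in for _worker_thread_transfer: issue the put, then account
    while True:
        with transfer_lock:
            finished = producer_done.is_set() and len(transfer_set) == 0
        if finished:
            return
        try:
            chunk_id, data = transfer_queue.get(block=False, timeout=0.03)
        except queue.Empty:
            continue
        # a real worker would call put_block / put_page / update_range here
        with transfer_lock:
            transfer_set.remove(chunk_id)

workers = [threading.Thread(target=transfer_worker) for _ in range(2)]
for thr in workers:
    thr.start()
stage_chunks([(i, b'\0' * 4) for i in range(8)])
for thr in workers:
    thr.join()
print('chunks remaining in flight:', len(transfer_set))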
blobxfer.operations.azure.blob.block.put_block( + ase, offsets, data, timeout=self._general_options.timeout_sec) + elif ase.mode == blobxfer.models.azure.StorageModes.File: if offsets.chunk_num == 0: # create container if necessary blobxfer.operations.azure.file.create_share( @@ -346,13 +380,10 @@ def _put_data(self, ase, offsets, data): blobxfer.operations.azure.file.create_file( ase, timeout=self._general_options.timeout_sec) # upload range - blobxfer.operations.azure.file.put_file_range( - ase, offsets, data, timeout=self._general_options.timeout_sec) - elif ase.mode == blobxfer.models.azure.StorageModes.Append: - raise NotImplementedError() - elif ase.mode == blobxfer.models.azure.StorageModes.Block: - # TODO handle one-shot uploads for block blobs (get md5 as well) - raise NotImplementedError() + if data is not None: + blobxfer.operations.azure.file.put_file_range( + ase, offsets, data, + timeout=self._general_options.timeout_sec) elif ase.mode == blobxfer.models.azure.StorageModes.Page: if offsets.chunk_num == 0: # create container if necessary @@ -362,6 +393,8 @@ def _put_data(self, ase, offsets, data): # create remote blob blobxfer.operations.azure.blob.page.create_blob( ase, timeout=self._general_options.timeout_sec) + if data is None: + return # align page aligned = blobxfer.util.page_align_content_length( offsets.num_bytes) @@ -382,9 +415,9 @@ def _worker_thread_upload(self): import time while not self.termination_check: try: - if (len(self._transfer_set) > - self._general_options.concurrency.transfer_threads): - time.sleep(0.03) + if (len(self._transfer_set) >= + self._general_options.concurrency.transfer_threads * 2): + time.sleep(0.5) continue else: ud = self._upload_queue.get(False, 0.03) @@ -408,7 +441,7 @@ def _process_upload_descriptor(self, ud): offsets, resume_bytes = ud.next_offsets() # add resume bytes to counter if resume_bytes is not None: - with self._upload_lock: + with self._transfer_lock: self._upload_bytes_sofar += resume_bytes logger.debug('adding {} sofar {} from {}'.format( resume_bytes, self._upload_bytes_sofar, ud._ase.name)) @@ -474,33 +507,67 @@ def _process_upload_descriptor(self, ud): self._transfer_queue.put((ud, ase, offsets, data)) def _finalize_file(self, ud): - # create encryption metadata for file/blob - if ud.entity.is_encrypted: - # TODO - pass + metadata = ud.generate_metadata() # put block list for non one-shot block blobs if ud.requires_put_block_list: - # TODO - pass - # set md5 blob property if not encrypted - if ud.requires_set_blob_properties_md5: - digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) - blobxfer.operations.azure.blob.page.set_blob_md5( - ud.entity, digest, timeout=self._general_options.timeout_sec) - if blobxfer.util.is_not_empty(ud.entity.replica_targets): - for ase in ud.entity.replica_targets: - blobxfer.operations.azure.blob.page.set_blob_md5( - ase, digest, timeout=self._general_options.timeout_sec) - # set md5 file property if not encrypted - if ud.requires_set_file_properties_md5: - digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) - blobxfer.operations.azure.file.set_file_md5( - ud.entity, digest, timeout=self._general_options.timeout_sec) + if ud.must_compute_md5: + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + else: + digest = None + blobxfer.operations.azure.blob.block.put_block_list( + ud.entity, ud.last_block_num, digest, metadata, + timeout=self._general_options.timeout_sec) if blobxfer.util.is_not_empty(ud.entity.replica_targets): for ase in 
ud.entity.replica_targets: - blobxfer.operations.azure.file.set_file_md5( - ase, digest, timeout=self._general_options.timeout_sec) - # TODO set file metadata if encrypted + blobxfer.operations.azure.blob.block.put_block_list( + ase, ud.last_block_num, digest, metadata, + timeout=self._general_options.timeout_sec) + # page blob finalization + if ud.remote_is_page_blob: + # set md5 page blob property if required + if ud.requires_non_encrypted_md5_put: + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + blobxfer.operations.azure.blob.page.set_blob_md5( + ud.entity, digest, + timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.blob.page.set_blob_md5( + ase, digest, + timeout=self._general_options.timeout_sec) + # set metadata if needed + if blobxfer.util.is_not_empty(metadata): + blobxfer.operations.azure.blob.page.set_blob_metadata( + ud.entity, metadata, + timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.blob.page.set_blob_metadata( + ase, metadata, + timeout=self._general_options.timeout_sec) + # azure file finalization + if ud.remote_is_file: + # set md5 file property if required + if ud.requires_non_encrypted_md5_put: + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + blobxfer.operations.azure.file.set_file_md5( + ud.entity, digest, + timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.file.set_file_md5( + ase, digest, + timeout=self._general_options.timeout_sec) + # set file metadata if needed + if blobxfer.util.is_not_empty(metadata): + blobxfer.operations.azure.file.set_file_metadata( + ud.entity, metadata, + timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.file.set_file_metadata( + ase, metadata, + timeout=self._general_options.timeout_sec) def _cleanup_temporary_files(self): # type: (Uploader) -> None @@ -516,18 +583,68 @@ def _cleanup_temporary_files(self): except Exception as e: logger.exception(e) + def _get_destination_paths(self): + # type: (Uploader) -> + # Tuple[blobxfer.operations.azure.StorageAccount, str, str] + """Get destination paths + :param Uploader self: this + :rtype: tuple + :return: (storage account, container, name) + """ + for dst in self._spec.destinations: + for dpath in dst.paths: + sdpath = str(dpath) + cont, dir = blobxfer.util.explode_azure_path(sdpath) + sa = self._creds.get_storage_account( + dst.lookup_storage_account(sdpath)) + yield sa, cont, dir, dpath + def _delete_extraneous_files(self): # type: (Uploader) -> None - """Delete extraneous files cataloged + """Delete extraneous files on the remote :param Uploader self: this """ - logger.info('attempting to delete {} extraneous files'.format( - len(self._delete_after))) - for file in self._delete_after: - try: - file.unlink() - except OSError: - pass + if not self._spec.options.delete_extraneous_destination: + return + # list blobs for all destinations + checked = set() + deleted = 0 + print(self._delete_exclude) + for sa, container, _, _ in self._get_destination_paths(): + key = ';'.join((sa.name, sa.endpoint, container)) + if key in checked: + continue + logger.debug( + 'attempting to delete 
extraneous blobs/files from: {}'.format( + key)) + if (self._spec.options.mode == + blobxfer.models.azure.StorageModes.File): + files = blobxfer.operations.azure.file.list_all_files( + sa.file_client, container, + timeout=self._general_options.timeout_sec) + for file in files: + id = self._create_deletion_id( + sa.file_client, container, file) + print(id) + if id not in self._delete_exclude: + blobxfer.operations.azure.file.delete_file( + sa.file_client, container, file, + timeout=self._general_options.timeout_sec) + deleted += 1 + else: + blobs = blobxfer.operations.azure.blob.list_all_blobs( + sa.block_blob_client, container, + timeout=self._general_options.timeout_sec) + for blob in blobs: + id = self._create_deletion_id( + sa.block_blob_client, container, blob.name) + if id not in self._delete_exclude: + blobxfer.operations.azure.blob.delete_blob( + sa.block_blob_client, container, blob.name, + timeout=self._general_options.timeout_sec) + deleted += 1 + checked.add(key) + logger.info('deleted {} extraneous blobs/files'.format(deleted)) def _check_upload_conditions(self, local_path, rfile): # type: (Uploader, blobxfer.models.upload.LocalPath, @@ -603,9 +720,9 @@ def _check_for_existing_remote(self, sa, cont, name): ase = blobxfer.models.azure.StorageEntity(cont, ed) if (self._spec.options.mode == blobxfer.models.azure.StorageModes.File): - ase.populate_from_file(sa, fp) + ase.populate_from_file(sa, fp, name) else: - ase.populate_from_blob(sa, fp) + ase.populate_from_blob(sa, fp, name) else: ase = None return ase @@ -618,6 +735,7 @@ def _generate_destination_for_source(self, local_path): """ # construct stripped destination path spath = local_path.relative_path + # apply strip components if self._spec.options.strip_components > 0: _rparts = local_path.relative_path.parts _strip = min( @@ -625,53 +743,47 @@ def _generate_destination_for_source(self, local_path): ) if _strip > 0: spath = pathlib.Path(*_rparts[_strip:]) - # for each destination: - # 1. prepend non-container path - # 2. bind client from mode - # 3. perform get blob or file properties - for dst in self._spec.destinations: - for dpath in dst.paths: - sdpath = str(dpath) - cont, dir = blobxfer.util.explode_azure_path(sdpath) - # apply rename - if self._spec.options.rename: - name = dir - else: - name = str(spath / dir) - if blobxfer.util.is_none_or_empty(name): - raise ValueError( - 'must specify a container for destination: {}'.format( - dpath)) - # apply strip components - sa = self._creds.get_storage_account( - dst.lookup_storage_account(sdpath)) - # do not check for existing remote right now if striped - # vectored io mode - if (self._spec.options.vectored_io.distribution_mode == - blobxfer.models.upload. - VectoredIoDistributionMode.Stripe): - ase = None + # create a storage entity for each destination + for sa, cont, name, dpath in self._get_destination_paths(): + # apply rename + if not self._spec.options.rename: + name = str(spath / name) + if blobxfer.util.is_none_or_empty(name): + raise ValueError( + ('invalid destination, must specify a container or ' + 'fileshare and remote file name: {}').format(dpath)) + # do not check for existing remote right now if striped + # vectored io mode + if (self._spec.options.vectored_io.distribution_mode == + blobxfer.models.upload. 
+ VectoredIoDistributionMode.Stripe): + ase = None + else: + ase = self._check_for_existing_remote(sa, cont, name) + if ase is None: + if self._spec.options.rsa_public_key: + ed = blobxfer.models.crypto.EncryptionMetadata() else: - ase = self._check_for_existing_remote(sa, cont, name) - if ase is None: - if self._spec.options.rsa_public_key: - ed = blobxfer.models.crypto.EncryptionMetadata() - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_local( - sa, cont, name, self._spec.options.mode) - yield sa, ase + ed = None + ase = blobxfer.models.azure.StorageEntity(cont, ed) + ase.populate_from_local( + sa, cont, name, self._spec.options.mode) + yield sa, ase def _create_unique_id(self, src, ase): return ';'.join( - (str(src.absolute_path), ase._client.account_name, ase.path) + (str(src.absolute_path), ase._client.primary_endpoint, ase.path) ) def _create_unique_transfer_id(self, local_path, ase, offsets): return ';'.join( - (str(local_path.absolute_path), ase._client.account_name, ase.path, - str(local_path.view.fd_start), str(offsets.range_start)) + (str(local_path.absolute_path), ase._client.primary_endpoint, + ase.path, str(local_path.view.fd_start), str(offsets.range_start)) + ) + + def _create_deletion_id(self, client, container, name): + return ';'.join( + (client.primary_endpoint, container, name) ) def append_slice_suffix_to_name(self, name, slice): @@ -687,41 +799,68 @@ def _vectorize_and_bind(self, local_path, dest): """ if (self._spec.options.vectored_io.distribution_mode == blobxfer.models.upload.VectoredIoDistributionMode.Stripe): - num_dest = len(dest) # compute total number of slices slices = int(math.ceil( - local_path.size / + local_path.total_size / self._spec.options.vectored_io.stripe_chunk_size_bytes)) + # check if vectorization is possible + if slices == 1: + sa, ase = dest[0] + action = self._check_upload_conditions(local_path, ase) + yield action, local_path, ase + return + num_dest = len(dest) logger.debug( '{} slices for vectored out of {} to {} destinations'.format( slices, local_path.absolute_path, num_dest)) + # pre-populate slice map for next pointers + slice_map = {} + for i in range(0, slices): + sa, ase = dest[i % num_dest] + name = self.append_slice_suffix_to_name(ase.name, i) + sase = self._check_for_existing_remote(sa, ase.container, name) + if sase is None: + if self._spec.options.rsa_public_key: + ed = blobxfer.models.crypto.EncryptionMetadata() + else: + ed = None + sase = blobxfer.models.azure.StorageEntity( + ase.container, ed) + sase.populate_from_local( + sa, ase.container, name, self._spec.options.mode) + slice_map[i] = sase # create new local path to ase mappings curr = 0 - slice = 0 for i in range(0, slices): start = curr end = ( curr + self._spec.options.vectored_io.stripe_chunk_size_bytes ) - if end > local_path.size: - end = local_path.size - sa, ase = dest[i % num_dest] - name = self.append_slice_suffix_to_name(ase.name, slice) - ase = self._check_for_existing_remote(sa, ase.container, name) + if end > local_path.total_size: + end = local_path.total_size + ase = slice_map[i] + if i < slices - 1: + next_entry = blobxfer.models.metadata.\ + create_vectored_io_next_entry(slice_map[i+1]) + else: + next_entry = None lp_slice = blobxfer.models.upload.LocalPath( parent_path=local_path.parent_path, relative_path=local_path.relative_path, view=blobxfer.models.upload.LocalPathView( fd_start=start, fd_end=end, - slice_num=slice, + slice_num=i, + mode=self._spec.options.vectored_io.distribution_mode, + 
total_slices=slices, + next=next_entry, ) ) + print(lp_slice.view) action = self._check_upload_conditions(lp_slice, ase) yield action, lp_slice, ase - start += curr - slice += 1 + curr = end elif (self._spec.options.vectored_io.distribution_mode == blobxfer.models.upload.VectoredIoDistributionMode.Replica): action_map = {} @@ -794,9 +933,14 @@ def _run(self): ] for action, lp, ase in self._vectorize_and_bind(src, dest): print(lp.parent_path, lp.relative_path, lp.absolute_path, action, ase.container, ase.name) - print(lp.size, lp.mode, lp.uid, lp.gid) + print(lp.total_size, lp.size, lp.mode, lp.uid, lp.gid) print(self._create_unique_id(lp, ase)) print('replicas', len(ase.replica_targets) if ase.replica_targets is not None else 'none') + if self._spec.options.delete_extraneous_destination: + self._delete_exclude.add( + self._create_deletion_id( + ase._client, ase.container, ase.name) + ) if action == UploadAction.Skip: skipped_files += 1 skipped_size += ase.size if ase.size is not None else 0 @@ -818,16 +962,17 @@ def _run(self): upload_size_mib = self._upload_bytes_total / blobxfer.util.MEGABYTE # set remote files processed with self._md5_meta_lock: - self._all_remote_files_processed = True + self._all_local_files_processed = True logger.debug( - ('{0} remote files processed, waiting for upload completion ' + ('{0} local files processed, waiting for upload completion ' 'of {1:.4f} MiB').format(nfiles, upload_size_mib)) del nfiles del total_size del skipped_files del skipped_size - # wait for downloads to complete + # wait for uploads to complete self._wait_for_upload_threads(terminate=False) + self._wait_for_transfer_threads(terminate=False) end_time = blobxfer.util.datetime_now() # update progress bar self._update_progress_bar() @@ -876,6 +1021,7 @@ def start(self): 'KeyboardInterrupt detected, force terminating ' 'processes and threads (this may take a while)...') try: + self._wait_for_transfer_threads(terminate=True) self._wait_for_upload_threads(terminate=True) finally: self._cleanup_temporary_files() diff --git a/blobxfer/util.py b/blobxfer/util.py index 9b4e644..cce84f0 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -40,6 +40,7 @@ from os import scandir as scandir except ImportError: # noqa from scandir import scandir as scandir +import platform import re import sys # non-stdlib imports @@ -50,6 +51,7 @@ # global defines MEGABYTE = 1048576 +_ON_WINDOWS = platform.system() == 'Windows' _REGISTERED_LOGGER_HANDLERS = [] _PAGEBLOB_BOUNDARY = 512 @@ -63,6 +65,15 @@ def on_python2(): return future.utils.PY2 +def on_windows(): + # type: (None) -> bool + """Execution on Windows + :rtype: bool + :return: if on Windows + """ + return _ON_WINDOWS + + def setup_logger(logger, logfile): # noqa # type: (logger, str) -> None """Set up logger""" diff --git a/cli/cli.py b/cli/cli.py index bc11ef6..c3b9b2d 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -317,7 +317,8 @@ def callback(ctx, param, value): expose_value=False, type=int, default=4194304, - help='Block or chunk size in bytes [4194304]', + help='Block or chunk size in bytes; set to 0 for auto-select ' + 'on upload [0]', callback=callback)(f) @@ -381,10 +382,9 @@ def callback(ctx, param, value): clictx.cli_options['file_attributes'] = value return value return click.option( - '--file-attributes', + '--file-attributes/--no-file-attributes', expose_value=False, default=False, - is_flag=False, help='Store or restore file attributes [False]', callback=callback)(f) From 69edbc9fde68a44d7c1513cd1b38799217779b62 Mon Sep 17 00:00:00 2001 From: 
Fred Park Date: Thu, 25 May 2017 09:01:57 -0700 Subject: [PATCH 30/47] Client-side encryption upload support - Split transfer threads into disk/transfer - Various fixes - Update dependencies to latest --- blobxfer/models/crypto.py | 74 ++++- blobxfer/models/options.py | 24 +- blobxfer/models/upload.py | 48 ++- blobxfer/operations/azure/blob/__init__.py | 19 +- blobxfer/operations/azure/blob/page.py | 2 +- blobxfer/operations/azure/file.py | 18 +- blobxfer/operations/crypto.py | 8 +- blobxfer/operations/progress.py | 28 +- blobxfer/operations/upload.py | 347 +++++++++++---------- cli/cli.py | 17 +- cli/settings.py | 2 + setup.py | 8 +- 12 files changed, 388 insertions(+), 207 deletions(-) diff --git a/blobxfer/models/crypto.py b/blobxfer/models/crypto.py index 6a50c9c..c6670f2 100644 --- a/blobxfer/models/crypto.py +++ b/blobxfer/models/crypto.py @@ -106,6 +106,7 @@ class EncryptionMetadata(object): _JSON_KEY_ENCRYPTED_AUTHKEY = 'EncryptedAuthenticationKey' _JSON_KEY_CONTENT_IV = 'ContentEncryptionIV' _JSON_KEY_KEYID = 'KeyId' + _JSON_KEY_KEY_WRAPPING_METADATA = 'KeyWrappingMetadata' _JSON_KEY_BLOBXFER_EXTENSIONS = 'BlobxferExtensions' _JSON_KEY_PREENCRYPTED_MD5 = 'PreEncryptedContentMD5' @@ -319,9 +320,76 @@ def convert_from_json(self, md, entityname, rsaprivatekey): '{}: encryption metadata authentication failed'.format( entityname)) - def convert_to_json_with_mac(self): - # TODO - pass + def convert_to_json_with_mac(self, md5digest, hmacdigest): + # type: (EncryptionMetadata, str, str) -> dict + """Constructs metadata for encryption + :param EncryptionMetadata self: this + :param str md5digest: md5 digest + :param str hmacdigest: hmac-sha256 digest (data) + :rtype: dict + :return: encryption metadata + """ + enc_content_key = blobxfer.operations.crypto.\ + rsa_encrypt_key_base64_encoded( + None, self._rsa_public_key, self.symmetric_key) + enc_sign_key = blobxfer.operations.crypto.\ + rsa_encrypt_key_base64_encoded( + None, self._rsa_public_key, self.signing_key) + + encjson = { + EncryptionMetadata._JSON_KEY_ENCRYPTION_MODE: + EncryptionMetadata._ENCRYPTION_MODE, + EncryptionMetadata._JSON_KEY_CONTENT_IV: + blobxfer.util.base64_encode_as_string(self.content_encryption_iv), + EncryptionMetadata._JSON_KEY_WRAPPEDCONTENTKEY: { + EncryptionMetadata._JSON_KEY_KEYID: 'private:pem', + EncryptionMetadata._JSON_KEY_ENCRYPTED_KEY: enc_content_key, + EncryptionMetadata._JSON_KEY_ENCRYPTED_AUTHKEY: enc_sign_key, + EncryptionMetadata._JSON_KEY_ALGORITHM: + EncryptionMetadata._ENCRYPTED_KEY_SCHEME, + }, + EncryptionMetadata._JSON_KEY_ENCRYPTION_AGENT: { + EncryptionMetadata._JSON_KEY_PROTOCOL: + EncryptionMetadata._ENCRYPTION_PROTOCOL_VERSION, + EncryptionMetadata._JSON_KEY_ENCRYPTION_ALGORITHM: + EncryptionMetadata._ENCRYPTION_ALGORITHM, + }, + EncryptionMetadata._JSON_KEY_INTEGRITY_AUTH: { + EncryptionMetadata._JSON_KEY_ALGORITHM: + EncryptionMetadata._AUTH_ALGORITHM, + }, + EncryptionMetadata._JSON_KEY_KEY_WRAPPING_METADATA: {}, + } + if md5digest is not None: + encjson[EncryptionMetadata._JSON_KEY_BLOBXFER_EXTENSIONS] = { + EncryptionMetadata._JSON_KEY_PREENCRYPTED_MD5: md5digest + } + if hmacdigest is not None: + encjson[EncryptionMetadata._JSON_KEY_INTEGRITY_AUTH][ + EncryptionMetadata._JSON_KEY_MAC] = hmacdigest + bencjson = json.dumps( + encjson, sort_keys=True, ensure_ascii=False).encode( + EncryptionMetadata._AUTH_ENCODING_TYPE) + encjson = { + EncryptionMetadata._METADATA_KEY_NAME: + json.dumps(encjson, sort_keys=True) + } + # compute MAC over encjson + hmacsha256 = hmac.new(self._signkey, 
digestmod=hashlib.sha256) + hmacsha256.update(bencjson) + authjson = { + EncryptionMetadata._JSON_KEY_AUTH_METAAUTH: { + EncryptionMetadata._JSON_KEY_ALGORITHM: + EncryptionMetadata._AUTH_ALGORITHM, + EncryptionMetadata._JSON_KEY_AUTH_ENCODING: + EncryptionMetadata._AUTH_ENCODING_TYPE, + EncryptionMetadata._JSON_KEY_MAC: + blobxfer.util.base64_encode_as_string(hmacsha256.digest()), + } + } + encjson[EncryptionMetadata._METADATA_KEY_AUTH_NAME] = json.dumps( + authjson, sort_keys=True) + return encjson def initialize_hmac(self): # type: (EncryptionMetadata) -> hmac.HMAC diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index 14e4e09..cdc32df 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -103,15 +103,19 @@ class Concurrency(object): """Concurrency Options""" - def __init__(self, crypto_processes, md5_processes, transfer_threads): + def __init__( + self, crypto_processes, md5_processes, disk_threads, + transfer_threads): """Ctor for Concurrency Options :param Concurrency self: this :param int crypto_processes: number of crypto procs :param int md5_processes: number of md5 procs + :param int disk_threads: number of disk threads :param int transfer_threads: number of transfer threads """ self.crypto_processes = crypto_processes self.md5_processes = md5_processes + self.disk_threads = disk_threads self.transfer_threads = transfer_threads # allow crypto processes to be zero (which will inline crypto # routines with main process) @@ -121,11 +125,21 @@ def __init__(self, crypto_processes, md5_processes, transfer_threads): self.md5_processes = multiprocessing.cpu_count() // 2 if self.md5_processes < 1: self.md5_processes = 1 - if self.transfer_threads is None or self.transfer_threads < 1: - self.transfer_threads = multiprocessing.cpu_count() * 4 - # cap maximum number of threads from cpu count to 96 - if self.transfer_threads > 96: + auto_disk = False + if self.disk_threads is None or self.disk_threads < 1: + self.disk_threads = multiprocessing.cpu_count() * 4 + # cap maximum number of disk threads from cpu count to 96 + if self.disk_threads > 96: self.transfer_threads = 96 + auto_disk = True + if self.transfer_threads is None or self.transfer_threads < 1: + if auto_disk: + self.transfer_threads = self.disk_threads << 1 + else: + self.transfer_threads = multiprocessing.cpu_count() * 2 + # cap maximum number of threads from cpu count to 64 + if self.transfer_threads > 64: + self.transfer_threads = 64 class General(object): diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index 07852cb..da3a1fe 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -387,6 +387,10 @@ def requires_set_file_properties_md5(self): self.remote_is_file) def complete_offset_upload(self): + # type: (Descriptor) -> None + """Complete the upload for the offset + :param Descriptor self: this + """ with self._meta_lock: self._outstanding_ops -= 1 # TODO save resume state @@ -407,7 +411,7 @@ def _initialize_encryption(self, options): :param blobxfer.models.options.Upload options: upload options """ # TODO support append blobs? 
- if (options.rsa_public_key is not None and self._ase.size > 0 and + if (options.rsa_public_key is not None and self.local_path.size > 0 and (self._ase.mode == blobxfer.models.azure.StorageModes.Block or self._ase.mode == blobxfer.models.azure.StorageModes.File)): em = blobxfer.models.crypto.EncryptionMetadata() @@ -426,7 +430,7 @@ def _compute_remote_size(self): if size > 0: if self._ase.is_encrypted: # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs - allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ + allocatesize = (size // self._AES_BLOCKSIZE + 1) * \ self._AES_BLOCKSIZE else: allocatesize = size @@ -541,8 +545,9 @@ def _initialize_integrity_checkers(self, options): if blobxfer.util.is_none_or_empty( self._ase.encryption_metadata.symmetric_key): raise RuntimeError( - 'symmetric key is invalid: provide RSA private key ' - 'or metadata corrupt') + ('symmetric key is invalid: provide RSA private key ' + 'or metadata corrupt for {}').format( + self.local_path.absolute_path)) self.hmac = self._ase.encryption_metadata.initialize_hmac() # both hmac and md5 can be enabled if options.store_file_properties.md5: @@ -583,10 +588,19 @@ def next_offsets(self): ), resume_bytes def read_data(self, offsets): + # type: (Descriptor, Offsets) -> bytes + """Read data from file + :param Descriptor self: this + :param Offsets offsets: offsets + :rtype: bytes + :return: file data + """ if offsets.num_bytes == 0: return None # compute start from view start = self.local_path.view.fd_start + offsets.range_start + # encrypted offsets will read past the end of the file due + # to padding, but will be accounted for after encryption+padding with self.local_path.absolute_path.open('rb') as fd: fd.seek(start, 0) data = fd.read(offsets.num_bytes) @@ -596,11 +610,28 @@ def read_data(self, offsets): return data def generate_metadata(self): + # type: (Descriptor) -> dict + """Generate metadata for descriptor + :param Descriptor self: this + :rtype: dict or None + :return: kv metadata dict + """ genmeta = {} encmeta = {} # generate encryption metadata if self._ase.is_encrypted: - raise NotImplementedError() + if self.must_compute_md5: + md5digest = blobxfer.util.base64_encode_as_string( + self.md5.digest()) + else: + md5digest = None + if self.hmac is not None: + hmacdigest = blobxfer.util.base64_encode_as_string( + self.hmac.digest()) + else: + hmacdigest = None + encmeta = self._ase.encryption_metadata.convert_to_json_with_mac( + md5digest, hmacdigest) # generate file attribute metadata if self._store_file_attr: merged = blobxfer.models.metadata.generate_fileattr_metadata( @@ -613,12 +644,13 @@ def generate_metadata(self): generate_vectored_io_stripe_metadata(self.local_path, genmeta) if merged is not None: genmeta = merged - metadata = {} + if len(encmeta) > 0: + metadata = encmeta + else: + metadata = {} if len(genmeta) > 0: metadata[blobxfer.models.metadata.JSON_KEY_BLOBXFER_METADATA] = \ json.dumps(genmeta, ensure_ascii=False, sort_keys=True) - if len(encmeta) > 0: - raise NotImplementedError() if len(metadata) == 0: return None return metadata diff --git a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index 49b3678..63fd4a1 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -207,10 +207,15 @@ def create_container(ase, containers_created, timeout=None): return key = ase.client.account_name + ':blob=' + ase.container if key not in containers_created: - ase.client.create_container( - 
container_name=ase.container, - fail_on_exist=False, - timeout=timeout) - containers_created.add(key) - logger.info('created blob container {} on storage account {}'.format( - ase.container, ase.client.account_name)) + try: + ase.client.create_container( + container_name=ase.container, + fail_on_exist=True, + timeout=timeout) + except azure.common.AzureConflictHttpError: + pass + else: + containers_created.add(key) + logger.info( + 'created blob container {} on storage account {}'.format( + ase.container, ase.client.account_name)) diff --git a/blobxfer/operations/azure/blob/page.py b/blobxfer/operations/azure/blob/page.py index 859b4bb..4223a30 100644 --- a/blobxfer/operations/azure/blob/page.py +++ b/blobxfer/operations/azure/blob/page.py @@ -73,7 +73,7 @@ def create_blob(ase, timeout=None): ase.client.create_blob( container_name=ase.container, blob_name=ase.name, - content_length=ase.size, + content_length=blobxfer.util.page_align_content_length(ase.size), content_settings=azure.storage.blob.models.ContentSettings( content_type=blobxfer.util.get_mime_type(ase.name) ), diff --git a/blobxfer/operations/azure/file.py b/blobxfer/operations/azure/file.py index 32f1b13..3a905bf 100644 --- a/blobxfer/operations/azure/file.py +++ b/blobxfer/operations/azure/file.py @@ -251,13 +251,17 @@ def create_share(ase, containers_created, timeout=None): return key = ase.client.account_name + ':file=' + ase.container if key not in containers_created: - ase.client.create_share( - share_name=ase.container, - fail_on_exist=False, - timeout=timeout) - containers_created.add(key) - logger.info('created file share {} on storage account {}'.format( - ase.container, ase.client.account_name)) + try: + ase.client.create_share( + share_name=ase.container, + fail_on_exist=True, + timeout=timeout) + except azure.common.AzureConflictHttpError: + pass + else: + containers_created.add(key) + logger.info('created file share {} on storage account {}'.format( + ase.container, ase.client.account_name)) def create_all_parent_directories(ase, dirs_created, timeout=None): diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index 3097b96..6f1e011 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -242,10 +242,11 @@ def _worker_process(self): """ while not self.terminated: try: - inst = self._task_queue.get(True, 0.25) + inst = self._task_queue.get(True, 0.1) except queue.Empty: continue - if inst[0] == CryptoAction.Encrypt: + # UNUSED due to AES256-CBC FullBlob mode + if inst[0] == CryptoAction.Encrypt: # noqa local_file, offsets, symkey, iv = \ inst[1], inst[2], inst[3], inst[4] with open(local_file, 'rb') as fd: @@ -295,7 +296,8 @@ def add_decrypt_chunk( iv, hmac_datafile) ) - def add_encrypt_chunk(self, local_file, offsets, symkey, iv): + # UNUSED due to AES256-CBC FullBlob mode + def add_encrypt_chunk(self, local_file, offsets, symkey, iv): # noqa # type: (CryptoOffload, pathlib.Path, blobxfer.models.upload.Offsets, # bytes, bytes) -> None """Add a chunk to encrypt diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index b0f3bf4..b2b6c26 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -110,19 +110,23 @@ def output_parameters(general_options, spec): # specific preamble if isinstance(spec, blobxfer.models.download.Specification): log.append(' transfer direction: {}'.format('Azure -> local')) - log.append(' workers: xfer={} md5={} crypto={}'.format( - general_options.concurrency.transfer_threads, - 
general_options.concurrency.md5_processes - if spec.options.check_file_md5 else 0, - general_options.concurrency.crypto_processes)) + log.append( + ' workers: disk={} xfer={} md5={} crypto={}'.format( + general_options.concurrency.disk_threads, + general_options.concurrency.transfer_threads, + general_options.concurrency.md5_processes + if spec.options.check_file_md5 else 0, + general_options.concurrency.crypto_processes)) elif isinstance(spec, blobxfer.models.upload.Specification): log.append(' transfer direction: {}'.format('local -> Azure')) - log.append(' workers: xfer={} md5={} crypto={}'.format( - general_options.concurrency.transfer_threads, - general_options.concurrency.md5_processes - if spec.skip_on.md5_match or spec.options.store_file_properties.md5 - else 0, - general_options.concurrency.crypto_processes)) + log.append( + ' workers: disk={} xfer={} md5={} crypto={}'.format( + general_options.concurrency.disk_threads, + general_options.concurrency.transfer_threads, + general_options.concurrency.md5_processes + if spec.skip_on.md5_match or + spec.options.store_file_properties.md5 else 0, + 0)) # TODO handle synccopy spec @@ -161,6 +165,8 @@ def output_parameters(general_options, spec): log.append(' local destination: {}'.format( spec.destination.path)) elif isinstance(spec, blobxfer.models.upload.Specification): + log.append(' one shot bytes: {}'.format( + spec.options.one_shot_bytes)) log.append(' store properties: attr={} md5={}'.format( spec.options.store_file_properties.attributes, spec.options.store_file_properties.md5)) diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 3b38ea5..7f9230a 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -42,6 +42,7 @@ except ImportError: # noqa import Queue as queue import threading +import time # non-stdlib imports # local imports import blobxfer.models.crypto @@ -84,7 +85,7 @@ def __init__(self, general_options, creds, spec): self._upload_queue = queue.Queue() self._upload_set = set() self._upload_start_time = None - self._upload_threads = [] + self._disk_threads = [] self._upload_total = None self._upload_sofar = 0 self._upload_bytes_total = None @@ -137,6 +138,59 @@ def termination_check_md5(self): len(self._md5_map) == 0 and len(self._upload_set) == 0)) + @staticmethod + def create_unique_id(src, ase): + # type: (blobxfer.models.upload.LocalPath, + # blobxfer.models.azure.StorageEntity) -> str + """Create a unique id given a LocalPath and StorageEntity + :param blobxfer.models.upload.LocalPath src: local path + :param blobxfer.models.azure.StorageEntity ase: azure storage entity + :rtype: str + :return: unique id for pair + """ + return ';'.join( + (str(src.absolute_path), ase._client.primary_endpoint, ase.path) + ) + + @staticmethod + def create_unique_transfer_id(local_path, ase, offsets): + # type: (blobxfer.models.upload.LocalPath, + # blobxfer.models.azure.StorageEntity) -> str + """Create a unique transfer id given a offsets + :param blobxfer.models.upload.LocalPath local_path: local path + :param blobxfer.models.azure.StorageEntity ase: azure storage entity + :param blobxfer.models.upload.Offsets offsets: upload offsets + :rtype: str + :return: unique id for transfer + """ + return ';'.join( + (str(local_path.absolute_path), ase._client.primary_endpoint, + ase.path, str(local_path.view.fd_start), str(offsets.range_start)) + ) + + @staticmethod + def create_deletion_id(client, container, name): + # type: (azure.storage.StorageClient, str, str) -> str + """Create a 
unique deletion id + :param azure.storage.StorageClient client: storage client + :param str container: container name + :param str name: entity name + :rtype: str + :return: unique id for deletion + """ + return ';'.join((client.primary_endpoint, container, name)) + + @staticmethod + def append_slice_suffix_to_name(name, slice): + # type: (str, int) -> str + """Append a vectored io (stripe) slice suffix to a given name + :param str name: entity name + :param int slice: slice num + :rtype: str + :return: name with appended suffix + """ + return '{}.bxslice-{}'.format(name, slice) + def _update_progress_bar(self): # type: (Uploader) -> None """Update progress bar @@ -180,9 +234,9 @@ def _post_md5_skip_on_check(self, filename, md5_match): :param str filename: local filename :param bool md5_match: if MD5 matches """ - uid = self._create_unique_id(src, rfile) with self._md5_meta_lock: src, rfile = self._md5_map.pop(filename) + uid = blobxfer.operations.upload.Uploader.create_unique_id(src, rfile) if md5_match: with self._upload_lock: self._upload_set.remove(uid) @@ -214,36 +268,6 @@ def _check_for_uploads_from_md5(self): if result is not None: self._post_md5_skip_on_check(result[0], result[1]) - def _check_for_crypto_done(self): - # type: (Uploader) -> None - """Check queue for crypto done - :param Uploader self: this - """ - cv = self._crypto_offload.done_cv - while not self.termination_check: - result = None - cv.acquire() - while True: - result = self._crypto_offload.pop_done_queue() - if result is None: - # use cv timeout due to possible non-wake while running - cv.wait(1) - # check for terminating conditions - if self.termination_check: - break - else: - break - cv.release() - if result is not None: - try: - with self._upload_lock: - dd = self._ud_map[result] - dd.perform_chunked_integrity_check() - except KeyError: - # this can happen if all of the last integrity - # chunks are processed at once - pass - def _add_to_upload_queue(self, src, rfile, uid): # type: (Uploader, blobxfer.models.upload.LocalPath, # blobxfer.models.azure.StorageEntity, str) -> None @@ -266,16 +290,16 @@ def _add_to_upload_queue(self, src, rfile, uid): if self._upload_start_time is None: self._upload_start_time = blobxfer.util.datetime_now() - def _initialize_upload_threads(self): + def _initialize_disk_threads(self): # type: (Uploader) -> None - """Initialize upload threads + """Initialize disk threads :param Uploader self: this """ - logger.debug('spawning {} transfer threads'.format( + logger.debug('spawning {} disk threads'.format( self._general_options.concurrency.transfer_threads)) for _ in range(self._general_options.concurrency.transfer_threads): thr = threading.Thread(target=self._worker_thread_upload) - self._upload_threads.append(thr) + self._disk_threads.append(thr) thr.start() def _initialize_transfer_threads(self): @@ -290,15 +314,15 @@ def _initialize_transfer_threads(self): self._transfer_threads.append(thr) thr.start() - def _wait_for_upload_threads(self, terminate): + def _wait_for_disk_threads(self, terminate): # type: (Uploader, bool) -> None - """Wait for upload threads + """Wait for disk threads :param Uploader self: this :param bool terminate: terminate threads """ if terminate: self._upload_terminate = terminate - for thr in self._upload_threads: + for thr in self._disk_threads: thr.join() def _wait_for_transfer_threads(self, terminate): @@ -320,7 +344,7 @@ def _worker_thread_transfer(self): while not self.termination_check: try: ud, ase, offsets, data = self._transfer_queue.get( - 
block=False, timeout=0.03) + block=False, timeout=0.1) except queue.Empty: continue try: @@ -330,17 +354,40 @@ def _worker_thread_transfer(self): self._exceptions.append(e) def _process_transfer(self, ud, ase, offsets, data): + # type: (Uploader, blobxfer.models.upload.Descriptor, + # blobxfer.models.azure.StorageEntity, + # blobxfer.models.upload.Offsets, bytes) -> None + """Process transfer instructions + :param Uploader self: this + :param blobxfer.models.upload.Descriptor ud: upload descriptor + :param blobxfer.models.azure.StorageEntity ase: Storage entity + :param blobxfer.models.upload.Offsets offsets: offsets + :param bytes data: data to upload + """ # issue put range self._put_data(ud, ase, offsets, data) # accounting with self._transfer_lock: self._transfer_set.remove( - self._create_unique_transfer_id(ud.local_path, ase, offsets)) + blobxfer.operations.upload.Uploader.create_unique_transfer_id( + ud.local_path, ase, offsets)) self._upload_bytes_sofar += offsets.num_bytes + if offsets.chunk_num == 0: + self._upload_bytes_total += ase.size ud.complete_offset_upload() def _put_data(self, ud, ase, offsets, data): - print('UL', offsets) + # type: (Uploader, blobxfer.models.upload.Descriptor, + # blobxfer.models.azure.StorageEntity, + # blobxfer.models.upload.Offsets, bytes) -> None + """Put data in Azure + :param Uploader self: this + :param blobxfer.models.upload.Descriptor ud: upload descriptor + :param blobxfer.models.azure.StorageEntity ase: Storage entity + :param blobxfer.models.upload.Offsets offsets: offsets + :param bytes data: data to upload + """ + print('UL', offsets, ase.path, len(data) if data is not None else None) if ase.mode == blobxfer.models.azure.StorageModes.Append: raise NotImplementedError() elif ase.mode == blobxfer.models.azure.StorageModes.Block: @@ -352,7 +399,7 @@ def _put_data(self, ud, ase, offsets, data): # handle one-shot uploads if ud.is_one_shot_block_blob: metadata = ud.generate_metadata() - if ud.must_compute_md5: + if not ud.entity.is_encrypted and ud.must_compute_md5: digest = blobxfer.util.base64_encode_as_string( ud.md5.digest()) else: @@ -385,6 +432,7 @@ def _put_data(self, ud, ase, offsets, data): ase, offsets, data, timeout=self._general_options.timeout_sec) elif ase.mode == blobxfer.models.azure.StorageModes.Page: + # compute aligned size if offsets.chunk_num == 0: # create container if necessary blobxfer.operations.azure.blob.create_container( @@ -412,15 +460,15 @@ def _worker_thread_upload(self): """Worker thread upload :param Uploader self: this """ - import time while not self.termination_check: try: - if (len(self._transfer_set) >= - self._general_options.concurrency.transfer_threads * 2): - time.sleep(0.5) + if (len(self._transfer_set) > + self._general_options.concurrency. 
+ transfer_threads * 4): + time.sleep(0.2) continue else: - ud = self._upload_queue.get(False, 0.03) + ud = self._upload_queue.get(False, 0.1) except queue.Empty: continue try: @@ -461,23 +509,13 @@ def _process_upload_descriptor(self, ud): if offsets is None: self._upload_queue.put(ud) return - - # TODO encryption - # encrypt if necessary - if ud.entity.is_encrypted: - # send iv through hmac - ud.hmac_data(ud.current_iv) + if ud.entity.is_encrypted and ud.entity.size > 0: + # send iv through hmac if first chunk + if offsets.chunk_num == 0: + ud.hmac_data(ud.current_iv) # encrypt data - if self._crypto_offload is not None: - self._crypto_offload.add_encrypt_chunk( - str(ud.local_path.absolute_path), offsets, - ud.entity.encryption_metadata.symmetric_key, - ud.current_iv) - # encrypted data will be retrieved from a temp file once - # retrieved from crypto queue - return - else: + if self._crypto_offload is None: # read data from file and encrypt data = ud.read_data(offsets) encdata = blobxfer.operations.crypto.aes_cbc_encrypt_data( @@ -486,7 +524,19 @@ def _process_upload_descriptor(self, ud): # send encrypted data through hmac ud.hmac_data(encdata) data = encdata - # TODO save last 16 encrypted bytes for next IV + # save last 16 encrypted bytes for next IV + ud.current_iv = \ + encdata[-blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES:] + else: # noqa + # crypto offload is not supported with AES256-CBC FullBlob + raise NotImplementedError() + # self._crypto_offload.add_encrypt_chunk( + # str(ud.local_path.absolute_path), offsets, + # ud.entity.encryption_metadata.symmetric_key, + # ud.current_iv) + # encrypted data will be retrieved from a temp file once + # retrieved from crypto queue + # return_early = True else: data = ud.read_data(offsets) # re-enqueue for other threads to upload @@ -494,7 +544,7 @@ def _process_upload_descriptor(self, ud): # add data to transfer queue with self._transfer_lock: self._transfer_set.add( - self._create_unique_transfer_id( + blobxfer.operations.upload.Uploader.create_unique_transfer_id( ud.local_path, ud.entity, offsets)) self._transfer_queue.put((ud, ud.entity, offsets, data)) # iterate replicas @@ -502,15 +552,21 @@ def _process_upload_descriptor(self, ud): for ase in ud.entity.replica_targets: with self._transfer_lock: self._transfer_set.add( - self._create_unique_transfer_id( - ud.local_path, ase, offsets)) + blobxfer.operations.upload.Uploader. + create_unique_transfer_id(ud.local_path, ase, offsets) + ) self._transfer_queue.put((ud, ase, offsets, data)) def _finalize_file(self, ud): + # type: (Uploader, blobxfer.models.upload.Descriptor) -> None + """Finalize file upload + :param Uploader self: this + :param blobxfer.models.upload.Descriptor: upload descriptor + """ metadata = ud.generate_metadata() # put block list for non one-shot block blobs if ud.requires_put_block_list: - if ud.must_compute_md5: + if not ud.entity.is_encrypted and ud.must_compute_md5: digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) else: digest = None @@ -569,20 +625,6 @@ def _finalize_file(self, ud): ase, metadata, timeout=self._general_options.timeout_sec) - def _cleanup_temporary_files(self): - # type: (Uploader) -> None - """Cleanup temporary files in case of an exception or interrupt. - This function is not thread-safe. 
- :param Uploader self: this - """ - # iterate through dd map and cleanup files - for key in self._ud_map: - dd = self._ud_map[key] - try: - dd.cleanup_all_temporary_files() - except Exception as e: - logger.exception(e) - def _get_destination_paths(self): # type: (Uploader) -> # Tuple[blobxfer.operations.azure.StorageAccount, str, str] @@ -609,7 +651,6 @@ def _delete_extraneous_files(self): # list blobs for all destinations checked = set() deleted = 0 - print(self._delete_exclude) for sa, container, _, _ in self._get_destination_paths(): key = ';'.join((sa.name, sa.endpoint, container)) if key in checked: @@ -623,9 +664,8 @@ def _delete_extraneous_files(self): sa.file_client, container, timeout=self._general_options.timeout_sec) for file in files: - id = self._create_deletion_id( - sa.file_client, container, file) - print(id) + id = blobxfer.operations.upload.Uploader.\ + create_deletion_id(sa.file_client, container, file) if id not in self._delete_exclude: blobxfer.operations.azure.file.delete_file( sa.file_client, container, file, @@ -636,8 +676,9 @@ def _delete_extraneous_files(self): sa.block_blob_client, container, timeout=self._general_options.timeout_sec) for blob in blobs: - id = self._create_deletion_id( - sa.block_blob_client, container, blob.name) + id = blobxfer.operations.upload.Uploader.\ + create_deletion_id( + sa.block_blob_client, container, blob.name) if id not in self._delete_exclude: blobxfer.operations.azure.blob.delete_blob( sa.block_blob_client, container, blob.name, @@ -702,6 +743,14 @@ def _check_upload_conditions(self, local_path, rfile): return UploadAction.Skip def _check_for_existing_remote(self, sa, cont, name): + # type: (Uploader, blobxfer.operations.azure.StorageAccount, + # str, str) -> bobxfer.models.azure.StorageEntity + """Check for an existing remote file + :param Uploader self: this + :param blobxfer.operations.azure.StorageAccount sa: storage account + :param str cont: container + :param str name: entity name + """ if self._spec.options.mode == blobxfer.models.azure.StorageModes.File: fp = blobxfer.operations.azure.file.get_file_properties( sa.file_client, cont, name, @@ -728,10 +777,14 @@ def _check_for_existing_remote(self, sa, cont, name): return ase def _generate_destination_for_source(self, local_path): - # type: (Uploader, blobxfer.models.upload.LocalSourcePath) -> ??? 
+ # type: (Uploader, blobxfer.models.upload.LocalSourcePath) -> + # Tuple[blobxfer.operations.azure.StorageAccount, + # blobxfer.models.azure.StorageEntity) """Generate entities for source path :param Uploader self: this :param blobxfer.models.upload.LocalSourcePath local_path: local path + :rtype: tuple + :return: storage account, storage entity """ # construct stripped destination path spath = local_path.relative_path @@ -761,41 +814,24 @@ def _generate_destination_for_source(self, local_path): else: ase = self._check_for_existing_remote(sa, cont, name) if ase is None: - if self._spec.options.rsa_public_key: - ed = blobxfer.models.crypto.EncryptionMetadata() - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) + # encryption metadata will be populated later, if required + ase = blobxfer.models.azure.StorageEntity(cont, ed=None) ase.populate_from_local( sa, cont, name, self._spec.options.mode) yield sa, ase - def _create_unique_id(self, src, ase): - return ';'.join( - (str(src.absolute_path), ase._client.primary_endpoint, ase.path) - ) - - def _create_unique_transfer_id(self, local_path, ase, offsets): - return ';'.join( - (str(local_path.absolute_path), ase._client.primary_endpoint, - ase.path, str(local_path.view.fd_start), str(offsets.range_start)) - ) - - def _create_deletion_id(self, client, container, name): - return ';'.join( - (client.primary_endpoint, container, name) - ) - - def append_slice_suffix_to_name(self, name, slice): - return '{}.bxslice-{}'.format(name, slice) - def _vectorize_and_bind(self, local_path, dest): # type: (Uploader, blobxfer.models.upload.LocalPath, - # List[blobxfer.models.azure.StorageEntity]) -> None - """Vectorize local path to destinations and bind + # List[blobxfer.models.azure.StorageEntity]) -> + # Tuple[blobxfer.operations.upload.UploadAction, + # blobxfer.models.upload.LocalPath, + # blobxfer.models.azure.StorageEntity] + """Vectorize local path to destinations, if necessary, and bind :param Uploader self: this :param blobxfer.models.LocalPath local_path: local path - :param list rfile: remote file + :param list dest: list of destination tuples (sa, ase) + :rtype: tuple + :return: action, LocalPath, ase """ if (self._spec.options.vectored_io.distribution_mode == blobxfer.models.upload.VectoredIoDistributionMode.Stripe): @@ -817,15 +853,13 @@ def _vectorize_and_bind(self, local_path, dest): slice_map = {} for i in range(0, slices): sa, ase = dest[i % num_dest] - name = self.append_slice_suffix_to_name(ase.name, i) + name = blobxfer.operations.upload.Uploader.\ + append_slice_suffix_to_name(ase.name, i) sase = self._check_for_existing_remote(sa, ase.container, name) if sase is None: - if self._spec.options.rsa_public_key: - ed = blobxfer.models.crypto.EncryptionMetadata() - else: - ed = None + # encryption metadata will be populated later, if required sase = blobxfer.models.azure.StorageEntity( - ase.container, ed) + ase.container, ed=None) sase.populate_from_local( sa, ase.container, name, self._spec.options.mode) slice_map[i] = sase @@ -857,7 +891,6 @@ def _vectorize_and_bind(self, local_path, dest): next=next_entry, ) ) - print(lp_slice.view) action = self._check_upload_conditions(lp_slice, ase) yield action, lp_slice, ase curr = end @@ -909,18 +942,21 @@ def _run(self): self._check_for_uploads_from_md5) # initialize crypto processes if self._general_options.concurrency.crypto_processes > 0: - self._crypto_offload = blobxfer.operations.crypto.CryptoOffload( - num_workers=self._general_options.concurrency.crypto_processes) - 
self._crypto_offload.initialize_check_thread( - self._check_for_crypto_done) - # initialize upload threads - self._initialize_upload_threads() + logger.warning( + 'crypto offload for upload is not possible due to ' + 'sequential nature of {} and FullBlob encryption mode'.format( + blobxfer.models.crypto.EncryptionMetadata. + _ENCRYPTION_ALGORITHM) + ) + # initialize worker threads + self._initialize_disk_threads() self._initialize_transfer_threads() # initialize local counters - nfiles = 0 - total_size = 0 skipped_files = 0 skipped_size = 0 + approx_total_bytes = 0 + self._upload_total = 0 + self._upload_bytes_total = 0 if not self._spec.sources.can_rename() and self._spec.options.rename: raise RuntimeError( 'cannot rename to specified destination with multiple sources') @@ -932,46 +968,43 @@ def _run(self): self._generate_destination_for_source(src) ] for action, lp, ase in self._vectorize_and_bind(src, dest): - print(lp.parent_path, lp.relative_path, lp.absolute_path, action, ase.container, ase.name) - print(lp.total_size, lp.size, lp.mode, lp.uid, lp.gid) - print(self._create_unique_id(lp, ase)) - print('replicas', len(ase.replica_targets) if ase.replica_targets is not None else 'none') if self._spec.options.delete_extraneous_destination: self._delete_exclude.add( - self._create_deletion_id( + blobxfer.operations.upload.Uploader.create_deletion_id( ase._client, ase.container, ase.name) ) if action == UploadAction.Skip: skipped_files += 1 skipped_size += ase.size if ase.size is not None else 0 continue + approx_total_bytes += lp.size # add to potential upload set - uid = self._create_unique_id(lp, ase) + uid = blobxfer.operations.upload.Uploader.create_unique_id( + lp, ase) with self._upload_lock: self._upload_set.add(uid) + self._upload_total += 1 if action == UploadAction.CheckMd5: self._pre_md5_skip_on_check(lp, ase) elif action == UploadAction.Upload: self._add_to_upload_queue(lp, ase, uid) - - nfiles += 1 - total_size += lp.size - - self._upload_total = nfiles - skipped_files - self._upload_bytes_total = total_size - skipped_size - upload_size_mib = self._upload_bytes_total / blobxfer.util.MEGABYTE # set remote files processed with self._md5_meta_lock: self._all_local_files_processed = True - logger.debug( - ('{0} local files processed, waiting for upload completion ' - 'of {1:.4f} MiB').format(nfiles, upload_size_mib)) - del nfiles - del total_size + with self._upload_lock: + self._upload_total -= skipped_files + self._upload_bytes_total -= skipped_size + upload_size_mib = approx_total_bytes / blobxfer.util.MEGABYTE + logger.debug( + ('{0} local/remote files processed, waiting for upload ' + 'completion of approx. 
{1:.4f} MiB').format( + self._upload_total, upload_size_mib)) del skipped_files del skipped_size + del upload_size_mib + del approx_total_bytes # wait for uploads to complete - self._wait_for_upload_threads(terminate=False) + self._wait_for_disk_threads(terminate=False) self._wait_for_transfer_threads(terminate=False) end_time = blobxfer.util.datetime_now() # update progress bar @@ -997,11 +1030,12 @@ def _run(self): # output throughput if self._upload_start_time is not None: ultime = (end_time - self._upload_start_time).total_seconds() - mibps = upload_size_mib / ultime + mibup = self._upload_bytes_total / blobxfer.util.MEGABYTE + mibps = mibup / ultime logger.info( - ('elapsed upload + verify time and throughput: {0:.3f} sec, ' - '{1:.4f} Mbps ({2:.3f} MiB/s)').format( - ultime, mibps * 8, mibps)) + ('elapsed upload + verify time and throughput of {0:.4f} ' + 'GiB: {1:.3f} sec, {2:.4f} Mbps ({3:.3f} MiB/s)').format( + mibup / 1024, ultime, mibps * 8, mibps)) end_time = blobxfer.util.datetime_now() logger.info('blobxfer end time: {0} (elapsed: {1:.3f} sec)'.format( end_time, (end_time - self._start_time).total_seconds())) @@ -1022,10 +1056,9 @@ def start(self): 'processes and threads (this may take a while)...') try: self._wait_for_transfer_threads(terminate=True) - self._wait_for_upload_threads(terminate=True) + self._wait_for_disk_threads(terminate=True) finally: - self._cleanup_temporary_files() - raise + raise finally: # shutdown processes if self._md5_offload is not None: diff --git a/cli/cli.py b/cli/cli.py index c3b9b2d..8ce121b 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -120,7 +120,21 @@ def callback(ctx, param, value): expose_value=False, type=int, default=0, - help='Concurrent crypto processes', + help='Concurrent crypto processes (download only)', + callback=callback)(f) + + +def _disk_threads_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['disk_threads'] = value + return value + return click.option( + '--disk-threads', + expose_value=False, + type=int, + default=0, + help='Concurrent disk threads', callback=callback)(f) @@ -225,6 +239,7 @@ def common_options(f): f = _progress_bar_option(f) f = _md5_processes_option(f) f = _log_file_option(f) + f = _disk_threads_option(f) f = _crypto_processes_option(f) return f diff --git a/cli/settings.py b/cli/settings.py index 5834c12..d198359 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -225,6 +225,7 @@ def merge_settings(config, cli_options): config['options']['progress_bar'] = cli_options['progress_bar'] config['options']['resume_file'] = cli_options['resume_file'] config['options']['timeout_sec'] = cli_options['timeout'] + config['options']['disk_threads'] = cli_options['disk_threads'] config['options']['transfer_threads'] = cli_options['transfer_threads'] config['options']['verbose'] = cli_options['verbose'] @@ -256,6 +257,7 @@ def create_general_options(config): return blobxfer.models.options.General( concurrency=blobxfer.models.options.Concurrency( crypto_processes=config['options']['crypto_processes'], + disk_threads=config['options']['disk_threads'], md5_processes=config['options']['md5_processes'], transfer_threads=config['options']['transfer_threads'], ), diff --git a/setup.py b/setup.py index 2725ade..a9031a7 100644 --- a/setup.py +++ b/setup.py @@ -39,14 +39,14 @@ ] install_requires = [ - 'azure-common==1.1.5', - 'azure-storage==0.34.0', + 'azure-common==1.1.6', + 'azure-storage==0.34.2', 'click==6.7', 'cryptography>=1.8.1', 'future==0.16.0', 
'python-dateutil==2.6.0', - 'requests==2.13.0', - 'ruamel.yaml==0.14.8', + 'requests==2.14.2', + 'ruamel.yaml==0.14.12', ] if sys.version_info < (3, 4): From a949367261151d74acb6c85238f393ed29ed80cc Mon Sep 17 00:00:00 2001 From: Fred Park Date: Fri, 26 May 2017 08:03:02 -0700 Subject: [PATCH 31/47] Split transfer/disk threads on download - Fix some naming/path issues - Fix race condition on container/entity init - Upload progress bar --- blobxfer/models/azure.py | 11 +- blobxfer/models/download.py | 26 +++- blobxfer/operations/azure/__init__.py | 3 + blobxfer/operations/crypto.py | 2 +- blobxfer/operations/download.py | 193 ++++++++++++++++++++------ blobxfer/operations/progress.py | 2 +- blobxfer/operations/upload.py | 120 +++++++++------- 7 files changed, 252 insertions(+), 105 deletions(-) diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index d44af48..1e3325e 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -31,6 +31,10 @@ next, oct, open, pow, round, super, filter, map, zip) # stdlib imports import enum +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib # non-stdlib imports from azure.storage.blob.models import _BlobTypes as BlobTypes # local imports @@ -219,7 +223,7 @@ def populate_from_blob(self, sa, blob, path): :param str path: full path to blob """ self._create_containers = sa.create_containers - self._name = path + self._name = str(pathlib.Path(path) / blob.name) self._snapshot = blob.snapshot self._lmt = blob.properties.last_modified self._size = blob.properties.content_length @@ -244,7 +248,10 @@ def populate_from_file(self, sa, file, path): :param str path: full path to file """ self._create_containers = sa.create_containers - self._name = path + if path is not None: + self._name = str(pathlib.Path(path) / file.name) + else: + self._name = file.name self._snapshot = None self._lmt = file.properties.last_modified self._size = file.properties.content_length diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index e1c9266..3c5f686 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -517,7 +517,10 @@ def write_unchecked_data(self, offsets, data): temp=False, ) with self._meta_lock: - self._unchecked_chunks[offsets.chunk_num] = unchecked + self._unchecked_chunks[offsets.chunk_num] = { + 'ucc': unchecked, + 'decrypted': True, + } def write_unchecked_hmac_data(self, offsets, data): # type: (Descriptor, Offsets, bytes) -> None @@ -537,9 +540,21 @@ def write_unchecked_hmac_data(self, offsets, data): temp=True, ) with self._meta_lock: - self._unchecked_chunks[offsets.chunk_num] = unchecked + self._unchecked_chunks[offsets.chunk_num] = { + 'ucc': unchecked, + 'decrypted': False, + } return str(unchecked.file_path) + def mark_unchecked_chunk_decrypted(self, chunk_num): + # type: (Descriptor, int) -> None + """Mark an unchecked chunk as decrypted + :param Descriptor self: this + :param int chunk_num: unchecked chunk number + """ + with self._meta_lock: + self._unchecked_chunks[chunk_num]['decrypted'] = True + def perform_chunked_integrity_check(self): # type: (Descriptor) -> None """Hash data against stored hasher safely @@ -552,8 +567,9 @@ def perform_chunked_integrity_check(self): with self._meta_lock: chunk_num = self._next_integrity_chunk # check if the next chunk is ready - if chunk_num in self._unchecked_chunks: - ucc = self._unchecked_chunks.pop(chunk_num) + if (chunk_num in self._unchecked_chunks and + self._unchecked_chunks[chunk_num]['decrypted']): + ucc = 
self._unchecked_chunks.pop(chunk_num)['ucc'] else: break # hash data and set next integrity chunk @@ -653,7 +669,7 @@ def finalize_file(self): # delete temp download file self.local_path.unlink() return - logger.debug(msg) + logger.info(msg) # TODO set file uid/gid and mode diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index a237532..9b19423 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -292,6 +292,9 @@ def _populate_from_list_files(self, creds, options, general_options): else: ed = None ase = blobxfer.models.azure.StorageEntity(cont, ed) + if dir is not None: + dir, _ = blobxfer.operations.azure.file.parse_file_path( + dir) ase.populate_from_file(sa, file, dir) yield ase diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index 6f1e011..19c56a4 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -273,7 +273,7 @@ def _worker_process(self): fd.seek(offsets.fd_start, 0) fd.write(data) self._done_cv.acquire() - self._done_queue.put(final_path) + self._done_queue.put((final_path, offsets)) # notify and release condition var self._done_cv.notify() self._done_cv.release() diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index fa379fc..47c237e 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -41,6 +41,7 @@ except ImportError: # noqa import Queue as queue import threading +import time # non-stdlib imports # local imports import blobxfer.models.crypto @@ -79,11 +80,15 @@ def __init__(self, general_options, creds, spec): self._md5_meta_lock = threading.Lock() self._md5_map = {} self._md5_offload = None - self._download_lock = threading.Lock() - self._download_queue = queue.Queue() - self._download_set = set() + self._transfer_lock = threading.Lock() + self._transfer_queue = queue.Queue() + self._transfer_set = set() + self._transfer_threads = [] + self._disk_operation_lock = threading.Lock() + self._disk_queue = queue.Queue() + self._disk_set = set() + self._disk_threads = [] self._download_start_time = None - self._download_threads = [] self._download_total = None self._download_sofar = 0 self._download_bytes_total = None @@ -106,11 +111,13 @@ def termination_check(self): :rtype: bool :return: if terminated """ - with self._download_lock: - return (self._download_terminate or - len(self._exceptions) > 0 or - (self._all_remote_files_processed and - len(self._download_set) == 0)) + with self._transfer_lock: + with self._disk_operation_lock: + return (self._download_terminate or + len(self._exceptions) > 0 or + (self._all_remote_files_processed and + len(self._transfer_set) == 0 and + len(self._disk_set) == 0)) @property def termination_check_md5(self): @@ -121,11 +128,11 @@ def termination_check_md5(self): :return: if terminated from MD5 context """ with self._md5_meta_lock: - with self._download_lock: + with self._transfer_lock: return (self._download_terminate or (self._all_remote_files_processed and len(self._md5_map) == 0 and - len(self._download_set) == 0)) + len(self._transfer_set) == 0)) @staticmethod def ensure_local_destination(creds, spec): @@ -163,6 +170,20 @@ def ensure_local_destination(creds, spec): # ensure destination path spec.destination.ensure_path_exists() + @staticmethod + def create_unique_disk_operation_id(dd, offsets): + # type: (blobxfer.models.download.Descriptor, + # blobxfer.models.download.Offsets) -> None + """Create a unique disk operation id + :param 
blobxfer.models.download.Descriptor dd: download descriptor + :param blobxfer.models.download.Offsets offsets: download offsets + """ + # TODO add local view offset or slice num with stripe support + return ';'.join( + (str(dd.local_path), dd.entity._client.primary_endpoint, + dd.entity.path, str(offsets.range_start)) + ) + def _update_progress_bar(self): # type: (Downloader) -> None """Update progress bar @@ -260,8 +281,8 @@ def _post_md5_skip_on_check(self, filename, md5_match): rfile = self._md5_map.pop(filename) lpath = pathlib.Path(filename) if md5_match: - with self._download_lock: - self._download_set.remove(lpath) + with self._transfer_lock: + self._transfer_set.remove(lpath) self._download_total -= 1 self._download_bytes_total -= lpath.stat().st_size else: @@ -303,7 +324,7 @@ def _check_for_crypto_done(self): result = self._crypto_offload.pop_done_queue() if result is None: # use cv timeout due to possible non-wake while running - cv.wait(1) + cv.wait(0.1) # check for terminating conditions if self.termination_check: break @@ -312,9 +333,10 @@ def _check_for_crypto_done(self): cv.release() if result is not None: try: - with self._download_lock: - dd = self._dd_map[result] - dd.perform_chunked_integrity_check() + final_path, offsets = result + with self._transfer_lock: + dd = self._dd_map[final_path] + self._finalize_chunk(dd, offsets) except KeyError: # this can happen if all of the last integrity # chunks are processed at once @@ -332,28 +354,51 @@ def _add_to_download_queue(self, lpath, rfile): dd = blobxfer.models.download.Descriptor( lpath, rfile, self._spec.options, self._resume) if dd.entity.is_encrypted: - with self._download_lock: + with self._transfer_lock: self._dd_map[str(dd.final_path)] = dd # add download descriptor to queue - self._download_queue.put(dd) + self._transfer_queue.put(dd) if self._download_start_time is None: - with self._download_lock: + with self._transfer_lock: if self._download_start_time is None: self._download_start_time = blobxfer.util.datetime_now() - def _initialize_download_threads(self): + def _initialize_disk_threads(self): # type: (Downloader) -> None """Initialize download threads :param Downloader self: this """ + logger.debug('spawning {} disk threads'.format( + self._general_options.concurrency.disk_threads)) + for _ in range(self._general_options.concurrency.disk_threads): + thr = threading.Thread(target=self._worker_thread_disk) + self._disk_threads.append(thr) + thr.start() + + def _initialize_transfer_threads(self): + # type: (Downloader) -> None + """Initialize transfer threads + :param Downloader self: this + """ logger.debug('spawning {} transfer threads'.format( self._general_options.concurrency.transfer_threads)) for _ in range(self._general_options.concurrency.transfer_threads): - thr = threading.Thread(target=self._worker_thread_download) - self._download_threads.append(thr) + thr = threading.Thread(target=self._worker_thread_transfer) + self._transfer_threads.append(thr) thr.start() - def _wait_for_download_threads(self, terminate): + def _wait_for_disk_threads(self, terminate): + # type: (Downloader, bool) -> None + """Wait for disk threads + :param Downloader self: this + :param bool terminate: terminate threads + """ + if terminate: + self._download_terminate = terminate + for thr in self._disk_threads: + thr.join() + + def _wait_for_transfer_threads(self, terminate): # type: (Downloader, bool) -> None """Wait for download threads :param Downloader self: this @@ -361,30 +406,53 @@ def _wait_for_download_threads(self, 
terminate): """ if terminate: self._download_terminate = terminate - for thr in self._download_threads: + for thr in self._transfer_threads: thr.join() - def _worker_thread_download(self): + def _worker_thread_transfer(self): # type: (Downloader) -> None """Worker thread download :param Downloader self: this """ while not self.termination_check: try: - dd = self._download_queue.get(False, 0.25) + if (len(self._disk_set) > + self._general_options.concurrency. + disk_threads * 4): + time.sleep(0.2) + continue + else: + dd = self._transfer_queue.get(block=False, timeout=0.1) except queue.Empty: continue try: self._process_download_descriptor(dd) except Exception as e: - with self._download_lock: + with self._transfer_lock: + self._exceptions.append(e) + + def _worker_thread_disk(self): + # type: (Downloader) -> None + """Worker thread for disk + :param Downloader self: this + """ + while not self.termination_check: + try: + dd, offsets, data = self._disk_queue.get( + block=False, timeout=0.1) + except queue.Empty: + continue + try: + self._process_data(dd, offsets, data) + except Exception as e: + with self._transfer_lock: self._exceptions.append(e) def _process_download_descriptor(self, dd): # type: (Downloader, blobxfer.models.download.Descriptor) -> None """Process download descriptor :param Downloader self: this - :param blobxfer.models.download.Descriptor: download descriptor + :param blobxfer.models.download.Descriptor dd: download descriptor """ # update progress bar self._update_progress_bar() @@ -392,7 +460,7 @@ def _process_download_descriptor(self, dd): offsets, resume_bytes = dd.next_offsets() # add resume bytes to counter if resume_bytes is not None: - with self._download_lock: + with self._disk_operation_lock: self._download_bytes_sofar += resume_bytes logger.debug('adding {} sofar {} from {}'.format( resume_bytes, self._download_bytes_sofar, dd._ase.name)) @@ -402,14 +470,14 @@ def _process_download_descriptor(self, dd): # finalize file dd.finalize_file() # accounting - with self._download_lock: + with self._transfer_lock: if dd.entity.is_encrypted: self._dd_map.pop(str(dd.final_path)) - self._download_set.remove(dd.final_path) + self._transfer_set.remove(dd.final_path) self._download_sofar += 1 return # re-enqueue for other threads to download - self._download_queue.put(dd) + self._transfer_queue.put(dd) if offsets is None: return # issue get range @@ -419,9 +487,22 @@ def _process_download_descriptor(self, dd): else: data = blobxfer.operations.azure.blob.get_blob_range( dd.entity, offsets, self._general_options.timeout_sec) - # accounting - with self._download_lock: - self._download_bytes_sofar += offsets.num_bytes + # enqueue data for processing + with self._disk_operation_lock: + self._disk_set.add( + blobxfer.operations.download.Downloader. 
+ create_unique_disk_operation_id(dd, offsets)) + self._disk_queue.put((dd, offsets, data)) + + def _process_data(self, dd, offsets, data): + # type: (Downloader, blobxfer.models.download.Descriptor, + # blobxfer.models.download.Offsets, bytes) -> None + """Process downloaded data for disk + :param Downloader self: this + :param blobxfer.models.download.Descriptor dd: download descriptor + :param blobxfer.models.download.Offsets offsets: offsets + :param bytes data: data to process + """ # decrypt if necessary if dd.entity.is_encrypted: # slice data to proper bounds and get iv for chunk @@ -457,9 +538,28 @@ def _process_download_descriptor(self, dd): else: # write data to disk dd.write_unchecked_data(offsets, data) + # finalize chunk + self._finalize_chunk(dd, offsets) + + def _finalize_chunk(self, dd, offsets): + # type: (Downloader, blobxfer.models.download.Descriptor, + # blobxfer.models.download.Offsets) -> None + """Finalize written chunk + :param Downloader self: this + :param blobxfer.models.download.Descriptor dd: download descriptor + :param blobxfer.models.download.Offsets offsets: offsets + """ + if dd.entity.is_encrypted: + dd.mark_unchecked_chunk_decrypted(offsets.chunk_num) # integrity check data and write to disk (this is called # regardless of md5/hmac enablement for resume purposes) dd.perform_chunked_integrity_check() + # remove from disk set and add bytes to counter + with self._disk_operation_lock: + self._disk_set.remove( + blobxfer.operations.download.Downloader. + create_unique_disk_operation_id(dd, offsets)) + self._download_bytes_sofar += offsets.num_bytes def _cleanup_temporary_files(self): # type: (Downloader) -> None @@ -532,7 +632,8 @@ def _run(self): self._crypto_offload.initialize_check_thread( self._check_for_crypto_done) # initialize download threads - self._initialize_download_threads() + self._initialize_transfer_threads() + self._initialize_disk_threads() # initialize local counters nfiles = 0 total_size = 0 @@ -563,8 +664,8 @@ def _run(self): skipped_size += rfile.size continue # add potential download to set - with self._download_lock: - self._download_set.add(lpath) + with self._transfer_lock: + self._transfer_set.add(lpath) # either MD5 check or download now if action == DownloadAction.CheckMd5: self._pre_md5_skip_on_check(lpath, rfile) @@ -584,7 +685,8 @@ def _run(self): del skipped_files del skipped_size # wait for downloads to complete - self._wait_for_download_threads(terminate=False) + self._wait_for_transfer_threads(terminate=False) + self._wait_for_disk_threads(terminate=False) end_time = blobxfer.util.datetime_now() # update progress bar self._update_progress_bar() @@ -609,10 +711,12 @@ def _run(self): # output throughput if self._download_start_time is not None: dltime = (end_time - self._download_start_time).total_seconds() + dlmibspeed = download_size_mib / dltime logger.info( - ('elapsed download + verify time and throughput: {0:.3f} sec, ' - '{1:.4f} Mbps').format( - dltime, download_size_mib * 8 / dltime)) + ('elapsed download + verify time and throughput of {0:.4f} ' + 'GiB: {1:.3f} sec, {2:.4f} Mbps ({3:.3f} MiB/sec)').format( + download_size_mib / 1024, dltime, dlmibspeed * 8, + dlmibspeed)) end_time = blobxfer.util.datetime_now() logger.info('blobxfer end time: {0} (elapsed: {1:.3f} sec)'.format( end_time, (end_time - self._start_time).total_seconds())) @@ -632,7 +736,8 @@ def start(self): 'KeyboardInterrupt detected, force terminating ' 'processes and threads (this may take a while)...') try: - 
self._wait_for_download_threads(terminate=True) + self._wait_for_transfer_threads(terminate=True) + self._wait_for_disk_threads(terminate=True) finally: self._cleanup_temporary_files() raise diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index b2b6c26..a0689ed 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -66,7 +66,7 @@ def update_progress_bar( if diff <= 0: # arbitrarily give a small delta diff = 1e-9 - if total_bytes is None: + if total_bytes is None or total_bytes == 0 or bytes_sofar > total_bytes: done = 0 else: done = float(bytes_sofar) / total_bytes diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 7f9230a..d33aad6 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -76,7 +76,7 @@ def __init__(self, general_options, creds, spec): :param blobxfer.operations.azure.StorageCredentials creds: creds :param blobxfer.models.uplaod.Specification spec: upload spec """ - self._all_local_files_processed = False + self._all_files_processed = False self._crypto_offload = None self._md5_meta_lock = threading.Lock() self._md5_map = {} @@ -119,7 +119,7 @@ def termination_check(self): with self._transfer_lock: return (self._upload_terminate or len(self._exceptions) > 0 or - (self._all_local_files_processed and + (self._all_files_processed and len(self._upload_set) == 0 and len(self._transfer_set) == 0)) @@ -134,7 +134,7 @@ def termination_check_md5(self): with self._md5_meta_lock: with self._upload_lock: return (self._upload_terminate or - (self._all_local_files_processed and + (self._all_files_processed and len(self._md5_map) == 0 and len(self._upload_set) == 0)) @@ -196,6 +196,8 @@ def _update_progress_bar(self): """Update progress bar :param Uploader self: this """ + if not self._all_files_processed: + return blobxfer.operations.progress.update_progress_bar( self._general_options, 'upload', @@ -297,7 +299,7 @@ def _initialize_disk_threads(self): """ logger.debug('spawning {} disk threads'.format( self._general_options.concurrency.transfer_threads)) - for _ in range(self._general_options.concurrency.transfer_threads): + for _ in range(self._general_options.concurrency.disk_threads): thr = threading.Thread(target=self._worker_thread_upload) self._disk_threads.append(thr) thr.start() @@ -368,13 +370,15 @@ def _process_transfer(self, ud, ase, offsets, data): self._put_data(ud, ase, offsets, data) # accounting with self._transfer_lock: + if offsets.chunk_num == 0: + self._upload_bytes_total += ase.size + self._upload_bytes_sofar += offsets.num_bytes self._transfer_set.remove( blobxfer.operations.upload.Uploader.create_unique_transfer_id( ud.local_path, ase, offsets)) - self._upload_bytes_sofar += offsets.num_bytes - if offsets.chunk_num == 0: - self._upload_bytes_total += ase.size ud.complete_offset_upload() + # update progress bar + self._update_progress_bar() def _put_data(self, ud, ase, offsets, data): # type: (Uploader, blobxfer.models.upload.Descriptor, @@ -391,61 +395,34 @@ def _put_data(self, ud, ase, offsets, data): if ase.mode == blobxfer.models.azure.StorageModes.Append: raise NotImplementedError() elif ase.mode == blobxfer.models.azure.StorageModes.Block: - if offsets.chunk_num == 0: - # create container if necessary - blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, + # handle one-shot uploads + if ud.is_one_shot_block_blob: + metadata = ud.generate_metadata() + if not ud.entity.is_encrypted and ud.must_compute_md5: + digest = 
blobxfer.util.base64_encode_as_string( + ud.md5.digest()) + else: + digest = None + blobxfer.operations.azure.blob.block.create_blob( + ase, data, digest, metadata, timeout=self._general_options.timeout_sec) - # handle one-shot uploads - if ud.is_one_shot_block_blob: - metadata = ud.generate_metadata() - if not ud.entity.is_encrypted and ud.must_compute_md5: - digest = blobxfer.util.base64_encode_as_string( - ud.md5.digest()) - else: - digest = None - blobxfer.operations.azure.blob.block.create_blob( - ase, data, digest, metadata, - timeout=self._general_options.timeout_sec) - return + return # upload block blobxfer.operations.azure.blob.block.put_block( ase, offsets, data, timeout=self._general_options.timeout_sec) elif ase.mode == blobxfer.models.azure.StorageModes.File: - if offsets.chunk_num == 0: - # create container if necessary - blobxfer.operations.azure.file.create_share( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) - # create parent directories - with self._fileshare_dir_lock: - blobxfer.operations.azure.file.\ - create_all_parent_directories( - ase, self._dirs_created, - timeout=self._general_options.timeout_sec) - # create remote file - blobxfer.operations.azure.file.create_file( - ase, timeout=self._general_options.timeout_sec) # upload range if data is not None: blobxfer.operations.azure.file.put_file_range( ase, offsets, data, timeout=self._general_options.timeout_sec) elif ase.mode == blobxfer.models.azure.StorageModes.Page: - # compute aligned size - if offsets.chunk_num == 0: - # create container if necessary - blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) - # create remote blob - blobxfer.operations.azure.blob.page.create_blob( - ase, timeout=self._general_options.timeout_sec) if data is None: return - # align page + # compute aligned size aligned = blobxfer.util.page_align_content_length( offsets.num_bytes) + # align page if aligned != offsets.num_bytes: data = data.ljust(aligned, b'\0') if blobxfer.operations.md5.check_data_is_empty(data): @@ -477,14 +454,48 @@ def _worker_thread_upload(self): with self._upload_lock: self._exceptions.append(e) + def _prepare_upload(self, ase, offsets): + # type: (Uploader, blobxfer.models.azure.StorageEntity, + # blobxfer.models.upload.Offsets) -> None + """Prepare upload + :param Uploader self: this + :param blobxfer.models.azure.StorageEntity ase: Storage entity + :param blobxfer.models.upload.Offsets offsets: offsets + """ + if ase.mode == blobxfer.models.azure.StorageModes.Block: + # create container if necessary + blobxfer.operations.azure.blob.create_container( + ase, self._containers_created, + timeout=self._general_options.timeout_sec) + elif ase.mode == blobxfer.models.azure.StorageModes.File: + # create share directory structure + with self._fileshare_dir_lock: + # create container if necessary + blobxfer.operations.azure.file.create_share( + ase, self._containers_created, + timeout=self._general_options.timeout_sec) + # create parent directories + blobxfer.operations.azure.file.create_all_parent_directories( + ase, self._dirs_created, + timeout=self._general_options.timeout_sec) + # create remote file + blobxfer.operations.azure.file.create_file( + ase, timeout=self._general_options.timeout_sec) + elif ase.mode == blobxfer.models.azure.StorageModes.Page: + # create container if necessary + blobxfer.operations.azure.blob.create_container( + ase, self._containers_created, + 
timeout=self._general_options.timeout_sec) + # create remote blob + blobxfer.operations.azure.blob.page.create_blob( + ase, timeout=self._general_options.timeout_sec) + def _process_upload_descriptor(self, ud): # type: (Uploader, blobxfer.models.upload.Descriptor) -> None """Process upload descriptor :param Uploader self: this :param blobxfer.models.upload.Descriptor: upload descriptor """ - # update progress bar - self._update_progress_bar() # get download offsets offsets, resume_bytes = ud.next_offsets() # add resume bytes to counter @@ -509,6 +520,9 @@ def _process_upload_descriptor(self, ud): if offsets is None: self._upload_queue.put(ud) return + # prepare upload + if offsets.chunk_num == 0: + self._prepare_upload(ud.entity, offsets) # encrypt if necessary if ud.entity.is_encrypted and ud.entity.size > 0: # send iv through hmac if first chunk @@ -769,9 +783,11 @@ def _check_for_existing_remote(self, sa, cont, name): ase = blobxfer.models.azure.StorageEntity(cont, ed) if (self._spec.options.mode == blobxfer.models.azure.StorageModes.File): - ase.populate_from_file(sa, fp, name) + dir, _ = blobxfer.operations.azure.file.parse_file_path(name) + ase.populate_from_file(sa, fp, dir) else: - ase.populate_from_blob(sa, fp, name) + # blob.name contains full path, no need to specify dir + ase.populate_from_blob(sa, fp, '') else: ase = None return ase @@ -990,7 +1006,7 @@ def _run(self): self._add_to_upload_queue(lp, ase, uid) # set remote files processed with self._md5_meta_lock: - self._all_local_files_processed = True + self._all_files_processed = True with self._upload_lock: self._upload_total -= skipped_files self._upload_bytes_total -= skipped_size From 3c9e2041732ea2fcea3180af4953c2945982a1a1 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Fri, 26 May 2017 09:50:31 -0700 Subject: [PATCH 32/47] File attribute restore support - Revert blob naming changes in populate_from - Expand path for pem file reads --- blobxfer/models/azure.py | 28 ++++++-- blobxfer/models/download.py | 19 ++++- blobxfer/models/metadata.py | 47 +++++++++++++ blobxfer/operations/azure/__init__.py | 2 +- blobxfer/operations/crypto.py | 6 +- blobxfer/operations/progress.py | 99 ++++++++++++++------------- blobxfer/operations/upload.py | 3 +- setup.py | 4 +- 8 files changed, 147 insertions(+), 61 deletions(-) diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index 1e3325e..1e10e03 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -38,6 +38,7 @@ # non-stdlib imports from azure.storage.blob.models import _BlobTypes as BlobTypes # local imports +import blobxfer.models.metadata # enums @@ -51,7 +52,7 @@ class StorageModes(enum.Enum): class StorageEntity(object): """Azure Storage Entity""" - def __init__(self, container, ed=None): + def __init__(self, container, ed=None, fileattr=None): # type: (StorageEntity, str # blobxfer.models.crypto.EncryptionMetadata) -> None """Ctor for StorageEntity @@ -71,6 +72,7 @@ def __init__(self, container, ed=None): self._md5 = None self._encryption = ed self._vio = None + self._fileattr = None self.replica_targets = None @property @@ -213,17 +215,30 @@ def encryption_metadata(self, value): """ self._encryption = value - def populate_from_blob(self, sa, blob, path): + @property + def file_attributes(self): + # type: (StorageEntity) -> object + """Return file attributes collection + :param StorageEntity self: this + :rtype: blobxfer.models.metadata.PosixFileAttr or + blobxfer.models.metadata.WindowsFileAttr or None + :return: file attributes + """ + return 
self._fileattr + + def populate_from_blob(self, sa, blob): # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, - # azure.storage.blob.models.Blob, str) -> None + # azure.storage.blob.models.Blob) -> None """Populate properties from Blob :param StorageEntity self: this :param blobxfer.operations.azure.StorageAccount sa: storage account :param azure.storage.blob.models.Blob blob: blob to populate from - :param str path: full path to blob """ + # set file attributes from metadata + self._fileattr = blobxfer.models.metadata.fileattr_from_metadata( + blob.metadata) self._create_containers = sa.create_containers - self._name = str(pathlib.Path(path) / blob.name) + self._name = blob.name self._snapshot = blob.snapshot self._lmt = blob.properties.last_modified self._size = blob.properties.content_length @@ -247,6 +262,9 @@ def populate_from_file(self, sa, file, path): :param azure.storage.file.models.File file: file to populate from :param str path: full path to file """ + # set file attributes from metadata + self._fileattr = blobxfer.models.metadata.fileattr_from_metadata( + file.metadata) self._create_containers = sa.create_containers if path is not None: self._name = str(pathlib.Path(path) / file.name) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index 3c5f686..ef92068 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -162,6 +162,9 @@ def __init__( if not self.options.check_file_md5 and self.skip_on.md5_match: raise ValueError( 'Cannot specify skip on MD5 match without file MD5 enabled') + if (self.options.restore_file_attributes and + not blobxfer.util.on_windows() and os.getuid() != 0): + logger.warning('Cannot set file uid/gid without root privileges') def add_azure_source_path(self, source): # type: (Specification, blobxfer.operations.azure.SourcePath) -> None @@ -670,9 +673,19 @@ def finalize_file(self): self.local_path.unlink() return logger.info(msg) - - # TODO set file uid/gid and mode - + # set file uid/gid and mode + if self._ase.file_attributes is not None: + if blobxfer.util.on_windows(): + # TODO not implemented yet + pass + else: + self.local_path.chmod(int(self._ase.file_attributes.mode, 8)) + if os.getuid() == 0: + os.chown( + str(self.local_path), + self._ase.file_attributes.uid, + self._ase.file_attributes.gid + ) # move temp download file to final path blobxfer.util.replace_file(self.local_path, self.final_path) # update resume file diff --git a/blobxfer/models/metadata.py b/blobxfer/models/metadata.py index 5ebeea3..139ed63 100644 --- a/blobxfer/models/metadata.py +++ b/blobxfer/models/metadata.py @@ -30,6 +30,8 @@ bytes, dict, int, list, object, range, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip) # stdlib imports +import collections +import json import logging # non-stdlib imports # local imports @@ -55,6 +57,18 @@ _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES = 'TotalSlices' _JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID = 'SliceId' _JSON_KEY_VECTORED_IO_STRIPE_NEXT = 'Next' +# named tuples +PosixFileAttr = collections.namedtuple( + 'PosixFileAttr', [ + 'mode', + 'uid', + 'gid', + ] +) +WindowsFileAttr = collections.namedtuple( + 'WindowsFileAttr', [ + ] +) def generate_fileattr_metadata(local_path, metadata): @@ -82,6 +96,39 @@ def generate_fileattr_metadata(local_path, metadata): return blobxfer.util.merge_dict(metadata, md) +def fileattr_from_metadata(md): + # type: (dict) -> bool + """Convert fileattr metadata in json metadata + :param dict md: metadata dictionary + :rtype: 
PosixFileAttr or WindowsFileAttr or None + :return: fileattr metadata + """ + try: + mdattr = json.loads( + md[JSON_KEY_BLOBXFER_METADATA])[_JSON_KEY_FILE_ATTRIBUTES] + except (KeyError, TypeError): + return None + else: + if blobxfer.util.on_windows(): + logger.warning( + 'file attributes store/restore on Windows is not supported ' + 'yet') + fileattr = None + else: + try: + fileattr = PosixFileAttr( + mode=mdattr[_JSON_KEY_FILE_ATTRIBUTES_POSIX][ + _JSON_KEY_FILE_ATTRIBUTES_MODE], + uid=mdattr[_JSON_KEY_FILE_ATTRIBUTES_POSIX][ + _JSON_KEY_FILE_ATTRIBUTES_UID], + gid=mdattr[_JSON_KEY_FILE_ATTRIBUTES_POSIX][ + _JSON_KEY_FILE_ATTRIBUTES_GID], + ) + except KeyError: + fileattr = None + return fileattr + + def restore_fileattr(path, metadata): # type: (pathlib.Path, dict) -> None """Restore file attributes from metadata diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index 9b19423..61dfe53 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -327,7 +327,7 @@ def _populate_from_list_blobs(self, creds, options, general_options): else: ed = None ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_blob(sa, blob, dir) + ase.populate_from_blob(sa, blob) yield ase diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index 19c56a4..ba6982c 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -75,7 +75,8 @@ def load_rsa_private_key_file(rsakeyfile, passphrase): :rtype: cryptography.hazmat.primitives.asymmetric.rsa.RSAPrivateKey :return: RSAPrivateKey """ - with open(rsakeyfile, 'rb') as keyfile: + keypath = os.path.expandvars(os.path.expanduser(rsakeyfile)) + with open(keypath, 'rb') as keyfile: return cryptography.hazmat.primitives.serialization.\ load_pem_private_key( keyfile.read(), @@ -92,7 +93,8 @@ def load_rsa_public_key_file(rsakeyfile): :rtype: cryptography.hazmat.primitives.asymmetric.rsa.RSAPublicKey :return: RSAPublicKey """ - with open(rsakeyfile, 'rb') as keyfile: + keypath = os.path.expandvars(os.path.expanduser(rsakeyfile)) + with open(keypath, 'rb') as keyfile: return cryptography.hazmat.primitives.serialization.\ load_pem_public_key( keyfile.read(), diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index a0689ed..b9d93a7 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -36,6 +36,7 @@ import sys # non-stdlib imports import azure.storage +import cryptography import requests # local imports import blobxfer.util @@ -95,86 +96,92 @@ def output_parameters(general_options, spec): :param blobxfer.models.options.General general_options: general options :param object spec: upload or download spec """ + sep = '============================================' log = [] - log.append('===========================') - log.append(' azure blobxfer parameters') - log.append('===========================') - log.append(' blobxfer version: {}'.format( + log.append(sep) + log.append(' Azure blobxfer parameters') + log.append(sep) + log.append(' blobxfer version: {}'.format( blobxfer.version.__version__)) - log.append(' platform: {}'.format(platform.platform())) - log.append(' python: {} {} az.stor={} req={}'.format( - platform.python_implementation(), - platform.python_version(), - azure.storage._constants.__version__, - requests.__version__)) + log.append(' platform: {}'.format(platform.platform())) + log.append( + ' components: {}={} az.stor={} crypt={} req={}'.format( + 
platform.python_implementation(), + platform.python_version(), + azure.storage._constants.__version__, + cryptography.__version__, + requests.__version__,)) # specific preamble if isinstance(spec, blobxfer.models.download.Specification): - log.append(' transfer direction: {}'.format('Azure -> local')) + log.append(' transfer direction: {}'.format('Azure -> local')) log.append( - ' workers: disk={} xfer={} md5={} crypto={}'.format( - general_options.concurrency.disk_threads, - general_options.concurrency.transfer_threads, - general_options.concurrency.md5_processes - if spec.options.check_file_md5 else 0, - general_options.concurrency.crypto_processes)) + (' workers: disk={} xfer={} md5={} ' + 'crypto={}').format( + general_options.concurrency.disk_threads, + general_options.concurrency.transfer_threads, + general_options.concurrency.md5_processes + if spec.options.check_file_md5 else 0, + general_options.concurrency.crypto_processes)) elif isinstance(spec, blobxfer.models.upload.Specification): - log.append(' transfer direction: {}'.format('local -> Azure')) + log.append(' transfer direction: {}'.format('local -> Azure')) log.append( - ' workers: disk={} xfer={} md5={} crypto={}'.format( - general_options.concurrency.disk_threads, - general_options.concurrency.transfer_threads, - general_options.concurrency.md5_processes - if spec.skip_on.md5_match or - spec.options.store_file_properties.md5 else 0, - 0)) + (' workers: disk={} xfer={} md5={} ' + 'crypto={}').format( + general_options.concurrency.disk_threads, + general_options.concurrency.transfer_threads, + general_options.concurrency.md5_processes + if spec.skip_on.md5_match or + spec.options.store_file_properties.md5 else 0, + 0)) # TODO handle synccopy spec # common block - log.append(' resume file: {}'.format( + log.append(' resume file: {}'.format( general_options.resume_file)) - log.append(' timeout: {}'.format( + log.append(' timeout: {}'.format( general_options.timeout_sec)) - log.append(' mode: {}'.format( + log.append(' mode: {}'.format( spec.options.mode)) - log.append(' skip on: fs_match={} lmt_ge={} md5={}'.format( - spec.skip_on.filesize_match, - spec.skip_on.lmt_ge, - spec.skip_on.md5_match)) - log.append(' chunk size: {} bytes'.format( + log.append( + ' skip on: fs_match={} lmt_ge={} md5={}'.format( + spec.skip_on.filesize_match, + spec.skip_on.lmt_ge, + spec.skip_on.md5_match)) + log.append(' chunk size: {} bytes'.format( spec.options.chunk_size_bytes)) - log.append(' delete extraneous: {}'.format( + log.append(' delete extraneous: {}'.format( spec.options.delete_extraneous_destination)) - log.append(' overwrite: {}'.format( + log.append(' overwrite: {}'.format( spec.options.overwrite)) - log.append(' recursive: {}'.format( + log.append(' recursive: {}'.format( spec.options.recursive)) # TODO only output rename single if not synccopy - log.append(' rename single: {}'.format( + log.append(' rename single: {}'.format( spec.options.rename)) # specific epilog if isinstance(spec, blobxfer.models.download.Specification): - log.append(' compute file md5: {}'.format( + log.append(' compute file md5: {}'.format( spec.options.check_file_md5)) - log.append(' file attributes: {}'.format( + log.append(' restore file attributes: {}'.format( spec.options.restore_file_attributes)) - log.append(' rsa private key: {}'.format( + log.append(' rsa private key: {}'.format( 'Loaded' if spec.options.rsa_private_key else 'None')) - log.append(' local destination: {}'.format( + log.append(' local destination: {}'.format( spec.destination.path)) 
elif isinstance(spec, blobxfer.models.upload.Specification): - log.append(' one shot bytes: {}'.format( + log.append(' one shot bytes: {}'.format( spec.options.one_shot_bytes)) - log.append(' store properties: attr={} md5={}'.format( + log.append(' store properties: attr={} md5={}'.format( spec.options.store_file_properties.attributes, spec.options.store_file_properties.md5)) - log.append(' rsa public key: {}'.format( + log.append(' rsa public key: {}'.format( 'Loaded' if spec.options.rsa_public_key else 'None')) - log.append(' local source paths: {}'.format( + log.append(' local source paths: {}'.format( ' '.join([str(src) for src in spec.sources.paths]))) - log.append('===========================') + log.append(sep) log = os.linesep.join(log) if blobxfer.util.is_not_empty(general_options.log_file): print(log) diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index d33aad6..7fc094f 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -786,8 +786,7 @@ def _check_for_existing_remote(self, sa, cont, name): dir, _ = blobxfer.operations.azure.file.parse_file_path(name) ase.populate_from_file(sa, fp, dir) else: - # blob.name contains full path, no need to specify dir - ase.populate_from_blob(sa, fp, '') + ase.populate_from_blob(sa, fp) else: ase = None return ase diff --git a/setup.py b/setup.py index a9031a7..c61be64 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1) -if not version: +if not version or len(version) == 0: raise RuntimeError('Cannot find version') packages = [ @@ -42,7 +42,7 @@ 'azure-common==1.1.6', 'azure-storage==0.34.2', 'click==6.7', - 'cryptography>=1.8.1', + 'cryptography>=1.8.2', 'future==0.16.0', 'python-dateutil==2.6.0', 'requests==2.14.2', From f9ba1bf47d3617b07634115c3c716f246bfc2d49 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Sun, 28 May 2017 17:03:30 -0700 Subject: [PATCH 33/47] Add download support for vectored stripes --- blobxfer/models/azure.py | 20 ++- blobxfer/models/download.py | 190 +++++++++++++++++--------- blobxfer/models/metadata.py | 94 ++++++++++++- blobxfer/models/resume.py | 27 ++-- blobxfer/operations/azure/__init__.py | 127 ++++++++++++++--- blobxfer/operations/crypto.py | 18 +-- blobxfer/operations/download.py | 66 +++++++-- blobxfer/operations/resume.py | 44 ++++-- 8 files changed, 435 insertions(+), 151 deletions(-) diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index 1e10e03..abfa10e 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -226,7 +226,17 @@ def file_attributes(self): """ return self._fileattr - def populate_from_blob(self, sa, blob): + @property + def vectored_io(self): + # type: (StorageEntity) -> object + """Return vectored io metadata, currently stripe only + :param StorageEntity self: this + :rtype: blobxfer.models.metadata.VectoredStripe or None + :return: vectored io metadata + """ + return self._vio + + def populate_from_blob(self, sa, blob, vio=None): # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, # azure.storage.blob.models.Blob) -> None """Populate properties from Blob @@ -234,9 +244,10 @@ def populate_from_blob(self, sa, blob): :param blobxfer.operations.azure.StorageAccount sa: storage account :param azure.storage.blob.models.Blob blob: blob to populate from """ - # set file attributes from metadata + # set props from metadata self._fileattr = blobxfer.models.metadata.fileattr_from_metadata( blob.metadata) + self._vio = vio 
self._create_containers = sa.create_containers self._name = blob.name self._snapshot = blob.snapshot @@ -253,7 +264,7 @@ def populate_from_blob(self, sa, blob): self._mode = StorageModes.Page self._client = sa.page_blob_client - def populate_from_file(self, sa, file, path): + def populate_from_file(self, sa, file, path, vio=None): # type: (StorageEntity, blobxfer.operations.azure.StorageAccount, # azure.storage.file.models.File, str) -> None """Populate properties from File @@ -262,9 +273,10 @@ def populate_from_file(self, sa, file, path): :param azure.storage.file.models.File file: file to populate from :param str path: full path to file """ - # set file attributes from metadata + # set props from metadata self._fileattr = blobxfer.models.metadata.fileattr_from_metadata( file.metadata) + self._vio = vio self._create_containers = sa.create_containers if path is not None: self._name = str(pathlib.Path(path) / file.name) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index ef92068..e2adcbc 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -69,6 +69,12 @@ 'temp', ] ) +LocalPathView = collections.namedtuple( + 'LocalPathView', [ + 'fd_end', + 'fd_start', + ] +) class LocalDestinationPath(object): @@ -206,11 +212,7 @@ def __init__(self, lpath, ase, options, resume_mgr): self._ase = ase # set paths self.final_path = lpath - # create path holding the temporary file to download to - _tmp = list(lpath.parts[:-1]) - _tmp.append(lpath.name + '.bxtmp') - self.local_path = pathlib.Path(*_tmp) - del _tmp + self._view = None # calculate the total number of ops required for transfer self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) self._total_chunks = self._compute_total_chunks(self._chunk_size) @@ -218,6 +220,7 @@ def __init__(self, lpath, ase, options, resume_mgr): # initialize integrity checkers self.hmac = None self.md5 = None + self._integrity_failed = False self._initialize_integrity_checkers(options) @property @@ -293,36 +296,74 @@ def _initialize_integrity_checkers(self, options): blobxfer.util.is_not_empty(self._ase.md5)): self.md5 = blobxfer.util.new_md5_hasher() + def _compute_allocated_size(self, size): + # type: (Descriptor, int) -> int + """Compute allocated size on disk + :param Descriptor self: this + :param int size: size (content length) + :rtype: int + :return: required size on disk + """ + # compute size + if size > 0: + if self._ase.is_encrypted: + # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs + allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ + self._AES_BLOCKSIZE + else: + allocatesize = size + if allocatesize < 0: + allocatesize = 0 + else: + allocatesize = 0 + return allocatesize + + def _set_final_path_view(self): + # type: (Descriptor) -> int + """Set final path view and return required space on disk + :param Descriptor self: this + :rtype: int + :return: required size on disk + """ + slicesize = self._compute_allocated_size(self._ase.size) + if self._ase.vectored_io is None: + self._view = LocalPathView( + fd_start=0, + fd_end=slicesize, + ) + return self._ase.size + else: + name = self.final_path.name + name = blobxfer.models.metadata.\ + remove_vectored_io_slice_suffix_from_name( + name, self._ase.vectored_io.slice_id) + _tmp = list(self.final_path.parts[:-1]) + _tmp.append(name) + self.final_path = pathlib.Path(*_tmp) + self._view = LocalPathView( + fd_start=self._ase.vectored_io.offset_start, + fd_end=self._ase.vectored_io.offset_start + slicesize, + ) + return 
self._ase.vectored_io.total_size + def _allocate_disk_space(self): - # type: (Descriptor, int) -> None + # type: (Descriptor) -> None """Perform file allocation (possibly sparse) :param Descriptor self: this - :param int size: size """ with self._meta_lock: - if self._allocated: + if self._allocated or self._offset != 0: return - size = self._ase.size - # compute size - if size > 0: - if self._ase.is_encrypted: - # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs - allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ - self._AES_BLOCKSIZE - else: - allocatesize = size - if allocatesize < 0: - allocatesize = 0 - else: - allocatesize = 0 + # set local path view + allocatesize = self._set_final_path_view() # check if path already exists and is of sufficient size - if (not self.local_path.exists() or - self.local_path.stat().st_size != allocatesize): + if (not self.final_path.exists() or + self.final_path.stat().st_size != allocatesize): # create parent path - self.local_path.parent.mkdir( + self.final_path.parent.mkdir( mode=0o750, parents=True, exist_ok=True) # allocate file - with self.local_path.open('wb') as fd: + with self.final_path.open('wb') as fd: if allocatesize > 0: try: os.posix_fallocate(fd.fileno(), 0, allocatesize) @@ -341,7 +382,7 @@ def _resume(self): if self._resume_mgr is None or self._offset > 0 or self._finalized: return None # check if path exists in resume db - rr = self._resume_mgr.get_record(str(self.final_path)) + rr = self._resume_mgr.get_record(self._ase) if rr is None: logger.debug('no resume record for {}'.format(self.final_path)) return None @@ -373,10 +414,11 @@ def _resume(self): logger.debug('cannot resume encrypted entity {}'.format( self._ase.path)) return None - # check if intermediate (blobtmp) exists - if not self.local_path.exists(): - logger.warning('temporary download file {} does not exist'.format( - rr.temp_path)) + self._allocate_disk_space() + # check if final path exists + if not self.final_path.exists(): + logger.warning('download path {} does not exist'.format( + self.final_path)) return None if self.hmac is not None: raise RuntimeError( @@ -387,10 +429,14 @@ def _resume(self): if self.md5 is not None and curr_chunk > 0: _blocksize = blobxfer.util.MEGABYTE << 2 logger.debug( - 'integrity checking existing file {} to offset {}'.format( - self.final_path, _end_offset)) + 'integrity checking existing file {} offset {} -> {}'.format( + self.final_path, + self._view.fd_start, + self._view.fd_start + _end_offset) + ) with self._hasher_lock: - with self.local_path.open('rb') as filedesc: + with self.final_path.open('rb') as filedesc: + filedesc.seek(self._view.fd_start, 0) while _fd_offset < _end_offset: if (_fd_offset + _blocksize) > _end_offset: _blocksize = _end_offset - _fd_offset @@ -403,7 +449,7 @@ def _resume(self): if rr.md5hexdigest != hexdigest: logger.warning( 'MD5 mismatch resume={} computed={} for {}'.format( - rr.md5hexdigest, hexdigest, self.local_path)) + rr.md5hexdigest, hexdigest, self.final_path)) # reset hasher self.md5 = blobxfer.util.new_md5_hasher() return None @@ -434,7 +480,7 @@ def cleanup_all_temporary_files(self): """ # delete local file try: - self.local_path.unlink() + self.final_path.unlink() except OSError: pass # iterate unchecked chunks and delete @@ -510,13 +556,11 @@ def write_unchecked_data(self, offsets, data): :param Offsets offsets: download offsets :param bytes data: data """ - with self.local_path.open('r+b') as fd: - fd.seek(offsets.fd_start, 0) - fd.write(data) + self.write_data(offsets, data) 
unchecked = UncheckedChunk( data_len=len(data), - fd_start=offsets.fd_start, - file_path=self.local_path, + fd_start=self._view.fd_start + offsets.fd_start, + file_path=self.final_path, temp=False, ) with self._meta_lock: @@ -593,9 +637,8 @@ def perform_chunked_integrity_check(self): self._next_integrity_chunk += 1 if self.is_resumable: self._resume_mgr.add_or_update_record( - self.final_path, self.local_path, self._ase.size, - self._chunk_size, self._next_integrity_chunk, False, - md5hexdigest, + self.final_path, self._ase, self._chunk_size, + self._next_integrity_chunk, False, md5hexdigest, ) # decrement outstanding op counter self._outstanding_ops -= 1 @@ -609,8 +652,8 @@ def _update_resume_for_completed(self): return with self._meta_lock: self._resume_mgr.add_or_update_record( - self.final_path, self.local_path, self._ase.size, - self._chunk_size, self._next_integrity_chunk, True, None, + self.final_path, self._ase, self._chunk_size, + self._next_integrity_chunk, True, None, ) def write_data(self, offsets, data): @@ -621,13 +664,14 @@ def write_data(self, offsets, data): :param bytes data: data """ if len(data) > 0: - with self.local_path.open('r+b') as fd: - fd.seek(offsets.fd_start, 0) + with self.final_path.open('r+b') as fd: + # offset some internal view + fd.seek(self._view.fd_start + offsets.fd_start, 0) fd.write(data) - def finalize_file(self): + def finalize_integrity(self): # type: (Descriptor) -> None - """Finalize file download + """Finalize integrity check for download :param Descriptor self: this """ with self._meta_lock: @@ -668,26 +712,40 @@ def finalize_file(self): ) # cleanup if download failed if not check: + self._integrity_failed = True logger.error(msg) - # delete temp download file - self.local_path.unlink() - return logger.info(msg) + + def _restore_file_attributes(self): + # type: (Descriptor) -> None + """Restore file attributes for file + :param Descriptor self: this + """ + if self._ase.file_attributes is None: + return # set file uid/gid and mode - if self._ase.file_attributes is not None: - if blobxfer.util.on_windows(): - # TODO not implemented yet - pass - else: - self.local_path.chmod(int(self._ase.file_attributes.mode, 8)) - if os.getuid() == 0: - os.chown( - str(self.local_path), - self._ase.file_attributes.uid, - self._ase.file_attributes.gid - ) - # move temp download file to final path - blobxfer.util.replace_file(self.local_path, self.final_path) + if blobxfer.util.on_windows(): + # TODO not implemented yet + pass + else: + self.final_path.chmod(int(self._ase.file_attributes.mode, 8)) + if os.getuid() == 0: + os.chown( + str(self.final_path), + self._ase.file_attributes.uid, + self._ase.file_attributes.gid + ) + + def finalize_file(self): + # type: (Descriptor) -> None + """Finalize file for download + :param Descriptor self: this + """ + # delete bad file if integrity failed + if self._integrity_failed: + self.final_path.unlink() + else: + self._restore_file_attributes() # update resume file self._update_resume_for_completed() with self._meta_lock: diff --git a/blobxfer/models/metadata.py b/blobxfer/models/metadata.py index 139ed63..f891696 100644 --- a/blobxfer/models/metadata.py +++ b/blobxfer/models/metadata.py @@ -40,7 +40,7 @@ # create logger logger = logging.getLogger(__name__) # global defines -JSON_KEY_BLOBXFER_METADATA = 'BlobxferMetadata' +JSON_KEY_BLOBXFER_METADATA = 'blobxfer_metadata' # file attributes _JSON_KEY_FILE_ATTRIBUTES = 'FileAttributes' _JSON_KEY_FILE_ATTRIBUTES_POSIX = 'POSIX' @@ -60,15 +60,32 @@ # named tuples 
PosixFileAttr = collections.namedtuple( 'PosixFileAttr', [ + 'gid', 'mode', 'uid', - 'gid', ] ) WindowsFileAttr = collections.namedtuple( 'WindowsFileAttr', [ ] ) +VectoredStripe = collections.namedtuple( + 'VectoredStripe', [ + 'next', + 'offset_start', + 'slice_id', + 'total_size', + 'total_slices', + ] +) +VectoredNextEntry = collections.namedtuple( + 'VectoredNextEntry', [ + 'storage_account_name', + 'endpoint', + 'container', + 'name', + ] +) def generate_fileattr_metadata(local_path, metadata): @@ -97,7 +114,7 @@ def generate_fileattr_metadata(local_path, metadata): def fileattr_from_metadata(md): - # type: (dict) -> bool + # type: (dict) -> collections.namedtuple """Convert fileattr metadata in json metadata :param dict md: metadata dictionary :rtype: PosixFileAttr or WindowsFileAttr or None @@ -153,6 +170,38 @@ def create_vectored_io_next_entry(ase): ) +def explode_vectored_io_next_entry(entry): + # type: (str, int) -> str + """Explode next vectored io entry + :param str entry: next entry + :rtype: VectoredNextEntry + :return: vectored next entry + """ + tmp = entry.split(';') + _sa = tmp[0].split('.') + return VectoredNextEntry( + storage_account_name=_sa[0], + endpoint='.'.join(_sa[2:]), + container=tmp[1], + name=tmp[2], + ) + + +def remove_vectored_io_slice_suffix_from_name(name, slice): + # type: (str, int) -> str + """Remove vectored io (stripe) slice suffix from a given name + :param str name: entity name + :param int slice: slice num + :rtype: str + :return: name without suffix + """ + suffix = '.bxslice-{}'.format(slice) + if name.endswith(suffix): + return name[:-len(suffix)] + else: + return name + + def generate_vectored_io_stripe_metadata(local_path, metadata): # type: (blobxfer.models.upload.LocalPath, dict) -> dict """Generate vectored io stripe metadata dict @@ -172,8 +221,45 @@ def generate_vectored_io_stripe_metadata(local_path, metadata): local_path.view.total_slices, _JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID: local_path.view.slice_num, - _JSON_KEY_VECTORED_IO_STRIPE_NEXT: local_path.view.next, + _JSON_KEY_VECTORED_IO_STRIPE_NEXT: + explode_vectored_io_next_entry(local_path.view.next), } } } return blobxfer.util.merge_dict(metadata, md) + + +def vectored_io_from_metadata(md): + # type: (dict) -> collections.namedtuple + """Convert vectored io metadata in json metadata + :param dict md: metadata dictionary + :rtype: VectoredStripe or None + :return: vectored io metadata + """ + try: + mdattr = json.loads( + md[JSON_KEY_BLOBXFER_METADATA])[_JSON_KEY_VECTORED_IO] + except (KeyError, TypeError): + pass + else: + if mdattr[_JSON_KEY_VECTORED_IO_MODE] == _JSON_KEY_VECTORED_IO_STRIPE: + mdstripe = mdattr[_JSON_KEY_VECTORED_IO_STRIPE] + try: + nextptr = explode_vectored_io_next_entry( + mdstripe[_JSON_KEY_VECTORED_IO_STRIPE_NEXT]) + except (KeyError, AttributeError): + nextptr = None + vio = VectoredStripe( + total_size=mdstripe[_JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE], + offset_start=mdstripe[ + _JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START], + total_slices=mdstripe[ + _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES], + slice_id=mdstripe[_JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID], + next=nextptr, + ) + return vio + else: + raise RuntimeError('Cannot handle Vectored IO mode: {}'.format( + mdattr[_JSON_KEY_VECTORED_IO_MODE])) + return None diff --git a/blobxfer/models/resume.py b/blobxfer/models/resume.py index 37a5acc..aa8b9da 100644 --- a/blobxfer/models/resume.py +++ b/blobxfer/models/resume.py @@ -37,20 +37,19 @@ class Download(object): """Download resume object""" def __init__( - 
self, final_path, temp_path, length, chunk_size, - next_integrity_chunk, completed, md5): - # type: (Download, str, str, int, int, int, str) -> None + self, final_path, length, chunk_size, next_integrity_chunk, + completed, md5): + # type: (Download, str, int, int, int, bool, str) -> None """Ctor for Download :param Download self: this :param str final_path: final path - :param str temp_path: temporary path :param int length: total bytes :param int chunk_size: chunk size in bytes :param int next_integrity_chunk: next integrity chunk + :param bool completed: completed :param str md5: md5 hex digest """ self._final_path = final_path - self._temp_path = temp_path self._length = length self._chunk_size = chunk_size self._next_integrity_chunk = next_integrity_chunk @@ -67,16 +66,6 @@ def final_path(self): """ return self._final_path - @property - def temp_path(self): - # type: (Download) -> str - """Temp path - :param Download self: this - :rtype: str - :return: temp path - """ - return self._temp_path - @property def length(self): # type: (Download) -> int @@ -163,9 +152,9 @@ def __repr__(self): :rtype: str :return: representation string """ - return ('Download').format( - self.final_path, self.temp_path, self.length, - self.chunk_size, self.next_integrity_chunk, - self.completed, self.md5hexdigest, + self.final_path, self.length, self.chunk_size, + self.next_integrity_chunk, self.completed, + self.md5hexdigest, ) diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index 61dfe53..177d41d 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -34,6 +34,7 @@ import requests # local imports import blobxfer.models +import blobxfer.models.metadata import blobxfer.operations.azure.blob.append import blobxfer.operations.azure.blob.block import blobxfer.operations.azure.blob.page @@ -263,6 +264,100 @@ def files(self, creds, options, general_options): creds, options, general_options): yield blob + def _convert_to_storage_entity_with_encryption_metadata( + self, options, sa, entity, vio, is_file, container, dir): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, StorageAccount, object, + # blobxfer.models.metadata.VectoredStripe, bool, str, + # str) -> StorageEntity + """Convert entity into StorageEntity with encryption metadata if avail + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + :param StorageAccount sa: storage account + :param object entity: Storage File or Blob object + :param blobxfer.models.metadata.VectoredStripe vio: Vectored stripe + :param bool is_file: is a file object + :param str container: container + :param str dir: Azure File directory structure + :rtype: StorageEntity + :return: Azure storage entity object + """ + if blobxfer.models.crypto.EncryptionMetadata.\ + encryption_metadata_exists(entity.metadata): + ed = blobxfer.models.crypto.EncryptionMetadata() + ed.convert_from_json( + entity.metadata, file.name, options.rsa_private_key) + else: + ed = None + ase = blobxfer.models.azure.StorageEntity(container, ed) + if is_file: + ase.populate_from_file(sa, entity, dir, vio) + else: + ase.populate_from_blob(sa, entity, vio) + return ase + + def _handle_vectored_io_stripe( + self, creds, options, general_options, sa, entity, is_file, + container, dir=None): + # type: (SourcePath, StorageCredentials, + # blobxfer.models.options.Download, + # blobxfer.models.options.General, 
StorageAccount, object, + # bool, str, str) -> StorageEntity + """Handle Vectored IO stripe entries + :param SourcePath self: this + :param StorageCredentials creds: storage creds + :param blobxfer.models.options.Download options: download options + :param blobxfer.models.options.General general_options: general options + :param StorageAccount sa: storage account + :param object entity: Storage File or Blob object + :param bool is_file: is a file object + :param str container: container + :param str dir: Azure File directory structure + :rtype: StorageEntity + :return: Azure storage entity object + """ + vio = blobxfer.models.metadata.vectored_io_from_metadata( + entity.metadata) + if not isinstance(vio, blobxfer.models.metadata.VectoredStripe): + ase = self._convert_to_storage_entity_with_encryption_metadata( + options, sa, entity, None, is_file, container, dir) + yield ase + return + # if this slice is not the first, ignore. the reason for this is + # 1. Ensures direct get on a slice does nothing unless the + # zero-th blob is retrieved/accessed (eliminates partial data + # download), which will reconstruct all of the stripes via next + # pointers + # 2. Data is not retrieved multiple times for the same slice without + # having to maintain a fetched map + if vio.slice_id != 0: + yield None + return + # yield this entity + ase = self._convert_to_storage_entity_with_encryption_metadata( + options, sa, entity, vio, is_file, container, dir) + yield ase + # iterate all slices + while vio.next is not None: + # follow next pointer + sa = creds.get_storage_account(vio.next.storage_account_name) + if is_file: + entity = blobxfer.operations.azure.file.get_file_properties( + sa.file_client, vio.next.container, vio.next.name, + timeout=general_options.timeout_sec) + _, dir = blobxfer.util.explode_azure_path(vio.next.name) + else: + entity = blobxfer.operations.azure.blob.get_blob_properties( + sa.block_blob_client, vio.next.container, vio.next.name, + ase.mode, timeout=general_options.timeout_sec) + vio = blobxfer.models.metadata.vectored_io_from_metadata( + entity.metadata) + # yield next + ase = self._convert_to_storage_entity_with_encryption_metadata( + options, sa, entity, vio, is_file, container, dir) + yield ase + def _populate_from_list_files(self, creds, options, general_options): # type: (SourcePath, StorageCredentials, # blobxfer.models.options.Download, @@ -284,19 +379,15 @@ def _populate_from_list_files(self, creds, options, general_options): general_options.timeout_sec): if not self._inclusion_check(file.name): continue - if blobxfer.models.crypto.EncryptionMetadata.\ - encryption_metadata_exists(file.metadata): - ed = blobxfer.models.crypto.EncryptionMetadata() - ed.convert_from_json( - file.metadata, file.name, options.rsa_private_key) - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) if dir is not None: dir, _ = blobxfer.operations.azure.file.parse_file_path( dir) - ase.populate_from_file(sa, file, dir) - yield ase + for ase in self._handle_vectored_io_stripe( + creds, options, general_options, sa, file, True, cont, + dir): + if ase is None: + continue + yield ase def _populate_from_list_blobs(self, creds, options, general_options): # type: (SourcePath, StorageCredentials, @@ -319,16 +410,12 @@ def _populate_from_list_blobs(self, creds, options, general_options): options.recursive, general_options.timeout_sec): if not self._inclusion_check(blob.name): continue - if blobxfer.models.crypto.EncryptionMetadata.\ - encryption_metadata_exists(blob.metadata): - ed = 
blobxfer.models.crypto.EncryptionMetadata() - ed.convert_from_json( - blob.metadata, blob.name, options.rsa_private_key) - else: - ed = None - ase = blobxfer.models.azure.StorageEntity(cont, ed) - ase.populate_from_blob(sa, blob) - yield ase + for ase in self._handle_vectored_io_stripe( + creds, options, general_options, sa, blob, False, + cont): + if ase is None: + continue + yield ase class DestinationPath(blobxfer.models._BaseSourcePaths): diff --git a/blobxfer/operations/crypto.py b/blobxfer/operations/crypto.py index ba6982c..76cf001 100644 --- a/blobxfer/operations/crypto.py +++ b/blobxfer/operations/crypto.py @@ -262,7 +262,8 @@ def _worker_process(self): self._done_cv.acquire() self._done_queue.put(fpath) elif inst[0] == CryptoAction.Decrypt: - final_path, local_path, offsets, symkey, iv, hmac_datafile = \ + final_path, internal_fdstart, offsets, symkey, iv, \ + hmac_datafile = \ inst[1], inst[2], inst[3], inst[4], inst[5], inst[6] # read encrypted data from disk with open(hmac_datafile, 'rb') as fd: @@ -271,8 +272,8 @@ def _worker_process(self): symkey, iv, encdata, offsets.unpad) # write decrypted data to disk if len(data) > 0: - with open(local_path, 'r+b') as fd: - fd.seek(offsets.fd_start, 0) + with open(final_path, 'r+b') as fd: + fd.seek(internal_fdstart + offsets.fd_start, 0) fd.write(data) self._done_cv.acquire() self._done_queue.put((final_path, offsets)) @@ -281,21 +282,22 @@ def _worker_process(self): self._done_cv.release() def add_decrypt_chunk( - self, final_path, local_path, offsets, symkey, iv, hmac_datafile): - # type: (CryptoOffload, str, str, blobxfer.models.download.Offsets, + self, final_path, internal_fdstart, offsets, symkey, iv, + hmac_datafile): + # type: (CryptoOffload, str, int, blobxfer.models.download.Offsets, # bytes, bytes, str) -> None """Add a chunk to decrypt :param CryptoOffload self: this :param str final_path: final path - :param str local_path: temp local path + :param int internal_fdstart: internal fd offset start :param blobxfer.models.download.Offsets offsets: offsets :param bytes symkey: symmetric key :param bytes iv: initialization vector :param str hmac_datafile: encrypted data file """ self._task_queue.put( - (CryptoAction.Decrypt, final_path, local_path, offsets, symkey, - iv, hmac_datafile) + (CryptoAction.Decrypt, final_path, internal_fdstart, offsets, + symkey, iv, hmac_datafile) ) # UNUSED due to AES256-CBC FullBlob mode diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index 47c237e..eb01eac 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -97,6 +97,7 @@ def __init__(self, general_options, creds, spec): self._start_time = None self._delete_after = set() self._dd_map = {} + self._vio_map = {} self._general_options = general_options self._creds = creds self._spec = spec @@ -170,17 +171,30 @@ def ensure_local_destination(creds, spec): # ensure destination path spec.destination.ensure_path_exists() + @staticmethod + def create_unique_transfer_operation_id(ase): + # type: (blobxfer.models.azure.StorageEntity) -> str + """Create a unique transfer operation id + :param blobxfer.models.azure.StorageEntity ase: storage entity + :rtype: str + :return: unique transfer id + """ + return ';'.join( + (ase._client.primary_endpoint, ase.path, str(ase.vectored_io)) + ) + @staticmethod def create_unique_disk_operation_id(dd, offsets): # type: (blobxfer.models.download.Descriptor, - # blobxfer.models.download.Offsets) -> None + # blobxfer.models.download.Offsets) -> str """Create a 
unique disk operation id :param blobxfer.models.download.Descriptor dd: download descriptor :param blobxfer.models.download.Offsets offsets: download offsets + :rtype: str + :return: unique disk id """ - # TODO add local view offset or slice num with stripe support return ';'.join( - (str(dd.local_path), dd.entity._client.primary_endpoint, + (str(dd.final_path), dd.entity._client.primary_endpoint, dd.entity.path, str(offsets.range_start)) ) @@ -282,7 +296,9 @@ def _post_md5_skip_on_check(self, filename, md5_match): lpath = pathlib.Path(filename) if md5_match: with self._transfer_lock: - self._transfer_set.remove(lpath) + self._transfer_set.remove( + blobxfer.operations.download.Downloader. + create_unique_transfer_operation_id(rfile)) self._download_total -= 1 self._download_bytes_total -= lpath.stat().st_size else: @@ -467,14 +483,37 @@ def _process_download_descriptor(self, dd): del resume_bytes # check if all operations completed if offsets is None and dd.all_operations_completed: - # finalize file - dd.finalize_file() + finalize = True + # finalize integrity + dd.finalize_integrity() # accounting with self._transfer_lock: + sfpath = str(dd.final_path) if dd.entity.is_encrypted: - self._dd_map.pop(str(dd.final_path)) - self._transfer_set.remove(dd.final_path) + self._dd_map.pop(sfpath) + self._transfer_set.remove( + blobxfer.operations.download.Downloader. + create_unique_transfer_operation_id(dd.entity)) self._download_sofar += 1 + if dd.entity.vectored_io is not None: + if sfpath not in self._vio_map: + self._vio_map[sfpath] = 1 + else: + self._vio_map[sfpath] += 1 + if (self._vio_map[sfpath] == + dd.entity.vectored_io.total_slices): + self._vio_map.pop(sfpath) + else: + finalize = False + del sfpath + # finalize file + if finalize: + dd.finalize_file() + # remove from delete after set + try: + self._delete_after.remove(dd.final_path) + except KeyError: + pass return # re-enqueue for other threads to download self._transfer_queue.put(dd) @@ -524,7 +563,7 @@ def _process_data(self, dd, offsets, data): # decrypt data if self._crypto_offload is not None: self._crypto_offload.add_decrypt_chunk( - str(dd.final_path), str(dd.local_path), offsets, + str(dd.final_path), dd._view.fd_start, offsets, dd.entity.encryption_metadata.symmetric_key, iv, _hmac_datafile) # data will be integrity checked and written once @@ -652,11 +691,6 @@ def _run(self): else: lpath = pathlib.Path( self._spec.destination.path, rfile.name) - # remove from delete after set - try: - self._delete_after.remove(lpath) - except KeyError: - pass # check on download conditions action = self._check_download_conditions(lpath, rfile) if action == DownloadAction.Skip: @@ -665,7 +699,9 @@ def _run(self): continue # add potential download to set with self._transfer_lock: - self._transfer_set.add(lpath) + self._transfer_set.add( + blobxfer.operations.download.Downloader. 
+ create_unique_transfer_operation_id(rfile)) # either MD5 check or download now if action == DownloadAction.CheckMd5: self._pre_md5_skip_on_check(lpath, rfile) diff --git a/blobxfer/operations/resume.py b/blobxfer/operations/resume.py index 0f76562..97e37e4 100644 --- a/blobxfer/operations/resume.py +++ b/blobxfer/operations/resume.py @@ -92,45 +92,59 @@ def datalock(self, acquire=True): if acquire: self._lock.release() - def get_record(self, final_path, lock=True): + @staticmethod + def generate_record_key(ase): + # type: (blobxfer.models.azure.StorageEntity) -> str + """Generate a record key + :param blobxfer.models.azure.StorageEntity ase: Storage Entity + :rtype: str + :return: record key + """ + return '{}:{}'.format(ase._client.primary_endpoint, ase.path) + + def get_record(self, ase, key=None, lock=True): # type: (DownloadResumeManager, str, # bool) -> blobxfer.models.resume.Download """Get a resume record :param DownloadResumeManager self: this - :param str final_path: final path + :param blobxfer.models.azure.StorageEntity ase: Storage Entity + :param str key: record key :param bool lock: acquire lock :rtype: blobxfer.models.resume.Download :return: Download record """ + if key is None: + key = blobxfer.operations.resume.DownloadResumeManager.\ + generate_record_key(ase) with self.datalock(lock): try: - return self._data[final_path] + return self._data[key] except KeyError: return None def add_or_update_record( - self, final_path, temp_path, length, chunk_size, - next_integrity_chunk, completed, md5): - # type: (DownloadResumeManager, pathlib.Path, pathlib.Path, int, int, - # int, bool, str) -> None + self, final_path, ase, chunk_size, next_integrity_chunk, + completed, md5): + # type: (DownloadResumeManager, pathlib.Path, + # blobxfer.models.azure.StorageEntity, int, int, bool, + # str) -> None """Get a resume record :param DownloadResumeManager self: this :param pathlib.Path final_path: final path - :param pathlib.Path temp_path: temp local path - :param int length: content length + :param blobxfer.models.azure.StorageEntity ase: Storage Entity :param int chunk_size: chunk size in bytes :param int next_integrity_chunk: next integrity chunk :param bool completed: if completed :param str md5: md5 hex digest """ - sfp = str(final_path) + key = blobxfer.operations.resume.DownloadResumeManager.\ + generate_record_key(ase) with self.datalock(): - dl = self.get_record(sfp, lock=False) + dl = self.get_record(ase, key=key, lock=False) if dl is None: dl = blobxfer.models.resume.Download( - final_path=sfp, - temp_path=str(temp_path), - length=length, + final_path=str(final_path), + length=ase._size, chunk_size=chunk_size, next_integrity_chunk=next_integrity_chunk, completed=completed, @@ -145,5 +159,5 @@ def add_or_update_record( else: dl.next_integrity_chunk = next_integrity_chunk dl.md5hexdigest = md5 - self._data[sfp] = dl + self._data[key] = dl self._data.sync() From e8ab3780649ad79e6cd44464097ee3dd34056a61 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Sun, 28 May 2017 19:39:40 -0700 Subject: [PATCH 34/47] Docstring updates --- blobxfer/models/crypto.py | 3 ++- blobxfer/models/upload.py | 57 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/blobxfer/models/crypto.py b/blobxfer/models/crypto.py index c6670f2..b7b0004 100644 --- a/blobxfer/models/crypto.py +++ b/blobxfer/models/crypto.py @@ -329,13 +329,14 @@ def convert_to_json_with_mac(self, md5digest, hmacdigest): :rtype: dict :return: encryption metadata """ + # encrypt keys 
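# Illustrative sketch (not part of the patch above): rsa_encrypt_key_base64_encoded()
# is expected to wrap the AES symmetric key (and the HMAC signing key) with the RSA
# public key and base64-encode the result for the encryption metadata JSON built
# below. A minimal sketch of that idea with the `cryptography` package; the
# OAEP/SHA-1 padding choice and key sizes are assumptions for illustration only.
import base64
import os

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import padding, rsa

rsa_private_key = rsa.generate_private_key(
    public_exponent=65537, key_size=2048, backend=default_backend())
rsa_public_key = rsa_private_key.public_key()
symmetric_key = os.urandom(32)  # AES-256 content encryption key

# wrap the symmetric key with the RSA public key, then base64-encode it
wrapped = rsa_public_key.encrypt(
    symmetric_key,
    padding.OAEP(
        mgf=padding.MGF1(algorithm=hashes.SHA1()),
        algorithm=hashes.SHA1(),
        label=None))
enc_content_key = base64.b64encode(wrapped).decode('ascii')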
enc_content_key = blobxfer.operations.crypto.\ rsa_encrypt_key_base64_encoded( None, self._rsa_public_key, self.symmetric_key) enc_sign_key = blobxfer.operations.crypto.\ rsa_encrypt_key_base64_encoded( None, self._rsa_public_key, self.signing_key) - + # generate json encjson = { EncryptionMetadata._JSON_KEY_ENCRYPTION_MODE: EncryptionMetadata._ENCRYPTION_MODE, diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index da3a1fe..bc251f9 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -91,7 +91,16 @@ def __str__(self): class LocalPath(object): + """Local Path""" + def __init__(self, parent_path, relative_path, view=None): + # type: (LocalPath, pathlib.Path, pathlib.Path, LocalPathView) -> None + """Ctor for LocalPath + :param LocalPath self: this + :param pathlib.Path parent_path: parent path + :param pathlib.Path relative_path: relative path + :param LocalPathView view: local path view + """ self.parent_path = parent_path self.relative_path = relative_path # populate properties @@ -111,30 +120,72 @@ def __init__(self, parent_path, relative_path, view=None): @property def absolute_path(self): + # type: (LocalPath) -> pathlib.Path + """Absolute path + :param LocalPath self: this + :rtype: pathlib.Path + :return: absolute path + """ return self.parent_path / self.relative_path @property def size(self): + # type: (LocalPath) -> int + """Size of view + :param LocalPath self: this + :rtype: int + :return: size of view portion of the file + """ return self._size @property def total_size(self): + # type: (LocalPath) -> int + """Total Size of file + :param LocalPath self: this + :rtype: int + :return: total size of file (non-view) + """ return self._stat.st_size @property def lmt(self): + # type: (LocalPath) -> int + """mtime of file + :param LocalPath self: this + :rtype: int + :return: mtime of file + """ return self._stat.st_mtime @property def mode(self): + # type: (LocalPath) -> str + """Octal file mode + :param LocalPath self: this + :rtype: str + :return: octal file mode + """ return str(oct(self._stat.st_mode)) @property def uid(self): + # type: (LocalPath) -> int + """Uid of file + :param LocalPath self: this + :rtype: int + :return: uid of file + """ return self._stat.st_uid @property def gid(self): + # type: (LocalPath) -> int + """Gid of file + :param LocalPath self: this + :rtype: int + :return: gid of file + """ return self._stat.st_gid @@ -142,6 +193,12 @@ class LocalSourcePath(blobxfer.models._BaseSourcePaths): """Local Source Path""" def can_rename(self): + # type: (LocalSourcePaths) -> bool + """Check if ource can be renamed + :param LocalSourcePath self: this + :rtype: bool + :return: if rename possible + """ return len(self._paths) == 1 and self._paths[0].is_file() def files(self): From 53d0beb8d9d14228adc88c2b3a644f6a5a3ada21 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Mon, 29 May 2017 18:00:29 -0700 Subject: [PATCH 35/47] Support stdin --- blobxfer/__init__.py | 11 +++ blobxfer/models/upload.py | 119 +++++++++++++++++++++++++------- blobxfer/operations/progress.py | 22 ++++-- blobxfer/operations/upload.py | 40 ++++++++--- 4 files changed, 150 insertions(+), 42 deletions(-) diff --git a/blobxfer/__init__.py b/blobxfer/__init__.py index 8babc97..29ee1b0 100644 --- a/blobxfer/__init__.py +++ b/blobxfer/__init__.py @@ -22,6 +22,7 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. 
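# Illustrative sketch (not part of the patch below): the stdin-upload path added
# in this patch reads the process' binary stdin handle in chunk-size pieces until
# EOF, using the module-level STDIN set up here. A minimal, standalone sketch of
# that read loop; the 4 MiB chunk size mirrors _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES
# and the consumer is a placeholder assumption.
import sys

STDIN = sys.stdin.buffer if sys.version_info >= (3, 0) else sys.stdin

def read_stdin_chunks(chunk_size=4194304):
    # generator yielding binary chunks read from stdin until EOF
    while True:
        data = STDIN.read(chunk_size)
        if not data:
            break
        yield data

# usage sketch: for chunk in read_stdin_chunks(): hand chunk to an upload worker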
+import sys from .version import __version__ # noqa # monkeypatch User-Agent string @@ -31,3 +32,13 @@ # monkeypatch SOCKET_TIMEOUT value in Azure Storage SDK azure.storage._constants.SOCKET_TIMEOUT = (5, 300) + +# set stdin source +if sys.version_info >= (3, 0): + STDIN = sys.stdin.buffer +else: + # set stdin to binary mode on Windows + if sys.platform == 'win32': + import os, msvcrt # noqa + msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) + STDIN = sys.stdin diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index bc251f9..8f4c579 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -53,7 +53,7 @@ logger = logging.getLogger(__name__) # global defines _MAX_BLOCK_BLOB_ONESHOT_BYTES = 268435456 -_MAX_BLOCK_BLOB_CHUNKSIZE_BYTES = 268435456 +_MAX_BLOCK_BLOB_CHUNKSIZE_BYTES = 104857600 _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES = 4194304 _MAX_NUM_CHUNKS = 50000 _DEFAULT_AUTO_CHUNKSIZE_BYTES = 16777216 @@ -93,18 +93,30 @@ def __str__(self): class LocalPath(object): """Local Path""" - def __init__(self, parent_path, relative_path, view=None): - # type: (LocalPath, pathlib.Path, pathlib.Path, LocalPathView) -> None + def __init__(self, parent_path, relative_path, use_stdin=False, view=None): + # type: (LocalPath, pathlib.Path, pathlib.Path, bool, + # LocalPathView) -> None """Ctor for LocalPath :param LocalPath self: this :param pathlib.Path parent_path: parent path :param pathlib.Path relative_path: relative path + :param bool use_stdin: use stdin :param LocalPathView view: local path view """ self.parent_path = parent_path self.relative_path = relative_path + self.use_stdin = use_stdin # populate properties - self._stat = self.absolute_path.stat() + if self.use_stdin: + # create dummy stat object + self._stat = type('stat', (object,), {}) + self._stat.st_size = 0 + self._stat.st_mtime = 0 + self._stat.st_mode = 0 + self._stat.st_uid = 0 + self._stat.st_gid = 0 + else: + self._stat = self.absolute_path.stat() if view is None: self.view = LocalPathView( fd_start=0, @@ -194,13 +206,25 @@ class LocalSourcePath(blobxfer.models._BaseSourcePaths): def can_rename(self): # type: (LocalSourcePaths) -> bool - """Check if ource can be renamed + """Check if source can be renamed :param LocalSourcePath self: this :rtype: bool :return: if rename possible """ return len(self._paths) == 1 and self._paths[0].is_file() + @staticmethod + def is_stdin(path): + # type: (str) -> bool + """Check if path is stdin + :param str path: path to check + :rtype: bool + :return: if path is stdin + """ + if path == '-' or path == '/dev/stdin': + return True + return False + def files(self): # type: (LocalSourcePaths) -> LocalPath """Generator for files in paths @@ -210,6 +234,15 @@ def files(self): """ for _path in self._paths: _ppath = os.path.expandvars(os.path.expanduser(str(_path))) + # check of path is stdin + if blobxfer.models.upload.LocalSourcePath.is_stdin(_ppath): + yield LocalPath( + parent_path=pathlib.Path(), + relative_path=pathlib.Path('stdin'), + use_stdin=True, + ) + continue + # resolve path _expath = pathlib.Path(_ppath).resolve() # check if path is a single file tmp = pathlib.Path(_ppath) @@ -217,7 +250,8 @@ def files(self): if self._inclusion_check(tmp.name): yield LocalPath( parent_path=tmp.parent, - relative_path=pathlib.Path(tmp.name) + relative_path=pathlib.Path(tmp.name), + use_stdin=False, ) continue del tmp @@ -225,7 +259,11 @@ def files(self): _rpath = pathlib.Path(entry.path).relative_to(_ppath) if not self._inclusion_check(_rpath): continue - yield 
LocalPath(parent_path=_expath, relative_path=_rpath) + yield LocalPath( + parent_path=_expath, + relative_path=_rpath, + use_stdin=False, + ) class Specification(object): @@ -523,7 +561,12 @@ def _adjust_chunk_size(self, options): logger.debug( 'auto-selected chunk size of {} for {}'.format( chunk_size, self.local_path.absolute_path)) - self._chunk_size = min((chunk_size, self._ase.size)) + if self.local_path.use_stdin: + self._chunk_size = max( + (chunk_size, _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES) + ) + else: + self._chunk_size = min((chunk_size, self._ase.size)) # ensure chunk sizes are compatible with mode if self._ase.mode == blobxfer.models.azure.StorageModes.Append: if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: @@ -533,7 +576,8 @@ def _adjust_chunk_size(self, options): 'from {}').format( self._chunk_size, self.local_path.absolute_path)) elif self._ase.mode == blobxfer.models.azure.StorageModes.Block: - if self._ase.size <= options.one_shot_bytes: + if (not self.local_path.use_stdin and + self._ase.size <= options.one_shot_bytes): self._chunk_size = min( (self._ase.size, options.one_shot_bytes) ) @@ -569,6 +613,8 @@ def _compute_total_chunks(self, chunk_size): chunks = int(math.ceil(self._ase.size / chunk_size)) except ZeroDivisionError: chunks = 1 + if self.local_path.use_stdin and chunks == 0: + chunks = 1 if chunks > 50000: max_vector = False if self._ase.mode == blobxfer.models.azure.StorageModes.Block: @@ -645,26 +691,49 @@ def next_offsets(self): ), resume_bytes def read_data(self, offsets): - # type: (Descriptor, Offsets) -> bytes + # type: (Descriptor, Offsets) -> Tuple[bytes, Offsets] """Read data from file :param Descriptor self: this :param Offsets offsets: offsets - :rtype: bytes - :return: file data - """ - if offsets.num_bytes == 0: - return None - # compute start from view - start = self.local_path.view.fd_start + offsets.range_start - # encrypted offsets will read past the end of the file due - # to padding, but will be accounted for after encryption+padding - with self.local_path.absolute_path.open('rb') as fd: - fd.seek(start, 0) - data = fd.read(offsets.num_bytes) - if self.must_compute_md5: + :rtype: tuple + :return: (file data bytes, new Offsets if stdin) + """ + newoffset = None + if not self.local_path.use_stdin: + if offsets.num_bytes == 0: + return None, None + # compute start from view + start = self.local_path.view.fd_start + offsets.range_start + # encrypted offsets will read past the end of the file due + # to padding, but will be accounted for after encryption+padding + with self.local_path.absolute_path.open('rb') as fd: + fd.seek(start, 0) + data = fd.read(offsets.num_bytes) + else: + data = blobxfer.STDIN.read(self._chunk_size) + if not data: + with self._meta_lock: + self._total_chunks -= 1 + self._chunk_num -= 1 + self._outstanding_ops -= 1 + else: + num_bytes = len(data) + with self._meta_lock: + newoffset = Offsets( + chunk_num=self._chunk_num - 1, + num_bytes=num_bytes, + range_start=self._offset, + range_end=self._offset + num_bytes - 1, + pad=False, + ) + self._total_chunks += 1 + self._outstanding_ops += 1 + self._offset += num_bytes + self._ase.size += num_bytes + if self.must_compute_md5 and data: with self._hasher_lock: self.md5.update(data) - return data + return data, newoffset def generate_metadata(self): # type: (Descriptor) -> dict @@ -690,7 +759,7 @@ def generate_metadata(self): encmeta = self._ase.encryption_metadata.convert_to_json_with_mac( md5digest, hmacdigest) # generate file attribute metadata - if 
self._store_file_attr: + if self._store_file_attr and not self.local_path.use_stdin: merged = blobxfer.models.metadata.generate_fileattr_metadata( self.local_path, genmeta) if merged is not None: diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index b9d93a7..654653f 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -48,9 +48,9 @@ def update_progress_bar( go, optext, start, total_files, files_sofar, total_bytes, - bytes_sofar): + bytes_sofar, stdin_upload=False): # type: (blobxfer.models.options.General, str, datetime.datetime, int, - # int, int, int) -> None + # int, int, int, bool) -> None """Update the progress bar :param blobxfer.models.options.General go: general options :param str optext: operation prefix text @@ -59,6 +59,7 @@ def update_progress_bar( :param int files_sofar: files transfered so far :param int total_bytes: total number of bytes :param int bytes_sofar: bytes transferred so far + :param bool stdin_upload: stdin upload """ if (not go.progress_bar or blobxfer.util.is_none_or_empty(go.log_file) or start is None): @@ -80,11 +81,18 @@ def update_progress_bar( fprog = 'n/a' else: fprog = '{}/{}'.format(files_sofar, total_files) - sys.stdout.write( - ('\r{0} progress: [{1:30s}] {2:.2f}% {3:12.3f} MiB/sec, ' - '{4} {5}').format( - optext, '>' * int(done * 30), done * 100, rate, fprog, rtext) - ) + if stdin_upload: + sys.stdout.write( + ('\r{0} progress: [{1:30s}] n/a % {2:12.3f} MiB/sec, ' + '{3} {4}').format( + optext, '>' * int(total_bytes % 30), rate, fprog, rtext) + ) + else: + sys.stdout.write( + ('\r{0} progress: [{1:30s}] {2:.2f}% {3:12.3f} MiB/sec, ' + '{4} {5}').format( + optext, '>' * int(done * 30), done * 100, rate, fprog, rtext) + ) if files_sofar == total_files: sys.stdout.write(os.linesep) sys.stdout.flush() diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 7fc094f..adfcc77 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -191,10 +191,11 @@ def append_slice_suffix_to_name(name, slice): """ return '{}.bxslice-{}'.format(name, slice) - def _update_progress_bar(self): - # type: (Uploader) -> None + def _update_progress_bar(self, stdin=False): + # type: (Uploader, bool) -> None """Update progress bar :param Uploader self: this + :param bool stdin: stdin upload """ if not self._all_files_processed: return @@ -206,6 +207,7 @@ def _update_progress_bar(self): self._upload_sofar, self._upload_bytes_total, self._upload_bytes_sofar, + stdin_upload=stdin, ) def _pre_md5_skip_on_check(self, src, rfile): @@ -370,7 +372,9 @@ def _process_transfer(self, ud, ase, offsets, data): self._put_data(ud, ase, offsets, data) # accounting with self._transfer_lock: - if offsets.chunk_num == 0: + if ud.local_path.use_stdin: + self._upload_bytes_total += offsets.num_bytes + elif offsets.chunk_num == 0: self._upload_bytes_total += ase.size self._upload_bytes_sofar += offsets.num_bytes self._transfer_set.remove( @@ -378,7 +382,7 @@ def _process_transfer(self, ud, ase, offsets, data): ud.local_path, ase, offsets)) ud.complete_offset_upload() # update progress bar - self._update_progress_bar() + self._update_progress_bar(stdin=ud.local_path.use_stdin) def _put_data(self, ud, ase, offsets, data): # type: (Uploader, blobxfer.models.upload.Descriptor, @@ -462,7 +466,15 @@ def _prepare_upload(self, ase, offsets): :param blobxfer.models.azure.StorageEntity ase: Storage entity :param blobxfer.models.upload.Offsets offsets: offsets """ - if ase.mode == 
blobxfer.models.azure.StorageModes.Block: + if ase.mode == blobxfer.models.azure.StorageModes.Append: + # create container if necessary + blobxfer.operations.azure.blob.create_container( + ase, self._containers_created, + timeout=self._general_options.timeout_sec) + # create remote blob + blobxfer.operations.azure.blob.append.create_blob( + ase, timeout=self._general_options.timeout_sec) + elif ase.mode == blobxfer.models.azure.StorageModes.Block: # create container if necessary blobxfer.operations.azure.blob.create_container( ase, self._containers_created, @@ -496,7 +508,7 @@ def _process_upload_descriptor(self, ud): :param Uploader self: this :param blobxfer.models.upload.Descriptor: upload descriptor """ - # get download offsets + # get upload offsets offsets, resume_bytes = ud.next_offsets() # add resume bytes to counter if resume_bytes is not None: @@ -531,7 +543,7 @@ def _process_upload_descriptor(self, ud): # encrypt data if self._crypto_offload is None: # read data from file and encrypt - data = ud.read_data(offsets) + data, _ = ud.read_data(offsets) encdata = blobxfer.operations.crypto.aes_cbc_encrypt_data( ud.entity.encryption_metadata.symmetric_key, ud.current_iv, data, offsets.pad) @@ -552,9 +564,15 @@ def _process_upload_descriptor(self, ud): # retrieved from crypto queue # return_early = True else: - data = ud.read_data(offsets) + data, newoffset = ud.read_data(offsets) + # set new offset if stdin + if newoffset is not None: + offsets = newoffset # re-enqueue for other threads to upload self._upload_queue.put(ud) + # no data can be returned on stdin uploads + if not data: + return # add data to transfer queue with self._transfer_lock: self._transfer_set.add( @@ -713,7 +731,7 @@ def _check_upload_conditions(self, local_path, rfile): """ lpath = local_path.absolute_path # check if local file still exists - if not lpath.exists(): + if not local_path.use_stdin and not lpath.exists(): return UploadAction.Skip # if remote file doesn't exist, upload if rfile is None: @@ -849,7 +867,8 @@ def _vectorize_and_bind(self, local_path, dest): :return: action, LocalPath, ase """ if (self._spec.options.vectored_io.distribution_mode == - blobxfer.models.upload.VectoredIoDistributionMode.Stripe): + blobxfer.models.upload.VectoredIoDistributionMode.Stripe and + not local_path.use_stdin): # compute total number of slices slices = int(math.ceil( local_path.total_size / @@ -897,6 +916,7 @@ def _vectorize_and_bind(self, local_path, dest): lp_slice = blobxfer.models.upload.LocalPath( parent_path=local_path.parent_path, relative_path=local_path.relative_path, + use_stdin=False, view=blobxfer.models.upload.LocalPathView( fd_start=start, fd_end=end, From 5ae05958ed13c94b47e73f067f739889261ced73 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 30 May 2017 08:55:36 -0700 Subject: [PATCH 36/47] Append blob support --- blobxfer/__init__.py | 5 +- blobxfer/models/azure.py | 32 ++++ blobxfer/models/upload.py | 17 +- blobxfer/operations/azure/blob/__init__.py | 31 ++++ blobxfer/operations/azure/blob/append.py | 15 ++ blobxfer/operations/azure/blob/page.py | 31 ---- blobxfer/operations/progress.py | 2 + blobxfer/operations/upload.py | 190 +++++++++++++-------- cli/cli.py | 3 +- 9 files changed, 216 insertions(+), 110 deletions(-) diff --git a/blobxfer/__init__.py b/blobxfer/__init__.py index 29ee1b0..0a8432f 100644 --- a/blobxfer/__init__.py +++ b/blobxfer/__init__.py @@ -38,7 +38,8 @@ STDIN = sys.stdin.buffer else: # set stdin to binary mode on Windows - if sys.platform == 'win32': - import os, msvcrt # 
noqa + if sys.platform == 'win32': # noqa + import msvcrt + import os msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) STDIN = sys.stdin diff --git a/blobxfer/models/azure.py b/blobxfer/models/azure.py index abfa10e..b9eb088 100644 --- a/blobxfer/models/azure.py +++ b/blobxfer/models/azure.py @@ -71,6 +71,8 @@ def __init__(self, container, ed=None, fileattr=None): self._snapshot = None self._md5 = None self._encryption = ed + self._from_local = False + self._append_create = True self._vio = None self._fileattr = None self.replica_targets = None @@ -184,6 +186,35 @@ def mode(self): """ return self._mode + @property + def from_local(self): + # type: (StorageEntity) -> bool + """If entity was created from a local file (no remote exists) + :param StorageEntity self: this + :rtype: bool + :return: if entity is from local (no remote exists) + """ + return self._from_local + + @property + def append_create(self): + # type: (StorageEntity) -> bool + """If append blob should be created + :param StorageEntity self: this + :rtype: bool + :return: if append blob should be created + """ + return self._append_create + + @append_create.setter + def append_create(self, value): + # type: (StorageEntity, bool) -> None + """Set append create option + :param StorageEntity self: this + :param bool value: value to set + """ + self._append_create = value + @property def is_encrypted(self): # type: (StorageEntity) -> bool @@ -303,6 +334,7 @@ def populate_from_local(self, sa, container, path, mode): self._container = container self._name = path self._mode = mode + self._from_local = True if mode == StorageModes.Append: self._client = sa.append_blob_client elif mode == StorageModes.Block: diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index 8f4c579..585ea51 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -438,6 +438,16 @@ def remote_is_page_blob(self): """ return self.entity.mode == blobxfer.models.azure.StorageModes.Page + @property + def remote_is_append_blob(self): + # type: (Descriptor) -> bool + """Remote destination is an Azure Append Blob + :param Descriptor self: this + :rtype: bool + :return: remote is an Azure Append Blob + """ + return self.entity.mode == blobxfer.models.azure.StorageModes.Append + @property def is_one_shot_block_blob(self): # type: (Descriptor) -> bool @@ -468,7 +478,8 @@ def requires_non_encrypted_md5_put(self): :rtype: bool :return: if finalize requires a put file properties """ - return not self.entity.is_encrypted and self.must_compute_md5 + return (not self.entity.is_encrypted and self.must_compute_md5 and + not self.remote_is_append_blob) @property def requires_set_file_properties_md5(self): @@ -505,7 +516,6 @@ def _initialize_encryption(self, options): :param Descriptor self: this :param blobxfer.models.options.Upload options: upload options """ - # TODO support append blobs? 
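# Illustrative sketch (not part of the patch above): with append blob support,
# the upload path creates the append blob once (only when append_create is True)
# and then appends each chunk sequentially with append_block. A minimal sketch
# against the era-appropriate azure-storage SDK; the account name/key, container,
# and blob names below are placeholder assumptions.
from azure.storage.blob import AppendBlobService

svc = AppendBlobService(account_name='myaccount', account_key='<key>')
svc.create_container('mycontainer', fail_on_exist=False)
# creation is skipped when --no-overwrite appends to an existing blob
svc.create_blob('mycontainer', 'upload.log')
svc.append_block('mycontainer', 'upload.log', b'first chunk of data')
svc.append_block('mycontainer', 'upload.log', b'second chunk of data')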
if (options.rsa_public_key is not None and self.local_path.size > 0 and (self._ase.mode == blobxfer.models.azure.StorageModes.Block or self._ase.mode == blobxfer.models.azure.StorageModes.File)): @@ -653,7 +663,8 @@ def _initialize_integrity_checkers(self, options): self.local_path.absolute_path)) self.hmac = self._ase.encryption_metadata.initialize_hmac() # both hmac and md5 can be enabled - if options.store_file_properties.md5: + if (options.store_file_properties.md5 and + not self.remote_is_append_blob): self.md5 = blobxfer.util.new_md5_hasher() def next_offsets(self): diff --git a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index 63fd4a1..e256319 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -219,3 +219,34 @@ def create_container(ase, containers_created, timeout=None): logger.info( 'created blob container {} on storage account {}'.format( ase.container, ase.client.account_name)) + + +def set_blob_md5(ase, md5, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, str, int) -> None + """Set blob properties MD5 + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param str md5: md5 as base64 + :param int timeout: timeout + """ + ase.client.set_blob_properties( + container_name=ase.container, + blob_name=ase.name, + content_settings=azure.storage.blob.models.ContentSettings( + content_type=blobxfer.util.get_mime_type(ase.name), + content_md5=md5, + ), + timeout=timeout) + + +def set_blob_metadata(ase, metadata, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None + """Set blob metadata + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param dict metadata: metadata kv pairs + :param int timeout: timeout + """ + ase.client.set_blob_metadata( + container_name=ase.container, + blob_name=ase.name, + metadata=metadata, + timeout=timeout) diff --git a/blobxfer/operations/azure/blob/append.py b/blobxfer/operations/azure/blob/append.py index e28fcdb..abc276a 100644 --- a/blobxfer/operations/azure/blob/append.py +++ b/blobxfer/operations/azure/blob/append.py @@ -77,3 +77,18 @@ def create_blob(ase, timeout=None): content_type=blobxfer.util.get_mime_type(ase.name) ), timeout=timeout) + + +def append_block(ase, data, timeout=None): + # type: (blobxfer.models.azure.StorageEntity, bytes, int) -> None + """Appends a block into remote blob + :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity + :param bytes data: data + :param int timeout: timeout + """ + ase.client.append_block( + container_name=ase.container, + blob_name=ase.name, + block=data, + validate_content=False, # integrity is enforced with HTTPS + timeout=timeout) diff --git a/blobxfer/operations/azure/blob/page.py b/blobxfer/operations/azure/blob/page.py index 4223a30..aa92b14 100644 --- a/blobxfer/operations/azure/blob/page.py +++ b/blobxfer/operations/azure/blob/page.py @@ -98,34 +98,3 @@ def put_page(ase, page_start, page_end, data, timeout=None): end_range=page_end, validate_content=False, # integrity is enforced with HTTPS timeout=timeout) - - -def set_blob_md5(ase, md5, timeout=None): - # type: (blobxfer.models.azure.StorageEntity, str, int) -> None - """Set blob properties MD5 - :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity - :param str md5: md5 as base64 - :param int timeout: timeout - """ - ase.client.set_blob_properties( - container_name=ase.container, - blob_name=ase.name, - 
content_settings=azure.storage.blob.models.ContentSettings( - content_type=blobxfer.util.get_mime_type(ase.name), - content_md5=md5, - ), - timeout=timeout) - - -def set_blob_metadata(ase, metadata, timeout=None): - # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None - """Set blob metadata - :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity - :param dict metadata: metadata kv pairs - :param int timeout: timeout - """ - ase.client.set_blob_metadata( - container_name=ase.container, - blob_name=ase.name, - metadata=metadata, - timeout=timeout) diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index 654653f..07a9281 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -145,6 +145,8 @@ def output_parameters(general_options, spec): # TODO handle synccopy spec # common block + log.append(' log file: {}'.format( + general_options.log_file)) log.append(' resume file: {}'.format( general_options.resume_file)) log.append(' timeout: {}'.format( diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index adfcc77..b756590 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -381,6 +381,9 @@ def _process_transfer(self, ud, ase, offsets, data): blobxfer.operations.upload.Uploader.create_unique_transfer_id( ud.local_path, ase, offsets)) ud.complete_offset_upload() + # add descriptor back to upload queue only for append blobs + if ud.entity.mode == blobxfer.models.azure.StorageModes.Append: + self._upload_queue.put(ud) # update progress bar self._update_progress_bar(stdin=ud.local_path.use_stdin) @@ -397,7 +400,10 @@ def _put_data(self, ud, ase, offsets, data): """ print('UL', offsets, ase.path, len(data) if data is not None else None) if ase.mode == blobxfer.models.azure.StorageModes.Append: - raise NotImplementedError() + # append block + if data is not None: + blobxfer.operations.azure.blob.append.append_block( + ase, data, timeout=self._general_options.timeout_sec) elif ase.mode == blobxfer.models.azure.StorageModes.Block: # handle one-shot uploads if ud.is_one_shot_block_blob: @@ -467,13 +473,14 @@ def _prepare_upload(self, ase, offsets): :param blobxfer.models.upload.Offsets offsets: offsets """ if ase.mode == blobxfer.models.azure.StorageModes.Append: - # create container if necessary - blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) - # create remote blob - blobxfer.operations.azure.blob.append.create_blob( - ase, timeout=self._general_options.timeout_sec) + if ase.append_create: + # create container if necessary + blobxfer.operations.azure.blob.create_container( + ase, self._containers_created, + timeout=self._general_options.timeout_sec) + # create remote blob + blobxfer.operations.azure.blob.append.create_blob( + ase, timeout=self._general_options.timeout_sec) elif ase.mode == blobxfer.models.azure.StorageModes.Block: # create container if necessary blobxfer.operations.azure.blob.create_container( @@ -520,7 +527,7 @@ def _process_upload_descriptor(self, ud): # check if all operations completed if offsets is None and ud.all_operations_completed: # finalize file - self._finalize_file(ud) + self._finalize_upload(ud) # accounting with self._upload_lock: if ud.entity.is_encrypted: @@ -568,8 +575,9 @@ def _process_upload_descriptor(self, ud): # set new offset if stdin if newoffset is not None: offsets = newoffset - # re-enqueue for other threads to upload - self._upload_queue.put(ud) 
+ # re-enqueue for other threads to upload if not append + if ud.entity.mode != blobxfer.models.azure.StorageModes.Append: + self._upload_queue.put(ud) # no data can be returned on stdin uploads if not data: return @@ -589,73 +597,106 @@ def _process_upload_descriptor(self, ud): ) self._transfer_queue.put((ud, ase, offsets, data)) - def _finalize_file(self, ud): + def _finalize_block_blob(self, ud, metadata): + """Finalize Block blob + :param Uploader self: this + :param blobxfer.models.upload.Descriptor ud: upload descriptor + :param dict metadata: metadata dict + """ + if not ud.entity.is_encrypted and ud.must_compute_md5: + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + else: + digest = None + blobxfer.operations.azure.blob.block.put_block_list( + ud.entity, ud.last_block_num, digest, metadata, + timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.blob.block.put_block_list( + ase, ud.last_block_num, digest, metadata, + timeout=self._general_options.timeout_sec) + + def _set_blob_md5(self, ud): + """Set blob MD5 + :param Uploader self: this + :param blobxfer.models.upload.Descriptor ud: upload descriptor + """ + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + blobxfer.operations.azure.blob.set_blob_md5( + ud.entity, digest, timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.blob.set_blob_md5( + ase, digest, timeout=self._general_options.timeout_sec) + + def _set_blob_metadata(self, ud, metadata): + """Set blob metadata + :param Uploader self: this + :param blobxfer.models.upload.Descriptor ud: upload descriptor + :param dict metadata: metadata dict + """ + blobxfer.operations.azure.blob.set_blob_metadata( + ud.entity, metadata, timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.blob.set_blob_metadata( + ase, metadata, timeout=self._general_options.timeout_sec) + + def _finalize_nonblock_blob(self, ud, metadata): + """Finalize Non-Block blob + :param Uploader self: this + :param blobxfer.models.upload.Descriptor ud: upload descriptor + :param dict metadata: metadata dict + """ + # set md5 page blob property if required + if ud.requires_non_encrypted_md5_put: + self._set_blob_md5(ud) + # set metadata if needed + if blobxfer.util.is_not_empty(metadata): + self._set_blob_metadata(ud, metadata) + + def _finalize_azure_file(self, ud, metadata): + # type: (Uploader, blobxfer.models.upload.Descriptor, dict) -> None + """Finalize Azure File + :param Uploader self: this + :param blobxfer.models.upload.Descriptor ud: upload descriptor + :param dict metadata: metadata dict + """ + # set md5 file property if required + if ud.requires_non_encrypted_md5_put: + digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) + blobxfer.operations.azure.file.set_file_md5( + ud.entity, digest, timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.file.set_file_md5( + ase, digest, timeout=self._general_options.timeout_sec) + # set file metadata if needed + if blobxfer.util.is_not_empty(metadata): + blobxfer.operations.azure.file.set_file_metadata( + ud.entity, metadata, 
timeout=self._general_options.timeout_sec) + if blobxfer.util.is_not_empty(ud.entity.replica_targets): + for ase in ud.entity.replica_targets: + blobxfer.operations.azure.file.set_file_metadata( + ase, metadata, + timeout=self._general_options.timeout_sec) + + def _finalize_upload(self, ud): # type: (Uploader, blobxfer.models.upload.Descriptor) -> None """Finalize file upload :param Uploader self: this - :param blobxfer.models.upload.Descriptor: upload descriptor + :param blobxfer.models.upload.Descriptor ud: upload descriptor """ metadata = ud.generate_metadata() - # put block list for non one-shot block blobs if ud.requires_put_block_list: - if not ud.entity.is_encrypted and ud.must_compute_md5: - digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) - else: - digest = None - blobxfer.operations.azure.blob.block.put_block_list( - ud.entity, ud.last_block_num, digest, metadata, - timeout=self._general_options.timeout_sec) - if blobxfer.util.is_not_empty(ud.entity.replica_targets): - for ase in ud.entity.replica_targets: - blobxfer.operations.azure.blob.block.put_block_list( - ase, ud.last_block_num, digest, metadata, - timeout=self._general_options.timeout_sec) - # page blob finalization - if ud.remote_is_page_blob: - # set md5 page blob property if required - if ud.requires_non_encrypted_md5_put: - digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) - blobxfer.operations.azure.blob.page.set_blob_md5( - ud.entity, digest, - timeout=self._general_options.timeout_sec) - if blobxfer.util.is_not_empty(ud.entity.replica_targets): - for ase in ud.entity.replica_targets: - blobxfer.operations.azure.blob.page.set_blob_md5( - ase, digest, - timeout=self._general_options.timeout_sec) - # set metadata if needed - if blobxfer.util.is_not_empty(metadata): - blobxfer.operations.azure.blob.page.set_blob_metadata( - ud.entity, metadata, - timeout=self._general_options.timeout_sec) - if blobxfer.util.is_not_empty(ud.entity.replica_targets): - for ase in ud.entity.replica_targets: - blobxfer.operations.azure.blob.page.set_blob_metadata( - ase, metadata, - timeout=self._general_options.timeout_sec) - # azure file finalization - if ud.remote_is_file: - # set md5 file property if required - if ud.requires_non_encrypted_md5_put: - digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) - blobxfer.operations.azure.file.set_file_md5( - ud.entity, digest, - timeout=self._general_options.timeout_sec) - if blobxfer.util.is_not_empty(ud.entity.replica_targets): - for ase in ud.entity.replica_targets: - blobxfer.operations.azure.file.set_file_md5( - ase, digest, - timeout=self._general_options.timeout_sec) - # set file metadata if needed - if blobxfer.util.is_not_empty(metadata): - blobxfer.operations.azure.file.set_file_metadata( - ud.entity, metadata, - timeout=self._general_options.timeout_sec) - if blobxfer.util.is_not_empty(ud.entity.replica_targets): - for ase in ud.entity.replica_targets: - blobxfer.operations.azure.file.set_file_metadata( - ase, metadata, - timeout=self._general_options.timeout_sec) + # put block list for non one-shot block blobs + self._finalize_block_blob(ud, metadata) + elif ud.remote_is_page_blob or ud.remote_is_append_blob: + # append and page blob finalization + self._finalize_nonblock_blob(ud, metadata) + elif ud.remote_is_file: + # azure file finalization + self._finalize_azure_file(ud, metadata) def _get_destination_paths(self): # type: (Uploader) -> @@ -734,10 +775,13 @@ def _check_upload_conditions(self, local_path, rfile): if not local_path.use_stdin 
and not lpath.exists(): return UploadAction.Skip # if remote file doesn't exist, upload - if rfile is None: + if rfile is None or rfile.from_local: return UploadAction.Upload # check overwrite option if not self._spec.options.overwrite: + if rfile.mode == blobxfer.models.azure.StorageModes.Append: + rfile.append_create = False + return UploadAction.Upload logger.info( 'not overwriting remote file: {} (local: {})'.format( rfile.path, lpath)) diff --git a/cli/cli.py b/cli/cli.py index 8ce121b..a5e7ab1 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -469,7 +469,8 @@ def callback(ctx, param, value): '--overwrite/--no-overwrite', expose_value=False, default=True, - help='Overwrite destination if exists [True]', + help='Overwrite destination if exists. For append blobs, ' + '--no-overwrite will append to any existing blob. [True]', callback=callback)(f) From 351eee537c0ef23c8d867a630bd68efc2ab7d72a Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 30 May 2017 12:59:39 -0700 Subject: [PATCH 37/47] useconfig upload/download support - Fix various yaml/dict config issues - Allow md5 checks through vectored io stripes --- blobxfer/models/download.py | 116 ++++++++++++++++++++--------- blobxfer/models/metadata.py | 3 +- blobxfer/operations/download.py | 74 ++++++++++++------ blobxfer/operations/md5.py | 51 +++++++++---- blobxfer/operations/upload.py | 21 +++--- cli/cli.py | 57 +++++++++++--- cli/settings.py | 128 ++++++++++++++++++-------------- 7 files changed, 297 insertions(+), 153 deletions(-) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index e2adcbc..a197a25 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -49,7 +49,8 @@ # create logger logger = logging.getLogger(__name__) - +# global defines +_AUTO_SELECT_CHUNKSIZE_BYTES = 16777216 # named tuples Offsets = collections.namedtuple( 'Offsets', [ @@ -167,10 +168,12 @@ def __init__( # validate compatible options if not self.options.check_file_md5 and self.skip_on.md5_match: raise ValueError( - 'Cannot specify skip on MD5 match without file MD5 enabled') + 'cannot specify skip on MD5 match without file MD5 enabled') if (self.options.restore_file_attributes and not blobxfer.util.on_windows() and os.getuid() != 0): - logger.warning('Cannot set file uid/gid without root privileges') + logger.warning('cannot set file uid/gid without root privileges') + if self.options.chunk_size_bytes < 0: + raise ValueError('chunk size cannot be negative') def add_azure_source_path(self, source): # type: (Specification, blobxfer.operations.azure.SourcePath) -> None @@ -212,9 +215,14 @@ def __init__(self, lpath, ase, options, resume_mgr): self._ase = ase # set paths self.final_path = lpath - self._view = None + self.view = None + # auto-select chunk size + if options.chunk_size_bytes == 0: + chunk_size_bytes = _AUTO_SELECT_CHUNKSIZE_BYTES + else: + chunk_size_bytes = options.chunk_size_bytes + self._chunk_size = min((chunk_size_bytes, self._ase.size)) # calculate the total number of ops required for transfer - self._chunk_size = min((options.chunk_size_bytes, self._ase.size)) self._total_chunks = self._compute_total_chunks(self._chunk_size) self._outstanding_ops = self._total_chunks # initialize integrity checkers @@ -296,20 +304,23 @@ def _initialize_integrity_checkers(self, options): blobxfer.util.is_not_empty(self._ase.md5)): self.md5 = blobxfer.util.new_md5_hasher() - def _compute_allocated_size(self, size): - # type: (Descriptor, int) -> int + @staticmethod + def compute_allocated_size(size, is_encrypted): + # 
type: (int, bool) -> int """Compute allocated size on disk - :param Descriptor self: this :param int size: size (content length) + :param bool is_ecrypted: if entity is encrypted :rtype: int :return: required size on disk """ # compute size if size > 0: - if self._ase.is_encrypted: + if is_encrypted: # cipher_len_without_iv = (clear_len / aes_bs + 1) * aes_bs - allocatesize = (size // self._AES_BLOCKSIZE - 1) * \ - self._AES_BLOCKSIZE + allocatesize = ( + size // + blobxfer.models.download.Descriptor._AES_BLOCKSIZE - 1 + ) * blobxfer.models.download.Descriptor._AES_BLOCKSIZE else: allocatesize = size if allocatesize < 0: @@ -318,6 +329,49 @@ def _compute_allocated_size(self, size): allocatesize = 0 return allocatesize + @staticmethod + def generate_view(ase): + # type: (blobxfer.models.azure.StorageEntity) -> + # Tuple[LocalPathView, int] + """Generate local path view and total size required + :param blobxfer.models.azure.StorageEntity ase: Storage Entity + :rtype: tuple + :return: (local path view, allocation size) + """ + slicesize = blobxfer.models.download.Descriptor.compute_allocated_size( + ase.size, ase.is_encrypted) + if ase.vectored_io is None: + view = LocalPathView( + fd_start=0, + fd_end=slicesize, + ) + total_size = ase.size + else: + view = LocalPathView( + fd_start=ase.vectored_io.offset_start, + fd_end=ase.vectored_io.offset_start + slicesize, + ) + total_size = ase.vectored_io.total_size + return view, total_size + + @staticmethod + def convert_vectored_io_slice_to_final_path_name(local_path, ase): + # type: (pathlib.Path, + # blobxfer.models.azure.StorageEntity) -> pathlib.Path + """Convert vectored io slice to final path name + :param pathlib.Path local_path: local path + :param blobxfer.models.azure.StorageEntity ase: Storage Entity + :rtype: pathlib.Path + :return: converted final path + """ + name = local_path.name + name = blobxfer.models.metadata.\ + remove_vectored_io_slice_suffix_from_name( + name, ase.vectored_io.slice_id) + _tmp = list(local_path.parts[:-1]) + _tmp.append(name) + return pathlib.Path(*_tmp) + def _set_final_path_view(self): # type: (Descriptor) -> int """Set final path view and return required space on disk @@ -325,26 +379,16 @@ def _set_final_path_view(self): :rtype: int :return: required size on disk """ - slicesize = self._compute_allocated_size(self._ase.size) - if self._ase.vectored_io is None: - self._view = LocalPathView( - fd_start=0, - fd_end=slicesize, - ) - return self._ase.size - else: - name = self.final_path.name - name = blobxfer.models.metadata.\ - remove_vectored_io_slice_suffix_from_name( - name, self._ase.vectored_io.slice_id) - _tmp = list(self.final_path.parts[:-1]) - _tmp.append(name) - self.final_path = pathlib.Path(*_tmp) - self._view = LocalPathView( - fd_start=self._ase.vectored_io.offset_start, - fd_end=self._ase.vectored_io.offset_start + slicesize, - ) - return self._ase.vectored_io.total_size + # set final path if vectored io stripe + if self._ase.vectored_io is not None: + self.final_path = blobxfer.models.download.Descriptor.\ + convert_vectored_io_slice_to_final_path_name( + self.final_path, self._ase) + # generate view + view, total_size = blobxfer.models.download.Descriptor.generate_view( + self._ase) + self.view = view + return total_size def _allocate_disk_space(self): # type: (Descriptor) -> None @@ -431,12 +475,12 @@ def _resume(self): logger.debug( 'integrity checking existing file {} offset {} -> {}'.format( self.final_path, - self._view.fd_start, - self._view.fd_start + _end_offset) + self.view.fd_start, 
+ self.view.fd_start + _end_offset) ) with self._hasher_lock: with self.final_path.open('rb') as filedesc: - filedesc.seek(self._view.fd_start, 0) + filedesc.seek(self.view.fd_start, 0) while _fd_offset < _end_offset: if (_fd_offset + _blocksize) > _end_offset: _blocksize = _end_offset - _fd_offset @@ -559,7 +603,7 @@ def write_unchecked_data(self, offsets, data): self.write_data(offsets, data) unchecked = UncheckedChunk( data_len=len(data), - fd_start=self._view.fd_start + offsets.fd_start, + fd_start=self.view.fd_start + offsets.fd_start, file_path=self.final_path, temp=False, ) @@ -666,7 +710,7 @@ def write_data(self, offsets, data): if len(data) > 0: with self.final_path.open('r+b') as fd: # offset some internal view - fd.seek(self._view.fd_start + offsets.fd_start, 0) + fd.seek(self.view.fd_start + offsets.fd_start, 0) fd.write(data) def finalize_integrity(self): diff --git a/blobxfer/models/metadata.py b/blobxfer/models/metadata.py index f891696..7d5ea0d 100644 --- a/blobxfer/models/metadata.py +++ b/blobxfer/models/metadata.py @@ -221,8 +221,7 @@ def generate_vectored_io_stripe_metadata(local_path, metadata): local_path.view.total_slices, _JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID: local_path.view.slice_num, - _JSON_KEY_VECTORED_IO_STRIPE_NEXT: - explode_vectored_io_next_entry(local_path.view.next), + _JSON_KEY_VECTORED_IO_STRIPE_NEXT: local_path.view.next, } } } diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index eb01eac..3df37bc 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -89,9 +89,9 @@ def __init__(self, general_options, creds, spec): self._disk_set = set() self._disk_threads = [] self._download_start_time = None - self._download_total = None + self._download_total = 0 self._download_sofar = 0 - self._download_bytes_total = None + self._download_bytes_total = 0 self._download_bytes_sofar = 0 self._download_terminate = False self._start_time = None @@ -224,7 +224,13 @@ def _check_download_conditions(self, lpath, rfile): :return: download action """ if not lpath.exists(): - return DownloadAction.Download + if rfile.vectored_io is not None: + fpath = blobxfer.models.download.Descriptor.\ + convert_vectored_io_slice_to_final_path_name(lpath, rfile) + if not fpath.exists(): + return DownloadAction.Download + else: + return DownloadAction.Download if not self._spec.options.overwrite: logger.info( 'not overwriting local file: {} (remote: {})'.format( @@ -279,28 +285,44 @@ def _pre_md5_skip_on_check(self, lpath, rfile): pre_encrypted_content_md5 if md5 is None: md5 = rfile.md5 - slpath = str(lpath) + key = blobxfer.operations.download.Downloader.\ + create_unique_transfer_operation_id(rfile) with self._md5_meta_lock: - self._md5_map[slpath] = rfile - self._md5_offload.add_localfile_for_md5_check(slpath, md5, rfile.mode) + self._md5_map[key] = rfile + slpath = str(lpath) + # temporarily create a download descriptor view for vectored io + if rfile.vectored_io is not None: + view, _ = blobxfer.models.download.Descriptor.generate_view(rfile) + fpath = str( + blobxfer.models.download.Descriptor. 
+ convert_vectored_io_slice_to_final_path_name(lpath, rfile) + ) + else: + fpath = slpath + self._md5_offload.add_localfile_for_md5_check( + key, slpath, fpath, md5, rfile.mode, view) - def _post_md5_skip_on_check(self, filename, md5_match): - # type: (Downloader, str, bool) -> None + def _post_md5_skip_on_check(self, key, filename, size, md5_match): + # type: (Downloader, str, str, int, bool) -> None """Perform post MD5 skip on check :param Downloader self: this + :param str key: md5 map key :param str filename: local filename + :param int size: size of checked data :param bool md5_match: if MD5 matches """ with self._md5_meta_lock: - rfile = self._md5_map.pop(filename) + rfile = self._md5_map.pop(key) lpath = pathlib.Path(filename) if md5_match: + if size is None: + size = lpath.stat().st_size with self._transfer_lock: self._transfer_set.remove( blobxfer.operations.download.Downloader. create_unique_transfer_operation_id(rfile)) self._download_total -= 1 - self._download_bytes_total -= lpath.stat().st_size + self._download_bytes_total -= size else: self._add_to_download_queue(lpath, rfile) @@ -325,7 +347,8 @@ def _check_for_downloads_from_md5(self): break cv.release() if result is not None: - self._post_md5_skip_on_check(result[0], result[1]) + self._post_md5_skip_on_check( + result[0], result[1], result[2], result[3]) def _check_for_crypto_done(self): # type: (Downloader) -> None @@ -563,7 +586,7 @@ def _process_data(self, dd, offsets, data): # decrypt data if self._crypto_offload is not None: self._crypto_offload.add_decrypt_chunk( - str(dd.final_path), dd._view.fd_start, offsets, + str(dd.final_path), dd.view.fd_start, offsets, dd.entity.encryption_metadata.symmetric_key, iv, _hmac_datafile) # data will be integrity checked and written once @@ -674,16 +697,12 @@ def _run(self): self._initialize_transfer_threads() self._initialize_disk_threads() # initialize local counters - nfiles = 0 - total_size = 0 skipped_files = 0 skipped_size = 0 # iterate through source paths to download for src in self._spec.sources: for rfile in src.files( self._creds, self._spec.options, self._general_options): - nfiles += 1 - total_size += rfile.size # form local path for remote file if (not self._spec.destination.is_dir and self._spec.options.rename): @@ -702,22 +721,26 @@ def _run(self): self._transfer_set.add( blobxfer.operations.download.Downloader. create_unique_transfer_operation_id(rfile)) + self._download_total += 1 + self._download_bytes_total += rfile.size # either MD5 check or download now if action == DownloadAction.CheckMd5: self._pre_md5_skip_on_check(lpath, rfile) elif action == DownloadAction.Download: self._add_to_download_queue(lpath, rfile) - self._download_total = nfiles - skipped_files - self._download_bytes_total = total_size - skipped_size - download_size_mib = self._download_bytes_total / blobxfer.util.MEGABYTE # set remote files processed with self._md5_meta_lock: self._all_remote_files_processed = True - logger.debug( - ('{0} remote files processed, waiting for download completion ' - 'of {1:.4f} MiB').format(nfiles, download_size_mib)) - del nfiles - del total_size + with self._transfer_lock: + self._download_total -= skipped_files + self._download_bytes_total -= skipped_size + download_size_mib = ( + self._download_bytes_total / blobxfer.util.MEGABYTE + ) + logger.debug( + ('{0} remote files processed, waiting for download ' + 'completion of approx. 
{1:.4f} MiB').format( + self._download_total, download_size_mib)) del skipped_files del skipped_size # wait for downloads to complete @@ -747,6 +770,9 @@ def _run(self): # output throughput if self._download_start_time is not None: dltime = (end_time - self._download_start_time).total_seconds() + download_size_mib = ( + self._download_bytes_total / blobxfer.util.MEGABYTE + ) dlmibspeed = download_size_mib / dltime logger.info( ('elapsed download + verify time and throughput of {0:.4f} ' diff --git a/blobxfer/operations/md5.py b/blobxfer/operations/md5.py index 7239a2d..f14431f 100644 --- a/blobxfer/operations/md5.py +++ b/blobxfer/operations/md5.py @@ -47,7 +47,8 @@ _MAX_PAGE_SIZE_BYTES = 4194304 -def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): +def compute_md5_for_file_asbase64( + filename, pagealign=False, start=None, end=None, blocksize=65536): # type: (str, bool, int) -> str """Compute MD5 hash for file and encode as Base64 :param str filename: file to compute MD5 for @@ -58,7 +59,16 @@ def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): """ hasher = blobxfer.util.new_md5_hasher() with open(filename, 'rb') as filedesc: + if start is not None: + filedesc.seek(start) + curr = start + else: + curr = 0 while True: + if end is not None and curr + blocksize > end: + blocksize = end - curr + if blocksize == 0: + break buf = filedesc.read(blocksize) if not buf: break @@ -68,6 +78,7 @@ def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): if aligned != buflen: buf = buf.ljust(aligned, b'\0') hasher.update(buf) + curr += blocksize return blobxfer.util.base64_encode_as_string(hasher.digest()) @@ -120,33 +131,47 @@ def _worker_process(self): """ while not self.terminated: try: - filename, remote_md5, pagealign = self._task_queue.get( - True, 0.25) + key, lpath, fpath, remote_md5, pagealign, lpview = \ + self._task_queue.get(True, 0.1) except queue.Empty: continue + if lpview is None: + start = None + end = None + size = None + else: + start = lpview.fd_start + end = lpview.fd_end + size = end - start md5 = blobxfer.operations.md5.compute_md5_for_file_asbase64( - filename, pagealign) - logger.debug('MD5: {} {} {}'.format( - md5, remote_md5, filename)) + fpath, pagealign, start, end) + logger.debug('pre-transfer MD5 check: {} {} {}'.format( + md5, remote_md5, fpath)) self._done_cv.acquire() - self._done_queue.put((filename, md5 == remote_md5)) + self._done_queue.put((key, lpath, size, md5 == remote_md5)) self._done_cv.notify() self._done_cv.release() - def add_localfile_for_md5_check(self, filename, remote_md5, mode): - # type: (LocalFileMd5Offload, str, str, - # blobxfer.models.azure.StorageModes) -> None + def add_localfile_for_md5_check( + self, key, lpath, fpath, remote_md5, mode, lpview): + # type: (LocalFileMd5Offload, str, str, str, str, + # blobxfer.models.azure.StorageModes, object) -> None """Add a local file to MD5 check queue :param LocalFileMd5Offload self: this - :param str filename: file to compute MD5 for + :param str key: md5 map key + :param str lpath: "local" path for descriptor + :param str fpath: "final" path for/where file :param str remote_md5: remote MD5 to compare against :param blobxfer.models.azure.StorageModes mode: mode + :param object lpview: local path view """ if blobxfer.util.is_none_or_empty(remote_md5): raise ValueError('comparison MD5 is empty for file {}'.format( - filename)) + lpath)) if mode == blobxfer.models.azure.StorageModes.Page: pagealign = True else: pagealign = False - 
self._task_queue.put((filename, remote_md5, pagealign)) + self._task_queue.put( + (key, lpath, fpath, remote_md5, pagealign, lpview) + ) diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index b756590..6bf9761 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -226,26 +226,26 @@ def _pre_md5_skip_on_check(self, src, rfile): pre_encrypted_content_md5 if md5 is None: md5 = rfile.md5 - slpath = str(src.absolute_path) + key = blobxfer.operations.upload.Uploader.create_unique_id(src, rfile) with self._md5_meta_lock: - self._md5_map[slpath] = (src, rfile) - self._md5_offload.add_localfile_for_md5_check(slpath, md5, rfile.mode) + self._md5_map[key] = (src, rfile) + self._md5_offload.add_localfile_for_md5_check( + key, None, str(src.absolute_path), md5, rfile.mode, src.view) - def _post_md5_skip_on_check(self, filename, md5_match): + def _post_md5_skip_on_check(self, key, md5_match): # type: (Uploader, str, bool) -> None """Perform post MD5 skip on check :param Uploader self: this - :param str filename: local filename + :param str key: md5 map key :param bool md5_match: if MD5 matches """ with self._md5_meta_lock: - src, rfile = self._md5_map.pop(filename) + src, rfile = self._md5_map.pop(key) uid = blobxfer.operations.upload.Uploader.create_unique_id(src, rfile) if md5_match: with self._upload_lock: self._upload_set.remove(uid) self._upload_total -= 1 - self._upload_bytes_total -= src.size else: self._add_to_upload_queue(src, rfile, uid) @@ -270,7 +270,7 @@ def _check_for_uploads_from_md5(self): break cv.release() if result is not None: - self._post_md5_skip_on_check(result[0], result[1]) + self._post_md5_skip_on_check(result[0], result[3]) def _add_to_upload_queue(self, src, rfile, uid): # type: (Uploader, blobxfer.models.upload.LocalPath, @@ -398,7 +398,6 @@ def _put_data(self, ud, ase, offsets, data): :param blobxfer.models.upload.Offsets offsets: offsets :param bytes data: data to upload """ - print('UL', offsets, ase.path, len(data) if data is not None else None) if ase.mode == blobxfer.models.azure.StorageModes.Append: # append block if data is not None: @@ -875,9 +874,9 @@ def _generate_destination_for_source(self, local_path): spath = pathlib.Path(*_rparts[_strip:]) # create a storage entity for each destination for sa, cont, name, dpath in self._get_destination_paths(): - # apply rename + # if not renaming, form name from with spath if not self._spec.options.rename: - name = str(spath / name) + name = str(name / spath) if blobxfer.util.is_none_or_empty(name): raise ValueError( ('invalid destination, must specify a container or ' diff --git a/cli/cli.py b/cli/cli.py index a5e7ab1..7375891 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -93,12 +93,14 @@ def _init_config(self): if blobxfer.util.is_not_empty(self.yaml_config): self.yaml_config = pathlib.Path(self.yaml_config) self._read_yaml_file(self.yaml_config) - # merge cli options with config - settings.merge_settings(self.config, self.cli_options) + else: + # merge cli options with config + settings.merge_settings(self.config, self.cli_options) # set log file if specified - blobxfer.util.setup_logger(logger, self.config['options']['log_file']) + blobxfer.util.setup_logger( + logger, self.config['options'].get('log_file', None)) # output config - if self.config['options']['verbose']: + if self.config['options'].get('verbose', False): blobxfer.util.set_verbose_logger_handlers() logger.debug('config: \n' + json.dumps(self.config, indent=4)) # free mem @@ -174,7 +176,7 @@ def 
callback(ctx, param, value): '--progress-bar/--no-progress-bar', expose_value=False, default=True, - help='Display progress bar instead of console logs', + help='Display progress bar instead of console logs [True]', callback=callback)(f) @@ -331,7 +333,7 @@ def callback(ctx, param, value): '--chunk-size-bytes', expose_value=False, type=int, - default=4194304, + default=0, help='Block or chunk size in bytes; set to 0 for auto-select ' 'on upload [0]', callback=callback)(f) @@ -794,19 +796,54 @@ def upload(ctx, local_resource, storage_account, remote_path): @cli.group() @pass_cli_context def useconfig(ctx): - """Use config file for transfer""" + """Use yaml configuration file for transfer""" pass -@useconfig.command('upload') +@useconfig.command('download') +@config_arguments +@common_options +@pass_cli_context +def useconfig_download(ctx, config): + """Download blobs or files from Azure Storage via yaml configuration""" + settings.add_cli_options( + ctx.cli_options, settings.TransferAction.Download, None, None, None) + ctx.initialize() + specs = settings.create_download_specifications(ctx.config) + for spec in specs: + blobxfer.api.Downloader( + ctx.general_options, ctx.credentials, spec + ).start() + + +@useconfig.command('synccopy') @config_arguments @common_options @pass_cli_context -def useconfig_upload(ctx): - """Upload files to Azure File Storage""" +def useconfig_synccopy(ctx, config): + """Synchronously copy blobs between Azure Storage accounts via yaml + configuration""" + settings.add_cli_options( + ctx.cli_options, settings.TransferAction.Synccopy, None, None, None) ctx.initialize() raise NotImplementedError() +@useconfig.command('upload') +@config_arguments +@common_options +@pass_cli_context +def useconfig_upload(ctx, config): + """Upload files to Azure Storage via yaml configuration""" + settings.add_cli_options( + ctx.cli_options, settings.TransferAction.Upload, None, None, None) + ctx.initialize() + specs = settings.create_upload_specifications(ctx.config) + for spec in specs: + blobxfer.api.Uploader( + ctx.general_options, ctx.credentials, spec + ).start() + + if __name__ == '__main__': cli() diff --git a/cli/settings.py b/cli/settings.py index d198359..5911719 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -219,15 +219,22 @@ def merge_settings(config, cli_options): # merge general options if 'options' not in config: config['options'] = {} - config['options']['crypto_processes'] = cli_options['crypto_processes'] config['options']['log_file'] = cli_options['log_file'] - config['options']['md5_processes'] = cli_options['md5_processes'] config['options']['progress_bar'] = cli_options['progress_bar'] config['options']['resume_file'] = cli_options['resume_file'] config['options']['timeout_sec'] = cli_options['timeout'] - config['options']['disk_threads'] = cli_options['disk_threads'] - config['options']['transfer_threads'] = cli_options['transfer_threads'] config['options']['verbose'] = cli_options['verbose'] + # merge concurrency options + if 'concurrency' not in config['options']: + config['options']['concurrency'] = {} + config['options']['concurrency']['crypto_processes'] = \ + cli_options['crypto_processes'] + config['options']['concurrency']['disk_threads'] = \ + cli_options['disk_threads'] + config['options']['concurrency']['md5_processes'] = \ + cli_options['md5_processes'] + config['options']['concurrency']['transfer_threads'] = \ + cli_options['transfer_threads'] def create_azure_storage_credentials(config, general_options): @@ -254,18 +261,19 @@ def 
create_general_options(config): :rtype: blobxfer.models.options.General :return: general options object """ + conc = config['options'].get('concurrency', {}) return blobxfer.models.options.General( concurrency=blobxfer.models.options.Concurrency( - crypto_processes=config['options']['crypto_processes'], - disk_threads=config['options']['disk_threads'], - md5_processes=config['options']['md5_processes'], - transfer_threads=config['options']['transfer_threads'], + crypto_processes=conc.get('crypto_processes', 0), + disk_threads=conc.get('disk_threads', 0), + md5_processes=conc.get('md5_processes', 0), + transfer_threads=conc.get('transfer_threads', 0), ), - log_file=config['options']['log_file'], - progress_bar=config['options']['progress_bar'], - resume_file=config['options']['resume_file'], - timeout_sec=config['options']['timeout_sec'], - verbose=config['options']['verbose'], + log_file=config['options'].get('log_file', None), + progress_bar=config['options'].get('progress_bar', True), + resume_file=config['options'].get('resume_file', None), + timeout_sec=config['options'].get('timeout_sec', None), + verbose=config['options'].get('verbose', False), ) @@ -279,7 +287,7 @@ def create_download_specifications(config): specs = [] for conf in config['download']: # create download options - confmode = conf['options']['mode'].lower() + confmode = conf['options'].get('mode', 'auto').lower() if confmode == 'auto': mode = blobxfer.models.azure.StorageModes.Auto elif confmode == 'append': @@ -293,32 +301,33 @@ def create_download_specifications(config): else: raise ValueError('unknown mode: {}'.format(confmode)) # load RSA private key PEM file if specified - rpk = conf['options']['rsa_private_key'] + rpk = conf['options'].get('rsa_private_key', None) if blobxfer.util.is_not_empty(rpk): - rpkp = conf['options']['rsa_private_key_passphrase'] + rpkp = conf['options'].get('rsa_private_key_passphrase', None) rpk = blobxfer.operations.crypto.load_rsa_private_key_file( rpk, rpkp) else: rpk = None # create specification + sod = conf['options'].get('skip_on', {}) ds = blobxfer.models.download.Specification( download_options=blobxfer.models.options.Download( - check_file_md5=conf['options']['check_file_md5'], - chunk_size_bytes=conf['options']['chunk_size_bytes'], - delete_extraneous_destination=conf[ - 'options']['delete_extraneous_destination'], + check_file_md5=conf['options'].get('check_file_md5', False), + chunk_size_bytes=conf['options'].get('chunk_size_bytes', 0), + delete_extraneous_destination=conf['options'].get( + 'delete_extraneous_destination', False), mode=mode, - overwrite=conf['options']['overwrite'], - recursive=conf['options']['recursive'], - rename=conf['options']['rename'], + overwrite=conf['options'].get('overwrite', True), + recursive=conf['options'].get('recursive', True), + rename=conf['options'].get('rename', False), restore_file_attributes=conf[ - 'options']['restore_file_attributes'], + 'options'].get('restore_file_attributes', False), rsa_private_key=rpk, ), skip_on_options=blobxfer.models.options.SkipOn( - filesize_match=conf['options']['skip_on']['filesize_match'], - lmt_ge=conf['options']['skip_on']['lmt_ge'], - md5_match=conf['options']['skip_on']['md5_match'], + filesize_match=sod.get('filesize_match', False), + lmt_ge=sod.get('lmt_ge', False), + md5_match=sod.get('md5_match', False), ), local_destination_path=blobxfer.models.download. 
LocalDestinationPath( @@ -333,10 +342,12 @@ def create_download_specifications(config): sa = next(iter(src)) asp = blobxfer.operations.azure.SourcePath() asp.add_path_with_storage_account(src[sa], sa) - if blobxfer.util.is_not_empty(conf['include']): - asp.add_includes(conf['include']) - if blobxfer.util.is_not_empty(conf['exclude']): - asp.add_excludes(conf['exclude']) + incl = conf.get('include', None) + if blobxfer.util.is_not_empty(incl): + asp.add_includes(incl) + excl = conf.get('exclude', None) + if blobxfer.util.is_not_empty(excl): + asp.add_excludes(excl) ds.add_azure_source_path(asp) # append spec to list specs.append(ds) @@ -353,7 +364,7 @@ def create_upload_specifications(config): specs = [] for conf in config['upload']: # create upload options - confmode = conf['options']['mode'].lower() + confmode = conf['options'].get('mode', 'auto').lower() if confmode == 'auto': mode = blobxfer.models.azure.StorageModes.Auto elif confmode == 'append': @@ -367,14 +378,14 @@ def create_upload_specifications(config): else: raise ValueError('unknown mode: {}'.format(confmode)) # load RSA public key PEM if specified - rpk = conf['options']['rsa_public_key'] + rpk = conf['options'].get('rsa_public_key', None) if blobxfer.util.is_not_empty(rpk): rpk = blobxfer.operations.crypto.load_rsa_public_key_file(rpk) if rpk is None: # load RSA private key PEM file if specified - rpk = conf['options']['rsa_private_key'] + rpk = conf['options'].get('rsa_private_key', None) if blobxfer.util.is_not_empty(rpk): - rpkp = conf['options']['rsa_private_key_passphrase'] + rpkp = conf['options'].get('rsa_private_key_passphrase', None) rpk = blobxfer.operations.crypto.load_rsa_private_key_file( rpk, rpkp) rpk = rpk.public_key() @@ -383,41 +394,44 @@ def create_upload_specifications(config): # create local source paths lsp = blobxfer.models.upload.LocalSourcePath() lsp.add_paths(conf['source']) - if blobxfer.util.is_not_empty(conf['include']): - lsp.add_includes(conf['include']) - if blobxfer.util.is_not_empty(conf['exclude']): - lsp.add_excludes(conf['exclude']) + incl = conf.get('include', None) + if blobxfer.util.is_not_empty(incl): + lsp.add_includes(incl) + excl = conf.get('exclude', None) + if blobxfer.util.is_not_empty(excl): + lsp.add_excludes(excl) # create specification + sfp = conf['options'].get('store_file_properties', {}) + vio = conf['options'].get('vectored_io', {}) + sod = conf['options'].get('skip_on', {}) us = blobxfer.models.upload.Specification( upload_options=blobxfer.models.options.Upload( - chunk_size_bytes=conf['options']['chunk_size_bytes'], - delete_extraneous_destination=conf[ - 'options']['delete_extraneous_destination'], + chunk_size_bytes=conf['options'].get('chunk_size_bytes', 0), + delete_extraneous_destination=conf['options'].get( + 'delete_extraneous_destination', False), mode=mode, - one_shot_bytes=conf['options']['one_shot_bytes'], - overwrite=conf['options']['overwrite'], - recursive=conf['options']['recursive'], - rename=conf['options']['rename'], + one_shot_bytes=conf['options'].get('one_shot_bytes', 0), + overwrite=conf['options'].get('overwrite', True), + recursive=conf['options'].get('recursive', True), + rename=conf['options'].get('rename', False), rsa_public_key=rpk, store_file_properties=blobxfer.models.options.FileProperties( - attributes=conf[ - 'options']['store_file_properties']['attributes'], - md5=conf['options']['store_file_properties']['md5'], + attributes=sfp.get('attributes', False), + md5=sfp.get('md5', False), ), - 
strip_components=conf['options']['strip_components'], + strip_components=conf['options'].get('strip_components', 1), vectored_io=blobxfer.models.options.VectoredIo( - stripe_chunk_size_bytes=conf[ - 'options']['vectored_io']['stripe_chunk_size_bytes'], + stripe_chunk_size_bytes=vio.get( + 'stripe_chunk_size_bytes', 1073741824), distribution_mode=blobxfer. models.upload.VectoredIoDistributionMode( - conf['options']['vectored_io'][ - 'distribution_mode'].lower()), + vio.get('distribution_mode', 'disabled').lower()), ), ), skip_on_options=blobxfer.models.options.SkipOn( - filesize_match=conf['options']['skip_on']['filesize_match'], - lmt_ge=conf['options']['skip_on']['lmt_ge'], - md5_match=conf['options']['skip_on']['md5_match'], + filesize_match=sod.get('filesize_match', False), + lmt_ge=sod.get('lmt_ge', False), + md5_match=sod.get('md5_match', False), ), local_source_path=lsp, ) From 4e0ee1b21f2da8950410ff41bd8924d86b14bf6b Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 30 May 2017 15:53:18 -0700 Subject: [PATCH 38/47] Fix packaging issues --- cli/cli.py | 4 ++-- setup.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cli/cli.py b/cli/cli.py index 7375891..bf397b0 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -41,7 +41,7 @@ import blobxfer.api import blobxfer.util # local imports -import settings +from . import settings # create logger logger = logging.getLogger('blobxfer') @@ -736,7 +736,7 @@ def config_arguments(f): @click.version_option(version=blobxfer.__version__) @click.pass_context def cli(ctx): - """Blobxfer-CLI: Azure Storage transfer tool""" + """Blobxfer: Azure Storage transfer tool""" pass diff --git a/setup.py b/setup.py index c61be64..212597c 100644 --- a/setup.py +++ b/setup.py @@ -39,10 +39,9 @@ ] install_requires = [ - 'azure-common==1.1.6', 'azure-storage==0.34.2', 'click==6.7', - 'cryptography>=1.8.2', + 'cryptography>=1.9', 'future==0.16.0', 'python-dateutil==2.6.0', 'requests==2.14.2', @@ -61,8 +60,7 @@ version=version, author='Microsoft Corporation, Azure Batch and HPC Team', author_email='', - description=( - 'Azure storage transfer tool and library with AzCopy-like features'), + description='Azure storage transfer tool and library', long_description=long_description, platforms='any', url='https://github.com/Azure/blobxfer', From a04a5724bc6ff5b810a1901881121a943a536d5f Mon Sep 17 00:00:00 2001 From: Fred Park Date: Tue, 30 May 2017 19:58:11 -0700 Subject: [PATCH 39/47] Upload resume support --- blobxfer/models/resume.py | 139 ++++++++++++++++++++++++++++++++++ blobxfer/models/upload.py | 133 ++++++++++++++++++++++++++++++-- blobxfer/operations/resume.py | 100 +++++++++++++++++++----- blobxfer/operations/upload.py | 9 ++- setup.py | 1 + 5 files changed, 352 insertions(+), 30 deletions(-) diff --git a/blobxfer/models/resume.py b/blobxfer/models/resume.py index aa8b9da..a0108cb 100644 --- a/blobxfer/models/resume.py +++ b/blobxfer/models/resume.py @@ -158,3 +158,142 @@ def __repr__(self): self.next_integrity_chunk, self.completed, self.md5hexdigest, ) + + +class Upload(object): + """Upload resume object""" + def __init__( + self, local_path, length, chunk_size, total_chunks, + completed_chunks, completed, md5): + # type: (Upload, str, int, int, int, int, bool, str) -> None + """Ctor for Upload + :param Upload self: this + :param str local_path: local path + :param int length: total bytes + :param int chunk_size: chunk size in bytes + :param int total_chunks: total chunks + :param int completed_chunks: completed chunks + :param bool 
completed: completed + :param str md5: md5 hex digest + """ + self._local_path = local_path + self._length = length + self._chunk_size = chunk_size + self._total_chunks = total_chunks + self._completed_chunks = completed_chunks + self._completed = completed + self._md5hexdigest = md5 if md5 is not None else None + + @property + def local_path(self): + # type: (Upload) -> str + """Local path + :param Upload self: this + :rtype: str + :return: local path + """ + return self._local_path + + @property + def length(self): + # type: (Upload) -> int + """Content length + :param Upload self: this + :rtype: int + :return: number of bytes + """ + return self._length + + @property + def chunk_size(self): + # type: (Upload) -> int + """Chunk size + :param Upload self: this + :rtype: int + :return: chunk size in bytes + """ + return self._chunk_size + + @property + def total_chunks(self): + # type: (Upload) -> int + """Get total number of chunks + :param Upload self: this + :rtype: int + :return: total chunks + """ + return self._total_chunks + + @property + def completed_chunks(self): + # type: (Upload) -> int + """Get Completed chunks + :param Upload self: this + :rtype: int + :return: completed chunks + """ + return self._completed_chunks + + @completed_chunks.setter + def completed_chunks(self, value): + # type: (Upload, int) -> None + """Set Completed chunks + :param Upload self: this + :param int value: completed chunks + """ + self._completed_chunks = value + + @property + def completed(self): + # type: (Upload) -> bool + """Get Completed + :param Upload self: this + :rtype: bool + :return: if completed + """ + return self._completed + + @completed.setter + def completed(self, value): + # type: (Upload, bool) -> None + """Set Completed + :param Upload self: this + :param bool value: completion value + """ + self._completed = value + + @property + def md5hexdigest(self): + # type: (Upload) -> str + """Get md5 hex digest + :param Upload self: this + :rtype: str + :return: md5 hex digest + """ + return self._md5hexdigest + + @md5hexdigest.setter + def md5hexdigest(self, value): + # type: (Upload, str) -> None + """Set md5 hex digest value if value is not None + :param Upload self: this + :param str value: md5 hex digest + """ + if value is None: + return + self._md5hexdigest = value + + def __repr__(self): + # type: (Upload) -> str + """Return representation + :param Upload self: this + :rtype: str + :return: representation string + """ + return ('Upload<local_path={} length={} chunk_size={} total_chunks={} completed_chunks={} completed={} md5={}>').format( + self.local_path, self.length, self.chunk_size, + self.total_chunks, self.completed_chunks, self.completed, + self.md5hexdigest, + ) diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index 585ea51..d411bb0 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -42,6 +42,7 @@ import pathlib import threading # non-stdlib imports +import bitstring # local imports import blobxfer.models import blobxfer.models.azure @@ -57,6 +58,7 @@ _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES = 4194304 _MAX_NUM_CHUNKS = 50000 _DEFAULT_AUTO_CHUNKSIZE_BYTES = 16777216 +_MAX_MD5_CACHE_RESUME_ENTRIES = 25 # named tuples @@ -360,6 +362,10 @@ def __init__(self, lpath, ase, uid, options, resume_mgr): self._outstanding_ops = self._total_chunks if blobxfer.util.is_not_empty(self._ase.replica_targets): self._outstanding_ops *= len(self._ase.replica_targets) + if self._resume_mgr: + self._completed_chunks = bitstring.BitArray( + length=self._total_chunks) + self._md5_cache = {} # initialize integrity checkers self.hmac = None self.md5 = None @@ -416,7 
+422,8 @@ def is_resumable(self): :rtype: bool :return: if resumable """ - return self._resume_mgr is not None and self.hmac is None + return (self._resume_mgr is not None and self.hmac is None and + not self.remote_is_append_blob) @property def remote_is_file(self): @@ -492,14 +499,40 @@ def requires_set_file_properties_md5(self): return (not self.entity.is_encrypted and self.must_compute_md5 and self.remote_is_file) - def complete_offset_upload(self): - # type: (Descriptor) -> None + def complete_offset_upload(self, chunk_num): + # type: (Descriptor, int) -> None """Complete the upload for the offset :param Descriptor self: this + :param int chunk_num: chunk num completed """ with self._meta_lock: self._outstanding_ops -= 1 - # TODO save resume state + # save resume state + if self.is_resumable: + self._completed_chunks.set(True, chunk_num) + completed = self._outstanding_ops == 0 + if not completed and self.must_compute_md5: + last_consecutive = ( + self._completed_chunks.find('0b0')[0] - 1 + ) + md5digest = self._md5_cache[last_consecutive] + else: + md5digest = None + if completed: + last_consecutive = None + self._md5_cache.clear() + self._resume_mgr.add_or_update_record( + self.local_path.absolute_path, self._ase, self._chunk_size, + self._total_chunks, self._completed_chunks.int, completed, + md5digest, + ) + # prune md5 cache + if len(self._md5_cache) > _MAX_MD5_CACHE_RESUME_ENTRIES: + mkeys = sorted(list(self._md5_cache.keys())) + for key in mkeys: + if key >= last_consecutive: + break + self._md5_cache.pop(key) def hmac_data(self, data): # type: (Descriptor, bytes) -> None @@ -667,6 +700,92 @@ def _initialize_integrity_checkers(self, options): not self.remote_is_append_blob): self.md5 = blobxfer.util.new_md5_hasher() + def _resume(self): + if self._resume_mgr is None or self._offset > 0: + return None + # check if path exists in resume db + rr = self._resume_mgr.get_record(self._ase) + if rr is None: + logger.debug('no resume record for {}'.format(self._ase.path)) + return None + # ensure lengths are the same + if rr.length != self._ase.size: + logger.warning('resume length mismatch {} -> {}'.format( + rr.length, self._ase.size)) + return None + # set offsets if completed + if rr.completed: + with self._meta_lock: + logger.debug('{} upload already completed'.format( + self._ase.path)) + self._offset = rr.total_chunks * rr.chunk_size + self._chunk_num = rr.total_chunks + self._chunk_size = rr.chunk_size + self._total_chunks = rr.total_chunks + self._completed_chunks.int = rr.completed_chunks + self._outstanding_ops = 0 + return self._ase.size + # encrypted files are not resumable due to hmac requirement + if self._ase.is_encrypted: + logger.debug('cannot resume encrypted entity {}'.format( + self._ase.path)) + return None + # check if path exists + if not pathlib.Path(rr.local_path).exists(): + logger.warning('resume from local path {} does not exist'.format( + rr.local_path)) + return None + # re-hash from 0 to offset if needed + _cc = bitstring.BitArray(length=rr.total_chunks) + _cc.int = rr.completed_chunks + curr_chunk = _cc.find('0b0')[0] + del _cc + _fd_offset = 0 + _end_offset = min((curr_chunk * rr.chunk_size, rr.length)) + if self.md5 is not None and curr_chunk > 0: + _blocksize = blobxfer.util.MEGABYTE << 2 + logger.debug( + 'integrity checking existing file {} offset {} -> {}'.format( + self._ase.path, + self.local_path.view.fd_start, + self.local_path.view.fd_start + _end_offset) + ) + with self._hasher_lock: + with self.local_path.absolute_path.open('rb') as 
filedesc: + filedesc.seek(self.local_path.view.fd_start, 0) + while _fd_offset < _end_offset: + if (_fd_offset + _blocksize) > _end_offset: + _blocksize = _end_offset - _fd_offset + _buf = filedesc.read(_blocksize) + self.md5.update(_buf) + _fd_offset += _blocksize + del _blocksize + # compare hashes + hexdigest = self.md5.hexdigest() + if rr.md5hexdigest != hexdigest: + logger.warning( + 'MD5 mismatch resume={} computed={} for {}'.format( + rr.md5hexdigest, hexdigest, self._ase.path)) + # reset hasher + self.md5 = blobxfer.util.new_md5_hasher() + return None + # set values from resume + with self._meta_lock: + self._offset = _end_offset + self._chunk_num = curr_chunk + self._chunk_size = rr.chunk_size + self._total_chunks = rr.total_chunks + self._completed_chunks = bitstring.BitArray(length=rr.total_chunks) + self._completed_chunks.set(True, range(0, curr_chunk + 1)) + self._outstanding_ops = rr.total_chunks - curr_chunk + logger.debug( + ('resuming file {} from byte={} chunk={} chunk_size={} ' + 'total_chunks={} outstanding_ops={}').format( + self._ase.path, self._offset, self._chunk_num, + self._chunk_size, self._total_chunks, + self._outstanding_ops)) + return _end_offset + def next_offsets(self): # type: (Descriptor) -> Offsets """Retrieve the next offsets @@ -674,9 +793,7 @@ def next_offsets(self): :rtype: Offsets :return: upload offsets """ - # TODO RESUME - resume_bytes = None -# resume_bytes = self._resume() + resume_bytes = self._resume() with self._meta_lock: if self._chunk_num >= self._total_chunks: return None, resume_bytes @@ -744,6 +861,8 @@ def read_data(self, offsets): if self.must_compute_md5 and data: with self._hasher_lock: self.md5.update(data) + if self.is_resumable: + self._md5_cache[self._chunk_num - 1] = self.md5.hexdigest() return data, newoffset def generate_metadata(self): diff --git a/blobxfer/operations/resume.py b/blobxfer/operations/resume.py index 97e37e4..0458bec 100644 --- a/blobxfer/operations/resume.py +++ b/blobxfer/operations/resume.py @@ -44,12 +44,11 @@ logger = logging.getLogger(__name__) -class DownloadResumeManager(): - """Download Resume Manager""" +class _BaseResumeManager(): def __init__(self, resume_file): - # type: (DownloadResumeManager, str) -> None - """Ctor for DownloadResumeManager - :param DownloadResumeManager self: this + # type: (_BaseResumeManager, str) -> None + """Ctor for _BaseResumeManager + :param _BaseResumeManager self: this :param pathlib.Path resume_file: resume file """ self._lock = threading.Lock() @@ -58,18 +57,18 @@ def __init__(self, resume_file): str(resume_file), protocol=pickle.HIGHEST_PROTOCOL) def close(self): - # type: (DownloadResumeManager) -> None + # type: (_BaseResumeManager) -> None """Close the internal data store - :param DownloadResumeManager self: this + :param _BaseResumeManager self: this """ if self._data is not None: self._data.close() self._data = None def delete(self): - # type: (DownloadResumeManager) -> None + # type: (_BaseResumeManager) -> None """Delete the resume file db - :param DownloadResumeManager self: this + :param _BaseResumeManager self: this """ self.close() try: @@ -79,9 +78,9 @@ def delete(self): @contextlib.contextmanager def datalock(self, acquire=True): - # type: (DownloadResumeManager) -> None + # type: (_BaseResumeManager) -> None """Delete the resume file db - :param DownloadResumeManager self: this + :param _BaseResumeManager self: this :param bool acquire: acquire lock """ if acquire: @@ -103,18 +102,18 @@ def generate_record_key(ase): return 
'{}:{}'.format(ase._client.primary_endpoint, ase.path) def get_record(self, ase, key=None, lock=True): - # type: (DownloadResumeManager, str, - # bool) -> blobxfer.models.resume.Download + # type: (_BaseResumeManager, str, + # bool) -> object """Get a resume record - :param DownloadResumeManager self: this + :param _BaseResumeManager self: this :param blobxfer.models.azure.StorageEntity ase: Storage Entity :param str key: record key :param bool lock: acquire lock - :rtype: blobxfer.models.resume.Download - :return: Download record + :rtype: blobxfer.models.resume._Base + :return: _Base record """ if key is None: - key = blobxfer.operations.resume.DownloadResumeManager.\ + key = blobxfer.operations.resume._BaseResumeManager.\ generate_record_key(ase) with self.datalock(lock): try: @@ -122,13 +121,24 @@ def get_record(self, ase, key=None, lock=True): except KeyError: return None + +class DownloadResumeManager(_BaseResumeManager): + """Download Resume Manager""" + def __init__(self, resume_file): + # type: (DownloadResumeManager, str) -> None + """Ctor for DownloadResumeManager + :param DownloadResumeManager self: this + :param pathlib.Path resume_file: resume file + """ + super(DownloadResumeManager, self).__init__(resume_file) + def add_or_update_record( self, final_path, ase, chunk_size, next_integrity_chunk, completed, md5): # type: (DownloadResumeManager, pathlib.Path, # blobxfer.models.azure.StorageEntity, int, int, bool, # str) -> None - """Get a resume record + """Add or update a resume record :param DownloadResumeManager self: this :param pathlib.Path final_path: final path :param blobxfer.models.azure.StorageEntity ase: Storage Entity @@ -137,7 +147,7 @@ def add_or_update_record( :param bool completed: if completed :param str md5: md5 hex digest """ - key = blobxfer.operations.resume.DownloadResumeManager.\ + key = blobxfer.operations.resume._BaseResumeManager.\ generate_record_key(ase) with self.datalock(): dl = self.get_record(ase, key=key, lock=False) @@ -161,3 +171,55 @@ def add_or_update_record( dl.md5hexdigest = md5 self._data[key] = dl self._data.sync() + + +class UploadResumeManager(_BaseResumeManager): + """Upload Resume Manager""" + def __init__(self, resume_file): + # type: (UploadResumeManager, str) -> None + """Ctor for UploadResumeManager + :param UploadResumeManager self: this + :param pathlib.Path resume_file: resume file + """ + super(UploadResumeManager, self).__init__(resume_file) + + def add_or_update_record( + self, local_path, ase, chunk_size, total_chunks, completed_chunks, + completed, md5): + # type: (UploadResumeManager, pathlib.Path, + # blobxfer.models.azure.StorageEntity, int, int, int, bool, + # str) -> None + """Add or update a resume record + :param UploadResumeManager self: this + :param pathlib.Path local_path: local path + :param blobxfer.models.azure.StorageEntity ase: Storage Entity + :param int chunk_size: chunk size in bytes + :param int total_chunks: total chunks + :param int completed_chunks: completed chunks bitarray + :param bool completed: if completed + :param str md5: md5 hex digest + """ + key = blobxfer.operations.resume._BaseResumeManager.\ + generate_record_key(ase) + with self.datalock(): + ul = self.get_record(ase, key=key, lock=False) + if ul is None: + ul = blobxfer.models.resume.Upload( + local_path=str(local_path), + length=ase._size, + chunk_size=chunk_size, + total_chunks=total_chunks, + completed_chunks=completed_chunks, + completed=completed, + md5=md5, + ) + else: + if ul.completed or completed_chunks == 
ul.completed_chunks: + return + if completed: + ul.completed = completed + else: + ul.completed_chunks = completed_chunks + ul.md5hexdigest = md5 + self._data[key] = ul + self._data.sync() diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 6bf9761..a946d7c 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -380,7 +380,7 @@ def _process_transfer(self, ud, ase, offsets, data): self._transfer_set.remove( blobxfer.operations.upload.Uploader.create_unique_transfer_id( ud.local_path, ase, offsets)) - ud.complete_offset_upload() + ud.complete_offset_upload(offsets.chunk_num) # add descriptor back to upload queue only for append blobs if ud.entity.mode == blobxfer.models.azure.StorageModes.Append: self._upload_queue.put(ud) @@ -519,6 +519,7 @@ def _process_upload_descriptor(self, ud): # add resume bytes to counter if resume_bytes is not None: with self._transfer_lock: + self._upload_bytes_total += ud.entity.size self._upload_bytes_sofar += resume_bytes logger.debug('adding {} sofar {} from {}'.format( resume_bytes, self._upload_bytes_sofar, ud._ase.name)) @@ -1007,9 +1008,9 @@ def _run(self): self._start_time = blobxfer.util.datetime_now() logger.info('blobxfer start time: {0}'.format(self._start_time)) # initialize resume db if specified -# if self._general_options.resume_file is not None: -# self._resume = blobxfer.operations.resume.DownloadResumeManager( -# self._general_options.resume_file) + if self._general_options.resume_file is not None: + self._resume = blobxfer.operations.resume.UploadResumeManager( + self._general_options.resume_file) # initialize MD5 processes if ((self._spec.options.store_file_properties.md5 or self._spec.skip_on.md5_match) and diff --git a/setup.py b/setup.py index 212597c..1f36502 100644 --- a/setup.py +++ b/setup.py @@ -40,6 +40,7 @@ install_requires = [ 'azure-storage==0.34.2', + 'bitstring==3.1.5', 'click==6.7', 'cryptography>=1.9', 'future==0.16.0', From e308ed85953b21870a66fc52388dabd49bad72d6 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Wed, 31 May 2017 21:01:37 -0700 Subject: [PATCH 40/47] Fix existing tests - Fix various issues uncovered from UTs --- blobxfer/models/__init__.py | 26 +- blobxfer/models/download.py | 4 +- blobxfer/models/options.py | 18 +- blobxfer/operations/azure/__init__.py | 2 +- blobxfer/operations/download.py | 6 +- blobxfer/operations/progress.py | 4 +- blobxfer/operations/resume.py | 16 +- blobxfer/operations/upload.py | 5 +- blobxfer/retry.py | 32 ++- blobxfer/util.py | 2 +- cli/cli.py | 6 +- test_requirements.txt | 6 +- tests/test_blobxfer_models_azure.py | 2 +- tests/test_blobxfer_models_download.py | 120 ++++----- tests/test_blobxfer_models_options.py | 30 ++- tests/test_blobxfer_models_resume.py | 3 +- tests/test_blobxfer_models_upload.py | 13 +- tests/test_blobxfer_operations_azure.py | 4 +- tests/test_blobxfer_operations_crypto.py | 4 +- tests/test_blobxfer_operations_download.py | 269 +++++++++++++++------ tests/test_blobxfer_operations_md5.py | 24 +- tests/test_blobxfer_operations_progress.py | 4 +- tests/test_blobxfer_operations_resume.py | 25 +- tests/test_blobxfer_retry.py | 20 +- tox.ini | 4 +- 25 files changed, 403 insertions(+), 246 deletions(-) diff --git a/blobxfer/models/__init__.py b/blobxfer/models/__init__.py index b9e9fbc..1d2e850 100644 --- a/blobxfer/models/__init__.py +++ b/blobxfer/models/__init__.py @@ -60,17 +60,6 @@ def paths(self): """ return self._paths - def add_include(self, incl): - # type: (_BaseSourcePaths, str) -> None - """Add an 
include - :param _BaseSourcePaths self: this - :param str incl: include filter - """ - if self._include is None: - self._include = list(incl) - else: - self._include.append(incl) - def add_includes(self, includes): # type: (_BaseSourcePaths, list) -> None """Add a list of includes @@ -78,23 +67,12 @@ def add_includes(self, includes): :param list includes: list of includes """ if not isinstance(includes, list): - includes = list(includes) + includes = [includes] if self._include is None: self._include = includes else: self._include.extend(includes) - def add_exclude(self, excl): - # type: (_BaseSourcePaths, str) -> None - """Add an exclude - :param _BaseSourcePaths self: this - :param str excl: exclude filter - """ - if self._exclude is None: - self._exclude = list(excl) - else: - self._exclude.append(excl) - def add_excludes(self, excludes): # type: (_BaseSourcePaths, list) -> None """Add a list of excludes @@ -102,7 +80,7 @@ def add_excludes(self, excludes): :param list excludes: list of excludes """ if not isinstance(excludes, list): - excludes = list(excludes) + excludes = [excludes] if self._exclude is None: self._exclude = excludes else: diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index a197a25..cc363ef 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -345,7 +345,7 @@ def generate_view(ase): fd_start=0, fd_end=slicesize, ) - total_size = ase.size + total_size = slicesize else: view = LocalPathView( fd_start=ase.vectored_io.offset_start, @@ -529,7 +529,7 @@ def cleanup_all_temporary_files(self): pass # iterate unchecked chunks and delete for key in self._unchecked_chunks: - ucc = self._unchecked_chunks[key] + ucc = self._unchecked_chunks[key]['ucc'] if ucc.temp: try: ucc.file_path.unlink() diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index cdc32df..c516d01 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -122,24 +122,24 @@ def __init__( if self.crypto_processes is None or self.crypto_processes < 1: self.crypto_processes = 0 if self.md5_processes is None or self.md5_processes < 1: - self.md5_processes = multiprocessing.cpu_count() // 2 + self.md5_processes = multiprocessing.cpu_count() >> 1 if self.md5_processes < 1: self.md5_processes = 1 auto_disk = False if self.disk_threads is None or self.disk_threads < 1: - self.disk_threads = multiprocessing.cpu_count() * 4 - # cap maximum number of disk threads from cpu count to 96 - if self.disk_threads > 96: - self.transfer_threads = 96 + self.disk_threads = multiprocessing.cpu_count() << 1 + # cap maximum number of disk threads from cpu count to 64 + if self.disk_threads > 64: + self.disk_threads = 64 auto_disk = True if self.transfer_threads is None or self.transfer_threads < 1: if auto_disk: self.transfer_threads = self.disk_threads << 1 else: - self.transfer_threads = multiprocessing.cpu_count() * 2 - # cap maximum number of threads from cpu count to 64 - if self.transfer_threads > 64: - self.transfer_threads = 64 + self.transfer_threads = multiprocessing.cpu_count() << 2 + # cap maximum number of threads from cpu count to 96 + if self.transfer_threads > 96: + self.transfer_threads = 96 class General(object): diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index 177d41d..67d531f 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -287,7 +287,7 @@ def _convert_to_storage_entity_with_encryption_metadata( encryption_metadata_exists(entity.metadata): 
ed = blobxfer.models.crypto.EncryptionMetadata() ed.convert_from_json( - entity.metadata, file.name, options.rsa_private_key) + entity.metadata, entity.name, options.rsa_private_key) else: ed = None ase = blobxfer.models.azure.StorageEntity(container, ed) diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index 3df37bc..a369d0f 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -298,6 +298,7 @@ def _pre_md5_skip_on_check(self, lpath, rfile): convert_vectored_io_slice_to_final_path_name(lpath, rfile) ) else: + view = None fpath = slpath self._md5_offload.add_localfile_for_md5_check( key, slpath, fpath, md5, rfile.mode, view) @@ -453,11 +454,10 @@ def _worker_thread_transfer(self): """Worker thread download :param Downloader self: this """ + max_set_len = self._general_options.concurrency.disk_threads << 2 while not self.termination_check: try: - if (len(self._disk_set) > - self._general_options.concurrency. - disk_threads * 4): + if len(self._disk_set) > max_set_len: time.sleep(0.2) continue else: diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index 07a9281..0bf132e 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -39,6 +39,8 @@ import cryptography import requests # local imports +import blobxfer.models.download +import blobxfer.models.upload import blobxfer.util import blobxfer.version @@ -158,7 +160,7 @@ def output_parameters(general_options, spec): spec.skip_on.filesize_match, spec.skip_on.lmt_ge, spec.skip_on.md5_match)) - log.append(' chunk size: {} bytes'.format( + log.append(' chunk size bytes: {}'.format( spec.options.chunk_size_bytes)) log.append(' delete extraneous: {}'.format( spec.options.delete_extraneous_destination)) diff --git a/blobxfer/operations/resume.py b/blobxfer/operations/resume.py index 0458bec..88172e4 100644 --- a/blobxfer/operations/resume.py +++ b/blobxfer/operations/resume.py @@ -44,7 +44,8 @@ logger = logging.getLogger(__name__) -class _BaseResumeManager(): +class _BaseResumeManager(object): + """Base Resume Manager""" def __init__(self, resume_file): # type: (_BaseResumeManager, str) -> None """Ctor for _BaseResumeManager @@ -99,18 +100,21 @@ def generate_record_key(ase): :rtype: str :return: record key """ - return '{}:{}'.format(ase._client.primary_endpoint, ase.path) + key = '{}:{}'.format(ase._client.primary_endpoint, ase.path) + if blobxfer.util.on_python2(): + return key.encode('utf8') + else: + return key def get_record(self, ase, key=None, lock=True): - # type: (_BaseResumeManager, str, - # bool) -> object + # type: (_BaseResumeManager, str, bool) -> object """Get a resume record :param _BaseResumeManager self: this :param blobxfer.models.azure.StorageEntity ase: Storage Entity :param str key: record key :param bool lock: acquire lock - :rtype: blobxfer.models.resume._Base - :return: _Base record + :rtype: object + :return: resume record object """ if key is None: key = blobxfer.operations.resume._BaseResumeManager.\ diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index a946d7c..02447c9 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -446,11 +446,10 @@ def _worker_thread_upload(self): """Worker thread upload :param Uploader self: this """ + max_set_len = self._general_options.concurrency.transfer_threads << 2 while not self.termination_check: try: - if (len(self._transfer_set) > - self._general_options.concurrency. 
- transfer_threads * 4): + if len(self._transfer_set) > max_set_len: time.sleep(0.2) continue else: diff --git a/blobxfer/retry.py b/blobxfer/retry.py index 892b25c..daee22a 100644 --- a/blobxfer/retry.py +++ b/blobxfer/retry.py @@ -37,23 +37,34 @@ class ExponentialRetryWithMaxWait(azure.storage.retry._Retry): """Exponential Retry with Max Wait (infinite retries)""" - def __init__(self, initial_backoff=0.1, max_backoff=2, reset_at_max=True): - # type: (ExponentialRetryWithMaxWait, int, int, bool) -> None + def __init__( + self, initial_backoff=0.1, max_backoff=1, max_retries=None, + reset_at_max=True): + # type: (ExponentialRetryWithMaxWait, int, int, int, bool) -> None """Ctor for ExponentialRetryWithMaxWait :param ExponentialRetryWithMaxWait self: this :param int initial_backoff: initial backoff :param int max_backoff: max backoff + :param int max_retries: max retries :param bool reset_at_max: reset after reaching max wait """ + if max_backoff <= 0: + raise ValueError( + 'max backoff is non-positive: {}'.format(max_backoff)) + if max_retries is not None and max_retries < 0: + raise ValueError( + 'max retries is invalid: {}'.format(max_retries)) if max_backoff < initial_backoff: raise ValueError( 'max backoff {} less than initial backoff {}'.format( max_backoff, initial_backoff)) + self._backoff_count = 0 + self._last_backoff = initial_backoff self.initial_backoff = initial_backoff self.max_backoff = max_backoff self.reset_at_max = reset_at_max super(ExponentialRetryWithMaxWait, self).__init__( - max_backoff if self.reset_at_max else 2147483647, False) + max_retries if max_retries is not None else 2147483647, False) def retry(self, context): # type: (ExponentialRetryWithMaxWait, @@ -75,11 +86,12 @@ def _backoff(self, context): :rtype: int :return: backoff amount """ - if context.count == 1: - backoff = self.initial_backoff + self._backoff_count += 1 + if self._backoff_count == 1: + self._last_backoff = self.initial_backoff else: - backoff = self.initial_backoff * (context.count - 1) - if backoff > self.max_backoff and self.reset_at_max: - backoff = self.initial_backoff - context.count = 1 - return backoff + self._last_backoff *= 2 + if self._last_backoff > self.max_backoff and self.reset_at_max: + self._backoff_count = 1 + self._last_backoff = self.initial_backoff + return self._last_backoff diff --git a/blobxfer/util.py b/blobxfer/util.py index cce84f0..166b98f 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -65,7 +65,7 @@ def on_python2(): return future.utils.PY2 -def on_windows(): +def on_windows(): # noqa # type: (None) -> bool """Execution on Windows :rtype: bool diff --git a/cli/cli.py b/cli/cli.py index bf397b0..ec4f3f9 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -41,7 +41,11 @@ import blobxfer.api import blobxfer.util # local imports -from . import settings +try: + from . 
import settings +except (SystemError, ImportError): # noqa + # for local testing + import settings # create logger logger = logging.getLogger('blobxfer') diff --git a/test_requirements.txt b/test_requirements.txt index bc58365..c576b44 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,5 +1,5 @@ flake8>=3.3.0 mock>=2.0.0; python_version < '3.3' -pypandoc>=1.3.3 -pytest>=3.0.7 -pytest-cov>=2.4.0 +pypandoc>=1.4 +pytest>=3.1.1 +pytest-cov>=2.5.1 diff --git a/tests/test_blobxfer_models_azure.py b/tests/test_blobxfer_models_azure.py index 6ddc95b..f075092 100644 --- a/tests/test_blobxfer_models_azure.py +++ b/tests/test_blobxfer_models_azure.py @@ -49,6 +49,6 @@ def test_azurestorageentity(): assert ase.snapshot is not None blob.snapshot = None - ase.populate_from_file(mock.MagicMock(), blob) + ase.populate_from_file(mock.MagicMock(), blob, 'path') assert ase.mode == azmodels.StorageModes.File assert ase.snapshot is None diff --git a/tests/test_blobxfer_models_download.py b/tests/test_blobxfer_models_download.py index 918a7f0..c1b568e 100644 --- a/tests/test_blobxfer_models_download.py +++ b/tests/test_blobxfer_models_download.py @@ -110,32 +110,33 @@ def test_downloaddescriptor(tmpdir): d._allocate_disk_space() assert d.entity == ase + assert d.entity.is_encrypted assert not d.must_compute_md5 + assert d.hmac is not None assert d._total_chunks == 64 assert d._offset == 0 assert d.final_path == lp - assert str(d.local_path) == str(lp) + '.bxtmp' assert d._allocated - assert d.local_path.stat().st_size == 1024 - 16 + assert d.final_path.stat().st_size == ase._size - 16 d._allocate_disk_space() assert d._allocated - d.local_path.unlink() - ase._size = 1 + d.final_path.unlink() + ase._size = 32 d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() - assert d._total_chunks == 1 + assert d._total_chunks == 2 assert d._allocated - assert d.local_path.stat().st_size == 0 + assert d.final_path.stat().st_size == ase._size - 16 - d.local_path.unlink() + d.final_path.unlink() ase._encryption = None ase._size = 1024 d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() assert d._allocated - assert d.local_path.stat().st_size == 1024 + assert d.final_path.stat().st_size == ase._size # pre-existing file check ase._size = 0 @@ -143,13 +144,12 @@ def test_downloaddescriptor(tmpdir): d._allocate_disk_space() assert d._total_chunks == 0 assert d._allocated - assert d.local_path.stat().st_size == 0 + assert d.final_path.stat().st_size == ase._size @unittest.skipIf(util.on_python2(), 'fallocate does not exist') def test_downloaddescriptor_allocate_disk_space_via_seek(tmpdir): fp = pathlib.Path(str(tmpdir.join('fp'))) - lp = pathlib.Path(str(tmpdir.join('fp.bxtmp'))) opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 256 @@ -162,14 +162,13 @@ def test_downloaddescriptor_allocate_disk_space_via_seek(tmpdir): patched_fallocate.side_effect = [AttributeError()] d._allocate_disk_space() assert d._allocated - assert not fp.exists() - assert lp.stat().st_size == ase._size + assert fp.exists() + assert fp.stat().st_size == ase._size def test_downloaddescriptor_resume(tmpdir): resumefile = pathlib.Path(str(tmpdir.join('resume'))) fp = pathlib.Path(str(tmpdir.join('fp'))) - lp = pathlib.Path(str(tmpdir.join('fp.bxtmp'))) opts = mock.MagicMock() opts.check_file_md5 = True @@ -177,6 +176,7 @@ def test_downloaddescriptor_resume(tmpdir): ase = azmodels.StorageEntity('cont') ase._size = 128 ase._name = 'blob' + ase._client = mock.MagicMock() # test no record 
rmgr = rops.DownloadResumeManager(resumefile) @@ -185,7 +185,7 @@ def test_downloaddescriptor_resume(tmpdir): assert rb is None # test length mismatch - rmgr.add_or_update_record(str(fp), str(lp), 127, 0, 0, False, None) + rmgr.add_or_update_record(str(fp), ase, 0, 0, False, None) rb = d._resume() assert rb is None @@ -193,7 +193,7 @@ def test_downloaddescriptor_resume(tmpdir): rmgr.delete() rmgr = rops.DownloadResumeManager(resumefile) - rmgr.add_or_update_record(str(fp), str(lp), ase._size, 0, 0, False, None) + rmgr.add_or_update_record(str(fp), ase, 0, 0, False, None) d = models.Descriptor(fp, ase, opts, rmgr) rb = d._resume() assert rb is None @@ -202,7 +202,7 @@ def test_downloaddescriptor_resume(tmpdir): rmgr.delete() rmgr = rops.DownloadResumeManager(resumefile) - rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, True, None) + rmgr.add_or_update_record(str(fp), ase, 32, 1, True, None) d = models.Descriptor(fp, ase, opts, rmgr) fp.touch() rb = d._resume() @@ -215,22 +215,23 @@ def test_downloaddescriptor_resume(tmpdir): ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'123' - rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, False, None) + rmgr.add_or_update_record(str(fp), ase, 32, 1, False, None) d = models.Descriptor(fp, ase, opts, rmgr) rb = d._resume() assert rb is None - # test if intermediate file not exists + # test up to chunk rmgr.delete() rmgr = rops.DownloadResumeManager(resumefile) ase = azmodels.StorageEntity('cont') ase._size = 128 ase._name = 'blob' + ase._client = mock.MagicMock() - rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, False, None) + rmgr.add_or_update_record(str(fp), ase, 32, 1, False, None) d = models.Descriptor(fp, ase, opts, rmgr) rb = d._resume() - assert rb is None + assert rb == 32 # ensure hmac not populated rmgr.delete() @@ -238,9 +239,10 @@ def test_downloaddescriptor_resume(tmpdir): ase = azmodels.StorageEntity('cont') ase._size = 128 ase._name = 'blob' - lp.touch() + ase._client = mock.MagicMock() + fp.touch() - rmgr.add_or_update_record(str(fp), str(lp), ase._size, 32, 1, False, None) + rmgr.add_or_update_record(str(fp), ase, 32, 1, False, None) d = models.Descriptor(fp, ase, opts, rmgr) d.hmac = True with pytest.raises(RuntimeError): @@ -251,13 +253,12 @@ def test_downloaddescriptor_resume(tmpdir): rmgr = rops.DownloadResumeManager(resumefile) data = os.urandom(32) - with lp.open('wb') as f: + with fp.open('wb') as f: f.write(data) md5 = util.new_md5_hasher() md5.update(data) - rmgr.add_or_update_record( - str(fp), str(lp), ase._size, 32, 1, False, md5.hexdigest()) + rmgr.add_or_update_record(str(fp), ase, 32, 1, False, md5.hexdigest()) d = models.Descriptor(fp, ase, opts, rmgr) rb = d._resume() assert rb == 32 @@ -265,8 +266,7 @@ def test_downloaddescriptor_resume(tmpdir): # md5 hash mismatch rmgr.delete() rmgr = rops.DownloadResumeManager(resumefile) - rmgr.add_or_update_record( - str(fp), str(lp), ase._size, 32, 1, False, 'abc') + rmgr.add_or_update_record(str(fp), ase, 32, 1, False, 'abc') ase._md5 = 'abc' d = models.Descriptor(fp, ase, opts, rmgr) rb = d._resume() @@ -278,10 +278,10 @@ def test_downloaddescriptor_resume(tmpdir): ase = azmodels.StorageEntity('cont') ase._size = 128 ase._name = 'blob' + ase._client = mock.MagicMock() ase._mode = azmodels.StorageModes.Page - rmgr.add_or_update_record( - str(fp), str(lp), ase._size, 32, 1, False, md5.hexdigest()) + rmgr.add_or_update_record(str(fp), ase, 32, 1, False, md5.hexdigest()) d = models.Descriptor(fp, ase, opts, rmgr) rb = 
d._resume() assert rb == 32 @@ -443,10 +443,11 @@ def test_write_unchecked_data(tmpdir): assert offsets.chunk_num in d._unchecked_chunks ucc = d._unchecked_chunks[offsets.chunk_num] - assert ucc.data_len == ase._size - assert ucc.fd_start == offsets.fd_start - assert ucc.file_path == d.local_path - assert not ucc.temp + assert ucc['ucc'].data_len == ase._size + assert ucc['ucc'].fd_start == offsets.fd_start + assert ucc['ucc'].file_path == d.final_path + assert not ucc['ucc'].temp + assert ucc['decrypted'] def test_write_unchecked_hmac_data(tmpdir): @@ -464,10 +465,11 @@ def test_write_unchecked_hmac_data(tmpdir): assert offsets.chunk_num in d._unchecked_chunks ucc = d._unchecked_chunks[offsets.chunk_num] - assert ucc.data_len == ase._size - assert ucc.fd_start == offsets.fd_start - assert ucc.file_path != d.local_path - assert ucc.temp + assert ucc['ucc'].data_len == ase._size + assert ucc['ucc'].fd_start == offsets.fd_start + assert ucc['ucc'].file_path != d.final_path + assert ucc['ucc'].temp + assert not ucc['decrypted'] def test_perform_chunked_integrity_check(tmpdir): @@ -505,10 +507,12 @@ def test_perform_chunked_integrity_check(tmpdir): offsets1, _ = d.next_offsets() d.write_unchecked_hmac_data(offsets1, data) ucc1 = d._unchecked_chunks[offsets1.chunk_num] + ucc['decrypted'] = True + ucc1['decrypted'] = True d.perform_chunked_integrity_check() - assert not ucc.file_path.exists() - assert not ucc1.file_path.exists() + assert ucc['ucc'].file_path != d.final_path + assert ucc1['ucc'].file_path != d.final_path assert d._next_integrity_chunk == 2 assert 0 not in d._unchecked_chunks assert 1 not in d._unchecked_chunks @@ -529,6 +533,7 @@ def test_perform_chunked_integrity_check(tmpdir): ase = azmodels.StorageEntity('cont') ase._size = 32 ase._name = 'blob' + ase._client = mock.MagicMock() ase._md5 = md5.hexdigest() rmgr = rops.DownloadResumeManager(resumefile) @@ -539,7 +544,7 @@ def test_perform_chunked_integrity_check(tmpdir): d.perform_chunked_integrity_check() assert d._next_integrity_chunk == 1 assert len(d._unchecked_chunks) == 0 - dr = rmgr.get_record(str(fp)) + dr = rmgr.get_record(ase) assert dr.next_integrity_chunk == 1 assert dr.md5hexdigest == md5.hexdigest() @@ -553,11 +558,12 @@ def test_update_resume_for_completed(tmpdir): ase = azmodels.StorageEntity('cont') ase._size = 32 ase._name = 'blob' + ase._client = mock.MagicMock() rmgr = rops.DownloadResumeManager(resumefile) d = models.Descriptor(fp, ase, opts, rmgr) offsets, _ = d.next_offsets() d._update_resume_for_completed() - dr = rmgr.get_record(str(fp)) + dr = rmgr.get_record(ase) assert dr.completed @@ -575,8 +581,8 @@ def test_cleanup_all_temporary_files(tmpdir): d.write_unchecked_data(offsets, data) assert len(d._unchecked_chunks) == 1 d.cleanup_all_temporary_files() - assert not d.local_path.exists() - assert not d._unchecked_chunks[0].file_path.exists() + assert not d.final_path.exists() + assert not d._unchecked_chunks[0]['ucc'].file_path.exists() lp = pathlib.Path(str(tmpdir.join('b'))) d = models.Descriptor(lp, ase, opts, None) @@ -585,11 +591,10 @@ def test_cleanup_all_temporary_files(tmpdir): data = b'0' * opts.chunk_size_bytes d.write_unchecked_hmac_data(offsets, data) assert len(d._unchecked_chunks) == 1 - d.local_path.unlink() - d._unchecked_chunks[0].file_path.unlink() + d._unchecked_chunks[0]['ucc'].file_path.unlink() d.cleanup_all_temporary_files() - assert not d.local_path.exists() - assert not d._unchecked_chunks[0].file_path.exists() + assert not d.final_path.exists() + assert not 
d._unchecked_chunks[0]['ucc'].file_path.exists() def test_write_data(tmpdir): @@ -606,11 +611,11 @@ def test_write_data(tmpdir): data = b'0' * ase._size d.write_data(offsets, data) - assert d.local_path.exists() - assert d.local_path.stat().st_size == len(data) + assert d.final_path.exists() + assert d.final_path.stat().st_size == len(data) -def test_finalize_file(tmpdir): +def test_finalize_integrity_and_file(tmpdir): # already finalized lp = pathlib.Path(str(tmpdir.join('af'))) opts = mock.MagicMock() @@ -624,11 +629,12 @@ def test_finalize_file(tmpdir): d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() d._finalized = True + d.finalize_integrity() d.finalize_file() - assert d.local_path.exists() - assert not d.final_path.exists() - d.local_path.unlink() + assert d.final_path.exists() + assert d.final_path.stat().st_size == ase._size + d.final_path.unlink() # hmac check success lp = pathlib.Path(str(tmpdir.join('a'))) @@ -654,9 +660,9 @@ def test_finalize_file(tmpdir): d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() d.hmac.update(data) + d.finalize_integrity() d.finalize_file() - assert not d.local_path.exists() assert d.final_path.exists() assert d.final_path.stat().st_size == len(data) @@ -676,9 +682,9 @@ def test_finalize_file(tmpdir): d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() d.md5.update(data) + d.finalize_integrity() d.finalize_file() - assert not d.local_path.exists() assert d.final_path.exists() assert d.final_path.stat().st_size == len(data) @@ -694,9 +700,9 @@ def test_finalize_file(tmpdir): d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() + d.finalize_integrity() d.finalize_file() - assert not d.local_path.exists() assert d.final_path.exists() assert d.final_path.stat().st_size == len(data) @@ -714,9 +720,9 @@ def test_finalize_file(tmpdir): d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() d.md5.update(data) + d.finalize_integrity() d.finalize_file() - assert not d.local_path.exists() assert not d.final_path.exists() diff --git a/tests/test_blobxfer_models_options.py b/tests/test_blobxfer_models_options.py index 1ee72bb..31edde7 100644 --- a/tests/test_blobxfer_models_options.py +++ b/tests/test_blobxfer_models_options.py @@ -21,22 +21,38 @@ def test_concurrency_options(patched_cc): a = options.Concurrency( crypto_processes=-1, md5_processes=0, + disk_threads=-1, transfer_threads=-2, ) assert a.crypto_processes == 0 assert a.md5_processes == 1 + assert a.disk_threads == 2 + assert a.transfer_threads == 4 + + a = options.Concurrency( + crypto_processes=-1, + md5_processes=0, + disk_threads=1, + transfer_threads=-1, + ) + + assert a.crypto_processes == 0 + assert a.md5_processes == 1 + assert a.disk_threads == 1 assert a.transfer_threads == 4 @mock.patch('multiprocessing.cpu_count', return_value=64) -def test_concurrency_options_max_transfer_threads(patched_cc): +def test_concurrency_options_max_disk_and_transfer_threads(patched_cc): a = options.Concurrency( crypto_processes=1, md5_processes=1, + disk_threads=None, transfer_threads=None, ) + assert a.disk_threads == 64 assert a.transfer_threads == 96 @@ -45,7 +61,8 @@ def test_general_options(): concurrency=options.Concurrency( crypto_processes=1, md5_processes=2, - transfer_threads=3, + disk_threads=3, + transfer_threads=4, ), log_file='abc.log', progress_bar=False, @@ -56,7 +73,8 @@ def test_general_options(): assert a.concurrency.crypto_processes == 1 assert a.concurrency.md5_processes == 2 - assert 
a.concurrency.transfer_threads == 3 + assert a.concurrency.disk_threads == 3 + assert a.concurrency.transfer_threads == 4 assert a.log_file == 'abc.log' assert not a.progress_bar assert a.resume_file == pathlib.Path('abc') @@ -67,7 +85,8 @@ def test_general_options(): concurrency=options.Concurrency( crypto_processes=1, md5_processes=2, - transfer_threads=3, + disk_threads=3, + transfer_threads=4, ), progress_bar=False, resume_file=None, @@ -77,7 +96,8 @@ def test_general_options(): assert a.concurrency.crypto_processes == 1 assert a.concurrency.md5_processes == 2 - assert a.concurrency.transfer_threads == 3 + assert a.concurrency.disk_threads == 3 + assert a.concurrency.transfer_threads == 4 assert a.log_file is None assert not a.progress_bar assert a.resume_file is None diff --git a/tests/test_blobxfer_models_resume.py b/tests/test_blobxfer_models_resume.py index 55a6009..7fb12a3 100644 --- a/tests/test_blobxfer_models_resume.py +++ b/tests/test_blobxfer_models_resume.py @@ -8,9 +8,8 @@ def test_download(): - d = rmodels.Download('fp', 'tp', 1, 2, 0, False, '') + d = rmodels.Download('fp', 1, 2, 0, False, '') assert d.final_path == 'fp' - assert d.temp_path == 'tp' assert d.length == 1 assert d.chunk_size == 2 assert d.next_integrity_chunk == 0 diff --git a/tests/test_blobxfer_models_upload.py b/tests/test_blobxfer_models_upload.py index e6447d7..7d9e057 100644 --- a/tests/test_blobxfer_models_upload.py +++ b/tests/test_blobxfer_models_upload.py @@ -7,7 +7,6 @@ except ImportError: # noqa import pathlib # non-stdlib imports -import pytest # module under test import blobxfer.models.upload as upload @@ -26,14 +25,10 @@ def test_localsourcepaths_files(tmpdir): defpath.join('moo.cow').write('y') a = upload.LocalSourcePath() - a.add_include('*.txt') + a.add_includes('*.txt') a.add_includes(['moo.cow', '*blah*']) - with pytest.raises(ValueError): - a.add_includes('abc') - a.add_exclude('**/blah.x') + a.add_excludes('**/blah.x') a.add_excludes(['world.txt']) - with pytest.raises(ValueError): - a.add_excludes('abc') a.add_path(str(tmpdir)) a_set = set() for file in a.files(): @@ -47,9 +42,9 @@ def test_localsourcepaths_files(tmpdir): b = upload.LocalSourcePath() b.add_includes(['moo.cow', '*blah*']) - b.add_include('*.txt') + b.add_includes('*.txt') b.add_excludes(['world.txt']) - b.add_exclude('**/blah.x') + b.add_excludes('**/blah.x') b.add_paths([pathlib.Path(str(tmpdir))]) for file in a.files(): sfile = str(file.parent_path / file.relative_path) diff --git a/tests/test_blobxfer_operations_azure.py b/tests/test_blobxfer_operations_azure.py index 346fab6..0322aa4 100644 --- a/tests/test_blobxfer_operations_azure.py +++ b/tests/test_blobxfer_operations_azure.py @@ -106,7 +106,7 @@ def test_azuresourcepath_files(patched_lf, patched_em): i = 0 for file in asp.files(creds, options, mock.MagicMock()): i += 1 - assert file.name == 'name' + assert file.name == 'remote/name' assert file.encryption_metadata is None assert i == 1 @@ -119,7 +119,7 @@ def test_azuresourcepath_files(patched_lf, patched_em): i = 0 for file in asp.files(creds, options, mock.MagicMock()): i += 1 - assert file.name == 'name' + assert file.name == 'remote/name' assert file.encryption_metadata is not None assert i == 1 diff --git a/tests/test_blobxfer_operations_crypto.py b/tests/test_blobxfer_operations_crypto.py index d3fdc62..f3dfc61 100644 --- a/tests/test_blobxfer_operations_crypto.py +++ b/tests/test_blobxfer_operations_crypto.py @@ -118,7 +118,7 @@ def test_cryptooffload_decrypt(tmpdir): unpad=False, ) a.add_decrypt_chunk( 
- 'fp', str(bfile), offsets, symkey, iv, hmacfile) + str(bfile), 0, offsets, symkey, iv, hmacfile) i = 33 checked = False while i > 0: @@ -127,7 +127,7 @@ def test_cryptooffload_decrypt(tmpdir): time.sleep(0.3) i -= 1 continue - assert result == 'fp' + assert result == (str(bfile), offsets) checked = True break assert checked diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index 08702b1..fcc2865 100644 --- a/tests/test_blobxfer_operations_download.py +++ b/tests/test_blobxfer_operations_download.py @@ -285,18 +285,26 @@ def test_pre_md5_skip_on_check(): rfile = azmodels.StorageEntity('cont') rfile._encryption = mock.MagicMock() rfile._encryption.blobxfer_extensions = mock.MagicMock() - rfile._encryption.blobxfer_extensions.pre_encrypted_content_md5 = \ - 'abc' + rfile._encryption.blobxfer_extensions.pre_encrypted_content_md5 = 'abc' + rfile._client = mock.MagicMock() + rfile._client.primary_endpoint = 'ep' + rfile._name = 'name' + rfile._vio = None lpath = 'lpath' + key = ops.Downloader.create_unique_transfer_operation_id(rfile) d._pre_md5_skip_on_check(lpath, rfile) - assert lpath in d._md5_map + assert key in d._md5_map + rfile._name = 'name2' lpath = 'lpath2' rfile._encryption = None rfile._md5 = 'abc' + key = ops.Downloader.create_unique_transfer_operation_id(rfile) d._pre_md5_skip_on_check(lpath, rfile) - assert lpath in d._md5_map + assert key in d._md5_map + + assert len(d._md5_map) == 2 def test_post_md5_skip_on_check(tmpdir): @@ -309,28 +317,45 @@ def test_post_md5_skip_on_check(tmpdir): lpath = str(lp) rfile = azmodels.StorageEntity('cont') rfile._md5 = 'abc' + rfile._client = mock.MagicMock() + rfile._client.primary_endpoint = 'ep' + rfile._name = 'name' + rfile._vio = None + rfile._size = 256 d._pre_md5_skip_on_check(lpath, rfile) - d._download_set.add(pathlib.Path(lpath)) - assert lpath in d._md5_map + key = ops.Downloader.create_unique_transfer_operation_id(rfile) + d._transfer_set.add(key) + assert key in d._md5_map - d._post_md5_skip_on_check(lpath, True) - assert lpath not in d._md5_map + d._post_md5_skip_on_check(key, lpath, rfile._size, True) + assert key not in d._md5_map d._add_to_download_queue = mock.MagicMock() d._pre_md5_skip_on_check(lpath, rfile) - d._download_set.add(pathlib.Path(lpath)) - d._post_md5_skip_on_check(lpath, False) + d._transfer_set.add(key) + d._post_md5_skip_on_check(key, lpath, rfile._size, False) assert d._add_to_download_queue.call_count == 1 def test_check_for_downloads_from_md5(): lpath = 'lpath' + rfile = azmodels.StorageEntity('cont') + rfile._md5 = 'abc' + rfile._client = mock.MagicMock() + rfile._client.primary_endpoint = 'ep' + rfile._name = 'name' + rfile._vio = None + rfile._size = 256 + key = ops.Downloader.create_unique_transfer_operation_id(rfile) d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._md5_map[lpath] = mock.MagicMock() - d._download_set.add(pathlib.Path(lpath)) + d._md5_map[key] = rfile + d._transfer_set.add(key) d._md5_offload = mock.MagicMock() d._md5_offload.done_cv = multiprocessing.Condition() - d._md5_offload.pop_done_queue.side_effect = [None, (lpath, False)] + d._md5_offload.pop_done_queue.side_effect = [ + None, + (key, lpath, rfile._size, False), + ] d._add_to_download_queue = mock.MagicMock() d._all_remote_files_processed = False d._download_terminate = True @@ -343,11 +368,14 @@ def test_check_for_downloads_from_md5(): new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), 
mock.MagicMock()) - d._md5_map[lpath] = mock.MagicMock() - d._download_set.add(pathlib.Path(lpath)) + d._md5_map[key] = rfile + d._transfer_set.add(key) d._md5_offload = mock.MagicMock() d._md5_offload.done_cv = multiprocessing.Condition() - d._md5_offload.pop_done_queue.side_effect = [None, (lpath, False)] + d._md5_offload.pop_done_queue.side_effect = [ + None, + (key, lpath, rfile._size, False), + ] d._add_to_download_queue = mock.MagicMock() patched_tc.side_effect = [False, False, True] d._check_for_downloads_from_md5() @@ -359,8 +387,8 @@ def test_check_for_downloads_from_md5(): new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._md5_map[lpath] = mock.MagicMock() - d._download_set.add(pathlib.Path(lpath)) + d._md5_map[key] = rfile + d._transfer_set.add(key) d._md5_offload = mock.MagicMock() d._md5_offload.done_cv = multiprocessing.Condition() d._md5_offload.pop_done_queue.side_effect = [None] @@ -372,15 +400,25 @@ def test_check_for_downloads_from_md5(): def test_check_for_crypto_done(): lpath = 'lpath' + rfile = azmodels.StorageEntity('cont') + rfile._md5 = 'abc' + rfile._client = mock.MagicMock() + rfile._client.primary_endpoint = 'ep' + rfile._name = 'name' + rfile._vio = None + rfile._size = 256 + key = ops.Downloader.create_unique_transfer_operation_id(rfile) d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._download_set.add(pathlib.Path(lpath)) + d._transfer_set.add(key) dd = mock.MagicMock() d._dd_map[lpath] = dd + offsets = mock.MagicMock() + offsets.range_start = 0 d._crypto_offload = mock.MagicMock() d._crypto_offload.done_cv = multiprocessing.Condition() d._crypto_offload.pop_done_queue.side_effect = [ None, - lpath, + (lpath, offsets) ] d._all_remote_files_processed = False d._download_terminate = True @@ -393,14 +431,16 @@ def test_check_for_crypto_done(): new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._download_set.add(pathlib.Path(lpath)) + d._transfer_set.add(key) dd = mock.MagicMock() + dd.entity = rfile + dd.final_path = lpath d._dd_map[lpath] = dd d._crypto_offload = mock.MagicMock() d._crypto_offload.done_cv = multiprocessing.Condition() d._crypto_offload.pop_done_queue.side_effect = [ None, - lpath, + (lpath, offsets), ] patched_tc.side_effect = [False, False, True] d._complete_chunk_download = mock.MagicMock() @@ -413,13 +453,15 @@ def test_check_for_crypto_done(): new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._download_set.add(pathlib.Path(lpath)) + d._transfer_set.add(key) dd = mock.MagicMock() + dd.entity = rfile + dd.final_path = lpath d._crypto_offload = mock.MagicMock() d._crypto_offload.done_cv = multiprocessing.Condition() d._crypto_offload.pop_done_queue.side_effect = [ None, - lpath, + (lpath, offsets), ] patched_tc.side_effect = [False, False, True] d._complete_chunk_download = mock.MagicMock() @@ -438,39 +480,41 @@ def test_add_to_download_queue(tmpdir): d._spec.options.chunk_size_bytes = 1 d._add_to_download_queue(lpath, ase) - assert d._download_queue.qsize() == 1 + assert d._transfer_queue.qsize() == 1 assert path in d._dd_map -def test_initialize_and_terminate_download_threads(): +def test_initialize_and_terminate_transfer_threads(): opts = mock.MagicMock() opts.concurrency.transfer_threads = 2 d = ops.Downloader(opts, mock.MagicMock(), mock.MagicMock()) - d._worker_thread_download = 
mock.MagicMock() + d._worker_thread_transfer = mock.MagicMock() - d._initialize_download_threads() - assert len(d._download_threads) == 2 + d._initialize_transfer_threads() + assert len(d._transfer_threads) == 2 - d._wait_for_download_threads(terminate=True) + d._wait_for_transfer_threads(terminate=True) assert d._download_terminate - for thr in d._download_threads: + for thr in d._transfer_threads: assert not thr.is_alive() @mock.patch('blobxfer.operations.crypto.aes_cbc_decrypt_data') @mock.patch('blobxfer.operations.azure.file.get_file_range') @mock.patch('blobxfer.operations.azure.blob.get_blob_range') -def test_worker_thread_download( +def test_worker_thread_transfer( patched_gbr, patched_gfr, patched_acdd, tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._complete_chunk_download = mock.MagicMock() d._download_terminate = True - d._worker_thread_download() + d._general_options.concurrency.transfer_threads = 1 + d._general_options.concurrency.disk_threads = 1 + d._worker_thread_transfer() assert d._complete_chunk_download.call_count == 0 d._download_terminate = False d._all_remote_files_processed = True - d._worker_thread_download() + d._worker_thread_transfer() assert d._complete_chunk_download.call_count == 0 with mock.patch( @@ -486,11 +530,11 @@ def test_worker_thread_download( opts.check_file_md5 = False opts.chunk_size_bytes = 16 dd = models.Descriptor(lp, ase, opts, None) - d._download_queue = mock.MagicMock() - d._download_queue.get.side_effect = [queue.Empty, dd] + d._transfer_queue = mock.MagicMock() + d._transfer_queue.get.side_effect = [queue.Empty, dd] d._process_download_descriptor = mock.MagicMock() d._process_download_descriptor.side_effect = RuntimeError('oops') - d._worker_thread_download() + d._worker_thread_transfer() assert len(d._exceptions) == 1 assert d._process_download_descriptor.call_count == 1 @@ -503,26 +547,35 @@ def test_worker_thread_download( new_callable=mock.PropertyMock) as patched_aoc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.concurrency.transfer_threads = 1 + d._general_options.concurrency.disk_threads = 1 opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 ase = azmodels.StorageEntity('cont') ase._size = 16 + ase._client = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase._name = 'name' + ase._vio = None + key = ops.Downloader.create_unique_transfer_operation_id(ase) ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' lp = pathlib.Path(str(tmpdir.join('a'))) dd = models.Descriptor(lp, ase, opts, None) dd.next_offsets = mock.MagicMock( side_effect=[(None, 1), (None, 2)]) + dd.finalize_integrity = mock.MagicMock() dd.finalize_file = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() + dd.all_operations_completed.side_effect = [False, True] patched_aoc.side_effect = [False, True] patched_tc.side_effect = [False, False, False, True] d._dd_map[str(lp)] = dd - d._download_set.add(lp) - d._download_queue = mock.MagicMock() - d._download_queue.get.side_effect = [queue.Empty, dd, dd] - d._worker_thread_download() + d._transfer_set.add(key) + d._transfer_queue = mock.MagicMock() + d._transfer_queue.get.side_effect = [queue.Empty, dd, dd] + d._worker_thread_transfer() assert str(lp) not in d._dd_map assert dd.finalize_file.call_count == 1 assert d._download_sofar == 1 @@ -533,23 +586,33 @@ def test_worker_thread_download( new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( 
mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.concurrency.transfer_threads = 1 + d._general_options.concurrency.disk_threads = 1 opts = mock.MagicMock() opts.check_file_md5 = True opts.chunk_size_bytes = 16 ase = azmodels.StorageEntity('cont') ase._mode = azmodels.StorageModes.File ase._size = 16 + ase._client = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase._name = 'name' + ase._vio = None + key = ops.Downloader.create_unique_transfer_operation_id(ase) patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('b'))) dd = models.Descriptor(lp, ase, opts, None) dd.finalize_file = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() d._dd_map[str(lp)] = mock.MagicMock() - d._download_set.add(lp) - d._download_queue = mock.MagicMock() - d._download_queue.get.side_effect = [dd] + d._transfer_set.add(key) + d._transfer_queue = mock.MagicMock() + d._transfer_queue.get.side_effect = [dd] patched_tc.side_effect = [False, True] - d._worker_thread_download() + d._worker_thread_transfer() + assert len(d._disk_set) == 1 + a, b, c = d._disk_queue.get() + d._process_data(a, b, c) assert dd.perform_chunked_integrity_check.call_count == 1 with mock.patch( @@ -557,6 +620,8 @@ def test_worker_thread_download( new_callable=mock.PropertyMock) as patched_tc: d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.concurrency.transfer_threads = 1 + d._general_options.concurrency.disk_threads = 1 opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 @@ -566,6 +631,11 @@ def test_worker_thread_download( ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' ase._encryption.content_encryption_iv = b'0' * 16 + ase._client = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase._name = 'name' + ase._vio = None + key = ops.Downloader.create_unique_transfer_operation_id(ase) patched_gfr.return_value = b'0' * ase._size lp = pathlib.Path(str(tmpdir.join('c'))) dd = models.Descriptor(lp, ase, opts, None) @@ -575,11 +645,14 @@ def test_worker_thread_download( d._crypto_offload = mock.MagicMock() d._crypto_offload.add_decrypt_chunk = mock.MagicMock() d._dd_map[str(lp)] = dd - d._download_set.add(lp) - d._download_queue = mock.MagicMock() - d._download_queue.get.side_effect = [dd] + d._transfer_set.add(key) + d._transfer_queue = mock.MagicMock() + d._transfer_queue.get.side_effect = [dd] patched_tc.side_effect = [False, True] - d._worker_thread_download() + d._worker_thread_transfer() + assert len(d._disk_set) == 1 + a, b, c = d._disk_queue.get() + d._process_data(a, b, c) assert d._crypto_offload.add_decrypt_chunk.call_count == 1 assert dd.write_unchecked_hmac_data.call_count == 1 @@ -589,6 +662,8 @@ def test_worker_thread_download( d = ops.Downloader( mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._general_options.concurrency.crypto_processes = 0 + d._general_options.concurrency.transfer_threads = 1 + d._general_options.concurrency.disk_threads = 1 opts = mock.MagicMock() opts.check_file_md5 = False opts.chunk_size_bytes = 16 @@ -598,19 +673,28 @@ def test_worker_thread_download( ase._encryption = mock.MagicMock() ase._encryption.symmetric_key = b'abc' ase._encryption.content_encryption_iv = b'0' * 16 + ase._client = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase._name = 'name' + ase._vio = None + key = ops.Downloader.create_unique_transfer_operation_id(ase) patched_gfr.return_value = b'0' * ase._size lp = 
pathlib.Path(str(tmpdir.join('d'))) dd = models.Descriptor(lp, ase, opts, None) dd.next_offsets() dd.write_unchecked_hmac_data = mock.MagicMock() dd.perform_chunked_integrity_check = mock.MagicMock() + dd.mark_unchecked_chunk_decrypted = mock.MagicMock() patched_acdd.return_value = b'0' * 16 d._dd_map[str(lp)] = mock.MagicMock() - d._download_set.add(lp) - d._download_queue = mock.MagicMock() - d._download_queue.get.side_effect = [dd] + d._transfer_set.add(key) + d._transfer_queue = mock.MagicMock() + d._transfer_queue.get.side_effect = [dd, dd] patched_tc.side_effect = [False, True] - d._worker_thread_download() + d._worker_thread_transfer() + assert len(d._disk_set) == 1 + a, b, c = d._disk_queue.get() + d._process_data(a, b, c) assert patched_acdd.call_count == 1 assert dd.write_unchecked_hmac_data.call_count == 1 assert dd.perform_chunked_integrity_check.call_count == 1 @@ -631,7 +715,7 @@ def test_cleanup_temporary_files(tmpdir): d._general_options.resume_file = pathlib.Path('abc') d._dd_map[0] = dd d._cleanup_temporary_files() - assert dd.local_path.exists() + assert dd.final_path.exists() lp = pathlib.Path(str(tmpdir.join('b'))) opts = mock.MagicMock() @@ -645,7 +729,7 @@ def test_cleanup_temporary_files(tmpdir): d._general_options.resume_file = None d._dd_map[0] = dd d._cleanup_temporary_files() - assert not dd.local_path.exists() + assert not dd.final_path.exists() lp = pathlib.Path(str(tmpdir.join('c'))) opts = mock.MagicMock() @@ -661,7 +745,7 @@ def test_cleanup_temporary_files(tmpdir): d._general_options.resume_file = None d._dd_map[0] = dd d._cleanup_temporary_files() - assert dd.local_path.exists() + assert dd.final_path.exists() def test_catalog_local_files_for_deletion(tmpdir): @@ -699,21 +783,16 @@ def test_delete_extraneous_files(tmpdir): d._delete_extraneous_files() -@mock.patch('blobxfer.operations.md5.LocalFileMd5Offload') -@mock.patch('blobxfer.operations.azure.blob.list_blobs') -@mock.patch( - 'blobxfer.operations.download.Downloader.ensure_local_destination', - return_value=True -) -def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): +def _create_downloader_for_start(td): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._cleanup_temporary_files = mock.MagicMock() d._download_start = datetime.datetime.now(tz=dateutil.tz.tzlocal()) - d._initialize_download_threads = mock.MagicMock() - patched_lfmo._check_thread = mock.MagicMock() - d._general_options.concurrency.crypto_processes = 1 + d._initialize_transfer_threads = mock.MagicMock() + d._general_options.concurrency.crypto_processes = 0 d._general_options.concurrency.md5_processes = 1 - d._general_options.resume_file = pathlib.Path(str(tmpdir.join('rf'))) + d._general_options.concurrency.disk_threads = 1 + d._general_options.concurrency.transfer_threads = 1 + d._general_options.resume_file = pathlib.Path(str(td.join('rf'))) d._spec.sources = [] d._spec.options = mock.MagicMock() d._spec.options.chunk_size_bytes = 1 @@ -725,50 +804,84 @@ def test_start(patched_eld, patched_lb, patched_lfmo, tmpdir): d._spec.skip_on.lmt_ge = False d._spec.skip_on.filesize_match = False d._spec.destination = mock.MagicMock() - d._spec.destination.path = pathlib.Path(str(tmpdir)) + d._spec.destination.path = pathlib.Path(str(td)) d._download_start_time = util.datetime_now() + d._pre_md5_skip_on_check = mock.MagicMock() + d._check_download_conditions = mock.MagicMock() + d._all_remote_files_processed = False p = '/cont/remote/path' asp = azops.SourcePath() asp.add_path_with_storage_account(p, 'sa') 
d._spec.sources.append(asp) - b = azure.storage.blob.models.Blob(name='name') + return d + + +@mock.patch('blobxfer.operations.md5.LocalFileMd5Offload') +@mock.patch('blobxfer.operations.azure.blob.list_blobs') +@mock.patch( + 'blobxfer.operations.download.Downloader.ensure_local_destination', + return_value=True +) +@mock.patch( + 'blobxfer.operations.download.Downloader.' + 'create_unique_transfer_operation_id', + return_value='id' +) +@mock.patch( + 'blobxfer.operations.download.Downloader._wait_for_transfer_threads', + return_value=None +) +@mock.patch( + 'blobxfer.operations.download.Downloader._wait_for_disk_threads', + return_value=None +) +def test_start( + patched_wdt, patched_wtt, patched_cutoi, patched_eld, patched_lb, + patched_lfmo, tmpdir): + patched_lfmo._check_thread = mock.MagicMock() + + b = azure.storage.blob.models.Blob(name='remote/path/name') b.properties.content_length = 1 patched_lb.side_effect = [[b]] - d._pre_md5_skip_on_check = mock.MagicMock() - d._check_download_conditions = mock.MagicMock() + d = _create_downloader_for_start(tmpdir) d._check_download_conditions.return_value = ops.DownloadAction.Skip + d._download_sofar = -1 + d._download_bytes_sofar = -1 d.start() assert d._pre_md5_skip_on_check.call_count == 0 patched_lb.side_effect = [[b]] - d._all_remote_files_processed = False + d = _create_downloader_for_start(tmpdir) d._check_download_conditions.return_value = ops.DownloadAction.CheckMd5 + d._download_sofar = -1 with pytest.raises(RuntimeError): d.start() + d._download_terminate = True assert d._pre_md5_skip_on_check.call_count == 1 b.properties.content_length = 0 patched_lb.side_effect = [[b]] - d._all_remote_files_processed = False + d = _create_downloader_for_start(tmpdir) d._check_download_conditions.return_value = ops.DownloadAction.Download with pytest.raises(RuntimeError): d.start() - assert d._download_queue.qsize() == 1 + d._download_terminate = True + assert d._transfer_queue.qsize() == 1 # test exception count b = azure.storage.blob.models.Blob(name='name') b.properties.content_length = 1 patched_lb.side_effect = [[b]] + d = _create_downloader_for_start(tmpdir) d._spec.destination.is_dir = False d._spec.options.rename = True - d._pre_md5_skip_on_check = mock.MagicMock() - d._check_download_conditions = mock.MagicMock() d._check_download_conditions.return_value = ops.DownloadAction.Skip d._exceptions = [RuntimeError('oops')] with pytest.raises(RuntimeError): d.start() + d._download_terminate = True assert d._pre_md5_skip_on_check.call_count == 0 @@ -776,11 +889,11 @@ def test_start_keyboard_interrupt(): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) d._general_options.resume_file = None d._run = mock.MagicMock(side_effect=KeyboardInterrupt) - d._wait_for_download_threads = mock.MagicMock() + d._wait_for_transfer_threads = mock.MagicMock() d._cleanup_temporary_files = mock.MagicMock() d._md5_offload = mock.MagicMock() with pytest.raises(KeyboardInterrupt): d.start() - assert d._wait_for_download_threads.call_count == 1 + assert d._wait_for_transfer_threads.call_count == 1 assert d._cleanup_temporary_files.call_count == 1 diff --git a/tests/test_blobxfer_operations_md5.py b/tests/test_blobxfer_operations_md5.py index 5bd7b20..02be647 100644 --- a/tests/test_blobxfer_operations_md5.py +++ b/tests/test_blobxfer_operations_md5.py @@ -57,6 +57,8 @@ def test_finalize_md5_processes(): def test_from_add_to_done_non_pagealigned(tmpdir): file = tmpdir.join('a') file.write('abc') + fpath = str(file) + key = 'key' remote_md5 = 
ops.compute_md5_for_file_asbase64(str(file)) @@ -67,7 +69,7 @@ def test_from_add_to_done_non_pagealigned(tmpdir): assert result is None a.add_localfile_for_md5_check( - str(file), remote_md5, azmodels.StorageModes.Block) + key, fpath, fpath, remote_md5, azmodels.StorageModes.Block, None) i = 33 checked = False while i > 0: @@ -76,9 +78,11 @@ def test_from_add_to_done_non_pagealigned(tmpdir): time.sleep(0.3) i -= 1 continue - assert len(result) == 2 - assert result[0] == str(file) - assert result[1] + assert len(result) == 4 + assert result[0] == key + assert result[1] == str(file) + assert result[2] is None + assert result[3] checked = True break assert checked @@ -90,6 +94,8 @@ def test_from_add_to_done_non_pagealigned(tmpdir): def test_from_add_to_done_pagealigned(tmpdir): file = tmpdir.join('a') file.write('abc') + fpath = str(file) + key = 'key' remote_md5 = ops.compute_md5_for_file_asbase64(str(file), True) @@ -100,7 +106,7 @@ def test_from_add_to_done_pagealigned(tmpdir): assert result is None a.add_localfile_for_md5_check( - str(file), remote_md5, azmodels.StorageModes.Page) + key, fpath, fpath, remote_md5, azmodels.StorageModes.Page, None) i = 33 checked = False while i > 0: @@ -109,9 +115,11 @@ def test_from_add_to_done_pagealigned(tmpdir): time.sleep(0.3) i -= 1 continue - assert len(result) == 2 - assert result[0] == str(file) - assert result[1] + assert len(result) == 4 + assert result[0] == key + assert result[1] == str(file) + assert result[2] is None + assert result[3] checked = True break assert checked diff --git a/tests/test_blobxfer_operations_progress.py b/tests/test_blobxfer_operations_progress.py index 75f9f79..721501e 100644 --- a/tests/test_blobxfer_operations_progress.py +++ b/tests/test_blobxfer_operations_progress.py @@ -13,12 +13,12 @@ import blobxfer.operations.progress as ops -def test_output_download_parameters(): +def test_output_parameters(): go = mock.MagicMock() spec = mock.MagicMock() go.log_file = 'abc' - ops.output_download_parameters(go, spec) + ops.output_parameters(go, spec) assert util.is_not_empty(go.log_file) diff --git a/tests/test_blobxfer_operations_resume.py b/tests/test_blobxfer_operations_resume.py index 52f11b8..9894d3b 100644 --- a/tests/test_blobxfer_operations_resume.py +++ b/tests/test_blobxfer_operations_resume.py @@ -2,6 +2,10 @@ """Tests for operations resume""" # stdlib imports +try: + import unittest.mock as mock +except ImportError: # noqa + import mock try: import pathlib2 as pathlib except ImportError: # noqa @@ -23,23 +27,28 @@ def test_download_resume_manager(tmpdir): assert drm._data is None assert not tmpdb.exists() + ase = mock.MagicMock() + ase._name = 'name' + ase._client.primary_endpoint = 'ep' + ase._size = 16 + final_path = 'fp' drm = ops.DownloadResumeManager(tmpdb) - drm.add_or_update_record(final_path, 'tp', 1, 2, 0, False, None) - d = drm.get_record(final_path) + drm.add_or_update_record(final_path, ase, 2, 0, False, None) + d = drm.get_record(ase) assert d.final_path == final_path - drm.add_or_update_record(final_path, 'tp', 1, 2, 1, False, 'abc') - d = drm.get_record(final_path) + drm.add_or_update_record(final_path, ase, 2, 1, False, 'abc') + d = drm.get_record(ase) assert d.final_path == final_path assert not d.completed assert d.next_integrity_chunk == 1 assert d.md5hexdigest == 'abc' - drm.add_or_update_record(final_path, 'tp', 1, 2, 1, True, None) - d = drm.get_record(final_path) + drm.add_or_update_record(final_path, ase, 2, 1, True, None) + d = drm.get_record(ase) assert d.final_path == final_path assert 
d.completed @@ -47,8 +56,8 @@ def test_download_resume_manager(tmpdir): assert d.md5hexdigest == 'abc' # idempotent check after completed - drm.add_or_update_record(final_path, 'tp', 1, 2, 1, True, None) - d = drm.get_record(final_path) + drm.add_or_update_record(final_path, ase, 2, 1, True, None) + d = drm.get_record(ase) assert d.final_path == final_path assert d.completed diff --git a/tests/test_blobxfer_retry.py b/tests/test_blobxfer_retry.py index 9d84b90..d44fa21 100644 --- a/tests/test_blobxfer_retry.py +++ b/tests/test_blobxfer_retry.py @@ -17,26 +17,34 @@ def test_exponentialretrywithmaxwait(): er = retry.ExponentialRetryWithMaxWait( initial_backoff=1, max_backoff=0) + with pytest.raises(ValueError): + er = retry.ExponentialRetryWithMaxWait( + initial_backoff=1, max_backoff=1, max_retries=-1) + + with pytest.raises(ValueError): + er = retry.ExponentialRetryWithMaxWait( + initial_backoff=2, max_backoff=1) + er = retry.ExponentialRetryWithMaxWait() context = mock.MagicMock() context.count = 0 context.response.status = 500 bo = er.retry(context) assert context.count == 1 - assert bo == 1 + assert bo == 0.1 bo = er.retry(context) assert context.count == 2 - assert bo == 2 + assert bo == 0.2 bo = er.retry(context) assert context.count == 3 - assert bo == 4 + assert bo == 0.4 bo = er.retry(context) assert context.count == 4 - assert bo == 8 + assert bo == 0.8 bo = er.retry(context) - assert context.count == 1 - assert bo == 1 + assert context.count == 5 + assert bo == 0.1 diff --git a/tox.ini b/tox.ini index 58a6df6..d05615f 100644 --- a/tox.ini +++ b/tox.ini @@ -4,8 +4,8 @@ envlist = py27, py35 [testenv] deps = -rtest_requirements.txt commands = - #flake8 {envsitepackagesdir}/blobxfer_cli/ - #flake8 {envsitepackagesdir}/blobxfer/ + flake8 {envsitepackagesdir}/blobxfer_cli/ + flake8 {envsitepackagesdir}/blobxfer/ py.test \ -x -l -s \ --ignore venv/ \ From 2999d0bc297faa68e2c64cb60abfb460a4ffa759 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 1 Jun 2017 08:21:11 -0700 Subject: [PATCH 41/47] Fix vectored replica mode - Fix MD5 check condition --- blobxfer/models/metadata.py | 20 +++++++++++- blobxfer/models/upload.py | 5 ++- blobxfer/operations/download.py | 14 +++----- blobxfer/operations/upload.py | 38 +++++++++++++--------- tests/test_blobxfer_operations_download.py | 3 ++ 5 files changed, 53 insertions(+), 27 deletions(-) diff --git a/blobxfer/models/metadata.py b/blobxfer/models/metadata.py index 7d5ea0d..ead4b79 100644 --- a/blobxfer/models/metadata.py +++ b/blobxfer/models/metadata.py @@ -88,6 +88,24 @@ ) +def get_md5_from_metadata(ase): + # type: (blobxfer.models.azure.StorageEntity) -> str + """Get MD5 from properties or metadata + :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity + :rtype: str or None + :return: md5 + """ + # if encryption metadata is present, check for pre-encryption + # md5 in blobxfer extensions + md5 = None + if ase.is_encrypted: + md5 = ase.encryption_metadata.blobxfer_extensions.\ + pre_encrypted_content_md5 + if blobxfer.util.is_none_or_empty(md5): + md5 = ase.md5 + return md5 + + def generate_fileattr_metadata(local_path, metadata): # type: (blobxfer.models.upload.LocalPath, dict) -> dict """Generate file attribute metadata dict @@ -159,7 +177,7 @@ def restore_fileattr(path, metadata): def create_vectored_io_next_entry(ase): - # type: (blobxfer.models.upload.LocalPath) -> str + # type: (blobxfer.models.azure.StorageEntity) -> str """Create Vectored IO next entry id :param blobxfer.models.azure.StorageEntity ase: Azure Storage 
Entity :rtype: str diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index d411bb0..d5151b4 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -361,7 +361,7 @@ def __init__(self, lpath, ase, uid, options, resume_mgr): self._total_chunks = self._compute_total_chunks(self._chunk_size) self._outstanding_ops = self._total_chunks if blobxfer.util.is_not_empty(self._ase.replica_targets): - self._outstanding_ops *= len(self._ase.replica_targets) + self._outstanding_ops *= len(self._ase.replica_targets) + 1 if self._resume_mgr: self._completed_chunks = bitstring.BitArray( length=self._total_chunks) @@ -577,6 +577,9 @@ def _compute_remote_size(self): else: allocatesize = 0 self._ase.size = allocatesize + if blobxfer.util.is_not_empty(self._ase.replica_targets): + for rt in self._ase.replica_targets: + rt.size = allocatesize logger.debug('remote size for {} is {} bytes'.format( self._ase.path, self._ase.size)) diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index a369d0f..3cbef61 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -45,6 +45,7 @@ # non-stdlib imports # local imports import blobxfer.models.crypto +import blobxfer.models.metadata import blobxfer.operations.azure.blob import blobxfer.operations.azure.file import blobxfer.operations.crypto @@ -237,8 +238,8 @@ def _check_download_conditions(self, lpath, rfile): lpath, rfile.path)) return DownloadAction.Skip # check skip on options, MD5 match takes priority - if (self._spec.skip_on.md5_match and - blobxfer.util.is_not_empty(rfile.md5)): + md5 = blobxfer.models.metadata.get_md5_from_metadata(rfile) + if self._spec.skip_on.md5_match and blobxfer.util.is_not_empty(md5): return DownloadAction.CheckMd5 # if neither of the remaining skip on actions are activated, download if (not self._spec.skip_on.filesize_match and @@ -277,14 +278,7 @@ def _pre_md5_skip_on_check(self, lpath, rfile): :param pathlib.Path lpath: local path :param blobxfer.models.azure.StorageEntity rfile: remote file """ - # if encryption metadata is present, check for pre-encryption - # md5 in blobxfer extensions - md5 = None - if rfile.encryption_metadata is not None: - md5 = rfile.encryption_metadata.blobxfer_extensions.\ - pre_encrypted_content_md5 - if md5 is None: - md5 = rfile.md5 + md5 = blobxfer.models.metadata.get_md5_from_metadata(rfile) key = blobxfer.operations.download.Downloader.\ create_unique_transfer_operation_id(rfile) with self._md5_meta_lock: diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 02447c9..232e8ba 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -46,7 +46,11 @@ # non-stdlib imports # local imports import blobxfer.models.crypto +import blobxfer.models.metadata import blobxfer.operations.azure.blob +import blobxfer.operations.azure.blob.append +import blobxfer.operations.azure.blob.block +import blobxfer.operations.azure.blob.page import blobxfer.operations.azure.file import blobxfer.operations.crypto import blobxfer.operations.md5 @@ -86,9 +90,9 @@ def __init__(self, general_options, creds, spec): self._upload_set = set() self._upload_start_time = None self._disk_threads = [] - self._upload_total = None + self._upload_total = 0 self._upload_sofar = 0 - self._upload_bytes_total = None + self._upload_bytes_total = 0 self._upload_bytes_sofar = 0 self._upload_terminate = False self._transfer_lock = threading.Lock() @@ -218,14 +222,7 @@ def _pre_md5_skip_on_check(self, src, rfile): :param 
blobxfer.models.upload.LocalPath src: local path :param blobxfer.models.azure.StorageEntity rfile: remote file """ - # if encryption metadata is present, check for pre-encryption - # md5 in blobxfer extensions - md5 = None - if rfile.encryption_metadata is not None: - md5 = rfile.encryption_metadata.blobxfer_extensions.\ - pre_encrypted_content_md5 - if md5 is None: - md5 = rfile.md5 + md5 = blobxfer.models.metadata.get_md5_from_metadata(rfile) key = blobxfer.operations.upload.Uploader.create_unique_id(src, rfile) with self._md5_meta_lock: self._md5_map[key] = (src, rfile) @@ -786,8 +783,8 @@ def _check_upload_conditions(self, local_path, rfile): rfile.path, lpath)) return UploadAction.Skip # check skip on options, MD5 match takes priority - if (self._spec.skip_on.md5_match and - blobxfer.util.is_not_empty(rfile.md5)): + md5 = blobxfer.models.metadata.get_md5_from_metadata(rfile) + if self._spec.skip_on.md5_match and blobxfer.util.is_not_empty(md5): return UploadAction.CheckMd5 # if neither of the remaining skip on actions are activated, upload if (not self._spec.skip_on.filesize_match and @@ -991,7 +988,17 @@ def _vectorize_and_bind(self, local_path, dest): yield action, local_path, ase else: primary_ase = dst[0] + if primary_ase.replica_targets is None: + primary_ase.replica_targets = [] primary_ase.replica_targets.extend(dst[1:]) + # add replica targets to deletion exclusion set + if self._spec.options.delete_extraneous_destination: + for rt in primary_ase.replica_targets: + self._delete_exclude.add( + blobxfer.operations.upload.Uploader. + create_deletion_id( + rt._client, rt.container, rt.name) + ) yield action, local_path, primary_ase else: for _, ase in dest: @@ -1019,7 +1026,8 @@ def _run(self): self._md5_offload.initialize_check_thread( self._check_for_uploads_from_md5) # initialize crypto processes - if self._general_options.concurrency.crypto_processes > 0: + if (self._spec.options.rsa_public_key is not None and + self._general_options.concurrency.crypto_processes > 0): logger.warning( 'crypto offload for upload is not possible due to ' 'sequential nature of {} and FullBlob encryption mode'.format( @@ -1033,8 +1041,6 @@ def _run(self): skipped_files = 0 skipped_size = 0 approx_total_bytes = 0 - self._upload_total = 0 - self._upload_bytes_total = 0 if not self._spec.sources.can_rename() and self._spec.options.rename: raise RuntimeError( 'cannot rename to specified destination with multiple sources') @@ -1056,6 +1062,8 @@ def _run(self): skipped_size += ase.size if ase.size is not None else 0 continue approx_total_bytes += lp.size + if blobxfer.util.is_not_empty(ase.replica_targets): + approx_total_bytes += lp.size * len(ase.replica_targets) # add to potential upload set uid = blobxfer.operations.upload.Uploader.create_unique_id( lp, ase) diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index fcc2865..90b83fe 100644 --- a/tests/test_blobxfer_operations_download.py +++ b/tests/test_blobxfer_operations_download.py @@ -189,6 +189,7 @@ def test_check_download_conditions(tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) rfile = mock.MagicMock() rfile.md5 = 'abc' + rfile._encryption = None result = d._check_download_conditions(ep, rfile) assert result == ops.DownloadAction.CheckMd5 @@ -238,6 +239,7 @@ def test_check_download_conditions(tmpdir): rfile = azmodels.StorageEntity('cont') rfile._size = util.page_align_content_length(ep.stat().st_size) rfile._mode = azmodels.StorageModes.Page + rfile._encryption = None 
result = d._check_download_conditions(ep, rfile) assert result == ops.DownloadAction.Skip @@ -269,6 +271,7 @@ def test_check_download_conditions(tmpdir): rfile = azmodels.StorageEntity('cont') rfile._lmt = datetime.datetime.now(dateutil.tz.tzutc()) + \ datetime.timedelta(days=1) + rfile._encryption = None result = d._check_download_conditions(ep, rfile) assert result == ops.DownloadAction.Download From 6267551cc7e96b06d0c8e59559d9621601832faa Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 1 Jun 2017 13:02:31 -0700 Subject: [PATCH 42/47] Add base documentation - Update README - Rev version to 1.0.0a2 --- CHANGELOG.md | 93 ++++++++++++++++----------- README.md | 57 ++++++++++++++-- blobxfer/version.py | 2 +- cli/cli.py | 4 +- docs/01-installation.md | 72 +++++++++++++++++++++ docs/10-cli-usage.md | 30 +++++++++ docs/20-yaml-configuration.md | 3 + docs/30-vectored-io.md | 3 + docs/40-client-side-encryption.md | 29 +++++++++ docs/80-blobxfer-python-library.md | 3 + docs/98-performance-considerations.md | 68 ++++++++++++++++++++ docs/99-current-limitations.md | 24 +++++++ docs/index.md | 16 +++++ 13 files changed, 356 insertions(+), 48 deletions(-) create mode 100644 docs/01-installation.md create mode 100644 docs/10-cli-usage.md create mode 100644 docs/20-yaml-configuration.md create mode 100644 docs/30-vectored-io.md create mode 100644 docs/40-client-side-encryption.md create mode 100644 docs/80-blobxfer-python-library.md create mode 100644 docs/98-performance-considerations.md create mode 100644 docs/99-current-limitations.md create mode 100644 docs/index.md diff --git a/CHANGELOG.md b/CHANGELOG.md index d1e4233..3abb9b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,158 +2,172 @@ ## [Unreleased] +## [1.0.0a2] - 2017-06-XX +### Changed +- From scratch rewrite providing a consistent CLI experience and a vast +array of new and advanced features. Please see the +[1.0.0 Milestone](https://github.com/Azure/blobxfer/milestone/1) for a +catalog of changes. 
+- All dependencies updated to latest + +### Removed +- Azure Service Management certificate support + +### Security +- Update cryptography requirement to 1.9 + ## [0.12.1] - 2016-12-09 -#### Changed +### Changed - Update all dependencies to latest versions -#### Fixed +### Fixed - Allow page blobs up to 1TB -#### Security +### Security - Update cryptography requirement to 1.6 ## [0.12.0] - 2016-10-17 -#### Added +### Added - Support for Account-level SAS keys - Update README regarding non-normalized exceptions being thrown (#5) ## [0.11.5] - 2016-10-03 -#### Changed +### Changed - Update all dependencies to latest versions -#### Fixed +### Fixed - Fix incorrect fileshare path splitting (#3) -#### Security +### Security - Update cryptography requirement to 1.5.2 ## [0.11.4] - 2016-09-12 -#### Added +### Added - Created [Docker image](https://hub.docker.com/r/alfpark/blobxfer) -#### Changed +### Changed - Update all dependencies to latest versions -#### Fixed +### Fixed - Fix `--delete` and blob listing with azure-storage (#1) -#### Security +### Security - Update cryptography requirement to 1.5 ## [0.11.2] - 2016-07-28 -#### Added +### Added - Allow rsakeypassphrase to be passed as an environment variable ## 0.11.1 - 2016-07-05 -#### Added +### Added - Allow storage account or sas key credentials to be passed as environment variables ## 0.11.0 - 2016-06-09 -#### Added +### Added - Azure Files support, please refer to the General Notes section for limitations -#### Changed +### Changed - `--blobep` option has been renamed to `--endpoint` ## 0.10.1 - 2016-06-06 -#### Changed +### Changed - Update all dependencies to latest versions - Add flag for block/page level md5 computation which is now disabled by default -#### Fixed +### Fixed - Update against breaking changes from azure-storage 0.32.0 -#### Removed +### Removed - Remove RC designation from encryption/decryption functionality -#### Security +### Security - Update cryptography requirement to 1.4 ## 0.10.0 - 2016-03-22 -#### Added +### Added - Added ``--disable-urllib-warnings`` option to suppress urllib3 warnings (use with care) -#### Changed +### Changed - Update script for compatibility with azure-storage 0.30.0 which is now a required dependency - Promote encryption to RC status - `--blobep` now refers to endpoint suffix rather than blob endpoint (e.g., core.windows.net rather than blob.core.windows.net) -#### Security +### Security - Update cryptography requirement to 1.3 ## 0.9.9.11 - 2016-02-22 -#### Changed +### Changed - Pin azure dependencies due to breaking changes -#### Fixed +### Fixed - Minor bug fixes -#### Security +### Security - Update cryptography requirement to 1.2.2 ## 0.9.9.10 - 2016-01-31 -#### Fixed +### Fixed - Fix regression in blob name encoding with Python3 ## 0.9.9.9 - 2016-01-29 -#### Added +### Added - Emit warning when attempting to use remoteresource with a directory upload -#### Changed +### Changed - Update setup.py dependencies to latest available versions -#### Fixed +### Fixed - Fix regression in single file upload and remoteresource renaming - Replace socket exception handling with requests ConnectionError handling - Properly handle blob names containing `?` if using SAS ## 0.9.9.8 - 2016-01-06 -#### Fixed +### Fixed - Disable unnecessary thread daemonization - Gracefully handle KeyboardInterrupts - Explicitly add azure-common to setup.py install reqs ## 0.9.9.7 - 2016-01-05 -#### Added +### Added - Add python environment and package info to parameter dump to aid issue/bug reports -#### Changed 
+### Changed - Reduce number of default concurrent workers to 3x CPU count - Change azure\_request backoff mechanism -#### Fixed +### Fixed - Make base requirements non-optional in import process - Update azure\_request exception handling to support new Azure Storage Python SDK errors ## 0.9.9.6 - 2016-01-04 -#### Added +### Added - Encryption support - No file overwrite on download option - Auto-detection of file mimetype - Remote delete option - Include pattern option -#### Changed +### Changed - Replace keeprootdir with strip-components option - Reduce the number of default concurrent workers to 4x CPU count -#### Fixed +### Fixed - Fix shared key upload with non-existent container - Fix zero-byte blob download issue ## 0.9.9.5 - 2015-09-27 -#### Added +### Added - File collation support -#### Fixed +### Fixed - Fix page alignment bug - Reduce memory usage @@ -183,7 +197,8 @@ `--no-skiponmatch`. - 0.8.2: performance regression fixes -[Unreleased]: https://github.com/Azure/blobxfer/compare/0.12.1...HEAD +[Unreleased]: https://github.com/Azure/blobxfer/compare/1.0.0a2...HEAD +[1.0.0a2]: https://github.com/Azure/blobxfer/compare/0.12.1...1.0.0a2 [0.12.1]: https://github.com/Azure/blobxfer/compare/0.12.0...0.12.1 [0.12.0]: https://github.com/Azure/blobxfer/compare/0.11.5...0.12.0 [0.11.5]: https://github.com/Azure/blobxfer/compare/0.11.4...0.11.5 diff --git a/README.md b/README.md index 6853cb4..a2cf2b9 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,57 @@ -blobxfer -======== +[![Build Status](https://travis-ci.org/Azure/blobxfer.svg?branch=master)](https://travis-ci.org/Azure/blobxfer) +[![Coverage Status](https://coveralls.io/repos/github/Azure/blobxfer/badge.svg?branch=master)](https://coveralls.io/github/Azure/blobxfer?branch=master) +[![PyPI](https://img.shields.io/pypi/v/blobxfer.svg)](https://pypi.python.org/pypi/blobxfer) +[![PyPI](https://img.shields.io/pypi/pyversions/blobxfer.svg)](https://pypi.python.org/pypi/blobxfer) +[![Docker Pulls](https://img.shields.io/docker/pulls/alfpark/blobxfer.svg)](https://hub.docker.com/r/alfpark/blobxfer) +[![Image Layers](https://images.microbadger.com/badges/image/alfpark/blobxfer:latest.svg)](http://microbadger.com/images/alfpark/blobxfer) -AzCopy-like OS independent Azure storage blob and file share transfer tool +# blobxfer +`blobxfer` is an advanced data movement tool and library for Azure Storage +Blob and Files. With `blobxfer` you can copy your files into or out of Azure +Storage with the CLI or integrate the `blobxfer` data movement library into +your own Python scripts. 
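The CLI exposes each transfer action as a subcommand: `upload`, `download`, and `synccopy` (the latter is not yet implemented in this release). Assuming the `blobxfer` console entry point from a `pip` install is on your path, the built-in help enumerates every option:

```shell
# top-level commands and global options
blobxfer --help

# per-action options
blobxfer upload --help
blobxfer download --help
```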
-Change Log ----------- +## Major Features +* Command-line interface (CLI) providing data movement capability to and +from Azure Blob and File Storage +* High-performance design with asynchronous transfers and disk I/O +* YAML configuration driven execution support +* Resume support +* Vectored IO + * `stripe` mode allows striping a single file across multiple blobs to + break through single blob or fileshare throughput limits including + multi-storage account destinations + * `replica` mode allows replication of a file across multiple locations + including multi-storage account destinations +* Client-side encryption support +* Advanced skip options for rsync-like operations +* Store/restore POSIX filemode and uid/gid +* `stdin` piping support +* Append blob support +* Configurable one-shot block upload support +* Block (chunk) size selection support +* Rsync-like `--delete-after` support +* Support for reading from blob snapshots +* Automatic block blob size adjustment for uploading +* Automatic uploading of VHD and VHDX files as page blobs +* Wildcard filtering with include and exclude support +* No clobber support in either direction +* File logging support -See the [CHANGELOG.md](https://github.com/Azure/blobxfer/blob/master/CHANGELOG.md) file. +## Installation +`blobxfer` is on [PyPI](https://pypi.python.org/pypi/blobxfer) and on +[Docker Hub](https://hub.docker.com/r/alfpark/blobxfer/). Please refer to +the [installation guide](https://github.com/Azure/blobxfer/blob/master/docs/01-installation.md) +on how to install `blobxfer`. + +## Documentation +Please refer to the [blobxfer Documentation](https://github.com/Azure/blobxfer/blob/master/docs) +for more details and usage information. + +## Change Log +For recent changes, please refer to the +[CHANGELOG.md](https://github.com/Azure/blobxfer/blob/master/CHANGELOG.md) +file. ------------------------------------------------------------------------ diff --git a/blobxfer/version.py b/blobxfer/version.py index 9e8b65b..6280e0b 100644 --- a/blobxfer/version.py +++ b/blobxfer/version.py @@ -22,4 +22,4 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -__version__ = '1.0.0a1' +__version__ = '1.0.0a2' diff --git a/cli/cli.py b/cli/cli.py index ec4f3f9..9b8c788 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -771,12 +771,12 @@ def synccopy( ctx, local_resource, storage_account, remote_path, sync_copy_dest_storage_account, sync_copy_dest_remote_path): """Synchronously copy blobs between Azure Storage accounts""" + raise NotImplementedError() settings.add_cli_options( ctx.cli_options, settings.TransferAction.Synccopy, local_resource, storage_account, remote_path, sync_copy_dest_storage_account, sync_copy_dest_remote_path) ctx.initialize() - raise NotImplementedError() @cli.command('upload') @@ -827,10 +827,10 @@ def useconfig_download(ctx, config): def useconfig_synccopy(ctx, config): """Synchronously copy blobs between Azure Storage accounts via yaml configuration""" + raise NotImplementedError() settings.add_cli_options( ctx.cli_options, settings.TransferAction.Synccopy, None, None, None) ctx.initialize() - raise NotImplementedError() @useconfig.command('upload') diff --git a/docs/01-installation.md b/docs/01-installation.md new file mode 100644 index 0000000..cffe6c6 --- /dev/null +++ b/docs/01-installation.md @@ -0,0 +1,72 @@ +# blobxfer Installation +`blobxfer` is a pure Python package, however, some dependencies require a C +compiler and supporting libraries if there is no binary wheel. 
Please follow +the pre-requisites section first prior to invoking installation via `pip`. +Alternatively, you can use the +[blobxfer Docker image](https://hub.docker.com/r/alfpark/blobxfer/). + +## Pre-requisites +`blobxfer` depends on `cryptography` and `ruamel.yaml` which require a +C compiler if your platform does not have a pre-made binary wheel. Please +follow the instructions below for your platform. + +### Ubuntu +```shell +apt-get update +# for Python3 (recommended) +apt-get install -y build-essential libssl-dev libffi-dev python3-dev python3-pip +# for Python2 +apt-get install -y build-essential libssl-dev libffi-dev python-dev python-pip +``` + +### CentOS/RHEL +```shell +# for Python2 +yum install -y gcc openssl-dev libffi-devel python-devel +curl -fSsL https://bootstrap.pypa.io/get-pip.py | python +``` + +### SLES/OpenSUSE +```shell +zypper ref +# for Python2 +zypper -n in gcc libopenssl-devel libffi48-devel python-devel +curl -fSsL https://bootstrap.pypa.io/get-pip.py | python +``` + +## Installation via `pip` +[blobxfer](https://pypi.python.org/pypi/blobxfer) is on PyPI and can be +installed via: + +```shell +# for Python2 +pip install blobxfer +# for Python3 +pip3 instlal blobxfer +``` + +`blobxfer` is compatible with Python 2.7 and 3.3+. To install for Python 3 +(which is recommended), some distributions may use `pip3` instead of `pip`. +Installing into your user area via `--user` or via a virtual environment +is recommended to avoid installation issues with system-wide Python +packages. + +## Installation via Docker +[blobxfer](https://hub.docker.com/r/alfpark/blobxfer/) is also on Docker +Hub and can be retrieved via: + +```shell +docker pull alfpark/blobxfer +``` + +## Troubleshooting +#### `azure.storage` dependency not found +If you get an error that `azure.storage` cannot be found or loaded this means +that there was an issue installing this package with other `azure` packages +that share the same base namespace. You can correct this by issuing: +```shell +# for Python2 +pip install azure-storage +# for Python3 +pip3 install azure-storage +``` diff --git a/docs/10-cli-usage.md b/docs/10-cli-usage.md new file mode 100644 index 0000000..9c9f111 --- /dev/null +++ b/docs/10-cli-usage.md @@ -0,0 +1,30 @@ +# blobxfer Command-Line Usage + +## TODO + + +### General Notes +* `blobxfer` does not take any leases on blobs or containers. It is up to the +user to ensure that blobs are not modified while download/uploads are being +performed. +* No validation is performed regarding container and file naming and length +restrictions. +* `blobxfer` will attempt to download from blob storage as-is. If the source +filename is incompatible with the destination operating system, then failure +may result. +* When using SAS, the SAS key must be a container- or share-level SAS if +performing recursive directory upload or container/file share download. +* If uploading via service-level SAS keys, the container or file share must +already be created in Azure storage prior to upload. Account-level SAS keys +with the signed resource type of `c` (i.e., container-level permission) is +required for to allow conatiner or file share creation. +* When uploading files as page blobs, the content is page boundary +byte-aligned. The MD5 for the blob is computed using the final aligned data +if the source is not page boundary byte-aligned. This enables these page +blobs or files to be skipped during subsequent download or upload with the +appropriate `skip_on` option, respectively. 
+* Globbing of wildcards must be disabled by your shell (or properly quoted) +during invoking `blobxfer` such that include and exclude patterns can be +read verbatim without the shell expanding the wildcards. +* The `--delete` operates similarly to `--delete-after` in rsync. Please +note that this option interacts with `--include` and `--exclude` filters. diff --git a/docs/20-yaml-configuration.md b/docs/20-yaml-configuration.md new file mode 100644 index 0000000..000e01f --- /dev/null +++ b/docs/20-yaml-configuration.md @@ -0,0 +1,3 @@ +# blobxfer YAML Configuration + +## TODO diff --git a/docs/30-vectored-io.md b/docs/30-vectored-io.md new file mode 100644 index 0000000..0eb67fd --- /dev/null +++ b/docs/30-vectored-io.md @@ -0,0 +1,3 @@ +# blobxfer Vectored IO + +## TODO diff --git a/docs/40-client-side-encryption.md b/docs/40-client-side-encryption.md new file mode 100644 index 0000000..e16d87a --- /dev/null +++ b/docs/40-client-side-encryption.md @@ -0,0 +1,29 @@ +# blobxfer Client-side Encryption Notes +Please read the following carefully regarding client-side encryption support +in `blobxfer`. Additionally, current limitations for client-side encryption +can be found [here](99-current-limitations.md). + +* Encryption is performed using AES256-CBC. MACs are generated using +HMAC-SHA256. +* All required information regarding the encryption process is stored on +each blob's `encryptiondata` and `encryptiondata_authentication` metadata +fields. These metadata entries are used on download to configure the proper +download parameters for the decryption process as well as to authenticate +the `encryptiondata` metadata and the encrypted entity. Encryption metadata +set by `blobxfer` (or any Azure Storage SDK) should not be modified or +the blob/file may be unrecoverable. +* Keys for the AES256 block cipher are generated on a per-blob/file basis. +These keys are encrypted using RSAES-OAEP and encoded in the metadata. +* MD5 for both the pre-encrypted and encrypted version of the file is stored +in the entity metadata, if enabled. `skip_on` options will still work +transparently with encrypted blobs/files. +* MAC integrity checks are preferred over MD5 to validate encrypted data. +* Attempting to upload the same file that exists in Azure Storage, but the +file in Azure Storage is not encrypted will not occur if any `skip_on` match +condition succeeds. This behavior can be overridden by deleting the target +file in Azure Storage or disabling the `skip_on` behavior. +* Attempting to upload the same file as an encrypted blob with a different +RSA key will not occur if the file content MD5 is the same. This behavior +can be overridden by deleting the target file in Azure Storage or disabling +the `skip_on` `md5_match` behavior. +* Zero-byte files are not encrypted. diff --git a/docs/80-blobxfer-python-library.md b/docs/80-blobxfer-python-library.md new file mode 100644 index 0000000..e0d74a2 --- /dev/null +++ b/docs/80-blobxfer-python-library.md @@ -0,0 +1,3 @@ +# blobxfer Python Library + +## TODO diff --git a/docs/98-performance-considerations.md b/docs/98-performance-considerations.md new file mode 100644 index 0000000..6ade0a2 --- /dev/null +++ b/docs/98-performance-considerations.md @@ -0,0 +1,68 @@ +# blobxfer Performance Considerations +Please read the following carefully regarding considerations that should +be applied with regard to performance and `blobxfer`. 
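The client-side encryption scheme described above can be made concrete with a short sketch using the `cryptography` package: AES-256-CBC for the content, HMAC-SHA256 over the encrypted bytes, and the per-blob keys wrapped with RSAES-OAEP for placement in entity metadata. Function and variable names, as well as the OAEP hash parameters, are illustrative only; this is not blobxfer's actual implementation:

```python
import os

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes, hmac, padding
from cryptography.hazmat.primitives.asymmetric import padding as rsa_padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes


def encrypt_chunk(data, rsa_public_key):
    """Illustrative AES-256-CBC + HMAC-SHA256 client-side encryption."""
    sym_key = os.urandom(32)   # per-blob AES-256 content encryption key
    sign_key = os.urandom(32)  # per-blob HMAC-SHA256 signing key
    iv = os.urandom(16)
    # PKCS7-pad the plaintext and encrypt with AES-256-CBC
    padder = padding.PKCS7(algorithms.AES.block_size).padder()
    padded = padder.update(data) + padder.finalize()
    encryptor = Cipher(
        algorithms.AES(sym_key), modes.CBC(iv),
        backend=default_backend()).encryptor()
    encdata = encryptor.update(padded) + encryptor.finalize()
    # authenticate the encrypted bytes (MAC is preferred over MD5)
    signer = hmac.HMAC(sign_key, hashes.SHA256(), backend=default_backend())
    signer.update(encdata)
    mac = signer.finalize()
    # wrap the content keys with RSAES-OAEP for storage in metadata
    wrapped_keys = rsa_public_key.encrypt(
        sym_key + sign_key,
        rsa_padding.OAEP(
            mgf=rsa_padding.MGF1(hashes.SHA1()),
            algorithm=hashes.SHA1(),
            label=None))
    return encdata, iv, mac, wrapped_keys
```

Decryption reverses the process: unwrap the keys with the RSA private key, verify the HMAC over the encrypted data first, then decrypt and unpad.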
Additionally, +please review the +[Azure Storage Scalability and Performance Targets](https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/) +for an overview of general performance targets that apply to Azure Blobs +and File shares. + +## Concurrency +* `blobxfer` offers four concurrency knobs. Each one should be tuned for +maximum performance according to your system and network characteristics. + 1. MD5 processes: computing MD5 for potential omission from transfer due + to `skip_on` `md5_match` being specified are offloaded to the specified + number of processors. + 2. Crypto processes: decrypting encrypted blobs and files can be offloaded + to the specified number of processors. Due to the inherent + non-parallelizable encryption algorithm used, this is ignored for + encryption (uploads). + 3. Disk threads: concurrency in reading (uploads) and writing (downloads) to + disk is controlled by the number of disk threads. + 4. Transfer threads: concurrency in the number of threads from/to Azure + Storage is controlled by the number of transfer threads. +* The thread concurrency options (disk and transfer) can be set to a +non-positive number to be automatically set as a multiple of the number of +cores available on the machine. + +## Azure File Share Performance +File share performance can be "slow" or become a bottleneck, especially for +file shares containing thousands of files as multiple REST calls must be +performed for each file. Currently, a single file share has a limit of up +to 60 MB/s and 1000 8KB IOPS. Please refer to the +[Azure Storage Scalability and Performance Targets](https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/) +for performance targets and limits regarding Azure Storage File shares. +If scalable high performance is required, consider using Blob storage +instead. + +## MD5 Hashing +MD5 hashing will impose some performance penalties to check if the file +should be uploaded or downloaded. For instance, if uploading and the local +file is determined to be different than it's remote counterpart, then the +time spent performing the MD5 comparison is lost. + +## Client-side Encryption +Client-side encryption will naturally impose a performance penalty on +`blobxfer` both for uploads (encrypting) and downloads (decrypting) depending +upon the processor speed and number of cores available. Additionally, for +uploads, encryption is not parallelizable and is in-lined with the main +process. + +## pyOpenSSL +As of requests 2.6.0 and Python versions < 2.7.9 (i.e., interpreter found on +default Ubuntu 14.04 installations, 16.04 is not affected), if certain +packages are installed, as those found in `requests[security]` then the +underlying urllib3 package will utilize the `ndg-httpsclient` package which +will use `pyOpenSSL`. This will ensure the peers are fully validated. However, +this incurs a rather larger performance penalty. If you understand the +potential security risks for disabling this behavior due to high performance +requirements, you can either remove `ndg-httpsclient` or use `blobxfer` in a +virtualenv environment without the `ndg-httpsclient` package. Python +versions >= 2.7.9 are not affected by this issue. + +Additionally, `urllib3` (which `requests` uses) may use `pyOpenSSL` which +may result in exceptions being thrown that are not normalized by `urllib3`. +This may result in exceptions that should be retried, but are not. 
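The four concurrency knobs map onto the `Concurrency` options object exercised by the updated tests in this patch series (module path assumed to be `blobxfer.models.options`). A minimal construction sketch with purely illustrative values:

```python
import blobxfer.models.options as options

# values are illustrative; per the notes above, non-positive disk and
# transfer thread counts are set automatically from the core count
concurrency = options.Concurrency(
    crypto_processes=0,   # decryption offload (ignored for uploads)
    md5_processes=2,      # skip_on md5_match comparison offload
    disk_threads=16,      # local disk read/write threads
    transfer_threads=32,  # Azure Storage transfer threads
)
```

In the tests, this object is passed to `options.General(concurrency=...)` together with the progress bar, resume file, and log file settings.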
It is +recommended to upgrade your Python where `pyOpenSSL` is not required for +fully validating peers and such that `blobxfer` can operate without +`pyOpenSSL` in a secure fashion. You can also run `blobxfer` via Docker +or in a virtualenv environment without `pyOpenSSL`. diff --git a/docs/99-current-limitations.md b/docs/99-current-limitations.md new file mode 100644 index 0000000..9b8a3f7 --- /dev/null +++ b/docs/99-current-limitations.md @@ -0,0 +1,24 @@ +# blobxfer Current Limitations +Please read this section carefully for any current known limitations to +`blobxfer`. + +### Client-side Encryption +* Client-side encryption is currently only available for block blobs and +Azure Files. +* `stdin` sources cannot be encrypted. +* Azure KeyVault key references are currently not supported. + +### Platform-specific Issues +* File attribute store/restore is not supported on Windows. + +### Resume Support +* Encrypted uploads/downloads cannot currently be resumed as the Python +SHA256 object cannot be pickled. +* Append blobs currently cannot be resumed for upload. + +### Other Limitations +* MD5 is not computed for append blobs. +* Empty directories are not created locally when downloading from an Azure +File share which has empty directories. +* Empty directories are not deleted if `--delete` is specified and no files +remain in the directory on the Azure File share. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..b05fcc4 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,16 @@ +# blobxfer Documentation +`blobxfer` is a transfer tool and library to move data between local file +systems and Azure Storage. `blobxfer` command-line interface is powered by +an advanced, high performance data movement library in Python with the same +name. The `blobxfer` data movement library is built on the +[Azure Storage Python SDK](https://github.com/Azure/azure-storage-python). +Please refer to the following documents detailing the usage of `blobxfer`. + +1. [Installation](01-installation.md) +2. [Command-Line Usage](10-cli-usage.md) +3. [YAML Configuration](20-yaml-configuration.md) +4. [Vectored IO](30-vectored-io.md) +5. [Client-side Encryption](40-client-side-encryption.md) +6. [blobxfer Data Movement Library](80-blobxfer-python-library.md) +7. [Performance Considerations](98-performance-considerations.md) +8. [Current Limitations](99-current-limitations.md) From 0dca02ce83624743abf3a848a5a00209581a5374 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 1 Jun 2017 13:55:54 -0700 Subject: [PATCH 43/47] Doc updates - Remove useconfig command and instead use --config option --- README.md | 19 +- cli/cli.py | 245 ++++++++++---------------- cli/settings.py | 19 +- docs/01-installation.md | 4 +- docs/10-cli-usage.md | 144 ++++++++++++++- docs/98-performance-considerations.md | 22 +-- docs/{index.md => README.md} | 0 7 files changed, 268 insertions(+), 185 deletions(-) rename docs/{index.md => README.md} (100%) diff --git a/README.md b/README.md index a2cf2b9..ef6c462 100644 --- a/README.md +++ b/README.md @@ -14,27 +14,28 @@ your own Python scripts. 
## Major Features * Command-line interface (CLI) providing data movement capability to and from Azure Blob and File Storage +* Standalone library for integration with scripts or other Python packages * High-performance design with asynchronous transfers and disk I/O * YAML configuration driven execution support * Resume support * Vectored IO - * `stripe` mode allows striping a single file across multiple blobs to - break through single blob or fileshare throughput limits including - multi-storage account destinations - * `replica` mode allows replication of a file across multiple locations - including multi-storage account destinations + * `stripe` mode allows striping a single file across multiple blobs (even + to multiple storage accounts) to break through single blob or fileshare + throughput limits + * `replica` mode allows replication of a file across multiple destinations + including to multiple storage accounts * Client-side encryption support * Advanced skip options for rsync-like operations * Store/restore POSIX filemode and uid/gid -* `stdin` piping support +* Read/pipe from `stdin` support * Append blob support * Configurable one-shot block upload support * Block (chunk) size selection support -* Rsync-like `--delete-after` support +* Rsync-like delete support * Support for reading from blob snapshots * Automatic block blob size adjustment for uploading -* Automatic uploading of VHD and VHDX files as page blobs -* Wildcard filtering with include and exclude support +* Automatic uploading of VHD/VHDX files as page blobs +* Include and exclude filtering support * No clobber support in either direction * File logging support diff --git a/cli/cli.py b/cli/cli.py index 9b8c788..7f58c04 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -57,7 +57,6 @@ class CliContext(object): """CliContext class: holds context for CLI commands""" def __init__(self): """Ctor for CliContext""" - self.yaml_config = None self.config = {} self.cli_options = {} self.credentials = None @@ -85,8 +84,8 @@ def _read_yaml_file(self, yaml_file): f, Loader=ruamel.yaml.RoundTripLoader) else: self.config = blobxfer.util.merge_dict( - self.config, ruamel.yaml.load( - f, Loader=ruamel.yaml.RoundTripLoader)) + ruamel.yaml.load(f, Loader=ruamel.yaml.RoundTripLoader), + self.config) def _init_config(self): # type: (CliContext) -> None @@ -94,9 +93,9 @@ def _init_config(self): :param CliContext self: this """ # load yaml config file into memory - if blobxfer.util.is_not_empty(self.yaml_config): - self.yaml_config = pathlib.Path(self.yaml_config) - self._read_yaml_file(self.yaml_config) + if blobxfer.util.is_not_empty(self.cli_options['yaml_config']): + yaml_config = pathlib.Path(self.cli_options['yaml_config']) + self._read_yaml_file(yaml_config) else: # merge cli options with config settings.merge_settings(self.config, self.cli_options) @@ -108,7 +107,6 @@ def _init_config(self): blobxfer.util.set_verbose_logger_handlers() logger.debug('config: \n' + json.dumps(self.config, indent=4)) # free mem - del self.yaml_config del self.cli_options @@ -116,6 +114,19 @@ def _init_config(self): pass_cli_context = click.make_pass_decorator(CliContext, ensure=True) +def _config_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['yaml_config'] = value + return value + return click.option( + '--config', + expose_value=False, + help='YAML configuration file', + envvar='BLOBXFER_CONFIG_FILE', + callback=callback)(f) + + def _crypto_processes_option(f): def callback(ctx, param, value): 
clictx = ctx.ensure_object(CliContext) @@ -237,81 +248,61 @@ def callback(ctx, param, value): callback=callback)(f) -def common_options(f): - f = _verbose_option(f) - f = _transfer_threads_option(f) - f = _timeout_option(f) - f = _resume_file_option(f) - f = _progress_bar_option(f) - f = _md5_processes_option(f) - f = _log_file_option(f) - f = _disk_threads_option(f) - f = _crypto_processes_option(f) - return f - - -def _local_resource_argument(f): +def _local_resource_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) - clictx.local_resource = value + clictx.cli_options['local_resource'] = value return value - return click.argument( - 'local-resource', + return click.option( + '--local-resource', + expose_value=False, + help='Local resource', callback=callback)(f) -def _storage_account_argument(f): +def _storage_account_name_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) clictx.cli_options['storage_account'] = value return value - return click.argument( - 'storage-account', + return click.option( + '--storage-account', + expose_value=False, + help='Storage account name', + envvar='BLOBXFER_STORAGE_ACCOUNT_NAME', callback=callback)(f) -def _remote_path_argument(f): +def _remote_path_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) clictx.cli_options['remote_path'] = value return value - return click.argument( - 'remote-path', + return click.option( + '--remote-path', + expose_value=False, + help='Remote path on Azure Storage', callback=callback)(f) -def upload_download_arguments(f): - f = _remote_path_argument(f) - f = _storage_account_argument(f) - f = _local_resource_argument(f) +def common_options(f): + f = _verbose_option(f) + f = _transfer_threads_option(f) + f = _timeout_option(f) + f = _resume_file_option(f) + f = _progress_bar_option(f) + f = _md5_processes_option(f) + f = _log_file_option(f) + f = _disk_threads_option(f) + f = _crypto_processes_option(f) + f = _config_option(f) return f -def _sync_copy_dest_storage_account_argument(f): - def callback(ctx, param, value): - clictx = ctx.ensure_object(CliContext) - clictx.cli_options['sync_copy_dest_storage_account'] = value - return value - return click.argument( - 'sync-copy-dest-storage-account', - callback=callback)(f) - - -def _sync_copy_dest_remote_path_argument(f): - def callback(ctx, param, value): - clictx = ctx.ensure_object(CliContext) - clictx.cli_options['sync_copy_dest_remote_path'] = value - return value - return click.argument( - 'sync-copy-dest-remote-path', - callback=callback)(f) - - -def sync_copy_arguments(f): - f = _sync_copy_dest_remote_path_argument(f) - f = _sync_copy_dest_storage_account_argument(f) - f = _remote_path_argument(f) - f = _storage_account_argument(f) +def upload_download_options(f): + f = _remote_path_option(f) + f = _storage_account_name_option(f) + f = _local_resource_option(f) return f @@ -321,10 +312,10 @@ def callback(ctx, param, value): clictx.cli_options['access_key'] = value return value return click.option( - '--access-key', + '--storage-account-key', expose_value=False, help='Storage account access key', - envvar='BLOBXFER_ACCESS_KEY', + envvar='BLOBXFER_STORAGE_ACCOUNT_KEY', callback=callback)(f) @@ -516,7 +507,7 @@ def callback(ctx, param, value): '--rsa-private-key', expose_value=False, default=None, - help='RSA private key', + help='RSA private key PEM file', envvar='BLOBXFER_RSA_PRIVATE_KEY', callback=callback)(f) @@ -544,7 +535,7 @@ def callback(ctx, param, value): 
'--rsa-public-key', expose_value=False, default=None, - help='RSA public key', + help='RSA public key PEM file', envvar='BLOBXFER_RSA_PUBLIC_KEY', callback=callback)(f) @@ -635,10 +626,35 @@ def callback(ctx, param, value): clictx.cli_options['sync_copy_dest_access_key'] = value return value return click.option( - '--sync-copy-dest-access-key', + '--sync-copy-dest-storage-account-key', expose_value=False, help='Storage account access key for synccopy destination', - envvar='BLOBXFER_SYNC_COPY_DEST_ACCESS_KEY', + envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT_KEY', + callback=callback)(f) + + +def _sync_copy_dest_storage_account_name_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sync_copy_dest_storage_account'] = value + return value + return click.option( + '--sync-copy-dest-storage-account', + expose_value=False, + help='Storage account name for synccopy destination', + envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT_NAME', + callback=callback)(f) + + +def _sync_copy_dest_remote_path_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sync_copy_dest_remote_path'] = value + return value + return click.option( + '--sync-copy-dest-remote-path', + expose_value=False, + help='Remote path on Azure Storage for synccopy destination', callback=callback)(f) @@ -651,7 +667,7 @@ def callback(ctx, param, value): '--sync-copy-dest-sas', expose_value=False, help='Shared access signature for synccopy destination', - envvar='BLOBXFER_SYNC_COPY_SAS', + envvar='BLOBXFER_SYNC_COPY_DEST_SAS', callback=callback)(f) @@ -705,12 +721,16 @@ def download_options(f): def sync_copy_options(f): + f = _sync_copy_dest_storage_account_name_option(f) f = _sync_copy_dest_sas_option(f) + f = _sync_copy_dest_remote_path_option(f) f = _sync_copy_dest_access_key_option(f) + f = _storage_account_name_option(f) f = _skip_on_md5_match_option(f) f = _skip_on_lmt_ge_option(f) f = _skip_on_filesize_match_option(f) f = _sas_option(f) + f = _remote_path_option(f) f = _overwrite_option(f) f = _mode_option(f) f = _include_option(f) @@ -721,21 +741,6 @@ def sync_copy_options(f): return f -def _config_argument(f): - def callback(ctx, param, value): - clictx = ctx.ensure_object(CliContext) - clictx.yaml_config = value - return value - return click.argument( - 'config', - callback=callback)(f) - - -def config_arguments(f): - f = _config_argument(f) - return f - - @click.group(context_settings=_CONTEXT_SETTINGS) @click.version_option(version=blobxfer.__version__) @click.pass_context @@ -745,15 +750,13 @@ def cli(ctx): @cli.command('download') -@upload_download_arguments +@upload_download_options @download_options @common_options @pass_cli_context -def download(ctx, local_resource, storage_account, remote_path): +def download(ctx): """Download blobs or files from Azure Storage""" - settings.add_cli_options( - ctx.cli_options, settings.TransferAction.Download, local_resource, - storage_account, remote_path) + settings.add_cli_options(ctx.cli_options, settings.TransferAction.Download) ctx.initialize() specs = settings.create_download_specifications(ctx.config) for spec in specs: @@ -763,84 +766,24 @@ def download(ctx, local_resource, storage_account, remote_path): @cli.command('synccopy') -@sync_copy_arguments @sync_copy_options @common_options @pass_cli_context -def synccopy( - ctx, local_resource, storage_account, remote_path, - sync_copy_dest_storage_account, sync_copy_dest_remote_path): +def synccopy(ctx): 
"""Synchronously copy blobs between Azure Storage accounts""" raise NotImplementedError() - settings.add_cli_options( - ctx.cli_options, settings.TransferAction.Synccopy, local_resource, - storage_account, remote_path, sync_copy_dest_storage_account, - sync_copy_dest_remote_path) + settings.add_cli_options(ctx.cli_options, settings.TransferAction.Synccopy) ctx.initialize() @cli.command('upload') -@upload_download_arguments +@upload_download_options @upload_options @common_options @pass_cli_context -def upload(ctx, local_resource, storage_account, remote_path): +def upload(ctx): """Upload files to Azure Storage""" - settings.add_cli_options( - ctx.cli_options, settings.TransferAction.Upload, local_resource, - storage_account, remote_path) - ctx.initialize() - specs = settings.create_upload_specifications(ctx.config) - for spec in specs: - blobxfer.api.Uploader( - ctx.general_options, ctx.credentials, spec - ).start() - - -@cli.group() -@pass_cli_context -def useconfig(ctx): - """Use yaml configuration file for transfer""" - pass - - -@useconfig.command('download') -@config_arguments -@common_options -@pass_cli_context -def useconfig_download(ctx, config): - """Download blobs or files from Azure Storage via yaml configuration""" - settings.add_cli_options( - ctx.cli_options, settings.TransferAction.Download, None, None, None) - ctx.initialize() - specs = settings.create_download_specifications(ctx.config) - for spec in specs: - blobxfer.api.Downloader( - ctx.general_options, ctx.credentials, spec - ).start() - - -@useconfig.command('synccopy') -@config_arguments -@common_options -@pass_cli_context -def useconfig_synccopy(ctx, config): - """Synchronously copy blobs between Azure Storage accounts via yaml - configuration""" - raise NotImplementedError() - settings.add_cli_options( - ctx.cli_options, settings.TransferAction.Synccopy, None, None, None) - ctx.initialize() - - -@useconfig.command('upload') -@config_arguments -@common_options -@pass_cli_context -def useconfig_upload(ctx, config): - """Upload files to Azure Storage via yaml configuration""" - settings.add_cli_options( - ctx.cli_options, settings.TransferAction.Upload, None, None, None) + settings.add_cli_options(ctx.cli_options, settings.TransferAction.Upload) ctx.initialize() specs = settings.create_upload_specifications(ctx.config) for spec in specs: diff --git a/cli/settings.py b/cli/settings.py index 5911719..378325c 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -49,21 +49,16 @@ class TransferAction(enum.Enum): Synccopy = 3, -def add_cli_options( - cli_options, action, local_resource=None, storage_account=None, - remote_path=None, sync_copy_dest_storage_account=None, - sync_copy_dest_remote_path=None): - # type: (dict, str, str, str, str, str, str) -> None +def add_cli_options(cli_options, action): + # type: (dict, str) -> None """Adds CLI options to the configuration object :param dict cli_options: CLI options dict :param TransferAction action: action - :param str local_resource: local resource - :param str storage_account: storage account - :param str remote_path: remote path - :param str sync_copy_dest_storage_account: synccopy dest sa - :param str sync_copy_dest_remote_path: synccopy dest rp """ cli_options['_action'] = action.name.lower() + local_resource = cli_options['local_resource'] + storage_account = cli_options['storage_account'] + remote_path = cli_options['remote_path'] if blobxfer.util.is_not_empty(storage_account): # add credentials try: @@ -149,6 +144,10 @@ def add_cli_options( }, } elif action == 
TransferAction.Synccopy: + sync_copy_dest_storage_account = \ + cli_options['sync_copy_dest_storage_account'] + sync_copy_dest_remote_path = \ + cli_options['sync_copy_dest_remote_path'] if blobxfer.util.is_none_or_empty(sync_copy_dest_storage_account): raise RuntimeError( 'must specify a destination storage account') diff --git a/docs/01-installation.md b/docs/01-installation.md index cffe6c6..7b85c52 100644 --- a/docs/01-installation.md +++ b/docs/01-installation.md @@ -66,7 +66,7 @@ that there was an issue installing this package with other `azure` packages that share the same base namespace. You can correct this by issuing: ```shell # for Python2 -pip install azure-storage +pip install --upgrade --force-reinstall azure-storage # for Python3 -pip3 install azure-storage +pip3 install --upgrade --force-reinstall azure-storage ``` diff --git a/docs/10-cli-usage.md b/docs/10-cli-usage.md index 9c9f111..7c06508 100644 --- a/docs/10-cli-usage.md +++ b/docs/10-cli-usage.md @@ -1,9 +1,149 @@ # blobxfer Command-Line Usage +`blobxfer` operates using a command followed by options. Each +command will be detailed along with all options available. -## TODO +## Commands +### `download` +Downloads a remote Azure path, which may contain many resources, to the +local machine. This command requires at the minimum, the following options: +* `--storage-account-name` +* `--remote-path` +* `--local-resource` +Additionally, an authentication option for the storage account is required. +Please see the Authentication sub-section below under Options. +### `upload` +Uploads a local path to a remote Azure path. The local path may contain +many resources on the local machine. This command requires at the minimum, +the following options: +* `--local-resource` +* `--storage-account-name` +* `--remote-path` -### General Notes +Additionally, an authentication option for the storage account is required. +Please see the Authentication sub-section below under Options. + +### `synccopy` +TODO: not yet implemented. + +## Options +### General +* `--config` specifies the YAML configuration file to use. This can be +optionally provided through an environment variable `BLOBXFER_CONFIG_FILE`. +* `--file-md5` or `--no-file-md5` controls if the file MD5 should be computed. +* `--local-resource` is the local resource path. +* `--log-file` specifies the log file to write to. +* `--mode` is the operating mode. The default is `auto` but may be set to +`append`, `block`, `file`, or `page`. If specified with the `upload` +command, then all files will be uploaded as the specified `mode` type. +If specified with `download`, then only remote entities with that `mode` +type are downloaded. Note that `file` should be specified if interacting +with Azure File shares. +* `--overwrite` or `--no-overwrite` controls clobber semantics at the +destination. +* `--progress-bar` or `--no-progress-bar` controls if a progress bar is +output to the console. +* `--recursive` or `--no-recursive` controls if the source path should be +recursively uploaded or downloaded. +* `--remote-path` is the remote Azure path. This path must contain the +Blob container or File share at the begining, e.g., `mycontainer/vdir` +* `--resume-file` specifies the resume file to write to. +* `--timeout` is the integral timeout value in seconds to use. +* `-h` or `--help` can be passed at every command level to receive context +sensitive help. 
+* `-v` will output verbose messages including the configuration used + +### Authentication +`blobxfer` supports both Storage Account access keys and Shared Access +Signature (SAS) tokens. One type must be supplied with all commands in +order to successfully authenticate against Azure Storage. These options are: +* `--storage-account-key` is the storage account access key. This can be +optionally provided through an environment variable +`BLOBXFER_STORAGE_ACCOUNT_KEY` instead. +* `--sas` is a shared access signature (sas) token. This can can be +optionally provided through an environment variable `BLOBXFER_SAS` instead. + +### Concurrency +Please see the [performance considerations](98-performance-considerations.md) +document for more information regarding concurrency options. +* `--crypto-processes` is the number of decryption offload processes to spawn. +`0` will in-line the decryption routine with the main thread. +* `--disk-threads` is the number of threads to create for disk I/O. +* `--md5-processes` is the number of MD5 offload processes to spawn for +comparing files with `skip_on` `md5_match`. +* `--transfer-threads` is the number of threads to create for transferring +to/from Azure Storage. + +### Connection +* `--endpoint` is the Azure Storage endpoint to connect to; the default is +Azure Public regions, or `core.windows.net`. +* `--storage-account-name` is the storage account to connect to. + +### Encryption +* `--rsa-private-key` is the RSA private key in PEM format to use. This can +be provided for uploads but must be specified to decrypt encrypted remote +entities. This can be optionally provided through an environment variable +`BLOBXFER_RSA_PRIVATE_KEY`. +* `--rsa-private-key-passphrase` is the RSA private key passphrase. This can +be optionally provided through an environment variable +`BLOBXFER_RSA_PRIVATE_KEY_PASSPHRASE`. +* `--rsa-public-key` is the RSA public key in PEM format to use. This +can only be provided for uploads. This can be optionally provided through an +environment variable `BLOBXFER_RSA_PUBLIC_KEY`. + +### Filtering +* `--exclude` is an exclude pattern to use; this can be specified multiple +times. Exclude patterns are applied after include patterns. If both an exclude +and an include pattern match a target, the target is excluded. +* `--include` is an include pattern to use; this can be specified multiple +times + +### Skip On +* `--skip-on-filesize-match` will skip the transfer action if the filesizes +match between source and destination. This should not be specified for +encrypted files. +* `--skip-on-lmt-ge` will skip the transfer action: + * On upload if the last modified time of the remote file is greater than + or equal to the local file. + * On download if the last modified time of the local file is greater than + or equal to the remote file. +* `--skip-on-md5-match` will skip the transfer action if the MD5 hash match +between source and destination. This can be transparently used through +encrypted files that have been uploaded with `blobxfer`. + +### Vectored IO +Please see the [Vectored IO](30-vectored-io.md) document for more information +regarding Vectored IO operations in `blobxfer`. 
+* `--distribution-mode` is the Vectored IO distribution mode + * `disabled` which is default (no Vectored IO) + * `replica` which will replicate source files to target destinations on + upload + * `stripe`which will stripe source files to target destinations on upload +* `--stripe-chunk-size-bytes` is the stripe chunk width for stripe-based +Vectored IO operations + +### Other +* `--delete` deletes extraneous files at the remote destination path on +uploads and at the local resource on downloads. This actions occur after the +transfer has taken place. +* `--one-shot-bytes` controls the number of bytes to "one shot" a block +Blob upload. The maximum value that can be specified is 256MiB. This may +be useful when using account-level SAS keys and enforcing non-overwrite +behavior. +* `--rename` renames a single file upload or download to the target +destination or source path, respectively. +* `--strip-components N` will strip the leading `N` components from the +file path. The default is `1`. + +## Examples +### `download` Examples +TODO. +blobxfer download + +### `upload` Examples +TODO. + +## General Notes * `blobxfer` does not take any leases on blobs or containers. It is up to the user to ensure that blobs are not modified while download/uploads are being performed. diff --git a/docs/98-performance-considerations.md b/docs/98-performance-considerations.md index 6ade0a2..89ff0c6 100644 --- a/docs/98-performance-considerations.md +++ b/docs/98-performance-considerations.md @@ -9,17 +9,17 @@ and File shares. ## Concurrency * `blobxfer` offers four concurrency knobs. Each one should be tuned for maximum performance according to your system and network characteristics. - 1. MD5 processes: computing MD5 for potential omission from transfer due - to `skip_on` `md5_match` being specified are offloaded to the specified - number of processors. - 2. Crypto processes: decrypting encrypted blobs and files can be offloaded - to the specified number of processors. Due to the inherent - non-parallelizable encryption algorithm used, this is ignored for - encryption (uploads). - 3. Disk threads: concurrency in reading (uploads) and writing (downloads) to - disk is controlled by the number of disk threads. - 4. Transfer threads: concurrency in the number of threads from/to Azure - Storage is controlled by the number of transfer threads. + * Disk threads: concurrency in reading (uploads) and writing (downloads) to + disk is controlled by the number of disk threads. + * Transfer threads: concurrency in the number of threads from/to Azure + Storage is controlled by the number of transfer threads. + * MD5 processes: computing MD5 for potential omission from transfer due + to `skip_on` `md5_match` being specified are offloaded to the specified + number of processors. + * Crypto processes: decrypting encrypted blobs and files can be offloaded + to the specified number of processors. Due to the inherent + non-parallelizable encryption algorithm used, this is ignored for + encryption (uploads). * The thread concurrency options (disk and transfer) can be set to a non-positive number to be automatically set as a multiple of the number of cores available on the machine. 
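
For reference, the four knobs described above correspond directly to the `--md5-processes`, `--crypto-processes`, `--disk-threads` and `--transfer-threads` CLI options (or their equivalents under the `concurrency` section of a YAML configuration). The following is only a rough tuning sketch; the storage account, SAS token, paths and the chosen values are placeholders and should be adjusted to your own system and network characteristics:

```shell
# sketch: explicitly set all four concurrency knobs for a large download
# (account name, SAS token and paths are placeholders)
blobxfer download \
    --storage-account-name mystorageaccount \
    --sas "mysastoken" \
    --remote-path mycontainer \
    --local-resource /my/path \
    --md5-processes 2 \
    --crypto-processes 2 \
    --disk-threads 16 \
    --transfer-threads 32
```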
diff --git a/docs/index.md b/docs/README.md similarity index 100% rename from docs/index.md rename to docs/README.md From a327445e45ceede3471e4c3967b8320cd81cbabb Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 1 Jun 2017 14:55:47 -0700 Subject: [PATCH 44/47] Update Dockerfile to Alpine 3.6 and libressl --- docker/Dockerfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 42e8b2d..a713e15 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,12 +1,11 @@ # Dockerfile for Azure/blobxfer -FROM gliderlabs/alpine:3.4 +FROM alpine:3.6 MAINTAINER Fred Park -RUN apk add --update --no-cache musl build-base python3 python3-dev openssl-dev libffi-dev ca-certificates \ - && pip3 install --no-cache-dir --upgrade pip \ +RUN apk add --update --no-cache musl build-base python3 python3-dev libressl-dev libffi-dev ca-certificates \ && pip3 install --no-cache-dir --upgrade blobxfer \ - && apk del --purge build-base python3-dev openssl-dev libffi-dev \ + && apk del --purge build-base python3-dev libressl-dev libffi-dev \ && rm /var/cache/apk/* ENTRYPOINT ["blobxfer"] From bb9f68a3bb04a1d74632bba6d2a84105b8b312d8 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 1 Jun 2017 20:38:15 -0700 Subject: [PATCH 45/47] More documentation - Rename some options - Catch KeyErrors and rethrow for required options to clarify --- CHANGELOG.md | 6 +- README.md | 12 +- cli/cli.py | 8 +- cli/settings.py | 45 ++++++-- docs/01-installation.md | 29 +++-- docs/10-cli-usage.md | 102 +++++++++++++++-- docs/20-yaml-configuration.md | 210 +++++++++++++++++++++++++++++++++- 7 files changed, 370 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3abb9b2..ba7b442 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,16 @@ ## [Unreleased] -## [1.0.0a2] - 2017-06-XX +## [1.0.0a2] - 2017-06-02 ### Changed - From scratch rewrite providing a consistent CLI experience and a vast array of new and advanced features. Please see the [1.0.0 Milestone](https://github.com/Azure/blobxfer/milestone/1) for a catalog of changes. +- **Breaking Changes:** there have been a significant number of breaking +changes with the rewrite from the command-line invocation of `blobxfer` +itself to the options and environment variable names. Please review the +usage documentation carefully when upgrading from 0.12.1. 
- All dependencies updated to latest ### Removed diff --git a/README.md b/README.md index ef6c462..fd2c904 100644 --- a/README.md +++ b/README.md @@ -18,24 +18,24 @@ from Azure Blob and File Storage * High-performance design with asynchronous transfers and disk I/O * YAML configuration driven execution support * Resume support -* Vectored IO +* Vectored IO support * `stripe` mode allows striping a single file across multiple blobs (even to multiple storage accounts) to break through single blob or fileshare throughput limits * `replica` mode allows replication of a file across multiple destinations including to multiple storage accounts * Client-side encryption support +* Support all blob types for both upload and download * Advanced skip options for rsync-like operations * Store/restore POSIX filemode and uid/gid -* Read/pipe from `stdin` support -* Append blob support -* Configurable one-shot block upload support -* Block (chunk) size selection support -* Rsync-like delete support +* Support for reading/pipe from `stdin` * Support for reading from blob snapshots +* Configurable one-shot block upload support +* Configurable chunk size for both upload and download * Automatic block blob size adjustment for uploading * Automatic uploading of VHD/VHDX files as page blobs * Include and exclude filtering support +* Rsync-like delete support * No clobber support in either direction * File logging support diff --git a/cli/cli.py b/cli/cli.py index 7f58c04..4b1e211 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -256,7 +256,7 @@ def callback(ctx, param, value): return click.option( '--local-resource', expose_value=False, - help='Local resource', + help='Local resource; use - for stdin', callback=callback)(f) @@ -266,7 +266,7 @@ def callback(ctx, param, value): clictx.cli_options['storage_account'] = value return value return click.option( - '--storage-account', + '--storage-account-name', expose_value=False, help='Storage account name', envvar='BLOBXFER_STORAGE_ACCOUNT_NAME', @@ -602,7 +602,7 @@ def callback(ctx, param, value): expose_value=False, type=int, default=1, - help='Strip leading file path components [1]', + help='Strip leading file path components on upload [1]', callback=callback)(f) @@ -639,7 +639,7 @@ def callback(ctx, param, value): clictx.cli_options['sync_copy_dest_storage_account'] = value return value return click.option( - '--sync-copy-dest-storage-account', + '--sync-copy-dest-storage-account-name', expose_value=False, help='Storage account name for synccopy destination', envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT_NAME', diff --git a/cli/settings.py b/cli/settings.py index 378325c..1507c82 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -56,9 +56,24 @@ def add_cli_options(cli_options, action): :param TransferAction action: action """ cli_options['_action'] = action.name.lower() - local_resource = cli_options['local_resource'] - storage_account = cli_options['storage_account'] - remote_path = cli_options['remote_path'] + try: + local_resource = cli_options['local_resource'] + if blobxfer.util.is_none_or_empty(local_resource): + raise KeyError() + except KeyError: + raise ValueError('--local-resource must be specified') + try: + storage_account = cli_options['storage_account'] + if blobxfer.util.is_none_or_empty(storage_account): + raise KeyError() + except KeyError: + raise ValueError('--storage-account-name must be specified') + try: + remote_path = cli_options['remote_path'] + if blobxfer.util.is_none_or_empty(remote_path): + raise KeyError() + except KeyError: + 
raise ValueError('--remote-path must be specified') if blobxfer.util.is_not_empty(storage_account): # add credentials try: @@ -144,13 +159,23 @@ def add_cli_options(cli_options, action): }, } elif action == TransferAction.Synccopy: - sync_copy_dest_storage_account = \ - cli_options['sync_copy_dest_storage_account'] - sync_copy_dest_remote_path = \ - cli_options['sync_copy_dest_remote_path'] - if blobxfer.util.is_none_or_empty(sync_copy_dest_storage_account): - raise RuntimeError( - 'must specify a destination storage account') + try: + sync_copy_dest_storage_account = \ + cli_options['sync_copy_dest_storage_account'] + if blobxfer.util.is_none_or_empty( + sync_copy_dest_storage_account): + raise KeyError() + except KeyError: + raise ValueError( + '--sync-copy-dest-storage-account-name must be specified') + try: + sync_copy_dest_remote_path = \ + cli_options['sync_copy_dest_remote_path'] + if blobxfer.util.is_none_or_empty(sync_copy_dest_remote_path): + raise KeyError() + except KeyError: + raise ValueError( + '--sync-copy-dest-remote-path must be specified') arg = { 'source': sa_rp, 'destination': [ diff --git a/docs/01-installation.md b/docs/01-installation.md index 7b85c52..2609f07 100644 --- a/docs/01-installation.md +++ b/docs/01-installation.md @@ -1,8 +1,8 @@ # blobxfer Installation `blobxfer` is a pure Python package, however, some dependencies require a C -compiler and supporting libraries if there is no binary wheel. Please follow -the pre-requisites section first prior to invoking installation via `pip`. -Alternatively, you can use the +compiler and supporting libraries if there is no binary wheel for that +dependency and your platform. Please follow the pre-requisites section first +prior to invoking installation via `pip`. Alternatively, you can use the [blobxfer Docker image](https://hub.docker.com/r/alfpark/blobxfer/). ## Pre-requisites @@ -34,15 +34,26 @@ zypper -n in gcc libopenssl-devel libffi48-devel python-devel curl -fSsL https://bootstrap.pypa.io/get-pip.py | python ``` +### Mac OS X +Python 2.7 should come pre-installed. However, if you want to install +`blobxfer` for Python 3.5+ (recommended), please follow the steps outlined on +[this guide](http://docs.python-guide.org/en/latest/starting/install/osx/) +to ensure that you have the latest version of Python, a compiler and pip. + +### Windows +Please install at least Python 3.5 or higher to avoid requiring a +compiler. If you must use Python 2.7, you can download the necessary +development headers and compiler [from Microsoft](http://aka.ms/vcpython27). + ## Installation via `pip` [blobxfer](https://pypi.python.org/pypi/blobxfer) is on PyPI and can be installed via: ```shell +# for Python3 (recommended) +pip3 install blobxfer # for Python2 pip install blobxfer -# for Python3 -pip3 instlal blobxfer ``` `blobxfer` is compatible with Python 2.7 and 3.3+. To install for Python 3 @@ -61,12 +72,12 @@ docker pull alfpark/blobxfer ## Troubleshooting #### `azure.storage` dependency not found -If you get an error that `azure.storage` cannot be found or loaded this means -that there was an issue installing this package with other `azure` packages +If you get an error that `azure.storage` cannot be found or loaded, then +most likely there was a conflict with this package with other `azure` packages that share the same base namespace. 
You can correct this by issuing: ```shell -# for Python2 -pip install --upgrade --force-reinstall azure-storage # for Python3 pip3 install --upgrade --force-reinstall azure-storage +# for Python2 +pip install --upgrade --force-reinstall azure-storage ``` diff --git a/docs/10-cli-usage.md b/docs/10-cli-usage.md index 7c06508..8931464 100644 --- a/docs/10-cli-usage.md +++ b/docs/10-cli-usage.md @@ -2,13 +2,20 @@ `blobxfer` operates using a command followed by options. Each command will be detailed along with all options available. -## Commands +### Quick Navigation +1. [Commands](#commands) +2. [Options](#options) +3. [Example Invocations](#examples) +4. [General Notes](#general-notes) + +## Commands ### `download` Downloads a remote Azure path, which may contain many resources, to the local machine. This command requires at the minimum, the following options: * `--storage-account-name` * `--remote-path` * `--local-resource` + Additionally, an authentication option for the storage account is required. Please see the Authentication sub-section below under Options. @@ -23,15 +30,27 @@ the following options: Additionally, an authentication option for the storage account is required. Please see the Authentication sub-section below under Options. +If piping from `stdin`, `--local-resource` should be set to `-` as per +convention. + ### `synccopy` TODO: not yet implemented. -## Options +## Options ### General * `--config` specifies the YAML configuration file to use. This can be optionally provided through an environment variable `BLOBXFER_CONFIG_FILE`. +* `--chunk-size-bytes` is the chunk size in bytes. For downloads, this +is the maximum length of data to transfer per request. For uploads, this +corresponds to one of block size for append and block blobs, page size for +page blobs, or file chunk for files. Only block blobs can have a block size +of up to 100MiB, all others have a maximum of 4MiB. +* `--file-attributes` or `--no-file-attributes` controls if POSIX file +attributes (mode and ownership) should be stored or restored. Note that to +restore uid/gid, `blobxfer` must be run as root or under sudo. * `--file-md5` or `--no-file-md5` controls if the file MD5 should be computed. -* `--local-resource` is the local resource path. +* `--local-resource` is the local resource path. Set to `-` if piping from +`stdin`. * `--log-file` specifies the log file to write to. * `--mode` is the operating mode. The default is `auto` but may be set to `append`, `block`, `file`, or `page`. If specified with the `upload` @@ -117,8 +136,11 @@ regarding Vectored IO operations in `blobxfer`. * `--distribution-mode` is the Vectored IO distribution mode * `disabled` which is default (no Vectored IO) * `replica` which will replicate source files to target destinations on - upload - * `stripe`which will stripe source files to target destinations on upload + upload. Note that replicating across multiple destinations will require + a YAML configuration file. + * `stripe` which will stripe source files to target destinations on upload. + Note that striping across multiple destinations will require a YAML + configuration file. * `--stripe-chunk-size-bytes` is the stripe chunk width for stripe-based Vectored IO operations @@ -135,15 +157,73 @@ destination or source path, respectively. * `--strip-components N` will strip the leading `N` components from the file path. The default is `1`. -## Examples +## Example Invocations ### `download` Examples -TODO. 
-blobxfer download +#### Download an Entire Encrypted Blob Container to Current Working Directory +```shell +blobxfer download --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource . --rsa-public-key ~/mypubkey.pem +``` + +#### Download an Entire File Share to Designated Path and Skip On Filesize Matches +```shell +blobxfer download --mode file --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path myfileshare --local-resource /my/path --skip-on-filesize-match +``` + +#### Download only Page Blobs in Blob Container Virtual Directory Non-recursively and Cleanup Local Path to Match Remote Path +```shell +blobxfer download --mode page --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-resource /my/pageblobs --no-recursive --delete +``` + +#### Resume Incomplete Downloads Matching an Include Pattern and Log to File and Restore POSIX File Attributes +```shell +blobxfer download --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-resource . --include '*.bin' --resume-file myresumefile.db --log-file blobxfer.log --file-attributes +``` + +#### Download a Blob Snapshot +```shell +blobxfer download --storage-account-name mystorageaccount --sas "mysastoken" --remote-path "mycontainer/file.bin?snapshot=2017-04-20T02:12:49.0311708Z" --local-resource . +``` + +#### Download using a YAML Configuration File +```shell +blobxfer download --config myconfig.yaml +``` ### `upload` Examples -TODO. - -## General Notes +#### Upload Current Working Directory as Encrypted Block Blobs Non-recursively +```shell +blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource . --rsa-private-key ~/myprivatekey.pem --no-recursive +``` + +#### Upload Specific Path Recursively to a File Share, Store File MD5 and POSIX File Attributes to a File Share and Exclude Some Files +```shell +blobxfer upload --mode file --storage-account-name mystorageaccount --sas "mysastoken" --remote-path myfileshare --local-resource . --file-md5 --file-attributes --exclude '*.bak' +``` + +#### Upload Single File with Resume and Striped Vectored IO into 512MiB Chunks +```shell +blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource /some/huge/file --resume-file hugefileresume.db --distribution-mode stripe --stripe-chunk-size-bytes 536870912 +``` + +#### Upload Specific Path but Skip On Any MD5 Matches, Store File MD5 and Cleanup Remote Path to Match Local Path +```shell +blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource /my/path --file-md5 --skip-on-md5-match --delete +``` + +#### Upload From Piped `stdin` +```shell +curl -fSsL https://some.uri | blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource - +``` + +#### Upload using a YAML Configuration File +```shell +blobxfer upload --config myconfig.yaml +``` + +### `synccopy` Examples +TODO: not implemented yet. + +## General Notes * `blobxfer` does not take any leases on blobs or containers. It is up to the user to ensure that blobs are not modified while download/uploads are being performed. 
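
As noted in the Options section above, any option backed by an environment variable can be supplied that way instead of on the command line, which helps keep credentials out of shell history. A small sketch, assuming a placeholder configuration file, account name and key:

```shell
# these two invocations are equivalent per the --config option documentation
blobxfer upload --config myconfig.yaml
BLOBXFER_CONFIG_FILE=myconfig.yaml blobxfer upload

# credentials may likewise be passed through their documented environment variables
# (mystorageaccount and the key value are placeholders)
export BLOBXFER_STORAGE_ACCOUNT_KEY="myaccesskey"
blobxfer upload --storage-account-name mystorageaccount --remote-path mycontainer --local-resource /my/path
```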
diff --git a/docs/20-yaml-configuration.md b/docs/20-yaml-configuration.md index 000e01f..78437b0 100644 --- a/docs/20-yaml-configuration.md +++ b/docs/20-yaml-configuration.md @@ -1,3 +1,211 @@ # blobxfer YAML Configuration +`blobxfer` accepts YAML configuration files to drive the transfer. YAML +configuration files are specified with the `--config` option to any +`blobxfer` command. -## TODO +## Schema +The `blobxfer` YAML schema consists of 5 distinct "sections". The following +sub-sections will describe each. You may combine all 5 sections into the +same YAML file if desired as `blobxfer` will only read the required sections +to execute the specified command. + +#### Configuration Sections +1. [`azure_storage`](#azure-storage) +2. [`options`](#options) +3. [`download`](#download) +4. [`upload`](#upload) +5. [`synccopy`](#synccopy) + +### `azure_storage` +The `azure_storage` section specifies Azure Storage credentials that will +be referenced for any transfer while processing the YAML file. This section +is required. + +```yaml +azure_storage: + endpoint: core.windows.net + accounts: + mystorageaccount0: ABCDEF... + mystorageaccount1: ?se... +``` + +* `endpoint` specifies for which endpoint to connect to with Azure Storage. +Generally this can be omitted if using Public Azure regions. +* `accounts` is a dictionary of storage account names and either a +storage account key or a shared access signature token. + +### `options` +The `options` section specifies general options that may be applied across +all other sections in the YAML configuration. + +```yaml +options: + log_file: /path/to/blobxfer.log + resume_file: /path/to/resumefile.db + progress_bar: true + verbose: true + timeout_sec: null + concurrency: + md5_processes: 2 + crypto_processes: 2 + disk_threads: 16 + transfer_threads: 32 +``` + +* `log_file` is the location of the log file to write to +* `resume_file` is the location of the resume database to create +* `progress_bar` controls display of a progress bar output to the console +* `verbose` controls if verbose logging is enabled +* `timeout_sec` is the timeout to apply to requests/responses +* `concurrency` is a dictionary of concurrency limits + * `md5_processes` is the number of MD5 offload processes to create for + MD5 comparison checking + * `crypto_processes` is the number of decryption offload processes to create + * `disk_threads` is the number of threads for disk I/O + * `transfer_threads` is the number of threads for network transfers + +### `download` +The `download` section specifies download sources and destination. Note +that `download` refers to a list of objects, thus you may specify as many +of these sub-configuration blocks on the `download` property as you need. +When the `download` command with the YAML config is specified, the list +is iterated and all specified sources are downloaded. + +```yaml +download: + - source: + - mystorageaccount0: mycontainer + - mystorageaccount1: someothercontainer/vpath + destination: /path/to/store/downloads + include: + - "*.txt" + - "*.bxslice-*" + exclude: + - "*.bak" + options: + check_file_md5: true + chunk_size_bytes: 16777216 + delete_extraneous_destination: false + mode: auto + overwrite: true + recursive: true + rename: false + restore_file_attributes: true + rsa_private_key: myprivatekey.pem + rsa_private_key_passphrase: myoptionalpassword + skip_on: + filesize_match: false + lmt_ge: false + md5_match: true + - source: + # next if needed... 
+``` + +* `source` is a list of storage account to remote path mappings +* `destination` is the local resource path +* `include` is a list of include patterns +* `exclude` is a list of exclude patterns +* `options` are download-specific options + * `check_file_md5` will integrity check downloaded files using the stored MD5 + * `chunk_size_bytes` is the maximum amount of data to download per request + * `delete_extraneous_destination` will cleanup any files locally that are + not found on the remote. Note that this interacts with include and + exclude filters. + * `mode` is the operating mode + * `overwrite` specifies clobber behavior + * `recursive` specifies if remote paths should be recursively searched for + entities to download + * `rename` will rename a single entity source path to the `destination` + * `restore_file_attributes` will restore POSIX file mode and ownership if + stored on the entity metadata + * `rsa_private_key` is the RSA private key PEM file to use to decrypt + encrypted blobs or files + * `rsa_private_key_passphrase` is the RSA private key passphrase, if required + * `skip_on` are skip on options to use + * `filesize_match` skip if file size match + * `lmt_ge` skip if local file has a last modified time greater than or + equal to the remote file + * `md5_match` skip if MD5 match + +### `upload` +The `upload` section specifies upload sources and destinations. Note +that `upload` refers to a list of objects, thus you may specify as many +of these sub-configuration blocks on the `upload` property as you need. +When the `upload` command with the YAML config is specified, the list +is iterated and all specified sources are uploaded. + +```yaml +upload: + - source: + - /path/to/hugefile1 + - /path/to/hugefile2 + destination: + - mystorageaccount0: mycontainer/vdir + - mystorageaccount1: someothercontainer/vdir2 + include: + - "*.bin" + exclude: + - "*.tmp" + options: + mode: auto + chunk_size_bytes: 0 + delete_extraneous_destination: true + one_shot_bytes: 33554432 + overwrite: true + recursive: true + rename: false + rsa_public_key: mypublickey.pem + skip_on: + filesize_match: false + lmt_ge: false + md5_match: true + store_file_properties: + attributes: true + md5: true + strip_components: 1 + vectored_io: + stripe_chunk_size_bytes: 1000000 + distribution_mode: stripe + - source: + # next if needed... +``` + +* `source` is a list of local resource paths +* `destination` is a list of storage account to remote path mappings +* `include` is a list of include patterns +* `exclude` is a list of exclude patterns +* `options` are upload-specific options + * `mode` is the operating mode + * `chunk_size_bytes` is the maximum amount of data to upload per request. + This corresponds to the block size for block and append blobs, page size + for page blobs, and the file chunk for files. Only block blobs can have + a block size of up to 100MiB, all others have a maximum of 4MiB. + * `one_shot_bytes` is the size limit to upload block blobs in a single + request. 
+ * `overwrite` specifies clobber behavior + * `recursive` specifies if local paths should be recursively searched for + files to upload + * `rename` will rename a single entity destination path to a single `source` + * `rsa_public_key` is the RSA public key PEM file to use to encrypt files + * `skip_on` are skip on options to use + * `filesize_match` skip if file size match + * `lmt_ge` skip if remote file has a last modified time greater than or + equal to the local file + * `md5_match` skip if MD5 match + * `store_file_properties` stores the following file properties if enabled + * `attributes` will store POSIX file mode and ownership + * `md5` will store the MD5 of the file + * `strip_components` is the number of leading path components to strip + * `vectored_io` are the Vectored IO options to apply to the upload + * `stripe_chunk_size_bytes` is the stripe width for each chunk if `stripe` + `distribution_mode` is selected + * `distribution_mode` is the Vectored IO mode to use which can be one of + * `disabled` will disable Vectored IO + * `replica` which will replicate source files to target destinations on + upload. Note that more than one destination should be specified. + * `stripe` which will stripe source files to target destinations on + upload. If more than one destination is specified, striping occurs in + round-robin order amongst the destinations listed. + +### `synccopy` +TODO: not yet implemented. From b7782619d1503d2f10e4ab37a26f16a4ed71eef3 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Fri, 2 Jun 2017 08:01:24 -0700 Subject: [PATCH 46/47] Add Vectored IO docs --- docs/30-vectored-io.md | 96 +++++++++++++++++++++++++++++++++++++++++- docs/README.md | 2 +- 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/docs/30-vectored-io.md b/docs/30-vectored-io.md index 0eb67fd..a007b7d 100644 --- a/docs/30-vectored-io.md +++ b/docs/30-vectored-io.md @@ -1,3 +1,95 @@ -# blobxfer Vectored IO +# blobxfer Vectored I/O +`blobxfer` supports Vectored I/O (scatter/gather) which can help alleviate +problems associated with +[single blob or single fileshare throughput limits](https://docs.microsoft.com/en-us/azure/storage/storage-scalability-targets). +Additionally, `blobxfer` has the ability to replicate a single source to +multiple destinations to allow for increased resiliency or throughput for +consumption later. -## TODO +## Distribution Modes +`blobxfer` supports two distribution modes: `replica` and `stripe`. The +following sections describe each. + +### Replica +`replica` mode replicates an entire file (or set of files) across all +specified destinations. This allows for multiple backups, resiliency, +and potentially increased download throughput later if the clients understand +how to download from multiple sources. + +The logic is fairly simple in how this is accomplished. Each source file +has portions of the file read from disk, buffered in memory and then +replicated across multiple storage accounts. 
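
As a concrete illustration, and as noted after the diagram below, multiple replication destinations are defined through a YAML configuration file. A minimal sketch of such an `upload` entry follows; the account names, container and file path are placeholders, and the corresponding `azure_storage` credentials section (see the YAML configuration document) is assumed to be present:

```yaml
# sketch: replicate a single local file to two storage accounts
upload:
  - source:
    - /path/to/hugefile1
    destination:
    - mystorageaccount0: mycontainer
    - mystorageaccount1: mycontainer
    options:
      vectored_io:
        distribution_mode: replica
```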
+ +``` + Whole File +---------------------+ + Replication | | + +------------------------------> | Destination 0: | + | | Storage Account A | + | | | + | +---------------------+ + | + | ++------------+---------------+ Whole File +---------------------+ +| | Replication | | +| 10 GiB VHD on Local Disk +--------------> | Destination 1: | +| | | Storage Account B | ++------------+---------------+ | | + | +---------------------+ + | + | + | +---------------------+ + | Whole File | | + | Replication | Destination 2: | + +------------------------------> | Storage Account C | + | | + +---------------------+ +``` + +In order to take advantage of `replica` Vectored IO, you must use a YAML +configuration file to define multiple destinations. + +### Stripe +`stripe` mode will splice a file into multiple chunks and scatter these +chunks across destinations specified. These destinations can be different +containers within the same storage account or even containers distributed +across multiple storage accounts if single storage account bandwidth limits +are insufficient. + +`blobxfer` will slice the source file into multiple chunks where the +`stripe_chunk_size_bytes` is the stripe width of each chunk. This parameter +will allow you to effectively control how many blobs/files are created on +Azure. `blobxfer` will then round-robin through all of the destinations +specified to store the slices. Information required to reconstruct the +original file is stored on the blob or file metadata. It is important to +keep this metadata in-tact or reconstruction will fail. + +``` + +---------------------+ + | | <-----------------------------------+ + | Destination 1: | | + | Storage Account B | <---------------------+ | + | | | | + +---------------------+ <-------+ | | + | | | + ^ ^ | | | + | | | | | + 1 GiB Stripe | | | | | ++-----------------------------+ Width +------+---+--+------+---+--+------+---+--+------+---+--+------+---+--+ +| | | | | | | | | | | | | +| 10 GiB File on Local Disk | +-----------> | D0 | D1 | D0 | D1 | D0 | D1 | D0 | D1 | D0 | D1 | +| | | | | | | | | | | | | ++-----------------------------+ 10 Vectored +---+--+------+---+--+------+---+--+------+---+--+------+---+--+------+ + Slices | | | | | + | | | | | + | v | | | + | | | | + +> +---------------------+ <+ | | + | | | | + | Destination 0: | <--------------+ | + | Storage Account A | | + | | <----------------------------+ + +---------------------+ +``` + +In order to take advantage of `stripe` Vectored IO, you must use a YAML +configuration file to define multiple destinations. diff --git a/docs/README.md b/docs/README.md index b05fcc4..eb1b4f5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,7 +9,7 @@ Please refer to the following documents detailing the usage of `blobxfer`. 1. [Installation](01-installation.md) 2. [Command-Line Usage](10-cli-usage.md) 3. [YAML Configuration](20-yaml-configuration.md) -4. [Vectored IO](30-vectored-io.md) +4. [Vectored I/O](30-vectored-io.md) 5. [Client-side Encryption](40-client-side-encryption.md) 6. [blobxfer Data Movement Library](80-blobxfer-python-library.md) 7. 
[Performance Considerations](98-performance-considerations.md) From e1d97fa3cb813559cff15661aa6d13d12dc30175 Mon Sep 17 00:00:00 2001 From: Fred Park Date: Fri, 2 Jun 2017 08:16:46 -0700 Subject: [PATCH 47/47] Tag for 1.0.0a3 release - Rename some options - Make thread join more robust on Python2 --- CHANGELOG.md | 6 ++-- blobxfer/models/options.py | 6 +++- blobxfer/operations/download.py | 8 ++--- blobxfer/operations/upload.py | 4 +-- blobxfer/util.py | 14 +++++++++ blobxfer/version.py | 2 +- cli/cli.py | 42 ++++++++++++++------------ cli/settings.py | 12 ++++---- docs/01-installation.md | 7 +++-- docs/10-cli-usage.md | 43 +++++++++++++++------------ docs/30-vectored-io.md | 4 +-- docs/98-performance-considerations.md | 42 ++++++++++++++++++++++++-- 12 files changed, 129 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba7b442..fd71b89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## [Unreleased] -## [1.0.0a2] - 2017-06-02 +## [1.0.0a3] - 2017-06-02 ### Changed - From scratch rewrite providing a consistent CLI experience and a vast array of new and advanced features. Please see the @@ -201,8 +201,8 @@ usage documentation carefully when upgrading from 0.12.1. `--no-skiponmatch`. - 0.8.2: performance regression fixes -[Unreleased]: https://github.com/Azure/blobxfer/compare/1.0.0a2...HEAD -[1.0.0a2]: https://github.com/Azure/blobxfer/compare/0.12.1...1.0.0a2 +[Unreleased]: https://github.com/Azure/blobxfer/compare/1.0.0a3...HEAD +[1.0.0a3]: https://github.com/Azure/blobxfer/compare/0.12.1...1.0.0a3 [0.12.1]: https://github.com/Azure/blobxfer/compare/0.12.0...0.12.1 [0.12.0]: https://github.com/Azure/blobxfer/compare/0.11.5...0.12.0 [0.11.5]: https://github.com/Azure/blobxfer/compare/0.11.4...0.11.5 diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index c516d01..2a17c1a 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -105,13 +105,14 @@ class Concurrency(object): """Concurrency Options""" def __init__( self, crypto_processes, md5_processes, disk_threads, - transfer_threads): + transfer_threads, is_download=None): """Ctor for Concurrency Options :param Concurrency self: this :param int crypto_processes: number of crypto procs :param int md5_processes: number of md5 procs :param int disk_threads: number of disk threads :param int transfer_threads: number of transfer threads + :param bool is_download: download hint """ self.crypto_processes = crypto_processes self.md5_processes = md5_processes @@ -131,6 +132,9 @@ def __init__( # cap maximum number of disk threads from cpu count to 64 if self.disk_threads > 64: self.disk_threads = 64 + # for downloads, cap disk threads to lower value + if is_download and self.disk_threads > 16: + self.disk_threads = 16 auto_disk = True if self.transfer_threads is None or self.transfer_threads < 1: if auto_disk: diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index 3cbef61..9d51d32 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -430,7 +430,7 @@ def _wait_for_disk_threads(self, terminate): if terminate: self._download_terminate = terminate for thr in self._disk_threads: - thr.join() + blobxfer.util.join_thread(thr) def _wait_for_transfer_threads(self, terminate): # type: (Downloader, bool) -> None @@ -441,7 +441,7 @@ def _wait_for_transfer_threads(self, terminate): if terminate: self._download_terminate = terminate for thr in self._transfer_threads: - thr.join() + blobxfer.util.join_thread(thr) def 
_worker_thread_transfer(self): # type: (Downloader) -> None @@ -452,7 +452,7 @@ def _worker_thread_transfer(self): while not self.termination_check: try: if len(self._disk_set) > max_set_len: - time.sleep(0.2) + time.sleep(0.1) continue else: dd = self._transfer_queue.get(block=False, timeout=0.1) @@ -792,8 +792,8 @@ def start(self): 'KeyboardInterrupt detected, force terminating ' 'processes and threads (this may take a while)...') try: - self._wait_for_transfer_threads(terminate=True) self._wait_for_disk_threads(terminate=True) + self._wait_for_transfer_threads(terminate=True) finally: self._cleanup_temporary_files() raise diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index 232e8ba..9db2863 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -447,10 +447,10 @@ def _worker_thread_upload(self): while not self.termination_check: try: if len(self._transfer_set) > max_set_len: - time.sleep(0.2) + time.sleep(0.1) continue else: - ud = self._upload_queue.get(False, 0.1) + ud = self._upload_queue.get(block=False, timeout=0.1) except queue.Empty: continue try: diff --git a/blobxfer/util.py b/blobxfer/util.py index 166b98f..a17b8a5 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -124,6 +124,20 @@ def is_not_empty(obj): return obj is not None and len(obj) > 0 +def join_thread(thr): + # type: (threading.Thread) -> None + """Join a thread + :type threading.Thread thr: thread to join + """ + if on_python2(): + while True: + thr.join(timeout=1) + if not thr.isAlive(): + break + else: + thr.join() + + def merge_dict(dict1, dict2): # type: (dict, dict) -> dict """Recursively merge dictionaries: dict2 on to dict1. This differs diff --git a/blobxfer/version.py b/blobxfer/version.py index 6280e0b..0f2a584 100644 --- a/blobxfer/version.py +++ b/blobxfer/version.py @@ -22,4 +22,4 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. 
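The `blobxfer.util.join_thread` helper added above addresses a Python 2 quirk: a `Thread.join()` call with no timeout can block in a way that delays `KeyboardInterrupt` delivery until the worker finishes, which is why the download and upload operations now join threads through this helper. Below is a minimal standalone sketch of the same timed-join pattern; it is illustrative only (it is not the blobxfer implementation and uses the Python 3 `is_alive` spelling).

```python
import threading
import time


def join_interruptibly(thr):
    # Join in short slices so the joining thread regains control between
    # attempts; a bare join() on Python 2 can block uninterruptibly and
    # postpone KeyboardInterrupt until the worker thread exits.
    while thr.is_alive():
        thr.join(timeout=1)


worker = threading.Thread(target=time.sleep, args=(5,))
worker.start()
join_interruptibly(worker)  # Ctrl-C stays responsive while waiting
```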
-__version__ = '1.0.0a2' +__version__ = '1.0.0a3' diff --git a/cli/cli.py b/cli/cli.py index 4b1e211..4c12bbd 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -62,13 +62,15 @@ def __init__(self): self.credentials = None self.general_options = None - def initialize(self): - # type: (CliContext) -> None + def initialize(self, action): + # type: (CliContext, settings.TransferAction) -> None """Initialize context :param CliContext self: this + :param settings.TransferAction action: transfer action """ self._init_config() - self.general_options = settings.create_general_options(self.config) + self.general_options = settings.create_general_options( + self.config, action) self.credentials = settings.create_azure_storage_credentials( self.config, self.general_options) @@ -164,7 +166,8 @@ def callback(ctx, param, value): '--log-file', expose_value=False, default=None, - help='Log to file specified', + help='Log to file specified; this must be specified for progress ' + 'bar to show', callback=callback)(f) @@ -191,7 +194,8 @@ def callback(ctx, param, value): '--progress-bar/--no-progress-bar', expose_value=False, default=True, - help='Display progress bar instead of console logs [True]', + help='Display progress bar instead of console logs; log file must ' + 'be specified [True]', callback=callback)(f) @@ -254,22 +258,22 @@ def callback(ctx, param, value): clictx.cli_options['local_resource'] = value return value return click.option( - '--local-resource', + '--local-path', expose_value=False, - help='Local resource; use - for stdin', + help='Local path; use - for stdin', callback=callback)(f) -def _storage_account_name_option(f): +def _storage_account_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) clictx.cli_options['storage_account'] = value return value return click.option( - '--storage-account-name', + '--storage-account', expose_value=False, help='Storage account name', - envvar='BLOBXFER_STORAGE_ACCOUNT_NAME', + envvar='BLOBXFER_STORAGE_ACCOUNT', callback=callback)(f) @@ -301,7 +305,7 @@ def common_options(f): def upload_download_options(f): f = _remote_path_option(f) - f = _storage_account_name_option(f) + f = _storage_account_option(f) f = _local_resource_option(f) return f @@ -633,16 +637,16 @@ def callback(ctx, param, value): callback=callback)(f) -def _sync_copy_dest_storage_account_name_option(f): +def _sync_copy_dest_storage_account_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) clictx.cli_options['sync_copy_dest_storage_account'] = value return value return click.option( - '--sync-copy-dest-storage-account-name', + '--sync-copy-dest-storage-account', expose_value=False, help='Storage account name for synccopy destination', - envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT_NAME', + envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT', callback=callback)(f) @@ -721,11 +725,11 @@ def download_options(f): def sync_copy_options(f): - f = _sync_copy_dest_storage_account_name_option(f) + f = _sync_copy_dest_storage_account_option(f) f = _sync_copy_dest_sas_option(f) f = _sync_copy_dest_remote_path_option(f) f = _sync_copy_dest_access_key_option(f) - f = _storage_account_name_option(f) + f = _storage_account_option(f) f = _skip_on_md5_match_option(f) f = _skip_on_lmt_ge_option(f) f = _skip_on_filesize_match_option(f) @@ -757,7 +761,7 @@ def cli(ctx): def download(ctx): """Download blobs or files from Azure Storage""" settings.add_cli_options(ctx.cli_options, settings.TransferAction.Download) - ctx.initialize() + 
ctx.initialize(settings.TransferAction.Download) specs = settings.create_download_specifications(ctx.config) for spec in specs: blobxfer.api.Downloader( @@ -773,7 +777,7 @@ def synccopy(ctx): """Synchronously copy blobs between Azure Storage accounts""" raise NotImplementedError() settings.add_cli_options(ctx.cli_options, settings.TransferAction.Synccopy) - ctx.initialize() + ctx.initialize(settings.TransferAction.Synccopy) @cli.command('upload') @@ -784,7 +788,7 @@ def synccopy(ctx): def upload(ctx): """Upload files to Azure Storage""" settings.add_cli_options(ctx.cli_options, settings.TransferAction.Upload) - ctx.initialize() + ctx.initialize(settings.TransferAction.Upload) specs = settings.create_upload_specifications(ctx.config) for spec in specs: blobxfer.api.Uploader( diff --git a/cli/settings.py b/cli/settings.py index 1507c82..24d1a7f 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -61,13 +61,13 @@ def add_cli_options(cli_options, action): if blobxfer.util.is_none_or_empty(local_resource): raise KeyError() except KeyError: - raise ValueError('--local-resource must be specified') + raise ValueError('--local-path must be specified') try: storage_account = cli_options['storage_account'] if blobxfer.util.is_none_or_empty(storage_account): raise KeyError() except KeyError: - raise ValueError('--storage-account-name must be specified') + raise ValueError('--storage-account must be specified') try: remote_path = cli_options['remote_path'] if blobxfer.util.is_none_or_empty(remote_path): @@ -167,7 +167,7 @@ def add_cli_options(cli_options, action): raise KeyError() except KeyError: raise ValueError( - '--sync-copy-dest-storage-account-name must be specified') + '--sync-copy-dest-storage-account must be specified') try: sync_copy_dest_remote_path = \ cli_options['sync_copy_dest_remote_path'] @@ -278,10 +278,11 @@ def create_azure_storage_credentials(config, general_options): return creds -def create_general_options(config): - # type: (dict) -> blobxfer.models.options.General +def create_general_options(config, action): + # type: (dict, TransferAction) -> blobxfer.models.options.General """Create a General Options object from configuration :param dict config: config dict + :param TransferAction action: transfer action :rtype: blobxfer.models.options.General :return: general options object """ @@ -292,6 +293,7 @@ def create_general_options(config): disk_threads=conc.get('disk_threads', 0), md5_processes=conc.get('md5_processes', 0), transfer_threads=conc.get('transfer_threads', 0), + is_download=action == TransferAction.Download, ), log_file=config['options'].get('log_file', None), progress_bar=config['options'].get('progress_bar', True), diff --git a/docs/01-installation.md b/docs/01-installation.md index 2609f07..9a3fd74 100644 --- a/docs/01-installation.md +++ b/docs/01-installation.md @@ -72,9 +72,10 @@ docker pull alfpark/blobxfer ## Troubleshooting #### `azure.storage` dependency not found -If you get an error that `azure.storage` cannot be found or loaded, then -most likely there was a conflict with this package with other `azure` packages -that share the same base namespace. You can correct this by issuing: +If you get an error such as `ImportError: No module named storage` or that +`azure.storage` cannot be found or loaded, then most likely there was a +conflict with this package with other `azure` packages that share the same +base namespace. 
You can correct this by issuing: ```shell # for Python3 pip3 install --upgrade --force-reinstall azure-storage diff --git a/docs/10-cli-usage.md b/docs/10-cli-usage.md index 8931464..2f3aad4 100644 --- a/docs/10-cli-usage.md +++ b/docs/10-cli-usage.md @@ -12,9 +12,9 @@ command will be detailed along with all options available. ### `download` Downloads a remote Azure path, which may contain many resources, to the local machine. This command requires at the minimum, the following options: -* `--storage-account-name` +* `--storage-account` * `--remote-path` -* `--local-resource` +* `--local-path` Additionally, an authentication option for the storage account is required. Please see the Authentication sub-section below under Options. @@ -23,14 +23,14 @@ Please see the Authentication sub-section below under Options. Uploads a local path to a remote Azure path. The local path may contain many resources on the local machine. This command requires at the minimum, the following options: -* `--local-resource` -* `--storage-account-name` +* `--local-path` +* `--storage-account` * `--remote-path` Additionally, an authentication option for the storage account is required. Please see the Authentication sub-section below under Options. -If piping from `stdin`, `--local-resource` should be set to `-` as per +If piping from `stdin`, `--local-path` should be set to `-` as per convention. ### `synccopy` @@ -49,9 +49,10 @@ of up to 100MiB, all others have a maximum of 4MiB. attributes (mode and ownership) should be stored or restored. Note that to restore uid/gid, `blobxfer` must be run as root or under sudo. * `--file-md5` or `--no-file-md5` controls if the file MD5 should be computed. -* `--local-resource` is the local resource path. Set to `-` if piping from +* `--local-path` is the local resource path. Set to `-` if piping from `stdin`. -* `--log-file` specifies the log file to write to. +* `--log-file` specifies the log file to write to. This must be specified +for a progress bar to be output to console. * `--mode` is the operating mode. The default is `auto` but may be set to `append`, `block`, `file`, or `page`. If specified with the `upload` command, then all files will be uploaded as the specified `mode` type. @@ -61,12 +62,16 @@ with Azure File shares. * `--overwrite` or `--no-overwrite` controls clobber semantics at the destination. * `--progress-bar` or `--no-progress-bar` controls if a progress bar is -output to the console. +output to the console. `--log-file` must be specified for a progress bar +to be output. * `--recursive` or `--no-recursive` controls if the source path should be recursively uploaded or downloaded. * `--remote-path` is the remote Azure path. This path must contain the Blob container or File share at the begining, e.g., `mycontainer/vdir` * `--resume-file` specifies the resume file to write to. +* `--storage-account` specifies the storage account to use. This can be +optionally provided through an environment variable `BLOBXFER_STORAGE_ACCOUNT` +instead. * `--timeout` is the integral timeout value in seconds to use. * `-h` or `--help` can be passed at every command level to receive context sensitive help. @@ -96,7 +101,7 @@ to/from Azure Storage. ### Connection * `--endpoint` is the Azure Storage endpoint to connect to; the default is Azure Public regions, or `core.windows.net`. -* `--storage-account-name` is the storage account to connect to. +* `--storage-account` is the storage account to connect to. 
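As noted in the option list above, `--storage-account` may also be supplied through the `BLOBXFER_STORAGE_ACCOUNT` environment variable. The CLI wires this up with `click`'s `envvar` support (see the `_storage_account_option` decorator in `cli/cli.py` earlier in this patch). The following is a minimal standalone `click` sketch of the same pattern; it is illustrative only and is not blobxfer's actual CLI code.

```python
import click


@click.command()
@click.option('--storage-account', envvar='BLOBXFER_STORAGE_ACCOUNT',
              help='Storage account name')
def main(storage_account):
    # click resolves the value from the command line first and falls back
    # to the BLOBXFER_STORAGE_ACCOUNT environment variable if the option
    # was not passed explicitly.
    click.echo('storage account: {}'.format(storage_account))


if __name__ == '__main__':
    main()
```

Invoked as `BLOBXFER_STORAGE_ACCOUNT=mystorageaccount python example.py` (a hypothetical file name for this sketch), the option resolves from the environment without being passed on the command line.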
### Encryption * `--rsa-private-key` is the RSA private key in PEM format to use. This can @@ -161,27 +166,27 @@ file path. The default is `1`. ### `download` Examples #### Download an Entire Encrypted Blob Container to Current Working Directory ```shell -blobxfer download --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource . --rsa-public-key ~/mypubkey.pem +blobxfer download --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path . --rsa-public-key ~/mypubkey.pem ``` #### Download an Entire File Share to Designated Path and Skip On Filesize Matches ```shell -blobxfer download --mode file --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path myfileshare --local-resource /my/path --skip-on-filesize-match +blobxfer download --mode file --storage-account mystorageaccount --storage-account-key "myaccesskey" --remote-path myfileshare --local-path /my/path --skip-on-filesize-match ``` #### Download only Page Blobs in Blob Container Virtual Directory Non-recursively and Cleanup Local Path to Match Remote Path ```shell -blobxfer download --mode page --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-resource /my/pageblobs --no-recursive --delete +blobxfer download --mode page --storage-account mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-path /my/pageblobs --no-recursive --delete ``` #### Resume Incomplete Downloads Matching an Include Pattern and Log to File and Restore POSIX File Attributes ```shell -blobxfer download --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-resource . --include '*.bin' --resume-file myresumefile.db --log-file blobxfer.log --file-attributes +blobxfer download --storage-account mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-path . --include '*.bin' --resume-file myresumefile.db --log-file blobxfer.log --file-attributes ``` #### Download a Blob Snapshot ```shell -blobxfer download --storage-account-name mystorageaccount --sas "mysastoken" --remote-path "mycontainer/file.bin?snapshot=2017-04-20T02:12:49.0311708Z" --local-resource . +blobxfer download --storage-account mystorageaccount --sas "mysastoken" --remote-path "mycontainer/file.bin?snapshot=2017-04-20T02:12:49.0311708Z" --local-path . ``` #### Download using a YAML Configuration File @@ -192,27 +197,27 @@ blobxfer download --config myconfig.yaml ### `upload` Examples #### Upload Current Working Directory as Encrypted Block Blobs Non-recursively ```shell -blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource . --rsa-private-key ~/myprivatekey.pem --no-recursive +blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path . --rsa-private-key ~/myprivatekey.pem --no-recursive ``` #### Upload Specific Path Recursively to a File Share, Store File MD5 and POSIX File Attributes to a File Share and Exclude Some Files ```shell -blobxfer upload --mode file --storage-account-name mystorageaccount --sas "mysastoken" --remote-path myfileshare --local-resource . --file-md5 --file-attributes --exclude '*.bak' +blobxfer upload --mode file --storage-account mystorageaccount --sas "mysastoken" --remote-path myfileshare --local-path . 
--file-md5 --file-attributes --exclude '*.bak'
 ```
 
 #### Upload Single File with Resume and Striped Vectored IO into 512MiB Chunks
 ```shell
-blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource /some/huge/file --resume-file hugefileresume.db --distribution-mode stripe --stripe-chunk-size-bytes 536870912
+blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path /some/huge/file --resume-file hugefileresume.db --distribution-mode stripe --stripe-chunk-size-bytes 536870912
 ```
 
 #### Upload Specific Path but Skip On Any MD5 Matches, Store File MD5 and Cleanup Remote Path to Match Local Path
 ```shell
-blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource /my/path --file-md5 --skip-on-md5-match --delete
+blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path /my/path --file-md5 --skip-on-md5-match --delete
 ```
 
 #### Upload From Piped `stdin`
 ```shell
-curl -fSsL https://some.uri | blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource -
+curl -fSsL https://some.uri | blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path -
 ```
 
 #### Upload using a YAML Configuration File
diff --git a/docs/30-vectored-io.md b/docs/30-vectored-io.md
index a007b7d..1d17c40 100644
--- a/docs/30-vectored-io.md
+++ b/docs/30-vectored-io.md
@@ -91,5 +91,5 @@ keep this metadata in-tact or reconstruction will fail.
    +---------------------+
 ```
 
-In order to take advantage of `stripe` Vectored IO, you must use a YAML
-configuration file to define multiple destinations.
+In order to take advantage of `stripe` Vectored IO across multiple
+destinations, you must use a YAML configuration file.
diff --git a/docs/98-performance-considerations.md b/docs/98-performance-considerations.md
index 89ff0c6..8a511fc 100644
--- a/docs/98-performance-considerations.md
+++ b/docs/98-performance-considerations.md
@@ -3,8 +3,8 @@ Please read the following carefully regarding considerations that should be
 applied with regard to performance and `blobxfer`. Additionally, please
 review the
 [Azure Storage Scalability and Performance Targets](https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/)
-for an overview of general performance targets that apply to Azure Blobs
-and File shares.
+for an overview of general performance targets that apply to Azure Blobs,
+File shares, and Storage Account types (GRS, LRS, ZRS, etc.).
 
 ## Concurrency
 * `blobxfer` offers four concurrency knobs. Each one should be tuned for
@@ -23,6 +23,44 @@ maximum performance according to your system and network characteristics.
 * The thread concurrency options (disk and transfer) can be set to a
 non-positive number to be automatically set as a multiple of the number
 of cores available on the machine.
+* For uploads, there should be a sufficient number of disk threads to ensure
+that all transfer threads have work to do. For downloads, there should be a
+sufficient number of disk threads writing data to disk so that transfer
+threads are not artificially blocked.
+
+## Chunk Sizing
+Chunk sizing refers to the `chunk_size_bytes` option; its meaning differs
+depending upon whether you are uploading or downloading.
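As a concrete illustration of the block-count constraint discussed in the Uploads subsection below, the smallest usable chunk size for a block blob upload can be derived directly from the file size and the 50,000-block limit. The helper below is hypothetical (it is not part of blobxfer) and simply rounds the result up to a whole MiB.

```python
MAX_BLOCKS = 50000              # service limit on blocks per block blob
MAX_BLOCK_SIZE = 100 * 2 ** 20  # 100MiB maximum block size
MiB = 2 ** 20


def min_chunk_size_bytes(file_size):
    """Smallest whole-MiB chunk that fits file_size within MAX_BLOCKS blocks."""
    chunk = -(-file_size // MAX_BLOCKS)   # ceiling division: bytes per block
    chunk = -(-chunk // MiB) * MiB        # round up to a whole MiB
    if chunk > MAX_BLOCK_SIZE:
        raise ValueError('file too large for a single block blob')
    return chunk


# A 1TiB file needs chunks of at least 21MiB to stay within 50000 blocks.
print(min_chunk_size_bytes(2 ** 40) // MiB)  # -> 21
```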
+
+### Uploads
+For uploads, chunk sizes correspond to the maximum amount of data to transfer
+with a single request. The Azure Storage service imposes maximums depending
+upon the type of entity that is being written. For block blobs, the maximum
+is 100MiB per block (although you may "one-shot" a blob of up to 256MiB in a
+single request). For page blobs, append blobs, and Azure Files, the maximum
+is 4MiB.
+
+For block blobs, setting the chunk size to something greater than 4MiB will
+not only allow you to create larger objects (recall that the maximum number
+of blocks for a block blob is 50000, thus at 100MiB blocks, you can create
+a block blob of roughly 4.75TiB) but will also allow you to amortize larger
+portions of data transfer over each request/response overhead. `blobxfer`
+can automatically select the proper block size given your file, but will
+not automatically tune the chunk size as that depends upon your system and
+network characteristics.
+
+### Downloads
+For downloads, chunk sizes correspond to the maximum amount of data to
+retrieve from the server with each request. It is important to keep a balance
+between the chunk size and the number of in-flight operations afforded by
+the `transfer_threads` concurrency control. `blobxfer` does not automatically
+tune this (but can automatically set it to a value that should work for
+most situations) due to varying system and network conditions.
+
+Additionally, disk write performance is typically lower than disk read
+performance, so ensure that the number of `disk_threads` is not set too
+high; an excessively large value can cause disk thrashing and highly
+random write patterns.
 
 ## Azure File Share Performance
 File share performance can be "slow" or become a bottleneck, especially for