diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..50a06f1 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,32 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] +NIL + +## [0.2.0] - 2019-04-20 +Complies with [Dirhash Standard](https://github.com/andhus/dirhash) Version [0.1.0](https://github.com/andhus/dirhash/releases/v0.1.0) + +### Added +- A first implementation based on the formalized [Dirhash Standard](https://github.com/andhus/dirhash). +- This changelog. +- Results from a new benchmark run after the changes. `benchmark/run.py` now outputs results files whose names include the `dirhash.__version__`. + +### Changed +- **Significant breaking changes** from version 0.1.1 - both regarding the API and the +underlying method/protocol for computing the hash. This means that **hashes +computed with this version will differ from hashes computed with version < 0.2.0 for +the same directory**. +- This dirhash python implementation has moved to +[github.com/andhus/dirhash-python](https://github.com/andhus/dirhash-python) from +the previous repository +[github.com/andhus/dirhash](https://github.com/andhus/dirhash), +which now contains the formal description of the Dirhash Standard. + +### Removed +- All support for the `.dirhashignore` file. This seemed superfluous; please file an +issue if you need this feature. diff --git a/README.md b/README.md index d7b8fae..e9c938b 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,15 @@ -[![Build Status](https://travis-ci.com/andhus/dirhash.svg?branch=master)](https://travis-ci.com/andhus/dirhash) -[![codecov](https://codecov.io/gh/andhus/dirhash/branch/master/graph/badge.svg)](https://codecov.io/gh/andhus/dirhash) +[![Build Status](https://travis-ci.com/andhus/dirhash-python.svg?branch=master)](https://travis-ci.com/andhus/dirhash-python) +[![codecov](https://codecov.io/gh/andhus/dirhash-python/branch/master/graph/badge.svg)](https://codecov.io/gh/andhus/dirhash-python) # dirhash -A lightweight python module and tool for computing the hash of any +A lightweight python module and CLI for computing the hash of any directory based on its files' structure and content. -- Supports any hashing algorithm of Python's built-in `hashlib` module -- `.gitignore` style "wildmatch" patterns for expressive filtering of files to -include/exclude. +- Supports all hashing algorithms of Python's built-in `hashlib` module. +- Glob/wildcard (".gitignore style") path matching for expressive filtering of files to include/exclude. - Multiprocessing for up to [6x speed-up](#performance) +The hash is computed according to the [Dirhash Standard](https://github.com/andhus/dirhash), which is designed to allow for consistent and collision-resistant generation/verification of directory hashes across implementations.
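For illustration, a minimal sketch (not part of the original README) of the generate-then-verify workflow that this cross-implementation consistency enables; the directory path is a placeholder:

```python
from dirhash import dirhash

# Compute and record a checksum for a directory tree.
expected = dirhash("path/to/directory", "sha256")

# Later (or via any other Dirhash Standard implementation), recompute and
# compare to detect any change in the included files' structure or content.
assert dirhash("path/to/directory", "sha256") == expected
```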
+ ## Installation From PyPI: ```commandline pip install dirhash ``` Or directly from source: ```commandline -git clone git@github.com:andhus/dirhash.git +git clone git@github.com:andhus/dirhash-python.git pip install dirhash/ ``` @@ -25,16 +26,16 @@ Python module: ```python from dirhash import dirhash -dirpath = 'path/to/directory' -dir_md5 = dirhash(dirpath, 'md5') -filtered_sha1 = dirhash(dirpath, 'sha1', ignore=['.*', '.*/', '*.pyc']) -pyfiles_sha3_512 = dirhash(dirpath, 'sha3_512', match=['*.py']) +dirpath = "path/to/directory" +dir_md5 = dirhash(dirpath, "md5") +pyfiles_md5 = dirhash(dirpath, "md5", match=["*.py"]) +no_hidden_sha1 = dirhash(dirpath, "sha1", ignore=[".*", ".*/"]) ``` CLI: ```commandline dirhash path/to/directory -a md5 -dirhash path/to/directory -a sha1 -i ".* .*/ *.pyc" -dirhash path/to/directory -a sha3_512 -m "*.py" +dirhash path/to/directory -a md5 --match "*.py" +dirhash path/to/directory -a sha1 --ignore ".*" ".*/" ``` ## Why? @@ -66,7 +67,7 @@ and executing `hashlib` code. The main effort to boost performance is support for multiprocessing, where the reading and hashing is parallelized over individual files. -As a reference, let's compare the performance of the `dirhash` [CLI](https://github.com/andhus/dirhash/blob/master/dirhash/cli.py) +As a reference, let's compare the performance of the `dirhash` [CLI](https://github.com/andhus/dirhash-python/blob/master/src/dirhash/cli.py) with the shell command: `find path/to/folder -type f -print0 | sort -z | xargs -0 md5 | md5` @@ -87,7 +88,7 @@ shell reference | nested_32k_32kB | 6.82 | -> 1.0 `dirhash` | nested_32k_32kB | 3.43 | 2.00 `dirhash`(8 workers)| nested_32k_32kB | 1.14 | **6.00** -The benchmark was run a MacBook Pro (2018), further details and source code [here](https://github.com/andhus/dirhash/tree/master/benchmark). +The benchmark was run on a MacBook Pro (2018); further details and source code [here](https://github.com/andhus/dirhash-python/tree/master/benchmark). ## Documentation -Please refer to `dirhash -h` and the python [source code](https://github.com/andhus/dirhash/blob/master/dirhash/__init__.py). \ No newline at end of file +Please refer to `dirhash -h`, the python [source code](https://github.com/andhus/dirhash-python/blob/master/src/dirhash/__init__.py), and the [Dirhash Standard](https://github.com/andhus/dirhash).
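As a sketch of how the multiprocess speed-up reported in the Performance section is invoked from the CLI, using the `-j`/`--jobs` option introduced in this release (the path is a placeholder; the same can be done from Python via the `jobs` argument of `dirhash`):

```commandline
dirhash path/to/folder -a md5 -j 8
```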
\ No newline at end of file diff --git a/benchmark/results_v0.2.0.csv b/benchmark/results_v0.2.0.csv new file mode 100644 index 0000000..0e783dc --- /dev/null +++ b/benchmark/results_v0.2.0.csv @@ -0,0 +1,51 @@ +,test_case,implementation,algorithm,workers,t_best,t_median,speed-up (median) +0,flat_8_128MB,shell reference,md5,1,2.079,2.083,1.0 +1,flat_8_128MB,dirhash_impl,md5,1,1.734,1.945,1.0709511568123393 +2,flat_8_128MB,dirhash_impl,md5,2,0.999,1.183,1.760777683854607 +3,flat_8_128MB,dirhash_impl,md5,4,0.711,0.728,2.8612637362637368 +4,flat_8_128MB,dirhash_impl,md5,8,0.504,0.518,4.021235521235521 +5,flat_1k_1MB,shell reference,md5,1,3.383,3.679,1.0 +6,flat_1k_1MB,dirhash_impl,md5,1,1.846,1.921,1.9151483602290473 +7,flat_1k_1MB,dirhash_impl,md5,2,1.137,1.158,3.1770293609671847 +8,flat_1k_1MB,dirhash_impl,md5,4,0.74,0.749,4.911882510013351 +9,flat_1k_1MB,dirhash_impl,md5,8,0.53,0.534,6.889513108614231 +10,flat_32k_32kB,shell reference,md5,1,13.827,18.213,1.0 +11,flat_32k_32kB,dirhash_impl,md5,1,13.655,13.808,1.3190179606025494 +12,flat_32k_32kB,dirhash_impl,md5,2,3.276,3.33,5.469369369369369 +13,flat_32k_32kB,dirhash_impl,md5,4,2.409,2.421,7.522924411400249 +14,flat_32k_32kB,dirhash_impl,md5,8,2.045,2.086,8.731064237775648 +15,nested_1k_1MB,shell reference,md5,1,3.284,3.332,1.0 +16,nested_1k_1MB,dirhash_impl,md5,1,1.717,1.725,1.9315942028985504 +17,nested_1k_1MB,dirhash_impl,md5,2,1.026,1.034,3.222437137330754 +18,nested_1k_1MB,dirhash_impl,md5,4,0.622,0.633,5.263823064770932 +19,nested_1k_1MB,dirhash_impl,md5,8,0.522,0.529,6.29867674858223 +20,nested_32k_32kB,shell reference,md5,1,11.898,12.125,1.0 +21,nested_32k_32kB,dirhash_impl,md5,1,13.858,14.146,0.8571327583769263 +22,nested_32k_32kB,dirhash_impl,md5,2,2.781,2.987,4.059256779377302 +23,nested_32k_32kB,dirhash_impl,md5,4,1.894,1.92,6.315104166666667 +24,nested_32k_32kB,dirhash_impl,md5,8,1.55,1.568,7.732780612244897 +25,flat_8_128MB,shell reference,sha1,1,2.042,2.05,1.0 +26,flat_8_128MB,dirhash_impl,sha1,1,1.338,1.354,1.5140324963072376 +27,flat_8_128MB,dirhash_impl,sha1,2,0.79,0.794,2.5818639798488663 +28,flat_8_128MB,dirhash_impl,sha1,4,0.583,0.593,3.456998313659359 +29,flat_8_128MB,dirhash_impl,sha1,8,0.483,0.487,4.209445585215605 +30,flat_1k_1MB,shell reference,sha1,1,2.118,2.129,1.0 +31,flat_1k_1MB,dirhash_impl,sha1,1,1.39,1.531,1.3905943827563685 +32,flat_1k_1MB,dirhash_impl,sha1,2,0.925,0.932,2.2843347639484977 +33,flat_1k_1MB,dirhash_impl,sha1,4,0.614,0.629,3.384737678855326 +34,flat_1k_1MB,dirhash_impl,sha1,8,0.511,0.52,4.094230769230769 +35,flat_32k_32kB,shell reference,sha1,1,10.551,10.97,1.0 +36,flat_32k_32kB,dirhash_impl,sha1,1,4.663,4.76,2.304621848739496 +37,flat_32k_32kB,dirhash_impl,sha1,2,3.108,3.235,3.3910355486862445 +38,flat_32k_32kB,dirhash_impl,sha1,4,2.342,2.361,4.6463362981787375 +39,flat_32k_32kB,dirhash_impl,sha1,8,2.071,2.094,5.2387774594078325 +40,nested_1k_1MB,shell reference,sha1,1,2.11,2.159,1.0 +41,nested_1k_1MB,dirhash_impl,sha1,1,1.436,1.47,1.4687074829931972 +42,nested_1k_1MB,dirhash_impl,sha1,2,0.925,0.937,2.3041622198505864 +43,nested_1k_1MB,dirhash_impl,sha1,4,0.627,0.643,3.357698289269051 +44,nested_1k_1MB,dirhash_impl,sha1,8,0.516,0.527,4.096774193548386 +45,nested_32k_32kB,shell reference,sha1,1,3.982,7.147,1.0 +46,nested_32k_32kB,dirhash_impl,sha1,1,4.114,4.156,1.7196823869104911 +47,nested_32k_32kB,dirhash_impl,sha1,2,2.598,2.616,2.7320336391437308 +48,nested_32k_32kB,dirhash_impl,sha1,4,1.809,1.831,3.9033315128345167 +49,nested_32k_32kB,dirhash_impl,sha1,8,1.552,1.58,4.523417721518987 diff 
--git a/benchmark/results_v0.2.0.json b/benchmark/results_v0.2.0.json new file mode 100644 index 0000000..71a652b --- /dev/null +++ b/benchmark/results_v0.2.0.json @@ -0,0 +1,402 @@ +[ + { + "test_case": "flat_8_128MB", + "implementation": "shell reference", + "algorithm": "md5", + "workers": 1, + "t_best": 2.079, + "t_median": 2.083 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 1, + "t_best": 1.734, + "t_median": 1.945 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 2, + "t_best": 0.999, + "t_median": 1.183 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 4, + "t_best": 0.711, + "t_median": 0.728 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 8, + "t_best": 0.504, + "t_median": 0.518 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "shell reference", + "algorithm": "md5", + "workers": 1, + "t_best": 3.383, + "t_median": 3.679 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 1, + "t_best": 1.846, + "t_median": 1.921 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 2, + "t_best": 1.137, + "t_median": 1.158 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 4, + "t_best": 0.74, + "t_median": 0.749 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 8, + "t_best": 0.53, + "t_median": 0.534 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "shell reference", + "algorithm": "md5", + "workers": 1, + "t_best": 13.827, + "t_median": 18.213 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 1, + "t_best": 13.655, + "t_median": 13.808 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 2, + "t_best": 3.276, + "t_median": 3.33 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 4, + "t_best": 2.409, + "t_median": 2.421 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 8, + "t_best": 2.045, + "t_median": 2.086 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "shell reference", + "algorithm": "md5", + "workers": 1, + "t_best": 3.284, + "t_median": 3.332 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 1, + "t_best": 1.717, + "t_median": 1.725 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 2, + "t_best": 1.026, + "t_median": 1.034 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 4, + "t_best": 0.622, + "t_median": 0.633 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 8, + "t_best": 0.522, + "t_median": 0.529 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "shell reference", + "algorithm": "md5", + "workers": 1, + "t_best": 11.898, + "t_median": 12.125 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 1, + "t_best": 13.858, + "t_median": 14.146 + }, + { + "test_case": "nested_32k_32kB", + "implementation": 
"dirhash", + "algorithm": "md5", + "workers": 2, + "t_best": 2.781, + "t_median": 2.987 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 4, + "t_best": 1.894, + "t_median": 1.92 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "dirhash", + "algorithm": "md5", + "workers": 8, + "t_best": 1.55, + "t_median": 1.568 + }, + { + "test_case": "flat_8_128MB", + "implementation": "shell reference", + "algorithm": "sha1", + "workers": 1, + "t_best": 2.042, + "t_median": 2.05 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 1, + "t_best": 1.338, + "t_median": 1.354 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 2, + "t_best": 0.79, + "t_median": 0.794 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 4, + "t_best": 0.583, + "t_median": 0.593 + }, + { + "test_case": "flat_8_128MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 8, + "t_best": 0.483, + "t_median": 0.487 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "shell reference", + "algorithm": "sha1", + "workers": 1, + "t_best": 2.118, + "t_median": 2.129 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 1, + "t_best": 1.39, + "t_median": 1.531 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 2, + "t_best": 0.925, + "t_median": 0.932 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 4, + "t_best": 0.614, + "t_median": 0.629 + }, + { + "test_case": "flat_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 8, + "t_best": 0.511, + "t_median": 0.52 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "shell reference", + "algorithm": "sha1", + "workers": 1, + "t_best": 10.551, + "t_median": 10.97 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 1, + "t_best": 4.663, + "t_median": 4.76 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 2, + "t_best": 3.108, + "t_median": 3.235 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 4, + "t_best": 2.342, + "t_median": 2.361 + }, + { + "test_case": "flat_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 8, + "t_best": 2.071, + "t_median": 2.094 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "shell reference", + "algorithm": "sha1", + "workers": 1, + "t_best": 2.11, + "t_median": 2.159 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 1, + "t_best": 1.436, + "t_median": 1.47 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 2, + "t_best": 0.925, + "t_median": 0.937 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 4, + "t_best": 0.627, + "t_median": 0.643 + }, + { + "test_case": "nested_1k_1MB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 8, + "t_best": 0.516, + "t_median": 0.527 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "shell reference", + "algorithm": "sha1", + "workers": 1, + "t_best": 3.982, + "t_median": 
7.147 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 1, + "t_best": 4.114, + "t_median": 4.156 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 2, + "t_best": 2.598, + "t_median": 2.616 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 4, + "t_best": 1.809, + "t_median": 1.831 + }, + { + "test_case": "nested_32k_32kB", + "implementation": "dirhash", + "algorithm": "sha1", + "workers": 8, + "t_best": 1.552, + "t_median": 1.58 + } +] \ No newline at end of file diff --git a/benchmark/run.py b/benchmark/run.py index 5dc5ed3..f930b2e 100644 --- a/benchmark/run.py +++ b/benchmark/run.py @@ -6,6 +6,8 @@ from statistics import median, mean +from dirhash import __version__ + BENCHMARK_ROOT = os.path.abspath( os.path.join(__file__, os.pardir) @@ -117,7 +119,7 @@ def get_reference_shell_cmd(dirpath, algorithm): def get_dirhash_shell_cmd(dirpath, algorithm, workers=1): - return 'dirhash {} -a {} -w {}'.format(dirpath, algorithm, workers) + return 'dirhash {} -a {} -j {}'.format(dirpath, algorithm, workers) def benchmark(dirpath, algorithm, **kwargs): @@ -164,7 +166,9 @@ def benchmark(dirpath, algorithm, **kwargs): result = benchmark(test_case, algorithm=alg, runs=5, repetitions=1) results.extend(result) - with open(os.path.join(BENCHMARK_ROOT, 'results.json'), 'w') as f: + result_fname = 'results_v{}'.format(__version__) + + with open(os.path.join(BENCHMARK_ROOT, result_fname + '.json'), 'w') as f: json.dump(results, f, indent=4) try: @@ -188,6 +192,6 @@ def benchmark(dirpath, algorithm, **kwargs): print(df_hd_1w) print('\nAverage speedup multiprocess (8 workers): {}'.format(mean_speedup_8w)) print(df_hd_8w) - df.to_csv(os.path.join(BENCHMARK_ROOT, 'results.csv')) + df.to_csv(os.path.join(BENCHMARK_ROOT, result_fname + '.csv')) except ImportError: pass diff --git a/setup.py b/setup.py index 05d7790..242919a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import os from setuptools import setup, find_packages -VERSION = '0.1.1' +VERSION = '0.2.0' DESCRIPTION = 'Python module and CLI for hashing of file system directories.' @@ -19,14 +19,11 @@ description=DESCRIPTION, long_description=long_description, long_description_content_type="text/markdown", - url='https://github.com/andhus/dirhash', + url='https://github.com/andhus/dirhash-python', author="Anders Huss", author_email="andhus@kth.se", license='MIT', - install_requires=[ - 'pathspec>=0.5.9', - 'scandir>=1.9.0;python_version<"3.5"' - ], + install_requires=['scantree>=0.0.1'], packages=find_packages('src'), package_dir={'': 'src'}, include_package_data=True, diff --git a/src/dirhash/__init__.py b/src/dirhash/__init__.py index 4a4d875..cee8bd6 100644 --- a/src/dirhash/__init__.py +++ b/src/dirhash/__init__.py @@ -1,9 +1,5 @@ #!/usr/bin/env python -"""dirhash - a python module (and CLI) for hashing of file system directories. - -Provides the functions: -- `dirhash` -- `get_included_paths`. +"""dirhash - a python library (and CLI) for hashing of file system directories. 
""" from __future__ import print_function, division @@ -14,445 +10,588 @@ from functools import partial from multiprocessing import Pool -# Use the built-in version of scandir/walk if possible (python > 3.5), -# otherwise use the scandir module version -try: - from os import scandir -except ImportError: # pragma: no cover - from scandir import scandir - -from pathspec import PathSpec -from pathspec import RecursionError as _RecursionError -from pathspec.patterns import GitWildMatchPattern - +from scantree import ( + scantree, + RecursionFilter, + CyclicLinkedDir, +) + +__all__ = [ + '__version__', + 'algorithms_guaranteed', + 'algorithms_available', + 'dirhash', + 'dirhash_impl', + 'included_paths', + 'Filter', + 'get_match_patterns', + 'Protocol' +] __version__ = pkg_resources.require("dirhash")[0].version algorithms_guaranteed = {'md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'} algorithms_available = hashlib.algorithms_available -ignorefilename = '.dirhashignore' - def dirhash( directory, algorithm, - match=None, + match=("*",), ignore=None, + linked_dirs=True, + linked_files=True, + empty_dirs=False, + entry_properties=('name', 'data'), + allow_cyclic_links=False, chunk_size=2**20, - content_only=False, - paths_only=False, - follow_links=True, - include_empty=False, - workers=None, - **kwargs + jobs=1 ): """Computes the hash of a directory based on its structure and content. # Arguments - directory (str | pathlib.Path): Path to the directory to hash. - algorithm (str): The name of the hashing algorithm to use. It is also - possible to provide a callable object that returns an instance - implementing the `hashlib._hashlib.HASH` interface. - match ([str] | None): A list (or iterable) of match-patterns for files to - include when computing the hash. Default `None` which is equivalent to - `['*']`, i.e. everything is included. See "Path Selection and Filtering" + directory: Union[str, pathlib.Path] - Path to the directory to hash. + algorithm: str - The name of the hashing algorithm to use. See + `dirhash.algorithms_available` for the available options. + match: Iterable[str] - An iterable of glob/wildcard match-patterns for paths + to include when computing the hash. Default is ["*"] which means that all + files and directories are matched. To e.g. only include python source + files, use: `match=["*.py"]`. See "Path Selection and Filtering" section below for further details. - ignore ([str] | None): A list (or iterable) of match-patterns for files to - ignore when computing the hash. Default `None` (no ignore patterns). See - "Path Selection and Filtering" below for further details. - chunk_size (int): The number of bytes to read in one go from files while + ignore: Optional[Iterable[str]] - An iterable of glob/wildcard match-patterns + for paths to ignore when computing the hash. Default `None` (no ignore + patterns). To e.g. exclude hidden files and directories use: + `ignore=[".*/", ".*"]`. See "Path Selection and Filtering" section below + for further details. + linked_dirs: bool - If `True` (default), follow symbolic links to other + *directories* and include these and their content in the hash + computation. + linked_files: bool - If `True` (default), include symbolic linked files in + the hash computation. + empty_dirs: bool - If `True`, include empty directories when computing the + hash. A directory is considered empty if it does not contain any files + that *matches provided matching criteria*. Default `False`, i.e. 
empty + directories are ignored (as is done in git version control). + entry_properties: Iterable[str] - A set (i.e. order does not matter) of the + file/directory properties to consider when computing the hash. Supported + properties are {"name", "data", "is_link"} where at least one of + "name" and "data" must be included. Default is ["name", "data"] which + means that the content (actual data) as well as the path relative to the + root `directory` of files will affect the hash value. See "Entry + Properties Interpretation" section below for further details. + allow_cyclic_links: bool - If `False` (default) a `SymlinkRecursionError` is + raised on presence of cyclic symbolic links. If set to `True` the + dirhash value for the directory causing the cyclic link is replaced with the + hash function hexdigest of the relative path from the link to the target. + chunk_size: int - The number of bytes to read in one go from files while being hashed. A too small size will slow down the processing and a larger size consumes more working memory. Default 2**20 byte = 1 MiB. - content_only (bool): Compute the hash only based on the content of files - - *not* their names or the names of their parent directories. Default - `False`. - NOTE that the tree structure in which files are organized under the - the `directory` root still influences the computed hash. As longs as all - files have the same content and are organised the same way in relation to - all other files in the Directed Acyclic Graph representing the file-tree, - the hash will remain the same (but the "name of nodes" does not matter). - This option can e.g. be used to verify that that data is unchanged after - renaming files (change extensions etc.). - paths_only (bool): Compute the hash only based on the name and location of - files in the file tree under the `directory` root. Default `False`. - This option can e.g. be used to check if any files have been - added/moved/removed, ignoring the content of each file. This is - considerably faster than including content. - follow_links (bool): If true, follow symbolic links to other *directories*. - NOTE that symbolic links to other *files* are always included (as if the - link was the actual file). Default `False`. - include_empty (bool): Include empty directories when computing the hash. A - directory is considered empty if it does not contain any files *matching - provided matching criteria*. Default `False`, i.e. empty directories are - ignored (as with git version control). - workers (int | None): The number of processes to use when computing the hash. - Default `None`, equivalent to `1`, which means no multiprocessing is - used. NOTE that using multiprocessing can significantly speed-up - execution, see `https://github.com/andhus/dirhash/tree/master/benchmark` - for further details. + jobs: int - The number of processes to use when computing the hash. + Default `1`, which means that a single (the main) process is used. NOTE + that using multiprocessing can significantly speed up execution, see + `https://github.com/andhus/dirhash-python/tree/master/benchmark` for further + details. # Returns - The hash/checksum as a string the of hexadecimal digits (the result of + str - The hash/checksum as a string of the hexadecimal digits (the result of `hexdigest` method of the hashlib._hashlib.HASH object corresponding to the provided `algorithm`). # Raises - ValueError: For incorrectly provided arguments. + TypeError/ValueError: For incorrectly provided arguments.
SymlinkRecursionError: In case the `directory` contains symbolic links that - lead to (infinite) recursion. + lead to (infinite) recursion and `allow_cyclic_links=False` (default). # Path Selection and Filtering - Provided match-patterns determine what paths within the `directory` to - include when computing the hash value. These follow the ".gitignore - wildcard style" of path matching. Paths *relative to the root `directory` - (excluding the name of the directory itself) are matched against the - patterns. + Provided glob/wildcard (".gitignore style") match-patterns determine what + paths within the `directory` to include when computing the hash value. Paths + *relative to the root `directory`* (i.e. excluding the name of the root + directory itself) are matched against the patterns. The `match` argument represents what should be *included* - as opposed - to `ignore` patterns for which matches are *excluded*. Using `ignore` is + to the `ignore` argument for which matches are *excluded*. Using `ignore` is just short for adding the same patterns to the `match` argument with the prefix "!", i.e. the calls below are equivalent: - - `dirhash(..., match=['*', '!<pattern>'])` - `dirhash(..., match=['*', '!<pattern>'], ignore=[])` - `dirhash(..., match=['*'], ignore=['<pattern>'])` - `dirhash(..., ignore=['<pattern>'])` - - If a file named ".dirhashignore" (available by the `dirhash.ignorefilename` - module attribute) exists *directly* under the provided `directory`, then each - line (not starting with "#") of this file is added to the ignore patterns. - - The following kwargs can also be used (possibly together with `match` and/or - `ignore`): - `ignore_extensions` ([str]): list (iterable) of file extensions to - exclude. Short for adding `'*[.]<extension>'` to the `ignore` patterns - where the dot [.] is added if <extension> does not already start with - a dot. - `ignore_hidden` (bool): Short for adding `['.*', '.*/']` to the `ignore` - patterns, which will exclude hidden files and directories. - - To validate which paths are included, call `dirhash.get_included_paths` with - the same values for the arguments: `match`, `ignore` `follow_links`, - `include_empty`, `ignore_extensions` and `ignore_hidden` to get a list of all - paths that will be included when computing the hash by this function. + `dirhash(..., match=["*", "!<pattern>"])` + `dirhash(..., ignore=["<pattern>"])` + To validate which paths are included, call `dirhash.included_paths` with + the same values for the arguments: `match`, `ignore`, `linked_dirs`, + `linked_files` and `empty_dirs` to get a list of all paths that will be + included when computing the hash by this function. + + # Entry Properties Interpretation + - ["name", "data"] (Default) - The name as well as data is included. Due to + the recursive nature of the dirhash computation, "name" implies that the + path relative to the root `directory` of each file/directory affects the + computed hash value. + - ["data"] - Compute the hash only based on the data of files - + *not* their names or the names of their parent directories. NOTE that + the tree structure in which files are organized under the `directory` + root still influences the computed hash. As long as all files have + the same content and are organised the same way in relation to all + other files in the Directed Acyclic Graph representing the file-tree, + the hash will remain the same (but the "name of nodes" does not + matter). This option can e.g. be used to verify that data is + unchanged after renaming files (change extensions etc.).
+ - ["name"] - Compute the hash only based on the name and location of + files in the file tree under the `directory` root. This option can + e.g. be used to check if any files have been added/moved/removed, + ignoring the content of each file. + - "is_link" - if this options is added to any of the cases above the + hash value is also affected by whether a file or directory is a + symbolic link or not. NOTE: with this property added, the hash + will be different than without it even if there are no symbolic links + in the directory. + + # References + See https://github.com/andhus/dirhash/README.md for a formal + description of how the returned hash value is computed. """ - abspath = os.path.abspath(directory) - _verify_is_directory(abspath) + filter_ = Filter( + match_patterns=get_match_patterns(match=match, ignore=ignore), + linked_dirs=linked_dirs, + linked_files=linked_files, + empty_dirs=empty_dirs + ) + protocol = Protocol( + entry_properties=entry_properties, + allow_cyclic_links=allow_cyclic_links + ) + return dirhash_impl( + directory=directory, + algorithm=algorithm, + filter_=filter_, + protocol=protocol, + chunk_size=chunk_size, + jobs=jobs + ) + + +def dirhash_impl( + directory, + algorithm, + filter_=None, + protocol=None, + chunk_size=2**20, + jobs=1 +): + """Computes the hash of a directory based on its structure and content. - if content_only and paths_only: - raise ValueError( - 'only one of arguments `content_only` and `paths_only` can be True') + In contrast to `dirhash.dirhash`, this function accepts custom implementations of + the `dirhash.Filter` and `dirhash.Protocol` classes. + # Arguments + directory: Union[str, pathlib.Path] - Path to the directory to hash. + algorithm: str - The name of the hashing algorithm to use. See + `dirhash.algorithms_available` for the available options. + It is also possible to provide a callable object that returns an instance + implementing the `hashlib._hashlib.HASH` interface. + filter_: dirhash.Filter - Determines what files and directories to include + when computing the hash. See docs of `dirhash.Filter` for further + details. + protocol: dirhash.Protocol - Determines (mainly) what properties of files and + directories to consider when computing the hash value. + chunk_size: int - The number of bytes to read in one go from files while + being hashed. A too small size will slow down the processing and a larger + size consumes more working memory. Default 2**20 byte = 1 MiB. + jobs: int - The number of processes to use when computing the hash. + Default `1`, which means that a single (the main) process is used. NOTE + that using multiprocessing can significantly speed-up execution, see + `https://github.com/andhus/dirhash/tree/master/benchmark` for further + details. + + # Returns + str - The hash/checksum as a string of the hexadecimal digits (the result of + `hexdigest` method of the hashlib._hashlib.HASH object corresponding to the + provided `algorithm`). + + # Raises + TypeError/ValueError: For incorrectly provided arguments. + SymlinkRecursionError: In case the `directory` contains symbolic links that + lead to (infinite) recursion and the protocol option `allow_cyclic_links` + is `False`. + + # References + See https://github.com/andhus/dirhash/README.md for a formal + description of how the returned hash value is computed. 
+ """ + def get_instance(value, cls_, argname): + if isinstance(value, cls_): + return value + if value is None: + return cls_() + raise TypeError('{} must be an instance of {} or None'.format(argname, cls_)) + + filter_ = get_instance(filter_, Filter, 'filter_') + protocol = get_instance(protocol, Protocol, 'protocol') hasher_factory = _get_hasher_factory(algorithm) - match_filter = _get_match_filter(directory, match=match, ignore=ignore, **kwargs) - cache = {} + def dir_apply(dir_node): + if not filter_.empty_dirs: + if dir_node.path.relative == '' and dir_node.empty: + # only check if root node is empty (other empty dirs are filter + # before `dir_apply` with `filter_.empty_dirs=False`) + raise ValueError('{}: Nothing to hash'.format(directory)) + descriptor = protocol.get_descriptor(dir_node) + _dirhash = hasher_factory(descriptor.encode('utf-8')).hexdigest() + + return dir_node.path, _dirhash + + if jobs == 1: + cache = {} + + def file_apply(path): + return path, _get_filehash( + path.real, + hasher_factory, + chunk_size=chunk_size, + cache=cache + ) - if workers is not None and workers > 1: - # extract all (unique) files - _, file_realpaths = _get_leafs( - abspath=abspath, - match_filter=match_filter, - follow_links=follow_links, - include_empty=False, + _, dirhash_ = scantree( + directory, + recursion_filter=filter_, + file_apply=file_apply, + dir_apply=dir_apply, + follow_links=True, + allow_cyclic_links=protocol.allow_cyclic_links, + cache_file_apply=False, + include_empty=filter_.empty_dirs, + jobs=1 + ) + else: # multiprocessing + real_paths = set() + + def extract_real_paths(path): + real_paths.add(path.real) + return path + + root_node = scantree( + directory, + recursion_filter=filter_, + file_apply=extract_real_paths, + follow_links=True, + allow_cyclic_links=protocol.allow_cyclic_links, + cache_file_apply=False, + include_empty=filter_.empty_dirs, + jobs=1 ) + real_paths = list(real_paths) # hash files in parallel - pool = Pool(workers) - try: - file_hashes = pool.map( - partial( - _get_filehash, - hasher_factory=hasher_factory, - chunk_size=chunk_size - ), - file_realpaths - ) - finally: - pool.close() - # prepare the cache with precomputed file hashes - cache = dict(zip(file_realpaths, file_hashes)) - - dirhash = _get_dirhash( - abspath=abspath, - relpath='', - hasher_factory=hasher_factory, - content_only=content_only, - paths_only=paths_only, - chunk_size=chunk_size, - match_filter=match_filter, - follow_links=follow_links, - include_empty=include_empty, - included_leafs=[], - included_file_realpaths=set(), - visited_dirs={}, - cache=cache - ) - if dirhash is _EMPTY: - if include_empty: - return hasher_factory(_empty_dir_descriptor.encode('utf-8')).hexdigest() - else: - raise ValueError('{}: Nothing to hash'.format(directory)) + file_hashes = _parmap( + partial( + _get_filehash, + hasher_factory=hasher_factory, + chunk_size=chunk_size + ), + real_paths, + jobs=jobs + ) + # prepare the mapping with precomputed file hashes + real_path_to_hash = dict(zip(real_paths, file_hashes)) + + def file_apply(path): + return path, real_path_to_hash[path.real] + + _, dirhash_ = root_node.apply(file_apply=file_apply, dir_apply=dir_apply) - return dirhash + return dirhash_ -def get_included_paths( +def included_paths( directory, - match=None, + match=("*",), ignore=None, - follow_links=True, - include_empty=False, - **kwargs + linked_dirs=True, + linked_files=True, + empty_dirs=False, + allow_cyclic_links=False, ): """Inspect what paths are included for the corresponding arguments to 
the `dirhash.dirhash` function. # Arguments: This function accepts the following subset of the function `dirhash.dirhash` - arguments: `directory`, `match`, `ignore` `follow_links`, `include_empty`, - `ignore_extensions` and `ignore_hidden`, with the same meaning. See docs of - `dirhash.dirhash` for further details. + arguments: `directory`, `match`, `ignore`, `linked_dirs`, `linked_files`, + `empty_dirs` and `allow_cyclic_links`, *with the same interpretation*. See + docs of `dirhash.dirhash` for further details. # Returns - A sorted list of the paths ([str]) that would be included in computing the - hash of `directory` given the provided arguments. + List[str] - A sorted list of the paths that would be included when computing + the hash of the `directory` using `dirhash.dirhash` and the same arguments. """ - abspath = os.path.abspath(directory) - _verify_is_directory(abspath) - match_filter = _get_match_filter(abspath, match=match, ignore=ignore, **kwargs) - included_leafs, _ = _get_leafs( - abspath=abspath, - match_filter=match_filter, - follow_links=follow_links, - include_empty=include_empty, + filter_ = Filter( + match_patterns=get_match_patterns(match=match, ignore=ignore), + linked_dirs=linked_dirs, + linked_files=linked_files, + empty_dirs=empty_dirs ) + protocol = Protocol(allow_cyclic_links=allow_cyclic_links) - return sorted(included_leafs) - + leafpaths = scantree( + directory, + recursion_filter=filter_, + follow_links=True, + allow_cyclic_links=protocol.allow_cyclic_links, + include_empty=filter_.empty_dirs + ).leafpaths() -def _get_leafs( - abspath, - match_filter, - follow_links=True, - include_empty=False, -): - """An inexpensive "dry-run" of the `_get_dirhash` function to get the leaf-paths - that will be included in computing the hash. - """ - included_leafs = [] - included_file_realpaths = set() - _get_dirhash( - abspath=abspath, - relpath='', - hasher_factory=_PlaceHolderHasher, # avoid computing any hash - content_only=False, - paths_only=True, # avoid opening files! - chunk_size=None, # never used - match_filter=match_filter, - follow_links=follow_links, - include_empty=include_empty, - included_leafs=included_leafs, - included_file_realpaths=included_file_realpaths, - visited_dirs={} - ) - return included_leafs, included_file_realpaths + return [ + path.relative if path.is_file() else os.path.join(path.relative, '.') + for path in leafpaths + ] -_null_chr = '\000' -_component_separator = _null_chr -_descriptor_separator = _null_chr * 2 -_dirs_files_separator = _null_chr * 3 -_empty_dir_descriptor = _dirs_files_separator +class Filter(RecursionFilter): + """Specification of what files and directories to include for the `dirhash` + computation. + # Arguments + match: Iterable[str] - An iterable of glob/wildcard (".gitignore style") + match patterns for selection of which files and directories to include. + Paths *relative to the root `directory`* (i.e. excluding the name of the + root directory itself) are matched against the provided patterns. For + example, to include all files, except for hidden ones use: + `match=['*', '!.*']` Default `None` which is equivalent to `['*']`, + i.e. everything is included. + linked_dirs: bool - If `True` (default), follow symbolic links to other + *directories* and include these and their content in the hash + computation. + linked_files: bool - If `True` (default), include symbolic linked files in + the hash computation. + empty_dirs: bool - If `True`, include empty directories when computing the + hash. 
A directory is considered empty if it does not contain any files + that *match the provided matching criteria*. Default `False`, i.e. empty + directories are ignored (as is done in git version control). + """ + def __init__( + self, + match_patterns=None, + linked_dirs=True, + linked_files=True, + empty_dirs=False + ): + super(Filter, self).__init__( + linked_dirs=linked_dirs, + linked_files=linked_files, + match=match_patterns + ) + self.empty_dirs = empty_dirs -def _verify_is_directory(directory): - if not os.path.exists(directory): - raise ValueError('{}: No such directory'.format(directory)) - if not os.path.isdir(directory): - raise ValueError('{}: Is not a directory'.format(directory)) +def get_match_patterns( + match=None, + ignore=None, + ignore_extensions=None, + ignore_hidden=False, +): + """Helper to compose a list of glob/wildcard (".gitignore style") match + patterns based on options dedicated to a few standard use-cases. -def _get_match_filter(dir_abspath, ignore, **kwargs): - """Helper to construct a function for filtering of paths. + # Arguments + match: Optional[List[str]] - A list of match-patterns for files to *include*. + Default `None` which is equivalent to `['*']`, i.e. everything is + included (unless excluded by arguments below). + ignore: Optional[List[str]] - A list of match-patterns for files to + *ignore*. Default `None` (no ignore patterns). + ignore_extensions: Optional[List[str]] - A list of file extensions to + ignore. Short for `ignore=['*.<extension>', ...]`. Default `None` (no + extensions ignored). + ignore_hidden: bool - If `True`, ignore hidden files and directories. Short + for `ignore=['.*', '.*/']`. Default `False`. """ + match = ['*'] if match is None else list(match) ignore = [] if ignore is None else list(ignore) - ignore = _parse_ignorefile(dir_abspath) + ignore - - match_spec = _get_match_spec(ignore=ignore, **kwargs) - path_spec = PathSpec.from_lines(GitWildMatchPattern, match_spec) + ignore_extensions = [] if ignore_extensions is None else list(ignore_extensions) - return path_spec.match_files + if ignore_hidden: + ignore.extend(['.*', '.*/']) + for ext in ignore_extensions: + if not ext.startswith('.'): + ext = '.' + ext + ext = '*' + ext + ignore.append(ext) -def _get_dirhash(abspath, *args, **kwargs): - """Entry point of the recursive dirhash implementation, with the only purpose to - provide a more informative error in case of (infinite) symlink recursion. - """ - try: - return _get_dirhash_recursive(os.path.realpath(abspath), *args, **kwargs) - except SymlinkRecursionError as e: - raise SymlinkRecursionError( - real_path=e.real_path, - first_path=os.path.join(abspath, e.first_path), - second_path=os.path.join(abspath, e.second_path) - ) + match_spec = match + ['!' + ign for ign in ignore] + def deduplicate(items): + items_set = set([]) + dd_items = [] + for item in items: + if item not in items_set: + dd_items.append(item) + items_set.add(item) -def _get_dirhash_recursive( - realpath, - relpath, - hasher_factory, - content_only, - paths_only, - chunk_size, - match_filter, - include_empty, - follow_links, - included_leafs, - included_file_realpaths, - visited_dirs, - cache=None -): - """Recursive implementation for computing the hash of a directory based on its - structure and content. # Arguments - realpath (str): Real path to the current directory to hash. - relpath (str): Path to the current directory relative to the root directory - (entry point) for the recursion.
- hasher_factory (f: f() -> hashlib._hashlib.HASH): Callable that returns a - instance of the hashlib._hashlib.HASH interface. - match_filter (f: f(str) -> bool): Function for filtering leaf paths (files - and possibly empty directories) to include. - included_leafs ([str]): Continuously appended list of leaf paths (files - and possibly empty directories) that are included. Used by - `dirhash.get_included_paths`. - included_file_realpaths ({str}): Continuously updated set of the real paths - to included files. Used by `dirhash.dirhash` when files are hashed using - multiprocessing. - visited_dirs ({str: str}): Mapping from real path to root-relative path of - directories visited previously in *current branch* of recursion. Used to - detect if there are symlinks leading to infinite recursion. - cache ({str: str}): Mapping from real file path to hash value of already - hashed files. Used to avoid duplicating hash computations in the case of - repeated occurrence of files by symlinks, as well as to inject - precomputed hashes by the multiprocessing implementation - - For args: `content_only`, `paths_only`, `chunk_size`, `include_empty` and - `follow_links` see docs of `dirhash.dirhash`. + return deduplicate(match_spec) - # Raises - SymlinkRecursionError: in case the current directory has already been - visited in current branch of recursion (i.e. would get infinite recursion - if continuing). - # Side-effects - Continuously updates arguments: `included_leafs`, `included_file_realpaths`, - `visited_dirs` and `cache`. +class Protocol(object): + """Specifications of which file and directory properties to consider when + computing the `dirhash` value. - # Returns - The hash/checksum as a string the of hexadecimal digits of the current - `directory` or `hahsdir._EMPTY` if there are no files or directories to - include. + # Arguments + entry_properties: Iterable[str] - A combination of the supported properties + {"name", "data", "is_link"} where at least one of "name" and "data" is + included. Interpretation: + - ["name", "data"] (Default) - The name as well as data is included. Due + to the recursive nature of the dirhash computation, "name" implies + that the path relative to the root `directory` of each file/directory + affects the computed hash value. + - ["data"] - Compute the hash only based on the data of files - + *not* their names or the names of their parent directories. NOTE that + the tree structure in which files are organized under the `directory` + root still influences the computed hash. As long as all files have + the same content and are organised the same way in relation to all + other files in the Directed Acyclic Graph representing the file-tree, + the hash will remain the same (but the "name of nodes" does not + matter). This option can e.g. be used to verify that data is + unchanged after renaming files (change extensions etc.). + - ["name"] - Compute the hash only based on the name and location of + files in the file tree under the `directory` root. This option can + e.g. be used to check if any files have been added/moved/removed, + ignoring the content of each file. + - "is_link" - if this option is added to any of the cases above the + hash value is also affected by whether a file or directory is a + symbolic link or not. NOTE: with this property added, the hash + will be different than without it even if there are no symbolic links + in the directory.
+ allow_cyclic_links: bool - If `False` (default) a `SymlinkRecursionError` is + raised on presence of cyclic symbolic links. If set to `True` the the + dirhash value for directory causing the cyclic link is replaced with the + hash function hexdigest of the relative path from the link to the target. """ - fwd_kwargs = vars() - del fwd_kwargs['realpath'] - del fwd_kwargs['relpath'] - - if follow_links: - if realpath in visited_dirs: - raise SymlinkRecursionError( - real_path=realpath, - # below will be replaced by full abspath in `_get_dirhash` - first_path=visited_dirs[realpath], - second_path=relpath - ) - visited_dirs[realpath] = relpath - - subdirs, files = [], [] - symlink_files = set() - for dir_entry in scandir(realpath): - if dir_entry.is_dir(follow_symlinks=follow_links): - subdirs.append(dir_entry) - elif dir_entry.is_file(follow_symlinks=True): - files.append(dir_entry) - if dir_entry.is_symlink(): - symlink_files.add(dir_entry.name) - - subdir_descriptors = [] - for subdir in subdirs: - if subdir.is_symlink(): - sub_realpath = os.path.realpath(subdir.path) - else: - sub_realpath = subdir.path - sub_relpath = os.path.join(relpath, subdir.name) - sub_dirhash = _get_dirhash_recursive(sub_realpath, sub_relpath, **fwd_kwargs) - if sub_dirhash is _EMPTY: - if not include_empty: - continue - if next(match_filter([sub_relpath]), None) is None: - # dir is not a match - continue - # included empty (leaf) directories represented as `path/to/directory/.` - included_leafs.append(os.path.join(sub_relpath, '.')) - sub_dirhash = hasher_factory( - _empty_dir_descriptor.encode('utf-8') - ).hexdigest() - - if content_only: - subdir_descriptor = sub_dirhash - else: - subdir_descriptor = _component_separator.join([sub_dirhash, subdir.name]) - subdir_descriptors.append(subdir_descriptor) - - subdirs_descriptor = _descriptor_separator.join(sorted(subdir_descriptors)) - - file_descriptors = [] - for file_relpath in match_filter( - os.path.join(relpath, file_.name) for file_ in files + class EntryProperties(object): + NAME = 'name' + DATA = 'data' + IS_LINK = 'is_link' + options = {NAME, DATA, IS_LINK} + _DIRHASH = 'dirhash' + + _entry_property_separator = '\000' + _entry_descriptor_separator = '\000\000' + + def __init__( + self, + entry_properties=('name', 'data'), + allow_cyclic_links=False ): - filename = os.path.basename(file_relpath) - file_realpath = os.path.join(realpath, filename) - if filename in symlink_files: - file_realpath = os.path.realpath(file_realpath) - included_leafs.append(file_relpath) - included_file_realpaths.add(file_realpath) + entry_properties = set(entry_properties) + if not entry_properties.issubset(self.EntryProperties.options): + raise ValueError( + 'entry properties {} not supported'.format( + entry_properties - self.EntryProperties.options) + ) + if not ( + self.EntryProperties.NAME in entry_properties or + self.EntryProperties.DATA in entry_properties + ): + raise ValueError( + 'at least one of entry properties `name` and `data` must be used' + ) + self.entry_properties = entry_properties + self._include_name = self.EntryProperties.NAME in entry_properties + self._include_data = self.EntryProperties.DATA in entry_properties + self._include_is_link = self.EntryProperties.IS_LINK in entry_properties + + if not isinstance(allow_cyclic_links, bool): + raise ValueError( + 'allow_cyclic_link must be a boolean, ' + 'got {}'.format(allow_cyclic_links) + ) + self.allow_cyclic_links = allow_cyclic_links + + def get_descriptor(self, dir_node): + if isinstance(dir_node, 
CyclicLinkedDir): + return self._get_cyclic_linked_dir_descriptor(dir_node) + + entries = dir_node.directories + dir_node.files + entry_descriptors = [ + self._get_entry_descriptor( + self._get_entry_properties(path, entry_hash) + ) for path, entry_hash in entries + ] + return self._entry_descriptor_separator.join(sorted(entry_descriptors)) + + @classmethod + def _get_entry_descriptor(cls, entry_properties): + entry_strings = [ + '{}:{}'.format(name, value) + for name, value in entry_properties + ] + return cls._entry_property_separator.join(sorted(entry_strings)) + + def _get_entry_properties(self, path, entry_hash): + properties = [] + if path.is_dir(): + properties.append((self.EntryProperties._DIRHASH, entry_hash)) + elif self._include_data: # path is file + properties.append((self.EntryProperties.DATA, entry_hash)) + + if self._include_name: + properties.append((self.EntryProperties.NAME, path.name)) + if self._include_is_link: + properties.append((self.EntryProperties.IS_LINK, path.is_symlink)) + + return properties + + def _get_cyclic_linked_dir_descriptor(self, dir_node): + relpath = dir_node.path.relative + target_relpath = dir_node.target_path.relative + path_to_target = os.path.relpath( + # the extra '.' is needed if link back to root, because + # an empty path ('') is not supported by os.path.relpath + os.path.join('.', target_relpath), + os.path.join('.', relpath) + ) + # TODO normalize posix! + return path_to_target - if paths_only: - file_descriptors.append(filename) - continue - filehash = _get_filehash(file_realpath, hasher_factory, chunk_size, cache) +def _get_hasher_factory(algorithm): + """Returns a "factory" of hasher instances corresponding to the given algorithm + name. Bypasses input argument `algorithm` if it is already a hasher factory + (verified by attempting calls to required methods). + """ + if algorithm in algorithms_guaranteed: + return getattr(hashlib, algorithm) - if content_only: - file_descriptors.append(filehash) - else: - file_descriptors.append(_component_separator.join([filehash, filename])) + if algorithm in algorithms_available: + return partial(hashlib.new, algorithm) - files_descriptor = _descriptor_separator.join(sorted(file_descriptors)) + try: # bypass algorithm if already a hasher factory + hasher = algorithm(b'') + hasher.update(b'') + hasher.hexdigest() + return algorithm + except: + pass - is_empty = (subdirs_descriptor == '' and files_descriptor == '') - if is_empty: - return _EMPTY + raise ValueError( + '`algorithm` must be one of: {}`'.format(algorithms_available)) - descriptor = ''.join( - [subdirs_descriptor, _dirs_files_separator, files_descriptor] - ) - dirhash = hasher_factory(descriptor.encode('utf-8')).hexdigest() +def _parmap(func, iterable, jobs=1): + """Map with multiprocessing.Pool""" + if jobs == 1: + return [func(element) for element in iterable] - if follow_links: - del visited_dirs[realpath] + pool = Pool(jobs) + try: + results = pool.map(func, iterable) + finally: + pool.close() - return dirhash + return results def _get_filehash(filepath, hasher_factory, chunk_size, cache=None): - """Compute the hash for given filepath. + """Compute the hash of the given filepath. # Arguments - filepath (str): Path to the file to hash. - hasher_factory (f: f() -> hashlib._hashlib.HASH): Callable that returns an + filepath: str - Path to the file to hash. + hasher_factory: (f: f() -> hashlib._hashlib.HASH): Callable that returns an instance of the `hashlib._hashlib.HASH` interface. 
chunk_size (int): The number of bytes to read in one go from files while being hashed. @@ -479,111 +618,3 @@ def _get_filehash(filepath, hasher_factory, chunk_size, cache=None): hasher.update(chunk) return hasher.hexdigest() - - -class SymlinkRecursionError(_RecursionError): - """Raised when symlinks cause a cyclic graph of directories. - - Extends the `pathspec.util.RecursionError` but with a different name (avoid - overriding the built-in error!) and with a more informative string representation - (used in `dirhash.cli`). - """ - def __str__(self): - # _RecursionError.__str__ prints args without context - return 'Symlink recursion: {}'.format(self.message) - - -class _Empty(object): - """The single instance of this class, `_EMPTY` below, is used as return value for - `_get_dirhash_recursive` in the case of an empty directory. - """ - pass - - -_EMPTY = _Empty() - - -def _get_hasher_factory(algorithm): - """Returns a "factory" of hasher instances corresponding to the given algorithm - name. Bypasses input argument `algorithm` if it is already a hasher factory - (verified by attempting calls to required methods). - """ - if algorithm in algorithms_guaranteed: - return getattr(hashlib, algorithm) - - if algorithm in algorithms_available: - return partial(hashlib.new, algorithm) - - try: # bypass algorithm if already a hasher factory - hasher = algorithm(b'') - hasher.update(b'') - hasher.hexdigest() - return algorithm - except: - pass - - raise ValueError( - '`algorithm` must be one of: {}`'.format(algorithms_available)) - - -class _PlaceHolderHasher(object): - """A hasher that does nothing and always returns an empty string. - - Used in the `_get_leafs` "dry-run" of the `_get_dirhash_recursive` function. - """ - - def __init__(self, *args, **kwargs): - pass - - def hexdigest(self): - return '' - - -def _get_match_spec( - match=None, - ignore=None, - ignore_extensions=None, - ignore_hidden=False, -): - """Combines the different arguments for providing match/ignore-patterns into a - single list of match-patterns. - """ - match = ['*'] if match is None else list(match) - ignore = [] if ignore is None else list(ignore) - ignore_extensions = [] if ignore_extensions is None else list(ignore_extensions) - - if ignore_hidden: - ignore.extend(['.*', '.*/']) - - for ext in ignore_extensions: - if not ext.startswith('.'): - ext = '.' + ext - ext = '*' + ext - ignore.append(ext) - - match_spec = match + ['!' 
+ ign for ign in ignore] - - def deduplicate(items): - items_set = set([]) - dd_items = [] - for item in items: - if item not in items_set: - dd_items.append(item) - items_set.add(item) - - return dd_items - - return deduplicate(match_spec) - - -def _parse_ignorefile(directory): - """Parse ignore file in `directory` (if exists) and return a list of ignore - patterns.""" - ignorefilepath = os.path.join(directory, ignorefilename) - if not os.path.exists(ignorefilepath): - return [] - - with open(ignorefilepath) as f: - ignore = [p for p in f.read().splitlines() if not p.startswith('#')] - - return ignore diff --git a/src/dirhash/cli.py b/src/dirhash/cli.py index 80fcb02..06e4044 100644 --- a/src/dirhash/cli.py +++ b/src/dirhash/cli.py @@ -3,7 +3,6 @@ """ from __future__ import print_function -import os import sys import argparse @@ -11,7 +10,25 @@ def main(): - parser = argparse.ArgumentParser(description='Determine the hash for directory.') + try: + kwargs = get_kwargs(sys.argv[1:]) + if kwargs.pop('list'): + # kwargs below have no effect when listing + for k in ['algorithm', 'chunk_size', 'jobs', 'entry_properties']: + kwargs.pop(k) + for leafpath in dirhash.included_paths(**kwargs): + print(leafpath) + else: + print(dirhash.dirhash(**kwargs)) + except Exception as e: # pragma: no cover (not picked up by coverage) + sys.stderr.write('dirhash: {}\n'.format(e)) + sys.exit(1) + + +def get_kwargs(args): + parser = argparse.ArgumentParser( + description='Determine the hash for a directory.' + ) parser.add_argument( '-v', '--version', action='version', @@ -26,146 +43,137 @@ def main(): choices=dirhash.algorithms_available, default='md5', help=( - 'Hashing algorithm to use. Always available: {}. Additionally available ' - 'on current platform: {}. Note that the same algorithm may appear ' - 'multiple times in this set under different names (thanks to ' - 'OpenSSL) [https://docs.python.org/2/library/hashlib.html]'.format( + 'Hashing algorithm to use, by default "md5". Always available: {}. ' + 'Additionally available on current platform: {}. Note that the same ' + 'algorithm may appear multiple times in this set under different names ' + '(thanks to OpenSSL) ' + '[https://docs.python.org/2/library/hashlib.html]'.format( sorted(dirhash.algorithms_guaranteed), sorted(dirhash.algorithms_available - dirhash.algorithms_guaranteed) ) ), metavar='' ) - parser.add_argument( + + filter_options = parser.add_argument_group( + title='Filtering options', + description=( + 'Specify what files and directories to include. All files and ' + 'directories (including symbolic links) are included by default. The ' + '--match/--ignore arguments allows for selection using glob/wildcard ' + '(".gitignore style") path matching. Paths relative to the root ' + '`directory` (i.e. excluding the name of the root directory itself) are ' + 'matched against the provided patterns. For example, to only include ' + 'python source files, use: `dirhash path/to/dir -m "*.py"` or to ' + 'exclude hidden files and directories use: ' + '`dirhash path/to.dir -i ".*" ".*/"` which is short for ' + '`dirhash path/to.dir -m "*" "!.*" "!.*/"`. By adding the --list ' + 'argument, all included paths, for the given filtering arguments, are ' + 'returned instead of the hash value. For further details see ' + 'https://github.com/andhus/dirhash/README.md#filtering' + ) + ) + filter_options.add_argument( '-m', '--match', - type=str, - default='*', - help='String of match-patterns, separated by blank space.' 
+ nargs='+', + default=['*'], + help=( + 'One or several patterns for paths to include. NOTE: patterns ' + 'with an asterisk must be in quotes ("*") or the asterisk ' + 'preceded by an escape character (\*).' + ), + metavar='' ) - parser.add_argument( + filter_options.add_argument( '-i', '--ignore', - type=str, + nargs='+', default=None, - help='String of ignore-patterns, separated by blank space.', + help=( + 'One or several patterns for paths to exclude. NOTE: patterns ' + 'with an asterisk must be in quotes ("*") or the asterisk ' + 'preceded by an escape character (\*).' + ), + metavar='' ) - parser.add_argument( - '-d', '--ignore-hidden', + filter_options.add_argument( + '--empty-dirs', action='store_true', default=False, - help='Ignore hidden ("dot") files and directories (short for ' - '`-ignore ".*, "`).' + help='Include empty directories (containing no files that meet the matching ' + 'criteria and no non-empty sub directories).' ) - parser.add_argument( - '-x', '--ignore-extensions', + filter_options.add_argument( + '--no-linked-dirs', + dest='linked_dirs', + action='store_false', + help='Do not include symbolic links to other directories.' + ) + filter_options.add_argument( + '--no-linked-files', + dest='linked_files', + action='store_false', + help='Do not include symbolic links to files.' + ) + parser.set_defaults(linked_dirs=True, linked_files=True) + + protocol_options = parser.add_argument_group( + title='Protocol options', + description=( + 'Specify what properties of files and directories to include and ' + 'whether to allow cyclic links. For further details see ' + 'https://github.com/andhus/dirhash/DIRHASH_STANDARD.md#protocol' + ) + ) + protocol_options.add_argument( + '-p', '--properties', nargs='+', - help='List of file extensions to ignore.', + dest='entry_properties', + default=['data', 'name'], + help=( + 'List of file/directory properties to include in the hash. Available ' + 'properties are: {} and at least one of name and data must be ' + 'included. Default is [data name] which means that both the name/paths' + ' and content (actual data) of files and directories will be included' + ).format(list(dirhash.Protocol.EntryProperties.options)), metavar='' ) - - target_group = parser.add_mutually_exclusive_group(required=False) - target_group.add_argument( - '-c', '--content-only', - action='store_true', + protocol_options.add_argument( + '-c', '--allow-cyclic-links', default=False, - help='Hash only the content of files, not the name and location of files ' - 'within the directory. NOTE (!) the hash will be different if the ' - '(alpha numerical) order of file paths changes.' - ) - target_group.add_argument( - '-p', '--paths-only', action='store_true', - default=False, - help='Hash only the file paths, i.e. the name and location of files ' - 'within the directory.' + help=( + 'Allow presence of cyclic links (by hashing the relative path to the ' + 'target directory).' + ) ) - parser.add_argument( - '--no-follow-links', - dest='follow_links', - action='store_false', - help='Do not follow symbolic links to other *directories*. NOTE: directly ' - 'linked files are always included.' + implementation_options = parser.add_argument_group( + title='Implementation options', + description='' ) - parser.set_defaults(follow_links=True) - parser.add_argument( - '--include-empty', - action='store_true', - default=False, - help='Include empty directories (containing no files that meet the matching ' - 'criteria). 
Note that the path to the directory itself must still meet ' - 'the matching criteria (matched as if it was a file).' - ) - parser.add_argument( + implementation_options.add_argument( '-s', '--chunk-size', default=2**20, type=int, - help='The chunk size (in bytes) for reading fo files.' + help='The chunk size (in bytes) for reading of files.' ) - parser.add_argument( - '-w', '--workers', + implementation_options.add_argument( + '-j', '--jobs', type=int, - default=1, - help='Number of workers (parallel processes) to use.' + default=1, # TODO make default number of cores? + help='Number of jobs (parallel processes) to use.' ) - parser.add_argument( + + special_options = parser.add_argument_group(title='Special options') + special_options.add_argument( '-l', '--list', action='store_true', default=False, - help='List the file paths that will be taken into account, followed by the ' - 'hash of directory structure' + help='List the file paths that will be taken into account, given the ' + 'provided filtering options.' ) - args = parser.parse_args() - - try: - kwargs = preprocess_kwargs(vars(args)) - if kwargs.pop('list'): - # kwargs below have no effect when listing - for k in [ - 'chunk_size', 'content_only', 'paths_only', 'algorithm', 'workers' - ]: - kwargs.pop(k) - for leafpath in dirhash.get_included_paths(**kwargs): - print(leafpath) - else: - print(dirhash.dirhash(**kwargs)) - except Exception as e: - sys.stderr.write('dirhash: {}\n'.format(e)) - sys.exit(1) - - -def preprocess_kwargs(kwargs): - kwargs['match'] = parse_string_arg(kwargs['match']) - kwargs['ignore'] = parse_string_arg(kwargs['ignore']) - # for consistency with `match` and `ignore`, we allow ignore_extensions to be a - # space separate string (not the recommended usages). - x = 'ignore_extensions' - if kwargs[x] is not None: - if len(kwargs[x]) == 1: - kwargs[x] = parse_string_arg(kwargs[x][0]) - else: - kwargs[x] = [] - - remote_ignorefile = os.environ.get('DIRHASH_IGNORE', None) - root_ignorefile_path = os.path.join(kwargs['directory'], dirhash.ignorefilename) - if os.path.exists(root_ignorefile_path): - kwargs['ignore'] = ( - dirhash._parse_ignorefile(kwargs['directory']) + list(kwargs['ignore'])) - elif remote_ignorefile: - if not os.path.exists(remote_ignorefile): - raise ValueError( - 'DIRHASH_IGNORE={}: No such file'.format(remote_ignorefile) - ) - with open(remote_ignorefile) as f: - kwargs['ignore'] = f.readlines() + kwargs['ignore'] - - return kwargs - - -def parse_string_arg(string_arg): - if string_arg is None or string_arg == '': - return [] - return string_arg.split(' ') + return vars(parser.parse_args(args)) if __name__ == '__main__': # pragma: no cover diff --git a/tests/test_cli.py b/tests/test_cli.py index 558f9cd..3886fb9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -66,9 +66,92 @@ def create_default_tree(tmpdir): class TestCLI(object): + @pytest.mark.parametrize( + 'argstring, non_default_kwargs', + [ + ( + '. -a md5', + {} + ), + ( + '.. -a md5', + {'directory': '..'} + ), + ( + 'target-dir -a md5', + {'directory': 'target-dir'} + ), + ( + '. -a sha256', + {'algorithm': 'sha256'} + ), + # Filtering options + ( + '. -a md5 -m "*" "!.*"', + {'match': ['*', '!.*']} + ), + ( + '. -a md5 --match "d1/*" "d2/*" --ignore "*.txt"', + {'match': ['d1/*', 'd2/*'], 'ignore': ['*.txt']} + ), + ( + '. -a md5 --empty-dirs', + {'empty_dirs': True} + ), + ( + '. -a md5 --no-linked-dirs', + {'linked_dirs': False} + ), + ( + '. 
-a md5 --no-linked-files', + {'linked_files': False} + ), + # Protocol options + ( + '. -a md5 --allow-cyclic-links', + {'allow_cyclic_links': True} + + ), + ( + '. -a md5 --properties name', + {'entry_properties': ['name']} - def test_preprocess_kwargs(self): - pass + ), + ( + '. -a md5 --properties name data', + {'entry_properties': ['name', 'data']} + + ), + # Implementation + ( + '. -a md5 -j 10', + {'jobs': 10} + ), + ( + '. -a md5 -s 32000', + {'chunk_size': 32000} + ), + ] + ) + def test_get_kwargs(self, argstring, non_default_kwargs): + from dirhash.cli import get_kwargs + kwargs_expected = { + 'list': False, + 'directory': '.', + 'algorithm': 'md5', + 'match': ['*'], + 'ignore': None, + 'empty_dirs': False, + 'linked_dirs': True, + 'linked_files': True, + 'entry_properties': ['data', 'name'], + 'allow_cyclic_links': False, + 'chunk_size': 2 ** 20, + 'jobs': 1 + } + kwargs_expected.update(non_default_kwargs) + kwargs = get_kwargs(shlex.split(argstring)) + assert kwargs == kwargs_expected @pytest.mark.parametrize( 'description, argstrings, output', @@ -78,9 +161,8 @@ def test_preprocess_kwargs(self): '. --list', '. -a md5 --list', '. -a sha256 --list', - '. --content-only --list', - '. --paths-only --list', - '. --workers 2 --list', + '. --properties name --list', + '. --jobs 2 --list', '. --chunk-size 2 --list'], ('.dir/file\n' '.file\n' @@ -89,37 +171,30 @@ def test_preprocess_kwargs(self): 'file.ext1\n' 'file.ext2\n')), ('IGNORE EXTENSION', - ['. -x .ext1 --list', - '. --ignore-extensions .ext1 --list', - '. -i "*.ext1" --list', + ['. -i "*.ext1" --list', '. --ignore "*.ext1" --list', - '. -m "* !*.ext1" --list', - '. --match "* !*.ext1" --list'], + '. -m "*" "!*.ext1" --list', + '. --match "*" "!*.ext1" --list'], ('.dir/file\n' '.file\n' 'dir/file\n' 'file\n' 'file.ext2\n')), ('IGNORE MULTIPLE EXTENSIONS', - ['. -x .ext1 .ext2 --list', - '. -x ".ext1 .ext2" --list', - '. --ignore-extensions .ext1 .ext2 --list', - '. -i "*.ext1 *.ext2" --list', + ['. -i "*.ext1" "*.ext2" --list', '. -i "*.ext*" --list'], ('.dir/file\n' '.file\n' 'dir/file\n' 'file\n')), ('IGNORE HIDDEN', - ['. -d --list', - '. --ignore-hidden --list', - '. -i ".* .*/" --list'], + ['. -i ".*" ".*/" --list'], ('dir/file\n' 'file\n' 'file.ext1\n' 'file.ext2\n')), ('INCLUDE EMPTY', - ['. --include-empty --list'], + ['. --empty-dirs --list'], ('.dir/file\n' '.file\n' 'dir/file\n' @@ -138,104 +213,20 @@ def test_list(self, description, argstrings, output, tmpdir): assert error == '' assert o == output - def test_root_dirhashignore(self, tmpdir): - create_default_tree(tmpdir) - with tmpdir.as_cwd(): - output, error, returncode = dirhash_run('. --list') - assert returncode == 0 - assert error == '' - assert output == ( - '.dir/file\n' - '.file\n' - 'dir/file\n' - 'file\n' - 'file.ext1\n' - 'file.ext2\n' - ) - - tmpdir.join(dirhash.ignorefilename).write('*.ext*') - with tmpdir.as_cwd(): - output, error, returncode = dirhash_run('. --list') - assert returncode == 0 - assert error == '' - assert output == ( - '.dir/file\n' - '.dirhashignore\n' - '.file\n' - 'dir/file\n' - 'file\n' - ) - - tmpdir.join(dirhash.ignorefilename).write('*.ext*\n#comment\n.*/\n') - with tmpdir.as_cwd(): - output, error, returncode = dirhash_run('. 
--list') - assert returncode == 0 - assert error == '' - assert output == ( - '.dirhashignore\n' - '.file\n' - 'dir/file\n' - 'file\n' - ) - - def test_remote_dirhashignore(self, tmpdir): - rootdir = tmpdir.mkdir('root') - create_default_tree(rootdir) - remote_dirhashignore = tmpdir.join('my_hashignore') - remote_dirhashignore.write('*.ext*\n#comment\n.*/\n') - - with rootdir.as_cwd(): - output, error, returncode = dirhash_run('. --list') - assert returncode == 0 - assert error == '' - assert output == ( - '.dir/file\n' - '.file\n' - 'dir/file\n' - 'file\n' - 'file.ext1\n' - 'file.ext2\n' - ) - - with rootdir.as_cwd(): - output, error, returncode = dirhash_run( - '. --list', add_env={'DIRHASH_IGNORE': str(remote_dirhashignore)} - ) - assert returncode == 0 - assert error == '' - assert output == ( - '.file\n' - 'dir/file\n' - 'file\n' - ) - - def test_error_on_remote_dirhashignore_does_not_exist(self, tmpdir): - rootdir = tmpdir.mkdir('root') - create_default_tree(rootdir) - remote_dirhashignore = tmpdir.join('non_existing_hashignore') - with rootdir.as_cwd(): - output, error, returncode = dirhash_run( - '. --list', add_env={'DIRHASH_IGNORE': str(remote_dirhashignore)} - ) - assert returncode == 1 - assert error.startswith('dirhash: DIRHASH_IGNORE=') - assert error.endswith(': No such file\n') - assert output == '' - @pytest.mark.parametrize( 'argstring, kwargs, expected_hashes', [ ('. -a md5', {'algorithm': 'md5'}, - ['e0d03dd48ab90d232ffabc0da9f08745', - 'fd1cc95ac2207c3f7d72c18fe01c675e', - '0e4a5d4f8c1e4fda174a04c5693c6ea1'] + ['594c48dde0776b03eddeeb0232190be7', + 'd8ab965636d48e407b73b9dbba4cb928', + '050e7bc9ffcb09c15186c04e0f8026df'] ), ('. -a sha256', {'algorithm': 'sha256'}, - ['f25c5dd69d60c1f127481407829c23e2be87df9d28d3c3e9d353b68cd4f7462d', - 'd444e19712ed1e318917b73a3623b9360e8489854d65586d3b74a6894e980b42', - '8ab8e97f1bca5491c355c22f5f0236079f774e5d19454020d76becaf0c03c346']), + ['23a04964149889e932ba3348fe22442f4f6a3b3fec616a386a70579ee857ab7b', + '7b76bac43e963f9561f37b96b92d7a174094bff230c6efbf1d8bf650e8b40b7a', + '7156da2b2e5a2926eb4b72e65f389343cb6aca0578f0aedcd6f7457abd67d8f5']), ] ) def test_hash_result(self, argstring, kwargs, expected_hashes, tmpdir): @@ -244,8 +235,12 @@ def test_hash_result(self, argstring, kwargs, expected_hashes, tmpdir): create_default_tree(tmpdir) with tmpdir.as_cwd(): for add_argstring, add_kwargs, expected_hash in zip( - ['', ' --content-only', ' --paths-only'], - [{}, {'content_only': True}, {'paths_only': True}], + ['', ' -p data', ' -p name'], + [ + {}, + {'entry_properties': ['data']}, + {'entry_properties': ['name']}, + ], expected_hashes ): # run CLI @@ -257,7 +252,7 @@ def test_hash_result(self, argstring, kwargs, expected_hashes, tmpdir): cli_hash = cli_out[:-1] # run CLI multiproc - full_argstring_mp = argstring + add_argstring + ' --workers 2' + full_argstring_mp = argstring + add_argstring + ' --jobs 2' cli_out_mp, error_mp, returncode_mp = dirhash_run(full_argstring_mp) assert error_mp == '' assert returncode_mp == 0 @@ -270,3 +265,9 @@ def test_hash_result(self, argstring, kwargs, expected_hashes, tmpdir): lib_hash = dirhash.dirhash(str(tmpdir), **full_kwargs) assert cli_hash == cli_hash_mp == lib_hash == expected_hash + + def test_error_bad_argument(self, tmpdir): + with tmpdir.as_cwd(): + o, error, returncode = dirhash_run('. 
--chunk-size not_an_int') + assert returncode > 0 + assert error != '' diff --git a/tests/test_dirhash.py b/tests/test_dirhash.py index 2e9afba..0111d78 100644 --- a/tests/test_dirhash.py +++ b/tests/test_dirhash.py @@ -7,17 +7,20 @@ from time import sleep, time import pytest -from pathspec import RecursionError from dirhash import ( _get_hasher_factory, - _get_match_spec, - get_included_paths, + get_match_patterns, + included_paths, dirhash, algorithms_available, algorithms_guaranteed, - _empty_dir_descriptor + Protocol, + _parmap, + Filter, + dirhash_impl ) +from scantree import SymlinkRecursionError class TestGetHasherFactory(object): @@ -75,55 +78,55 @@ def hexdigest(self): assert hasher_factory is MockHasher -class TestGetMatchSpec(object): +class TestGetMatchPatterns(object): def test_default_match_all(self): - ms = _get_match_spec() + ms = get_match_patterns() assert ms == ['*'] def test_only_match(self): - ms = _get_match_spec(match=['a*', 'b*']) + ms = get_match_patterns(match=['a*', 'b*']) assert ms == ['a*', 'b*'] def test_only_ignore(self): - ms = _get_match_spec(ignore=['a*', 'b*']) + ms = get_match_patterns(ignore=['a*', 'b*']) assert ms == ['*', '!a*', '!b*'] def test_match_and_ignore(self): - ms = _get_match_spec(match=['a*'], ignore=['*.ext']) + ms = get_match_patterns(match=['a*'], ignore=['*.ext']) assert ms == ['a*', '!*.ext'] def test_ignore_hidden(self): - ms = _get_match_spec(ignore_hidden=True) + ms = get_match_patterns(ignore_hidden=True) assert ms == ['*', '!.*', '!.*/'] # should not duplicate if present in (general) ignore - ms = _get_match_spec(ignore=['.*'], ignore_hidden=True) + ms = get_match_patterns(ignore=['.*'], ignore_hidden=True) assert ms == ['*', '!.*', '!.*/'] - ms = _get_match_spec(ignore=['.*/'], ignore_hidden=True) + ms = get_match_patterns(ignore=['.*/'], ignore_hidden=True) assert ms == ['*', '!.*/', '!.*'] - ms = _get_match_spec(ignore=['.*', '.*/'], ignore_hidden=True) + ms = get_match_patterns(ignore=['.*', '.*/'], ignore_hidden=True) assert ms == ['*', '!.*', '!.*/'] def test_ignore_extensions(self): - ms = _get_match_spec(ignore_extensions=['.ext']) + ms = get_match_patterns(ignore_extensions=['.ext']) assert ms == ['*', '!*.ext'] # automatically adds '.' 
- ms = _get_match_spec(ignore_extensions=['ext']) + ms = get_match_patterns(ignore_extensions=['ext']) assert ms == ['*', '!*.ext'] # mixed also works - ms = _get_match_spec(ignore_extensions=['ext1', '.ext2']) + ms = get_match_patterns(ignore_extensions=['ext1', '.ext2']) assert ms == ['*', '!*.ext1', '!*.ext2'] # should not duplicate if present in (general) ignore - ms = _get_match_spec(ignore=['*.ext'], ignore_extensions=['.ext']) + ms = get_match_patterns(ignore=['*.ext'], ignore_extensions=['.ext']) assert ms == ['*', '!*.ext'] - ms = _get_match_spec(ignore=['*.ext'], ignore_extensions=['ext']) + ms = get_match_patterns(ignore=['*.ext'], ignore_extensions=['ext']) assert ms == ['*', '!*.ext'] @@ -169,11 +172,11 @@ def test_basic(self): self.mkfile('root/d2/f1') expected_filepaths = ['d1/d11/f1', 'd1/f1', 'd2/f1', 'f1'] - filepaths = get_included_paths(self.path_to('root')) + filepaths = included_paths(self.path_to('root')) assert filepaths == expected_filepaths # end with '/' or not should not matter - filepaths = get_included_paths(self.path_to('root/')) + filepaths = included_paths(self.path_to('root/')) assert filepaths == expected_filepaths def test_not_a_directory(self): @@ -181,9 +184,9 @@ def test_not_a_directory(self): self.mkfile('root/f1') # does not exist with pytest.raises(ValueError): - get_included_paths(self.path_to('wrong_root')) + included_paths(self.path_to('wrong_root')) with pytest.raises(ValueError): - get_included_paths(self.path_to('root/f1')) + included_paths(self.path_to('root/f1')) def test_symlinked_file(self): self.mkdirs('root') @@ -191,12 +194,20 @@ def test_symlinked_file(self): self.mkfile('linked_file') self.symlink('linked_file', 'root/f2') - # NOTE `follow_links` hash no effect if only the file is linked (as is the - # case here), linked _files_ are always included. 
- filepaths = get_included_paths(self.path_to('root'), follow_links=False) + filepaths = included_paths( + self.path_to('root'), + linked_files=True + ) assert filepaths == ['f1', 'f2'] - filepaths = get_included_paths(self.path_to('root'), follow_links=True) + filepaths = included_paths( + self.path_to('root'), + linked_files=False + ) + assert filepaths == ['f1'] + + # default is 'linked_files': True + filepaths = included_paths(self.path_to('root'), ) assert filepaths == ['f1', 'f2'] def test_symlinked_dir(self): @@ -207,26 +218,45 @@ def test_symlinked_dir(self): self.mkfile('linked_dir/f2') self.symlink('linked_dir', 'root/d1') - filepaths = get_included_paths(self.path_to('root'), follow_links=False) + filepaths = included_paths( + self.path_to('root'), + linked_dirs=False + ) assert filepaths == ['f1'] - filepaths = get_included_paths(self.path_to('root'), follow_links=True) + filepaths = included_paths( + self.path_to('root'), + linked_dirs=True + ) assert filepaths == ['d1/f1', 'd1/f2', 'f1'] - # default is `follow_links=True` - filepaths = get_included_paths(self.path_to('root')) + # default is 'linked_dirs': True + filepaths = included_paths(self.path_to('root')) assert filepaths == ['d1/f1', 'd1/f2', 'f1'] - def test_raise_on_infinite_recursion(self): + def test_cyclic_link(self): self.mkdirs('root/d1') self.symlink('root', 'root/d1/link_back') - with pytest.raises(RecursionError) as exc_info: - get_included_paths(self.path_to('root'), follow_links=True) + with pytest.raises(SymlinkRecursionError) as exc_info: + included_paths( + self.path_to('root'), + allow_cyclic_links=False + ) assert exc_info.value.real_path == os.path.realpath(self.path_to('root')) assert exc_info.value.first_path == self.path_to('root/') assert exc_info.value.second_path == self.path_to('root/d1/link_back') assert str(exc_info.value).startswith('Symlink recursion:') + filepaths = included_paths( + self.path_to('root'), + allow_cyclic_links=True + ) + assert filepaths == ['d1/link_back/.'] + + # default is 'allow_cyclic_links': False + with pytest.raises(SymlinkRecursionError): + filepaths = included_paths(self.path_to('root')) + def test_ignore_hidden_files(self): self.mkdirs('root/d1') self.mkdirs('root/.d2') @@ -238,16 +268,20 @@ def test_ignore_hidden_files(self): self.mkfile('root/.d2/f1') # no ignore - filepaths = get_included_paths(self.path_to('root')) + filepaths = included_paths(self.path_to('root')) assert filepaths == ['.d2/f1', '.f2', 'd1/.f2', 'd1/f1', 'f1'] # with ignore - filepaths = get_included_paths(self.path_to('root'), match=['*', '!.*']) + filepaths = included_paths( + self.path_to('root'), + match=['*', '!.*'] + ) assert filepaths == ['.d2/f1', 'd1/f1', 'f1'] def test_exclude_hidden_dirs(self): self.mkdirs('root/d1') self.mkdirs('root/.d2') + self.mkdirs('root/d1/.d1') self.mkfile('root/f1') self.mkfile('root/.f2') @@ -256,11 +290,14 @@ def test_exclude_hidden_dirs(self): self.mkfile('root/.d2/f1') # no ignore - filepaths = get_included_paths(self.path_to('root')) - assert filepaths == ['.d2/f1', '.f2', 'd1/.f2', 'd1/f1', 'f1'] + filepaths = included_paths(self.path_to('root'), empty_dirs=True) + assert filepaths == ['.d2/f1', '.f2', 'd1/.d1/.', 'd1/.f2', 'd1/f1', 'f1'] # with ignore - filepaths = get_included_paths(self.path_to('root'), match=['*', '!.*/']) + filepaths = included_paths( + self.path_to('root'), + match=['*', '!.*/'] + ) assert filepaths == ['.f2', 'd1/.f2', 'd1/f1', 'f1'] def test_exclude_hidden_dirs_and_files(self): @@ -274,11 +311,11 @@ def 
test_exclude_hidden_dirs_and_files(self): self.mkfile('root/.d2/f1') # no ignore - filepaths = get_included_paths(self.path_to('root')) + filepaths = included_paths(self.path_to('root')) assert filepaths == ['.d2/f1', '.f2', 'd1/.f2', 'd1/f1', 'f1'] # using ignore - filepaths = get_included_paths( + filepaths = included_paths( self.path_to('root'), match=['*', '!.*/', '!.*'] ) @@ -298,7 +335,7 @@ def test_exclude_extensions(self): self.mkfile('root/d1/f.txt') self.mkfile('root/d1/f.skip1') - filepaths = get_included_paths( + filepaths = included_paths( self.path_to('root'), match=['*', '!*.skip1', '!*.skip2'] ) @@ -314,14 +351,20 @@ def test_empty_dirs_include_vs_exclude(self): self.mkfile('root/d1/f') self.mkfile('root/d3/d31/f') - filepaths = get_included_paths(self.path_to('root'), include_empty=False) + filepaths = included_paths( + self.path_to('root'), + empty_dirs=False + ) assert filepaths == ['d1/f', 'd3/d31/f'] # `include_empty=False` is default - filepaths = get_included_paths(self.path_to('root')) + filepaths = included_paths(self.path_to('root')) assert filepaths == ['d1/f', 'd3/d31/f'] - filepaths = get_included_paths(self.path_to('root'), include_empty=True) + filepaths = included_paths( + self.path_to('root'), + empty_dirs=True + ) assert filepaths == ['d1/f', 'd2/.', 'd3/d31/f', 'd4/d41/.'] def test_empty_dirs_because_of_filter_include_vs_exclude(self): @@ -331,63 +374,63 @@ def test_empty_dirs_because_of_filter_include_vs_exclude(self): self.mkfile('root/d1/f') self.mkfile('root/d2/.f') - filepaths = get_included_paths( + filepaths = included_paths( self.path_to('root'), match=['*', '!.*'], - include_empty=False + empty_dirs=False ) assert filepaths == ['d1/f'] # `include_empty=False` is default - filepaths = get_included_paths( + filepaths = included_paths( self.path_to('root'), match=['*', '!.*'], ) assert filepaths == ['d1/f'] - filepaths = get_included_paths( + filepaths = included_paths( self.path_to('root'), match=['*', '!.*'], - include_empty=True + empty_dirs=True ) assert filepaths == ['d1/f', 'd2/.'] - def test_empty_dir_not_included_due_to_not_match(self): + def test_empty_dir_inclusion_not_affected_by_match(self): self.mkdirs('root/d1') self.mkdirs('root/.d2') - filepaths = get_included_paths( + # NOTE that empty dirs are not excluded by match_patterns: + + filepaths = included_paths( self.path_to('root'), match=['*', '!.*'], - include_empty=True + empty_dirs=True ) - assert filepaths == ['d1/.'] + assert filepaths == ['.d2/.', 'd1/.'] - # NOTE that empty dirs are matched as is they were files (leafs!) - # TODO better option? 
- filepaths = get_included_paths( + filepaths = included_paths( self.path_to('root'), match=['*', '!.*/'], - include_empty=True + empty_dirs=True ) assert filepaths == ['.d2/.', 'd1/.'] - filepaths = get_included_paths( + filepaths = included_paths( self.path_to('root'), - match=['*', '!d1/'], - include_empty=True + match=['*', '!d1'], + empty_dirs=True ) assert filepaths == ['.d2/.', 'd1/.'] def dirhash_mp_comp(*args, **kwargs): res = dirhash(*args, **kwargs) - res_mp = dirhash(workers=2, *args, **kwargs) + res_mp = dirhash(jobs=2, *args, **kwargs) assert res == res_mp return res -class Testdirhash(TempDirTest): +class TestDirhash(TempDirTest): def test_guaranteed_algorithms(self): self.mkdirs('root/d1/d11') @@ -398,22 +441,49 @@ def test_guaranteed_algorithms(self): self.mkfile('root/d2/f1', 'd') for algorithm, expected_hash in [ - ('md5', '23315916fc3a935b5ed3e120a202aea4'), - ('sha1', '6119b22d2916a4af7032802cdb95c742a217fe9f'), - ('sha224', 'cdb3a780741c08d6c4ffc6aa0725787f6fbef3e80d81c8850215ef61'), - ('sha256', '6fa5594ea7fb6a05fd36c152e6576522' - 'a5f37b07c2d797f2ed96527ae18f3fe3'), - ('sha384', '453ebd36d95e24149f184589df49f69b' - 'f289af3e889c916cc93f0e02367f4d48' - 'aef2593ef29f0ecdf3b6e05572e90066'), - ('sha512', 'f52ac9eeeb5160637afa91f1f20f1a60' - 'ce80a55ac3757f8bb9225e10edc131b4' - '2da10497706ef4f06d36f13dae77540b' - 'c0e5484c7f79f87a83c76ae103fff4fa') + ('md5', '3c631c7f5771468a2187494f802fad8f'), + ('sha1', '992aa2d00d2ed94f0c19eff7f151f5c6a7e0cc41'), + ('sha224', '18013e1df933d5781b2eddb94aceeb7ab689643f1df24060fb478999'), + ('sha256', 'ef7e95269fbc0e3478ad31fddd1c7d08' + '907d189c61725332e8a2fd14448fe175'), + ('sha384', '64ef4360c172bc68250f9326ea231cd1' + '46a7fa1afe9d386cee0cae0e9f1b4ad2' + '1df050d1df436cff792bbe81d6698026'), + ('sha512', '7854226eb0278bc136056998890a8399' + 'f85ca383f7c54665026358d28b5dc716' + '0ec654d2bcebf5d60974f82ed820600d' + '8e807ea53d57578d076ec1c82f501208') ]: hash_value = dirhash_mp_comp(self.path_to('root'), algorithm) assert hash_value == expected_hash + def test_recursive_descriptor(self): + self.mkdirs('root/d1') + self.mkdirs('root/d2') + self.mkfile('root/f1', 'a') + self.mkfile('root/d1/f12', 'b') + + f1_desc = 'data:a\000name:f1' + f12_desc = 'data:b\000name:f12' + d1_desc = 'dirhash:{}\000name:d1'.format(f12_desc) + d2_desc = 'dirhash:\000name:d2' + + empty_dirs_false_expected = '\000\000'.join([f1_desc, d1_desc]) + empty_dirs_true_expected = '\000\000'.join([f1_desc, d2_desc, d1_desc]) + + empty_dirs_false = dirhash( + self.path_to('root'), + algorithm=IdentityHasher + ) + assert empty_dirs_false == empty_dirs_false_expected + + empty_dirs_true = dirhash( + self.path_to('root'), + algorithm=IdentityHasher, + empty_dirs=True + ) + assert empty_dirs_true == empty_dirs_true_expected + def test_symlinked_file(self): self.mkdirs('root1') self.mkfile('root1/f1', 'a') @@ -424,16 +494,20 @@ def test_symlinked_file(self): self.mkfile('root2/f1', 'a') self.mkfile('root2/f2', 'b') - root1_follow_true = dirhash_mp_comp( - self.path_to('root1'), algorithm='md5', follow_links=True) - root1_follow_false = dirhash_mp_comp( - self.path_to('root1'), algorithm='md5', follow_links=False) + root1_linked_files_true = dirhash_mp_comp( + self.path_to('root1'), algorithm='md5' + ) + root1_linked_files_false = dirhash_mp_comp( + self.path_to('root1'), algorithm='md5', + linked_files=False + ) + root2 = dirhash_mp_comp( - self.path_to('root2'), algorithm='md5') + self.path_to('root2'), algorithm='md5' + ) - # NOTE `follow_links` hash no effect if only the 
file is linked (as is the - # case here), linked _files_ are always included. - assert root1_follow_false == root1_follow_true == root2 + assert root1_linked_files_false != root1_linked_files_true + assert root1_linked_files_true == root2 def test_symlinked_dir(self): self.mkdirs('root1') @@ -449,15 +523,22 @@ def test_symlinked_dir(self): self.mkfile('root2/d1/f1', 'b') self.mkfile('root2/d1/f2', 'c') - root1_follow_true = dirhash_mp_comp( - self.path_to('root1'), algorithm='md5', follow_links=True) - root1_follow_false = dirhash_mp_comp( - self.path_to('root1'), algorithm='md5', follow_links=False) + root1_linked_dirs_true = dirhash_mp_comp( + self.path_to('root1'), + algorithm='md5', + linked_dirs=True + ) + root1_linked_dirs_false = dirhash_mp_comp( + self.path_to('root1'), + algorithm='md5', + linked_dirs=False + ) root2 = dirhash_mp_comp( - self.path_to('root2'), algorithm='md5') + self.path_to('root2'), algorithm='md5' + ) - assert root1_follow_false != root1_follow_true - assert root1_follow_true == root2 + assert root1_linked_dirs_false != root1_linked_dirs_true + assert root1_linked_dirs_true == root2 def test_cache_used_for_symlinks(self): @@ -480,11 +561,13 @@ def test_raise_on_empty_root_without_include_empty(self): def test_empty_root_include_empty(self): self.mkdirs('root') - dirhash = dirhash_mp_comp(self.path_to('root'), 'sha256', include_empty=True) - expected_dirhash = hashlib.sha256( - _empty_dir_descriptor.encode('utf-8') - ).hexdigest() - assert dirhash == expected_dirhash + dirhash_ = dirhash_mp_comp( + self.path_to('root'), + 'sha256', + empty_dirs=True + ) + expected_dirhash = hashlib.sha256(''.encode('utf-8')).hexdigest() + assert dirhash_ == expected_dirhash def test_include_empty(self): self.mkdirs('root/d1') @@ -492,9 +575,15 @@ def test_include_empty(self): self.mkfile('root/d1/f') args = (self.path_to('root'), 'sha256') - dirhash = dirhash_mp_comp(*args, include_empty=False) - dirhash_empty = dirhash_mp_comp(*args, include_empty=True) - assert dirhash != dirhash_empty + dirhash_ = dirhash_mp_comp( + *args, + empty_dirs=False + ) + dirhash_empty = dirhash_mp_comp( + *args, + empty_dirs=True + ) + assert dirhash_ != dirhash_empty def test_chunksize(self): self.mkdirs('root') @@ -502,12 +591,13 @@ def test_chunksize(self): hash_value = dirhash_mp_comp(self.path_to('root'), 'sha256') for chunk_size in [2**4, 2**8, 2**16]: - assert ( - dirhash_mp_comp(self.path_to('root'), 'sha256', chunk_size=chunk_size) == - hash_value - ) + assert dirhash_mp_comp( + self.path_to('root'), + 'sha256', + chunk_size=chunk_size + ) == hash_value - def test_content_only(self): + def test_data_only(self): self.mkdirs('root1') self.mkfile('root1/a.txt', 'abc') self.mkfile('root1/b.txt', 'def') @@ -519,13 +609,18 @@ def test_content_only(self): hash2 = dirhash_mp_comp(self.path_to('root2'), 'sha256') assert hash1 != hash2 - # with `content_only` hash remains the same as long as order of files is the - # same (based on sorting of file paths) - chash1 = dirhash_mp_comp(self.path_to('root1'), 'sha256', content_only=True) - chash2 = dirhash_mp_comp(self.path_to('root2'), 'sha256', content_only=True) - assert chash1 == chash2 + # with entry hash remains the same as long as order of files is the + # same + [dhash1, dhash2] = [ + dirhash_mp_comp( + self.path_to(root), + 'sha256', + entry_properties=['data'] + ) for root in ['root1', 'root2'] + ] + assert dhash1 == dhash2 - def test_paths_only(self): + def test_name_only(self): self.mkdirs('root1') self.mkfile('root1/a.txt', 'abc') 
self.mkfile('root1/b.txt', 'def') @@ -537,61 +632,59 @@ def test_paths_only(self): hash2 = dirhash_mp_comp(self.path_to('root2'), 'sha256') assert hash1 != hash2 - chash1 = dirhash_mp_comp(self.path_to('root1'), 'sha256', paths_only=True) - chash2 = dirhash_mp_comp(self.path_to('root2'), 'sha256', paths_only=True) - assert chash1 == chash2 - - def test_raise_on_content_only_and_paths_only(self): - self.mkdirs('root1') - self.mkfile('root1/a.txt', 'abc') - dirhash_mp_comp(self.path_to('root1'), 'sha256') # ok! - with pytest.raises(ValueError): + [dhash1, dhash2] = [ dirhash_mp_comp( - self.path_to('root1'), + self.path_to(root), 'sha256', - content_only=True, - paths_only=True - ) + entry_properties=['name'] + ) for root in ['root1', 'root2'] + ] + assert dhash1 == dhash2 - def test_collision_attempt(self): + def test_is_link_property(self): self.mkdirs('root1') - self.mkfile('root1/ab') - self.mkfile('root1/c') - hash1 = dirhash_mp_comp(self.path_to('root1'), 'sha256') - + self.mkfile('root1/a.txt', 'abc') + self.mkfile('root1/b.txt', 'def') self.mkdirs('root2') - self.mkfile('root2/a') - self.mkfile('root2/bc') - hash2 = dirhash_mp_comp(self.path_to('root2'), 'sha256') + self.mkfile('b_target', 'def') + self.mkfile('root2/a.txt', 'abc') + self.symlink('b_target', 'root2/b.txt') - assert not hash1 == hash2 + hash1 = dirhash_mp_comp(self.path_to('root1'), 'sha256') + hash2 = dirhash_mp_comp(self.path_to('root2'), 'sha256') + assert hash1 == hash2 - def test_ignorefile(self): + for entry_properties in [ + ['name', 'data', 'is_link'], + ['name', 'is_link'], + ['data', 'is_link'], + ]: + [hash1, hash2] = [ + dirhash_mp_comp( + self.path_to(root), + 'sha256', + entry_properties=entry_properties + ) for root in ['root1', 'root2'] + ] + assert hash1 != hash2 + + def test_raise_on_not_at_least_one_of_name_and_data(self): self.mkdirs('root1') - self.mkdirs('root2') - for fname in ['a', '.b', 'c.txt']: - self.mkfile(os.path.join('root1', fname)) - self.mkfile(os.path.join('root2', fname)) + self.mkfile('root1/a.txt', 'abc') + dirhash_mp_comp(self.path_to('root1'), 'sha256') # check ok + with pytest.raises(ValueError): + dirhash_mp_comp( + self.path_to('root1'), + 'sha256', + entry_properties=[] + ) - ignorefile = ( - '# my dirhash ignore patterns\n' - '.*\n' - ) - self.mkfile('root1/.dirhashignore', ignorefile) - assert ( - dirhash_mp_comp(self.path_to('root1'), 'sha256') == - dirhash_mp_comp(self.path_to('root2'), 'sha256', ignore=['.*']) - ) - assert ( - dirhash_mp_comp(self.path_to('root1'), 'sha256', ignore=['*.txt']) == - dirhash_mp_comp(self.path_to('root2'), 'sha256', ignore=['.*', '*.txt']) - ) - # ignore file should _not_ be ignored by default: - self.mkfile('root1/.dirhashignore', '# empty ignorefile') - assert ( - dirhash_mp_comp(self.path_to('root1'), 'sha256') != - dirhash_mp_comp(self.path_to('root2'), 'sha256') - ) + with pytest.raises(ValueError): + dirhash_mp_comp( + self.path_to('root1'), + 'sha256', + entry_properties=['is_link'] + ) def test_multiproc_speedup(self): @@ -609,14 +702,126 @@ def test_multiproc_speedup(self): assert elapsed_sequential > expected_min_elapsed start = time() - dirhash(self.path_to('root'), algorithm=SlowHasher, workers=num_files) + dirhash(self.path_to('root'), algorithm=SlowHasher, jobs=num_files) end = time() elapsed_muliproc = end - start - assert elapsed_muliproc < expected_min_elapsed / 2 # at least half! 
+ assert elapsed_muliproc < expected_min_elapsed + # just check "any speedup", the overhead varies (and is high on Travis) + + def test_cache_by_real_path_speedup(self, tmpdir): + num_links = 10 + + # reference run without links + root1 = tmpdir.join('root1') + root1.ensure(dir=True) + for i in range(num_links): + file_i = root1.join('file_{}'.format(i)) + file_i.write('< one chunk content', ensure=True) + + wait_time = SlowHasher.wait_time + expected_min_elapsed = wait_time * num_links + start = time() + dirhash(root1, algorithm=SlowHasher) + end = time() + elapsed_sequential = end - start + assert elapsed_sequential > expected_min_elapsed + overhead = elapsed_sequential - expected_min_elapsed + + # all links to same file + root2 = tmpdir.join('root2') + root2.ensure(dir=True) + target_file = tmpdir.join('target_file') + target_file.ensure() + for i in range(num_links): + root2.join('link_{}'.format(i)).mksymlinkto(target_file) + + overhead_margin_factor = 1.5 + expected_max_elapsed = overhead * overhead_margin_factor + wait_time + assert expected_max_elapsed < expected_min_elapsed + start = time() + dirhash(root2, algorithm=SlowHasher) + end = time() + elapsed_cache = end - start + assert elapsed_cache < expected_max_elapsed + + def test_cache_together_with_multiprocess_speedup(self, tmpdir): + target_file_names = ['target_file_1', 'target_file_2'] + num_links_per_file = 10 + num_links = num_links_per_file * len(target_file_names) + + # reference run without links + root1 = tmpdir.join('root1') + root1.ensure(dir=True) + for i in range(num_links): + file_i = root1.join('file_{}'.format(i)) + file_i.write('< one chunk content', ensure=True) + + jobs = 2 + wait_time = SlowHasher.wait_time + expected_min_elapsed = wait_time * num_links / jobs + start = time() + dirhash(root1, algorithm=SlowHasher, jobs=jobs) + end = time() + elapsed_sequential = end - start + assert elapsed_sequential > expected_min_elapsed + overhead = elapsed_sequential - expected_min_elapsed + + root2 = tmpdir.join('root2') + root2.ensure(dir=True) + for i, target_file_name in enumerate(target_file_names): + target_file = tmpdir.join(target_file_name) + target_file.write('< one chunk content', ensure=True) + for j in range(num_links_per_file): + root2.join('link_{}_{}'.format(i, j)).mksymlinkto(target_file) + + overhead_margin_factor = 1.5 + expected_max_elapsed = overhead * overhead_margin_factor + wait_time * 2 + assert expected_max_elapsed < expected_min_elapsed + start = time() + dirhash(root2, algorithm=SlowHasher, jobs=jobs) + end = time() + elapsed_mp_cache = end - start + assert elapsed_mp_cache < expected_max_elapsed + + def test_hash_cyclic_link_to_root(self): + self.mkdirs('root/d1') + self.symlink('root', 'root/d1/link_back') + dirhash( + self.path_to('root'), + 'sha256', + allow_cyclic_links=True + ) + + def test_hash_cyclic_link(self): + self.mkdirs('root/d1/d2') + self.symlink('root/d1', 'root/d1/d2/link_back') + dirhash( + self.path_to('root'), + 'sha256', + allow_cyclic_links=True + ) + + def test_pass_filtering_instance(self): + self.mkdirs('root') + self.mkfile('root/f1', '') + dirhash_impl(self.path_to('root'), 'sha256', filter_=Filter()) + + def test_pass_protocol_instance(self): + self.mkdirs('root') + self.mkfile('root/f1', '') + dirhash_impl(self.path_to('root'), 'sha256', protocol=Protocol()) + + def test_raise_on_wrong_type(self): + self.mkdirs('root') + self.mkfile('root/f1', '') + with pytest.raises(TypeError): + dirhash_impl(self.path_to('root'), 'sha256', filter_='') + with 
pytest.raises(TypeError): + dirhash_impl(self.path_to('root'), 'sha256', protocol='') class SlowHasher(object): - wait_time = 0.1 + wait_time = 0.05 def __init__(self, *args, **kwargs): pass @@ -627,3 +832,36 @@ def update(self, data): def hexdigest(self): return '' + + +class IdentityHasher(object): + + def __init__(self, initial_data=b''): + self.datas = [initial_data.decode('utf-8')] + + def update(self, data): + self.datas.append(data.decode('utf-8')) + + def hexdigest(self): + return ''.join(self.datas) + + +class TestProtocol(object): + + def test_raise_for_invalid_entry_properties(self): + with pytest.raises(ValueError): + Protocol(entry_properties=['not-valid']) + + def test_raise_for_invalid_allow_cyclic_links(self): + with pytest.raises(ValueError): + Protocol(allow_cyclic_links='not-valid') + + +def mock_func(x): + return x * 2 + + +@pytest.mark.parametrize('jobs', [1, 2, 4]) +def test_parmap(jobs): + inputs = [1, 2, 3, 4] + assert _parmap(mock_func, inputs, jobs=jobs) == [2, 4, 6, 8]
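
For reference, below is a minimal usage sketch of the renamed library API that the updated CLI and tests above exercise (`included_paths`, `dirhash`, `dirhash_impl`, `Filter`, `Protocol`, and the new `empty_dirs`, `linked_files`/`linked_dirs`, `entry_properties`, `allow_cyclic_links` and `jobs` arguments). The directory path and variable names are made up for illustration, and passing both `filter_` and `protocol` to `dirhash_impl` in a single call is assumed to be supported (the tests only pass them one at a time); consult `dirhash -h` and the source for the authoritative signatures.

```python
# Usage sketch only: names and keyword arguments are taken from the calls in
# the tests above; the example path is hypothetical.
from dirhash import Filter, Protocol, dirhash, dirhash_impl, included_paths

root = "path/to/some/directory"  # hypothetical example path

# List the paths that would be included, given the filtering arguments
# (replaces the removed `get_included_paths` and `include_empty`).
paths = included_paths(root, match=["*", "!.*"], empty_dirs=True)

# Hash file content only (replaces `--content-only`/`content_only=True`) and
# read/hash files in two parallel processes (replaces `workers=2`).
data_hash = dirhash(root, "sha256", entry_properties=["data"], jobs=2)

# Tolerate cyclic symlinks: the relative path back to the link target is
# hashed instead of raising SymlinkRecursionError.
cyclic_hash = dirhash(root, "sha256", allow_cyclic_links=True)

# Exclude symlinked files and directories (replaces the removed `follow_links`).
no_links_hash = dirhash(root, "md5", linked_files=False, linked_dirs=False)

# Lower-level entry point that accepts explicit Filter/Protocol instances
# (assumed to accept both at once).
explicit_hash = dirhash_impl(root, "sha256", filter_=Filter(), protocol=Protocol())
```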