Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

Here are some of [dida](https://dida.do/)'s public projects.
* [Handwriting app](https://github.com/dida-do/public/tree/master/handwriting_app)
* [Labeling tool](https://github.com/dida-do/public/tree/master/labelingtool)
* [Labeling tool](https://github.com/dida-do/public/tree/master/labelingtool)
* [Explainable ROCKET](https://github.com/dida-do/public/tree/master/xrocket)
7 changes: 7 additions & 0 deletions xrocket/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ignore contents of these directories
.venv/*

# ignore these extensions
*.csv
*.zip
*.pyc
31 changes: 31 additions & 0 deletions xrocket/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# X-ROCKET code repository

To use the X-ROCKET encoder for timeseries embeddings, install the dependencies in `requirements.txt` and import as follows:
```python
from xrocket.encoder import XRocket
```

Then initialize the encoder with the desired hyperparameters:
```python
XRocket(
in_channels: int,
max_kernel_span: int,
combination_order: int = 1,
combination_method: str = "additive",
feature_cap: int = 10_000,
kernel_length: int = 9,
max_dilations: int = 32,
)
```

The following hyperparameters can be chosen:
- in_channels: The number of channels in the data.
- max_kernel_span: Maximum length to be considered for pattern search,
usually set to the number of time-observations in a typical timeseries.
- combination_order: The maximum number of channels to be interacted, default=1.
- combination_method: Keyword for the channel mixing method, default='additive'.
- feature_cap: Maximum number of embedding values to be considered, default=10,000.
- kernel_length: The length of the 1D convolutional kernels, default=9.
- max_dilations: The maximum number of distinct dilation values, default=32.

If the encoder thresholds are not explicitly fit to a data example before encoding, the first example will automatically define the thresholds.
217 changes: 217 additions & 0 deletions xrocket/example.ipynb

Large diffs are not rendered by default.

114 changes: 114 additions & 0 deletions xrocket/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile requirements.in
#
aeon==0.4.0
# via -r requirements.in
attrs==23.1.0
# via aeon
cmake==3.27.4.1
# via triton
deprecated==1.2.14
# via aeon
filelock==3.12.4
# via
# torch
# triton
jinja2==3.1.2
# via torch
joblib==1.3.2
# via scikit-learn
lit==16.0.6
# via triton
llvmlite==0.40.1
# via numba
markupsafe==2.1.3
# via jinja2
mpmath==1.3.0
# via sympy
networkx==3.1
# via torch
numba==0.57.1
# via aeon
numpy==1.24.4
# via
# -r requirements.in
# aeon
# numba
# pandas
# scikit-learn
# scipy
nvidia-cublas-cu11==11.10.3.66
# via
# nvidia-cudnn-cu11
# nvidia-cusolver-cu11
# torch
nvidia-cuda-cupti-cu11==11.7.101
# via torch
nvidia-cuda-nvrtc-cu11==11.7.99
# via torch
nvidia-cuda-runtime-cu11==11.7.99
# via torch
nvidia-cudnn-cu11==8.5.0.96
# via torch
nvidia-cufft-cu11==10.9.0.58
# via torch
nvidia-curand-cu11==10.2.10.91
# via torch
nvidia-cusolver-cu11==11.4.0.1
# via torch
nvidia-cusparse-cu11==11.7.4.91
# via torch
nvidia-nccl-cu11==2.14.3
# via torch
nvidia-nvtx-cu11==11.7.91
# via torch
packaging==23.1
# via aeon
pandas==2.0.3
# via
# -r requirements.in
# aeon
python-dateutil==2.8.2
# via pandas
pytz==2023.3.post1
# via pandas
scikit-learn==1.2.2
# via
# -r requirements.in
# aeon
scipy==1.11.2
# via
# aeon
# scikit-learn
six==1.16.0
# via python-dateutil
sympy==1.12
# via torch
threadpoolctl==3.2.0
# via scikit-learn
torch==2.0.1
# via
# -r requirements.in
# triton
triton==2.0.0
# via torch
typing-extensions==4.7.1
# via torch
tzdata==2023.3
# via pandas
wheel==0.41.2
# via
# nvidia-cublas-cu11
# nvidia-cuda-cupti-cu11
# nvidia-cuda-runtime-cu11
# nvidia-curand-cu11
# nvidia-cusparse-cu11
# nvidia-nvtx-cu11
wrapt==1.15.0
# via deprecated

# The following packages are considered to be unsafe in a requirements file:
# setuptools
19 changes: 19 additions & 0 deletions xrocket/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Packaging configuration for the xrocket distribution."""
from setuptools import find_packages, setup

# Reuse the README as the long description shown on package indexes.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="xrocket",
    version="0.1",
    description="explainable rocket implementation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="dida Datenschmiede GmbH",
    author_email="info@dida.do",
    packages=find_packages(),
    install_requires=[
        # The PyPI distribution name for PyTorch is "torch", not "pytorch"
        # ("pytorch" is an unrelated placeholder package that fails on
        # install); requirements.txt pins torch==2.0.1.
        "torch",
    ],
    # The package code uses builtin generics in annotations evaluated at
    # definition time (e.g. ``list[tuple]`` in xrocket/block.py), which
    # require Python 3.9 or newer.
    python_requires=">=3.9",
)
1 change: 1 addition & 0 deletions xrocket/xrocket/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from xrocket.encoder import XRocket
159 changes: 159 additions & 0 deletions xrocket/xrocket/block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import torch
from torch import nn
from xrocket.convolutions import RocketConv
from xrocket.multichannel import ChannelMix
from xrocket.pooling import PPVThresholds

class DilationBlock(nn.Module):
    """MiniRocket-style encoder block operating at one fixed dilation value.

    Encodes an input timeseries using a single, fixed kernel dilation by
    chaining three sublayers:
    - RocketConv (dilated convolutions over the time axis)
    - ChannelMix (combination of channel activations)
    - PPVThresholds (proportion-of-positive-values pooling)
    A forward pass maps a tensor of shape (Batch * Channels * Timeobs)
    to a tensor of shape (Batch * (Features/Dilation)).

    This implementation is based on the descriptions in:
    Dempster, Angus, Daniel F. Schmidt, and Geoffrey I. Webb.
    "Minirocket: A very fast (almost) deterministic transform for time series classification."
    Proceedings of the 27th ACM SIGKDD conference on knowledge discovery & data mining. 2021.

    The block structure deviates from the original paper and sublayers have differences as
    explained in the respective implementations.

    Attributes:
        in_channels: Number of channels in each timeseries.
        dilation: The dilation value to apply to the convolutional kernels.
        num_thresholds: The number of thresholds per channel combination.
        combination_order: The maximum number of channels to be interacted.
        combination_method: The channel mixing method, either 'additive' or 'multiplicative'.
        kernel_length: Number of parameters in each kernel, default = 9.
        num_kernels: The number of kernels considered in the module.
        num_combinations: The number of channel combinations considered in the module.
        feature_names: (pattern, dilation, channels, threshold) tuples to identify features.
        is_fitted: Indicates that thresholds are fitted to a data example.
    """

    def __init__(
        self,
        in_channels: int,
        dilation: int,
        num_thresholds: int = 1,
        combination_order: int = 1,
        combination_method: str = "additive",
        kernel_length: int = 9,
    ):
        """Instantiate the three sublayers that make up the block.

        Args:
            in_channels: Number of channels in each timeseries.
            dilation: The dilation value to apply to the convolutional kernels.
            num_thresholds: The number of thresholds per channel combination.
            combination_order: The maximum number of channels to be interacted.
            combination_method: Keyword for the channel mixing method, default='additive'.
            kernel_length: Number of parameters in each kernel, default = 9.
        """
        super().__init__()

        # Convolution sublayer; its output sizes feed the mixing sublayer below.
        self.conv = RocketConv(
            in_channels=in_channels,
            dilation=dilation,
            kernel_length=kernel_length,
        )
        # Channel-mixing sublayer, sized from the convolution outputs.
        self.mix = ChannelMix(
            in_channels=self.conv.out_channels,
            in_kernels=self.conv.num_kernels,
            order=combination_order,
            method=combination_method,
        )
        # Pooling sublayer; thresholds are fitted lazily on first use.
        self.thresholds = PPVThresholds(
            num_thresholds=num_thresholds,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode a batch of timeseries into flat feature vectors.

        Pooling thresholds will be fit to the first example if not yet fitted.

        Args:
            x: Tensor of shape (Batch * Channels * Timeobs)

        Returns:
            Tensor of shape (Batch * (Features/Dilations))
        """
        # Pipe the input through conv -> mix -> thresholds, then flatten
        # everything but the batch dimension.
        pooled = self.thresholds(self.mix(self.conv(x)))
        return pooled.flatten(start_dim=1)

    @property
    def in_channels(self) -> int:
        """The number of incoming channels."""
        return self.conv.in_channels

    @property
    def dilation(self) -> int:
        """The value to dilute the kernels with over the time dimension."""
        return self.conv.dilation

    @property
    def combination_order(self) -> int:
        """The highest number of channels to combine in a feature."""
        return self.mix.order

    @property
    def num_kernels(self) -> int:
        """The number of kernels in the convolutional block."""
        return self.conv.num_kernels

    @property
    def num_combinations(self) -> int:
        """The total number of channel combinations."""
        return self.mix.num_combinations

    @property
    def num_thresholds(self) -> int:
        """The number of thresholds to apply to each channel combinations."""
        return self.thresholds.num_thresholds

    def fit(self, x: torch.Tensor) -> None:
        """Derive pooling threshold values from an example input.

        Accepts either a single example or a batch as an input.

        Args:
            x: Tensor of shape (Channels * Timeobs) or
                Tensor of shape (Batch * Channels * Timeobs)
        """
        activations = self.mix(self.conv(x))
        self.thresholds.fit(activations)

    @property
    def is_fitted(self) -> bool:
        """Indicates if module biases were fitted to data."""
        return self.thresholds.is_fitted

    @property
    def feature_names(self) -> list[tuple]:
        """(pattern, dilation, channels, threshold) tuples to identify features."""
        assert self.is_fitted, "module needs to be fitted for thresholds to be named"
        # Tile patterns and channel combinations so each aligns positionally
        # with the flat list of fitted thresholds.
        tiled_patterns = self.conv.patterns * self.num_combinations * self.num_thresholds
        tiled_channels = self.mix.combinations * self.num_thresholds
        names = []
        for pattern, channels, threshold in zip(
            tiled_patterns,
            tiled_channels,
            self.thresholds.thresholds,
        ):
            names.append(
                (
                    str(pattern),
                    self.dilation,
                    str(channels),
                    f"{threshold:.4f}",
                )
            )
        return names
Loading