Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

Here are some of [dida](https://dida.do/)'s public projects.
* [Handwriting app](https://github.com/dida-do/public/tree/master/handwriting_app)
* [Labeling tool](https://github.com/dida-do/public/tree/master/labelingtool)
* [Labeling tool](https://github.com/dida-do/public/tree/master/labelingtool)
* [Explainable ROCKET](https://github.com/dida-do/public/tree/master/xrocket)
7 changes: 7 additions & 0 deletions xrocket/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ignore contents of these directories
.venv/*

# ignore these extensions
*.csv
*.zip
*.pyc
31 changes: 31 additions & 0 deletions xrocket/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# X-ROCKET code repository

To use the X-ROCKET encoder for timeseries embeddings, install the dependencies in `requirements.txt` and import as follows:
```python
from xrocket.encoder import XRocket
```

Then initialize the encoder with the desired hyperparameters:
```python
XRocket(
in_channels: int,
max_kernel_span: int,
combination_order: int = 1,
combination_method: str = "additive",
feature_cap: int = 10_000,
kernel_length: int = 9,
max_dilations: int = 32,
)
```

The following hyperparameters can be chosen:
- in_channels: The number of channels in the data.
- max_kernel_span: Maximum length to be considered for pattern search,
usually set to the number of time-observations in a typical timeseries.
- combination_order: The maximum number of channels to be interacted, default=1.
- combination_method: Keyword for the channel mixing method, default='additive'.
- feature_cap: Maximum number of embedding values to be considered, default=10,000.
- kernel_length: The length of the 1D convolutional kernels, default=9.
- max_dilations: The maximum number of distinct dilation values, default=32.

If the encoder thresholds are not explicitly fit to a data example before encoding, the first example will automatically define the thresholds.
217 changes: 217 additions & 0 deletions xrocket/example.ipynb

Large diffs are not rendered by default.

114 changes: 114 additions & 0 deletions xrocket/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile requirements.in
#
aeon==0.4.0
# via -r requirements.in
attrs==23.1.0
# via aeon
cmake==3.27.4.1
# via triton
deprecated==1.2.14
# via aeon
filelock==3.12.4
# via
# torch
# triton
jinja2==3.1.2
# via torch
joblib==1.3.2
# via scikit-learn
lit==16.0.6
# via triton
llvmlite==0.40.1
# via numba
markupsafe==2.1.3
# via jinja2
mpmath==1.3.0
# via sympy
networkx==3.1
# via torch
numba==0.57.1
# via aeon
numpy==1.24.4
# via
# -r requirements.in
# aeon
# numba
# pandas
# scikit-learn
# scipy
nvidia-cublas-cu11==11.10.3.66
# via
# nvidia-cudnn-cu11
# nvidia-cusolver-cu11
# torch
nvidia-cuda-cupti-cu11==11.7.101
# via torch
nvidia-cuda-nvrtc-cu11==11.7.99
# via torch
nvidia-cuda-runtime-cu11==11.7.99
# via torch
nvidia-cudnn-cu11==8.5.0.96
# via torch
nvidia-cufft-cu11==10.9.0.58
# via torch
nvidia-curand-cu11==10.2.10.91
# via torch
nvidia-cusolver-cu11==11.4.0.1
# via torch
nvidia-cusparse-cu11==11.7.4.91
# via torch
nvidia-nccl-cu11==2.14.3
# via torch
nvidia-nvtx-cu11==11.7.91
# via torch
packaging==23.1
# via aeon
pandas==2.0.3
# via
# -r requirements.in
# aeon
python-dateutil==2.8.2
# via pandas
pytz==2023.3.post1
# via pandas
scikit-learn==1.2.2
# via
# -r requirements.in
# aeon
scipy==1.11.2
# via
# aeon
# scikit-learn
six==1.16.0
# via python-dateutil
sympy==1.12
# via torch
threadpoolctl==3.2.0
# via scikit-learn
torch==2.0.1
# via
# -r requirements.in
# triton
triton==2.0.0
# via torch
typing-extensions==4.7.1
# via torch
tzdata==2023.3
# via pandas
wheel==0.41.2
# via
# nvidia-cublas-cu11
# nvidia-cuda-cupti-cu11
# nvidia-cuda-runtime-cu11
# nvidia-curand-cu11
# nvidia-cusparse-cu11
# nvidia-nvtx-cu11
wrapt==1.15.0
# via deprecated

# The following packages are considered to be unsafe in a requirements file:
# setuptools
19 changes: 19 additions & 0 deletions xrocket/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Packaging configuration for the xrocket distribution."""
from setuptools import find_packages, setup

# Reuse the README as the long description shown on package indexes.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="xrocket",
    version="0.1",
    description="explainable rocket implementation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="dida Datenschmiede GmbH",
    author_email="info@dida.do",
    packages=find_packages(),
    install_requires=[
        # The PyPI distribution name for PyTorch is "torch", not "pytorch"
        # ("pytorch" is an unrelated placeholder package that fails on
        # install); requirements.txt pins torch==2.0.1.
        "torch",
    ],
    # The package code uses builtin generics in annotations evaluated at
    # definition time (e.g. ``list[tuple]`` in xrocket/block.py), which
    # require Python 3.9 or newer.
    python_requires=">=3.9",
)
1 change: 1 addition & 0 deletions xrocket/xrocket/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from xrocket.encoder import XRocket
159 changes: 159 additions & 0 deletions xrocket/xrocket/block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import torch
from torch import nn
from xrocket.convolutions import RocketConv
from xrocket.multichannel import ChannelMix
from xrocket.pooling import PPVThresholds

class DilationBlock(nn.Module):
    """MiniRocket-style encoder block operating at one fixed dilation value.

    Encodes an input timeseries using a single, fixed kernel dilation by
    chaining three sublayers:
    - RocketConv (dilated convolutions over the time axis)
    - ChannelMix (combination of channel activations)
    - PPVThresholds (proportion-of-positive-values pooling)
    A forward pass maps a tensor of shape (Batch * Channels * Timeobs)
    to a tensor of shape (Batch * (Features/Dilation)).

    This implementation is based on the descriptions in:
    Dempster, Angus, Daniel F. Schmidt, and Geoffrey I. Webb.
    "Minirocket: A very fast (almost) deterministic transform for time series classification."
    Proceedings of the 27th ACM SIGKDD conference on knowledge discovery & data mining. 2021.

    The block structure deviates from the original paper and sublayers have differences as
    explained in the respective implementations.

    Attributes:
        in_channels: Number of channels in each timeseries.
        dilation: The dilation value to apply to the convolutional kernels.
        num_thresholds: The number of thresholds per channel combination.
        combination_order: The maximum number of channels to be interacted.
        combination_method: The channel mixing method, either 'additive' or 'multiplicative'.
        kernel_length: Number of parameters in each kernel, default = 9.
        num_kernels: The number of kernels considered in the module.
        num_combinations: The number of channel combinations considered in the module.
        feature_names: (pattern, dilation, channels, threshold) tuples to identify features.
        is_fitted: Indicates that thresholds are fitted to a data example.
    """

    def __init__(
        self,
        in_channels: int,
        dilation: int,
        num_thresholds: int = 1,
        combination_order: int = 1,
        combination_method: str = "additive",
        kernel_length: int = 9,
    ):
        """Instantiate the three sublayers that make up the block.

        Args:
            in_channels: Number of channels in each timeseries.
            dilation: The dilation value to apply to the convolutional kernels.
            num_thresholds: The number of thresholds per channel combination.
            combination_order: The maximum number of channels to be interacted.
            combination_method: Keyword for the channel mixing method, default='additive'.
            kernel_length: Number of parameters in each kernel, default = 9.
        """
        super().__init__()

        # Convolution sublayer; its output sizes feed the mixing sublayer below.
        self.conv = RocketConv(
            in_channels=in_channels,
            dilation=dilation,
            kernel_length=kernel_length,
        )
        # Channel-mixing sublayer, sized from the convolution outputs.
        self.mix = ChannelMix(
            in_channels=self.conv.out_channels,
            in_kernels=self.conv.num_kernels,
            order=combination_order,
            method=combination_method,
        )
        # Pooling sublayer; thresholds are fitted lazily on first use.
        self.thresholds = PPVThresholds(
            num_thresholds=num_thresholds,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode a batch of timeseries into flat feature vectors.

        Pooling thresholds will be fit to the first example if not yet fitted.

        Args:
            x: Tensor of shape (Batch * Channels * Timeobs)

        Returns:
            Tensor of shape (Batch * (Features/Dilations))
        """
        # Pipe the input through conv -> mix -> thresholds, then flatten
        # everything but the batch dimension.
        pooled = self.thresholds(self.mix(self.conv(x)))
        return pooled.flatten(start_dim=1)

    @property
    def in_channels(self) -> int:
        """The number of incoming channels."""
        return self.conv.in_channels

    @property
    def dilation(self) -> int:
        """The value to dilute the kernels with over the time dimension."""
        return self.conv.dilation

    @property
    def combination_order(self) -> int:
        """The highest number of channels to combine in a feature."""
        return self.mix.order

    @property
    def num_kernels(self) -> int:
        """The number of kernels in the convolutional block."""
        return self.conv.num_kernels

    @property
    def num_combinations(self) -> int:
        """The total number of channel combinations."""
        return self.mix.num_combinations

    @property
    def num_thresholds(self) -> int:
        """The number of thresholds to apply to each channel combinations."""
        return self.thresholds.num_thresholds

    def fit(self, x: torch.Tensor) -> None:
        """Derive pooling threshold values from an example input.

        Accepts either a single example or a batch as an input.

        Args:
            x: Tensor of shape (Channels * Timeobs) or
                Tensor of shape (Batch * Channels * Timeobs)
        """
        activations = self.mix(self.conv(x))
        self.thresholds.fit(activations)

    @property
    def is_fitted(self) -> bool:
        """Indicates if module biases were fitted to data."""
        return self.thresholds.is_fitted

    @property
    def feature_names(self) -> list[tuple]:
        """(pattern, dilation, channels, threshold) tuples to identify features."""
        assert self.is_fitted, "module needs to be fitted for thresholds to be named"
        # Tile patterns and channel combinations so each aligns positionally
        # with the flat list of fitted thresholds.
        tiled_patterns = self.conv.patterns * self.num_combinations * self.num_thresholds
        tiled_channels = self.mix.combinations * self.num_thresholds
        names = []
        for pattern, channels, threshold in zip(
            tiled_patterns,
            tiled_channels,
            self.thresholds.thresholds,
        ):
            names.append(
                (
                    str(pattern),
                    self.dilation,
                    str(channels),
                    f"{threshold:.4f}",
                )
            )
        return names
Loading